1 /*-
2 * Copyright (c) 2013-2021, Mellanox Technologies, Ltd. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26 #include "opt_rss.h"
27 #include "opt_ratelimit.h"
28
29 #include <linux/kref.h>
30 #include <linux/random.h>
31 #include <linux/delay.h>
32 #include <linux/sched.h>
33 #include <rdma/ib_umem.h>
34 #include <rdma/ib_umem_odp.h>
35 #include <rdma/ib_verbs.h>
36 #include <dev/mlx5/mlx5_ib/mlx5_ib.h>
37
enum {
	/* add_keys() stops with -EAGAIN once ent->pending reaches this value. */
	MAX_PENDING_REG_MR = 8,
};

/* Alignment used for the page-address buffers handed to UMR in this file. */
#define MLX5_UMR_ALIGN 2048
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/*
 * Static fallback buffer used by mlx5_ib_update_mtt() when it cannot get a
 * zeroed page; access is serialized by the mutex defined right after it.
 */
static __be64 mlx5_ib_update_mtt_emergency_buffer[
		MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
	__aligned(MLX5_UMR_ALIGN);
static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
#endif

/* Forward declaration. */
static int clean_mr(struct mlx5_ib_mr *mr);
51
destroy_mkey(struct mlx5_ib_dev * dev,struct mlx5_ib_mr * mr)52 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
53 {
54 int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
55
56 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
57 /* Wait until all page fault handlers using the mr complete. */
58 synchronize_srcu(&dev->mr_srcu);
59 #endif
60
61 return err;
62 }
63
order2idx(struct mlx5_ib_dev * dev,int order)64 static int order2idx(struct mlx5_ib_dev *dev, int order)
65 {
66 struct mlx5_mr_cache *cache = &dev->cache;
67
68 if (order < cache->ent[0].order)
69 return 0;
70 else
71 return order - cache->ent[0].order;
72 }
73
use_umr_mtt_update(struct mlx5_ib_mr * mr,u64 start,u64 length)74 static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
75 {
76 return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
77 length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
78 }
79
80 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
update_odp_mr(struct mlx5_ib_mr * mr)81 static void update_odp_mr(struct mlx5_ib_mr *mr)
82 {
83 if (mr->umem->odp_data) {
84 /*
85 * This barrier prevents the compiler from moving the
86 * setting of umem->odp_data->private to point to our
87 * MR, before reg_umr finished, to ensure that the MR
88 * initialization have finished before starting to
89 * handle invalidations.
90 */
91 smp_wmb();
92 mr->umem->odp_data->private = mr;
93 /*
94 * Make sure we will see the new
95 * umem->odp_data->private value in the invalidation
96 * routines, before we can get page faults on the
97 * MR. Page faults can happen once we put the MR in
98 * the tree, below this line. Without the barrier,
99 * there can be a fault handling and an invalidation
100 * before umem->odp_data->private == mr is visible to
101 * the invalidation handler.
102 */
103 smp_wmb();
104 }
105 }
106 #endif
107
reg_mr_callback(int status,struct mlx5_async_work * context)108 static void reg_mr_callback(int status, struct mlx5_async_work *context)
109 {
110 struct mlx5_ib_mr *mr =
111 container_of(context, struct mlx5_ib_mr, cb_work);
112 struct mlx5_ib_dev *dev = mr->dev;
113 struct mlx5_mr_cache *cache = &dev->cache;
114 int c = order2idx(dev, mr->order);
115 struct mlx5_cache_ent *ent = &cache->ent[c];
116 u8 key;
117 unsigned long flags;
118 struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
119 int err;
120
121 spin_lock_irqsave(&ent->lock, flags);
122 ent->pending--;
123 spin_unlock_irqrestore(&ent->lock, flags);
124 if (status) {
125 mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
126 kfree(mr);
127 dev->fill_delay = 1;
128 mod_timer(&dev->delay_timer, jiffies + HZ);
129 return;
130 }
131
132 spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
133 key = dev->mdev->priv.mkey_key++;
134 spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
135 mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
136
137 cache->last_add = jiffies;
138
139 spin_lock_irqsave(&ent->lock, flags);
140 list_add_tail(&mr->list, &ent->head);
141 ent->cur++;
142 ent->size++;
143 spin_unlock_irqrestore(&ent->lock, flags);
144
145 spin_lock_irqsave(&table->lock, flags);
146 err = radix_tree_insert(&table->tree, mlx5_mkey_to_idx(mr->mmkey.key),
147 &mr->mmkey);
148 if (err)
149 pr_err("Error inserting to mkey tree. 0x%x\n", -err);
150 spin_unlock_irqrestore(&table->lock, flags);
151 }
152
add_keys(struct mlx5_ib_dev * dev,int c,int num)153 static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
154 {
155 struct mlx5_mr_cache *cache = &dev->cache;
156 struct mlx5_cache_ent *ent = &cache->ent[c];
157 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
158 struct mlx5_ib_mr *mr;
159 int npages = 1 << ent->order;
160 void *mkc;
161 u32 *in;
162 int err = 0;
163 int i;
164
165 in = kzalloc(inlen, GFP_KERNEL);
166 if (!in)
167 return -ENOMEM;
168
169 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
170 for (i = 0; i < num; i++) {
171 if (ent->pending >= MAX_PENDING_REG_MR) {
172 err = -EAGAIN;
173 break;
174 }
175
176 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
177 if (!mr) {
178 err = -ENOMEM;
179 break;
180 }
181 mr->order = ent->order;
182 mr->umred = 1;
183 mr->dev = dev;
184
185 MLX5_SET(mkc, mkc, free, 1);
186 MLX5_SET(mkc, mkc, umr_en, 1);
187 MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT);
188
189 MLX5_SET(mkc, mkc, qpn, 0xffffff);
190 MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
191 MLX5_SET(mkc, mkc, log_page_size, 12);
192
193 spin_lock_irq(&ent->lock);
194 ent->pending++;
195 spin_unlock_irq(&ent->lock);
196 err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
197 &dev->async_ctx, in, inlen,
198 mr->out, sizeof(mr->out),
199 reg_mr_callback, &mr->cb_work);
200 if (err) {
201 spin_lock_irq(&ent->lock);
202 ent->pending--;
203 spin_unlock_irq(&ent->lock);
204 mlx5_ib_warn(dev, "create mkey failed %d\n", err);
205 kfree(mr);
206 break;
207 }
208 }
209
210 kfree(in);
211 return err;
212 }
213
remove_keys(struct mlx5_ib_dev * dev,int c,int num)214 static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
215 {
216 struct mlx5_mr_cache *cache = &dev->cache;
217 struct mlx5_cache_ent *ent = &cache->ent[c];
218 struct mlx5_ib_mr *mr;
219 int err;
220 int i;
221
222 for (i = 0; i < num; i++) {
223 spin_lock_irq(&ent->lock);
224 if (list_empty(&ent->head)) {
225 spin_unlock_irq(&ent->lock);
226 return;
227 }
228 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
229 list_del(&mr->list);
230 ent->cur--;
231 ent->size--;
232 spin_unlock_irq(&ent->lock);
233 err = destroy_mkey(dev, mr);
234 if (err)
235 mlx5_ib_warn(dev, "failed destroy mkey\n");
236 else
237 kfree(mr);
238 }
239 }
240
someone_adding(struct mlx5_mr_cache * cache)241 static int someone_adding(struct mlx5_mr_cache *cache)
242 {
243 int i;
244
245 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
246 if (cache->ent[i].cur < cache->ent[i].limit)
247 return 1;
248 }
249
250 return 0;
251 }
252
__cache_work_func(struct mlx5_cache_ent * ent)253 static void __cache_work_func(struct mlx5_cache_ent *ent)
254 {
255 struct mlx5_ib_dev *dev = ent->dev;
256 struct mlx5_mr_cache *cache = &dev->cache;
257 int i = order2idx(dev, ent->order);
258 int err;
259
260 if (cache->stopped)
261 return;
262
263 ent = &dev->cache.ent[i];
264 if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
265 err = add_keys(dev, i, 1);
266 if (ent->cur < 2 * ent->limit) {
267 if (err == -EAGAIN) {
268 mlx5_ib_dbg(dev, "returned eagain, order %d\n",
269 i + 2);
270 queue_delayed_work(cache->wq, &ent->dwork,
271 msecs_to_jiffies(3));
272 } else if (err) {
273 mlx5_ib_warn(dev, "command failed order %d, err %d\n",
274 i + 2, err);
275 queue_delayed_work(cache->wq, &ent->dwork,
276 msecs_to_jiffies(1000));
277 } else {
278 queue_work(cache->wq, &ent->work);
279 }
280 }
281 } else if (ent->cur > 2 * ent->limit) {
282 /*
283 * The remove_keys() logic is performed as garbage collection
284 * task. Such task is intended to be run when no other active
285 * processes are running.
286 *
287 * The need_resched() will return TRUE if there are user tasks
288 * to be activated in near future.
289 *
290 * In such case, we don't execute remove_keys() and postpone
291 * the garbage collection work to try to run in next cycle,
292 * in order to free CPU resources to other tasks.
293 */
294 if (!need_resched() && !someone_adding(cache) &&
295 time_after(jiffies, cache->last_add + 300 * HZ)) {
296 remove_keys(dev, i, 1);
297 if (ent->cur > ent->limit)
298 queue_work(cache->wq, &ent->work);
299 } else {
300 queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
301 }
302 }
303 }
304
delayed_cache_work_func(struct work_struct * work)305 static void delayed_cache_work_func(struct work_struct *work)
306 {
307 struct mlx5_cache_ent *ent;
308
309 ent = container_of(work, struct mlx5_cache_ent, dwork.work);
310 __cache_work_func(ent);
311 }
312
cache_work_func(struct work_struct * work)313 static void cache_work_func(struct work_struct *work)
314 {
315 struct mlx5_cache_ent *ent;
316
317 ent = container_of(work, struct mlx5_cache_ent, work);
318 __cache_work_func(ent);
319 }
320
alloc_cached_mr(struct mlx5_ib_dev * dev,int order)321 static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
322 {
323 struct mlx5_mr_cache *cache = &dev->cache;
324 struct mlx5_ib_mr *mr = NULL;
325 struct mlx5_cache_ent *ent;
326 int c;
327 int i;
328
329 c = order2idx(dev, order);
330 if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
331 mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
332 return NULL;
333 }
334
335 for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
336 ent = &cache->ent[i];
337
338 mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
339
340 spin_lock_irq(&ent->lock);
341 if (!list_empty(&ent->head)) {
342 mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
343 list);
344 list_del(&mr->list);
345 ent->cur--;
346 spin_unlock_irq(&ent->lock);
347 if (ent->cur < ent->limit)
348 queue_work(cache->wq, &ent->work);
349 break;
350 }
351 spin_unlock_irq(&ent->lock);
352
353 queue_work(cache->wq, &ent->work);
354 }
355
356 if (!mr)
357 cache->ent[c].miss++;
358
359 return mr;
360 }
361
free_cached_mr(struct mlx5_ib_dev * dev,struct mlx5_ib_mr * mr)362 static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
363 {
364 struct mlx5_mr_cache *cache = &dev->cache;
365 struct mlx5_cache_ent *ent;
366 int shrink = 0;
367 int c;
368
369 c = order2idx(dev, mr->order);
370 if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
371 mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
372 return;
373 }
374 ent = &cache->ent[c];
375 spin_lock_irq(&ent->lock);
376 list_add_tail(&mr->list, &ent->head);
377 ent->cur++;
378 if (ent->cur > 2 * ent->limit)
379 shrink = 1;
380 spin_unlock_irq(&ent->lock);
381
382 if (shrink)
383 queue_work(cache->wq, &ent->work);
384 }
385
clean_keys(struct mlx5_ib_dev * dev,int c)386 static void clean_keys(struct mlx5_ib_dev *dev, int c)
387 {
388 struct mlx5_mr_cache *cache = &dev->cache;
389 struct mlx5_cache_ent *ent = &cache->ent[c];
390 struct mlx5_ib_mr *mr;
391 int err;
392
393 cancel_delayed_work(&ent->dwork);
394 while (1) {
395 spin_lock_irq(&ent->lock);
396 if (list_empty(&ent->head)) {
397 spin_unlock_irq(&ent->lock);
398 return;
399 }
400 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
401 list_del(&mr->list);
402 ent->cur--;
403 ent->size--;
404 spin_unlock_irq(&ent->lock);
405 err = destroy_mkey(dev, mr);
406 if (err)
407 mlx5_ib_warn(dev, "failed destroy mkey\n");
408 else
409 kfree(mr);
410 }
411 }
412
delay_time_func(unsigned long ctx)413 static void delay_time_func(unsigned long ctx)
414 {
415 struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
416
417 dev->fill_delay = 0;
418 }
419
mlx5_mr_cache_init(struct mlx5_ib_dev * dev)420 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
421 {
422 struct mlx5_mr_cache *cache = &dev->cache;
423 struct mlx5_cache_ent *ent;
424 int limit;
425 int i;
426
427 mutex_init(&dev->slow_path_mutex);
428 cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
429 if (!cache->wq) {
430 mlx5_ib_warn(dev, "failed to create work queue\n");
431 return -ENOMEM;
432 }
433
434 mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
435 setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
436 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
437 INIT_LIST_HEAD(&cache->ent[i].head);
438 spin_lock_init(&cache->ent[i].lock);
439
440 ent = &cache->ent[i];
441 INIT_LIST_HEAD(&ent->head);
442 spin_lock_init(&ent->lock);
443 ent->order = i + 2;
444 ent->dev = dev;
445
446 if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
447 limit = dev->mdev->profile->mr_cache[i].limit;
448 else
449 limit = 0;
450
451 INIT_WORK(&ent->work, cache_work_func);
452 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
453 ent->limit = limit;
454 queue_work(cache->wq, &ent->work);
455 }
456
457 return 0;
458 }
459
mlx5_mr_cache_cleanup(struct mlx5_ib_dev * dev)460 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
461 {
462 int i;
463
464 dev->cache.stopped = 1;
465 flush_workqueue(dev->cache.wq);
466 mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
467
468 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
469 clean_keys(dev, i);
470
471 destroy_workqueue(dev->cache.wq);
472 del_timer_sync(&dev->delay_timer);
473
474 return 0;
475 }
476
mlx5_ib_get_dma_mr(struct ib_pd * pd,int acc)477 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
478 {
479 struct mlx5_ib_dev *dev = to_mdev(pd->device);
480 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
481 struct mlx5_core_dev *mdev = dev->mdev;
482 struct mlx5_ib_mr *mr;
483 void *mkc;
484 u32 *in;
485 int err;
486
487 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
488 if (!mr)
489 return ERR_PTR(-ENOMEM);
490
491 in = kzalloc(inlen, GFP_KERNEL);
492 if (!in) {
493 err = -ENOMEM;
494 goto err_free;
495 }
496
497 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
498
499 MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
500 MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
501 MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
502 MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
503 MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
504 MLX5_SET(mkc, mkc, lr, 1);
505
506 MLX5_SET(mkc, mkc, length64, 1);
507 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
508 MLX5_SET(mkc, mkc, qpn, 0xffffff);
509 MLX5_SET64(mkc, mkc, start_addr, 0);
510
511 err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
512 if (err)
513 goto err_in;
514
515 kfree(in);
516 mr->ibmr.lkey = mr->mmkey.key;
517 mr->ibmr.rkey = mr->mmkey.key;
518 mr->umem = NULL;
519
520 return &mr->ibmr;
521
522 err_in:
523 kfree(in);
524
525 err_free:
526 kfree(mr);
527
528 return ERR_PTR(err);
529 }
530
/*
 * Number of octowords needed to hold the translation entries of a region.
 *
 * The region of @len bytes starting at @addr is extended by the offset of
 * @addr inside its page, rounded up to whole pages of @page_size bytes, and
 * the page count is halved (rounding up), i.e. two entries per octoword.
 */
static int get_octo_len(u64 addr, u64 len, int page_size)
{
	u64 span = len + (addr & (page_size - 1));
	int npages = ALIGN(span, page_size) >> ilog2(page_size);

	return (npages + 1) / 2;
}
540
use_umr(int order)541 static int use_umr(int order)
542 {
543 return order <= MLX5_MAX_UMR_SHIFT;
544 }
545
/*
 * Build the page-address array for @umem and DMA-map it for the device.
 *
 * @size:   out, mapped length in bytes (the array size rounded up to
 *          MLX5_UMR_MTT_ALIGNMENT)
 * @mr_pas: out, the raw kmalloc() pointer the caller must kfree(); the array
 *          itself starts at this pointer aligned up to MLX5_UMR_ALIGN
 * @dma:    out, DMA address of the aligned array
 *
 * Returns 0 on success or -ENOMEM on allocation or mapping failure. On a
 * mapping failure the buffer has already been freed here.
 */
static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
			  int npages, int page_shift, int *size,
			  __be64 **mr_pas, dma_addr_t *dma)
{
	struct device *ddev = dev->ib_dev.dma_device;
	__be64 *aligned;

	/*
	 * UMR copies MTTs in MLX5_UMR_MTT_ALIGNMENT byte units, so the
	 * buffer is sized up to that granularity; this keeps the device from
	 * reading garbage past the end of the array.
	 */
	*size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
	*mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
	if (*mr_pas == NULL)
		return -ENOMEM;

	aligned = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN);
	mlx5_ib_populate_pas(dev, umem, page_shift, aligned,
	    MLX5_IB_MTT_PRESENT);
	/* Zero the padding that follows the real page entries. */
	memset(aligned + npages, 0, *size - npages * sizeof(u64));

	*dma = dma_map_single(ddev, aligned, *size, DMA_TO_DEVICE);
	if (!dma_mapping_error(ddev, *dma))
		return 0;

	kfree(*mr_pas);
	return -ENOMEM;
}
576
/*
 * Fill the parts of a UMR work request shared by registration and
 * re-registration: the single scatter entry describing the page-address
 * array at @dma (@n entries, length rounded up to 64 bytes) and the
 * opcode, page count, page shift and mkey of @umrwr.
 *
 * With @n == 0 the request carries no scatter entries.
 */
static void prep_umr_wqe_common(struct ib_pd *pd, struct mlx5_umr_wr *umrwr,
				struct ib_sge *sg, u64 dma, int n, u32 key,
				int page_shift)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);

	sg->addr = dma;
	sg->length = ALIGN(sizeof(u64) * n, 64);
	sg->lkey = dev->umrc.pd->local_dma_lkey;

	umrwr->wr.next = NULL;
	umrwr->wr.sg_list = sg;
	umrwr->wr.num_sge = n ? 1 : 0;
	umrwr->wr.opcode = MLX5_IB_WR_UMR;

	umrwr->npages = n;
	umrwr->page_shift = page_shift;
	umrwr->mkey = key;
}
600
/*
 * Prepare a UMR registration work request: the common fields plus the
 * target virtual address, length, access flags and PD, with no send flags.
 */
static void prep_umr_reg_wqe(struct ib_pd *pd, struct mlx5_umr_wr *umrwr,
			     struct ib_sge *sg, u64 dma, int n, u32 key,
			     int page_shift, u64 virt_addr, u64 len,
			     int access_flags)
{
	prep_umr_wqe_common(pd, umrwr, sg, dma, n, key, page_shift);

	umrwr->wr.send_flags = 0;

	umrwr->pd = pd;
	umrwr->access_flags = access_flags;
	umrwr->length = len;
	umrwr->target.virt_addr = virt_addr;
}
615
/*
 * Prepare a UMR work request that unregisters mkey @key, flagged to fail
 * if the mkey is free. @dev is not used here.
 */
static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
			       struct mlx5_umr_wr *umrwr, u32 key)
{
	umrwr->mkey = key;
	umrwr->wr.opcode = MLX5_IB_WR_UMR;
	umrwr->wr.send_flags = MLX5_IB_SEND_UMR_UNREG |
	    MLX5_IB_SEND_UMR_FAIL_IF_FREE;
}
623
/*
 * Get the umem for a user range and compute its page layout.
 *
 * @npages, @page_shift, @ncont and @order are outputs filled in by
 * mlx5_ib_cont_pages().
 *
 * Returns the umem, the ERR_PTR from ib_umem_get(), or ERR_PTR(-EINVAL)
 * (after releasing the umem) when the region turns out to have zero pages.
 */
static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
				   int access_flags, int *npages,
				   int *page_shift, int *ncont, int *order)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_umem *umem;

	umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
	    0);
	if (IS_ERR(umem)) {
		mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
	    page_shift, ncont, order);
	if (*npages == 0) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		ib_umem_release(umem);
		return ERR_PTR(-EINVAL);
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
	    *npages, *ncont, *order, *page_shift);

	return umem;
}
648
mlx5_ib_umr_done(struct ib_cq * cq,struct ib_wc * wc)649 static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
650 {
651 struct mlx5_ib_umr_context *context =
652 container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
653
654 context->status = wc->status;
655 complete(&context->done);
656 }
657
mlx5_ib_init_umr_context(struct mlx5_ib_umr_context * context)658 static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
659 {
660 context->cqe.done = mlx5_ib_umr_done;
661 context->status = -1;
662 init_completion(&context->done);
663 }
664
/*
 * Register a user memory region through UMR using an MR from the cache.
 *
 * An MR of the requested @order is taken from the cache. When none is
 * available one asynchronous key creation is requested and ERR_PTR(-EAGAIN)
 * is returned so that the caller can fall back to another path.
 *
 * Otherwise the page-address array of @umem is DMA-mapped and a UMR
 * registration request is posted on the UMR QP; the function sleeps until
 * it completes.
 *
 * The mmkey iova/size/pd fields and mr->live are set only when the UMR
 * completed successfully, so an MR that failed registration goes back to
 * the cache without being marked live.
 *
 * Returns the MR on success, or an ERR_PTR: -EAGAIN (cache empty), -ENOMEM
 * (page-address array), the ib_post_send() error, or -EFAULT when the UMR
 * completion reported a failure.
 */
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct mlx5_umr_wr umrwr = {};
	const struct ib_send_wr *bad;
	struct mlx5_ib_mr *mr;
	struct ib_sge sg;
	int size;
	__be64 *mr_pas;
	dma_addr_t dma;
	int err = 0;

	mr = alloc_cached_mr(dev, order);
	if (!mr) {
		/* Ask for a refill; the new key arrives asynchronously. */
		err = add_keys(dev, order2idx(dev, order), 1);
		if (err && err != -EAGAIN)
			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
		return ERR_PTR(-EAGAIN);
	}

	err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas,
	    &dma);
	if (err)
		goto free_mr;

	mlx5_ib_init_umr_context(&umr_context);

	umrwr.wr.wr_cqe = &umr_context.cqe;
	prep_umr_reg_wqe(pd, &umrwr, &sg, dma, npages, mr->mmkey.key,
	    page_shift, virt_addr, len, access_flags);

	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
		goto unmap_dma;
	}

	wait_for_completion(&umr_context.done);
	if (umr_context.status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "reg umr failed\n");
		err = -EFAULT;
		/* Do not publish an MR whose registration failed. */
		goto unmap_dma;
	}

	mr->mmkey.iova = virt_addr;
	mr->mmkey.size = len;
	mr->mmkey.pd = to_mpd(pd)->pdn;

	mr->live = 1;

unmap_dma:
	up(&umrc->sem);
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

	kfree(mr_pas);

free_mr:
	if (err) {
		free_cached_mr(dev, mr);
		return ERR_PTR(err);
	}

	return mr;
}
742
743 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/*
 * Update (or, with @zap set, clear) a range of MTT entries of @mr via UMR.
 *
 * @start_page_index: first page of the range; rounded down to the UMR copy
 *                    granularity, with @npages grown accordingly
 * @npages:           number of pages in the range
 * @zap:              when non-zero the entries are written as zeroes instead
 *                    of being populated from the umem
 *
 * The range is processed in chunks of at most one page worth of entries.
 * If no zeroed page can be allocated, the static emergency buffer is used
 * under its mutex, with a smaller chunk size.
 *
 * Returns 0 on success, -EINVAL when the range exceeds MLX5_MAX_UMR_PAGES,
 * -ENOMEM when the buffer cannot be DMA-mapped, the ib_post_send() error,
 * or -EFAULT when a UMR completion reports a failure.
 */
int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
		       int zap)
{
	const int idx_align = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
	const int idx_mask = idx_align - 1;
	struct mlx5_ib_dev *dev = mr->dev;
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct ib_umem *umem = mr->umem;
	struct mlx5_ib_umr_context umr_context;
	const struct ib_send_wr *bad;
	struct mlx5_umr_wr wr;
	struct ib_sge sg;
	dma_addr_t dma;
	__be64 *pas;
	size_t total_pages;
	size_t chunk_pages;
	size_t done_pages;
	bool emergency = false;
	int size;
	int err = 0;

	/*
	 * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, so both
	 * the offset and the length are aligned to that unit. For an already
	 * aligned index these two statements change nothing.
	 */
	npages += start_page_index & idx_mask;
	start_page_index &= ~idx_mask;

	total_pages = ALIGN(npages, idx_align);

	if (start_page_index + total_pages > MLX5_MAX_UMR_PAGES)
		return -EINVAL;

	size = sizeof(u64) * total_pages;
	size = min_t(int, PAGE_SIZE, size);
	/*
	 * GFP_ATOMIC avoids recursing into page-reclaim code when called
	 * from an invalidation. The pas buffer must be 2k-aligned for
	 * Connect-IB.
	 */
	pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
	if (pas == NULL) {
		mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
		pas = mlx5_ib_update_mtt_emergency_buffer;
		size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
		emergency = true;
		mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
		memset(pas, 0, size);
	}
	chunk_pages = size / sizeof(u64);

	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
		err = -ENOMEM;
		goto free_pas;
	}

	done_pages = 0;
	while (done_pages < total_pages && !err) {
		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);

		npages = min_t(size_t, chunk_pages,
		    ib_umem_num_pages(umem) - start_page_index);

		if (!zap) {
			__mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
			    start_page_index, npages, pas,
			    MLX5_IB_MTT_PRESENT);
			/* Zero the padding after the pages from the umem. */
			memset(pas + npages, 0,
			    size - npages * sizeof(u64));
		}

		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);

		mlx5_ib_init_umr_context(&umr_context);

		sg.addr = dma;
		sg.length = ALIGN(npages * sizeof(u64),
		    MLX5_UMR_MTT_ALIGNMENT);
		sg.lkey = dev->umrc.pd->local_dma_lkey;

		memset(&wr, 0, sizeof(wr));
		wr.wr.wr_cqe = &umr_context.cqe;
		wr.wr.opcode = MLX5_IB_WR_UMR;
		wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
		    MLX5_IB_SEND_UMR_UPDATE_MTT;
		wr.wr.sg_list = &sg;
		wr.wr.num_sge = 1;
		wr.mkey = mr->mmkey.key;
		wr.npages = sg.length / sizeof(u64);
		wr.page_shift = PAGE_SHIFT;
		wr.target.offset = start_page_index;

		down(&umrc->sem);
		err = ib_post_send(umrc->qp, &wr.wr, &bad);
		if (err) {
			mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
		} else {
			wait_for_completion(&umr_context.done);
			if (umr_context.status != IB_WC_SUCCESS) {
				mlx5_ib_err(dev, "UMR completion failed, code %d\n",
				    umr_context.status);
				err = -EFAULT;
			}
		}
		up(&umrc->sem);

		done_pages += chunk_pages;
		start_page_index += chunk_pages;
	}
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_pas:
	if (emergency)
		mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
	else
		free_page((unsigned long)pas);

	return err;
}
864 #endif
865
866 /*
867 * If ibmr is NULL it will be allocated by reg_create.
868 * Else, the given ibmr will be used.
869 */
reg_create(struct ib_mr * ibmr,struct ib_pd * pd,u64 virt_addr,u64 length,struct ib_umem * umem,int npages,int page_shift,int access_flags)870 static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
871 u64 virt_addr, u64 length,
872 struct ib_umem *umem, int npages,
873 int page_shift, int access_flags)
874 {
875 struct mlx5_ib_dev *dev = to_mdev(pd->device);
876 struct mlx5_ib_mr *mr;
877 __be64 *pas;
878 void *mkc;
879 int inlen;
880 u32 *in;
881 int err;
882 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
883
884 mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
885 if (!mr)
886 return ERR_PTR(-ENOMEM);
887
888 inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
889 sizeof(*pas) * ((npages + 1) / 2) * 2;
890 in = mlx5_vzalloc(inlen);
891 if (!in) {
892 err = -ENOMEM;
893 goto err_1;
894 }
895 pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
896 mlx5_ib_populate_pas(dev, umem, page_shift, pas,
897 pg_cap ? MLX5_IB_MTT_PRESENT : 0);
898
899 /* The pg_access bit allows setting the access flags
900 * in the page list submitted with the command. */
901 MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
902
903 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
904 MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT);
905 MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
906 MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
907 MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
908 MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
909 MLX5_SET(mkc, mkc, lr, 1);
910
911 MLX5_SET64(mkc, mkc, start_addr, virt_addr);
912 MLX5_SET64(mkc, mkc, len, length);
913 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
914 MLX5_SET(mkc, mkc, bsf_octword_size, 0);
915 MLX5_SET(mkc, mkc, translations_octword_size,
916 get_octo_len(virt_addr, length, 1 << page_shift));
917 MLX5_SET(mkc, mkc, log_page_size, page_shift);
918 MLX5_SET(mkc, mkc, qpn, 0xffffff);
919 MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
920 get_octo_len(virt_addr, length, 1 << page_shift));
921
922 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
923 if (err) {
924 mlx5_ib_warn(dev, "create mkey failed\n");
925 goto err_2;
926 }
927 mr->umem = umem;
928 mr->dev = dev;
929 mr->live = 1;
930 kvfree(in);
931
932 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
933
934 return mr;
935
936 err_2:
937 kvfree(in);
938
939 err_1:
940 if (!ibmr)
941 kfree(mr);
942
943 return ERR_PTR(err);
944 }
945
/*
 * Fill the bookkeeping fields of a freshly registered MR: lkey/rkey taken
 * from the mkey, length, access flags and page count. The page count is
 * also added to the device-wide reg_pages counter.
 */
static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
			  int npages, u64 length, int access_flags)
{
	u32 key = mr->mmkey.key;

	mr->npages = npages;
	atomic_add(npages, &dev->mdev->priv.reg_pages);

	mr->ibmr.lkey = key;
	mr->ibmr.rkey = key;
	mr->ibmr.length = length;
	mr->access_flags = access_flags;
}
956
/*
 * Register a user memory region on @pd.
 *
 * The umem for [@start, @start + @length) is obtained first. When the
 * resulting order allows it, registration is attempted through UMR with a
 * cached MR; if the cache is empty (-EAGAIN), or the order is too large for
 * UMR, the MR is created with reg_create() under slow_path_mutex. An
 * on-demand-paging registration that is too large for UMR is rejected.
 *
 * @udata is not used here.
 *
 * Returns the embedded ib_mr on success. On failure the umem is released and
 * an ERR_PTR is returned: the error from mr_umem_get(), reg_umr() or
 * reg_create(), or -EINVAL for the oversized ODP case.
 */
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
	    (long long)start, (long long)virt_addr, (long long)length, access_flags);
	umem = mr_umem_get(pd, start, length, access_flags, &npages,
	    &page_shift, &ncont, &order);

	if (IS_ERR(umem))
		return (void *)umem;

	if (use_umr(order)) {
		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
		    order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			/* Terminate the message so log lines do not run together. */
			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
			mr = NULL;
		}
	} else if (access_flags & IB_ACCESS_ON_DEMAND) {
		err = -EINVAL;
		pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
		goto error;
	}

	if (!mr) {
		/* Fall back to the synchronous firmware command. */
		mutex_lock(&dev->slow_path_mutex);
		mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
		    page_shift, access_flags);
		mutex_unlock(&dev->slow_path_mutex);
	}

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);

	mr->umem = umem;
	set_mr_fileds(dev, mr, npages, length, access_flags);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	update_odp_mr(mr);
#endif

	return &mr->ibmr;

error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}
1018
unreg_umr(struct mlx5_ib_dev * dev,struct mlx5_ib_mr * mr)1019 static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1020 {
1021 struct mlx5_core_dev *mdev = dev->mdev;
1022 struct umr_common *umrc = &dev->umrc;
1023 struct mlx5_ib_umr_context umr_context;
1024 struct mlx5_umr_wr umrwr = {};
1025 const struct ib_send_wr *bad;
1026 int err;
1027
1028 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
1029 return 0;
1030
1031 mlx5_ib_init_umr_context(&umr_context);
1032
1033 umrwr.wr.wr_cqe = &umr_context.cqe;
1034 prep_umr_unreg_wqe(dev, &umrwr, mr->mmkey.key);
1035
1036 down(&umrc->sem);
1037 err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
1038 if (err) {
1039 up(&umrc->sem);
1040 mlx5_ib_dbg(dev, "err %d\n", err);
1041 goto error;
1042 } else {
1043 wait_for_completion(&umr_context.done);
1044 up(&umrc->sem);
1045 }
1046 if (umr_context.status != IB_WC_SUCCESS) {
1047 mlx5_ib_warn(dev, "unreg umr failed\n");
1048 err = -EFAULT;
1049 goto error;
1050 }
1051 return 0;
1052
1053 error:
1054 return err;
1055 }
1056
/*
 * Modify an existing mkey in place by posting a single UMR work request.
 *
 * @flags selects what the request updates:
 *   IB_MR_REREG_TRANS  - translation; the page list of mr->umem is DMA-mapped
 *                        via dma_map_mr_pas() and @virt_addr/@length are
 *                        placed in the request,
 *   IB_MR_REREG_PD     - protection domain (@pd),
 *   IB_MR_REREG_ACCESS - access flags (@access_flags).
 * @order is accepted but not used by this function.
 *
 * Returns 0 on success, the dma_map_mr_pas() or ib_post_send() error, or
 * -EFAULT when the request completes with a status other than
 * IB_WC_SUCCESS.
 */
static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
		     u64 length, int npages, int page_shift, int order,
		     int access_flags, int flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context ctx;
	struct mlx5_umr_wr req = {};
	const struct ib_send_wr *bad_wr;
	struct ib_sge sge;
	__be64 *pas = NULL;
	dma_addr_t pas_dma = 0;
	int pas_size;
	int rc;

	mlx5_ib_init_umr_context(&ctx);

	req.wr.wr_cqe = &ctx.cqe;
	req.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;

	if (flags & IB_MR_REREG_TRANS) {
		rc = dma_map_mr_pas(dev, mr->umem, npages, page_shift,
				    &pas_size, &pas, &pas_dma);
		if (rc)
			return rc;

		req.target.virt_addr = virt_addr;
		req.length = length;
		req.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
	}

	prep_umr_wqe_common(pd, &req, &sge, pas_dma, npages, mr->mmkey.key,
			    page_shift);

	if (flags & IB_MR_REREG_PD) {
		req.pd = pd;
		req.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD;
	}

	if (flags & IB_MR_REREG_ACCESS) {
		req.access_flags = access_flags;
		req.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS;
	}

	/* Post the request on the UMR QP and wait for its completion. */
	down(&umrc->sem);
	rc = ib_post_send(umrc->qp, &req.wr, &bad_wr);
	if (!rc) {
		wait_for_completion(&ctx.done);
		if (ctx.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
				     ctx.status);
			rc = -EFAULT;
		}
	} else {
		mlx5_ib_warn(dev, "post send failed, err %d\n", rc);
	}
	up(&umrc->sem);

	/* The page list was only mapped when a translation update was asked. */
	if (flags & IB_MR_REREG_TRANS) {
		dma_unmap_single(ddev, pas_dma, pas_size, DMA_TO_DEVICE);
		kfree(pas);
	}

	return rc;
}
1124
/*
 * Re-register a user MR: change its PD, access flags and/or translation.
 *
 * Any request other than a pure IB_MR_REREG_PD releases the current umem,
 * obtains a new one with mr_umem_get(), and is from then on handled as if
 * IB_MR_REREG_TRANS had been requested.  When use_umr_mtt_update() rejects
 * the new range the mkey is torn down (unreg_umr() or destroy_mkey(),
 * depending on mr->umred) and recreated with reg_create(); otherwise one
 * UMR request is posted through rereg_umr().
 *
 * Returns 0 on success or a negative errno.  If mr_umem_get() fails,
 * mr->umem is left NULL.
 */
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
			  u64 length, u64 virt_addr, int new_access_flags,
			  struct ib_pd *new_pd, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
	struct mlx5_ib_mr *mr = to_mmr(ib_mr);
	struct ib_pd *pd;
	int access_flags;
	u64 addr;
	u64 len;
	int page_shift = 0;
	int npages = 0;
	int ncont = 0;
	int order = 0;
	int err;

	/* Take each attribute from the caller only if its flag was passed. */
	if (flags & IB_MR_REREG_PD)
		pd = new_pd;
	else
		pd = ib_mr->pd;

	if (flags & IB_MR_REREG_ACCESS)
		access_flags = new_access_flags;
	else
		access_flags = mr->access_flags;

	if (flags & IB_MR_REREG_TRANS) {
		addr = virt_addr;
		len = length;
	} else {
		addr = mr->umem->address;
		len = mr->umem->length;
	}

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    (long long)start, (long long)virt_addr, (long long)length, access_flags);

	if (flags != IB_MR_REREG_PD) {
		/*
		 * The umem is replaced regardless of whether UMR ends up
		 * being used for the update.
		 */
		flags |= IB_MR_REREG_TRANS;
		ib_umem_release(mr->umem);
		mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages,
				       &page_shift, &ncont, &order);
		if (IS_ERR(mr->umem)) {
			err = PTR_ERR(mr->umem);
			mr->umem = NULL;
			return err;
		}
	}

	if ((flags & IB_MR_REREG_TRANS) && !use_umr_mtt_update(mr, addr, len)) {
		/* UMR cannot be used here, so the mkey has to be replaced. */
		if (mr->umred) {
			err = unreg_umr(dev, mr);
			if (err) {
				mlx5_ib_warn(dev, "Failed to unregister MR\n");
				return err;
			}
		} else {
			err = destroy_mkey(dev, mr);
			if (err) {
				mlx5_ib_warn(dev, "Failed to destroy MKey\n");
				return err;
			}
		}

		mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
				page_shift, access_flags);
		if (IS_ERR(mr))
			return PTR_ERR(mr);

		mr->umred = 0;
	} else {
		/* Update the existing mkey with a UMR work request. */
		err = rereg_umr(pd, mr, addr, len, npages, page_shift,
				order, access_flags, flags);
		if (err) {
			mlx5_ib_warn(dev, "Failed to rereg UMR\n");
			return err;
		}
	}

	/* Mirror the applied changes into the software MR state. */
	if (flags & IB_MR_REREG_PD) {
		ib_mr->pd = pd;
		mr->mmkey.pd = to_mpd(pd)->pdn;
	}

	if (flags & IB_MR_REREG_ACCESS)
		mr->access_flags = access_flags;

	if (flags & IB_MR_REREG_TRANS) {
		/* Drop the old page count before set_mr_fileds() runs. */
		atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
		set_mr_fileds(dev, mr, npages, len, access_flags);
		mr->mmkey.iova = addr;
		mr->mmkey.size = len;
	}
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	update_odp_mr(mr);
#endif

	return 0;
}
1217
1218 static int
mlx5_alloc_priv_descs(struct ib_device * device,struct mlx5_ib_mr * mr,int ndescs,int desc_size)1219 mlx5_alloc_priv_descs(struct ib_device *device,
1220 struct mlx5_ib_mr *mr,
1221 int ndescs,
1222 int desc_size)
1223 {
1224 int size = ndescs * desc_size;
1225 int add_size;
1226 int ret;
1227
1228 add_size = max_t(int, MLX5_UMR_ALIGN - 1, 0);
1229
1230 mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
1231 if (!mr->descs_alloc)
1232 return -ENOMEM;
1233
1234 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
1235
1236 mr->desc_map = dma_map_single(device->dma_device, mr->descs,
1237 size, DMA_TO_DEVICE);
1238 if (dma_mapping_error(device->dma_device, mr->desc_map)) {
1239 ret = -ENOMEM;
1240 goto err;
1241 }
1242
1243 return 0;
1244 err:
1245 kfree(mr->descs_alloc);
1246
1247 return ret;
1248 }
1249
1250 static void
mlx5_free_priv_descs(struct mlx5_ib_mr * mr)1251 mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
1252 {
1253 if (mr->descs) {
1254 struct ib_device *device = mr->ibmr.device;
1255 int size = mr->max_descs * mr->desc_size;
1256
1257 dma_unmap_single(device->dma_device, mr->desc_map,
1258 size, DMA_TO_DEVICE);
1259 kfree(mr->descs_alloc);
1260 mr->descs = NULL;
1261 }
1262 }
1263
clean_mr(struct mlx5_ib_mr * mr)1264 static int clean_mr(struct mlx5_ib_mr *mr)
1265 {
1266 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1267 int umred = mr->umred;
1268 int err;
1269
1270 if (mr->sig) {
1271 if (mlx5_core_destroy_psv(dev->mdev,
1272 mr->sig->psv_memory.psv_idx))
1273 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1274 mr->sig->psv_memory.psv_idx);
1275 if (mlx5_core_destroy_psv(dev->mdev,
1276 mr->sig->psv_wire.psv_idx))
1277 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1278 mr->sig->psv_wire.psv_idx);
1279 kfree(mr->sig);
1280 mr->sig = NULL;
1281 }
1282
1283 mlx5_free_priv_descs(mr);
1284
1285 if (!umred) {
1286 u32 key = mr->mmkey.key;
1287
1288 err = destroy_mkey(dev, mr);
1289 kfree(mr);
1290 if (err) {
1291 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
1292 key, err);
1293 return err;
1294 }
1295 } else {
1296 err = unreg_umr(dev, mr);
1297 if (err) {
1298 mlx5_ib_warn(dev, "failed unregister\n");
1299 return err;
1300 }
1301 free_cached_mr(dev, mr);
1302 }
1303
1304 return 0;
1305 }
1306
mlx5_ib_dereg_mr(struct ib_mr * ibmr,struct ib_udata * udata)1307 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1308 {
1309 struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
1310 struct mlx5_ib_mr *mr = to_mmr(ibmr);
1311 int npages = mr->npages;
1312 struct ib_umem *umem = mr->umem;
1313
1314 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1315 if (umem && umem->odp_data) {
1316 /* Prevent new page faults from succeeding */
1317 mr->live = 0;
1318 /* Wait for all running page-fault handlers to finish. */
1319 synchronize_srcu(&dev->mr_srcu);
1320 /* Destroy all page mappings */
1321 mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
1322 ib_umem_end(umem));
1323 /*
1324 * We kill the umem before the MR for ODP,
1325 * so that there will not be any invalidations in
1326 * flight, looking at the *mr struct.
1327 */
1328 ib_umem_release(umem);
1329 atomic_sub(npages, &dev->mdev->priv.reg_pages);
1330
1331 /* Avoid double-freeing the umem. */
1332 umem = NULL;
1333 }
1334 #endif
1335
1336 clean_mr(mr);
1337
1338 if (umem) {
1339 ib_umem_release(umem);
1340 atomic_sub(npages, &dev->mdev->priv.reg_pages);
1341 }
1342
1343 return 0;
1344 }
1345
/*
 * Allocate an MR of the given type and create a free, UMR-enabled mkey
 * for it.
 *
 * @pd:         protection domain the mkey is created in.
 * @mr_type:    IB_MR_TYPE_MEM_REG (MTT access mode, u64 descriptors),
 *              IB_MR_TYPE_SG_GAPS (KLM access mode, struct mlx5_klm
 *              descriptors) or IB_MR_TYPE_INTEGRITY (KLM access mode with
 *              BSF enabled and a memory/wire PSV pair).
 * @max_num_sg: requested number of entries; rounded up to a multiple of 4
 *              to obtain the descriptor count.
 * @udata:      not used by this function.
 *
 * Returns the new ib_mr, or an ERR_PTR: -ENOMEM on allocation failure,
 * -EINVAL for an unsupported @mr_type, or the error reported by
 * mlx5_alloc_priv_descs(), mlx5_core_create_psv() or
 * mlx5_core_create_mkey().
 */
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
			       enum ib_mr_type mr_type,
			       u32 max_num_sg, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	int ndescs = ALIGN(max_num_sg, 4);
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	/*
	 * mlx5_free_priv_descs() reaches the DMA device through
	 * mr->ibmr.device.  The MR was just zero-allocated, so without this
	 * assignment the err_destroy_psv path would dereference a NULL
	 * device when mkey creation fails after the descriptor table has
	 * been allocated.
	 */
	mr->ibmr.device = pd->device;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, free, 1);
	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);

	if (mr_type == IB_MR_TYPE_MEM_REG) {
		mr->access_mode = MLX5_ACCESS_MODE_MTT;
		MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
		err = mlx5_alloc_priv_descs(pd->device, mr,
					    ndescs, sizeof(u64));
		if (err)
			goto err_free_in;

		mr->desc_size = sizeof(u64);
		mr->max_descs = ndescs;
	} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
		mr->access_mode = MLX5_ACCESS_MODE_KLM;

		err = mlx5_alloc_priv_descs(pd->device, mr,
					    ndescs, sizeof(struct mlx5_klm));
		if (err)
			goto err_free_in;
		mr->desc_size = sizeof(struct mlx5_klm);
		mr->max_descs = ndescs;
	} else if (mr_type == IB_MR_TYPE_INTEGRITY) {
		u32 psv_index[2];

		MLX5_SET(mkc, mkc, bsf_en, 1);
		MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
		if (!mr->sig) {
			err = -ENOMEM;
			goto err_free_in;
		}

		/* create mem & wire PSVs */
		err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
					   2, psv_index);
		if (err)
			goto err_free_sig;

		mr->access_mode = MLX5_ACCESS_MODE_KLM;
		mr->sig->psv_memory.psv_idx = psv_index[0];
		mr->sig->psv_wire.psv_idx = psv_index[1];

		mr->sig->sig_status_checked = true;
		mr->sig->sig_err_exists = false;
		/* Next UMR, Arm SIGERR */
		++mr->sig->sigerr_count;
	} else {
		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
		err = -EINVAL;
		goto err_free_in;
	}

	MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
	MLX5_SET(mkc, mkc, umr_en, 1);

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
	if (err)
		goto err_destroy_psv;

	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->umem = NULL;
	kfree(in);

	return &mr->ibmr;

err_destroy_psv:
	/* Only integrity MRs own PSVs; the other types have mr->sig NULL. */
	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
	}
	mlx5_free_priv_descs(mr);
err_free_sig:
	kfree(mr->sig);
err_free_in:
	kfree(in);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}
1458
mlx5_ib_alloc_mw(struct ib_pd * pd,enum ib_mw_type type,struct ib_udata * udata)1459 struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
1460 struct ib_udata *udata)
1461 {
1462 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1463 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1464 struct mlx5_ib_mw *mw = NULL;
1465 u32 *in = NULL;
1466 void *mkc;
1467 int ndescs;
1468 int err;
1469 struct mlx5_ib_alloc_mw req = {};
1470 struct {
1471 __u32 comp_mask;
1472 __u32 response_length;
1473 } resp = {};
1474
1475 err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
1476 if (err)
1477 return ERR_PTR(err);
1478
1479 if (req.comp_mask || req.reserved1 || req.reserved2)
1480 return ERR_PTR(-EOPNOTSUPP);
1481
1482 if (udata->inlen > sizeof(req) &&
1483 !ib_is_udata_cleared(udata, sizeof(req),
1484 udata->inlen - sizeof(req)))
1485 return ERR_PTR(-EOPNOTSUPP);
1486
1487 ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
1488
1489 mw = kzalloc(sizeof(*mw), GFP_KERNEL);
1490 in = kzalloc(inlen, GFP_KERNEL);
1491 if (!mw || !in) {
1492 err = -ENOMEM;
1493 goto free;
1494 }
1495
1496 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1497
1498 MLX5_SET(mkc, mkc, free, 1);
1499 MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1500 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1501 MLX5_SET(mkc, mkc, umr_en, 1);
1502 MLX5_SET(mkc, mkc, lr, 1);
1503 MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_KLM);
1504 MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
1505 MLX5_SET(mkc, mkc, qpn, 0xffffff);
1506
1507 err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
1508 if (err)
1509 goto free;
1510
1511 mw->ibmw.rkey = mw->mmkey.key;
1512
1513 resp.response_length = min(offsetof(typeof(resp), response_length) +
1514 sizeof(resp.response_length), udata->outlen);
1515 if (resp.response_length) {
1516 err = ib_copy_to_udata(udata, &resp, resp.response_length);
1517 if (err) {
1518 mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
1519 goto free;
1520 }
1521 }
1522
1523 kfree(in);
1524 return &mw->ibmw;
1525
1526 free:
1527 kfree(mw);
1528 kfree(in);
1529 return ERR_PTR(err);
1530 }
1531
mlx5_ib_dealloc_mw(struct ib_mw * mw)1532 int mlx5_ib_dealloc_mw(struct ib_mw *mw)
1533 {
1534 struct mlx5_ib_mw *mmw = to_mmw(mw);
1535 int err;
1536
1537 err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
1538 &mmw->mmkey);
1539 if (!err)
1540 kfree(mmw);
1541 return err;
1542 }
1543
/*
 * Report the status of an MR into @mr_status.
 *
 * Only IB_MR_CHECK_SIG_STATUS may be set in @check_mask; any other bit
 * yields -EINVAL and leaves @mr_status untouched.  Otherwise fail_status
 * is cleared first.  When the signature check is requested the MR must
 * have a signature context (else -EINVAL); it is marked as checked and,
 * if an error is pending, the error is copied out, the pending flag is
 * cleared and IB_MR_CHECK_SIG_STATUS is set in fail_status.
 *
 * Returns 0 on success or -EINVAL.
 */
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
			    struct ib_mr_status *mr_status)
{
	struct mlx5_ib_mr *mmr = to_mmr(ibmr);

	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
		pr_err("Invalid status check mask\n");
		return -EINVAL;
	}

	mr_status->fail_status = 0;

	if (!(check_mask & IB_MR_CHECK_SIG_STATUS))
		return 0;

	if (!mmr->sig) {
		pr_err("signature status check requested on a non-signature enabled MR\n");
		return -EINVAL;
	}

	mmr->sig->sig_status_checked = true;
	if (!mmr->sig->sig_err_exists)
		return 0;

	if (ibmr->lkey != mmr->sig->err_item.key) {
		/* The recorded error carries a different key than this MR. */
		mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
		mr_status->sig_err.sig_err_offset = 0;
		mr_status->sig_err.key = mmr->sig->err_item.key;
	} else {
		memcpy(&mr_status->sig_err, &mmr->sig->err_item,
		       sizeof(mr_status->sig_err));
	}

	mmr->sig->sig_err_exists = false;
	mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;

	return 0;
}
1584
1585 static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr * mr,struct scatterlist * sgl,unsigned short sg_nents,unsigned int * sg_offset_p)1586 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
1587 struct scatterlist *sgl,
1588 unsigned short sg_nents,
1589 unsigned int *sg_offset_p)
1590 {
1591 struct scatterlist *sg = sgl;
1592 struct mlx5_klm *klms = mr->descs;
1593 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1594 u32 lkey = mr->ibmr.pd->local_dma_lkey;
1595 int i;
1596
1597 mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
1598 mr->ibmr.length = 0;
1599 mr->ndescs = sg_nents;
1600
1601 for_each_sg(sgl, sg, sg_nents, i) {
1602 if (unlikely(i > mr->max_descs))
1603 break;
1604 klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
1605 klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
1606 klms[i].key = cpu_to_be32(lkey);
1607 mr->ibmr.length += sg_dma_len(sg);
1608
1609 sg_offset = 0;
1610 }
1611
1612 if (sg_offset_p)
1613 *sg_offset_p = sg_offset;
1614
1615 return i;
1616 }
1617
/*
 * Append one page address to the descriptor table of the MR, with the
 * MLX5_EN_RD and MLX5_EN_WR bits set.  Passed to ib_sg_to_pages() as its
 * per-page callback.
 *
 * Returns 0 on success or -ENOMEM when the table already holds
 * mr->max_descs entries.
 */
static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	__be64 *table = mr->descs;

	if (unlikely(mr->ndescs == mr->max_descs))
		return -ENOMEM;

	table[mr->ndescs] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
	mr->ndescs++;

	return 0;
}
1631
mlx5_ib_map_mr_sg(struct ib_mr * ibmr,struct scatterlist * sg,int sg_nents,unsigned int * sg_offset)1632 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
1633 unsigned int *sg_offset)
1634 {
1635 struct mlx5_ib_mr *mr = to_mmr(ibmr);
1636 int n;
1637
1638 mr->ndescs = 0;
1639
1640 ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
1641 mr->desc_size * mr->max_descs,
1642 DMA_TO_DEVICE);
1643
1644 if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
1645 n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
1646 else
1647 n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
1648 mlx5_set_page);
1649
1650 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
1651 mr->desc_size * mr->max_descs,
1652 DMA_TO_DEVICE);
1653
1654 return n;
1655 }
1656