/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */


#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"
#include "user.h"

enum {
	MAX_PENDING_REG_MR = 8,
};

#define MLX5_UMR_ALIGN 2048
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static __be64 mlx5_ib_update_mtt_emergency_buffer[
		MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
	__aligned(MLX5_UMR_ALIGN);
static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
#endif

static int clean_mr(struct mlx5_ib_mr *mr);

static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	/* Wait until all page fault handlers using the mr complete. */
	synchronize_srcu(&dev->mr_srcu);
#endif

	return err;
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
{
	return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static void update_odp_mr(struct mlx5_ib_mr *mr)
{
	if (mr->umem->odp_data) {
		/*
		 * This barrier prevents the compiler from moving the
		 * setting of umem->odp_data->private to point to our
		 * MR before reg_umr has finished, to ensure that the
		 * MR initialization has finished before we start
		 * handling invalidations.
		 */
		smp_wmb();
		mr->umem->odp_data->private = mr;
		/*
		 * Make sure we will see the new
		 * umem->odp_data->private value in the invalidation
		 * routines, before we can get page faults on the
		 * MR. Page faults can happen once we put the MR in
		 * the tree, below this line. Without the barrier, a
		 * page fault could be handled and an invalidation
		 * could run before umem->odp_data->private == mr is
		 * visible to the invalidation handler.
		 */
		smp_wmb();
	}
}
#endif

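/*
 * Completion callback for the asynchronous mkey creation issued by
 * add_keys(). On success the new MR is stamped with a variant key byte,
 * added to its cache entry and inserted into the device's mkey radix
 * tree; on failure the fill_delay timer is armed to back off further
 * cache filling for roughly one second.
 */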
static void reg_mr_callback(int status, void *context)
{
	struct mlx5_ib_mr *mr = context;
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;
	struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
	int err;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
	mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);

	write_lock_irqsave(&table->lock, flags);
	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
				&mr->mmkey);
	if (err)
		pr_err("Error inserting to mkey tree. 0x%x\n", -err);
	write_unlock_irqrestore(&table->lock, flags);
}

static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	void *mkc;
	u32 *in;
	int err = 0;
	int i;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->umred = 1;
		mr->dev = dev;

		MLX5_SET(mkc, mkc, free, 1);
		MLX5_SET(mkc, mkc, umr_en, 1);
		MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);

		MLX5_SET(mkc, mkc, qpn, 0xffffff);
		MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
		MLX5_SET(mkc, mkc, log_page_size, 12);

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
					       in, inlen,
					       mr->out, sizeof(mr->out),
					       reg_mr_callback, mr);
		if (err) {
			spin_lock_irq(&ent->lock);
			ent->pending--;
			spin_unlock_irq(&ent->lock);
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

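/*
 * debugfs interface: each cache entry exposes "size" and "limit" files.
 * Writing "size" grows or shrinks the entry to the requested number of
 * mkeys (never below its limit); writing "limit" updates the low-water
 * mark and, if needed, triggers add_keys() to refill the entry.
 */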
static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		do {
			err = add_keys(dev, c, var - ent->size);
			if (err && err != -EAGAIN)
				return err;

			usleep_range(3000, 5000);
		} while (err);
	} else if (var < ent->size) {
		remove_keys(dev, c, ent->size - var);
	}

	return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations size_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= size_write,
	.read	= size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations limit_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= limit_write,
	.read	= limit_read,
};

static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

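/*
 * Background maintenance for a single cache entry: keep the entry
 * filled up to twice its limit while the fill_delay backoff is not
 * active, and opportunistically shrink it, one mkey at a time, once it
 * is oversized and has been idle for several minutes.
 */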
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			} else {
				queue_work(cache->wq, &ent->work);
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		/*
		 * The remove_keys() logic is performed as a garbage
		 * collection task. Such a task is intended to run when no
		 * other active processes are running.
		 *
		 * need_resched() returns true if there are user tasks to be
		 * activated in the near future.
		 *
		 * In that case, we don't execute remove_keys() and postpone
		 * the garbage collection work to the next cycle, in order
		 * to free CPU resources for other tasks.
		 */
		if (!need_resched() && !someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 300 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

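/*
 * Take an MR from the cache entry matching "order", falling back to
 * larger entries when the exact one is empty. Every entry touched gets
 * its work item queued so it is refilled in the background; a complete
 * miss is accounted in the originally requested entry.
 */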
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int c;
	int i;

	c = order2idx(dev, order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock_irq(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock_irq(&ent->lock);

		queue_work(cache->wq, &ent->work);
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}

static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}
	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	if (!mlx5_debugfs_root)
		return 0;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
	if (!cache->root)
		return -ENOMEM;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		sprintf(ent->name, "%d", ent->order);
		ent->dir = debugfs_create_dir(ent->name, cache->root);
		if (!ent->dir)
			return -ENOMEM;

		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
						 &size_fops);
		if (!ent->fsize)
			return -ENOMEM;

		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
						  &limit_fops);
		if (!ent->flimit)
			return -ENOMEM;

		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
					       &ent->cur);
		if (!ent->fcur)
			return -ENOMEM;

		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
						&ent->miss);
		if (!ent->fmiss)
			return -ENOMEM;
	}

	return 0;
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root)
		return;

	debugfs_remove_recursive(dev->cache.root);
}

static void delay_time_func(unsigned long ctx)
{
	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

	dev->fill_delay = 0;
}

int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int err;
	int i;

	cache->wq = create_singlethread_workqueue("mkey_cache");
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		INIT_LIST_HEAD(&cache->ent[i].head);
		spin_lock_init(&cache->ent[i].lock);

		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
			limit = dev->mdev->profile->mr_cache[i].limit;
		else
			limit = 0;

		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		ent->limit = limit;
		queue_work(cache->wq, &ent->work);
	}

	err = mlx5_mr_cache_debugfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "cache debugfs failure\n");

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);

	mlx5_mr_cache_debugfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	del_timer_sync(&dev->delay_timer);

	return 0;
}

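/*
 * Register a DMA memory region covering the whole address space
 * (length64 is set, so no translation table is needed). The mkey uses
 * physical-address access mode and reflects the requested access flags.
 */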
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET64(mkc, mkc, start_addr, 0);

	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
	return (npages + 1) / 2;
}

static int use_umr(int order)
{
	return order <= MLX5_MAX_UMR_SHIFT;
}

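/*
 * Build the page-address (pas) array for a umem and DMA-map it for a
 * UMR post. The buffer is over-allocated so it can be aligned to
 * MLX5_UMR_ALIGN and padded to a whole MLX5_UMR_MTT_ALIGNMENT unit;
 * the caller must unmap *dma and kfree(*mr_pas) when done.
 */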
static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
			  int npages, int page_shift, int *size,
			  __be64 **mr_pas, dma_addr_t *dma)
{
	__be64 *pas;
	struct device *ddev = dev->ib_dev.dma_device;

	/*
	 * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
	 * To avoid copying garbage after the pas array, we allocate
	 * a little more.
	 */
	*size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
	*mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
	if (!(*mr_pas))
		return -ENOMEM;

	pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN);
	mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
	/* Clear padding after the actual pages. */
	memset(pas + npages, 0, *size - npages * sizeof(u64));

	*dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, *dma)) {
		kfree(*mr_pas);
		return -ENOMEM;
	}

	return 0;
}

static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr,
				struct ib_sge *sg, u64 dma, int n, u32 key,
				int page_shift)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_umr_wr *umrwr = umr_wr(wr);

	sg->addr = dma;
	sg->length = ALIGN(sizeof(u64) * n, 64);
	sg->lkey = dev->umrc.pd->local_dma_lkey;

	wr->next = NULL;
	wr->sg_list = sg;
	if (n)
		wr->num_sge = 1;
	else
		wr->num_sge = 0;

	wr->opcode = MLX5_IB_WR_UMR;

	umrwr->npages = n;
	umrwr->page_shift = page_shift;
	umrwr->mkey = key;
}

static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
			     struct ib_sge *sg, u64 dma, int n, u32 key,
			     int page_shift, u64 virt_addr, u64 len,
			     int access_flags)
{
	struct mlx5_umr_wr *umrwr = umr_wr(wr);

	prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift);

	wr->send_flags = 0;

	umrwr->target.virt_addr = virt_addr;
	umrwr->length = len;
	umrwr->access_flags = access_flags;
	umrwr->pd = pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
			       struct ib_send_wr *wr, u32 key)
{
	struct mlx5_umr_wr *umrwr = umr_wr(wr);

	wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
	wr->opcode = MLX5_IB_WR_UMR;
	umrwr->mkey = key;
}

static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
				   int access_flags, int *npages,
				   int *page_shift, int *ncont, int *order)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length,
					   access_flags, 0);
	if (IS_ERR(umem)) {
		mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order);
	if (!*npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		ib_umem_release(umem);
		return ERR_PTR(-EINVAL);
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    *npages, *ncont, *order, *page_shift);

	return umem;
}

static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_umr_context *context =
		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);

	context->status = wc->status;
	complete(&context->done);
}

static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
{
	context->cqe.done = mlx5_ib_umr_done;
	context->status = -1;
	init_completion(&context->done);
}

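/*
 * Register an MR through the UMR QP using an mkey taken from (or added
 * to) the MR cache: map the pas array, post a registration WQE and wait
 * for its completion. On failure the cached mkey is returned to the
 * cache and an ERR_PTR is returned.
 */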
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct mlx5_umr_wr umrwr = {};
	struct ib_send_wr *bad;
	struct mlx5_ib_mr *mr;
	struct ib_sge sg;
	int size;
	__be64 *mr_pas;
	dma_addr_t dma;
	int err = 0;
	int i;

	for (i = 0; i < 1; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err && err != -EAGAIN) {
			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas,
			     &dma);
	if (err)
		goto free_mr;

	mlx5_ib_init_umr_context(&umr_context);

	umrwr.wr.wr_cqe = &umr_context.cqe;
	prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
			 page_shift, virt_addr, len, access_flags);

	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
		goto unmap_dma;
	} else {
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed\n");
			err = -EFAULT;
		}
	}

	mr->mmkey.iova = virt_addr;
	mr->mmkey.size = len;
	mr->mmkey.pd = to_mpd(pd)->pdn;

	mr->live = 1;

unmap_dma:
	up(&umrc->sem);
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

	kfree(mr_pas);

free_mr:
	if (err) {
		free_cached_mr(dev, mr);
		return ERR_PTR(err);
	}

	return mr;
}

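/*
 * ODP only: update a range of MTTs of an existing mkey through UMR
 * WQEs, chunking the work to at most one page of MTT entries per post.
 * If no page can be allocated atomically, a preallocated emergency
 * buffer (serialized by a mutex) is used instead, which presumably
 * trades speed for forward progress under memory pressure.
 */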
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
		       int zap)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct ib_umem *umem = mr->umem;
	int size;
	__be64 *pas;
	dma_addr_t dma;
	struct ib_send_wr *bad;
	struct mlx5_umr_wr wr;
	struct ib_sge sg;
	int err = 0;
	const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
	const int page_index_mask = page_index_alignment - 1;
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t pages_iter = 0;
	int use_emergency_buf = 0;

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly */
	if (start_page_index & page_index_mask) {
		npages += start_page_index & page_index_mask;
		start_page_index &= ~page_index_mask;
	}

	pages_to_map = ALIGN(npages, page_index_alignment);

	if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
		return -EINVAL;

	size = sizeof(u64) * pages_to_map;
	size = min_t(int, PAGE_SIZE, size);
	/* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
	 * code, when we are called from an invalidation. The pas buffer must
	 * be 2k-aligned for Connect-IB. */
	pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
	if (!pas) {
		mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
		pas = mlx5_ib_update_mtt_emergency_buffer;
		size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
		use_emergency_buf = 1;
		mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
		memset(pas, 0, size);
	}
	pages_iter = size / sizeof(u64);
	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
		err = -ENOMEM;
		goto free_pas;
	}

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, start_page_index += pages_iter) {
		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);

		npages = min_t(size_t,
			       pages_iter,
			       ib_umem_num_pages(umem) - start_page_index);

		if (!zap) {
			__mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
					       start_page_index, npages, pas,
					       MLX5_IB_MTT_PRESENT);
			/* Clear padding after the pages brought from the
			 * umem. */
			memset(pas + npages, 0, size - npages * sizeof(u64));
		}

		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);

		mlx5_ib_init_umr_context(&umr_context);

		memset(&wr, 0, sizeof(wr));
		wr.wr.wr_cqe = &umr_context.cqe;

		sg.addr = dma;
		sg.length = ALIGN(npages * sizeof(u64),
				  MLX5_UMR_MTT_ALIGNMENT);
		sg.lkey = dev->umrc.pd->local_dma_lkey;

		wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
				   MLX5_IB_SEND_UMR_UPDATE_MTT;
		wr.wr.sg_list = &sg;
		wr.wr.num_sge = 1;
		wr.wr.opcode = MLX5_IB_WR_UMR;
		wr.npages = sg.length / sizeof(u64);
		wr.page_shift = PAGE_SHIFT;
		wr.mkey = mr->mmkey.key;
		wr.target.offset = start_page_index;

		down(&umrc->sem);
		err = ib_post_send(umrc->qp, &wr.wr, &bad);
		if (err) {
			mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
		} else {
			wait_for_completion(&umr_context.done);
			if (umr_context.status != IB_WC_SUCCESS) {
				mlx5_ib_err(dev, "UMR completion failed, code %d\n",
					    umr_context.status);
				err = -EFAULT;
			}
		}
		up(&umrc->sem);
	}
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_pas:
	if (!use_emergency_buf)
		free_page((unsigned long)pas);
	else
		mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);

	return err;
}
#endif

/*
 * If ibmr is NULL it will be allocated by reg_create.
 * Else, the given ibmr will be used.
 */
static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
				     u64 virt_addr, u64 length,
				     struct ib_umem *umem, int npages,
				     int page_shift, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr;
	__be64 *pas;
	void *mkc;
	int inlen;
	u32 *in;
	int err;
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));

	mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
		sizeof(*pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	mlx5_ib_populate_pas(dev, umem, page_shift, pas,
			     pg_cap ? MLX5_IB_MTT_PRESENT : 0);

	/* The pg_access bit allows setting the access flags
	 * in the page list submitted with the command. */
	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET64(mkc, mkc, start_addr, virt_addr);
	MLX5_SET64(mkc, mkc, len, length);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(virt_addr, length, 1 << page_shift));
	MLX5_SET(mkc, mkc, log_page_size, page_shift);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 get_octo_len(virt_addr, length, 1 << page_shift));

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->umem = umem;
	mr->dev = dev;
	mr->live = 1;
	kvfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);

	return mr;

err_2:
	kvfree(in);

err_1:
	if (!ibmr)
		kfree(mr);

	return ERR_PTR(err);
}

static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
			  int npages, u64 length, int access_flags)
{
	mr->npages = npages;
	atomic_add(npages, &dev->mdev->priv.reg_pages);
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->ibmr.length = length;
	mr->access_flags = access_flags;
}

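/*
 * reg_user_mr flow: pin the user memory, then register it either with a
 * cached mkey through the UMR QP (when the region's order allows it) or,
 * when that is not possible, with a blocking FW command via
 * reg_create(). ODP regions that are too large for UMR are rejected.
 */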
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, virt_addr, length, access_flags);
	umem = mr_umem_get(pd, start, length, access_flags, &npages,
			   &page_shift, &ncont, &order);

	if (IS_ERR(umem))
		return (void *)umem;

	if (use_umr(order)) {
		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
			     order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d", order);
			mr = NULL;
		}
	} else if (access_flags & IB_ACCESS_ON_DEMAND) {
		err = -EINVAL;
		pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
		goto error;
	}

	if (!mr)
		mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
				page_shift, access_flags);

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);

	mr->umem = umem;
	set_mr_fileds(dev, mr, npages, length, access_flags);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	update_odp_mr(mr);
#endif

	return &mr->ibmr;

error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}

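/*
 * Invalidate a UMR-registered mkey by posting an unreg WQE and waiting
 * for its completion. Skipped (treated as success) when the device is
 * in internal error state.
 */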
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_core_dev *mdev = dev->mdev;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct mlx5_umr_wr umrwr = {};
	struct ib_send_wr *bad;
	int err;

	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
		return 0;

	mlx5_ib_init_umr_context(&umr_context);

	umrwr.wr.wr_cqe = &umr_context.cqe;
	prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key);

	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
	if (err) {
		up(&umrc->sem);
		mlx5_ib_dbg(dev, "err %d\n", err);
		goto error;
	} else {
		wait_for_completion(&umr_context.done);
		up(&umrc->sem);
	}
	if (umr_context.status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "unreg umr failed\n");
		err = -EFAULT;
		goto error;
	}
	return 0;

error:
	return err;
}

static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
		     u64 length, int npages, int page_shift, int order,
		     int access_flags, int flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct mlx5_ib_umr_context umr_context;
	struct ib_send_wr *bad;
	struct mlx5_umr_wr umrwr = {};
	struct ib_sge sg;
	struct umr_common *umrc = &dev->umrc;
	dma_addr_t dma = 0;
	__be64 *mr_pas = NULL;
	int size;
	int err;

	mlx5_ib_init_umr_context(&umr_context);

	umrwr.wr.wr_cqe = &umr_context.cqe;
	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;

	if (flags & IB_MR_REREG_TRANS) {
		err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size,
				     &mr_pas, &dma);
		if (err)
			return err;

		umrwr.target.virt_addr = virt_addr;
		umrwr.length = length;
		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
	}

	prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
			    page_shift);

	if (flags & IB_MR_REREG_PD) {
		umrwr.pd = pd;
		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD;
	}

	if (flags & IB_MR_REREG_ACCESS) {
		umrwr.access_flags = access_flags;
		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS;
	}

	/* post send request to UMR QP */
	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);

	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
	} else {
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
				     umr_context.status);
			err = -EFAULT;
		}
	}

	up(&umrc->sem);
	if (flags & IB_MR_REREG_TRANS) {
		dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
		kfree(mr_pas);
	}
	return err;
}

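/*
 * Re-registration entry point. Unless only the PD changes, the umem is
 * re-pinned; if the new translation still fits the existing mkey the
 * update is done with a UMR WQE (rereg_umr), otherwise the mkey is torn
 * down and recreated with reg_create().
 */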
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
			  u64 length, u64 virt_addr, int new_access_flags,
			  struct ib_pd *new_pd, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
	struct mlx5_ib_mr *mr = to_mmr(ib_mr);
	struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
	int access_flags = flags & IB_MR_REREG_ACCESS ?
			    new_access_flags :
			    mr->access_flags;
	u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
	u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
	int page_shift = 0;
	int npages = 0;
	int ncont = 0;
	int order = 0;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, virt_addr, length, access_flags);

	if (flags != IB_MR_REREG_PD) {
		/*
		 * Replace umem. This needs to be done whether or not UMR is
		 * used.
		 */
		flags |= IB_MR_REREG_TRANS;
		ib_umem_release(mr->umem);
		mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages,
				       &page_shift, &ncont, &order);
		if (IS_ERR(mr->umem)) {
			err = PTR_ERR(mr->umem);
			mr->umem = NULL;
			return err;
		}
	}

	if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
		/*
		 * UMR can't be used - MKey needs to be replaced.
		 */
		if (mr->umred) {
			err = unreg_umr(dev, mr);
			if (err)
				mlx5_ib_warn(dev, "Failed to unregister MR\n");
		} else {
			err = destroy_mkey(dev, mr);
			if (err)
				mlx5_ib_warn(dev, "Failed to destroy MKey\n");
		}
		if (err)
			return err;

		mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
				page_shift, access_flags);

		if (IS_ERR(mr))
			return PTR_ERR(mr);

		mr->umred = 0;
	} else {
		/*
		 * Send a UMR WQE
		 */
		err = rereg_umr(pd, mr, addr, len, npages, page_shift,
				order, access_flags, flags);
		if (err) {
			mlx5_ib_warn(dev, "Failed to rereg UMR\n");
			return err;
		}
	}

	if (flags & IB_MR_REREG_PD) {
		ib_mr->pd = pd;
		mr->mmkey.pd = to_mpd(pd)->pdn;
	}

	if (flags & IB_MR_REREG_ACCESS)
		mr->access_flags = access_flags;

	if (flags & IB_MR_REREG_TRANS) {
		atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
		set_mr_fileds(dev, mr, npages, len, access_flags);
		mr->mmkey.iova = addr;
		mr->mmkey.size = len;
	}
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	update_odp_mr(mr);
#endif

	return 0;
}

static int
mlx5_alloc_priv_descs(struct ib_device *device,
		      struct mlx5_ib_mr *mr,
		      int ndescs,
		      int desc_size)
{
	int size = ndescs * desc_size;
	int add_size;
	int ret;

	add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);

	mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
	if (!mr->descs_alloc)
		return -ENOMEM;

	mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);

	mr->desc_map = dma_map_single(device->dma_device, mr->descs,
				      size, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dma_device, mr->desc_map)) {
		ret = -ENOMEM;
		goto err;
	}

	return 0;
err:
	kfree(mr->descs_alloc);

	return ret;
}

static void
mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
	if (mr->descs) {
		struct ib_device *device = mr->ibmr.device;
		int size = mr->max_descs * mr->desc_size;

		dma_unmap_single(device->dma_device, mr->desc_map,
				 size, DMA_TO_DEVICE);
		kfree(mr->descs_alloc);
		mr->descs = NULL;
	}
}

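/*
 * Release everything attached to an MR except its umem: signature PSVs,
 * private descriptors and the mkey itself. Cached (UMR-allocated) mkeys
 * are invalidated and handed back to the cache instead of being
 * destroyed, and their mlx5_ib_mr struct stays alive with them.
 */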
static int clean_mr(struct mlx5_ib_mr *mr)
{
	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
	int umred = mr->umred;
	int err;

	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
		kfree(mr->sig);
		mr->sig = NULL;
	}

	mlx5_free_priv_descs(mr);

	if (!umred) {
		err = destroy_mkey(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
				     mr->mmkey.key, err);
			return err;
		}
	} else {
		err = unreg_umr(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed unregister\n");
			return err;
		}
		free_cached_mr(dev, mr);
	}

	if (!umred)
		kfree(mr);

	return 0;
}

int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int npages = mr->npages;
	struct ib_umem *umem = mr->umem;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (umem && umem->odp_data) {
		/* Prevent new page faults from succeeding */
		mr->live = 0;
		/* Wait for all running page-fault handlers to finish. */
		synchronize_srcu(&dev->mr_srcu);
		/* Destroy all page mappings */
		mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
					 ib_umem_end(umem));
		/*
		 * We kill the umem before the MR for ODP,
		 * so that there will not be any invalidations in
		 * flight, looking at the *mr struct.
		 */
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);

		/* Avoid double-freeing the umem. */
		umem = NULL;
	}
#endif

	clean_mr(mr);

	if (umem) {
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);
	}

	return 0;
}

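/*
 * Allocate an MR for fast registration (ib_map_mr_sg): MTT-based for
 * IB_MR_TYPE_MEM_REG, KLM-based for IB_MR_TYPE_SG_GAPS, and a
 * signature-enabled mkey with memory/wire PSVs for IB_MR_TYPE_SIGNATURE.
 */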
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
			       enum ib_mr_type mr_type,
			       u32 max_num_sg)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	int ndescs = ALIGN(max_num_sg, 4);
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, free, 1);
	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);

	if (mr_type == IB_MR_TYPE_MEM_REG) {
		mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
		MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
		err = mlx5_alloc_priv_descs(pd->device, mr,
					    ndescs, sizeof(u64));
		if (err)
			goto err_free_in;

		mr->desc_size = sizeof(u64);
		mr->max_descs = ndescs;
	} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
		mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;

		err = mlx5_alloc_priv_descs(pd->device, mr,
					    ndescs, sizeof(struct mlx5_klm));
		if (err)
			goto err_free_in;
		mr->desc_size = sizeof(struct mlx5_klm);
		mr->max_descs = ndescs;
	} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
		u32 psv_index[2];

		MLX5_SET(mkc, mkc, bsf_en, 1);
		MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
		if (!mr->sig) {
			err = -ENOMEM;
			goto err_free_in;
		}

		/* create mem & wire PSVs */
		err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
					   2, psv_index);
		if (err)
			goto err_free_sig;

		mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
		mr->sig->psv_memory.psv_idx = psv_index[0];
		mr->sig->psv_wire.psv_idx = psv_index[1];

		mr->sig->sig_status_checked = true;
		mr->sig->sig_err_exists = false;
		/* Next UMR, Arm SIGERR */
		++mr->sig->sigerr_count;
	} else {
		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
		err = -EINVAL;
		goto err_free_in;
	}

	MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
	MLX5_SET(mkc, mkc, umr_en, 1);

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
	if (err)
		goto err_destroy_psv;

	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->umem = NULL;
	kfree(in);

	return &mr->ibmr;

err_destroy_psv:
	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
	}
	mlx5_free_priv_descs(mr);
err_free_sig:
	kfree(mr->sig);
err_free_in:
	kfree(in);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}

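/*
 * Allocate a memory window as a free KLM-based mkey sized by the
 * user-requested number of KLMs (rounded up to a multiple of 4).
 * Type-2 windows get remote invalidation enabled (en_rinval).
 */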
struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
			       struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mw *mw = NULL;
	u32 *in = NULL;
	void *mkc;
	int ndescs;
	int err;
	struct mlx5_ib_alloc_mw req = {};
	struct {
		__u32	comp_mask;
		__u32	response_length;
	} resp = {};

	err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
	if (err)
		return ERR_PTR(err);

	if (req.comp_mask || req.reserved1 || req.reserved2)
		return ERR_PTR(-EOPNOTSUPP);

	if (udata->inlen > sizeof(req) &&
	    !ib_is_udata_cleared(udata, sizeof(req),
				 udata->inlen - sizeof(req)))
		return ERR_PTR(-EOPNOTSUPP);

	ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);

	mw = kzalloc(sizeof(*mw), GFP_KERNEL);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!mw || !in) {
		err = -ENOMEM;
		goto free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, free, 1);
	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, umr_en, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS);
	MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
	if (err)
		goto free;

	mw->ibmw.rkey = mw->mmkey.key;

	resp.response_length = min(offsetof(typeof(resp), response_length) +
				   sizeof(resp.response_length), udata->outlen);
	if (resp.response_length) {
		err = ib_copy_to_udata(udata, &resp, resp.response_length);
		if (err) {
			mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
			goto free;
		}
	}

	kfree(in);
	return &mw->ibmw;

free:
	kfree(mw);
	kfree(in);
	return ERR_PTR(err);
}

int mlx5_ib_dealloc_mw(struct ib_mw *mw)
{
	struct mlx5_ib_mw *mmw = to_mmw(mw);
	int err;

	err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
				     &mmw->mmkey);
	if (!err)
		kfree(mmw);
	return err;
}

int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
			    struct ib_mr_status *mr_status)
{
	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
	int ret = 0;

	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
		pr_err("Invalid status check mask\n");
		ret = -EINVAL;
		goto done;
	}

	mr_status->fail_status = 0;
	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
		if (!mmr->sig) {
			ret = -EINVAL;
			pr_err("signature status check requested on a non-signature enabled MR\n");
			goto done;
		}

		mmr->sig->sig_status_checked = true;
		if (!mmr->sig->sig_err_exists)
			goto done;

		if (ibmr->lkey == mmr->sig->err_item.key)
			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
			       sizeof(mr_status->sig_err));
		else {
			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
			mr_status->sig_err.sig_err_offset = 0;
			mr_status->sig_err.key = mmr->sig->err_item.key;
		}

		mmr->sig->sig_err_exists = false;
		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
	}

done:
	return ret;
}

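/*
 * Translate a scatterlist into the MR's descriptor array for
 * ib_map_mr_sg(): KLM entries (one per SG element, carrying the local
 * DMA lkey) when the MR is KLM-based, or packed page addresses via
 * mlx5_set_page() otherwise.
 */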
static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
		   struct scatterlist *sgl,
		   unsigned short sg_nents,
		   unsigned int *sg_offset_p)
{
	struct scatterlist *sg = sgl;
	struct mlx5_klm *klms = mr->descs;
	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
	u32 lkey = mr->ibmr.pd->local_dma_lkey;
	int i;

	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
	mr->ibmr.length = 0;
	mr->ndescs = sg_nents;

	for_each_sg(sgl, sg, sg_nents, i) {
		if (unlikely(i > mr->max_descs))
			break;
		klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
		klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
		klms[i].key = cpu_to_be32(lkey);
		mr->ibmr.length += sg_dma_len(sg);

		sg_offset = 0;
	}

	if (sg_offset_p)
		*sg_offset_p = sg_offset;

	return i;
}

static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	__be64 *descs;

	if (unlikely(mr->ndescs == mr->max_descs))
		return -ENOMEM;

	descs = mr->descs;
	descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);

	return 0;
}

int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		      unsigned int *sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int n;

	mr->ndescs = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
				   mr->desc_size * mr->max_descs,
				   DMA_TO_DEVICE);

	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
	else
		n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
				   mlx5_set_page);

	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
				      mr->desc_size * mr->max_descs,
				      DMA_TO_DEVICE);

	return n;
}