1 /* 2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 34 #include <linux/kref.h> 35 #include <linux/random.h> 36 #include <linux/debugfs.h> 37 #include <linux/export.h> 38 #include <linux/delay.h> 39 #include <rdma/ib_umem.h> 40 #include "mlx5_ib.h" 41 42 enum { 43 MAX_PENDING_REG_MR = 8, 44 }; 45 46 enum { 47 MLX5_UMR_ALIGN = 2048 48 }; 49 50 static __be64 *mr_align(__be64 *ptr, int align) 51 { 52 unsigned long mask = align - 1; 53 54 return (__be64 *)(((unsigned long)ptr + mask) & ~mask); 55 } 56 57 static int order2idx(struct mlx5_ib_dev *dev, int order) 58 { 59 struct mlx5_mr_cache *cache = &dev->cache; 60 61 if (order < cache->ent[0].order) 62 return 0; 63 else 64 return order - cache->ent[0].order; 65 } 66 67 static void reg_mr_callback(int status, void *context) 68 { 69 struct mlx5_ib_mr *mr = context; 70 struct mlx5_ib_dev *dev = mr->dev; 71 struct mlx5_mr_cache *cache = &dev->cache; 72 int c = order2idx(dev, mr->order); 73 struct mlx5_cache_ent *ent = &cache->ent[c]; 74 u8 key; 75 unsigned long flags; 76 struct mlx5_mr_table *table = &dev->mdev->priv.mr_table; 77 int err; 78 79 spin_lock_irqsave(&ent->lock, flags); 80 ent->pending--; 81 spin_unlock_irqrestore(&ent->lock, flags); 82 if (status) { 83 mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status); 84 kfree(mr); 85 dev->fill_delay = 1; 86 mod_timer(&dev->delay_timer, jiffies + HZ); 87 return; 88 } 89 90 if (mr->out.hdr.status) { 91 mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n", 92 mr->out.hdr.status, 93 be32_to_cpu(mr->out.hdr.syndrome)); 94 kfree(mr); 95 dev->fill_delay = 1; 96 mod_timer(&dev->delay_timer, jiffies + HZ); 97 return; 98 } 99 100 spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); 101 key = dev->mdev->priv.mkey_key++; 102 spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); 103 mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; 104 105 cache->last_add = jiffies; 106 107 spin_lock_irqsave(&ent->lock, flags); 108 list_add_tail(&mr->list, &ent->head); 109 ent->cur++; 110 ent->size++; 111 spin_unlock_irqrestore(&ent->lock, flags); 112 113 write_lock_irqsave(&table->lock, flags); 114 err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key), 115 &mr->mmr); 116 if (err) 117 pr_err("Error inserting to mr tree. 0x%x\n", -err); 118 write_unlock_irqrestore(&table->lock, flags); 119 } 120 121 static int add_keys(struct mlx5_ib_dev *dev, int c, int num) 122 { 123 struct mlx5_mr_cache *cache = &dev->cache; 124 struct mlx5_cache_ent *ent = &cache->ent[c]; 125 struct mlx5_create_mkey_mbox_in *in; 126 struct mlx5_ib_mr *mr; 127 int npages = 1 << ent->order; 128 int err = 0; 129 int i; 130 131 in = kzalloc(sizeof(*in), GFP_KERNEL); 132 if (!in) 133 return -ENOMEM; 134 135 for (i = 0; i < num; i++) { 136 if (ent->pending >= MAX_PENDING_REG_MR) { 137 err = -EAGAIN; 138 break; 139 } 140 141 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 142 if (!mr) { 143 err = -ENOMEM; 144 break; 145 } 146 mr->order = ent->order; 147 mr->umred = 1; 148 mr->dev = dev; 149 in->seg.status = 1 << 6; 150 in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2); 151 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 152 in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN; 153 in->seg.log2_page_size = 12; 154 155 spin_lock_irq(&ent->lock); 156 ent->pending++; 157 spin_unlock_irq(&ent->lock); 158 err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, 159 sizeof(*in), reg_mr_callback, 160 mr, &mr->out); 161 if (err) { 162 spin_lock_irq(&ent->lock); 163 ent->pending--; 164 spin_unlock_irq(&ent->lock); 165 mlx5_ib_warn(dev, "create mkey failed %d\n", err); 166 kfree(mr); 167 break; 168 } 169 } 170 171 kfree(in); 172 return err; 173 } 174 175 static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) 176 { 177 struct mlx5_mr_cache *cache = &dev->cache; 178 struct mlx5_cache_ent *ent = &cache->ent[c]; 179 struct mlx5_ib_mr *mr; 180 int err; 181 int i; 182 183 for (i = 0; i < num; i++) { 184 spin_lock_irq(&ent->lock); 185 if (list_empty(&ent->head)) { 186 spin_unlock_irq(&ent->lock); 187 return; 188 } 189 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); 190 list_del(&mr->list); 191 ent->cur--; 192 ent->size--; 193 spin_unlock_irq(&ent->lock); 194 err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); 195 if (err) 196 mlx5_ib_warn(dev, "failed destroy mkey\n"); 197 else 198 kfree(mr); 199 } 200 } 201 202 static ssize_t size_write(struct file *filp, const char __user *buf, 203 size_t count, loff_t *pos) 204 { 205 struct mlx5_cache_ent *ent = filp->private_data; 206 struct mlx5_ib_dev *dev = ent->dev; 207 char lbuf[20]; 208 u32 var; 209 int err; 210 int c; 211 212 if (copy_from_user(lbuf, buf, sizeof(lbuf))) 213 return -EFAULT; 214 215 c = order2idx(dev, ent->order); 216 lbuf[sizeof(lbuf) - 1] = 0; 217 218 if (sscanf(lbuf, "%u", &var) != 1) 219 return -EINVAL; 220 221 if (var < ent->limit) 222 return -EINVAL; 223 224 if (var > ent->size) { 225 do { 226 err = add_keys(dev, c, var - ent->size); 227 if (err && err != -EAGAIN) 228 return err; 229 230 usleep_range(3000, 5000); 231 } while (err); 232 } else if (var < ent->size) { 233 remove_keys(dev, c, ent->size - var); 234 } 235 236 return count; 237 } 238 239 static ssize_t size_read(struct file *filp, char __user *buf, size_t count, 240 loff_t *pos) 241 { 242 struct mlx5_cache_ent *ent = filp->private_data; 243 char lbuf[20]; 244 int err; 245 246 if (*pos) 247 return 0; 248 249 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size); 250 if (err < 0) 251 return err; 252 253 if (copy_to_user(buf, lbuf, err)) 254 return -EFAULT; 255 256 *pos += err; 257 258 return err; 259 } 260 261 static const struct file_operations size_fops = { 262 .owner = THIS_MODULE, 263 .open = simple_open, 264 .write = size_write, 265 .read = size_read, 266 }; 267 268 static ssize_t limit_write(struct file *filp, const char __user *buf, 269 size_t count, loff_t *pos) 270 { 271 struct mlx5_cache_ent *ent = filp->private_data; 272 struct mlx5_ib_dev *dev = ent->dev; 273 char lbuf[20]; 274 u32 var; 275 int err; 276 int c; 277 278 if (copy_from_user(lbuf, buf, sizeof(lbuf))) 279 return -EFAULT; 280 281 c = order2idx(dev, ent->order); 282 lbuf[sizeof(lbuf) - 1] = 0; 283 284 if (sscanf(lbuf, "%u", &var) != 1) 285 return -EINVAL; 286 287 if (var > ent->size) 288 return -EINVAL; 289 290 ent->limit = var; 291 292 if (ent->cur < ent->limit) { 293 err = add_keys(dev, c, 2 * ent->limit - ent->cur); 294 if (err) 295 return err; 296 } 297 298 return count; 299 } 300 301 static ssize_t limit_read(struct file *filp, char __user *buf, size_t count, 302 loff_t *pos) 303 { 304 struct mlx5_cache_ent *ent = filp->private_data; 305 char lbuf[20]; 306 int err; 307 308 if (*pos) 309 return 0; 310 311 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); 312 if (err < 0) 313 return err; 314 315 if (copy_to_user(buf, lbuf, err)) 316 return -EFAULT; 317 318 *pos += err; 319 320 return err; 321 } 322 323 static const struct file_operations limit_fops = { 324 .owner = THIS_MODULE, 325 .open = simple_open, 326 .write = limit_write, 327 .read = limit_read, 328 }; 329 330 static int someone_adding(struct mlx5_mr_cache *cache) 331 { 332 int i; 333 334 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { 335 if (cache->ent[i].cur < cache->ent[i].limit) 336 return 1; 337 } 338 339 return 0; 340 } 341 342 static void __cache_work_func(struct mlx5_cache_ent *ent) 343 { 344 struct mlx5_ib_dev *dev = ent->dev; 345 struct mlx5_mr_cache *cache = &dev->cache; 346 int i = order2idx(dev, ent->order); 347 int err; 348 349 if (cache->stopped) 350 return; 351 352 ent = &dev->cache.ent[i]; 353 if (ent->cur < 2 * ent->limit && !dev->fill_delay) { 354 err = add_keys(dev, i, 1); 355 if (ent->cur < 2 * ent->limit) { 356 if (err == -EAGAIN) { 357 mlx5_ib_dbg(dev, "returned eagain, order %d\n", 358 i + 2); 359 queue_delayed_work(cache->wq, &ent->dwork, 360 msecs_to_jiffies(3)); 361 } else if (err) { 362 mlx5_ib_warn(dev, "command failed order %d, err %d\n", 363 i + 2, err); 364 queue_delayed_work(cache->wq, &ent->dwork, 365 msecs_to_jiffies(1000)); 366 } else { 367 queue_work(cache->wq, &ent->work); 368 } 369 } 370 } else if (ent->cur > 2 * ent->limit) { 371 if (!someone_adding(cache) && 372 time_after(jiffies, cache->last_add + 300 * HZ)) { 373 remove_keys(dev, i, 1); 374 if (ent->cur > ent->limit) 375 queue_work(cache->wq, &ent->work); 376 } else { 377 queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); 378 } 379 } 380 } 381 382 static void delayed_cache_work_func(struct work_struct *work) 383 { 384 struct mlx5_cache_ent *ent; 385 386 ent = container_of(work, struct mlx5_cache_ent, dwork.work); 387 __cache_work_func(ent); 388 } 389 390 static void cache_work_func(struct work_struct *work) 391 { 392 struct mlx5_cache_ent *ent; 393 394 ent = container_of(work, struct mlx5_cache_ent, work); 395 __cache_work_func(ent); 396 } 397 398 static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) 399 { 400 struct mlx5_mr_cache *cache = &dev->cache; 401 struct mlx5_ib_mr *mr = NULL; 402 struct mlx5_cache_ent *ent; 403 int c; 404 int i; 405 406 c = order2idx(dev, order); 407 if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { 408 mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); 409 return NULL; 410 } 411 412 for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) { 413 ent = &cache->ent[i]; 414 415 mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); 416 417 spin_lock_irq(&ent->lock); 418 if (!list_empty(&ent->head)) { 419 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, 420 list); 421 list_del(&mr->list); 422 ent->cur--; 423 spin_unlock_irq(&ent->lock); 424 if (ent->cur < ent->limit) 425 queue_work(cache->wq, &ent->work); 426 break; 427 } 428 spin_unlock_irq(&ent->lock); 429 430 queue_work(cache->wq, &ent->work); 431 432 if (mr) 433 break; 434 } 435 436 if (!mr) 437 cache->ent[c].miss++; 438 439 return mr; 440 } 441 442 static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 443 { 444 struct mlx5_mr_cache *cache = &dev->cache; 445 struct mlx5_cache_ent *ent; 446 int shrink = 0; 447 int c; 448 449 c = order2idx(dev, mr->order); 450 if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { 451 mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c); 452 return; 453 } 454 ent = &cache->ent[c]; 455 spin_lock_irq(&ent->lock); 456 list_add_tail(&mr->list, &ent->head); 457 ent->cur++; 458 if (ent->cur > 2 * ent->limit) 459 shrink = 1; 460 spin_unlock_irq(&ent->lock); 461 462 if (shrink) 463 queue_work(cache->wq, &ent->work); 464 } 465 466 static void clean_keys(struct mlx5_ib_dev *dev, int c) 467 { 468 struct mlx5_mr_cache *cache = &dev->cache; 469 struct mlx5_cache_ent *ent = &cache->ent[c]; 470 struct mlx5_ib_mr *mr; 471 int err; 472 473 cancel_delayed_work(&ent->dwork); 474 while (1) { 475 spin_lock_irq(&ent->lock); 476 if (list_empty(&ent->head)) { 477 spin_unlock_irq(&ent->lock); 478 return; 479 } 480 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); 481 list_del(&mr->list); 482 ent->cur--; 483 ent->size--; 484 spin_unlock_irq(&ent->lock); 485 err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); 486 if (err) 487 mlx5_ib_warn(dev, "failed destroy mkey\n"); 488 else 489 kfree(mr); 490 } 491 } 492 493 static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) 494 { 495 struct mlx5_mr_cache *cache = &dev->cache; 496 struct mlx5_cache_ent *ent; 497 int i; 498 499 if (!mlx5_debugfs_root) 500 return 0; 501 502 cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root); 503 if (!cache->root) 504 return -ENOMEM; 505 506 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { 507 ent = &cache->ent[i]; 508 sprintf(ent->name, "%d", ent->order); 509 ent->dir = debugfs_create_dir(ent->name, cache->root); 510 if (!ent->dir) 511 return -ENOMEM; 512 513 ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent, 514 &size_fops); 515 if (!ent->fsize) 516 return -ENOMEM; 517 518 ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent, 519 &limit_fops); 520 if (!ent->flimit) 521 return -ENOMEM; 522 523 ent->fcur = debugfs_create_u32("cur", 0400, ent->dir, 524 &ent->cur); 525 if (!ent->fcur) 526 return -ENOMEM; 527 528 ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir, 529 &ent->miss); 530 if (!ent->fmiss) 531 return -ENOMEM; 532 } 533 534 return 0; 535 } 536 537 static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) 538 { 539 if (!mlx5_debugfs_root) 540 return; 541 542 debugfs_remove_recursive(dev->cache.root); 543 } 544 545 static void delay_time_func(unsigned long ctx) 546 { 547 struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx; 548 549 dev->fill_delay = 0; 550 } 551 552 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) 553 { 554 struct mlx5_mr_cache *cache = &dev->cache; 555 struct mlx5_cache_ent *ent; 556 int limit; 557 int err; 558 int i; 559 560 cache->wq = create_singlethread_workqueue("mkey_cache"); 561 if (!cache->wq) { 562 mlx5_ib_warn(dev, "failed to create work queue\n"); 563 return -ENOMEM; 564 } 565 566 setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev); 567 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { 568 INIT_LIST_HEAD(&cache->ent[i].head); 569 spin_lock_init(&cache->ent[i].lock); 570 571 ent = &cache->ent[i]; 572 INIT_LIST_HEAD(&ent->head); 573 spin_lock_init(&ent->lock); 574 ent->order = i + 2; 575 ent->dev = dev; 576 577 if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) 578 limit = dev->mdev->profile->mr_cache[i].limit; 579 else 580 limit = 0; 581 582 INIT_WORK(&ent->work, cache_work_func); 583 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); 584 ent->limit = limit; 585 queue_work(cache->wq, &ent->work); 586 } 587 588 err = mlx5_mr_cache_debugfs_init(dev); 589 if (err) 590 mlx5_ib_warn(dev, "cache debugfs failure\n"); 591 592 return 0; 593 } 594 595 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) 596 { 597 int i; 598 599 dev->cache.stopped = 1; 600 flush_workqueue(dev->cache.wq); 601 602 mlx5_mr_cache_debugfs_cleanup(dev); 603 604 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) 605 clean_keys(dev, i); 606 607 destroy_workqueue(dev->cache.wq); 608 del_timer_sync(&dev->delay_timer); 609 610 return 0; 611 } 612 613 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) 614 { 615 struct mlx5_ib_dev *dev = to_mdev(pd->device); 616 struct mlx5_core_dev *mdev = dev->mdev; 617 struct mlx5_create_mkey_mbox_in *in; 618 struct mlx5_mkey_seg *seg; 619 struct mlx5_ib_mr *mr; 620 int err; 621 622 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 623 if (!mr) 624 return ERR_PTR(-ENOMEM); 625 626 in = kzalloc(sizeof(*in), GFP_KERNEL); 627 if (!in) { 628 err = -ENOMEM; 629 goto err_free; 630 } 631 632 seg = &in->seg; 633 seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA; 634 seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64); 635 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 636 seg->start_addr = 0; 637 638 err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL, 639 NULL); 640 if (err) 641 goto err_in; 642 643 kfree(in); 644 mr->ibmr.lkey = mr->mmr.key; 645 mr->ibmr.rkey = mr->mmr.key; 646 mr->umem = NULL; 647 648 return &mr->ibmr; 649 650 err_in: 651 kfree(in); 652 653 err_free: 654 kfree(mr); 655 656 return ERR_PTR(err); 657 } 658 659 static int get_octo_len(u64 addr, u64 len, int page_size) 660 { 661 u64 offset; 662 int npages; 663 664 offset = addr & (page_size - 1); 665 npages = ALIGN(len + offset, page_size) >> ilog2(page_size); 666 return (npages + 1) / 2; 667 } 668 669 static int use_umr(int order) 670 { 671 return order <= 17; 672 } 673 674 static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, 675 struct ib_sge *sg, u64 dma, int n, u32 key, 676 int page_shift, u64 virt_addr, u64 len, 677 int access_flags) 678 { 679 struct mlx5_ib_dev *dev = to_mdev(pd->device); 680 struct ib_mr *mr = dev->umrc.mr; 681 682 sg->addr = dma; 683 sg->length = ALIGN(sizeof(u64) * n, 64); 684 sg->lkey = mr->lkey; 685 686 wr->next = NULL; 687 wr->send_flags = 0; 688 wr->sg_list = sg; 689 if (n) 690 wr->num_sge = 1; 691 else 692 wr->num_sge = 0; 693 694 wr->opcode = MLX5_IB_WR_UMR; 695 wr->wr.fast_reg.page_list_len = n; 696 wr->wr.fast_reg.page_shift = page_shift; 697 wr->wr.fast_reg.rkey = key; 698 wr->wr.fast_reg.iova_start = virt_addr; 699 wr->wr.fast_reg.length = len; 700 wr->wr.fast_reg.access_flags = access_flags; 701 wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd; 702 } 703 704 static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, 705 struct ib_send_wr *wr, u32 key) 706 { 707 wr->send_flags = MLX5_IB_SEND_UMR_UNREG; 708 wr->opcode = MLX5_IB_WR_UMR; 709 wr->wr.fast_reg.rkey = key; 710 } 711 712 void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) 713 { 714 struct mlx5_ib_umr_context *context; 715 struct ib_wc wc; 716 int err; 717 718 while (1) { 719 err = ib_poll_cq(cq, 1, &wc); 720 if (err < 0) { 721 pr_warn("poll cq error %d\n", err); 722 return; 723 } 724 if (err == 0) 725 break; 726 727 context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id; 728 context->status = wc.status; 729 complete(&context->done); 730 } 731 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 732 } 733 734 static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, 735 u64 virt_addr, u64 len, int npages, 736 int page_shift, int order, int access_flags) 737 { 738 struct mlx5_ib_dev *dev = to_mdev(pd->device); 739 struct device *ddev = dev->ib_dev.dma_device; 740 struct umr_common *umrc = &dev->umrc; 741 struct mlx5_ib_umr_context umr_context; 742 struct ib_send_wr wr, *bad; 743 struct mlx5_ib_mr *mr; 744 struct ib_sge sg; 745 int size = sizeof(u64) * npages; 746 int err = 0; 747 int i; 748 749 for (i = 0; i < 1; i++) { 750 mr = alloc_cached_mr(dev, order); 751 if (mr) 752 break; 753 754 err = add_keys(dev, order2idx(dev, order), 1); 755 if (err && err != -EAGAIN) { 756 mlx5_ib_warn(dev, "add_keys failed, err %d\n", err); 757 break; 758 } 759 } 760 761 if (!mr) 762 return ERR_PTR(-EAGAIN); 763 764 mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); 765 if (!mr->pas) { 766 err = -ENOMEM; 767 goto free_mr; 768 } 769 770 mlx5_ib_populate_pas(dev, umem, page_shift, 771 mr_align(mr->pas, MLX5_UMR_ALIGN), 1); 772 773 mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size, 774 DMA_TO_DEVICE); 775 if (dma_mapping_error(ddev, mr->dma)) { 776 err = -ENOMEM; 777 goto free_pas; 778 } 779 780 memset(&wr, 0, sizeof(wr)); 781 wr.wr_id = (u64)(unsigned long)&umr_context; 782 prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags); 783 784 mlx5_ib_init_umr_context(&umr_context); 785 down(&umrc->sem); 786 err = ib_post_send(umrc->qp, &wr, &bad); 787 if (err) { 788 mlx5_ib_warn(dev, "post send failed, err %d\n", err); 789 goto unmap_dma; 790 } else { 791 wait_for_completion(&umr_context.done); 792 if (umr_context.status != IB_WC_SUCCESS) { 793 mlx5_ib_warn(dev, "reg umr failed\n"); 794 err = -EFAULT; 795 } 796 } 797 798 mr->mmr.iova = virt_addr; 799 mr->mmr.size = len; 800 mr->mmr.pd = to_mpd(pd)->pdn; 801 802 unmap_dma: 803 up(&umrc->sem); 804 dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); 805 806 free_pas: 807 kfree(mr->pas); 808 809 free_mr: 810 if (err) { 811 free_cached_mr(dev, mr); 812 return ERR_PTR(err); 813 } 814 815 return mr; 816 } 817 818 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, 819 u64 length, struct ib_umem *umem, 820 int npages, int page_shift, 821 int access_flags) 822 { 823 struct mlx5_ib_dev *dev = to_mdev(pd->device); 824 struct mlx5_create_mkey_mbox_in *in; 825 struct mlx5_ib_mr *mr; 826 int inlen; 827 int err; 828 829 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 830 if (!mr) 831 return ERR_PTR(-ENOMEM); 832 833 inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2; 834 in = mlx5_vzalloc(inlen); 835 if (!in) { 836 err = -ENOMEM; 837 goto err_1; 838 } 839 mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0); 840 841 in->seg.flags = convert_access(access_flags) | 842 MLX5_ACCESS_MODE_MTT; 843 in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); 844 in->seg.start_addr = cpu_to_be64(virt_addr); 845 in->seg.len = cpu_to_be64(length); 846 in->seg.bsfs_octo_size = 0; 847 in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); 848 in->seg.log2_page_size = page_shift; 849 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 850 in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 851 1 << page_shift)); 852 err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL, 853 NULL, NULL); 854 if (err) { 855 mlx5_ib_warn(dev, "create mkey failed\n"); 856 goto err_2; 857 } 858 mr->umem = umem; 859 kvfree(in); 860 861 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key); 862 863 return mr; 864 865 err_2: 866 kvfree(in); 867 868 err_1: 869 kfree(mr); 870 871 return ERR_PTR(err); 872 } 873 874 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 875 u64 virt_addr, int access_flags, 876 struct ib_udata *udata) 877 { 878 struct mlx5_ib_dev *dev = to_mdev(pd->device); 879 struct mlx5_ib_mr *mr = NULL; 880 struct ib_umem *umem; 881 int page_shift; 882 int npages; 883 int ncont; 884 int order; 885 int err; 886 887 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", 888 start, virt_addr, length, access_flags); 889 umem = ib_umem_get(pd->uobject->context, start, length, access_flags, 890 0); 891 if (IS_ERR(umem)) { 892 mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); 893 return (void *)umem; 894 } 895 896 mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order); 897 if (!npages) { 898 mlx5_ib_warn(dev, "avoid zero region\n"); 899 err = -EINVAL; 900 goto error; 901 } 902 903 mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", 904 npages, ncont, order, page_shift); 905 906 if (use_umr(order)) { 907 mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, 908 order, access_flags); 909 if (PTR_ERR(mr) == -EAGAIN) { 910 mlx5_ib_dbg(dev, "cache empty for order %d", order); 911 mr = NULL; 912 } 913 } 914 915 if (!mr) 916 mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift, 917 access_flags); 918 919 if (IS_ERR(mr)) { 920 err = PTR_ERR(mr); 921 goto error; 922 } 923 924 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key); 925 926 mr->umem = umem; 927 mr->npages = npages; 928 spin_lock(&dev->mr_lock); 929 dev->mdev->priv.reg_pages += npages; 930 spin_unlock(&dev->mr_lock); 931 mr->ibmr.lkey = mr->mmr.key; 932 mr->ibmr.rkey = mr->mmr.key; 933 934 return &mr->ibmr; 935 936 error: 937 ib_umem_release(umem); 938 return ERR_PTR(err); 939 } 940 941 static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 942 { 943 struct umr_common *umrc = &dev->umrc; 944 struct mlx5_ib_umr_context umr_context; 945 struct ib_send_wr wr, *bad; 946 int err; 947 948 memset(&wr, 0, sizeof(wr)); 949 wr.wr_id = (u64)(unsigned long)&umr_context; 950 prep_umr_unreg_wqe(dev, &wr, mr->mmr.key); 951 952 mlx5_ib_init_umr_context(&umr_context); 953 down(&umrc->sem); 954 err = ib_post_send(umrc->qp, &wr, &bad); 955 if (err) { 956 up(&umrc->sem); 957 mlx5_ib_dbg(dev, "err %d\n", err); 958 goto error; 959 } else { 960 wait_for_completion(&umr_context.done); 961 up(&umrc->sem); 962 } 963 if (umr_context.status != IB_WC_SUCCESS) { 964 mlx5_ib_warn(dev, "unreg umr failed\n"); 965 err = -EFAULT; 966 goto error; 967 } 968 return 0; 969 970 error: 971 return err; 972 } 973 974 int mlx5_ib_dereg_mr(struct ib_mr *ibmr) 975 { 976 struct mlx5_ib_dev *dev = to_mdev(ibmr->device); 977 struct mlx5_ib_mr *mr = to_mmr(ibmr); 978 struct ib_umem *umem = mr->umem; 979 int npages = mr->npages; 980 int umred = mr->umred; 981 int err; 982 983 if (!umred) { 984 err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); 985 if (err) { 986 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", 987 mr->mmr.key, err); 988 return err; 989 } 990 } else { 991 err = unreg_umr(dev, mr); 992 if (err) { 993 mlx5_ib_warn(dev, "failed unregister\n"); 994 return err; 995 } 996 free_cached_mr(dev, mr); 997 } 998 999 if (umem) { 1000 ib_umem_release(umem); 1001 spin_lock(&dev->mr_lock); 1002 dev->mdev->priv.reg_pages -= npages; 1003 spin_unlock(&dev->mr_lock); 1004 } 1005 1006 if (!umred) 1007 kfree(mr); 1008 1009 return 0; 1010 } 1011 1012 struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, 1013 struct ib_mr_init_attr *mr_init_attr) 1014 { 1015 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1016 struct mlx5_create_mkey_mbox_in *in; 1017 struct mlx5_ib_mr *mr; 1018 int access_mode, err; 1019 int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4); 1020 1021 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1022 if (!mr) 1023 return ERR_PTR(-ENOMEM); 1024 1025 in = kzalloc(sizeof(*in), GFP_KERNEL); 1026 if (!in) { 1027 err = -ENOMEM; 1028 goto err_free; 1029 } 1030 1031 in->seg.status = 1 << 6; /* free */ 1032 in->seg.xlt_oct_size = cpu_to_be32(ndescs); 1033 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 1034 in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); 1035 access_mode = MLX5_ACCESS_MODE_MTT; 1036 1037 if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) { 1038 u32 psv_index[2]; 1039 1040 in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) | 1041 MLX5_MKEY_BSF_EN); 1042 in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); 1043 mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); 1044 if (!mr->sig) { 1045 err = -ENOMEM; 1046 goto err_free_in; 1047 } 1048 1049 /* create mem & wire PSVs */ 1050 err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 1051 2, psv_index); 1052 if (err) 1053 goto err_free_sig; 1054 1055 access_mode = MLX5_ACCESS_MODE_KLM; 1056 mr->sig->psv_memory.psv_idx = psv_index[0]; 1057 mr->sig->psv_wire.psv_idx = psv_index[1]; 1058 1059 mr->sig->sig_status_checked = true; 1060 mr->sig->sig_err_exists = false; 1061 /* Next UMR, Arm SIGERR */ 1062 ++mr->sig->sigerr_count; 1063 } 1064 1065 in->seg.flags = MLX5_PERM_UMR_EN | access_mode; 1066 err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), 1067 NULL, NULL, NULL); 1068 if (err) 1069 goto err_destroy_psv; 1070 1071 mr->ibmr.lkey = mr->mmr.key; 1072 mr->ibmr.rkey = mr->mmr.key; 1073 mr->umem = NULL; 1074 kfree(in); 1075 1076 return &mr->ibmr; 1077 1078 err_destroy_psv: 1079 if (mr->sig) { 1080 if (mlx5_core_destroy_psv(dev->mdev, 1081 mr->sig->psv_memory.psv_idx)) 1082 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", 1083 mr->sig->psv_memory.psv_idx); 1084 if (mlx5_core_destroy_psv(dev->mdev, 1085 mr->sig->psv_wire.psv_idx)) 1086 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", 1087 mr->sig->psv_wire.psv_idx); 1088 } 1089 err_free_sig: 1090 kfree(mr->sig); 1091 err_free_in: 1092 kfree(in); 1093 err_free: 1094 kfree(mr); 1095 return ERR_PTR(err); 1096 } 1097 1098 int mlx5_ib_destroy_mr(struct ib_mr *ibmr) 1099 { 1100 struct mlx5_ib_dev *dev = to_mdev(ibmr->device); 1101 struct mlx5_ib_mr *mr = to_mmr(ibmr); 1102 int err; 1103 1104 if (mr->sig) { 1105 if (mlx5_core_destroy_psv(dev->mdev, 1106 mr->sig->psv_memory.psv_idx)) 1107 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", 1108 mr->sig->psv_memory.psv_idx); 1109 if (mlx5_core_destroy_psv(dev->mdev, 1110 mr->sig->psv_wire.psv_idx)) 1111 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", 1112 mr->sig->psv_wire.psv_idx); 1113 kfree(mr->sig); 1114 } 1115 1116 err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); 1117 if (err) { 1118 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", 1119 mr->mmr.key, err); 1120 return err; 1121 } 1122 1123 kfree(mr); 1124 1125 return err; 1126 } 1127 1128 struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, 1129 int max_page_list_len) 1130 { 1131 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1132 struct mlx5_create_mkey_mbox_in *in; 1133 struct mlx5_ib_mr *mr; 1134 int err; 1135 1136 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1137 if (!mr) 1138 return ERR_PTR(-ENOMEM); 1139 1140 in = kzalloc(sizeof(*in), GFP_KERNEL); 1141 if (!in) { 1142 err = -ENOMEM; 1143 goto err_free; 1144 } 1145 1146 in->seg.status = 1 << 6; /* free */ 1147 in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2); 1148 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 1149 in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT; 1150 in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); 1151 /* 1152 * TBD not needed - issue 197292 */ 1153 in->seg.log2_page_size = PAGE_SHIFT; 1154 1155 err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), NULL, 1156 NULL, NULL); 1157 kfree(in); 1158 if (err) 1159 goto err_free; 1160 1161 mr->ibmr.lkey = mr->mmr.key; 1162 mr->ibmr.rkey = mr->mmr.key; 1163 mr->umem = NULL; 1164 1165 return &mr->ibmr; 1166 1167 err_free: 1168 kfree(mr); 1169 return ERR_PTR(err); 1170 } 1171 1172 struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, 1173 int page_list_len) 1174 { 1175 struct mlx5_ib_fast_reg_page_list *mfrpl; 1176 int size = page_list_len * sizeof(u64); 1177 1178 mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL); 1179 if (!mfrpl) 1180 return ERR_PTR(-ENOMEM); 1181 1182 mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL); 1183 if (!mfrpl->ibfrpl.page_list) 1184 goto err_free; 1185 1186 mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device, 1187 size, &mfrpl->map, 1188 GFP_KERNEL); 1189 if (!mfrpl->mapped_page_list) 1190 goto err_free; 1191 1192 WARN_ON(mfrpl->map & 0x3f); 1193 1194 return &mfrpl->ibfrpl; 1195 1196 err_free: 1197 kfree(mfrpl->ibfrpl.page_list); 1198 kfree(mfrpl); 1199 return ERR_PTR(-ENOMEM); 1200 } 1201 1202 void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) 1203 { 1204 struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); 1205 struct mlx5_ib_dev *dev = to_mdev(page_list->device); 1206 int size = page_list->max_page_list_len * sizeof(u64); 1207 1208 dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list, 1209 mfrpl->map); 1210 kfree(mfrpl->ibfrpl.page_list); 1211 kfree(mfrpl); 1212 } 1213 1214 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, 1215 struct ib_mr_status *mr_status) 1216 { 1217 struct mlx5_ib_mr *mmr = to_mmr(ibmr); 1218 int ret = 0; 1219 1220 if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { 1221 pr_err("Invalid status check mask\n"); 1222 ret = -EINVAL; 1223 goto done; 1224 } 1225 1226 mr_status->fail_status = 0; 1227 if (check_mask & IB_MR_CHECK_SIG_STATUS) { 1228 if (!mmr->sig) { 1229 ret = -EINVAL; 1230 pr_err("signature status check requested on a non-signature enabled MR\n"); 1231 goto done; 1232 } 1233 1234 mmr->sig->sig_status_checked = true; 1235 if (!mmr->sig->sig_err_exists) 1236 goto done; 1237 1238 if (ibmr->lkey == mmr->sig->err_item.key) 1239 memcpy(&mr_status->sig_err, &mmr->sig->err_item, 1240 sizeof(mr_status->sig_err)); 1241 else { 1242 mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; 1243 mr_status->sig_err.sig_err_offset = 0; 1244 mr_status->sig_err.key = mmr->sig->err_item.key; 1245 } 1246 1247 mmr->sig->sig_err_exists = false; 1248 mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; 1249 } 1250 1251 done: 1252 return ret; 1253 } 1254