/*
 * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */


#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"

enum {
	MAX_PENDING_REG_MR = 8,
};

enum {
	MLX5_UMR_ALIGN = 2048
};

static __be64 *mr_align(__be64 *ptr, int align)
{
	unsigned long mask = align - 1;

	return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

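/*
 * Completion handler for asynchronous mkey creation issued by add_keys().
 * On failure, back off further cache fills for a second (fill_delay plus
 * delay_timer).  On success, combine the returned mkey index with a
 * per-device variant key and add the new MR to its cache bucket.
 */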
static void reg_mr_callback(int status, void *context)
{
	struct mlx5_ib_mr *mr = context;
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	if (mr->out.hdr.status) {
		mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
			     mr->out.hdr.status,
			     be32_to_cpu(mr->out.hdr.syndrome));
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags);
	key = dev->mdev.priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags);
	mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);
}

static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	int err = 0;
	int i;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->umred = 1;
		mr->dev = dev;
		in->seg.status = 1 << 6;
		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
		in->seg.log2_page_size = 12;

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
					    sizeof(*in), reg_mr_callback,
					    mr, &mr->out);
		if (err) {
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err)
			mlx5_ib_warn(dev, "failed to destroy mkey\n");
		else
			kfree(mr);
	}
}

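/*
 * debugfs knobs: each cache bucket exposes "size" and "limit" files whose
 * write handlers below grow or shrink the bucket through add_keys() and
 * remove_keys(), alongside "cur" and "miss" counters.
 */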
static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		do {
			err = add_keys(dev, c, var - ent->size);
			if (err && err != -EAGAIN)
				return err;

			usleep_range(3000, 5000);
		} while (err);
	} else if (var < ent->size) {
		remove_keys(dev, c, ent->size - var);
	}

	return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations size_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= size_write,
	.read	= size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations limit_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= limit_write,
	.read	= limit_read,
};

static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

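/*
 * Per-bucket background worker: while the bucket holds fewer than
 * 2 * limit mkeys, add one key per pass (requeueing itself, with a short
 * delay on -EAGAIN and a longer one on other errors); once it holds more
 * than 2 * limit, trim one key per pass, but only after the cache has been
 * idle for a while and no bucket is below its limit.
 */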
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			} else {
				queue_work(cache->wq, &ent->work);
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		if (!someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 300 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

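/*
 * Take a pre-created MR from the bucket matching the requested order,
 * falling back to larger orders if that bucket is empty.  Every probe
 * kicks the bucket's worker so it gets refilled; a complete miss is
 * counted against the requested bucket.
 */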
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int c;
	int i;

	c = order2idx(dev, order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock_irq(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock_irq(&ent->lock);

		queue_work(cache->wq, &ent->work);

		if (mr)
			break;
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}

static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}
	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err)
			mlx5_ib_warn(dev, "failed to destroy mkey\n");
		else
			kfree(mr);
	}
}

static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	if (!mlx5_debugfs_root)
		return 0;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root);
	if (!cache->root)
		return -ENOMEM;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		sprintf(ent->name, "%d", ent->order);
		ent->dir = debugfs_create_dir(ent->name, cache->root);
		if (!ent->dir)
			return -ENOMEM;

		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
						 &size_fops);
		if (!ent->fsize)
			return -ENOMEM;

		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
						  &limit_fops);
		if (!ent->flimit)
			return -ENOMEM;

		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
					       &ent->cur);
		if (!ent->fcur)
			return -ENOMEM;

		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
						&ent->miss);
		if (!ent->fmiss)
			return -ENOMEM;
	}

	return 0;
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root)
		return;

	debugfs_remove_recursive(dev->cache.root);
}

static void delay_time_func(unsigned long ctx)
{
	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

	dev->fill_delay = 0;
}

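/*
 * Set up the MR cache: one bucket per order (starting at order 2), each
 * with its own work item, plus a single-threaded workqueue that fills the
 * buckets in the background up to the limits taken from the device
 * profile.
 */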
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int err;
	int i;

	cache->wq = create_singlethread_workqueue("mkey_cache");
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		INIT_LIST_HEAD(&cache->ent[i].head);
		spin_lock_init(&cache->ent[i].lock);

		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE)
			limit = dev->mdev.profile->mr_cache[i].limit;
		else
			limit = 0;

		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		ent->limit = limit;
		queue_work(cache->wq, &ent->work);
	}

	err = mlx5_mr_cache_debugfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "cache debugfs failure\n");

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);

	mlx5_mr_cache_debugfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	del_timer_sync(&dev->delay_timer);

	return 0;
}

struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_core_dev *mdev = &dev->mdev;
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_mkey_seg *seg;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	seg = &in->seg;
	seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
	seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	seg->start_addr = 0;

	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
				    NULL);
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
	return (npages + 1) / 2;
}

static int use_umr(int order)
{
	return order <= 17;
}

static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
			     struct ib_sge *sg, u64 dma, int n, u32 key,
			     int page_shift, u64 virt_addr, u64 len,
			     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_mr *mr = dev->umrc.mr;

	sg->addr = dma;
	sg->length = ALIGN(sizeof(u64) * n, 64);
	sg->lkey = mr->lkey;

	wr->next = NULL;
	wr->send_flags = 0;
	wr->sg_list = sg;
	if (n)
		wr->num_sge = 1;
	else
		wr->num_sge = 0;

	wr->opcode = MLX5_IB_WR_UMR;
	wr->wr.fast_reg.page_list_len = n;
	wr->wr.fast_reg.page_shift = page_shift;
	wr->wr.fast_reg.rkey = key;
	wr->wr.fast_reg.iova_start = virt_addr;
	wr->wr.fast_reg.length = len;
	wr->wr.fast_reg.access_flags = access_flags;
	wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
			       struct ib_send_wr *wr, u32 key)
{
	wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
	wr->opcode = MLX5_IB_WR_UMR;
	wr->wr.fast_reg.rkey = key;
}

void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
	struct mlx5_ib_mr *mr;
	struct ib_wc wc;
	int err;

	while (1) {
		err = ib_poll_cq(cq, 1, &wc);
		if (err < 0) {
			pr_warn("poll cq error %d\n", err);
			return;
		}
		if (err == 0)
			break;

		mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
		mr->status = wc.status;
		complete(&mr->done);
	}
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

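/*
 * Register a region through the UMR QP: take a pre-created mkey from the
 * cache, build a page list aligned to MLX5_UMR_ALIGN, DMA-map it and post
 * a UMR work request that points the mkey at the new translation.
 */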
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct ib_send_wr wr, *bad;
	struct mlx5_ib_mr *mr;
	struct ib_sge sg;
	int size = sizeof(u64) * npages;
	int err;
	int i;

	for (i = 0; i < 1; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err && err != -EAGAIN) {
			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
	if (!mr->pas) {
		err = -ENOMEM;
		goto error;
	}

	mlx5_ib_populate_pas(dev, umem, page_shift,
			     mr_align(mr->pas, MLX5_UMR_ALIGN), 1);

	mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
				 DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, mr->dma)) {
		kfree(mr->pas);
		err = -ENOMEM;
		goto error;
	}

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)mr;
	prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key,
			 page_shift, virt_addr, len, access_flags);

	/* We serialize polls so one process does not kidnap another's
	 * completion. This is not a problem since wr is completed in
	 * around 1 usec
	 */
	down(&umrc->sem);
	init_completion(&mr->done);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
		up(&umrc->sem);
		goto error;
	}
	wait_for_completion(&mr->done);
	up(&umrc->sem);

	dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
	kfree(mr->pas);

	if (mr->status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "reg umr failed\n");
		err = -EFAULT;
		goto error;
	}

	return mr;

error:
	free_cached_mr(dev, mr);
	return ERR_PTR(err);
}

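/*
 * Slow path: create the mkey with a blocking firmware command.  Used when
 * the region is too large for the UMR path or the cache could not supply
 * an mkey.
 */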
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
				     u64 length, struct ib_umem *umem,
				     int npages, int page_shift,
				     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int inlen;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);

	in->seg.flags = convert_access(access_flags) |
		MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	in->seg.start_addr = cpu_to_be64(virt_addr);
	in->seg.len = cpu_to_be64(length);
	in->seg.bsfs_octo_size = 0;
	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length,
							1 << page_shift));
	in->seg.log2_page_size = page_shift;
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
							 1 << page_shift));
	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL,
				    NULL, NULL);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->umem = umem;
	mlx5_vfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

	return mr;

err_2:
	mlx5_vfree(in);

err_1:
	kfree(mr);

	return ERR_PTR(err);
}

struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
		    start, virt_addr, length);
	umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
			   0);
	if (IS_ERR(umem)) {
		mlx5_ib_dbg(dev, "umem get failed\n");
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		err = -EINVAL;
		goto error;
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    npages, ncont, order, page_shift);

	if (use_umr(order)) {
		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
			     order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
			mr = NULL;
		}
	}

	if (!mr)
		mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
				access_flags);

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);

	mr->umem = umem;
	mr->npages = npages;
	spin_lock(&dev->mr_lock);
	dev->mdev.priv.reg_pages += npages;
	spin_unlock(&dev->mr_lock);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;

	return &mr->ibmr;

error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}

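/*
 * Invalidate a cache-backed mkey by posting an unreg UMR work request, so
 * that the caller can return the MR to the cache.
 */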
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct umr_common *umrc = &dev->umrc;
	struct ib_send_wr wr, *bad;
	int err;

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)mr;
	prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);

	down(&umrc->sem);
	init_completion(&mr->done);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		up(&umrc->sem);
		mlx5_ib_dbg(dev, "err %d\n", err);
		goto error;
	}
	wait_for_completion(&mr->done);
	up(&umrc->sem);
	if (mr->status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "unreg umr failed\n");
		err = -EFAULT;
		goto error;
	}
	return 0;

error:
	return err;
}

int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct ib_umem *umem = mr->umem;
	int npages = mr->npages;
	int umred = mr->umred;
	int err;

	if (!umred) {
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
				     mr->mmr.key, err);
			return err;
		}
	} else {
		err = unreg_umr(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed unregister\n");
			return err;
		}
		free_cached_mr(dev, mr);
	}

	if (umem) {
		ib_umem_release(umem);
		spin_lock(&dev->mr_lock);
		dev->mdev.priv.reg_pages -= npages;
		spin_unlock(&dev->mr_lock);
	}

	if (!umred)
		kfree(mr);

	return 0;
}

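/*
 * Create an MR suitable for fast registration.  With IB_MR_SIGNATURE_EN,
 * the mkey is switched to KLM access mode, BSF is enabled and a pair of
 * PSVs (memory and wire) is allocated for signature offload.
 */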
struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
				struct ib_mr_init_attr *mr_init_attr)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int access_mode, err;
	int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = 1 << 6; /* free */
	in->seg.xlt_oct_size = cpu_to_be32(ndescs);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	access_mode = MLX5_ACCESS_MODE_MTT;

	if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) {
		u32 psv_index[2];

		in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
					       MLX5_MKEY_BSF_EN);
		in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
		if (!mr->sig) {
			err = -ENOMEM;
			goto err_free_in;
		}

		/* create mem & wire PSVs */
		err = mlx5_core_create_psv(&dev->mdev, to_mpd(pd)->pdn,
					   2, psv_index);
		if (err)
			goto err_free_sig;

		access_mode = MLX5_ACCESS_MODE_KLM;
		mr->sig->psv_memory.psv_idx = psv_index[0];
		mr->sig->psv_wire.psv_idx = psv_index[1];

		mr->sig->sig_status_checked = true;
		mr->sig->sig_err_exists = false;
		/* Next UMR, Arm SIGERR */
		++mr->sig->sigerr_count;
	}

	in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in),
				    NULL, NULL, NULL);
	if (err)
		goto err_destroy_psv;

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;
	kfree(in);

	return &mr->ibmr;

err_destroy_psv:
	if (mr->sig) {
		if (mlx5_core_destroy_psv(&dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(&dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
	}
err_free_sig:
	kfree(mr->sig);
err_free_in:
	kfree(in);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}

int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int err;

	if (mr->sig) {
		if (mlx5_core_destroy_psv(&dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(&dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
		kfree(mr->sig);
	}

	err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
	if (err) {
		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
			     mr->mmr.key, err);
		return err;
	}

	kfree(mr);

	return err;
}

struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
					int max_page_list_len)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = 1 << 6; /* free */
	in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	/* TBD not needed - issue 197292 */
	in->seg.log2_page_size = PAGE_SHIFT;

	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
				    NULL, NULL);
	kfree(in);
	if (err)
		goto err_free;

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_free:
	kfree(mr);
	return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof(u64);

	mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

	mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}

void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	struct mlx5_ib_dev *dev = to_mdev(page_list->device);
	int size = page_list->max_page_list_len * sizeof(u64);

	dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}

int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
			    struct ib_mr_status *mr_status)
{
	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
	int ret = 0;

	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
		pr_err("Invalid status check mask\n");
		ret = -EINVAL;
		goto done;
	}

	mr_status->fail_status = 0;
	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
		if (!mmr->sig) {
			ret = -EINVAL;
			pr_err("signature status check requested on a non-signature enabled MR\n");
			goto done;
		}

		mmr->sig->sig_status_checked = true;
		if (!mmr->sig->sig_err_exists)
			goto done;

		if (ibmr->lkey == mmr->sig->err_item.key)
			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
			       sizeof(mr_status->sig_err));
		else {
			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
			mr_status->sig_err.sig_err_offset = 0;
			mr_status->sig_err.key = mmr->sig->err_item.key;
		}

		mmr->sig->sig_err_exists = false;
		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
	}

done:
	return ret;
}