/*-
 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <linux/kref.h>
#include <linux/random.h>
#include <linux/fs.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"

CTASSERT((uintptr_t)PAGE_MASK > (uintptr_t)PAGE_SIZE);

enum {
	MAX_PENDING_REG_MR = 8,
	MAX_MR_RELEASE_TIMEOUT = (60 * 20) /* Allow release timeout up to 20 min */
};

#define MLX5_UMR_ALIGN 2048

static int mlx5_mr_sysfs_init(struct mlx5_ib_dev *dev);
static void mlx5_mr_sysfs_cleanup(struct mlx5_ib_dev *dev);

static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);

	return err;
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}
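
/*
 * Completion handler for the asynchronous mkey creation requests issued
 * by add_keys(). On success the new mkey gets a per-device variant key
 * byte, is added to its cache entry list and inserted into the core mkey
 * radix tree; on failure the MR is freed and cache refilling is throttled
 * for one second via the delay timer.
 */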
static void reg_mr_callback(int status, void *context)
{
	struct mlx5_ib_mr *mr = context;
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_core_mr *mmr = &mr->mmr;
	struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
	unsigned long flags;
	int err;
	u8 key;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d, order %d\n", status, ent->order);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	if (mr->out.hdr.status) {
		mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
			     mr->out.hdr.status,
			     be32_to_cpu(mr->out.hdr.syndrome));
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
	mmr->key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
	mlx5_ib_dbg(dev, "callbacked mkey 0x%x created\n",
		    be32_to_cpu(mr->out.mkey));

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);

	spin_lock_irqsave(&table->lock, flags);
	err = radix_tree_insert(&table->tree, mlx5_mkey_to_idx(mmr->key), mmr);
	spin_unlock_irqrestore(&table->lock, flags);
	if (err) {
		mlx5_ib_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n",
			     mmr->key, err);
		mlx5_core_destroy_mkey(mdev, mmr);
	}
}

static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	int err = 0;
	int i;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->umred = 1;
		mr->dev = dev;
		in->seg.status = MLX5_MKEY_STATUS_FREE;
		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
		in->seg.log2_page_size = 12;

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in,
					    sizeof(*in), reg_mr_callback,
					    mr, &mr->out);
		if (err) {
			spin_lock_irq(&ent->lock);
			ent->pending--;
			spin_unlock_irq(&ent->lock);
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}
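
/*
 * MR cache maintenance policy. someone_adding() reports whether any
 * entry is below its configured limit and someone_releasing() whether
 * any entry holds more than twice its limit. __cache_work_func() grows
 * an entry while it has fewer than 2 * limit cached MRs and shrinks it
 * once it exceeds 2 * limit, either immediately (rel_imm) or only after
 * rel_timeout seconds have passed since the last addition.
 */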
static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

static int someone_releasing(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur > 2 * cache->ent[i].limit)
			return 1;
	}

	return 0;
}

static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;
	s64 dtime;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				cancel_delayed_work(&ent->dwork);
				if (!queue_delayed_work(cache->wq, &ent->dwork,
							msecs_to_jiffies(3)))
					mlx5_ib_warn(dev, "failed queueing delayed work\n");
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				cancel_delayed_work(&ent->dwork);
				if (!queue_delayed_work(cache->wq, &ent->dwork,
							msecs_to_jiffies(1000)))
					mlx5_ib_warn(dev, "failed queueing delayed work\n");
			} else {
				if (!queue_work(cache->wq, &ent->work))
					mlx5_ib_warn(dev, "failed queueing work\n");
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		dtime = (cache->last_add + (s64)cache->rel_timeout * HZ) - jiffies;
		if (cache->rel_imm ||
		    (cache->rel_timeout >= 0 && !someone_adding(cache) && dtime <= 0)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				if (!queue_work(cache->wq, &ent->work))
					mlx5_ib_warn(dev, "failed queueing work\n");
		} else if (cache->rel_timeout >= 0) {
			dtime = max_t(s64, dtime, 0);
			dtime = min_t(s64, dtime, (MAX_MR_RELEASE_TIMEOUT * HZ));
			cancel_delayed_work(&ent->dwork);
			if (!queue_delayed_work(cache->wq, &ent->dwork, dtime))
				mlx5_ib_warn(dev, "failed queueing delayed work\n");
		}
	} else if (cache->rel_imm && !someone_releasing(cache)) {
		cache->rel_imm = 0;
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}
	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		if (!queue_work(cache->wq, &ent->work))
			mlx5_ib_warn(dev, "failed queueing work\n");
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey 0x%x from order %d\n",
				     mr->mmr.key, ent->order);
		else
			kfree(mr);
	}
}

static void delay_time_func(unsigned long ctx)
{
	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

	dev->fill_delay = 0;
}

enum {
	MLX5_VF_MR_LIMIT = 2,
};
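
/*
 * Set up the MR cache: one entry per order, starting at order 2, each
 * with its own work items, backed by a single "mkey_cache" workqueue.
 * When the profile enables the MR cache, the per-entry limit comes from
 * the device profile on a PF and from MLX5_VF_MR_LIMIT on a VF;
 * otherwise it is zero. Entries are pre-filled asynchronously by the
 * queued work.
 */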
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int err;
	int i;

	mutex_init(&dev->slow_path_mutex);
	cache->rel_timeout = 300;
	cache->wq = create_singlethread_workqueue("mkey_cache");
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	setup_timer(&dev->delay_timer, delay_time_func, (uintptr_t)dev);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		INIT_LIST_HEAD(&cache->ent[i].head);
		spin_lock_init(&cache->ent[i].lock);

		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) {
			if (mlx5_core_is_pf(dev->mdev))
				limit = dev->mdev->profile->mr_cache[i].limit;
			else
				limit = MLX5_VF_MR_LIMIT;
		} else {
			limit = 0;
		}

		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		ent->limit = limit;
		if (!queue_work(cache->wq, &ent->work))
			mlx5_ib_warn(dev, "failed queueing work\n");
	}

	err = mlx5_mr_sysfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "failed to init mr cache sysfs\n");

	return 0;
}

static void wait_for_async_commands(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int total = 0;
	int i;
	int j;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		for (j = 0 ; j < 1000; j++) {
			if (!ent->pending)
				break;
			msleep(50);
		}
	}
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		total += ent->pending;
	}

	if (total)
		mlx5_ib_dbg(dev, "aborted, %d pending requests\n", total);
	else
		mlx5_ib_dbg(dev, "done with all pending requests\n");
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);
	mlx5_mr_sysfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	wait_for_async_commands(dev);
	del_timer_sync(&dev->delay_timer);
	return 0;
}
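
/*
 * Register a DMA MR: a physical-address (MLX5_ACCESS_MODE_PA) mkey with
 * MLX5_MKEY_LEN64 set, so it spans the whole address space and needs no
 * translation table.
 */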
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_mkey_seg *seg;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	seg = &in->seg;
	seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
	seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	seg->start_addr = 0;

	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
				    NULL);
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, u64 page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1ULL);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
	return (npages + 1) / 2;
}

void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
	struct mlx5_ib_umr_context *context;
	struct ib_wc wc;
	int err;

	while (1) {
		err = ib_poll_cq(cq, 1, &wc);
		if (err < 0) {
			printf("mlx5_ib: WARN: ""poll cq error %d\n", err);
			return;
		}
		if (err == 0)
			break;

		context = (struct mlx5_ib_umr_context *)(uintptr_t)wc.wr_id;
		context->status = wc.status;
		complete(&context->done);
	}
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}
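
/*
 * Create an mkey for a user memory region using MTT translation. Each
 * MTT entry is 8 bytes while the hardware sizes the translation table in
 * 16-byte octowords, hence the (npages + 1) / 2 in get_octo_len(); e.g.
 * a 2 MB region of 4 KB pages needs 512 MTTs, i.e. 256 octowords. When
 * the device reports the "pg" capability, MLX5_IB_MTT_PRESENT is set in
 * each entry and MLX5_MKEY_INBOX_PG_ACCESS in the command flags.
 */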
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
				     u64 length, struct ib_umem *umem,
				     int npages, int page_shift,
				     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int inlen;
	int err;
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
			     pg_cap ? MLX5_IB_MTT_PRESENT : 0);

	/* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
	 * in the page list submitted with the command. */
	in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
	in->seg.flags = convert_access(access_flags) |
		MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	in->seg.start_addr = cpu_to_be64(virt_addr);
	in->seg.len = cpu_to_be64(length);
	in->seg.bsfs_octo_size = 0;
	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
	in->seg.log2_page_size = page_shift;
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
							 1 << page_shift));
	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
				    NULL, NULL);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->umem = umem;
	mr->dev = dev;
	kvfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

	return mr;

err_2:
	kvfree(in);

err_1:
	kfree(mr);

	return ERR_PTR(err);
}

enum {
	MLX5_MAX_REG_ORDER = MAX_MR_CACHE_ENTRIES + 1,
	MLX5_MAX_REG_SIZE = 2ul * 1024 * 1024 * 1024,
};

static int clean_mr(struct mlx5_ib_mr *mr)
{
	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
	int umred = mr->umred;
	int err;
	int i;

	if (!umred) {
		for (i = 0; i < mr->nchild; ++i) {
			free_cached_mr(dev, mr->children[i]);
		}
		kfree(mr->children);

		err = destroy_mkey(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
				     mr->mmr.key, err);
			return err;
		}
	}
	return 0;
}

struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata, int mr_id)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    (unsigned long long)start, (unsigned long long)virt_addr,
		    (unsigned long long)length, access_flags);
	umem = ib_umem_get(pd->uobject->context, start, length, access_flags, 0);
	if (IS_ERR(umem)) {
		mlx5_ib_warn(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		err = -EINVAL;
		goto error;
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    npages, ncont, order, page_shift);

	mutex_lock(&dev->slow_path_mutex);
	mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift, access_flags);
	mutex_unlock(&dev->slow_path_mutex);

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		mr = NULL;
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);

	mr->umem = umem;
	mr->npages = npages;
	atomic_add(npages, &dev->mdev->priv.reg_pages);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;

	return &mr->ibmr;

error:
	/*
	 * Destroy the umem *before* destroying the MR, to ensure we
	 * will not have any in-flight notifiers when destroying the
	 * MR.
	 *
	 * As the MR is completely invalid to begin with, and this
	 * error path is only taken if we can't push the mr entry into
	 * the pagefault tree, this is safe.
	 */

	ib_umem_release(umem);
	return ERR_PTR(err);
}
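
/*
 * Physical MR registration. The compile-time assertion below guards the
 * assumption that ib_phys_buf sizes are 64-bit. A common page shift is
 * derived from the alignment of the requested virtual address and of all
 * intermediate buffer boundaries, and the resulting page list is written
 * directly into the create_mkey mailbox.
 */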
CTASSERT(sizeof(((struct ib_phys_buf *)0)->size) == 8);

struct ib_mr *
mlx5_ib_reg_phys_mr(struct ib_pd *pd,
		    struct ib_phys_buf *buffer_list,
		    int num_phys_buf,
		    int access_flags,
		    u64 *virt_addr)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	u64 total_size;
	u32 octo_len;
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
	unsigned long mask;
	int shift;
	int npages;
	int inlen;
	int err;
	int i, j, n;

	mask = buffer_list[0].addr ^ *virt_addr;
	total_size = 0;
	for (i = 0; i < num_phys_buf; ++i) {
		if (i != 0)
			mask |= buffer_list[i].addr;
		if (i != num_phys_buf - 1)
			mask |= buffer_list[i].addr + buffer_list[i].size;

		total_size += buffer_list[i].size;
	}

	if (mask & ~PAGE_MASK)
		return ERR_PTR(-EINVAL);

	shift = __ffs(mask | 1 << 31);

	buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
	buffer_list[0].addr &= ~0ULL << shift;

	npages = 0;
	for (i = 0; i < num_phys_buf; ++i)
		npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;

	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		return ERR_PTR(-EINVAL);
	}

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	octo_len = get_octo_len(*virt_addr, total_size, 1ULL << shift);
	octo_len = ALIGN(octo_len, 4);

	inlen = sizeof(*in) + (octo_len * 16);
	in = mlx5_vzalloc(inlen);
	if (!in) {
		kfree(mr);
		return ERR_PTR(-ENOMEM);
	}

	n = 0;
	for (i = 0; i < num_phys_buf; ++i) {
		for (j = 0;
		     j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
		     ++j) {
			u64 temp = buffer_list[i].addr + ((u64) j << shift);
			if (pg_cap)
				temp |= MLX5_IB_MTT_PRESENT;
			in->pas[n++] = cpu_to_be64(temp);
		}
	}

	/* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
	 * in the page list submitted with the command. */
	in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
	in->seg.flags = convert_access(access_flags) |
		MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	in->seg.start_addr = cpu_to_be64(*virt_addr);
	in->seg.len = cpu_to_be64(total_size);
	in->seg.bsfs_octo_size = 0;
	in->seg.xlt_oct_size = cpu_to_be32(octo_len);
	in->seg.log2_page_size = shift;
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->xlat_oct_act_size = cpu_to_be32(octo_len);
	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
				    NULL, NULL);
	mr->umem = NULL;
	mr->dev = dev;
	mr->npages = npages;
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;

	kvfree(in);

	if (err) {
		kfree(mr);
		return ERR_PTR(err);
	}
	return &mr->ibmr;
}
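
/*
 * MR teardown. mlx5_ib_dereg_mr() releases the umem and its page
 * accounting and either returns a cache-allocated MR to its cache entry
 * (umred) or frees it after the mkey has been destroyed by clean_mr();
 * mlx5_ib_destroy_mr() additionally tears down the signature PSVs when
 * present.
 */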
int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct ib_umem *umem = mr->umem;
	int npages = mr->npages;
	int umred = mr->umred;
	int err;

	err = clean_mr(mr);
	if (err)
		return err;

	if (umem) {
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);
	}

	if (umred)
		free_cached_mr(dev, mr);
	else
		kfree(mr);

	return 0;
}

int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int err;

	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
		kfree(mr->sig);
	}

	err = destroy_mkey(dev, mr);
	if (err) {
		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
			     mr->mmr.key, err);
		return err;
	}

	kfree(mr);

	return err;
}
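
/*
 * Fast-register work request support: an UMR-enabled MTT mkey sized for
 * max_page_list_len entries, plus allocation and release of the matching
 * page list. The DMA-coherent mapped_page_list handed to the hardware
 * must be 64-byte aligned, which the WARN_ON below checks.
 */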
struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
					int max_page_list_len)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = MLX5_MKEY_STATUS_FREE;
	in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	/* TBD not needed - issue 197292 */
	in->seg.log2_page_size = PAGE_SHIFT;

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
				    NULL, NULL);
	kfree(in);
	if (err) {
		mlx5_ib_warn(dev, "failed create mkey\n");
		goto err_free;
	}

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_free:
	kfree(mr);
	return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof(u64);

	mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

	mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}

void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	struct mlx5_ib_dev *dev = to_mdev(page_list->device);
	int size = page_list->max_page_list_len * sizeof(u64);

	dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}
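
/*
 * sysfs interface for the MR cache. Each cache entry gets a kobject
 * named after its order under the device's mr_cache directory with cur,
 * limit, miss and size attributes; the mr_cache kobject itself exposes
 * rel_imm and rel_timeout, which control when surplus cached MRs are
 * released.
 */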
struct order_attribute {
	struct attribute attr;
	ssize_t (*show)(struct cache_order *, struct order_attribute *, char *buf);
	ssize_t (*store)(struct cache_order *, struct order_attribute *,
			 const char *buf, size_t count);
};

static ssize_t cur_show(struct cache_order *co, struct order_attribute *oa,
			char *buf)
{
	struct mlx5_ib_dev *dev = co->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[co->index];
	int err;

	err = snprintf(buf, 20, "%d\n", ent->cur);
	return err;
}

static ssize_t limit_show(struct cache_order *co, struct order_attribute *oa,
			  char *buf)
{
	struct mlx5_ib_dev *dev = co->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[co->index];
	int err;

	err = snprintf(buf, 20, "%d\n", ent->limit);
	return err;
}

static ssize_t limit_store(struct cache_order *co, struct order_attribute *oa,
			   const char *buf, size_t count)
{
	struct mlx5_ib_dev *dev = co->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[co->index];
	u32 var;
	int err;

#define	kstrtouint(a,b,c) ({*(c) = strtol(a,0,b); 0;})
#define	kstrtoint(a,b,c) ({*(c) = strtol(a,0,b); 0;})

	if (kstrtouint(buf, 0, &var))
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, co->index, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t miss_show(struct cache_order *co, struct order_attribute *oa,
			 char *buf)
{
	struct mlx5_ib_dev *dev = co->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[co->index];
	int err;

	err = snprintf(buf, 20, "%d\n", ent->miss);
	return err;
}

static ssize_t miss_store(struct cache_order *co, struct order_attribute *oa,
			  const char *buf, size_t count)
{
	struct mlx5_ib_dev *dev = co->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[co->index];
	u32 var;

	if (kstrtouint(buf, 0, &var))
		return -EINVAL;

	if (var != 0)
		return -EINVAL;

	ent->miss = var;

	return count;
}

static ssize_t size_show(struct cache_order *co, struct order_attribute *oa,
			 char *buf)
{
	struct mlx5_ib_dev *dev = co->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[co->index];
	int err;

	err = snprintf(buf, 20, "%d\n", ent->size);
	return err;
}

static ssize_t size_store(struct cache_order *co, struct order_attribute *oa,
			  const char *buf, size_t count)
{
	struct mlx5_ib_dev *dev = co->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[co->index];
	u32 var;
	int err;

	if (kstrtouint(buf, 0, &var))
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		do {
			err = add_keys(dev, co->index, var - ent->size);
			if (err && err != -EAGAIN)
				return err;

			usleep_range(3000, 5000);
		} while (err);
	} else if (var < ent->size) {
		remove_keys(dev, co->index, ent->size - var);
	}

	return count;
}

static ssize_t order_attr_show(struct kobject *kobj,
			       struct attribute *attr, char *buf)
{
	struct order_attribute *oa =
		container_of(attr, struct order_attribute, attr);
	struct cache_order *co = container_of(kobj, struct cache_order, kobj);

	if (!oa->show)
		return -EIO;

	return oa->show(co, oa, buf);
}

static ssize_t order_attr_store(struct kobject *kobj,
				struct attribute *attr, const char *buf, size_t size)
{
	struct order_attribute *oa =
		container_of(attr, struct order_attribute, attr);
	struct cache_order *co = container_of(kobj, struct cache_order, kobj);

	if (!oa->store)
		return -EIO;

	return oa->store(co, oa, buf, size);
}

static const struct sysfs_ops order_sysfs_ops = {
	.show = order_attr_show,
	.store = order_attr_store,
};

#define ORDER_ATTR(_name) struct order_attribute order_attr_##_name = \
	__ATTR(_name, 0644, _name##_show, _name##_store)
#define ORDER_ATTR_RO(_name) struct order_attribute order_attr_##_name = \
	__ATTR(_name, 0444, _name##_show, NULL)

static ORDER_ATTR_RO(cur);
static ORDER_ATTR(limit);
static ORDER_ATTR(miss);
static ORDER_ATTR(size);

static struct attribute *order_default_attrs[] = {
	&order_attr_cur.attr,
	&order_attr_limit.attr,
	&order_attr_miss.attr,
	&order_attr_size.attr,
	NULL
};

static struct kobj_type order_type = {
	.sysfs_ops = &order_sysfs_ops,
	.default_attrs = order_default_attrs
};

struct cache_attribute {
	struct attribute attr;
	ssize_t (*show)(struct mlx5_ib_dev *dev, char *buf);
	ssize_t (*store)(struct mlx5_ib_dev *dev, const char *buf, size_t count);
};

static ssize_t rel_imm_show(struct mlx5_ib_dev *dev, char *buf)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	int err;

	err = snprintf(buf, 20, "%d\n", cache->rel_imm);
	return err;
}

static ssize_t rel_imm_store(struct mlx5_ib_dev *dev, const char *buf, size_t count)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	u32 var;
	int i;
	int found = 0;

	if (kstrtouint(buf, 0, &var))
		return -EINVAL;

	if (var > 1)
		return -EINVAL;

	if (var == cache->rel_imm)
		return count;

	cache->rel_imm = var;
	if (cache->rel_imm == 1) {
		for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
			if (cache->ent[i].cur > 2 * cache->ent[i].limit) {
				queue_work(cache->wq, &cache->ent[i].work);
				found = 1;
			}
		}
		if (!found)
			cache->rel_imm = 0;
	}

	return count;
}
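
/*
 * rel_timeout semantics: -1 disables timed release, 0 releases surplus
 * MRs as soon as the cache work runs, and positive values (up to
 * MAX_MR_RELEASE_TIMEOUT seconds) delay release until that long after
 * the last cache addition. Lowering the timeout, or enabling it after
 * it was disabled, immediately queues the work items of oversized
 * entries.
 */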
static ssize_t rel_timeout_show(struct mlx5_ib_dev *dev, char *buf)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	int err;

	err = snprintf(buf, 20, "%d\n", cache->rel_timeout);
	return err;
}

static ssize_t rel_timeout_store(struct mlx5_ib_dev *dev, const char *buf, size_t count)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	int var;
	int i;

	if (kstrtoint(buf, 0, &var))
		return -EINVAL;

	if (var < -1 || var > MAX_MR_RELEASE_TIMEOUT)
		return -EINVAL;

	if (var == cache->rel_timeout)
		return count;

	if (cache->rel_timeout == -1 || (var < cache->rel_timeout && var != -1)) {
		cache->rel_timeout = var;
		for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
			if (cache->ent[i].cur > 2 * cache->ent[i].limit)
				queue_work(cache->wq, &cache->ent[i].work);
		}
	} else {
		cache->rel_timeout = var;
	}

	return count;
}

static ssize_t cache_attr_show(struct kobject *kobj,
			       struct attribute *attr, char *buf)
{
	struct cache_attribute *ca =
		container_of(attr, struct cache_attribute, attr);
	struct mlx5_ib_dev *dev = container_of(kobj, struct mlx5_ib_dev, mr_cache);

	if (!ca->show)
		return -EIO;

	return ca->show(dev, buf);
}

static ssize_t cache_attr_store(struct kobject *kobj,
				struct attribute *attr, const char *buf, size_t size)
{
	struct cache_attribute *ca =
		container_of(attr, struct cache_attribute, attr);
	struct mlx5_ib_dev *dev = container_of(kobj, struct mlx5_ib_dev, mr_cache);

	if (!ca->store)
		return -EIO;

	return ca->store(dev, buf, size);
}

static const struct sysfs_ops cache_sysfs_ops = {
	.show = cache_attr_show,
	.store = cache_attr_store,
};

#define CACHE_ATTR(_name) struct cache_attribute cache_attr_##_name = \
	__ATTR(_name, 0644, _name##_show, _name##_store)

static CACHE_ATTR(rel_imm);
static CACHE_ATTR(rel_timeout);

static struct attribute *cache_default_attrs[] = {
	&cache_attr_rel_imm.attr,
	&cache_attr_rel_timeout.attr,
	NULL
};

static struct kobj_type cache_type = {
	.sysfs_ops = &cache_sysfs_ops,
	.default_attrs = cache_default_attrs
};

static int mlx5_mr_sysfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct device *device = &dev->ib_dev.dev;
	struct cache_order *co;
	int o;
	int i;
	int err;

	err = kobject_init_and_add(&dev->mr_cache, &cache_type,
				   &device->kobj, "mr_cache");
	if (err)
		return -ENOMEM;

	for (o = 2, i = 0; i < MAX_MR_CACHE_ENTRIES; o++, i++) {
		co = &cache->ent[i].co;
		co->order = o;
		co->index = i;
		co->dev = dev;
		err = kobject_init_and_add(&co->kobj, &order_type,
					   &dev->mr_cache, "%d", o);
		if (err)
			goto err_put;
	}

	return 0;

err_put:
	for (; i >= 0; i--) {
		co = &cache->ent[i].co;
		kobject_put(&co->kobj);
	}
	kobject_put(&dev->mr_cache);

	return err;
}

static void mlx5_mr_sysfs_cleanup(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct cache_order *co;
	int i;

	for (i = MAX_MR_CACHE_ENTRIES - 1; i >= 0; i--) {
		co = &cache->ent[i].co;
		kobject_put(&co->kobj);
	}
	kobject_put(&dev->mr_cache);
}