1 /* 2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. 3 * Copyright (c) 2020, Intel Corporation. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 
32 */ 33 34 35 #include <linux/kref.h> 36 #include <linux/random.h> 37 #include <linux/debugfs.h> 38 #include <linux/export.h> 39 #include <linux/delay.h> 40 #include <linux/dma-buf.h> 41 #include <linux/dma-resv.h> 42 #include <rdma/ib_umem_odp.h> 43 #include "dm.h" 44 #include "mlx5_ib.h" 45 #include "umr.h" 46 #include "data_direct.h" 47 #include "dmah.h" 48 49 enum { 50 MAX_PENDING_REG_MR = 8, 51 }; 52 53 #define MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS 4 54 #define MLX5_UMR_ALIGN 2048 55 56 static void 57 create_mkey_callback(int status, struct mlx5_async_work *context); 58 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, 59 u64 iova, int access_flags, 60 unsigned long page_size, bool populate, 61 int access_mode, u16 st_index, u8 ph); 62 static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr); 63 64 static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, 65 struct ib_pd *pd) 66 { 67 struct mlx5_ib_dev *dev = to_mdev(pd->device); 68 69 MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); 70 MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); 71 MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); 72 MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); 73 MLX5_SET(mkc, mkc, lr, 1); 74 75 if (acc & IB_ACCESS_RELAXED_ORDERING) { 76 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) 77 MLX5_SET(mkc, mkc, relaxed_ordering_write, 1); 78 79 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) || 80 (MLX5_CAP_GEN(dev->mdev, 81 relaxed_ordering_read_pci_enabled) && 82 pcie_relaxed_ordering_enabled(dev->mdev->pdev))) 83 MLX5_SET(mkc, mkc, relaxed_ordering_read, 1); 84 } 85 86 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); 87 MLX5_SET(mkc, mkc, qpn, 0xffffff); 88 MLX5_SET64(mkc, mkc, start_addr, start_addr); 89 } 90 91 static void assign_mkey_variant(struct mlx5_ib_dev *dev, u32 *mkey, u32 *in) 92 { 93 u8 key = atomic_inc_return(&dev->mkey_var); 94 void *mkc; 95 96 mkc = MLX5_ADDR_OF(create_mkey_in, in, 
memory_key_mkey_entry); 97 MLX5_SET(mkc, mkc, mkey_7_0, key); 98 *mkey = key; 99 } 100 101 static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, 102 struct mlx5_ib_mkey *mkey, u32 *in, int inlen) 103 { 104 int ret; 105 106 assign_mkey_variant(dev, &mkey->key, in); 107 ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen); 108 if (!ret) 109 init_waitqueue_head(&mkey->wait); 110 111 return ret; 112 } 113 114 static int mlx5_ib_create_mkey_cb(struct mlx5r_async_create_mkey *async_create) 115 { 116 struct mlx5_ib_dev *dev = async_create->ent->dev; 117 size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 118 size_t outlen = MLX5_ST_SZ_BYTES(create_mkey_out); 119 120 MLX5_SET(create_mkey_in, async_create->in, opcode, 121 MLX5_CMD_OP_CREATE_MKEY); 122 assign_mkey_variant(dev, &async_create->mkey, async_create->in); 123 return mlx5_cmd_exec_cb(&dev->async_ctx, async_create->in, inlen, 124 async_create->out, outlen, create_mkey_callback, 125 &async_create->cb_work); 126 } 127 128 static int mkey_cache_max_order(struct mlx5_ib_dev *dev); 129 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent); 130 131 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 132 { 133 WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))); 134 135 return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key); 136 } 137 138 static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out) 139 { 140 if (status == -ENXIO) /* core driver is not available */ 141 return; 142 143 mlx5_ib_warn(dev, "async reg mr failed. 
status %d\n", status); 144 if (status != -EREMOTEIO) /* driver specific failure */ 145 return; 146 147 /* Failed in FW, print cmd out failure details */ 148 mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out); 149 } 150 151 static int push_mkey_locked(struct mlx5_cache_ent *ent, u32 mkey) 152 { 153 unsigned long tmp = ent->mkeys_queue.ci % NUM_MKEYS_PER_PAGE; 154 struct mlx5_mkeys_page *page; 155 156 lockdep_assert_held(&ent->mkeys_queue.lock); 157 if (ent->mkeys_queue.ci >= 158 ent->mkeys_queue.num_pages * NUM_MKEYS_PER_PAGE) { 159 page = kzalloc(sizeof(*page), GFP_ATOMIC); 160 if (!page) 161 return -ENOMEM; 162 ent->mkeys_queue.num_pages++; 163 list_add_tail(&page->list, &ent->mkeys_queue.pages_list); 164 } else { 165 page = list_last_entry(&ent->mkeys_queue.pages_list, 166 struct mlx5_mkeys_page, list); 167 } 168 169 page->mkeys[tmp] = mkey; 170 ent->mkeys_queue.ci++; 171 return 0; 172 } 173 174 static int pop_mkey_locked(struct mlx5_cache_ent *ent) 175 { 176 unsigned long tmp = (ent->mkeys_queue.ci - 1) % NUM_MKEYS_PER_PAGE; 177 struct mlx5_mkeys_page *last_page; 178 u32 mkey; 179 180 lockdep_assert_held(&ent->mkeys_queue.lock); 181 last_page = list_last_entry(&ent->mkeys_queue.pages_list, 182 struct mlx5_mkeys_page, list); 183 mkey = last_page->mkeys[tmp]; 184 last_page->mkeys[tmp] = 0; 185 ent->mkeys_queue.ci--; 186 if (ent->mkeys_queue.num_pages > 1 && !tmp) { 187 list_del(&last_page->list); 188 ent->mkeys_queue.num_pages--; 189 kfree(last_page); 190 } 191 return mkey; 192 } 193 194 static void create_mkey_callback(int status, struct mlx5_async_work *context) 195 { 196 struct mlx5r_async_create_mkey *mkey_out = 197 container_of(context, struct mlx5r_async_create_mkey, cb_work); 198 struct mlx5_cache_ent *ent = mkey_out->ent; 199 struct mlx5_ib_dev *dev = ent->dev; 200 unsigned long flags; 201 202 if (status) { 203 create_mkey_warn(dev, status, mkey_out->out); 204 kfree(mkey_out); 205 spin_lock_irqsave(&ent->mkeys_queue.lock, flags); 206 
ent->pending--; 207 WRITE_ONCE(dev->fill_delay, 1); 208 spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags); 209 mod_timer(&dev->delay_timer, jiffies + HZ); 210 return; 211 } 212 213 mkey_out->mkey |= mlx5_idx_to_mkey( 214 MLX5_GET(create_mkey_out, mkey_out->out, mkey_index)); 215 WRITE_ONCE(dev->cache.last_add, jiffies); 216 217 spin_lock_irqsave(&ent->mkeys_queue.lock, flags); 218 push_mkey_locked(ent, mkey_out->mkey); 219 ent->pending--; 220 /* If we are doing fill_to_high_water then keep going. */ 221 queue_adjust_cache_locked(ent); 222 spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags); 223 kfree(mkey_out); 224 } 225 226 static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs) 227 { 228 int ret = 0; 229 230 switch (access_mode) { 231 case MLX5_MKC_ACCESS_MODE_MTT: 232 ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD / 233 sizeof(struct mlx5_mtt)); 234 break; 235 case MLX5_MKC_ACCESS_MODE_KSM: 236 ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD / 237 sizeof(struct mlx5_klm)); 238 break; 239 default: 240 WARN_ON(1); 241 } 242 return ret; 243 } 244 245 static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) 246 { 247 set_mkc_access_pd_addr_fields(mkc, ent->rb_key.access_flags, 0, 248 ent->dev->umrc.pd); 249 MLX5_SET(mkc, mkc, free, 1); 250 MLX5_SET(mkc, mkc, umr_en, 1); 251 MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3); 252 MLX5_SET(mkc, mkc, access_mode_4_2, 253 (ent->rb_key.access_mode >> 2) & 0x7); 254 MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats); 255 256 MLX5_SET(mkc, mkc, translations_octword_size, 257 get_mkc_octo_size(ent->rb_key.access_mode, 258 ent->rb_key.ndescs)); 259 MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); 260 261 if (ent->rb_key.ph != MLX5_IB_NO_PH) { 262 MLX5_SET(mkc, mkc, pcie_tph_en, 1); 263 MLX5_SET(mkc, mkc, pcie_tph_ph, ent->rb_key.ph); 264 if (ent->rb_key.st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX) 265 MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index, 266 
ent->rb_key.st_index); 267 } 268 } 269 270 /* Asynchronously schedule new MRs to be populated in the cache. */ 271 static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) 272 { 273 struct mlx5r_async_create_mkey *async_create; 274 void *mkc; 275 int err = 0; 276 int i; 277 278 for (i = 0; i < num; i++) { 279 async_create = kzalloc(sizeof(struct mlx5r_async_create_mkey), 280 GFP_KERNEL); 281 if (!async_create) 282 return -ENOMEM; 283 mkc = MLX5_ADDR_OF(create_mkey_in, async_create->in, 284 memory_key_mkey_entry); 285 set_cache_mkc(ent, mkc); 286 async_create->ent = ent; 287 288 spin_lock_irq(&ent->mkeys_queue.lock); 289 if (ent->pending >= MAX_PENDING_REG_MR) { 290 err = -EAGAIN; 291 goto free_async_create; 292 } 293 ent->pending++; 294 spin_unlock_irq(&ent->mkeys_queue.lock); 295 296 err = mlx5_ib_create_mkey_cb(async_create); 297 if (err) { 298 mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err); 299 goto err_create_mkey; 300 } 301 } 302 303 return 0; 304 305 err_create_mkey: 306 spin_lock_irq(&ent->mkeys_queue.lock); 307 ent->pending--; 308 free_async_create: 309 spin_unlock_irq(&ent->mkeys_queue.lock); 310 kfree(async_create); 311 return err; 312 } 313 314 /* Synchronously create a MR in the cache */ 315 static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey) 316 { 317 size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 318 void *mkc; 319 u32 *in; 320 int err; 321 322 in = kzalloc(inlen, GFP_KERNEL); 323 if (!in) 324 return -ENOMEM; 325 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 326 set_cache_mkc(ent, mkc); 327 328 err = mlx5_core_create_mkey(ent->dev->mdev, mkey, in, inlen); 329 if (err) 330 goto free_in; 331 332 WRITE_ONCE(ent->dev->cache.last_add, jiffies); 333 free_in: 334 kfree(in); 335 return err; 336 } 337 338 static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) 339 { 340 u32 mkey; 341 342 lockdep_assert_held(&ent->mkeys_queue.lock); 343 if (!ent->mkeys_queue.ci) 344 return; 345 mkey = 
/*
 * Move the number of stored plus pending mkeys of @ent towards @target.
 *
 * @ent:        cache entry to resize
 * @target:     desired value of ent->pending + ent->mkeys_queue.ci; ignored
 *              when @limit_fill is set
 * @limit_fill: when true the target is re-read as 2 * ent->limit on every
 *              iteration
 *
 * Must be called with ent->mkeys_queue.lock held.  The lock is dropped and
 * re-acquired around add_keys(), so the entry state may change while this
 * function runs; the lock is held again on return.
 *
 * Returns 0 once the target is met or once add_keys() has successfully issued
 * the missing creations (they may still be pending), or a negative errno from
 * add_keys() other than -EAGAIN.
 */
static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
				bool limit_fill)
	__acquires(&ent->mkeys_queue.lock) __releases(&ent->mkeys_queue.lock)
{
	int err;

	lockdep_assert_held(&ent->mkeys_queue.lock);

	while (true) {
		if (limit_fill)
			target = ent->limit * 2;
		if (target == ent->pending + ent->mkeys_queue.ci)
			return 0;
		if (target > ent->pending + ent->mkeys_queue.ci) {
			/* Too few mkeys: issue creations for the shortfall. */
			u32 todo = target - (ent->pending + ent->mkeys_queue.ci);

			spin_unlock_irq(&ent->mkeys_queue.lock);
			err = add_keys(ent, todo);
			/*
			 * -EAGAIN means too many creations are in flight;
			 * sleep briefly (lock dropped) and retry the loop.
			 */
			if (err == -EAGAIN)
				usleep_range(3000, 5000);
			spin_lock_irq(&ent->mkeys_queue.lock);
			if (err) {
				if (err != -EAGAIN)
					return err;
			} else
				return 0;
		} else {
			/* Too many mkeys: destroy one stored mkey per pass. */
			remove_cache_mr_locked(ent);
		}
	}
}
398 */ 399 spin_lock_irq(&ent->mkeys_queue.lock); 400 if (target < ent->in_use) { 401 err = -EINVAL; 402 goto err_unlock; 403 } 404 target = target - ent->in_use; 405 if (target < ent->limit || target > ent->limit*2) { 406 err = -EINVAL; 407 goto err_unlock; 408 } 409 err = resize_available_mrs(ent, target, false); 410 if (err) 411 goto err_unlock; 412 spin_unlock_irq(&ent->mkeys_queue.lock); 413 414 return count; 415 416 err_unlock: 417 spin_unlock_irq(&ent->mkeys_queue.lock); 418 return err; 419 } 420 421 static ssize_t size_read(struct file *filp, char __user *buf, size_t count, 422 loff_t *pos) 423 { 424 struct mlx5_cache_ent *ent = filp->private_data; 425 char lbuf[20]; 426 int err; 427 428 err = snprintf(lbuf, sizeof(lbuf), "%ld\n", 429 ent->mkeys_queue.ci + ent->in_use); 430 if (err < 0) 431 return err; 432 433 return simple_read_from_buffer(buf, count, pos, lbuf, err); 434 } 435 436 static const struct file_operations size_fops = { 437 .owner = THIS_MODULE, 438 .open = simple_open, 439 .write = size_write, 440 .read = size_read, 441 }; 442 443 static ssize_t limit_write(struct file *filp, const char __user *buf, 444 size_t count, loff_t *pos) 445 { 446 struct mlx5_cache_ent *ent = filp->private_data; 447 u32 var; 448 int err; 449 450 err = kstrtou32_from_user(buf, count, 0, &var); 451 if (err) 452 return err; 453 454 /* 455 * Upon set we immediately fill the cache to high water mark implied by 456 * the limit. 
457 */ 458 spin_lock_irq(&ent->mkeys_queue.lock); 459 ent->limit = var; 460 err = resize_available_mrs(ent, 0, true); 461 spin_unlock_irq(&ent->mkeys_queue.lock); 462 if (err) 463 return err; 464 return count; 465 } 466 467 static ssize_t limit_read(struct file *filp, char __user *buf, size_t count, 468 loff_t *pos) 469 { 470 struct mlx5_cache_ent *ent = filp->private_data; 471 char lbuf[20]; 472 int err; 473 474 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); 475 if (err < 0) 476 return err; 477 478 return simple_read_from_buffer(buf, count, pos, lbuf, err); 479 } 480 481 static const struct file_operations limit_fops = { 482 .owner = THIS_MODULE, 483 .open = simple_open, 484 .write = limit_write, 485 .read = limit_read, 486 }; 487 488 static bool someone_adding(struct mlx5_mkey_cache *cache) 489 { 490 struct mlx5_cache_ent *ent; 491 struct rb_node *node; 492 bool ret; 493 494 mutex_lock(&cache->rb_lock); 495 for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) { 496 ent = rb_entry(node, struct mlx5_cache_ent, node); 497 spin_lock_irq(&ent->mkeys_queue.lock); 498 ret = ent->mkeys_queue.ci < ent->limit; 499 spin_unlock_irq(&ent->mkeys_queue.lock); 500 if (ret) { 501 mutex_unlock(&cache->rb_lock); 502 return true; 503 } 504 } 505 mutex_unlock(&cache->rb_lock); 506 return false; 507 } 508 509 /* 510 * Check if the bucket is outside the high/low water mark and schedule an async 511 * update. The cache refill has hysteresis, once the low water mark is hit it is 512 * refilled up to the high mark. 
513 */ 514 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) 515 { 516 lockdep_assert_held(&ent->mkeys_queue.lock); 517 518 if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp) 519 return; 520 if (ent->mkeys_queue.ci < ent->limit) { 521 ent->fill_to_high_water = true; 522 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); 523 } else if (ent->fill_to_high_water && 524 ent->mkeys_queue.ci + ent->pending < 2 * ent->limit) { 525 /* 526 * Once we start populating due to hitting a low water mark 527 * continue until we pass the high water mark. 528 */ 529 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); 530 } else if (ent->mkeys_queue.ci == 2 * ent->limit) { 531 ent->fill_to_high_water = false; 532 } else if (ent->mkeys_queue.ci > 2 * ent->limit) { 533 /* Queue deletion of excess entries */ 534 ent->fill_to_high_water = false; 535 if (ent->pending) 536 queue_delayed_work(ent->dev->cache.wq, &ent->dwork, 537 secs_to_jiffies(1)); 538 else 539 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); 540 } 541 } 542 543 static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) 544 { 545 u32 mkey; 546 547 spin_lock_irq(&ent->mkeys_queue.lock); 548 while (ent->mkeys_queue.ci) { 549 mkey = pop_mkey_locked(ent); 550 spin_unlock_irq(&ent->mkeys_queue.lock); 551 mlx5_core_destroy_mkey(dev->mdev, mkey); 552 spin_lock_irq(&ent->mkeys_queue.lock); 553 } 554 ent->tmp_cleanup_scheduled = false; 555 spin_unlock_irq(&ent->mkeys_queue.lock); 556 } 557 558 static void __cache_work_func(struct mlx5_cache_ent *ent) 559 { 560 struct mlx5_ib_dev *dev = ent->dev; 561 struct mlx5_mkey_cache *cache = &dev->cache; 562 int err; 563 564 spin_lock_irq(&ent->mkeys_queue.lock); 565 if (ent->disabled) 566 goto out; 567 568 if (ent->fill_to_high_water && 569 ent->mkeys_queue.ci + ent->pending < 2 * ent->limit && 570 !READ_ONCE(dev->fill_delay)) { 571 spin_unlock_irq(&ent->mkeys_queue.lock); 572 err = add_keys(ent, 1); 573 
spin_lock_irq(&ent->mkeys_queue.lock); 574 if (ent->disabled) 575 goto out; 576 if (err) { 577 /* 578 * EAGAIN only happens if there are pending MRs, so we 579 * will be rescheduled when storing them. The only 580 * failure path here is ENOMEM. 581 */ 582 if (err != -EAGAIN) { 583 mlx5_ib_warn( 584 dev, 585 "add keys command failed, err %d\n", 586 err); 587 queue_delayed_work(cache->wq, &ent->dwork, 588 secs_to_jiffies(1)); 589 } 590 } 591 } else if (ent->mkeys_queue.ci > 2 * ent->limit) { 592 bool need_delay; 593 594 /* 595 * The remove_cache_mr() logic is performed as garbage 596 * collection task. Such task is intended to be run when no 597 * other active processes are running. 598 * 599 * The need_resched() will return TRUE if there are user tasks 600 * to be activated in near future. 601 * 602 * In such case, we don't execute remove_cache_mr() and postpone 603 * the garbage collection work to try to run in next cycle, in 604 * order to free CPU resources to other tasks. 605 */ 606 spin_unlock_irq(&ent->mkeys_queue.lock); 607 need_delay = need_resched() || someone_adding(cache) || 608 !time_after(jiffies, 609 READ_ONCE(cache->last_add) + 300 * HZ); 610 spin_lock_irq(&ent->mkeys_queue.lock); 611 if (ent->disabled) 612 goto out; 613 if (need_delay) { 614 queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); 615 goto out; 616 } 617 remove_cache_mr_locked(ent); 618 queue_adjust_cache_locked(ent); 619 } 620 out: 621 spin_unlock_irq(&ent->mkeys_queue.lock); 622 } 623 624 static void delayed_cache_work_func(struct work_struct *work) 625 { 626 struct mlx5_cache_ent *ent; 627 628 ent = container_of(work, struct mlx5_cache_ent, dwork.work); 629 /* temp entries are never filled, only cleaned */ 630 if (ent->is_tmp) 631 clean_keys(ent->dev, ent); 632 else 633 __cache_work_func(ent); 634 } 635 636 static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1, 637 struct mlx5r_cache_rb_key key2) 638 { 639 int res; 640 641 res = key1.ats - key2.ats; 642 if (res) 643 return res; 
644 645 res = key1.access_mode - key2.access_mode; 646 if (res) 647 return res; 648 649 res = key1.access_flags - key2.access_flags; 650 if (res) 651 return res; 652 653 res = key1.st_index - key2.st_index; 654 if (res) 655 return res; 656 657 res = key1.ph - key2.ph; 658 if (res) 659 return res; 660 661 /* 662 * keep ndescs the last in the compare table since the find function 663 * searches for an exact match on all properties and only closest 664 * match in size. 665 */ 666 return key1.ndescs - key2.ndescs; 667 } 668 669 static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache, 670 struct mlx5_cache_ent *ent) 671 { 672 struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL; 673 struct mlx5_cache_ent *cur; 674 int cmp; 675 676 /* Figure out where to put new node */ 677 while (*new) { 678 cur = rb_entry(*new, struct mlx5_cache_ent, node); 679 parent = *new; 680 cmp = cache_ent_key_cmp(cur->rb_key, ent->rb_key); 681 if (cmp > 0) 682 new = &((*new)->rb_left); 683 if (cmp < 0) 684 new = &((*new)->rb_right); 685 if (cmp == 0) 686 return -EEXIST; 687 } 688 689 /* Add new node and rebalance tree. */ 690 rb_link_node(&ent->node, parent, new); 691 rb_insert_color(&ent->node, &cache->rb_root); 692 693 return 0; 694 } 695 696 static struct mlx5_cache_ent * 697 mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev, 698 struct mlx5r_cache_rb_key rb_key) 699 { 700 struct rb_node *node = dev->cache.rb_root.rb_node; 701 struct mlx5_cache_ent *cur, *smallest = NULL; 702 u64 ndescs_limit; 703 int cmp; 704 705 /* 706 * Find the smallest ent with order >= requested_order. 
707 */ 708 while (node) { 709 cur = rb_entry(node, struct mlx5_cache_ent, node); 710 cmp = cache_ent_key_cmp(cur->rb_key, rb_key); 711 if (cmp > 0) { 712 smallest = cur; 713 node = node->rb_left; 714 } 715 if (cmp < 0) 716 node = node->rb_right; 717 if (cmp == 0) 718 return cur; 719 } 720 721 /* 722 * Limit the usage of mkeys larger than twice the required size while 723 * also allowing the usage of smallest cache entry for small MRs. 724 */ 725 ndescs_limit = max_t(u64, rb_key.ndescs * 2, 726 MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS); 727 728 return (smallest && 729 smallest->rb_key.access_mode == rb_key.access_mode && 730 smallest->rb_key.access_flags == rb_key.access_flags && 731 smallest->rb_key.ats == rb_key.ats && 732 smallest->rb_key.st_index == rb_key.st_index && 733 smallest->rb_key.ph == rb_key.ph && 734 smallest->rb_key.ndescs <= ndescs_limit) ? 735 smallest : 736 NULL; 737 } 738 739 static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, 740 struct mlx5_cache_ent *ent) 741 { 742 struct mlx5_ib_mr *mr; 743 int err; 744 745 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 746 if (!mr) 747 return ERR_PTR(-ENOMEM); 748 749 spin_lock_irq(&ent->mkeys_queue.lock); 750 ent->in_use++; 751 752 if (!ent->mkeys_queue.ci) { 753 queue_adjust_cache_locked(ent); 754 ent->miss++; 755 spin_unlock_irq(&ent->mkeys_queue.lock); 756 err = create_cache_mkey(ent, &mr->mmkey.key); 757 if (err) { 758 spin_lock_irq(&ent->mkeys_queue.lock); 759 ent->in_use--; 760 spin_unlock_irq(&ent->mkeys_queue.lock); 761 kfree(mr); 762 return ERR_PTR(err); 763 } 764 } else { 765 mr->mmkey.key = pop_mkey_locked(ent); 766 queue_adjust_cache_locked(ent); 767 spin_unlock_irq(&ent->mkeys_queue.lock); 768 } 769 mr->mmkey.cache_ent = ent; 770 mr->mmkey.type = MLX5_MKEY_MR; 771 mr->mmkey.rb_key = ent->rb_key; 772 mr->mmkey.cacheable = true; 773 init_waitqueue_head(&mr->mmkey.wait); 774 return mr; 775 } 776 777 static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev, 778 int access_flags) 
779 { 780 int ret = 0; 781 782 if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) && 783 MLX5_CAP_GEN(dev->mdev, atomic) && 784 MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) 785 ret |= IB_ACCESS_REMOTE_ATOMIC; 786 787 if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && 788 MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && 789 !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) 790 ret |= IB_ACCESS_RELAXED_ORDERING; 791 792 if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && 793 (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) || 794 MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled)) && 795 !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) 796 ret |= IB_ACCESS_RELAXED_ORDERING; 797 798 return ret; 799 } 800 801 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, 802 int access_flags, int access_mode, 803 int ndescs) 804 { 805 struct mlx5r_cache_rb_key rb_key = { 806 .ndescs = ndescs, 807 .access_mode = access_mode, 808 .access_flags = get_unchangeable_access_flags(dev, access_flags), 809 .ph = MLX5_IB_NO_PH, 810 }; 811 struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key); 812 813 if (!ent) 814 return ERR_PTR(-EOPNOTSUPP); 815 816 return _mlx5_mr_cache_alloc(dev, ent); 817 } 818 819 static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) 820 { 821 if (!mlx5_debugfs_root || dev->is_rep) 822 return; 823 824 debugfs_remove_recursive(dev->cache.fs_root); 825 dev->cache.fs_root = NULL; 826 } 827 828 static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev, 829 struct mlx5_cache_ent *ent) 830 { 831 int order = order_base_2(ent->rb_key.ndescs); 832 struct dentry *dir; 833 834 if (!mlx5_debugfs_root || dev->is_rep) 835 return; 836 837 if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) 838 order = MLX5_IMR_KSM_CACHE_ENTRY + 2; 839 840 sprintf(ent->name, "%d", order); 841 dir = debugfs_create_dir(ent->name, dev->cache.fs_root); 842 debugfs_create_file("size", 0600, dir, ent, &size_fops); 843 
debugfs_create_file("limit", 0600, dir, ent, &limit_fops); 844 debugfs_create_ulong("cur", 0400, dir, &ent->mkeys_queue.ci); 845 debugfs_create_u32("miss", 0600, dir, &ent->miss); 846 } 847 848 static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev) 849 { 850 struct dentry *dbg_root = mlx5_debugfs_get_dev_root(dev->mdev); 851 struct mlx5_mkey_cache *cache = &dev->cache; 852 853 if (!mlx5_debugfs_root || dev->is_rep) 854 return; 855 856 cache->fs_root = debugfs_create_dir("mr_cache", dbg_root); 857 } 858 859 static void delay_time_func(struct timer_list *t) 860 { 861 struct mlx5_ib_dev *dev = timer_container_of(dev, t, delay_timer); 862 863 WRITE_ONCE(dev->fill_delay, 0); 864 } 865 866 static int mlx5r_mkeys_init(struct mlx5_cache_ent *ent) 867 { 868 struct mlx5_mkeys_page *page; 869 870 page = kzalloc(sizeof(*page), GFP_KERNEL); 871 if (!page) 872 return -ENOMEM; 873 INIT_LIST_HEAD(&ent->mkeys_queue.pages_list); 874 spin_lock_init(&ent->mkeys_queue.lock); 875 list_add_tail(&page->list, &ent->mkeys_queue.pages_list); 876 ent->mkeys_queue.num_pages++; 877 return 0; 878 } 879 880 static void mlx5r_mkeys_uninit(struct mlx5_cache_ent *ent) 881 { 882 struct mlx5_mkeys_page *page; 883 884 WARN_ON(ent->mkeys_queue.ci || ent->mkeys_queue.num_pages > 1); 885 page = list_last_entry(&ent->mkeys_queue.pages_list, 886 struct mlx5_mkeys_page, list); 887 list_del(&page->list); 888 kfree(page); 889 } 890 891 struct mlx5_cache_ent * 892 mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev, 893 struct mlx5r_cache_rb_key rb_key, 894 bool persistent_entry) 895 { 896 struct mlx5_cache_ent *ent; 897 int order; 898 int ret; 899 900 ent = kzalloc(sizeof(*ent), GFP_KERNEL); 901 if (!ent) 902 return ERR_PTR(-ENOMEM); 903 904 ret = mlx5r_mkeys_init(ent); 905 if (ret) 906 goto mkeys_err; 907 ent->rb_key = rb_key; 908 ent->dev = dev; 909 ent->is_tmp = !persistent_entry; 910 911 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); 912 913 ret = mlx5_cache_ent_insert(&dev->cache, 
/*
 * Initialise the per-device mkey cache.
 *
 * Creates the ordered cache workqueue, the async command context and the
 * fill-delay timer, then creates one persistent MTT cache entry per order
 * from 0 to mkey_cache_max_order() (entry i holds
 * MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS << i descriptors), followed by the
 * ODP entries.  Finally every entry is asked to start filling itself.
 *
 * Returns 0 on success or a negative errno; on failure the entries created so
 * far, the debugfs directory and the workqueue are released.
 */
int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mkey_cache *cache = &dev->cache;
	struct rb_root *root = &dev->cache.rb_root;
	struct mlx5r_cache_rb_key rb_key = {
		.access_mode = MLX5_MKC_ACCESS_MODE_MTT,
		.ph = MLX5_IB_NO_PH,
	};
	struct mlx5_cache_ent *ent;
	struct rb_node *node;
	int ret;
	int i;

	mutex_init(&dev->slow_path_mutex);
	mutex_init(&dev->cache.rb_lock);
	dev->cache.rb_root = RB_ROOT;
	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
	timer_setup(&dev->delay_timer, delay_time_func, 0);
	mlx5_mkey_cache_debugfs_init(dev);
	/* Entries are inserted into the rb-tree under rb_lock. */
	mutex_lock(&cache->rb_lock);
	for (i = 0; i <= mkey_cache_max_order(dev); i++) {
		rb_key.ndescs = MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS << i;
		ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
		if (IS_ERR(ent)) {
			ret = PTR_ERR(ent);
			goto err;
		}
	}

	ret = mlx5_odp_init_mkey_cache(dev);
	if (ret)
		goto err;

	mutex_unlock(&cache->rb_lock);
	/*
	 * Kick every entry so it fills up to its water marks.
	 * NOTE(review): this walk runs after rb_lock was dropped; presumably
	 * nothing else can modify the tree this early - confirm.
	 */
	for (node = rb_first(root); node; node = rb_next(node)) {
		ent = rb_entry(node, struct mlx5_cache_ent, node);
		spin_lock_irq(&ent->mkeys_queue.lock);
		queue_adjust_cache_locked(ent);
		spin_unlock_irq(&ent->mkeys_queue.lock);
	}

	return 0;

err:
	/*
	 * NOTE(review): unlike mlx5_mkey_cache_cleanup(), this path neither
	 * cleans up the async command context nor deletes the timer, and
	 * cache->wq is left pointing at the destroyed workqueue - confirm
	 * that callers never run the cleanup after a failed init.
	 */
	mutex_unlock(&cache->rb_lock);
	mlx5_mkey_cache_debugfs_cleanup(dev);
	mlx5r_destroy_cache_entries(dev);
	destroy_workqueue(cache->wq);
	mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
	return ret;
}
*/ 1047 mlx5r_destroy_cache_entries(dev); 1048 1049 destroy_workqueue(dev->cache.wq); 1050 timer_delete_sync(&dev->delay_timer); 1051 } 1052 1053 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) 1054 { 1055 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1056 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1057 struct mlx5_ib_mr *mr; 1058 void *mkc; 1059 u32 *in; 1060 int err; 1061 1062 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1063 if (!mr) 1064 return ERR_PTR(-ENOMEM); 1065 1066 in = kzalloc(inlen, GFP_KERNEL); 1067 if (!in) { 1068 err = -ENOMEM; 1069 goto err_free; 1070 } 1071 1072 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1073 1074 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); 1075 MLX5_SET(mkc, mkc, length64, 1); 1076 set_mkc_access_pd_addr_fields(mkc, acc | IB_ACCESS_RELAXED_ORDERING, 0, 1077 pd); 1078 MLX5_SET(mkc, mkc, ma_translation_mode, MLX5_CAP_GEN(dev->mdev, ats)); 1079 1080 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 1081 if (err) 1082 goto err_in; 1083 1084 kfree(in); 1085 mr->mmkey.type = MLX5_MKEY_MR; 1086 mr->ibmr.lkey = mr->mmkey.key; 1087 mr->ibmr.rkey = mr->mmkey.key; 1088 mr->umem = NULL; 1089 1090 return &mr->ibmr; 1091 1092 err_in: 1093 kfree(in); 1094 1095 err_free: 1096 kfree(mr); 1097 1098 return ERR_PTR(err); 1099 } 1100 1101 static int get_octo_len(u64 addr, u64 len, int page_shift) 1102 { 1103 u64 page_size = 1ULL << page_shift; 1104 u64 offset; 1105 int npages; 1106 1107 offset = addr & (page_size - 1); 1108 npages = ALIGN(len + offset, page_size) >> page_shift; 1109 return (npages + 1) / 2; 1110 } 1111 1112 static int mkey_cache_max_order(struct mlx5_ib_dev *dev) 1113 { 1114 if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) 1115 return MKEY_CACHE_LAST_STD_ENTRY; 1116 return MLX5_MAX_UMR_SHIFT; 1117 } 1118 1119 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, 1120 u64 length, int access_flags, u64 iova) 1121 { 1122 mr->ibmr.lkey = mr->mmkey.key; 
mr->ibmr.rkey = mr->mmkey.key;
	mr->ibmr.length = length;
	mr->ibmr.device = &dev->ib_dev;
	mr->ibmr.iova = iova;
	mr->access_flags = access_flags;
}

/*
 * Page size used for dmabuf umems: always PAGE_SIZE.
 * As a side effect @iova is stored in @umem->iova.
 */
static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem,
						  u64 iova)
{
	/*
	 * The alignment of iova has already been checked upon entering
	 * UVERBS_METHOD_REG_DMABUF_MR
	 */
	umem->iova = iova;
	return PAGE_SIZE;
}

/*
 * Allocate an MR for @umem, taking the mkey from the mkey cache when a
 * matching cache entry exists.
 *
 * The page size is PAGE_SIZE for dmabuf umems and otherwise whatever
 * mlx5_umem_mkc_find_best_pgsz() selects; a zero page size triggers a WARN
 * and -EINVAL. The cache lookup key is built from the access mode, the number
 * of DMA blocks, the ATS requirement, the unchangeable access flags and the
 * TPH parameters (@st_index, @ph).
 *
 * Returns the MR or an ERR_PTR(). On the cache path the returned MR has its
 * pd, umem, page_shift and ib_mr fields filled in here; on the uncached path
 * reg_create() is called with populate == false and the MR is marked
 * cacheable with the computed rb_key.
 */
static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
					     struct ib_umem *umem, u64 iova,
					     int access_flags, int access_mode,
					     u16 st_index, u8 ph)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5r_cache_rb_key rb_key = {};
	struct mlx5_cache_ent *ent;
	struct mlx5_ib_mr *mr;
	unsigned long page_size;

	if (umem->is_dmabuf)
		page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
	else
		page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova,
							 access_mode);
	if (WARN_ON(!page_size))
		return ERR_PTR(-EINVAL);

	rb_key.access_mode = access_mode;
	rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size);
	rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags);
	rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags);
	rb_key.st_index = st_index;
	rb_key.ph = ph;
	ent = mkey_cache_ent_from_rb_key(dev, rb_key);
	/*
	 * If the MR can't come from the cache then synchronously create an uncached
	 * one.
	 */
	if (!ent) {
		/* The synchronous creation is serialized by slow_path_mutex. */
		mutex_lock(&dev->slow_path_mutex);
		mr = reg_create(pd, umem, iova, access_flags, page_size, false, access_mode,
				st_index, ph);
		mutex_unlock(&dev->slow_path_mutex);
		if (IS_ERR(mr))
			return mr;
		/* Remember the key and mark the mkey as cacheable. */
		mr->mmkey.rb_key = rb_key;
		mr->mmkey.cacheable = true;
		return mr;
	}

	mr = _mlx5_mr_cache_alloc(dev, ent);
	if (IS_ERR(mr))
		return mr;

	mr->ibmr.pd = pd;
	mr->umem = umem;
	mr->page_shift = order_base_2(page_size);
	set_mr_fields(dev, mr, umem->length, access_flags, iova);

	return mr;
}

/*
 * Create a "crossing" mkey (MLX5_MKC_ACCESS_MODE_CROSSING) on @pd that targets
 * this device's own vhca_id and refers to @crossed_lkey.
 *
 * The mkey is created with start address 0 and length @iova + @length, with
 * the free and umr_en bits cleared. Returns -EOPNOTSUPP when the device lacks
 * the crossing_vhca_mkey capability, or an ERR_PTR() on allocation or mkey
 * creation failure.
 */
static struct ib_mr *
reg_create_crossing_vhca_mr(struct ib_pd *pd, u64 iova, u64 length, int access_flags,
			    u32 crossed_lkey)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int access_mode = MLX5_MKC_ACCESS_MODE_CROSSING;
	struct mlx5_ib_mr *mr;
	void *mkc;
	int inlen;
	u32 *in;
	int err;

	if (!MLX5_CAP_GEN(dev->mdev, crossing_vhca_mkey))
		return ERR_PTR(-EOPNOTSUPP);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, crossing_target_vhca_id,
		 MLX5_CAP_GEN(dev->mdev, vhca_id));
	/*
	 * NOTE(review): the crossed lkey is written into the
	 * translations_octword_size field; presumably the field is reused for
	 * crossing mkeys - confirm against the device specification.
	 */
	MLX5_SET(mkc, mkc, translations_octword_size, crossed_lkey);
	/* The access mode is split over a 2-bit and a 3-bit field. */
	MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);

	/* for this crossing mkey IOVA should be 0 and len should be IOVA + len */
	set_mkc_access_pd_addr_fields(mkc, access_flags, 0, pd);
	MLX5_SET64(mkc, mkc, len, iova + length);

	MLX5_SET(mkc, mkc, free, 0);
	MLX5_SET(mkc, mkc, umr_en, 0);
	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
	if (err)
goto err_2;

	mr->mmkey.type = MLX5_MKEY_MR;
	set_mr_fields(dev, mr, length, access_flags, iova);
	mr->ibmr.pd = pd;
	kvfree(in);
	mlx5_ib_dbg(dev, "crossing mkey = 0x%x\n", mr->mmkey.key);

	return &mr->ibmr;
err_2:
	kvfree(in);
err_1:
	kfree(mr);
	return ERR_PTR(err);
}

/*
 * Allocate a new MR and synchronously create its mkey with a firmware
 * command.
 *
 * @page_size: page size for the translation; 0 is rejected with -EINVAL.
 * @populate:  when true the page list of @umem is embedded in the command and
 *             the mkey is created on @pd in the non-free state. This is not
 *             allowed together with IB_ACCESS_ON_DEMAND or KSM access mode
 *             (WARN + -EINVAL). When false the mkey is created in the free
 *             state on dev->umrc.pd.
 * @access_mode, @st_index, @ph: mkey access mode and PCIe TPH parameters; TPH
 *             is only programmed when @ph != MLX5_IB_NO_PH.
 *
 * Returns the MR (with mr->umem set to @umem) or an ERR_PTR(). @umem is not
 * released here on failure.
 */
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
				     u64 iova, int access_flags,
				     unsigned long page_size, bool populate,
				     int access_mode, u16 st_index, u8 ph)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr;
	__be64 *pas;
	void *mkc;
	int inlen;
	u32 *in;
	int err;
	/* pg_access is used only for MTT mkeys without TPH on capable devices. */
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)) &&
		(access_mode == MLX5_MKC_ACCESS_MODE_MTT) &&
		(ph == MLX5_IB_NO_PH);
	bool ksm_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM);

	if (!page_size)
		return ERR_PTR(-EINVAL);
	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->ibmr.pd = pd;
	mr->access_flags = access_flags;
	mr->page_shift = order_base_2(page_size);

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	/* Room for the inline page list, rounded up to an even entry count. */
	if (populate)
		inlen += sizeof(*pas) *
			 roundup(ib_umem_num_dma_blocks(umem, page_size), 2);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	if (populate) {
		if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND || ksm_mode)) {
			err = -EINVAL;
			goto err_2;
		}
		mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas,
				     pg_cap ? MLX5_IB_MTT_PRESENT : 0);
	}

	/* The pg_access bit allows setting the access flags
	 * in the page list submitted with the command.
	 */
	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	set_mkc_access_pd_addr_fields(mkc, access_flags, iova,
				      populate ? pd : dev->umrc.pd);
	/* In case a data direct flow, overwrite the pdn field by its internal kernel PD */
	if (umem->is_dmabuf && ksm_mode)
		MLX5_SET(mkc, mkc, pd, dev->ddr.pdn);

	MLX5_SET(mkc, mkc, free, !populate);
	MLX5_SET(mkc, mkc, access_mode_1_0, access_mode);
	MLX5_SET(mkc, mkc, umr_en, 1);

	MLX5_SET64(mkc, mkc, len, umem->length);
	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
	/* KSM mkeys are given twice the octword count of the other modes. */
	if (ksm_mode)
		MLX5_SET(mkc, mkc, translations_octword_size,
			 get_octo_len(iova, umem->length, mr->page_shift) * 2);
	else
		MLX5_SET(mkc, mkc, translations_octword_size,
			 get_octo_len(iova, umem->length, mr->page_shift));
	MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
	if (mlx5_umem_needs_ats(dev, umem, access_flags))
		MLX5_SET(mkc, mkc, ma_translation_mode, 1);
	if (populate) {
		MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
			 get_octo_len(iova, umem->length, mr->page_shift));
	}

	if (ph != MLX5_IB_NO_PH) {
		MLX5_SET(mkc, mkc, pcie_tph_en, 1);
		MLX5_SET(mkc, mkc, pcie_tph_ph, ph);
		if (st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX)
			MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index, st_index);
	}

	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->mmkey.type = MLX5_MKEY_MR;
	mr->mmkey.ndescs = get_octo_len(iova, umem->length, mr->page_shift);
	mr->umem = umem;
	set_mr_fields(dev, mr, umem->length, access_flags, iova);
	kvfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);

	return mr;

err_2:
	kvfree(in);
err_1:
	kfree(mr);
	return ERR_PTR(err);
}

1363 static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, 1364 u64 length, int acc, int mode) 1365 { 1366 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1367 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1368 struct mlx5_ib_mr *mr; 1369 void *mkc; 1370 u32 *in; 1371 int err; 1372 1373 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1374 if (!mr) 1375 return ERR_PTR(-ENOMEM); 1376 1377 in = kzalloc(inlen, GFP_KERNEL); 1378 if (!in) { 1379 err = -ENOMEM; 1380 goto err_free; 1381 } 1382 1383 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1384 1385 MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); 1386 MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); 1387 MLX5_SET64(mkc, mkc, len, length); 1388 set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd); 1389 1390 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 1391 if (err) 1392 goto err_in; 1393 1394 kfree(in); 1395 1396 set_mr_fields(dev, mr, length, acc, start_addr); 1397 1398 return &mr->ibmr; 1399 1400 err_in: 1401 kfree(in); 1402 1403 err_free: 1404 kfree(mr); 1405 1406 return ERR_PTR(err); 1407 } 1408 1409 int mlx5_ib_advise_mr(struct ib_pd *pd, 1410 enum ib_uverbs_advise_mr_advice advice, 1411 u32 flags, 1412 struct ib_sge *sg_list, 1413 u32 num_sge, 1414 struct uverbs_attr_bundle *attrs) 1415 { 1416 if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH && 1417 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && 1418 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT) 1419 return -EOPNOTSUPP; 1420 1421 return mlx5_ib_advise_mr_prefetch(pd, advice, flags, 1422 sg_list, num_sge); 1423 } 1424 1425 struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, 1426 struct ib_dm_mr_attr *attr, 1427 struct uverbs_attr_bundle *attrs) 1428 { 1429 struct mlx5_ib_dm *mdm = to_mdm(dm); 1430 struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev; 1431 u64 start_addr = mdm->dev_addr + attr->offset; 1432 int mode; 1433 1434 switch (mdm->type) { 1435 case MLX5_IB_UAPI_DM_TYPE_MEMIC: 
if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS)
			return ERR_PTR(-EINVAL);

		mode = MLX5_MKC_ACCESS_MODE_MEMIC;
		/* MEMIC addresses are made relative to the start of PCI resource 0. */
		start_addr -= pci_resource_start(dev->pdev, 0);
		break;
	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
	case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM:
		if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
			return ERR_PTR(-EINVAL);

		mode = MLX5_MKC_ACCESS_MODE_SW_ICM;
		break;
	default:
		return ERR_PTR(-EINVAL);
	}

	return mlx5_ib_get_dm_mr(pd, start_addr, attr->length,
				 attr->access_flags, mode);
}

/*
 * Create a regular (non-ODP) MTT MR over @umem.
 *
 * When the translation can be loaded by UMR the MR comes from
 * alloc_cacheable_mr() and is then populated and enabled with a UMR update.
 * Otherwise it is created directly with reg_create(populate = true) under
 * slow_path_mutex.
 *
 * @dmah, when given, supplies the TPH processing hint and, if its CPU-id field
 * is valid, the steering tag index.
 *
 * @umem is consumed: it is released here if MR allocation fails, and through
 * mlx5_ib_dereg_mr() if the UMR update fails. The pages of @umem are added to
 * the device's reg_pages counter on success of the allocation step.
 */
static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
				    u64 iova, int access_flags,
				    struct ib_dmah *dmah)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	bool xlt_with_umr;
	u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX;
	u8 ph = MLX5_IB_NO_PH;
	int err;

	if (dmah) {
		struct mlx5_ib_dmah *mdmah = to_mdmah(dmah);

		ph = dmah->ph;
		if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS))
			st_index = mdmah->st_index;
	}

	xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length);
	if (xlt_with_umr) {
		mr = alloc_cacheable_mr(pd, umem, iova, access_flags,
					MLX5_MKC_ACCESS_MODE_MTT,
					st_index, ph);
	} else {
		/* A zero page_size is rejected inside reg_create(). */
		unsigned long page_size = mlx5_umem_mkc_find_best_pgsz(
				dev, umem, iova, MLX5_MKC_ACCESS_MODE_MTT);

		mutex_lock(&dev->slow_path_mutex);
		mr = reg_create(pd, umem, iova, access_flags, page_size,
				true, MLX5_MKC_ACCESS_MODE_MTT,
				st_index, ph);
		mutex_unlock(&dev->slow_path_mutex);
	}
	if (IS_ERR(mr)) {
		ib_umem_release(umem);
		return ERR_CAST(mr);
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);

	atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);

	if (xlt_with_umr) {
		/*
		 * If the MR was created with reg_create then it will be
		 * configured properly but left disabled. It is safe to go ahead
		 * and configure it again via UMR while enabling it.
		 */
		err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
		if (err) {
			mlx5_ib_dereg_mr(&mr->ibmr, NULL);
			return ERR_PTR(err);
		}
	}
	return &mr->ibmr;
}

/*
 * Create an on-demand-paging (ODP) user MR.
 *
 * Returns -EOPNOTSUPP when CONFIG_INFINIBAND_ON_DEMAND_PAGING is disabled.
 * A request with @start == 0 and @length == U64_MAX asks for an implicit MR,
 * which additionally requires @iova == 0 and the IB_ODP_SUPPORT_IMPLICIT
 * capability (-EINVAL otherwise). All other requests create an explicit ODP
 * MR backed by an ib_umem_odp. @udata is not used.
 */
static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
					u64 iova, int access_flags,
					struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_umem_odp *odp;
	struct mlx5_ib_mr *mr;
	int err;

	if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
		return ERR_PTR(-EOPNOTSUPP);

	err = mlx5r_odp_create_eq(dev, &dev->odp_pf_eq);
	if (err)
		return ERR_PTR(err);
	if (!start && length == U64_MAX) {
		if (iova != 0)
			return ERR_PTR(-EINVAL);
		if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
			return ERR_PTR(-EINVAL);

		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
		if (IS_ERR(mr))
			return ERR_CAST(mr);
		return &mr->ibmr;
	}

	/* ODP requires xlt update via umr to work.
*/
	if (!mlx5r_umr_can_load_pas(dev, length))
		return ERR_PTR(-EINVAL);

	odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
			      &mlx5_mn_ops);
	if (IS_ERR(odp))
		return ERR_CAST(odp);

	mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags,
				MLX5_MKC_ACCESS_MODE_MTT,
				MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX,
				MLX5_IB_NO_PH);
	if (IS_ERR(mr)) {
		ib_umem_release(&odp->umem);
		return ERR_CAST(mr);
	}
	xa_init(&mr->implicit_children);

	/* Link the umem back to its MR before the mkey is stored. */
	odp->private = mr;
	err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
	if (err)
		goto err_dereg_mr;

	err = mlx5_ib_init_odp_mr(mr);
	if (err)
		goto err_dereg_mr;
	return &mr->ibmr;

err_dereg_mr:
	/* From here on the umem is owned by the MR; dereg tears both down. */
	mlx5_ib_dereg_mr(&mr->ibmr, NULL);
	return ERR_PTR(err);
}

/*
 * Register a user memory region.
 *
 * Returns -EOPNOTSUPP when CONFIG_INFINIBAND_USER_MEM is disabled or when
 * IB_ACCESS_ON_DEMAND is combined with a DMA handle (@dmah). After the UMR
 * resources are initialized, ODP requests are handled by create_user_odp_mr()
 * and all others obtain a umem with ib_umem_get() and hand it to
 * create_real_mr().
 */
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 iova, int access_flags,
				  struct ib_dmah *dmah,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_umem *umem;
	int err;

	if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
	    ((access_flags & IB_ACCESS_ON_DEMAND) && dmah))
		return ERR_PTR(-EOPNOTSUPP);

	mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, iova, length, access_flags);

	err = mlx5r_umr_resource_init(dev);
	if (err)
		return ERR_PTR(err);

	if (access_flags & IB_ACCESS_ON_DEMAND)
		return create_user_odp_mr(pd, start, length, iova, access_flags,
					  udata);
	umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
	if (IS_ERR(umem))
		return ERR_CAST(umem);
	/* create_real_mr() releases the umem itself on failure. */
	return create_real_mr(pd, umem, iova, access_flags, dmah);
}

/*
 * dma-buf move_notify callback (see mlx5_ib_dmabuf_attach_ops).
 *
 * Must be called with the dma-buf reservation lock held. If the umem has a
 * mapped sg table and an MR attached, the MR translation is zapped via UMR
 * and the pages are unmapped. The result of the UMR update is not checked.
 */
static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
{
	struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;
	struct mlx5_ib_mr *mr =
umem_dmabuf->private;

	dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);

	/* Nothing to invalidate if the pages are unmapped or no MR is attached. */
	if (!umem_dmabuf->sgt || !mr)
		return;

	mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
	ib_umem_dmabuf_unmap_pages(umem_dmabuf);
}

/* Attach ops used for non-pinned dmabuf MRs: peer-to-peer allowed, moves invalidate the MR. */
static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = {
	.allow_peer2peer = 1,
	.move_notify = mlx5_ib_dmabuf_invalidate_cb,
};

/*
 * Register an MR over the dma-buf referred to by @fd.
 *
 * With @access_mode == MLX5_MKC_ACCESS_MODE_KSM the dma-buf is obtained pinned
 * and mapped for @dma_device, and the MR is flagged data_direct. Otherwise a
 * dynamic attachment using mlx5_ib_dmabuf_attach_ops is created and the mkey
 * is stored with mlx5r_store_odp_mkey().
 *
 * @dmah, when given, supplies the TPH processing hint and, if its CPU-id field
 * is valid, the steering tag index.
 *
 * Returns the MR or an ERR_PTR(). The umem is released here if MR allocation
 * fails; later failures tear everything down via __mlx5_ib_dereg_mr().
 */
static struct ib_mr *
reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device,
		   u64 offset, u64 length, u64 virt_addr,
		   int fd, int access_flags, int access_mode,
		   struct ib_dmah *dmah)
{
	bool pinned_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM);
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem_dmabuf *umem_dmabuf;
	u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX;
	u8 ph = MLX5_IB_NO_PH;
	int err;

	err = mlx5r_umr_resource_init(dev);
	if (err)
		return ERR_PTR(err);

	if (!pinned_mode)
		umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev,
						 offset, length, fd,
						 access_flags,
						 &mlx5_ib_dmabuf_attach_ops);
	else
		umem_dmabuf = ib_umem_dmabuf_get_pinned_with_dma_device(&dev->ib_dev,
				dma_device, offset, length,
				fd, access_flags);

	if (IS_ERR(umem_dmabuf)) {
		mlx5_ib_dbg(dev, "umem_dmabuf get failed (%pe)\n", umem_dmabuf);
		return ERR_CAST(umem_dmabuf);
	}

	if (dmah) {
		struct mlx5_ib_dmah *mdmah = to_mdmah(dmah);

		ph = dmah->ph;
		if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS))
			st_index = mdmah->st_index;
	}

	mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr,
				access_flags, access_mode,
				st_index, ph);
	if (IS_ERR(mr)) {
		ib_umem_release(&umem_dmabuf->umem);
		return ERR_CAST(mr);
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);

	atomic_add(ib_umem_num_pages(mr->umem),
		   &dev->mdev->priv.reg_pages);
	/* Lets the move_notify callback find the MR from the attachment. */
	umem_dmabuf->private = mr;
	if (!pinned_mode) {
		err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
		if (err)
			goto err_dereg_mr;
	} else {
		mr->data_direct = true;
	}

	err = mlx5_ib_init_dmabuf_mr(mr);
	if (err)
		goto err_dereg_mr;
	return &mr->ibmr;

err_dereg_mr:
	__mlx5_ib_dereg_mr(&mr->ibmr);
	return ERR_PTR(err);
}

/*
 * Register a dma-buf MR through the data-direct device.
 *
 * Two MRs are created under data_direct_lock: a pinned KSM "crossed" MR mapped
 * for the data-direct device, and a "crossing" MR on @pd that refers to the
 * crossed MR's lkey. The crossing MR is returned to the caller; the crossed MR
 * is linked to it via dd_crossed_mr and added to dev->data_direct_mr_list.
 *
 * Returns -EOPNOTSUPP for a non page-aligned @virt_addr or for
 * IB_ACCESS_ON_DEMAND, and -EINVAL when no data-direct device is bound.
 */
static struct ib_mr *
reg_user_mr_dmabuf_by_data_direct(struct ib_pd *pd, u64 offset,
				  u64 length, u64 virt_addr,
				  int fd, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_data_direct_dev *data_direct_dev;
	struct ib_mr *crossing_mr;
	struct ib_mr *crossed_mr;
	int ret = 0;

	/* As of HW behaviour the IOVA must be page aligned in KSM mode */
	if (!PAGE_ALIGNED(virt_addr) || (access_flags & IB_ACCESS_ON_DEMAND))
		return ERR_PTR(-EOPNOTSUPP);

	mutex_lock(&dev->data_direct_lock);
	data_direct_dev = dev->data_direct_dev;
	if (!data_direct_dev) {
		ret = -EINVAL;
		goto end;
	}

	/* If no device's 'data direct mkey' with RO flags exists
	 * mask it out accordingly.
1721 */ 1722 if (!dev->ddr.mkey_ro_valid) 1723 access_flags &= ~IB_ACCESS_RELAXED_ORDERING; 1724 crossed_mr = reg_user_mr_dmabuf(pd, &data_direct_dev->pdev->dev, 1725 offset, length, virt_addr, fd, 1726 access_flags, MLX5_MKC_ACCESS_MODE_KSM, 1727 NULL); 1728 if (IS_ERR(crossed_mr)) { 1729 ret = PTR_ERR(crossed_mr); 1730 goto end; 1731 } 1732 1733 mutex_lock(&dev->slow_path_mutex); 1734 crossing_mr = reg_create_crossing_vhca_mr(pd, virt_addr, length, access_flags, 1735 crossed_mr->lkey); 1736 mutex_unlock(&dev->slow_path_mutex); 1737 if (IS_ERR(crossing_mr)) { 1738 __mlx5_ib_dereg_mr(crossed_mr); 1739 ret = PTR_ERR(crossing_mr); 1740 goto end; 1741 } 1742 1743 list_add_tail(&to_mmr(crossed_mr)->dd_node, &dev->data_direct_mr_list); 1744 to_mmr(crossing_mr)->dd_crossed_mr = to_mmr(crossed_mr); 1745 to_mmr(crossing_mr)->data_direct = true; 1746 end: 1747 mutex_unlock(&dev->data_direct_lock); 1748 return ret ? ERR_PTR(ret) : crossing_mr; 1749 } 1750 1751 struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset, 1752 u64 length, u64 virt_addr, 1753 int fd, int access_flags, 1754 struct ib_dmah *dmah, 1755 struct uverbs_attr_bundle *attrs) 1756 { 1757 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1758 int mlx5_access_flags = 0; 1759 int err; 1760 1761 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || 1762 !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) 1763 return ERR_PTR(-EOPNOTSUPP); 1764 1765 if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS)) { 1766 err = uverbs_get_flags32(&mlx5_access_flags, attrs, 1767 MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, 1768 MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT); 1769 if (err) 1770 return ERR_PTR(err); 1771 } 1772 1773 mlx5_ib_dbg(dev, 1774 "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x, mlx5_access_flags 0x%x\n", 1775 offset, virt_addr, length, fd, access_flags, mlx5_access_flags); 1776 1777 /* dmabuf requires xlt update via umr to work. 
*/ 1778 if (!mlx5r_umr_can_load_pas(dev, length)) 1779 return ERR_PTR(-EINVAL); 1780 1781 if (mlx5_access_flags & MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT) 1782 return reg_user_mr_dmabuf_by_data_direct(pd, offset, length, virt_addr, 1783 fd, access_flags); 1784 1785 return reg_user_mr_dmabuf(pd, pd->device->dma_device, 1786 offset, length, virt_addr, 1787 fd, access_flags, MLX5_MKC_ACCESS_MODE_MTT, 1788 dmah); 1789 } 1790 1791 /* 1792 * True if the change in access flags can be done via UMR, only some access 1793 * flags can be updated. 1794 */ 1795 static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev, 1796 unsigned int current_access_flags, 1797 unsigned int target_access_flags) 1798 { 1799 unsigned int diffs = current_access_flags ^ target_access_flags; 1800 1801 if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | 1802 IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING | 1803 IB_ACCESS_REMOTE_ATOMIC)) 1804 return false; 1805 return mlx5r_umr_can_reconfig(dev, current_access_flags, 1806 target_access_flags); 1807 } 1808 1809 static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, 1810 struct ib_umem *new_umem, 1811 int new_access_flags, u64 iova, 1812 unsigned long *page_size) 1813 { 1814 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 1815 1816 /* We only track the allocated sizes of MRs from the cache */ 1817 if (!mr->mmkey.cache_ent) 1818 return false; 1819 if (!mlx5r_umr_can_load_pas(dev, new_umem->length)) 1820 return false; 1821 1822 *page_size = mlx5_umem_mkc_find_best_pgsz( 1823 dev, new_umem, iova, mr->mmkey.cache_ent->rb_key.access_mode); 1824 if (WARN_ON(!*page_size)) 1825 return false; 1826 return (mr->mmkey.cache_ent->rb_key.ndescs) >= 1827 ib_umem_num_dma_blocks(new_umem, *page_size); 1828 } 1829 1830 static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, 1831 int access_flags, int flags, struct ib_umem *new_umem, 1832 u64 iova, unsigned long page_size) 1833 { 1834 struct mlx5_ib_dev *dev = 
to_mdev(mr->ibmr.device);
	int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE;
	struct ib_umem *old_umem = mr->umem;
	int err;

	/*
	 * To keep everything simple the MR is revoked before we start to mess
	 * with it. This ensures the change is atomic relative to any use of the
	 * MR.
	 */
	err = mlx5r_umr_revoke_mr(mr);
	if (err)
		return err;

	if (flags & IB_MR_REREG_PD) {
		mr->ibmr.pd = pd;
		upd_flags |= MLX5_IB_UPD_XLT_PD;
	}
	if (flags & IB_MR_REREG_ACCESS) {
		mr->access_flags = access_flags;
		upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
	}

	mr->ibmr.iova = iova;
	mr->ibmr.length = new_umem->length;
	mr->page_shift = order_base_2(page_size);
	mr->umem = new_umem;
	err = mlx5r_umr_update_mr_pas(mr, upd_flags);
	if (err) {
		/*
		 * The MR is revoked at this point so there is no issue to free
		 * new_umem.
		 */
		mr->umem = old_umem;
		return err;
	}

	/* Move the page accounting from the old umem to the new one. */
	atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages);
	ib_umem_release(old_umem);
	atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages);
	return 0;
}

/*
 * Re-register a user MR, changing any of its translation, PD and access flags
 * as selected by @flags.
 *
 * Return value:
 *   NULL       - the existing MR was updated in place;
 *   valid ptr  - a new MR was created to replace @ib_mr;
 *   ERR_PTR()  - failure.
 *
 * Returns -EOPNOTSUPP when CONFIG_INFINIBAND_USER_MEM is disabled, for
 * data-direct MRs, for MRs whose rb_key carries a TPH processing hint, and for
 * unknown bits in @flags.
 *
 * Strategy, cheapest first:
 *   - no translation change: update PD/access via UMR if possible, otherwise
 *     revoke the MR and build a new one around the existing umem;
 *   - translation change on a regular umem MR: get a new umem and either swap
 *     it in via UMR (umr_rereg_pas()) or create a new MR from it;
 *   - everything else (no umem, ODP, dmabuf): register a new MR from scratch.
 */
struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
				    u64 length, u64 iova, int new_access_flags,
				    struct ib_pd *new_pd,
				    struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
	struct mlx5_ib_mr *mr = to_mmr(ib_mr);
	int err;

	if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || mr->data_direct ||
	    mr->mmkey.rb_key.ph != MLX5_IB_NO_PH)
		return ERR_PTR(-EOPNOTSUPP);

	mlx5_ib_dbg(
		dev,
		"start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
		start, iova, length, new_access_flags);

	if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS))
		return ERR_PTR(-EOPNOTSUPP);

	/* Attributes that are not being changed keep their current value. */
	if (!(flags & IB_MR_REREG_ACCESS))
		new_access_flags = mr->access_flags;
	if (!(flags & IB_MR_REREG_PD))
		new_pd = ib_mr->pd;

	if (!(flags & IB_MR_REREG_TRANS)) {
		struct ib_umem *umem;

		/* Fast path for PD/access change */
		if (can_use_umr_rereg_access(dev, mr->access_flags,
					     new_access_flags)) {
			err = mlx5r_umr_rereg_pd_access(mr, new_pd,
							new_access_flags);
			if (err)
				return ERR_PTR(err);
			return NULL;
		}
		/* DM or ODP MR's don't have a normal umem so we can't re-use it */
		if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
			goto recreate;

		/*
		 * Only one active MR can refer to a umem at one time, revoke
		 * the old MR before assigning the umem to the new one.
		 */
		err = mlx5r_umr_revoke_mr(mr);
		if (err)
			return ERR_PTR(err);
		/* Detach the umem from the old MR and undo its page accounting. */
		umem = mr->umem;
		mr->umem = NULL;
		atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);

		return create_real_mr(new_pd, umem, mr->ibmr.iova,
				      new_access_flags, NULL);
	}

	/*
	 * DM doesn't have a PAS list so we can't re-use it, odp/dmabuf does
	 * but the logic around releasing the umem is different
	 */
	if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
		goto recreate;

	if (!(new_access_flags & IB_ACCESS_ON_DEMAND) &&
	    can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) {
		struct ib_umem *new_umem;
		unsigned long page_size;

		new_umem = ib_umem_get(&dev->ib_dev, start, length,
				       new_access_flags);
		if (IS_ERR(new_umem))
			return ERR_CAST(new_umem);

		/* Fast path for PAS change */
		if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova,
					  &page_size)) {
			err = umr_rereg_pas(mr, new_pd, new_access_flags, flags,
					    new_umem, iova, page_size);
			if (err) {
				ib_umem_release(new_umem);
				return ERR_PTR(err);
			}
			return NULL;
		}
		/* create_real_mr() takes over new_umem, also on failure. */
		return create_real_mr(new_pd, new_umem, iova, new_access_flags,
				      NULL);
	}

	/*
	 * Everything else has no state we can preserve, just create a new MR
	 * from scratch
	 */
recreate:
	return mlx5_ib_reg_user_mr(new_pd, start, length, iova,
				   new_access_flags, NULL, udata);
}

/*
 * Allocate and DMA-map the private descriptor buffer of @mr.
 *
 * @ndescs * @desc_size bytes are needed; extra bytes are allocated so that
 * mr->descs can be aligned to MLX5_UMR_ALIGN inside the allocation
 * (mr->descs_alloc is the pointer to free). The aligned buffer is mapped
 * DMA_TO_DEVICE and the handle stored in mr->desc_map.
 *
 * Returns 0 or -ENOMEM. NOTE(review): on the mapping-error path
 * mr->descs_alloc and mr->descs are left pointing at the freed buffer, so
 * callers must not use them after a failure.
 */
static int
mlx5_alloc_priv_descs(struct ib_device *device,
		      struct mlx5_ib_mr *mr,
		      int ndescs,
		      int desc_size)
{
	struct mlx5_ib_dev *dev = to_mdev(device);
	struct device *ddev = &dev->mdev->pdev->dev;
	int size = ndescs * desc_size;
	int add_size;
	int ret;

	/* Slack needed beyond the alignment kmalloc already provides. */
	add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
	if (is_power_of_2(MLX5_UMR_ALIGN) && add_size) {
		int end = max_t(int, MLX5_UMR_ALIGN, roundup_pow_of_two(size));

		add_size = min_t(int, end - size, add_size);
	}

	mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
	if (!mr->descs_alloc)
		return -ENOMEM;

	mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);

	mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, mr->desc_map)) {
		ret = -ENOMEM;
		goto err;
	}

	return 0;
err:
	kfree(mr->descs_alloc);

	return ret;
}

/*
 * Unmap and free the private descriptor buffer of @mr.
 *
 * Does nothing for MRs that have a umem, are data-direct, are of type
 * IB_MR_TYPE_DM, or have no descriptor buffer. mr->descs is cleared
 * afterwards; mr->descs_alloc is not.
 */
static void
mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
	if (!mr->umem && !mr->data_direct &&
	    mr->ibmr.type != IB_MR_TYPE_DM && mr->descs) {
		struct ib_device *device = mr->ibmr.device;
		int size = mr->max_descs * mr->desc_size;
		struct mlx5_ib_dev *dev = to_mdev(device);

		dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
				 DMA_TO_DEVICE);
		kfree(mr->descs_alloc);
		mr->descs = NULL;
	}
}

/*
 * Push the mkey of @mr into an mkey cache entry.
 *
 * If the MR already belongs to a cache entry that entry is used. Otherwise an
 * entry with exactly the same number of descriptors is looked up by the MR's
 * rb_key (-EOPNOTSUPP if that entry is disabled), and if none exists a new
 * entry is created. mr->mmkey.cache_ent is set to the entry used. The push is
 * done under the entry's mkeys_queue.lock.
 *
 * Returns the result of push_mkey_locked(), or a negative errno.
 */
static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
				    struct mlx5_ib_mr *mr)
{
	struct mlx5_mkey_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int ret;

	if (mr->mmkey.cache_ent) {
spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
		goto end;
	}

	mutex_lock(&cache->rb_lock);
	ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key);
	if (ent) {
		/* Only an entry with exactly the same descriptor count is reused. */
		if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) {
			if (ent->disabled) {
				mutex_unlock(&cache->rb_lock);
				return -EOPNOTSUPP;
			}
			mr->mmkey.cache_ent = ent;
			/* The queue lock is taken before rb_lock is dropped. */
			spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
			mutex_unlock(&cache->rb_lock);
			goto end;
		}
	}

	ent = mlx5r_cache_create_ent_locked(dev, mr->mmkey.rb_key, false);
	mutex_unlock(&cache->rb_lock);
	if (IS_ERR(ent))
		return PTR_ERR(ent);

	mr->mmkey.cache_ent = ent;
	spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);

end:
	/* Every path reaching this label holds the entry's mkeys_queue.lock. */
	ret = push_mkey_locked(mr->mmkey.cache_ent, mr->mmkey.key);
	spin_unlock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
	return ret;
}

/*
 * Revoke one data-direct MR: mark it revoked, revoke its mkey via UMR and then
 * revoke the dma-buf umem. Caller must hold data_direct_lock.
 *
 * The revoked flag is set before the UMR revoke is attempted, so it stays set
 * even when the revoke fails (which triggers a WARN and returns the error).
 */
static int mlx5_ib_revoke_data_direct_mr(struct mlx5_ib_mr *mr)
{
	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
	struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
	int err;

	lockdep_assert_held(&dev->data_direct_lock);
	mr->revoked = true;
	err = mlx5r_umr_revoke_mr(mr);
	if (WARN_ON(err))
		return err;

	ib_umem_dmabuf_revoke(umem_dmabuf);
	return 0;
}

/*
 * Revoke every MR on dev->data_direct_mr_list, removing each from the list.
 * Caller must hold data_direct_lock. Per-MR revoke errors are not propagated.
 */
void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev)
{
	struct mlx5_ib_mr *mr, *next;

	lockdep_assert_held(&dev->data_direct_lock);

	list_for_each_entry_safe(mr, next, &dev->data_direct_mr_list, dd_node) {
		list_del(&mr->dd_node);
		mlx5_ib_revoke_data_direct_mr(mr);
	}
}

/*
 * Revoke the mkey of @mr via UMR while holding the lock that protects the
 * umem's back-pointer to the MR: umem_mutex for ODP MRs, the dma-buf
 * reservation lock for non-pinned dmabuf MRs. On success the umem's ->private
 * back-pointer is cleared under that lock.
 *
 * Returns the result of mlx5r_umr_revoke_mr().
 */
static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr)
{
	bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
			      !to_ib_umem_dmabuf(mr->umem)->pinned;
	bool is_odp = is_odp_mr(mr);
	int ret;

	if (is_odp)
mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);

	if (is_odp_dma_buf)
		dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
			      NULL);

	ret = mlx5r_umr_revoke_mr(mr);

	if (is_odp) {
		if (!ret)
			to_ib_umem_odp(mr->umem)->private = NULL;
		mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
	}

	if (is_odp_dma_buf) {
		if (!ret)
			to_ib_umem_dmabuf(mr->umem)->private = NULL;
		dma_resv_unlock(
			to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
	}

	return ret;
}

/*
 * Dispose of the mkey of @mr, either by returning it to the mkey cache or by
 * destroying it.
 *
 * A cacheable mkey is first revoked and then pushed into a cache entry; if
 * both succeed the function returns 0 with mr->mmkey.cache_ent set. If the MR
 * originally came from the cache the entry's in_use count is dropped, and a
 * temporary entry gets its cleanup work scheduled (30 seconds) once.
 *
 * Otherwise the mkey is destroyed: the MR is detached from its original cache
 * entry (if any), and destroy_mkey() runs under umem_mutex for ODP MRs or the
 * dma-buf reservation lock for non-pinned dmabuf MRs, clearing the umem's
 * ->private back-pointer on success.
 *
 * Returns 0 or the error from destroy_mkey().
 */
static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr)
{
	bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
			      !to_ib_umem_dmabuf(mr->umem)->pinned;
	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
	struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
	bool is_odp = is_odp_mr(mr);
	/* Captured up front: cache_ent_find_and_store() may set cache_ent. */
	bool from_cache = !!ent;
	int ret;

	if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) &&
	    !cache_ent_find_and_store(dev, mr)) {
		ent = mr->mmkey.cache_ent;
		/* upon storing to a clean temp entry - schedule its cleanup */
		spin_lock_irq(&ent->mkeys_queue.lock);
		if (from_cache)
			ent->in_use--;
		if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
			mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
					 secs_to_jiffies(30));
			ent->tmp_cleanup_scheduled = true;
		}
		spin_unlock_irq(&ent->mkeys_queue.lock);
		return 0;
	}

	/*
	 * ent is still the entry the MR originally came from (or NULL): this
	 * point is only reached when the block above did not run to completion.
	 */
	if (ent) {
		spin_lock_irq(&ent->mkeys_queue.lock);
		ent->in_use--;
		mr->mmkey.cache_ent = NULL;
		spin_unlock_irq(&ent->mkeys_queue.lock);
	}

	if (is_odp)
		mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);

	if (is_odp_dma_buf)
		dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
			      NULL);
	ret = destroy_mkey(dev, mr);
	if (is_odp) {
		if (!ret)
			to_ib_umem_odp(mr->umem)->private = NULL;
		mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
	}

	if (is_odp_dma_buf) {
		if (!ret)
			to_ib_umem_dmabuf(mr->umem)->private = NULL;
		dma_resv_unlock(
			to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
	}
	return ret;
}

/*
 * Tear down a single MR and free it.
 *
 * Order of operations:
 *   1. with ODP enabled and a non-zero usecount, remove the mkey from the
 *      odp_mkeys xarray and wait for outstanding references;
 *   2. for integrity MRs, drop the sig_mrs entry, deregister the internal
 *      mtt/klm MRs, destroy both PSVs and free the signature context;
 *   3. return the mkey to the cache or destroy it;
 *   4. release the umem (adjusting reg_pages for non-ODP MRs) and free ODP
 *      state;
 *   5. free the private descriptors unless the mkey went to the cache;
 *   6. free @mr.
 *
 * Returns 0, or the first error from deregistering an internal MR or from the
 * mkey cleanup; in that case the MR is not freed.
 */
static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	int rc;

	/*
	 * Any async use of the mr must hold the refcount, once the refcount
	 * goes to zero no other thread, such as ODP page faults, prefetch, any
	 * UMR activity, etc can touch the mkey. Thus it is safe to destroy it.
	 */
	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) &&
	    refcount_read(&mr->mmkey.usecount) != 0 &&
	    xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)))
		mlx5r_deref_wait_odp_mkey(&mr->mmkey);

	if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
		xa_cmpxchg(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
			   mr->sig, NULL, GFP_KERNEL);

		if (mr->mtt_mr) {
			rc = mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL);
			if (rc)
				return rc;
			mr->mtt_mr = NULL;
		}
		if (mr->klm_mr) {
			rc = mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL);
			if (rc)
				return rc;
			mr->klm_mr = NULL;
		}

		/* PSV destruction failures are only logged. */
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
		kfree(mr->sig);
		mr->sig = NULL;
	}

	/* Stop DMA */
	rc = mlx5r_handle_mkey_cleanup(mr);
	if (rc)
		return rc;

	if (mr->umem) {
		bool is_odp = is_odp_mr(mr);

		if (!is_odp)
			atomic_sub(ib_umem_num_pages(mr->umem),
				   &dev->mdev->priv.reg_pages);
ib_umem_release(mr->umem); 2240 if (is_odp) 2241 mlx5_ib_free_odp_mr(mr); 2242 } 2243 2244 if (!mr->mmkey.cache_ent) 2245 mlx5_free_priv_descs(mr); 2246 2247 kfree(mr); 2248 return 0; 2249 } 2250 2251 static int dereg_crossing_data_direct_mr(struct mlx5_ib_dev *dev, 2252 struct mlx5_ib_mr *mr) 2253 { 2254 struct mlx5_ib_mr *dd_crossed_mr = mr->dd_crossed_mr; 2255 int ret; 2256 2257 ret = __mlx5_ib_dereg_mr(&mr->ibmr); 2258 if (ret) 2259 return ret; 2260 2261 mutex_lock(&dev->data_direct_lock); 2262 if (!dd_crossed_mr->revoked) 2263 list_del(&dd_crossed_mr->dd_node); 2264 2265 ret = __mlx5_ib_dereg_mr(&dd_crossed_mr->ibmr); 2266 mutex_unlock(&dev->data_direct_lock); 2267 return ret; 2268 } 2269 2270 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) 2271 { 2272 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2273 struct mlx5_ib_dev *dev = to_mdev(ibmr->device); 2274 2275 if (mr->data_direct) 2276 return dereg_crossing_data_direct_mr(dev, mr); 2277 2278 return __mlx5_ib_dereg_mr(ibmr); 2279 } 2280 2281 static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs, 2282 int access_mode, int page_shift) 2283 { 2284 struct mlx5_ib_dev *dev = to_mdev(pd->device); 2285 void *mkc; 2286 2287 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 2288 2289 /* This is only used from the kernel, so setting the PD is OK. 
*/ 2290 set_mkc_access_pd_addr_fields(mkc, IB_ACCESS_RELAXED_ORDERING, 0, pd); 2291 MLX5_SET(mkc, mkc, free, 1); 2292 MLX5_SET(mkc, mkc, translations_octword_size, ndescs); 2293 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); 2294 MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); 2295 MLX5_SET(mkc, mkc, umr_en, 1); 2296 MLX5_SET(mkc, mkc, log_page_size, page_shift); 2297 if (access_mode == MLX5_MKC_ACCESS_MODE_PA || 2298 access_mode == MLX5_MKC_ACCESS_MODE_MTT) 2299 MLX5_SET(mkc, mkc, ma_translation_mode, MLX5_CAP_GEN(dev->mdev, ats)); 2300 } 2301 2302 static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 2303 int ndescs, int desc_size, int page_shift, 2304 int access_mode, u32 *in, int inlen) 2305 { 2306 struct mlx5_ib_dev *dev = to_mdev(pd->device); 2307 int err; 2308 2309 mr->access_mode = access_mode; 2310 mr->desc_size = desc_size; 2311 mr->max_descs = ndescs; 2312 2313 err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size); 2314 if (err) 2315 return err; 2316 2317 mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift); 2318 2319 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 2320 if (err) 2321 goto err_free_descs; 2322 2323 mr->mmkey.type = MLX5_MKEY_MR; 2324 mr->ibmr.lkey = mr->mmkey.key; 2325 mr->ibmr.rkey = mr->mmkey.key; 2326 2327 return 0; 2328 2329 err_free_descs: 2330 mlx5_free_priv_descs(mr); 2331 return err; 2332 } 2333 2334 static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, 2335 u32 max_num_sg, u32 max_num_meta_sg, 2336 int desc_size, int access_mode) 2337 { 2338 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 2339 int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4); 2340 int page_shift = 0; 2341 struct mlx5_ib_mr *mr; 2342 u32 *in; 2343 int err; 2344 2345 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 2346 if (!mr) 2347 return ERR_PTR(-ENOMEM); 2348 2349 mr->ibmr.pd = pd; 2350 mr->ibmr.device = pd->device; 2351 2352 in = kzalloc(inlen, GFP_KERNEL); 2353 if (!in) { 2354 err = 
-ENOMEM; 2355 goto err_free; 2356 } 2357 2358 if (access_mode == MLX5_MKC_ACCESS_MODE_MTT) 2359 page_shift = PAGE_SHIFT; 2360 2361 err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift, 2362 access_mode, in, inlen); 2363 if (err) 2364 goto err_free_in; 2365 2366 mr->umem = NULL; 2367 kfree(in); 2368 2369 return mr; 2370 2371 err_free_in: 2372 kfree(in); 2373 err_free: 2374 kfree(mr); 2375 return ERR_PTR(err); 2376 } 2377 2378 static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 2379 int ndescs, u32 *in, int inlen) 2380 { 2381 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt), 2382 PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in, 2383 inlen); 2384 } 2385 2386 static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 2387 int ndescs, u32 *in, int inlen) 2388 { 2389 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm), 2390 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); 2391 } 2392 2393 static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 2394 int max_num_sg, int max_num_meta_sg, 2395 u32 *in, int inlen) 2396 { 2397 struct mlx5_ib_dev *dev = to_mdev(pd->device); 2398 u32 psv_index[2]; 2399 void *mkc; 2400 int err; 2401 2402 mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); 2403 if (!mr->sig) 2404 return -ENOMEM; 2405 2406 /* create mem & wire PSVs */ 2407 err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index); 2408 if (err) 2409 goto err_free_sig; 2410 2411 mr->sig->psv_memory.psv_idx = psv_index[0]; 2412 mr->sig->psv_wire.psv_idx = psv_index[1]; 2413 2414 mr->sig->sig_status_checked = true; 2415 mr->sig->sig_err_exists = false; 2416 /* Next UMR, Arm SIGERR */ 2417 ++mr->sig->sigerr_count; 2418 mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, 2419 sizeof(struct mlx5_klm), 2420 MLX5_MKC_ACCESS_MODE_KLMS); 2421 if (IS_ERR(mr->klm_mr)) { 2422 err = PTR_ERR(mr->klm_mr); 2423 goto err_destroy_psv; 2424 } 2425 mr->mtt_mr = 
mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, 2426 sizeof(struct mlx5_mtt), 2427 MLX5_MKC_ACCESS_MODE_MTT); 2428 if (IS_ERR(mr->mtt_mr)) { 2429 err = PTR_ERR(mr->mtt_mr); 2430 goto err_free_klm_mr; 2431 } 2432 2433 /* Set bsf descriptors for mkey */ 2434 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 2435 MLX5_SET(mkc, mkc, bsf_en, 1); 2436 MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); 2437 2438 err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0, 2439 MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); 2440 if (err) 2441 goto err_free_mtt_mr; 2442 2443 err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), 2444 mr->sig, GFP_KERNEL)); 2445 if (err) 2446 goto err_free_descs; 2447 return 0; 2448 2449 err_free_descs: 2450 destroy_mkey(dev, mr); 2451 mlx5_free_priv_descs(mr); 2452 err_free_mtt_mr: 2453 mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL); 2454 mr->mtt_mr = NULL; 2455 err_free_klm_mr: 2456 mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL); 2457 mr->klm_mr = NULL; 2458 err_destroy_psv: 2459 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) 2460 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", 2461 mr->sig->psv_memory.psv_idx); 2462 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) 2463 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", 2464 mr->sig->psv_wire.psv_idx); 2465 err_free_sig: 2466 kfree(mr->sig); 2467 2468 return err; 2469 } 2470 2471 static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, 2472 enum ib_mr_type mr_type, u32 max_num_sg, 2473 u32 max_num_meta_sg) 2474 { 2475 struct mlx5_ib_dev *dev = to_mdev(pd->device); 2476 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 2477 int ndescs = ALIGN(max_num_sg, 4); 2478 struct mlx5_ib_mr *mr; 2479 u32 *in; 2480 int err; 2481 2482 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 2483 if (!mr) 2484 return ERR_PTR(-ENOMEM); 2485 2486 in = kzalloc(inlen, GFP_KERNEL); 2487 if (!in) { 2488 err = -ENOMEM; 2489 goto err_free; 2490 } 2491 2492 
mr->ibmr.device = pd->device; 2493 mr->umem = NULL; 2494 2495 switch (mr_type) { 2496 case IB_MR_TYPE_MEM_REG: 2497 err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen); 2498 break; 2499 case IB_MR_TYPE_SG_GAPS: 2500 err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen); 2501 break; 2502 case IB_MR_TYPE_INTEGRITY: 2503 err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg, 2504 max_num_meta_sg, in, inlen); 2505 break; 2506 default: 2507 mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type); 2508 err = -EINVAL; 2509 } 2510 2511 if (err) 2512 goto err_free_in; 2513 2514 kfree(in); 2515 2516 return &mr->ibmr; 2517 2518 err_free_in: 2519 kfree(in); 2520 err_free: 2521 kfree(mr); 2522 return ERR_PTR(err); 2523 } 2524 2525 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 2526 u32 max_num_sg) 2527 { 2528 return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0); 2529 } 2530 2531 struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, 2532 u32 max_num_sg, u32 max_num_meta_sg) 2533 { 2534 return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg, 2535 max_num_meta_sg); 2536 } 2537 2538 int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) 2539 { 2540 struct mlx5_ib_dev *dev = to_mdev(ibmw->device); 2541 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 2542 struct mlx5_ib_mw *mw = to_mmw(ibmw); 2543 unsigned int ndescs; 2544 u32 *in = NULL; 2545 void *mkc; 2546 int err; 2547 struct mlx5_ib_alloc_mw req = {}; 2548 struct { 2549 __u32 comp_mask; 2550 __u32 response_length; 2551 } resp = {}; 2552 2553 err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); 2554 if (err) 2555 return err; 2556 2557 if (req.comp_mask || req.reserved1 || req.reserved2) 2558 return -EOPNOTSUPP; 2559 2560 if (udata->inlen > sizeof(req) && 2561 !ib_is_udata_cleared(udata, sizeof(req), 2562 udata->inlen - sizeof(req))) 2563 return -EOPNOTSUPP; 2564 2565 ndescs = req.num_klms ? 
roundup(req.num_klms, 4) : roundup(1, 4); 2566 2567 in = kzalloc(inlen, GFP_KERNEL); 2568 if (!in) 2569 return -ENOMEM; 2570 2571 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 2572 2573 MLX5_SET(mkc, mkc, free, 1); 2574 MLX5_SET(mkc, mkc, translations_octword_size, ndescs); 2575 MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn); 2576 MLX5_SET(mkc, mkc, umr_en, 1); 2577 MLX5_SET(mkc, mkc, lr, 1); 2578 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); 2579 MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2))); 2580 MLX5_SET(mkc, mkc, qpn, 0xffffff); 2581 2582 err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen); 2583 if (err) 2584 goto free; 2585 2586 mw->mmkey.type = MLX5_MKEY_MW; 2587 ibmw->rkey = mw->mmkey.key; 2588 mw->mmkey.ndescs = ndescs; 2589 2590 resp.response_length = 2591 min(offsetofend(typeof(resp), response_length), udata->outlen); 2592 if (resp.response_length) { 2593 err = ib_copy_to_udata(udata, &resp, resp.response_length); 2594 if (err) 2595 goto free_mkey; 2596 } 2597 2598 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { 2599 err = mlx5r_store_odp_mkey(dev, &mw->mmkey); 2600 if (err) 2601 goto free_mkey; 2602 } 2603 2604 kfree(in); 2605 return 0; 2606 2607 free_mkey: 2608 mlx5_core_destroy_mkey(dev->mdev, mw->mmkey.key); 2609 free: 2610 kfree(in); 2611 return err; 2612 } 2613 2614 int mlx5_ib_dealloc_mw(struct ib_mw *mw) 2615 { 2616 struct mlx5_ib_dev *dev = to_mdev(mw->device); 2617 struct mlx5_ib_mw *mmw = to_mmw(mw); 2618 2619 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && 2620 xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key))) 2621 /* 2622 * pagefault_single_data_segment() may be accessing mmw 2623 * if the user bound an ODP MR to this MW. 
2624 */ 2625 mlx5r_deref_wait_odp_mkey(&mmw->mmkey); 2626 2627 return mlx5_core_destroy_mkey(dev->mdev, mmw->mmkey.key); 2628 } 2629 2630 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, 2631 struct ib_mr_status *mr_status) 2632 { 2633 struct mlx5_ib_mr *mmr = to_mmr(ibmr); 2634 int ret = 0; 2635 2636 if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { 2637 pr_err("Invalid status check mask\n"); 2638 ret = -EINVAL; 2639 goto done; 2640 } 2641 2642 mr_status->fail_status = 0; 2643 if (check_mask & IB_MR_CHECK_SIG_STATUS) { 2644 if (!mmr->sig) { 2645 ret = -EINVAL; 2646 pr_err("signature status check requested on a non-signature enabled MR\n"); 2647 goto done; 2648 } 2649 2650 mmr->sig->sig_status_checked = true; 2651 if (!mmr->sig->sig_err_exists) 2652 goto done; 2653 2654 if (ibmr->lkey == mmr->sig->err_item.key) 2655 memcpy(&mr_status->sig_err, &mmr->sig->err_item, 2656 sizeof(mr_status->sig_err)); 2657 else { 2658 mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; 2659 mr_status->sig_err.sig_err_offset = 0; 2660 mr_status->sig_err.key = mmr->sig->err_item.key; 2661 } 2662 2663 mmr->sig->sig_err_exists = false; 2664 mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; 2665 } 2666 2667 done: 2668 return ret; 2669 } 2670 2671 static int 2672 mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2673 int data_sg_nents, unsigned int *data_sg_offset, 2674 struct scatterlist *meta_sg, int meta_sg_nents, 2675 unsigned int *meta_sg_offset) 2676 { 2677 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2678 unsigned int sg_offset = 0; 2679 int n = 0; 2680 2681 mr->meta_length = 0; 2682 if (data_sg_nents == 1) { 2683 n++; 2684 mr->mmkey.ndescs = 1; 2685 if (data_sg_offset) 2686 sg_offset = *data_sg_offset; 2687 mr->data_length = sg_dma_len(data_sg) - sg_offset; 2688 mr->data_iova = sg_dma_address(data_sg) + sg_offset; 2689 if (meta_sg_nents == 1) { 2690 n++; 2691 mr->meta_ndescs = 1; 2692 if (meta_sg_offset) 2693 sg_offset = *meta_sg_offset; 2694 else 2695 sg_offset 
= 0; 2696 mr->meta_length = sg_dma_len(meta_sg) - sg_offset; 2697 mr->pi_iova = sg_dma_address(meta_sg) + sg_offset; 2698 } 2699 ibmr->length = mr->data_length + mr->meta_length; 2700 } 2701 2702 return n; 2703 } 2704 2705 static int 2706 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, 2707 struct scatterlist *sgl, 2708 unsigned short sg_nents, 2709 unsigned int *sg_offset_p, 2710 struct scatterlist *meta_sgl, 2711 unsigned short meta_sg_nents, 2712 unsigned int *meta_sg_offset_p) 2713 { 2714 struct scatterlist *sg = sgl; 2715 struct mlx5_klm *klms = mr->descs; 2716 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 2717 u32 lkey = mr->ibmr.pd->local_dma_lkey; 2718 int i, j = 0; 2719 2720 mr->ibmr.iova = sg_dma_address(sg) + sg_offset; 2721 mr->ibmr.length = 0; 2722 2723 for_each_sg(sgl, sg, sg_nents, i) { 2724 if (unlikely(i >= mr->max_descs)) 2725 break; 2726 klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset); 2727 klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset); 2728 klms[i].key = cpu_to_be32(lkey); 2729 mr->ibmr.length += sg_dma_len(sg) - sg_offset; 2730 2731 sg_offset = 0; 2732 } 2733 2734 if (sg_offset_p) 2735 *sg_offset_p = sg_offset; 2736 2737 mr->mmkey.ndescs = i; 2738 mr->data_length = mr->ibmr.length; 2739 2740 if (meta_sg_nents) { 2741 sg = meta_sgl; 2742 sg_offset = meta_sg_offset_p ? 
*meta_sg_offset_p : 0; 2743 for_each_sg(meta_sgl, sg, meta_sg_nents, j) { 2744 if (unlikely(i + j >= mr->max_descs)) 2745 break; 2746 klms[i + j].va = cpu_to_be64(sg_dma_address(sg) + 2747 sg_offset); 2748 klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) - 2749 sg_offset); 2750 klms[i + j].key = cpu_to_be32(lkey); 2751 mr->ibmr.length += sg_dma_len(sg) - sg_offset; 2752 2753 sg_offset = 0; 2754 } 2755 if (meta_sg_offset_p) 2756 *meta_sg_offset_p = sg_offset; 2757 2758 mr->meta_ndescs = j; 2759 mr->meta_length = mr->ibmr.length - mr->data_length; 2760 } 2761 2762 return i + j; 2763 } 2764 2765 static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) 2766 { 2767 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2768 __be64 *descs; 2769 2770 if (unlikely(mr->mmkey.ndescs == mr->max_descs)) 2771 return -ENOMEM; 2772 2773 descs = mr->descs; 2774 descs[mr->mmkey.ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); 2775 2776 return 0; 2777 } 2778 2779 static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr) 2780 { 2781 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2782 __be64 *descs; 2783 2784 if (unlikely(mr->mmkey.ndescs + mr->meta_ndescs == mr->max_descs)) 2785 return -ENOMEM; 2786 2787 descs = mr->descs; 2788 descs[mr->mmkey.ndescs + mr->meta_ndescs++] = 2789 cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); 2790 2791 return 0; 2792 } 2793 2794 static int 2795 mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2796 int data_sg_nents, unsigned int *data_sg_offset, 2797 struct scatterlist *meta_sg, int meta_sg_nents, 2798 unsigned int *meta_sg_offset) 2799 { 2800 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2801 struct mlx5_ib_mr *pi_mr = mr->mtt_mr; 2802 int n; 2803 2804 pi_mr->mmkey.ndescs = 0; 2805 pi_mr->meta_ndescs = 0; 2806 pi_mr->meta_length = 0; 2807 2808 ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map, 2809 pi_mr->desc_size * pi_mr->max_descs, 2810 DMA_TO_DEVICE); 2811 2812 pi_mr->ibmr.page_size = ibmr->page_size; 2813 n = ib_sg_to_pages(&pi_mr->ibmr, 
data_sg, data_sg_nents, data_sg_offset, 2814 mlx5_set_page); 2815 if (n != data_sg_nents) 2816 return n; 2817 2818 pi_mr->data_iova = pi_mr->ibmr.iova; 2819 pi_mr->data_length = pi_mr->ibmr.length; 2820 pi_mr->ibmr.length = pi_mr->data_length; 2821 ibmr->length = pi_mr->data_length; 2822 2823 if (meta_sg_nents) { 2824 u64 page_mask = ~((u64)ibmr->page_size - 1); 2825 u64 iova = pi_mr->data_iova; 2826 2827 n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents, 2828 meta_sg_offset, mlx5_set_page_pi); 2829 2830 pi_mr->meta_length = pi_mr->ibmr.length; 2831 /* 2832 * PI address for the HW is the offset of the metadata address 2833 * relative to the first data page address. 2834 * It equals to first data page address + size of data pages + 2835 * metadata offset at the first metadata page 2836 */ 2837 pi_mr->pi_iova = (iova & page_mask) + 2838 pi_mr->mmkey.ndescs * ibmr->page_size + 2839 (pi_mr->ibmr.iova & ~page_mask); 2840 /* 2841 * In order to use one MTT MR for data and metadata, we register 2842 * also the gaps between the end of the data and the start of 2843 * the metadata (the sig MR will verify that the HW will access 2844 * to right addresses). This mapping is safe because we use 2845 * internal mkey for the registration. 
2846 */ 2847 pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova; 2848 pi_mr->ibmr.iova = iova; 2849 ibmr->length += pi_mr->meta_length; 2850 } 2851 2852 ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, 2853 pi_mr->desc_size * pi_mr->max_descs, 2854 DMA_TO_DEVICE); 2855 2856 return n; 2857 } 2858 2859 static int 2860 mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2861 int data_sg_nents, unsigned int *data_sg_offset, 2862 struct scatterlist *meta_sg, int meta_sg_nents, 2863 unsigned int *meta_sg_offset) 2864 { 2865 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2866 struct mlx5_ib_mr *pi_mr = mr->klm_mr; 2867 int n; 2868 2869 pi_mr->mmkey.ndescs = 0; 2870 pi_mr->meta_ndescs = 0; 2871 pi_mr->meta_length = 0; 2872 2873 ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map, 2874 pi_mr->desc_size * pi_mr->max_descs, 2875 DMA_TO_DEVICE); 2876 2877 n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset, 2878 meta_sg, meta_sg_nents, meta_sg_offset); 2879 2880 ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, 2881 pi_mr->desc_size * pi_mr->max_descs, 2882 DMA_TO_DEVICE); 2883 2884 /* This is zero-based memory region */ 2885 pi_mr->data_iova = 0; 2886 pi_mr->ibmr.iova = 0; 2887 pi_mr->pi_iova = pi_mr->data_length; 2888 ibmr->length = pi_mr->ibmr.length; 2889 2890 return n; 2891 } 2892 2893 int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2894 int data_sg_nents, unsigned int *data_sg_offset, 2895 struct scatterlist *meta_sg, int meta_sg_nents, 2896 unsigned int *meta_sg_offset) 2897 { 2898 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2899 struct mlx5_ib_mr *pi_mr = NULL; 2900 int n; 2901 2902 WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY); 2903 2904 mr->mmkey.ndescs = 0; 2905 mr->data_length = 0; 2906 mr->data_iova = 0; 2907 mr->meta_ndescs = 0; 2908 mr->pi_iova = 0; 2909 /* 2910 * As a performance optimization, if possible, there is no need to 2911 * perform UMR operation to register the 
data/metadata buffers. 2912 * First try to map the sg lists to PA descriptors with local_dma_lkey. 2913 * Fallback to UMR only in case of a failure. 2914 */ 2915 n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents, 2916 data_sg_offset, meta_sg, meta_sg_nents, 2917 meta_sg_offset); 2918 if (n == data_sg_nents + meta_sg_nents) 2919 goto out; 2920 /* 2921 * As a performance optimization, if possible, there is no need to map 2922 * the sg lists to KLM descriptors. First try to map the sg lists to MTT 2923 * descriptors and fallback to KLM only in case of a failure. 2924 * It's more efficient for the HW to work with MTT descriptors 2925 * (especially in high load). 2926 * Use KLM (indirect access) only if it's mandatory. 2927 */ 2928 pi_mr = mr->mtt_mr; 2929 n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents, 2930 data_sg_offset, meta_sg, meta_sg_nents, 2931 meta_sg_offset); 2932 if (n == data_sg_nents + meta_sg_nents) 2933 goto out; 2934 2935 pi_mr = mr->klm_mr; 2936 n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents, 2937 data_sg_offset, meta_sg, meta_sg_nents, 2938 meta_sg_offset); 2939 if (unlikely(n != data_sg_nents + meta_sg_nents)) 2940 return -ENOMEM; 2941 2942 out: 2943 /* This is zero-based memory region */ 2944 ibmr->iova = 0; 2945 mr->pi_mr = pi_mr; 2946 if (pi_mr) 2947 ibmr->sig_attrs->meta_length = pi_mr->meta_length; 2948 else 2949 ibmr->sig_attrs->meta_length = mr->meta_length; 2950 2951 return 0; 2952 } 2953 2954 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 2955 unsigned int *sg_offset) 2956 { 2957 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2958 int n; 2959 2960 mr->mmkey.ndescs = 0; 2961 2962 ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map, 2963 mr->desc_size * mr->max_descs, 2964 DMA_TO_DEVICE); 2965 2966 if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) 2967 n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0, 2968 NULL); 2969 else 2970 n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, 
2971 mlx5_set_page); 2972 2973 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, 2974 mr->desc_size * mr->max_descs, 2975 DMA_TO_DEVICE); 2976 2977 return n; 2978 } 2979