1 /* 2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. 3 * Copyright (c) 2020, Intel Corporation. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34 35 #include <linux/kref.h> 36 #include <linux/random.h> 37 #include <linux/debugfs.h> 38 #include <linux/export.h> 39 #include <linux/delay.h> 40 #include <linux/dma-buf.h> 41 #include <linux/dma-resv.h> 42 #include <rdma/ib_umem_odp.h> 43 #include "dm.h" 44 #include "mlx5_ib.h" 45 #include "umr.h" 46 #include "data_direct.h" 47 #include "dmah.h" 48 49 enum { 50 MAX_PENDING_REG_MR = 8, 51 }; 52 53 #define MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS 4 54 #define MLX5_UMR_ALIGN 2048 55 56 static void 57 create_mkey_callback(int status, struct mlx5_async_work *context); 58 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, 59 u64 iova, int access_flags, 60 unsigned long page_size, bool populate, 61 int access_mode, u16 st_index, u8 ph); 62 static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr); 63 64 static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, 65 struct ib_pd *pd) 66 { 67 struct mlx5_ib_dev *dev = to_mdev(pd->device); 68 69 MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); 70 MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); 71 MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); 72 MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); 73 MLX5_SET(mkc, mkc, lr, 1); 74 75 if (acc & IB_ACCESS_RELAXED_ORDERING) { 76 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) 77 MLX5_SET(mkc, mkc, relaxed_ordering_write, 1); 78 79 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) || 80 (MLX5_CAP_GEN(dev->mdev, 81 relaxed_ordering_read_pci_enabled) && 82 pcie_relaxed_ordering_enabled(dev->mdev->pdev))) 83 MLX5_SET(mkc, mkc, relaxed_ordering_read, 1); 84 } 85 86 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); 87 MLX5_SET(mkc, mkc, qpn, 0xffffff); 88 MLX5_SET64(mkc, mkc, start_addr, start_addr); 89 } 90 91 static void assign_mkey_variant(struct mlx5_ib_dev *dev, u32 *mkey, u32 *in) 92 { 93 u8 key = atomic_inc_return(&dev->mkey_var); 94 void *mkc; 95 96 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 97 MLX5_SET(mkc, mkc, mkey_7_0, key); 98 *mkey = key; 99 } 100 101 static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, 102 struct mlx5_ib_mkey *mkey, u32 *in, int inlen) 103 { 104 int ret; 105 106 assign_mkey_variant(dev, &mkey->key, in); 107 ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen); 108 if (!ret) 109 init_waitqueue_head(&mkey->wait); 110 111 return ret; 112 } 113 114 static int mlx5_ib_create_mkey_cb(struct mlx5r_async_create_mkey *async_create) 115 { 116 struct mlx5_ib_dev *dev = async_create->ent->dev; 117 size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 118 size_t outlen = MLX5_ST_SZ_BYTES(create_mkey_out); 119 120 MLX5_SET(create_mkey_in, async_create->in, opcode, 121 MLX5_CMD_OP_CREATE_MKEY); 122 assign_mkey_variant(dev, &async_create->mkey, async_create->in); 123 return mlx5_cmd_exec_cb(&dev->async_ctx, async_create->in, inlen, 124 async_create->out, outlen, create_mkey_callback, 125 &async_create->cb_work); 126 } 127 128 static int mkey_cache_max_order(struct mlx5_ib_dev *dev); 129 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent); 130 131 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 132 { 133 WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))); 134 135 return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key); 136 } 137 138 static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out) 139 { 140 if (status == -ENXIO) /* core driver is not available */ 141 return; 142 143 mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status); 144 if (status != -EREMOTEIO) /* driver specific failure */ 145 return; 146 147 /* Failed in FW, print cmd out failure details */ 148 mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out); 149 } 150 151 static int push_mkey_locked(struct mlx5_cache_ent *ent, u32 mkey) 152 { 153 unsigned long tmp = ent->mkeys_queue.ci % NUM_MKEYS_PER_PAGE; 154 struct mlx5_mkeys_page *page; 155 156 lockdep_assert_held(&ent->mkeys_queue.lock); 157 if (ent->mkeys_queue.ci >= 158 ent->mkeys_queue.num_pages * NUM_MKEYS_PER_PAGE) { 159 page = kzalloc(sizeof(*page), GFP_ATOMIC); 160 if (!page) 161 return -ENOMEM; 162 ent->mkeys_queue.num_pages++; 163 list_add_tail(&page->list, &ent->mkeys_queue.pages_list); 164 } else { 165 page = list_last_entry(&ent->mkeys_queue.pages_list, 166 struct mlx5_mkeys_page, list); 167 } 168 169 page->mkeys[tmp] = mkey; 170 ent->mkeys_queue.ci++; 171 return 0; 172 } 173 174 static int pop_mkey_locked(struct mlx5_cache_ent *ent) 175 { 176 unsigned long tmp = (ent->mkeys_queue.ci - 1) % NUM_MKEYS_PER_PAGE; 177 struct mlx5_mkeys_page *last_page; 178 u32 mkey; 179 180 lockdep_assert_held(&ent->mkeys_queue.lock); 181 last_page = list_last_entry(&ent->mkeys_queue.pages_list, 182 struct mlx5_mkeys_page, list); 183 mkey = last_page->mkeys[tmp]; 184 last_page->mkeys[tmp] = 0; 185 ent->mkeys_queue.ci--; 186 if (ent->mkeys_queue.num_pages > 1 && !tmp) { 187 list_del(&last_page->list); 188 ent->mkeys_queue.num_pages--; 189 kfree(last_page); 190 } 191 return mkey; 192 } 193 194 static void create_mkey_callback(int status, struct mlx5_async_work *context) 195 { 196 struct mlx5r_async_create_mkey *mkey_out = 197 container_of(context, struct mlx5r_async_create_mkey, cb_work); 198 struct mlx5_cache_ent *ent = mkey_out->ent; 199 struct mlx5_ib_dev *dev = ent->dev; 200 unsigned long flags; 201 202 if (status) { 203 create_mkey_warn(dev, status, mkey_out->out); 204 kfree(mkey_out); 205 spin_lock_irqsave(&ent->mkeys_queue.lock, flags); 206 ent->pending--; 207 WRITE_ONCE(dev->fill_delay, 1); 208 spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags); 209 mod_timer(&dev->delay_timer, jiffies + HZ); 210 return; 211 } 212 213 mkey_out->mkey |= mlx5_idx_to_mkey( 214 MLX5_GET(create_mkey_out, mkey_out->out, mkey_index)); 215 WRITE_ONCE(dev->cache.last_add, jiffies); 216 217 spin_lock_irqsave(&ent->mkeys_queue.lock, flags); 218 push_mkey_locked(ent, mkey_out->mkey); 219 ent->pending--; 220 /* If we are doing fill_to_high_water then keep going. */ 221 queue_adjust_cache_locked(ent); 222 spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags); 223 kfree(mkey_out); 224 } 225 226 static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs) 227 { 228 int ret = 0; 229 230 switch (access_mode) { 231 case MLX5_MKC_ACCESS_MODE_MTT: 232 ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD / 233 sizeof(struct mlx5_mtt)); 234 break; 235 case MLX5_MKC_ACCESS_MODE_KSM: 236 ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD / 237 sizeof(struct mlx5_klm)); 238 break; 239 default: 240 WARN_ON(1); 241 } 242 return ret; 243 } 244 245 static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) 246 { 247 set_mkc_access_pd_addr_fields(mkc, ent->rb_key.access_flags, 0, 248 ent->dev->umrc.pd); 249 MLX5_SET(mkc, mkc, free, 1); 250 MLX5_SET(mkc, mkc, umr_en, 1); 251 MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3); 252 MLX5_SET(mkc, mkc, access_mode_4_2, 253 (ent->rb_key.access_mode >> 2) & 0x7); 254 MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats); 255 256 MLX5_SET(mkc, mkc, translations_octword_size, 257 get_mkc_octo_size(ent->rb_key.access_mode, 258 ent->rb_key.ndescs)); 259 MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); 260 261 if (ent->rb_key.ph != MLX5_IB_NO_PH) { 262 MLX5_SET(mkc, mkc, pcie_tph_en, 1); 263 MLX5_SET(mkc, mkc, pcie_tph_ph, ent->rb_key.ph); 264 if (ent->rb_key.st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX) 265 MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index, 266 ent->rb_key.st_index); 267 } 268 } 269 270 /* Asynchronously schedule new MRs to be populated in the cache. */ 271 static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) 272 { 273 struct mlx5r_async_create_mkey *async_create; 274 void *mkc; 275 int err = 0; 276 int i; 277 278 for (i = 0; i < num; i++) { 279 async_create = kzalloc(sizeof(struct mlx5r_async_create_mkey), 280 GFP_KERNEL); 281 if (!async_create) 282 return -ENOMEM; 283 mkc = MLX5_ADDR_OF(create_mkey_in, async_create->in, 284 memory_key_mkey_entry); 285 set_cache_mkc(ent, mkc); 286 async_create->ent = ent; 287 288 spin_lock_irq(&ent->mkeys_queue.lock); 289 if (ent->pending >= MAX_PENDING_REG_MR) { 290 err = -EAGAIN; 291 goto free_async_create; 292 } 293 ent->pending++; 294 spin_unlock_irq(&ent->mkeys_queue.lock); 295 296 err = mlx5_ib_create_mkey_cb(async_create); 297 if (err) { 298 mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err); 299 goto err_create_mkey; 300 } 301 } 302 303 return 0; 304 305 err_create_mkey: 306 spin_lock_irq(&ent->mkeys_queue.lock); 307 ent->pending--; 308 free_async_create: 309 spin_unlock_irq(&ent->mkeys_queue.lock); 310 kfree(async_create); 311 return err; 312 } 313 314 /* Synchronously create a MR in the cache */ 315 static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey) 316 { 317 size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 318 void *mkc; 319 u32 *in; 320 int err; 321 322 in = kzalloc(inlen, GFP_KERNEL); 323 if (!in) 324 return -ENOMEM; 325 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 326 set_cache_mkc(ent, mkc); 327 328 err = mlx5_core_create_mkey(ent->dev->mdev, mkey, in, inlen); 329 if (err) 330 goto free_in; 331 332 WRITE_ONCE(ent->dev->cache.last_add, jiffies); 333 free_in: 334 kfree(in); 335 return err; 336 } 337 338 static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) 339 { 340 u32 mkey; 341 342 lockdep_assert_held(&ent->mkeys_queue.lock); 343 if (!ent->mkeys_queue.ci) 344 return; 345 mkey = pop_mkey_locked(ent); 346 spin_unlock_irq(&ent->mkeys_queue.lock); 347 mlx5_core_destroy_mkey(ent->dev->mdev, mkey); 348 spin_lock_irq(&ent->mkeys_queue.lock); 349 } 350 351 static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target, 352 bool limit_fill) 353 __acquires(&ent->mkeys_queue.lock) __releases(&ent->mkeys_queue.lock) 354 { 355 int err; 356 357 lockdep_assert_held(&ent->mkeys_queue.lock); 358 359 while (true) { 360 if (limit_fill) 361 target = ent->limit * 2; 362 if (target == ent->pending + ent->mkeys_queue.ci) 363 return 0; 364 if (target > ent->pending + ent->mkeys_queue.ci) { 365 u32 todo = target - (ent->pending + ent->mkeys_queue.ci); 366 367 spin_unlock_irq(&ent->mkeys_queue.lock); 368 err = add_keys(ent, todo); 369 if (err == -EAGAIN) 370 usleep_range(3000, 5000); 371 spin_lock_irq(&ent->mkeys_queue.lock); 372 if (err) { 373 if (err != -EAGAIN) 374 return err; 375 } else 376 return 0; 377 } else { 378 remove_cache_mr_locked(ent); 379 } 380 } 381 } 382 383 static ssize_t size_write(struct file *filp, const char __user *buf, 384 size_t count, loff_t *pos) 385 { 386 struct mlx5_cache_ent *ent = filp->private_data; 387 u32 target; 388 int err; 389 390 err = kstrtou32_from_user(buf, count, 0, &target); 391 if (err) 392 return err; 393 394 /* 395 * Target is the new value of total_mrs the user requests, however we 396 * cannot free MRs that are in use. Compute the target value for stored 397 * mkeys. 398 */ 399 spin_lock_irq(&ent->mkeys_queue.lock); 400 if (target < ent->in_use) { 401 err = -EINVAL; 402 goto err_unlock; 403 } 404 target = target - ent->in_use; 405 if (target < ent->limit || target > ent->limit*2) { 406 err = -EINVAL; 407 goto err_unlock; 408 } 409 err = resize_available_mrs(ent, target, false); 410 if (err) 411 goto err_unlock; 412 spin_unlock_irq(&ent->mkeys_queue.lock); 413 414 return count; 415 416 err_unlock: 417 spin_unlock_irq(&ent->mkeys_queue.lock); 418 return err; 419 } 420 421 static ssize_t size_read(struct file *filp, char __user *buf, size_t count, 422 loff_t *pos) 423 { 424 struct mlx5_cache_ent *ent = filp->private_data; 425 char lbuf[20]; 426 int err; 427 428 err = snprintf(lbuf, sizeof(lbuf), "%ld\n", 429 ent->mkeys_queue.ci + ent->in_use); 430 if (err < 0) 431 return err; 432 433 return simple_read_from_buffer(buf, count, pos, lbuf, err); 434 } 435 436 static const struct file_operations size_fops = { 437 .owner = THIS_MODULE, 438 .open = simple_open, 439 .write = size_write, 440 .read = size_read, 441 }; 442 443 static ssize_t limit_write(struct file *filp, const char __user *buf, 444 size_t count, loff_t *pos) 445 { 446 struct mlx5_cache_ent *ent = filp->private_data; 447 u32 var; 448 int err; 449 450 err = kstrtou32_from_user(buf, count, 0, &var); 451 if (err) 452 return err; 453 454 /* 455 * Upon set we immediately fill the cache to high water mark implied by 456 * the limit. 457 */ 458 spin_lock_irq(&ent->mkeys_queue.lock); 459 ent->limit = var; 460 err = resize_available_mrs(ent, 0, true); 461 spin_unlock_irq(&ent->mkeys_queue.lock); 462 if (err) 463 return err; 464 return count; 465 } 466 467 static ssize_t limit_read(struct file *filp, char __user *buf, size_t count, 468 loff_t *pos) 469 { 470 struct mlx5_cache_ent *ent = filp->private_data; 471 char lbuf[20]; 472 int err; 473 474 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); 475 if (err < 0) 476 return err; 477 478 return simple_read_from_buffer(buf, count, pos, lbuf, err); 479 } 480 481 static const struct file_operations limit_fops = { 482 .owner = THIS_MODULE, 483 .open = simple_open, 484 .write = limit_write, 485 .read = limit_read, 486 }; 487 488 static bool someone_adding(struct mlx5_mkey_cache *cache) 489 { 490 struct mlx5_cache_ent *ent; 491 struct rb_node *node; 492 bool ret; 493 494 mutex_lock(&cache->rb_lock); 495 for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) { 496 ent = rb_entry(node, struct mlx5_cache_ent, node); 497 spin_lock_irq(&ent->mkeys_queue.lock); 498 ret = ent->mkeys_queue.ci < ent->limit; 499 spin_unlock_irq(&ent->mkeys_queue.lock); 500 if (ret) { 501 mutex_unlock(&cache->rb_lock); 502 return true; 503 } 504 } 505 mutex_unlock(&cache->rb_lock); 506 return false; 507 } 508 509 /* 510 * Check if the bucket is outside the high/low water mark and schedule an async 511 * update. The cache refill has hysteresis, once the low water mark is hit it is 512 * refilled up to the high mark. 513 */ 514 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) 515 { 516 lockdep_assert_held(&ent->mkeys_queue.lock); 517 518 if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp) 519 return; 520 if (ent->mkeys_queue.ci < ent->limit) { 521 ent->fill_to_high_water = true; 522 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); 523 } else if (ent->fill_to_high_water && 524 ent->mkeys_queue.ci + ent->pending < 2 * ent->limit) { 525 /* 526 * Once we start populating due to hitting a low water mark 527 * continue until we pass the high water mark. 528 */ 529 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); 530 } else if (ent->mkeys_queue.ci == 2 * ent->limit) { 531 ent->fill_to_high_water = false; 532 } else if (ent->mkeys_queue.ci > 2 * ent->limit) { 533 /* Queue deletion of excess entries */ 534 ent->fill_to_high_water = false; 535 if (ent->pending) 536 queue_delayed_work(ent->dev->cache.wq, &ent->dwork, 537 secs_to_jiffies(1)); 538 else 539 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); 540 } 541 } 542 543 static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) 544 { 545 u32 mkey; 546 547 spin_lock_irq(&ent->mkeys_queue.lock); 548 while (ent->mkeys_queue.ci) { 549 mkey = pop_mkey_locked(ent); 550 spin_unlock_irq(&ent->mkeys_queue.lock); 551 mlx5_core_destroy_mkey(dev->mdev, mkey); 552 spin_lock_irq(&ent->mkeys_queue.lock); 553 } 554 ent->tmp_cleanup_scheduled = false; 555 spin_unlock_irq(&ent->mkeys_queue.lock); 556 } 557 558 static void __cache_work_func(struct mlx5_cache_ent *ent) 559 { 560 struct mlx5_ib_dev *dev = ent->dev; 561 struct mlx5_mkey_cache *cache = &dev->cache; 562 int err; 563 564 spin_lock_irq(&ent->mkeys_queue.lock); 565 if (ent->disabled) 566 goto out; 567 568 if (ent->fill_to_high_water && 569 ent->mkeys_queue.ci + ent->pending < 2 * ent->limit && 570 !READ_ONCE(dev->fill_delay)) { 571 spin_unlock_irq(&ent->mkeys_queue.lock); 572 err = add_keys(ent, 1); 573 spin_lock_irq(&ent->mkeys_queue.lock); 574 if (ent->disabled) 575 goto out; 576 if (err) { 577 /* 578 * EAGAIN only happens if there are pending MRs, so we 579 * will be rescheduled when storing them. The only 580 * failure path here is ENOMEM. 581 */ 582 if (err != -EAGAIN) { 583 mlx5_ib_warn( 584 dev, 585 "add keys command failed, err %d\n", 586 err); 587 queue_delayed_work(cache->wq, &ent->dwork, 588 secs_to_jiffies(1)); 589 } 590 } 591 } else if (ent->mkeys_queue.ci > 2 * ent->limit) { 592 bool need_delay; 593 594 /* 595 * The remove_cache_mr() logic is performed as garbage 596 * collection task. Such task is intended to be run when no 597 * other active processes are running. 598 * 599 * The need_resched() will return TRUE if there are user tasks 600 * to be activated in near future. 601 * 602 * In such case, we don't execute remove_cache_mr() and postpone 603 * the garbage collection work to try to run in next cycle, in 604 * order to free CPU resources to other tasks. 605 */ 606 spin_unlock_irq(&ent->mkeys_queue.lock); 607 need_delay = need_resched() || someone_adding(cache) || 608 !time_after(jiffies, 609 READ_ONCE(cache->last_add) + 300 * HZ); 610 spin_lock_irq(&ent->mkeys_queue.lock); 611 if (ent->disabled) 612 goto out; 613 if (need_delay) { 614 queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); 615 goto out; 616 } 617 remove_cache_mr_locked(ent); 618 queue_adjust_cache_locked(ent); 619 } 620 out: 621 spin_unlock_irq(&ent->mkeys_queue.lock); 622 } 623 624 static void delayed_cache_work_func(struct work_struct *work) 625 { 626 struct mlx5_cache_ent *ent; 627 628 ent = container_of(work, struct mlx5_cache_ent, dwork.work); 629 /* temp entries are never filled, only cleaned */ 630 if (ent->is_tmp) 631 clean_keys(ent->dev, ent); 632 else 633 __cache_work_func(ent); 634 } 635 636 static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1, 637 struct mlx5r_cache_rb_key key2) 638 { 639 int res; 640 641 res = key1.ats - key2.ats; 642 if (res) 643 return res; 644 645 res = key1.access_mode - key2.access_mode; 646 if (res) 647 return res; 648 649 res = key1.access_flags - key2.access_flags; 650 if (res) 651 return res; 652 653 res = key1.st_index - key2.st_index; 654 if (res) 655 return res; 656 657 res = key1.ph - key2.ph; 658 if (res) 659 return res; 660 661 /* 662 * keep ndescs the last in the compare table since the find function 663 * searches for an exact match on all properties and only closest 664 * match in size. 665 */ 666 return key1.ndescs - key2.ndescs; 667 } 668 669 static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache, 670 struct mlx5_cache_ent *ent) 671 { 672 struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL; 673 struct mlx5_cache_ent *cur; 674 int cmp; 675 676 /* Figure out where to put new node */ 677 while (*new) { 678 cur = rb_entry(*new, struct mlx5_cache_ent, node); 679 parent = *new; 680 cmp = cache_ent_key_cmp(cur->rb_key, ent->rb_key); 681 if (cmp > 0) 682 new = &((*new)->rb_left); 683 if (cmp < 0) 684 new = &((*new)->rb_right); 685 if (cmp == 0) 686 return -EEXIST; 687 } 688 689 /* Add new node and rebalance tree. */ 690 rb_link_node(&ent->node, parent, new); 691 rb_insert_color(&ent->node, &cache->rb_root); 692 693 return 0; 694 } 695 696 static struct mlx5_cache_ent * 697 mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev, 698 struct mlx5r_cache_rb_key rb_key) 699 { 700 struct rb_node *node = dev->cache.rb_root.rb_node; 701 struct mlx5_cache_ent *cur, *smallest = NULL; 702 u64 ndescs_limit; 703 int cmp; 704 705 /* 706 * Find the smallest ent with order >= requested_order. 707 */ 708 while (node) { 709 cur = rb_entry(node, struct mlx5_cache_ent, node); 710 cmp = cache_ent_key_cmp(cur->rb_key, rb_key); 711 if (cmp > 0) { 712 smallest = cur; 713 node = node->rb_left; 714 } 715 if (cmp < 0) 716 node = node->rb_right; 717 if (cmp == 0) 718 return cur; 719 } 720 721 /* 722 * Limit the usage of mkeys larger than twice the required size while 723 * also allowing the usage of smallest cache entry for small MRs. 724 */ 725 ndescs_limit = max_t(u64, rb_key.ndescs * 2, 726 MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS); 727 728 return (smallest && 729 smallest->rb_key.access_mode == rb_key.access_mode && 730 smallest->rb_key.access_flags == rb_key.access_flags && 731 smallest->rb_key.ats == rb_key.ats && 732 smallest->rb_key.st_index == rb_key.st_index && 733 smallest->rb_key.ph == rb_key.ph && 734 smallest->rb_key.ndescs <= ndescs_limit) ? 735 smallest : 736 NULL; 737 } 738 739 static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, 740 struct mlx5_cache_ent *ent) 741 { 742 struct mlx5_ib_mr *mr; 743 int err; 744 745 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 746 if (!mr) 747 return ERR_PTR(-ENOMEM); 748 749 spin_lock_irq(&ent->mkeys_queue.lock); 750 ent->in_use++; 751 752 if (!ent->mkeys_queue.ci) { 753 queue_adjust_cache_locked(ent); 754 ent->miss++; 755 spin_unlock_irq(&ent->mkeys_queue.lock); 756 err = create_cache_mkey(ent, &mr->mmkey.key); 757 if (err) { 758 spin_lock_irq(&ent->mkeys_queue.lock); 759 ent->in_use--; 760 spin_unlock_irq(&ent->mkeys_queue.lock); 761 kfree(mr); 762 return ERR_PTR(err); 763 } 764 } else { 765 mr->mmkey.key = pop_mkey_locked(ent); 766 queue_adjust_cache_locked(ent); 767 spin_unlock_irq(&ent->mkeys_queue.lock); 768 } 769 mr->mmkey.cache_ent = ent; 770 mr->mmkey.type = MLX5_MKEY_MR; 771 mr->mmkey.rb_key = ent->rb_key; 772 mr->mmkey.cacheable = true; 773 init_waitqueue_head(&mr->mmkey.wait); 774 return mr; 775 } 776 777 static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev, 778 int access_flags) 779 { 780 int ret = 0; 781 782 if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) && 783 MLX5_CAP_GEN(dev->mdev, atomic) && 784 MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) 785 ret |= IB_ACCESS_REMOTE_ATOMIC; 786 787 if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && 788 MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && 789 !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) 790 ret |= IB_ACCESS_RELAXED_ORDERING; 791 792 if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && 793 (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) || 794 MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled)) && 795 !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) 796 ret |= IB_ACCESS_RELAXED_ORDERING; 797 798 return ret; 799 } 800 801 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, 802 int access_flags, int access_mode, 803 int ndescs) 804 { 805 struct mlx5r_cache_rb_key rb_key = { 806 .ndescs = ndescs, 807 .access_mode = access_mode, 808 .access_flags = get_unchangeable_access_flags(dev, access_flags), 809 .ph = MLX5_IB_NO_PH, 810 }; 811 struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key); 812 813 if (!ent) 814 return ERR_PTR(-EOPNOTSUPP); 815 816 return _mlx5_mr_cache_alloc(dev, ent); 817 } 818 819 static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) 820 { 821 if (!mlx5_debugfs_root || dev->is_rep) 822 return; 823 824 debugfs_remove_recursive(dev->cache.fs_root); 825 dev->cache.fs_root = NULL; 826 } 827 828 static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev, 829 struct mlx5_cache_ent *ent) 830 { 831 int order = order_base_2(ent->rb_key.ndescs); 832 struct dentry *dir; 833 834 if (!mlx5_debugfs_root || dev->is_rep) 835 return; 836 837 if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) 838 order = MLX5_IMR_KSM_CACHE_ENTRY + 2; 839 840 sprintf(ent->name, "%d", order); 841 dir = debugfs_create_dir(ent->name, dev->cache.fs_root); 842 debugfs_create_file("size", 0600, dir, ent, &size_fops); 843 debugfs_create_file("limit", 0600, dir, ent, &limit_fops); 844 debugfs_create_ulong("cur", 0400, dir, &ent->mkeys_queue.ci); 845 debugfs_create_u32("miss", 0600, dir, &ent->miss); 846 } 847 848 static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev) 849 { 850 struct dentry *dbg_root = mlx5_debugfs_get_dev_root(dev->mdev); 851 struct mlx5_mkey_cache *cache = &dev->cache; 852 853 if (!mlx5_debugfs_root || dev->is_rep) 854 return; 855 856 cache->fs_root = debugfs_create_dir("mr_cache", dbg_root); 857 } 858 859 static void delay_time_func(struct timer_list *t) 860 { 861 struct mlx5_ib_dev *dev = timer_container_of(dev, t, delay_timer); 862 863 WRITE_ONCE(dev->fill_delay, 0); 864 } 865 866 static int mlx5r_mkeys_init(struct mlx5_cache_ent *ent) 867 { 868 struct mlx5_mkeys_page *page; 869 870 page = kzalloc(sizeof(*page), GFP_KERNEL); 871 if (!page) 872 return -ENOMEM; 873 INIT_LIST_HEAD(&ent->mkeys_queue.pages_list); 874 spin_lock_init(&ent->mkeys_queue.lock); 875 list_add_tail(&page->list, &ent->mkeys_queue.pages_list); 876 ent->mkeys_queue.num_pages++; 877 return 0; 878 } 879 880 static void mlx5r_mkeys_uninit(struct mlx5_cache_ent *ent) 881 { 882 struct mlx5_mkeys_page *page; 883 884 WARN_ON(ent->mkeys_queue.ci || ent->mkeys_queue.num_pages > 1); 885 page = list_last_entry(&ent->mkeys_queue.pages_list, 886 struct mlx5_mkeys_page, list); 887 list_del(&page->list); 888 kfree(page); 889 } 890 891 struct mlx5_cache_ent * 892 mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev, 893 struct mlx5r_cache_rb_key rb_key, 894 bool persistent_entry) 895 { 896 struct mlx5_cache_ent *ent; 897 int order; 898 int ret; 899 900 ent = kzalloc(sizeof(*ent), GFP_KERNEL); 901 if (!ent) 902 return ERR_PTR(-ENOMEM); 903 904 ret = mlx5r_mkeys_init(ent); 905 if (ret) 906 goto mkeys_err; 907 ent->rb_key = rb_key; 908 ent->dev = dev; 909 ent->is_tmp = !persistent_entry; 910 911 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); 912 913 ret = mlx5_cache_ent_insert(&dev->cache, ent); 914 if (ret) 915 goto ent_insert_err; 916 917 if (persistent_entry) { 918 if (rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) 919 order = MLX5_IMR_KSM_CACHE_ENTRY; 920 else 921 order = order_base_2(rb_key.ndescs) - 2; 922 923 if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && 924 !dev->is_rep && mlx5_core_is_pf(dev->mdev) && 925 mlx5r_umr_can_load_pas(dev, 0)) 926 ent->limit = dev->mdev->profile.mr_cache[order].limit; 927 else 928 ent->limit = 0; 929 930 mlx5_mkey_cache_debugfs_add_ent(dev, ent); 931 } 932 933 return ent; 934 ent_insert_err: 935 mlx5r_mkeys_uninit(ent); 936 mkeys_err: 937 kfree(ent); 938 return ERR_PTR(ret); 939 } 940 941 static void mlx5r_destroy_cache_entries(struct mlx5_ib_dev *dev) 942 { 943 struct rb_root *root = &dev->cache.rb_root; 944 struct mlx5_cache_ent *ent; 945 struct rb_node *node; 946 947 mutex_lock(&dev->cache.rb_lock); 948 node = rb_first(root); 949 while (node) { 950 ent = rb_entry(node, struct mlx5_cache_ent, node); 951 node = rb_next(node); 952 clean_keys(dev, ent); 953 rb_erase(&ent->node, root); 954 mlx5r_mkeys_uninit(ent); 955 kfree(ent); 956 } 957 mutex_unlock(&dev->cache.rb_lock); 958 } 959 960 int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) 961 { 962 struct mlx5_mkey_cache *cache = &dev->cache; 963 struct rb_root *root = &dev->cache.rb_root; 964 struct mlx5r_cache_rb_key rb_key = { 965 .access_mode = MLX5_MKC_ACCESS_MODE_MTT, 966 .ph = MLX5_IB_NO_PH, 967 }; 968 struct mlx5_cache_ent *ent; 969 struct rb_node *node; 970 int ret; 971 int i; 972 973 mutex_init(&dev->slow_path_mutex); 974 mutex_init(&dev->cache.rb_lock); 975 dev->cache.rb_root = RB_ROOT; 976 cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); 977 if (!cache->wq) { 978 mlx5_ib_warn(dev, "failed to create work queue\n"); 979 return -ENOMEM; 980 } 981 982 mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); 983 timer_setup(&dev->delay_timer, delay_time_func, 0); 984 mlx5_mkey_cache_debugfs_init(dev); 985 mutex_lock(&cache->rb_lock); 986 for (i = 0; i <= mkey_cache_max_order(dev); i++) { 987 rb_key.ndescs = MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS << i; 988 ent = mlx5r_cache_create_ent_locked(dev, rb_key, true); 989 if (IS_ERR(ent)) { 990 ret = PTR_ERR(ent); 991 goto err; 992 } 993 } 994 995 ret = mlx5_odp_init_mkey_cache(dev); 996 if (ret) 997 goto err; 998 999 mutex_unlock(&cache->rb_lock); 1000 for (node = rb_first(root); node; node = rb_next(node)) { 1001 ent = rb_entry(node, struct mlx5_cache_ent, node); 1002 spin_lock_irq(&ent->mkeys_queue.lock); 1003 queue_adjust_cache_locked(ent); 1004 spin_unlock_irq(&ent->mkeys_queue.lock); 1005 } 1006 1007 return 0; 1008 1009 err: 1010 mutex_unlock(&cache->rb_lock); 1011 mlx5_mkey_cache_debugfs_cleanup(dev); 1012 mlx5r_destroy_cache_entries(dev); 1013 destroy_workqueue(cache->wq); 1014 mlx5_ib_warn(dev, "failed to create mkey cache entry\n"); 1015 return ret; 1016 } 1017 1018 void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) 1019 { 1020 struct rb_root *root = &dev->cache.rb_root; 1021 struct mlx5_cache_ent *ent; 1022 struct rb_node *node; 1023 1024 if (!dev->cache.wq) 1025 return; 1026 1027 mutex_lock(&dev->cache.rb_lock); 1028 for (node = rb_first(root); node; node = rb_next(node)) { 1029 ent = rb_entry(node, struct mlx5_cache_ent, node); 1030 spin_lock_irq(&ent->mkeys_queue.lock); 1031 ent->disabled = true; 1032 spin_unlock_irq(&ent->mkeys_queue.lock); 1033 cancel_delayed_work(&ent->dwork); 1034 } 1035 mutex_unlock(&dev->cache.rb_lock); 1036 1037 /* 1038 * After all entries are disabled and will not reschedule on WQ, 1039 * flush it and all async commands. 1040 */ 1041 flush_workqueue(dev->cache.wq); 1042 1043 mlx5_mkey_cache_debugfs_cleanup(dev); 1044 mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); 1045 1046 /* At this point all entries are disabled and have no concurrent work. */ 1047 mlx5r_destroy_cache_entries(dev); 1048 1049 destroy_workqueue(dev->cache.wq); 1050 timer_delete_sync(&dev->delay_timer); 1051 } 1052 1053 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) 1054 { 1055 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1056 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1057 struct mlx5_ib_mr *mr; 1058 void *mkc; 1059 u32 *in; 1060 int err; 1061 1062 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1063 if (!mr) 1064 return ERR_PTR(-ENOMEM); 1065 1066 in = kzalloc(inlen, GFP_KERNEL); 1067 if (!in) { 1068 err = -ENOMEM; 1069 goto err_free; 1070 } 1071 1072 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1073 1074 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); 1075 MLX5_SET(mkc, mkc, length64, 1); 1076 set_mkc_access_pd_addr_fields(mkc, acc | IB_ACCESS_RELAXED_ORDERING, 0, 1077 pd); 1078 MLX5_SET(mkc, mkc, ma_translation_mode, MLX5_CAP_GEN(dev->mdev, ats)); 1079 1080 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 1081 if (err) 1082 goto err_in; 1083 1084 kfree(in); 1085 mr->mmkey.type = MLX5_MKEY_MR; 1086 mr->ibmr.lkey = mr->mmkey.key; 1087 mr->ibmr.rkey = mr->mmkey.key; 1088 mr->umem = NULL; 1089 1090 return &mr->ibmr; 1091 1092 err_in: 1093 kfree(in); 1094 1095 err_free: 1096 kfree(mr); 1097 1098 return ERR_PTR(err); 1099 } 1100 1101 static int get_octo_len(u64 addr, u64 len, int page_shift) 1102 { 1103 u64 page_size = 1ULL << page_shift; 1104 u64 offset; 1105 int npages; 1106 1107 offset = addr & (page_size - 1); 1108 npages = ALIGN(len + offset, page_size) >> page_shift; 1109 return (npages + 1) / 2; 1110 } 1111 1112 static int mkey_cache_max_order(struct mlx5_ib_dev *dev) 1113 { 1114 if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) 1115 return MKEY_CACHE_LAST_STD_ENTRY; 1116 return MLX5_MAX_UMR_SHIFT; 1117 } 1118 1119 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, 1120 u64 length, int access_flags, u64 iova) 1121 { 1122 mr->ibmr.lkey = mr->mmkey.key; 1123 mr->ibmr.rkey = mr->mmkey.key; 1124 mr->ibmr.length = length; 1125 mr->ibmr.device = &dev->ib_dev; 1126 mr->ibmr.iova = iova; 1127 mr->access_flags = access_flags; 1128 } 1129 1130 static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem, 1131 u64 iova) 1132 { 1133 /* 1134 * The alignment of iova has already been checked upon entering 1135 * UVERBS_METHOD_REG_DMABUF_MR 1136 */ 1137 umem->iova = iova; 1138 return PAGE_SIZE; 1139 } 1140 1141 static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, 1142 struct ib_umem *umem, u64 iova, 1143 int access_flags, int access_mode, 1144 u16 st_index, u8 ph) 1145 { 1146 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1147 struct mlx5r_cache_rb_key rb_key = {}; 1148 struct mlx5_cache_ent *ent; 1149 struct mlx5_ib_mr *mr; 1150 unsigned long page_size; 1151 1152 if (umem->is_dmabuf) 1153 page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova); 1154 else 1155 page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova, 1156 access_mode); 1157 if (WARN_ON(!page_size)) 1158 return ERR_PTR(-EINVAL); 1159 1160 rb_key.access_mode = access_mode; 1161 rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size); 1162 rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags); 1163 rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags); 1164 rb_key.st_index = st_index; 1165 rb_key.ph = ph; 1166 ent = mkey_cache_ent_from_rb_key(dev, rb_key); 1167 /* 1168 * If the MR can't come from the cache then synchronously create an uncached 1169 * one. 1170 */ 1171 if (!ent) { 1172 mutex_lock(&dev->slow_path_mutex); 1173 mr = reg_create(pd, umem, iova, access_flags, page_size, false, access_mode, 1174 st_index, ph); 1175 mutex_unlock(&dev->slow_path_mutex); 1176 if (IS_ERR(mr)) 1177 return mr; 1178 mr->mmkey.rb_key = rb_key; 1179 mr->mmkey.cacheable = true; 1180 return mr; 1181 } 1182 1183 mr = _mlx5_mr_cache_alloc(dev, ent); 1184 if (IS_ERR(mr)) 1185 return mr; 1186 1187 mr->ibmr.pd = pd; 1188 mr->umem = umem; 1189 mr->page_shift = order_base_2(page_size); 1190 set_mr_fields(dev, mr, umem->length, access_flags, iova); 1191 1192 return mr; 1193 } 1194 1195 static struct ib_mr * 1196 reg_create_crossing_vhca_mr(struct ib_pd *pd, u64 iova, u64 length, int access_flags, 1197 u32 crossed_lkey) 1198 { 1199 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1200 int access_mode = MLX5_MKC_ACCESS_MODE_CROSSING; 1201 struct mlx5_ib_mr *mr; 1202 void *mkc; 1203 int inlen; 1204 u32 *in; 1205 int err; 1206 1207 if (!MLX5_CAP_GEN(dev->mdev, crossing_vhca_mkey)) 1208 return ERR_PTR(-EOPNOTSUPP); 1209 1210 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1211 if (!mr) 1212 return ERR_PTR(-ENOMEM); 1213 1214 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1215 in = kvzalloc(inlen, GFP_KERNEL); 1216 if (!in) { 1217 err = -ENOMEM; 1218 goto err_1; 1219 } 1220 1221 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1222 MLX5_SET(mkc, mkc, crossing_target_vhca_id, 1223 MLX5_CAP_GEN(dev->mdev, vhca_id)); 1224 MLX5_SET(mkc, mkc, translations_octword_size, crossed_lkey); 1225 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); 1226 MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); 1227 1228 /* for this crossing mkey IOVA should be 0 and len should be IOVA + len */ 1229 set_mkc_access_pd_addr_fields(mkc, access_flags, 0, pd); 1230 MLX5_SET64(mkc, mkc, len, iova + length); 1231 1232 MLX5_SET(mkc, mkc, free, 0); 1233 MLX5_SET(mkc, mkc, umr_en, 0); 1234 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 1235 if (err) 1236 goto err_2; 1237 1238 mr->mmkey.type = MLX5_MKEY_MR; 1239 set_mr_fields(dev, mr, length, access_flags, iova); 1240 mr->ibmr.pd = pd; 1241 kvfree(in); 1242 mlx5_ib_dbg(dev, "crossing mkey = 0x%x\n", mr->mmkey.key); 1243 1244 return &mr->ibmr; 1245 err_2: 1246 kvfree(in); 1247 err_1: 1248 kfree(mr); 1249 return ERR_PTR(err); 1250 } 1251 1252 /* 1253 * If ibmr is NULL it will be allocated by reg_create. 1254 * Else, the given ibmr will be used. 1255 */ 1256 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, 1257 u64 iova, int access_flags, 1258 unsigned long page_size, bool populate, 1259 int access_mode, u16 st_index, u8 ph) 1260 { 1261 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1262 struct mlx5_ib_mr *mr; 1263 __be64 *pas; 1264 void *mkc; 1265 int inlen; 1266 u32 *in; 1267 int err; 1268 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)) && 1269 (access_mode == MLX5_MKC_ACCESS_MODE_MTT) && 1270 (ph == MLX5_IB_NO_PH); 1271 bool ksm_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM); 1272 1273 if (!page_size) 1274 return ERR_PTR(-EINVAL); 1275 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1276 if (!mr) 1277 return ERR_PTR(-ENOMEM); 1278 1279 mr->ibmr.pd = pd; 1280 mr->access_flags = access_flags; 1281 mr->page_shift = order_base_2(page_size); 1282 1283 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1284 if (populate) 1285 inlen += sizeof(*pas) * 1286 roundup(ib_umem_num_dma_blocks(umem, page_size), 2); 1287 in = kvzalloc(inlen, GFP_KERNEL); 1288 if (!in) { 1289 err = -ENOMEM; 1290 goto err_1; 1291 } 1292 pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 1293 if (populate) { 1294 if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND || ksm_mode)) { 1295 err = -EINVAL; 1296 goto err_2; 1297 } 1298 mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas, 1299 pg_cap ? MLX5_IB_MTT_PRESENT : 0); 1300 } 1301 1302 /* The pg_access bit allows setting the access flags 1303 * in the page list submitted with the command. 1304 */ 1305 MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); 1306 1307 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1308 set_mkc_access_pd_addr_fields(mkc, access_flags, iova, 1309 populate ? pd : dev->umrc.pd); 1310 /* In case a data direct flow, overwrite the pdn field by its internal kernel PD */ 1311 if (umem->is_dmabuf && ksm_mode) 1312 MLX5_SET(mkc, mkc, pd, dev->ddr.pdn); 1313 1314 MLX5_SET(mkc, mkc, free, !populate); 1315 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode); 1316 MLX5_SET(mkc, mkc, umr_en, 1); 1317 1318 MLX5_SET64(mkc, mkc, len, umem->length); 1319 MLX5_SET(mkc, mkc, bsf_octword_size, 0); 1320 if (ksm_mode) 1321 MLX5_SET(mkc, mkc, translations_octword_size, 1322 get_octo_len(iova, umem->length, mr->page_shift) * 2); 1323 else 1324 MLX5_SET(mkc, mkc, translations_octword_size, 1325 get_octo_len(iova, umem->length, mr->page_shift)); 1326 MLX5_SET(mkc, mkc, log_page_size, mr->page_shift); 1327 if (mlx5_umem_needs_ats(dev, umem, access_flags)) 1328 MLX5_SET(mkc, mkc, ma_translation_mode, 1); 1329 if (populate) { 1330 MLX5_SET(create_mkey_in, in, translations_octword_actual_size, 1331 get_octo_len(iova, umem->length, mr->page_shift)); 1332 } 1333 1334 if (ph != MLX5_IB_NO_PH) { 1335 MLX5_SET(mkc, mkc, pcie_tph_en, 1); 1336 MLX5_SET(mkc, mkc, pcie_tph_ph, ph); 1337 if (st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX) 1338 MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index, st_index); 1339 } 1340 1341 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 1342 if (err) { 1343 mlx5_ib_warn(dev, "create mkey failed\n"); 1344 goto err_2; 1345 } 1346 mr->mmkey.type = MLX5_MKEY_MR; 1347 mr->mmkey.ndescs = get_octo_len(iova, umem->length, mr->page_shift); 1348 mr->umem = umem; 1349 set_mr_fields(dev, mr, umem->length, access_flags, iova); 1350 kvfree(in); 1351 1352 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); 1353 1354 return mr; 1355 1356 err_2: 1357 kvfree(in); 1358 err_1: 1359 kfree(mr); 1360 return ERR_PTR(err); 1361 } 1362 1363 static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, 1364 u64 length, int acc, int mode) 1365 { 1366 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1367 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1368 struct mlx5_ib_mr *mr; 1369 void *mkc; 1370 u32 *in; 1371 int err; 1372 1373 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1374 if (!mr) 1375 return ERR_PTR(-ENOMEM); 1376 1377 in = kzalloc(inlen, GFP_KERNEL); 1378 if (!in) { 1379 err = -ENOMEM; 1380 goto err_free; 1381 } 1382 1383 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1384 1385 MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); 1386 MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); 1387 MLX5_SET64(mkc, mkc, len, length); 1388 set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd); 1389 1390 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 1391 if (err) 1392 goto err_in; 1393 1394 kfree(in); 1395 1396 set_mr_fields(dev, mr, length, acc, start_addr); 1397 1398 return &mr->ibmr; 1399 1400 err_in: 1401 kfree(in); 1402 1403 err_free: 1404 kfree(mr); 1405 1406 return ERR_PTR(err); 1407 } 1408 1409 int mlx5_ib_advise_mr(struct ib_pd *pd, 1410 enum ib_uverbs_advise_mr_advice advice, 1411 u32 flags, 1412 struct ib_sge *sg_list, 1413 u32 num_sge, 1414 struct uverbs_attr_bundle *attrs) 1415 { 1416 if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH && 1417 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && 1418 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT) 1419 return -EOPNOTSUPP; 1420 1421 return mlx5_ib_advise_mr_prefetch(pd, advice, flags, 1422 sg_list, num_sge); 1423 } 1424 1425 struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, 1426 struct ib_dm_mr_attr *attr, 1427 struct uverbs_attr_bundle *attrs) 1428 { 1429 struct mlx5_ib_dm *mdm = to_mdm(dm); 1430 struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev; 1431 u64 start_addr = mdm->dev_addr + attr->offset; 1432 int mode; 1433 1434 switch (mdm->type) { 1435 case MLX5_IB_UAPI_DM_TYPE_MEMIC: 1436 if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS) 1437 return ERR_PTR(-EINVAL); 1438 1439 mode = MLX5_MKC_ACCESS_MODE_MEMIC; 1440 start_addr -= pci_resource_start(dev->pdev, 0); 1441 break; 1442 case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: 1443 case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: 1444 case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM: 1445 case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM: 1446 if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS) 1447 return ERR_PTR(-EINVAL); 1448 1449 mode = MLX5_MKC_ACCESS_MODE_SW_ICM; 1450 break; 1451 default: 1452 return ERR_PTR(-EINVAL); 1453 } 1454 1455 return mlx5_ib_get_dm_mr(pd, start_addr, attr->length, 1456 attr->access_flags, mode); 1457 } 1458 1459 static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, 1460 u64 iova, int access_flags, 1461 struct ib_dmah *dmah) 1462 { 1463 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1464 struct mlx5_ib_mr *mr = NULL; 1465 bool xlt_with_umr; 1466 u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX; 1467 u8 ph = MLX5_IB_NO_PH; 1468 int err; 1469 1470 if (dmah) { 1471 struct mlx5_ib_dmah *mdmah = to_mdmah(dmah); 1472 1473 ph = dmah->ph; 1474 if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS)) 1475 st_index = mdmah->st_index; 1476 } 1477 1478 xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length); 1479 if (xlt_with_umr) { 1480 mr = alloc_cacheable_mr(pd, umem, iova, access_flags, 1481 MLX5_MKC_ACCESS_MODE_MTT, 1482 st_index, ph); 1483 } else { 1484 unsigned long page_size = mlx5_umem_mkc_find_best_pgsz( 1485 dev, umem, iova, MLX5_MKC_ACCESS_MODE_MTT); 1486 1487 mutex_lock(&dev->slow_path_mutex); 1488 mr = reg_create(pd, umem, iova, access_flags, page_size, 1489 true, MLX5_MKC_ACCESS_MODE_MTT, 1490 st_index, ph); 1491 mutex_unlock(&dev->slow_path_mutex); 1492 } 1493 if (IS_ERR(mr)) { 1494 ib_umem_release(umem); 1495 return ERR_CAST(mr); 1496 } 1497 1498 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); 1499 1500 atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); 1501 1502 if (xlt_with_umr) { 1503 /* 1504 * If the MR was created with reg_create then it will be 1505 * configured properly but left disabled. It is safe to go ahead 1506 * and configure it again via UMR while enabling it. 1507 */ 1508 err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); 1509 if (err) { 1510 mlx5_ib_dereg_mr(&mr->ibmr, NULL); 1511 return ERR_PTR(err); 1512 } 1513 } 1514 return &mr->ibmr; 1515 } 1516 1517 static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, 1518 u64 iova, int access_flags, 1519 struct ib_udata *udata) 1520 { 1521 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1522 struct ib_umem_odp *odp; 1523 struct mlx5_ib_mr *mr; 1524 int err; 1525 1526 if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) 1527 return ERR_PTR(-EOPNOTSUPP); 1528 1529 err = mlx5r_odp_create_eq(dev, &dev->odp_pf_eq); 1530 if (err) 1531 return ERR_PTR(err); 1532 if (!start && length == U64_MAX) { 1533 if (iova != 0) 1534 return ERR_PTR(-EINVAL); 1535 if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) 1536 return ERR_PTR(-EINVAL); 1537 1538 mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags); 1539 if (IS_ERR(mr)) 1540 return ERR_CAST(mr); 1541 return &mr->ibmr; 1542 } 1543 1544 /* ODP requires xlt update via umr to work. */ 1545 if (!mlx5r_umr_can_load_pas(dev, length)) 1546 return ERR_PTR(-EINVAL); 1547 1548 odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, 1549 &mlx5_mn_ops); 1550 if (IS_ERR(odp)) 1551 return ERR_CAST(odp); 1552 1553 mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags, 1554 MLX5_MKC_ACCESS_MODE_MTT, 1555 MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX, 1556 MLX5_IB_NO_PH); 1557 if (IS_ERR(mr)) { 1558 ib_umem_release(&odp->umem); 1559 return ERR_CAST(mr); 1560 } 1561 xa_init(&mr->implicit_children); 1562 1563 odp->private = mr; 1564 err = mlx5r_store_odp_mkey(dev, &mr->mmkey); 1565 if (err) 1566 goto err_dereg_mr; 1567 1568 err = mlx5_ib_init_odp_mr(mr); 1569 if (err) 1570 goto err_dereg_mr; 1571 return &mr->ibmr; 1572 1573 err_dereg_mr: 1574 mlx5_ib_dereg_mr(&mr->ibmr, NULL); 1575 return ERR_PTR(err); 1576 } 1577 1578 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 1579 u64 iova, int access_flags, 1580 struct ib_dmah *dmah, 1581 struct ib_udata *udata) 1582 { 1583 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1584 struct ib_umem *umem; 1585 int err; 1586 1587 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || 1588 ((access_flags & IB_ACCESS_ON_DEMAND) && dmah)) 1589 return ERR_PTR(-EOPNOTSUPP); 1590 1591 mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", 1592 start, iova, length, access_flags); 1593 1594 err = mlx5r_umr_resource_init(dev); 1595 if (err) 1596 return ERR_PTR(err); 1597 1598 if (access_flags & IB_ACCESS_ON_DEMAND) 1599 return create_user_odp_mr(pd, start, length, iova, access_flags, 1600 udata); 1601 umem = ib_umem_get(&dev->ib_dev, start, length, access_flags); 1602 if (IS_ERR(umem)) 1603 return ERR_CAST(umem); 1604 return create_real_mr(pd, umem, iova, access_flags, dmah); 1605 } 1606 1607 static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) 1608 { 1609 struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv; 1610 struct mlx5_ib_mr *mr = umem_dmabuf->private; 1611 1612 dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); 1613 1614 if (!umem_dmabuf->sgt || !mr) 1615 return; 1616 1617 mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); 1618 ib_umem_dmabuf_unmap_pages(umem_dmabuf); 1619 } 1620 1621 static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = { 1622 .allow_peer2peer = 1, 1623 .move_notify = mlx5_ib_dmabuf_invalidate_cb, 1624 }; 1625 1626 static struct ib_mr * 1627 reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device, 1628 u64 offset, u64 length, u64 virt_addr, 1629 int fd, int access_flags, int access_mode, 1630 struct ib_dmah *dmah) 1631 { 1632 bool pinned_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM); 1633 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1634 struct mlx5_ib_mr *mr = NULL; 1635 struct ib_umem_dmabuf *umem_dmabuf; 1636 u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX; 1637 u8 ph = MLX5_IB_NO_PH; 1638 int err; 1639 1640 err = mlx5r_umr_resource_init(dev); 1641 if (err) 1642 return ERR_PTR(err); 1643 1644 if (!pinned_mode) 1645 umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, 1646 offset, length, fd, 1647 access_flags, 1648 &mlx5_ib_dmabuf_attach_ops); 1649 else 1650 umem_dmabuf = ib_umem_dmabuf_get_pinned_with_dma_device(&dev->ib_dev, 1651 dma_device, offset, length, 1652 fd, access_flags); 1653 1654 if (IS_ERR(umem_dmabuf)) { 1655 mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n", 1656 PTR_ERR(umem_dmabuf)); 1657 return ERR_CAST(umem_dmabuf); 1658 } 1659 1660 if (dmah) { 1661 struct mlx5_ib_dmah *mdmah = to_mdmah(dmah); 1662 1663 ph = dmah->ph; 1664 if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS)) 1665 st_index = mdmah->st_index; 1666 } 1667 1668 mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr, 1669 access_flags, access_mode, 1670 st_index, ph); 1671 if (IS_ERR(mr)) { 1672 ib_umem_release(&umem_dmabuf->umem); 1673 return ERR_CAST(mr); 1674 } 1675 1676 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); 1677 1678 atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); 1679 umem_dmabuf->private = mr; 1680 if (!pinned_mode) { 1681 err = mlx5r_store_odp_mkey(dev, &mr->mmkey); 1682 if (err) 1683 goto err_dereg_mr; 1684 } else { 1685 mr->data_direct = true; 1686 } 1687 1688 err = mlx5_ib_init_dmabuf_mr(mr); 1689 if (err) 1690 goto err_dereg_mr; 1691 return &mr->ibmr; 1692 1693 err_dereg_mr: 1694 __mlx5_ib_dereg_mr(&mr->ibmr); 1695 return ERR_PTR(err); 1696 } 1697 1698 static struct ib_mr * 1699 reg_user_mr_dmabuf_by_data_direct(struct ib_pd *pd, u64 offset, 1700 u64 length, u64 virt_addr, 1701 int fd, int access_flags) 1702 { 1703 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1704 struct mlx5_data_direct_dev *data_direct_dev; 1705 struct ib_mr *crossing_mr; 1706 struct ib_mr *crossed_mr; 1707 int ret = 0; 1708 1709 /* As of HW behaviour the IOVA must be page aligned in KSM mode */ 1710 if (!PAGE_ALIGNED(virt_addr) || (access_flags & IB_ACCESS_ON_DEMAND)) 1711 return ERR_PTR(-EOPNOTSUPP); 1712 1713 mutex_lock(&dev->data_direct_lock); 1714 data_direct_dev = dev->data_direct_dev; 1715 if (!data_direct_dev) { 1716 ret = -EINVAL; 1717 goto end; 1718 } 1719 1720 /* The device's 'data direct mkey' was created without RO flags to 1721 * simplify things and allow for a single mkey per device. 1722 * Since RO is not a must, mask it out accordingly. 1723 */ 1724 access_flags &= ~IB_ACCESS_RELAXED_ORDERING; 1725 crossed_mr = reg_user_mr_dmabuf(pd, &data_direct_dev->pdev->dev, 1726 offset, length, virt_addr, fd, 1727 access_flags, MLX5_MKC_ACCESS_MODE_KSM, 1728 NULL); 1729 if (IS_ERR(crossed_mr)) { 1730 ret = PTR_ERR(crossed_mr); 1731 goto end; 1732 } 1733 1734 mutex_lock(&dev->slow_path_mutex); 1735 crossing_mr = reg_create_crossing_vhca_mr(pd, virt_addr, length, access_flags, 1736 crossed_mr->lkey); 1737 mutex_unlock(&dev->slow_path_mutex); 1738 if (IS_ERR(crossing_mr)) { 1739 __mlx5_ib_dereg_mr(crossed_mr); 1740 ret = PTR_ERR(crossing_mr); 1741 goto end; 1742 } 1743 1744 list_add_tail(&to_mmr(crossed_mr)->dd_node, &dev->data_direct_mr_list); 1745 to_mmr(crossing_mr)->dd_crossed_mr = to_mmr(crossed_mr); 1746 to_mmr(crossing_mr)->data_direct = true; 1747 end: 1748 mutex_unlock(&dev->data_direct_lock); 1749 return ret ? ERR_PTR(ret) : crossing_mr; 1750 } 1751 1752 struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset, 1753 u64 length, u64 virt_addr, 1754 int fd, int access_flags, 1755 struct ib_dmah *dmah, 1756 struct uverbs_attr_bundle *attrs) 1757 { 1758 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1759 int mlx5_access_flags = 0; 1760 int err; 1761 1762 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || 1763 !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) 1764 return ERR_PTR(-EOPNOTSUPP); 1765 1766 if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS)) { 1767 err = uverbs_get_flags32(&mlx5_access_flags, attrs, 1768 MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, 1769 MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT); 1770 if (err) 1771 return ERR_PTR(err); 1772 } 1773 1774 mlx5_ib_dbg(dev, 1775 "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x, mlx5_access_flags 0x%x\n", 1776 offset, virt_addr, length, fd, access_flags, mlx5_access_flags); 1777 1778 /* dmabuf requires xlt update via umr to work. */ 1779 if (!mlx5r_umr_can_load_pas(dev, length)) 1780 return ERR_PTR(-EINVAL); 1781 1782 if (mlx5_access_flags & MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT) 1783 return reg_user_mr_dmabuf_by_data_direct(pd, offset, length, virt_addr, 1784 fd, access_flags); 1785 1786 return reg_user_mr_dmabuf(pd, pd->device->dma_device, 1787 offset, length, virt_addr, 1788 fd, access_flags, MLX5_MKC_ACCESS_MODE_MTT, 1789 dmah); 1790 } 1791 1792 /* 1793 * True if the change in access flags can be done via UMR, only some access 1794 * flags can be updated. 1795 */ 1796 static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev, 1797 unsigned int current_access_flags, 1798 unsigned int target_access_flags) 1799 { 1800 unsigned int diffs = current_access_flags ^ target_access_flags; 1801 1802 if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | 1803 IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING | 1804 IB_ACCESS_REMOTE_ATOMIC)) 1805 return false; 1806 return mlx5r_umr_can_reconfig(dev, current_access_flags, 1807 target_access_flags); 1808 } 1809 1810 static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, 1811 struct ib_umem *new_umem, 1812 int new_access_flags, u64 iova, 1813 unsigned long *page_size) 1814 { 1815 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 1816 1817 /* We only track the allocated sizes of MRs from the cache */ 1818 if (!mr->mmkey.cache_ent) 1819 return false; 1820 if (!mlx5r_umr_can_load_pas(dev, new_umem->length)) 1821 return false; 1822 1823 *page_size = mlx5_umem_mkc_find_best_pgsz( 1824 dev, new_umem, iova, mr->mmkey.cache_ent->rb_key.access_mode); 1825 if (WARN_ON(!*page_size)) 1826 return false; 1827 return (mr->mmkey.cache_ent->rb_key.ndescs) >= 1828 ib_umem_num_dma_blocks(new_umem, *page_size); 1829 } 1830 1831 static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, 1832 int access_flags, int flags, struct ib_umem *new_umem, 1833 u64 iova, unsigned long page_size) 1834 { 1835 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 1836 int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE; 1837 struct ib_umem *old_umem = mr->umem; 1838 int err; 1839 1840 /* 1841 * To keep everything simple the MR is revoked before we start to mess 1842 * with it. This ensure the change is atomic relative to any use of the 1843 * MR. 1844 */ 1845 err = mlx5r_umr_revoke_mr(mr); 1846 if (err) 1847 return err; 1848 1849 if (flags & IB_MR_REREG_PD) { 1850 mr->ibmr.pd = pd; 1851 upd_flags |= MLX5_IB_UPD_XLT_PD; 1852 } 1853 if (flags & IB_MR_REREG_ACCESS) { 1854 mr->access_flags = access_flags; 1855 upd_flags |= MLX5_IB_UPD_XLT_ACCESS; 1856 } 1857 1858 mr->ibmr.iova = iova; 1859 mr->ibmr.length = new_umem->length; 1860 mr->page_shift = order_base_2(page_size); 1861 mr->umem = new_umem; 1862 err = mlx5r_umr_update_mr_pas(mr, upd_flags); 1863 if (err) { 1864 /* 1865 * The MR is revoked at this point so there is no issue to free 1866 * new_umem. 1867 */ 1868 mr->umem = old_umem; 1869 return err; 1870 } 1871 1872 atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages); 1873 ib_umem_release(old_umem); 1874 atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages); 1875 return 0; 1876 } 1877 1878 struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, 1879 u64 length, u64 iova, int new_access_flags, 1880 struct ib_pd *new_pd, 1881 struct ib_udata *udata) 1882 { 1883 struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); 1884 struct mlx5_ib_mr *mr = to_mmr(ib_mr); 1885 int err; 1886 1887 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || mr->data_direct || 1888 mr->mmkey.rb_key.ph != MLX5_IB_NO_PH) 1889 return ERR_PTR(-EOPNOTSUPP); 1890 1891 mlx5_ib_dbg( 1892 dev, 1893 "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", 1894 start, iova, length, new_access_flags); 1895 1896 if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) 1897 return ERR_PTR(-EOPNOTSUPP); 1898 1899 if (!(flags & IB_MR_REREG_ACCESS)) 1900 new_access_flags = mr->access_flags; 1901 if (!(flags & IB_MR_REREG_PD)) 1902 new_pd = ib_mr->pd; 1903 1904 if (!(flags & IB_MR_REREG_TRANS)) { 1905 struct ib_umem *umem; 1906 1907 /* Fast path for PD/access change */ 1908 if (can_use_umr_rereg_access(dev, mr->access_flags, 1909 new_access_flags)) { 1910 err = mlx5r_umr_rereg_pd_access(mr, new_pd, 1911 new_access_flags); 1912 if (err) 1913 return ERR_PTR(err); 1914 return NULL; 1915 } 1916 /* DM or ODP MR's don't have a normal umem so we can't re-use it */ 1917 if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) 1918 goto recreate; 1919 1920 /* 1921 * Only one active MR can refer to a umem at one time, revoke 1922 * the old MR before assigning the umem to the new one. 1923 */ 1924 err = mlx5r_umr_revoke_mr(mr); 1925 if (err) 1926 return ERR_PTR(err); 1927 umem = mr->umem; 1928 mr->umem = NULL; 1929 atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); 1930 1931 return create_real_mr(new_pd, umem, mr->ibmr.iova, 1932 new_access_flags, NULL); 1933 } 1934 1935 /* 1936 * DM doesn't have a PAS list so we can't re-use it, odp/dmabuf does 1937 * but the logic around releasing the umem is different 1938 */ 1939 if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) 1940 goto recreate; 1941 1942 if (!(new_access_flags & IB_ACCESS_ON_DEMAND) && 1943 can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) { 1944 struct ib_umem *new_umem; 1945 unsigned long page_size; 1946 1947 new_umem = ib_umem_get(&dev->ib_dev, start, length, 1948 new_access_flags); 1949 if (IS_ERR(new_umem)) 1950 return ERR_CAST(new_umem); 1951 1952 /* Fast path for PAS change */ 1953 if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova, 1954 &page_size)) { 1955 err = umr_rereg_pas(mr, new_pd, new_access_flags, flags, 1956 new_umem, iova, page_size); 1957 if (err) { 1958 ib_umem_release(new_umem); 1959 return ERR_PTR(err); 1960 } 1961 return NULL; 1962 } 1963 return create_real_mr(new_pd, new_umem, iova, new_access_flags, NULL); 1964 } 1965 1966 /* 1967 * Everything else has no state we can preserve, just create a new MR 1968 * from scratch 1969 */ 1970 recreate: 1971 return mlx5_ib_reg_user_mr(new_pd, start, length, iova, 1972 new_access_flags, NULL, udata); 1973 } 1974 1975 static int 1976 mlx5_alloc_priv_descs(struct ib_device *device, 1977 struct mlx5_ib_mr *mr, 1978 int ndescs, 1979 int desc_size) 1980 { 1981 struct mlx5_ib_dev *dev = to_mdev(device); 1982 struct device *ddev = &dev->mdev->pdev->dev; 1983 int size = ndescs * desc_size; 1984 int add_size; 1985 int ret; 1986 1987 add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0); 1988 if (is_power_of_2(MLX5_UMR_ALIGN) && add_size) { 1989 int end = max_t(int, MLX5_UMR_ALIGN, roundup_pow_of_two(size)); 1990 1991 add_size = min_t(int, end - size, add_size); 1992 } 1993 1994 mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL); 1995 if (!mr->descs_alloc) 1996 return -ENOMEM; 1997 1998 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); 1999 2000 mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE); 2001 if (dma_mapping_error(ddev, mr->desc_map)) { 2002 ret = -ENOMEM; 2003 goto err; 2004 } 2005 2006 return 0; 2007 err: 2008 kfree(mr->descs_alloc); 2009 2010 return ret; 2011 } 2012 2013 static void 2014 mlx5_free_priv_descs(struct mlx5_ib_mr *mr) 2015 { 2016 if (!mr->umem && !mr->data_direct && 2017 mr->ibmr.type != IB_MR_TYPE_DM && mr->descs) { 2018 struct ib_device *device = mr->ibmr.device; 2019 int size = mr->max_descs * mr->desc_size; 2020 struct mlx5_ib_dev *dev = to_mdev(device); 2021 2022 dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, 2023 DMA_TO_DEVICE); 2024 kfree(mr->descs_alloc); 2025 mr->descs = NULL; 2026 } 2027 } 2028 2029 static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, 2030 struct mlx5_ib_mr *mr) 2031 { 2032 struct mlx5_mkey_cache *cache = &dev->cache; 2033 struct mlx5_cache_ent *ent; 2034 int ret; 2035 2036 if (mr->mmkey.cache_ent) { 2037 spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); 2038 goto end; 2039 } 2040 2041 mutex_lock(&cache->rb_lock); 2042 ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key); 2043 if (ent) { 2044 if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) { 2045 if (ent->disabled) { 2046 mutex_unlock(&cache->rb_lock); 2047 return -EOPNOTSUPP; 2048 } 2049 mr->mmkey.cache_ent = ent; 2050 spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); 2051 mutex_unlock(&cache->rb_lock); 2052 goto end; 2053 } 2054 } 2055 2056 ent = mlx5r_cache_create_ent_locked(dev, mr->mmkey.rb_key, false); 2057 mutex_unlock(&cache->rb_lock); 2058 if (IS_ERR(ent)) 2059 return PTR_ERR(ent); 2060 2061 mr->mmkey.cache_ent = ent; 2062 spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); 2063 2064 end: 2065 ret = push_mkey_locked(mr->mmkey.cache_ent, mr->mmkey.key); 2066 spin_unlock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); 2067 return ret; 2068 } 2069 2070 static int mlx5_ib_revoke_data_direct_mr(struct mlx5_ib_mr *mr) 2071 { 2072 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 2073 struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem); 2074 int err; 2075 2076 lockdep_assert_held(&dev->data_direct_lock); 2077 mr->revoked = true; 2078 err = mlx5r_umr_revoke_mr(mr); 2079 if (WARN_ON(err)) 2080 return err; 2081 2082 ib_umem_dmabuf_revoke(umem_dmabuf); 2083 return 0; 2084 } 2085 2086 void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev) 2087 { 2088 struct mlx5_ib_mr *mr, *next; 2089 2090 lockdep_assert_held(&dev->data_direct_lock); 2091 2092 list_for_each_entry_safe(mr, next, &dev->data_direct_mr_list, dd_node) { 2093 list_del(&mr->dd_node); 2094 mlx5_ib_revoke_data_direct_mr(mr); 2095 } 2096 } 2097 2098 static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr) 2099 { 2100 bool is_odp_dma_buf = is_dmabuf_mr(mr) && 2101 !to_ib_umem_dmabuf(mr->umem)->pinned; 2102 bool is_odp = is_odp_mr(mr); 2103 int ret; 2104 2105 if (is_odp) 2106 mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); 2107 2108 if (is_odp_dma_buf) 2109 dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, 2110 NULL); 2111 2112 ret = mlx5r_umr_revoke_mr(mr); 2113 2114 if (is_odp) { 2115 if (!ret) 2116 to_ib_umem_odp(mr->umem)->private = NULL; 2117 mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); 2118 } 2119 2120 if (is_odp_dma_buf) { 2121 if (!ret) 2122 to_ib_umem_dmabuf(mr->umem)->private = NULL; 2123 dma_resv_unlock( 2124 to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); 2125 } 2126 2127 return ret; 2128 } 2129 2130 static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr) 2131 { 2132 bool is_odp_dma_buf = is_dmabuf_mr(mr) && 2133 !to_ib_umem_dmabuf(mr->umem)->pinned; 2134 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 2135 struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; 2136 bool is_odp = is_odp_mr(mr); 2137 bool from_cache = !!ent; 2138 int ret; 2139 2140 if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) && 2141 !cache_ent_find_and_store(dev, mr)) { 2142 ent = mr->mmkey.cache_ent; 2143 /* upon storing to a clean temp entry - schedule its cleanup */ 2144 spin_lock_irq(&ent->mkeys_queue.lock); 2145 if (from_cache) 2146 ent->in_use--; 2147 if (ent->is_tmp && !ent->tmp_cleanup_scheduled) { 2148 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 2149 secs_to_jiffies(30)); 2150 ent->tmp_cleanup_scheduled = true; 2151 } 2152 spin_unlock_irq(&ent->mkeys_queue.lock); 2153 return 0; 2154 } 2155 2156 if (ent) { 2157 spin_lock_irq(&ent->mkeys_queue.lock); 2158 ent->in_use--; 2159 mr->mmkey.cache_ent = NULL; 2160 spin_unlock_irq(&ent->mkeys_queue.lock); 2161 } 2162 2163 if (is_odp) 2164 mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); 2165 2166 if (is_odp_dma_buf) 2167 dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, 2168 NULL); 2169 ret = destroy_mkey(dev, mr); 2170 if (is_odp) { 2171 if (!ret) 2172 to_ib_umem_odp(mr->umem)->private = NULL; 2173 mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); 2174 } 2175 2176 if (is_odp_dma_buf) { 2177 if (!ret) 2178 to_ib_umem_dmabuf(mr->umem)->private = NULL; 2179 dma_resv_unlock( 2180 to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); 2181 } 2182 return ret; 2183 } 2184 2185 static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr) 2186 { 2187 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2188 struct mlx5_ib_dev *dev = to_mdev(ibmr->device); 2189 int rc; 2190 2191 /* 2192 * Any async use of the mr must hold the refcount, once the refcount 2193 * goes to zero no other thread, such as ODP page faults, prefetch, any 2194 * UMR activity, etc can touch the mkey. Thus it is safe to destroy it. 2195 */ 2196 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && 2197 refcount_read(&mr->mmkey.usecount) != 0 && 2198 xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))) 2199 mlx5r_deref_wait_odp_mkey(&mr->mmkey); 2200 2201 if (ibmr->type == IB_MR_TYPE_INTEGRITY) { 2202 xa_cmpxchg(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), 2203 mr->sig, NULL, GFP_KERNEL); 2204 2205 if (mr->mtt_mr) { 2206 rc = mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL); 2207 if (rc) 2208 return rc; 2209 mr->mtt_mr = NULL; 2210 } 2211 if (mr->klm_mr) { 2212 rc = mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL); 2213 if (rc) 2214 return rc; 2215 mr->klm_mr = NULL; 2216 } 2217 2218 if (mlx5_core_destroy_psv(dev->mdev, 2219 mr->sig->psv_memory.psv_idx)) 2220 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", 2221 mr->sig->psv_memory.psv_idx); 2222 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) 2223 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", 2224 mr->sig->psv_wire.psv_idx); 2225 kfree(mr->sig); 2226 mr->sig = NULL; 2227 } 2228 2229 /* Stop DMA */ 2230 rc = mlx5r_handle_mkey_cleanup(mr); 2231 if (rc) 2232 return rc; 2233 2234 if (mr->umem) { 2235 bool is_odp = is_odp_mr(mr); 2236 2237 if (!is_odp) 2238 atomic_sub(ib_umem_num_pages(mr->umem), 2239 &dev->mdev->priv.reg_pages); 2240 ib_umem_release(mr->umem); 2241 if (is_odp) 2242 mlx5_ib_free_odp_mr(mr); 2243 } 2244 2245 if (!mr->mmkey.cache_ent) 2246 mlx5_free_priv_descs(mr); 2247 2248 kfree(mr); 2249 return 0; 2250 } 2251 2252 static int dereg_crossing_data_direct_mr(struct mlx5_ib_dev *dev, 2253 struct mlx5_ib_mr *mr) 2254 { 2255 struct mlx5_ib_mr *dd_crossed_mr = mr->dd_crossed_mr; 2256 int ret; 2257 2258 ret = __mlx5_ib_dereg_mr(&mr->ibmr); 2259 if (ret) 2260 return ret; 2261 2262 mutex_lock(&dev->data_direct_lock); 2263 if (!dd_crossed_mr->revoked) 2264 list_del(&dd_crossed_mr->dd_node); 2265 2266 ret = __mlx5_ib_dereg_mr(&dd_crossed_mr->ibmr); 2267 mutex_unlock(&dev->data_direct_lock); 2268 return ret; 2269 } 2270 2271 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) 2272 { 2273 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2274 struct mlx5_ib_dev *dev = to_mdev(ibmr->device); 2275 2276 if (mr->data_direct) 2277 return dereg_crossing_data_direct_mr(dev, mr); 2278 2279 return __mlx5_ib_dereg_mr(ibmr); 2280 } 2281 2282 static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs, 2283 int access_mode, int page_shift) 2284 { 2285 struct mlx5_ib_dev *dev = to_mdev(pd->device); 2286 void *mkc; 2287 2288 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 2289 2290 /* This is only used from the kernel, so setting the PD is OK. */ 2291 set_mkc_access_pd_addr_fields(mkc, IB_ACCESS_RELAXED_ORDERING, 0, pd); 2292 MLX5_SET(mkc, mkc, free, 1); 2293 MLX5_SET(mkc, mkc, translations_octword_size, ndescs); 2294 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); 2295 MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); 2296 MLX5_SET(mkc, mkc, umr_en, 1); 2297 MLX5_SET(mkc, mkc, log_page_size, page_shift); 2298 if (access_mode == MLX5_MKC_ACCESS_MODE_PA || 2299 access_mode == MLX5_MKC_ACCESS_MODE_MTT) 2300 MLX5_SET(mkc, mkc, ma_translation_mode, MLX5_CAP_GEN(dev->mdev, ats)); 2301 } 2302 2303 static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 2304 int ndescs, int desc_size, int page_shift, 2305 int access_mode, u32 *in, int inlen) 2306 { 2307 struct mlx5_ib_dev *dev = to_mdev(pd->device); 2308 int err; 2309 2310 mr->access_mode = access_mode; 2311 mr->desc_size = desc_size; 2312 mr->max_descs = ndescs; 2313 2314 err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size); 2315 if (err) 2316 return err; 2317 2318 mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift); 2319 2320 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 2321 if (err) 2322 goto err_free_descs; 2323 2324 mr->mmkey.type = MLX5_MKEY_MR; 2325 mr->ibmr.lkey = mr->mmkey.key; 2326 mr->ibmr.rkey = mr->mmkey.key; 2327 2328 return 0; 2329 2330 err_free_descs: 2331 mlx5_free_priv_descs(mr); 2332 return err; 2333 } 2334 2335 static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, 2336 u32 max_num_sg, u32 max_num_meta_sg, 2337 int desc_size, int access_mode) 2338 { 2339 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 2340 int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4); 2341 int page_shift = 0; 2342 struct mlx5_ib_mr *mr; 2343 u32 *in; 2344 int err; 2345 2346 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 2347 if (!mr) 2348 return ERR_PTR(-ENOMEM); 2349 2350 mr->ibmr.pd = pd; 2351 mr->ibmr.device = pd->device; 2352 2353 in = kzalloc(inlen, GFP_KERNEL); 2354 if (!in) { 2355 err = -ENOMEM; 2356 goto err_free; 2357 } 2358 2359 if (access_mode == MLX5_MKC_ACCESS_MODE_MTT) 2360 page_shift = PAGE_SHIFT; 2361 2362 err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift, 2363 access_mode, in, inlen); 2364 if (err) 2365 goto err_free_in; 2366 2367 mr->umem = NULL; 2368 kfree(in); 2369 2370 return mr; 2371 2372 err_free_in: 2373 kfree(in); 2374 err_free: 2375 kfree(mr); 2376 return ERR_PTR(err); 2377 } 2378 2379 static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 2380 int ndescs, u32 *in, int inlen) 2381 { 2382 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt), 2383 PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in, 2384 inlen); 2385 } 2386 2387 static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 2388 int ndescs, u32 *in, int inlen) 2389 { 2390 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm), 2391 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); 2392 } 2393 2394 static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 2395 int max_num_sg, int max_num_meta_sg, 2396 u32 *in, int inlen) 2397 { 2398 struct mlx5_ib_dev *dev = to_mdev(pd->device); 2399 u32 psv_index[2]; 2400 void *mkc; 2401 int err; 2402 2403 mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); 2404 if (!mr->sig) 2405 return -ENOMEM; 2406 2407 /* create mem & wire PSVs */ 2408 err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index); 2409 if (err) 2410 goto err_free_sig; 2411 2412 mr->sig->psv_memory.psv_idx = psv_index[0]; 2413 mr->sig->psv_wire.psv_idx = psv_index[1]; 2414 2415 mr->sig->sig_status_checked = true; 2416 mr->sig->sig_err_exists = false; 2417 /* Next UMR, Arm SIGERR */ 2418 ++mr->sig->sigerr_count; 2419 mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, 2420 sizeof(struct mlx5_klm), 2421 MLX5_MKC_ACCESS_MODE_KLMS); 2422 if (IS_ERR(mr->klm_mr)) { 2423 err = PTR_ERR(mr->klm_mr); 2424 goto err_destroy_psv; 2425 } 2426 mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, 2427 sizeof(struct mlx5_mtt), 2428 MLX5_MKC_ACCESS_MODE_MTT); 2429 if (IS_ERR(mr->mtt_mr)) { 2430 err = PTR_ERR(mr->mtt_mr); 2431 goto err_free_klm_mr; 2432 } 2433 2434 /* Set bsf descriptors for mkey */ 2435 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 2436 MLX5_SET(mkc, mkc, bsf_en, 1); 2437 MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); 2438 2439 err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0, 2440 MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); 2441 if (err) 2442 goto err_free_mtt_mr; 2443 2444 err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), 2445 mr->sig, GFP_KERNEL)); 2446 if (err) 2447 goto err_free_descs; 2448 return 0; 2449 2450 err_free_descs: 2451 destroy_mkey(dev, mr); 2452 mlx5_free_priv_descs(mr); 2453 err_free_mtt_mr: 2454 mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL); 2455 mr->mtt_mr = NULL; 2456 err_free_klm_mr: 2457 mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL); 2458 mr->klm_mr = NULL; 2459 err_destroy_psv: 2460 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) 2461 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", 2462 mr->sig->psv_memory.psv_idx); 2463 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) 2464 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", 2465 mr->sig->psv_wire.psv_idx); 2466 err_free_sig: 2467 kfree(mr->sig); 2468 2469 return err; 2470 } 2471 2472 static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, 2473 enum ib_mr_type mr_type, u32 max_num_sg, 2474 u32 max_num_meta_sg) 2475 { 2476 struct mlx5_ib_dev *dev = to_mdev(pd->device); 2477 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 2478 int ndescs = ALIGN(max_num_sg, 4); 2479 struct mlx5_ib_mr *mr; 2480 u32 *in; 2481 int err; 2482 2483 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 2484 if (!mr) 2485 return ERR_PTR(-ENOMEM); 2486 2487 in = kzalloc(inlen, GFP_KERNEL); 2488 if (!in) { 2489 err = -ENOMEM; 2490 goto err_free; 2491 } 2492 2493 mr->ibmr.device = pd->device; 2494 mr->umem = NULL; 2495 2496 switch (mr_type) { 2497 case IB_MR_TYPE_MEM_REG: 2498 err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen); 2499 break; 2500 case IB_MR_TYPE_SG_GAPS: 2501 err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen); 2502 break; 2503 case IB_MR_TYPE_INTEGRITY: 2504 err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg, 2505 max_num_meta_sg, in, inlen); 2506 break; 2507 default: 2508 mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type); 2509 err = -EINVAL; 2510 } 2511 2512 if (err) 2513 goto err_free_in; 2514 2515 kfree(in); 2516 2517 return &mr->ibmr; 2518 2519 err_free_in: 2520 kfree(in); 2521 err_free: 2522 kfree(mr); 2523 return ERR_PTR(err); 2524 } 2525 2526 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 2527 u32 max_num_sg) 2528 { 2529 return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0); 2530 } 2531 2532 struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, 2533 u32 max_num_sg, u32 max_num_meta_sg) 2534 { 2535 return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg, 2536 max_num_meta_sg); 2537 } 2538 2539 int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) 2540 { 2541 struct mlx5_ib_dev *dev = to_mdev(ibmw->device); 2542 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 2543 struct mlx5_ib_mw *mw = to_mmw(ibmw); 2544 unsigned int ndescs; 2545 u32 *in = NULL; 2546 void *mkc; 2547 int err; 2548 struct mlx5_ib_alloc_mw req = {}; 2549 struct { 2550 __u32 comp_mask; 2551 __u32 response_length; 2552 } resp = {}; 2553 2554 err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); 2555 if (err) 2556 return err; 2557 2558 if (req.comp_mask || req.reserved1 || req.reserved2) 2559 return -EOPNOTSUPP; 2560 2561 if (udata->inlen > sizeof(req) && 2562 !ib_is_udata_cleared(udata, sizeof(req), 2563 udata->inlen - sizeof(req))) 2564 return -EOPNOTSUPP; 2565 2566 ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4); 2567 2568 in = kzalloc(inlen, GFP_KERNEL); 2569 if (!in) 2570 return -ENOMEM; 2571 2572 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 2573 2574 MLX5_SET(mkc, mkc, free, 1); 2575 MLX5_SET(mkc, mkc, translations_octword_size, ndescs); 2576 MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn); 2577 MLX5_SET(mkc, mkc, umr_en, 1); 2578 MLX5_SET(mkc, mkc, lr, 1); 2579 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); 2580 MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2))); 2581 MLX5_SET(mkc, mkc, qpn, 0xffffff); 2582 2583 err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen); 2584 if (err) 2585 goto free; 2586 2587 mw->mmkey.type = MLX5_MKEY_MW; 2588 ibmw->rkey = mw->mmkey.key; 2589 mw->mmkey.ndescs = ndescs; 2590 2591 resp.response_length = 2592 min(offsetofend(typeof(resp), response_length), udata->outlen); 2593 if (resp.response_length) { 2594 err = ib_copy_to_udata(udata, &resp, resp.response_length); 2595 if (err) 2596 goto free_mkey; 2597 } 2598 2599 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { 2600 err = mlx5r_store_odp_mkey(dev, &mw->mmkey); 2601 if (err) 2602 goto free_mkey; 2603 } 2604 2605 kfree(in); 2606 return 0; 2607 2608 free_mkey: 2609 mlx5_core_destroy_mkey(dev->mdev, mw->mmkey.key); 2610 free: 2611 kfree(in); 2612 return err; 2613 } 2614 2615 int mlx5_ib_dealloc_mw(struct ib_mw *mw) 2616 { 2617 struct mlx5_ib_dev *dev = to_mdev(mw->device); 2618 struct mlx5_ib_mw *mmw = to_mmw(mw); 2619 2620 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && 2621 xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key))) 2622 /* 2623 * pagefault_single_data_segment() may be accessing mmw 2624 * if the user bound an ODP MR to this MW. 2625 */ 2626 mlx5r_deref_wait_odp_mkey(&mmw->mmkey); 2627 2628 return mlx5_core_destroy_mkey(dev->mdev, mmw->mmkey.key); 2629 } 2630 2631 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, 2632 struct ib_mr_status *mr_status) 2633 { 2634 struct mlx5_ib_mr *mmr = to_mmr(ibmr); 2635 int ret = 0; 2636 2637 if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { 2638 pr_err("Invalid status check mask\n"); 2639 ret = -EINVAL; 2640 goto done; 2641 } 2642 2643 mr_status->fail_status = 0; 2644 if (check_mask & IB_MR_CHECK_SIG_STATUS) { 2645 if (!mmr->sig) { 2646 ret = -EINVAL; 2647 pr_err("signature status check requested on a non-signature enabled MR\n"); 2648 goto done; 2649 } 2650 2651 mmr->sig->sig_status_checked = true; 2652 if (!mmr->sig->sig_err_exists) 2653 goto done; 2654 2655 if (ibmr->lkey == mmr->sig->err_item.key) 2656 memcpy(&mr_status->sig_err, &mmr->sig->err_item, 2657 sizeof(mr_status->sig_err)); 2658 else { 2659 mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; 2660 mr_status->sig_err.sig_err_offset = 0; 2661 mr_status->sig_err.key = mmr->sig->err_item.key; 2662 } 2663 2664 mmr->sig->sig_err_exists = false; 2665 mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; 2666 } 2667 2668 done: 2669 return ret; 2670 } 2671 2672 static int 2673 mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2674 int data_sg_nents, unsigned int *data_sg_offset, 2675 struct scatterlist *meta_sg, int meta_sg_nents, 2676 unsigned int *meta_sg_offset) 2677 { 2678 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2679 unsigned int sg_offset = 0; 2680 int n = 0; 2681 2682 mr->meta_length = 0; 2683 if (data_sg_nents == 1) { 2684 n++; 2685 mr->mmkey.ndescs = 1; 2686 if (data_sg_offset) 2687 sg_offset = *data_sg_offset; 2688 mr->data_length = sg_dma_len(data_sg) - sg_offset; 2689 mr->data_iova = sg_dma_address(data_sg) + sg_offset; 2690 if (meta_sg_nents == 1) { 2691 n++; 2692 mr->meta_ndescs = 1; 2693 if (meta_sg_offset) 2694 sg_offset = *meta_sg_offset; 2695 else 2696 sg_offset = 0; 2697 mr->meta_length = sg_dma_len(meta_sg) - sg_offset; 2698 mr->pi_iova = sg_dma_address(meta_sg) + sg_offset; 2699 } 2700 ibmr->length = mr->data_length + mr->meta_length; 2701 } 2702 2703 return n; 2704 } 2705 2706 static int 2707 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, 2708 struct scatterlist *sgl, 2709 unsigned short sg_nents, 2710 unsigned int *sg_offset_p, 2711 struct scatterlist *meta_sgl, 2712 unsigned short meta_sg_nents, 2713 unsigned int *meta_sg_offset_p) 2714 { 2715 struct scatterlist *sg = sgl; 2716 struct mlx5_klm *klms = mr->descs; 2717 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 2718 u32 lkey = mr->ibmr.pd->local_dma_lkey; 2719 int i, j = 0; 2720 2721 mr->ibmr.iova = sg_dma_address(sg) + sg_offset; 2722 mr->ibmr.length = 0; 2723 2724 for_each_sg(sgl, sg, sg_nents, i) { 2725 if (unlikely(i >= mr->max_descs)) 2726 break; 2727 klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset); 2728 klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset); 2729 klms[i].key = cpu_to_be32(lkey); 2730 mr->ibmr.length += sg_dma_len(sg) - sg_offset; 2731 2732 sg_offset = 0; 2733 } 2734 2735 if (sg_offset_p) 2736 *sg_offset_p = sg_offset; 2737 2738 mr->mmkey.ndescs = i; 2739 mr->data_length = mr->ibmr.length; 2740 2741 if (meta_sg_nents) { 2742 sg = meta_sgl; 2743 sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0; 2744 for_each_sg(meta_sgl, sg, meta_sg_nents, j) { 2745 if (unlikely(i + j >= mr->max_descs)) 2746 break; 2747 klms[i + j].va = cpu_to_be64(sg_dma_address(sg) + 2748 sg_offset); 2749 klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) - 2750 sg_offset); 2751 klms[i + j].key = cpu_to_be32(lkey); 2752 mr->ibmr.length += sg_dma_len(sg) - sg_offset; 2753 2754 sg_offset = 0; 2755 } 2756 if (meta_sg_offset_p) 2757 *meta_sg_offset_p = sg_offset; 2758 2759 mr->meta_ndescs = j; 2760 mr->meta_length = mr->ibmr.length - mr->data_length; 2761 } 2762 2763 return i + j; 2764 } 2765 2766 static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) 2767 { 2768 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2769 __be64 *descs; 2770 2771 if (unlikely(mr->mmkey.ndescs == mr->max_descs)) 2772 return -ENOMEM; 2773 2774 descs = mr->descs; 2775 descs[mr->mmkey.ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); 2776 2777 return 0; 2778 } 2779 2780 static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr) 2781 { 2782 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2783 __be64 *descs; 2784 2785 if (unlikely(mr->mmkey.ndescs + mr->meta_ndescs == mr->max_descs)) 2786 return -ENOMEM; 2787 2788 descs = mr->descs; 2789 descs[mr->mmkey.ndescs + mr->meta_ndescs++] = 2790 cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); 2791 2792 return 0; 2793 } 2794 2795 static int 2796 mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2797 int data_sg_nents, unsigned int *data_sg_offset, 2798 struct scatterlist *meta_sg, int meta_sg_nents, 2799 unsigned int *meta_sg_offset) 2800 { 2801 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2802 struct mlx5_ib_mr *pi_mr = mr->mtt_mr; 2803 int n; 2804 2805 pi_mr->mmkey.ndescs = 0; 2806 pi_mr->meta_ndescs = 0; 2807 pi_mr->meta_length = 0; 2808 2809 ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map, 2810 pi_mr->desc_size * pi_mr->max_descs, 2811 DMA_TO_DEVICE); 2812 2813 pi_mr->ibmr.page_size = ibmr->page_size; 2814 n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset, 2815 mlx5_set_page); 2816 if (n != data_sg_nents) 2817 return n; 2818 2819 pi_mr->data_iova = pi_mr->ibmr.iova; 2820 pi_mr->data_length = pi_mr->ibmr.length; 2821 pi_mr->ibmr.length = pi_mr->data_length; 2822 ibmr->length = pi_mr->data_length; 2823 2824 if (meta_sg_nents) { 2825 u64 page_mask = ~((u64)ibmr->page_size - 1); 2826 u64 iova = pi_mr->data_iova; 2827 2828 n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents, 2829 meta_sg_offset, mlx5_set_page_pi); 2830 2831 pi_mr->meta_length = pi_mr->ibmr.length; 2832 /* 2833 * PI address for the HW is the offset of the metadata address 2834 * relative to the first data page address. 2835 * It equals to first data page address + size of data pages + 2836 * metadata offset at the first metadata page 2837 */ 2838 pi_mr->pi_iova = (iova & page_mask) + 2839 pi_mr->mmkey.ndescs * ibmr->page_size + 2840 (pi_mr->ibmr.iova & ~page_mask); 2841 /* 2842 * In order to use one MTT MR for data and metadata, we register 2843 * also the gaps between the end of the data and the start of 2844 * the metadata (the sig MR will verify that the HW will access 2845 * to right addresses). This mapping is safe because we use 2846 * internal mkey for the registration. 2847 */ 2848 pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova; 2849 pi_mr->ibmr.iova = iova; 2850 ibmr->length += pi_mr->meta_length; 2851 } 2852 2853 ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, 2854 pi_mr->desc_size * pi_mr->max_descs, 2855 DMA_TO_DEVICE); 2856 2857 return n; 2858 } 2859 2860 static int 2861 mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2862 int data_sg_nents, unsigned int *data_sg_offset, 2863 struct scatterlist *meta_sg, int meta_sg_nents, 2864 unsigned int *meta_sg_offset) 2865 { 2866 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2867 struct mlx5_ib_mr *pi_mr = mr->klm_mr; 2868 int n; 2869 2870 pi_mr->mmkey.ndescs = 0; 2871 pi_mr->meta_ndescs = 0; 2872 pi_mr->meta_length = 0; 2873 2874 ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map, 2875 pi_mr->desc_size * pi_mr->max_descs, 2876 DMA_TO_DEVICE); 2877 2878 n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset, 2879 meta_sg, meta_sg_nents, meta_sg_offset); 2880 2881 ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, 2882 pi_mr->desc_size * pi_mr->max_descs, 2883 DMA_TO_DEVICE); 2884 2885 /* This is zero-based memory region */ 2886 pi_mr->data_iova = 0; 2887 pi_mr->ibmr.iova = 0; 2888 pi_mr->pi_iova = pi_mr->data_length; 2889 ibmr->length = pi_mr->ibmr.length; 2890 2891 return n; 2892 } 2893 2894 int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2895 int data_sg_nents, unsigned int *data_sg_offset, 2896 struct scatterlist *meta_sg, int meta_sg_nents, 2897 unsigned int *meta_sg_offset) 2898 { 2899 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2900 struct mlx5_ib_mr *pi_mr = NULL; 2901 int n; 2902 2903 WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY); 2904 2905 mr->mmkey.ndescs = 0; 2906 mr->data_length = 0; 2907 mr->data_iova = 0; 2908 mr->meta_ndescs = 0; 2909 mr->pi_iova = 0; 2910 /* 2911 * As a performance optimization, if possible, there is no need to 2912 * perform UMR operation to register the data/metadata buffers. 2913 * First try to map the sg lists to PA descriptors with local_dma_lkey. 2914 * Fallback to UMR only in case of a failure. 2915 */ 2916 n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents, 2917 data_sg_offset, meta_sg, meta_sg_nents, 2918 meta_sg_offset); 2919 if (n == data_sg_nents + meta_sg_nents) 2920 goto out; 2921 /* 2922 * As a performance optimization, if possible, there is no need to map 2923 * the sg lists to KLM descriptors. First try to map the sg lists to MTT 2924 * descriptors and fallback to KLM only in case of a failure. 2925 * It's more efficient for the HW to work with MTT descriptors 2926 * (especially in high load). 2927 * Use KLM (indirect access) only if it's mandatory. 2928 */ 2929 pi_mr = mr->mtt_mr; 2930 n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents, 2931 data_sg_offset, meta_sg, meta_sg_nents, 2932 meta_sg_offset); 2933 if (n == data_sg_nents + meta_sg_nents) 2934 goto out; 2935 2936 pi_mr = mr->klm_mr; 2937 n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents, 2938 data_sg_offset, meta_sg, meta_sg_nents, 2939 meta_sg_offset); 2940 if (unlikely(n != data_sg_nents + meta_sg_nents)) 2941 return -ENOMEM; 2942 2943 out: 2944 /* This is zero-based memory region */ 2945 ibmr->iova = 0; 2946 mr->pi_mr = pi_mr; 2947 if (pi_mr) 2948 ibmr->sig_attrs->meta_length = pi_mr->meta_length; 2949 else 2950 ibmr->sig_attrs->meta_length = mr->meta_length; 2951 2952 return 0; 2953 } 2954 2955 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 2956 unsigned int *sg_offset) 2957 { 2958 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2959 int n; 2960 2961 mr->mmkey.ndescs = 0; 2962 2963 ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map, 2964 mr->desc_size * mr->max_descs, 2965 DMA_TO_DEVICE); 2966 2967 if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) 2968 n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0, 2969 NULL); 2970 else 2971 n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, 2972 mlx5_set_page); 2973 2974 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, 2975 mr->desc_size * mr->max_descs, 2976 DMA_TO_DEVICE); 2977 2978 return n; 2979 } 2980