1 /* 2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. 3 * Copyright (c) 2020, Intel Corporation. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34 35 #include <linux/kref.h> 36 #include <linux/random.h> 37 #include <linux/debugfs.h> 38 #include <linux/export.h> 39 #include <linux/delay.h> 40 #include <linux/dma-buf.h> 41 #include <linux/dma-resv.h> 42 #include <rdma/ib_umem_odp.h> 43 #include "dm.h" 44 #include "mlx5_ib.h" 45 #include "umr.h" 46 #include "data_direct.h" 47 #include "dmah.h" 48 49 enum { 50 MAX_PENDING_REG_MR = 8, 51 }; 52 53 #define MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS 4 54 #define MLX5_UMR_ALIGN 2048 55 56 static void 57 create_mkey_callback(int status, struct mlx5_async_work *context); 58 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, 59 u64 iova, int access_flags, 60 unsigned long page_size, bool populate, 61 int access_mode, u16 st_index, u8 ph); 62 static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr); 63 64 static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, 65 struct ib_pd *pd) 66 { 67 struct mlx5_ib_dev *dev = to_mdev(pd->device); 68 69 MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); 70 MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); 71 MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); 72 MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); 73 MLX5_SET(mkc, mkc, lr, 1); 74 75 if (acc & IB_ACCESS_RELAXED_ORDERING) { 76 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) 77 MLX5_SET(mkc, mkc, relaxed_ordering_write, 1); 78 79 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) || 80 (MLX5_CAP_GEN(dev->mdev, 81 relaxed_ordering_read_pci_enabled) && 82 pcie_relaxed_ordering_enabled(dev->mdev->pdev))) 83 MLX5_SET(mkc, mkc, relaxed_ordering_read, 1); 84 } 85 86 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); 87 MLX5_SET(mkc, mkc, qpn, 0xffffff); 88 MLX5_SET64(mkc, mkc, start_addr, start_addr); 89 } 90 91 static void assign_mkey_variant(struct mlx5_ib_dev *dev, u32 *mkey, u32 *in) 92 { 93 u8 key = atomic_inc_return(&dev->mkey_var); 94 void *mkc; 95 96 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 97 MLX5_SET(mkc, mkc, mkey_7_0, key); 98 *mkey = key; 99 } 100 101 static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, 102 struct mlx5_ib_mkey *mkey, u32 *in, int inlen) 103 { 104 int ret; 105 106 assign_mkey_variant(dev, &mkey->key, in); 107 ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen); 108 if (!ret) 109 init_waitqueue_head(&mkey->wait); 110 111 return ret; 112 } 113 114 static int mlx5_ib_create_mkey_cb(struct mlx5r_async_create_mkey *async_create) 115 { 116 struct mlx5_ib_dev *dev = async_create->ent->dev; 117 size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 118 size_t outlen = MLX5_ST_SZ_BYTES(create_mkey_out); 119 120 MLX5_SET(create_mkey_in, async_create->in, opcode, 121 MLX5_CMD_OP_CREATE_MKEY); 122 assign_mkey_variant(dev, &async_create->mkey, async_create->in); 123 return mlx5_cmd_exec_cb(&dev->async_ctx, async_create->in, inlen, 124 async_create->out, outlen, create_mkey_callback, 125 &async_create->cb_work); 126 } 127 128 static int mkey_cache_max_order(struct mlx5_ib_dev *dev); 129 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent); 130 131 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 132 { 133 WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))); 134 135 return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key); 136 } 137 138 static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out) 139 { 140 if (status == -ENXIO) /* core driver is not available */ 141 return; 142 143 mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status); 144 if (status != -EREMOTEIO) /* driver specific failure */ 145 return; 146 147 /* Failed in FW, print cmd out failure details */ 148 mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out); 149 } 150 151 static int push_mkey_locked(struct mlx5_cache_ent *ent, u32 mkey) 152 { 153 unsigned long tmp = ent->mkeys_queue.ci % NUM_MKEYS_PER_PAGE; 154 struct mlx5_mkeys_page *page; 155 156 lockdep_assert_held(&ent->mkeys_queue.lock); 157 if (ent->mkeys_queue.ci >= 158 ent->mkeys_queue.num_pages * NUM_MKEYS_PER_PAGE) { 159 page = kzalloc_obj(*page, GFP_ATOMIC); 160 if (!page) 161 return -ENOMEM; 162 ent->mkeys_queue.num_pages++; 163 list_add_tail(&page->list, &ent->mkeys_queue.pages_list); 164 } else { 165 page = list_last_entry(&ent->mkeys_queue.pages_list, 166 struct mlx5_mkeys_page, list); 167 } 168 169 page->mkeys[tmp] = mkey; 170 ent->mkeys_queue.ci++; 171 return 0; 172 } 173 174 static int pop_mkey_locked(struct mlx5_cache_ent *ent) 175 { 176 unsigned long tmp = (ent->mkeys_queue.ci - 1) % NUM_MKEYS_PER_PAGE; 177 struct mlx5_mkeys_page *last_page; 178 u32 mkey; 179 180 lockdep_assert_held(&ent->mkeys_queue.lock); 181 last_page = list_last_entry(&ent->mkeys_queue.pages_list, 182 struct mlx5_mkeys_page, list); 183 mkey = last_page->mkeys[tmp]; 184 last_page->mkeys[tmp] = 0; 185 ent->mkeys_queue.ci--; 186 if (ent->mkeys_queue.num_pages > 1 && !tmp) { 187 list_del(&last_page->list); 188 ent->mkeys_queue.num_pages--; 189 kfree(last_page); 190 } 191 return mkey; 192 } 193 194 static void create_mkey_callback(int status, struct mlx5_async_work *context) 195 { 196 struct mlx5r_async_create_mkey *mkey_out = 197 container_of(context, struct mlx5r_async_create_mkey, cb_work); 198 struct mlx5_cache_ent *ent = mkey_out->ent; 199 struct mlx5_ib_dev *dev = ent->dev; 200 unsigned long flags; 201 202 if (status) { 203 create_mkey_warn(dev, status, mkey_out->out); 204 kfree(mkey_out); 205 spin_lock_irqsave(&ent->mkeys_queue.lock, flags); 206 ent->pending--; 207 WRITE_ONCE(dev->fill_delay, 1); 208 spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags); 209 mod_timer(&dev->delay_timer, jiffies + HZ); 210 return; 211 } 212 213 mkey_out->mkey |= mlx5_idx_to_mkey( 214 MLX5_GET(create_mkey_out, mkey_out->out, mkey_index)); 215 WRITE_ONCE(dev->cache.last_add, jiffies); 216 217 spin_lock_irqsave(&ent->mkeys_queue.lock, flags); 218 push_mkey_locked(ent, mkey_out->mkey); 219 ent->pending--; 220 /* If we are doing fill_to_high_water then keep going. */ 221 queue_adjust_cache_locked(ent); 222 spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags); 223 kfree(mkey_out); 224 } 225 226 static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs) 227 { 228 int ret = 0; 229 230 switch (access_mode) { 231 case MLX5_MKC_ACCESS_MODE_MTT: 232 ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD / 233 sizeof(struct mlx5_mtt)); 234 break; 235 case MLX5_MKC_ACCESS_MODE_KSM: 236 ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD / 237 sizeof(struct mlx5_klm)); 238 break; 239 default: 240 WARN_ON(1); 241 } 242 return ret; 243 } 244 245 static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) 246 { 247 set_mkc_access_pd_addr_fields(mkc, ent->rb_key.access_flags, 0, 248 ent->dev->umrc.pd); 249 MLX5_SET(mkc, mkc, free, 1); 250 MLX5_SET(mkc, mkc, umr_en, 1); 251 MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3); 252 MLX5_SET(mkc, mkc, access_mode_4_2, 253 (ent->rb_key.access_mode >> 2) & 0x7); 254 MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats); 255 256 MLX5_SET(mkc, mkc, translations_octword_size, 257 get_mkc_octo_size(ent->rb_key.access_mode, 258 ent->rb_key.ndescs)); 259 MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); 260 261 if (ent->rb_key.ph != MLX5_IB_NO_PH) { 262 MLX5_SET(mkc, mkc, pcie_tph_en, 1); 263 MLX5_SET(mkc, mkc, pcie_tph_ph, ent->rb_key.ph); 264 if (ent->rb_key.st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX) 265 MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index, 266 ent->rb_key.st_index); 267 } 268 } 269 270 /* Asynchronously schedule new MRs to be populated in the cache. */ 271 static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) 272 { 273 struct mlx5r_async_create_mkey *async_create; 274 void *mkc; 275 int err = 0; 276 int i; 277 278 for (i = 0; i < num; i++) { 279 async_create = kzalloc_obj(struct mlx5r_async_create_mkey); 280 if (!async_create) 281 return -ENOMEM; 282 mkc = MLX5_ADDR_OF(create_mkey_in, async_create->in, 283 memory_key_mkey_entry); 284 set_cache_mkc(ent, mkc); 285 async_create->ent = ent; 286 287 spin_lock_irq(&ent->mkeys_queue.lock); 288 if (ent->pending >= MAX_PENDING_REG_MR) { 289 err = -EAGAIN; 290 goto free_async_create; 291 } 292 ent->pending++; 293 spin_unlock_irq(&ent->mkeys_queue.lock); 294 295 err = mlx5_ib_create_mkey_cb(async_create); 296 if (err) { 297 mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err); 298 goto err_create_mkey; 299 } 300 } 301 302 return 0; 303 304 err_create_mkey: 305 spin_lock_irq(&ent->mkeys_queue.lock); 306 ent->pending--; 307 free_async_create: 308 spin_unlock_irq(&ent->mkeys_queue.lock); 309 kfree(async_create); 310 return err; 311 } 312 313 /* Synchronously create a MR in the cache */ 314 static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey) 315 { 316 size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 317 void *mkc; 318 u32 *in; 319 int err; 320 321 in = kzalloc(inlen, GFP_KERNEL); 322 if (!in) 323 return -ENOMEM; 324 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 325 set_cache_mkc(ent, mkc); 326 327 err = mlx5_core_create_mkey(ent->dev->mdev, mkey, in, inlen); 328 if (err) 329 goto free_in; 330 331 WRITE_ONCE(ent->dev->cache.last_add, jiffies); 332 free_in: 333 kfree(in); 334 return err; 335 } 336 337 static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) 338 { 339 u32 mkey; 340 341 lockdep_assert_held(&ent->mkeys_queue.lock); 342 if (!ent->mkeys_queue.ci) 343 return; 344 mkey = pop_mkey_locked(ent); 345 spin_unlock_irq(&ent->mkeys_queue.lock); 346 mlx5_core_destroy_mkey(ent->dev->mdev, mkey); 347 spin_lock_irq(&ent->mkeys_queue.lock); 348 } 349 350 static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target, 351 bool limit_fill) 352 __acquires(&ent->mkeys_queue.lock) __releases(&ent->mkeys_queue.lock) 353 { 354 int err; 355 356 lockdep_assert_held(&ent->mkeys_queue.lock); 357 358 while (true) { 359 if (limit_fill) 360 target = ent->limit * 2; 361 if (target == ent->pending + ent->mkeys_queue.ci) 362 return 0; 363 if (target > ent->pending + ent->mkeys_queue.ci) { 364 u32 todo = target - (ent->pending + ent->mkeys_queue.ci); 365 366 spin_unlock_irq(&ent->mkeys_queue.lock); 367 err = add_keys(ent, todo); 368 if (err == -EAGAIN) 369 usleep_range(3000, 5000); 370 spin_lock_irq(&ent->mkeys_queue.lock); 371 if (err) { 372 if (err != -EAGAIN) 373 return err; 374 } else 375 return 0; 376 } else { 377 remove_cache_mr_locked(ent); 378 } 379 } 380 } 381 382 static ssize_t size_write(struct file *filp, const char __user *buf, 383 size_t count, loff_t *pos) 384 { 385 struct mlx5_cache_ent *ent = filp->private_data; 386 u32 target; 387 int err; 388 389 err = kstrtou32_from_user(buf, count, 0, &target); 390 if (err) 391 return err; 392 393 /* 394 * Target is the new value of total_mrs the user requests, however we 395 * cannot free MRs that are in use. Compute the target value for stored 396 * mkeys. 397 */ 398 spin_lock_irq(&ent->mkeys_queue.lock); 399 if (target < ent->in_use) { 400 err = -EINVAL; 401 goto err_unlock; 402 } 403 target = target - ent->in_use; 404 if (target < ent->limit || target > ent->limit*2) { 405 err = -EINVAL; 406 goto err_unlock; 407 } 408 err = resize_available_mrs(ent, target, false); 409 if (err) 410 goto err_unlock; 411 spin_unlock_irq(&ent->mkeys_queue.lock); 412 413 return count; 414 415 err_unlock: 416 spin_unlock_irq(&ent->mkeys_queue.lock); 417 return err; 418 } 419 420 static ssize_t size_read(struct file *filp, char __user *buf, size_t count, 421 loff_t *pos) 422 { 423 struct mlx5_cache_ent *ent = filp->private_data; 424 char lbuf[20]; 425 int err; 426 427 err = snprintf(lbuf, sizeof(lbuf), "%ld\n", 428 ent->mkeys_queue.ci + ent->in_use); 429 if (err < 0) 430 return err; 431 432 return simple_read_from_buffer(buf, count, pos, lbuf, err); 433 } 434 435 static const struct file_operations size_fops = { 436 .owner = THIS_MODULE, 437 .open = simple_open, 438 .write = size_write, 439 .read = size_read, 440 }; 441 442 static ssize_t limit_write(struct file *filp, const char __user *buf, 443 size_t count, loff_t *pos) 444 { 445 struct mlx5_cache_ent *ent = filp->private_data; 446 u32 var; 447 int err; 448 449 err = kstrtou32_from_user(buf, count, 0, &var); 450 if (err) 451 return err; 452 453 /* 454 * Upon set we immediately fill the cache to high water mark implied by 455 * the limit. 456 */ 457 spin_lock_irq(&ent->mkeys_queue.lock); 458 ent->limit = var; 459 err = resize_available_mrs(ent, 0, true); 460 spin_unlock_irq(&ent->mkeys_queue.lock); 461 if (err) 462 return err; 463 return count; 464 } 465 466 static ssize_t limit_read(struct file *filp, char __user *buf, size_t count, 467 loff_t *pos) 468 { 469 struct mlx5_cache_ent *ent = filp->private_data; 470 char lbuf[20]; 471 int err; 472 473 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); 474 if (err < 0) 475 return err; 476 477 return simple_read_from_buffer(buf, count, pos, lbuf, err); 478 } 479 480 static const struct file_operations limit_fops = { 481 .owner = THIS_MODULE, 482 .open = simple_open, 483 .write = limit_write, 484 .read = limit_read, 485 }; 486 487 static bool someone_adding(struct mlx5_mkey_cache *cache) 488 { 489 struct mlx5_cache_ent *ent; 490 struct rb_node *node; 491 bool ret; 492 493 mutex_lock(&cache->rb_lock); 494 for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) { 495 ent = rb_entry(node, struct mlx5_cache_ent, node); 496 spin_lock_irq(&ent->mkeys_queue.lock); 497 ret = ent->mkeys_queue.ci < ent->limit; 498 spin_unlock_irq(&ent->mkeys_queue.lock); 499 if (ret) { 500 mutex_unlock(&cache->rb_lock); 501 return true; 502 } 503 } 504 mutex_unlock(&cache->rb_lock); 505 return false; 506 } 507 508 /* 509 * Check if the bucket is outside the high/low water mark and schedule an async 510 * update. The cache refill has hysteresis, once the low water mark is hit it is 511 * refilled up to the high mark. 512 */ 513 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) 514 { 515 lockdep_assert_held(&ent->mkeys_queue.lock); 516 517 if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp) 518 return; 519 if (ent->mkeys_queue.ci < ent->limit) { 520 ent->fill_to_high_water = true; 521 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); 522 } else if (ent->fill_to_high_water && 523 ent->mkeys_queue.ci + ent->pending < 2 * ent->limit) { 524 /* 525 * Once we start populating due to hitting a low water mark 526 * continue until we pass the high water mark. 527 */ 528 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); 529 } else if (ent->mkeys_queue.ci == 2 * ent->limit) { 530 ent->fill_to_high_water = false; 531 } else if (ent->mkeys_queue.ci > 2 * ent->limit) { 532 /* Queue deletion of excess entries */ 533 ent->fill_to_high_water = false; 534 if (ent->pending) 535 queue_delayed_work(ent->dev->cache.wq, &ent->dwork, 536 secs_to_jiffies(1)); 537 else 538 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); 539 } 540 } 541 542 static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) 543 { 544 u32 mkey; 545 546 spin_lock_irq(&ent->mkeys_queue.lock); 547 while (ent->mkeys_queue.ci) { 548 mkey = pop_mkey_locked(ent); 549 spin_unlock_irq(&ent->mkeys_queue.lock); 550 mlx5_core_destroy_mkey(dev->mdev, mkey); 551 spin_lock_irq(&ent->mkeys_queue.lock); 552 } 553 ent->tmp_cleanup_scheduled = false; 554 spin_unlock_irq(&ent->mkeys_queue.lock); 555 } 556 557 static void __cache_work_func(struct mlx5_cache_ent *ent) 558 { 559 struct mlx5_ib_dev *dev = ent->dev; 560 struct mlx5_mkey_cache *cache = &dev->cache; 561 int err; 562 563 spin_lock_irq(&ent->mkeys_queue.lock); 564 if (ent->disabled) 565 goto out; 566 567 if (ent->fill_to_high_water && 568 ent->mkeys_queue.ci + ent->pending < 2 * ent->limit && 569 !READ_ONCE(dev->fill_delay)) { 570 spin_unlock_irq(&ent->mkeys_queue.lock); 571 err = add_keys(ent, 1); 572 spin_lock_irq(&ent->mkeys_queue.lock); 573 if (ent->disabled) 574 goto out; 575 if (err) { 576 /* 577 * EAGAIN only happens if there are pending MRs, so we 578 * will be rescheduled when storing them. The only 579 * failure path here is ENOMEM. 580 */ 581 if (err != -EAGAIN) { 582 mlx5_ib_warn( 583 dev, 584 "add keys command failed, err %d\n", 585 err); 586 queue_delayed_work(cache->wq, &ent->dwork, 587 secs_to_jiffies(1)); 588 } 589 } 590 } else if (ent->mkeys_queue.ci > 2 * ent->limit) { 591 bool need_delay; 592 593 /* 594 * The remove_cache_mr() logic is performed as garbage 595 * collection task. Such task is intended to be run when no 596 * other active processes are running. 597 * 598 * The need_resched() will return TRUE if there are user tasks 599 * to be activated in near future. 600 * 601 * In such case, we don't execute remove_cache_mr() and postpone 602 * the garbage collection work to try to run in next cycle, in 603 * order to free CPU resources to other tasks. 604 */ 605 spin_unlock_irq(&ent->mkeys_queue.lock); 606 need_delay = need_resched() || someone_adding(cache) || 607 !time_after(jiffies, 608 READ_ONCE(cache->last_add) + 300 * HZ); 609 spin_lock_irq(&ent->mkeys_queue.lock); 610 if (ent->disabled) 611 goto out; 612 if (need_delay) { 613 queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); 614 goto out; 615 } 616 remove_cache_mr_locked(ent); 617 queue_adjust_cache_locked(ent); 618 } 619 out: 620 spin_unlock_irq(&ent->mkeys_queue.lock); 621 } 622 623 static void delayed_cache_work_func(struct work_struct *work) 624 { 625 struct mlx5_cache_ent *ent; 626 627 ent = container_of(work, struct mlx5_cache_ent, dwork.work); 628 /* temp entries are never filled, only cleaned */ 629 if (ent->is_tmp) 630 clean_keys(ent->dev, ent); 631 else 632 __cache_work_func(ent); 633 } 634 635 static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1, 636 struct mlx5r_cache_rb_key key2) 637 { 638 int res; 639 640 res = key1.ats - key2.ats; 641 if (res) 642 return res; 643 644 res = key1.access_mode - key2.access_mode; 645 if (res) 646 return res; 647 648 res = key1.access_flags - key2.access_flags; 649 if (res) 650 return res; 651 652 res = key1.st_index - key2.st_index; 653 if (res) 654 return res; 655 656 res = key1.ph - key2.ph; 657 if (res) 658 return res; 659 660 /* 661 * keep ndescs the last in the compare table since the find function 662 * searches for an exact match on all properties and only closest 663 * match in size. 664 */ 665 return key1.ndescs - key2.ndescs; 666 } 667 668 static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache, 669 struct mlx5_cache_ent *ent) 670 { 671 struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL; 672 struct mlx5_cache_ent *cur; 673 int cmp; 674 675 /* Figure out where to put new node */ 676 while (*new) { 677 cur = rb_entry(*new, struct mlx5_cache_ent, node); 678 parent = *new; 679 cmp = cache_ent_key_cmp(cur->rb_key, ent->rb_key); 680 if (cmp > 0) 681 new = &((*new)->rb_left); 682 if (cmp < 0) 683 new = &((*new)->rb_right); 684 if (cmp == 0) 685 return -EEXIST; 686 } 687 688 /* Add new node and rebalance tree. */ 689 rb_link_node(&ent->node, parent, new); 690 rb_insert_color(&ent->node, &cache->rb_root); 691 692 return 0; 693 } 694 695 static struct mlx5_cache_ent * 696 mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev, 697 struct mlx5r_cache_rb_key rb_key) 698 { 699 struct rb_node *node = dev->cache.rb_root.rb_node; 700 struct mlx5_cache_ent *cur, *smallest = NULL; 701 u64 ndescs_limit; 702 int cmp; 703 704 /* 705 * Find the smallest ent with order >= requested_order. 706 */ 707 while (node) { 708 cur = rb_entry(node, struct mlx5_cache_ent, node); 709 cmp = cache_ent_key_cmp(cur->rb_key, rb_key); 710 if (cmp > 0) { 711 smallest = cur; 712 node = node->rb_left; 713 } 714 if (cmp < 0) 715 node = node->rb_right; 716 if (cmp == 0) 717 return cur; 718 } 719 720 /* 721 * Limit the usage of mkeys larger than twice the required size while 722 * also allowing the usage of smallest cache entry for small MRs. 723 */ 724 ndescs_limit = max_t(u64, rb_key.ndescs * 2, 725 MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS); 726 727 return (smallest && 728 smallest->rb_key.access_mode == rb_key.access_mode && 729 smallest->rb_key.access_flags == rb_key.access_flags && 730 smallest->rb_key.ats == rb_key.ats && 731 smallest->rb_key.st_index == rb_key.st_index && 732 smallest->rb_key.ph == rb_key.ph && 733 smallest->rb_key.ndescs <= ndescs_limit) ? 734 smallest : 735 NULL; 736 } 737 738 static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, 739 struct mlx5_cache_ent *ent) 740 { 741 struct mlx5_ib_mr *mr; 742 int err; 743 744 mr = kzalloc_obj(*mr); 745 if (!mr) 746 return ERR_PTR(-ENOMEM); 747 748 spin_lock_irq(&ent->mkeys_queue.lock); 749 ent->in_use++; 750 751 if (!ent->mkeys_queue.ci) { 752 queue_adjust_cache_locked(ent); 753 ent->miss++; 754 spin_unlock_irq(&ent->mkeys_queue.lock); 755 err = create_cache_mkey(ent, &mr->mmkey.key); 756 if (err) { 757 spin_lock_irq(&ent->mkeys_queue.lock); 758 ent->in_use--; 759 spin_unlock_irq(&ent->mkeys_queue.lock); 760 kfree(mr); 761 return ERR_PTR(err); 762 } 763 } else { 764 mr->mmkey.key = pop_mkey_locked(ent); 765 queue_adjust_cache_locked(ent); 766 spin_unlock_irq(&ent->mkeys_queue.lock); 767 } 768 mr->mmkey.cache_ent = ent; 769 mr->mmkey.type = MLX5_MKEY_MR; 770 mr->mmkey.rb_key = ent->rb_key; 771 mr->mmkey.cacheable = true; 772 init_waitqueue_head(&mr->mmkey.wait); 773 return mr; 774 } 775 776 static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev, 777 int access_flags) 778 { 779 int ret = 0; 780 781 if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) && 782 MLX5_CAP_GEN(dev->mdev, atomic) && 783 MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) 784 ret |= IB_ACCESS_REMOTE_ATOMIC; 785 786 if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && 787 MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && 788 !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) 789 ret |= IB_ACCESS_RELAXED_ORDERING; 790 791 if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && 792 (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) || 793 MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled)) && 794 !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) 795 ret |= IB_ACCESS_RELAXED_ORDERING; 796 797 return ret; 798 } 799 800 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, 801 int access_flags, int access_mode, 802 int ndescs) 803 { 804 struct mlx5r_cache_rb_key rb_key = { 805 .ndescs = ndescs, 806 .access_mode = access_mode, 807 .access_flags = get_unchangeable_access_flags(dev, access_flags), 808 .ph = MLX5_IB_NO_PH, 809 }; 810 struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key); 811 812 if (!ent) 813 return ERR_PTR(-EOPNOTSUPP); 814 815 return _mlx5_mr_cache_alloc(dev, ent); 816 } 817 818 static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) 819 { 820 if (!mlx5_debugfs_root || dev->is_rep) 821 return; 822 823 debugfs_remove_recursive(dev->cache.fs_root); 824 dev->cache.fs_root = NULL; 825 } 826 827 static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev, 828 struct mlx5_cache_ent *ent) 829 { 830 int order = order_base_2(ent->rb_key.ndescs); 831 struct dentry *dir; 832 833 if (!mlx5_debugfs_root || dev->is_rep) 834 return; 835 836 if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) 837 order = MLX5_IMR_KSM_CACHE_ENTRY + 2; 838 839 sprintf(ent->name, "%d", order); 840 dir = debugfs_create_dir(ent->name, dev->cache.fs_root); 841 debugfs_create_file("size", 0600, dir, ent, &size_fops); 842 debugfs_create_file("limit", 0600, dir, ent, &limit_fops); 843 debugfs_create_ulong("cur", 0400, dir, &ent->mkeys_queue.ci); 844 debugfs_create_u32("miss", 0600, dir, &ent->miss); 845 } 846 847 static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev) 848 { 849 struct dentry *dbg_root = mlx5_debugfs_get_dev_root(dev->mdev); 850 struct mlx5_mkey_cache *cache = &dev->cache; 851 852 if (!mlx5_debugfs_root || dev->is_rep) 853 return; 854 855 cache->fs_root = debugfs_create_dir("mr_cache", dbg_root); 856 } 857 858 static void delay_time_func(struct timer_list *t) 859 { 860 struct mlx5_ib_dev *dev = timer_container_of(dev, t, delay_timer); 861 862 WRITE_ONCE(dev->fill_delay, 0); 863 } 864 865 static int mlx5r_mkeys_init(struct mlx5_cache_ent *ent) 866 { 867 struct mlx5_mkeys_page *page; 868 869 page = kzalloc_obj(*page); 870 if (!page) 871 return -ENOMEM; 872 INIT_LIST_HEAD(&ent->mkeys_queue.pages_list); 873 spin_lock_init(&ent->mkeys_queue.lock); 874 list_add_tail(&page->list, &ent->mkeys_queue.pages_list); 875 ent->mkeys_queue.num_pages++; 876 return 0; 877 } 878 879 static void mlx5r_mkeys_uninit(struct mlx5_cache_ent *ent) 880 { 881 struct mlx5_mkeys_page *page; 882 883 WARN_ON(ent->mkeys_queue.ci || ent->mkeys_queue.num_pages > 1); 884 page = list_last_entry(&ent->mkeys_queue.pages_list, 885 struct mlx5_mkeys_page, list); 886 list_del(&page->list); 887 kfree(page); 888 } 889 890 struct mlx5_cache_ent * 891 mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev, 892 struct mlx5r_cache_rb_key rb_key, 893 bool persistent_entry) 894 { 895 struct mlx5_cache_ent *ent; 896 int order; 897 int ret; 898 899 ent = kzalloc_obj(*ent); 900 if (!ent) 901 return ERR_PTR(-ENOMEM); 902 903 ret = mlx5r_mkeys_init(ent); 904 if (ret) 905 goto mkeys_err; 906 ent->rb_key = rb_key; 907 ent->dev = dev; 908 ent->is_tmp = !persistent_entry; 909 910 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); 911 912 ret = mlx5_cache_ent_insert(&dev->cache, ent); 913 if (ret) 914 goto ent_insert_err; 915 916 if (persistent_entry) { 917 if (rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) 918 order = MLX5_IMR_KSM_CACHE_ENTRY; 919 else 920 order = order_base_2(rb_key.ndescs) - 2; 921 922 if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && 923 !dev->is_rep && mlx5_core_is_pf(dev->mdev) && 924 mlx5r_umr_can_load_pas(dev, 0)) 925 ent->limit = dev->mdev->profile.mr_cache[order].limit; 926 else 927 ent->limit = 0; 928 929 mlx5_mkey_cache_debugfs_add_ent(dev, ent); 930 } 931 932 return ent; 933 ent_insert_err: 934 mlx5r_mkeys_uninit(ent); 935 mkeys_err: 936 kfree(ent); 937 return ERR_PTR(ret); 938 } 939 940 static void mlx5r_destroy_cache_entries(struct mlx5_ib_dev *dev) 941 { 942 struct rb_root *root = &dev->cache.rb_root; 943 struct mlx5_cache_ent *ent; 944 struct rb_node *node; 945 946 mutex_lock(&dev->cache.rb_lock); 947 node = rb_first(root); 948 while (node) { 949 ent = rb_entry(node, struct mlx5_cache_ent, node); 950 node = rb_next(node); 951 clean_keys(dev, ent); 952 rb_erase(&ent->node, root); 953 mlx5r_mkeys_uninit(ent); 954 kfree(ent); 955 } 956 mutex_unlock(&dev->cache.rb_lock); 957 } 958 959 int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) 960 { 961 struct mlx5_mkey_cache *cache = &dev->cache; 962 struct rb_root *root = &dev->cache.rb_root; 963 struct mlx5r_cache_rb_key rb_key = { 964 .access_mode = MLX5_MKC_ACCESS_MODE_MTT, 965 .ph = MLX5_IB_NO_PH, 966 }; 967 struct mlx5_cache_ent *ent; 968 struct rb_node *node; 969 int ret; 970 int i; 971 972 mutex_init(&dev->slow_path_mutex); 973 mutex_init(&dev->cache.rb_lock); 974 dev->cache.rb_root = RB_ROOT; 975 cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); 976 if (!cache->wq) { 977 mlx5_ib_warn(dev, "failed to create work queue\n"); 978 return -ENOMEM; 979 } 980 981 mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); 982 timer_setup(&dev->delay_timer, delay_time_func, 0); 983 mlx5_mkey_cache_debugfs_init(dev); 984 mutex_lock(&cache->rb_lock); 985 for (i = 0; i <= mkey_cache_max_order(dev); i++) { 986 rb_key.ndescs = MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS << i; 987 ent = mlx5r_cache_create_ent_locked(dev, rb_key, true); 988 if (IS_ERR(ent)) { 989 ret = PTR_ERR(ent); 990 goto err; 991 } 992 } 993 994 ret = mlx5_odp_init_mkey_cache(dev); 995 if (ret) 996 goto err; 997 998 mutex_unlock(&cache->rb_lock); 999 for (node = rb_first(root); node; node = rb_next(node)) { 1000 ent = rb_entry(node, struct mlx5_cache_ent, node); 1001 spin_lock_irq(&ent->mkeys_queue.lock); 1002 queue_adjust_cache_locked(ent); 1003 spin_unlock_irq(&ent->mkeys_queue.lock); 1004 } 1005 1006 return 0; 1007 1008 err: 1009 mutex_unlock(&cache->rb_lock); 1010 mlx5_mkey_cache_debugfs_cleanup(dev); 1011 mlx5r_destroy_cache_entries(dev); 1012 destroy_workqueue(cache->wq); 1013 mlx5_ib_warn(dev, "failed to create mkey cache entry\n"); 1014 return ret; 1015 } 1016 1017 void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) 1018 { 1019 struct rb_root *root = &dev->cache.rb_root; 1020 struct mlx5_cache_ent *ent; 1021 struct rb_node *node; 1022 1023 if (!dev->cache.wq) 1024 return; 1025 1026 mutex_lock(&dev->cache.rb_lock); 1027 for (node = rb_first(root); node; node = rb_next(node)) { 1028 ent = rb_entry(node, struct mlx5_cache_ent, node); 1029 spin_lock_irq(&ent->mkeys_queue.lock); 1030 ent->disabled = true; 1031 spin_unlock_irq(&ent->mkeys_queue.lock); 1032 cancel_delayed_work(&ent->dwork); 1033 } 1034 mutex_unlock(&dev->cache.rb_lock); 1035 1036 /* 1037 * After all entries are disabled and will not reschedule on WQ, 1038 * flush it and all async commands. 1039 */ 1040 flush_workqueue(dev->cache.wq); 1041 1042 mlx5_mkey_cache_debugfs_cleanup(dev); 1043 mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); 1044 1045 /* At this point all entries are disabled and have no concurrent work. */ 1046 mlx5r_destroy_cache_entries(dev); 1047 1048 destroy_workqueue(dev->cache.wq); 1049 timer_delete_sync(&dev->delay_timer); 1050 } 1051 1052 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) 1053 { 1054 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1055 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1056 struct mlx5_ib_mr *mr; 1057 void *mkc; 1058 u32 *in; 1059 int err; 1060 1061 mr = kzalloc_obj(*mr); 1062 if (!mr) 1063 return ERR_PTR(-ENOMEM); 1064 1065 in = kzalloc(inlen, GFP_KERNEL); 1066 if (!in) { 1067 err = -ENOMEM; 1068 goto err_free; 1069 } 1070 1071 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1072 1073 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); 1074 MLX5_SET(mkc, mkc, length64, 1); 1075 set_mkc_access_pd_addr_fields(mkc, acc | IB_ACCESS_RELAXED_ORDERING, 0, 1076 pd); 1077 MLX5_SET(mkc, mkc, ma_translation_mode, MLX5_CAP_GEN(dev->mdev, ats)); 1078 1079 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 1080 if (err) 1081 goto err_in; 1082 1083 kfree(in); 1084 mr->mmkey.type = MLX5_MKEY_MR; 1085 mr->ibmr.lkey = mr->mmkey.key; 1086 mr->ibmr.rkey = mr->mmkey.key; 1087 mr->umem = NULL; 1088 1089 return &mr->ibmr; 1090 1091 err_in: 1092 kfree(in); 1093 1094 err_free: 1095 kfree(mr); 1096 1097 return ERR_PTR(err); 1098 } 1099 1100 static int get_octo_len(u64 addr, u64 len, int page_shift) 1101 { 1102 u64 page_size = 1ULL << page_shift; 1103 u64 offset; 1104 int npages; 1105 1106 offset = addr & (page_size - 1); 1107 npages = ALIGN(len + offset, page_size) >> page_shift; 1108 return (npages + 1) / 2; 1109 } 1110 1111 static int mkey_cache_max_order(struct mlx5_ib_dev *dev) 1112 { 1113 if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) 1114 return MKEY_CACHE_LAST_STD_ENTRY; 1115 return MLX5_MAX_UMR_SHIFT; 1116 } 1117 1118 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, 1119 u64 length, int access_flags, u64 iova) 1120 { 1121 mr->ibmr.lkey = mr->mmkey.key; 1122 mr->ibmr.rkey = mr->mmkey.key; 1123 mr->ibmr.length = length; 1124 mr->ibmr.device = &dev->ib_dev; 1125 mr->ibmr.iova = iova; 1126 mr->access_flags = access_flags; 1127 } 1128 1129 static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem, 1130 u64 iova) 1131 { 1132 /* 1133 * The alignment of iova has already been checked upon entering 1134 * UVERBS_METHOD_REG_DMABUF_MR 1135 */ 1136 umem->iova = iova; 1137 return PAGE_SIZE; 1138 } 1139 1140 static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, 1141 struct ib_umem *umem, u64 iova, 1142 int access_flags, int access_mode, 1143 u16 st_index, u8 ph) 1144 { 1145 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1146 struct mlx5r_cache_rb_key rb_key = {}; 1147 struct mlx5_cache_ent *ent; 1148 struct mlx5_ib_mr *mr; 1149 unsigned long page_size; 1150 1151 if (umem->is_dmabuf) 1152 page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova); 1153 else 1154 page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova, 1155 access_mode); 1156 if (WARN_ON(!page_size)) 1157 return ERR_PTR(-EINVAL); 1158 1159 rb_key.access_mode = access_mode; 1160 rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size); 1161 rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags); 1162 rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags); 1163 rb_key.st_index = st_index; 1164 rb_key.ph = ph; 1165 ent = mkey_cache_ent_from_rb_key(dev, rb_key); 1166 /* 1167 * If the MR can't come from the cache then synchronously create an uncached 1168 * one. 1169 */ 1170 if (!ent) { 1171 mutex_lock(&dev->slow_path_mutex); 1172 mr = reg_create(pd, umem, iova, access_flags, page_size, false, access_mode, 1173 st_index, ph); 1174 mutex_unlock(&dev->slow_path_mutex); 1175 if (IS_ERR(mr)) 1176 return mr; 1177 mr->mmkey.rb_key = rb_key; 1178 mr->mmkey.cacheable = true; 1179 return mr; 1180 } 1181 1182 mr = _mlx5_mr_cache_alloc(dev, ent); 1183 if (IS_ERR(mr)) 1184 return mr; 1185 1186 mr->ibmr.pd = pd; 1187 mr->umem = umem; 1188 mr->page_shift = order_base_2(page_size); 1189 set_mr_fields(dev, mr, umem->length, access_flags, iova); 1190 1191 return mr; 1192 } 1193 1194 static struct ib_mr * 1195 reg_create_crossing_vhca_mr(struct ib_pd *pd, u64 iova, u64 length, int access_flags, 1196 u32 crossed_lkey) 1197 { 1198 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1199 int access_mode = MLX5_MKC_ACCESS_MODE_CROSSING; 1200 struct mlx5_ib_mr *mr; 1201 void *mkc; 1202 int inlen; 1203 u32 *in; 1204 int err; 1205 1206 if (!MLX5_CAP_GEN(dev->mdev, crossing_vhca_mkey)) 1207 return ERR_PTR(-EOPNOTSUPP); 1208 1209 mr = kzalloc_obj(*mr); 1210 if (!mr) 1211 return ERR_PTR(-ENOMEM); 1212 1213 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1214 in = kvzalloc(inlen, GFP_KERNEL); 1215 if (!in) { 1216 err = -ENOMEM; 1217 goto err_1; 1218 } 1219 1220 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1221 MLX5_SET(mkc, mkc, crossing_target_vhca_id, 1222 MLX5_CAP_GEN(dev->mdev, vhca_id)); 1223 MLX5_SET(mkc, mkc, translations_octword_size, crossed_lkey); 1224 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); 1225 MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); 1226 1227 /* for this crossing mkey IOVA should be 0 and len should be IOVA + len */ 1228 set_mkc_access_pd_addr_fields(mkc, access_flags, 0, pd); 1229 MLX5_SET64(mkc, mkc, len, iova + length); 1230 1231 MLX5_SET(mkc, mkc, free, 0); 1232 MLX5_SET(mkc, mkc, umr_en, 0); 1233 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 1234 if (err) 1235 goto err_2; 1236 1237 mr->mmkey.type = MLX5_MKEY_MR; 1238 set_mr_fields(dev, mr, length, access_flags, iova); 1239 mr->ibmr.pd = pd; 1240 kvfree(in); 1241 mlx5_ib_dbg(dev, "crossing mkey = 0x%x\n", mr->mmkey.key); 1242 1243 return &mr->ibmr; 1244 err_2: 1245 kvfree(in); 1246 err_1: 1247 kfree(mr); 1248 return ERR_PTR(err); 1249 } 1250 1251 /* 1252 * If ibmr is NULL it will be allocated by reg_create. 1253 * Else, the given ibmr will be used. 1254 */ 1255 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, 1256 u64 iova, int access_flags, 1257 unsigned long page_size, bool populate, 1258 int access_mode, u16 st_index, u8 ph) 1259 { 1260 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1261 struct mlx5_ib_mr *mr; 1262 __be64 *pas; 1263 void *mkc; 1264 int inlen; 1265 u32 *in; 1266 int err; 1267 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)) && 1268 (access_mode == MLX5_MKC_ACCESS_MODE_MTT) && 1269 (ph == MLX5_IB_NO_PH); 1270 bool ksm_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM); 1271 1272 if (!page_size) 1273 return ERR_PTR(-EINVAL); 1274 mr = kzalloc_obj(*mr); 1275 if (!mr) 1276 return ERR_PTR(-ENOMEM); 1277 1278 mr->ibmr.pd = pd; 1279 mr->access_flags = access_flags; 1280 mr->page_shift = order_base_2(page_size); 1281 1282 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1283 if (populate) 1284 inlen += sizeof(*pas) * 1285 roundup(ib_umem_num_dma_blocks(umem, page_size), 2); 1286 in = kvzalloc(inlen, GFP_KERNEL); 1287 if (!in) { 1288 err = -ENOMEM; 1289 goto err_1; 1290 } 1291 pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 1292 if (populate) { 1293 if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND || ksm_mode)) { 1294 err = -EINVAL; 1295 goto err_2; 1296 } 1297 mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas, 1298 pg_cap ? MLX5_IB_MTT_PRESENT : 0); 1299 } 1300 1301 /* The pg_access bit allows setting the access flags 1302 * in the page list submitted with the command. 1303 */ 1304 MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); 1305 1306 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1307 set_mkc_access_pd_addr_fields(mkc, access_flags, iova, 1308 populate ? pd : dev->umrc.pd); 1309 /* In case a data direct flow, overwrite the pdn field by its internal kernel PD */ 1310 if (umem->is_dmabuf && ksm_mode) 1311 MLX5_SET(mkc, mkc, pd, dev->ddr.pdn); 1312 1313 MLX5_SET(mkc, mkc, free, !populate); 1314 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode); 1315 MLX5_SET(mkc, mkc, umr_en, 1); 1316 1317 MLX5_SET64(mkc, mkc, len, umem->length); 1318 MLX5_SET(mkc, mkc, bsf_octword_size, 0); 1319 if (ksm_mode) 1320 MLX5_SET(mkc, mkc, translations_octword_size, 1321 get_octo_len(iova, umem->length, mr->page_shift) * 2); 1322 else 1323 MLX5_SET(mkc, mkc, translations_octword_size, 1324 get_octo_len(iova, umem->length, mr->page_shift)); 1325 MLX5_SET(mkc, mkc, log_page_size, mr->page_shift); 1326 if (mlx5_umem_needs_ats(dev, umem, access_flags)) 1327 MLX5_SET(mkc, mkc, ma_translation_mode, 1); 1328 if (populate) { 1329 MLX5_SET(create_mkey_in, in, translations_octword_actual_size, 1330 get_octo_len(iova, umem->length, mr->page_shift)); 1331 } 1332 1333 if (ph != MLX5_IB_NO_PH) { 1334 MLX5_SET(mkc, mkc, pcie_tph_en, 1); 1335 MLX5_SET(mkc, mkc, pcie_tph_ph, ph); 1336 if (st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX) 1337 MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index, st_index); 1338 } 1339 1340 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 1341 if (err) { 1342 mlx5_ib_warn(dev, "create mkey failed\n"); 1343 goto err_2; 1344 } 1345 mr->mmkey.type = MLX5_MKEY_MR; 1346 mr->mmkey.ndescs = get_octo_len(iova, umem->length, mr->page_shift); 1347 mr->umem = umem; 1348 set_mr_fields(dev, mr, umem->length, access_flags, iova); 1349 kvfree(in); 1350 1351 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); 1352 1353 return mr; 1354 1355 err_2: 1356 kvfree(in); 1357 err_1: 1358 kfree(mr); 1359 return ERR_PTR(err); 1360 } 1361 1362 static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, 1363 u64 length, int acc, int mode) 1364 { 1365 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1366 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1367 struct mlx5_ib_mr *mr; 1368 void *mkc; 1369 u32 *in; 1370 int err; 1371 1372 mr = kzalloc_obj(*mr); 1373 if (!mr) 1374 return ERR_PTR(-ENOMEM); 1375 1376 in = kzalloc(inlen, GFP_KERNEL); 1377 if (!in) { 1378 err = -ENOMEM; 1379 goto err_free; 1380 } 1381 1382 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1383 1384 MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); 1385 MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); 1386 MLX5_SET64(mkc, mkc, len, length); 1387 set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd); 1388 1389 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 1390 if (err) 1391 goto err_in; 1392 1393 kfree(in); 1394 1395 set_mr_fields(dev, mr, length, acc, start_addr); 1396 1397 return &mr->ibmr; 1398 1399 err_in: 1400 kfree(in); 1401 1402 err_free: 1403 kfree(mr); 1404 1405 return ERR_PTR(err); 1406 } 1407 1408 int mlx5_ib_advise_mr(struct ib_pd *pd, 1409 enum ib_uverbs_advise_mr_advice advice, 1410 u32 flags, 1411 struct ib_sge *sg_list, 1412 u32 num_sge, 1413 struct uverbs_attr_bundle *attrs) 1414 { 1415 if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH && 1416 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && 1417 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT) 1418 return -EOPNOTSUPP; 1419 1420 return mlx5_ib_advise_mr_prefetch(pd, advice, flags, 1421 sg_list, num_sge); 1422 } 1423 1424 struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, 1425 struct ib_dm_mr_attr *attr, 1426 struct uverbs_attr_bundle *attrs) 1427 { 1428 struct mlx5_ib_dm *mdm = to_mdm(dm); 1429 struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev; 1430 u64 start_addr = mdm->dev_addr + attr->offset; 1431 int mode; 1432 1433 switch (mdm->type) { 1434 case MLX5_IB_UAPI_DM_TYPE_MEMIC: 1435 if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS) 1436 return ERR_PTR(-EINVAL); 1437 1438 mode = MLX5_MKC_ACCESS_MODE_MEMIC; 1439 start_addr -= pci_resource_start(dev->pdev, 0); 1440 break; 1441 case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: 1442 case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: 1443 case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM: 1444 case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM: 1445 if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS) 1446 return ERR_PTR(-EINVAL); 1447 1448 mode = MLX5_MKC_ACCESS_MODE_SW_ICM; 1449 break; 1450 default: 1451 return ERR_PTR(-EINVAL); 1452 } 1453 1454 return mlx5_ib_get_dm_mr(pd, start_addr, attr->length, 1455 attr->access_flags, mode); 1456 } 1457 1458 static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, 1459 u64 iova, int access_flags, 1460 struct ib_dmah *dmah) 1461 { 1462 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1463 struct mlx5_ib_mr *mr = NULL; 1464 bool xlt_with_umr; 1465 u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX; 1466 u8 ph = MLX5_IB_NO_PH; 1467 int err; 1468 1469 if (dmah) { 1470 struct mlx5_ib_dmah *mdmah = to_mdmah(dmah); 1471 1472 ph = dmah->ph; 1473 if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS)) 1474 st_index = mdmah->st_index; 1475 } 1476 1477 xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length); 1478 if (xlt_with_umr) { 1479 mr = alloc_cacheable_mr(pd, umem, iova, access_flags, 1480 MLX5_MKC_ACCESS_MODE_MTT, 1481 st_index, ph); 1482 } else { 1483 unsigned long page_size = mlx5_umem_mkc_find_best_pgsz( 1484 dev, umem, iova, MLX5_MKC_ACCESS_MODE_MTT); 1485 1486 mutex_lock(&dev->slow_path_mutex); 1487 mr = reg_create(pd, umem, iova, access_flags, page_size, 1488 true, MLX5_MKC_ACCESS_MODE_MTT, 1489 st_index, ph); 1490 mutex_unlock(&dev->slow_path_mutex); 1491 } 1492 if (IS_ERR(mr)) { 1493 ib_umem_release(umem); 1494 return ERR_CAST(mr); 1495 } 1496 1497 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); 1498 1499 atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); 1500 1501 if (xlt_with_umr) { 1502 /* 1503 * If the MR was created with reg_create then it will be 1504 * configured properly but left disabled. It is safe to go ahead 1505 * and configure it again via UMR while enabling it. 1506 */ 1507 err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); 1508 if (err) { 1509 mlx5_ib_dereg_mr(&mr->ibmr, NULL); 1510 return ERR_PTR(err); 1511 } 1512 } 1513 return &mr->ibmr; 1514 } 1515 1516 static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, 1517 u64 iova, int access_flags, 1518 struct ib_udata *udata) 1519 { 1520 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1521 struct ib_umem_odp *odp; 1522 struct mlx5_ib_mr *mr; 1523 int err; 1524 1525 if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) 1526 return ERR_PTR(-EOPNOTSUPP); 1527 1528 err = mlx5r_odp_create_eq(dev, &dev->odp_pf_eq); 1529 if (err) 1530 return ERR_PTR(err); 1531 if (!start && length == U64_MAX) { 1532 if (iova != 0) 1533 return ERR_PTR(-EINVAL); 1534 if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) 1535 return ERR_PTR(-EINVAL); 1536 1537 mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags); 1538 if (IS_ERR(mr)) 1539 return ERR_CAST(mr); 1540 return &mr->ibmr; 1541 } 1542 1543 /* ODP requires xlt update via umr to work. */ 1544 if (!mlx5r_umr_can_load_pas(dev, length)) 1545 return ERR_PTR(-EINVAL); 1546 1547 odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, 1548 &mlx5_mn_ops); 1549 if (IS_ERR(odp)) 1550 return ERR_CAST(odp); 1551 1552 mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags, 1553 MLX5_MKC_ACCESS_MODE_MTT, 1554 MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX, 1555 MLX5_IB_NO_PH); 1556 if (IS_ERR(mr)) { 1557 ib_umem_release(&odp->umem); 1558 return ERR_CAST(mr); 1559 } 1560 xa_init(&mr->implicit_children); 1561 1562 odp->private = mr; 1563 err = mlx5r_store_odp_mkey(dev, &mr->mmkey); 1564 if (err) 1565 goto err_dereg_mr; 1566 1567 err = mlx5_ib_init_odp_mr(mr); 1568 if (err) 1569 goto err_dereg_mr; 1570 return &mr->ibmr; 1571 1572 err_dereg_mr: 1573 mlx5_ib_dereg_mr(&mr->ibmr, NULL); 1574 return ERR_PTR(err); 1575 } 1576 1577 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 1578 u64 iova, int access_flags, 1579 struct ib_dmah *dmah, 1580 struct ib_udata *udata) 1581 { 1582 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1583 struct ib_umem *umem; 1584 int err; 1585 1586 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || 1587 ((access_flags & IB_ACCESS_ON_DEMAND) && dmah)) 1588 return ERR_PTR(-EOPNOTSUPP); 1589 1590 mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", 1591 start, iova, length, access_flags); 1592 1593 err = mlx5r_umr_resource_init(dev); 1594 if (err) 1595 return ERR_PTR(err); 1596 1597 if (access_flags & IB_ACCESS_ON_DEMAND) 1598 return create_user_odp_mr(pd, start, length, iova, access_flags, 1599 udata); 1600 umem = ib_umem_get(&dev->ib_dev, start, length, access_flags); 1601 if (IS_ERR(umem)) 1602 return ERR_CAST(umem); 1603 return create_real_mr(pd, umem, iova, access_flags, dmah); 1604 } 1605 1606 static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) 1607 { 1608 struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv; 1609 struct mlx5_ib_mr *mr = umem_dmabuf->private; 1610 1611 dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); 1612 1613 if (!umem_dmabuf->sgt || !mr) 1614 return; 1615 1616 mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); 1617 ib_umem_dmabuf_unmap_pages(umem_dmabuf); 1618 } 1619 1620 static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = { 1621 .allow_peer2peer = 1, 1622 .move_notify = mlx5_ib_dmabuf_invalidate_cb, 1623 }; 1624 1625 static struct ib_mr * 1626 reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device, 1627 u64 offset, u64 length, u64 virt_addr, 1628 int fd, int access_flags, int access_mode, 1629 struct ib_dmah *dmah) 1630 { 1631 bool pinned_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM); 1632 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1633 struct mlx5_ib_mr *mr = NULL; 1634 struct ib_umem_dmabuf *umem_dmabuf; 1635 u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX; 1636 u8 ph = MLX5_IB_NO_PH; 1637 int err; 1638 1639 err = mlx5r_umr_resource_init(dev); 1640 if (err) 1641 return ERR_PTR(err); 1642 1643 if (!pinned_mode) 1644 umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, 1645 offset, length, fd, 1646 access_flags, 1647 &mlx5_ib_dmabuf_attach_ops); 1648 else if (dma_device) 1649 umem_dmabuf = ib_umem_dmabuf_get_pinned_with_dma_device(&dev->ib_dev, 1650 dma_device, offset, length, 1651 fd, access_flags); 1652 else 1653 umem_dmabuf = ib_umem_dmabuf_get_pinned( 1654 &dev->ib_dev, offset, length, fd, access_flags); 1655 1656 if (IS_ERR(umem_dmabuf)) { 1657 mlx5_ib_dbg(dev, "umem_dmabuf get failed (%pe)\n", umem_dmabuf); 1658 return ERR_CAST(umem_dmabuf); 1659 } 1660 1661 if (dmah) { 1662 struct mlx5_ib_dmah *mdmah = to_mdmah(dmah); 1663 1664 ph = dmah->ph; 1665 if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS)) 1666 st_index = mdmah->st_index; 1667 } 1668 1669 mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr, 1670 access_flags, access_mode, 1671 st_index, ph); 1672 if (IS_ERR(mr)) { 1673 ib_umem_release(&umem_dmabuf->umem); 1674 return ERR_CAST(mr); 1675 } 1676 1677 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); 1678 1679 atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); 1680 umem_dmabuf->private = mr; 1681 if (!pinned_mode) { 1682 err = mlx5r_store_odp_mkey(dev, &mr->mmkey); 1683 if (err) 1684 goto err_dereg_mr; 1685 } else { 1686 mr->data_direct = true; 1687 } 1688 1689 err = mlx5_ib_init_dmabuf_mr(mr); 1690 if (err) 1691 goto err_dereg_mr; 1692 return &mr->ibmr; 1693 1694 err_dereg_mr: 1695 __mlx5_ib_dereg_mr(&mr->ibmr); 1696 return ERR_PTR(err); 1697 } 1698 1699 static struct ib_mr * 1700 reg_user_mr_dmabuf_by_data_direct(struct ib_pd *pd, u64 offset, 1701 u64 length, u64 virt_addr, 1702 int fd, int access_flags) 1703 { 1704 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1705 struct mlx5_data_direct_dev *data_direct_dev; 1706 struct ib_mr *crossing_mr; 1707 struct ib_mr *crossed_mr; 1708 int ret = 0; 1709 1710 /* As of HW behaviour the IOVA must be page aligned in KSM mode */ 1711 if (!PAGE_ALIGNED(virt_addr) || (access_flags & IB_ACCESS_ON_DEMAND)) 1712 return ERR_PTR(-EOPNOTSUPP); 1713 1714 mutex_lock(&dev->data_direct_lock); 1715 data_direct_dev = dev->data_direct_dev; 1716 if (!data_direct_dev) { 1717 ret = -EINVAL; 1718 goto end; 1719 } 1720 1721 /* If no device's 'data direct mkey' with RO flags exists 1722 * mask it out accordingly. 1723 */ 1724 if (!dev->ddr.mkey_ro_valid) 1725 access_flags &= ~IB_ACCESS_RELAXED_ORDERING; 1726 crossed_mr = reg_user_mr_dmabuf(pd, &data_direct_dev->pdev->dev, 1727 offset, length, virt_addr, fd, 1728 access_flags, MLX5_MKC_ACCESS_MODE_KSM, 1729 NULL); 1730 if (IS_ERR(crossed_mr)) { 1731 ret = PTR_ERR(crossed_mr); 1732 goto end; 1733 } 1734 1735 mutex_lock(&dev->slow_path_mutex); 1736 crossing_mr = reg_create_crossing_vhca_mr(pd, virt_addr, length, access_flags, 1737 crossed_mr->lkey); 1738 mutex_unlock(&dev->slow_path_mutex); 1739 if (IS_ERR(crossing_mr)) { 1740 __mlx5_ib_dereg_mr(crossed_mr); 1741 ret = PTR_ERR(crossing_mr); 1742 goto end; 1743 } 1744 1745 list_add_tail(&to_mmr(crossed_mr)->dd_node, &dev->data_direct_mr_list); 1746 to_mmr(crossing_mr)->dd_crossed_mr = to_mmr(crossed_mr); 1747 to_mmr(crossing_mr)->data_direct = true; 1748 end: 1749 mutex_unlock(&dev->data_direct_lock); 1750 return ret ? ERR_PTR(ret) : crossing_mr; 1751 } 1752 1753 struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset, 1754 u64 length, u64 virt_addr, 1755 int fd, int access_flags, 1756 struct ib_dmah *dmah, 1757 struct uverbs_attr_bundle *attrs) 1758 { 1759 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1760 int mlx5_access_flags = 0; 1761 int err; 1762 1763 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || 1764 !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) 1765 return ERR_PTR(-EOPNOTSUPP); 1766 1767 if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS)) { 1768 err = uverbs_get_flags32(&mlx5_access_flags, attrs, 1769 MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, 1770 MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT); 1771 if (err) 1772 return ERR_PTR(err); 1773 } 1774 1775 mlx5_ib_dbg(dev, 1776 "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x, mlx5_access_flags 0x%x\n", 1777 offset, virt_addr, length, fd, access_flags, mlx5_access_flags); 1778 1779 /* dmabuf requires xlt update via umr to work. */ 1780 if (!mlx5r_umr_can_load_pas(dev, length)) 1781 return ERR_PTR(-EINVAL); 1782 1783 if (mlx5_access_flags & MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT) 1784 return reg_user_mr_dmabuf_by_data_direct(pd, offset, length, virt_addr, 1785 fd, access_flags); 1786 1787 return reg_user_mr_dmabuf(pd, NULL, offset, length, virt_addr, fd, 1788 access_flags, MLX5_MKC_ACCESS_MODE_MTT, dmah); 1789 } 1790 1791 /* 1792 * True if the change in access flags can be done via UMR, only some access 1793 * flags can be updated. 1794 */ 1795 static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev, 1796 unsigned int current_access_flags, 1797 unsigned int target_access_flags) 1798 { 1799 unsigned int diffs = current_access_flags ^ target_access_flags; 1800 1801 if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | 1802 IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING | 1803 IB_ACCESS_REMOTE_ATOMIC)) 1804 return false; 1805 return mlx5r_umr_can_reconfig(dev, current_access_flags, 1806 target_access_flags); 1807 } 1808 1809 static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, 1810 struct ib_umem *new_umem, 1811 int new_access_flags, u64 iova, 1812 unsigned long *page_size) 1813 { 1814 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 1815 1816 /* We only track the allocated sizes of MRs from the cache */ 1817 if (!mr->mmkey.cache_ent) 1818 return false; 1819 if (!mlx5r_umr_can_load_pas(dev, new_umem->length)) 1820 return false; 1821 1822 *page_size = mlx5_umem_mkc_find_best_pgsz( 1823 dev, new_umem, iova, mr->mmkey.cache_ent->rb_key.access_mode); 1824 if (WARN_ON(!*page_size)) 1825 return false; 1826 return (mr->mmkey.cache_ent->rb_key.ndescs) >= 1827 ib_umem_num_dma_blocks(new_umem, *page_size); 1828 } 1829 1830 static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, 1831 int access_flags, int flags, struct ib_umem *new_umem, 1832 u64 iova, unsigned long page_size) 1833 { 1834 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 1835 int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE; 1836 struct ib_umem *old_umem = mr->umem; 1837 int err; 1838 1839 /* 1840 * To keep everything simple the MR is revoked before we start to mess 1841 * with it. This ensure the change is atomic relative to any use of the 1842 * MR. 1843 */ 1844 err = mlx5r_umr_revoke_mr(mr); 1845 if (err) 1846 return err; 1847 1848 if (flags & IB_MR_REREG_PD) { 1849 mr->ibmr.pd = pd; 1850 upd_flags |= MLX5_IB_UPD_XLT_PD; 1851 } 1852 if (flags & IB_MR_REREG_ACCESS) { 1853 mr->access_flags = access_flags; 1854 upd_flags |= MLX5_IB_UPD_XLT_ACCESS; 1855 } 1856 1857 mr->ibmr.iova = iova; 1858 mr->ibmr.length = new_umem->length; 1859 mr->page_shift = order_base_2(page_size); 1860 mr->umem = new_umem; 1861 err = mlx5r_umr_update_mr_pas(mr, upd_flags); 1862 if (err) { 1863 /* 1864 * The MR is revoked at this point so there is no issue to free 1865 * new_umem. 1866 */ 1867 mr->umem = old_umem; 1868 return err; 1869 } 1870 1871 atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages); 1872 ib_umem_release(old_umem); 1873 atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages); 1874 return 0; 1875 } 1876 1877 struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, 1878 u64 length, u64 iova, int new_access_flags, 1879 struct ib_pd *new_pd, 1880 struct ib_udata *udata) 1881 { 1882 struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); 1883 struct mlx5_ib_mr *mr = to_mmr(ib_mr); 1884 int err; 1885 1886 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || mr->data_direct || 1887 mr->mmkey.rb_key.ph != MLX5_IB_NO_PH) 1888 return ERR_PTR(-EOPNOTSUPP); 1889 1890 mlx5_ib_dbg( 1891 dev, 1892 "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", 1893 start, iova, length, new_access_flags); 1894 1895 if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) 1896 return ERR_PTR(-EOPNOTSUPP); 1897 1898 if (!(flags & IB_MR_REREG_ACCESS)) 1899 new_access_flags = mr->access_flags; 1900 if (!(flags & IB_MR_REREG_PD)) 1901 new_pd = ib_mr->pd; 1902 1903 if (!(flags & IB_MR_REREG_TRANS)) { 1904 struct ib_umem *umem; 1905 1906 /* Fast path for PD/access change */ 1907 if (can_use_umr_rereg_access(dev, mr->access_flags, 1908 new_access_flags)) { 1909 err = mlx5r_umr_rereg_pd_access(mr, new_pd, 1910 new_access_flags); 1911 if (err) 1912 return ERR_PTR(err); 1913 return NULL; 1914 } 1915 /* DM or ODP MR's don't have a normal umem so we can't re-use it */ 1916 if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) 1917 goto recreate; 1918 1919 /* 1920 * Only one active MR can refer to a umem at one time, revoke 1921 * the old MR before assigning the umem to the new one. 1922 */ 1923 err = mlx5r_umr_revoke_mr(mr); 1924 if (err) 1925 return ERR_PTR(err); 1926 umem = mr->umem; 1927 mr->umem = NULL; 1928 atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); 1929 1930 return create_real_mr(new_pd, umem, mr->ibmr.iova, 1931 new_access_flags, NULL); 1932 } 1933 1934 /* 1935 * DM doesn't have a PAS list so we can't re-use it, odp/dmabuf does 1936 * but the logic around releasing the umem is different 1937 */ 1938 if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) 1939 goto recreate; 1940 1941 if (!(new_access_flags & IB_ACCESS_ON_DEMAND) && 1942 can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) { 1943 struct ib_umem *new_umem; 1944 unsigned long page_size; 1945 1946 new_umem = ib_umem_get(&dev->ib_dev, start, length, 1947 new_access_flags); 1948 if (IS_ERR(new_umem)) 1949 return ERR_CAST(new_umem); 1950 1951 /* Fast path for PAS change */ 1952 if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova, 1953 &page_size)) { 1954 err = umr_rereg_pas(mr, new_pd, new_access_flags, flags, 1955 new_umem, iova, page_size); 1956 if (err) { 1957 ib_umem_release(new_umem); 1958 return ERR_PTR(err); 1959 } 1960 return NULL; 1961 } 1962 return create_real_mr(new_pd, new_umem, iova, new_access_flags, NULL); 1963 } 1964 1965 /* 1966 * Everything else has no state we can preserve, just create a new MR 1967 * from scratch 1968 */ 1969 recreate: 1970 return mlx5_ib_reg_user_mr(new_pd, start, length, iova, 1971 new_access_flags, NULL, udata); 1972 } 1973 1974 static int 1975 mlx5_alloc_priv_descs(struct ib_device *device, 1976 struct mlx5_ib_mr *mr, 1977 int ndescs, 1978 int desc_size) 1979 { 1980 struct mlx5_ib_dev *dev = to_mdev(device); 1981 struct device *ddev = &dev->mdev->pdev->dev; 1982 int size = ndescs * desc_size; 1983 int add_size; 1984 int ret; 1985 1986 add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0); 1987 if (is_power_of_2(MLX5_UMR_ALIGN) && add_size) { 1988 int end = max_t(int, MLX5_UMR_ALIGN, roundup_pow_of_two(size)); 1989 1990 add_size = min_t(int, end - size, add_size); 1991 } 1992 1993 mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL); 1994 if (!mr->descs_alloc) 1995 return -ENOMEM; 1996 1997 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); 1998 1999 mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE); 2000 if (dma_mapping_error(ddev, mr->desc_map)) { 2001 ret = -ENOMEM; 2002 goto err; 2003 } 2004 2005 return 0; 2006 err: 2007 kfree(mr->descs_alloc); 2008 2009 return ret; 2010 } 2011 2012 static void 2013 mlx5_free_priv_descs(struct mlx5_ib_mr *mr) 2014 { 2015 if (!mr->umem && !mr->data_direct && 2016 mr->ibmr.type != IB_MR_TYPE_DM && mr->descs) { 2017 struct ib_device *device = mr->ibmr.device; 2018 int size = mr->max_descs * mr->desc_size; 2019 struct mlx5_ib_dev *dev = to_mdev(device); 2020 2021 dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, 2022 DMA_TO_DEVICE); 2023 kfree(mr->descs_alloc); 2024 mr->descs = NULL; 2025 } 2026 } 2027 2028 static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, 2029 struct mlx5_ib_mr *mr) 2030 { 2031 struct mlx5_mkey_cache *cache = &dev->cache; 2032 struct mlx5_cache_ent *ent; 2033 int ret; 2034 2035 if (mr->mmkey.cache_ent) { 2036 spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); 2037 goto end; 2038 } 2039 2040 mutex_lock(&cache->rb_lock); 2041 ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key); 2042 if (ent) { 2043 if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) { 2044 if (ent->disabled) { 2045 mutex_unlock(&cache->rb_lock); 2046 return -EOPNOTSUPP; 2047 } 2048 mr->mmkey.cache_ent = ent; 2049 spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); 2050 mutex_unlock(&cache->rb_lock); 2051 goto end; 2052 } 2053 } 2054 2055 ent = mlx5r_cache_create_ent_locked(dev, mr->mmkey.rb_key, false); 2056 mutex_unlock(&cache->rb_lock); 2057 if (IS_ERR(ent)) 2058 return PTR_ERR(ent); 2059 2060 mr->mmkey.cache_ent = ent; 2061 spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); 2062 2063 end: 2064 ret = push_mkey_locked(mr->mmkey.cache_ent, mr->mmkey.key); 2065 spin_unlock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); 2066 return ret; 2067 } 2068 2069 static int mlx5_ib_revoke_data_direct_mr(struct mlx5_ib_mr *mr) 2070 { 2071 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 2072 struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem); 2073 int err; 2074 2075 lockdep_assert_held(&dev->data_direct_lock); 2076 mr->revoked = true; 2077 err = mlx5r_umr_revoke_mr(mr); 2078 if (WARN_ON(err)) 2079 return err; 2080 2081 ib_umem_dmabuf_revoke(umem_dmabuf); 2082 return 0; 2083 } 2084 2085 void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev) 2086 { 2087 struct mlx5_ib_mr *mr, *next; 2088 2089 lockdep_assert_held(&dev->data_direct_lock); 2090 2091 list_for_each_entry_safe(mr, next, &dev->data_direct_mr_list, dd_node) { 2092 list_del(&mr->dd_node); 2093 mlx5_ib_revoke_data_direct_mr(mr); 2094 } 2095 } 2096 2097 static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr) 2098 { 2099 bool is_odp_dma_buf = is_dmabuf_mr(mr) && 2100 !to_ib_umem_dmabuf(mr->umem)->pinned; 2101 bool is_odp = is_odp_mr(mr); 2102 int ret; 2103 2104 if (is_odp) 2105 mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); 2106 2107 if (is_odp_dma_buf) 2108 dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, 2109 NULL); 2110 2111 ret = mlx5r_umr_revoke_mr(mr); 2112 2113 if (is_odp) { 2114 if (!ret) 2115 to_ib_umem_odp(mr->umem)->private = NULL; 2116 mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); 2117 } 2118 2119 if (is_odp_dma_buf) { 2120 if (!ret) 2121 to_ib_umem_dmabuf(mr->umem)->private = NULL; 2122 dma_resv_unlock( 2123 to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); 2124 } 2125 2126 return ret; 2127 } 2128 2129 static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr) 2130 { 2131 bool is_odp_dma_buf = is_dmabuf_mr(mr) && 2132 !to_ib_umem_dmabuf(mr->umem)->pinned; 2133 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 2134 struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; 2135 bool is_odp = is_odp_mr(mr); 2136 bool from_cache = !!ent; 2137 int ret; 2138 2139 if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) && 2140 !cache_ent_find_and_store(dev, mr)) { 2141 ent = mr->mmkey.cache_ent; 2142 /* upon storing to a clean temp entry - schedule its cleanup */ 2143 spin_lock_irq(&ent->mkeys_queue.lock); 2144 if (from_cache) 2145 ent->in_use--; 2146 if (ent->is_tmp && !ent->tmp_cleanup_scheduled) { 2147 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 2148 secs_to_jiffies(30)); 2149 ent->tmp_cleanup_scheduled = true; 2150 } 2151 spin_unlock_irq(&ent->mkeys_queue.lock); 2152 return 0; 2153 } 2154 2155 if (ent) { 2156 spin_lock_irq(&ent->mkeys_queue.lock); 2157 ent->in_use--; 2158 mr->mmkey.cache_ent = NULL; 2159 spin_unlock_irq(&ent->mkeys_queue.lock); 2160 } 2161 2162 if (is_odp) 2163 mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); 2164 2165 if (is_odp_dma_buf) 2166 dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, 2167 NULL); 2168 ret = destroy_mkey(dev, mr); 2169 if (is_odp) { 2170 if (!ret) 2171 to_ib_umem_odp(mr->umem)->private = NULL; 2172 mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); 2173 } 2174 2175 if (is_odp_dma_buf) { 2176 if (!ret) 2177 to_ib_umem_dmabuf(mr->umem)->private = NULL; 2178 dma_resv_unlock( 2179 to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); 2180 } 2181 return ret; 2182 } 2183 2184 static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr) 2185 { 2186 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2187 struct mlx5_ib_dev *dev = to_mdev(ibmr->device); 2188 int rc; 2189 2190 /* 2191 * Any async use of the mr must hold the refcount, once the refcount 2192 * goes to zero no other thread, such as ODP page faults, prefetch, any 2193 * UMR activity, etc can touch the mkey. Thus it is safe to destroy it. 2194 */ 2195 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && 2196 refcount_read(&mr->mmkey.usecount) != 0 && 2197 xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))) 2198 mlx5r_deref_wait_odp_mkey(&mr->mmkey); 2199 2200 if (ibmr->type == IB_MR_TYPE_INTEGRITY) { 2201 xa_cmpxchg(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), 2202 mr->sig, NULL, GFP_KERNEL); 2203 2204 if (mr->mtt_mr) { 2205 rc = mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL); 2206 if (rc) 2207 return rc; 2208 mr->mtt_mr = NULL; 2209 } 2210 if (mr->klm_mr) { 2211 rc = mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL); 2212 if (rc) 2213 return rc; 2214 mr->klm_mr = NULL; 2215 } 2216 2217 if (mlx5_core_destroy_psv(dev->mdev, 2218 mr->sig->psv_memory.psv_idx)) 2219 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", 2220 mr->sig->psv_memory.psv_idx); 2221 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) 2222 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", 2223 mr->sig->psv_wire.psv_idx); 2224 kfree(mr->sig); 2225 mr->sig = NULL; 2226 } 2227 2228 /* Stop DMA */ 2229 rc = mlx5r_handle_mkey_cleanup(mr); 2230 if (rc) 2231 return rc; 2232 2233 if (mr->umem) { 2234 bool is_odp = is_odp_mr(mr); 2235 2236 if (!is_odp) 2237 atomic_sub(ib_umem_num_pages(mr->umem), 2238 &dev->mdev->priv.reg_pages); 2239 ib_umem_release(mr->umem); 2240 if (is_odp) 2241 mlx5_ib_free_odp_mr(mr); 2242 } 2243 2244 if (!mr->mmkey.cache_ent) 2245 mlx5_free_priv_descs(mr); 2246 2247 kfree(mr); 2248 return 0; 2249 } 2250 2251 static int dereg_crossing_data_direct_mr(struct mlx5_ib_dev *dev, 2252 struct mlx5_ib_mr *mr) 2253 { 2254 struct mlx5_ib_mr *dd_crossed_mr = mr->dd_crossed_mr; 2255 int ret; 2256 2257 ret = __mlx5_ib_dereg_mr(&mr->ibmr); 2258 if (ret) 2259 return ret; 2260 2261 mutex_lock(&dev->data_direct_lock); 2262 if (!dd_crossed_mr->revoked) 2263 list_del(&dd_crossed_mr->dd_node); 2264 2265 ret = __mlx5_ib_dereg_mr(&dd_crossed_mr->ibmr); 2266 mutex_unlock(&dev->data_direct_lock); 2267 return ret; 2268 } 2269 2270 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) 2271 { 2272 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2273 struct mlx5_ib_dev *dev = to_mdev(ibmr->device); 2274 2275 if (mr->data_direct) 2276 return dereg_crossing_data_direct_mr(dev, mr); 2277 2278 return __mlx5_ib_dereg_mr(ibmr); 2279 } 2280 2281 static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs, 2282 int access_mode, int page_shift) 2283 { 2284 struct mlx5_ib_dev *dev = to_mdev(pd->device); 2285 void *mkc; 2286 2287 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 2288 2289 /* This is only used from the kernel, so setting the PD is OK. */ 2290 set_mkc_access_pd_addr_fields(mkc, IB_ACCESS_RELAXED_ORDERING, 0, pd); 2291 MLX5_SET(mkc, mkc, free, 1); 2292 MLX5_SET(mkc, mkc, translations_octword_size, ndescs); 2293 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); 2294 MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); 2295 MLX5_SET(mkc, mkc, umr_en, 1); 2296 MLX5_SET(mkc, mkc, log_page_size, page_shift); 2297 if (access_mode == MLX5_MKC_ACCESS_MODE_PA || 2298 access_mode == MLX5_MKC_ACCESS_MODE_MTT) 2299 MLX5_SET(mkc, mkc, ma_translation_mode, MLX5_CAP_GEN(dev->mdev, ats)); 2300 } 2301 2302 static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 2303 int ndescs, int desc_size, int page_shift, 2304 int access_mode, u32 *in, int inlen) 2305 { 2306 struct mlx5_ib_dev *dev = to_mdev(pd->device); 2307 int err; 2308 2309 mr->access_mode = access_mode; 2310 mr->desc_size = desc_size; 2311 mr->max_descs = ndescs; 2312 2313 err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size); 2314 if (err) 2315 return err; 2316 2317 mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift); 2318 2319 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 2320 if (err) 2321 goto err_free_descs; 2322 2323 mr->mmkey.type = MLX5_MKEY_MR; 2324 mr->ibmr.lkey = mr->mmkey.key; 2325 mr->ibmr.rkey = mr->mmkey.key; 2326 2327 return 0; 2328 2329 err_free_descs: 2330 mlx5_free_priv_descs(mr); 2331 return err; 2332 } 2333 2334 static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, 2335 u32 max_num_sg, u32 max_num_meta_sg, 2336 int desc_size, int access_mode) 2337 { 2338 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 2339 int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4); 2340 int page_shift = 0; 2341 struct mlx5_ib_mr *mr; 2342 u32 *in; 2343 int err; 2344 2345 mr = kzalloc_obj(*mr); 2346 if (!mr) 2347 return ERR_PTR(-ENOMEM); 2348 2349 mr->ibmr.pd = pd; 2350 mr->ibmr.device = pd->device; 2351 2352 in = kzalloc(inlen, GFP_KERNEL); 2353 if (!in) { 2354 err = -ENOMEM; 2355 goto err_free; 2356 } 2357 2358 if (access_mode == MLX5_MKC_ACCESS_MODE_MTT) 2359 page_shift = PAGE_SHIFT; 2360 2361 err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift, 2362 access_mode, in, inlen); 2363 if (err) 2364 goto err_free_in; 2365 2366 mr->umem = NULL; 2367 kfree(in); 2368 2369 return mr; 2370 2371 err_free_in: 2372 kfree(in); 2373 err_free: 2374 kfree(mr); 2375 return ERR_PTR(err); 2376 } 2377 2378 static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 2379 int ndescs, u32 *in, int inlen) 2380 { 2381 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt), 2382 PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in, 2383 inlen); 2384 } 2385 2386 static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 2387 int ndescs, u32 *in, int inlen) 2388 { 2389 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm), 2390 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); 2391 } 2392 2393 static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 2394 int max_num_sg, int max_num_meta_sg, 2395 u32 *in, int inlen) 2396 { 2397 struct mlx5_ib_dev *dev = to_mdev(pd->device); 2398 u32 psv_index[2]; 2399 void *mkc; 2400 int err; 2401 2402 mr->sig = kzalloc_obj(*mr->sig); 2403 if (!mr->sig) 2404 return -ENOMEM; 2405 2406 /* create mem & wire PSVs */ 2407 err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index); 2408 if (err) 2409 goto err_free_sig; 2410 2411 mr->sig->psv_memory.psv_idx = psv_index[0]; 2412 mr->sig->psv_wire.psv_idx = psv_index[1]; 2413 2414 mr->sig->sig_status_checked = true; 2415 mr->sig->sig_err_exists = false; 2416 /* Next UMR, Arm SIGERR */ 2417 ++mr->sig->sigerr_count; 2418 mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, 2419 sizeof(struct mlx5_klm), 2420 MLX5_MKC_ACCESS_MODE_KLMS); 2421 if (IS_ERR(mr->klm_mr)) { 2422 err = PTR_ERR(mr->klm_mr); 2423 goto err_destroy_psv; 2424 } 2425 mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, 2426 sizeof(struct mlx5_mtt), 2427 MLX5_MKC_ACCESS_MODE_MTT); 2428 if (IS_ERR(mr->mtt_mr)) { 2429 err = PTR_ERR(mr->mtt_mr); 2430 goto err_free_klm_mr; 2431 } 2432 2433 /* Set bsf descriptors for mkey */ 2434 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 2435 MLX5_SET(mkc, mkc, bsf_en, 1); 2436 MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); 2437 2438 err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0, 2439 MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); 2440 if (err) 2441 goto err_free_mtt_mr; 2442 2443 err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), 2444 mr->sig, GFP_KERNEL)); 2445 if (err) 2446 goto err_free_descs; 2447 return 0; 2448 2449 err_free_descs: 2450 destroy_mkey(dev, mr); 2451 mlx5_free_priv_descs(mr); 2452 err_free_mtt_mr: 2453 mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL); 2454 mr->mtt_mr = NULL; 2455 err_free_klm_mr: 2456 mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL); 2457 mr->klm_mr = NULL; 2458 err_destroy_psv: 2459 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) 2460 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", 2461 mr->sig->psv_memory.psv_idx); 2462 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) 2463 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", 2464 mr->sig->psv_wire.psv_idx); 2465 err_free_sig: 2466 kfree(mr->sig); 2467 2468 return err; 2469 } 2470 2471 static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, 2472 enum ib_mr_type mr_type, u32 max_num_sg, 2473 u32 max_num_meta_sg) 2474 { 2475 struct mlx5_ib_dev *dev = to_mdev(pd->device); 2476 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 2477 int ndescs = ALIGN(max_num_sg, 4); 2478 struct mlx5_ib_mr *mr; 2479 u32 *in; 2480 int err; 2481 2482 mr = kzalloc_obj(*mr); 2483 if (!mr) 2484 return ERR_PTR(-ENOMEM); 2485 2486 in = kzalloc(inlen, GFP_KERNEL); 2487 if (!in) { 2488 err = -ENOMEM; 2489 goto err_free; 2490 } 2491 2492 mr->ibmr.device = pd->device; 2493 mr->umem = NULL; 2494 2495 switch (mr_type) { 2496 case IB_MR_TYPE_MEM_REG: 2497 err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen); 2498 break; 2499 case IB_MR_TYPE_SG_GAPS: 2500 err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen); 2501 break; 2502 case IB_MR_TYPE_INTEGRITY: 2503 err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg, 2504 max_num_meta_sg, in, inlen); 2505 break; 2506 default: 2507 mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type); 2508 err = -EINVAL; 2509 } 2510 2511 if (err) 2512 goto err_free_in; 2513 2514 kfree(in); 2515 2516 return &mr->ibmr; 2517 2518 err_free_in: 2519 kfree(in); 2520 err_free: 2521 kfree(mr); 2522 return ERR_PTR(err); 2523 } 2524 2525 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 2526 u32 max_num_sg) 2527 { 2528 return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0); 2529 } 2530 2531 struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, 2532 u32 max_num_sg, u32 max_num_meta_sg) 2533 { 2534 return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg, 2535 max_num_meta_sg); 2536 } 2537 2538 int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) 2539 { 2540 struct mlx5_ib_dev *dev = to_mdev(ibmw->device); 2541 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 2542 struct mlx5_ib_mw *mw = to_mmw(ibmw); 2543 unsigned int ndescs; 2544 u32 *in = NULL; 2545 void *mkc; 2546 int err; 2547 struct mlx5_ib_alloc_mw req = {}; 2548 struct { 2549 __u32 comp_mask; 2550 __u32 response_length; 2551 } resp = {}; 2552 2553 err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); 2554 if (err) 2555 return err; 2556 2557 if (req.comp_mask || req.reserved1 || req.reserved2) 2558 return -EOPNOTSUPP; 2559 2560 if (udata->inlen > sizeof(req) && 2561 !ib_is_udata_cleared(udata, sizeof(req), 2562 udata->inlen - sizeof(req))) 2563 return -EOPNOTSUPP; 2564 2565 ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4); 2566 2567 in = kzalloc(inlen, GFP_KERNEL); 2568 if (!in) 2569 return -ENOMEM; 2570 2571 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 2572 2573 MLX5_SET(mkc, mkc, free, 1); 2574 MLX5_SET(mkc, mkc, translations_octword_size, ndescs); 2575 MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn); 2576 MLX5_SET(mkc, mkc, umr_en, 1); 2577 MLX5_SET(mkc, mkc, lr, 1); 2578 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); 2579 MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2))); 2580 MLX5_SET(mkc, mkc, qpn, 0xffffff); 2581 2582 err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen); 2583 if (err) 2584 goto free; 2585 2586 mw->mmkey.type = MLX5_MKEY_MW; 2587 ibmw->rkey = mw->mmkey.key; 2588 mw->mmkey.ndescs = ndescs; 2589 2590 resp.response_length = 2591 min(offsetofend(typeof(resp), response_length), udata->outlen); 2592 if (resp.response_length) { 2593 err = ib_copy_to_udata(udata, &resp, resp.response_length); 2594 if (err) 2595 goto free_mkey; 2596 } 2597 2598 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { 2599 err = mlx5r_store_odp_mkey(dev, &mw->mmkey); 2600 if (err) 2601 goto free_mkey; 2602 } 2603 2604 kfree(in); 2605 return 0; 2606 2607 free_mkey: 2608 mlx5_core_destroy_mkey(dev->mdev, mw->mmkey.key); 2609 free: 2610 kfree(in); 2611 return err; 2612 } 2613 2614 int mlx5_ib_dealloc_mw(struct ib_mw *mw) 2615 { 2616 struct mlx5_ib_dev *dev = to_mdev(mw->device); 2617 struct mlx5_ib_mw *mmw = to_mmw(mw); 2618 2619 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && 2620 xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key))) 2621 /* 2622 * pagefault_single_data_segment() may be accessing mmw 2623 * if the user bound an ODP MR to this MW. 2624 */ 2625 mlx5r_deref_wait_odp_mkey(&mmw->mmkey); 2626 2627 return mlx5_core_destroy_mkey(dev->mdev, mmw->mmkey.key); 2628 } 2629 2630 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, 2631 struct ib_mr_status *mr_status) 2632 { 2633 struct mlx5_ib_mr *mmr = to_mmr(ibmr); 2634 int ret = 0; 2635 2636 if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { 2637 pr_err("Invalid status check mask\n"); 2638 ret = -EINVAL; 2639 goto done; 2640 } 2641 2642 mr_status->fail_status = 0; 2643 if (check_mask & IB_MR_CHECK_SIG_STATUS) { 2644 if (!mmr->sig) { 2645 ret = -EINVAL; 2646 pr_err("signature status check requested on a non-signature enabled MR\n"); 2647 goto done; 2648 } 2649 2650 mmr->sig->sig_status_checked = true; 2651 if (!mmr->sig->sig_err_exists) 2652 goto done; 2653 2654 if (ibmr->lkey == mmr->sig->err_item.key) 2655 memcpy(&mr_status->sig_err, &mmr->sig->err_item, 2656 sizeof(mr_status->sig_err)); 2657 else { 2658 mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; 2659 mr_status->sig_err.sig_err_offset = 0; 2660 mr_status->sig_err.key = mmr->sig->err_item.key; 2661 } 2662 2663 mmr->sig->sig_err_exists = false; 2664 mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; 2665 } 2666 2667 done: 2668 return ret; 2669 } 2670 2671 static int 2672 mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2673 int data_sg_nents, unsigned int *data_sg_offset, 2674 struct scatterlist *meta_sg, int meta_sg_nents, 2675 unsigned int *meta_sg_offset) 2676 { 2677 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2678 unsigned int sg_offset = 0; 2679 int n = 0; 2680 2681 mr->meta_length = 0; 2682 if (data_sg_nents == 1) { 2683 n++; 2684 mr->mmkey.ndescs = 1; 2685 if (data_sg_offset) 2686 sg_offset = *data_sg_offset; 2687 mr->data_length = sg_dma_len(data_sg) - sg_offset; 2688 mr->data_iova = sg_dma_address(data_sg) + sg_offset; 2689 if (meta_sg_nents == 1) { 2690 n++; 2691 mr->meta_ndescs = 1; 2692 if (meta_sg_offset) 2693 sg_offset = *meta_sg_offset; 2694 else 2695 sg_offset = 0; 2696 mr->meta_length = sg_dma_len(meta_sg) - sg_offset; 2697 mr->pi_iova = sg_dma_address(meta_sg) + sg_offset; 2698 } 2699 ibmr->length = mr->data_length + mr->meta_length; 2700 } 2701 2702 return n; 2703 } 2704 2705 static int 2706 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, 2707 struct scatterlist *sgl, 2708 unsigned short sg_nents, 2709 unsigned int *sg_offset_p, 2710 struct scatterlist *meta_sgl, 2711 unsigned short meta_sg_nents, 2712 unsigned int *meta_sg_offset_p) 2713 { 2714 struct scatterlist *sg = sgl; 2715 struct mlx5_klm *klms = mr->descs; 2716 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 2717 u32 lkey = mr->ibmr.pd->local_dma_lkey; 2718 int i, j = 0; 2719 2720 mr->ibmr.iova = sg_dma_address(sg) + sg_offset; 2721 mr->ibmr.length = 0; 2722 2723 for_each_sg(sgl, sg, sg_nents, i) { 2724 if (unlikely(i >= mr->max_descs)) 2725 break; 2726 klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset); 2727 klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset); 2728 klms[i].key = cpu_to_be32(lkey); 2729 mr->ibmr.length += sg_dma_len(sg) - sg_offset; 2730 2731 sg_offset = 0; 2732 } 2733 2734 if (sg_offset_p) 2735 *sg_offset_p = sg_offset; 2736 2737 mr->mmkey.ndescs = i; 2738 mr->data_length = mr->ibmr.length; 2739 2740 if (meta_sg_nents) { 2741 sg = meta_sgl; 2742 sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0; 2743 for_each_sg(meta_sgl, sg, meta_sg_nents, j) { 2744 if (unlikely(i + j >= mr->max_descs)) 2745 break; 2746 klms[i + j].va = cpu_to_be64(sg_dma_address(sg) + 2747 sg_offset); 2748 klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) - 2749 sg_offset); 2750 klms[i + j].key = cpu_to_be32(lkey); 2751 mr->ibmr.length += sg_dma_len(sg) - sg_offset; 2752 2753 sg_offset = 0; 2754 } 2755 if (meta_sg_offset_p) 2756 *meta_sg_offset_p = sg_offset; 2757 2758 mr->meta_ndescs = j; 2759 mr->meta_length = mr->ibmr.length - mr->data_length; 2760 } 2761 2762 return i + j; 2763 } 2764 2765 static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) 2766 { 2767 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2768 __be64 *descs; 2769 2770 if (unlikely(mr->mmkey.ndescs == mr->max_descs)) 2771 return -ENOMEM; 2772 2773 descs = mr->descs; 2774 descs[mr->mmkey.ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); 2775 2776 return 0; 2777 } 2778 2779 static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr) 2780 { 2781 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2782 __be64 *descs; 2783 2784 if (unlikely(mr->mmkey.ndescs + mr->meta_ndescs == mr->max_descs)) 2785 return -ENOMEM; 2786 2787 descs = mr->descs; 2788 descs[mr->mmkey.ndescs + mr->meta_ndescs++] = 2789 cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); 2790 2791 return 0; 2792 } 2793 2794 static int 2795 mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2796 int data_sg_nents, unsigned int *data_sg_offset, 2797 struct scatterlist *meta_sg, int meta_sg_nents, 2798 unsigned int *meta_sg_offset) 2799 { 2800 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2801 struct mlx5_ib_mr *pi_mr = mr->mtt_mr; 2802 int n; 2803 2804 pi_mr->mmkey.ndescs = 0; 2805 pi_mr->meta_ndescs = 0; 2806 pi_mr->meta_length = 0; 2807 2808 ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map, 2809 pi_mr->desc_size * pi_mr->max_descs, 2810 DMA_TO_DEVICE); 2811 2812 pi_mr->ibmr.page_size = ibmr->page_size; 2813 n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset, 2814 mlx5_set_page); 2815 if (n != data_sg_nents) 2816 return n; 2817 2818 pi_mr->data_iova = pi_mr->ibmr.iova; 2819 pi_mr->data_length = pi_mr->ibmr.length; 2820 pi_mr->ibmr.length = pi_mr->data_length; 2821 ibmr->length = pi_mr->data_length; 2822 2823 if (meta_sg_nents) { 2824 u64 page_mask = ~((u64)ibmr->page_size - 1); 2825 u64 iova = pi_mr->data_iova; 2826 2827 n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents, 2828 meta_sg_offset, mlx5_set_page_pi); 2829 2830 pi_mr->meta_length = pi_mr->ibmr.length; 2831 /* 2832 * PI address for the HW is the offset of the metadata address 2833 * relative to the first data page address. 2834 * It equals to first data page address + size of data pages + 2835 * metadata offset at the first metadata page 2836 */ 2837 pi_mr->pi_iova = (iova & page_mask) + 2838 pi_mr->mmkey.ndescs * ibmr->page_size + 2839 (pi_mr->ibmr.iova & ~page_mask); 2840 /* 2841 * In order to use one MTT MR for data and metadata, we register 2842 * also the gaps between the end of the data and the start of 2843 * the metadata (the sig MR will verify that the HW will access 2844 * to right addresses). This mapping is safe because we use 2845 * internal mkey for the registration. 2846 */ 2847 pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova; 2848 pi_mr->ibmr.iova = iova; 2849 ibmr->length += pi_mr->meta_length; 2850 } 2851 2852 ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, 2853 pi_mr->desc_size * pi_mr->max_descs, 2854 DMA_TO_DEVICE); 2855 2856 return n; 2857 } 2858 2859 static int 2860 mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2861 int data_sg_nents, unsigned int *data_sg_offset, 2862 struct scatterlist *meta_sg, int meta_sg_nents, 2863 unsigned int *meta_sg_offset) 2864 { 2865 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2866 struct mlx5_ib_mr *pi_mr = mr->klm_mr; 2867 int n; 2868 2869 pi_mr->mmkey.ndescs = 0; 2870 pi_mr->meta_ndescs = 0; 2871 pi_mr->meta_length = 0; 2872 2873 ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map, 2874 pi_mr->desc_size * pi_mr->max_descs, 2875 DMA_TO_DEVICE); 2876 2877 n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset, 2878 meta_sg, meta_sg_nents, meta_sg_offset); 2879 2880 ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, 2881 pi_mr->desc_size * pi_mr->max_descs, 2882 DMA_TO_DEVICE); 2883 2884 /* This is zero-based memory region */ 2885 pi_mr->data_iova = 0; 2886 pi_mr->ibmr.iova = 0; 2887 pi_mr->pi_iova = pi_mr->data_length; 2888 ibmr->length = pi_mr->ibmr.length; 2889 2890 return n; 2891 } 2892 2893 int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2894 int data_sg_nents, unsigned int *data_sg_offset, 2895 struct scatterlist *meta_sg, int meta_sg_nents, 2896 unsigned int *meta_sg_offset) 2897 { 2898 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2899 struct mlx5_ib_mr *pi_mr = NULL; 2900 int n; 2901 2902 WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY); 2903 2904 mr->mmkey.ndescs = 0; 2905 mr->data_length = 0; 2906 mr->data_iova = 0; 2907 mr->meta_ndescs = 0; 2908 mr->pi_iova = 0; 2909 /* 2910 * As a performance optimization, if possible, there is no need to 2911 * perform UMR operation to register the data/metadata buffers. 2912 * First try to map the sg lists to PA descriptors with local_dma_lkey. 2913 * Fallback to UMR only in case of a failure. 2914 */ 2915 n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents, 2916 data_sg_offset, meta_sg, meta_sg_nents, 2917 meta_sg_offset); 2918 if (n == data_sg_nents + meta_sg_nents) 2919 goto out; 2920 /* 2921 * As a performance optimization, if possible, there is no need to map 2922 * the sg lists to KLM descriptors. First try to map the sg lists to MTT 2923 * descriptors and fallback to KLM only in case of a failure. 2924 * It's more efficient for the HW to work with MTT descriptors 2925 * (especially in high load). 2926 * Use KLM (indirect access) only if it's mandatory. 2927 */ 2928 pi_mr = mr->mtt_mr; 2929 n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents, 2930 data_sg_offset, meta_sg, meta_sg_nents, 2931 meta_sg_offset); 2932 if (n == data_sg_nents + meta_sg_nents) 2933 goto out; 2934 2935 pi_mr = mr->klm_mr; 2936 n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents, 2937 data_sg_offset, meta_sg, meta_sg_nents, 2938 meta_sg_offset); 2939 if (unlikely(n != data_sg_nents + meta_sg_nents)) 2940 return -ENOMEM; 2941 2942 out: 2943 /* This is zero-based memory region */ 2944 ibmr->iova = 0; 2945 mr->pi_mr = pi_mr; 2946 if (pi_mr) 2947 ibmr->sig_attrs->meta_length = pi_mr->meta_length; 2948 else 2949 ibmr->sig_attrs->meta_length = mr->meta_length; 2950 2951 return 0; 2952 } 2953 2954 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 2955 unsigned int *sg_offset) 2956 { 2957 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2958 int n; 2959 2960 mr->mmkey.ndescs = 0; 2961 2962 ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map, 2963 mr->desc_size * mr->max_descs, 2964 DMA_TO_DEVICE); 2965 2966 if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) 2967 n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0, 2968 NULL); 2969 else 2970 n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, 2971 mlx5_set_page); 2972 2973 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, 2974 mr->desc_size * mr->max_descs, 2975 DMA_TO_DEVICE); 2976 2977 return n; 2978 } 2979