// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/vhost_types.h>
#include <linux/vdpa.h>
#include <linux/gcd.h>
#include <linux/string.h>
#include <linux/mlx5/qp.h>
#include "mlx5_vdpa.h"

/* DIV_ROUND_UP where the divisor is a power of 2 given by its log base 2 value */
#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \
({ \
	u64 __s = _s; \
	u64 _res; \
	_res = (((_n) + (1 << (__s)) - 1) >> (__s)); \
	_res; \
})

static int get_octo_len(u64 len, int page_shift)
{
	u64 page_size = 1ULL << page_shift;
	int npages;

	npages = ALIGN(len, page_size) >> page_shift;
	return (npages + 1) / 2;
}

static void mlx5_set_access_mode(void *mkc, int mode)
{
	MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2);
}

static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
{
	struct scatterlist *sg;
	int nsg = mr->nsg;
	u64 dma_addr;
	u64 dma_len;
	int j = 0;
	int i;

	for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) {
		for (dma_addr = sg_dma_address(sg), dma_len = sg_dma_len(sg);
		     nsg && dma_len;
		     nsg--, dma_addr += BIT(mr->log_size), dma_len -= BIT(mr->log_size))
			mtt[j++] = cpu_to_be64(dma_addr);
	}
}
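
/* Command memory for the async mkey create/destroy commands used by
 * create_direct_keys() and destroy_direct_keys() below.  In
 * mlx5_create_mkey_mem the fixed-size input buffer is followed by a flexible
 * array of MTT entries: populate_mtts() writes the DMA addresses through the
 * command's klm_pas_mtt field, so the translations land in mtt[].
 */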

struct mlx5_create_mkey_mem {
	u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)];
	u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
	__be64 mtt[];
};

struct mlx5_destroy_mkey_mem {
	u8 out[MLX5_ST_SZ_BYTES(destroy_mkey_out)];
	u8 in[MLX5_ST_SZ_BYTES(destroy_mkey_in)];
};

static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev,
				  struct mlx5_vdpa_direct_mr *mr,
				  struct mlx5_create_mkey_mem *mem)
{
	void *in = &mem->in;
	void *mkc;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO));
	MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO));
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, mr->offset);
	MLX5_SET64(mkc, mkc, len, mr->end - mr->start);
	MLX5_SET(mkc, mkc, log_page_size, mr->log_size);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));

	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
}

static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev,
				 struct mlx5_vdpa_direct_mr *mr,
				 struct mlx5_create_mkey_mem *mem)
{
	u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index);

	mr->mr = mlx5_idx_to_mkey(mkey_index);
}

static void fill_destroy_direct_mr(struct mlx5_vdpa_dev *mvdev,
				   struct mlx5_vdpa_direct_mr *mr,
				   struct mlx5_destroy_mkey_mem *mem)
{
	void *in = &mem->in;

	MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid);
	MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
	MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mr->mr));
}

static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	if (!mr->mr)
		return;

	mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
}

static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return max_t(u64, map->start, mr->start);
}

static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return min_t(u64, map->last + 1, mr->end);
}

static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return map_end(map, mr) - map_start(map, mr);
}

#define MLX5_VDPA_INVALID_START_ADDR ((u64)-1)
#define MLX5_VDPA_INVALID_LEN ((u64)-1)

static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey)
{
	struct mlx5_vdpa_direct_mr *s;

	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
	if (!s)
		return MLX5_VDPA_INVALID_START_ADDR;

	return s->start;
}

static u64 indir_len(struct mlx5_vdpa_mr *mkey)
{
	struct mlx5_vdpa_direct_mr *s;
	struct mlx5_vdpa_direct_mr *e;

	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
	if (!s)
		return MLX5_VDPA_INVALID_LEN;

	e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list);

	return e->end - s->start;
}

#define LOG_MAX_KLM_SIZE 30
#define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE)

static u32 klm_bcount(u64 size)
{
	return (u32)size;
}
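
/* Build the KLM array of the indirect mkey from the ordered list of direct
 * MRs.  Holes between consecutive direct MRs are covered with the null mkey
 * so that the indirect key describes one contiguous range.  For a
 * hypothetical layout with direct MRs covering [0x0, 0x1000) and
 * [0x3000, 0x4000), three KLM entries are emitted:
 *   { key = <first mr>,  bcount = 0x1000 }
 *   { key = null_mkey,   bcount = 0x2000 }   <- fills the hole
 *   { key = <second mr>, bcount = 0x1000 }
 */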

static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_klm *klmarr;
	struct mlx5_klm *klm;
	bool first = true;
	u64 preve;
	int i;

	klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	i = 0;
	list_for_each_entry(dmr, &mkey->head, list) {
again:
		klm = &klmarr[i++];
		if (first) {
			preve = dmr->start;
			first = false;
		}

		if (preve == dmr->start) {
			klm->key = cpu_to_be32(dmr->mr);
			klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
			preve = dmr->end;
		} else {
			klm->key = cpu_to_be32(mvdev->res.null_mkey);
			klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve));
			preve = dmr->start;
			goto again;
		}
	}
}

static int klm_byte_size(int nklms)
{
	return 16 * ALIGN(nklms, 4);
}

#define MLX5_VDPA_MTT_ALIGN 16

static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	struct mlx5_vdpa_async_cmd *cmds;
	struct mlx5_vdpa_direct_mr *dmr;
	int err = 0;
	int i = 0;

	cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
	if (!cmds)
		return -ENOMEM;

	list_for_each_entry(dmr, &mr->head, list) {
		struct mlx5_create_mkey_mem *cmd_mem;
		int mttlen, mttcount;

		mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, MLX5_VDPA_MTT_ALIGN);
		mttcount = mttlen / sizeof(cmd_mem->mtt[0]);
		cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL);
		if (!cmd_mem) {
			err = -ENOMEM;
			goto done;
		}

		cmds[i].out = cmd_mem->out;
		cmds[i].outlen = sizeof(cmd_mem->out);
		cmds[i].in = cmd_mem->in;
		cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount);

		fill_create_direct_mr(mvdev, dmr, cmd_mem);

		i++;
	}

	err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
	if (err) {
		mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err);
		goto done;
	}

	i = 0;
	list_for_each_entry(dmr, &mr->head, list) {
		struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];
		struct mlx5_create_mkey_mem *cmd_mem;

		cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out);

		if (!cmd->err) {
			create_direct_mr_end(mvdev, dmr, cmd_mem);
		} else {
			err = err ? err : cmd->err;
			mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n",
				      dmr->start, dmr->end, cmd->err);
		}
	}

done:
	for (i = i - 1; i >= 0; i--) {
		struct mlx5_create_mkey_mem *cmd_mem;

		cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out);
		kvfree(cmd_mem);
	}

	kvfree(cmds);
	return err;
}
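
/* Scope-based cleanup for destroy_direct_keys(): pointers annotated with
 * __free(free_cmds)/__free(free_cmd_mem) are handed to kvfree() automatically
 * when they go out of scope, so the function needs no explicit free path.
 */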

DEFINE_FREE(free_cmds, struct mlx5_vdpa_async_cmd *, kvfree(_T))
DEFINE_FREE(free_cmd_mem, struct mlx5_destroy_mkey_mem *, kvfree(_T))

static int destroy_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	struct mlx5_destroy_mkey_mem *cmd_mem __free(free_cmd_mem) = NULL;
	struct mlx5_vdpa_async_cmd *cmds __free(free_cmds) = NULL;
	struct mlx5_vdpa_direct_mr *dmr;
	int err = 0;
	int i = 0;

	cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
	cmd_mem = kvcalloc(mr->num_directs, sizeof(*cmd_mem), GFP_KERNEL);
	if (!cmds || !cmd_mem)
		return -ENOMEM;

	list_for_each_entry(dmr, &mr->head, list) {
		cmds[i].out = cmd_mem[i].out;
		cmds[i].outlen = sizeof(cmd_mem[i].out);
		cmds[i].in = cmd_mem[i].in;
		cmds[i].inlen = sizeof(cmd_mem[i].in);
		fill_destroy_direct_mr(mvdev, dmr, &cmd_mem[i]);
		i++;
	}

	err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
	if (err) {
		mlx5_vdpa_err(mvdev, "error issuing MTT mkey deletion for direct mrs: %d\n", err);
		return err;
	}

	i = 0;
	list_for_each_entry(dmr, &mr->head, list) {
		struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];

		dmr->mr = 0;
		if (cmd->err) {
			err = err ? err : cmd->err;
			mlx5_vdpa_err(mvdev, "error deleting MTT mkey [0x%llx, 0x%llx]: %d\n",
				      dmr->start, dmr->end, cmd->err);
		}
	}

	return err;
}

static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	int inlen;
	void *mkc;
	void *in;
	int err;
	u64 start;
	u64 len;

	start = indir_start_addr(mr);
	len = indir_len(mr);
	if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN)
		return -EINVAL;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, start);
	MLX5_SET64(mkc, mkc, len, len);
	MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16);
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms);
	fill_indir(mvdev, mr, in);
	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
	kfree(in);
	return err;
}

static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
{
	mlx5_vdpa_destroy_mkey(mvdev, mkey->mkey);
}
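
/* Build a scatterlist covering [mr->start, mr->end) from the iotlb and DMA
 * map it.  The entry size is the largest power of two that divides the
 * length of every overlapping map segment, found via gcd() and stored in
 * mr->log_size.  For example, hypothetical segments of 12 KiB and 8 KiB
 * yield a 4 KiB entity size (log_entity_size = 12).
 */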

static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
			 struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *map;
	unsigned long lgcd = 0;
	int log_entity_size;
	unsigned long size;
	u64 start = 0;
	int err;
	struct page *pg;
	unsigned int nsg;
	int sglen;
	u64 pa, offset;
	u64 paend;
	struct scatterlist *sg;
	struct device *dma = mvdev->vdev.dma_dev;

	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
		size = maplen(map, mr);
		lgcd = gcd(lgcd, size);
		start += size;
	}
	log_entity_size = ilog2(lgcd);

	sglen = 1 << log_entity_size;
	nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size);

	err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL);
	if (err)
		return err;

	sg = mr->sg_head.sgl;
	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
		offset = mr->start > map->start ? mr->start - map->start : 0;
		pa = map->addr + offset;
		paend = map->addr + offset + maplen(map, mr);
		for (; pa < paend; pa += sglen) {
			pg = pfn_to_page(__phys_to_pfn(pa));
			if (!sg) {
				mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
					       map->start, map->last + 1);
				err = -ENOMEM;
				goto err_map;
			}
			sg_set_page(sg, pg, sglen, 0);
			sg = sg_next(sg);
			if (!sg)
				goto done;
		}
	}
done:
	mr->log_size = log_entity_size;
	mr->nsg = nsg;
	mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	if (!mr->nent) {
		err = -ENOMEM;
		goto err_map;
	}

	return 0;

err_map:
	sg_free_table(&mr->sg_head);
	return err;
}

static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	struct device *dma = mvdev->vdev.dma_dev;

	destroy_direct_mr(mvdev, mr);
	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	sg_free_table(&mr->sg_head);
}
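
/* Cover the contiguous range [start, start + size) with direct MRs of at
 * most MAX_KLM_SIZE (1 << 30) bytes each and add them to the tail of
 * mr->head.  Each direct MR is later described by a single KLM entry of the
 * indirect mkey.
 */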

static int add_direct_chain(struct mlx5_vdpa_dev *mvdev,
			    struct mlx5_vdpa_mr *mr,
			    u64 start,
			    u64 size,
			    u8 perm,
			    struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	LIST_HEAD(tmp);
	u64 st;
	u64 sz;
	int err;

	st = start;
	while (size) {
		sz = (u32)min_t(u64, MAX_KLM_SIZE, size);
		dmr = kzalloc(sizeof(*dmr), GFP_KERNEL);
		if (!dmr) {
			err = -ENOMEM;
			goto err_alloc;
		}

		dmr->start = st;
		dmr->end = st + sz;
		dmr->perm = perm;
		err = map_direct_mr(mvdev, dmr, iotlb);
		if (err) {
			kfree(dmr);
			goto err_alloc;
		}

		list_add_tail(&dmr->list, &tmp);
		size -= sz;
		mr->num_directs++;
		mr->num_klms++;
		st += sz;
	}
	list_splice_tail(&tmp, &mr->head);
	return 0;

err_alloc:
	list_for_each_entry_safe(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}

/* The iotlb pointer contains a list of maps. Go over the maps, possibly
 * merging mergeable maps, and create direct memory keys that provide the
 * device access to memory. The direct mkeys are then referred to by the
 * indirect memory key that provides access to the entire address space given
 * by iotlb.
 */
static int create_user_mr(struct mlx5_vdpa_dev *mvdev,
			  struct mlx5_vdpa_mr *mr,
			  struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	struct vhost_iotlb_map *map;
	u32 pperm = U16_MAX;
	u64 last = U64_MAX;
	u64 ps = U64_MAX;
	u64 pe = U64_MAX;
	u64 start = 0;
	int err = 0;
	int nnuls;

	INIT_LIST_HEAD(&mr->head);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		start = map->start;
		if (pe == map->start && pperm == map->perm) {
			pe = map->last + 1;
		} else {
			if (ps != U64_MAX) {
				if (pe < map->start) {
					/* We have a hole in the map. Check how
					 * many null keys are required to fill it.
					 */
					nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe,
								       LOG_MAX_KLM_SIZE);
					mr->num_klms += nnuls;
				}
				err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
				if (err)
					goto err_chain;
			}
			ps = map->start;
			pe = map->last + 1;
			pperm = map->perm;
		}
	}
	err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
	if (err)
		goto err_chain;

	err = create_direct_keys(mvdev, mr);
	if (err)
		goto err_chain;

	/* Create the memory key that defines the guest's address space. This
	 * memory key refers to the direct keys that contain the MTT
	 * translations.
	 */
	err = create_indirect_key(mvdev, mr);
	if (err)
		goto err_chain;

	mr->user_mr = true;
	return 0;

err_chain:
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}
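
/* Create a single physical-address mkey spanning the whole address space
 * (length64 set).  Used by _mlx5_vdpa_create_mr() when no iotlb is provided.
 */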

static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	void *mkc;
	u32 *in;
	int err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
	if (!err)
		mr->user_mr = false;

	kfree(in);
	return err;
}

static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	mlx5_vdpa_destroy_mkey(mvdev, mr->mkey);
}

static int dup_iotlb(struct vhost_iotlb *dst, struct vhost_iotlb *src)
{
	struct vhost_iotlb_map *map;
	u64 start = 0, last = ULLONG_MAX;
	int err;

	if (dst == src)
		return -EINVAL;

	if (!src) {
		err = vhost_iotlb_add_range(dst, start, last, start, VHOST_ACCESS_RW);
		return err;
	}

	for (map = vhost_iotlb_itree_first(src, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		err = vhost_iotlb_add_range(dst, map->start, map->last,
					    map->addr, map->perm);
		if (err)
			return err;
	}
	return 0;
}

static void prune_iotlb(struct vhost_iotlb *iotlb)
{
	vhost_iotlb_del_range(iotlb, 0, ULLONG_MAX);
}

static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;

	destroy_indirect_key(mvdev, mr);
	destroy_direct_keys(mvdev, mr);
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
}

static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	if (WARN_ON(!mr))
		return;

	if (mr->user_mr)
		destroy_user_mr(mvdev, mr);
	else
		destroy_dma_mr(mvdev, mr);

	vhost_iotlb_free(mr->iotlb);

	list_del(&mr->mr_list);

	kfree(mr);
}

/* There can be multiple .set_map() operations in quick succession.
 * This large delay is a simple way to prevent the MR cleanup from blocking
 * .set_map() MR creation in this scenario.
 */
#define MLX5_VDPA_MR_GC_TRIGGER_MS 2000

static void mlx5_vdpa_mr_gc_handler(struct work_struct *work)
{
	struct mlx5_vdpa_mr_resources *mres;
	struct mlx5_vdpa_mr *mr, *tmp;
	struct mlx5_vdpa_dev *mvdev;

	mres = container_of(work, struct mlx5_vdpa_mr_resources, gc_dwork_ent.work);

	if (atomic_read(&mres->shutdown)) {
		mutex_lock(&mres->lock);
	} else if (!mutex_trylock(&mres->lock)) {
		queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent,
				   msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS));
		return;
	}

	mvdev = container_of(mres, struct mlx5_vdpa_dev, mres);

	list_for_each_entry_safe(mr, tmp, &mres->mr_gc_list_head, mr_list) {
		_mlx5_vdpa_destroy_mr(mvdev, mr);
	}

	mutex_unlock(&mres->lock);
}

static void _mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
			      struct mlx5_vdpa_mr *mr)
{
	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;

	if (!mr)
		return;

	if (refcount_dec_and_test(&mr->refcount)) {
		list_move_tail(&mr->mr_list, &mres->mr_gc_list_head);
		queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent,
				   msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS));
	}
}

void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
		      struct mlx5_vdpa_mr *mr)
{
	mutex_lock(&mvdev->mres.lock);
	_mlx5_vdpa_put_mr(mvdev, mr);
	mutex_unlock(&mvdev->mres.lock);
}

static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
			      struct mlx5_vdpa_mr *mr)
{
	if (!mr)
		return;

	refcount_inc(&mr->refcount);
}

void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
		      struct mlx5_vdpa_mr *mr)
{
	mutex_lock(&mvdev->mres.lock);
	_mlx5_vdpa_get_mr(mvdev, mr);
	mutex_unlock(&mvdev->mres.lock);
}
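
/* Install new_mr as the MR of the given address space and drop the reference
 * on the previous one.  Once its refcount reaches zero, the old MR is moved
 * to the GC list and destroyed by the delayed work above.
 */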

void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
			 struct mlx5_vdpa_mr *new_mr,
			 unsigned int asid)
{
	struct mlx5_vdpa_mr *old_mr = mvdev->mres.mr[asid];

	mutex_lock(&mvdev->mres.lock);

	_mlx5_vdpa_put_mr(mvdev, old_mr);
	mvdev->mres.mr[asid] = new_mr;

	mutex_unlock(&mvdev->mres.lock);
}

static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr *mr;

	mutex_lock(&mvdev->mres.lock);

	list_for_each_entry(mr, &mvdev->mres.mr_list_head, mr_list) {
		mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: "
				      "mr: %p, mkey: 0x%x, refcount: %u\n",
			       mr, mr->mkey, refcount_read(&mr->refcount));
	}

	mutex_unlock(&mvdev->mres.lock);
}

void mlx5_vdpa_clean_mrs(struct mlx5_vdpa_dev *mvdev)
{
	if (!mvdev->res.valid)
		return;

	for (int i = 0; i < MLX5_VDPA_NUM_AS; i++)
		mlx5_vdpa_update_mr(mvdev, NULL, i);

	prune_iotlb(mvdev->cvq.iotlb);

	mlx5_vdpa_show_mr_leaks(mvdev);
}

static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
				struct mlx5_vdpa_mr *mr,
				struct vhost_iotlb *iotlb)
{
	int err;

	if (iotlb)
		err = create_user_mr(mvdev, mr, iotlb);
	else
		err = create_dma_mr(mvdev, mr);

	if (err)
		return err;

	mr->iotlb = vhost_iotlb_alloc(0, 0);
	if (!mr->iotlb) {
		err = -ENOMEM;
		goto err_mr;
	}

	err = dup_iotlb(mr->iotlb, iotlb);
	if (err)
		goto err_iotlb;

	list_add_tail(&mr->mr_list, &mvdev->mres.mr_list_head);

	return 0;

err_iotlb:
	vhost_iotlb_free(mr->iotlb);

err_mr:
	if (iotlb)
		destroy_user_mr(mvdev, mr);
	else
		destroy_dma_mr(mvdev, mr);

	return err;
}

struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
					 struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mutex_lock(&mvdev->mres.lock);
	err = _mlx5_vdpa_create_mr(mvdev, mr, iotlb);
	mutex_unlock(&mvdev->mres.lock);

	if (err)
		goto out_err;

	refcount_set(&mr->refcount, 1);

	return mr;

out_err:
	kfree(mr);
	return ERR_PTR(err);
}

int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev,
			       struct vhost_iotlb *iotlb,
			       unsigned int asid)
{
	int err;

	if (mvdev->mres.group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
		return 0;

	spin_lock(&mvdev->cvq.iommu_lock);

	prune_iotlb(mvdev->cvq.iotlb);
	err = dup_iotlb(mvdev->cvq.iotlb, iotlb);

	spin_unlock(&mvdev->cvq.iommu_lock);

	return err;
}

int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr *mr;

	mr = mlx5_vdpa_create_mr(mvdev, NULL);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	mlx5_vdpa_update_mr(mvdev, mr, 0);

	return mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, 0);
}

int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
{
	if (asid >= MLX5_VDPA_NUM_AS)
		return -EINVAL;

	mlx5_vdpa_update_mr(mvdev, NULL, asid);

	if (asid == 0 && MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
		if (mlx5_vdpa_create_dma_mr(mvdev))
			mlx5_vdpa_warn(mvdev, "create DMA MR failed\n");
	} else {
		mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, asid);
	}

	return 0;
}

int mlx5_vdpa_init_mr_resources(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;

	mres->wq_gc = create_singlethread_workqueue("mlx5_vdpa_mr_gc");
	if (!mres->wq_gc)
		return -ENOMEM;

	INIT_DELAYED_WORK(&mres->gc_dwork_ent, mlx5_vdpa_mr_gc_handler);

	mutex_init(&mres->lock);

	INIT_LIST_HEAD(&mres->mr_list_head);
	INIT_LIST_HEAD(&mres->mr_gc_list_head);

	return 0;
}

void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;

	atomic_set(&mres->shutdown, 1);

	flush_delayed_work(&mres->gc_dwork_ent);
	destroy_workqueue(mres->wq_gc);
	mres->wq_gc = NULL;
	mutex_destroy(&mres->lock);
}