// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
 */

#include <linux/vmalloc.h>

#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>

#include "efa.h"

#define EFA_MMAP_FLAG_SHIFT 56
#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0)
#define EFA_MMAP_INVALID U64_MAX

enum {
	EFA_MMAP_DMA_PAGE = 0,
	EFA_MMAP_IO_WC,
	EFA_MMAP_IO_NC,
};

#define EFA_AENQ_ENABLED_GROUPS \
	(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
	 BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))

struct efa_mmap_entry {
	void *obj;
	u64 address;
	u64 length;
	u32 mmap_page;
	u8 mmap_flag;
};

static inline u64 get_mmap_key(const struct efa_mmap_entry *efa)
{
	return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) |
	       ((u64)efa->mmap_page << PAGE_SHIFT);
}

#define EFA_CHUNK_PAYLOAD_SHIFT		12
#define EFA_CHUNK_PAYLOAD_SIZE		BIT(EFA_CHUNK_PAYLOAD_SHIFT)
#define EFA_CHUNK_PAYLOAD_PTR_SIZE	8

#define EFA_CHUNK_SHIFT			12
#define EFA_CHUNK_SIZE			BIT(EFA_CHUNK_SHIFT)
#define EFA_CHUNK_PTR_SIZE		sizeof(struct efa_com_ctrl_buff_info)

#define EFA_PTRS_PER_CHUNK \
	((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE)

#define EFA_CHUNK_USED_SIZE \
	((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)

#define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE

struct pbl_chunk {
	dma_addr_t dma_addr;
	u64 *buf;
	u32 length;
};

struct pbl_chunk_list {
	struct pbl_chunk *chunks;
	unsigned int size;
};

struct pbl_context {
	union {
		struct {
			dma_addr_t dma_addr;
		} continuous;
		struct {
			u32 pbl_buf_size_in_pages;
			struct scatterlist *sgl;
			int sg_dma_cnt;
			struct pbl_chunk_list chunk_list;
		} indirect;
	} phys;
	u64 *pbl_buf;
	u32 pbl_buf_size_in_bytes;
	u8 physically_continuous;
};

static inline struct efa_dev *to_edev(struct ib_device *ibdev)
{
	return container_of(ibdev, struct efa_dev, ibdev);
}

static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext)
{
	return container_of(ibucontext, struct efa_ucontext, ibucontext);
}

static inline struct efa_pd *to_epd(struct ib_pd *ibpd)
{
	return container_of(ibpd, struct efa_pd, ibpd);
}

static inline struct efa_mr *to_emr(struct ib_mr *ibmr)
{
	return container_of(ibmr, struct efa_mr, ibmr);
}

static inline struct efa_qp *to_eqp(struct ib_qp *ibqp)
{
	return container_of(ibqp, struct efa_qp, ibqp);
}

static inline struct efa_cq *to_ecq(struct ib_cq *ibcq)
{
	return container_of(ibcq, struct efa_cq, ibcq);
}

static inline struct efa_ah *to_eah(struct ib_ah *ibah)
{
	return container_of(ibah, struct efa_ah, ibah);
}

#define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \
				 sizeof(((typeof(x) *)0)->fld) <= (sz))

#define is_reserved_cleared(reserved) \
	!memchr_inv(reserved, 0, sizeof(reserved))

static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
			       size_t size, enum dma_data_direction dir)
{
	void *addr;

	addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
	if (!addr)
		return NULL;
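
	/*
	 * Hand the zeroed pages to the device by mapping them for DMA in the
	 * caller-requested direction.
	 */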
	*dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir);
	if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) {
		ibdev_err(&dev->ibdev, "Failed to map DMA address\n");
		free_pages_exact(addr, size);
		return NULL;
	}

	return addr;
}

/*
 * This is only called when the ucontext is destroyed and there can be no
 * concurrent query via mmap or allocate on the xarray, thus we can be sure no
 * other thread is using the entry pointer. We also know that all the BAR
 * pages have either been zapped or munmapped at this point. Normal pages are
 * refcounted and will be freed at the proper time.
 */
static void mmap_entries_remove_free(struct efa_dev *dev,
				     struct efa_ucontext *ucontext)
{
	struct efa_mmap_entry *entry;
	unsigned long mmap_page;

	xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
		xa_erase(&ucontext->mmap_xa, mmap_page);

		ibdev_dbg(
			&dev->ibdev,
			"mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
			entry->obj, get_mmap_key(entry), entry->address,
			entry->length);
		if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
			/* DMA mapping is already gone, now free the pages */
			free_pages_exact(phys_to_virt(entry->address),
					 entry->length);
		kfree(entry);
	}
}

static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev,
					     struct efa_ucontext *ucontext,
					     u64 key, u64 len)
{
	struct efa_mmap_entry *entry;
	u64 mmap_page;

	mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT;
	if (mmap_page > U32_MAX)
		return NULL;

	entry = xa_load(&ucontext->mmap_xa, mmap_page);
	if (!entry || get_mmap_key(entry) != key || entry->length != len)
		return NULL;

	ibdev_dbg(&dev->ibdev,
		  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
		  entry->obj, key, entry->address, entry->length);

	return entry;
}
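
/*
 * mmap entries are keyed by (mmap_flag << EFA_MMAP_FLAG_SHIFT) |
 * (mmap_page << PAGE_SHIFT): the low bits double as the page-aligned offset
 * userspace passes to mmap(), while the top byte records how the range must
 * be mapped (DMA pages, write-combined IO or non-cached IO).
 */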

/*
 * Note this locking scheme cannot support removal of entries, except during
 * ucontext destruction when the core code guarantees no concurrency.
 */
static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext,
			     void *obj, u64 address, u64 length, u8 mmap_flag)
{
	struct efa_mmap_entry *entry;
	int err;

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return EFA_MMAP_INVALID;

	entry->obj = obj;
	entry->address = address;
	entry->length = length;
	entry->mmap_flag = mmap_flag;

	xa_lock(&ucontext->mmap_xa);
	entry->mmap_page = ucontext->mmap_xa_page;
	ucontext->mmap_xa_page += DIV_ROUND_UP(length, PAGE_SIZE);
	err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
			  GFP_KERNEL);
	xa_unlock(&ucontext->mmap_xa);
	if (err) {
		kfree(entry);
		return EFA_MMAP_INVALID;
	}

	ibdev_dbg(
		&dev->ibdev,
		"mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
		entry->obj, entry->address, entry->length, get_mmap_key(entry));

	return get_mmap_key(entry);
}

int efa_query_device(struct ib_device *ibdev,
		     struct ib_device_attr *props,
		     struct ib_udata *udata)
{
	struct efa_com_get_device_attr_result *dev_attr;
	struct efa_ibv_ex_query_device_resp resp = {};
	struct efa_dev *dev = to_edev(ibdev);
	int err;

	if (udata && udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		return -EINVAL;
	}

	dev_attr = &dev->dev_attr;

	memset(props, 0, sizeof(*props));
	props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE;
	props->page_size_cap = dev_attr->page_size_cap;
	props->vendor_id = dev->pdev->vendor;
	props->vendor_part_id = dev->pdev->device;
	props->hw_ver = dev->pdev->subsystem_device;
	props->max_qp = dev_attr->max_qp;
	props->max_cq = dev_attr->max_cq;
	props->max_pd = dev_attr->max_pd;
	props->max_mr = dev_attr->max_mr;
	props->max_ah = dev_attr->max_ah;
	props->max_cqe = dev_attr->max_cq_depth;
	props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth,
				 dev_attr->max_rq_depth);
	props->max_send_sge = dev_attr->max_sq_sge;
	props->max_recv_sge = dev_attr->max_rq_sge;

	if (udata && udata->outlen) {
		resp.max_sq_sge = dev_attr->max_sq_sge;
		resp.max_rq_sge = dev_attr->max_rq_sge;
		resp.max_sq_wr = dev_attr->max_sq_depth;
		resp.max_rq_wr = dev_attr->max_rq_depth;

		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(ibdev,
				  "Failed to copy udata for query_device\n");
			return err;
		}
	}

	return 0;
}

int efa_query_port(struct ib_device *ibdev, u8 port,
		   struct ib_port_attr *props)
{
	struct efa_dev *dev = to_edev(ibdev);

	props->lmc = 1;

	props->state = IB_PORT_ACTIVE;
	props->phys_state = 5;
	props->gid_tbl_len = 1;
	props->pkey_tbl_len = 1;
	props->active_speed = IB_SPEED_EDR;
	props->active_width = IB_WIDTH_4X;
	props->max_mtu = ib_mtu_int_to_enum(dev->mtu);
	props->active_mtu = ib_mtu_int_to_enum(dev->mtu);
	props->max_msg_sz = dev->mtu;
	props->max_vl_num = 1;

	return 0;
}

int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
		 int qp_attr_mask,
		 struct ib_qp_init_attr *qp_init_attr)
{
	struct efa_dev *dev = to_edev(ibqp->device);
	struct efa_com_query_qp_params params = {};
	struct efa_com_query_qp_result result;
	struct efa_qp *qp = to_eqp(ibqp);
	int err;
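
/* Attribute bits this driver can report; others are rejected below. */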
#define EFA_QUERY_QP_SUPP_MASK \
	(IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
	 IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP)

	if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
			  qp_attr_mask, EFA_QUERY_QP_SUPP_MASK);
		return -EOPNOTSUPP;
	}

	memset(qp_attr, 0, sizeof(*qp_attr));
	memset(qp_init_attr, 0, sizeof(*qp_init_attr));

	params.qp_handle = qp->qp_handle;
	err = efa_com_query_qp(&dev->edev, &params, &result);
	if (err)
		return err;

	qp_attr->qp_state = result.qp_state;
	qp_attr->qkey = result.qkey;
	qp_attr->sq_psn = result.sq_psn;
	qp_attr->sq_draining = result.sq_draining;
	qp_attr->port_num = 1;

	qp_attr->cap.max_send_wr = qp->max_send_wr;
	qp_attr->cap.max_recv_wr = qp->max_recv_wr;
	qp_attr->cap.max_send_sge = qp->max_send_sge;
	qp_attr->cap.max_recv_sge = qp->max_recv_sge;
	qp_attr->cap.max_inline_data = qp->max_inline_data;

	qp_init_attr->qp_type = ibqp->qp_type;
	qp_init_attr->recv_cq = ibqp->recv_cq;
	qp_init_attr->send_cq = ibqp->send_cq;
	qp_init_attr->qp_context = ibqp->qp_context;
	qp_init_attr->cap = qp_attr->cap;

	return 0;
}

int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
		  union ib_gid *gid)
{
	struct efa_dev *dev = to_edev(ibdev);

	memcpy(gid->raw, dev->addr, sizeof(dev->addr));

	return 0;
}

int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
		   u16 *pkey)
{
	if (index > 0)
		return -EINVAL;

	*pkey = 0xffff;
	return 0;
}

static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn)
{
	struct efa_com_dealloc_pd_params params = {
		.pdn = pdn,
	};

	return efa_com_dealloc_pd(&dev->edev, &params);
}

int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_ibv_alloc_pd_resp resp = {};
	struct efa_com_alloc_pd_result result;
	struct efa_pd *pd = to_epd(ibpd);
	int err;

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		err = -EINVAL;
		goto err_out;
	}

	err = efa_com_alloc_pd(&dev->edev, &result);
	if (err)
		goto err_out;

	pd->pdn = result.pdn;
	resp.pdn = result.pdn;

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for alloc_pd\n");
			goto err_dealloc_pd;
		}
	}

	ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn);

	return 0;

err_dealloc_pd:
	efa_pd_dealloc(dev, result.pdn);
err_out:
	atomic64_inc(&dev->stats.sw_stats.alloc_pd_err);
	return err;
}

void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_pd *pd = to_epd(ibpd);

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
		return;
	}

	ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
	efa_pd_dealloc(dev, pd->pdn);
}

static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
{
	struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle };

	return efa_com_destroy_qp(&dev->edev, &params);
}
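
/*
 * QP teardown: destroy the QP on the device first, then drop the RQ DMA
 * mapping; the RQ pages themselves stay around until the ucontext's mmap
 * entries are removed.
 */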
int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibqp->pd->device);
	struct efa_qp *qp = to_eqp(ibqp);
	int err;

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
		return -EINVAL;
	}

	ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);
	err = efa_destroy_qp_handle(dev, qp->qp_handle);
	if (err)
		return err;

	if (qp->rq_cpu_addr) {
		ibdev_dbg(&dev->ibdev,
			  "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
			  qp->rq_cpu_addr, qp->rq_size,
			  &qp->rq_dma_addr);
		dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
				 DMA_TO_DEVICE);
	}

	kfree(qp);
	return 0;
}

static int qp_mmap_entries_setup(struct efa_qp *qp,
				 struct efa_dev *dev,
				 struct efa_ucontext *ucontext,
				 struct efa_com_create_qp_params *params,
				 struct efa_ibv_create_qp_resp *resp)
{
	/*
	 * Once an entry is inserted it might be mmapped, hence cannot be
	 * cleaned up until dealloc_ucontext.
	 */
	resp->sq_db_mmap_key =
		mmap_entry_insert(dev, ucontext, qp,
				  dev->db_bar_addr + resp->sq_db_offset,
				  PAGE_SIZE, EFA_MMAP_IO_NC);
	if (resp->sq_db_mmap_key == EFA_MMAP_INVALID)
		return -ENOMEM;

	resp->sq_db_offset &= ~PAGE_MASK;

	resp->llq_desc_mmap_key =
		mmap_entry_insert(dev, ucontext, qp,
				  dev->mem_bar_addr + resp->llq_desc_offset,
				  PAGE_ALIGN(params->sq_ring_size_in_bytes +
					     (resp->llq_desc_offset & ~PAGE_MASK)),
				  EFA_MMAP_IO_WC);
	if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID)
		return -ENOMEM;

	resp->llq_desc_offset &= ~PAGE_MASK;

	if (qp->rq_size) {
		resp->rq_db_mmap_key =
			mmap_entry_insert(dev, ucontext, qp,
					  dev->db_bar_addr + resp->rq_db_offset,
					  PAGE_SIZE, EFA_MMAP_IO_NC);
		if (resp->rq_db_mmap_key == EFA_MMAP_INVALID)
			return -ENOMEM;

		resp->rq_db_offset &= ~PAGE_MASK;

		resp->rq_mmap_key =
			mmap_entry_insert(dev, ucontext, qp,
					  virt_to_phys(qp->rq_cpu_addr),
					  qp->rq_size, EFA_MMAP_DMA_PAGE);
		if (resp->rq_mmap_key == EFA_MMAP_INVALID)
			return -ENOMEM;

		resp->rq_mmap_size = qp->rq_size;
	}

	return 0;
}

static int efa_qp_validate_cap(struct efa_dev *dev,
			       struct ib_qp_init_attr *init_attr)
{
	if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested send wr[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_send_wr,
			  dev->dev_attr.max_sq_depth);
		return -EINVAL;
	}
	if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested receive wr[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_recv_wr,
			  dev->dev_attr.max_rq_depth);
		return -EINVAL;
	}
	if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested sge send[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge);
		return -EINVAL;
	}
	if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested sge recv[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge);
		return -EINVAL;
	}
	if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested inline data[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_inline_data,
			  dev->dev_attr.inline_buf_size);
		return -EINVAL;
	}

	return 0;
}

static int efa_qp_validate_attr(struct efa_dev *dev,
				struct ib_qp_init_attr *init_attr)
{
	if (init_attr->qp_type != IB_QPT_DRIVER &&
	    init_attr->qp_type != IB_QPT_UD) {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported qp type %d\n", init_attr->qp_type);
		return -EOPNOTSUPP;
	}

	if (init_attr->srq) {
		ibdev_dbg(&dev->ibdev, "SRQ is not supported\n");
		return -EOPNOTSUPP;
	}

	if (init_attr->create_flags) {
		ibdev_dbg(&dev->ibdev, "Unsupported create flags\n");
		return -EOPNOTSUPP;
	}

	return 0;
}

struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
			    struct ib_qp_init_attr *init_attr,
			    struct ib_udata *udata)
{
	struct efa_com_create_qp_params create_qp_params = {};
	struct efa_com_create_qp_result create_qp_resp;
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_ibv_create_qp_resp resp = {};
	struct efa_ibv_create_qp cmd = {};
	bool rq_entry_inserted = false;
	struct efa_ucontext *ucontext;
	struct efa_qp *qp;
	int err;

	ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
					     ibucontext);

	err = efa_qp_validate_cap(dev, init_attr);
	if (err)
		goto err_out;

	err = efa_qp_validate_attr(dev, init_attr);
	if (err)
		goto err_out;

	if (!field_avail(cmd, driver_qp_type, udata->inlen)) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, no input udata\n");
		err = -EINVAL;
		goto err_out;
	}

	if (udata->inlen > sizeof(cmd) &&
	    !ib_is_udata_cleared(udata, sizeof(cmd),
				 udata->inlen - sizeof(cmd))) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, unknown fields in udata\n");
		err = -EINVAL;
		goto err_out;
	}

	err = ib_copy_from_udata(&cmd, udata,
				 min(sizeof(cmd), udata->inlen));
	if (err) {
		ibdev_dbg(&dev->ibdev,
			  "Cannot copy udata for create_qp\n");
		goto err_out;
	}

	if (cmd.comp_mask) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, unknown fields in udata\n");
		err = -EINVAL;
		goto err_out;
	}

	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
	if (!qp) {
		err = -ENOMEM;
		goto err_out;
	}

	create_qp_params.uarn = ucontext->uarn;
	create_qp_params.pd = to_epd(ibpd)->pdn;

	if (init_attr->qp_type == IB_QPT_UD) {
		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD;
	} else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) {
		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD;
	} else {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported qp type %d driver qp type %d\n",
			  init_attr->qp_type, cmd.driver_qp_type);
		err = -EOPNOTSUPP;
		goto err_free_qp;
	}

	ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n",
		  init_attr->qp_type, cmd.driver_qp_type);
	create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx;
	create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx;
	create_qp_params.sq_depth = init_attr->cap.max_send_wr;
	create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size;

	create_qp_params.rq_depth = init_attr->cap.max_recv_wr;
	create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size;
	qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes);
	if (qp->rq_size) {
		qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr,
						    qp->rq_size, DMA_TO_DEVICE);
		if (!qp->rq_cpu_addr) {
			err = -ENOMEM;
			goto err_free_qp;
		}

		ibdev_dbg(&dev->ibdev,
"qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n", 699 qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr); 700 create_qp_params.rq_base_addr = qp->rq_dma_addr; 701 } 702 703 err = efa_com_create_qp(&dev->edev, &create_qp_params, 704 &create_qp_resp); 705 if (err) 706 goto err_free_mapped; 707 708 resp.sq_db_offset = create_qp_resp.sq_db_offset; 709 resp.rq_db_offset = create_qp_resp.rq_db_offset; 710 resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset; 711 resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx; 712 resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx; 713 714 err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params, 715 &resp); 716 if (err) 717 goto err_destroy_qp; 718 719 rq_entry_inserted = true; 720 qp->qp_handle = create_qp_resp.qp_handle; 721 qp->ibqp.qp_num = create_qp_resp.qp_num; 722 qp->ibqp.qp_type = init_attr->qp_type; 723 qp->max_send_wr = init_attr->cap.max_send_wr; 724 qp->max_recv_wr = init_attr->cap.max_recv_wr; 725 qp->max_send_sge = init_attr->cap.max_send_sge; 726 qp->max_recv_sge = init_attr->cap.max_recv_sge; 727 qp->max_inline_data = init_attr->cap.max_inline_data; 728 729 if (udata->outlen) { 730 err = ib_copy_to_udata(udata, &resp, 731 min(sizeof(resp), udata->outlen)); 732 if (err) { 733 ibdev_dbg(&dev->ibdev, 734 "Failed to copy udata for qp[%u]\n", 735 create_qp_resp.qp_num); 736 goto err_destroy_qp; 737 } 738 } 739 740 ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num); 741 742 return &qp->ibqp; 743 744 err_destroy_qp: 745 efa_destroy_qp_handle(dev, create_qp_resp.qp_handle); 746 err_free_mapped: 747 if (qp->rq_size) { 748 dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size, 749 DMA_TO_DEVICE); 750 if (!rq_entry_inserted) 751 free_pages_exact(qp->rq_cpu_addr, qp->rq_size); 752 } 753 err_free_qp: 754 kfree(qp); 755 err_out: 756 atomic64_inc(&dev->stats.sw_stats.create_qp_err); 757 return ERR_PTR(err); 758 } 759 760 static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp, 761 struct ib_qp_attr *qp_attr, int qp_attr_mask, 762 enum ib_qp_state cur_state, 763 enum ib_qp_state new_state) 764 { 765 #define EFA_MODIFY_QP_SUPP_MASK \ 766 (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \ 767 IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN) 768 769 if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) { 770 ibdev_dbg(&dev->ibdev, 771 "Unsupported qp_attr_mask[%#x] supported[%#x]\n", 772 qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK); 773 return -EOPNOTSUPP; 774 } 775 776 if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD, 777 qp_attr_mask)) { 778 ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n"); 779 return -EINVAL; 780 } 781 782 if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) { 783 ibdev_dbg(&dev->ibdev, "Can't change port num\n"); 784 return -EOPNOTSUPP; 785 } 786 787 if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) { 788 ibdev_dbg(&dev->ibdev, "Can't change pkey index\n"); 789 return -EOPNOTSUPP; 790 } 791 792 return 0; 793 } 794 795 int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, 796 int qp_attr_mask, struct ib_udata *udata) 797 { 798 struct efa_dev *dev = to_edev(ibqp->device); 799 struct efa_com_modify_qp_params params = {}; 800 struct efa_qp *qp = to_eqp(ibqp); 801 enum ib_qp_state cur_state; 802 enum ib_qp_state new_state; 803 int err; 804 805 if (udata->inlen && 806 !ib_is_udata_cleared(udata, 0, udata->inlen)) { 807 ibdev_dbg(&dev->ibdev, 808 "Incompatible ABI params, udata not cleared\n"); 809 return -EINVAL; 810 } 811 812 
	cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state :
						     qp->state;
	new_state = qp_attr_mask & IB_QP_STATE ? qp_attr->qp_state : cur_state;

	err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state,
				     new_state);
	if (err)
		return err;

	params.qp_handle = qp->qp_handle;

	if (qp_attr_mask & IB_QP_STATE) {
		params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) |
				      BIT(EFA_ADMIN_CUR_QP_STATE_BIT);
		params.cur_qp_state = qp_attr->cur_qp_state;
		params.qp_state = qp_attr->qp_state;
	}

	if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
		params.modify_mask |=
			BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT);
		params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
	}

	if (qp_attr_mask & IB_QP_QKEY) {
		params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT);
		params.qkey = qp_attr->qkey;
	}

	if (qp_attr_mask & IB_QP_SQ_PSN) {
		params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT);
		params.sq_psn = qp_attr->sq_psn;
	}

	err = efa_com_modify_qp(&dev->edev, &params);
	if (err)
		return err;

	qp->state = new_state;

	return 0;
}

static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
{
	struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx };

	return efa_com_destroy_cq(&dev->edev, &params);
}

int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibcq->device);
	struct efa_cq *cq = to_ecq(ibcq);
	int err;

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
		return -EINVAL;
	}

	ibdev_dbg(&dev->ibdev,
		  "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
		  cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);

	err = efa_destroy_cq_idx(dev, cq->cq_idx);
	if (err)
		return err;

	dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
			 DMA_FROM_DEVICE);

	kfree(cq);
	return 0;
}

static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
				 struct efa_ibv_create_cq_resp *resp)
{
	resp->q_mmap_size = cq->size;
	resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq,
					     virt_to_phys(cq->cpu_addr),
					     cq->size, EFA_MMAP_DMA_PAGE);
	if (resp->q_mmap_key == EFA_MMAP_INVALID)
		return -ENOMEM;

	return 0;
}

static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries,
				  int vector, struct ib_ucontext *ibucontext,
				  struct ib_udata *udata)
{
	struct efa_ibv_create_cq_resp resp = {};
	struct efa_com_create_cq_params params;
	struct efa_com_create_cq_result result;
	struct efa_dev *dev = to_edev(ibdev);
	struct efa_ibv_create_cq cmd = {};
	bool cq_entry_inserted = false;
	struct efa_cq *cq;
	int err;

	ibdev_dbg(ibdev, "create_cq entries %d\n", entries);

	if (entries < 1 || entries > dev->dev_attr.max_cq_depth) {
		ibdev_dbg(ibdev,
			  "cq: requested entries[%u] non-positive or greater than max[%u]\n",
			  entries, dev->dev_attr.max_cq_depth);
		err = -EINVAL;
		goto err_out;
	}

	if (!field_avail(cmd, num_sub_cqs, udata->inlen)) {
		ibdev_dbg(ibdev,
			  "Incompatible ABI params, no input udata\n");
		err = -EINVAL;
		goto err_out;
	}

	if (udata->inlen > sizeof(cmd) &&
	    !ib_is_udata_cleared(udata, sizeof(cmd),
				 udata->inlen - sizeof(cmd))) {
		ibdev_dbg(ibdev,
			  "Incompatible ABI params, unknown fields in udata\n");
		err = -EINVAL;
		goto err_out;
	}

	err = ib_copy_from_udata(&cmd, udata,
				 min(sizeof(cmd), udata->inlen));
	if (err) {
		ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n");
		goto err_out;
	}

	if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) {
		ibdev_dbg(ibdev,
			  "Incompatible ABI params, unknown fields in udata\n");
		err = -EINVAL;
		goto err_out;
	}

	if (!cmd.cq_entry_size) {
		ibdev_dbg(ibdev,
			  "Invalid entry size [%u]\n", cmd.cq_entry_size);
		err = -EINVAL;
		goto err_out;
	}

	if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) {
		ibdev_dbg(ibdev,
			  "Invalid number of sub cqs[%u] expected[%u]\n",
			  cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq);
		err = -EINVAL;
		goto err_out;
	}

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq) {
		err = -ENOMEM;
		goto err_out;
	}

	cq->ucontext = to_eucontext(ibucontext);
	cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
	cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
					 DMA_FROM_DEVICE);
	if (!cq->cpu_addr) {
		err = -ENOMEM;
		goto err_free_cq;
	}

	params.uarn = cq->ucontext->uarn;
	params.cq_depth = entries;
	params.dma_addr = cq->dma_addr;
	params.entry_size_in_bytes = cmd.cq_entry_size;
	params.num_sub_cqs = cmd.num_sub_cqs;
	err = efa_com_create_cq(&dev->edev, &params, &result);
	if (err)
		goto err_free_mapped;

	resp.cq_idx = result.cq_idx;
	cq->cq_idx = result.cq_idx;
	cq->ibcq.cqe = result.actual_depth;
	WARN_ON_ONCE(entries != result.actual_depth);

	err = cq_mmap_entries_setup(dev, cq, &resp);
	if (err) {
		ibdev_dbg(ibdev,
			  "Could not setup cq[%u] mmap entries\n", cq->cq_idx);
		goto err_destroy_cq;
	}

	cq_entry_inserted = true;

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(ibdev,
				  "Failed to copy udata for create_cq\n");
			goto err_destroy_cq;
		}
	}

	ibdev_dbg(ibdev,
		  "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
		  cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr);

	return &cq->ibcq;

err_destroy_cq:
	efa_destroy_cq_idx(dev, cq->cq_idx);
err_free_mapped:
	dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
			 DMA_FROM_DEVICE);
	if (!cq_entry_inserted)
		free_pages_exact(cq->cpu_addr, cq->size);
err_free_cq:
	kfree(cq);
err_out:
	atomic64_inc(&dev->stats.sw_stats.create_cq_err);
	return ERR_PTR(err);
}

struct ib_cq *efa_create_cq(struct ib_device *ibdev,
			    const struct ib_cq_init_attr *attr,
			    struct ib_udata *udata)
{
	struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata,
								  struct efa_ucontext,
								  ibucontext);

	return do_create_cq(ibdev, attr->cqe, attr->comp_vector,
			    &ucontext->ibucontext, udata);
}

static int umem_to_page_list(struct efa_dev *dev,
			     struct ib_umem *umem,
			     u64 *page_list,
			     u32 hp_cnt,
			     u8 hp_shift)
{
	u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
	struct sg_dma_page_iter sg_iter;
	unsigned int page_idx = 0;
	unsigned int hp_idx = 0;

	ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
		  hp_cnt, pages_in_hp);

	for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
		if (page_idx % pages_in_hp == 0) {
			page_list[hp_idx] = sg_page_iter_dma_address(&sg_iter);
			hp_idx++;
		}

		page_idx++;
	}

	return 0;
}

static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
{
	struct scatterlist *sglist;
	struct page *pg;
	int i;

	sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL);
	if (!sglist)
		return NULL;
	sg_init_table(sglist, page_cnt);
	for (i = 0; i < page_cnt; i++) {
		pg = vmalloc_to_page(buf);
		if (!pg)
			goto err;
		sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
		buf += PAGE_SIZE / sizeof(*buf);
	}
	return sglist;

err:
	kfree(sglist);
	return NULL;
}

/*
 * create a chunk list of physical pages dma addresses from the supplied
 * scatter gather list
 */
static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
{
	unsigned int entry, payloads_in_sg, chunk_list_size, chunk_idx, payload_idx;
	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
	int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
	struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
	int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
	struct efa_com_ctrl_buff_info *ctrl_buf;
	u64 *cur_chunk_buf, *prev_chunk_buf;
	struct scatterlist *sg;
	dma_addr_t dma_addr;
	int i;

	/* allocate a chunk list that consists of 4KB chunks */
	chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK);

	chunk_list->size = chunk_list_size;
	chunk_list->chunks = kcalloc(chunk_list_size,
				     sizeof(*chunk_list->chunks),
				     GFP_KERNEL);
	if (!chunk_list->chunks)
		return -ENOMEM;

	ibdev_dbg(&dev->ibdev,
		  "chunk_list_size[%u] - pages[%u]\n", chunk_list_size,
		  page_cnt);

	/* allocate chunk buffers: */
	for (i = 0; i < chunk_list_size; i++) {
		chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL);
		if (!chunk_list->chunks[i].buf)
			goto chunk_list_dealloc;

		chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE;
	}
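
	/*
	 * The last chunk typically holds fewer payload pointers; trim its
	 * length to the pointers actually used plus the trailing control
	 * buffer slot.
	 */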
	chunk_list->chunks[chunk_list_size - 1].length =
		((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) +
		EFA_CHUNK_PTR_SIZE;

	/* fill the dma addresses of sg list pages to chunks: */
	chunk_idx = 0;
	payload_idx = 0;
	cur_chunk_buf = chunk_list->chunks[0].buf;
	for_each_sg(pages_sgl, sg, sg_dma_cnt, entry) {
		payloads_in_sg = sg_dma_len(sg) >> EFA_CHUNK_PAYLOAD_SHIFT;
		for (i = 0; i < payloads_in_sg; i++) {
			cur_chunk_buf[payload_idx++] =
				(sg_dma_address(sg) & ~(EFA_CHUNK_PAYLOAD_SIZE - 1)) +
				(EFA_CHUNK_PAYLOAD_SIZE * i);

			if (payload_idx == EFA_PTRS_PER_CHUNK) {
				chunk_idx++;
				cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
				payload_idx = 0;
			}
		}
	}

	/* map chunks to dma and fill chunks next ptrs */
	for (i = chunk_list_size - 1; i >= 0; i--) {
		dma_addr = dma_map_single(&dev->pdev->dev,
					  chunk_list->chunks[i].buf,
					  chunk_list->chunks[i].length,
					  DMA_TO_DEVICE);
		if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
			ibdev_err(&dev->ibdev,
				  "chunk[%u] dma_map_failed\n", i);
			goto chunk_list_unmap;
		}

		chunk_list->chunks[i].dma_addr = dma_addr;
		ibdev_dbg(&dev->ibdev,
			  "chunk[%u] mapped at [%pad]\n", i, &dma_addr);

		if (!i)
			break;

		prev_chunk_buf = chunk_list->chunks[i - 1].buf;

		ctrl_buf = (struct efa_com_ctrl_buff_info *)
				&prev_chunk_buf[EFA_PTRS_PER_CHUNK];
		ctrl_buf->length = chunk_list->chunks[i].length;

		efa_com_set_dma_addr(dma_addr,
				     &ctrl_buf->address.mem_addr_high,
				     &ctrl_buf->address.mem_addr_low);
	}

	return 0;

chunk_list_unmap:
	for (; i < chunk_list_size; i++) {
		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
	}
chunk_list_dealloc:
	for (i = 0; i < chunk_list_size; i++)
		kfree(chunk_list->chunks[i].buf);

	kfree(chunk_list->chunks);
	return -ENOMEM;
}

static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl)
{
	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
	int i;

	for (i = 0; i < chunk_list->size; i++) {
		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
		kfree(chunk_list->chunks[i].buf);
	}

	kfree(chunk_list->chunks);
}

/* initialize pbl continuous mode: map pbl buffer to a dma address. */
static int pbl_continuous_initialize(struct efa_dev *dev,
				     struct pbl_context *pbl)
{
	dma_addr_t dma_addr;

	dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf,
				  pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
	if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
		ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n");
		return -ENOMEM;
	}

	pbl->phys.continuous.dma_addr = dma_addr;
	ibdev_dbg(&dev->ibdev,
		  "pbl continuous - dma_addr = %pad, size[%u]\n",
		  &dma_addr, pbl->pbl_buf_size_in_bytes);

	return 0;
}
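
/*
 * Indirect mode hands the device only the first chunk; the control buffer at
 * the tail of each chunk points to the next one, which is why
 * pbl_chunk_list_create() maps the chunks last-to-first.
 */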

/*
 * initialize pbl indirect mode:
 * create a chunk list out of the dma addresses of the physical pages of
 * pbl buffer.
 */
static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
{
	u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE);
	struct scatterlist *sgl;
	int sg_dma_cnt, err;

	BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE);
	sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages);
	if (!sgl)
		return -ENOMEM;

	sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
	if (!sg_dma_cnt) {
		err = -EINVAL;
		goto err_map;
	}

	pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages;
	pbl->phys.indirect.sgl = sgl;
	pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt;
	err = pbl_chunk_list_create(dev, pbl);
	if (err) {
		ibdev_dbg(&dev->ibdev,
			  "chunk_list creation failed[%d]\n", err);
		goto err_chunk;
	}

	ibdev_dbg(&dev->ibdev,
		  "pbl indirect - size[%u], chunks[%u]\n",
		  pbl->pbl_buf_size_in_bytes,
		  pbl->phys.indirect.chunk_list.size);

	return 0;

err_chunk:
	dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
err_map:
	kfree(sgl);
	return err;
}

static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl)
{
	pbl_chunk_list_destroy(dev, pbl);
	dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl,
		     pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE);
	kfree(pbl->phys.indirect.sgl);
}

/* create a page buffer list from a mapped user memory region */
static int pbl_create(struct efa_dev *dev,
		      struct pbl_context *pbl,
		      struct ib_umem *umem,
		      int hp_cnt,
		      u8 hp_shift)
{
	int err;

	pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
	pbl->pbl_buf = kzalloc(pbl->pbl_buf_size_in_bytes,
			       GFP_KERNEL | __GFP_NOWARN);
	if (pbl->pbl_buf) {
		pbl->physically_continuous = 1;
		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
					hp_shift);
		if (err)
			goto err_continuous;
		err = pbl_continuous_initialize(dev, pbl);
		if (err)
			goto err_continuous;
	} else {
		pbl->physically_continuous = 0;
		pbl->pbl_buf = vzalloc(pbl->pbl_buf_size_in_bytes);
		if (!pbl->pbl_buf)
			return -ENOMEM;

		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
					hp_shift);
		if (err)
			goto err_indirect;
		err = pbl_indirect_initialize(dev, pbl);
		if (err)
			goto err_indirect;
	}

	ibdev_dbg(&dev->ibdev,
		  "user_pbl_created: user_pages[%u], continuous[%u]\n",
		  hp_cnt, pbl->physically_continuous);

	return 0;

err_continuous:
	kfree(pbl->pbl_buf);
	return err;
err_indirect:
	vfree(pbl->pbl_buf);
	return err;
}

static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl)
{
	if (pbl->physically_continuous) {
		dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr,
				 pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
		kfree(pbl->pbl_buf);
	} else {
		pbl_indirect_terminate(dev, pbl);
		vfree(pbl->pbl_buf);
	}
}

static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
				 struct efa_com_reg_mr_params *params)
{
	int err;

	params->inline_pbl = 1;
	err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
				params->page_num, params->page_shift);
	if (err)
		return err;

	ibdev_dbg(&dev->ibdev,
"inline_pbl_array - pages[%u]\n", params->page_num); 1371 1372 return 0; 1373 } 1374 1375 static int efa_create_pbl(struct efa_dev *dev, 1376 struct pbl_context *pbl, 1377 struct efa_mr *mr, 1378 struct efa_com_reg_mr_params *params) 1379 { 1380 int err; 1381 1382 err = pbl_create(dev, pbl, mr->umem, params->page_num, 1383 params->page_shift); 1384 if (err) { 1385 ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err); 1386 return err; 1387 } 1388 1389 params->inline_pbl = 0; 1390 params->indirect = !pbl->physically_continuous; 1391 if (pbl->physically_continuous) { 1392 params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes; 1393 1394 efa_com_set_dma_addr(pbl->phys.continuous.dma_addr, 1395 ¶ms->pbl.pbl.address.mem_addr_high, 1396 ¶ms->pbl.pbl.address.mem_addr_low); 1397 } else { 1398 params->pbl.pbl.length = 1399 pbl->phys.indirect.chunk_list.chunks[0].length; 1400 1401 efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr, 1402 ¶ms->pbl.pbl.address.mem_addr_high, 1403 ¶ms->pbl.pbl.address.mem_addr_low); 1404 } 1405 1406 return 0; 1407 } 1408 1409 static void efa_cont_pages(struct ib_umem *umem, u64 addr, 1410 unsigned long max_page_shift, 1411 int *count, u8 *shift, u32 *ncont) 1412 { 1413 struct scatterlist *sg; 1414 u64 base = ~0, p = 0; 1415 unsigned long tmp; 1416 unsigned long m; 1417 u64 len, pfn; 1418 int i = 0; 1419 int entry; 1420 1421 addr = addr >> PAGE_SHIFT; 1422 tmp = (unsigned long)addr; 1423 m = find_first_bit(&tmp, BITS_PER_LONG); 1424 if (max_page_shift) 1425 m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m); 1426 1427 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { 1428 len = DIV_ROUND_UP(sg_dma_len(sg), PAGE_SIZE); 1429 pfn = sg_dma_address(sg) >> PAGE_SHIFT; 1430 if (base + p != pfn) { 1431 /* 1432 * If either the offset or the new 1433 * base are unaligned update m 1434 */ 1435 tmp = (unsigned long)(pfn | p); 1436 if (!IS_ALIGNED(tmp, 1 << m)) 1437 m = find_first_bit(&tmp, BITS_PER_LONG); 1438 1439 base = pfn; 1440 p = 0; 1441 } 1442 1443 p += len; 1444 i += len; 1445 } 1446 1447 if (i) { 1448 m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); 1449 *ncont = DIV_ROUND_UP(i, (1 << m)); 1450 } else { 1451 m = 0; 1452 *ncont = 0; 1453 } 1454 1455 *shift = PAGE_SHIFT + m; 1456 *count = i; 1457 } 1458 1459 struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, 1460 u64 virt_addr, int access_flags, 1461 struct ib_udata *udata) 1462 { 1463 struct efa_dev *dev = to_edev(ibpd->device); 1464 struct efa_com_reg_mr_params params = {}; 1465 struct efa_com_reg_mr_result result = {}; 1466 unsigned long max_page_shift; 1467 struct pbl_context pbl; 1468 struct efa_mr *mr; 1469 int inline_size; 1470 int npages; 1471 int err; 1472 1473 if (udata->inlen && 1474 !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) { 1475 ibdev_dbg(&dev->ibdev, 1476 "Incompatible ABI params, udata not cleared\n"); 1477 err = -EINVAL; 1478 goto err_out; 1479 } 1480 1481 if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) { 1482 ibdev_dbg(&dev->ibdev, 1483 "Unsupported access flags[%#x], supported[%#x]\n", 1484 access_flags, EFA_SUPPORTED_ACCESS_FLAGS); 1485 err = -EOPNOTSUPP; 1486 goto err_out; 1487 } 1488 1489 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1490 if (!mr) { 1491 err = -ENOMEM; 1492 goto err_out; 1493 } 1494 1495 mr->umem = ib_umem_get(udata, start, length, access_flags, 0); 1496 if (IS_ERR(mr->umem)) { 1497 err = PTR_ERR(mr->umem); 1498 ibdev_dbg(&dev->ibdev, 1499 "Failed to pin and map user space memory[%d]\n", err); 1500 goto err_free; 1501 } 1502 
	params.pd = to_epd(ibpd)->pdn;
	params.iova = virt_addr;
	params.mr_length_in_bytes = length;
	params.permissions = access_flags & 0x1;
	max_page_shift = fls64(dev->dev_attr.page_size_cap);

	efa_cont_pages(mr->umem, start, max_page_shift, &npages,
		       &params.page_shift, &params.page_num);
	ibdev_dbg(&dev->ibdev,
		  "start %#llx length %#llx npages %d params.page_shift %u params.page_num %u\n",
		  start, length, npages, params.page_shift, params.page_num);

	inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
	if (params.page_num <= inline_size) {
		err = efa_create_inline_pbl(dev, mr, &params);
		if (err)
			goto err_unmap;

		err = efa_com_register_mr(&dev->edev, &params, &result);
		if (err)
			goto err_unmap;
	} else {
		err = efa_create_pbl(dev, &pbl, mr, &params);
		if (err)
			goto err_unmap;

		err = efa_com_register_mr(&dev->edev, &params, &result);
		pbl_destroy(dev, &pbl);

		if (err)
			goto err_unmap;
	}

	mr->ibmr.lkey = result.l_key;
	mr->ibmr.rkey = result.r_key;
	mr->ibmr.length = length;
	ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);

	return &mr->ibmr;

err_unmap:
	ib_umem_release(mr->umem);
err_free:
	kfree(mr);
err_out:
	atomic64_inc(&dev->stats.sw_stats.reg_mr_err);
	return ERR_PTR(err);
}

int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibmr->device);
	struct efa_com_dereg_mr_params params;
	struct efa_mr *mr = to_emr(ibmr);
	int err;

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
		return -EINVAL;
	}

	ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);

	if (mr->umem) {
		params.l_key = mr->ibmr.lkey;
		err = efa_com_dereg_mr(&dev->edev, &params);
		if (err)
			return err;
		ib_umem_release(mr->umem);
	}

	kfree(mr);

	return 0;
}

int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num,
			   struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err) {
		ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err);
		return err;
	}

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;

	return 0;
}

static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
{
	struct efa_com_dealloc_uar_params params = {
		.uarn = uarn,
	};

	return efa_com_dealloc_uar(&dev->edev, &params);
}

int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
{
	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
	struct efa_dev *dev = to_edev(ibucontext->device);
	struct efa_ibv_alloc_ucontext_resp resp = {};
	struct efa_com_alloc_uar_result result;
	int err;

	/*
	 * it's fine if the driver does not know all request fields,
	 * we will ack input fields in our response.
	 */
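
	/*
	 * Allocate a dedicated UAR for this context and initialize the xarray
	 * that tracks its mmap entries.
	 */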

	err = efa_com_alloc_uar(&dev->edev, &result);
	if (err)
		goto err_out;

	ucontext->uarn = result.uarn;
	xa_init(&ucontext->mmap_xa);

	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
	resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
	resp.inline_buf_size = dev->dev_attr.inline_buf_size;
	resp.max_llq_size = dev->dev_attr.max_llq_size;

	if (udata && udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err)
			goto err_dealloc_uar;
	}

	return 0;

err_dealloc_uar:
	efa_dealloc_uar(dev, result.uarn);
err_out:
	atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err);
	return err;
}

void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
{
	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
	struct efa_dev *dev = to_edev(ibucontext->device);

	mmap_entries_remove_free(dev, ucontext);
	efa_dealloc_uar(dev, ucontext->uarn);
}

static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
		      struct vm_area_struct *vma, u64 key, u64 length)
{
	struct efa_mmap_entry *entry;
	unsigned long va;
	u64 pfn;
	int err;

	entry = mmap_entry_get(dev, ucontext, key, length);
	if (!entry) {
		ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n",
			  key);
		return -EINVAL;
	}

	ibdev_dbg(&dev->ibdev,
		  "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n",
		  entry->address, length, entry->mmap_flag);

	pfn = entry->address >> PAGE_SHIFT;
	switch (entry->mmap_flag) {
	case EFA_MMAP_IO_NC:
		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
					pgprot_noncached(vma->vm_page_prot));
		break;
	case EFA_MMAP_IO_WC:
		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
					pgprot_writecombine(vma->vm_page_prot));
		break;
	case EFA_MMAP_DMA_PAGE:
		for (va = vma->vm_start; va < vma->vm_end;
		     va += PAGE_SIZE, pfn++) {
			err = vm_insert_page(vma, va, pfn_to_page(pfn));
			if (err)
				break;
		}
		break;
	default:
		err = -EINVAL;
	}

	if (err)
		ibdev_dbg(
			&dev->ibdev,
			"Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n",
			entry->address, length, entry->mmap_flag, err);

	return err;
}

int efa_mmap(struct ib_ucontext *ibucontext,
	     struct vm_area_struct *vma)
{
	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
	struct efa_dev *dev = to_edev(ibucontext->device);
	u64 length = vma->vm_end - vma->vm_start;
	u64 key = vma->vm_pgoff << PAGE_SHIFT;

	ibdev_dbg(&dev->ibdev,
		  "start %#lx, end %#lx, length = %#llx, key = %#llx\n",
		  vma->vm_start, vma->vm_end, length, key);

	if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) {
		ibdev_dbg(&dev->ibdev,
			  "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n",
			  length, PAGE_SIZE, vma->vm_flags);
		return -EINVAL;
	}

	if (vma->vm_flags & VM_EXEC) {
		ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n");
		return -EPERM;
	}

	return __efa_mmap(dev, ucontext, vma, key, length);
}
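
/*
 * Address handle create/destroy go through device admin commands, so both
 * bail out when the core asks for a non-sleepable (atomic) context.
 */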
static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
{
	struct efa_com_destroy_ah_params params = {
		.ah = ah->ah,
		.pdn = to_epd(ah->ibah.pd)->pdn,
	};

	return efa_com_destroy_ah(&dev->edev, &params);
}

int efa_create_ah(struct ib_ah *ibah,
		  struct rdma_ah_attr *ah_attr,
		  u32 flags,
		  struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibah->device);
	struct efa_com_create_ah_params params = {};
	struct efa_ibv_create_ah_resp resp = {};
	struct efa_com_create_ah_result result;
	struct efa_ah *ah = to_eah(ibah);
	int err;

	if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) {
		ibdev_dbg(&dev->ibdev,
			  "Create address handle is not supported in atomic context\n");
		err = -EOPNOTSUPP;
		goto err_out;
	}

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
		err = -EINVAL;
		goto err_out;
	}

	memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
	       sizeof(params.dest_addr));
	params.pdn = to_epd(ibah->pd)->pdn;
	err = efa_com_create_ah(&dev->edev, &params, &result);
	if (err)
		goto err_out;

	memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
	ah->ah = result.ah;

	resp.efa_address_handle = result.ah;

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for create_ah response\n");
			goto err_destroy_ah;
		}
	}
	ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);

	return 0;

err_destroy_ah:
	efa_ah_destroy(dev, ah);
err_out:
	atomic64_inc(&dev->stats.sw_stats.create_ah_err);
	return err;
}

void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
{
	struct efa_dev *dev = to_edev(ibah->pd->device);
	struct efa_ah *ah = to_eah(ibah);

	ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah);

	if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
		ibdev_dbg(&dev->ibdev,
			  "Destroy address handle is not supported in atomic context\n");
		return;
	}

	efa_ah_destroy(dev, ah);
}

enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
					 u8 port_num)
{
	return IB_LINK_LAYER_UNSPECIFIED;
}