// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
 */

#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
#include <linux/vmalloc.h>
#include <linux/log2.h>

#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>
#define UVERBS_MODULE_NAME efa_ib
#include <rdma/uverbs_named_ioctl.h>
#include <rdma/ib_user_ioctl_cmds.h>

#include "efa.h"
#include "efa_io_defs.h"

enum {
	EFA_MMAP_DMA_PAGE = 0,
	EFA_MMAP_IO_WC,
	EFA_MMAP_IO_NC,
};

#define EFA_AENQ_ENABLED_GROUPS \
	(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
	 BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))

struct efa_user_mmap_entry {
	struct rdma_user_mmap_entry rdma_entry;
	u64 address;
	u8 mmap_flag;
};

#define EFA_DEFINE_DEVICE_STATS(op) \
	op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
	op(EFA_COMPLETED_CMDS, "completed_cmds") \
	op(EFA_CMDS_ERR, "cmds_err") \
	op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
	op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
	op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
	op(EFA_CREATE_QP_ERR, "create_qp_err") \
	op(EFA_CREATE_CQ_ERR, "create_cq_err") \
	op(EFA_REG_MR_ERR, "reg_mr_err") \
	op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
	op(EFA_CREATE_AH_ERR, "create_ah_err") \
	op(EFA_MMAP_ERR, "mmap_err")

#define EFA_DEFINE_PORT_STATS(op) \
	op(EFA_TX_BYTES, "tx_bytes") \
	op(EFA_TX_PKTS, "tx_pkts") \
	op(EFA_RX_BYTES, "rx_bytes") \
	op(EFA_RX_PKTS, "rx_pkts") \
	op(EFA_RX_DROPS, "rx_drops") \
	op(EFA_SEND_BYTES, "send_bytes") \
	op(EFA_SEND_WRS, "send_wrs") \
	op(EFA_RECV_BYTES, "recv_bytes") \
	op(EFA_RECV_WRS, "recv_wrs") \
	op(EFA_RDMA_READ_WRS, "rdma_read_wrs") \
	op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \
	op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \
	op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \
	op(EFA_RDMA_WRITE_WRS, "rdma_write_wrs") \
	op(EFA_RDMA_WRITE_BYTES, "rdma_write_bytes") \
	op(EFA_RDMA_WRITE_WR_ERR, "rdma_write_wr_err") \
	op(EFA_RDMA_WRITE_RECV_BYTES, "rdma_write_recv_bytes") \

#define EFA_STATS_ENUM(ename, name) ename,
#define EFA_STATS_STR(ename, nam) \
	[ename].name = nam,

enum efa_hw_device_stats {
	EFA_DEFINE_DEVICE_STATS(EFA_STATS_ENUM)
};

static const struct rdma_stat_desc efa_device_stats_descs[] = {
	EFA_DEFINE_DEVICE_STATS(EFA_STATS_STR)
};

enum efa_hw_port_stats {
	EFA_DEFINE_PORT_STATS(EFA_STATS_ENUM)
};

static const struct rdma_stat_desc efa_port_stats_descs[] = {
	EFA_DEFINE_PORT_STATS(EFA_STATS_STR)
};

#define EFA_CHUNK_PAYLOAD_SHIFT 12
#define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT)
#define EFA_CHUNK_PAYLOAD_PTR_SIZE 8

#define EFA_CHUNK_SHIFT 12
#define EFA_CHUNK_SIZE BIT(EFA_CHUNK_SHIFT)
#define EFA_CHUNK_PTR_SIZE sizeof(struct efa_com_ctrl_buff_info)

#define EFA_PTRS_PER_CHUNK \
	((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE)

#define EFA_CHUNK_USED_SIZE \
	((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
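
/*
 * Layout note (derived from the macros above): each PBL chunk is a 4KB
 * buffer whose tail holds one struct efa_com_ctrl_buff_info describing the
 * next chunk in the list, so each chunk carries
 * (EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE
 * 8-byte page DMA addresses of payload.
 */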
struct pbl_chunk {
	dma_addr_t dma_addr;
	u64 *buf;
	u32 length;
};

struct pbl_chunk_list {
	struct pbl_chunk *chunks;
	unsigned int size;
};

struct pbl_context {
	union {
		struct {
			dma_addr_t dma_addr;
		} continuous;
		struct {
			u32 pbl_buf_size_in_pages;
			struct scatterlist *sgl;
			int sg_dma_cnt;
			struct pbl_chunk_list chunk_list;
		} indirect;
	} phys;
	u64 *pbl_buf;
	u32 pbl_buf_size_in_bytes;
	u8 physically_continuous;
};

static inline struct efa_dev *to_edev(struct ib_device *ibdev)
{
	return container_of(ibdev, struct efa_dev, ibdev);
}

static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext)
{
	return container_of(ibucontext, struct efa_ucontext, ibucontext);
}

static inline struct efa_pd *to_epd(struct ib_pd *ibpd)
{
	return container_of(ibpd, struct efa_pd, ibpd);
}

static inline struct efa_mr *to_emr(struct ib_mr *ibmr)
{
	return container_of(ibmr, struct efa_mr, ibmr);
}

static inline struct efa_qp *to_eqp(struct ib_qp *ibqp)
{
	return container_of(ibqp, struct efa_qp, ibqp);
}

static inline struct efa_cq *to_ecq(struct ib_cq *ibcq)
{
	return container_of(ibcq, struct efa_cq, ibcq);
}

static inline struct efa_ah *to_eah(struct ib_ah *ibah)
{
	return container_of(ibah, struct efa_ah, ibah);
}

static inline struct efa_user_mmap_entry *
to_emmap(struct rdma_user_mmap_entry *rdma_entry)
{
	return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry);
}

#define EFA_DEV_CAP(dev, cap) \
	((dev)->dev_attr.device_caps & \
	 EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_##cap##_MASK)

#define is_reserved_cleared(reserved) \
	!memchr_inv(reserved, 0, sizeof(reserved))

static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
			       size_t size, enum dma_data_direction dir)
{
	void *addr;

	addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
	if (!addr)
		return NULL;

	*dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir);
	if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) {
		ibdev_err(&dev->ibdev, "Failed to map DMA address\n");
		free_pages_exact(addr, size);
		return NULL;
	}

	return addr;
}

static void efa_free_mapped(struct efa_dev *dev, void *cpu_addr,
			    dma_addr_t dma_addr,
			    size_t size, enum dma_data_direction dir)
{
	dma_unmap_single(&dev->pdev->dev, dma_addr, size, dir);
	free_pages_exact(cpu_addr, size);
}

int efa_query_device(struct ib_device *ibdev,
		     struct ib_device_attr *props,
		     struct ib_udata *udata)
{
	struct efa_com_get_device_attr_result *dev_attr;
	struct efa_ibv_ex_query_device_resp resp = {};
	struct efa_dev *dev = to_edev(ibdev);
	int err;

	if (udata && udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		return -EINVAL;
	}

	dev_attr = &dev->dev_attr;

	memset(props, 0, sizeof(*props));
	props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE;
	props->page_size_cap = dev_attr->page_size_cap;
	props->vendor_id = dev->pdev->vendor;
	props->vendor_part_id = dev->pdev->device;
	props->hw_ver = dev->pdev->subsystem_device;
	props->max_qp = dev_attr->max_qp;
	props->max_cq = dev_attr->max_cq;
	props->max_pd = dev_attr->max_pd;
	props->max_mr = dev_attr->max_mr;
	props->max_ah = dev_attr->max_ah;
	props->max_cqe = dev_attr->max_cq_depth;
	props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth,
				 dev_attr->max_rq_depth);
	props->max_send_sge = dev_attr->max_sq_sge;
	props->max_recv_sge = dev_attr->max_rq_sge;
	props->max_sge_rd = dev_attr->max_wr_rdma_sge;
	props->max_pkeys = 1;

	if (udata && udata->outlen) {
		resp.max_sq_sge = dev_attr->max_sq_sge;
		resp.max_rq_sge = dev_attr->max_rq_sge;
		resp.max_sq_wr = dev_attr->max_sq_depth;
		resp.max_rq_wr = dev_attr->max_rq_depth;
		resp.max_rdma_size = dev_attr->max_rdma_size;

		resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID;
		if (EFA_DEV_CAP(dev, RDMA_READ))
			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;

		if (EFA_DEV_CAP(dev, RNR_RETRY))
			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY;

		if (EFA_DEV_CAP(dev, DATA_POLLING_128))
			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_DATA_POLLING_128;

		if (EFA_DEV_CAP(dev, RDMA_WRITE))
			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_WRITE;

		if (dev->neqs)
			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS;

		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(ibdev,
				  "Failed to copy udata for query_device\n");
			return err;
		}
	}

	return 0;
}

int efa_query_port(struct ib_device *ibdev, u32 port,
		   struct ib_port_attr *props)
{
	struct efa_dev *dev = to_edev(ibdev);

	props->lmc = 1;

	props->state = IB_PORT_ACTIVE;
	props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
	props->gid_tbl_len = 1;
	props->pkey_tbl_len = 1;
	props->active_speed = IB_SPEED_EDR;
	props->active_width = IB_WIDTH_4X;
	props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
	props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
	props->max_msg_sz = dev->dev_attr.mtu;
	props->max_vl_num = 1;

	return 0;
}

int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
		 int qp_attr_mask,
		 struct ib_qp_init_attr *qp_init_attr)
{
	struct efa_dev *dev = to_edev(ibqp->device);
	struct efa_com_query_qp_params params = {};
	struct efa_com_query_qp_result result;
	struct efa_qp *qp = to_eqp(ibqp);
	int err;

#define EFA_QUERY_QP_SUPP_MASK \
	(IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
	 IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY)

	if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
			  qp_attr_mask, EFA_QUERY_QP_SUPP_MASK);
		return -EOPNOTSUPP;
	}

	memset(qp_attr, 0, sizeof(*qp_attr));
	memset(qp_init_attr, 0, sizeof(*qp_init_attr));

	params.qp_handle = qp->qp_handle;
	err = efa_com_query_qp(&dev->edev, &params, &result);
	if (err)
		return err;

	qp_attr->qp_state = result.qp_state;
	qp_attr->qkey = result.qkey;
	qp_attr->sq_psn = result.sq_psn;
	qp_attr->sq_draining = result.sq_draining;
	qp_attr->port_num = 1;
	qp_attr->rnr_retry = result.rnr_retry;

	qp_attr->cap.max_send_wr = qp->max_send_wr;
	qp_attr->cap.max_recv_wr = qp->max_recv_wr;
	qp_attr->cap.max_send_sge = qp->max_send_sge;
	qp_attr->cap.max_recv_sge = qp->max_recv_sge;
	qp_attr->cap.max_inline_data = qp->max_inline_data;

	qp_init_attr->qp_type = ibqp->qp_type;
	qp_init_attr->recv_cq = ibqp->recv_cq;
	qp_init_attr->send_cq = ibqp->send_cq;
	qp_init_attr->qp_context = ibqp->qp_context;
	qp_init_attr->cap = qp_attr->cap;

	return 0;
}

int efa_query_gid(struct ib_device *ibdev, u32 port, int index,
		  union ib_gid *gid)
{
	struct efa_dev *dev = to_edev(ibdev);

	memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr));

	return 0;
}

int efa_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
		   u16 *pkey)
{
	if (index > 0)
		return -EINVAL;

	*pkey = 0xffff;
	return 0;
}

static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn)
{
	struct efa_com_dealloc_pd_params params = {
		.pdn = pdn,
	};

	return efa_com_dealloc_pd(&dev->edev, &params);
}

int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_ibv_alloc_pd_resp resp = {};
	struct efa_com_alloc_pd_result result;
	struct efa_pd *pd = to_epd(ibpd);
	int err;

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		err = -EINVAL;
		goto err_out;
	}

	err = efa_com_alloc_pd(&dev->edev, &result);
	if (err)
		goto err_out;

	pd->pdn = result.pdn;
	resp.pdn = result.pdn;

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for alloc_pd\n");
			goto err_dealloc_pd;
		}
	}

	ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn);

	return 0;

err_dealloc_pd:
	efa_pd_dealloc(dev, result.pdn);
err_out:
	atomic64_inc(&dev->stats.alloc_pd_err);
	return err;
}

int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_pd *pd = to_epd(ibpd);

	ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
	efa_pd_dealloc(dev, pd->pdn);
	return 0;
}

static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
{
	struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle };

	return efa_com_destroy_qp(&dev->edev, &params);
}

static void efa_qp_user_mmap_entries_remove(struct efa_qp *qp)
{
	rdma_user_mmap_entry_remove(qp->rq_mmap_entry);
	rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry);
	rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry);
	rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry);
}

int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibqp->pd->device);
	struct efa_qp *qp = to_eqp(ibqp);
	int err;

	ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);

	err = efa_destroy_qp_handle(dev, qp->qp_handle);
	if (err)
		return err;

	efa_qp_user_mmap_entries_remove(qp);

	if (qp->rq_cpu_addr) {
		ibdev_dbg(&dev->ibdev,
			  "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
			  qp->rq_cpu_addr, qp->rq_size,
			  &qp->rq_dma_addr);
		efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
				qp->rq_size, DMA_TO_DEVICE);
	}

	return 0;
}

static struct rdma_user_mmap_entry*
efa_user_mmap_entry_insert(struct ib_ucontext *ucontext,
			   u64 address, size_t length,
			   u8 mmap_flag, u64 *offset)
{
	struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	int err;

	if (!entry)
		return NULL;

	entry->address = address;
	entry->mmap_flag = mmap_flag;

	err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry,
					  length);
	if (err) {
		kfree(entry);
		return NULL;
	}
	*offset = rdma_user_mmap_get_offset(&entry->rdma_entry);

	return &entry->rdma_entry;
}
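
/*
 * Note: a QP exposes up to four mmap entries to userspace: the SQ and RQ
 * doorbell pages (non-cached), the LLQ descriptor ring in the device memory
 * BAR (write-combined) and, when an RQ exists, the RQ buffer itself
 * (kernel DMA pages). The *_offset fields returned to userspace are reduced
 * below to the offset within the mapped page.
 */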
static int qp_mmap_entries_setup(struct efa_qp *qp,
				 struct efa_dev *dev,
				 struct efa_ucontext *ucontext,
				 struct efa_com_create_qp_params *params,
				 struct efa_ibv_create_qp_resp *resp)
{
	size_t length;
	u64 address;

	address = dev->db_bar_addr + resp->sq_db_offset;
	qp->sq_db_mmap_entry =
		efa_user_mmap_entry_insert(&ucontext->ibucontext,
					   address,
					   PAGE_SIZE, EFA_MMAP_IO_NC,
					   &resp->sq_db_mmap_key);
	if (!qp->sq_db_mmap_entry)
		return -ENOMEM;

	resp->sq_db_offset &= ~PAGE_MASK;

	address = dev->mem_bar_addr + resp->llq_desc_offset;
	length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
			    (resp->llq_desc_offset & ~PAGE_MASK));

	qp->llq_desc_mmap_entry =
		efa_user_mmap_entry_insert(&ucontext->ibucontext,
					   address, length,
					   EFA_MMAP_IO_WC,
					   &resp->llq_desc_mmap_key);
	if (!qp->llq_desc_mmap_entry)
		goto err_remove_mmap;

	resp->llq_desc_offset &= ~PAGE_MASK;

	if (qp->rq_size) {
		address = dev->db_bar_addr + resp->rq_db_offset;

		qp->rq_db_mmap_entry =
			efa_user_mmap_entry_insert(&ucontext->ibucontext,
						   address, PAGE_SIZE,
						   EFA_MMAP_IO_NC,
						   &resp->rq_db_mmap_key);
		if (!qp->rq_db_mmap_entry)
			goto err_remove_mmap;

		resp->rq_db_offset &= ~PAGE_MASK;

		address = virt_to_phys(qp->rq_cpu_addr);
		qp->rq_mmap_entry =
			efa_user_mmap_entry_insert(&ucontext->ibucontext,
						   address, qp->rq_size,
						   EFA_MMAP_DMA_PAGE,
						   &resp->rq_mmap_key);
		if (!qp->rq_mmap_entry)
			goto err_remove_mmap;

		resp->rq_mmap_size = qp->rq_size;
	}

	return 0;

err_remove_mmap:
	efa_qp_user_mmap_entries_remove(qp);

	return -ENOMEM;
}

static int efa_qp_validate_cap(struct efa_dev *dev,
			       struct ib_qp_init_attr *init_attr)
{
	if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested send wr[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_send_wr,
			  dev->dev_attr.max_sq_depth);
		return -EINVAL;
	}
	if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested receive wr[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_recv_wr,
			  dev->dev_attr.max_rq_depth);
		return -EINVAL;
	}
	if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested sge send[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge);
		return -EINVAL;
	}
	if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested sge recv[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge);
		return -EINVAL;
	}
	if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested inline data[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_inline_data,
			  dev->dev_attr.inline_buf_size);
		return -EINVAL;
	}

	return 0;
}

static int efa_qp_validate_attr(struct efa_dev *dev,
				struct ib_qp_init_attr *init_attr)
{
	if (init_attr->qp_type != IB_QPT_DRIVER &&
	    init_attr->qp_type != IB_QPT_UD) {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported qp type %d\n", init_attr->qp_type);
		return -EOPNOTSUPP;
	}

	if (init_attr->srq) {
		ibdev_dbg(&dev->ibdev, "SRQ is not supported\n");
		return -EOPNOTSUPP;
	}

	if (init_attr->create_flags) {
		ibdev_dbg(&dev->ibdev, "Unsupported create flags\n");
		return -EOPNOTSUPP;
	}

	return 0;
}

int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
		  struct ib_udata *udata)
{
	struct efa_com_create_qp_params create_qp_params = {};
	struct efa_com_create_qp_result create_qp_resp;
	struct efa_dev *dev = to_edev(ibqp->device);
	struct efa_ibv_create_qp_resp resp = {};
	struct efa_ibv_create_qp cmd = {};
	struct efa_qp *qp = to_eqp(ibqp);
	struct efa_ucontext *ucontext;
	int err;

	ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
					     ibucontext);

	err = efa_qp_validate_cap(dev, init_attr);
	if (err)
		goto err_out;

	err = efa_qp_validate_attr(dev, init_attr);
	if (err)
		goto err_out;

	if (offsetofend(typeof(cmd), driver_qp_type) > udata->inlen) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, no input udata\n");
		err = -EINVAL;
		goto err_out;
	}

	if (udata->inlen > sizeof(cmd) &&
	    !ib_is_udata_cleared(udata, sizeof(cmd),
				 udata->inlen - sizeof(cmd))) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, unknown fields in udata\n");
		err = -EINVAL;
		goto err_out;
	}

	err = ib_copy_from_udata(&cmd, udata,
				 min(sizeof(cmd), udata->inlen));
	if (err) {
		ibdev_dbg(&dev->ibdev,
			  "Cannot copy udata for create_qp\n");
		goto err_out;
	}

	if (cmd.comp_mask) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, unknown fields in udata\n");
		err = -EINVAL;
		goto err_out;
	}

	create_qp_params.uarn = ucontext->uarn;
	create_qp_params.pd = to_epd(ibqp->pd)->pdn;

	if (init_attr->qp_type == IB_QPT_UD) {
		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD;
	} else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) {
		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD;
	} else {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported qp type %d driver qp type %d\n",
			  init_attr->qp_type, cmd.driver_qp_type);
		err = -EOPNOTSUPP;
		goto err_out;
	}

	ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n",
		  init_attr->qp_type, cmd.driver_qp_type);
	create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx;
	create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx;
	create_qp_params.sq_depth = init_attr->cap.max_send_wr;
	create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size;

	create_qp_params.rq_depth = init_attr->cap.max_recv_wr;
	create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size;
	qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes);
	if (qp->rq_size) {
		qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr,
						    qp->rq_size, DMA_TO_DEVICE);
		if (!qp->rq_cpu_addr) {
			err = -ENOMEM;
			goto err_out;
		}

		ibdev_dbg(&dev->ibdev,
			  "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n",
			  qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr);
		create_qp_params.rq_base_addr = qp->rq_dma_addr;
	}

	err = efa_com_create_qp(&dev->edev, &create_qp_params,
				&create_qp_resp);
	if (err)
		goto err_free_mapped;

	resp.sq_db_offset = create_qp_resp.sq_db_offset;
	resp.rq_db_offset = create_qp_resp.rq_db_offset;
	resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset;
	resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx;
	resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx;

	err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params,
				    &resp);
	if (err)
		goto err_destroy_qp;

	qp->qp_handle = create_qp_resp.qp_handle;
	qp->ibqp.qp_num = create_qp_resp.qp_num;
	qp->max_send_wr = init_attr->cap.max_send_wr;
	qp->max_recv_wr = init_attr->cap.max_recv_wr;
	qp->max_send_sge = init_attr->cap.max_send_sge;
	qp->max_recv_sge = init_attr->cap.max_recv_sge;
	qp->max_inline_data = init_attr->cap.max_inline_data;

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for qp[%u]\n",
				  create_qp_resp.qp_num);
			goto err_remove_mmap_entries;
		}
	}

	ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num);

	return 0;

err_remove_mmap_entries:
	efa_qp_user_mmap_entries_remove(qp);
err_destroy_qp:
	efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
err_free_mapped:
	if (qp->rq_size)
		efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
				qp->rq_size, DMA_TO_DEVICE);
err_out:
	atomic64_inc(&dev->stats.create_qp_err);
	return err;
}

static const struct {
	int valid;
	enum ib_qp_attr_mask req_param;
	enum ib_qp_attr_mask opt_param;
} srd_qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
	[IB_QPS_RESET] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_INIT] = {
			.valid = 1,
			.req_param = IB_QP_PKEY_INDEX |
				     IB_QP_PORT |
				     IB_QP_QKEY,
		},
	},
	[IB_QPS_INIT] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] = { .valid = 1 },
		[IB_QPS_INIT] = {
			.valid = 1,
			.opt_param = IB_QP_PKEY_INDEX |
				     IB_QP_PORT |
				     IB_QP_QKEY,
		},
		[IB_QPS_RTR] = {
			.valid = 1,
			.opt_param = IB_QP_PKEY_INDEX |
				     IB_QP_QKEY,
		},
	},
	[IB_QPS_RTR] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] = { .valid = 1 },
		[IB_QPS_RTS] = {
			.valid = 1,
			.req_param = IB_QP_SQ_PSN,
			.opt_param = IB_QP_CUR_STATE |
				     IB_QP_QKEY |
				     IB_QP_RNR_RETRY,

		}
	},
	[IB_QPS_RTS] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] = { .valid = 1 },
		[IB_QPS_RTS] = {
			.valid = 1,
			.opt_param = IB_QP_CUR_STATE |
				     IB_QP_QKEY,
		},
		[IB_QPS_SQD] = {
			.valid = 1,
			.opt_param = IB_QP_EN_SQD_ASYNC_NOTIFY,
		},
	},
	[IB_QPS_SQD] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] = { .valid = 1 },
		[IB_QPS_RTS] = {
			.valid = 1,
			.opt_param = IB_QP_CUR_STATE |
				     IB_QP_QKEY,
		},
		[IB_QPS_SQD] = {
			.valid = 1,
			.opt_param = IB_QP_PKEY_INDEX |
				     IB_QP_QKEY,
		}
	},
	[IB_QPS_SQE] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] = { .valid = 1 },
		[IB_QPS_RTS] = {
			.valid = 1,
			.opt_param = IB_QP_CUR_STATE |
				     IB_QP_QKEY,
		}
	},
	[IB_QPS_ERR] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] = { .valid = 1 },
	}
};
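
/*
 * Note: the table above mirrors the verbs QP state machine for the subset of
 * attributes SRD QPs support; efa_modify_srd_qp_is_ok() below accepts a
 * transition only when it is marked valid, all req_param bits are present in
 * the mask, and no bit outside req_param | opt_param | IB_QP_STATE is set.
 */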
static bool efa_modify_srd_qp_is_ok(enum ib_qp_state cur_state,
				    enum ib_qp_state next_state,
				    enum ib_qp_attr_mask mask)
{
	enum ib_qp_attr_mask req_param, opt_param;

	if (mask & IB_QP_CUR_STATE &&
	    cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
	    cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
		return false;

	if (!srd_qp_state_table[cur_state][next_state].valid)
		return false;

	req_param = srd_qp_state_table[cur_state][next_state].req_param;
	opt_param = srd_qp_state_table[cur_state][next_state].opt_param;

	if ((mask & req_param) != req_param)
		return false;

	if (mask & ~(req_param | opt_param | IB_QP_STATE))
		return false;

	return true;
}

static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
				  struct ib_qp_attr *qp_attr, int qp_attr_mask,
				  enum ib_qp_state cur_state,
				  enum ib_qp_state new_state)
{
	int err;

#define EFA_MODIFY_QP_SUPP_MASK \
	(IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
	 IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \
	 IB_QP_RNR_RETRY)

	if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
			  qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK);
		return -EOPNOTSUPP;
	}

	if (qp->ibqp.qp_type == IB_QPT_DRIVER)
		err = !efa_modify_srd_qp_is_ok(cur_state, new_state,
					       qp_attr_mask);
	else
		err = !ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
					  qp_attr_mask);

	if (err) {
		ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
		return -EINVAL;
	}

	if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) {
		ibdev_dbg(&dev->ibdev, "Can't change port num\n");
		return -EOPNOTSUPP;
	}

	if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) {
		ibdev_dbg(&dev->ibdev, "Can't change pkey index\n");
		return -EOPNOTSUPP;
	}

	return 0;
}

int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
		  int qp_attr_mask, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibqp->device);
	struct efa_com_modify_qp_params params = {};
	struct efa_qp *qp = to_eqp(ibqp);
	enum ib_qp_state cur_state;
	enum ib_qp_state new_state;
	int err;

	if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
		return -EOPNOTSUPP;

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		return -EINVAL;
	}

	cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state :
						     qp->state;
	new_state = qp_attr_mask & IB_QP_STATE ? qp_attr->qp_state :
						 cur_state;

	err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state,
				     new_state);
	if (err)
		return err;

	params.qp_handle = qp->qp_handle;

	if (qp_attr_mask & IB_QP_STATE) {
		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE,
			1);
		EFA_SET(&params.modify_mask,
			EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1);
		params.cur_qp_state = cur_state;
		params.qp_state = new_state;
	}

	if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
		EFA_SET(&params.modify_mask,
			EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY, 1);
		params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
	}

	if (qp_attr_mask & IB_QP_QKEY) {
		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QKEY, 1);
		params.qkey = qp_attr->qkey;
	}

	if (qp_attr_mask & IB_QP_SQ_PSN) {
		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN, 1);
		params.sq_psn = qp_attr->sq_psn;
	}

	if (qp_attr_mask & IB_QP_RNR_RETRY) {
		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY,
			1);
		params.rnr_retry = qp_attr->rnr_retry;
	}

	err = efa_com_modify_qp(&dev->edev, &params);
	if (err)
		return err;

	qp->state = new_state;

	return 0;
}

static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
{
	struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx };

	return efa_com_destroy_cq(&dev->edev, &params);
}

static void efa_cq_user_mmap_entries_remove(struct efa_cq *cq)
{
	rdma_user_mmap_entry_remove(cq->db_mmap_entry);
	rdma_user_mmap_entry_remove(cq->mmap_entry);
}

int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibcq->device);
	struct efa_cq *cq = to_ecq(ibcq);

	ibdev_dbg(&dev->ibdev,
		  "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
		  cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);

	efa_destroy_cq_idx(dev, cq->cq_idx);
	efa_cq_user_mmap_entries_remove(cq);
	if (cq->eq) {
		xa_erase(&dev->cqs_xa, cq->cq_idx);
		synchronize_irq(cq->eq->irq.irqn);
	}
	efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
			DMA_FROM_DEVICE);
	return 0;
}

static struct efa_eq *efa_vec2eq(struct efa_dev *dev, int vec)
{
	return &dev->eqs[vec];
}
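
/*
 * Note: the CQ ring is kernel memory handed to userspace as DMA pages; when
 * the device reports a valid doorbell (db_valid) an additional non-cached
 * doorbell page mapping is exposed and its page offset returned in db_off.
 */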
static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
				 struct efa_ibv_create_cq_resp *resp,
				 bool db_valid)
{
	resp->q_mmap_size = cq->size;
	cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
						    virt_to_phys(cq->cpu_addr),
						    cq->size, EFA_MMAP_DMA_PAGE,
						    &resp->q_mmap_key);
	if (!cq->mmap_entry)
		return -ENOMEM;

	if (db_valid) {
		cq->db_mmap_entry =
			efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
						   dev->db_bar_addr + resp->db_off,
						   PAGE_SIZE, EFA_MMAP_IO_NC,
						   &resp->db_mmap_key);
		if (!cq->db_mmap_entry) {
			rdma_user_mmap_entry_remove(cq->mmap_entry);
			return -ENOMEM;
		}

		resp->db_off &= ~PAGE_MASK;
		resp->comp_mask |= EFA_CREATE_CQ_RESP_DB_OFF;
	}

	return 0;
}

int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		  struct ib_udata *udata)
{
	struct efa_ucontext *ucontext = rdma_udata_to_drv_context(
		udata, struct efa_ucontext, ibucontext);
	struct efa_com_create_cq_params params = {};
	struct efa_ibv_create_cq_resp resp = {};
	struct efa_com_create_cq_result result;
	struct ib_device *ibdev = ibcq->device;
	struct efa_dev *dev = to_edev(ibdev);
	struct efa_ibv_create_cq cmd = {};
	struct efa_cq *cq = to_ecq(ibcq);
	int entries = attr->cqe;
	bool set_src_addr;
	int err;

	ibdev_dbg(ibdev, "create_cq entries %d\n", entries);

	if (attr->flags)
		return -EOPNOTSUPP;

	if (entries < 1 || entries > dev->dev_attr.max_cq_depth) {
		ibdev_dbg(ibdev,
			  "cq: requested entries[%u] non-positive or greater than max[%u]\n",
			  entries, dev->dev_attr.max_cq_depth);
		err = -EINVAL;
		goto err_out;
	}

	if (offsetofend(typeof(cmd), num_sub_cqs) > udata->inlen) {
		ibdev_dbg(ibdev,
			  "Incompatible ABI params, no input udata\n");
		err = -EINVAL;
		goto err_out;
	}

	if (udata->inlen > sizeof(cmd) &&
	    !ib_is_udata_cleared(udata, sizeof(cmd),
				 udata->inlen - sizeof(cmd))) {
		ibdev_dbg(ibdev,
			  "Incompatible ABI params, unknown fields in udata\n");
		err = -EINVAL;
		goto err_out;
	}

	err = ib_copy_from_udata(&cmd, udata,
				 min(sizeof(cmd), udata->inlen));
	if (err) {
		ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n");
		goto err_out;
	}

	if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_58)) {
		ibdev_dbg(ibdev,
			  "Incompatible ABI params, unknown fields in udata\n");
		err = -EINVAL;
		goto err_out;
	}

	set_src_addr = !!(cmd.flags & EFA_CREATE_CQ_WITH_SGID);
	if ((cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc_ex)) &&
	    (set_src_addr ||
	     cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc))) {
		ibdev_dbg(ibdev,
			  "Invalid entry size [%u]\n", cmd.cq_entry_size);
		err = -EINVAL;
		goto err_out;
	}

	if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) {
		ibdev_dbg(ibdev,
			  "Invalid number of sub cqs[%u] expected[%u]\n",
			  cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq);
		err = -EINVAL;
		goto err_out;
	}

	cq->ucontext = ucontext;
	cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
	cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
					 DMA_FROM_DEVICE);
	if (!cq->cpu_addr) {
		err = -ENOMEM;
		goto err_out;
	}

	params.uarn = cq->ucontext->uarn;
	params.cq_depth = entries;
	params.dma_addr = cq->dma_addr;
	params.entry_size_in_bytes = cmd.cq_entry_size;
	params.num_sub_cqs = cmd.num_sub_cqs;
	params.set_src_addr = set_src_addr;
	if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) {
		cq->eq = efa_vec2eq(dev, attr->comp_vector);
		params.eqn = cq->eq->eeq.eqn;
		params.interrupt_mode_enabled = true;
	}

	err = efa_com_create_cq(&dev->edev, &params, &result);
	if (err)
		goto err_free_mapped;

	resp.db_off = result.db_off;
	resp.cq_idx = result.cq_idx;
	cq->cq_idx = result.cq_idx;
	cq->ibcq.cqe = result.actual_depth;
	WARN_ON_ONCE(entries != result.actual_depth);

	err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid);
	if (err) {
		ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n",
			  cq->cq_idx);
		goto err_destroy_cq;
	}

	if (cq->eq) {
		err = xa_err(xa_store(&dev->cqs_xa, cq->cq_idx, cq, GFP_KERNEL));
		if (err) {
			ibdev_dbg(ibdev, "Failed to store cq[%u] in xarray\n",
				  cq->cq_idx);
			goto err_remove_mmap;
		}
	}

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(ibdev,
				  "Failed to copy udata for create_cq\n");
			goto err_xa_erase;
		}
	}

	ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
		  cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr);

	return 0;

err_xa_erase:
	if (cq->eq)
		xa_erase(&dev->cqs_xa, cq->cq_idx);
err_remove_mmap:
	efa_cq_user_mmap_entries_remove(cq);
err_destroy_cq:
	efa_destroy_cq_idx(dev, cq->cq_idx);
err_free_mapped:
	efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
			DMA_FROM_DEVICE);

err_out:
	atomic64_inc(&dev->stats.create_cq_err);
	return err;
}

static int umem_to_page_list(struct efa_dev *dev,
			     struct ib_umem *umem,
			     u64 *page_list,
			     u32 hp_cnt,
			     u8 hp_shift)
{
	u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
	struct ib_block_iter biter;
	unsigned int hp_idx = 0;

	ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
		  hp_cnt, pages_in_hp);

	rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift))
		page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);

	return 0;
}

static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
{
	struct scatterlist *sglist;
	struct page *pg;
	int i;

	sglist = kmalloc_array(page_cnt, sizeof(*sglist), GFP_KERNEL);
	if (!sglist)
		return NULL;
	sg_init_table(sglist, page_cnt);
	for (i = 0; i < page_cnt; i++) {
		pg = vmalloc_to_page(buf);
		if (!pg)
			goto err;
		sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
		buf += PAGE_SIZE / sizeof(*buf);
	}
	return sglist;

err:
	kfree(sglist);
	return NULL;
}

/*
 * create a chunk list of physical pages dma addresses from the supplied
 * scatter gather list
 */
static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
{
	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
	int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
	struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
	unsigned int chunk_list_size, chunk_idx, payload_idx;
	int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
	struct efa_com_ctrl_buff_info *ctrl_buf;
	u64 *cur_chunk_buf, *prev_chunk_buf;
	struct ib_block_iter biter;
	dma_addr_t dma_addr;
	int i;

	/* allocate a chunk list that consists of 4KB chunks */
	chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK);

	chunk_list->size = chunk_list_size;
	chunk_list->chunks = kcalloc(chunk_list_size,
				     sizeof(*chunk_list->chunks),
				     GFP_KERNEL);
	if (!chunk_list->chunks)
		return -ENOMEM;

	ibdev_dbg(&dev->ibdev,
		  "chunk_list_size[%u] - pages[%u]\n", chunk_list_size,
		  page_cnt);

	/* allocate chunk buffers: */
	for (i = 0; i < chunk_list_size; i++) {
		chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL);
		if (!chunk_list->chunks[i].buf)
			goto chunk_list_dealloc;

		chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE;
	}
	chunk_list->chunks[chunk_list_size - 1].length =
		((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) +
			EFA_CHUNK_PTR_SIZE;

	/* fill the dma addresses of sg list pages to chunks: */
	chunk_idx = 0;
	payload_idx = 0;
	cur_chunk_buf = chunk_list->chunks[0].buf;
	rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt,
			    EFA_CHUNK_PAYLOAD_SIZE) {
		cur_chunk_buf[payload_idx++] =
			rdma_block_iter_dma_address(&biter);

		if (payload_idx == EFA_PTRS_PER_CHUNK) {
			chunk_idx++;
			cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
			payload_idx = 0;
		}
	}

	/* map chunks to dma and fill chunks next ptrs */
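	/*
	 * Note: chunks are mapped in reverse order so that each chunk's DMA
	 * address is known before the previous chunk's tail ctrl_buff_info
	 * (its "next" pointer) is filled in and then mapped itself.
	 */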
	for (i = chunk_list_size - 1; i >= 0; i--) {
		dma_addr = dma_map_single(&dev->pdev->dev,
					  chunk_list->chunks[i].buf,
					  chunk_list->chunks[i].length,
					  DMA_TO_DEVICE);
		if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
			ibdev_err(&dev->ibdev,
				  "chunk[%u] dma_map_failed\n", i);
			goto chunk_list_unmap;
		}

		chunk_list->chunks[i].dma_addr = dma_addr;
		ibdev_dbg(&dev->ibdev,
			  "chunk[%u] mapped at [%pad]\n", i, &dma_addr);

		if (!i)
			break;

		prev_chunk_buf = chunk_list->chunks[i - 1].buf;

		ctrl_buf = (struct efa_com_ctrl_buff_info *)
				&prev_chunk_buf[EFA_PTRS_PER_CHUNK];
		ctrl_buf->length = chunk_list->chunks[i].length;

		efa_com_set_dma_addr(dma_addr,
				     &ctrl_buf->address.mem_addr_high,
				     &ctrl_buf->address.mem_addr_low);
	}

	return 0;

chunk_list_unmap:
	for (; i < chunk_list_size; i++) {
		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
	}
chunk_list_dealloc:
	for (i = 0; i < chunk_list_size; i++)
		kfree(chunk_list->chunks[i].buf);

	kfree(chunk_list->chunks);
	return -ENOMEM;
}

static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl)
{
	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
	int i;

	for (i = 0; i < chunk_list->size; i++) {
		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
		kfree(chunk_list->chunks[i].buf);
	}

	kfree(chunk_list->chunks);
}

/* initialize pbl continuous mode: map pbl buffer to a dma address. */
static int pbl_continuous_initialize(struct efa_dev *dev,
				     struct pbl_context *pbl)
{
	dma_addr_t dma_addr;

	dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf,
				  pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
	if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
		ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n");
		return -ENOMEM;
	}

	pbl->phys.continuous.dma_addr = dma_addr;
	ibdev_dbg(&dev->ibdev,
		  "pbl continuous - dma_addr = %pad, size[%u]\n",
		  &dma_addr, pbl->pbl_buf_size_in_bytes);

	return 0;
}

/*
 * initialize pbl indirect mode:
 * create a chunk list out of the dma addresses of the physical pages of
 * pbl buffer.
 */
static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
{
	u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, EFA_CHUNK_PAYLOAD_SIZE);
	struct scatterlist *sgl;
	int sg_dma_cnt, err;

	BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE);
	sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages);
	if (!sgl)
		return -ENOMEM;

	sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
	if (!sg_dma_cnt) {
		err = -EINVAL;
		goto err_map;
	}

	pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages;
	pbl->phys.indirect.sgl = sgl;
	pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt;
	err = pbl_chunk_list_create(dev, pbl);
	if (err) {
		ibdev_dbg(&dev->ibdev,
			  "chunk_list creation failed[%d]\n", err);
		goto err_chunk;
	}

	ibdev_dbg(&dev->ibdev,
		  "pbl indirect - size[%u], chunks[%u]\n",
		  pbl->pbl_buf_size_in_bytes,
		  pbl->phys.indirect.chunk_list.size);

	return 0;

err_chunk:
	dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
err_map:
	kfree(sgl);
	return err;
}

static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl)
{
	pbl_chunk_list_destroy(dev, pbl);
	dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl,
		     pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE);
	kfree(pbl->phys.indirect.sgl);
}

/* create a page buffer list from a mapped user memory region */
static int pbl_create(struct efa_dev *dev,
		      struct pbl_context *pbl,
		      struct ib_umem *umem,
		      int hp_cnt,
		      u8 hp_shift)
{
	int err;

	pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
	pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL);
	if (!pbl->pbl_buf)
		return -ENOMEM;

	if (is_vmalloc_addr(pbl->pbl_buf)) {
		pbl->physically_continuous = 0;
		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
					hp_shift);
		if (err)
			goto err_free;

		err = pbl_indirect_initialize(dev, pbl);
		if (err)
			goto err_free;
	} else {
		pbl->physically_continuous = 1;
		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
					hp_shift);
		if (err)
			goto err_free;

		err = pbl_continuous_initialize(dev, pbl);
		if (err)
			goto err_free;
	}

	ibdev_dbg(&dev->ibdev,
		  "user_pbl_created: user_pages[%u], continuous[%u]\n",
		  hp_cnt, pbl->physically_continuous);

	return 0;

err_free:
	kvfree(pbl->pbl_buf);
	return err;
}

static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl)
{
	if (pbl->physically_continuous)
		dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr,
				 pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
	else
		pbl_indirect_terminate(dev, pbl);

	kvfree(pbl->pbl_buf);
}

static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
				 struct efa_com_reg_mr_params *params)
{
	int err;

	params->inline_pbl = 1;
	err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
				params->page_num, params->page_shift);
	if (err)
		return err;

	ibdev_dbg(&dev->ibdev,
		  "inline_pbl_array - pages[%u]\n", params->page_num);

	return 0;
}
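
/*
 * Note: for a physically continuous PBL the device is given the single
 * mapped buffer; otherwise it is pointed at the first chunk and is expected
 * to follow the per-chunk ctrl_buff_info next pointers built above.
 */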
static int efa_create_pbl(struct efa_dev *dev,
			  struct pbl_context *pbl,
			  struct efa_mr *mr,
			  struct efa_com_reg_mr_params *params)
{
	int err;

	err = pbl_create(dev, pbl, mr->umem, params->page_num,
			 params->page_shift);
	if (err) {
		ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err);
		return err;
	}

	params->inline_pbl = 0;
	params->indirect = !pbl->physically_continuous;
	if (pbl->physically_continuous) {
		params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes;

		efa_com_set_dma_addr(pbl->phys.continuous.dma_addr,
				     &params->pbl.pbl.address.mem_addr_high,
				     &params->pbl.pbl.address.mem_addr_low);
	} else {
		params->pbl.pbl.length =
			pbl->phys.indirect.chunk_list.chunks[0].length;

		efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr,
				     &params->pbl.pbl.address.mem_addr_high,
				     &params->pbl.pbl.address.mem_addr_low);
	}

	return 0;
}

static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags,
				   struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	int supp_access_flags;
	struct efa_mr *mr;

	if (udata && udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		return ERR_PTR(-EINVAL);
	}

	supp_access_flags =
		IB_ACCESS_LOCAL_WRITE |
		(EFA_DEV_CAP(dev, RDMA_READ) ? IB_ACCESS_REMOTE_READ : 0) |
		(EFA_DEV_CAP(dev, RDMA_WRITE) ? IB_ACCESS_REMOTE_WRITE : 0);

	access_flags &= ~IB_ACCESS_OPTIONAL;
	if (access_flags & ~supp_access_flags) {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported access flags[%#x], supported[%#x]\n",
			  access_flags, supp_access_flags);
		return ERR_PTR(-EOPNOTSUPP);
	}

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	return mr;
}

static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start,
			   u64 length, u64 virt_addr, int access_flags)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_com_reg_mr_params params = {};
	struct efa_com_reg_mr_result result = {};
	struct pbl_context pbl;
	unsigned int pg_sz;
	int inline_size;
	int err;

	params.pd = to_epd(ibpd)->pdn;
	params.iova = virt_addr;
	params.mr_length_in_bytes = length;
	params.permissions = access_flags;

	pg_sz = ib_umem_find_best_pgsz(mr->umem,
				       dev->dev_attr.page_size_cap,
				       virt_addr);
	if (!pg_sz) {
		ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n",
			  dev->dev_attr.page_size_cap);
		return -EOPNOTSUPP;
	}

	params.page_shift = order_base_2(pg_sz);
	params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz);

	ibdev_dbg(&dev->ibdev,
		  "start %#llx length %#llx params.page_shift %u params.page_num %u\n",
		  start, length, params.page_shift, params.page_num);

	inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
	if (params.page_num <= inline_size) {
		err = efa_create_inline_pbl(dev, mr, &params);
		if (err)
			return err;

		err = efa_com_register_mr(&dev->edev, &params, &result);
		if (err)
			return err;
	} else {
		err = efa_create_pbl(dev, &pbl, mr, &params);
		if (err)
			return err;

		err = efa_com_register_mr(&dev->edev, &params, &result);
		pbl_destroy(dev, &pbl);

		if (err)
			return err;
	}

	mr->ibmr.lkey = result.l_key;
	mr->ibmr.rkey = result.r_key;
	mr->ibmr.length = length;
	mr->ic_info.recv_ic_id = result.ic_info.recv_ic_id;
	mr->ic_info.rdma_read_ic_id = result.ic_info.rdma_read_ic_id;
	mr->ic_info.rdma_recv_ic_id = result.ic_info.rdma_recv_ic_id;
	mr->ic_info.recv_ic_id_valid = result.ic_info.recv_ic_id_valid;
	mr->ic_info.rdma_read_ic_id_valid = result.ic_info.rdma_read_ic_id_valid;
	mr->ic_info.rdma_recv_ic_id_valid = result.ic_info.rdma_recv_ic_id_valid;
	ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);

	return 0;
}

struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
				     u64 length, u64 virt_addr,
				     int fd, int access_flags,
				     struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct ib_umem_dmabuf *umem_dmabuf;
	struct efa_mr *mr;
	int err;

	mr = efa_alloc_mr(ibpd, access_flags, udata);
	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto err_out;
	}

	umem_dmabuf = ib_umem_dmabuf_get_pinned(ibpd->device, start, length, fd,
						access_flags);
	if (IS_ERR(umem_dmabuf)) {
		err = PTR_ERR(umem_dmabuf);
		ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
		goto err_free;
	}

	mr->umem = &umem_dmabuf->umem;
	err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags);
	if (err)
		goto err_release;

	return &mr->ibmr;

err_release:
	ib_umem_release(mr->umem);
err_free:
	kfree(mr);
err_out:
	atomic64_inc(&dev->stats.reg_mr_err);
	return ERR_PTR(err);
}

struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
			 u64 virt_addr, int access_flags,
			 struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_mr *mr;
	int err;

	mr = efa_alloc_mr(ibpd, access_flags, udata);
	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto err_out;
	}

	mr->umem = ib_umem_get(ibpd->device, start, length, access_flags);
	if (IS_ERR(mr->umem)) {
		err = PTR_ERR(mr->umem);
		ibdev_dbg(&dev->ibdev,
			  "Failed to pin and map user space memory[%d]\n", err);
		goto err_free;
	}

	err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags);
	if (err)
		goto err_release;

	return &mr->ibmr;

err_release:
	ib_umem_release(mr->umem);
err_free:
	kfree(mr);
err_out:
	atomic64_inc(&dev->stats.reg_mr_err);
	return ERR_PTR(err);
}

static int UVERBS_HANDLER(EFA_IB_METHOD_MR_QUERY)(struct uverbs_attr_bundle *attrs)
{
	struct ib_mr *ibmr = uverbs_attr_get_obj(attrs, EFA_IB_ATTR_QUERY_MR_HANDLE);
	struct efa_mr *mr = to_emr(ibmr);
	u16 ic_id_validity = 0;
	int ret;

	ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID,
			     &mr->ic_info.recv_ic_id, sizeof(mr->ic_info.recv_ic_id));
	if (ret)
		return ret;

	ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID,
			     &mr->ic_info.rdma_read_ic_id, sizeof(mr->ic_info.rdma_read_ic_id));
	if (ret)
		return ret;

	ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID,
			     &mr->ic_info.rdma_recv_ic_id, sizeof(mr->ic_info.rdma_recv_ic_id));
	if (ret)
		return ret;

	if (mr->ic_info.recv_ic_id_valid)
		ic_id_validity |= EFA_QUERY_MR_VALIDITY_RECV_IC_ID;
	if (mr->ic_info.rdma_read_ic_id_valid)
		ic_id_validity |= EFA_QUERY_MR_VALIDITY_RDMA_READ_IC_ID;
	if (mr->ic_info.rdma_recv_ic_id_valid)
		ic_id_validity |= EFA_QUERY_MR_VALIDITY_RDMA_RECV_IC_ID;

	return uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY,
			      &ic_id_validity, sizeof(ic_id_validity));
}

int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibmr->device);
	struct efa_com_dereg_mr_params params;
	struct efa_mr *mr = to_emr(ibmr);
	int err;

	ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);

	params.l_key = mr->ibmr.lkey;
	err = efa_com_dereg_mr(&dev->edev, &params);
	if (err)
		return err;

	ib_umem_release(mr->umem);
	kfree(mr);

	return 0;
}

int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num,
			   struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err) {
		ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err);
		return err;
	}

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;

	return 0;
}

static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
{
	struct efa_com_dealloc_uar_params params = {
		.uarn = uarn,
	};

	return efa_com_dealloc_uar(&dev->edev, &params);
}
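
/*
 * Note: evaluates to NULL (handshake ok) when the device attribute is zero
 * or userspace acknowledged it via the comp_mask bit; otherwise it sets
 * _attr_str to the stringified attribute name so the caller can log it.
 */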
#define EFA_CHECK_USER_COMP(_dev, _comp_mask, _attr, _mask, _attr_str) \
	(_attr_str = (!(_dev)->dev_attr._attr || ((_comp_mask) & (_mask))) ? \
		     NULL : #_attr)

static int efa_user_comp_handshake(const struct ib_ucontext *ibucontext,
				   const struct efa_ibv_alloc_ucontext_cmd *cmd)
{
	struct efa_dev *dev = to_edev(ibucontext->device);
	char *attr_str;

	if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, max_tx_batch,
				EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH, attr_str))
		goto err;

	if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, min_sq_depth,
				EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR,
				attr_str))
		goto err;

	return 0;

err:
	ibdev_dbg(&dev->ibdev, "Userspace handshake failed for %s attribute\n",
		  attr_str);
	return -EOPNOTSUPP;
}

int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
{
	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
	struct efa_dev *dev = to_edev(ibucontext->device);
	struct efa_ibv_alloc_ucontext_resp resp = {};
	struct efa_ibv_alloc_ucontext_cmd cmd = {};
	struct efa_com_alloc_uar_result result;
	int err;

	/*
	 * it's fine if the driver does not know all request fields,
	 * we will ack input fields in our response.
	 */

	err = ib_copy_from_udata(&cmd, udata,
				 min(sizeof(cmd), udata->inlen));
	if (err) {
		ibdev_dbg(&dev->ibdev,
			  "Cannot copy udata for alloc_ucontext\n");
		goto err_out;
	}

	err = efa_user_comp_handshake(ibucontext, &cmd);
	if (err)
		goto err_out;

	err = efa_com_alloc_uar(&dev->edev, &result);
	if (err)
		goto err_out;

	ucontext->uarn = result.uarn;

	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
	resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
	resp.inline_buf_size = dev->dev_attr.inline_buf_size;
	resp.max_llq_size = dev->dev_attr.max_llq_size;
	resp.max_tx_batch = dev->dev_attr.max_tx_batch;
	resp.min_sq_wr = dev->dev_attr.min_sq_depth;

	err = ib_copy_to_udata(udata, &resp,
			       min(sizeof(resp), udata->outlen));
	if (err)
		goto err_dealloc_uar;

	return 0;

err_dealloc_uar:
	efa_dealloc_uar(dev, result.uarn);
err_out:
	atomic64_inc(&dev->stats.alloc_ucontext_err);
	return err;
}

void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
{
	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
	struct efa_dev *dev = to_edev(ibucontext->device);

	efa_dealloc_uar(dev, ucontext->uarn);
}

void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	struct efa_user_mmap_entry *entry = to_emmap(rdma_entry);

	kfree(entry);
}

static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
		      struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *rdma_entry;
	struct efa_user_mmap_entry *entry;
	unsigned long va;
	int err = 0;
	u64 pfn;

	rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
	if (!rdma_entry) {
		ibdev_dbg(&dev->ibdev,
			  "pgoff[%#lx] does not have valid entry\n",
			  vma->vm_pgoff);
		atomic64_inc(&dev->stats.mmap_err);
		return -EINVAL;
	}
	entry = to_emmap(rdma_entry);

	ibdev_dbg(&dev->ibdev,
		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
		  entry->address, rdma_entry->npages * PAGE_SIZE,
		  entry->mmap_flag);

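	/*
	 * Note: IO BAR entries are mapped with rdma_user_mmap_io() using
	 * non-cached or write-combined protection as recorded in mmap_flag,
	 * while DMA_PAGE entries are ordinary kernel pages inserted one page
	 * at a time with vm_insert_page().
	 */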
static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
		      struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *rdma_entry;
	struct efa_user_mmap_entry *entry;
	unsigned long va;
	int err = 0;
	u64 pfn;

	rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
	if (!rdma_entry) {
		ibdev_dbg(&dev->ibdev,
			  "pgoff[%#lx] does not have valid entry\n",
			  vma->vm_pgoff);
		atomic64_inc(&dev->stats.mmap_err);
		return -EINVAL;
	}
	entry = to_emmap(rdma_entry);

	ibdev_dbg(&dev->ibdev,
		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
		  entry->address, rdma_entry->npages * PAGE_SIZE,
		  entry->mmap_flag);

	pfn = entry->address >> PAGE_SHIFT;
	switch (entry->mmap_flag) {
	case EFA_MMAP_IO_NC:
		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
					entry->rdma_entry.npages * PAGE_SIZE,
					pgprot_noncached(vma->vm_page_prot),
					rdma_entry);
		break;
	case EFA_MMAP_IO_WC:
		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
					entry->rdma_entry.npages * PAGE_SIZE,
					pgprot_writecombine(vma->vm_page_prot),
					rdma_entry);
		break;
	case EFA_MMAP_DMA_PAGE:
		for (va = vma->vm_start; va < vma->vm_end;
		     va += PAGE_SIZE, pfn++) {
			err = vm_insert_page(vma, va, pfn_to_page(pfn));
			if (err)
				break;
		}
		break;
	default:
		err = -EINVAL;
	}

	if (err) {
		ibdev_dbg(
			&dev->ibdev,
			"Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
			entry->address, rdma_entry->npages * PAGE_SIZE,
			entry->mmap_flag, err);
		atomic64_inc(&dev->stats.mmap_err);
	}

	rdma_user_mmap_entry_put(rdma_entry);
	return err;
}

int efa_mmap(struct ib_ucontext *ibucontext,
	     struct vm_area_struct *vma)
{
	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
	struct efa_dev *dev = to_edev(ibucontext->device);
	size_t length = vma->vm_end - vma->vm_start;

	ibdev_dbg(&dev->ibdev,
		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);

	return __efa_mmap(dev, ucontext, vma);
}

static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
{
	struct efa_com_destroy_ah_params params = {
		.ah = ah->ah,
		.pdn = to_epd(ah->ibah.pd)->pdn,
	};

	return efa_com_destroy_ah(&dev->edev, &params);
}

int efa_create_ah(struct ib_ah *ibah,
		  struct rdma_ah_init_attr *init_attr,
		  struct ib_udata *udata)
{
	struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
	struct efa_dev *dev = to_edev(ibah->device);
	struct efa_com_create_ah_params params = {};
	struct efa_ibv_create_ah_resp resp = {};
	struct efa_com_create_ah_result result;
	struct efa_ah *ah = to_eah(ibah);
	int err;

	if (!(init_attr->flags & RDMA_CREATE_AH_SLEEPABLE)) {
		ibdev_dbg(&dev->ibdev,
			  "Create address handle is not supported in atomic context\n");
		err = -EOPNOTSUPP;
		goto err_out;
	}

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
		err = -EINVAL;
		goto err_out;
	}

	memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
	       sizeof(params.dest_addr));
	params.pdn = to_epd(ibah->pd)->pdn;
	err = efa_com_create_ah(&dev->edev, &params, &result);
	if (err)
		goto err_out;

	memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
	ah->ah = result.ah;

	resp.efa_address_handle = result.ah;

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for create_ah response\n");
			goto err_destroy_ah;
		}
	}
	ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);

	return 0;

err_destroy_ah:
	efa_ah_destroy(dev, ah);
err_out:
	atomic64_inc(&dev->stats.create_ah_err);
	return err;
}

int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
{
	struct efa_dev *dev = to_edev(ibah->pd->device);
	struct efa_ah *ah = to_eah(ibah);

	ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah);

	if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
		ibdev_dbg(&dev->ibdev,
			  "Destroy address handle is not supported in atomic context\n");
		return -EOPNOTSUPP;
	}

	efa_ah_destroy(dev, ah);
	return 0;
}

struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev,
					      u32 port_num)
{
	return rdma_alloc_hw_stats_struct(efa_port_stats_descs,
					  ARRAY_SIZE(efa_port_stats_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev)
{
	return rdma_alloc_hw_stats_struct(efa_device_stats_descs,
					  ARRAY_SIZE(efa_device_stats_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
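
/*
 * Device-level stats are filled from software counters only: admin command
 * counters kept in dev->edev.aq.stats and the driver error counters in
 * dev->stats. Port-level stats below are instead fetched from the device
 * through efa_com_get_stats(), one admin command per statistics type.
 */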
static int efa_fill_device_stats(struct efa_dev *dev,
				 struct rdma_hw_stats *stats)
{
	struct efa_com_stats_admin *as = &dev->edev.aq.stats;
	struct efa_stats *s = &dev->stats;

	stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
	stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
	stats->value[EFA_CMDS_ERR] = atomic64_read(&as->cmd_err);
	stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);

	stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
	stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err);
	stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err);
	stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err);
	stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err);
	stats->value[EFA_ALLOC_UCONTEXT_ERR] =
		atomic64_read(&s->alloc_ucontext_err);
	stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err);
	stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err);

	return ARRAY_SIZE(efa_device_stats_descs);
}

static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats,
			       u32 port_num)
{
	struct efa_com_get_stats_params params = {};
	union efa_com_get_stats_result result;
	struct efa_com_rdma_write_stats *rws;
	struct efa_com_rdma_read_stats *rrs;
	struct efa_com_messages_stats *ms;
	struct efa_com_basic_stats *bs;
	int err;

	params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
	params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;

	err = efa_com_get_stats(&dev->edev, &params, &result);
	if (err)
		return err;

	bs = &result.basic_stats;
	stats->value[EFA_TX_BYTES] = bs->tx_bytes;
	stats->value[EFA_TX_PKTS] = bs->tx_pkts;
	stats->value[EFA_RX_BYTES] = bs->rx_bytes;
	stats->value[EFA_RX_PKTS] = bs->rx_pkts;
	stats->value[EFA_RX_DROPS] = bs->rx_drops;

	params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES;
	err = efa_com_get_stats(&dev->edev, &params, &result);
	if (err)
		return err;

	ms = &result.messages_stats;
	stats->value[EFA_SEND_BYTES] = ms->send_bytes;
	stats->value[EFA_SEND_WRS] = ms->send_wrs;
	stats->value[EFA_RECV_BYTES] = ms->recv_bytes;
	stats->value[EFA_RECV_WRS] = ms->recv_wrs;

	params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_READ;
	err = efa_com_get_stats(&dev->edev, &params, &result);
	if (err)
		return err;

	rrs = &result.rdma_read_stats;
	stats->value[EFA_RDMA_READ_WRS] = rrs->read_wrs;
	stats->value[EFA_RDMA_READ_BYTES] = rrs->read_bytes;
	stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err;
	stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes;

	if (EFA_DEV_CAP(dev, RDMA_WRITE)) {
		params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE;
		err = efa_com_get_stats(&dev->edev, &params, &result);
		if (err)
			return err;

		rws = &result.rdma_write_stats;
		stats->value[EFA_RDMA_WRITE_WRS] = rws->write_wrs;
		stats->value[EFA_RDMA_WRITE_BYTES] = rws->write_bytes;
		stats->value[EFA_RDMA_WRITE_WR_ERR] = rws->write_wr_err;
		stats->value[EFA_RDMA_WRITE_RECV_BYTES] = rws->write_recv_bytes;
	}

	return ARRAY_SIZE(efa_port_stats_descs);
}

int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
		     u32 port_num, int index)
{
	if (port_num)
		return efa_fill_port_stats(to_edev(ibdev), stats, port_num);
	else
		return efa_fill_device_stats(to_edev(ibdev), stats);
}

enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
					 u32 port_num)
{
	return IB_LINK_LAYER_UNSPECIFIED;
}
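
/*
 * Direct-verbs uAPI extension: EFA_IB_METHOD_MR_QUERY is attached to the
 * standard UVERBS_OBJECT_MR object so userspace can read back the per-MR
 * interconnect IDs and their validity mask returned by the query handler
 * above; the efa_mr method tree is chained into the core uAPI through
 * efa_uapi_defs[].
 */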
DECLARE_UVERBS_NAMED_METHOD(EFA_IB_METHOD_MR_QUERY,
			    UVERBS_ATTR_IDR(EFA_IB_ATTR_QUERY_MR_HANDLE,
					    UVERBS_OBJECT_MR,
					    UVERBS_ACCESS_READ,
					    UA_MANDATORY),
			    UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY,
						UVERBS_ATTR_TYPE(u16),
						UA_MANDATORY),
			    UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID,
						UVERBS_ATTR_TYPE(u16),
						UA_MANDATORY),
			    UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID,
						UVERBS_ATTR_TYPE(u16),
						UA_MANDATORY),
			    UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID,
						UVERBS_ATTR_TYPE(u16),
						UA_MANDATORY));

ADD_UVERBS_METHODS(efa_mr,
		   UVERBS_OBJECT_MR,
		   &UVERBS_METHOD(EFA_IB_METHOD_MR_QUERY));

const struct uapi_definition efa_uapi_defs[] = {
	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_MR,
				&efa_mr),
	{},
};