// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
 */

#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
#include <linux/vmalloc.h>
#include <linux/log2.h>

#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>
#define UVERBS_MODULE_NAME efa_ib
#include <rdma/uverbs_named_ioctl.h>
#include <rdma/ib_user_ioctl_cmds.h>

#include "efa.h"
#include "efa_io_defs.h"

enum {
	EFA_MMAP_DMA_PAGE = 0,
	EFA_MMAP_IO_WC,
	EFA_MMAP_IO_NC,
};

#define EFA_AENQ_ENABLED_GROUPS \
	(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
	 BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))

struct efa_user_mmap_entry {
	struct rdma_user_mmap_entry rdma_entry;
	u64 address;
	u8 mmap_flag;
};

#define EFA_DEFINE_DEVICE_STATS(op) \
	op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
	op(EFA_COMPLETED_CMDS, "completed_cmds") \
	op(EFA_CMDS_ERR, "cmds_err") \
	op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
	op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
	op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
	op(EFA_CREATE_QP_ERR, "create_qp_err") \
	op(EFA_CREATE_CQ_ERR, "create_cq_err") \
	op(EFA_REG_MR_ERR, "reg_mr_err") \
	op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
	op(EFA_CREATE_AH_ERR, "create_ah_err") \
	op(EFA_MMAP_ERR, "mmap_err")

#define EFA_DEFINE_PORT_STATS(op) \
	op(EFA_TX_BYTES, "tx_bytes") \
	op(EFA_TX_PKTS, "tx_pkts") \
	op(EFA_RX_BYTES, "rx_bytes") \
	op(EFA_RX_PKTS, "rx_pkts") \
	op(EFA_RX_DROPS, "rx_drops") \
	op(EFA_SEND_BYTES, "send_bytes") \
	op(EFA_SEND_WRS, "send_wrs") \
	op(EFA_RECV_BYTES, "recv_bytes") \
	op(EFA_RECV_WRS, "recv_wrs") \
	op(EFA_RDMA_READ_WRS, "rdma_read_wrs") \
	op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \
	op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \
	op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \
	op(EFA_RDMA_WRITE_WRS, "rdma_write_wrs") \
	op(EFA_RDMA_WRITE_BYTES, "rdma_write_bytes") \
	op(EFA_RDMA_WRITE_WR_ERR, "rdma_write_wr_err") \
	op(EFA_RDMA_WRITE_RECV_BYTES, "rdma_write_recv_bytes") \

#define EFA_STATS_ENUM(ename, name) ename,
#define EFA_STATS_STR(ename, nam) \
	[ename].name = nam,

enum efa_hw_device_stats {
	EFA_DEFINE_DEVICE_STATS(EFA_STATS_ENUM)
};

static const struct rdma_stat_desc efa_device_stats_descs[] = {
	EFA_DEFINE_DEVICE_STATS(EFA_STATS_STR)
};

enum efa_hw_port_stats {
	EFA_DEFINE_PORT_STATS(EFA_STATS_ENUM)
};

static const struct rdma_stat_desc efa_port_stats_descs[] = {
	EFA_DEFINE_PORT_STATS(EFA_STATS_STR)
};

#define EFA_CHUNK_PAYLOAD_SHIFT 12
#define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT)
#define EFA_CHUNK_PAYLOAD_PTR_SIZE 8

#define EFA_CHUNK_SHIFT 12
#define EFA_CHUNK_SIZE BIT(EFA_CHUNK_SHIFT)
#define EFA_CHUNK_PTR_SIZE sizeof(struct efa_com_ctrl_buff_info)

#define EFA_PTRS_PER_CHUNK \
	((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE)

#define EFA_CHUNK_USED_SIZE \
	((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)

struct pbl_chunk {
	dma_addr_t dma_addr;
	u64 *buf;
	u32 length;
};

struct pbl_chunk_list {
	struct pbl_chunk *chunks;
	unsigned int size;
};

struct pbl_context {
	union {
		struct {
			dma_addr_t
dma_addr; 121 } continuous; 122 struct { 123 u32 pbl_buf_size_in_pages; 124 struct scatterlist *sgl; 125 int sg_dma_cnt; 126 struct pbl_chunk_list chunk_list; 127 } indirect; 128 } phys; 129 u64 *pbl_buf; 130 u32 pbl_buf_size_in_bytes; 131 u8 physically_continuous; 132 }; 133 134 static inline struct efa_dev *to_edev(struct ib_device *ibdev) 135 { 136 return container_of(ibdev, struct efa_dev, ibdev); 137 } 138 139 static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext) 140 { 141 return container_of(ibucontext, struct efa_ucontext, ibucontext); 142 } 143 144 static inline struct efa_pd *to_epd(struct ib_pd *ibpd) 145 { 146 return container_of(ibpd, struct efa_pd, ibpd); 147 } 148 149 static inline struct efa_mr *to_emr(struct ib_mr *ibmr) 150 { 151 return container_of(ibmr, struct efa_mr, ibmr); 152 } 153 154 static inline struct efa_qp *to_eqp(struct ib_qp *ibqp) 155 { 156 return container_of(ibqp, struct efa_qp, ibqp); 157 } 158 159 static inline struct efa_cq *to_ecq(struct ib_cq *ibcq) 160 { 161 return container_of(ibcq, struct efa_cq, ibcq); 162 } 163 164 static inline struct efa_ah *to_eah(struct ib_ah *ibah) 165 { 166 return container_of(ibah, struct efa_ah, ibah); 167 } 168 169 static inline struct efa_user_mmap_entry * 170 to_emmap(struct rdma_user_mmap_entry *rdma_entry) 171 { 172 return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry); 173 } 174 175 #define EFA_DEV_CAP(dev, cap) \ 176 ((dev)->dev_attr.device_caps & \ 177 EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_##cap##_MASK) 178 179 #define is_reserved_cleared(reserved) \ 180 !memchr_inv(reserved, 0, sizeof(reserved)) 181 182 static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr, 183 size_t size, enum dma_data_direction dir) 184 { 185 void *addr; 186 187 addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); 188 if (!addr) 189 return NULL; 190 191 *dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir); 192 if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) { 193 ibdev_err(&dev->ibdev, "Failed to map DMA address\n"); 194 free_pages_exact(addr, size); 195 return NULL; 196 } 197 198 return addr; 199 } 200 201 static void efa_free_mapped(struct efa_dev *dev, void *cpu_addr, 202 dma_addr_t dma_addr, 203 size_t size, enum dma_data_direction dir) 204 { 205 dma_unmap_single(&dev->pdev->dev, dma_addr, size, dir); 206 free_pages_exact(cpu_addr, size); 207 } 208 209 int efa_query_device(struct ib_device *ibdev, 210 struct ib_device_attr *props, 211 struct ib_udata *udata) 212 { 213 struct efa_com_get_device_attr_result *dev_attr; 214 struct efa_ibv_ex_query_device_resp resp = {}; 215 struct efa_dev *dev = to_edev(ibdev); 216 int err; 217 218 if (udata && udata->inlen && 219 !ib_is_udata_cleared(udata, 0, udata->inlen)) { 220 ibdev_dbg(ibdev, 221 "Incompatible ABI params, udata not cleared\n"); 222 return -EINVAL; 223 } 224 225 dev_attr = &dev->dev_attr; 226 227 memset(props, 0, sizeof(*props)); 228 props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE; 229 props->page_size_cap = dev_attr->page_size_cap; 230 props->vendor_id = dev->pdev->vendor; 231 props->vendor_part_id = dev->pdev->device; 232 props->hw_ver = dev->pdev->subsystem_device; 233 props->max_qp = dev_attr->max_qp; 234 props->max_cq = dev_attr->max_cq; 235 props->max_pd = dev_attr->max_pd; 236 props->max_mr = dev_attr->max_mr; 237 props->max_ah = dev_attr->max_ah; 238 props->max_cqe = dev_attr->max_cq_depth; 239 props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth, 240 dev_attr->max_rq_depth); 241 props->max_send_sge 
= dev_attr->max_sq_sge; 242 props->max_recv_sge = dev_attr->max_rq_sge; 243 props->max_sge_rd = dev_attr->max_wr_rdma_sge; 244 props->max_pkeys = 1; 245 246 if (udata && udata->outlen) { 247 resp.max_sq_sge = dev_attr->max_sq_sge; 248 resp.max_rq_sge = dev_attr->max_rq_sge; 249 resp.max_sq_wr = dev_attr->max_sq_depth; 250 resp.max_rq_wr = dev_attr->max_rq_depth; 251 resp.max_rdma_size = dev_attr->max_rdma_size; 252 253 resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID; 254 if (EFA_DEV_CAP(dev, RDMA_READ)) 255 resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ; 256 257 if (EFA_DEV_CAP(dev, RNR_RETRY)) 258 resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY; 259 260 if (EFA_DEV_CAP(dev, DATA_POLLING_128)) 261 resp.device_caps |= EFA_QUERY_DEVICE_CAPS_DATA_POLLING_128; 262 263 if (EFA_DEV_CAP(dev, RDMA_WRITE)) 264 resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_WRITE; 265 266 if (EFA_DEV_CAP(dev, UNSOLICITED_WRITE_RECV)) 267 resp.device_caps |= EFA_QUERY_DEVICE_CAPS_UNSOLICITED_WRITE_RECV; 268 269 if (dev->neqs) 270 resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS; 271 272 err = ib_copy_to_udata(udata, &resp, 273 min(sizeof(resp), udata->outlen)); 274 if (err) { 275 ibdev_dbg(ibdev, 276 "Failed to copy udata for query_device\n"); 277 return err; 278 } 279 } 280 281 return 0; 282 } 283 284 int efa_query_port(struct ib_device *ibdev, u32 port, 285 struct ib_port_attr *props) 286 { 287 struct efa_dev *dev = to_edev(ibdev); 288 289 props->lmc = 1; 290 291 props->state = IB_PORT_ACTIVE; 292 props->phys_state = IB_PORT_PHYS_STATE_LINK_UP; 293 props->gid_tbl_len = 1; 294 props->pkey_tbl_len = 1; 295 props->active_speed = IB_SPEED_EDR; 296 props->active_width = IB_WIDTH_4X; 297 props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu); 298 props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu); 299 props->max_msg_sz = dev->dev_attr.mtu; 300 props->max_vl_num = 1; 301 302 return 0; 303 } 304 305 int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, 306 int qp_attr_mask, 307 struct ib_qp_init_attr *qp_init_attr) 308 { 309 struct efa_dev *dev = to_edev(ibqp->device); 310 struct efa_com_query_qp_params params = {}; 311 struct efa_com_query_qp_result result; 312 struct efa_qp *qp = to_eqp(ibqp); 313 int err; 314 315 #define EFA_QUERY_QP_SUPP_MASK \ 316 (IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \ 317 IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY) 318 319 if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) { 320 ibdev_dbg(&dev->ibdev, 321 "Unsupported qp_attr_mask[%#x] supported[%#x]\n", 322 qp_attr_mask, EFA_QUERY_QP_SUPP_MASK); 323 return -EOPNOTSUPP; 324 } 325 326 memset(qp_attr, 0, sizeof(*qp_attr)); 327 memset(qp_init_attr, 0, sizeof(*qp_init_attr)); 328 329 params.qp_handle = qp->qp_handle; 330 err = efa_com_query_qp(&dev->edev, ¶ms, &result); 331 if (err) 332 return err; 333 334 qp_attr->qp_state = result.qp_state; 335 qp_attr->qkey = result.qkey; 336 qp_attr->sq_psn = result.sq_psn; 337 qp_attr->sq_draining = result.sq_draining; 338 qp_attr->port_num = 1; 339 qp_attr->rnr_retry = result.rnr_retry; 340 341 qp_attr->cap.max_send_wr = qp->max_send_wr; 342 qp_attr->cap.max_recv_wr = qp->max_recv_wr; 343 qp_attr->cap.max_send_sge = qp->max_send_sge; 344 qp_attr->cap.max_recv_sge = qp->max_recv_sge; 345 qp_attr->cap.max_inline_data = qp->max_inline_data; 346 347 qp_init_attr->qp_type = ibqp->qp_type; 348 qp_init_attr->recv_cq = ibqp->recv_cq; 349 qp_init_attr->send_cq = ibqp->send_cq; 350 qp_init_attr->qp_context = ibqp->qp_context; 351 qp_init_attr->cap = qp_attr->cap; 
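	/*
	 * The cap values reported back here are the ones cached on the QP at
	 * create time (efa_create_qp() saves init_attr->cap into the efa_qp);
	 * only qp_state, qkey, sq_psn, sq_draining and rnr_retry above come
	 * from the device query command, while port_num is always 1.
	 */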
352 353 return 0; 354 } 355 356 int efa_query_gid(struct ib_device *ibdev, u32 port, int index, 357 union ib_gid *gid) 358 { 359 struct efa_dev *dev = to_edev(ibdev); 360 361 memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr)); 362 363 return 0; 364 } 365 366 int efa_query_pkey(struct ib_device *ibdev, u32 port, u16 index, 367 u16 *pkey) 368 { 369 if (index > 0) 370 return -EINVAL; 371 372 *pkey = 0xffff; 373 return 0; 374 } 375 376 static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn) 377 { 378 struct efa_com_dealloc_pd_params params = { 379 .pdn = pdn, 380 }; 381 382 return efa_com_dealloc_pd(&dev->edev, ¶ms); 383 } 384 385 int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) 386 { 387 struct efa_dev *dev = to_edev(ibpd->device); 388 struct efa_ibv_alloc_pd_resp resp = {}; 389 struct efa_com_alloc_pd_result result; 390 struct efa_pd *pd = to_epd(ibpd); 391 int err; 392 393 if (udata->inlen && 394 !ib_is_udata_cleared(udata, 0, udata->inlen)) { 395 ibdev_dbg(&dev->ibdev, 396 "Incompatible ABI params, udata not cleared\n"); 397 err = -EINVAL; 398 goto err_out; 399 } 400 401 err = efa_com_alloc_pd(&dev->edev, &result); 402 if (err) 403 goto err_out; 404 405 pd->pdn = result.pdn; 406 resp.pdn = result.pdn; 407 408 if (udata->outlen) { 409 err = ib_copy_to_udata(udata, &resp, 410 min(sizeof(resp), udata->outlen)); 411 if (err) { 412 ibdev_dbg(&dev->ibdev, 413 "Failed to copy udata for alloc_pd\n"); 414 goto err_dealloc_pd; 415 } 416 } 417 418 ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn); 419 420 return 0; 421 422 err_dealloc_pd: 423 efa_pd_dealloc(dev, result.pdn); 424 err_out: 425 atomic64_inc(&dev->stats.alloc_pd_err); 426 return err; 427 } 428 429 int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) 430 { 431 struct efa_dev *dev = to_edev(ibpd->device); 432 struct efa_pd *pd = to_epd(ibpd); 433 434 ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn); 435 efa_pd_dealloc(dev, pd->pdn); 436 return 0; 437 } 438 439 static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle) 440 { 441 struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle }; 442 443 return efa_com_destroy_qp(&dev->edev, ¶ms); 444 } 445 446 static void efa_qp_user_mmap_entries_remove(struct efa_qp *qp) 447 { 448 rdma_user_mmap_entry_remove(qp->rq_mmap_entry); 449 rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry); 450 rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry); 451 rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry); 452 } 453 454 int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) 455 { 456 struct efa_dev *dev = to_edev(ibqp->pd->device); 457 struct efa_qp *qp = to_eqp(ibqp); 458 int err; 459 460 ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num); 461 462 err = efa_destroy_qp_handle(dev, qp->qp_handle); 463 if (err) 464 return err; 465 466 efa_qp_user_mmap_entries_remove(qp); 467 468 if (qp->rq_cpu_addr) { 469 ibdev_dbg(&dev->ibdev, 470 "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n", 471 qp->rq_cpu_addr, qp->rq_size, 472 &qp->rq_dma_addr); 473 efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr, 474 qp->rq_size, DMA_TO_DEVICE); 475 } 476 477 return 0; 478 } 479 480 static struct rdma_user_mmap_entry* 481 efa_user_mmap_entry_insert(struct ib_ucontext *ucontext, 482 u64 address, size_t length, 483 u8 mmap_flag, u64 *offset) 484 { 485 struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); 486 int err; 487 488 if (!entry) 489 return NULL; 490 491 entry->address = address; 492 entry->mmap_flag = mmap_flag; 493 494 err = 
rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry, 495 length); 496 if (err) { 497 kfree(entry); 498 return NULL; 499 } 500 *offset = rdma_user_mmap_get_offset(&entry->rdma_entry); 501 502 return &entry->rdma_entry; 503 } 504 505 static int qp_mmap_entries_setup(struct efa_qp *qp, 506 struct efa_dev *dev, 507 struct efa_ucontext *ucontext, 508 struct efa_com_create_qp_params *params, 509 struct efa_ibv_create_qp_resp *resp) 510 { 511 size_t length; 512 u64 address; 513 514 address = dev->db_bar_addr + resp->sq_db_offset; 515 qp->sq_db_mmap_entry = 516 efa_user_mmap_entry_insert(&ucontext->ibucontext, 517 address, 518 PAGE_SIZE, EFA_MMAP_IO_NC, 519 &resp->sq_db_mmap_key); 520 if (!qp->sq_db_mmap_entry) 521 return -ENOMEM; 522 523 resp->sq_db_offset &= ~PAGE_MASK; 524 525 address = dev->mem_bar_addr + resp->llq_desc_offset; 526 length = PAGE_ALIGN(params->sq_ring_size_in_bytes + 527 (resp->llq_desc_offset & ~PAGE_MASK)); 528 529 qp->llq_desc_mmap_entry = 530 efa_user_mmap_entry_insert(&ucontext->ibucontext, 531 address, length, 532 EFA_MMAP_IO_WC, 533 &resp->llq_desc_mmap_key); 534 if (!qp->llq_desc_mmap_entry) 535 goto err_remove_mmap; 536 537 resp->llq_desc_offset &= ~PAGE_MASK; 538 539 if (qp->rq_size) { 540 address = dev->db_bar_addr + resp->rq_db_offset; 541 542 qp->rq_db_mmap_entry = 543 efa_user_mmap_entry_insert(&ucontext->ibucontext, 544 address, PAGE_SIZE, 545 EFA_MMAP_IO_NC, 546 &resp->rq_db_mmap_key); 547 if (!qp->rq_db_mmap_entry) 548 goto err_remove_mmap; 549 550 resp->rq_db_offset &= ~PAGE_MASK; 551 552 address = virt_to_phys(qp->rq_cpu_addr); 553 qp->rq_mmap_entry = 554 efa_user_mmap_entry_insert(&ucontext->ibucontext, 555 address, qp->rq_size, 556 EFA_MMAP_DMA_PAGE, 557 &resp->rq_mmap_key); 558 if (!qp->rq_mmap_entry) 559 goto err_remove_mmap; 560 561 resp->rq_mmap_size = qp->rq_size; 562 } 563 564 return 0; 565 566 err_remove_mmap: 567 efa_qp_user_mmap_entries_remove(qp); 568 569 return -ENOMEM; 570 } 571 572 static int efa_qp_validate_cap(struct efa_dev *dev, 573 struct ib_qp_init_attr *init_attr) 574 { 575 if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) { 576 ibdev_dbg(&dev->ibdev, 577 "qp: requested send wr[%u] exceeds the max[%u]\n", 578 init_attr->cap.max_send_wr, 579 dev->dev_attr.max_sq_depth); 580 return -EINVAL; 581 } 582 if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) { 583 ibdev_dbg(&dev->ibdev, 584 "qp: requested receive wr[%u] exceeds the max[%u]\n", 585 init_attr->cap.max_recv_wr, 586 dev->dev_attr.max_rq_depth); 587 return -EINVAL; 588 } 589 if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) { 590 ibdev_dbg(&dev->ibdev, 591 "qp: requested sge send[%u] exceeds the max[%u]\n", 592 init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge); 593 return -EINVAL; 594 } 595 if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) { 596 ibdev_dbg(&dev->ibdev, 597 "qp: requested sge recv[%u] exceeds the max[%u]\n", 598 init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge); 599 return -EINVAL; 600 } 601 if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) { 602 ibdev_dbg(&dev->ibdev, 603 "qp: requested inline data[%u] exceeds the max[%u]\n", 604 init_attr->cap.max_inline_data, 605 dev->dev_attr.inline_buf_size); 606 return -EINVAL; 607 } 608 609 return 0; 610 } 611 612 static int efa_qp_validate_attr(struct efa_dev *dev, 613 struct ib_qp_init_attr *init_attr) 614 { 615 if (init_attr->qp_type != IB_QPT_DRIVER && 616 init_attr->qp_type != IB_QPT_UD) { 617 ibdev_dbg(&dev->ibdev, 618 "Unsupported qp type %d\n", 
init_attr->qp_type); 619 return -EOPNOTSUPP; 620 } 621 622 if (init_attr->srq) { 623 ibdev_dbg(&dev->ibdev, "SRQ is not supported\n"); 624 return -EOPNOTSUPP; 625 } 626 627 if (init_attr->create_flags) { 628 ibdev_dbg(&dev->ibdev, "Unsupported create flags\n"); 629 return -EOPNOTSUPP; 630 } 631 632 return 0; 633 } 634 635 int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, 636 struct ib_udata *udata) 637 { 638 struct efa_com_create_qp_params create_qp_params = {}; 639 struct efa_com_create_qp_result create_qp_resp; 640 struct efa_dev *dev = to_edev(ibqp->device); 641 struct efa_ibv_create_qp_resp resp = {}; 642 struct efa_ibv_create_qp cmd = {}; 643 struct efa_qp *qp = to_eqp(ibqp); 644 struct efa_ucontext *ucontext; 645 u16 supported_efa_flags = 0; 646 int err; 647 648 ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext, 649 ibucontext); 650 651 err = efa_qp_validate_cap(dev, init_attr); 652 if (err) 653 goto err_out; 654 655 err = efa_qp_validate_attr(dev, init_attr); 656 if (err) 657 goto err_out; 658 659 if (offsetofend(typeof(cmd), driver_qp_type) > udata->inlen) { 660 ibdev_dbg(&dev->ibdev, 661 "Incompatible ABI params, no input udata\n"); 662 err = -EINVAL; 663 goto err_out; 664 } 665 666 if (udata->inlen > sizeof(cmd) && 667 !ib_is_udata_cleared(udata, sizeof(cmd), 668 udata->inlen - sizeof(cmd))) { 669 ibdev_dbg(&dev->ibdev, 670 "Incompatible ABI params, unknown fields in udata\n"); 671 err = -EINVAL; 672 goto err_out; 673 } 674 675 err = ib_copy_from_udata(&cmd, udata, 676 min(sizeof(cmd), udata->inlen)); 677 if (err) { 678 ibdev_dbg(&dev->ibdev, 679 "Cannot copy udata for create_qp\n"); 680 goto err_out; 681 } 682 683 if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_90)) { 684 ibdev_dbg(&dev->ibdev, 685 "Incompatible ABI params, unknown fields in udata\n"); 686 err = -EINVAL; 687 goto err_out; 688 } 689 690 if (EFA_DEV_CAP(dev, UNSOLICITED_WRITE_RECV)) 691 supported_efa_flags |= EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV; 692 693 if (cmd.flags & ~supported_efa_flags) { 694 ibdev_dbg(&dev->ibdev, "Unsupported EFA QP create flags[%#x], supported[%#x]\n", 695 cmd.flags, supported_efa_flags); 696 err = -EOPNOTSUPP; 697 goto err_out; 698 } 699 700 create_qp_params.uarn = ucontext->uarn; 701 create_qp_params.pd = to_epd(ibqp->pd)->pdn; 702 703 if (init_attr->qp_type == IB_QPT_UD) { 704 create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD; 705 } else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) { 706 create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD; 707 } else { 708 ibdev_dbg(&dev->ibdev, 709 "Unsupported qp type %d driver qp type %d\n", 710 init_attr->qp_type, cmd.driver_qp_type); 711 err = -EOPNOTSUPP; 712 goto err_out; 713 } 714 715 ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n", 716 init_attr->qp_type, cmd.driver_qp_type); 717 create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx; 718 create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx; 719 create_qp_params.sq_depth = init_attr->cap.max_send_wr; 720 create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size; 721 722 create_qp_params.rq_depth = init_attr->cap.max_recv_wr; 723 create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size; 724 qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes); 725 if (qp->rq_size) { 726 qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr, 727 qp->rq_size, DMA_TO_DEVICE); 728 if (!qp->rq_cpu_addr) { 729 err = -ENOMEM; 730 goto err_out; 731 } 732 733 ibdev_dbg(&dev->ibdev, 734 "qp->cpu_addr[0x%p] 
allocated: size[%lu], dma[%pad]\n", 735 qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr); 736 create_qp_params.rq_base_addr = qp->rq_dma_addr; 737 } 738 739 if (cmd.flags & EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV) 740 create_qp_params.unsolicited_write_recv = true; 741 742 err = efa_com_create_qp(&dev->edev, &create_qp_params, 743 &create_qp_resp); 744 if (err) 745 goto err_free_mapped; 746 747 resp.sq_db_offset = create_qp_resp.sq_db_offset; 748 resp.rq_db_offset = create_qp_resp.rq_db_offset; 749 resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset; 750 resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx; 751 resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx; 752 753 err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params, 754 &resp); 755 if (err) 756 goto err_destroy_qp; 757 758 qp->qp_handle = create_qp_resp.qp_handle; 759 qp->ibqp.qp_num = create_qp_resp.qp_num; 760 qp->max_send_wr = init_attr->cap.max_send_wr; 761 qp->max_recv_wr = init_attr->cap.max_recv_wr; 762 qp->max_send_sge = init_attr->cap.max_send_sge; 763 qp->max_recv_sge = init_attr->cap.max_recv_sge; 764 qp->max_inline_data = init_attr->cap.max_inline_data; 765 766 if (udata->outlen) { 767 err = ib_copy_to_udata(udata, &resp, 768 min(sizeof(resp), udata->outlen)); 769 if (err) { 770 ibdev_dbg(&dev->ibdev, 771 "Failed to copy udata for qp[%u]\n", 772 create_qp_resp.qp_num); 773 goto err_remove_mmap_entries; 774 } 775 } 776 777 ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num); 778 779 return 0; 780 781 err_remove_mmap_entries: 782 efa_qp_user_mmap_entries_remove(qp); 783 err_destroy_qp: 784 efa_destroy_qp_handle(dev, create_qp_resp.qp_handle); 785 err_free_mapped: 786 if (qp->rq_size) 787 efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr, 788 qp->rq_size, DMA_TO_DEVICE); 789 err_out: 790 atomic64_inc(&dev->stats.create_qp_err); 791 return err; 792 } 793 794 static const struct { 795 int valid; 796 enum ib_qp_attr_mask req_param; 797 enum ib_qp_attr_mask opt_param; 798 } srd_qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { 799 [IB_QPS_RESET] = { 800 [IB_QPS_RESET] = { .valid = 1 }, 801 [IB_QPS_INIT] = { 802 .valid = 1, 803 .req_param = IB_QP_PKEY_INDEX | 804 IB_QP_PORT | 805 IB_QP_QKEY, 806 }, 807 }, 808 [IB_QPS_INIT] = { 809 [IB_QPS_RESET] = { .valid = 1 }, 810 [IB_QPS_ERR] = { .valid = 1 }, 811 [IB_QPS_INIT] = { 812 .valid = 1, 813 .opt_param = IB_QP_PKEY_INDEX | 814 IB_QP_PORT | 815 IB_QP_QKEY, 816 }, 817 [IB_QPS_RTR] = { 818 .valid = 1, 819 .opt_param = IB_QP_PKEY_INDEX | 820 IB_QP_QKEY, 821 }, 822 }, 823 [IB_QPS_RTR] = { 824 [IB_QPS_RESET] = { .valid = 1 }, 825 [IB_QPS_ERR] = { .valid = 1 }, 826 [IB_QPS_RTS] = { 827 .valid = 1, 828 .req_param = IB_QP_SQ_PSN, 829 .opt_param = IB_QP_CUR_STATE | 830 IB_QP_QKEY | 831 IB_QP_RNR_RETRY, 832 833 } 834 }, 835 [IB_QPS_RTS] = { 836 [IB_QPS_RESET] = { .valid = 1 }, 837 [IB_QPS_ERR] = { .valid = 1 }, 838 [IB_QPS_RTS] = { 839 .valid = 1, 840 .opt_param = IB_QP_CUR_STATE | 841 IB_QP_QKEY, 842 }, 843 [IB_QPS_SQD] = { 844 .valid = 1, 845 .opt_param = IB_QP_EN_SQD_ASYNC_NOTIFY, 846 }, 847 }, 848 [IB_QPS_SQD] = { 849 [IB_QPS_RESET] = { .valid = 1 }, 850 [IB_QPS_ERR] = { .valid = 1 }, 851 [IB_QPS_RTS] = { 852 .valid = 1, 853 .opt_param = IB_QP_CUR_STATE | 854 IB_QP_QKEY, 855 }, 856 [IB_QPS_SQD] = { 857 .valid = 1, 858 .opt_param = IB_QP_PKEY_INDEX | 859 IB_QP_QKEY, 860 } 861 }, 862 [IB_QPS_SQE] = { 863 [IB_QPS_RESET] = { .valid = 1 }, 864 [IB_QPS_ERR] = { .valid = 1 }, 865 [IB_QPS_RTS] = { 866 .valid = 1, 867 .opt_param = IB_QP_CUR_STATE | 868 
IB_QP_QKEY, 869 } 870 }, 871 [IB_QPS_ERR] = { 872 [IB_QPS_RESET] = { .valid = 1 }, 873 [IB_QPS_ERR] = { .valid = 1 }, 874 } 875 }; 876 877 static bool efa_modify_srd_qp_is_ok(enum ib_qp_state cur_state, 878 enum ib_qp_state next_state, 879 enum ib_qp_attr_mask mask) 880 { 881 enum ib_qp_attr_mask req_param, opt_param; 882 883 if (mask & IB_QP_CUR_STATE && 884 cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS && 885 cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) 886 return false; 887 888 if (!srd_qp_state_table[cur_state][next_state].valid) 889 return false; 890 891 req_param = srd_qp_state_table[cur_state][next_state].req_param; 892 opt_param = srd_qp_state_table[cur_state][next_state].opt_param; 893 894 if ((mask & req_param) != req_param) 895 return false; 896 897 if (mask & ~(req_param | opt_param | IB_QP_STATE)) 898 return false; 899 900 return true; 901 } 902 903 static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp, 904 struct ib_qp_attr *qp_attr, int qp_attr_mask, 905 enum ib_qp_state cur_state, 906 enum ib_qp_state new_state) 907 { 908 int err; 909 910 #define EFA_MODIFY_QP_SUPP_MASK \ 911 (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \ 912 IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \ 913 IB_QP_RNR_RETRY) 914 915 if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) { 916 ibdev_dbg(&dev->ibdev, 917 "Unsupported qp_attr_mask[%#x] supported[%#x]\n", 918 qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK); 919 return -EOPNOTSUPP; 920 } 921 922 if (qp->ibqp.qp_type == IB_QPT_DRIVER) 923 err = !efa_modify_srd_qp_is_ok(cur_state, new_state, 924 qp_attr_mask); 925 else 926 err = !ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD, 927 qp_attr_mask); 928 929 if (err) { 930 ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n"); 931 return -EINVAL; 932 } 933 934 if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) { 935 ibdev_dbg(&dev->ibdev, "Can't change port num\n"); 936 return -EOPNOTSUPP; 937 } 938 939 if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) { 940 ibdev_dbg(&dev->ibdev, "Can't change pkey index\n"); 941 return -EOPNOTSUPP; 942 } 943 944 return 0; 945 } 946 947 int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, 948 int qp_attr_mask, struct ib_udata *udata) 949 { 950 struct efa_dev *dev = to_edev(ibqp->device); 951 struct efa_com_modify_qp_params params = {}; 952 struct efa_qp *qp = to_eqp(ibqp); 953 enum ib_qp_state cur_state; 954 enum ib_qp_state new_state; 955 int err; 956 957 if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 958 return -EOPNOTSUPP; 959 960 if (udata->inlen && 961 !ib_is_udata_cleared(udata, 0, udata->inlen)) { 962 ibdev_dbg(&dev->ibdev, 963 "Incompatible ABI params, udata not cleared\n"); 964 return -EINVAL; 965 } 966 967 cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state : 968 qp->state; 969 new_state = qp_attr_mask & IB_QP_STATE ? 
qp_attr->qp_state : cur_state; 970 971 err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state, 972 new_state); 973 if (err) 974 return err; 975 976 params.qp_handle = qp->qp_handle; 977 978 if (qp_attr_mask & IB_QP_STATE) { 979 EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE, 980 1); 981 EFA_SET(¶ms.modify_mask, 982 EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1); 983 params.cur_qp_state = cur_state; 984 params.qp_state = new_state; 985 } 986 987 if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) { 988 EFA_SET(¶ms.modify_mask, 989 EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY, 1); 990 params.sq_drained_async_notify = qp_attr->en_sqd_async_notify; 991 } 992 993 if (qp_attr_mask & IB_QP_QKEY) { 994 EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QKEY, 1); 995 params.qkey = qp_attr->qkey; 996 } 997 998 if (qp_attr_mask & IB_QP_SQ_PSN) { 999 EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN, 1); 1000 params.sq_psn = qp_attr->sq_psn; 1001 } 1002 1003 if (qp_attr_mask & IB_QP_RNR_RETRY) { 1004 EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY, 1005 1); 1006 params.rnr_retry = qp_attr->rnr_retry; 1007 } 1008 1009 err = efa_com_modify_qp(&dev->edev, ¶ms); 1010 if (err) 1011 return err; 1012 1013 qp->state = new_state; 1014 1015 return 0; 1016 } 1017 1018 static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx) 1019 { 1020 struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx }; 1021 1022 return efa_com_destroy_cq(&dev->edev, ¶ms); 1023 } 1024 1025 static void efa_cq_user_mmap_entries_remove(struct efa_cq *cq) 1026 { 1027 rdma_user_mmap_entry_remove(cq->db_mmap_entry); 1028 rdma_user_mmap_entry_remove(cq->mmap_entry); 1029 } 1030 1031 int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) 1032 { 1033 struct efa_dev *dev = to_edev(ibcq->device); 1034 struct efa_cq *cq = to_ecq(ibcq); 1035 1036 ibdev_dbg(&dev->ibdev, 1037 "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n", 1038 cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr); 1039 1040 efa_destroy_cq_idx(dev, cq->cq_idx); 1041 efa_cq_user_mmap_entries_remove(cq); 1042 if (cq->eq) { 1043 xa_erase(&dev->cqs_xa, cq->cq_idx); 1044 synchronize_irq(cq->eq->irq.irqn); 1045 } 1046 efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, 1047 DMA_FROM_DEVICE); 1048 return 0; 1049 } 1050 1051 static struct efa_eq *efa_vec2eq(struct efa_dev *dev, int vec) 1052 { 1053 return &dev->eqs[vec]; 1054 } 1055 1056 static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, 1057 struct efa_ibv_create_cq_resp *resp, 1058 bool db_valid) 1059 { 1060 resp->q_mmap_size = cq->size; 1061 cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, 1062 virt_to_phys(cq->cpu_addr), 1063 cq->size, EFA_MMAP_DMA_PAGE, 1064 &resp->q_mmap_key); 1065 if (!cq->mmap_entry) 1066 return -ENOMEM; 1067 1068 if (db_valid) { 1069 cq->db_mmap_entry = 1070 efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, 1071 dev->db_bar_addr + resp->db_off, 1072 PAGE_SIZE, EFA_MMAP_IO_NC, 1073 &resp->db_mmap_key); 1074 if (!cq->db_mmap_entry) { 1075 rdma_user_mmap_entry_remove(cq->mmap_entry); 1076 return -ENOMEM; 1077 } 1078 1079 resp->db_off &= ~PAGE_MASK; 1080 resp->comp_mask |= EFA_CREATE_CQ_RESP_DB_OFF; 1081 } 1082 1083 return 0; 1084 } 1085 1086 int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, 1087 struct ib_udata *udata) 1088 { 1089 struct efa_ucontext *ucontext = rdma_udata_to_drv_context( 1090 udata, struct efa_ucontext, ibucontext); 1091 struct efa_com_create_cq_params params = {}; 
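	/*
	 * The completion ring allocated below is sized as
	 * PAGE_ALIGN(cq_entry_size * entries * num_sub_cqs) and handed to
	 * user space through a single DMA mmap entry. As a rough worked
	 * example (entry sizes are dictated by struct efa_io_rx_cdesc/_ex
	 * and are assumed to be 16/32 bytes here):
	 *
	 *   entries = 1024, num_sub_cqs = 2, cq_entry_size = 32
	 *   cq->size = PAGE_ALIGN(32 * 1024 * 2) = 64KB, i.e. 16 4KB pages
	 */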
1092 struct efa_ibv_create_cq_resp resp = {}; 1093 struct efa_com_create_cq_result result; 1094 struct ib_device *ibdev = ibcq->device; 1095 struct efa_dev *dev = to_edev(ibdev); 1096 struct efa_ibv_create_cq cmd = {}; 1097 struct efa_cq *cq = to_ecq(ibcq); 1098 int entries = attr->cqe; 1099 bool set_src_addr; 1100 int err; 1101 1102 ibdev_dbg(ibdev, "create_cq entries %d\n", entries); 1103 1104 if (attr->flags) 1105 return -EOPNOTSUPP; 1106 1107 if (entries < 1 || entries > dev->dev_attr.max_cq_depth) { 1108 ibdev_dbg(ibdev, 1109 "cq: requested entries[%u] non-positive or greater than max[%u]\n", 1110 entries, dev->dev_attr.max_cq_depth); 1111 err = -EINVAL; 1112 goto err_out; 1113 } 1114 1115 if (offsetofend(typeof(cmd), num_sub_cqs) > udata->inlen) { 1116 ibdev_dbg(ibdev, 1117 "Incompatible ABI params, no input udata\n"); 1118 err = -EINVAL; 1119 goto err_out; 1120 } 1121 1122 if (udata->inlen > sizeof(cmd) && 1123 !ib_is_udata_cleared(udata, sizeof(cmd), 1124 udata->inlen - sizeof(cmd))) { 1125 ibdev_dbg(ibdev, 1126 "Incompatible ABI params, unknown fields in udata\n"); 1127 err = -EINVAL; 1128 goto err_out; 1129 } 1130 1131 err = ib_copy_from_udata(&cmd, udata, 1132 min(sizeof(cmd), udata->inlen)); 1133 if (err) { 1134 ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n"); 1135 goto err_out; 1136 } 1137 1138 if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_58)) { 1139 ibdev_dbg(ibdev, 1140 "Incompatible ABI params, unknown fields in udata\n"); 1141 err = -EINVAL; 1142 goto err_out; 1143 } 1144 1145 set_src_addr = !!(cmd.flags & EFA_CREATE_CQ_WITH_SGID); 1146 if ((cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc_ex)) && 1147 (set_src_addr || 1148 cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc))) { 1149 ibdev_dbg(ibdev, 1150 "Invalid entry size [%u]\n", cmd.cq_entry_size); 1151 err = -EINVAL; 1152 goto err_out; 1153 } 1154 1155 if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) { 1156 ibdev_dbg(ibdev, 1157 "Invalid number of sub cqs[%u] expected[%u]\n", 1158 cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq); 1159 err = -EINVAL; 1160 goto err_out; 1161 } 1162 1163 cq->ucontext = ucontext; 1164 cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs); 1165 cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size, 1166 DMA_FROM_DEVICE); 1167 if (!cq->cpu_addr) { 1168 err = -ENOMEM; 1169 goto err_out; 1170 } 1171 1172 params.uarn = cq->ucontext->uarn; 1173 params.cq_depth = entries; 1174 params.dma_addr = cq->dma_addr; 1175 params.entry_size_in_bytes = cmd.cq_entry_size; 1176 params.num_sub_cqs = cmd.num_sub_cqs; 1177 params.set_src_addr = set_src_addr; 1178 if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) { 1179 cq->eq = efa_vec2eq(dev, attr->comp_vector); 1180 params.eqn = cq->eq->eeq.eqn; 1181 params.interrupt_mode_enabled = true; 1182 } 1183 1184 err = efa_com_create_cq(&dev->edev, ¶ms, &result); 1185 if (err) 1186 goto err_free_mapped; 1187 1188 resp.db_off = result.db_off; 1189 resp.cq_idx = result.cq_idx; 1190 cq->cq_idx = result.cq_idx; 1191 cq->ibcq.cqe = result.actual_depth; 1192 WARN_ON_ONCE(entries != result.actual_depth); 1193 1194 err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid); 1195 if (err) { 1196 ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n", 1197 cq->cq_idx); 1198 goto err_destroy_cq; 1199 } 1200 1201 if (cq->eq) { 1202 err = xa_err(xa_store(&dev->cqs_xa, cq->cq_idx, cq, GFP_KERNEL)); 1203 if (err) { 1204 ibdev_dbg(ibdev, "Failed to store cq[%u] in xarray\n", 1205 cq->cq_idx); 1206 goto err_remove_mmap; 1207 } 1208 
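		/*
		 * Publishing the CQ in cqs_xa is what allows the EQ interrupt
		 * path to translate a completion event's cq_idx back into
		 * this CQ; efa_destroy_cq() erases the entry and synchronizes
		 * the EQ IRQ before the CQ buffer is freed.
		 */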
} 1209 1210 if (udata->outlen) { 1211 err = ib_copy_to_udata(udata, &resp, 1212 min(sizeof(resp), udata->outlen)); 1213 if (err) { 1214 ibdev_dbg(ibdev, 1215 "Failed to copy udata for create_cq\n"); 1216 goto err_xa_erase; 1217 } 1218 } 1219 1220 ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n", 1221 cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr); 1222 1223 return 0; 1224 1225 err_xa_erase: 1226 if (cq->eq) 1227 xa_erase(&dev->cqs_xa, cq->cq_idx); 1228 err_remove_mmap: 1229 efa_cq_user_mmap_entries_remove(cq); 1230 err_destroy_cq: 1231 efa_destroy_cq_idx(dev, cq->cq_idx); 1232 err_free_mapped: 1233 efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, 1234 DMA_FROM_DEVICE); 1235 1236 err_out: 1237 atomic64_inc(&dev->stats.create_cq_err); 1238 return err; 1239 } 1240 1241 static int umem_to_page_list(struct efa_dev *dev, 1242 struct ib_umem *umem, 1243 u64 *page_list, 1244 u32 hp_cnt, 1245 u8 hp_shift) 1246 { 1247 u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT); 1248 struct ib_block_iter biter; 1249 unsigned int hp_idx = 0; 1250 1251 ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n", 1252 hp_cnt, pages_in_hp); 1253 1254 rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift)) 1255 page_list[hp_idx++] = rdma_block_iter_dma_address(&biter); 1256 1257 return 0; 1258 } 1259 1260 static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt) 1261 { 1262 struct scatterlist *sglist; 1263 struct page *pg; 1264 int i; 1265 1266 sglist = kmalloc_array(page_cnt, sizeof(*sglist), GFP_KERNEL); 1267 if (!sglist) 1268 return NULL; 1269 sg_init_table(sglist, page_cnt); 1270 for (i = 0; i < page_cnt; i++) { 1271 pg = vmalloc_to_page(buf); 1272 if (!pg) 1273 goto err; 1274 sg_set_page(&sglist[i], pg, PAGE_SIZE, 0); 1275 buf += PAGE_SIZE / sizeof(*buf); 1276 } 1277 return sglist; 1278 1279 err: 1280 kfree(sglist); 1281 return NULL; 1282 } 1283 1284 /* 1285 * create a chunk list of physical pages dma addresses from the supplied 1286 * scatter gather list 1287 */ 1288 static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl) 1289 { 1290 struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list; 1291 int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages; 1292 struct scatterlist *pages_sgl = pbl->phys.indirect.sgl; 1293 unsigned int chunk_list_size, chunk_idx, payload_idx; 1294 int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt; 1295 struct efa_com_ctrl_buff_info *ctrl_buf; 1296 u64 *cur_chunk_buf, *prev_chunk_buf; 1297 struct ib_block_iter biter; 1298 dma_addr_t dma_addr; 1299 int i; 1300 1301 /* allocate a chunk list that consists of 4KB chunks */ 1302 chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK); 1303 1304 chunk_list->size = chunk_list_size; 1305 chunk_list->chunks = kcalloc(chunk_list_size, 1306 sizeof(*chunk_list->chunks), 1307 GFP_KERNEL); 1308 if (!chunk_list->chunks) 1309 return -ENOMEM; 1310 1311 ibdev_dbg(&dev->ibdev, 1312 "chunk_list_size[%u] - pages[%u]\n", chunk_list_size, 1313 page_cnt); 1314 1315 /* allocate chunk buffers: */ 1316 for (i = 0; i < chunk_list_size; i++) { 1317 chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL); 1318 if (!chunk_list->chunks[i].buf) 1319 goto chunk_list_dealloc; 1320 1321 chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE; 1322 } 1323 chunk_list->chunks[chunk_list_size - 1].length = 1324 ((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) + 1325 EFA_CHUNK_PTR_SIZE; 1326 1327 /* fill the dma addresses of sg list pages to chunks: */ 1328 chunk_idx = 
0; 1329 payload_idx = 0; 1330 cur_chunk_buf = chunk_list->chunks[0].buf; 1331 rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt, 1332 EFA_CHUNK_PAYLOAD_SIZE) { 1333 cur_chunk_buf[payload_idx++] = 1334 rdma_block_iter_dma_address(&biter); 1335 1336 if (payload_idx == EFA_PTRS_PER_CHUNK) { 1337 chunk_idx++; 1338 cur_chunk_buf = chunk_list->chunks[chunk_idx].buf; 1339 payload_idx = 0; 1340 } 1341 } 1342 1343 /* map chunks to dma and fill chunks next ptrs */ 1344 for (i = chunk_list_size - 1; i >= 0; i--) { 1345 dma_addr = dma_map_single(&dev->pdev->dev, 1346 chunk_list->chunks[i].buf, 1347 chunk_list->chunks[i].length, 1348 DMA_TO_DEVICE); 1349 if (dma_mapping_error(&dev->pdev->dev, dma_addr)) { 1350 ibdev_err(&dev->ibdev, 1351 "chunk[%u] dma_map_failed\n", i); 1352 goto chunk_list_unmap; 1353 } 1354 1355 chunk_list->chunks[i].dma_addr = dma_addr; 1356 ibdev_dbg(&dev->ibdev, 1357 "chunk[%u] mapped at [%pad]\n", i, &dma_addr); 1358 1359 if (!i) 1360 break; 1361 1362 prev_chunk_buf = chunk_list->chunks[i - 1].buf; 1363 1364 ctrl_buf = (struct efa_com_ctrl_buff_info *) 1365 &prev_chunk_buf[EFA_PTRS_PER_CHUNK]; 1366 ctrl_buf->length = chunk_list->chunks[i].length; 1367 1368 efa_com_set_dma_addr(dma_addr, 1369 &ctrl_buf->address.mem_addr_high, 1370 &ctrl_buf->address.mem_addr_low); 1371 } 1372 1373 return 0; 1374 1375 chunk_list_unmap: 1376 for (; i < chunk_list_size; i++) { 1377 dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr, 1378 chunk_list->chunks[i].length, DMA_TO_DEVICE); 1379 } 1380 chunk_list_dealloc: 1381 for (i = 0; i < chunk_list_size; i++) 1382 kfree(chunk_list->chunks[i].buf); 1383 1384 kfree(chunk_list->chunks); 1385 return -ENOMEM; 1386 } 1387 1388 static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl) 1389 { 1390 struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list; 1391 int i; 1392 1393 for (i = 0; i < chunk_list->size; i++) { 1394 dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr, 1395 chunk_list->chunks[i].length, DMA_TO_DEVICE); 1396 kfree(chunk_list->chunks[i].buf); 1397 } 1398 1399 kfree(chunk_list->chunks); 1400 } 1401 1402 /* initialize pbl continuous mode: map pbl buffer to a dma address. */ 1403 static int pbl_continuous_initialize(struct efa_dev *dev, 1404 struct pbl_context *pbl) 1405 { 1406 dma_addr_t dma_addr; 1407 1408 dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf, 1409 pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE); 1410 if (dma_mapping_error(&dev->pdev->dev, dma_addr)) { 1411 ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n"); 1412 return -ENOMEM; 1413 } 1414 1415 pbl->phys.continuous.dma_addr = dma_addr; 1416 ibdev_dbg(&dev->ibdev, 1417 "pbl continuous - dma_addr = %pad, size[%u]\n", 1418 &dma_addr, pbl->pbl_buf_size_in_bytes); 1419 1420 return 0; 1421 } 1422 1423 /* 1424 * initialize pbl indirect mode: 1425 * create a chunk list out of the dma addresses of the physical pages of 1426 * pbl buffer. 
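 *
 * As a sizing example, a 1GB registration built from 4KB device pages
 * needs 256K page pointers, i.e. a 2MB pbl_buf; that vmalloc'ed buffer
 * itself spans 512 pages, so with roughly 510 payload pointers per 4KB
 * chunk (the exact EFA_PTRS_PER_CHUNK value depends on
 * sizeof(struct efa_com_ctrl_buff_info)) the chunk list ends up with two
 * chunks, the first pointing at the second through its trailing
 * efa_com_ctrl_buff_info.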
1427 */ 1428 static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl) 1429 { 1430 u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, EFA_CHUNK_PAYLOAD_SIZE); 1431 struct scatterlist *sgl; 1432 int sg_dma_cnt, err; 1433 1434 BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE); 1435 sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages); 1436 if (!sgl) 1437 return -ENOMEM; 1438 1439 sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE); 1440 if (!sg_dma_cnt) { 1441 err = -EINVAL; 1442 goto err_map; 1443 } 1444 1445 pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages; 1446 pbl->phys.indirect.sgl = sgl; 1447 pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt; 1448 err = pbl_chunk_list_create(dev, pbl); 1449 if (err) { 1450 ibdev_dbg(&dev->ibdev, 1451 "chunk_list creation failed[%d]\n", err); 1452 goto err_chunk; 1453 } 1454 1455 ibdev_dbg(&dev->ibdev, 1456 "pbl indirect - size[%u], chunks[%u]\n", 1457 pbl->pbl_buf_size_in_bytes, 1458 pbl->phys.indirect.chunk_list.size); 1459 1460 return 0; 1461 1462 err_chunk: 1463 dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE); 1464 err_map: 1465 kfree(sgl); 1466 return err; 1467 } 1468 1469 static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl) 1470 { 1471 pbl_chunk_list_destroy(dev, pbl); 1472 dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl, 1473 pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE); 1474 kfree(pbl->phys.indirect.sgl); 1475 } 1476 1477 /* create a page buffer list from a mapped user memory region */ 1478 static int pbl_create(struct efa_dev *dev, 1479 struct pbl_context *pbl, 1480 struct ib_umem *umem, 1481 int hp_cnt, 1482 u8 hp_shift) 1483 { 1484 int err; 1485 1486 pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE; 1487 pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL); 1488 if (!pbl->pbl_buf) 1489 return -ENOMEM; 1490 1491 if (is_vmalloc_addr(pbl->pbl_buf)) { 1492 pbl->physically_continuous = 0; 1493 err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt, 1494 hp_shift); 1495 if (err) 1496 goto err_free; 1497 1498 err = pbl_indirect_initialize(dev, pbl); 1499 if (err) 1500 goto err_free; 1501 } else { 1502 pbl->physically_continuous = 1; 1503 err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt, 1504 hp_shift); 1505 if (err) 1506 goto err_free; 1507 1508 err = pbl_continuous_initialize(dev, pbl); 1509 if (err) 1510 goto err_free; 1511 } 1512 1513 ibdev_dbg(&dev->ibdev, 1514 "user_pbl_created: user_pages[%u], continuous[%u]\n", 1515 hp_cnt, pbl->physically_continuous); 1516 1517 return 0; 1518 1519 err_free: 1520 kvfree(pbl->pbl_buf); 1521 return err; 1522 } 1523 1524 static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl) 1525 { 1526 if (pbl->physically_continuous) 1527 dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr, 1528 pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE); 1529 else 1530 pbl_indirect_terminate(dev, pbl); 1531 1532 kvfree(pbl->pbl_buf); 1533 } 1534 1535 static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr, 1536 struct efa_com_reg_mr_params *params) 1537 { 1538 int err; 1539 1540 params->inline_pbl = 1; 1541 err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array, 1542 params->page_num, params->page_shift); 1543 if (err) 1544 return err; 1545 1546 ibdev_dbg(&dev->ibdev, 1547 "inline_pbl_array - pages[%u]\n", params->page_num); 1548 1549 return 0; 1550 } 1551 1552 static int efa_create_pbl(struct efa_dev *dev, 1553 struct pbl_context 
*pbl, 1554 struct efa_mr *mr, 1555 struct efa_com_reg_mr_params *params) 1556 { 1557 int err; 1558 1559 err = pbl_create(dev, pbl, mr->umem, params->page_num, 1560 params->page_shift); 1561 if (err) { 1562 ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err); 1563 return err; 1564 } 1565 1566 params->inline_pbl = 0; 1567 params->indirect = !pbl->physically_continuous; 1568 if (pbl->physically_continuous) { 1569 params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes; 1570 1571 efa_com_set_dma_addr(pbl->phys.continuous.dma_addr, 1572 ¶ms->pbl.pbl.address.mem_addr_high, 1573 ¶ms->pbl.pbl.address.mem_addr_low); 1574 } else { 1575 params->pbl.pbl.length = 1576 pbl->phys.indirect.chunk_list.chunks[0].length; 1577 1578 efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr, 1579 ¶ms->pbl.pbl.address.mem_addr_high, 1580 ¶ms->pbl.pbl.address.mem_addr_low); 1581 } 1582 1583 return 0; 1584 } 1585 1586 static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags, 1587 struct ib_udata *udata) 1588 { 1589 struct efa_dev *dev = to_edev(ibpd->device); 1590 int supp_access_flags; 1591 struct efa_mr *mr; 1592 1593 if (udata && udata->inlen && 1594 !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) { 1595 ibdev_dbg(&dev->ibdev, 1596 "Incompatible ABI params, udata not cleared\n"); 1597 return ERR_PTR(-EINVAL); 1598 } 1599 1600 supp_access_flags = 1601 IB_ACCESS_LOCAL_WRITE | 1602 (EFA_DEV_CAP(dev, RDMA_READ) ? IB_ACCESS_REMOTE_READ : 0) | 1603 (EFA_DEV_CAP(dev, RDMA_WRITE) ? IB_ACCESS_REMOTE_WRITE : 0); 1604 1605 access_flags &= ~IB_ACCESS_OPTIONAL; 1606 if (access_flags & ~supp_access_flags) { 1607 ibdev_dbg(&dev->ibdev, 1608 "Unsupported access flags[%#x], supported[%#x]\n", 1609 access_flags, supp_access_flags); 1610 return ERR_PTR(-EOPNOTSUPP); 1611 } 1612 1613 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1614 if (!mr) 1615 return ERR_PTR(-ENOMEM); 1616 1617 return mr; 1618 } 1619 1620 static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start, 1621 u64 length, u64 virt_addr, int access_flags) 1622 { 1623 struct efa_dev *dev = to_edev(ibpd->device); 1624 struct efa_com_reg_mr_params params = {}; 1625 struct efa_com_reg_mr_result result = {}; 1626 struct pbl_context pbl; 1627 unsigned int pg_sz; 1628 int inline_size; 1629 int err; 1630 1631 params.pd = to_epd(ibpd)->pdn; 1632 params.iova = virt_addr; 1633 params.mr_length_in_bytes = length; 1634 params.permissions = access_flags; 1635 1636 pg_sz = ib_umem_find_best_pgsz(mr->umem, 1637 dev->dev_attr.page_size_cap, 1638 virt_addr); 1639 if (!pg_sz) { 1640 ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n", 1641 dev->dev_attr.page_size_cap); 1642 return -EOPNOTSUPP; 1643 } 1644 1645 params.page_shift = order_base_2(pg_sz); 1646 params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz); 1647 1648 ibdev_dbg(&dev->ibdev, 1649 "start %#llx length %#llx params.page_shift %u params.page_num %u\n", 1650 start, length, params.page_shift, params.page_num); 1651 1652 inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array); 1653 if (params.page_num <= inline_size) { 1654 err = efa_create_inline_pbl(dev, mr, ¶ms); 1655 if (err) 1656 return err; 1657 1658 err = efa_com_register_mr(&dev->edev, ¶ms, &result); 1659 if (err) 1660 return err; 1661 } else { 1662 err = efa_create_pbl(dev, &pbl, mr, ¶ms); 1663 if (err) 1664 return err; 1665 1666 err = efa_com_register_mr(&dev->edev, ¶ms, &result); 1667 pbl_destroy(dev, &pbl); 1668 1669 if (err) 1670 return err; 1671 } 1672 1673 mr->ibmr.lkey = 
result.l_key; 1674 mr->ibmr.rkey = result.r_key; 1675 mr->ibmr.length = length; 1676 mr->ic_info.recv_ic_id = result.ic_info.recv_ic_id; 1677 mr->ic_info.rdma_read_ic_id = result.ic_info.rdma_read_ic_id; 1678 mr->ic_info.rdma_recv_ic_id = result.ic_info.rdma_recv_ic_id; 1679 mr->ic_info.recv_ic_id_valid = result.ic_info.recv_ic_id_valid; 1680 mr->ic_info.rdma_read_ic_id_valid = result.ic_info.rdma_read_ic_id_valid; 1681 mr->ic_info.rdma_recv_ic_id_valid = result.ic_info.rdma_recv_ic_id_valid; 1682 ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey); 1683 1684 return 0; 1685 } 1686 1687 struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, 1688 u64 length, u64 virt_addr, 1689 int fd, int access_flags, 1690 struct ib_udata *udata) 1691 { 1692 struct efa_dev *dev = to_edev(ibpd->device); 1693 struct ib_umem_dmabuf *umem_dmabuf; 1694 struct efa_mr *mr; 1695 int err; 1696 1697 mr = efa_alloc_mr(ibpd, access_flags, udata); 1698 if (IS_ERR(mr)) { 1699 err = PTR_ERR(mr); 1700 goto err_out; 1701 } 1702 1703 umem_dmabuf = ib_umem_dmabuf_get_pinned(ibpd->device, start, length, fd, 1704 access_flags); 1705 if (IS_ERR(umem_dmabuf)) { 1706 err = PTR_ERR(umem_dmabuf); 1707 ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%d]\n", err); 1708 goto err_free; 1709 } 1710 1711 mr->umem = &umem_dmabuf->umem; 1712 err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags); 1713 if (err) 1714 goto err_release; 1715 1716 return &mr->ibmr; 1717 1718 err_release: 1719 ib_umem_release(mr->umem); 1720 err_free: 1721 kfree(mr); 1722 err_out: 1723 atomic64_inc(&dev->stats.reg_mr_err); 1724 return ERR_PTR(err); 1725 } 1726 1727 struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, 1728 u64 virt_addr, int access_flags, 1729 struct ib_udata *udata) 1730 { 1731 struct efa_dev *dev = to_edev(ibpd->device); 1732 struct efa_mr *mr; 1733 int err; 1734 1735 mr = efa_alloc_mr(ibpd, access_flags, udata); 1736 if (IS_ERR(mr)) { 1737 err = PTR_ERR(mr); 1738 goto err_out; 1739 } 1740 1741 mr->umem = ib_umem_get(ibpd->device, start, length, access_flags); 1742 if (IS_ERR(mr->umem)) { 1743 err = PTR_ERR(mr->umem); 1744 ibdev_dbg(&dev->ibdev, 1745 "Failed to pin and map user space memory[%d]\n", err); 1746 goto err_free; 1747 } 1748 1749 err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags); 1750 if (err) 1751 goto err_release; 1752 1753 return &mr->ibmr; 1754 1755 err_release: 1756 ib_umem_release(mr->umem); 1757 err_free: 1758 kfree(mr); 1759 err_out: 1760 atomic64_inc(&dev->stats.reg_mr_err); 1761 return ERR_PTR(err); 1762 } 1763 1764 static int UVERBS_HANDLER(EFA_IB_METHOD_MR_QUERY)(struct uverbs_attr_bundle *attrs) 1765 { 1766 struct ib_mr *ibmr = uverbs_attr_get_obj(attrs, EFA_IB_ATTR_QUERY_MR_HANDLE); 1767 struct efa_mr *mr = to_emr(ibmr); 1768 u16 ic_id_validity = 0; 1769 int ret; 1770 1771 ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID, 1772 &mr->ic_info.recv_ic_id, sizeof(mr->ic_info.recv_ic_id)); 1773 if (ret) 1774 return ret; 1775 1776 ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID, 1777 &mr->ic_info.rdma_read_ic_id, sizeof(mr->ic_info.rdma_read_ic_id)); 1778 if (ret) 1779 return ret; 1780 1781 ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID, 1782 &mr->ic_info.rdma_recv_ic_id, sizeof(mr->ic_info.rdma_recv_ic_id)); 1783 if (ret) 1784 return ret; 1785 1786 if (mr->ic_info.recv_ic_id_valid) 1787 ic_id_validity |= EFA_QUERY_MR_VALIDITY_RECV_IC_ID; 1788 if (mr->ic_info.rdma_read_ic_id_valid) 1789 
ic_id_validity |= EFA_QUERY_MR_VALIDITY_RDMA_READ_IC_ID; 1790 if (mr->ic_info.rdma_recv_ic_id_valid) 1791 ic_id_validity |= EFA_QUERY_MR_VALIDITY_RDMA_RECV_IC_ID; 1792 1793 return uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY, 1794 &ic_id_validity, sizeof(ic_id_validity)); 1795 } 1796 1797 int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) 1798 { 1799 struct efa_dev *dev = to_edev(ibmr->device); 1800 struct efa_com_dereg_mr_params params; 1801 struct efa_mr *mr = to_emr(ibmr); 1802 int err; 1803 1804 ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey); 1805 1806 params.l_key = mr->ibmr.lkey; 1807 err = efa_com_dereg_mr(&dev->edev, ¶ms); 1808 if (err) 1809 return err; 1810 1811 ib_umem_release(mr->umem); 1812 kfree(mr); 1813 1814 return 0; 1815 } 1816 1817 int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num, 1818 struct ib_port_immutable *immutable) 1819 { 1820 struct ib_port_attr attr; 1821 int err; 1822 1823 err = ib_query_port(ibdev, port_num, &attr); 1824 if (err) { 1825 ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err); 1826 return err; 1827 } 1828 1829 immutable->pkey_tbl_len = attr.pkey_tbl_len; 1830 immutable->gid_tbl_len = attr.gid_tbl_len; 1831 1832 return 0; 1833 } 1834 1835 static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn) 1836 { 1837 struct efa_com_dealloc_uar_params params = { 1838 .uarn = uarn, 1839 }; 1840 1841 return efa_com_dealloc_uar(&dev->edev, ¶ms); 1842 } 1843 1844 #define EFA_CHECK_USER_COMP(_dev, _comp_mask, _attr, _mask, _attr_str) \ 1845 (_attr_str = (!(_dev)->dev_attr._attr || ((_comp_mask) & (_mask))) ? \ 1846 NULL : #_attr) 1847 1848 static int efa_user_comp_handshake(const struct ib_ucontext *ibucontext, 1849 const struct efa_ibv_alloc_ucontext_cmd *cmd) 1850 { 1851 struct efa_dev *dev = to_edev(ibucontext->device); 1852 char *attr_str; 1853 1854 if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, max_tx_batch, 1855 EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH, attr_str)) 1856 goto err; 1857 1858 if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, min_sq_depth, 1859 EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR, 1860 attr_str)) 1861 goto err; 1862 1863 return 0; 1864 1865 err: 1866 ibdev_dbg(&dev->ibdev, "Userspace handshake failed for %s attribute\n", 1867 attr_str); 1868 return -EOPNOTSUPP; 1869 } 1870 1871 int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) 1872 { 1873 struct efa_ucontext *ucontext = to_eucontext(ibucontext); 1874 struct efa_dev *dev = to_edev(ibucontext->device); 1875 struct efa_ibv_alloc_ucontext_resp resp = {}; 1876 struct efa_ibv_alloc_ucontext_cmd cmd = {}; 1877 struct efa_com_alloc_uar_result result; 1878 int err; 1879 1880 /* 1881 * it's fine if the driver does not know all request fields, 1882 * we will ack input fields in our response. 
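	 *
	 * The opposite direction is enforced by efa_user_comp_handshake():
	 * if the device reports an attribute such as max_tx_batch or
	 * min_sq_depth as non-zero but userspace did not acknowledge it with
	 * the matching EFA_ALLOC_UCONTEXT_CMD_COMP_* bit in comp_mask, the
	 * allocation fails with -EOPNOTSUPP, so both sides must agree on
	 * which optional attributes are in use.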
1883 */ 1884 1885 err = ib_copy_from_udata(&cmd, udata, 1886 min(sizeof(cmd), udata->inlen)); 1887 if (err) { 1888 ibdev_dbg(&dev->ibdev, 1889 "Cannot copy udata for alloc_ucontext\n"); 1890 goto err_out; 1891 } 1892 1893 err = efa_user_comp_handshake(ibucontext, &cmd); 1894 if (err) 1895 goto err_out; 1896 1897 err = efa_com_alloc_uar(&dev->edev, &result); 1898 if (err) 1899 goto err_out; 1900 1901 ucontext->uarn = result.uarn; 1902 1903 resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE; 1904 resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH; 1905 resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq; 1906 resp.inline_buf_size = dev->dev_attr.inline_buf_size; 1907 resp.max_llq_size = dev->dev_attr.max_llq_size; 1908 resp.max_tx_batch = dev->dev_attr.max_tx_batch; 1909 resp.min_sq_wr = dev->dev_attr.min_sq_depth; 1910 1911 err = ib_copy_to_udata(udata, &resp, 1912 min(sizeof(resp), udata->outlen)); 1913 if (err) 1914 goto err_dealloc_uar; 1915 1916 return 0; 1917 1918 err_dealloc_uar: 1919 efa_dealloc_uar(dev, result.uarn); 1920 err_out: 1921 atomic64_inc(&dev->stats.alloc_ucontext_err); 1922 return err; 1923 } 1924 1925 void efa_dealloc_ucontext(struct ib_ucontext *ibucontext) 1926 { 1927 struct efa_ucontext *ucontext = to_eucontext(ibucontext); 1928 struct efa_dev *dev = to_edev(ibucontext->device); 1929 1930 efa_dealloc_uar(dev, ucontext->uarn); 1931 } 1932 1933 void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry) 1934 { 1935 struct efa_user_mmap_entry *entry = to_emmap(rdma_entry); 1936 1937 kfree(entry); 1938 } 1939 1940 static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, 1941 struct vm_area_struct *vma) 1942 { 1943 struct rdma_user_mmap_entry *rdma_entry; 1944 struct efa_user_mmap_entry *entry; 1945 unsigned long va; 1946 int err = 0; 1947 u64 pfn; 1948 1949 rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma); 1950 if (!rdma_entry) { 1951 ibdev_dbg(&dev->ibdev, 1952 "pgoff[%#lx] does not have valid entry\n", 1953 vma->vm_pgoff); 1954 atomic64_inc(&dev->stats.mmap_err); 1955 return -EINVAL; 1956 } 1957 entry = to_emmap(rdma_entry); 1958 1959 ibdev_dbg(&dev->ibdev, 1960 "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n", 1961 entry->address, rdma_entry->npages * PAGE_SIZE, 1962 entry->mmap_flag); 1963 1964 pfn = entry->address >> PAGE_SHIFT; 1965 switch (entry->mmap_flag) { 1966 case EFA_MMAP_IO_NC: 1967 err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, 1968 entry->rdma_entry.npages * PAGE_SIZE, 1969 pgprot_noncached(vma->vm_page_prot), 1970 rdma_entry); 1971 break; 1972 case EFA_MMAP_IO_WC: 1973 err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, 1974 entry->rdma_entry.npages * PAGE_SIZE, 1975 pgprot_writecombine(vma->vm_page_prot), 1976 rdma_entry); 1977 break; 1978 case EFA_MMAP_DMA_PAGE: 1979 for (va = vma->vm_start; va < vma->vm_end; 1980 va += PAGE_SIZE, pfn++) { 1981 err = vm_insert_page(vma, va, pfn_to_page(pfn)); 1982 if (err) 1983 break; 1984 } 1985 break; 1986 default: 1987 err = -EINVAL; 1988 } 1989 1990 if (err) { 1991 ibdev_dbg( 1992 &dev->ibdev, 1993 "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", 1994 entry->address, rdma_entry->npages * PAGE_SIZE, 1995 entry->mmap_flag, err); 1996 atomic64_inc(&dev->stats.mmap_err); 1997 } 1998 1999 rdma_user_mmap_entry_put(rdma_entry); 2000 return err; 2001 } 2002 2003 int efa_mmap(struct ib_ucontext *ibucontext, 2004 struct vm_area_struct *vma) 2005 { 2006 struct efa_ucontext *ucontext = to_eucontext(ibucontext); 2007 struct 
static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
		      struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *rdma_entry;
	struct efa_user_mmap_entry *entry;
	unsigned long va;
	int err = 0;
	u64 pfn;

	rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
	if (!rdma_entry) {
		ibdev_dbg(&dev->ibdev,
			  "pgoff[%#lx] does not have valid entry\n",
			  vma->vm_pgoff);
		atomic64_inc(&dev->stats.mmap_err);
		return -EINVAL;
	}
	entry = to_emmap(rdma_entry);

	ibdev_dbg(&dev->ibdev,
		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
		  entry->address, rdma_entry->npages * PAGE_SIZE,
		  entry->mmap_flag);

	pfn = entry->address >> PAGE_SHIFT;
	switch (entry->mmap_flag) {
	case EFA_MMAP_IO_NC:
		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
					entry->rdma_entry.npages * PAGE_SIZE,
					pgprot_noncached(vma->vm_page_prot),
					rdma_entry);
		break;
	case EFA_MMAP_IO_WC:
		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
					entry->rdma_entry.npages * PAGE_SIZE,
					pgprot_writecombine(vma->vm_page_prot),
					rdma_entry);
		break;
	case EFA_MMAP_DMA_PAGE:
		for (va = vma->vm_start; va < vma->vm_end;
		     va += PAGE_SIZE, pfn++) {
			err = vm_insert_page(vma, va, pfn_to_page(pfn));
			if (err)
				break;
		}
		break;
	default:
		err = -EINVAL;
	}

	if (err) {
		ibdev_dbg(
			&dev->ibdev,
			"Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
			entry->address, rdma_entry->npages * PAGE_SIZE,
			entry->mmap_flag, err);
		atomic64_inc(&dev->stats.mmap_err);
	}

	rdma_user_mmap_entry_put(rdma_entry);
	return err;
}

int efa_mmap(struct ib_ucontext *ibucontext,
	     struct vm_area_struct *vma)
{
	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
	struct efa_dev *dev = to_edev(ibucontext->device);
	size_t length = vma->vm_end - vma->vm_start;

	ibdev_dbg(&dev->ibdev,
		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);

	return __efa_mmap(dev, ucontext, vma);
}

static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
{
	struct efa_com_destroy_ah_params params = {
		.ah = ah->ah,
		.pdn = to_epd(ah->ibah.pd)->pdn,
	};

	return efa_com_destroy_ah(&dev->edev, &params);
}

int efa_create_ah(struct ib_ah *ibah,
		  struct rdma_ah_init_attr *init_attr,
		  struct ib_udata *udata)
{
	struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
	struct efa_dev *dev = to_edev(ibah->device);
	struct efa_com_create_ah_params params = {};
	struct efa_ibv_create_ah_resp resp = {};
	struct efa_com_create_ah_result result;
	struct efa_ah *ah = to_eah(ibah);
	int err;

	if (!(init_attr->flags & RDMA_CREATE_AH_SLEEPABLE)) {
		ibdev_dbg(&dev->ibdev,
			  "Create address handle is not supported in atomic context\n");
		err = -EOPNOTSUPP;
		goto err_out;
	}

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
		err = -EINVAL;
		goto err_out;
	}

	memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
	       sizeof(params.dest_addr));
	params.pdn = to_epd(ibah->pd)->pdn;
	err = efa_com_create_ah(&dev->edev, &params, &result);
	if (err)
		goto err_out;

	memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
	ah->ah = result.ah;

	resp.efa_address_handle = result.ah;

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for create_ah response\n");
			goto err_destroy_ah;
		}
	}
	ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);

	return 0;

err_destroy_ah:
	efa_ah_destroy(dev, ah);
err_out:
	atomic64_inc(&dev->stats.create_ah_err);
	return err;
}

int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
{
	struct efa_dev *dev = to_edev(ibah->pd->device);
	struct efa_ah *ah = to_eah(ibah);

	ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah);

	if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
		ibdev_dbg(&dev->ibdev,
			  "Destroy address handle is not supported in atomic context\n");
		return -EOPNOTSUPP;
	}

	efa_ah_destroy(dev, ah);
	return 0;
}

struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev,
					      u32 port_num)
{
	return rdma_alloc_hw_stats_struct(efa_port_stats_descs,
					  ARRAY_SIZE(efa_port_stats_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev)
{
	return rdma_alloc_hw_stats_struct(efa_device_stats_descs,
					  ARRAY_SIZE(efa_device_stats_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
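/*
 * Device-level counters reported through efa_get_hw_stats(): admin
 * command counters kept by the admin queue plus the driver's software
 * error counters.
 */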
static int efa_fill_device_stats(struct efa_dev *dev,
				 struct rdma_hw_stats *stats)
{
	struct efa_com_stats_admin *as = &dev->edev.aq.stats;
	struct efa_stats *s = &dev->stats;

	stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
	stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
	stats->value[EFA_CMDS_ERR] = atomic64_read(&as->cmd_err);
	stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);

	stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
	stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err);
	stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err);
	stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err);
	stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err);
	stats->value[EFA_ALLOC_UCONTEXT_ERR] =
		atomic64_read(&s->alloc_ucontext_err);
	stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err);
	stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err);

	return ARRAY_SIZE(efa_device_stats_descs);
}

static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats,
			       u32 port_num)
{
	struct efa_com_get_stats_params params = {};
	union efa_com_get_stats_result result;
	struct efa_com_rdma_write_stats *rws;
	struct efa_com_rdma_read_stats *rrs;
	struct efa_com_messages_stats *ms;
	struct efa_com_basic_stats *bs;
	int err;

	params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
	params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;

	err = efa_com_get_stats(&dev->edev, &params, &result);
	if (err)
		return err;

	bs = &result.basic_stats;
	stats->value[EFA_TX_BYTES] = bs->tx_bytes;
	stats->value[EFA_TX_PKTS] = bs->tx_pkts;
	stats->value[EFA_RX_BYTES] = bs->rx_bytes;
	stats->value[EFA_RX_PKTS] = bs->rx_pkts;
	stats->value[EFA_RX_DROPS] = bs->rx_drops;

	params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES;
	err = efa_com_get_stats(&dev->edev, &params, &result);
	if (err)
		return err;

	ms = &result.messages_stats;
	stats->value[EFA_SEND_BYTES] = ms->send_bytes;
	stats->value[EFA_SEND_WRS] = ms->send_wrs;
	stats->value[EFA_RECV_BYTES] = ms->recv_bytes;
	stats->value[EFA_RECV_WRS] = ms->recv_wrs;

	params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_READ;
	err = efa_com_get_stats(&dev->edev, &params, &result);
	if (err)
		return err;

	rrs = &result.rdma_read_stats;
	stats->value[EFA_RDMA_READ_WRS] = rrs->read_wrs;
	stats->value[EFA_RDMA_READ_BYTES] = rrs->read_bytes;
	stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err;
	stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes;

	if (EFA_DEV_CAP(dev, RDMA_WRITE)) {
		params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE;
		err = efa_com_get_stats(&dev->edev, &params, &result);
		if (err)
			return err;

		rws = &result.rdma_write_stats;
		stats->value[EFA_RDMA_WRITE_WRS] = rws->write_wrs;
		stats->value[EFA_RDMA_WRITE_BYTES] = rws->write_bytes;
		stats->value[EFA_RDMA_WRITE_WR_ERR] = rws->write_wr_err;
		stats->value[EFA_RDMA_WRITE_RECV_BYTES] = rws->write_recv_bytes;
	}

	return ARRAY_SIZE(efa_port_stats_descs);
}

int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
		     u32 port_num, int index)
{
	if (port_num)
		return efa_fill_port_stats(to_edev(ibdev), stats, port_num);
	else
		return efa_fill_device_stats(to_edev(ibdev), stats);
}

enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
					 u32 port_num)
{
	return IB_LINK_LAYER_UNSPECIFIED;
}
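/*
 * Driver-specific uverbs ioctl method for querying an MR: returns the
 * recv, RDMA read and RDMA recv interconnect ids together with a
 * validity mask. The method is attached to the common MR object and
 * chained into the driver's uapi definitions below.
 */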
DECLARE_UVERBS_NAMED_METHOD(EFA_IB_METHOD_MR_QUERY,
			    UVERBS_ATTR_IDR(EFA_IB_ATTR_QUERY_MR_HANDLE,
					    UVERBS_OBJECT_MR,
					    UVERBS_ACCESS_READ,
					    UA_MANDATORY),
			    UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY,
						UVERBS_ATTR_TYPE(u16),
						UA_MANDATORY),
			    UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID,
						UVERBS_ATTR_TYPE(u16),
						UA_MANDATORY),
			    UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID,
						UVERBS_ATTR_TYPE(u16),
						UA_MANDATORY),
			    UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID,
						UVERBS_ATTR_TYPE(u16),
						UA_MANDATORY));

ADD_UVERBS_METHODS(efa_mr,
		   UVERBS_OBJECT_MR,
		   &UVERBS_METHOD(EFA_IB_METHOD_MR_QUERY));

const struct uapi_definition efa_uapi_defs[] = {
	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_MR,
				&efa_mr),
	{},
};