1 /* 2 * Broadcom NetXtreme-E RoCE driver. 3 * 4 * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term 5 * Broadcom refers to Broadcom Limited and/or its subsidiaries. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in 21 * the documentation and/or other materials provided with the 22 * distribution. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' 25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 26 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 27 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 31 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 32 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 33 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 34 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 * 36 * Description: Main component of the bnxt_re driver 37 */ 38 39 #include <linux/module.h> 40 #include <linux/netdevice.h> 41 #include <linux/ethtool.h> 42 #include <linux/mutex.h> 43 #include <linux/list.h> 44 #include <linux/rculist.h> 45 #include <linux/spinlock.h> 46 #include <linux/pci.h> 47 #include <net/dcbnl.h> 48 #include <net/ipv6.h> 49 #include <net/addrconf.h> 50 #include <linux/if_ether.h> 51 #include <linux/auxiliary_bus.h> 52 53 #include <rdma/ib_verbs.h> 54 #include <rdma/ib_user_verbs.h> 55 #include <rdma/ib_umem.h> 56 #include <rdma/ib_addr.h> 57 #include <linux/hashtable.h> 58 59 #include "bnxt_ulp.h" 60 #include "roce_hsi.h" 61 #include "qplib_res.h" 62 #include "qplib_sp.h" 63 #include "qplib_fp.h" 64 #include "qplib_rcfw.h" 65 #include "bnxt_re.h" 66 #include "ib_verbs.h" 67 #include <rdma/bnxt_re-abi.h> 68 #include "bnxt.h" 69 #include "hw_counters.h" 70 #include "debugfs.h" 71 72 static char version[] = 73 BNXT_RE_DESC "\n"; 74 75 MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>"); 76 MODULE_DESCRIPTION(BNXT_RE_DESC); 77 MODULE_LICENSE("Dual BSD/GPL"); 78 79 /* globals */ 80 static DEFINE_MUTEX(bnxt_re_mutex); 81 82 static void bnxt_re_stop_irq(void *handle); 83 static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev); 84 static int bnxt_re_netdev_event(struct notifier_block *notifier, 85 unsigned long event, void *ptr); 86 static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev); 87 static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type); 88 static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev); 89 90 static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, 91 u32 *offset); 92 static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable); 93 static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev) 94 { 95 struct bnxt_qplib_chip_ctx *cctx; 96 struct bnxt_en_dev *en_dev; 97 struct bnxt_qplib_res *res; 98 u32 l2db_len = 0; 99 u32 offset = 0; 100 u32 barlen; 101 int rc; 102 103 res = &rdev->qplib_res; 104 en_dev = rdev->en_dev; 105 cctx = rdev->chip_ctx; 106 107 /* Issue qcfg */ 108 rc = bnxt_re_hwrm_qcfg(rdev, &l2db_len, &offset); 109 if (rc) 110 dev_info(rdev_to_dev(rdev), 111 "Couldn't get DB bar size, Low latency framework is disabled\n"); 112 /* set register offsets for both UC and WC */ 113 if (bnxt_qplib_is_chip_gen_p7(cctx)) { 114 res->dpi_tbl.ucreg.offset = offset; 115 res->dpi_tbl.wcreg.offset = en_dev->l2_db_size; 116 } else { 117 res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET : 118 BNXT_QPLIB_DBR_PF_DB_OFFSET; 119 res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset; 120 } 121 122 /* If WC mapping is disabled by L2 driver then en_dev->l2_db_size 123 * is equal to the DB-Bar actual size. This indicates that L2 124 * is mapping entire bar as UC-. RoCE driver can't enable WC mapping 125 * in such cases and DB-push will be disabled. 126 */ 127 barlen = pci_resource_len(res->pdev, RCFW_DBR_PCI_BAR_REGION); 128 if (cctx->modes.db_push && l2db_len && en_dev->l2_db_size != barlen) { 129 res->dpi_tbl.wcreg.offset = en_dev->l2_db_size; 130 dev_info(rdev_to_dev(rdev), "Low latency framework is enabled\n"); 131 } 132 } 133 134 static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev) 135 { 136 struct bnxt_qplib_chip_ctx *cctx; 137 138 cctx = rdev->chip_ctx; 139 cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? 140 BNXT_QPLIB_WQE_MODE_VARIABLE : BNXT_QPLIB_WQE_MODE_STATIC; 141 if (bnxt_re_hwrm_qcaps(rdev)) 142 dev_err(rdev_to_dev(rdev), 143 "Failed to query hwrm qcaps\n"); 144 if (bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx)) { 145 cctx->modes.toggle_bits |= BNXT_QPLIB_CQ_TOGGLE_BIT; 146 cctx->modes.toggle_bits |= BNXT_QPLIB_SRQ_TOGGLE_BIT; 147 } 148 } 149 150 static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) 151 { 152 struct bnxt_qplib_chip_ctx *chip_ctx; 153 154 if (!rdev->chip_ctx) 155 return; 156 chip_ctx = rdev->chip_ctx; 157 rdev->chip_ctx = NULL; 158 rdev->rcfw.res = NULL; 159 rdev->qplib_res.cctx = NULL; 160 rdev->qplib_res.pdev = NULL; 161 rdev->qplib_res.netdev = NULL; 162 kfree(chip_ctx); 163 } 164 165 static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) 166 { 167 struct bnxt_qplib_chip_ctx *chip_ctx; 168 struct bnxt_en_dev *en_dev; 169 int rc; 170 171 en_dev = rdev->en_dev; 172 173 rdev->qplib_res.pdev = en_dev->pdev; 174 chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL); 175 if (!chip_ctx) 176 return -ENOMEM; 177 chip_ctx->chip_num = en_dev->chip_num; 178 chip_ctx->hw_stats_size = en_dev->hw_ring_stats_size; 179 180 rdev->chip_ctx = chip_ctx; 181 /* rest members to follow eventually */ 182 183 rdev->qplib_res.cctx = rdev->chip_ctx; 184 rdev->rcfw.res = &rdev->qplib_res; 185 rdev->qplib_res.dattr = &rdev->dev_attr; 186 rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev); 187 rdev->qplib_res.en_dev = en_dev; 188 189 bnxt_re_set_drv_mode(rdev); 190 191 bnxt_re_set_db_offset(rdev); 192 rc = bnxt_qplib_map_db_bar(&rdev->qplib_res); 193 if (rc) { 194 kfree(rdev->chip_ctx); 195 rdev->chip_ctx = NULL; 196 return rc; 197 } 198 199 if (bnxt_qplib_determine_atomics(en_dev->pdev)) 200 ibdev_info(&rdev->ibdev, 201 "platform doesn't support global atomics."); 202 return 0; 203 } 204 205 /* SR-IOV helper functions */ 206 207 static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev) 208 { 209 if (BNXT_EN_VF(rdev->en_dev)) 210 rdev->is_virtfn = 1; 211 } 212 213 /* Set the maximum number of each resource that the driver actually wants 214 * to allocate. This may be up to the maximum number the firmware has 215 * reserved for the function. The driver may choose to allocate fewer 216 * resources than the firmware maximum. 217 */ 218 static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) 219 { 220 struct bnxt_qplib_dev_attr *attr; 221 struct bnxt_qplib_ctx *ctx; 222 int i; 223 224 attr = &rdev->dev_attr; 225 ctx = &rdev->qplib_ctx; 226 227 ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT, 228 attr->max_qp); 229 ctx->mrw_count = BNXT_RE_MAX_MRW_COUNT_256K; 230 /* Use max_mr from fw since max_mrw does not get set */ 231 ctx->mrw_count = min_t(u32, ctx->mrw_count, attr->max_mr); 232 ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT, 233 attr->max_srq); 234 ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq); 235 if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) 236 for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) 237 rdev->qplib_ctx.tqm_ctx.qcount[i] = 238 rdev->dev_attr.tqm_alloc_reqs[i]; 239 } 240 241 static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf) 242 { 243 struct bnxt_qplib_vf_res *vf_res; 244 u32 mrws = 0; 245 u32 vf_pct; 246 u32 nvfs; 247 248 vf_res = &qplib_ctx->vf_res; 249 /* 250 * Reserve a set of resources for the PF. Divide the remaining 251 * resources among the VFs 252 */ 253 vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF; 254 nvfs = num_vf; 255 num_vf = 100 * num_vf; 256 vf_res->max_qp_per_vf = (qplib_ctx->qpc_count * vf_pct) / num_vf; 257 vf_res->max_srq_per_vf = (qplib_ctx->srqc_count * vf_pct) / num_vf; 258 vf_res->max_cq_per_vf = (qplib_ctx->cq_count * vf_pct) / num_vf; 259 /* 260 * The driver allows many more MRs than other resources. If the 261 * firmware does also, then reserve a fixed amount for the PF and 262 * divide the rest among VFs. VFs may use many MRs for NFS 263 * mounts, ISER, NVME applications, etc. If the firmware severely 264 * restricts the number of MRs, then let PF have half and divide 265 * the rest among VFs, as for the other resource types. 266 */ 267 if (qplib_ctx->mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) { 268 mrws = qplib_ctx->mrw_count * vf_pct; 269 nvfs = num_vf; 270 } else { 271 mrws = qplib_ctx->mrw_count - BNXT_RE_RESVD_MR_FOR_PF; 272 } 273 vf_res->max_mrw_per_vf = (mrws / nvfs); 274 vf_res->max_gid_per_vf = BNXT_RE_MAX_GID_PER_VF; 275 } 276 277 static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) 278 { 279 u32 num_vfs; 280 281 memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res)); 282 bnxt_re_limit_pf_res(rdev); 283 284 num_vfs = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? 285 BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs; 286 if (num_vfs) 287 bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs); 288 } 289 290 static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev) 291 { 292 /* 293 * Use the total VF count since the actual VF count may not be 294 * available at this point. 295 */ 296 rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev); 297 if (!rdev->num_vfs) 298 return; 299 300 bnxt_re_set_resource_limits(rdev); 301 bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, 302 &rdev->qplib_ctx); 303 } 304 305 static void bnxt_re_shutdown(struct auxiliary_device *adev) 306 { 307 struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev); 308 struct bnxt_re_dev *rdev; 309 310 rdev = en_info->rdev; 311 ib_unregister_device(&rdev->ibdev); 312 bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); 313 } 314 315 static void bnxt_re_stop_irq(void *handle) 316 { 317 struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); 318 struct bnxt_qplib_rcfw *rcfw; 319 struct bnxt_re_dev *rdev; 320 struct bnxt_qplib_nq *nq; 321 int indx; 322 323 rdev = en_info->rdev; 324 rcfw = &rdev->rcfw; 325 326 for (indx = BNXT_RE_NQ_IDX; indx < rdev->nqr->num_msix; indx++) { 327 nq = &rdev->nqr->nq[indx - 1]; 328 bnxt_qplib_nq_stop_irq(nq, false); 329 } 330 331 bnxt_qplib_rcfw_stop_irq(rcfw, false); 332 } 333 334 static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) 335 { 336 struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); 337 struct bnxt_msix_entry *msix_ent; 338 struct bnxt_qplib_rcfw *rcfw; 339 struct bnxt_re_dev *rdev; 340 struct bnxt_qplib_nq *nq; 341 int indx, rc; 342 343 rdev = en_info->rdev; 344 msix_ent = rdev->nqr->msix_entries; 345 rcfw = &rdev->rcfw; 346 if (!ent) { 347 /* Not setting the f/w timeout bit in rcfw. 348 * During the driver unload the first command 349 * to f/w will timeout and that will set the 350 * timeout bit. 351 */ 352 ibdev_err(&rdev->ibdev, "Failed to re-start IRQs\n"); 353 return; 354 } 355 356 /* Vectors may change after restart, so update with new vectors 357 * in device sctructure. 358 */ 359 for (indx = 0; indx < rdev->nqr->num_msix; indx++) 360 rdev->nqr->msix_entries[indx].vector = ent[indx].vector; 361 362 rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector, 363 false); 364 if (rc) { 365 ibdev_warn(&rdev->ibdev, "Failed to reinit CREQ\n"); 366 return; 367 } 368 for (indx = BNXT_RE_NQ_IDX ; indx < rdev->nqr->num_msix; indx++) { 369 nq = &rdev->nqr->nq[indx - 1]; 370 rc = bnxt_qplib_nq_start_irq(nq, indx - 1, 371 msix_ent[indx].vector, false); 372 if (rc) { 373 ibdev_warn(&rdev->ibdev, "Failed to reinit NQ index %d\n", 374 indx - 1); 375 return; 376 } 377 } 378 } 379 380 static struct bnxt_ulp_ops bnxt_re_ulp_ops = { 381 .ulp_irq_stop = bnxt_re_stop_irq, 382 .ulp_irq_restart = bnxt_re_start_irq 383 }; 384 385 /* RoCE -> Net driver */ 386 387 static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev) 388 { 389 struct bnxt_en_dev *en_dev; 390 391 en_dev = rdev->en_dev; 392 return bnxt_register_dev(en_dev, &bnxt_re_ulp_ops, rdev->adev); 393 } 394 395 static void bnxt_re_init_hwrm_hdr(struct input *hdr, u16 opcd) 396 { 397 hdr->req_type = cpu_to_le16(opcd); 398 hdr->cmpl_ring = cpu_to_le16(-1); 399 hdr->target_id = cpu_to_le16(-1); 400 } 401 402 static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg, 403 int msg_len, void *resp, int resp_max_len, 404 int timeout) 405 { 406 fw_msg->msg = msg; 407 fw_msg->msg_len = msg_len; 408 fw_msg->resp = resp; 409 fw_msg->resp_max_len = resp_max_len; 410 fw_msg->timeout = timeout; 411 } 412 413 /* Query device config using common hwrm */ 414 static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, 415 u32 *offset) 416 { 417 struct bnxt_en_dev *en_dev = rdev->en_dev; 418 struct hwrm_func_qcfg_output resp = {0}; 419 struct hwrm_func_qcfg_input req = {0}; 420 struct bnxt_fw_msg fw_msg = {}; 421 int rc; 422 423 bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCFG); 424 req.fid = cpu_to_le16(0xffff); 425 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 426 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 427 rc = bnxt_send_msg(en_dev, &fw_msg); 428 if (!rc) { 429 *db_len = PAGE_ALIGN(le16_to_cpu(resp.l2_doorbell_bar_size_kb) * 1024); 430 *offset = PAGE_ALIGN(le16_to_cpu(resp.legacy_l2_db_size_kb) * 1024); 431 } 432 return rc; 433 } 434 435 /* Query function capabilities using common hwrm */ 436 int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev) 437 { 438 struct bnxt_en_dev *en_dev = rdev->en_dev; 439 struct hwrm_func_qcaps_output resp = {}; 440 struct hwrm_func_qcaps_input req = {}; 441 struct bnxt_qplib_chip_ctx *cctx; 442 struct bnxt_fw_msg fw_msg = {}; 443 u32 flags_ext2; 444 int rc; 445 446 cctx = rdev->chip_ctx; 447 bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCAPS); 448 req.fid = cpu_to_le16(0xffff); 449 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 450 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 451 452 rc = bnxt_send_msg(en_dev, &fw_msg); 453 if (rc) 454 return rc; 455 cctx->modes.db_push = le32_to_cpu(resp.flags) & FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE; 456 457 flags_ext2 = le32_to_cpu(resp.flags_ext2); 458 cctx->modes.dbr_pacing = flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED || 459 flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_V0_SUPPORTED; 460 return 0; 461 } 462 463 static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev) 464 { 465 struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data; 466 struct hwrm_func_dbr_pacing_qcfg_output resp = {}; 467 struct hwrm_func_dbr_pacing_qcfg_input req = {}; 468 struct bnxt_en_dev *en_dev = rdev->en_dev; 469 struct bnxt_qplib_chip_ctx *cctx; 470 struct bnxt_fw_msg fw_msg = {}; 471 int rc; 472 473 cctx = rdev->chip_ctx; 474 bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_DBR_PACING_QCFG); 475 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 476 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 477 rc = bnxt_send_msg(en_dev, &fw_msg); 478 if (rc) 479 return rc; 480 481 if ((le32_to_cpu(resp.dbr_stat_db_fifo_reg) & 482 FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK) == 483 FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_GRC) 484 cctx->dbr_stat_db_fifo = 485 le32_to_cpu(resp.dbr_stat_db_fifo_reg) & 486 ~FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK; 487 488 pacing_data->fifo_max_depth = le32_to_cpu(resp.dbr_stat_db_max_fifo_depth); 489 if (!pacing_data->fifo_max_depth) 490 pacing_data->fifo_max_depth = BNXT_RE_MAX_FIFO_DEPTH(cctx); 491 pacing_data->fifo_room_mask = le32_to_cpu(resp.dbr_stat_db_fifo_reg_fifo_room_mask); 492 pacing_data->fifo_room_shift = resp.dbr_stat_db_fifo_reg_fifo_room_shift; 493 494 return 0; 495 } 496 497 /* Update the pacing tunable parameters to the default values */ 498 static void bnxt_re_set_default_pacing_data(struct bnxt_re_dev *rdev) 499 { 500 struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data; 501 502 pacing_data->do_pacing = rdev->pacing.dbr_def_do_pacing; 503 pacing_data->pacing_th = rdev->pacing.pacing_algo_th; 504 pacing_data->alarm_th = 505 pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE; 506 } 507 508 static u32 __get_fifo_occupancy(struct bnxt_re_dev *rdev) 509 { 510 struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data; 511 u32 read_val, fifo_occup; 512 513 read_val = readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off); 514 fifo_occup = pacing_data->fifo_max_depth - 515 ((read_val & pacing_data->fifo_room_mask) >> 516 pacing_data->fifo_room_shift); 517 return fifo_occup; 518 } 519 520 static bool is_dbr_fifo_full(struct bnxt_re_dev *rdev) 521 { 522 u32 max_occup, fifo_occup; 523 524 fifo_occup = __get_fifo_occupancy(rdev); 525 max_occup = BNXT_RE_MAX_FIFO_DEPTH(rdev->chip_ctx) - 1; 526 if (fifo_occup == max_occup) 527 return true; 528 529 return false; 530 } 531 532 static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev) 533 { 534 struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data; 535 u32 retry_fifo_check = 1000; 536 u32 fifo_occup; 537 538 /* loop shouldn't run infintely as the occupancy usually goes 539 * below pacing algo threshold as soon as pacing kicks in. 540 */ 541 while (1) { 542 fifo_occup = __get_fifo_occupancy(rdev); 543 /* Fifo occupancy cannot be greater the MAX FIFO depth */ 544 if (fifo_occup > pacing_data->fifo_max_depth) 545 break; 546 547 if (fifo_occup < pacing_data->pacing_th) 548 break; 549 if (!retry_fifo_check--) { 550 dev_info_once(rdev_to_dev(rdev), 551 "%s: fifo_occup = 0x%xfifo_max_depth = 0x%x pacing_th = 0x%x\n", 552 __func__, fifo_occup, pacing_data->fifo_max_depth, 553 pacing_data->pacing_th); 554 break; 555 } 556 557 } 558 } 559 560 static void bnxt_re_db_fifo_check(struct work_struct *work) 561 { 562 struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev, 563 dbq_fifo_check_work); 564 struct bnxt_qplib_db_pacing_data *pacing_data; 565 u32 pacing_save; 566 567 if (!mutex_trylock(&rdev->pacing.dbq_lock)) 568 return; 569 pacing_data = rdev->qplib_res.pacing_data; 570 pacing_save = rdev->pacing.do_pacing_save; 571 __wait_for_fifo_occupancy_below_th(rdev); 572 cancel_delayed_work_sync(&rdev->dbq_pacing_work); 573 if (pacing_save > rdev->pacing.dbr_def_do_pacing) { 574 /* Double the do_pacing value during the congestion */ 575 pacing_save = pacing_save << 1; 576 } else { 577 /* 578 * when a new congestion is detected increase the do_pacing 579 * by 8 times. And also increase the pacing_th by 4 times. The 580 * reason to increase pacing_th is to give more space for the 581 * queue to oscillate down without getting empty, but also more 582 * room for the queue to increase without causing another alarm. 583 */ 584 pacing_save = pacing_save << 3; 585 pacing_data->pacing_th = rdev->pacing.pacing_algo_th * 4; 586 } 587 588 if (pacing_save > BNXT_RE_MAX_DBR_DO_PACING) 589 pacing_save = BNXT_RE_MAX_DBR_DO_PACING; 590 591 pacing_data->do_pacing = pacing_save; 592 rdev->pacing.do_pacing_save = pacing_data->do_pacing; 593 pacing_data->alarm_th = 594 pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE; 595 schedule_delayed_work(&rdev->dbq_pacing_work, 596 msecs_to_jiffies(rdev->pacing.dbq_pacing_time)); 597 rdev->stats.pacing.alerts++; 598 mutex_unlock(&rdev->pacing.dbq_lock); 599 } 600 601 static void bnxt_re_pacing_timer_exp(struct work_struct *work) 602 { 603 struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev, 604 dbq_pacing_work.work); 605 struct bnxt_qplib_db_pacing_data *pacing_data; 606 u32 fifo_occup; 607 608 if (!mutex_trylock(&rdev->pacing.dbq_lock)) 609 return; 610 611 pacing_data = rdev->qplib_res.pacing_data; 612 fifo_occup = __get_fifo_occupancy(rdev); 613 614 if (fifo_occup > pacing_data->pacing_th) 615 goto restart_timer; 616 617 /* 618 * Instead of immediately going back to the default do_pacing 619 * reduce it by 1/8 times and restart the timer. 620 */ 621 pacing_data->do_pacing = pacing_data->do_pacing - (pacing_data->do_pacing >> 3); 622 pacing_data->do_pacing = max_t(u32, rdev->pacing.dbr_def_do_pacing, pacing_data->do_pacing); 623 if (pacing_data->do_pacing <= rdev->pacing.dbr_def_do_pacing) { 624 bnxt_re_set_default_pacing_data(rdev); 625 rdev->stats.pacing.complete++; 626 goto dbq_unlock; 627 } 628 629 restart_timer: 630 schedule_delayed_work(&rdev->dbq_pacing_work, 631 msecs_to_jiffies(rdev->pacing.dbq_pacing_time)); 632 rdev->stats.pacing.resched++; 633 dbq_unlock: 634 rdev->pacing.do_pacing_save = pacing_data->do_pacing; 635 mutex_unlock(&rdev->pacing.dbq_lock); 636 } 637 638 void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev) 639 { 640 struct bnxt_qplib_db_pacing_data *pacing_data; 641 642 if (!rdev->pacing.dbr_pacing) 643 return; 644 mutex_lock(&rdev->pacing.dbq_lock); 645 pacing_data = rdev->qplib_res.pacing_data; 646 647 /* 648 * Increase the alarm_th to max so that other user lib instances do not 649 * keep alerting the driver. 650 */ 651 pacing_data->alarm_th = pacing_data->fifo_max_depth; 652 pacing_data->do_pacing = BNXT_RE_MAX_DBR_DO_PACING; 653 cancel_work_sync(&rdev->dbq_fifo_check_work); 654 schedule_work(&rdev->dbq_fifo_check_work); 655 mutex_unlock(&rdev->pacing.dbq_lock); 656 } 657 658 static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev) 659 { 660 /* Allocate a page for app use */ 661 rdev->pacing.dbr_page = (void *)__get_free_page(GFP_KERNEL); 662 if (!rdev->pacing.dbr_page) 663 return -ENOMEM; 664 665 memset((u8 *)rdev->pacing.dbr_page, 0, PAGE_SIZE); 666 rdev->qplib_res.pacing_data = (struct bnxt_qplib_db_pacing_data *)rdev->pacing.dbr_page; 667 668 if (bnxt_re_hwrm_dbr_pacing_qcfg(rdev)) { 669 free_page((u64)rdev->pacing.dbr_page); 670 rdev->pacing.dbr_page = NULL; 671 return -EIO; 672 } 673 674 /* MAP HW window 2 for reading db fifo depth */ 675 writel(rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_BASE_MASK, 676 rdev->en_dev->bar0 + BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4); 677 rdev->pacing.dbr_db_fifo_reg_off = 678 (rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_OFFSET_MASK) + 679 BNXT_RE_GRC_FIFO_REG_BASE; 680 rdev->pacing.dbr_bar_addr = 681 pci_resource_start(rdev->qplib_res.pdev, 0) + rdev->pacing.dbr_db_fifo_reg_off; 682 683 if (is_dbr_fifo_full(rdev)) { 684 free_page((u64)rdev->pacing.dbr_page); 685 rdev->pacing.dbr_page = NULL; 686 return -EIO; 687 } 688 689 rdev->pacing.pacing_algo_th = BNXT_RE_PACING_ALGO_THRESHOLD; 690 rdev->pacing.dbq_pacing_time = BNXT_RE_DBR_PACING_TIME; 691 rdev->pacing.dbr_def_do_pacing = BNXT_RE_DBR_DO_PACING_NO_CONGESTION; 692 rdev->pacing.do_pacing_save = rdev->pacing.dbr_def_do_pacing; 693 rdev->qplib_res.pacing_data->grc_reg_offset = rdev->pacing.dbr_db_fifo_reg_off; 694 bnxt_re_set_default_pacing_data(rdev); 695 /* Initialize worker for DBR Pacing */ 696 INIT_WORK(&rdev->dbq_fifo_check_work, bnxt_re_db_fifo_check); 697 INIT_DELAYED_WORK(&rdev->dbq_pacing_work, bnxt_re_pacing_timer_exp); 698 return 0; 699 } 700 701 static void bnxt_re_deinitialize_dbr_pacing(struct bnxt_re_dev *rdev) 702 { 703 cancel_work_sync(&rdev->dbq_fifo_check_work); 704 cancel_delayed_work_sync(&rdev->dbq_pacing_work); 705 if (rdev->pacing.dbr_page) 706 free_page((u64)rdev->pacing.dbr_page); 707 708 rdev->pacing.dbr_page = NULL; 709 rdev->pacing.dbr_pacing = false; 710 } 711 712 static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, 713 u16 fw_ring_id, int type) 714 { 715 struct bnxt_en_dev *en_dev; 716 struct hwrm_ring_free_input req = {}; 717 struct hwrm_ring_free_output resp; 718 struct bnxt_fw_msg fw_msg = {}; 719 int rc = -EINVAL; 720 721 if (!rdev) 722 return rc; 723 724 en_dev = rdev->en_dev; 725 726 if (!en_dev) 727 return rc; 728 729 if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) 730 return 0; 731 732 bnxt_re_init_hwrm_hdr((void *)&req, HWRM_RING_FREE); 733 req.ring_type = type; 734 req.ring_id = cpu_to_le16(fw_ring_id); 735 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 736 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 737 rc = bnxt_send_msg(en_dev, &fw_msg); 738 if (rc) 739 ibdev_err(&rdev->ibdev, "Failed to free HW ring:%d :%#x", 740 req.ring_id, rc); 741 return rc; 742 } 743 744 static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, 745 struct bnxt_re_ring_attr *ring_attr, 746 u16 *fw_ring_id) 747 { 748 struct bnxt_en_dev *en_dev = rdev->en_dev; 749 struct hwrm_ring_alloc_input req = {}; 750 struct hwrm_ring_alloc_output resp; 751 struct bnxt_fw_msg fw_msg = {}; 752 int rc = -EINVAL; 753 754 if (!en_dev) 755 return rc; 756 757 bnxt_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC); 758 req.enables = 0; 759 req.page_tbl_addr = cpu_to_le64(ring_attr->dma_arr[0]); 760 if (ring_attr->pages > 1) { 761 /* Page size is in log2 units */ 762 req.page_size = BNXT_PAGE_SHIFT; 763 req.page_tbl_depth = 1; 764 } 765 req.fbo = 0; 766 /* Association of ring index with doorbell index and MSIX number */ 767 req.logical_id = cpu_to_le16(ring_attr->lrid); 768 req.length = cpu_to_le32(ring_attr->depth + 1); 769 req.ring_type = ring_attr->type; 770 req.int_mode = ring_attr->mode; 771 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 772 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 773 rc = bnxt_send_msg(en_dev, &fw_msg); 774 if (!rc) 775 *fw_ring_id = le16_to_cpu(resp.ring_id); 776 777 return rc; 778 } 779 780 static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, 781 u32 fw_stats_ctx_id) 782 { 783 struct bnxt_en_dev *en_dev = rdev->en_dev; 784 struct hwrm_stat_ctx_free_input req = {}; 785 struct hwrm_stat_ctx_free_output resp = {}; 786 struct bnxt_fw_msg fw_msg = {}; 787 int rc = -EINVAL; 788 789 if (!en_dev) 790 return rc; 791 792 if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) 793 return 0; 794 795 bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE); 796 req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id); 797 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 798 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 799 rc = bnxt_send_msg(en_dev, &fw_msg); 800 if (rc) 801 ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x", 802 rc); 803 804 return rc; 805 } 806 807 static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, 808 dma_addr_t dma_map, 809 u32 *fw_stats_ctx_id) 810 { 811 struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx; 812 struct hwrm_stat_ctx_alloc_output resp = {}; 813 struct hwrm_stat_ctx_alloc_input req = {}; 814 struct bnxt_en_dev *en_dev = rdev->en_dev; 815 struct bnxt_fw_msg fw_msg = {}; 816 int rc = -EINVAL; 817 818 *fw_stats_ctx_id = INVALID_STATS_CTX_ID; 819 820 if (!en_dev) 821 return rc; 822 823 bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC); 824 req.update_period_ms = cpu_to_le32(1000); 825 req.stats_dma_addr = cpu_to_le64(dma_map); 826 req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size); 827 req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE; 828 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 829 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 830 rc = bnxt_send_msg(en_dev, &fw_msg); 831 if (!rc) 832 *fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id); 833 834 return rc; 835 } 836 837 static void bnxt_re_disassociate_ucontext(struct ib_ucontext *ibcontext) 838 { 839 } 840 841 /* Device */ 842 843 static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev) 844 { 845 struct ib_device *ibdev = 846 ib_device_get_by_netdev(netdev, RDMA_DRIVER_BNXT_RE); 847 if (!ibdev) 848 return NULL; 849 850 return container_of(ibdev, struct bnxt_re_dev, ibdev); 851 } 852 853 static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, 854 char *buf) 855 { 856 struct bnxt_re_dev *rdev = 857 rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); 858 859 return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->vendor); 860 } 861 static DEVICE_ATTR_RO(hw_rev); 862 863 static ssize_t hca_type_show(struct device *device, 864 struct device_attribute *attr, char *buf) 865 { 866 struct bnxt_re_dev *rdev = 867 rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); 868 869 return sysfs_emit(buf, "%s\n", rdev->ibdev.node_desc); 870 } 871 static DEVICE_ATTR_RO(hca_type); 872 873 static struct attribute *bnxt_re_attributes[] = { 874 &dev_attr_hw_rev.attr, 875 &dev_attr_hca_type.attr, 876 NULL 877 }; 878 879 static const struct attribute_group bnxt_re_dev_attr_group = { 880 .attrs = bnxt_re_attributes, 881 }; 882 883 static int bnxt_re_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr) 884 { 885 struct bnxt_qplib_hwq *mr_hwq; 886 struct nlattr *table_attr; 887 struct bnxt_re_mr *mr; 888 889 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); 890 if (!table_attr) 891 return -EMSGSIZE; 892 893 mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr); 894 mr_hwq = &mr->qplib_mr.hwq; 895 896 if (rdma_nl_put_driver_u32(msg, "page_size", 897 mr_hwq->qe_ppg * mr_hwq->element_size)) 898 goto err; 899 if (rdma_nl_put_driver_u32(msg, "max_elements", mr_hwq->max_elements)) 900 goto err; 901 if (rdma_nl_put_driver_u32(msg, "element_size", mr_hwq->element_size)) 902 goto err; 903 if (rdma_nl_put_driver_u64_hex(msg, "hwq", (unsigned long)mr_hwq)) 904 goto err; 905 if (rdma_nl_put_driver_u64_hex(msg, "va", mr->qplib_mr.va)) 906 goto err; 907 908 nla_nest_end(msg, table_attr); 909 return 0; 910 911 err: 912 nla_nest_cancel(msg, table_attr); 913 return -EMSGSIZE; 914 } 915 916 static int bnxt_re_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr) 917 { 918 struct bnxt_re_dev *rdev; 919 struct bnxt_re_mr *mr; 920 int err, len; 921 void *data; 922 923 mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr); 924 rdev = mr->rdev; 925 926 err = bnxt_re_read_context_allowed(rdev); 927 if (err) 928 return err; 929 930 len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P7 : 931 BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P5; 932 data = kzalloc(len, GFP_KERNEL); 933 if (!data) 934 return -ENOMEM; 935 936 err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_MRW, 937 mr->qplib_mr.lkey, len, data); 938 if (!err) 939 err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data); 940 941 kfree(data); 942 return err; 943 } 944 945 static int bnxt_re_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq) 946 { 947 struct bnxt_qplib_hwq *cq_hwq; 948 struct nlattr *table_attr; 949 struct bnxt_re_cq *cq; 950 951 cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq); 952 cq_hwq = &cq->qplib_cq.hwq; 953 954 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); 955 if (!table_attr) 956 return -EMSGSIZE; 957 958 if (rdma_nl_put_driver_u32(msg, "cq_depth", cq_hwq->depth)) 959 goto err; 960 if (rdma_nl_put_driver_u32(msg, "max_elements", cq_hwq->max_elements)) 961 goto err; 962 if (rdma_nl_put_driver_u32(msg, "element_size", cq_hwq->element_size)) 963 goto err; 964 if (rdma_nl_put_driver_u32(msg, "max_wqe", cq->qplib_cq.max_wqe)) 965 goto err; 966 967 nla_nest_end(msg, table_attr); 968 return 0; 969 970 err: 971 nla_nest_cancel(msg, table_attr); 972 return -EMSGSIZE; 973 } 974 975 static int bnxt_re_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq) 976 { 977 struct bnxt_re_dev *rdev; 978 struct bnxt_re_cq *cq; 979 int err, len; 980 void *data; 981 982 cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq); 983 rdev = cq->rdev; 984 985 err = bnxt_re_read_context_allowed(rdev); 986 if (err) 987 return err; 988 989 len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P7 : 990 BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P5; 991 data = kzalloc(len, GFP_KERNEL); 992 if (!data) 993 return -ENOMEM; 994 995 err = bnxt_qplib_read_context(&rdev->rcfw, 996 CMDQ_READ_CONTEXT_TYPE_CQ, 997 cq->qplib_cq.id, len, data); 998 if (!err) 999 err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data); 1000 1001 kfree(data); 1002 return err; 1003 } 1004 1005 static int bnxt_re_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp) 1006 { 1007 struct bnxt_qplib_qp *qplib_qp; 1008 struct nlattr *table_attr; 1009 struct bnxt_re_qp *qp; 1010 1011 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); 1012 if (!table_attr) 1013 return -EMSGSIZE; 1014 1015 qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); 1016 qplib_qp = &qp->qplib_qp; 1017 1018 if (rdma_nl_put_driver_u32(msg, "sq_max_wqe", qplib_qp->sq.max_wqe)) 1019 goto err; 1020 if (rdma_nl_put_driver_u32(msg, "sq_max_sge", qplib_qp->sq.max_sge)) 1021 goto err; 1022 if (rdma_nl_put_driver_u32(msg, "sq_wqe_size", qplib_qp->sq.wqe_size)) 1023 goto err; 1024 if (rdma_nl_put_driver_u32(msg, "sq_swq_start", qplib_qp->sq.swq_start)) 1025 goto err; 1026 if (rdma_nl_put_driver_u32(msg, "sq_swq_last", qplib_qp->sq.swq_last)) 1027 goto err; 1028 if (rdma_nl_put_driver_u32(msg, "rq_max_wqe", qplib_qp->rq.max_wqe)) 1029 goto err; 1030 if (rdma_nl_put_driver_u32(msg, "rq_max_sge", qplib_qp->rq.max_sge)) 1031 goto err; 1032 if (rdma_nl_put_driver_u32(msg, "rq_wqe_size", qplib_qp->rq.wqe_size)) 1033 goto err; 1034 if (rdma_nl_put_driver_u32(msg, "rq_swq_start", qplib_qp->rq.swq_start)) 1035 goto err; 1036 if (rdma_nl_put_driver_u32(msg, "rq_swq_last", qplib_qp->rq.swq_last)) 1037 goto err; 1038 if (rdma_nl_put_driver_u32(msg, "timeout", qplib_qp->timeout)) 1039 goto err; 1040 1041 nla_nest_end(msg, table_attr); 1042 return 0; 1043 1044 err: 1045 nla_nest_cancel(msg, table_attr); 1046 return -EMSGSIZE; 1047 } 1048 1049 static int bnxt_re_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp) 1050 { 1051 struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibqp->device, ibdev); 1052 int err, len; 1053 void *data; 1054 1055 err = bnxt_re_read_context_allowed(rdev); 1056 if (err) 1057 return err; 1058 1059 len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P7 : 1060 BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P5; 1061 data = kzalloc(len, GFP_KERNEL); 1062 if (!data) 1063 return -ENOMEM; 1064 1065 err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_QPC, 1066 ibqp->qp_num, len, data); 1067 if (!err) 1068 err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data); 1069 1070 kfree(data); 1071 return err; 1072 } 1073 1074 static int bnxt_re_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq) 1075 { 1076 struct nlattr *table_attr; 1077 struct bnxt_re_srq *srq; 1078 1079 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); 1080 if (!table_attr) 1081 return -EMSGSIZE; 1082 1083 srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); 1084 1085 if (rdma_nl_put_driver_u32_hex(msg, "wqe_size", srq->qplib_srq.wqe_size)) 1086 goto err; 1087 if (rdma_nl_put_driver_u32_hex(msg, "max_wqe", srq->qplib_srq.max_wqe)) 1088 goto err; 1089 if (rdma_nl_put_driver_u32_hex(msg, "max_sge", srq->qplib_srq.max_sge)) 1090 goto err; 1091 1092 nla_nest_end(msg, table_attr); 1093 return 0; 1094 1095 err: 1096 nla_nest_cancel(msg, table_attr); 1097 return -EMSGSIZE; 1098 } 1099 1100 static int bnxt_re_fill_res_srq_entry_raw(struct sk_buff *msg, struct ib_srq *ib_srq) 1101 { 1102 struct bnxt_re_dev *rdev; 1103 struct bnxt_re_srq *srq; 1104 int err, len; 1105 void *data; 1106 1107 srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); 1108 rdev = srq->rdev; 1109 1110 err = bnxt_re_read_context_allowed(rdev); 1111 if (err) 1112 return err; 1113 1114 len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P7 : 1115 BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P5; 1116 1117 data = kzalloc(len, GFP_KERNEL); 1118 if (!data) 1119 return -ENOMEM; 1120 1121 err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_SRQ, 1122 srq->qplib_srq.id, len, data); 1123 if (!err) 1124 err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data); 1125 1126 kfree(data); 1127 return err; 1128 } 1129 1130 static const struct ib_device_ops bnxt_re_dev_ops = { 1131 .owner = THIS_MODULE, 1132 .driver_id = RDMA_DRIVER_BNXT_RE, 1133 .uverbs_abi_ver = BNXT_RE_ABI_VERSION, 1134 1135 .add_gid = bnxt_re_add_gid, 1136 .alloc_hw_port_stats = bnxt_re_ib_alloc_hw_port_stats, 1137 .alloc_mr = bnxt_re_alloc_mr, 1138 .alloc_pd = bnxt_re_alloc_pd, 1139 .alloc_ucontext = bnxt_re_alloc_ucontext, 1140 .create_ah = bnxt_re_create_ah, 1141 .create_cq = bnxt_re_create_cq, 1142 .create_qp = bnxt_re_create_qp, 1143 .create_srq = bnxt_re_create_srq, 1144 .create_user_ah = bnxt_re_create_ah, 1145 .dealloc_pd = bnxt_re_dealloc_pd, 1146 .dealloc_ucontext = bnxt_re_dealloc_ucontext, 1147 .del_gid = bnxt_re_del_gid, 1148 .dereg_mr = bnxt_re_dereg_mr, 1149 .destroy_ah = bnxt_re_destroy_ah, 1150 .destroy_cq = bnxt_re_destroy_cq, 1151 .destroy_qp = bnxt_re_destroy_qp, 1152 .destroy_srq = bnxt_re_destroy_srq, 1153 .device_group = &bnxt_re_dev_attr_group, 1154 .disassociate_ucontext = bnxt_re_disassociate_ucontext, 1155 .get_dev_fw_str = bnxt_re_query_fw_str, 1156 .get_dma_mr = bnxt_re_get_dma_mr, 1157 .get_hw_stats = bnxt_re_ib_get_hw_stats, 1158 .get_link_layer = bnxt_re_get_link_layer, 1159 .get_port_immutable = bnxt_re_get_port_immutable, 1160 .map_mr_sg = bnxt_re_map_mr_sg, 1161 .mmap = bnxt_re_mmap, 1162 .mmap_free = bnxt_re_mmap_free, 1163 .modify_qp = bnxt_re_modify_qp, 1164 .modify_srq = bnxt_re_modify_srq, 1165 .poll_cq = bnxt_re_poll_cq, 1166 .post_recv = bnxt_re_post_recv, 1167 .post_send = bnxt_re_post_send, 1168 .post_srq_recv = bnxt_re_post_srq_recv, 1169 .query_ah = bnxt_re_query_ah, 1170 .query_device = bnxt_re_query_device, 1171 .modify_device = bnxt_re_modify_device, 1172 .query_pkey = bnxt_re_query_pkey, 1173 .query_port = bnxt_re_query_port, 1174 .query_qp = bnxt_re_query_qp, 1175 .query_srq = bnxt_re_query_srq, 1176 .reg_user_mr = bnxt_re_reg_user_mr, 1177 .reg_user_mr_dmabuf = bnxt_re_reg_user_mr_dmabuf, 1178 .req_notify_cq = bnxt_re_req_notify_cq, 1179 .resize_cq = bnxt_re_resize_cq, 1180 INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah), 1181 INIT_RDMA_OBJ_SIZE(ib_cq, bnxt_re_cq, ib_cq), 1182 INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd), 1183 INIT_RDMA_OBJ_SIZE(ib_qp, bnxt_re_qp, ib_qp), 1184 INIT_RDMA_OBJ_SIZE(ib_srq, bnxt_re_srq, ib_srq), 1185 INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx), 1186 }; 1187 1188 static const struct ib_device_ops restrack_ops = { 1189 .fill_res_cq_entry = bnxt_re_fill_res_cq_entry, 1190 .fill_res_cq_entry_raw = bnxt_re_fill_res_cq_entry_raw, 1191 .fill_res_qp_entry = bnxt_re_fill_res_qp_entry, 1192 .fill_res_qp_entry_raw = bnxt_re_fill_res_qp_entry_raw, 1193 .fill_res_mr_entry = bnxt_re_fill_res_mr_entry, 1194 .fill_res_mr_entry_raw = bnxt_re_fill_res_mr_entry_raw, 1195 .fill_res_srq_entry = bnxt_re_fill_res_srq_entry, 1196 .fill_res_srq_entry_raw = bnxt_re_fill_res_srq_entry_raw, 1197 }; 1198 1199 static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) 1200 { 1201 struct ib_device *ibdev = &rdev->ibdev; 1202 int ret; 1203 1204 /* ib device init */ 1205 ibdev->node_type = RDMA_NODE_IB_CA; 1206 strscpy(ibdev->node_desc, BNXT_RE_DESC " HCA", 1207 strlen(BNXT_RE_DESC) + 5); 1208 ibdev->phys_port_cnt = 1; 1209 1210 addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr); 1211 1212 ibdev->num_comp_vectors = rdev->nqr->num_msix - 1; 1213 ibdev->dev.parent = &rdev->en_dev->pdev->dev; 1214 ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; 1215 1216 if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) 1217 ibdev->driver_def = bnxt_re_uapi_defs; 1218 1219 ib_set_device_ops(ibdev, &bnxt_re_dev_ops); 1220 ib_set_device_ops(ibdev, &restrack_ops); 1221 ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1); 1222 if (ret) 1223 return ret; 1224 1225 dma_set_max_seg_size(&rdev->en_dev->pdev->dev, UINT_MAX); 1226 ibdev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ); 1227 return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev); 1228 } 1229 1230 static struct bnxt_re_dev *bnxt_re_dev_add(struct auxiliary_device *adev, 1231 struct bnxt_en_dev *en_dev) 1232 { 1233 struct bnxt_re_dev *rdev; 1234 1235 /* Allocate bnxt_re_dev instance here */ 1236 rdev = ib_alloc_device(bnxt_re_dev, ibdev); 1237 if (!rdev) { 1238 ibdev_err(NULL, "%s: bnxt_re_dev allocation failure!", 1239 ROCE_DRV_MODULE_NAME); 1240 return NULL; 1241 } 1242 /* Default values */ 1243 rdev->nb.notifier_call = NULL; 1244 rdev->netdev = en_dev->net; 1245 rdev->en_dev = en_dev; 1246 rdev->adev = adev; 1247 rdev->id = rdev->en_dev->pdev->devfn; 1248 INIT_LIST_HEAD(&rdev->qp_list); 1249 mutex_init(&rdev->qp_lock); 1250 mutex_init(&rdev->pacing.dbq_lock); 1251 atomic_set(&rdev->stats.res.qp_count, 0); 1252 atomic_set(&rdev->stats.res.cq_count, 0); 1253 atomic_set(&rdev->stats.res.srq_count, 0); 1254 atomic_set(&rdev->stats.res.mr_count, 0); 1255 atomic_set(&rdev->stats.res.mw_count, 0); 1256 atomic_set(&rdev->stats.res.ah_count, 0); 1257 atomic_set(&rdev->stats.res.pd_count, 0); 1258 rdev->cosq[0] = 0xFFFF; 1259 rdev->cosq[1] = 0xFFFF; 1260 rdev->cq_coalescing.buf_maxtime = BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME; 1261 if (bnxt_re_chip_gen_p7(en_dev->chip_num)) { 1262 rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7; 1263 rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P7; 1264 } else { 1265 rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P5; 1266 rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P5; 1267 } 1268 rdev->cq_coalescing.en_ring_idle_mode = BNXT_QPLIB_CQ_COAL_DEF_EN_RING_IDLE_MODE; 1269 1270 return rdev; 1271 } 1272 1273 static int bnxt_re_handle_unaffi_async_event(struct creq_func_event 1274 *unaffi_async) 1275 { 1276 switch (unaffi_async->event) { 1277 case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR: 1278 break; 1279 case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR: 1280 break; 1281 case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR: 1282 break; 1283 case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR: 1284 break; 1285 case CREQ_FUNC_EVENT_EVENT_CQ_ERROR: 1286 break; 1287 case CREQ_FUNC_EVENT_EVENT_TQM_ERROR: 1288 break; 1289 case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR: 1290 break; 1291 case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR: 1292 break; 1293 case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR: 1294 break; 1295 case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR: 1296 break; 1297 case CREQ_FUNC_EVENT_EVENT_TIM_ERROR: 1298 break; 1299 default: 1300 return -EINVAL; 1301 } 1302 return 0; 1303 } 1304 1305 static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event, 1306 struct bnxt_re_qp *qp) 1307 { 1308 struct creq_qp_error_notification *err_event; 1309 struct bnxt_re_srq *srq = NULL; 1310 struct ib_event event = {}; 1311 unsigned int flags; 1312 1313 if (qp->qplib_qp.srq) 1314 srq = container_of(qp->qplib_qp.srq, struct bnxt_re_srq, 1315 qplib_srq); 1316 1317 if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR && 1318 rdma_is_kernel_res(&qp->ib_qp.res)) { 1319 flags = bnxt_re_lock_cqs(qp); 1320 bnxt_qplib_add_flush_qp(&qp->qplib_qp); 1321 bnxt_re_unlock_cqs(qp, flags); 1322 } 1323 1324 event.device = &qp->rdev->ibdev; 1325 event.element.qp = &qp->ib_qp; 1326 event.event = IB_EVENT_QP_FATAL; 1327 1328 err_event = (struct creq_qp_error_notification *)qp_event; 1329 1330 switch (err_event->req_err_state_reason) { 1331 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_OPCODE_ERROR: 1332 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TIMEOUT_RETRY_LIMIT: 1333 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RNR_TIMEOUT_RETRY_LIMIT: 1334 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_2: 1335 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_3: 1336 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_READ_RESP: 1337 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_BIND: 1338 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_FAST_REG: 1339 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_INVALIDATE: 1340 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETRAN_LOCAL_ERROR: 1341 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_AV_DOMAIN_ERROR: 1342 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PROD_WQE_MSMTCH_ERROR: 1343 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PSN_RANGE_CHECK_ERROR: 1344 event.event = IB_EVENT_QP_ACCESS_ERR; 1345 break; 1346 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_1: 1347 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_4: 1348 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_READ_RESP_LENGTH: 1349 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_WQE_FORMAT_ERROR: 1350 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ORRQ_FORMAT_ERROR: 1351 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_AVID_ERROR: 1352 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_SERV_TYPE_ERROR: 1353 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_OP_ERROR: 1354 event.event = IB_EVENT_QP_REQ_ERR; 1355 break; 1356 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_MEMORY_ERROR: 1357 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_MEMORY_ERROR: 1358 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CMP_ERROR: 1359 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CQ_LOAD_ERROR: 1360 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_PCI_ERROR: 1361 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_PCI_ERROR: 1362 case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETX_SETUP_ERROR: 1363 event.event = IB_EVENT_QP_FATAL; 1364 break; 1365 1366 default: 1367 break; 1368 } 1369 1370 switch (err_event->res_err_state_reason) { 1371 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEED_MAX: 1372 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PAYLOAD_LENGTH_MISMATCH: 1373 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_SEQ_ERROR_RETRY_LIMIT: 1374 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_INVALID_R_KEY: 1375 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_DOMAIN_ERROR: 1376 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_NO_PERMISSION: 1377 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_RANGE_ERROR: 1378 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_INVALID_R_KEY: 1379 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_DOMAIN_ERROR: 1380 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_NO_PERMISSION: 1381 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_RANGE_ERROR: 1382 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNALIGN_ATOMIC: 1383 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_NOT_FOUND: 1384 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_INVALID_DUP_RKEY: 1385 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_FORMAT_ERROR: 1386 event.event = IB_EVENT_QP_ACCESS_ERR; 1387 break; 1388 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEEDS_WQE: 1389 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_WQE_FORMAT_ERROR: 1390 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNSUPPORTED_OPCODE: 1391 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_REM_INVALIDATE: 1392 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_OPCODE_ERROR: 1393 event.event = IB_EVENT_QP_REQ_ERR; 1394 break; 1395 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_OFLOW: 1396 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CMP_ERROR: 1397 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CQ_LOAD_ERROR: 1398 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_PCI_ERROR: 1399 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_PCI_ERROR: 1400 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_MEMORY_ERROR: 1401 event.event = IB_EVENT_QP_FATAL; 1402 break; 1403 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_LOAD_ERROR: 1404 case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_ERROR: 1405 if (srq) 1406 event.event = IB_EVENT_SRQ_ERR; 1407 break; 1408 default: 1409 break; 1410 } 1411 1412 if (err_event->res_err_state_reason || err_event->req_err_state_reason) { 1413 ibdev_dbg(&qp->rdev->ibdev, 1414 "%s %s qp_id: %d cons (%d %d) req (%d %d) res (%d %d)\n", 1415 __func__, rdma_is_kernel_res(&qp->ib_qp.res) ? "kernel" : "user", 1416 qp->qplib_qp.id, 1417 err_event->sq_cons_idx, 1418 err_event->rq_cons_idx, 1419 err_event->req_slow_path_state, 1420 err_event->req_err_state_reason, 1421 err_event->res_slow_path_state, 1422 err_event->res_err_state_reason); 1423 } else { 1424 if (srq) 1425 event.event = IB_EVENT_QP_LAST_WQE_REACHED; 1426 } 1427 1428 if (event.event == IB_EVENT_SRQ_ERR && srq->ib_srq.event_handler) { 1429 (*srq->ib_srq.event_handler)(&event, 1430 srq->ib_srq.srq_context); 1431 } else if (event.device && qp->ib_qp.event_handler) { 1432 qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); 1433 } 1434 1435 return 0; 1436 } 1437 1438 static int bnxt_re_handle_cq_async_error(void *event, struct bnxt_re_cq *cq) 1439 { 1440 struct creq_cq_error_notification *cqerr; 1441 struct ib_event ibevent = {}; 1442 1443 cqerr = event; 1444 switch (cqerr->cq_err_reason) { 1445 case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_INVALID_ERROR: 1446 case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_OVERFLOW_ERROR: 1447 case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_LOAD_ERROR: 1448 case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_INVALID_ERROR: 1449 case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_OVERFLOW_ERROR: 1450 case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_LOAD_ERROR: 1451 ibevent.event = IB_EVENT_CQ_ERR; 1452 break; 1453 default: 1454 break; 1455 } 1456 1457 if (ibevent.event == IB_EVENT_CQ_ERR && cq->ib_cq.event_handler) { 1458 ibevent.element.cq = &cq->ib_cq; 1459 ibevent.device = &cq->rdev->ibdev; 1460 1461 ibdev_dbg(&cq->rdev->ibdev, 1462 "%s err reason %d\n", __func__, cqerr->cq_err_reason); 1463 cq->ib_cq.event_handler(&ibevent, cq->ib_cq.cq_context); 1464 } 1465 1466 return 0; 1467 } 1468 1469 static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async, 1470 void *obj) 1471 { 1472 struct bnxt_qplib_qp *lib_qp; 1473 struct bnxt_qplib_cq *lib_cq; 1474 struct bnxt_re_qp *qp; 1475 struct bnxt_re_cq *cq; 1476 int rc = 0; 1477 u8 event; 1478 1479 if (!obj) 1480 return rc; /* QP was already dead, still return success */ 1481 1482 event = affi_async->event; 1483 switch (event) { 1484 case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION: 1485 lib_qp = obj; 1486 qp = container_of(lib_qp, struct bnxt_re_qp, qplib_qp); 1487 rc = bnxt_re_handle_qp_async_event(affi_async, qp); 1488 break; 1489 case CREQ_QP_EVENT_EVENT_CQ_ERROR_NOTIFICATION: 1490 lib_cq = obj; 1491 cq = container_of(lib_cq, struct bnxt_re_cq, qplib_cq); 1492 rc = bnxt_re_handle_cq_async_error(affi_async, cq); 1493 break; 1494 default: 1495 rc = -EINVAL; 1496 } 1497 return rc; 1498 } 1499 1500 static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw, 1501 void *aeqe, void *obj) 1502 { 1503 struct creq_qp_event *affi_async; 1504 struct creq_func_event *unaffi_async; 1505 u8 type; 1506 int rc; 1507 1508 type = ((struct creq_base *)aeqe)->type; 1509 if (type == CREQ_BASE_TYPE_FUNC_EVENT) { 1510 unaffi_async = aeqe; 1511 rc = bnxt_re_handle_unaffi_async_event(unaffi_async); 1512 } else { 1513 affi_async = aeqe; 1514 rc = bnxt_re_handle_affi_async_event(affi_async, obj); 1515 } 1516 1517 return rc; 1518 } 1519 1520 static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq, 1521 struct bnxt_qplib_srq *handle, u8 event) 1522 { 1523 struct bnxt_re_srq *srq = container_of(handle, struct bnxt_re_srq, 1524 qplib_srq); 1525 struct ib_event ib_event; 1526 1527 ib_event.device = &srq->rdev->ibdev; 1528 ib_event.element.srq = &srq->ib_srq; 1529 1530 if (srq->ib_srq.event_handler) { 1531 if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT) 1532 ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED; 1533 (*srq->ib_srq.event_handler)(&ib_event, 1534 srq->ib_srq.srq_context); 1535 } 1536 return 0; 1537 } 1538 1539 static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, 1540 struct bnxt_qplib_cq *handle) 1541 { 1542 struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq, 1543 qplib_cq); 1544 1545 if (cq->ib_cq.comp_handler) 1546 (*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context); 1547 1548 return 0; 1549 } 1550 1551 static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) 1552 { 1553 int i; 1554 1555 for (i = 1; i < rdev->nqr->num_msix; i++) 1556 bnxt_qplib_disable_nq(&rdev->nqr->nq[i - 1]); 1557 1558 if (rdev->qplib_res.rcfw) 1559 bnxt_qplib_cleanup_res(&rdev->qplib_res); 1560 } 1561 1562 static int bnxt_re_init_res(struct bnxt_re_dev *rdev) 1563 { 1564 int num_vec_enabled = 0; 1565 int rc = 0, i; 1566 u32 db_offt; 1567 1568 bnxt_qplib_init_res(&rdev->qplib_res); 1569 1570 mutex_init(&rdev->nqr->load_lock); 1571 1572 for (i = 1; i < rdev->nqr->num_msix ; i++) { 1573 db_offt = rdev->nqr->msix_entries[i].db_offset; 1574 rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nqr->nq[i - 1], 1575 i - 1, rdev->nqr->msix_entries[i].vector, 1576 db_offt, &bnxt_re_cqn_handler, 1577 &bnxt_re_srqn_handler); 1578 if (rc) { 1579 ibdev_err(&rdev->ibdev, 1580 "Failed to enable NQ with rc = 0x%x", rc); 1581 goto fail; 1582 } 1583 num_vec_enabled++; 1584 } 1585 return 0; 1586 fail: 1587 for (i = num_vec_enabled; i >= 0; i--) 1588 bnxt_qplib_disable_nq(&rdev->nqr->nq[i]); 1589 return rc; 1590 } 1591 1592 static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev) 1593 { 1594 struct bnxt_qplib_nq *nq; 1595 u8 type; 1596 int i; 1597 1598 for (i = 0; i < rdev->nqr->num_msix - 1; i++) { 1599 type = bnxt_qplib_get_ring_type(rdev->chip_ctx); 1600 nq = &rdev->nqr->nq[i]; 1601 bnxt_re_net_ring_free(rdev, nq->ring_id, type); 1602 bnxt_qplib_free_nq(nq); 1603 nq->res = NULL; 1604 } 1605 } 1606 1607 static void bnxt_re_free_res(struct bnxt_re_dev *rdev) 1608 { 1609 bnxt_re_free_nq_res(rdev); 1610 1611 if (rdev->qplib_res.dpi_tbl.max) { 1612 bnxt_qplib_dealloc_dpi(&rdev->qplib_res, 1613 &rdev->dpi_privileged); 1614 } 1615 if (rdev->qplib_res.rcfw) { 1616 bnxt_qplib_free_res(&rdev->qplib_res); 1617 rdev->qplib_res.rcfw = NULL; 1618 } 1619 } 1620 1621 static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) 1622 { 1623 struct bnxt_re_ring_attr rattr = {}; 1624 int num_vec_created = 0; 1625 int rc, i; 1626 u8 type; 1627 1628 /* Configure and allocate resources for qplib */ 1629 rdev->qplib_res.rcfw = &rdev->rcfw; 1630 rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr); 1631 if (rc) 1632 goto fail; 1633 1634 rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->en_dev->pdev, 1635 rdev->netdev, &rdev->dev_attr); 1636 if (rc) 1637 goto fail; 1638 1639 rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res, 1640 &rdev->dpi_privileged, 1641 rdev, BNXT_QPLIB_DPI_TYPE_KERNEL); 1642 if (rc) 1643 goto dealloc_res; 1644 1645 for (i = 0; i < rdev->nqr->num_msix - 1; i++) { 1646 struct bnxt_qplib_nq *nq; 1647 1648 nq = &rdev->nqr->nq[i]; 1649 nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT; 1650 rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, nq); 1651 if (rc) { 1652 ibdev_err(&rdev->ibdev, "Alloc Failed NQ%d rc:%#x", 1653 i, rc); 1654 goto free_nq; 1655 } 1656 type = bnxt_qplib_get_ring_type(rdev->chip_ctx); 1657 rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr; 1658 rattr.pages = nq->hwq.pbl[rdev->nqr->nq[i].hwq.level].pg_count; 1659 rattr.type = type; 1660 rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX; 1661 rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1; 1662 rattr.lrid = rdev->nqr->msix_entries[i + 1].ring_idx; 1663 rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id); 1664 if (rc) { 1665 ibdev_err(&rdev->ibdev, 1666 "Failed to allocate NQ fw id with rc = 0x%x", 1667 rc); 1668 bnxt_qplib_free_nq(nq); 1669 goto free_nq; 1670 } 1671 num_vec_created++; 1672 } 1673 return 0; 1674 free_nq: 1675 for (i = num_vec_created - 1; i >= 0; i--) { 1676 type = bnxt_qplib_get_ring_type(rdev->chip_ctx); 1677 bnxt_re_net_ring_free(rdev, rdev->nqr->nq[i].ring_id, type); 1678 bnxt_qplib_free_nq(&rdev->nqr->nq[i]); 1679 } 1680 bnxt_qplib_dealloc_dpi(&rdev->qplib_res, 1681 &rdev->dpi_privileged); 1682 dealloc_res: 1683 bnxt_qplib_free_res(&rdev->qplib_res); 1684 1685 fail: 1686 rdev->qplib_res.rcfw = NULL; 1687 return rc; 1688 } 1689 1690 static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp, 1691 u8 port_num, enum ib_event_type event) 1692 { 1693 struct ib_event ib_event; 1694 1695 ib_event.device = ibdev; 1696 if (qp) { 1697 ib_event.element.qp = qp; 1698 ib_event.event = event; 1699 if (qp->event_handler) 1700 qp->event_handler(&ib_event, qp->qp_context); 1701 1702 } else { 1703 ib_event.element.port_num = port_num; 1704 ib_event.event = event; 1705 ib_dispatch_event(&ib_event); 1706 } 1707 } 1708 1709 static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev, 1710 struct bnxt_re_qp *qp) 1711 { 1712 return (qp->ib_qp.qp_type == IB_QPT_GSI) || 1713 (qp == rdev->gsi_ctx.gsi_sqp); 1714 } 1715 1716 static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev) 1717 { 1718 struct bnxt_re_qp *qp; 1719 1720 mutex_lock(&rdev->qp_lock); 1721 list_for_each_entry(qp, &rdev->qp_list, list) { 1722 /* Modify the state of all QPs except QP1/Shadow QP */ 1723 if (!bnxt_re_is_qp1_or_shadow_qp(rdev, qp)) { 1724 if (qp->qplib_qp.state != 1725 CMDQ_MODIFY_QP_NEW_STATE_RESET && 1726 qp->qplib_qp.state != 1727 CMDQ_MODIFY_QP_NEW_STATE_ERR) 1728 bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp, 1729 1, IB_EVENT_QP_FATAL); 1730 } 1731 } 1732 mutex_unlock(&rdev->qp_lock); 1733 } 1734 1735 static int bnxt_re_update_gid(struct bnxt_re_dev *rdev) 1736 { 1737 struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl; 1738 struct bnxt_qplib_gid gid; 1739 u16 gid_idx, index; 1740 int rc = 0; 1741 1742 if (!ib_device_try_get(&rdev->ibdev)) 1743 return 0; 1744 1745 for (index = 0; index < sgid_tbl->active; index++) { 1746 gid_idx = sgid_tbl->hw_id[index]; 1747 1748 if (!memcmp(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero, 1749 sizeof(bnxt_qplib_gid_zero))) 1750 continue; 1751 /* need to modify the VLAN enable setting of non VLAN GID only 1752 * as setting is done for VLAN GID while adding GID 1753 */ 1754 if (sgid_tbl->vlan[index]) 1755 continue; 1756 1757 memcpy(&gid, &sgid_tbl->tbl[index], sizeof(gid)); 1758 1759 rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx, 1760 rdev->qplib_res.netdev->dev_addr); 1761 } 1762 1763 ib_device_put(&rdev->ibdev); 1764 return rc; 1765 } 1766 1767 static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev) 1768 { 1769 u32 prio_map = 0, tmp_map = 0; 1770 struct net_device *netdev; 1771 struct dcb_app app = {}; 1772 1773 netdev = rdev->netdev; 1774 1775 app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE; 1776 app.protocol = ETH_P_IBOE; 1777 tmp_map = dcb_ieee_getapp_mask(netdev, &app); 1778 prio_map = tmp_map; 1779 1780 app.selector = IEEE_8021QAZ_APP_SEL_DGRAM; 1781 app.protocol = ROCE_V2_UDP_DPORT; 1782 tmp_map = dcb_ieee_getapp_mask(netdev, &app); 1783 prio_map |= tmp_map; 1784 1785 return prio_map; 1786 } 1787 1788 static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) 1789 { 1790 u8 prio_map = 0; 1791 1792 /* Get priority for roce */ 1793 prio_map = bnxt_re_get_priority_mask(rdev); 1794 1795 if (prio_map == rdev->cur_prio_map) 1796 return 0; 1797 rdev->cur_prio_map = prio_map; 1798 /* Actual priorities are not programmed as they are already 1799 * done by L2 driver; just enable or disable priority vlan tagging 1800 */ 1801 if ((prio_map == 0 && rdev->qplib_res.prio) || 1802 (prio_map != 0 && !rdev->qplib_res.prio)) { 1803 rdev->qplib_res.prio = prio_map; 1804 bnxt_re_update_gid(rdev); 1805 } 1806 1807 return 0; 1808 } 1809 1810 static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) 1811 { 1812 struct bnxt_en_dev *en_dev = rdev->en_dev; 1813 struct hwrm_ver_get_output resp = {}; 1814 struct hwrm_ver_get_input req = {}; 1815 struct bnxt_qplib_chip_ctx *cctx; 1816 struct bnxt_fw_msg fw_msg = {}; 1817 int rc; 1818 1819 bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VER_GET); 1820 req.hwrm_intf_maj = HWRM_VERSION_MAJOR; 1821 req.hwrm_intf_min = HWRM_VERSION_MINOR; 1822 req.hwrm_intf_upd = HWRM_VERSION_UPDATE; 1823 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 1824 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 1825 rc = bnxt_send_msg(en_dev, &fw_msg); 1826 if (rc) { 1827 ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x", 1828 rc); 1829 return; 1830 } 1831 1832 cctx = rdev->chip_ctx; 1833 cctx->hwrm_intf_ver = 1834 (u64)le16_to_cpu(resp.hwrm_intf_major) << 48 | 1835 (u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 | 1836 (u64)le16_to_cpu(resp.hwrm_intf_build) << 16 | 1837 le16_to_cpu(resp.hwrm_intf_patch); 1838 1839 cctx->hwrm_cmd_max_timeout = le16_to_cpu(resp.max_req_timeout); 1840 1841 if (!cctx->hwrm_cmd_max_timeout) 1842 cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT; 1843 } 1844 1845 static int bnxt_re_ib_init(struct bnxt_re_dev *rdev) 1846 { 1847 int rc; 1848 u32 event; 1849 1850 /* Register ib dev */ 1851 rc = bnxt_re_register_ib(rdev); 1852 if (rc) { 1853 pr_err("Failed to register with IB: %#x\n", rc); 1854 return rc; 1855 } 1856 dev_info(rdev_to_dev(rdev), "Device registered with IB successfully"); 1857 set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags); 1858 1859 event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ? 1860 IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; 1861 1862 bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, event); 1863 1864 return rc; 1865 } 1866 1867 static int bnxt_re_alloc_nqr_mem(struct bnxt_re_dev *rdev) 1868 { 1869 rdev->nqr = kzalloc(sizeof(*rdev->nqr), GFP_KERNEL); 1870 if (!rdev->nqr) 1871 return -ENOMEM; 1872 1873 return 0; 1874 } 1875 1876 static void bnxt_re_free_nqr_mem(struct bnxt_re_dev *rdev) 1877 { 1878 kfree(rdev->nqr); 1879 rdev->nqr = NULL; 1880 } 1881 1882 static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) 1883 { 1884 u8 type; 1885 int rc; 1886 1887 bnxt_re_debugfs_rem_pdev(rdev); 1888 1889 if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags)) 1890 cancel_delayed_work_sync(&rdev->worker); 1891 1892 if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, 1893 &rdev->flags)) 1894 bnxt_re_cleanup_res(rdev); 1895 if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags)) 1896 bnxt_re_free_res(rdev); 1897 1898 if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) { 1899 rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw); 1900 if (rc) 1901 ibdev_warn(&rdev->ibdev, 1902 "Failed to deinitialize RCFW: %#x", rc); 1903 bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); 1904 bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx); 1905 bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); 1906 type = bnxt_qplib_get_ring_type(rdev->chip_ctx); 1907 bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type); 1908 bnxt_qplib_free_rcfw_channel(&rdev->rcfw); 1909 } 1910 1911 rdev->nqr->num_msix = 0; 1912 1913 if (rdev->pacing.dbr_pacing) 1914 bnxt_re_deinitialize_dbr_pacing(rdev); 1915 1916 bnxt_re_free_nqr_mem(rdev); 1917 bnxt_re_destroy_chip_ctx(rdev); 1918 if (op_type == BNXT_RE_COMPLETE_REMOVE) { 1919 if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) 1920 bnxt_unregister_dev(rdev->en_dev); 1921 } 1922 } 1923 1924 /* worker thread for polling periodic events. Now used for QoS programming*/ 1925 static void bnxt_re_worker(struct work_struct *work) 1926 { 1927 struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev, 1928 worker.work); 1929 1930 bnxt_re_setup_qos(rdev); 1931 schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); 1932 } 1933 1934 static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) 1935 { 1936 struct bnxt_re_ring_attr rattr = {}; 1937 struct bnxt_qplib_creq_ctx *creq; 1938 u32 db_offt; 1939 int vid; 1940 u8 type; 1941 int rc; 1942 1943 if (op_type == BNXT_RE_COMPLETE_INIT) { 1944 /* Registered a new RoCE device instance to netdev */ 1945 rc = bnxt_re_register_netdev(rdev); 1946 if (rc) { 1947 ibdev_err(&rdev->ibdev, 1948 "Failed to register with netedev: %#x\n", rc); 1949 return -EINVAL; 1950 } 1951 } 1952 set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); 1953 1954 if (rdev->en_dev->ulp_tbl->msix_requested < BNXT_RE_MIN_MSIX) { 1955 ibdev_err(&rdev->ibdev, 1956 "RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n", 1957 rdev->en_dev->ulp_tbl->msix_requested); 1958 bnxt_unregister_dev(rdev->en_dev); 1959 clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); 1960 return -EINVAL; 1961 } 1962 ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n", 1963 rdev->en_dev->ulp_tbl->msix_requested); 1964 1965 rc = bnxt_re_setup_chip_ctx(rdev); 1966 if (rc) { 1967 bnxt_unregister_dev(rdev->en_dev); 1968 clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); 1969 ibdev_err(&rdev->ibdev, "Failed to get chip context\n"); 1970 return -EINVAL; 1971 } 1972 1973 rc = bnxt_re_alloc_nqr_mem(rdev); 1974 if (rc) { 1975 bnxt_re_destroy_chip_ctx(rdev); 1976 bnxt_unregister_dev(rdev->en_dev); 1977 clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); 1978 return rc; 1979 } 1980 rdev->nqr->num_msix = rdev->en_dev->ulp_tbl->msix_requested; 1981 memcpy(rdev->nqr->msix_entries, rdev->en_dev->msix_entries, 1982 sizeof(struct bnxt_msix_entry) * rdev->nqr->num_msix); 1983 1984 /* Check whether VF or PF */ 1985 bnxt_re_get_sriov_func_type(rdev); 1986 1987 bnxt_re_query_hwrm_intf_version(rdev); 1988 1989 /* Establish RCFW Communication Channel to initialize the context 1990 * memory for the function and all child VFs 1991 */ 1992 rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res, &rdev->rcfw, 1993 &rdev->qplib_ctx, 1994 BNXT_RE_MAX_QPC_COUNT); 1995 if (rc) { 1996 ibdev_err(&rdev->ibdev, 1997 "Failed to allocate RCFW Channel: %#x\n", rc); 1998 goto fail; 1999 } 2000 2001 type = bnxt_qplib_get_ring_type(rdev->chip_ctx); 2002 creq = &rdev->rcfw.creq; 2003 rattr.dma_arr = creq->hwq.pbl[PBL_LVL_0].pg_map_arr; 2004 rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count; 2005 rattr.type = type; 2006 rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX; 2007 rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1; 2008 rattr.lrid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].ring_idx; 2009 rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id); 2010 if (rc) { 2011 ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc); 2012 goto free_rcfw; 2013 } 2014 db_offt = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].db_offset; 2015 vid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].vector; 2016 rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw, 2017 vid, db_offt, 2018 &bnxt_re_aeq_handler); 2019 if (rc) { 2020 ibdev_err(&rdev->ibdev, "Failed to enable RCFW channel: %#x\n", 2021 rc); 2022 goto free_ring; 2023 } 2024 2025 if (bnxt_qplib_dbr_pacing_en(rdev->chip_ctx)) { 2026 rc = bnxt_re_initialize_dbr_pacing(rdev); 2027 if (!rc) { 2028 rdev->pacing.dbr_pacing = true; 2029 } else { 2030 ibdev_err(&rdev->ibdev, 2031 "DBR pacing disabled with error : %d\n", rc); 2032 rdev->pacing.dbr_pacing = false; 2033 } 2034 } 2035 rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr); 2036 if (rc) 2037 goto disable_rcfw; 2038 2039 bnxt_re_set_resource_limits(rdev); 2040 2041 rc = bnxt_qplib_alloc_ctx(&rdev->qplib_res, &rdev->qplib_ctx, 0, 2042 bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)); 2043 if (rc) { 2044 ibdev_err(&rdev->ibdev, 2045 "Failed to allocate QPLIB context: %#x\n", rc); 2046 goto disable_rcfw; 2047 } 2048 rc = bnxt_re_net_stats_ctx_alloc(rdev, 2049 rdev->qplib_ctx.stats.dma_map, 2050 &rdev->qplib_ctx.stats.fw_id); 2051 if (rc) { 2052 ibdev_err(&rdev->ibdev, 2053 "Failed to allocate stats context: %#x\n", rc); 2054 goto free_ctx; 2055 } 2056 2057 rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx, 2058 rdev->is_virtfn); 2059 if (rc) { 2060 ibdev_err(&rdev->ibdev, 2061 "Failed to initialize RCFW: %#x\n", rc); 2062 goto free_sctx; 2063 } 2064 set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags); 2065 2066 /* Resources based on the 'new' device caps */ 2067 rc = bnxt_re_alloc_res(rdev); 2068 if (rc) { 2069 ibdev_err(&rdev->ibdev, 2070 "Failed to allocate resources: %#x\n", rc); 2071 goto fail; 2072 } 2073 set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags); 2074 rc = bnxt_re_init_res(rdev); 2075 if (rc) { 2076 ibdev_err(&rdev->ibdev, 2077 "Failed to initialize resources: %#x\n", rc); 2078 goto fail; 2079 } 2080 2081 set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags); 2082 2083 if (!rdev->is_virtfn) { 2084 rc = bnxt_re_setup_qos(rdev); 2085 if (rc) 2086 ibdev_info(&rdev->ibdev, 2087 "RoCE priority not yet configured\n"); 2088 2089 INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker); 2090 set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags); 2091 schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); 2092 2093 if (!(rdev->qplib_res.en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT)) 2094 bnxt_re_vf_res_config(rdev); 2095 } 2096 hash_init(rdev->cq_hash); 2097 if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) 2098 hash_init(rdev->srq_hash); 2099 2100 bnxt_re_debugfs_add_pdev(rdev); 2101 2102 return 0; 2103 free_sctx: 2104 bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); 2105 free_ctx: 2106 bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx); 2107 disable_rcfw: 2108 bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); 2109 free_ring: 2110 type = bnxt_qplib_get_ring_type(rdev->chip_ctx); 2111 bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type); 2112 free_rcfw: 2113 bnxt_qplib_free_rcfw_channel(&rdev->rcfw); 2114 fail: 2115 bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); 2116 2117 return rc; 2118 } 2119 2120 static void bnxt_re_update_en_info_rdev(struct bnxt_re_dev *rdev, 2121 struct bnxt_re_en_dev_info *en_info, 2122 struct auxiliary_device *adev) 2123 { 2124 /* Before updating the rdev pointer in bnxt_re_en_dev_info structure, 2125 * take the rtnl lock to avoid accessing invalid rdev pointer from 2126 * L2 ULP callbacks. This is applicable in all the places where rdev 2127 * pointer is updated in bnxt_re_en_dev_info. 2128 */ 2129 rtnl_lock(); 2130 en_info->rdev = rdev; 2131 rtnl_unlock(); 2132 } 2133 2134 static int bnxt_re_add_device(struct auxiliary_device *adev, u8 op_type) 2135 { 2136 struct bnxt_aux_priv *aux_priv = 2137 container_of(adev, struct bnxt_aux_priv, aux_dev); 2138 struct bnxt_re_en_dev_info *en_info; 2139 struct bnxt_en_dev *en_dev; 2140 struct bnxt_re_dev *rdev; 2141 int rc; 2142 2143 en_info = auxiliary_get_drvdata(adev); 2144 en_dev = en_info->en_dev; 2145 2146 2147 rdev = bnxt_re_dev_add(adev, en_dev); 2148 if (!rdev || !rdev_to_dev(rdev)) { 2149 rc = -ENOMEM; 2150 goto exit; 2151 } 2152 2153 bnxt_re_update_en_info_rdev(rdev, en_info, adev); 2154 2155 rc = bnxt_re_dev_init(rdev, op_type); 2156 if (rc) 2157 goto re_dev_dealloc; 2158 2159 rc = bnxt_re_ib_init(rdev); 2160 if (rc) { 2161 pr_err("Failed to register with IB: %s", 2162 aux_priv->aux_dev.name); 2163 goto re_dev_uninit; 2164 } 2165 2166 rdev->nb.notifier_call = bnxt_re_netdev_event; 2167 rc = register_netdevice_notifier(&rdev->nb); 2168 if (rc) { 2169 rdev->nb.notifier_call = NULL; 2170 pr_err("%s: Cannot register to netdevice_notifier", 2171 ROCE_DRV_MODULE_NAME); 2172 goto re_dev_unreg; 2173 } 2174 bnxt_re_setup_cc(rdev, true); 2175 2176 return 0; 2177 2178 re_dev_unreg: 2179 ib_unregister_device(&rdev->ibdev); 2180 re_dev_uninit: 2181 bnxt_re_update_en_info_rdev(NULL, en_info, adev); 2182 bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); 2183 re_dev_dealloc: 2184 ib_dealloc_device(&rdev->ibdev); 2185 exit: 2186 return rc; 2187 } 2188 2189 static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable) 2190 { 2191 struct bnxt_qplib_cc_param cc_param = {}; 2192 2193 /* Do not enable congestion control on VFs */ 2194 if (rdev->is_virtfn) 2195 return; 2196 2197 /* Currently enabling only for GenP5 adapters */ 2198 if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) 2199 return; 2200 2201 if (enable) { 2202 cc_param.enable = 1; 2203 cc_param.tos_ecn = 1; 2204 } 2205 2206 cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC | 2207 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN); 2208 2209 if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param)) 2210 ibdev_err(&rdev->ibdev, "Failed to setup CC enable = %d\n", enable); 2211 } 2212 2213 /* 2214 * "Notifier chain callback can be invoked for the same chain from 2215 * different CPUs at the same time". 2216 * 2217 * For cases when the netdev is already present, our call to the 2218 * register_netdevice_notifier() will actually get the rtnl_lock() 2219 * before sending NETDEV_REGISTER and (if up) NETDEV_UP 2220 * events. 2221 * 2222 * But for cases when the netdev is not already present, the notifier 2223 * chain is subjected to be invoked from different CPUs simultaneously. 2224 * 2225 * This is protected by the netdev_mutex. 2226 */ 2227 static int bnxt_re_netdev_event(struct notifier_block *notifier, 2228 unsigned long event, void *ptr) 2229 { 2230 struct net_device *real_dev, *netdev = netdev_notifier_info_to_dev(ptr); 2231 struct bnxt_re_dev *rdev; 2232 2233 real_dev = rdma_vlan_dev_real_dev(netdev); 2234 if (!real_dev) 2235 real_dev = netdev; 2236 2237 if (real_dev != netdev) 2238 goto exit; 2239 2240 rdev = bnxt_re_from_netdev(real_dev); 2241 if (!rdev) 2242 return NOTIFY_DONE; 2243 2244 2245 switch (event) { 2246 case NETDEV_UP: 2247 case NETDEV_DOWN: 2248 case NETDEV_CHANGE: 2249 bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, 2250 netif_carrier_ok(real_dev) ? 2251 IB_EVENT_PORT_ACTIVE : 2252 IB_EVENT_PORT_ERR); 2253 break; 2254 default: 2255 break; 2256 } 2257 ib_device_put(&rdev->ibdev); 2258 exit: 2259 return NOTIFY_DONE; 2260 } 2261 2262 #define BNXT_ADEV_NAME "bnxt_en" 2263 2264 static void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type, 2265 struct auxiliary_device *aux_dev) 2266 { 2267 if (rdev->nb.notifier_call) { 2268 unregister_netdevice_notifier(&rdev->nb); 2269 rdev->nb.notifier_call = NULL; 2270 } else { 2271 /* If notifier is null, we should have already done a 2272 * clean up before coming here. 2273 */ 2274 return; 2275 } 2276 bnxt_re_setup_cc(rdev, false); 2277 ib_unregister_device(&rdev->ibdev); 2278 bnxt_re_dev_uninit(rdev, op_type); 2279 ib_dealloc_device(&rdev->ibdev); 2280 } 2281 2282 static void bnxt_re_remove(struct auxiliary_device *adev) 2283 { 2284 struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev); 2285 struct bnxt_re_dev *rdev; 2286 2287 mutex_lock(&bnxt_re_mutex); 2288 rdev = en_info->rdev; 2289 2290 if (rdev) 2291 bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, adev); 2292 kfree(en_info); 2293 mutex_unlock(&bnxt_re_mutex); 2294 } 2295 2296 static int bnxt_re_probe(struct auxiliary_device *adev, 2297 const struct auxiliary_device_id *id) 2298 { 2299 struct bnxt_aux_priv *aux_priv = 2300 container_of(adev, struct bnxt_aux_priv, aux_dev); 2301 struct bnxt_re_en_dev_info *en_info; 2302 struct bnxt_en_dev *en_dev; 2303 int rc; 2304 2305 en_dev = aux_priv->edev; 2306 2307 mutex_lock(&bnxt_re_mutex); 2308 en_info = kzalloc(sizeof(*en_info), GFP_KERNEL); 2309 if (!en_info) { 2310 mutex_unlock(&bnxt_re_mutex); 2311 return -ENOMEM; 2312 } 2313 en_info->en_dev = en_dev; 2314 2315 auxiliary_set_drvdata(adev, en_info); 2316 2317 rc = bnxt_re_add_device(adev, BNXT_RE_COMPLETE_INIT); 2318 if (rc) 2319 goto err; 2320 mutex_unlock(&bnxt_re_mutex); 2321 return 0; 2322 2323 err: 2324 mutex_unlock(&bnxt_re_mutex); 2325 kfree(en_info); 2326 2327 return rc; 2328 } 2329 2330 static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state) 2331 { 2332 struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev); 2333 struct bnxt_en_dev *en_dev; 2334 struct bnxt_re_dev *rdev; 2335 2336 rdev = en_info->rdev; 2337 en_dev = en_info->en_dev; 2338 mutex_lock(&bnxt_re_mutex); 2339 2340 ibdev_info(&rdev->ibdev, "Handle device suspend call"); 2341 /* Check the current device state from bnxt_en_dev and move the 2342 * device to detached state if FW_FATAL_COND is set. 2343 * This prevents more commands to HW during clean-up, 2344 * in case the device is already in error. 2345 */ 2346 if (test_bit(BNXT_STATE_FW_FATAL_COND, &rdev->en_dev->en_state)) { 2347 set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); 2348 set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); 2349 wake_up_all(&rdev->rcfw.cmdq.waitq); 2350 bnxt_re_dev_stop(rdev); 2351 } 2352 2353 if (rdev->pacing.dbr_pacing) 2354 bnxt_re_set_pacing_dev_state(rdev); 2355 2356 ibdev_info(&rdev->ibdev, "%s: L2 driver notified to stop en_state 0x%lx", 2357 __func__, en_dev->en_state); 2358 bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, adev); 2359 mutex_unlock(&bnxt_re_mutex); 2360 2361 return 0; 2362 } 2363 2364 static int bnxt_re_resume(struct auxiliary_device *adev) 2365 { 2366 struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev); 2367 struct bnxt_re_dev *rdev; 2368 2369 mutex_lock(&bnxt_re_mutex); 2370 bnxt_re_add_device(adev, BNXT_RE_POST_RECOVERY_INIT); 2371 rdev = en_info->rdev; 2372 ibdev_info(&rdev->ibdev, "Device resume completed"); 2373 mutex_unlock(&bnxt_re_mutex); 2374 2375 return 0; 2376 } 2377 2378 static const struct auxiliary_device_id bnxt_re_id_table[] = { 2379 { .name = BNXT_ADEV_NAME ".rdma", }, 2380 {}, 2381 }; 2382 2383 MODULE_DEVICE_TABLE(auxiliary, bnxt_re_id_table); 2384 2385 static struct auxiliary_driver bnxt_re_driver = { 2386 .name = "rdma", 2387 .probe = bnxt_re_probe, 2388 .remove = bnxt_re_remove, 2389 .shutdown = bnxt_re_shutdown, 2390 .suspend = bnxt_re_suspend, 2391 .resume = bnxt_re_resume, 2392 .id_table = bnxt_re_id_table, 2393 }; 2394 2395 static int __init bnxt_re_mod_init(void) 2396 { 2397 int rc; 2398 2399 pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version); 2400 bnxt_re_register_debugfs(); 2401 2402 rc = auxiliary_driver_register(&bnxt_re_driver); 2403 if (rc) { 2404 pr_err("%s: Failed to register auxiliary driver\n", 2405 ROCE_DRV_MODULE_NAME); 2406 goto err_debug; 2407 } 2408 return 0; 2409 err_debug: 2410 bnxt_re_unregister_debugfs(); 2411 return rc; 2412 } 2413 2414 static void __exit bnxt_re_mod_exit(void) 2415 { 2416 auxiliary_driver_unregister(&bnxt_re_driver); 2417 bnxt_re_unregister_debugfs(); 2418 } 2419 2420 module_init(bnxt_re_mod_init); 2421 module_exit(bnxt_re_mod_exit); 2422