1 /* 2 * Copyright (c) 2015-2024, Broadcom. All rights reserved. The term 3 * Broadcom refers to Broadcom Limited and/or its subsidiaries. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in 13 * the documentation and/or other materials provided with the 14 * distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 18 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 23 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 24 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 25 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 26 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 * 28 * Description: Main component of the bnxt_re driver 29 */ 30 31 #include <linux/if_ether.h> 32 #include <linux/module.h> 33 #include <linux/errno.h> 34 #include <linux/pci.h> 35 #include <linux/dma-mapping.h> 36 #include <linux/slab.h> 37 #include <linux/sched.h> 38 #include <linux/delay.h> 39 #include <linux/fs.h> 40 #include <rdma/ib_user_verbs.h> 41 #include <rdma/ib_addr.h> 42 #include <rdma/ib_cache.h> 43 #include <dev/mlx5/port.h> 44 #include <dev/mlx5/vport.h> 45 #include <linux/list.h> 46 #include <rdma/ib_smi.h> 47 #include <rdma/ib_umem.h> 48 #include <linux/in.h> 49 #include <linux/etherdevice.h> 50 51 #include "bnxt_re.h" 52 #include "ib_verbs.h" 53 #include "bnxt_re-abi.h" 54 #include "bnxt.h" 55 56 static char drv_version[] = 57 "Broadcom NetXtreme-C/E RoCE Driver " ROCE_DRV_MODULE_NAME \ 58 " v" ROCE_DRV_MODULE_VERSION " (" ROCE_DRV_MODULE_RELDATE ")\n"; 59 60 #define BNXT_RE_DESC "Broadcom NetXtreme RoCE" 61 #define BNXT_ADEV_NAME "if_bnxt" 62 63 MODULE_DESCRIPTION("Broadcom NetXtreme-C/E RoCE Driver"); 64 MODULE_LICENSE("Dual BSD/GPL"); 65 MODULE_DEPEND(bnxt_re, linuxkpi, 1, 1, 1); 66 MODULE_DEPEND(bnxt_re, ibcore, 1, 1, 1); 67 MODULE_DEPEND(bnxt_re, if_bnxt, 1, 1, 1); 68 MODULE_VERSION(bnxt_re, 1); 69 70 71 DEFINE_MUTEX(bnxt_re_mutex); /* mutex lock for driver */ 72 73 static unsigned int restrict_mrs = 0; 74 module_param(restrict_mrs, uint, 0); 75 MODULE_PARM_DESC(restrict_mrs, " Restrict the no. of MRs 0 = 256K , 1 = 64K"); 76 77 unsigned int restrict_stats = 0; 78 module_param(restrict_stats, uint, 0); 79 MODULE_PARM_DESC(restrict_stats, "Restrict stats query frequency to ethtool coalesce value. Disabled by default"); 80 81 unsigned int enable_fc = 1; 82 module_param(enable_fc, uint, 0); 83 MODULE_PARM_DESC(enable_fc, "Enable default PFC, CC,ETS during driver load. 
1 - fc enable, 0 - fc disable - Default is 1"); 84 85 unsigned int min_tx_depth = 1; 86 module_param(min_tx_depth, uint, 0); 87 MODULE_PARM_DESC(min_tx_depth, "Minimum TX depth - Default is 1"); 88 89 static uint8_t max_msix_vec[BNXT_RE_MAX_DEVICES] = {0}; 90 static unsigned int max_msix_vec_argc; 91 module_param_array(max_msix_vec, byte, &max_msix_vec_argc, 0444); 92 MODULE_PARM_DESC(max_msix_vec, "Max MSI-x vectors per PF (2 - 64) - Default is 64"); 93 94 unsigned int cmdq_shadow_qd = RCFW_CMD_NON_BLOCKING_SHADOW_QD; 95 module_param_named(cmdq_shadow_qd, cmdq_shadow_qd, uint, 0644); 96 MODULE_PARM_DESC(cmdq_shadow_qd, "Perf Stat Debug: Shadow QD Range (1-64) - Default is 64"); 97 98 99 /* globals */ 100 struct list_head bnxt_re_dev_list = LINUX_LIST_HEAD_INIT(bnxt_re_dev_list); 101 static int bnxt_re_probe_count; 102 103 DEFINE_MUTEX(bnxt_re_dev_lock); 104 static u32 gmod_exit; 105 static u32 gadd_dev_inprogress; 106 107 static void bnxt_re_task(struct work_struct *work_task); 108 static struct workqueue_struct *bnxt_re_wq; 109 static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev); 110 static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, 111 u32 *offset); 112 static int bnxt_re_ib_init(struct bnxt_re_dev *rdev); 113 static void bnxt_re_ib_init_2(struct bnxt_re_dev *rdev); 114 void _bnxt_re_remove(struct auxiliary_device *adev); 115 void writel_fbsd(struct bnxt_softc *bp, u32, u8, u32); 116 u32 readl_fbsd(struct bnxt_softc *bp, u32, u8); 117 static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev); 118 119 int bnxt_re_register_netdevice_notifier(struct notifier_block *nb) 120 { 121 int rc; 122 rc = register_netdevice_notifier(nb); 123 return rc; 124 } 125 126 int bnxt_re_unregister_netdevice_notifier(struct notifier_block *nb) 127 { 128 int rc; 129 rc = unregister_netdevice_notifier(nb); 130 return rc; 131 } 132 133 void bnxt_re_set_dma_device(struct ib_device *ibdev, struct bnxt_re_dev *rdev) 134 { 135 ibdev->dma_device = 
&rdev->en_dev->pdev->dev; 136 } 137 138 void bnxt_re_init_resolve_wq(struct bnxt_re_dev *rdev) 139 { 140 rdev->resolve_wq = create_singlethread_workqueue("bnxt_re_resolve_wq"); 141 INIT_LIST_HEAD(&rdev->mac_wq_list); 142 } 143 144 void bnxt_re_uninit_resolve_wq(struct bnxt_re_dev *rdev) 145 { 146 struct bnxt_re_resolve_dmac_work *tmp_work = NULL, *tmp_st; 147 if (!rdev->resolve_wq) 148 return; 149 flush_workqueue(rdev->resolve_wq); 150 list_for_each_entry_safe(tmp_work, tmp_st, &rdev->mac_wq_list, list) { 151 list_del(&tmp_work->list); 152 kfree(tmp_work); 153 } 154 destroy_workqueue(rdev->resolve_wq); 155 rdev->resolve_wq = NULL; 156 } 157 158 u32 readl_fbsd(struct bnxt_softc *bp, u32 reg_off, u8 bar_idx) 159 { 160 161 if (bar_idx) 162 return bus_space_read_8(bp->doorbell_bar.tag, bp->doorbell_bar.handle, reg_off); 163 else 164 return bus_space_read_8(bp->hwrm_bar.tag, bp->hwrm_bar.handle, reg_off); 165 } 166 167 void writel_fbsd(struct bnxt_softc *bp, u32 reg_off, u8 bar_idx, u32 val) 168 { 169 if (bar_idx) 170 bus_space_write_8(bp->doorbell_bar.tag, bp->doorbell_bar.handle, reg_off, htole32(val)); 171 else 172 bus_space_write_8(bp->hwrm_bar.tag, bp->hwrm_bar.handle, reg_off, htole32(val)); 173 } 174 175 static void bnxt_re_update_fifo_occup_slabs(struct bnxt_re_dev *rdev, 176 u32 fifo_occup) 177 { 178 if (fifo_occup > rdev->dbg_stats->dbq.fifo_occup_water_mark) 179 rdev->dbg_stats->dbq.fifo_occup_water_mark = fifo_occup; 180 181 if (fifo_occup > 8 * rdev->pacing_algo_th) 182 rdev->dbg_stats->dbq.fifo_occup_slab_4++; 183 else if (fifo_occup > 4 * rdev->pacing_algo_th) 184 rdev->dbg_stats->dbq.fifo_occup_slab_3++; 185 else if (fifo_occup > 2 * rdev->pacing_algo_th) 186 rdev->dbg_stats->dbq.fifo_occup_slab_2++; 187 else if (fifo_occup > rdev->pacing_algo_th) 188 rdev->dbg_stats->dbq.fifo_occup_slab_1++; 189 } 190 191 static void bnxt_re_update_do_pacing_slabs(struct bnxt_re_dev *rdev) 192 { 193 struct bnxt_qplib_db_pacing_data *pacing_data = 
rdev->qplib_res.pacing_data; 194 195 if (pacing_data->do_pacing > rdev->dbg_stats->dbq.do_pacing_water_mark) 196 rdev->dbg_stats->dbq.do_pacing_water_mark = pacing_data->do_pacing; 197 198 if (pacing_data->do_pacing > 16 * rdev->dbr_def_do_pacing) 199 rdev->dbg_stats->dbq.do_pacing_slab_5++; 200 else if (pacing_data->do_pacing > 8 * rdev->dbr_def_do_pacing) 201 rdev->dbg_stats->dbq.do_pacing_slab_4++; 202 else if (pacing_data->do_pacing > 4 * rdev->dbr_def_do_pacing) 203 rdev->dbg_stats->dbq.do_pacing_slab_3++; 204 else if (pacing_data->do_pacing > 2 * rdev->dbr_def_do_pacing) 205 rdev->dbg_stats->dbq.do_pacing_slab_2++; 206 else if (pacing_data->do_pacing > rdev->dbr_def_do_pacing) 207 rdev->dbg_stats->dbq.do_pacing_slab_1++; 208 } 209 210 static bool bnxt_re_is_qp1_qp(struct bnxt_re_qp *qp) 211 { 212 return qp->ib_qp.qp_type == IB_QPT_GSI; 213 } 214 215 static struct bnxt_re_qp *bnxt_re_get_qp1_qp(struct bnxt_re_dev *rdev) 216 { 217 struct bnxt_re_qp *qp; 218 219 mutex_lock(&rdev->qp_lock); 220 list_for_each_entry(qp, &rdev->qp_list, list) { 221 if (bnxt_re_is_qp1_qp(qp)) { 222 mutex_unlock(&rdev->qp_lock); 223 return qp; 224 } 225 } 226 mutex_unlock(&rdev->qp_lock); 227 return NULL; 228 } 229 230 /* Set the maximum number of each resource that the driver actually wants 231 * to allocate. This may be up to the maximum number the firmware has 232 * reserved for the function. The driver may choose to allocate fewer 233 * resources than the firmware maximum. 
234 */ 235 static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) 236 { 237 struct bnxt_qplib_max_res dev_res = {}; 238 struct bnxt_qplib_chip_ctx *cctx; 239 struct bnxt_qplib_dev_attr *attr; 240 struct bnxt_qplib_ctx *hctx; 241 int i; 242 243 attr = rdev->dev_attr; 244 hctx = rdev->qplib_res.hctx; 245 cctx = rdev->chip_ctx; 246 247 bnxt_qplib_max_res_supported(cctx, &rdev->qplib_res, &dev_res, false); 248 if (!_is_chip_gen_p5_p7(cctx)) { 249 hctx->qp_ctx.max = min_t(u32, dev_res.max_qp, attr->max_qp); 250 hctx->mrw_ctx.max = min_t(u32, dev_res.max_mr, attr->max_mr); 251 /* To accommodate 16k MRs and 16k AHs, 252 * driver has to allocate 32k backing store memory 253 */ 254 hctx->mrw_ctx.max *= 2; 255 hctx->srq_ctx.max = min_t(u32, dev_res.max_srq, attr->max_srq); 256 hctx->cq_ctx.max = min_t(u32, dev_res.max_cq, attr->max_cq); 257 for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) 258 hctx->tqm_ctx.qcount[i] = attr->tqm_alloc_reqs[i]; 259 } else { 260 hctx->qp_ctx.max = attr->max_qp ? attr->max_qp : dev_res.max_qp; 261 hctx->mrw_ctx.max = attr->max_mr ? attr->max_mr : dev_res.max_mr; 262 hctx->srq_ctx.max = attr->max_srq ? attr->max_srq : dev_res.max_srq; 263 hctx->cq_ctx.max = attr->max_cq ? 
attr->max_cq : dev_res.max_cq; 264 } 265 } 266 267 static void bnxt_re_limit_vf_res(struct bnxt_re_dev *rdev, 268 struct bnxt_qplib_vf_res *vf_res, 269 u32 num_vf) 270 { 271 struct bnxt_qplib_chip_ctx *cctx = rdev->chip_ctx; 272 struct bnxt_qplib_max_res dev_res = {}; 273 274 bnxt_qplib_max_res_supported(cctx, &rdev->qplib_res, &dev_res, true); 275 vf_res->max_qp = dev_res.max_qp / num_vf; 276 vf_res->max_srq = dev_res.max_srq / num_vf; 277 vf_res->max_cq = dev_res.max_cq / num_vf; 278 /* 279 * MR and AH shares the same backing store, the value specified 280 * for max_mrw is split into half by the FW for MR and AH 281 */ 282 vf_res->max_mrw = dev_res.max_mr * 2 / num_vf; 283 vf_res->max_gid = BNXT_RE_MAX_GID_PER_VF; 284 } 285 286 static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) 287 { 288 struct bnxt_qplib_ctx *hctx; 289 290 hctx = rdev->qplib_res.hctx; 291 memset(&hctx->vf_res, 0, sizeof(struct bnxt_qplib_vf_res)); 292 bnxt_re_limit_pf_res(rdev); 293 294 if (rdev->num_vfs) 295 bnxt_re_limit_vf_res(rdev, &hctx->vf_res, rdev->num_vfs); 296 } 297 298 static void bnxt_re_dettach_irq(struct bnxt_re_dev *rdev) 299 { 300 struct bnxt_qplib_rcfw *rcfw = NULL; 301 struct bnxt_qplib_nq *nq; 302 int indx; 303 304 rcfw = &rdev->rcfw; 305 for (indx = 0; indx < rdev->nqr.max_init; indx++) { 306 nq = &rdev->nqr.nq[indx]; 307 mutex_lock(&nq->lock); 308 bnxt_qplib_nq_stop_irq(nq, false); 309 mutex_unlock(&nq->lock); 310 } 311 312 bnxt_qplib_rcfw_stop_irq(rcfw, false); 313 } 314 315 static void bnxt_re_detach_err_device(struct bnxt_re_dev *rdev) 316 { 317 /* Free the MSIx vectors only so that L2 can proceed with MSIx disable */ 318 bnxt_re_dettach_irq(rdev); 319 320 /* Set the state as detached to prevent sending any more commands */ 321 set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); 322 set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); 323 wake_up_all(&rdev->rcfw.cmdq.waitq); 324 } 325 326 #define MAX_DSCP_PRI_TUPLE 64 327 328 struct bnxt_re_dcb_work { 
329 struct work_struct work; 330 struct bnxt_re_dev *rdev; 331 struct hwrm_async_event_cmpl cmpl; 332 }; 333 334 static void bnxt_re_init_dcb_wq(struct bnxt_re_dev *rdev) 335 { 336 rdev->dcb_wq = create_singlethread_workqueue("bnxt_re_dcb_wq"); 337 } 338 339 static void bnxt_re_uninit_dcb_wq(struct bnxt_re_dev *rdev) 340 { 341 if (!rdev->dcb_wq) 342 return; 343 flush_workqueue(rdev->dcb_wq); 344 destroy_workqueue(rdev->dcb_wq); 345 rdev->dcb_wq = NULL; 346 } 347 348 static void bnxt_re_init_aer_wq(struct bnxt_re_dev *rdev) 349 { 350 rdev->aer_wq = create_singlethread_workqueue("bnxt_re_aer_wq"); 351 } 352 353 static void bnxt_re_uninit_aer_wq(struct bnxt_re_dev *rdev) 354 { 355 if (!rdev->aer_wq) 356 return; 357 flush_workqueue(rdev->aer_wq); 358 destroy_workqueue(rdev->aer_wq); 359 rdev->aer_wq = NULL; 360 } 361 362 static int bnxt_re_update_qp1_tos_dscp(struct bnxt_re_dev *rdev) 363 { 364 struct bnxt_re_qp *qp; 365 366 if (!_is_chip_gen_p5_p7(rdev->chip_ctx)) 367 return 0; 368 369 qp = bnxt_re_get_qp1_qp(rdev); 370 if (!qp) 371 return 0; 372 373 qp->qplib_qp.modify_flags = CMDQ_MODIFY_QP_MODIFY_MASK_TOS_DSCP; 374 qp->qplib_qp.tos_dscp = rdev->cc_param.qp1_tos_dscp; 375 376 return bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp); 377 } 378 379 static void bnxt_re_reconfigure_dscp(struct bnxt_re_dev *rdev) 380 { 381 struct bnxt_qplib_cc_param *cc_param; 382 struct bnxt_re_tc_rec *tc_rec; 383 bool update_cc = false; 384 u8 dscp_user; 385 int rc; 386 387 cc_param = &rdev->cc_param; 388 tc_rec = &rdev->tc_rec[0]; 389 390 if (!(cc_param->roce_dscp_user || cc_param->cnp_dscp_user)) 391 return; 392 393 if (cc_param->cnp_dscp_user) { 394 dscp_user = (cc_param->cnp_dscp_user & 0x3f); 395 if ((tc_rec->cnp_dscp_bv & (1ul << dscp_user)) && 396 (cc_param->alt_tos_dscp != dscp_user)) { 397 cc_param->alt_tos_dscp = dscp_user; 398 cc_param->mask |= CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP; 399 update_cc = true; 400 } 401 } 402 403 if (cc_param->roce_dscp_user) { 404 
dscp_user = (cc_param->roce_dscp_user & 0x3f); 405 if ((tc_rec->roce_dscp_bv & (1ul << dscp_user)) && 406 (cc_param->tos_dscp != dscp_user)) { 407 cc_param->tos_dscp = dscp_user; 408 cc_param->mask |= CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP; 409 update_cc = true; 410 } 411 } 412 413 if (update_cc) { 414 rc = bnxt_qplib_modify_cc(&rdev->qplib_res, cc_param); 415 if (rc) 416 dev_err(rdev_to_dev(rdev), "Failed to apply cc settings\n"); 417 } 418 } 419 420 static void bnxt_re_dcb_wq_task(struct work_struct *work) 421 { 422 struct bnxt_qplib_cc_param *cc_param; 423 struct bnxt_re_tc_rec *tc_rec; 424 struct bnxt_re_dev *rdev; 425 struct bnxt_re_dcb_work *dcb_work = 426 container_of(work, struct bnxt_re_dcb_work, work); 427 int rc; 428 429 rdev = dcb_work->rdev; 430 if (!rdev) 431 goto exit; 432 433 mutex_lock(&rdev->cc_lock); 434 435 cc_param = &rdev->cc_param; 436 rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, cc_param); 437 if (rc) { 438 dev_err(rdev_to_dev(rdev), "Failed to query ccparam rc:%d", rc); 439 goto fail; 440 } 441 tc_rec = &rdev->tc_rec[0]; 442 /* 443 * Upon the receival of DCB Async event: 444 * If roce_dscp or cnp_dscp or both (which user configured using configfs) 445 * is in the list, re-program the value using modify_roce_cc command 446 */ 447 bnxt_re_reconfigure_dscp(rdev); 448 449 cc_param->roce_pri = tc_rec->roce_prio; 450 if (cc_param->qp1_tos_dscp != cc_param->tos_dscp) { 451 cc_param->qp1_tos_dscp = cc_param->tos_dscp; 452 rc = bnxt_re_update_qp1_tos_dscp(rdev); 453 if (rc) { 454 dev_err(rdev_to_dev(rdev), "%s:Failed to modify QP1 rc:%d", 455 __func__, rc); 456 goto fail; 457 } 458 } 459 460 fail: 461 mutex_unlock(&rdev->cc_lock); 462 exit: 463 kfree(dcb_work); 464 } 465 466 static int bnxt_re_hwrm_dbr_pacing_broadcast_event(struct bnxt_re_dev *rdev) 467 { 468 struct hwrm_func_dbr_pacing_broadcast_event_output resp = {0}; 469 struct hwrm_func_dbr_pacing_broadcast_event_input req = {0}; 470 struct bnxt_en_dev *en_dev = rdev->en_dev; 471 struct 
bnxt_fw_msg fw_msg; 472 int rc; 473 474 memset(&fw_msg, 0, sizeof(fw_msg)); 475 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, 476 HWRM_FUNC_DBR_PACING_BROADCAST_EVENT, -1, -1); 477 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 478 sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); 479 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 480 if (rc) { 481 dev_dbg(rdev_to_dev(rdev), 482 "Failed to send dbr pacing broadcast event rc:%d", rc); 483 return rc; 484 } 485 return 0; 486 } 487 488 static int bnxt_re_hwrm_dbr_pacing_nqlist_query(struct bnxt_re_dev *rdev) 489 { 490 struct hwrm_func_dbr_pacing_nqlist_query_output resp = {0}; 491 struct hwrm_func_dbr_pacing_nqlist_query_input req = {0}; 492 struct bnxt_dbq_nq_list *nq_list = &rdev->nq_list; 493 struct bnxt_en_dev *en_dev = rdev->en_dev; 494 bool primary_found = false; 495 struct bnxt_fw_msg fw_msg; 496 struct bnxt_qplib_nq *nq; 497 int rc, i, j = 1; 498 u16 *nql_ptr; 499 500 nq = &rdev->nqr.nq[0]; 501 502 memset(&fw_msg, 0, sizeof(fw_msg)); 503 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, 504 HWRM_FUNC_DBR_PACING_NQLIST_QUERY, -1, -1); 505 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 506 sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); 507 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 508 if (rc) { 509 dev_err(rdev_to_dev(rdev), "Failed to send dbr pacing nq list query rc:%d", rc); 510 return rc; 511 } 512 nq_list->num_nql_entries = le32_to_cpu(resp.num_nqs); 513 nql_ptr = &resp.nq_ring_id0; 514 /* populate the nq_list of the primary function with list received 515 * from FW. Fill the NQ IDs of secondary functions from index 1 to 516 * num_nql_entries - 1. 
Fill the nq_list->nq_id[0] with the 517 * nq_id of the primary pf 518 */ 519 for (i = 0; i < nq_list->num_nql_entries; i++) { 520 u16 nq_id = *nql_ptr; 521 522 dev_dbg(rdev_to_dev(rdev), 523 "nq_list->nq_id[%d] = %d\n", i, nq_id); 524 if (nq_id != nq->ring_id) { 525 nq_list->nq_id[j] = nq_id; 526 j++; 527 } else { 528 primary_found = true; 529 nq_list->nq_id[0] = nq->ring_id; 530 } 531 nql_ptr++; 532 } 533 if (primary_found) 534 bnxt_qplib_dbr_pacing_set_primary_pf(rdev->chip_ctx, 1); 535 536 return 0; 537 } 538 539 static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev) 540 { 541 struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data; 542 u32 read_val, fifo_occup; 543 bool first_read = true; 544 545 /* loop shouldn't run infintely as the occupancy usually goes 546 * below pacing algo threshold as soon as pacing kicks in. 547 */ 548 while (1) { 549 read_val = readl_fbsd(rdev->en_dev->softc, rdev->dbr_db_fifo_reg_off, 0); 550 fifo_occup = pacing_data->fifo_max_depth - 551 ((read_val & pacing_data->fifo_room_mask) >> 552 pacing_data->fifo_room_shift); 553 /* Fifo occupancy cannot be greater the MAX FIFO depth */ 554 if (fifo_occup > pacing_data->fifo_max_depth) 555 break; 556 557 if (first_read) { 558 bnxt_re_update_fifo_occup_slabs(rdev, fifo_occup); 559 first_read = false; 560 } 561 if (fifo_occup < pacing_data->pacing_th) 562 break; 563 } 564 } 565 566 static void bnxt_re_set_default_pacing_data(struct bnxt_re_dev *rdev) 567 { 568 struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data; 569 570 pacing_data->do_pacing = rdev->dbr_def_do_pacing; 571 pacing_data->pacing_th = rdev->pacing_algo_th; 572 pacing_data->alarm_th = 573 pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE(rdev->chip_ctx); 574 } 575 576 #define CAG_RING_MASK 0x7FF 577 #define CAG_RING_SHIFT 17 578 #define WATERMARK_MASK 0xFFF 579 #define WATERMARK_SHIFT 0 580 581 static bool bnxt_re_check_if_dbq_intr_triggered(struct bnxt_re_dev 
*rdev) 582 { 583 u32 read_val; 584 int j; 585 586 for (j = 0; j < 10; j++) { 587 read_val = readl_fbsd(rdev->en_dev->softc, rdev->dbr_aeq_arm_reg_off, 0); 588 dev_dbg(rdev_to_dev(rdev), "AEQ ARM status = 0x%x\n", 589 read_val); 590 if (!read_val) 591 return true; 592 } 593 return false; 594 } 595 596 int bnxt_re_set_dbq_throttling_reg(struct bnxt_re_dev *rdev, u16 nq_id, u32 throttle) 597 { 598 u32 cag_ring_water_mark = 0, read_val; 599 u32 throttle_val; 600 601 /* Convert throttle percentage to value */ 602 throttle_val = (rdev->qplib_res.pacing_data->fifo_max_depth * throttle) / 100; 603 604 if (bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) { 605 cag_ring_water_mark = (nq_id & CAG_RING_MASK) << CAG_RING_SHIFT | 606 (throttle_val & WATERMARK_MASK); 607 writel_fbsd(rdev->en_dev->softc, rdev->dbr_throttling_reg_off, 0, cag_ring_water_mark); 608 read_val = readl_fbsd(rdev->en_dev->softc , rdev->dbr_throttling_reg_off, 0); 609 dev_dbg(rdev_to_dev(rdev), 610 "%s: dbr_throttling_reg_off read_val = 0x%x\n", 611 __func__, read_val); 612 if (read_val != cag_ring_water_mark) { 613 dev_dbg(rdev_to_dev(rdev), 614 "nq_id = %d write_val=0x%x read_val=0x%x\n", 615 nq_id, cag_ring_water_mark, read_val); 616 return 1; 617 } 618 } 619 writel_fbsd(rdev->en_dev->softc, rdev->dbr_aeq_arm_reg_off, 0, 1); 620 return 0; 621 } 622 623 static void bnxt_re_set_dbq_throttling_for_non_primary(struct bnxt_re_dev *rdev) 624 { 625 struct bnxt_dbq_nq_list *nq_list; 626 struct bnxt_qplib_nq *nq; 627 int i; 628 629 nq_list = &rdev->nq_list; 630 /* Run a loop for other Active functions if this is primary function */ 631 if (bnxt_qplib_dbr_pacing_is_primary_pf(rdev->chip_ctx)) { 632 dev_dbg(rdev_to_dev(rdev), "%s: nq_list->num_nql_entries= %d\n", 633 __func__, nq_list->num_nql_entries); 634 nq = &rdev->nqr.nq[0]; 635 for (i = nq_list->num_nql_entries - 1; i > 0; i--) { 636 u16 nq_id = nq_list->nq_id[i]; 637 if (nq) 638 dev_dbg(rdev_to_dev(rdev), 639 "%s: nq_id = %d cur_fn_ring_id = %d\n", 640 
__func__, nq_id, nq->ring_id); 641 if (bnxt_re_set_dbq_throttling_reg 642 (rdev, nq_id, 0)) 643 break; 644 bnxt_re_check_if_dbq_intr_triggered(rdev); 645 } 646 } 647 } 648 649 static void bnxt_re_handle_dbr_nq_pacing_notification(struct bnxt_re_dev *rdev) 650 { 651 struct bnxt_qplib_nq *nq; 652 int rc = 0; 653 654 nq = &rdev->nqr.nq[0]; 655 656 /* Query the NQ list*/ 657 rc = bnxt_re_hwrm_dbr_pacing_nqlist_query(rdev); 658 if (rc) { 659 dev_err(rdev_to_dev(rdev), 660 "Failed to Query NQ list rc= %d", rc); 661 return; 662 } 663 /*Configure GRC access for Throttling and aeq_arm register */ 664 writel_fbsd(rdev->en_dev->softc, BNXT_GRCPF_REG_WINDOW_BASE_OUT + 28, 0, 665 rdev->chip_ctx->dbr_aeq_arm_reg & BNXT_GRC_BASE_MASK); 666 667 rdev->dbr_throttling_reg_off = 668 (rdev->chip_ctx->dbr_throttling_reg & 669 BNXT_GRC_OFFSET_MASK) + 0x8000; 670 rdev->dbr_aeq_arm_reg_off = 671 (rdev->chip_ctx->dbr_aeq_arm_reg & 672 BNXT_GRC_OFFSET_MASK) + 0x8000; 673 674 bnxt_re_set_dbq_throttling_reg(rdev, nq->ring_id, rdev->dbq_watermark); 675 } 676 677 static void bnxt_re_dbq_wq_task(struct work_struct *work) 678 { 679 struct bnxt_re_dbq_work *dbq_work = 680 container_of(work, struct bnxt_re_dbq_work, work); 681 struct bnxt_re_dev *rdev; 682 683 rdev = dbq_work->rdev; 684 685 if (!rdev) 686 goto exit; 687 switch (dbq_work->event) { 688 case BNXT_RE_DBQ_EVENT_SCHED: 689 dev_dbg(rdev_to_dev(rdev), "%s: Handle DBQ Pacing event\n", 690 __func__); 691 if (!bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) 692 bnxt_re_hwrm_dbr_pacing_broadcast_event(rdev); 693 else 694 bnxt_re_pacing_alert(rdev); 695 break; 696 case BNXT_RE_DBR_PACING_EVENT: 697 dev_dbg(rdev_to_dev(rdev), "%s: Sched interrupt/pacing worker\n", 698 __func__); 699 if (_is_chip_p7(rdev->chip_ctx)) 700 bnxt_re_pacing_alert(rdev); 701 else if (!rdev->chip_ctx->modes.dbr_pacing_v0) 702 bnxt_re_hwrm_dbr_pacing_qcfg(rdev); 703 break; 704 case BNXT_RE_DBR_NQ_PACING_NOTIFICATION: 705 bnxt_re_handle_dbr_nq_pacing_notification(rdev); 706 
/* Issue a broadcast event to notify other functions 707 * that primary changed 708 */ 709 bnxt_re_hwrm_dbr_pacing_broadcast_event(rdev); 710 break; 711 } 712 exit: 713 kfree(dbq_work); 714 } 715 716 static void bnxt_re_async_notifier(void *handle, struct hwrm_async_event_cmpl *cmpl) 717 { 718 struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); 719 struct bnxt_re_dcb_work *dcb_work; 720 struct bnxt_re_dbq_work *dbq_work; 721 struct bnxt_re_dev *rdev; 722 u16 event_id; 723 u32 data1; 724 u32 data2 = 0; 725 726 if (!cmpl) { 727 pr_err("Async event, bad completion\n"); 728 return; 729 } 730 731 if (!en_info || !en_info->en_dev) { 732 pr_err("Async event, bad en_info or en_dev\n"); 733 return; 734 } 735 rdev = en_info->rdev; 736 737 event_id = le16_to_cpu(cmpl->event_id); 738 data1 = le32_to_cpu(cmpl->event_data1); 739 data2 = le32_to_cpu(cmpl->event_data2); 740 741 if (!rdev || !rdev_to_dev(rdev)) { 742 dev_dbg(NULL, "Async event, bad rdev or netdev\n"); 743 return; 744 } 745 746 if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags) || 747 !test_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) { 748 dev_dbg(NULL, "Async event, device already detached\n"); 749 return; 750 } 751 if (data2 >= 0) 752 dev_dbg(rdev_to_dev(rdev), "Async event_id = %d data1 = %d data2 = %d", 753 event_id, data1, data2); 754 755 switch (event_id) { 756 case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE: 757 /* Not handling the event in older FWs */ 758 if (!is_qport_service_type_supported(rdev)) 759 break; 760 if (!rdev->dcb_wq) 761 break; 762 dcb_work = kzalloc(sizeof(*dcb_work), GFP_ATOMIC); 763 if (!dcb_work) 764 break; 765 766 dcb_work->rdev = rdev; 767 memcpy(&dcb_work->cmpl, cmpl, sizeof(*cmpl)); 768 INIT_WORK(&dcb_work->work, bnxt_re_dcb_wq_task); 769 queue_work(rdev->dcb_wq, &dcb_work->work); 770 break; 771 case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: 772 if (EVENT_DATA1_RESET_NOTIFY_FATAL(data1)) { 773 /* Set rcfw flag to control commands send to Bono 
*/ 774 set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); 775 /* Set bnxt_re flag to control commands send via L2 driver */ 776 set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); 777 wake_up_all(&rdev->rcfw.cmdq.waitq); 778 } 779 break; 780 case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD: 781 if (!rdev->dbr_pacing) 782 break; 783 dbq_work = kzalloc(sizeof(*dbq_work), GFP_ATOMIC); 784 if (!dbq_work) 785 goto unlock; 786 dbq_work->rdev = rdev; 787 dbq_work->event = BNXT_RE_DBR_PACING_EVENT; 788 INIT_WORK(&dbq_work->work, bnxt_re_dbq_wq_task); 789 queue_work(rdev->dbq_wq, &dbq_work->work); 790 rdev->dbr_sw_stats->dbq_int_recv++; 791 break; 792 case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE: 793 if (!rdev->dbr_pacing) 794 break; 795 796 dbq_work = kzalloc(sizeof(*dbq_work), GFP_ATOMIC); 797 if (!dbq_work) 798 goto unlock; 799 dbq_work->rdev = rdev; 800 dbq_work->event = BNXT_RE_DBR_NQ_PACING_NOTIFICATION; 801 INIT_WORK(&dbq_work->work, bnxt_re_dbq_wq_task); 802 queue_work(rdev->dbq_wq, &dbq_work->work); 803 break; 804 805 default: 806 break; 807 } 808 unlock: 809 return; 810 } 811 812 static void bnxt_re_db_fifo_check(struct work_struct *work) 813 { 814 struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev, 815 dbq_fifo_check_work); 816 struct bnxt_qplib_db_pacing_data *pacing_data; 817 u32 pacing_save; 818 819 if (!mutex_trylock(&rdev->dbq_lock)) 820 return; 821 pacing_data = rdev->qplib_res.pacing_data; 822 pacing_save = rdev->do_pacing_save; 823 __wait_for_fifo_occupancy_below_th(rdev); 824 cancel_delayed_work_sync(&rdev->dbq_pacing_work); 825 if (rdev->dbr_recovery_on) 826 goto recovery_on; 827 if (pacing_save > rdev->dbr_def_do_pacing) { 828 /* Double the do_pacing value during the congestion */ 829 pacing_save = pacing_save << 1; 830 } else { 831 /* 832 * when a new congestion is detected increase the do_pacing 833 * by 8 times. And also increase the pacing_th by 4 times. 
The 834 * reason to increase pacing_th is to give more space for the 835 * queue to oscillate down without getting empty, but also more 836 * room for the queue to increase without causing another alarm. 837 */ 838 pacing_save = pacing_save << 3; 839 pacing_data->pacing_th = rdev->pacing_algo_th * 4; 840 } 841 842 if (pacing_save > BNXT_RE_MAX_DBR_DO_PACING) 843 pacing_save = BNXT_RE_MAX_DBR_DO_PACING; 844 845 pacing_data->do_pacing = pacing_save; 846 rdev->do_pacing_save = pacing_data->do_pacing; 847 pacing_data->alarm_th = 848 pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE(rdev->chip_ctx); 849 recovery_on: 850 schedule_delayed_work(&rdev->dbq_pacing_work, 851 msecs_to_jiffies(rdev->dbq_pacing_time)); 852 rdev->dbr_sw_stats->dbq_pacing_alerts++; 853 mutex_unlock(&rdev->dbq_lock); 854 } 855 856 static void bnxt_re_pacing_timer_exp(struct work_struct *work) 857 { 858 struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev, 859 dbq_pacing_work.work); 860 struct bnxt_qplib_db_pacing_data *pacing_data; 861 u32 read_val, fifo_occup; 862 struct bnxt_qplib_nq *nq; 863 864 if (!mutex_trylock(&rdev->dbq_lock)) 865 return; 866 867 pacing_data = rdev->qplib_res.pacing_data; 868 read_val = readl_fbsd(rdev->en_dev->softc , rdev->dbr_db_fifo_reg_off, 0); 869 fifo_occup = pacing_data->fifo_max_depth - 870 ((read_val & pacing_data->fifo_room_mask) >> 871 pacing_data->fifo_room_shift); 872 873 if (fifo_occup > pacing_data->pacing_th) 874 goto restart_timer; 875 876 /* 877 * Instead of immediately going back to the default do_pacing 878 * reduce it by 1/8 times and restart the timer. 879 */ 880 pacing_data->do_pacing = pacing_data->do_pacing - (pacing_data->do_pacing >> 3); 881 pacing_data->do_pacing = max_t(u32, rdev->dbr_def_do_pacing, pacing_data->do_pacing); 882 /* 883 * If the fifo_occup is less than the interrupt enable threshold 884 * enable the interrupt on the primary PF. 
885 */ 886 if (rdev->dbq_int_disable && fifo_occup < rdev->pacing_en_int_th) { 887 if (bnxt_qplib_dbr_pacing_is_primary_pf(rdev->chip_ctx)) { 888 if (!rdev->chip_ctx->modes.dbr_pacing_v0) { 889 nq = &rdev->nqr.nq[0]; 890 bnxt_re_set_dbq_throttling_reg(rdev, nq->ring_id, 891 rdev->dbq_watermark); 892 rdev->dbr_sw_stats->dbq_int_en++; 893 rdev->dbq_int_disable = false; 894 } 895 } 896 } 897 if (pacing_data->do_pacing <= rdev->dbr_def_do_pacing) { 898 bnxt_re_set_default_pacing_data(rdev); 899 rdev->dbr_sw_stats->dbq_pacing_complete++; 900 goto dbq_unlock; 901 } 902 restart_timer: 903 schedule_delayed_work(&rdev->dbq_pacing_work, 904 msecs_to_jiffies(rdev->dbq_pacing_time)); 905 bnxt_re_update_do_pacing_slabs(rdev); 906 rdev->dbr_sw_stats->dbq_pacing_resched++; 907 dbq_unlock: 908 rdev->do_pacing_save = pacing_data->do_pacing; 909 mutex_unlock(&rdev->dbq_lock); 910 } 911 912 void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev) 913 { 914 struct bnxt_qplib_db_pacing_data *pacing_data; 915 916 if (!rdev->dbr_pacing) 917 return; 918 mutex_lock(&rdev->dbq_lock); 919 pacing_data = rdev->qplib_res.pacing_data; 920 921 /* 922 * Increase the alarm_th to max so that other user lib instances do not 923 * keep alerting the driver. 
924 */ 925 pacing_data->alarm_th = pacing_data->fifo_max_depth; 926 pacing_data->do_pacing = BNXT_RE_MAX_DBR_DO_PACING; 927 cancel_work_sync(&rdev->dbq_fifo_check_work); 928 schedule_work(&rdev->dbq_fifo_check_work); 929 mutex_unlock(&rdev->dbq_lock); 930 } 931 932 void bnxt_re_schedule_dbq_event(struct bnxt_qplib_res *res) 933 { 934 struct bnxt_re_dbq_work *dbq_work; 935 struct bnxt_re_dev *rdev; 936 937 rdev = container_of(res, struct bnxt_re_dev, qplib_res); 938 939 atomic_set(&rdev->dbq_intr_running, 1); 940 941 if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) 942 goto exit; 943 /* Run the loop to send dbq event to other functions 944 * for newer FW 945 */ 946 if (bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx) && 947 !rdev->chip_ctx->modes.dbr_pacing_v0) 948 bnxt_re_set_dbq_throttling_for_non_primary(rdev); 949 950 dbq_work = kzalloc(sizeof(*dbq_work), GFP_ATOMIC); 951 if (!dbq_work) 952 goto exit; 953 dbq_work->rdev = rdev; 954 dbq_work->event = BNXT_RE_DBQ_EVENT_SCHED; 955 INIT_WORK(&dbq_work->work, bnxt_re_dbq_wq_task); 956 queue_work(rdev->dbq_wq, &dbq_work->work); 957 rdev->dbr_sw_stats->dbq_int_recv++; 958 rdev->dbq_int_disable = true; 959 exit: 960 atomic_set(&rdev->dbq_intr_running, 0); 961 } 962 963 static void bnxt_re_free_msix(struct bnxt_re_dev *rdev) 964 { 965 struct bnxt_en_dev *en_dev = rdev->en_dev; 966 int rc; 967 968 rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP); 969 if (rc) 970 dev_err(rdev_to_dev(rdev), "netdev %p free_msix failed! rc = 0x%x", 971 rdev->netdev, rc); 972 } 973 974 static int bnxt_re_request_msix(struct bnxt_re_dev *rdev) 975 { 976 struct bnxt_en_dev *en_dev = rdev->en_dev; 977 int rc = 0, num_msix_want, num_msix_got; 978 struct bnxt_msix_entry *entry; 979 980 /* 981 * Request MSIx based on the function type. This is 982 * a temporory solution to enable max VFs when NPAR is 983 * enabled. 
984 * TODO - change the scheme with an adapter specific check 985 * as the latest adapters can support more NQs. For now 986 * this change satisfy all adapter versions. 987 */ 988 989 if (rdev->is_virtfn) 990 num_msix_want = BNXT_RE_MAX_MSIX_VF; 991 else if (BNXT_EN_NPAR(en_dev)) 992 num_msix_want = BNXT_RE_MAX_MSIX_NPAR_PF; 993 else if (_is_chip_gen_p5_p7(rdev->chip_ctx)) 994 num_msix_want = rdev->num_msix_requested ?: BNXT_RE_MAX_MSIX_GEN_P5_PF; 995 else 996 num_msix_want = BNXT_RE_MAX_MSIX_PF; 997 998 /* 999 * Since MSIX vectors are used for both NQs and CREQ, we should try to 1000 * allocate num_online_cpus + 1 by taking into account the CREQ. This 1001 * leaves the number of MSIX vectors for NQs match the number of CPUs 1002 * and allows the system to be fully utilized 1003 */ 1004 num_msix_want = min_t(u32, num_msix_want, num_online_cpus() + 1); 1005 num_msix_want = min_t(u32, num_msix_want, BNXT_RE_MAX_MSIX); 1006 num_msix_want = max_t(u32, num_msix_want, BNXT_RE_MIN_MSIX); 1007 1008 entry = rdev->nqr.msix_entries; 1009 1010 num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP, 1011 entry, num_msix_want); 1012 if (num_msix_got < BNXT_RE_MIN_MSIX) { 1013 rc = -EINVAL; 1014 goto done; 1015 } 1016 if (num_msix_got != num_msix_want) 1017 dev_warn(rdev_to_dev(rdev), 1018 "bnxt_request_msix: wanted %d vectors, got %d\n", 1019 num_msix_want, num_msix_got); 1020 1021 rdev->nqr.num_msix = num_msix_got; 1022 return 0; 1023 done: 1024 if (num_msix_got) 1025 bnxt_re_free_msix(rdev); 1026 return rc; 1027 } 1028 1029 static int __wait_for_ib_unregister(struct bnxt_re_dev *rdev, 1030 struct bnxt_re_en_dev_info *en_info) 1031 { 1032 u64 timeout = 0; 1033 u32 cur_prod = 0, cur_cons = 0; 1034 int retry = 0, rc = 0, ret = 0; 1035 1036 cur_prod = rdev->rcfw.cmdq.hwq.prod; 1037 cur_cons = rdev->rcfw.cmdq.hwq.cons; 1038 timeout = msecs_to_jiffies(BNXT_RE_RECOVERY_IB_UNINIT_WAIT_TIME_MS); 1039 retry = BNXT_RE_RECOVERY_IB_UNINIT_WAIT_RETRY; 1040 /* During module 
exit, increase timeout ten-fold to 100 mins to wait 1041 * as long as possible for ib_unregister() to complete 1042 */ 1043 if (rdev->mod_exit) 1044 retry *= 10; 1045 do { 1046 /* 1047 * Since the caller of this function invokes with bnxt_re_mutex held, 1048 * release it to avoid holding a lock while in wait / sleep mode. 1049 */ 1050 mutex_unlock(&bnxt_re_mutex); 1051 rc = wait_event_timeout(en_info->waitq, 1052 en_info->ib_uninit_done, 1053 timeout); 1054 mutex_lock(&bnxt_re_mutex); 1055 1056 if (!bnxt_re_is_rdev_valid(rdev)) 1057 break; 1058 1059 if (rc) 1060 break; 1061 1062 if (!RCFW_NO_FW_ACCESS(&rdev->rcfw)) { 1063 /* No need to check for cmdq stall during module exit, 1064 * wait for ib unregister to complete 1065 */ 1066 if (!rdev->mod_exit) 1067 ret = __check_cmdq_stall(&rdev->rcfw, &cur_prod, &cur_cons); 1068 if (ret || en_info->ib_uninit_done) 1069 break; 1070 } 1071 } while (retry--); 1072 1073 return rc; 1074 } 1075 1076 static int bnxt_re_handle_start(struct auxiliary_device *adev) 1077 { 1078 struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev); 1079 struct bnxt_re_dev *rdev = NULL; 1080 struct ifnet *real_dev; 1081 struct bnxt_en_dev *en_dev; 1082 struct ifnet *netdev; 1083 int rc = 0; 1084 1085 if (!en_info || !en_info->en_dev) { 1086 pr_err("Start, bad en_info or en_dev\n"); 1087 return -EINVAL; 1088 } 1089 netdev = en_info->en_dev->net; 1090 if (en_info->rdev) { 1091 dev_info(rdev_to_dev(en_info->rdev), 1092 "%s: Device is already added adev %p rdev: %p\n", 1093 __func__, adev, en_info->rdev); 1094 return 0; 1095 } 1096 1097 en_dev = en_info->en_dev; 1098 real_dev = rdma_vlan_dev_real_dev(netdev); 1099 if (!real_dev) 1100 real_dev = netdev; 1101 rc = bnxt_re_add_device(&rdev, real_dev, 1102 en_info->gsi_mode, 1103 BNXT_RE_POST_RECOVERY_INIT, 1104 en_info->wqe_mode, 1105 en_info->num_msix_requested, adev); 1106 if (rc) { 1107 /* Add device failed. Unregister the device. 
1108 * This has to be done explicitly as 1109 * bnxt_re_stop would not have unregistered 1110 */ 1111 rtnl_lock(); 1112 en_dev->en_ops->bnxt_unregister_device(en_dev, BNXT_ROCE_ULP); 1113 rtnl_unlock(); 1114 mutex_lock(&bnxt_re_dev_lock); 1115 gadd_dev_inprogress--; 1116 mutex_unlock(&bnxt_re_dev_lock); 1117 return rc; 1118 } 1119 rdev->adev = adev; 1120 rtnl_lock(); 1121 bnxt_re_get_link_speed(rdev); 1122 rtnl_unlock(); 1123 rc = bnxt_re_ib_init(rdev); 1124 if (rc) { 1125 dev_err(rdev_to_dev(rdev), "Failed ib_init\n"); 1126 return rc; 1127 } 1128 bnxt_re_ib_init_2(rdev); 1129 1130 return rc; 1131 } 1132 1133 static void bnxt_re_stop(void *handle) 1134 { 1135 struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); 1136 struct ifnet *netdev; 1137 struct bnxt_re_dev *rdev; 1138 struct bnxt_en_dev *en_dev; 1139 int rc = 0; 1140 1141 rtnl_unlock(); 1142 mutex_lock(&bnxt_re_mutex); 1143 if (!en_info || !en_info->en_dev) { 1144 pr_err("Stop, bad en_info or en_dev\n"); 1145 goto exit; 1146 } 1147 netdev = en_info->en_dev->net; 1148 rdev = en_info->rdev; 1149 if (!rdev) 1150 goto exit; 1151 1152 if (!bnxt_re_is_rdev_valid(rdev)) 1153 goto exit; 1154 1155 /* 1156 * Check if fw has undergone reset or is in a fatal condition. 
1157 * If so, set flags so that no further commands are sent down to FW 1158 */ 1159 en_dev = rdev->en_dev; 1160 if (en_dev->en_state & BNXT_STATE_FW_FATAL_COND || 1161 en_dev->en_state & BNXT_STATE_FW_RESET_DET) { 1162 /* Set rcfw flag to control commands send to Bono */ 1163 set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); 1164 /* Set bnxt_re flag to control commands send via L2 driver */ 1165 set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); 1166 wake_up_all(&rdev->rcfw.cmdq.waitq); 1167 } 1168 1169 if (test_bit(BNXT_RE_FLAG_STOP_IN_PROGRESS, &rdev->flags)) 1170 goto exit; 1171 set_bit(BNXT_RE_FLAG_STOP_IN_PROGRESS, &rdev->flags); 1172 1173 en_info->wqe_mode = rdev->chip_ctx->modes.wqe_mode; 1174 en_info->gsi_mode = rdev->gsi_ctx.gsi_qp_mode; 1175 en_info->num_msix_requested = rdev->num_msix_requested; 1176 en_info->ib_uninit_done = false; 1177 1178 if (rdev->dbr_pacing) 1179 bnxt_re_set_pacing_dev_state(rdev); 1180 1181 dev_info(rdev_to_dev(rdev), "%s: L2 driver notified to stop." 
1182 "Attempting to stop and Dispatching event " 1183 "to inform the stack\n", __func__); 1184 init_waitqueue_head(&en_info->waitq); 1185 /* Schedule a work item to handle IB UNINIT for recovery */ 1186 bnxt_re_schedule_work(rdev, NETDEV_UNREGISTER, 1187 NULL, netdev, rdev->adev); 1188 rc = __wait_for_ib_unregister(rdev, en_info); 1189 if (!bnxt_re_is_rdev_valid(rdev)) 1190 goto exit; 1191 if (!rc) { 1192 dev_info(rdev_to_dev(rdev), "%s: Attempt to stop failed\n", 1193 __func__); 1194 bnxt_re_detach_err_device(rdev); 1195 goto exit; 1196 } 1197 bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, rdev->adev); 1198 exit: 1199 mutex_unlock(&bnxt_re_mutex); 1200 /* Take rtnl_lock before return, bnxt_re_stop is called with rtnl_lock */ 1201 rtnl_lock(); 1202 1203 return; 1204 } 1205 1206 static void bnxt_re_start(void *handle) 1207 { 1208 rtnl_unlock(); 1209 mutex_lock(&bnxt_re_mutex); 1210 if (bnxt_re_handle_start((struct auxiliary_device *)handle)) 1211 pr_err("Failed to start RoCE device"); 1212 mutex_unlock(&bnxt_re_mutex); 1213 /* Take rtnl_lock before return, bnxt_re_start is called with rtnl_lock */ 1214 rtnl_lock(); 1215 return; 1216 } 1217 1218 static void bnxt_re_shutdown(void *p) 1219 { 1220 struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(p); 1221 struct bnxt_re_dev *rdev; 1222 1223 if (!en_info) { 1224 pr_err("Shutdown, bad en_info\n"); 1225 return; 1226 } 1227 rtnl_unlock(); 1228 mutex_lock(&bnxt_re_mutex); 1229 rdev = en_info->rdev; 1230 if (!rdev || !bnxt_re_is_rdev_valid(rdev)) 1231 goto exit; 1232 1233 /* rtnl_lock held by L2 before coming here */ 1234 bnxt_re_stopqps_and_ib_uninit(rdev); 1235 bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, rdev->adev); 1236 exit: 1237 mutex_unlock(&bnxt_re_mutex); 1238 rtnl_lock(); 1239 return; 1240 } 1241 1242 static void bnxt_re_stop_irq(void *handle) 1243 { 1244 struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); 1245 struct bnxt_qplib_rcfw *rcfw = NULL; 1246 struct bnxt_re_dev 
*rdev; 1247 struct bnxt_qplib_nq *nq; 1248 int indx; 1249 1250 if (!en_info) { 1251 pr_err("Stop irq, bad en_info\n"); 1252 return; 1253 } 1254 rdev = en_info->rdev; 1255 1256 if (!rdev) 1257 return; 1258 1259 rcfw = &rdev->rcfw; 1260 for (indx = 0; indx < rdev->nqr.max_init; indx++) { 1261 nq = &rdev->nqr.nq[indx]; 1262 mutex_lock(&nq->lock); 1263 bnxt_qplib_nq_stop_irq(nq, false); 1264 mutex_unlock(&nq->lock); 1265 } 1266 1267 if (test_bit(BNXT_RE_FLAG_ALLOC_RCFW, &rdev->flags)) 1268 bnxt_qplib_rcfw_stop_irq(rcfw, false); 1269 } 1270 1271 static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) 1272 { 1273 struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); 1274 struct bnxt_msix_entry *msix_ent = NULL; 1275 struct bnxt_qplib_rcfw *rcfw = NULL; 1276 struct bnxt_re_dev *rdev; 1277 struct bnxt_qplib_nq *nq; 1278 int indx, rc, vec; 1279 1280 if (!en_info) { 1281 pr_err("Start irq, bad en_info\n"); 1282 return; 1283 } 1284 rdev = en_info->rdev; 1285 if (!rdev) 1286 return; 1287 if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) 1288 return; 1289 msix_ent = rdev->nqr.msix_entries; 1290 rcfw = &rdev->rcfw; 1291 1292 if (!ent) { 1293 /* Not setting the f/w timeout bit in rcfw. 1294 * During the driver unload the first command 1295 * to f/w will timeout and that will set the 1296 * timeout bit. 1297 */ 1298 dev_err(rdev_to_dev(rdev), "Failed to re-start IRQs\n"); 1299 return; 1300 } 1301 1302 /* Vectors may change after restart, so update with new vectors 1303 * in device structure. 
1304 */ 1305 for (indx = 0; indx < rdev->nqr.num_msix; indx++) 1306 rdev->nqr.msix_entries[indx].vector = ent[indx].vector; 1307 1308 if (test_bit(BNXT_RE_FLAG_ALLOC_RCFW, &rdev->flags)) { 1309 rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector, 1310 false); 1311 if (rc) { 1312 dev_warn(rdev_to_dev(rdev), 1313 "Failed to reinit CREQ\n"); 1314 return; 1315 } 1316 } 1317 for (indx = 0 ; indx < rdev->nqr.max_init; indx++) { 1318 nq = &rdev->nqr.nq[indx]; 1319 vec = indx + 1; 1320 rc = bnxt_qplib_nq_start_irq(nq, indx, msix_ent[vec].vector, 1321 false); 1322 if (rc) { 1323 dev_warn(rdev_to_dev(rdev), 1324 "Failed to reinit NQ index %d\n", indx); 1325 return; 1326 } 1327 } 1328 } 1329 1330 /* 1331 * Except for ulp_async_notifier, the remaining ulp_ops 1332 * below are called with rtnl_lock held 1333 */ 1334 static struct bnxt_ulp_ops bnxt_re_ulp_ops = { 1335 .ulp_async_notifier = bnxt_re_async_notifier, 1336 .ulp_stop = bnxt_re_stop, 1337 .ulp_start = bnxt_re_start, 1338 .ulp_shutdown = bnxt_re_shutdown, 1339 .ulp_irq_stop = bnxt_re_stop_irq, 1340 .ulp_irq_restart = bnxt_re_start_irq, 1341 }; 1342 1343 static inline const char *bnxt_re_netevent(unsigned long event) 1344 { 1345 BNXT_RE_NETDEV_EVENT(event, NETDEV_UP); 1346 BNXT_RE_NETDEV_EVENT(event, NETDEV_DOWN); 1347 BNXT_RE_NETDEV_EVENT(event, NETDEV_CHANGE); 1348 BNXT_RE_NETDEV_EVENT(event, NETDEV_REGISTER); 1349 BNXT_RE_NETDEV_EVENT(event, NETDEV_UNREGISTER); 1350 BNXT_RE_NETDEV_EVENT(event, NETDEV_CHANGEADDR); 1351 return "Unknown"; 1352 } 1353 1354 /* RoCE -> Net driver */ 1355 1356 /* Driver registration routines used to let the networking driver (bnxt_en) 1357 * to know that the RoCE driver is now installed */ 1358 static void bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev) 1359 { 1360 struct bnxt_en_dev *en_dev = rdev->en_dev; 1361 int rc; 1362 1363 rtnl_lock(); 1364 rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev, 1365 BNXT_ROCE_ULP); 1366 rtnl_unlock(); 1367 if (rc) 1368 
dev_err(rdev_to_dev(rdev), "netdev %p unregister failed! rc = 0x%x", 1369 rdev->en_dev->net, rc); 1370 1371 clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); 1372 } 1373 1374 static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev) 1375 { 1376 struct bnxt_en_dev *en_dev = rdev->en_dev; 1377 int rc = 0; 1378 1379 rtnl_lock(); 1380 rc = en_dev->en_ops->bnxt_register_device(en_dev, 1381 BNXT_ROCE_ULP, 1382 &bnxt_re_ulp_ops, 1383 rdev->adev); 1384 rtnl_unlock(); 1385 if (rc) { 1386 dev_err(rdev_to_dev(rdev), "netdev %p register failed! rc = 0x%x", 1387 rdev->netdev, rc); 1388 return rc; 1389 } 1390 1391 return rc; 1392 } 1393 1394 static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev) 1395 { 1396 struct bnxt_qplib_chip_ctx *cctx; 1397 struct bnxt_en_dev *en_dev; 1398 struct bnxt_qplib_res *res; 1399 u32 l2db_len = 0; 1400 u32 offset = 0; 1401 u32 barlen; 1402 int rc; 1403 1404 res = &rdev->qplib_res; 1405 en_dev = rdev->en_dev; 1406 cctx = rdev->chip_ctx; 1407 1408 /* Issue qcfg */ 1409 rc = bnxt_re_hwrm_qcfg(rdev, &l2db_len, &offset); 1410 if (rc) 1411 dev_info(rdev_to_dev(rdev), 1412 "Couldn't get DB bar size, Low latency framework is disabled\n"); 1413 /* set register offsets for both UC and WC */ 1414 if (_is_chip_p7(cctx)) 1415 res->dpi_tbl.ucreg.offset = offset; 1416 else 1417 res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET : 1418 BNXT_QPLIB_DBR_PF_DB_OFFSET; 1419 res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset; 1420 1421 /* If WC mapping is disabled by L2 driver then en_dev->l2_db_size 1422 * is equal to the DB-Bar actual size. This indicates that L2 1423 * is mapping entire bar as UC-. RoCE driver can't enable WC mapping 1424 * in such cases and DB-push will be disabled. 
1425 */ 1426 barlen = pci_resource_len(res->pdev, RCFW_DBR_PCI_BAR_REGION); 1427 if (cctx->modes.db_push && l2db_len && en_dev->l2_db_size != barlen) { 1428 res->dpi_tbl.wcreg.offset = en_dev->l2_db_size; 1429 dev_info(rdev_to_dev(rdev), 1430 "Low latency framework is enabled\n"); 1431 } 1432 1433 return; 1434 } 1435 1436 static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode) 1437 { 1438 struct bnxt_qplib_chip_ctx *cctx; 1439 struct bnxt_en_dev *en_dev; 1440 1441 en_dev = rdev->en_dev; 1442 cctx = rdev->chip_ctx; 1443 cctx->modes.wqe_mode = _is_chip_gen_p5_p7(rdev->chip_ctx) ? 1444 mode : BNXT_QPLIB_WQE_MODE_STATIC; 1445 cctx->modes.te_bypass = false; 1446 if (bnxt_re_hwrm_qcaps(rdev)) 1447 dev_err(rdev_to_dev(rdev), 1448 "Failed to query hwrm qcaps\n"); 1449 /* 1450 * TODO: Need a better mechanism for spreading of the 1451 * 512 extended PPP pages in the presence of VF and 1452 * NPAR, until then not enabling push 1453 */ 1454 if (_is_chip_p7(rdev->chip_ctx) && cctx->modes.db_push) { 1455 if (rdev->is_virtfn || BNXT_EN_NPAR(en_dev)) 1456 cctx->modes.db_push = false; 1457 } 1458 1459 rdev->roce_mode = en_dev->flags & BNXT_EN_FLAG_ROCE_CAP; 1460 dev_dbg(rdev_to_dev(rdev), 1461 "RoCE is supported on the device - caps:0x%x", 1462 rdev->roce_mode); 1463 if (!_is_chip_gen_p5_p7(rdev->chip_ctx)) 1464 rdev->roce_mode = BNXT_RE_FLAG_ROCEV2_CAP; 1465 cctx->hw_stats_size = en_dev->hw_ring_stats_size; 1466 } 1467 1468 static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) 1469 { 1470 struct bnxt_qplib_chip_ctx *chip_ctx; 1471 struct bnxt_qplib_res *res; 1472 1473 if (!rdev->chip_ctx) 1474 return; 1475 1476 res = &rdev->qplib_res; 1477 bnxt_qplib_unmap_db_bar(res); 1478 1479 kfree(res->hctx); 1480 res->rcfw = NULL; 1481 kfree(rdev->dev_attr); 1482 rdev->dev_attr = NULL; 1483 1484 chip_ctx = rdev->chip_ctx; 1485 rdev->chip_ctx = NULL; 1486 res->cctx = NULL; 1487 res->hctx = NULL; 1488 res->pdev = NULL; 1489 res->netdev = NULL; 1490 kfree(chip_ctx); 1491 } 
1492 1493 static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) 1494 { 1495 struct bnxt_qplib_chip_ctx *chip_ctx; 1496 struct bnxt_en_dev *en_dev; 1497 int rc; 1498 1499 en_dev = rdev->en_dev; 1500 /* Supply pci device to qplib */ 1501 rdev->qplib_res.pdev = en_dev->pdev; 1502 rdev->qplib_res.netdev = rdev->netdev; 1503 rdev->qplib_res.en_dev = en_dev; 1504 1505 chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL); 1506 if (!chip_ctx) 1507 return -ENOMEM; 1508 rdev->chip_ctx = chip_ctx; 1509 rdev->qplib_res.cctx = chip_ctx; 1510 rc = bnxt_re_query_hwrm_intf_version(rdev); 1511 if (rc) 1512 goto fail; 1513 rdev->dev_attr = kzalloc(sizeof(*rdev->dev_attr), GFP_KERNEL); 1514 if (!rdev->dev_attr) { 1515 rc = -ENOMEM; 1516 goto fail; 1517 } 1518 rdev->qplib_res.dattr = rdev->dev_attr; 1519 rdev->qplib_res.rcfw = &rdev->rcfw; 1520 rdev->qplib_res.is_vf = rdev->is_virtfn; 1521 1522 rdev->qplib_res.hctx = kzalloc(sizeof(*rdev->qplib_res.hctx), 1523 GFP_KERNEL); 1524 if (!rdev->qplib_res.hctx) { 1525 rc = -ENOMEM; 1526 goto fail; 1527 } 1528 bnxt_re_set_drv_mode(rdev, wqe_mode); 1529 1530 bnxt_re_set_db_offset(rdev); 1531 rc = bnxt_qplib_map_db_bar(&rdev->qplib_res); 1532 if (rc) 1533 goto fail; 1534 1535 rc = bnxt_qplib_enable_atomic_ops_to_root(en_dev->pdev); 1536 if (rc) 1537 dev_dbg(rdev_to_dev(rdev), 1538 "platform doesn't support global atomics"); 1539 1540 return 0; 1541 fail: 1542 kfree(rdev->chip_ctx); 1543 rdev->chip_ctx = NULL; 1544 1545 kfree(rdev->dev_attr); 1546 rdev->dev_attr = NULL; 1547 1548 kfree(rdev->qplib_res.hctx); 1549 rdev->qplib_res.hctx = NULL; 1550 return rc; 1551 } 1552 1553 static u16 bnxt_re_get_rtype(struct bnxt_re_dev *rdev) { 1554 return _is_chip_gen_p5_p7(rdev->chip_ctx) ? 
1555 HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ : 1556 HWRM_RING_ALLOC_INPUT_RING_TYPE_ROCE_CMPL; 1557 } 1558 1559 static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id) 1560 { 1561 int rc = -EINVAL; 1562 struct hwrm_ring_free_input req = {0}; 1563 struct hwrm_ring_free_output resp; 1564 struct bnxt_en_dev *en_dev = rdev->en_dev; 1565 struct bnxt_fw_msg fw_msg; 1566 1567 if (!en_dev) 1568 return rc; 1569 1570 /* To avoid unnecessary error messages during recovery. 1571 * HW is anyway in error state. So dont send down the command */ 1572 if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) 1573 return 0; 1574 1575 /* allocation had failed, no need to issue hwrm */ 1576 if (fw_ring_id == 0xffff) 1577 return 0; 1578 1579 memset(&fw_msg, 0, sizeof(fw_msg)); 1580 1581 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1); 1582 req.ring_type = bnxt_re_get_rtype(rdev); 1583 req.ring_id = cpu_to_le16(fw_ring_id); 1584 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 1585 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 1586 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 1587 if (rc) { 1588 dev_err(rdev_to_dev(rdev), 1589 "Failed to free HW ring with rc = 0x%x", rc); 1590 return rc; 1591 } 1592 dev_dbg(rdev_to_dev(rdev), "HW ring freed with id = 0x%x\n", 1593 fw_ring_id); 1594 1595 return rc; 1596 } 1597 1598 static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, 1599 struct bnxt_re_ring_attr *ring_attr, 1600 u16 *fw_ring_id) 1601 { 1602 int rc = -EINVAL; 1603 struct hwrm_ring_alloc_input req = {0}; 1604 struct hwrm_ring_alloc_output resp; 1605 struct bnxt_en_dev *en_dev = rdev->en_dev; 1606 struct bnxt_fw_msg fw_msg; 1607 1608 if (!en_dev) 1609 return rc; 1610 1611 memset(&fw_msg, 0, sizeof(fw_msg)); 1612 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1); 1613 req.flags = cpu_to_le16(ring_attr->flags); 1614 req.enables = 0; 1615 req.page_tbl_addr = cpu_to_le64(ring_attr->dma_arr[0]); 
1616 if (ring_attr->pages > 1) { 1617 /* Page size is in log2 units */ 1618 req.page_size = BNXT_PAGE_SHIFT; 1619 req.page_tbl_depth = 1; 1620 } else { 1621 req.page_size = 4; 1622 req.page_tbl_depth = 0; 1623 } 1624 1625 req.fbo = 0; 1626 /* Association of ring index with doorbell index and MSIX number */ 1627 req.logical_id = cpu_to_le16(ring_attr->lrid); 1628 req.length = cpu_to_le32(ring_attr->depth + 1); 1629 req.ring_type = ring_attr->type; 1630 req.int_mode = ring_attr->mode; 1631 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 1632 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 1633 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 1634 if (rc) { 1635 dev_err(rdev_to_dev(rdev), 1636 "Failed to allocate HW ring with rc = 0x%x", rc); 1637 return rc; 1638 } 1639 *fw_ring_id = le16_to_cpu(resp.ring_id); 1640 dev_dbg(rdev_to_dev(rdev), 1641 "HW ring allocated with id = 0x%x at slot 0x%x", 1642 resp.ring_id, ring_attr->lrid); 1643 1644 return rc; 1645 } 1646 1647 static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, 1648 u32 fw_stats_ctx_id, u16 tid) 1649 { 1650 struct bnxt_en_dev *en_dev = rdev->en_dev; 1651 struct hwrm_stat_ctx_free_input req = {0}; 1652 struct hwrm_stat_ctx_free_output resp; 1653 struct bnxt_fw_msg fw_msg; 1654 int rc = -EINVAL; 1655 1656 if (!en_dev) 1657 return rc; 1658 1659 /* To avoid unnecessary error messages during recovery. 1660 * HW is anyway in error state. 
So dont send down the command */ 1661 if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) 1662 return 0; 1663 memset(&fw_msg, 0, sizeof(fw_msg)); 1664 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, tid); 1665 req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id); 1666 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 1667 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 1668 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 1669 if (rc) { 1670 dev_err(rdev_to_dev(rdev), 1671 "Failed to free HW stats ctx with rc = 0x%x", rc); 1672 return rc; 1673 } 1674 dev_dbg(rdev_to_dev(rdev), 1675 "HW stats ctx freed with id = 0x%x", fw_stats_ctx_id); 1676 1677 return rc; 1678 } 1679 1680 static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, u16 tid) 1681 { 1682 struct hwrm_stat_ctx_alloc_output resp = {}; 1683 struct hwrm_stat_ctx_alloc_input req = {}; 1684 struct bnxt_en_dev *en_dev = rdev->en_dev; 1685 struct bnxt_qplib_stats *stat; 1686 struct bnxt_qplib_ctx *hctx; 1687 struct bnxt_fw_msg fw_msg; 1688 int rc = 0; 1689 1690 hctx = rdev->qplib_res.hctx; 1691 stat = (tid == 0xffff) ? 
&hctx->stats : &hctx->stats2; 1692 stat->fw_id = INVALID_STATS_CTX_ID; 1693 1694 if (!en_dev) 1695 return -EINVAL; 1696 1697 memset(&fw_msg, 0, sizeof(fw_msg)); 1698 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, 1699 HWRM_STAT_CTX_ALLOC, -1, tid); 1700 req.update_period_ms = cpu_to_le32(1000); 1701 req.stats_dma_length = rdev->chip_ctx->hw_stats_size; 1702 req.stats_dma_addr = cpu_to_le64(stat->dma_map); 1703 req.stat_ctx_flags = HWRM_STAT_CTX_ALLOC_INPUT_STAT_CTX_FLAGS_ROCE; 1704 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 1705 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 1706 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 1707 if (rc) { 1708 dev_err(rdev_to_dev(rdev), 1709 "Failed to allocate HW stats ctx, rc = 0x%x", rc); 1710 return rc; 1711 } 1712 stat->fw_id = le32_to_cpu(resp.stat_ctx_id); 1713 dev_dbg(rdev_to_dev(rdev), "HW stats ctx allocated with id = 0x%x", 1714 stat->fw_id); 1715 1716 return rc; 1717 } 1718 1719 static void bnxt_re_net_unregister_async_event(struct bnxt_re_dev *rdev) 1720 { 1721 const struct bnxt_en_ops *en_ops; 1722 1723 if (rdev->is_virtfn || 1724 test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) 1725 return; 1726 1727 memset(rdev->event_bitmap, 0, sizeof(rdev->event_bitmap)); 1728 en_ops = rdev->en_dev->en_ops; 1729 if (en_ops->bnxt_register_fw_async_events 1730 (rdev->en_dev, BNXT_ROCE_ULP, 1731 (unsigned long *)rdev->event_bitmap, 1732 HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE)) 1733 dev_err(rdev_to_dev(rdev), 1734 "Failed to unregister async event"); 1735 } 1736 1737 static void bnxt_re_net_register_async_event(struct bnxt_re_dev *rdev) 1738 { 1739 const struct bnxt_en_ops *en_ops; 1740 1741 if (rdev->is_virtfn) 1742 return; 1743 1744 rdev->event_bitmap[0] |= 1745 BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE) | 1746 BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY); 1747 1748 rdev->event_bitmap[2] |= 1749 BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT - 64); 
1750 rdev->event_bitmap[2] |= 1751 BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD - 64) | 1752 BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE - 64); 1753 en_ops = rdev->en_dev->en_ops; 1754 if (en_ops->bnxt_register_fw_async_events 1755 (rdev->en_dev, BNXT_ROCE_ULP, 1756 (unsigned long *)rdev->event_bitmap, 1757 HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE)) 1758 dev_err(rdev_to_dev(rdev), 1759 "Failed to reg Async event"); 1760 } 1761 1762 static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) 1763 { 1764 struct bnxt_en_dev *en_dev = rdev->en_dev; 1765 struct hwrm_ver_get_output resp = {0}; 1766 struct hwrm_ver_get_input req = {0}; 1767 struct bnxt_qplib_chip_ctx *cctx; 1768 struct bnxt_fw_msg fw_msg; 1769 int rc = 0; 1770 1771 memset(&fw_msg, 0, sizeof(fw_msg)); 1772 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, 1773 HWRM_VER_GET, -1, -1); 1774 req.hwrm_intf_maj = HWRM_VERSION_MAJOR; 1775 req.hwrm_intf_min = HWRM_VERSION_MINOR; 1776 req.hwrm_intf_upd = HWRM_VERSION_UPDATE; 1777 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 1778 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 1779 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 1780 if (rc) { 1781 dev_err(rdev_to_dev(rdev), 1782 "Failed to query HW version, rc = 0x%x", rc); 1783 return rc; 1784 } 1785 cctx = rdev->chip_ctx; 1786 cctx->hwrm_intf_ver = (u64) le16_to_cpu(resp.hwrm_intf_major) << 48 | 1787 (u64) le16_to_cpu(resp.hwrm_intf_minor) << 32 | 1788 (u64) le16_to_cpu(resp.hwrm_intf_build) << 16 | 1789 le16_to_cpu(resp.hwrm_intf_patch); 1790 1791 cctx->hwrm_cmd_max_timeout = le16_to_cpu(resp.max_req_timeout); 1792 1793 if (!cctx->hwrm_cmd_max_timeout) 1794 cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT; 1795 1796 cctx->chip_num = le16_to_cpu(resp.chip_num); 1797 cctx->chip_rev = resp.chip_rev; 1798 cctx->chip_metal = resp.chip_metal; 1799 return 0; 1800 } 1801 1802 /* Query device config using common hwrm */ 1803 
static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, 1804 u32 *offset) 1805 { 1806 struct bnxt_en_dev *en_dev = rdev->en_dev; 1807 struct hwrm_func_qcfg_output resp = {0}; 1808 struct hwrm_func_qcfg_input req = {0}; 1809 struct bnxt_fw_msg fw_msg; 1810 int rc; 1811 1812 memset(&fw_msg, 0, sizeof(fw_msg)); 1813 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, 1814 HWRM_FUNC_QCFG, -1, -1); 1815 req.fid = cpu_to_le16(0xffff); 1816 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 1817 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 1818 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 1819 if (rc) { 1820 dev_err(rdev_to_dev(rdev), 1821 "Failed to query config, rc = %#x", rc); 1822 return rc; 1823 } 1824 1825 *db_len = PAGE_ALIGN(le16_to_cpu(resp.l2_doorbell_bar_size_kb) * 1024); 1826 *offset = PAGE_ALIGN(le16_to_cpu(resp.legacy_l2_db_size_kb) * 1024); 1827 return 0; 1828 } 1829 1830 /* Query function capabilities using common hwrm */ 1831 int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev) 1832 { 1833 struct bnxt_en_dev *en_dev = rdev->en_dev; 1834 struct hwrm_func_qcaps_output resp = {0}; 1835 struct hwrm_func_qcaps_input req = {0}; 1836 struct bnxt_qplib_chip_ctx *cctx; 1837 struct bnxt_fw_msg fw_msg; 1838 u8 push_enable = false; 1839 int rc; 1840 1841 cctx = rdev->chip_ctx; 1842 memset(&fw_msg, 0, sizeof(fw_msg)); 1843 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, 1844 HWRM_FUNC_QCAPS, -1, -1); 1845 req.fid = cpu_to_le16(0xffff); 1846 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 1847 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 1848 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 1849 if (rc) { 1850 dev_err(rdev_to_dev(rdev), 1851 "Failed to query capabilities, rc = %#x", rc); 1852 return rc; 1853 } 1854 if (_is_chip_p7(rdev->chip_ctx)) 1855 push_enable = 1856 (resp.flags_ext & 1857 HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT_PPP_PUSH_MODE_SUPPORTED) ? 
1858 true : false; 1859 else 1860 push_enable = 1861 (resp.flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_WCB_PUSH_MODE) ? 1862 true : false; 1863 cctx->modes.db_push = push_enable; 1864 1865 cctx->modes.dbr_pacing = 1866 resp.flags_ext & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT_DBR_PACING_SUPPORTED ? 1867 true : false; 1868 cctx->modes.dbr_pacing_ext = 1869 resp.flags_ext2 & 1870 HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED ? 1871 true : false; 1872 cctx->modes.dbr_drop_recov = 1873 (resp.flags_ext2 & 1874 HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT2_SW_DBR_DROP_RECOVERY_SUPPORTED) ? 1875 true : false; 1876 cctx->modes.dbr_pacing_v0 = 1877 (resp.flags_ext2 & 1878 HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT2_DBR_PACING_V0_SUPPORTED) ? 1879 true : false; 1880 dev_dbg(rdev_to_dev(rdev), 1881 "%s: cctx->modes.dbr_pacing = %d cctx->modes.dbr_pacing_ext = %d, dbr_drop_recov %d\n", 1882 __func__, cctx->modes.dbr_pacing, cctx->modes.dbr_pacing_ext, cctx->modes.dbr_drop_recov); 1883 1884 return 0; 1885 } 1886 1887 static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev) 1888 { 1889 struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data; 1890 struct hwrm_func_dbr_pacing_qcfg_output resp = {0}; 1891 struct hwrm_func_dbr_pacing_qcfg_input req = {0}; 1892 struct bnxt_en_dev *en_dev = rdev->en_dev; 1893 struct bnxt_qplib_chip_ctx *cctx; 1894 struct bnxt_fw_msg fw_msg; 1895 u32 primary_nq_id; 1896 int rc; 1897 1898 cctx = rdev->chip_ctx; 1899 memset(&fw_msg, 0, sizeof(fw_msg)); 1900 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, 1901 HWRM_FUNC_DBR_PACING_QCFG, -1, -1); 1902 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 1903 sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); 1904 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 1905 if (rc) { 1906 dev_dbg(rdev_to_dev(rdev), 1907 "Failed to query dbr pacing config, rc = %#x", rc); 1908 return rc; 1909 } 1910 1911 primary_nq_id = le32_to_cpu(resp.primary_nq_id); 1912 if (primary_nq_id == 
0xffffffff && 1913 !bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) { 1914 dev_err(rdev_to_dev(rdev), "%s:%d Invoke bnxt_qplib_dbr_pacing_set_primary_pf with 1\n", 1915 __func__, __LINE__); 1916 bnxt_qplib_dbr_pacing_set_primary_pf(rdev->chip_ctx, 1); 1917 } 1918 1919 if (bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) { 1920 struct bnxt_qplib_nq *nq; 1921 1922 nq = &rdev->nqr.nq[0]; 1923 /* Reset the primary capability */ 1924 if (nq->ring_id != primary_nq_id) 1925 bnxt_qplib_dbr_pacing_set_primary_pf(rdev->chip_ctx, 0); 1926 } 1927 1928 if ((resp.dbr_stat_db_fifo_reg & 1929 HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK) == 1930 HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_GRC) 1931 cctx->dbr_stat_db_fifo = 1932 resp.dbr_stat_db_fifo_reg & 1933 ~HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK; 1934 1935 if ((resp.dbr_throttling_aeq_arm_reg & 1936 HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_MASK) 1937 == HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_GRC) { 1938 cctx->dbr_aeq_arm_reg = resp.dbr_throttling_aeq_arm_reg & 1939 ~HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK; 1940 cctx->dbr_throttling_reg = cctx->dbr_aeq_arm_reg - 4; 1941 } 1942 pacing_data->fifo_max_depth = le32_to_cpu(resp.dbr_stat_db_max_fifo_depth); 1943 if (!pacing_data->fifo_max_depth) 1944 pacing_data->fifo_max_depth = BNXT_RE_MAX_FIFO_DEPTH(cctx); 1945 pacing_data->fifo_room_mask = le32_to_cpu(resp.dbr_stat_db_fifo_reg_fifo_room_mask); 1946 pacing_data->fifo_room_shift = resp.dbr_stat_db_fifo_reg_fifo_room_shift; 1947 dev_dbg(rdev_to_dev(rdev), 1948 "%s: nq:0x%x primary_pf:%d db_fifo:0x%x aeq_arm:0x%x i" 1949 "fifo_max_depth 0x%x , resp.dbr_stat_db_max_fifo_depth 0x%x);\n", 1950 __func__, resp.primary_nq_id, cctx->modes.dbr_primary_pf, 1951 cctx->dbr_stat_db_fifo, cctx->dbr_aeq_arm_reg, 1952 pacing_data->fifo_max_depth, 1953 
le32_to_cpu(resp.dbr_stat_db_max_fifo_depth)); 1954 return 0; 1955 } 1956 1957 static int bnxt_re_hwrm_dbr_pacing_cfg(struct bnxt_re_dev *rdev, bool enable) 1958 { 1959 struct hwrm_func_dbr_pacing_cfg_output resp = {0}; 1960 struct hwrm_func_dbr_pacing_cfg_input req = {0}; 1961 struct bnxt_en_dev *en_dev = rdev->en_dev; 1962 struct bnxt_fw_msg fw_msg; 1963 int rc; 1964 1965 if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) 1966 return 0; 1967 1968 memset(&fw_msg, 0, sizeof(fw_msg)); 1969 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, 1970 HWRM_FUNC_DBR_PACING_CFG, -1, -1); 1971 if (enable) { 1972 req.flags = HWRM_FUNC_DBR_PACING_CFG_INPUT_FLAGS_DBR_NQ_EVENT_ENABLE; 1973 req.enables = 1974 cpu_to_le32(HWRM_FUNC_DBR_PACING_CFG_INPUT_ENABLES_PRIMARY_NQ_ID_VALID | 1975 HWRM_FUNC_DBR_PACING_CFG_INPUT_ENABLES_PACING_THRESHOLD_VALID); 1976 } else { 1977 req.flags = HWRM_FUNC_DBR_PACING_CFG_INPUT_FLAGS_DBR_NQ_EVENT_DISABLE; 1978 } 1979 req.primary_nq_id = cpu_to_le32(rdev->dbq_nq_id); 1980 req.pacing_threshold = cpu_to_le32(rdev->dbq_watermark); 1981 dev_dbg(rdev_to_dev(rdev), "%s: nq_id = 0x%x pacing_threshold = 0x%x", 1982 __func__, req.primary_nq_id, req.pacing_threshold); 1983 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 1984 sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); 1985 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 1986 if (rc) { 1987 dev_dbg(rdev_to_dev(rdev), 1988 "Failed to set dbr pacing config, rc = %#x", rc); 1989 return rc; 1990 } 1991 return 0; 1992 } 1993 1994 /* Net -> RoCE driver */ 1995 1996 /* Device */ 1997 struct bnxt_re_dev *bnxt_re_from_netdev(struct ifnet *netdev) 1998 { 1999 struct bnxt_re_dev *rdev; 2000 2001 rcu_read_lock(); 2002 list_for_each_entry_rcu(rdev, &bnxt_re_dev_list, list) { 2003 if (rdev->netdev == netdev) { 2004 rcu_read_unlock(); 2005 dev_dbg(rdev_to_dev(rdev), 2006 "netdev (%p) found, ref_count = 0x%x", 2007 netdev, atomic_read(&rdev->ref_count)); 2008 return rdev; 2009 
} 2010 } 2011 rcu_read_unlock(); 2012 return NULL; 2013 } 2014 2015 static ssize_t show_rev(struct device *device, struct device_attribute *attr, 2016 char *buf) 2017 { 2018 struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev); 2019 2020 return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor); 2021 } 2022 2023 2024 static ssize_t show_hca(struct device *device, struct device_attribute *attr, 2025 char *buf) 2026 { 2027 struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev); 2028 2029 return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc); 2030 } 2031 2032 static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL); 2033 static DEVICE_ATTR(hca_type, 0444, show_hca, NULL); 2034 static struct device_attribute *bnxt_re_attributes[] = { 2035 &dev_attr_hw_rev, 2036 &dev_attr_hca_type 2037 }; 2038 2039 int ib_register_device_compat(struct bnxt_re_dev *rdev) 2040 { 2041 struct ib_device *ibdev = &rdev->ibdev; 2042 char name[IB_DEVICE_NAME_MAX]; 2043 2044 memset(name, 0, IB_DEVICE_NAME_MAX); 2045 strlcpy(name, "bnxt_re%d", IB_DEVICE_NAME_MAX); 2046 2047 strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); 2048 2049 return ib_register_device(ibdev, NULL); 2050 } 2051 2052 static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) 2053 { 2054 struct ib_device *ibdev = &rdev->ibdev; 2055 int ret = 0; 2056 2057 /* ib device init */ 2058 ibdev->owner = THIS_MODULE; 2059 ibdev->uverbs_abi_ver = BNXT_RE_ABI_VERSION; 2060 ibdev->node_type = RDMA_NODE_IB_CA; 2061 strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA", 2062 strlen(BNXT_RE_DESC) + 5); 2063 ibdev->phys_port_cnt = 1; 2064 2065 bnxt_qplib_get_guid(rdev->dev_addr, (u8 *)&ibdev->node_guid); 2066 2067 /* Data path irqs is one less than the max msix vectors */ 2068 ibdev->num_comp_vectors = rdev->nqr.num_msix - 1; 2069 bnxt_re_set_dma_device(ibdev, rdev); 2070 ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; 2071 2072 /* User space */ 2073 ibdev->uverbs_cmd_mask = 2074 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 2075 
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 2076 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 2077 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 2078 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 2079 (1ull << IB_USER_VERBS_CMD_REG_MR) | 2080 (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 2081 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 2082 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 2083 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 2084 (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 2085 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 2086 (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 2087 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 2088 (1ull << IB_USER_VERBS_CMD_REREG_MR) | 2089 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | 2090 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 2091 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 2092 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | 2093 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | 2094 (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | 2095 (1ull << IB_USER_VERBS_CMD_DEALLOC_MW) | 2096 (1ull << IB_USER_VERBS_CMD_CREATE_AH) | 2097 (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | 2098 (1ull << IB_USER_VERBS_CMD_QUERY_AH) | 2099 (1ull << IB_USER_VERBS_CMD_DESTROY_AH); 2100 2101 ibdev->uverbs_ex_cmd_mask = (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP); 2102 ibdev->uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_POLL_CQ); 2103 2104 #define bnxt_re_ib_ah bnxt_re_ah 2105 #define bnxt_re_ib_cq bnxt_re_cq 2106 #define bnxt_re_ib_pd bnxt_re_pd 2107 #define bnxt_re_ib_srq bnxt_re_srq 2108 #define bnxt_re_ib_ucontext bnxt_re_ucontext 2109 INIT_IB_DEVICE_OPS(&ibdev->ops, bnxt_re, BNXT_RE); 2110 2111 ibdev->query_device = bnxt_re_query_device; 2112 ibdev->modify_device = bnxt_re_modify_device; 2113 ibdev->query_port = bnxt_re_query_port; 2114 ibdev->modify_port = bnxt_re_modify_port; 2115 ibdev->get_port_immutable = bnxt_re_get_port_immutable; 2116 ibdev->query_pkey = bnxt_re_query_pkey; 2117 ibdev->query_gid = bnxt_re_query_gid; 2118 ibdev->get_netdev = bnxt_re_get_netdev; 2119 ibdev->add_gid = bnxt_re_add_gid; 2120 ibdev->del_gid = 
bnxt_re_del_gid; 2121 ibdev->get_link_layer = bnxt_re_get_link_layer; 2122 ibdev->alloc_pd = bnxt_re_alloc_pd; 2123 ibdev->dealloc_pd = bnxt_re_dealloc_pd; 2124 ibdev->create_ah = bnxt_re_create_ah; 2125 ibdev->modify_ah = bnxt_re_modify_ah; 2126 ibdev->query_ah = bnxt_re_query_ah; 2127 ibdev->destroy_ah = bnxt_re_destroy_ah; 2128 ibdev->create_srq = bnxt_re_create_srq; 2129 ibdev->modify_srq = bnxt_re_modify_srq; 2130 ibdev->query_srq = bnxt_re_query_srq; 2131 ibdev->destroy_srq = bnxt_re_destroy_srq; 2132 ibdev->post_srq_recv = bnxt_re_post_srq_recv; 2133 ibdev->create_qp = bnxt_re_create_qp; 2134 ibdev->modify_qp = bnxt_re_modify_qp; 2135 ibdev->query_qp = bnxt_re_query_qp; 2136 ibdev->destroy_qp = bnxt_re_destroy_qp; 2137 ibdev->post_send = bnxt_re_post_send; 2138 ibdev->post_recv = bnxt_re_post_recv; 2139 ibdev->create_cq = bnxt_re_create_cq; 2140 ibdev->modify_cq = bnxt_re_modify_cq; 2141 ibdev->destroy_cq = bnxt_re_destroy_cq; 2142 ibdev->resize_cq = bnxt_re_resize_cq; 2143 ibdev->poll_cq = bnxt_re_poll_cq; 2144 ibdev->req_notify_cq = bnxt_re_req_notify_cq; 2145 ibdev->get_dma_mr = bnxt_re_get_dma_mr; 2146 ibdev->get_hw_stats = bnxt_re_get_hw_stats; 2147 ibdev->alloc_hw_stats = bnxt_re_alloc_hw_port_stats; 2148 ibdev->dereg_mr = bnxt_re_dereg_mr; 2149 ibdev->alloc_mr = bnxt_re_alloc_mr; 2150 ibdev->map_mr_sg = bnxt_re_map_mr_sg; 2151 ibdev->alloc_mw = bnxt_re_alloc_mw; 2152 ibdev->dealloc_mw = bnxt_re_dealloc_mw; 2153 ibdev->reg_user_mr = bnxt_re_reg_user_mr; 2154 ibdev->rereg_user_mr = bnxt_re_rereg_user_mr; 2155 ibdev->disassociate_ucontext = bnxt_re_disassociate_ucntx; 2156 ibdev->alloc_ucontext = bnxt_re_alloc_ucontext; 2157 ibdev->dealloc_ucontext = bnxt_re_dealloc_ucontext; 2158 ibdev->mmap = bnxt_re_mmap; 2159 ibdev->process_mad = bnxt_re_process_mad; 2160 2161 ret = ib_register_device_compat(rdev); 2162 return ret; 2163 } 2164 2165 static void bnxt_re_dev_dealloc(struct bnxt_re_dev *rdev) 2166 { 2167 int i = BNXT_RE_REF_WAIT_COUNT; 2168 2169 
dev_dbg(rdev_to_dev(rdev), "%s:Remove the device %p\n", __func__, rdev); 2170 /* Wait for rdev refcount to come down */ 2171 while ((atomic_read(&rdev->ref_count) > 1) && i--) 2172 msleep(100); 2173 2174 if (atomic_read(&rdev->ref_count) > 1) 2175 dev_err(rdev_to_dev(rdev), 2176 "Failed waiting for ref count to deplete %d", 2177 atomic_read(&rdev->ref_count)); 2178 2179 atomic_set(&rdev->ref_count, 0); 2180 if_rele(rdev->netdev); 2181 rdev->netdev = NULL; 2182 synchronize_rcu(); 2183 2184 kfree(rdev->gid_map); 2185 kfree(rdev->dbg_stats); 2186 ib_dealloc_device(&rdev->ibdev); 2187 } 2188 2189 static struct bnxt_re_dev *bnxt_re_dev_alloc(struct ifnet *netdev, 2190 struct bnxt_en_dev *en_dev) 2191 { 2192 struct bnxt_re_dev *rdev; 2193 u32 count; 2194 2195 /* Allocate bnxt_re_dev instance here */ 2196 rdev = (struct bnxt_re_dev *)compat_ib_alloc_device(sizeof(*rdev)); 2197 if (!rdev) { 2198 pr_err("%s: bnxt_re_dev allocation failure!", 2199 ROCE_DRV_MODULE_NAME); 2200 return NULL; 2201 } 2202 /* Default values */ 2203 atomic_set(&rdev->ref_count, 0); 2204 rdev->netdev = netdev; 2205 dev_hold(rdev->netdev); 2206 rdev->en_dev = en_dev; 2207 rdev->id = rdev->en_dev->pdev->devfn; 2208 INIT_LIST_HEAD(&rdev->qp_list); 2209 mutex_init(&rdev->qp_lock); 2210 mutex_init(&rdev->cc_lock); 2211 mutex_init(&rdev->dbq_lock); 2212 bnxt_re_clear_rsors_stat(&rdev->stats.rsors); 2213 rdev->cosq[0] = rdev->cosq[1] = 0xFFFF; 2214 rdev->min_tx_depth = 1; 2215 rdev->stats.stats_query_sec = 1; 2216 /* Disable priority vlan as the default mode is DSCP based PFC */ 2217 rdev->cc_param.disable_prio_vlan_tx = 1; 2218 2219 /* Initialize worker for DBR Pacing */ 2220 INIT_WORK(&rdev->dbq_fifo_check_work, bnxt_re_db_fifo_check); 2221 INIT_DELAYED_WORK(&rdev->dbq_pacing_work, bnxt_re_pacing_timer_exp); 2222 rdev->gid_map = kzalloc(sizeof(*(rdev->gid_map)) * 2223 BNXT_RE_MAX_SGID_ENTRIES, 2224 GFP_KERNEL); 2225 if (!rdev->gid_map) { 2226 ib_dealloc_device(&rdev->ibdev); 2227 return NULL; 2228 } 2229 
for(count = 0; count < BNXT_RE_MAX_SGID_ENTRIES; count++) 2230 rdev->gid_map[count] = -1; 2231 2232 rdev->dbg_stats = kzalloc(sizeof(*rdev->dbg_stats), GFP_KERNEL); 2233 if (!rdev->dbg_stats) { 2234 ib_dealloc_device(&rdev->ibdev); 2235 return NULL; 2236 } 2237 2238 return rdev; 2239 } 2240 2241 static int bnxt_re_handle_unaffi_async_event( 2242 struct creq_func_event *unaffi_async) 2243 { 2244 switch (unaffi_async->event) { 2245 case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR: 2246 case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR: 2247 case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR: 2248 case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR: 2249 case CREQ_FUNC_EVENT_EVENT_CQ_ERROR: 2250 case CREQ_FUNC_EVENT_EVENT_TQM_ERROR: 2251 case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR: 2252 case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR: 2253 case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR: 2254 case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR: 2255 case CREQ_FUNC_EVENT_EVENT_TIM_ERROR: 2256 break; 2257 default: 2258 return -EINVAL; 2259 } 2260 return 0; 2261 } 2262 2263 static int bnxt_re_handle_qp_async_event(void *qp_event, struct bnxt_re_qp *qp) 2264 { 2265 struct creq_qp_error_notification *err_event; 2266 struct ib_event event; 2267 unsigned int flags; 2268 2269 if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR && 2270 !qp->qplib_qp.is_user) { 2271 flags = bnxt_re_lock_cqs(qp); 2272 bnxt_qplib_add_flush_qp(&qp->qplib_qp); 2273 bnxt_re_unlock_cqs(qp, flags); 2274 } 2275 memset(&event, 0, sizeof(event)); 2276 event.device = &qp->rdev->ibdev; 2277 event.element.qp = &qp->ib_qp; 2278 event.event = IB_EVENT_QP_FATAL; 2279 2280 err_event = qp_event; 2281 switch(err_event->res_err_state_reason) { 2282 case CFCQ_RES_ERR_STATE_REASON_RES_EXCEED_MAX: 2283 case CFCQ_RES_ERR_STATE_REASON_RES_PAYLOAD_LENGTH_MISMATCH: 2284 case CFCQ_RES_ERR_STATE_REASON_RES_OPCODE_ERROR: 2285 case CFCQ_RES_ERR_STATE_REASON_RES_PSN_SEQ_ERROR_RETRY_LIMIT: 2286 case CFCQ_RES_ERR_STATE_REASON_RES_RX_INVALID_R_KEY: 2287 case CFCQ_RES_ERR_STATE_REASON_RES_RX_DOMAIN_ERROR: 
2288 case CFCQ_RES_ERR_STATE_REASON_RES_RX_NO_PERMISSION: 2289 case CFCQ_RES_ERR_STATE_REASON_RES_RX_RANGE_ERROR: 2290 case CFCQ_RES_ERR_STATE_REASON_RES_TX_INVALID_R_KEY: 2291 case CFCQ_RES_ERR_STATE_REASON_RES_TX_DOMAIN_ERROR: 2292 case CFCQ_RES_ERR_STATE_REASON_RES_TX_NO_PERMISSION: 2293 case CFCQ_RES_ERR_STATE_REASON_RES_TX_RANGE_ERROR: 2294 case CFCQ_RES_ERR_STATE_REASON_RES_IVALID_DUP_RKEY: 2295 case CFCQ_RES_ERR_STATE_REASON_RES_UNALIGN_ATOMIC: 2296 event.event = IB_EVENT_QP_ACCESS_ERR; 2297 break; 2298 case CFCQ_RES_ERR_STATE_REASON_RES_EXCEEDS_WQE: 2299 case CFCQ_RES_ERR_STATE_REASON_RES_WQE_FORMAT_ERROR: 2300 case CFCQ_RES_ERR_STATE_REASON_RES_SRQ_LOAD_ERROR: 2301 case CFCQ_RES_ERR_STATE_REASON_RES_UNSUPPORTED_OPCODE: 2302 case CFCQ_RES_ERR_STATE_REASON_RES_REM_INVALIDATE: 2303 event.event = IB_EVENT_QP_REQ_ERR; 2304 break; 2305 case CFCQ_RES_ERR_STATE_REASON_RES_IRRQ_OFLOW: 2306 case CFCQ_RES_ERR_STATE_REASON_RES_CMP_ERROR: 2307 case CFCQ_RES_ERR_STATE_REASON_RES_CQ_LOAD_ERROR: 2308 case CFCQ_RES_ERR_STATE_REASON_RES_TX_PCI_ERROR: 2309 case CFCQ_RES_ERR_STATE_REASON_RES_RX_PCI_ERROR: 2310 case CFCQ_RES_ERR_STATE_REASON_RES_MEMORY_ERROR: 2311 case CFCQ_RES_ERR_STATE_REASON_RES_SRQ_ERROR: 2312 event.event = IB_EVENT_QP_FATAL; 2313 break; 2314 default: 2315 if (qp->qplib_qp.srq) 2316 event.event = IB_EVENT_QP_LAST_WQE_REACHED; 2317 break; 2318 } 2319 2320 if (err_event->res_err_state_reason) 2321 dev_err(rdev_to_dev(qp->rdev), 2322 "%s %s qp_id: %d cons (%d %d) req (%d %d) res (%d %d)\n", 2323 __func__, qp->qplib_qp.is_user ? 
"user" : "kernel", 2324 qp->qplib_qp.id, 2325 err_event->sq_cons_idx, 2326 err_event->rq_cons_idx, 2327 err_event->req_slow_path_state, 2328 err_event->req_err_state_reason, 2329 err_event->res_slow_path_state, 2330 err_event->res_err_state_reason); 2331 2332 if (event.device && qp->ib_qp.event_handler) 2333 qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); 2334 2335 return 0; 2336 } 2337 2338 static int bnxt_re_handle_cq_async_error(void *event, struct bnxt_re_cq *cq) 2339 { 2340 struct creq_cq_error_notification *cqerr; 2341 bool send = false; 2342 2343 cqerr = event; 2344 switch (cqerr->cq_err_reason) { 2345 case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_INVALID_ERROR: 2346 case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_OVERFLOW_ERROR: 2347 case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_LOAD_ERROR: 2348 case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_INVALID_ERROR: 2349 case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_OVERFLOW_ERROR: 2350 case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_LOAD_ERROR: 2351 send = true; 2352 default: 2353 break; 2354 } 2355 2356 if (send && cq->ibcq.event_handler) { 2357 struct ib_event ibevent = {}; 2358 2359 ibevent.event = IB_EVENT_CQ_ERR; 2360 ibevent.element.cq = &cq->ibcq; 2361 ibevent.device = &cq->rdev->ibdev; 2362 2363 dev_err(rdev_to_dev(cq->rdev), 2364 "%s err reason %d\n", __func__, cqerr->cq_err_reason); 2365 cq->ibcq.event_handler(&ibevent, cq->ibcq.cq_context); 2366 } 2367 2368 cq->qplib_cq.is_cq_err_event = true; 2369 2370 return 0; 2371 } 2372 2373 static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async, 2374 void *obj) 2375 { 2376 struct bnxt_qplib_qp *qplqp; 2377 struct bnxt_qplib_cq *qplcq; 2378 struct bnxt_re_qp *qp; 2379 struct bnxt_re_cq *cq; 2380 int rc = 0; 2381 u8 event; 2382 2383 if (!obj) 2384 return rc; /* QP was already dead, still return success */ 2385 2386 event = affi_async->event; 2387 switch (event) { 2388 case 
CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION: 2389 qplqp = obj; 2390 qp = container_of(qplqp, struct bnxt_re_qp, qplib_qp); 2391 rc = bnxt_re_handle_qp_async_event(affi_async, qp); 2392 break; 2393 case CREQ_QP_EVENT_EVENT_CQ_ERROR_NOTIFICATION: 2394 qplcq = obj; 2395 cq = container_of(qplcq, struct bnxt_re_cq, qplib_cq); 2396 rc = bnxt_re_handle_cq_async_error(affi_async, cq); 2397 break; 2398 default: 2399 rc = -EINVAL; 2400 } 2401 2402 return rc; 2403 } 2404 2405 static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw, 2406 void *aeqe, void *obj) 2407 { 2408 struct creq_func_event *unaffi_async; 2409 struct creq_qp_event *affi_async; 2410 u8 type; 2411 int rc; 2412 2413 type = ((struct creq_base *)aeqe)->type; 2414 if (type == CREQ_BASE_TYPE_FUNC_EVENT) { 2415 unaffi_async = aeqe; 2416 rc = bnxt_re_handle_unaffi_async_event(unaffi_async); 2417 } else { 2418 affi_async = aeqe; 2419 rc = bnxt_re_handle_affi_async_event(affi_async, obj); 2420 } 2421 2422 return rc; 2423 } 2424 2425 static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq, 2426 struct bnxt_qplib_srq *handle, u8 event) 2427 { 2428 struct bnxt_re_srq *srq = to_bnxt_re(handle, struct bnxt_re_srq, 2429 qplib_srq); 2430 struct ib_event ib_event; 2431 2432 if (srq == NULL) { 2433 pr_err("%s: SRQ is NULL, SRQN not handled", 2434 ROCE_DRV_MODULE_NAME); 2435 return -EINVAL; 2436 } 2437 ib_event.device = &srq->rdev->ibdev; 2438 ib_event.element.srq = &srq->ibsrq; 2439 if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT) 2440 ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED; 2441 else 2442 ib_event.event = IB_EVENT_SRQ_ERR; 2443 2444 if (srq->ibsrq.event_handler) { 2445 /* Lock event_handler? 
*/ 2446 (*srq->ibsrq.event_handler)(&ib_event, 2447 srq->ibsrq.srq_context); 2448 } 2449 return 0; 2450 } 2451 2452 static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, 2453 struct bnxt_qplib_cq *handle) 2454 { 2455 struct bnxt_re_cq *cq = to_bnxt_re(handle, struct bnxt_re_cq, 2456 qplib_cq); 2457 u32 *cq_ptr; 2458 2459 if (cq == NULL) { 2460 pr_err("%s: CQ is NULL, CQN not handled", 2461 ROCE_DRV_MODULE_NAME); 2462 return -EINVAL; 2463 } 2464 /* CQ already in destroy path. Do not handle any more events */ 2465 if (handle->destroyed || !atomic_read(&cq->ibcq.usecnt)) { 2466 if (!handle->destroyed) 2467 dev_dbg(NULL, "%s: CQ being destroyed, CQN not handled", 2468 ROCE_DRV_MODULE_NAME); 2469 return 0; 2470 } 2471 2472 if (cq->ibcq.comp_handler) { 2473 if (cq->uctx_cq_page) { 2474 cq_ptr = (u32 *)cq->uctx_cq_page; 2475 *cq_ptr = cq->qplib_cq.toggle; 2476 } 2477 /* Lock comp_handler? */ 2478 (*cq->ibcq.comp_handler)(&cq->ibcq, cq->ibcq.cq_context); 2479 } 2480 2481 return 0; 2482 } 2483 2484 struct bnxt_qplib_nq *bnxt_re_get_nq(struct bnxt_re_dev *rdev) 2485 { 2486 int min, indx; 2487 2488 mutex_lock(&rdev->nqr.load_lock); 2489 for (indx = 0, min = 0; indx < (rdev->nqr.num_msix - 1); indx++) { 2490 if (rdev->nqr.nq[min].load > rdev->nqr.nq[indx].load) 2491 min = indx; 2492 } 2493 rdev->nqr.nq[min].load++; 2494 mutex_unlock(&rdev->nqr.load_lock); 2495 2496 return &rdev->nqr.nq[min]; 2497 } 2498 2499 void bnxt_re_put_nq(struct bnxt_re_dev *rdev, struct bnxt_qplib_nq *nq) 2500 { 2501 mutex_lock(&rdev->nqr.load_lock); 2502 nq->load--; 2503 mutex_unlock(&rdev->nqr.load_lock); 2504 } 2505 2506 static bool bnxt_re_check_min_attr(struct bnxt_re_dev *rdev) 2507 { 2508 struct bnxt_qplib_dev_attr *attr; 2509 bool rc = true; 2510 2511 attr = rdev->dev_attr; 2512 2513 if (!attr->max_cq || !attr->max_qp || 2514 !attr->max_sgid || !attr->max_mr) { 2515 dev_err(rdev_to_dev(rdev),"Insufficient RoCE resources"); 2516 dev_dbg(rdev_to_dev(rdev), 2517 "max_cq = %d, max_qp = %d, 
max_dpi = %d, max_sgid = %d, max_mr = %d", 2518 attr->max_cq, attr->max_qp, attr->max_dpi, 2519 attr->max_sgid, attr->max_mr); 2520 rc = false; 2521 } 2522 return rc; 2523 } 2524 2525 static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp, 2526 u8 port_num, enum ib_event_type event) 2527 { 2528 struct ib_event ib_event; 2529 2530 ib_event.device = ibdev; 2531 if (qp) { 2532 ib_event.element.qp = qp; 2533 ib_event.event = event; 2534 if (qp->event_handler) 2535 qp->event_handler(&ib_event, qp->qp_context); 2536 } else { 2537 ib_event.element.port_num = port_num; 2538 ib_event.event = event; 2539 ib_dispatch_event(&ib_event); 2540 } 2541 2542 dev_dbg(rdev_to_dev(to_bnxt_re_dev(ibdev, ibdev)), 2543 "ibdev %p Event 0x%x port_num 0x%x", ibdev, event, port_num); 2544 } 2545 2546 static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev, 2547 struct bnxt_re_qp *qp) 2548 { 2549 if (rdev->gsi_ctx.gsi_qp_mode == BNXT_RE_GSI_MODE_ALL) 2550 return (qp->ib_qp.qp_type == IB_QPT_GSI) || 2551 (qp == rdev->gsi_ctx.gsi_sqp); 2552 else 2553 return (qp->ib_qp.qp_type == IB_QPT_GSI); 2554 } 2555 2556 static void bnxt_re_stop_all_nonqp1_nonshadow_qps(struct bnxt_re_dev *rdev) 2557 { 2558 struct bnxt_qplib_qp *qpl_qp; 2559 bool dev_detached = false; 2560 struct ib_qp_attr qp_attr; 2561 int num_qps_stopped = 0; 2562 int mask = IB_QP_STATE; 2563 struct bnxt_re_qp *qp; 2564 unsigned long flags; 2565 2566 if (!rdev) 2567 return; 2568 2569 restart: 2570 if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) 2571 dev_detached = true; 2572 2573 qp_attr.qp_state = IB_QPS_ERR; 2574 mutex_lock(&rdev->qp_lock); 2575 list_for_each_entry(qp, &rdev->qp_list, list) { 2576 qpl_qp = &qp->qplib_qp; 2577 if (dev_detached || !bnxt_re_is_qp1_or_shadow_qp(rdev, qp)) { 2578 if (qpl_qp->state != 2579 CMDQ_MODIFY_QP_NEW_STATE_RESET && 2580 qpl_qp->state != 2581 CMDQ_MODIFY_QP_NEW_STATE_ERR) { 2582 if (dev_detached) { 2583 /* 2584 * Cant actually send the command down, 2585 * 
marking the state for bookkeeping 2586 */ 2587 qpl_qp->state = 2588 CMDQ_MODIFY_QP_NEW_STATE_ERR; 2589 qpl_qp->cur_qp_state = qpl_qp->state; 2590 if (!qpl_qp->is_user) { 2591 /* Add to flush list */ 2592 flags = bnxt_re_lock_cqs(qp); 2593 bnxt_qplib_add_flush_qp(qpl_qp); 2594 bnxt_re_unlock_cqs(qp, flags); 2595 } 2596 } else { 2597 num_qps_stopped++; 2598 bnxt_re_modify_qp(&qp->ib_qp, 2599 &qp_attr, mask, 2600 NULL); 2601 } 2602 2603 bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp, 2604 1, IB_EVENT_QP_FATAL); 2605 /* 2606 * 1. Release qp_lock after a budget to unblock other verb 2607 * requests (like qp_destroy) from stack. 2608 * 2. Traverse through the qp_list freshly as addition / deletion 2609 * might have happened since qp_lock is getting released here. 2610 */ 2611 if (num_qps_stopped % BNXT_RE_STOP_QPS_BUDGET == 0) { 2612 mutex_unlock(&rdev->qp_lock); 2613 goto restart; 2614 } 2615 } 2616 } 2617 } 2618 2619 mutex_unlock(&rdev->qp_lock); 2620 } 2621 2622 static int bnxt_re_update_gid(struct bnxt_re_dev *rdev) 2623 { 2624 struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl; 2625 struct bnxt_qplib_gid gid; 2626 u16 gid_idx, index; 2627 int rc = 0; 2628 2629 if (!test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) 2630 return 0; 2631 2632 if (sgid_tbl == NULL) { 2633 dev_err(rdev_to_dev(rdev), "QPLIB: SGID table not allocated"); 2634 return -EINVAL; 2635 } 2636 2637 for (index = 0; index < sgid_tbl->active; index++) { 2638 gid_idx = sgid_tbl->hw_id[index]; 2639 2640 if (!memcmp(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero, 2641 sizeof(bnxt_qplib_gid_zero))) 2642 continue; 2643 /* Need to modify the VLAN enable setting of non VLAN GID only 2644 * as setting is done for VLAN GID while adding GID 2645 * 2646 * If disable_prio_vlan_tx is enable, then we'll need to remove the 2647 * vlan entry from the sgid_tbl. 
2648 */ 2649 if (sgid_tbl->vlan[index] == true) 2650 continue; 2651 2652 memcpy(&gid, &sgid_tbl->tbl[index], sizeof(gid)); 2653 2654 rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx, 2655 rdev->dev_addr); 2656 } 2657 2658 return rc; 2659 } 2660 2661 static void bnxt_re_clear_cc(struct bnxt_re_dev *rdev) 2662 { 2663 struct bnxt_qplib_cc_param *cc_param = &rdev->cc_param; 2664 2665 if (_is_chip_p7(rdev->chip_ctx)) { 2666 cc_param->mask = CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP; 2667 } else { 2668 cc_param->mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE | 2669 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC | 2670 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN); 2671 2672 if (!is_qport_service_type_supported(rdev)) 2673 cc_param->mask |= 2674 (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP | 2675 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP | 2676 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP); 2677 } 2678 2679 cc_param->cur_mask = cc_param->mask; 2680 2681 if (bnxt_qplib_modify_cc(&rdev->qplib_res, cc_param)) 2682 dev_err(rdev_to_dev(rdev), "Failed to modify cc\n"); 2683 } 2684 2685 static int bnxt_re_setup_cc(struct bnxt_re_dev *rdev) 2686 { 2687 struct bnxt_qplib_cc_param *cc_param = &rdev->cc_param; 2688 int rc; 2689 2690 if (_is_chip_p7(rdev->chip_ctx)) { 2691 cc_param->enable = 0x0; 2692 cc_param->mask = CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP; 2693 } else { 2694 cc_param->enable = 0x1; 2695 cc_param->mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE | 2696 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC | 2697 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN); 2698 2699 if (!is_qport_service_type_supported(rdev)) 2700 cc_param->mask |= 2701 (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP | 2702 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP | 2703 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP); 2704 } 2705 2706 cc_param->cur_mask = cc_param->mask; 2707 2708 rc = bnxt_qplib_modify_cc(&rdev->qplib_res, cc_param); 2709 if (rc) { 2710 dev_err(rdev_to_dev(rdev), "Failed to modify cc\n"); 2711 return rc; 
2712 } 2713 /* Reset the programming mask */ 2714 cc_param->mask = 0; 2715 if (cc_param->qp1_tos_dscp != cc_param->tos_dscp) { 2716 cc_param->qp1_tos_dscp = cc_param->tos_dscp; 2717 rc = bnxt_re_update_qp1_tos_dscp(rdev); 2718 if (rc) { 2719 dev_err(rdev_to_dev(rdev), "%s:Failed to modify QP1:%d", 2720 __func__, rc); 2721 goto clear; 2722 } 2723 } 2724 return 0; 2725 2726 clear: 2727 bnxt_re_clear_cc(rdev); 2728 return rc; 2729 } 2730 2731 int bnxt_re_query_hwrm_dscp2pri(struct bnxt_re_dev *rdev, 2732 struct bnxt_re_dscp2pri *d2p, u16 *count, 2733 u16 target_id) 2734 { 2735 struct bnxt_en_dev *en_dev = rdev->en_dev; 2736 struct hwrm_queue_dscp2pri_qcfg_input req; 2737 struct hwrm_queue_dscp2pri_qcfg_output resp; 2738 struct bnxt_re_dscp2pri *dscp2pri; 2739 struct bnxt_fw_msg fw_msg; 2740 u16 in_count = *count; 2741 dma_addr_t dma_handle; 2742 int rc = 0, i; 2743 u16 data_len; 2744 u8 *kmem; 2745 2746 data_len = *count * sizeof(*dscp2pri); 2747 memset(&fw_msg, 0, sizeof(fw_msg)); 2748 memset(&req, 0, sizeof(req)); 2749 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, 2750 HWRM_QUEUE_DSCP2PRI_QCFG, -1, target_id); 2751 req.port_id = (target_id == 0xFFFF) ? 
en_dev->pf_port_id : 1; 2752 2753 kmem = dma_zalloc_coherent(&en_dev->pdev->dev, data_len, &dma_handle, 2754 GFP_KERNEL); 2755 if (!kmem) { 2756 dev_err(rdev_to_dev(rdev), 2757 "dma_zalloc_coherent failure, length = %u\n", 2758 (unsigned)data_len); 2759 return -ENOMEM; 2760 } 2761 req.dest_data_addr = cpu_to_le64(dma_handle); 2762 req.dest_data_buffer_size = cpu_to_le16(data_len); 2763 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 2764 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 2765 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 2766 if (rc) 2767 goto out; 2768 2769 /* Upload the DSCP-MASK-PRI tuple(s) */ 2770 dscp2pri = (struct bnxt_re_dscp2pri *)kmem; 2771 for (i = 0; i < le16_to_cpu(resp.entry_cnt) && i < in_count; i++) { 2772 d2p[i].dscp = dscp2pri->dscp; 2773 d2p[i].mask = dscp2pri->mask; 2774 d2p[i].pri = dscp2pri->pri; 2775 dscp2pri++; 2776 } 2777 *count = le16_to_cpu(resp.entry_cnt); 2778 out: 2779 dma_free_coherent(&en_dev->pdev->dev, data_len, kmem, dma_handle); 2780 return rc; 2781 } 2782 2783 int bnxt_re_prio_vlan_tx_update(struct bnxt_re_dev *rdev) 2784 { 2785 /* Remove the VLAN from the GID entry */ 2786 if (rdev->cc_param.disable_prio_vlan_tx) 2787 rdev->qplib_res.prio = false; 2788 else 2789 rdev->qplib_res.prio = true; 2790 2791 return bnxt_re_update_gid(rdev); 2792 } 2793 2794 int bnxt_re_set_hwrm_dscp2pri(struct bnxt_re_dev *rdev, 2795 struct bnxt_re_dscp2pri *d2p, u16 count, 2796 u16 target_id) 2797 { 2798 struct bnxt_en_dev *en_dev = rdev->en_dev; 2799 struct hwrm_queue_dscp2pri_cfg_input req; 2800 struct hwrm_queue_dscp2pri_cfg_output resp; 2801 struct bnxt_fw_msg fw_msg; 2802 struct bnxt_re_dscp2pri *dscp2pri; 2803 int i, rc, data_len = 3 * 256; 2804 dma_addr_t dma_handle; 2805 u8 *kmem; 2806 2807 memset(&req, 0, sizeof(req)); 2808 memset(&fw_msg, 0, sizeof(fw_msg)); 2809 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, 2810 HWRM_QUEUE_DSCP2PRI_CFG, -1, target_id); 2811 req.port_id = (target_id == 0xFFFF) 
? en_dev->pf_port_id : 1; 2812 2813 kmem = dma_alloc_coherent(&en_dev->pdev->dev, data_len, &dma_handle, 2814 GFP_KERNEL); 2815 if (!kmem) { 2816 dev_err(rdev_to_dev(rdev), 2817 "dma_alloc_coherent failure, length = %u\n", 2818 (unsigned)data_len); 2819 return -ENOMEM; 2820 } 2821 req.src_data_addr = cpu_to_le64(dma_handle); 2822 2823 /* Download the DSCP-MASK-PRI tuple(s) */ 2824 dscp2pri = (struct bnxt_re_dscp2pri *)kmem; 2825 for (i = 0; i < count; i++) { 2826 dscp2pri->dscp = d2p[i].dscp; 2827 dscp2pri->mask = d2p[i].mask; 2828 dscp2pri->pri = d2p[i].pri; 2829 dscp2pri++; 2830 } 2831 2832 req.entry_cnt = cpu_to_le16(count); 2833 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 2834 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 2835 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 2836 dma_free_coherent(&en_dev->pdev->dev, data_len, kmem, dma_handle); 2837 return rc; 2838 } 2839 2840 int bnxt_re_query_hwrm_qportcfg(struct bnxt_re_dev *rdev, 2841 struct bnxt_re_tc_rec *tc_rec, u16 tid) 2842 { 2843 u8 max_tc, tc, *qptr, *type_ptr0, *type_ptr1; 2844 struct hwrm_queue_qportcfg_output resp = {0}; 2845 struct hwrm_queue_qportcfg_input req = {0}; 2846 struct bnxt_en_dev *en_dev = rdev->en_dev; 2847 struct bnxt_fw_msg fw_msg; 2848 bool def_init = false; 2849 u8 *tmp_type; 2850 u8 cos_id; 2851 int rc; 2852 2853 memset(&fw_msg, 0, sizeof(fw_msg)); 2854 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_QUEUE_QPORTCFG, 2855 -1, tid); 2856 req.port_id = (tid == 0xFFFF) ? 
en_dev->pf_port_id : 1; 2857 if (BNXT_EN_ASYM_Q(en_dev)) 2858 req.flags = htole32(HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_RX); 2859 2860 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 2861 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 2862 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 2863 if (rc) 2864 return rc; 2865 2866 if (!resp.max_configurable_queues) 2867 return -EINVAL; 2868 2869 max_tc = resp.max_configurable_queues; 2870 tc_rec->max_tc = max_tc; 2871 2872 if (resp.queue_cfg_info & HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_CFG_INFO_USE_PROFILE_TYPE) 2873 tc_rec->serv_type_enabled = true; 2874 2875 qptr = &resp.queue_id0; 2876 type_ptr0 = &resp.queue_id0_service_profile_type; 2877 type_ptr1 = &resp.queue_id1_service_profile_type; 2878 for (tc = 0; tc < max_tc; tc++) { 2879 tmp_type = tc ? type_ptr1 + (tc - 1) : type_ptr0; 2880 2881 cos_id = *qptr++; 2882 /* RoCE CoS queue is the first cos queue. 2883 * For MP12 and MP17 order is 405 and 141015. 2884 */ 2885 if (is_bnxt_roce_queue(rdev, *qptr, *tmp_type)) { 2886 tc_rec->cos_id_roce = cos_id; 2887 tc_rec->tc_roce = tc; 2888 } else if (is_bnxt_cnp_queue(rdev, *qptr, *tmp_type)) { 2889 tc_rec->cos_id_cnp = cos_id; 2890 tc_rec->tc_cnp = tc; 2891 } else if (!def_init) { 2892 def_init = true; 2893 tc_rec->tc_def = tc; 2894 tc_rec->cos_id_def = cos_id; 2895 } 2896 qptr++; 2897 } 2898 2899 return rc; 2900 } 2901 2902 int bnxt_re_hwrm_cos2bw_qcfg(struct bnxt_re_dev *rdev, u16 target_id, 2903 struct bnxt_re_cos2bw_cfg *cfg) 2904 { 2905 struct bnxt_en_dev *en_dev = rdev->en_dev; 2906 struct hwrm_queue_cos2bw_qcfg_output resp; 2907 struct hwrm_queue_cos2bw_qcfg_input req = {0}; 2908 struct bnxt_fw_msg fw_msg; 2909 int rc, indx; 2910 void *data; 2911 2912 memset(&fw_msg, 0, sizeof(fw_msg)); 2913 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, 2914 HWRM_QUEUE_COS2BW_QCFG, -1, target_id); 2915 req.port_id = (target_id == 0xFFFF) ? 
en_dev->pf_port_id : 1; 2916 2917 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 2918 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 2919 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 2920 if (rc) 2921 return rc; 2922 data = &resp.queue_id0 + offsetof(struct bnxt_re_cos2bw_cfg, 2923 queue_id); 2924 for (indx = 0; indx < 8; indx++, data += (sizeof(cfg->cfg))) { 2925 memcpy(&cfg->cfg, data, sizeof(cfg->cfg)); 2926 if (indx == 0) 2927 cfg->queue_id = resp.queue_id0; 2928 cfg++; 2929 } 2930 2931 return rc; 2932 } 2933 2934 int bnxt_re_hwrm_cos2bw_cfg(struct bnxt_re_dev *rdev, u16 target_id, 2935 struct bnxt_re_cos2bw_cfg *cfg) 2936 { 2937 struct bnxt_en_dev *en_dev = rdev->en_dev; 2938 struct hwrm_queue_cos2bw_cfg_input req = {0}; 2939 struct hwrm_queue_cos2bw_cfg_output resp = {0}; 2940 struct bnxt_fw_msg fw_msg; 2941 void *data; 2942 int indx; 2943 int rc; 2944 2945 memset(&fw_msg, 0, sizeof(fw_msg)); 2946 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, 2947 HWRM_QUEUE_COS2BW_CFG, -1, target_id); 2948 req.port_id = (target_id == 0xFFFF) ? 
en_dev->pf_port_id : 1; 2949 2950 /* Chimp wants enable bit to retain previous 2951 * config done by L2 driver 2952 */ 2953 for (indx = 0; indx < 8; indx++) { 2954 if (cfg[indx].queue_id < 40) { 2955 req.enables |= cpu_to_le32( 2956 HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID0_VALID << 2957 indx); 2958 } 2959 2960 data = (char *)&req.unused_0 + indx * (sizeof(*cfg) - 4); 2961 memcpy(data, &cfg[indx].queue_id, sizeof(*cfg) - 4); 2962 if (indx == 0) { 2963 req.queue_id0 = cfg[0].queue_id; 2964 req.unused_0 = 0; 2965 } 2966 } 2967 2968 memset(&resp, 0, sizeof(resp)); 2969 bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 2970 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 2971 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 2972 return rc; 2973 } 2974 2975 int bnxt_re_host_pf_id_query(struct bnxt_re_dev *rdev, 2976 struct bnxt_qplib_query_fn_info *fn_info, 2977 u32 *pf_mask, u32 *first_pf) 2978 { 2979 struct hwrm_func_host_pf_ids_query_output resp = {0}; 2980 struct hwrm_func_host_pf_ids_query_input req; 2981 struct bnxt_en_dev *en_dev = rdev->en_dev; 2982 struct bnxt_fw_msg fw_msg; 2983 int rc; 2984 2985 memset(&fw_msg, 0, sizeof(fw_msg)); 2986 memset(&req, 0, sizeof(req)); 2987 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, 2988 HWRM_FUNC_HOST_PF_IDS_QUERY, -1, -1); 2989 /* To query the info from the host EPs */ 2990 switch (fn_info->host) { 2991 case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_SOC: 2992 case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_0: 2993 case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_1: 2994 case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_2: 2995 case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_3: 2996 req.host = fn_info->host; 2997 break; 2998 default: 2999 req.host = HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_0; 3000 break; 3001 } 3002 3003 req.filter = fn_info->filter; 3004 if (req.filter > HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_FILTER_ROCE) 3005 req.filter = HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_FILTER_ALL; 3006 3007 
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 3008 sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 3009 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 3010 3011 3012 *first_pf = le16_to_cpu(resp.first_pf_id); 3013 *pf_mask = le16_to_cpu(resp.pf_ordinal_mask); 3014 3015 return rc; 3016 } 3017 3018 static void bnxt_re_put_stats_ctx(struct bnxt_re_dev *rdev) 3019 { 3020 struct bnxt_qplib_ctx *hctx; 3021 struct bnxt_qplib_res *res; 3022 u16 tid = 0xffff; 3023 3024 res = &rdev->qplib_res; 3025 hctx = res->hctx; 3026 3027 if (test_and_clear_bit(BNXT_RE_FLAG_STATS_CTX_ALLOC, &rdev->flags)) { 3028 bnxt_re_net_stats_ctx_free(rdev, hctx->stats.fw_id, tid); 3029 bnxt_qplib_free_stat_mem(res, &hctx->stats); 3030 } 3031 } 3032 3033 static void bnxt_re_put_stats2_ctx(struct bnxt_re_dev *rdev) 3034 { 3035 test_and_clear_bit(BNXT_RE_FLAG_STATS_CTX2_ALLOC, &rdev->flags); 3036 } 3037 3038 static int bnxt_re_get_stats_ctx(struct bnxt_re_dev *rdev) 3039 { 3040 struct bnxt_qplib_ctx *hctx; 3041 struct bnxt_qplib_res *res; 3042 u16 tid = 0xffff; 3043 int rc; 3044 3045 res = &rdev->qplib_res; 3046 hctx = res->hctx; 3047 3048 rc = bnxt_qplib_alloc_stat_mem(res->pdev, rdev->chip_ctx, &hctx->stats); 3049 if (rc) 3050 return -ENOMEM; 3051 rc = bnxt_re_net_stats_ctx_alloc(rdev, tid); 3052 if (rc) 3053 goto free_stat_mem; 3054 set_bit(BNXT_RE_FLAG_STATS_CTX_ALLOC, &rdev->flags); 3055 3056 return 0; 3057 3058 free_stat_mem: 3059 bnxt_qplib_free_stat_mem(res, &hctx->stats); 3060 3061 return rc; 3062 } 3063 3064 static int bnxt_re_update_dev_attr(struct bnxt_re_dev *rdev) 3065 { 3066 int rc; 3067 3068 rc = bnxt_qplib_get_dev_attr(&rdev->rcfw); 3069 if (rc) 3070 return rc; 3071 if (!bnxt_re_check_min_attr(rdev)) 3072 return -EINVAL; 3073 return 0; 3074 } 3075 3076 static void bnxt_re_free_tbls(struct bnxt_re_dev *rdev) 3077 { 3078 bnxt_qplib_clear_tbls(&rdev->qplib_res); 3079 bnxt_qplib_free_tbls(&rdev->qplib_res); 3080 } 3081 3082 static int 
bnxt_re_alloc_init_tbls(struct bnxt_re_dev *rdev) 3083 { 3084 struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx; 3085 u8 pppp_factor = 0; 3086 int rc; 3087 3088 /* 3089 * TODO: Need a better mechanism for spreading of the 3090 * 512 extended PPP pages. For now, spreading it 3091 * based on port_count 3092 */ 3093 if (_is_chip_p7(chip_ctx) && chip_ctx->modes.db_push) 3094 pppp_factor = rdev->en_dev->port_count; 3095 rc = bnxt_qplib_alloc_tbls(&rdev->qplib_res, pppp_factor); 3096 if (rc) 3097 return rc; 3098 bnxt_qplib_init_tbls(&rdev->qplib_res); 3099 set_bit(BNXT_RE_FLAG_TBLS_ALLOCINIT, &rdev->flags); 3100 3101 return 0; 3102 } 3103 3104 static void bnxt_re_clean_nqs(struct bnxt_re_dev *rdev) 3105 { 3106 struct bnxt_qplib_nq *nq; 3107 int i; 3108 3109 if (!rdev->nqr.max_init) 3110 return; 3111 3112 for (i = (rdev->nqr.max_init - 1) ; i >= 0; i--) { 3113 nq = &rdev->nqr.nq[i]; 3114 bnxt_qplib_disable_nq(nq); 3115 bnxt_re_net_ring_free(rdev, nq->ring_id); 3116 bnxt_qplib_free_nq_mem(nq); 3117 } 3118 rdev->nqr.max_init = 0; 3119 } 3120 3121 static int bnxt_re_setup_nqs(struct bnxt_re_dev *rdev) 3122 { 3123 struct bnxt_re_ring_attr rattr = {}; 3124 struct bnxt_qplib_nq *nq; 3125 int rc, i; 3126 int depth; 3127 u32 offt; 3128 u16 vec; 3129 3130 mutex_init(&rdev->nqr.load_lock); 3131 /* 3132 * TODO: Optimize the depth based on the 3133 * number of NQs. 
3134 */ 3135 depth = BNXT_QPLIB_NQE_MAX_CNT; 3136 for (i = 0; i < rdev->nqr.num_msix - 1; i++) { 3137 nq = &rdev->nqr.nq[i]; 3138 vec = rdev->nqr.msix_entries[i + 1].vector; 3139 offt = rdev->nqr.msix_entries[i + 1].db_offset; 3140 nq->hwq.max_elements = depth; 3141 rc = bnxt_qplib_alloc_nq_mem(&rdev->qplib_res, nq); 3142 if (rc) { 3143 dev_err(rdev_to_dev(rdev), 3144 "Failed to get mem for NQ %d, rc = 0x%x", 3145 i, rc); 3146 goto fail_mem; 3147 } 3148 3149 rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr; 3150 rattr.pages = nq->hwq.pbl[rdev->nqr.nq[i].hwq.level].pg_count; 3151 rattr.type = bnxt_re_get_rtype(rdev); 3152 rattr.mode = HWRM_RING_ALLOC_INPUT_INT_MODE_MSIX; 3153 rattr.depth = nq->hwq.max_elements - 1; 3154 rattr.lrid = rdev->nqr.msix_entries[i + 1].ring_idx; 3155 3156 /* Set DBR pacing capability on the first NQ ring only */ 3157 if (!i && bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) 3158 rattr.flags = HWRM_RING_ALLOC_INPUT_FLAGS_NQ_DBR_PACING; 3159 else 3160 rattr.flags = 0; 3161 3162 rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id); 3163 if (rc) { 3164 nq->ring_id = 0xffff; /* Invalid ring-id */ 3165 dev_err(rdev_to_dev(rdev), 3166 "Failed to get fw id for NQ %d, rc = 0x%x", 3167 i, rc); 3168 goto fail_ring; 3169 } 3170 3171 rc = bnxt_qplib_enable_nq(nq, i, vec, offt, 3172 &bnxt_re_cqn_handler, 3173 &bnxt_re_srqn_handler); 3174 if (rc) { 3175 dev_err(rdev_to_dev(rdev), 3176 "Failed to enable NQ %d, rc = 0x%x", i, rc); 3177 goto fail_en; 3178 } 3179 } 3180 3181 rdev->nqr.max_init = i; 3182 return 0; 3183 fail_en: 3184 /* *nq was i'th nq */ 3185 bnxt_re_net_ring_free(rdev, nq->ring_id); 3186 fail_ring: 3187 bnxt_qplib_free_nq_mem(nq); 3188 fail_mem: 3189 rdev->nqr.max_init = i; 3190 return rc; 3191 } 3192 3193 static void bnxt_re_sysfs_destroy_file(struct bnxt_re_dev *rdev) 3194 { 3195 int i; 3196 3197 for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) 3198 device_remove_file(&rdev->ibdev.dev, bnxt_re_attributes[i]); 3199 } 3200 3201 static 
int bnxt_re_sysfs_create_file(struct bnxt_re_dev *rdev) 3202 { 3203 int i, j, rc = 0; 3204 3205 for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) { 3206 rc = device_create_file(&rdev->ibdev.dev, 3207 bnxt_re_attributes[i]); 3208 if (rc) { 3209 dev_err(rdev_to_dev(rdev), 3210 "Failed to create IB sysfs with rc = 0x%x", rc); 3211 /* Must clean up all created device files */ 3212 for (j = 0; j < i; j++) 3213 device_remove_file(&rdev->ibdev.dev, 3214 bnxt_re_attributes[j]); 3215 clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); 3216 ib_unregister_device(&rdev->ibdev); 3217 return 1; 3218 } 3219 } 3220 return 0; 3221 } 3222 3223 /* worker thread for polling periodic events. Now used for QoS programming*/ 3224 static void bnxt_re_worker(struct work_struct *work) 3225 { 3226 struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev, 3227 worker.work); 3228 int rc; 3229 3230 /* QoS is in 30s cadence for PFs*/ 3231 if (!rdev->is_virtfn && !rdev->worker_30s--) 3232 rdev->worker_30s = 30; 3233 /* Use trylock for bnxt_re_dev_lock as this can be 3234 * held for long time by debugfs show path while issuing 3235 * HWRMS. If the debugfs name update is not done in this 3236 * iteration, the driver will check for the same in the 3237 * next schedule of the worker i.e after 1 sec. 
3238 */ 3239 if (mutex_trylock(&bnxt_re_dev_lock)) 3240 mutex_unlock(&bnxt_re_dev_lock); 3241 3242 if (!rdev->stats.stats_query_sec) 3243 goto resched; 3244 3245 if (test_bit(BNXT_RE_FLAG_ISSUE_CFA_FLOW_STATS, &rdev->flags) && 3246 (rdev->is_virtfn || 3247 !_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags))) { 3248 if (!(rdev->stats.stats_query_counter++ % 3249 rdev->stats.stats_query_sec)) { 3250 rc = bnxt_re_get_qos_stats(rdev); 3251 if (rc && rc != -ENOMEM) 3252 clear_bit(BNXT_RE_FLAG_ISSUE_CFA_FLOW_STATS, 3253 &rdev->flags); 3254 } 3255 } 3256 3257 resched: 3258 schedule_delayed_work(&rdev->worker, msecs_to_jiffies(1000)); 3259 } 3260 3261 static int bnxt_re_alloc_dbr_sw_stats_mem(struct bnxt_re_dev *rdev) 3262 { 3263 if (!(rdev->dbr_drop_recov || rdev->dbr_pacing)) 3264 return 0; 3265 3266 rdev->dbr_sw_stats = kzalloc(sizeof(*rdev->dbr_sw_stats), GFP_KERNEL); 3267 if (!rdev->dbr_sw_stats) 3268 return -ENOMEM; 3269 3270 return 0; 3271 } 3272 3273 static void bnxt_re_free_dbr_sw_stats_mem(struct bnxt_re_dev *rdev) 3274 { 3275 kfree(rdev->dbr_sw_stats); 3276 rdev->dbr_sw_stats = NULL; 3277 } 3278 3279 static int bnxt_re_initialize_dbr_drop_recov(struct bnxt_re_dev *rdev) 3280 { 3281 rdev->dbr_drop_recov_wq = 3282 create_singlethread_workqueue("bnxt_re_dbr_drop_recov"); 3283 if (!rdev->dbr_drop_recov_wq) { 3284 dev_err(rdev_to_dev(rdev), "DBR Drop Revov wq alloc failed!"); 3285 return -EINVAL; 3286 } 3287 rdev->dbr_drop_recov = true; 3288 3289 /* Enable configfs setting dbr_drop_recov by default*/ 3290 rdev->user_dbr_drop_recov = true; 3291 3292 rdev->user_dbr_drop_recov_timeout = BNXT_RE_DBR_RECOV_USERLAND_TIMEOUT; 3293 return 0; 3294 } 3295 3296 static void bnxt_re_deinitialize_dbr_drop_recov(struct bnxt_re_dev *rdev) 3297 { 3298 if (rdev->dbr_drop_recov_wq) { 3299 flush_workqueue(rdev->dbr_drop_recov_wq); 3300 destroy_workqueue(rdev->dbr_drop_recov_wq); 3301 rdev->dbr_drop_recov_wq = NULL; 3302 } 3303 rdev->dbr_drop_recov = false; 3304 } 3305 3306 static 
int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev) 3307 { 3308 int rc; 3309 3310 /* Allocate a page for app use */ 3311 rdev->dbr_page = (void *)__get_free_page(GFP_KERNEL); 3312 if (!rdev->dbr_page) { 3313 dev_err(rdev_to_dev(rdev), "DBR page allocation failed!"); 3314 return -ENOMEM; 3315 } 3316 memset((u8 *)rdev->dbr_page, 0, PAGE_SIZE); 3317 rdev->qplib_res.pacing_data = (struct bnxt_qplib_db_pacing_data *)rdev->dbr_page; 3318 rc = bnxt_re_hwrm_dbr_pacing_qcfg(rdev); 3319 if (rc) { 3320 dev_err(rdev_to_dev(rdev), 3321 "Failed to query dbr pacing config %d\n", rc); 3322 goto fail; 3323 } 3324 /* Create a work queue for scheduling dbq event */ 3325 rdev->dbq_wq = create_singlethread_workqueue("bnxt_re_dbq"); 3326 if (!rdev->dbq_wq) { 3327 dev_err(rdev_to_dev(rdev), "DBQ wq alloc failed!"); 3328 rc = -ENOMEM; 3329 goto fail; 3330 } 3331 /* MAP grc window 2 for reading db fifo depth */ 3332 writel_fbsd(rdev->en_dev->softc, BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4, 0, 3333 rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_BASE_MASK); 3334 rdev->dbr_db_fifo_reg_off = 3335 (rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_OFFSET_MASK) + 3336 0x2000; 3337 rdev->qplib_res.pacing_data->grc_reg_offset = rdev->dbr_db_fifo_reg_off; 3338 3339 rdev->dbr_bar_addr = 3340 pci_resource_start(rdev->qplib_res.pdev, 0) + 3341 rdev->dbr_db_fifo_reg_off; 3342 3343 /* Percentage of DB FIFO */ 3344 rdev->dbq_watermark = BNXT_RE_PACING_DBQ_THRESHOLD; 3345 rdev->pacing_en_int_th = BNXT_RE_PACING_EN_INT_THRESHOLD; 3346 rdev->pacing_algo_th = BNXT_RE_PACING_ALGO_THRESHOLD; 3347 rdev->dbq_pacing_time = BNXT_RE_DBR_INT_TIME; 3348 rdev->dbr_def_do_pacing = BNXT_RE_DBR_DO_PACING_NO_CONGESTION; 3349 rdev->do_pacing_save = rdev->dbr_def_do_pacing; 3350 bnxt_re_set_default_pacing_data(rdev); 3351 dev_dbg(rdev_to_dev(rdev), "Initialized db pacing\n"); 3352 3353 return 0; 3354 fail: 3355 free_page((u64)rdev->dbr_page); 3356 rdev->dbr_page = NULL; 3357 return rc; 3358 } 3359 3360 static void 
bnxt_re_deinitialize_dbr_pacing(struct bnxt_re_dev *rdev) 3361 { 3362 if (rdev->dbq_wq) 3363 flush_workqueue(rdev->dbq_wq); 3364 3365 cancel_work_sync(&rdev->dbq_fifo_check_work); 3366 cancel_delayed_work_sync(&rdev->dbq_pacing_work); 3367 3368 if (rdev->dbq_wq) { 3369 destroy_workqueue(rdev->dbq_wq); 3370 rdev->dbq_wq = NULL; 3371 } 3372 3373 if (rdev->dbr_page) 3374 free_page((u64)rdev->dbr_page); 3375 rdev->dbr_page = NULL; 3376 rdev->dbr_pacing = false; 3377 } 3378 3379 /* enable_dbr_pacing needs to be done only for older FWs 3380 * where host selects primary function. ie. pacing_ext 3381 * flags is not set. 3382 */ 3383 int bnxt_re_enable_dbr_pacing(struct bnxt_re_dev *rdev) 3384 { 3385 struct bnxt_qplib_nq *nq; 3386 3387 nq = &rdev->nqr.nq[0]; 3388 rdev->dbq_nq_id = nq->ring_id; 3389 3390 if (!bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx) && 3391 bnxt_qplib_dbr_pacing_is_primary_pf(rdev->chip_ctx)) { 3392 if (bnxt_re_hwrm_dbr_pacing_cfg(rdev, true)) { 3393 dev_err(rdev_to_dev(rdev), 3394 "Failed to set dbr pacing config\n"); 3395 return -EIO; 3396 } 3397 /* MAP grc window 8 for ARMing the NQ DBQ */ 3398 writel_fbsd(rdev->en_dev->softc, BNXT_GRCPF_REG_WINDOW_BASE_OUT + 28 , 0, 3399 rdev->chip_ctx->dbr_aeq_arm_reg & BNXT_GRC_BASE_MASK); 3400 rdev->dbr_aeq_arm_reg_off = 3401 (rdev->chip_ctx->dbr_aeq_arm_reg & 3402 BNXT_GRC_OFFSET_MASK) + 0x8000; 3403 writel_fbsd(rdev->en_dev->softc, rdev->dbr_aeq_arm_reg_off , 0, 1); 3404 } 3405 3406 return 0; 3407 } 3408 3409 /* disable_dbr_pacing needs to be done only for older FWs 3410 * where host selects primary function. ie. pacing_ext 3411 * flags is not set. 
/* disable_dbr_pacing needs to be done only for older FWs
 * where host selects primary function. ie. pacing_ext
 * flags is not set.
 *
 * Returns 0 when nothing needs doing, otherwise the result of
 * bnxt_re_hwrm_dbr_pacing_cfg(rdev, false).
 */

int bnxt_re_disable_dbr_pacing(struct bnxt_re_dev *rdev)
{
	int rc = 0;

	if (!bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx) &&
	    bnxt_qplib_dbr_pacing_is_primary_pf(rdev->chip_ctx))
		rc = bnxt_re_hwrm_dbr_pacing_cfg(rdev, false);

	return rc;
}

/*
 * Remove the sysfs attributes and unregister the IB device, but only if
 * BNXT_RE_FLAG_IBDEV_REGISTERED is set; the flag is cleared afterwards.
 */
static void bnxt_re_ib_uninit(struct bnxt_re_dev *rdev)
{
	if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
		bnxt_re_sysfs_destroy_file(rdev);
		/* Cleanup ib dev */
		ib_unregister_device(&rdev->ibdev);
		clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
		return;
	}
}

/*
 * bnxt_re_dev_uninit - release everything bnxt_re_dev_init() set up
 * @rdev: device to tear down
 * @op_type: removal type; for BNXT_RE_PRE_RECOVERY_REMOVE the netdev
 *           registration is left in place
 *
 * Most steps are guarded by a test_and_clear_bit() on the flag recorded
 * at setup time, so the function can be called after a partial init.
 * NQ teardown and the RCFW/ring/MSI-X release run under the rtnl lock.
 */
static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
{
	struct bnxt_qplib_dpi *kdpi;
	int rc, wait_count = BNXT_RE_RES_FREE_WAIT_COUNT;

	bnxt_re_net_unregister_async_event(rdev);

	bnxt_re_put_stats2_ctx(rdev);
	/* Unlink from the global device list if dev_init got that far */
	if (test_and_clear_bit(BNXT_RE_FLAG_DEV_LIST_INITIALIZED,
			       &rdev->flags)) {
		/* did the caller hold the lock? */
		mutex_lock(&bnxt_re_dev_lock);
		list_del_rcu(&rdev->list);
		mutex_unlock(&bnxt_re_dev_lock);
	}

	bnxt_re_uninit_resolve_wq(rdev);
	bnxt_re_uninit_dcb_wq(rdev);
	bnxt_re_uninit_aer_wq(rdev);

	bnxt_re_deinitialize_dbr_drop_recov(rdev);

	/* NOTE(review): chip_ctx is dereferenced unconditionally here */
	if (bnxt_qplib_dbr_pacing_en(rdev->chip_ctx))
		(void)bnxt_re_disable_dbr_pacing(rdev);

	/* Stop the periodic worker before resources it uses go away */
	if (test_and_clear_bit(BNXT_RE_FLAG_WORKER_REG, &rdev->flags)) {
		cancel_delayed_work_sync(&rdev->worker);
	}

	/* Wait for ULPs to release references */
	while (atomic_read(&rdev->stats.rsors.cq_count) && --wait_count)
		usleep_range(500, 1000);
	/* wait_count reaching 0 means the poll above timed out */
	if (!wait_count)
		dev_err(rdev_to_dev(rdev),
			"CQ resources not freed by stack, count = 0x%x",
			atomic_read(&rdev->stats.rsors.cq_count));

	kdpi = &rdev->dpi_privileged;
	if (kdpi->umdbr) { /* kernel DPI was allocated with success */
		(void)bnxt_qplib_dealloc_dpi(&rdev->qplib_res, kdpi);
		/*
		 * Driver just need to know no command had failed
		 * during driver load sequence and below command is
		 * required indeed. Piggybacking dpi allocation status.
		 */
	}

	/* Protect the device uninitialization and start_irq/stop_irq L2
	 * callbacks with rtnl lock to avoid race condition between these calls
	 */
	rtnl_lock();
	if (test_and_clear_bit(BNXT_RE_FLAG_SETUP_NQ, &rdev->flags))
		bnxt_re_clean_nqs(rdev);
	rtnl_unlock();

	if (test_and_clear_bit(BNXT_RE_FLAG_TBLS_ALLOCINIT, &rdev->flags))
		bnxt_re_free_tbls(rdev);
	if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_INIT, &rdev->flags)) {
		rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
		/* A deinit failure is only logged; teardown continues */
		if (rc)
			dev_warn(rdev_to_dev(rdev),
				 "Failed to deinitialize fw, rc = 0x%x", rc);
	}

	bnxt_re_put_stats_ctx(rdev);

	if (test_and_clear_bit(BNXT_RE_FLAG_ALLOC_CTX, &rdev->flags))
		bnxt_qplib_free_hwctx(&rdev->qplib_res);

	/* Second rtnl section: RCFW channel, pacing, CREQ ring and MSI-X */
	rtnl_lock();
	if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags))
		bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);

	if (rdev->dbr_pacing)
		bnxt_re_deinitialize_dbr_pacing(rdev);

	bnxt_re_free_dbr_sw_stats_mem(rdev);

	if (test_and_clear_bit(BNXT_RE_FLAG_NET_RING_ALLOC, &rdev->flags))
		bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id);

	if (test_and_clear_bit(BNXT_RE_FLAG_ALLOC_RCFW, &rdev->flags))
		bnxt_qplib_free_rcfw_channel(&rdev->qplib_res);

	if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags))
		bnxt_re_free_msix(rdev);
	rtnl_unlock();

	bnxt_re_destroy_chip_ctx(rdev);

	/* Keep the netdev registration across a pre-recovery removal */
	if (op_type != BNXT_RE_PRE_RECOVERY_REMOVE) {
		if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED,
				       &rdev->flags))
			bnxt_re_unregister_netdev(rdev);
	}
}
to netdev */ 3542 rc = bnxt_re_register_netdev(rdev); 3543 if (rc) 3544 return -EINVAL; 3545 } 3546 set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); 3547 3548 rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode); 3549 if (rc) { 3550 dev_err(rdev_to_dev(rdev), "Failed to get chip context rc 0x%x", rc); 3551 bnxt_re_unregister_netdev(rdev); 3552 clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); 3553 rc = -EINVAL; 3554 return rc; 3555 } 3556 3557 /* Protect the device initialization and start_irq/stop_irq L2 callbacks 3558 * with rtnl lock to avoid race condition between these calls 3559 */ 3560 rtnl_lock(); 3561 rc = bnxt_re_request_msix(rdev); 3562 if (rc) { 3563 dev_err(rdev_to_dev(rdev), 3564 "Requesting MSI-X vectors failed with rc = 0x%x", rc); 3565 rc = -EINVAL; 3566 goto release_rtnl; 3567 } 3568 set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags); 3569 3570 /* Establish RCFW Communication Channel to initialize the context 3571 memory for the function and all child VFs */ 3572 rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res); 3573 if (rc) { 3574 dev_err(rdev_to_dev(rdev), 3575 "Failed to alloc mem for rcfw, rc = %#x\n", rc); 3576 goto release_rtnl; 3577 } 3578 set_bit(BNXT_RE_FLAG_ALLOC_RCFW, &rdev->flags); 3579 3580 creq = &rdev->rcfw.creq; 3581 rattr.dma_arr = creq->hwq.pbl[PBL_LVL_0].pg_map_arr; 3582 rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count; 3583 rattr.type = bnxt_re_get_rtype(rdev); 3584 rattr.mode = HWRM_RING_ALLOC_INPUT_INT_MODE_MSIX; 3585 rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1; 3586 rattr.lrid = rdev->nqr.msix_entries[BNXT_RE_AEQ_IDX].ring_idx; 3587 rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id); 3588 if (rc) { 3589 creq->ring_id = 0xffff; 3590 dev_err(rdev_to_dev(rdev), 3591 "Failed to allocate CREQ fw id with rc = 0x%x", rc); 3592 goto release_rtnl; 3593 } 3594 3595 if (!rdev->chip_ctx) 3596 goto release_rtnl; 3597 /* Program the NQ ID for DBQ notification */ 3598 if (rdev->chip_ctx->modes.dbr_pacing_v0 || 3599 
bnxt_qplib_dbr_pacing_en(rdev->chip_ctx) || 3600 bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) { 3601 rc = bnxt_re_initialize_dbr_pacing(rdev); 3602 if (!rc) 3603 rdev->dbr_pacing = true; 3604 else 3605 rdev->dbr_pacing = false; 3606 dev_dbg(rdev_to_dev(rdev), "%s: initialize db pacing ret %d\n", 3607 __func__, rc); 3608 } 3609 3610 vec = rdev->nqr.msix_entries[BNXT_RE_AEQ_IDX].vector; 3611 offset = rdev->nqr.msix_entries[BNXT_RE_AEQ_IDX].db_offset; 3612 rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw, vec, offset, 3613 &bnxt_re_aeq_handler); 3614 if (rc) { 3615 dev_err(rdev_to_dev(rdev), 3616 "Failed to enable RCFW channel with rc = 0x%x", rc); 3617 goto release_rtnl; 3618 } 3619 set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags); 3620 3621 rc = bnxt_re_update_dev_attr(rdev); 3622 if (rc) 3623 goto release_rtnl; 3624 bnxt_re_set_resource_limits(rdev); 3625 if (!rdev->is_virtfn && !_is_chip_gen_p5_p7(rdev->chip_ctx)) { 3626 rc = bnxt_qplib_alloc_hwctx(&rdev->qplib_res); 3627 if (rc) { 3628 dev_err(rdev_to_dev(rdev), 3629 "Failed to alloc hw contexts, rc = 0x%x", rc); 3630 goto release_rtnl; 3631 } 3632 set_bit(BNXT_RE_FLAG_ALLOC_CTX, &rdev->flags); 3633 } 3634 3635 rc = bnxt_re_get_stats_ctx(rdev); 3636 if (rc) 3637 goto release_rtnl; 3638 3639 rc = bnxt_qplib_init_rcfw(&rdev->rcfw, rdev->is_virtfn); 3640 if (rc) { 3641 dev_err(rdev_to_dev(rdev), 3642 "Failed to initialize fw with rc = 0x%x", rc); 3643 goto release_rtnl; 3644 } 3645 set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_INIT, &rdev->flags); 3646 3647 /* Based resource count on the 'new' device caps */ 3648 rc = bnxt_re_update_dev_attr(rdev); 3649 if (rc) 3650 goto release_rtnl; 3651 rc = bnxt_re_alloc_init_tbls(rdev); 3652 if (rc) { 3653 dev_err(rdev_to_dev(rdev), "tbls alloc-init failed rc = %#x", 3654 rc); 3655 goto release_rtnl; 3656 } 3657 rc = bnxt_re_setup_nqs(rdev); 3658 if (rc) { 3659 dev_err(rdev_to_dev(rdev), "NQs alloc-init failed rc = %#x\n", 3660 rc); 3661 if (rdev->nqr.max_init == 0) 3662 goto 
release_rtnl; 3663 3664 dev_warn(rdev_to_dev(rdev), 3665 "expected nqs %d available nqs %d\n", 3666 rdev->nqr.num_msix, rdev->nqr.max_init); 3667 } 3668 set_bit(BNXT_RE_FLAG_SETUP_NQ, &rdev->flags); 3669 rtnl_unlock(); 3670 3671 rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res, &rdev->dpi_privileged, 3672 rdev, BNXT_QPLIB_DPI_TYPE_KERNEL); 3673 if (rc) 3674 goto fail; 3675 3676 if (rdev->dbr_pacing) 3677 bnxt_re_enable_dbr_pacing(rdev); 3678 3679 if (rdev->chip_ctx->modes.dbr_drop_recov) 3680 bnxt_re_initialize_dbr_drop_recov(rdev); 3681 3682 rc = bnxt_re_alloc_dbr_sw_stats_mem(rdev); 3683 if (rc) 3684 goto fail; 3685 3686 /* This block of code is needed for error recovery support */ 3687 if (!rdev->is_virtfn) { 3688 struct bnxt_re_tc_rec *tc_rec; 3689 3690 tc_rec = &rdev->tc_rec[0]; 3691 rc = bnxt_re_query_hwrm_qportcfg(rdev, tc_rec, 0xFFFF); 3692 if (rc) { 3693 dev_err(rdev_to_dev(rdev), 3694 "Failed to query port config rc:%d", rc); 3695 return rc; 3696 } 3697 3698 /* Query f/w defaults of CC params */ 3699 rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &rdev->cc_param); 3700 if (rc) 3701 dev_warn(rdev_to_dev(rdev), 3702 "Failed to query CC defaults\n"); 3703 if (1) { 3704 rdev->num_vfs = pci_num_vf(rdev->en_dev->pdev); 3705 if (rdev->num_vfs) { 3706 bnxt_re_set_resource_limits(rdev); 3707 bnxt_qplib_set_func_resources(&rdev->qplib_res); 3708 } 3709 } 3710 } 3711 INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker); 3712 set_bit(BNXT_RE_FLAG_WORKER_REG, &rdev->flags); 3713 schedule_delayed_work(&rdev->worker, msecs_to_jiffies(1000)); 3714 3715 bnxt_re_init_dcb_wq(rdev); 3716 bnxt_re_init_aer_wq(rdev); 3717 bnxt_re_init_resolve_wq(rdev); 3718 mutex_lock(&bnxt_re_dev_lock); 3719 list_add_tail_rcu(&rdev->list, &bnxt_re_dev_list); 3720 /* Added to the list, not in progress anymore */ 3721 gadd_dev_inprogress--; 3722 set_bit(BNXT_RE_FLAG_DEV_LIST_INITIALIZED, &rdev->flags); 3723 mutex_unlock(&bnxt_re_dev_lock); 3724 3725 3726 return rc; 3727 release_rtnl: 3728 rtnl_unlock(); 
3729 fail: 3730 bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); 3731 3732 return rc; 3733 } 3734 3735 static int bnxt_re_ib_init(struct bnxt_re_dev *rdev) 3736 { 3737 int rc = 0; 3738 3739 rc = bnxt_re_register_ib(rdev); 3740 if (rc) { 3741 dev_err(rdev_to_dev(rdev), 3742 "Register IB failed with rc = 0x%x", rc); 3743 goto fail; 3744 } 3745 if (bnxt_re_sysfs_create_file(rdev)) { 3746 bnxt_re_stopqps_and_ib_uninit(rdev); 3747 goto fail; 3748 } 3749 3750 set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); 3751 set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags); 3752 set_bit(BNXT_RE_FLAG_ISSUE_CFA_FLOW_STATS, &rdev->flags); 3753 bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE); 3754 bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE); 3755 3756 return rc; 3757 fail: 3758 bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); 3759 return rc; 3760 } 3761 3762 /* wrapper for ib_init funcs */ 3763 int _bnxt_re_ib_init(struct bnxt_re_dev *rdev) 3764 { 3765 return bnxt_re_ib_init(rdev); 3766 } 3767 3768 /* wrapper for aux init funcs */ 3769 int _bnxt_re_ib_init2(struct bnxt_re_dev *rdev) 3770 { 3771 bnxt_re_ib_init_2(rdev); 3772 return 0; /* add return for future proof */ 3773 } 3774 3775 static void bnxt_re_dev_unreg(struct bnxt_re_dev *rdev) 3776 { 3777 bnxt_re_dev_dealloc(rdev); 3778 } 3779 3780 3781 static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct ifnet *netdev, 3782 struct bnxt_en_dev *en_dev) 3783 { 3784 struct ifnet *realdev = NULL; 3785 3786 realdev = netdev; 3787 if (realdev) 3788 dev_dbg(NULL, "%s: realdev = %p netdev = %p\n", __func__, 3789 realdev, netdev); 3790 /* 3791 * Note: 3792 * The first argument to bnxt_re_dev_alloc() is 'netdev' and 3793 * not 'realdev', since in the case of bonding we want to 3794 * register the bonded virtual netdev (master) to the ib stack. 3795 * And 'en_dev' (for L2/PCI communication) is the first slave 3796 * device (PF0 on the card). 
3797 * In the case of a regular netdev, both netdev and the en_dev 3798 * correspond to the same device. 3799 */ 3800 *rdev = bnxt_re_dev_alloc(netdev, en_dev); 3801 if (!*rdev) { 3802 pr_err("%s: netdev %p not handled", 3803 ROCE_DRV_MODULE_NAME, netdev); 3804 return -ENOMEM; 3805 } 3806 bnxt_re_hold(*rdev); 3807 3808 return 0; 3809 } 3810 3811 void bnxt_re_get_link_speed(struct bnxt_re_dev *rdev) 3812 { 3813 rdev->espeed = rdev->en_dev->espeed; 3814 return; 3815 } 3816 3817 void bnxt_re_stopqps_and_ib_uninit(struct bnxt_re_dev *rdev) 3818 { 3819 dev_dbg(rdev_to_dev(rdev), "%s: Stopping QPs, IB uninit on rdev: %p\n", 3820 __func__, rdev); 3821 bnxt_re_stop_all_nonqp1_nonshadow_qps(rdev); 3822 bnxt_re_ib_uninit(rdev); 3823 } 3824 3825 void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type, 3826 struct auxiliary_device *aux_dev) 3827 { 3828 struct bnxt_re_en_dev_info *en_info; 3829 struct bnxt_qplib_cmdq_ctx *cmdq; 3830 struct bnxt_qplib_rcfw *rcfw; 3831 3832 rcfw = &rdev->rcfw; 3833 cmdq = &rcfw->cmdq; 3834 if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags)) 3835 set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); 3836 3837 dev_dbg(rdev_to_dev(rdev), "%s: Removing rdev: %p\n", __func__, rdev); 3838 bnxt_re_dev_uninit(rdev, op_type); 3839 en_info = auxiliary_get_drvdata(aux_dev); 3840 if (en_info) { 3841 rtnl_lock(); 3842 en_info->rdev = NULL; 3843 rtnl_unlock(); 3844 if (op_type != BNXT_RE_PRE_RECOVERY_REMOVE) { 3845 clear_bit(BNXT_RE_FLAG_EN_DEV_PRIMARY_DEV, &en_info->flags); 3846 clear_bit(BNXT_RE_FLAG_EN_DEV_SECONDARY_DEV, &en_info->flags); 3847 clear_bit(BNXT_RE_FLAG_EN_DEV_NETDEV_REG, &en_info->flags); 3848 } 3849 } 3850 bnxt_re_dev_unreg(rdev); 3851 } 3852 3853 int bnxt_re_add_device(struct bnxt_re_dev **rdev, 3854 struct ifnet *netdev, 3855 u8 qp_mode, u8 op_type, u8 wqe_mode, 3856 u32 num_msix_requested, 3857 struct auxiliary_device *aux_dev) 3858 { 3859 struct bnxt_re_en_dev_info *en_info; 3860 struct bnxt_en_dev *en_dev; 3861 int rc = 0; 
3862 3863 en_info = auxiliary_get_drvdata(aux_dev); 3864 en_dev = en_info->en_dev; 3865 3866 mutex_lock(&bnxt_re_dev_lock); 3867 /* Check if driver already in mod exit and aux_dev is valid */ 3868 if (gmod_exit || !aux_dev) { 3869 mutex_unlock(&bnxt_re_dev_lock); 3870 return -ENODEV; 3871 } 3872 /* Add device in progress */ 3873 gadd_dev_inprogress++; 3874 mutex_unlock(&bnxt_re_dev_lock); 3875 3876 rc = bnxt_re_dev_reg(rdev, netdev, en_dev); 3877 if (rc) { 3878 dev_dbg(NULL, "Failed to create add device for netdev %p\n", 3879 netdev); 3880 /* 3881 * For BNXT_RE_POST_RECOVERY_INIT special case 3882 * called from bnxt_re_start, the work is 3883 * complete only after, bnxt_re_start completes 3884 * bnxt_unregister_device in case of failure. 3885 * So bnxt_re_start will decrement gadd_dev_inprogress 3886 * in case of failure. 3887 */ 3888 if (op_type != BNXT_RE_POST_RECOVERY_INIT) { 3889 mutex_lock(&bnxt_re_dev_lock); 3890 gadd_dev_inprogress--; 3891 mutex_unlock(&bnxt_re_dev_lock); 3892 } 3893 return rc; 3894 } 3895 3896 if (rc != 0) 3897 goto ref_error; 3898 3899 /* 3900 * num_msix_requested = BNXT_RE_MSIX_FROM_MOD_PARAM indicates fresh driver load. 3901 * Otherwaise, this invocation can be the result of lag create / destroy, 3902 * err revovery, hot fw upgrade, etc.. 
3903 */ 3904 if (num_msix_requested == BNXT_RE_MSIX_FROM_MOD_PARAM) { 3905 if (bnxt_re_probe_count < BNXT_RE_MAX_DEVICES) 3906 num_msix_requested = max_msix_vec[bnxt_re_probe_count++]; 3907 else 3908 /* Consider as default when probe_count exceeds its limit */ 3909 num_msix_requested = 0; 3910 3911 /* if user specifies only one value, use the same for all PFs */ 3912 if (max_msix_vec_argc == 1) 3913 num_msix_requested = max_msix_vec[0]; 3914 } 3915 3916 (*rdev)->num_msix_requested = num_msix_requested; 3917 (*rdev)->gsi_ctx.gsi_qp_mode = qp_mode; 3918 (*rdev)->adev = aux_dev; 3919 (*rdev)->dev_addr = en_dev->softc->func.mac_addr; 3920 /* Before updating the rdev pointer in bnxt_re_en_dev_info structure, 3921 * take the rtnl lock to avoid accessing invalid rdev pointer from 3922 * L2 ULP callbacks. This is applicable in all the places where rdev 3923 * pointer is updated in bnxt_re_en_dev_info. 3924 */ 3925 rtnl_lock(); 3926 en_info->rdev = *rdev; 3927 rtnl_unlock(); 3928 rc = bnxt_re_dev_init(*rdev, op_type, wqe_mode); 3929 if (rc) { 3930 ref_error: 3931 bnxt_re_dev_unreg(*rdev); 3932 *rdev = NULL; 3933 /* 3934 * For BNXT_RE_POST_RECOVERY_INIT special case 3935 * called from bnxt_re_start, the work is 3936 * complete only after, bnxt_re_start completes 3937 * bnxt_unregister_device in case of failure. 3938 * So bnxt_re_start will decrement gadd_dev_inprogress 3939 * in case of failure. 
3940 */ 3941 if (op_type != BNXT_RE_POST_RECOVERY_INIT) { 3942 mutex_lock(&bnxt_re_dev_lock); 3943 gadd_dev_inprogress--; 3944 mutex_unlock(&bnxt_re_dev_lock); 3945 } 3946 } 3947 dev_dbg(rdev_to_dev(*rdev), "%s: Adding rdev: %p\n", __func__, *rdev); 3948 if (!rc) { 3949 set_bit(BNXT_RE_FLAG_EN_DEV_NETDEV_REG, &en_info->flags); 3950 } 3951 return rc; 3952 } 3953 3954 struct bnxt_re_dev *bnxt_re_get_peer_pf(struct bnxt_re_dev *rdev) 3955 { 3956 struct pci_dev *pdev_in = rdev->en_dev->pdev; 3957 int tmp_bus_num, bus_num = pdev_in->bus->number; 3958 int tmp_dev_num, dev_num = PCI_SLOT(pdev_in->devfn); 3959 int tmp_func_num, func_num = PCI_FUNC(pdev_in->devfn); 3960 struct bnxt_re_dev *tmp_rdev; 3961 3962 rcu_read_lock(); 3963 list_for_each_entry_rcu(tmp_rdev, &bnxt_re_dev_list, list) { 3964 tmp_bus_num = tmp_rdev->en_dev->pdev->bus->number; 3965 tmp_dev_num = PCI_SLOT(tmp_rdev->en_dev->pdev->devfn); 3966 tmp_func_num = PCI_FUNC(tmp_rdev->en_dev->pdev->devfn); 3967 3968 if (bus_num == tmp_bus_num && dev_num == tmp_dev_num && 3969 func_num != tmp_func_num) { 3970 rcu_read_unlock(); 3971 return tmp_rdev; 3972 } 3973 } 3974 rcu_read_unlock(); 3975 return NULL; 3976 } 3977 3978 3979 int bnxt_re_schedule_work(struct bnxt_re_dev *rdev, unsigned long event, 3980 struct ifnet *vlan_dev, 3981 struct ifnet *netdev, 3982 struct auxiliary_device *adev) 3983 { 3984 struct bnxt_re_work *re_work; 3985 3986 /* Allocate for the deferred task */ 3987 re_work = kzalloc(sizeof(*re_work), GFP_KERNEL); 3988 if (!re_work) 3989 return -ENOMEM; 3990 3991 re_work->rdev = rdev; 3992 re_work->event = event; 3993 re_work->vlan_dev = vlan_dev; 3994 re_work->adev = adev; 3995 INIT_WORK(&re_work->work, bnxt_re_task); 3996 if (rdev) 3997 atomic_inc(&rdev->sched_count); 3998 re_work->netdev = netdev; 3999 queue_work(bnxt_re_wq, &re_work->work); 4000 4001 return 0; 4002 } 4003 4004 4005 int bnxt_re_get_slot_pf_count(struct bnxt_re_dev *rdev) 4006 { 4007 struct pci_dev *pdev_in = rdev->en_dev->pdev; 4008 
int tmp_bus_num, bus_num = pdev_in->bus->number; 4009 int tmp_dev_num, dev_num = PCI_SLOT(pdev_in->devfn); 4010 struct bnxt_re_dev *tmp_rdev; 4011 int pf_cnt = 0; 4012 4013 rcu_read_lock(); 4014 list_for_each_entry_rcu(tmp_rdev, &bnxt_re_dev_list, list) { 4015 tmp_bus_num = tmp_rdev->en_dev->pdev->bus->number; 4016 tmp_dev_num = PCI_SLOT(tmp_rdev->en_dev->pdev->devfn); 4017 4018 if (bus_num == tmp_bus_num && dev_num == tmp_dev_num) 4019 pf_cnt++; 4020 } 4021 rcu_read_unlock(); 4022 return pf_cnt; 4023 } 4024 4025 /* Handle all deferred netevents tasks */ 4026 static void bnxt_re_task(struct work_struct *work) 4027 { 4028 struct bnxt_re_en_dev_info *en_info; 4029 struct auxiliary_device *aux_dev; 4030 struct bnxt_re_work *re_work; 4031 struct bnxt_re_dev *rdev; 4032 4033 re_work = container_of(work, struct bnxt_re_work, work); 4034 4035 mutex_lock(&bnxt_re_mutex); 4036 rdev = re_work->rdev; 4037 4038 /* 4039 * If the previous rdev is deleted due to bond creation 4040 * do not handle the event 4041 */ 4042 if (!bnxt_re_is_rdev_valid(rdev)) 4043 goto exit; 4044 4045 /* Ignore the event, if the device is not registred with IB stack. This 4046 * is to avoid handling any event while the device is added/removed. 4047 */ 4048 if (rdev && !test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) { 4049 dev_dbg(rdev_to_dev(rdev), "%s: Ignoring netdev event 0x%lx", 4050 __func__, re_work->event); 4051 goto done; 4052 } 4053 4054 /* Extra check to silence coverity. We shouldn't handle any event 4055 * when rdev is NULL. 
4056 */ 4057 if (!rdev) 4058 goto exit; 4059 4060 dev_dbg(rdev_to_dev(rdev), "Scheduled work for event 0x%lx", 4061 re_work->event); 4062 4063 switch (re_work->event) { 4064 case NETDEV_UP: 4065 bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, 4066 IB_EVENT_PORT_ACTIVE); 4067 bnxt_re_net_register_async_event(rdev); 4068 break; 4069 4070 case NETDEV_DOWN: 4071 bnxt_qplib_dbr_pacing_set_primary_pf(rdev->chip_ctx, 0); 4072 bnxt_re_stop_all_nonqp1_nonshadow_qps(rdev); 4073 bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, 4074 IB_EVENT_PORT_ERR); 4075 break; 4076 4077 case NETDEV_CHANGE: 4078 if (bnxt_re_get_link_state(rdev) == IB_PORT_DOWN) { 4079 bnxt_re_stop_all_nonqp1_nonshadow_qps(rdev); 4080 bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, 4081 IB_EVENT_PORT_ERR); 4082 break; 4083 } else if (bnxt_re_get_link_state(rdev) == IB_PORT_ACTIVE) { 4084 bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, 4085 IB_EVENT_PORT_ACTIVE); 4086 } 4087 4088 /* temporarily disable the check for SR2 */ 4089 if (!bnxt_qplib_query_cc_param(&rdev->qplib_res, 4090 &rdev->cc_param) && 4091 !_is_chip_p7(rdev->chip_ctx)) { 4092 /* 4093 * Disable CC for 10G speed 4094 * for non p5 devices 4095 */ 4096 if (rdev->sl_espeed == SPEED_10000 && 4097 !_is_chip_gen_p5_p7(rdev->chip_ctx)) { 4098 if (rdev->cc_param.enable) 4099 bnxt_re_clear_cc(rdev); 4100 } else { 4101 if (!rdev->cc_param.enable && 4102 rdev->cc_param.admin_enable) 4103 bnxt_re_setup_cc(rdev); 4104 } 4105 } 4106 break; 4107 4108 case NETDEV_UNREGISTER: 4109 bnxt_re_stopqps_and_ib_uninit(rdev); 4110 aux_dev = rdev->adev; 4111 if (re_work->adev) 4112 goto done; 4113 4114 bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, aux_dev); 4115 4116 break; 4117 4118 default: 4119 break; 4120 } 4121 done: 4122 if (rdev) { 4123 /* memory barrier to guarantee task completion 4124 * before decrementing sched count 4125 */ 4126 mmiowb(); 4127 atomic_dec(&rdev->sched_count); 4128 } 4129 exit: 4130 if (re_work->adev && re_work->event == NETDEV_UNREGISTER) { 4131 
en_info = auxiliary_get_drvdata(re_work->adev); 4132 en_info->ib_uninit_done = true; 4133 wake_up(&en_info->waitq); 4134 } 4135 kfree(re_work); 4136 mutex_unlock(&bnxt_re_mutex); 4137 } 4138 4139 /* 4140 "Notifier chain callback can be invoked for the same chain from 4141 different CPUs at the same time". 4142 4143 For cases when the netdev is already present, our call to the 4144 register_netdevice_notifier() will actually get the rtnl_lock() 4145 before sending NETDEV_REGISTER and (if up) NETDEV_UP 4146 events. 4147 4148 But for cases when the netdev is not already present, the notifier 4149 chain is subjected to be invoked from different CPUs simultaneously. 4150 4151 This is protected by the netdev_mutex. 4152 */ 4153 static int bnxt_re_netdev_event(struct notifier_block *notifier, 4154 unsigned long event, void *ptr) 4155 { 4156 struct ifnet *real_dev, *netdev; 4157 struct bnxt_re_dev *rdev = NULL; 4158 4159 netdev = netdev_notifier_info_to_ifp(ptr); 4160 real_dev = rdma_vlan_dev_real_dev(netdev); 4161 if (!real_dev) 4162 real_dev = netdev; 4163 /* In case of bonding,this will be bond's rdev */ 4164 rdev = bnxt_re_from_netdev(real_dev); 4165 4166 if (!rdev) 4167 goto exit; 4168 4169 dev_info(rdev_to_dev(rdev), "%s: Event = %s (0x%lx), rdev %s (real_dev %s)\n", 4170 __func__, bnxt_re_netevent(event), event, 4171 rdev ? rdev->netdev ? if_getdname(rdev->netdev) : "->netdev = NULL" : "= NULL", 4172 (real_dev == netdev) ? 
"= netdev" : if_getdname(real_dev)); 4173 4174 if (!test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) 4175 goto exit; 4176 4177 bnxt_re_hold(rdev); 4178 4179 if (real_dev != netdev) { 4180 switch (event) { 4181 case NETDEV_UP: 4182 bnxt_re_schedule_work(rdev, event, netdev, 4183 NULL, NULL); 4184 break; 4185 case NETDEV_DOWN: 4186 break; 4187 default: 4188 break; 4189 } 4190 goto done; 4191 } 4192 4193 switch (event) { 4194 case NETDEV_CHANGEADDR: 4195 if (!_is_chip_gen_p5_p7(rdev->chip_ctx)) 4196 bnxt_re_update_shadow_ah(rdev); 4197 bnxt_qplib_get_guid(rdev->dev_addr, 4198 (u8 *)&rdev->ibdev.node_guid); 4199 break; 4200 4201 case NETDEV_CHANGE: 4202 bnxt_re_get_link_speed(rdev); 4203 bnxt_re_schedule_work(rdev, event, NULL, NULL, NULL); 4204 break; 4205 case NETDEV_UNREGISTER: 4206 /* netdev notifier will call NETDEV_UNREGISTER again later since 4207 * we are still holding the reference to the netdev 4208 */ 4209 4210 /* 4211 * Workaround to avoid ib_unregister hang. Check for module 4212 * reference and dont free up the device if the reference 4213 * is non zero. Checking only for PF functions. 
4214 */ 4215 4216 if (rdev) { 4217 dev_info(rdev_to_dev(rdev), 4218 "bnxt_re:Unreg recvd when module refcnt > 0"); 4219 dev_info(rdev_to_dev(rdev), 4220 "bnxt_re:Close all apps using bnxt_re devs"); 4221 dev_info(rdev_to_dev(rdev), 4222 "bnxt_re:Remove the configfs entry created for the device"); 4223 dev_info(rdev_to_dev(rdev), 4224 "bnxt_re:Refer documentation for details"); 4225 goto done; 4226 } 4227 4228 if (atomic_read(&rdev->sched_count) > 0) 4229 goto done; 4230 if (!rdev->unreg_sched) { 4231 bnxt_re_schedule_work(rdev, NETDEV_UNREGISTER, 4232 NULL, NULL, NULL); 4233 rdev->unreg_sched = true; 4234 goto done; 4235 } 4236 4237 break; 4238 default: 4239 break; 4240 } 4241 done: 4242 if (rdev) 4243 bnxt_re_put(rdev); 4244 exit: 4245 return NOTIFY_DONE; 4246 } 4247 4248 static struct notifier_block bnxt_re_netdev_notifier = { 4249 .notifier_call = bnxt_re_netdev_event 4250 }; 4251 4252 static void bnxt_re_remove_base_interface(struct bnxt_re_dev *rdev, 4253 struct auxiliary_device *adev) 4254 { 4255 bnxt_re_stopqps_and_ib_uninit(rdev); 4256 bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, adev); 4257 auxiliary_set_drvdata(adev, NULL); 4258 } 4259 4260 /* 4261 * bnxt_re_remove - Removes the roce aux device 4262 * @adev - aux device pointer 4263 * 4264 * This function removes the roce device. This gets 4265 * called in the mod exit path and pci unbind path. 4266 * If the rdev is bond interace, destroys the lag 4267 * in module exit path, and in pci unbind case 4268 * destroys the lag and recreates other base interface. 4269 * If the device is already removed in error recovery 4270 * path, it just unregister with the L2. 
4271 */ 4272 static void bnxt_re_remove(struct auxiliary_device *adev) 4273 { 4274 struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev); 4275 struct bnxt_en_dev *en_dev; 4276 struct bnxt_re_dev *rdev; 4277 bool primary_dev = false; 4278 bool secondary_dev = false; 4279 4280 if (!en_info) 4281 return; 4282 4283 mutex_lock(&bnxt_re_mutex); 4284 en_dev = en_info->en_dev; 4285 4286 rdev = en_info->rdev; 4287 4288 if (rdev && bnxt_re_is_rdev_valid(rdev)) { 4289 if (pci_channel_offline(rdev->rcfw.pdev)) 4290 set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); 4291 4292 if (test_bit(BNXT_RE_FLAG_EN_DEV_PRIMARY_DEV, &en_info->flags)) 4293 primary_dev = true; 4294 if (test_bit(BNXT_RE_FLAG_EN_DEV_SECONDARY_DEV, &en_info->flags)) 4295 secondary_dev = true; 4296 4297 /* 4298 * en_dev_info of primary device and secondary device have the 4299 * same rdev pointer when LAG is configured. This rdev pointer 4300 * is rdev of bond interface. 4301 */ 4302 if (!primary_dev && !secondary_dev) { 4303 /* removal of non bond interface */ 4304 bnxt_re_remove_base_interface(rdev, adev); 4305 } else { 4306 /* 4307 * removal of bond primary/secondary interface. In this 4308 * case bond device is already removed, so rdev->binfo 4309 * is NULL. 
4310 */ 4311 auxiliary_set_drvdata(adev, NULL); 4312 } 4313 } else { 4314 /* device is removed from ulp stop, unregister the net dev */ 4315 if (test_bit(BNXT_RE_FLAG_EN_DEV_NETDEV_REG, &en_info->flags)) { 4316 rtnl_lock(); 4317 en_dev->en_ops->bnxt_unregister_device(en_dev, 4318 BNXT_ROCE_ULP); 4319 rtnl_unlock(); 4320 } 4321 } 4322 mutex_unlock(&bnxt_re_mutex); 4323 return; 4324 } 4325 4326 /* wrapper for all external user context callers */ 4327 void _bnxt_re_remove(struct auxiliary_device *adev) 4328 { 4329 bnxt_re_remove(adev); 4330 } 4331 4332 static void bnxt_re_ib_init_2(struct bnxt_re_dev *rdev) 4333 { 4334 int rc; 4335 4336 rc = bnxt_re_get_device_stats(rdev); 4337 if (rc) 4338 dev_err(rdev_to_dev(rdev), 4339 "Failed initial device stat query"); 4340 4341 bnxt_re_net_register_async_event(rdev); 4342 } 4343 4344 static int bnxt_re_probe(struct auxiliary_device *adev, 4345 const struct auxiliary_device_id *id) 4346 { 4347 struct bnxt_aux_dev *aux_dev = 4348 container_of(adev, struct bnxt_aux_dev, aux_dev); 4349 struct bnxt_re_en_dev_info *en_info; 4350 struct bnxt_en_dev *en_dev = NULL; 4351 struct bnxt_re_dev *rdev; 4352 int rc = -ENODEV; 4353 4354 if (aux_dev) 4355 en_dev = aux_dev->edev; 4356 4357 if (!en_dev) 4358 return rc; 4359 4360 if (en_dev->ulp_version != BNXT_ULP_VERSION) { 4361 pr_err("%s: probe error: bnxt_en ulp version magic %x is not compatible!\n", 4362 ROCE_DRV_MODULE_NAME, en_dev->ulp_version); 4363 return -EINVAL; 4364 } 4365 4366 en_info = kzalloc(sizeof(*en_info), GFP_KERNEL); 4367 if (!en_info) 4368 return -ENOMEM; 4369 memset(en_info, 0, sizeof(struct bnxt_re_en_dev_info)); 4370 en_info->en_dev = en_dev; 4371 auxiliary_set_drvdata(adev, en_info); 4372 4373 mutex_lock(&bnxt_re_mutex); 4374 rc = bnxt_re_add_device(&rdev, en_dev->net, 4375 BNXT_RE_GSI_MODE_ALL, 4376 BNXT_RE_COMPLETE_INIT, 4377 BNXT_QPLIB_WQE_MODE_STATIC, 4378 BNXT_RE_MSIX_FROM_MOD_PARAM, adev); 4379 if (rc) { 4380 mutex_unlock(&bnxt_re_mutex); 4381 return rc; 4382 } 
4383 4384 rc = bnxt_re_ib_init(rdev); 4385 if (rc) 4386 goto err; 4387 4388 bnxt_re_ib_init_2(rdev); 4389 4390 dev_dbg(rdev_to_dev(rdev), "%s: adev: %p\n", __func__, adev); 4391 rdev->adev = adev; 4392 4393 mutex_unlock(&bnxt_re_mutex); 4394 4395 return 0; 4396 4397 err: 4398 mutex_unlock(&bnxt_re_mutex); 4399 bnxt_re_remove(adev); 4400 4401 return rc; 4402 } 4403 4404 static const struct auxiliary_device_id bnxt_re_id_table[] = { 4405 { .name = BNXT_ADEV_NAME ".rdma", }, 4406 {}, 4407 }; 4408 4409 MODULE_DEVICE_TABLE(auxiliary, bnxt_re_id_table); 4410 4411 static struct auxiliary_driver bnxt_re_driver = { 4412 .name = "rdma", 4413 .probe = bnxt_re_probe, 4414 .remove = bnxt_re_remove, 4415 .id_table = bnxt_re_id_table, 4416 }; 4417 4418 static int __init bnxt_re_mod_init(void) 4419 { 4420 int rc = 0; 4421 4422 pr_info("%s: %s", ROCE_DRV_MODULE_NAME, drv_version); 4423 4424 bnxt_re_wq = create_singlethread_workqueue("bnxt_re"); 4425 if (!bnxt_re_wq) 4426 return -ENOMEM; 4427 4428 rc = bnxt_re_register_netdevice_notifier(&bnxt_re_netdev_notifier); 4429 if (rc) { 4430 pr_err("%s: Cannot register to netdevice_notifier", 4431 ROCE_DRV_MODULE_NAME); 4432 goto err_netdev; 4433 } 4434 4435 INIT_LIST_HEAD(&bnxt_re_dev_list); 4436 4437 rc = auxiliary_driver_register(&bnxt_re_driver); 4438 if (rc) { 4439 pr_err("%s: Failed to register auxiliary driver\n", 4440 ROCE_DRV_MODULE_NAME); 4441 goto err_auxdrv; 4442 } 4443 4444 return 0; 4445 4446 err_auxdrv: 4447 bnxt_re_unregister_netdevice_notifier(&bnxt_re_netdev_notifier); 4448 4449 err_netdev: 4450 destroy_workqueue(bnxt_re_wq); 4451 4452 return rc; 4453 } 4454 4455 static void __exit bnxt_re_mod_exit(void) 4456 { 4457 gmod_exit = 1; 4458 auxiliary_driver_unregister(&bnxt_re_driver); 4459 4460 bnxt_re_unregister_netdevice_notifier(&bnxt_re_netdev_notifier); 4461 4462 if (bnxt_re_wq) 4463 destroy_workqueue(bnxt_re_wq); 4464 } 4465 4466 module_init(bnxt_re_mod_init); 4467 module_exit(bnxt_re_mod_exit); 4468