xref: /freebsd/sys/dev/bnxt/bnxt_re/main.c (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1 /*
2  * Copyright (c) 2015-2024, Broadcom. All rights reserved.  The term
3  * Broadcom refers to Broadcom Limited and/or its subsidiaries.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
18  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
23  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
24  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
25  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
26  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * Description: Main component of the bnxt_re driver
29  */
30 
31 #include <linux/if_ether.h>
32 #include <linux/module.h>
33 #include <linux/errno.h>
34 #include <linux/pci.h>
35 #include <linux/dma-mapping.h>
36 #include <linux/slab.h>
37 #include <linux/sched.h>
38 #include <linux/delay.h>
39 #include <linux/fs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/ib_addr.h>
42 #include <rdma/ib_cache.h>
43 #include <dev/mlx5/port.h>
44 #include <dev/mlx5/vport.h>
45 #include <linux/list.h>
46 #include <rdma/ib_smi.h>
47 #include <rdma/ib_umem.h>
48 #include <linux/in.h>
49 #include <linux/etherdevice.h>
50 
51 #include "bnxt_re.h"
52 #include "ib_verbs.h"
53 #include "bnxt_re-abi.h"
54 #include "bnxt.h"
55 
56 static char drv_version[] =
57 		"Broadcom NetXtreme-C/E RoCE Driver " ROCE_DRV_MODULE_NAME \
58 		" v" ROCE_DRV_MODULE_VERSION " (" ROCE_DRV_MODULE_RELDATE ")\n";
59 
60 #define BNXT_RE_DESC	"Broadcom NetXtreme RoCE"
61 #define BNXT_ADEV_NAME "if_bnxt"
62 
63 MODULE_DESCRIPTION("Broadcom NetXtreme-C/E RoCE Driver");
64 MODULE_LICENSE("Dual BSD/GPL");
65 MODULE_DEPEND(bnxt_re, linuxkpi, 1, 1, 1);
66 MODULE_DEPEND(bnxt_re, ibcore, 1, 1, 1);
67 MODULE_DEPEND(bnxt_re, if_bnxt, 1, 1, 1);
68 MODULE_VERSION(bnxt_re, 1);
69 
70 
71 DEFINE_MUTEX(bnxt_re_mutex); /* mutex lock for driver */
72 
73 static unsigned int restrict_mrs = 0;
74 module_param(restrict_mrs, uint, 0);
75 MODULE_PARM_DESC(restrict_mrs, " Restrict the no. of MRs 0 = 256K , 1 = 64K");
76 
77 unsigned int restrict_stats = 0;
78 module_param(restrict_stats, uint, 0);
79 MODULE_PARM_DESC(restrict_stats, "Restrict stats query frequency to ethtool coalesce value. Disabled by default");
80 
81 unsigned int enable_fc = 1;
82 module_param(enable_fc, uint, 0);
83 MODULE_PARM_DESC(enable_fc, "Enable default PFC, CC,ETS during driver load. 1 - fc enable, 0 - fc disable - Default is 1");
84 
85 unsigned int min_tx_depth = 1;
86 module_param(min_tx_depth, uint, 0);
87 MODULE_PARM_DESC(min_tx_depth, "Minimum TX depth - Default is 1");
88 
89 static uint8_t max_msix_vec[BNXT_RE_MAX_DEVICES] = {0};
90 static unsigned int max_msix_vec_argc;
91 module_param_array(max_msix_vec, byte, &max_msix_vec_argc, 0444);
92 MODULE_PARM_DESC(max_msix_vec, "Max MSI-x vectors per PF (2 - 64) - Default is 64");
93 
94 unsigned int cmdq_shadow_qd = RCFW_CMD_NON_BLOCKING_SHADOW_QD;
95 module_param_named(cmdq_shadow_qd, cmdq_shadow_qd, uint, 0644);
96 MODULE_PARM_DESC(cmdq_shadow_qd, "Perf Stat Debug: Shadow QD Range (1-64) - Default is 64");
97 
98 
99 /* globals */
100 struct list_head bnxt_re_dev_list = LINUX_LIST_HEAD_INIT(bnxt_re_dev_list);
101 static int bnxt_re_probe_count;
102 
103 DEFINE_MUTEX(bnxt_re_dev_lock);
104 static u32 gmod_exit;
105 static u32 gadd_dev_inprogress;
106 
107 static void bnxt_re_task(struct work_struct *work_task);
108 static struct workqueue_struct *bnxt_re_wq;
109 static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev);
110 static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
111 			     u32 *offset);
112 static int bnxt_re_ib_init(struct bnxt_re_dev *rdev);
113 static void bnxt_re_ib_init_2(struct bnxt_re_dev *rdev);
114 void _bnxt_re_remove(struct auxiliary_device *adev);
115 void writel_fbsd(struct bnxt_softc *bp, u32, u8, u32);
116 u32 readl_fbsd(struct bnxt_softc *bp, u32, u8);
117 static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev);
118 
/*
 * Thin wrapper: hands @nb to register_netdevice_notifier() and returns
 * that call's result unchanged.
 */
int bnxt_re_register_netdevice_notifier(struct notifier_block *nb)
{
	return register_netdevice_notifier(nb);
}
125 
/*
 * Thin wrapper: hands @nb to unregister_netdevice_notifier() and returns
 * that call's result unchanged.
 */
int bnxt_re_unregister_netdevice_notifier(struct notifier_block *nb)
{
	return unregister_netdevice_notifier(nb);
}
132 
133 void bnxt_re_set_dma_device(struct ib_device *ibdev, struct bnxt_re_dev *rdev)
134 {
135 	ibdev->dma_device = &rdev->en_dev->pdev->dev;
136 }
137 
138 void bnxt_re_init_resolve_wq(struct bnxt_re_dev *rdev)
139 {
140 	rdev->resolve_wq = create_singlethread_workqueue("bnxt_re_resolve_wq");
141 	 INIT_LIST_HEAD(&rdev->mac_wq_list);
142 }
143 
144 void bnxt_re_uninit_resolve_wq(struct bnxt_re_dev *rdev)
145 {
146 	struct bnxt_re_resolve_dmac_work *tmp_work = NULL, *tmp_st;
147 	if (!rdev->resolve_wq)
148 		return;
149 	flush_workqueue(rdev->resolve_wq);
150 	list_for_each_entry_safe(tmp_work, tmp_st, &rdev->mac_wq_list, list) {
151 			list_del(&tmp_work->list);
152 			kfree(tmp_work);
153 	}
154 	destroy_workqueue(rdev->resolve_wq);
155 	rdev->resolve_wq = NULL;
156 }
157 
158 u32 readl_fbsd(struct bnxt_softc *bp, u32 reg_off, u8 bar_idx)
159 {
160 
161 	if (bar_idx)
162 		return bus_space_read_8(bp->doorbell_bar.tag, bp->doorbell_bar.handle, reg_off);
163 	else
164 		return bus_space_read_8(bp->hwrm_bar.tag, bp->hwrm_bar.handle, reg_off);
165 }
166 
167 void writel_fbsd(struct bnxt_softc *bp, u32 reg_off, u8 bar_idx, u32 val)
168 {
169 	if (bar_idx)
170 		bus_space_write_8(bp->doorbell_bar.tag, bp->doorbell_bar.handle, reg_off, htole32(val));
171 	else
172 		bus_space_write_8(bp->hwrm_bar.tag, bp->hwrm_bar.handle, reg_off, htole32(val));
173 }
174 
175 static void bnxt_re_update_fifo_occup_slabs(struct bnxt_re_dev *rdev,
176 					    u32 fifo_occup)
177 {
178 	if (fifo_occup > rdev->dbg_stats->dbq.fifo_occup_water_mark)
179 		rdev->dbg_stats->dbq.fifo_occup_water_mark = fifo_occup;
180 
181 	if (fifo_occup > 8 * rdev->pacing_algo_th)
182 		rdev->dbg_stats->dbq.fifo_occup_slab_4++;
183 	else if (fifo_occup > 4 * rdev->pacing_algo_th)
184 		rdev->dbg_stats->dbq.fifo_occup_slab_3++;
185 	else if (fifo_occup > 2 * rdev->pacing_algo_th)
186 		rdev->dbg_stats->dbq.fifo_occup_slab_2++;
187 	else if (fifo_occup > rdev->pacing_algo_th)
188 		rdev->dbg_stats->dbq.fifo_occup_slab_1++;
189 }
190 
191 static void bnxt_re_update_do_pacing_slabs(struct bnxt_re_dev *rdev)
192 {
193 	struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
194 
195 	if (pacing_data->do_pacing > rdev->dbg_stats->dbq.do_pacing_water_mark)
196 		rdev->dbg_stats->dbq.do_pacing_water_mark = pacing_data->do_pacing;
197 
198 	if (pacing_data->do_pacing > 16 * rdev->dbr_def_do_pacing)
199 		rdev->dbg_stats->dbq.do_pacing_slab_5++;
200 	else if (pacing_data->do_pacing > 8 * rdev->dbr_def_do_pacing)
201 		rdev->dbg_stats->dbq.do_pacing_slab_4++;
202 	else if (pacing_data->do_pacing > 4 * rdev->dbr_def_do_pacing)
203 		rdev->dbg_stats->dbq.do_pacing_slab_3++;
204 	else if (pacing_data->do_pacing > 2 * rdev->dbr_def_do_pacing)
205 		rdev->dbg_stats->dbq.do_pacing_slab_2++;
206 	else if (pacing_data->do_pacing > rdev->dbr_def_do_pacing)
207 		rdev->dbg_stats->dbq.do_pacing_slab_1++;
208 }
209 
210 static bool bnxt_re_is_qp1_qp(struct bnxt_re_qp *qp)
211 {
212 	return qp->ib_qp.qp_type == IB_QPT_GSI;
213 }
214 
215 static struct bnxt_re_qp *bnxt_re_get_qp1_qp(struct bnxt_re_dev *rdev)
216 {
217 	struct bnxt_re_qp *qp;
218 
219 	mutex_lock(&rdev->qp_lock);
220 	list_for_each_entry(qp, &rdev->qp_list, list) {
221 		if (bnxt_re_is_qp1_qp(qp)) {
222 			mutex_unlock(&rdev->qp_lock);
223 			return qp;
224 		}
225 	}
226 	mutex_unlock(&rdev->qp_lock);
227 	return NULL;
228 }
229 
230 /* Set the maximum number of each resource that the driver actually wants
231  * to allocate. This may be up to the maximum number the firmware has
232  * reserved for the function. The driver may choose to allocate fewer
233  * resources than the firmware maximum.
234  */
235 static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev)
236 {
237 	struct bnxt_qplib_max_res dev_res = {};
238 	struct bnxt_qplib_chip_ctx *cctx;
239 	struct bnxt_qplib_dev_attr *attr;
240 	struct bnxt_qplib_ctx *hctx;
241 	int i;
242 
243 	attr = rdev->dev_attr;
244 	hctx = rdev->qplib_res.hctx;
245 	cctx = rdev->chip_ctx;
246 
247 	bnxt_qplib_max_res_supported(cctx, &rdev->qplib_res, &dev_res, false);
248 	if (!_is_chip_gen_p5_p7(cctx)) {
249 		hctx->qp_ctx.max = min_t(u32, dev_res.max_qp, attr->max_qp);
250 		hctx->mrw_ctx.max = min_t(u32, dev_res.max_mr, attr->max_mr);
251 		/* To accommodate 16k MRs and 16k AHs,
252 		 * driver has to allocate 32k backing store memory
253 		 */
254 		hctx->mrw_ctx.max *= 2;
255 		hctx->srq_ctx.max = min_t(u32, dev_res.max_srq, attr->max_srq);
256 		hctx->cq_ctx.max = min_t(u32, dev_res.max_cq, attr->max_cq);
257 		for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
258 			hctx->tqm_ctx.qcount[i] = attr->tqm_alloc_reqs[i];
259 	} else {
260 		hctx->qp_ctx.max = attr->max_qp ? attr->max_qp : dev_res.max_qp;
261 		hctx->mrw_ctx.max = attr->max_mr ? attr->max_mr : dev_res.max_mr;
262 		hctx->srq_ctx.max = attr->max_srq ? attr->max_srq : dev_res.max_srq;
263 		hctx->cq_ctx.max = attr->max_cq ? attr->max_cq : dev_res.max_cq;
264 	}
265 }
266 
267 static void bnxt_re_limit_vf_res(struct bnxt_re_dev *rdev,
268 				 struct bnxt_qplib_vf_res *vf_res,
269 				 u32 num_vf)
270 {
271 	struct bnxt_qplib_chip_ctx *cctx = rdev->chip_ctx;
272 	struct bnxt_qplib_max_res dev_res = {};
273 
274 	bnxt_qplib_max_res_supported(cctx, &rdev->qplib_res, &dev_res, true);
275 	vf_res->max_qp = dev_res.max_qp / num_vf;
276 	vf_res->max_srq = dev_res.max_srq / num_vf;
277 	vf_res->max_cq = dev_res.max_cq / num_vf;
278 	/*
279 	 * MR and AH shares the same backing store, the value specified
280 	 * for max_mrw is split into half by the FW for MR and AH
281 	 */
282 	vf_res->max_mrw = dev_res.max_mr * 2 / num_vf;
283 	vf_res->max_gid = BNXT_RE_MAX_GID_PER_VF;
284 }
285 
286 static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
287 {
288 	struct bnxt_qplib_ctx *hctx;
289 
290 	hctx = rdev->qplib_res.hctx;
291 	memset(&hctx->vf_res, 0, sizeof(struct bnxt_qplib_vf_res));
292 	bnxt_re_limit_pf_res(rdev);
293 
294 	if (rdev->num_vfs)
295 		bnxt_re_limit_vf_res(rdev, &hctx->vf_res, rdev->num_vfs);
296 }
297 
298 static void bnxt_re_dettach_irq(struct bnxt_re_dev *rdev)
299 {
300 	struct bnxt_qplib_rcfw *rcfw = NULL;
301 	struct bnxt_qplib_nq *nq;
302 	int indx;
303 
304 	rcfw = &rdev->rcfw;
305 	for (indx = 0; indx < rdev->nqr.max_init; indx++) {
306 		nq = &rdev->nqr.nq[indx];
307 		mutex_lock(&nq->lock);
308 		bnxt_qplib_nq_stop_irq(nq, false);
309 		mutex_unlock(&nq->lock);
310 	}
311 
312 	bnxt_qplib_rcfw_stop_irq(rcfw, false);
313 }
314 
315 static void bnxt_re_detach_err_device(struct bnxt_re_dev *rdev)
316 {
317 	/* Free the MSIx vectors only so that L2 can proceed with MSIx disable */
318 	bnxt_re_dettach_irq(rdev);
319 
320 	/* Set the state as detached to prevent sending any more commands */
321 	set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
322 	set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
323 	wake_up_all(&rdev->rcfw.cmdq.waitq);
324 }
325 
326 #define MAX_DSCP_PRI_TUPLE	64
327 
328 struct bnxt_re_dcb_work {
329 	struct work_struct work;
330 	struct bnxt_re_dev *rdev;
331 	struct hwrm_async_event_cmpl cmpl;
332 };
333 
334 static void bnxt_re_init_dcb_wq(struct bnxt_re_dev *rdev)
335 {
336 	rdev->dcb_wq = create_singlethread_workqueue("bnxt_re_dcb_wq");
337 }
338 
339 static void bnxt_re_uninit_dcb_wq(struct bnxt_re_dev *rdev)
340 {
341 	if (!rdev->dcb_wq)
342 		return;
343 	flush_workqueue(rdev->dcb_wq);
344 	destroy_workqueue(rdev->dcb_wq);
345 	rdev->dcb_wq = NULL;
346 }
347 
348 static void bnxt_re_init_aer_wq(struct bnxt_re_dev *rdev)
349 {
350 	rdev->aer_wq = create_singlethread_workqueue("bnxt_re_aer_wq");
351 }
352 
353 static void bnxt_re_uninit_aer_wq(struct bnxt_re_dev *rdev)
354 {
355 	if (!rdev->aer_wq)
356 		return;
357 	flush_workqueue(rdev->aer_wq);
358 	destroy_workqueue(rdev->aer_wq);
359 	rdev->aer_wq = NULL;
360 }
361 
362 static int bnxt_re_update_qp1_tos_dscp(struct bnxt_re_dev *rdev)
363 {
364 	struct bnxt_re_qp *qp;
365 
366 	if (!_is_chip_gen_p5_p7(rdev->chip_ctx))
367 		return 0;
368 
369 	qp = bnxt_re_get_qp1_qp(rdev);
370 	if (!qp)
371 		return 0;
372 
373 	qp->qplib_qp.modify_flags = CMDQ_MODIFY_QP_MODIFY_MASK_TOS_DSCP;
374 	qp->qplib_qp.tos_dscp = rdev->cc_param.qp1_tos_dscp;
375 
376 	return bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
377 }
378 
379 static void bnxt_re_reconfigure_dscp(struct bnxt_re_dev *rdev)
380 {
381 	struct bnxt_qplib_cc_param *cc_param;
382 	struct bnxt_re_tc_rec *tc_rec;
383 	bool update_cc = false;
384 	u8 dscp_user;
385 	int rc;
386 
387 	cc_param = &rdev->cc_param;
388 	tc_rec = &rdev->tc_rec[0];
389 
390 	if (!(cc_param->roce_dscp_user || cc_param->cnp_dscp_user))
391 		return;
392 
393 	if (cc_param->cnp_dscp_user) {
394 		dscp_user = (cc_param->cnp_dscp_user & 0x3f);
395 		if ((tc_rec->cnp_dscp_bv & (1ul << dscp_user)) &&
396 		    (cc_param->alt_tos_dscp != dscp_user)) {
397 			cc_param->alt_tos_dscp = dscp_user;
398 			cc_param->mask |= CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP;
399 			update_cc = true;
400 		}
401 	}
402 
403 	if (cc_param->roce_dscp_user) {
404 		dscp_user = (cc_param->roce_dscp_user & 0x3f);
405 		if ((tc_rec->roce_dscp_bv & (1ul << dscp_user)) &&
406 		    (cc_param->tos_dscp != dscp_user)) {
407 			cc_param->tos_dscp = dscp_user;
408 			cc_param->mask |= CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP;
409 			update_cc = true;
410 		}
411 	}
412 
413 	if (update_cc) {
414 		rc = bnxt_qplib_modify_cc(&rdev->qplib_res, cc_param);
415 		if (rc)
416 			dev_err(rdev_to_dev(rdev), "Failed to apply cc settings\n");
417 	}
418 }
419 
420 static void bnxt_re_dcb_wq_task(struct work_struct *work)
421 {
422 	struct bnxt_qplib_cc_param *cc_param;
423 	struct bnxt_re_tc_rec *tc_rec;
424 	struct bnxt_re_dev *rdev;
425 	struct bnxt_re_dcb_work *dcb_work =
426 			container_of(work, struct bnxt_re_dcb_work, work);
427 	int rc;
428 
429 	rdev = dcb_work->rdev;
430 	if (!rdev)
431 		goto exit;
432 
433 	mutex_lock(&rdev->cc_lock);
434 
435 	cc_param = &rdev->cc_param;
436 	rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, cc_param);
437 	if (rc) {
438 		dev_err(rdev_to_dev(rdev), "Failed to query ccparam rc:%d", rc);
439 		goto fail;
440 	}
441 	tc_rec = &rdev->tc_rec[0];
442 	/*
443 	 * Upon the receival of DCB Async event:
444 	 *   If roce_dscp or cnp_dscp or both (which user configured using configfs)
445 	 *   is in the list, re-program the value using modify_roce_cc command
446 	 */
447 	bnxt_re_reconfigure_dscp(rdev);
448 
449 	cc_param->roce_pri = tc_rec->roce_prio;
450 	if (cc_param->qp1_tos_dscp != cc_param->tos_dscp) {
451 		cc_param->qp1_tos_dscp = cc_param->tos_dscp;
452 		rc = bnxt_re_update_qp1_tos_dscp(rdev);
453 		if (rc) {
454 			dev_err(rdev_to_dev(rdev), "%s:Failed to modify QP1 rc:%d",
455 				__func__, rc);
456 			goto fail;
457 		}
458 	}
459 
460 fail:
461 	mutex_unlock(&rdev->cc_lock);
462 exit:
463 	kfree(dcb_work);
464 }
465 
466 static int bnxt_re_hwrm_dbr_pacing_broadcast_event(struct bnxt_re_dev *rdev)
467 {
468 	struct hwrm_func_dbr_pacing_broadcast_event_output resp = {0};
469 	struct hwrm_func_dbr_pacing_broadcast_event_input req = {0};
470 	struct bnxt_en_dev *en_dev = rdev->en_dev;
471 	struct bnxt_fw_msg fw_msg;
472 	int rc;
473 
474 	memset(&fw_msg, 0, sizeof(fw_msg));
475 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
476 			      HWRM_FUNC_DBR_PACING_BROADCAST_EVENT, -1, -1);
477 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
478 			    sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
479 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
480 	if (rc) {
481 		dev_dbg(rdev_to_dev(rdev),
482 			"Failed to send dbr pacing broadcast event rc:%d", rc);
483 		return rc;
484 	}
485 	return 0;
486 }
487 
488 static int bnxt_re_hwrm_dbr_pacing_nqlist_query(struct bnxt_re_dev *rdev)
489 {
490 	struct hwrm_func_dbr_pacing_nqlist_query_output resp = {0};
491 	struct hwrm_func_dbr_pacing_nqlist_query_input req = {0};
492 	struct bnxt_dbq_nq_list *nq_list = &rdev->nq_list;
493 	struct bnxt_en_dev *en_dev = rdev->en_dev;
494 	bool primary_found = false;
495 	struct bnxt_fw_msg fw_msg;
496 	struct bnxt_qplib_nq *nq;
497 	int rc, i, j = 1;
498 	u16 *nql_ptr;
499 
500 	nq = &rdev->nqr.nq[0];
501 
502 	memset(&fw_msg, 0, sizeof(fw_msg));
503 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
504 			      HWRM_FUNC_DBR_PACING_NQLIST_QUERY, -1, -1);
505 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
506 			    sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
507 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
508 	if (rc) {
509 		dev_err(rdev_to_dev(rdev), "Failed to send dbr pacing nq list query rc:%d", rc);
510 		return rc;
511 	}
512 	nq_list->num_nql_entries = le32_to_cpu(resp.num_nqs);
513 	nql_ptr = &resp.nq_ring_id0;
514 	/* populate the nq_list of the primary function with list received
515 	 * from FW. Fill the NQ IDs of secondary functions from index 1 to
516 	 * num_nql_entries - 1. Fill the  nq_list->nq_id[0] with the
517 	 * nq_id of the primary pf
518 	 */
519 	for (i = 0; i < nq_list->num_nql_entries; i++) {
520 		u16 nq_id = *nql_ptr;
521 
522 		dev_dbg(rdev_to_dev(rdev),
523 			"nq_list->nq_id[%d] = %d\n", i, nq_id);
524 		if (nq_id != nq->ring_id) {
525 			nq_list->nq_id[j] = nq_id;
526 			j++;
527 		} else {
528 			primary_found = true;
529 			nq_list->nq_id[0] = nq->ring_id;
530 		}
531 		nql_ptr++;
532 	}
533 	if (primary_found)
534 		bnxt_qplib_dbr_pacing_set_primary_pf(rdev->chip_ctx, 1);
535 
536 	return 0;
537 }
538 
539 static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev)
540 {
541 	struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
542 	u32 read_val, fifo_occup;
543 	bool first_read = true;
544 
545 	/* loop shouldn't run infintely as the occupancy usually goes
546 	 * below pacing algo threshold as soon as pacing kicks in.
547 	 */
548 	while (1) {
549 		read_val = readl_fbsd(rdev->en_dev->softc, rdev->dbr_db_fifo_reg_off, 0);
550 		fifo_occup = pacing_data->fifo_max_depth -
551 			     ((read_val & pacing_data->fifo_room_mask) >>
552 			      pacing_data->fifo_room_shift);
553 		/* Fifo occupancy cannot be greater the MAX FIFO depth */
554 		if (fifo_occup > pacing_data->fifo_max_depth)
555 			break;
556 
557 		if (first_read) {
558 			bnxt_re_update_fifo_occup_slabs(rdev, fifo_occup);
559 			first_read = false;
560 		}
561 		if (fifo_occup < pacing_data->pacing_th)
562 			break;
563 	}
564 }
565 
566 static void bnxt_re_set_default_pacing_data(struct bnxt_re_dev *rdev)
567 {
568 	struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
569 
570 	pacing_data->do_pacing = rdev->dbr_def_do_pacing;
571 	pacing_data->pacing_th = rdev->pacing_algo_th;
572 	pacing_data->alarm_th =
573 		pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE(rdev->chip_ctx);
574 }
575 
/*
 * Field layout of the value written to the DBQ throttling register by
 * bnxt_re_set_dbq_throttling_reg(): ring id and watermark.
 */
#define CAG_RING_MASK		0x7FF
#define CAG_RING_SHIFT		17
#define WATERMARK_MASK		0xFFF
#define WATERMARK_SHIFT		0
580 
581 static bool bnxt_re_check_if_dbq_intr_triggered(struct bnxt_re_dev *rdev)
582 {
583 	u32 read_val;
584 	int j;
585 
586 	for (j = 0; j < 10; j++) {
587 		read_val = readl_fbsd(rdev->en_dev->softc, rdev->dbr_aeq_arm_reg_off, 0);
588 		dev_dbg(rdev_to_dev(rdev), "AEQ ARM status = 0x%x\n",
589 			read_val);
590 		if (!read_val)
591 			return true;
592 	}
593 	return false;
594 }
595 
596 int bnxt_re_set_dbq_throttling_reg(struct bnxt_re_dev *rdev, u16 nq_id, u32 throttle)
597 {
598 	u32 cag_ring_water_mark = 0, read_val;
599 	u32 throttle_val;
600 
601 	/* Convert throttle percentage to value */
602 	throttle_val = (rdev->qplib_res.pacing_data->fifo_max_depth * throttle) / 100;
603 
604 	if (bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) {
605 		cag_ring_water_mark = (nq_id & CAG_RING_MASK) << CAG_RING_SHIFT |
606 				      (throttle_val & WATERMARK_MASK);
607 		writel_fbsd(rdev->en_dev->softc,  rdev->dbr_throttling_reg_off, 0, cag_ring_water_mark);
608 		read_val = readl_fbsd(rdev->en_dev->softc , rdev->dbr_throttling_reg_off, 0);
609 		dev_dbg(rdev_to_dev(rdev),
610 			"%s: dbr_throttling_reg_off read_val = 0x%x\n",
611 			__func__, read_val);
612 		if (read_val != cag_ring_water_mark) {
613 			dev_dbg(rdev_to_dev(rdev),
614 				"nq_id = %d write_val=0x%x read_val=0x%x\n",
615 				nq_id, cag_ring_water_mark, read_val);
616 			return 1;
617 		}
618 	}
619 	writel_fbsd(rdev->en_dev->softc,  rdev->dbr_aeq_arm_reg_off, 0, 1);
620 	return 0;
621 }
622 
623 static void bnxt_re_set_dbq_throttling_for_non_primary(struct bnxt_re_dev *rdev)
624 {
625 	struct bnxt_dbq_nq_list *nq_list;
626 	struct bnxt_qplib_nq *nq;
627 	int i;
628 
629 	nq_list = &rdev->nq_list;
630 	/* Run a loop for other Active functions if this is primary function */
631 	if (bnxt_qplib_dbr_pacing_is_primary_pf(rdev->chip_ctx)) {
632 		dev_dbg(rdev_to_dev(rdev), "%s:  nq_list->num_nql_entries= %d\n",
633 			__func__, nq_list->num_nql_entries);
634 		nq = &rdev->nqr.nq[0];
635 		for (i = nq_list->num_nql_entries - 1; i > 0; i--) {
636 			u16 nq_id = nq_list->nq_id[i];
637 			if (nq)
638 				dev_dbg(rdev_to_dev(rdev),
639 					"%s: nq_id = %d cur_fn_ring_id = %d\n",
640 					__func__, nq_id, nq->ring_id);
641 			if (bnxt_re_set_dbq_throttling_reg
642 					(rdev, nq_id, 0))
643 				break;
644 			bnxt_re_check_if_dbq_intr_triggered(rdev);
645 		}
646 	}
647 }
648 
649 static void bnxt_re_handle_dbr_nq_pacing_notification(struct bnxt_re_dev *rdev)
650 {
651 	struct bnxt_qplib_nq *nq;
652 	int rc = 0;
653 
654 	nq = &rdev->nqr.nq[0];
655 
656 	/* Query the NQ list*/
657 	rc = bnxt_re_hwrm_dbr_pacing_nqlist_query(rdev);
658 	if (rc) {
659 		dev_err(rdev_to_dev(rdev),
660 			"Failed to Query NQ list rc= %d", rc);
661 		return;
662 	}
663 	/*Configure GRC access for Throttling and aeq_arm register */
664 	writel_fbsd(rdev->en_dev->softc,  BNXT_GRCPF_REG_WINDOW_BASE_OUT + 28, 0,
665 		    rdev->chip_ctx->dbr_aeq_arm_reg & BNXT_GRC_BASE_MASK);
666 
667 	rdev->dbr_throttling_reg_off =
668 		(rdev->chip_ctx->dbr_throttling_reg &
669 		 BNXT_GRC_OFFSET_MASK) + 0x8000;
670 	rdev->dbr_aeq_arm_reg_off =
671 		(rdev->chip_ctx->dbr_aeq_arm_reg &
672 		 BNXT_GRC_OFFSET_MASK) + 0x8000;
673 
674 	bnxt_re_set_dbq_throttling_reg(rdev, nq->ring_id, rdev->dbq_watermark);
675 }
676 
677 static void bnxt_re_dbq_wq_task(struct work_struct *work)
678 {
679 	struct bnxt_re_dbq_work *dbq_work =
680 			container_of(work, struct bnxt_re_dbq_work, work);
681 	struct bnxt_re_dev *rdev;
682 
683 	rdev = dbq_work->rdev;
684 
685 	if (!rdev)
686 		goto exit;
687 	switch (dbq_work->event) {
688 	case BNXT_RE_DBQ_EVENT_SCHED:
689 		dev_dbg(rdev_to_dev(rdev), "%s: Handle DBQ Pacing event\n",
690 			__func__);
691 		if (!bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx))
692 			bnxt_re_hwrm_dbr_pacing_broadcast_event(rdev);
693 		else
694 			bnxt_re_pacing_alert(rdev);
695 		break;
696 	case BNXT_RE_DBR_PACING_EVENT:
697 		dev_dbg(rdev_to_dev(rdev), "%s: Sched interrupt/pacing worker\n",
698 			__func__);
699 		if (_is_chip_p7(rdev->chip_ctx))
700 			bnxt_re_pacing_alert(rdev);
701 		else if (!rdev->chip_ctx->modes.dbr_pacing_v0)
702 			bnxt_re_hwrm_dbr_pacing_qcfg(rdev);
703 		break;
704 	case BNXT_RE_DBR_NQ_PACING_NOTIFICATION:
705 		bnxt_re_handle_dbr_nq_pacing_notification(rdev);
706 		/* Issue a broadcast event to notify other functions
707 		 * that primary changed
708 		 */
709 		bnxt_re_hwrm_dbr_pacing_broadcast_event(rdev);
710 		break;
711 	}
712 exit:
713 	kfree(dbq_work);
714 }
715 
716 static void bnxt_re_async_notifier(void *handle, struct hwrm_async_event_cmpl *cmpl)
717 {
718 	struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
719 	struct bnxt_re_dcb_work *dcb_work;
720 	struct bnxt_re_dbq_work *dbq_work;
721 	struct bnxt_re_dev *rdev;
722 	u16 event_id;
723 	u32 data1;
724 	u32 data2 = 0;
725 
726 	if (!cmpl) {
727 		pr_err("Async event, bad completion\n");
728 		return;
729 	}
730 
731 	if (!en_info || !en_info->en_dev) {
732 		pr_err("Async event, bad en_info or en_dev\n");
733 		return;
734 	}
735 	rdev = en_info->rdev;
736 
737 	event_id = le16_to_cpu(cmpl->event_id);
738 	data1 = le32_to_cpu(cmpl->event_data1);
739 	data2 = le32_to_cpu(cmpl->event_data2);
740 
741 	if (!rdev || !rdev_to_dev(rdev)) {
742 		dev_dbg(NULL, "Async event, bad rdev or netdev\n");
743 		return;
744 	}
745 
746 	if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags) ||
747 	    !test_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) {
748 		dev_dbg(NULL, "Async event, device already detached\n");
749 		return;
750 	}
751 	if (data2 >= 0)
752 		dev_dbg(rdev_to_dev(rdev), "Async event_id = %d data1 = %d data2 = %d",
753 			event_id, data1, data2);
754 
755 	switch (event_id) {
756 	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE:
757 		/* Not handling the event in older FWs */
758 		if (!is_qport_service_type_supported(rdev))
759 			break;
760 		if (!rdev->dcb_wq)
761 			break;
762 		dcb_work = kzalloc(sizeof(*dcb_work), GFP_ATOMIC);
763 		if (!dcb_work)
764 			break;
765 
766 		dcb_work->rdev = rdev;
767 		memcpy(&dcb_work->cmpl, cmpl, sizeof(*cmpl));
768 		INIT_WORK(&dcb_work->work, bnxt_re_dcb_wq_task);
769 		queue_work(rdev->dcb_wq, &dcb_work->work);
770 		break;
771 	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY:
772 		if (EVENT_DATA1_RESET_NOTIFY_FATAL(data1)) {
773 			/* Set rcfw flag to control commands send to Bono */
774 			set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
775 			/* Set bnxt_re flag to control commands send via L2 driver */
776 			set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
777 			wake_up_all(&rdev->rcfw.cmdq.waitq);
778 		}
779 		break;
780 	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD:
781 		if (!rdev->dbr_pacing)
782 			break;
783 		dbq_work = kzalloc(sizeof(*dbq_work), GFP_ATOMIC);
784 		if (!dbq_work)
785 			goto unlock;
786 		dbq_work->rdev = rdev;
787 		dbq_work->event = BNXT_RE_DBR_PACING_EVENT;
788 		INIT_WORK(&dbq_work->work, bnxt_re_dbq_wq_task);
789 		queue_work(rdev->dbq_wq, &dbq_work->work);
790 		rdev->dbr_sw_stats->dbq_int_recv++;
791 		break;
792 	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE:
793 		if (!rdev->dbr_pacing)
794 			break;
795 
796 		dbq_work = kzalloc(sizeof(*dbq_work), GFP_ATOMIC);
797 		if (!dbq_work)
798 			goto unlock;
799 		dbq_work->rdev = rdev;
800 		dbq_work->event = BNXT_RE_DBR_NQ_PACING_NOTIFICATION;
801 		INIT_WORK(&dbq_work->work, bnxt_re_dbq_wq_task);
802 		queue_work(rdev->dbq_wq, &dbq_work->work);
803 		break;
804 
805 	default:
806 		break;
807 	}
808 unlock:
809 	return;
810 }
811 
812 static void bnxt_re_db_fifo_check(struct work_struct *work)
813 {
814 	struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
815 						dbq_fifo_check_work);
816 	struct bnxt_qplib_db_pacing_data *pacing_data;
817 	u32 pacing_save;
818 
819 	if (!mutex_trylock(&rdev->dbq_lock))
820 		return;
821 	pacing_data = rdev->qplib_res.pacing_data;
822 	pacing_save = rdev->do_pacing_save;
823 	__wait_for_fifo_occupancy_below_th(rdev);
824 	cancel_delayed_work_sync(&rdev->dbq_pacing_work);
825 	if (rdev->dbr_recovery_on)
826 		goto recovery_on;
827 	if (pacing_save > rdev->dbr_def_do_pacing) {
828 		/* Double the do_pacing value during the congestion */
829 		pacing_save = pacing_save << 1;
830 	} else {
831 		/*
832 		 * when a new congestion is detected increase the do_pacing
833 		 * by 8 times. And also increase the pacing_th by 4 times. The
834 		 * reason to increase pacing_th is to give more space for the
835 		 * queue to oscillate down without getting empty, but also more
836 		 * room for the queue to increase without causing another alarm.
837 		 */
838 		pacing_save = pacing_save << 3;
839 		pacing_data->pacing_th = rdev->pacing_algo_th * 4;
840 	}
841 
842 	if (pacing_save > BNXT_RE_MAX_DBR_DO_PACING)
843 		pacing_save = BNXT_RE_MAX_DBR_DO_PACING;
844 
845 	pacing_data->do_pacing = pacing_save;
846 	rdev->do_pacing_save = pacing_data->do_pacing;
847 	pacing_data->alarm_th =
848 		pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE(rdev->chip_ctx);
849 recovery_on:
850 	schedule_delayed_work(&rdev->dbq_pacing_work,
851 			      msecs_to_jiffies(rdev->dbq_pacing_time));
852 	rdev->dbr_sw_stats->dbq_pacing_alerts++;
853 	mutex_unlock(&rdev->dbq_lock);
854 }
855 
856 static void bnxt_re_pacing_timer_exp(struct work_struct *work)
857 {
858 	struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
859 						dbq_pacing_work.work);
860 	struct bnxt_qplib_db_pacing_data *pacing_data;
861 	u32 read_val, fifo_occup;
862 	struct bnxt_qplib_nq *nq;
863 
864 	if (!mutex_trylock(&rdev->dbq_lock))
865 		return;
866 
867 	pacing_data = rdev->qplib_res.pacing_data;
868 	read_val = readl_fbsd(rdev->en_dev->softc , rdev->dbr_db_fifo_reg_off, 0);
869 	fifo_occup = pacing_data->fifo_max_depth -
870 		     ((read_val & pacing_data->fifo_room_mask) >>
871 		      pacing_data->fifo_room_shift);
872 
873 	if (fifo_occup > pacing_data->pacing_th)
874 		goto restart_timer;
875 
876 	/*
877 	 * Instead of immediately going back to the default do_pacing
878 	 * reduce it by 1/8 times and restart the timer.
879 	 */
880 	pacing_data->do_pacing = pacing_data->do_pacing - (pacing_data->do_pacing >> 3);
881 	pacing_data->do_pacing = max_t(u32, rdev->dbr_def_do_pacing, pacing_data->do_pacing);
882 	/*
883 	 * If the fifo_occup is less than the interrupt enable threshold
884 	 * enable the interrupt on the primary PF.
885 	 */
886 	if (rdev->dbq_int_disable && fifo_occup < rdev->pacing_en_int_th) {
887 		if (bnxt_qplib_dbr_pacing_is_primary_pf(rdev->chip_ctx)) {
888 			if (!rdev->chip_ctx->modes.dbr_pacing_v0) {
889 				nq = &rdev->nqr.nq[0];
890 				bnxt_re_set_dbq_throttling_reg(rdev, nq->ring_id,
891 							       rdev->dbq_watermark);
892 				rdev->dbr_sw_stats->dbq_int_en++;
893 				rdev->dbq_int_disable = false;
894 			}
895 		}
896 	}
897 	if (pacing_data->do_pacing <= rdev->dbr_def_do_pacing) {
898 		bnxt_re_set_default_pacing_data(rdev);
899 		rdev->dbr_sw_stats->dbq_pacing_complete++;
900 		goto dbq_unlock;
901 	}
902 restart_timer:
903 	schedule_delayed_work(&rdev->dbq_pacing_work,
904 			      msecs_to_jiffies(rdev->dbq_pacing_time));
905 	bnxt_re_update_do_pacing_slabs(rdev);
906 	rdev->dbr_sw_stats->dbq_pacing_resched++;
907 dbq_unlock:
908 	rdev->do_pacing_save = pacing_data->do_pacing;
909 	mutex_unlock(&rdev->dbq_lock);
910 }
911 
912 void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev)
913 {
914 	struct bnxt_qplib_db_pacing_data *pacing_data;
915 
916 	if (!rdev->dbr_pacing)
917 		return;
918 	mutex_lock(&rdev->dbq_lock);
919 	pacing_data = rdev->qplib_res.pacing_data;
920 
921 	/*
922 	 * Increase the alarm_th to max so that other user lib instances do not
923 	 * keep alerting the driver.
924 	 */
925 	pacing_data->alarm_th = pacing_data->fifo_max_depth;
926 	pacing_data->do_pacing = BNXT_RE_MAX_DBR_DO_PACING;
927 	cancel_work_sync(&rdev->dbq_fifo_check_work);
928 	schedule_work(&rdev->dbq_fifo_check_work);
929 	mutex_unlock(&rdev->dbq_lock);
930 }
931 
932 void bnxt_re_schedule_dbq_event(struct bnxt_qplib_res *res)
933 {
934 	struct bnxt_re_dbq_work *dbq_work;
935 	struct bnxt_re_dev *rdev;
936 
937 	rdev = container_of(res, struct bnxt_re_dev, qplib_res);
938 
939 	atomic_set(&rdev->dbq_intr_running, 1);
940 
941 	if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
942 		goto exit;
943 	/* Run the loop to send dbq event to other functions
944 	 * for newer FW
945 	 */
946 	if (bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx) &&
947 	    !rdev->chip_ctx->modes.dbr_pacing_v0)
948 		bnxt_re_set_dbq_throttling_for_non_primary(rdev);
949 
950 	dbq_work = kzalloc(sizeof(*dbq_work), GFP_ATOMIC);
951 	if (!dbq_work)
952 		goto exit;
953 	dbq_work->rdev = rdev;
954 	dbq_work->event = BNXT_RE_DBQ_EVENT_SCHED;
955 	INIT_WORK(&dbq_work->work, bnxt_re_dbq_wq_task);
956 	queue_work(rdev->dbq_wq, &dbq_work->work);
957 	rdev->dbr_sw_stats->dbq_int_recv++;
958 	rdev->dbq_int_disable = true;
959 exit:
960 	atomic_set(&rdev->dbq_intr_running, 0);
961 }
962 
963 static void bnxt_re_free_msix(struct bnxt_re_dev *rdev)
964 {
965 	struct bnxt_en_dev *en_dev = rdev->en_dev;
966 	int rc;
967 
968 	rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP);
969 	if (rc)
970 		dev_err(rdev_to_dev(rdev), "netdev %p free_msix failed! rc = 0x%x",
971 			rdev->netdev, rc);
972 }
973 
974 static int bnxt_re_request_msix(struct bnxt_re_dev *rdev)
975 {
976 	struct bnxt_en_dev *en_dev = rdev->en_dev;
977 	int rc = 0, num_msix_want, num_msix_got;
978 	struct bnxt_msix_entry *entry;
979 
980 	/*
981 	 * Request MSIx based on the function type. This is
982 	 * a temporory solution to enable max VFs when NPAR is
983 	 * enabled.
984 	 * TODO - change the scheme with an adapter specific check
985 	 * as the latest adapters can support more NQs. For now
986 	 * this change satisfy all adapter versions.
987 	 */
988 
989 	if (rdev->is_virtfn)
990 		num_msix_want = BNXT_RE_MAX_MSIX_VF;
991 	else if (BNXT_EN_NPAR(en_dev))
992 		num_msix_want = BNXT_RE_MAX_MSIX_NPAR_PF;
993 	else if (_is_chip_gen_p5_p7(rdev->chip_ctx))
994 		num_msix_want = rdev->num_msix_requested ?: BNXT_RE_MAX_MSIX_GEN_P5_PF;
995 	else
996 		num_msix_want = BNXT_RE_MAX_MSIX_PF;
997 
998 	/*
999 	 * Since MSIX vectors are used for both NQs and CREQ, we should try to
1000 	 * allocate num_online_cpus + 1 by taking into account the CREQ. This
1001 	 * leaves the number of MSIX vectors for NQs match the number of CPUs
1002 	 * and allows the system to be fully utilized
1003 	 */
1004 	num_msix_want = min_t(u32, num_msix_want, num_online_cpus() + 1);
1005 	num_msix_want = min_t(u32, num_msix_want, BNXT_RE_MAX_MSIX);
1006 	num_msix_want = max_t(u32, num_msix_want, BNXT_RE_MIN_MSIX);
1007 
1008 	entry = rdev->nqr.msix_entries;
1009 
1010 	num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP,
1011 							 entry, num_msix_want);
1012 	if (num_msix_got < BNXT_RE_MIN_MSIX) {
1013 		rc = -EINVAL;
1014 		goto done;
1015 	}
1016 	if (num_msix_got != num_msix_want)
1017 		dev_warn(rdev_to_dev(rdev),
1018 			 "bnxt_request_msix: wanted %d vectors, got %d\n",
1019 			 num_msix_want, num_msix_got);
1020 
1021 	rdev->nqr.num_msix = num_msix_got;
1022 	return 0;
1023 done:
1024 	if (num_msix_got)
1025 		bnxt_re_free_msix(rdev);
1026 	return rc;
1027 }
1028 
1029 static int  __wait_for_ib_unregister(struct bnxt_re_dev *rdev,
1030 				     struct bnxt_re_en_dev_info *en_info)
1031 {
1032 	u64 timeout = 0;
1033 	u32 cur_prod = 0, cur_cons = 0;
1034 	int retry = 0, rc = 0, ret = 0;
1035 
1036 	cur_prod = rdev->rcfw.cmdq.hwq.prod;
1037 	cur_cons = rdev->rcfw.cmdq.hwq.cons;
1038 	timeout = msecs_to_jiffies(BNXT_RE_RECOVERY_IB_UNINIT_WAIT_TIME_MS);
1039 	retry = BNXT_RE_RECOVERY_IB_UNINIT_WAIT_RETRY;
1040 	/* During module exit, increase timeout ten-fold to 100 mins to wait
1041 	 * as long as possible for ib_unregister() to complete
1042 	 */
1043 	if (rdev->mod_exit)
1044 		retry *= 10;
1045 	do {
1046 		/*
1047 		 * Since the caller of this function invokes with bnxt_re_mutex held,
1048 		 * release it to avoid holding a lock while in wait / sleep mode.
1049 		 */
1050 		mutex_unlock(&bnxt_re_mutex);
1051 		rc = wait_event_timeout(en_info->waitq,
1052 					en_info->ib_uninit_done,
1053 					timeout);
1054 		mutex_lock(&bnxt_re_mutex);
1055 
1056 		if (!bnxt_re_is_rdev_valid(rdev))
1057 			break;
1058 
1059 		if (rc)
1060 			break;
1061 
1062 		if (!RCFW_NO_FW_ACCESS(&rdev->rcfw)) {
1063 			/* No need to check for cmdq stall during module exit,
1064 			 * wait for ib unregister to complete
1065 			 */
1066 			if (!rdev->mod_exit)
1067 				ret = __check_cmdq_stall(&rdev->rcfw, &cur_prod, &cur_cons);
1068 			if (ret || en_info->ib_uninit_done)
1069 				break;
1070 		}
1071 	} while (retry--);
1072 
1073 	return rc;
1074 }
1075 
1076 static int bnxt_re_handle_start(struct auxiliary_device *adev)
1077 {
1078 	struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
1079 	struct bnxt_re_dev *rdev = NULL;
1080 	struct ifnet *real_dev;
1081 	struct bnxt_en_dev *en_dev;
1082 	struct ifnet *netdev;
1083 	int rc = 0;
1084 
1085 	if (!en_info || !en_info->en_dev) {
1086 		pr_err("Start, bad en_info or en_dev\n");
1087 		return -EINVAL;
1088 	}
1089 	netdev = en_info->en_dev->net;
1090 	if (en_info->rdev) {
1091 		dev_info(rdev_to_dev(en_info->rdev),
1092 			 "%s: Device is already added adev %p rdev: %p\n",
1093 			 __func__, adev, en_info->rdev);
1094 		return 0;
1095 	}
1096 
1097 	en_dev = en_info->en_dev;
1098 	real_dev = rdma_vlan_dev_real_dev(netdev);
1099 	if (!real_dev)
1100 		real_dev = netdev;
1101 	rc = bnxt_re_add_device(&rdev, real_dev,
1102 				en_info->gsi_mode,
1103 				BNXT_RE_POST_RECOVERY_INIT,
1104 				en_info->wqe_mode,
1105 				en_info->num_msix_requested, adev);
1106 	if (rc) {
1107 		/* Add device failed. Unregister the device.
1108 		 * This has to be done explicitly as
1109 		 * bnxt_re_stop would not have unregistered
1110 		 */
1111 		rtnl_lock();
1112 		en_dev->en_ops->bnxt_unregister_device(en_dev, BNXT_ROCE_ULP);
1113 		rtnl_unlock();
1114 		mutex_lock(&bnxt_re_dev_lock);
1115 		gadd_dev_inprogress--;
1116 		mutex_unlock(&bnxt_re_dev_lock);
1117 		return rc;
1118 	}
1119 	rdev->adev = adev;
1120 	rtnl_lock();
1121 	bnxt_re_get_link_speed(rdev);
1122 	rtnl_unlock();
1123 	rc = bnxt_re_ib_init(rdev);
1124 	if (rc) {
1125 		dev_err(rdev_to_dev(rdev), "Failed ib_init\n");
1126 		return rc;
1127 	}
1128 	bnxt_re_ib_init_2(rdev);
1129 
1130 	return rc;
1131 }
1132 
1133 static void bnxt_re_stop(void *handle)
1134 {
1135 	struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
1136 	struct ifnet *netdev;
1137 	struct bnxt_re_dev *rdev;
1138 	struct bnxt_en_dev *en_dev;
1139 	int rc = 0;
1140 
1141 	rtnl_unlock();
1142 	mutex_lock(&bnxt_re_mutex);
1143 	if (!en_info || !en_info->en_dev) {
1144 		pr_err("Stop, bad en_info or en_dev\n");
1145 		goto exit;
1146 	}
1147 	netdev = en_info->en_dev->net;
1148 	rdev = en_info->rdev;
1149 	if (!rdev)
1150 		goto exit;
1151 
1152 	if (!bnxt_re_is_rdev_valid(rdev))
1153 		goto exit;
1154 
1155 	/*
1156 	 * Check if fw has undergone reset or is in a fatal condition.
1157 	 * If so, set flags so that no further commands are sent down to FW
1158 	 */
1159 	en_dev = rdev->en_dev;
1160 	if (en_dev->en_state & BNXT_STATE_FW_FATAL_COND ||
1161 	    en_dev->en_state & BNXT_STATE_FW_RESET_DET) {
1162 		/* Set rcfw flag to control commands send to Bono */
1163 		set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
1164 		/* Set bnxt_re flag to control commands send via L2 driver */
1165 		set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
1166 		wake_up_all(&rdev->rcfw.cmdq.waitq);
1167 	}
1168 
1169 	if (test_bit(BNXT_RE_FLAG_STOP_IN_PROGRESS, &rdev->flags))
1170 		goto exit;
1171 	set_bit(BNXT_RE_FLAG_STOP_IN_PROGRESS, &rdev->flags);
1172 
1173 	en_info->wqe_mode = rdev->chip_ctx->modes.wqe_mode;
1174 	en_info->gsi_mode = rdev->gsi_ctx.gsi_qp_mode;
1175 	en_info->num_msix_requested = rdev->num_msix_requested;
1176 	en_info->ib_uninit_done = false;
1177 
1178 	if (rdev->dbr_pacing)
1179 		bnxt_re_set_pacing_dev_state(rdev);
1180 
1181 	dev_info(rdev_to_dev(rdev), "%s: L2 driver notified to stop."
1182 		 "Attempting to stop and Dispatching event "
1183 		 "to inform the stack\n", __func__);
1184 	init_waitqueue_head(&en_info->waitq);
1185 	/* Schedule a work item to handle IB UNINIT for recovery */
1186 	bnxt_re_schedule_work(rdev, NETDEV_UNREGISTER,
1187 			      NULL, netdev, rdev->adev);
1188 	rc = __wait_for_ib_unregister(rdev, en_info);
1189 	if (!bnxt_re_is_rdev_valid(rdev))
1190 		goto exit;
1191 	if (!rc) {
1192 		dev_info(rdev_to_dev(rdev), "%s: Attempt to stop failed\n",
1193 			 __func__);
1194 		bnxt_re_detach_err_device(rdev);
1195 		goto exit;
1196 	}
1197 	bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, rdev->adev);
1198 exit:
1199 	mutex_unlock(&bnxt_re_mutex);
1200 	/* Take rtnl_lock before return, bnxt_re_stop is called with rtnl_lock */
1201 	rtnl_lock();
1202 
1203 	return;
1204 }
1205 
1206 static void bnxt_re_start(void *handle)
1207 {
1208 	rtnl_unlock();
1209 	mutex_lock(&bnxt_re_mutex);
1210 	if (bnxt_re_handle_start((struct auxiliary_device *)handle))
1211 		pr_err("Failed to start RoCE device");
1212 	mutex_unlock(&bnxt_re_mutex);
1213 	/* Take rtnl_lock before return, bnxt_re_start is called with rtnl_lock */
1214 	rtnl_lock();
1215 	return;
1216 }
1217 
1218 static void bnxt_re_shutdown(void *p)
1219 {
1220 	struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(p);
1221 	struct bnxt_re_dev *rdev;
1222 
1223 	if (!en_info) {
1224 		pr_err("Shutdown, bad en_info\n");
1225 		return;
1226 	}
1227 	rtnl_unlock();
1228 	mutex_lock(&bnxt_re_mutex);
1229 	rdev = en_info->rdev;
1230 	if (!rdev || !bnxt_re_is_rdev_valid(rdev))
1231 		goto exit;
1232 
1233 	/* rtnl_lock held by L2 before coming here */
1234 	bnxt_re_stopqps_and_ib_uninit(rdev);
1235 	bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, rdev->adev);
1236 exit:
1237 	mutex_unlock(&bnxt_re_mutex);
1238 	rtnl_lock();
1239 	return;
1240 }
1241 
1242 static void bnxt_re_stop_irq(void *handle)
1243 {
1244 	struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
1245 	struct bnxt_qplib_rcfw *rcfw = NULL;
1246 	struct bnxt_re_dev *rdev;
1247 	struct bnxt_qplib_nq *nq;
1248 	int indx;
1249 
1250 	if (!en_info) {
1251 		pr_err("Stop irq, bad en_info\n");
1252 		return;
1253 	}
1254 	rdev = en_info->rdev;
1255 
1256 	if (!rdev)
1257 		return;
1258 
1259 	rcfw = &rdev->rcfw;
1260 	for (indx = 0; indx < rdev->nqr.max_init; indx++) {
1261 		nq = &rdev->nqr.nq[indx];
1262 		mutex_lock(&nq->lock);
1263 		bnxt_qplib_nq_stop_irq(nq, false);
1264 		mutex_unlock(&nq->lock);
1265 	}
1266 
1267 	if (test_bit(BNXT_RE_FLAG_ALLOC_RCFW, &rdev->flags))
1268 		bnxt_qplib_rcfw_stop_irq(rcfw, false);
1269 }
1270 
1271 static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
1272 {
1273 	struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
1274 	struct bnxt_msix_entry *msix_ent = NULL;
1275 	struct bnxt_qplib_rcfw *rcfw = NULL;
1276 	struct bnxt_re_dev *rdev;
1277 	struct bnxt_qplib_nq *nq;
1278 	int indx, rc, vec;
1279 
1280 	if (!en_info) {
1281 		pr_err("Start irq, bad en_info\n");
1282 		return;
1283 	}
1284 	rdev = en_info->rdev;
1285 	if (!rdev)
1286 		return;
1287 	if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
1288 		return;
1289 	msix_ent = rdev->nqr.msix_entries;
1290 	rcfw = &rdev->rcfw;
1291 
1292 	if (!ent) {
1293 		/* Not setting the f/w timeout bit in rcfw.
1294 		 * During the driver unload the first command
1295 		 * to f/w will timeout and that will set the
1296 		 * timeout bit.
1297 		 */
1298 		dev_err(rdev_to_dev(rdev), "Failed to re-start IRQs\n");
1299 		return;
1300 	}
1301 
1302 	/* Vectors may change after restart, so update with new vectors
1303 	 * in device structure.
1304 	 */
1305 	for (indx = 0; indx < rdev->nqr.num_msix; indx++)
1306 		rdev->nqr.msix_entries[indx].vector = ent[indx].vector;
1307 
1308 	if (test_bit(BNXT_RE_FLAG_ALLOC_RCFW, &rdev->flags)) {
1309 		rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector,
1310 					       false);
1311 		if (rc) {
1312 			dev_warn(rdev_to_dev(rdev),
1313 				 "Failed to reinit CREQ\n");
1314 			return;
1315 		}
1316 	}
1317 	for (indx = 0 ; indx < rdev->nqr.max_init; indx++) {
1318 		nq = &rdev->nqr.nq[indx];
1319 		vec = indx + 1;
1320 		rc = bnxt_qplib_nq_start_irq(nq, indx, msix_ent[vec].vector,
1321 					     false);
1322 		if (rc) {
1323 			dev_warn(rdev_to_dev(rdev),
1324 				 "Failed to reinit NQ index %d\n", indx);
1325 			return;
1326 		}
1327 	}
1328 }
1329 
1330 /*
1331  * Except for ulp_async_notifier, the remaining ulp_ops
1332  * below are called with rtnl_lock held
1333  */
1334 static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
1335 	.ulp_async_notifier = bnxt_re_async_notifier,
1336 	.ulp_stop = bnxt_re_stop,
1337 	.ulp_start = bnxt_re_start,
1338 	.ulp_shutdown = bnxt_re_shutdown,
1339 	.ulp_irq_stop = bnxt_re_stop_irq,
1340 	.ulp_irq_restart = bnxt_re_start_irq,
1341 };
1342 
/*
 * Translate a netdev event code into a printable name.
 *
 * Each BNXT_RE_NETDEV_EVENT() line presumably returns the stringified
 * constant when @event matches it (the macro is defined outside this
 * view - confirm).  Events not listed here yield "Unknown".
 */
static inline const char *bnxt_re_netevent(unsigned long event)
{
	BNXT_RE_NETDEV_EVENT(event, NETDEV_UP);
	BNXT_RE_NETDEV_EVENT(event, NETDEV_DOWN);
	BNXT_RE_NETDEV_EVENT(event, NETDEV_CHANGE);
	BNXT_RE_NETDEV_EVENT(event, NETDEV_REGISTER);
	BNXT_RE_NETDEV_EVENT(event, NETDEV_UNREGISTER);
	BNXT_RE_NETDEV_EVENT(event, NETDEV_CHANGEADDR);
	return "Unknown";
}
1353 
1354 /* RoCE -> Net driver */
1355 
/* Driver registration routines used to let the networking driver (bnxt_en)
 * know that the RoCE driver is now installed */
1358 static void bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev)
1359 {
1360 	struct bnxt_en_dev *en_dev = rdev->en_dev;
1361 	int rc;
1362 
1363 	rtnl_lock();
1364 	rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev,
1365 						    BNXT_ROCE_ULP);
1366 	rtnl_unlock();
1367 	if (rc)
1368 		dev_err(rdev_to_dev(rdev), "netdev %p unregister failed! rc = 0x%x",
1369 			rdev->en_dev->net, rc);
1370 
1371 	clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
1372 }
1373 
1374 static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
1375 {
1376 	struct bnxt_en_dev *en_dev = rdev->en_dev;
1377 	int rc = 0;
1378 
1379 	rtnl_lock();
1380 	rc = en_dev->en_ops->bnxt_register_device(en_dev,
1381 						  BNXT_ROCE_ULP,
1382 						  &bnxt_re_ulp_ops,
1383 						  rdev->adev);
1384 	rtnl_unlock();
1385 	if (rc) {
1386 		dev_err(rdev_to_dev(rdev), "netdev %p register failed! rc = 0x%x",
1387 			rdev->netdev, rc);
1388 		return rc;
1389 	}
1390 
1391 	return rc;
1392 }
1393 
1394 static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev)
1395 {
1396 	struct bnxt_qplib_chip_ctx *cctx;
1397 	struct bnxt_en_dev *en_dev;
1398 	struct bnxt_qplib_res *res;
1399 	u32 l2db_len = 0;
1400 	u32 offset = 0;
1401 	u32 barlen;
1402 	int rc;
1403 
1404 	res = &rdev->qplib_res;
1405 	en_dev = rdev->en_dev;
1406 	cctx = rdev->chip_ctx;
1407 
1408 	/* Issue qcfg */
1409 	rc = bnxt_re_hwrm_qcfg(rdev, &l2db_len, &offset);
1410 	if (rc)
1411 		dev_info(rdev_to_dev(rdev),
1412 			 "Couldn't get DB bar size, Low latency framework is disabled\n");
1413 	/* set register offsets for both UC and WC */
1414 	if (_is_chip_p7(cctx))
1415 		res->dpi_tbl.ucreg.offset = offset;
1416 	else
1417 		res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET :
1418 							 BNXT_QPLIB_DBR_PF_DB_OFFSET;
1419 	res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset;
1420 
1421 	/* If WC mapping is disabled by L2 driver then en_dev->l2_db_size
1422 	 * is equal to the DB-Bar actual size. This indicates that L2
1423 	 * is mapping entire bar as UC-. RoCE driver can't enable WC mapping
1424 	 * in such cases and DB-push will be disabled.
1425 	 */
1426 	barlen = pci_resource_len(res->pdev, RCFW_DBR_PCI_BAR_REGION);
1427 	if (cctx->modes.db_push && l2db_len && en_dev->l2_db_size != barlen) {
1428 		res->dpi_tbl.wcreg.offset = en_dev->l2_db_size;
1429 		dev_info(rdev_to_dev(rdev),
1430 			 "Low latency framework is enabled\n");
1431 	}
1432 
1433 	return;
1434 }
1435 
1436 static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode)
1437 {
1438 	struct bnxt_qplib_chip_ctx *cctx;
1439 	struct bnxt_en_dev *en_dev;
1440 
1441 	en_dev = rdev->en_dev;
1442 	cctx = rdev->chip_ctx;
1443 	cctx->modes.wqe_mode = _is_chip_gen_p5_p7(rdev->chip_ctx) ?
1444 					mode : BNXT_QPLIB_WQE_MODE_STATIC;
1445 	cctx->modes.te_bypass = false;
1446 	if (bnxt_re_hwrm_qcaps(rdev))
1447 		dev_err(rdev_to_dev(rdev),
1448 			"Failed to query hwrm qcaps\n");
1449 	 /*
1450 	  * TODO: Need a better mechanism for spreading of the
1451 	  * 512 extended PPP pages in the presence of VF and
1452 	  * NPAR, until then not enabling push
1453 	  */
1454 	if (_is_chip_p7(rdev->chip_ctx) && cctx->modes.db_push) {
1455 		if (rdev->is_virtfn || BNXT_EN_NPAR(en_dev))
1456 			cctx->modes.db_push = false;
1457 	}
1458 
1459 	rdev->roce_mode = en_dev->flags & BNXT_EN_FLAG_ROCE_CAP;
1460 	dev_dbg(rdev_to_dev(rdev),
1461 		"RoCE is supported on the device - caps:0x%x",
1462 		rdev->roce_mode);
1463 	if (!_is_chip_gen_p5_p7(rdev->chip_ctx))
1464 		rdev->roce_mode = BNXT_RE_FLAG_ROCEV2_CAP;
1465 	cctx->hw_stats_size = en_dev->hw_ring_stats_size;
1466 }
1467 
1468 static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
1469 {
1470 	struct bnxt_qplib_chip_ctx *chip_ctx;
1471 	struct bnxt_qplib_res *res;
1472 
1473 	if (!rdev->chip_ctx)
1474 		return;
1475 
1476 	res = &rdev->qplib_res;
1477 	bnxt_qplib_unmap_db_bar(res);
1478 
1479 	kfree(res->hctx);
1480 	res->rcfw = NULL;
1481 	kfree(rdev->dev_attr);
1482 	rdev->dev_attr = NULL;
1483 
1484 	chip_ctx = rdev->chip_ctx;
1485 	rdev->chip_ctx = NULL;
1486 	res->cctx = NULL;
1487 	res->hctx = NULL;
1488 	res->pdev = NULL;
1489 	res->netdev = NULL;
1490 	kfree(chip_ctx);
1491 }
1492 
1493 static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode)
1494 {
1495 	struct bnxt_qplib_chip_ctx *chip_ctx;
1496 	struct bnxt_en_dev *en_dev;
1497 	int rc;
1498 
1499 	en_dev = rdev->en_dev;
1500 	/* Supply pci device to qplib */
1501 	rdev->qplib_res.pdev = en_dev->pdev;
1502 	rdev->qplib_res.netdev = rdev->netdev;
1503 	rdev->qplib_res.en_dev = en_dev;
1504 
1505 	chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL);
1506 	if (!chip_ctx)
1507 		return -ENOMEM;
1508 	rdev->chip_ctx = chip_ctx;
1509 	rdev->qplib_res.cctx = chip_ctx;
1510 	rc = bnxt_re_query_hwrm_intf_version(rdev);
1511 	if (rc)
1512 		goto fail;
1513 	rdev->dev_attr = kzalloc(sizeof(*rdev->dev_attr), GFP_KERNEL);
1514 	if (!rdev->dev_attr) {
1515 		rc = -ENOMEM;
1516 		goto fail;
1517 	}
1518 	rdev->qplib_res.dattr = rdev->dev_attr;
1519 	rdev->qplib_res.rcfw = &rdev->rcfw;
1520 	rdev->qplib_res.is_vf = rdev->is_virtfn;
1521 
1522 	rdev->qplib_res.hctx = kzalloc(sizeof(*rdev->qplib_res.hctx),
1523 				       GFP_KERNEL);
1524 	if (!rdev->qplib_res.hctx) {
1525 		rc = -ENOMEM;
1526 		goto fail;
1527 	}
1528 	bnxt_re_set_drv_mode(rdev, wqe_mode);
1529 
1530 	bnxt_re_set_db_offset(rdev);
1531 	rc = bnxt_qplib_map_db_bar(&rdev->qplib_res);
1532 	if (rc)
1533 		goto fail;
1534 
1535 	rc = bnxt_qplib_enable_atomic_ops_to_root(en_dev->pdev);
1536 	if (rc)
1537 		dev_dbg(rdev_to_dev(rdev),
1538 			"platform doesn't support global atomics");
1539 
1540 	return 0;
1541 fail:
1542 	kfree(rdev->chip_ctx);
1543 	rdev->chip_ctx = NULL;
1544 
1545 	kfree(rdev->dev_attr);
1546 	rdev->dev_attr = NULL;
1547 
1548 	kfree(rdev->qplib_res.hctx);
1549 	rdev->qplib_res.hctx = NULL;
1550 	return rc;
1551 }
1552 
1553 static u16 bnxt_re_get_rtype(struct bnxt_re_dev *rdev) {
1554 	return _is_chip_gen_p5_p7(rdev->chip_ctx) ?
1555 	       HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ :
1556 	       HWRM_RING_ALLOC_INPUT_RING_TYPE_ROCE_CMPL;
1557 }
1558 
1559 static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id)
1560 {
1561 	int rc = -EINVAL;
1562 	struct hwrm_ring_free_input req = {0};
1563 	struct hwrm_ring_free_output resp;
1564 	struct bnxt_en_dev *en_dev = rdev->en_dev;
1565 	struct bnxt_fw_msg fw_msg;
1566 
1567 	if (!en_dev)
1568 		return rc;
1569 
1570 	/* To avoid unnecessary error messages during recovery.
1571 	 * HW is anyway in error state. So dont send down the command */
1572 	if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
1573 		return 0;
1574 
1575 	/* allocation had failed, no need to issue hwrm */
1576 	if (fw_ring_id == 0xffff)
1577 		return 0;
1578 
1579 	memset(&fw_msg, 0, sizeof(fw_msg));
1580 
1581 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1);
1582 	req.ring_type = bnxt_re_get_rtype(rdev);
1583 	req.ring_id = cpu_to_le16(fw_ring_id);
1584 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1585 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
1586 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1587 	if (rc) {
1588 		dev_err(rdev_to_dev(rdev),
1589 			"Failed to free HW ring with rc = 0x%x", rc);
1590 		return rc;
1591 	}
1592 	dev_dbg(rdev_to_dev(rdev), "HW ring freed with id = 0x%x\n",
1593 		fw_ring_id);
1594 
1595 	return rc;
1596 }
1597 
1598 static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev,
1599 				  struct bnxt_re_ring_attr *ring_attr,
1600 				  u16 *fw_ring_id)
1601 {
1602 	int rc = -EINVAL;
1603 	struct hwrm_ring_alloc_input req = {0};
1604 	struct hwrm_ring_alloc_output resp;
1605 	struct bnxt_en_dev *en_dev = rdev->en_dev;
1606 	struct bnxt_fw_msg fw_msg;
1607 
1608 	if (!en_dev)
1609 		return rc;
1610 
1611 	memset(&fw_msg, 0, sizeof(fw_msg));
1612 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1);
1613 	req.flags = cpu_to_le16(ring_attr->flags);
1614 	req.enables = 0;
1615 	req.page_tbl_addr =  cpu_to_le64(ring_attr->dma_arr[0]);
1616 	if (ring_attr->pages > 1) {
1617 		/* Page size is in log2 units */
1618 		req.page_size = BNXT_PAGE_SHIFT;
1619 		req.page_tbl_depth = 1;
1620 	} else {
1621 		req.page_size = 4;
1622 		req.page_tbl_depth = 0;
1623 	}
1624 
1625 	req.fbo = 0;
1626 	/* Association of ring index with doorbell index and MSIX number */
1627 	req.logical_id = cpu_to_le16(ring_attr->lrid);
1628 	req.length = cpu_to_le32(ring_attr->depth + 1);
1629 	req.ring_type = ring_attr->type;
1630 	req.int_mode = ring_attr->mode;
1631 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1632 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
1633 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1634 	if (rc) {
1635 		dev_err(rdev_to_dev(rdev),
1636 			"Failed to allocate HW ring with rc = 0x%x", rc);
1637 		return rc;
1638 	}
1639 	*fw_ring_id = le16_to_cpu(resp.ring_id);
1640 	dev_dbg(rdev_to_dev(rdev),
1641 		"HW ring allocated with id = 0x%x at slot 0x%x",
1642 		resp.ring_id, ring_attr->lrid);
1643 
1644 	return rc;
1645 }
1646 
1647 static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
1648 				      u32 fw_stats_ctx_id, u16 tid)
1649 {
1650 	struct bnxt_en_dev *en_dev = rdev->en_dev;
1651 	struct hwrm_stat_ctx_free_input req = {0};
1652 	struct hwrm_stat_ctx_free_output resp;
1653 	struct bnxt_fw_msg fw_msg;
1654 	int rc = -EINVAL;
1655 
1656 	if (!en_dev)
1657 		return rc;
1658 
1659 	/* To avoid unnecessary error messages during recovery.
1660 	 * HW is anyway in error state. So dont send down the command */
1661 	if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
1662 		return 0;
1663 	memset(&fw_msg, 0, sizeof(fw_msg));
1664 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, tid);
1665 	req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
1666 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1667 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
1668 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1669 	if (rc) {
1670 		dev_err(rdev_to_dev(rdev),
1671 			"Failed to free HW stats ctx with rc = 0x%x", rc);
1672 		return rc;
1673 	}
1674 	dev_dbg(rdev_to_dev(rdev),
1675 		"HW stats ctx freed with id = 0x%x", fw_stats_ctx_id);
1676 
1677 	return rc;
1678 }
1679 
1680 static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, u16 tid)
1681 {
1682 	struct hwrm_stat_ctx_alloc_output resp = {};
1683 	struct hwrm_stat_ctx_alloc_input req = {};
1684 	struct bnxt_en_dev *en_dev = rdev->en_dev;
1685 	struct bnxt_qplib_stats *stat;
1686 	struct bnxt_qplib_ctx *hctx;
1687 	struct bnxt_fw_msg fw_msg;
1688 	int rc = 0;
1689 
1690 	hctx = rdev->qplib_res.hctx;
1691 	stat = (tid == 0xffff) ? &hctx->stats : &hctx->stats2;
1692 	stat->fw_id = INVALID_STATS_CTX_ID;
1693 
1694 	if (!en_dev)
1695 		return -EINVAL;
1696 
1697 	memset(&fw_msg, 0, sizeof(fw_msg));
1698 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
1699 			      HWRM_STAT_CTX_ALLOC, -1, tid);
1700 	req.update_period_ms = cpu_to_le32(1000);
1701 	req.stats_dma_length = rdev->chip_ctx->hw_stats_size;
1702 	req.stats_dma_addr = cpu_to_le64(stat->dma_map);
1703 	req.stat_ctx_flags = HWRM_STAT_CTX_ALLOC_INPUT_STAT_CTX_FLAGS_ROCE;
1704 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1705 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
1706 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1707 	if (rc) {
1708 		dev_err(rdev_to_dev(rdev),
1709 			"Failed to allocate HW stats ctx, rc = 0x%x", rc);
1710 		return rc;
1711 	}
1712 	stat->fw_id = le32_to_cpu(resp.stat_ctx_id);
1713 	dev_dbg(rdev_to_dev(rdev), "HW stats ctx allocated with id = 0x%x",
1714 		stat->fw_id);
1715 
1716 	return rc;
1717 }
1718 
1719 static void bnxt_re_net_unregister_async_event(struct bnxt_re_dev *rdev)
1720 {
1721 	const struct bnxt_en_ops *en_ops;
1722 
1723 	if (rdev->is_virtfn ||
1724 	    test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
1725 		return;
1726 
1727 	memset(rdev->event_bitmap, 0, sizeof(rdev->event_bitmap));
1728 	en_ops = rdev->en_dev->en_ops;
1729 	if (en_ops->bnxt_register_fw_async_events
1730 	    (rdev->en_dev, BNXT_ROCE_ULP,
1731 	     (unsigned long *)rdev->event_bitmap,
1732 	      HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE))
1733 		dev_err(rdev_to_dev(rdev),
1734 			"Failed to unregister async event");
1735 }
1736 
1737 static void bnxt_re_net_register_async_event(struct bnxt_re_dev *rdev)
1738 {
1739 	const struct bnxt_en_ops *en_ops;
1740 
1741 	if (rdev->is_virtfn)
1742 		return;
1743 
1744 	rdev->event_bitmap[0] |=
1745 		BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE) |
1746 		BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY);
1747 
1748 	rdev->event_bitmap[2] |=
1749 	   BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT - 64);
1750 	rdev->event_bitmap[2] |=
1751 		BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD - 64) |
1752 		BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE - 64);
1753 	en_ops = rdev->en_dev->en_ops;
1754 	if (en_ops->bnxt_register_fw_async_events
1755 	    (rdev->en_dev, BNXT_ROCE_ULP,
1756 	     (unsigned long *)rdev->event_bitmap,
1757 	      HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE))
1758 		dev_err(rdev_to_dev(rdev),
1759 			"Failed to reg Async event");
1760 }
1761 
1762 static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
1763 {
1764 	struct bnxt_en_dev *en_dev = rdev->en_dev;
1765 	struct hwrm_ver_get_output resp = {0};
1766 	struct hwrm_ver_get_input req = {0};
1767 	struct bnxt_qplib_chip_ctx *cctx;
1768 	struct bnxt_fw_msg fw_msg;
1769 	int rc = 0;
1770 
1771 	memset(&fw_msg, 0, sizeof(fw_msg));
1772 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
1773 			      HWRM_VER_GET, -1, -1);
1774 	req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
1775 	req.hwrm_intf_min = HWRM_VERSION_MINOR;
1776 	req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
1777 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1778 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
1779 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1780 	if (rc) {
1781 		dev_err(rdev_to_dev(rdev),
1782 			"Failed to query HW version, rc = 0x%x", rc);
1783 		return rc;
1784 	}
1785 	cctx = rdev->chip_ctx;
1786 	cctx->hwrm_intf_ver = (u64) le16_to_cpu(resp.hwrm_intf_major) << 48 |
1787 			      (u64) le16_to_cpu(resp.hwrm_intf_minor) << 32 |
1788 			      (u64) le16_to_cpu(resp.hwrm_intf_build) << 16 |
1789 				    le16_to_cpu(resp.hwrm_intf_patch);
1790 
1791 	cctx->hwrm_cmd_max_timeout = le16_to_cpu(resp.max_req_timeout);
1792 
1793 	if (!cctx->hwrm_cmd_max_timeout)
1794 		cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT;
1795 
1796 	cctx->chip_num = le16_to_cpu(resp.chip_num);
1797 	cctx->chip_rev = resp.chip_rev;
1798 	cctx->chip_metal = resp.chip_metal;
1799 	return 0;
1800 }
1801 
1802 /* Query device config using common hwrm */
1803 static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
1804 			     u32 *offset)
1805 {
1806 	struct bnxt_en_dev *en_dev = rdev->en_dev;
1807 	struct hwrm_func_qcfg_output resp = {0};
1808 	struct hwrm_func_qcfg_input req = {0};
1809 	struct bnxt_fw_msg fw_msg;
1810 	int rc;
1811 
1812 	memset(&fw_msg, 0, sizeof(fw_msg));
1813 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
1814 			      HWRM_FUNC_QCFG, -1, -1);
1815 	req.fid = cpu_to_le16(0xffff);
1816 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1817 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
1818 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1819 	if (rc) {
1820 		dev_err(rdev_to_dev(rdev),
1821 			"Failed to query config, rc = %#x", rc);
1822 		return rc;
1823 	}
1824 
1825 	*db_len = PAGE_ALIGN(le16_to_cpu(resp.l2_doorbell_bar_size_kb) * 1024);
1826 	*offset = PAGE_ALIGN(le16_to_cpu(resp.legacy_l2_db_size_kb) * 1024);
1827 	return 0;
1828 }
1829 
1830 /* Query function capabilities using common hwrm */
1831 int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev)
1832 {
1833 	struct bnxt_en_dev *en_dev = rdev->en_dev;
1834 	struct hwrm_func_qcaps_output resp = {0};
1835 	struct hwrm_func_qcaps_input req = {0};
1836 	struct bnxt_qplib_chip_ctx *cctx;
1837 	struct bnxt_fw_msg fw_msg;
1838 	u8 push_enable = false;
1839 	int rc;
1840 
1841 	cctx = rdev->chip_ctx;
1842 	memset(&fw_msg, 0, sizeof(fw_msg));
1843 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
1844 			      HWRM_FUNC_QCAPS, -1, -1);
1845 	req.fid = cpu_to_le16(0xffff);
1846 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1847 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
1848 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1849 	if (rc) {
1850 		dev_err(rdev_to_dev(rdev),
1851 			"Failed to query capabilities, rc = %#x", rc);
1852 		return rc;
1853 	}
1854 	if (_is_chip_p7(rdev->chip_ctx))
1855 		push_enable =
1856 			(resp.flags_ext &
1857 			 HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT_PPP_PUSH_MODE_SUPPORTED) ?
1858 			 true : false;
1859 	else
1860 		push_enable =
1861 			(resp.flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_WCB_PUSH_MODE) ?
1862 			 true : false;
1863 	cctx->modes.db_push = push_enable;
1864 
1865 	cctx->modes.dbr_pacing =
1866 		resp.flags_ext & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT_DBR_PACING_SUPPORTED ?
1867 			true : false;
1868 	cctx->modes.dbr_pacing_ext =
1869 		resp.flags_ext2 &
1870 			HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED ?
1871 			true : false;
1872 	cctx->modes.dbr_drop_recov =
1873 		(resp.flags_ext2 &
1874 		 HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT2_SW_DBR_DROP_RECOVERY_SUPPORTED) ?
1875 			true : false;
1876 	cctx->modes.dbr_pacing_v0 =
1877 		(resp.flags_ext2 &
1878 		 HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT2_DBR_PACING_V0_SUPPORTED) ?
1879 			true : false;
1880 	dev_dbg(rdev_to_dev(rdev),
1881 		"%s: cctx->modes.dbr_pacing = %d cctx->modes.dbr_pacing_ext = %d, dbr_drop_recov %d\n",
1882 		__func__, cctx->modes.dbr_pacing, cctx->modes.dbr_pacing_ext, cctx->modes.dbr_drop_recov);
1883 
1884 	return 0;
1885 }
1886 
1887 static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev)
1888 {
1889 	struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
1890 	struct hwrm_func_dbr_pacing_qcfg_output resp = {0};
1891 	struct hwrm_func_dbr_pacing_qcfg_input req = {0};
1892 	struct bnxt_en_dev *en_dev = rdev->en_dev;
1893 	struct bnxt_qplib_chip_ctx *cctx;
1894 	struct bnxt_fw_msg fw_msg;
1895 	u32 primary_nq_id;
1896 	int rc;
1897 
1898 	cctx = rdev->chip_ctx;
1899 	memset(&fw_msg, 0, sizeof(fw_msg));
1900 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
1901 			      HWRM_FUNC_DBR_PACING_QCFG, -1, -1);
1902 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1903 			    sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
1904 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1905 	if (rc) {
1906 		dev_dbg(rdev_to_dev(rdev),
1907 			"Failed to query dbr pacing config, rc = %#x", rc);
1908 		return rc;
1909 	}
1910 
1911 	primary_nq_id = le32_to_cpu(resp.primary_nq_id);
1912 	if (primary_nq_id == 0xffffffff &&
1913 	    !bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) {
1914 		dev_err(rdev_to_dev(rdev), "%s:%d Invoke bnxt_qplib_dbr_pacing_set_primary_pf with 1\n",
1915 			__func__, __LINE__);
1916 		bnxt_qplib_dbr_pacing_set_primary_pf(rdev->chip_ctx, 1);
1917 	}
1918 
1919 	if (bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) {
1920 		struct bnxt_qplib_nq *nq;
1921 
1922 		nq = &rdev->nqr.nq[0];
1923 		/* Reset the primary capability */
1924 		if (nq->ring_id != primary_nq_id)
1925 			bnxt_qplib_dbr_pacing_set_primary_pf(rdev->chip_ctx, 0);
1926 	}
1927 
1928 	if ((resp.dbr_stat_db_fifo_reg &
1929 	     HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK) ==
1930 	    HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_GRC)
1931 		cctx->dbr_stat_db_fifo =
1932 		resp.dbr_stat_db_fifo_reg &
1933 		~HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK;
1934 
1935 	if ((resp.dbr_throttling_aeq_arm_reg &
1936 	    HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_MASK)
1937 	    == HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_GRC) {
1938 		cctx->dbr_aeq_arm_reg = resp.dbr_throttling_aeq_arm_reg &
1939 			~HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK;
1940 		cctx->dbr_throttling_reg = cctx->dbr_aeq_arm_reg - 4;
1941 	}
1942 	pacing_data->fifo_max_depth = le32_to_cpu(resp.dbr_stat_db_max_fifo_depth);
1943 	if (!pacing_data->fifo_max_depth)
1944 		pacing_data->fifo_max_depth = BNXT_RE_MAX_FIFO_DEPTH(cctx);
1945 	pacing_data->fifo_room_mask = le32_to_cpu(resp.dbr_stat_db_fifo_reg_fifo_room_mask);
1946 	pacing_data->fifo_room_shift = resp.dbr_stat_db_fifo_reg_fifo_room_shift;
1947 	dev_dbg(rdev_to_dev(rdev),
1948 		"%s: nq:0x%x primary_pf:%d db_fifo:0x%x aeq_arm:0x%x i"
1949 		"fifo_max_depth 0x%x , resp.dbr_stat_db_max_fifo_depth 0x%x);\n",
1950 		__func__, resp.primary_nq_id, cctx->modes.dbr_primary_pf,
1951 		 cctx->dbr_stat_db_fifo, cctx->dbr_aeq_arm_reg,
1952 		 pacing_data->fifo_max_depth,
1953 		le32_to_cpu(resp.dbr_stat_db_max_fifo_depth));
1954 	return 0;
1955 }
1956 
1957 static int bnxt_re_hwrm_dbr_pacing_cfg(struct bnxt_re_dev *rdev, bool enable)
1958 {
1959 	struct hwrm_func_dbr_pacing_cfg_output resp = {0};
1960 	struct hwrm_func_dbr_pacing_cfg_input req = {0};
1961 	struct bnxt_en_dev *en_dev = rdev->en_dev;
1962 	struct bnxt_fw_msg fw_msg;
1963 	int rc;
1964 
1965 	if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
1966 		return 0;
1967 
1968 	memset(&fw_msg, 0, sizeof(fw_msg));
1969 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
1970 			      HWRM_FUNC_DBR_PACING_CFG, -1, -1);
1971 	if (enable) {
1972 		req.flags = HWRM_FUNC_DBR_PACING_CFG_INPUT_FLAGS_DBR_NQ_EVENT_ENABLE;
1973 		req.enables =
1974 		cpu_to_le32(HWRM_FUNC_DBR_PACING_CFG_INPUT_ENABLES_PRIMARY_NQ_ID_VALID |
1975 			    HWRM_FUNC_DBR_PACING_CFG_INPUT_ENABLES_PACING_THRESHOLD_VALID);
1976 	} else {
1977 		req.flags = HWRM_FUNC_DBR_PACING_CFG_INPUT_FLAGS_DBR_NQ_EVENT_DISABLE;
1978 	}
1979 	req.primary_nq_id = cpu_to_le32(rdev->dbq_nq_id);
1980 	req.pacing_threshold = cpu_to_le32(rdev->dbq_watermark);
1981 	dev_dbg(rdev_to_dev(rdev), "%s: nq_id = 0x%x pacing_threshold = 0x%x",
1982 		__func__, req.primary_nq_id, req.pacing_threshold);
1983 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1984 			    sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
1985 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1986 	if (rc) {
1987 		dev_dbg(rdev_to_dev(rdev),
1988 			"Failed to set dbr pacing config, rc = %#x", rc);
1989 		return rc;
1990 	}
1991 	return 0;
1992 }
1993 
1994 /* Net -> RoCE driver */
1995 
1996 /* Device */
1997 struct bnxt_re_dev *bnxt_re_from_netdev(struct ifnet *netdev)
1998 {
1999 	struct bnxt_re_dev *rdev;
2000 
2001 	rcu_read_lock();
2002 	list_for_each_entry_rcu(rdev, &bnxt_re_dev_list, list) {
2003 		if (rdev->netdev == netdev) {
2004 			rcu_read_unlock();
2005 			dev_dbg(rdev_to_dev(rdev),
2006 				"netdev (%p) found, ref_count = 0x%x",
2007 				netdev, atomic_read(&rdev->ref_count));
2008 			return rdev;
2009 		}
2010 	}
2011 	rcu_read_unlock();
2012 	return NULL;
2013 }
2014 
2015 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
2016 			char *buf)
2017 {
2018 	struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
2019 
2020 	return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
2021 }
2022 
2023 
2024 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
2025 			char *buf)
2026 {
2027 	struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
2028 
2029 	return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
2030 }
2031 
2032 static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
2033 static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
2034 static struct device_attribute *bnxt_re_attributes[] = {
2035 	&dev_attr_hw_rev,
2036 	&dev_attr_hca_type
2037 };
2038 
2039 int ib_register_device_compat(struct bnxt_re_dev *rdev)
2040 {
2041 	struct ib_device *ibdev = &rdev->ibdev;
2042 	char name[IB_DEVICE_NAME_MAX];
2043 
2044 	memset(name, 0, IB_DEVICE_NAME_MAX);
2045 	strlcpy(name, "bnxt_re%d", IB_DEVICE_NAME_MAX);
2046 
2047 	strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
2048 
2049 	return ib_register_device(ibdev, NULL);
2050 }
2051 
2052 static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
2053 {
2054 	struct ib_device *ibdev = &rdev->ibdev;
2055 	int ret = 0;
2056 
2057 	/* ib device init */
2058 	ibdev->owner = THIS_MODULE;
2059 	ibdev->uverbs_abi_ver = BNXT_RE_ABI_VERSION;
2060 	ibdev->node_type = RDMA_NODE_IB_CA;
2061 	strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
2062 		strlen(BNXT_RE_DESC) + 5);
2063 	ibdev->phys_port_cnt = 1;
2064 
2065 	bnxt_qplib_get_guid(rdev->dev_addr, (u8 *)&ibdev->node_guid);
2066 
2067 	/* Data path irqs is one less than the max msix vectors */
2068 	ibdev->num_comp_vectors	= rdev->nqr.num_msix - 1;
2069 	bnxt_re_set_dma_device(ibdev, rdev);
2070 	ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
2071 
2072 	/* User space */
2073 	ibdev->uverbs_cmd_mask =
2074 			(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
2075 			(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
2076 			(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
2077 			(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
2078 			(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
2079 			(1ull << IB_USER_VERBS_CMD_REG_MR)		|
2080 			(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
2081 			(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2082 			(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
2083 			(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
2084 			(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
2085 			(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
2086 			(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
2087 			(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
2088 			(1ull << IB_USER_VERBS_CMD_REREG_MR)		|
2089 			(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
2090 			(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
2091 			(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
2092 			(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
2093 			(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
2094 			(1ull << IB_USER_VERBS_CMD_ALLOC_MW)		|
2095 			(1ull << IB_USER_VERBS_CMD_DEALLOC_MW)		|
2096 			(1ull << IB_USER_VERBS_CMD_CREATE_AH)		|
2097 			(1ull << IB_USER_VERBS_CMD_MODIFY_AH)		|
2098 			(1ull << IB_USER_VERBS_CMD_QUERY_AH)		|
2099 			(1ull << IB_USER_VERBS_CMD_DESTROY_AH);
2100 
2101 	ibdev->uverbs_ex_cmd_mask = (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP);
2102 	ibdev->uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_POLL_CQ);
2103 
2104 #define bnxt_re_ib_ah bnxt_re_ah
2105 #define bnxt_re_ib_cq bnxt_re_cq
2106 #define bnxt_re_ib_pd bnxt_re_pd
2107 #define bnxt_re_ib_srq bnxt_re_srq
2108 #define bnxt_re_ib_ucontext bnxt_re_ucontext
2109 	INIT_IB_DEVICE_OPS(&ibdev->ops, bnxt_re, BNXT_RE);
2110 
2111 	ibdev->query_device		= bnxt_re_query_device;
2112 	ibdev->modify_device		= bnxt_re_modify_device;
2113 	ibdev->query_port		= bnxt_re_query_port;
2114 	ibdev->modify_port		= bnxt_re_modify_port;
2115 	ibdev->get_port_immutable	= bnxt_re_get_port_immutable;
2116 	ibdev->query_pkey		= bnxt_re_query_pkey;
2117 	ibdev->query_gid		= bnxt_re_query_gid;
2118 	ibdev->get_netdev		= bnxt_re_get_netdev;
2119 	ibdev->add_gid			= bnxt_re_add_gid;
2120 	ibdev->del_gid			= bnxt_re_del_gid;
2121 	ibdev->get_link_layer		= bnxt_re_get_link_layer;
2122 	ibdev->alloc_pd			= bnxt_re_alloc_pd;
2123 	ibdev->dealloc_pd		= bnxt_re_dealloc_pd;
2124 	ibdev->create_ah		= bnxt_re_create_ah;
2125 	ibdev->modify_ah		= bnxt_re_modify_ah;
2126 	ibdev->query_ah			= bnxt_re_query_ah;
2127 	ibdev->destroy_ah		= bnxt_re_destroy_ah;
2128 	ibdev->create_srq		= bnxt_re_create_srq;
2129 	ibdev->modify_srq		= bnxt_re_modify_srq;
2130 	ibdev->query_srq		= bnxt_re_query_srq;
2131 	ibdev->destroy_srq		= bnxt_re_destroy_srq;
2132 	ibdev->post_srq_recv		= bnxt_re_post_srq_recv;
2133 	ibdev->create_qp		= bnxt_re_create_qp;
2134 	ibdev->modify_qp		= bnxt_re_modify_qp;
2135 	ibdev->query_qp			= bnxt_re_query_qp;
2136 	ibdev->destroy_qp		= bnxt_re_destroy_qp;
2137 	ibdev->post_send		= bnxt_re_post_send;
2138 	ibdev->post_recv		= bnxt_re_post_recv;
2139 	ibdev->create_cq		= bnxt_re_create_cq;
2140 	ibdev->modify_cq		= bnxt_re_modify_cq;
2141 	ibdev->destroy_cq		= bnxt_re_destroy_cq;
2142 	ibdev->resize_cq		= bnxt_re_resize_cq;
2143 	ibdev->poll_cq			= bnxt_re_poll_cq;
2144 	ibdev->req_notify_cq		= bnxt_re_req_notify_cq;
2145 	ibdev->get_dma_mr		= bnxt_re_get_dma_mr;
2146 	ibdev->get_hw_stats		= bnxt_re_get_hw_stats;
2147 	ibdev->alloc_hw_stats		= bnxt_re_alloc_hw_port_stats;
2148 	ibdev->dereg_mr			= bnxt_re_dereg_mr;
2149 	ibdev->alloc_mr			= bnxt_re_alloc_mr;
2150 	ibdev->map_mr_sg		= bnxt_re_map_mr_sg;
2151 	ibdev->alloc_mw			= bnxt_re_alloc_mw;
2152 	ibdev->dealloc_mw		= bnxt_re_dealloc_mw;
2153 	ibdev->reg_user_mr		= bnxt_re_reg_user_mr;
2154 	ibdev->rereg_user_mr		= bnxt_re_rereg_user_mr;
2155 	ibdev->disassociate_ucontext	= bnxt_re_disassociate_ucntx;
2156 	ibdev->alloc_ucontext		= bnxt_re_alloc_ucontext;
2157 	ibdev->dealloc_ucontext		= bnxt_re_dealloc_ucontext;
2158 	ibdev->mmap			= bnxt_re_mmap;
2159 	ibdev->process_mad		= bnxt_re_process_mad;
2160 
2161 	ret = ib_register_device_compat(rdev);
2162 	return ret;
2163 }
2164 
2165 static void bnxt_re_dev_dealloc(struct bnxt_re_dev *rdev)
2166 {
2167 	int i = BNXT_RE_REF_WAIT_COUNT;
2168 
2169 	dev_dbg(rdev_to_dev(rdev), "%s:Remove the device %p\n", __func__, rdev);
2170 	/* Wait for rdev refcount to come down */
2171 	while ((atomic_read(&rdev->ref_count) > 1) && i--)
2172 		msleep(100);
2173 
2174 	if (atomic_read(&rdev->ref_count) > 1)
2175 		dev_err(rdev_to_dev(rdev),
2176 			"Failed waiting for ref count to deplete %d",
2177 			atomic_read(&rdev->ref_count));
2178 
2179 	atomic_set(&rdev->ref_count, 0);
2180 	if_rele(rdev->netdev);
2181 	rdev->netdev = NULL;
2182 	synchronize_rcu();
2183 
2184 	kfree(rdev->gid_map);
2185 	kfree(rdev->dbg_stats);
2186 	ib_dealloc_device(&rdev->ibdev);
2187 }
2188 
2189 static struct bnxt_re_dev *bnxt_re_dev_alloc(struct ifnet *netdev,
2190 					   struct bnxt_en_dev *en_dev)
2191 {
2192 	struct bnxt_re_dev *rdev;
2193 	u32 count;
2194 
2195 	/* Allocate bnxt_re_dev instance here */
2196 	rdev = (struct bnxt_re_dev *)compat_ib_alloc_device(sizeof(*rdev));
2197 	if (!rdev) {
2198 		pr_err("%s: bnxt_re_dev allocation failure!",
2199 			ROCE_DRV_MODULE_NAME);
2200 		return NULL;
2201 	}
2202 	/* Default values */
2203 	atomic_set(&rdev->ref_count, 0);
2204 	rdev->netdev = netdev;
2205 	dev_hold(rdev->netdev);
2206 	rdev->en_dev = en_dev;
2207 	rdev->id = rdev->en_dev->pdev->devfn;
2208 	INIT_LIST_HEAD(&rdev->qp_list);
2209 	mutex_init(&rdev->qp_lock);
2210 	mutex_init(&rdev->cc_lock);
2211 	mutex_init(&rdev->dbq_lock);
2212 	bnxt_re_clear_rsors_stat(&rdev->stats.rsors);
2213 	rdev->cosq[0] = rdev->cosq[1] = 0xFFFF;
2214 	rdev->min_tx_depth = 1;
2215 	rdev->stats.stats_query_sec = 1;
2216 	/* Disable priority vlan as the default mode is DSCP based PFC */
2217 	rdev->cc_param.disable_prio_vlan_tx = 1;
2218 
2219 	/* Initialize worker for DBR Pacing */
2220 	INIT_WORK(&rdev->dbq_fifo_check_work, bnxt_re_db_fifo_check);
2221 	INIT_DELAYED_WORK(&rdev->dbq_pacing_work, bnxt_re_pacing_timer_exp);
2222 	rdev->gid_map = kzalloc(sizeof(*(rdev->gid_map)) *
2223 				  BNXT_RE_MAX_SGID_ENTRIES,
2224 				  GFP_KERNEL);
2225 	if (!rdev->gid_map) {
2226 		ib_dealloc_device(&rdev->ibdev);
2227 		return NULL;
2228 	}
2229 	for(count = 0; count < BNXT_RE_MAX_SGID_ENTRIES; count++)
2230 		rdev->gid_map[count] = -1;
2231 
2232 	rdev->dbg_stats = kzalloc(sizeof(*rdev->dbg_stats), GFP_KERNEL);
2233 	if (!rdev->dbg_stats) {
2234 		ib_dealloc_device(&rdev->ibdev);
2235 		return NULL;
2236 	}
2237 
2238 	return rdev;
2239 }
2240 
2241 static int bnxt_re_handle_unaffi_async_event(
2242 		struct creq_func_event *unaffi_async)
2243 {
2244 	switch (unaffi_async->event) {
2245 	case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
2246 	case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
2247 	case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
2248 	case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
2249 	case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
2250 	case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
2251 	case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
2252 	case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
2253 	case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
2254 	case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
2255 	case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
2256 		break;
2257 	default:
2258 		return -EINVAL;
2259 	}
2260 	return 0;
2261 }
2262 
2263 static int bnxt_re_handle_qp_async_event(void *qp_event, struct bnxt_re_qp *qp)
2264 {
2265 	struct creq_qp_error_notification *err_event;
2266 	struct ib_event event;
2267 	unsigned int flags;
2268 
2269 	if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR &&
2270 	    !qp->qplib_qp.is_user) {
2271 		flags = bnxt_re_lock_cqs(qp);
2272 		bnxt_qplib_add_flush_qp(&qp->qplib_qp);
2273 		bnxt_re_unlock_cqs(qp, flags);
2274 	}
2275 	memset(&event, 0, sizeof(event));
2276 	event.device = &qp->rdev->ibdev;
2277 	event.element.qp = &qp->ib_qp;
2278 	event.event = IB_EVENT_QP_FATAL;
2279 
2280 	err_event = qp_event;
2281 	switch(err_event->res_err_state_reason) {
2282 	case CFCQ_RES_ERR_STATE_REASON_RES_EXCEED_MAX:
2283 	case CFCQ_RES_ERR_STATE_REASON_RES_PAYLOAD_LENGTH_MISMATCH:
2284 	case CFCQ_RES_ERR_STATE_REASON_RES_OPCODE_ERROR:
2285 	case CFCQ_RES_ERR_STATE_REASON_RES_PSN_SEQ_ERROR_RETRY_LIMIT:
2286 	case CFCQ_RES_ERR_STATE_REASON_RES_RX_INVALID_R_KEY:
2287 	case CFCQ_RES_ERR_STATE_REASON_RES_RX_DOMAIN_ERROR:
2288 	case CFCQ_RES_ERR_STATE_REASON_RES_RX_NO_PERMISSION:
2289 	case CFCQ_RES_ERR_STATE_REASON_RES_RX_RANGE_ERROR:
2290 	case CFCQ_RES_ERR_STATE_REASON_RES_TX_INVALID_R_KEY:
2291 	case CFCQ_RES_ERR_STATE_REASON_RES_TX_DOMAIN_ERROR:
2292 	case CFCQ_RES_ERR_STATE_REASON_RES_TX_NO_PERMISSION:
2293 	case CFCQ_RES_ERR_STATE_REASON_RES_TX_RANGE_ERROR:
2294 	case CFCQ_RES_ERR_STATE_REASON_RES_IVALID_DUP_RKEY:
2295 	case CFCQ_RES_ERR_STATE_REASON_RES_UNALIGN_ATOMIC:
2296 		event.event = IB_EVENT_QP_ACCESS_ERR;
2297 		break;
2298 	case CFCQ_RES_ERR_STATE_REASON_RES_EXCEEDS_WQE:
2299 	case CFCQ_RES_ERR_STATE_REASON_RES_WQE_FORMAT_ERROR:
2300 	case CFCQ_RES_ERR_STATE_REASON_RES_SRQ_LOAD_ERROR:
2301 	case CFCQ_RES_ERR_STATE_REASON_RES_UNSUPPORTED_OPCODE:
2302 	case CFCQ_RES_ERR_STATE_REASON_RES_REM_INVALIDATE:
2303 		event.event = IB_EVENT_QP_REQ_ERR;
2304 		break;
2305 	case CFCQ_RES_ERR_STATE_REASON_RES_IRRQ_OFLOW:
2306 	case CFCQ_RES_ERR_STATE_REASON_RES_CMP_ERROR:
2307 	case CFCQ_RES_ERR_STATE_REASON_RES_CQ_LOAD_ERROR:
2308 	case CFCQ_RES_ERR_STATE_REASON_RES_TX_PCI_ERROR:
2309 	case CFCQ_RES_ERR_STATE_REASON_RES_RX_PCI_ERROR:
2310 	case CFCQ_RES_ERR_STATE_REASON_RES_MEMORY_ERROR:
2311 	case CFCQ_RES_ERR_STATE_REASON_RES_SRQ_ERROR:
2312 		event.event = IB_EVENT_QP_FATAL;
2313 		break;
2314 	default:
2315 		if (qp->qplib_qp.srq)
2316 			event.event = IB_EVENT_QP_LAST_WQE_REACHED;
2317 		break;
2318 	}
2319 
2320 	if (err_event->res_err_state_reason)
2321 		dev_err(rdev_to_dev(qp->rdev),
2322 			"%s %s qp_id: %d cons (%d %d) req (%d %d) res (%d %d)\n",
2323 			__func__,  qp->qplib_qp.is_user ? "user" : "kernel",
2324 			qp->qplib_qp.id,
2325 			err_event->sq_cons_idx,
2326 			err_event->rq_cons_idx,
2327 			err_event->req_slow_path_state,
2328 			err_event->req_err_state_reason,
2329 			err_event->res_slow_path_state,
2330 			err_event->res_err_state_reason);
2331 
2332 	if (event.device && qp->ib_qp.event_handler)
2333 		qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
2334 
2335 	return 0;
2336 }
2337 
2338 static int bnxt_re_handle_cq_async_error(void *event, struct bnxt_re_cq *cq)
2339 {
2340 	struct creq_cq_error_notification *cqerr;
2341 	bool send = false;
2342 
2343 	cqerr = event;
2344 	switch (cqerr->cq_err_reason) {
2345 	case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_INVALID_ERROR:
2346 	case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_OVERFLOW_ERROR:
2347 	case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_LOAD_ERROR:
2348 	case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_INVALID_ERROR:
2349 	case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_OVERFLOW_ERROR:
2350 	case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_LOAD_ERROR:
2351 		send = true;
2352 	default:
2353 		break;
2354 	}
2355 
2356 	if (send && cq->ibcq.event_handler) {
2357 		struct ib_event ibevent = {};
2358 
2359 		ibevent.event = IB_EVENT_CQ_ERR;
2360 		ibevent.element.cq = &cq->ibcq;
2361 		ibevent.device = &cq->rdev->ibdev;
2362 
2363 		dev_err(rdev_to_dev(cq->rdev),
2364 			"%s err reason %d\n", __func__, cqerr->cq_err_reason);
2365 		cq->ibcq.event_handler(&ibevent, cq->ibcq.cq_context);
2366 	}
2367 
2368 	cq->qplib_cq.is_cq_err_event = true;
2369 
2370 	return 0;
2371 }
2372 
2373 static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async,
2374 					   void *obj)
2375 {
2376 	struct bnxt_qplib_qp *qplqp;
2377 	struct bnxt_qplib_cq *qplcq;
2378 	struct bnxt_re_qp *qp;
2379 	struct bnxt_re_cq *cq;
2380 	int rc = 0;
2381 	u8 event;
2382 
2383 	if (!obj)
2384 		return rc; /* QP was already dead, still return success */
2385 
2386 	event = affi_async->event;
2387 	switch (event) {
2388 	case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
2389 		qplqp = obj;
2390 		qp = container_of(qplqp, struct bnxt_re_qp, qplib_qp);
2391 		rc = bnxt_re_handle_qp_async_event(affi_async, qp);
2392 		break;
2393 	case CREQ_QP_EVENT_EVENT_CQ_ERROR_NOTIFICATION:
2394 		qplcq = obj;
2395 		cq = container_of(qplcq, struct bnxt_re_cq, qplib_cq);
2396 		rc = bnxt_re_handle_cq_async_error(affi_async, cq);
2397 		break;
2398 	default:
2399 		rc = -EINVAL;
2400 	}
2401 
2402 	return rc;
2403 }
2404 
2405 static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
2406 			       void *aeqe, void *obj)
2407 {
2408 	struct creq_func_event *unaffi_async;
2409 	struct creq_qp_event *affi_async;
2410 	u8 type;
2411 	int rc;
2412 
2413 	type = ((struct creq_base *)aeqe)->type;
2414 	if (type == CREQ_BASE_TYPE_FUNC_EVENT) {
2415 		unaffi_async = aeqe;
2416 		rc = bnxt_re_handle_unaffi_async_event(unaffi_async);
2417 	} else {
2418 		affi_async = aeqe;
2419 		rc = bnxt_re_handle_affi_async_event(affi_async, obj);
2420 	}
2421 
2422 	return rc;
2423 }
2424 
2425 static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq,
2426 				struct bnxt_qplib_srq *handle, u8 event)
2427 {
2428 	struct bnxt_re_srq *srq = to_bnxt_re(handle, struct bnxt_re_srq,
2429 					     qplib_srq);
2430 	struct ib_event ib_event;
2431 
2432 	if (srq == NULL) {
2433 		pr_err("%s: SRQ is NULL, SRQN not handled",
2434 			ROCE_DRV_MODULE_NAME);
2435 		return -EINVAL;
2436 	}
2437 	ib_event.device = &srq->rdev->ibdev;
2438 	ib_event.element.srq = &srq->ibsrq;
2439 	if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT)
2440 		ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED;
2441 	else
2442 		ib_event.event = IB_EVENT_SRQ_ERR;
2443 
2444 	if (srq->ibsrq.event_handler) {
2445 		/* Lock event_handler? */
2446 		(*srq->ibsrq.event_handler)(&ib_event,
2447 					     srq->ibsrq.srq_context);
2448 	}
2449 	return 0;
2450 }
2451 
2452 static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
2453 			       struct bnxt_qplib_cq *handle)
2454 {
2455 	struct bnxt_re_cq *cq = to_bnxt_re(handle, struct bnxt_re_cq,
2456 					   qplib_cq);
2457 	u32 *cq_ptr;
2458 
2459 	if (cq == NULL) {
2460 		pr_err("%s: CQ is NULL, CQN not handled",
2461 			ROCE_DRV_MODULE_NAME);
2462 		return -EINVAL;
2463 	}
2464 	/* CQ already in destroy path. Do not handle any more events */
2465 	if (handle->destroyed || !atomic_read(&cq->ibcq.usecnt)) {
2466 		if (!handle->destroyed)
2467 			dev_dbg(NULL, "%s: CQ being destroyed, CQN not handled",
2468 				ROCE_DRV_MODULE_NAME);
2469 		return 0;
2470 	}
2471 
2472 	if (cq->ibcq.comp_handler) {
2473 		if (cq->uctx_cq_page) {
2474 			cq_ptr = (u32 *)cq->uctx_cq_page;
2475 			*cq_ptr = cq->qplib_cq.toggle;
2476 		}
2477 		/* Lock comp_handler? */
2478 		(*cq->ibcq.comp_handler)(&cq->ibcq, cq->ibcq.cq_context);
2479 	}
2480 
2481 	return 0;
2482 }
2483 
2484 struct bnxt_qplib_nq *bnxt_re_get_nq(struct bnxt_re_dev *rdev)
2485 {
2486 	int min, indx;
2487 
2488 	mutex_lock(&rdev->nqr.load_lock);
2489 	for (indx = 0, min = 0; indx < (rdev->nqr.num_msix - 1); indx++) {
2490 		if (rdev->nqr.nq[min].load > rdev->nqr.nq[indx].load)
2491 			min = indx;
2492 	}
2493 	rdev->nqr.nq[min].load++;
2494 	mutex_unlock(&rdev->nqr.load_lock);
2495 
2496 	return &rdev->nqr.nq[min];
2497 }
2498 
2499 void bnxt_re_put_nq(struct bnxt_re_dev *rdev, struct bnxt_qplib_nq *nq)
2500 {
2501 	mutex_lock(&rdev->nqr.load_lock);
2502 	nq->load--;
2503 	mutex_unlock(&rdev->nqr.load_lock);
2504 }
2505 
2506 static bool bnxt_re_check_min_attr(struct bnxt_re_dev *rdev)
2507 {
2508 	struct bnxt_qplib_dev_attr *attr;
2509 	bool rc = true;
2510 
2511 	attr = rdev->dev_attr;
2512 
2513 	if (!attr->max_cq || !attr->max_qp ||
2514 	    !attr->max_sgid || !attr->max_mr) {
2515 		dev_err(rdev_to_dev(rdev),"Insufficient RoCE resources");
2516 		dev_dbg(rdev_to_dev(rdev),
2517 			"max_cq = %d, max_qp = %d, max_dpi = %d, max_sgid = %d, max_mr = %d",
2518 			attr->max_cq, attr->max_qp, attr->max_dpi,
2519 			attr->max_sgid, attr->max_mr);
2520 		rc = false;
2521 	}
2522 	return rc;
2523 }
2524 
2525 static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
2526 				   u8 port_num, enum ib_event_type event)
2527 {
2528 	struct ib_event ib_event;
2529 
2530 	ib_event.device = ibdev;
2531 	if (qp) {
2532 		ib_event.element.qp = qp;
2533 		ib_event.event = event;
2534 		if (qp->event_handler)
2535 			qp->event_handler(&ib_event, qp->qp_context);
2536 	} else {
2537 		ib_event.element.port_num = port_num;
2538 		ib_event.event = event;
2539 		ib_dispatch_event(&ib_event);
2540 	}
2541 
2542 	dev_dbg(rdev_to_dev(to_bnxt_re_dev(ibdev, ibdev)),
2543 		"ibdev %p Event 0x%x port_num 0x%x", ibdev, event, port_num);
2544 }
2545 
2546 static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev,
2547 					struct bnxt_re_qp *qp)
2548 {
2549 	if (rdev->gsi_ctx.gsi_qp_mode == BNXT_RE_GSI_MODE_ALL)
2550 		return (qp->ib_qp.qp_type == IB_QPT_GSI) ||
2551 			(qp == rdev->gsi_ctx.gsi_sqp);
2552 	else
2553 		return (qp->ib_qp.qp_type == IB_QPT_GSI);
2554 }
2555 
2556 static void bnxt_re_stop_all_nonqp1_nonshadow_qps(struct bnxt_re_dev *rdev)
2557 {
2558 	struct bnxt_qplib_qp *qpl_qp;
2559 	bool dev_detached = false;
2560 	struct ib_qp_attr qp_attr;
2561 	int num_qps_stopped = 0;
2562 	int mask = IB_QP_STATE;
2563 	struct bnxt_re_qp *qp;
2564 	unsigned long flags;
2565 
2566 	if (!rdev)
2567 		return;
2568 
2569 restart:
2570 	if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
2571 		dev_detached = true;
2572 
2573 	qp_attr.qp_state = IB_QPS_ERR;
2574 	mutex_lock(&rdev->qp_lock);
2575 	list_for_each_entry(qp, &rdev->qp_list, list) {
2576 		qpl_qp = &qp->qplib_qp;
2577 		if (dev_detached || !bnxt_re_is_qp1_or_shadow_qp(rdev, qp)) {
2578 			if (qpl_qp->state !=
2579 			    CMDQ_MODIFY_QP_NEW_STATE_RESET &&
2580 			    qpl_qp->state !=
2581 			    CMDQ_MODIFY_QP_NEW_STATE_ERR) {
2582 				if (dev_detached) {
2583 					/*
2584 					 * Cant actually send the command down,
2585 					 * marking the state for bookkeeping
2586 					 */
2587 					qpl_qp->state =
2588 						CMDQ_MODIFY_QP_NEW_STATE_ERR;
2589 					qpl_qp->cur_qp_state = qpl_qp->state;
2590 					if (!qpl_qp->is_user) {
2591 						/* Add to flush list */
2592 						flags = bnxt_re_lock_cqs(qp);
2593 						bnxt_qplib_add_flush_qp(qpl_qp);
2594 						bnxt_re_unlock_cqs(qp, flags);
2595 					}
2596 				} else {
2597 					num_qps_stopped++;
2598 					bnxt_re_modify_qp(&qp->ib_qp,
2599 							  &qp_attr, mask,
2600 							  NULL);
2601 				}
2602 
2603 				bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp,
2604 						       1, IB_EVENT_QP_FATAL);
2605 				/*
2606 				 * 1. Release qp_lock after a budget to unblock other verb
2607 				 *    requests (like qp_destroy) from stack.
2608 				 * 2. Traverse through the qp_list freshly as addition / deletion
2609 				 *    might have happened since qp_lock is getting released here.
2610 				 */
2611 				if (num_qps_stopped % BNXT_RE_STOP_QPS_BUDGET == 0) {
2612 					mutex_unlock(&rdev->qp_lock);
2613 					goto restart;
2614 				}
2615 			}
2616 		}
2617 	}
2618 
2619 	mutex_unlock(&rdev->qp_lock);
2620 }
2621 
2622 static int bnxt_re_update_gid(struct bnxt_re_dev *rdev)
2623 {
2624 	struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
2625 	struct bnxt_qplib_gid gid;
2626 	u16 gid_idx, index;
2627 	int rc = 0;
2628 
2629 	if (!test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
2630 		return 0;
2631 
2632 	if (sgid_tbl == NULL) {
2633 		dev_err(rdev_to_dev(rdev), "QPLIB: SGID table not allocated");
2634 		return -EINVAL;
2635 	}
2636 
2637 	for (index = 0; index < sgid_tbl->active; index++) {
2638 		gid_idx = sgid_tbl->hw_id[index];
2639 
2640 		if (!memcmp(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
2641 			    sizeof(bnxt_qplib_gid_zero)))
2642 			continue;
2643 		/* Need to modify the VLAN enable setting of non VLAN GID only
2644 		 * as setting is done for VLAN GID while adding GID
2645 		 *
2646 		 * If disable_prio_vlan_tx is enable, then we'll need to remove the
2647 		 * vlan entry from the sgid_tbl.
2648 		 */
2649 		if (sgid_tbl->vlan[index] == true)
2650 			continue;
2651 
2652 		memcpy(&gid, &sgid_tbl->tbl[index], sizeof(gid));
2653 
2654 		rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx,
2655 					    rdev->dev_addr);
2656 	}
2657 
2658 	return rc;
2659 }
2660 
2661 static void bnxt_re_clear_cc(struct bnxt_re_dev *rdev)
2662 {
2663 	struct bnxt_qplib_cc_param *cc_param = &rdev->cc_param;
2664 
2665 	if (_is_chip_p7(rdev->chip_ctx)) {
2666 		cc_param->mask = CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP;
2667 	} else {
2668 		cc_param->mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE |
2669 				  CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
2670 				  CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN);
2671 
2672 		if (!is_qport_service_type_supported(rdev))
2673 			cc_param->mask |=
2674 			(CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP |
2675 			 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP |
2676 			 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP);
2677 	}
2678 
2679 	cc_param->cur_mask  = cc_param->mask;
2680 
2681 	if (bnxt_qplib_modify_cc(&rdev->qplib_res, cc_param))
2682 		dev_err(rdev_to_dev(rdev), "Failed to modify cc\n");
2683 }
2684 
2685 static int bnxt_re_setup_cc(struct bnxt_re_dev *rdev)
2686 {
2687 	struct bnxt_qplib_cc_param *cc_param = &rdev->cc_param;
2688 	int rc;
2689 
2690 	if (_is_chip_p7(rdev->chip_ctx)) {
2691 		cc_param->enable = 0x0;
2692 		cc_param->mask = CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP;
2693 	} else {
2694 		cc_param->enable = 0x1;
2695 		cc_param->mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE |
2696 				  CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
2697 				  CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN);
2698 
2699 		if (!is_qport_service_type_supported(rdev))
2700 			cc_param->mask |=
2701 			(CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP |
2702 			 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP |
2703 			 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP);
2704 	}
2705 
2706 	cc_param->cur_mask  = cc_param->mask;
2707 
2708 	rc = bnxt_qplib_modify_cc(&rdev->qplib_res, cc_param);
2709 	if (rc) {
2710 		dev_err(rdev_to_dev(rdev), "Failed to modify cc\n");
2711 		return rc;
2712 	}
2713 	/* Reset the programming mask */
2714 	cc_param->mask = 0;
2715 	if (cc_param->qp1_tos_dscp != cc_param->tos_dscp) {
2716 		cc_param->qp1_tos_dscp = cc_param->tos_dscp;
2717 		rc = bnxt_re_update_qp1_tos_dscp(rdev);
2718 		if (rc) {
2719 			dev_err(rdev_to_dev(rdev), "%s:Failed to modify QP1:%d",
2720 				__func__, rc);
2721 			goto clear;
2722 		}
2723 	}
2724 	return 0;
2725 
2726 clear:
2727 	bnxt_re_clear_cc(rdev);
2728 	return rc;
2729 }
2730 
2731 int bnxt_re_query_hwrm_dscp2pri(struct bnxt_re_dev *rdev,
2732 				struct bnxt_re_dscp2pri *d2p, u16 *count,
2733 				u16 target_id)
2734 {
2735 	struct bnxt_en_dev *en_dev = rdev->en_dev;
2736 	struct hwrm_queue_dscp2pri_qcfg_input req;
2737 	struct hwrm_queue_dscp2pri_qcfg_output resp;
2738 	struct bnxt_re_dscp2pri *dscp2pri;
2739 	struct bnxt_fw_msg fw_msg;
2740 	u16 in_count = *count;
2741 	dma_addr_t dma_handle;
2742 	int rc = 0, i;
2743 	u16 data_len;
2744 	u8 *kmem;
2745 
2746 	data_len = *count * sizeof(*dscp2pri);
2747 	memset(&fw_msg, 0, sizeof(fw_msg));
2748 	memset(&req, 0, sizeof(req));
2749 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
2750 			      HWRM_QUEUE_DSCP2PRI_QCFG, -1, target_id);
2751 	req.port_id = (target_id == 0xFFFF) ? en_dev->pf_port_id : 1;
2752 
2753 	kmem = dma_zalloc_coherent(&en_dev->pdev->dev, data_len, &dma_handle,
2754 				   GFP_KERNEL);
2755 	if (!kmem) {
2756 		dev_err(rdev_to_dev(rdev),
2757 			"dma_zalloc_coherent failure, length = %u\n",
2758 			(unsigned)data_len);
2759 		return -ENOMEM;
2760 	}
2761 	req.dest_data_addr = cpu_to_le64(dma_handle);
2762 	req.dest_data_buffer_size = cpu_to_le16(data_len);
2763 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
2764 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
2765 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
2766 	if (rc)
2767 		goto out;
2768 
2769 	/* Upload the DSCP-MASK-PRI tuple(s) */
2770 	dscp2pri = (struct bnxt_re_dscp2pri *)kmem;
2771 	for (i = 0; i < le16_to_cpu(resp.entry_cnt) && i < in_count; i++) {
2772 		d2p[i].dscp = dscp2pri->dscp;
2773 		d2p[i].mask = dscp2pri->mask;
2774 		d2p[i].pri = dscp2pri->pri;
2775 		dscp2pri++;
2776 	}
2777 	*count = le16_to_cpu(resp.entry_cnt);
2778 out:
2779 	dma_free_coherent(&en_dev->pdev->dev, data_len, kmem, dma_handle);
2780 	return rc;
2781 }
2782 
2783 int bnxt_re_prio_vlan_tx_update(struct bnxt_re_dev *rdev)
2784 {
2785 	/* Remove the VLAN from the GID entry */
2786 	if (rdev->cc_param.disable_prio_vlan_tx)
2787 		rdev->qplib_res.prio = false;
2788 	else
2789 		rdev->qplib_res.prio = true;
2790 
2791 	return bnxt_re_update_gid(rdev);
2792 }
2793 
2794 int bnxt_re_set_hwrm_dscp2pri(struct bnxt_re_dev *rdev,
2795 			      struct bnxt_re_dscp2pri *d2p, u16 count,
2796 			      u16 target_id)
2797 {
2798 	struct bnxt_en_dev *en_dev = rdev->en_dev;
2799 	struct hwrm_queue_dscp2pri_cfg_input req;
2800 	struct hwrm_queue_dscp2pri_cfg_output resp;
2801 	struct bnxt_fw_msg fw_msg;
2802 	struct bnxt_re_dscp2pri *dscp2pri;
2803 	int i, rc, data_len = 3 * 256;
2804 	dma_addr_t dma_handle;
2805 	u8 *kmem;
2806 
2807 	memset(&req, 0, sizeof(req));
2808 	memset(&fw_msg, 0, sizeof(fw_msg));
2809 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
2810 			      HWRM_QUEUE_DSCP2PRI_CFG, -1, target_id);
2811 	req.port_id = (target_id == 0xFFFF) ? en_dev->pf_port_id : 1;
2812 
2813 	kmem = dma_alloc_coherent(&en_dev->pdev->dev, data_len, &dma_handle,
2814 				  GFP_KERNEL);
2815 	if (!kmem) {
2816 		dev_err(rdev_to_dev(rdev),
2817 			"dma_alloc_coherent failure, length = %u\n",
2818 			(unsigned)data_len);
2819 		return -ENOMEM;
2820 	}
2821 	req.src_data_addr = cpu_to_le64(dma_handle);
2822 
2823 	/* Download the DSCP-MASK-PRI tuple(s) */
2824 	dscp2pri = (struct bnxt_re_dscp2pri *)kmem;
2825 	for (i = 0; i < count; i++) {
2826 		dscp2pri->dscp = d2p[i].dscp;
2827 		dscp2pri->mask = d2p[i].mask;
2828 		dscp2pri->pri = d2p[i].pri;
2829 		dscp2pri++;
2830 	}
2831 
2832 	req.entry_cnt = cpu_to_le16(count);
2833 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
2834 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
2835 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
2836 	dma_free_coherent(&en_dev->pdev->dev, data_len, kmem, dma_handle);
2837 	return rc;
2838 }
2839 
2840 int bnxt_re_query_hwrm_qportcfg(struct bnxt_re_dev *rdev,
2841 			struct bnxt_re_tc_rec *tc_rec, u16 tid)
2842 {
2843 	u8 max_tc, tc, *qptr, *type_ptr0, *type_ptr1;
2844 	struct hwrm_queue_qportcfg_output resp = {0};
2845 	struct hwrm_queue_qportcfg_input req = {0};
2846 	struct bnxt_en_dev *en_dev = rdev->en_dev;
2847 	struct bnxt_fw_msg fw_msg;
2848 	bool def_init = false;
2849 	u8 *tmp_type;
2850 	u8 cos_id;
2851 	int rc;
2852 
2853 	memset(&fw_msg, 0, sizeof(fw_msg));
2854 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_QUEUE_QPORTCFG,
2855 			      -1, tid);
2856 	req.port_id = (tid == 0xFFFF) ? en_dev->pf_port_id : 1;
2857 	if (BNXT_EN_ASYM_Q(en_dev))
2858 		req.flags = htole32(HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_RX);
2859 
2860 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
2861 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
2862 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
2863 	if (rc)
2864 		return rc;
2865 
2866 	if (!resp.max_configurable_queues)
2867 		return -EINVAL;
2868 
2869 	max_tc = resp.max_configurable_queues;
2870 	tc_rec->max_tc = max_tc;
2871 
2872 	if (resp.queue_cfg_info & HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_CFG_INFO_USE_PROFILE_TYPE)
2873 		tc_rec->serv_type_enabled = true;
2874 
2875 	qptr = &resp.queue_id0;
2876 	type_ptr0 = &resp.queue_id0_service_profile_type;
2877 	type_ptr1 = &resp.queue_id1_service_profile_type;
2878 	for (tc = 0; tc < max_tc; tc++) {
2879 		tmp_type = tc ? type_ptr1 + (tc - 1) : type_ptr0;
2880 
2881 		cos_id = *qptr++;
2882 		/* RoCE CoS queue is the first cos queue.
2883 		 * For MP12 and MP17 order is 405 and 141015.
2884 		 */
2885 		if (is_bnxt_roce_queue(rdev, *qptr, *tmp_type)) {
2886 			tc_rec->cos_id_roce = cos_id;
2887 			tc_rec->tc_roce = tc;
2888 		} else if (is_bnxt_cnp_queue(rdev, *qptr, *tmp_type)) {
2889 			tc_rec->cos_id_cnp = cos_id;
2890 			tc_rec->tc_cnp = tc;
2891 		} else if (!def_init) {
2892 			def_init = true;
2893 			tc_rec->tc_def = tc;
2894 			tc_rec->cos_id_def = cos_id;
2895 		}
2896 		qptr++;
2897 	}
2898 
2899 	return rc;
2900 }
2901 
2902 int bnxt_re_hwrm_cos2bw_qcfg(struct bnxt_re_dev *rdev, u16 target_id,
2903 			     struct bnxt_re_cos2bw_cfg *cfg)
2904 {
2905 	struct bnxt_en_dev *en_dev = rdev->en_dev;
2906 	struct hwrm_queue_cos2bw_qcfg_output resp;
2907 	struct hwrm_queue_cos2bw_qcfg_input req = {0};
2908 	struct bnxt_fw_msg fw_msg;
2909 	int rc, indx;
2910 	void *data;
2911 
2912 	memset(&fw_msg, 0, sizeof(fw_msg));
2913 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
2914 			      HWRM_QUEUE_COS2BW_QCFG, -1, target_id);
2915 	req.port_id = (target_id == 0xFFFF) ? en_dev->pf_port_id : 1;
2916 
2917 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
2918 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
2919 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
2920 	if (rc)
2921 		return rc;
2922 	data = &resp.queue_id0 + offsetof(struct bnxt_re_cos2bw_cfg,
2923 					  queue_id);
2924 	for (indx = 0; indx < 8; indx++, data += (sizeof(cfg->cfg))) {
2925 		memcpy(&cfg->cfg, data, sizeof(cfg->cfg));
2926 		if (indx == 0)
2927 			cfg->queue_id = resp.queue_id0;
2928 		cfg++;
2929 	}
2930 
2931 	return rc;
2932 }
2933 
2934 int bnxt_re_hwrm_cos2bw_cfg(struct bnxt_re_dev *rdev, u16 target_id,
2935 			    struct bnxt_re_cos2bw_cfg *cfg)
2936 {
2937 	struct bnxt_en_dev *en_dev = rdev->en_dev;
2938 	struct hwrm_queue_cos2bw_cfg_input req = {0};
2939 	struct hwrm_queue_cos2bw_cfg_output resp = {0};
2940 	struct bnxt_fw_msg fw_msg;
2941 	void *data;
2942 	int indx;
2943 	int rc;
2944 
2945 	memset(&fw_msg, 0, sizeof(fw_msg));
2946 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
2947 			      HWRM_QUEUE_COS2BW_CFG, -1, target_id);
2948 	req.port_id = (target_id == 0xFFFF) ? en_dev->pf_port_id : 1;
2949 
2950 	/* Chimp wants enable bit to retain previous
2951 	 * config done by L2 driver
2952 	 */
2953 	for (indx = 0; indx < 8; indx++) {
2954 		if (cfg[indx].queue_id < 40) {
2955 			req.enables |= cpu_to_le32(
2956 				HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID0_VALID <<
2957 				indx);
2958 		}
2959 
2960 		data = (char *)&req.unused_0 + indx * (sizeof(*cfg) - 4);
2961 		memcpy(data, &cfg[indx].queue_id, sizeof(*cfg) - 4);
2962 		if (indx == 0) {
2963 			req.queue_id0 = cfg[0].queue_id;
2964 			req.unused_0 = 0;
2965 		}
2966 	}
2967 
2968 	memset(&resp, 0, sizeof(resp));
2969 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
2970 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
2971 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
2972 	return rc;
2973 }
2974 
2975 int bnxt_re_host_pf_id_query(struct bnxt_re_dev *rdev,
2976 			     struct bnxt_qplib_query_fn_info *fn_info,
2977 			     u32 *pf_mask, u32 *first_pf)
2978 {
2979 	struct hwrm_func_host_pf_ids_query_output resp = {0};
2980 	struct hwrm_func_host_pf_ids_query_input req;
2981 	struct bnxt_en_dev *en_dev = rdev->en_dev;
2982 	struct bnxt_fw_msg fw_msg;
2983 	int rc;
2984 
2985 	memset(&fw_msg, 0, sizeof(fw_msg));
2986 	memset(&req, 0, sizeof(req));
2987 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
2988 			      HWRM_FUNC_HOST_PF_IDS_QUERY, -1, -1);
2989 	/* To query the info from the host EPs */
2990 	switch (fn_info->host) {
2991 		case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_SOC:
2992 		case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_0:
2993 		case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_1:
2994 		case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_2:
2995 		case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_3:
2996 			req.host = fn_info->host;
2997 		break;
2998 		default:
2999 			req.host = HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_0;
3000 		break;
3001 	}
3002 
3003 	req.filter = fn_info->filter;
3004 	if (req.filter > HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_FILTER_ROCE)
3005 		req.filter = HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_FILTER_ALL;
3006 
3007 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
3008 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
3009 	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
3010 
3011 
3012 	*first_pf = le16_to_cpu(resp.first_pf_id);
3013 	*pf_mask = le16_to_cpu(resp.pf_ordinal_mask);
3014 
3015 	return rc;
3016 }
3017 
3018 static void bnxt_re_put_stats_ctx(struct bnxt_re_dev *rdev)
3019 {
3020 	struct bnxt_qplib_ctx *hctx;
3021 	struct bnxt_qplib_res *res;
3022 	u16 tid = 0xffff;
3023 
3024 	res = &rdev->qplib_res;
3025 	hctx = res->hctx;
3026 
3027 	if (test_and_clear_bit(BNXT_RE_FLAG_STATS_CTX_ALLOC, &rdev->flags)) {
3028 		bnxt_re_net_stats_ctx_free(rdev, hctx->stats.fw_id, tid);
3029 		bnxt_qplib_free_stat_mem(res, &hctx->stats);
3030 	}
3031 }
3032 
3033 static void bnxt_re_put_stats2_ctx(struct bnxt_re_dev *rdev)
3034 {
3035 	test_and_clear_bit(BNXT_RE_FLAG_STATS_CTX2_ALLOC, &rdev->flags);
3036 }
3037 
3038 static int bnxt_re_get_stats_ctx(struct bnxt_re_dev *rdev)
3039 {
3040 	struct bnxt_qplib_ctx *hctx;
3041 	struct bnxt_qplib_res *res;
3042 	u16 tid = 0xffff;
3043 	int rc;
3044 
3045 	res = &rdev->qplib_res;
3046 	hctx = res->hctx;
3047 
3048 	rc = bnxt_qplib_alloc_stat_mem(res->pdev, rdev->chip_ctx, &hctx->stats);
3049 	if (rc)
3050 		return -ENOMEM;
3051 	rc = bnxt_re_net_stats_ctx_alloc(rdev, tid);
3052 	if (rc)
3053 		goto free_stat_mem;
3054 	set_bit(BNXT_RE_FLAG_STATS_CTX_ALLOC, &rdev->flags);
3055 
3056 	return 0;
3057 
3058 free_stat_mem:
3059 	bnxt_qplib_free_stat_mem(res, &hctx->stats);
3060 
3061 	return rc;
3062 }
3063 
3064 static int bnxt_re_update_dev_attr(struct bnxt_re_dev *rdev)
3065 {
3066 	int rc;
3067 
3068 	rc = bnxt_qplib_get_dev_attr(&rdev->rcfw);
3069 	if (rc)
3070 		return rc;
3071 	if (!bnxt_re_check_min_attr(rdev))
3072 		return -EINVAL;
3073 	return 0;
3074 }
3075 
3076 static void bnxt_re_free_tbls(struct bnxt_re_dev *rdev)
3077 {
3078 	bnxt_qplib_clear_tbls(&rdev->qplib_res);
3079 	bnxt_qplib_free_tbls(&rdev->qplib_res);
3080 }
3081 
3082 static int bnxt_re_alloc_init_tbls(struct bnxt_re_dev *rdev)
3083 {
3084 	struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx;
3085 	u8 pppp_factor = 0;
3086 	int rc;
3087 
3088 	 /*
3089 	  * TODO: Need a better mechanism for spreading of the
3090 	  * 512 extended PPP pages. For now, spreading it
3091 	  * based on port_count
3092 	  */
3093 	if (_is_chip_p7(chip_ctx) && chip_ctx->modes.db_push)
3094 		pppp_factor = rdev->en_dev->port_count;
3095 	rc = bnxt_qplib_alloc_tbls(&rdev->qplib_res, pppp_factor);
3096 	if (rc)
3097 		return rc;
3098 	bnxt_qplib_init_tbls(&rdev->qplib_res);
3099 	set_bit(BNXT_RE_FLAG_TBLS_ALLOCINIT, &rdev->flags);
3100 
3101 	return 0;
3102 }
3103 
3104 static void bnxt_re_clean_nqs(struct bnxt_re_dev *rdev)
3105 {
3106 	struct bnxt_qplib_nq *nq;
3107 	int i;
3108 
3109 	if (!rdev->nqr.max_init)
3110 		return;
3111 
3112 	for (i = (rdev->nqr.max_init - 1) ; i >= 0; i--) {
3113 		nq = &rdev->nqr.nq[i];
3114 		bnxt_qplib_disable_nq(nq);
3115 		bnxt_re_net_ring_free(rdev, nq->ring_id);
3116 		bnxt_qplib_free_nq_mem(nq);
3117 	}
3118 	rdev->nqr.max_init = 0;
3119 }
3120 
3121 static int bnxt_re_setup_nqs(struct bnxt_re_dev *rdev)
3122 {
3123 	struct bnxt_re_ring_attr rattr = {};
3124 	struct bnxt_qplib_nq *nq;
3125 	int rc, i;
3126 	int depth;
3127 	u32 offt;
3128 	u16 vec;
3129 
3130 	mutex_init(&rdev->nqr.load_lock);
3131 	/*
3132 	 * TODO: Optimize the depth based on the
3133 	 * number of NQs.
3134 	 */
3135 	depth = BNXT_QPLIB_NQE_MAX_CNT;
3136 	for (i = 0; i < rdev->nqr.num_msix - 1; i++) {
3137 		nq = &rdev->nqr.nq[i];
3138 		vec = rdev->nqr.msix_entries[i + 1].vector;
3139 		offt = rdev->nqr.msix_entries[i + 1].db_offset;
3140 		nq->hwq.max_elements = depth;
3141 		rc = bnxt_qplib_alloc_nq_mem(&rdev->qplib_res, nq);
3142 		if (rc) {
3143 			dev_err(rdev_to_dev(rdev),
3144 				"Failed to get mem for NQ %d, rc = 0x%x",
3145 				i, rc);
3146 			goto fail_mem;
3147 		}
3148 
3149 		rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr;
3150 		rattr.pages = nq->hwq.pbl[rdev->nqr.nq[i].hwq.level].pg_count;
3151 		rattr.type = bnxt_re_get_rtype(rdev);
3152 		rattr.mode = HWRM_RING_ALLOC_INPUT_INT_MODE_MSIX;
3153 		rattr.depth = nq->hwq.max_elements - 1;
3154 		rattr.lrid = rdev->nqr.msix_entries[i + 1].ring_idx;
3155 
3156 		/* Set DBR pacing capability on the first NQ ring only */
3157 		if (!i && bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx))
3158 			rattr.flags = HWRM_RING_ALLOC_INPUT_FLAGS_NQ_DBR_PACING;
3159 		else
3160 			rattr.flags = 0;
3161 
3162 		rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id);
3163 		if (rc) {
3164 			nq->ring_id = 0xffff; /* Invalid ring-id */
3165 			dev_err(rdev_to_dev(rdev),
3166 				"Failed to get fw id for NQ %d, rc = 0x%x",
3167 				i, rc);
3168 			goto fail_ring;
3169 		}
3170 
3171 		rc = bnxt_qplib_enable_nq(nq, i, vec, offt,
3172 					  &bnxt_re_cqn_handler,
3173 					  &bnxt_re_srqn_handler);
3174 		if (rc) {
3175 			dev_err(rdev_to_dev(rdev),
3176 				"Failed to enable NQ %d, rc = 0x%x", i, rc);
3177 			goto fail_en;
3178 		}
3179 	}
3180 
3181 	rdev->nqr.max_init = i;
3182 	return 0;
3183 fail_en:
3184 	/* *nq was i'th nq */
3185 	bnxt_re_net_ring_free(rdev, nq->ring_id);
3186 fail_ring:
3187 	bnxt_qplib_free_nq_mem(nq);
3188 fail_mem:
3189 	rdev->nqr.max_init = i;
3190 	return rc;
3191 }
3192 
3193 static void bnxt_re_sysfs_destroy_file(struct bnxt_re_dev *rdev)
3194 {
3195 	int i;
3196 
3197 	for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++)
3198 		device_remove_file(&rdev->ibdev.dev, bnxt_re_attributes[i]);
3199 }
3200 
3201 static int bnxt_re_sysfs_create_file(struct bnxt_re_dev *rdev)
3202 {
3203 	int i, j, rc = 0;
3204 
3205 	for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) {
3206 		rc = device_create_file(&rdev->ibdev.dev,
3207 					bnxt_re_attributes[i]);
3208 		if (rc) {
3209 			dev_err(rdev_to_dev(rdev),
3210 				"Failed to create IB sysfs with rc = 0x%x", rc);
3211 			/* Must clean up all created device files */
3212 			for (j = 0; j < i; j++)
3213 				device_remove_file(&rdev->ibdev.dev,
3214 						   bnxt_re_attributes[j]);
3215 			clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
3216 			ib_unregister_device(&rdev->ibdev);
3217 			return 1;
3218 		}
3219 	}
3220 	return 0;
3221 }
3222 
3223 /* worker thread for polling periodic events. Now used for QoS programming*/
3224 static void bnxt_re_worker(struct work_struct *work)
3225 {
3226 	struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
3227 						worker.work);
3228 	int rc;
3229 
3230 	/* QoS is in 30s cadence for PFs*/
3231 	if (!rdev->is_virtfn && !rdev->worker_30s--)
3232 		rdev->worker_30s = 30;
3233 	/* Use trylock for  bnxt_re_dev_lock as this can be
3234 	 * held for long time by debugfs show path while issuing
3235 	 * HWRMS. If the debugfs name update is not done in this
3236 	 * iteration, the driver will check for the same in the
3237 	 * next schedule of the worker i.e after 1 sec.
3238 	 */
3239 	if (mutex_trylock(&bnxt_re_dev_lock))
3240 		mutex_unlock(&bnxt_re_dev_lock);
3241 
3242 	if (!rdev->stats.stats_query_sec)
3243 		goto resched;
3244 
3245 	if (test_bit(BNXT_RE_FLAG_ISSUE_CFA_FLOW_STATS, &rdev->flags) &&
3246 	    (rdev->is_virtfn ||
3247 	    !_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags))) {
3248 		if (!(rdev->stats.stats_query_counter++ %
3249 		      rdev->stats.stats_query_sec)) {
3250 			rc = bnxt_re_get_qos_stats(rdev);
3251 			if (rc && rc != -ENOMEM)
3252 				clear_bit(BNXT_RE_FLAG_ISSUE_CFA_FLOW_STATS,
3253 					  &rdev->flags);
3254 			}
3255 	}
3256 
3257 resched:
3258 	schedule_delayed_work(&rdev->worker, msecs_to_jiffies(1000));
3259 }
3260 
3261 static int bnxt_re_alloc_dbr_sw_stats_mem(struct bnxt_re_dev *rdev)
3262 {
3263 	if (!(rdev->dbr_drop_recov || rdev->dbr_pacing))
3264 		return 0;
3265 
3266 	rdev->dbr_sw_stats = kzalloc(sizeof(*rdev->dbr_sw_stats), GFP_KERNEL);
3267 	if (!rdev->dbr_sw_stats)
3268 		return -ENOMEM;
3269 
3270 	return 0;
3271 }
3272 
3273 static void bnxt_re_free_dbr_sw_stats_mem(struct bnxt_re_dev *rdev)
3274 {
3275 	kfree(rdev->dbr_sw_stats);
3276 	rdev->dbr_sw_stats = NULL;
3277 }
3278 
3279 static int bnxt_re_initialize_dbr_drop_recov(struct bnxt_re_dev *rdev)
3280 {
3281 	rdev->dbr_drop_recov_wq =
3282 		create_singlethread_workqueue("bnxt_re_dbr_drop_recov");
3283 	if (!rdev->dbr_drop_recov_wq) {
3284 		dev_err(rdev_to_dev(rdev), "DBR Drop Revov wq alloc failed!");
3285 		return -EINVAL;
3286 	}
3287 	rdev->dbr_drop_recov = true;
3288 
3289 	/* Enable configfs setting dbr_drop_recov by default*/
3290 	rdev->user_dbr_drop_recov = true;
3291 
3292 	rdev->user_dbr_drop_recov_timeout = BNXT_RE_DBR_RECOV_USERLAND_TIMEOUT;
3293 	return 0;
3294 }
3295 
3296 static void bnxt_re_deinitialize_dbr_drop_recov(struct bnxt_re_dev *rdev)
3297 {
3298 	if (rdev->dbr_drop_recov_wq) {
3299 		flush_workqueue(rdev->dbr_drop_recov_wq);
3300 		destroy_workqueue(rdev->dbr_drop_recov_wq);
3301 		rdev->dbr_drop_recov_wq = NULL;
3302 	}
3303 	rdev->dbr_drop_recov = false;
3304 }
3305 
3306 static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev)
3307 {
3308 	int rc;
3309 
3310 	/* Allocate a page for app use */
3311 	rdev->dbr_page = (void *)__get_free_page(GFP_KERNEL);
3312 	if (!rdev->dbr_page) {
3313 		dev_err(rdev_to_dev(rdev), "DBR page allocation failed!");
3314 		return -ENOMEM;
3315 	}
3316 	memset((u8 *)rdev->dbr_page, 0, PAGE_SIZE);
3317 	rdev->qplib_res.pacing_data = (struct bnxt_qplib_db_pacing_data *)rdev->dbr_page;
3318 	rc = bnxt_re_hwrm_dbr_pacing_qcfg(rdev);
3319 	if (rc) {
3320 		dev_err(rdev_to_dev(rdev),
3321 			"Failed to query dbr pacing config %d\n", rc);
3322 		goto fail;
3323 	}
3324 	/* Create a work queue for scheduling dbq event */
3325 	rdev->dbq_wq = create_singlethread_workqueue("bnxt_re_dbq");
3326 	if (!rdev->dbq_wq) {
3327 		dev_err(rdev_to_dev(rdev), "DBQ wq alloc failed!");
3328 		rc = -ENOMEM;
3329 		goto fail;
3330 	}
3331 	/* MAP grc window 2 for reading db fifo depth */
3332 	writel_fbsd(rdev->en_dev->softc,  BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4, 0,
3333 			rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_BASE_MASK);
3334 	rdev->dbr_db_fifo_reg_off =
3335 		(rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_OFFSET_MASK) +
3336 		0x2000;
3337 	rdev->qplib_res.pacing_data->grc_reg_offset = rdev->dbr_db_fifo_reg_off;
3338 
3339 	rdev->dbr_bar_addr =
3340 		pci_resource_start(rdev->qplib_res.pdev, 0) +
3341 		rdev->dbr_db_fifo_reg_off;
3342 
3343 	/* Percentage of DB FIFO */
3344 	rdev->dbq_watermark = BNXT_RE_PACING_DBQ_THRESHOLD;
3345 	rdev->pacing_en_int_th = BNXT_RE_PACING_EN_INT_THRESHOLD;
3346 	rdev->pacing_algo_th = BNXT_RE_PACING_ALGO_THRESHOLD;
3347 	rdev->dbq_pacing_time = BNXT_RE_DBR_INT_TIME;
3348 	rdev->dbr_def_do_pacing = BNXT_RE_DBR_DO_PACING_NO_CONGESTION;
3349 	rdev->do_pacing_save = rdev->dbr_def_do_pacing;
3350 	bnxt_re_set_default_pacing_data(rdev);
3351 	dev_dbg(rdev_to_dev(rdev), "Initialized db pacing\n");
3352 
3353 	return 0;
3354 fail:
3355 	free_page((u64)rdev->dbr_page);
3356 	rdev->dbr_page = NULL;
3357 	return rc;
3358 }
3359 
3360 static void bnxt_re_deinitialize_dbr_pacing(struct bnxt_re_dev *rdev)
3361 {
3362 	if (rdev->dbq_wq)
3363 		flush_workqueue(rdev->dbq_wq);
3364 
3365 	cancel_work_sync(&rdev->dbq_fifo_check_work);
3366 	cancel_delayed_work_sync(&rdev->dbq_pacing_work);
3367 
3368 	if (rdev->dbq_wq) {
3369 		destroy_workqueue(rdev->dbq_wq);
3370 		rdev->dbq_wq = NULL;
3371 	}
3372 
3373 	if (rdev->dbr_page)
3374 		free_page((u64)rdev->dbr_page);
3375 	rdev->dbr_page = NULL;
3376 	rdev->dbr_pacing = false;
3377 }
3378 
3379 /* enable_dbr_pacing needs to be done only for older FWs
3380  * where host selects primary function. ie. pacing_ext
3381  * flags is not set.
3382  */
3383 int bnxt_re_enable_dbr_pacing(struct bnxt_re_dev *rdev)
3384 {
3385 	struct bnxt_qplib_nq *nq;
3386 
3387 	nq = &rdev->nqr.nq[0];
3388 	rdev->dbq_nq_id = nq->ring_id;
3389 
3390 	if (!bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx) &&
3391 	    bnxt_qplib_dbr_pacing_is_primary_pf(rdev->chip_ctx)) {
3392 		if (bnxt_re_hwrm_dbr_pacing_cfg(rdev, true)) {
3393 			dev_err(rdev_to_dev(rdev),
3394 					"Failed to set dbr pacing config\n");
3395 			return -EIO;
3396 		}
3397 		/* MAP grc window 8 for ARMing the NQ DBQ */
3398 		writel_fbsd(rdev->en_dev->softc, BNXT_GRCPF_REG_WINDOW_BASE_OUT + 28 , 0,
3399 			    rdev->chip_ctx->dbr_aeq_arm_reg & BNXT_GRC_BASE_MASK);
3400 		rdev->dbr_aeq_arm_reg_off =
3401 			(rdev->chip_ctx->dbr_aeq_arm_reg &
3402 			 BNXT_GRC_OFFSET_MASK) + 0x8000;
3403 		writel_fbsd(rdev->en_dev->softc, rdev->dbr_aeq_arm_reg_off , 0, 1);
3404 	}
3405 
3406 	return 0;
3407 }
3408 
3409 /* disable_dbr_pacing needs to be done only for older FWs
3410  * where host selects primary function. ie. pacing_ext
3411  * flags is not set.
3412  */
3413 
3414 int bnxt_re_disable_dbr_pacing(struct bnxt_re_dev *rdev)
3415 {
3416 	int rc = 0;
3417 
3418 	if (!bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx) &&
3419 	    bnxt_qplib_dbr_pacing_is_primary_pf(rdev->chip_ctx))
3420 		rc = bnxt_re_hwrm_dbr_pacing_cfg(rdev, false);
3421 
3422 	return rc;
3423 }
3424 
3425 static void bnxt_re_ib_uninit(struct bnxt_re_dev *rdev)
3426 {
3427 	if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
3428 		bnxt_re_sysfs_destroy_file(rdev);
3429 		/* Cleanup ib dev */
3430 		ib_unregister_device(&rdev->ibdev);
3431 		clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
3432 		return;
3433 	}
3434 }
3435 
3436 static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
3437 {
3438 	struct bnxt_qplib_dpi *kdpi;
3439 	int rc, wait_count = BNXT_RE_RES_FREE_WAIT_COUNT;
3440 
3441 	bnxt_re_net_unregister_async_event(rdev);
3442 
3443 	bnxt_re_put_stats2_ctx(rdev);
3444 	if (test_and_clear_bit(BNXT_RE_FLAG_DEV_LIST_INITIALIZED,
3445 			       &rdev->flags)) {
3446 		/* did the caller hold the lock? */
3447 		mutex_lock(&bnxt_re_dev_lock);
3448 		list_del_rcu(&rdev->list);
3449 		mutex_unlock(&bnxt_re_dev_lock);
3450 	}
3451 
3452 	bnxt_re_uninit_resolve_wq(rdev);
3453 	bnxt_re_uninit_dcb_wq(rdev);
3454 	bnxt_re_uninit_aer_wq(rdev);
3455 
3456 	bnxt_re_deinitialize_dbr_drop_recov(rdev);
3457 
3458 	if (bnxt_qplib_dbr_pacing_en(rdev->chip_ctx))
3459 		(void)bnxt_re_disable_dbr_pacing(rdev);
3460 
3461 	if (test_and_clear_bit(BNXT_RE_FLAG_WORKER_REG, &rdev->flags)) {
3462 		cancel_delayed_work_sync(&rdev->worker);
3463 	}
3464 
3465 	/* Wait for ULPs to release references */
3466 	while (atomic_read(&rdev->stats.rsors.cq_count) && --wait_count)
3467 		usleep_range(500, 1000);
3468 	if (!wait_count)
3469 		dev_err(rdev_to_dev(rdev),
3470 			"CQ resources not freed by stack, count = 0x%x",
3471 			atomic_read(&rdev->stats.rsors.cq_count));
3472 
3473 	kdpi = &rdev->dpi_privileged;
3474 	if (kdpi->umdbr) { /* kernel DPI was allocated with success */
3475 		(void)bnxt_qplib_dealloc_dpi(&rdev->qplib_res, kdpi);
3476 		/*
3477 		 * Driver just need to know no command had failed
3478 		 * during driver load sequence and below command is
3479 		 * required indeed. Piggybacking dpi allocation status.
3480 		 */
3481 	}
3482 
3483 	/* Protect the device uninitialization and start_irq/stop_irq L2
3484 	 * callbacks with rtnl lock to avoid race condition between these calls
3485 	 */
3486 	rtnl_lock();
3487 	if (test_and_clear_bit(BNXT_RE_FLAG_SETUP_NQ, &rdev->flags))
3488 		bnxt_re_clean_nqs(rdev);
3489 	rtnl_unlock();
3490 
3491 	if (test_and_clear_bit(BNXT_RE_FLAG_TBLS_ALLOCINIT, &rdev->flags))
3492 		bnxt_re_free_tbls(rdev);
3493 	if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_INIT, &rdev->flags)) {
3494 		rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
3495 		if (rc)
3496 			dev_warn(rdev_to_dev(rdev),
3497 				 "Failed to deinitialize fw, rc = 0x%x", rc);
3498 	}
3499 
3500 	bnxt_re_put_stats_ctx(rdev);
3501 
3502 	if (test_and_clear_bit(BNXT_RE_FLAG_ALLOC_CTX, &rdev->flags))
3503 		bnxt_qplib_free_hwctx(&rdev->qplib_res);
3504 
3505 	rtnl_lock();
3506 	if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags))
3507 		bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
3508 
3509 	if (rdev->dbr_pacing)
3510 		bnxt_re_deinitialize_dbr_pacing(rdev);
3511 
3512 	bnxt_re_free_dbr_sw_stats_mem(rdev);
3513 
3514 	if (test_and_clear_bit(BNXT_RE_FLAG_NET_RING_ALLOC, &rdev->flags))
3515 		bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id);
3516 
3517 	if (test_and_clear_bit(BNXT_RE_FLAG_ALLOC_RCFW, &rdev->flags))
3518 		bnxt_qplib_free_rcfw_channel(&rdev->qplib_res);
3519 
3520 	if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags))
3521 		bnxt_re_free_msix(rdev);
3522 	rtnl_unlock();
3523 
3524 	bnxt_re_destroy_chip_ctx(rdev);
3525 
3526 	if (op_type != BNXT_RE_PRE_RECOVERY_REMOVE) {
3527 		if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED,
3528 				       &rdev->flags))
3529 			bnxt_re_unregister_netdev(rdev);
3530 	}
3531 }
3532 
3533 static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type, u8 wqe_mode)
3534 {
3535 	struct bnxt_re_ring_attr rattr = {};
3536 	struct bnxt_qplib_creq_ctx *creq;
3537 	int vec, offset;
3538 	int rc = 0;
3539 
3540 	if (op_type != BNXT_RE_POST_RECOVERY_INIT) {
3541 		/* Registered a new RoCE device instance to netdev */
3542 		rc = bnxt_re_register_netdev(rdev);
3543 		if (rc)
3544 			return -EINVAL;
3545 	}
3546 	set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
3547 
3548 	rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode);
3549 	if (rc) {
3550 		dev_err(rdev_to_dev(rdev), "Failed to get chip context rc 0x%x", rc);
3551 		bnxt_re_unregister_netdev(rdev);
3552 		clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
3553 		rc = -EINVAL;
3554 		return rc;
3555 	}
3556 
3557 	/* Protect the device initialization and start_irq/stop_irq L2 callbacks
3558 	 * with rtnl lock to avoid race condition between these calls
3559 	 */
3560 	rtnl_lock();
3561 	rc = bnxt_re_request_msix(rdev);
3562 	if (rc) {
3563 		dev_err(rdev_to_dev(rdev),
3564 			"Requesting MSI-X vectors failed with rc = 0x%x", rc);
3565 		rc = -EINVAL;
3566 		goto release_rtnl;
3567 	}
3568 	set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags);
3569 
3570 	/* Establish RCFW Communication Channel to initialize the context
3571 	   memory for the function and all child VFs */
3572 	rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res);
3573 	if (rc) {
3574 		dev_err(rdev_to_dev(rdev),
3575 			"Failed to alloc mem for rcfw, rc = %#x\n", rc);
3576 		goto release_rtnl;
3577 	}
3578 	set_bit(BNXT_RE_FLAG_ALLOC_RCFW, &rdev->flags);
3579 
3580 	creq = &rdev->rcfw.creq;
3581 	rattr.dma_arr = creq->hwq.pbl[PBL_LVL_0].pg_map_arr;
3582 	rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count;
3583 	rattr.type = bnxt_re_get_rtype(rdev);
3584 	rattr.mode = HWRM_RING_ALLOC_INPUT_INT_MODE_MSIX;
3585 	rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1;
3586 	rattr.lrid = rdev->nqr.msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
3587 	rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id);
3588 	if (rc) {
3589 		creq->ring_id = 0xffff;
3590 		dev_err(rdev_to_dev(rdev),
3591 			"Failed to allocate CREQ fw id with rc = 0x%x", rc);
3592 		goto release_rtnl;
3593 	}
3594 
3595 	if (!rdev->chip_ctx)
3596 		goto release_rtnl;
3597 	/* Program the NQ ID for DBQ notification */
3598 	if (rdev->chip_ctx->modes.dbr_pacing_v0 ||
3599 	    bnxt_qplib_dbr_pacing_en(rdev->chip_ctx) ||
3600 	    bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) {
3601 		rc = bnxt_re_initialize_dbr_pacing(rdev);
3602 		if (!rc)
3603 			rdev->dbr_pacing = true;
3604 		else
3605 			rdev->dbr_pacing = false;
3606 		dev_dbg(rdev_to_dev(rdev), "%s: initialize db pacing ret %d\n",
3607 			__func__, rc);
3608 	}
3609 
3610 	vec = rdev->nqr.msix_entries[BNXT_RE_AEQ_IDX].vector;
3611 	offset = rdev->nqr.msix_entries[BNXT_RE_AEQ_IDX].db_offset;
3612 	rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw, vec, offset,
3613 					    &bnxt_re_aeq_handler);
3614 	if (rc) {
3615 		dev_err(rdev_to_dev(rdev),
3616 			"Failed to enable RCFW channel with rc = 0x%x", rc);
3617 		goto release_rtnl;
3618 	}
3619 	set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);
3620 
3621 	rc = bnxt_re_update_dev_attr(rdev);
3622 	if (rc)
3623 		goto release_rtnl;
3624 	bnxt_re_set_resource_limits(rdev);
3625 	if (!rdev->is_virtfn && !_is_chip_gen_p5_p7(rdev->chip_ctx)) {
3626 		rc = bnxt_qplib_alloc_hwctx(&rdev->qplib_res);
3627 		if (rc) {
3628 			dev_err(rdev_to_dev(rdev),
3629 				"Failed to alloc hw contexts, rc = 0x%x", rc);
3630 			goto release_rtnl;
3631 		}
3632 		set_bit(BNXT_RE_FLAG_ALLOC_CTX, &rdev->flags);
3633 	}
3634 
3635 	rc = bnxt_re_get_stats_ctx(rdev);
3636 	if (rc)
3637 		goto release_rtnl;
3638 
3639 	rc = bnxt_qplib_init_rcfw(&rdev->rcfw, rdev->is_virtfn);
3640 	if (rc) {
3641 		dev_err(rdev_to_dev(rdev),
3642 			"Failed to initialize fw with rc = 0x%x", rc);
3643 		goto release_rtnl;
3644 	}
3645 	set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_INIT, &rdev->flags);
3646 
3647 	/* Based resource count on the 'new' device caps */
3648 	rc = bnxt_re_update_dev_attr(rdev);
3649 	if (rc)
3650 		goto release_rtnl;
3651 	rc = bnxt_re_alloc_init_tbls(rdev);
3652 	if (rc) {
3653 		dev_err(rdev_to_dev(rdev), "tbls alloc-init failed rc = %#x",
3654 			rc);
3655 		goto release_rtnl;
3656 	}
3657 	rc = bnxt_re_setup_nqs(rdev);
3658 	if (rc) {
3659 		dev_err(rdev_to_dev(rdev), "NQs alloc-init failed rc = %#x\n",
3660 			rc);
3661 		if (rdev->nqr.max_init == 0)
3662 			goto release_rtnl;
3663 
3664 		dev_warn(rdev_to_dev(rdev),
3665 			"expected nqs %d available nqs %d\n",
3666 			rdev->nqr.num_msix, rdev->nqr.max_init);
3667 	}
3668 	set_bit(BNXT_RE_FLAG_SETUP_NQ, &rdev->flags);
3669 	rtnl_unlock();
3670 
3671 	rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res, &rdev->dpi_privileged,
3672 				  rdev, BNXT_QPLIB_DPI_TYPE_KERNEL);
3673 	if (rc)
3674 		goto fail;
3675 
3676 	if (rdev->dbr_pacing)
3677 		bnxt_re_enable_dbr_pacing(rdev);
3678 
3679 	if (rdev->chip_ctx->modes.dbr_drop_recov)
3680 		bnxt_re_initialize_dbr_drop_recov(rdev);
3681 
3682 	rc = bnxt_re_alloc_dbr_sw_stats_mem(rdev);
3683 	if (rc)
3684 		goto fail;
3685 
3686 	/* This block of code is needed for error recovery support */
3687 	if (!rdev->is_virtfn) {
3688 		struct bnxt_re_tc_rec *tc_rec;
3689 
3690 		tc_rec = &rdev->tc_rec[0];
3691 		rc =  bnxt_re_query_hwrm_qportcfg(rdev, tc_rec, 0xFFFF);
3692 		if (rc) {
3693 			dev_err(rdev_to_dev(rdev),
3694 				"Failed to query port config rc:%d", rc);
3695 			return rc;
3696 		}
3697 
3698 		/* Query f/w defaults of CC params */
3699 		rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &rdev->cc_param);
3700 		if (rc)
3701 			dev_warn(rdev_to_dev(rdev),
3702 				"Failed to query CC defaults\n");
3703 		if (1) {
3704 			rdev->num_vfs = pci_num_vf(rdev->en_dev->pdev);
3705 			if (rdev->num_vfs) {
3706 				bnxt_re_set_resource_limits(rdev);
3707 				bnxt_qplib_set_func_resources(&rdev->qplib_res);
3708 			}
3709 		}
3710 	}
3711 	INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
3712 	set_bit(BNXT_RE_FLAG_WORKER_REG, &rdev->flags);
3713 	schedule_delayed_work(&rdev->worker, msecs_to_jiffies(1000));
3714 
3715 	bnxt_re_init_dcb_wq(rdev);
3716 	bnxt_re_init_aer_wq(rdev);
3717 	bnxt_re_init_resolve_wq(rdev);
3718 	mutex_lock(&bnxt_re_dev_lock);
3719 	list_add_tail_rcu(&rdev->list, &bnxt_re_dev_list);
3720 	/* Added to the list, not in progress anymore */
3721 	gadd_dev_inprogress--;
3722 	set_bit(BNXT_RE_FLAG_DEV_LIST_INITIALIZED, &rdev->flags);
3723 	mutex_unlock(&bnxt_re_dev_lock);
3724 
3725 
3726 	return rc;
3727 release_rtnl:
3728 	rtnl_unlock();
3729 fail:
3730 	bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
3731 
3732 	return rc;
3733 }
3734 
3735 static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
3736 {
3737 	int rc = 0;
3738 
3739 	rc = bnxt_re_register_ib(rdev);
3740 	if (rc) {
3741 		dev_err(rdev_to_dev(rdev),
3742 			"Register IB failed with rc = 0x%x", rc);
3743 		goto fail;
3744 	}
3745 	if (bnxt_re_sysfs_create_file(rdev)) {
3746 		bnxt_re_stopqps_and_ib_uninit(rdev);
3747 		goto fail;
3748 	}
3749 
3750 	set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
3751 	set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
3752 	set_bit(BNXT_RE_FLAG_ISSUE_CFA_FLOW_STATS, &rdev->flags);
3753 	bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
3754 	bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
3755 
3756 	return rc;
3757 fail:
3758 	bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
3759 	return rc;
3760 }
3761 
/*
 * Non-static wrapper around bnxt_re_ib_init(): forwards @rdev and hands
 * back the helper's return code unchanged.
 */
int _bnxt_re_ib_init(struct bnxt_re_dev *rdev)
{
	int rc = bnxt_re_ib_init(rdev);

	return rc;
}
3767 
/*
 * Non-static wrapper around bnxt_re_ib_init_2().  The helper returns
 * nothing, so this always reports 0; the int return type is kept so a
 * status can be added later without touching callers.
 */
int _bnxt_re_ib_init2(struct bnxt_re_dev *rdev)
{
	int status = 0;

	bnxt_re_ib_init_2(rdev);

	return status;
}
3774 
/*
 * Counterpart of bnxt_re_dev_reg(): hands @rdev to bnxt_re_dev_dealloc().
 * Callers in this file stop using the pointer once this returns.
 */
static void bnxt_re_dev_unreg(struct bnxt_re_dev *rdev)
{
	bnxt_re_dev_dealloc(rdev);
}
3779 
3780 
3781 static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct ifnet *netdev,
3782 			   struct bnxt_en_dev *en_dev)
3783 {
3784 	struct ifnet *realdev = NULL;
3785 
3786 	realdev = netdev;
3787 	if (realdev)
3788 		dev_dbg(NULL, "%s: realdev = %p netdev = %p\n", __func__,
3789 			realdev, netdev);
3790 	/*
3791 	 * Note:
3792 	 * The first argument to bnxt_re_dev_alloc() is 'netdev' and
3793 	 * not 'realdev', since in the case of bonding we want to
3794 	 * register the bonded virtual netdev (master) to the ib stack.
3795 	 * And 'en_dev' (for L2/PCI communication) is the first slave
3796 	 * device (PF0 on the card).
3797 	 * In the case of a regular netdev, both netdev and the en_dev
3798 	 * correspond to the same device.
3799 	 */
3800 	*rdev = bnxt_re_dev_alloc(netdev, en_dev);
3801 	if (!*rdev) {
3802 		pr_err("%s: netdev %p not handled",
3803 			ROCE_DRV_MODULE_NAME, netdev);
3804 		return -ENOMEM;
3805 	}
3806 	bnxt_re_hold(*rdev);
3807 
3808 	return 0;
3809 }
3810 
3811 void bnxt_re_get_link_speed(struct bnxt_re_dev *rdev)
3812 {
3813 	rdev->espeed = rdev->en_dev->espeed;
3814 	return;
3815 }
3816 
/*
 * Quiesce @rdev towards the IB stack: first stop every QP other than
 * QP1 and the shadow QP, then undo the IB initialization.
 */
void bnxt_re_stopqps_and_ib_uninit(struct bnxt_re_dev *rdev)
{
	dev_dbg(rdev_to_dev(rdev), "%s: Stopping QPs, IB uninit on rdev: %p\n",
		__func__, rdev);

	/* QPs are stopped before the IB side is torn down */
	bnxt_re_stop_all_nonqp1_nonshadow_qps(rdev);
	bnxt_re_ib_uninit(rdev);
}
3824 
3825 void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type,
3826 			   struct auxiliary_device *aux_dev)
3827 {
3828 	struct bnxt_re_en_dev_info *en_info;
3829 	struct bnxt_qplib_cmdq_ctx *cmdq;
3830 	struct bnxt_qplib_rcfw *rcfw;
3831 
3832 	rcfw = &rdev->rcfw;
3833 	cmdq = &rcfw->cmdq;
3834 	if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
3835 		set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
3836 
3837 	dev_dbg(rdev_to_dev(rdev), "%s: Removing rdev: %p\n", __func__, rdev);
3838 	bnxt_re_dev_uninit(rdev, op_type);
3839 	en_info = auxiliary_get_drvdata(aux_dev);
3840 	if (en_info) {
3841 		rtnl_lock();
3842 		en_info->rdev = NULL;
3843 		rtnl_unlock();
3844 		if (op_type != BNXT_RE_PRE_RECOVERY_REMOVE) {
3845 			clear_bit(BNXT_RE_FLAG_EN_DEV_PRIMARY_DEV, &en_info->flags);
3846 			clear_bit(BNXT_RE_FLAG_EN_DEV_SECONDARY_DEV, &en_info->flags);
3847 			clear_bit(BNXT_RE_FLAG_EN_DEV_NETDEV_REG, &en_info->flags);
3848 		}
3849 	}
3850 	bnxt_re_dev_unreg(rdev);
3851 }
3852 
3853 int bnxt_re_add_device(struct bnxt_re_dev **rdev,
3854 		       struct ifnet *netdev,
3855 		       u8 qp_mode, u8 op_type, u8 wqe_mode,
3856 		       u32 num_msix_requested,
3857 		       struct auxiliary_device *aux_dev)
3858 {
3859 	struct bnxt_re_en_dev_info *en_info;
3860 	struct bnxt_en_dev *en_dev;
3861 	int rc = 0;
3862 
3863 	en_info = auxiliary_get_drvdata(aux_dev);
3864 	en_dev = en_info->en_dev;
3865 
3866 	mutex_lock(&bnxt_re_dev_lock);
3867 	/* Check if driver already in mod exit and aux_dev is valid */
3868 	if (gmod_exit || !aux_dev) {
3869 		mutex_unlock(&bnxt_re_dev_lock);
3870 		return -ENODEV;
3871 	}
3872 	/* Add device in progress */
3873 	gadd_dev_inprogress++;
3874 	mutex_unlock(&bnxt_re_dev_lock);
3875 
3876 	rc = bnxt_re_dev_reg(rdev, netdev, en_dev);
3877 	if (rc) {
3878 		dev_dbg(NULL, "Failed to create add device for netdev %p\n",
3879 			netdev);
3880 		/*
3881 		 * For BNXT_RE_POST_RECOVERY_INIT special case
3882 		 * called from bnxt_re_start, the work is
3883 		 * complete only after, bnxt_re_start completes
3884 		 * bnxt_unregister_device in case of failure.
3885 		 * So bnxt_re_start will decrement gadd_dev_inprogress
3886 		 * in case of failure.
3887 		 */
3888 		if (op_type != BNXT_RE_POST_RECOVERY_INIT) {
3889 			mutex_lock(&bnxt_re_dev_lock);
3890 			gadd_dev_inprogress--;
3891 			mutex_unlock(&bnxt_re_dev_lock);
3892 		}
3893 		return rc;
3894 	}
3895 
3896 	if (rc != 0)
3897 		goto ref_error;
3898 
3899 	/*
3900 	 *  num_msix_requested = BNXT_RE_MSIX_FROM_MOD_PARAM indicates fresh driver load.
3901 	 *  Otherwaise, this invocation can be the result of lag create / destroy,
3902 	 *  err revovery, hot fw upgrade, etc..
3903 	 */
3904 	if (num_msix_requested == BNXT_RE_MSIX_FROM_MOD_PARAM) {
3905 		if (bnxt_re_probe_count < BNXT_RE_MAX_DEVICES)
3906 			num_msix_requested = max_msix_vec[bnxt_re_probe_count++];
3907 		else
3908 			/* Consider as default when probe_count exceeds its limit */
3909 			num_msix_requested = 0;
3910 
3911 		/* if user specifies only one value, use the same for all PFs */
3912 		if (max_msix_vec_argc == 1)
3913 			num_msix_requested = max_msix_vec[0];
3914 	}
3915 
3916 	(*rdev)->num_msix_requested = num_msix_requested;
3917 	(*rdev)->gsi_ctx.gsi_qp_mode = qp_mode;
3918 	(*rdev)->adev = aux_dev;
3919 	(*rdev)->dev_addr = en_dev->softc->func.mac_addr;
3920 	/* Before updating the rdev pointer in bnxt_re_en_dev_info structure,
3921 	 * take the rtnl lock to avoid accessing invalid rdev pointer from
3922 	 * L2 ULP callbacks. This is applicable in all the places where rdev
3923 	 * pointer is updated in bnxt_re_en_dev_info.
3924 	 */
3925 	rtnl_lock();
3926 	en_info->rdev = *rdev;
3927 	rtnl_unlock();
3928 	rc = bnxt_re_dev_init(*rdev, op_type, wqe_mode);
3929 	if (rc) {
3930 ref_error:
3931 		bnxt_re_dev_unreg(*rdev);
3932 		*rdev = NULL;
3933 		/*
3934 		 * For BNXT_RE_POST_RECOVERY_INIT special case
3935 		 * called from bnxt_re_start, the work is
3936 		 * complete only after, bnxt_re_start completes
3937 		 * bnxt_unregister_device in case of failure.
3938 		 * So bnxt_re_start will decrement gadd_dev_inprogress
3939 		 * in case of failure.
3940 		 */
3941 		if (op_type != BNXT_RE_POST_RECOVERY_INIT) {
3942 			mutex_lock(&bnxt_re_dev_lock);
3943 			gadd_dev_inprogress--;
3944 			mutex_unlock(&bnxt_re_dev_lock);
3945 		}
3946 	}
3947 	dev_dbg(rdev_to_dev(*rdev), "%s: Adding rdev: %p\n", __func__, *rdev);
3948 	if (!rc) {
3949 		set_bit(BNXT_RE_FLAG_EN_DEV_NETDEV_REG, &en_info->flags);
3950 	}
3951 	return rc;
3952 }
3953 
3954 struct bnxt_re_dev *bnxt_re_get_peer_pf(struct bnxt_re_dev *rdev)
3955 {
3956 	struct pci_dev *pdev_in = rdev->en_dev->pdev;
3957 	int tmp_bus_num, bus_num = pdev_in->bus->number;
3958 	int tmp_dev_num, dev_num = PCI_SLOT(pdev_in->devfn);
3959 	int tmp_func_num, func_num = PCI_FUNC(pdev_in->devfn);
3960 	struct bnxt_re_dev *tmp_rdev;
3961 
3962 	rcu_read_lock();
3963 	list_for_each_entry_rcu(tmp_rdev, &bnxt_re_dev_list, list) {
3964 		tmp_bus_num = tmp_rdev->en_dev->pdev->bus->number;
3965 		tmp_dev_num = PCI_SLOT(tmp_rdev->en_dev->pdev->devfn);
3966 		tmp_func_num = PCI_FUNC(tmp_rdev->en_dev->pdev->devfn);
3967 
3968 		if (bus_num == tmp_bus_num && dev_num == tmp_dev_num &&
3969 		    func_num != tmp_func_num) {
3970 			rcu_read_unlock();
3971 			return tmp_rdev;
3972 		}
3973 	}
3974 	rcu_read_unlock();
3975 	return NULL;
3976 }
3977 
3978 
3979 int bnxt_re_schedule_work(struct bnxt_re_dev *rdev, unsigned long event,
3980 			  struct ifnet *vlan_dev,
3981 			  struct ifnet *netdev,
3982 			  struct auxiliary_device *adev)
3983 {
3984 	struct bnxt_re_work *re_work;
3985 
3986 	/* Allocate for the deferred task */
3987 	re_work = kzalloc(sizeof(*re_work), GFP_KERNEL);
3988 	if (!re_work)
3989 		return -ENOMEM;
3990 
3991 	re_work->rdev = rdev;
3992 	re_work->event = event;
3993 	re_work->vlan_dev = vlan_dev;
3994 	re_work->adev = adev;
3995 	INIT_WORK(&re_work->work, bnxt_re_task);
3996 	if (rdev)
3997 		atomic_inc(&rdev->sched_count);
3998 	re_work->netdev = netdev;
3999 	queue_work(bnxt_re_wq, &re_work->work);
4000 
4001 	return 0;
4002 }
4003 
4004 
4005 int bnxt_re_get_slot_pf_count(struct bnxt_re_dev *rdev)
4006 {
4007 	struct pci_dev *pdev_in = rdev->en_dev->pdev;
4008 	int tmp_bus_num, bus_num = pdev_in->bus->number;
4009 	int tmp_dev_num, dev_num = PCI_SLOT(pdev_in->devfn);
4010 	struct bnxt_re_dev *tmp_rdev;
4011 	int pf_cnt = 0;
4012 
4013 	rcu_read_lock();
4014 	list_for_each_entry_rcu(tmp_rdev, &bnxt_re_dev_list, list) {
4015 		tmp_bus_num = tmp_rdev->en_dev->pdev->bus->number;
4016 		tmp_dev_num = PCI_SLOT(tmp_rdev->en_dev->pdev->devfn);
4017 
4018 		if (bus_num == tmp_bus_num && dev_num == tmp_dev_num)
4019 			pf_cnt++;
4020 	}
4021 	rcu_read_unlock();
4022 	return pf_cnt;
4023 }
4024 
4025 /* Handle all deferred netevents tasks */
4026 static void bnxt_re_task(struct work_struct *work)
4027 {
4028 	struct bnxt_re_en_dev_info *en_info;
4029 	struct auxiliary_device *aux_dev;
4030 	struct bnxt_re_work *re_work;
4031 	struct bnxt_re_dev *rdev;
4032 
4033 	re_work = container_of(work, struct bnxt_re_work, work);
4034 
4035 	mutex_lock(&bnxt_re_mutex);
4036 	rdev = re_work->rdev;
4037 
4038 	/*
4039 	 * If the previous rdev is deleted due to bond creation
4040 	 * do not handle the event
4041 	 */
4042 	if (!bnxt_re_is_rdev_valid(rdev))
4043 		goto exit;
4044 
4045 	/* Ignore the event, if the device is not registred with IB stack. This
4046 	 * is to avoid handling any event while the device is added/removed.
4047 	 */
4048 	if (rdev && !test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
4049 		dev_dbg(rdev_to_dev(rdev), "%s: Ignoring netdev event 0x%lx",
4050 			__func__, re_work->event);
4051 		goto done;
4052 	}
4053 
4054 	/* Extra check to silence coverity. We shouldn't handle any event
4055 	 * when rdev is NULL.
4056 	 */
4057 	if (!rdev)
4058 		goto exit;
4059 
4060 	dev_dbg(rdev_to_dev(rdev), "Scheduled work for event 0x%lx",
4061 		re_work->event);
4062 
4063 	switch (re_work->event) {
4064 	case NETDEV_UP:
4065 		bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
4066 				       IB_EVENT_PORT_ACTIVE);
4067 		bnxt_re_net_register_async_event(rdev);
4068 		break;
4069 
4070 	case NETDEV_DOWN:
4071 		bnxt_qplib_dbr_pacing_set_primary_pf(rdev->chip_ctx, 0);
4072 		bnxt_re_stop_all_nonqp1_nonshadow_qps(rdev);
4073 		bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
4074 				       IB_EVENT_PORT_ERR);
4075 		break;
4076 
4077 	case NETDEV_CHANGE:
4078 		if (bnxt_re_get_link_state(rdev) == IB_PORT_DOWN) {
4079 			bnxt_re_stop_all_nonqp1_nonshadow_qps(rdev);
4080 			bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
4081 					       IB_EVENT_PORT_ERR);
4082 			break;
4083 		} else if (bnxt_re_get_link_state(rdev) == IB_PORT_ACTIVE) {
4084 			bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
4085 					       IB_EVENT_PORT_ACTIVE);
4086 		}
4087 
4088 		/* temporarily disable the check for SR2 */
4089 		if (!bnxt_qplib_query_cc_param(&rdev->qplib_res,
4090 					       &rdev->cc_param) &&
4091 		    !_is_chip_p7(rdev->chip_ctx)) {
4092 			/*
4093 			 *  Disable CC for 10G speed
4094 			 * for non p5 devices
4095 			 */
4096 			if (rdev->sl_espeed == SPEED_10000 &&
4097 			    !_is_chip_gen_p5_p7(rdev->chip_ctx)) {
4098 				if (rdev->cc_param.enable)
4099 					bnxt_re_clear_cc(rdev);
4100 			} else {
4101 				if (!rdev->cc_param.enable &&
4102 				    rdev->cc_param.admin_enable)
4103 					bnxt_re_setup_cc(rdev);
4104 			}
4105 		}
4106 		break;
4107 
4108 	case NETDEV_UNREGISTER:
4109 		bnxt_re_stopqps_and_ib_uninit(rdev);
4110 		aux_dev = rdev->adev;
4111 		if (re_work->adev)
4112 			goto done;
4113 
4114 		bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, aux_dev);
4115 
4116 		break;
4117 
4118 	default:
4119 		break;
4120 	}
4121 done:
4122 	if (rdev) {
4123 		/* memory barrier to guarantee task completion
4124 		 * before decrementing sched count
4125 		 */
4126 		mmiowb();
4127 		atomic_dec(&rdev->sched_count);
4128 	}
4129 exit:
4130 	if (re_work->adev && re_work->event == NETDEV_UNREGISTER) {
4131 		en_info = auxiliary_get_drvdata(re_work->adev);
4132 		en_info->ib_uninit_done = true;
4133 		wake_up(&en_info->waitq);
4134 	}
4135 	kfree(re_work);
4136 	mutex_unlock(&bnxt_re_mutex);
4137 }
4138 
4139 /*
4140     "Notifier chain callback can be invoked for the same chain from
4141     different CPUs at the same time".
4142 
4143     For cases when the netdev is already present, our call to the
4144     register_netdevice_notifier() will actually get the rtnl_lock()
4145     before sending NETDEV_REGISTER and (if up) NETDEV_UP
4146     events.
4147 
4148     But for cases when the netdev is not already present, the notifier
4149     chain is subjected to be invoked from different CPUs simultaneously.
4150 
4151     This is protected by the netdev_mutex.
4152 */
4153 static int bnxt_re_netdev_event(struct notifier_block *notifier,
4154 				unsigned long event, void *ptr)
4155 {
4156 	struct ifnet *real_dev, *netdev;
4157 	struct bnxt_re_dev *rdev = NULL;
4158 
4159 	netdev = netdev_notifier_info_to_ifp(ptr);
4160 	real_dev = rdma_vlan_dev_real_dev(netdev);
4161 	if (!real_dev)
4162 		real_dev = netdev;
4163 	/* In case of bonding,this will be bond's rdev */
4164 	rdev = bnxt_re_from_netdev(real_dev);
4165 
4166 	if (!rdev)
4167 		goto exit;
4168 
4169 	dev_info(rdev_to_dev(rdev), "%s: Event = %s (0x%lx), rdev %s (real_dev %s)\n",
4170 		 __func__, bnxt_re_netevent(event), event,
4171 		 rdev ? rdev->netdev ? rdev->netdev->if_dname : "->netdev = NULL" : "= NULL",
4172 		 (real_dev == netdev) ? "= netdev" : real_dev->if_dname);
4173 
4174 	if (!test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
4175 		goto exit;
4176 
4177 	bnxt_re_hold(rdev);
4178 
4179 	if (real_dev != netdev) {
4180 		switch (event) {
4181 		case NETDEV_UP:
4182 			bnxt_re_schedule_work(rdev, event, netdev,
4183 					      NULL, NULL);
4184 			break;
4185 		case NETDEV_DOWN:
4186 			break;
4187 		default:
4188 			break;
4189 		}
4190 		goto done;
4191 	}
4192 
4193 	switch (event) {
4194 	case NETDEV_CHANGEADDR:
4195 		if (!_is_chip_gen_p5_p7(rdev->chip_ctx))
4196 			bnxt_re_update_shadow_ah(rdev);
4197 		bnxt_qplib_get_guid(rdev->dev_addr,
4198 				    (u8 *)&rdev->ibdev.node_guid);
4199 		break;
4200 
4201 	case NETDEV_CHANGE:
4202 		bnxt_re_get_link_speed(rdev);
4203 		bnxt_re_schedule_work(rdev, event, NULL, NULL, NULL);
4204 		break;
4205 	case NETDEV_UNREGISTER:
4206 		/* netdev notifier will call NETDEV_UNREGISTER again later since
4207 		 * we are still holding the reference to the netdev
4208 		 */
4209 
4210 		/*
4211 		 *  Workaround to avoid ib_unregister hang. Check for module
4212 		 *  reference and dont free up the device if the reference
4213 		 *  is non zero. Checking only for PF functions.
4214 		 */
4215 
4216 		if (rdev) {
4217 			dev_info(rdev_to_dev(rdev),
4218 				 "bnxt_re:Unreg recvd when module refcnt > 0");
4219 			dev_info(rdev_to_dev(rdev),
4220 				 "bnxt_re:Close all apps using bnxt_re devs");
4221 			dev_info(rdev_to_dev(rdev),
4222 				 "bnxt_re:Remove the configfs entry created for the device");
4223 			dev_info(rdev_to_dev(rdev),
4224 				 "bnxt_re:Refer documentation for details");
4225 			goto done;
4226 		}
4227 
4228 		if (atomic_read(&rdev->sched_count) > 0)
4229 			goto done;
4230 		if (!rdev->unreg_sched) {
4231 			bnxt_re_schedule_work(rdev, NETDEV_UNREGISTER,
4232 					      NULL, NULL, NULL);
4233 			rdev->unreg_sched = true;
4234 			goto done;
4235 		}
4236 
4237 		break;
4238 	default:
4239 		break;
4240 	}
4241 done:
4242 	if (rdev)
4243 		bnxt_re_put(rdev);
4244 exit:
4245 	return NOTIFY_DONE;
4246 }
4247 
4248 static struct notifier_block bnxt_re_netdev_notifier = {
4249 	.notifier_call = bnxt_re_netdev_event
4250 };
4251 
4252 static void bnxt_re_remove_base_interface(struct bnxt_re_dev *rdev,
4253 					  struct auxiliary_device *adev)
4254 {
4255 	bnxt_re_stopqps_and_ib_uninit(rdev);
4256 	bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, adev);
4257 	auxiliary_set_drvdata(adev, NULL);
4258 }
4259 
4260 /*
4261  *  bnxt_re_remove  -	Removes the roce aux device
4262  *  @adev  -  aux device pointer
4263  *
4264  * This function removes the roce device. This gets
4265  * called in the mod exit path and pci unbind path.
4266  * If the rdev is bond interace, destroys the lag
4267  * in module exit path, and in pci unbind case
4268  * destroys the lag and recreates other base interface.
4269  * If the device is already removed in error recovery
4270  * path, it just unregister with the L2.
4271  */
4272 static void bnxt_re_remove(struct auxiliary_device *adev)
4273 {
4274 	struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
4275 	struct bnxt_en_dev *en_dev;
4276 	struct bnxt_re_dev *rdev;
4277 	bool primary_dev = false;
4278 	bool secondary_dev = false;
4279 
4280 	if (!en_info)
4281 		return;
4282 
4283 	mutex_lock(&bnxt_re_mutex);
4284 	en_dev = en_info->en_dev;
4285 
4286 	rdev = en_info->rdev;
4287 
4288 	if (rdev && bnxt_re_is_rdev_valid(rdev)) {
4289 		if (pci_channel_offline(rdev->rcfw.pdev))
4290 			set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
4291 
4292 		if (test_bit(BNXT_RE_FLAG_EN_DEV_PRIMARY_DEV, &en_info->flags))
4293 			primary_dev = true;
4294 		if (test_bit(BNXT_RE_FLAG_EN_DEV_SECONDARY_DEV, &en_info->flags))
4295 			secondary_dev = true;
4296 
4297 		/*
4298 		 * en_dev_info of primary device and secondary device have the
4299 		 * same rdev pointer when LAG is configured. This rdev pointer
4300 		 * is rdev of bond interface.
4301 		 */
4302 		if (!primary_dev && !secondary_dev) {
4303 			/* removal of non bond interface */
4304 			bnxt_re_remove_base_interface(rdev, adev);
4305 		} else {
4306 			/*
4307 			 * removal of bond primary/secondary interface. In this
4308 			 * case bond device is already removed, so rdev->binfo
4309 			 * is NULL.
4310 			 */
4311 			auxiliary_set_drvdata(adev, NULL);
4312 		}
4313 	} else {
4314 		/* device is removed from ulp stop, unregister the net dev */
4315 		if (test_bit(BNXT_RE_FLAG_EN_DEV_NETDEV_REG, &en_info->flags)) {
4316 			rtnl_lock();
4317 			en_dev->en_ops->bnxt_unregister_device(en_dev,
4318 							       BNXT_ROCE_ULP);
4319 			rtnl_unlock();
4320 		}
4321 	}
4322 	mutex_unlock(&bnxt_re_mutex);
4323 	return;
4324 }
4325 
/*
 * Non-static wrapper around bnxt_re_remove(), for callers outside this
 * file that remove the device from user context.
 */
void _bnxt_re_remove(struct auxiliary_device *adev)
{
	bnxt_re_remove(adev);
}
4331 
/*
 * Second stage of IB bring-up: issue the initial device statistics
 * query and register for async events from the net driver.  A failed
 * statistics query is only logged; async event registration is done
 * regardless.
 */
static void bnxt_re_ib_init_2(struct bnxt_re_dev *rdev)
{
	if (bnxt_re_get_device_stats(rdev))
		dev_err(rdev_to_dev(rdev),
			"Failed initial device stat query");

	bnxt_re_net_register_async_event(rdev);
}
4343 
4344 static int bnxt_re_probe(struct auxiliary_device *adev,
4345 			 const struct auxiliary_device_id *id)
4346 {
4347 	struct bnxt_aux_dev *aux_dev =
4348 		container_of(adev, struct bnxt_aux_dev, aux_dev);
4349 	struct bnxt_re_en_dev_info *en_info;
4350 	struct bnxt_en_dev *en_dev = NULL;
4351 	struct bnxt_re_dev *rdev;
4352 	int rc = -ENODEV;
4353 
4354 	if (aux_dev)
4355 		en_dev = aux_dev->edev;
4356 
4357 	if (!en_dev)
4358 		return rc;
4359 
4360 	if (en_dev->ulp_version != BNXT_ULP_VERSION) {
4361 		pr_err("%s: probe error: bnxt_en ulp version magic %x is not compatible!\n",
4362 			ROCE_DRV_MODULE_NAME, en_dev->ulp_version);
4363 		return -EINVAL;
4364 	}
4365 
4366 	en_info = kzalloc(sizeof(*en_info), GFP_KERNEL);
4367 	if (!en_info)
4368 		return -ENOMEM;
4369 	memset(en_info, 0, sizeof(struct bnxt_re_en_dev_info));
4370 	en_info->en_dev = en_dev;
4371 	auxiliary_set_drvdata(adev, en_info);
4372 
4373 	mutex_lock(&bnxt_re_mutex);
4374 	rc = bnxt_re_add_device(&rdev, en_dev->net,
4375 				BNXT_RE_GSI_MODE_ALL,
4376 				BNXT_RE_COMPLETE_INIT,
4377 				BNXT_QPLIB_WQE_MODE_STATIC,
4378 				BNXT_RE_MSIX_FROM_MOD_PARAM, adev);
4379 	if (rc) {
4380 		mutex_unlock(&bnxt_re_mutex);
4381 		return rc;
4382 	}
4383 
4384 	rc = bnxt_re_ib_init(rdev);
4385 	if (rc)
4386 		goto err;
4387 
4388 	bnxt_re_ib_init_2(rdev);
4389 
4390 	dev_dbg(rdev_to_dev(rdev), "%s: adev: %p\n", __func__, adev);
4391 	rdev->adev = adev;
4392 
4393 	mutex_unlock(&bnxt_re_mutex);
4394 
4395 	return 0;
4396 
4397 err:
4398 	mutex_unlock(&bnxt_re_mutex);
4399 	bnxt_re_remove(adev);
4400 
4401 	return rc;
4402 }
4403 
4404 static const struct auxiliary_device_id bnxt_re_id_table[] = {
4405 	{ .name = BNXT_ADEV_NAME ".rdma", },
4406 	{},
4407 };
4408 
4409 MODULE_DEVICE_TABLE(auxiliary, bnxt_re_id_table);
4410 
4411 static struct auxiliary_driver bnxt_re_driver = {
4412 	.name = "rdma",
4413 	.probe = bnxt_re_probe,
4414 	.remove = bnxt_re_remove,
4415 	.id_table = bnxt_re_id_table,
4416 };
4417 
4418 static int __init bnxt_re_mod_init(void)
4419 {
4420 	int rc = 0;
4421 
4422 	pr_info("%s: %s", ROCE_DRV_MODULE_NAME, drv_version);
4423 
4424 	bnxt_re_wq = create_singlethread_workqueue("bnxt_re");
4425 	if (!bnxt_re_wq)
4426 		return -ENOMEM;
4427 
4428 	rc = bnxt_re_register_netdevice_notifier(&bnxt_re_netdev_notifier);
4429 	if (rc) {
4430 		pr_err("%s: Cannot register to netdevice_notifier",
4431 			ROCE_DRV_MODULE_NAME);
4432 		goto err_netdev;
4433 	}
4434 
4435 	INIT_LIST_HEAD(&bnxt_re_dev_list);
4436 
4437 	rc = auxiliary_driver_register(&bnxt_re_driver);
4438 	if (rc) {
4439 		pr_err("%s: Failed to register auxiliary driver\n",
4440 		       ROCE_DRV_MODULE_NAME);
4441 		goto err_auxdrv;
4442 	}
4443 
4444 	return 0;
4445 
4446 err_auxdrv:
4447 	bnxt_re_unregister_netdevice_notifier(&bnxt_re_netdev_notifier);
4448 
4449 err_netdev:
4450 	destroy_workqueue(bnxt_re_wq);
4451 
4452 	return rc;
4453 }
4454 
4455 static void __exit bnxt_re_mod_exit(void)
4456 {
4457 	gmod_exit = 1;
4458 	auxiliary_driver_unregister(&bnxt_re_driver);
4459 
4460 	bnxt_re_unregister_netdevice_notifier(&bnxt_re_netdev_notifier);
4461 
4462 	if (bnxt_re_wq)
4463 		destroy_workqueue(bnxt_re_wq);
4464 }
4465 
4466 module_init(bnxt_re_mod_init);
4467 module_exit(bnxt_re_mod_exit);
4468