xref: /linux/drivers/infiniband/hw/bnxt_re/main.c (revision 643e2e259c2b25a2af0ae4c23c6e16586d9fd19c)
1 /*
2  * Broadcom NetXtreme-E RoCE driver.
3  *
4  * Copyright (c) 2016 - 2017, Broadcom. All rights reserved.  The term
5  * Broadcom refers to Broadcom Limited and/or its subsidiaries.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * BSD license below:
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  *
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in
21  *    the documentation and/or other materials provided with the
22  *    distribution.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
25  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
26  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
28  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
31  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
32  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
33  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
34  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35  *
36  * Description: Main component of the bnxt_re driver
37  */
38 
39 #include <linux/module.h>
40 #include <linux/netdevice.h>
41 #include <linux/ethtool.h>
42 #include <linux/mutex.h>
43 #include <linux/list.h>
44 #include <linux/rculist.h>
45 #include <linux/spinlock.h>
46 #include <linux/pci.h>
47 #include <net/dcbnl.h>
48 #include <net/ipv6.h>
49 #include <net/addrconf.h>
50 #include <linux/if_ether.h>
51 #include <linux/auxiliary_bus.h>
52 
53 #include <rdma/ib_verbs.h>
54 #include <rdma/ib_user_verbs.h>
55 #include <rdma/ib_umem.h>
56 #include <rdma/ib_addr.h>
57 #include <linux/hashtable.h>
58 
59 #include "bnxt_ulp.h"
60 #include "roce_hsi.h"
61 #include "qplib_res.h"
62 #include "qplib_sp.h"
63 #include "qplib_fp.h"
64 #include "qplib_rcfw.h"
65 #include "bnxt_re.h"
66 #include "ib_verbs.h"
67 #include <rdma/bnxt_re-abi.h>
68 #include "bnxt.h"
69 #include "hw_counters.h"
70 #include "debugfs.h"
71 
/* Banner string: the driver description (BNXT_RE_DESC) followed by a newline. */
static char version[] =
		BNXT_RE_DESC "\n";

/* Module metadata. */
MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
MODULE_DESCRIPTION(BNXT_RE_DESC);
MODULE_LICENSE("Dual BSD/GPL");

/* globals */
/* File-scope mutex; what it serialises is not visible in this part of the file. */
static DEFINE_MUTEX(bnxt_re_mutex);

/*
 * Forward declarations of file-local functions that are referenced before
 * their definitions appear.
 */
static void bnxt_re_stop_irq(void *handle);
static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev);
static int bnxt_re_netdev_event(struct notifier_block *notifier,
				unsigned long event, void *ptr);
static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev);
static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type);
static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev);

static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
			     u32 *offset);
static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable);
93 static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev)
94 {
95 	struct bnxt_qplib_chip_ctx *cctx;
96 	struct bnxt_en_dev *en_dev;
97 	struct bnxt_qplib_res *res;
98 	u32 l2db_len = 0;
99 	u32 offset = 0;
100 	u32 barlen;
101 	int rc;
102 
103 	res = &rdev->qplib_res;
104 	en_dev = rdev->en_dev;
105 	cctx = rdev->chip_ctx;
106 
107 	/* Issue qcfg */
108 	rc = bnxt_re_hwrm_qcfg(rdev, &l2db_len, &offset);
109 	if (rc)
110 		dev_info(rdev_to_dev(rdev),
111 			 "Couldn't get DB bar size, Low latency framework is disabled\n");
112 	/* set register offsets for both UC and WC */
113 	if (bnxt_qplib_is_chip_gen_p7(cctx)) {
114 		res->dpi_tbl.ucreg.offset = offset;
115 		res->dpi_tbl.wcreg.offset = en_dev->l2_db_size;
116 	} else {
117 		res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET :
118 							 BNXT_QPLIB_DBR_PF_DB_OFFSET;
119 		res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset;
120 	}
121 
122 	/* If WC mapping is disabled by L2 driver then en_dev->l2_db_size
123 	 * is equal to the DB-Bar actual size. This indicates that L2
124 	 * is mapping entire bar as UC-. RoCE driver can't enable WC mapping
125 	 * in such cases and DB-push will be disabled.
126 	 */
127 	barlen = pci_resource_len(res->pdev, RCFW_DBR_PCI_BAR_REGION);
128 	if (cctx->modes.db_push && l2db_len && en_dev->l2_db_size != barlen) {
129 		res->dpi_tbl.wcreg.offset = en_dev->l2_db_size;
130 		dev_info(rdev_to_dev(rdev),  "Low latency framework is enabled\n");
131 	}
132 }
133 
134 static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev)
135 {
136 	struct bnxt_qplib_chip_ctx *cctx;
137 
138 	cctx = rdev->chip_ctx;
139 	cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ?
140 			       BNXT_QPLIB_WQE_MODE_VARIABLE : BNXT_QPLIB_WQE_MODE_STATIC;
141 	if (bnxt_re_hwrm_qcaps(rdev))
142 		dev_err(rdev_to_dev(rdev),
143 			"Failed to query hwrm qcaps\n");
144 	if (bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx)) {
145 		cctx->modes.toggle_bits |= BNXT_QPLIB_CQ_TOGGLE_BIT;
146 		cctx->modes.toggle_bits |= BNXT_QPLIB_SRQ_TOGGLE_BIT;
147 	}
148 }
149 
150 static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
151 {
152 	struct bnxt_qplib_chip_ctx *chip_ctx;
153 
154 	if (!rdev->chip_ctx)
155 		return;
156 	chip_ctx = rdev->chip_ctx;
157 	rdev->chip_ctx = NULL;
158 	rdev->rcfw.res = NULL;
159 	rdev->qplib_res.cctx = NULL;
160 	rdev->qplib_res.pdev = NULL;
161 	rdev->qplib_res.netdev = NULL;
162 	kfree(chip_ctx);
163 }
164 
165 static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
166 {
167 	struct bnxt_qplib_chip_ctx *chip_ctx;
168 	struct bnxt_en_dev *en_dev;
169 	int rc;
170 
171 	en_dev = rdev->en_dev;
172 
173 	rdev->qplib_res.pdev = en_dev->pdev;
174 	chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL);
175 	if (!chip_ctx)
176 		return -ENOMEM;
177 	chip_ctx->chip_num = en_dev->chip_num;
178 	chip_ctx->hw_stats_size = en_dev->hw_ring_stats_size;
179 
180 	rdev->chip_ctx = chip_ctx;
181 	/* rest members to follow eventually */
182 
183 	rdev->qplib_res.cctx = rdev->chip_ctx;
184 	rdev->rcfw.res = &rdev->qplib_res;
185 	rdev->qplib_res.dattr = &rdev->dev_attr;
186 	rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev);
187 	rdev->qplib_res.en_dev = en_dev;
188 
189 	bnxt_re_set_drv_mode(rdev);
190 
191 	bnxt_re_set_db_offset(rdev);
192 	rc = bnxt_qplib_map_db_bar(&rdev->qplib_res);
193 	if (rc) {
194 		kfree(rdev->chip_ctx);
195 		rdev->chip_ctx = NULL;
196 		return rc;
197 	}
198 
199 	if (bnxt_qplib_determine_atomics(en_dev->pdev))
200 		ibdev_info(&rdev->ibdev,
201 			   "platform doesn't support global atomics.");
202 	return 0;
203 }
204 
205 /* SR-IOV helper functions */
206 
207 static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev)
208 {
209 	if (BNXT_EN_VF(rdev->en_dev))
210 		rdev->is_virtfn = 1;
211 }
212 
213 /* Set the maximum number of each resource that the driver actually wants
214  * to allocate. This may be up to the maximum number the firmware has
215  * reserved for the function. The driver may choose to allocate fewer
216  * resources than the firmware maximum.
217  */
218 static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev)
219 {
220 	struct bnxt_qplib_dev_attr *attr;
221 	struct bnxt_qplib_ctx *ctx;
222 	int i;
223 
224 	attr = &rdev->dev_attr;
225 	ctx = &rdev->qplib_ctx;
226 
227 	ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT,
228 			       attr->max_qp);
229 	ctx->mrw_count = BNXT_RE_MAX_MRW_COUNT_256K;
230 	/* Use max_mr from fw since max_mrw does not get set */
231 	ctx->mrw_count = min_t(u32, ctx->mrw_count, attr->max_mr);
232 	ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
233 				attr->max_srq);
234 	ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq);
235 	if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
236 		for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
237 			rdev->qplib_ctx.tqm_ctx.qcount[i] =
238 			rdev->dev_attr.tqm_alloc_reqs[i];
239 }
240 
241 static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf)
242 {
243 	struct bnxt_qplib_vf_res *vf_res;
244 	u32 mrws = 0;
245 	u32 vf_pct;
246 	u32 nvfs;
247 
248 	vf_res = &qplib_ctx->vf_res;
249 	/*
250 	 * Reserve a set of resources for the PF. Divide the remaining
251 	 * resources among the VFs
252 	 */
253 	vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF;
254 	nvfs = num_vf;
255 	num_vf = 100 * num_vf;
256 	vf_res->max_qp_per_vf = (qplib_ctx->qpc_count * vf_pct) / num_vf;
257 	vf_res->max_srq_per_vf = (qplib_ctx->srqc_count * vf_pct) / num_vf;
258 	vf_res->max_cq_per_vf = (qplib_ctx->cq_count * vf_pct) / num_vf;
259 	/*
260 	 * The driver allows many more MRs than other resources. If the
261 	 * firmware does also, then reserve a fixed amount for the PF and
262 	 * divide the rest among VFs. VFs may use many MRs for NFS
263 	 * mounts, ISER, NVME applications, etc. If the firmware severely
264 	 * restricts the number of MRs, then let PF have half and divide
265 	 * the rest among VFs, as for the other resource types.
266 	 */
267 	if (qplib_ctx->mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) {
268 		mrws = qplib_ctx->mrw_count * vf_pct;
269 		nvfs = num_vf;
270 	} else {
271 		mrws = qplib_ctx->mrw_count - BNXT_RE_RESVD_MR_FOR_PF;
272 	}
273 	vf_res->max_mrw_per_vf = (mrws / nvfs);
274 	vf_res->max_gid_per_vf = BNXT_RE_MAX_GID_PER_VF;
275 }
276 
277 static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
278 {
279 	u32 num_vfs;
280 
281 	memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res));
282 	bnxt_re_limit_pf_res(rdev);
283 
284 	num_vfs =  bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ?
285 			BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs;
286 	if (num_vfs)
287 		bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs);
288 }
289 
290 static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev)
291 {
292 	/*
293 	 * Use the total VF count since the actual VF count may not be
294 	 * available at this point.
295 	 */
296 	rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev);
297 	if (!rdev->num_vfs)
298 		return;
299 
300 	bnxt_re_set_resource_limits(rdev);
301 	bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
302 				      &rdev->qplib_ctx);
303 }
304 
305 static void bnxt_re_shutdown(struct auxiliary_device *adev)
306 {
307 	struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
308 	struct bnxt_re_dev *rdev;
309 
310 	rdev = en_info->rdev;
311 	ib_unregister_device(&rdev->ibdev);
312 	bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
313 }
314 
315 static void bnxt_re_stop_irq(void *handle)
316 {
317 	struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
318 	struct bnxt_qplib_rcfw *rcfw;
319 	struct bnxt_re_dev *rdev;
320 	struct bnxt_qplib_nq *nq;
321 	int indx;
322 
323 	rdev = en_info->rdev;
324 	rcfw = &rdev->rcfw;
325 
326 	for (indx = BNXT_RE_NQ_IDX; indx < rdev->nqr->num_msix; indx++) {
327 		nq = &rdev->nqr->nq[indx - 1];
328 		bnxt_qplib_nq_stop_irq(nq, false);
329 	}
330 
331 	bnxt_qplib_rcfw_stop_irq(rcfw, false);
332 }
333 
334 static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
335 {
336 	struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
337 	struct bnxt_msix_entry *msix_ent;
338 	struct bnxt_qplib_rcfw *rcfw;
339 	struct bnxt_re_dev *rdev;
340 	struct bnxt_qplib_nq *nq;
341 	int indx, rc;
342 
343 	rdev = en_info->rdev;
344 	msix_ent = rdev->nqr->msix_entries;
345 	rcfw = &rdev->rcfw;
346 	if (!ent) {
347 		/* Not setting the f/w timeout bit in rcfw.
348 		 * During the driver unload the first command
349 		 * to f/w will timeout and that will set the
350 		 * timeout bit.
351 		 */
352 		ibdev_err(&rdev->ibdev, "Failed to re-start IRQs\n");
353 		return;
354 	}
355 
356 	/* Vectors may change after restart, so update with new vectors
357 	 * in device sctructure.
358 	 */
359 	for (indx = 0; indx < rdev->nqr->num_msix; indx++)
360 		rdev->nqr->msix_entries[indx].vector = ent[indx].vector;
361 
362 	rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector,
363 				       false);
364 	if (rc) {
365 		ibdev_warn(&rdev->ibdev, "Failed to reinit CREQ\n");
366 		return;
367 	}
368 	for (indx = BNXT_RE_NQ_IDX ; indx < rdev->nqr->num_msix; indx++) {
369 		nq = &rdev->nqr->nq[indx - 1];
370 		rc = bnxt_qplib_nq_start_irq(nq, indx - 1,
371 					     msix_ent[indx].vector, false);
372 		if (rc) {
373 			ibdev_warn(&rdev->ibdev, "Failed to reinit NQ index %d\n",
374 				   indx - 1);
375 			return;
376 		}
377 	}
378 }
379 
380 static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
381 	.ulp_irq_stop = bnxt_re_stop_irq,
382 	.ulp_irq_restart = bnxt_re_start_irq
383 };
384 
385 /* RoCE -> Net driver */
386 
387 static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
388 {
389 	struct bnxt_en_dev *en_dev;
390 
391 	en_dev = rdev->en_dev;
392 	return bnxt_register_dev(en_dev, &bnxt_re_ulp_ops, rdev->adev);
393 }
394 
395 static void bnxt_re_init_hwrm_hdr(struct input *hdr, u16 opcd)
396 {
397 	hdr->req_type = cpu_to_le16(opcd);
398 	hdr->cmpl_ring = cpu_to_le16(-1);
399 	hdr->target_id = cpu_to_le16(-1);
400 }
401 
402 static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
403 				int msg_len, void *resp, int resp_max_len,
404 				int timeout)
405 {
406 	fw_msg->msg = msg;
407 	fw_msg->msg_len = msg_len;
408 	fw_msg->resp = resp;
409 	fw_msg->resp_max_len = resp_max_len;
410 	fw_msg->timeout = timeout;
411 }
412 
413 /* Query device config using common hwrm */
414 static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
415 			     u32 *offset)
416 {
417 	struct bnxt_en_dev *en_dev = rdev->en_dev;
418 	struct hwrm_func_qcfg_output resp = {0};
419 	struct hwrm_func_qcfg_input req = {0};
420 	struct bnxt_fw_msg fw_msg = {};
421 	int rc;
422 
423 	bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCFG);
424 	req.fid = cpu_to_le16(0xffff);
425 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
426 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
427 	rc = bnxt_send_msg(en_dev, &fw_msg);
428 	if (!rc) {
429 		*db_len = PAGE_ALIGN(le16_to_cpu(resp.l2_doorbell_bar_size_kb) * 1024);
430 		*offset = PAGE_ALIGN(le16_to_cpu(resp.legacy_l2_db_size_kb) * 1024);
431 	}
432 	return rc;
433 }
434 
435 /* Query function capabilities using common hwrm */
436 int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev)
437 {
438 	struct bnxt_en_dev *en_dev = rdev->en_dev;
439 	struct hwrm_func_qcaps_output resp = {};
440 	struct hwrm_func_qcaps_input req = {};
441 	struct bnxt_qplib_chip_ctx *cctx;
442 	struct bnxt_fw_msg fw_msg = {};
443 	u32 flags_ext2;
444 	int rc;
445 
446 	cctx = rdev->chip_ctx;
447 	bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCAPS);
448 	req.fid = cpu_to_le16(0xffff);
449 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
450 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
451 
452 	rc = bnxt_send_msg(en_dev, &fw_msg);
453 	if (rc)
454 		return rc;
455 	cctx->modes.db_push = le32_to_cpu(resp.flags) & FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE;
456 
457 	flags_ext2 = le32_to_cpu(resp.flags_ext2);
458 	cctx->modes.dbr_pacing = flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED ||
459 				 flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_V0_SUPPORTED;
460 	return 0;
461 }
462 
463 static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev)
464 {
465 	struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
466 	struct hwrm_func_dbr_pacing_qcfg_output resp = {};
467 	struct hwrm_func_dbr_pacing_qcfg_input req = {};
468 	struct bnxt_en_dev *en_dev = rdev->en_dev;
469 	struct bnxt_qplib_chip_ctx *cctx;
470 	struct bnxt_fw_msg fw_msg = {};
471 	int rc;
472 
473 	cctx = rdev->chip_ctx;
474 	bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_DBR_PACING_QCFG);
475 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
476 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
477 	rc = bnxt_send_msg(en_dev, &fw_msg);
478 	if (rc)
479 		return rc;
480 
481 	if ((le32_to_cpu(resp.dbr_stat_db_fifo_reg) &
482 	    FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK) ==
483 		FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_GRC)
484 		cctx->dbr_stat_db_fifo =
485 			le32_to_cpu(resp.dbr_stat_db_fifo_reg) &
486 			~FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK;
487 
488 	pacing_data->fifo_max_depth = le32_to_cpu(resp.dbr_stat_db_max_fifo_depth);
489 	if (!pacing_data->fifo_max_depth)
490 		pacing_data->fifo_max_depth = BNXT_RE_MAX_FIFO_DEPTH(cctx);
491 	pacing_data->fifo_room_mask = le32_to_cpu(resp.dbr_stat_db_fifo_reg_fifo_room_mask);
492 	pacing_data->fifo_room_shift = resp.dbr_stat_db_fifo_reg_fifo_room_shift;
493 
494 	return 0;
495 }
496 
497 /* Update the pacing tunable parameters to the default values */
498 static void bnxt_re_set_default_pacing_data(struct bnxt_re_dev *rdev)
499 {
500 	struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
501 
502 	pacing_data->do_pacing = rdev->pacing.dbr_def_do_pacing;
503 	pacing_data->pacing_th = rdev->pacing.pacing_algo_th;
504 	pacing_data->alarm_th =
505 		pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE;
506 }
507 
508 static u32 __get_fifo_occupancy(struct bnxt_re_dev *rdev)
509 {
510 	struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
511 	u32 read_val, fifo_occup;
512 
513 	read_val = readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off);
514 	fifo_occup = pacing_data->fifo_max_depth -
515 		     ((read_val & pacing_data->fifo_room_mask) >>
516 		      pacing_data->fifo_room_shift);
517 	return fifo_occup;
518 }
519 
520 static bool is_dbr_fifo_full(struct bnxt_re_dev *rdev)
521 {
522 	u32 max_occup, fifo_occup;
523 
524 	fifo_occup = __get_fifo_occupancy(rdev);
525 	max_occup = BNXT_RE_MAX_FIFO_DEPTH(rdev->chip_ctx) - 1;
526 	if (fifo_occup == max_occup)
527 		return true;
528 
529 	return false;
530 }
531 
532 static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev)
533 {
534 	struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
535 	u32 retry_fifo_check = 1000;
536 	u32 fifo_occup;
537 
538 	/* loop shouldn't run infintely as the occupancy usually goes
539 	 * below pacing algo threshold as soon as pacing kicks in.
540 	 */
541 	while (1) {
542 		fifo_occup = __get_fifo_occupancy(rdev);
543 		/* Fifo occupancy cannot be greater the MAX FIFO depth */
544 		if (fifo_occup > pacing_data->fifo_max_depth)
545 			break;
546 
547 		if (fifo_occup < pacing_data->pacing_th)
548 			break;
549 		if (!retry_fifo_check--) {
550 			dev_info_once(rdev_to_dev(rdev),
551 				      "%s: fifo_occup = 0x%xfifo_max_depth = 0x%x pacing_th = 0x%x\n",
552 				      __func__, fifo_occup, pacing_data->fifo_max_depth,
553 					pacing_data->pacing_th);
554 			break;
555 		}
556 
557 	}
558 }
559 
560 static void bnxt_re_db_fifo_check(struct work_struct *work)
561 {
562 	struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
563 			dbq_fifo_check_work);
564 	struct bnxt_qplib_db_pacing_data *pacing_data;
565 	u32 pacing_save;
566 
567 	if (!mutex_trylock(&rdev->pacing.dbq_lock))
568 		return;
569 	pacing_data = rdev->qplib_res.pacing_data;
570 	pacing_save = rdev->pacing.do_pacing_save;
571 	__wait_for_fifo_occupancy_below_th(rdev);
572 	cancel_delayed_work_sync(&rdev->dbq_pacing_work);
573 	if (pacing_save > rdev->pacing.dbr_def_do_pacing) {
574 		/* Double the do_pacing value during the congestion */
575 		pacing_save = pacing_save << 1;
576 	} else {
577 		/*
578 		 * when a new congestion is detected increase the do_pacing
579 		 * by 8 times. And also increase the pacing_th by 4 times. The
580 		 * reason to increase pacing_th is to give more space for the
581 		 * queue to oscillate down without getting empty, but also more
582 		 * room for the queue to increase without causing another alarm.
583 		 */
584 		pacing_save = pacing_save << 3;
585 		pacing_data->pacing_th = rdev->pacing.pacing_algo_th * 4;
586 	}
587 
588 	if (pacing_save > BNXT_RE_MAX_DBR_DO_PACING)
589 		pacing_save = BNXT_RE_MAX_DBR_DO_PACING;
590 
591 	pacing_data->do_pacing = pacing_save;
592 	rdev->pacing.do_pacing_save = pacing_data->do_pacing;
593 	pacing_data->alarm_th =
594 		pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE;
595 	schedule_delayed_work(&rdev->dbq_pacing_work,
596 			      msecs_to_jiffies(rdev->pacing.dbq_pacing_time));
597 	rdev->stats.pacing.alerts++;
598 	mutex_unlock(&rdev->pacing.dbq_lock);
599 }
600 
601 static void bnxt_re_pacing_timer_exp(struct work_struct *work)
602 {
603 	struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
604 			dbq_pacing_work.work);
605 	struct bnxt_qplib_db_pacing_data *pacing_data;
606 	u32 fifo_occup;
607 
608 	if (!mutex_trylock(&rdev->pacing.dbq_lock))
609 		return;
610 
611 	pacing_data = rdev->qplib_res.pacing_data;
612 	fifo_occup = __get_fifo_occupancy(rdev);
613 
614 	if (fifo_occup > pacing_data->pacing_th)
615 		goto restart_timer;
616 
617 	/*
618 	 * Instead of immediately going back to the default do_pacing
619 	 * reduce it by 1/8 times and restart the timer.
620 	 */
621 	pacing_data->do_pacing = pacing_data->do_pacing - (pacing_data->do_pacing >> 3);
622 	pacing_data->do_pacing = max_t(u32, rdev->pacing.dbr_def_do_pacing, pacing_data->do_pacing);
623 	if (pacing_data->do_pacing <= rdev->pacing.dbr_def_do_pacing) {
624 		bnxt_re_set_default_pacing_data(rdev);
625 		rdev->stats.pacing.complete++;
626 		goto dbq_unlock;
627 	}
628 
629 restart_timer:
630 	schedule_delayed_work(&rdev->dbq_pacing_work,
631 			      msecs_to_jiffies(rdev->pacing.dbq_pacing_time));
632 	rdev->stats.pacing.resched++;
633 dbq_unlock:
634 	rdev->pacing.do_pacing_save = pacing_data->do_pacing;
635 	mutex_unlock(&rdev->pacing.dbq_lock);
636 }
637 
638 void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev)
639 {
640 	struct bnxt_qplib_db_pacing_data *pacing_data;
641 
642 	if (!rdev->pacing.dbr_pacing)
643 		return;
644 	mutex_lock(&rdev->pacing.dbq_lock);
645 	pacing_data = rdev->qplib_res.pacing_data;
646 
647 	/*
648 	 * Increase the alarm_th to max so that other user lib instances do not
649 	 * keep alerting the driver.
650 	 */
651 	pacing_data->alarm_th = pacing_data->fifo_max_depth;
652 	pacing_data->do_pacing = BNXT_RE_MAX_DBR_DO_PACING;
653 	cancel_work_sync(&rdev->dbq_fifo_check_work);
654 	schedule_work(&rdev->dbq_fifo_check_work);
655 	mutex_unlock(&rdev->pacing.dbq_lock);
656 }
657 
658 static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev)
659 {
660 	/* Allocate a page for app use */
661 	rdev->pacing.dbr_page = (void *)__get_free_page(GFP_KERNEL);
662 	if (!rdev->pacing.dbr_page)
663 		return -ENOMEM;
664 
665 	memset((u8 *)rdev->pacing.dbr_page, 0, PAGE_SIZE);
666 	rdev->qplib_res.pacing_data = (struct bnxt_qplib_db_pacing_data *)rdev->pacing.dbr_page;
667 
668 	if (bnxt_re_hwrm_dbr_pacing_qcfg(rdev)) {
669 		free_page((u64)rdev->pacing.dbr_page);
670 		rdev->pacing.dbr_page = NULL;
671 		return -EIO;
672 	}
673 
674 	/* MAP HW window 2 for reading db fifo depth */
675 	writel(rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_BASE_MASK,
676 	       rdev->en_dev->bar0 + BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4);
677 	rdev->pacing.dbr_db_fifo_reg_off =
678 		(rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_OFFSET_MASK) +
679 		 BNXT_RE_GRC_FIFO_REG_BASE;
680 	rdev->pacing.dbr_bar_addr =
681 		pci_resource_start(rdev->qplib_res.pdev, 0) + rdev->pacing.dbr_db_fifo_reg_off;
682 
683 	if (is_dbr_fifo_full(rdev)) {
684 		free_page((u64)rdev->pacing.dbr_page);
685 		rdev->pacing.dbr_page = NULL;
686 		return -EIO;
687 	}
688 
689 	rdev->pacing.pacing_algo_th = BNXT_RE_PACING_ALGO_THRESHOLD;
690 	rdev->pacing.dbq_pacing_time = BNXT_RE_DBR_PACING_TIME;
691 	rdev->pacing.dbr_def_do_pacing = BNXT_RE_DBR_DO_PACING_NO_CONGESTION;
692 	rdev->pacing.do_pacing_save = rdev->pacing.dbr_def_do_pacing;
693 	rdev->qplib_res.pacing_data->grc_reg_offset = rdev->pacing.dbr_db_fifo_reg_off;
694 	bnxt_re_set_default_pacing_data(rdev);
695 	/* Initialize worker for DBR Pacing */
696 	INIT_WORK(&rdev->dbq_fifo_check_work, bnxt_re_db_fifo_check);
697 	INIT_DELAYED_WORK(&rdev->dbq_pacing_work, bnxt_re_pacing_timer_exp);
698 	return 0;
699 }
700 
701 static void bnxt_re_deinitialize_dbr_pacing(struct bnxt_re_dev *rdev)
702 {
703 	cancel_work_sync(&rdev->dbq_fifo_check_work);
704 	cancel_delayed_work_sync(&rdev->dbq_pacing_work);
705 	if (rdev->pacing.dbr_page)
706 		free_page((u64)rdev->pacing.dbr_page);
707 
708 	rdev->pacing.dbr_page = NULL;
709 	rdev->pacing.dbr_pacing = false;
710 }
711 
712 static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev,
713 				 u16 fw_ring_id, int type)
714 {
715 	struct bnxt_en_dev *en_dev;
716 	struct hwrm_ring_free_input req = {};
717 	struct hwrm_ring_free_output resp;
718 	struct bnxt_fw_msg fw_msg = {};
719 	int rc = -EINVAL;
720 
721 	if (!rdev)
722 		return rc;
723 
724 	en_dev = rdev->en_dev;
725 
726 	if (!en_dev)
727 		return rc;
728 
729 	if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
730 		return 0;
731 
732 	bnxt_re_init_hwrm_hdr((void *)&req, HWRM_RING_FREE);
733 	req.ring_type = type;
734 	req.ring_id = cpu_to_le16(fw_ring_id);
735 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
736 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
737 	rc = bnxt_send_msg(en_dev, &fw_msg);
738 	if (rc)
739 		ibdev_err(&rdev->ibdev, "Failed to free HW ring:%d :%#x",
740 			  req.ring_id, rc);
741 	return rc;
742 }
743 
744 static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev,
745 				  struct bnxt_re_ring_attr *ring_attr,
746 				  u16 *fw_ring_id)
747 {
748 	struct bnxt_en_dev *en_dev = rdev->en_dev;
749 	struct hwrm_ring_alloc_input req = {};
750 	struct hwrm_ring_alloc_output resp;
751 	struct bnxt_fw_msg fw_msg = {};
752 	int rc = -EINVAL;
753 
754 	if (!en_dev)
755 		return rc;
756 
757 	bnxt_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC);
758 	req.enables = 0;
759 	req.page_tbl_addr =  cpu_to_le64(ring_attr->dma_arr[0]);
760 	if (ring_attr->pages > 1) {
761 		/* Page size is in log2 units */
762 		req.page_size = BNXT_PAGE_SHIFT;
763 		req.page_tbl_depth = 1;
764 	}
765 	req.fbo = 0;
766 	/* Association of ring index with doorbell index and MSIX number */
767 	req.logical_id = cpu_to_le16(ring_attr->lrid);
768 	req.length = cpu_to_le32(ring_attr->depth + 1);
769 	req.ring_type = ring_attr->type;
770 	req.int_mode = ring_attr->mode;
771 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
772 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
773 	rc = bnxt_send_msg(en_dev, &fw_msg);
774 	if (!rc)
775 		*fw_ring_id = le16_to_cpu(resp.ring_id);
776 
777 	return rc;
778 }
779 
780 static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
781 				      u32 fw_stats_ctx_id)
782 {
783 	struct bnxt_en_dev *en_dev = rdev->en_dev;
784 	struct hwrm_stat_ctx_free_input req = {};
785 	struct hwrm_stat_ctx_free_output resp = {};
786 	struct bnxt_fw_msg fw_msg = {};
787 	int rc = -EINVAL;
788 
789 	if (!en_dev)
790 		return rc;
791 
792 	if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
793 		return 0;
794 
795 	bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE);
796 	req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
797 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
798 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
799 	rc = bnxt_send_msg(en_dev, &fw_msg);
800 	if (rc)
801 		ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x",
802 			  rc);
803 
804 	return rc;
805 }
806 
807 static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
808 				       dma_addr_t dma_map,
809 				       u32 *fw_stats_ctx_id)
810 {
811 	struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx;
812 	struct hwrm_stat_ctx_alloc_output resp = {};
813 	struct hwrm_stat_ctx_alloc_input req = {};
814 	struct bnxt_en_dev *en_dev = rdev->en_dev;
815 	struct bnxt_fw_msg fw_msg = {};
816 	int rc = -EINVAL;
817 
818 	*fw_stats_ctx_id = INVALID_STATS_CTX_ID;
819 
820 	if (!en_dev)
821 		return rc;
822 
823 	bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC);
824 	req.update_period_ms = cpu_to_le32(1000);
825 	req.stats_dma_addr = cpu_to_le64(dma_map);
826 	req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size);
827 	req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
828 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
829 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
830 	rc = bnxt_send_msg(en_dev, &fw_msg);
831 	if (!rc)
832 		*fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id);
833 
834 	return rc;
835 }
836 
/*
 * Intentionally empty: no driver-specific work is done for @ibcontext.
 * NOTE(review): presumably installed as the disassociate_ucontext device
 * op further down the file - not visible in this chunk, confirm.
 */
static void bnxt_re_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
}
840 
841 /* Device */
842 
843 static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
844 {
845 	struct ib_device *ibdev =
846 		ib_device_get_by_netdev(netdev, RDMA_DRIVER_BNXT_RE);
847 	if (!ibdev)
848 		return NULL;
849 
850 	return container_of(ibdev, struct bnxt_re_dev, ibdev);
851 }
852 
853 static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
854 			   char *buf)
855 {
856 	struct bnxt_re_dev *rdev =
857 		rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
858 
859 	return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->vendor);
860 }
861 static DEVICE_ATTR_RO(hw_rev);
862 
863 static ssize_t hca_type_show(struct device *device,
864 			     struct device_attribute *attr, char *buf)
865 {
866 	struct bnxt_re_dev *rdev =
867 		rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
868 
869 	return sysfs_emit(buf, "%s\n", rdev->ibdev.node_desc);
870 }
871 static DEVICE_ATTR_RO(hca_type);
872 
/* NULL-terminated list of the sysfs attributes defined above. */
static struct attribute *bnxt_re_attributes[] = {
	&dev_attr_hw_rev.attr,
	&dev_attr_hca_type.attr,
	NULL
};
878 
/*
 * Attribute group wrapping bnxt_re_attributes. Where the group is
 * registered is not visible in this part of the file.
 */
static const struct attribute_group bnxt_re_dev_attr_group = {
	.attrs = bnxt_re_attributes,
};
882 
883 static int bnxt_re_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr)
884 {
885 	struct bnxt_qplib_hwq *mr_hwq;
886 	struct nlattr *table_attr;
887 	struct bnxt_re_mr *mr;
888 
889 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
890 	if (!table_attr)
891 		return -EMSGSIZE;
892 
893 	mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
894 	mr_hwq = &mr->qplib_mr.hwq;
895 
896 	if (rdma_nl_put_driver_u32(msg, "page_size",
897 				   mr_hwq->qe_ppg * mr_hwq->element_size))
898 		goto err;
899 	if (rdma_nl_put_driver_u32(msg, "max_elements", mr_hwq->max_elements))
900 		goto err;
901 	if (rdma_nl_put_driver_u32(msg, "element_size", mr_hwq->element_size))
902 		goto err;
903 	if (rdma_nl_put_driver_u64_hex(msg, "hwq", (unsigned long)mr_hwq))
904 		goto err;
905 	if (rdma_nl_put_driver_u64_hex(msg, "va", mr->qplib_mr.va))
906 		goto err;
907 
908 	nla_nest_end(msg, table_attr);
909 	return 0;
910 
911 err:
912 	nla_nest_cancel(msg, table_attr);
913 	return -EMSGSIZE;
914 }
915 
916 static int bnxt_re_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr)
917 {
918 	struct bnxt_re_dev *rdev;
919 	struct bnxt_re_mr *mr;
920 	int err, len;
921 	void *data;
922 
923 	mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
924 	rdev = mr->rdev;
925 
926 	err = bnxt_re_read_context_allowed(rdev);
927 	if (err)
928 		return err;
929 
930 	len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P7 :
931 							  BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P5;
932 	data = kzalloc(len, GFP_KERNEL);
933 	if (!data)
934 		return -ENOMEM;
935 
936 	err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_MRW,
937 				      mr->qplib_mr.lkey, len, data);
938 	if (!err)
939 		err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
940 
941 	kfree(data);
942 	return err;
943 }
944 
945 static int bnxt_re_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq)
946 {
947 	struct bnxt_qplib_hwq *cq_hwq;
948 	struct nlattr *table_attr;
949 	struct bnxt_re_cq *cq;
950 
951 	cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
952 	cq_hwq = &cq->qplib_cq.hwq;
953 
954 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
955 	if (!table_attr)
956 		return -EMSGSIZE;
957 
958 	if (rdma_nl_put_driver_u32(msg, "cq_depth", cq_hwq->depth))
959 		goto err;
960 	if (rdma_nl_put_driver_u32(msg, "max_elements", cq_hwq->max_elements))
961 		goto err;
962 	if (rdma_nl_put_driver_u32(msg, "element_size", cq_hwq->element_size))
963 		goto err;
964 	if (rdma_nl_put_driver_u32(msg, "max_wqe", cq->qplib_cq.max_wqe))
965 		goto err;
966 
967 	nla_nest_end(msg, table_attr);
968 	return 0;
969 
970 err:
971 	nla_nest_cancel(msg, table_attr);
972 	return -EMSGSIZE;
973 }
974 
975 static int bnxt_re_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq)
976 {
977 	struct bnxt_re_dev *rdev;
978 	struct bnxt_re_cq *cq;
979 	int err, len;
980 	void *data;
981 
982 	cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
983 	rdev = cq->rdev;
984 
985 	err = bnxt_re_read_context_allowed(rdev);
986 	if (err)
987 		return err;
988 
989 	len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P7 :
990 					BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P5;
991 	data = kzalloc(len, GFP_KERNEL);
992 	if (!data)
993 		return -ENOMEM;
994 
995 	err = bnxt_qplib_read_context(&rdev->rcfw,
996 				      CMDQ_READ_CONTEXT_TYPE_CQ,
997 				      cq->qplib_cq.id, len, data);
998 	if (!err)
999 		err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
1000 
1001 	kfree(data);
1002 	return err;
1003 }
1004 
1005 static int bnxt_re_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp)
1006 {
1007 	struct bnxt_qplib_qp *qplib_qp;
1008 	struct nlattr *table_attr;
1009 	struct bnxt_re_qp *qp;
1010 
1011 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
1012 	if (!table_attr)
1013 		return -EMSGSIZE;
1014 
1015 	qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
1016 	qplib_qp = &qp->qplib_qp;
1017 
1018 	if (rdma_nl_put_driver_u32(msg, "sq_max_wqe", qplib_qp->sq.max_wqe))
1019 		goto err;
1020 	if (rdma_nl_put_driver_u32(msg, "sq_max_sge", qplib_qp->sq.max_sge))
1021 		goto err;
1022 	if (rdma_nl_put_driver_u32(msg, "sq_wqe_size", qplib_qp->sq.wqe_size))
1023 		goto err;
1024 	if (rdma_nl_put_driver_u32(msg, "sq_swq_start", qplib_qp->sq.swq_start))
1025 		goto err;
1026 	if (rdma_nl_put_driver_u32(msg, "sq_swq_last", qplib_qp->sq.swq_last))
1027 		goto err;
1028 	if (rdma_nl_put_driver_u32(msg, "rq_max_wqe", qplib_qp->rq.max_wqe))
1029 		goto err;
1030 	if (rdma_nl_put_driver_u32(msg, "rq_max_sge", qplib_qp->rq.max_sge))
1031 		goto err;
1032 	if (rdma_nl_put_driver_u32(msg, "rq_wqe_size", qplib_qp->rq.wqe_size))
1033 		goto err;
1034 	if (rdma_nl_put_driver_u32(msg, "rq_swq_start", qplib_qp->rq.swq_start))
1035 		goto err;
1036 	if (rdma_nl_put_driver_u32(msg, "rq_swq_last", qplib_qp->rq.swq_last))
1037 		goto err;
1038 	if (rdma_nl_put_driver_u32(msg, "timeout", qplib_qp->timeout))
1039 		goto err;
1040 
1041 	nla_nest_end(msg, table_attr);
1042 	return 0;
1043 
1044 err:
1045 	nla_nest_cancel(msg, table_attr);
1046 	return -EMSGSIZE;
1047 }
1048 
1049 static int bnxt_re_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp)
1050 {
1051 	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibqp->device, ibdev);
1052 	int err, len;
1053 	void *data;
1054 
1055 	err = bnxt_re_read_context_allowed(rdev);
1056 	if (err)
1057 		return err;
1058 
1059 	len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P7 :
1060 							  BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P5;
1061 	data = kzalloc(len, GFP_KERNEL);
1062 	if (!data)
1063 		return -ENOMEM;
1064 
1065 	err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_QPC,
1066 				      ibqp->qp_num, len, data);
1067 	if (!err)
1068 		err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
1069 
1070 	kfree(data);
1071 	return err;
1072 }
1073 
1074 static int bnxt_re_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq)
1075 {
1076 	struct nlattr *table_attr;
1077 	struct bnxt_re_srq *srq;
1078 
1079 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
1080 	if (!table_attr)
1081 		return -EMSGSIZE;
1082 
1083 	srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq);
1084 
1085 	if (rdma_nl_put_driver_u32_hex(msg, "wqe_size", srq->qplib_srq.wqe_size))
1086 		goto err;
1087 	if (rdma_nl_put_driver_u32_hex(msg, "max_wqe", srq->qplib_srq.max_wqe))
1088 		goto err;
1089 	if (rdma_nl_put_driver_u32_hex(msg, "max_sge", srq->qplib_srq.max_sge))
1090 		goto err;
1091 
1092 	nla_nest_end(msg, table_attr);
1093 	return 0;
1094 
1095 err:
1096 	nla_nest_cancel(msg, table_attr);
1097 	return -EMSGSIZE;
1098 }
1099 
1100 static int bnxt_re_fill_res_srq_entry_raw(struct sk_buff *msg, struct ib_srq *ib_srq)
1101 {
1102 	struct bnxt_re_dev *rdev;
1103 	struct bnxt_re_srq *srq;
1104 	int err, len;
1105 	void *data;
1106 
1107 	srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq);
1108 	rdev = srq->rdev;
1109 
1110 	err = bnxt_re_read_context_allowed(rdev);
1111 	if (err)
1112 		return err;
1113 
1114 	len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P7 :
1115 							  BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P5;
1116 
1117 	data = kzalloc(len, GFP_KERNEL);
1118 	if (!data)
1119 		return -ENOMEM;
1120 
1121 	err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_SRQ,
1122 				      srq->qplib_srq.id, len, data);
1123 	if (!err)
1124 		err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
1125 
1126 	kfree(data);
1127 	return err;
1128 }
1129 
1130 static const struct ib_device_ops bnxt_re_dev_ops = {
1131 	.owner = THIS_MODULE,
1132 	.driver_id = RDMA_DRIVER_BNXT_RE,
1133 	.uverbs_abi_ver = BNXT_RE_ABI_VERSION,
1134 
1135 	.add_gid = bnxt_re_add_gid,
1136 	.alloc_hw_port_stats = bnxt_re_ib_alloc_hw_port_stats,
1137 	.alloc_mr = bnxt_re_alloc_mr,
1138 	.alloc_pd = bnxt_re_alloc_pd,
1139 	.alloc_ucontext = bnxt_re_alloc_ucontext,
1140 	.create_ah = bnxt_re_create_ah,
1141 	.create_cq = bnxt_re_create_cq,
1142 	.create_qp = bnxt_re_create_qp,
1143 	.create_srq = bnxt_re_create_srq,
1144 	.create_user_ah = bnxt_re_create_ah,
1145 	.dealloc_pd = bnxt_re_dealloc_pd,
1146 	.dealloc_ucontext = bnxt_re_dealloc_ucontext,
1147 	.del_gid = bnxt_re_del_gid,
1148 	.dereg_mr = bnxt_re_dereg_mr,
1149 	.destroy_ah = bnxt_re_destroy_ah,
1150 	.destroy_cq = bnxt_re_destroy_cq,
1151 	.destroy_qp = bnxt_re_destroy_qp,
1152 	.destroy_srq = bnxt_re_destroy_srq,
1153 	.device_group = &bnxt_re_dev_attr_group,
1154 	.disassociate_ucontext = bnxt_re_disassociate_ucontext,
1155 	.get_dev_fw_str = bnxt_re_query_fw_str,
1156 	.get_dma_mr = bnxt_re_get_dma_mr,
1157 	.get_hw_stats = bnxt_re_ib_get_hw_stats,
1158 	.get_link_layer = bnxt_re_get_link_layer,
1159 	.get_port_immutable = bnxt_re_get_port_immutable,
1160 	.map_mr_sg = bnxt_re_map_mr_sg,
1161 	.mmap = bnxt_re_mmap,
1162 	.mmap_free = bnxt_re_mmap_free,
1163 	.modify_qp = bnxt_re_modify_qp,
1164 	.modify_srq = bnxt_re_modify_srq,
1165 	.poll_cq = bnxt_re_poll_cq,
1166 	.post_recv = bnxt_re_post_recv,
1167 	.post_send = bnxt_re_post_send,
1168 	.post_srq_recv = bnxt_re_post_srq_recv,
1169 	.query_ah = bnxt_re_query_ah,
1170 	.query_device = bnxt_re_query_device,
1171 	.modify_device = bnxt_re_modify_device,
1172 	.query_pkey = bnxt_re_query_pkey,
1173 	.query_port = bnxt_re_query_port,
1174 	.query_qp = bnxt_re_query_qp,
1175 	.query_srq = bnxt_re_query_srq,
1176 	.reg_user_mr = bnxt_re_reg_user_mr,
1177 	.reg_user_mr_dmabuf = bnxt_re_reg_user_mr_dmabuf,
1178 	.req_notify_cq = bnxt_re_req_notify_cq,
1179 	.resize_cq = bnxt_re_resize_cq,
1180 	INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah),
1181 	INIT_RDMA_OBJ_SIZE(ib_cq, bnxt_re_cq, ib_cq),
1182 	INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd),
1183 	INIT_RDMA_OBJ_SIZE(ib_qp, bnxt_re_qp, ib_qp),
1184 	INIT_RDMA_OBJ_SIZE(ib_srq, bnxt_re_srq, ib_srq),
1185 	INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx),
1186 };
1187 
1188 static const struct ib_device_ops restrack_ops = {
1189 	.fill_res_cq_entry = bnxt_re_fill_res_cq_entry,
1190 	.fill_res_cq_entry_raw = bnxt_re_fill_res_cq_entry_raw,
1191 	.fill_res_qp_entry = bnxt_re_fill_res_qp_entry,
1192 	.fill_res_qp_entry_raw = bnxt_re_fill_res_qp_entry_raw,
1193 	.fill_res_mr_entry = bnxt_re_fill_res_mr_entry,
1194 	.fill_res_mr_entry_raw = bnxt_re_fill_res_mr_entry_raw,
1195 	.fill_res_srq_entry = bnxt_re_fill_res_srq_entry,
1196 	.fill_res_srq_entry_raw = bnxt_re_fill_res_srq_entry_raw,
1197 };
1198 
1199 static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
1200 {
1201 	struct ib_device *ibdev = &rdev->ibdev;
1202 	int ret;
1203 
1204 	/* ib device init */
1205 	ibdev->node_type = RDMA_NODE_IB_CA;
1206 	strscpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
1207 		strlen(BNXT_RE_DESC) + 5);
1208 	ibdev->phys_port_cnt = 1;
1209 
1210 	addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr);
1211 
1212 	ibdev->num_comp_vectors	= rdev->nqr->num_msix - 1;
1213 	ibdev->dev.parent = &rdev->en_dev->pdev->dev;
1214 	ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
1215 
1216 	if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
1217 		ibdev->driver_def = bnxt_re_uapi_defs;
1218 
1219 	ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
1220 	ib_set_device_ops(ibdev, &restrack_ops);
1221 	ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1);
1222 	if (ret)
1223 		return ret;
1224 
1225 	dma_set_max_seg_size(&rdev->en_dev->pdev->dev, UINT_MAX);
1226 	ibdev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ);
1227 	return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev);
1228 }
1229 
1230 static struct bnxt_re_dev *bnxt_re_dev_add(struct auxiliary_device *adev,
1231 					   struct bnxt_en_dev *en_dev)
1232 {
1233 	struct bnxt_re_dev *rdev;
1234 
1235 	/* Allocate bnxt_re_dev instance here */
1236 	rdev = ib_alloc_device(bnxt_re_dev, ibdev);
1237 	if (!rdev) {
1238 		ibdev_err(NULL, "%s: bnxt_re_dev allocation failure!",
1239 			  ROCE_DRV_MODULE_NAME);
1240 		return NULL;
1241 	}
1242 	/* Default values */
1243 	rdev->nb.notifier_call = NULL;
1244 	rdev->netdev = en_dev->net;
1245 	rdev->en_dev = en_dev;
1246 	rdev->adev = adev;
1247 	rdev->id = rdev->en_dev->pdev->devfn;
1248 	INIT_LIST_HEAD(&rdev->qp_list);
1249 	mutex_init(&rdev->qp_lock);
1250 	mutex_init(&rdev->pacing.dbq_lock);
1251 	atomic_set(&rdev->stats.res.qp_count, 0);
1252 	atomic_set(&rdev->stats.res.cq_count, 0);
1253 	atomic_set(&rdev->stats.res.srq_count, 0);
1254 	atomic_set(&rdev->stats.res.mr_count, 0);
1255 	atomic_set(&rdev->stats.res.mw_count, 0);
1256 	atomic_set(&rdev->stats.res.ah_count, 0);
1257 	atomic_set(&rdev->stats.res.pd_count, 0);
1258 	rdev->cosq[0] = 0xFFFF;
1259 	rdev->cosq[1] = 0xFFFF;
1260 	rdev->cq_coalescing.buf_maxtime = BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME;
1261 	if (bnxt_re_chip_gen_p7(en_dev->chip_num)) {
1262 		rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7;
1263 		rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P7;
1264 	} else {
1265 		rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P5;
1266 		rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P5;
1267 	}
1268 	rdev->cq_coalescing.en_ring_idle_mode = BNXT_QPLIB_CQ_COAL_DEF_EN_RING_IDLE_MODE;
1269 
1270 	return rdev;
1271 }
1272 
1273 static int bnxt_re_handle_unaffi_async_event(struct creq_func_event
1274 					     *unaffi_async)
1275 {
1276 	switch (unaffi_async->event) {
1277 	case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
1278 		break;
1279 	case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
1280 		break;
1281 	case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
1282 		break;
1283 	case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
1284 		break;
1285 	case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
1286 		break;
1287 	case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
1288 		break;
1289 	case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
1290 		break;
1291 	case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
1292 		break;
1293 	case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
1294 		break;
1295 	case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
1296 		break;
1297 	case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
1298 		break;
1299 	default:
1300 		return -EINVAL;
1301 	}
1302 	return 0;
1303 }
1304 
1305 static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
1306 					 struct bnxt_re_qp *qp)
1307 {
1308 	struct creq_qp_error_notification *err_event;
1309 	struct bnxt_re_srq *srq = NULL;
1310 	struct ib_event event = {};
1311 	unsigned int flags;
1312 
1313 	if (qp->qplib_qp.srq)
1314 		srq =  container_of(qp->qplib_qp.srq, struct bnxt_re_srq,
1315 				    qplib_srq);
1316 
1317 	if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR &&
1318 	    rdma_is_kernel_res(&qp->ib_qp.res)) {
1319 		flags = bnxt_re_lock_cqs(qp);
1320 		bnxt_qplib_add_flush_qp(&qp->qplib_qp);
1321 		bnxt_re_unlock_cqs(qp, flags);
1322 	}
1323 
1324 	event.device = &qp->rdev->ibdev;
1325 	event.element.qp = &qp->ib_qp;
1326 	event.event = IB_EVENT_QP_FATAL;
1327 
1328 	err_event = (struct creq_qp_error_notification *)qp_event;
1329 
1330 	switch (err_event->req_err_state_reason) {
1331 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_OPCODE_ERROR:
1332 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TIMEOUT_RETRY_LIMIT:
1333 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RNR_TIMEOUT_RETRY_LIMIT:
1334 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_2:
1335 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_3:
1336 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_READ_RESP:
1337 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_BIND:
1338 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_FAST_REG:
1339 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_INVALIDATE:
1340 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETRAN_LOCAL_ERROR:
1341 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_AV_DOMAIN_ERROR:
1342 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PROD_WQE_MSMTCH_ERROR:
1343 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PSN_RANGE_CHECK_ERROR:
1344 		event.event = IB_EVENT_QP_ACCESS_ERR;
1345 		break;
1346 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_1:
1347 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_4:
1348 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_READ_RESP_LENGTH:
1349 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_WQE_FORMAT_ERROR:
1350 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ORRQ_FORMAT_ERROR:
1351 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_AVID_ERROR:
1352 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_SERV_TYPE_ERROR:
1353 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_OP_ERROR:
1354 		event.event = IB_EVENT_QP_REQ_ERR;
1355 		break;
1356 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_MEMORY_ERROR:
1357 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_MEMORY_ERROR:
1358 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CMP_ERROR:
1359 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CQ_LOAD_ERROR:
1360 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_PCI_ERROR:
1361 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_PCI_ERROR:
1362 	case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETX_SETUP_ERROR:
1363 		event.event = IB_EVENT_QP_FATAL;
1364 		break;
1365 
1366 	default:
1367 		break;
1368 	}
1369 
1370 	switch (err_event->res_err_state_reason) {
1371 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEED_MAX:
1372 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PAYLOAD_LENGTH_MISMATCH:
1373 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_SEQ_ERROR_RETRY_LIMIT:
1374 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_INVALID_R_KEY:
1375 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_DOMAIN_ERROR:
1376 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_NO_PERMISSION:
1377 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_RANGE_ERROR:
1378 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_INVALID_R_KEY:
1379 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_DOMAIN_ERROR:
1380 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_NO_PERMISSION:
1381 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_RANGE_ERROR:
1382 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNALIGN_ATOMIC:
1383 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_NOT_FOUND:
1384 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_INVALID_DUP_RKEY:
1385 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_FORMAT_ERROR:
1386 		event.event = IB_EVENT_QP_ACCESS_ERR;
1387 		break;
1388 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEEDS_WQE:
1389 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_WQE_FORMAT_ERROR:
1390 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNSUPPORTED_OPCODE:
1391 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_REM_INVALIDATE:
1392 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_OPCODE_ERROR:
1393 		event.event = IB_EVENT_QP_REQ_ERR;
1394 		break;
1395 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_OFLOW:
1396 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CMP_ERROR:
1397 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CQ_LOAD_ERROR:
1398 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_PCI_ERROR:
1399 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_PCI_ERROR:
1400 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_MEMORY_ERROR:
1401 		event.event = IB_EVENT_QP_FATAL;
1402 		break;
1403 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_LOAD_ERROR:
1404 	case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_ERROR:
1405 		if (srq)
1406 			event.event = IB_EVENT_SRQ_ERR;
1407 		break;
1408 	default:
1409 		break;
1410 	}
1411 
1412 	if (err_event->res_err_state_reason || err_event->req_err_state_reason) {
1413 		ibdev_dbg(&qp->rdev->ibdev,
1414 			  "%s %s qp_id: %d cons (%d %d) req (%d %d) res (%d %d)\n",
1415 			   __func__, rdma_is_kernel_res(&qp->ib_qp.res) ? "kernel" : "user",
1416 			   qp->qplib_qp.id,
1417 			   err_event->sq_cons_idx,
1418 			   err_event->rq_cons_idx,
1419 			   err_event->req_slow_path_state,
1420 			   err_event->req_err_state_reason,
1421 			   err_event->res_slow_path_state,
1422 			   err_event->res_err_state_reason);
1423 	} else {
1424 		if (srq)
1425 			event.event = IB_EVENT_QP_LAST_WQE_REACHED;
1426 	}
1427 
1428 	if (event.event == IB_EVENT_SRQ_ERR && srq->ib_srq.event_handler)  {
1429 		(*srq->ib_srq.event_handler)(&event,
1430 				srq->ib_srq.srq_context);
1431 	} else if (event.device && qp->ib_qp.event_handler) {
1432 		qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
1433 	}
1434 
1435 	return 0;
1436 }
1437 
1438 static int bnxt_re_handle_cq_async_error(void *event, struct bnxt_re_cq *cq)
1439 {
1440 	struct creq_cq_error_notification *cqerr;
1441 	struct ib_event ibevent = {};
1442 
1443 	cqerr = event;
1444 	switch (cqerr->cq_err_reason) {
1445 	case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_INVALID_ERROR:
1446 	case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_OVERFLOW_ERROR:
1447 	case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_LOAD_ERROR:
1448 	case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_INVALID_ERROR:
1449 	case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_OVERFLOW_ERROR:
1450 	case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_LOAD_ERROR:
1451 		ibevent.event = IB_EVENT_CQ_ERR;
1452 		break;
1453 	default:
1454 		break;
1455 	}
1456 
1457 	if (ibevent.event == IB_EVENT_CQ_ERR && cq->ib_cq.event_handler) {
1458 		ibevent.element.cq = &cq->ib_cq;
1459 		ibevent.device = &cq->rdev->ibdev;
1460 
1461 		ibdev_dbg(&cq->rdev->ibdev,
1462 			  "%s err reason %d\n", __func__, cqerr->cq_err_reason);
1463 		cq->ib_cq.event_handler(&ibevent, cq->ib_cq.cq_context);
1464 	}
1465 
1466 	return 0;
1467 }
1468 
1469 static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async,
1470 					   void *obj)
1471 {
1472 	struct bnxt_qplib_qp *lib_qp;
1473 	struct bnxt_qplib_cq *lib_cq;
1474 	struct bnxt_re_qp *qp;
1475 	struct bnxt_re_cq *cq;
1476 	int rc = 0;
1477 	u8 event;
1478 
1479 	if (!obj)
1480 		return rc; /* QP was already dead, still return success */
1481 
1482 	event = affi_async->event;
1483 	switch (event) {
1484 	case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
1485 		lib_qp = obj;
1486 		qp = container_of(lib_qp, struct bnxt_re_qp, qplib_qp);
1487 		rc = bnxt_re_handle_qp_async_event(affi_async, qp);
1488 		break;
1489 	case CREQ_QP_EVENT_EVENT_CQ_ERROR_NOTIFICATION:
1490 		lib_cq = obj;
1491 		cq = container_of(lib_cq, struct bnxt_re_cq, qplib_cq);
1492 		rc = bnxt_re_handle_cq_async_error(affi_async, cq);
1493 		break;
1494 	default:
1495 		rc = -EINVAL;
1496 	}
1497 	return rc;
1498 }
1499 
1500 static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
1501 			       void *aeqe, void *obj)
1502 {
1503 	struct creq_qp_event *affi_async;
1504 	struct creq_func_event *unaffi_async;
1505 	u8 type;
1506 	int rc;
1507 
1508 	type = ((struct creq_base *)aeqe)->type;
1509 	if (type == CREQ_BASE_TYPE_FUNC_EVENT) {
1510 		unaffi_async = aeqe;
1511 		rc = bnxt_re_handle_unaffi_async_event(unaffi_async);
1512 	} else {
1513 		affi_async = aeqe;
1514 		rc = bnxt_re_handle_affi_async_event(affi_async, obj);
1515 	}
1516 
1517 	return rc;
1518 }
1519 
1520 static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq,
1521 				struct bnxt_qplib_srq *handle, u8 event)
1522 {
1523 	struct bnxt_re_srq *srq = container_of(handle, struct bnxt_re_srq,
1524 					       qplib_srq);
1525 	struct ib_event ib_event;
1526 
1527 	ib_event.device = &srq->rdev->ibdev;
1528 	ib_event.element.srq = &srq->ib_srq;
1529 
1530 	if (srq->ib_srq.event_handler) {
1531 		if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT)
1532 			ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED;
1533 		(*srq->ib_srq.event_handler)(&ib_event,
1534 					     srq->ib_srq.srq_context);
1535 	}
1536 	return 0;
1537 }
1538 
1539 static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
1540 			       struct bnxt_qplib_cq *handle)
1541 {
1542 	struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq,
1543 					     qplib_cq);
1544 
1545 	if (cq->ib_cq.comp_handler)
1546 		(*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context);
1547 
1548 	return 0;
1549 }
1550 
1551 static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
1552 {
1553 	int i;
1554 
1555 	for (i = 1; i < rdev->nqr->num_msix; i++)
1556 		bnxt_qplib_disable_nq(&rdev->nqr->nq[i - 1]);
1557 
1558 	if (rdev->qplib_res.rcfw)
1559 		bnxt_qplib_cleanup_res(&rdev->qplib_res);
1560 }
1561 
1562 static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
1563 {
1564 	int num_vec_enabled = 0;
1565 	int rc = 0, i;
1566 	u32 db_offt;
1567 
1568 	bnxt_qplib_init_res(&rdev->qplib_res);
1569 
1570 	mutex_init(&rdev->nqr->load_lock);
1571 
1572 	for (i = 1; i < rdev->nqr->num_msix ; i++) {
1573 		db_offt = rdev->nqr->msix_entries[i].db_offset;
1574 		rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nqr->nq[i - 1],
1575 					  i - 1, rdev->nqr->msix_entries[i].vector,
1576 					  db_offt, &bnxt_re_cqn_handler,
1577 					  &bnxt_re_srqn_handler);
1578 		if (rc) {
1579 			ibdev_err(&rdev->ibdev,
1580 				  "Failed to enable NQ with rc = 0x%x", rc);
1581 			goto fail;
1582 		}
1583 		num_vec_enabled++;
1584 	}
1585 	return 0;
1586 fail:
1587 	for (i = num_vec_enabled; i >= 0; i--)
1588 		bnxt_qplib_disable_nq(&rdev->nqr->nq[i]);
1589 	return rc;
1590 }
1591 
1592 static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev)
1593 {
1594 	struct bnxt_qplib_nq *nq;
1595 	u8 type;
1596 	int i;
1597 
1598 	for (i = 0; i < rdev->nqr->num_msix - 1; i++) {
1599 		type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
1600 		nq = &rdev->nqr->nq[i];
1601 		bnxt_re_net_ring_free(rdev, nq->ring_id, type);
1602 		bnxt_qplib_free_nq(nq);
1603 		nq->res = NULL;
1604 	}
1605 }
1606 
1607 static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
1608 {
1609 	bnxt_re_free_nq_res(rdev);
1610 
1611 	if (rdev->qplib_res.dpi_tbl.max) {
1612 		bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
1613 				       &rdev->dpi_privileged);
1614 	}
1615 	if (rdev->qplib_res.rcfw) {
1616 		bnxt_qplib_free_res(&rdev->qplib_res);
1617 		rdev->qplib_res.rcfw = NULL;
1618 	}
1619 }
1620 
1621 static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
1622 {
1623 	struct bnxt_re_ring_attr rattr = {};
1624 	int num_vec_created = 0;
1625 	int rc, i;
1626 	u8 type;
1627 
1628 	/* Configure and allocate resources for qplib */
1629 	rdev->qplib_res.rcfw = &rdev->rcfw;
1630 	rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
1631 	if (rc)
1632 		goto fail;
1633 
1634 	rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->en_dev->pdev,
1635 				  rdev->netdev, &rdev->dev_attr);
1636 	if (rc)
1637 		goto fail;
1638 
1639 	rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res,
1640 				  &rdev->dpi_privileged,
1641 				  rdev, BNXT_QPLIB_DPI_TYPE_KERNEL);
1642 	if (rc)
1643 		goto dealloc_res;
1644 
1645 	for (i = 0; i < rdev->nqr->num_msix - 1; i++) {
1646 		struct bnxt_qplib_nq *nq;
1647 
1648 		nq = &rdev->nqr->nq[i];
1649 		nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT;
1650 		rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, nq);
1651 		if (rc) {
1652 			ibdev_err(&rdev->ibdev, "Alloc Failed NQ%d rc:%#x",
1653 				  i, rc);
1654 			goto free_nq;
1655 		}
1656 		type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
1657 		rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr;
1658 		rattr.pages = nq->hwq.pbl[rdev->nqr->nq[i].hwq.level].pg_count;
1659 		rattr.type = type;
1660 		rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
1661 		rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1;
1662 		rattr.lrid = rdev->nqr->msix_entries[i + 1].ring_idx;
1663 		rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id);
1664 		if (rc) {
1665 			ibdev_err(&rdev->ibdev,
1666 				  "Failed to allocate NQ fw id with rc = 0x%x",
1667 				  rc);
1668 			bnxt_qplib_free_nq(nq);
1669 			goto free_nq;
1670 		}
1671 		num_vec_created++;
1672 	}
1673 	return 0;
1674 free_nq:
1675 	for (i = num_vec_created - 1; i >= 0; i--) {
1676 		type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
1677 		bnxt_re_net_ring_free(rdev, rdev->nqr->nq[i].ring_id, type);
1678 		bnxt_qplib_free_nq(&rdev->nqr->nq[i]);
1679 	}
1680 	bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
1681 			       &rdev->dpi_privileged);
1682 dealloc_res:
1683 	bnxt_qplib_free_res(&rdev->qplib_res);
1684 
1685 fail:
1686 	rdev->qplib_res.rcfw = NULL;
1687 	return rc;
1688 }
1689 
1690 static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
1691 				   u8 port_num, enum ib_event_type event)
1692 {
1693 	struct ib_event ib_event;
1694 
1695 	ib_event.device = ibdev;
1696 	if (qp) {
1697 		ib_event.element.qp = qp;
1698 		ib_event.event = event;
1699 		if (qp->event_handler)
1700 			qp->event_handler(&ib_event, qp->qp_context);
1701 
1702 	} else {
1703 		ib_event.element.port_num = port_num;
1704 		ib_event.event = event;
1705 		ib_dispatch_event(&ib_event);
1706 	}
1707 }
1708 
1709 static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev,
1710 					struct bnxt_re_qp *qp)
1711 {
1712 	return (qp->ib_qp.qp_type == IB_QPT_GSI) ||
1713 	       (qp == rdev->gsi_ctx.gsi_sqp);
1714 }
1715 
1716 static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev)
1717 {
1718 	struct bnxt_re_qp *qp;
1719 
1720 	mutex_lock(&rdev->qp_lock);
1721 	list_for_each_entry(qp, &rdev->qp_list, list) {
1722 		/* Modify the state of all QPs except QP1/Shadow QP */
1723 		if (!bnxt_re_is_qp1_or_shadow_qp(rdev, qp)) {
1724 			if (qp->qplib_qp.state !=
1725 			    CMDQ_MODIFY_QP_NEW_STATE_RESET &&
1726 			    qp->qplib_qp.state !=
1727 			    CMDQ_MODIFY_QP_NEW_STATE_ERR)
1728 				bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp,
1729 						       1, IB_EVENT_QP_FATAL);
1730 		}
1731 	}
1732 	mutex_unlock(&rdev->qp_lock);
1733 }
1734 
1735 static int bnxt_re_update_gid(struct bnxt_re_dev *rdev)
1736 {
1737 	struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
1738 	struct bnxt_qplib_gid gid;
1739 	u16 gid_idx, index;
1740 	int rc = 0;
1741 
1742 	if (!ib_device_try_get(&rdev->ibdev))
1743 		return 0;
1744 
1745 	for (index = 0; index < sgid_tbl->active; index++) {
1746 		gid_idx = sgid_tbl->hw_id[index];
1747 
1748 		if (!memcmp(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
1749 			    sizeof(bnxt_qplib_gid_zero)))
1750 			continue;
1751 		/* need to modify the VLAN enable setting of non VLAN GID only
1752 		 * as setting is done for VLAN GID while adding GID
1753 		 */
1754 		if (sgid_tbl->vlan[index])
1755 			continue;
1756 
1757 		memcpy(&gid, &sgid_tbl->tbl[index], sizeof(gid));
1758 
1759 		rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx,
1760 					    rdev->qplib_res.netdev->dev_addr);
1761 	}
1762 
1763 	ib_device_put(&rdev->ibdev);
1764 	return rc;
1765 }
1766 
1767 static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev)
1768 {
1769 	u32 prio_map = 0, tmp_map = 0;
1770 	struct net_device *netdev;
1771 	struct dcb_app app = {};
1772 
1773 	netdev = rdev->netdev;
1774 
1775 	app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
1776 	app.protocol = ETH_P_IBOE;
1777 	tmp_map = dcb_ieee_getapp_mask(netdev, &app);
1778 	prio_map = tmp_map;
1779 
1780 	app.selector = IEEE_8021QAZ_APP_SEL_DGRAM;
1781 	app.protocol = ROCE_V2_UDP_DPORT;
1782 	tmp_map = dcb_ieee_getapp_mask(netdev, &app);
1783 	prio_map |= tmp_map;
1784 
1785 	return prio_map;
1786 }
1787 
1788 static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
1789 {
1790 	u8 prio_map = 0;
1791 
1792 	/* Get priority for roce */
1793 	prio_map = bnxt_re_get_priority_mask(rdev);
1794 
1795 	if (prio_map == rdev->cur_prio_map)
1796 		return 0;
1797 	rdev->cur_prio_map = prio_map;
1798 	/* Actual priorities are not programmed as they are already
1799 	 * done by L2 driver; just enable or disable priority vlan tagging
1800 	 */
1801 	if ((prio_map == 0 && rdev->qplib_res.prio) ||
1802 	    (prio_map != 0 && !rdev->qplib_res.prio)) {
1803 		rdev->qplib_res.prio = prio_map;
1804 		bnxt_re_update_gid(rdev);
1805 	}
1806 
1807 	return 0;
1808 }
1809 
1810 static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
1811 {
1812 	struct bnxt_en_dev *en_dev = rdev->en_dev;
1813 	struct hwrm_ver_get_output resp = {};
1814 	struct hwrm_ver_get_input req = {};
1815 	struct bnxt_qplib_chip_ctx *cctx;
1816 	struct bnxt_fw_msg fw_msg = {};
1817 	int rc;
1818 
1819 	bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VER_GET);
1820 	req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
1821 	req.hwrm_intf_min = HWRM_VERSION_MINOR;
1822 	req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
1823 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1824 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
1825 	rc = bnxt_send_msg(en_dev, &fw_msg);
1826 	if (rc) {
1827 		ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x",
1828 			  rc);
1829 		return;
1830 	}
1831 
1832 	cctx = rdev->chip_ctx;
1833 	cctx->hwrm_intf_ver =
1834 		(u64)le16_to_cpu(resp.hwrm_intf_major) << 48 |
1835 		(u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 |
1836 		(u64)le16_to_cpu(resp.hwrm_intf_build) << 16 |
1837 		le16_to_cpu(resp.hwrm_intf_patch);
1838 
1839 	cctx->hwrm_cmd_max_timeout = le16_to_cpu(resp.max_req_timeout);
1840 
1841 	if (!cctx->hwrm_cmd_max_timeout)
1842 		cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT;
1843 }
1844 
1845 static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
1846 {
1847 	int rc;
1848 	u32 event;
1849 
1850 	/* Register ib dev */
1851 	rc = bnxt_re_register_ib(rdev);
1852 	if (rc) {
1853 		pr_err("Failed to register with IB: %#x\n", rc);
1854 		return rc;
1855 	}
1856 	dev_info(rdev_to_dev(rdev), "Device registered with IB successfully");
1857 	set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
1858 
1859 	event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ?
1860 		IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
1861 
1862 	bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, event);
1863 
1864 	return rc;
1865 }
1866 
1867 static int bnxt_re_alloc_nqr_mem(struct bnxt_re_dev *rdev)
1868 {
1869 	rdev->nqr = kzalloc(sizeof(*rdev->nqr), GFP_KERNEL);
1870 	if (!rdev->nqr)
1871 		return -ENOMEM;
1872 
1873 	return 0;
1874 }
1875 
1876 static void bnxt_re_free_nqr_mem(struct bnxt_re_dev *rdev)
1877 {
1878 	kfree(rdev->nqr);
1879 	rdev->nqr = NULL;
1880 }
1881 
1882 static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
1883 {
1884 	u8 type;
1885 	int rc;
1886 
1887 	bnxt_re_debugfs_rem_pdev(rdev);
1888 
1889 	if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
1890 		cancel_delayed_work_sync(&rdev->worker);
1891 
1892 	if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED,
1893 			       &rdev->flags))
1894 		bnxt_re_cleanup_res(rdev);
1895 	if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags))
1896 		bnxt_re_free_res(rdev);
1897 
1898 	if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
1899 		rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
1900 		if (rc)
1901 			ibdev_warn(&rdev->ibdev,
1902 				   "Failed to deinitialize RCFW: %#x", rc);
1903 		bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
1904 		bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx);
1905 		bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
1906 		type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
1907 		bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
1908 		bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
1909 	}
1910 
1911 	rdev->nqr->num_msix = 0;
1912 
1913 	if (rdev->pacing.dbr_pacing)
1914 		bnxt_re_deinitialize_dbr_pacing(rdev);
1915 
1916 	bnxt_re_free_nqr_mem(rdev);
1917 	bnxt_re_destroy_chip_ctx(rdev);
1918 	if (op_type == BNXT_RE_COMPLETE_REMOVE) {
1919 		if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags))
1920 			bnxt_unregister_dev(rdev->en_dev);
1921 	}
1922 }
1923 
1924 /* worker thread for polling periodic events. Now used for QoS programming*/
1925 static void bnxt_re_worker(struct work_struct *work)
1926 {
1927 	struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
1928 						worker.work);
1929 
1930 	bnxt_re_setup_qos(rdev);
1931 	schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
1932 }
1933 
1934 static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
1935 {
1936 	struct bnxt_re_ring_attr rattr = {};
1937 	struct bnxt_qplib_creq_ctx *creq;
1938 	u32 db_offt;
1939 	int vid;
1940 	u8 type;
1941 	int rc;
1942 
1943 	if (op_type == BNXT_RE_COMPLETE_INIT) {
1944 		/* Registered a new RoCE device instance to netdev */
1945 		rc = bnxt_re_register_netdev(rdev);
1946 		if (rc) {
1947 			ibdev_err(&rdev->ibdev,
1948 				  "Failed to register with netedev: %#x\n", rc);
1949 			return -EINVAL;
1950 		}
1951 	}
1952 	set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
1953 
1954 	if (rdev->en_dev->ulp_tbl->msix_requested < BNXT_RE_MIN_MSIX) {
1955 		ibdev_err(&rdev->ibdev,
1956 			  "RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n",
1957 			  rdev->en_dev->ulp_tbl->msix_requested);
1958 		bnxt_unregister_dev(rdev->en_dev);
1959 		clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
1960 		return -EINVAL;
1961 	}
1962 	ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n",
1963 		  rdev->en_dev->ulp_tbl->msix_requested);
1964 
1965 	rc = bnxt_re_setup_chip_ctx(rdev);
1966 	if (rc) {
1967 		bnxt_unregister_dev(rdev->en_dev);
1968 		clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
1969 		ibdev_err(&rdev->ibdev, "Failed to get chip context\n");
1970 		return -EINVAL;
1971 	}
1972 
1973 	rc = bnxt_re_alloc_nqr_mem(rdev);
1974 	if (rc) {
1975 		bnxt_re_destroy_chip_ctx(rdev);
1976 		bnxt_unregister_dev(rdev->en_dev);
1977 		clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
1978 		return rc;
1979 	}
1980 	rdev->nqr->num_msix = rdev->en_dev->ulp_tbl->msix_requested;
1981 	memcpy(rdev->nqr->msix_entries, rdev->en_dev->msix_entries,
1982 	       sizeof(struct bnxt_msix_entry) * rdev->nqr->num_msix);
1983 
1984 	/* Check whether VF or PF */
1985 	bnxt_re_get_sriov_func_type(rdev);
1986 
1987 	bnxt_re_query_hwrm_intf_version(rdev);
1988 
1989 	/* Establish RCFW Communication Channel to initialize the context
1990 	 * memory for the function and all child VFs
1991 	 */
1992 	rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res, &rdev->rcfw,
1993 					   &rdev->qplib_ctx,
1994 					   BNXT_RE_MAX_QPC_COUNT);
1995 	if (rc) {
1996 		ibdev_err(&rdev->ibdev,
1997 			  "Failed to allocate RCFW Channel: %#x\n", rc);
1998 		goto fail;
1999 	}
2000 
2001 	type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
2002 	creq = &rdev->rcfw.creq;
2003 	rattr.dma_arr = creq->hwq.pbl[PBL_LVL_0].pg_map_arr;
2004 	rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count;
2005 	rattr.type = type;
2006 	rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
2007 	rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1;
2008 	rattr.lrid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
2009 	rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id);
2010 	if (rc) {
2011 		ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc);
2012 		goto free_rcfw;
2013 	}
2014 	db_offt = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].db_offset;
2015 	vid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].vector;
2016 	rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw,
2017 					    vid, db_offt,
2018 					    &bnxt_re_aeq_handler);
2019 	if (rc) {
2020 		ibdev_err(&rdev->ibdev, "Failed to enable RCFW channel: %#x\n",
2021 			  rc);
2022 		goto free_ring;
2023 	}
2024 
2025 	if (bnxt_qplib_dbr_pacing_en(rdev->chip_ctx)) {
2026 		rc = bnxt_re_initialize_dbr_pacing(rdev);
2027 		if (!rc) {
2028 			rdev->pacing.dbr_pacing = true;
2029 		} else {
2030 			ibdev_err(&rdev->ibdev,
2031 				  "DBR pacing disabled with error : %d\n", rc);
2032 			rdev->pacing.dbr_pacing = false;
2033 		}
2034 	}
2035 	rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
2036 	if (rc)
2037 		goto disable_rcfw;
2038 
2039 	bnxt_re_set_resource_limits(rdev);
2040 
2041 	rc = bnxt_qplib_alloc_ctx(&rdev->qplib_res, &rdev->qplib_ctx, 0,
2042 				  bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx));
2043 	if (rc) {
2044 		ibdev_err(&rdev->ibdev,
2045 			  "Failed to allocate QPLIB context: %#x\n", rc);
2046 		goto disable_rcfw;
2047 	}
2048 	rc = bnxt_re_net_stats_ctx_alloc(rdev,
2049 					 rdev->qplib_ctx.stats.dma_map,
2050 					 &rdev->qplib_ctx.stats.fw_id);
2051 	if (rc) {
2052 		ibdev_err(&rdev->ibdev,
2053 			  "Failed to allocate stats context: %#x\n", rc);
2054 		goto free_ctx;
2055 	}
2056 
2057 	rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx,
2058 				  rdev->is_virtfn);
2059 	if (rc) {
2060 		ibdev_err(&rdev->ibdev,
2061 			  "Failed to initialize RCFW: %#x\n", rc);
2062 		goto free_sctx;
2063 	}
2064 	set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);
2065 
2066 	/* Resources based on the 'new' device caps */
2067 	rc = bnxt_re_alloc_res(rdev);
2068 	if (rc) {
2069 		ibdev_err(&rdev->ibdev,
2070 			  "Failed to allocate resources: %#x\n", rc);
2071 		goto fail;
2072 	}
2073 	set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags);
2074 	rc = bnxt_re_init_res(rdev);
2075 	if (rc) {
2076 		ibdev_err(&rdev->ibdev,
2077 			  "Failed to initialize resources: %#x\n", rc);
2078 		goto fail;
2079 	}
2080 
2081 	set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags);
2082 
2083 	if (!rdev->is_virtfn) {
2084 		rc = bnxt_re_setup_qos(rdev);
2085 		if (rc)
2086 			ibdev_info(&rdev->ibdev,
2087 				   "RoCE priority not yet configured\n");
2088 
2089 		INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
2090 		set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
2091 		schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
2092 
2093 		if (!(rdev->qplib_res.en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT))
2094 			bnxt_re_vf_res_config(rdev);
2095 	}
2096 	hash_init(rdev->cq_hash);
2097 	if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT)
2098 		hash_init(rdev->srq_hash);
2099 
2100 	bnxt_re_debugfs_add_pdev(rdev);
2101 
2102 	return 0;
2103 free_sctx:
2104 	bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
2105 free_ctx:
2106 	bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx);
2107 disable_rcfw:
2108 	bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
2109 free_ring:
2110 	type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
2111 	bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
2112 free_rcfw:
2113 	bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
2114 fail:
2115 	bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
2116 
2117 	return rc;
2118 }
2119 
2120 static void bnxt_re_update_en_info_rdev(struct bnxt_re_dev *rdev,
2121 					struct bnxt_re_en_dev_info *en_info,
2122 					struct auxiliary_device *adev)
2123 {
2124 	/* Before updating the rdev pointer in bnxt_re_en_dev_info structure,
2125 	 * take the rtnl lock to avoid accessing invalid rdev pointer from
2126 	 * L2 ULP callbacks. This is applicable in all the places where rdev
2127 	 * pointer is updated in bnxt_re_en_dev_info.
2128 	 */
2129 	rtnl_lock();
2130 	en_info->rdev = rdev;
2131 	rtnl_unlock();
2132 }
2133 
2134 static int bnxt_re_add_device(struct auxiliary_device *adev, u8 op_type)
2135 {
2136 	struct bnxt_aux_priv *aux_priv =
2137 		container_of(adev, struct bnxt_aux_priv, aux_dev);
2138 	struct bnxt_re_en_dev_info *en_info;
2139 	struct bnxt_en_dev *en_dev;
2140 	struct bnxt_re_dev *rdev;
2141 	int rc;
2142 
2143 	en_info = auxiliary_get_drvdata(adev);
2144 	en_dev = en_info->en_dev;
2145 
2146 
2147 	rdev = bnxt_re_dev_add(adev, en_dev);
2148 	if (!rdev || !rdev_to_dev(rdev)) {
2149 		rc = -ENOMEM;
2150 		goto exit;
2151 	}
2152 
2153 	bnxt_re_update_en_info_rdev(rdev, en_info, adev);
2154 
2155 	rc = bnxt_re_dev_init(rdev, op_type);
2156 	if (rc)
2157 		goto re_dev_dealloc;
2158 
2159 	rc = bnxt_re_ib_init(rdev);
2160 	if (rc) {
2161 		pr_err("Failed to register with IB: %s",
2162 			aux_priv->aux_dev.name);
2163 		goto re_dev_uninit;
2164 	}
2165 
2166 	rdev->nb.notifier_call = bnxt_re_netdev_event;
2167 	rc = register_netdevice_notifier(&rdev->nb);
2168 	if (rc) {
2169 		rdev->nb.notifier_call = NULL;
2170 		pr_err("%s: Cannot register to netdevice_notifier",
2171 		       ROCE_DRV_MODULE_NAME);
2172 		goto re_dev_unreg;
2173 	}
2174 	bnxt_re_setup_cc(rdev, true);
2175 
2176 	return 0;
2177 
2178 re_dev_unreg:
2179 	ib_unregister_device(&rdev->ibdev);
2180 re_dev_uninit:
2181 	bnxt_re_update_en_info_rdev(NULL, en_info, adev);
2182 	bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
2183 re_dev_dealloc:
2184 	ib_dealloc_device(&rdev->ibdev);
2185 exit:
2186 	return rc;
2187 }
2188 
2189 static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable)
2190 {
2191 	struct bnxt_qplib_cc_param cc_param = {};
2192 
2193 	/* Do not enable congestion control on VFs */
2194 	if (rdev->is_virtfn)
2195 		return;
2196 
2197 	/* Currently enabling only for GenP5 adapters */
2198 	if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
2199 		return;
2200 
2201 	if (enable) {
2202 		cc_param.enable  = 1;
2203 		cc_param.tos_ecn = 1;
2204 	}
2205 
2206 	cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
2207 			 CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN);
2208 
2209 	if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param))
2210 		ibdev_err(&rdev->ibdev, "Failed to setup CC enable = %d\n", enable);
2211 }
2212 
2213 /*
2214  * "Notifier chain callback can be invoked for the same chain from
2215  * different CPUs at the same time".
2216  *
2217  * For cases when the netdev is already present, our call to the
2218  * register_netdevice_notifier() will actually get the rtnl_lock()
2219  * before sending NETDEV_REGISTER and (if up) NETDEV_UP
2220  * events.
2221  *
2222  * But for cases when the netdev is not already present, the notifier
2223  * chain is subjected to be invoked from different CPUs simultaneously.
2224  *
2225  * This is protected by the netdev_mutex.
2226  */
2227 static int bnxt_re_netdev_event(struct notifier_block *notifier,
2228 				unsigned long event, void *ptr)
2229 {
2230 	struct net_device *real_dev, *netdev = netdev_notifier_info_to_dev(ptr);
2231 	struct bnxt_re_dev *rdev;
2232 
2233 	real_dev = rdma_vlan_dev_real_dev(netdev);
2234 	if (!real_dev)
2235 		real_dev = netdev;
2236 
2237 	if (real_dev != netdev)
2238 		goto exit;
2239 
2240 	rdev = bnxt_re_from_netdev(real_dev);
2241 	if (!rdev)
2242 		return NOTIFY_DONE;
2243 
2244 
2245 	switch (event) {
2246 	case NETDEV_UP:
2247 	case NETDEV_DOWN:
2248 	case NETDEV_CHANGE:
2249 		bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
2250 					netif_carrier_ok(real_dev) ?
2251 					IB_EVENT_PORT_ACTIVE :
2252 					IB_EVENT_PORT_ERR);
2253 		break;
2254 	default:
2255 		break;
2256 	}
2257 	ib_device_put(&rdev->ibdev);
2258 exit:
2259 	return NOTIFY_DONE;
2260 }
2261 
/* Name prefix of the auxiliary device; ".rdma" is appended for the ID table */
#define BNXT_ADEV_NAME "bnxt_en"
2263 
2264 static void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type,
2265 				  struct auxiliary_device *aux_dev)
2266 {
2267 	if (rdev->nb.notifier_call) {
2268 		unregister_netdevice_notifier(&rdev->nb);
2269 		rdev->nb.notifier_call = NULL;
2270 	} else {
2271 		/* If notifier is null, we should have already done a
2272 		 * clean up before coming here.
2273 		 */
2274 		return;
2275 	}
2276 	bnxt_re_setup_cc(rdev, false);
2277 	ib_unregister_device(&rdev->ibdev);
2278 	bnxt_re_dev_uninit(rdev, op_type);
2279 	ib_dealloc_device(&rdev->ibdev);
2280 }
2281 
2282 static void bnxt_re_remove(struct auxiliary_device *adev)
2283 {
2284 	struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
2285 	struct bnxt_re_dev *rdev;
2286 
2287 	mutex_lock(&bnxt_re_mutex);
2288 	rdev = en_info->rdev;
2289 
2290 	if (rdev)
2291 		bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, adev);
2292 	kfree(en_info);
2293 	mutex_unlock(&bnxt_re_mutex);
2294 }
2295 
2296 static int bnxt_re_probe(struct auxiliary_device *adev,
2297 			 const struct auxiliary_device_id *id)
2298 {
2299 	struct bnxt_aux_priv *aux_priv =
2300 		container_of(adev, struct bnxt_aux_priv, aux_dev);
2301 	struct bnxt_re_en_dev_info *en_info;
2302 	struct bnxt_en_dev *en_dev;
2303 	int rc;
2304 
2305 	en_dev = aux_priv->edev;
2306 
2307 	mutex_lock(&bnxt_re_mutex);
2308 	en_info = kzalloc(sizeof(*en_info), GFP_KERNEL);
2309 	if (!en_info) {
2310 		mutex_unlock(&bnxt_re_mutex);
2311 		return -ENOMEM;
2312 	}
2313 	en_info->en_dev = en_dev;
2314 
2315 	auxiliary_set_drvdata(adev, en_info);
2316 
2317 	rc = bnxt_re_add_device(adev, BNXT_RE_COMPLETE_INIT);
2318 	if (rc)
2319 		goto err;
2320 	mutex_unlock(&bnxt_re_mutex);
2321 	return 0;
2322 
2323 err:
2324 	mutex_unlock(&bnxt_re_mutex);
2325 	kfree(en_info);
2326 
2327 	return rc;
2328 }
2329 
2330 static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state)
2331 {
2332 	struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
2333 	struct bnxt_en_dev *en_dev;
2334 	struct bnxt_re_dev *rdev;
2335 
2336 	rdev = en_info->rdev;
2337 	en_dev = en_info->en_dev;
2338 	mutex_lock(&bnxt_re_mutex);
2339 
2340 	ibdev_info(&rdev->ibdev, "Handle device suspend call");
2341 	/* Check the current device state from bnxt_en_dev and move the
2342 	 * device to detached state if FW_FATAL_COND is set.
2343 	 * This prevents more commands to HW during clean-up,
2344 	 * in case the device is already in error.
2345 	 */
2346 	if (test_bit(BNXT_STATE_FW_FATAL_COND, &rdev->en_dev->en_state)) {
2347 		set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
2348 		set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
2349 		wake_up_all(&rdev->rcfw.cmdq.waitq);
2350 		bnxt_re_dev_stop(rdev);
2351 	}
2352 
2353 	if (rdev->pacing.dbr_pacing)
2354 		bnxt_re_set_pacing_dev_state(rdev);
2355 
2356 	ibdev_info(&rdev->ibdev, "%s: L2 driver notified to stop en_state 0x%lx",
2357 		   __func__, en_dev->en_state);
2358 	bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, adev);
2359 	mutex_unlock(&bnxt_re_mutex);
2360 
2361 	return 0;
2362 }
2363 
2364 static int bnxt_re_resume(struct auxiliary_device *adev)
2365 {
2366 	struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
2367 	struct bnxt_re_dev *rdev;
2368 
2369 	mutex_lock(&bnxt_re_mutex);
2370 	bnxt_re_add_device(adev, BNXT_RE_POST_RECOVERY_INIT);
2371 	rdev = en_info->rdev;
2372 	ibdev_info(&rdev->ibdev, "Device resume completed");
2373 	mutex_unlock(&bnxt_re_mutex);
2374 
2375 	return 0;
2376 }
2377 
2378 static const struct auxiliary_device_id bnxt_re_id_table[] = {
2379 	{ .name = BNXT_ADEV_NAME ".rdma", },
2380 	{},
2381 };
2382 
2383 MODULE_DEVICE_TABLE(auxiliary, bnxt_re_id_table);
2384 
2385 static struct auxiliary_driver bnxt_re_driver = {
2386 	.name = "rdma",
2387 	.probe = bnxt_re_probe,
2388 	.remove = bnxt_re_remove,
2389 	.shutdown = bnxt_re_shutdown,
2390 	.suspend = bnxt_re_suspend,
2391 	.resume = bnxt_re_resume,
2392 	.id_table = bnxt_re_id_table,
2393 };
2394 
2395 static int __init bnxt_re_mod_init(void)
2396 {
2397 	int rc;
2398 
2399 	pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version);
2400 	bnxt_re_register_debugfs();
2401 
2402 	rc = auxiliary_driver_register(&bnxt_re_driver);
2403 	if (rc) {
2404 		pr_err("%s: Failed to register auxiliary driver\n",
2405 			ROCE_DRV_MODULE_NAME);
2406 		goto err_debug;
2407 	}
2408 	return 0;
2409 err_debug:
2410 	bnxt_re_unregister_debugfs();
2411 	return rc;
2412 }
2413 
2414 static void __exit bnxt_re_mod_exit(void)
2415 {
2416 	auxiliary_driver_unregister(&bnxt_re_driver);
2417 	bnxt_re_unregister_debugfs();
2418 }
2419 
/* Hook the init/exit routines into module load and unload */
module_init(bnxt_re_mod_init);
module_exit(bnxt_re_mod_exit);
2422