1 /*-
2  * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
3  *
4  * Copyright (c) 2018 - 2023 Intel Corporation
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenFabrics.org BSD license below:
11  *
12  *   Redistribution and use in source and binary forms, with or
13  *   without modification, are permitted provided that the following
14  *   conditions are met:
15  *
16  *    - Redistributions of source code must retain the above
17  *	copyright notice, this list of conditions and the following
18  *	disclaimer.
19  *
20  *    - Redistributions in binary form must reproduce the above
21  *	copyright notice, this list of conditions and the following
22  *	disclaimer in the documentation and/or other materials
23  *	provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 
35 #include "irdma_main.h"
36 
37 #define IRDMA_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000)
38 
39 static u16 kc_rdma_flow_label_to_udp_sport(u32 fl) {
40 	u32 fl_low = fl & 0x03FFF;
41 	u32 fl_high = fl & 0xFC000;
42 
43 	fl_low ^= fl_high >> 14;
44 
45 	return (u16)(fl_low | IRDMA_ROCE_UDP_ENCAP_VALID_PORT_MIN);
46 }
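
/*
 * Worked example of the transform above (illustrative value, not taken
 * from any caller): for fl = 0x12345, fl_low = 0x2345 and fl_high = 0x10000,
 * so fl_low ^ (fl_high >> 14) = 0x2345 ^ 0x4 = 0x2341 and the returned UDP
 * source port is 0xC000 | 0x2341 = 0xE341, always within the RoCEv2 range
 * that starts at IRDMA_ROCE_UDP_ENCAP_VALID_PORT_MIN.
 */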
47 
48 #define IRDMA_GRH_FLOWLABEL_MASK (0x000FFFFF)
49 
50 static u32 kc_rdma_calc_flow_label(u32 lqpn, u32 rqpn) {
51 	u64 fl = (u64)lqpn * rqpn;
52 
53 	fl ^= fl >> 20;
54 	fl ^= fl >> 40;
55 
56 	return (u32)(fl & IRDMA_GRH_FLOWLABEL_MASK);
57 }
58 
59 u16
60 kc_rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn)
61 {
62 	if (!fl)
63 		fl = kc_rdma_calc_flow_label(lqpn, rqpn);
64 	return kc_rdma_flow_label_to_udp_sport(fl);
65 }
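
/*
 * Usage sketch with made-up QPNs (no caller in this file passes these
 * values):
 *
 *	kc_rdma_get_udp_sport(0, 0x1000, 0x2000);
 *
 * With no flow label supplied, kc_rdma_calc_flow_label() folds
 * 0x1000 * 0x2000 = 0x2000000 down to fl = 0x00020, which the sport helper
 * then maps to 0xC000 | 0x20 = 0xC020.
 */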
66 
67 void
68 irdma_get_dev_fw_str(struct ib_device *dev,
69 		     char *str,
70 		     size_t str_len)
71 {
72 	struct irdma_device *iwdev = to_iwdev(dev);
73 
74 	snprintf(str, str_len, "%u.%u",
75 		 irdma_fw_major_ver(&iwdev->rf->sc_dev),
76 		 irdma_fw_minor_ver(&iwdev->rf->sc_dev));
77 }
78 
79 int
80 irdma_add_gid(struct ib_device *device,
81 	      u8 port_num,
82 	      unsigned int index,
83 	      const union ib_gid *gid,
84 	      const struct ib_gid_attr *attr,
85 	      void **context)
86 {
87 	return 0;
88 }
89 
90 int
91 irdma_del_gid(struct ib_device *device,
92 	      u8 port_num,
93 	      unsigned int index,
94 	      void **context)
95 {
96 	return 0;
97 }
98 
99 /**
100  * irdma_alloc_mr - register stag for fast memory registration
101  * @pd: ibpd pointer
102  * @mr_type: memory type for stag registration
103  * @max_num_sg: max number of pages
104  * @udata: user data
105  */
106 struct ib_mr *
107 irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
108 	       u32 max_num_sg, struct ib_udata *udata)
109 {
110 	struct irdma_device *iwdev = to_iwdev(pd->device);
111 	struct irdma_pble_alloc *palloc;
112 	struct irdma_pbl *iwpbl;
113 	struct irdma_mr *iwmr;
114 	int status;
115 	u32 stag;
116 	int err_code = -ENOMEM;
117 
118 	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
119 	if (!iwmr)
120 		return ERR_PTR(-ENOMEM);
121 
122 	stag = irdma_create_stag(iwdev);
123 	if (!stag) {
124 		err_code = -ENOMEM;
125 		goto err;
126 	}
127 
128 	iwmr->stag = stag;
129 	iwmr->ibmr.rkey = stag;
130 	iwmr->ibmr.lkey = stag;
131 	iwmr->ibmr.pd = pd;
132 	iwmr->ibmr.device = pd->device;
133 	iwpbl = &iwmr->iwpbl;
134 	iwpbl->iwmr = iwmr;
135 	iwmr->type = IRDMA_MEMREG_TYPE_MEM;
136 	palloc = &iwpbl->pble_alloc;
137 	iwmr->page_cnt = max_num_sg;
138 	/* Assume system PAGE_SIZE as the sg page sizes are unknown. */
139 	iwmr->len = max_num_sg * PAGE_SIZE;
140 	status = irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt,
141 				false);
142 	if (status)
143 		goto err_get_pble;
144 
145 	err_code = irdma_hw_alloc_stag(iwdev, iwmr);
146 	if (err_code)
147 		goto err_alloc_stag;
148 
149 	iwpbl->pbl_allocated = true;
150 
151 	return &iwmr->ibmr;
152 err_alloc_stag:
153 	irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
154 err_get_pble:
155 	irdma_free_stag(iwdev, stag);
156 err:
157 	kfree(iwmr);
158 
159 	return ERR_PTR(err_code);
160 }
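
/*
 * Consumer sketch (assumes the standard OFED fast-registration flow; none
 * of these calls appear in this file): a kernel ULP would typically reach
 * this function through the ib_alloc_mr() verb, map a scatterlist into the
 * returned MR and post a registration work request, roughly:
 *
 *	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, sg_cnt);
 *	ib_map_mr_sg(mr, sgl, sg_cnt, NULL, PAGE_SIZE);
 *
 * and then build an IB_WR_REG_MR work request that carries mr->lkey.
 */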
161 
162 #define IRDMA_ALLOC_UCTX_MIN_REQ_LEN offsetofend(struct irdma_alloc_ucontext_req, rsvd8)
163 #define IRDMA_ALLOC_UCTX_MIN_RESP_LEN offsetofend(struct irdma_alloc_ucontext_resp, rsvd)
164 /**
165  * irdma_alloc_ucontext - Allocate the user context data structure
166  * @uctx: context
167  * @udata: user data
168  *
169  * This keeps track of all objects associated with a particular
170  * user-mode client.
171  */
172 int
173 irdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
174 {
175 	struct ib_device *ibdev = uctx->device;
176 	struct irdma_device *iwdev = to_iwdev(ibdev);
177 	struct irdma_alloc_ucontext_req req = {0};
178 	struct irdma_alloc_ucontext_resp uresp = {0};
179 	struct irdma_ucontext *ucontext = to_ucontext(uctx);
180 	struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs;
181 
182 	if (udata->inlen < IRDMA_ALLOC_UCTX_MIN_REQ_LEN ||
183 	    udata->outlen < IRDMA_ALLOC_UCTX_MIN_RESP_LEN)
184 		return -EINVAL;
185 
186 	if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen)))
187 		return -EINVAL;
188 
189 	if (req.userspace_ver < 4 || req.userspace_ver > IRDMA_ABI_VER)
190 		goto ver_error;
191 
192 	ucontext->iwdev = iwdev;
193 	ucontext->abi_ver = req.userspace_ver;
194 
195 	if (req.comp_mask & IRDMA_ALLOC_UCTX_USE_RAW_ATTR)
196 		ucontext->use_raw_attrs = true;
197 
198 	/* GEN_1 support for libi40iw */
199 	if (udata->outlen == IRDMA_ALLOC_UCTX_MIN_RESP_LEN) {
200 		if (uk_attrs->hw_rev != IRDMA_GEN_1)
201 			return -EOPNOTSUPP;
202 
203 		ucontext->legacy_mode = true;
204 		uresp.max_qps = iwdev->rf->max_qp;
205 		uresp.max_pds = iwdev->rf->sc_dev.hw_attrs.max_hw_pds;
206 		uresp.wq_size = iwdev->rf->sc_dev.hw_attrs.max_qp_wr * 2;
207 		uresp.kernel_ver = req.userspace_ver;
208 		if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen)))
209 			return -EFAULT;
210 	} else {
211 		u64 bar_off;
212 
213 		uresp.kernel_ver = IRDMA_ABI_VER;
214 		uresp.feature_flags = uk_attrs->feature_flags;
215 		uresp.max_hw_wq_frags = uk_attrs->max_hw_wq_frags;
216 		uresp.max_hw_read_sges = uk_attrs->max_hw_read_sges;
217 		uresp.max_hw_inline = uk_attrs->max_hw_inline;
218 		uresp.max_hw_rq_quanta = uk_attrs->max_hw_rq_quanta;
219 		uresp.max_hw_wq_quanta = uk_attrs->max_hw_wq_quanta;
220 		uresp.max_hw_sq_chunk = uk_attrs->max_hw_sq_chunk;
221 		uresp.max_hw_cq_size = uk_attrs->max_hw_cq_size;
222 		uresp.min_hw_cq_size = uk_attrs->min_hw_cq_size;
223 		uresp.hw_rev = uk_attrs->hw_rev;
224 		uresp.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR;
225 		uresp.min_hw_wq_size = uk_attrs->min_hw_wq_size;
226 		uresp.comp_mask |= IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE;
227 
228 		bar_off =
229 		    (uintptr_t)iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET];
230 		ucontext->db_mmap_entry =
231 		    irdma_user_mmap_entry_insert(ucontext, bar_off,
232 						 IRDMA_MMAP_IO_NC,
233 						 &uresp.db_mmap_key);
234 		if (!ucontext->db_mmap_entry) {
235 			return -ENOMEM;
236 		}
237 
238 		if (ib_copy_to_udata(udata, &uresp,
239 				     min(sizeof(uresp), udata->outlen))) {
240 			rdma_user_mmap_entry_remove(ucontext->db_mmap_entry);
241 			return -EFAULT;
242 		}
243 	}
244 
245 	INIT_LIST_HEAD(&ucontext->cq_reg_mem_list);
246 	spin_lock_init(&ucontext->cq_reg_mem_list_lock);
247 	INIT_LIST_HEAD(&ucontext->qp_reg_mem_list);
248 	spin_lock_init(&ucontext->qp_reg_mem_list_lock);
249 	INIT_LIST_HEAD(&ucontext->vma_list);
250 	mutex_init(&ucontext->vma_list_mutex);
251 
252 	return 0;
253 
254 ver_error:
255 	irdma_dev_err(&iwdev->ibdev,
256 		      "Invalid userspace driver version detected. Detected version %d, should be %d\n",
257 		      req.userspace_ver, IRDMA_ABI_VER);
258 	return -EINVAL;
259 }
260 
261 
262 /**
263  * irdma_dealloc_ucontext - deallocate the user context data structure
264  * @context: user context created during alloc
265  */
266 void
267 irdma_dealloc_ucontext(struct ib_ucontext *context)
268 {
269 	struct irdma_ucontext *ucontext = to_ucontext(context);
270 
271 	rdma_user_mmap_entry_remove(ucontext->db_mmap_entry);
272 
273 	return;
274 }
275 
276 
277 #define IRDMA_ALLOC_PD_MIN_RESP_LEN offsetofend(struct irdma_alloc_pd_resp, rsvd)
278 /**
279  * irdma_alloc_pd - allocate protection domain
280  * @pd: protection domain
281  * @udata: user data
282  */
283 int
284 irdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata)
285 {
286 	struct irdma_pd *iwpd = to_iwpd(pd);
287 	struct irdma_device *iwdev = to_iwdev(pd->device);
288 	struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
289 	struct irdma_pci_f *rf = iwdev->rf;
290 	struct irdma_alloc_pd_resp uresp = {0};
291 	struct irdma_sc_pd *sc_pd;
292 	u32 pd_id = 0;
293 	int err;
294 
295 	if (udata && udata->outlen < IRDMA_ALLOC_PD_MIN_RESP_LEN)
296 		return -EINVAL;
297 
298 	err = irdma_alloc_rsrc(rf, rf->allocated_pds, rf->max_pd, &pd_id,
299 			       &rf->next_pd);
300 	if (err)
301 		return err;
302 
303 	sc_pd = &iwpd->sc_pd;
304 	if (udata) {
305 		struct irdma_ucontext *ucontext =
306 		rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext);
307 
308 		irdma_sc_pd_init(dev, sc_pd, pd_id, ucontext->abi_ver);
309 		uresp.pd_id = pd_id;
310 		if (ib_copy_to_udata(udata, &uresp,
311 				     min(sizeof(uresp), udata->outlen))) {
312 			err = -EFAULT;
313 			goto error;
314 		}
315 	} else {
316 		irdma_sc_pd_init(dev, sc_pd, pd_id, IRDMA_ABI_VER);
317 	}
318 
319 	spin_lock_init(&iwpd->udqp_list_lock);
320 	INIT_LIST_HEAD(&iwpd->udqp_list);
321 
322 	return 0;
323 
324 error:
325 
326 	irdma_free_rsrc(rf, rf->allocated_pds, pd_id);
327 
328 	return err;
329 }
330 
331 
332 void
333 irdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
334 {
335 	struct irdma_pd *iwpd = to_iwpd(ibpd);
336 	struct irdma_device *iwdev = to_iwdev(ibpd->device);
337 
338 	irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_pds, iwpd->sc_pd.pd_id);
339 }
340 
341 
342 
343 /**
344  * irdma_find_qp_update_qs - update QS handle for UD QPs
345  * @rf: RDMA PCI function
346  * @pd: protection domain object
347  * @user_pri: selected user priority
348  */
349 static void
350 irdma_find_qp_update_qs(struct irdma_pci_f *rf,
351 			struct irdma_pd *pd, u8 user_pri)
352 {
353 	struct irdma_qp *iwqp;
354 	struct list_head *tmp_node, *list_node;
355 	struct irdma_udqs_work *work;
356 	unsigned long flags;
357 	bool qs_change;
358 
359 	spin_lock_irqsave(&pd->udqp_list_lock, flags);
360 	list_for_each_safe(list_node, tmp_node, &pd->udqp_list) {
361 		qs_change = true;
362 		iwqp = list_entry(list_node, struct irdma_qp, ud_list_elem);
363 		irdma_qp_add_ref(&iwqp->ibqp);
364 		/* check if qs_handle needs to be changed */
365 		if (iwqp->sc_qp.qs_handle == iwqp->sc_qp.vsi->qos[user_pri].qs_handle) {
366 			if (iwqp->ctx_info.user_pri == user_pri) {
367 				/* qs_handle and user_pri don't change */
368 				irdma_qp_rem_ref(&iwqp->ibqp);
369 				continue;
370 			}
371 			qs_change = false;
372 		}
373 		/* perform qp qos change */
374 		work = kzalloc(sizeof(*work), GFP_ATOMIC);
375 		if (!work) {
376 			irdma_qp_rem_ref(&iwqp->ibqp);
377 			spin_unlock_irqrestore(&pd->udqp_list_lock, flags);
378 			return;
379 		}
380 		work->iwqp = iwqp;
381 		work->user_prio = user_pri;
382 		work->qs_change = qs_change;
383 		INIT_WORK(&work->work, irdma_udqp_qs_worker);
384 		if (qs_change)
385 			irdma_cqp_qp_suspend_resume(&iwqp->sc_qp, IRDMA_OP_SUSPEND);
386 		queue_work(rf->iwdev->cleanup_wq, &work->work);
387 	}
388 	spin_unlock_irqrestore(&pd->udqp_list_lock, flags);
389 }
390 
391 static void
392 irdma_fill_ah_info(struct vnet *vnet, struct irdma_ah_info *ah_info,
393 		   const struct ib_gid_attr *sgid_attr,
394 		   union irdma_sockaddr *sgid_addr,
395 		   union irdma_sockaddr *dgid_addr,
396 		   u8 *dmac, u8 net_type)
397 {
398 	if (net_type == RDMA_NETWORK_IPV4) {
399 		ah_info->ipv4_valid = true;
400 		ah_info->dest_ip_addr[0] =
401 		    ntohl(dgid_addr->saddr_in.sin_addr.s_addr);
402 		ah_info->src_ip_addr[0] =
403 		    ntohl(sgid_addr->saddr_in.sin_addr.s_addr);
404 		CURVNET_SET_QUIET(vnet);
405 		ah_info->do_lpbk = irdma_ipv4_is_lpb(ah_info->src_ip_addr[0],
406 						     ah_info->dest_ip_addr[0]);
407 		CURVNET_RESTORE();
408 		if (ipv4_is_multicast(dgid_addr->saddr_in.sin_addr.s_addr)) {
409 			irdma_mcast_mac_v4(ah_info->dest_ip_addr, dmac);
410 		}
411 	} else {
412 		irdma_copy_ip_ntohl(ah_info->dest_ip_addr,
413 				    dgid_addr->saddr_in6.sin6_addr.__u6_addr.__u6_addr32);
414 		irdma_copy_ip_ntohl(ah_info->src_ip_addr,
415 				    sgid_addr->saddr_in6.sin6_addr.__u6_addr.__u6_addr32);
416 		ah_info->do_lpbk = irdma_ipv6_is_lpb(ah_info->src_ip_addr,
417 						     ah_info->dest_ip_addr);
418 		if (rdma_is_multicast_addr(&dgid_addr->saddr_in6.sin6_addr)) {
419 			irdma_mcast_mac_v6(ah_info->dest_ip_addr, dmac);
420 		}
421 	}
422 }
423 
424 static inline u8 irdma_roce_get_vlan_prio(if_t ndev, u8 prio)
425 {
426 	return prio;
427 }
428 
429 static int
430 irdma_create_ah_vlan_tag(struct irdma_device *iwdev,
431 			 struct irdma_pd *pd,
432 			 struct irdma_ah_info *ah_info,
433 			 const struct ib_gid_attr *sgid_attr,
434 			 u8 *dmac)
435 {
436 	u16 vlan_prio;
437 
438 	if (sgid_attr->ndev && is_vlan_dev(sgid_attr->ndev))
439 		ah_info->vlan_tag = vlan_dev_vlan_id(sgid_attr->ndev);
440 	else
441 		ah_info->vlan_tag = VLAN_N_VID;
442 
443 	ah_info->dst_arpindex = irdma_add_arp(iwdev->rf, ah_info->dest_ip_addr, dmac);
444 
445 	if (ah_info->dst_arpindex == -1)
446 		return -EINVAL;
447 
448 	if (ah_info->vlan_tag >= VLAN_N_VID && iwdev->dcb_vlan_mode)
449 		ah_info->vlan_tag = 0;
450 
451 	if (ah_info->vlan_tag < VLAN_N_VID) {
452 		ah_info->insert_vlan_tag = true;
453 		vlan_prio = (u16)irdma_roce_get_vlan_prio(sgid_attr->ndev,
454 							  rt_tos2priority(ah_info->tc_tos));
455 		ah_info->vlan_tag |= vlan_prio << VLAN_PRIO_SHIFT;
456 		irdma_find_qp_update_qs(iwdev->rf, pd, vlan_prio);
457 	}
458 	if (iwdev->roce_dcqcn_en) {
459 		ah_info->tc_tos &= ~ECN_CODE_PT_MASK;
460 		ah_info->tc_tos |= ECN_CODE_PT_VAL;
461 	}
462 
463 	return 0;
464 }
465 
466 static int
467 irdma_create_ah_wait(struct irdma_pci_f *rf,
468 		     struct irdma_sc_ah *sc_ah, bool sleep)
469 {
470 	int ret;
471 
472 	if (!sleep) {
473 		int cnt = rf->sc_dev.hw_attrs.max_cqp_compl_wait_time_ms *
474 		CQP_TIMEOUT_THRESHOLD;
475 		struct irdma_cqp_request *cqp_request =
476 		sc_ah->ah_info.cqp_request;
477 
478 		do {
479 			irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq);
480 			mdelay(1);
481 		} while (!READ_ONCE(cqp_request->request_done) && --cnt);
482 
483 		if (cnt && !cqp_request->compl_info.op_ret_val) {
484 			irdma_put_cqp_request(&rf->cqp, cqp_request);
485 			sc_ah->ah_info.ah_valid = true;
486 		} else {
487 			ret = !cnt ? -ETIMEDOUT : -EINVAL;
488 			irdma_dev_err(&rf->iwdev->ibdev, "CQP create AH error ret = %d op_ret_val = %d",
489 				      ret, cqp_request->compl_info.op_ret_val);
490 			irdma_put_cqp_request(&rf->cqp, cqp_request);
491 			if (!cnt && !rf->reset) {
492 				rf->reset = true;
493 				rf->gen_ops.request_reset(rf);
494 			}
495 			return ret;
496 		}
497 	}
498 
499 	return 0;
500 }
501 
502 #define IRDMA_CREATE_AH_MIN_RESP_LEN offsetofend(struct irdma_create_ah_resp, rsvd)
503 
504 /**
505  * irdma_create_ah - create address handle
506  * @ib_ah: ptr to AH
507  * @attr: address handle attributes
508  * @flags: AH creation flags (e.g. RDMA_CREATE_AH_SLEEPABLE)
509  * @udata: user data
510  *
511  * returns 0 on success, error otherwise
512  */
513 int
514 irdma_create_ah(struct ib_ah *ib_ah,
515 		struct ib_ah_attr *attr, u32 flags,
516 		struct ib_udata *udata)
517 {
518 	struct irdma_pd *pd = to_iwpd(ib_ah->pd);
519 	struct irdma_ah *ah = container_of(ib_ah, struct irdma_ah, ibah);
520 	struct irdma_device *iwdev = to_iwdev(ib_ah->pd->device);
521 	union ib_gid sgid;
522 	struct ib_gid_attr sgid_attr;
523 	struct irdma_pci_f *rf = iwdev->rf;
524 	struct irdma_sc_ah *sc_ah;
525 	u32 ah_id = 0;
526 	struct irdma_ah_info *ah_info;
527 	struct irdma_create_ah_resp uresp = {};
528 	union irdma_sockaddr sgid_addr, dgid_addr;
529 	int err;
530 	u8 dmac[ETHER_ADDR_LEN];
531 	bool sleep = (flags & RDMA_CREATE_AH_SLEEPABLE) != 0;
532 
533 	if (udata && udata->outlen < IRDMA_CREATE_AH_MIN_RESP_LEN)
534 		return -EINVAL;
535 
536 	err = irdma_alloc_rsrc(rf, rf->allocated_ahs,
537 			       rf->max_ah, &ah_id, &rf->next_ah);
538 
539 	if (err)
540 		return err;
541 
542 	ah->pd = pd;
543 	sc_ah = &ah->sc_ah;
544 	sc_ah->ah_info.ah_idx = ah_id;
545 	sc_ah->ah_info.vsi = &iwdev->vsi;
546 	irdma_sc_init_ah(&rf->sc_dev, sc_ah);
547 	ah->sgid_index = attr->grh.sgid_index;
548 	memcpy(&ah->dgid, &attr->grh.dgid, sizeof(ah->dgid));
549 	rcu_read_lock();
550 	err = ib_get_cached_gid(&iwdev->ibdev, attr->port_num,
551 				attr->grh.sgid_index, &sgid, &sgid_attr);
552 	rcu_read_unlock();
553 	if (err) {
554 		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
555 			    "GID lookup at idx=%d with port=%d failed\n",
556 			    attr->grh.sgid_index, attr->port_num);
557 		err = -EINVAL;
558 		goto err_gid_l2;
559 	}
560 	rdma_gid2ip((struct sockaddr *)&sgid_addr, &sgid);
561 	rdma_gid2ip((struct sockaddr *)&dgid_addr, &attr->grh.dgid);
562 	ah->av.attrs = *attr;
563 	ah->av.net_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
564 
565 	if (sgid_attr.ndev)
566 		dev_put(sgid_attr.ndev);
567 
568 	ah_info = &sc_ah->ah_info;
569 	ah_info->ah_idx = ah_id;
570 	ah_info->pd_idx = pd->sc_pd.pd_id;
571 	ether_addr_copy(ah_info->mac_addr, if_getlladdr(iwdev->netdev));
572 
573 	if (attr->ah_flags & IB_AH_GRH) {
574 		ah_info->flow_label = attr->grh.flow_label;
575 		ah_info->hop_ttl = attr->grh.hop_limit;
576 		ah_info->tc_tos = attr->grh.traffic_class;
577 	}
578 
579 	ether_addr_copy(dmac, attr->dmac);
580 
581 	irdma_fill_ah_info(if_getvnet(iwdev->netdev), ah_info, &sgid_attr, &sgid_addr, &dgid_addr,
582 			   dmac, ah->av.net_type);
583 
584 	err = irdma_create_ah_vlan_tag(iwdev, pd, ah_info, &sgid_attr, dmac);
585 	if (err)
586 		goto err_gid_l2;
587 
588 	err = irdma_ah_cqp_op(iwdev->rf, sc_ah, IRDMA_OP_AH_CREATE,
589 			      sleep, NULL, sc_ah);
590 	if (err) {
591 		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DEV, "CQP-OP Create AH fail");
592 		goto err_gid_l2;
593 	}
594 
595 	err = irdma_create_ah_wait(rf, sc_ah, sleep);
596 	if (err)
597 		goto err_gid_l2;
598 
599 	if (udata) {
600 		uresp.ah_id = ah->sc_ah.ah_info.ah_idx;
601 		err = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen));
602 		if (err) {
603 			irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah,
604 					IRDMA_OP_AH_DESTROY, false, NULL, ah);
605 			goto err_gid_l2;
606 		}
607 	}
608 
609 	return 0;
610 err_gid_l2:
611 	irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah_id);
612 
613 	return err;
614 }
615 
616 void
617 irdma_ether_copy(u8 *dmac, struct ib_ah_attr *attr)
618 {
619 	ether_addr_copy(dmac, attr->dmac);
620 }
621 
622 int
623 irdma_create_ah_stub(struct ib_ah *ib_ah,
624 		     struct ib_ah_attr *attr, u32 flags,
625 		     struct ib_udata *udata)
626 {
627 	return -ENOSYS;
628 }
629 
630 void
631 irdma_destroy_ah_stub(struct ib_ah *ibah, u32 flags)
632 {
633 	return;
634 }
635 
636 
637 /**
638  * irdma_free_qp_rsrc - free up memory resources for qp
639  * @iwqp: qp ptr (user or kernel)
640  */
641 void
642 irdma_free_qp_rsrc(struct irdma_qp *iwqp)
643 {
644 	struct irdma_device *iwdev = iwqp->iwdev;
645 	struct irdma_pci_f *rf = iwdev->rf;
646 	u32 qp_num = iwqp->ibqp.qp_num;
647 
648 	irdma_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp);
649 	irdma_dealloc_push_page(rf, iwqp);
650 	if (iwqp->sc_qp.vsi) {
651 		irdma_qp_rem_qos(&iwqp->sc_qp);
652 		iwqp->sc_qp.dev->ws_remove(iwqp->sc_qp.vsi,
653 					   iwqp->sc_qp.user_pri);
654 	}
655 
656 	if (qp_num > 2)
657 		irdma_free_rsrc(rf, rf->allocated_qps, qp_num);
658 	irdma_free_dma_mem(rf->sc_dev.hw, &iwqp->q2_ctx_mem);
659 	irdma_free_dma_mem(rf->sc_dev.hw, &iwqp->kqp.dma_mem);
660 	kfree(iwqp->kqp.sig_trk_mem);
661 	iwqp->kqp.sig_trk_mem = NULL;
662 	kfree(iwqp->kqp.sq_wrid_mem);
663 	kfree(iwqp->kqp.rq_wrid_mem);
664 	kfree(iwqp->sg_list);
665 	kfree(iwqp);
666 }
667 
668 /**
669  * irdma_create_qp - create qp
670  * @ibpd: ptr of pd
671  * @init_attr: attributes for qp
672  * @udata: user data for create qp
673  */
674 struct ib_qp *
675 irdma_create_qp(struct ib_pd *ibpd,
676 		struct ib_qp_init_attr *init_attr,
677 		struct ib_udata *udata)
678 {
679 #define IRDMA_CREATE_QP_MIN_REQ_LEN offsetofend(struct irdma_create_qp_req, user_compl_ctx)
680 #define IRDMA_CREATE_QP_MIN_RESP_LEN offsetofend(struct irdma_create_qp_resp, rsvd)
681 	struct irdma_pd *iwpd = to_iwpd(ibpd);
682 	struct irdma_device *iwdev = to_iwdev(ibpd->device);
683 	struct irdma_pci_f *rf = iwdev->rf;
684 	struct irdma_qp *iwqp;
685 	struct irdma_create_qp_resp uresp = {0};
686 	u32 qp_num = 0;
687 	int ret;
688 	int err_code;
689 	struct irdma_sc_qp *qp;
690 	struct irdma_sc_dev *dev = &rf->sc_dev;
691 	struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs;
692 	struct irdma_qp_init_info init_info = {{0}};
693 	struct irdma_qp_host_ctx_info *ctx_info;
694 	unsigned long flags;
695 
696 	err_code = irdma_validate_qp_attrs(init_attr, iwdev);
697 	if (err_code)
698 		return ERR_PTR(err_code);
699 
700 	if (udata && (udata->inlen < IRDMA_CREATE_QP_MIN_REQ_LEN ||
701 		      udata->outlen < IRDMA_CREATE_QP_MIN_RESP_LEN))
702 		return ERR_PTR(-EINVAL);
703 
704 	init_info.vsi = &iwdev->vsi;
705 	init_info.qp_uk_init_info.uk_attrs = uk_attrs;
706 	init_info.qp_uk_init_info.sq_size = init_attr->cap.max_send_wr;
707 	init_info.qp_uk_init_info.rq_size = init_attr->cap.max_recv_wr;
708 	init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
709 	init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
710 	init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data;
711 
712 	iwqp = kzalloc(sizeof(*iwqp), GFP_KERNEL);
713 	if (!iwqp)
714 		return ERR_PTR(-ENOMEM);
715 
716 	iwqp->sg_list = kcalloc(uk_attrs->max_hw_wq_frags, sizeof(*iwqp->sg_list),
717 				GFP_KERNEL);
718 	if (!iwqp->sg_list) {
719 		kfree(iwqp);
720 		return ERR_PTR(-ENOMEM);
721 	}
722 
723 	qp = &iwqp->sc_qp;
724 	qp->qp_uk.back_qp = iwqp;
725 	qp->qp_uk.lock = &iwqp->lock;
726 	qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX;
727 
728 	iwqp->iwdev = iwdev;
729 	iwqp->q2_ctx_mem.size = IRDMA_Q2_BUF_SIZE + IRDMA_QP_CTX_SIZE;
730 	iwqp->q2_ctx_mem.va = irdma_allocate_dma_mem(dev->hw, &iwqp->q2_ctx_mem,
731 						     iwqp->q2_ctx_mem.size,
732 						     256);
733 	if (!iwqp->q2_ctx_mem.va) {
734 		kfree(iwqp->sg_list);
735 		kfree(iwqp);
736 		return ERR_PTR(-ENOMEM);
737 	}
738 
739 	init_info.q2 = iwqp->q2_ctx_mem.va;
740 	init_info.q2_pa = iwqp->q2_ctx_mem.pa;
741 	init_info.host_ctx = (__le64 *) (init_info.q2 + IRDMA_Q2_BUF_SIZE);
742 	init_info.host_ctx_pa = init_info.q2_pa + IRDMA_Q2_BUF_SIZE;
743 
744 	if (init_attr->qp_type == IB_QPT_GSI)
745 		qp_num = 1;
746 	else
747 		err_code = irdma_alloc_rsrc(rf, rf->allocated_qps, rf->max_qp,
748 					    &qp_num, &rf->next_qp);
749 	if (err_code)
750 		goto error;
751 
752 	iwqp->iwpd = iwpd;
753 	iwqp->ibqp.qp_num = qp_num;
754 	qp = &iwqp->sc_qp;
755 	iwqp->iwscq = to_iwcq(init_attr->send_cq);
756 	iwqp->iwrcq = to_iwcq(init_attr->recv_cq);
757 	iwqp->host_ctx.va = init_info.host_ctx;
758 	iwqp->host_ctx.pa = init_info.host_ctx_pa;
759 	iwqp->host_ctx.size = IRDMA_QP_CTX_SIZE;
760 
761 	init_info.pd = &iwpd->sc_pd;
762 	init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num;
763 	if (!rdma_protocol_roce(&iwdev->ibdev, 1))
764 		init_info.qp_uk_init_info.first_sq_wq = 1;
765 	iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
766 	init_waitqueue_head(&iwqp->waitq);
767 	init_waitqueue_head(&iwqp->mod_qp_waitq);
768 
769 	spin_lock_init(&iwqp->dwork_flush_lock);
770 
771 	if (udata) {
772 		init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver;
773 		err_code = irdma_setup_umode_qp(udata, iwdev, iwqp, &init_info, init_attr);
774 	} else {
775 		INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker);
776 		init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER;
777 		err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr);
778 	}
779 
780 	if (err_code) {
781 		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "setup qp failed\n");
782 		goto error;
783 	}
784 
785 	if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
786 		if (init_attr->qp_type == IB_QPT_RC) {
787 			init_info.qp_uk_init_info.type = IRDMA_QP_TYPE_ROCE_RC;
788 			init_info.qp_uk_init_info.qp_caps = IRDMA_SEND_WITH_IMM |
789 			    IRDMA_WRITE_WITH_IMM |
790 			    IRDMA_ROCE;
791 		} else {
792 			init_info.qp_uk_init_info.type = IRDMA_QP_TYPE_ROCE_UD;
793 			init_info.qp_uk_init_info.qp_caps = IRDMA_SEND_WITH_IMM |
794 			    IRDMA_ROCE;
795 		}
796 	} else {
797 		init_info.qp_uk_init_info.type = IRDMA_QP_TYPE_IWARP;
798 		init_info.qp_uk_init_info.qp_caps = IRDMA_WRITE_WITH_IMM;
799 	}
800 
801 	ret = irdma_sc_qp_init(qp, &init_info);
802 	if (ret) {
803 		err_code = -EPROTO;
804 		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "qp_init fail\n");
805 		goto error;
806 	}
807 
808 	ctx_info = &iwqp->ctx_info;
809 	ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
810 	ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
811 
812 	if (rdma_protocol_roce(&iwdev->ibdev, 1))
813 		irdma_roce_fill_and_set_qpctx_info(iwqp, ctx_info);
814 	else
815 		irdma_iw_fill_and_set_qpctx_info(iwqp, ctx_info);
816 
817 	err_code = irdma_cqp_create_qp_cmd(iwqp);
818 	if (err_code)
819 		goto error;
820 
821 	atomic_set(&iwqp->refcnt, 1);
822 	spin_lock_init(&iwqp->lock);
823 	spin_lock_init(&iwqp->sc_qp.pfpdu.lock);
824 	iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
825 	rf->qp_table[qp_num] = iwqp;
826 
827 	if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
828 		if (dev->ws_add(&iwdev->vsi, 0)) {
829 			irdma_cqp_qp_destroy_cmd(&rf->sc_dev, &iwqp->sc_qp);
830 			err_code = -EINVAL;
831 			goto error;
832 		}
833 
834 		irdma_qp_add_qos(&iwqp->sc_qp);
835 		spin_lock_irqsave(&iwpd->udqp_list_lock, flags);
836 		if (iwqp->sc_qp.qp_uk.qp_type == IRDMA_QP_TYPE_ROCE_UD)
837 			list_add_tail(&iwqp->ud_list_elem, &iwpd->udqp_list);
838 		spin_unlock_irqrestore(&iwpd->udqp_list_lock, flags);
839 	}
840 
841 	if (udata) {
842 		/* GEN_1 legacy support with libi40iw does not have expanded uresp struct */
843 		if (udata->outlen == IRDMA_CREATE_QP_MIN_RESP_LEN) {
844 			uresp.lsmm = 1;
845 			uresp.push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX_GEN_1;
846 		} else {
847 			if (rdma_protocol_iwarp(&iwdev->ibdev, 1)) {
848 				uresp.lsmm = 1;
849 				if (qp->qp_uk.start_wqe_idx) {
850 					uresp.comp_mask |= IRDMA_CREATE_QP_USE_START_WQE_IDX;
851 					uresp.start_wqe_idx = qp->qp_uk.start_wqe_idx;
852 				}
853 			}
854 		}
855 		uresp.actual_sq_size = init_info.qp_uk_init_info.sq_size;
856 		uresp.actual_rq_size = init_info.qp_uk_init_info.rq_size;
857 		uresp.qp_id = qp_num;
858 		uresp.qp_caps = qp->qp_uk.qp_caps;
859 
860 		err_code = ib_copy_to_udata(udata, &uresp,
861 					    min(sizeof(uresp), udata->outlen));
862 		if (err_code) {
863 			irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "copy_to_udata failed\n");
864 			irdma_destroy_qp(&iwqp->ibqp, udata);
865 			return ERR_PTR(err_code);
866 		}
867 	}
868 
869 	init_completion(&iwqp->free_qp);
870 	return &iwqp->ibqp;
871 
872 error:
873 	irdma_free_qp_rsrc(iwqp);
874 
875 	return ERR_PTR(err_code);
876 }
877 
878 /**
879  * irdma_destroy_qp - destroy qp
880  * @ibqp: qp's ib pointer, also used to get to the device's qp address
881  * @udata: user data
882  */
883 int
884 irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
885 {
886 	struct irdma_qp *iwqp = to_iwqp(ibqp);
887 	struct irdma_device *iwdev = iwqp->iwdev;
888 	unsigned long flags;
889 
890 	if (iwqp->sc_qp.qp_uk.destroy_pending)
891 		goto free_rsrc;
892 	iwqp->sc_qp.qp_uk.destroy_pending = true;
893 
894 	spin_lock_irqsave(&iwqp->iwpd->udqp_list_lock, flags);
895 	if (iwqp->sc_qp.qp_uk.qp_type == IRDMA_QP_TYPE_ROCE_UD)
896 		list_del(&iwqp->ud_list_elem);
897 	spin_unlock_irqrestore(&iwqp->iwpd->udqp_list_lock, flags);
898 
899 	if (iwqp->iwarp_state >= IRDMA_QP_STATE_IDLE)
900 		irdma_modify_qp_to_err(&iwqp->sc_qp);
901 
902 	if (!iwqp->user_mode) {
903 		if (iwqp->iwscq) {
904 			irdma_clean_cqes(iwqp, iwqp->iwscq);
905 			if (iwqp->iwrcq != iwqp->iwscq)
906 				irdma_clean_cqes(iwqp, iwqp->iwrcq);
907 		}
908 	}
909 	irdma_qp_rem_ref(&iwqp->ibqp);
910 	wait_for_completion(&iwqp->free_qp);
911 	irdma_free_lsmm_rsrc(iwqp);
912 	if (!iwdev->rf->reset && irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp))
913 		return (iwdev->rf->rdma_ver <= IRDMA_GEN_2 && !iwqp->user_mode) ? 0 : -ENOTRECOVERABLE;
914 free_rsrc:
915 	irdma_remove_push_mmap_entries(iwqp);
916 	irdma_free_qp_rsrc(iwqp);
917 
918 	return 0;
919 }
920 
921 /**
922  * irdma_create_cq - create cq
923  * @ibcq: CQ allocated
924  * @attr: attributes for cq
925  * @udata: user data
926  */
927 int
928 irdma_create_cq(struct ib_cq *ibcq,
929 		const struct ib_cq_init_attr *attr,
930 		struct ib_udata *udata)
931 {
932 #define IRDMA_CREATE_CQ_MIN_REQ_LEN offsetofend(struct irdma_create_cq_req, user_cq_buf)
933 #define IRDMA_CREATE_CQ_MIN_RESP_LEN offsetofend(struct irdma_create_cq_resp, cq_size)
934 	struct ib_device *ibdev = ibcq->device;
935 	struct irdma_device *iwdev = to_iwdev(ibdev);
936 	struct irdma_pci_f *rf = iwdev->rf;
937 	struct irdma_cq *iwcq = to_iwcq(ibcq);
938 	u32 cq_num = 0;
939 	struct irdma_sc_cq *cq;
940 	struct irdma_sc_dev *dev = &rf->sc_dev;
941 	struct irdma_cq_init_info info = {0};
942 	int status;
943 	struct irdma_cqp_request *cqp_request;
944 	struct cqp_cmds_info *cqp_info;
945 	struct irdma_cq_uk_init_info *ukinfo = &info.cq_uk_init_info;
946 	unsigned long flags;
947 	int err_code;
948 	int entries = attr->cqe;
949 	bool cqe_64byte_ena;
950 
951 	err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev);
952 	if (err_code)
953 		return err_code;
954 
955 	if (udata && (udata->inlen < IRDMA_CREATE_CQ_MIN_REQ_LEN ||
956 		      udata->outlen < IRDMA_CREATE_CQ_MIN_RESP_LEN))
957 		return -EINVAL;
958 	err_code = irdma_alloc_rsrc(rf, rf->allocated_cqs, rf->max_cq, &cq_num,
959 				    &rf->next_cq);
960 	if (err_code)
961 		return err_code;
962 	cq = &iwcq->sc_cq;
963 	cq->back_cq = iwcq;
964 	atomic_set(&iwcq->refcnt, 1);
965 	spin_lock_init(&iwcq->lock);
966 	INIT_LIST_HEAD(&iwcq->resize_list);
967 	INIT_LIST_HEAD(&iwcq->cmpl_generated);
968 	info.dev = dev;
969 	ukinfo->cq_size = max(entries, 4);
970 	ukinfo->cq_id = cq_num;
971 	cqe_64byte_ena = (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_64_BYTE_CQE) ? true : false;
972 	ukinfo->avoid_mem_cflct = cqe_64byte_ena;
973 	iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
974 	atomic_set(&iwcq->armed, 0);
975 	if (attr->comp_vector < rf->ceqs_count)
976 		info.ceq_id = attr->comp_vector;
977 	info.ceq_id_valid = true;
978 	info.ceqe_mask = 1;
979 	info.type = IRDMA_CQ_TYPE_IWARP;
980 	info.vsi = &iwdev->vsi;
981 
982 	if (udata) {
983 		struct irdma_ucontext *ucontext;
984 		struct irdma_create_cq_req req = {0};
985 		struct irdma_cq_mr *cqmr;
986 		struct irdma_pbl *iwpbl;
987 		struct irdma_pbl *iwpbl_shadow;
988 		struct irdma_cq_mr *cqmr_shadow;
989 
990 		iwcq->user_mode = true;
991 		ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext);
992 
993 		if (ib_copy_from_udata(&req, udata,
994 				       min(sizeof(req), udata->inlen))) {
995 			err_code = -EFAULT;
996 			goto cq_free_rsrc;
997 		}
998 
999 		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
1000 		iwpbl = irdma_get_pbl((unsigned long)req.user_cq_buf,
1001 				      &ucontext->cq_reg_mem_list);
1002 		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
1003 		if (!iwpbl) {
1004 			err_code = -EPROTO;
1005 			goto cq_free_rsrc;
1006 		}
1007 		iwcq->iwpbl = iwpbl;
1008 		iwcq->cq_mem_size = 0;
1009 		cqmr = &iwpbl->cq_mr;
1010 
1011 		if (rf->sc_dev.hw_attrs.uk_attrs.feature_flags &
1012 		    IRDMA_FEATURE_CQ_RESIZE && !ucontext->legacy_mode) {
1013 			spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
1014 			iwpbl_shadow = irdma_get_pbl((unsigned long)req.user_shadow_area,
1015 						     &ucontext->cq_reg_mem_list);
1016 			spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
1017 
1018 			if (!iwpbl_shadow) {
1019 				err_code = -EPROTO;
1020 				goto cq_free_rsrc;
1021 			}
1022 			iwcq->iwpbl_shadow = iwpbl_shadow;
1023 			cqmr_shadow = &iwpbl_shadow->cq_mr;
1024 			info.shadow_area_pa = cqmr_shadow->cq_pbl.addr;
1025 			cqmr->split = true;
1026 		} else {
1027 			info.shadow_area_pa = cqmr->shadow;
1028 		}
1029 		if (iwpbl->pbl_allocated) {
1030 			info.virtual_map = true;
1031 			info.pbl_chunk_size = 1;
1032 			info.first_pm_pbl_idx = cqmr->cq_pbl.idx;
1033 		} else {
1034 			info.cq_base_pa = cqmr->cq_pbl.addr;
1035 		}
1036 	} else {
1037 		/* Kmode allocations */
1038 		int rsize;
1039 
1040 		if (entries < 1 || entries > rf->max_cqe) {
1041 			err_code = -EINVAL;
1042 			goto cq_free_rsrc;
1043 		}
1044 
1045 		entries++;
1046 		if (!cqe_64byte_ena && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
1047 			entries *= 2;
1048 		ukinfo->cq_size = entries;
1049 
1050 		if (cqe_64byte_ena)
1051 			rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_extended_cqe);
1052 		else
1053 			rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe);
1054 		iwcq->kmem.size = round_up(rsize, IRDMA_HW_PAGE_SIZE);
1055 		iwcq->kmem.va = irdma_allocate_dma_mem(dev->hw, &iwcq->kmem,
1056 						       iwcq->kmem.size, IRDMA_HW_PAGE_SIZE);
1057 		if (!iwcq->kmem.va) {
1058 			err_code = -ENOMEM;
1059 			goto cq_free_rsrc;
1060 		}
1061 
1062 		iwcq->kmem_shadow.size = IRDMA_SHADOW_AREA_SIZE << 3;
1063 		iwcq->kmem_shadow.va = irdma_allocate_dma_mem(dev->hw,
1064 							      &iwcq->kmem_shadow,
1065 							      iwcq->kmem_shadow.size,
1066 							      64);
1067 
1068 		if (!iwcq->kmem_shadow.va) {
1069 			err_code = -ENOMEM;
1070 			goto cq_kmem_free;
1071 		}
1072 		info.shadow_area_pa = iwcq->kmem_shadow.pa;
1073 		ukinfo->shadow_area = iwcq->kmem_shadow.va;
1074 		ukinfo->cq_base = iwcq->kmem.va;
1075 		info.cq_base_pa = iwcq->kmem.pa;
1076 	}
1077 
1078 	info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2,
1079 					 (u32)IRDMA_MAX_CQ_READ_THRESH);
1080 	if (irdma_sc_cq_init(cq, &info)) {
1081 		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "init cq fail\n");
1082 		err_code = -EPROTO;
1083 		goto cq_kmem_free;
1084 	}
1085 
1086 	cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
1087 	if (!cqp_request) {
1088 		err_code = -ENOMEM;
1089 		goto cq_kmem_free;
1090 	}
1091 	cqp_info = &cqp_request->info;
1092 	cqp_info->cqp_cmd = IRDMA_OP_CQ_CREATE;
1093 	cqp_info->post_sq = 1;
1094 	cqp_info->in.u.cq_create.cq = cq;
1095 	cqp_info->in.u.cq_create.check_overflow = true;
1096 	cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
1097 	status = irdma_handle_cqp_op(rf, cqp_request);
1098 	irdma_put_cqp_request(&rf->cqp, cqp_request);
1099 	if (status) {
1100 		err_code = -ENOMEM;
1101 		goto cq_kmem_free;
1102 	}
1103 
1104 	if (udata) {
1105 		struct irdma_create_cq_resp resp = {0};
1106 
1107 		resp.cq_id = info.cq_uk_init_info.cq_id;
1108 		resp.cq_size = info.cq_uk_init_info.cq_size;
1109 		if (ib_copy_to_udata(udata, &resp,
1110 				     min(sizeof(resp), udata->outlen))) {
1111 			irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "copy to user data\n");
1112 			err_code = -EPROTO;
1113 			goto cq_destroy;
1114 		}
1115 	}
1116 
1117 	rf->cq_table[cq_num] = iwcq;
1118 	init_completion(&iwcq->free_cq);
1119 
1120 	return 0;
1121 cq_destroy:
1122 	irdma_cq_wq_destroy(rf, cq);
1123 cq_kmem_free:
1124 	if (!iwcq->user_mode) {
1125 		irdma_free_dma_mem(dev->hw, &iwcq->kmem);
1126 		irdma_free_dma_mem(dev->hw, &iwcq->kmem_shadow);
1127 	}
1128 cq_free_rsrc:
1129 	irdma_free_rsrc(rf, rf->allocated_cqs, cq_num);
1130 	return err_code;
1131 }
1132 
1133 /**
1134  * irdma_copy_user_pgaddrs - copy user page addresses to the local pble
1135  * @iwmr: iwmr for IB's user page addresses
1136  * @pbl: pble pointer to save the level 0 or level 1 pble
1137  * @level: indicates level 0, 1 or 2
1138  */
1139 
1140 void
1141 irdma_copy_user_pgaddrs(struct irdma_mr *iwmr, u64 *pbl,
1142 			enum irdma_pble_level level)
1143 {
1144 	struct ib_umem *region = iwmr->region;
1145 	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
1146 	int chunk_pages, entry, i;
1147 	struct scatterlist *sg;
1148 	u64 pg_addr = 0;
1149 	struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
1150 	struct irdma_pble_info *pinfo;
1151 	u32 idx = 0;
1152 	u32 pbl_cnt = 0;
1153 
1154 	pinfo = (level == PBLE_LEVEL_1) ? NULL : palloc->level2.leaf;
1155 	for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
1156 		chunk_pages = DIV_ROUND_UP(sg_dma_len(sg), iwmr->page_size);
1157 		if (iwmr->type == IRDMA_MEMREG_TYPE_QP && !iwpbl->qp_mr.sq_page)
1158 			iwpbl->qp_mr.sq_page = sg_page(sg);
1159 		for (i = 0; i < chunk_pages; i++) {
1160 			pg_addr = sg_dma_address(sg) + (i * iwmr->page_size);
1161 			if ((entry + i) == 0)
1162 				*pbl = pg_addr & iwmr->page_msk;
1163 			else if (!(pg_addr & ~iwmr->page_msk))
1164 				*pbl = pg_addr;
1165 			else
1166 				continue;
1167 			if (++pbl_cnt == palloc->total_cnt)
1168 				break;
1169 			pbl = irdma_next_pbl_addr(pbl, &pinfo, &idx);
1170 		}
1171 	}
1172 }
1173 
1174 /**
1175  * irdma_destroy_ah - Destroy address handle
1176  * @ibah: pointer to address handle
1177  * @ah_flags: destroy flags
1178  */
1179 
1180 void
1181 irdma_destroy_ah(struct ib_ah *ibah, u32 ah_flags)
1182 {
1183 	struct irdma_device *iwdev = to_iwdev(ibah->device);
1184 	struct irdma_ah *ah = to_iwah(ibah);
1185 
1186 	irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY,
1187 			false, NULL, ah);
1188 
1189 	irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs,
1190 			ah->sc_ah.ah_info.ah_idx);
1191 }
1192 
1193 
1194 int
1195 irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
1196 {
1197 	struct irdma_mr *iwmr = to_iwmr(ib_mr);
1198 	struct irdma_device *iwdev = to_iwdev(ib_mr->device);
1199 	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
1200 	int ret;
1201 
1202 	if (iwmr->type != IRDMA_MEMREG_TYPE_MEM) {
1203 		if (iwmr->region) {
1204 			struct irdma_ucontext *ucontext;
1205 
1206 			ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext);
1207 
1208 			irdma_del_memlist(iwmr, ucontext);
1209 		}
1210 		goto done;
1211 	}
1212 
1213 	ret = irdma_hwdereg_mr(ib_mr);
1214 	if (ret)
1215 		return ret;
1216 
1217 	irdma_free_stag(iwdev, iwmr->stag);
1218 done:
1219 	if (iwpbl->pbl_allocated)
1220 		irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc);
1221 
1222 	if (iwmr->region)
1223 		ib_umem_release(iwmr->region);
1224 
1225 	kfree(iwmr);
1226 
1227 	return 0;
1228 }
1229 
1230 /*
1231  * irdma_rereg_user_mr - Re-register a user memory region @ib_mr: ib mem to access iwarp mr pointer @flags: bit mask
1232  * indicating which of the MR's attributes are modified @start: virtual start address @len: length of mr @virt: virtual
1233  * address @new_access: bit mask of new access flags @new_pd: ptr of pd @udata: user data
1234  */
1235 int
1236 irdma_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 len,
1237 		    u64 virt, int new_access, struct ib_pd *new_pd,
1238 		    struct ib_udata *udata)
1239 {
1240 	struct irdma_device *iwdev = to_iwdev(ib_mr->device);
1241 	struct irdma_mr *iwmr = to_iwmr(ib_mr);
1242 	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
1243 	int ret;
1244 
1245 	if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
1246 		return -EINVAL;
1247 
1248 	if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS))
1249 		return -EOPNOTSUPP;
1250 
1251 	ret = irdma_hwdereg_mr(ib_mr);
1252 	if (ret)
1253 		return ret;
1254 
1255 	if (flags & IB_MR_REREG_ACCESS)
1256 		iwmr->access = new_access;
1257 
1258 	if (flags & IB_MR_REREG_PD) {
1259 		iwmr->ibmr.pd = new_pd;
1260 		iwmr->ibmr.device = new_pd->device;
1261 	}
1262 
1263 	if (flags & IB_MR_REREG_TRANS) {
1264 		if (iwpbl->pbl_allocated) {
1265 			irdma_free_pble(iwdev->rf->pble_rsrc,
1266 					&iwpbl->pble_alloc);
1267 			iwpbl->pbl_allocated = false;
1268 		}
1269 		if (iwmr->region) {
1270 			ib_umem_release(iwmr->region);
1271 			iwmr->region = NULL;
1272 		}
1273 
1274 		ib_mr = irdma_rereg_mr_trans(iwmr, start, len, virt, udata);
1275 		if (IS_ERR(ib_mr))
1276 			return PTR_ERR(ib_mr);
1277 
1278 	} else {
1279 		ret = irdma_hwreg_mr(iwdev, iwmr, iwmr->access);
1280 		if (ret)
1281 			return ret;
1282 	}
1283 
1284 	return 0;
1285 }
1286 
1287 int
1288 kc_irdma_set_roce_cm_info(struct irdma_qp *iwqp, struct ib_qp_attr *attr,
1289 			  u16 *vlan_id)
1290 {
1291 	int ret;
1292 	union ib_gid sgid;
1293 	struct ib_gid_attr sgid_attr;
1294 	struct irdma_av *av = &iwqp->roce_ah.av;
1295 
1296 	ret = ib_get_cached_gid(iwqp->ibqp.device, attr->ah_attr.port_num,
1297 				attr->ah_attr.grh.sgid_index, &sgid,
1298 				&sgid_attr);
1299 	if (ret)
1300 		return ret;
1301 
1302 	if (sgid_attr.ndev) {
1303 		*vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
1304 		ether_addr_copy(iwqp->ctx_info.roce_info->mac_addr, if_getlladdr(sgid_attr.ndev));
1305 	}
1306 
1307 	av->net_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
1308 	rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid);
1309 	dev_put(sgid_attr.ndev);
1310 	iwqp->sc_qp.user_pri = iwqp->ctx_info.user_pri;
1311 
1312 	return 0;
1313 }
1314 
1315 /**
1316  * irdma_destroy_cq - destroy cq
1317  * @ib_cq: cq pointer
1318  * @udata: user data
1319  */
1320 void
1321 irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
1322 {
1323 	struct irdma_device *iwdev = to_iwdev(ib_cq->device);
1324 	struct irdma_cq *iwcq = to_iwcq(ib_cq);
1325 	struct irdma_sc_cq *cq = &iwcq->sc_cq;
1326 	struct irdma_sc_dev *dev = cq->dev;
1327 	struct irdma_sc_ceq *ceq = dev->ceq[cq->ceq_id];
1328 	struct irdma_ceq *iwceq = container_of(ceq, struct irdma_ceq, sc_ceq);
1329 	unsigned long flags;
1330 
1331 	spin_lock_irqsave(&iwcq->lock, flags);
1332 	if (!list_empty(&iwcq->cmpl_generated))
1333 		irdma_remove_cmpls_list(iwcq);
1334 	if (!list_empty(&iwcq->resize_list))
1335 		irdma_process_resize_list(iwcq, iwdev, NULL);
1336 	spin_unlock_irqrestore(&iwcq->lock, flags);
1337 
1338 	irdma_cq_rem_ref(ib_cq);
1339 	wait_for_completion(&iwcq->free_cq);
1340 
1341 	irdma_cq_wq_destroy(iwdev->rf, cq);
1342 
1343 	spin_lock_irqsave(&iwceq->ce_lock, flags);
1344 	irdma_sc_cleanup_ceqes(cq, ceq);
1345 	spin_unlock_irqrestore(&iwceq->ce_lock, flags);
1346 	irdma_cq_free_rsrc(iwdev->rf, iwcq);
1347 }
1348 
1349 /**
1350  * kc_set_loc_seq_num_mss - Set local seq number and mss
1351  * @cm_node: cm node info
1352  */
1353 void
1354 kc_set_loc_seq_num_mss(struct irdma_cm_node *cm_node)
1355 {
1356 	struct timespec ts;
1357 
1358 	getnanotime(&ts);
1359 	cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec;
1360 	if (cm_node->iwdev->vsi.mtu > 1500 &&
1361 	    2 * cm_node->iwdev->vsi.mtu > cm_node->iwdev->rcv_wnd)
1362 		cm_node->tcp_cntxt.mss = (cm_node->ipv4) ?
1363 		    (1500 - IRDMA_MTU_TO_MSS_IPV4) :
1364 		    (1500 - IRDMA_MTU_TO_MSS_IPV6);
1365 	else
1366 		cm_node->tcp_cntxt.mss = (cm_node->ipv4) ?
1367 		    (cm_node->iwdev->vsi.mtu - IRDMA_MTU_TO_MSS_IPV4) :
1368 		    (cm_node->iwdev->vsi.mtu - IRDMA_MTU_TO_MSS_IPV6);
1369 }
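
/*
 * Illustrative values (not from any caller): on an IPv4 node with
 * vsi.mtu = 9000 and rcv_wnd = 16384, 2 * 9000 > 16384, so the MSS is
 * clamped to 1500 - IRDMA_MTU_TO_MSS_IPV4 instead of being derived from
 * the jumbo MTU.
 */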
1370 
1371 /**
1372  * irdma_disassociate_ucontext - Disassociate user context
1373  * @context: ib user context
1374  */
1375 void
1376 irdma_disassociate_ucontext(struct ib_ucontext *context)
1377 {
1378 }
1379 
1380 struct ib_device *
1381 ib_device_get_by_netdev(if_t netdev, int driver_id)
1382 {
1383 	struct irdma_device *iwdev;
1384 	struct irdma_handler *hdl;
1385 	unsigned long flags;
1386 
1387 	spin_lock_irqsave(&irdma_handler_lock, flags);
1388 	list_for_each_entry(hdl, &irdma_handlers, list) {
1389 		iwdev = hdl->iwdev;
1390 		if (netdev == iwdev->netdev) {
1391 			spin_unlock_irqrestore(&irdma_handler_lock,
1392 					       flags);
1393 			return &iwdev->ibdev;
1394 		}
1395 	}
1396 	spin_unlock_irqrestore(&irdma_handler_lock, flags);
1397 
1398 	return NULL;
1399 }
1400 
1401 void
1402 ib_unregister_device_put(struct ib_device *device)
1403 {
1404 	ib_unregister_device(device);
1405 }
1406 
1407 /**
1408  * irdma_query_gid_roce - Query port GID for RoCE
1409  * @ibdev: device pointer from stack
1410  * @port: port number
1411  * @index: Entry index
1412  * @gid: Global ID
1413  */
1414 int
1415 irdma_query_gid_roce(struct ib_device *ibdev, u8 port, int index,
1416 		     union ib_gid *gid)
1417 {
1418 	int ret;
1419 
1420 	ret = ib_get_cached_gid(ibdev, port, index, gid, NULL);
1421 	if (ret == -EAGAIN) {
1422 		memcpy(gid, &zgid, sizeof(*gid));
1423 		return 0;
1424 	}
1425 
1426 	return ret;
1427 }
1428 
1429 /**
1430  * irdma_modify_port - modify port attributes
1431  * @ibdev: device pointer from stack
1432  * @port: port number to modify
1433  * @mask: Property mask
1434  * @props: port modify properties
1435  */
1436 int
1437 irdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
1438 		  struct ib_port_modify *props)
1439 {
1440 	if (port > 1)
1441 		return -EINVAL;
1442 
1443 	return 0;
1444 }
1445 
1446 /**
1447  * irdma_query_pkey - Query partition key
1448  * @ibdev: device pointer from stack
1449  * @port: port number
1450  * @index: index of pkey
1451  * @pkey: pointer to store the pkey
1452  */
1453 int
1454 irdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
1455 		 u16 *pkey)
1456 {
1457 	if (index >= IRDMA_PKEY_TBL_SZ)
1458 		return -EINVAL;
1459 
1460 	*pkey = IRDMA_DEFAULT_PKEY;
1461 	return 0;
1462 }
1463 
1464 int
1465 irdma_roce_port_immutable(struct ib_device *ibdev, u8 port_num,
1466 			  struct ib_port_immutable *immutable)
1467 {
1468 	struct ib_port_attr attr;
1469 	int err;
1470 
1471 	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
1472 	err = ib_query_port(ibdev, port_num, &attr);
1473 	if (err)
1474 		return err;
1475 
1476 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
1477 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
1478 	immutable->gid_tbl_len = attr.gid_tbl_len;
1479 
1480 	return 0;
1481 }
1482 
1483 int
1484 irdma_iw_port_immutable(struct ib_device *ibdev, u8 port_num,
1485 			struct ib_port_immutable *immutable)
1486 {
1487 	struct ib_port_attr attr;
1488 	int err;
1489 
1490 	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
1491 	err = ib_query_port(ibdev, port_num, &attr);
1492 	if (err)
1493 		return err;
1494 	immutable->gid_tbl_len = 1;
1495 
1496 	return 0;
1497 }
1498 
1499 /**
1500  * irdma_query_port - get port attributes
1501  * @ibdev: device pointer from stack
1502  * @port: port number for query
1503  * @props: returning device attributes
1504  */
1505 int
1506 irdma_query_port(struct ib_device *ibdev, u8 port,
1507 		 struct ib_port_attr *props)
1508 {
1509 	struct irdma_device *iwdev = to_iwdev(ibdev);
1510 	if_t netdev = iwdev->netdev;
1511 
1512 	/* no need to zero out props here. done by caller */
1513 
1514 	props->max_mtu = IB_MTU_4096;
1515 	props->active_mtu = ib_mtu_int_to_enum(if_getmtu(netdev));
1516 	props->lid = 1;
1517 	props->lmc = 0;
1518 	props->sm_lid = 0;
1519 	props->sm_sl = 0;
1520 	if ((if_getlinkstate(netdev) == LINK_STATE_UP) &&
1521 	    (if_getdrvflags(netdev) & IFF_DRV_RUNNING)) {
1522 		props->state = IB_PORT_ACTIVE;
1523 		props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
1524 	} else {
1525 		props->state = IB_PORT_DOWN;
1526 		props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
1527 	}
1528 	ib_get_eth_speed(ibdev, port, &props->active_speed, &props->active_width);
1529 
1530 	if (rdma_protocol_roce(ibdev, 1)) {
1531 		props->gid_tbl_len = 32;
1532 		props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
1533 		props->pkey_tbl_len = IRDMA_PKEY_TBL_SZ;
1534 	} else {
1535 		props->gid_tbl_len = 1;
1536 	}
1537 	props->qkey_viol_cntr = 0;
1538 	props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_REINIT_SUP;
1539 	props->max_msg_sz = iwdev->rf->sc_dev.hw_attrs.max_hw_outbound_msg_size;
1540 
1541 	return 0;
1542 }
1543 
1544 static const char *const irdma_hw_stat_names[] = {
1545 	/* gen1 - 32-bit */
1546 	[IRDMA_HW_STAT_INDEX_IP4RXDISCARD] = "ip4InDiscards",
1547 	[IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = "ip4InTruncatedPkts",
1548 	[IRDMA_HW_STAT_INDEX_IP4TXNOROUTE] = "ip4OutNoRoutes",
1549 	[IRDMA_HW_STAT_INDEX_IP6RXDISCARD] = "ip6InDiscards",
1550 	[IRDMA_HW_STAT_INDEX_IP6RXTRUNC] = "ip6InTruncatedPkts",
1551 	[IRDMA_HW_STAT_INDEX_IP6TXNOROUTE] = "ip6OutNoRoutes",
1552 	[IRDMA_HW_STAT_INDEX_RXVLANERR] = "rxVlanErrors",
1553 	/* gen1 - 64-bit */
1554 	[IRDMA_HW_STAT_INDEX_IP4RXOCTS] = "ip4InOctets",
1555 	[IRDMA_HW_STAT_INDEX_IP4RXPKTS] = "ip4InPkts",
1556 	[IRDMA_HW_STAT_INDEX_IP4RXFRAGS] = "ip4InReasmRqd",
1557 	[IRDMA_HW_STAT_INDEX_IP4RXMCPKTS] = "ip4InMcastPkts",
1558 	[IRDMA_HW_STAT_INDEX_IP4TXOCTS] = "ip4OutOctets",
1559 	[IRDMA_HW_STAT_INDEX_IP4TXPKTS] = "ip4OutPkts",
1560 	[IRDMA_HW_STAT_INDEX_IP4TXFRAGS] = "ip4OutSegRqd",
1561 	[IRDMA_HW_STAT_INDEX_IP4TXMCPKTS] = "ip4OutMcastPkts",
1562 	[IRDMA_HW_STAT_INDEX_IP6RXOCTS] = "ip6InOctets",
1563 	[IRDMA_HW_STAT_INDEX_IP6RXPKTS] = "ip6InPkts",
1564 	[IRDMA_HW_STAT_INDEX_IP6RXFRAGS] = "ip6InReasmRqd",
1565 	[IRDMA_HW_STAT_INDEX_IP6RXMCPKTS] = "ip6InMcastPkts",
1566 	[IRDMA_HW_STAT_INDEX_IP6TXOCTS] = "ip6OutOctets",
1567 	[IRDMA_HW_STAT_INDEX_IP6TXPKTS] = "ip6OutPkts",
1568 	[IRDMA_HW_STAT_INDEX_IP6TXFRAGS] = "ip6OutSegRqd",
1569 	[IRDMA_HW_STAT_INDEX_IP6TXMCPKTS] = "ip6OutMcastPkts",
1570 	[IRDMA_HW_STAT_INDEX_RDMARXRDS] = "InRdmaReads",
1571 	[IRDMA_HW_STAT_INDEX_RDMARXSNDS] = "InRdmaSends",
1572 	[IRDMA_HW_STAT_INDEX_RDMARXWRS] = "InRdmaWrites",
1573 	[IRDMA_HW_STAT_INDEX_RDMATXRDS] = "OutRdmaReads",
1574 	[IRDMA_HW_STAT_INDEX_RDMATXSNDS] = "OutRdmaSends",
1575 	[IRDMA_HW_STAT_INDEX_RDMATXWRS] = "OutRdmaWrites",
1576 	[IRDMA_HW_STAT_INDEX_RDMAVBND] = "RdmaBnd",
1577 	[IRDMA_HW_STAT_INDEX_RDMAVINV] = "RdmaInv",
1578 
1579 	/* gen2 - 32-bit */
1580 	[IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED] = "cnpHandled",
1581 	[IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED] = "cnpIgnored",
1582 	[IRDMA_HW_STAT_INDEX_TXNPCNPSENT] = "cnpSent",
1583 	/* gen2 - 64-bit */
1584 	[IRDMA_HW_STAT_INDEX_IP4RXMCOCTS] = "ip4InMcastOctets",
1585 	[IRDMA_HW_STAT_INDEX_IP4TXMCOCTS] = "ip4OutMcastOctets",
1586 	[IRDMA_HW_STAT_INDEX_IP6RXMCOCTS] = "ip6InMcastOctets",
1587 	[IRDMA_HW_STAT_INDEX_IP6TXMCOCTS] = "ip6OutMcastOctets",
1588 	[IRDMA_HW_STAT_INDEX_UDPRXPKTS] = "RxUDP",
1589 	[IRDMA_HW_STAT_INDEX_UDPTXPKTS] = "TxUDP",
1590 	[IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS] = "RxECNMrkd",
1591 	[IRDMA_HW_STAT_INDEX_TCPRTXSEG] = "RetransSegs",
1592 	[IRDMA_HW_STAT_INDEX_TCPRXOPTERR] = "InOptErrors",
1593 	[IRDMA_HW_STAT_INDEX_TCPRXPROTOERR] = "InProtoErrors",
1594 	[IRDMA_HW_STAT_INDEX_TCPRXSEGS] = "InSegs",
1595 	[IRDMA_HW_STAT_INDEX_TCPTXSEG] = "OutSegs",
1596 };
1597 
1598 /**
1599  * irdma_alloc_hw_stats - Allocate a hw stats structure
1600  * @ibdev: device pointer from stack
1601  * @port_num: port number
1602  */
1603 struct rdma_hw_stats *
1604 irdma_alloc_hw_stats(struct ib_device *ibdev,
1605 		     u8 port_num)
1606 {
1607 	struct irdma_device *iwdev = to_iwdev(ibdev);
1608 	struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
1609 
1610 	int num_counters = dev->hw_attrs.max_stat_idx;
1611 	unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN;
1612 
1613 	return rdma_alloc_hw_stats_struct(irdma_hw_stat_names, num_counters,
1614 					  lifespan);
1615 }
1616 
1617 /**
1618  * irdma_get_hw_stats - Populates the rdma_hw_stats structure
1619  * @ibdev: device pointer from stack
1620  * @stats: stats pointer from stack
1621  * @port_num: port number
1622  * @index: which hw counter the stack is requesting we update
1623  */
1624 int
1625 irdma_get_hw_stats(struct ib_device *ibdev,
1626 		   struct rdma_hw_stats *stats, u8 port_num,
1627 		   int index)
1628 {
1629 	struct irdma_device *iwdev = to_iwdev(ibdev);
1630 	struct irdma_dev_hw_stats *hw_stats = &iwdev->vsi.pestat->hw_stats;
1631 
1632 	if (iwdev->rf->rdma_ver >= IRDMA_GEN_2)
1633 		irdma_cqp_gather_stats_cmd(&iwdev->rf->sc_dev, iwdev->vsi.pestat, true);
1634 
1635 	memcpy(&stats->value[0], hw_stats, sizeof(u64)* stats->num_counters);
1636 
1637 	return stats->num_counters;
1638 }
1639 
1640 /**
1641  * irdma_query_gid - Query port GID
1642  * @ibdev: device pointer from stack
1643  * @port: port number
1644  * @index: Entry index
1645  * @gid: Global ID
1646  */
1647 int
1648 irdma_query_gid(struct ib_device *ibdev, u8 port, int index,
1649 		union ib_gid *gid)
1650 {
1651 	struct irdma_device *iwdev = to_iwdev(ibdev);
1652 
1653 	memset(gid->raw, 0, sizeof(gid->raw));
1654 	ether_addr_copy(gid->raw, if_getlladdr(iwdev->netdev));
1655 
1656 	return 0;
1657 }
1658 
1659 enum rdma_link_layer
1660 irdma_get_link_layer(struct ib_device *ibdev,
1661 		     u8 port_num)
1662 {
1663 	return IB_LINK_LAYER_ETHERNET;
1664 }
1665 
1666 inline enum ib_mtu
1667 ib_mtu_int_to_enum(int mtu)
1668 {
1669 	if (mtu >= 4096)
1670 		return IB_MTU_4096;
1671 	else if (mtu >= 2048)
1672 		return IB_MTU_2048;
1673 	else if (mtu >= 1024)
1674 		return IB_MTU_1024;
1675 	else if (mtu >= 512)
1676 		return IB_MTU_512;
1677 	else
1678 		return IB_MTU_256;
1679 }
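
/*
 * For example, a standard 1500-byte Ethernet MTU maps to IB_MTU_1024 and a
 * 9000-byte jumbo MTU maps to IB_MTU_4096; the helper always rounds down to
 * the nearest IB MTU enum value.
 */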
1680 
1681 inline void
1682 kc_set_roce_uverbs_cmd_mask(struct irdma_device *iwdev)
1683 {
1684 	iwdev->ibdev.uverbs_cmd_mask |=
1685 	    BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) |
1686 	    BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) |
1687 	    BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) |
1688 	    BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST);
1689 }
1690 
1691 inline void
1692 kc_set_rdma_uverbs_cmd_mask(struct irdma_device *iwdev)
1693 {
1694 	iwdev->ibdev.uverbs_cmd_mask =
1695 	    BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) |
1696 	    BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) |
1697 	    BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) |
1698 	    BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) |
1699 	    BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) |
1700 	    BIT_ULL(IB_USER_VERBS_CMD_REG_MR) |
1701 	    BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) |
1702 	    BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) |
1703 	    BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
1704 	    BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) |
1705 	    BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ) |
1706 	    BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) |
1707 	    BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
1708 	    BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) |
1709 	    BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) |
1710 	    BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) |
1711 	    BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) |
1712 	    BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) |
1713 	    BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) |
1714 	    BIT_ULL(IB_USER_VERBS_CMD_POST_SEND);
1715 	iwdev->ibdev.uverbs_ex_cmd_mask =
1716 	    BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_QP) |
1717 	    BIT_ULL(IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
1718 
1719 	if (iwdev->rf->rdma_ver >= IRDMA_GEN_2)
1720 		iwdev->ibdev.uverbs_ex_cmd_mask |= BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_CQ);
1721 }
1722 
1723 int
1724 ib_get_eth_speed(struct ib_device *ibdev, u32 port_num, u8 *speed, u8 *width)
1725 {
1726 	if_t netdev = ibdev->get_netdev(ibdev, port_num);
1727 	u32 netdev_speed;
1728 
1729 	if (!netdev)
1730 		return -ENODEV;
1731 
1732 	netdev_speed = if_getbaudrate(netdev);
1733 	dev_put(netdev);
1734 	if (netdev_speed <= SPEED_1000) {
1735 		*width = IB_WIDTH_1X;
1736 		*speed = IB_SPEED_SDR;
1737 	} else if (netdev_speed <= SPEED_10000) {
1738 		*width = IB_WIDTH_1X;
1739 		*speed = IB_SPEED_FDR10;
1740 	} else if (netdev_speed <= SPEED_20000) {
1741 		*width = IB_WIDTH_4X;
1742 		*speed = IB_SPEED_DDR;
1743 	} else if (netdev_speed <= SPEED_25000) {
1744 		*width = IB_WIDTH_1X;
1745 		*speed = IB_SPEED_EDR;
1746 	} else if (netdev_speed <= SPEED_40000) {
1747 		*width = IB_WIDTH_4X;
1748 		*speed = IB_SPEED_FDR10;
1749 	} else {
1750 		*width = IB_WIDTH_4X;
1751 		*speed = IB_SPEED_EDR;
1752 	}
1753 
1754 	return 0;
1755 }
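
/*
 * The buckets above are coarse: for example, a netdev reporting 25000 Mb/s
 * is presented as 1X EDR (roughly 25 Gb/s), while anything faster than
 * 40000 Mb/s, including 100 Gb/s links, falls through to 4X EDR.
 */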
1756 
1757 u64
1758 irdma_mac_to_u64(const u8 *eth_add)
1759 {
1760 	int idx;
1761 	u64 u64_eth_add;
1762 
1763 	for (idx = 0, u64_eth_add = 0; idx < ETHER_ADDR_LEN; idx++)
1764 		u64_eth_add = u64_eth_add << 8 | eth_add[idx];
1765 
1766 	return u64_eth_add;
1767 }
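
/*
 * For example, the address 00:1b:21:0a:0b:0c becomes 0x001b210a0b0c, with
 * the first octet ending up in the most significant byte.
 */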
1768