xref: /freebsd/sys/dev/qlnx/qlnxr/qlnxr_cm.c (revision e0c4386e7e71d93b0edc0c8fa156263fc4a8b0b6)
/*
 * Copyright (c) 2018-2019 Cavium, Inc.
 * All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "qlnxr_def.h"
#include "rdma_common.h"
#include "qlnxr_cm.h"

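/*
 * Advance the software GSI consumer index of a queue, wrapping at max_wr.
 */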
void
qlnxr_inc_sw_gsi_cons(struct qlnxr_qp_hwq_info *info)
{
	info->gsi_cons = (info->gsi_cons + 1) % info->max_wr;
}

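/*
 * Remember the GSI QP and its send/recv CQs in the device softc so the
 * LL2 completion callbacks can find them later.
 */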
void
qlnxr_store_gsi_qp_cq(struct qlnxr_dev *dev,
		struct qlnxr_qp *qp,
		struct ib_qp_init_attr *attrs)
{
	QL_DPRINT12(dev->ha, "enter\n");

	dev->gsi_qp_created = 1;
	dev->gsi_sqcq = get_qlnxr_cq((attrs->send_cq));
	dev->gsi_rqcq = get_qlnxr_cq((attrs->recv_cq));
	dev->gsi_qp = qp;

	QL_DPRINT12(dev->ha, "exit\n");

	return;
}

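/*
 * LL2 TX completion callback: free the GSI header buffer and packet
 * descriptor, advance the SQ GSI consumer and notify the send CQ.
 */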
void
qlnxr_ll2_complete_tx_packet(void *cxt,
		uint8_t connection_handle,
		void *cookie,
		dma_addr_t first_frag_addr,
		bool b_last_fragment,
		bool b_last_packet)
{
	struct qlnxr_dev *dev = (struct qlnxr_dev *)cxt;
	struct ecore_roce_ll2_packet *pkt = cookie;
	struct qlnxr_cq *cq = dev->gsi_sqcq;
	struct qlnxr_qp *qp = dev->gsi_qp;
	unsigned long flags;

	QL_DPRINT12(dev->ha, "enter\n");

	qlnx_dma_free_coherent(&dev->ha->cdev, pkt->header.vaddr,
			pkt->header.baddr, pkt->header.len);
	kfree(pkt);

	spin_lock_irqsave(&qp->q_lock, flags);

	qlnxr_inc_sw_gsi_cons(&qp->sq);

	spin_unlock_irqrestore(&qp->q_lock, flags);

	if (cq->ibcq.comp_handler)
		(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);

	QL_DPRINT12(dev->ha, "exit\n");

	return;
}

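/*
 * LL2 RX completion callback: record the received MAD in the GSI QP's RQ
 * shadow ring (status, VLAN, data length and source MAC), advance the RQ
 * GSI consumer and notify the recv CQ.
 */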
void
qlnxr_ll2_complete_rx_packet(void *cxt,
		struct ecore_ll2_comp_rx_data *data)
{
	struct qlnxr_dev *dev = (struct qlnxr_dev *)cxt;
	struct qlnxr_cq *cq = dev->gsi_rqcq;
	// struct qlnxr_qp *qp = dev->gsi_qp;
	struct qlnxr_qp *qp = NULL;
	unsigned long flags;
	// uint32_t delay_count = 0, gsi_cons = 0;
	//void * dest_va;

	QL_DPRINT12(dev->ha, "enter\n");

	if (data->u.data_length_error) {
		/* TODO: add statistic */
	}

	if (data->cookie == NULL) {
		QL_DPRINT12(dev->ha, "cookie is NULL, bad sign\n");
	}

	if (data->qp_id == 1) {
		qp = dev->gsi_qp;
	} else {
		/* TODO: This will be needed for UD QP support */
		/* For RoCEv1 this is invalid */
		QL_DPRINT12(dev->ha, "invalid QP\n");
		return;
	}
	/* note: currently only one recv sg is supported */
	QL_DPRINT12(dev->ha, "MAD received on QP : %x\n", data->rx_buf_addr);

	spin_lock_irqsave(&qp->q_lock, flags);

	qp->rqe_wr_id[qp->rq.gsi_cons].rc =
		data->u.data_length_error ? -EINVAL : 0;
	qp->rqe_wr_id[qp->rq.gsi_cons].vlan_id = data->vlan;
	/* note: length stands for data length i.e. GRH is excluded */
	qp->rqe_wr_id[qp->rq.gsi_cons].sg_list[0].length =
		data->length.data_length;
	*((u32 *)&qp->rqe_wr_id[qp->rq.gsi_cons].smac[0]) =
		ntohl(data->opaque_data_0);
	*((u16 *)&qp->rqe_wr_id[qp->rq.gsi_cons].smac[4]) =
		ntohs((u16)data->opaque_data_1);

	qlnxr_inc_sw_gsi_cons(&qp->rq);

	spin_unlock_irqrestore(&qp->q_lock, flags);

	if (cq->ibcq.comp_handler)
		(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);

	QL_DPRINT12(dev->ha, "exit\n");

	return;
}

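/*
 * LL2 RX buffer release callback; intentionally a no-op on the GSI path.
 */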
void qlnxr_ll2_release_rx_packet(void *cxt,
		u8 connection_handle,
		void *cookie,
		dma_addr_t rx_buf_addr,
		bool b_last_packet)
{
	/* Do nothing... */
}

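/*
 * Tear down the firmware CQ(s) backing the GSI QP; once the GSI QP is
 * created these CQs are serviced entirely by the driver.
 */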
static void
qlnxr_destroy_gsi_cq(struct qlnxr_dev *dev,
		struct ib_qp_init_attr *attrs)
{
	struct ecore_rdma_destroy_cq_in_params iparams;
	struct ecore_rdma_destroy_cq_out_params oparams;
	struct qlnxr_cq *cq;

	QL_DPRINT12(dev->ha, "enter\n");

	cq = get_qlnxr_cq((attrs->send_cq));
	iparams.icid = cq->icid;
	ecore_rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
	ecore_chain_free(&dev->ha->cdev, &cq->pbl);

	cq = get_qlnxr_cq((attrs->recv_cq));
	/* if a dedicated recv_cq was used, delete it too */
	if (iparams.icid != cq->icid) {
		iparams.icid = cq->icid;
		ecore_rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
		ecore_chain_free(&dev->ha->cdev, &cq->pbl);
	}

	QL_DPRINT12(dev->ha, "exit\n");

	return;
}

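/*
 * Validate the requested GSI QP capabilities against the driver limits
 * (QLNXR_GSI_MAX_RECV_SGE, QLNXR_GSI_MAX_RECV_WR, QLNXR_GSI_MAX_SEND_WR).
 */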
static inline int
qlnxr_check_gsi_qp_attrs(struct qlnxr_dev *dev,
		struct ib_qp_init_attr *attrs)
{
	QL_DPRINT12(dev->ha, "enter\n");

	if (attrs->cap.max_recv_sge > QLNXR_GSI_MAX_RECV_SGE) {
		QL_DPRINT11(dev->ha,
			"(attrs->cap.max_recv_sge > QLNXR_GSI_MAX_RECV_SGE)\n");
		return -EINVAL;
	}

	if (attrs->cap.max_recv_wr > QLNXR_GSI_MAX_RECV_WR) {
		QL_DPRINT11(dev->ha,
			"(attrs->cap.max_recv_wr > QLNXR_GSI_MAX_RECV_WR)\n");
		return -EINVAL;
	}

	if (attrs->cap.max_send_wr > QLNXR_GSI_MAX_SEND_WR) {
		QL_DPRINT11(dev->ha,
			"(attrs->cap.max_send_wr > QLNXR_GSI_MAX_SEND_WR)\n");
		return -EINVAL;
	}

	QL_DPRINT12(dev->ha, "exit\n");

	return 0;
}

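/*
 * Hand a fully built GSI packet to LL2 for transmission: post the UD
 * header as the first fragment, then each payload SGE as an additional
 * fragment.  Only RoCEv1 packets are accepted on this path.
 */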
static int
qlnxr_ll2_post_tx(struct qlnxr_dev *dev, struct ecore_roce_ll2_packet *pkt)
{
	enum ecore_ll2_roce_flavor_type roce_flavor;
	struct ecore_ll2_tx_pkt_info ll2_tx_pkt;
	struct ecore_ll2_stats stats = {0};
	int rc;
	int i;

	QL_DPRINT12(dev->ha, "enter\n");

	memset(&ll2_tx_pkt, 0, sizeof(ll2_tx_pkt));

	if (pkt->roce_mode != ROCE_V1) {
		QL_DPRINT11(dev->ha, "roce_mode != ROCE_V1\n");
		return (-1);
	}

	roce_flavor = (pkt->roce_mode == ROCE_V1) ?
		ECORE_LL2_ROCE : ECORE_LL2_RROCE;

	ll2_tx_pkt.num_of_bds = 1 /* hdr */ + pkt->n_seg;
	ll2_tx_pkt.vlan = 0; /* ??? */
	ll2_tx_pkt.tx_dest = ECORE_LL2_TX_DEST_NW;
	ll2_tx_pkt.ecore_roce_flavor = roce_flavor;
	ll2_tx_pkt.first_frag = pkt->header.baddr;
	ll2_tx_pkt.first_frag_len = pkt->header.len;
	ll2_tx_pkt.cookie = pkt;
	ll2_tx_pkt.enable_ip_cksum = 1; /* Only for RoCEv2:IPv4 */

	/* tx header */
	rc = ecore_ll2_prepare_tx_packet(dev->rdma_ctx,
			dev->gsi_ll2_handle,
			&ll2_tx_pkt,
			1);
	if (rc) {
		QL_DPRINT11(dev->ha, "ecore_ll2_prepare_tx_packet failed\n");

		/* TX failed while posting header - release resources */
		qlnx_dma_free_coherent(&dev->ha->cdev,
			pkt->header.vaddr,
			pkt->header.baddr,
			pkt->header.len);

		kfree(pkt);

		return rc;
	}

	/* tx payload */
	for (i = 0; i < pkt->n_seg; i++) {
		rc = ecore_ll2_set_fragment_of_tx_packet(dev->rdma_ctx,
						       dev->gsi_ll2_handle,
						       pkt->payload[i].baddr,
						       pkt->payload[i].len);
		if (rc) {
			/* if failed not much to do here, partial packet has
			 * been posted we can't free memory, will need to wait
			 * for completion
			 */
			QL_DPRINT11(dev->ha,
				"ecore_ll2_set_fragment_of_tx_packet failed\n");
			return rc;
		}
	}

	rc = ecore_ll2_get_stats(dev->rdma_ctx, dev->gsi_ll2_handle, &stats);
	if (rc) {
		QL_DPRINT11(dev->ha, "failed to obtain ll2 stats\n");
	}
	QL_DPRINT12(dev->ha, "exit\n");

	return 0;
}

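/*
 * Stop the GSI LL2 connection: clear the MAC filter, terminate and
 * release the LL2 connection, then invalidate the stored handle.
 */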
int
qlnxr_ll2_stop(struct qlnxr_dev *dev)
{
	int rc;

	QL_DPRINT12(dev->ha, "enter\n");

	if (dev->gsi_ll2_handle == 0xFF)
		return 0;

	/* remove LL2 MAC address filter */
	rc = qlnx_rdma_ll2_set_mac_filter(dev->rdma_ctx,
			  dev->gsi_ll2_mac_address, NULL);

	rc = ecore_ll2_terminate_connection(dev->rdma_ctx,
			dev->gsi_ll2_handle);

	ecore_ll2_release_connection(dev->rdma_ctx, dev->gsi_ll2_handle);

	dev->gsi_ll2_handle = 0xFF;

	QL_DPRINT12(dev->ha, "exit rc = %d\n", rc);
	return rc;
}

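/*
 * Acquire and establish the LL2 connection used for GSI traffic and
 * install the port MAC as the LL2 RX filter.  Called while creating the
 * GSI QP.
 */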
int qlnxr_ll2_start(struct qlnxr_dev *dev,
		   struct ib_qp_init_attr *attrs,
		   struct qlnxr_qp *qp)
{
	struct ecore_ll2_acquire_data data;
	struct ecore_ll2_cbs cbs;
	int rc;

	QL_DPRINT12(dev->ha, "enter\n");

	/* configure and start LL2 */
	cbs.rx_comp_cb = qlnxr_ll2_complete_rx_packet;
	cbs.tx_comp_cb = qlnxr_ll2_complete_tx_packet;
	cbs.rx_release_cb = qlnxr_ll2_release_rx_packet;
	cbs.tx_release_cb = qlnxr_ll2_complete_tx_packet;
	cbs.cookie = dev;
	dev->gsi_ll2_handle = 0xFF;

	memset(&data, 0, sizeof(data));
	data.input.conn_type = ECORE_LL2_TYPE_ROCE;
	data.input.mtu = if_getmtu(dev->ha->ifp);
	data.input.rx_num_desc = 8 * 1024;
	data.input.rx_drop_ttl0_flg = 1;
	data.input.rx_vlan_removal_en = 0;
	data.input.tx_num_desc = 8 * 1024;
	data.input.tx_tc = 0;
	data.input.tx_dest = ECORE_LL2_TX_DEST_NW;
	data.input.ai_err_packet_too_big = ECORE_LL2_DROP_PACKET;
	data.input.ai_err_no_buf = ECORE_LL2_DROP_PACKET;
	data.input.gsi_enable = 1;
	data.p_connection_handle = &dev->gsi_ll2_handle;
	data.cbs = &cbs;

	rc = ecore_ll2_acquire_connection(dev->rdma_ctx, &data);

	if (rc) {
		QL_DPRINT11(dev->ha,
			"ecore_ll2_acquire_connection failed: %d\n",
			rc);
		return rc;
	}

	QL_DPRINT11(dev->ha,
		"ll2 connection acquired successfully\n");
	rc = ecore_ll2_establish_connection(dev->rdma_ctx,
		dev->gsi_ll2_handle);

	if (rc) {
		QL_DPRINT11(dev->ha,
			"ecore_ll2_establish_connection failed: %d\n", rc);
		goto err1;
	}

	QL_DPRINT11(dev->ha,
		"ll2 connection established successfully\n");
	rc = qlnx_rdma_ll2_set_mac_filter(dev->rdma_ctx, NULL,
			dev->ha->primary_mac);
	if (rc) {
		QL_DPRINT11(dev->ha,
			"qlnx_rdma_ll2_set_mac_filter failed: %d\n", rc);
		goto err2;
	}

	QL_DPRINT12(dev->ha, "exit rc = %d\n", rc);
	return 0;

err2:
	ecore_ll2_terminate_connection(dev->rdma_ctx, dev->gsi_ll2_handle);
err1:
	ecore_ll2_release_connection(dev->rdma_ctx, dev->gsi_ll2_handle);

	QL_DPRINT12(dev->ha, "exit rc = %d\n", rc);
	return rc;
}

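/*
 * Create the GSI (QP1) queue pair: validate attributes, start the LL2
 * connection, allocate the SQ/RQ shadow work-request arrays, take over
 * the associated CQs from firmware and mark them as GSI CQs.
 */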
struct ib_qp*
qlnxr_create_gsi_qp(struct qlnxr_dev *dev,
		 struct ib_qp_init_attr *attrs,
		 struct qlnxr_qp *qp)
{
	int rc;

	QL_DPRINT12(dev->ha, "enter\n");

	rc = qlnxr_check_gsi_qp_attrs(dev, attrs);

	if (rc) {
		QL_DPRINT11(dev->ha, "qlnxr_check_gsi_qp_attrs failed\n");
		return ERR_PTR(rc);
	}

	rc = qlnxr_ll2_start(dev, attrs, qp);
	if (rc) {
		QL_DPRINT11(dev->ha, "qlnxr_ll2_start failed\n");
		return ERR_PTR(rc);
	}

	/* create QP */
	qp->ibqp.qp_num = 1;
	qp->rq.max_wr = attrs->cap.max_recv_wr;
	qp->sq.max_wr = attrs->cap.max_send_wr;

	qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
				GFP_KERNEL);
	if (!qp->rqe_wr_id) {
		QL_DPRINT11(dev->ha, "(!qp->rqe_wr_id)\n");
		goto err;
	}

	qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
				GFP_KERNEL);
	if (!qp->wqe_wr_id) {
		QL_DPRINT11(dev->ha, "(!qp->wqe_wr_id)\n");
		goto err;
	}

	qlnxr_store_gsi_qp_cq(dev, qp, attrs);
	memcpy(dev->gsi_ll2_mac_address, dev->ha->primary_mac, ETH_ALEN);

	/* the GSI CQ is handled by the driver so remove it from the FW */
	qlnxr_destroy_gsi_cq(dev, attrs);
	dev->gsi_rqcq->cq_type = QLNXR_CQ_TYPE_GSI;

	QL_DPRINT12(dev->ha, "exit &qp->ibqp = %p\n", &qp->ibqp);

	return &qp->ibqp;
err:
	kfree(qp->rqe_wr_id);

	rc = qlnxr_ll2_stop(dev);

	QL_DPRINT12(dev->ha, "exit with error\n");

	return ERR_PTR(-ENOMEM);
}

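/*
 * Destroy the GSI QP by shutting down its LL2 connection.
 */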
int
qlnxr_destroy_gsi_qp(struct qlnxr_dev *dev)
{
	int rc = 0;

	QL_DPRINT12(dev->ha, "enter\n");

	rc = qlnxr_ll2_stop(dev);

	QL_DPRINT12(dev->ha, "exit rc = %d\n", rc);
	return (rc);
}

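/*
 * Extract the VLAN id encoded in bytes 11-12 of the destination GID.
 * Returns true (and the id) when a valid VLAN id (< 0x1000) is present.
 */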
static inline bool
qlnxr_get_vlan_id_gsi(struct ib_ah_attr *ah_attr, u16 *vlan_id)
{
	u16 tmp_vlan_id;
	union ib_gid *dgid = &ah_attr->grh.dgid;

	tmp_vlan_id = (dgid->raw[11] << 8) | dgid->raw[12];
	if (tmp_vlan_id < 0x1000) {
		*vlan_id = tmp_vlan_id;
		return true;
	} else {
		*vlan_id = 0;
		return false;
	}
}

#define QLNXR_MAX_UD_HEADER_SIZE	(100)
#define QLNXR_GSI_QPN		(1)
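/*
 * Build the UD header (Ethernet/VLAN, GRH, BTH and DETH) for an outgoing
 * GSI send work request.  Only RoCEv1 (Ethertype ETH_P_ROCE with a GRH)
 * is generated on this path.
 */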
static inline int
qlnxr_gsi_build_header(struct qlnxr_dev *dev,
		struct qlnxr_qp *qp,
		const struct ib_send_wr *swr,
		struct ib_ud_header *udh,
		int *roce_mode)
{
	bool has_vlan = false, has_grh_ipv6 = true;
	struct ib_ah_attr *ah_attr = &get_qlnxr_ah((ud_wr(swr)->ah))->attr;
	struct ib_global_route *grh = &ah_attr->grh;
	union ib_gid sgid;
	int send_size = 0;
	u16 vlan_id = 0;
	u16 ether_type;

	int rc = 0;
	int ip_ver = 0;
	bool has_udp = false;

	int i;

	send_size = 0;
	for (i = 0; i < swr->num_sge; ++i)
		send_size += swr->sg_list[i].length;

	has_vlan = qlnxr_get_vlan_id_gsi(ah_attr, &vlan_id);
	ether_type = ETH_P_ROCE;
	*roce_mode = ROCE_V1;
	if (grh->sgid_index < QLNXR_MAX_SGID)
		sgid = dev->sgid_tbl[grh->sgid_index];
	else
		sgid = dev->sgid_tbl[0];

	rc = ib_ud_header_init(send_size, false /* LRH */, true /* ETH */,
			has_vlan, has_grh_ipv6, ip_ver, has_udp,
			0 /* immediate */, udh);

	if (rc) {
		QL_DPRINT11(dev->ha, "gsi post send: failed to init header\n");
		return rc;
	}

	/* ENET + VLAN headers */
	memcpy(udh->eth.dmac_h, ah_attr->dmac, ETH_ALEN);
	memcpy(udh->eth.smac_h, dev->ha->primary_mac, ETH_ALEN);
	if (has_vlan) {
		udh->eth.type = htons(ETH_P_8021Q);
		udh->vlan.tag = htons(vlan_id);
		udh->vlan.type = htons(ether_type);
	} else {
		udh->eth.type = htons(ether_type);
	}

	for (int j = 0; j < ETH_ALEN; j++) {
		QL_DPRINT12(dev->ha, "destination mac: %x\n",
				udh->eth.dmac_h[j]);
	}
	for (int j = 0; j < ETH_ALEN; j++) {
		QL_DPRINT12(dev->ha, "source mac: %x\n",
				udh->eth.smac_h[j]);
	}

	QL_DPRINT12(dev->ha, "QP: %p, opcode: %d, wq: %lx, roce: %x, hops: %d, "
			"imm: %d, vlan: %d, AH: %p\n",
			qp, swr->opcode, swr->wr_id, *roce_mode, grh->hop_limit,
			0, has_vlan, get_qlnxr_ah((ud_wr(swr)->ah)));

	if (has_grh_ipv6) {
		/* GRH / IPv6 header */
		udh->grh.traffic_class = grh->traffic_class;
		udh->grh.flow_label = grh->flow_label;
		udh->grh.hop_limit = grh->hop_limit;
		udh->grh.destination_gid = grh->dgid;
		memcpy(&udh->grh.source_gid.raw, &sgid.raw,
		       sizeof(udh->grh.source_gid.raw));
		QL_DPRINT12(dev->ha, "header: tc: %x, flow_label : %x, "
			"hop_limit: %x\n", udh->grh.traffic_class,
			udh->grh.flow_label, udh->grh.hop_limit);
		for (i = 0; i < 16; i++) {
			QL_DPRINT12(dev->ha, "udh dgid = %x\n", udh->grh.destination_gid.raw[i]);
		}
		for (i = 0; i < 16; i++) {
			QL_DPRINT12(dev->ha, "udh sgid = %x\n", udh->grh.source_gid.raw[i]);
		}
		udh->grh.next_header = 0x1b;
	}
#ifdef DEFINE_IB_UD_HEADER_INIT_UDP_PRESENT
	/* This is for RoCEv2 */
	else {
		/* IPv4 header */
		u32 ipv4_addr;

		udh->ip4.protocol = IPPROTO_UDP;
		udh->ip4.tos = htonl(grh->flow_label);
		udh->ip4.frag_off = htons(IP_DF);
		udh->ip4.ttl = grh->hop_limit;

		ipv4_addr = qedr_get_ipv4_from_gid(sgid.raw);
		udh->ip4.saddr = ipv4_addr;
		ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw);
		udh->ip4.daddr = ipv4_addr;
		/* note: checksum is calculated by the device */
	}
#endif

	/* BTH */
	udh->bth.solicited_event = !!(swr->send_flags & IB_SEND_SOLICITED);
	udh->bth.pkey = QLNXR_ROCE_PKEY_DEFAULT; /* TODO: ib_get_cached_pkey?! */
	//udh->bth.destination_qpn = htonl(ud_wr(swr)->remote_qpn);
	udh->bth.destination_qpn = OSAL_CPU_TO_BE32(ud_wr(swr)->remote_qpn);
	//udh->bth.psn = htonl((qp->sq_psn++) & ((1 << 24) - 1));
	udh->bth.psn = OSAL_CPU_TO_BE32((qp->sq_psn++) & ((1 << 24) - 1));
	udh->bth.opcode = IB_OPCODE_UD_SEND_ONLY;

	/* DETH */
	//udh->deth.qkey = htonl(0x80010000); /* qp->qkey */ /* TODO: what is?! */
	//udh->deth.source_qpn = htonl(QLNXR_GSI_QPN);
	udh->deth.qkey = OSAL_CPU_TO_BE32(0x80010000); /* qp->qkey */ /* TODO: what is?! */
	udh->deth.source_qpn = OSAL_CPU_TO_BE32(QLNXR_GSI_QPN);
	QL_DPRINT12(dev->ha, "exit\n");
	return 0;
}

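/*
 * Allocate an LL2 packet descriptor for a GSI send: build and pack the
 * UD header into a DMA-coherent buffer, choose between wire and loopback
 * TX destinations and attach the caller's SGEs as payload fragments.
 */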
static inline int
qlnxr_gsi_build_packet(struct qlnxr_dev *dev,
	struct qlnxr_qp *qp, const struct ib_send_wr *swr,
	struct ecore_roce_ll2_packet **p_packet)
{
	u8 ud_header_buffer[QLNXR_MAX_UD_HEADER_SIZE];
	struct ecore_roce_ll2_packet *packet;
	int roce_mode, header_size;
	struct ib_ud_header udh;
	int i, rc;

	QL_DPRINT12(dev->ha, "enter\n");

	*p_packet = NULL;

	rc = qlnxr_gsi_build_header(dev, qp, swr, &udh, &roce_mode);
	if (rc) {
		QL_DPRINT11(dev->ha,
			"qlnxr_gsi_build_header failed rc = %d\n", rc);
		return rc;
	}

	header_size = ib_ud_header_pack(&udh, &ud_header_buffer);

	packet = kzalloc(sizeof(*packet), GFP_ATOMIC);
	if (!packet) {
		QL_DPRINT11(dev->ha, "packet == NULL\n");
		return -ENOMEM;
	}

	packet->header.vaddr = qlnx_dma_alloc_coherent(&dev->ha->cdev,
					&packet->header.baddr,
					header_size);
	if (!packet->header.vaddr) {
		QL_DPRINT11(dev->ha, "packet->header.vaddr == NULL\n");
		kfree(packet);
		return -ENOMEM;
	}

	if (memcmp(udh.eth.smac_h, udh.eth.dmac_h, ETH_ALEN))
		packet->tx_dest = ECORE_ROCE_LL2_TX_DEST_NW;
	else
		packet->tx_dest = ECORE_ROCE_LL2_TX_DEST_LB;

	packet->roce_mode = roce_mode;
	memcpy(packet->header.vaddr, ud_header_buffer, header_size);
	packet->header.len = header_size;
	packet->n_seg = swr->num_sge;
	qp->wqe_wr_id[qp->sq.prod].bytes_len = IB_GRH_BYTES; //RDMA_GRH_BYTES
	for (i = 0; i < packet->n_seg; i++) {
		packet->payload[i].baddr = swr->sg_list[i].addr;
		packet->payload[i].len = swr->sg_list[i].length;
		qp->wqe_wr_id[qp->sq.prod].bytes_len +=
			packet->payload[i].len;
		QL_DPRINT11(dev->ha, "baddr: %p, len: %d\n",
				packet->payload[i].baddr,
				packet->payload[i].len);
	}

	*p_packet = packet;

	QL_DPRINT12(dev->ha, "exit, packet->n_seg: %d\n", packet->n_seg);
	return 0;
}

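/*
 * Post a send work request on the GSI QP.  The packet is built in
 * software and handed to LL2; on success the WR is recorded in the SQ
 * shadow ring so its completion can be reported from the poll routine.
 * Only a single IB_WR_SEND WR per call is supported.
 */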
int
qlnxr_gsi_post_send(struct ib_qp *ibqp,
		const struct ib_send_wr *wr,
		const struct ib_send_wr **bad_wr)
{
	struct ecore_roce_ll2_packet *pkt = NULL;
	struct qlnxr_qp *qp = get_qlnxr_qp(ibqp);
	struct qlnxr_dev *dev = qp->dev;
	unsigned long flags;
	int rc;

	QL_DPRINT12(dev->ha, "enter\n");

	if (qp->state != ECORE_ROCE_QP_STATE_RTS) {
		QL_DPRINT11(dev->ha,
			"(qp->state != ECORE_ROCE_QP_STATE_RTS)\n");
		*bad_wr = wr;
		return -EINVAL;
	}

	if (wr->num_sge > RDMA_MAX_SGE_PER_SQ_WQE) {
		QL_DPRINT11(dev->ha,
			"(wr->num_sge > RDMA_MAX_SGE_PER_SQ_WQE)\n");
		rc = -EINVAL;
		goto err;
	}

	if (wr->opcode != IB_WR_SEND) {
		QL_DPRINT11(dev->ha, "(wr->opcode != IB_WR_SEND)\n");
		rc = -EINVAL;
		goto err;
	}

	spin_lock_irqsave(&qp->q_lock, flags);

	rc = qlnxr_gsi_build_packet(dev, qp, wr, &pkt);
	if (rc) {
		spin_unlock_irqrestore(&qp->q_lock, flags);
		QL_DPRINT11(dev->ha, "qlnxr_gsi_build_packet failed\n");
		goto err;
	}

	rc = qlnxr_ll2_post_tx(dev, pkt);

	if (!rc) {
		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
		qp->wqe_wr_id[qp->sq.prod].signaled =
			!!(wr->send_flags & IB_SEND_SIGNALED);
		qp->wqe_wr_id[qp->sq.prod].opcode = IB_WC_SEND;
		qlnxr_inc_sw_prod(&qp->sq);
		QL_DPRINT11(dev->ha, "packet sent over gsi qp\n");
	} else {
		QL_DPRINT11(dev->ha, "qlnxr_ll2_post_tx failed\n");
		rc = -EAGAIN;
		*bad_wr = wr;
	}

	spin_unlock_irqrestore(&qp->q_lock, flags);

	if (wr->next != NULL) {
		*bad_wr = wr->next;
		rc = -EINVAL;
	}

	QL_DPRINT12(dev->ha, "exit\n");
	return rc;

err:
	*bad_wr = wr;
	QL_DPRINT12(dev->ha, "exit error\n");
	return rc;
}

#define	QLNXR_LL2_RX_BUFFER_SIZE	(4 * 1024)
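/*
 * Post receive work requests on the GSI QP: hand each WR's single SGE to
 * LL2 as an RX buffer and record the WR in the RQ shadow ring so the
 * completion can be matched up in qlnxr_gsi_poll_cq().
 */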
int
qlnxr_gsi_post_recv(struct ib_qp *ibqp,
		const struct ib_recv_wr *wr,
		const struct ib_recv_wr **bad_wr)
{
	struct qlnxr_dev *dev = get_qlnxr_dev((ibqp->device));
	struct qlnxr_qp *qp = get_qlnxr_qp(ibqp);
	unsigned long flags;
	int rc = 0;

	QL_DPRINT12(dev->ha, "enter, wr: %p\n", wr);

	if ((qp->state != ECORE_ROCE_QP_STATE_RTR) &&
	    (qp->state != ECORE_ROCE_QP_STATE_RTS)) {
		*bad_wr = wr;
		QL_DPRINT11(dev->ha, "exit 0\n");
		return -EINVAL;
	}

	spin_lock_irqsave(&qp->q_lock, flags);

	while (wr) {
		if (wr->num_sge > QLNXR_GSI_MAX_RECV_SGE) {
			QL_DPRINT11(dev->ha, "exit 1\n");
			goto err;
		}

		rc = ecore_ll2_post_rx_buffer(dev->rdma_ctx,
				dev->gsi_ll2_handle,
				wr->sg_list[0].addr,
				wr->sg_list[0].length,
				0 /* cookie */,
				1 /* notify_fw */);
		if (rc) {
			QL_DPRINT11(dev->ha, "exit 2\n");
			goto err;
		}

		memset(&qp->rqe_wr_id[qp->rq.prod], 0,
			sizeof(qp->rqe_wr_id[qp->rq.prod]));
		qp->rqe_wr_id[qp->rq.prod].sg_list[0] = wr->sg_list[0];
		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;

		qlnxr_inc_sw_prod(&qp->rq);

		wr = wr->next;
	}

	spin_unlock_irqrestore(&qp->q_lock, flags);

	QL_DPRINT12(dev->ha, "exit rc = %d\n", rc);
	return rc;
err:

	spin_unlock_irqrestore(&qp->q_lock, flags);
	*bad_wr = wr;

	QL_DPRINT12(dev->ha, "exit with -ENOMEM\n");
	return -ENOMEM;
}

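/*
 * Poll completions for the GSI QP: drain first the RQ shadow ring (MADs
 * completed by the LL2 RX callback) and then the SQ shadow ring, filling
 * in the caller's work-completion array.  Returns the number of entries.
 */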
int
qlnxr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct qlnxr_dev *dev = get_qlnxr_dev((ibcq->device));
	struct qlnxr_cq *cq = get_qlnxr_cq(ibcq);
	struct qlnxr_qp *qp = dev->gsi_qp;
	unsigned long flags;
	int i = 0;

	QL_DPRINT12(dev->ha, "enter\n");

	spin_lock_irqsave(&cq->cq_lock, flags);

	while (i < num_entries && qp->rq.cons != qp->rq.gsi_cons) {
		memset(&wc[i], 0, sizeof(*wc));

		wc[i].qp = &qp->ibqp;
		wc[i].wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
		wc[i].opcode = IB_WC_RECV;
		wc[i].pkey_index = 0;
		wc[i].status = (qp->rqe_wr_id[qp->rq.cons].rc) ?
			       IB_WC_GENERAL_ERR : IB_WC_SUCCESS;
		/* 0 - currently only one recv sg is supported */
		wc[i].byte_len = qp->rqe_wr_id[qp->rq.cons].sg_list[0].length;
		wc[i].wc_flags |= IB_WC_GRH | IB_WC_IP_CSUM_OK;

		memcpy(&wc[i].smac, qp->rqe_wr_id[qp->rq.cons].smac, ETH_ALEN);
		wc[i].wc_flags |= IB_WC_WITH_SMAC;

		if (qp->rqe_wr_id[qp->rq.cons].vlan_id) {
			wc[i].wc_flags |= IB_WC_WITH_VLAN;
			wc[i].vlan_id = qp->rqe_wr_id[qp->rq.cons].vlan_id;
		}

		qlnxr_inc_sw_cons(&qp->rq);
		i++;
	}

	while (i < num_entries && qp->sq.cons != qp->sq.gsi_cons) {
		memset(&wc[i], 0, sizeof(*wc));

		wc[i].qp = &qp->ibqp;
		wc[i].wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
		wc[i].opcode = IB_WC_SEND;
		wc[i].status = IB_WC_SUCCESS;

		qlnxr_inc_sw_cons(&qp->sq);
		i++;
	}

	spin_unlock_irqrestore(&cq->cq_lock, flags);

	QL_DPRINT12(dev->ha, "exit i = %d\n", i);
	return i;
}
858