/*
 * Copyright (c) 2018-2019 Cavium, Inc.
 * All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "qlnxr_def.h"
#include "rdma_common.h"
#include "qlnxr_cm.h"

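/*
 * Advance the software GSI consumer index of a work queue, wrapping
 * around at max_wr.
 */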
void
qlnxr_inc_sw_gsi_cons(struct qlnxr_qp_hwq_info *info)
{
	info->gsi_cons = (info->gsi_cons + 1) % info->max_wr;
}

void
qlnxr_store_gsi_qp_cq(struct qlnxr_dev *dev,
		struct qlnxr_qp *qp,
		struct ib_qp_init_attr *attrs)
{
	QL_DPRINT12(dev->ha, "enter\n");

	dev->gsi_qp_created = 1;
	dev->gsi_sqcq = get_qlnxr_cq((attrs->send_cq));
	dev->gsi_rqcq = get_qlnxr_cq((attrs->recv_cq));
	dev->gsi_qp = qp;

	QL_DPRINT12(dev->ha, "exit\n");

	return;
}

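/*
 * LL2 TX completion callback for the GSI QP: free the DMA-coherent UD
 * header buffer and the packet descriptor, advance the SQ GSI consumer
 * index and notify the send CQ's completion handler.
 */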
void
qlnxr_ll2_complete_tx_packet(void *cxt,
		uint8_t connection_handle,
		void *cookie,
		dma_addr_t first_frag_addr,
		bool b_last_fragment,
		bool b_last_packet)
{
	struct qlnxr_dev *dev = (struct qlnxr_dev *)cxt;
	struct ecore_roce_ll2_packet *pkt = cookie;
	struct qlnxr_cq *cq = dev->gsi_sqcq;
	struct qlnxr_qp *qp = dev->gsi_qp;
	unsigned long flags;

	QL_DPRINT12(dev->ha, "enter\n");

	qlnx_dma_free_coherent(&dev->ha->cdev, pkt->header.vaddr,
			pkt->header.baddr, pkt->header.len);
	kfree(pkt);

	spin_lock_irqsave(&qp->q_lock, flags);

	qlnxr_inc_sw_gsi_cons(&qp->sq);

	spin_unlock_irqrestore(&qp->q_lock, flags);

	if (cq->ibcq.comp_handler)
		(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);

	QL_DPRINT12(dev->ha, "exit\n");

	return;
}

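/*
 * LL2 RX completion callback for the GSI QP: record the data length,
 * VLAN and source MAC of the received packet in the RQ work-request
 * ring, advance the RQ GSI consumer index and notify the receive CQ's
 * completion handler.  Only QP id 1 (the GSI QP) is handled here.
 */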
void
qlnxr_ll2_complete_rx_packet(void *cxt,
		struct ecore_ll2_comp_rx_data *data)
{
	struct qlnxr_dev *dev = (struct qlnxr_dev *)cxt;
	struct qlnxr_cq *cq = dev->gsi_rqcq;
	// struct qlnxr_qp *qp = dev->gsi_qp;
	struct qlnxr_qp *qp = NULL;
	unsigned long flags;
	// uint32_t delay_count = 0, gsi_cons = 0;
	//void * dest_va;

	QL_DPRINT12(dev->ha, "enter\n");

	if (data->u.data_length_error) {
		/* TODO: add statistic */
	}

	if (data->cookie == NULL) {
		QL_DPRINT12(dev->ha, "cookie is NULL, bad sign\n");
	}

	if (data->qp_id == 1) {
		qp = dev->gsi_qp;
	} else {
		/* TODO: This will be needed for UD QP support */
		/* For RoCEv1 this is invalid */
		QL_DPRINT12(dev->ha, "invalid QP\n");
		return;
	}
	/* note: currently only one recv sg is supported */
	QL_DPRINT12(dev->ha, "MAD received on QP : %x\n", data->rx_buf_addr);

	spin_lock_irqsave(&qp->q_lock, flags);

	qp->rqe_wr_id[qp->rq.gsi_cons].rc =
		data->u.data_length_error ? -EINVAL : 0;
	qp->rqe_wr_id[qp->rq.gsi_cons].vlan_id = data->vlan;
	/* note: length stands for data length i.e. GRH is excluded */
	qp->rqe_wr_id[qp->rq.gsi_cons].sg_list[0].length =
		data->length.data_length;
	*((u32 *)&qp->rqe_wr_id[qp->rq.gsi_cons].smac[0]) =
		ntohl(data->opaque_data_0);
	*((u16 *)&qp->rqe_wr_id[qp->rq.gsi_cons].smac[4]) =
		ntohs((u16)data->opaque_data_1);

	qlnxr_inc_sw_gsi_cons(&qp->rq);

	spin_unlock_irqrestore(&qp->q_lock, flags);

	if (cq->ibcq.comp_handler)
		(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);

	QL_DPRINT12(dev->ha, "exit\n");

	return;
}

void qlnxr_ll2_release_rx_packet(void *cxt,
		u8 connection_handle,
		void *cookie,
		dma_addr_t rx_buf_addr,
		bool b_last_packet)
{
	/* Do nothing... */
}

static void
qlnxr_destroy_gsi_cq(struct qlnxr_dev *dev,
		struct ib_qp_init_attr *attrs)
{
	struct ecore_rdma_destroy_cq_in_params iparams;
	struct ecore_rdma_destroy_cq_out_params oparams;
	struct qlnxr_cq *cq;

	QL_DPRINT12(dev->ha, "enter\n");

	cq = get_qlnxr_cq((attrs->send_cq));
	iparams.icid = cq->icid;
	ecore_rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
	ecore_chain_free(&dev->ha->cdev, &cq->pbl);

	cq = get_qlnxr_cq((attrs->recv_cq));
	/* if a dedicated recv_cq was used, delete it too */
	if (iparams.icid != cq->icid) {
		iparams.icid = cq->icid;
		ecore_rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
		ecore_chain_free(&dev->ha->cdev, &cq->pbl);
	}

	QL_DPRINT12(dev->ha, "exit\n");

	return;
}

static inline int
qlnxr_check_gsi_qp_attrs(struct qlnxr_dev *dev,
		struct ib_qp_init_attr *attrs)
{
	QL_DPRINT12(dev->ha, "enter\n");

	if (attrs->cap.max_recv_sge > QLNXR_GSI_MAX_RECV_SGE) {
		QL_DPRINT11(dev->ha,
			"(attrs->cap.max_recv_sge > QLNXR_GSI_MAX_RECV_SGE)\n");
		return -EINVAL;
	}

	if (attrs->cap.max_recv_wr > QLNXR_GSI_MAX_RECV_WR) {
		QL_DPRINT11(dev->ha,
			"(attrs->cap.max_recv_wr > QLNXR_GSI_MAX_RECV_WR)\n");
		return -EINVAL;
	}

	if (attrs->cap.max_send_wr > QLNXR_GSI_MAX_SEND_WR) {
		QL_DPRINT11(dev->ha,
			"(attrs->cap.max_send_wr > QLNXR_GSI_MAX_SEND_WR)\n");
		return -EINVAL;
	}

	QL_DPRINT12(dev->ha, "exit\n");

	return 0;
}

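/*
 * Post a GSI packet on the LL2 connection: the UD header goes out as
 * the first fragment, followed by one fragment per payload SGE.  If
 * posting the header fails the packet resources are released here;
 * once fragments are being posted, cleanup is deferred to the TX
 * completion callback.
 */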
static int
qlnxr_ll2_post_tx(struct qlnxr_dev *dev, struct ecore_roce_ll2_packet *pkt)
{
	enum ecore_ll2_roce_flavor_type roce_flavor;
	struct ecore_ll2_tx_pkt_info ll2_tx_pkt;
	struct ecore_ll2_stats stats;
	int rc;
	int i;

	QL_DPRINT12(dev->ha, "enter\n");

	memset(&ll2_tx_pkt, 0, sizeof(ll2_tx_pkt));

	if (pkt->roce_mode != ROCE_V1) {
		QL_DPRINT11(dev->ha, "roce_mode != ROCE_V1\n");
		return (-1);
	}

	roce_flavor = (pkt->roce_mode == ROCE_V1) ?
		ECORE_LL2_ROCE : ECORE_LL2_RROCE;

	ll2_tx_pkt.num_of_bds = 1 /* hdr */ + pkt->n_seg;
	ll2_tx_pkt.vlan = 0; /* ??? */
	ll2_tx_pkt.tx_dest = ECORE_LL2_TX_DEST_NW;
	ll2_tx_pkt.ecore_roce_flavor = roce_flavor;
	ll2_tx_pkt.first_frag = pkt->header.baddr;
	ll2_tx_pkt.first_frag_len = pkt->header.len;
	ll2_tx_pkt.cookie = pkt;
	ll2_tx_pkt.enable_ip_cksum = 1; // Only for RoCEv2:IPv4

	/* tx header */
	rc = ecore_ll2_prepare_tx_packet(dev->rdma_ctx,
			dev->gsi_ll2_handle,
			&ll2_tx_pkt,
			1);
	if (rc) {
		QL_DPRINT11(dev->ha, "ecore_ll2_prepare_tx_packet failed\n");

		/* TX failed while posting header - release resources */
		qlnx_dma_free_coherent(&dev->ha->cdev,
			pkt->header.vaddr,
			pkt->header.baddr,
			pkt->header.len);

		kfree(pkt);

		return rc;
	}

	/* tx payload */
	for (i = 0; i < pkt->n_seg; i++) {
		rc = ecore_ll2_set_fragment_of_tx_packet(dev->rdma_ctx,
						       dev->gsi_ll2_handle,
						       pkt->payload[i].baddr,
						       pkt->payload[i].len);
		if (rc) {
			/* if failed not much to do here, partial packet has
			 * been posted we can't free memory, will need to wait
			 * for completion
			 */
			QL_DPRINT11(dev->ha,
				"ecore_ll2_set_fragment_of_tx_packet failed\n");
			return rc;
		}
	}

	/* retrieve LL2 statistics; only a retrieval failure is reported */
	memset(&stats, 0, sizeof(stats));
	rc = ecore_ll2_get_stats(dev->rdma_ctx, dev->gsi_ll2_handle, &stats);
	if (rc) {
		QL_DPRINT11(dev->ha, "failed to obtain ll2 stats\n");
	}

	QL_DPRINT12(dev->ha, "exit\n");

	return 0;
}

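/*
 * Tear down the GSI LL2 connection: remove the LL2 MAC address filter,
 * terminate and release the connection and invalidate the handle.
 */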
int
qlnxr_ll2_stop(struct qlnxr_dev *dev)
{
	int rc;

	QL_DPRINT12(dev->ha, "enter\n");

	if (dev->gsi_ll2_handle == 0xFF)
		return 0;

	/* remove LL2 MAC address filter */
	rc = qlnx_rdma_ll2_set_mac_filter(dev->rdma_ctx,
			  dev->gsi_ll2_mac_address, NULL);

	rc = ecore_ll2_terminate_connection(dev->rdma_ctx,
			dev->gsi_ll2_handle);

	ecore_ll2_release_connection(dev->rdma_ctx, dev->gsi_ll2_handle);

	dev->gsi_ll2_handle = 0xFF;

	QL_DPRINT12(dev->ha, "exit rc = %d\n", rc);
	return rc;
}

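/*
 * Acquire and establish the LL2 connection used by the GSI QP and
 * install an LL2 MAC filter for the port's primary MAC address.
 */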
int qlnxr_ll2_start(struct qlnxr_dev *dev,
		   struct ib_qp_init_attr *attrs,
		   struct qlnxr_qp *qp)
{
	struct ecore_ll2_acquire_data data;
	struct ecore_ll2_cbs cbs;
	int rc;

	QL_DPRINT12(dev->ha, "enter\n");

	/* configure and start LL2 */
	cbs.rx_comp_cb = qlnxr_ll2_complete_rx_packet;
	cbs.tx_comp_cb = qlnxr_ll2_complete_tx_packet;
	cbs.rx_release_cb = qlnxr_ll2_release_rx_packet;
	cbs.tx_release_cb = qlnxr_ll2_complete_tx_packet;
	cbs.cookie = dev;
	dev->gsi_ll2_handle = 0xFF;

	memset(&data, 0, sizeof(data));
	data.input.conn_type = ECORE_LL2_TYPE_ROCE;
	data.input.mtu = if_getmtu(dev->ha->ifp);
	data.input.rx_num_desc = 8 * 1024;
	data.input.rx_drop_ttl0_flg = 1;
	data.input.rx_vlan_removal_en = 0;
	data.input.tx_num_desc = 8 * 1024;
	data.input.tx_tc = 0;
	data.input.tx_dest = ECORE_LL2_TX_DEST_NW;
	data.input.ai_err_packet_too_big = ECORE_LL2_DROP_PACKET;
	data.input.ai_err_no_buf = ECORE_LL2_DROP_PACKET;
	data.input.gsi_enable = 1;
	data.p_connection_handle = &dev->gsi_ll2_handle;
	data.cbs = &cbs;

	rc = ecore_ll2_acquire_connection(dev->rdma_ctx, &data);

	if (rc) {
		QL_DPRINT11(dev->ha,
			"ecore_ll2_acquire_connection failed: %d\n",
			rc);
		return rc;
	}

	QL_DPRINT11(dev->ha,
		"ll2 connection acquired successfully\n");
	rc = ecore_ll2_establish_connection(dev->rdma_ctx,
		dev->gsi_ll2_handle);

	if (rc) {
		QL_DPRINT11(dev->ha,
			"ecore_ll2_establish_connection failed rc = %d\n", rc);
		goto err1;
	}

	QL_DPRINT11(dev->ha,
		"ll2 connection established successfully\n");
	rc = qlnx_rdma_ll2_set_mac_filter(dev->rdma_ctx, NULL,
			dev->ha->primary_mac);
	if (rc) {
		QL_DPRINT11(dev->ha,
			"qlnx_rdma_ll2_set_mac_filter failed rc = %d\n", rc);
		goto err2;
	}

	QL_DPRINT12(dev->ha, "exit rc = %d\n", rc);
	return 0;

err2:
	ecore_ll2_terminate_connection(dev->rdma_ctx, dev->gsi_ll2_handle);
err1:
	ecore_ll2_release_connection(dev->rdma_ctx, dev->gsi_ll2_handle);

	QL_DPRINT12(dev->ha, "exit rc = %d\n", rc);
	return rc;
}

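/*
 * Create the GSI (QP1) queue pair: validate the requested attributes,
 * start the LL2 connection, allocate the software SQ/RQ work-request
 * rings and mark the associated CQs as driver-handled GSI CQs.
 */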
struct ib_qp*
qlnxr_create_gsi_qp(struct qlnxr_dev *dev,
		 struct ib_qp_init_attr *attrs,
		 struct qlnxr_qp *qp)
{
	int rc;

	QL_DPRINT12(dev->ha, "enter\n");

	rc = qlnxr_check_gsi_qp_attrs(dev, attrs);

	if (rc) {
		QL_DPRINT11(dev->ha, "qlnxr_check_gsi_qp_attrs failed\n");
		return ERR_PTR(rc);
	}

	rc = qlnxr_ll2_start(dev, attrs, qp);
	if (rc) {
		QL_DPRINT11(dev->ha, "qlnxr_ll2_start failed\n");
		return ERR_PTR(rc);
	}

	/* create QP */
	qp->ibqp.qp_num = 1;
	qp->rq.max_wr = attrs->cap.max_recv_wr;
	qp->sq.max_wr = attrs->cap.max_send_wr;

	qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
				GFP_KERNEL);
	if (!qp->rqe_wr_id) {
		QL_DPRINT11(dev->ha, "(!qp->rqe_wr_id)\n");
		goto err;
	}

	qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
				GFP_KERNEL);
	if (!qp->wqe_wr_id) {
		QL_DPRINT11(dev->ha, "(!qp->wqe_wr_id)\n");
		goto err;
	}

	qlnxr_store_gsi_qp_cq(dev, qp, attrs);
	memcpy(dev->gsi_ll2_mac_address, dev->ha->primary_mac, ETH_ALEN);

	/* the GSI CQ is handled by the driver so remove it from the FW */
	qlnxr_destroy_gsi_cq(dev, attrs);
	dev->gsi_rqcq->cq_type = QLNXR_CQ_TYPE_GSI;
	dev->gsi_sqcq->cq_type = QLNXR_CQ_TYPE_GSI;

	QL_DPRINT12(dev->ha, "exit &qp->ibqp = %p\n", &qp->ibqp);

	return &qp->ibqp;
err:
	kfree(qp->rqe_wr_id);

	rc = qlnxr_ll2_stop(dev);

	QL_DPRINT12(dev->ha, "exit with error\n");

	return ERR_PTR(-ENOMEM);
}

int
qlnxr_destroy_gsi_qp(struct qlnxr_dev *dev)
{
	int rc = 0;

	QL_DPRINT12(dev->ha, "enter\n");

	rc = qlnxr_ll2_stop(dev);

	QL_DPRINT12(dev->ha, "exit rc = %d\n", rc);
	return (rc);
}

static inline bool
qlnxr_get_vlan_id_gsi(struct ib_ah_attr *ah_attr, u16 *vlan_id)
{
	u16 tmp_vlan_id;
	union ib_gid *dgid = &ah_attr->grh.dgid;

	tmp_vlan_id = (dgid->raw[11] << 8) | dgid->raw[12];
	if (tmp_vlan_id < 0x1000) {
		*vlan_id = tmp_vlan_id;
		return true;
	} else {
		*vlan_id = 0;
		return false;
	}
}

#define QLNXR_MAX_UD_HEADER_SIZE	(100)
#define QLNXR_GSI_QPN		(1)
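/*
 * Build the UD header (Ethernet/VLAN, GRH, BTH and DETH) for a GSI
 * send work request.  Only a RoCEv1 header is generated here; the
 * IPv4/UDP (RoCEv2) path is only compiled in when
 * DEFINE_IB_UD_HEADER_INIT_UDP_PRESENT is defined.
 */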
static inline int
qlnxr_gsi_build_header(struct qlnxr_dev *dev,
		struct qlnxr_qp *qp,
		const struct ib_send_wr *swr,
		struct ib_ud_header *udh,
		int *roce_mode)
{
	bool has_vlan = false, has_grh_ipv6 = true;
	struct ib_ah_attr *ah_attr = &get_qlnxr_ah((ud_wr(swr)->ah))->attr;
	struct ib_global_route *grh = &ah_attr->grh;
	union ib_gid sgid;
	int send_size = 0;
	u16 vlan_id = 0;
	u16 ether_type;

	int rc = 0;
	int ip_ver = 0;
	bool has_udp = false;

	int i;

	send_size = 0;
	for (i = 0; i < swr->num_sge; ++i)
		send_size += swr->sg_list[i].length;

	has_vlan = qlnxr_get_vlan_id_gsi(ah_attr, &vlan_id);
	ether_type = ETH_P_ROCE;
	*roce_mode = ROCE_V1;
	if (grh->sgid_index < QLNXR_MAX_SGID)
		sgid = dev->sgid_tbl[grh->sgid_index];
	else
		sgid = dev->sgid_tbl[0];

	rc = ib_ud_header_init(send_size, false /* LRH */, true /* ETH */,
			has_vlan, has_grh_ipv6, ip_ver, has_udp,
			0 /* immediate */, udh);

	if (rc) {
		QL_DPRINT11(dev->ha, "gsi post send: failed to init header\n");
		return rc;
	}

	/* ENET + VLAN headers */
	memcpy(udh->eth.dmac_h, ah_attr->dmac, ETH_ALEN);
	memcpy(udh->eth.smac_h, dev->ha->primary_mac, ETH_ALEN);
	if (has_vlan) {
		udh->eth.type = htons(ETH_P_8021Q);
		udh->vlan.tag = htons(vlan_id);
		udh->vlan.type = htons(ether_type);
	} else {
		udh->eth.type = htons(ether_type);
	}

	for (int j = 0; j < ETH_ALEN; j++) {
		QL_DPRINT12(dev->ha, "destination mac: %x\n",
				udh->eth.dmac_h[j]);
	}
	for (int j = 0; j < ETH_ALEN; j++) {
		QL_DPRINT12(dev->ha, "source mac: %x\n",
				udh->eth.smac_h[j]);
	}

	QL_DPRINT12(dev->ha, "QP: %p, opcode: %d, wq: %lx, roce: %x, hops:%d,"
			"imm : %d, vlan :%d, AH: %p\n",
			qp, swr->opcode, swr->wr_id, *roce_mode, grh->hop_limit,
			0, has_vlan, get_qlnxr_ah((ud_wr(swr)->ah)));

	if (has_grh_ipv6) {
		/* GRH / IPv6 header */
		udh->grh.traffic_class = grh->traffic_class;
		udh->grh.flow_label = grh->flow_label;
		udh->grh.hop_limit = grh->hop_limit;
		udh->grh.destination_gid = grh->dgid;
		memcpy(&udh->grh.source_gid.raw, &sgid.raw,
		       sizeof(udh->grh.source_gid.raw));
		QL_DPRINT12(dev->ha, "header: tc: %x, flow_label : %x, "
			"hop_limit: %x\n", udh->grh.traffic_class,
			udh->grh.flow_label, udh->grh.hop_limit);
		for (i = 0; i < 16; i++) {
			QL_DPRINT12(dev->ha, "udh dgid = %x\n", udh->grh.destination_gid.raw[i]);
		}
		for (i = 0; i < 16; i++) {
			QL_DPRINT12(dev->ha, "udh sgid = %x\n", udh->grh.source_gid.raw[i]);
		}
		udh->grh.next_header = 0x1b;
	}
#ifdef DEFINE_IB_UD_HEADER_INIT_UDP_PRESENT
	/* This is for RoCEv2 */
	else {
		/* IPv4 header */
		u32 ipv4_addr;

		udh->ip4.protocol = IPPROTO_UDP;
		udh->ip4.tos = htonl(grh->flow_label);
		udh->ip4.frag_off = htons(IP_DF);
		udh->ip4.ttl = grh->hop_limit;

		ipv4_addr = qedr_get_ipv4_from_gid(sgid.raw);
		udh->ip4.saddr = ipv4_addr;
		ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw);
		udh->ip4.daddr = ipv4_addr;
		/* note: checksum is calculated by the device */
	}
#endif

	/* BTH */
	udh->bth.solicited_event = !!(swr->send_flags & IB_SEND_SOLICITED);
	udh->bth.pkey = QLNXR_ROCE_PKEY_DEFAULT; /* TODO: ib_get_cached_pkey?! */
	//udh->bth.destination_qpn = htonl(ud_wr(swr)->remote_qpn);
	udh->bth.destination_qpn = OSAL_CPU_TO_BE32(ud_wr(swr)->remote_qpn);
	//udh->bth.psn = htonl((qp->sq_psn++) & ((1 << 24) - 1));
	udh->bth.psn = OSAL_CPU_TO_BE32((qp->sq_psn++) & ((1 << 24) - 1));
	udh->bth.opcode = IB_OPCODE_UD_SEND_ONLY;

	/* DETH */
	//udh->deth.qkey = htonl(0x80010000); /* qp->qkey */ /* TODO: what is?! */
	//udh->deth.source_qpn = htonl(QLNXR_GSI_QPN);
	udh->deth.qkey = OSAL_CPU_TO_BE32(0x80010000); /* qp->qkey */ /* TODO: what is?! */
	udh->deth.source_qpn = OSAL_CPU_TO_BE32(QLNXR_GSI_QPN);
	QL_DPRINT12(dev->ha, "exit\n");
	return 0;
}

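/*
 * Pack the UD header into a DMA-coherent buffer and describe the
 * payload SGEs in an ecore_roce_ll2_packet, selecting the network or
 * loopback TX destination based on the source and destination MACs.
 */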
static inline int
qlnxr_gsi_build_packet(struct qlnxr_dev *dev,
	struct qlnxr_qp *qp, const struct ib_send_wr *swr,
	struct ecore_roce_ll2_packet **p_packet)
{
	u8 ud_header_buffer[QLNXR_MAX_UD_HEADER_SIZE];
	struct ecore_roce_ll2_packet *packet;
	int roce_mode, header_size;
	struct ib_ud_header udh;
	int i, rc;

	QL_DPRINT12(dev->ha, "enter\n");

	*p_packet = NULL;

	rc = qlnxr_gsi_build_header(dev, qp, swr, &udh, &roce_mode);
	if (rc) {
		QL_DPRINT11(dev->ha,
			"qlnxr_gsi_build_header failed rc = %d\n", rc);
		return rc;
	}

	header_size = ib_ud_header_pack(&udh, &ud_header_buffer);

	packet = kzalloc(sizeof(*packet), GFP_ATOMIC);
	if (!packet) {
		QL_DPRINT11(dev->ha, "packet == NULL\n");
		return -ENOMEM;
	}

	packet->header.vaddr = qlnx_dma_alloc_coherent(&dev->ha->cdev,
					&packet->header.baddr,
					header_size);
	if (!packet->header.vaddr) {
		QL_DPRINT11(dev->ha, "packet->header.vaddr == NULL\n");
		kfree(packet);
		return -ENOMEM;
	}

	if (memcmp(udh.eth.smac_h, udh.eth.dmac_h, ETH_ALEN))
		packet->tx_dest = ECORE_ROCE_LL2_TX_DEST_NW;
	else
		packet->tx_dest = ECORE_ROCE_LL2_TX_DEST_LB;

	packet->roce_mode = roce_mode;
	memcpy(packet->header.vaddr, ud_header_buffer, header_size);
	packet->header.len = header_size;
	packet->n_seg = swr->num_sge;
	qp->wqe_wr_id[qp->sq.prod].bytes_len = IB_GRH_BYTES; //RDMA_GRH_BYTES
	for (i = 0; i < packet->n_seg; i++) {
		packet->payload[i].baddr = swr->sg_list[i].addr;
		packet->payload[i].len = swr->sg_list[i].length;
		qp->wqe_wr_id[qp->sq.prod].bytes_len +=
			packet->payload[i].len;
		QL_DPRINT11(dev->ha, "baddr: %p, len: %d\n",
				packet->payload[i].baddr,
				packet->payload[i].len);
	}

	*p_packet = packet;

	QL_DPRINT12(dev->ha, "exit, packet->n_seg: %d\n", packet->n_seg);
	return 0;
}

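/*
 * Post a send work request on the GSI QP.  Only a single IB_WR_SEND
 * work request is accepted per call; a chained request is reported
 * back through bad_wr.
 */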
int
qlnxr_gsi_post_send(struct ib_qp *ibqp,
		const struct ib_send_wr *wr,
		const struct ib_send_wr **bad_wr)
{
	struct ecore_roce_ll2_packet *pkt = NULL;
	struct qlnxr_qp *qp = get_qlnxr_qp(ibqp);
	struct qlnxr_dev *dev = qp->dev;
	unsigned long flags;
	int rc;

	QL_DPRINT12(dev->ha, "enter\n");

	if (qp->state != ECORE_ROCE_QP_STATE_RTS) {
		QL_DPRINT11(dev->ha,
			"(qp->state != ECORE_ROCE_QP_STATE_RTS)\n");
		*bad_wr = wr;
		return -EINVAL;
	}

	if (wr->num_sge > RDMA_MAX_SGE_PER_SQ_WQE) {
		QL_DPRINT11(dev->ha,
			"(wr->num_sge > RDMA_MAX_SGE_PER_SQ_WQE)\n");
		rc = -EINVAL;
		goto err;
	}

	if (wr->opcode != IB_WR_SEND) {
		QL_DPRINT11(dev->ha, "(wr->opcode != IB_WR_SEND)\n");
		rc = -EINVAL;
		goto err;
	}

	spin_lock_irqsave(&qp->q_lock, flags);

	rc = qlnxr_gsi_build_packet(dev, qp, wr, &pkt);
	if (rc) {
		spin_unlock_irqrestore(&qp->q_lock, flags);
		QL_DPRINT11(dev->ha, "qlnxr_gsi_build_packet failed\n");
		goto err;
	}

	rc = qlnxr_ll2_post_tx(dev, pkt);

	if (!rc) {
		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
		qp->wqe_wr_id[qp->sq.prod].signaled =
			!!(wr->send_flags & IB_SEND_SIGNALED);
		qp->wqe_wr_id[qp->sq.prod].opcode = IB_WC_SEND;
		qlnxr_inc_sw_prod(&qp->sq);
		QL_DPRINT11(dev->ha, "packet sent over gsi qp\n");
	} else {
		QL_DPRINT11(dev->ha, "qlnxr_ll2_post_tx failed\n");
		rc = -EAGAIN;
		*bad_wr = wr;
	}

	spin_unlock_irqrestore(&qp->q_lock, flags);

	if (wr->next != NULL) {
		*bad_wr = wr->next;
		rc = -EINVAL;
	}

	QL_DPRINT12(dev->ha, "exit\n");
	return rc;

err:
	*bad_wr = wr;
	QL_DPRINT12(dev->ha, "exit error\n");
	return rc;
}

#define	QLNXR_LL2_RX_BUFFER_SIZE	(4 * 1024)
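/*
 * Post receive buffers for the GSI QP directly to the LL2 connection
 * and remember each work request in the software RQ ring.
 */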
int
qlnxr_gsi_post_recv(struct ib_qp *ibqp,
		const struct ib_recv_wr *wr,
		const struct ib_recv_wr **bad_wr)
{
	struct qlnxr_dev *dev = get_qlnxr_dev((ibqp->device));
	struct qlnxr_qp *qp = get_qlnxr_qp(ibqp);
	unsigned long flags;
	int rc = 0;

	QL_DPRINT12(dev->ha, "enter, wr: %p\n", wr);

	if ((qp->state != ECORE_ROCE_QP_STATE_RTR) &&
	    (qp->state != ECORE_ROCE_QP_STATE_RTS)) {
		*bad_wr = wr;
		QL_DPRINT11(dev->ha, "exit 0\n");
		return -EINVAL;
	}

	spin_lock_irqsave(&qp->q_lock, flags);

	while (wr) {
		if (wr->num_sge > QLNXR_GSI_MAX_RECV_SGE) {
			QL_DPRINT11(dev->ha, "exit 1\n");
			goto err;
		}

		rc = ecore_ll2_post_rx_buffer(dev->rdma_ctx,
				dev->gsi_ll2_handle,
				wr->sg_list[0].addr,
				wr->sg_list[0].length,
				0 /* cookie */,
				1 /* notify_fw */);
		if (rc) {
			QL_DPRINT11(dev->ha, "exit 2\n");
			goto err;
		}

		memset(&qp->rqe_wr_id[qp->rq.prod], 0,
			sizeof(qp->rqe_wr_id[qp->rq.prod]));
		qp->rqe_wr_id[qp->rq.prod].sg_list[0] = wr->sg_list[0];
		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;

		qlnxr_inc_sw_prod(&qp->rq);

		wr = wr->next;
	}

	spin_unlock_irqrestore(&qp->q_lock, flags);

	QL_DPRINT12(dev->ha, "exit rc = %d\n", rc);
	return rc;
err:
	spin_unlock_irqrestore(&qp->q_lock, flags);
	*bad_wr = wr;

	QL_DPRINT12(dev->ha, "exit with -ENOMEM\n");
	return -ENOMEM;
}

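/*
 * Poll the GSI CQ: drain receive completions first, then send
 * completions, returning at most num_entries work completions.
 */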
int
qlnxr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct qlnxr_dev *dev = get_qlnxr_dev((ibcq->device));
	struct qlnxr_cq *cq = get_qlnxr_cq(ibcq);
	struct qlnxr_qp *qp = dev->gsi_qp;
	unsigned long flags;
	int i = 0;

	QL_DPRINT12(dev->ha, "enter\n");

	spin_lock_irqsave(&cq->cq_lock, flags);

	while (i < num_entries && qp->rq.cons != qp->rq.gsi_cons) {
		memset(&wc[i], 0, sizeof(*wc));

		wc[i].qp = &qp->ibqp;
		wc[i].wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
		wc[i].opcode = IB_WC_RECV;
		wc[i].pkey_index = 0;
		wc[i].status = (qp->rqe_wr_id[qp->rq.cons].rc) ?
			       IB_WC_GENERAL_ERR : IB_WC_SUCCESS;
		/* 0 - currently only one recv sg is supported */
		wc[i].byte_len = qp->rqe_wr_id[qp->rq.cons].sg_list[0].length;
		wc[i].wc_flags |= IB_WC_GRH | IB_WC_IP_CSUM_OK;

		memcpy(&wc[i].smac, qp->rqe_wr_id[qp->rq.cons].smac, ETH_ALEN);
		wc[i].wc_flags |= IB_WC_WITH_SMAC;

		if (qp->rqe_wr_id[qp->rq.cons].vlan_id) {
			wc[i].wc_flags |= IB_WC_WITH_VLAN;
			wc[i].vlan_id = qp->rqe_wr_id[qp->rq.cons].vlan_id;
		}

		qlnxr_inc_sw_cons(&qp->rq);
		i++;
	}

	while (i < num_entries && qp->sq.cons != qp->sq.gsi_cons) {
		memset(&wc[i], 0, sizeof(*wc));

		wc[i].qp = &qp->ibqp;
		wc[i].wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
		wc[i].opcode = IB_WC_SEND;
		wc[i].status = IB_WC_SUCCESS;

		qlnxr_inc_sw_cons(&qp->sq);
		i++;
	}

	spin_unlock_irqrestore(&cq->cq_lock, flags);

	QL_DPRINT12(dev->ha, "exit i = %d\n", i);
	return i;
}
860