xref: /linux/drivers/infiniband/ulp/iser/iser_verbs.c (revision 005438a8eef063495ac059d128eea71b58de50e5)
1 /*
2  * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
4  * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *	- Redistributions of source code must retain the above
17  *	  copyright notice, this list of conditions and the following
18  *	  disclaimer.
19  *
20  *	- Redistributions in binary form must reproduce the above
21  *	  copyright notice, this list of conditions and the following
22  *	  disclaimer in the documentation and/or other materials
23  *	  provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 #include <linux/kernel.h>
35 #include <linux/module.h>
36 #include <linux/slab.h>
37 #include <linux/delay.h>
38 
39 #include "iscsi_iser.h"
40 
/*
 * CQ sizing: ISER_MAX_CQ_LEN is the number of receive plus send work
 * requests of ISCSI_ISER_MAX_CONN queue pairs, plus one extra entry per
 * such connection.  It is clamped against the device's max_cqe when the
 * CQs are created.
 */
#define ISCSI_ISER_MAX_CONN	8
#define ISER_MAX_RX_LEN		(ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_TX_LEN		(ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_CQ_LEN		\
	(ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + ISCSI_ISER_MAX_CONN)

/*
 * NOTE(review): presumably bounds how many completions are polled in one
 * pass of the CQ tasklet - the user is outside the visible part of the
 * file, confirm there.
 */
static int iser_cq_poll_limit = 512;

/* Forward declarations: both are handed to the CQ setup code below. */
static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
static void iser_cq_tasklet_fn(unsigned long data);
51 
52 static void iser_cq_event_callback(struct ib_event *cause, void *context)
53 {
54 	iser_err("cq event %s (%d)\n",
55 		 ib_event_msg(cause->event), cause->event);
56 }
57 
58 static void iser_qp_event_callback(struct ib_event *cause, void *context)
59 {
60 	iser_err("qp event %s (%d)\n",
61 		 ib_event_msg(cause->event), cause->event);
62 }
63 
64 static void iser_event_handler(struct ib_event_handler *handler,
65 				struct ib_event *event)
66 {
67 	iser_err("async event %s (%d) on device %s port %d\n",
68 		 ib_event_msg(event->event), event->event,
69 		 event->device->name, event->element.port_num);
70 }
71 
72 /**
73  * iser_create_device_ib_res - creates Protection Domain (PD), Completion
74  * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
75  * the adapator.
76  *
77  * returns 0 on success, -1 on failure
78  */
79 static int iser_create_device_ib_res(struct iser_device *device)
80 {
81 	struct ib_device_attr *dev_attr = &device->dev_attr;
82 	int ret, i, max_cqe;
83 
84 	ret = ib_query_device(device->ib_device, dev_attr);
85 	if (ret) {
86 		pr_warn("Query device failed for %s\n", device->ib_device->name);
87 		return ret;
88 	}
89 
90 	/* Assign function handles  - based on FMR support */
91 	if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
92 	    device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
93 		iser_info("FMR supported, using FMR for registration\n");
94 		device->iser_alloc_rdma_reg_res = iser_create_fmr_pool;
95 		device->iser_free_rdma_reg_res = iser_free_fmr_pool;
96 		device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr;
97 		device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;
98 	} else
99 	if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
100 		iser_info("FastReg supported, using FastReg for registration\n");
101 		device->iser_alloc_rdma_reg_res = iser_create_fastreg_pool;
102 		device->iser_free_rdma_reg_res = iser_free_fastreg_pool;
103 		device->iser_reg_rdma_mem = iser_reg_rdma_mem_fastreg;
104 		device->iser_unreg_rdma_mem = iser_unreg_mem_fastreg;
105 	} else {
106 		iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
107 		return -1;
108 	}
109 
110 	device->comps_used = min_t(int, num_online_cpus(),
111 				 device->ib_device->num_comp_vectors);
112 
113 	device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
114 				GFP_KERNEL);
115 	if (!device->comps)
116 		goto comps_err;
117 
118 	max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
119 
120 	iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
121 		  device->comps_used, device->ib_device->name,
122 		  device->ib_device->num_comp_vectors, max_cqe);
123 
124 	device->pd = ib_alloc_pd(device->ib_device);
125 	if (IS_ERR(device->pd))
126 		goto pd_err;
127 
128 	for (i = 0; i < device->comps_used; i++) {
129 		struct ib_cq_init_attr cq_attr = {};
130 		struct iser_comp *comp = &device->comps[i];
131 
132 		comp->device = device;
133 		cq_attr.cqe = max_cqe;
134 		cq_attr.comp_vector = i;
135 		comp->cq = ib_create_cq(device->ib_device,
136 					iser_cq_callback,
137 					iser_cq_event_callback,
138 					(void *)comp,
139 					&cq_attr);
140 		if (IS_ERR(comp->cq)) {
141 			comp->cq = NULL;
142 			goto cq_err;
143 		}
144 
145 		if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
146 			goto cq_err;
147 
148 		tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
149 			     (unsigned long)comp);
150 	}
151 
152 	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
153 				   IB_ACCESS_REMOTE_WRITE |
154 				   IB_ACCESS_REMOTE_READ);
155 	if (IS_ERR(device->mr))
156 		goto dma_mr_err;
157 
158 	INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
159 				iser_event_handler);
160 	if (ib_register_event_handler(&device->event_handler))
161 		goto handler_err;
162 
163 	return 0;
164 
165 handler_err:
166 	ib_dereg_mr(device->mr);
167 dma_mr_err:
168 	for (i = 0; i < device->comps_used; i++)
169 		tasklet_kill(&device->comps[i].tasklet);
170 cq_err:
171 	for (i = 0; i < device->comps_used; i++) {
172 		struct iser_comp *comp = &device->comps[i];
173 
174 		if (comp->cq)
175 			ib_destroy_cq(comp->cq);
176 	}
177 	ib_dealloc_pd(device->pd);
178 pd_err:
179 	kfree(device->comps);
180 comps_err:
181 	iser_err("failed to allocate an IB resource\n");
182 	return -1;
183 }
184 
185 /**
186  * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
187  * CQ and PD created with the device associated with the adapator.
188  */
189 static void iser_free_device_ib_res(struct iser_device *device)
190 {
191 	int i;
192 	BUG_ON(device->mr == NULL);
193 
194 	for (i = 0; i < device->comps_used; i++) {
195 		struct iser_comp *comp = &device->comps[i];
196 
197 		tasklet_kill(&comp->tasklet);
198 		ib_destroy_cq(comp->cq);
199 		comp->cq = NULL;
200 	}
201 
202 	(void)ib_unregister_event_handler(&device->event_handler);
203 	(void)ib_dereg_mr(device->mr);
204 	(void)ib_dealloc_pd(device->pd);
205 
206 	kfree(device->comps);
207 	device->comps = NULL;
208 
209 	device->mr = NULL;
210 	device->pd = NULL;
211 }
212 
213 /**
214  * iser_create_fmr_pool - Creates FMR pool and page_vector
215  *
216  * returns 0 on success, or errno code on failure
217  */
218 int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max)
219 {
220 	struct iser_device *device = ib_conn->device;
221 	struct ib_fmr_pool_param params;
222 	int ret = -ENOMEM;
223 
224 	ib_conn->fmr.page_vec = kmalloc(sizeof(*ib_conn->fmr.page_vec) +
225 					(sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)),
226 					GFP_KERNEL);
227 	if (!ib_conn->fmr.page_vec)
228 		return ret;
229 
230 	ib_conn->fmr.page_vec->pages = (u64 *)(ib_conn->fmr.page_vec + 1);
231 
232 	params.page_shift        = SHIFT_4K;
233 	/* when the first/last SG element are not start/end *
234 	 * page aligned, the map whould be of N+1 pages     */
235 	params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
236 	/* make the pool size twice the max number of SCSI commands *
237 	 * the ML is expected to queue, watermark for unmap at 50%  */
238 	params.pool_size	 = cmds_max * 2;
239 	params.dirty_watermark	 = cmds_max;
240 	params.cache		 = 0;
241 	params.flush_function	 = NULL;
242 	params.access		 = (IB_ACCESS_LOCAL_WRITE  |
243 				    IB_ACCESS_REMOTE_WRITE |
244 				    IB_ACCESS_REMOTE_READ);
245 
246 	ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, &params);
247 	if (!IS_ERR(ib_conn->fmr.pool))
248 		return 0;
249 
250 	/* no FMR => no need for page_vec */
251 	kfree(ib_conn->fmr.page_vec);
252 	ib_conn->fmr.page_vec = NULL;
253 
254 	ret = PTR_ERR(ib_conn->fmr.pool);
255 	ib_conn->fmr.pool = NULL;
256 	if (ret != -ENOSYS) {
257 		iser_err("FMR allocation failed, err %d\n", ret);
258 		return ret;
259 	} else {
260 		iser_warn("FMRs are not supported, using unaligned mode\n");
261 		return 0;
262 	}
263 }
264 
265 /**
266  * iser_free_fmr_pool - releases the FMR pool and page vec
267  */
268 void iser_free_fmr_pool(struct ib_conn *ib_conn)
269 {
270 	iser_info("freeing conn %p fmr pool %p\n",
271 		  ib_conn, ib_conn->fmr.pool);
272 
273 	if (ib_conn->fmr.pool != NULL)
274 		ib_destroy_fmr_pool(ib_conn->fmr.pool);
275 
276 	ib_conn->fmr.pool = NULL;
277 
278 	kfree(ib_conn->fmr.page_vec);
279 	ib_conn->fmr.page_vec = NULL;
280 }
281 
282 static int
283 iser_alloc_pi_ctx(struct ib_device *ib_device, struct ib_pd *pd,
284 		  struct fast_reg_descriptor *desc)
285 {
286 	struct iser_pi_context *pi_ctx = NULL;
287 	struct ib_mr_init_attr mr_init_attr = {.max_reg_descriptors = 2,
288 					       .flags = IB_MR_SIGNATURE_EN};
289 	int ret = 0;
290 
291 	desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
292 	if (!desc->pi_ctx)
293 		return -ENOMEM;
294 
295 	pi_ctx = desc->pi_ctx;
296 
297 	pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
298 					    ISCSI_ISER_SG_TABLESIZE);
299 	if (IS_ERR(pi_ctx->prot_frpl)) {
300 		ret = PTR_ERR(pi_ctx->prot_frpl);
301 		goto prot_frpl_failure;
302 	}
303 
304 	pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
305 					ISCSI_ISER_SG_TABLESIZE + 1);
306 	if (IS_ERR(pi_ctx->prot_mr)) {
307 		ret = PTR_ERR(pi_ctx->prot_mr);
308 		goto prot_mr_failure;
309 	}
310 	desc->reg_indicators |= ISER_PROT_KEY_VALID;
311 
312 	pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
313 	if (IS_ERR(pi_ctx->sig_mr)) {
314 		ret = PTR_ERR(pi_ctx->sig_mr);
315 		goto sig_mr_failure;
316 	}
317 	desc->reg_indicators |= ISER_SIG_KEY_VALID;
318 	desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
319 
320 	return 0;
321 
322 sig_mr_failure:
323 	ib_dereg_mr(desc->pi_ctx->prot_mr);
324 prot_mr_failure:
325 	ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
326 prot_frpl_failure:
327 	kfree(desc->pi_ctx);
328 
329 	return ret;
330 }
331 
332 static void
333 iser_free_pi_ctx(struct iser_pi_context *pi_ctx)
334 {
335 	ib_free_fast_reg_page_list(pi_ctx->prot_frpl);
336 	ib_dereg_mr(pi_ctx->prot_mr);
337 	ib_destroy_mr(pi_ctx->sig_mr);
338 	kfree(pi_ctx);
339 }
340 
341 static int
342 iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
343 			 bool pi_enable, struct fast_reg_descriptor *desc)
344 {
345 	int ret;
346 
347 	desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device,
348 						      ISCSI_ISER_SG_TABLESIZE + 1);
349 	if (IS_ERR(desc->data_frpl)) {
350 		ret = PTR_ERR(desc->data_frpl);
351 		iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n",
352 			 ret);
353 		return PTR_ERR(desc->data_frpl);
354 	}
355 
356 	desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1);
357 	if (IS_ERR(desc->data_mr)) {
358 		ret = PTR_ERR(desc->data_mr);
359 		iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
360 		goto fast_reg_mr_failure;
361 	}
362 	desc->reg_indicators |= ISER_DATA_KEY_VALID;
363 
364 	if (pi_enable) {
365 		ret = iser_alloc_pi_ctx(ib_device, pd, desc);
366 		if (ret)
367 			goto pi_ctx_alloc_failure;
368 	}
369 
370 	return 0;
371 pi_ctx_alloc_failure:
372 	ib_dereg_mr(desc->data_mr);
373 fast_reg_mr_failure:
374 	ib_free_fast_reg_page_list(desc->data_frpl);
375 
376 	return ret;
377 }
378 
379 /**
380  * iser_create_fastreg_pool - Creates pool of fast_reg descriptors
381  * for fast registration work requests.
382  * returns 0 on success, or errno code on failure
383  */
384 int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max)
385 {
386 	struct iser_device *device = ib_conn->device;
387 	struct fast_reg_descriptor *desc;
388 	int i, ret;
389 
390 	INIT_LIST_HEAD(&ib_conn->fastreg.pool);
391 	ib_conn->fastreg.pool_size = 0;
392 	for (i = 0; i < cmds_max; i++) {
393 		desc = kzalloc(sizeof(*desc), GFP_KERNEL);
394 		if (!desc) {
395 			iser_err("Failed to allocate a new fast_reg descriptor\n");
396 			ret = -ENOMEM;
397 			goto err;
398 		}
399 
400 		ret = iser_create_fastreg_desc(device->ib_device, device->pd,
401 					       ib_conn->pi_support, desc);
402 		if (ret) {
403 			iser_err("Failed to create fastreg descriptor err=%d\n",
404 				 ret);
405 			kfree(desc);
406 			goto err;
407 		}
408 
409 		list_add_tail(&desc->list, &ib_conn->fastreg.pool);
410 		ib_conn->fastreg.pool_size++;
411 	}
412 
413 	return 0;
414 
415 err:
416 	iser_free_fastreg_pool(ib_conn);
417 	return ret;
418 }
419 
420 /**
421  * iser_free_fastreg_pool - releases the pool of fast_reg descriptors
422  */
423 void iser_free_fastreg_pool(struct ib_conn *ib_conn)
424 {
425 	struct fast_reg_descriptor *desc, *tmp;
426 	int i = 0;
427 
428 	if (list_empty(&ib_conn->fastreg.pool))
429 		return;
430 
431 	iser_info("freeing conn %p fr pool\n", ib_conn);
432 
433 	list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) {
434 		list_del(&desc->list);
435 		ib_free_fast_reg_page_list(desc->data_frpl);
436 		ib_dereg_mr(desc->data_mr);
437 		if (desc->pi_ctx)
438 			iser_free_pi_ctx(desc->pi_ctx);
439 		kfree(desc);
440 		++i;
441 	}
442 
443 	if (i < ib_conn->fastreg.pool_size)
444 		iser_warn("pool still has %d regions registered\n",
445 			  ib_conn->fastreg.pool_size - i);
446 }
447 
448 /**
449  * iser_create_ib_conn_res - Queue-Pair (QP)
450  *
451  * returns 0 on success, -1 on failure
452  */
453 static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
454 {
455 	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
456 						   ib_conn);
457 	struct iser_device	*device;
458 	struct ib_device_attr *dev_attr;
459 	struct ib_qp_init_attr	init_attr;
460 	int			ret = -ENOMEM;
461 	int index, min_index = 0;
462 
463 	BUG_ON(ib_conn->device == NULL);
464 
465 	device = ib_conn->device;
466 	dev_attr = &device->dev_attr;
467 
468 	memset(&init_attr, 0, sizeof init_attr);
469 
470 	mutex_lock(&ig.connlist_mutex);
471 	/* select the CQ with the minimal number of usages */
472 	for (index = 0; index < device->comps_used; index++) {
473 		if (device->comps[index].active_qps <
474 		    device->comps[min_index].active_qps)
475 			min_index = index;
476 	}
477 	ib_conn->comp = &device->comps[min_index];
478 	ib_conn->comp->active_qps++;
479 	mutex_unlock(&ig.connlist_mutex);
480 	iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn);
481 
482 	init_attr.event_handler = iser_qp_event_callback;
483 	init_attr.qp_context	= (void *)ib_conn;
484 	init_attr.send_cq	= ib_conn->comp->cq;
485 	init_attr.recv_cq	= ib_conn->comp->cq;
486 	init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
487 	init_attr.cap.max_send_sge = 2;
488 	init_attr.cap.max_recv_sge = 1;
489 	init_attr.sq_sig_type	= IB_SIGNAL_REQ_WR;
490 	init_attr.qp_type	= IB_QPT_RC;
491 	if (ib_conn->pi_support) {
492 		init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
493 		init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
494 		iser_conn->max_cmds =
495 			ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
496 	} else {
497 		if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
498 			init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS + 1;
499 			iser_conn->max_cmds =
500 				ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
501 		} else {
502 			init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
503 			iser_conn->max_cmds =
504 				ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
505 			iser_dbg("device %s supports max_send_wr %d\n",
506 				 device->ib_device->name, dev_attr->max_qp_wr);
507 		}
508 	}
509 
510 	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
511 	if (ret)
512 		goto out_err;
513 
514 	ib_conn->qp = ib_conn->cma_id->qp;
515 	iser_info("setting conn %p cma_id %p qp %p\n",
516 		  ib_conn, ib_conn->cma_id,
517 		  ib_conn->cma_id->qp);
518 	return ret;
519 
520 out_err:
521 	mutex_lock(&ig.connlist_mutex);
522 	ib_conn->comp->active_qps--;
523 	mutex_unlock(&ig.connlist_mutex);
524 	iser_err("unable to alloc mem or create resource, err %d\n", ret);
525 
526 	return ret;
527 }
528 
529 /**
530  * based on the resolved device node GUID see if there already allocated
531  * device for this device. If there's no such, create one.
532  */
533 static
534 struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
535 {
536 	struct iser_device *device;
537 
538 	mutex_lock(&ig.device_list_mutex);
539 
540 	list_for_each_entry(device, &ig.device_list, ig_list)
541 		/* find if there's a match using the node GUID */
542 		if (device->ib_device->node_guid == cma_id->device->node_guid)
543 			goto inc_refcnt;
544 
545 	device = kzalloc(sizeof *device, GFP_KERNEL);
546 	if (device == NULL)
547 		goto out;
548 
549 	/* assign this device to the device */
550 	device->ib_device = cma_id->device;
551 	/* init the device and link it into ig device list */
552 	if (iser_create_device_ib_res(device)) {
553 		kfree(device);
554 		device = NULL;
555 		goto out;
556 	}
557 	list_add(&device->ig_list, &ig.device_list);
558 
559 inc_refcnt:
560 	device->refcount++;
561 out:
562 	mutex_unlock(&ig.device_list_mutex);
563 	return device;
564 }
565 
566 /* if there's no demand for this device, release it */
567 static void iser_device_try_release(struct iser_device *device)
568 {
569 	mutex_lock(&ig.device_list_mutex);
570 	device->refcount--;
571 	iser_info("device %p refcount %d\n", device, device->refcount);
572 	if (!device->refcount) {
573 		iser_free_device_ib_res(device);
574 		list_del(&device->ig_list);
575 		kfree(device);
576 	}
577 	mutex_unlock(&ig.device_list_mutex);
578 }
579 
580 /**
581  * Called with state mutex held
582  **/
583 static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
584 				     enum iser_conn_state comp,
585 				     enum iser_conn_state exch)
586 {
587 	int ret;
588 
589 	ret = (iser_conn->state == comp);
590 	if (ret)
591 		iser_conn->state = exch;
592 
593 	return ret;
594 }
595 
596 void iser_release_work(struct work_struct *work)
597 {
598 	struct iser_conn *iser_conn;
599 
600 	iser_conn = container_of(work, struct iser_conn, release_work);
601 
602 	/* Wait for conn_stop to complete */
603 	wait_for_completion(&iser_conn->stop_completion);
604 	/* Wait for IB resouces cleanup to complete */
605 	wait_for_completion(&iser_conn->ib_completion);
606 
607 	mutex_lock(&iser_conn->state_mutex);
608 	iser_conn->state = ISER_CONN_DOWN;
609 	mutex_unlock(&iser_conn->state_mutex);
610 
611 	iser_conn_release(iser_conn);
612 }
613 
614 /**
615  * iser_free_ib_conn_res - release IB related resources
616  * @iser_conn: iser connection struct
617  * @destroy: indicator if we need to try to release the
618  *     iser device and memory regoins pool (only iscsi
619  *     shutdown and DEVICE_REMOVAL will use this).
620  *
621  * This routine is called with the iser state mutex held
622  * so the cm_id removal is out of here. It is Safe to
623  * be invoked multiple times.
624  */
625 static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
626 				  bool destroy)
627 {
628 	struct ib_conn *ib_conn = &iser_conn->ib_conn;
629 	struct iser_device *device = ib_conn->device;
630 
631 	iser_info("freeing conn %p cma_id %p qp %p\n",
632 		  iser_conn, ib_conn->cma_id, ib_conn->qp);
633 
634 	if (ib_conn->qp != NULL) {
635 		ib_conn->comp->active_qps--;
636 		rdma_destroy_qp(ib_conn->cma_id);
637 		ib_conn->qp = NULL;
638 	}
639 
640 	if (destroy) {
641 		if (iser_conn->rx_descs)
642 			iser_free_rx_descriptors(iser_conn);
643 
644 		if (device != NULL) {
645 			iser_device_try_release(device);
646 			ib_conn->device = NULL;
647 		}
648 	}
649 }
650 
651 /**
652  * Frees all conn objects and deallocs conn descriptor
653  */
654 void iser_conn_release(struct iser_conn *iser_conn)
655 {
656 	struct ib_conn *ib_conn = &iser_conn->ib_conn;
657 
658 	mutex_lock(&ig.connlist_mutex);
659 	list_del(&iser_conn->conn_list);
660 	mutex_unlock(&ig.connlist_mutex);
661 
662 	mutex_lock(&iser_conn->state_mutex);
663 	/* In case we endup here without ep_disconnect being invoked. */
664 	if (iser_conn->state != ISER_CONN_DOWN) {
665 		iser_warn("iser conn %p state %d, expected state down.\n",
666 			  iser_conn, iser_conn->state);
667 		iscsi_destroy_endpoint(iser_conn->ep);
668 		iser_conn->state = ISER_CONN_DOWN;
669 	}
670 	/*
671 	 * In case we never got to bind stage, we still need to
672 	 * release IB resources (which is safe to call more than once).
673 	 */
674 	iser_free_ib_conn_res(iser_conn, true);
675 	mutex_unlock(&iser_conn->state_mutex);
676 
677 	if (ib_conn->cma_id != NULL) {
678 		rdma_destroy_id(ib_conn->cma_id);
679 		ib_conn->cma_id = NULL;
680 	}
681 
682 	kfree(iser_conn);
683 }
684 
685 /**
686  * triggers start of the disconnect procedures and wait for them to be done
687  * Called with state mutex held
688  */
689 int iser_conn_terminate(struct iser_conn *iser_conn)
690 {
691 	struct ib_conn *ib_conn = &iser_conn->ib_conn;
692 	struct ib_send_wr *bad_wr;
693 	int err = 0;
694 
695 	/* terminate the iser conn only if the conn state is UP */
696 	if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
697 				       ISER_CONN_TERMINATING))
698 		return 0;
699 
700 	iser_info("iser_conn %p state %d\n", iser_conn, iser_conn->state);
701 
702 	/* suspend queuing of new iscsi commands */
703 	if (iser_conn->iscsi_conn)
704 		iscsi_suspend_queue(iser_conn->iscsi_conn);
705 
706 	/*
707 	 * In case we didn't already clean up the cma_id (peer initiated
708 	 * a disconnection), we need to Cause the CMA to change the QP
709 	 * state to ERROR.
710 	 */
711 	if (ib_conn->cma_id) {
712 		err = rdma_disconnect(ib_conn->cma_id);
713 		if (err)
714 			iser_err("Failed to disconnect, conn: 0x%p err %d\n",
715 				 iser_conn, err);
716 
717 		/* post an indication that all flush errors were consumed */
718 		err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
719 		if (err) {
720 			iser_err("conn %p failed to post beacon", ib_conn);
721 			return 1;
722 		}
723 
724 		wait_for_completion(&ib_conn->flush_comp);
725 	}
726 
727 	return 1;
728 }
729 
730 /**
731  * Called with state mutex held
732  **/
733 static void iser_connect_error(struct rdma_cm_id *cma_id)
734 {
735 	struct iser_conn *iser_conn;
736 
737 	iser_conn = (struct iser_conn *)cma_id->context;
738 	iser_conn->state = ISER_CONN_TERMINATING;
739 }
740 
741 /**
742  * Called with state mutex held
743  **/
744 static void iser_addr_handler(struct rdma_cm_id *cma_id)
745 {
746 	struct iser_device *device;
747 	struct iser_conn   *iser_conn;
748 	struct ib_conn   *ib_conn;
749 	int    ret;
750 
751 	iser_conn = (struct iser_conn *)cma_id->context;
752 	if (iser_conn->state != ISER_CONN_PENDING)
753 		/* bailout */
754 		return;
755 
756 	ib_conn = &iser_conn->ib_conn;
757 	device = iser_device_find_by_ib_device(cma_id);
758 	if (!device) {
759 		iser_err("device lookup/creation failed\n");
760 		iser_connect_error(cma_id);
761 		return;
762 	}
763 
764 	ib_conn->device = device;
765 
766 	/* connection T10-PI support */
767 	if (iser_pi_enable) {
768 		if (!(device->dev_attr.device_cap_flags &
769 		      IB_DEVICE_SIGNATURE_HANDOVER)) {
770 			iser_warn("T10-PI requested but not supported on %s, "
771 				  "continue without T10-PI\n",
772 				  ib_conn->device->ib_device->name);
773 			ib_conn->pi_support = false;
774 		} else {
775 			ib_conn->pi_support = true;
776 		}
777 	}
778 
779 	ret = rdma_resolve_route(cma_id, 1000);
780 	if (ret) {
781 		iser_err("resolve route failed: %d\n", ret);
782 		iser_connect_error(cma_id);
783 		return;
784 	}
785 }
786 
787 /**
788  * Called with state mutex held
789  **/
790 static void iser_route_handler(struct rdma_cm_id *cma_id)
791 {
792 	struct rdma_conn_param conn_param;
793 	int    ret;
794 	struct iser_cm_hdr req_hdr;
795 	struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
796 	struct ib_conn *ib_conn = &iser_conn->ib_conn;
797 	struct iser_device *device = ib_conn->device;
798 
799 	if (iser_conn->state != ISER_CONN_PENDING)
800 		/* bailout */
801 		return;
802 
803 	ret = iser_create_ib_conn_res(ib_conn);
804 	if (ret)
805 		goto failure;
806 
807 	memset(&conn_param, 0, sizeof conn_param);
808 	conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
809 	conn_param.initiator_depth     = 1;
810 	conn_param.retry_count	       = 7;
811 	conn_param.rnr_retry_count     = 6;
812 
813 	memset(&req_hdr, 0, sizeof(req_hdr));
814 	req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
815 			ISER_SEND_W_INV_NOT_SUPPORTED);
816 	conn_param.private_data		= (void *)&req_hdr;
817 	conn_param.private_data_len	= sizeof(struct iser_cm_hdr);
818 
819 	ret = rdma_connect(cma_id, &conn_param);
820 	if (ret) {
821 		iser_err("failure connecting: %d\n", ret);
822 		goto failure;
823 	}
824 
825 	return;
826 failure:
827 	iser_connect_error(cma_id);
828 }
829 
830 static void iser_connected_handler(struct rdma_cm_id *cma_id)
831 {
832 	struct iser_conn *iser_conn;
833 	struct ib_qp_attr attr;
834 	struct ib_qp_init_attr init_attr;
835 
836 	iser_conn = (struct iser_conn *)cma_id->context;
837 	if (iser_conn->state != ISER_CONN_PENDING)
838 		/* bailout */
839 		return;
840 
841 	(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
842 	iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
843 
844 	iser_conn->state = ISER_CONN_UP;
845 	complete(&iser_conn->up_completion);
846 }
847 
848 static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
849 {
850 	struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
851 
852 	if (iser_conn_terminate(iser_conn)) {
853 		if (iser_conn->iscsi_conn)
854 			iscsi_conn_failure(iser_conn->iscsi_conn,
855 					   ISCSI_ERR_CONN_FAILED);
856 		else
857 			iser_err("iscsi_iser connection isn't bound\n");
858 	}
859 }
860 
861 static void iser_cleanup_handler(struct rdma_cm_id *cma_id,
862 				 bool destroy)
863 {
864 	struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
865 
866 	/*
867 	 * We are not guaranteed that we visited disconnected_handler
868 	 * by now, call it here to be safe that we handle CM drep
869 	 * and flush errors.
870 	 */
871 	iser_disconnected_handler(cma_id);
872 	iser_free_ib_conn_res(iser_conn, destroy);
873 	complete(&iser_conn->ib_completion);
874 };
875 
876 static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
877 {
878 	struct iser_conn *iser_conn;
879 	int ret = 0;
880 
881 	iser_conn = (struct iser_conn *)cma_id->context;
882 	iser_info("%s (%d): status %d conn %p id %p\n",
883 		  rdma_event_msg(event->event), event->event,
884 		  event->status, cma_id->context, cma_id);
885 
886 	mutex_lock(&iser_conn->state_mutex);
887 	switch (event->event) {
888 	case RDMA_CM_EVENT_ADDR_RESOLVED:
889 		iser_addr_handler(cma_id);
890 		break;
891 	case RDMA_CM_EVENT_ROUTE_RESOLVED:
892 		iser_route_handler(cma_id);
893 		break;
894 	case RDMA_CM_EVENT_ESTABLISHED:
895 		iser_connected_handler(cma_id);
896 		break;
897 	case RDMA_CM_EVENT_ADDR_ERROR:
898 	case RDMA_CM_EVENT_ROUTE_ERROR:
899 	case RDMA_CM_EVENT_CONNECT_ERROR:
900 	case RDMA_CM_EVENT_UNREACHABLE:
901 	case RDMA_CM_EVENT_REJECTED:
902 		iser_connect_error(cma_id);
903 		break;
904 	case RDMA_CM_EVENT_DISCONNECTED:
905 	case RDMA_CM_EVENT_ADDR_CHANGE:
906 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
907 		iser_cleanup_handler(cma_id, false);
908 		break;
909 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
910 		/*
911 		 * we *must* destroy the device as we cannot rely
912 		 * on iscsid to be around to initiate error handling.
913 		 * also if we are not in state DOWN implicitly destroy
914 		 * the cma_id.
915 		 */
916 		iser_cleanup_handler(cma_id, true);
917 		if (iser_conn->state != ISER_CONN_DOWN) {
918 			iser_conn->ib_conn.cma_id = NULL;
919 			ret = 1;
920 		}
921 		break;
922 	default:
923 		iser_err("Unexpected RDMA CM event: %s (%d)\n",
924 			 rdma_event_msg(event->event), event->event);
925 		break;
926 	}
927 	mutex_unlock(&iser_conn->state_mutex);
928 
929 	return ret;
930 }
931 
932 void iser_conn_init(struct iser_conn *iser_conn)
933 {
934 	iser_conn->state = ISER_CONN_INIT;
935 	iser_conn->ib_conn.post_recv_buf_count = 0;
936 	init_completion(&iser_conn->ib_conn.flush_comp);
937 	init_completion(&iser_conn->stop_completion);
938 	init_completion(&iser_conn->ib_completion);
939 	init_completion(&iser_conn->up_completion);
940 	INIT_LIST_HEAD(&iser_conn->conn_list);
941 	spin_lock_init(&iser_conn->ib_conn.lock);
942 	mutex_init(&iser_conn->state_mutex);
943 }
944 
945  /**
946  * starts the process of connecting to the target
947  * sleeps until the connection is established or rejected
948  */
949 int iser_connect(struct iser_conn   *iser_conn,
950 		 struct sockaddr    *src_addr,
951 		 struct sockaddr    *dst_addr,
952 		 int                 non_blocking)
953 {
954 	struct ib_conn *ib_conn = &iser_conn->ib_conn;
955 	int err = 0;
956 
957 	mutex_lock(&iser_conn->state_mutex);
958 
959 	sprintf(iser_conn->name, "%pISp", dst_addr);
960 
961 	iser_info("connecting to: %s\n", iser_conn->name);
962 
963 	/* the device is known only --after-- address resolution */
964 	ib_conn->device = NULL;
965 
966 	iser_conn->state = ISER_CONN_PENDING;
967 
968 	ib_conn->beacon.wr_id = ISER_BEACON_WRID;
969 	ib_conn->beacon.opcode = IB_WR_SEND;
970 
971 	ib_conn->cma_id = rdma_create_id(iser_cma_handler,
972 					 (void *)iser_conn,
973 					 RDMA_PS_TCP, IB_QPT_RC);
974 	if (IS_ERR(ib_conn->cma_id)) {
975 		err = PTR_ERR(ib_conn->cma_id);
976 		iser_err("rdma_create_id failed: %d\n", err);
977 		goto id_failure;
978 	}
979 
980 	err = rdma_resolve_addr(ib_conn->cma_id, src_addr, dst_addr, 1000);
981 	if (err) {
982 		iser_err("rdma_resolve_addr failed: %d\n", err);
983 		goto addr_failure;
984 	}
985 
986 	if (!non_blocking) {
987 		wait_for_completion_interruptible(&iser_conn->up_completion);
988 
989 		if (iser_conn->state != ISER_CONN_UP) {
990 			err =  -EIO;
991 			goto connect_failure;
992 		}
993 	}
994 	mutex_unlock(&iser_conn->state_mutex);
995 
996 	mutex_lock(&ig.connlist_mutex);
997 	list_add(&iser_conn->conn_list, &ig.connlist);
998 	mutex_unlock(&ig.connlist_mutex);
999 	return 0;
1000 
1001 id_failure:
1002 	ib_conn->cma_id = NULL;
1003 addr_failure:
1004 	iser_conn->state = ISER_CONN_DOWN;
1005 connect_failure:
1006 	mutex_unlock(&iser_conn->state_mutex);
1007 	iser_conn_release(iser_conn);
1008 	return err;
1009 }
1010 
1011 int iser_post_recvl(struct iser_conn *iser_conn)
1012 {
1013 	struct ib_recv_wr rx_wr, *rx_wr_failed;
1014 	struct ib_conn *ib_conn = &iser_conn->ib_conn;
1015 	struct ib_sge	  sge;
1016 	int ib_ret;
1017 
1018 	sge.addr   = iser_conn->login_resp_dma;
1019 	sge.length = ISER_RX_LOGIN_SIZE;
1020 	sge.lkey   = ib_conn->device->mr->lkey;
1021 
1022 	rx_wr.wr_id   = (uintptr_t)iser_conn->login_resp_buf;
1023 	rx_wr.sg_list = &sge;
1024 	rx_wr.num_sge = 1;
1025 	rx_wr.next    = NULL;
1026 
1027 	ib_conn->post_recv_buf_count++;
1028 	ib_ret	= ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
1029 	if (ib_ret) {
1030 		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
1031 		ib_conn->post_recv_buf_count--;
1032 	}
1033 	return ib_ret;
1034 }
1035 
1036 int iser_post_recvm(struct iser_conn *iser_conn, int count)
1037 {
1038 	struct ib_recv_wr *rx_wr, *rx_wr_failed;
1039 	int i, ib_ret;
1040 	struct ib_conn *ib_conn = &iser_conn->ib_conn;
1041 	unsigned int my_rx_head = iser_conn->rx_desc_head;
1042 	struct iser_rx_desc *rx_desc;
1043 
1044 	for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
1045 		rx_desc		= &iser_conn->rx_descs[my_rx_head];
1046 		rx_wr->wr_id	= (uintptr_t)rx_desc;
1047 		rx_wr->sg_list	= &rx_desc->rx_sg;
1048 		rx_wr->num_sge	= 1;
1049 		rx_wr->next	= rx_wr + 1;
1050 		my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
1051 	}
1052 
1053 	rx_wr--;
1054 	rx_wr->next = NULL; /* mark end of work requests list */
1055 
1056 	ib_conn->post_recv_buf_count += count;
1057 	ib_ret	= ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
1058 	if (ib_ret) {
1059 		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
1060 		ib_conn->post_recv_buf_count -= count;
1061 	} else
1062 		iser_conn->rx_desc_head = my_rx_head;
1063 	return ib_ret;
1064 }
1065 
1066 
1067 /**
1068  * iser_start_send - Initiate a Send DTO operation
1069  *
1070  * returns 0 on success, -1 on failure
1071  */
1072 int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
1073 		   bool signal)
1074 {
1075 	int		  ib_ret;
1076 	struct ib_send_wr send_wr, *send_wr_failed;
1077 
1078 	ib_dma_sync_single_for_device(ib_conn->device->ib_device,
1079 				      tx_desc->dma_addr, ISER_HEADERS_LEN,
1080 				      DMA_TO_DEVICE);
1081 
1082 	send_wr.next	   = NULL;
1083 	send_wr.wr_id	   = (uintptr_t)tx_desc;
1084 	send_wr.sg_list	   = tx_desc->tx_sg;
1085 	send_wr.num_sge	   = tx_desc->num_sge;
1086 	send_wr.opcode	   = IB_WR_SEND;
1087 	send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0;
1088 
1089 	ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
1090 	if (ib_ret)
1091 		iser_err("ib_post_send failed, ret:%d\n", ib_ret);
1092 
1093 	return ib_ret;
1094 }
1095 
1096 /**
1097  * is_iser_tx_desc - Indicate if the completion wr_id
1098  *     is a TX descriptor or not.
1099  * @iser_conn: iser connection
1100  * @wr_id: completion WR identifier
1101  *
1102  * Since we cannot rely on wc opcode in FLUSH errors
1103  * we must work around it by checking if the wr_id address
1104  * falls in the iser connection rx_descs buffer. If so
1105  * it is an RX descriptor, otherwize it is a TX.
1106  */
1107 static inline bool
1108 is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
1109 {
1110 	void *start = iser_conn->rx_descs;
1111 	int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
1112 
1113 	if (wr_id >= start && wr_id < start + len)
1114 		return false;
1115 
1116 	return true;
1117 }
1118 
1119 /**
1120  * iser_handle_comp_error() - Handle error completion
1121  * @ib_conn:   connection RDMA resources
1122  * @wc:        work completion
1123  *
1124  * Notes: We may handle a FLUSH error completion and in this case
1125  *        we only cleanup in case TX type was DATAOUT. For non-FLUSH
1126  *        error completion we should also notify iscsi layer that
1127  *        connection is failed (in case we passed bind stage).
1128  */
1129 static void
1130 iser_handle_comp_error(struct ib_conn *ib_conn,
1131 		       struct ib_wc *wc)
1132 {
1133 	void *wr_id = (void *)(uintptr_t)wc->wr_id;
1134 	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
1135 						   ib_conn);
1136 
1137 	if (wc->status != IB_WC_WR_FLUSH_ERR)
1138 		if (iser_conn->iscsi_conn)
1139 			iscsi_conn_failure(iser_conn->iscsi_conn,
1140 					   ISCSI_ERR_CONN_FAILED);
1141 
1142 	if (wc->wr_id == ISER_FASTREG_LI_WRID)
1143 		return;
1144 
1145 	if (is_iser_tx_desc(iser_conn, wr_id)) {
1146 		struct iser_tx_desc *desc = wr_id;
1147 
1148 		if (desc->type == ISCSI_TX_DATAOUT)
1149 			kmem_cache_free(ig.desc_cache, desc);
1150 	} else {
1151 		ib_conn->post_recv_buf_count--;
1152 	}
1153 }
1154 
1155 /**
1156  * iser_handle_wc - handle a single work completion
1157  * @wc: work completion
1158  *
1159  * Soft-IRQ context, work completion can be either
1160  * SEND or RECV, and can turn out successful or
1161  * with error (or flush error).
1162  */
1163 static void iser_handle_wc(struct ib_wc *wc)
1164 {
1165 	struct ib_conn *ib_conn;
1166 	struct iser_tx_desc *tx_desc;
1167 	struct iser_rx_desc *rx_desc;
1168 
1169 	ib_conn = wc->qp->qp_context;
1170 	if (likely(wc->status == IB_WC_SUCCESS)) {
1171 		if (wc->opcode == IB_WC_RECV) {
1172 			rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
1173 			iser_rcv_completion(rx_desc, wc->byte_len,
1174 					    ib_conn);
1175 		} else
1176 		if (wc->opcode == IB_WC_SEND) {
1177 			tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
1178 			iser_snd_completion(tx_desc, ib_conn);
1179 		} else {
1180 			iser_err("Unknown wc opcode %d\n", wc->opcode);
1181 		}
1182 	} else {
1183 		if (wc->status != IB_WC_WR_FLUSH_ERR)
1184 			iser_err("%s (%d): wr id %llx vend_err %x\n",
1185 				 ib_wc_status_msg(wc->status), wc->status,
1186 				 wc->wr_id, wc->vendor_err);
1187 		else
1188 			iser_dbg("%s (%d): wr id %llx\n",
1189 				 ib_wc_status_msg(wc->status), wc->status,
1190 				 wc->wr_id);
1191 
1192 		if (wc->wr_id == ISER_BEACON_WRID)
1193 			/* all flush errors were consumed */
1194 			complete(&ib_conn->flush_comp);
1195 		else
1196 			iser_handle_comp_error(ib_conn, wc);
1197 	}
1198 }
1199 
1200 /**
1201  * iser_cq_tasklet_fn - iSER completion polling loop
1202  * @data: iSER completion context
1203  *
1204  * Soft-IRQ context, polling connection CQ until
1205  * either CQ was empty or we exausted polling budget
1206  */
1207 static void iser_cq_tasklet_fn(unsigned long data)
1208 {
1209 	struct iser_comp *comp = (struct iser_comp *)data;
1210 	struct ib_cq *cq = comp->cq;
1211 	struct ib_wc *const wcs = comp->wcs;
1212 	int i, n, completed = 0;
1213 
1214 	while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
1215 		for (i = 0; i < n; i++)
1216 			iser_handle_wc(&wcs[i]);
1217 
1218 		completed += n;
1219 		if (completed >= iser_cq_poll_limit)
1220 			break;
1221 	}
1222 
1223 	/*
1224 	 * It is assumed here that arming CQ only once its empty
1225 	 * would not cause interrupts to be missed.
1226 	 */
1227 	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1228 
1229 	iser_dbg("got %d completions\n", completed);
1230 }
1231 
1232 static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
1233 {
1234 	struct iser_comp *comp = cq_context;
1235 
1236 	tasklet_schedule(&comp->tasklet);
1237 }
1238 
1239 u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
1240 			     enum iser_data_dir cmd_dir, sector_t *sector)
1241 {
1242 	struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
1243 	struct fast_reg_descriptor *desc = reg->mem_h;
1244 	unsigned long sector_size = iser_task->sc->device->sector_size;
1245 	struct ib_mr_status mr_status;
1246 	int ret;
1247 
1248 	if (desc && desc->reg_indicators & ISER_FASTREG_PROTECTED) {
1249 		desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
1250 		ret = ib_check_mr_status(desc->pi_ctx->sig_mr,
1251 					 IB_MR_CHECK_SIG_STATUS, &mr_status);
1252 		if (ret) {
1253 			pr_err("ib_check_mr_status failed, ret %d\n", ret);
1254 			goto err;
1255 		}
1256 
1257 		if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
1258 			sector_t sector_off = mr_status.sig_err.sig_err_offset;
1259 
1260 			do_div(sector_off, sector_size + 8);
1261 			*sector = scsi_get_lba(iser_task->sc) + sector_off;
1262 
1263 			pr_err("PI error found type %d at sector %llx "
1264 			       "expected %x vs actual %x\n",
1265 			       mr_status.sig_err.err_type,
1266 			       (unsigned long long)*sector,
1267 			       mr_status.sig_err.expected,
1268 			       mr_status.sig_err.actual);
1269 
1270 			switch (mr_status.sig_err.err_type) {
1271 			case IB_SIG_BAD_GUARD:
1272 				return 0x1;
1273 			case IB_SIG_BAD_REFTAG:
1274 				return 0x3;
1275 			case IB_SIG_BAD_APPTAG:
1276 				return 0x2;
1277 			}
1278 		}
1279 	}
1280 
1281 	return 0;
1282 err:
1283 	/* Not alot we can do here, return ambiguous guard error */
1284 	return 0x1;
1285 }
1286