xref: /linux/drivers/infiniband/ulp/srp/ib_srp.c (revision cf2f33a4e54096f90652cca3511fd6a456ea5abe)
1 /*
2  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34 
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <rdma/ib_cache.h>
44 
45 #include <linux/atomic.h>
46 
47 #include <scsi/scsi.h>
48 #include <scsi/scsi_device.h>
49 #include <scsi/scsi_dbg.h>
50 #include <scsi/scsi_tcq.h>
51 #include <scsi/srp.h>
52 #include <scsi/scsi_transport_srp.h>
53 
54 #include "ib_srp.h"
55 
56 #define DRV_NAME	"ib_srp"
57 #define PFX		DRV_NAME ": "
58 #define DRV_VERSION	"1.0"
59 #define DRV_RELDATE	"July 1, 2013"
60 
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_VERSION(DRV_VERSION);
65 MODULE_INFO(release_date, DRV_RELDATE);
66 
67 static unsigned int srp_sg_tablesize;
68 static unsigned int cmd_sg_entries;
69 static unsigned int indirect_sg_entries;
70 static bool allow_ext_sg;
71 static bool prefer_fr;
72 static bool register_always;
73 static int topspin_workarounds = 1;
74 
75 module_param(srp_sg_tablesize, uint, 0444);
76 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
77 
78 module_param(cmd_sg_entries, uint, 0444);
79 MODULE_PARM_DESC(cmd_sg_entries,
80 		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
81 
82 module_param(indirect_sg_entries, uint, 0444);
83 MODULE_PARM_DESC(indirect_sg_entries,
84 		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");
85 
86 module_param(allow_ext_sg, bool, 0444);
87 MODULE_PARM_DESC(allow_ext_sg,
88 		  "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
89 
90 module_param(topspin_workarounds, int, 0444);
91 MODULE_PARM_DESC(topspin_workarounds,
92 		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
93 
94 module_param(prefer_fr, bool, 0444);
95 MODULE_PARM_DESC(prefer_fr,
96 		 "Whether to use fast registration if both FMR and fast registration are supported");
97 
98 module_param(register_always, bool, 0444);
99 MODULE_PARM_DESC(register_always,
100 		 "Use memory registration even for contiguous memory regions");
101 
102 static const struct kernel_param_ops srp_tmo_ops;
103 
104 static int srp_reconnect_delay = 10;
105 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
106 		S_IRUGO | S_IWUSR);
107 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
108 
109 static int srp_fast_io_fail_tmo = 15;
110 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
111 		S_IRUGO | S_IWUSR);
112 MODULE_PARM_DESC(fast_io_fail_tmo,
113 		 "Number of seconds between the observation of a transport"
114 		 " layer error and failing all I/O. \"off\" means that this"
115 		 " functionality is disabled.");
116 
117 static int srp_dev_loss_tmo = 600;
118 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
119 		S_IRUGO | S_IWUSR);
120 MODULE_PARM_DESC(dev_loss_tmo,
121 		 "Maximum number of seconds that the SRP transport should"
122 		 " insulate transport layer errors. After this time has been"
123 		 " exceeded the SCSI host is removed. Should be"
124 		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
125 		 " if fast_io_fail_tmo has not been set. \"off\" means that"
126 		 " this functionality is disabled.");
127 
128 static unsigned ch_count;
129 module_param(ch_count, uint, 0444);
130 MODULE_PARM_DESC(ch_count,
131 		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
132 
133 static void srp_add_one(struct ib_device *device);
134 static void srp_remove_one(struct ib_device *device);
135 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
136 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
137 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
138 
139 static struct scsi_transport_template *ib_srp_transport_template;
140 static struct workqueue_struct *srp_remove_wq;
141 
142 static struct ib_client srp_client = {
143 	.name   = "srp",
144 	.add    = srp_add_one,
145 	.remove = srp_remove_one
146 };
147 
148 static struct ib_sa_client srp_sa_client;
149 
150 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
151 {
152 	int tmo = *(int *)kp->arg;
153 
154 	if (tmo >= 0)
155 		return sprintf(buffer, "%d", tmo);
156 	else
157 		return sprintf(buffer, "off");
158 }
159 
160 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
161 {
162 	int tmo, res;
163 
164 	res = srp_parse_tmo(&tmo, val);
165 	if (res)
166 		goto out;
167 
168 	if (kp->arg == &srp_reconnect_delay)
169 		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
170 				    srp_dev_loss_tmo);
171 	else if (kp->arg == &srp_fast_io_fail_tmo)
172 		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
173 	else
174 		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
175 				    tmo);
176 	if (res)
177 		goto out;
178 	*(int *)kp->arg = tmo;
179 
180 out:
181 	return res;
182 }
183 
184 static const struct kernel_param_ops srp_tmo_ops = {
185 	.get = srp_tmo_get,
186 	.set = srp_tmo_set,
187 };
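
/*
 * Illustrative sketch (not part of this driver): srp_parse_tmo() used by
 * srp_tmo_set() above is provided by the SRP transport class
 * (scsi_transport_srp). Based on how srp_tmo_get() and srp_tmo_set() use
 * it, the helper is assumed to map the string "off" to a negative value
 * and to parse anything else as a plain integer, roughly like this:
 *
 *	int srp_parse_tmo(int *tmo, const char *buf)
 *	{
 *		int res = 0;
 *
 *		if (strncmp(buf, "off", 3) != 0)
 *			res = kstrtoint(buf, 0, tmo);
 *		else
 *			*tmo = -1;
 *
 *		return res;
 *	}
 *
 * Under that assumption, writing "off" to
 * /sys/module/ib_srp/parameters/fast_io_fail_tmo stores -1, which
 * srp_tmo_get() prints back as "off".
 */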
188 
189 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
190 {
191 	return (struct srp_target_port *) host->hostdata;
192 }
193 
194 static const char *srp_target_info(struct Scsi_Host *host)
195 {
196 	return host_to_target(host)->target_name;
197 }
198 
199 static int srp_target_is_topspin(struct srp_target_port *target)
200 {
201 	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
202 	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };
203 
204 	return topspin_workarounds &&
205 		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
206 		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
207 }
208 
209 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
210 				   gfp_t gfp_mask,
211 				   enum dma_data_direction direction)
212 {
213 	struct srp_iu *iu;
214 
215 	iu = kmalloc(sizeof *iu, gfp_mask);
216 	if (!iu)
217 		goto out;
218 
219 	iu->buf = kzalloc(size, gfp_mask);
220 	if (!iu->buf)
221 		goto out_free_iu;
222 
223 	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
224 				    direction);
225 	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
226 		goto out_free_buf;
227 
228 	iu->size      = size;
229 	iu->direction = direction;
230 
231 	return iu;
232 
233 out_free_buf:
234 	kfree(iu->buf);
235 out_free_iu:
236 	kfree(iu);
237 out:
238 	return NULL;
239 }
240 
241 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
242 {
243 	if (!iu)
244 		return;
245 
246 	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
247 			    iu->direction);
248 	kfree(iu->buf);
249 	kfree(iu);
250 }
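
/*
 * Illustrative sketch (not part of this driver): typical lifecycle of an
 * information unit (IU) managed by srp_alloc_iu()/srp_free_iu() above.
 * The DMA mapping created by srp_alloc_iu() stays in place for the
 * lifetime of the IU, so the buffer has to be synced around CPU accesses.
 * The length variable "iu_len" below is hypothetical.
 *
 *	struct srp_iu *iu;
 *
 *	iu = srp_alloc_iu(host, iu_len, GFP_KERNEL, DMA_TO_DEVICE);
 *	if (!iu)
 *		return -ENOMEM;
 *	ib_dma_sync_single_for_cpu(host->srp_dev->dev, iu->dma, iu->size,
 *				   DMA_TO_DEVICE);
 *	... fill iu->buf with an SRP request ...
 *	ib_dma_sync_single_for_device(host->srp_dev->dev, iu->dma, iu->size,
 *				      DMA_TO_DEVICE);
 *	... post the IU via a send work request, e.g. srp_post_send() ...
 *	srp_free_iu(host, iu);	(only after the send completion was reaped)
 */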
251 
252 static void srp_qp_event(struct ib_event *event, void *context)
253 {
254 	pr_debug("QP event %s (%d)\n",
255 		 ib_event_msg(event->event), event->event);
256 }
257 
258 static int srp_init_qp(struct srp_target_port *target,
259 		       struct ib_qp *qp)
260 {
261 	struct ib_qp_attr *attr;
262 	int ret;
263 
264 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
265 	if (!attr)
266 		return -ENOMEM;
267 
268 	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
269 				  target->srp_host->port,
270 				  be16_to_cpu(target->pkey),
271 				  &attr->pkey_index);
272 	if (ret)
273 		goto out;
274 
275 	attr->qp_state        = IB_QPS_INIT;
276 	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
277 				    IB_ACCESS_REMOTE_WRITE);
278 	attr->port_num        = target->srp_host->port;
279 
280 	ret = ib_modify_qp(qp, attr,
281 			   IB_QP_STATE		|
282 			   IB_QP_PKEY_INDEX	|
283 			   IB_QP_ACCESS_FLAGS	|
284 			   IB_QP_PORT);
285 
286 out:
287 	kfree(attr);
288 	return ret;
289 }
290 
291 static int srp_new_cm_id(struct srp_rdma_ch *ch)
292 {
293 	struct srp_target_port *target = ch->target;
294 	struct ib_cm_id *new_cm_id;
295 
296 	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
297 				    srp_cm_handler, ch);
298 	if (IS_ERR(new_cm_id))
299 		return PTR_ERR(new_cm_id);
300 
301 	if (ch->cm_id)
302 		ib_destroy_cm_id(ch->cm_id);
303 	ch->cm_id = new_cm_id;
304 	ch->path.sgid = target->sgid;
305 	ch->path.dgid = target->orig_dgid;
306 	ch->path.pkey = target->pkey;
307 	ch->path.service_id = target->service_id;
308 
309 	return 0;
310 }
311 
312 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
313 {
314 	struct srp_device *dev = target->srp_host->srp_dev;
315 	struct ib_fmr_pool_param fmr_param;
316 
317 	memset(&fmr_param, 0, sizeof(fmr_param));
318 	fmr_param.pool_size	    = target->scsi_host->can_queue;
319 	fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
320 	fmr_param.cache		    = 1;
321 	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
322 	fmr_param.page_shift	    = ilog2(dev->mr_page_size);
323 	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
324 				       IB_ACCESS_REMOTE_WRITE |
325 				       IB_ACCESS_REMOTE_READ);
326 
327 	return ib_create_fmr_pool(dev->pd, &fmr_param);
328 }
329 
330 /**
331  * srp_destroy_fr_pool() - free the resources owned by a pool
332  * @pool: Fast registration pool to be destroyed.
333  */
334 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
335 {
336 	int i;
337 	struct srp_fr_desc *d;
338 
339 	if (!pool)
340 		return;
341 
342 	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
343 		if (d->frpl)
344 			ib_free_fast_reg_page_list(d->frpl);
345 		if (d->mr)
346 			ib_dereg_mr(d->mr);
347 	}
348 	kfree(pool);
349 }
350 
351 /**
352  * srp_create_fr_pool() - allocate and initialize a pool for fast registration
353  * @device:            IB device to allocate fast registration descriptors for.
354  * @pd:                Protection domain associated with the FR descriptors.
355  * @pool_size:         Number of descriptors to allocate.
356  * @max_page_list_len: Maximum fast registration work request page list length.
357  */
358 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
359 					      struct ib_pd *pd, int pool_size,
360 					      int max_page_list_len)
361 {
362 	struct srp_fr_pool *pool;
363 	struct srp_fr_desc *d;
364 	struct ib_mr *mr;
365 	struct ib_fast_reg_page_list *frpl;
366 	int i, ret = -EINVAL;
367 
368 	if (pool_size <= 0)
369 		goto err;
370 	ret = -ENOMEM;
371 	pool = kzalloc(sizeof(struct srp_fr_pool) +
372 		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
373 	if (!pool)
374 		goto err;
375 	pool->size = pool_size;
376 	pool->max_page_list_len = max_page_list_len;
377 	spin_lock_init(&pool->lock);
378 	INIT_LIST_HEAD(&pool->free_list);
379 
380 	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
381 		mr = ib_alloc_fast_reg_mr(pd, max_page_list_len);
382 		if (IS_ERR(mr)) {
383 			ret = PTR_ERR(mr);
384 			goto destroy_pool;
385 		}
386 		d->mr = mr;
387 		frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len);
388 		if (IS_ERR(frpl)) {
389 			ret = PTR_ERR(frpl);
390 			goto destroy_pool;
391 		}
392 		d->frpl = frpl;
393 		list_add_tail(&d->entry, &pool->free_list);
394 	}
395 
396 out:
397 	return pool;
398 
399 destroy_pool:
400 	srp_destroy_fr_pool(pool);
401 
402 err:
403 	pool = ERR_PTR(ret);
404 	goto out;
405 }
406 
407 /**
408  * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
409  * @pool: Pool to obtain descriptor from.
410  */
411 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
412 {
413 	struct srp_fr_desc *d = NULL;
414 	unsigned long flags;
415 
416 	spin_lock_irqsave(&pool->lock, flags);
417 	if (!list_empty(&pool->free_list)) {
418 		d = list_first_entry(&pool->free_list, typeof(*d), entry);
419 		list_del(&d->entry);
420 	}
421 	spin_unlock_irqrestore(&pool->lock, flags);
422 
423 	return d;
424 }
425 
426 /**
427  * srp_fr_pool_put() - put an FR descriptor back in the free list
428  * @pool: Pool the descriptor was allocated from.
429  * @desc: Pointer to an array of fast registration descriptor pointers.
430  * @n:    Number of descriptors to put back.
431  *
432  * Note: The caller must already have queued an invalidation request for
433  * desc->mr->rkey before calling this function.
434  */
435 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
436 			    int n)
437 {
438 	unsigned long flags;
439 	int i;
440 
441 	spin_lock_irqsave(&pool->lock, flags);
442 	for (i = 0; i < n; i++)
443 		list_add(&desc[i]->entry, &pool->free_list);
444 	spin_unlock_irqrestore(&pool->lock, flags);
445 }
446 
447 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
448 {
449 	struct srp_device *dev = target->srp_host->srp_dev;
450 
451 	return srp_create_fr_pool(dev->dev, dev->pd,
452 				  target->scsi_host->can_queue,
453 				  dev->max_pages_per_mr);
454 }
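
/*
 * Illustrative sketch (not part of this driver): expected cycle of a fast
 * registration descriptor obtained from the pool created above. See
 * srp_map_finish_fr() and srp_unmap_data() below for the real users.
 *
 *	struct srp_fr_desc *d = srp_fr_pool_get(ch->fr_pool);
 *
 *	if (!d)
 *		return -ENOMEM;
 *	... post an IB_WR_FAST_REG_MR work request that registers the
 *	    pages behind d->mr and hand d->mr->rkey to the target ...
 *	... once the command completes, post an IB_WR_LOCAL_INV work
 *	    request for d->mr->rkey (see srp_inv_rkey()) ...
 *	srp_fr_pool_put(ch->fr_pool, &d, 1);
 */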
455 
456 /**
457  * srp_destroy_qp() - destroy an RDMA queue pair
458  * @ch: SRP RDMA channel.
459  *
460  * Move the queue pair into the error state and wait until all receive
461  * completions have been processed before destroying it. This prevents the
462  * receive completion handler from accessing the queue pair while it is
463  * being destroyed.
464  */
465 static void srp_destroy_qp(struct srp_rdma_ch *ch)
466 {
467 	static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
468 	static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
469 	struct ib_recv_wr *bad_wr;
470 	int ret;
471 
472 	/* Destroying a QP and reusing ch->done is only safe if not connected */
473 	WARN_ON_ONCE(ch->connected);
474 
475 	ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
476 	WARN_ONCE(ret, "ib_modify_qp() returned %d\n", ret);
477 	if (ret)
478 		goto out;
479 
480 	init_completion(&ch->done);
481 	ret = ib_post_recv(ch->qp, &wr, &bad_wr);
482 	WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
483 	if (ret == 0)
484 		wait_for_completion(&ch->done);
485 
486 out:
487 	ib_destroy_qp(ch->qp);
488 }
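
/*
 * Note on the drain pattern in srp_destroy_qp() above: once the QP has been
 * moved into the error state, the receive work request posted with wr_id
 * SRP_LAST_WR_ID is flushed back with an error completion. The receive
 * completion handler is assumed to recognize that wr_id and call
 * complete(&ch->done), which is what the wait_for_completion() above relies
 * on. Hedged sketch of that check (the actual handler lives elsewhere in
 * this driver):
 *
 *	if (wc->wr_id == SRP_LAST_WR_ID) {
 *		complete(&ch->done);
 *		return;
 *	}
 */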
489 
490 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
491 {
492 	struct srp_target_port *target = ch->target;
493 	struct srp_device *dev = target->srp_host->srp_dev;
494 	struct ib_qp_init_attr *init_attr;
495 	struct ib_cq *recv_cq, *send_cq;
496 	struct ib_qp *qp;
497 	struct ib_fmr_pool *fmr_pool = NULL;
498 	struct srp_fr_pool *fr_pool = NULL;
499 	const int m = 1 + dev->use_fast_reg;
500 	struct ib_cq_init_attr cq_attr = {};
501 	int ret;
502 
503 	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
504 	if (!init_attr)
505 		return -ENOMEM;
506 
507 	/* + 1 for SRP_LAST_WR_ID */
508 	cq_attr.cqe = target->queue_size + 1;
509 	cq_attr.comp_vector = ch->comp_vector;
510 	recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
511 			       &cq_attr);
512 	if (IS_ERR(recv_cq)) {
513 		ret = PTR_ERR(recv_cq);
514 		goto err;
515 	}
516 
517 	cq_attr.cqe = m * target->queue_size;
518 	cq_attr.comp_vector = ch->comp_vector;
519 	send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
520 			       &cq_attr);
521 	if (IS_ERR(send_cq)) {
522 		ret = PTR_ERR(send_cq);
523 		goto err_recv_cq;
524 	}
525 
526 	ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
527 
528 	init_attr->event_handler       = srp_qp_event;
529 	init_attr->cap.max_send_wr     = m * target->queue_size;
530 	init_attr->cap.max_recv_wr     = target->queue_size + 1;
531 	init_attr->cap.max_recv_sge    = 1;
532 	init_attr->cap.max_send_sge    = 1;
533 	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
534 	init_attr->qp_type             = IB_QPT_RC;
535 	init_attr->send_cq             = send_cq;
536 	init_attr->recv_cq             = recv_cq;
537 
538 	qp = ib_create_qp(dev->pd, init_attr);
539 	if (IS_ERR(qp)) {
540 		ret = PTR_ERR(qp);
541 		goto err_send_cq;
542 	}
543 
544 	ret = srp_init_qp(target, qp);
545 	if (ret)
546 		goto err_qp;
547 
548 	if (dev->use_fast_reg && dev->has_fr) {
549 		fr_pool = srp_alloc_fr_pool(target);
550 		if (IS_ERR(fr_pool)) {
551 			ret = PTR_ERR(fr_pool);
552 			shost_printk(KERN_WARNING, target->scsi_host, PFX
553 				     "FR pool allocation failed (%d)\n", ret);
554 			goto err_qp;
555 		}
556 		if (ch->fr_pool)
557 			srp_destroy_fr_pool(ch->fr_pool);
558 		ch->fr_pool = fr_pool;
559 	} else if (!dev->use_fast_reg && dev->has_fmr) {
560 		fmr_pool = srp_alloc_fmr_pool(target);
561 		if (IS_ERR(fmr_pool)) {
562 			ret = PTR_ERR(fmr_pool);
563 			shost_printk(KERN_WARNING, target->scsi_host, PFX
564 				     "FMR pool allocation failed (%d)\n", ret);
565 			goto err_qp;
566 		}
567 		if (ch->fmr_pool)
568 			ib_destroy_fmr_pool(ch->fmr_pool);
569 		ch->fmr_pool = fmr_pool;
570 	}
571 
572 	if (ch->qp)
573 		srp_destroy_qp(ch);
574 	if (ch->recv_cq)
575 		ib_destroy_cq(ch->recv_cq);
576 	if (ch->send_cq)
577 		ib_destroy_cq(ch->send_cq);
578 
579 	ch->qp = qp;
580 	ch->recv_cq = recv_cq;
581 	ch->send_cq = send_cq;
582 
583 	kfree(init_attr);
584 	return 0;
585 
586 err_qp:
587 	ib_destroy_qp(qp);
588 
589 err_send_cq:
590 	ib_destroy_cq(send_cq);
591 
592 err_recv_cq:
593 	ib_destroy_cq(recv_cq);
594 
595 err:
596 	kfree(init_attr);
597 	return ret;
598 }
599 
600 /*
601  * Note: this function may be called without srp_alloc_iu_bufs() having been
602  * invoked. Hence the ch->[rt]x_ring checks.
603  */
604 static void srp_free_ch_ib(struct srp_target_port *target,
605 			   struct srp_rdma_ch *ch)
606 {
607 	struct srp_device *dev = target->srp_host->srp_dev;
608 	int i;
609 
610 	if (!ch->target)
611 		return;
612 
613 	if (ch->cm_id) {
614 		ib_destroy_cm_id(ch->cm_id);
615 		ch->cm_id = NULL;
616 	}
617 
618 	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() did not, return. */
619 	if (!ch->qp)
620 		return;
621 
622 	if (dev->use_fast_reg) {
623 		if (ch->fr_pool)
624 			srp_destroy_fr_pool(ch->fr_pool);
625 	} else {
626 		if (ch->fmr_pool)
627 			ib_destroy_fmr_pool(ch->fmr_pool);
628 	}
629 	srp_destroy_qp(ch);
630 	ib_destroy_cq(ch->send_cq);
631 	ib_destroy_cq(ch->recv_cq);
632 
633 	/*
634 	 * Prevent the SCSI error handler from using this channel after it
635 	 * has been freed. The SCSI error handler may keep trying to perform
636 	 * recovery actions even after scsi_remove_host() has
637 	 * returned.
638 	 */
639 	ch->target = NULL;
640 
641 	ch->qp = NULL;
642 	ch->send_cq = ch->recv_cq = NULL;
643 
644 	if (ch->rx_ring) {
645 		for (i = 0; i < target->queue_size; ++i)
646 			srp_free_iu(target->srp_host, ch->rx_ring[i]);
647 		kfree(ch->rx_ring);
648 		ch->rx_ring = NULL;
649 	}
650 	if (ch->tx_ring) {
651 		for (i = 0; i < target->queue_size; ++i)
652 			srp_free_iu(target->srp_host, ch->tx_ring[i]);
653 		kfree(ch->tx_ring);
654 		ch->tx_ring = NULL;
655 	}
656 }
657 
658 static void srp_path_rec_completion(int status,
659 				    struct ib_sa_path_rec *pathrec,
660 				    void *ch_ptr)
661 {
662 	struct srp_rdma_ch *ch = ch_ptr;
663 	struct srp_target_port *target = ch->target;
664 
665 	ch->status = status;
666 	if (status)
667 		shost_printk(KERN_ERR, target->scsi_host,
668 			     PFX "Got failed path rec status %d\n", status);
669 	else
670 		ch->path = *pathrec;
671 	complete(&ch->done);
672 }
673 
674 static int srp_lookup_path(struct srp_rdma_ch *ch)
675 {
676 	struct srp_target_port *target = ch->target;
677 	int ret;
678 
679 	ch->path.numb_path = 1;
680 
681 	init_completion(&ch->done);
682 
683 	ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
684 					       target->srp_host->srp_dev->dev,
685 					       target->srp_host->port,
686 					       &ch->path,
687 					       IB_SA_PATH_REC_SERVICE_ID |
688 					       IB_SA_PATH_REC_DGID	 |
689 					       IB_SA_PATH_REC_SGID	 |
690 					       IB_SA_PATH_REC_NUMB_PATH	 |
691 					       IB_SA_PATH_REC_PKEY,
692 					       SRP_PATH_REC_TIMEOUT_MS,
693 					       GFP_KERNEL,
694 					       srp_path_rec_completion,
695 					       ch, &ch->path_query);
696 	if (ch->path_query_id < 0)
697 		return ch->path_query_id;
698 
699 	ret = wait_for_completion_interruptible(&ch->done);
700 	if (ret < 0)
701 		return ret;
702 
703 	if (ch->status < 0)
704 		shost_printk(KERN_WARNING, target->scsi_host,
705 			     PFX "Path record query failed\n");
706 
707 	return ch->status;
708 }
709 
710 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
711 {
712 	struct srp_target_port *target = ch->target;
713 	struct {
714 		struct ib_cm_req_param param;
715 		struct srp_login_req   priv;
716 	} *req = NULL;
717 	int status;
718 
719 	req = kzalloc(sizeof *req, GFP_KERNEL);
720 	if (!req)
721 		return -ENOMEM;
722 
723 	req->param.primary_path		      = &ch->path;
724 	req->param.alternate_path 	      = NULL;
725 	req->param.service_id 		      = target->service_id;
726 	req->param.qp_num		      = ch->qp->qp_num;
727 	req->param.qp_type		      = ch->qp->qp_type;
728 	req->param.private_data 	      = &req->priv;
729 	req->param.private_data_len 	      = sizeof req->priv;
730 	req->param.flow_control 	      = 1;
731 
732 	get_random_bytes(&req->param.starting_psn, 4);
733 	req->param.starting_psn 	     &= 0xffffff;
734 
735 	/*
736 	 * Pick some arbitrary defaults here; we could make these
737 	 * module parameters if anyone cared about setting them.
738 	 */
739 	req->param.responder_resources	      = 4;
740 	req->param.remote_cm_response_timeout = 20;
741 	req->param.local_cm_response_timeout  = 20;
742 	req->param.retry_count                = target->tl_retry_count;
743 	req->param.rnr_retry_count 	      = 7;
744 	req->param.max_cm_retries 	      = 15;
745 
746 	req->priv.opcode     	= SRP_LOGIN_REQ;
747 	req->priv.tag        	= 0;
748 	req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
749 	req->priv.req_buf_fmt 	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
750 					      SRP_BUF_FORMAT_INDIRECT);
751 	req->priv.req_flags	= (multich ? SRP_MULTICHAN_MULTI :
752 				   SRP_MULTICHAN_SINGLE);
753 	/*
754 	 * In the published SRP specification (draft rev. 16a), the
755 	 * port identifier format is 8 bytes of ID extension followed
756 	 * by 8 bytes of GUID.  Older drafts put the two halves in the
757 	 * opposite order, so that the GUID comes first.
758 	 *
759 	 * Targets conforming to these obsolete drafts can be
760 	 * recognized by the I/O Class they report.
761 	 */
762 	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
763 		memcpy(req->priv.initiator_port_id,
764 		       &target->sgid.global.interface_id, 8);
765 		memcpy(req->priv.initiator_port_id + 8,
766 		       &target->initiator_ext, 8);
767 		memcpy(req->priv.target_port_id,     &target->ioc_guid, 8);
768 		memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
769 	} else {
770 		memcpy(req->priv.initiator_port_id,
771 		       &target->initiator_ext, 8);
772 		memcpy(req->priv.initiator_port_id + 8,
773 		       &target->sgid.global.interface_id, 8);
774 		memcpy(req->priv.target_port_id,     &target->id_ext, 8);
775 		memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
776 	}
777 
778 	/*
779 	 * Topspin/Cisco SRP targets will reject our login unless we
780 	 * zero out the first 8 bytes of our initiator port ID and set
781 	 * the second 8 bytes to the local node GUID.
782 	 */
783 	if (srp_target_is_topspin(target)) {
784 		shost_printk(KERN_DEBUG, target->scsi_host,
785 			     PFX "Topspin/Cisco initiator port ID workaround "
786 			     "activated for target GUID %016llx\n",
787 			     be64_to_cpu(target->ioc_guid));
788 		memset(req->priv.initiator_port_id, 0, 8);
789 		memcpy(req->priv.initiator_port_id + 8,
790 		       &target->srp_host->srp_dev->dev->node_guid, 8);
791 	}
792 
793 	status = ib_send_cm_req(ch->cm_id, &req->param);
794 
795 	kfree(req);
796 
797 	return status;
798 }
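
/*
 * Byte layout of the 16-byte SRP port identifiers built by srp_send_req()
 * above (illustration only):
 *
 *	Published format (SRP draft rev. 16a):
 *		initiator_port_id[0..7]  = initiator ID extension
 *		initiator_port_id[8..15] = initiator port GUID
 *		target_port_id[0..7]     = target ID extension
 *		target_port_id[8..15]    = target I/O controller GUID
 *
 *	Obsolete rev. 10 format (I/O class SRP_REV10_IB_IO_CLASS):
 *		the two 8-byte halves of each identifier are swapped,
 *		i.e. the GUID comes first and the extension second.
 */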
799 
800 static bool srp_queue_remove_work(struct srp_target_port *target)
801 {
802 	bool changed = false;
803 
804 	spin_lock_irq(&target->lock);
805 	if (target->state != SRP_TARGET_REMOVED) {
806 		target->state = SRP_TARGET_REMOVED;
807 		changed = true;
808 	}
809 	spin_unlock_irq(&target->lock);
810 
811 	if (changed)
812 		queue_work(srp_remove_wq, &target->remove_work);
813 
814 	return changed;
815 }
816 
817 static void srp_disconnect_target(struct srp_target_port *target)
818 {
819 	struct srp_rdma_ch *ch;
820 	int i;
821 
822 	/* XXX should send SRP_I_LOGOUT request */
823 
824 	for (i = 0; i < target->ch_count; i++) {
825 		ch = &target->ch[i];
826 		ch->connected = false;
827 		if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
828 			shost_printk(KERN_DEBUG, target->scsi_host,
829 				     PFX "Sending CM DREQ failed\n");
830 		}
831 	}
832 }
833 
834 static void srp_free_req_data(struct srp_target_port *target,
835 			      struct srp_rdma_ch *ch)
836 {
837 	struct srp_device *dev = target->srp_host->srp_dev;
838 	struct ib_device *ibdev = dev->dev;
839 	struct srp_request *req;
840 	int i;
841 
842 	if (!ch->req_ring)
843 		return;
844 
845 	for (i = 0; i < target->req_ring_size; ++i) {
846 		req = &ch->req_ring[i];
847 		if (dev->use_fast_reg)
848 			kfree(req->fr_list);
849 		else
850 			kfree(req->fmr_list);
851 		kfree(req->map_page);
852 		if (req->indirect_dma_addr) {
853 			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
854 					    target->indirect_size,
855 					    DMA_TO_DEVICE);
856 		}
857 		kfree(req->indirect_desc);
858 	}
859 
860 	kfree(ch->req_ring);
861 	ch->req_ring = NULL;
862 }
863 
864 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
865 {
866 	struct srp_target_port *target = ch->target;
867 	struct srp_device *srp_dev = target->srp_host->srp_dev;
868 	struct ib_device *ibdev = srp_dev->dev;
869 	struct srp_request *req;
870 	void *mr_list;
871 	dma_addr_t dma_addr;
872 	int i, ret = -ENOMEM;
873 
874 	ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
875 			       GFP_KERNEL);
876 	if (!ch->req_ring)
877 		goto out;
878 
879 	for (i = 0; i < target->req_ring_size; ++i) {
880 		req = &ch->req_ring[i];
881 		mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
882 				  GFP_KERNEL);
883 		if (!mr_list)
884 			goto out;
885 		if (srp_dev->use_fast_reg)
886 			req->fr_list = mr_list;
887 		else
888 			req->fmr_list = mr_list;
889 		req->map_page = kmalloc(srp_dev->max_pages_per_mr *
890 					sizeof(void *), GFP_KERNEL);
891 		if (!req->map_page)
892 			goto out;
893 		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
894 		if (!req->indirect_desc)
895 			goto out;
896 
897 		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
898 					     target->indirect_size,
899 					     DMA_TO_DEVICE);
900 		if (ib_dma_mapping_error(ibdev, dma_addr))
901 			goto out;
902 
903 		req->indirect_dma_addr = dma_addr;
904 	}
905 	ret = 0;
906 
907 out:
908 	return ret;
909 }
910 
911 /**
912  * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
913  * @shost: SCSI host whose attributes to remove from sysfs.
914  *
915  * Note: Any attributes defined in the host template that did not exist
916  * before this function was invoked will be ignored.
917  */
918 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
919 {
920 	struct device_attribute **attr;
921 
922 	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
923 		device_remove_file(&shost->shost_dev, *attr);
924 }
925 
926 static void srp_remove_target(struct srp_target_port *target)
927 {
928 	struct srp_rdma_ch *ch;
929 	int i;
930 
931 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
932 
933 	srp_del_scsi_host_attr(target->scsi_host);
934 	srp_rport_get(target->rport);
935 	srp_remove_host(target->scsi_host);
936 	scsi_remove_host(target->scsi_host);
937 	srp_stop_rport_timers(target->rport);
938 	srp_disconnect_target(target);
939 	for (i = 0; i < target->ch_count; i++) {
940 		ch = &target->ch[i];
941 		srp_free_ch_ib(target, ch);
942 	}
943 	cancel_work_sync(&target->tl_err_work);
944 	srp_rport_put(target->rport);
945 	for (i = 0; i < target->ch_count; i++) {
946 		ch = &target->ch[i];
947 		srp_free_req_data(target, ch);
948 	}
949 	kfree(target->ch);
950 	target->ch = NULL;
951 
952 	spin_lock(&target->srp_host->target_lock);
953 	list_del(&target->list);
954 	spin_unlock(&target->srp_host->target_lock);
955 
956 	scsi_host_put(target->scsi_host);
957 }
958 
959 static void srp_remove_work(struct work_struct *work)
960 {
961 	struct srp_target_port *target =
962 		container_of(work, struct srp_target_port, remove_work);
963 
964 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
965 
966 	srp_remove_target(target);
967 }
968 
969 static void srp_rport_delete(struct srp_rport *rport)
970 {
971 	struct srp_target_port *target = rport->lld_data;
972 
973 	srp_queue_remove_work(target);
974 }
975 
976 /**
977  * srp_connected_ch() - number of connected channels
978  * @target: SRP target port.
979  */
980 static int srp_connected_ch(struct srp_target_port *target)
981 {
982 	int i, c = 0;
983 
984 	for (i = 0; i < target->ch_count; i++)
985 		c += target->ch[i].connected;
986 
987 	return c;
988 }
989 
990 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
991 {
992 	struct srp_target_port *target = ch->target;
993 	int ret;
994 
995 	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
996 
997 	ret = srp_lookup_path(ch);
998 	if (ret)
999 		return ret;
1000 
1001 	while (1) {
1002 		init_completion(&ch->done);
1003 		ret = srp_send_req(ch, multich);
1004 		if (ret)
1005 			return ret;
1006 		ret = wait_for_completion_interruptible(&ch->done);
1007 		if (ret < 0)
1008 			return ret;
1009 
1010 		/*
1011 		 * The CM event handling code will set status to
1012 		 * SRP_PORT_REDIRECT if we get a port redirect REJ
1013 		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1014 		 * redirect REJ back.
1015 		 */
1016 		switch (ch->status) {
1017 		case 0:
1018 			ch->connected = true;
1019 			return 0;
1020 
1021 		case SRP_PORT_REDIRECT:
1022 			ret = srp_lookup_path(ch);
1023 			if (ret)
1024 				return ret;
1025 			break;
1026 
1027 		case SRP_DLID_REDIRECT:
1028 			break;
1029 
1030 		case SRP_STALE_CONN:
1031 			shost_printk(KERN_ERR, target->scsi_host, PFX
1032 				     "giving up on stale connection\n");
1033 			ch->status = -ECONNRESET;
1034 			return ch->status;
1035 
1036 		default:
1037 			return ch->status;
1038 		}
1039 	}
1040 }
1041 
1042 static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
1043 {
1044 	struct ib_send_wr *bad_wr;
1045 	struct ib_send_wr wr = {
1046 		.opcode		    = IB_WR_LOCAL_INV,
1047 		.wr_id		    = LOCAL_INV_WR_ID_MASK,
1048 		.next		    = NULL,
1049 		.num_sge	    = 0,
1050 		.send_flags	    = 0,
1051 		.ex.invalidate_rkey = rkey,
1052 	};
1053 
1054 	return ib_post_send(ch->qp, &wr, &bad_wr);
1055 }
1056 
1057 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1058 			   struct srp_rdma_ch *ch,
1059 			   struct srp_request *req)
1060 {
1061 	struct srp_target_port *target = ch->target;
1062 	struct srp_device *dev = target->srp_host->srp_dev;
1063 	struct ib_device *ibdev = dev->dev;
1064 	int i, res;
1065 
1066 	if (!scsi_sglist(scmnd) ||
1067 	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1068 	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
1069 		return;
1070 
1071 	if (dev->use_fast_reg) {
1072 		struct srp_fr_desc **pfr;
1073 
1074 		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1075 			res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
1076 			if (res < 0) {
1077 				shost_printk(KERN_ERR, target->scsi_host, PFX
1078 				  "Queueing INV WR for rkey %#x failed (%d)\n",
1079 				  (*pfr)->mr->rkey, res);
1080 				queue_work(system_long_wq,
1081 					   &target->tl_err_work);
1082 			}
1083 		}
1084 		if (req->nmdesc)
1085 			srp_fr_pool_put(ch->fr_pool, req->fr_list,
1086 					req->nmdesc);
1087 	} else {
1088 		struct ib_pool_fmr **pfmr;
1089 
1090 		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1091 			ib_fmr_pool_unmap(*pfmr);
1092 	}
1093 
1094 	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1095 			scmnd->sc_data_direction);
1096 }
1097 
1098 /**
1099  * srp_claim_req - Take ownership of the scmnd associated with a request.
1100  * @ch: SRP RDMA channel.
1101  * @req: SRP request.
1102  * @sdev: If not NULL, only take ownership for this SCSI device.
1103  * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1104  *         ownership of @req->scmnd if it equals @scmnd.
1105  *
1106  * Return value:
1107  * Either NULL or a pointer to the SCSI command the caller became owner of.
1108  */
1109 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1110 				       struct srp_request *req,
1111 				       struct scsi_device *sdev,
1112 				       struct scsi_cmnd *scmnd)
1113 {
1114 	unsigned long flags;
1115 
1116 	spin_lock_irqsave(&ch->lock, flags);
1117 	if (req->scmnd &&
1118 	    (!sdev || req->scmnd->device == sdev) &&
1119 	    (!scmnd || req->scmnd == scmnd)) {
1120 		scmnd = req->scmnd;
1121 		req->scmnd = NULL;
1122 	} else {
1123 		scmnd = NULL;
1124 	}
1125 	spin_unlock_irqrestore(&ch->lock, flags);
1126 
1127 	return scmnd;
1128 }
1129 
1130 /**
1131  * srp_free_req() - Unmap data and add request to the free request list.
1132  * @ch:     SRP RDMA channel.
1133  * @req:    Request to be freed.
1134  * @scmnd:  SCSI command associated with @req.
1135  * @req_lim_delta: Amount to be added to @target->req_lim.
1136  */
1137 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1138 			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1139 {
1140 	unsigned long flags;
1141 
1142 	srp_unmap_data(scmnd, ch, req);
1143 
1144 	spin_lock_irqsave(&ch->lock, flags);
1145 	ch->req_lim += req_lim_delta;
1146 	spin_unlock_irqrestore(&ch->lock, flags);
1147 }
1148 
1149 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1150 			   struct scsi_device *sdev, int result)
1151 {
1152 	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1153 
1154 	if (scmnd) {
1155 		srp_free_req(ch, req, scmnd, 0);
1156 		scmnd->result = result;
1157 		scmnd->scsi_done(scmnd);
1158 	}
1159 }
1160 
1161 static void srp_terminate_io(struct srp_rport *rport)
1162 {
1163 	struct srp_target_port *target = rport->lld_data;
1164 	struct srp_rdma_ch *ch;
1165 	struct Scsi_Host *shost = target->scsi_host;
1166 	struct scsi_device *sdev;
1167 	int i, j;
1168 
1169 	/*
1170 	 * Invoking srp_terminate_io() while srp_queuecommand() is running
1171 	 * is not safe. Hence the warning statement below.
1172 	 */
1173 	shost_for_each_device(sdev, shost)
1174 		WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1175 
1176 	for (i = 0; i < target->ch_count; i++) {
1177 		ch = &target->ch[i];
1178 
1179 		for (j = 0; j < target->req_ring_size; ++j) {
1180 			struct srp_request *req = &ch->req_ring[j];
1181 
1182 			srp_finish_req(ch, req, NULL,
1183 				       DID_TRANSPORT_FAILFAST << 16);
1184 		}
1185 	}
1186 }
1187 
1188 /*
1189  * It is up to the caller to ensure that srp_rport_reconnect() calls are
1190  * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1191  * srp_reset_device() or srp_reset_host() calls will occur while this function
1192  * is in progress. One way to realize that is not to call this function
1193  * directly but to call srp_reconnect_rport() instead since that last function
1194  * serializes calls of this function via rport->mutex and also blocks
1195  * srp_queuecommand() calls before invoking this function.
1196  */
1197 static int srp_rport_reconnect(struct srp_rport *rport)
1198 {
1199 	struct srp_target_port *target = rport->lld_data;
1200 	struct srp_rdma_ch *ch;
1201 	int i, j, ret = 0;
1202 	bool multich = false;
1203 
1204 	srp_disconnect_target(target);
1205 
1206 	if (target->state == SRP_TARGET_SCANNING)
1207 		return -ENODEV;
1208 
1209 	/*
1210 	 * Now get a new local CM ID so that we avoid confusing the target in
1211 	 * case things are really fouled up. Doing so also ensures that all CM
1212 	 * callbacks will have finished before a new QP is allocated.
1213 	 */
1214 	for (i = 0; i < target->ch_count; i++) {
1215 		ch = &target->ch[i];
1216 		ret += srp_new_cm_id(ch);
1217 	}
1218 	for (i = 0; i < target->ch_count; i++) {
1219 		ch = &target->ch[i];
1220 		for (j = 0; j < target->req_ring_size; ++j) {
1221 			struct srp_request *req = &ch->req_ring[j];
1222 
1223 			srp_finish_req(ch, req, NULL, DID_RESET << 16);
1224 		}
1225 	}
1226 	for (i = 0; i < target->ch_count; i++) {
1227 		ch = &target->ch[i];
1228 		/*
1229 		 * Whether or not creating a new CM ID succeeded, create a new
1230 		 * QP. This guarantees that all completion callback function
1231 		 * invocations have finished before request resetting starts.
1232 		 */
1233 		ret += srp_create_ch_ib(ch);
1234 
1235 		INIT_LIST_HEAD(&ch->free_tx);
1236 		for (j = 0; j < target->queue_size; ++j)
1237 			list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1238 	}
1239 
1240 	target->qp_in_error = false;
1241 
1242 	for (i = 0; i < target->ch_count; i++) {
1243 		ch = &target->ch[i];
1244 		if (ret)
1245 			break;
1246 		ret = srp_connect_ch(ch, multich);
1247 		multich = true;
1248 	}
1249 
1250 	if (ret == 0)
1251 		shost_printk(KERN_INFO, target->scsi_host,
1252 			     PFX "reconnect succeeded\n");
1253 
1254 	return ret;
1255 }
1256 
1257 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1258 			 unsigned int dma_len, u32 rkey)
1259 {
1260 	struct srp_direct_buf *desc = state->desc;
1261 
1262 	desc->va = cpu_to_be64(dma_addr);
1263 	desc->key = cpu_to_be32(rkey);
1264 	desc->len = cpu_to_be32(dma_len);
1265 
1266 	state->total_len += dma_len;
1267 	state->desc++;
1268 	state->ndesc++;
1269 }
1270 
1271 static int srp_map_finish_fmr(struct srp_map_state *state,
1272 			      struct srp_rdma_ch *ch)
1273 {
1274 	struct ib_pool_fmr *fmr;
1275 	u64 io_addr = 0;
1276 
1277 	fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1278 				   state->npages, io_addr);
1279 	if (IS_ERR(fmr))
1280 		return PTR_ERR(fmr);
1281 
1282 	*state->next_fmr++ = fmr;
1283 	state->nmdesc++;
1284 
1285 	srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey);
1286 
1287 	return 0;
1288 }
1289 
1290 static int srp_map_finish_fr(struct srp_map_state *state,
1291 			     struct srp_rdma_ch *ch)
1292 {
1293 	struct srp_target_port *target = ch->target;
1294 	struct srp_device *dev = target->srp_host->srp_dev;
1295 	struct ib_send_wr *bad_wr;
1296 	struct ib_send_wr wr;
1297 	struct srp_fr_desc *desc;
1298 	u32 rkey;
1299 
1300 	desc = srp_fr_pool_get(ch->fr_pool);
1301 	if (!desc)
1302 		return -ENOMEM;
1303 
1304 	rkey = ib_inc_rkey(desc->mr->rkey);
1305 	ib_update_fast_reg_key(desc->mr, rkey);
1306 
1307 	memcpy(desc->frpl->page_list, state->pages,
1308 	       sizeof(state->pages[0]) * state->npages);
1309 
1310 	memset(&wr, 0, sizeof(wr));
1311 	wr.opcode = IB_WR_FAST_REG_MR;
1312 	wr.wr_id = FAST_REG_WR_ID_MASK;
1313 	wr.wr.fast_reg.iova_start = state->base_dma_addr;
1314 	wr.wr.fast_reg.page_list = desc->frpl;
1315 	wr.wr.fast_reg.page_list_len = state->npages;
1316 	wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size);
1317 	wr.wr.fast_reg.length = state->dma_len;
1318 	wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
1319 				       IB_ACCESS_REMOTE_READ |
1320 				       IB_ACCESS_REMOTE_WRITE);
1321 	wr.wr.fast_reg.rkey = desc->mr->lkey;
1322 
1323 	*state->next_fr++ = desc;
1324 	state->nmdesc++;
1325 
1326 	srp_map_desc(state, state->base_dma_addr, state->dma_len,
1327 		     desc->mr->rkey);
1328 
1329 	return ib_post_send(ch->qp, &wr, &bad_wr);
1330 }
1331 
1332 static int srp_finish_mapping(struct srp_map_state *state,
1333 			      struct srp_rdma_ch *ch)
1334 {
1335 	struct srp_target_port *target = ch->target;
1336 	int ret = 0;
1337 
1338 	if (state->npages == 0)
1339 		return 0;
1340 
1341 	if (state->npages == 1 && !register_always)
1342 		srp_map_desc(state, state->base_dma_addr, state->dma_len,
1343 			     target->rkey);
1344 	else
1345 		ret = target->srp_host->srp_dev->use_fast_reg ?
1346 			srp_map_finish_fr(state, ch) :
1347 			srp_map_finish_fmr(state, ch);
1348 
1349 	if (ret == 0) {
1350 		state->npages = 0;
1351 		state->dma_len = 0;
1352 	}
1353 
1354 	return ret;
1355 }
1356 
1357 static void srp_map_update_start(struct srp_map_state *state,
1358 				 struct scatterlist *sg, int sg_index,
1359 				 dma_addr_t dma_addr)
1360 {
1361 	state->unmapped_sg = sg;
1362 	state->unmapped_index = sg_index;
1363 	state->unmapped_addr = dma_addr;
1364 }
1365 
1366 static int srp_map_sg_entry(struct srp_map_state *state,
1367 			    struct srp_rdma_ch *ch,
1368 			    struct scatterlist *sg, int sg_index,
1369 			    bool use_mr)
1370 {
1371 	struct srp_target_port *target = ch->target;
1372 	struct srp_device *dev = target->srp_host->srp_dev;
1373 	struct ib_device *ibdev = dev->dev;
1374 	dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1375 	unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1376 	unsigned int len;
1377 	int ret;
1378 
1379 	if (!dma_len)
1380 		return 0;
1381 
1382 	if (!use_mr) {
1383 		/*
1384 		 * Once we're in direct map mode for a request, we don't
1385 		 * go back to FMR or FR mode, so no need to update anything
1386 		 * other than the descriptor.
1387 		 */
1388 		srp_map_desc(state, dma_addr, dma_len, target->rkey);
1389 		return 0;
1390 	}
1391 
1392 	/*
1393 	 * Since not all RDMA HW drivers support non-zero page offsets for
1394 	 * FMR, if we start at an offset into a page, don't merge into the
1395 	 * current FMR mapping. Finish it out, and use the kernel's MR for
1396 	 * this sg entry.
1397 	 */
1398 	if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) ||
1399 	    dma_len > dev->mr_max_size) {
1400 		ret = srp_finish_mapping(state, ch);
1401 		if (ret)
1402 			return ret;
1403 
1404 		srp_map_desc(state, dma_addr, dma_len, target->rkey);
1405 		srp_map_update_start(state, NULL, 0, 0);
1406 		return 0;
1407 	}
1408 
1409 	/*
1410 	 * If this is the first sg that will be mapped via FMR or via FR, save
1411 	 * our position. We need to know the first unmapped entry, its index,
1412 	 * and the first unmapped address within that entry to be able to
1413 	 * restart mapping after an error.
1414 	 */
1415 	if (!state->unmapped_sg)
1416 		srp_map_update_start(state, sg, sg_index, dma_addr);
1417 
1418 	while (dma_len) {
1419 		unsigned offset = dma_addr & ~dev->mr_page_mask;
1420 		if (state->npages == dev->max_pages_per_mr || offset != 0) {
1421 			ret = srp_finish_mapping(state, ch);
1422 			if (ret)
1423 				return ret;
1424 
1425 			srp_map_update_start(state, sg, sg_index, dma_addr);
1426 		}
1427 
1428 		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1429 
1430 		if (!state->npages)
1431 			state->base_dma_addr = dma_addr;
1432 		state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1433 		state->dma_len += len;
1434 		dma_addr += len;
1435 		dma_len -= len;
1436 	}
1437 
1438 	/*
1439 	 * If the last entry of the MR wasn't a full page, then we need to
1440 	 * close it out and start a new one -- we can only merge at page
1441 	 * boundaries.
1442 	 */
1443 	ret = 0;
1444 	if (len != dev->mr_page_size) {
1445 		ret = srp_finish_mapping(state, ch);
1446 		if (!ret)
1447 			srp_map_update_start(state, NULL, 0, 0);
1448 	}
1449 	return ret;
1450 }
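
/*
 * Worked example for the page-chunking loop in srp_map_sg_entry() above,
 * on the fast registration path and with illustrative numbers: assume
 * dev->mr_page_size = 4096 and an S/G entry with dma_addr = 0x10100 and
 * dma_len = 0x2000. The loop splits it as follows:
 *
 *	pass 1: offset = 0x100 (non-zero, so any mapping in progress is
 *	        closed out first), len = 4096 - 0x100 = 0xf00,
 *	        page recorded = 0x10000
 *	pass 2: offset = 0, len = 0x1000, page recorded = 0x11000
 *	pass 3: offset = 0, len = 0x100,  page recorded = 0x12000
 *
 * Because the final chunk (0x100 bytes) is shorter than mr_page_size, the
 * mapping is closed out via srp_finish_mapping() so that the next S/G
 * entry starts a new memory region. On the FMR path a non-zero page
 * offset never reaches this loop; it is turned into a direct descriptor
 * by the check at the top of the function.
 */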
1451 
1452 static int srp_map_sg(struct srp_map_state *state, struct srp_rdma_ch *ch,
1453 		      struct srp_request *req, struct scatterlist *scat,
1454 		      int count)
1455 {
1456 	struct srp_target_port *target = ch->target;
1457 	struct srp_device *dev = target->srp_host->srp_dev;
1458 	struct ib_device *ibdev = dev->dev;
1459 	struct scatterlist *sg;
1460 	int i;
1461 	bool use_mr;
1462 
1463 	state->desc	= req->indirect_desc;
1464 	state->pages	= req->map_page;
1465 	if (dev->use_fast_reg) {
1466 		state->next_fr = req->fr_list;
1467 		use_mr = !!ch->fr_pool;
1468 	} else {
1469 		state->next_fmr = req->fmr_list;
1470 		use_mr = !!ch->fmr_pool;
1471 	}
1472 
1473 	for_each_sg(scat, sg, count, i) {
1474 		if (srp_map_sg_entry(state, ch, sg, i, use_mr)) {
1475 			/*
1476 			 * Memory registration failed, so backtrack to the
1477 			 * first unmapped entry and continue on without using
1478 			 * memory registration.
1479 			 */
1480 			dma_addr_t dma_addr;
1481 			unsigned int dma_len;
1482 
1483 backtrack:
1484 			sg = state->unmapped_sg;
1485 			i = state->unmapped_index;
1486 
1487 			dma_addr = ib_sg_dma_address(ibdev, sg);
1488 			dma_len = ib_sg_dma_len(ibdev, sg);
1489 			dma_len -= (state->unmapped_addr - dma_addr);
1490 			dma_addr = state->unmapped_addr;
1491 			use_mr = false;
1492 			srp_map_desc(state, dma_addr, dma_len, target->rkey);
1493 		}
1494 	}
1495 
1496 	if (use_mr && srp_finish_mapping(state, ch))
1497 		goto backtrack;
1498 
1499 	req->nmdesc = state->nmdesc;
1500 
1501 	return 0;
1502 }
1503 
1504 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1505 			struct srp_request *req)
1506 {
1507 	struct srp_target_port *target = ch->target;
1508 	struct scatterlist *scat;
1509 	struct srp_cmd *cmd = req->cmd->buf;
1510 	int len, nents, count;
1511 	struct srp_device *dev;
1512 	struct ib_device *ibdev;
1513 	struct srp_map_state state;
1514 	struct srp_indirect_buf *indirect_hdr;
1515 	u32 table_len;
1516 	u8 fmt;
1517 
1518 	if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1519 		return sizeof (struct srp_cmd);
1520 
1521 	if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1522 	    scmnd->sc_data_direction != DMA_TO_DEVICE) {
1523 		shost_printk(KERN_WARNING, target->scsi_host,
1524 			     PFX "Unhandled data direction %d\n",
1525 			     scmnd->sc_data_direction);
1526 		return -EINVAL;
1527 	}
1528 
1529 	nents = scsi_sg_count(scmnd);
1530 	scat  = scsi_sglist(scmnd);
1531 
1532 	dev = target->srp_host->srp_dev;
1533 	ibdev = dev->dev;
1534 
1535 	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1536 	if (unlikely(count == 0))
1537 		return -EIO;
1538 
1539 	fmt = SRP_DATA_DESC_DIRECT;
1540 	len = sizeof (struct srp_cmd) +	sizeof (struct srp_direct_buf);
1541 
1542 	if (count == 1 && !register_always) {
1543 		/*
1544 		 * The midlayer only generated a single gather/scatter
1545 		 * entry, or DMA mapping coalesced everything to a
1546 		 * single entry.  So a direct descriptor along with
1547 		 * the DMA MR suffices.
1548 		 */
1549 		struct srp_direct_buf *buf = (void *) cmd->add_data;
1550 
1551 		buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1552 		buf->key = cpu_to_be32(target->rkey);
1553 		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1554 
1555 		req->nmdesc = 0;
1556 		goto map_complete;
1557 	}
1558 
1559 	/*
1560 	 * We have more than one scatter/gather entry, so build our indirect
1561 	 * descriptor table, trying to merge as many entries as we can.
1562 	 */
1563 	indirect_hdr = (void *) cmd->add_data;
1564 
1565 	ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1566 				   target->indirect_size, DMA_TO_DEVICE);
1567 
1568 	memset(&state, 0, sizeof(state));
1569 	srp_map_sg(&state, ch, req, scat, count);
1570 
1571 	/* We've mapped the request, now pull as much of the indirect
1572 	 * descriptor table as we can into the command buffer. If this
1573 	 * target is not using an external indirect table, we are
1574 	 * guaranteed to fit into the command, as the SCSI layer won't
1575 	 * give us more S/G entries than we allow.
1576 	 */
1577 	if (state.ndesc == 1) {
1578 		/*
1579 		 * Memory registration collapsed the sg-list into one entry,
1580 		 * so use a direct descriptor.
1581 		 */
1582 		struct srp_direct_buf *buf = (void *) cmd->add_data;
1583 
1584 		*buf = req->indirect_desc[0];
1585 		goto map_complete;
1586 	}
1587 
1588 	if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1589 						!target->allow_ext_sg)) {
1590 		shost_printk(KERN_ERR, target->scsi_host,
1591 			     "Could not fit S/G list into SRP_CMD\n");
1592 		return -EIO;
1593 	}
1594 
1595 	count = min(state.ndesc, target->cmd_sg_cnt);
1596 	table_len = state.ndesc * sizeof (struct srp_direct_buf);
1597 
1598 	fmt = SRP_DATA_DESC_INDIRECT;
1599 	len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1600 	len += count * sizeof (struct srp_direct_buf);
1601 
1602 	memcpy(indirect_hdr->desc_list, req->indirect_desc,
1603 	       count * sizeof (struct srp_direct_buf));
1604 
1605 	indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1606 	indirect_hdr->table_desc.key = cpu_to_be32(target->rkey);
1607 	indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1608 	indirect_hdr->len = cpu_to_be32(state.total_len);
1609 
1610 	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1611 		cmd->data_out_desc_cnt = count;
1612 	else
1613 		cmd->data_in_desc_cnt = count;
1614 
1615 	ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1616 				      DMA_TO_DEVICE);
1617 
1618 map_complete:
1619 	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1620 		cmd->buf_fmt = fmt << 4;
1621 	else
1622 		cmd->buf_fmt = fmt;
1623 
1624 	return len;
1625 }
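
/*
 * Worked example for the indirect case handled by srp_map_data() above
 * (illustrative numbers; each struct srp_direct_buf is 16 bytes: an 8-byte
 * virtual address, a 4-byte key and a 4-byte length). If memory
 * registration produced state.ndesc = 3 descriptors and
 * target->cmd_sg_cnt >= 3, then count = 3, table_len = 3 * 16 = 48 and
 *
 *	len = sizeof(struct srp_cmd) + sizeof(struct srp_indirect_buf)
 *	      + 3 * sizeof(struct srp_direct_buf);
 *
 * i.e. the SRP_CMD IU carries the indirect table header plus the first
 * three descriptors inline, while table_desc points at the full
 * req->indirect_desc table for targets that fetch it via RDMA.
 */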
1626 
1627 /*
1628  * Return an IU and possible credit to the free pool
1629  */
1630 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1631 			  enum srp_iu_type iu_type)
1632 {
1633 	unsigned long flags;
1634 
1635 	spin_lock_irqsave(&ch->lock, flags);
1636 	list_add(&iu->list, &ch->free_tx);
1637 	if (iu_type != SRP_IU_RSP)
1638 		++ch->req_lim;
1639 	spin_unlock_irqrestore(&ch->lock, flags);
1640 }
1641 
1642 /*
1643  * Must be called with ch->lock held to protect req_lim and free_tx.
1644  * If IU is not sent, it must be returned using srp_put_tx_iu().
1645  *
1646  * Note:
1647  * An upper limit for the number of allocated information units for each
1648  * request type is:
1649  * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1650  *   more than Scsi_Host.can_queue requests.
1651  * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1652  * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1653  *   one unanswered SRP request to an initiator.
1654  */
1655 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1656 				      enum srp_iu_type iu_type)
1657 {
1658 	struct srp_target_port *target = ch->target;
1659 	s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1660 	struct srp_iu *iu;
1661 
1662 	srp_send_completion(ch->send_cq, ch);
1663 
1664 	if (list_empty(&ch->free_tx))
1665 		return NULL;
1666 
1667 	/* Initiator responses to target requests do not consume credits */
1668 	if (iu_type != SRP_IU_RSP) {
1669 		if (ch->req_lim <= rsv) {
1670 			++target->zero_req_lim;
1671 			return NULL;
1672 		}
1673 
1674 		--ch->req_lim;
1675 	}
1676 
1677 	iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1678 	list_del(&iu->list);
1679 	return iu;
1680 }
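
/*
 * Illustration of the credit accounting in __srp_get_tx_iu() above: with
 * SRP_TSK_MGMT_SQ_SIZE credits reserved for task management, an SRP_IU_CMD
 * allocation fails once ch->req_lim <= SRP_TSK_MGMT_SQ_SIZE even if free
 * IUs remain, so that a task management function can still be sent to
 * recover the session. Hedged sketch of the calling pattern (see
 * srp_response_common() below for a real user):
 *
 *	spin_lock_irqsave(&ch->lock, flags);
 *	iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
 *	spin_unlock_irqrestore(&ch->lock, flags);
 *	if (!iu)
 *		return SCSI_MLQUEUE_HOST_BUSY;
 *	... build the request in iu->buf ...
 *	if (srp_post_send(ch, iu, len))
 *		srp_put_tx_iu(ch, iu, SRP_IU_CMD);
 */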
1681 
1682 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1683 {
1684 	struct srp_target_port *target = ch->target;
1685 	struct ib_sge list;
1686 	struct ib_send_wr wr, *bad_wr;
1687 
1688 	list.addr   = iu->dma;
1689 	list.length = len;
1690 	list.lkey   = target->lkey;
1691 
1692 	wr.next       = NULL;
1693 	wr.wr_id      = (uintptr_t) iu;
1694 	wr.sg_list    = &list;
1695 	wr.num_sge    = 1;
1696 	wr.opcode     = IB_WR_SEND;
1697 	wr.send_flags = IB_SEND_SIGNALED;
1698 
1699 	return ib_post_send(ch->qp, &wr, &bad_wr);
1700 }
1701 
1702 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1703 {
1704 	struct srp_target_port *target = ch->target;
1705 	struct ib_recv_wr wr, *bad_wr;
1706 	struct ib_sge list;
1707 
1708 	list.addr   = iu->dma;
1709 	list.length = iu->size;
1710 	list.lkey   = target->lkey;
1711 
1712 	wr.next     = NULL;
1713 	wr.wr_id    = (uintptr_t) iu;
1714 	wr.sg_list  = &list;
1715 	wr.num_sge  = 1;
1716 
1717 	return ib_post_recv(ch->qp, &wr, &bad_wr);
1718 }
1719 
1720 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1721 {
1722 	struct srp_target_port *target = ch->target;
1723 	struct srp_request *req;
1724 	struct scsi_cmnd *scmnd;
1725 	unsigned long flags;
1726 
1727 	if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1728 		spin_lock_irqsave(&ch->lock, flags);
1729 		ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1730 		spin_unlock_irqrestore(&ch->lock, flags);
1731 
1732 		ch->tsk_mgmt_status = -1;
1733 		if (be32_to_cpu(rsp->resp_data_len) >= 4)
1734 			ch->tsk_mgmt_status = rsp->data[3];
1735 		complete(&ch->tsk_mgmt_done);
1736 	} else {
1737 		scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1738 		if (scmnd) {
1739 			req = (void *)scmnd->host_scribble;
1740 			scmnd = srp_claim_req(ch, req, NULL, scmnd);
1741 		}
1742 		if (!scmnd) {
1743 			shost_printk(KERN_ERR, target->scsi_host,
1744 				     "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1745 				     rsp->tag, ch - target->ch, ch->qp->qp_num);
1746 
1747 			spin_lock_irqsave(&ch->lock, flags);
1748 			ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1749 			spin_unlock_irqrestore(&ch->lock, flags);
1750 
1751 			return;
1752 		}
1753 		scmnd->result = rsp->status;
1754 
1755 		if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1756 			memcpy(scmnd->sense_buffer, rsp->data +
1757 			       be32_to_cpu(rsp->resp_data_len),
1758 			       min_t(int, be32_to_cpu(rsp->sense_data_len),
1759 				     SCSI_SENSE_BUFFERSIZE));
1760 		}
1761 
1762 		if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1763 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1764 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1765 			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1766 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1767 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1768 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1769 			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1770 
1771 		srp_free_req(ch, req, scmnd,
1772 			     be32_to_cpu(rsp->req_lim_delta));
1773 
1774 		scmnd->host_scribble = NULL;
1775 		scmnd->scsi_done(scmnd);
1776 	}
1777 }
1778 
1779 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1780 			       void *rsp, int len)
1781 {
1782 	struct srp_target_port *target = ch->target;
1783 	struct ib_device *dev = target->srp_host->srp_dev->dev;
1784 	unsigned long flags;
1785 	struct srp_iu *iu;
1786 	int err;
1787 
1788 	spin_lock_irqsave(&ch->lock, flags);
1789 	ch->req_lim += req_delta;
1790 	iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1791 	spin_unlock_irqrestore(&ch->lock, flags);
1792 
1793 	if (!iu) {
1794 		shost_printk(KERN_ERR, target->scsi_host, PFX
1795 			     "no IU available to send response\n");
1796 		return 1;
1797 	}
1798 
1799 	ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1800 	memcpy(iu->buf, rsp, len);
1801 	ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1802 
1803 	err = srp_post_send(ch, iu, len);
1804 	if (err) {
1805 		shost_printk(KERN_ERR, target->scsi_host, PFX
1806 			     "unable to post response: %d\n", err);
1807 		srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1808 	}
1809 
1810 	return err;
1811 }
1812 
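/**
 * srp_process_cred_req() - process an SRP_CRED_REQ information unit
 * @ch:  RDMA channel on which the request has been received.
 * @req: SRP credit request.
 *
 * Applies the request limit delta and acknowledges it with an SRP_CRED_RSP.
 */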
1813 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1814 				 struct srp_cred_req *req)
1815 {
1816 	struct srp_cred_rsp rsp = {
1817 		.opcode = SRP_CRED_RSP,
1818 		.tag = req->tag,
1819 	};
1820 	s32 delta = be32_to_cpu(req->req_lim_delta);
1821 
1822 	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1823 		shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1824 			     "problems processing SRP_CRED_REQ\n");
1825 }
1826 
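/**
 * srp_process_aer_req() - process an SRP_AER_REQ information unit
 * @ch:  RDMA channel on which the request has been received.
 * @req: SRP asynchronous event request.
 *
 * The event itself is ignored; only the request limit delta is applied and
 * an SRP_AER_RSP is sent back to the target.
 */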
1827 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1828 				struct srp_aer_req *req)
1829 {
1830 	struct srp_target_port *target = ch->target;
1831 	struct srp_aer_rsp rsp = {
1832 		.opcode = SRP_AER_RSP,
1833 		.tag = req->tag,
1834 	};
1835 	s32 delta = be32_to_cpu(req->req_lim_delta);
1836 
1837 	shost_printk(KERN_ERR, target->scsi_host, PFX
1838 		     "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
1839 
1840 	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1841 		shost_printk(KERN_ERR, target->scsi_host, PFX
1842 			     "problems processing SRP_AER_REQ\n");
1843 }
1844 
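/**
 * srp_handle_recv() - process a receive completion
 * @ch: RDMA channel on which an information unit has been received.
 * @wc: Receive work completion.
 *
 * Dispatches the received information unit according to its opcode and
 * reposts the receive buffer.
 */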
1845 static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
1846 {
1847 	struct srp_target_port *target = ch->target;
1848 	struct ib_device *dev = target->srp_host->srp_dev->dev;
1849 	struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
1850 	int res;
1851 	u8 opcode;
1852 
1853 	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
1854 				   DMA_FROM_DEVICE);
1855 
1856 	opcode = *(u8 *) iu->buf;
1857 
1858 	if (0) {
1859 		shost_printk(KERN_ERR, target->scsi_host,
1860 			     PFX "recv completion, opcode 0x%02x\n", opcode);
1861 		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
1862 			       iu->buf, wc->byte_len, true);
1863 	}
1864 
1865 	switch (opcode) {
1866 	case SRP_RSP:
1867 		srp_process_rsp(ch, iu->buf);
1868 		break;
1869 
1870 	case SRP_CRED_REQ:
1871 		srp_process_cred_req(ch, iu->buf);
1872 		break;
1873 
1874 	case SRP_AER_REQ:
1875 		srp_process_aer_req(ch, iu->buf);
1876 		break;
1877 
1878 	case SRP_T_LOGOUT:
1879 		/* XXX Handle target logout */
1880 		shost_printk(KERN_WARNING, target->scsi_host,
1881 			     PFX "Got target logout request\n");
1882 		break;
1883 
1884 	default:
1885 		shost_printk(KERN_WARNING, target->scsi_host,
1886 			     PFX "Unhandled SRP opcode 0x%02x\n", opcode);
1887 		break;
1888 	}
1889 
1890 	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
1891 				      DMA_FROM_DEVICE);
1892 
1893 	res = srp_post_recv(ch, iu);
1894 	if (res != 0)
1895 		shost_printk(KERN_ERR, target->scsi_host,
1896 			     PFX "Recv failed with error code %d\n", res);
1897 }
1898 
1899 /**
1900  * srp_tl_err_work() - handle a transport layer error
1901  * @work: Work structure embedded in an SRP target port.
1902  *
1903  * Note: This function may get invoked before the rport has been created,
1904  * hence the target->rport test.
1905  */
1906 static void srp_tl_err_work(struct work_struct *work)
1907 {
1908 	struct srp_target_port *target;
1909 
1910 	target = container_of(work, struct srp_target_port, tl_err_work);
1911 	if (target->rport)
1912 		srp_start_tl_fail_timers(target->rport);
1913 }
1914 
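/**
 * srp_handle_qp_err() - handle a failed work completion
 * @wr_id:     ID of the work request that failed.
 * @wc_status: IB work completion status.
 * @send_err:  True for a failed send and false for a failed receive.
 * @ch:        RDMA channel on which the failure occurred.
 */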
1915 static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
1916 			      bool send_err, struct srp_rdma_ch *ch)
1917 {
1918 	struct srp_target_port *target = ch->target;
1919 
1920 	if (wr_id == SRP_LAST_WR_ID) {
1921 		complete(&ch->done);
1922 		return;
1923 	}
1924 
1925 	if (ch->connected && !target->qp_in_error) {
1926 		if (wr_id & LOCAL_INV_WR_ID_MASK) {
1927 			shost_printk(KERN_ERR, target->scsi_host, PFX
1928 				     "LOCAL_INV failed with status %s (%d)\n",
1929 				     ib_wc_status_msg(wc_status), wc_status);
1930 		} else if (wr_id & FAST_REG_WR_ID_MASK) {
1931 			shost_printk(KERN_ERR, target->scsi_host, PFX
1932 				     "FAST_REG_MR failed status %s (%d)\n",
1933 				     ib_wc_status_msg(wc_status), wc_status);
1934 		} else {
1935 			shost_printk(KERN_ERR, target->scsi_host,
1936 				     PFX "failed %s status %s (%d) for iu %p\n",
1937 				     send_err ? "send" : "receive",
1938 				     ib_wc_status_msg(wc_status), wc_status,
1939 				     (void *)(uintptr_t)wr_id);
1940 		}
1941 		queue_work(system_long_wq, &target->tl_err_work);
1942 	}
1943 	target->qp_in_error = true;
1944 }
1945 
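/**
 * srp_recv_completion() - receive completion queue callback
 * @cq:     Completion queue for which this callback has been invoked.
 * @ch_ptr: RDMA channel associated with @cq.
 */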
1946 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
1947 {
1948 	struct srp_rdma_ch *ch = ch_ptr;
1949 	struct ib_wc wc;
1950 
1951 	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1952 	while (ib_poll_cq(cq, 1, &wc) > 0) {
1953 		if (likely(wc.status == IB_WC_SUCCESS)) {
1954 			srp_handle_recv(ch, &wc);
1955 		} else {
1956 			srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
1957 		}
1958 	}
1959 }
1960 
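/**
 * srp_send_completion() - send completion queue callback
 * @cq:     Completion queue for which this callback has been invoked.
 * @ch_ptr: RDMA channel associated with @cq.
 *
 * Information units whose send completed successfully are returned to the
 * free_tx list.
 */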
1961 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
1962 {
1963 	struct srp_rdma_ch *ch = ch_ptr;
1964 	struct ib_wc wc;
1965 	struct srp_iu *iu;
1966 
1967 	while (ib_poll_cq(cq, 1, &wc) > 0) {
1968 		if (likely(wc.status == IB_WC_SUCCESS)) {
1969 			iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
1970 			list_add(&iu->list, &ch->free_tx);
1971 		} else {
1972 			srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
1973 		}
1974 	}
1975 }
1976 
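/**
 * srp_queuecommand() - queue a SCSI command on an RDMA channel
 * @shost: SCSI host via which @scmnd has been submitted.
 * @scmnd: SCSI command to queue.
 *
 * Maps the data buffers of @scmnd, builds an SRP_CMD information unit and
 * posts it on the RDMA channel selected via the block layer tag.
 */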
1977 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
1978 {
1979 	struct srp_target_port *target = host_to_target(shost);
1980 	struct srp_rport *rport = target->rport;
1981 	struct srp_rdma_ch *ch;
1982 	struct srp_request *req;
1983 	struct srp_iu *iu;
1984 	struct srp_cmd *cmd;
1985 	struct ib_device *dev;
1986 	unsigned long flags;
1987 	u32 tag;
1988 	u16 idx;
1989 	int len, ret;
1990 	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
1991 
1992 	/*
1993 	 * The SCSI EH thread is the only context from which srp_queuecommand()
1994 	 * can get invoked for blocked devices (SDEV_BLOCK /
1995 	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
1996 	 * locking the rport mutex if invoked from inside the SCSI EH.
1997 	 */
1998 	if (in_scsi_eh)
1999 		mutex_lock(&rport->mutex);
2000 
2001 	scmnd->result = srp_chkready(target->rport);
2002 	if (unlikely(scmnd->result))
2003 		goto err;
2004 
2005 	WARN_ON_ONCE(scmnd->request->tag < 0);
2006 	tag = blk_mq_unique_tag(scmnd->request);
2007 	ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2008 	idx = blk_mq_unique_tag_to_tag(tag);
2009 	WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2010 		  dev_name(&shost->shost_gendev), tag, idx,
2011 		  target->req_ring_size);
2012 
2013 	spin_lock_irqsave(&ch->lock, flags);
2014 	iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2015 	spin_unlock_irqrestore(&ch->lock, flags);
2016 
2017 	if (!iu)
2018 		goto err;
2019 
2020 	req = &ch->req_ring[idx];
2021 	dev = target->srp_host->srp_dev->dev;
2022 	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2023 				   DMA_TO_DEVICE);
2024 
2025 	scmnd->host_scribble = (void *) req;
2026 
2027 	cmd = iu->buf;
2028 	memset(cmd, 0, sizeof *cmd);
2029 
2030 	cmd->opcode = SRP_CMD;
2031 	int_to_scsilun(scmnd->device->lun, &cmd->lun);
2032 	cmd->tag    = tag;
2033 	memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2034 
2035 	req->scmnd    = scmnd;
2036 	req->cmd      = iu;
2037 
2038 	len = srp_map_data(scmnd, ch, req);
2039 	if (len < 0) {
2040 		shost_printk(KERN_ERR, target->scsi_host,
2041 			     PFX "Failed to map data (%d)\n", len);
2042 		/*
2043 		 * If we ran out of memory descriptors (-ENOMEM) because an
2044 		 * application is queuing many requests with more than
2045 		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2046 		 * to reduce queue depth temporarily.
2047 		 */
2048 		scmnd->result = len == -ENOMEM ?
2049 			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2050 		goto err_iu;
2051 	}
2052 
2053 	ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2054 				      DMA_TO_DEVICE);
2055 
2056 	if (srp_post_send(ch, iu, len)) {
2057 		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2058 		goto err_unmap;
2059 	}
2060 
2061 	ret = 0;
2062 
2063 unlock_rport:
2064 	if (in_scsi_eh)
2065 		mutex_unlock(&rport->mutex);
2066 
2067 	return ret;
2068 
2069 err_unmap:
2070 	srp_unmap_data(scmnd, ch, req);
2071 
2072 err_iu:
2073 	srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2074 
2075 	/*
2076 	 * Prevent the loops that iterate over the request ring from
2077 	 * encountering a dangling SCSI command pointer.
2078 	 */
2079 	req->scmnd = NULL;
2080 
2081 err:
2082 	if (scmnd->result) {
2083 		scmnd->scsi_done(scmnd);
2084 		ret = 0;
2085 	} else {
2086 		ret = SCSI_MLQUEUE_HOST_BUSY;
2087 	}
2088 
2089 	goto unlock_rport;
2090 }
2091 
2092 /*
2093  * Note: the resources allocated in this function are freed in
2094  * srp_free_ch_ib().
2095  */
2096 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2097 {
2098 	struct srp_target_port *target = ch->target;
2099 	int i;
2100 
2101 	ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2102 			      GFP_KERNEL);
2103 	if (!ch->rx_ring)
2104 		goto err_no_ring;
2105 	ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2106 			      GFP_KERNEL);
2107 	if (!ch->tx_ring)
2108 		goto err_no_ring;
2109 
2110 	for (i = 0; i < target->queue_size; ++i) {
2111 		ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2112 					      ch->max_ti_iu_len,
2113 					      GFP_KERNEL, DMA_FROM_DEVICE);
2114 		if (!ch->rx_ring[i])
2115 			goto err;
2116 	}
2117 
2118 	for (i = 0; i < target->queue_size; ++i) {
2119 		ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2120 					      target->max_iu_len,
2121 					      GFP_KERNEL, DMA_TO_DEVICE);
2122 		if (!ch->tx_ring[i])
2123 			goto err;
2124 
2125 		list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2126 	}
2127 
2128 	return 0;
2129 
2130 err:
2131 	for (i = 0; i < target->queue_size; ++i) {
2132 		srp_free_iu(target->srp_host, ch->rx_ring[i]);
2133 		srp_free_iu(target->srp_host, ch->tx_ring[i]);
2134 	}
2135 
2136 
2138 	kfree(ch->tx_ring);
2139 	ch->tx_ring = NULL;
2140 	kfree(ch->rx_ring);
2141 	ch->rx_ring = NULL;
2142 
2143 	return -ENOMEM;
2144 }
2145 
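/**
 * srp_compute_rq_tmo() - compute the block layer request timeout
 * @qp_attr:   QP attributes as returned by ib_cm_init_qp_attr().
 * @attr_mask: QP attribute mask as returned by ib_cm_init_qp_attr().
 *
 * Returns the request timeout in jiffies for the RC QP described by
 * @qp_attr.
 */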
2146 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2147 {
2148 	uint64_t T_tr_ns, max_compl_time_ms;
2149 	uint32_t rq_tmo_jiffies;
2150 
2151 	/*
2152 	 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2153 	 * table 91), both the QP timeout and the retry count have to be set
2154 	 * for RC QP's during the RTR to RTS transition.
2155 	 */
2156 	WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2157 		     (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2158 
2159 	/*
2160 	 * Set target->rq_tmo_jiffies to one second more than the largest time
2161 	 * it can take before an error completion is generated. See also
2162 	 * C9-140..142 in the IBTA spec for more information about how to
2163 	 * convert the QP Local ACK Timeout value to nanoseconds.
2164 	 */
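	/*
	 * Worked example with illustrative values: for timeout = 19 and
	 * retry_cnt = 7, T_tr = 4096 * 2^19 ns ~= 2.15 s, so an error
	 * completion can take up to 7 * 4 * 2.15 s ~= 60 s and the
	 * resulting rq_tmo_jiffies corresponds to about 61 seconds.
	 */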
2165 	T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2166 	max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2167 	do_div(max_compl_time_ms, NSEC_PER_MSEC);
2168 	rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2169 
2170 	return rq_tmo_jiffies;
2171 }
2172 
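/**
 * srp_cm_rep_handler() - process an IB CM REP message
 * @cm_id: IB CM connection identifier.
 * @lrsp:  SRP login response carried in the REP private data.
 * @ch:    RDMA channel for which the REP has been received.
 *
 * Transitions the QP to RTR and RTS, posts the receive buffers and sends an
 * RTU. The result is stored in ch->status.
 */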
2173 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2174 			       struct srp_login_rsp *lrsp,
2175 			       struct srp_rdma_ch *ch)
2176 {
2177 	struct srp_target_port *target = ch->target;
2178 	struct ib_qp_attr *qp_attr = NULL;
2179 	int attr_mask = 0;
2180 	int ret;
2181 	int i;
2182 
2183 	if (lrsp->opcode == SRP_LOGIN_RSP) {
2184 		ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2185 		ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
2186 
2187 		/*
2188 		 * Reserve credits for task management so we don't
2189 		 * bounce requests back to the SCSI mid-layer.
2190 		 */
2191 		target->scsi_host->can_queue
2192 			= min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2193 			      target->scsi_host->can_queue);
2194 		target->scsi_host->cmd_per_lun
2195 			= min_t(int, target->scsi_host->can_queue,
2196 				target->scsi_host->cmd_per_lun);
2197 	} else {
2198 		shost_printk(KERN_WARNING, target->scsi_host,
2199 			     PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2200 		ret = -ECONNRESET;
2201 		goto error;
2202 	}
2203 
2204 	if (!ch->rx_ring) {
2205 		ret = srp_alloc_iu_bufs(ch);
2206 		if (ret)
2207 			goto error;
2208 	}
2209 
2210 	ret = -ENOMEM;
2211 	qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2212 	if (!qp_attr)
2213 		goto error;
2214 
2215 	qp_attr->qp_state = IB_QPS_RTR;
2216 	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2217 	if (ret)
2218 		goto error_free;
2219 
2220 	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2221 	if (ret)
2222 		goto error_free;
2223 
2224 	for (i = 0; i < target->queue_size; i++) {
2225 		struct srp_iu *iu = ch->rx_ring[i];
2226 
2227 		ret = srp_post_recv(ch, iu);
2228 		if (ret)
2229 			goto error_free;
2230 	}
2231 
2232 	qp_attr->qp_state = IB_QPS_RTS;
2233 	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2234 	if (ret)
2235 		goto error_free;
2236 
2237 	target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2238 
2239 	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2240 	if (ret)
2241 		goto error_free;
2242 
2243 	ret = ib_send_cm_rtu(cm_id, NULL, 0);
2244 
2245 error_free:
2246 	kfree(qp_attr);
2247 
2248 error:
2249 	ch->status = ret;
2250 }
2251 
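/**
 * srp_cm_rej_handler() - process an IB CM REJ message
 * @cm_id: IB CM connection identifier.
 * @event: IB CM event that carries the rejection reason.
 * @ch:    RDMA channel for which the REJ has been received.
 *
 * Translates the rejection reason into a ch->status value that tells the
 * caller whether and how to retry the connection attempt.
 */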
2252 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2253 			       struct ib_cm_event *event,
2254 			       struct srp_rdma_ch *ch)
2255 {
2256 	struct srp_target_port *target = ch->target;
2257 	struct Scsi_Host *shost = target->scsi_host;
2258 	struct ib_class_port_info *cpi;
2259 	int opcode;
2260 
2261 	switch (event->param.rej_rcvd.reason) {
2262 	case IB_CM_REJ_PORT_CM_REDIRECT:
2263 		cpi = event->param.rej_rcvd.ari;
2264 		ch->path.dlid = cpi->redirect_lid;
2265 		ch->path.pkey = cpi->redirect_pkey;
2266 		cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2267 		memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2268 
2269 		ch->status = ch->path.dlid ?
2270 			SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2271 		break;
2272 
2273 	case IB_CM_REJ_PORT_REDIRECT:
2274 		if (srp_target_is_topspin(target)) {
2275 			/*
2276 			 * Topspin/Cisco SRP gateways incorrectly send
2277 			 * reject reason code 25 when they mean 24
2278 			 * (port redirect).
2279 			 */
2280 			memcpy(ch->path.dgid.raw,
2281 			       event->param.rej_rcvd.ari, 16);
2282 
2283 			shost_printk(KERN_DEBUG, shost,
2284 				     PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2285 				     be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2286 				     be64_to_cpu(ch->path.dgid.global.interface_id));
2287 
2288 			ch->status = SRP_PORT_REDIRECT;
2289 		} else {
2290 			shost_printk(KERN_WARNING, shost,
2291 				     "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2292 			ch->status = -ECONNRESET;
2293 		}
2294 		break;
2295 
2296 	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2297 		shost_printk(KERN_WARNING, shost,
2298 			    "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2299 		ch->status = -ECONNRESET;
2300 		break;
2301 
2302 	case IB_CM_REJ_CONSUMER_DEFINED:
2303 		opcode = *(u8 *) event->private_data;
2304 		if (opcode == SRP_LOGIN_REJ) {
2305 			struct srp_login_rej *rej = event->private_data;
2306 			u32 reason = be32_to_cpu(rej->reason);
2307 
2308 			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2309 				shost_printk(KERN_WARNING, shost,
2310 					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2311 			else
2312 				shost_printk(KERN_WARNING, shost, PFX
2313 					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2314 					     target->sgid.raw,
2315 					     target->orig_dgid.raw, reason);
2316 		} else
2317 			shost_printk(KERN_WARNING, shost,
2318 				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2319 				     " opcode 0x%02x\n", opcode);
2320 		ch->status = -ECONNRESET;
2321 		break;
2322 
2323 	case IB_CM_REJ_STALE_CONN:
2324 		shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
2325 		ch->status = SRP_STALE_CONN;
2326 		break;
2327 
2328 	default:
2329 		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2330 			     event->param.rej_rcvd.reason);
2331 		ch->status = -ECONNRESET;
2332 	}
2333 }
2334 
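/**
 * srp_cm_handler() - IB CM event handler
 * @cm_id: IB CM connection identifier.
 * @event: IB CM event.
 *
 * Completes ch->done for events that finish a login attempt.
 */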
2335 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2336 {
2337 	struct srp_rdma_ch *ch = cm_id->context;
2338 	struct srp_target_port *target = ch->target;
2339 	int comp = 0;
2340 
2341 	switch (event->event) {
2342 	case IB_CM_REQ_ERROR:
2343 		shost_printk(KERN_DEBUG, target->scsi_host,
2344 			     PFX "Sending CM REQ failed\n");
2345 		comp = 1;
2346 		ch->status = -ECONNRESET;
2347 		break;
2348 
2349 	case IB_CM_REP_RECEIVED:
2350 		comp = 1;
2351 		srp_cm_rep_handler(cm_id, event->private_data, ch);
2352 		break;
2353 
2354 	case IB_CM_REJ_RECEIVED:
2355 		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2356 		comp = 1;
2357 
2358 		srp_cm_rej_handler(cm_id, event, ch);
2359 		break;
2360 
2361 	case IB_CM_DREQ_RECEIVED:
2362 		shost_printk(KERN_WARNING, target->scsi_host,
2363 			     PFX "DREQ received - connection closed\n");
2364 		ch->connected = false;
2365 		if (ib_send_cm_drep(cm_id, NULL, 0))
2366 			shost_printk(KERN_ERR, target->scsi_host,
2367 				     PFX "Sending CM DREP failed\n");
2368 		queue_work(system_long_wq, &target->tl_err_work);
2369 		break;
2370 
2371 	case IB_CM_TIMEWAIT_EXIT:
2372 		shost_printk(KERN_ERR, target->scsi_host,
2373 			     PFX "connection closed\n");
2374 		comp = 1;
2375 
2376 		ch->status = 0;
2377 		break;
2378 
2379 	case IB_CM_MRA_RECEIVED:
2380 	case IB_CM_DREQ_ERROR:
2381 	case IB_CM_DREP_RECEIVED:
2382 		break;
2383 
2384 	default:
2385 		shost_printk(KERN_WARNING, target->scsi_host,
2386 			     PFX "Unhandled CM event %d\n", event->event);
2387 		break;
2388 	}
2389 
2390 	if (comp)
2391 		complete(&ch->done);
2392 
2393 	return 0;
2394 }
2395 
2396 /**
2397  * srp_change_queue_depth() - set device queue depth
2398  * @sdev: scsi device struct
2399  * @qdepth: requested queue depth
2400  *
2401  * Returns queue depth.
2402  */
2403 static int
2404 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2405 {
2406 	if (!sdev->tagged_supported)
2407 		qdepth = 1;
2408 	return scsi_change_queue_depth(sdev, qdepth);
2409 }
2410 
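/**
 * srp_send_tsk_mgmt() - send an SRP task management function
 * @ch:      RDMA channel over which to send the request.
 * @req_tag: Tag of the SCSI command the function applies to.
 * @lun:     SCSI logical unit number.
 * @func:    Task management function, e.g. SRP_TSK_ABORT_TASK.
 *
 * Returns zero if the function completed and -1 if sending failed or no
 * response was received within SRP_ABORT_TIMEOUT_MS.
 */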
2411 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2412 			     u8 func)
2413 {
2414 	struct srp_target_port *target = ch->target;
2415 	struct srp_rport *rport = target->rport;
2416 	struct ib_device *dev = target->srp_host->srp_dev->dev;
2417 	struct srp_iu *iu;
2418 	struct srp_tsk_mgmt *tsk_mgmt;
2419 
2420 	if (!ch->connected || target->qp_in_error)
2421 		return -1;
2422 
2423 	init_completion(&ch->tsk_mgmt_done);
2424 
2425 	/*
2426 	 * Lock the rport mutex to prevent srp_create_ch_ib() from being
2427 	 * invoked while a task management function is being sent.
2428 	 */
2429 	mutex_lock(&rport->mutex);
2430 	spin_lock_irq(&ch->lock);
2431 	iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2432 	spin_unlock_irq(&ch->lock);
2433 
2434 	if (!iu) {
2435 		mutex_unlock(&rport->mutex);
2436 
2437 		return -1;
2438 	}
2439 
2440 	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2441 				   DMA_TO_DEVICE);
2442 	tsk_mgmt = iu->buf;
2443 	memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2444 
2445 	tsk_mgmt->opcode 	= SRP_TSK_MGMT;
2446 	int_to_scsilun(lun, &tsk_mgmt->lun);
2447 	tsk_mgmt->tag		= req_tag | SRP_TAG_TSK_MGMT;
2448 	tsk_mgmt->tsk_mgmt_func = func;
2449 	tsk_mgmt->task_tag	= req_tag;
2450 
2451 	ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2452 				      DMA_TO_DEVICE);
2453 	if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2454 		srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2455 		mutex_unlock(&rport->mutex);
2456 
2457 		return -1;
2458 	}
2459 	mutex_unlock(&rport->mutex);
2460 
2461 	if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
2462 					 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
2463 		return -1;
2464 
2465 	return 0;
2466 }
2467 
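/**
 * srp_abort() - SCSI error handler abort callback
 * @scmnd: SCSI command to abort.
 *
 * Sends an SRP_TSK_ABORT_TASK task management function for the command
 * identified by its block layer tag.
 */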
2468 static int srp_abort(struct scsi_cmnd *scmnd)
2469 {
2470 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2471 	struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2472 	u32 tag;
2473 	u16 ch_idx;
2474 	struct srp_rdma_ch *ch;
2475 	int ret;
2476 
2477 	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2478 
2479 	if (!req)
2480 		return SUCCESS;
2481 	tag = blk_mq_unique_tag(scmnd->request);
2482 	ch_idx = blk_mq_unique_tag_to_hwq(tag);
2483 	if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2484 		return SUCCESS;
2485 	ch = &target->ch[ch_idx];
2486 	if (!srp_claim_req(ch, req, NULL, scmnd))
2487 		return SUCCESS;
2488 	shost_printk(KERN_ERR, target->scsi_host,
2489 		     "Sending SRP abort for tag %#x\n", tag);
2490 	if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2491 			      SRP_TSK_ABORT_TASK) == 0)
2492 		ret = SUCCESS;
2493 	else if (target->rport->state == SRP_RPORT_LOST)
2494 		ret = FAST_IO_FAIL;
2495 	else
2496 		ret = FAILED;
2497 	srp_free_req(ch, req, scmnd, 0);
2498 	scmnd->result = DID_ABORT << 16;
2499 	scmnd->scsi_done(scmnd);
2500 
2501 	return ret;
2502 }
2503 
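/**
 * srp_reset_device() - SCSI error handler LUN reset callback
 * @scmnd: SCSI command that triggered the reset.
 *
 * Sends an SRP_TSK_LUN_RESET task management function and finishes the
 * outstanding requests of the affected SCSI device on every RDMA channel.
 */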
2504 static int srp_reset_device(struct scsi_cmnd *scmnd)
2505 {
2506 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2507 	struct srp_rdma_ch *ch;
2508 	int i, j;
2509 
2510 	shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2511 
2512 	ch = &target->ch[0];
2513 	if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2514 			      SRP_TSK_LUN_RESET))
2515 		return FAILED;
2516 	if (ch->tsk_mgmt_status)
2517 		return FAILED;
2518 
2519 	for (i = 0; i < target->ch_count; i++) {
2520 		ch = &target->ch[i];
2521 		for (j = 0; j < target->req_ring_size; ++j) {
2522 			struct srp_request *req = &ch->req_ring[j];
2523 
2524 			srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2525 		}
2526 	}
2527 
2528 	return SUCCESS;
2529 }
2530 
2531 static int srp_reset_host(struct scsi_cmnd *scmnd)
2532 {
2533 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2534 
2535 	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2536 
2537 	return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2538 }
2539 
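/**
 * srp_slave_configure() - configure a newly discovered SCSI device
 * @sdev: SCSI device.
 *
 * Sets the block layer request timeout of disk devices to the larger of
 * 30 seconds and target->rq_tmo_jiffies.
 */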
2540 static int srp_slave_configure(struct scsi_device *sdev)
2541 {
2542 	struct Scsi_Host *shost = sdev->host;
2543 	struct srp_target_port *target = host_to_target(shost);
2544 	struct request_queue *q = sdev->request_queue;
2545 	unsigned long timeout;
2546 
2547 	if (sdev->type == TYPE_DISK) {
2548 		timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2549 		blk_queue_rq_timeout(q, timeout);
2550 	}
2551 
2552 	return 0;
2553 }
2554 
2555 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2556 			   char *buf)
2557 {
2558 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2559 
2560 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2561 }
2562 
2563 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2564 			     char *buf)
2565 {
2566 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2567 
2568 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2569 }
2570 
2571 static ssize_t show_service_id(struct device *dev,
2572 			       struct device_attribute *attr, char *buf)
2573 {
2574 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2575 
2576 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2577 }
2578 
2579 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2580 			 char *buf)
2581 {
2582 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2583 
2584 	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2585 }
2586 
2587 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2588 			 char *buf)
2589 {
2590 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2591 
2592 	return sprintf(buf, "%pI6\n", target->sgid.raw);
2593 }
2594 
2595 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2596 			 char *buf)
2597 {
2598 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2599 	struct srp_rdma_ch *ch = &target->ch[0];
2600 
2601 	return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2602 }
2603 
2604 static ssize_t show_orig_dgid(struct device *dev,
2605 			      struct device_attribute *attr, char *buf)
2606 {
2607 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2608 
2609 	return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2610 }
2611 
2612 static ssize_t show_req_lim(struct device *dev,
2613 			    struct device_attribute *attr, char *buf)
2614 {
2615 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2616 	struct srp_rdma_ch *ch;
2617 	int i, req_lim = INT_MAX;
2618 
2619 	for (i = 0; i < target->ch_count; i++) {
2620 		ch = &target->ch[i];
2621 		req_lim = min(req_lim, ch->req_lim);
2622 	}
2623 	return sprintf(buf, "%d\n", req_lim);
2624 }
2625 
2626 static ssize_t show_zero_req_lim(struct device *dev,
2627 				 struct device_attribute *attr, char *buf)
2628 {
2629 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2630 
2631 	return sprintf(buf, "%d\n", target->zero_req_lim);
2632 }
2633 
2634 static ssize_t show_local_ib_port(struct device *dev,
2635 				  struct device_attribute *attr, char *buf)
2636 {
2637 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2638 
2639 	return sprintf(buf, "%d\n", target->srp_host->port);
2640 }
2641 
2642 static ssize_t show_local_ib_device(struct device *dev,
2643 				    struct device_attribute *attr, char *buf)
2644 {
2645 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2646 
2647 	return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2648 }
2649 
2650 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2651 			     char *buf)
2652 {
2653 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2654 
2655 	return sprintf(buf, "%d\n", target->ch_count);
2656 }
2657 
2658 static ssize_t show_comp_vector(struct device *dev,
2659 				struct device_attribute *attr, char *buf)
2660 {
2661 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2662 
2663 	return sprintf(buf, "%d\n", target->comp_vector);
2664 }
2665 
2666 static ssize_t show_tl_retry_count(struct device *dev,
2667 				   struct device_attribute *attr, char *buf)
2668 {
2669 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2670 
2671 	return sprintf(buf, "%d\n", target->tl_retry_count);
2672 }
2673 
2674 static ssize_t show_cmd_sg_entries(struct device *dev,
2675 				   struct device_attribute *attr, char *buf)
2676 {
2677 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2678 
2679 	return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2680 }
2681 
2682 static ssize_t show_allow_ext_sg(struct device *dev,
2683 				 struct device_attribute *attr, char *buf)
2684 {
2685 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2686 
2687 	return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2688 }
2689 
2690 static DEVICE_ATTR(id_ext,	    S_IRUGO, show_id_ext,	   NULL);
2691 static DEVICE_ATTR(ioc_guid,	    S_IRUGO, show_ioc_guid,	   NULL);
2692 static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);
2693 static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL);
2694 static DEVICE_ATTR(sgid,	    S_IRUGO, show_sgid,		   NULL);
2695 static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);
2696 static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);
2697 static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
2698 static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);
2699 static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
2700 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2701 static DEVICE_ATTR(ch_count,        S_IRUGO, show_ch_count,        NULL);
2702 static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
2703 static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
2704 static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
2705 static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);
2706 
2707 static struct device_attribute *srp_host_attrs[] = {
2708 	&dev_attr_id_ext,
2709 	&dev_attr_ioc_guid,
2710 	&dev_attr_service_id,
2711 	&dev_attr_pkey,
2712 	&dev_attr_sgid,
2713 	&dev_attr_dgid,
2714 	&dev_attr_orig_dgid,
2715 	&dev_attr_req_lim,
2716 	&dev_attr_zero_req_lim,
2717 	&dev_attr_local_ib_port,
2718 	&dev_attr_local_ib_device,
2719 	&dev_attr_ch_count,
2720 	&dev_attr_comp_vector,
2721 	&dev_attr_tl_retry_count,
2722 	&dev_attr_cmd_sg_entries,
2723 	&dev_attr_allow_ext_sg,
2724 	NULL
2725 };
2726 
2727 static struct scsi_host_template srp_template = {
2728 	.module				= THIS_MODULE,
2729 	.name				= "InfiniBand SRP initiator",
2730 	.proc_name			= DRV_NAME,
2731 	.slave_configure		= srp_slave_configure,
2732 	.info				= srp_target_info,
2733 	.queuecommand			= srp_queuecommand,
2734 	.change_queue_depth             = srp_change_queue_depth,
2735 	.eh_abort_handler		= srp_abort,
2736 	.eh_device_reset_handler	= srp_reset_device,
2737 	.eh_host_reset_handler		= srp_reset_host,
2738 	.skip_settle_delay		= true,
2739 	.sg_tablesize			= SRP_DEF_SG_TABLESIZE,
2740 	.can_queue			= SRP_DEFAULT_CMD_SQ_SIZE,
2741 	.this_id			= -1,
2742 	.cmd_per_lun			= SRP_DEFAULT_CMD_SQ_SIZE,
2743 	.use_clustering			= ENABLE_CLUSTERING,
2744 	.shost_attrs			= srp_host_attrs,
2745 	.use_blk_tags			= 1,
2746 	.track_queue_depth		= 1,
2747 };
2748 
2749 static int srp_sdev_count(struct Scsi_Host *host)
2750 {
2751 	struct scsi_device *sdev;
2752 	int c = 0;
2753 
2754 	shost_for_each_device(sdev, host)
2755 		c++;
2756 
2757 	return c;
2758 }
2759 
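/**
 * srp_add_target() - register a target port with the SCSI and SRP layers
 * @host:   SRP host through which the target port is reachable.
 * @target: SRP target port to register.
 *
 * Adds the SCSI host, creates the SRP remote port and scans for LUNs. If the
 * connection is lost during the scan the target is queued for removal,
 * otherwise it transitions to the SRP_TARGET_LIVE state.
 */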
2760 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2761 {
2762 	struct srp_rport_identifiers ids;
2763 	struct srp_rport *rport;
2764 
2765 	target->state = SRP_TARGET_SCANNING;
2766 	sprintf(target->target_name, "SRP.T10:%016llX",
2767 		be64_to_cpu(target->id_ext));
2768 
2769 	if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2770 		return -ENODEV;
2771 
2772 	memcpy(ids.port_id, &target->id_ext, 8);
2773 	memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2774 	ids.roles = SRP_RPORT_ROLE_TARGET;
2775 	rport = srp_rport_add(target->scsi_host, &ids);
2776 	if (IS_ERR(rport)) {
2777 		scsi_remove_host(target->scsi_host);
2778 		return PTR_ERR(rport);
2779 	}
2780 
2781 	rport->lld_data = target;
2782 	target->rport = rport;
2783 
2784 	spin_lock(&host->target_lock);
2785 	list_add_tail(&target->list, &host->target_list);
2786 	spin_unlock(&host->target_lock);
2787 
2788 	scsi_scan_target(&target->scsi_host->shost_gendev,
2789 			 0, target->scsi_id, SCAN_WILD_CARD, 0);
2790 
2791 	if (srp_connected_ch(target) < target->ch_count ||
2792 	    target->qp_in_error) {
2793 		shost_printk(KERN_INFO, target->scsi_host,
2794 			     PFX "SCSI scan failed - removing SCSI host\n");
2795 		srp_queue_remove_work(target);
2796 		goto out;
2797 	}
2798 
2799 	pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2800 		 dev_name(&target->scsi_host->shost_gendev),
2801 		 srp_sdev_count(target->scsi_host));
2802 
2803 	spin_lock_irq(&target->lock);
2804 	if (target->state == SRP_TARGET_SCANNING)
2805 		target->state = SRP_TARGET_LIVE;
2806 	spin_unlock_irq(&target->lock);
2807 
2808 out:
2809 	return 0;
2810 }
2811 
2812 static void srp_release_dev(struct device *dev)
2813 {
2814 	struct srp_host *host =
2815 		container_of(dev, struct srp_host, dev);
2816 
2817 	complete(&host->released);
2818 }
2819 
2820 static struct class srp_class = {
2821 	.name    = "infiniband_srp",
2822 	.dev_release = srp_release_dev
2823 };
2824 
2825 /**
2826  * srp_conn_unique() - check whether the connection to a target is unique
2827  * @host:   SRP host.
2828  * @target: SRP target port.
2829  */
2830 static bool srp_conn_unique(struct srp_host *host,
2831 			    struct srp_target_port *target)
2832 {
2833 	struct srp_target_port *t;
2834 	bool ret = false;
2835 
2836 	if (target->state == SRP_TARGET_REMOVED)
2837 		goto out;
2838 
2839 	ret = true;
2840 
2841 	spin_lock(&host->target_lock);
2842 	list_for_each_entry(t, &host->target_list, list) {
2843 		if (t != target &&
2844 		    target->id_ext == t->id_ext &&
2845 		    target->ioc_guid == t->ioc_guid &&
2846 		    target->initiator_ext == t->initiator_ext) {
2847 			ret = false;
2848 			break;
2849 		}
2850 	}
2851 	spin_unlock(&host->target_lock);
2852 
2853 out:
2854 	return ret;
2855 }
2856 
2857 /*
2858  * Target ports are added by writing
2859  *
2860  *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
2861  *     pkey=<P_Key>,service_id=<service ID>
2862  *
2863  * to the add_target sysfs attribute.
2864  */
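/*
 * For example (the HCA name, port number and parameter values below are
 * illustrative placeholders):
 *
 *     echo "id_ext=<16 hex digits>,ioc_guid=<16 hex digits>,dgid=<32 hex digits>,pkey=ffff,service_id=<16 hex digits>" \
 *         > /sys/class/infiniband_srp/srp-<hca>-<port>/add_target
 */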
2865 enum {
2866 	SRP_OPT_ERR		= 0,
2867 	SRP_OPT_ID_EXT		= 1 << 0,
2868 	SRP_OPT_IOC_GUID	= 1 << 1,
2869 	SRP_OPT_DGID		= 1 << 2,
2870 	SRP_OPT_PKEY		= 1 << 3,
2871 	SRP_OPT_SERVICE_ID	= 1 << 4,
2872 	SRP_OPT_MAX_SECT	= 1 << 5,
2873 	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,
2874 	SRP_OPT_IO_CLASS	= 1 << 7,
2875 	SRP_OPT_INITIATOR_EXT	= 1 << 8,
2876 	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9,
2877 	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
2878 	SRP_OPT_SG_TABLESIZE	= 1 << 11,
2879 	SRP_OPT_COMP_VECTOR	= 1 << 12,
2880 	SRP_OPT_TL_RETRY_COUNT	= 1 << 13,
2881 	SRP_OPT_QUEUE_SIZE	= 1 << 14,
2882 	SRP_OPT_ALL		= (SRP_OPT_ID_EXT	|
2883 				   SRP_OPT_IOC_GUID	|
2884 				   SRP_OPT_DGID		|
2885 				   SRP_OPT_PKEY		|
2886 				   SRP_OPT_SERVICE_ID),
2887 };
2888 
2889 static const match_table_t srp_opt_tokens = {
2890 	{ SRP_OPT_ID_EXT,		"id_ext=%s" 		},
2891 	{ SRP_OPT_IOC_GUID,		"ioc_guid=%s" 		},
2892 	{ SRP_OPT_DGID,			"dgid=%s" 		},
2893 	{ SRP_OPT_PKEY,			"pkey=%x" 		},
2894 	{ SRP_OPT_SERVICE_ID,		"service_id=%s"		},
2895 	{ SRP_OPT_MAX_SECT,		"max_sect=%d" 		},
2896 	{ SRP_OPT_MAX_CMD_PER_LUN,	"max_cmd_per_lun=%d" 	},
2897 	{ SRP_OPT_IO_CLASS,		"io_class=%x"		},
2898 	{ SRP_OPT_INITIATOR_EXT,	"initiator_ext=%s"	},
2899 	{ SRP_OPT_CMD_SG_ENTRIES,	"cmd_sg_entries=%u"	},
2900 	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	},
2901 	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	},
2902 	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	},
2903 	{ SRP_OPT_TL_RETRY_COUNT,	"tl_retry_count=%u"	},
2904 	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},
2905 	{ SRP_OPT_ERR,			NULL 			}
2906 };
2907 
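/**
 * srp_parse_options() - parse an add_target parameter string
 * @buf:    Parameter string as written to the add_target sysfs attribute.
 * @target: SRP target port whose fields are filled in from @buf.
 *
 * Returns zero if all mandatory parameters were present and valid and a
 * negative error code otherwise.
 */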
2908 static int srp_parse_options(const char *buf, struct srp_target_port *target)
2909 {
2910 	char *options, *sep_opt;
2911 	char *p;
2912 	char dgid[3];
2913 	substring_t args[MAX_OPT_ARGS];
2914 	int opt_mask = 0;
2915 	int token;
2916 	int ret = -EINVAL;
2917 	int i;
2918 
2919 	options = kstrdup(buf, GFP_KERNEL);
2920 	if (!options)
2921 		return -ENOMEM;
2922 
2923 	sep_opt = options;
2924 	while ((p = strsep(&sep_opt, ",\n")) != NULL) {
2925 		if (!*p)
2926 			continue;
2927 
2928 		token = match_token(p, srp_opt_tokens, args);
2929 		opt_mask |= token;
2930 
2931 		switch (token) {
2932 		case SRP_OPT_ID_EXT:
2933 			p = match_strdup(args);
2934 			if (!p) {
2935 				ret = -ENOMEM;
2936 				goto out;
2937 			}
2938 			target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
2939 			kfree(p);
2940 			break;
2941 
2942 		case SRP_OPT_IOC_GUID:
2943 			p = match_strdup(args);
2944 			if (!p) {
2945 				ret = -ENOMEM;
2946 				goto out;
2947 			}
2948 			target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
2949 			kfree(p);
2950 			break;
2951 
2952 		case SRP_OPT_DGID:
2953 			p = match_strdup(args);
2954 			if (!p) {
2955 				ret = -ENOMEM;
2956 				goto out;
2957 			}
2958 			if (strlen(p) != 32) {
2959 				pr_warn("bad dest GID parameter '%s'\n", p);
2960 				kfree(p);
2961 				goto out;
2962 			}
2963 
2964 			for (i = 0; i < 16; ++i) {
2965 				strlcpy(dgid, p + i * 2, sizeof(dgid));
2966 				if (sscanf(dgid, "%hhx",
2967 					   &target->orig_dgid.raw[i]) < 1) {
2968 					ret = -EINVAL;
2969 					kfree(p);
2970 					goto out;
2971 				}
2972 			}
2973 			kfree(p);
2974 			break;
2975 
2976 		case SRP_OPT_PKEY:
2977 			if (match_hex(args, &token)) {
2978 				pr_warn("bad P_Key parameter '%s'\n", p);
2979 				goto out;
2980 			}
2981 			target->pkey = cpu_to_be16(token);
2982 			break;
2983 
2984 		case SRP_OPT_SERVICE_ID:
2985 			p = match_strdup(args);
2986 			if (!p) {
2987 				ret = -ENOMEM;
2988 				goto out;
2989 			}
2990 			target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
2991 			kfree(p);
2992 			break;
2993 
2994 		case SRP_OPT_MAX_SECT:
2995 			if (match_int(args, &token)) {
2996 				pr_warn("bad max sect parameter '%s'\n", p);
2997 				goto out;
2998 			}
2999 			target->scsi_host->max_sectors = token;
3000 			break;
3001 
3002 		case SRP_OPT_QUEUE_SIZE:
3003 			if (match_int(args, &token) || token < 1) {
3004 				pr_warn("bad queue_size parameter '%s'\n", p);
3005 				goto out;
3006 			}
3007 			target->scsi_host->can_queue = token;
3008 			target->queue_size = token + SRP_RSP_SQ_SIZE +
3009 					     SRP_TSK_MGMT_SQ_SIZE;
3010 			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3011 				target->scsi_host->cmd_per_lun = token;
3012 			break;
3013 
3014 		case SRP_OPT_MAX_CMD_PER_LUN:
3015 			if (match_int(args, &token) || token < 1) {
3016 				pr_warn("bad max cmd_per_lun parameter '%s'\n",
3017 					p);
3018 				goto out;
3019 			}
3020 			target->scsi_host->cmd_per_lun = token;
3021 			break;
3022 
3023 		case SRP_OPT_IO_CLASS:
3024 			if (match_hex(args, &token)) {
3025 				pr_warn("bad IO class parameter '%s'\n", p);
3026 				goto out;
3027 			}
3028 			if (token != SRP_REV10_IB_IO_CLASS &&
3029 			    token != SRP_REV16A_IB_IO_CLASS) {
3030 				pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3031 					token, SRP_REV10_IB_IO_CLASS,
3032 					SRP_REV16A_IB_IO_CLASS);
3033 				goto out;
3034 			}
3035 			target->io_class = token;
3036 			break;
3037 
3038 		case SRP_OPT_INITIATOR_EXT:
3039 			p = match_strdup(args);
3040 			if (!p) {
3041 				ret = -ENOMEM;
3042 				goto out;
3043 			}
3044 			target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3045 			kfree(p);
3046 			break;
3047 
3048 		case SRP_OPT_CMD_SG_ENTRIES:
3049 			if (match_int(args, &token) || token < 1 || token > 255) {
3050 				pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3051 					p);
3052 				goto out;
3053 			}
3054 			target->cmd_sg_cnt = token;
3055 			break;
3056 
3057 		case SRP_OPT_ALLOW_EXT_SG:
3058 			if (match_int(args, &token)) {
3059 				pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3060 				goto out;
3061 			}
3062 			target->allow_ext_sg = !!token;
3063 			break;
3064 
3065 		case SRP_OPT_SG_TABLESIZE:
3066 			if (match_int(args, &token) || token < 1 ||
3067 					token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
3068 				pr_warn("bad max sg_tablesize parameter '%s'\n",
3069 					p);
3070 				goto out;
3071 			}
3072 			target->sg_tablesize = token;
3073 			break;
3074 
3075 		case SRP_OPT_COMP_VECTOR:
3076 			if (match_int(args, &token) || token < 0) {
3077 				pr_warn("bad comp_vector parameter '%s'\n", p);
3078 				goto out;
3079 			}
3080 			target->comp_vector = token;
3081 			break;
3082 
3083 		case SRP_OPT_TL_RETRY_COUNT:
3084 			if (match_int(args, &token) || token < 2 || token > 7) {
3085 				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3086 					p);
3087 				goto out;
3088 			}
3089 			target->tl_retry_count = token;
3090 			break;
3091 
3092 		default:
3093 			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3094 				p);
3095 			goto out;
3096 		}
3097 	}
3098 
3099 	if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3100 		ret = 0;
3101 	else
3102 		for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3103 			if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3104 			    !(srp_opt_tokens[i].token & opt_mask))
3105 				pr_warn("target creation request is missing parameter '%s'\n",
3106 					srp_opt_tokens[i].pattern);
3107 
3108 	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3109 	    && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3110 		pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3111 			target->scsi_host->cmd_per_lun,
3112 			target->scsi_host->can_queue);
3113 
3114 out:
3115 	kfree(options);
3116 	return ret;
3117 }
3118 
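/**
 * srp_create_target() - handle a write to the add_target sysfs attribute
 * @dev:   SRP host device.
 * @attr:  The add_target device attribute.
 * @buf:   Parameter string.
 * @count: Size of @buf in bytes.
 *
 * Allocates a SCSI host, parses the target parameters, creates and connects
 * the RDMA channels and registers the new target port. Returns @count upon
 * success and a negative error code otherwise.
 */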
3119 static ssize_t srp_create_target(struct device *dev,
3120 				 struct device_attribute *attr,
3121 				 const char *buf, size_t count)
3122 {
3123 	struct srp_host *host =
3124 		container_of(dev, struct srp_host, dev);
3125 	struct Scsi_Host *target_host;
3126 	struct srp_target_port *target;
3127 	struct srp_rdma_ch *ch;
3128 	struct srp_device *srp_dev = host->srp_dev;
3129 	struct ib_device *ibdev = srp_dev->dev;
3130 	int ret, node_idx, node, cpu, i;
3131 	bool multich = false;
3132 
3133 	target_host = scsi_host_alloc(&srp_template,
3134 				      sizeof (struct srp_target_port));
3135 	if (!target_host)
3136 		return -ENOMEM;
3137 
3138 	target_host->transportt  = ib_srp_transport_template;
3139 	target_host->max_channel = 0;
3140 	target_host->max_id      = 1;
3141 	target_host->max_lun     = -1LL;
3142 	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3143 
3144 	target = host_to_target(target_host);
3145 
3146 	target->io_class	= SRP_REV16A_IB_IO_CLASS;
3147 	target->scsi_host	= target_host;
3148 	target->srp_host	= host;
3149 	target->lkey		= host->srp_dev->mr->lkey;
3150 	target->rkey		= host->srp_dev->mr->rkey;
3151 	target->cmd_sg_cnt	= cmd_sg_entries;
3152 	target->sg_tablesize	= indirect_sg_entries ? : cmd_sg_entries;
3153 	target->allow_ext_sg	= allow_ext_sg;
3154 	target->tl_retry_count	= 7;
3155 	target->queue_size	= SRP_DEFAULT_QUEUE_SIZE;
3156 
3157 	/*
3158 	 * Prevent the SCSI host from being removed by srp_remove_target()
3159 	 * before this function returns.
3160 	 */
3161 	scsi_host_get(target->scsi_host);
3162 
3163 	mutex_lock(&host->add_target_mutex);
3164 
3165 	ret = srp_parse_options(buf, target);
3166 	if (ret)
3167 		goto out;
3168 
3169 	ret = scsi_init_shared_tag_map(target_host, target_host->can_queue);
3170 	if (ret)
3171 		goto out;
3172 
3173 	target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3174 
3175 	if (!srp_conn_unique(target->srp_host, target)) {
3176 		shost_printk(KERN_INFO, target->scsi_host,
3177 			     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3178 			     be64_to_cpu(target->id_ext),
3179 			     be64_to_cpu(target->ioc_guid),
3180 			     be64_to_cpu(target->initiator_ext));
3181 		ret = -EEXIST;
3182 		goto out;
3183 	}
3184 
3185 	if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3186 	    target->cmd_sg_cnt < target->sg_tablesize) {
3187 		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3188 		target->sg_tablesize = target->cmd_sg_cnt;
3189 	}
3190 
3191 	target_host->sg_tablesize = target->sg_tablesize;
3192 	target->indirect_size = target->sg_tablesize *
3193 				sizeof (struct srp_direct_buf);
3194 	target->max_iu_len = sizeof (struct srp_cmd) +
3195 			     sizeof (struct srp_indirect_buf) +
3196 			     target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3197 
3198 	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3199 	INIT_WORK(&target->remove_work, srp_remove_work);
3200 	spin_lock_init(&target->lock);
3201 	ret = ib_query_gid(ibdev, host->port, 0, &target->sgid);
3202 	if (ret)
3203 		goto out;
3204 
3205 	ret = -ENOMEM;
3206 	target->ch_count = max_t(unsigned, num_online_nodes(),
3207 				 min(ch_count ? :
3208 				     min(4 * num_online_nodes(),
3209 					 ibdev->num_comp_vectors),
3210 				     num_online_cpus()));
3211 	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3212 			     GFP_KERNEL);
3213 	if (!target->ch)
3214 		goto out;
3215 
3216 	node_idx = 0;
3217 	for_each_online_node(node) {
3218 		const int ch_start = (node_idx * target->ch_count /
3219 				      num_online_nodes());
3220 		const int ch_end = ((node_idx + 1) * target->ch_count /
3221 				    num_online_nodes());
3222 		const int cv_start = (node_idx * ibdev->num_comp_vectors /
3223 				      num_online_nodes() + target->comp_vector)
3224 				     % ibdev->num_comp_vectors;
3225 		const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3226 				    num_online_nodes() + target->comp_vector)
3227 				   % ibdev->num_comp_vectors;
3228 		int cpu_idx = 0;
3229 
3230 		for_each_online_cpu(cpu) {
3231 			if (cpu_to_node(cpu) != node)
3232 				continue;
3233 			if (ch_start + cpu_idx >= ch_end)
3234 				continue;
3235 			ch = &target->ch[ch_start + cpu_idx];
3236 			ch->target = target;
3237 			ch->comp_vector = cv_start == cv_end ? cv_start :
3238 				cv_start + cpu_idx % (cv_end - cv_start);
3239 			spin_lock_init(&ch->lock);
3240 			INIT_LIST_HEAD(&ch->free_tx);
3241 			ret = srp_new_cm_id(ch);
3242 			if (ret)
3243 				goto err_disconnect;
3244 
3245 			ret = srp_create_ch_ib(ch);
3246 			if (ret)
3247 				goto err_disconnect;
3248 
3249 			ret = srp_alloc_req_data(ch);
3250 			if (ret)
3251 				goto err_disconnect;
3252 
3253 			ret = srp_connect_ch(ch, multich);
3254 			if (ret) {
3255 				shost_printk(KERN_ERR, target->scsi_host,
3256 					     PFX "Connection %d/%d failed\n",
3257 					     ch_start + cpu_idx,
3258 					     target->ch_count);
3259 				if (node_idx == 0 && cpu_idx == 0) {
3260 					goto err_disconnect;
3261 				} else {
3262 					srp_free_ch_ib(target, ch);
3263 					srp_free_req_data(target, ch);
3264 					target->ch_count = ch - target->ch;
3265 					break;
3266 				}
3267 			}
3268 
3269 			multich = true;
3270 			cpu_idx++;
3271 		}
3272 		node_idx++;
3273 	}
3274 
3275 	target->scsi_host->nr_hw_queues = target->ch_count;
3276 
3277 	ret = srp_add_target(host, target);
3278 	if (ret)
3279 		goto err_disconnect;
3280 
3281 	if (target->state != SRP_TARGET_REMOVED) {
3282 		shost_printk(KERN_DEBUG, target->scsi_host, PFX
3283 			     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3284 			     be64_to_cpu(target->id_ext),
3285 			     be64_to_cpu(target->ioc_guid),
3286 			     be16_to_cpu(target->pkey),
3287 			     be64_to_cpu(target->service_id),
3288 			     target->sgid.raw, target->orig_dgid.raw);
3289 	}
3290 
3291 	ret = count;
3292 
3293 out:
3294 	mutex_unlock(&host->add_target_mutex);
3295 
3296 	scsi_host_put(target->scsi_host);
3297 
3298 	return ret;
3299 
3300 err_disconnect:
3301 	srp_disconnect_target(target);
3302 
3303 	for (i = 0; i < target->ch_count; i++) {
3304 		ch = &target->ch[i];
3305 		srp_free_ch_ib(target, ch);
3306 		srp_free_req_data(target, ch);
3307 	}
3308 
3309 	kfree(target->ch);
3310 	goto out;
3311 }
3312 
3313 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3314 
3315 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3316 			  char *buf)
3317 {
3318 	struct srp_host *host = container_of(dev, struct srp_host, dev);
3319 
3320 	return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3321 }
3322 
3323 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3324 
3325 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3326 			 char *buf)
3327 {
3328 	struct srp_host *host = container_of(dev, struct srp_host, dev);
3329 
3330 	return sprintf(buf, "%d\n", host->port);
3331 }
3332 
3333 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3334 
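/**
 * srp_add_port() - allocate and register an SRP host for one HCA port
 * @device: SRP device (HCA).
 * @port:   Port number.
 *
 * Registers a device of the infiniband_srp class together with its
 * add_target, ibdev and port sysfs attributes.
 */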
3335 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3336 {
3337 	struct srp_host *host;
3338 
3339 	host = kzalloc(sizeof *host, GFP_KERNEL);
3340 	if (!host)
3341 		return NULL;
3342 
3343 	INIT_LIST_HEAD(&host->target_list);
3344 	spin_lock_init(&host->target_lock);
3345 	init_completion(&host->released);
3346 	mutex_init(&host->add_target_mutex);
3347 	host->srp_dev = device;
3348 	host->port = port;
3349 
3350 	host->dev.class = &srp_class;
3351 	host->dev.parent = device->dev->dma_device;
3352 	dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3353 
3354 	if (device_register(&host->dev))
3355 		goto free_host;
3356 	if (device_create_file(&host->dev, &dev_attr_add_target))
3357 		goto err_class;
3358 	if (device_create_file(&host->dev, &dev_attr_ibdev))
3359 		goto err_class;
3360 	if (device_create_file(&host->dev, &dev_attr_port))
3361 		goto err_class;
3362 
3363 	return host;
3364 
3365 err_class:
3366 	device_unregister(&host->dev);
3367 
3368 free_host:
3369 	kfree(host);
3370 
3371 	return NULL;
3372 }
3373 
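/**
 * srp_add_one() - IB client callback invoked for each new RDMA device
 * @device: Newly registered RDMA device.
 *
 * Queries the device attributes, allocates a protection domain and a DMA
 * memory region and registers one SRP host per port.
 */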
3374 static void srp_add_one(struct ib_device *device)
3375 {
3376 	struct srp_device *srp_dev;
3377 	struct ib_device_attr *dev_attr;
3378 	struct srp_host *host;
3379 	int mr_page_shift, p;
3380 	u64 max_pages_per_mr;
3381 
3382 	dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
3383 	if (!dev_attr)
3384 		return;
3385 
3386 	if (ib_query_device(device, dev_attr)) {
3387 		pr_warn("Query device failed for %s\n", device->name);
3388 		goto free_attr;
3389 	}
3390 
3391 	srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
3392 	if (!srp_dev)
3393 		goto free_attr;
3394 
3395 	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3396 			    device->map_phys_fmr && device->unmap_fmr);
3397 	srp_dev->has_fr = (dev_attr->device_cap_flags &
3398 			   IB_DEVICE_MEM_MGT_EXTENSIONS);
3399 	if (!srp_dev->has_fmr && !srp_dev->has_fr)
3400 		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3401 
3402 	srp_dev->use_fast_reg = (srp_dev->has_fr &&
3403 				 (!srp_dev->has_fmr || prefer_fr));
3404 
3405 	/*
3406 	 * Use the smallest page size supported by the HCA, down to a
3407 	 * minimum of 4096 bytes. We're unlikely to build large sglists
3408 	 * out of smaller entries.
3409 	 */
3410 	mr_page_shift		= max(12, ffs(dev_attr->page_size_cap) - 1);
3411 	srp_dev->mr_page_size	= 1 << mr_page_shift;
3412 	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
3413 	max_pages_per_mr	= dev_attr->max_mr_size;
3414 	do_div(max_pages_per_mr, srp_dev->mr_page_size);
3415 	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3416 					  max_pages_per_mr);
3417 	if (srp_dev->use_fast_reg) {
3418 		srp_dev->max_pages_per_mr =
3419 			min_t(u32, srp_dev->max_pages_per_mr,
3420 			      dev_attr->max_fast_reg_page_list_len);
3421 	}
3422 	srp_dev->mr_max_size	= srp_dev->mr_page_size *
3423 				   srp_dev->max_pages_per_mr;
3424 	pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3425 		 device->name, mr_page_shift, dev_attr->max_mr_size,
3426 		 dev_attr->max_fast_reg_page_list_len,
3427 		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3428 
3429 	INIT_LIST_HEAD(&srp_dev->dev_list);
3430 
3431 	srp_dev->dev = device;
3432 	srp_dev->pd  = ib_alloc_pd(device);
3433 	if (IS_ERR(srp_dev->pd))
3434 		goto free_dev;
3435 
3436 	srp_dev->mr = ib_get_dma_mr(srp_dev->pd,
3437 				    IB_ACCESS_LOCAL_WRITE |
3438 				    IB_ACCESS_REMOTE_READ |
3439 				    IB_ACCESS_REMOTE_WRITE);
3440 	if (IS_ERR(srp_dev->mr))
3441 		goto err_pd;
3442 
3443 	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3444 		host = srp_add_port(srp_dev, p);
3445 		if (host)
3446 			list_add_tail(&host->list, &srp_dev->dev_list);
3447 	}
3448 
3449 	ib_set_client_data(device, &srp_client, srp_dev);
3450 
3451 	goto free_attr;
3452 
3453 err_pd:
3454 	ib_dealloc_pd(srp_dev->pd);
3455 
3456 free_dev:
3457 	kfree(srp_dev);
3458 
3459 free_attr:
3460 	kfree(dev_attr);
3461 }
3462 
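/**
 * srp_remove_one() - IB client callback invoked when an RDMA device is removed
 * @device: RDMA device that is being removed.
 *
 * Unregisters the SRP hosts of @device, schedules removal of all associated
 * target ports, waits for the removal work to finish and frees the device
 * resources.
 */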
3463 static void srp_remove_one(struct ib_device *device)
3464 {
3465 	struct srp_device *srp_dev;
3466 	struct srp_host *host, *tmp_host;
3467 	struct srp_target_port *target;
3468 
3469 	srp_dev = ib_get_client_data(device, &srp_client);
3470 	if (!srp_dev)
3471 		return;
3472 
3473 	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3474 		device_unregister(&host->dev);
3475 		/*
3476 		 * Wait for the sysfs entry to go away, so that no new
3477 		 * target ports can be created.
3478 		 */
3479 		wait_for_completion(&host->released);
3480 
3481 		/*
3482 		 * Remove all target ports.
3483 		 */
3484 		spin_lock(&host->target_lock);
3485 		list_for_each_entry(target, &host->target_list, list)
3486 			srp_queue_remove_work(target);
3487 		spin_unlock(&host->target_lock);
3488 
3489 		/*
3490 		 * Wait for tl_err and target port removal tasks.
3491 		 */
3492 		flush_workqueue(system_long_wq);
3493 		flush_workqueue(srp_remove_wq);
3494 
3495 		kfree(host);
3496 	}
3497 
3498 	ib_dereg_mr(srp_dev->mr);
3499 	ib_dealloc_pd(srp_dev->pd);
3500 
3501 	kfree(srp_dev);
3502 }
3503 
3504 static struct srp_function_template ib_srp_transport_functions = {
3505 	.has_rport_state	 = true,
3506 	.reset_timer_if_blocked	 = true,
3507 	.reconnect_delay	 = &srp_reconnect_delay,
3508 	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
3509 	.dev_loss_tmo		 = &srp_dev_loss_tmo,
3510 	.reconnect		 = srp_rport_reconnect,
3511 	.rport_delete		 = srp_rport_delete,
3512 	.terminate_rport_io	 = srp_terminate_io,
3513 };
3514 
3515 static int __init srp_init_module(void)
3516 {
3517 	int ret;
3518 
3519 	BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
3520 
3521 	if (srp_sg_tablesize) {
3522 		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3523 		if (!cmd_sg_entries)
3524 			cmd_sg_entries = srp_sg_tablesize;
3525 	}
3526 
3527 	if (!cmd_sg_entries)
3528 		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3529 
3530 	if (cmd_sg_entries > 255) {
3531 		pr_warn("Clamping cmd_sg_entries to 255\n");
3532 		cmd_sg_entries = 255;
3533 	}
3534 
3535 	if (!indirect_sg_entries)
3536 		indirect_sg_entries = cmd_sg_entries;
3537 	else if (indirect_sg_entries < cmd_sg_entries) {
3538 		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3539 			cmd_sg_entries);
3540 		indirect_sg_entries = cmd_sg_entries;
3541 	}
3542 
3543 	srp_remove_wq = create_workqueue("srp_remove");
3544 	if (!srp_remove_wq) {
3545 		ret = -ENOMEM;
3546 		goto out;
3547 	}
3548 
3549 	ret = -ENOMEM;
3550 	ib_srp_transport_template =
3551 		srp_attach_transport(&ib_srp_transport_functions);
3552 	if (!ib_srp_transport_template)
3553 		goto destroy_wq;
3554 
3555 	ret = class_register(&srp_class);
3556 	if (ret) {
3557 		pr_err("couldn't register class infiniband_srp\n");
3558 		goto release_tr;
3559 	}
3560 
3561 	ib_sa_register_client(&srp_sa_client);
3562 
3563 	ret = ib_register_client(&srp_client);
3564 	if (ret) {
3565 		pr_err("couldn't register IB client\n");
3566 		goto unreg_sa;
3567 	}
3568 
3569 out:
3570 	return ret;
3571 
3572 unreg_sa:
3573 	ib_sa_unregister_client(&srp_sa_client);
3574 	class_unregister(&srp_class);
3575 
3576 release_tr:
3577 	srp_release_transport(ib_srp_transport_template);
3578 
3579 destroy_wq:
3580 	destroy_workqueue(srp_remove_wq);
3581 	goto out;
3582 }
3583 
3584 static void __exit srp_cleanup_module(void)
3585 {
3586 	ib_unregister_client(&srp_client);
3587 	ib_sa_unregister_client(&srp_sa_client);
3588 	class_unregister(&srp_class);
3589 	srp_release_transport(ib_srp_transport_template);
3590 	destroy_workqueue(srp_remove_wq);
3591 }
3592 
3593 module_init(srp_init_module);
3594 module_exit(srp_cleanup_module);
3595