xref: /linux/drivers/infiniband/ulp/srp/ib_srp.c (revision 32786fdc9506aeba98278c1844d4bfb766863832)
1 /*
2  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34 
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <rdma/ib_cache.h>
44 
45 #include <linux/atomic.h>
46 
47 #include <scsi/scsi.h>
48 #include <scsi/scsi_device.h>
49 #include <scsi/scsi_dbg.h>
50 #include <scsi/scsi_tcq.h>
51 #include <scsi/srp.h>
52 #include <scsi/scsi_transport_srp.h>
53 
54 #include "ib_srp.h"
55 
56 #define DRV_NAME	"ib_srp"
57 #define PFX		DRV_NAME ": "
58 #define DRV_VERSION	"2.0"
59 #define DRV_RELDATE	"July 26, 2015"
60 
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_VERSION(DRV_VERSION);
65 MODULE_INFO(release_date, DRV_RELDATE);
66 
67 #if !defined(CONFIG_DYNAMIC_DEBUG)
68 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
69 #define DYNAMIC_DEBUG_BRANCH(descriptor) false
70 #endif
71 
72 static unsigned int srp_sg_tablesize;
73 static unsigned int cmd_sg_entries;
74 static unsigned int indirect_sg_entries;
75 static bool allow_ext_sg;
76 static bool prefer_fr = true;
77 static bool register_always = true;
78 static bool never_register;
79 static int topspin_workarounds = 1;
80 
81 module_param(srp_sg_tablesize, uint, 0444);
82 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
83 
84 module_param(cmd_sg_entries, uint, 0444);
85 MODULE_PARM_DESC(cmd_sg_entries,
86 		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
87 
88 module_param(indirect_sg_entries, uint, 0444);
89 MODULE_PARM_DESC(indirect_sg_entries,
90 		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
91 
92 module_param(allow_ext_sg, bool, 0444);
93 MODULE_PARM_DESC(allow_ext_sg,
94 		  "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
95 
96 module_param(topspin_workarounds, int, 0444);
97 MODULE_PARM_DESC(topspin_workarounds,
98 		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
99 
100 module_param(prefer_fr, bool, 0444);
101 MODULE_PARM_DESC(prefer_fr,
102 "Whether to use fast registration if both FMR and fast registration are supported");
103 
104 module_param(register_always, bool, 0444);
105 MODULE_PARM_DESC(register_always,
106 		 "Use memory registration even for contiguous memory regions");
107 
108 module_param(never_register, bool, 0444);
109 MODULE_PARM_DESC(never_register, "Never register memory");
110 
111 static const struct kernel_param_ops srp_tmo_ops;
112 
113 static int srp_reconnect_delay = 10;
114 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
115 		S_IRUGO | S_IWUSR);
116 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
117 
118 static int srp_fast_io_fail_tmo = 15;
119 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
120 		S_IRUGO | S_IWUSR);
121 MODULE_PARM_DESC(fast_io_fail_tmo,
122 		 "Number of seconds between the observation of a transport"
123 		 " layer error and failing all I/O. \"off\" means that this"
124 		 " functionality is disabled.");
125 
126 static int srp_dev_loss_tmo = 600;
127 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
128 		S_IRUGO | S_IWUSR);
129 MODULE_PARM_DESC(dev_loss_tmo,
130 		 "Maximum number of seconds that the SRP transport should"
131 		 " insulate transport layer errors. After this time has been"
132 		 " exceeded the SCSI host is removed. Should be"
133 		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
134 		 " if fast_io_fail_tmo has not been set. \"off\" means that"
135 		 " this functionality is disabled.");
136 
137 static unsigned ch_count;
138 module_param(ch_count, uint, 0444);
139 MODULE_PARM_DESC(ch_count,
140 		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
141 
142 static void srp_add_one(struct ib_device *device);
143 static void srp_remove_one(struct ib_device *device, void *client_data);
144 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
145 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
146 		const char *opname);
147 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
148 
149 static struct scsi_transport_template *ib_srp_transport_template;
150 static struct workqueue_struct *srp_remove_wq;
151 
152 static struct ib_client srp_client = {
153 	.name   = "srp",
154 	.add    = srp_add_one,
155 	.remove = srp_remove_one
156 };
157 
158 static struct ib_sa_client srp_sa_client;
159 
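/*
 * Show an SRP timeout module parameter; a negative value is reported
 * as "off".
 */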
160 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
161 {
162 	int tmo = *(int *)kp->arg;
163 
164 	if (tmo >= 0)
165 		return sprintf(buffer, "%d", tmo);
166 	else
167 		return sprintf(buffer, "off");
168 }
169 
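/*
 * Parse an SRP timeout module parameter and check that the resulting
 * combination of reconnect_delay, fast_io_fail_tmo and dev_loss_tmo is
 * valid before storing the new value.
 */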
170 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
171 {
172 	int tmo, res;
173 
174 	res = srp_parse_tmo(&tmo, val);
175 	if (res)
176 		goto out;
177 
178 	if (kp->arg == &srp_reconnect_delay)
179 		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
180 				    srp_dev_loss_tmo);
181 	else if (kp->arg == &srp_fast_io_fail_tmo)
182 		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
183 	else
184 		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
185 				    tmo);
186 	if (res)
187 		goto out;
188 	*(int *)kp->arg = tmo;
189 
190 out:
191 	return res;
192 }
193 
194 static const struct kernel_param_ops srp_tmo_ops = {
195 	.get = srp_tmo_get,
196 	.set = srp_tmo_set,
197 };
198 
199 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
200 {
201 	return (struct srp_target_port *) host->hostdata;
202 }
203 
204 static const char *srp_target_info(struct Scsi_Host *host)
205 {
206 	return host_to_target(host)->target_name;
207 }
208 
209 static int srp_target_is_topspin(struct srp_target_port *target)
210 {
211 	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
212 	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };
213 
214 	return topspin_workarounds &&
215 		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
216 		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
217 }
218 
219 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
220 				   gfp_t gfp_mask,
221 				   enum dma_data_direction direction)
222 {
223 	struct srp_iu *iu;
224 
225 	iu = kmalloc(sizeof *iu, gfp_mask);
226 	if (!iu)
227 		goto out;
228 
229 	iu->buf = kzalloc(size, gfp_mask);
230 	if (!iu->buf)
231 		goto out_free_iu;
232 
233 	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
234 				    direction);
235 	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
236 		goto out_free_buf;
237 
238 	iu->size      = size;
239 	iu->direction = direction;
240 
241 	return iu;
242 
243 out_free_buf:
244 	kfree(iu->buf);
245 out_free_iu:
246 	kfree(iu);
247 out:
248 	return NULL;
249 }
250 
251 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
252 {
253 	if (!iu)
254 		return;
255 
256 	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
257 			    iu->direction);
258 	kfree(iu->buf);
259 	kfree(iu);
260 }
261 
262 static void srp_qp_event(struct ib_event *event, void *context)
263 {
264 	pr_debug("QP event %s (%d)\n",
265 		 ib_event_msg(event->event), event->event);
266 }
267 
268 static int srp_init_qp(struct srp_target_port *target,
269 		       struct ib_qp *qp)
270 {
271 	struct ib_qp_attr *attr;
272 	int ret;
273 
274 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
275 	if (!attr)
276 		return -ENOMEM;
277 
278 	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
279 				  target->srp_host->port,
280 				  be16_to_cpu(target->pkey),
281 				  &attr->pkey_index);
282 	if (ret)
283 		goto out;
284 
285 	attr->qp_state        = IB_QPS_INIT;
286 	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
287 				    IB_ACCESS_REMOTE_WRITE);
288 	attr->port_num        = target->srp_host->port;
289 
290 	ret = ib_modify_qp(qp, attr,
291 			   IB_QP_STATE		|
292 			   IB_QP_PKEY_INDEX	|
293 			   IB_QP_ACCESS_FLAGS	|
294 			   IB_QP_PORT);
295 
296 out:
297 	kfree(attr);
298 	return ret;
299 }
300 
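/*
 * Allocate a new CM ID for @ch, destroy the old CM ID if there is one, and
 * reinitialize the path record parameters from the target port information.
 */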
301 static int srp_new_cm_id(struct srp_rdma_ch *ch)
302 {
303 	struct srp_target_port *target = ch->target;
304 	struct ib_cm_id *new_cm_id;
305 
306 	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
307 				    srp_cm_handler, ch);
308 	if (IS_ERR(new_cm_id))
309 		return PTR_ERR(new_cm_id);
310 
311 	if (ch->cm_id)
312 		ib_destroy_cm_id(ch->cm_id);
313 	ch->cm_id = new_cm_id;
314 	ch->path.sgid = target->sgid;
315 	ch->path.dgid = target->orig_dgid;
316 	ch->path.pkey = target->pkey;
317 	ch->path.service_id = target->service_id;
318 
319 	return 0;
320 }
321 
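/*
 * Create an FMR pool for @target with target->mr_pool_size entries, FMR
 * caching enabled and a page size of dev->mr_page_size.
 */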
322 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
323 {
324 	struct srp_device *dev = target->srp_host->srp_dev;
325 	struct ib_fmr_pool_param fmr_param;
326 
327 	memset(&fmr_param, 0, sizeof(fmr_param));
328 	fmr_param.pool_size	    = target->mr_pool_size;
329 	fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
330 	fmr_param.cache		    = 1;
331 	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
332 	fmr_param.page_shift	    = ilog2(dev->mr_page_size);
333 	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
334 				       IB_ACCESS_REMOTE_WRITE |
335 				       IB_ACCESS_REMOTE_READ);
336 
337 	return ib_create_fmr_pool(dev->pd, &fmr_param);
338 }
339 
340 /**
341  * srp_destroy_fr_pool() - free the resources owned by a pool
342  * @pool: Fast registration pool to be destroyed.
343  */
344 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
345 {
346 	int i;
347 	struct srp_fr_desc *d;
348 
349 	if (!pool)
350 		return;
351 
352 	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
353 		if (d->mr)
354 			ib_dereg_mr(d->mr);
355 	}
356 	kfree(pool);
357 }
358 
359 /**
360  * srp_create_fr_pool() - allocate and initialize a pool for fast registration
361  * @device:            IB device to allocate fast registration descriptors for.
362  * @pd:                Protection domain associated with the FR descriptors.
363  * @pool_size:         Number of descriptors to allocate.
364  * @max_page_list_len: Maximum fast registration work request page list length.
365  */
366 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
367 					      struct ib_pd *pd, int pool_size,
368 					      int max_page_list_len)
369 {
370 	struct srp_fr_pool *pool;
371 	struct srp_fr_desc *d;
372 	struct ib_mr *mr;
373 	int i, ret = -EINVAL;
374 
375 	if (pool_size <= 0)
376 		goto err;
377 	ret = -ENOMEM;
378 	pool = kzalloc(sizeof(struct srp_fr_pool) +
379 		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
380 	if (!pool)
381 		goto err;
382 	pool->size = pool_size;
383 	pool->max_page_list_len = max_page_list_len;
384 	spin_lock_init(&pool->lock);
385 	INIT_LIST_HEAD(&pool->free_list);
386 
387 	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
388 		mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
389 				 max_page_list_len);
390 		if (IS_ERR(mr)) {
391 			ret = PTR_ERR(mr);
392 			if (ret == -ENOMEM)
393 				pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
394 					dev_name(&device->dev));
395 			goto destroy_pool;
396 		}
397 		d->mr = mr;
398 		list_add_tail(&d->entry, &pool->free_list);
399 	}
400 
401 out:
402 	return pool;
403 
404 destroy_pool:
405 	srp_destroy_fr_pool(pool);
406 
407 err:
408 	pool = ERR_PTR(ret);
409 	goto out;
410 }
411 
412 /**
413  * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
414  * @pool: Pool to obtain descriptor from.
415  */
416 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
417 {
418 	struct srp_fr_desc *d = NULL;
419 	unsigned long flags;
420 
421 	spin_lock_irqsave(&pool->lock, flags);
422 	if (!list_empty(&pool->free_list)) {
423 		d = list_first_entry(&pool->free_list, typeof(*d), entry);
424 		list_del(&d->entry);
425 	}
426 	spin_unlock_irqrestore(&pool->lock, flags);
427 
428 	return d;
429 }
430 
431 /**
432  * srp_fr_pool_put() - put an FR descriptor back in the free list
433  * @pool: Pool the descriptor was allocated from.
434  * @desc: Pointer to an array of fast registration descriptor pointers.
435  * @n:    Number of descriptors to put back.
436  *
437  * Note: The caller must already have queued an invalidation request for
438  * desc->mr->rkey before calling this function.
439  */
440 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
441 			    int n)
442 {
443 	unsigned long flags;
444 	int i;
445 
446 	spin_lock_irqsave(&pool->lock, flags);
447 	for (i = 0; i < n; i++)
448 		list_add(&desc[i]->entry, &pool->free_list);
449 	spin_unlock_irqrestore(&pool->lock, flags);
450 }
451 
452 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
453 {
454 	struct srp_device *dev = target->srp_host->srp_dev;
455 
456 	return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
457 				  dev->max_pages_per_mr);
458 }
459 
460 /**
461  * srp_destroy_qp() - destroy an RDMA queue pair
462  * @qp: RDMA queue pair.
463  *
464  * Drain the qp before destroying it. This prevents the receive
465  * completion handler from accessing the queue pair while it is
466  * being destroyed.
467  */
468 static void srp_destroy_qp(struct ib_qp *qp)
469 {
470 	ib_drain_rq(qp);
471 	ib_destroy_qp(qp);
472 }
473 
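/*
 * Allocate the receive and send completion queues, the RC QP and the FR or
 * FMR pool for @ch. Resources allocated by a previous call are only freed
 * after the new resources have been created successfully.
 */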
474 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
475 {
476 	struct srp_target_port *target = ch->target;
477 	struct srp_device *dev = target->srp_host->srp_dev;
478 	struct ib_qp_init_attr *init_attr;
479 	struct ib_cq *recv_cq, *send_cq;
480 	struct ib_qp *qp;
481 	struct ib_fmr_pool *fmr_pool = NULL;
482 	struct srp_fr_pool *fr_pool = NULL;
483 	const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
484 	int ret;
485 
486 	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
487 	if (!init_attr)
488 		return -ENOMEM;
489 
490 	/* queue_size + 1 for ib_drain_rq() */
491 	recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
492 				ch->comp_vector, IB_POLL_SOFTIRQ);
493 	if (IS_ERR(recv_cq)) {
494 		ret = PTR_ERR(recv_cq);
495 		goto err;
496 	}
497 
498 	send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
499 				ch->comp_vector, IB_POLL_DIRECT);
500 	if (IS_ERR(send_cq)) {
501 		ret = PTR_ERR(send_cq);
502 		goto err_recv_cq;
503 	}
504 
505 	init_attr->event_handler       = srp_qp_event;
506 	init_attr->cap.max_send_wr     = m * target->queue_size;
507 	init_attr->cap.max_recv_wr     = target->queue_size + 1;
508 	init_attr->cap.max_recv_sge    = 1;
509 	init_attr->cap.max_send_sge    = 1;
510 	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
511 	init_attr->qp_type             = IB_QPT_RC;
512 	init_attr->send_cq             = send_cq;
513 	init_attr->recv_cq             = recv_cq;
514 
515 	qp = ib_create_qp(dev->pd, init_attr);
516 	if (IS_ERR(qp)) {
517 		ret = PTR_ERR(qp);
518 		goto err_send_cq;
519 	}
520 
521 	ret = srp_init_qp(target, qp);
522 	if (ret)
523 		goto err_qp;
524 
525 	if (dev->use_fast_reg) {
526 		fr_pool = srp_alloc_fr_pool(target);
527 		if (IS_ERR(fr_pool)) {
528 			ret = PTR_ERR(fr_pool);
529 			shost_printk(KERN_WARNING, target->scsi_host, PFX
530 				     "FR pool allocation failed (%d)\n", ret);
531 			goto err_qp;
532 		}
533 	} else if (dev->use_fmr) {
534 		fmr_pool = srp_alloc_fmr_pool(target);
535 		if (IS_ERR(fmr_pool)) {
536 			ret = PTR_ERR(fmr_pool);
537 			shost_printk(KERN_WARNING, target->scsi_host, PFX
538 				     "FMR pool allocation failed (%d)\n", ret);
539 			goto err_qp;
540 		}
541 	}
542 
543 	if (ch->qp)
544 		srp_destroy_qp(ch->qp);
545 	if (ch->recv_cq)
546 		ib_free_cq(ch->recv_cq);
547 	if (ch->send_cq)
548 		ib_free_cq(ch->send_cq);
549 
550 	ch->qp = qp;
551 	ch->recv_cq = recv_cq;
552 	ch->send_cq = send_cq;
553 
554 	if (dev->use_fast_reg) {
555 		if (ch->fr_pool)
556 			srp_destroy_fr_pool(ch->fr_pool);
557 		ch->fr_pool = fr_pool;
558 	} else if (dev->use_fmr) {
559 		if (ch->fmr_pool)
560 			ib_destroy_fmr_pool(ch->fmr_pool);
561 		ch->fmr_pool = fmr_pool;
562 	}
563 
564 	kfree(init_attr);
565 	return 0;
566 
567 err_qp:
568 	srp_destroy_qp(qp);
569 
570 err_send_cq:
571 	ib_free_cq(send_cq);
572 
573 err_recv_cq:
574 	ib_free_cq(recv_cq);
575 
576 err:
577 	kfree(init_attr);
578 	return ret;
579 }
580 
581 /*
582  * Note: this function may be called without srp_alloc_iu_bufs() having been
583  * invoked. Hence the ch->[rt]x_ring checks.
584  */
585 static void srp_free_ch_ib(struct srp_target_port *target,
586 			   struct srp_rdma_ch *ch)
587 {
588 	struct srp_device *dev = target->srp_host->srp_dev;
589 	int i;
590 
591 	if (!ch->target)
592 		return;
593 
594 	if (ch->cm_id) {
595 		ib_destroy_cm_id(ch->cm_id);
596 		ch->cm_id = NULL;
597 	}
598 
599 	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() did not, return. */
600 	if (!ch->qp)
601 		return;
602 
603 	if (dev->use_fast_reg) {
604 		if (ch->fr_pool)
605 			srp_destroy_fr_pool(ch->fr_pool);
606 	} else if (dev->use_fmr) {
607 		if (ch->fmr_pool)
608 			ib_destroy_fmr_pool(ch->fmr_pool);
609 	}
610 
611 	srp_destroy_qp(ch->qp);
612 	ib_free_cq(ch->send_cq);
613 	ib_free_cq(ch->recv_cq);
614 
615 	/*
616 	 * Prevent the SCSI error handler from using this channel after it
617 	 * has been freed. The SCSI error handler may keep trying to
618 	 * perform recovery actions after scsi_remove_host()
619 	 * has returned.
620 	 */
621 	ch->target = NULL;
622 
623 	ch->qp = NULL;
624 	ch->send_cq = ch->recv_cq = NULL;
625 
626 	if (ch->rx_ring) {
627 		for (i = 0; i < target->queue_size; ++i)
628 			srp_free_iu(target->srp_host, ch->rx_ring[i]);
629 		kfree(ch->rx_ring);
630 		ch->rx_ring = NULL;
631 	}
632 	if (ch->tx_ring) {
633 		for (i = 0; i < target->queue_size; ++i)
634 			srp_free_iu(target->srp_host, ch->tx_ring[i]);
635 		kfree(ch->tx_ring);
636 		ch->tx_ring = NULL;
637 	}
638 }
639 
640 static void srp_path_rec_completion(int status,
641 				    struct ib_sa_path_rec *pathrec,
642 				    void *ch_ptr)
643 {
644 	struct srp_rdma_ch *ch = ch_ptr;
645 	struct srp_target_port *target = ch->target;
646 
647 	ch->status = status;
648 	if (status)
649 		shost_printk(KERN_ERR, target->scsi_host,
650 			     PFX "Got failed path rec status %d\n", status);
651 	else
652 		ch->path = *pathrec;
653 	complete(&ch->done);
654 }
655 
656 static int srp_lookup_path(struct srp_rdma_ch *ch)
657 {
658 	struct srp_target_port *target = ch->target;
659 	int ret;
660 
661 	ch->path.numb_path = 1;
662 
663 	init_completion(&ch->done);
664 
665 	ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
666 					       target->srp_host->srp_dev->dev,
667 					       target->srp_host->port,
668 					       &ch->path,
669 					       IB_SA_PATH_REC_SERVICE_ID |
670 					       IB_SA_PATH_REC_DGID	 |
671 					       IB_SA_PATH_REC_SGID	 |
672 					       IB_SA_PATH_REC_NUMB_PATH	 |
673 					       IB_SA_PATH_REC_PKEY,
674 					       SRP_PATH_REC_TIMEOUT_MS,
675 					       GFP_KERNEL,
676 					       srp_path_rec_completion,
677 					       ch, &ch->path_query);
678 	if (ch->path_query_id < 0)
679 		return ch->path_query_id;
680 
681 	ret = wait_for_completion_interruptible(&ch->done);
682 	if (ret < 0)
683 		return ret;
684 
685 	if (ch->status < 0)
686 		shost_printk(KERN_WARNING, target->scsi_host,
687 			     PFX "Path record query failed\n");
688 
689 	return ch->status;
690 }
691 
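/*
 * Send a CM REQ whose private data contains an SRP_LOGIN_REQ. The initiator
 * and target port IDs are laid out according to the I/O class reported by
 * the target, and the Topspin/Cisco initiator port ID workaround is applied
 * if necessary.
 */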
692 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
693 {
694 	struct srp_target_port *target = ch->target;
695 	struct {
696 		struct ib_cm_req_param param;
697 		struct srp_login_req   priv;
698 	} *req = NULL;
699 	int status;
700 
701 	req = kzalloc(sizeof *req, GFP_KERNEL);
702 	if (!req)
703 		return -ENOMEM;
704 
705 	req->param.primary_path		      = &ch->path;
706 	req->param.alternate_path 	      = NULL;
707 	req->param.service_id 		      = target->service_id;
708 	req->param.qp_num		      = ch->qp->qp_num;
709 	req->param.qp_type		      = ch->qp->qp_type;
710 	req->param.private_data 	      = &req->priv;
711 	req->param.private_data_len 	      = sizeof req->priv;
712 	req->param.flow_control 	      = 1;
713 
714 	get_random_bytes(&req->param.starting_psn, 4);
715 	req->param.starting_psn 	     &= 0xffffff;
716 
717 	/*
718 	 * Pick some arbitrary defaults here; we could make these
719 	 * module parameters if anyone cared about setting them.
720 	 */
721 	req->param.responder_resources	      = 4;
722 	req->param.remote_cm_response_timeout = 20;
723 	req->param.local_cm_response_timeout  = 20;
724 	req->param.retry_count                = target->tl_retry_count;
725 	req->param.rnr_retry_count 	      = 7;
726 	req->param.max_cm_retries 	      = 15;
727 
728 	req->priv.opcode     	= SRP_LOGIN_REQ;
729 	req->priv.tag        	= 0;
730 	req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
731 	req->priv.req_buf_fmt 	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
732 					      SRP_BUF_FORMAT_INDIRECT);
733 	req->priv.req_flags	= (multich ? SRP_MULTICHAN_MULTI :
734 				   SRP_MULTICHAN_SINGLE);
735 	/*
736 	 * In the published SRP specification (draft rev. 16a), the
737 	 * port identifier format is 8 bytes of ID extension followed
738 	 * by 8 bytes of GUID.  Older drafts put the two halves in the
739 	 * opposite order, so that the GUID comes first.
740 	 *
741 	 * Targets conforming to these obsolete drafts can be
742 	 * recognized by the I/O Class they report.
743 	 */
744 	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
745 		memcpy(req->priv.initiator_port_id,
746 		       &target->sgid.global.interface_id, 8);
747 		memcpy(req->priv.initiator_port_id + 8,
748 		       &target->initiator_ext, 8);
749 		memcpy(req->priv.target_port_id,     &target->ioc_guid, 8);
750 		memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
751 	} else {
752 		memcpy(req->priv.initiator_port_id,
753 		       &target->initiator_ext, 8);
754 		memcpy(req->priv.initiator_port_id + 8,
755 		       &target->sgid.global.interface_id, 8);
756 		memcpy(req->priv.target_port_id,     &target->id_ext, 8);
757 		memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
758 	}
759 
760 	/*
761 	 * Topspin/Cisco SRP targets will reject our login unless we
762 	 * zero out the first 8 bytes of our initiator port ID and set
763 	 * the second 8 bytes to the local node GUID.
764 	 */
765 	if (srp_target_is_topspin(target)) {
766 		shost_printk(KERN_DEBUG, target->scsi_host,
767 			     PFX "Topspin/Cisco initiator port ID workaround "
768 			     "activated for target GUID %016llx\n",
769 			     be64_to_cpu(target->ioc_guid));
770 		memset(req->priv.initiator_port_id, 0, 8);
771 		memcpy(req->priv.initiator_port_id + 8,
772 		       &target->srp_host->srp_dev->dev->node_guid, 8);
773 	}
774 
775 	status = ib_send_cm_req(ch->cm_id, &req->param);
776 
777 	kfree(req);
778 
779 	return status;
780 }
781 
782 static bool srp_queue_remove_work(struct srp_target_port *target)
783 {
784 	bool changed = false;
785 
786 	spin_lock_irq(&target->lock);
787 	if (target->state != SRP_TARGET_REMOVED) {
788 		target->state = SRP_TARGET_REMOVED;
789 		changed = true;
790 	}
791 	spin_unlock_irq(&target->lock);
792 
793 	if (changed)
794 		queue_work(srp_remove_wq, &target->remove_work);
795 
796 	return changed;
797 }
798 
799 static void srp_disconnect_target(struct srp_target_port *target)
800 {
801 	struct srp_rdma_ch *ch;
802 	int i;
803 
804 	/* XXX should send SRP_I_LOGOUT request */
805 
806 	for (i = 0; i < target->ch_count; i++) {
807 		ch = &target->ch[i];
808 		ch->connected = false;
809 		if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
810 			shost_printk(KERN_DEBUG, target->scsi_host,
811 				     PFX "Sending CM DREQ failed\n");
812 		}
813 	}
814 }
815 
816 static void srp_free_req_data(struct srp_target_port *target,
817 			      struct srp_rdma_ch *ch)
818 {
819 	struct srp_device *dev = target->srp_host->srp_dev;
820 	struct ib_device *ibdev = dev->dev;
821 	struct srp_request *req;
822 	int i;
823 
824 	if (!ch->req_ring)
825 		return;
826 
827 	for (i = 0; i < target->req_ring_size; ++i) {
828 		req = &ch->req_ring[i];
829 		if (dev->use_fast_reg) {
830 			kfree(req->fr_list);
831 		} else {
832 			kfree(req->fmr_list);
833 			kfree(req->map_page);
834 		}
835 		if (req->indirect_dma_addr) {
836 			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
837 					    target->indirect_size,
838 					    DMA_TO_DEVICE);
839 		}
840 		kfree(req->indirect_desc);
841 	}
842 
843 	kfree(ch->req_ring);
844 	ch->req_ring = NULL;
845 }
846 
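/*
 * Allocate the request ring of @ch, including the per-request MR lists and
 * the DMA-mapped indirect descriptor buffers.
 */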
847 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
848 {
849 	struct srp_target_port *target = ch->target;
850 	struct srp_device *srp_dev = target->srp_host->srp_dev;
851 	struct ib_device *ibdev = srp_dev->dev;
852 	struct srp_request *req;
853 	void *mr_list;
854 	dma_addr_t dma_addr;
855 	int i, ret = -ENOMEM;
856 
857 	ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
858 			       GFP_KERNEL);
859 	if (!ch->req_ring)
860 		goto out;
861 
862 	for (i = 0; i < target->req_ring_size; ++i) {
863 		req = &ch->req_ring[i];
864 		mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
865 				  GFP_KERNEL);
866 		if (!mr_list)
867 			goto out;
868 		if (srp_dev->use_fast_reg) {
869 			req->fr_list = mr_list;
870 		} else {
871 			req->fmr_list = mr_list;
872 			req->map_page = kmalloc(srp_dev->max_pages_per_mr *
873 						sizeof(void *), GFP_KERNEL);
874 			if (!req->map_page)
875 				goto out;
876 		}
877 		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
878 		if (!req->indirect_desc)
879 			goto out;
880 
881 		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
882 					     target->indirect_size,
883 					     DMA_TO_DEVICE);
884 		if (ib_dma_mapping_error(ibdev, dma_addr))
885 			goto out;
886 
887 		req->indirect_dma_addr = dma_addr;
888 	}
889 	ret = 0;
890 
891 out:
892 	return ret;
893 }
894 
895 /**
896  * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
897  * @shost: SCSI host whose attributes to remove from sysfs.
898  *
899  * Note: Any attributes defined in the host template that did not exist
900  * before this function was invoked are ignored.
901  */
902 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
903 {
904 	struct device_attribute **attr;
905 
906 	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
907 		device_remove_file(&shost->shost_dev, *attr);
908 }
909 
910 static void srp_remove_target(struct srp_target_port *target)
911 {
912 	struct srp_rdma_ch *ch;
913 	int i;
914 
915 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
916 
917 	srp_del_scsi_host_attr(target->scsi_host);
918 	srp_rport_get(target->rport);
919 	srp_remove_host(target->scsi_host);
920 	scsi_remove_host(target->scsi_host);
921 	srp_stop_rport_timers(target->rport);
922 	srp_disconnect_target(target);
923 	for (i = 0; i < target->ch_count; i++) {
924 		ch = &target->ch[i];
925 		srp_free_ch_ib(target, ch);
926 	}
927 	cancel_work_sync(&target->tl_err_work);
928 	srp_rport_put(target->rport);
929 	for (i = 0; i < target->ch_count; i++) {
930 		ch = &target->ch[i];
931 		srp_free_req_data(target, ch);
932 	}
933 	kfree(target->ch);
934 	target->ch = NULL;
935 
936 	spin_lock(&target->srp_host->target_lock);
937 	list_del(&target->list);
938 	spin_unlock(&target->srp_host->target_lock);
939 
940 	scsi_host_put(target->scsi_host);
941 }
942 
943 static void srp_remove_work(struct work_struct *work)
944 {
945 	struct srp_target_port *target =
946 		container_of(work, struct srp_target_port, remove_work);
947 
948 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
949 
950 	srp_remove_target(target);
951 }
952 
953 static void srp_rport_delete(struct srp_rport *rport)
954 {
955 	struct srp_target_port *target = rport->lld_data;
956 
957 	srp_queue_remove_work(target);
958 }
959 
960 /**
961  * srp_connected_ch() - number of connected channels
962  * @target: SRP target port.
963  */
964 static int srp_connected_ch(struct srp_target_port *target)
965 {
966 	int i, c = 0;
967 
968 	for (i = 0; i < target->ch_count; i++)
969 		c += target->ch[i].connected;
970 
971 	return c;
972 }
973 
974 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
975 {
976 	struct srp_target_port *target = ch->target;
977 	int ret;
978 
979 	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
980 
981 	ret = srp_lookup_path(ch);
982 	if (ret)
983 		goto out;
984 
985 	while (1) {
986 		init_completion(&ch->done);
987 		ret = srp_send_req(ch, multich);
988 		if (ret)
989 			goto out;
990 		ret = wait_for_completion_interruptible(&ch->done);
991 		if (ret < 0)
992 			goto out;
993 
994 		/*
995 		 * The CM event handling code will set status to
996 		 * SRP_PORT_REDIRECT if we get a port redirect REJ
997 		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
998 		 * redirect REJ back.
999 		 */
1000 		ret = ch->status;
1001 		switch (ret) {
1002 		case 0:
1003 			ch->connected = true;
1004 			goto out;
1005 
1006 		case SRP_PORT_REDIRECT:
1007 			ret = srp_lookup_path(ch);
1008 			if (ret)
1009 				goto out;
1010 			break;
1011 
1012 		case SRP_DLID_REDIRECT:
1013 			break;
1014 
1015 		case SRP_STALE_CONN:
1016 			shost_printk(KERN_ERR, target->scsi_host, PFX
1017 				     "giving up on stale connection\n");
1018 			ret = -ECONNRESET;
1019 			goto out;
1020 
1021 		default:
1022 			goto out;
1023 		}
1024 	}
1025 
1026 out:
1027 	return ret <= 0 ? ret : -ENODEV;
1028 }
1029 
1030 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1031 {
1032 	srp_handle_qp_err(cq, wc, "INV RKEY");
1033 }
1034 
1035 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1036 		u32 rkey)
1037 {
1038 	struct ib_send_wr *bad_wr;
1039 	struct ib_send_wr wr = {
1040 		.opcode		    = IB_WR_LOCAL_INV,
1041 		.next		    = NULL,
1042 		.num_sge	    = 0,
1043 		.send_flags	    = 0,
1044 		.ex.invalidate_rkey = rkey,
1045 	};
1046 
1047 	wr.wr_cqe = &req->reg_cqe;
1048 	req->reg_cqe.done = srp_inv_rkey_err_done;
1049 	return ib_post_send(ch->qp, &wr, &bad_wr);
1050 }
1051 
1052 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1053 			   struct srp_rdma_ch *ch,
1054 			   struct srp_request *req)
1055 {
1056 	struct srp_target_port *target = ch->target;
1057 	struct srp_device *dev = target->srp_host->srp_dev;
1058 	struct ib_device *ibdev = dev->dev;
1059 	int i, res;
1060 
1061 	if (!scsi_sglist(scmnd) ||
1062 	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1063 	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
1064 		return;
1065 
1066 	if (dev->use_fast_reg) {
1067 		struct srp_fr_desc **pfr;
1068 
1069 		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1070 			res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1071 			if (res < 0) {
1072 				shost_printk(KERN_ERR, target->scsi_host, PFX
1073 				  "Queueing INV WR for rkey %#x failed (%d)\n",
1074 				  (*pfr)->mr->rkey, res);
1075 				queue_work(system_long_wq,
1076 					   &target->tl_err_work);
1077 			}
1078 		}
1079 		if (req->nmdesc)
1080 			srp_fr_pool_put(ch->fr_pool, req->fr_list,
1081 					req->nmdesc);
1082 	} else if (dev->use_fmr) {
1083 		struct ib_pool_fmr **pfmr;
1084 
1085 		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1086 			ib_fmr_pool_unmap(*pfmr);
1087 	}
1088 
1089 	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1090 			scmnd->sc_data_direction);
1091 }
1092 
1093 /**
1094  * srp_claim_req - Take ownership of the scmnd associated with a request.
1095  * @ch: SRP RDMA channel.
1096  * @req: SRP request.
1097  * @sdev: If not NULL, only take ownership for this SCSI device.
1098  * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1099  *         ownership of @req->scmnd if it equals @scmnd.
1100  *
1101  * Return value:
1102  * Either NULL or a pointer to the SCSI command the caller became owner of.
1103  */
1104 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1105 				       struct srp_request *req,
1106 				       struct scsi_device *sdev,
1107 				       struct scsi_cmnd *scmnd)
1108 {
1109 	unsigned long flags;
1110 
1111 	spin_lock_irqsave(&ch->lock, flags);
1112 	if (req->scmnd &&
1113 	    (!sdev || req->scmnd->device == sdev) &&
1114 	    (!scmnd || req->scmnd == scmnd)) {
1115 		scmnd = req->scmnd;
1116 		req->scmnd = NULL;
1117 	} else {
1118 		scmnd = NULL;
1119 	}
1120 	spin_unlock_irqrestore(&ch->lock, flags);
1121 
1122 	return scmnd;
1123 }
1124 
1125 /**
1126  * srp_free_req() - Unmap data and adjust ch->req_lim.
1127  * @ch:     SRP RDMA channel.
1128  * @req:    Request to be freed.
1129  * @scmnd:  SCSI command associated with @req.
1130  * @req_lim_delta: Amount to be added to @target->req_lim.
1131  */
1132 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1133 			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1134 {
1135 	unsigned long flags;
1136 
1137 	srp_unmap_data(scmnd, ch, req);
1138 
1139 	spin_lock_irqsave(&ch->lock, flags);
1140 	ch->req_lim += req_lim_delta;
1141 	spin_unlock_irqrestore(&ch->lock, flags);
1142 }
1143 
1144 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1145 			   struct scsi_device *sdev, int result)
1146 {
1147 	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1148 
1149 	if (scmnd) {
1150 		srp_free_req(ch, req, scmnd, 0);
1151 		scmnd->result = result;
1152 		scmnd->scsi_done(scmnd);
1153 	}
1154 }
1155 
1156 static void srp_terminate_io(struct srp_rport *rport)
1157 {
1158 	struct srp_target_port *target = rport->lld_data;
1159 	struct srp_rdma_ch *ch;
1160 	struct Scsi_Host *shost = target->scsi_host;
1161 	struct scsi_device *sdev;
1162 	int i, j;
1163 
1164 	/*
1165 	 * Invoking srp_terminate_io() while srp_queuecommand() is running
1166 	 * is not safe. Hence the warning statement below.
1167 	 */
1168 	shost_for_each_device(sdev, shost)
1169 		WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1170 
1171 	for (i = 0; i < target->ch_count; i++) {
1172 		ch = &target->ch[i];
1173 
1174 		for (j = 0; j < target->req_ring_size; ++j) {
1175 			struct srp_request *req = &ch->req_ring[j];
1176 
1177 			srp_finish_req(ch, req, NULL,
1178 				       DID_TRANSPORT_FAILFAST << 16);
1179 		}
1180 	}
1181 }
1182 
1183 /*
1184  * It is up to the caller to ensure that srp_rport_reconnect() calls are
1185  * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1186  * srp_reset_device() or srp_reset_host() calls will occur while this function
1187  * is in progress. One way to achieve this is not to call this function
1188  * directly but to call srp_reconnect_rport() instead, since that function
1189  * serializes calls to this function via rport->mutex and also blocks
1190  * srp_queuecommand() calls before invoking this function.
1191  */
1192 static int srp_rport_reconnect(struct srp_rport *rport)
1193 {
1194 	struct srp_target_port *target = rport->lld_data;
1195 	struct srp_rdma_ch *ch;
1196 	int i, j, ret = 0;
1197 	bool multich = false;
1198 
1199 	srp_disconnect_target(target);
1200 
1201 	if (target->state == SRP_TARGET_SCANNING)
1202 		return -ENODEV;
1203 
1204 	/*
1205 	 * Now get a new local CM ID so that we avoid confusing the target in
1206 	 * case things are really fouled up. Doing so also ensures that all CM
1207 	 * callbacks will have finished before a new QP is allocated.
1208 	 */
1209 	for (i = 0; i < target->ch_count; i++) {
1210 		ch = &target->ch[i];
1211 		ret += srp_new_cm_id(ch);
1212 	}
1213 	for (i = 0; i < target->ch_count; i++) {
1214 		ch = &target->ch[i];
1215 		for (j = 0; j < target->req_ring_size; ++j) {
1216 			struct srp_request *req = &ch->req_ring[j];
1217 
1218 			srp_finish_req(ch, req, NULL, DID_RESET << 16);
1219 		}
1220 	}
1221 	for (i = 0; i < target->ch_count; i++) {
1222 		ch = &target->ch[i];
1223 		/*
1224 		 * Whether or not creating a new CM ID succeeded, create a new
1225 		 * QP. This guarantees that all completion callback function
1226 		 * invocations have finished before request resetting starts.
1227 		 */
1228 		ret += srp_create_ch_ib(ch);
1229 
1230 		INIT_LIST_HEAD(&ch->free_tx);
1231 		for (j = 0; j < target->queue_size; ++j)
1232 			list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1233 	}
1234 
1235 	target->qp_in_error = false;
1236 
1237 	for (i = 0; i < target->ch_count; i++) {
1238 		ch = &target->ch[i];
1239 		if (ret)
1240 			break;
1241 		ret = srp_connect_ch(ch, multich);
1242 		multich = true;
1243 	}
1244 
1245 	if (ret == 0)
1246 		shost_printk(KERN_INFO, target->scsi_host,
1247 			     PFX "reconnect succeeded\n");
1248 
1249 	return ret;
1250 }
1251 
1252 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1253 			 unsigned int dma_len, u32 rkey)
1254 {
1255 	struct srp_direct_buf *desc = state->desc;
1256 
1257 	WARN_ON_ONCE(!dma_len);
1258 
1259 	desc->va = cpu_to_be64(dma_addr);
1260 	desc->key = cpu_to_be32(rkey);
1261 	desc->len = cpu_to_be32(dma_len);
1262 
1263 	state->total_len += dma_len;
1264 	state->desc++;
1265 	state->ndesc++;
1266 }
1267 
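/*
 * Map the pages accumulated in @state through the FMR pool of @ch and append
 * a memory descriptor for the mapped region. A single-page mapping uses the
 * global rkey directly if the protection domain allows that.
 */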
1268 static int srp_map_finish_fmr(struct srp_map_state *state,
1269 			      struct srp_rdma_ch *ch)
1270 {
1271 	struct srp_target_port *target = ch->target;
1272 	struct srp_device *dev = target->srp_host->srp_dev;
1273 	struct ib_pd *pd = target->pd;
1274 	struct ib_pool_fmr *fmr;
1275 	u64 io_addr = 0;
1276 
1277 	if (state->fmr.next >= state->fmr.end) {
1278 		shost_printk(KERN_ERR, ch->target->scsi_host,
1279 			     PFX "Out of MRs (mr_per_cmd = %d)\n",
1280 			     ch->target->mr_per_cmd);
1281 		return -ENOMEM;
1282 	}
1283 
1284 	WARN_ON_ONCE(!dev->use_fmr);
1285 
1286 	if (state->npages == 0)
1287 		return 0;
1288 
1289 	if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1290 		srp_map_desc(state, state->base_dma_addr, state->dma_len,
1291 			     pd->unsafe_global_rkey);
1292 		goto reset_state;
1293 	}
1294 
1295 	fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1296 				   state->npages, io_addr);
1297 	if (IS_ERR(fmr))
1298 		return PTR_ERR(fmr);
1299 
1300 	*state->fmr.next++ = fmr;
1301 	state->nmdesc++;
1302 
1303 	srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1304 		     state->dma_len, fmr->fmr->rkey);
1305 
1306 reset_state:
1307 	state->npages = 0;
1308 	state->dma_len = 0;
1309 
1310 	return 0;
1311 }
1312 
1313 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1314 {
1315 	srp_handle_qp_err(cq, wc, "FAST REG");
1316 }
1317 
1318 /*
1319  * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
1320  * at which to start in the first element. If sg_offset_p != NULL then
1321  * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1322  * byte that has not yet been mapped.
1323  */
1324 static int srp_map_finish_fr(struct srp_map_state *state,
1325 			     struct srp_request *req,
1326 			     struct srp_rdma_ch *ch, int sg_nents,
1327 			     unsigned int *sg_offset_p)
1328 {
1329 	struct srp_target_port *target = ch->target;
1330 	struct srp_device *dev = target->srp_host->srp_dev;
1331 	struct ib_pd *pd = target->pd;
1332 	struct ib_send_wr *bad_wr;
1333 	struct ib_reg_wr wr;
1334 	struct srp_fr_desc *desc;
1335 	u32 rkey;
1336 	int n, err;
1337 
1338 	if (state->fr.next >= state->fr.end) {
1339 		shost_printk(KERN_ERR, ch->target->scsi_host,
1340 			     PFX "Out of MRs (mr_per_cmd = %d)\n",
1341 			     ch->target->mr_per_cmd);
1342 		return -ENOMEM;
1343 	}
1344 
1345 	WARN_ON_ONCE(!dev->use_fast_reg);
1346 
1347 	if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1348 		unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1349 
1350 		srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1351 			     sg_dma_len(state->sg) - sg_offset,
1352 			     pd->unsafe_global_rkey);
1353 		if (sg_offset_p)
1354 			*sg_offset_p = 0;
1355 		return 1;
1356 	}
1357 
1358 	desc = srp_fr_pool_get(ch->fr_pool);
1359 	if (!desc)
1360 		return -ENOMEM;
1361 
1362 	rkey = ib_inc_rkey(desc->mr->rkey);
1363 	ib_update_fast_reg_key(desc->mr, rkey);
1364 
1365 	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1366 			 dev->mr_page_size);
1367 	if (unlikely(n < 0)) {
1368 		srp_fr_pool_put(ch->fr_pool, &desc, 1);
1369 		pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1370 			 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1371 			 sg_offset_p ? *sg_offset_p : -1, n);
1372 		return n;
1373 	}
1374 
1375 	WARN_ON_ONCE(desc->mr->length == 0);
1376 
1377 	req->reg_cqe.done = srp_reg_mr_err_done;
1378 
1379 	wr.wr.next = NULL;
1380 	wr.wr.opcode = IB_WR_REG_MR;
1381 	wr.wr.wr_cqe = &req->reg_cqe;
1382 	wr.wr.num_sge = 0;
1383 	wr.wr.send_flags = 0;
1384 	wr.mr = desc->mr;
1385 	wr.key = desc->mr->rkey;
1386 	wr.access = (IB_ACCESS_LOCAL_WRITE |
1387 		     IB_ACCESS_REMOTE_READ |
1388 		     IB_ACCESS_REMOTE_WRITE);
1389 
1390 	*state->fr.next++ = desc;
1391 	state->nmdesc++;
1392 
1393 	srp_map_desc(state, desc->mr->iova,
1394 		     desc->mr->length, desc->mr->rkey);
1395 
1396 	err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1397 	if (unlikely(err)) {
1398 		WARN_ON_ONCE(err == -ENOMEM);
1399 		return err;
1400 	}
1401 
1402 	return n;
1403 }
1404 
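/*
 * Add a single DMA-mapped scatterlist entry to @state, splitting it into
 * pages of dev->mr_page_size bytes and finishing the current FMR mapping
 * whenever the page list is full or the entry does not start or end on a
 * page boundary.
 */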
1405 static int srp_map_sg_entry(struct srp_map_state *state,
1406 			    struct srp_rdma_ch *ch,
1407 			    struct scatterlist *sg)
1408 {
1409 	struct srp_target_port *target = ch->target;
1410 	struct srp_device *dev = target->srp_host->srp_dev;
1411 	struct ib_device *ibdev = dev->dev;
1412 	dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1413 	unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1414 	unsigned int len = 0;
1415 	int ret;
1416 
1417 	WARN_ON_ONCE(!dma_len);
1418 
1419 	while (dma_len) {
1420 		unsigned offset = dma_addr & ~dev->mr_page_mask;
1421 
1422 		if (state->npages == dev->max_pages_per_mr ||
1423 		    (state->npages > 0 && offset != 0)) {
1424 			ret = srp_map_finish_fmr(state, ch);
1425 			if (ret)
1426 				return ret;
1427 		}
1428 
1429 		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1430 
1431 		if (!state->npages)
1432 			state->base_dma_addr = dma_addr;
1433 		state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1434 		state->dma_len += len;
1435 		dma_addr += len;
1436 		dma_len -= len;
1437 	}
1438 
1439 	/*
1440 	 * If the end of the MR is not on a page boundary then we need to
1441 	 * close it out and start a new one -- we can only merge at page
1442 	 * boundaries.
1443 	 */
1444 	ret = 0;
1445 	if ((dma_addr & ~dev->mr_page_mask) != 0)
1446 		ret = srp_map_finish_fmr(state, ch);
1447 	return ret;
1448 }
1449 
1450 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1451 			  struct srp_request *req, struct scatterlist *scat,
1452 			  int count)
1453 {
1454 	struct scatterlist *sg;
1455 	int i, ret;
1456 
1457 	state->pages = req->map_page;
1458 	state->fmr.next = req->fmr_list;
1459 	state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1460 
1461 	for_each_sg(scat, sg, count, i) {
1462 		ret = srp_map_sg_entry(state, ch, sg);
1463 		if (ret)
1464 			return ret;
1465 	}
1466 
1467 	ret = srp_map_finish_fmr(state, ch);
1468 	if (ret)
1469 		return ret;
1470 
1471 	return 0;
1472 }
1473 
1474 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1475 			 struct srp_request *req, struct scatterlist *scat,
1476 			 int count)
1477 {
1478 	unsigned int sg_offset = 0;
1479 
1480 	state->fr.next = req->fr_list;
1481 	state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1482 	state->sg = scat;
1483 
1484 	if (count == 0)
1485 		return 0;
1486 
1487 	while (count) {
1488 		int i, n;
1489 
1490 		n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1491 		if (unlikely(n < 0))
1492 			return n;
1493 
1494 		count -= n;
1495 		for (i = 0; i < n; i++)
1496 			state->sg = sg_next(state->sg);
1497 	}
1498 
1499 	return 0;
1500 }
1501 
1502 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1503 			  struct srp_request *req, struct scatterlist *scat,
1504 			  int count)
1505 {
1506 	struct srp_target_port *target = ch->target;
1507 	struct srp_device *dev = target->srp_host->srp_dev;
1508 	struct scatterlist *sg;
1509 	int i;
1510 
1511 	for_each_sg(scat, sg, count, i) {
1512 		srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1513 			     ib_sg_dma_len(dev->dev, sg),
1514 			     target->pd->unsafe_global_rkey);
1515 	}
1516 
1517 	return 0;
1518 }
1519 
1520 /*
1521  * Register the indirect data buffer descriptor with the HCA.
1522  *
1523  * Note: since the indirect data buffer descriptor has been allocated with
1524  * kmalloc() it is guaranteed that this buffer is a physically contiguous
1525  * memory buffer.
1526  */
1527 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1528 		       void **next_mr, void **end_mr, u32 idb_len,
1529 		       __be32 *idb_rkey)
1530 {
1531 	struct srp_target_port *target = ch->target;
1532 	struct srp_device *dev = target->srp_host->srp_dev;
1533 	struct srp_map_state state;
1534 	struct srp_direct_buf idb_desc;
1535 	u64 idb_pages[1];
1536 	struct scatterlist idb_sg[1];
1537 	int ret;
1538 
1539 	memset(&state, 0, sizeof(state));
1540 	memset(&idb_desc, 0, sizeof(idb_desc));
1541 	state.gen.next = next_mr;
1542 	state.gen.end = end_mr;
1543 	state.desc = &idb_desc;
1544 	state.base_dma_addr = req->indirect_dma_addr;
1545 	state.dma_len = idb_len;
1546 
1547 	if (dev->use_fast_reg) {
1548 		state.sg = idb_sg;
1549 		sg_init_one(idb_sg, req->indirect_desc, idb_len);
1550 		idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1551 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1552 		idb_sg->dma_length = idb_sg->length;	      /* hack^2 */
1553 #endif
1554 		ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1555 		if (ret < 0)
1556 			return ret;
1557 		WARN_ON_ONCE(ret < 1);
1558 	} else if (dev->use_fmr) {
1559 		state.pages = idb_pages;
1560 		state.pages[0] = (req->indirect_dma_addr &
1561 				  dev->mr_page_mask);
1562 		state.npages = 1;
1563 		ret = srp_map_finish_fmr(&state, ch);
1564 		if (ret < 0)
1565 			return ret;
1566 	} else {
1567 		return -EINVAL;
1568 	}
1569 
1570 	*idb_rkey = idb_desc.key;
1571 
1572 	return 0;
1573 }
1574 
1575 static void srp_check_mapping(struct srp_map_state *state,
1576 			      struct srp_rdma_ch *ch, struct srp_request *req,
1577 			      struct scatterlist *scat, int count)
1578 {
1579 	struct srp_device *dev = ch->target->srp_host->srp_dev;
1580 	struct srp_fr_desc **pfr;
1581 	u64 desc_len = 0, mr_len = 0;
1582 	int i;
1583 
1584 	for (i = 0; i < state->ndesc; i++)
1585 		desc_len += be32_to_cpu(req->indirect_desc[i].len);
1586 	if (dev->use_fast_reg)
1587 		for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1588 			mr_len += (*pfr)->mr->length;
1589 	else if (dev->use_fmr)
1590 		for (i = 0; i < state->nmdesc; i++)
1591 			mr_len += be32_to_cpu(req->indirect_desc[i].len);
1592 	if (desc_len != scsi_bufflen(req->scmnd) ||
1593 	    mr_len > scsi_bufflen(req->scmnd))
1594 		pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1595 		       scsi_bufflen(req->scmnd), desc_len, mr_len,
1596 		       state->ndesc, state->nmdesc);
1597 }
1598 
1599 /**
1600  * srp_map_data() - map SCSI data buffer onto an SRP request
1601  * @scmnd: SCSI command to map
1602  * @ch: SRP RDMA channel
1603  * @req: SRP request
1604  *
1605  * Returns the length in bytes of the SRP_CMD IU or a negative value if
1606  * mapping failed.
1607  */
1608 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1609 			struct srp_request *req)
1610 {
1611 	struct srp_target_port *target = ch->target;
1612 	struct ib_pd *pd = target->pd;
1613 	struct scatterlist *scat;
1614 	struct srp_cmd *cmd = req->cmd->buf;
1615 	int len, nents, count, ret;
1616 	struct srp_device *dev;
1617 	struct ib_device *ibdev;
1618 	struct srp_map_state state;
1619 	struct srp_indirect_buf *indirect_hdr;
1620 	u32 idb_len, table_len;
1621 	__be32 idb_rkey;
1622 	u8 fmt;
1623 
1624 	if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1625 		return sizeof (struct srp_cmd);
1626 
1627 	if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1628 	    scmnd->sc_data_direction != DMA_TO_DEVICE) {
1629 		shost_printk(KERN_WARNING, target->scsi_host,
1630 			     PFX "Unhandled data direction %d\n",
1631 			     scmnd->sc_data_direction);
1632 		return -EINVAL;
1633 	}
1634 
1635 	nents = scsi_sg_count(scmnd);
1636 	scat  = scsi_sglist(scmnd);
1637 
1638 	dev = target->srp_host->srp_dev;
1639 	ibdev = dev->dev;
1640 
1641 	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1642 	if (unlikely(count == 0))
1643 		return -EIO;
1644 
1645 	fmt = SRP_DATA_DESC_DIRECT;
1646 	len = sizeof (struct srp_cmd) +	sizeof (struct srp_direct_buf);
1647 
1648 	if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1649 		/*
1650 		 * The midlayer only generated a single gather/scatter
1651 		 * entry, or DMA mapping coalesced everything to a
1652 		 * single entry.  So a direct descriptor along with
1653 		 * the DMA MR suffices.
1654 		 */
1655 		struct srp_direct_buf *buf = (void *) cmd->add_data;
1656 
1657 		buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1658 		buf->key = cpu_to_be32(pd->unsafe_global_rkey);
1659 		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1660 
1661 		req->nmdesc = 0;
1662 		goto map_complete;
1663 	}
1664 
1665 	/*
1666 	 * We have more than one scatter/gather entry, so build our indirect
1667 	 * descriptor table, trying to merge as many entries as we can.
1668 	 */
1669 	indirect_hdr = (void *) cmd->add_data;
1670 
1671 	ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1672 				   target->indirect_size, DMA_TO_DEVICE);
1673 
1674 	memset(&state, 0, sizeof(state));
1675 	state.desc = req->indirect_desc;
1676 	if (dev->use_fast_reg)
1677 		ret = srp_map_sg_fr(&state, ch, req, scat, count);
1678 	else if (dev->use_fmr)
1679 		ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1680 	else
1681 		ret = srp_map_sg_dma(&state, ch, req, scat, count);
1682 	req->nmdesc = state.nmdesc;
1683 	if (ret < 0)
1684 		goto unmap;
1685 
1686 	{
1687 		DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1688 			"Memory mapping consistency check");
1689 		if (DYNAMIC_DEBUG_BRANCH(ddm))
1690 			srp_check_mapping(&state, ch, req, scat, count);
1691 	}
1692 
1693 	/* We've mapped the request, now pull as much of the indirect
1694 	 * descriptor table as we can into the command buffer. If this
1695 	 * target is not using an external indirect table, we are
1696 	 * guaranteed to fit into the command, as the SCSI layer won't
1697 	 * give us more S/G entries than we allow.
1698 	 */
1699 	if (state.ndesc == 1) {
1700 		/*
1701 		 * Memory registration collapsed the sg-list into one entry,
1702 		 * so use a direct descriptor.
1703 		 */
1704 		struct srp_direct_buf *buf = (void *) cmd->add_data;
1705 
1706 		*buf = req->indirect_desc[0];
1707 		goto map_complete;
1708 	}
1709 
1710 	if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1711 						!target->allow_ext_sg)) {
1712 		shost_printk(KERN_ERR, target->scsi_host,
1713 			     "Could not fit S/G list into SRP_CMD\n");
1714 		ret = -EIO;
1715 		goto unmap;
1716 	}
1717 
1718 	count = min(state.ndesc, target->cmd_sg_cnt);
1719 	table_len = state.ndesc * sizeof (struct srp_direct_buf);
1720 	idb_len = sizeof(struct srp_indirect_buf) + table_len;
1721 
1722 	fmt = SRP_DATA_DESC_INDIRECT;
1723 	len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1724 	len += count * sizeof (struct srp_direct_buf);
1725 
1726 	memcpy(indirect_hdr->desc_list, req->indirect_desc,
1727 	       count * sizeof (struct srp_direct_buf));
1728 
1729 	if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1730 		ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1731 				  idb_len, &idb_rkey);
1732 		if (ret < 0)
1733 			goto unmap;
1734 		req->nmdesc++;
1735 	} else {
1736 		idb_rkey = cpu_to_be32(pd->unsafe_global_rkey);
1737 	}
1738 
1739 	indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1740 	indirect_hdr->table_desc.key = idb_rkey;
1741 	indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1742 	indirect_hdr->len = cpu_to_be32(state.total_len);
1743 
1744 	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1745 		cmd->data_out_desc_cnt = count;
1746 	else
1747 		cmd->data_in_desc_cnt = count;
1748 
1749 	ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1750 				      DMA_TO_DEVICE);
1751 
1752 map_complete:
1753 	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1754 		cmd->buf_fmt = fmt << 4;
1755 	else
1756 		cmd->buf_fmt = fmt;
1757 
1758 	return len;
1759 
1760 unmap:
1761 	srp_unmap_data(scmnd, ch, req);
1762 	if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1763 		ret = -E2BIG;
1764 	return ret;
1765 }
1766 
1767 /*
1768  * Return an IU and possible credit to the free pool
1769  */
1770 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1771 			  enum srp_iu_type iu_type)
1772 {
1773 	unsigned long flags;
1774 
1775 	spin_lock_irqsave(&ch->lock, flags);
1776 	list_add(&iu->list, &ch->free_tx);
1777 	if (iu_type != SRP_IU_RSP)
1778 		++ch->req_lim;
1779 	spin_unlock_irqrestore(&ch->lock, flags);
1780 }
1781 
1782 /*
1783  * Must be called with ch->lock held to protect req_lim and free_tx.
1784  * If IU is not sent, it must be returned using srp_put_tx_iu().
1785  *
1786  * Note:
1787  * An upper limit for the number of allocated information units for each
1788  * request type is:
1789  * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1790  *   more than Scsi_Host.can_queue requests.
1791  * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1792  * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1793  *   one unanswered SRP request to an initiator.
1794  */
1795 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1796 				      enum srp_iu_type iu_type)
1797 {
1798 	struct srp_target_port *target = ch->target;
1799 	s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1800 	struct srp_iu *iu;
1801 
1802 	ib_process_cq_direct(ch->send_cq, -1);
1803 
1804 	if (list_empty(&ch->free_tx))
1805 		return NULL;
1806 
1807 	/* Initiator responses to target requests do not consume credits */
1808 	if (iu_type != SRP_IU_RSP) {
1809 		if (ch->req_lim <= rsv) {
1810 			++target->zero_req_lim;
1811 			return NULL;
1812 		}
1813 
1814 		--ch->req_lim;
1815 	}
1816 
1817 	iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1818 	list_del(&iu->list);
1819 	return iu;
1820 }
1821 
1822 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1823 {
1824 	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1825 	struct srp_rdma_ch *ch = cq->cq_context;
1826 
1827 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
1828 		srp_handle_qp_err(cq, wc, "SEND");
1829 		return;
1830 	}
1831 
1832 	list_add(&iu->list, &ch->free_tx);
1833 }
1834 
1835 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1836 {
1837 	struct srp_target_port *target = ch->target;
1838 	struct ib_sge list;
1839 	struct ib_send_wr wr, *bad_wr;
1840 
1841 	list.addr   = iu->dma;
1842 	list.length = len;
1843 	list.lkey   = target->lkey;
1844 
1845 	iu->cqe.done = srp_send_done;
1846 
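	/*
	 * Every send is signaled so that srp_send_done() runs for each
	 * completed work request and puts the IU back on the free_tx list.
	 */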
1847 	wr.next       = NULL;
1848 	wr.wr_cqe     = &iu->cqe;
1849 	wr.sg_list    = &list;
1850 	wr.num_sge    = 1;
1851 	wr.opcode     = IB_WR_SEND;
1852 	wr.send_flags = IB_SEND_SIGNALED;
1853 
1854 	return ib_post_send(ch->qp, &wr, &bad_wr);
1855 }
1856 
1857 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1858 {
1859 	struct srp_target_port *target = ch->target;
1860 	struct ib_recv_wr wr, *bad_wr;
1861 	struct ib_sge list;
1862 
1863 	list.addr   = iu->dma;
1864 	list.length = iu->size;
1865 	list.lkey   = target->lkey;
1866 
1867 	iu->cqe.done = srp_recv_done;
1868 
1869 	wr.next     = NULL;
1870 	wr.wr_cqe   = &iu->cqe;
1871 	wr.sg_list  = &list;
1872 	wr.num_sge  = 1;
1873 
1874 	return ib_post_recv(ch->qp, &wr, &bad_wr);
1875 }
1876 
1877 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1878 {
1879 	struct srp_target_port *target = ch->target;
1880 	struct srp_request *req;
1881 	struct scsi_cmnd *scmnd;
1882 	unsigned long flags;
1883 
1884 	if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1885 		spin_lock_irqsave(&ch->lock, flags);
1886 		ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1887 		spin_unlock_irqrestore(&ch->lock, flags);
1888 
1889 		ch->tsk_mgmt_status = -1;
1890 		if (be32_to_cpu(rsp->resp_data_len) >= 4)
1891 			ch->tsk_mgmt_status = rsp->data[3];
1892 		complete(&ch->tsk_mgmt_done);
1893 	} else {
1894 		scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1895 		if (scmnd) {
1896 			req = (void *)scmnd->host_scribble;
1897 			scmnd = srp_claim_req(ch, req, NULL, scmnd);
1898 		}
1899 		if (!scmnd) {
1900 			shost_printk(KERN_ERR, target->scsi_host,
1901 				     "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1902 				     rsp->tag, ch - target->ch, ch->qp->qp_num);
1903 
1904 			spin_lock_irqsave(&ch->lock, flags);
1905 			ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1906 			spin_unlock_irqrestore(&ch->lock, flags);
1907 
1908 			return;
1909 		}
1910 		scmnd->result = rsp->status;
1911 
1912 		if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1913 			memcpy(scmnd->sense_buffer, rsp->data +
1914 			       be32_to_cpu(rsp->resp_data_len),
1915 			       min_t(int, be32_to_cpu(rsp->sense_data_len),
1916 				     SCSI_SENSE_BUFFERSIZE));
1917 		}
1918 
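		/*
		 * Translate the SRP over-/underflow flags into a SCSI
		 * residual: underflow is reported as a positive residual
		 * count and overflow as a negative one.
		 */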
1919 		if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1920 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1921 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1922 			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1923 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1924 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1925 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1926 			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1927 
1928 		srp_free_req(ch, req, scmnd,
1929 			     be32_to_cpu(rsp->req_lim_delta));
1930 
1931 		scmnd->host_scribble = NULL;
1932 		scmnd->scsi_done(scmnd);
1933 	}
1934 }
1935 
1936 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1937 			       void *rsp, int len)
1938 {
1939 	struct srp_target_port *target = ch->target;
1940 	struct ib_device *dev = target->srp_host->srp_dev->dev;
1941 	unsigned long flags;
1942 	struct srp_iu *iu;
1943 	int err;
1944 
1945 	spin_lock_irqsave(&ch->lock, flags);
1946 	ch->req_lim += req_delta;
1947 	iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1948 	spin_unlock_irqrestore(&ch->lock, flags);
1949 
1950 	if (!iu) {
1951 		shost_printk(KERN_ERR, target->scsi_host, PFX
1952 			     "no IU available to send response\n");
1953 		return 1;
1954 	}
1955 
1956 	ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1957 	memcpy(iu->buf, rsp, len);
1958 	ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1959 
1960 	err = srp_post_send(ch, iu, len);
1961 	if (err) {
1962 		shost_printk(KERN_ERR, target->scsi_host, PFX
1963 			     "unable to post response: %d\n", err);
1964 		srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1965 	}
1966 
1967 	return err;
1968 }
1969 
1970 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1971 				 struct srp_cred_req *req)
1972 {
1973 	struct srp_cred_rsp rsp = {
1974 		.opcode = SRP_CRED_RSP,
1975 		.tag = req->tag,
1976 	};
1977 	s32 delta = be32_to_cpu(req->req_lim_delta);
1978 
1979 	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1980 		shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1981 			     "problems processing SRP_CRED_REQ\n");
1982 }
1983 
1984 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1985 				struct srp_aer_req *req)
1986 {
1987 	struct srp_target_port *target = ch->target;
1988 	struct srp_aer_rsp rsp = {
1989 		.opcode = SRP_AER_RSP,
1990 		.tag = req->tag,
1991 	};
1992 	s32 delta = be32_to_cpu(req->req_lim_delta);
1993 
1994 	shost_printk(KERN_ERR, target->scsi_host, PFX
1995 		     "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
1996 
1997 	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1998 		shost_printk(KERN_ERR, target->scsi_host, PFX
1999 			     "problems processing SRP_AER_REQ\n");
2000 }
2001 
2002 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2003 {
2004 	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2005 	struct srp_rdma_ch *ch = cq->cq_context;
2006 	struct srp_target_port *target = ch->target;
2007 	struct ib_device *dev = target->srp_host->srp_dev->dev;
2008 	int res;
2009 	u8 opcode;
2010 
2011 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
2012 		srp_handle_qp_err(cq, wc, "RECV");
2013 		return;
2014 	}
2015 
2016 	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2017 				   DMA_FROM_DEVICE);
2018 
2019 	opcode = *(u8 *) iu->buf;
2020 
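	/*
	 * Compile-time disabled debug code; change the 0 below to 1 to dump
	 * every received IU while debugging.
	 */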
2021 	if (0) {
2022 		shost_printk(KERN_ERR, target->scsi_host,
2023 			     PFX "recv completion, opcode 0x%02x\n", opcode);
2024 		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2025 			       iu->buf, wc->byte_len, true);
2026 	}
2027 
2028 	switch (opcode) {
2029 	case SRP_RSP:
2030 		srp_process_rsp(ch, iu->buf);
2031 		break;
2032 
2033 	case SRP_CRED_REQ:
2034 		srp_process_cred_req(ch, iu->buf);
2035 		break;
2036 
2037 	case SRP_AER_REQ:
2038 		srp_process_aer_req(ch, iu->buf);
2039 		break;
2040 
2041 	case SRP_T_LOGOUT:
2042 		/* XXX Handle target logout */
2043 		shost_printk(KERN_WARNING, target->scsi_host,
2044 			     PFX "Got target logout request\n");
2045 		break;
2046 
2047 	default:
2048 		shost_printk(KERN_WARNING, target->scsi_host,
2049 			     PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2050 		break;
2051 	}
2052 
2053 	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2054 				      DMA_FROM_DEVICE);
2055 
2056 	res = srp_post_recv(ch, iu);
2057 	if (res != 0)
2058 		shost_printk(KERN_ERR, target->scsi_host,
2059 			     PFX "Recv failed with error code %d\n", res);
2060 }
2061 
2062 /**
2063  * srp_tl_err_work() - handle a transport layer error
2064  * @work: Work structure embedded in an SRP target port.
2065  *
2066  * Note: This function may get invoked before the rport has been created,
2067  * hence the target->rport test.
2068  */
2069 static void srp_tl_err_work(struct work_struct *work)
2070 {
2071 	struct srp_target_port *target;
2072 
2073 	target = container_of(work, struct srp_target_port, tl_err_work);
2074 	if (target->rport)
2075 		srp_start_tl_fail_timers(target->rport);
2076 }
2077 
2078 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2079 		const char *opname)
2080 {
2081 	struct srp_rdma_ch *ch = cq->cq_context;
2082 	struct srp_target_port *target = ch->target;
2083 
2084 	if (ch->connected && !target->qp_in_error) {
2085 		shost_printk(KERN_ERR, target->scsi_host,
2086 			     PFX "failed %s status %s (%d) for CQE %p\n",
2087 			     opname, ib_wc_status_msg(wc->status), wc->status,
2088 			     wc->wr_cqe);
2089 		queue_work(system_long_wq, &target->tl_err_work);
2090 	}
2091 	target->qp_in_error = true;
2092 }
2093 
2094 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2095 {
2096 	struct srp_target_port *target = host_to_target(shost);
2097 	struct srp_rport *rport = target->rport;
2098 	struct srp_rdma_ch *ch;
2099 	struct srp_request *req;
2100 	struct srp_iu *iu;
2101 	struct srp_cmd *cmd;
2102 	struct ib_device *dev;
2103 	unsigned long flags;
2104 	u32 tag;
2105 	u16 idx;
2106 	int len, ret;
2107 	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2108 
2109 	/*
2110 	 * The SCSI EH thread is the only context from which srp_queuecommand()
2111 	 * can get invoked for blocked devices (SDEV_BLOCK /
2112 	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2113 	 * locking the rport mutex if invoked from inside the SCSI EH.
2114 	 */
2115 	if (in_scsi_eh)
2116 		mutex_lock(&rport->mutex);
2117 
2118 	scmnd->result = srp_chkready(target->rport);
2119 	if (unlikely(scmnd->result))
2120 		goto err;
2121 
2122 	WARN_ON_ONCE(scmnd->request->tag < 0);
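	/*
	 * blk_mq_unique_tag() encodes the hardware queue index in the upper
	 * 16 bits and the per-queue tag in the lower 16 bits. The former
	 * selects the RDMA channel, the latter the request ring slot.
	 */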
2123 	tag = blk_mq_unique_tag(scmnd->request);
2124 	ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2125 	idx = blk_mq_unique_tag_to_tag(tag);
2126 	WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2127 		  dev_name(&shost->shost_gendev), tag, idx,
2128 		  target->req_ring_size);
2129 
2130 	spin_lock_irqsave(&ch->lock, flags);
2131 	iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2132 	spin_unlock_irqrestore(&ch->lock, flags);
2133 
2134 	if (!iu)
2135 		goto err;
2136 
2137 	req = &ch->req_ring[idx];
2138 	dev = target->srp_host->srp_dev->dev;
2139 	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2140 				   DMA_TO_DEVICE);
2141 
2142 	scmnd->host_scribble = (void *) req;
2143 
2144 	cmd = iu->buf;
2145 	memset(cmd, 0, sizeof *cmd);
2146 
2147 	cmd->opcode = SRP_CMD;
2148 	int_to_scsilun(scmnd->device->lun, &cmd->lun);
2149 	cmd->tag    = tag;
2150 	memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2151 
2152 	req->scmnd    = scmnd;
2153 	req->cmd      = iu;
2154 
2155 	len = srp_map_data(scmnd, ch, req);
2156 	if (len < 0) {
2157 		shost_printk(KERN_ERR, target->scsi_host,
2158 			     PFX "Failed to map data (%d)\n", len);
2159 		/*
2160 		 * If we ran out of memory descriptors (-ENOMEM) because an
2161 		 * application is queuing many requests with more than
2162 		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2163 		 * to reduce queue depth temporarily.
2164 		 */
2165 		scmnd->result = len == -ENOMEM ?
2166 			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2167 		goto err_iu;
2168 	}
2169 
2170 	ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2171 				      DMA_TO_DEVICE);
2172 
2173 	if (srp_post_send(ch, iu, len)) {
2174 		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2175 		goto err_unmap;
2176 	}
2177 
2178 	ret = 0;
2179 
2180 unlock_rport:
2181 	if (in_scsi_eh)
2182 		mutex_unlock(&rport->mutex);
2183 
2184 	return ret;
2185 
2186 err_unmap:
2187 	srp_unmap_data(scmnd, ch, req);
2188 
2189 err_iu:
2190 	srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2191 
2192 	/*
2193 	 * Clear req->scmnd so that the loops that iterate over the request
2194 	 * ring never encounter a dangling SCSI command pointer.
2195 	 */
2196 	req->scmnd = NULL;
2197 
2198 err:
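	/*
	 * If a result has already been set, complete the command here so the
	 * SCSI core sees the error. Otherwise return SCSI_MLQUEUE_HOST_BUSY
	 * so that the mid-layer requeues the command later.
	 */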
2199 	if (scmnd->result) {
2200 		scmnd->scsi_done(scmnd);
2201 		ret = 0;
2202 	} else {
2203 		ret = SCSI_MLQUEUE_HOST_BUSY;
2204 	}
2205 
2206 	goto unlock_rport;
2207 }
2208 
2209 /*
2210  * Note: the resources allocated in this function are freed in
2211  * srp_free_ch_ib().
2212  */
2213 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2214 {
2215 	struct srp_target_port *target = ch->target;
2216 	int i;
2217 
2218 	ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2219 			      GFP_KERNEL);
2220 	if (!ch->rx_ring)
2221 		goto err_no_ring;
2222 	ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2223 			      GFP_KERNEL);
2224 	if (!ch->tx_ring)
2225 		goto err_no_ring;
2226 
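	/*
	 * RX IUs must hold the largest target-to-initiator IU negotiated at
	 * login time (max_ti_iu_len); TX IUs are sized for the largest
	 * SRP_CMD this initiator may send (max_iu_len).
	 */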
2227 	for (i = 0; i < target->queue_size; ++i) {
2228 		ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2229 					      ch->max_ti_iu_len,
2230 					      GFP_KERNEL, DMA_FROM_DEVICE);
2231 		if (!ch->rx_ring[i])
2232 			goto err;
2233 	}
2234 
2235 	for (i = 0; i < target->queue_size; ++i) {
2236 		ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2237 					      target->max_iu_len,
2238 					      GFP_KERNEL, DMA_TO_DEVICE);
2239 		if (!ch->tx_ring[i])
2240 			goto err;
2241 
2242 		list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2243 	}
2244 
2245 	return 0;
2246 
2247 err:
2248 	for (i = 0; i < target->queue_size; ++i) {
2249 		srp_free_iu(target->srp_host, ch->rx_ring[i]);
2250 		srp_free_iu(target->srp_host, ch->tx_ring[i]);
2251 	}
2252 
2253 
2254 err_no_ring:
2255 	kfree(ch->tx_ring);
2256 	ch->tx_ring = NULL;
2257 	kfree(ch->rx_ring);
2258 	ch->rx_ring = NULL;
2259 
2260 	return -ENOMEM;
2261 }
2262 
2263 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2264 {
2265 	uint64_t T_tr_ns, max_compl_time_ms;
2266 	uint32_t rq_tmo_jiffies;
2267 
2268 	/*
2269 	 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2270 	 * table 91), both the QP timeout and the retry count have to be set
2271 	 * for RC QP's during the RTR to RTS transition.
2272 	 * for RC QPs during the RTR to RTS transition.
2273 	WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2274 		     (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2275 
2276 	/*
2277 	 * Set target->rq_tmo_jiffies to one second more than the largest time
2278 	 * it can take before an error completion is generated. See also
2279 	 * C9-140..142 in the IBTA spec for more information about how to
2280 	 * convert the QP Local ACK Timeout value to nanoseconds.
2281 	 */
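	/*
	 * Example: with timeout = 14 and retry_cnt = 7, T_tr_ns is
	 * 4096 * 2^14 ns (about 67 ms), max_compl_time_ms is about 1879 ms
	 * and rq_tmo_jiffies corresponds to roughly 2.9 seconds.
	 */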
2282 	T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2283 	max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2284 	do_div(max_compl_time_ms, NSEC_PER_MSEC);
2285 	rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2286 
2287 	return rq_tmo_jiffies;
2288 }
2289 
2290 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2291 			       const struct srp_login_rsp *lrsp,
2292 			       struct srp_rdma_ch *ch)
2293 {
2294 	struct srp_target_port *target = ch->target;
2295 	struct ib_qp_attr *qp_attr = NULL;
2296 	int attr_mask = 0;
2297 	int ret;
2298 	int i;
2299 
2300 	if (lrsp->opcode == SRP_LOGIN_RSP) {
2301 		ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2302 		ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
2303 
2304 		/*
2305 		 * Reserve credits for task management so we don't
2306 		 * bounce requests back to the SCSI mid-layer.
2307 		 */
2308 		target->scsi_host->can_queue
2309 			= min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2310 			      target->scsi_host->can_queue);
2311 		target->scsi_host->cmd_per_lun
2312 			= min_t(int, target->scsi_host->can_queue,
2313 				target->scsi_host->cmd_per_lun);
2314 	} else {
2315 		shost_printk(KERN_WARNING, target->scsi_host,
2316 			     PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2317 		ret = -ECONNRESET;
2318 		goto error;
2319 	}
2320 
2321 	if (!ch->rx_ring) {
2322 		ret = srp_alloc_iu_bufs(ch);
2323 		if (ret)
2324 			goto error;
2325 	}
2326 
2327 	ret = -ENOMEM;
2328 	qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2329 	if (!qp_attr)
2330 		goto error;
2331 
2332 	qp_attr->qp_state = IB_QPS_RTR;
2333 	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2334 	if (ret)
2335 		goto error_free;
2336 
2337 	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2338 	if (ret)
2339 		goto error_free;
2340 
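	/*
	 * Post all receive buffers before transitioning the QP to RTS and
	 * sending the RTU so that nothing the target sends once the
	 * connection has been established gets dropped.
	 */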
2341 	for (i = 0; i < target->queue_size; i++) {
2342 		struct srp_iu *iu = ch->rx_ring[i];
2343 
2344 		ret = srp_post_recv(ch, iu);
2345 		if (ret)
2346 			goto error_free;
2347 	}
2348 
2349 	qp_attr->qp_state = IB_QPS_RTS;
2350 	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2351 	if (ret)
2352 		goto error_free;
2353 
2354 	target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2355 
2356 	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2357 	if (ret)
2358 		goto error_free;
2359 
2360 	ret = ib_send_cm_rtu(cm_id, NULL, 0);
2361 
2362 error_free:
2363 	kfree(qp_attr);
2364 
2365 error:
2366 	ch->status = ret;
2367 }
2368 
2369 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2370 			       struct ib_cm_event *event,
2371 			       struct srp_rdma_ch *ch)
2372 {
2373 	struct srp_target_port *target = ch->target;
2374 	struct Scsi_Host *shost = target->scsi_host;
2375 	struct ib_class_port_info *cpi;
2376 	int opcode;
2377 
2378 	switch (event->param.rej_rcvd.reason) {
2379 	case IB_CM_REJ_PORT_CM_REDIRECT:
2380 		cpi = event->param.rej_rcvd.ari;
2381 		ch->path.dlid = cpi->redirect_lid;
2382 		ch->path.pkey = cpi->redirect_pkey;
2383 		cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2384 		memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2385 
2386 		ch->status = ch->path.dlid ?
2387 			SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2388 		break;
2389 
2390 	case IB_CM_REJ_PORT_REDIRECT:
2391 		if (srp_target_is_topspin(target)) {
2392 			/*
2393 			 * Topspin/Cisco SRP gateways incorrectly send
2394 			 * reject reason code 25 when they mean 24
2395 			 * (port redirect).
2396 			 */
2397 			memcpy(ch->path.dgid.raw,
2398 			       event->param.rej_rcvd.ari, 16);
2399 
2400 			shost_printk(KERN_DEBUG, shost,
2401 				     PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2402 				     be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2403 				     be64_to_cpu(ch->path.dgid.global.interface_id));
2404 
2405 			ch->status = SRP_PORT_REDIRECT;
2406 		} else {
2407 			shost_printk(KERN_WARNING, shost,
2408 				     "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2409 			ch->status = -ECONNRESET;
2410 		}
2411 		break;
2412 
2413 	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2414 		shost_printk(KERN_WARNING, shost,
2415 			    "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2416 		ch->status = -ECONNRESET;
2417 		break;
2418 
2419 	case IB_CM_REJ_CONSUMER_DEFINED:
2420 		opcode = *(u8 *) event->private_data;
2421 		if (opcode == SRP_LOGIN_REJ) {
2422 			struct srp_login_rej *rej = event->private_data;
2423 			u32 reason = be32_to_cpu(rej->reason);
2424 
2425 			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2426 				shost_printk(KERN_WARNING, shost,
2427 					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2428 			else
2429 				shost_printk(KERN_WARNING, shost, PFX
2430 					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2431 					     target->sgid.raw,
2432 					     target->orig_dgid.raw, reason);
2433 		} else
2434 			shost_printk(KERN_WARNING, shost,
2435 				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2436 				     " opcode 0x%02x\n", opcode);
2437 		ch->status = -ECONNRESET;
2438 		break;
2439 
2440 	case IB_CM_REJ_STALE_CONN:
2441 		shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
2442 		ch->status = SRP_STALE_CONN;
2443 		break;
2444 
2445 	default:
2446 		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2447 			     event->param.rej_rcvd.reason);
2448 		ch->status = -ECONNRESET;
2449 	}
2450 }
2451 
2452 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2453 {
2454 	struct srp_rdma_ch *ch = cm_id->context;
2455 	struct srp_target_port *target = ch->target;
2456 	int comp = 0;
2457 
2458 	switch (event->event) {
2459 	case IB_CM_REQ_ERROR:
2460 		shost_printk(KERN_DEBUG, target->scsi_host,
2461 			     PFX "Sending CM REQ failed\n");
2462 		comp = 1;
2463 		ch->status = -ECONNRESET;
2464 		break;
2465 
2466 	case IB_CM_REP_RECEIVED:
2467 		comp = 1;
2468 		srp_cm_rep_handler(cm_id, event->private_data, ch);
2469 		break;
2470 
2471 	case IB_CM_REJ_RECEIVED:
2472 		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2473 		comp = 1;
2474 
2475 		srp_cm_rej_handler(cm_id, event, ch);
2476 		break;
2477 
2478 	case IB_CM_DREQ_RECEIVED:
2479 		shost_printk(KERN_WARNING, target->scsi_host,
2480 			     PFX "DREQ received - connection closed\n");
2481 		ch->connected = false;
2482 		if (ib_send_cm_drep(cm_id, NULL, 0))
2483 			shost_printk(KERN_ERR, target->scsi_host,
2484 				     PFX "Sending CM DREP failed\n");
2485 		queue_work(system_long_wq, &target->tl_err_work);
2486 		break;
2487 
2488 	case IB_CM_TIMEWAIT_EXIT:
2489 		shost_printk(KERN_ERR, target->scsi_host,
2490 			     PFX "connection closed\n");
2491 		comp = 1;
2492 
2493 		ch->status = 0;
2494 		break;
2495 
2496 	case IB_CM_MRA_RECEIVED:
2497 	case IB_CM_DREQ_ERROR:
2498 	case IB_CM_DREP_RECEIVED:
2499 		break;
2500 
2501 	default:
2502 		shost_printk(KERN_WARNING, target->scsi_host,
2503 			     PFX "Unhandled CM event %d\n", event->event);
2504 		break;
2505 	}
2506 
2507 	if (comp)
2508 		complete(&ch->done);
2509 
2510 	return 0;
2511 }
2512 
2513 /**
2514  * srp_change_queue_depth - set the device queue depth
2515  * @sdev: scsi device struct
2516  * @qdepth: requested queue depth
2517  *
2518  * Returns the new queue depth.
2519  */
2520 static int
2521 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2522 {
2523 	if (!sdev->tagged_supported)
2524 		qdepth = 1;
2525 	return scsi_change_queue_depth(sdev, qdepth);
2526 }
2527 
2528 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2529 			     u8 func)
2530 {
2531 	struct srp_target_port *target = ch->target;
2532 	struct srp_rport *rport = target->rport;
2533 	struct ib_device *dev = target->srp_host->srp_dev->dev;
2534 	struct srp_iu *iu;
2535 	struct srp_tsk_mgmt *tsk_mgmt;
2536 
2537 	if (!ch->connected || target->qp_in_error)
2538 		return -1;
2539 
2540 	init_completion(&ch->tsk_mgmt_done);
2541 
2542 	/*
2543 	 * Lock the rport mutex to prevent srp_create_ch_ib() from being
2544 	 * invoked while a task management function is being sent.
2545 	 */
2546 	mutex_lock(&rport->mutex);
2547 	spin_lock_irq(&ch->lock);
2548 	iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2549 	spin_unlock_irq(&ch->lock);
2550 
2551 	if (!iu) {
2552 		mutex_unlock(&rport->mutex);
2553 
2554 		return -1;
2555 	}
2556 
2557 	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2558 				   DMA_TO_DEVICE);
2559 	tsk_mgmt = iu->buf;
2560 	memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2561 
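	/*
	 * The SRP_TAG_TSK_MGMT bit in the tag lets srp_process_rsp()
	 * distinguish task management responses from command responses.
	 */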
2562 	tsk_mgmt->opcode 	= SRP_TSK_MGMT;
2563 	int_to_scsilun(lun, &tsk_mgmt->lun);
2564 	tsk_mgmt->tag		= req_tag | SRP_TAG_TSK_MGMT;
2565 	tsk_mgmt->tsk_mgmt_func = func;
2566 	tsk_mgmt->task_tag	= req_tag;
2567 
2568 	ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2569 				      DMA_TO_DEVICE);
2570 	if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2571 		srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2572 		mutex_unlock(&rport->mutex);
2573 
2574 		return -1;
2575 	}
2576 	mutex_unlock(&rport->mutex);
2577 
2578 	if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
2579 					 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
2580 		return -1;
2581 
2582 	return 0;
2583 }
2584 
2585 static int srp_abort(struct scsi_cmnd *scmnd)
2586 {
2587 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2588 	struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2589 	u32 tag;
2590 	u16 ch_idx;
2591 	struct srp_rdma_ch *ch;
2592 	int ret;
2593 
2594 	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2595 
2596 	if (!req)
2597 		return SUCCESS;
2598 	tag = blk_mq_unique_tag(scmnd->request);
2599 	ch_idx = blk_mq_unique_tag_to_hwq(tag);
2600 	if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2601 		return SUCCESS;
2602 	ch = &target->ch[ch_idx];
2603 	if (!srp_claim_req(ch, req, NULL, scmnd))
2604 		return SUCCESS;
2605 	shost_printk(KERN_ERR, target->scsi_host,
2606 		     "Sending SRP abort for tag %#x\n", tag);
2607 	if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2608 			      SRP_TSK_ABORT_TASK) == 0)
2609 		ret = SUCCESS;
2610 	else if (target->rport->state == SRP_RPORT_LOST)
2611 		ret = FAST_IO_FAIL;
2612 	else
2613 		ret = FAILED;
2614 	srp_free_req(ch, req, scmnd, 0);
2615 	scmnd->result = DID_ABORT << 16;
2616 	scmnd->scsi_done(scmnd);
2617 
2618 	return ret;
2619 }
2620 
2621 static int srp_reset_device(struct scsi_cmnd *scmnd)
2622 {
2623 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2624 	struct srp_rdma_ch *ch;
2625 	int i, j;
2626 
2627 	shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2628 
2629 	ch = &target->ch[0];
2630 	if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2631 			      SRP_TSK_LUN_RESET))
2632 		return FAILED;
2633 	if (ch->tsk_mgmt_status)
2634 		return FAILED;
2635 
2636 	for (i = 0; i < target->ch_count; i++) {
2637 		ch = &target->ch[i];
2638 		for (j = 0; j < target->req_ring_size; ++j) {
2639 			struct srp_request *req = &ch->req_ring[j];
2640 
2641 			srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2642 		}
2643 	}
2644 
2645 	return SUCCESS;
2646 }
2647 
2648 static int srp_reset_host(struct scsi_cmnd *scmnd)
2649 {
2650 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2651 
2652 	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2653 
2654 	return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2655 }
2656 
2657 static int srp_slave_alloc(struct scsi_device *sdev)
2658 {
2659 	struct Scsi_Host *shost = sdev->host;
2660 	struct srp_target_port *target = host_to_target(shost);
2661 	struct srp_device *srp_dev = target->srp_host->srp_dev;
2662 	struct ib_device *ibdev = srp_dev->dev;
2663 
2664 	if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
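	/*
	 * If the HCA cannot register SG lists that contain gaps, ask the
	 * block layer only to build requests whose segments do not introduce
	 * gaps within an MR page.
	 */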
2665 		blk_queue_virt_boundary(sdev->request_queue,
2666 					~srp_dev->mr_page_mask);
2667 
2668 	return 0;
2669 }
2670 
2671 static int srp_slave_configure(struct scsi_device *sdev)
2672 {
2673 	struct Scsi_Host *shost = sdev->host;
2674 	struct srp_target_port *target = host_to_target(shost);
2675 	struct request_queue *q = sdev->request_queue;
2676 	unsigned long timeout;
2677 
2678 	if (sdev->type == TYPE_DISK) {
2679 		timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2680 		blk_queue_rq_timeout(q, timeout);
2681 	}
2682 
2683 	return 0;
2684 }
2685 
2686 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2687 			   char *buf)
2688 {
2689 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2690 
2691 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2692 }
2693 
2694 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2695 			     char *buf)
2696 {
2697 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2698 
2699 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2700 }
2701 
2702 static ssize_t show_service_id(struct device *dev,
2703 			       struct device_attribute *attr, char *buf)
2704 {
2705 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2706 
2707 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2708 }
2709 
2710 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2711 			 char *buf)
2712 {
2713 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2714 
2715 	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2716 }
2717 
2718 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2719 			 char *buf)
2720 {
2721 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2722 
2723 	return sprintf(buf, "%pI6\n", target->sgid.raw);
2724 }
2725 
2726 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2727 			 char *buf)
2728 {
2729 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2730 	struct srp_rdma_ch *ch = &target->ch[0];
2731 
2732 	return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2733 }
2734 
2735 static ssize_t show_orig_dgid(struct device *dev,
2736 			      struct device_attribute *attr, char *buf)
2737 {
2738 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2739 
2740 	return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2741 }
2742 
2743 static ssize_t show_req_lim(struct device *dev,
2744 			    struct device_attribute *attr, char *buf)
2745 {
2746 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2747 	struct srp_rdma_ch *ch;
2748 	int i, req_lim = INT_MAX;
2749 
2750 	for (i = 0; i < target->ch_count; i++) {
2751 		ch = &target->ch[i];
2752 		req_lim = min(req_lim, ch->req_lim);
2753 	}
2754 	return sprintf(buf, "%d\n", req_lim);
2755 }
2756 
2757 static ssize_t show_zero_req_lim(struct device *dev,
2758 				 struct device_attribute *attr, char *buf)
2759 {
2760 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2761 
2762 	return sprintf(buf, "%d\n", target->zero_req_lim);
2763 }
2764 
2765 static ssize_t show_local_ib_port(struct device *dev,
2766 				  struct device_attribute *attr, char *buf)
2767 {
2768 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2769 
2770 	return sprintf(buf, "%d\n", target->srp_host->port);
2771 }
2772 
2773 static ssize_t show_local_ib_device(struct device *dev,
2774 				    struct device_attribute *attr, char *buf)
2775 {
2776 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2777 
2778 	return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2779 }
2780 
2781 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2782 			     char *buf)
2783 {
2784 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2785 
2786 	return sprintf(buf, "%d\n", target->ch_count);
2787 }
2788 
2789 static ssize_t show_comp_vector(struct device *dev,
2790 				struct device_attribute *attr, char *buf)
2791 {
2792 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2793 
2794 	return sprintf(buf, "%d\n", target->comp_vector);
2795 }
2796 
2797 static ssize_t show_tl_retry_count(struct device *dev,
2798 				   struct device_attribute *attr, char *buf)
2799 {
2800 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2801 
2802 	return sprintf(buf, "%d\n", target->tl_retry_count);
2803 }
2804 
2805 static ssize_t show_cmd_sg_entries(struct device *dev,
2806 				   struct device_attribute *attr, char *buf)
2807 {
2808 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2809 
2810 	return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2811 }
2812 
2813 static ssize_t show_allow_ext_sg(struct device *dev,
2814 				 struct device_attribute *attr, char *buf)
2815 {
2816 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2817 
2818 	return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2819 }
2820 
2821 static DEVICE_ATTR(id_ext,	    S_IRUGO, show_id_ext,	   NULL);
2822 static DEVICE_ATTR(ioc_guid,	    S_IRUGO, show_ioc_guid,	   NULL);
2823 static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);
2824 static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL);
2825 static DEVICE_ATTR(sgid,	    S_IRUGO, show_sgid,		   NULL);
2826 static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);
2827 static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);
2828 static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
2829 static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);
2830 static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
2831 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2832 static DEVICE_ATTR(ch_count,        S_IRUGO, show_ch_count,        NULL);
2833 static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
2834 static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
2835 static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
2836 static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);
2837 
2838 static struct device_attribute *srp_host_attrs[] = {
2839 	&dev_attr_id_ext,
2840 	&dev_attr_ioc_guid,
2841 	&dev_attr_service_id,
2842 	&dev_attr_pkey,
2843 	&dev_attr_sgid,
2844 	&dev_attr_dgid,
2845 	&dev_attr_orig_dgid,
2846 	&dev_attr_req_lim,
2847 	&dev_attr_zero_req_lim,
2848 	&dev_attr_local_ib_port,
2849 	&dev_attr_local_ib_device,
2850 	&dev_attr_ch_count,
2851 	&dev_attr_comp_vector,
2852 	&dev_attr_tl_retry_count,
2853 	&dev_attr_cmd_sg_entries,
2854 	&dev_attr_allow_ext_sg,
2855 	NULL
2856 };
2857 
2858 static struct scsi_host_template srp_template = {
2859 	.module				= THIS_MODULE,
2860 	.name				= "InfiniBand SRP initiator",
2861 	.proc_name			= DRV_NAME,
2862 	.slave_alloc			= srp_slave_alloc,
2863 	.slave_configure		= srp_slave_configure,
2864 	.info				= srp_target_info,
2865 	.queuecommand			= srp_queuecommand,
2866 	.change_queue_depth             = srp_change_queue_depth,
2867 	.eh_abort_handler		= srp_abort,
2868 	.eh_device_reset_handler	= srp_reset_device,
2869 	.eh_host_reset_handler		= srp_reset_host,
2870 	.skip_settle_delay		= true,
2871 	.sg_tablesize			= SRP_DEF_SG_TABLESIZE,
2872 	.can_queue			= SRP_DEFAULT_CMD_SQ_SIZE,
2873 	.this_id			= -1,
2874 	.cmd_per_lun			= SRP_DEFAULT_CMD_SQ_SIZE,
2875 	.use_clustering			= ENABLE_CLUSTERING,
2876 	.shost_attrs			= srp_host_attrs,
2877 	.track_queue_depth		= 1,
2878 };
2879 
2880 static int srp_sdev_count(struct Scsi_Host *host)
2881 {
2882 	struct scsi_device *sdev;
2883 	int c = 0;
2884 
2885 	shost_for_each_device(sdev, host)
2886 		c++;
2887 
2888 	return c;
2889 }
2890 
2891 /*
2892  * Return values:
2893  * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2894  * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2895  *    removal has been scheduled.
2896  * 0 and target->state != SRP_TARGET_REMOVED upon success.
2897  */
2898 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2899 {
2900 	struct srp_rport_identifiers ids;
2901 	struct srp_rport *rport;
2902 
2903 	target->state = SRP_TARGET_SCANNING;
2904 	sprintf(target->target_name, "SRP.T10:%016llX",
2905 		be64_to_cpu(target->id_ext));
2906 
2907 	if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2908 		return -ENODEV;
2909 
2910 	memcpy(ids.port_id, &target->id_ext, 8);
2911 	memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2912 	ids.roles = SRP_RPORT_ROLE_TARGET;
2913 	rport = srp_rport_add(target->scsi_host, &ids);
2914 	if (IS_ERR(rport)) {
2915 		scsi_remove_host(target->scsi_host);
2916 		return PTR_ERR(rport);
2917 	}
2918 
2919 	rport->lld_data = target;
2920 	target->rport = rport;
2921 
2922 	spin_lock(&host->target_lock);
2923 	list_add_tail(&target->list, &host->target_list);
2924 	spin_unlock(&host->target_lock);
2925 
2926 	scsi_scan_target(&target->scsi_host->shost_gendev,
2927 			 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
2928 
2929 	if (srp_connected_ch(target) < target->ch_count ||
2930 	    target->qp_in_error) {
2931 		shost_printk(KERN_INFO, target->scsi_host,
2932 			     PFX "SCSI scan failed - removing SCSI host\n");
2933 		srp_queue_remove_work(target);
2934 		goto out;
2935 	}
2936 
2937 	pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2938 		 dev_name(&target->scsi_host->shost_gendev),
2939 		 srp_sdev_count(target->scsi_host));
2940 
2941 	spin_lock_irq(&target->lock);
2942 	if (target->state == SRP_TARGET_SCANNING)
2943 		target->state = SRP_TARGET_LIVE;
2944 	spin_unlock_irq(&target->lock);
2945 
2946 out:
2947 	return 0;
2948 }
2949 
2950 static void srp_release_dev(struct device *dev)
2951 {
2952 	struct srp_host *host =
2953 		container_of(dev, struct srp_host, dev);
2954 
2955 	complete(&host->released);
2956 }
2957 
2958 static struct class srp_class = {
2959 	.name    = "infiniband_srp",
2960 	.dev_release = srp_release_dev
2961 };
2962 
2963 /**
2964  * srp_conn_unique() - check whether the connection to a target is unique
2965  * @host:   SRP host.
2966  * @target: SRP target port.
2967  */
2968 static bool srp_conn_unique(struct srp_host *host,
2969 			    struct srp_target_port *target)
2970 {
2971 	struct srp_target_port *t;
2972 	bool ret = false;
2973 
2974 	if (target->state == SRP_TARGET_REMOVED)
2975 		goto out;
2976 
2977 	ret = true;
2978 
2979 	spin_lock(&host->target_lock);
2980 	list_for_each_entry(t, &host->target_list, list) {
2981 		if (t != target &&
2982 		    target->id_ext == t->id_ext &&
2983 		    target->ioc_guid == t->ioc_guid &&
2984 		    target->initiator_ext == t->initiator_ext) {
2985 			ret = false;
2986 			break;
2987 		}
2988 	}
2989 	spin_unlock(&host->target_lock);
2990 
2991 out:
2992 	return ret;
2993 }
2994 
2995 /*
2996  * Target ports are added by writing
2997  *
2998  *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
2999  *     pkey=<P_Key>,service_id=<service ID>
3000  *
3001  * to the add_target sysfs attribute.
3002  */
3003 enum {
3004 	SRP_OPT_ERR		= 0,
3005 	SRP_OPT_ID_EXT		= 1 << 0,
3006 	SRP_OPT_IOC_GUID	= 1 << 1,
3007 	SRP_OPT_DGID		= 1 << 2,
3008 	SRP_OPT_PKEY		= 1 << 3,
3009 	SRP_OPT_SERVICE_ID	= 1 << 4,
3010 	SRP_OPT_MAX_SECT	= 1 << 5,
3011 	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,
3012 	SRP_OPT_IO_CLASS	= 1 << 7,
3013 	SRP_OPT_INITIATOR_EXT	= 1 << 8,
3014 	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9,
3015 	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
3016 	SRP_OPT_SG_TABLESIZE	= 1 << 11,
3017 	SRP_OPT_COMP_VECTOR	= 1 << 12,
3018 	SRP_OPT_TL_RETRY_COUNT	= 1 << 13,
3019 	SRP_OPT_QUEUE_SIZE	= 1 << 14,
3020 	SRP_OPT_ALL		= (SRP_OPT_ID_EXT	|
3021 				   SRP_OPT_IOC_GUID	|
3022 				   SRP_OPT_DGID		|
3023 				   SRP_OPT_PKEY		|
3024 				   SRP_OPT_SERVICE_ID),
3025 };
3026 
3027 static const match_table_t srp_opt_tokens = {
3028 	{ SRP_OPT_ID_EXT,		"id_ext=%s" 		},
3029 	{ SRP_OPT_IOC_GUID,		"ioc_guid=%s" 		},
3030 	{ SRP_OPT_DGID,			"dgid=%s" 		},
3031 	{ SRP_OPT_PKEY,			"pkey=%x" 		},
3032 	{ SRP_OPT_SERVICE_ID,		"service_id=%s"		},
3033 	{ SRP_OPT_MAX_SECT,		"max_sect=%d" 		},
3034 	{ SRP_OPT_MAX_CMD_PER_LUN,	"max_cmd_per_lun=%d" 	},
3035 	{ SRP_OPT_IO_CLASS,		"io_class=%x"		},
3036 	{ SRP_OPT_INITIATOR_EXT,	"initiator_ext=%s"	},
3037 	{ SRP_OPT_CMD_SG_ENTRIES,	"cmd_sg_entries=%u"	},
3038 	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	},
3039 	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	},
3040 	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	},
3041 	{ SRP_OPT_TL_RETRY_COUNT,	"tl_retry_count=%u"	},
3042 	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},
3043 	{ SRP_OPT_ERR,			NULL 			}
3044 };
3045 
3046 static int srp_parse_options(const char *buf, struct srp_target_port *target)
3047 {
3048 	char *options, *sep_opt;
3049 	char *p;
3050 	char dgid[3];
3051 	substring_t args[MAX_OPT_ARGS];
3052 	int opt_mask = 0;
3053 	int token;
3054 	int ret = -EINVAL;
3055 	int i;
3056 
3057 	options = kstrdup(buf, GFP_KERNEL);
3058 	if (!options)
3059 		return -ENOMEM;
3060 
3061 	sep_opt = options;
3062 	while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3063 		if (!*p)
3064 			continue;
3065 
3066 		token = match_token(p, srp_opt_tokens, args);
3067 		opt_mask |= token;
3068 
3069 		switch (token) {
3070 		case SRP_OPT_ID_EXT:
3071 			p = match_strdup(args);
3072 			if (!p) {
3073 				ret = -ENOMEM;
3074 				goto out;
3075 			}
3076 			target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3077 			kfree(p);
3078 			break;
3079 
3080 		case SRP_OPT_IOC_GUID:
3081 			p = match_strdup(args);
3082 			if (!p) {
3083 				ret = -ENOMEM;
3084 				goto out;
3085 			}
3086 			target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
3087 			kfree(p);
3088 			break;
3089 
3090 		case SRP_OPT_DGID:
3091 			p = match_strdup(args);
3092 			if (!p) {
3093 				ret = -ENOMEM;
3094 				goto out;
3095 			}
3096 			if (strlen(p) != 32) {
3097 				pr_warn("bad dest GID parameter '%s'\n", p);
3098 				kfree(p);
3099 				goto out;
3100 			}
3101 
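			/*
			 * Parse the 32 hex digit GID two characters at a
			 * time; dgid[] holds one byte worth of hex digits
			 * plus a terminating '\0'.
			 */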
3102 			for (i = 0; i < 16; ++i) {
3103 				strlcpy(dgid, p + i * 2, sizeof(dgid));
3104 				if (sscanf(dgid, "%hhx",
3105 					   &target->orig_dgid.raw[i]) < 1) {
3106 					ret = -EINVAL;
3107 					kfree(p);
3108 					goto out;
3109 				}
3110 			}
3111 			kfree(p);
3112 			break;
3113 
3114 		case SRP_OPT_PKEY:
3115 			if (match_hex(args, &token)) {
3116 				pr_warn("bad P_Key parameter '%s'\n", p);
3117 				goto out;
3118 			}
3119 			target->pkey = cpu_to_be16(token);
3120 			break;
3121 
3122 		case SRP_OPT_SERVICE_ID:
3123 			p = match_strdup(args);
3124 			if (!p) {
3125 				ret = -ENOMEM;
3126 				goto out;
3127 			}
3128 			target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3129 			kfree(p);
3130 			break;
3131 
3132 		case SRP_OPT_MAX_SECT:
3133 			if (match_int(args, &token)) {
3134 				pr_warn("bad max sect parameter '%s'\n", p);
3135 				goto out;
3136 			}
3137 			target->scsi_host->max_sectors = token;
3138 			break;
3139 
3140 		case SRP_OPT_QUEUE_SIZE:
3141 			if (match_int(args, &token) || token < 1) {
3142 				pr_warn("bad queue_size parameter '%s'\n", p);
3143 				goto out;
3144 			}
3145 			target->scsi_host->can_queue = token;
3146 			target->queue_size = token + SRP_RSP_SQ_SIZE +
3147 					     SRP_TSK_MGMT_SQ_SIZE;
3148 			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3149 				target->scsi_host->cmd_per_lun = token;
3150 			break;
3151 
3152 		case SRP_OPT_MAX_CMD_PER_LUN:
3153 			if (match_int(args, &token) || token < 1) {
3154 				pr_warn("bad max cmd_per_lun parameter '%s'\n",
3155 					p);
3156 				goto out;
3157 			}
3158 			target->scsi_host->cmd_per_lun = token;
3159 			break;
3160 
3161 		case SRP_OPT_IO_CLASS:
3162 			if (match_hex(args, &token)) {
3163 				pr_warn("bad IO class parameter '%s'\n", p);
3164 				goto out;
3165 			}
3166 			if (token != SRP_REV10_IB_IO_CLASS &&
3167 			    token != SRP_REV16A_IB_IO_CLASS) {
3168 				pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3169 					token, SRP_REV10_IB_IO_CLASS,
3170 					SRP_REV16A_IB_IO_CLASS);
3171 				goto out;
3172 			}
3173 			target->io_class = token;
3174 			break;
3175 
3176 		case SRP_OPT_INITIATOR_EXT:
3177 			p = match_strdup(args);
3178 			if (!p) {
3179 				ret = -ENOMEM;
3180 				goto out;
3181 			}
3182 			target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3183 			kfree(p);
3184 			break;
3185 
3186 		case SRP_OPT_CMD_SG_ENTRIES:
3187 			if (match_int(args, &token) || token < 1 || token > 255) {
3188 				pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3189 					p);
3190 				goto out;
3191 			}
3192 			target->cmd_sg_cnt = token;
3193 			break;
3194 
3195 		case SRP_OPT_ALLOW_EXT_SG:
3196 			if (match_int(args, &token)) {
3197 				pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3198 				goto out;
3199 			}
3200 			target->allow_ext_sg = !!token;
3201 			break;
3202 
3203 		case SRP_OPT_SG_TABLESIZE:
3204 			if (match_int(args, &token) || token < 1 ||
3205 					token > SG_MAX_SEGMENTS) {
3206 				pr_warn("bad max sg_tablesize parameter '%s'\n",
3207 					p);
3208 				goto out;
3209 			}
3210 			target->sg_tablesize = token;
3211 			break;
3212 
3213 		case SRP_OPT_COMP_VECTOR:
3214 			if (match_int(args, &token) || token < 0) {
3215 				pr_warn("bad comp_vector parameter '%s'\n", p);
3216 				goto out;
3217 			}
3218 			target->comp_vector = token;
3219 			break;
3220 
3221 		case SRP_OPT_TL_RETRY_COUNT:
3222 			if (match_int(args, &token) || token < 2 || token > 7) {
3223 				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3224 					p);
3225 				goto out;
3226 			}
3227 			target->tl_retry_count = token;
3228 			break;
3229 
3230 		default:
3231 			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3232 				p);
3233 			goto out;
3234 		}
3235 	}
3236 
3237 	if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3238 		ret = 0;
3239 	else
3240 		for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3241 			if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3242 			    !(srp_opt_tokens[i].token & opt_mask))
3243 				pr_warn("target creation request is missing parameter '%s'\n",
3244 					srp_opt_tokens[i].pattern);
3245 
3246 	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3247 	    && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3248 		pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3249 			target->scsi_host->cmd_per_lun,
3250 			target->scsi_host->can_queue);
3251 
3252 out:
3253 	kfree(options);
3254 	return ret;
3255 }
3256 
3257 static ssize_t srp_create_target(struct device *dev,
3258 				 struct device_attribute *attr,
3259 				 const char *buf, size_t count)
3260 {
3261 	struct srp_host *host =
3262 		container_of(dev, struct srp_host, dev);
3263 	struct Scsi_Host *target_host;
3264 	struct srp_target_port *target;
3265 	struct srp_rdma_ch *ch;
3266 	struct srp_device *srp_dev = host->srp_dev;
3267 	struct ib_device *ibdev = srp_dev->dev;
3268 	int ret, node_idx, node, cpu, i;
3269 	unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3270 	bool multich = false;
3271 
3272 	target_host = scsi_host_alloc(&srp_template,
3273 				      sizeof (struct srp_target_port));
3274 	if (!target_host)
3275 		return -ENOMEM;
3276 
3277 	target_host->transportt  = ib_srp_transport_template;
3278 	target_host->max_channel = 0;
3279 	target_host->max_id      = 1;
3280 	target_host->max_lun     = -1LL;
3281 	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3282 
3283 	target = host_to_target(target_host);
3284 
3285 	target->io_class	= SRP_REV16A_IB_IO_CLASS;
3286 	target->scsi_host	= target_host;
3287 	target->srp_host	= host;
3288 	target->pd		= host->srp_dev->pd;
3289 	target->lkey		= host->srp_dev->pd->local_dma_lkey;
3290 	target->cmd_sg_cnt	= cmd_sg_entries;
3291 	target->sg_tablesize	= indirect_sg_entries ? : cmd_sg_entries;
3292 	target->allow_ext_sg	= allow_ext_sg;
3293 	target->tl_retry_count	= 7;
3294 	target->queue_size	= SRP_DEFAULT_QUEUE_SIZE;
3295 
3296 	/*
3297 	 * Prevent the SCSI host from being removed by srp_remove_target()
3298 	 * before this function returns.
3299 	 */
3300 	scsi_host_get(target->scsi_host);
3301 
3302 	ret = mutex_lock_interruptible(&host->add_target_mutex);
3303 	if (ret < 0)
3304 		goto put;
3305 
3306 	ret = srp_parse_options(buf, target);
3307 	if (ret)
3308 		goto out;
3309 
3310 	target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3311 
3312 	if (!srp_conn_unique(target->srp_host, target)) {
3313 		shost_printk(KERN_INFO, target->scsi_host,
3314 			     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3315 			     be64_to_cpu(target->id_ext),
3316 			     be64_to_cpu(target->ioc_guid),
3317 			     be64_to_cpu(target->initiator_ext));
3318 		ret = -EEXIST;
3319 		goto out;
3320 	}
3321 
3322 	if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3323 	    target->cmd_sg_cnt < target->sg_tablesize) {
3324 		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3325 		target->sg_tablesize = target->cmd_sg_cnt;
3326 	}
3327 
3328 	if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3329 		/*
3330 		 * FR and FMR can only map one HCA page per entry. If the
3331 		 * start address is not aligned on a HCA page boundary two
3332 		 * entries will be used for the head and the tail although
3333 		 * these two entries combined contain at most one HCA page of
3334 		 * data. Hence the "+ 1" in the calculation below.
3335 		 *
3336 		 * The indirect data buffer descriptor is contiguous so the
3337 		 * memory for that buffer will only be registered if
3338 		 * register_always is true. Hence add one to mr_per_cmd if
3339 		 * register_always has been set.
3340 		 */
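		/*
		 * Example with illustrative values: with 4 KiB MR pages,
		 * max_pages_per_mr = 256 and max_sectors = 1024, one MR
		 * covers 2048 sectors, so mr_per_cmd = register_always + 1.
		 */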
3341 		max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3342 				  (ilog2(srp_dev->mr_page_size) - 9);
3343 		mr_per_cmd = register_always +
3344 			(target->scsi_host->max_sectors + 1 +
3345 			 max_sectors_per_mr - 1) / max_sectors_per_mr;
3346 		pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3347 			 target->scsi_host->max_sectors,
3348 			 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3349 			 max_sectors_per_mr, mr_per_cmd);
3350 	}
3351 
3352 	target_host->sg_tablesize = target->sg_tablesize;
3353 	target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3354 	target->mr_per_cmd = mr_per_cmd;
3355 	target->indirect_size = target->sg_tablesize *
3356 				sizeof (struct srp_direct_buf);
3357 	target->max_iu_len = sizeof (struct srp_cmd) +
3358 			     sizeof (struct srp_indirect_buf) +
3359 			     target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3360 
3361 	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3362 	INIT_WORK(&target->remove_work, srp_remove_work);
3363 	spin_lock_init(&target->lock);
3364 	ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3365 	if (ret)
3366 		goto out;
3367 
3368 	ret = -ENOMEM;
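	/*
	 * Unless overridden by the ch_count kernel module parameter, create
	 * min(4 * #nodes, #completion vectors) channels, but never more than
	 * the number of online CPUs and never fewer than one channel per
	 * NUMA node.
	 */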
3369 	target->ch_count = max_t(unsigned, num_online_nodes(),
3370 				 min(ch_count ? :
3371 				     min(4 * num_online_nodes(),
3372 					 ibdev->num_comp_vectors),
3373 				     num_online_cpus()));
3374 	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3375 			     GFP_KERNEL);
3376 	if (!target->ch)
3377 		goto out;
3378 
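	/*
	 * Spread the channels evenly over the online NUMA nodes and give
	 * each channel a completion vector from the slice that belongs to
	 * its node, so that completion processing stays local to the CPUs
	 * that submit the I/O.
	 */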
3379 	node_idx = 0;
3380 	for_each_online_node(node) {
3381 		const int ch_start = (node_idx * target->ch_count /
3382 				      num_online_nodes());
3383 		const int ch_end = ((node_idx + 1) * target->ch_count /
3384 				    num_online_nodes());
3385 		const int cv_start = (node_idx * ibdev->num_comp_vectors /
3386 				      num_online_nodes() + target->comp_vector)
3387 				     % ibdev->num_comp_vectors;
3388 		const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3389 				    num_online_nodes() + target->comp_vector)
3390 				   % ibdev->num_comp_vectors;
3391 		int cpu_idx = 0;
3392 
3393 		for_each_online_cpu(cpu) {
3394 			if (cpu_to_node(cpu) != node)
3395 				continue;
3396 			if (ch_start + cpu_idx >= ch_end)
3397 				continue;
3398 			ch = &target->ch[ch_start + cpu_idx];
3399 			ch->target = target;
3400 			ch->comp_vector = cv_start == cv_end ? cv_start :
3401 				cv_start + cpu_idx % (cv_end - cv_start);
3402 			spin_lock_init(&ch->lock);
3403 			INIT_LIST_HEAD(&ch->free_tx);
3404 			ret = srp_new_cm_id(ch);
3405 			if (ret)
3406 				goto err_disconnect;
3407 
3408 			ret = srp_create_ch_ib(ch);
3409 			if (ret)
3410 				goto err_disconnect;
3411 
3412 			ret = srp_alloc_req_data(ch);
3413 			if (ret)
3414 				goto err_disconnect;
3415 
3416 			ret = srp_connect_ch(ch, multich);
3417 			if (ret) {
3418 				shost_printk(KERN_ERR, target->scsi_host,
3419 					     PFX "Connection %d/%d failed\n",
3420 					     ch_start + cpu_idx,
3421 					     target->ch_count);
3422 				if (node_idx == 0 && cpu_idx == 0) {
3423 					goto err_disconnect;
3424 				} else {
3425 					srp_free_ch_ib(target, ch);
3426 					srp_free_req_data(target, ch);
3427 					target->ch_count = ch - target->ch;
3428 					goto connected;
3429 				}
3430 			}
3431 
3432 			multich = true;
3433 			cpu_idx++;
3434 		}
3435 		node_idx++;
3436 	}
3437 
3438 connected:
3439 	target->scsi_host->nr_hw_queues = target->ch_count;
3440 
3441 	ret = srp_add_target(host, target);
3442 	if (ret)
3443 		goto err_disconnect;
3444 
3445 	if (target->state != SRP_TARGET_REMOVED) {
3446 		shost_printk(KERN_DEBUG, target->scsi_host, PFX
3447 			     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3448 			     be64_to_cpu(target->id_ext),
3449 			     be64_to_cpu(target->ioc_guid),
3450 			     be16_to_cpu(target->pkey),
3451 			     be64_to_cpu(target->service_id),
3452 			     target->sgid.raw, target->orig_dgid.raw);
3453 	}
3454 
3455 	ret = count;
3456 
3457 out:
3458 	mutex_unlock(&host->add_target_mutex);
3459 
3460 put:
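	/*
	 * Drop the reference obtained by scsi_host_get() above and, if
	 * target creation failed, also the reference obtained by
	 * scsi_host_alloc() so that the SCSI host gets freed.
	 */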
3461 	scsi_host_put(target->scsi_host);
3462 	if (ret < 0)
3463 		scsi_host_put(target->scsi_host);
3464 
3465 	return ret;
3466 
3467 err_disconnect:
3468 	srp_disconnect_target(target);
3469 
3470 	for (i = 0; i < target->ch_count; i++) {
3471 		ch = &target->ch[i];
3472 		srp_free_ch_ib(target, ch);
3473 		srp_free_req_data(target, ch);
3474 	}
3475 
3476 	kfree(target->ch);
3477 	goto out;
3478 }
3479 
3480 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3481 
3482 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3483 			  char *buf)
3484 {
3485 	struct srp_host *host = container_of(dev, struct srp_host, dev);
3486 
3487 	return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3488 }
3489 
3490 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3491 
3492 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3493 			 char *buf)
3494 {
3495 	struct srp_host *host = container_of(dev, struct srp_host, dev);
3496 
3497 	return sprintf(buf, "%d\n", host->port);
3498 }
3499 
3500 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3501 
3502 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3503 {
3504 	struct srp_host *host;
3505 
3506 	host = kzalloc(sizeof *host, GFP_KERNEL);
3507 	if (!host)
3508 		return NULL;
3509 
3510 	INIT_LIST_HEAD(&host->target_list);
3511 	spin_lock_init(&host->target_lock);
3512 	init_completion(&host->released);
3513 	mutex_init(&host->add_target_mutex);
3514 	host->srp_dev = device;
3515 	host->port = port;
3516 
3517 	host->dev.class = &srp_class;
3518 	host->dev.parent = device->dev->dma_device;
3519 	dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3520 
3521 	if (device_register(&host->dev))
3522 		goto free_host;
3523 	if (device_create_file(&host->dev, &dev_attr_add_target))
3524 		goto err_class;
3525 	if (device_create_file(&host->dev, &dev_attr_ibdev))
3526 		goto err_class;
3527 	if (device_create_file(&host->dev, &dev_attr_port))
3528 		goto err_class;
3529 
3530 	return host;
3531 
3532 err_class:
3533 	device_unregister(&host->dev);
3534 
3535 free_host:
3536 	kfree(host);
3537 
3538 	return NULL;
3539 }
3540 
3541 static void srp_add_one(struct ib_device *device)
3542 {
3543 	struct srp_device *srp_dev;
3544 	struct ib_device_attr *attr = &device->attrs;
3545 	struct srp_host *host;
3546 	int mr_page_shift, p;
3547 	u64 max_pages_per_mr;
3548 	unsigned int flags = 0;
3549 
3550 	srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
3551 	if (!srp_dev)
3552 		return;
3553 
3554 	/*
3555 	 * Use the smallest page size supported by the HCA, down to a
3556 	 * minimum of 4096 bytes. We're unlikely to build large sglists
3557 	 * out of smaller entries.
3558 	 */
3559 	mr_page_shift		= max(12, ffs(attr->page_size_cap) - 1);
3560 	srp_dev->mr_page_size	= 1 << mr_page_shift;
3561 	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
3562 	max_pages_per_mr	= attr->max_mr_size;
3563 	do_div(max_pages_per_mr, srp_dev->mr_page_size);
3564 	pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
3565 		 attr->max_mr_size, srp_dev->mr_page_size,
3566 		 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
3567 	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3568 					  max_pages_per_mr);
3569 
3570 	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3571 			    device->map_phys_fmr && device->unmap_fmr);
3572 	srp_dev->has_fr = (attr->device_cap_flags &
3573 			   IB_DEVICE_MEM_MGT_EXTENSIONS);
3574 	if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
3575 		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3576 	} else if (!never_register &&
3577 		   attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
3578 		srp_dev->use_fast_reg = (srp_dev->has_fr &&
3579 					 (!srp_dev->has_fmr || prefer_fr));
3580 		srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3581 	}
3582 
3583 	if (never_register || !register_always ||
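	/*
	 * Only request a PD with an unsafe global rkey when memory will not
	 * always be registered, i.e. when never_register is set, when
	 * register_always is off or when neither FMR nor FR is available.
	 */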
3584 	    (!srp_dev->has_fmr && !srp_dev->has_fr))
3585 		flags |= IB_PD_UNSAFE_GLOBAL_RKEY;
3586 
3587 	if (srp_dev->use_fast_reg) {
3588 		srp_dev->max_pages_per_mr =
3589 			min_t(u32, srp_dev->max_pages_per_mr,
3590 			      attr->max_fast_reg_page_list_len);
3591 	}
3592 	srp_dev->mr_max_size	= srp_dev->mr_page_size *
3593 				   srp_dev->max_pages_per_mr;
3594 	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3595 		 device->name, mr_page_shift, attr->max_mr_size,
3596 		 attr->max_fast_reg_page_list_len,
3597 		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3598 
3599 	INIT_LIST_HEAD(&srp_dev->dev_list);
3600 
3601 	srp_dev->dev = device;
3602 	srp_dev->pd  = ib_alloc_pd(device, flags);
3603 	if (IS_ERR(srp_dev->pd))
3604 		goto free_dev;
3605 
3607 	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3608 		host = srp_add_port(srp_dev, p);
3609 		if (host)
3610 			list_add_tail(&host->list, &srp_dev->dev_list);
3611 	}
3612 
3613 	ib_set_client_data(device, &srp_client, srp_dev);
3614 	return;
3615 
3616 free_dev:
3617 	kfree(srp_dev);
3618 }
3619 
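/*
 * srp_remove_one() - remove-device callback of the srp_client IB client.
 *
 * Unregisters every srp_host created for the device, waits until its sysfs
 * entry is gone so that no new targets can be added, queues removal of all
 * existing target ports and flushes the removal workqueues before freeing
 * the PD and the srp_device.
 */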
3620 static void srp_remove_one(struct ib_device *device, void *client_data)
3621 {
3622 	struct srp_device *srp_dev;
3623 	struct srp_host *host, *tmp_host;
3624 	struct srp_target_port *target;
3625 
3626 	srp_dev = client_data;
3627 	if (!srp_dev)
3628 		return;
3629 
3630 	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3631 		device_unregister(&host->dev);
3632 		/*
3633 		 * Wait for the sysfs entry to go away, so that no new
3634 		 * target ports can be created.
3635 		 */
3636 		wait_for_completion(&host->released);
3637 
3638 		/*
3639 		 * Remove all target ports.
3640 		 */
3641 		spin_lock(&host->target_lock);
3642 		list_for_each_entry(target, &host->target_list, list)
3643 			srp_queue_remove_work(target);
3644 		spin_unlock(&host->target_lock);
3645 
3646 		/*
3647 		 * Wait for tl_err and target port removal tasks.
3648 		 */
3649 		flush_workqueue(system_long_wq);
3650 		flush_workqueue(srp_remove_wq);
3651 
3652 		kfree(host);
3653 	}
3654 
3655 	ib_dealloc_pd(srp_dev->pd);
3656 
3657 	kfree(srp_dev);
3658 }
3659 
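/*
 * Callbacks and tunables handed to the SRP transport class; reconnect,
 * fast I/O failure and device-loss handling are driven through these hooks.
 */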
3660 static struct srp_function_template ib_srp_transport_functions = {
3661 	.has_rport_state	 = true,
3662 	.reset_timer_if_blocked	 = true,
3663 	.reconnect_delay	 = &srp_reconnect_delay,
3664 	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
3665 	.dev_loss_tmo		 = &srp_dev_loss_tmo,
3666 	.reconnect		 = srp_rport_reconnect,
3667 	.rport_delete		 = srp_rport_delete,
3668 	.terminate_rport_io	 = srp_terminate_io,
3669 };
3670 
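/*
 * Module initialization: validate and clamp the scatter/gather module
 * parameters, create the srp_remove workqueue, attach the SRP transport
 * template, register srp_class and the SA client, and finally register
 * with the IB core.  The error path unwinds these steps in reverse order.
 */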
3671 static int __init srp_init_module(void)
3672 {
3673 	int ret;
3674 
3675 	if (srp_sg_tablesize) {
3676 		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3677 		if (!cmd_sg_entries)
3678 			cmd_sg_entries = srp_sg_tablesize;
3679 	}
3680 
3681 	if (!cmd_sg_entries)
3682 		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3683 
3684 	if (cmd_sg_entries > 255) {
3685 		pr_warn("Clamping cmd_sg_entries to 255\n");
3686 		cmd_sg_entries = 255;
3687 	}
3688 
3689 	if (!indirect_sg_entries)
3690 		indirect_sg_entries = cmd_sg_entries;
3691 	else if (indirect_sg_entries < cmd_sg_entries) {
3692 		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3693 			cmd_sg_entries);
3694 		indirect_sg_entries = cmd_sg_entries;
3695 	}
3696 
3697 	srp_remove_wq = create_workqueue("srp_remove");
3698 	if (!srp_remove_wq) {
3699 		ret = -ENOMEM;
3700 		goto out;
3701 	}
3702 
3703 	ret = -ENOMEM;
3704 	ib_srp_transport_template =
3705 		srp_attach_transport(&ib_srp_transport_functions);
3706 	if (!ib_srp_transport_template)
3707 		goto destroy_wq;
3708 
3709 	ret = class_register(&srp_class);
3710 	if (ret) {
3711 		pr_err("couldn't register class infiniband_srp\n");
3712 		goto release_tr;
3713 	}
3714 
3715 	ib_sa_register_client(&srp_sa_client);
3716 
3717 	ret = ib_register_client(&srp_client);
3718 	if (ret) {
3719 		pr_err("couldn't register IB client\n");
3720 		goto unreg_sa;
3721 	}
3722 
3723 out:
3724 	return ret;
3725 
3726 unreg_sa:
3727 	ib_sa_unregister_client(&srp_sa_client);
3728 	class_unregister(&srp_class);
3729 
3730 release_tr:
3731 	srp_release_transport(ib_srp_transport_template);
3732 
3733 destroy_wq:
3734 	destroy_workqueue(srp_remove_wq);
3735 	goto out;
3736 }
3737 
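/*
 * Module unload: tear everything down in the reverse order of
 * srp_init_module().  Unregistering the IB client invokes srp_remove_one()
 * for every HCA before the transport template and workqueue are released.
 */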
3738 static void __exit srp_cleanup_module(void)
3739 {
3740 	ib_unregister_client(&srp_client);
3741 	ib_sa_unregister_client(&srp_sa_client);
3742 	class_unregister(&srp_class);
3743 	srp_release_transport(ib_srp_transport_template);
3744 	destroy_workqueue(srp_remove_wq);
3745 }
3746 
3747 module_init(srp_init_module);
3748 module_exit(srp_cleanup_module);
3749