xref: /linux/drivers/infiniband/ulp/srp/ib_srp.c (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1 /*
2  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34 
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <rdma/ib_cache.h>
44 
45 #include <linux/atomic.h>
46 
47 #include <scsi/scsi.h>
48 #include <scsi/scsi_device.h>
49 #include <scsi/scsi_dbg.h>
50 #include <scsi/scsi_tcq.h>
51 #include <scsi/srp.h>
52 #include <scsi/scsi_transport_srp.h>
53 
54 #include "ib_srp.h"
55 
56 #define DRV_NAME	"ib_srp"
57 #define PFX		DRV_NAME ": "
58 #define DRV_VERSION	"2.0"
59 #define DRV_RELDATE	"July 26, 2015"
60 
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_VERSION(DRV_VERSION);
65 MODULE_INFO(release_date, DRV_RELDATE);
66 
67 static unsigned int srp_sg_tablesize;
68 static unsigned int cmd_sg_entries;
69 static unsigned int indirect_sg_entries;
70 static bool allow_ext_sg;
71 static bool prefer_fr = true;
72 static bool register_always = true;
73 static bool never_register;
74 static int topspin_workarounds = 1;
75 
76 module_param(srp_sg_tablesize, uint, 0444);
77 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
78 
79 module_param(cmd_sg_entries, uint, 0444);
80 MODULE_PARM_DESC(cmd_sg_entries,
81 		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
82 
83 module_param(indirect_sg_entries, uint, 0444);
84 MODULE_PARM_DESC(indirect_sg_entries,
85 		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
86 
87 module_param(allow_ext_sg, bool, 0444);
88 MODULE_PARM_DESC(allow_ext_sg,
89 		  "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
90 
91 module_param(topspin_workarounds, int, 0444);
92 MODULE_PARM_DESC(topspin_workarounds,
93 		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
94 
95 module_param(prefer_fr, bool, 0444);
96 MODULE_PARM_DESC(prefer_fr,
97 "Whether to use fast registration if both FMR and fast registration are supported");
98 
99 module_param(register_always, bool, 0444);
100 MODULE_PARM_DESC(register_always,
101 		 "Use memory registration even for contiguous memory regions");
102 
103 module_param(never_register, bool, 0444);
104 MODULE_PARM_DESC(never_register, "Never register memory");
105 
106 static const struct kernel_param_ops srp_tmo_ops;
107 
108 static int srp_reconnect_delay = 10;
109 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
110 		S_IRUGO | S_IWUSR);
111 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
112 
113 static int srp_fast_io_fail_tmo = 15;
114 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
115 		S_IRUGO | S_IWUSR);
116 MODULE_PARM_DESC(fast_io_fail_tmo,
117 		 "Number of seconds between the observation of a transport"
118 		 " layer error and failing all I/O. \"off\" means that this"
119 		 " functionality is disabled.");
120 
121 static int srp_dev_loss_tmo = 600;
122 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
123 		S_IRUGO | S_IWUSR);
124 MODULE_PARM_DESC(dev_loss_tmo,
125 		 "Maximum number of seconds that the SRP transport should"
126 		 " insulate transport layer errors. After this time has been"
127 		 " exceeded the SCSI host is removed. Should be"
128 		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
129 		 " if fast_io_fail_tmo has not been set. \"off\" means that"
130 		 " this functionality is disabled.");
131 
132 static unsigned ch_count;
133 module_param(ch_count, uint, 0444);
134 MODULE_PARM_DESC(ch_count,
135 		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
136 
137 static void srp_add_one(struct ib_device *device);
138 static void srp_remove_one(struct ib_device *device, void *client_data);
139 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
140 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
141 		const char *opname);
142 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
143 
144 static struct scsi_transport_template *ib_srp_transport_template;
145 static struct workqueue_struct *srp_remove_wq;
146 
147 static struct ib_client srp_client = {
148 	.name   = "srp",
149 	.add    = srp_add_one,
150 	.remove = srp_remove_one
151 };
152 
153 static struct ib_sa_client srp_sa_client;
154 
155 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
156 {
157 	int tmo = *(int *)kp->arg;
158 
159 	if (tmo >= 0)
160 		return sprintf(buffer, "%d", tmo);
161 	else
162 		return sprintf(buffer, "off");
163 }
164 
165 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
166 {
167 	int tmo, res;
168 
169 	res = srp_parse_tmo(&tmo, val);
170 	if (res)
171 		goto out;
172 
173 	if (kp->arg == &srp_reconnect_delay)
174 		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
175 				    srp_dev_loss_tmo);
176 	else if (kp->arg == &srp_fast_io_fail_tmo)
177 		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
178 	else
179 		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
180 				    tmo);
181 	if (res)
182 		goto out;
183 	*(int *)kp->arg = tmo;
184 
185 out:
186 	return res;
187 }
188 
189 static const struct kernel_param_ops srp_tmo_ops = {
190 	.get = srp_tmo_get,
191 	.set = srp_tmo_set,
192 };
193 
194 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
195 {
196 	return (struct srp_target_port *) host->hostdata;
197 }
198 
199 static const char *srp_target_info(struct Scsi_Host *host)
200 {
201 	return host_to_target(host)->target_name;
202 }
203 
204 static int srp_target_is_topspin(struct srp_target_port *target)
205 {
206 	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
207 	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };
208 
209 	return topspin_workarounds &&
210 		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
211 		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
212 }
213 
214 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
215 				   gfp_t gfp_mask,
216 				   enum dma_data_direction direction)
217 {
218 	struct srp_iu *iu;
219 
220 	iu = kmalloc(sizeof *iu, gfp_mask);
221 	if (!iu)
222 		goto out;
223 
224 	iu->buf = kzalloc(size, gfp_mask);
225 	if (!iu->buf)
226 		goto out_free_iu;
227 
228 	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
229 				    direction);
230 	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
231 		goto out_free_buf;
232 
233 	iu->size      = size;
234 	iu->direction = direction;
235 
236 	return iu;
237 
238 out_free_buf:
239 	kfree(iu->buf);
240 out_free_iu:
241 	kfree(iu);
242 out:
243 	return NULL;
244 }
245 
246 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
247 {
248 	if (!iu)
249 		return;
250 
251 	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
252 			    iu->direction);
253 	kfree(iu->buf);
254 	kfree(iu);
255 }
256 
257 static void srp_qp_event(struct ib_event *event, void *context)
258 {
259 	pr_debug("QP event %s (%d)\n",
260 		 ib_event_msg(event->event), event->event);
261 }
262 
263 static int srp_init_qp(struct srp_target_port *target,
264 		       struct ib_qp *qp)
265 {
266 	struct ib_qp_attr *attr;
267 	int ret;
268 
269 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
270 	if (!attr)
271 		return -ENOMEM;
272 
273 	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
274 				  target->srp_host->port,
275 				  be16_to_cpu(target->pkey),
276 				  &attr->pkey_index);
277 	if (ret)
278 		goto out;
279 
280 	attr->qp_state        = IB_QPS_INIT;
281 	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
282 				    IB_ACCESS_REMOTE_WRITE);
283 	attr->port_num        = target->srp_host->port;
284 
285 	ret = ib_modify_qp(qp, attr,
286 			   IB_QP_STATE		|
287 			   IB_QP_PKEY_INDEX	|
288 			   IB_QP_ACCESS_FLAGS	|
289 			   IB_QP_PORT);
290 
291 out:
292 	kfree(attr);
293 	return ret;
294 }
295 
296 static int srp_new_cm_id(struct srp_rdma_ch *ch)
297 {
298 	struct srp_target_port *target = ch->target;
299 	struct ib_cm_id *new_cm_id;
300 
301 	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
302 				    srp_cm_handler, ch);
303 	if (IS_ERR(new_cm_id))
304 		return PTR_ERR(new_cm_id);
305 
306 	if (ch->cm_id)
307 		ib_destroy_cm_id(ch->cm_id);
308 	ch->cm_id = new_cm_id;
309 	ch->path.sgid = target->sgid;
310 	ch->path.dgid = target->orig_dgid;
311 	ch->path.pkey = target->pkey;
312 	ch->path.service_id = target->service_id;
313 
314 	return 0;
315 }
316 
317 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
318 {
319 	struct srp_device *dev = target->srp_host->srp_dev;
320 	struct ib_fmr_pool_param fmr_param;
321 
322 	memset(&fmr_param, 0, sizeof(fmr_param));
323 	fmr_param.pool_size	    = target->mr_pool_size;
324 	fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
325 	fmr_param.cache		    = 1;
326 	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
327 	fmr_param.page_shift	    = ilog2(dev->mr_page_size);
328 	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
329 				       IB_ACCESS_REMOTE_WRITE |
330 				       IB_ACCESS_REMOTE_READ);
331 
332 	return ib_create_fmr_pool(dev->pd, &fmr_param);
333 }
334 
335 /**
336  * srp_destroy_fr_pool() - free the resources owned by a pool
337  * @pool: Fast registration pool to be destroyed.
338  */
339 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
340 {
341 	int i;
342 	struct srp_fr_desc *d;
343 
344 	if (!pool)
345 		return;
346 
347 	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
348 		if (d->mr)
349 			ib_dereg_mr(d->mr);
350 	}
351 	kfree(pool);
352 }
353 
354 /**
355  * srp_create_fr_pool() - allocate and initialize a pool for fast registration
356  * @device:            IB device to allocate fast registration descriptors for.
357  * @pd:                Protection domain associated with the FR descriptors.
358  * @pool_size:         Number of descriptors to allocate.
359  * @max_page_list_len: Maximum fast registration work request page list length.
360  */
361 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
362 					      struct ib_pd *pd, int pool_size,
363 					      int max_page_list_len)
364 {
365 	struct srp_fr_pool *pool;
366 	struct srp_fr_desc *d;
367 	struct ib_mr *mr;
368 	int i, ret = -EINVAL;
369 
370 	if (pool_size <= 0)
371 		goto err;
372 	ret = -ENOMEM;
373 	pool = kzalloc(sizeof(struct srp_fr_pool) +
374 		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
375 	if (!pool)
376 		goto err;
377 	pool->size = pool_size;
378 	pool->max_page_list_len = max_page_list_len;
379 	spin_lock_init(&pool->lock);
380 	INIT_LIST_HEAD(&pool->free_list);
381 
382 	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
383 		mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
384 				 max_page_list_len);
385 		if (IS_ERR(mr)) {
386 			ret = PTR_ERR(mr);
387 			goto destroy_pool;
388 		}
389 		d->mr = mr;
390 		list_add_tail(&d->entry, &pool->free_list);
391 	}
392 
393 out:
394 	return pool;
395 
396 destroy_pool:
397 	srp_destroy_fr_pool(pool);
398 
399 err:
400 	pool = ERR_PTR(ret);
401 	goto out;
402 }
403 
404 /**
405  * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
406  * @pool: Pool to obtain descriptor from.
407  */
408 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
409 {
410 	struct srp_fr_desc *d = NULL;
411 	unsigned long flags;
412 
413 	spin_lock_irqsave(&pool->lock, flags);
414 	if (!list_empty(&pool->free_list)) {
415 		d = list_first_entry(&pool->free_list, typeof(*d), entry);
416 		list_del(&d->entry);
417 	}
418 	spin_unlock_irqrestore(&pool->lock, flags);
419 
420 	return d;
421 }
422 
423 /**
424  * srp_fr_pool_put() - put an FR descriptor back in the free list
425  * @pool: Pool the descriptor was allocated from.
426  * @desc: Pointer to an array of fast registration descriptor pointers.
427  * @n:    Number of descriptors to put back.
428  *
429  * Note: The caller must already have queued an invalidation request for
430  * desc->mr->rkey before calling this function.
431  */
432 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
433 			    int n)
434 {
435 	unsigned long flags;
436 	int i;
437 
438 	spin_lock_irqsave(&pool->lock, flags);
439 	for (i = 0; i < n; i++)
440 		list_add(&desc[i]->entry, &pool->free_list);
441 	spin_unlock_irqrestore(&pool->lock, flags);
442 }
443 
444 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
445 {
446 	struct srp_device *dev = target->srp_host->srp_dev;
447 
448 	return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
449 				  dev->max_pages_per_mr);
450 }
451 
/**
 * srp_destroy_qp() - destroy an RDMA queue pair
 * @qp: RDMA queue pair.
 *
 * The receive queue is drained with ib_drain_rq() first so that the
 * receive completion handler can no longer touch the queue pair while
 * ib_destroy_qp() tears it down.
 */
static void srp_destroy_qp(struct ib_qp *qp)
{
	/* Order matters: drain before destroy. */
	ib_drain_rq(qp);
	ib_destroy_qp(qp);
}
465 
466 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
467 {
468 	struct srp_target_port *target = ch->target;
469 	struct srp_device *dev = target->srp_host->srp_dev;
470 	struct ib_qp_init_attr *init_attr;
471 	struct ib_cq *recv_cq, *send_cq;
472 	struct ib_qp *qp;
473 	struct ib_fmr_pool *fmr_pool = NULL;
474 	struct srp_fr_pool *fr_pool = NULL;
475 	const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
476 	int ret;
477 
478 	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
479 	if (!init_attr)
480 		return -ENOMEM;
481 
482 	/* queue_size + 1 for ib_drain_rq() */
483 	recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
484 				ch->comp_vector, IB_POLL_SOFTIRQ);
485 	if (IS_ERR(recv_cq)) {
486 		ret = PTR_ERR(recv_cq);
487 		goto err;
488 	}
489 
490 	send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
491 				ch->comp_vector, IB_POLL_DIRECT);
492 	if (IS_ERR(send_cq)) {
493 		ret = PTR_ERR(send_cq);
494 		goto err_recv_cq;
495 	}
496 
497 	init_attr->event_handler       = srp_qp_event;
498 	init_attr->cap.max_send_wr     = m * target->queue_size;
499 	init_attr->cap.max_recv_wr     = target->queue_size + 1;
500 	init_attr->cap.max_recv_sge    = 1;
501 	init_attr->cap.max_send_sge    = 1;
502 	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
503 	init_attr->qp_type             = IB_QPT_RC;
504 	init_attr->send_cq             = send_cq;
505 	init_attr->recv_cq             = recv_cq;
506 
507 	qp = ib_create_qp(dev->pd, init_attr);
508 	if (IS_ERR(qp)) {
509 		ret = PTR_ERR(qp);
510 		goto err_send_cq;
511 	}
512 
513 	ret = srp_init_qp(target, qp);
514 	if (ret)
515 		goto err_qp;
516 
517 	if (dev->use_fast_reg) {
518 		fr_pool = srp_alloc_fr_pool(target);
519 		if (IS_ERR(fr_pool)) {
520 			ret = PTR_ERR(fr_pool);
521 			shost_printk(KERN_WARNING, target->scsi_host, PFX
522 				     "FR pool allocation failed (%d)\n", ret);
523 			goto err_qp;
524 		}
525 	} else if (dev->use_fmr) {
526 		fmr_pool = srp_alloc_fmr_pool(target);
527 		if (IS_ERR(fmr_pool)) {
528 			ret = PTR_ERR(fmr_pool);
529 			shost_printk(KERN_WARNING, target->scsi_host, PFX
530 				     "FMR pool allocation failed (%d)\n", ret);
531 			goto err_qp;
532 		}
533 	}
534 
535 	if (ch->qp)
536 		srp_destroy_qp(ch->qp);
537 	if (ch->recv_cq)
538 		ib_free_cq(ch->recv_cq);
539 	if (ch->send_cq)
540 		ib_free_cq(ch->send_cq);
541 
542 	ch->qp = qp;
543 	ch->recv_cq = recv_cq;
544 	ch->send_cq = send_cq;
545 
546 	if (dev->use_fast_reg) {
547 		if (ch->fr_pool)
548 			srp_destroy_fr_pool(ch->fr_pool);
549 		ch->fr_pool = fr_pool;
550 	} else if (dev->use_fmr) {
551 		if (ch->fmr_pool)
552 			ib_destroy_fmr_pool(ch->fmr_pool);
553 		ch->fmr_pool = fmr_pool;
554 	}
555 
556 	kfree(init_attr);
557 	return 0;
558 
559 err_qp:
560 	srp_destroy_qp(qp);
561 
562 err_send_cq:
563 	ib_free_cq(send_cq);
564 
565 err_recv_cq:
566 	ib_free_cq(recv_cq);
567 
568 err:
569 	kfree(init_attr);
570 	return ret;
571 }
572 
573 /*
574  * Note: this function may be called without srp_alloc_iu_bufs() having been
575  * invoked. Hence the ch->[rt]x_ring checks.
576  */
577 static void srp_free_ch_ib(struct srp_target_port *target,
578 			   struct srp_rdma_ch *ch)
579 {
580 	struct srp_device *dev = target->srp_host->srp_dev;
581 	int i;
582 
583 	if (!ch->target)
584 		return;
585 
586 	if (ch->cm_id) {
587 		ib_destroy_cm_id(ch->cm_id);
588 		ch->cm_id = NULL;
589 	}
590 
591 	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
592 	if (!ch->qp)
593 		return;
594 
595 	if (dev->use_fast_reg) {
596 		if (ch->fr_pool)
597 			srp_destroy_fr_pool(ch->fr_pool);
598 	} else if (dev->use_fmr) {
599 		if (ch->fmr_pool)
600 			ib_destroy_fmr_pool(ch->fmr_pool);
601 	}
602 
603 	srp_destroy_qp(ch->qp);
604 	ib_free_cq(ch->send_cq);
605 	ib_free_cq(ch->recv_cq);
606 
607 	/*
608 	 * Avoid that the SCSI error handler tries to use this channel after
609 	 * it has been freed. The SCSI error handler can namely continue
610 	 * trying to perform recovery actions after scsi_remove_host()
611 	 * returned.
612 	 */
613 	ch->target = NULL;
614 
615 	ch->qp = NULL;
616 	ch->send_cq = ch->recv_cq = NULL;
617 
618 	if (ch->rx_ring) {
619 		for (i = 0; i < target->queue_size; ++i)
620 			srp_free_iu(target->srp_host, ch->rx_ring[i]);
621 		kfree(ch->rx_ring);
622 		ch->rx_ring = NULL;
623 	}
624 	if (ch->tx_ring) {
625 		for (i = 0; i < target->queue_size; ++i)
626 			srp_free_iu(target->srp_host, ch->tx_ring[i]);
627 		kfree(ch->tx_ring);
628 		ch->tx_ring = NULL;
629 	}
630 }
631 
632 static void srp_path_rec_completion(int status,
633 				    struct ib_sa_path_rec *pathrec,
634 				    void *ch_ptr)
635 {
636 	struct srp_rdma_ch *ch = ch_ptr;
637 	struct srp_target_port *target = ch->target;
638 
639 	ch->status = status;
640 	if (status)
641 		shost_printk(KERN_ERR, target->scsi_host,
642 			     PFX "Got failed path rec status %d\n", status);
643 	else
644 		ch->path = *pathrec;
645 	complete(&ch->done);
646 }
647 
648 static int srp_lookup_path(struct srp_rdma_ch *ch)
649 {
650 	struct srp_target_port *target = ch->target;
651 	int ret;
652 
653 	ch->path.numb_path = 1;
654 
655 	init_completion(&ch->done);
656 
657 	ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
658 					       target->srp_host->srp_dev->dev,
659 					       target->srp_host->port,
660 					       &ch->path,
661 					       IB_SA_PATH_REC_SERVICE_ID |
662 					       IB_SA_PATH_REC_DGID	 |
663 					       IB_SA_PATH_REC_SGID	 |
664 					       IB_SA_PATH_REC_NUMB_PATH	 |
665 					       IB_SA_PATH_REC_PKEY,
666 					       SRP_PATH_REC_TIMEOUT_MS,
667 					       GFP_KERNEL,
668 					       srp_path_rec_completion,
669 					       ch, &ch->path_query);
670 	if (ch->path_query_id < 0)
671 		return ch->path_query_id;
672 
673 	ret = wait_for_completion_interruptible(&ch->done);
674 	if (ret < 0)
675 		return ret;
676 
677 	if (ch->status < 0)
678 		shost_printk(KERN_WARNING, target->scsi_host,
679 			     PFX "Path record query failed\n");
680 
681 	return ch->status;
682 }
683 
684 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
685 {
686 	struct srp_target_port *target = ch->target;
687 	struct {
688 		struct ib_cm_req_param param;
689 		struct srp_login_req   priv;
690 	} *req = NULL;
691 	int status;
692 
693 	req = kzalloc(sizeof *req, GFP_KERNEL);
694 	if (!req)
695 		return -ENOMEM;
696 
697 	req->param.primary_path		      = &ch->path;
698 	req->param.alternate_path 	      = NULL;
699 	req->param.service_id 		      = target->service_id;
700 	req->param.qp_num		      = ch->qp->qp_num;
701 	req->param.qp_type		      = ch->qp->qp_type;
702 	req->param.private_data 	      = &req->priv;
703 	req->param.private_data_len 	      = sizeof req->priv;
704 	req->param.flow_control 	      = 1;
705 
706 	get_random_bytes(&req->param.starting_psn, 4);
707 	req->param.starting_psn 	     &= 0xffffff;
708 
709 	/*
710 	 * Pick some arbitrary defaults here; we could make these
711 	 * module parameters if anyone cared about setting them.
712 	 */
713 	req->param.responder_resources	      = 4;
714 	req->param.remote_cm_response_timeout = 20;
715 	req->param.local_cm_response_timeout  = 20;
716 	req->param.retry_count                = target->tl_retry_count;
717 	req->param.rnr_retry_count 	      = 7;
718 	req->param.max_cm_retries 	      = 15;
719 
720 	req->priv.opcode     	= SRP_LOGIN_REQ;
721 	req->priv.tag        	= 0;
722 	req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
723 	req->priv.req_buf_fmt 	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
724 					      SRP_BUF_FORMAT_INDIRECT);
725 	req->priv.req_flags	= (multich ? SRP_MULTICHAN_MULTI :
726 				   SRP_MULTICHAN_SINGLE);
727 	/*
728 	 * In the published SRP specification (draft rev. 16a), the
729 	 * port identifier format is 8 bytes of ID extension followed
730 	 * by 8 bytes of GUID.  Older drafts put the two halves in the
731 	 * opposite order, so that the GUID comes first.
732 	 *
733 	 * Targets conforming to these obsolete drafts can be
734 	 * recognized by the I/O Class they report.
735 	 */
736 	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
737 		memcpy(req->priv.initiator_port_id,
738 		       &target->sgid.global.interface_id, 8);
739 		memcpy(req->priv.initiator_port_id + 8,
740 		       &target->initiator_ext, 8);
741 		memcpy(req->priv.target_port_id,     &target->ioc_guid, 8);
742 		memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
743 	} else {
744 		memcpy(req->priv.initiator_port_id,
745 		       &target->initiator_ext, 8);
746 		memcpy(req->priv.initiator_port_id + 8,
747 		       &target->sgid.global.interface_id, 8);
748 		memcpy(req->priv.target_port_id,     &target->id_ext, 8);
749 		memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
750 	}
751 
752 	/*
753 	 * Topspin/Cisco SRP targets will reject our login unless we
754 	 * zero out the first 8 bytes of our initiator port ID and set
755 	 * the second 8 bytes to the local node GUID.
756 	 */
757 	if (srp_target_is_topspin(target)) {
758 		shost_printk(KERN_DEBUG, target->scsi_host,
759 			     PFX "Topspin/Cisco initiator port ID workaround "
760 			     "activated for target GUID %016llx\n",
761 			     be64_to_cpu(target->ioc_guid));
762 		memset(req->priv.initiator_port_id, 0, 8);
763 		memcpy(req->priv.initiator_port_id + 8,
764 		       &target->srp_host->srp_dev->dev->node_guid, 8);
765 	}
766 
767 	status = ib_send_cm_req(ch->cm_id, &req->param);
768 
769 	kfree(req);
770 
771 	return status;
772 }
773 
774 static bool srp_queue_remove_work(struct srp_target_port *target)
775 {
776 	bool changed = false;
777 
778 	spin_lock_irq(&target->lock);
779 	if (target->state != SRP_TARGET_REMOVED) {
780 		target->state = SRP_TARGET_REMOVED;
781 		changed = true;
782 	}
783 	spin_unlock_irq(&target->lock);
784 
785 	if (changed)
786 		queue_work(srp_remove_wq, &target->remove_work);
787 
788 	return changed;
789 }
790 
791 static void srp_disconnect_target(struct srp_target_port *target)
792 {
793 	struct srp_rdma_ch *ch;
794 	int i;
795 
796 	/* XXX should send SRP_I_LOGOUT request */
797 
798 	for (i = 0; i < target->ch_count; i++) {
799 		ch = &target->ch[i];
800 		ch->connected = false;
801 		if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
802 			shost_printk(KERN_DEBUG, target->scsi_host,
803 				     PFX "Sending CM DREQ failed\n");
804 		}
805 	}
806 }
807 
808 static void srp_free_req_data(struct srp_target_port *target,
809 			      struct srp_rdma_ch *ch)
810 {
811 	struct srp_device *dev = target->srp_host->srp_dev;
812 	struct ib_device *ibdev = dev->dev;
813 	struct srp_request *req;
814 	int i;
815 
816 	if (!ch->req_ring)
817 		return;
818 
819 	for (i = 0; i < target->req_ring_size; ++i) {
820 		req = &ch->req_ring[i];
821 		if (dev->use_fast_reg) {
822 			kfree(req->fr_list);
823 		} else {
824 			kfree(req->fmr_list);
825 			kfree(req->map_page);
826 		}
827 		if (req->indirect_dma_addr) {
828 			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
829 					    target->indirect_size,
830 					    DMA_TO_DEVICE);
831 		}
832 		kfree(req->indirect_desc);
833 	}
834 
835 	kfree(ch->req_ring);
836 	ch->req_ring = NULL;
837 }
838 
839 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
840 {
841 	struct srp_target_port *target = ch->target;
842 	struct srp_device *srp_dev = target->srp_host->srp_dev;
843 	struct ib_device *ibdev = srp_dev->dev;
844 	struct srp_request *req;
845 	void *mr_list;
846 	dma_addr_t dma_addr;
847 	int i, ret = -ENOMEM;
848 
849 	ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
850 			       GFP_KERNEL);
851 	if (!ch->req_ring)
852 		goto out;
853 
854 	for (i = 0; i < target->req_ring_size; ++i) {
855 		req = &ch->req_ring[i];
856 		mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
857 				  GFP_KERNEL);
858 		if (!mr_list)
859 			goto out;
860 		if (srp_dev->use_fast_reg) {
861 			req->fr_list = mr_list;
862 		} else {
863 			req->fmr_list = mr_list;
864 			req->map_page = kmalloc(srp_dev->max_pages_per_mr *
865 						sizeof(void *), GFP_KERNEL);
866 			if (!req->map_page)
867 				goto out;
868 		}
869 		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
870 		if (!req->indirect_desc)
871 			goto out;
872 
873 		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
874 					     target->indirect_size,
875 					     DMA_TO_DEVICE);
876 		if (ib_dma_mapping_error(ibdev, dma_addr))
877 			goto out;
878 
879 		req->indirect_dma_addr = dma_addr;
880 	}
881 	ret = 0;
882 
883 out:
884 	return ret;
885 }
886 
887 /**
888  * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
889  * @shost: SCSI host whose attributes to remove from sysfs.
890  *
891  * Note: Any attributes defined in the host template and that did not exist
892  * before invocation of this function will be ignored.
893  */
894 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
895 {
896 	struct device_attribute **attr;
897 
898 	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
899 		device_remove_file(&shost->shost_dev, *attr);
900 }
901 
902 static void srp_remove_target(struct srp_target_port *target)
903 {
904 	struct srp_rdma_ch *ch;
905 	int i;
906 
907 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
908 
909 	srp_del_scsi_host_attr(target->scsi_host);
910 	srp_rport_get(target->rport);
911 	srp_remove_host(target->scsi_host);
912 	scsi_remove_host(target->scsi_host);
913 	srp_stop_rport_timers(target->rport);
914 	srp_disconnect_target(target);
915 	for (i = 0; i < target->ch_count; i++) {
916 		ch = &target->ch[i];
917 		srp_free_ch_ib(target, ch);
918 	}
919 	cancel_work_sync(&target->tl_err_work);
920 	srp_rport_put(target->rport);
921 	for (i = 0; i < target->ch_count; i++) {
922 		ch = &target->ch[i];
923 		srp_free_req_data(target, ch);
924 	}
925 	kfree(target->ch);
926 	target->ch = NULL;
927 
928 	spin_lock(&target->srp_host->target_lock);
929 	list_del(&target->list);
930 	spin_unlock(&target->srp_host->target_lock);
931 
932 	scsi_host_put(target->scsi_host);
933 }
934 
935 static void srp_remove_work(struct work_struct *work)
936 {
937 	struct srp_target_port *target =
938 		container_of(work, struct srp_target_port, remove_work);
939 
940 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
941 
942 	srp_remove_target(target);
943 }
944 
945 static void srp_rport_delete(struct srp_rport *rport)
946 {
947 	struct srp_target_port *target = rport->lld_data;
948 
949 	srp_queue_remove_work(target);
950 }
951 
952 /**
953  * srp_connected_ch() - number of connected channels
954  * @target: SRP target port.
955  */
956 static int srp_connected_ch(struct srp_target_port *target)
957 {
958 	int i, c = 0;
959 
960 	for (i = 0; i < target->ch_count; i++)
961 		c += target->ch[i].connected;
962 
963 	return c;
964 }
965 
966 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
967 {
968 	struct srp_target_port *target = ch->target;
969 	int ret;
970 
971 	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
972 
973 	ret = srp_lookup_path(ch);
974 	if (ret)
975 		goto out;
976 
977 	while (1) {
978 		init_completion(&ch->done);
979 		ret = srp_send_req(ch, multich);
980 		if (ret)
981 			goto out;
982 		ret = wait_for_completion_interruptible(&ch->done);
983 		if (ret < 0)
984 			goto out;
985 
986 		/*
987 		 * The CM event handling code will set status to
988 		 * SRP_PORT_REDIRECT if we get a port redirect REJ
989 		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
990 		 * redirect REJ back.
991 		 */
992 		ret = ch->status;
993 		switch (ret) {
994 		case 0:
995 			ch->connected = true;
996 			goto out;
997 
998 		case SRP_PORT_REDIRECT:
999 			ret = srp_lookup_path(ch);
1000 			if (ret)
1001 				goto out;
1002 			break;
1003 
1004 		case SRP_DLID_REDIRECT:
1005 			break;
1006 
1007 		case SRP_STALE_CONN:
1008 			shost_printk(KERN_ERR, target->scsi_host, PFX
1009 				     "giving up on stale connection\n");
1010 			ret = -ECONNRESET;
1011 			goto out;
1012 
1013 		default:
1014 			goto out;
1015 		}
1016 	}
1017 
1018 out:
1019 	return ret <= 0 ? ret : -ENODEV;
1020 }
1021 
/*
 * Completion handler installed by srp_inv_rkey() on its invalidation work
 * request; hands the completion to srp_handle_qp_err() tagged "INV RKEY".
 */
static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	static const char opname[] = "INV RKEY";

	srp_handle_qp_err(cq, wc, opname);
}
1026 
1027 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1028 		u32 rkey)
1029 {
1030 	struct ib_send_wr *bad_wr;
1031 	struct ib_send_wr wr = {
1032 		.opcode		    = IB_WR_LOCAL_INV,
1033 		.next		    = NULL,
1034 		.num_sge	    = 0,
1035 		.send_flags	    = 0,
1036 		.ex.invalidate_rkey = rkey,
1037 	};
1038 
1039 	wr.wr_cqe = &req->reg_cqe;
1040 	req->reg_cqe.done = srp_inv_rkey_err_done;
1041 	return ib_post_send(ch->qp, &wr, &bad_wr);
1042 }
1043 
1044 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1045 			   struct srp_rdma_ch *ch,
1046 			   struct srp_request *req)
1047 {
1048 	struct srp_target_port *target = ch->target;
1049 	struct srp_device *dev = target->srp_host->srp_dev;
1050 	struct ib_device *ibdev = dev->dev;
1051 	int i, res;
1052 
1053 	if (!scsi_sglist(scmnd) ||
1054 	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1055 	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
1056 		return;
1057 
1058 	if (dev->use_fast_reg) {
1059 		struct srp_fr_desc **pfr;
1060 
1061 		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1062 			res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1063 			if (res < 0) {
1064 				shost_printk(KERN_ERR, target->scsi_host, PFX
1065 				  "Queueing INV WR for rkey %#x failed (%d)\n",
1066 				  (*pfr)->mr->rkey, res);
1067 				queue_work(system_long_wq,
1068 					   &target->tl_err_work);
1069 			}
1070 		}
1071 		if (req->nmdesc)
1072 			srp_fr_pool_put(ch->fr_pool, req->fr_list,
1073 					req->nmdesc);
1074 	} else if (dev->use_fmr) {
1075 		struct ib_pool_fmr **pfmr;
1076 
1077 		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1078 			ib_fmr_pool_unmap(*pfmr);
1079 	}
1080 
1081 	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1082 			scmnd->sc_data_direction);
1083 }
1084 
1085 /**
1086  * srp_claim_req - Take ownership of the scmnd associated with a request.
1087  * @ch: SRP RDMA channel.
1088  * @req: SRP request.
1089  * @sdev: If not NULL, only take ownership for this SCSI device.
1090  * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1091  *         ownership of @req->scmnd if it equals @scmnd.
1092  *
1093  * Return value:
1094  * Either NULL or a pointer to the SCSI command the caller became owner of.
1095  */
1096 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1097 				       struct srp_request *req,
1098 				       struct scsi_device *sdev,
1099 				       struct scsi_cmnd *scmnd)
1100 {
1101 	unsigned long flags;
1102 
1103 	spin_lock_irqsave(&ch->lock, flags);
1104 	if (req->scmnd &&
1105 	    (!sdev || req->scmnd->device == sdev) &&
1106 	    (!scmnd || req->scmnd == scmnd)) {
1107 		scmnd = req->scmnd;
1108 		req->scmnd = NULL;
1109 	} else {
1110 		scmnd = NULL;
1111 	}
1112 	spin_unlock_irqrestore(&ch->lock, flags);
1113 
1114 	return scmnd;
1115 }
1116 
1117 /**
1118  * srp_free_req() - Unmap data and adjust ch->req_lim.
1119  * @ch:     SRP RDMA channel.
1120  * @req:    Request to be freed.
1121  * @scmnd:  SCSI command associated with @req.
1122  * @req_lim_delta: Amount to be added to @target->req_lim.
1123  */
1124 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1125 			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1126 {
1127 	unsigned long flags;
1128 
1129 	srp_unmap_data(scmnd, ch, req);
1130 
1131 	spin_lock_irqsave(&ch->lock, flags);
1132 	ch->req_lim += req_lim_delta;
1133 	spin_unlock_irqrestore(&ch->lock, flags);
1134 }
1135 
1136 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1137 			   struct scsi_device *sdev, int result)
1138 {
1139 	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1140 
1141 	if (scmnd) {
1142 		srp_free_req(ch, req, scmnd, 0);
1143 		scmnd->result = result;
1144 		scmnd->scsi_done(scmnd);
1145 	}
1146 }
1147 
1148 static void srp_terminate_io(struct srp_rport *rport)
1149 {
1150 	struct srp_target_port *target = rport->lld_data;
1151 	struct srp_rdma_ch *ch;
1152 	struct Scsi_Host *shost = target->scsi_host;
1153 	struct scsi_device *sdev;
1154 	int i, j;
1155 
1156 	/*
1157 	 * Invoking srp_terminate_io() while srp_queuecommand() is running
1158 	 * is not safe. Hence the warning statement below.
1159 	 */
1160 	shost_for_each_device(sdev, shost)
1161 		WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1162 
1163 	for (i = 0; i < target->ch_count; i++) {
1164 		ch = &target->ch[i];
1165 
1166 		for (j = 0; j < target->req_ring_size; ++j) {
1167 			struct srp_request *req = &ch->req_ring[j];
1168 
1169 			srp_finish_req(ch, req, NULL,
1170 				       DID_TRANSPORT_FAILFAST << 16);
1171 		}
1172 	}
1173 }
1174 
1175 /*
1176  * It is up to the caller to ensure that srp_rport_reconnect() calls are
1177  * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1178  * srp_reset_device() or srp_reset_host() calls will occur while this function
1179  * is in progress. One way to realize that is not to call this function
1180  * directly but to call srp_reconnect_rport() instead since that last function
1181  * serializes calls of this function via rport->mutex and also blocks
1182  * srp_queuecommand() calls before invoking this function.
1183  */
1184 static int srp_rport_reconnect(struct srp_rport *rport)
1185 {
1186 	struct srp_target_port *target = rport->lld_data;
1187 	struct srp_rdma_ch *ch;
1188 	int i, j, ret = 0;
1189 	bool multich = false;
1190 
1191 	srp_disconnect_target(target);
1192 
1193 	if (target->state == SRP_TARGET_SCANNING)
1194 		return -ENODEV;
1195 
1196 	/*
1197 	 * Now get a new local CM ID so that we avoid confusing the target in
1198 	 * case things are really fouled up. Doing so also ensures that all CM
1199 	 * callbacks will have finished before a new QP is allocated.
1200 	 */
1201 	for (i = 0; i < target->ch_count; i++) {
1202 		ch = &target->ch[i];
1203 		ret += srp_new_cm_id(ch);
1204 	}
1205 	for (i = 0; i < target->ch_count; i++) {
1206 		ch = &target->ch[i];
1207 		for (j = 0; j < target->req_ring_size; ++j) {
1208 			struct srp_request *req = &ch->req_ring[j];
1209 
1210 			srp_finish_req(ch, req, NULL, DID_RESET << 16);
1211 		}
1212 	}
1213 	for (i = 0; i < target->ch_count; i++) {
1214 		ch = &target->ch[i];
1215 		/*
1216 		 * Whether or not creating a new CM ID succeeded, create a new
1217 		 * QP. This guarantees that all completion callback function
1218 		 * invocations have finished before request resetting starts.
1219 		 */
1220 		ret += srp_create_ch_ib(ch);
1221 
1222 		INIT_LIST_HEAD(&ch->free_tx);
1223 		for (j = 0; j < target->queue_size; ++j)
1224 			list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1225 	}
1226 
1227 	target->qp_in_error = false;
1228 
1229 	for (i = 0; i < target->ch_count; i++) {
1230 		ch = &target->ch[i];
1231 		if (ret)
1232 			break;
1233 		ret = srp_connect_ch(ch, multich);
1234 		multich = true;
1235 	}
1236 
1237 	if (ret == 0)
1238 		shost_printk(KERN_INFO, target->scsi_host,
1239 			     PFX "reconnect succeeded\n");
1240 
1241 	return ret;
1242 }
1243 
1244 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1245 			 unsigned int dma_len, u32 rkey)
1246 {
1247 	struct srp_direct_buf *desc = state->desc;
1248 
1249 	WARN_ON_ONCE(!dma_len);
1250 
1251 	desc->va = cpu_to_be64(dma_addr);
1252 	desc->key = cpu_to_be32(rkey);
1253 	desc->len = cpu_to_be32(dma_len);
1254 
1255 	state->total_len += dma_len;
1256 	state->desc++;
1257 	state->ndesc++;
1258 }
1259 
1260 static int srp_map_finish_fmr(struct srp_map_state *state,
1261 			      struct srp_rdma_ch *ch)
1262 {
1263 	struct srp_target_port *target = ch->target;
1264 	struct srp_device *dev = target->srp_host->srp_dev;
1265 	struct ib_pool_fmr *fmr;
1266 	u64 io_addr = 0;
1267 
1268 	if (state->fmr.next >= state->fmr.end)
1269 		return -ENOMEM;
1270 
1271 	WARN_ON_ONCE(!dev->use_fmr);
1272 
1273 	if (state->npages == 0)
1274 		return 0;
1275 
1276 	if (state->npages == 1 && target->global_mr) {
1277 		srp_map_desc(state, state->base_dma_addr, state->dma_len,
1278 			     target->global_mr->rkey);
1279 		goto reset_state;
1280 	}
1281 
1282 	fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1283 				   state->npages, io_addr);
1284 	if (IS_ERR(fmr))
1285 		return PTR_ERR(fmr);
1286 
1287 	*state->fmr.next++ = fmr;
1288 	state->nmdesc++;
1289 
1290 	srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1291 		     state->dma_len, fmr->fmr->rkey);
1292 
1293 reset_state:
1294 	state->npages = 0;
1295 	state->dma_len = 0;
1296 
1297 	return 0;
1298 }
1299 
/*
 * Completion callback that srp_map_finish_fr() installs on the request's
 * reg_cqe before posting a registration work request. It hands the work
 * completion to srp_handle_qp_err(), labelled "FAST REG" for the log message.
 */
static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	srp_handle_qp_err(cq, wc, "FAST REG");
}
1304 
1305 /*
1306  * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
1307  * where to start in the first element. If sg_offset_p != NULL then
1308  * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1309  * byte that has not yet been mapped.
1310  */
1311 static int srp_map_finish_fr(struct srp_map_state *state,
1312 			     struct srp_request *req,
1313 			     struct srp_rdma_ch *ch, int sg_nents,
1314 			     unsigned int *sg_offset_p)
1315 {
1316 	struct srp_target_port *target = ch->target;
1317 	struct srp_device *dev = target->srp_host->srp_dev;
1318 	struct ib_send_wr *bad_wr;
1319 	struct ib_reg_wr wr;
1320 	struct srp_fr_desc *desc;
1321 	u32 rkey;
1322 	int n, err;
1323 
1324 	if (state->fr.next >= state->fr.end)
1325 		return -ENOMEM;
1326 
1327 	WARN_ON_ONCE(!dev->use_fast_reg);
1328 
1329 	if (sg_nents == 1 && target->global_mr) {
1330 		unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1331 
1332 		srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1333 			     sg_dma_len(state->sg) - sg_offset,
1334 			     target->global_mr->rkey);
1335 		if (sg_offset_p)
1336 			*sg_offset_p = 0;
1337 		return 1;
1338 	}
1339 
1340 	desc = srp_fr_pool_get(ch->fr_pool);
1341 	if (!desc)
1342 		return -ENOMEM;
1343 
1344 	rkey = ib_inc_rkey(desc->mr->rkey);
1345 	ib_update_fast_reg_key(desc->mr, rkey);
1346 
1347 	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1348 			 dev->mr_page_size);
1349 	if (unlikely(n < 0)) {
1350 		srp_fr_pool_put(ch->fr_pool, &desc, 1);
1351 		pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1352 			 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1353 			 sg_offset_p ? *sg_offset_p : -1, n);
1354 		return n;
1355 	}
1356 
1357 	WARN_ON_ONCE(desc->mr->length == 0);
1358 
1359 	req->reg_cqe.done = srp_reg_mr_err_done;
1360 
1361 	wr.wr.next = NULL;
1362 	wr.wr.opcode = IB_WR_REG_MR;
1363 	wr.wr.wr_cqe = &req->reg_cqe;
1364 	wr.wr.num_sge = 0;
1365 	wr.wr.send_flags = 0;
1366 	wr.mr = desc->mr;
1367 	wr.key = desc->mr->rkey;
1368 	wr.access = (IB_ACCESS_LOCAL_WRITE |
1369 		     IB_ACCESS_REMOTE_READ |
1370 		     IB_ACCESS_REMOTE_WRITE);
1371 
1372 	*state->fr.next++ = desc;
1373 	state->nmdesc++;
1374 
1375 	srp_map_desc(state, desc->mr->iova,
1376 		     desc->mr->length, desc->mr->rkey);
1377 
1378 	err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1379 	if (unlikely(err)) {
1380 		WARN_ON_ONCE(err == -ENOMEM);
1381 		return err;
1382 	}
1383 
1384 	return n;
1385 }
1386 
1387 static int srp_map_sg_entry(struct srp_map_state *state,
1388 			    struct srp_rdma_ch *ch,
1389 			    struct scatterlist *sg, int sg_index)
1390 {
1391 	struct srp_target_port *target = ch->target;
1392 	struct srp_device *dev = target->srp_host->srp_dev;
1393 	struct ib_device *ibdev = dev->dev;
1394 	dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1395 	unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1396 	unsigned int len = 0;
1397 	int ret;
1398 
1399 	WARN_ON_ONCE(!dma_len);
1400 
1401 	while (dma_len) {
1402 		unsigned offset = dma_addr & ~dev->mr_page_mask;
1403 		if (state->npages == dev->max_pages_per_mr || offset != 0) {
1404 			ret = srp_map_finish_fmr(state, ch);
1405 			if (ret)
1406 				return ret;
1407 		}
1408 
1409 		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1410 
1411 		if (!state->npages)
1412 			state->base_dma_addr = dma_addr;
1413 		state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1414 		state->dma_len += len;
1415 		dma_addr += len;
1416 		dma_len -= len;
1417 	}
1418 
1419 	/*
1420 	 * If the last entry of the MR wasn't a full page, then we need to
1421 	 * close it out and start a new one -- we can only merge at page
1422 	 * boundaries.
1423 	 */
1424 	ret = 0;
1425 	if (len != dev->mr_page_size)
1426 		ret = srp_map_finish_fmr(state, ch);
1427 	return ret;
1428 }
1429 
1430 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1431 			  struct srp_request *req, struct scatterlist *scat,
1432 			  int count)
1433 {
1434 	struct scatterlist *sg;
1435 	int i, ret;
1436 
1437 	state->pages = req->map_page;
1438 	state->fmr.next = req->fmr_list;
1439 	state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1440 
1441 	for_each_sg(scat, sg, count, i) {
1442 		ret = srp_map_sg_entry(state, ch, sg, i);
1443 		if (ret)
1444 			return ret;
1445 	}
1446 
1447 	ret = srp_map_finish_fmr(state, ch);
1448 	if (ret)
1449 		return ret;
1450 
1451 	return 0;
1452 }
1453 
1454 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1455 			 struct srp_request *req, struct scatterlist *scat,
1456 			 int count)
1457 {
1458 	unsigned int sg_offset = 0;
1459 
1460 	state->fr.next = req->fr_list;
1461 	state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1462 	state->sg = scat;
1463 
1464 	if (count == 0)
1465 		return 0;
1466 
1467 	while (count) {
1468 		int i, n;
1469 
1470 		n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1471 		if (unlikely(n < 0))
1472 			return n;
1473 
1474 		count -= n;
1475 		for (i = 0; i < n; i++)
1476 			state->sg = sg_next(state->sg);
1477 	}
1478 
1479 	return 0;
1480 }
1481 
1482 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1483 			  struct srp_request *req, struct scatterlist *scat,
1484 			  int count)
1485 {
1486 	struct srp_target_port *target = ch->target;
1487 	struct srp_device *dev = target->srp_host->srp_dev;
1488 	struct scatterlist *sg;
1489 	int i;
1490 
1491 	for_each_sg(scat, sg, count, i) {
1492 		srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1493 			     ib_sg_dma_len(dev->dev, sg),
1494 			     target->global_mr->rkey);
1495 	}
1496 
1497 	return 0;
1498 }
1499 
1500 /*
1501  * Register the indirect data buffer descriptor with the HCA.
1502  *
1503  * Note: since the indirect data buffer descriptor has been allocated with
1504  * kmalloc() it is guaranteed that this buffer is a physically contiguous
1505  * memory buffer.
1506  */
1507 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1508 		       void **next_mr, void **end_mr, u32 idb_len,
1509 		       __be32 *idb_rkey)
1510 {
1511 	struct srp_target_port *target = ch->target;
1512 	struct srp_device *dev = target->srp_host->srp_dev;
1513 	struct srp_map_state state;
1514 	struct srp_direct_buf idb_desc;
1515 	u64 idb_pages[1];
1516 	struct scatterlist idb_sg[1];
1517 	int ret;
1518 
1519 	memset(&state, 0, sizeof(state));
1520 	memset(&idb_desc, 0, sizeof(idb_desc));
1521 	state.gen.next = next_mr;
1522 	state.gen.end = end_mr;
1523 	state.desc = &idb_desc;
1524 	state.base_dma_addr = req->indirect_dma_addr;
1525 	state.dma_len = idb_len;
1526 
1527 	if (dev->use_fast_reg) {
1528 		state.sg = idb_sg;
1529 		sg_init_one(idb_sg, req->indirect_desc, idb_len);
1530 		idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1531 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1532 		idb_sg->dma_length = idb_sg->length;	      /* hack^2 */
1533 #endif
1534 		ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1535 		if (ret < 0)
1536 			return ret;
1537 		WARN_ON_ONCE(ret < 1);
1538 	} else if (dev->use_fmr) {
1539 		state.pages = idb_pages;
1540 		state.pages[0] = (req->indirect_dma_addr &
1541 				  dev->mr_page_mask);
1542 		state.npages = 1;
1543 		ret = srp_map_finish_fmr(&state, ch);
1544 		if (ret < 0)
1545 			return ret;
1546 	} else {
1547 		return -EINVAL;
1548 	}
1549 
1550 	*idb_rkey = idb_desc.key;
1551 
1552 	return 0;
1553 }
1554 
/*
 * The guard must match the one around the only call site in srp_map_data()
 * (DYNAMIC_DEBUG); otherwise the call could reference a function that was
 * compiled out.
 */
#if defined(DYNAMIC_DEBUG)
/**
 * srp_check_mapping() - debug-only consistency check of a mapped request
 * @state: Mapping state after the scatterlist has been mapped.
 * @ch:    SRP RDMA channel.
 * @req:   SRP request that was mapped.
 * @scat:  Scatterlist that was mapped; not used by this function.
 * @count: Number of scatterlist elements; not used by this function.
 *
 * Logs an error if the summed descriptor lengths differ from the SCSI
 * buffer length or if the summed MR lengths exceed it.
 */
static void srp_check_mapping(struct srp_map_state *state,
			      struct srp_rdma_ch *ch, struct srp_request *req,
			      struct scatterlist *scat, int count)
{
	struct srp_device *dev = ch->target->srp_host->srp_dev;
	struct srp_fr_desc **pfr;
	u64 desc_len = 0, mr_len = 0;
	int i;

	for (i = 0; i < state->ndesc; i++)
		desc_len += be32_to_cpu(req->indirect_desc[i].len);
	if (dev->use_fast_reg)
		for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
			mr_len += (*pfr)->mr->length;
	else if (dev->use_fmr)
		/*
		 * NOTE(review): for FMR the first nmdesc descriptor lengths
		 * are used as the MR lengths - confirm that this matches the
		 * descriptor layout when global-MR descriptors are mixed in.
		 */
		for (i = 0; i < state->nmdesc; i++)
			mr_len += be32_to_cpu(req->indirect_desc[i].len);
	if (desc_len != scsi_bufflen(req->scmnd) ||
	    mr_len > scsi_bufflen(req->scmnd))
		pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
		       scsi_bufflen(req->scmnd), desc_len, mr_len,
		       state->ndesc, state->nmdesc);
}
#endif
1580 
/**
 * srp_map_data() - map SCSI data buffer onto an SRP request
 * @scmnd: SCSI command to map
 * @ch: SRP RDMA channel
 * @req: SRP request
 *
 * DMA-maps the scatterlist of @scmnd and writes a data buffer descriptor
 * into the SRP_CMD held in @req->cmd->buf: a direct descriptor if a single
 * descriptor suffices, an indirect descriptor table otherwise. The number of
 * memory registrations that were used is stored in @req->nmdesc.
 *
 * Returns the length in bytes of the SRP_CMD IU or a negative value if
 * mapping failed: -EINVAL for an unsupported data direction, -EIO if DMA
 * mapping yields no entries or the descriptors do not fit, -E2BIG if
 * registration ran out of memory regions while @req->nmdesc had reached
 * @target->mr_pool_size, or the error reported by the mapping helpers.
 */
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
			struct srp_request *req)
{
	struct srp_target_port *target = ch->target;
	struct scatterlist *scat;
	struct srp_cmd *cmd = req->cmd->buf;
	int len, nents, count, ret;
	struct srp_device *dev;
	struct ib_device *ibdev;
	struct srp_map_state state;
	struct srp_indirect_buf *indirect_hdr;
	u32 idb_len, table_len;
	__be32 idb_rkey;
	u8 fmt;

	/* No data phase: the IU consists of the bare SRP_CMD. */
	if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
		return sizeof (struct srp_cmd);

	if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
	    scmnd->sc_data_direction != DMA_TO_DEVICE) {
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Unhandled data direction %d\n",
			     scmnd->sc_data_direction);
		return -EINVAL;
	}

	nents = scsi_sg_count(scmnd);
	scat  = scsi_sglist(scmnd);

	dev = target->srp_host->srp_dev;
	ibdev = dev->dev;

	/* From here on "count" is the number of DMA-mapped entries. */
	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
	if (unlikely(count == 0))
		return -EIO;

	/* Assume a direct descriptor until the indirect path is taken. */
	fmt = SRP_DATA_DESC_DIRECT;
	len = sizeof (struct srp_cmd) +	sizeof (struct srp_direct_buf);

	if (count == 1 && target->global_mr) {
		/*
		 * The midlayer only generated a single gather/scatter
		 * entry, or DMA mapping coalesced everything to a
		 * single entry.  So a direct descriptor along with
		 * the DMA MR suffices.
		 */
		struct srp_direct_buf *buf = (void *) cmd->add_data;

		buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
		buf->key = cpu_to_be32(target->global_mr->rkey);
		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));

		req->nmdesc = 0;
		goto map_complete;
	}

	/*
	 * We have more than one scatter/gather entry, so build our indirect
	 * descriptor table, trying to merge as many entries as we can.
	 */
	indirect_hdr = (void *) cmd->add_data;

	/* The CPU is about to write the descriptor table. */
	ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
				   target->indirect_size, DMA_TO_DEVICE);

	memset(&state, 0, sizeof(state));
	state.desc = req->indirect_desc;
	/* Registration strategy: fast registration, else FMR, else plain DMA. */
	if (dev->use_fast_reg)
		ret = srp_map_sg_fr(&state, ch, req, scat, count);
	else if (dev->use_fmr)
		ret = srp_map_sg_fmr(&state, ch, req, scat, count);
	else
		ret = srp_map_sg_dma(&state, ch, req, scat, count);
	/*
	 * Record the registrations made so far even on failure, so that
	 * srp_unmap_data() at the "unmap" label releases them.
	 */
	req->nmdesc = state.nmdesc;
	if (ret < 0)
		goto unmap;

	/*
	 * NOTE(review): srp_check_mapping() has to be compiled in whenever
	 * this guard is true - confirm that its definition is guarded by the
	 * same macro.
	 */
#if defined(DYNAMIC_DEBUG)
	{
		DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
			"Memory mapping consistency check");
		if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT))
			srp_check_mapping(&state, ch, req, scat, count);
	}
#endif

	/* We've mapped the request, now pull as much of the indirect
	 * descriptor table as we can into the command buffer. If this
	 * target is not using an external indirect table, we are
	 * guaranteed to fit into the command, as the SCSI layer won't
	 * give us more S/G entries than we allow.
	 */
	if (state.ndesc == 1) {
		/*
		 * Memory registration collapsed the sg-list into one entry,
		 * so use a direct descriptor.
		 */
		struct srp_direct_buf *buf = (void *) cmd->add_data;

		*buf = req->indirect_desc[0];
		goto map_complete;
	}

	if (unlikely(target->cmd_sg_cnt < state.ndesc &&
						!target->allow_ext_sg)) {
		shost_printk(KERN_ERR, target->scsi_host,
			     "Could not fit S/G list into SRP_CMD\n");
		ret = -EIO;
		goto unmap;
	}

	/*
	 * "count" is reused: it now holds the number of descriptors copied
	 * into the SRP_CMD itself, while table_len covers all descriptors.
	 */
	count = min(state.ndesc, target->cmd_sg_cnt);
	table_len = state.ndesc * sizeof (struct srp_direct_buf);
	idb_len = sizeof(struct srp_indirect_buf) + table_len;

	fmt = SRP_DATA_DESC_INDIRECT;
	len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
	len += count * sizeof (struct srp_direct_buf);

	memcpy(indirect_hdr->desc_list, req->indirect_desc,
	       count * sizeof (struct srp_direct_buf));

	if (!target->global_mr) {
		/* Without a global MR the table itself must be registered. */
		ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
				  idb_len, &idb_rkey);
		if (ret < 0)
			goto unmap;
		req->nmdesc++;
	} else {
		idb_rkey = cpu_to_be32(target->global_mr->rkey);
	}

	indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
	indirect_hdr->table_desc.key = idb_rkey;
	indirect_hdr->table_desc.len = cpu_to_be32(table_len);
	indirect_hdr->len = cpu_to_be32(state.total_len);

	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
		cmd->data_out_desc_cnt = count;
	else
		cmd->data_in_desc_cnt = count;

	/* Hand the finished descriptor table back to the device. */
	ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
				      DMA_TO_DEVICE);

map_complete:
	/* The data-out format goes in the upper nibble, data-in in the lower. */
	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
		cmd->buf_fmt = fmt << 4;
	else
		cmd->buf_fmt = fmt;

	return len;

unmap:
	srp_unmap_data(scmnd, ch, req);
	/* Running out of MRs with the whole pool in use is reported as -E2BIG. */
	if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
		ret = -E2BIG;
	return ret;
}
1749 
1750 /*
1751  * Return an IU and possible credit to the free pool
1752  */
1753 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1754 			  enum srp_iu_type iu_type)
1755 {
1756 	unsigned long flags;
1757 
1758 	spin_lock_irqsave(&ch->lock, flags);
1759 	list_add(&iu->list, &ch->free_tx);
1760 	if (iu_type != SRP_IU_RSP)
1761 		++ch->req_lim;
1762 	spin_unlock_irqrestore(&ch->lock, flags);
1763 }
1764 
1765 /*
1766  * Must be called with ch->lock held to protect req_lim and free_tx.
1767  * If IU is not sent, it must be returned using srp_put_tx_iu().
1768  *
1769  * Note:
1770  * An upper limit for the number of allocated information units for each
1771  * request type is:
1772  * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1773  *   more than Scsi_Host.can_queue requests.
1774  * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1775  * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1776  *   one unanswered SRP request to an initiator.
1777  */
1778 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1779 				      enum srp_iu_type iu_type)
1780 {
1781 	struct srp_target_port *target = ch->target;
1782 	s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1783 	struct srp_iu *iu;
1784 
1785 	ib_process_cq_direct(ch->send_cq, -1);
1786 
1787 	if (list_empty(&ch->free_tx))
1788 		return NULL;
1789 
1790 	/* Initiator responses to target requests do not consume credits */
1791 	if (iu_type != SRP_IU_RSP) {
1792 		if (ch->req_lim <= rsv) {
1793 			++target->zero_req_lim;
1794 			return NULL;
1795 		}
1796 
1797 		--ch->req_lim;
1798 	}
1799 
1800 	iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1801 	list_del(&iu->list);
1802 	return iu;
1803 }
1804 
1805 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1806 {
1807 	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1808 	struct srp_rdma_ch *ch = cq->cq_context;
1809 
1810 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
1811 		srp_handle_qp_err(cq, wc, "SEND");
1812 		return;
1813 	}
1814 
1815 	list_add(&iu->list, &ch->free_tx);
1816 }
1817 
1818 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1819 {
1820 	struct srp_target_port *target = ch->target;
1821 	struct ib_sge list;
1822 	struct ib_send_wr wr, *bad_wr;
1823 
1824 	list.addr   = iu->dma;
1825 	list.length = len;
1826 	list.lkey   = target->lkey;
1827 
1828 	iu->cqe.done = srp_send_done;
1829 
1830 	wr.next       = NULL;
1831 	wr.wr_cqe     = &iu->cqe;
1832 	wr.sg_list    = &list;
1833 	wr.num_sge    = 1;
1834 	wr.opcode     = IB_WR_SEND;
1835 	wr.send_flags = IB_SEND_SIGNALED;
1836 
1837 	return ib_post_send(ch->qp, &wr, &bad_wr);
1838 }
1839 
1840 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1841 {
1842 	struct srp_target_port *target = ch->target;
1843 	struct ib_recv_wr wr, *bad_wr;
1844 	struct ib_sge list;
1845 
1846 	list.addr   = iu->dma;
1847 	list.length = iu->size;
1848 	list.lkey   = target->lkey;
1849 
1850 	iu->cqe.done = srp_recv_done;
1851 
1852 	wr.next     = NULL;
1853 	wr.wr_cqe   = &iu->cqe;
1854 	wr.sg_list  = &list;
1855 	wr.num_sge  = 1;
1856 
1857 	return ib_post_recv(ch->qp, &wr, &bad_wr);
1858 }
1859 
1860 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1861 {
1862 	struct srp_target_port *target = ch->target;
1863 	struct srp_request *req;
1864 	struct scsi_cmnd *scmnd;
1865 	unsigned long flags;
1866 
1867 	if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1868 		spin_lock_irqsave(&ch->lock, flags);
1869 		ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1870 		spin_unlock_irqrestore(&ch->lock, flags);
1871 
1872 		ch->tsk_mgmt_status = -1;
1873 		if (be32_to_cpu(rsp->resp_data_len) >= 4)
1874 			ch->tsk_mgmt_status = rsp->data[3];
1875 		complete(&ch->tsk_mgmt_done);
1876 	} else {
1877 		scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1878 		if (scmnd) {
1879 			req = (void *)scmnd->host_scribble;
1880 			scmnd = srp_claim_req(ch, req, NULL, scmnd);
1881 		}
1882 		if (!scmnd) {
1883 			shost_printk(KERN_ERR, target->scsi_host,
1884 				     "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1885 				     rsp->tag, ch - target->ch, ch->qp->qp_num);
1886 
1887 			spin_lock_irqsave(&ch->lock, flags);
1888 			ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1889 			spin_unlock_irqrestore(&ch->lock, flags);
1890 
1891 			return;
1892 		}
1893 		scmnd->result = rsp->status;
1894 
1895 		if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1896 			memcpy(scmnd->sense_buffer, rsp->data +
1897 			       be32_to_cpu(rsp->resp_data_len),
1898 			       min_t(int, be32_to_cpu(rsp->sense_data_len),
1899 				     SCSI_SENSE_BUFFERSIZE));
1900 		}
1901 
1902 		if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1903 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1904 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1905 			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1906 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1907 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1908 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1909 			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1910 
1911 		srp_free_req(ch, req, scmnd,
1912 			     be32_to_cpu(rsp->req_lim_delta));
1913 
1914 		scmnd->host_scribble = NULL;
1915 		scmnd->scsi_done(scmnd);
1916 	}
1917 }
1918 
1919 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1920 			       void *rsp, int len)
1921 {
1922 	struct srp_target_port *target = ch->target;
1923 	struct ib_device *dev = target->srp_host->srp_dev->dev;
1924 	unsigned long flags;
1925 	struct srp_iu *iu;
1926 	int err;
1927 
1928 	spin_lock_irqsave(&ch->lock, flags);
1929 	ch->req_lim += req_delta;
1930 	iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1931 	spin_unlock_irqrestore(&ch->lock, flags);
1932 
1933 	if (!iu) {
1934 		shost_printk(KERN_ERR, target->scsi_host, PFX
1935 			     "no IU available to send response\n");
1936 		return 1;
1937 	}
1938 
1939 	ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1940 	memcpy(iu->buf, rsp, len);
1941 	ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1942 
1943 	err = srp_post_send(ch, iu, len);
1944 	if (err) {
1945 		shost_printk(KERN_ERR, target->scsi_host, PFX
1946 			     "unable to post response: %d\n", err);
1947 		srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1948 	}
1949 
1950 	return err;
1951 }
1952 
1953 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1954 				 struct srp_cred_req *req)
1955 {
1956 	struct srp_cred_rsp rsp = {
1957 		.opcode = SRP_CRED_RSP,
1958 		.tag = req->tag,
1959 	};
1960 	s32 delta = be32_to_cpu(req->req_lim_delta);
1961 
1962 	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1963 		shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1964 			     "problems processing SRP_CRED_REQ\n");
1965 }
1966 
1967 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1968 				struct srp_aer_req *req)
1969 {
1970 	struct srp_target_port *target = ch->target;
1971 	struct srp_aer_rsp rsp = {
1972 		.opcode = SRP_AER_RSP,
1973 		.tag = req->tag,
1974 	};
1975 	s32 delta = be32_to_cpu(req->req_lim_delta);
1976 
1977 	shost_printk(KERN_ERR, target->scsi_host, PFX
1978 		     "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
1979 
1980 	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1981 		shost_printk(KERN_ERR, target->scsi_host, PFX
1982 			     "problems processing SRP_AER_REQ\n");
1983 }
1984 
1985 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
1986 {
1987 	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1988 	struct srp_rdma_ch *ch = cq->cq_context;
1989 	struct srp_target_port *target = ch->target;
1990 	struct ib_device *dev = target->srp_host->srp_dev->dev;
1991 	int res;
1992 	u8 opcode;
1993 
1994 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
1995 		srp_handle_qp_err(cq, wc, "RECV");
1996 		return;
1997 	}
1998 
1999 	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2000 				   DMA_FROM_DEVICE);
2001 
2002 	opcode = *(u8 *) iu->buf;
2003 
2004 	if (0) {
2005 		shost_printk(KERN_ERR, target->scsi_host,
2006 			     PFX "recv completion, opcode 0x%02x\n", opcode);
2007 		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2008 			       iu->buf, wc->byte_len, true);
2009 	}
2010 
2011 	switch (opcode) {
2012 	case SRP_RSP:
2013 		srp_process_rsp(ch, iu->buf);
2014 		break;
2015 
2016 	case SRP_CRED_REQ:
2017 		srp_process_cred_req(ch, iu->buf);
2018 		break;
2019 
2020 	case SRP_AER_REQ:
2021 		srp_process_aer_req(ch, iu->buf);
2022 		break;
2023 
2024 	case SRP_T_LOGOUT:
2025 		/* XXX Handle target logout */
2026 		shost_printk(KERN_WARNING, target->scsi_host,
2027 			     PFX "Got target logout request\n");
2028 		break;
2029 
2030 	default:
2031 		shost_printk(KERN_WARNING, target->scsi_host,
2032 			     PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2033 		break;
2034 	}
2035 
2036 	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2037 				      DMA_FROM_DEVICE);
2038 
2039 	res = srp_post_recv(ch, iu);
2040 	if (res != 0)
2041 		shost_printk(KERN_ERR, target->scsi_host,
2042 			     PFX "Recv failed with error code %d\n", res);
2043 }
2044 
2045 /**
2046  * srp_tl_err_work() - handle a transport layer error
2047  * @work: Work structure embedded in an SRP target port.
2048  *
2049  * Note: This function may get invoked before the rport has been created,
2050  * hence the target->rport test.
2051  */
2052 static void srp_tl_err_work(struct work_struct *work)
2053 {
2054 	struct srp_target_port *target;
2055 
2056 	target = container_of(work, struct srp_target_port, tl_err_work);
2057 	if (target->rport)
2058 		srp_start_tl_fail_timers(target->rport);
2059 }
2060 
2061 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2062 		const char *opname)
2063 {
2064 	struct srp_rdma_ch *ch = cq->cq_context;
2065 	struct srp_target_port *target = ch->target;
2066 
2067 	if (ch->connected && !target->qp_in_error) {
2068 		shost_printk(KERN_ERR, target->scsi_host,
2069 			     PFX "failed %s status %s (%d) for CQE %p\n",
2070 			     opname, ib_wc_status_msg(wc->status), wc->status,
2071 			     wc->wr_cqe);
2072 		queue_work(system_long_wq, &target->tl_err_work);
2073 	}
2074 	target->qp_in_error = true;
2075 }
2076 
2077 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2078 {
2079 	struct srp_target_port *target = host_to_target(shost);
2080 	struct srp_rport *rport = target->rport;
2081 	struct srp_rdma_ch *ch;
2082 	struct srp_request *req;
2083 	struct srp_iu *iu;
2084 	struct srp_cmd *cmd;
2085 	struct ib_device *dev;
2086 	unsigned long flags;
2087 	u32 tag;
2088 	u16 idx;
2089 	int len, ret;
2090 	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2091 
2092 	/*
2093 	 * The SCSI EH thread is the only context from which srp_queuecommand()
2094 	 * can get invoked for blocked devices (SDEV_BLOCK /
2095 	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2096 	 * locking the rport mutex if invoked from inside the SCSI EH.
2097 	 */
2098 	if (in_scsi_eh)
2099 		mutex_lock(&rport->mutex);
2100 
2101 	scmnd->result = srp_chkready(target->rport);
2102 	if (unlikely(scmnd->result))
2103 		goto err;
2104 
2105 	WARN_ON_ONCE(scmnd->request->tag < 0);
2106 	tag = blk_mq_unique_tag(scmnd->request);
2107 	ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2108 	idx = blk_mq_unique_tag_to_tag(tag);
2109 	WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2110 		  dev_name(&shost->shost_gendev), tag, idx,
2111 		  target->req_ring_size);
2112 
2113 	spin_lock_irqsave(&ch->lock, flags);
2114 	iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2115 	spin_unlock_irqrestore(&ch->lock, flags);
2116 
2117 	if (!iu)
2118 		goto err;
2119 
2120 	req = &ch->req_ring[idx];
2121 	dev = target->srp_host->srp_dev->dev;
2122 	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2123 				   DMA_TO_DEVICE);
2124 
2125 	scmnd->host_scribble = (void *) req;
2126 
2127 	cmd = iu->buf;
2128 	memset(cmd, 0, sizeof *cmd);
2129 
2130 	cmd->opcode = SRP_CMD;
2131 	int_to_scsilun(scmnd->device->lun, &cmd->lun);
2132 	cmd->tag    = tag;
2133 	memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2134 
2135 	req->scmnd    = scmnd;
2136 	req->cmd      = iu;
2137 
2138 	len = srp_map_data(scmnd, ch, req);
2139 	if (len < 0) {
2140 		shost_printk(KERN_ERR, target->scsi_host,
2141 			     PFX "Failed to map data (%d)\n", len);
2142 		/*
2143 		 * If we ran out of memory descriptors (-ENOMEM) because an
2144 		 * application is queuing many requests with more than
2145 		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2146 		 * to reduce queue depth temporarily.
2147 		 */
2148 		scmnd->result = len == -ENOMEM ?
2149 			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2150 		goto err_iu;
2151 	}
2152 
2153 	ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2154 				      DMA_TO_DEVICE);
2155 
2156 	if (srp_post_send(ch, iu, len)) {
2157 		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2158 		goto err_unmap;
2159 	}
2160 
2161 	ret = 0;
2162 
2163 unlock_rport:
2164 	if (in_scsi_eh)
2165 		mutex_unlock(&rport->mutex);
2166 
2167 	return ret;
2168 
2169 err_unmap:
2170 	srp_unmap_data(scmnd, ch, req);
2171 
2172 err_iu:
2173 	srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2174 
2175 	/*
2176 	 * Avoid that the loops that iterate over the request ring can
2177 	 * encounter a dangling SCSI command pointer.
2178 	 */
2179 	req->scmnd = NULL;
2180 
2181 err:
2182 	if (scmnd->result) {
2183 		scmnd->scsi_done(scmnd);
2184 		ret = 0;
2185 	} else {
2186 		ret = SCSI_MLQUEUE_HOST_BUSY;
2187 	}
2188 
2189 	goto unlock_rport;
2190 }
2191 
2192 /*
2193  * Note: the resources allocated in this function are freed in
2194  * srp_free_ch_ib().
2195  */
2196 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2197 {
2198 	struct srp_target_port *target = ch->target;
2199 	int i;
2200 
2201 	ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2202 			      GFP_KERNEL);
2203 	if (!ch->rx_ring)
2204 		goto err_no_ring;
2205 	ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2206 			      GFP_KERNEL);
2207 	if (!ch->tx_ring)
2208 		goto err_no_ring;
2209 
2210 	for (i = 0; i < target->queue_size; ++i) {
2211 		ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2212 					      ch->max_ti_iu_len,
2213 					      GFP_KERNEL, DMA_FROM_DEVICE);
2214 		if (!ch->rx_ring[i])
2215 			goto err;
2216 	}
2217 
2218 	for (i = 0; i < target->queue_size; ++i) {
2219 		ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2220 					      target->max_iu_len,
2221 					      GFP_KERNEL, DMA_TO_DEVICE);
2222 		if (!ch->tx_ring[i])
2223 			goto err;
2224 
2225 		list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2226 	}
2227 
2228 	return 0;
2229 
2230 err:
2231 	for (i = 0; i < target->queue_size; ++i) {
2232 		srp_free_iu(target->srp_host, ch->rx_ring[i]);
2233 		srp_free_iu(target->srp_host, ch->tx_ring[i]);
2234 	}
2235 
2236 
2237 err_no_ring:
2238 	kfree(ch->tx_ring);
2239 	ch->tx_ring = NULL;
2240 	kfree(ch->rx_ring);
2241 	ch->rx_ring = NULL;
2242 
2243 	return -ENOMEM;
2244 }
2245 
2246 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2247 {
2248 	uint64_t T_tr_ns, max_compl_time_ms;
2249 	uint32_t rq_tmo_jiffies;
2250 
2251 	/*
2252 	 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2253 	 * table 91), both the QP timeout and the retry count have to be set
2254 	 * for RC QP's during the RTR to RTS transition.
2255 	 */
2256 	WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2257 		     (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2258 
2259 	/*
2260 	 * Set target->rq_tmo_jiffies to one second more than the largest time
2261 	 * it can take before an error completion is generated. See also
2262 	 * C9-140..142 in the IBTA spec for more information about how to
2263 	 * convert the QP Local ACK Timeout value to nanoseconds.
2264 	 */
2265 	T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2266 	max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2267 	do_div(max_compl_time_ms, NSEC_PER_MSEC);
2268 	rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2269 
2270 	return rq_tmo_jiffies;
2271 }
2272 
2273 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2274 			       const struct srp_login_rsp *lrsp,
2275 			       struct srp_rdma_ch *ch)
2276 {
2277 	struct srp_target_port *target = ch->target;
2278 	struct ib_qp_attr *qp_attr = NULL;
2279 	int attr_mask = 0;
2280 	int ret;
2281 	int i;
2282 
2283 	if (lrsp->opcode == SRP_LOGIN_RSP) {
2284 		ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2285 		ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
2286 
2287 		/*
2288 		 * Reserve credits for task management so we don't
2289 		 * bounce requests back to the SCSI mid-layer.
2290 		 */
2291 		target->scsi_host->can_queue
2292 			= min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2293 			      target->scsi_host->can_queue);
2294 		target->scsi_host->cmd_per_lun
2295 			= min_t(int, target->scsi_host->can_queue,
2296 				target->scsi_host->cmd_per_lun);
2297 	} else {
2298 		shost_printk(KERN_WARNING, target->scsi_host,
2299 			     PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2300 		ret = -ECONNRESET;
2301 		goto error;
2302 	}
2303 
2304 	if (!ch->rx_ring) {
2305 		ret = srp_alloc_iu_bufs(ch);
2306 		if (ret)
2307 			goto error;
2308 	}
2309 
2310 	ret = -ENOMEM;
2311 	qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2312 	if (!qp_attr)
2313 		goto error;
2314 
2315 	qp_attr->qp_state = IB_QPS_RTR;
2316 	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2317 	if (ret)
2318 		goto error_free;
2319 
2320 	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2321 	if (ret)
2322 		goto error_free;
2323 
2324 	for (i = 0; i < target->queue_size; i++) {
2325 		struct srp_iu *iu = ch->rx_ring[i];
2326 
2327 		ret = srp_post_recv(ch, iu);
2328 		if (ret)
2329 			goto error_free;
2330 	}
2331 
2332 	qp_attr->qp_state = IB_QPS_RTS;
2333 	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2334 	if (ret)
2335 		goto error_free;
2336 
2337 	target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2338 
2339 	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2340 	if (ret)
2341 		goto error_free;
2342 
2343 	ret = ib_send_cm_rtu(cm_id, NULL, 0);
2344 
2345 error_free:
2346 	kfree(qp_attr);
2347 
2348 error:
2349 	ch->status = ret;
2350 }
2351 
2352 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2353 			       struct ib_cm_event *event,
2354 			       struct srp_rdma_ch *ch)
2355 {
2356 	struct srp_target_port *target = ch->target;
2357 	struct Scsi_Host *shost = target->scsi_host;
2358 	struct ib_class_port_info *cpi;
2359 	int opcode;
2360 
2361 	switch (event->param.rej_rcvd.reason) {
2362 	case IB_CM_REJ_PORT_CM_REDIRECT:
2363 		cpi = event->param.rej_rcvd.ari;
2364 		ch->path.dlid = cpi->redirect_lid;
2365 		ch->path.pkey = cpi->redirect_pkey;
2366 		cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2367 		memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2368 
2369 		ch->status = ch->path.dlid ?
2370 			SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2371 		break;
2372 
2373 	case IB_CM_REJ_PORT_REDIRECT:
2374 		if (srp_target_is_topspin(target)) {
2375 			/*
2376 			 * Topspin/Cisco SRP gateways incorrectly send
2377 			 * reject reason code 25 when they mean 24
2378 			 * (port redirect).
2379 			 */
2380 			memcpy(ch->path.dgid.raw,
2381 			       event->param.rej_rcvd.ari, 16);
2382 
2383 			shost_printk(KERN_DEBUG, shost,
2384 				     PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2385 				     be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2386 				     be64_to_cpu(ch->path.dgid.global.interface_id));
2387 
2388 			ch->status = SRP_PORT_REDIRECT;
2389 		} else {
2390 			shost_printk(KERN_WARNING, shost,
2391 				     "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2392 			ch->status = -ECONNRESET;
2393 		}
2394 		break;
2395 
2396 	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2397 		shost_printk(KERN_WARNING, shost,
2398 			    "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2399 		ch->status = -ECONNRESET;
2400 		break;
2401 
2402 	case IB_CM_REJ_CONSUMER_DEFINED:
2403 		opcode = *(u8 *) event->private_data;
2404 		if (opcode == SRP_LOGIN_REJ) {
2405 			struct srp_login_rej *rej = event->private_data;
2406 			u32 reason = be32_to_cpu(rej->reason);
2407 
2408 			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2409 				shost_printk(KERN_WARNING, shost,
2410 					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2411 			else
2412 				shost_printk(KERN_WARNING, shost, PFX
2413 					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2414 					     target->sgid.raw,
2415 					     target->orig_dgid.raw, reason);
2416 		} else
2417 			shost_printk(KERN_WARNING, shost,
2418 				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2419 				     " opcode 0x%02x\n", opcode);
2420 		ch->status = -ECONNRESET;
2421 		break;
2422 
2423 	case IB_CM_REJ_STALE_CONN:
2424 		shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
2425 		ch->status = SRP_STALE_CONN;
2426 		break;
2427 
2428 	default:
2429 		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2430 			     event->param.rej_rcvd.reason);
2431 		ch->status = -ECONNRESET;
2432 	}
2433 }
2434 
2435 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2436 {
2437 	struct srp_rdma_ch *ch = cm_id->context;
2438 	struct srp_target_port *target = ch->target;
2439 	int comp = 0;
2440 
2441 	switch (event->event) {
2442 	case IB_CM_REQ_ERROR:
2443 		shost_printk(KERN_DEBUG, target->scsi_host,
2444 			     PFX "Sending CM REQ failed\n");
2445 		comp = 1;
2446 		ch->status = -ECONNRESET;
2447 		break;
2448 
2449 	case IB_CM_REP_RECEIVED:
2450 		comp = 1;
2451 		srp_cm_rep_handler(cm_id, event->private_data, ch);
2452 		break;
2453 
2454 	case IB_CM_REJ_RECEIVED:
2455 		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2456 		comp = 1;
2457 
2458 		srp_cm_rej_handler(cm_id, event, ch);
2459 		break;
2460 
2461 	case IB_CM_DREQ_RECEIVED:
2462 		shost_printk(KERN_WARNING, target->scsi_host,
2463 			     PFX "DREQ received - connection closed\n");
2464 		ch->connected = false;
2465 		if (ib_send_cm_drep(cm_id, NULL, 0))
2466 			shost_printk(KERN_ERR, target->scsi_host,
2467 				     PFX "Sending CM DREP failed\n");
2468 		queue_work(system_long_wq, &target->tl_err_work);
2469 		break;
2470 
2471 	case IB_CM_TIMEWAIT_EXIT:
2472 		shost_printk(KERN_ERR, target->scsi_host,
2473 			     PFX "connection closed\n");
2474 		comp = 1;
2475 
2476 		ch->status = 0;
2477 		break;
2478 
2479 	case IB_CM_MRA_RECEIVED:
2480 	case IB_CM_DREQ_ERROR:
2481 	case IB_CM_DREP_RECEIVED:
2482 		break;
2483 
2484 	default:
2485 		shost_printk(KERN_WARNING, target->scsi_host,
2486 			     PFX "Unhandled CM event %d\n", event->event);
2487 		break;
2488 	}
2489 
2490 	if (comp)
2491 		complete(&ch->done);
2492 
2493 	return 0;
2494 }
2495 
2496 /**
2497  * srp_change_queue_depth - setting device queue depth
2498  * @sdev: scsi device struct
2499  * @qdepth: requested queue depth
2500  *
2501  * Returns queue depth.
2502  */
2503 static int
2504 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2505 {
2506 	if (!sdev->tagged_supported)
2507 		qdepth = 1;
2508 	return scsi_change_queue_depth(sdev, qdepth);
2509 }
2510 
2511 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2512 			     u8 func)
2513 {
2514 	struct srp_target_port *target = ch->target;
2515 	struct srp_rport *rport = target->rport;
2516 	struct ib_device *dev = target->srp_host->srp_dev->dev;
2517 	struct srp_iu *iu;
2518 	struct srp_tsk_mgmt *tsk_mgmt;
2519 
2520 	if (!ch->connected || target->qp_in_error)
2521 		return -1;
2522 
2523 	init_completion(&ch->tsk_mgmt_done);
2524 
2525 	/*
2526 	 * Lock the rport mutex to avoid that srp_create_ch_ib() is
2527 	 * invoked while a task management function is being sent.
2528 	 */
2529 	mutex_lock(&rport->mutex);
2530 	spin_lock_irq(&ch->lock);
2531 	iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2532 	spin_unlock_irq(&ch->lock);
2533 
2534 	if (!iu) {
2535 		mutex_unlock(&rport->mutex);
2536 
2537 		return -1;
2538 	}
2539 
2540 	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2541 				   DMA_TO_DEVICE);
2542 	tsk_mgmt = iu->buf;
2543 	memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2544 
2545 	tsk_mgmt->opcode 	= SRP_TSK_MGMT;
2546 	int_to_scsilun(lun, &tsk_mgmt->lun);
2547 	tsk_mgmt->tag		= req_tag | SRP_TAG_TSK_MGMT;
2548 	tsk_mgmt->tsk_mgmt_func = func;
2549 	tsk_mgmt->task_tag	= req_tag;
2550 
2551 	ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2552 				      DMA_TO_DEVICE);
2553 	if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2554 		srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2555 		mutex_unlock(&rport->mutex);
2556 
2557 		return -1;
2558 	}
2559 	mutex_unlock(&rport->mutex);
2560 
2561 	if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
2562 					 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
2563 		return -1;
2564 
2565 	return 0;
2566 }
2567 
2568 static int srp_abort(struct scsi_cmnd *scmnd)
2569 {
2570 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2571 	struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2572 	u32 tag;
2573 	u16 ch_idx;
2574 	struct srp_rdma_ch *ch;
2575 	int ret;
2576 
2577 	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2578 
2579 	if (!req)
2580 		return SUCCESS;
2581 	tag = blk_mq_unique_tag(scmnd->request);
2582 	ch_idx = blk_mq_unique_tag_to_hwq(tag);
2583 	if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2584 		return SUCCESS;
2585 	ch = &target->ch[ch_idx];
2586 	if (!srp_claim_req(ch, req, NULL, scmnd))
2587 		return SUCCESS;
2588 	shost_printk(KERN_ERR, target->scsi_host,
2589 		     "Sending SRP abort for tag %#x\n", tag);
2590 	if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2591 			      SRP_TSK_ABORT_TASK) == 0)
2592 		ret = SUCCESS;
2593 	else if (target->rport->state == SRP_RPORT_LOST)
2594 		ret = FAST_IO_FAIL;
2595 	else
2596 		ret = FAILED;
2597 	srp_free_req(ch, req, scmnd, 0);
2598 	scmnd->result = DID_ABORT << 16;
2599 	scmnd->scsi_done(scmnd);
2600 
2601 	return ret;
2602 }
2603 
2604 static int srp_reset_device(struct scsi_cmnd *scmnd)
2605 {
2606 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2607 	struct srp_rdma_ch *ch;
2608 	int i;
2609 
2610 	shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2611 
2612 	ch = &target->ch[0];
2613 	if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2614 			      SRP_TSK_LUN_RESET))
2615 		return FAILED;
2616 	if (ch->tsk_mgmt_status)
2617 		return FAILED;
2618 
2619 	for (i = 0; i < target->ch_count; i++) {
2620 		ch = &target->ch[i];
2621 		for (i = 0; i < target->req_ring_size; ++i) {
2622 			struct srp_request *req = &ch->req_ring[i];
2623 
2624 			srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2625 		}
2626 	}
2627 
2628 	return SUCCESS;
2629 }
2630 
2631 static int srp_reset_host(struct scsi_cmnd *scmnd)
2632 {
2633 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2634 
2635 	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2636 
2637 	return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2638 }
2639 
2640 static int srp_slave_alloc(struct scsi_device *sdev)
2641 {
2642 	struct Scsi_Host *shost = sdev->host;
2643 	struct srp_target_port *target = host_to_target(shost);
2644 	struct srp_device *srp_dev = target->srp_host->srp_dev;
2645 	struct ib_device *ibdev = srp_dev->dev;
2646 
2647 	if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
2648 		blk_queue_virt_boundary(sdev->request_queue,
2649 					~srp_dev->mr_page_mask);
2650 
2651 	return 0;
2652 }
2653 
2654 static int srp_slave_configure(struct scsi_device *sdev)
2655 {
2656 	struct Scsi_Host *shost = sdev->host;
2657 	struct srp_target_port *target = host_to_target(shost);
2658 	struct request_queue *q = sdev->request_queue;
2659 	unsigned long timeout;
2660 
2661 	if (sdev->type == TYPE_DISK) {
2662 		timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2663 		blk_queue_rq_timeout(q, timeout);
2664 	}
2665 
2666 	return 0;
2667 }
2668 
2669 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2670 			   char *buf)
2671 {
2672 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2673 
2674 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2675 }
2676 
2677 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2678 			     char *buf)
2679 {
2680 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2681 
2682 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2683 }
2684 
2685 static ssize_t show_service_id(struct device *dev,
2686 			       struct device_attribute *attr, char *buf)
2687 {
2688 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2689 
2690 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2691 }
2692 
2693 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2694 			 char *buf)
2695 {
2696 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2697 
2698 	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2699 }
2700 
2701 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2702 			 char *buf)
2703 {
2704 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2705 
2706 	return sprintf(buf, "%pI6\n", target->sgid.raw);
2707 }
2708 
2709 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2710 			 char *buf)
2711 {
2712 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2713 	struct srp_rdma_ch *ch = &target->ch[0];
2714 
2715 	return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2716 }
2717 
2718 static ssize_t show_orig_dgid(struct device *dev,
2719 			      struct device_attribute *attr, char *buf)
2720 {
2721 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2722 
2723 	return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2724 }
2725 
2726 static ssize_t show_req_lim(struct device *dev,
2727 			    struct device_attribute *attr, char *buf)
2728 {
2729 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2730 	struct srp_rdma_ch *ch;
2731 	int i, req_lim = INT_MAX;
2732 
2733 	for (i = 0; i < target->ch_count; i++) {
2734 		ch = &target->ch[i];
2735 		req_lim = min(req_lim, ch->req_lim);
2736 	}
2737 	return sprintf(buf, "%d\n", req_lim);
2738 }
2739 
2740 static ssize_t show_zero_req_lim(struct device *dev,
2741 				 struct device_attribute *attr, char *buf)
2742 {
2743 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2744 
2745 	return sprintf(buf, "%d\n", target->zero_req_lim);
2746 }
2747 
2748 static ssize_t show_local_ib_port(struct device *dev,
2749 				  struct device_attribute *attr, char *buf)
2750 {
2751 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2752 
2753 	return sprintf(buf, "%d\n", target->srp_host->port);
2754 }
2755 
2756 static ssize_t show_local_ib_device(struct device *dev,
2757 				    struct device_attribute *attr, char *buf)
2758 {
2759 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2760 
2761 	return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2762 }
2763 
2764 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2765 			     char *buf)
2766 {
2767 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2768 
2769 	return sprintf(buf, "%d\n", target->ch_count);
2770 }
2771 
2772 static ssize_t show_comp_vector(struct device *dev,
2773 				struct device_attribute *attr, char *buf)
2774 {
2775 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2776 
2777 	return sprintf(buf, "%d\n", target->comp_vector);
2778 }
2779 
2780 static ssize_t show_tl_retry_count(struct device *dev,
2781 				   struct device_attribute *attr, char *buf)
2782 {
2783 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2784 
2785 	return sprintf(buf, "%d\n", target->tl_retry_count);
2786 }
2787 
2788 static ssize_t show_cmd_sg_entries(struct device *dev,
2789 				   struct device_attribute *attr, char *buf)
2790 {
2791 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2792 
2793 	return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2794 }
2795 
2796 static ssize_t show_allow_ext_sg(struct device *dev,
2797 				 struct device_attribute *attr, char *buf)
2798 {
2799 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2800 
2801 	return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2802 }
2803 
2804 static DEVICE_ATTR(id_ext,	    S_IRUGO, show_id_ext,	   NULL);
2805 static DEVICE_ATTR(ioc_guid,	    S_IRUGO, show_ioc_guid,	   NULL);
2806 static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);
2807 static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL);
2808 static DEVICE_ATTR(sgid,	    S_IRUGO, show_sgid,		   NULL);
2809 static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);
2810 static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);
2811 static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
2812 static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);
2813 static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
2814 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2815 static DEVICE_ATTR(ch_count,        S_IRUGO, show_ch_count,        NULL);
2816 static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
2817 static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
2818 static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
2819 static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);
2820 
2821 static struct device_attribute *srp_host_attrs[] = {
2822 	&dev_attr_id_ext,
2823 	&dev_attr_ioc_guid,
2824 	&dev_attr_service_id,
2825 	&dev_attr_pkey,
2826 	&dev_attr_sgid,
2827 	&dev_attr_dgid,
2828 	&dev_attr_orig_dgid,
2829 	&dev_attr_req_lim,
2830 	&dev_attr_zero_req_lim,
2831 	&dev_attr_local_ib_port,
2832 	&dev_attr_local_ib_device,
2833 	&dev_attr_ch_count,
2834 	&dev_attr_comp_vector,
2835 	&dev_attr_tl_retry_count,
2836 	&dev_attr_cmd_sg_entries,
2837 	&dev_attr_allow_ext_sg,
2838 	NULL
2839 };
2840 
2841 static struct scsi_host_template srp_template = {
2842 	.module				= THIS_MODULE,
2843 	.name				= "InfiniBand SRP initiator",
2844 	.proc_name			= DRV_NAME,
2845 	.slave_alloc			= srp_slave_alloc,
2846 	.slave_configure		= srp_slave_configure,
2847 	.info				= srp_target_info,
2848 	.queuecommand			= srp_queuecommand,
2849 	.change_queue_depth             = srp_change_queue_depth,
2850 	.eh_abort_handler		= srp_abort,
2851 	.eh_device_reset_handler	= srp_reset_device,
2852 	.eh_host_reset_handler		= srp_reset_host,
2853 	.skip_settle_delay		= true,
2854 	.sg_tablesize			= SRP_DEF_SG_TABLESIZE,
2855 	.can_queue			= SRP_DEFAULT_CMD_SQ_SIZE,
2856 	.this_id			= -1,
2857 	.cmd_per_lun			= SRP_DEFAULT_CMD_SQ_SIZE,
2858 	.use_clustering			= ENABLE_CLUSTERING,
2859 	.shost_attrs			= srp_host_attrs,
2860 	.track_queue_depth		= 1,
2861 };
2862 
2863 static int srp_sdev_count(struct Scsi_Host *host)
2864 {
2865 	struct scsi_device *sdev;
2866 	int c = 0;
2867 
2868 	shost_for_each_device(sdev, host)
2869 		c++;
2870 
2871 	return c;
2872 }
2873 
2874 /*
2875  * Return values:
2876  * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2877  * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2878  *    removal has been scheduled.
2879  * 0 and target->state != SRP_TARGET_REMOVED upon success.
2880  */
2881 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2882 {
2883 	struct srp_rport_identifiers ids;
2884 	struct srp_rport *rport;
2885 
2886 	target->state = SRP_TARGET_SCANNING;
2887 	sprintf(target->target_name, "SRP.T10:%016llX",
2888 		be64_to_cpu(target->id_ext));
2889 
2890 	if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2891 		return -ENODEV;
2892 
2893 	memcpy(ids.port_id, &target->id_ext, 8);
2894 	memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2895 	ids.roles = SRP_RPORT_ROLE_TARGET;
2896 	rport = srp_rport_add(target->scsi_host, &ids);
2897 	if (IS_ERR(rport)) {
2898 		scsi_remove_host(target->scsi_host);
2899 		return PTR_ERR(rport);
2900 	}
2901 
2902 	rport->lld_data = target;
2903 	target->rport = rport;
2904 
2905 	spin_lock(&host->target_lock);
2906 	list_add_tail(&target->list, &host->target_list);
2907 	spin_unlock(&host->target_lock);
2908 
2909 	scsi_scan_target(&target->scsi_host->shost_gendev,
2910 			 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
2911 
2912 	if (srp_connected_ch(target) < target->ch_count ||
2913 	    target->qp_in_error) {
2914 		shost_printk(KERN_INFO, target->scsi_host,
2915 			     PFX "SCSI scan failed - removing SCSI host\n");
2916 		srp_queue_remove_work(target);
2917 		goto out;
2918 	}
2919 
2920 	pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2921 		 dev_name(&target->scsi_host->shost_gendev),
2922 		 srp_sdev_count(target->scsi_host));
2923 
2924 	spin_lock_irq(&target->lock);
2925 	if (target->state == SRP_TARGET_SCANNING)
2926 		target->state = SRP_TARGET_LIVE;
2927 	spin_unlock_irq(&target->lock);
2928 
2929 out:
2930 	return 0;
2931 }
2932 
2933 static void srp_release_dev(struct device *dev)
2934 {
2935 	struct srp_host *host =
2936 		container_of(dev, struct srp_host, dev);
2937 
2938 	complete(&host->released);
2939 }
2940 
2941 static struct class srp_class = {
2942 	.name    = "infiniband_srp",
2943 	.dev_release = srp_release_dev
2944 };
2945 
2946 /**
2947  * srp_conn_unique() - check whether the connection to a target is unique
2948  * @host:   SRP host.
2949  * @target: SRP target port.
2950  */
2951 static bool srp_conn_unique(struct srp_host *host,
2952 			    struct srp_target_port *target)
2953 {
2954 	struct srp_target_port *t;
2955 	bool ret = false;
2956 
2957 	if (target->state == SRP_TARGET_REMOVED)
2958 		goto out;
2959 
2960 	ret = true;
2961 
2962 	spin_lock(&host->target_lock);
2963 	list_for_each_entry(t, &host->target_list, list) {
2964 		if (t != target &&
2965 		    target->id_ext == t->id_ext &&
2966 		    target->ioc_guid == t->ioc_guid &&
2967 		    target->initiator_ext == t->initiator_ext) {
2968 			ret = false;
2969 			break;
2970 		}
2971 	}
2972 	spin_unlock(&host->target_lock);
2973 
2974 out:
2975 	return ret;
2976 }
2977 
/*
 * Target ports are added by writing
 *
 *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
 *     pkey=<P_Key>,service_id=<service ID>
 *
 * to the add_target sysfs attribute.
 *
 * Each option is represented by one bit so that the options seen while
 * parsing can be collected in a mask. SRP_OPT_ALL is the set made up of the
 * five options listed above.
 */
enum {
	SRP_OPT_ERR		= 0,
	SRP_OPT_ID_EXT		= 0x0001,
	SRP_OPT_IOC_GUID	= 0x0002,
	SRP_OPT_DGID		= 0x0004,
	SRP_OPT_PKEY		= 0x0008,
	SRP_OPT_SERVICE_ID	= 0x0010,
	SRP_OPT_MAX_SECT	= 0x0020,
	SRP_OPT_MAX_CMD_PER_LUN	= 0x0040,
	SRP_OPT_IO_CLASS	= 0x0080,
	SRP_OPT_INITIATOR_EXT	= 0x0100,
	SRP_OPT_CMD_SG_ENTRIES	= 0x0200,
	SRP_OPT_ALLOW_EXT_SG	= 0x0400,
	SRP_OPT_SG_TABLESIZE	= 0x0800,
	SRP_OPT_COMP_VECTOR	= 0x1000,
	SRP_OPT_TL_RETRY_COUNT	= 0x2000,
	SRP_OPT_QUEUE_SIZE	= 0x4000,
	SRP_OPT_ALL		= SRP_OPT_ID_EXT | SRP_OPT_IOC_GUID |
				  SRP_OPT_DGID | SRP_OPT_PKEY |
				  SRP_OPT_SERVICE_ID,
};
3009 
3010 static const match_table_t srp_opt_tokens = {
3011 	{ SRP_OPT_ID_EXT,		"id_ext=%s" 		},
3012 	{ SRP_OPT_IOC_GUID,		"ioc_guid=%s" 		},
3013 	{ SRP_OPT_DGID,			"dgid=%s" 		},
3014 	{ SRP_OPT_PKEY,			"pkey=%x" 		},
3015 	{ SRP_OPT_SERVICE_ID,		"service_id=%s"		},
3016 	{ SRP_OPT_MAX_SECT,		"max_sect=%d" 		},
3017 	{ SRP_OPT_MAX_CMD_PER_LUN,	"max_cmd_per_lun=%d" 	},
3018 	{ SRP_OPT_IO_CLASS,		"io_class=%x"		},
3019 	{ SRP_OPT_INITIATOR_EXT,	"initiator_ext=%s"	},
3020 	{ SRP_OPT_CMD_SG_ENTRIES,	"cmd_sg_entries=%u"	},
3021 	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	},
3022 	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	},
3023 	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	},
3024 	{ SRP_OPT_TL_RETRY_COUNT,	"tl_retry_count=%u"	},
3025 	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},
3026 	{ SRP_OPT_ERR,			NULL 			}
3027 };
3028 
3029 static int srp_parse_options(const char *buf, struct srp_target_port *target)
3030 {
3031 	char *options, *sep_opt;
3032 	char *p;
3033 	char dgid[3];
3034 	substring_t args[MAX_OPT_ARGS];
3035 	int opt_mask = 0;
3036 	int token;
3037 	int ret = -EINVAL;
3038 	int i;
3039 
3040 	options = kstrdup(buf, GFP_KERNEL);
3041 	if (!options)
3042 		return -ENOMEM;
3043 
3044 	sep_opt = options;
3045 	while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3046 		if (!*p)
3047 			continue;
3048 
3049 		token = match_token(p, srp_opt_tokens, args);
3050 		opt_mask |= token;
3051 
3052 		switch (token) {
3053 		case SRP_OPT_ID_EXT:
3054 			p = match_strdup(args);
3055 			if (!p) {
3056 				ret = -ENOMEM;
3057 				goto out;
3058 			}
3059 			target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3060 			kfree(p);
3061 			break;
3062 
3063 		case SRP_OPT_IOC_GUID:
3064 			p = match_strdup(args);
3065 			if (!p) {
3066 				ret = -ENOMEM;
3067 				goto out;
3068 			}
3069 			target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
3070 			kfree(p);
3071 			break;
3072 
3073 		case SRP_OPT_DGID:
3074 			p = match_strdup(args);
3075 			if (!p) {
3076 				ret = -ENOMEM;
3077 				goto out;
3078 			}
3079 			if (strlen(p) != 32) {
3080 				pr_warn("bad dest GID parameter '%s'\n", p);
3081 				kfree(p);
3082 				goto out;
3083 			}
3084 
3085 			for (i = 0; i < 16; ++i) {
3086 				strlcpy(dgid, p + i * 2, sizeof(dgid));
3087 				if (sscanf(dgid, "%hhx",
3088 					   &target->orig_dgid.raw[i]) < 1) {
3089 					ret = -EINVAL;
3090 					kfree(p);
3091 					goto out;
3092 				}
3093 			}
3094 			kfree(p);
3095 			break;
3096 
3097 		case SRP_OPT_PKEY:
3098 			if (match_hex(args, &token)) {
3099 				pr_warn("bad P_Key parameter '%s'\n", p);
3100 				goto out;
3101 			}
3102 			target->pkey = cpu_to_be16(token);
3103 			break;
3104 
3105 		case SRP_OPT_SERVICE_ID:
3106 			p = match_strdup(args);
3107 			if (!p) {
3108 				ret = -ENOMEM;
3109 				goto out;
3110 			}
3111 			target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3112 			kfree(p);
3113 			break;
3114 
3115 		case SRP_OPT_MAX_SECT:
3116 			if (match_int(args, &token)) {
3117 				pr_warn("bad max sect parameter '%s'\n", p);
3118 				goto out;
3119 			}
3120 			target->scsi_host->max_sectors = token;
3121 			break;
3122 
3123 		case SRP_OPT_QUEUE_SIZE:
3124 			if (match_int(args, &token) || token < 1) {
3125 				pr_warn("bad queue_size parameter '%s'\n", p);
3126 				goto out;
3127 			}
3128 			target->scsi_host->can_queue = token;
3129 			target->queue_size = token + SRP_RSP_SQ_SIZE +
3130 					     SRP_TSK_MGMT_SQ_SIZE;
3131 			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3132 				target->scsi_host->cmd_per_lun = token;
3133 			break;
3134 
3135 		case SRP_OPT_MAX_CMD_PER_LUN:
3136 			if (match_int(args, &token) || token < 1) {
3137 				pr_warn("bad max cmd_per_lun parameter '%s'\n",
3138 					p);
3139 				goto out;
3140 			}
3141 			target->scsi_host->cmd_per_lun = token;
3142 			break;
3143 
3144 		case SRP_OPT_IO_CLASS:
3145 			if (match_hex(args, &token)) {
3146 				pr_warn("bad IO class parameter '%s'\n", p);
3147 				goto out;
3148 			}
3149 			if (token != SRP_REV10_IB_IO_CLASS &&
3150 			    token != SRP_REV16A_IB_IO_CLASS) {
3151 				pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3152 					token, SRP_REV10_IB_IO_CLASS,
3153 					SRP_REV16A_IB_IO_CLASS);
3154 				goto out;
3155 			}
3156 			target->io_class = token;
3157 			break;
3158 
3159 		case SRP_OPT_INITIATOR_EXT:
3160 			p = match_strdup(args);
3161 			if (!p) {
3162 				ret = -ENOMEM;
3163 				goto out;
3164 			}
3165 			target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3166 			kfree(p);
3167 			break;
3168 
3169 		case SRP_OPT_CMD_SG_ENTRIES:
3170 			if (match_int(args, &token) || token < 1 || token > 255) {
3171 				pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3172 					p);
3173 				goto out;
3174 			}
3175 			target->cmd_sg_cnt = token;
3176 			break;
3177 
3178 		case SRP_OPT_ALLOW_EXT_SG:
3179 			if (match_int(args, &token)) {
3180 				pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3181 				goto out;
3182 			}
3183 			target->allow_ext_sg = !!token;
3184 			break;
3185 
3186 		case SRP_OPT_SG_TABLESIZE:
3187 			if (match_int(args, &token) || token < 1 ||
3188 					token > SG_MAX_SEGMENTS) {
3189 				pr_warn("bad max sg_tablesize parameter '%s'\n",
3190 					p);
3191 				goto out;
3192 			}
3193 			target->sg_tablesize = token;
3194 			break;
3195 
3196 		case SRP_OPT_COMP_VECTOR:
3197 			if (match_int(args, &token) || token < 0) {
3198 				pr_warn("bad comp_vector parameter '%s'\n", p);
3199 				goto out;
3200 			}
3201 			target->comp_vector = token;
3202 			break;
3203 
3204 		case SRP_OPT_TL_RETRY_COUNT:
3205 			if (match_int(args, &token) || token < 2 || token > 7) {
3206 				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3207 					p);
3208 				goto out;
3209 			}
3210 			target->tl_retry_count = token;
3211 			break;
3212 
3213 		default:
3214 			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3215 				p);
3216 			goto out;
3217 		}
3218 	}
3219 
3220 	if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3221 		ret = 0;
3222 	else
3223 		for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3224 			if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3225 			    !(srp_opt_tokens[i].token & opt_mask))
3226 				pr_warn("target creation request is missing parameter '%s'\n",
3227 					srp_opt_tokens[i].pattern);
3228 
3229 	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3230 	    && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3231 		pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3232 			target->scsi_host->cmd_per_lun,
3233 			target->scsi_host->can_queue);
3234 
3235 out:
3236 	kfree(options);
3237 	return ret;
3238 }
3239 
3240 static ssize_t srp_create_target(struct device *dev,
3241 				 struct device_attribute *attr,
3242 				 const char *buf, size_t count)
3243 {
3244 	struct srp_host *host =
3245 		container_of(dev, struct srp_host, dev);
3246 	struct Scsi_Host *target_host;
3247 	struct srp_target_port *target;
3248 	struct srp_rdma_ch *ch;
3249 	struct srp_device *srp_dev = host->srp_dev;
3250 	struct ib_device *ibdev = srp_dev->dev;
3251 	int ret, node_idx, node, cpu, i;
3252 	unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3253 	bool multich = false;
3254 
3255 	target_host = scsi_host_alloc(&srp_template,
3256 				      sizeof (struct srp_target_port));
3257 	if (!target_host)
3258 		return -ENOMEM;
3259 
3260 	target_host->transportt  = ib_srp_transport_template;
3261 	target_host->max_channel = 0;
3262 	target_host->max_id      = 1;
3263 	target_host->max_lun     = -1LL;
3264 	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3265 
3266 	target = host_to_target(target_host);
3267 
3268 	target->io_class	= SRP_REV16A_IB_IO_CLASS;
3269 	target->scsi_host	= target_host;
3270 	target->srp_host	= host;
3271 	target->lkey		= host->srp_dev->pd->local_dma_lkey;
3272 	target->global_mr	= host->srp_dev->global_mr;
3273 	target->cmd_sg_cnt	= cmd_sg_entries;
3274 	target->sg_tablesize	= indirect_sg_entries ? : cmd_sg_entries;
3275 	target->allow_ext_sg	= allow_ext_sg;
3276 	target->tl_retry_count	= 7;
3277 	target->queue_size	= SRP_DEFAULT_QUEUE_SIZE;
3278 
3279 	/*
3280 	 * Avoid that the SCSI host can be removed by srp_remove_target()
3281 	 * before this function returns.
3282 	 */
3283 	scsi_host_get(target->scsi_host);
3284 
3285 	mutex_lock(&host->add_target_mutex);
3286 
3287 	ret = srp_parse_options(buf, target);
3288 	if (ret)
3289 		goto out;
3290 
3291 	target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3292 
3293 	if (!srp_conn_unique(target->srp_host, target)) {
3294 		shost_printk(KERN_INFO, target->scsi_host,
3295 			     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3296 			     be64_to_cpu(target->id_ext),
3297 			     be64_to_cpu(target->ioc_guid),
3298 			     be64_to_cpu(target->initiator_ext));
3299 		ret = -EEXIST;
3300 		goto out;
3301 	}
3302 
3303 	if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3304 	    target->cmd_sg_cnt < target->sg_tablesize) {
3305 		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3306 		target->sg_tablesize = target->cmd_sg_cnt;
3307 	}
3308 
3309 	if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3310 		/*
3311 		 * FR and FMR can only map one HCA page per entry. If the
3312 		 * start address is not aligned on a HCA page boundary two
3313 		 * entries will be used for the head and the tail although
3314 		 * these two entries combined contain at most one HCA page of
3315 		 * data. Hence the "+ 1" in the calculation below.
3316 		 *
3317 		 * The indirect data buffer descriptor is contiguous so the
3318 		 * memory for that buffer will only be registered if
3319 		 * register_always is true. Hence add one to mr_per_cmd if
3320 		 * register_always has been set.
3321 		 */
3322 		max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3323 				  (ilog2(srp_dev->mr_page_size) - 9);
3324 		mr_per_cmd = register_always +
3325 			(target->scsi_host->max_sectors + 1 +
3326 			 max_sectors_per_mr - 1) / max_sectors_per_mr;
3327 		pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3328 			 target->scsi_host->max_sectors,
3329 			 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3330 			 max_sectors_per_mr, mr_per_cmd);
3331 	}
3332 
3333 	target_host->sg_tablesize = target->sg_tablesize;
3334 	target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3335 	target->mr_per_cmd = mr_per_cmd;
3336 	target->indirect_size = target->sg_tablesize *
3337 				sizeof (struct srp_direct_buf);
3338 	target->max_iu_len = sizeof (struct srp_cmd) +
3339 			     sizeof (struct srp_indirect_buf) +
3340 			     target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3341 
3342 	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3343 	INIT_WORK(&target->remove_work, srp_remove_work);
3344 	spin_lock_init(&target->lock);
3345 	ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3346 	if (ret)
3347 		goto out;
3348 
3349 	ret = -ENOMEM;
3350 	target->ch_count = max_t(unsigned, num_online_nodes(),
3351 				 min(ch_count ? :
3352 				     min(4 * num_online_nodes(),
3353 					 ibdev->num_comp_vectors),
3354 				     num_online_cpus()));
3355 	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3356 			     GFP_KERNEL);
3357 	if (!target->ch)
3358 		goto out;
3359 
3360 	node_idx = 0;
3361 	for_each_online_node(node) {
3362 		const int ch_start = (node_idx * target->ch_count /
3363 				      num_online_nodes());
3364 		const int ch_end = ((node_idx + 1) * target->ch_count /
3365 				    num_online_nodes());
3366 		const int cv_start = (node_idx * ibdev->num_comp_vectors /
3367 				      num_online_nodes() + target->comp_vector)
3368 				     % ibdev->num_comp_vectors;
3369 		const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3370 				    num_online_nodes() + target->comp_vector)
3371 				   % ibdev->num_comp_vectors;
3372 		int cpu_idx = 0;
3373 
3374 		for_each_online_cpu(cpu) {
3375 			if (cpu_to_node(cpu) != node)
3376 				continue;
3377 			if (ch_start + cpu_idx >= ch_end)
3378 				continue;
3379 			ch = &target->ch[ch_start + cpu_idx];
3380 			ch->target = target;
3381 			ch->comp_vector = cv_start == cv_end ? cv_start :
3382 				cv_start + cpu_idx % (cv_end - cv_start);
3383 			spin_lock_init(&ch->lock);
3384 			INIT_LIST_HEAD(&ch->free_tx);
3385 			ret = srp_new_cm_id(ch);
3386 			if (ret)
3387 				goto err_disconnect;
3388 
3389 			ret = srp_create_ch_ib(ch);
3390 			if (ret)
3391 				goto err_disconnect;
3392 
3393 			ret = srp_alloc_req_data(ch);
3394 			if (ret)
3395 				goto err_disconnect;
3396 
3397 			ret = srp_connect_ch(ch, multich);
3398 			if (ret) {
3399 				shost_printk(KERN_ERR, target->scsi_host,
3400 					     PFX "Connection %d/%d failed\n",
3401 					     ch_start + cpu_idx,
3402 					     target->ch_count);
3403 				if (node_idx == 0 && cpu_idx == 0) {
3404 					goto err_disconnect;
3405 				} else {
3406 					srp_free_ch_ib(target, ch);
3407 					srp_free_req_data(target, ch);
3408 					target->ch_count = ch - target->ch;
3409 					goto connected;
3410 				}
3411 			}
3412 
3413 			multich = true;
3414 			cpu_idx++;
3415 		}
3416 		node_idx++;
3417 	}
3418 
3419 connected:
3420 	target->scsi_host->nr_hw_queues = target->ch_count;
3421 
3422 	ret = srp_add_target(host, target);
3423 	if (ret)
3424 		goto err_disconnect;
3425 
3426 	if (target->state != SRP_TARGET_REMOVED) {
3427 		shost_printk(KERN_DEBUG, target->scsi_host, PFX
3428 			     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3429 			     be64_to_cpu(target->id_ext),
3430 			     be64_to_cpu(target->ioc_guid),
3431 			     be16_to_cpu(target->pkey),
3432 			     be64_to_cpu(target->service_id),
3433 			     target->sgid.raw, target->orig_dgid.raw);
3434 	}
3435 
3436 	ret = count;
3437 
3438 out:
3439 	mutex_unlock(&host->add_target_mutex);
3440 
3441 	scsi_host_put(target->scsi_host);
3442 	if (ret < 0)
3443 		scsi_host_put(target->scsi_host);
3444 
3445 	return ret;
3446 
3447 err_disconnect:
3448 	srp_disconnect_target(target);
3449 
3450 	for (i = 0; i < target->ch_count; i++) {
3451 		ch = &target->ch[i];
3452 		srp_free_ch_ib(target, ch);
3453 		srp_free_req_data(target, ch);
3454 	}
3455 
3456 	kfree(target->ch);
3457 	goto out;
3458 }
3459 
3460 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3461 
3462 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3463 			  char *buf)
3464 {
3465 	struct srp_host *host = container_of(dev, struct srp_host, dev);
3466 
3467 	return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3468 }
3469 
3470 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3471 
3472 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3473 			 char *buf)
3474 {
3475 	struct srp_host *host = container_of(dev, struct srp_host, dev);
3476 
3477 	return sprintf(buf, "%d\n", host->port);
3478 }
3479 
3480 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3481 
3482 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3483 {
3484 	struct srp_host *host;
3485 
3486 	host = kzalloc(sizeof *host, GFP_KERNEL);
3487 	if (!host)
3488 		return NULL;
3489 
3490 	INIT_LIST_HEAD(&host->target_list);
3491 	spin_lock_init(&host->target_lock);
3492 	init_completion(&host->released);
3493 	mutex_init(&host->add_target_mutex);
3494 	host->srp_dev = device;
3495 	host->port = port;
3496 
3497 	host->dev.class = &srp_class;
3498 	host->dev.parent = device->dev->dma_device;
3499 	dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3500 
3501 	if (device_register(&host->dev))
3502 		goto free_host;
3503 	if (device_create_file(&host->dev, &dev_attr_add_target))
3504 		goto err_class;
3505 	if (device_create_file(&host->dev, &dev_attr_ibdev))
3506 		goto err_class;
3507 	if (device_create_file(&host->dev, &dev_attr_port))
3508 		goto err_class;
3509 
3510 	return host;
3511 
3512 err_class:
3513 	device_unregister(&host->dev);
3514 
3515 free_host:
3516 	kfree(host);
3517 
3518 	return NULL;
3519 }
3520 
3521 static void srp_add_one(struct ib_device *device)
3522 {
3523 	struct srp_device *srp_dev;
3524 	struct srp_host *host;
3525 	int mr_page_shift, p;
3526 	u64 max_pages_per_mr;
3527 
3528 	srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
3529 	if (!srp_dev)
3530 		return;
3531 
3532 	/*
3533 	 * Use the smallest page size supported by the HCA, down to a
3534 	 * minimum of 4096 bytes. We're unlikely to build large sglists
3535 	 * out of smaller entries.
3536 	 */
3537 	mr_page_shift		= max(12, ffs(device->attrs.page_size_cap) - 1);
3538 	srp_dev->mr_page_size	= 1 << mr_page_shift;
3539 	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
3540 	max_pages_per_mr	= device->attrs.max_mr_size;
3541 	do_div(max_pages_per_mr, srp_dev->mr_page_size);
3542 	pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
3543 		 device->attrs.max_mr_size, srp_dev->mr_page_size,
3544 		 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
3545 	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3546 					  max_pages_per_mr);
3547 
3548 	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3549 			    device->map_phys_fmr && device->unmap_fmr);
3550 	srp_dev->has_fr = (device->attrs.device_cap_flags &
3551 			   IB_DEVICE_MEM_MGT_EXTENSIONS);
3552 	if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
3553 		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3554 	} else if (!never_register &&
3555 		   device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) {
3556 		srp_dev->use_fast_reg = (srp_dev->has_fr &&
3557 					 (!srp_dev->has_fmr || prefer_fr));
3558 		srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3559 	}
3560 
3561 	if (srp_dev->use_fast_reg) {
3562 		srp_dev->max_pages_per_mr =
3563 			min_t(u32, srp_dev->max_pages_per_mr,
3564 			      device->attrs.max_fast_reg_page_list_len);
3565 	}
3566 	srp_dev->mr_max_size	= srp_dev->mr_page_size *
3567 				   srp_dev->max_pages_per_mr;
3568 	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3569 		 device->name, mr_page_shift, device->attrs.max_mr_size,
3570 		 device->attrs.max_fast_reg_page_list_len,
3571 		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3572 
3573 	INIT_LIST_HEAD(&srp_dev->dev_list);
3574 
3575 	srp_dev->dev = device;
3576 	srp_dev->pd  = ib_alloc_pd(device);
3577 	if (IS_ERR(srp_dev->pd))
3578 		goto free_dev;
3579 
3580 	if (never_register || !register_always ||
3581 	    (!srp_dev->has_fmr && !srp_dev->has_fr)) {
3582 		srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
3583 						   IB_ACCESS_LOCAL_WRITE |
3584 						   IB_ACCESS_REMOTE_READ |
3585 						   IB_ACCESS_REMOTE_WRITE);
3586 		if (IS_ERR(srp_dev->global_mr))
3587 			goto err_pd;
3588 	}
3589 
3590 	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3591 		host = srp_add_port(srp_dev, p);
3592 		if (host)
3593 			list_add_tail(&host->list, &srp_dev->dev_list);
3594 	}
3595 
3596 	ib_set_client_data(device, &srp_client, srp_dev);
3597 	return;
3598 
3599 err_pd:
3600 	ib_dealloc_pd(srp_dev->pd);
3601 
3602 free_dev:
3603 	kfree(srp_dev);
3604 }
3605 
3606 static void srp_remove_one(struct ib_device *device, void *client_data)
3607 {
3608 	struct srp_device *srp_dev;
3609 	struct srp_host *host, *tmp_host;
3610 	struct srp_target_port *target;
3611 
3612 	srp_dev = client_data;
3613 	if (!srp_dev)
3614 		return;
3615 
3616 	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3617 		device_unregister(&host->dev);
3618 		/*
3619 		 * Wait for the sysfs entry to go away, so that no new
3620 		 * target ports can be created.
3621 		 */
3622 		wait_for_completion(&host->released);
3623 
3624 		/*
3625 		 * Remove all target ports.
3626 		 */
3627 		spin_lock(&host->target_lock);
3628 		list_for_each_entry(target, &host->target_list, list)
3629 			srp_queue_remove_work(target);
3630 		spin_unlock(&host->target_lock);
3631 
3632 		/*
3633 		 * Wait for tl_err and target port removal tasks.
3634 		 */
3635 		flush_workqueue(system_long_wq);
3636 		flush_workqueue(srp_remove_wq);
3637 
3638 		kfree(host);
3639 	}
3640 
3641 	if (srp_dev->global_mr)
3642 		ib_dereg_mr(srp_dev->global_mr);
3643 	ib_dealloc_pd(srp_dev->pd);
3644 
3645 	kfree(srp_dev);
3646 }
3647 
3648 static struct srp_function_template ib_srp_transport_functions = {
3649 	.has_rport_state	 = true,
3650 	.reset_timer_if_blocked	 = true,
3651 	.reconnect_delay	 = &srp_reconnect_delay,
3652 	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
3653 	.dev_loss_tmo		 = &srp_dev_loss_tmo,
3654 	.reconnect		 = srp_rport_reconnect,
3655 	.rport_delete		 = srp_rport_delete,
3656 	.terminate_rport_io	 = srp_terminate_io,
3657 };
3658 
3659 static int __init srp_init_module(void)
3660 {
3661 	int ret;
3662 
3663 	if (srp_sg_tablesize) {
3664 		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3665 		if (!cmd_sg_entries)
3666 			cmd_sg_entries = srp_sg_tablesize;
3667 	}
3668 
3669 	if (!cmd_sg_entries)
3670 		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3671 
3672 	if (cmd_sg_entries > 255) {
3673 		pr_warn("Clamping cmd_sg_entries to 255\n");
3674 		cmd_sg_entries = 255;
3675 	}
3676 
3677 	if (!indirect_sg_entries)
3678 		indirect_sg_entries = cmd_sg_entries;
3679 	else if (indirect_sg_entries < cmd_sg_entries) {
3680 		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3681 			cmd_sg_entries);
3682 		indirect_sg_entries = cmd_sg_entries;
3683 	}
3684 
3685 	srp_remove_wq = create_workqueue("srp_remove");
3686 	if (!srp_remove_wq) {
3687 		ret = -ENOMEM;
3688 		goto out;
3689 	}
3690 
3691 	ret = -ENOMEM;
3692 	ib_srp_transport_template =
3693 		srp_attach_transport(&ib_srp_transport_functions);
3694 	if (!ib_srp_transport_template)
3695 		goto destroy_wq;
3696 
3697 	ret = class_register(&srp_class);
3698 	if (ret) {
3699 		pr_err("couldn't register class infiniband_srp\n");
3700 		goto release_tr;
3701 	}
3702 
3703 	ib_sa_register_client(&srp_sa_client);
3704 
3705 	ret = ib_register_client(&srp_client);
3706 	if (ret) {
3707 		pr_err("couldn't register IB client\n");
3708 		goto unreg_sa;
3709 	}
3710 
3711 out:
3712 	return ret;
3713 
3714 unreg_sa:
3715 	ib_sa_unregister_client(&srp_sa_client);
3716 	class_unregister(&srp_class);
3717 
3718 release_tr:
3719 	srp_release_transport(ib_srp_transport_template);
3720 
3721 destroy_wq:
3722 	destroy_workqueue(srp_remove_wq);
3723 	goto out;
3724 }
3725 
3726 static void __exit srp_cleanup_module(void)
3727 {
3728 	ib_unregister_client(&srp_client);
3729 	ib_sa_unregister_client(&srp_sa_client);
3730 	class_unregister(&srp_class);
3731 	srp_release_transport(ib_srp_transport_template);
3732 	destroy_workqueue(srp_remove_wq);
3733 }
3734 
3735 module_init(srp_init_module);
3736 module_exit(srp_cleanup_module);
3737