/*
 * Copyright (c) 2005 Cisco Systems. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
#include <linux/lockdep.h>
#include <linux/inet.h>
#include <rdma/ib_cache.h>

#include <linux/atomic.h>

#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_tcq.h>
#include <scsi/srp.h>
#include <scsi/scsi_transport_srp.h>

#include "ib_srp.h"

#define DRV_NAME	"ib_srp"
#define PFX		DRV_NAME ": "

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
MODULE_LICENSE("Dual BSD/GPL");

#if !defined(CONFIG_DYNAMIC_DEBUG)
#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
#define DYNAMIC_DEBUG_BRANCH(descriptor) false
#endif

static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr = true;
static bool register_always = true;
static bool never_register;
static int topspin_workarounds = 1;

module_param(srp_sg_tablesize, uint, 0444);
MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");

module_param(cmd_sg_entries, uint, 0444);
MODULE_PARM_DESC(cmd_sg_entries,
		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");

module_param(indirect_sg_entries, uint, 0444);
MODULE_PARM_DESC(indirect_sg_entries,
		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");

module_param(allow_ext_sg, bool, 0444);
MODULE_PARM_DESC(allow_ext_sg,
		 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");

module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");

module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
		 "Whether to use fast registration if both FMR and fast registration are supported");

module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
		 "Use memory registration even for contiguous memory regions");

module_param(never_register, bool, 0444);
MODULE_PARM_DESC(never_register, "Never register memory");
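
/*
 * Illustrative example (not part of the original source): the parameters
 * above are read-only once the module is loaded (mode 0444) and can be set
 * on the modprobe command line, e.g.
 *
 *   modprobe ib_srp cmd_sg_entries=255 allow_ext_sg=Y prefer_fr=Y
 *
 * where 255 is the documented maximum for cmd_sg_entries.
 */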

static const struct kernel_param_ops srp_tmo_ops;

static int srp_reconnect_delay = 10;
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");

static int srp_fast_io_fail_tmo = 15;
module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(fast_io_fail_tmo,
		 "Number of seconds between the observation of a transport"
		 " layer error and failing all I/O. \"off\" means that this"
		 " functionality is disabled.");

static int srp_dev_loss_tmo = 600;
module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dev_loss_tmo,
		 "Maximum number of seconds that the SRP transport should"
		 " insulate against transport layer errors. After this time"
		 " has been exceeded the SCSI host is removed. Should be"
		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
		 " if fast_io_fail_tmo has not been set. \"off\" means that"
		 " this functionality is disabled.");

static unsigned ch_count;
module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors.
The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA."); 139 140 static void srp_add_one(struct ib_device *device); 141 static void srp_remove_one(struct ib_device *device, void *client_data); 142 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc); 143 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, 144 const char *opname); 145 static int srp_ib_cm_handler(struct ib_cm_id *cm_id, 146 const struct ib_cm_event *event); 147 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id, 148 struct rdma_cm_event *event); 149 150 static struct scsi_transport_template *ib_srp_transport_template; 151 static struct workqueue_struct *srp_remove_wq; 152 153 static struct ib_client srp_client = { 154 .name = "srp", 155 .add = srp_add_one, 156 .remove = srp_remove_one 157 }; 158 159 static struct ib_sa_client srp_sa_client; 160 161 static int srp_tmo_get(char *buffer, const struct kernel_param *kp) 162 { 163 int tmo = *(int *)kp->arg; 164 165 if (tmo >= 0) 166 return sprintf(buffer, "%d", tmo); 167 else 168 return sprintf(buffer, "off"); 169 } 170 171 static int srp_tmo_set(const char *val, const struct kernel_param *kp) 172 { 173 int tmo, res; 174 175 res = srp_parse_tmo(&tmo, val); 176 if (res) 177 goto out; 178 179 if (kp->arg == &srp_reconnect_delay) 180 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo, 181 srp_dev_loss_tmo); 182 else if (kp->arg == &srp_fast_io_fail_tmo) 183 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo); 184 else 185 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo, 186 tmo); 187 if (res) 188 goto out; 189 *(int *)kp->arg = tmo; 190 191 out: 192 return res; 193 } 194 195 static const struct kernel_param_ops srp_tmo_ops = { 196 .get = srp_tmo_get, 197 .set = srp_tmo_set, 198 }; 199 200 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host) 201 { 202 return (struct srp_target_port *) host->hostdata; 203 } 204 205 static const char *srp_target_info(struct Scsi_Host *host) 206 { 207 return host_to_target(host)->target_name; 208 } 209 210 static int srp_target_is_topspin(struct srp_target_port *target) 211 { 212 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad }; 213 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d }; 214 215 return topspin_workarounds && 216 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) || 217 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui)); 218 } 219 220 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size, 221 gfp_t gfp_mask, 222 enum dma_data_direction direction) 223 { 224 struct srp_iu *iu; 225 226 iu = kmalloc(sizeof *iu, gfp_mask); 227 if (!iu) 228 goto out; 229 230 iu->buf = kzalloc(size, gfp_mask); 231 if (!iu->buf) 232 goto out_free_iu; 233 234 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size, 235 direction); 236 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma)) 237 goto out_free_buf; 238 239 iu->size = size; 240 iu->direction = direction; 241 242 return iu; 243 244 out_free_buf: 245 kfree(iu->buf); 246 out_free_iu: 247 kfree(iu); 248 out: 249 return NULL; 250 } 251 252 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu) 253 { 254 if (!iu) 255 return; 256 257 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size, 258 iu->direction); 259 kfree(iu->buf); 260 kfree(iu); 261 } 262 263 static void srp_qp_event(struct ib_event *event, void *context) 264 { 265 pr_debug("QP event %s (%d)\n", 266 ib_event_msg(event->event), 
event->event); 267 } 268 269 static int srp_init_ib_qp(struct srp_target_port *target, 270 struct ib_qp *qp) 271 { 272 struct ib_qp_attr *attr; 273 int ret; 274 275 attr = kmalloc(sizeof *attr, GFP_KERNEL); 276 if (!attr) 277 return -ENOMEM; 278 279 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev, 280 target->srp_host->port, 281 be16_to_cpu(target->ib_cm.pkey), 282 &attr->pkey_index); 283 if (ret) 284 goto out; 285 286 attr->qp_state = IB_QPS_INIT; 287 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ | 288 IB_ACCESS_REMOTE_WRITE); 289 attr->port_num = target->srp_host->port; 290 291 ret = ib_modify_qp(qp, attr, 292 IB_QP_STATE | 293 IB_QP_PKEY_INDEX | 294 IB_QP_ACCESS_FLAGS | 295 IB_QP_PORT); 296 297 out: 298 kfree(attr); 299 return ret; 300 } 301 302 static int srp_new_ib_cm_id(struct srp_rdma_ch *ch) 303 { 304 struct srp_target_port *target = ch->target; 305 struct ib_cm_id *new_cm_id; 306 307 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev, 308 srp_ib_cm_handler, ch); 309 if (IS_ERR(new_cm_id)) 310 return PTR_ERR(new_cm_id); 311 312 if (ch->ib_cm.cm_id) 313 ib_destroy_cm_id(ch->ib_cm.cm_id); 314 ch->ib_cm.cm_id = new_cm_id; 315 if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev, 316 target->srp_host->port)) 317 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA; 318 else 319 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB; 320 ch->ib_cm.path.sgid = target->sgid; 321 ch->ib_cm.path.dgid = target->ib_cm.orig_dgid; 322 ch->ib_cm.path.pkey = target->ib_cm.pkey; 323 ch->ib_cm.path.service_id = target->ib_cm.service_id; 324 325 return 0; 326 } 327 328 static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch) 329 { 330 struct srp_target_port *target = ch->target; 331 struct rdma_cm_id *new_cm_id; 332 int ret; 333 334 new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch, 335 RDMA_PS_TCP, IB_QPT_RC); 336 if (IS_ERR(new_cm_id)) { 337 ret = PTR_ERR(new_cm_id); 338 new_cm_id = NULL; 339 goto out; 340 } 341 342 init_completion(&ch->done); 343 ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ? 344 (struct sockaddr *)&target->rdma_cm.src : NULL, 345 (struct sockaddr *)&target->rdma_cm.dst, 346 SRP_PATH_REC_TIMEOUT_MS); 347 if (ret) { 348 pr_err("No route available from %pIS to %pIS (%d)\n", 349 &target->rdma_cm.src, &target->rdma_cm.dst, ret); 350 goto out; 351 } 352 ret = wait_for_completion_interruptible(&ch->done); 353 if (ret < 0) 354 goto out; 355 356 ret = ch->status; 357 if (ret) { 358 pr_err("Resolving address %pIS failed (%d)\n", 359 &target->rdma_cm.dst, ret); 360 goto out; 361 } 362 363 swap(ch->rdma_cm.cm_id, new_cm_id); 364 365 out: 366 if (new_cm_id) 367 rdma_destroy_id(new_cm_id); 368 369 return ret; 370 } 371 372 static int srp_new_cm_id(struct srp_rdma_ch *ch) 373 { 374 struct srp_target_port *target = ch->target; 375 376 return target->using_rdma_cm ? 
srp_new_rdma_cm_id(ch) : 377 srp_new_ib_cm_id(ch); 378 } 379 380 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target) 381 { 382 struct srp_device *dev = target->srp_host->srp_dev; 383 struct ib_fmr_pool_param fmr_param; 384 385 memset(&fmr_param, 0, sizeof(fmr_param)); 386 fmr_param.pool_size = target->mr_pool_size; 387 fmr_param.dirty_watermark = fmr_param.pool_size / 4; 388 fmr_param.cache = 1; 389 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr; 390 fmr_param.page_shift = ilog2(dev->mr_page_size); 391 fmr_param.access = (IB_ACCESS_LOCAL_WRITE | 392 IB_ACCESS_REMOTE_WRITE | 393 IB_ACCESS_REMOTE_READ); 394 395 return ib_create_fmr_pool(dev->pd, &fmr_param); 396 } 397 398 /** 399 * srp_destroy_fr_pool() - free the resources owned by a pool 400 * @pool: Fast registration pool to be destroyed. 401 */ 402 static void srp_destroy_fr_pool(struct srp_fr_pool *pool) 403 { 404 int i; 405 struct srp_fr_desc *d; 406 407 if (!pool) 408 return; 409 410 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { 411 if (d->mr) 412 ib_dereg_mr(d->mr); 413 } 414 kfree(pool); 415 } 416 417 /** 418 * srp_create_fr_pool() - allocate and initialize a pool for fast registration 419 * @device: IB device to allocate fast registration descriptors for. 420 * @pd: Protection domain associated with the FR descriptors. 421 * @pool_size: Number of descriptors to allocate. 422 * @max_page_list_len: Maximum fast registration work request page list length. 423 */ 424 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, 425 struct ib_pd *pd, int pool_size, 426 int max_page_list_len) 427 { 428 struct srp_fr_pool *pool; 429 struct srp_fr_desc *d; 430 struct ib_mr *mr; 431 int i, ret = -EINVAL; 432 enum ib_mr_type mr_type; 433 434 if (pool_size <= 0) 435 goto err; 436 ret = -ENOMEM; 437 pool = kzalloc(sizeof(struct srp_fr_pool) + 438 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL); 439 if (!pool) 440 goto err; 441 pool->size = pool_size; 442 pool->max_page_list_len = max_page_list_len; 443 spin_lock_init(&pool->lock); 444 INIT_LIST_HEAD(&pool->free_list); 445 446 if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) 447 mr_type = IB_MR_TYPE_SG_GAPS; 448 else 449 mr_type = IB_MR_TYPE_MEM_REG; 450 451 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { 452 mr = ib_alloc_mr(pd, mr_type, max_page_list_len); 453 if (IS_ERR(mr)) { 454 ret = PTR_ERR(mr); 455 if (ret == -ENOMEM) 456 pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n", 457 dev_name(&device->dev)); 458 goto destroy_pool; 459 } 460 d->mr = mr; 461 list_add_tail(&d->entry, &pool->free_list); 462 } 463 464 out: 465 return pool; 466 467 destroy_pool: 468 srp_destroy_fr_pool(pool); 469 470 err: 471 pool = ERR_PTR(ret); 472 goto out; 473 } 474 475 /** 476 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration 477 * @pool: Pool to obtain descriptor from. 478 */ 479 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool) 480 { 481 struct srp_fr_desc *d = NULL; 482 unsigned long flags; 483 484 spin_lock_irqsave(&pool->lock, flags); 485 if (!list_empty(&pool->free_list)) { 486 d = list_first_entry(&pool->free_list, typeof(*d), entry); 487 list_del(&d->entry); 488 } 489 spin_unlock_irqrestore(&pool->lock, flags); 490 491 return d; 492 } 493 494 /** 495 * srp_fr_pool_put() - put an FR descriptor back in the free list 496 * @pool: Pool the descriptor was allocated from. 497 * @desc: Pointer to an array of fast registration descriptor pointers. 
498 * @n: Number of descriptors to put back. 499 * 500 * Note: The caller must already have queued an invalidation request for 501 * desc->mr->rkey before calling this function. 502 */ 503 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc, 504 int n) 505 { 506 unsigned long flags; 507 int i; 508 509 spin_lock_irqsave(&pool->lock, flags); 510 for (i = 0; i < n; i++) 511 list_add(&desc[i]->entry, &pool->free_list); 512 spin_unlock_irqrestore(&pool->lock, flags); 513 } 514 515 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) 516 { 517 struct srp_device *dev = target->srp_host->srp_dev; 518 519 return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size, 520 dev->max_pages_per_mr); 521 } 522 523 /** 524 * srp_destroy_qp() - destroy an RDMA queue pair 525 * @ch: SRP RDMA channel. 526 * 527 * Drain the qp before destroying it. This avoids that the receive 528 * completion handler can access the queue pair while it is 529 * being destroyed. 530 */ 531 static void srp_destroy_qp(struct srp_rdma_ch *ch) 532 { 533 spin_lock_irq(&ch->lock); 534 ib_process_cq_direct(ch->send_cq, -1); 535 spin_unlock_irq(&ch->lock); 536 537 ib_drain_qp(ch->qp); 538 ib_destroy_qp(ch->qp); 539 } 540 541 static int srp_create_ch_ib(struct srp_rdma_ch *ch) 542 { 543 struct srp_target_port *target = ch->target; 544 struct srp_device *dev = target->srp_host->srp_dev; 545 struct ib_qp_init_attr *init_attr; 546 struct ib_cq *recv_cq, *send_cq; 547 struct ib_qp *qp; 548 struct ib_fmr_pool *fmr_pool = NULL; 549 struct srp_fr_pool *fr_pool = NULL; 550 const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2; 551 int ret; 552 553 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); 554 if (!init_attr) 555 return -ENOMEM; 556 557 /* queue_size + 1 for ib_drain_rq() */ 558 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1, 559 ch->comp_vector, IB_POLL_SOFTIRQ); 560 if (IS_ERR(recv_cq)) { 561 ret = PTR_ERR(recv_cq); 562 goto err; 563 } 564 565 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size, 566 ch->comp_vector, IB_POLL_DIRECT); 567 if (IS_ERR(send_cq)) { 568 ret = PTR_ERR(send_cq); 569 goto err_recv_cq; 570 } 571 572 init_attr->event_handler = srp_qp_event; 573 init_attr->cap.max_send_wr = m * target->queue_size; 574 init_attr->cap.max_recv_wr = target->queue_size + 1; 575 init_attr->cap.max_recv_sge = 1; 576 init_attr->cap.max_send_sge = 1; 577 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; 578 init_attr->qp_type = IB_QPT_RC; 579 init_attr->send_cq = send_cq; 580 init_attr->recv_cq = recv_cq; 581 582 if (target->using_rdma_cm) { 583 ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr); 584 qp = ch->rdma_cm.cm_id->qp; 585 } else { 586 qp = ib_create_qp(dev->pd, init_attr); 587 if (!IS_ERR(qp)) { 588 ret = srp_init_ib_qp(target, qp); 589 if (ret) 590 ib_destroy_qp(qp); 591 } else { 592 ret = PTR_ERR(qp); 593 } 594 } 595 if (ret) { 596 pr_err("QP creation failed for dev %s: %d\n", 597 dev_name(&dev->dev->dev), ret); 598 goto err_send_cq; 599 } 600 601 if (dev->use_fast_reg) { 602 fr_pool = srp_alloc_fr_pool(target); 603 if (IS_ERR(fr_pool)) { 604 ret = PTR_ERR(fr_pool); 605 shost_printk(KERN_WARNING, target->scsi_host, PFX 606 "FR pool allocation failed (%d)\n", ret); 607 goto err_qp; 608 } 609 } else if (dev->use_fmr) { 610 fmr_pool = srp_alloc_fmr_pool(target); 611 if (IS_ERR(fmr_pool)) { 612 ret = PTR_ERR(fmr_pool); 613 shost_printk(KERN_WARNING, target->scsi_host, PFX 614 "FMR pool allocation failed (%d)\n", ret); 615 goto err_qp; 616 } 617 } 
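	/*
	 * Everything below this point swaps the newly allocated QP, CQs and
	 * MR pool into the channel. On the reconnect path old resources may
	 * still be attached to the channel and are released first.
	 */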

	if (ch->qp)
		srp_destroy_qp(ch);
	if (ch->recv_cq)
		ib_free_cq(ch->recv_cq);
	if (ch->send_cq)
		ib_free_cq(ch->send_cq);

	ch->qp = qp;
	ch->recv_cq = recv_cq;
	ch->send_cq = send_cq;

	if (dev->use_fast_reg) {
		if (ch->fr_pool)
			srp_destroy_fr_pool(ch->fr_pool);
		ch->fr_pool = fr_pool;
	} else if (dev->use_fmr) {
		if (ch->fmr_pool)
			ib_destroy_fmr_pool(ch->fmr_pool);
		ch->fmr_pool = fmr_pool;
	}

	kfree(init_attr);
	return 0;

err_qp:
	if (target->using_rdma_cm)
		rdma_destroy_qp(ch->rdma_cm.cm_id);
	else
		ib_destroy_qp(qp);

err_send_cq:
	ib_free_cq(send_cq);

err_recv_cq:
	ib_free_cq(recv_cq);

err:
	kfree(init_attr);
	return ret;
}

/*
 * Note: this function may be called without srp_alloc_iu_bufs() having been
 * invoked. Hence the ch->[rt]x_ring checks.
 */
static void srp_free_ch_ib(struct srp_target_port *target,
			   struct srp_rdma_ch *ch)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	int i;

	if (!ch->target)
		return;

	if (target->using_rdma_cm) {
		if (ch->rdma_cm.cm_id) {
			rdma_destroy_id(ch->rdma_cm.cm_id);
			ch->rdma_cm.cm_id = NULL;
		}
	} else {
		if (ch->ib_cm.cm_id) {
			ib_destroy_cm_id(ch->ib_cm.cm_id);
			ch->ib_cm.cm_id = NULL;
		}
	}

	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() did not, return. */
	if (!ch->qp)
		return;

	if (dev->use_fast_reg) {
		if (ch->fr_pool)
			srp_destroy_fr_pool(ch->fr_pool);
	} else if (dev->use_fmr) {
		if (ch->fmr_pool)
			ib_destroy_fmr_pool(ch->fmr_pool);
	}

	srp_destroy_qp(ch);
	ib_free_cq(ch->send_cq);
	ib_free_cq(ch->recv_cq);

	/*
	 * Prevent the SCSI error handler from using this channel after it
	 * has been freed: the SCSI error handler may continue to attempt
	 * recovery actions after scsi_remove_host() has returned.
706 */ 707 ch->target = NULL; 708 709 ch->qp = NULL; 710 ch->send_cq = ch->recv_cq = NULL; 711 712 if (ch->rx_ring) { 713 for (i = 0; i < target->queue_size; ++i) 714 srp_free_iu(target->srp_host, ch->rx_ring[i]); 715 kfree(ch->rx_ring); 716 ch->rx_ring = NULL; 717 } 718 if (ch->tx_ring) { 719 for (i = 0; i < target->queue_size; ++i) 720 srp_free_iu(target->srp_host, ch->tx_ring[i]); 721 kfree(ch->tx_ring); 722 ch->tx_ring = NULL; 723 } 724 } 725 726 static void srp_path_rec_completion(int status, 727 struct sa_path_rec *pathrec, 728 void *ch_ptr) 729 { 730 struct srp_rdma_ch *ch = ch_ptr; 731 struct srp_target_port *target = ch->target; 732 733 ch->status = status; 734 if (status) 735 shost_printk(KERN_ERR, target->scsi_host, 736 PFX "Got failed path rec status %d\n", status); 737 else 738 ch->ib_cm.path = *pathrec; 739 complete(&ch->done); 740 } 741 742 static int srp_ib_lookup_path(struct srp_rdma_ch *ch) 743 { 744 struct srp_target_port *target = ch->target; 745 int ret; 746 747 ch->ib_cm.path.numb_path = 1; 748 749 init_completion(&ch->done); 750 751 ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client, 752 target->srp_host->srp_dev->dev, 753 target->srp_host->port, 754 &ch->ib_cm.path, 755 IB_SA_PATH_REC_SERVICE_ID | 756 IB_SA_PATH_REC_DGID | 757 IB_SA_PATH_REC_SGID | 758 IB_SA_PATH_REC_NUMB_PATH | 759 IB_SA_PATH_REC_PKEY, 760 SRP_PATH_REC_TIMEOUT_MS, 761 GFP_KERNEL, 762 srp_path_rec_completion, 763 ch, &ch->ib_cm.path_query); 764 if (ch->ib_cm.path_query_id < 0) 765 return ch->ib_cm.path_query_id; 766 767 ret = wait_for_completion_interruptible(&ch->done); 768 if (ret < 0) 769 return ret; 770 771 if (ch->status < 0) 772 shost_printk(KERN_WARNING, target->scsi_host, 773 PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n", 774 ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw, 775 be16_to_cpu(target->ib_cm.pkey), 776 be64_to_cpu(target->ib_cm.service_id)); 777 778 return ch->status; 779 } 780 781 static int srp_rdma_lookup_path(struct srp_rdma_ch *ch) 782 { 783 struct srp_target_port *target = ch->target; 784 int ret; 785 786 init_completion(&ch->done); 787 788 ret = rdma_resolve_route(ch->rdma_cm.cm_id, SRP_PATH_REC_TIMEOUT_MS); 789 if (ret) 790 return ret; 791 792 wait_for_completion_interruptible(&ch->done); 793 794 if (ch->status != 0) 795 shost_printk(KERN_WARNING, target->scsi_host, 796 PFX "Path resolution failed\n"); 797 798 return ch->status; 799 } 800 801 static int srp_lookup_path(struct srp_rdma_ch *ch) 802 { 803 struct srp_target_port *target = ch->target; 804 805 return target->using_rdma_cm ? 
srp_rdma_lookup_path(ch) : 806 srp_ib_lookup_path(ch); 807 } 808 809 static u8 srp_get_subnet_timeout(struct srp_host *host) 810 { 811 struct ib_port_attr attr; 812 int ret; 813 u8 subnet_timeout = 18; 814 815 ret = ib_query_port(host->srp_dev->dev, host->port, &attr); 816 if (ret == 0) 817 subnet_timeout = attr.subnet_timeout; 818 819 if (unlikely(subnet_timeout < 15)) 820 pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n", 821 dev_name(&host->srp_dev->dev->dev), subnet_timeout); 822 823 return subnet_timeout; 824 } 825 826 static int srp_send_req(struct srp_rdma_ch *ch, bool multich) 827 { 828 struct srp_target_port *target = ch->target; 829 struct { 830 struct rdma_conn_param rdma_param; 831 struct srp_login_req_rdma rdma_req; 832 struct ib_cm_req_param ib_param; 833 struct srp_login_req ib_req; 834 } *req = NULL; 835 char *ipi, *tpi; 836 int status; 837 838 req = kzalloc(sizeof *req, GFP_KERNEL); 839 if (!req) 840 return -ENOMEM; 841 842 req->ib_param.flow_control = 1; 843 req->ib_param.retry_count = target->tl_retry_count; 844 845 /* 846 * Pick some arbitrary defaults here; we could make these 847 * module parameters if anyone cared about setting them. 848 */ 849 req->ib_param.responder_resources = 4; 850 req->ib_param.rnr_retry_count = 7; 851 req->ib_param.max_cm_retries = 15; 852 853 req->ib_req.opcode = SRP_LOGIN_REQ; 854 req->ib_req.tag = 0; 855 req->ib_req.req_it_iu_len = cpu_to_be32(target->max_iu_len); 856 req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | 857 SRP_BUF_FORMAT_INDIRECT); 858 req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI : 859 SRP_MULTICHAN_SINGLE); 860 861 if (target->using_rdma_cm) { 862 req->rdma_param.flow_control = req->ib_param.flow_control; 863 req->rdma_param.responder_resources = 864 req->ib_param.responder_resources; 865 req->rdma_param.initiator_depth = req->ib_param.initiator_depth; 866 req->rdma_param.retry_count = req->ib_param.retry_count; 867 req->rdma_param.rnr_retry_count = req->ib_param.rnr_retry_count; 868 req->rdma_param.private_data = &req->rdma_req; 869 req->rdma_param.private_data_len = sizeof(req->rdma_req); 870 871 req->rdma_req.opcode = req->ib_req.opcode; 872 req->rdma_req.tag = req->ib_req.tag; 873 req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len; 874 req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt; 875 req->rdma_req.req_flags = req->ib_req.req_flags; 876 877 ipi = req->rdma_req.initiator_port_id; 878 tpi = req->rdma_req.target_port_id; 879 } else { 880 u8 subnet_timeout; 881 882 subnet_timeout = srp_get_subnet_timeout(target->srp_host); 883 884 req->ib_param.primary_path = &ch->ib_cm.path; 885 req->ib_param.alternate_path = NULL; 886 req->ib_param.service_id = target->ib_cm.service_id; 887 get_random_bytes(&req->ib_param.starting_psn, 4); 888 req->ib_param.starting_psn &= 0xffffff; 889 req->ib_param.qp_num = ch->qp->qp_num; 890 req->ib_param.qp_type = ch->qp->qp_type; 891 req->ib_param.local_cm_response_timeout = subnet_timeout + 2; 892 req->ib_param.remote_cm_response_timeout = subnet_timeout + 2; 893 req->ib_param.private_data = &req->ib_req; 894 req->ib_param.private_data_len = sizeof(req->ib_req); 895 896 ipi = req->ib_req.initiator_port_id; 897 tpi = req->ib_req.target_port_id; 898 } 899 900 /* 901 * In the published SRP specification (draft rev. 16a), the 902 * port identifier format is 8 bytes of ID extension followed 903 * by 8 bytes of GUID. Older drafts put the two halves in the 904 * opposite order, so that the GUID comes first. 
905 * 906 * Targets conforming to these obsolete drafts can be 907 * recognized by the I/O Class they report. 908 */ 909 if (target->io_class == SRP_REV10_IB_IO_CLASS) { 910 memcpy(ipi, &target->sgid.global.interface_id, 8); 911 memcpy(ipi + 8, &target->initiator_ext, 8); 912 memcpy(tpi, &target->ioc_guid, 8); 913 memcpy(tpi + 8, &target->id_ext, 8); 914 } else { 915 memcpy(ipi, &target->initiator_ext, 8); 916 memcpy(ipi + 8, &target->sgid.global.interface_id, 8); 917 memcpy(tpi, &target->id_ext, 8); 918 memcpy(tpi + 8, &target->ioc_guid, 8); 919 } 920 921 /* 922 * Topspin/Cisco SRP targets will reject our login unless we 923 * zero out the first 8 bytes of our initiator port ID and set 924 * the second 8 bytes to the local node GUID. 925 */ 926 if (srp_target_is_topspin(target)) { 927 shost_printk(KERN_DEBUG, target->scsi_host, 928 PFX "Topspin/Cisco initiator port ID workaround " 929 "activated for target GUID %016llx\n", 930 be64_to_cpu(target->ioc_guid)); 931 memset(ipi, 0, 8); 932 memcpy(ipi + 8, &target->srp_host->srp_dev->dev->node_guid, 8); 933 } 934 935 if (target->using_rdma_cm) 936 status = rdma_connect(ch->rdma_cm.cm_id, &req->rdma_param); 937 else 938 status = ib_send_cm_req(ch->ib_cm.cm_id, &req->ib_param); 939 940 kfree(req); 941 942 return status; 943 } 944 945 static bool srp_queue_remove_work(struct srp_target_port *target) 946 { 947 bool changed = false; 948 949 spin_lock_irq(&target->lock); 950 if (target->state != SRP_TARGET_REMOVED) { 951 target->state = SRP_TARGET_REMOVED; 952 changed = true; 953 } 954 spin_unlock_irq(&target->lock); 955 956 if (changed) 957 queue_work(srp_remove_wq, &target->remove_work); 958 959 return changed; 960 } 961 962 static void srp_disconnect_target(struct srp_target_port *target) 963 { 964 struct srp_rdma_ch *ch; 965 int i, ret; 966 967 /* XXX should send SRP_I_LOGOUT request */ 968 969 for (i = 0; i < target->ch_count; i++) { 970 ch = &target->ch[i]; 971 ch->connected = false; 972 ret = 0; 973 if (target->using_rdma_cm) { 974 if (ch->rdma_cm.cm_id) 975 rdma_disconnect(ch->rdma_cm.cm_id); 976 } else { 977 if (ch->ib_cm.cm_id) 978 ret = ib_send_cm_dreq(ch->ib_cm.cm_id, 979 NULL, 0); 980 } 981 if (ret < 0) { 982 shost_printk(KERN_DEBUG, target->scsi_host, 983 PFX "Sending CM DREQ failed\n"); 984 } 985 } 986 } 987 988 static void srp_free_req_data(struct srp_target_port *target, 989 struct srp_rdma_ch *ch) 990 { 991 struct srp_device *dev = target->srp_host->srp_dev; 992 struct ib_device *ibdev = dev->dev; 993 struct srp_request *req; 994 int i; 995 996 if (!ch->req_ring) 997 return; 998 999 for (i = 0; i < target->req_ring_size; ++i) { 1000 req = &ch->req_ring[i]; 1001 if (dev->use_fast_reg) { 1002 kfree(req->fr_list); 1003 } else { 1004 kfree(req->fmr_list); 1005 kfree(req->map_page); 1006 } 1007 if (req->indirect_dma_addr) { 1008 ib_dma_unmap_single(ibdev, req->indirect_dma_addr, 1009 target->indirect_size, 1010 DMA_TO_DEVICE); 1011 } 1012 kfree(req->indirect_desc); 1013 } 1014 1015 kfree(ch->req_ring); 1016 ch->req_ring = NULL; 1017 } 1018 1019 static int srp_alloc_req_data(struct srp_rdma_ch *ch) 1020 { 1021 struct srp_target_port *target = ch->target; 1022 struct srp_device *srp_dev = target->srp_host->srp_dev; 1023 struct ib_device *ibdev = srp_dev->dev; 1024 struct srp_request *req; 1025 void *mr_list; 1026 dma_addr_t dma_addr; 1027 int i, ret = -ENOMEM; 1028 1029 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring), 1030 GFP_KERNEL); 1031 if (!ch->req_ring) 1032 goto out; 1033 1034 for (i = 0; i < 
target->req_ring_size; ++i) { 1035 req = &ch->req_ring[i]; 1036 mr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *), 1037 GFP_KERNEL); 1038 if (!mr_list) 1039 goto out; 1040 if (srp_dev->use_fast_reg) { 1041 req->fr_list = mr_list; 1042 } else { 1043 req->fmr_list = mr_list; 1044 req->map_page = kmalloc_array(srp_dev->max_pages_per_mr, 1045 sizeof(void *), 1046 GFP_KERNEL); 1047 if (!req->map_page) 1048 goto out; 1049 } 1050 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); 1051 if (!req->indirect_desc) 1052 goto out; 1053 1054 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, 1055 target->indirect_size, 1056 DMA_TO_DEVICE); 1057 if (ib_dma_mapping_error(ibdev, dma_addr)) 1058 goto out; 1059 1060 req->indirect_dma_addr = dma_addr; 1061 } 1062 ret = 0; 1063 1064 out: 1065 return ret; 1066 } 1067 1068 /** 1069 * srp_del_scsi_host_attr() - Remove attributes defined in the host template. 1070 * @shost: SCSI host whose attributes to remove from sysfs. 1071 * 1072 * Note: Any attributes defined in the host template and that did not exist 1073 * before invocation of this function will be ignored. 1074 */ 1075 static void srp_del_scsi_host_attr(struct Scsi_Host *shost) 1076 { 1077 struct device_attribute **attr; 1078 1079 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr) 1080 device_remove_file(&shost->shost_dev, *attr); 1081 } 1082 1083 static void srp_remove_target(struct srp_target_port *target) 1084 { 1085 struct srp_rdma_ch *ch; 1086 int i; 1087 1088 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 1089 1090 srp_del_scsi_host_attr(target->scsi_host); 1091 srp_rport_get(target->rport); 1092 srp_remove_host(target->scsi_host); 1093 scsi_remove_host(target->scsi_host); 1094 srp_stop_rport_timers(target->rport); 1095 srp_disconnect_target(target); 1096 kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net); 1097 for (i = 0; i < target->ch_count; i++) { 1098 ch = &target->ch[i]; 1099 srp_free_ch_ib(target, ch); 1100 } 1101 cancel_work_sync(&target->tl_err_work); 1102 srp_rport_put(target->rport); 1103 for (i = 0; i < target->ch_count; i++) { 1104 ch = &target->ch[i]; 1105 srp_free_req_data(target, ch); 1106 } 1107 kfree(target->ch); 1108 target->ch = NULL; 1109 1110 spin_lock(&target->srp_host->target_lock); 1111 list_del(&target->list); 1112 spin_unlock(&target->srp_host->target_lock); 1113 1114 scsi_host_put(target->scsi_host); 1115 } 1116 1117 static void srp_remove_work(struct work_struct *work) 1118 { 1119 struct srp_target_port *target = 1120 container_of(work, struct srp_target_port, remove_work); 1121 1122 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 1123 1124 srp_remove_target(target); 1125 } 1126 1127 static void srp_rport_delete(struct srp_rport *rport) 1128 { 1129 struct srp_target_port *target = rport->lld_data; 1130 1131 srp_queue_remove_work(target); 1132 } 1133 1134 /** 1135 * srp_connected_ch() - number of connected channels 1136 * @target: SRP target port. 
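 *
 * Return: the number of channels for which ch->connected is true.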
1137 */ 1138 static int srp_connected_ch(struct srp_target_port *target) 1139 { 1140 int i, c = 0; 1141 1142 for (i = 0; i < target->ch_count; i++) 1143 c += target->ch[i].connected; 1144 1145 return c; 1146 } 1147 1148 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) 1149 { 1150 struct srp_target_port *target = ch->target; 1151 int ret; 1152 1153 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0); 1154 1155 ret = srp_lookup_path(ch); 1156 if (ret) 1157 goto out; 1158 1159 while (1) { 1160 init_completion(&ch->done); 1161 ret = srp_send_req(ch, multich); 1162 if (ret) 1163 goto out; 1164 ret = wait_for_completion_interruptible(&ch->done); 1165 if (ret < 0) 1166 goto out; 1167 1168 /* 1169 * The CM event handling code will set status to 1170 * SRP_PORT_REDIRECT if we get a port redirect REJ 1171 * back, or SRP_DLID_REDIRECT if we get a lid/qp 1172 * redirect REJ back. 1173 */ 1174 ret = ch->status; 1175 switch (ret) { 1176 case 0: 1177 ch->connected = true; 1178 goto out; 1179 1180 case SRP_PORT_REDIRECT: 1181 ret = srp_lookup_path(ch); 1182 if (ret) 1183 goto out; 1184 break; 1185 1186 case SRP_DLID_REDIRECT: 1187 break; 1188 1189 case SRP_STALE_CONN: 1190 shost_printk(KERN_ERR, target->scsi_host, PFX 1191 "giving up on stale connection\n"); 1192 ret = -ECONNRESET; 1193 goto out; 1194 1195 default: 1196 goto out; 1197 } 1198 } 1199 1200 out: 1201 return ret <= 0 ? ret : -ENODEV; 1202 } 1203 1204 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc) 1205 { 1206 srp_handle_qp_err(cq, wc, "INV RKEY"); 1207 } 1208 1209 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch, 1210 u32 rkey) 1211 { 1212 struct ib_send_wr wr = { 1213 .opcode = IB_WR_LOCAL_INV, 1214 .next = NULL, 1215 .num_sge = 0, 1216 .send_flags = 0, 1217 .ex.invalidate_rkey = rkey, 1218 }; 1219 1220 wr.wr_cqe = &req->reg_cqe; 1221 req->reg_cqe.done = srp_inv_rkey_err_done; 1222 return ib_post_send(ch->qp, &wr, NULL); 1223 } 1224 1225 static void srp_unmap_data(struct scsi_cmnd *scmnd, 1226 struct srp_rdma_ch *ch, 1227 struct srp_request *req) 1228 { 1229 struct srp_target_port *target = ch->target; 1230 struct srp_device *dev = target->srp_host->srp_dev; 1231 struct ib_device *ibdev = dev->dev; 1232 int i, res; 1233 1234 if (!scsi_sglist(scmnd) || 1235 (scmnd->sc_data_direction != DMA_TO_DEVICE && 1236 scmnd->sc_data_direction != DMA_FROM_DEVICE)) 1237 return; 1238 1239 if (dev->use_fast_reg) { 1240 struct srp_fr_desc **pfr; 1241 1242 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) { 1243 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey); 1244 if (res < 0) { 1245 shost_printk(KERN_ERR, target->scsi_host, PFX 1246 "Queueing INV WR for rkey %#x failed (%d)\n", 1247 (*pfr)->mr->rkey, res); 1248 queue_work(system_long_wq, 1249 &target->tl_err_work); 1250 } 1251 } 1252 if (req->nmdesc) 1253 srp_fr_pool_put(ch->fr_pool, req->fr_list, 1254 req->nmdesc); 1255 } else if (dev->use_fmr) { 1256 struct ib_pool_fmr **pfmr; 1257 1258 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++) 1259 ib_fmr_pool_unmap(*pfmr); 1260 } 1261 1262 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd), 1263 scmnd->sc_data_direction); 1264 } 1265 1266 /** 1267 * srp_claim_req - Take ownership of the scmnd associated with a request. 1268 * @ch: SRP RDMA channel. 1269 * @req: SRP request. 1270 * @sdev: If not NULL, only take ownership for this SCSI device. 1271 * @scmnd: If NULL, take ownership of @req->scmnd. 
If not NULL, only take 1272 * ownership of @req->scmnd if it equals @scmnd. 1273 * 1274 * Return value: 1275 * Either NULL or a pointer to the SCSI command the caller became owner of. 1276 */ 1277 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch, 1278 struct srp_request *req, 1279 struct scsi_device *sdev, 1280 struct scsi_cmnd *scmnd) 1281 { 1282 unsigned long flags; 1283 1284 spin_lock_irqsave(&ch->lock, flags); 1285 if (req->scmnd && 1286 (!sdev || req->scmnd->device == sdev) && 1287 (!scmnd || req->scmnd == scmnd)) { 1288 scmnd = req->scmnd; 1289 req->scmnd = NULL; 1290 } else { 1291 scmnd = NULL; 1292 } 1293 spin_unlock_irqrestore(&ch->lock, flags); 1294 1295 return scmnd; 1296 } 1297 1298 /** 1299 * srp_free_req() - Unmap data and adjust ch->req_lim. 1300 * @ch: SRP RDMA channel. 1301 * @req: Request to be freed. 1302 * @scmnd: SCSI command associated with @req. 1303 * @req_lim_delta: Amount to be added to @target->req_lim. 1304 */ 1305 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req, 1306 struct scsi_cmnd *scmnd, s32 req_lim_delta) 1307 { 1308 unsigned long flags; 1309 1310 srp_unmap_data(scmnd, ch, req); 1311 1312 spin_lock_irqsave(&ch->lock, flags); 1313 ch->req_lim += req_lim_delta; 1314 spin_unlock_irqrestore(&ch->lock, flags); 1315 } 1316 1317 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req, 1318 struct scsi_device *sdev, int result) 1319 { 1320 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL); 1321 1322 if (scmnd) { 1323 srp_free_req(ch, req, scmnd, 0); 1324 scmnd->result = result; 1325 scmnd->scsi_done(scmnd); 1326 } 1327 } 1328 1329 static void srp_terminate_io(struct srp_rport *rport) 1330 { 1331 struct srp_target_port *target = rport->lld_data; 1332 struct srp_rdma_ch *ch; 1333 struct Scsi_Host *shost = target->scsi_host; 1334 struct scsi_device *sdev; 1335 int i, j; 1336 1337 /* 1338 * Invoking srp_terminate_io() while srp_queuecommand() is running 1339 * is not safe. Hence the warning statement below. 1340 */ 1341 shost_for_each_device(sdev, shost) 1342 WARN_ON_ONCE(sdev->request_queue->request_fn_active); 1343 1344 for (i = 0; i < target->ch_count; i++) { 1345 ch = &target->ch[i]; 1346 1347 for (j = 0; j < target->req_ring_size; ++j) { 1348 struct srp_request *req = &ch->req_ring[j]; 1349 1350 srp_finish_req(ch, req, NULL, 1351 DID_TRANSPORT_FAILFAST << 16); 1352 } 1353 } 1354 } 1355 1356 /* 1357 * It is up to the caller to ensure that srp_rport_reconnect() calls are 1358 * serialized and that no concurrent srp_queuecommand(), srp_abort(), 1359 * srp_reset_device() or srp_reset_host() calls will occur while this function 1360 * is in progress. One way to realize that is not to call this function 1361 * directly but to call srp_reconnect_rport() instead since that last function 1362 * serializes calls of this function via rport->mutex and also blocks 1363 * srp_queuecommand() calls before invoking this function. 1364 */ 1365 static int srp_rport_reconnect(struct srp_rport *rport) 1366 { 1367 struct srp_target_port *target = rport->lld_data; 1368 struct srp_rdma_ch *ch; 1369 int i, j, ret = 0; 1370 bool multich = false; 1371 1372 srp_disconnect_target(target); 1373 1374 if (target->state == SRP_TARGET_SCANNING) 1375 return -ENODEV; 1376 1377 /* 1378 * Now get a new local CM ID so that we avoid confusing the target in 1379 * case things are really fouled up. Doing so also ensures that all CM 1380 * callbacks will have finished before a new QP is allocated. 
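	 *
	 * The reconnect sequence below is, in order: allocate new CM IDs,
	 * terminate all outstanding requests with DID_RESET, recreate the QP
	 * and CQs of every channel, repopulate each channel's free_tx list
	 * and finally log in again on every channel.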
1381 */ 1382 for (i = 0; i < target->ch_count; i++) { 1383 ch = &target->ch[i]; 1384 ret += srp_new_cm_id(ch); 1385 } 1386 for (i = 0; i < target->ch_count; i++) { 1387 ch = &target->ch[i]; 1388 for (j = 0; j < target->req_ring_size; ++j) { 1389 struct srp_request *req = &ch->req_ring[j]; 1390 1391 srp_finish_req(ch, req, NULL, DID_RESET << 16); 1392 } 1393 } 1394 for (i = 0; i < target->ch_count; i++) { 1395 ch = &target->ch[i]; 1396 /* 1397 * Whether or not creating a new CM ID succeeded, create a new 1398 * QP. This guarantees that all completion callback function 1399 * invocations have finished before request resetting starts. 1400 */ 1401 ret += srp_create_ch_ib(ch); 1402 1403 INIT_LIST_HEAD(&ch->free_tx); 1404 for (j = 0; j < target->queue_size; ++j) 1405 list_add(&ch->tx_ring[j]->list, &ch->free_tx); 1406 } 1407 1408 target->qp_in_error = false; 1409 1410 for (i = 0; i < target->ch_count; i++) { 1411 ch = &target->ch[i]; 1412 if (ret) 1413 break; 1414 ret = srp_connect_ch(ch, multich); 1415 multich = true; 1416 } 1417 1418 if (ret == 0) 1419 shost_printk(KERN_INFO, target->scsi_host, 1420 PFX "reconnect succeeded\n"); 1421 1422 return ret; 1423 } 1424 1425 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr, 1426 unsigned int dma_len, u32 rkey) 1427 { 1428 struct srp_direct_buf *desc = state->desc; 1429 1430 WARN_ON_ONCE(!dma_len); 1431 1432 desc->va = cpu_to_be64(dma_addr); 1433 desc->key = cpu_to_be32(rkey); 1434 desc->len = cpu_to_be32(dma_len); 1435 1436 state->total_len += dma_len; 1437 state->desc++; 1438 state->ndesc++; 1439 } 1440 1441 static int srp_map_finish_fmr(struct srp_map_state *state, 1442 struct srp_rdma_ch *ch) 1443 { 1444 struct srp_target_port *target = ch->target; 1445 struct srp_device *dev = target->srp_host->srp_dev; 1446 struct ib_pool_fmr *fmr; 1447 u64 io_addr = 0; 1448 1449 if (state->fmr.next >= state->fmr.end) { 1450 shost_printk(KERN_ERR, ch->target->scsi_host, 1451 PFX "Out of MRs (mr_per_cmd = %d)\n", 1452 ch->target->mr_per_cmd); 1453 return -ENOMEM; 1454 } 1455 1456 WARN_ON_ONCE(!dev->use_fmr); 1457 1458 if (state->npages == 0) 1459 return 0; 1460 1461 if (state->npages == 1 && target->global_rkey) { 1462 srp_map_desc(state, state->base_dma_addr, state->dma_len, 1463 target->global_rkey); 1464 goto reset_state; 1465 } 1466 1467 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages, 1468 state->npages, io_addr); 1469 if (IS_ERR(fmr)) 1470 return PTR_ERR(fmr); 1471 1472 *state->fmr.next++ = fmr; 1473 state->nmdesc++; 1474 1475 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask, 1476 state->dma_len, fmr->fmr->rkey); 1477 1478 reset_state: 1479 state->npages = 0; 1480 state->dma_len = 0; 1481 1482 return 0; 1483 } 1484 1485 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc) 1486 { 1487 srp_handle_qp_err(cq, wc, "FAST REG"); 1488 } 1489 1490 /* 1491 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset 1492 * where to start in the first element. If sg_offset_p != NULL then 1493 * *sg_offset_p is updated to the offset in state->sg[retval] of the first 1494 * byte that has not yet been mapped. 
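 *
 * Returns the number of scatterlist entries that have been handled, or a
 * negative error code on failure.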
1495 */ 1496 static int srp_map_finish_fr(struct srp_map_state *state, 1497 struct srp_request *req, 1498 struct srp_rdma_ch *ch, int sg_nents, 1499 unsigned int *sg_offset_p) 1500 { 1501 struct srp_target_port *target = ch->target; 1502 struct srp_device *dev = target->srp_host->srp_dev; 1503 struct ib_reg_wr wr; 1504 struct srp_fr_desc *desc; 1505 u32 rkey; 1506 int n, err; 1507 1508 if (state->fr.next >= state->fr.end) { 1509 shost_printk(KERN_ERR, ch->target->scsi_host, 1510 PFX "Out of MRs (mr_per_cmd = %d)\n", 1511 ch->target->mr_per_cmd); 1512 return -ENOMEM; 1513 } 1514 1515 WARN_ON_ONCE(!dev->use_fast_reg); 1516 1517 if (sg_nents == 1 && target->global_rkey) { 1518 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 1519 1520 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset, 1521 sg_dma_len(state->sg) - sg_offset, 1522 target->global_rkey); 1523 if (sg_offset_p) 1524 *sg_offset_p = 0; 1525 return 1; 1526 } 1527 1528 desc = srp_fr_pool_get(ch->fr_pool); 1529 if (!desc) 1530 return -ENOMEM; 1531 1532 rkey = ib_inc_rkey(desc->mr->rkey); 1533 ib_update_fast_reg_key(desc->mr, rkey); 1534 1535 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p, 1536 dev->mr_page_size); 1537 if (unlikely(n < 0)) { 1538 srp_fr_pool_put(ch->fr_pool, &desc, 1); 1539 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n", 1540 dev_name(&req->scmnd->device->sdev_gendev), sg_nents, 1541 sg_offset_p ? *sg_offset_p : -1, n); 1542 return n; 1543 } 1544 1545 WARN_ON_ONCE(desc->mr->length == 0); 1546 1547 req->reg_cqe.done = srp_reg_mr_err_done; 1548 1549 wr.wr.next = NULL; 1550 wr.wr.opcode = IB_WR_REG_MR; 1551 wr.wr.wr_cqe = &req->reg_cqe; 1552 wr.wr.num_sge = 0; 1553 wr.wr.send_flags = 0; 1554 wr.mr = desc->mr; 1555 wr.key = desc->mr->rkey; 1556 wr.access = (IB_ACCESS_LOCAL_WRITE | 1557 IB_ACCESS_REMOTE_READ | 1558 IB_ACCESS_REMOTE_WRITE); 1559 1560 *state->fr.next++ = desc; 1561 state->nmdesc++; 1562 1563 srp_map_desc(state, desc->mr->iova, 1564 desc->mr->length, desc->mr->rkey); 1565 1566 err = ib_post_send(ch->qp, &wr.wr, NULL); 1567 if (unlikely(err)) { 1568 WARN_ON_ONCE(err == -ENOMEM); 1569 return err; 1570 } 1571 1572 return n; 1573 } 1574 1575 static int srp_map_sg_entry(struct srp_map_state *state, 1576 struct srp_rdma_ch *ch, 1577 struct scatterlist *sg) 1578 { 1579 struct srp_target_port *target = ch->target; 1580 struct srp_device *dev = target->srp_host->srp_dev; 1581 struct ib_device *ibdev = dev->dev; 1582 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg); 1583 unsigned int dma_len = ib_sg_dma_len(ibdev, sg); 1584 unsigned int len = 0; 1585 int ret; 1586 1587 WARN_ON_ONCE(!dma_len); 1588 1589 while (dma_len) { 1590 unsigned offset = dma_addr & ~dev->mr_page_mask; 1591 1592 if (state->npages == dev->max_pages_per_mr || 1593 (state->npages > 0 && offset != 0)) { 1594 ret = srp_map_finish_fmr(state, ch); 1595 if (ret) 1596 return ret; 1597 } 1598 1599 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset); 1600 1601 if (!state->npages) 1602 state->base_dma_addr = dma_addr; 1603 state->pages[state->npages++] = dma_addr & dev->mr_page_mask; 1604 state->dma_len += len; 1605 dma_addr += len; 1606 dma_len -= len; 1607 } 1608 1609 /* 1610 * If the end of the MR is not on a page boundary then we need to 1611 * close it out and start a new one -- we can only merge at page 1612 * boundaries. 
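	 *
	 * For example (illustrative): with a 4 KB mr_page_size, an element
	 * that ends 1 KB into a page cannot be merged with the next element,
	 * so the mapping is closed out here.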
1613 */ 1614 ret = 0; 1615 if ((dma_addr & ~dev->mr_page_mask) != 0) 1616 ret = srp_map_finish_fmr(state, ch); 1617 return ret; 1618 } 1619 1620 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch, 1621 struct srp_request *req, struct scatterlist *scat, 1622 int count) 1623 { 1624 struct scatterlist *sg; 1625 int i, ret; 1626 1627 state->pages = req->map_page; 1628 state->fmr.next = req->fmr_list; 1629 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd; 1630 1631 for_each_sg(scat, sg, count, i) { 1632 ret = srp_map_sg_entry(state, ch, sg); 1633 if (ret) 1634 return ret; 1635 } 1636 1637 ret = srp_map_finish_fmr(state, ch); 1638 if (ret) 1639 return ret; 1640 1641 return 0; 1642 } 1643 1644 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, 1645 struct srp_request *req, struct scatterlist *scat, 1646 int count) 1647 { 1648 unsigned int sg_offset = 0; 1649 1650 state->fr.next = req->fr_list; 1651 state->fr.end = req->fr_list + ch->target->mr_per_cmd; 1652 state->sg = scat; 1653 1654 if (count == 0) 1655 return 0; 1656 1657 while (count) { 1658 int i, n; 1659 1660 n = srp_map_finish_fr(state, req, ch, count, &sg_offset); 1661 if (unlikely(n < 0)) 1662 return n; 1663 1664 count -= n; 1665 for (i = 0; i < n; i++) 1666 state->sg = sg_next(state->sg); 1667 } 1668 1669 return 0; 1670 } 1671 1672 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, 1673 struct srp_request *req, struct scatterlist *scat, 1674 int count) 1675 { 1676 struct srp_target_port *target = ch->target; 1677 struct srp_device *dev = target->srp_host->srp_dev; 1678 struct scatterlist *sg; 1679 int i; 1680 1681 for_each_sg(scat, sg, count, i) { 1682 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg), 1683 ib_sg_dma_len(dev->dev, sg), 1684 target->global_rkey); 1685 } 1686 1687 return 0; 1688 } 1689 1690 /* 1691 * Register the indirect data buffer descriptor with the HCA. 1692 * 1693 * Note: since the indirect data buffer descriptor has been allocated with 1694 * kmalloc() it is guaranteed that this buffer is a physically contiguous 1695 * memory buffer. 1696 */ 1697 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, 1698 void **next_mr, void **end_mr, u32 idb_len, 1699 __be32 *idb_rkey) 1700 { 1701 struct srp_target_port *target = ch->target; 1702 struct srp_device *dev = target->srp_host->srp_dev; 1703 struct srp_map_state state; 1704 struct srp_direct_buf idb_desc; 1705 u64 idb_pages[1]; 1706 struct scatterlist idb_sg[1]; 1707 int ret; 1708 1709 memset(&state, 0, sizeof(state)); 1710 memset(&idb_desc, 0, sizeof(idb_desc)); 1711 state.gen.next = next_mr; 1712 state.gen.end = end_mr; 1713 state.desc = &idb_desc; 1714 state.base_dma_addr = req->indirect_dma_addr; 1715 state.dma_len = idb_len; 1716 1717 if (dev->use_fast_reg) { 1718 state.sg = idb_sg; 1719 sg_init_one(idb_sg, req->indirect_desc, idb_len); 1720 idb_sg->dma_address = req->indirect_dma_addr; /* hack! 
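		 * (the indirect descriptor was already DMA-mapped in
		 * srp_alloc_req_data(), so the DMA address is filled in by
		 * hand here instead of mapping the scatterlist again)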
*/ 1721 #ifdef CONFIG_NEED_SG_DMA_LENGTH 1722 idb_sg->dma_length = idb_sg->length; /* hack^2 */ 1723 #endif 1724 ret = srp_map_finish_fr(&state, req, ch, 1, NULL); 1725 if (ret < 0) 1726 return ret; 1727 WARN_ON_ONCE(ret < 1); 1728 } else if (dev->use_fmr) { 1729 state.pages = idb_pages; 1730 state.pages[0] = (req->indirect_dma_addr & 1731 dev->mr_page_mask); 1732 state.npages = 1; 1733 ret = srp_map_finish_fmr(&state, ch); 1734 if (ret < 0) 1735 return ret; 1736 } else { 1737 return -EINVAL; 1738 } 1739 1740 *idb_rkey = idb_desc.key; 1741 1742 return 0; 1743 } 1744 1745 static void srp_check_mapping(struct srp_map_state *state, 1746 struct srp_rdma_ch *ch, struct srp_request *req, 1747 struct scatterlist *scat, int count) 1748 { 1749 struct srp_device *dev = ch->target->srp_host->srp_dev; 1750 struct srp_fr_desc **pfr; 1751 u64 desc_len = 0, mr_len = 0; 1752 int i; 1753 1754 for (i = 0; i < state->ndesc; i++) 1755 desc_len += be32_to_cpu(req->indirect_desc[i].len); 1756 if (dev->use_fast_reg) 1757 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++) 1758 mr_len += (*pfr)->mr->length; 1759 else if (dev->use_fmr) 1760 for (i = 0; i < state->nmdesc; i++) 1761 mr_len += be32_to_cpu(req->indirect_desc[i].len); 1762 if (desc_len != scsi_bufflen(req->scmnd) || 1763 mr_len > scsi_bufflen(req->scmnd)) 1764 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n", 1765 scsi_bufflen(req->scmnd), desc_len, mr_len, 1766 state->ndesc, state->nmdesc); 1767 } 1768 1769 /** 1770 * srp_map_data() - map SCSI data buffer onto an SRP request 1771 * @scmnd: SCSI command to map 1772 * @ch: SRP RDMA channel 1773 * @req: SRP request 1774 * 1775 * Returns the length in bytes of the SRP_CMD IU or a negative value if 1776 * mapping failed. 1777 */ 1778 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, 1779 struct srp_request *req) 1780 { 1781 struct srp_target_port *target = ch->target; 1782 struct scatterlist *scat; 1783 struct srp_cmd *cmd = req->cmd->buf; 1784 int len, nents, count, ret; 1785 struct srp_device *dev; 1786 struct ib_device *ibdev; 1787 struct srp_map_state state; 1788 struct srp_indirect_buf *indirect_hdr; 1789 u32 idb_len, table_len; 1790 __be32 idb_rkey; 1791 u8 fmt; 1792 1793 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE) 1794 return sizeof (struct srp_cmd); 1795 1796 if (scmnd->sc_data_direction != DMA_FROM_DEVICE && 1797 scmnd->sc_data_direction != DMA_TO_DEVICE) { 1798 shost_printk(KERN_WARNING, target->scsi_host, 1799 PFX "Unhandled data direction %d\n", 1800 scmnd->sc_data_direction); 1801 return -EINVAL; 1802 } 1803 1804 nents = scsi_sg_count(scmnd); 1805 scat = scsi_sglist(scmnd); 1806 1807 dev = target->srp_host->srp_dev; 1808 ibdev = dev->dev; 1809 1810 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction); 1811 if (unlikely(count == 0)) 1812 return -EIO; 1813 1814 fmt = SRP_DATA_DESC_DIRECT; 1815 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); 1816 1817 if (count == 1 && target->global_rkey) { 1818 /* 1819 * The midlayer only generated a single gather/scatter 1820 * entry, or DMA mapping coalesced everything to a 1821 * single entry. So a direct descriptor along with 1822 * the DMA MR suffices. 
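		 * No memory registration is consumed in this case, which is
		 * why req->nmdesc is set to zero below.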
1823 */ 1824 struct srp_direct_buf *buf = (void *) cmd->add_data; 1825 1826 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); 1827 buf->key = cpu_to_be32(target->global_rkey); 1828 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); 1829 1830 req->nmdesc = 0; 1831 goto map_complete; 1832 } 1833 1834 /* 1835 * We have more than one scatter/gather entry, so build our indirect 1836 * descriptor table, trying to merge as many entries as we can. 1837 */ 1838 indirect_hdr = (void *) cmd->add_data; 1839 1840 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr, 1841 target->indirect_size, DMA_TO_DEVICE); 1842 1843 memset(&state, 0, sizeof(state)); 1844 state.desc = req->indirect_desc; 1845 if (dev->use_fast_reg) 1846 ret = srp_map_sg_fr(&state, ch, req, scat, count); 1847 else if (dev->use_fmr) 1848 ret = srp_map_sg_fmr(&state, ch, req, scat, count); 1849 else 1850 ret = srp_map_sg_dma(&state, ch, req, scat, count); 1851 req->nmdesc = state.nmdesc; 1852 if (ret < 0) 1853 goto unmap; 1854 1855 { 1856 DEFINE_DYNAMIC_DEBUG_METADATA(ddm, 1857 "Memory mapping consistency check"); 1858 if (DYNAMIC_DEBUG_BRANCH(ddm)) 1859 srp_check_mapping(&state, ch, req, scat, count); 1860 } 1861 1862 /* We've mapped the request, now pull as much of the indirect 1863 * descriptor table as we can into the command buffer. If this 1864 * target is not using an external indirect table, we are 1865 * guaranteed to fit into the command, as the SCSI layer won't 1866 * give us more S/G entries than we allow. 1867 */ 1868 if (state.ndesc == 1) { 1869 /* 1870 * Memory registration collapsed the sg-list into one entry, 1871 * so use a direct descriptor. 1872 */ 1873 struct srp_direct_buf *buf = (void *) cmd->add_data; 1874 1875 *buf = req->indirect_desc[0]; 1876 goto map_complete; 1877 } 1878 1879 if (unlikely(target->cmd_sg_cnt < state.ndesc && 1880 !target->allow_ext_sg)) { 1881 shost_printk(KERN_ERR, target->scsi_host, 1882 "Could not fit S/G list into SRP_CMD\n"); 1883 ret = -EIO; 1884 goto unmap; 1885 } 1886 1887 count = min(state.ndesc, target->cmd_sg_cnt); 1888 table_len = state.ndesc * sizeof (struct srp_direct_buf); 1889 idb_len = sizeof(struct srp_indirect_buf) + table_len; 1890 1891 fmt = SRP_DATA_DESC_INDIRECT; 1892 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf); 1893 len += count * sizeof (struct srp_direct_buf); 1894 1895 memcpy(indirect_hdr->desc_list, req->indirect_desc, 1896 count * sizeof (struct srp_direct_buf)); 1897 1898 if (!target->global_rkey) { 1899 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end, 1900 idb_len, &idb_rkey); 1901 if (ret < 0) 1902 goto unmap; 1903 req->nmdesc++; 1904 } else { 1905 idb_rkey = cpu_to_be32(target->global_rkey); 1906 } 1907 1908 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr); 1909 indirect_hdr->table_desc.key = idb_rkey; 1910 indirect_hdr->table_desc.len = cpu_to_be32(table_len); 1911 indirect_hdr->len = cpu_to_be32(state.total_len); 1912 1913 if (scmnd->sc_data_direction == DMA_TO_DEVICE) 1914 cmd->data_out_desc_cnt = count; 1915 else 1916 cmd->data_in_desc_cnt = count; 1917 1918 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len, 1919 DMA_TO_DEVICE); 1920 1921 map_complete: 1922 if (scmnd->sc_data_direction == DMA_TO_DEVICE) 1923 cmd->buf_fmt = fmt << 4; 1924 else 1925 cmd->buf_fmt = fmt; 1926 1927 return len; 1928 1929 unmap: 1930 srp_unmap_data(scmnd, ch, req); 1931 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size) 1932 ret = -E2BIG; 1933 return ret; 1934 } 1935 1936 /* 1937 * Return an IU 
and possible credit to the free pool 1938 */ 1939 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu, 1940 enum srp_iu_type iu_type) 1941 { 1942 unsigned long flags; 1943 1944 spin_lock_irqsave(&ch->lock, flags); 1945 list_add(&iu->list, &ch->free_tx); 1946 if (iu_type != SRP_IU_RSP) 1947 ++ch->req_lim; 1948 spin_unlock_irqrestore(&ch->lock, flags); 1949 } 1950 1951 /* 1952 * Must be called with ch->lock held to protect req_lim and free_tx. 1953 * If IU is not sent, it must be returned using srp_put_tx_iu(). 1954 * 1955 * Note: 1956 * An upper limit for the number of allocated information units for each 1957 * request type is: 1958 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues 1959 * more than Scsi_Host.can_queue requests. 1960 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE. 1961 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than 1962 * one unanswered SRP request to an initiator. 1963 */ 1964 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch, 1965 enum srp_iu_type iu_type) 1966 { 1967 struct srp_target_port *target = ch->target; 1968 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE; 1969 struct srp_iu *iu; 1970 1971 lockdep_assert_held(&ch->lock); 1972 1973 ib_process_cq_direct(ch->send_cq, -1); 1974 1975 if (list_empty(&ch->free_tx)) 1976 return NULL; 1977 1978 /* Initiator responses to target requests do not consume credits */ 1979 if (iu_type != SRP_IU_RSP) { 1980 if (ch->req_lim <= rsv) { 1981 ++target->zero_req_lim; 1982 return NULL; 1983 } 1984 1985 --ch->req_lim; 1986 } 1987 1988 iu = list_first_entry(&ch->free_tx, struct srp_iu, list); 1989 list_del(&iu->list); 1990 return iu; 1991 } 1992 1993 /* 1994 * Note: if this function is called from inside ib_drain_sq() then it will 1995 * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE 1996 * with status IB_WC_SUCCESS then that's a bug. 
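 *
 * In the normal case the send CQ is polled via ib_process_cq_direct() from
 * __srp_get_tx_iu() and from srp_destroy_qp(), both of which run with
 * ch->lock held, so putting the IU back on free_tx here needs no extra
 * locking.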
1997 */ 1998 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc) 1999 { 2000 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); 2001 struct srp_rdma_ch *ch = cq->cq_context; 2002 2003 if (unlikely(wc->status != IB_WC_SUCCESS)) { 2004 srp_handle_qp_err(cq, wc, "SEND"); 2005 return; 2006 } 2007 2008 lockdep_assert_held(&ch->lock); 2009 2010 list_add(&iu->list, &ch->free_tx); 2011 } 2012 2013 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) 2014 { 2015 struct srp_target_port *target = ch->target; 2016 struct ib_sge list; 2017 struct ib_send_wr wr; 2018 2019 list.addr = iu->dma; 2020 list.length = len; 2021 list.lkey = target->lkey; 2022 2023 iu->cqe.done = srp_send_done; 2024 2025 wr.next = NULL; 2026 wr.wr_cqe = &iu->cqe; 2027 wr.sg_list = &list; 2028 wr.num_sge = 1; 2029 wr.opcode = IB_WR_SEND; 2030 wr.send_flags = IB_SEND_SIGNALED; 2031 2032 return ib_post_send(ch->qp, &wr, NULL); 2033 } 2034 2035 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu) 2036 { 2037 struct srp_target_port *target = ch->target; 2038 struct ib_recv_wr wr; 2039 struct ib_sge list; 2040 2041 list.addr = iu->dma; 2042 list.length = iu->size; 2043 list.lkey = target->lkey; 2044 2045 iu->cqe.done = srp_recv_done; 2046 2047 wr.next = NULL; 2048 wr.wr_cqe = &iu->cqe; 2049 wr.sg_list = &list; 2050 wr.num_sge = 1; 2051 2052 return ib_post_recv(ch->qp, &wr, NULL); 2053 } 2054 2055 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) 2056 { 2057 struct srp_target_port *target = ch->target; 2058 struct srp_request *req; 2059 struct scsi_cmnd *scmnd; 2060 unsigned long flags; 2061 2062 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) { 2063 spin_lock_irqsave(&ch->lock, flags); 2064 ch->req_lim += be32_to_cpu(rsp->req_lim_delta); 2065 if (rsp->tag == ch->tsk_mgmt_tag) { 2066 ch->tsk_mgmt_status = -1; 2067 if (be32_to_cpu(rsp->resp_data_len) >= 4) 2068 ch->tsk_mgmt_status = rsp->data[3]; 2069 complete(&ch->tsk_mgmt_done); 2070 } else { 2071 shost_printk(KERN_ERR, target->scsi_host, 2072 "Received tsk mgmt response too late for tag %#llx\n", 2073 rsp->tag); 2074 } 2075 spin_unlock_irqrestore(&ch->lock, flags); 2076 } else { 2077 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag); 2078 if (scmnd && scmnd->host_scribble) { 2079 req = (void *)scmnd->host_scribble; 2080 scmnd = srp_claim_req(ch, req, NULL, scmnd); 2081 } else { 2082 scmnd = NULL; 2083 } 2084 if (!scmnd) { 2085 shost_printk(KERN_ERR, target->scsi_host, 2086 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n", 2087 rsp->tag, ch - target->ch, ch->qp->qp_num); 2088 2089 spin_lock_irqsave(&ch->lock, flags); 2090 ch->req_lim += be32_to_cpu(rsp->req_lim_delta); 2091 spin_unlock_irqrestore(&ch->lock, flags); 2092 2093 return; 2094 } 2095 scmnd->result = rsp->status; 2096 2097 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { 2098 memcpy(scmnd->sense_buffer, rsp->data + 2099 be32_to_cpu(rsp->resp_data_len), 2100 min_t(int, be32_to_cpu(rsp->sense_data_len), 2101 SCSI_SENSE_BUFFERSIZE)); 2102 } 2103 2104 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER)) 2105 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); 2106 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER)) 2107 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt)); 2108 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER)) 2109 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt)); 2110 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER)) 2111 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt)); 2112 2113 
srp_free_req(ch, req, scmnd, 2114 be32_to_cpu(rsp->req_lim_delta)); 2115 2116 scmnd->host_scribble = NULL; 2117 scmnd->scsi_done(scmnd); 2118 } 2119 } 2120 2121 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta, 2122 void *rsp, int len) 2123 { 2124 struct srp_target_port *target = ch->target; 2125 struct ib_device *dev = target->srp_host->srp_dev->dev; 2126 unsigned long flags; 2127 struct srp_iu *iu; 2128 int err; 2129 2130 spin_lock_irqsave(&ch->lock, flags); 2131 ch->req_lim += req_delta; 2132 iu = __srp_get_tx_iu(ch, SRP_IU_RSP); 2133 spin_unlock_irqrestore(&ch->lock, flags); 2134 2135 if (!iu) { 2136 shost_printk(KERN_ERR, target->scsi_host, PFX 2137 "no IU available to send response\n"); 2138 return 1; 2139 } 2140 2141 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE); 2142 memcpy(iu->buf, rsp, len); 2143 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE); 2144 2145 err = srp_post_send(ch, iu, len); 2146 if (err) { 2147 shost_printk(KERN_ERR, target->scsi_host, PFX 2148 "unable to post response: %d\n", err); 2149 srp_put_tx_iu(ch, iu, SRP_IU_RSP); 2150 } 2151 2152 return err; 2153 } 2154 2155 static void srp_process_cred_req(struct srp_rdma_ch *ch, 2156 struct srp_cred_req *req) 2157 { 2158 struct srp_cred_rsp rsp = { 2159 .opcode = SRP_CRED_RSP, 2160 .tag = req->tag, 2161 }; 2162 s32 delta = be32_to_cpu(req->req_lim_delta); 2163 2164 if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) 2165 shost_printk(KERN_ERR, ch->target->scsi_host, PFX 2166 "problems processing SRP_CRED_REQ\n"); 2167 } 2168 2169 static void srp_process_aer_req(struct srp_rdma_ch *ch, 2170 struct srp_aer_req *req) 2171 { 2172 struct srp_target_port *target = ch->target; 2173 struct srp_aer_rsp rsp = { 2174 .opcode = SRP_AER_RSP, 2175 .tag = req->tag, 2176 }; 2177 s32 delta = be32_to_cpu(req->req_lim_delta); 2178 2179 shost_printk(KERN_ERR, target->scsi_host, PFX 2180 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun)); 2181 2182 if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) 2183 shost_printk(KERN_ERR, target->scsi_host, PFX 2184 "problems processing SRP_AER_REQ\n"); 2185 } 2186 2187 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc) 2188 { 2189 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); 2190 struct srp_rdma_ch *ch = cq->cq_context; 2191 struct srp_target_port *target = ch->target; 2192 struct ib_device *dev = target->srp_host->srp_dev->dev; 2193 int res; 2194 u8 opcode; 2195 2196 if (unlikely(wc->status != IB_WC_SUCCESS)) { 2197 srp_handle_qp_err(cq, wc, "RECV"); 2198 return; 2199 } 2200 2201 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len, 2202 DMA_FROM_DEVICE); 2203 2204 opcode = *(u8 *) iu->buf; 2205 2206 if (0) { 2207 shost_printk(KERN_ERR, target->scsi_host, 2208 PFX "recv completion, opcode 0x%02x\n", opcode); 2209 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1, 2210 iu->buf, wc->byte_len, true); 2211 } 2212 2213 switch (opcode) { 2214 case SRP_RSP: 2215 srp_process_rsp(ch, iu->buf); 2216 break; 2217 2218 case SRP_CRED_REQ: 2219 srp_process_cred_req(ch, iu->buf); 2220 break; 2221 2222 case SRP_AER_REQ: 2223 srp_process_aer_req(ch, iu->buf); 2224 break; 2225 2226 case SRP_T_LOGOUT: 2227 /* XXX Handle target logout */ 2228 shost_printk(KERN_WARNING, target->scsi_host, 2229 PFX "Got target logout request\n"); 2230 break; 2231 2232 default: 2233 shost_printk(KERN_WARNING, target->scsi_host, 2234 PFX "Unhandled SRP opcode 0x%02x\n", opcode); 2235 break; 2236 } 2237 2238 
ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len, 2239 DMA_FROM_DEVICE); 2240 2241 res = srp_post_recv(ch, iu); 2242 if (res != 0) 2243 shost_printk(KERN_ERR, target->scsi_host, 2244 PFX "Recv failed with error code %d\n", res); 2245 } 2246 2247 /** 2248 * srp_tl_err_work() - handle a transport layer error 2249 * @work: Work structure embedded in an SRP target port. 2250 * 2251 * Note: This function may get invoked before the rport has been created, 2252 * hence the target->rport test. 2253 */ 2254 static void srp_tl_err_work(struct work_struct *work) 2255 { 2256 struct srp_target_port *target; 2257 2258 target = container_of(work, struct srp_target_port, tl_err_work); 2259 if (target->rport) 2260 srp_start_tl_fail_timers(target->rport); 2261 } 2262 2263 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, 2264 const char *opname) 2265 { 2266 struct srp_rdma_ch *ch = cq->cq_context; 2267 struct srp_target_port *target = ch->target; 2268 2269 if (ch->connected && !target->qp_in_error) { 2270 shost_printk(KERN_ERR, target->scsi_host, 2271 PFX "failed %s status %s (%d) for CQE %p\n", 2272 opname, ib_wc_status_msg(wc->status), wc->status, 2273 wc->wr_cqe); 2274 queue_work(system_long_wq, &target->tl_err_work); 2275 } 2276 target->qp_in_error = true; 2277 } 2278 2279 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) 2280 { 2281 struct srp_target_port *target = host_to_target(shost); 2282 struct srp_rport *rport = target->rport; 2283 struct srp_rdma_ch *ch; 2284 struct srp_request *req; 2285 struct srp_iu *iu; 2286 struct srp_cmd *cmd; 2287 struct ib_device *dev; 2288 unsigned long flags; 2289 u32 tag; 2290 u16 idx; 2291 int len, ret; 2292 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler; 2293 2294 /* 2295 * The SCSI EH thread is the only context from which srp_queuecommand() 2296 * can get invoked for blocked devices (SDEV_BLOCK / 2297 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by 2298 * locking the rport mutex if invoked from inside the SCSI EH. 
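 *
 * For reference, the dispatch a few lines below decodes the blk-mq unique
 * tag into an RDMA channel and a request ring slot, roughly:
 *
 *	tag = blk_mq_unique_tag(scmnd->request);
 *	ch  = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
 *	idx = blk_mq_unique_tag_to_tag(tag);
 *
 * i.e. the hardware queue number selects the channel and the per-queue
 * tag selects the slot in ch->req_ring.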
2299 */ 2300 if (in_scsi_eh) 2301 mutex_lock(&rport->mutex); 2302 2303 scmnd->result = srp_chkready(target->rport); 2304 if (unlikely(scmnd->result)) 2305 goto err; 2306 2307 WARN_ON_ONCE(scmnd->request->tag < 0); 2308 tag = blk_mq_unique_tag(scmnd->request); 2309 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)]; 2310 idx = blk_mq_unique_tag_to_tag(tag); 2311 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n", 2312 dev_name(&shost->shost_gendev), tag, idx, 2313 target->req_ring_size); 2314 2315 spin_lock_irqsave(&ch->lock, flags); 2316 iu = __srp_get_tx_iu(ch, SRP_IU_CMD); 2317 spin_unlock_irqrestore(&ch->lock, flags); 2318 2319 if (!iu) 2320 goto err; 2321 2322 req = &ch->req_ring[idx]; 2323 dev = target->srp_host->srp_dev->dev; 2324 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len, 2325 DMA_TO_DEVICE); 2326 2327 scmnd->host_scribble = (void *) req; 2328 2329 cmd = iu->buf; 2330 memset(cmd, 0, sizeof *cmd); 2331 2332 cmd->opcode = SRP_CMD; 2333 int_to_scsilun(scmnd->device->lun, &cmd->lun); 2334 cmd->tag = tag; 2335 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len); 2336 2337 req->scmnd = scmnd; 2338 req->cmd = iu; 2339 2340 len = srp_map_data(scmnd, ch, req); 2341 if (len < 0) { 2342 shost_printk(KERN_ERR, target->scsi_host, 2343 PFX "Failed to map data (%d)\n", len); 2344 /* 2345 * If we ran out of memory descriptors (-ENOMEM) because an 2346 * application is queuing many requests with more than 2347 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer 2348 * to reduce queue depth temporarily. 2349 */ 2350 scmnd->result = len == -ENOMEM ? 2351 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16; 2352 goto err_iu; 2353 } 2354 2355 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len, 2356 DMA_TO_DEVICE); 2357 2358 if (srp_post_send(ch, iu, len)) { 2359 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n"); 2360 goto err_unmap; 2361 } 2362 2363 ret = 0; 2364 2365 unlock_rport: 2366 if (in_scsi_eh) 2367 mutex_unlock(&rport->mutex); 2368 2369 return ret; 2370 2371 err_unmap: 2372 srp_unmap_data(scmnd, ch, req); 2373 2374 err_iu: 2375 srp_put_tx_iu(ch, iu, SRP_IU_CMD); 2376 2377 /* 2378 * Avoid that the loops that iterate over the request ring can 2379 * encounter a dangling SCSI command pointer. 2380 */ 2381 req->scmnd = NULL; 2382 2383 err: 2384 if (scmnd->result) { 2385 scmnd->scsi_done(scmnd); 2386 ret = 0; 2387 } else { 2388 ret = SCSI_MLQUEUE_HOST_BUSY; 2389 } 2390 2391 goto unlock_rport; 2392 } 2393 2394 /* 2395 * Note: the resources allocated in this function are freed in 2396 * srp_free_ch_ib(). 
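 *
 * Per channel this allocates two rings of target->queue_size information
 * units: rx_ring IUs sized for the largest IU the target may send
 * (ch->max_ti_iu_len, DMA_FROM_DEVICE) and tx_ring IUs sized for the
 * largest IU this initiator may send (target->max_iu_len, DMA_TO_DEVICE).
 * Only the TX IUs are chained onto ch->free_tx; the RX IUs are posted to
 * the receive queue by srp_cm_rep_handler().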
2397 */ 2398 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch) 2399 { 2400 struct srp_target_port *target = ch->target; 2401 int i; 2402 2403 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring), 2404 GFP_KERNEL); 2405 if (!ch->rx_ring) 2406 goto err_no_ring; 2407 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring), 2408 GFP_KERNEL); 2409 if (!ch->tx_ring) 2410 goto err_no_ring; 2411 2412 for (i = 0; i < target->queue_size; ++i) { 2413 ch->rx_ring[i] = srp_alloc_iu(target->srp_host, 2414 ch->max_ti_iu_len, 2415 GFP_KERNEL, DMA_FROM_DEVICE); 2416 if (!ch->rx_ring[i]) 2417 goto err; 2418 } 2419 2420 for (i = 0; i < target->queue_size; ++i) { 2421 ch->tx_ring[i] = srp_alloc_iu(target->srp_host, 2422 target->max_iu_len, 2423 GFP_KERNEL, DMA_TO_DEVICE); 2424 if (!ch->tx_ring[i]) 2425 goto err; 2426 2427 list_add(&ch->tx_ring[i]->list, &ch->free_tx); 2428 } 2429 2430 return 0; 2431 2432 err: 2433 for (i = 0; i < target->queue_size; ++i) { 2434 srp_free_iu(target->srp_host, ch->rx_ring[i]); 2435 srp_free_iu(target->srp_host, ch->tx_ring[i]); 2436 } 2437 2438 2439 err_no_ring: 2440 kfree(ch->tx_ring); 2441 ch->tx_ring = NULL; 2442 kfree(ch->rx_ring); 2443 ch->rx_ring = NULL; 2444 2445 return -ENOMEM; 2446 } 2447 2448 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask) 2449 { 2450 uint64_t T_tr_ns, max_compl_time_ms; 2451 uint32_t rq_tmo_jiffies; 2452 2453 /* 2454 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair, 2455 * table 91), both the QP timeout and the retry count have to be set 2456 * for RC QP's during the RTR to RTS transition. 2457 */ 2458 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) != 2459 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)); 2460 2461 /* 2462 * Set target->rq_tmo_jiffies to one second more than the largest time 2463 * it can take before an error completion is generated. See also 2464 * C9-140..142 in the IBTA spec for more information about how to 2465 * convert the QP Local ACK Timeout value to nanoseconds. 2466 */ 2467 T_tr_ns = 4096 * (1ULL << qp_attr->timeout); 2468 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns; 2469 do_div(max_compl_time_ms, NSEC_PER_MSEC); 2470 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000); 2471 2472 return rq_tmo_jiffies; 2473 } 2474 2475 static void srp_cm_rep_handler(struct ib_cm_id *cm_id, 2476 const struct srp_login_rsp *lrsp, 2477 struct srp_rdma_ch *ch) 2478 { 2479 struct srp_target_port *target = ch->target; 2480 struct ib_qp_attr *qp_attr = NULL; 2481 int attr_mask = 0; 2482 int ret = 0; 2483 int i; 2484 2485 if (lrsp->opcode == SRP_LOGIN_RSP) { 2486 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len); 2487 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta); 2488 2489 /* 2490 * Reserve credits for task management so we don't 2491 * bounce requests back to the SCSI mid-layer. 
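 *
 * Illustrative example (numbers not taken from any particular target): if
 * the login response grants req_lim = 64 credits, can_queue is capped at
 * 64 - SRP_TSK_MGMT_SQ_SIZE so that __srp_get_tx_iu() can still hand out
 * a task management IU even when every command credit is in use.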
2492 */ 2493 target->scsi_host->can_queue 2494 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE, 2495 target->scsi_host->can_queue); 2496 target->scsi_host->cmd_per_lun 2497 = min_t(int, target->scsi_host->can_queue, 2498 target->scsi_host->cmd_per_lun); 2499 } else { 2500 shost_printk(KERN_WARNING, target->scsi_host, 2501 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode); 2502 ret = -ECONNRESET; 2503 goto error; 2504 } 2505 2506 if (!ch->rx_ring) { 2507 ret = srp_alloc_iu_bufs(ch); 2508 if (ret) 2509 goto error; 2510 } 2511 2512 for (i = 0; i < target->queue_size; i++) { 2513 struct srp_iu *iu = ch->rx_ring[i]; 2514 2515 ret = srp_post_recv(ch, iu); 2516 if (ret) 2517 goto error; 2518 } 2519 2520 if (!target->using_rdma_cm) { 2521 ret = -ENOMEM; 2522 qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL); 2523 if (!qp_attr) 2524 goto error; 2525 2526 qp_attr->qp_state = IB_QPS_RTR; 2527 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); 2528 if (ret) 2529 goto error_free; 2530 2531 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); 2532 if (ret) 2533 goto error_free; 2534 2535 qp_attr->qp_state = IB_QPS_RTS; 2536 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); 2537 if (ret) 2538 goto error_free; 2539 2540 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask); 2541 2542 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); 2543 if (ret) 2544 goto error_free; 2545 2546 ret = ib_send_cm_rtu(cm_id, NULL, 0); 2547 } 2548 2549 error_free: 2550 kfree(qp_attr); 2551 2552 error: 2553 ch->status = ret; 2554 } 2555 2556 static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id, 2557 const struct ib_cm_event *event, 2558 struct srp_rdma_ch *ch) 2559 { 2560 struct srp_target_port *target = ch->target; 2561 struct Scsi_Host *shost = target->scsi_host; 2562 struct ib_class_port_info *cpi; 2563 int opcode; 2564 u16 dlid; 2565 2566 switch (event->param.rej_rcvd.reason) { 2567 case IB_CM_REJ_PORT_CM_REDIRECT: 2568 cpi = event->param.rej_rcvd.ari; 2569 dlid = be16_to_cpu(cpi->redirect_lid); 2570 sa_path_set_dlid(&ch->ib_cm.path, dlid); 2571 ch->ib_cm.path.pkey = cpi->redirect_pkey; 2572 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff; 2573 memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16); 2574 2575 ch->status = dlid ? SRP_DLID_REDIRECT : SRP_PORT_REDIRECT; 2576 break; 2577 2578 case IB_CM_REJ_PORT_REDIRECT: 2579 if (srp_target_is_topspin(target)) { 2580 union ib_gid *dgid = &ch->ib_cm.path.dgid; 2581 2582 /* 2583 * Topspin/Cisco SRP gateways incorrectly send 2584 * reject reason code 25 when they mean 24 2585 * (port redirect). 
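 *
 * When topspin_workarounds is enabled the ARI of such a reject is
 * therefore interpreted as the GID of the port we are being redirected
 * to, just as it would be for a genuine port redirect.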
2586 */ 2587 memcpy(dgid->raw, event->param.rej_rcvd.ari, 16); 2588 2589 shost_printk(KERN_DEBUG, shost, 2590 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", 2591 be64_to_cpu(dgid->global.subnet_prefix), 2592 be64_to_cpu(dgid->global.interface_id)); 2593 2594 ch->status = SRP_PORT_REDIRECT; 2595 } else { 2596 shost_printk(KERN_WARNING, shost, 2597 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n"); 2598 ch->status = -ECONNRESET; 2599 } 2600 break; 2601 2602 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: 2603 shost_printk(KERN_WARNING, shost, 2604 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); 2605 ch->status = -ECONNRESET; 2606 break; 2607 2608 case IB_CM_REJ_CONSUMER_DEFINED: 2609 opcode = *(u8 *) event->private_data; 2610 if (opcode == SRP_LOGIN_REJ) { 2611 struct srp_login_rej *rej = event->private_data; 2612 u32 reason = be32_to_cpu(rej->reason); 2613 2614 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) 2615 shost_printk(KERN_WARNING, shost, 2616 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); 2617 else 2618 shost_printk(KERN_WARNING, shost, PFX 2619 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n", 2620 target->sgid.raw, 2621 target->ib_cm.orig_dgid.raw, 2622 reason); 2623 } else 2624 shost_printk(KERN_WARNING, shost, 2625 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," 2626 " opcode 0x%02x\n", opcode); 2627 ch->status = -ECONNRESET; 2628 break; 2629 2630 case IB_CM_REJ_STALE_CONN: 2631 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n"); 2632 ch->status = SRP_STALE_CONN; 2633 break; 2634 2635 default: 2636 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", 2637 event->param.rej_rcvd.reason); 2638 ch->status = -ECONNRESET; 2639 } 2640 } 2641 2642 static int srp_ib_cm_handler(struct ib_cm_id *cm_id, 2643 const struct ib_cm_event *event) 2644 { 2645 struct srp_rdma_ch *ch = cm_id->context; 2646 struct srp_target_port *target = ch->target; 2647 int comp = 0; 2648 2649 switch (event->event) { 2650 case IB_CM_REQ_ERROR: 2651 shost_printk(KERN_DEBUG, target->scsi_host, 2652 PFX "Sending CM REQ failed\n"); 2653 comp = 1; 2654 ch->status = -ECONNRESET; 2655 break; 2656 2657 case IB_CM_REP_RECEIVED: 2658 comp = 1; 2659 srp_cm_rep_handler(cm_id, event->private_data, ch); 2660 break; 2661 2662 case IB_CM_REJ_RECEIVED: 2663 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); 2664 comp = 1; 2665 2666 srp_ib_cm_rej_handler(cm_id, event, ch); 2667 break; 2668 2669 case IB_CM_DREQ_RECEIVED: 2670 shost_printk(KERN_WARNING, target->scsi_host, 2671 PFX "DREQ received - connection closed\n"); 2672 ch->connected = false; 2673 if (ib_send_cm_drep(cm_id, NULL, 0)) 2674 shost_printk(KERN_ERR, target->scsi_host, 2675 PFX "Sending CM DREP failed\n"); 2676 queue_work(system_long_wq, &target->tl_err_work); 2677 break; 2678 2679 case IB_CM_TIMEWAIT_EXIT: 2680 shost_printk(KERN_ERR, target->scsi_host, 2681 PFX "connection closed\n"); 2682 comp = 1; 2683 2684 ch->status = 0; 2685 break; 2686 2687 case IB_CM_MRA_RECEIVED: 2688 case IB_CM_DREQ_ERROR: 2689 case IB_CM_DREP_RECEIVED: 2690 break; 2691 2692 default: 2693 shost_printk(KERN_WARNING, target->scsi_host, 2694 PFX "Unhandled CM event %d\n", event->event); 2695 break; 2696 } 2697 2698 if (comp) 2699 complete(&ch->done); 2700 2701 return 0; 2702 } 2703 2704 static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch, 2705 struct rdma_cm_event *event) 2706 { 2707 struct srp_target_port *target = ch->target; 2708 struct Scsi_Host *shost = target->scsi_host; 2709 int opcode; 2710 2711 switch 
(event->status) { 2712 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: 2713 shost_printk(KERN_WARNING, shost, 2714 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); 2715 ch->status = -ECONNRESET; 2716 break; 2717 2718 case IB_CM_REJ_CONSUMER_DEFINED: 2719 opcode = *(u8 *) event->param.conn.private_data; 2720 if (opcode == SRP_LOGIN_REJ) { 2721 struct srp_login_rej *rej = 2722 (struct srp_login_rej *) 2723 event->param.conn.private_data; 2724 u32 reason = be32_to_cpu(rej->reason); 2725 2726 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) 2727 shost_printk(KERN_WARNING, shost, 2728 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); 2729 else 2730 shost_printk(KERN_WARNING, shost, 2731 PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason); 2732 } else { 2733 shost_printk(KERN_WARNING, shost, 2734 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n", 2735 opcode); 2736 } 2737 ch->status = -ECONNRESET; 2738 break; 2739 2740 case IB_CM_REJ_STALE_CONN: 2741 shost_printk(KERN_WARNING, shost, 2742 " REJ reason: stale connection\n"); 2743 ch->status = SRP_STALE_CONN; 2744 break; 2745 2746 default: 2747 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", 2748 event->status); 2749 ch->status = -ECONNRESET; 2750 break; 2751 } 2752 } 2753 2754 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id, 2755 struct rdma_cm_event *event) 2756 { 2757 struct srp_rdma_ch *ch = cm_id->context; 2758 struct srp_target_port *target = ch->target; 2759 int comp = 0; 2760 2761 switch (event->event) { 2762 case RDMA_CM_EVENT_ADDR_RESOLVED: 2763 ch->status = 0; 2764 comp = 1; 2765 break; 2766 2767 case RDMA_CM_EVENT_ADDR_ERROR: 2768 ch->status = -ENXIO; 2769 comp = 1; 2770 break; 2771 2772 case RDMA_CM_EVENT_ROUTE_RESOLVED: 2773 ch->status = 0; 2774 comp = 1; 2775 break; 2776 2777 case RDMA_CM_EVENT_ROUTE_ERROR: 2778 case RDMA_CM_EVENT_UNREACHABLE: 2779 ch->status = -EHOSTUNREACH; 2780 comp = 1; 2781 break; 2782 2783 case RDMA_CM_EVENT_CONNECT_ERROR: 2784 shost_printk(KERN_DEBUG, target->scsi_host, 2785 PFX "Sending CM REQ failed\n"); 2786 comp = 1; 2787 ch->status = -ECONNRESET; 2788 break; 2789 2790 case RDMA_CM_EVENT_ESTABLISHED: 2791 comp = 1; 2792 srp_cm_rep_handler(NULL, event->param.conn.private_data, ch); 2793 break; 2794 2795 case RDMA_CM_EVENT_REJECTED: 2796 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); 2797 comp = 1; 2798 2799 srp_rdma_cm_rej_handler(ch, event); 2800 break; 2801 2802 case RDMA_CM_EVENT_DISCONNECTED: 2803 if (ch->connected) { 2804 shost_printk(KERN_WARNING, target->scsi_host, 2805 PFX "received DREQ\n"); 2806 rdma_disconnect(ch->rdma_cm.cm_id); 2807 comp = 1; 2808 ch->status = 0; 2809 queue_work(system_long_wq, &target->tl_err_work); 2810 } 2811 break; 2812 2813 case RDMA_CM_EVENT_TIMEWAIT_EXIT: 2814 shost_printk(KERN_ERR, target->scsi_host, 2815 PFX "connection closed\n"); 2816 2817 comp = 1; 2818 ch->status = 0; 2819 break; 2820 2821 default: 2822 shost_printk(KERN_WARNING, target->scsi_host, 2823 PFX "Unhandled CM event %d\n", event->event); 2824 break; 2825 } 2826 2827 if (comp) 2828 complete(&ch->done); 2829 2830 return 0; 2831 } 2832 2833 /** 2834 * srp_change_queue_depth - setting device queue depth 2835 * @sdev: scsi device struct 2836 * @qdepth: requested queue depth 2837 * 2838 * Returns queue depth. 
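 *
 * Devices without tagged command queuing support are limited to a depth
 * of one; all other requests are passed through to
 * scsi_change_queue_depth() unchanged.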
2839 */ 2840 static int 2841 srp_change_queue_depth(struct scsi_device *sdev, int qdepth) 2842 { 2843 if (!sdev->tagged_supported) 2844 qdepth = 1; 2845 return scsi_change_queue_depth(sdev, qdepth); 2846 } 2847 2848 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, 2849 u8 func, u8 *status) 2850 { 2851 struct srp_target_port *target = ch->target; 2852 struct srp_rport *rport = target->rport; 2853 struct ib_device *dev = target->srp_host->srp_dev->dev; 2854 struct srp_iu *iu; 2855 struct srp_tsk_mgmt *tsk_mgmt; 2856 int res; 2857 2858 if (!ch->connected || target->qp_in_error) 2859 return -1; 2860 2861 /* 2862 * Lock the rport mutex to avoid that srp_create_ch_ib() is 2863 * invoked while a task management function is being sent. 2864 */ 2865 mutex_lock(&rport->mutex); 2866 spin_lock_irq(&ch->lock); 2867 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT); 2868 spin_unlock_irq(&ch->lock); 2869 2870 if (!iu) { 2871 mutex_unlock(&rport->mutex); 2872 2873 return -1; 2874 } 2875 2876 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt, 2877 DMA_TO_DEVICE); 2878 tsk_mgmt = iu->buf; 2879 memset(tsk_mgmt, 0, sizeof *tsk_mgmt); 2880 2881 tsk_mgmt->opcode = SRP_TSK_MGMT; 2882 int_to_scsilun(lun, &tsk_mgmt->lun); 2883 tsk_mgmt->tsk_mgmt_func = func; 2884 tsk_mgmt->task_tag = req_tag; 2885 2886 spin_lock_irq(&ch->lock); 2887 ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT; 2888 tsk_mgmt->tag = ch->tsk_mgmt_tag; 2889 spin_unlock_irq(&ch->lock); 2890 2891 init_completion(&ch->tsk_mgmt_done); 2892 2893 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt, 2894 DMA_TO_DEVICE); 2895 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) { 2896 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT); 2897 mutex_unlock(&rport->mutex); 2898 2899 return -1; 2900 } 2901 res = wait_for_completion_timeout(&ch->tsk_mgmt_done, 2902 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)); 2903 if (res > 0 && status) 2904 *status = ch->tsk_mgmt_status; 2905 mutex_unlock(&rport->mutex); 2906 2907 WARN_ON_ONCE(res < 0); 2908 2909 return res > 0 ? 
0 : -1; 2910 } 2911 2912 static int srp_abort(struct scsi_cmnd *scmnd) 2913 { 2914 struct srp_target_port *target = host_to_target(scmnd->device->host); 2915 struct srp_request *req = (struct srp_request *) scmnd->host_scribble; 2916 u32 tag; 2917 u16 ch_idx; 2918 struct srp_rdma_ch *ch; 2919 int ret; 2920 2921 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); 2922 2923 if (!req) 2924 return SUCCESS; 2925 tag = blk_mq_unique_tag(scmnd->request); 2926 ch_idx = blk_mq_unique_tag_to_hwq(tag); 2927 if (WARN_ON_ONCE(ch_idx >= target->ch_count)) 2928 return SUCCESS; 2929 ch = &target->ch[ch_idx]; 2930 if (!srp_claim_req(ch, req, NULL, scmnd)) 2931 return SUCCESS; 2932 shost_printk(KERN_ERR, target->scsi_host, 2933 "Sending SRP abort for tag %#x\n", tag); 2934 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun, 2935 SRP_TSK_ABORT_TASK, NULL) == 0) 2936 ret = SUCCESS; 2937 else if (target->rport->state == SRP_RPORT_LOST) 2938 ret = FAST_IO_FAIL; 2939 else 2940 ret = FAILED; 2941 if (ret == SUCCESS) { 2942 srp_free_req(ch, req, scmnd, 0); 2943 scmnd->result = DID_ABORT << 16; 2944 scmnd->scsi_done(scmnd); 2945 } 2946 2947 return ret; 2948 } 2949 2950 static int srp_reset_device(struct scsi_cmnd *scmnd) 2951 { 2952 struct srp_target_port *target = host_to_target(scmnd->device->host); 2953 struct srp_rdma_ch *ch; 2954 int i, j; 2955 u8 status; 2956 2957 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); 2958 2959 ch = &target->ch[0]; 2960 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun, 2961 SRP_TSK_LUN_RESET, &status)) 2962 return FAILED; 2963 if (status) 2964 return FAILED; 2965 2966 for (i = 0; i < target->ch_count; i++) { 2967 ch = &target->ch[i]; 2968 for (j = 0; j < target->req_ring_size; ++j) { 2969 struct srp_request *req = &ch->req_ring[j]; 2970 2971 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16); 2972 } 2973 } 2974 2975 return SUCCESS; 2976 } 2977 2978 static int srp_reset_host(struct scsi_cmnd *scmnd) 2979 { 2980 struct srp_target_port *target = host_to_target(scmnd->device->host); 2981 2982 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); 2983 2984 return srp_reconnect_rport(target->rport) == 0 ?
SUCCESS : FAILED; 2985 } 2986 2987 static int srp_target_alloc(struct scsi_target *starget) 2988 { 2989 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); 2990 struct srp_target_port *target = host_to_target(shost); 2991 2992 if (target->target_can_queue) 2993 starget->can_queue = target->target_can_queue; 2994 return 0; 2995 } 2996 2997 static int srp_slave_alloc(struct scsi_device *sdev) 2998 { 2999 struct Scsi_Host *shost = sdev->host; 3000 struct srp_target_port *target = host_to_target(shost); 3001 struct srp_device *srp_dev = target->srp_host->srp_dev; 3002 struct ib_device *ibdev = srp_dev->dev; 3003 3004 if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) 3005 blk_queue_virt_boundary(sdev->request_queue, 3006 ~srp_dev->mr_page_mask); 3007 3008 return 0; 3009 } 3010 3011 static int srp_slave_configure(struct scsi_device *sdev) 3012 { 3013 struct Scsi_Host *shost = sdev->host; 3014 struct srp_target_port *target = host_to_target(shost); 3015 struct request_queue *q = sdev->request_queue; 3016 unsigned long timeout; 3017 3018 if (sdev->type == TYPE_DISK) { 3019 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies); 3020 blk_queue_rq_timeout(q, timeout); 3021 } 3022 3023 return 0; 3024 } 3025 3026 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, 3027 char *buf) 3028 { 3029 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3030 3031 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); 3032 } 3033 3034 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, 3035 char *buf) 3036 { 3037 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3038 3039 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); 3040 } 3041 3042 static ssize_t show_service_id(struct device *dev, 3043 struct device_attribute *attr, char *buf) 3044 { 3045 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3046 3047 if (target->using_rdma_cm) 3048 return -ENOENT; 3049 return sprintf(buf, "0x%016llx\n", 3050 be64_to_cpu(target->ib_cm.service_id)); 3051 } 3052 3053 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, 3054 char *buf) 3055 { 3056 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3057 3058 if (target->using_rdma_cm) 3059 return -ENOENT; 3060 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); 3061 } 3062 3063 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, 3064 char *buf) 3065 { 3066 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3067 3068 return sprintf(buf, "%pI6\n", target->sgid.raw); 3069 } 3070 3071 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, 3072 char *buf) 3073 { 3074 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3075 struct srp_rdma_ch *ch = &target->ch[0]; 3076 3077 if (target->using_rdma_cm) 3078 return -ENOENT; 3079 return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); 3080 } 3081 3082 static ssize_t show_orig_dgid(struct device *dev, 3083 struct device_attribute *attr, char *buf) 3084 { 3085 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3086 3087 if (target->using_rdma_cm) 3088 return -ENOENT; 3089 return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); 3090 } 3091 3092 static ssize_t show_req_lim(struct device *dev, 3093 struct device_attribute *attr, char *buf) 3094 { 3095 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3096 
struct srp_rdma_ch *ch; 3097 int i, req_lim = INT_MAX; 3098 3099 for (i = 0; i < target->ch_count; i++) { 3100 ch = &target->ch[i]; 3101 req_lim = min(req_lim, ch->req_lim); 3102 } 3103 return sprintf(buf, "%d\n", req_lim); 3104 } 3105 3106 static ssize_t show_zero_req_lim(struct device *dev, 3107 struct device_attribute *attr, char *buf) 3108 { 3109 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3110 3111 return sprintf(buf, "%d\n", target->zero_req_lim); 3112 } 3113 3114 static ssize_t show_local_ib_port(struct device *dev, 3115 struct device_attribute *attr, char *buf) 3116 { 3117 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3118 3119 return sprintf(buf, "%d\n", target->srp_host->port); 3120 } 3121 3122 static ssize_t show_local_ib_device(struct device *dev, 3123 struct device_attribute *attr, char *buf) 3124 { 3125 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3126 3127 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name); 3128 } 3129 3130 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, 3131 char *buf) 3132 { 3133 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3134 3135 return sprintf(buf, "%d\n", target->ch_count); 3136 } 3137 3138 static ssize_t show_comp_vector(struct device *dev, 3139 struct device_attribute *attr, char *buf) 3140 { 3141 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3142 3143 return sprintf(buf, "%d\n", target->comp_vector); 3144 } 3145 3146 static ssize_t show_tl_retry_count(struct device *dev, 3147 struct device_attribute *attr, char *buf) 3148 { 3149 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3150 3151 return sprintf(buf, "%d\n", target->tl_retry_count); 3152 } 3153 3154 static ssize_t show_cmd_sg_entries(struct device *dev, 3155 struct device_attribute *attr, char *buf) 3156 { 3157 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3158 3159 return sprintf(buf, "%u\n", target->cmd_sg_cnt); 3160 } 3161 3162 static ssize_t show_allow_ext_sg(struct device *dev, 3163 struct device_attribute *attr, char *buf) 3164 { 3165 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3166 3167 return sprintf(buf, "%s\n", target->allow_ext_sg ? 
"true" : "false"); 3168 } 3169 3170 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); 3171 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); 3172 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); 3173 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); 3174 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL); 3175 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); 3176 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL); 3177 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); 3178 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); 3179 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); 3180 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); 3181 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL); 3182 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL); 3183 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL); 3184 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); 3185 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); 3186 3187 static struct device_attribute *srp_host_attrs[] = { 3188 &dev_attr_id_ext, 3189 &dev_attr_ioc_guid, 3190 &dev_attr_service_id, 3191 &dev_attr_pkey, 3192 &dev_attr_sgid, 3193 &dev_attr_dgid, 3194 &dev_attr_orig_dgid, 3195 &dev_attr_req_lim, 3196 &dev_attr_zero_req_lim, 3197 &dev_attr_local_ib_port, 3198 &dev_attr_local_ib_device, 3199 &dev_attr_ch_count, 3200 &dev_attr_comp_vector, 3201 &dev_attr_tl_retry_count, 3202 &dev_attr_cmd_sg_entries, 3203 &dev_attr_allow_ext_sg, 3204 NULL 3205 }; 3206 3207 static struct scsi_host_template srp_template = { 3208 .module = THIS_MODULE, 3209 .name = "InfiniBand SRP initiator", 3210 .proc_name = DRV_NAME, 3211 .target_alloc = srp_target_alloc, 3212 .slave_alloc = srp_slave_alloc, 3213 .slave_configure = srp_slave_configure, 3214 .info = srp_target_info, 3215 .queuecommand = srp_queuecommand, 3216 .change_queue_depth = srp_change_queue_depth, 3217 .eh_timed_out = srp_timed_out, 3218 .eh_abort_handler = srp_abort, 3219 .eh_device_reset_handler = srp_reset_device, 3220 .eh_host_reset_handler = srp_reset_host, 3221 .skip_settle_delay = true, 3222 .sg_tablesize = SRP_DEF_SG_TABLESIZE, 3223 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE, 3224 .this_id = -1, 3225 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE, 3226 .use_clustering = ENABLE_CLUSTERING, 3227 .shost_attrs = srp_host_attrs, 3228 .track_queue_depth = 1, 3229 }; 3230 3231 static int srp_sdev_count(struct Scsi_Host *host) 3232 { 3233 struct scsi_device *sdev; 3234 int c = 0; 3235 3236 shost_for_each_device(sdev, host) 3237 c++; 3238 3239 return c; 3240 } 3241 3242 /* 3243 * Return values: 3244 * < 0 upon failure. Caller is responsible for SRP target port cleanup. 3245 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port 3246 * removal has been scheduled. 3247 * 0 and target->state != SRP_TARGET_REMOVED upon success. 
3248 */ 3249 static int srp_add_target(struct srp_host *host, struct srp_target_port *target) 3250 { 3251 struct srp_rport_identifiers ids; 3252 struct srp_rport *rport; 3253 3254 target->state = SRP_TARGET_SCANNING; 3255 sprintf(target->target_name, "SRP.T10:%016llX", 3256 be64_to_cpu(target->id_ext)); 3257 3258 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent)) 3259 return -ENODEV; 3260 3261 memcpy(ids.port_id, &target->id_ext, 8); 3262 memcpy(ids.port_id + 8, &target->ioc_guid, 8); 3263 ids.roles = SRP_RPORT_ROLE_TARGET; 3264 rport = srp_rport_add(target->scsi_host, &ids); 3265 if (IS_ERR(rport)) { 3266 scsi_remove_host(target->scsi_host); 3267 return PTR_ERR(rport); 3268 } 3269 3270 rport->lld_data = target; 3271 target->rport = rport; 3272 3273 spin_lock(&host->target_lock); 3274 list_add_tail(&target->list, &host->target_list); 3275 spin_unlock(&host->target_lock); 3276 3277 scsi_scan_target(&target->scsi_host->shost_gendev, 3278 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL); 3279 3280 if (srp_connected_ch(target) < target->ch_count || 3281 target->qp_in_error) { 3282 shost_printk(KERN_INFO, target->scsi_host, 3283 PFX "SCSI scan failed - removing SCSI host\n"); 3284 srp_queue_remove_work(target); 3285 goto out; 3286 } 3287 3288 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n", 3289 dev_name(&target->scsi_host->shost_gendev), 3290 srp_sdev_count(target->scsi_host)); 3291 3292 spin_lock_irq(&target->lock); 3293 if (target->state == SRP_TARGET_SCANNING) 3294 target->state = SRP_TARGET_LIVE; 3295 spin_unlock_irq(&target->lock); 3296 3297 out: 3298 return 0; 3299 } 3300 3301 static void srp_release_dev(struct device *dev) 3302 { 3303 struct srp_host *host = 3304 container_of(dev, struct srp_host, dev); 3305 3306 complete(&host->released); 3307 } 3308 3309 static struct class srp_class = { 3310 .name = "infiniband_srp", 3311 .dev_release = srp_release_dev 3312 }; 3313 3314 /** 3315 * srp_conn_unique() - check whether the connection to a target is unique 3316 * @host: SRP host. 3317 * @target: SRP target port. 3318 */ 3319 static bool srp_conn_unique(struct srp_host *host, 3320 struct srp_target_port *target) 3321 { 3322 struct srp_target_port *t; 3323 bool ret = false; 3324 3325 if (target->state == SRP_TARGET_REMOVED) 3326 goto out; 3327 3328 ret = true; 3329 3330 spin_lock(&host->target_lock); 3331 list_for_each_entry(t, &host->target_list, list) { 3332 if (t != target && 3333 target->id_ext == t->id_ext && 3334 target->ioc_guid == t->ioc_guid && 3335 target->initiator_ext == t->initiator_ext) { 3336 ret = false; 3337 break; 3338 } 3339 } 3340 spin_unlock(&host->target_lock); 3341 3342 out: 3343 return ret; 3344 } 3345 3346 /* 3347 * Target ports are added by writing 3348 * 3349 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>, 3350 * pkey=<P_Key>,service_id=<service ID> 3351 * or 3352 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>, 3353 * [src=<IPv4 address>,]dest=<IPv4 address>:<port number> 3354 * 3355 * to the add_target sysfs attribute. 
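 *
 * Example with made-up identifiers and device name (the sysfs directory
 * follows the "srp-<device>-<port>" naming used by srp_add_port()):
 *
 *   echo id_ext=200100a0b8000001,ioc_guid=200100a0b8000001,dgid=fe800000000000000002c903000e8acd,pkey=ffff,service_id=200100a0b8000001 > /sys/class/infiniband_srp/srp-mlx5_0-1/add_target
 *
 * or, for an RDMA/CM login:
 *
 *   echo id_ext=200100a0b8000001,ioc_guid=200100a0b8000001,dest=192.168.1.2:5555 > /sys/class/infiniband_srp/srp-mlx5_0-1/add_target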
3356 */ 3357 enum { 3358 SRP_OPT_ERR = 0, 3359 SRP_OPT_ID_EXT = 1 << 0, 3360 SRP_OPT_IOC_GUID = 1 << 1, 3361 SRP_OPT_DGID = 1 << 2, 3362 SRP_OPT_PKEY = 1 << 3, 3363 SRP_OPT_SERVICE_ID = 1 << 4, 3364 SRP_OPT_MAX_SECT = 1 << 5, 3365 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6, 3366 SRP_OPT_IO_CLASS = 1 << 7, 3367 SRP_OPT_INITIATOR_EXT = 1 << 8, 3368 SRP_OPT_CMD_SG_ENTRIES = 1 << 9, 3369 SRP_OPT_ALLOW_EXT_SG = 1 << 10, 3370 SRP_OPT_SG_TABLESIZE = 1 << 11, 3371 SRP_OPT_COMP_VECTOR = 1 << 12, 3372 SRP_OPT_TL_RETRY_COUNT = 1 << 13, 3373 SRP_OPT_QUEUE_SIZE = 1 << 14, 3374 SRP_OPT_IP_SRC = 1 << 15, 3375 SRP_OPT_IP_DEST = 1 << 16, 3376 SRP_OPT_TARGET_CAN_QUEUE= 1 << 17, 3377 }; 3378 3379 static unsigned int srp_opt_mandatory[] = { 3380 SRP_OPT_ID_EXT | 3381 SRP_OPT_IOC_GUID | 3382 SRP_OPT_DGID | 3383 SRP_OPT_PKEY | 3384 SRP_OPT_SERVICE_ID, 3385 SRP_OPT_ID_EXT | 3386 SRP_OPT_IOC_GUID | 3387 SRP_OPT_IP_DEST, 3388 }; 3389 3390 static const match_table_t srp_opt_tokens = { 3391 { SRP_OPT_ID_EXT, "id_ext=%s" }, 3392 { SRP_OPT_IOC_GUID, "ioc_guid=%s" }, 3393 { SRP_OPT_DGID, "dgid=%s" }, 3394 { SRP_OPT_PKEY, "pkey=%x" }, 3395 { SRP_OPT_SERVICE_ID, "service_id=%s" }, 3396 { SRP_OPT_MAX_SECT, "max_sect=%d" }, 3397 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" }, 3398 { SRP_OPT_TARGET_CAN_QUEUE, "target_can_queue=%d" }, 3399 { SRP_OPT_IO_CLASS, "io_class=%x" }, 3400 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" }, 3401 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" }, 3402 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" }, 3403 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" }, 3404 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" }, 3405 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" }, 3406 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" }, 3407 { SRP_OPT_IP_SRC, "src=%s" }, 3408 { SRP_OPT_IP_DEST, "dest=%s" }, 3409 { SRP_OPT_ERR, NULL } 3410 }; 3411 3412 /** 3413 * srp_parse_in - parse an IP address and port number combination 3414 * 3415 * Parse the following address formats: 3416 * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5. 3417 * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5. 
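 *
 * The port number is split off at the last ':' in the string, which is
 * why IPv6 addresses must be enclosed in square brackets; e.g.
 * "[fe80::1%2]:5555" is parsed as address fe80::1 with scope id 2 and
 * port 5555, while "10.0.0.1:5555" is tried as an IPv4 address first.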
3418 */ 3419 static int srp_parse_in(struct net *net, struct sockaddr_storage *sa, 3420 const char *addr_port_str) 3421 { 3422 char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL); 3423 char *port_str; 3424 int ret; 3425 3426 if (!addr) 3427 return -ENOMEM; 3428 port_str = strrchr(addr, ':'); 3429 if (!port_str) 3430 return -EINVAL; 3431 *port_str++ = '\0'; 3432 ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa); 3433 if (ret && addr[0]) { 3434 addr_end = addr + strlen(addr) - 1; 3435 if (addr[0] == '[' && *addr_end == ']') { 3436 *addr_end = '\0'; 3437 ret = inet_pton_with_scope(net, AF_INET6, addr + 1, 3438 port_str, sa); 3439 } 3440 } 3441 kfree(addr); 3442 pr_debug("%s -> %pISpfsc\n", addr_port_str, sa); 3443 return ret; 3444 } 3445 3446 static int srp_parse_options(struct net *net, const char *buf, 3447 struct srp_target_port *target) 3448 { 3449 char *options, *sep_opt; 3450 char *p; 3451 substring_t args[MAX_OPT_ARGS]; 3452 unsigned long long ull; 3453 int opt_mask = 0; 3454 int token; 3455 int ret = -EINVAL; 3456 int i; 3457 3458 options = kstrdup(buf, GFP_KERNEL); 3459 if (!options) 3460 return -ENOMEM; 3461 3462 sep_opt = options; 3463 while ((p = strsep(&sep_opt, ",\n")) != NULL) { 3464 if (!*p) 3465 continue; 3466 3467 token = match_token(p, srp_opt_tokens, args); 3468 opt_mask |= token; 3469 3470 switch (token) { 3471 case SRP_OPT_ID_EXT: 3472 p = match_strdup(args); 3473 if (!p) { 3474 ret = -ENOMEM; 3475 goto out; 3476 } 3477 ret = kstrtoull(p, 16, &ull); 3478 if (ret) { 3479 pr_warn("invalid id_ext parameter '%s'\n", p); 3480 kfree(p); 3481 goto out; 3482 } 3483 target->id_ext = cpu_to_be64(ull); 3484 kfree(p); 3485 break; 3486 3487 case SRP_OPT_IOC_GUID: 3488 p = match_strdup(args); 3489 if (!p) { 3490 ret = -ENOMEM; 3491 goto out; 3492 } 3493 ret = kstrtoull(p, 16, &ull); 3494 if (ret) { 3495 pr_warn("invalid ioc_guid parameter '%s'\n", p); 3496 kfree(p); 3497 goto out; 3498 } 3499 target->ioc_guid = cpu_to_be64(ull); 3500 kfree(p); 3501 break; 3502 3503 case SRP_OPT_DGID: 3504 p = match_strdup(args); 3505 if (!p) { 3506 ret = -ENOMEM; 3507 goto out; 3508 } 3509 if (strlen(p) != 32) { 3510 pr_warn("bad dest GID parameter '%s'\n", p); 3511 kfree(p); 3512 goto out; 3513 } 3514 3515 ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16); 3516 kfree(p); 3517 if (ret < 0) 3518 goto out; 3519 break; 3520 3521 case SRP_OPT_PKEY: 3522 if (match_hex(args, &token)) { 3523 pr_warn("bad P_Key parameter '%s'\n", p); 3524 goto out; 3525 } 3526 target->ib_cm.pkey = cpu_to_be16(token); 3527 break; 3528 3529 case SRP_OPT_SERVICE_ID: 3530 p = match_strdup(args); 3531 if (!p) { 3532 ret = -ENOMEM; 3533 goto out; 3534 } 3535 ret = kstrtoull(p, 16, &ull); 3536 if (ret) { 3537 pr_warn("bad service_id parameter '%s'\n", p); 3538 kfree(p); 3539 goto out; 3540 } 3541 target->ib_cm.service_id = cpu_to_be64(ull); 3542 kfree(p); 3543 break; 3544 3545 case SRP_OPT_IP_SRC: 3546 p = match_strdup(args); 3547 if (!p) { 3548 ret = -ENOMEM; 3549 goto out; 3550 } 3551 ret = srp_parse_in(net, &target->rdma_cm.src.ss, p); 3552 if (ret < 0) { 3553 pr_warn("bad source parameter '%s'\n", p); 3554 kfree(p); 3555 goto out; 3556 } 3557 target->rdma_cm.src_specified = true; 3558 kfree(p); 3559 break; 3560 3561 case SRP_OPT_IP_DEST: 3562 p = match_strdup(args); 3563 if (!p) { 3564 ret = -ENOMEM; 3565 goto out; 3566 } 3567 ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p); 3568 if (ret < 0) { 3569 pr_warn("bad dest parameter '%s'\n", p); 3570 kfree(p); 3571 goto out; 3572 } 3573 target->using_rdma_cm = 
true; 3574 kfree(p); 3575 break; 3576 3577 case SRP_OPT_MAX_SECT: 3578 if (match_int(args, &token)) { 3579 pr_warn("bad max sect parameter '%s'\n", p); 3580 goto out; 3581 } 3582 target->scsi_host->max_sectors = token; 3583 break; 3584 3585 case SRP_OPT_QUEUE_SIZE: 3586 if (match_int(args, &token) || token < 1) { 3587 pr_warn("bad queue_size parameter '%s'\n", p); 3588 goto out; 3589 } 3590 target->scsi_host->can_queue = token; 3591 target->queue_size = token + SRP_RSP_SQ_SIZE + 3592 SRP_TSK_MGMT_SQ_SIZE; 3593 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) 3594 target->scsi_host->cmd_per_lun = token; 3595 break; 3596 3597 case SRP_OPT_MAX_CMD_PER_LUN: 3598 if (match_int(args, &token) || token < 1) { 3599 pr_warn("bad max cmd_per_lun parameter '%s'\n", 3600 p); 3601 goto out; 3602 } 3603 target->scsi_host->cmd_per_lun = token; 3604 break; 3605 3606 case SRP_OPT_TARGET_CAN_QUEUE: 3607 if (match_int(args, &token) || token < 1) { 3608 pr_warn("bad max target_can_queue parameter '%s'\n", 3609 p); 3610 goto out; 3611 } 3612 target->target_can_queue = token; 3613 break; 3614 3615 case SRP_OPT_IO_CLASS: 3616 if (match_hex(args, &token)) { 3617 pr_warn("bad IO class parameter '%s'\n", p); 3618 goto out; 3619 } 3620 if (token != SRP_REV10_IB_IO_CLASS && 3621 token != SRP_REV16A_IB_IO_CLASS) { 3622 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n", 3623 token, SRP_REV10_IB_IO_CLASS, 3624 SRP_REV16A_IB_IO_CLASS); 3625 goto out; 3626 } 3627 target->io_class = token; 3628 break; 3629 3630 case SRP_OPT_INITIATOR_EXT: 3631 p = match_strdup(args); 3632 if (!p) { 3633 ret = -ENOMEM; 3634 goto out; 3635 } 3636 ret = kstrtoull(p, 16, &ull); 3637 if (ret) { 3638 pr_warn("bad initiator_ext value '%s'\n", p); 3639 kfree(p); 3640 goto out; 3641 } 3642 target->initiator_ext = cpu_to_be64(ull); 3643 kfree(p); 3644 break; 3645 3646 case SRP_OPT_CMD_SG_ENTRIES: 3647 if (match_int(args, &token) || token < 1 || token > 255) { 3648 pr_warn("bad max cmd_sg_entries parameter '%s'\n", 3649 p); 3650 goto out; 3651 } 3652 target->cmd_sg_cnt = token; 3653 break; 3654 3655 case SRP_OPT_ALLOW_EXT_SG: 3656 if (match_int(args, &token)) { 3657 pr_warn("bad allow_ext_sg parameter '%s'\n", p); 3658 goto out; 3659 } 3660 target->allow_ext_sg = !!token; 3661 break; 3662 3663 case SRP_OPT_SG_TABLESIZE: 3664 if (match_int(args, &token) || token < 1 || 3665 token > SG_MAX_SEGMENTS) { 3666 pr_warn("bad max sg_tablesize parameter '%s'\n", 3667 p); 3668 goto out; 3669 } 3670 target->sg_tablesize = token; 3671 break; 3672 3673 case SRP_OPT_COMP_VECTOR: 3674 if (match_int(args, &token) || token < 0) { 3675 pr_warn("bad comp_vector parameter '%s'\n", p); 3676 goto out; 3677 } 3678 target->comp_vector = token; 3679 break; 3680 3681 case SRP_OPT_TL_RETRY_COUNT: 3682 if (match_int(args, &token) || token < 2 || token > 7) { 3683 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n", 3684 p); 3685 goto out; 3686 } 3687 target->tl_retry_count = token; 3688 break; 3689 3690 default: 3691 pr_warn("unknown parameter or missing value '%s' in target creation request\n", 3692 p); 3693 goto out; 3694 } 3695 } 3696 3697 for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) { 3698 if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) { 3699 ret = 0; 3700 break; 3701 } 3702 } 3703 if (ret) 3704 pr_warn("target creation request is missing one or more parameters\n"); 3705 3706 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue 3707 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) 3708 
pr_warn("cmd_per_lun = %d > queue_size = %d\n", 3709 target->scsi_host->cmd_per_lun, 3710 target->scsi_host->can_queue); 3711 3712 out: 3713 kfree(options); 3714 return ret; 3715 } 3716 3717 static ssize_t srp_create_target(struct device *dev, 3718 struct device_attribute *attr, 3719 const char *buf, size_t count) 3720 { 3721 struct srp_host *host = 3722 container_of(dev, struct srp_host, dev); 3723 struct Scsi_Host *target_host; 3724 struct srp_target_port *target; 3725 struct srp_rdma_ch *ch; 3726 struct srp_device *srp_dev = host->srp_dev; 3727 struct ib_device *ibdev = srp_dev->dev; 3728 int ret, node_idx, node, cpu, i; 3729 unsigned int max_sectors_per_mr, mr_per_cmd = 0; 3730 bool multich = false; 3731 3732 target_host = scsi_host_alloc(&srp_template, 3733 sizeof (struct srp_target_port)); 3734 if (!target_host) 3735 return -ENOMEM; 3736 3737 target_host->transportt = ib_srp_transport_template; 3738 target_host->max_channel = 0; 3739 target_host->max_id = 1; 3740 target_host->max_lun = -1LL; 3741 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; 3742 3743 target = host_to_target(target_host); 3744 3745 target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); 3746 target->io_class = SRP_REV16A_IB_IO_CLASS; 3747 target->scsi_host = target_host; 3748 target->srp_host = host; 3749 target->lkey = host->srp_dev->pd->local_dma_lkey; 3750 target->global_rkey = host->srp_dev->global_rkey; 3751 target->cmd_sg_cnt = cmd_sg_entries; 3752 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries; 3753 target->allow_ext_sg = allow_ext_sg; 3754 target->tl_retry_count = 7; 3755 target->queue_size = SRP_DEFAULT_QUEUE_SIZE; 3756 3757 /* 3758 * Avoid that the SCSI host can be removed by srp_remove_target() 3759 * before this function returns. 3760 */ 3761 scsi_host_get(target->scsi_host); 3762 3763 ret = mutex_lock_interruptible(&host->add_target_mutex); 3764 if (ret < 0) 3765 goto put; 3766 3767 ret = srp_parse_options(target->net, buf, target); 3768 if (ret) 3769 goto out; 3770 3771 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; 3772 3773 if (!srp_conn_unique(target->srp_host, target)) { 3774 if (target->using_rdma_cm) { 3775 shost_printk(KERN_INFO, target->scsi_host, 3776 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n", 3777 be64_to_cpu(target->id_ext), 3778 be64_to_cpu(target->ioc_guid), 3779 &target->rdma_cm.dst); 3780 } else { 3781 shost_printk(KERN_INFO, target->scsi_host, 3782 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", 3783 be64_to_cpu(target->id_ext), 3784 be64_to_cpu(target->ioc_guid), 3785 be64_to_cpu(target->initiator_ext)); 3786 } 3787 ret = -EEXIST; 3788 goto out; 3789 } 3790 3791 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && 3792 target->cmd_sg_cnt < target->sg_tablesize) { 3793 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); 3794 target->sg_tablesize = target->cmd_sg_cnt; 3795 } 3796 3797 if (srp_dev->use_fast_reg || srp_dev->use_fmr) { 3798 bool gaps_reg = (ibdev->attrs.device_cap_flags & 3799 IB_DEVICE_SG_GAPS_REG); 3800 3801 max_sectors_per_mr = srp_dev->max_pages_per_mr << 3802 (ilog2(srp_dev->mr_page_size) - 9); 3803 if (!gaps_reg) { 3804 /* 3805 * FR and FMR can only map one HCA page per entry. 
If 3806 * the start address is not aligned on a HCA page 3807 * boundary two entries will be used for the head and 3808 * the tail although these two entries combined 3809 * contain at most one HCA page of data. Hence the "+ 3810 * 1" in the calculation below. 3811 * 3812 * The indirect data buffer descriptor is contiguous 3813 * so the memory for that buffer will only be 3814 * registered if register_always is true. Hence add 3815 * one to mr_per_cmd if register_always has been set. 3816 */ 3817 mr_per_cmd = register_always + 3818 (target->scsi_host->max_sectors + 1 + 3819 max_sectors_per_mr - 1) / max_sectors_per_mr; 3820 } else { 3821 mr_per_cmd = register_always + 3822 (target->sg_tablesize + 3823 srp_dev->max_pages_per_mr - 1) / 3824 srp_dev->max_pages_per_mr; 3825 } 3826 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n", 3827 target->scsi_host->max_sectors, srp_dev->max_pages_per_mr, srp_dev->mr_page_size, 3828 max_sectors_per_mr, mr_per_cmd); 3829 } 3830 3831 target_host->sg_tablesize = target->sg_tablesize; 3832 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd; 3833 target->mr_per_cmd = mr_per_cmd; 3834 target->indirect_size = target->sg_tablesize * 3835 sizeof (struct srp_direct_buf); 3836 target->max_iu_len = sizeof (struct srp_cmd) + 3837 sizeof (struct srp_indirect_buf) + 3838 target->cmd_sg_cnt * sizeof (struct srp_direct_buf); 3839 3840 INIT_WORK(&target->tl_err_work, srp_tl_err_work); 3841 INIT_WORK(&target->remove_work, srp_remove_work); 3842 spin_lock_init(&target->lock); 3843 ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid); 3844 if (ret) 3845 goto out; 3846 3847 ret = -ENOMEM; 3848 target->ch_count = max_t(unsigned, num_online_nodes(), 3849 min(ch_count ? : 3850 min(4 * num_online_nodes(), 3851 ibdev->num_comp_vectors), 3852 num_online_cpus())); 3853 target->ch = kcalloc(target->ch_count, sizeof(*target->ch), 3854 GFP_KERNEL); 3855 if (!target->ch) 3856 goto out; 3857 3858 node_idx = 0; 3859 for_each_online_node(node) { 3860 const int ch_start = (node_idx * target->ch_count / 3861 num_online_nodes()); 3862 const int ch_end = ((node_idx + 1) * target->ch_count / 3863 num_online_nodes()); 3864 const int cv_start = node_idx * ibdev->num_comp_vectors / 3865 num_online_nodes(); 3866 const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors / 3867 num_online_nodes(); 3868 int cpu_idx = 0; 3869 3870 for_each_online_cpu(cpu) { 3871 if (cpu_to_node(cpu) != node) 3872 continue; 3873 if (ch_start + cpu_idx >= ch_end) 3874 continue; 3875 ch = &target->ch[ch_start + cpu_idx]; 3876 ch->target = target; 3877 ch->comp_vector = cv_start == cv_end ? 
cv_start : 3878 cv_start + cpu_idx % (cv_end - cv_start); 3879 spin_lock_init(&ch->lock); 3880 INIT_LIST_HEAD(&ch->free_tx); 3881 ret = srp_new_cm_id(ch); 3882 if (ret) 3883 goto err_disconnect; 3884 3885 ret = srp_create_ch_ib(ch); 3886 if (ret) 3887 goto err_disconnect; 3888 3889 ret = srp_alloc_req_data(ch); 3890 if (ret) 3891 goto err_disconnect; 3892 3893 ret = srp_connect_ch(ch, multich); 3894 if (ret) { 3895 char dst[64]; 3896 3897 if (target->using_rdma_cm) 3898 snprintf(dst, sizeof(dst), "%pIS", 3899 &target->rdma_cm.dst); 3900 else 3901 snprintf(dst, sizeof(dst), "%pI6", 3902 target->ib_cm.orig_dgid.raw); 3903 shost_printk(KERN_ERR, target->scsi_host, 3904 PFX "Connection %d/%d to %s failed\n", 3905 ch_start + cpu_idx, 3906 target->ch_count, dst); 3907 if (node_idx == 0 && cpu_idx == 0) { 3908 goto free_ch; 3909 } else { 3910 srp_free_ch_ib(target, ch); 3911 srp_free_req_data(target, ch); 3912 target->ch_count = ch - target->ch; 3913 goto connected; 3914 } 3915 } 3916 3917 multich = true; 3918 cpu_idx++; 3919 } 3920 node_idx++; 3921 } 3922 3923 connected: 3924 target->scsi_host->nr_hw_queues = target->ch_count; 3925 3926 ret = srp_add_target(host, target); 3927 if (ret) 3928 goto err_disconnect; 3929 3930 if (target->state != SRP_TARGET_REMOVED) { 3931 if (target->using_rdma_cm) { 3932 shost_printk(KERN_DEBUG, target->scsi_host, PFX 3933 "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n", 3934 be64_to_cpu(target->id_ext), 3935 be64_to_cpu(target->ioc_guid), 3936 target->sgid.raw, &target->rdma_cm.dst); 3937 } else { 3938 shost_printk(KERN_DEBUG, target->scsi_host, PFX 3939 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n", 3940 be64_to_cpu(target->id_ext), 3941 be64_to_cpu(target->ioc_guid), 3942 be16_to_cpu(target->ib_cm.pkey), 3943 be64_to_cpu(target->ib_cm.service_id), 3944 target->sgid.raw, 3945 target->ib_cm.orig_dgid.raw); 3946 } 3947 } 3948 3949 ret = count; 3950 3951 out: 3952 mutex_unlock(&host->add_target_mutex); 3953 3954 put: 3955 scsi_host_put(target->scsi_host); 3956 if (ret < 0) { 3957 /* 3958 * If a call to srp_remove_target() has not been scheduled, 3959 * drop the network namespace reference now that was obtained 3960 * earlier in this function. 
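 *
 * (The reference in question is the one obtained via
 * kobj_ns_grab_current(KOBJ_NS_TYPE_NET) near the top of this function;
 * once srp_remove_target() has been scheduled, dropping it is presumably
 * left to the removal path.)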
connected:
	target->scsi_host->nr_hw_queues = target->ch_count;

	ret = srp_add_target(host, target);
	if (ret)
		goto err_disconnect;

	if (target->state != SRP_TARGET_REMOVED) {
		if (target->using_rdma_cm) {
			shost_printk(KERN_DEBUG, target->scsi_host, PFX
				     "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n",
				     be64_to_cpu(target->id_ext),
				     be64_to_cpu(target->ioc_guid),
				     target->sgid.raw, &target->rdma_cm.dst);
		} else {
			shost_printk(KERN_DEBUG, target->scsi_host, PFX
				     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
				     be64_to_cpu(target->id_ext),
				     be64_to_cpu(target->ioc_guid),
				     be16_to_cpu(target->ib_cm.pkey),
				     be64_to_cpu(target->ib_cm.service_id),
				     target->sgid.raw,
				     target->ib_cm.orig_dgid.raw);
		}
	}

	ret = count;

out:
	mutex_unlock(&host->add_target_mutex);

put:
	scsi_host_put(target->scsi_host);
	if (ret < 0) {
		/*
		 * If a call to srp_remove_target() has not been scheduled,
		 * drop the network namespace reference now that was obtained
		 * earlier in this function.
		 */
		if (target->state != SRP_TARGET_REMOVED)
			kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
		scsi_host_put(target->scsi_host);
	}

	return ret;

err_disconnect:
	srp_disconnect_target(target);

free_ch:
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_ch_ib(target, ch);
		srp_free_req_data(target, ch);
	}

	kfree(target->ch);
	goto out;
}

static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);

static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	struct srp_host *host = container_of(dev, struct srp_host, dev);

	return sprintf(buf, "%s\n", host->srp_dev->dev->name);
}

static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);

static ssize_t show_port(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct srp_host *host = container_of(dev, struct srp_host, dev);

	return sprintf(buf, "%d\n", host->port);
}

static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);

static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
{
	struct srp_host *host;

	host = kzalloc(sizeof *host, GFP_KERNEL);
	if (!host)
		return NULL;

	INIT_LIST_HEAD(&host->target_list);
	spin_lock_init(&host->target_lock);
	init_completion(&host->released);
	mutex_init(&host->add_target_mutex);
	host->srp_dev = device;
	host->port = port;

	host->dev.class = &srp_class;
	host->dev.parent = device->dev->dev.parent;
	dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);

	if (device_register(&host->dev))
		goto free_host;
	if (device_create_file(&host->dev, &dev_attr_add_target))
		goto err_class;
	if (device_create_file(&host->dev, &dev_attr_ibdev))
		goto err_class;
	if (device_create_file(&host->dev, &dev_attr_port))
		goto err_class;

	return host;

err_class:
	device_unregister(&host->dev);

free_host:
	kfree(host);

	return NULL;
}
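
/*
 * srp_add_one() - "add" callback of the ib_srp IB client.
 *
 * Invoked once per RDMA device: derive the memory registration limits from
 * the device attributes, allocate a protection domain and register a
 * struct srp_host (with its add_target sysfs attribute) for each port.
 */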
static void srp_add_one(struct ib_device *device)
{
	struct srp_device *srp_dev;
	struct ib_device_attr *attr = &device->attrs;
	struct srp_host *host;
	int mr_page_shift, p;
	u64 max_pages_per_mr;
	unsigned int flags = 0;

	srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
	if (!srp_dev)
		return;

	/*
	 * Use the smallest page size supported by the HCA, down to a
	 * minimum of 4096 bytes. We're unlikely to build large sglists
	 * out of smaller entries.
	 */
	mr_page_shift = max(12, ffs(attr->page_size_cap) - 1);
	srp_dev->mr_page_size = 1 << mr_page_shift;
	srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
	max_pages_per_mr = attr->max_mr_size;
	do_div(max_pages_per_mr, srp_dev->mr_page_size);
	pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
		 attr->max_mr_size, srp_dev->mr_page_size,
		 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
					  max_pages_per_mr);

	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
			    device->map_phys_fmr && device->unmap_fmr);
	srp_dev->has_fr = (attr->device_cap_flags &
			   IB_DEVICE_MEM_MGT_EXTENSIONS);
	if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
	} else if (!never_register &&
		   attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
		srp_dev->use_fast_reg = (srp_dev->has_fr &&
					 (!srp_dev->has_fmr || prefer_fr));
		srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
	}

	if (never_register || !register_always ||
	    (!srp_dev->has_fmr && !srp_dev->has_fr))
		flags |= IB_PD_UNSAFE_GLOBAL_RKEY;

	if (srp_dev->use_fast_reg) {
		srp_dev->max_pages_per_mr =
			min_t(u32, srp_dev->max_pages_per_mr,
			      attr->max_fast_reg_page_list_len);
	}
	srp_dev->mr_max_size = srp_dev->mr_page_size *
			       srp_dev->max_pages_per_mr;
	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
		 device->name, mr_page_shift, attr->max_mr_size,
		 attr->max_fast_reg_page_list_len,
		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);

	INIT_LIST_HEAD(&srp_dev->dev_list);

	srp_dev->dev = device;
	srp_dev->pd = ib_alloc_pd(device, flags);
	if (IS_ERR(srp_dev->pd))
		goto free_dev;

	if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
		srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey;
		WARN_ON_ONCE(srp_dev->global_rkey == 0);
	}

	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
		host = srp_add_port(srp_dev, p);
		if (host)
			list_add_tail(&host->list, &srp_dev->dev_list);
	}

	ib_set_client_data(device, &srp_client, srp_dev);
	return;

free_dev:
	kfree(srp_dev);
}

static void srp_remove_one(struct ib_device *device, void *client_data)
{
	struct srp_device *srp_dev;
	struct srp_host *host, *tmp_host;
	struct srp_target_port *target;

	srp_dev = client_data;
	if (!srp_dev)
		return;

	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
		device_unregister(&host->dev);
		/*
		 * Wait for the sysfs entry to go away, so that no new
		 * target ports can be created.
		 */
		wait_for_completion(&host->released);

		/*
		 * Remove all target ports.
		 */
		spin_lock(&host->target_lock);
		list_for_each_entry(target, &host->target_list, list)
			srp_queue_remove_work(target);
		spin_unlock(&host->target_lock);

		/*
		 * Wait for tl_err and target port removal tasks.
		 */
		flush_workqueue(system_long_wq);
		flush_workqueue(srp_remove_wq);

		kfree(host);
	}

	ib_dealloc_pd(srp_dev->pd);

	kfree(srp_dev);
}
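
/*
 * Callbacks used by the SRP transport class (scsi_transport_srp) to
 * reconnect, delete and terminate I/O on a remote port after a transport
 * layer error or timeout.
 */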
static struct srp_function_template ib_srp_transport_functions = {
	.has_rport_state	= true,
	.reset_timer_if_blocked	= true,
	.reconnect_delay	= &srp_reconnect_delay,
	.fast_io_fail_tmo	= &srp_fast_io_fail_tmo,
	.dev_loss_tmo		= &srp_dev_loss_tmo,
	.reconnect		= srp_rport_reconnect,
	.rport_delete		= srp_rport_delete,
	.terminate_rport_io	= srp_terminate_io,
};

static int __init srp_init_module(void)
{
	int ret;

	if (srp_sg_tablesize) {
		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
		if (!cmd_sg_entries)
			cmd_sg_entries = srp_sg_tablesize;
	}

	if (!cmd_sg_entries)
		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;

	if (cmd_sg_entries > 255) {
		pr_warn("Clamping cmd_sg_entries to 255\n");
		cmd_sg_entries = 255;
	}

	if (!indirect_sg_entries)
		indirect_sg_entries = cmd_sg_entries;
	else if (indirect_sg_entries < cmd_sg_entries) {
		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
			cmd_sg_entries);
		indirect_sg_entries = cmd_sg_entries;
	}

	if (indirect_sg_entries > SG_MAX_SEGMENTS) {
		pr_warn("Clamping indirect_sg_entries to %u\n",
			SG_MAX_SEGMENTS);
		indirect_sg_entries = SG_MAX_SEGMENTS;
	}

	srp_remove_wq = create_workqueue("srp_remove");
	if (!srp_remove_wq) {
		ret = -ENOMEM;
		goto out;
	}

	ret = -ENOMEM;
	ib_srp_transport_template =
		srp_attach_transport(&ib_srp_transport_functions);
	if (!ib_srp_transport_template)
		goto destroy_wq;

	ret = class_register(&srp_class);
	if (ret) {
		pr_err("couldn't register class infiniband_srp\n");
		goto release_tr;
	}

	ib_sa_register_client(&srp_sa_client);

	ret = ib_register_client(&srp_client);
	if (ret) {
		pr_err("couldn't register IB client\n");
		goto unreg_sa;
	}

out:
	return ret;

unreg_sa:
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);

release_tr:
	srp_release_transport(ib_srp_transport_template);

destroy_wq:
	destroy_workqueue(srp_remove_wq);
	goto out;
}

static void __exit srp_cleanup_module(void)
{
	ib_unregister_client(&srp_client);
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);
	srp_release_transport(ib_srp_transport_template);
	destroy_workqueue(srp_remove_wq);
}

module_init(srp_init_module);
module_exit(srp_cleanup_module);
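
/*
 * Illustrative example only: a target is typically added through the sysfs
 * attribute created by srp_add_port(). The device name, port number and
 * option values below are placeholders; the full set of recognized options
 * is defined by the option parser earlier in this file.
 *
 *   echo "id_ext=0x...,ioc_guid=0x...,dgid=...,pkey=0xffff,service_id=0x..." > \
 *	/sys/class/infiniband_srp/srp-mlx5_0-1/add_target
 */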