/*
 * Copyright (c) 2005 Cisco Systems. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
#include <rdma/ib_cache.h>

#include <linux/atomic.h>

#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_tcq.h>
#include <scsi/srp.h>
#include <scsi/scsi_transport_srp.h>

#include "ib_srp.h"

#define DRV_NAME        "ib_srp"
#define PFX             DRV_NAME ": "
#define DRV_VERSION     "2.0"
#define DRV_RELDATE     "July 26, 2015"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_INFO(release_date, DRV_RELDATE);

#if !defined(CONFIG_DYNAMIC_DEBUG)
#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
#define DYNAMIC_DEBUG_BRANCH(descriptor) false
#endif

static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr = true;
static bool register_always = true;
static bool never_register;
static int topspin_workarounds = 1;

module_param(srp_sg_tablesize, uint, 0444);
MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");

module_param(cmd_sg_entries, uint, 0444);
MODULE_PARM_DESC(cmd_sg_entries,
        "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");

module_param(indirect_sg_entries, uint, 0444);
MODULE_PARM_DESC(indirect_sg_entries,
        "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");

module_param(allow_ext_sg, bool, 0444);
MODULE_PARM_DESC(allow_ext_sg,
        "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
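/*
 * Illustrative sketch (not part of the driver): the reconnect_delay,
 * fast_io_fail_tmo and dev_loss_tmo parameters below are registered with
 * module_param_cb() so that reads and writes go through the custom
 * srp_tmo_get()/srp_tmo_set() handlers further down, which also accept the
 * keyword "off". A minimal kernel_param_ops pair follows the same shape;
 * the example_* names are made up for this comment:
 *
 *      static int example_get(char *buffer, const struct kernel_param *kp)
 *      {
 *              return sprintf(buffer, "%d", *(int *)kp->arg);
 *      }
 *
 *      static int example_set(const char *val, const struct kernel_param *kp)
 *      {
 *              int v, res = kstrtoint(val, 0, &v);
 *
 *              if (res)
 *                      return res;
 *              *(int *)kp->arg = v;
 *              return 0;
 *      }
 *
 *      static const struct kernel_param_ops example_ops = {
 *              .get = example_get,
 *              .set = example_set,
 *      };
 *      static int example_tmo = 10;
 *      module_param_cb(example_tmo, &example_ops, &example_tmo, 0644);
 */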
module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
        "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");

module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
        "Whether to use fast registration if both FMR and fast registration are supported");

module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
        "Use memory registration even for contiguous memory regions");

module_param(never_register, bool, 0444);
MODULE_PARM_DESC(never_register, "Never register memory");

static const struct kernel_param_ops srp_tmo_ops;

static int srp_reconnect_delay = 10;
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
                S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");

static int srp_fast_io_fail_tmo = 15;
module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
                S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(fast_io_fail_tmo,
        "Number of seconds between the observation of a transport"
        " layer error and failing all I/O. \"off\" means that this"
        " functionality is disabled.");

static int srp_dev_loss_tmo = 600;
module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
                S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dev_loss_tmo,
        "Maximum number of seconds that the SRP transport should"
        " insulate transport layer errors. After this time has been"
        " exceeded the SCSI host is removed. Should be"
        " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
        " if fast_io_fail_tmo has not been set. \"off\" means that"
        " this functionality is disabled.");

static unsigned ch_count;
module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
        "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");

static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device, void *client_data);
static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
                const char *opname);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);

static struct scsi_transport_template *ib_srp_transport_template;
static struct workqueue_struct *srp_remove_wq;

static struct ib_client srp_client = {
        .name   = "srp",
        .add    = srp_add_one,
        .remove = srp_remove_one
};

static struct ib_sa_client srp_sa_client;

static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
{
        int tmo = *(int *)kp->arg;

        if (tmo >= 0)
                return sprintf(buffer, "%d", tmo);
        else
                return sprintf(buffer, "off");
}

static int srp_tmo_set(const char *val, const struct kernel_param *kp)
{
        int tmo, res;

        res = srp_parse_tmo(&tmo, val);
        if (res)
                goto out;

        if (kp->arg == &srp_reconnect_delay)
                res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
                                    srp_dev_loss_tmo);
        else if (kp->arg == &srp_fast_io_fail_tmo)
                res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
        else
                res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
                                    tmo);
        if (res)
                goto out;
        *(int *)kp->arg = tmo;

out:
        return res;
}

static const struct kernel_param_ops srp_tmo_ops = {
        .get = srp_tmo_get,
        .set = srp_tmo_set,
};

static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
{
        return (struct srp_target_port *) host->hostdata;
}

static const char *srp_target_info(struct Scsi_Host *host)
{
        return host_to_target(host)->target_name;
}

static int srp_target_is_topspin(struct srp_target_port *target)
{
        static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
        static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };

        return topspin_workarounds &&
                (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
                 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
}

static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
                                   gfp_t gfp_mask,
                                   enum dma_data_direction direction)
{
        struct srp_iu *iu;

        iu = kmalloc(sizeof *iu, gfp_mask);
        if (!iu)
                goto out;

        iu->buf = kzalloc(size, gfp_mask);
        if (!iu->buf)
                goto out_free_iu;

        iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
                                    direction);
        if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
                goto out_free_buf;

        iu->size      = size;
        iu->direction = direction;

        return iu;

out_free_buf:
        kfree(iu->buf);
out_free_iu:
        kfree(iu);
out:
        return NULL;
}

static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
{
        if (!iu)
                return;

        ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
                            iu->direction);
        kfree(iu->buf);
        kfree(iu);
}

static void srp_qp_event(struct ib_event *event, void *context)
{
        pr_debug("QP event %s (%d)\n",
                 ib_event_msg(event->event), event->event);
}

static int srp_init_qp(struct srp_target_port *target,
                       struct ib_qp *qp)
{
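        /*
         * Look up the index of the target's P_Key on the local port and
         * transition the freshly created QP to the INIT state with remote
         * reads and writes enabled; the IB CM takes the QP the rest of the
         * way to RTR/RTS during connection establishment.
         */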
struct ib_qp_attr *attr; 272 int ret; 273 274 attr = kmalloc(sizeof *attr, GFP_KERNEL); 275 if (!attr) 276 return -ENOMEM; 277 278 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev, 279 target->srp_host->port, 280 be16_to_cpu(target->pkey), 281 &attr->pkey_index); 282 if (ret) 283 goto out; 284 285 attr->qp_state = IB_QPS_INIT; 286 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ | 287 IB_ACCESS_REMOTE_WRITE); 288 attr->port_num = target->srp_host->port; 289 290 ret = ib_modify_qp(qp, attr, 291 IB_QP_STATE | 292 IB_QP_PKEY_INDEX | 293 IB_QP_ACCESS_FLAGS | 294 IB_QP_PORT); 295 296 out: 297 kfree(attr); 298 return ret; 299 } 300 301 static int srp_new_cm_id(struct srp_rdma_ch *ch) 302 { 303 struct srp_target_port *target = ch->target; 304 struct ib_cm_id *new_cm_id; 305 306 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev, 307 srp_cm_handler, ch); 308 if (IS_ERR(new_cm_id)) 309 return PTR_ERR(new_cm_id); 310 311 if (ch->cm_id) 312 ib_destroy_cm_id(ch->cm_id); 313 ch->cm_id = new_cm_id; 314 ch->path.sgid = target->sgid; 315 ch->path.dgid = target->orig_dgid; 316 ch->path.pkey = target->pkey; 317 ch->path.service_id = target->service_id; 318 319 return 0; 320 } 321 322 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target) 323 { 324 struct srp_device *dev = target->srp_host->srp_dev; 325 struct ib_fmr_pool_param fmr_param; 326 327 memset(&fmr_param, 0, sizeof(fmr_param)); 328 fmr_param.pool_size = target->mr_pool_size; 329 fmr_param.dirty_watermark = fmr_param.pool_size / 4; 330 fmr_param.cache = 1; 331 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr; 332 fmr_param.page_shift = ilog2(dev->mr_page_size); 333 fmr_param.access = (IB_ACCESS_LOCAL_WRITE | 334 IB_ACCESS_REMOTE_WRITE | 335 IB_ACCESS_REMOTE_READ); 336 337 return ib_create_fmr_pool(dev->pd, &fmr_param); 338 } 339 340 /** 341 * srp_destroy_fr_pool() - free the resources owned by a pool 342 * @pool: Fast registration pool to be destroyed. 343 */ 344 static void srp_destroy_fr_pool(struct srp_fr_pool *pool) 345 { 346 int i; 347 struct srp_fr_desc *d; 348 349 if (!pool) 350 return; 351 352 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { 353 if (d->mr) 354 ib_dereg_mr(d->mr); 355 } 356 kfree(pool); 357 } 358 359 /** 360 * srp_create_fr_pool() - allocate and initialize a pool for fast registration 361 * @device: IB device to allocate fast registration descriptors for. 362 * @pd: Protection domain associated with the FR descriptors. 363 * @pool_size: Number of descriptors to allocate. 364 * @max_page_list_len: Maximum fast registration work request page list length. 365 */ 366 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, 367 struct ib_pd *pd, int pool_size, 368 int max_page_list_len) 369 { 370 struct srp_fr_pool *pool; 371 struct srp_fr_desc *d; 372 struct ib_mr *mr; 373 int i, ret = -EINVAL; 374 375 if (pool_size <= 0) 376 goto err; 377 ret = -ENOMEM; 378 pool = kzalloc(sizeof(struct srp_fr_pool) + 379 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL); 380 if (!pool) 381 goto err; 382 pool->size = pool_size; 383 pool->max_page_list_len = max_page_list_len; 384 spin_lock_init(&pool->lock); 385 INIT_LIST_HEAD(&pool->free_list); 386 387 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { 388 mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 389 max_page_list_len); 390 if (IS_ERR(mr)) { 391 ret = PTR_ERR(mr); 392 if (ret == -ENOMEM) 393 pr_info("%s: ib_alloc_mr() failed. 
Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
                                        dev_name(&device->dev));
                        goto destroy_pool;
                }
                d->mr = mr;
                list_add_tail(&d->entry, &pool->free_list);
        }

out:
        return pool;

destroy_pool:
        srp_destroy_fr_pool(pool);

err:
        pool = ERR_PTR(ret);
        goto out;
}

/**
 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
 * @pool: Pool to obtain descriptor from.
 */
static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
{
        struct srp_fr_desc *d = NULL;
        unsigned long flags;

        spin_lock_irqsave(&pool->lock, flags);
        if (!list_empty(&pool->free_list)) {
                d = list_first_entry(&pool->free_list, typeof(*d), entry);
                list_del(&d->entry);
        }
        spin_unlock_irqrestore(&pool->lock, flags);

        return d;
}

/**
 * srp_fr_pool_put() - put an FR descriptor back in the free list
 * @pool: Pool the descriptor was allocated from.
 * @desc: Pointer to an array of fast registration descriptor pointers.
 * @n:    Number of descriptors to put back.
 *
 * Note: The caller must already have queued an invalidation request for
 * desc->mr->rkey before calling this function.
 */
static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
                            int n)
{
        unsigned long flags;
        int i;

        spin_lock_irqsave(&pool->lock, flags);
        for (i = 0; i < n; i++)
                list_add(&desc[i]->entry, &pool->free_list);
        spin_unlock_irqrestore(&pool->lock, flags);
}

static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
{
        struct srp_device *dev = target->srp_host->srp_dev;

        return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
                                  dev->max_pages_per_mr);
}

/**
 * srp_destroy_qp() - destroy an RDMA queue pair
 * @qp: RDMA queue pair.
 *
 * Drain the qp before destroying it. This prevents the receive
 * completion handler from accessing the queue pair while it is
 * being destroyed.
 */
static void srp_destroy_qp(struct ib_qp *qp)
{
        ib_drain_rq(qp);
        ib_destroy_qp(qp);
}

static int srp_create_ch_ib(struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;
        struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_qp_init_attr *init_attr;
        struct ib_cq *recv_cq, *send_cq;
        struct ib_qp *qp;
        struct ib_fmr_pool *fmr_pool = NULL;
        struct srp_fr_pool *fr_pool = NULL;
        const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
        int ret;

        init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
        if (!init_attr)
                return -ENOMEM;

        /* queue_size + 1 for ib_drain_rq() */
        recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
                              ch->comp_vector, IB_POLL_SOFTIRQ);
        if (IS_ERR(recv_cq)) {
                ret = PTR_ERR(recv_cq);
                goto err;
        }

        send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
                              ch->comp_vector, IB_POLL_DIRECT);
        if (IS_ERR(send_cq)) {
                ret = PTR_ERR(send_cq);
                goto err_recv_cq;
        }

        init_attr->event_handler = srp_qp_event;
        init_attr->cap.max_send_wr = m * target->queue_size;
        init_attr->cap.max_recv_wr = target->queue_size + 1;
        init_attr->cap.max_recv_sge = 1;
        init_attr->cap.max_send_sge = 1;
        init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
        init_attr->qp_type = IB_QPT_RC;
        init_attr->send_cq = send_cq;
        init_attr->recv_cq = recv_cq;

        qp = ib_create_qp(dev->pd, init_attr);
        if (IS_ERR(qp)) {
                ret = PTR_ERR(qp);
                goto err_send_cq;
        }

        ret = srp_init_qp(target, qp);
        if (ret)
                goto err_qp;

        if (dev->use_fast_reg) {
                fr_pool = srp_alloc_fr_pool(target);
                if (IS_ERR(fr_pool)) {
                        ret = PTR_ERR(fr_pool);
                        shost_printk(KERN_WARNING, target->scsi_host, PFX
                                     "FR pool allocation failed (%d)\n", ret);
                        goto err_qp;
                }
        } else if (dev->use_fmr) {
                fmr_pool = srp_alloc_fmr_pool(target);
                if (IS_ERR(fmr_pool)) {
                        ret = PTR_ERR(fmr_pool);
                        shost_printk(KERN_WARNING, target->scsi_host, PFX
                                     "FMR pool allocation failed (%d)\n", ret);
                        goto err_qp;
                }
        }

        if (ch->qp)
                srp_destroy_qp(ch->qp);
        if (ch->recv_cq)
                ib_free_cq(ch->recv_cq);
        if (ch->send_cq)
                ib_free_cq(ch->send_cq);

        ch->qp = qp;
        ch->recv_cq = recv_cq;
        ch->send_cq = send_cq;

        if (dev->use_fast_reg) {
                if (ch->fr_pool)
                        srp_destroy_fr_pool(ch->fr_pool);
                ch->fr_pool = fr_pool;
        } else if (dev->use_fmr) {
                if (ch->fmr_pool)
                        ib_destroy_fmr_pool(ch->fmr_pool);
                ch->fmr_pool = fmr_pool;
        }

        kfree(init_attr);
        return 0;

err_qp:
        srp_destroy_qp(qp);

err_send_cq:
        ib_free_cq(send_cq);

err_recv_cq:
        ib_free_cq(recv_cq);

err:
        kfree(init_attr);
        return ret;
}

/*
 * Note: this function may be called without srp_alloc_iu_bufs() having been
 * invoked. Hence the ch->[rt]x_ring checks.
 */
static void srp_free_ch_ib(struct srp_target_port *target,
                           struct srp_rdma_ch *ch)
{
        struct srp_device *dev = target->srp_host->srp_dev;
        int i;

        if (!ch->target)
                return;

        if (ch->cm_id) {
                ib_destroy_cm_id(ch->cm_id);
                ch->cm_id = NULL;
        }

        /* If srp_new_cm_id() succeeded but srp_create_ch_ib() failed, return. */
        if (!ch->qp)
                return;

        if (dev->use_fast_reg) {
                if (ch->fr_pool)
                        srp_destroy_fr_pool(ch->fr_pool);
        } else if (dev->use_fmr) {
                if (ch->fmr_pool)
                        ib_destroy_fmr_pool(ch->fmr_pool);
        }

        srp_destroy_qp(ch->qp);
        ib_free_cq(ch->send_cq);
        ib_free_cq(ch->recv_cq);

        /*
         * Prevent the SCSI error handler from using this channel after it
         * has been freed: the SCSI error handler may keep trying to perform
         * recovery actions after scsi_remove_host() has returned.
         */
        ch->target = NULL;

        ch->qp = NULL;
        ch->send_cq = ch->recv_cq = NULL;

        if (ch->rx_ring) {
                for (i = 0; i < target->queue_size; ++i)
                        srp_free_iu(target->srp_host, ch->rx_ring[i]);
                kfree(ch->rx_ring);
                ch->rx_ring = NULL;
        }
        if (ch->tx_ring) {
                for (i = 0; i < target->queue_size; ++i)
                        srp_free_iu(target->srp_host, ch->tx_ring[i]);
                kfree(ch->tx_ring);
                ch->tx_ring = NULL;
        }
}

static void srp_path_rec_completion(int status,
                                    struct ib_sa_path_rec *pathrec,
                                    void *ch_ptr)
{
        struct srp_rdma_ch *ch = ch_ptr;
        struct srp_target_port *target = ch->target;

        ch->status = status;
        if (status)
                shost_printk(KERN_ERR, target->scsi_host,
                             PFX "Got failed path rec status %d\n", status);
        else
                ch->path = *pathrec;
        complete(&ch->done);
}

static int srp_lookup_path(struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;
        int ret;

        ch->path.numb_path = 1;

        init_completion(&ch->done);

        ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
                                               target->srp_host->srp_dev->dev,
                                               target->srp_host->port,
                                               &ch->path,
                                               IB_SA_PATH_REC_SERVICE_ID |
                                               IB_SA_PATH_REC_DGID |
                                               IB_SA_PATH_REC_SGID |
                                               IB_SA_PATH_REC_NUMB_PATH |
                                               IB_SA_PATH_REC_PKEY,
                                               SRP_PATH_REC_TIMEOUT_MS,
                                               GFP_KERNEL,
                                               srp_path_rec_completion,
                                               ch, &ch->path_query);
        if (ch->path_query_id < 0)
                return ch->path_query_id;

        ret = wait_for_completion_interruptible(&ch->done);
        if (ret < 0)
                return ret;

        if (ch->status < 0)
                shost_printk(KERN_WARNING, target->scsi_host,
                             PFX "Path record query failed\n");

        return ch->status;
}

static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
{
        struct srp_target_port *target = ch->target;
        struct {
                struct ib_cm_req_param param;
                struct srp_login_req   priv;
        } *req = NULL;
        int status;

        req = kzalloc(sizeof *req, GFP_KERNEL);
        if (!req)
                return -ENOMEM;

        req->param.primary_path = &ch->path;
        req->param.alternate_path = NULL;
        req->param.service_id = target->service_id;
        req->param.qp_num = ch->qp->qp_num;
        req->param.qp_type = ch->qp->qp_type;
        req->param.private_data = &req->priv;
        req->param.private_data_len = sizeof req->priv;
        req->param.flow_control = 1;

        get_random_bytes(&req->param.starting_psn, 4);
        req->param.starting_psn &= 0xffffff;

        /*
         * Pick some arbitrary defaults here; we could make these
         * module parameters if anyone cared about setting them.
720 */ 721 req->param.responder_resources = 4; 722 req->param.remote_cm_response_timeout = 20; 723 req->param.local_cm_response_timeout = 20; 724 req->param.retry_count = target->tl_retry_count; 725 req->param.rnr_retry_count = 7; 726 req->param.max_cm_retries = 15; 727 728 req->priv.opcode = SRP_LOGIN_REQ; 729 req->priv.tag = 0; 730 req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len); 731 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | 732 SRP_BUF_FORMAT_INDIRECT); 733 req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI : 734 SRP_MULTICHAN_SINGLE); 735 /* 736 * In the published SRP specification (draft rev. 16a), the 737 * port identifier format is 8 bytes of ID extension followed 738 * by 8 bytes of GUID. Older drafts put the two halves in the 739 * opposite order, so that the GUID comes first. 740 * 741 * Targets conforming to these obsolete drafts can be 742 * recognized by the I/O Class they report. 743 */ 744 if (target->io_class == SRP_REV10_IB_IO_CLASS) { 745 memcpy(req->priv.initiator_port_id, 746 &target->sgid.global.interface_id, 8); 747 memcpy(req->priv.initiator_port_id + 8, 748 &target->initiator_ext, 8); 749 memcpy(req->priv.target_port_id, &target->ioc_guid, 8); 750 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8); 751 } else { 752 memcpy(req->priv.initiator_port_id, 753 &target->initiator_ext, 8); 754 memcpy(req->priv.initiator_port_id + 8, 755 &target->sgid.global.interface_id, 8); 756 memcpy(req->priv.target_port_id, &target->id_ext, 8); 757 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8); 758 } 759 760 /* 761 * Topspin/Cisco SRP targets will reject our login unless we 762 * zero out the first 8 bytes of our initiator port ID and set 763 * the second 8 bytes to the local node GUID. 764 */ 765 if (srp_target_is_topspin(target)) { 766 shost_printk(KERN_DEBUG, target->scsi_host, 767 PFX "Topspin/Cisco initiator port ID workaround " 768 "activated for target GUID %016llx\n", 769 be64_to_cpu(target->ioc_guid)); 770 memset(req->priv.initiator_port_id, 0, 8); 771 memcpy(req->priv.initiator_port_id + 8, 772 &target->srp_host->srp_dev->dev->node_guid, 8); 773 } 774 775 status = ib_send_cm_req(ch->cm_id, &req->param); 776 777 kfree(req); 778 779 return status; 780 } 781 782 static bool srp_queue_remove_work(struct srp_target_port *target) 783 { 784 bool changed = false; 785 786 spin_lock_irq(&target->lock); 787 if (target->state != SRP_TARGET_REMOVED) { 788 target->state = SRP_TARGET_REMOVED; 789 changed = true; 790 } 791 spin_unlock_irq(&target->lock); 792 793 if (changed) 794 queue_work(srp_remove_wq, &target->remove_work); 795 796 return changed; 797 } 798 799 static void srp_disconnect_target(struct srp_target_port *target) 800 { 801 struct srp_rdma_ch *ch; 802 int i; 803 804 /* XXX should send SRP_I_LOGOUT request */ 805 806 for (i = 0; i < target->ch_count; i++) { 807 ch = &target->ch[i]; 808 ch->connected = false; 809 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) { 810 shost_printk(KERN_DEBUG, target->scsi_host, 811 PFX "Sending CM DREQ failed\n"); 812 } 813 } 814 } 815 816 static void srp_free_req_data(struct srp_target_port *target, 817 struct srp_rdma_ch *ch) 818 { 819 struct srp_device *dev = target->srp_host->srp_dev; 820 struct ib_device *ibdev = dev->dev; 821 struct srp_request *req; 822 int i; 823 824 if (!ch->req_ring) 825 return; 826 827 for (i = 0; i < target->req_ring_size; ++i) { 828 req = &ch->req_ring[i]; 829 if (dev->use_fast_reg) { 830 kfree(req->fr_list); 831 } else { 832 kfree(req->fmr_list); 833 
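                        /* map_page is only allocated for the FMR case (see srp_alloc_req_data()) */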
kfree(req->map_page); 834 } 835 if (req->indirect_dma_addr) { 836 ib_dma_unmap_single(ibdev, req->indirect_dma_addr, 837 target->indirect_size, 838 DMA_TO_DEVICE); 839 } 840 kfree(req->indirect_desc); 841 } 842 843 kfree(ch->req_ring); 844 ch->req_ring = NULL; 845 } 846 847 static int srp_alloc_req_data(struct srp_rdma_ch *ch) 848 { 849 struct srp_target_port *target = ch->target; 850 struct srp_device *srp_dev = target->srp_host->srp_dev; 851 struct ib_device *ibdev = srp_dev->dev; 852 struct srp_request *req; 853 void *mr_list; 854 dma_addr_t dma_addr; 855 int i, ret = -ENOMEM; 856 857 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring), 858 GFP_KERNEL); 859 if (!ch->req_ring) 860 goto out; 861 862 for (i = 0; i < target->req_ring_size; ++i) { 863 req = &ch->req_ring[i]; 864 mr_list = kmalloc(target->mr_per_cmd * sizeof(void *), 865 GFP_KERNEL); 866 if (!mr_list) 867 goto out; 868 if (srp_dev->use_fast_reg) { 869 req->fr_list = mr_list; 870 } else { 871 req->fmr_list = mr_list; 872 req->map_page = kmalloc(srp_dev->max_pages_per_mr * 873 sizeof(void *), GFP_KERNEL); 874 if (!req->map_page) 875 goto out; 876 } 877 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); 878 if (!req->indirect_desc) 879 goto out; 880 881 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, 882 target->indirect_size, 883 DMA_TO_DEVICE); 884 if (ib_dma_mapping_error(ibdev, dma_addr)) 885 goto out; 886 887 req->indirect_dma_addr = dma_addr; 888 } 889 ret = 0; 890 891 out: 892 return ret; 893 } 894 895 /** 896 * srp_del_scsi_host_attr() - Remove attributes defined in the host template. 897 * @shost: SCSI host whose attributes to remove from sysfs. 898 * 899 * Note: Any attributes defined in the host template and that did not exist 900 * before invocation of this function will be ignored. 901 */ 902 static void srp_del_scsi_host_attr(struct Scsi_Host *shost) 903 { 904 struct device_attribute **attr; 905 906 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr) 907 device_remove_file(&shost->shost_dev, *attr); 908 } 909 910 static void srp_remove_target(struct srp_target_port *target) 911 { 912 struct srp_rdma_ch *ch; 913 int i; 914 915 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 916 917 srp_del_scsi_host_attr(target->scsi_host); 918 srp_rport_get(target->rport); 919 srp_remove_host(target->scsi_host); 920 scsi_remove_host(target->scsi_host); 921 srp_stop_rport_timers(target->rport); 922 srp_disconnect_target(target); 923 for (i = 0; i < target->ch_count; i++) { 924 ch = &target->ch[i]; 925 srp_free_ch_ib(target, ch); 926 } 927 cancel_work_sync(&target->tl_err_work); 928 srp_rport_put(target->rport); 929 for (i = 0; i < target->ch_count; i++) { 930 ch = &target->ch[i]; 931 srp_free_req_data(target, ch); 932 } 933 kfree(target->ch); 934 target->ch = NULL; 935 936 spin_lock(&target->srp_host->target_lock); 937 list_del(&target->list); 938 spin_unlock(&target->srp_host->target_lock); 939 940 scsi_host_put(target->scsi_host); 941 } 942 943 static void srp_remove_work(struct work_struct *work) 944 { 945 struct srp_target_port *target = 946 container_of(work, struct srp_target_port, remove_work); 947 948 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 949 950 srp_remove_target(target); 951 } 952 953 static void srp_rport_delete(struct srp_rport *rport) 954 { 955 struct srp_target_port *target = rport->lld_data; 956 957 srp_queue_remove_work(target); 958 } 959 960 /** 961 * srp_connected_ch() - number of connected channels 962 * @target: SRP target port. 
963 */ 964 static int srp_connected_ch(struct srp_target_port *target) 965 { 966 int i, c = 0; 967 968 for (i = 0; i < target->ch_count; i++) 969 c += target->ch[i].connected; 970 971 return c; 972 } 973 974 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) 975 { 976 struct srp_target_port *target = ch->target; 977 int ret; 978 979 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0); 980 981 ret = srp_lookup_path(ch); 982 if (ret) 983 goto out; 984 985 while (1) { 986 init_completion(&ch->done); 987 ret = srp_send_req(ch, multich); 988 if (ret) 989 goto out; 990 ret = wait_for_completion_interruptible(&ch->done); 991 if (ret < 0) 992 goto out; 993 994 /* 995 * The CM event handling code will set status to 996 * SRP_PORT_REDIRECT if we get a port redirect REJ 997 * back, or SRP_DLID_REDIRECT if we get a lid/qp 998 * redirect REJ back. 999 */ 1000 ret = ch->status; 1001 switch (ret) { 1002 case 0: 1003 ch->connected = true; 1004 goto out; 1005 1006 case SRP_PORT_REDIRECT: 1007 ret = srp_lookup_path(ch); 1008 if (ret) 1009 goto out; 1010 break; 1011 1012 case SRP_DLID_REDIRECT: 1013 break; 1014 1015 case SRP_STALE_CONN: 1016 shost_printk(KERN_ERR, target->scsi_host, PFX 1017 "giving up on stale connection\n"); 1018 ret = -ECONNRESET; 1019 goto out; 1020 1021 default: 1022 goto out; 1023 } 1024 } 1025 1026 out: 1027 return ret <= 0 ? ret : -ENODEV; 1028 } 1029 1030 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc) 1031 { 1032 srp_handle_qp_err(cq, wc, "INV RKEY"); 1033 } 1034 1035 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch, 1036 u32 rkey) 1037 { 1038 struct ib_send_wr *bad_wr; 1039 struct ib_send_wr wr = { 1040 .opcode = IB_WR_LOCAL_INV, 1041 .next = NULL, 1042 .num_sge = 0, 1043 .send_flags = 0, 1044 .ex.invalidate_rkey = rkey, 1045 }; 1046 1047 wr.wr_cqe = &req->reg_cqe; 1048 req->reg_cqe.done = srp_inv_rkey_err_done; 1049 return ib_post_send(ch->qp, &wr, &bad_wr); 1050 } 1051 1052 static void srp_unmap_data(struct scsi_cmnd *scmnd, 1053 struct srp_rdma_ch *ch, 1054 struct srp_request *req) 1055 { 1056 struct srp_target_port *target = ch->target; 1057 struct srp_device *dev = target->srp_host->srp_dev; 1058 struct ib_device *ibdev = dev->dev; 1059 int i, res; 1060 1061 if (!scsi_sglist(scmnd) || 1062 (scmnd->sc_data_direction != DMA_TO_DEVICE && 1063 scmnd->sc_data_direction != DMA_FROM_DEVICE)) 1064 return; 1065 1066 if (dev->use_fast_reg) { 1067 struct srp_fr_desc **pfr; 1068 1069 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) { 1070 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey); 1071 if (res < 0) { 1072 shost_printk(KERN_ERR, target->scsi_host, PFX 1073 "Queueing INV WR for rkey %#x failed (%d)\n", 1074 (*pfr)->mr->rkey, res); 1075 queue_work(system_long_wq, 1076 &target->tl_err_work); 1077 } 1078 } 1079 if (req->nmdesc) 1080 srp_fr_pool_put(ch->fr_pool, req->fr_list, 1081 req->nmdesc); 1082 } else if (dev->use_fmr) { 1083 struct ib_pool_fmr **pfmr; 1084 1085 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++) 1086 ib_fmr_pool_unmap(*pfmr); 1087 } 1088 1089 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd), 1090 scmnd->sc_data_direction); 1091 } 1092 1093 /** 1094 * srp_claim_req - Take ownership of the scmnd associated with a request. 1095 * @ch: SRP RDMA channel. 1096 * @req: SRP request. 1097 * @sdev: If not NULL, only take ownership for this SCSI device. 1098 * @scmnd: If NULL, take ownership of @req->scmnd. 
If not NULL, only take 1099 * ownership of @req->scmnd if it equals @scmnd. 1100 * 1101 * Return value: 1102 * Either NULL or a pointer to the SCSI command the caller became owner of. 1103 */ 1104 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch, 1105 struct srp_request *req, 1106 struct scsi_device *sdev, 1107 struct scsi_cmnd *scmnd) 1108 { 1109 unsigned long flags; 1110 1111 spin_lock_irqsave(&ch->lock, flags); 1112 if (req->scmnd && 1113 (!sdev || req->scmnd->device == sdev) && 1114 (!scmnd || req->scmnd == scmnd)) { 1115 scmnd = req->scmnd; 1116 req->scmnd = NULL; 1117 } else { 1118 scmnd = NULL; 1119 } 1120 spin_unlock_irqrestore(&ch->lock, flags); 1121 1122 return scmnd; 1123 } 1124 1125 /** 1126 * srp_free_req() - Unmap data and adjust ch->req_lim. 1127 * @ch: SRP RDMA channel. 1128 * @req: Request to be freed. 1129 * @scmnd: SCSI command associated with @req. 1130 * @req_lim_delta: Amount to be added to @target->req_lim. 1131 */ 1132 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req, 1133 struct scsi_cmnd *scmnd, s32 req_lim_delta) 1134 { 1135 unsigned long flags; 1136 1137 srp_unmap_data(scmnd, ch, req); 1138 1139 spin_lock_irqsave(&ch->lock, flags); 1140 ch->req_lim += req_lim_delta; 1141 spin_unlock_irqrestore(&ch->lock, flags); 1142 } 1143 1144 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req, 1145 struct scsi_device *sdev, int result) 1146 { 1147 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL); 1148 1149 if (scmnd) { 1150 srp_free_req(ch, req, scmnd, 0); 1151 scmnd->result = result; 1152 scmnd->scsi_done(scmnd); 1153 } 1154 } 1155 1156 static void srp_terminate_io(struct srp_rport *rport) 1157 { 1158 struct srp_target_port *target = rport->lld_data; 1159 struct srp_rdma_ch *ch; 1160 struct Scsi_Host *shost = target->scsi_host; 1161 struct scsi_device *sdev; 1162 int i, j; 1163 1164 /* 1165 * Invoking srp_terminate_io() while srp_queuecommand() is running 1166 * is not safe. Hence the warning statement below. 1167 */ 1168 shost_for_each_device(sdev, shost) 1169 WARN_ON_ONCE(sdev->request_queue->request_fn_active); 1170 1171 for (i = 0; i < target->ch_count; i++) { 1172 ch = &target->ch[i]; 1173 1174 for (j = 0; j < target->req_ring_size; ++j) { 1175 struct srp_request *req = &ch->req_ring[j]; 1176 1177 srp_finish_req(ch, req, NULL, 1178 DID_TRANSPORT_FAILFAST << 16); 1179 } 1180 } 1181 } 1182 1183 /* 1184 * It is up to the caller to ensure that srp_rport_reconnect() calls are 1185 * serialized and that no concurrent srp_queuecommand(), srp_abort(), 1186 * srp_reset_device() or srp_reset_host() calls will occur while this function 1187 * is in progress. One way to realize that is not to call this function 1188 * directly but to call srp_reconnect_rport() instead since that last function 1189 * serializes calls of this function via rport->mutex and also blocks 1190 * srp_queuecommand() calls before invoking this function. 1191 */ 1192 static int srp_rport_reconnect(struct srp_rport *rport) 1193 { 1194 struct srp_target_port *target = rport->lld_data; 1195 struct srp_rdma_ch *ch; 1196 int i, j, ret = 0; 1197 bool multich = false; 1198 1199 srp_disconnect_target(target); 1200 1201 if (target->state == SRP_TARGET_SCANNING) 1202 return -ENODEV; 1203 1204 /* 1205 * Now get a new local CM ID so that we avoid confusing the target in 1206 * case things are really fouled up. Doing so also ensures that all CM 1207 * callbacks will have finished before a new QP is allocated. 
1208 */ 1209 for (i = 0; i < target->ch_count; i++) { 1210 ch = &target->ch[i]; 1211 ret += srp_new_cm_id(ch); 1212 } 1213 for (i = 0; i < target->ch_count; i++) { 1214 ch = &target->ch[i]; 1215 for (j = 0; j < target->req_ring_size; ++j) { 1216 struct srp_request *req = &ch->req_ring[j]; 1217 1218 srp_finish_req(ch, req, NULL, DID_RESET << 16); 1219 } 1220 } 1221 for (i = 0; i < target->ch_count; i++) { 1222 ch = &target->ch[i]; 1223 /* 1224 * Whether or not creating a new CM ID succeeded, create a new 1225 * QP. This guarantees that all completion callback function 1226 * invocations have finished before request resetting starts. 1227 */ 1228 ret += srp_create_ch_ib(ch); 1229 1230 INIT_LIST_HEAD(&ch->free_tx); 1231 for (j = 0; j < target->queue_size; ++j) 1232 list_add(&ch->tx_ring[j]->list, &ch->free_tx); 1233 } 1234 1235 target->qp_in_error = false; 1236 1237 for (i = 0; i < target->ch_count; i++) { 1238 ch = &target->ch[i]; 1239 if (ret) 1240 break; 1241 ret = srp_connect_ch(ch, multich); 1242 multich = true; 1243 } 1244 1245 if (ret == 0) 1246 shost_printk(KERN_INFO, target->scsi_host, 1247 PFX "reconnect succeeded\n"); 1248 1249 return ret; 1250 } 1251 1252 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr, 1253 unsigned int dma_len, u32 rkey) 1254 { 1255 struct srp_direct_buf *desc = state->desc; 1256 1257 WARN_ON_ONCE(!dma_len); 1258 1259 desc->va = cpu_to_be64(dma_addr); 1260 desc->key = cpu_to_be32(rkey); 1261 desc->len = cpu_to_be32(dma_len); 1262 1263 state->total_len += dma_len; 1264 state->desc++; 1265 state->ndesc++; 1266 } 1267 1268 static int srp_map_finish_fmr(struct srp_map_state *state, 1269 struct srp_rdma_ch *ch) 1270 { 1271 struct srp_target_port *target = ch->target; 1272 struct srp_device *dev = target->srp_host->srp_dev; 1273 struct ib_pd *pd = target->pd; 1274 struct ib_pool_fmr *fmr; 1275 u64 io_addr = 0; 1276 1277 if (state->fmr.next >= state->fmr.end) { 1278 shost_printk(KERN_ERR, ch->target->scsi_host, 1279 PFX "Out of MRs (mr_per_cmd = %d)\n", 1280 ch->target->mr_per_cmd); 1281 return -ENOMEM; 1282 } 1283 1284 WARN_ON_ONCE(!dev->use_fmr); 1285 1286 if (state->npages == 0) 1287 return 0; 1288 1289 if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { 1290 srp_map_desc(state, state->base_dma_addr, state->dma_len, 1291 pd->unsafe_global_rkey); 1292 goto reset_state; 1293 } 1294 1295 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages, 1296 state->npages, io_addr); 1297 if (IS_ERR(fmr)) 1298 return PTR_ERR(fmr); 1299 1300 *state->fmr.next++ = fmr; 1301 state->nmdesc++; 1302 1303 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask, 1304 state->dma_len, fmr->fmr->rkey); 1305 1306 reset_state: 1307 state->npages = 0; 1308 state->dma_len = 0; 1309 1310 return 0; 1311 } 1312 1313 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc) 1314 { 1315 srp_handle_qp_err(cq, wc, "FAST REG"); 1316 } 1317 1318 /* 1319 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset 1320 * where to start in the first element. If sg_offset_p != NULL then 1321 * *sg_offset_p is updated to the offset in state->sg[retval] of the first 1322 * byte that has not yet been mapped. 
1323 */ 1324 static int srp_map_finish_fr(struct srp_map_state *state, 1325 struct srp_request *req, 1326 struct srp_rdma_ch *ch, int sg_nents, 1327 unsigned int *sg_offset_p) 1328 { 1329 struct srp_target_port *target = ch->target; 1330 struct srp_device *dev = target->srp_host->srp_dev; 1331 struct ib_pd *pd = target->pd; 1332 struct ib_send_wr *bad_wr; 1333 struct ib_reg_wr wr; 1334 struct srp_fr_desc *desc; 1335 u32 rkey; 1336 int n, err; 1337 1338 if (state->fr.next >= state->fr.end) { 1339 shost_printk(KERN_ERR, ch->target->scsi_host, 1340 PFX "Out of MRs (mr_per_cmd = %d)\n", 1341 ch->target->mr_per_cmd); 1342 return -ENOMEM; 1343 } 1344 1345 WARN_ON_ONCE(!dev->use_fast_reg); 1346 1347 if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { 1348 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 1349 1350 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset, 1351 sg_dma_len(state->sg) - sg_offset, 1352 pd->unsafe_global_rkey); 1353 if (sg_offset_p) 1354 *sg_offset_p = 0; 1355 return 1; 1356 } 1357 1358 desc = srp_fr_pool_get(ch->fr_pool); 1359 if (!desc) 1360 return -ENOMEM; 1361 1362 rkey = ib_inc_rkey(desc->mr->rkey); 1363 ib_update_fast_reg_key(desc->mr, rkey); 1364 1365 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p, 1366 dev->mr_page_size); 1367 if (unlikely(n < 0)) { 1368 srp_fr_pool_put(ch->fr_pool, &desc, 1); 1369 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n", 1370 dev_name(&req->scmnd->device->sdev_gendev), sg_nents, 1371 sg_offset_p ? *sg_offset_p : -1, n); 1372 return n; 1373 } 1374 1375 WARN_ON_ONCE(desc->mr->length == 0); 1376 1377 req->reg_cqe.done = srp_reg_mr_err_done; 1378 1379 wr.wr.next = NULL; 1380 wr.wr.opcode = IB_WR_REG_MR; 1381 wr.wr.wr_cqe = &req->reg_cqe; 1382 wr.wr.num_sge = 0; 1383 wr.wr.send_flags = 0; 1384 wr.mr = desc->mr; 1385 wr.key = desc->mr->rkey; 1386 wr.access = (IB_ACCESS_LOCAL_WRITE | 1387 IB_ACCESS_REMOTE_READ | 1388 IB_ACCESS_REMOTE_WRITE); 1389 1390 *state->fr.next++ = desc; 1391 state->nmdesc++; 1392 1393 srp_map_desc(state, desc->mr->iova, 1394 desc->mr->length, desc->mr->rkey); 1395 1396 err = ib_post_send(ch->qp, &wr.wr, &bad_wr); 1397 if (unlikely(err)) { 1398 WARN_ON_ONCE(err == -ENOMEM); 1399 return err; 1400 } 1401 1402 return n; 1403 } 1404 1405 static int srp_map_sg_entry(struct srp_map_state *state, 1406 struct srp_rdma_ch *ch, 1407 struct scatterlist *sg) 1408 { 1409 struct srp_target_port *target = ch->target; 1410 struct srp_device *dev = target->srp_host->srp_dev; 1411 struct ib_device *ibdev = dev->dev; 1412 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg); 1413 unsigned int dma_len = ib_sg_dma_len(ibdev, sg); 1414 unsigned int len = 0; 1415 int ret; 1416 1417 WARN_ON_ONCE(!dma_len); 1418 1419 while (dma_len) { 1420 unsigned offset = dma_addr & ~dev->mr_page_mask; 1421 1422 if (state->npages == dev->max_pages_per_mr || 1423 (state->npages > 0 && offset != 0)) { 1424 ret = srp_map_finish_fmr(state, ch); 1425 if (ret) 1426 return ret; 1427 } 1428 1429 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset); 1430 1431 if (!state->npages) 1432 state->base_dma_addr = dma_addr; 1433 state->pages[state->npages++] = dma_addr & dev->mr_page_mask; 1434 state->dma_len += len; 1435 dma_addr += len; 1436 dma_len -= len; 1437 } 1438 1439 /* 1440 * If the end of the MR is not on a page boundary then we need to 1441 * close it out and start a new one -- we can only merge at page 1442 * boundaries. 
1443 */ 1444 ret = 0; 1445 if ((dma_addr & ~dev->mr_page_mask) != 0) 1446 ret = srp_map_finish_fmr(state, ch); 1447 return ret; 1448 } 1449 1450 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch, 1451 struct srp_request *req, struct scatterlist *scat, 1452 int count) 1453 { 1454 struct scatterlist *sg; 1455 int i, ret; 1456 1457 state->pages = req->map_page; 1458 state->fmr.next = req->fmr_list; 1459 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd; 1460 1461 for_each_sg(scat, sg, count, i) { 1462 ret = srp_map_sg_entry(state, ch, sg); 1463 if (ret) 1464 return ret; 1465 } 1466 1467 ret = srp_map_finish_fmr(state, ch); 1468 if (ret) 1469 return ret; 1470 1471 return 0; 1472 } 1473 1474 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, 1475 struct srp_request *req, struct scatterlist *scat, 1476 int count) 1477 { 1478 unsigned int sg_offset = 0; 1479 1480 state->fr.next = req->fr_list; 1481 state->fr.end = req->fr_list + ch->target->mr_per_cmd; 1482 state->sg = scat; 1483 1484 if (count == 0) 1485 return 0; 1486 1487 while (count) { 1488 int i, n; 1489 1490 n = srp_map_finish_fr(state, req, ch, count, &sg_offset); 1491 if (unlikely(n < 0)) 1492 return n; 1493 1494 count -= n; 1495 for (i = 0; i < n; i++) 1496 state->sg = sg_next(state->sg); 1497 } 1498 1499 return 0; 1500 } 1501 1502 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, 1503 struct srp_request *req, struct scatterlist *scat, 1504 int count) 1505 { 1506 struct srp_target_port *target = ch->target; 1507 struct srp_device *dev = target->srp_host->srp_dev; 1508 struct scatterlist *sg; 1509 int i; 1510 1511 for_each_sg(scat, sg, count, i) { 1512 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg), 1513 ib_sg_dma_len(dev->dev, sg), 1514 target->pd->unsafe_global_rkey); 1515 } 1516 1517 return 0; 1518 } 1519 1520 /* 1521 * Register the indirect data buffer descriptor with the HCA. 1522 * 1523 * Note: since the indirect data buffer descriptor has been allocated with 1524 * kmalloc() it is guaranteed that this buffer is a physically contiguous 1525 * memory buffer. 1526 */ 1527 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, 1528 void **next_mr, void **end_mr, u32 idb_len, 1529 __be32 *idb_rkey) 1530 { 1531 struct srp_target_port *target = ch->target; 1532 struct srp_device *dev = target->srp_host->srp_dev; 1533 struct srp_map_state state; 1534 struct srp_direct_buf idb_desc; 1535 u64 idb_pages[1]; 1536 struct scatterlist idb_sg[1]; 1537 int ret; 1538 1539 memset(&state, 0, sizeof(state)); 1540 memset(&idb_desc, 0, sizeof(idb_desc)); 1541 state.gen.next = next_mr; 1542 state.gen.end = end_mr; 1543 state.desc = &idb_desc; 1544 state.base_dma_addr = req->indirect_dma_addr; 1545 state.dma_len = idb_len; 1546 1547 if (dev->use_fast_reg) { 1548 state.sg = idb_sg; 1549 sg_init_one(idb_sg, req->indirect_desc, idb_len); 1550 idb_sg->dma_address = req->indirect_dma_addr; /* hack! 
*/ 1551 #ifdef CONFIG_NEED_SG_DMA_LENGTH 1552 idb_sg->dma_length = idb_sg->length; /* hack^2 */ 1553 #endif 1554 ret = srp_map_finish_fr(&state, req, ch, 1, NULL); 1555 if (ret < 0) 1556 return ret; 1557 WARN_ON_ONCE(ret < 1); 1558 } else if (dev->use_fmr) { 1559 state.pages = idb_pages; 1560 state.pages[0] = (req->indirect_dma_addr & 1561 dev->mr_page_mask); 1562 state.npages = 1; 1563 ret = srp_map_finish_fmr(&state, ch); 1564 if (ret < 0) 1565 return ret; 1566 } else { 1567 return -EINVAL; 1568 } 1569 1570 *idb_rkey = idb_desc.key; 1571 1572 return 0; 1573 } 1574 1575 static void srp_check_mapping(struct srp_map_state *state, 1576 struct srp_rdma_ch *ch, struct srp_request *req, 1577 struct scatterlist *scat, int count) 1578 { 1579 struct srp_device *dev = ch->target->srp_host->srp_dev; 1580 struct srp_fr_desc **pfr; 1581 u64 desc_len = 0, mr_len = 0; 1582 int i; 1583 1584 for (i = 0; i < state->ndesc; i++) 1585 desc_len += be32_to_cpu(req->indirect_desc[i].len); 1586 if (dev->use_fast_reg) 1587 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++) 1588 mr_len += (*pfr)->mr->length; 1589 else if (dev->use_fmr) 1590 for (i = 0; i < state->nmdesc; i++) 1591 mr_len += be32_to_cpu(req->indirect_desc[i].len); 1592 if (desc_len != scsi_bufflen(req->scmnd) || 1593 mr_len > scsi_bufflen(req->scmnd)) 1594 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n", 1595 scsi_bufflen(req->scmnd), desc_len, mr_len, 1596 state->ndesc, state->nmdesc); 1597 } 1598 1599 /** 1600 * srp_map_data() - map SCSI data buffer onto an SRP request 1601 * @scmnd: SCSI command to map 1602 * @ch: SRP RDMA channel 1603 * @req: SRP request 1604 * 1605 * Returns the length in bytes of the SRP_CMD IU or a negative value if 1606 * mapping failed. 1607 */ 1608 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, 1609 struct srp_request *req) 1610 { 1611 struct srp_target_port *target = ch->target; 1612 struct ib_pd *pd = target->pd; 1613 struct scatterlist *scat; 1614 struct srp_cmd *cmd = req->cmd->buf; 1615 int len, nents, count, ret; 1616 struct srp_device *dev; 1617 struct ib_device *ibdev; 1618 struct srp_map_state state; 1619 struct srp_indirect_buf *indirect_hdr; 1620 u32 idb_len, table_len; 1621 __be32 idb_rkey; 1622 u8 fmt; 1623 1624 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE) 1625 return sizeof (struct srp_cmd); 1626 1627 if (scmnd->sc_data_direction != DMA_FROM_DEVICE && 1628 scmnd->sc_data_direction != DMA_TO_DEVICE) { 1629 shost_printk(KERN_WARNING, target->scsi_host, 1630 PFX "Unhandled data direction %d\n", 1631 scmnd->sc_data_direction); 1632 return -EINVAL; 1633 } 1634 1635 nents = scsi_sg_count(scmnd); 1636 scat = scsi_sglist(scmnd); 1637 1638 dev = target->srp_host->srp_dev; 1639 ibdev = dev->dev; 1640 1641 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction); 1642 if (unlikely(count == 0)) 1643 return -EIO; 1644 1645 fmt = SRP_DATA_DESC_DIRECT; 1646 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); 1647 1648 if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { 1649 /* 1650 * The midlayer only generated a single gather/scatter 1651 * entry, or DMA mapping coalesced everything to a 1652 * single entry. So a direct descriptor along with 1653 * the DMA MR suffices. 
1654 */ 1655 struct srp_direct_buf *buf = (void *) cmd->add_data; 1656 1657 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); 1658 buf->key = cpu_to_be32(pd->unsafe_global_rkey); 1659 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); 1660 1661 req->nmdesc = 0; 1662 goto map_complete; 1663 } 1664 1665 /* 1666 * We have more than one scatter/gather entry, so build our indirect 1667 * descriptor table, trying to merge as many entries as we can. 1668 */ 1669 indirect_hdr = (void *) cmd->add_data; 1670 1671 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr, 1672 target->indirect_size, DMA_TO_DEVICE); 1673 1674 memset(&state, 0, sizeof(state)); 1675 state.desc = req->indirect_desc; 1676 if (dev->use_fast_reg) 1677 ret = srp_map_sg_fr(&state, ch, req, scat, count); 1678 else if (dev->use_fmr) 1679 ret = srp_map_sg_fmr(&state, ch, req, scat, count); 1680 else 1681 ret = srp_map_sg_dma(&state, ch, req, scat, count); 1682 req->nmdesc = state.nmdesc; 1683 if (ret < 0) 1684 goto unmap; 1685 1686 { 1687 DEFINE_DYNAMIC_DEBUG_METADATA(ddm, 1688 "Memory mapping consistency check"); 1689 if (DYNAMIC_DEBUG_BRANCH(ddm)) 1690 srp_check_mapping(&state, ch, req, scat, count); 1691 } 1692 1693 /* We've mapped the request, now pull as much of the indirect 1694 * descriptor table as we can into the command buffer. If this 1695 * target is not using an external indirect table, we are 1696 * guaranteed to fit into the command, as the SCSI layer won't 1697 * give us more S/G entries than we allow. 1698 */ 1699 if (state.ndesc == 1) { 1700 /* 1701 * Memory registration collapsed the sg-list into one entry, 1702 * so use a direct descriptor. 1703 */ 1704 struct srp_direct_buf *buf = (void *) cmd->add_data; 1705 1706 *buf = req->indirect_desc[0]; 1707 goto map_complete; 1708 } 1709 1710 if (unlikely(target->cmd_sg_cnt < state.ndesc && 1711 !target->allow_ext_sg)) { 1712 shost_printk(KERN_ERR, target->scsi_host, 1713 "Could not fit S/G list into SRP_CMD\n"); 1714 ret = -EIO; 1715 goto unmap; 1716 } 1717 1718 count = min(state.ndesc, target->cmd_sg_cnt); 1719 table_len = state.ndesc * sizeof (struct srp_direct_buf); 1720 idb_len = sizeof(struct srp_indirect_buf) + table_len; 1721 1722 fmt = SRP_DATA_DESC_INDIRECT; 1723 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf); 1724 len += count * sizeof (struct srp_direct_buf); 1725 1726 memcpy(indirect_hdr->desc_list, req->indirect_desc, 1727 count * sizeof (struct srp_direct_buf)); 1728 1729 if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { 1730 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end, 1731 idb_len, &idb_rkey); 1732 if (ret < 0) 1733 goto unmap; 1734 req->nmdesc++; 1735 } else { 1736 idb_rkey = cpu_to_be32(pd->unsafe_global_rkey); 1737 } 1738 1739 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr); 1740 indirect_hdr->table_desc.key = idb_rkey; 1741 indirect_hdr->table_desc.len = cpu_to_be32(table_len); 1742 indirect_hdr->len = cpu_to_be32(state.total_len); 1743 1744 if (scmnd->sc_data_direction == DMA_TO_DEVICE) 1745 cmd->data_out_desc_cnt = count; 1746 else 1747 cmd->data_in_desc_cnt = count; 1748 1749 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len, 1750 DMA_TO_DEVICE); 1751 1752 map_complete: 1753 if (scmnd->sc_data_direction == DMA_TO_DEVICE) 1754 cmd->buf_fmt = fmt << 4; 1755 else 1756 cmd->buf_fmt = fmt; 1757 1758 return len; 1759 1760 unmap: 1761 srp_unmap_data(scmnd, ch, req); 1762 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size) 1763 ret = -E2BIG; 1764 return ret; 1765 } 1766 1767 
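/*
 * Illustrative sketch (not used by the driver): every mapping path in
 * srp_map_data() ultimately describes the data buffer with one or more
 * struct srp_direct_buf entries from <scsi/srp.h>, filled the way
 * srp_map_desc() fills them, and advertises the chosen format in the two
 * nibbles of cmd->buf_fmt (upper nibble for data-out, lower for data-in).
 * The helper name example_fill_desc() is made up for this comment:
 *
 *      static void example_fill_desc(struct srp_direct_buf *desc,
 *                                    dma_addr_t addr, u32 rkey, u32 len)
 *      {
 *              desc->va  = cpu_to_be64(addr);  // address the target RDMAs to/from
 *              desc->key = cpu_to_be32(rkey);  // rkey covering that address range
 *              desc->len = cpu_to_be32(len);   // length in bytes
 *      }
 */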
/* 1768 * Return an IU and possible credit to the free pool 1769 */ 1770 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu, 1771 enum srp_iu_type iu_type) 1772 { 1773 unsigned long flags; 1774 1775 spin_lock_irqsave(&ch->lock, flags); 1776 list_add(&iu->list, &ch->free_tx); 1777 if (iu_type != SRP_IU_RSP) 1778 ++ch->req_lim; 1779 spin_unlock_irqrestore(&ch->lock, flags); 1780 } 1781 1782 /* 1783 * Must be called with ch->lock held to protect req_lim and free_tx. 1784 * If IU is not sent, it must be returned using srp_put_tx_iu(). 1785 * 1786 * Note: 1787 * An upper limit for the number of allocated information units for each 1788 * request type is: 1789 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues 1790 * more than Scsi_Host.can_queue requests. 1791 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE. 1792 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than 1793 * one unanswered SRP request to an initiator. 1794 */ 1795 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch, 1796 enum srp_iu_type iu_type) 1797 { 1798 struct srp_target_port *target = ch->target; 1799 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE; 1800 struct srp_iu *iu; 1801 1802 ib_process_cq_direct(ch->send_cq, -1); 1803 1804 if (list_empty(&ch->free_tx)) 1805 return NULL; 1806 1807 /* Initiator responses to target requests do not consume credits */ 1808 if (iu_type != SRP_IU_RSP) { 1809 if (ch->req_lim <= rsv) { 1810 ++target->zero_req_lim; 1811 return NULL; 1812 } 1813 1814 --ch->req_lim; 1815 } 1816 1817 iu = list_first_entry(&ch->free_tx, struct srp_iu, list); 1818 list_del(&iu->list); 1819 return iu; 1820 } 1821 1822 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc) 1823 { 1824 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); 1825 struct srp_rdma_ch *ch = cq->cq_context; 1826 1827 if (unlikely(wc->status != IB_WC_SUCCESS)) { 1828 srp_handle_qp_err(cq, wc, "SEND"); 1829 return; 1830 } 1831 1832 list_add(&iu->list, &ch->free_tx); 1833 } 1834 1835 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) 1836 { 1837 struct srp_target_port *target = ch->target; 1838 struct ib_sge list; 1839 struct ib_send_wr wr, *bad_wr; 1840 1841 list.addr = iu->dma; 1842 list.length = len; 1843 list.lkey = target->lkey; 1844 1845 iu->cqe.done = srp_send_done; 1846 1847 wr.next = NULL; 1848 wr.wr_cqe = &iu->cqe; 1849 wr.sg_list = &list; 1850 wr.num_sge = 1; 1851 wr.opcode = IB_WR_SEND; 1852 wr.send_flags = IB_SEND_SIGNALED; 1853 1854 return ib_post_send(ch->qp, &wr, &bad_wr); 1855 } 1856 1857 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu) 1858 { 1859 struct srp_target_port *target = ch->target; 1860 struct ib_recv_wr wr, *bad_wr; 1861 struct ib_sge list; 1862 1863 list.addr = iu->dma; 1864 list.length = iu->size; 1865 list.lkey = target->lkey; 1866 1867 iu->cqe.done = srp_recv_done; 1868 1869 wr.next = NULL; 1870 wr.wr_cqe = &iu->cqe; 1871 wr.sg_list = &list; 1872 wr.num_sge = 1; 1873 1874 return ib_post_recv(ch->qp, &wr, &bad_wr); 1875 } 1876 1877 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) 1878 { 1879 struct srp_target_port *target = ch->target; 1880 struct srp_request *req; 1881 struct scsi_cmnd *scmnd; 1882 unsigned long flags; 1883 1884 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) { 1885 spin_lock_irqsave(&ch->lock, flags); 1886 ch->req_lim += be32_to_cpu(rsp->req_lim_delta); 1887 spin_unlock_irqrestore(&ch->lock, flags); 1888 1889 ch->tsk_mgmt_status = -1; 
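                /*
                 * For a task management response the SRP response data
                 * consists of three reserved bytes followed by the RSP_CODE
                 * byte, hence the rsp->data[3] below.
                 */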
1890 if (be32_to_cpu(rsp->resp_data_len) >= 4) 1891 ch->tsk_mgmt_status = rsp->data[3]; 1892 complete(&ch->tsk_mgmt_done); 1893 } else { 1894 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag); 1895 if (scmnd) { 1896 req = (void *)scmnd->host_scribble; 1897 scmnd = srp_claim_req(ch, req, NULL, scmnd); 1898 } 1899 if (!scmnd) { 1900 shost_printk(KERN_ERR, target->scsi_host, 1901 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n", 1902 rsp->tag, ch - target->ch, ch->qp->qp_num); 1903 1904 spin_lock_irqsave(&ch->lock, flags); 1905 ch->req_lim += be32_to_cpu(rsp->req_lim_delta); 1906 spin_unlock_irqrestore(&ch->lock, flags); 1907 1908 return; 1909 } 1910 scmnd->result = rsp->status; 1911 1912 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { 1913 memcpy(scmnd->sense_buffer, rsp->data + 1914 be32_to_cpu(rsp->resp_data_len), 1915 min_t(int, be32_to_cpu(rsp->sense_data_len), 1916 SCSI_SENSE_BUFFERSIZE)); 1917 } 1918 1919 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER)) 1920 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); 1921 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER)) 1922 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt)); 1923 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER)) 1924 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt)); 1925 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER)) 1926 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt)); 1927 1928 srp_free_req(ch, req, scmnd, 1929 be32_to_cpu(rsp->req_lim_delta)); 1930 1931 scmnd->host_scribble = NULL; 1932 scmnd->scsi_done(scmnd); 1933 } 1934 } 1935 1936 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta, 1937 void *rsp, int len) 1938 { 1939 struct srp_target_port *target = ch->target; 1940 struct ib_device *dev = target->srp_host->srp_dev->dev; 1941 unsigned long flags; 1942 struct srp_iu *iu; 1943 int err; 1944 1945 spin_lock_irqsave(&ch->lock, flags); 1946 ch->req_lim += req_delta; 1947 iu = __srp_get_tx_iu(ch, SRP_IU_RSP); 1948 spin_unlock_irqrestore(&ch->lock, flags); 1949 1950 if (!iu) { 1951 shost_printk(KERN_ERR, target->scsi_host, PFX 1952 "no IU available to send response\n"); 1953 return 1; 1954 } 1955 1956 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE); 1957 memcpy(iu->buf, rsp, len); 1958 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE); 1959 1960 err = srp_post_send(ch, iu, len); 1961 if (err) { 1962 shost_printk(KERN_ERR, target->scsi_host, PFX 1963 "unable to post response: %d\n", err); 1964 srp_put_tx_iu(ch, iu, SRP_IU_RSP); 1965 } 1966 1967 return err; 1968 } 1969 1970 static void srp_process_cred_req(struct srp_rdma_ch *ch, 1971 struct srp_cred_req *req) 1972 { 1973 struct srp_cred_rsp rsp = { 1974 .opcode = SRP_CRED_RSP, 1975 .tag = req->tag, 1976 }; 1977 s32 delta = be32_to_cpu(req->req_lim_delta); 1978 1979 if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) 1980 shost_printk(KERN_ERR, ch->target->scsi_host, PFX 1981 "problems processing SRP_CRED_REQ\n"); 1982 } 1983 1984 static void srp_process_aer_req(struct srp_rdma_ch *ch, 1985 struct srp_aer_req *req) 1986 { 1987 struct srp_target_port *target = ch->target; 1988 struct srp_aer_rsp rsp = { 1989 .opcode = SRP_AER_RSP, 1990 .tag = req->tag, 1991 }; 1992 s32 delta = be32_to_cpu(req->req_lim_delta); 1993 1994 shost_printk(KERN_ERR, target->scsi_host, PFX 1995 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun)); 1996 1997 if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) 1998 shost_printk(KERN_ERR, target->scsi_host, PFX 1999 "problems processing 
SRP_AER_REQ\n"); 2000 } 2001 2002 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc) 2003 { 2004 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); 2005 struct srp_rdma_ch *ch = cq->cq_context; 2006 struct srp_target_port *target = ch->target; 2007 struct ib_device *dev = target->srp_host->srp_dev->dev; 2008 int res; 2009 u8 opcode; 2010 2011 if (unlikely(wc->status != IB_WC_SUCCESS)) { 2012 srp_handle_qp_err(cq, wc, "RECV"); 2013 return; 2014 } 2015 2016 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len, 2017 DMA_FROM_DEVICE); 2018 2019 opcode = *(u8 *) iu->buf; 2020 2021 if (0) { 2022 shost_printk(KERN_ERR, target->scsi_host, 2023 PFX "recv completion, opcode 0x%02x\n", opcode); 2024 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1, 2025 iu->buf, wc->byte_len, true); 2026 } 2027 2028 switch (opcode) { 2029 case SRP_RSP: 2030 srp_process_rsp(ch, iu->buf); 2031 break; 2032 2033 case SRP_CRED_REQ: 2034 srp_process_cred_req(ch, iu->buf); 2035 break; 2036 2037 case SRP_AER_REQ: 2038 srp_process_aer_req(ch, iu->buf); 2039 break; 2040 2041 case SRP_T_LOGOUT: 2042 /* XXX Handle target logout */ 2043 shost_printk(KERN_WARNING, target->scsi_host, 2044 PFX "Got target logout request\n"); 2045 break; 2046 2047 default: 2048 shost_printk(KERN_WARNING, target->scsi_host, 2049 PFX "Unhandled SRP opcode 0x%02x\n", opcode); 2050 break; 2051 } 2052 2053 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len, 2054 DMA_FROM_DEVICE); 2055 2056 res = srp_post_recv(ch, iu); 2057 if (res != 0) 2058 shost_printk(KERN_ERR, target->scsi_host, 2059 PFX "Recv failed with error code %d\n", res); 2060 } 2061 2062 /** 2063 * srp_tl_err_work() - handle a transport layer error 2064 * @work: Work structure embedded in an SRP target port. 2065 * 2066 * Note: This function may get invoked before the rport has been created, 2067 * hence the target->rport test. 2068 */ 2069 static void srp_tl_err_work(struct work_struct *work) 2070 { 2071 struct srp_target_port *target; 2072 2073 target = container_of(work, struct srp_target_port, tl_err_work); 2074 if (target->rport) 2075 srp_start_tl_fail_timers(target->rport); 2076 } 2077 2078 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, 2079 const char *opname) 2080 { 2081 struct srp_rdma_ch *ch = cq->cq_context; 2082 struct srp_target_port *target = ch->target; 2083 2084 if (ch->connected && !target->qp_in_error) { 2085 shost_printk(KERN_ERR, target->scsi_host, 2086 PFX "failed %s status %s (%d) for CQE %p\n", 2087 opname, ib_wc_status_msg(wc->status), wc->status, 2088 wc->wr_cqe); 2089 queue_work(system_long_wq, &target->tl_err_work); 2090 } 2091 target->qp_in_error = true; 2092 } 2093 2094 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) 2095 { 2096 struct srp_target_port *target = host_to_target(shost); 2097 struct srp_rport *rport = target->rport; 2098 struct srp_rdma_ch *ch; 2099 struct srp_request *req; 2100 struct srp_iu *iu; 2101 struct srp_cmd *cmd; 2102 struct ib_device *dev; 2103 unsigned long flags; 2104 u32 tag; 2105 u16 idx; 2106 int len, ret; 2107 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler; 2108 2109 /* 2110 * The SCSI EH thread is the only context from which srp_queuecommand() 2111 * can get invoked for blocked devices (SDEV_BLOCK / 2112 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by 2113 * locking the rport mutex if invoked from inside the SCSI EH. 
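	 * Outside the SCSI EH the rport mutex is deliberately not taken, so
	 * the regular submission path only takes the per-channel lock in
	 * order to obtain a transmit IU.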
2114 */ 2115 if (in_scsi_eh) 2116 mutex_lock(&rport->mutex); 2117 2118 scmnd->result = srp_chkready(target->rport); 2119 if (unlikely(scmnd->result)) 2120 goto err; 2121 2122 WARN_ON_ONCE(scmnd->request->tag < 0); 2123 tag = blk_mq_unique_tag(scmnd->request); 2124 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)]; 2125 idx = blk_mq_unique_tag_to_tag(tag); 2126 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n", 2127 dev_name(&shost->shost_gendev), tag, idx, 2128 target->req_ring_size); 2129 2130 spin_lock_irqsave(&ch->lock, flags); 2131 iu = __srp_get_tx_iu(ch, SRP_IU_CMD); 2132 spin_unlock_irqrestore(&ch->lock, flags); 2133 2134 if (!iu) 2135 goto err; 2136 2137 req = &ch->req_ring[idx]; 2138 dev = target->srp_host->srp_dev->dev; 2139 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len, 2140 DMA_TO_DEVICE); 2141 2142 scmnd->host_scribble = (void *) req; 2143 2144 cmd = iu->buf; 2145 memset(cmd, 0, sizeof *cmd); 2146 2147 cmd->opcode = SRP_CMD; 2148 int_to_scsilun(scmnd->device->lun, &cmd->lun); 2149 cmd->tag = tag; 2150 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len); 2151 2152 req->scmnd = scmnd; 2153 req->cmd = iu; 2154 2155 len = srp_map_data(scmnd, ch, req); 2156 if (len < 0) { 2157 shost_printk(KERN_ERR, target->scsi_host, 2158 PFX "Failed to map data (%d)\n", len); 2159 /* 2160 * If we ran out of memory descriptors (-ENOMEM) because an 2161 * application is queuing many requests with more than 2162 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer 2163 * to reduce queue depth temporarily. 2164 */ 2165 scmnd->result = len == -ENOMEM ? 2166 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16; 2167 goto err_iu; 2168 } 2169 2170 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len, 2171 DMA_TO_DEVICE); 2172 2173 if (srp_post_send(ch, iu, len)) { 2174 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n"); 2175 goto err_unmap; 2176 } 2177 2178 ret = 0; 2179 2180 unlock_rport: 2181 if (in_scsi_eh) 2182 mutex_unlock(&rport->mutex); 2183 2184 return ret; 2185 2186 err_unmap: 2187 srp_unmap_data(scmnd, ch, req); 2188 2189 err_iu: 2190 srp_put_tx_iu(ch, iu, SRP_IU_CMD); 2191 2192 /* 2193 * Avoid that the loops that iterate over the request ring can 2194 * encounter a dangling SCSI command pointer. 2195 */ 2196 req->scmnd = NULL; 2197 2198 err: 2199 if (scmnd->result) { 2200 scmnd->scsi_done(scmnd); 2201 ret = 0; 2202 } else { 2203 ret = SCSI_MLQUEUE_HOST_BUSY; 2204 } 2205 2206 goto unlock_rport; 2207 } 2208 2209 /* 2210 * Note: the resources allocated in this function are freed in 2211 * srp_free_ch_ib(). 
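 * srp_alloc_iu_bufs() allocates target->queue_size receive IUs sized for
 * ch->max_ti_iu_len and target->queue_size transmit IUs sized for
 * target->max_iu_len; the transmit IUs are added to ch->free_tx.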
2212 */ 2213 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch) 2214 { 2215 struct srp_target_port *target = ch->target; 2216 int i; 2217 2218 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring), 2219 GFP_KERNEL); 2220 if (!ch->rx_ring) 2221 goto err_no_ring; 2222 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring), 2223 GFP_KERNEL); 2224 if (!ch->tx_ring) 2225 goto err_no_ring; 2226 2227 for (i = 0; i < target->queue_size; ++i) { 2228 ch->rx_ring[i] = srp_alloc_iu(target->srp_host, 2229 ch->max_ti_iu_len, 2230 GFP_KERNEL, DMA_FROM_DEVICE); 2231 if (!ch->rx_ring[i]) 2232 goto err; 2233 } 2234 2235 for (i = 0; i < target->queue_size; ++i) { 2236 ch->tx_ring[i] = srp_alloc_iu(target->srp_host, 2237 target->max_iu_len, 2238 GFP_KERNEL, DMA_TO_DEVICE); 2239 if (!ch->tx_ring[i]) 2240 goto err; 2241 2242 list_add(&ch->tx_ring[i]->list, &ch->free_tx); 2243 } 2244 2245 return 0; 2246 2247 err: 2248 for (i = 0; i < target->queue_size; ++i) { 2249 srp_free_iu(target->srp_host, ch->rx_ring[i]); 2250 srp_free_iu(target->srp_host, ch->tx_ring[i]); 2251 } 2252 2253 2254 err_no_ring: 2255 kfree(ch->tx_ring); 2256 ch->tx_ring = NULL; 2257 kfree(ch->rx_ring); 2258 ch->rx_ring = NULL; 2259 2260 return -ENOMEM; 2261 } 2262 2263 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask) 2264 { 2265 uint64_t T_tr_ns, max_compl_time_ms; 2266 uint32_t rq_tmo_jiffies; 2267 2268 /* 2269 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair, 2270 * table 91), both the QP timeout and the retry count have to be set 2271 * for RC QP's during the RTR to RTS transition. 2272 */ 2273 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) != 2274 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)); 2275 2276 /* 2277 * Set target->rq_tmo_jiffies to one second more than the largest time 2278 * it can take before an error completion is generated. See also 2279 * C9-140..142 in the IBTA spec for more information about how to 2280 * convert the QP Local ACK Timeout value to nanoseconds. 2281 */ 2282 T_tr_ns = 4096 * (1ULL << qp_attr->timeout); 2283 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns; 2284 do_div(max_compl_time_ms, NSEC_PER_MSEC); 2285 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000); 2286 2287 return rq_tmo_jiffies; 2288 } 2289 2290 static void srp_cm_rep_handler(struct ib_cm_id *cm_id, 2291 const struct srp_login_rsp *lrsp, 2292 struct srp_rdma_ch *ch) 2293 { 2294 struct srp_target_port *target = ch->target; 2295 struct ib_qp_attr *qp_attr = NULL; 2296 int attr_mask = 0; 2297 int ret; 2298 int i; 2299 2300 if (lrsp->opcode == SRP_LOGIN_RSP) { 2301 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len); 2302 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta); 2303 2304 /* 2305 * Reserve credits for task management so we don't 2306 * bounce requests back to the SCSI mid-layer. 
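		 * can_queue is therefore clamped to req_lim minus
		 * SRP_TSK_MGMT_SQ_SIZE, which matches the credits that
		 * __srp_get_tx_iu() keeps in reserve for SRP_IU_TSK_MGMT.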
2307 */ 2308 target->scsi_host->can_queue 2309 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE, 2310 target->scsi_host->can_queue); 2311 target->scsi_host->cmd_per_lun 2312 = min_t(int, target->scsi_host->can_queue, 2313 target->scsi_host->cmd_per_lun); 2314 } else { 2315 shost_printk(KERN_WARNING, target->scsi_host, 2316 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode); 2317 ret = -ECONNRESET; 2318 goto error; 2319 } 2320 2321 if (!ch->rx_ring) { 2322 ret = srp_alloc_iu_bufs(ch); 2323 if (ret) 2324 goto error; 2325 } 2326 2327 ret = -ENOMEM; 2328 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); 2329 if (!qp_attr) 2330 goto error; 2331 2332 qp_attr->qp_state = IB_QPS_RTR; 2333 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); 2334 if (ret) 2335 goto error_free; 2336 2337 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); 2338 if (ret) 2339 goto error_free; 2340 2341 for (i = 0; i < target->queue_size; i++) { 2342 struct srp_iu *iu = ch->rx_ring[i]; 2343 2344 ret = srp_post_recv(ch, iu); 2345 if (ret) 2346 goto error_free; 2347 } 2348 2349 qp_attr->qp_state = IB_QPS_RTS; 2350 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); 2351 if (ret) 2352 goto error_free; 2353 2354 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask); 2355 2356 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); 2357 if (ret) 2358 goto error_free; 2359 2360 ret = ib_send_cm_rtu(cm_id, NULL, 0); 2361 2362 error_free: 2363 kfree(qp_attr); 2364 2365 error: 2366 ch->status = ret; 2367 } 2368 2369 static void srp_cm_rej_handler(struct ib_cm_id *cm_id, 2370 struct ib_cm_event *event, 2371 struct srp_rdma_ch *ch) 2372 { 2373 struct srp_target_port *target = ch->target; 2374 struct Scsi_Host *shost = target->scsi_host; 2375 struct ib_class_port_info *cpi; 2376 int opcode; 2377 2378 switch (event->param.rej_rcvd.reason) { 2379 case IB_CM_REJ_PORT_CM_REDIRECT: 2380 cpi = event->param.rej_rcvd.ari; 2381 ch->path.dlid = cpi->redirect_lid; 2382 ch->path.pkey = cpi->redirect_pkey; 2383 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff; 2384 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16); 2385 2386 ch->status = ch->path.dlid ? 2387 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT; 2388 break; 2389 2390 case IB_CM_REJ_PORT_REDIRECT: 2391 if (srp_target_is_topspin(target)) { 2392 /* 2393 * Topspin/Cisco SRP gateways incorrectly send 2394 * reject reason code 25 when they mean 24 2395 * (port redirect). 
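			 * The ARI bytes are therefore interpreted as the
			 * GID of the port to redirect to, and the
			 * SRP_PORT_REDIRECT status lets the connect path
			 * retry with that destination.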
2396 */ 2397 memcpy(ch->path.dgid.raw, 2398 event->param.rej_rcvd.ari, 16); 2399 2400 shost_printk(KERN_DEBUG, shost, 2401 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", 2402 be64_to_cpu(ch->path.dgid.global.subnet_prefix), 2403 be64_to_cpu(ch->path.dgid.global.interface_id)); 2404 2405 ch->status = SRP_PORT_REDIRECT; 2406 } else { 2407 shost_printk(KERN_WARNING, shost, 2408 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n"); 2409 ch->status = -ECONNRESET; 2410 } 2411 break; 2412 2413 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: 2414 shost_printk(KERN_WARNING, shost, 2415 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); 2416 ch->status = -ECONNRESET; 2417 break; 2418 2419 case IB_CM_REJ_CONSUMER_DEFINED: 2420 opcode = *(u8 *) event->private_data; 2421 if (opcode == SRP_LOGIN_REJ) { 2422 struct srp_login_rej *rej = event->private_data; 2423 u32 reason = be32_to_cpu(rej->reason); 2424 2425 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) 2426 shost_printk(KERN_WARNING, shost, 2427 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); 2428 else 2429 shost_printk(KERN_WARNING, shost, PFX 2430 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n", 2431 target->sgid.raw, 2432 target->orig_dgid.raw, reason); 2433 } else 2434 shost_printk(KERN_WARNING, shost, 2435 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," 2436 " opcode 0x%02x\n", opcode); 2437 ch->status = -ECONNRESET; 2438 break; 2439 2440 case IB_CM_REJ_STALE_CONN: 2441 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n"); 2442 ch->status = SRP_STALE_CONN; 2443 break; 2444 2445 default: 2446 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", 2447 event->param.rej_rcvd.reason); 2448 ch->status = -ECONNRESET; 2449 } 2450 } 2451 2452 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) 2453 { 2454 struct srp_rdma_ch *ch = cm_id->context; 2455 struct srp_target_port *target = ch->target; 2456 int comp = 0; 2457 2458 switch (event->event) { 2459 case IB_CM_REQ_ERROR: 2460 shost_printk(KERN_DEBUG, target->scsi_host, 2461 PFX "Sending CM REQ failed\n"); 2462 comp = 1; 2463 ch->status = -ECONNRESET; 2464 break; 2465 2466 case IB_CM_REP_RECEIVED: 2467 comp = 1; 2468 srp_cm_rep_handler(cm_id, event->private_data, ch); 2469 break; 2470 2471 case IB_CM_REJ_RECEIVED: 2472 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); 2473 comp = 1; 2474 2475 srp_cm_rej_handler(cm_id, event, ch); 2476 break; 2477 2478 case IB_CM_DREQ_RECEIVED: 2479 shost_printk(KERN_WARNING, target->scsi_host, 2480 PFX "DREQ received - connection closed\n"); 2481 ch->connected = false; 2482 if (ib_send_cm_drep(cm_id, NULL, 0)) 2483 shost_printk(KERN_ERR, target->scsi_host, 2484 PFX "Sending CM DREP failed\n"); 2485 queue_work(system_long_wq, &target->tl_err_work); 2486 break; 2487 2488 case IB_CM_TIMEWAIT_EXIT: 2489 shost_printk(KERN_ERR, target->scsi_host, 2490 PFX "connection closed\n"); 2491 comp = 1; 2492 2493 ch->status = 0; 2494 break; 2495 2496 case IB_CM_MRA_RECEIVED: 2497 case IB_CM_DREQ_ERROR: 2498 case IB_CM_DREP_RECEIVED: 2499 break; 2500 2501 default: 2502 shost_printk(KERN_WARNING, target->scsi_host, 2503 PFX "Unhandled CM event %d\n", event->event); 2504 break; 2505 } 2506 2507 if (comp) 2508 complete(&ch->done); 2509 2510 return 0; 2511 } 2512 2513 /** 2514 * srp_change_queue_depth - setting device queue depth 2515 * @sdev: scsi device struct 2516 * @qdepth: requested queue depth 2517 * 2518 * Returns queue depth. 
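 * Devices without tagged command support are limited to a queue depth of
 * one.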
2519 */ 2520 static int 2521 srp_change_queue_depth(struct scsi_device *sdev, int qdepth) 2522 { 2523 if (!sdev->tagged_supported) 2524 qdepth = 1; 2525 return scsi_change_queue_depth(sdev, qdepth); 2526 } 2527 2528 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, 2529 u8 func) 2530 { 2531 struct srp_target_port *target = ch->target; 2532 struct srp_rport *rport = target->rport; 2533 struct ib_device *dev = target->srp_host->srp_dev->dev; 2534 struct srp_iu *iu; 2535 struct srp_tsk_mgmt *tsk_mgmt; 2536 2537 if (!ch->connected || target->qp_in_error) 2538 return -1; 2539 2540 init_completion(&ch->tsk_mgmt_done); 2541 2542 /* 2543 * Lock the rport mutex to avoid that srp_create_ch_ib() is 2544 * invoked while a task management function is being sent. 2545 */ 2546 mutex_lock(&rport->mutex); 2547 spin_lock_irq(&ch->lock); 2548 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT); 2549 spin_unlock_irq(&ch->lock); 2550 2551 if (!iu) { 2552 mutex_unlock(&rport->mutex); 2553 2554 return -1; 2555 } 2556 2557 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt, 2558 DMA_TO_DEVICE); 2559 tsk_mgmt = iu->buf; 2560 memset(tsk_mgmt, 0, sizeof *tsk_mgmt); 2561 2562 tsk_mgmt->opcode = SRP_TSK_MGMT; 2563 int_to_scsilun(lun, &tsk_mgmt->lun); 2564 tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT; 2565 tsk_mgmt->tsk_mgmt_func = func; 2566 tsk_mgmt->task_tag = req_tag; 2567 2568 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt, 2569 DMA_TO_DEVICE); 2570 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) { 2571 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT); 2572 mutex_unlock(&rport->mutex); 2573 2574 return -1; 2575 } 2576 mutex_unlock(&rport->mutex); 2577 2578 if (!wait_for_completion_timeout(&ch->tsk_mgmt_done, 2579 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS))) 2580 return -1; 2581 2582 return 0; 2583 } 2584 2585 static int srp_abort(struct scsi_cmnd *scmnd) 2586 { 2587 struct srp_target_port *target = host_to_target(scmnd->device->host); 2588 struct srp_request *req = (struct srp_request *) scmnd->host_scribble; 2589 u32 tag; 2590 u16 ch_idx; 2591 struct srp_rdma_ch *ch; 2592 int ret; 2593 2594 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); 2595 2596 if (!req) 2597 return SUCCESS; 2598 tag = blk_mq_unique_tag(scmnd->request); 2599 ch_idx = blk_mq_unique_tag_to_hwq(tag); 2600 if (WARN_ON_ONCE(ch_idx >= target->ch_count)) 2601 return SUCCESS; 2602 ch = &target->ch[ch_idx]; 2603 if (!srp_claim_req(ch, req, NULL, scmnd)) 2604 return SUCCESS; 2605 shost_printk(KERN_ERR, target->scsi_host, 2606 "Sending SRP abort for tag %#x\n", tag); 2607 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun, 2608 SRP_TSK_ABORT_TASK) == 0) 2609 ret = SUCCESS; 2610 else if (target->rport->state == SRP_RPORT_LOST) 2611 ret = FAST_IO_FAIL; 2612 else 2613 ret = FAILED; 2614 srp_free_req(ch, req, scmnd, 0); 2615 scmnd->result = DID_ABORT << 16; 2616 scmnd->scsi_done(scmnd); 2617 2618 return ret; 2619 } 2620 2621 static int srp_reset_device(struct scsi_cmnd *scmnd) 2622 { 2623 struct srp_target_port *target = host_to_target(scmnd->device->host); 2624 struct srp_rdma_ch *ch; 2625 int i; 2626 2627 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); 2628 2629 ch = &target->ch[0]; 2630 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun, 2631 SRP_TSK_LUN_RESET)) 2632 return FAILED; 2633 if (ch->tsk_mgmt_status) 2634 return FAILED; 2635 2636 for (i = 0; i < target->ch_count; i++) { 2637 ch = &target->ch[i]; 2638 for (i = 0; i < target->req_ring_size; ++i) { 2639 struct srp_request *req = 
&ch->req_ring[i]; 2640 2641 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16); 2642 } 2643 } 2644 2645 return SUCCESS; 2646 } 2647 2648 static int srp_reset_host(struct scsi_cmnd *scmnd) 2649 { 2650 struct srp_target_port *target = host_to_target(scmnd->device->host); 2651 2652 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); 2653 2654 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED; 2655 } 2656 2657 static int srp_slave_alloc(struct scsi_device *sdev) 2658 { 2659 struct Scsi_Host *shost = sdev->host; 2660 struct srp_target_port *target = host_to_target(shost); 2661 struct srp_device *srp_dev = target->srp_host->srp_dev; 2662 struct ib_device *ibdev = srp_dev->dev; 2663 2664 if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) 2665 blk_queue_virt_boundary(sdev->request_queue, 2666 ~srp_dev->mr_page_mask); 2667 2668 return 0; 2669 } 2670 2671 static int srp_slave_configure(struct scsi_device *sdev) 2672 { 2673 struct Scsi_Host *shost = sdev->host; 2674 struct srp_target_port *target = host_to_target(shost); 2675 struct request_queue *q = sdev->request_queue; 2676 unsigned long timeout; 2677 2678 if (sdev->type == TYPE_DISK) { 2679 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies); 2680 blk_queue_rq_timeout(q, timeout); 2681 } 2682 2683 return 0; 2684 } 2685 2686 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, 2687 char *buf) 2688 { 2689 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2690 2691 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); 2692 } 2693 2694 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, 2695 char *buf) 2696 { 2697 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2698 2699 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); 2700 } 2701 2702 static ssize_t show_service_id(struct device *dev, 2703 struct device_attribute *attr, char *buf) 2704 { 2705 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2706 2707 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id)); 2708 } 2709 2710 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, 2711 char *buf) 2712 { 2713 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2714 2715 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey)); 2716 } 2717 2718 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, 2719 char *buf) 2720 { 2721 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2722 2723 return sprintf(buf, "%pI6\n", target->sgid.raw); 2724 } 2725 2726 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, 2727 char *buf) 2728 { 2729 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2730 struct srp_rdma_ch *ch = &target->ch[0]; 2731 2732 return sprintf(buf, "%pI6\n", ch->path.dgid.raw); 2733 } 2734 2735 static ssize_t show_orig_dgid(struct device *dev, 2736 struct device_attribute *attr, char *buf) 2737 { 2738 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2739 2740 return sprintf(buf, "%pI6\n", target->orig_dgid.raw); 2741 } 2742 2743 static ssize_t show_req_lim(struct device *dev, 2744 struct device_attribute *attr, char *buf) 2745 { 2746 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2747 struct srp_rdma_ch *ch; 2748 int i, req_lim = INT_MAX; 2749 2750 for (i = 0; i < target->ch_count; i++) { 2751 ch = &target->ch[i]; 2752 
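		/* report the smallest credit count over all channels */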
req_lim = min(req_lim, ch->req_lim); 2753 } 2754 return sprintf(buf, "%d\n", req_lim); 2755 } 2756 2757 static ssize_t show_zero_req_lim(struct device *dev, 2758 struct device_attribute *attr, char *buf) 2759 { 2760 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2761 2762 return sprintf(buf, "%d\n", target->zero_req_lim); 2763 } 2764 2765 static ssize_t show_local_ib_port(struct device *dev, 2766 struct device_attribute *attr, char *buf) 2767 { 2768 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2769 2770 return sprintf(buf, "%d\n", target->srp_host->port); 2771 } 2772 2773 static ssize_t show_local_ib_device(struct device *dev, 2774 struct device_attribute *attr, char *buf) 2775 { 2776 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2777 2778 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name); 2779 } 2780 2781 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, 2782 char *buf) 2783 { 2784 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2785 2786 return sprintf(buf, "%d\n", target->ch_count); 2787 } 2788 2789 static ssize_t show_comp_vector(struct device *dev, 2790 struct device_attribute *attr, char *buf) 2791 { 2792 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2793 2794 return sprintf(buf, "%d\n", target->comp_vector); 2795 } 2796 2797 static ssize_t show_tl_retry_count(struct device *dev, 2798 struct device_attribute *attr, char *buf) 2799 { 2800 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2801 2802 return sprintf(buf, "%d\n", target->tl_retry_count); 2803 } 2804 2805 static ssize_t show_cmd_sg_entries(struct device *dev, 2806 struct device_attribute *attr, char *buf) 2807 { 2808 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2809 2810 return sprintf(buf, "%u\n", target->cmd_sg_cnt); 2811 } 2812 2813 static ssize_t show_allow_ext_sg(struct device *dev, 2814 struct device_attribute *attr, char *buf) 2815 { 2816 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2817 2818 return sprintf(buf, "%s\n", target->allow_ext_sg ? 
"true" : "false"); 2819 } 2820 2821 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); 2822 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); 2823 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); 2824 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); 2825 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL); 2826 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); 2827 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL); 2828 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); 2829 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); 2830 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); 2831 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); 2832 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL); 2833 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL); 2834 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL); 2835 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); 2836 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); 2837 2838 static struct device_attribute *srp_host_attrs[] = { 2839 &dev_attr_id_ext, 2840 &dev_attr_ioc_guid, 2841 &dev_attr_service_id, 2842 &dev_attr_pkey, 2843 &dev_attr_sgid, 2844 &dev_attr_dgid, 2845 &dev_attr_orig_dgid, 2846 &dev_attr_req_lim, 2847 &dev_attr_zero_req_lim, 2848 &dev_attr_local_ib_port, 2849 &dev_attr_local_ib_device, 2850 &dev_attr_ch_count, 2851 &dev_attr_comp_vector, 2852 &dev_attr_tl_retry_count, 2853 &dev_attr_cmd_sg_entries, 2854 &dev_attr_allow_ext_sg, 2855 NULL 2856 }; 2857 2858 static struct scsi_host_template srp_template = { 2859 .module = THIS_MODULE, 2860 .name = "InfiniBand SRP initiator", 2861 .proc_name = DRV_NAME, 2862 .slave_alloc = srp_slave_alloc, 2863 .slave_configure = srp_slave_configure, 2864 .info = srp_target_info, 2865 .queuecommand = srp_queuecommand, 2866 .change_queue_depth = srp_change_queue_depth, 2867 .eh_abort_handler = srp_abort, 2868 .eh_device_reset_handler = srp_reset_device, 2869 .eh_host_reset_handler = srp_reset_host, 2870 .skip_settle_delay = true, 2871 .sg_tablesize = SRP_DEF_SG_TABLESIZE, 2872 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE, 2873 .this_id = -1, 2874 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE, 2875 .use_clustering = ENABLE_CLUSTERING, 2876 .shost_attrs = srp_host_attrs, 2877 .track_queue_depth = 1, 2878 }; 2879 2880 static int srp_sdev_count(struct Scsi_Host *host) 2881 { 2882 struct scsi_device *sdev; 2883 int c = 0; 2884 2885 shost_for_each_device(sdev, host) 2886 c++; 2887 2888 return c; 2889 } 2890 2891 /* 2892 * Return values: 2893 * < 0 upon failure. Caller is responsible for SRP target port cleanup. 2894 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port 2895 * removal has been scheduled. 2896 * 0 and target->state != SRP_TARGET_REMOVED upon success. 
2897 */ 2898 static int srp_add_target(struct srp_host *host, struct srp_target_port *target) 2899 { 2900 struct srp_rport_identifiers ids; 2901 struct srp_rport *rport; 2902 2903 target->state = SRP_TARGET_SCANNING; 2904 sprintf(target->target_name, "SRP.T10:%016llX", 2905 be64_to_cpu(target->id_ext)); 2906 2907 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device)) 2908 return -ENODEV; 2909 2910 memcpy(ids.port_id, &target->id_ext, 8); 2911 memcpy(ids.port_id + 8, &target->ioc_guid, 8); 2912 ids.roles = SRP_RPORT_ROLE_TARGET; 2913 rport = srp_rport_add(target->scsi_host, &ids); 2914 if (IS_ERR(rport)) { 2915 scsi_remove_host(target->scsi_host); 2916 return PTR_ERR(rport); 2917 } 2918 2919 rport->lld_data = target; 2920 target->rport = rport; 2921 2922 spin_lock(&host->target_lock); 2923 list_add_tail(&target->list, &host->target_list); 2924 spin_unlock(&host->target_lock); 2925 2926 scsi_scan_target(&target->scsi_host->shost_gendev, 2927 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL); 2928 2929 if (srp_connected_ch(target) < target->ch_count || 2930 target->qp_in_error) { 2931 shost_printk(KERN_INFO, target->scsi_host, 2932 PFX "SCSI scan failed - removing SCSI host\n"); 2933 srp_queue_remove_work(target); 2934 goto out; 2935 } 2936 2937 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n", 2938 dev_name(&target->scsi_host->shost_gendev), 2939 srp_sdev_count(target->scsi_host)); 2940 2941 spin_lock_irq(&target->lock); 2942 if (target->state == SRP_TARGET_SCANNING) 2943 target->state = SRP_TARGET_LIVE; 2944 spin_unlock_irq(&target->lock); 2945 2946 out: 2947 return 0; 2948 } 2949 2950 static void srp_release_dev(struct device *dev) 2951 { 2952 struct srp_host *host = 2953 container_of(dev, struct srp_host, dev); 2954 2955 complete(&host->released); 2956 } 2957 2958 static struct class srp_class = { 2959 .name = "infiniband_srp", 2960 .dev_release = srp_release_dev 2961 }; 2962 2963 /** 2964 * srp_conn_unique() - check whether the connection to a target is unique 2965 * @host: SRP host. 2966 * @target: SRP target port. 2967 */ 2968 static bool srp_conn_unique(struct srp_host *host, 2969 struct srp_target_port *target) 2970 { 2971 struct srp_target_port *t; 2972 bool ret = false; 2973 2974 if (target->state == SRP_TARGET_REMOVED) 2975 goto out; 2976 2977 ret = true; 2978 2979 spin_lock(&host->target_lock); 2980 list_for_each_entry(t, &host->target_list, list) { 2981 if (t != target && 2982 target->id_ext == t->id_ext && 2983 target->ioc_guid == t->ioc_guid && 2984 target->initiator_ext == t->initiator_ext) { 2985 ret = false; 2986 break; 2987 } 2988 } 2989 spin_unlock(&host->target_lock); 2990 2991 out: 2992 return ret; 2993 } 2994 2995 /* 2996 * Target ports are added by writing 2997 * 2998 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>, 2999 * pkey=<P_Key>,service_id=<service ID> 3000 * 3001 * to the add_target sysfs attribute. 
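 *
 * For example (the identifier values below are placeholders, not taken
 * from real hardware):
 *
 *   echo id_ext=200100e08b000000,ioc_guid=0002c90300a0c2d4,dgid=fe800000000000000002c90300a0c2d5,pkey=ffff,service_id=0002c90300a0c2d4 > /sys/class/infiniband_srp/srp-<hca>-<port>/add_target
 *
 * Optional parameters such as max_sect, queue_size and tl_retry_count can
 * be appended to the same comma-separated list; see srp_opt_tokens[].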
3002 */ 3003 enum { 3004 SRP_OPT_ERR = 0, 3005 SRP_OPT_ID_EXT = 1 << 0, 3006 SRP_OPT_IOC_GUID = 1 << 1, 3007 SRP_OPT_DGID = 1 << 2, 3008 SRP_OPT_PKEY = 1 << 3, 3009 SRP_OPT_SERVICE_ID = 1 << 4, 3010 SRP_OPT_MAX_SECT = 1 << 5, 3011 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6, 3012 SRP_OPT_IO_CLASS = 1 << 7, 3013 SRP_OPT_INITIATOR_EXT = 1 << 8, 3014 SRP_OPT_CMD_SG_ENTRIES = 1 << 9, 3015 SRP_OPT_ALLOW_EXT_SG = 1 << 10, 3016 SRP_OPT_SG_TABLESIZE = 1 << 11, 3017 SRP_OPT_COMP_VECTOR = 1 << 12, 3018 SRP_OPT_TL_RETRY_COUNT = 1 << 13, 3019 SRP_OPT_QUEUE_SIZE = 1 << 14, 3020 SRP_OPT_ALL = (SRP_OPT_ID_EXT | 3021 SRP_OPT_IOC_GUID | 3022 SRP_OPT_DGID | 3023 SRP_OPT_PKEY | 3024 SRP_OPT_SERVICE_ID), 3025 }; 3026 3027 static const match_table_t srp_opt_tokens = { 3028 { SRP_OPT_ID_EXT, "id_ext=%s" }, 3029 { SRP_OPT_IOC_GUID, "ioc_guid=%s" }, 3030 { SRP_OPT_DGID, "dgid=%s" }, 3031 { SRP_OPT_PKEY, "pkey=%x" }, 3032 { SRP_OPT_SERVICE_ID, "service_id=%s" }, 3033 { SRP_OPT_MAX_SECT, "max_sect=%d" }, 3034 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" }, 3035 { SRP_OPT_IO_CLASS, "io_class=%x" }, 3036 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" }, 3037 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" }, 3038 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" }, 3039 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" }, 3040 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" }, 3041 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" }, 3042 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" }, 3043 { SRP_OPT_ERR, NULL } 3044 }; 3045 3046 static int srp_parse_options(const char *buf, struct srp_target_port *target) 3047 { 3048 char *options, *sep_opt; 3049 char *p; 3050 char dgid[3]; 3051 substring_t args[MAX_OPT_ARGS]; 3052 int opt_mask = 0; 3053 int token; 3054 int ret = -EINVAL; 3055 int i; 3056 3057 options = kstrdup(buf, GFP_KERNEL); 3058 if (!options) 3059 return -ENOMEM; 3060 3061 sep_opt = options; 3062 while ((p = strsep(&sep_opt, ",\n")) != NULL) { 3063 if (!*p) 3064 continue; 3065 3066 token = match_token(p, srp_opt_tokens, args); 3067 opt_mask |= token; 3068 3069 switch (token) { 3070 case SRP_OPT_ID_EXT: 3071 p = match_strdup(args); 3072 if (!p) { 3073 ret = -ENOMEM; 3074 goto out; 3075 } 3076 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); 3077 kfree(p); 3078 break; 3079 3080 case SRP_OPT_IOC_GUID: 3081 p = match_strdup(args); 3082 if (!p) { 3083 ret = -ENOMEM; 3084 goto out; 3085 } 3086 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16)); 3087 kfree(p); 3088 break; 3089 3090 case SRP_OPT_DGID: 3091 p = match_strdup(args); 3092 if (!p) { 3093 ret = -ENOMEM; 3094 goto out; 3095 } 3096 if (strlen(p) != 32) { 3097 pr_warn("bad dest GID parameter '%s'\n", p); 3098 kfree(p); 3099 goto out; 3100 } 3101 3102 for (i = 0; i < 16; ++i) { 3103 strlcpy(dgid, p + i * 2, sizeof(dgid)); 3104 if (sscanf(dgid, "%hhx", 3105 &target->orig_dgid.raw[i]) < 1) { 3106 ret = -EINVAL; 3107 kfree(p); 3108 goto out; 3109 } 3110 } 3111 kfree(p); 3112 break; 3113 3114 case SRP_OPT_PKEY: 3115 if (match_hex(args, &token)) { 3116 pr_warn("bad P_Key parameter '%s'\n", p); 3117 goto out; 3118 } 3119 target->pkey = cpu_to_be16(token); 3120 break; 3121 3122 case SRP_OPT_SERVICE_ID: 3123 p = match_strdup(args); 3124 if (!p) { 3125 ret = -ENOMEM; 3126 goto out; 3127 } 3128 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16)); 3129 kfree(p); 3130 break; 3131 3132 case SRP_OPT_MAX_SECT: 3133 if (match_int(args, &token)) { 3134 pr_warn("bad max sect parameter '%s'\n", p); 3135 goto out; 3136 } 3137 target->scsi_host->max_sectors = token; 3138 break; 3139 3140 case 
SRP_OPT_QUEUE_SIZE: 3141 if (match_int(args, &token) || token < 1) { 3142 pr_warn("bad queue_size parameter '%s'\n", p); 3143 goto out; 3144 } 3145 target->scsi_host->can_queue = token; 3146 target->queue_size = token + SRP_RSP_SQ_SIZE + 3147 SRP_TSK_MGMT_SQ_SIZE; 3148 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) 3149 target->scsi_host->cmd_per_lun = token; 3150 break; 3151 3152 case SRP_OPT_MAX_CMD_PER_LUN: 3153 if (match_int(args, &token) || token < 1) { 3154 pr_warn("bad max cmd_per_lun parameter '%s'\n", 3155 p); 3156 goto out; 3157 } 3158 target->scsi_host->cmd_per_lun = token; 3159 break; 3160 3161 case SRP_OPT_IO_CLASS: 3162 if (match_hex(args, &token)) { 3163 pr_warn("bad IO class parameter '%s'\n", p); 3164 goto out; 3165 } 3166 if (token != SRP_REV10_IB_IO_CLASS && 3167 token != SRP_REV16A_IB_IO_CLASS) { 3168 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n", 3169 token, SRP_REV10_IB_IO_CLASS, 3170 SRP_REV16A_IB_IO_CLASS); 3171 goto out; 3172 } 3173 target->io_class = token; 3174 break; 3175 3176 case SRP_OPT_INITIATOR_EXT: 3177 p = match_strdup(args); 3178 if (!p) { 3179 ret = -ENOMEM; 3180 goto out; 3181 } 3182 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); 3183 kfree(p); 3184 break; 3185 3186 case SRP_OPT_CMD_SG_ENTRIES: 3187 if (match_int(args, &token) || token < 1 || token > 255) { 3188 pr_warn("bad max cmd_sg_entries parameter '%s'\n", 3189 p); 3190 goto out; 3191 } 3192 target->cmd_sg_cnt = token; 3193 break; 3194 3195 case SRP_OPT_ALLOW_EXT_SG: 3196 if (match_int(args, &token)) { 3197 pr_warn("bad allow_ext_sg parameter '%s'\n", p); 3198 goto out; 3199 } 3200 target->allow_ext_sg = !!token; 3201 break; 3202 3203 case SRP_OPT_SG_TABLESIZE: 3204 if (match_int(args, &token) || token < 1 || 3205 token > SG_MAX_SEGMENTS) { 3206 pr_warn("bad max sg_tablesize parameter '%s'\n", 3207 p); 3208 goto out; 3209 } 3210 target->sg_tablesize = token; 3211 break; 3212 3213 case SRP_OPT_COMP_VECTOR: 3214 if (match_int(args, &token) || token < 0) { 3215 pr_warn("bad comp_vector parameter '%s'\n", p); 3216 goto out; 3217 } 3218 target->comp_vector = token; 3219 break; 3220 3221 case SRP_OPT_TL_RETRY_COUNT: 3222 if (match_int(args, &token) || token < 2 || token > 7) { 3223 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n", 3224 p); 3225 goto out; 3226 } 3227 target->tl_retry_count = token; 3228 break; 3229 3230 default: 3231 pr_warn("unknown parameter or missing value '%s' in target creation request\n", 3232 p); 3233 goto out; 3234 } 3235 } 3236 3237 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL) 3238 ret = 0; 3239 else 3240 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i) 3241 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) && 3242 !(srp_opt_tokens[i].token & opt_mask)) 3243 pr_warn("target creation request is missing parameter '%s'\n", 3244 srp_opt_tokens[i].pattern); 3245 3246 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue 3247 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) 3248 pr_warn("cmd_per_lun = %d > queue_size = %d\n", 3249 target->scsi_host->cmd_per_lun, 3250 target->scsi_host->can_queue); 3251 3252 out: 3253 kfree(options); 3254 return ret; 3255 } 3256 3257 static ssize_t srp_create_target(struct device *dev, 3258 struct device_attribute *attr, 3259 const char *buf, size_t count) 3260 { 3261 struct srp_host *host = 3262 container_of(dev, struct srp_host, dev); 3263 struct Scsi_Host *target_host; 3264 struct srp_target_port *target; 3265 struct srp_rdma_ch *ch; 3266 struct srp_device *srp_dev = 
host->srp_dev; 3267 struct ib_device *ibdev = srp_dev->dev; 3268 int ret, node_idx, node, cpu, i; 3269 unsigned int max_sectors_per_mr, mr_per_cmd = 0; 3270 bool multich = false; 3271 3272 target_host = scsi_host_alloc(&srp_template, 3273 sizeof (struct srp_target_port)); 3274 if (!target_host) 3275 return -ENOMEM; 3276 3277 target_host->transportt = ib_srp_transport_template; 3278 target_host->max_channel = 0; 3279 target_host->max_id = 1; 3280 target_host->max_lun = -1LL; 3281 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; 3282 3283 target = host_to_target(target_host); 3284 3285 target->io_class = SRP_REV16A_IB_IO_CLASS; 3286 target->scsi_host = target_host; 3287 target->srp_host = host; 3288 target->pd = host->srp_dev->pd; 3289 target->lkey = host->srp_dev->pd->local_dma_lkey; 3290 target->cmd_sg_cnt = cmd_sg_entries; 3291 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries; 3292 target->allow_ext_sg = allow_ext_sg; 3293 target->tl_retry_count = 7; 3294 target->queue_size = SRP_DEFAULT_QUEUE_SIZE; 3295 3296 /* 3297 * Avoid that the SCSI host can be removed by srp_remove_target() 3298 * before this function returns. 3299 */ 3300 scsi_host_get(target->scsi_host); 3301 3302 ret = mutex_lock_interruptible(&host->add_target_mutex); 3303 if (ret < 0) 3304 goto put; 3305 3306 ret = srp_parse_options(buf, target); 3307 if (ret) 3308 goto out; 3309 3310 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; 3311 3312 if (!srp_conn_unique(target->srp_host, target)) { 3313 shost_printk(KERN_INFO, target->scsi_host, 3314 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", 3315 be64_to_cpu(target->id_ext), 3316 be64_to_cpu(target->ioc_guid), 3317 be64_to_cpu(target->initiator_ext)); 3318 ret = -EEXIST; 3319 goto out; 3320 } 3321 3322 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && 3323 target->cmd_sg_cnt < target->sg_tablesize) { 3324 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); 3325 target->sg_tablesize = target->cmd_sg_cnt; 3326 } 3327 3328 if (srp_dev->use_fast_reg || srp_dev->use_fmr) { 3329 /* 3330 * FR and FMR can only map one HCA page per entry. If the 3331 * start address is not aligned on a HCA page boundary two 3332 * entries will be used for the head and the tail although 3333 * these two entries combined contain at most one HCA page of 3334 * data. Hence the "+ 1" in the calculation below. 3335 * 3336 * The indirect data buffer descriptor is contiguous so the 3337 * memory for that buffer will only be registered if 3338 * register_always is true. Hence add one to mr_per_cmd if 3339 * register_always has been set. 
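		 * As a purely illustrative example (these numbers are not
		 * taken from any particular HCA): with mr_page_size = 4096
		 * and max_pages_per_mr = 256, max_sectors_per_mr below is
		 * 256 << (12 - 9) = 2048 sectors, and with max_sectors =
		 * 1024 and register_always enabled mr_per_cmd becomes
		 * 1 + (1024 + 1 + 2047) / 2048 = 2.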
3340 */ 3341 max_sectors_per_mr = srp_dev->max_pages_per_mr << 3342 (ilog2(srp_dev->mr_page_size) - 9); 3343 mr_per_cmd = register_always + 3344 (target->scsi_host->max_sectors + 1 + 3345 max_sectors_per_mr - 1) / max_sectors_per_mr; 3346 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n", 3347 target->scsi_host->max_sectors, 3348 srp_dev->max_pages_per_mr, srp_dev->mr_page_size, 3349 max_sectors_per_mr, mr_per_cmd); 3350 } 3351 3352 target_host->sg_tablesize = target->sg_tablesize; 3353 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd; 3354 target->mr_per_cmd = mr_per_cmd; 3355 target->indirect_size = target->sg_tablesize * 3356 sizeof (struct srp_direct_buf); 3357 target->max_iu_len = sizeof (struct srp_cmd) + 3358 sizeof (struct srp_indirect_buf) + 3359 target->cmd_sg_cnt * sizeof (struct srp_direct_buf); 3360 3361 INIT_WORK(&target->tl_err_work, srp_tl_err_work); 3362 INIT_WORK(&target->remove_work, srp_remove_work); 3363 spin_lock_init(&target->lock); 3364 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL); 3365 if (ret) 3366 goto out; 3367 3368 ret = -ENOMEM; 3369 target->ch_count = max_t(unsigned, num_online_nodes(), 3370 min(ch_count ? : 3371 min(4 * num_online_nodes(), 3372 ibdev->num_comp_vectors), 3373 num_online_cpus())); 3374 target->ch = kcalloc(target->ch_count, sizeof(*target->ch), 3375 GFP_KERNEL); 3376 if (!target->ch) 3377 goto out; 3378 3379 node_idx = 0; 3380 for_each_online_node(node) { 3381 const int ch_start = (node_idx * target->ch_count / 3382 num_online_nodes()); 3383 const int ch_end = ((node_idx + 1) * target->ch_count / 3384 num_online_nodes()); 3385 const int cv_start = (node_idx * ibdev->num_comp_vectors / 3386 num_online_nodes() + target->comp_vector) 3387 % ibdev->num_comp_vectors; 3388 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors / 3389 num_online_nodes() + target->comp_vector) 3390 % ibdev->num_comp_vectors; 3391 int cpu_idx = 0; 3392 3393 for_each_online_cpu(cpu) { 3394 if (cpu_to_node(cpu) != node) 3395 continue; 3396 if (ch_start + cpu_idx >= ch_end) 3397 continue; 3398 ch = &target->ch[ch_start + cpu_idx]; 3399 ch->target = target; 3400 ch->comp_vector = cv_start == cv_end ? 
cv_start : 3401 cv_start + cpu_idx % (cv_end - cv_start); 3402 spin_lock_init(&ch->lock); 3403 INIT_LIST_HEAD(&ch->free_tx); 3404 ret = srp_new_cm_id(ch); 3405 if (ret) 3406 goto err_disconnect; 3407 3408 ret = srp_create_ch_ib(ch); 3409 if (ret) 3410 goto err_disconnect; 3411 3412 ret = srp_alloc_req_data(ch); 3413 if (ret) 3414 goto err_disconnect; 3415 3416 ret = srp_connect_ch(ch, multich); 3417 if (ret) { 3418 shost_printk(KERN_ERR, target->scsi_host, 3419 PFX "Connection %d/%d failed\n", 3420 ch_start + cpu_idx, 3421 target->ch_count); 3422 if (node_idx == 0 && cpu_idx == 0) { 3423 goto err_disconnect; 3424 } else { 3425 srp_free_ch_ib(target, ch); 3426 srp_free_req_data(target, ch); 3427 target->ch_count = ch - target->ch; 3428 goto connected; 3429 } 3430 } 3431 3432 multich = true; 3433 cpu_idx++; 3434 } 3435 node_idx++; 3436 } 3437 3438 connected: 3439 target->scsi_host->nr_hw_queues = target->ch_count; 3440 3441 ret = srp_add_target(host, target); 3442 if (ret) 3443 goto err_disconnect; 3444 3445 if (target->state != SRP_TARGET_REMOVED) { 3446 shost_printk(KERN_DEBUG, target->scsi_host, PFX 3447 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n", 3448 be64_to_cpu(target->id_ext), 3449 be64_to_cpu(target->ioc_guid), 3450 be16_to_cpu(target->pkey), 3451 be64_to_cpu(target->service_id), 3452 target->sgid.raw, target->orig_dgid.raw); 3453 } 3454 3455 ret = count; 3456 3457 out: 3458 mutex_unlock(&host->add_target_mutex); 3459 3460 put: 3461 scsi_host_put(target->scsi_host); 3462 if (ret < 0) 3463 scsi_host_put(target->scsi_host); 3464 3465 return ret; 3466 3467 err_disconnect: 3468 srp_disconnect_target(target); 3469 3470 for (i = 0; i < target->ch_count; i++) { 3471 ch = &target->ch[i]; 3472 srp_free_ch_ib(target, ch); 3473 srp_free_req_data(target, ch); 3474 } 3475 3476 kfree(target->ch); 3477 goto out; 3478 } 3479 3480 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target); 3481 3482 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, 3483 char *buf) 3484 { 3485 struct srp_host *host = container_of(dev, struct srp_host, dev); 3486 3487 return sprintf(buf, "%s\n", host->srp_dev->dev->name); 3488 } 3489 3490 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); 3491 3492 static ssize_t show_port(struct device *dev, struct device_attribute *attr, 3493 char *buf) 3494 { 3495 struct srp_host *host = container_of(dev, struct srp_host, dev); 3496 3497 return sprintf(buf, "%d\n", host->port); 3498 } 3499 3500 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); 3501 3502 static struct srp_host *srp_add_port(struct srp_device *device, u8 port) 3503 { 3504 struct srp_host *host; 3505 3506 host = kzalloc(sizeof *host, GFP_KERNEL); 3507 if (!host) 3508 return NULL; 3509 3510 INIT_LIST_HEAD(&host->target_list); 3511 spin_lock_init(&host->target_lock); 3512 init_completion(&host->released); 3513 mutex_init(&host->add_target_mutex); 3514 host->srp_dev = device; 3515 host->port = port; 3516 3517 host->dev.class = &srp_class; 3518 host->dev.parent = device->dev->dma_device; 3519 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port); 3520 3521 if (device_register(&host->dev)) 3522 goto free_host; 3523 if (device_create_file(&host->dev, &dev_attr_add_target)) 3524 goto err_class; 3525 if (device_create_file(&host->dev, &dev_attr_ibdev)) 3526 goto err_class; 3527 if (device_create_file(&host->dev, &dev_attr_port)) 3528 goto err_class; 3529 3530 return host; 3531 3532 err_class: 3533 device_unregister(&host->dev); 
3534 3535 free_host: 3536 kfree(host); 3537 3538 return NULL; 3539 } 3540 3541 static void srp_add_one(struct ib_device *device) 3542 { 3543 struct srp_device *srp_dev; 3544 struct ib_device_attr *attr = &device->attrs; 3545 struct srp_host *host; 3546 int mr_page_shift, p; 3547 u64 max_pages_per_mr; 3548 unsigned int flags = 0; 3549 3550 srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL); 3551 if (!srp_dev) 3552 return; 3553 3554 /* 3555 * Use the smallest page size supported by the HCA, down to a 3556 * minimum of 4096 bytes. We're unlikely to build large sglists 3557 * out of smaller entries. 3558 */ 3559 mr_page_shift = max(12, ffs(attr->page_size_cap) - 1); 3560 srp_dev->mr_page_size = 1 << mr_page_shift; 3561 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1); 3562 max_pages_per_mr = attr->max_mr_size; 3563 do_div(max_pages_per_mr, srp_dev->mr_page_size); 3564 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__, 3565 attr->max_mr_size, srp_dev->mr_page_size, 3566 max_pages_per_mr, SRP_MAX_PAGES_PER_MR); 3567 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, 3568 max_pages_per_mr); 3569 3570 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && 3571 device->map_phys_fmr && device->unmap_fmr); 3572 srp_dev->has_fr = (attr->device_cap_flags & 3573 IB_DEVICE_MEM_MGT_EXTENSIONS); 3574 if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) { 3575 dev_warn(&device->dev, "neither FMR nor FR is supported\n"); 3576 } else if (!never_register && 3577 attr->max_mr_size >= 2 * srp_dev->mr_page_size) { 3578 srp_dev->use_fast_reg = (srp_dev->has_fr && 3579 (!srp_dev->has_fmr || prefer_fr)); 3580 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr; 3581 } 3582 3583 if (never_register || !register_always || 3584 (!srp_dev->has_fmr && !srp_dev->has_fr)) 3585 flags |= IB_PD_UNSAFE_GLOBAL_RKEY; 3586 3587 if (srp_dev->use_fast_reg) { 3588 srp_dev->max_pages_per_mr = 3589 min_t(u32, srp_dev->max_pages_per_mr, 3590 attr->max_fast_reg_page_list_len); 3591 } 3592 srp_dev->mr_max_size = srp_dev->mr_page_size * 3593 srp_dev->max_pages_per_mr; 3594 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n", 3595 device->name, mr_page_shift, attr->max_mr_size, 3596 attr->max_fast_reg_page_list_len, 3597 srp_dev->max_pages_per_mr, srp_dev->mr_max_size); 3598 3599 INIT_LIST_HEAD(&srp_dev->dev_list); 3600 3601 srp_dev->dev = device; 3602 srp_dev->pd = ib_alloc_pd(device, flags); 3603 if (IS_ERR(srp_dev->pd)) 3604 goto free_dev; 3605 3606 3607 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { 3608 host = srp_add_port(srp_dev, p); 3609 if (host) 3610 list_add_tail(&host->list, &srp_dev->dev_list); 3611 } 3612 3613 ib_set_client_data(device, &srp_client, srp_dev); 3614 return; 3615 3616 free_dev: 3617 kfree(srp_dev); 3618 } 3619 3620 static void srp_remove_one(struct ib_device *device, void *client_data) 3621 { 3622 struct srp_device *srp_dev; 3623 struct srp_host *host, *tmp_host; 3624 struct srp_target_port *target; 3625 3626 srp_dev = client_data; 3627 if (!srp_dev) 3628 return; 3629 3630 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { 3631 device_unregister(&host->dev); 3632 /* 3633 * Wait for the sysfs entry to go away, so that no new 3634 * target ports can be created. 3635 */ 3636 wait_for_completion(&host->released); 3637 3638 /* 3639 * Remove all target ports. 
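		 * srp_queue_remove_work() only queues the removal; the
		 * flush_workqueue() calls below wait for the queued
		 * removals and any pending transport error work to finish
		 * before the host structure is freed.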
3640 */ 3641 spin_lock(&host->target_lock); 3642 list_for_each_entry(target, &host->target_list, list) 3643 srp_queue_remove_work(target); 3644 spin_unlock(&host->target_lock); 3645 3646 /* 3647 * Wait for tl_err and target port removal tasks. 3648 */ 3649 flush_workqueue(system_long_wq); 3650 flush_workqueue(srp_remove_wq); 3651 3652 kfree(host); 3653 } 3654 3655 ib_dealloc_pd(srp_dev->pd); 3656 3657 kfree(srp_dev); 3658 } 3659 3660 static struct srp_function_template ib_srp_transport_functions = { 3661 .has_rport_state = true, 3662 .reset_timer_if_blocked = true, 3663 .reconnect_delay = &srp_reconnect_delay, 3664 .fast_io_fail_tmo = &srp_fast_io_fail_tmo, 3665 .dev_loss_tmo = &srp_dev_loss_tmo, 3666 .reconnect = srp_rport_reconnect, 3667 .rport_delete = srp_rport_delete, 3668 .terminate_rport_io = srp_terminate_io, 3669 }; 3670 3671 static int __init srp_init_module(void) 3672 { 3673 int ret; 3674 3675 if (srp_sg_tablesize) { 3676 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); 3677 if (!cmd_sg_entries) 3678 cmd_sg_entries = srp_sg_tablesize; 3679 } 3680 3681 if (!cmd_sg_entries) 3682 cmd_sg_entries = SRP_DEF_SG_TABLESIZE; 3683 3684 if (cmd_sg_entries > 255) { 3685 pr_warn("Clamping cmd_sg_entries to 255\n"); 3686 cmd_sg_entries = 255; 3687 } 3688 3689 if (!indirect_sg_entries) 3690 indirect_sg_entries = cmd_sg_entries; 3691 else if (indirect_sg_entries < cmd_sg_entries) { 3692 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n", 3693 cmd_sg_entries); 3694 indirect_sg_entries = cmd_sg_entries; 3695 } 3696 3697 srp_remove_wq = create_workqueue("srp_remove"); 3698 if (!srp_remove_wq) { 3699 ret = -ENOMEM; 3700 goto out; 3701 } 3702 3703 ret = -ENOMEM; 3704 ib_srp_transport_template = 3705 srp_attach_transport(&ib_srp_transport_functions); 3706 if (!ib_srp_transport_template) 3707 goto destroy_wq; 3708 3709 ret = class_register(&srp_class); 3710 if (ret) { 3711 pr_err("couldn't register class infiniband_srp\n"); 3712 goto release_tr; 3713 } 3714 3715 ib_sa_register_client(&srp_sa_client); 3716 3717 ret = ib_register_client(&srp_client); 3718 if (ret) { 3719 pr_err("couldn't register IB client\n"); 3720 goto unreg_sa; 3721 } 3722 3723 out: 3724 return ret; 3725 3726 unreg_sa: 3727 ib_sa_unregister_client(&srp_sa_client); 3728 class_unregister(&srp_class); 3729 3730 release_tr: 3731 srp_release_transport(ib_srp_transport_template); 3732 3733 destroy_wq: 3734 destroy_workqueue(srp_remove_wq); 3735 goto out; 3736 } 3737 3738 static void __exit srp_cleanup_module(void) 3739 { 3740 ib_unregister_client(&srp_client); 3741 ib_sa_unregister_client(&srp_sa_client); 3742 class_unregister(&srp_class); 3743 srp_release_transport(ib_srp_transport_template); 3744 destroy_workqueue(srp_remove_wq); 3745 } 3746 3747 module_init(srp_init_module); 3748 module_exit(srp_cleanup_module); 3749
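/*
 * Illustrative module usage (parameter values are examples only):
 *
 *   modprobe ib_srp cmd_sg_entries=32
 *
 * srp_add_one() then creates one srp-<device>-<port> entry per HCA port
 * under /sys/class/infiniband_srp, and target ports are added by writing
 * to the add_target attribute as described in the comment above the
 * option parsing code.
 */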