1 /* 2 * Copyright (c) 2016 Avago Technologies. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of version 2 of the GNU General Public License as 6 * published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful. 9 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, 10 * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A 11 * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO 12 * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID. 13 * See the GNU General Public License for more details, a copy of which 14 * can be found in the file COPYING included with this package 15 * 16 */ 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 #include <linux/module.h> 19 #include <linux/parser.h> 20 #include <uapi/scsi/fc/fc_fs.h> 21 #include <uapi/scsi/fc/fc_els.h> 22 #include <linux/delay.h> 23 24 #include "nvme.h" 25 #include "fabrics.h" 26 #include <linux/nvme-fc-driver.h> 27 #include <linux/nvme-fc.h> 28 29 30 /* *************************** Data Structures/Defines ****************** */ 31 32 33 /* 34 * We handle AEN commands ourselves and don't even let the 35 * block layer know about them. 
36 */ 37 #define NVME_FC_NR_AEN_COMMANDS 1 38 #define NVME_FC_AQ_BLKMQ_DEPTH \ 39 (NVMF_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS) 40 #define AEN_CMDID_BASE (NVME_FC_AQ_BLKMQ_DEPTH + 1) 41 42 enum nvme_fc_queue_flags { 43 NVME_FC_Q_CONNECTED = (1 << 0), 44 }; 45 46 #define NVMEFC_QUEUE_DELAY 3 /* ms units */ 47 48 struct nvme_fc_queue { 49 struct nvme_fc_ctrl *ctrl; 50 struct device *dev; 51 struct blk_mq_hw_ctx *hctx; 52 void *lldd_handle; 53 int queue_size; 54 size_t cmnd_capsule_len; 55 u32 qnum; 56 u32 rqcnt; 57 u32 seqno; 58 59 u64 connection_id; 60 atomic_t csn; 61 62 unsigned long flags; 63 } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 64 65 enum nvme_fcop_flags { 66 FCOP_FLAGS_TERMIO = (1 << 0), 67 FCOP_FLAGS_RELEASED = (1 << 1), 68 FCOP_FLAGS_COMPLETE = (1 << 2), 69 FCOP_FLAGS_AEN = (1 << 3), 70 }; 71 72 struct nvmefc_ls_req_op { 73 struct nvmefc_ls_req ls_req; 74 75 struct nvme_fc_rport *rport; 76 struct nvme_fc_queue *queue; 77 struct request *rq; 78 u32 flags; 79 80 int ls_error; 81 struct completion ls_done; 82 struct list_head lsreq_list; /* rport->ls_req_list */ 83 bool req_queued; 84 }; 85 86 enum nvme_fcpop_state { 87 FCPOP_STATE_UNINIT = 0, 88 FCPOP_STATE_IDLE = 1, 89 FCPOP_STATE_ACTIVE = 2, 90 FCPOP_STATE_ABORTED = 3, 91 FCPOP_STATE_COMPLETE = 4, 92 }; 93 94 struct nvme_fc_fcp_op { 95 struct nvme_request nreq; /* 96 * nvme/host/core.c 97 * requires this to be 98 * the 1st element in the 99 * private structure 100 * associated with the 101 * request. 
102 */ 103 struct nvmefc_fcp_req fcp_req; 104 105 struct nvme_fc_ctrl *ctrl; 106 struct nvme_fc_queue *queue; 107 struct request *rq; 108 109 atomic_t state; 110 u32 flags; 111 u32 rqno; 112 u32 nents; 113 114 struct nvme_fc_cmd_iu cmd_iu; 115 struct nvme_fc_ersp_iu rsp_iu; 116 }; 117 118 struct nvme_fc_lport { 119 struct nvme_fc_local_port localport; 120 121 struct ida endp_cnt; 122 struct list_head port_list; /* nvme_fc_port_list */ 123 struct list_head endp_list; 124 struct device *dev; /* physical device for dma */ 125 struct nvme_fc_port_template *ops; 126 struct kref ref; 127 } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 128 129 struct nvme_fc_rport { 130 struct nvme_fc_remote_port remoteport; 131 132 struct list_head endp_list; /* for lport->endp_list */ 133 struct list_head ctrl_list; 134 struct list_head ls_req_list; 135 struct device *dev; /* physical device for dma */ 136 struct nvme_fc_lport *lport; 137 spinlock_t lock; 138 struct kref ref; 139 } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 140 141 enum nvme_fcctrl_flags { 142 FCCTRL_TERMIO = (1 << 0), 143 }; 144 145 struct nvme_fc_ctrl { 146 spinlock_t lock; 147 struct nvme_fc_queue *queues; 148 struct device *dev; 149 struct nvme_fc_lport *lport; 150 struct nvme_fc_rport *rport; 151 u32 queue_count; 152 u32 cnum; 153 154 u64 association_id; 155 156 u64 cap; 157 158 struct list_head ctrl_list; /* rport->ctrl_list */ 159 160 struct blk_mq_tag_set admin_tag_set; 161 struct blk_mq_tag_set tag_set; 162 163 struct work_struct delete_work; 164 struct work_struct reset_work; 165 struct delayed_work connect_work; 166 167 struct kref ref; 168 u32 flags; 169 u32 iocnt; 170 171 struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS]; 172 173 struct nvme_ctrl ctrl; 174 }; 175 176 static inline struct nvme_fc_ctrl * 177 to_fc_ctrl(struct nvme_ctrl *ctrl) 178 { 179 return container_of(ctrl, struct nvme_fc_ctrl, ctrl); 180 } 181 182 static inline struct nvme_fc_lport 
* 183 localport_to_lport(struct nvme_fc_local_port *portptr) 184 { 185 return container_of(portptr, struct nvme_fc_lport, localport); 186 } 187 188 static inline struct nvme_fc_rport * 189 remoteport_to_rport(struct nvme_fc_remote_port *portptr) 190 { 191 return container_of(portptr, struct nvme_fc_rport, remoteport); 192 } 193 194 static inline struct nvmefc_ls_req_op * 195 ls_req_to_lsop(struct nvmefc_ls_req *lsreq) 196 { 197 return container_of(lsreq, struct nvmefc_ls_req_op, ls_req); 198 } 199 200 static inline struct nvme_fc_fcp_op * 201 fcp_req_to_fcp_op(struct nvmefc_fcp_req *fcpreq) 202 { 203 return container_of(fcpreq, struct nvme_fc_fcp_op, fcp_req); 204 } 205 206 207 208 /* *************************** Globals **************************** */ 209 210 211 static DEFINE_SPINLOCK(nvme_fc_lock); 212 213 static LIST_HEAD(nvme_fc_lport_list); 214 static DEFINE_IDA(nvme_fc_local_port_cnt); 215 static DEFINE_IDA(nvme_fc_ctrl_cnt); 216 217 static struct workqueue_struct *nvme_fc_wq; 218 219 220 221 /* *********************** FC-NVME Port Management ************************ */ 222 223 static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *); 224 static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, 225 struct nvme_fc_queue *, unsigned int); 226 227 228 /** 229 * nvme_fc_register_localport - transport entry point called by an 230 * LLDD to register the existence of a NVME 231 * host FC port. 232 * @pinfo: pointer to information about the port to be registered 233 * @template: LLDD entrypoints and operational parameters for the port 234 * @dev: physical hardware device node port corresponds to. Will be 235 * used for DMA mappings 236 * @lport_p: pointer to a local port pointer. Upon success, the routine 237 * will allocate a nvme_fc_local_port structure and place its 238 * address in the local port pointer. Upon failure, local port 239 * pointer will be set to 0. 240 * 241 * Returns: 242 * a completion status. 
Must be 0 upon success; a negative errno 243 * (ex: -ENXIO) upon failure. 244 */ 245 int 246 nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, 247 struct nvme_fc_port_template *template, 248 struct device *dev, 249 struct nvme_fc_local_port **portptr) 250 { 251 struct nvme_fc_lport *newrec; 252 unsigned long flags; 253 int ret, idx; 254 255 if (!template->localport_delete || !template->remoteport_delete || 256 !template->ls_req || !template->fcp_io || 257 !template->ls_abort || !template->fcp_abort || 258 !template->max_hw_queues || !template->max_sgl_segments || 259 !template->max_dif_sgl_segments || !template->dma_boundary) { 260 ret = -EINVAL; 261 goto out_reghost_failed; 262 } 263 264 newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz), 265 GFP_KERNEL); 266 if (!newrec) { 267 ret = -ENOMEM; 268 goto out_reghost_failed; 269 } 270 271 idx = ida_simple_get(&nvme_fc_local_port_cnt, 0, 0, GFP_KERNEL); 272 if (idx < 0) { 273 ret = -ENOSPC; 274 goto out_fail_kfree; 275 } 276 277 if (!get_device(dev) && dev) { 278 ret = -ENODEV; 279 goto out_ida_put; 280 } 281 282 INIT_LIST_HEAD(&newrec->port_list); 283 INIT_LIST_HEAD(&newrec->endp_list); 284 kref_init(&newrec->ref); 285 newrec->ops = template; 286 newrec->dev = dev; 287 ida_init(&newrec->endp_cnt); 288 newrec->localport.private = &newrec[1]; 289 newrec->localport.node_name = pinfo->node_name; 290 newrec->localport.port_name = pinfo->port_name; 291 newrec->localport.port_role = pinfo->port_role; 292 newrec->localport.port_id = pinfo->port_id; 293 newrec->localport.port_state = FC_OBJSTATE_ONLINE; 294 newrec->localport.port_num = idx; 295 296 spin_lock_irqsave(&nvme_fc_lock, flags); 297 list_add_tail(&newrec->port_list, &nvme_fc_lport_list); 298 spin_unlock_irqrestore(&nvme_fc_lock, flags); 299 300 if (dev) 301 dma_set_seg_boundary(dev, template->dma_boundary); 302 303 *portptr = &newrec->localport; 304 return 0; 305 306 out_ida_put: 307 ida_simple_remove(&nvme_fc_local_port_cnt, idx); 308 
out_fail_kfree: 309 kfree(newrec); 310 out_reghost_failed: 311 *portptr = NULL; 312 313 return ret; 314 } 315 EXPORT_SYMBOL_GPL(nvme_fc_register_localport); 316 317 static void 318 nvme_fc_free_lport(struct kref *ref) 319 { 320 struct nvme_fc_lport *lport = 321 container_of(ref, struct nvme_fc_lport, ref); 322 unsigned long flags; 323 324 WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED); 325 WARN_ON(!list_empty(&lport->endp_list)); 326 327 /* remove from transport list */ 328 spin_lock_irqsave(&nvme_fc_lock, flags); 329 list_del(&lport->port_list); 330 spin_unlock_irqrestore(&nvme_fc_lock, flags); 331 332 /* let the LLDD know we've finished tearing it down */ 333 lport->ops->localport_delete(&lport->localport); 334 335 ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); 336 ida_destroy(&lport->endp_cnt); 337 338 put_device(lport->dev); 339 340 kfree(lport); 341 } 342 343 static void 344 nvme_fc_lport_put(struct nvme_fc_lport *lport) 345 { 346 kref_put(&lport->ref, nvme_fc_free_lport); 347 } 348 349 static int 350 nvme_fc_lport_get(struct nvme_fc_lport *lport) 351 { 352 return kref_get_unless_zero(&lport->ref); 353 } 354 355 /** 356 * nvme_fc_unregister_localport - transport entry point called by an 357 * LLDD to deregister/remove a previously 358 * registered a NVME host FC port. 359 * @localport: pointer to the (registered) local port that is to be 360 * deregistered. 361 * 362 * Returns: 363 * a completion status. Must be 0 upon success; a negative errno 364 * (ex: -ENXIO) upon failure. 
365 */ 366 int 367 nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr) 368 { 369 struct nvme_fc_lport *lport = localport_to_lport(portptr); 370 unsigned long flags; 371 372 if (!portptr) 373 return -EINVAL; 374 375 spin_lock_irqsave(&nvme_fc_lock, flags); 376 377 if (portptr->port_state != FC_OBJSTATE_ONLINE) { 378 spin_unlock_irqrestore(&nvme_fc_lock, flags); 379 return -EINVAL; 380 } 381 portptr->port_state = FC_OBJSTATE_DELETED; 382 383 spin_unlock_irqrestore(&nvme_fc_lock, flags); 384 385 nvme_fc_lport_put(lport); 386 387 return 0; 388 } 389 EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport); 390 391 /** 392 * nvme_fc_register_remoteport - transport entry point called by an 393 * LLDD to register the existence of a NVME 394 * subsystem FC port on its fabric. 395 * @localport: pointer to the (registered) local port that the remote 396 * subsystem port is connected to. 397 * @pinfo: pointer to information about the port to be registered 398 * @rport_p: pointer to a remote port pointer. Upon success, the routine 399 * will allocate a nvme_fc_remote_port structure and place its 400 * address in the remote port pointer. Upon failure, remote port 401 * pointer will be set to 0. 402 * 403 * Returns: 404 * a completion status. Must be 0 upon success; a negative errno 405 * (ex: -ENXIO) upon failure. 
406 */ 407 int 408 nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, 409 struct nvme_fc_port_info *pinfo, 410 struct nvme_fc_remote_port **portptr) 411 { 412 struct nvme_fc_lport *lport = localport_to_lport(localport); 413 struct nvme_fc_rport *newrec; 414 unsigned long flags; 415 int ret, idx; 416 417 newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz), 418 GFP_KERNEL); 419 if (!newrec) { 420 ret = -ENOMEM; 421 goto out_reghost_failed; 422 } 423 424 if (!nvme_fc_lport_get(lport)) { 425 ret = -ESHUTDOWN; 426 goto out_kfree_rport; 427 } 428 429 idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL); 430 if (idx < 0) { 431 ret = -ENOSPC; 432 goto out_lport_put; 433 } 434 435 INIT_LIST_HEAD(&newrec->endp_list); 436 INIT_LIST_HEAD(&newrec->ctrl_list); 437 INIT_LIST_HEAD(&newrec->ls_req_list); 438 kref_init(&newrec->ref); 439 spin_lock_init(&newrec->lock); 440 newrec->remoteport.localport = &lport->localport; 441 newrec->dev = lport->dev; 442 newrec->lport = lport; 443 newrec->remoteport.private = &newrec[1]; 444 newrec->remoteport.port_role = pinfo->port_role; 445 newrec->remoteport.node_name = pinfo->node_name; 446 newrec->remoteport.port_name = pinfo->port_name; 447 newrec->remoteport.port_id = pinfo->port_id; 448 newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; 449 newrec->remoteport.port_num = idx; 450 451 spin_lock_irqsave(&nvme_fc_lock, flags); 452 list_add_tail(&newrec->endp_list, &lport->endp_list); 453 spin_unlock_irqrestore(&nvme_fc_lock, flags); 454 455 *portptr = &newrec->remoteport; 456 return 0; 457 458 out_lport_put: 459 nvme_fc_lport_put(lport); 460 out_kfree_rport: 461 kfree(newrec); 462 out_reghost_failed: 463 *portptr = NULL; 464 return ret; 465 } 466 EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport); 467 468 static void 469 nvme_fc_free_rport(struct kref *ref) 470 { 471 struct nvme_fc_rport *rport = 472 container_of(ref, struct nvme_fc_rport, ref); 473 struct nvme_fc_lport *lport = 474 
localport_to_lport(rport->remoteport.localport); 475 unsigned long flags; 476 477 WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED); 478 WARN_ON(!list_empty(&rport->ctrl_list)); 479 480 /* remove from lport list */ 481 spin_lock_irqsave(&nvme_fc_lock, flags); 482 list_del(&rport->endp_list); 483 spin_unlock_irqrestore(&nvme_fc_lock, flags); 484 485 /* let the LLDD know we've finished tearing it down */ 486 lport->ops->remoteport_delete(&rport->remoteport); 487 488 ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num); 489 490 kfree(rport); 491 492 nvme_fc_lport_put(lport); 493 } 494 495 static void 496 nvme_fc_rport_put(struct nvme_fc_rport *rport) 497 { 498 kref_put(&rport->ref, nvme_fc_free_rport); 499 } 500 501 static int 502 nvme_fc_rport_get(struct nvme_fc_rport *rport) 503 { 504 return kref_get_unless_zero(&rport->ref); 505 } 506 507 static int 508 nvme_fc_abort_lsops(struct nvme_fc_rport *rport) 509 { 510 struct nvmefc_ls_req_op *lsop; 511 unsigned long flags; 512 513 restart: 514 spin_lock_irqsave(&rport->lock, flags); 515 516 list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) { 517 if (!(lsop->flags & FCOP_FLAGS_TERMIO)) { 518 lsop->flags |= FCOP_FLAGS_TERMIO; 519 spin_unlock_irqrestore(&rport->lock, flags); 520 rport->lport->ops->ls_abort(&rport->lport->localport, 521 &rport->remoteport, 522 &lsop->ls_req); 523 goto restart; 524 } 525 } 526 spin_unlock_irqrestore(&rport->lock, flags); 527 528 return 0; 529 } 530 531 /** 532 * nvme_fc_unregister_remoteport - transport entry point called by an 533 * LLDD to deregister/remove a previously 534 * registered a NVME subsystem FC port. 535 * @remoteport: pointer to the (registered) remote port that is to be 536 * deregistered. 537 * 538 * Returns: 539 * a completion status. Must be 0 upon success; a negative errno 540 * (ex: -ENXIO) upon failure. 
541 */ 542 int 543 nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) 544 { 545 struct nvme_fc_rport *rport = remoteport_to_rport(portptr); 546 struct nvme_fc_ctrl *ctrl; 547 unsigned long flags; 548 549 if (!portptr) 550 return -EINVAL; 551 552 spin_lock_irqsave(&rport->lock, flags); 553 554 if (portptr->port_state != FC_OBJSTATE_ONLINE) { 555 spin_unlock_irqrestore(&rport->lock, flags); 556 return -EINVAL; 557 } 558 portptr->port_state = FC_OBJSTATE_DELETED; 559 560 /* tear down all associations to the remote port */ 561 list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) 562 __nvme_fc_del_ctrl(ctrl); 563 564 spin_unlock_irqrestore(&rport->lock, flags); 565 566 nvme_fc_abort_lsops(rport); 567 568 nvme_fc_rport_put(rport); 569 return 0; 570 } 571 EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport); 572 573 574 /* *********************** FC-NVME DMA Handling **************************** */ 575 576 /* 577 * The fcloop device passes in a NULL device pointer. Real LLD's will 578 * pass in a valid device pointer. If NULL is passed to the dma mapping 579 * routines, depending on the platform, it may or may not succeed, and 580 * may crash. 581 * 582 * As such: 583 * Wrapper all the dma routines and check the dev pointer. 584 * 585 * If simple mappings (return just a dma address, we'll noop them, 586 * returning a dma address of 0. 587 * 588 * On more complex mappings (dma_map_sg), a pseudo routine fills 589 * in the scatter list, setting all dma addresses to 0. 590 */ 591 592 static inline dma_addr_t 593 fc_dma_map_single(struct device *dev, void *ptr, size_t size, 594 enum dma_data_direction dir) 595 { 596 return dev ? dma_map_single(dev, ptr, size, dir) : (dma_addr_t)0L; 597 } 598 599 static inline int 600 fc_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) 601 { 602 return dev ? 
dma_mapping_error(dev, dma_addr) : 0; 603 } 604 605 static inline void 606 fc_dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, 607 enum dma_data_direction dir) 608 { 609 if (dev) 610 dma_unmap_single(dev, addr, size, dir); 611 } 612 613 static inline void 614 fc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, 615 enum dma_data_direction dir) 616 { 617 if (dev) 618 dma_sync_single_for_cpu(dev, addr, size, dir); 619 } 620 621 static inline void 622 fc_dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, 623 enum dma_data_direction dir) 624 { 625 if (dev) 626 dma_sync_single_for_device(dev, addr, size, dir); 627 } 628 629 /* pseudo dma_map_sg call */ 630 static int 631 fc_map_sg(struct scatterlist *sg, int nents) 632 { 633 struct scatterlist *s; 634 int i; 635 636 WARN_ON(nents == 0 || sg[0].length == 0); 637 638 for_each_sg(sg, s, nents, i) { 639 s->dma_address = 0L; 640 #ifdef CONFIG_NEED_SG_DMA_LENGTH 641 s->dma_length = s->length; 642 #endif 643 } 644 return nents; 645 } 646 647 static inline int 648 fc_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, 649 enum dma_data_direction dir) 650 { 651 return dev ? 
dma_map_sg(dev, sg, nents, dir) : fc_map_sg(sg, nents); 652 } 653 654 static inline void 655 fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 656 enum dma_data_direction dir) 657 { 658 if (dev) 659 dma_unmap_sg(dev, sg, nents, dir); 660 } 661 662 663 /* *********************** FC-NVME LS Handling **************************** */ 664 665 static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *); 666 static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); 667 668 669 static void 670 __nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) 671 { 672 struct nvme_fc_rport *rport = lsop->rport; 673 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 674 unsigned long flags; 675 676 spin_lock_irqsave(&rport->lock, flags); 677 678 if (!lsop->req_queued) { 679 spin_unlock_irqrestore(&rport->lock, flags); 680 return; 681 } 682 683 list_del(&lsop->lsreq_list); 684 685 lsop->req_queued = false; 686 687 spin_unlock_irqrestore(&rport->lock, flags); 688 689 fc_dma_unmap_single(rport->dev, lsreq->rqstdma, 690 (lsreq->rqstlen + lsreq->rsplen), 691 DMA_BIDIRECTIONAL); 692 693 nvme_fc_rport_put(rport); 694 } 695 696 static int 697 __nvme_fc_send_ls_req(struct nvme_fc_rport *rport, 698 struct nvmefc_ls_req_op *lsop, 699 void (*done)(struct nvmefc_ls_req *req, int status)) 700 { 701 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 702 unsigned long flags; 703 int ret = 0; 704 705 if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 706 return -ECONNREFUSED; 707 708 if (!nvme_fc_rport_get(rport)) 709 return -ESHUTDOWN; 710 711 lsreq->done = done; 712 lsop->rport = rport; 713 lsop->req_queued = false; 714 INIT_LIST_HEAD(&lsop->lsreq_list); 715 init_completion(&lsop->ls_done); 716 717 lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr, 718 lsreq->rqstlen + lsreq->rsplen, 719 DMA_BIDIRECTIONAL); 720 if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) { 721 ret = -EFAULT; 722 goto out_putrport; 723 } 724 lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; 725 726 
spin_lock_irqsave(&rport->lock, flags); 727 728 list_add_tail(&lsop->lsreq_list, &rport->ls_req_list); 729 730 lsop->req_queued = true; 731 732 spin_unlock_irqrestore(&rport->lock, flags); 733 734 ret = rport->lport->ops->ls_req(&rport->lport->localport, 735 &rport->remoteport, lsreq); 736 if (ret) 737 goto out_unlink; 738 739 return 0; 740 741 out_unlink: 742 lsop->ls_error = ret; 743 spin_lock_irqsave(&rport->lock, flags); 744 lsop->req_queued = false; 745 list_del(&lsop->lsreq_list); 746 spin_unlock_irqrestore(&rport->lock, flags); 747 fc_dma_unmap_single(rport->dev, lsreq->rqstdma, 748 (lsreq->rqstlen + lsreq->rsplen), 749 DMA_BIDIRECTIONAL); 750 out_putrport: 751 nvme_fc_rport_put(rport); 752 753 return ret; 754 } 755 756 static void 757 nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status) 758 { 759 struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); 760 761 lsop->ls_error = status; 762 complete(&lsop->ls_done); 763 } 764 765 static int 766 nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop) 767 { 768 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 769 struct fcnvme_ls_rjt *rjt = lsreq->rspaddr; 770 int ret; 771 772 ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done); 773 774 if (!ret) { 775 /* 776 * No timeout/not interruptible as we need the struct 777 * to exist until the lldd calls us back. Thus mandate 778 * wait until driver calls back. lldd responsible for 779 * the timeout action 780 */ 781 wait_for_completion(&lsop->ls_done); 782 783 __nvme_fc_finish_ls_req(lsop); 784 785 ret = lsop->ls_error; 786 } 787 788 if (ret) 789 return ret; 790 791 /* ACC or RJT payload ? 
*/ 792 if (rjt->w0.ls_cmd == FCNVME_LS_RJT) 793 return -ENXIO; 794 795 return 0; 796 } 797 798 static int 799 nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport, 800 struct nvmefc_ls_req_op *lsop, 801 void (*done)(struct nvmefc_ls_req *req, int status)) 802 { 803 /* don't wait for completion */ 804 805 return __nvme_fc_send_ls_req(rport, lsop, done); 806 } 807 808 /* Validation Error indexes into the string table below */ 809 enum { 810 VERR_NO_ERROR = 0, 811 VERR_LSACC = 1, 812 VERR_LSDESC_RQST = 2, 813 VERR_LSDESC_RQST_LEN = 3, 814 VERR_ASSOC_ID = 4, 815 VERR_ASSOC_ID_LEN = 5, 816 VERR_CONN_ID = 6, 817 VERR_CONN_ID_LEN = 7, 818 VERR_CR_ASSOC = 8, 819 VERR_CR_ASSOC_ACC_LEN = 9, 820 VERR_CR_CONN = 10, 821 VERR_CR_CONN_ACC_LEN = 11, 822 VERR_DISCONN = 12, 823 VERR_DISCONN_ACC_LEN = 13, 824 }; 825 826 static char *validation_errors[] = { 827 "OK", 828 "Not LS_ACC", 829 "Not LSDESC_RQST", 830 "Bad LSDESC_RQST Length", 831 "Not Association ID", 832 "Bad Association ID Length", 833 "Not Connection ID", 834 "Bad Connection ID Length", 835 "Not CR_ASSOC Rqst", 836 "Bad CR_ASSOC ACC Length", 837 "Not CR_CONN Rqst", 838 "Bad CR_CONN ACC Length", 839 "Not Disconnect Rqst", 840 "Bad Disconnect ACC Length", 841 }; 842 843 static int 844 nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, 845 struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio) 846 { 847 struct nvmefc_ls_req_op *lsop; 848 struct nvmefc_ls_req *lsreq; 849 struct fcnvme_ls_cr_assoc_rqst *assoc_rqst; 850 struct fcnvme_ls_cr_assoc_acc *assoc_acc; 851 int ret, fcret = 0; 852 853 lsop = kzalloc((sizeof(*lsop) + 854 ctrl->lport->ops->lsrqst_priv_sz + 855 sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL); 856 if (!lsop) { 857 ret = -ENOMEM; 858 goto out_no_memory; 859 } 860 lsreq = &lsop->ls_req; 861 862 lsreq->private = (void *)&lsop[1]; 863 assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *) 864 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 865 assoc_acc = (struct fcnvme_ls_cr_assoc_acc 
*)&assoc_rqst[1]; 866 867 assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION; 868 assoc_rqst->desc_list_len = 869 cpu_to_be32(sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)); 870 871 assoc_rqst->assoc_cmd.desc_tag = 872 cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD); 873 assoc_rqst->assoc_cmd.desc_len = 874 fcnvme_lsdesc_len( 875 sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)); 876 877 assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); 878 assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize); 879 /* Linux supports only Dynamic controllers */ 880 assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff); 881 uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id); 882 strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn, 883 min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE)); 884 strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn, 885 min(FCNVME_ASSOC_SUBNQN_LEN, NVMF_NQN_SIZE)); 886 887 lsop->queue = queue; 888 lsreq->rqstaddr = assoc_rqst; 889 lsreq->rqstlen = sizeof(*assoc_rqst); 890 lsreq->rspaddr = assoc_acc; 891 lsreq->rsplen = sizeof(*assoc_acc); 892 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 893 894 ret = nvme_fc_send_ls_req(ctrl->rport, lsop); 895 if (ret) 896 goto out_free_buffer; 897 898 /* process connect LS completion */ 899 900 /* validate the ACC response */ 901 if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) 902 fcret = VERR_LSACC; 903 else if (assoc_acc->hdr.desc_list_len != 904 fcnvme_lsdesc_len( 905 sizeof(struct fcnvme_ls_cr_assoc_acc))) 906 fcret = VERR_CR_ASSOC_ACC_LEN; 907 else if (assoc_acc->hdr.rqst.desc_tag != 908 cpu_to_be32(FCNVME_LSDESC_RQST)) 909 fcret = VERR_LSDESC_RQST; 910 else if (assoc_acc->hdr.rqst.desc_len != 911 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) 912 fcret = VERR_LSDESC_RQST_LEN; 913 else if (assoc_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_ASSOCIATION) 914 fcret = VERR_CR_ASSOC; 915 else if (assoc_acc->associd.desc_tag != 916 cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) 917 fcret = 
VERR_ASSOC_ID; 918 else if (assoc_acc->associd.desc_len != 919 fcnvme_lsdesc_len( 920 sizeof(struct fcnvme_lsdesc_assoc_id))) 921 fcret = VERR_ASSOC_ID_LEN; 922 else if (assoc_acc->connectid.desc_tag != 923 cpu_to_be32(FCNVME_LSDESC_CONN_ID)) 924 fcret = VERR_CONN_ID; 925 else if (assoc_acc->connectid.desc_len != 926 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id))) 927 fcret = VERR_CONN_ID_LEN; 928 929 if (fcret) { 930 ret = -EBADF; 931 dev_err(ctrl->dev, 932 "q %d connect failed: %s\n", 933 queue->qnum, validation_errors[fcret]); 934 } else { 935 ctrl->association_id = 936 be64_to_cpu(assoc_acc->associd.association_id); 937 queue->connection_id = 938 be64_to_cpu(assoc_acc->connectid.connection_id); 939 set_bit(NVME_FC_Q_CONNECTED, &queue->flags); 940 } 941 942 out_free_buffer: 943 kfree(lsop); 944 out_no_memory: 945 if (ret) 946 dev_err(ctrl->dev, 947 "queue %d connect admin queue failed (%d).\n", 948 queue->qnum, ret); 949 return ret; 950 } 951 952 static int 953 nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, 954 u16 qsize, u16 ersp_ratio) 955 { 956 struct nvmefc_ls_req_op *lsop; 957 struct nvmefc_ls_req *lsreq; 958 struct fcnvme_ls_cr_conn_rqst *conn_rqst; 959 struct fcnvme_ls_cr_conn_acc *conn_acc; 960 int ret, fcret = 0; 961 962 lsop = kzalloc((sizeof(*lsop) + 963 ctrl->lport->ops->lsrqst_priv_sz + 964 sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL); 965 if (!lsop) { 966 ret = -ENOMEM; 967 goto out_no_memory; 968 } 969 lsreq = &lsop->ls_req; 970 971 lsreq->private = (void *)&lsop[1]; 972 conn_rqst = (struct fcnvme_ls_cr_conn_rqst *) 973 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 974 conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1]; 975 976 conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION; 977 conn_rqst->desc_list_len = cpu_to_be32( 978 sizeof(struct fcnvme_lsdesc_assoc_id) + 979 sizeof(struct fcnvme_lsdesc_cr_conn_cmd)); 980 981 conn_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); 
982 conn_rqst->associd.desc_len = 983 fcnvme_lsdesc_len( 984 sizeof(struct fcnvme_lsdesc_assoc_id)); 985 conn_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); 986 conn_rqst->connect_cmd.desc_tag = 987 cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD); 988 conn_rqst->connect_cmd.desc_len = 989 fcnvme_lsdesc_len( 990 sizeof(struct fcnvme_lsdesc_cr_conn_cmd)); 991 conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); 992 conn_rqst->connect_cmd.qid = cpu_to_be16(queue->qnum); 993 conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize); 994 995 lsop->queue = queue; 996 lsreq->rqstaddr = conn_rqst; 997 lsreq->rqstlen = sizeof(*conn_rqst); 998 lsreq->rspaddr = conn_acc; 999 lsreq->rsplen = sizeof(*conn_acc); 1000 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 1001 1002 ret = nvme_fc_send_ls_req(ctrl->rport, lsop); 1003 if (ret) 1004 goto out_free_buffer; 1005 1006 /* process connect LS completion */ 1007 1008 /* validate the ACC response */ 1009 if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) 1010 fcret = VERR_LSACC; 1011 else if (conn_acc->hdr.desc_list_len != 1012 fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc))) 1013 fcret = VERR_CR_CONN_ACC_LEN; 1014 else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) 1015 fcret = VERR_LSDESC_RQST; 1016 else if (conn_acc->hdr.rqst.desc_len != 1017 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) 1018 fcret = VERR_LSDESC_RQST_LEN; 1019 else if (conn_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_CONNECTION) 1020 fcret = VERR_CR_CONN; 1021 else if (conn_acc->connectid.desc_tag != 1022 cpu_to_be32(FCNVME_LSDESC_CONN_ID)) 1023 fcret = VERR_CONN_ID; 1024 else if (conn_acc->connectid.desc_len != 1025 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id))) 1026 fcret = VERR_CONN_ID_LEN; 1027 1028 if (fcret) { 1029 ret = -EBADF; 1030 dev_err(ctrl->dev, 1031 "q %d connect failed: %s\n", 1032 queue->qnum, validation_errors[fcret]); 1033 } else { 1034 queue->connection_id = 1035 
be64_to_cpu(conn_acc->connectid.connection_id); 1036 set_bit(NVME_FC_Q_CONNECTED, &queue->flags); 1037 } 1038 1039 out_free_buffer: 1040 kfree(lsop); 1041 out_no_memory: 1042 if (ret) 1043 dev_err(ctrl->dev, 1044 "queue %d connect command failed (%d).\n", 1045 queue->qnum, ret); 1046 return ret; 1047 } 1048 1049 static void 1050 nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) 1051 { 1052 struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); 1053 1054 __nvme_fc_finish_ls_req(lsop); 1055 1056 /* fc-nvme iniator doesn't care about success or failure of cmd */ 1057 1058 kfree(lsop); 1059 } 1060 1061 /* 1062 * This routine sends a FC-NVME LS to disconnect (aka terminate) 1063 * the FC-NVME Association. Terminating the association also 1064 * terminates the FC-NVME connections (per queue, both admin and io 1065 * queues) that are part of the association. E.g. things are torn 1066 * down, and the related FC-NVME Association ID and Connection IDs 1067 * become invalid. 1068 * 1069 * The behavior of the fc-nvme initiator is such that it's 1070 * understanding of the association and connections will implicitly 1071 * be torn down. The action is implicit as it may be due to a loss of 1072 * connectivity with the fc-nvme target, so you may never get a 1073 * response even if you tried. As such, the action of this routine 1074 * is to asynchronously send the LS, ignore any results of the LS, and 1075 * continue on with terminating the association. If the fc-nvme target 1076 * is present and receives the LS, it too can tear down. 
1077 */ 1078 static void 1079 nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) 1080 { 1081 struct fcnvme_ls_disconnect_rqst *discon_rqst; 1082 struct fcnvme_ls_disconnect_acc *discon_acc; 1083 struct nvmefc_ls_req_op *lsop; 1084 struct nvmefc_ls_req *lsreq; 1085 int ret; 1086 1087 lsop = kzalloc((sizeof(*lsop) + 1088 ctrl->lport->ops->lsrqst_priv_sz + 1089 sizeof(*discon_rqst) + sizeof(*discon_acc)), 1090 GFP_KERNEL); 1091 if (!lsop) 1092 /* couldn't sent it... too bad */ 1093 return; 1094 1095 lsreq = &lsop->ls_req; 1096 1097 lsreq->private = (void *)&lsop[1]; 1098 discon_rqst = (struct fcnvme_ls_disconnect_rqst *) 1099 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 1100 discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1]; 1101 1102 discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT; 1103 discon_rqst->desc_list_len = cpu_to_be32( 1104 sizeof(struct fcnvme_lsdesc_assoc_id) + 1105 sizeof(struct fcnvme_lsdesc_disconn_cmd)); 1106 1107 discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); 1108 discon_rqst->associd.desc_len = 1109 fcnvme_lsdesc_len( 1110 sizeof(struct fcnvme_lsdesc_assoc_id)); 1111 1112 discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); 1113 1114 discon_rqst->discon_cmd.desc_tag = cpu_to_be32( 1115 FCNVME_LSDESC_DISCONN_CMD); 1116 discon_rqst->discon_cmd.desc_len = 1117 fcnvme_lsdesc_len( 1118 sizeof(struct fcnvme_lsdesc_disconn_cmd)); 1119 discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION; 1120 discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id); 1121 1122 lsreq->rqstaddr = discon_rqst; 1123 lsreq->rqstlen = sizeof(*discon_rqst); 1124 lsreq->rspaddr = discon_acc; 1125 lsreq->rsplen = sizeof(*discon_acc); 1126 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 1127 1128 ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, 1129 nvme_fc_disconnect_assoc_done); 1130 if (ret) 1131 kfree(lsop); 1132 1133 /* only meaningful part to terminating the association */ 1134 ctrl->association_id = 
0; 1135 } 1136 1137 1138 /* *********************** NVME Ctrl Routines **************************** */ 1139 1140 static void __nvme_fc_final_op_cleanup(struct request *rq); 1141 1142 static int 1143 nvme_fc_reinit_request(void *data, struct request *rq) 1144 { 1145 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1146 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1147 1148 memset(cmdiu, 0, sizeof(*cmdiu)); 1149 cmdiu->scsi_id = NVME_CMD_SCSI_ID; 1150 cmdiu->fc_id = NVME_CMD_FC_ID; 1151 cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); 1152 memset(&op->rsp_iu, 0, sizeof(op->rsp_iu)); 1153 1154 return 0; 1155 } 1156 1157 static void 1158 __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, 1159 struct nvme_fc_fcp_op *op) 1160 { 1161 fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.rspdma, 1162 sizeof(op->rsp_iu), DMA_FROM_DEVICE); 1163 fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.cmddma, 1164 sizeof(op->cmd_iu), DMA_TO_DEVICE); 1165 1166 atomic_set(&op->state, FCPOP_STATE_UNINIT); 1167 } 1168 1169 static void 1170 nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq, 1171 unsigned int hctx_idx) 1172 { 1173 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1174 1175 return __nvme_fc_exit_request(set->driver_data, op); 1176 } 1177 1178 static int 1179 __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) 1180 { 1181 int state; 1182 1183 state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); 1184 if (state != FCPOP_STATE_ACTIVE) { 1185 atomic_set(&op->state, state); 1186 return -ECANCELED; 1187 } 1188 1189 ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, 1190 &ctrl->rport->remoteport, 1191 op->queue->lldd_handle, 1192 &op->fcp_req); 1193 1194 return 0; 1195 } 1196 1197 static void 1198 nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) 1199 { 1200 struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; 1201 unsigned long flags; 1202 int i, ret; 1203 1204 for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { 1205 if 
(atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE) 1206 continue; 1207 1208 spin_lock_irqsave(&ctrl->lock, flags); 1209 if (ctrl->flags & FCCTRL_TERMIO) { 1210 ctrl->iocnt++; 1211 aen_op->flags |= FCOP_FLAGS_TERMIO; 1212 } 1213 spin_unlock_irqrestore(&ctrl->lock, flags); 1214 1215 ret = __nvme_fc_abort_op(ctrl, aen_op); 1216 if (ret) { 1217 /* 1218 * if __nvme_fc_abort_op failed the io wasn't 1219 * active. Thus this call path is running in 1220 * parallel to the io complete. Treat as non-error. 1221 */ 1222 1223 /* back out the flags/counters */ 1224 spin_lock_irqsave(&ctrl->lock, flags); 1225 if (ctrl->flags & FCCTRL_TERMIO) 1226 ctrl->iocnt--; 1227 aen_op->flags &= ~FCOP_FLAGS_TERMIO; 1228 spin_unlock_irqrestore(&ctrl->lock, flags); 1229 return; 1230 } 1231 } 1232 } 1233 1234 static inline int 1235 __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, 1236 struct nvme_fc_fcp_op *op) 1237 { 1238 unsigned long flags; 1239 bool complete_rq = false; 1240 1241 spin_lock_irqsave(&ctrl->lock, flags); 1242 if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { 1243 if (ctrl->flags & FCCTRL_TERMIO) 1244 ctrl->iocnt--; 1245 } 1246 if (op->flags & FCOP_FLAGS_RELEASED) 1247 complete_rq = true; 1248 else 1249 op->flags |= FCOP_FLAGS_COMPLETE; 1250 spin_unlock_irqrestore(&ctrl->lock, flags); 1251 1252 return complete_rq; 1253 } 1254 1255 static void 1256 nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) 1257 { 1258 struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req); 1259 struct request *rq = op->rq; 1260 struct nvmefc_fcp_req *freq = &op->fcp_req; 1261 struct nvme_fc_ctrl *ctrl = op->ctrl; 1262 struct nvme_fc_queue *queue = op->queue; 1263 struct nvme_completion *cqe = &op->rsp_iu.cqe; 1264 struct nvme_command *sqe = &op->cmd_iu.sqe; 1265 __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1); 1266 union nvme_result result; 1267 bool complete_rq; 1268 1269 /* 1270 * WARNING: 1271 * The current linux implementation of a nvme controller 1272 * allocates a single tag set for all io queues 
and sizes 1273 * the io queues to fully hold all possible tags. Thus, the 1274 * implementation does not reference or care about the sqhd 1275 * value as it never needs to use the sqhd/sqtail pointers 1276 * for submission pacing. 1277 * 1278 * This affects the FC-NVME implementation in two ways: 1279 * 1) As the value doesn't matter, we don't need to waste 1280 * cycles extracting it from ERSPs and stamping it in the 1281 * cases where the transport fabricates CQEs on successful 1282 * completions. 1283 * 2) The FC-NVME implementation requires that delivery of 1284 * ERSP completions are to go back to the nvme layer in order 1285 * relative to the rsn, such that the sqhd value will always 1286 * be "in order" for the nvme layer. As the nvme layer in 1287 * linux doesn't care about sqhd, there's no need to return 1288 * them in order. 1289 * 1290 * Additionally: 1291 * As the core nvme layer in linux currently does not look at 1292 * every field in the cqe - in cases where the FC transport must 1293 * fabricate a CQE, the following fields will not be set as they 1294 * are not referenced: 1295 * cqe.sqid, cqe.sqhd, cqe.command_id 1296 */ 1297 1298 fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma, 1299 sizeof(op->rsp_iu), DMA_FROM_DEVICE); 1300 1301 if (atomic_read(&op->state) == FCPOP_STATE_ABORTED) 1302 status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); 1303 else if (freq->status) 1304 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1305 1306 /* 1307 * For the linux implementation, if we have an unsuccesful 1308 * status, they blk-mq layer can typically be called with the 1309 * non-zero status and the content of the cqe isn't important. 1310 */ 1311 if (status) 1312 goto done; 1313 1314 /* 1315 * command completed successfully relative to the wire 1316 * protocol. However, validate anything received and 1317 * extract the status and result from the cqe (create it 1318 * where necessary). 
1319 */ 1320 1321 switch (freq->rcv_rsplen) { 1322 1323 case 0: 1324 case NVME_FC_SIZEOF_ZEROS_RSP: 1325 /* 1326 * No response payload or 12 bytes of payload (which 1327 * should all be zeros) are considered successful and 1328 * no payload in the CQE by the transport. 1329 */ 1330 if (freq->transferred_length != 1331 be32_to_cpu(op->cmd_iu.data_len)) { 1332 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1333 goto done; 1334 } 1335 result.u64 = 0; 1336 break; 1337 1338 case sizeof(struct nvme_fc_ersp_iu): 1339 /* 1340 * The ERSP IU contains a full completion with CQE. 1341 * Validate ERSP IU and look at cqe. 1342 */ 1343 if (unlikely(be16_to_cpu(op->rsp_iu.iu_len) != 1344 (freq->rcv_rsplen / 4) || 1345 be32_to_cpu(op->rsp_iu.xfrd_len) != 1346 freq->transferred_length || 1347 op->rsp_iu.status_code || 1348 sqe->common.command_id != cqe->command_id)) { 1349 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1350 goto done; 1351 } 1352 result = cqe->result; 1353 status = cqe->status; 1354 break; 1355 1356 default: 1357 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1358 goto done; 1359 } 1360 1361 done: 1362 if (op->flags & FCOP_FLAGS_AEN) { 1363 nvme_complete_async_event(&queue->ctrl->ctrl, status, &result); 1364 complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); 1365 atomic_set(&op->state, FCPOP_STATE_IDLE); 1366 op->flags = FCOP_FLAGS_AEN; /* clear other flags */ 1367 nvme_fc_ctrl_put(ctrl); 1368 return; 1369 } 1370 1371 complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); 1372 if (!complete_rq) { 1373 if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { 1374 status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); 1375 if (blk_queue_dying(rq->q)) 1376 status |= cpu_to_le16(NVME_SC_DNR << 1); 1377 } 1378 nvme_end_request(rq, status, result); 1379 } else 1380 __nvme_fc_final_op_cleanup(rq); 1381 } 1382 1383 static int 1384 __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, 1385 struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op, 1386 struct request *rq, 
u32 rqno) 1387 { 1388 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1389 int ret = 0; 1390 1391 memset(op, 0, sizeof(*op)); 1392 op->fcp_req.cmdaddr = &op->cmd_iu; 1393 op->fcp_req.cmdlen = sizeof(op->cmd_iu); 1394 op->fcp_req.rspaddr = &op->rsp_iu; 1395 op->fcp_req.rsplen = sizeof(op->rsp_iu); 1396 op->fcp_req.done = nvme_fc_fcpio_done; 1397 op->fcp_req.first_sgl = (struct scatterlist *)&op[1]; 1398 op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE]; 1399 op->ctrl = ctrl; 1400 op->queue = queue; 1401 op->rq = rq; 1402 op->rqno = rqno; 1403 1404 cmdiu->scsi_id = NVME_CMD_SCSI_ID; 1405 cmdiu->fc_id = NVME_CMD_FC_ID; 1406 cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); 1407 1408 op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev, 1409 &op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE); 1410 if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) { 1411 dev_err(ctrl->dev, 1412 "FCP Op failed - cmdiu dma mapping failed.\n"); 1413 ret = EFAULT; 1414 goto out_on_error; 1415 } 1416 1417 op->fcp_req.rspdma = fc_dma_map_single(ctrl->lport->dev, 1418 &op->rsp_iu, sizeof(op->rsp_iu), 1419 DMA_FROM_DEVICE); 1420 if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) { 1421 dev_err(ctrl->dev, 1422 "FCP Op failed - rspiu dma mapping failed.\n"); 1423 ret = EFAULT; 1424 } 1425 1426 atomic_set(&op->state, FCPOP_STATE_IDLE); 1427 out_on_error: 1428 return ret; 1429 } 1430 1431 static int 1432 nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, 1433 unsigned int hctx_idx, unsigned int numa_node) 1434 { 1435 struct nvme_fc_ctrl *ctrl = set->driver_data; 1436 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1437 struct nvme_fc_queue *queue = &ctrl->queues[hctx_idx+1]; 1438 1439 return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++); 1440 } 1441 1442 static int 1443 nvme_fc_init_admin_request(struct blk_mq_tag_set *set, struct request *rq, 1444 unsigned int hctx_idx, unsigned int numa_node) 1445 { 1446 struct 
nvme_fc_ctrl *ctrl = set->driver_data; 1447 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1448 struct nvme_fc_queue *queue = &ctrl->queues[0]; 1449 1450 return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++); 1451 } 1452 1453 static int 1454 nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) 1455 { 1456 struct nvme_fc_fcp_op *aen_op; 1457 struct nvme_fc_cmd_iu *cmdiu; 1458 struct nvme_command *sqe; 1459 void *private; 1460 int i, ret; 1461 1462 aen_op = ctrl->aen_ops; 1463 for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { 1464 private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, 1465 GFP_KERNEL); 1466 if (!private) 1467 return -ENOMEM; 1468 1469 cmdiu = &aen_op->cmd_iu; 1470 sqe = &cmdiu->sqe; 1471 ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0], 1472 aen_op, (struct request *)NULL, 1473 (AEN_CMDID_BASE + i)); 1474 if (ret) { 1475 kfree(private); 1476 return ret; 1477 } 1478 1479 aen_op->flags = FCOP_FLAGS_AEN; 1480 aen_op->fcp_req.first_sgl = NULL; /* no sg list */ 1481 aen_op->fcp_req.private = private; 1482 1483 memset(sqe, 0, sizeof(*sqe)); 1484 sqe->common.opcode = nvme_admin_async_event; 1485 /* Note: core layer may overwrite the sqe.command_id value */ 1486 sqe->common.command_id = AEN_CMDID_BASE + i; 1487 } 1488 return 0; 1489 } 1490 1491 static void 1492 nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) 1493 { 1494 struct nvme_fc_fcp_op *aen_op; 1495 int i; 1496 1497 aen_op = ctrl->aen_ops; 1498 for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { 1499 if (!aen_op->fcp_req.private) 1500 continue; 1501 1502 __nvme_fc_exit_request(ctrl, aen_op); 1503 1504 kfree(aen_op->fcp_req.private); 1505 aen_op->fcp_req.private = NULL; 1506 } 1507 } 1508 1509 static inline void 1510 __nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl, 1511 unsigned int qidx) 1512 { 1513 struct nvme_fc_queue *queue = &ctrl->queues[qidx]; 1514 1515 hctx->driver_data = queue; 1516 queue->hctx = hctx; 1517 } 1518 1519 static int 1520 
nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 1521 unsigned int hctx_idx) 1522 { 1523 struct nvme_fc_ctrl *ctrl = data; 1524 1525 __nvme_fc_init_hctx(hctx, ctrl, hctx_idx + 1); 1526 1527 return 0; 1528 } 1529 1530 static int 1531 nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, 1532 unsigned int hctx_idx) 1533 { 1534 struct nvme_fc_ctrl *ctrl = data; 1535 1536 __nvme_fc_init_hctx(hctx, ctrl, hctx_idx); 1537 1538 return 0; 1539 } 1540 1541 static void 1542 nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx, size_t queue_size) 1543 { 1544 struct nvme_fc_queue *queue; 1545 1546 queue = &ctrl->queues[idx]; 1547 memset(queue, 0, sizeof(*queue)); 1548 queue->ctrl = ctrl; 1549 queue->qnum = idx; 1550 atomic_set(&queue->csn, 1); 1551 queue->dev = ctrl->dev; 1552 1553 if (idx > 0) 1554 queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16; 1555 else 1556 queue->cmnd_capsule_len = sizeof(struct nvme_command); 1557 1558 queue->queue_size = queue_size; 1559 1560 /* 1561 * Considered whether we should allocate buffers for all SQEs 1562 * and CQEs and dma map them - mapping their respective entries 1563 * into the request structures (kernel vm addr and dma address) 1564 * thus the driver could use the buffers/mappings directly. 1565 * It only makes sense if the LLDD would use them for its 1566 * messaging api. It's very unlikely most adapter api's would use 1567 * a native NVME sqe/cqe. More reasonable if FC-NVME IU payload 1568 * structures were used instead. 1569 */ 1570 } 1571 1572 /* 1573 * This routine terminates a queue at the transport level. 1574 * The transport has already ensured that all outstanding ios on 1575 * the queue have been terminated. 1576 * The transport will send a Disconnect LS request to terminate 1577 * the queue's connection. Termination of the admin queue will also 1578 * terminate the association at the target. 
1579 */ 1580 static void 1581 nvme_fc_free_queue(struct nvme_fc_queue *queue) 1582 { 1583 if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags)) 1584 return; 1585 1586 /* 1587 * Current implementation never disconnects a single queue. 1588 * It always terminates a whole association. So there is never 1589 * a disconnect(queue) LS sent to the target. 1590 */ 1591 1592 queue->connection_id = 0; 1593 clear_bit(NVME_FC_Q_CONNECTED, &queue->flags); 1594 } 1595 1596 static void 1597 __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl, 1598 struct nvme_fc_queue *queue, unsigned int qidx) 1599 { 1600 if (ctrl->lport->ops->delete_queue) 1601 ctrl->lport->ops->delete_queue(&ctrl->lport->localport, qidx, 1602 queue->lldd_handle); 1603 queue->lldd_handle = NULL; 1604 } 1605 1606 static void 1607 nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl) 1608 { 1609 int i; 1610 1611 for (i = 1; i < ctrl->queue_count; i++) 1612 nvme_fc_free_queue(&ctrl->queues[i]); 1613 } 1614 1615 static int 1616 __nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl, 1617 struct nvme_fc_queue *queue, unsigned int qidx, u16 qsize) 1618 { 1619 int ret = 0; 1620 1621 queue->lldd_handle = NULL; 1622 if (ctrl->lport->ops->create_queue) 1623 ret = ctrl->lport->ops->create_queue(&ctrl->lport->localport, 1624 qidx, qsize, &queue->lldd_handle); 1625 1626 return ret; 1627 } 1628 1629 static void 1630 nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl) 1631 { 1632 struct nvme_fc_queue *queue = &ctrl->queues[ctrl->queue_count - 1]; 1633 int i; 1634 1635 for (i = ctrl->queue_count - 1; i >= 1; i--, queue--) 1636 __nvme_fc_delete_hw_queue(ctrl, queue, i); 1637 } 1638 1639 static int 1640 nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) 1641 { 1642 struct nvme_fc_queue *queue = &ctrl->queues[1]; 1643 int i, ret; 1644 1645 for (i = 1; i < ctrl->queue_count; i++, queue++) { 1646 ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize); 1647 if (ret) 1648 goto delete_queues; 1649 } 1650 1651 return 
0; 1652 1653 delete_queues: 1654 for (; i >= 0; i--) 1655 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i); 1656 return ret; 1657 } 1658 1659 static int 1660 nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) 1661 { 1662 int i, ret = 0; 1663 1664 for (i = 1; i < ctrl->queue_count; i++) { 1665 ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize, 1666 (qsize / 5)); 1667 if (ret) 1668 break; 1669 ret = nvmf_connect_io_queue(&ctrl->ctrl, i); 1670 if (ret) 1671 break; 1672 } 1673 1674 return ret; 1675 } 1676 1677 static void 1678 nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl) 1679 { 1680 int i; 1681 1682 for (i = 1; i < ctrl->queue_count; i++) 1683 nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize); 1684 } 1685 1686 static void 1687 nvme_fc_ctrl_free(struct kref *ref) 1688 { 1689 struct nvme_fc_ctrl *ctrl = 1690 container_of(ref, struct nvme_fc_ctrl, ref); 1691 unsigned long flags; 1692 1693 if (ctrl->ctrl.tagset) { 1694 blk_cleanup_queue(ctrl->ctrl.connect_q); 1695 blk_mq_free_tag_set(&ctrl->tag_set); 1696 } 1697 1698 /* remove from rport list */ 1699 spin_lock_irqsave(&ctrl->rport->lock, flags); 1700 list_del(&ctrl->ctrl_list); 1701 spin_unlock_irqrestore(&ctrl->rport->lock, flags); 1702 1703 blk_cleanup_queue(ctrl->ctrl.admin_q); 1704 blk_mq_free_tag_set(&ctrl->admin_tag_set); 1705 1706 kfree(ctrl->queues); 1707 1708 put_device(ctrl->dev); 1709 nvme_fc_rport_put(ctrl->rport); 1710 1711 ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); 1712 if (ctrl->ctrl.opts) 1713 nvmf_free_options(ctrl->ctrl.opts); 1714 kfree(ctrl); 1715 } 1716 1717 static void 1718 nvme_fc_ctrl_put(struct nvme_fc_ctrl *ctrl) 1719 { 1720 kref_put(&ctrl->ref, nvme_fc_ctrl_free); 1721 } 1722 1723 static int 1724 nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) 1725 { 1726 return kref_get_unless_zero(&ctrl->ref); 1727 } 1728 1729 /* 1730 * All accesses from nvme core layer done - can now free the 1731 * controller. 
Called after last nvme_put_ctrl() call 1732 */ 1733 static void 1734 nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) 1735 { 1736 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 1737 1738 WARN_ON(nctrl != &ctrl->ctrl); 1739 1740 nvme_fc_ctrl_put(ctrl); 1741 } 1742 1743 static void 1744 nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) 1745 { 1746 dev_warn(ctrl->ctrl.device, 1747 "NVME-FC{%d}: transport association error detected: %s\n", 1748 ctrl->cnum, errmsg); 1749 dev_warn(ctrl->ctrl.device, 1750 "NVME-FC{%d}: resetting controller\n", ctrl->cnum); 1751 1752 /* stop the queues on error, cleanup is in reset thread */ 1753 if (ctrl->queue_count > 1) 1754 nvme_stop_queues(&ctrl->ctrl); 1755 1756 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { 1757 dev_err(ctrl->ctrl.device, 1758 "NVME-FC{%d}: error_recovery: Couldn't change state " 1759 "to RECONNECTING\n", ctrl->cnum); 1760 return; 1761 } 1762 1763 if (!queue_work(nvme_fc_wq, &ctrl->reset_work)) 1764 dev_err(ctrl->ctrl.device, 1765 "NVME-FC{%d}: error_recovery: Failed to schedule " 1766 "reset work\n", ctrl->cnum); 1767 } 1768 1769 static enum blk_eh_timer_return 1770 nvme_fc_timeout(struct request *rq, bool reserved) 1771 { 1772 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1773 struct nvme_fc_ctrl *ctrl = op->ctrl; 1774 int ret; 1775 1776 if (reserved) 1777 return BLK_EH_RESET_TIMER; 1778 1779 ret = __nvme_fc_abort_op(ctrl, op); 1780 if (ret) 1781 /* io wasn't active to abort consider it done */ 1782 return BLK_EH_HANDLED; 1783 1784 /* 1785 * we can't individually ABTS an io without affecting the queue, 1786 * thus killing the queue, adn thus the association. 1787 * So resolve by performing a controller reset, which will stop 1788 * the host/io stack, terminate the association on the link, 1789 * and recreate an association on the link. 
1790 */ 1791 nvme_fc_error_recovery(ctrl, "io timeout error"); 1792 1793 return BLK_EH_HANDLED; 1794 } 1795 1796 static int 1797 nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq, 1798 struct nvme_fc_fcp_op *op) 1799 { 1800 struct nvmefc_fcp_req *freq = &op->fcp_req; 1801 enum dma_data_direction dir; 1802 int ret; 1803 1804 freq->sg_cnt = 0; 1805 1806 if (!blk_rq_payload_bytes(rq)) 1807 return 0; 1808 1809 freq->sg_table.sgl = freq->first_sgl; 1810 ret = sg_alloc_table_chained(&freq->sg_table, 1811 blk_rq_nr_phys_segments(rq), freq->sg_table.sgl); 1812 if (ret) 1813 return -ENOMEM; 1814 1815 op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl); 1816 WARN_ON(op->nents > blk_rq_nr_phys_segments(rq)); 1817 dir = (rq_data_dir(rq) == WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; 1818 freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl, 1819 op->nents, dir); 1820 if (unlikely(freq->sg_cnt <= 0)) { 1821 sg_free_table_chained(&freq->sg_table, true); 1822 freq->sg_cnt = 0; 1823 return -EFAULT; 1824 } 1825 1826 /* 1827 * TODO: blk_integrity_rq(rq) for DIF 1828 */ 1829 return 0; 1830 } 1831 1832 static void 1833 nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq, 1834 struct nvme_fc_fcp_op *op) 1835 { 1836 struct nvmefc_fcp_req *freq = &op->fcp_req; 1837 1838 if (!freq->sg_cnt) 1839 return; 1840 1841 fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents, 1842 ((rq_data_dir(rq) == WRITE) ? 1843 DMA_TO_DEVICE : DMA_FROM_DEVICE)); 1844 1845 nvme_cleanup_cmd(rq); 1846 1847 sg_free_table_chained(&freq->sg_table, true); 1848 1849 freq->sg_cnt = 0; 1850 } 1851 1852 /* 1853 * In FC, the queue is a logical thing. At transport connect, the target 1854 * creates its "queue" and returns a handle that is to be given to the 1855 * target whenever it posts something to the corresponding SQ. 
 * When an
 * SQE is sent on a SQ, FC effectively considers the SQE, or rather the
 * command contained within the SQE, an io, and assigns a FC exchange
 * to it. The SQE and the associated SQ handle are sent in the initial
 * CMD IU sent on the exchange. All transfers relative to the io occur
 * as part of the exchange.  The CQE is the last thing for the io,
 * which is transferred (explicitly or implicitly) with the RSP IU
 * sent on the exchange. After the CQE is received, the FC exchange is
 * terminated and the Exchange may be used on a different io.
 *
 * The transport to LLDD api has the transport making a request for a
 * new fcp io request to the LLDD. The LLDD then allocates a FC exchange
 * resource and transfers the command. The LLDD will then process all
 * steps to complete the io. Upon completion, the transport done routine
 * is called.
 *
 * So - while the operation is outstanding to the LLDD, there is a link
 * level FC exchange resource that is also outstanding. This must be
 * considered in all cleanup operations.
1874 */ 1875 static int 1876 nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, 1877 struct nvme_fc_fcp_op *op, u32 data_len, 1878 enum nvmefc_fcp_datadir io_dir) 1879 { 1880 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1881 struct nvme_command *sqe = &cmdiu->sqe; 1882 u32 csn; 1883 int ret; 1884 1885 /* 1886 * before attempting to send the io, check to see if we believe 1887 * the target device is present 1888 */ 1889 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 1890 return BLK_MQ_RQ_QUEUE_ERROR; 1891 1892 if (!nvme_fc_ctrl_get(ctrl)) 1893 return BLK_MQ_RQ_QUEUE_ERROR; 1894 1895 /* format the FC-NVME CMD IU and fcp_req */ 1896 cmdiu->connection_id = cpu_to_be64(queue->connection_id); 1897 csn = atomic_inc_return(&queue->csn); 1898 cmdiu->csn = cpu_to_be32(csn); 1899 cmdiu->data_len = cpu_to_be32(data_len); 1900 switch (io_dir) { 1901 case NVMEFC_FCP_WRITE: 1902 cmdiu->flags = FCNVME_CMD_FLAGS_WRITE; 1903 break; 1904 case NVMEFC_FCP_READ: 1905 cmdiu->flags = FCNVME_CMD_FLAGS_READ; 1906 break; 1907 case NVMEFC_FCP_NODATA: 1908 cmdiu->flags = 0; 1909 break; 1910 } 1911 op->fcp_req.payload_length = data_len; 1912 op->fcp_req.io_dir = io_dir; 1913 op->fcp_req.transferred_length = 0; 1914 op->fcp_req.rcv_rsplen = 0; 1915 op->fcp_req.status = NVME_SC_SUCCESS; 1916 op->fcp_req.sqid = cpu_to_le16(queue->qnum); 1917 1918 /* 1919 * validate per fabric rules, set fields mandated by fabric spec 1920 * as well as those by FC-NVME spec. 1921 */ 1922 WARN_ON_ONCE(sqe->common.metadata); 1923 WARN_ON_ONCE(sqe->common.dptr.prp1); 1924 WARN_ON_ONCE(sqe->common.dptr.prp2); 1925 sqe->common.flags |= NVME_CMD_SGL_METABUF; 1926 1927 /* 1928 * format SQE DPTR field per FC-NVME rules 1929 * type=data block descr; subtype=offset; 1930 * offset is currently 0. 
1931 */ 1932 sqe->rw.dptr.sgl.type = NVME_SGL_FMT_OFFSET; 1933 sqe->rw.dptr.sgl.length = cpu_to_le32(data_len); 1934 sqe->rw.dptr.sgl.addr = 0; 1935 1936 if (!(op->flags & FCOP_FLAGS_AEN)) { 1937 ret = nvme_fc_map_data(ctrl, op->rq, op); 1938 if (ret < 0) { 1939 nvme_cleanup_cmd(op->rq); 1940 nvme_fc_ctrl_put(ctrl); 1941 return (ret == -ENOMEM || ret == -EAGAIN) ? 1942 BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR; 1943 } 1944 } 1945 1946 fc_dma_sync_single_for_device(ctrl->lport->dev, op->fcp_req.cmddma, 1947 sizeof(op->cmd_iu), DMA_TO_DEVICE); 1948 1949 atomic_set(&op->state, FCPOP_STATE_ACTIVE); 1950 1951 if (!(op->flags & FCOP_FLAGS_AEN)) 1952 blk_mq_start_request(op->rq); 1953 1954 ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, 1955 &ctrl->rport->remoteport, 1956 queue->lldd_handle, &op->fcp_req); 1957 1958 if (ret) { 1959 if (op->rq) { /* normal request */ 1960 nvme_fc_unmap_data(ctrl, op->rq, op); 1961 nvme_cleanup_cmd(op->rq); 1962 } 1963 /* else - aen. no cleanup needed */ 1964 1965 nvme_fc_ctrl_put(ctrl); 1966 1967 if (ret != -EBUSY) 1968 return BLK_MQ_RQ_QUEUE_ERROR; 1969 1970 if (op->rq) { 1971 blk_mq_stop_hw_queues(op->rq->q); 1972 blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY); 1973 } 1974 return BLK_MQ_RQ_QUEUE_BUSY; 1975 } 1976 1977 return BLK_MQ_RQ_QUEUE_OK; 1978 } 1979 1980 static int 1981 nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, 1982 const struct blk_mq_queue_data *bd) 1983 { 1984 struct nvme_ns *ns = hctx->queue->queuedata; 1985 struct nvme_fc_queue *queue = hctx->driver_data; 1986 struct nvme_fc_ctrl *ctrl = queue->ctrl; 1987 struct request *rq = bd->rq; 1988 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1989 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1990 struct nvme_command *sqe = &cmdiu->sqe; 1991 enum nvmefc_fcp_datadir io_dir; 1992 u32 data_len; 1993 int ret; 1994 1995 ret = nvme_setup_cmd(ns, rq, sqe); 1996 if (ret) 1997 return ret; 1998 1999 data_len = blk_rq_payload_bytes(rq); 2000 if (data_len) 2001 io_dir = 
((rq_data_dir(rq) == WRITE) ? 2002 NVMEFC_FCP_WRITE : NVMEFC_FCP_READ); 2003 else 2004 io_dir = NVMEFC_FCP_NODATA; 2005 2006 return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir); 2007 } 2008 2009 static struct blk_mq_tags * 2010 nvme_fc_tagset(struct nvme_fc_queue *queue) 2011 { 2012 if (queue->qnum == 0) 2013 return queue->ctrl->admin_tag_set.tags[queue->qnum]; 2014 2015 return queue->ctrl->tag_set.tags[queue->qnum - 1]; 2016 } 2017 2018 static int 2019 nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) 2020 2021 { 2022 struct nvme_fc_queue *queue = hctx->driver_data; 2023 struct nvme_fc_ctrl *ctrl = queue->ctrl; 2024 struct request *req; 2025 struct nvme_fc_fcp_op *op; 2026 2027 req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag); 2028 if (!req) 2029 return 0; 2030 2031 op = blk_mq_rq_to_pdu(req); 2032 2033 if ((atomic_read(&op->state) == FCPOP_STATE_ACTIVE) && 2034 (ctrl->lport->ops->poll_queue)) 2035 ctrl->lport->ops->poll_queue(&ctrl->lport->localport, 2036 queue->lldd_handle); 2037 2038 return ((atomic_read(&op->state) != FCPOP_STATE_ACTIVE)); 2039 } 2040 2041 static void 2042 nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) 2043 { 2044 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); 2045 struct nvme_fc_fcp_op *aen_op; 2046 unsigned long flags; 2047 bool terminating = false; 2048 int ret; 2049 2050 if (aer_idx > NVME_FC_NR_AEN_COMMANDS) 2051 return; 2052 2053 spin_lock_irqsave(&ctrl->lock, flags); 2054 if (ctrl->flags & FCCTRL_TERMIO) 2055 terminating = true; 2056 spin_unlock_irqrestore(&ctrl->lock, flags); 2057 2058 if (terminating) 2059 return; 2060 2061 aen_op = &ctrl->aen_ops[aer_idx]; 2062 2063 ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0, 2064 NVMEFC_FCP_NODATA); 2065 if (ret) 2066 dev_err(ctrl->ctrl.device, 2067 "failed async event work [%d]\n", aer_idx); 2068 } 2069 2070 static void 2071 __nvme_fc_final_op_cleanup(struct request *rq) 2072 { 2073 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2074 struct 
nvme_fc_ctrl *ctrl = op->ctrl; 2075 2076 atomic_set(&op->state, FCPOP_STATE_IDLE); 2077 op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED | 2078 FCOP_FLAGS_COMPLETE); 2079 2080 nvme_cleanup_cmd(rq); 2081 nvme_fc_unmap_data(ctrl, rq, op); 2082 nvme_complete_rq(rq); 2083 nvme_fc_ctrl_put(ctrl); 2084 2085 } 2086 2087 static void 2088 nvme_fc_complete_rq(struct request *rq) 2089 { 2090 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2091 struct nvme_fc_ctrl *ctrl = op->ctrl; 2092 unsigned long flags; 2093 bool completed = false; 2094 2095 /* 2096 * the core layer, on controller resets after calling 2097 * nvme_shutdown_ctrl(), calls complete_rq without our 2098 * calling blk_mq_complete_request(), thus there may still 2099 * be live i/o outstanding with the LLDD. Means transport has 2100 * to track complete calls vs fcpio_done calls to know what 2101 * path to take on completes and dones. 2102 */ 2103 spin_lock_irqsave(&ctrl->lock, flags); 2104 if (op->flags & FCOP_FLAGS_COMPLETE) 2105 completed = true; 2106 else 2107 op->flags |= FCOP_FLAGS_RELEASED; 2108 spin_unlock_irqrestore(&ctrl->lock, flags); 2109 2110 if (completed) 2111 __nvme_fc_final_op_cleanup(rq); 2112 } 2113 2114 /* 2115 * This routine is used by the transport when it needs to find active 2116 * io on a queue that is to be terminated. The transport uses 2117 * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke 2118 * this routine to kill them on a 1 by 1 basis. 2119 * 2120 * As FC allocates FC exchange for each io, the transport must contact 2121 * the LLDD to terminate the exchange, thus releasing the FC exchange. 2122 * After terminating the exchange the LLDD will call the transport's 2123 * normal io done path for the request, but it will have an aborted 2124 * status. The done path will return the io request back to the block 2125 * layer with an error status. 
 */
static void
nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
{
	struct nvme_ctrl *nctrl = data;
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
	unsigned long flags;
	int status;

	/* requests that were never started have nothing to abort */
	if (!blk_mq_request_started(req))
		return;

	/* while terminating io, account for this op in ctrl->iocnt */
	spin_lock_irqsave(&ctrl->lock, flags);
	if (ctrl->flags & FCCTRL_TERMIO) {
		ctrl->iocnt++;
		op->flags |= FCOP_FLAGS_TERMIO;
	}
	spin_unlock_irqrestore(&ctrl->lock, flags);

	status = __nvme_fc_abort_op(ctrl, op);
	if (status) {
		/*
		 * if __nvme_fc_abort_op failed the io wasn't
		 * active. Thus this call path is running in
		 * parallel to the io complete. Treat as non-error.
		 */

		/* back out the flags/counters */
		spin_lock_irqsave(&ctrl->lock, flags);
		if (ctrl->flags & FCCTRL_TERMIO)
			ctrl->iocnt--;
		op->flags &= ~FCOP_FLAGS_TERMIO;
		spin_unlock_irqrestore(&ctrl->lock, flags);
		return;
	}
}


/* blk-mq operations used for the io tag set (see nvme_fc_create_io_queues) */
static const struct blk_mq_ops nvme_fc_mq_ops = {
	.queue_rq	= nvme_fc_queue_rq,
	.complete	= nvme_fc_complete_rq,
	.init_request	= nvme_fc_init_request,
	.exit_request	= nvme_fc_exit_request,
	.reinit_request	= nvme_fc_reinit_request,
	.init_hctx	= nvme_fc_init_hctx,
	.poll		= nvme_fc_poll,
	.timeout	= nvme_fc_timeout,
};

/*
 * First-time creation of the io queues: negotiate the queue count,
 * initialize the queue structures, allocate the io tag set and the
 * connect queue, then create and connect the queues.
 *
 * Returns 0 on success, including when no io queues were granted.
 * On failure everything set up here is unwound, ctrl->ctrl.tagset is
 * reset to NULL and the error is returned.
 */
static int
nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
{
	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
	int ret;

	ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
	if (ret) {
		dev_info(ctrl->ctrl.device,
			"set_queue_count failed: %d\n", ret);
		return ret;
	}

	/* queue_count includes the admin queue */
	ctrl->queue_count = opts->nr_io_queues + 1;
	if (!opts->nr_io_queues)
		return 0;

	nvme_fc_init_io_queues(ctrl);

	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
	ctrl->tag_set.ops = &nvme_fc_mq_ops;
	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
	ctrl->tag_set.reserved_tags = 1; /* fabric connect */
	ctrl->tag_set.numa_node = NUMA_NO_NODE;
	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	/* per-request pdu: fcp op + inline sg list + LLDD private area */
	ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
					(SG_CHUNK_SIZE *
						sizeof(struct scatterlist)) +
					ctrl->lport->ops->fcprqst_priv_sz;
	ctrl->tag_set.driver_data = ctrl;
	ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;

	ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
	if (ret)
		return ret;

	ctrl->ctrl.tagset = &ctrl->tag_set;

	ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
	if (IS_ERR(ctrl->ctrl.connect_q)) {
		ret = PTR_ERR(ctrl->ctrl.connect_q);
		goto out_free_tag_set;
	}

	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
	if (ret)
		goto out_cleanup_blk_queue;

	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
	if (ret)
		goto out_delete_hw_queues;

	return 0;

	/* error unwinding: each label undoes one more setup step */
out_delete_hw_queues:
	nvme_fc_delete_hw_io_queues(ctrl);
out_cleanup_blk_queue:
	nvme_stop_keep_alive(&ctrl->ctrl);
	blk_cleanup_queue(ctrl->ctrl.connect_q);
out_free_tag_set:
	blk_mq_free_tag_set(&ctrl->tag_set);
	nvme_fc_free_io_queues(ctrl);

	/* force put free routine to ignore io queues */
	ctrl->ctrl.tagset = NULL;

	return ret;
}

/*
 * Re-establish the io queues on an existing io tag set: renegotiate
 * the queue count, reinitialize the queue structures and the tag set,
 * then create and connect the queues again.
 *
 * Returns 0 on success or when the controller has no io queues;
 * otherwise the queues are torn down again and the error is returned.
 */
static int
nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
{
	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
	int ret;

	ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
	if (ret) {
		dev_info(ctrl->ctrl.device,
			"set_queue_count failed: %d\n", ret);
		return ret;
	}

	/* check for io queues existing */
	if (ctrl->queue_count == 1)
		return 0;

	nvme_fc_init_io_queues(ctrl);

	ret = blk_mq_reinit_tagset(&ctrl->tag_set);
	if (ret)
		goto out_free_io_queues;

	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
	if (ret)
		goto out_free_io_queues;

	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
	if (ret)
		goto out_delete_hw_queues;

	return 0;

out_delete_hw_queues:
	nvme_fc_delete_hw_io_queues(ctrl);
out_free_io_queues:
	nvme_fc_free_io_queues(ctrl);
	return ret;
}

/*
 * This routine restarts the controller on the host side, and
 * on the link side, recreates the controller association.
 */
static int
nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
{
	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
	u32 segs;
	int ret;
	bool changed;

	++ctrl->ctrl.opts->nr_reconnects;

	/*
	 * Create the admin queue
	 */

	nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH);

	ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
				NVME_FC_AQ_BLKMQ_DEPTH);
	if (ret)
		goto out_free_queue;

	ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
				NVME_FC_AQ_BLKMQ_DEPTH,
				(NVME_FC_AQ_BLKMQ_DEPTH / 4));
	if (ret)
		goto out_delete_hw_queue;

	if (ctrl->ctrl.state != NVME_CTRL_NEW)
		blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);

	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
	if (ret)
		goto out_disconnect_admin_queue;

	/*
	 * Check controller capabilities
	 *
	 * todo:- add code to check if ctrl attributes changed from
	 * prior connection values
	 */

	ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
	if (ret) {
		dev_err(ctrl->ctrl.device,
			"prop_get NVME_REG_CAP failed\n");
		goto out_disconnect_admin_queue;
	}

	ctrl->ctrl.sqsize =
		min_t(int,
NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); 2340 2341 ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); 2342 if (ret) 2343 goto out_disconnect_admin_queue; 2344 2345 segs = min_t(u32, NVME_FC_MAX_SEGMENTS, 2346 ctrl->lport->ops->max_sgl_segments); 2347 ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9); 2348 2349 ret = nvme_init_identify(&ctrl->ctrl); 2350 if (ret) 2351 goto out_disconnect_admin_queue; 2352 2353 /* sanity checks */ 2354 2355 /* FC-NVME does not have other data in the capsule */ 2356 if (ctrl->ctrl.icdoff) { 2357 dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", 2358 ctrl->ctrl.icdoff); 2359 goto out_disconnect_admin_queue; 2360 } 2361 2362 nvme_start_keep_alive(&ctrl->ctrl); 2363 2364 /* FC-NVME supports normal SGL Data Block Descriptors */ 2365 2366 if (opts->queue_size > ctrl->ctrl.maxcmd) { 2367 /* warn if maxcmd is lower than queue_size */ 2368 dev_warn(ctrl->ctrl.device, 2369 "queue_size %zu > ctrl maxcmd %u, reducing " 2370 "to queue_size\n", 2371 opts->queue_size, ctrl->ctrl.maxcmd); 2372 opts->queue_size = ctrl->ctrl.maxcmd; 2373 } 2374 2375 ret = nvme_fc_init_aen_ops(ctrl); 2376 if (ret) 2377 goto out_term_aen_ops; 2378 2379 /* 2380 * Create the io queues 2381 */ 2382 2383 if (ctrl->queue_count > 1) { 2384 if (ctrl->ctrl.state == NVME_CTRL_NEW) 2385 ret = nvme_fc_create_io_queues(ctrl); 2386 else 2387 ret = nvme_fc_reinit_io_queues(ctrl); 2388 if (ret) 2389 goto out_term_aen_ops; 2390 } 2391 2392 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 2393 WARN_ON_ONCE(!changed); 2394 2395 ctrl->ctrl.opts->nr_reconnects = 0; 2396 2397 if (ctrl->queue_count > 1) { 2398 nvme_start_queues(&ctrl->ctrl); 2399 nvme_queue_scan(&ctrl->ctrl); 2400 nvme_queue_async_events(&ctrl->ctrl); 2401 } 2402 2403 return 0; /* Success */ 2404 2405 out_term_aen_ops: 2406 nvme_fc_term_aen_ops(ctrl); 2407 nvme_stop_keep_alive(&ctrl->ctrl); 2408 out_disconnect_admin_queue: 2409 /* send a Disconnect(association) LS to fc-nvme target */ 
2410 nvme_fc_xmt_disconnect_assoc(ctrl); 2411 out_delete_hw_queue: 2412 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); 2413 out_free_queue: 2414 nvme_fc_free_queue(&ctrl->queues[0]); 2415 2416 return ret; 2417 } 2418 2419 /* 2420 * This routine stops operation of the controller on the host side. 2421 * On the host os stack side: Admin and IO queues are stopped, 2422 * outstanding ios on them terminated via FC ABTS. 2423 * On the link side: the association is terminated. 2424 */ 2425 static void 2426 nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) 2427 { 2428 unsigned long flags; 2429 2430 nvme_stop_keep_alive(&ctrl->ctrl); 2431 2432 spin_lock_irqsave(&ctrl->lock, flags); 2433 ctrl->flags |= FCCTRL_TERMIO; 2434 ctrl->iocnt = 0; 2435 spin_unlock_irqrestore(&ctrl->lock, flags); 2436 2437 /* 2438 * If io queues are present, stop them and terminate all outstanding 2439 * ios on them. As FC allocates FC exchange for each io, the 2440 * transport must contact the LLDD to terminate the exchange, 2441 * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() 2442 * to tell us what io's are busy and invoke a transport routine 2443 * to kill them with the LLDD. After terminating the exchange 2444 * the LLDD will call the transport's normal io done path, but it 2445 * will have an aborted status. The done path will return the 2446 * io requests back to the block layer as part of normal completions 2447 * (but with error status). 2448 */ 2449 if (ctrl->queue_count > 1) { 2450 nvme_stop_queues(&ctrl->ctrl); 2451 blk_mq_tagset_busy_iter(&ctrl->tag_set, 2452 nvme_fc_terminate_exchange, &ctrl->ctrl); 2453 } 2454 2455 /* 2456 * Other transports, which don't have link-level contexts bound 2457 * to sqe's, would try to gracefully shutdown the controller by 2458 * writing the registers for shutdown and polling (call 2459 * nvme_shutdown_ctrl()). 
Given a bunch of i/o was potentially 2460 * just aborted and we will wait on those contexts, and given 2461 * there was no indication of how live the controlelr is on the 2462 * link, don't send more io to create more contexts for the 2463 * shutdown. Let the controller fail via keepalive failure if 2464 * its still present. 2465 */ 2466 2467 /* 2468 * clean up the admin queue. Same thing as above. 2469 * use blk_mq_tagset_busy_itr() and the transport routine to 2470 * terminate the exchanges. 2471 */ 2472 blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); 2473 blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, 2474 nvme_fc_terminate_exchange, &ctrl->ctrl); 2475 2476 /* kill the aens as they are a separate path */ 2477 nvme_fc_abort_aen_ops(ctrl); 2478 2479 /* wait for all io that had to be aborted */ 2480 spin_lock_irqsave(&ctrl->lock, flags); 2481 while (ctrl->iocnt) { 2482 spin_unlock_irqrestore(&ctrl->lock, flags); 2483 msleep(1000); 2484 spin_lock_irqsave(&ctrl->lock, flags); 2485 } 2486 ctrl->flags &= ~FCCTRL_TERMIO; 2487 spin_unlock_irqrestore(&ctrl->lock, flags); 2488 2489 nvme_fc_term_aen_ops(ctrl); 2490 2491 /* 2492 * send a Disconnect(association) LS to fc-nvme target 2493 * Note: could have been sent at top of process, but 2494 * cleaner on link traffic if after the aborts complete. 
2495 * Note: if association doesn't exist, association_id will be 0 2496 */ 2497 if (ctrl->association_id) 2498 nvme_fc_xmt_disconnect_assoc(ctrl); 2499 2500 if (ctrl->ctrl.tagset) { 2501 nvme_fc_delete_hw_io_queues(ctrl); 2502 nvme_fc_free_io_queues(ctrl); 2503 } 2504 2505 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); 2506 nvme_fc_free_queue(&ctrl->queues[0]); 2507 } 2508 2509 static void 2510 nvme_fc_delete_ctrl_work(struct work_struct *work) 2511 { 2512 struct nvme_fc_ctrl *ctrl = 2513 container_of(work, struct nvme_fc_ctrl, delete_work); 2514 2515 cancel_work_sync(&ctrl->reset_work); 2516 cancel_delayed_work_sync(&ctrl->connect_work); 2517 2518 /* 2519 * kill the association on the link side. this will block 2520 * waiting for io to terminate 2521 */ 2522 nvme_fc_delete_association(ctrl); 2523 2524 /* 2525 * tear down the controller 2526 * After the last reference on the nvme ctrl is removed, 2527 * the transport nvme_fc_nvme_ctrl_freed() callback will be 2528 * invoked. From there, the transport will tear down it's 2529 * logical queues and association. 2530 */ 2531 nvme_uninit_ctrl(&ctrl->ctrl); 2532 2533 nvme_put_ctrl(&ctrl->ctrl); 2534 } 2535 2536 static bool 2537 __nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl) 2538 { 2539 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) 2540 return true; 2541 2542 if (!queue_work(nvme_fc_wq, &ctrl->delete_work)) 2543 return true; 2544 2545 return false; 2546 } 2547 2548 static int 2549 __nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) 2550 { 2551 return __nvme_fc_schedule_delete_work(ctrl) ? 
-EBUSY : 0; 2552 } 2553 2554 /* 2555 * Request from nvme core layer to delete the controller 2556 */ 2557 static int 2558 nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) 2559 { 2560 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2561 int ret; 2562 2563 if (!kref_get_unless_zero(&ctrl->ctrl.kref)) 2564 return -EBUSY; 2565 2566 ret = __nvme_fc_del_ctrl(ctrl); 2567 2568 if (!ret) 2569 flush_workqueue(nvme_fc_wq); 2570 2571 nvme_put_ctrl(&ctrl->ctrl); 2572 2573 return ret; 2574 } 2575 2576 static void 2577 nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) 2578 { 2579 /* If we are resetting/deleting then do nothing */ 2580 if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) { 2581 WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || 2582 ctrl->ctrl.state == NVME_CTRL_LIVE); 2583 return; 2584 } 2585 2586 dev_info(ctrl->ctrl.device, 2587 "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", 2588 ctrl->cnum, status); 2589 2590 if (nvmf_should_reconnect(&ctrl->ctrl)) { 2591 dev_info(ctrl->ctrl.device, 2592 "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", 2593 ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); 2594 queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, 2595 ctrl->ctrl.opts->reconnect_delay * HZ); 2596 } else { 2597 dev_warn(ctrl->ctrl.device, 2598 "NVME-FC{%d}: Max reconnect attempts (%d) " 2599 "reached. 
Removing controller\n", 2600 ctrl->cnum, ctrl->ctrl.opts->nr_reconnects); 2601 WARN_ON(__nvme_fc_schedule_delete_work(ctrl)); 2602 } 2603 } 2604 2605 static void 2606 nvme_fc_reset_ctrl_work(struct work_struct *work) 2607 { 2608 struct nvme_fc_ctrl *ctrl = 2609 container_of(work, struct nvme_fc_ctrl, reset_work); 2610 int ret; 2611 2612 /* will block will waiting for io to terminate */ 2613 nvme_fc_delete_association(ctrl); 2614 2615 ret = nvme_fc_create_association(ctrl); 2616 if (ret) 2617 nvme_fc_reconnect_or_delete(ctrl, ret); 2618 else 2619 dev_info(ctrl->ctrl.device, 2620 "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); 2621 } 2622 2623 /* 2624 * called by the nvme core layer, for sysfs interface that requests 2625 * a reset of the nvme controller 2626 */ 2627 static int 2628 nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) 2629 { 2630 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2631 2632 dev_info(ctrl->ctrl.device, 2633 "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum); 2634 2635 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) 2636 return -EBUSY; 2637 2638 if (!queue_work(nvme_fc_wq, &ctrl->reset_work)) 2639 return -EBUSY; 2640 2641 flush_work(&ctrl->reset_work); 2642 2643 return 0; 2644 } 2645 2646 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { 2647 .name = "fc", 2648 .module = THIS_MODULE, 2649 .flags = NVME_F_FABRICS, 2650 .reg_read32 = nvmf_reg_read32, 2651 .reg_read64 = nvmf_reg_read64, 2652 .reg_write32 = nvmf_reg_write32, 2653 .reset_ctrl = nvme_fc_reset_nvme_ctrl, 2654 .free_ctrl = nvme_fc_nvme_ctrl_freed, 2655 .submit_async_event = nvme_fc_submit_async_event, 2656 .delete_ctrl = nvme_fc_del_nvme_ctrl, 2657 .get_subsysnqn = nvmf_get_subsysnqn, 2658 .get_address = nvmf_get_address, 2659 }; 2660 2661 static void 2662 nvme_fc_connect_ctrl_work(struct work_struct *work) 2663 { 2664 int ret; 2665 2666 struct nvme_fc_ctrl *ctrl = 2667 container_of(to_delayed_work(work), 2668 struct nvme_fc_ctrl, connect_work); 
2669 2670 ret = nvme_fc_create_association(ctrl); 2671 if (ret) 2672 nvme_fc_reconnect_or_delete(ctrl, ret); 2673 else 2674 dev_info(ctrl->ctrl.device, 2675 "NVME-FC{%d}: controller reconnect complete\n", 2676 ctrl->cnum); 2677 } 2678 2679 2680 static const struct blk_mq_ops nvme_fc_admin_mq_ops = { 2681 .queue_rq = nvme_fc_queue_rq, 2682 .complete = nvme_fc_complete_rq, 2683 .init_request = nvme_fc_init_admin_request, 2684 .exit_request = nvme_fc_exit_request, 2685 .reinit_request = nvme_fc_reinit_request, 2686 .init_hctx = nvme_fc_init_admin_hctx, 2687 .timeout = nvme_fc_timeout, 2688 }; 2689 2690 2691 static struct nvme_ctrl * 2692 nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, 2693 struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) 2694 { 2695 struct nvme_fc_ctrl *ctrl; 2696 unsigned long flags; 2697 int ret, idx; 2698 2699 if (!(rport->remoteport.port_role & 2700 (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { 2701 ret = -EBADR; 2702 goto out_fail; 2703 } 2704 2705 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); 2706 if (!ctrl) { 2707 ret = -ENOMEM; 2708 goto out_fail; 2709 } 2710 2711 idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); 2712 if (idx < 0) { 2713 ret = -ENOSPC; 2714 goto out_free_ctrl; 2715 } 2716 2717 ctrl->ctrl.opts = opts; 2718 INIT_LIST_HEAD(&ctrl->ctrl_list); 2719 ctrl->lport = lport; 2720 ctrl->rport = rport; 2721 ctrl->dev = lport->dev; 2722 ctrl->cnum = idx; 2723 2724 get_device(ctrl->dev); 2725 kref_init(&ctrl->ref); 2726 2727 INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); 2728 INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work); 2729 INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); 2730 spin_lock_init(&ctrl->lock); 2731 2732 /* io queue count */ 2733 ctrl->queue_count = min_t(unsigned int, 2734 opts->nr_io_queues, 2735 lport->ops->max_hw_queues); 2736 opts->nr_io_queues = ctrl->queue_count; /* so opts has valid value */ 2737 ctrl->queue_count++; /* +1 for admin 
queue */ 2738 2739 ctrl->ctrl.sqsize = opts->queue_size - 1; 2740 ctrl->ctrl.kato = opts->kato; 2741 2742 ret = -ENOMEM; 2743 ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue), 2744 GFP_KERNEL); 2745 if (!ctrl->queues) 2746 goto out_free_ida; 2747 2748 memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); 2749 ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; 2750 ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; 2751 ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ 2752 ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; 2753 ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + 2754 (SG_CHUNK_SIZE * 2755 sizeof(struct scatterlist)) + 2756 ctrl->lport->ops->fcprqst_priv_sz; 2757 ctrl->admin_tag_set.driver_data = ctrl; 2758 ctrl->admin_tag_set.nr_hw_queues = 1; 2759 ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; 2760 2761 ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); 2762 if (ret) 2763 goto out_free_queues; 2764 2765 ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); 2766 if (IS_ERR(ctrl->ctrl.admin_q)) { 2767 ret = PTR_ERR(ctrl->ctrl.admin_q); 2768 goto out_free_admin_tag_set; 2769 } 2770 2771 /* 2772 * Would have been nice to init io queues tag set as well. 2773 * However, we require interaction from the controller 2774 * for max io queue count before we can do so. 2775 * Defer this to the connect path. 
2776 */ 2777 2778 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); 2779 if (ret) 2780 goto out_cleanup_admin_q; 2781 2782 /* at this point, teardown path changes to ref counting on nvme ctrl */ 2783 2784 spin_lock_irqsave(&rport->lock, flags); 2785 list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); 2786 spin_unlock_irqrestore(&rport->lock, flags); 2787 2788 ret = nvme_fc_create_association(ctrl); 2789 if (ret) { 2790 ctrl->ctrl.opts = NULL; 2791 /* initiate nvme ctrl ref counting teardown */ 2792 nvme_uninit_ctrl(&ctrl->ctrl); 2793 2794 /* as we're past the point where we transition to the ref 2795 * counting teardown path, if we return a bad pointer here, 2796 * the calling routine, thinking it's prior to the 2797 * transition, will do an rport put. Since the teardown 2798 * path also does a rport put, we do an extra get here to 2799 * so proper order/teardown happens. 2800 */ 2801 nvme_fc_rport_get(rport); 2802 2803 if (ret > 0) 2804 ret = -EIO; 2805 return ERR_PTR(ret); 2806 } 2807 2808 kref_get(&ctrl->ctrl.kref); 2809 2810 dev_info(ctrl->ctrl.device, 2811 "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", 2812 ctrl->cnum, ctrl->ctrl.opts->subsysnqn); 2813 2814 return &ctrl->ctrl; 2815 2816 out_cleanup_admin_q: 2817 blk_cleanup_queue(ctrl->ctrl.admin_q); 2818 out_free_admin_tag_set: 2819 blk_mq_free_tag_set(&ctrl->admin_tag_set); 2820 out_free_queues: 2821 kfree(ctrl->queues); 2822 out_free_ida: 2823 put_device(ctrl->dev); 2824 ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); 2825 out_free_ctrl: 2826 kfree(ctrl); 2827 out_fail: 2828 /* exit via here doesn't follow ctlr ref points */ 2829 return ERR_PTR(ret); 2830 } 2831 2832 enum { 2833 FCT_TRADDR_ERR = 0, 2834 FCT_TRADDR_WWNN = 1 << 0, 2835 FCT_TRADDR_WWPN = 1 << 1, 2836 }; 2837 2838 struct nvmet_fc_traddr { 2839 u64 nn; 2840 u64 pn; 2841 }; 2842 2843 static const match_table_t traddr_opt_tokens = { 2844 { FCT_TRADDR_WWNN, "nn-%s" }, 2845 { FCT_TRADDR_WWPN, "pn-%s" }, 2846 { FCT_TRADDR_ERR, NULL } 2847 }; 
2848 2849 static int 2850 nvme_fc_parse_address(struct nvmet_fc_traddr *traddr, char *buf) 2851 { 2852 substring_t args[MAX_OPT_ARGS]; 2853 char *options, *o, *p; 2854 int token, ret = 0; 2855 u64 token64; 2856 2857 options = o = kstrdup(buf, GFP_KERNEL); 2858 if (!options) 2859 return -ENOMEM; 2860 2861 while ((p = strsep(&o, ":\n")) != NULL) { 2862 if (!*p) 2863 continue; 2864 2865 token = match_token(p, traddr_opt_tokens, args); 2866 switch (token) { 2867 case FCT_TRADDR_WWNN: 2868 if (match_u64(args, &token64)) { 2869 ret = -EINVAL; 2870 goto out; 2871 } 2872 traddr->nn = token64; 2873 break; 2874 case FCT_TRADDR_WWPN: 2875 if (match_u64(args, &token64)) { 2876 ret = -EINVAL; 2877 goto out; 2878 } 2879 traddr->pn = token64; 2880 break; 2881 default: 2882 pr_warn("unknown traddr token or missing value '%s'\n", 2883 p); 2884 ret = -EINVAL; 2885 goto out; 2886 } 2887 } 2888 2889 out: 2890 kfree(options); 2891 return ret; 2892 } 2893 2894 static struct nvme_ctrl * 2895 nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) 2896 { 2897 struct nvme_fc_lport *lport; 2898 struct nvme_fc_rport *rport; 2899 struct nvme_ctrl *ctrl; 2900 struct nvmet_fc_traddr laddr = { 0L, 0L }; 2901 struct nvmet_fc_traddr raddr = { 0L, 0L }; 2902 unsigned long flags; 2903 int ret; 2904 2905 ret = nvme_fc_parse_address(&raddr, opts->traddr); 2906 if (ret || !raddr.nn || !raddr.pn) 2907 return ERR_PTR(-EINVAL); 2908 2909 ret = nvme_fc_parse_address(&laddr, opts->host_traddr); 2910 if (ret || !laddr.nn || !laddr.pn) 2911 return ERR_PTR(-EINVAL); 2912 2913 /* find the host and remote ports to connect together */ 2914 spin_lock_irqsave(&nvme_fc_lock, flags); 2915 list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { 2916 if (lport->localport.node_name != laddr.nn || 2917 lport->localport.port_name != laddr.pn) 2918 continue; 2919 2920 list_for_each_entry(rport, &lport->endp_list, endp_list) { 2921 if (rport->remoteport.node_name != raddr.nn || 2922 
rport->remoteport.port_name != raddr.pn) 2923 continue; 2924 2925 /* if fail to get reference fall through. Will error */ 2926 if (!nvme_fc_rport_get(rport)) 2927 break; 2928 2929 spin_unlock_irqrestore(&nvme_fc_lock, flags); 2930 2931 ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport); 2932 if (IS_ERR(ctrl)) 2933 nvme_fc_rport_put(rport); 2934 return ctrl; 2935 } 2936 } 2937 spin_unlock_irqrestore(&nvme_fc_lock, flags); 2938 2939 return ERR_PTR(-ENOENT); 2940 } 2941 2942 2943 static struct nvmf_transport_ops nvme_fc_transport = { 2944 .name = "fc", 2945 .required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, 2946 .allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO, 2947 .create_ctrl = nvme_fc_create_ctrl, 2948 }; 2949 2950 static int __init nvme_fc_init_module(void) 2951 { 2952 int ret; 2953 2954 nvme_fc_wq = create_workqueue("nvme_fc_wq"); 2955 if (!nvme_fc_wq) 2956 return -ENOMEM; 2957 2958 ret = nvmf_register_transport(&nvme_fc_transport); 2959 if (ret) 2960 goto err; 2961 2962 return 0; 2963 err: 2964 destroy_workqueue(nvme_fc_wq); 2965 return ret; 2966 } 2967 2968 static void __exit nvme_fc_exit_module(void) 2969 { 2970 /* sanity check - all lports should be removed */ 2971 if (!list_empty(&nvme_fc_lport_list)) 2972 pr_warn("%s: localport list not empty\n", __func__); 2973 2974 nvmf_unregister_transport(&nvme_fc_transport); 2975 2976 destroy_workqueue(nvme_fc_wq); 2977 2978 ida_destroy(&nvme_fc_local_port_cnt); 2979 ida_destroy(&nvme_fc_ctrl_cnt); 2980 } 2981 2982 module_init(nvme_fc_init_module); 2983 module_exit(nvme_fc_exit_module); 2984 2985 MODULE_LICENSE("GPL v2"); 2986