// SPDX-License-Identifier: GPL-2.0
/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/pci-p2pdma.h>

#include "nvmet.h"

struct workqueue_struct *buffered_io_wq;
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources to be protected by this semaphore is:
 *
 *  - subsystems list
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures the write lock should be
 * obtained, while when reading (populating the discovery log page or checking
 * a host-subsystem link) the read lock is obtained to allow concurrent reads.
 */
DECLARE_RWSEM(nvmet_config_sem);

u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
u64 nvmet_ana_chgcnt;
DECLARE_RWSEM(nvmet_ana_sem);

inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
{
	u16 status;

	switch (errno) {
	case -ENOSPC:
		req->error_loc = offsetof(struct nvme_rw_command, length);
		status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
		break;
	case -EREMOTEIO:
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
		break;
	case -EOPNOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		switch (req->cmd->common.opcode) {
		case nvme_cmd_dsm:
		case nvme_cmd_write_zeroes:
			status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
			break;
		default:
			status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		}
		break;
	case -ENODATA:
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		status = NVME_SC_ACCESS_DENIED;
		break;
	case -EIO:
		/* FALLTHRU */
	default:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		status = NVME_SC_INTERNAL | NVME_SC_DNR;
	}

	return status;
}

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}

u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
	if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}

static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
	struct nvmet_ns *ns;

	if (list_empty(&subsys->namespaces))
		return 0;

	ns = list_last_entry(&subsys->namespaces,
			struct nvmet_ns, dev_link);
	return ns->nsid;
}

static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		if (!ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
	}
}

static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		aen = list_first_entry_or_null(&ctrl->async_events,
				struct nvmet_async_event, entry);
		if (!aen || !ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, 0);
	}
}

void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	schedule_work(&ctrl->async_event_work);
}

static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	u32 i;

	mutex_lock(&ctrl->lock);
	if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
		goto out_unlock;

	for (i = 0; i < ctrl->nr_changed_ns; i++) {
		if (ctrl->changed_ns_list[i] == nsid)
			goto out_unlock;
	}

	if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
		ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
		ctrl->nr_changed_ns = U32_MAX;
		goto out_unlock;
	}

	ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
out_unlock:
	mutex_unlock(&ctrl->lock);
}

void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ctrl *ctrl;

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
				NVME_AER_NOTICE_NS_CHANGED,
				NVME_LOG_CHANGED_NS);
	}
}

void nvmet_send_ana_event(struct nvmet_subsys *subsys,
		struct nvmet_port *port)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (port && ctrl->port != port)
			continue;
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
				NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
	}
	mutex_unlock(&subsys->lock);
}

void nvmet_port_send_ana_event(struct nvmet_port *port)
{
	struct nvmet_subsys_link *p;

	down_read(&nvmet_config_sem);
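	/* Notify controllers of every subsystem exported through this port. */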
	list_for_each_entry(p, &port->subsystems, entry)
		nvmet_send_ana_event(p->subsys, port);
	up_read(&nvmet_config_sem);
}

int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);

int nvmet_enable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	ret = ops->add_port(port);
	if (ret) {
		module_put(ops->owner);
		return ret;
	}

	/* If the transport didn't set inline_data_size, then disable it. */
	if (port->inline_data_size < 0)
		port->inline_data_size = 0;

	port->enabled = true;
	return 0;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);
	bool cmd_seen = ctrl->cmd_seen;

	ctrl->cmd_seen = false;
	if (cmd_seen) {
		pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
			ctrl->cntlid);
		schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
		return;
	}

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}

static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
		__le32 nsid)
{
	struct nvmet_ns *ns;

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
		if (ns->nsid == le32_to_cpu(nsid))
			return ns;
	}

	return NULL;
}

struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	struct nvmet_ns *ns;

	rcu_read_lock();
	ns = __nvmet_find_namespace(ctrl, nsid);
	if (ns)
		percpu_ref_get(&ns->ref);
	rcu_read_unlock();

	return ns;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
{
	nvmet_bdev_ns_disable(ns);
	nvmet_file_ns_disable(ns);
}

static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
{
	int ret;
	struct pci_dev *p2p_dev;

	if (!ns->use_p2pmem)
		return 0;

	if (!ns->bdev) {
		pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
		return -EINVAL;
	}

	if (!blk_queue_pci_p2pdma(ns->bdev->bd_queue)) {
		pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
		       ns->device_path);
		return -EINVAL;
	}

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
		if (ret < 0)
			return -EINVAL;
	} else {
		/*
		 * Right now we just check that there is p2pmem available so
		 * we can report an error to the user right away if there
		 * is not. We'll find the actual device to use once we
		 * set up the controller when the port's device is available.
		 */

		p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available for %s\n",
			       ns->device_path);
			return -EINVAL;
		}

		pci_dev_put(p2p_dev);
	}

	return 0;
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
		struct nvmet_ns *ns)
{
	struct device *clients[2];
	struct pci_dev *p2p_dev;
	int ret;

	if (!ctrl->p2p_client || !ns->use_p2pmem)
		return;

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
		if (ret < 0)
			return;

		p2p_dev = pci_dev_get(ns->p2p_dev);
	} else {
		clients[0] = ctrl->p2p_client;
		clients[1] = nvmet_ns_dev(ns);

		p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
			       dev_name(ctrl->p2p_client), ns->device_path);
			return;
		}
	}

	ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
	if (ret < 0)
		pci_dev_put(p2p_dev);

	pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
		ns->nsid);
}

int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;
	int ret;

	mutex_lock(&subsys->lock);
	ret = -EMFILE;
	if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
		goto out_unlock;
	ret = 0;
	if (ns->enabled)
		goto out_unlock;

	ret = nvmet_bdev_ns_enable(ns);
	if (ret == -ENOTBLK)
		ret = nvmet_file_ns_enable(ns);
	if (ret)
		goto out_unlock;

	ret = nvmet_p2pmem_ns_enable(ns);
	if (ret)
		goto out_unlock;

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);

	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
				0, GFP_KERNEL);
	if (ret)
		goto out_dev_put;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = ns->nsid;

	/*
	 * The namespaces list needs to be sorted to simplify the
	 * implementation of the Identify Namespace List subcommand.
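	 * New entries are therefore inserted in front of the first existing
	 * entry with a higher NSID, keeping the list ordered by NSID.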
	 */
	if (list_empty(&subsys->namespaces)) {
		list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
	} else {
		struct nvmet_ns *old;

		list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
			BUG_ON(ns->nsid == old->nsid);
			if (ns->nsid < old->nsid)
				break;
		}

		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
	}
	subsys->nr_namespaces++;

	nvmet_ns_changed(subsys, ns->nsid);
	ns->enabled = true;
	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
out_dev_put:
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));

	nvmet_ns_dev_disable(ns);
	goto out_unlock;
}

void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	if (!ns->enabled)
		goto out_unlock;

	ns->enabled = false;
	list_del_rcu(&ns->dev_link);
	if (ns->nsid == subsys->max_nsid)
		subsys->max_nsid = nvmet_max_nsid(subsys);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));

	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespace from the lookup list, we
	 * can kill the per_cpu ref and wait for any remaining references
	 * to be dropped, as well as an RCU grace period for anyone only
	 * using the namespace under rcu_read_lock().  Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	mutex_lock(&subsys->lock);

	subsys->nr_namespaces--;
	nvmet_ns_changed(subsys, ns->nsid);
	nvmet_ns_dev_disable(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
	nvmet_ns_disable(ns);

	down_write(&nvmet_ana_sem);
	nvmet_ana_group_enabled[ns->anagrpid]--;
	up_write(&nvmet_ana_sem);

	kfree(ns->device_path);
	kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return NULL;

	INIT_LIST_HEAD(&ns->dev_link);
	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;

	down_write(&nvmet_ana_sem);
	ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
	nvmet_ana_group_enabled[ns->anagrpid]++;
	up_write(&nvmet_ana_sem);

	uuid_gen(&ns->uuid);
	ns->buffered_io = false;

	return ns;
}

static void nvmet_update_sq_head(struct nvmet_req *req)
{
	if (req->sq->size) {
		u32 old_sqhd, new_sqhd;

		do {
			old_sqhd = req->sq->sqhd;
			new_sqhd = (old_sqhd + 1) % req->sq->size;
		} while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
				old_sqhd);
	}
	req->rsp->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
}

static void nvmet_set_error(struct nvmet_req *req, u16 status)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvme_error_slot *new_error_slot;
	unsigned long flags;

	req->rsp->status = cpu_to_le16(status << 1);

	if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC)
		return;

	spin_lock_irqsave(&ctrl->error_lock, flags);
	ctrl->err_counter++;
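	/*
	 * The error log slots form a circular buffer; the running counter
	 * picks the next slot to overwrite.
	 */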
	new_error_slot =
		&ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS];

	new_error_slot->error_count = cpu_to_le64(ctrl->err_counter);
	new_error_slot->sqid = cpu_to_le16(req->sq->qid);
	new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id);
	new_error_slot->status_field = cpu_to_le16(status << 1);
	new_error_slot->param_error_location = cpu_to_le16(req->error_loc);
	new_error_slot->lba = cpu_to_le64(req->error_slba);
	new_error_slot->nsid = req->cmd->common.nsid;
	spin_unlock_irqrestore(&ctrl->error_lock, flags);

	/* set the more bit for this request */
	req->rsp->status |= cpu_to_le16(1 << 14);
}

static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	if (!req->sq->sqhd_disabled)
		nvmet_update_sq_head(req);
	req->rsp->sq_id = cpu_to_le16(req->sq->qid);
	req->rsp->command_id = req->cmd->common.command_id;

	if (unlikely(status))
		nvmet_set_error(req, status);
	if (req->ns)
		nvmet_put_namespace(req->ns);
	req->ops->queue_response(req);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	__nvmet_req_complete(req, status);
	percpu_ref_put(&req->sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->sqhd = 0;
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

static void nvmet_confirm_sq(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->confirm_done);
}

void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
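	 * AER commands only complete once an event fires, so any that are
	 * still outstanding would otherwise keep a reference on the queue
	 * and prevent the percpu_ref below from draining.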
	 */
	if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
		nvmet_async_events_free(sq->ctrl);
	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
	wait_for_completion(&sq->confirm_done);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);

	if (sq->ctrl) {
		nvmet_ctrl_put(sq->ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq)
{
	int ret;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		return ret;
	}
	init_completion(&sq->free_done);
	init_completion(&sq->confirm_done);

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
		struct nvmet_ns *ns)
{
	enum nvme_ana_state state = port->ana_state[ns->anagrpid];

	if (unlikely(state == NVME_ANA_INACCESSIBLE))
		return NVME_SC_ANA_INACCESSIBLE;
	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
		return NVME_SC_ANA_PERSISTENT_LOSS;
	if (unlikely(state == NVME_ANA_CHANGE))
		return NVME_SC_ANA_TRANSITION;
	return 0;
}

static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
{
	if (unlikely(req->ns->readonly)) {
		switch (req->cmd->common.opcode) {
		case nvme_cmd_read:
		case nvme_cmd_flush:
			break;
		default:
			return NVME_SC_NS_WRITE_PROTECTED;
		}
	}

	return 0;
}

static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u16 ret;

	ret = nvmet_check_ctrl_status(req, cmd);
	if (unlikely(ret))
		return ret;

	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
	if (unlikely(!req->ns)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return NVME_SC_INVALID_NS | NVME_SC_DNR;
	}
	ret = nvmet_check_ana_state(req->port, req->ns);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}
	ret = nvmet_io_cmd_check_access(req);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}

	if (req->ns->file)
		return nvmet_file_parse_io_cmd(req);
	else
		return nvmet_bdev_parse_io_cmd(req);
}

bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->sg_cnt = 0;
	req->transfer_len = 0;
	req->rsp->status = 0;
	req->rsp->sq_head = 0;
	req->ns = NULL;
	req->error_loc = NVMET_NO_ERROR_LOC;
	req->error_slba = 0;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		req->error_loc = offsetof(struct nvme_common_command, flags);
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	/*
	 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
	 * contains an address of a single contiguous physical buffer that is
	 * byte aligned.
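	 * Any other PSDT/SGL descriptor encoding is rejected below as an
	 * invalid field.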
	 */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
		req->error_loc = offsetof(struct nvme_common_command, flags);
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else if (req->cmd->common.opcode == nvme_fabrics_command)
		status = nvmet_parse_fabrics_cmd(req);
	else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
		status = nvmet_parse_discovery_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (sq->ctrl)
		sq->ctrl->cmd_seen = true;

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

void nvmet_req_uninit(struct nvmet_req *req)
{
	percpu_ref_put(&req->sq->ref);
	if (req->ns)
		nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

void nvmet_req_execute(struct nvmet_req *req)
{
	if (unlikely(req->data_len != req->transfer_len)) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
	} else
		req->execute(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_execute);

int nvmet_req_alloc_sgl(struct nvmet_req *req)
{
	struct pci_dev *p2p_dev = NULL;

	if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
		if (req->sq->ctrl && req->ns)
			p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
						    req->ns->nsid);

		req->p2p_dev = NULL;
		if (req->sq->qid && p2p_dev) {
			req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
						       req->transfer_len);
			if (req->sg) {
				req->p2p_dev = p2p_dev;
				return 0;
			}
		}

		/*
		 * If no P2P memory was available we fall back to using
		 * regular memory.
		 */
	}

	req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
	if (!req->sg)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);

void nvmet_req_free_sgl(struct nvmet_req *req)
{
	if (req->p2p_dev)
		pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
	else
		sgl_free(req->sg);

	req->sg = NULL;
	req->sg_cnt = 0;
}
EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);

static inline bool nvmet_cc_en(u32 cc)
{
	return (cc >> NVME_CC_EN_SHIFT) & 0x1;
}

static inline u8 nvmet_cc_css(u32 cc)
{
	return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_mps(u32 cc)
{
	return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_ams(u32 cc)
{
	return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_shn(u32 cc)
{
	return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
}

static inline u8 nvmet_cc_iosqes(u32 cc)
{
	return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_iocqes(u32 cc)
{
	return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
}

static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	    nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
	    nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    nvmet_cc_css(ctrl->cc) != 0) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;

	/*
	 * Controllers that are not yet enabled should not really enforce the
	 * keep alive timeout, but we still want to track a timeout and clean
	 * up in case a host died before it enabled the controller.  Hence,
	 * simply reset the keep alive timer when the controller is enabled.
	 */
	mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
	u32 old;

	mutex_lock(&ctrl->lock);
	old = ctrl->cc;
	ctrl->cc = new;

	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
		nvmet_start_ctrl(ctrl);
	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
		nvmet_clear_ctrl(ctrl);
	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
		nvmet_clear_ctrl(ctrl);
		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
	}
	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
	mutex_unlock(&ctrl->lock);
}

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
	/* command sets supported: NVMe command set: */
	ctrl->cap = (1ULL << 37);
	/* CC.EN timeout in 500msec units: */
	ctrl->cap |= (15ULL << 24);
	/* maximum queue entries supported: */
	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
}

u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
		struct nvmet_req *req, struct nvmet_ctrl **ret)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	u16 status = 0;

	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	}

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->cntlid == cntlid) {
			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
				pr_warn("hostnqn mismatch.\n");
				continue;
			}
			if (!kref_get_unless_zero(&ctrl->ref))
				continue;

			*ret = ctrl;
			goto out;
		}
	}

	pr_warn("could not find controller %d for subsys %s / host %s\n",
		cntlid, subsysnqn, hostnqn);
	req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;

out:
	mutex_unlock(&subsys->lock);
	nvmet_subsys_put(subsys);
	return status;
}

u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
{
	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
		pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
		       cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}

	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
		pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
		       cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}
	return 0;
}

bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn)
{
	struct nvmet_host_link *p;

	lockdep_assert_held(&nvmet_config_sem);

	if (subsys->allow_any_host)
		return true;

	if (subsys->type == NVME_NQN_DISC) /* allow all access to disc subsys */
		return true;

	list_for_each_entry(p, &subsys->hosts, entry) {
		if (!strcmp(nvmet_host_name(p->host), hostnqn))
			return true;
	}

	return false;
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
		struct nvmet_req *req)
{
	struct nvmet_ns *ns;

	if (!req->p2p_client)
		return;

	ctrl->p2p_client = get_device(req->p2p_client);

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
{
	struct radix_tree_iter iter;
	void __rcu **slot;

	radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0)
		pci_dev_put(radix_tree_deref_slot(slot));

	put_device(ctrl->p2p_client);
}

static void nvmet_fatal_error_handler(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, fatal_err_work);

	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
	ctrl->ops->delete_ctrl(ctrl);
}

u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	int ret;
	u16 status;

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		goto out;
	}

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	down_read(&nvmet_config_sem);
	if (!nvmet_host_allowed(subsys, hostnqn)) {
		pr_info("connect by host %s for subsystem %s not allowed\n",
			hostnqn, subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
		up_read(&nvmet_config_sem);
		status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
		goto out_put_subsystem;
	}
	up_read(&nvmet_config_sem);

	status = NVME_SC_INTERNAL;
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		goto out_put_subsystem;
	mutex_init(&ctrl->lock);

	nvmet_init_cap(ctrl);

	ctrl->port = req->port;

	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);
	INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
	INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);

	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);

	kref_init(&ctrl->ref);
	ctrl->subsys = subsys;
	WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);

	ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
			sizeof(__le32), GFP_KERNEL);
	if (!ctrl->changed_ns_list)
		goto out_free_ctrl;

	ctrl->cqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_cq *),
			GFP_KERNEL);
	if (!ctrl->cqs)
		goto out_free_changed_ns_list;
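
	/*
	 * Like the cqs array above, reserve one submission queue pointer per
	 * possible queue ID, with the admin queue at index 0.
	 */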
	ctrl->sqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_sq *),
			GFP_KERNEL);
	if (!ctrl->sqs)
		goto out_free_cqs;

	ret = ida_simple_get(&cntlid_ida,
			     NVME_CNTLID_MIN, NVME_CNTLID_MAX,
			     GFP_KERNEL);
	if (ret < 0) {
		status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
		goto out_free_sqs;
	}
	ctrl->cntlid = ret;

	ctrl->ops = req->ops;

	/*
	 * Discovery controllers may use some arbitrary high value
	 * in order to clean up stale discovery sessions
	 */
	if ((ctrl->subsys->type == NVME_NQN_DISC) && !kato)
		kato = NVMET_DISC_KATO_MS;

	/* keep-alive timeout in seconds */
	ctrl->kato = DIV_ROUND_UP(kato, 1000);

	ctrl->err_counter = 0;
	spin_lock_init(&ctrl->error_lock);

	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	nvmet_setup_p2p_ns_map(ctrl, req);
	mutex_unlock(&subsys->lock);

	*ctrlp = ctrl;
	return 0;

out_free_sqs:
	kfree(ctrl->sqs);
out_free_cqs:
	kfree(ctrl->cqs);
out_free_changed_ns_list:
	kfree(ctrl->changed_ns_list);
out_free_ctrl:
	kfree(ctrl);
out_put_subsystem:
	nvmet_subsys_put(subsys);
out:
	return status;
}

static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	mutex_lock(&subsys->lock);
	nvmet_release_p2p_ns_map(ctrl);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	nvmet_stop_keep_alive_timer(ctrl);

	flush_work(&ctrl->async_event_work);
	cancel_work_sync(&ctrl->fatal_err_work);

	ida_simple_remove(&cntlid_ida, ctrl->cntlid);

	kfree(ctrl->sqs);
	kfree(ctrl->cqs);
	kfree(ctrl->changed_ns_list);
	kfree(ctrl);

	nvmet_subsys_put(subsys);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvmet_ctrl_free);
}

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
	mutex_lock(&ctrl->lock);
	if (!(ctrl->csts & NVME_CSTS_CFS)) {
		ctrl->csts |= NVME_CSTS_CFS;
		schedule_work(&ctrl->fatal_err_work);
	}
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn)
{
	struct nvmet_subsys_link *p;

	if (!port)
		return NULL;

	if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
			return NULL;
		return nvmet_disc_subsys;
	}

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry) {
		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
			if (!kref_get_unless_zero(&p->subsys->ref))
				break;
			up_read(&nvmet_config_sem);
			return p->subsys;
		}
	}
	up_read(&nvmet_config_sem);
	return NULL;
}

struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type)
{
	struct nvmet_subsys *subsys;

	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
	if (!subsys)
		return NULL;

	subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
	/* generate a random serial number as our controllers are ephemeral: */
	get_random_bytes(&subsys->serial, sizeof(subsys->serial));

	switch (type) {
	case NVME_NQN_NVME:
		subsys->max_qid = NVMET_NR_QUEUES;
		break;
	case NVME_NQN_DISC:
		subsys->max_qid = 0;
		break;
	default:
		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
		kfree(subsys);
		return NULL;
	}
	subsys->type = type;
	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
			GFP_KERNEL);
	if (!subsys->subsysnqn) {
		kfree(subsys);
		return NULL;
	}

	kref_init(&subsys->ref);

	mutex_init(&subsys->lock);
	INIT_LIST_HEAD(&subsys->namespaces);
	INIT_LIST_HEAD(&subsys->ctrls);
	INIT_LIST_HEAD(&subsys->hosts);

	return subsys;
}

static void nvmet_subsys_free(struct kref *ref)
{
	struct nvmet_subsys *subsys =
		container_of(ref, struct nvmet_subsys, ref);

	WARN_ON_ONCE(!list_empty(&subsys->namespaces));

	kfree(subsys->subsysnqn);
	kfree(subsys);
}

void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		ctrl->ops->delete_ctrl(ctrl);
	mutex_unlock(&subsys->lock);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
	kref_put(&subsys->ref, nvmet_subsys_free);
}

static int __init nvmet_init(void)
{
	int error;

	nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;

	buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
			WQ_MEM_RECLAIM, 0);
	if (!buffered_io_wq) {
		error = -ENOMEM;
		goto out;
	}

	error = nvmet_init_discovery();
	if (error)
		goto out_free_work_queue;

	error = nvmet_init_configfs();
	if (error)
		goto out_exit_discovery;
	return 0;

out_exit_discovery:
	nvmet_exit_discovery();
out_free_work_queue:
	destroy_workqueue(buffered_io_wq);
out:
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();
	ida_destroy(&cntlid_ida);
	destroy_workqueue(buffered_io_wq);

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_LICENSE("GPL v2");