// SPDX-License-Identifier: GPL-2.0
/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/pci-p2pdma.h>
#include <linux/scatterlist.h>

#include <generated/utsrelease.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#include "nvmet.h"
#include "debugfs.h"

struct kmem_cache *nvmet_bvec_cache;
struct workqueue_struct *buffered_io_wq;
struct workqueue_struct *zbd_wq;
/* One registered transport per NVMF_TRTYPE_* slot, under nvmet_config_sem. */
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

struct workqueue_struct *nvmet_wq;
EXPORT_SYMBOL_GPL(nvmet_wq);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources to be protected by this semaphore is:
 *
 * - subsystems list
 * - per-subsystem allowed hosts list
 * - allow_any_host subsystem attribute
 * - nvmet_genctr
 * - the nvmet_transports array
 *
 * When updating any of those lists/structures write lock should be obtained,
 * while when reading (populating discovery log page or checking host-subsystem
 * link) read lock is obtained to allow concurrent reads.
 */
DECLARE_RWSEM(nvmet_config_sem);

u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
u64 nvmet_ana_chgcnt;
DECLARE_RWSEM(nvmet_ana_sem);

/*
 * Translate a negative errno from a backend into an NVMe status code,
 * recording the offending command field in req->error_loc for the error log.
 */
inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
{
	switch (errno) {
	case 0:
		return NVME_SC_SUCCESS;
	case -ENOSPC:
		req->error_loc = offsetof(struct nvme_rw_command, length);
		return NVME_SC_CAP_EXCEEDED | NVME_STATUS_DNR;
	case -EREMOTEIO:
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		return NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
	case -EOPNOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		switch (req->cmd->common.opcode) {
		case nvme_cmd_dsm:
		case nvme_cmd_write_zeroes:
			return NVME_SC_ONCS_NOT_SUPPORTED | NVME_STATUS_DNR;
		default:
			return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
		}
		break; /* NOTE(review): unreachable, both inner branches return */
	case -ENODATA:
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		return NVME_SC_ACCESS_DENIED;
	case -EIO:
		fallthrough;
	default:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;
	}
}

/* Log and fail a command whose opcode no handler claimed. */
u16 nvmet_report_invalid_opcode(struct nvmet_req *req)
{
	pr_debug("unhandled cmd %d on qid %d\n", req->cmd->common.opcode,
		 req->sq->qid);

	req->error_loc = offsetof(struct nvme_common_command, opcode);
	return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

/* Copy @len bytes from @buf into the request's data SGL at offset @off. */
u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
	}
	return 0;
}

/* Copy @len bytes out of the request's data SGL at offset @off into @buf. */
u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
	}
	return 0;
}

/* Zero @len bytes of the request's data SGL starting at offset @off. */
u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
	if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
	}
	return 0;
}

/*
 * Highest enabled nsid in the subsystem; relies on the xarray iteration
 * visiting namespaces in ascending index order.
 */
static u32 nvmet_max_nsid(struct nvmet_subsys *subsys)
{
	struct nvmet_ns *cur;
	unsigned long idx;
	u32 nsid = 0;

	nvmet_for_each_enabled_ns(&subsys->namespaces, idx, cur)
		nsid = cur->nsid;

	return nsid;
}

/* Pack event type/info/log-page into the AER completion result dword. */
static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

/*
 * Fail every outstanding AER command with an internal error (e.g. on admin
 * queue teardown).  ctrl->lock is dropped around each completion because
 * nvmet_req_complete() may re-enter code that takes it.
 */
static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	mutex_lock(&ctrl->lock);
	while (ctrl->nr_async_event_cmds) {
		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_STATUS_DNR);
		mutex_lock(&ctrl->lock);
	}
	mutex_unlock(&ctrl->lock);
}

/*
 * Pair queued async events with outstanding AER commands and complete them,
 * oldest event first.  ctrl->lock is dropped for each completion.
 */
static void nvmet_async_events_process(struct nvmet_ctrl *ctrl)
{
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	mutex_lock(&ctrl->lock);
	while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) {
		aen = list_first_entry(&ctrl->async_events,
				struct nvmet_async_event, entry);
		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		trace_nvmet_async_event(ctrl, req->cqe->result.u32);
		nvmet_req_complete(req, 0);
		mutex_lock(&ctrl->lock);
	}
	mutex_unlock(&ctrl->lock);
}

/* Discard queued-but-undelivered async events (controller teardown). */
static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_async_event *aen, *tmp;

	mutex_lock(&ctrl->lock);
	list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) {
		list_del(&aen->entry);
		kfree(aen);
	}
	mutex_unlock(&ctrl->lock);
}

static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);

	nvmet_async_events_process(ctrl);
}

/*
 * Queue an async event and kick the work item that delivers it once the
 * host has an AER command outstanding.  Allocation failure is silently
 * ignored - AENs are best-effort.
 */
void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	queue_work(nvmet_wq, &ctrl->async_event_work);
}

/*
 * Record @nsid in the controller's changed-namespace log.  Once more than
 * NVME_MAX_CHANGED_NAMESPACES ids accumulate, the list collapses to the
 * single 0xffffffff "too many" marker; nr_changed_ns == U32_MAX flags that
 * saturated state.
 */
static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	u32 i;

	mutex_lock(&ctrl->lock);
	if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
		goto out_unlock;

	for (i = 0; i < ctrl->nr_changed_ns; i++) {
		if (ctrl->changed_ns_list[i] == nsid)
			goto out_unlock;
	}

	if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
		ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
		ctrl->nr_changed_ns = U32_MAX;
		goto out_unlock;
	}

	ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
out_unlock:
	mutex_unlock(&ctrl->lock);
}

/*
 * Notify all controllers of @subsys that @nsid changed: log it and, unless
 * the host masked the namespace-attribute AEN, send the event.
 */
void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ctrl *ctrl;

	lockdep_assert_held(&subsys->lock);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
				NVME_AER_NOTICE_NS_CHANGED,
				NVME_LOG_CHANGED_NS);
	}
}

/*
 * Send an ANA-change AEN to every controller of @subsys; if @port is
 * non-NULL only controllers connected through that port are notified.
 */
void nvmet_send_ana_event(struct nvmet_subsys *subsys,
		struct nvmet_port *port)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (port && ctrl->port != port)
			continue;
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
				NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
	}
	mutex_unlock(&subsys->lock);
}

/* ANA change on a port: notify every subsystem exported through it. */
void nvmet_port_send_ana_event(struct nvmet_port *port)
{
	struct nvmet_subsys_link *p;

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry)
		nvmet_send_ana_event(p->subsys, port);
	up_read(&nvmet_config_sem);
}

/* Register a fabrics transport in its ops->type slot; no overwrite allowed. */
int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);

/* Tear down every controller of @subsys that was created through @port. */
void nvmet_port_del_ctrls(struct nvmet_port *port, struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->port == port)
			ctrl->ops->delete_ctrl(ctrl);
	}
	mutex_unlock(&subsys->lock);
}

/*
 * Bring a port online: resolve (and if needed modprobe) the transport,
 * validate PI support, call the transport's add_port() and normalize the
 * inline_data_size / max_queue_size defaults.
 */
int nvmet_enable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	if (port->disc_addr.trtype == NVMF_TRTYPE_MAX)
		return -EINVAL;

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		/*
		 * Drop the config semaphore while loading the transport
		 * module; its init path registers the transport, which takes
		 * the semaphore again.
		 */
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	/*
	 * If the user requested PI support and the transport isn't pi capable,
	 * don't enable the port.
	 */
	if (port->pi_enable && !(ops->flags & NVMF_METADATA_SUPPORTED)) {
		pr_err("T10-PI is not supported by transport type %d\n",
		       port->disc_addr.trtype);
		ret = -EINVAL;
		goto out_put;
	}

	ret = ops->add_port(port);
	if (ret)
		goto out_put;

	/* If the transport didn't set inline_data_size, then disable it. */
	if (port->inline_data_size < 0)
		port->inline_data_size = 0;

	/*
	 * If the transport didn't set the max_queue_size properly, then clamp
	 * it to the target limits. Also set default values in case the
	 * transport didn't set it at all.
	 */
	if (port->max_queue_size < 0)
		port->max_queue_size = NVMET_MAX_QUEUE_SIZE;
	else
		port->max_queue_size = clamp_t(int, port->max_queue_size,
					       NVMET_MIN_QUEUE_SIZE,
					       NVMET_MAX_QUEUE_SIZE);

	port->enabled = true;
	port->tr_ops = ops;
	return 0;

out_put:
	module_put(ops->owner);
	return ret;
}

/* Take a port offline and drop the transport module reference. */
void nvmet_disable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;
	port->tr_ops = NULL;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

/*
 * Keep-alive expiry.  If any command completed since the timer was armed
 * (TBKAS - traffic based keep-alive), treat that as host liveness and
 * re-arm instead of declaring a fatal error.
 */
static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);
	bool reset_tbkas = ctrl->reset_tbkas;

	ctrl->reset_tbkas = false;
	if (reset_tbkas) {
		pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
			ctrl->cntlid);
		queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
		return;
	}

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	/* kato == 0 means keep-alive is disabled for this controller */
	if (unlikely(ctrl->kato == 0))
		return;

	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	if (unlikely(ctrl->kato == 0))
		return;

	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}

/*
 * Resolve the command's nsid to req->ns and take a percpu ref on it.
 * Nonexistent nsid -> INVALID_NS (DNR); namespace that exists but is
 * disabled -> INTERNAL_PATH_ERROR (retryable, no DNR).
 */
u16 nvmet_req_find_ns(struct nvmet_req *req)
{
	u32 nsid = le32_to_cpu(req->cmd->common.nsid);
	struct nvmet_subsys *subsys = nvmet_req_subsys(req);

	req->ns = xa_load(&subsys->namespaces, nsid);
	if (unlikely(!req->ns || !req->ns->enabled)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		if (!req->ns) /* ns doesn't exist! */
			return NVME_SC_INVALID_NS | NVME_STATUS_DNR;

		/* ns exists but it's disabled */
		req->ns = NULL;
		return NVME_SC_INTERNAL_PATH_ERROR;
	}

	percpu_ref_get(&req->ns->ref);
	return NVME_SC_SUCCESS;
}

/* percpu_ref release callback: wakes the nvmet_ns_disable() waiter. */
static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

/* Disable whichever backend (bdev or file) the namespace is using. */
static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
{
	nvmet_bdev_ns_disable(ns);
	nvmet_file_ns_disable(ns);
}

/*
 * Validate that peer-to-peer DMA can work for this namespace: block-device
 * backend, p2pdma-capable driver, and either the explicitly configured
 * p2p_dev is reachable or some p2p memory is available.
 */
static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
{
	int ret;
	struct pci_dev *p2p_dev;

	if (!ns->use_p2pmem)
		return 0;

	if (!ns->bdev) {
		pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
		return -EINVAL;
	}

	if (!blk_queue_pci_p2pdma(ns->bdev->bd_disk->queue)) {
		pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
		       ns->device_path);
		return -EINVAL;
	}

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
		if (ret < 0)
			return -EINVAL;
	} else {
		/*
		 * Right now we just check that there is p2pmem available so
		 * we can report an error to the user right away if there
		 * is not. We'll find the actual device to use once we
		 * setup the controller when the port's device is available.
508 */ 509 510 p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns)); 511 if (!p2p_dev) { 512 pr_err("no peer-to-peer memory is available for %s\n", 513 ns->device_path); 514 return -EINVAL; 515 } 516 517 pci_dev_put(p2p_dev); 518 } 519 520 return 0; 521 } 522 523 /* 524 * Note: ctrl->subsys->lock should be held when calling this function 525 */ 526 static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl, 527 struct nvmet_ns *ns) 528 { 529 struct device *clients[2]; 530 struct pci_dev *p2p_dev; 531 int ret; 532 533 if (!ctrl->p2p_client || !ns->use_p2pmem) 534 return; 535 536 if (ns->p2p_dev) { 537 ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true); 538 if (ret < 0) 539 return; 540 541 p2p_dev = pci_dev_get(ns->p2p_dev); 542 } else { 543 clients[0] = ctrl->p2p_client; 544 clients[1] = nvmet_ns_dev(ns); 545 546 p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients)); 547 if (!p2p_dev) { 548 pr_err("no peer-to-peer memory is available that's supported by %s and %s\n", 549 dev_name(ctrl->p2p_client), ns->device_path); 550 return; 551 } 552 } 553 554 ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev); 555 if (ret < 0) 556 pci_dev_put(p2p_dev); 557 558 pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev), 559 ns->nsid); 560 } 561 562 bool nvmet_ns_revalidate(struct nvmet_ns *ns) 563 { 564 loff_t oldsize = ns->size; 565 566 if (ns->bdev) 567 nvmet_bdev_ns_revalidate(ns); 568 else 569 nvmet_file_ns_revalidate(ns); 570 571 return oldsize != ns->size; 572 } 573 574 int nvmet_ns_enable(struct nvmet_ns *ns) 575 { 576 struct nvmet_subsys *subsys = ns->subsys; 577 struct nvmet_ctrl *ctrl; 578 int ret; 579 580 mutex_lock(&subsys->lock); 581 ret = 0; 582 583 if (nvmet_is_passthru_subsys(subsys)) { 584 pr_info("cannot enable both passthru and regular namespaces for a single subsystem"); 585 goto out_unlock; 586 } 587 588 if (ns->enabled) 589 goto out_unlock; 590 591 ret = -EMFILE; 592 593 ret = nvmet_bdev_ns_enable(ns); 594 if (ret == -ENOTBLK) 595 
ret = nvmet_file_ns_enable(ns); 596 if (ret) 597 goto out_unlock; 598 599 ret = nvmet_p2pmem_ns_enable(ns); 600 if (ret) 601 goto out_dev_disable; 602 603 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) 604 nvmet_p2pmem_ns_add_p2p(ctrl, ns); 605 606 if (ns->pr.enable) { 607 ret = nvmet_pr_init_ns(ns); 608 if (ret) 609 goto out_dev_put; 610 } 611 612 if (percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL)) 613 goto out_pr_exit; 614 615 nvmet_ns_changed(subsys, ns->nsid); 616 ns->enabled = true; 617 xa_set_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED); 618 ret = 0; 619 out_unlock: 620 mutex_unlock(&subsys->lock); 621 return ret; 622 out_pr_exit: 623 if (ns->pr.enable) 624 nvmet_pr_exit_ns(ns); 625 out_dev_put: 626 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) 627 pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid)); 628 out_dev_disable: 629 nvmet_ns_dev_disable(ns); 630 goto out_unlock; 631 } 632 633 void nvmet_ns_disable(struct nvmet_ns *ns) 634 { 635 struct nvmet_subsys *subsys = ns->subsys; 636 struct nvmet_ctrl *ctrl; 637 638 mutex_lock(&subsys->lock); 639 if (!ns->enabled) 640 goto out_unlock; 641 642 ns->enabled = false; 643 xa_clear_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED); 644 645 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) 646 pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid)); 647 648 mutex_unlock(&subsys->lock); 649 650 /* 651 * Now that we removed the namespaces from the lookup list, we 652 * can kill the per_cpu ref and wait for any remaining references 653 * to be dropped, as well as a RCU grace period for anyone only 654 * using the namepace under rcu_read_lock(). Note that we can't 655 * use call_rcu here as we need to ensure the namespaces have 656 * been fully destroyed before unloading the module. 
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	if (ns->pr.enable)
		nvmet_pr_exit_ns(ns);

	mutex_lock(&subsys->lock);
	nvmet_ns_changed(subsys, ns->nsid);
	nvmet_ns_dev_disable(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
}

/* Disable and free a namespace, updating subsystem/ANA bookkeeping. */
void nvmet_ns_free(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;

	nvmet_ns_disable(ns);

	mutex_lock(&subsys->lock);

	xa_erase(&subsys->namespaces, ns->nsid);
	if (ns->nsid == subsys->max_nsid)
		subsys->max_nsid = nvmet_max_nsid(subsys);

	subsys->nr_namespaces--;
	mutex_unlock(&subsys->lock);

	down_write(&nvmet_ana_sem);
	nvmet_ana_group_enabled[ns->anagrpid]--;
	up_write(&nvmet_ana_sem);

	kfree(ns->device_path);
	kfree(ns);
}

/*
 * Allocate a (disabled) namespace object for @nsid and insert it into the
 * subsystem's namespace xarray.  Returns NULL when the namespace limit is
 * reached, on allocation failure, or if @nsid is already present.
 */
struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	mutex_lock(&subsys->lock);

	if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
		goto out_unlock;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		goto out_unlock;

	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = nsid;

	if (xa_insert(&subsys->namespaces, ns->nsid, ns, GFP_KERNEL))
		goto out_exit;

	subsys->nr_namespaces++;

	mutex_unlock(&subsys->lock);

	down_write(&nvmet_ana_sem);
	ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
	nvmet_ana_group_enabled[ns->anagrpid]++;
	up_write(&nvmet_ana_sem);

	uuid_gen(&ns->uuid);
	ns->buffered_io = false;
	ns->csi = NVME_CSI_NVM;

	return ns;
out_exit:
	/* Undo the speculative max_nsid bump from above. */
	subsys->max_nsid = nvmet_max_nsid(subsys);
	kfree(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
	return NULL;
}

/* Lockless sq_head advance; sq->size == 0 means head reporting is off. */
static void nvmet_update_sq_head(struct nvmet_req *req)
{
	if (req->sq->size) {
		u32 old_sqhd, new_sqhd;

		old_sqhd = READ_ONCE(req->sq->sqhd);
		do {
			new_sqhd = (old_sqhd + 1) % req->sq->size;
		} while (!try_cmpxchg(&req->sq->sqhd, &old_sqhd, new_sqhd));
	}
	req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
}

/*
 * Record a failed command in the controller's error log and set the CQE
 * status, with the "more" bit pointing the host at the log entry.
 */
static void nvmet_set_error(struct nvmet_req *req, u16 status)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvme_error_slot *new_error_slot;
	unsigned long flags;

	req->cqe->status = cpu_to_le16(status << 1);

	if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC)
		return;

	spin_lock_irqsave(&ctrl->error_lock, flags);
	ctrl->err_counter++;
	new_error_slot =
		&ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS];

	new_error_slot->error_count = cpu_to_le64(ctrl->err_counter);
	new_error_slot->sqid = cpu_to_le16(req->sq->qid);
	new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id);
	new_error_slot->status_field = cpu_to_le16(status << 1);
	new_error_slot->param_error_location = cpu_to_le16(req->error_loc);
	new_error_slot->lba = cpu_to_le64(req->error_slba);
	new_error_slot->nsid = req->cmd->common.nsid;
	spin_unlock_irqrestore(&ctrl->error_lock, flags);

	/* set the more bit for this request */
	req->cqe->status |= cpu_to_le16(1 << 14);
}

/* Fill the CQE, queue the response, then drop per-request ns/PR refs. */
static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	struct nvmet_ns *ns = req->ns;
	struct nvmet_pr_per_ctrl_ref *pc_ref = req->pc_ref;

	if (!req->sq->sqhd_disabled)
		nvmet_update_sq_head(req);
	req->cqe->sq_id = cpu_to_le16(req->sq->qid);
	req->cqe->command_id = req->cmd->common.command_id;

	if (unlikely(status))
		nvmet_set_error(req, status);

	trace_nvmet_req_complete(req);

	req->ops->queue_response(req);

	if (pc_ref)
		nvmet_pr_put_ns_pc_ref(pc_ref);
	if (ns)
		nvmet_put_namespace(ns);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	struct nvmet_sq *sq = req->sq;

	__nvmet_req_complete(req, status);
	percpu_ref_put(&sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_init(struct nvmet_cq *cq)
{
	refcount_set(&cq->ref, 1);
}
EXPORT_SYMBOL_GPL(nvmet_cq_init);

bool nvmet_cq_get(struct nvmet_cq *cq)
{
	return refcount_inc_not_zero(&cq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_cq_get);

void nvmet_cq_put(struct nvmet_cq *cq)
{
	if (refcount_dec_and_test(&cq->ref))
		nvmet_cq_destroy(cq);
}
EXPORT_SYMBOL_GPL(nvmet_cq_put);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

/* Unregister the CQ and drop the ctrl ref taken in nvmet_cq_create(). */
void nvmet_cq_destroy(struct nvmet_cq *cq)
{
	struct nvmet_ctrl *ctrl = cq->ctrl;

	if (ctrl) {
		ctrl->cqs[cq->qid] = NULL;
		nvmet_ctrl_put(cq->ctrl);
		cq->ctrl = NULL;
	}
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->sqhd = 0;
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

/* percpu_ref kill confirmation: lets nvmet_sq_destroy() proceed. */
static void nvmet_confirm_sq(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->confirm_done);
}

/* Validate a CQ id: in range, and free when creating / in use otherwise. */
u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create)
{
	if (!ctrl->cqs)
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;

	if (cqid > ctrl->subsys->max_qid)
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;

	if ((create && ctrl->cqs[cqid]) || (!create && !ctrl->cqs[cqid]))
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;

	return NVME_SC_SUCCESS;
}

/* As nvmet_check_cqid(), but additionally rejects the admin CQ id 0. */
u16 nvmet_check_io_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create)
{
	if (!cqid)
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
	return nvmet_check_cqid(ctrl, cqid, create);
}

bool
nvmet_cq_in_use(struct nvmet_cq *cq) 894 { 895 return refcount_read(&cq->ref) > 1; 896 } 897 EXPORT_SYMBOL_GPL(nvmet_cq_in_use); 898 899 u16 nvmet_cq_create(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, 900 u16 qid, u16 size) 901 { 902 u16 status; 903 904 status = nvmet_check_cqid(ctrl, qid, true); 905 if (status != NVME_SC_SUCCESS) 906 return status; 907 908 if (!kref_get_unless_zero(&ctrl->ref)) 909 return NVME_SC_INTERNAL | NVME_STATUS_DNR; 910 cq->ctrl = ctrl; 911 912 nvmet_cq_init(cq); 913 nvmet_cq_setup(ctrl, cq, qid, size); 914 915 return NVME_SC_SUCCESS; 916 } 917 EXPORT_SYMBOL_GPL(nvmet_cq_create); 918 919 u16 nvmet_check_sqid(struct nvmet_ctrl *ctrl, u16 sqid, 920 bool create) 921 { 922 if (!ctrl->sqs) 923 return NVME_SC_INTERNAL | NVME_STATUS_DNR; 924 925 if (sqid > ctrl->subsys->max_qid) 926 return NVME_SC_QID_INVALID | NVME_STATUS_DNR; 927 928 if ((create && ctrl->sqs[sqid]) || 929 (!create && !ctrl->sqs[sqid])) 930 return NVME_SC_QID_INVALID | NVME_STATUS_DNR; 931 932 return NVME_SC_SUCCESS; 933 } 934 935 u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, 936 struct nvmet_cq *cq, u16 sqid, u16 size) 937 { 938 u16 status; 939 int ret; 940 941 if (!kref_get_unless_zero(&ctrl->ref)) 942 return NVME_SC_INTERNAL | NVME_STATUS_DNR; 943 944 status = nvmet_check_sqid(ctrl, sqid, true); 945 if (status != NVME_SC_SUCCESS) 946 return status; 947 948 ret = nvmet_sq_init(sq, cq); 949 if (ret) { 950 status = NVME_SC_INTERNAL | NVME_STATUS_DNR; 951 goto ctrl_put; 952 } 953 954 nvmet_sq_setup(ctrl, sq, sqid, size); 955 sq->ctrl = ctrl; 956 957 return NVME_SC_SUCCESS; 958 959 ctrl_put: 960 nvmet_ctrl_put(ctrl); 961 return status; 962 } 963 EXPORT_SYMBOL_GPL(nvmet_sq_create); 964 965 void nvmet_sq_destroy(struct nvmet_sq *sq) 966 { 967 struct nvmet_ctrl *ctrl = sq->ctrl; 968 969 /* 970 * If this is the admin queue, complete all AERs so that our 971 * queue doesn't have outstanding requests on it. 
	 */
	if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
		nvmet_async_events_failall(ctrl);
	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
	wait_for_completion(&sq->confirm_done);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);
	nvmet_auth_sq_free(sq);
	nvmet_cq_put(sq->cq);

	/*
	 * we must reference the ctrl again after waiting for inflight IO
	 * to complete. Because admin connect may have sneaked in after we
	 * store sq->ctrl locally, but before we killed the percpu_ref. the
	 * admin connect allocates and assigns sq->ctrl, which now needs a
	 * final ref put, as this ctrl is going away.
	 */
	ctrl = sq->ctrl;

	if (ctrl) {
		/*
		 * The teardown flow may take some time, and the host may not
		 * send us keep-alive during this period, hence reset the
		 * traffic based keep-alive timer so we don't trigger a
		 * controller teardown as a result of a keep-alive expiration.
		 */
		ctrl->reset_tbkas = true;
		sq->ctrl->sqs[sq->qid] = NULL;
		nvmet_ctrl_put(ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

/* percpu_ref release callback: the last request reference is gone. */
static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

/* Initialize an SQ and bind it to @cq (takes a CQ reference). */
int nvmet_sq_init(struct nvmet_sq *sq, struct nvmet_cq *cq)
{
	int ret;

	if (!nvmet_cq_get(cq))
		return -EINVAL;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		nvmet_cq_put(cq);
		return ret;
	}
	init_completion(&sq->free_done);
	init_completion(&sq->confirm_done);
	nvmet_auth_sq_init(sq);
	sq->cq = cq;

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

/* Map the namespace's ANA state on this port to an NVMe status. */
static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
		struct nvmet_ns *ns)
{
	enum nvme_ana_state state = port->ana_state[ns->anagrpid];

	if (unlikely(state == NVME_ANA_INACCESSIBLE))
		return NVME_SC_ANA_INACCESSIBLE;
	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
		return NVME_SC_ANA_PERSISTENT_LOSS;
	if (unlikely(state == NVME_ANA_CHANGE))
		return NVME_SC_ANA_TRANSITION;
	return 0;
}

/* Read-only namespaces accept only reads and flushes. */
static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
{
	if (unlikely(req->ns->readonly)) {
		switch (req->cmd->common.opcode) {
		case nvme_cmd_read:
		case nvme_cmd_flush:
			break;
		default:
			return NVME_SC_NS_WRITE_PROTECTED;
		}
	}

	return 0;
}

/* Expected transfer length (data plus metadata) for an I/O command. */
static u32 nvmet_io_cmd_transfer_len(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u32 metadata_len = 0;

	if (nvme_is_fabrics(cmd))
		return nvmet_fabrics_io_cmd_data_len(req);

	if (!req->ns)
		return 0;

	switch (req->cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
	case nvme_cmd_zone_append:
		if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
			metadata_len = nvmet_rw_metadata_len(req);
		return nvmet_rw_data_len(req) + metadata_len;
	case nvme_cmd_dsm:
		return nvmet_dsm_len(req);
	case nvme_cmd_zone_mgmt_recv:
		/* numd is a 0's based dword count */
		return (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2;
	default:
		return 0;
	}
}

/*
 * Dispatch an I/O command to its handler - fabrics, passthru, persistent
 * reservation, or the namespace backend (file/bdev/zns) - after auth,
 * controller-state, ANA and write-protection checks.
 */
static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u16 ret;

	if (nvme_is_fabrics(cmd))
		return nvmet_parse_fabrics_io_cmd(req);

	if (unlikely(!nvmet_check_auth_status(req)))
		return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;

	ret = nvmet_check_ctrl_status(req);
	if (unlikely(ret))
		return ret;

	if (nvmet_is_passthru_req(req))
		return nvmet_parse_passthru_io_cmd(req);

	ret = nvmet_req_find_ns(req);
	if (unlikely(ret))
		return ret;
	ret = nvmet_check_ana_state(req->port, req->ns);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}
	ret = nvmet_io_cmd_check_access(req);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}

	/* Reservation commands have their own parser; 0 means it claimed it. */
	if (req->ns->pr.enable) {
		ret = nvmet_parse_pr_cmd(req);
		if (!ret)
			return ret;
	}

	switch (req->ns->csi) {
	case NVME_CSI_NVM:
		if (req->ns->file)
			ret = nvmet_file_parse_io_cmd(req);
		else
			ret = nvmet_bdev_parse_io_cmd(req);
		break;
	case NVME_CSI_ZNS:
		if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
			ret = nvmet_bdev_zns_parse_io_cmd(req);
		else
			ret = NVME_SC_INVALID_IO_CMD_SET;
		break;
	default:
		ret = NVME_SC_INVALID_IO_CMD_SET;
	}
	if (ret)
		return ret;

	/* With reservations enabled, check access and pin a per-ctrl ref. */
	if (req->ns->pr.enable) {
		ret = nvmet_pr_check_cmd_access(req);
		if (ret)
			return ret;

		ret = nvmet_pr_get_ns_pc_ref(req);
	}
	return ret;
}

/*
 * Common entry point for every command a transport receives: reset the
 * per-request state, parse the command, and take an SQ reference.  On any
 * failure the request is completed here and false is returned.
 */
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_sq *sq,
		const struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = sq->cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->metadata_sg = NULL;
	req->sg_cnt = 0;
	req->metadata_sg_cnt = 0;
	req->transfer_len = 0;
	req->metadata_len = 0;
	req->cqe->result.u64 = 0;
	req->cqe->status = 0;
	req->cqe->sq_head = 0;
	req->ns = NULL;
	req->error_loc = NVMET_NO_ERROR_LOC;
	req->error_slba = 0;
	req->pc_ref = NULL;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		req->error_loc = offsetof(struct nvme_common_command, flags);
		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
		goto fail;
	}

	/*
	 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
	 * contains an address of a single contiguous physical buffer that is
	 * byte aligned. For PCI controllers, this is optional so not enforced.
	 */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
		if (!req->sq->ctrl || !nvmet_is_pci_ctrl(req->sq->ctrl)) {
			req->error_loc =
				offsetof(struct nvme_common_command, flags);
			status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
			goto fail;
		}
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	trace_nvmet_req_init(req, req->cmd);

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
		goto fail;
	}

	if (sq->ctrl)
		sq->ctrl->reset_tbkas = true;

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

/* Undo nvmet_req_init() for a request that will not be executed. */
void nvmet_req_uninit(struct nvmet_req *req)
{
	percpu_ref_put(&req->sq->ref);
	if (req->pc_ref)
		nvmet_pr_put_ns_pc_ref(req->pc_ref);
	if (req->ns)
		nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

/* Expected transfer length for the parsed command (connect/admin/io). */
size_t nvmet_req_transfer_len(struct nvmet_req *req)
{
	if (likely(req->sq->qid != 0))
		return nvmet_io_cmd_transfer_len(req);
	if (unlikely(!req->sq->ctrl))
		return nvmet_connect_cmd_data_len(req);
	return nvmet_admin_cmd_data_len(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_transfer_len);

/*
 * Fail the request if the transport-provided length doesn't match the
 * command's expected transfer length.  Returns false if it completed the
 * request here.
 */
bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len)
{
	if (unlikely(len != req->transfer_len)) {
		u16 status;

		req->error_loc = offsetof(struct nvme_common_command, dptr);
		if (req->cmd->common.flags & NVME_CMD_SGL_ALL)
			status = NVME_SC_SGL_INVALID_DATA;
		else
			status = NVME_SC_INVALID_FIELD;
		nvmet_req_complete(req, status | NVME_STATUS_DNR);
		return false;
	}

	return true;
}
EXPORT_SYMBOL_GPL(nvmet_check_transfer_len);

/* Like nvmet_check_transfer_len(), but only rejects over-long data. */
bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
{
	if (unlikely(data_len > req->transfer_len)) {
		u16 status;

		req->error_loc = offsetof(struct nvme_common_command, dptr);
		if (req->cmd->common.flags & NVME_CMD_SGL_ALL)
			status = NVME_SC_SGL_INVALID_DATA;
		else
			status = NVME_SC_INVALID_FIELD;
		nvmet_req_complete(req, status | NVME_STATUS_DNR);
		return false;
	}

	return true;
}

/* Data-only portion of the transfer (metadata excluded). */
static unsigned int nvmet_data_transfer_len(struct nvmet_req *req)
{
	return req->transfer_len - req->metadata_len;
}

/* Allocate data (and metadata) SGLs out of @p2p_dev's p2p memory. */
static int nvmet_req_alloc_p2pmem_sgls(struct pci_dev *p2p_dev,
		struct nvmet_req *req)
{
	req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
			nvmet_data_transfer_len(req));
	if (!req->sg)
		goto out_err;

	if (req->metadata_len) {
		req->metadata_sg = pci_p2pmem_alloc_sgl(p2p_dev,
				&req->metadata_sg_cnt, req->metadata_len);
		if (!req->metadata_sg)
			goto out_free_sg;
	}

	req->p2p_dev = p2p_dev;

	return 0;
out_free_sg:
	/*
	 * NOTE(review): req->p2p_dev has not been assigned yet on this path;
	 * req->sg was allocated from the local @p2p_dev - verify this should
	 * not be pci_p2pmem_free_sgl(p2p_dev, req->sg).
	 */
	pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
out_err:
	return -ENOMEM;
}

/* p2p device mapped for this namespace on this controller, if any. */
static struct pci_dev *nvmet_req_find_p2p_dev(struct nvmet_req *req)
{
	if (!IS_ENABLED(CONFIG_PCI_P2PDMA) ||
	    !req->sq->ctrl || !req->sq->qid || !req->ns)
		return NULL;
	return radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, req->ns->nsid);
}

int nvmet_req_alloc_sgls(struct nvmet_req *req)
{
	struct pci_dev *p2p_dev = nvmet_req_find_p2p_dev(req);

	if (p2p_dev && !nvmet_req_alloc_p2pmem_sgls(p2p_dev, req))
1329 return 0; 1330 1331 req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL, 1332 &req->sg_cnt); 1333 if (unlikely(!req->sg)) 1334 goto out; 1335 1336 if (req->metadata_len) { 1337 req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL, 1338 &req->metadata_sg_cnt); 1339 if (unlikely(!req->metadata_sg)) 1340 goto out_free; 1341 } 1342 1343 return 0; 1344 out_free: 1345 sgl_free(req->sg); 1346 out: 1347 return -ENOMEM; 1348 } 1349 EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls); 1350 1351 void nvmet_req_free_sgls(struct nvmet_req *req) 1352 { 1353 if (req->p2p_dev) { 1354 pci_p2pmem_free_sgl(req->p2p_dev, req->sg); 1355 if (req->metadata_sg) 1356 pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg); 1357 req->p2p_dev = NULL; 1358 } else { 1359 sgl_free(req->sg); 1360 if (req->metadata_sg) 1361 sgl_free(req->metadata_sg); 1362 } 1363 1364 req->sg = NULL; 1365 req->metadata_sg = NULL; 1366 req->sg_cnt = 0; 1367 req->metadata_sg_cnt = 0; 1368 } 1369 EXPORT_SYMBOL_GPL(nvmet_req_free_sgls); 1370 1371 static inline bool nvmet_css_supported(u8 cc_css) 1372 { 1373 switch (cc_css << NVME_CC_CSS_SHIFT) { 1374 case NVME_CC_CSS_NVM: 1375 case NVME_CC_CSS_CSI: 1376 return true; 1377 default: 1378 return false; 1379 } 1380 } 1381 1382 static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) 1383 { 1384 lockdep_assert_held(&ctrl->lock); 1385 1386 /* 1387 * Only I/O controllers should verify iosqes,iocqes. 1388 * Strictly speaking, the spec says a discovery controller 1389 * should verify iosqes,iocqes are zeroed, however that 1390 * would break backwards compatibility, so don't enforce it. 
1391 */ 1392 if (!nvmet_is_disc_subsys(ctrl->subsys) && 1393 (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES || 1394 nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES)) { 1395 ctrl->csts = NVME_CSTS_CFS; 1396 return; 1397 } 1398 1399 if (nvmet_cc_mps(ctrl->cc) != 0 || 1400 nvmet_cc_ams(ctrl->cc) != 0 || 1401 !nvmet_css_supported(nvmet_cc_css(ctrl->cc))) { 1402 ctrl->csts = NVME_CSTS_CFS; 1403 return; 1404 } 1405 1406 ctrl->csts = NVME_CSTS_RDY; 1407 1408 /* 1409 * Controllers that are not yet enabled should not really enforce the 1410 * keep alive timeout, but we still want to track a timeout and cleanup 1411 * in case a host died before it enabled the controller. Hence, simply 1412 * reset the keep alive timer when the controller is enabled. 1413 */ 1414 if (ctrl->kato) 1415 mod_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ); 1416 } 1417 1418 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl) 1419 { 1420 lockdep_assert_held(&ctrl->lock); 1421 1422 /* XXX: tear down queues? */ 1423 ctrl->csts &= ~NVME_CSTS_RDY; 1424 ctrl->cc = 0; 1425 } 1426 1427 void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new) 1428 { 1429 u32 old; 1430 1431 mutex_lock(&ctrl->lock); 1432 old = ctrl->cc; 1433 ctrl->cc = new; 1434 1435 if (nvmet_cc_en(new) && !nvmet_cc_en(old)) 1436 nvmet_start_ctrl(ctrl); 1437 if (!nvmet_cc_en(new) && nvmet_cc_en(old)) 1438 nvmet_clear_ctrl(ctrl); 1439 if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) { 1440 nvmet_clear_ctrl(ctrl); 1441 ctrl->csts |= NVME_CSTS_SHST_CMPLT; 1442 } 1443 if (!nvmet_cc_shn(new) && nvmet_cc_shn(old)) 1444 ctrl->csts &= ~NVME_CSTS_SHST_CMPLT; 1445 mutex_unlock(&ctrl->lock); 1446 } 1447 EXPORT_SYMBOL_GPL(nvmet_update_cc); 1448 1449 static void nvmet_init_cap(struct nvmet_ctrl *ctrl) 1450 { 1451 /* command sets supported: NVMe command set: */ 1452 ctrl->cap = (1ULL << 37); 1453 /* Controller supports one or more I/O Command Sets */ 1454 ctrl->cap |= (1ULL << 43); 1455 /* CC.EN timeout in 500msec units: */ 1456 ctrl->cap |= (15ULL << 
24); 1457 /* maximum queue entries supported: */ 1458 if (ctrl->ops->get_max_queue_size) 1459 ctrl->cap |= min_t(u16, ctrl->ops->get_max_queue_size(ctrl), 1460 ctrl->port->max_queue_size) - 1; 1461 else 1462 ctrl->cap |= ctrl->port->max_queue_size - 1; 1463 1464 if (nvmet_is_passthru_subsys(ctrl->subsys)) 1465 nvmet_passthrough_override_cap(ctrl); 1466 } 1467 1468 struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn, 1469 const char *hostnqn, u16 cntlid, 1470 struct nvmet_req *req) 1471 { 1472 struct nvmet_ctrl *ctrl = NULL; 1473 struct nvmet_subsys *subsys; 1474 1475 subsys = nvmet_find_get_subsys(req->port, subsysnqn); 1476 if (!subsys) { 1477 pr_warn("connect request for invalid subsystem %s!\n", 1478 subsysnqn); 1479 req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); 1480 goto out; 1481 } 1482 1483 mutex_lock(&subsys->lock); 1484 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 1485 if (ctrl->cntlid == cntlid) { 1486 if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) { 1487 pr_warn("hostnqn mismatch.\n"); 1488 continue; 1489 } 1490 if (!kref_get_unless_zero(&ctrl->ref)) 1491 continue; 1492 1493 /* ctrl found */ 1494 goto found; 1495 } 1496 } 1497 1498 ctrl = NULL; /* ctrl not found */ 1499 pr_warn("could not find controller %d for subsys %s / host %s\n", 1500 cntlid, subsysnqn, hostnqn); 1501 req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); 1502 1503 found: 1504 mutex_unlock(&subsys->lock); 1505 nvmet_subsys_put(subsys); 1506 out: 1507 return ctrl; 1508 } 1509 1510 u16 nvmet_check_ctrl_status(struct nvmet_req *req) 1511 { 1512 if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { 1513 pr_err("got cmd %d while CC.EN == 0 on qid = %d\n", 1514 req->cmd->common.opcode, req->sq->qid); 1515 return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR; 1516 } 1517 1518 if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { 1519 pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n", 1520 req->cmd->common.opcode, req->sq->qid); 1521 return 
NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR; 1522 } 1523 1524 if (unlikely(!nvmet_check_auth_status(req))) { 1525 pr_warn("qid %d not authenticated\n", req->sq->qid); 1526 return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR; 1527 } 1528 return 0; 1529 } 1530 1531 bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn) 1532 { 1533 struct nvmet_host_link *p; 1534 1535 lockdep_assert_held(&nvmet_config_sem); 1536 1537 if (subsys->allow_any_host) 1538 return true; 1539 1540 if (nvmet_is_disc_subsys(subsys)) /* allow all access to disc subsys */ 1541 return true; 1542 1543 list_for_each_entry(p, &subsys->hosts, entry) { 1544 if (!strcmp(nvmet_host_name(p->host), hostnqn)) 1545 return true; 1546 } 1547 1548 return false; 1549 } 1550 1551 /* 1552 * Note: ctrl->subsys->lock should be held when calling this function 1553 */ 1554 static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl, 1555 struct device *p2p_client) 1556 { 1557 struct nvmet_ns *ns; 1558 unsigned long idx; 1559 1560 if (!p2p_client) 1561 return; 1562 1563 ctrl->p2p_client = get_device(p2p_client); 1564 1565 nvmet_for_each_enabled_ns(&ctrl->subsys->namespaces, idx, ns) 1566 nvmet_p2pmem_ns_add_p2p(ctrl, ns); 1567 } 1568 1569 /* 1570 * Note: ctrl->subsys->lock should be held when calling this function 1571 */ 1572 static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl) 1573 { 1574 struct radix_tree_iter iter; 1575 void __rcu **slot; 1576 1577 radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0) 1578 pci_dev_put(radix_tree_deref_slot(slot)); 1579 1580 put_device(ctrl->p2p_client); 1581 } 1582 1583 static void nvmet_fatal_error_handler(struct work_struct *work) 1584 { 1585 struct nvmet_ctrl *ctrl = 1586 container_of(work, struct nvmet_ctrl, fatal_err_work); 1587 1588 pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid); 1589 ctrl->ops->delete_ctrl(ctrl); 1590 } 1591 1592 struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args) 1593 { 1594 struct nvmet_subsys *subsys; 
1595 struct nvmet_ctrl *ctrl; 1596 u32 kato = args->kato; 1597 u8 dhchap_status; 1598 int ret; 1599 1600 args->status = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR; 1601 subsys = nvmet_find_get_subsys(args->port, args->subsysnqn); 1602 if (!subsys) { 1603 pr_warn("connect request for invalid subsystem %s!\n", 1604 args->subsysnqn); 1605 args->result = IPO_IATTR_CONNECT_DATA(subsysnqn); 1606 args->error_loc = offsetof(struct nvme_common_command, dptr); 1607 return NULL; 1608 } 1609 1610 down_read(&nvmet_config_sem); 1611 if (!nvmet_host_allowed(subsys, args->hostnqn)) { 1612 pr_info("connect by host %s for subsystem %s not allowed\n", 1613 args->hostnqn, args->subsysnqn); 1614 args->result = IPO_IATTR_CONNECT_DATA(hostnqn); 1615 up_read(&nvmet_config_sem); 1616 args->status = NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR; 1617 args->error_loc = offsetof(struct nvme_common_command, dptr); 1618 goto out_put_subsystem; 1619 } 1620 up_read(&nvmet_config_sem); 1621 1622 args->status = NVME_SC_INTERNAL; 1623 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); 1624 if (!ctrl) 1625 goto out_put_subsystem; 1626 mutex_init(&ctrl->lock); 1627 1628 ctrl->port = args->port; 1629 ctrl->ops = args->ops; 1630 1631 #ifdef CONFIG_NVME_TARGET_PASSTHRU 1632 /* By default, set loop targets to clear IDS by default */ 1633 if (ctrl->port->disc_addr.trtype == NVMF_TRTYPE_LOOP) 1634 subsys->clear_ids = 1; 1635 #endif 1636 1637 INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work); 1638 INIT_LIST_HEAD(&ctrl->async_events); 1639 INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL); 1640 INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); 1641 INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer); 1642 1643 memcpy(ctrl->subsysnqn, args->subsysnqn, NVMF_NQN_SIZE); 1644 memcpy(ctrl->hostnqn, args->hostnqn, NVMF_NQN_SIZE); 1645 1646 kref_init(&ctrl->ref); 1647 ctrl->subsys = subsys; 1648 ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support; 1649 nvmet_init_cap(ctrl); 1650 
WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL); 1651 1652 ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES, 1653 sizeof(__le32), GFP_KERNEL); 1654 if (!ctrl->changed_ns_list) 1655 goto out_free_ctrl; 1656 1657 ctrl->sqs = kcalloc(subsys->max_qid + 1, 1658 sizeof(struct nvmet_sq *), 1659 GFP_KERNEL); 1660 if (!ctrl->sqs) 1661 goto out_free_changed_ns_list; 1662 1663 ctrl->cqs = kcalloc(subsys->max_qid + 1, sizeof(struct nvmet_cq *), 1664 GFP_KERNEL); 1665 if (!ctrl->cqs) 1666 goto out_free_sqs; 1667 1668 ret = ida_alloc_range(&cntlid_ida, 1669 subsys->cntlid_min, subsys->cntlid_max, 1670 GFP_KERNEL); 1671 if (ret < 0) { 1672 args->status = NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR; 1673 goto out_free_cqs; 1674 } 1675 ctrl->cntlid = ret; 1676 1677 /* 1678 * Discovery controllers may use some arbitrary high value 1679 * in order to cleanup stale discovery sessions 1680 */ 1681 if (nvmet_is_disc_subsys(ctrl->subsys) && !kato) 1682 kato = NVMET_DISC_KATO_MS; 1683 1684 /* keep-alive timeout in seconds */ 1685 ctrl->kato = DIV_ROUND_UP(kato, 1000); 1686 1687 ctrl->err_counter = 0; 1688 spin_lock_init(&ctrl->error_lock); 1689 1690 nvmet_start_keep_alive_timer(ctrl); 1691 1692 mutex_lock(&subsys->lock); 1693 ret = nvmet_ctrl_init_pr(ctrl); 1694 if (ret) 1695 goto init_pr_fail; 1696 list_add_tail(&ctrl->subsys_entry, &subsys->ctrls); 1697 nvmet_setup_p2p_ns_map(ctrl, args->p2p_client); 1698 nvmet_debugfs_ctrl_setup(ctrl); 1699 mutex_unlock(&subsys->lock); 1700 1701 if (args->hostid) 1702 uuid_copy(&ctrl->hostid, args->hostid); 1703 1704 dhchap_status = nvmet_setup_auth(ctrl, args->sq); 1705 if (dhchap_status) { 1706 pr_err("Failed to setup authentication, dhchap status %u\n", 1707 dhchap_status); 1708 nvmet_ctrl_put(ctrl); 1709 if (dhchap_status == NVME_AUTH_DHCHAP_FAILURE_FAILED) 1710 args->status = 1711 NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR; 1712 else 1713 args->status = NVME_SC_INTERNAL; 1714 return NULL; 1715 } 1716 1717 args->status = 
NVME_SC_SUCCESS; 1718 1719 pr_info("Created %s controller %d for subsystem %s for NQN %s%s%s%s.\n", 1720 nvmet_is_disc_subsys(ctrl->subsys) ? "discovery" : "nvm", 1721 ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn, 1722 ctrl->pi_support ? " T10-PI is enabled" : "", 1723 nvmet_has_auth(ctrl, args->sq) ? " with DH-HMAC-CHAP" : "", 1724 nvmet_queue_tls_keyid(args->sq) ? ", TLS" : ""); 1725 1726 return ctrl; 1727 1728 init_pr_fail: 1729 mutex_unlock(&subsys->lock); 1730 nvmet_stop_keep_alive_timer(ctrl); 1731 ida_free(&cntlid_ida, ctrl->cntlid); 1732 out_free_cqs: 1733 kfree(ctrl->cqs); 1734 out_free_sqs: 1735 kfree(ctrl->sqs); 1736 out_free_changed_ns_list: 1737 kfree(ctrl->changed_ns_list); 1738 out_free_ctrl: 1739 kfree(ctrl); 1740 out_put_subsystem: 1741 nvmet_subsys_put(subsys); 1742 return NULL; 1743 } 1744 EXPORT_SYMBOL_GPL(nvmet_alloc_ctrl); 1745 1746 static void nvmet_ctrl_free(struct kref *ref) 1747 { 1748 struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref); 1749 struct nvmet_subsys *subsys = ctrl->subsys; 1750 1751 mutex_lock(&subsys->lock); 1752 nvmet_ctrl_destroy_pr(ctrl); 1753 nvmet_release_p2p_ns_map(ctrl); 1754 list_del(&ctrl->subsys_entry); 1755 mutex_unlock(&subsys->lock); 1756 1757 nvmet_stop_keep_alive_timer(ctrl); 1758 1759 flush_work(&ctrl->async_event_work); 1760 cancel_work_sync(&ctrl->fatal_err_work); 1761 1762 nvmet_destroy_auth(ctrl); 1763 1764 nvmet_debugfs_ctrl_free(ctrl); 1765 1766 ida_free(&cntlid_ida, ctrl->cntlid); 1767 1768 nvmet_async_events_free(ctrl); 1769 kfree(ctrl->sqs); 1770 kfree(ctrl->cqs); 1771 kfree(ctrl->changed_ns_list); 1772 kfree(ctrl); 1773 1774 nvmet_subsys_put(subsys); 1775 } 1776 1777 void nvmet_ctrl_put(struct nvmet_ctrl *ctrl) 1778 { 1779 kref_put(&ctrl->ref, nvmet_ctrl_free); 1780 } 1781 EXPORT_SYMBOL_GPL(nvmet_ctrl_put); 1782 1783 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl) 1784 { 1785 mutex_lock(&ctrl->lock); 1786 if (!(ctrl->csts & NVME_CSTS_CFS)) { 1787 ctrl->csts |= 
NVME_CSTS_CFS; 1788 queue_work(nvmet_wq, &ctrl->fatal_err_work); 1789 } 1790 mutex_unlock(&ctrl->lock); 1791 } 1792 EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error); 1793 1794 ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl, 1795 char *traddr, size_t traddr_len) 1796 { 1797 if (!ctrl->ops->host_traddr) 1798 return -EOPNOTSUPP; 1799 return ctrl->ops->host_traddr(ctrl, traddr, traddr_len); 1800 } 1801 1802 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port, 1803 const char *subsysnqn) 1804 { 1805 struct nvmet_subsys_link *p; 1806 1807 if (!port) 1808 return NULL; 1809 1810 if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) { 1811 if (!kref_get_unless_zero(&nvmet_disc_subsys->ref)) 1812 return NULL; 1813 return nvmet_disc_subsys; 1814 } 1815 1816 down_read(&nvmet_config_sem); 1817 if (!strncmp(nvmet_disc_subsys->subsysnqn, subsysnqn, 1818 NVMF_NQN_SIZE)) { 1819 if (kref_get_unless_zero(&nvmet_disc_subsys->ref)) { 1820 up_read(&nvmet_config_sem); 1821 return nvmet_disc_subsys; 1822 } 1823 } 1824 list_for_each_entry(p, &port->subsystems, entry) { 1825 if (!strncmp(p->subsys->subsysnqn, subsysnqn, 1826 NVMF_NQN_SIZE)) { 1827 if (!kref_get_unless_zero(&p->subsys->ref)) 1828 break; 1829 up_read(&nvmet_config_sem); 1830 return p->subsys; 1831 } 1832 } 1833 up_read(&nvmet_config_sem); 1834 return NULL; 1835 } 1836 1837 struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, 1838 enum nvme_subsys_type type) 1839 { 1840 struct nvmet_subsys *subsys; 1841 char serial[NVMET_SN_MAX_SIZE / 2]; 1842 int ret; 1843 1844 subsys = kzalloc(sizeof(*subsys), GFP_KERNEL); 1845 if (!subsys) 1846 return ERR_PTR(-ENOMEM); 1847 1848 subsys->ver = NVMET_DEFAULT_VS; 1849 /* generate a random serial number as our controllers are ephemeral: */ 1850 get_random_bytes(&serial, sizeof(serial)); 1851 bin2hex(subsys->serial, &serial, sizeof(serial)); 1852 1853 subsys->model_number = kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL); 1854 if (!subsys->model_number) { 1855 ret = 
-ENOMEM; 1856 goto free_subsys; 1857 } 1858 1859 subsys->ieee_oui = 0; 1860 1861 subsys->firmware_rev = kstrndup(UTS_RELEASE, NVMET_FR_MAX_SIZE, GFP_KERNEL); 1862 if (!subsys->firmware_rev) { 1863 ret = -ENOMEM; 1864 goto free_mn; 1865 } 1866 1867 switch (type) { 1868 case NVME_NQN_NVME: 1869 subsys->max_qid = NVMET_NR_QUEUES; 1870 break; 1871 case NVME_NQN_DISC: 1872 case NVME_NQN_CURR: 1873 subsys->max_qid = 0; 1874 break; 1875 default: 1876 pr_err("%s: Unknown Subsystem type - %d\n", __func__, type); 1877 ret = -EINVAL; 1878 goto free_fr; 1879 } 1880 subsys->type = type; 1881 subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE, 1882 GFP_KERNEL); 1883 if (!subsys->subsysnqn) { 1884 ret = -ENOMEM; 1885 goto free_fr; 1886 } 1887 subsys->cntlid_min = NVME_CNTLID_MIN; 1888 subsys->cntlid_max = NVME_CNTLID_MAX; 1889 kref_init(&subsys->ref); 1890 1891 mutex_init(&subsys->lock); 1892 xa_init(&subsys->namespaces); 1893 INIT_LIST_HEAD(&subsys->ctrls); 1894 INIT_LIST_HEAD(&subsys->hosts); 1895 1896 ret = nvmet_debugfs_subsys_setup(subsys); 1897 if (ret) 1898 goto free_subsysnqn; 1899 1900 return subsys; 1901 1902 free_subsysnqn: 1903 kfree(subsys->subsysnqn); 1904 free_fr: 1905 kfree(subsys->firmware_rev); 1906 free_mn: 1907 kfree(subsys->model_number); 1908 free_subsys: 1909 kfree(subsys); 1910 return ERR_PTR(ret); 1911 } 1912 1913 static void nvmet_subsys_free(struct kref *ref) 1914 { 1915 struct nvmet_subsys *subsys = 1916 container_of(ref, struct nvmet_subsys, ref); 1917 1918 WARN_ON_ONCE(!xa_empty(&subsys->namespaces)); 1919 1920 nvmet_debugfs_subsys_free(subsys); 1921 1922 xa_destroy(&subsys->namespaces); 1923 nvmet_passthru_subsys_free(subsys); 1924 1925 kfree(subsys->subsysnqn); 1926 kfree(subsys->model_number); 1927 kfree(subsys->firmware_rev); 1928 kfree(subsys); 1929 } 1930 1931 void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys) 1932 { 1933 struct nvmet_ctrl *ctrl; 1934 1935 mutex_lock(&subsys->lock); 1936 list_for_each_entry(ctrl, &subsys->ctrls, 
subsys_entry) 1937 ctrl->ops->delete_ctrl(ctrl); 1938 mutex_unlock(&subsys->lock); 1939 } 1940 1941 void nvmet_subsys_put(struct nvmet_subsys *subsys) 1942 { 1943 kref_put(&subsys->ref, nvmet_subsys_free); 1944 } 1945 1946 static int __init nvmet_init(void) 1947 { 1948 int error = -ENOMEM; 1949 1950 nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1; 1951 1952 nvmet_bvec_cache = kmem_cache_create("nvmet-bvec", 1953 NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), 0, 1954 SLAB_HWCACHE_ALIGN, NULL); 1955 if (!nvmet_bvec_cache) 1956 return -ENOMEM; 1957 1958 zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0); 1959 if (!zbd_wq) 1960 goto out_destroy_bvec_cache; 1961 1962 buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq", 1963 WQ_MEM_RECLAIM, 0); 1964 if (!buffered_io_wq) 1965 goto out_free_zbd_work_queue; 1966 1967 nvmet_wq = alloc_workqueue("nvmet-wq", 1968 WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_SYSFS, 0); 1969 if (!nvmet_wq) 1970 goto out_free_buffered_work_queue; 1971 1972 error = nvmet_init_discovery(); 1973 if (error) 1974 goto out_free_nvmet_work_queue; 1975 1976 error = nvmet_init_debugfs(); 1977 if (error) 1978 goto out_exit_discovery; 1979 1980 error = nvmet_init_configfs(); 1981 if (error) 1982 goto out_exit_debugfs; 1983 1984 return 0; 1985 1986 out_exit_debugfs: 1987 nvmet_exit_debugfs(); 1988 out_exit_discovery: 1989 nvmet_exit_discovery(); 1990 out_free_nvmet_work_queue: 1991 destroy_workqueue(nvmet_wq); 1992 out_free_buffered_work_queue: 1993 destroy_workqueue(buffered_io_wq); 1994 out_free_zbd_work_queue: 1995 destroy_workqueue(zbd_wq); 1996 out_destroy_bvec_cache: 1997 kmem_cache_destroy(nvmet_bvec_cache); 1998 return error; 1999 } 2000 2001 static void __exit nvmet_exit(void) 2002 { 2003 nvmet_exit_configfs(); 2004 nvmet_exit_debugfs(); 2005 nvmet_exit_discovery(); 2006 ida_destroy(&cntlid_ida); 2007 destroy_workqueue(nvmet_wq); 2008 destroy_workqueue(buffered_io_wq); 2009 destroy_workqueue(zbd_wq); 2010 kmem_cache_destroy(nvmet_bvec_cache); 
2011 2012 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024); 2013 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024); 2014 } 2015 2016 module_init(nvmet_init); 2017 module_exit(nvmet_exit); 2018 2019 MODULE_DESCRIPTION("NVMe target core framework"); 2020 MODULE_LICENSE("GPL v2"); 2021