1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Common code for the NVMe target. 4 * Copyright (c) 2015-2016 HGST, a Western Digital Company. 5 */ 6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 7 #include <linux/module.h> 8 #include <linux/random.h> 9 #include <linux/rculist.h> 10 #include <linux/pci-p2pdma.h> 11 #include <linux/scatterlist.h> 12 13 #include <generated/utsrelease.h> 14 15 #define CREATE_TRACE_POINTS 16 #include "trace.h" 17 18 #include "nvmet.h" 19 #include "debugfs.h" 20 21 struct kmem_cache *nvmet_bvec_cache; 22 struct workqueue_struct *buffered_io_wq; 23 struct workqueue_struct *zbd_wq; 24 static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; 25 static DEFINE_IDA(cntlid_ida); 26 27 struct workqueue_struct *nvmet_wq; 28 EXPORT_SYMBOL_GPL(nvmet_wq); 29 30 /* 31 * This read/write semaphore is used to synchronize access to configuration 32 * information on a target system that will result in discovery log page 33 * information change for at least one host. 34 * The full list of resources to protected by this semaphore is: 35 * 36 * - subsystems list 37 * - per-subsystem allowed hosts list 38 * - allow_any_host subsystem attribute 39 * - nvmet_genctr 40 * - the nvmet_transports array 41 * 42 * When updating any of those lists/structures write lock should be obtained, 43 * while when reading (popolating discovery log page or checking host-subsystem 44 * link) read lock is obtained to allow concurrent reads. 45 */ 46 DECLARE_RWSEM(nvmet_config_sem); 47 48 u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1]; 49 u64 nvmet_ana_chgcnt; 50 DECLARE_RWSEM(nvmet_ana_sem); 51 52 inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno) 53 { 54 switch (errno) { 55 case 0: 56 return NVME_SC_SUCCESS; 57 case -ENOSPC: 58 req->error_loc = offsetof(struct nvme_rw_command, length); 59 return NVME_SC_CAP_EXCEEDED | NVME_STATUS_DNR; 60 case -EREMOTEIO: 61 req->error_loc = offsetof(struct nvme_rw_command, slba); 62 return NVME_SC_LBA_RANGE | NVME_STATUS_DNR; 63 case -EOPNOTSUPP: 64 req->error_loc = offsetof(struct nvme_common_command, opcode); 65 return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; 66 case -ENODATA: 67 req->error_loc = offsetof(struct nvme_rw_command, nsid); 68 return NVME_SC_ACCESS_DENIED; 69 case -EIO: 70 fallthrough; 71 default: 72 req->error_loc = offsetof(struct nvme_common_command, opcode); 73 return NVME_SC_INTERNAL | NVME_STATUS_DNR; 74 } 75 } 76 77 u16 nvmet_report_invalid_opcode(struct nvmet_req *req) 78 { 79 pr_debug("unhandled cmd %d on qid %d\n", req->cmd->common.opcode, 80 req->sq->qid); 81 82 req->error_loc = offsetof(struct nvme_common_command, opcode); 83 return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; 84 } 85 86 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port, 87 const char *subsysnqn); 88 89 u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf, 90 size_t len) 91 { 92 if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) { 93 req->error_loc = offsetof(struct nvme_common_command, dptr); 94 return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR; 95 } 96 return 0; 97 } 98 99 u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len) 100 { 101 if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) { 102 req->error_loc = offsetof(struct nvme_common_command, dptr); 103 return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR; 104 } 105 return 0; 106 } 107 108 u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len) 109 { 110 if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) { 111 req->error_loc = offsetof(struct nvme_common_command, dptr); 112 return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR; 113 } 114 return 0; 115 } 116 117 static u32 nvmet_max_nsid(struct nvmet_subsys *subsys) 118 { 119 struct nvmet_ns *cur; 120 unsigned long idx; 121 u32 nsid = 0; 122 123 nvmet_for_each_enabled_ns(&subsys->namespaces, idx, cur) 124 nsid = cur->nsid; 125 126 return nsid; 127 } 128 129 static u32 nvmet_async_event_result(struct nvmet_async_event *aen) 130 { 131 return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16); 132 } 133 134 static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl) 135 { 136 struct nvmet_req *req; 137 138 mutex_lock(&ctrl->lock); 139 while (ctrl->nr_async_event_cmds) { 140 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; 141 mutex_unlock(&ctrl->lock); 142 nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_STATUS_DNR); 143 mutex_lock(&ctrl->lock); 144 } 145 mutex_unlock(&ctrl->lock); 146 } 147 148 static void nvmet_async_events_process(struct nvmet_ctrl *ctrl) 149 { 150 struct nvmet_async_event *aen; 151 struct nvmet_req *req; 152 153 mutex_lock(&ctrl->lock); 154 while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) { 155 aen = list_first_entry(&ctrl->async_events, 156 struct nvmet_async_event, entry); 157 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; 158 nvmet_set_result(req, nvmet_async_event_result(aen)); 159 160 list_del(&aen->entry); 161 kfree(aen); 162 163 mutex_unlock(&ctrl->lock); 164 trace_nvmet_async_event(ctrl, req->cqe->result.u32); 165 nvmet_req_complete(req, 0); 166 mutex_lock(&ctrl->lock); 167 } 168 mutex_unlock(&ctrl->lock); 169 } 170 171 static void nvmet_async_events_free(struct nvmet_ctrl *ctrl) 172 { 173 struct nvmet_async_event *aen, *tmp; 174 175 mutex_lock(&ctrl->lock); 176 list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) { 177 list_del(&aen->entry); 178 kfree(aen); 179 } 180 mutex_unlock(&ctrl->lock); 181 } 182 183 static void nvmet_async_event_work(struct work_struct *work) 184 { 185 struct nvmet_ctrl *ctrl = 186 container_of(work, struct nvmet_ctrl, async_event_work); 187 188 nvmet_async_events_process(ctrl); 189 } 190 191 void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, 192 u8 event_info, u8 log_page) 193 { 194 struct nvmet_async_event *aen; 195 196 aen = kmalloc(sizeof(*aen), GFP_KERNEL); 197 if (!aen) 198 return; 199 200 aen->event_type = event_type; 201 aen->event_info = event_info; 202 aen->log_page = log_page; 203 204 mutex_lock(&ctrl->lock); 205 list_add_tail(&aen->entry, &ctrl->async_events); 206 mutex_unlock(&ctrl->lock); 207 208 queue_work(nvmet_wq, &ctrl->async_event_work); 209 } 210 211 static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid) 212 { 213 u32 i; 214 215 mutex_lock(&ctrl->lock); 216 if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES) 217 goto out_unlock; 218 219 for (i = 0; i < ctrl->nr_changed_ns; i++) { 220 if (ctrl->changed_ns_list[i] == nsid) 221 goto out_unlock; 222 } 223 224 if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) { 225 ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff); 226 ctrl->nr_changed_ns = U32_MAX; 227 goto out_unlock; 228 } 229 230 ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid; 231 out_unlock: 232 mutex_unlock(&ctrl->lock); 233 } 234 235 void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid) 236 { 237 struct nvmet_ctrl *ctrl; 238 239 lockdep_assert_held(&subsys->lock); 240 241 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 242 nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid)); 243 if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR)) 244 continue; 245 nvmet_add_async_event(ctrl, NVME_AER_NOTICE, 246 NVME_AER_NOTICE_NS_CHANGED, 247 NVME_LOG_CHANGED_NS); 248 } 249 } 250 251 void nvmet_send_ana_event(struct nvmet_subsys *subsys, 252 struct nvmet_port *port) 253 { 254 struct nvmet_ctrl *ctrl; 255 256 mutex_lock(&subsys->lock); 257 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 258 if (port && ctrl->port != port) 259 continue; 260 if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE)) 261 continue; 262 nvmet_add_async_event(ctrl, NVME_AER_NOTICE, 263 NVME_AER_NOTICE_ANA, NVME_LOG_ANA); 264 } 265 mutex_unlock(&subsys->lock); 266 } 267 268 void nvmet_port_send_ana_event(struct nvmet_port *port) 269 { 270 struct nvmet_subsys_link *p; 271 272 down_read(&nvmet_config_sem); 273 list_for_each_entry(p, &port->subsystems, entry) 274 nvmet_send_ana_event(p->subsys, port); 275 up_read(&nvmet_config_sem); 276 } 277 278 int nvmet_register_transport(const struct nvmet_fabrics_ops *ops) 279 { 280 int ret = 0; 281 282 down_write(&nvmet_config_sem); 283 if (nvmet_transports[ops->type]) 284 ret = -EINVAL; 285 else 286 nvmet_transports[ops->type] = ops; 287 up_write(&nvmet_config_sem); 288 289 return ret; 290 } 291 EXPORT_SYMBOL_GPL(nvmet_register_transport); 292 293 void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops) 294 { 295 down_write(&nvmet_config_sem); 296 nvmet_transports[ops->type] = NULL; 297 up_write(&nvmet_config_sem); 298 } 299 EXPORT_SYMBOL_GPL(nvmet_unregister_transport); 300 301 void nvmet_port_del_ctrls(struct nvmet_port *port, struct nvmet_subsys *subsys) 302 { 303 struct nvmet_ctrl *ctrl; 304 305 mutex_lock(&subsys->lock); 306 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 307 if (ctrl->port == port) 308 ctrl->ops->delete_ctrl(ctrl); 309 } 310 mutex_unlock(&subsys->lock); 311 } 312 313 int nvmet_enable_port(struct nvmet_port *port) 314 { 315 const struct nvmet_fabrics_ops *ops; 316 int ret; 317 318 lockdep_assert_held(&nvmet_config_sem); 319 320 if (port->disc_addr.trtype == NVMF_TRTYPE_MAX) 321 return -EINVAL; 322 323 ops = nvmet_transports[port->disc_addr.trtype]; 324 if (!ops) { 325 up_write(&nvmet_config_sem); 326 request_module("nvmet-transport-%d", port->disc_addr.trtype); 327 down_write(&nvmet_config_sem); 328 ops = nvmet_transports[port->disc_addr.trtype]; 329 if (!ops) { 330 pr_err("transport type %d not supported\n", 331 port->disc_addr.trtype); 332 return -EINVAL; 333 } 334 } 335 336 if (!try_module_get(ops->owner)) 337 return -EINVAL; 338 339 /* 340 * If the user requested PI support and the transport isn't pi capable, 341 * don't enable the port. 342 */ 343 if (port->pi_enable && !(ops->flags & NVMF_METADATA_SUPPORTED)) { 344 pr_err("T10-PI is not supported by transport type %d\n", 345 port->disc_addr.trtype); 346 ret = -EINVAL; 347 goto out_put; 348 } 349 350 ret = ops->add_port(port); 351 if (ret) 352 goto out_put; 353 354 /* If the transport didn't set inline_data_size, then disable it. */ 355 if (port->inline_data_size < 0) 356 port->inline_data_size = 0; 357 358 /* 359 * If the transport didn't set the max_queue_size properly, then clamp 360 * it to the target limits. Also set default values in case the 361 * transport didn't set it at all. 362 */ 363 if (port->max_queue_size < 0) 364 port->max_queue_size = NVMET_MAX_QUEUE_SIZE; 365 else 366 port->max_queue_size = clamp_t(int, port->max_queue_size, 367 NVMET_MIN_QUEUE_SIZE, 368 NVMET_MAX_QUEUE_SIZE); 369 370 port->enabled = true; 371 port->tr_ops = ops; 372 return 0; 373 374 out_put: 375 module_put(ops->owner); 376 return ret; 377 } 378 379 void nvmet_disable_port(struct nvmet_port *port) 380 { 381 const struct nvmet_fabrics_ops *ops; 382 383 lockdep_assert_held(&nvmet_config_sem); 384 385 port->enabled = false; 386 port->tr_ops = NULL; 387 388 ops = nvmet_transports[port->disc_addr.trtype]; 389 ops->remove_port(port); 390 module_put(ops->owner); 391 } 392 393 static void nvmet_keep_alive_timer(struct work_struct *work) 394 { 395 struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work), 396 struct nvmet_ctrl, ka_work); 397 bool reset_tbkas = ctrl->reset_tbkas; 398 399 ctrl->reset_tbkas = false; 400 if (reset_tbkas) { 401 pr_debug("ctrl %d reschedule traffic based keep-alive timer\n", 402 ctrl->cntlid); 403 queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ); 404 return; 405 } 406 407 pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n", 408 ctrl->cntlid, ctrl->kato); 409 410 nvmet_ctrl_fatal_error(ctrl); 411 } 412 413 void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) 414 { 415 if (unlikely(ctrl->kato == 0)) 416 return; 417 418 pr_debug("ctrl %d start keep-alive timer for %d secs\n", 419 ctrl->cntlid, ctrl->kato); 420 421 queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ); 422 } 423 424 void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl) 425 { 426 if (unlikely(ctrl->kato == 0)) 427 return; 428 429 pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid); 430 431 cancel_delayed_work_sync(&ctrl->ka_work); 432 } 433 434 u16 nvmet_req_find_ns(struct nvmet_req *req) 435 { 436 u32 nsid = le32_to_cpu(req->cmd->common.nsid); 437 struct nvmet_subsys *subsys = nvmet_req_subsys(req); 438 439 req->ns = xa_load(&subsys->namespaces, nsid); 440 if (unlikely(!req->ns || !req->ns->enabled)) { 441 req->error_loc = offsetof(struct nvme_common_command, nsid); 442 if (!req->ns) /* ns doesn't exist! */ 443 return NVME_SC_INVALID_NS | NVME_STATUS_DNR; 444 445 /* ns exists but it's disabled */ 446 req->ns = NULL; 447 return NVME_SC_INTERNAL_PATH_ERROR; 448 } 449 450 percpu_ref_get(&req->ns->ref); 451 return NVME_SC_SUCCESS; 452 } 453 454 static void nvmet_destroy_namespace(struct percpu_ref *ref) 455 { 456 struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref); 457 458 complete(&ns->disable_done); 459 } 460 461 void nvmet_put_namespace(struct nvmet_ns *ns) 462 { 463 percpu_ref_put(&ns->ref); 464 } 465 466 static void nvmet_ns_dev_disable(struct nvmet_ns *ns) 467 { 468 nvmet_bdev_ns_disable(ns); 469 nvmet_file_ns_disable(ns); 470 } 471 472 static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns) 473 { 474 int ret; 475 struct pci_dev *p2p_dev; 476 477 if (!ns->use_p2pmem) 478 return 0; 479 480 if (!ns->bdev) { 481 pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n"); 482 return -EINVAL; 483 } 484 485 if (!blk_queue_pci_p2pdma(ns->bdev->bd_disk->queue)) { 486 pr_err("peer-to-peer DMA is not supported by the driver of %s\n", 487 ns->device_path); 488 return -EINVAL; 489 } 490 491 if (ns->p2p_dev) { 492 ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true); 493 if (ret < 0) 494 return -EINVAL; 495 } else { 496 /* 497 * Right now we just check that there is p2pmem available so 498 * we can report an error to the user right away if there 499 * is not. We'll find the actual device to use once we 500 * setup the controller when the port's device is available. 501 */ 502 503 p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns)); 504 if (!p2p_dev) { 505 pr_err("no peer-to-peer memory is available for %s\n", 506 ns->device_path); 507 return -EINVAL; 508 } 509 510 pci_dev_put(p2p_dev); 511 } 512 513 return 0; 514 } 515 516 static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl, 517 struct nvmet_ns *ns) 518 { 519 struct device *clients[2]; 520 struct pci_dev *p2p_dev; 521 int ret; 522 523 lockdep_assert_held(&ctrl->subsys->lock); 524 525 if (!ctrl->p2p_client || !ns->use_p2pmem) 526 return; 527 528 if (ns->p2p_dev) { 529 ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true); 530 if (ret < 0) 531 return; 532 533 p2p_dev = pci_dev_get(ns->p2p_dev); 534 } else { 535 clients[0] = ctrl->p2p_client; 536 clients[1] = nvmet_ns_dev(ns); 537 538 p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients)); 539 if (!p2p_dev) { 540 pr_err("no peer-to-peer memory is available that's supported by %s and %s\n", 541 dev_name(ctrl->p2p_client), ns->device_path); 542 return; 543 } 544 } 545 546 ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev); 547 if (ret < 0) 548 pci_dev_put(p2p_dev); 549 550 pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev), 551 ns->nsid); 552 } 553 554 bool nvmet_ns_revalidate(struct nvmet_ns *ns) 555 { 556 loff_t oldsize = ns->size; 557 558 if (ns->bdev) 559 nvmet_bdev_ns_revalidate(ns); 560 else 561 nvmet_file_ns_revalidate(ns); 562 563 return oldsize != ns->size; 564 } 565 566 int nvmet_ns_enable(struct nvmet_ns *ns) 567 { 568 struct nvmet_subsys *subsys = ns->subsys; 569 struct nvmet_ctrl *ctrl; 570 int ret; 571 572 mutex_lock(&subsys->lock); 573 ret = 0; 574 575 if (nvmet_is_passthru_subsys(subsys)) { 576 pr_info("cannot enable both passthru and regular namespaces for a single subsystem"); 577 goto out_unlock; 578 } 579 580 if (ns->enabled) 581 goto out_unlock; 582 583 ret = nvmet_bdev_ns_enable(ns); 584 if (ret == -ENOTBLK) 585 ret = nvmet_file_ns_enable(ns); 586 if (ret) 587 goto out_unlock; 588 589 ret = nvmet_p2pmem_ns_enable(ns); 590 if (ret) 591 goto out_dev_disable; 592 593 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) 594 nvmet_p2pmem_ns_add_p2p(ctrl, ns); 595 596 if (ns->pr.enable) { 597 ret = nvmet_pr_init_ns(ns); 598 if (ret) 599 goto out_dev_put; 600 } 601 602 if (percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL)) 603 goto out_pr_exit; 604 605 nvmet_ns_changed(subsys, ns->nsid); 606 ns->enabled = true; 607 xa_set_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED); 608 ret = 0; 609 out_unlock: 610 mutex_unlock(&subsys->lock); 611 return ret; 612 out_pr_exit: 613 if (ns->pr.enable) 614 nvmet_pr_exit_ns(ns); 615 out_dev_put: 616 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) 617 pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid)); 618 out_dev_disable: 619 nvmet_ns_dev_disable(ns); 620 goto out_unlock; 621 } 622 623 void nvmet_ns_disable(struct nvmet_ns *ns) 624 { 625 struct nvmet_subsys *subsys = ns->subsys; 626 struct nvmet_ctrl *ctrl; 627 628 mutex_lock(&subsys->lock); 629 if (!ns->enabled) 630 goto out_unlock; 631 632 ns->enabled = false; 633 xa_clear_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED); 634 635 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) 636 pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid)); 637 638 mutex_unlock(&subsys->lock); 639 640 /* 641 * Now that we removed the namespaces from the lookup list, we 642 * can kill the per_cpu ref and wait for any remaining references 643 * to be dropped, as well as a RCU grace period for anyone only 644 * using the namespace under rcu_read_lock(). Note that we can't 645 * use call_rcu here as we need to ensure the namespaces have 646 * been fully destroyed before unloading the module. 647 */ 648 percpu_ref_kill(&ns->ref); 649 synchronize_rcu(); 650 wait_for_completion(&ns->disable_done); 651 percpu_ref_exit(&ns->ref); 652 653 if (ns->pr.enable) 654 nvmet_pr_exit_ns(ns); 655 656 mutex_lock(&subsys->lock); 657 nvmet_ns_changed(subsys, ns->nsid); 658 nvmet_ns_dev_disable(ns); 659 out_unlock: 660 mutex_unlock(&subsys->lock); 661 } 662 663 void nvmet_ns_free(struct nvmet_ns *ns) 664 { 665 struct nvmet_subsys *subsys = ns->subsys; 666 667 nvmet_ns_disable(ns); 668 669 mutex_lock(&subsys->lock); 670 671 xa_erase(&subsys->namespaces, ns->nsid); 672 if (ns->nsid == subsys->max_nsid) 673 subsys->max_nsid = nvmet_max_nsid(subsys); 674 675 subsys->nr_namespaces--; 676 mutex_unlock(&subsys->lock); 677 678 down_write(&nvmet_ana_sem); 679 nvmet_ana_group_enabled[ns->anagrpid]--; 680 up_write(&nvmet_ana_sem); 681 682 kfree(ns->device_path); 683 kfree(ns); 684 } 685 686 struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) 687 { 688 struct nvmet_ns *ns; 689 690 mutex_lock(&subsys->lock); 691 692 if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES) 693 goto out_unlock; 694 695 ns = kzalloc(sizeof(*ns), GFP_KERNEL); 696 if (!ns) 697 goto out_unlock; 698 699 init_completion(&ns->disable_done); 700 701 ns->nsid = nsid; 702 ns->subsys = subsys; 703 704 if (ns->nsid > subsys->max_nsid) 705 subsys->max_nsid = nsid; 706 707 if (xa_insert(&subsys->namespaces, ns->nsid, ns, GFP_KERNEL)) 708 goto out_exit; 709 710 subsys->nr_namespaces++; 711 712 mutex_unlock(&subsys->lock); 713 714 down_write(&nvmet_ana_sem); 715 ns->anagrpid = NVMET_DEFAULT_ANA_GRPID; 716 nvmet_ana_group_enabled[ns->anagrpid]++; 717 up_write(&nvmet_ana_sem); 718 719 uuid_gen(&ns->uuid); 720 ns->buffered_io = false; 721 ns->csi = NVME_CSI_NVM; 722 723 return ns; 724 out_exit: 725 subsys->max_nsid = nvmet_max_nsid(subsys); 726 kfree(ns); 727 out_unlock: 728 mutex_unlock(&subsys->lock); 729 return NULL; 730 } 731 732 static void nvmet_update_sq_head(struct nvmet_req *req) 733 { 734 if (req->sq->size) { 735 u32 old_sqhd, new_sqhd; 736 737 old_sqhd = READ_ONCE(req->sq->sqhd); 738 do { 739 new_sqhd = (old_sqhd + 1) % req->sq->size; 740 } while (!try_cmpxchg(&req->sq->sqhd, &old_sqhd, new_sqhd)); 741 } 742 req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF); 743 } 744 745 static void nvmet_set_error(struct nvmet_req *req, u16 status) 746 { 747 struct nvmet_ctrl *ctrl = req->sq->ctrl; 748 struct nvme_error_slot *new_error_slot; 749 unsigned long flags; 750 751 req->cqe->status = cpu_to_le16(status << 1); 752 753 if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC) 754 return; 755 756 spin_lock_irqsave(&ctrl->error_lock, flags); 757 ctrl->err_counter++; 758 new_error_slot = 759 &ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS]; 760 761 new_error_slot->error_count = cpu_to_le64(ctrl->err_counter); 762 new_error_slot->sqid = cpu_to_le16(req->sq->qid); 763 new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id); 764 new_error_slot->status_field = cpu_to_le16(status << 1); 765 new_error_slot->param_error_location = cpu_to_le16(req->error_loc); 766 new_error_slot->lba = cpu_to_le64(req->error_slba); 767 new_error_slot->nsid = req->cmd->common.nsid; 768 spin_unlock_irqrestore(&ctrl->error_lock, flags); 769 770 /* set the more bit for this request */ 771 req->cqe->status |= cpu_to_le16(1 << 14); 772 } 773 774 static void __nvmet_req_complete(struct nvmet_req *req, u16 status) 775 { 776 struct nvmet_ns *ns = req->ns; 777 struct nvmet_pr_per_ctrl_ref *pc_ref = req->pc_ref; 778 779 if (!req->sq->sqhd_disabled) 780 nvmet_update_sq_head(req); 781 req->cqe->sq_id = cpu_to_le16(req->sq->qid); 782 req->cqe->command_id = req->cmd->common.command_id; 783 784 if (unlikely(status)) 785 nvmet_set_error(req, status); 786 787 trace_nvmet_req_complete(req); 788 789 req->ops->queue_response(req); 790 791 if (pc_ref) 792 nvmet_pr_put_ns_pc_ref(pc_ref); 793 if (ns) 794 nvmet_put_namespace(ns); 795 } 796 797 void nvmet_req_complete(struct nvmet_req *req, u16 status) 798 { 799 struct nvmet_sq *sq = req->sq; 800 801 __nvmet_req_complete(req, status); 802 percpu_ref_put(&sq->ref); 803 } 804 EXPORT_SYMBOL_GPL(nvmet_req_complete); 805 806 void nvmet_cq_init(struct nvmet_cq *cq) 807 { 808 refcount_set(&cq->ref, 1); 809 } 810 EXPORT_SYMBOL_GPL(nvmet_cq_init); 811 812 bool nvmet_cq_get(struct nvmet_cq *cq) 813 { 814 return refcount_inc_not_zero(&cq->ref); 815 } 816 EXPORT_SYMBOL_GPL(nvmet_cq_get); 817 818 void nvmet_cq_put(struct nvmet_cq *cq) 819 { 820 if (refcount_dec_and_test(&cq->ref)) 821 nvmet_cq_destroy(cq); 822 } 823 EXPORT_SYMBOL_GPL(nvmet_cq_put); 824 825 void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, 826 u16 qid, u16 size) 827 { 828 cq->qid = qid; 829 cq->size = size; 830 831 ctrl->cqs[qid] = cq; 832 } 833 834 void nvmet_cq_destroy(struct nvmet_cq *cq) 835 { 836 struct nvmet_ctrl *ctrl = cq->ctrl; 837 838 if (ctrl) { 839 ctrl->cqs[cq->qid] = NULL; 840 nvmet_ctrl_put(cq->ctrl); 841 cq->ctrl = NULL; 842 } 843 } 844 845 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, 846 u16 qid, u16 size) 847 { 848 sq->sqhd = 0; 849 sq->qid = qid; 850 sq->size = size; 851 852 ctrl->sqs[qid] = sq; 853 } 854 855 static void nvmet_confirm_sq(struct percpu_ref *ref) 856 { 857 struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref); 858 859 complete(&sq->confirm_done); 860 } 861 862 u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create) 863 { 864 if (!ctrl->cqs) 865 return NVME_SC_INTERNAL | NVME_STATUS_DNR; 866 867 if (cqid > ctrl->subsys->max_qid) 868 return NVME_SC_QID_INVALID | NVME_STATUS_DNR; 869 870 if ((create && ctrl->cqs[cqid]) || (!create && !ctrl->cqs[cqid])) 871 return NVME_SC_QID_INVALID | NVME_STATUS_DNR; 872 873 return NVME_SC_SUCCESS; 874 } 875 876 u16 nvmet_check_io_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create) 877 { 878 if (!cqid) 879 return NVME_SC_QID_INVALID | NVME_STATUS_DNR; 880 return nvmet_check_cqid(ctrl, cqid, create); 881 } 882 883 bool nvmet_cq_in_use(struct nvmet_cq *cq) 884 { 885 return refcount_read(&cq->ref) > 1; 886 } 887 EXPORT_SYMBOL_GPL(nvmet_cq_in_use); 888 889 u16 nvmet_cq_create(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, 890 u16 qid, u16 size) 891 { 892 u16 status; 893 894 status = nvmet_check_cqid(ctrl, qid, true); 895 if (status != NVME_SC_SUCCESS) 896 return status; 897 898 if (!kref_get_unless_zero(&ctrl->ref)) 899 return NVME_SC_INTERNAL | NVME_STATUS_DNR; 900 cq->ctrl = ctrl; 901 902 nvmet_cq_init(cq); 903 nvmet_cq_setup(ctrl, cq, qid, size); 904 905 return NVME_SC_SUCCESS; 906 } 907 EXPORT_SYMBOL_GPL(nvmet_cq_create); 908 909 u16 nvmet_check_sqid(struct nvmet_ctrl *ctrl, u16 sqid, 910 bool create) 911 { 912 if (!ctrl->sqs) 913 return NVME_SC_INTERNAL | NVME_STATUS_DNR; 914 915 if (sqid > ctrl->subsys->max_qid) 916 return NVME_SC_QID_INVALID | NVME_STATUS_DNR; 917 918 if ((create && ctrl->sqs[sqid]) || 919 (!create && !ctrl->sqs[sqid])) 920 return NVME_SC_QID_INVALID | NVME_STATUS_DNR; 921 922 return NVME_SC_SUCCESS; 923 } 924 925 u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, 926 struct nvmet_cq *cq, u16 sqid, u16 size) 927 { 928 u16 status; 929 int ret; 930 931 if (!kref_get_unless_zero(&ctrl->ref)) 932 return NVME_SC_INTERNAL | NVME_STATUS_DNR; 933 934 status = nvmet_check_sqid(ctrl, sqid, true); 935 if (status != NVME_SC_SUCCESS) 936 return status; 937 938 ret = nvmet_sq_init(sq, cq); 939 if (ret) { 940 status = NVME_SC_INTERNAL | NVME_STATUS_DNR; 941 goto ctrl_put; 942 } 943 944 nvmet_sq_setup(ctrl, sq, sqid, size); 945 sq->ctrl = ctrl; 946 947 return NVME_SC_SUCCESS; 948 949 ctrl_put: 950 nvmet_ctrl_put(ctrl); 951 return status; 952 } 953 EXPORT_SYMBOL_GPL(nvmet_sq_create); 954 955 void nvmet_sq_destroy(struct nvmet_sq *sq) 956 { 957 struct nvmet_ctrl *ctrl = sq->ctrl; 958 959 /* 960 * If this is the admin queue, complete all AERs so that our 961 * queue doesn't have outstanding requests on it. 962 */ 963 if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) 964 nvmet_async_events_failall(ctrl); 965 percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); 966 wait_for_completion(&sq->confirm_done); 967 wait_for_completion(&sq->free_done); 968 percpu_ref_exit(&sq->ref); 969 nvmet_auth_sq_free(sq); 970 nvmet_cq_put(sq->cq); 971 972 /* 973 * we must reference the ctrl again after waiting for inflight IO 974 * to complete. Because admin connect may have sneaked in after we 975 * store sq->ctrl locally, but before we killed the percpu_ref. the 976 * admin connect allocates and assigns sq->ctrl, which now needs a 977 * final ref put, as this ctrl is going away. 978 */ 979 ctrl = sq->ctrl; 980 981 if (ctrl) { 982 /* 983 * The teardown flow may take some time, and the host may not 984 * send us keep-alive during this period, hence reset the 985 * traffic based keep-alive timer so we don't trigger a 986 * controller teardown as a result of a keep-alive expiration. 987 */ 988 ctrl->reset_tbkas = true; 989 sq->ctrl->sqs[sq->qid] = NULL; 990 nvmet_ctrl_put(ctrl); 991 sq->ctrl = NULL; /* allows reusing the queue later */ 992 } 993 } 994 EXPORT_SYMBOL_GPL(nvmet_sq_destroy); 995 996 static void nvmet_sq_free(struct percpu_ref *ref) 997 { 998 struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref); 999 1000 complete(&sq->free_done); 1001 } 1002 1003 int nvmet_sq_init(struct nvmet_sq *sq, struct nvmet_cq *cq) 1004 { 1005 int ret; 1006 1007 if (!nvmet_cq_get(cq)) 1008 return -EINVAL; 1009 1010 ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL); 1011 if (ret) { 1012 pr_err("percpu_ref init failed!\n"); 1013 nvmet_cq_put(cq); 1014 return ret; 1015 } 1016 init_completion(&sq->free_done); 1017 init_completion(&sq->confirm_done); 1018 nvmet_auth_sq_init(sq); 1019 sq->cq = cq; 1020 1021 return 0; 1022 } 1023 EXPORT_SYMBOL_GPL(nvmet_sq_init); 1024 1025 static inline u16 nvmet_check_ana_state(struct nvmet_port *port, 1026 struct nvmet_ns *ns) 1027 { 1028 enum nvme_ana_state state = port->ana_state[ns->anagrpid]; 1029 1030 if (unlikely(state == NVME_ANA_INACCESSIBLE)) 1031 return NVME_SC_ANA_INACCESSIBLE; 1032 if (unlikely(state == NVME_ANA_PERSISTENT_LOSS)) 1033 return NVME_SC_ANA_PERSISTENT_LOSS; 1034 if (unlikely(state == NVME_ANA_CHANGE)) 1035 return NVME_SC_ANA_TRANSITION; 1036 return 0; 1037 } 1038 1039 static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req) 1040 { 1041 if (unlikely(req->ns->readonly)) { 1042 switch (req->cmd->common.opcode) { 1043 case nvme_cmd_read: 1044 case nvme_cmd_flush: 1045 break; 1046 default: 1047 return NVME_SC_NS_WRITE_PROTECTED; 1048 } 1049 } 1050 1051 return 0; 1052 } 1053 1054 static u32 nvmet_io_cmd_transfer_len(struct nvmet_req *req) 1055 { 1056 struct nvme_command *cmd = req->cmd; 1057 u32 metadata_len = 0; 1058 1059 if (nvme_is_fabrics(cmd)) 1060 return nvmet_fabrics_io_cmd_data_len(req); 1061 1062 if (!req->ns) 1063 return 0; 1064 1065 switch (req->cmd->common.opcode) { 1066 case nvme_cmd_read: 1067 case nvme_cmd_write: 1068 case nvme_cmd_zone_append: 1069 if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns)) 1070 metadata_len = nvmet_rw_metadata_len(req); 1071 return nvmet_rw_data_len(req) + metadata_len; 1072 case nvme_cmd_dsm: 1073 return nvmet_dsm_len(req); 1074 case nvme_cmd_zone_mgmt_recv: 1075 return (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2; 1076 default: 1077 return 0; 1078 } 1079 } 1080 1081 static u16 nvmet_parse_io_cmd(struct nvmet_req *req) 1082 { 1083 struct nvme_command *cmd = req->cmd; 1084 u16 ret; 1085 1086 if (nvme_is_fabrics(cmd)) 1087 return nvmet_parse_fabrics_io_cmd(req); 1088 1089 if (unlikely(!nvmet_check_auth_status(req))) 1090 return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR; 1091 1092 ret = nvmet_check_ctrl_status(req); 1093 if (unlikely(ret)) 1094 return ret; 1095 1096 if (nvmet_is_passthru_req(req)) 1097 return nvmet_parse_passthru_io_cmd(req); 1098 1099 ret = nvmet_req_find_ns(req); 1100 if (unlikely(ret)) 1101 return ret; 1102 1103 ret = nvmet_check_ana_state(req->port, req->ns); 1104 if (unlikely(ret)) { 1105 req->error_loc = offsetof(struct nvme_common_command, nsid); 1106 return ret; 1107 } 1108 ret = nvmet_io_cmd_check_access(req); 1109 if (unlikely(ret)) { 1110 req->error_loc = offsetof(struct nvme_common_command, nsid); 1111 return ret; 1112 } 1113 1114 if (req->ns->pr.enable) { 1115 ret = nvmet_parse_pr_cmd(req); 1116 if (!ret) 1117 return ret; 1118 } 1119 1120 switch (req->ns->csi) { 1121 case NVME_CSI_NVM: 1122 if (req->ns->file) 1123 ret = nvmet_file_parse_io_cmd(req); 1124 else 1125 ret = nvmet_bdev_parse_io_cmd(req); 1126 break; 1127 case NVME_CSI_ZNS: 1128 if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) 1129 ret = nvmet_bdev_zns_parse_io_cmd(req); 1130 else 1131 ret = NVME_SC_INVALID_IO_CMD_SET; 1132 break; 1133 default: 1134 ret = NVME_SC_INVALID_IO_CMD_SET; 1135 } 1136 if (ret) 1137 return ret; 1138 1139 if (req->ns->pr.enable) { 1140 ret = nvmet_pr_check_cmd_access(req); 1141 if (ret) 1142 return ret; 1143 1144 ret = nvmet_pr_get_ns_pc_ref(req); 1145 } 1146 return ret; 1147 } 1148 1149 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_sq *sq, 1150 const struct nvmet_fabrics_ops *ops) 1151 { 1152 u8 flags = req->cmd->common.flags; 1153 u16 status; 1154 1155 req->cq = sq->cq; 1156 req->sq = sq; 1157 req->ops = ops; 1158 req->sg = NULL; 1159 req->metadata_sg = NULL; 1160 req->sg_cnt = 0; 1161 req->metadata_sg_cnt = 0; 1162 req->transfer_len = 0; 1163 req->metadata_len = 0; 1164 req->cqe->result.u64 = 0; 1165 req->cqe->status = 0; 1166 req->cqe->sq_head = 0; 1167 req->ns = NULL; 1168 req->error_loc = NVMET_NO_ERROR_LOC; 1169 req->error_slba = 0; 1170 req->pc_ref = NULL; 1171 1172 /* no support for fused commands yet */ 1173 if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) { 1174 req->error_loc = offsetof(struct nvme_common_command, flags); 1175 status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 1176 goto fail; 1177 } 1178 1179 /* 1180 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that 1181 * contains an address of a single contiguous physical buffer that is 1182 * byte aligned. For PCI controllers, this is optional so not enforced. 1183 */ 1184 if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) { 1185 if (!req->sq->ctrl || !nvmet_is_pci_ctrl(req->sq->ctrl)) { 1186 req->error_loc = 1187 offsetof(struct nvme_common_command, flags); 1188 status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 1189 goto fail; 1190 } 1191 } 1192 1193 if (unlikely(!req->sq->ctrl)) 1194 /* will return an error for any non-connect command: */ 1195 status = nvmet_parse_connect_cmd(req); 1196 else if (likely(req->sq->qid != 0)) 1197 status = nvmet_parse_io_cmd(req); 1198 else 1199 status = nvmet_parse_admin_cmd(req); 1200 1201 if (status) 1202 goto fail; 1203 1204 trace_nvmet_req_init(req, req->cmd); 1205 1206 if (unlikely(!percpu_ref_tryget_live(&sq->ref))) { 1207 status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 1208 goto fail; 1209 } 1210 1211 if (sq->ctrl) 1212 sq->ctrl->reset_tbkas = true; 1213 1214 return true; 1215 1216 fail: 1217 __nvmet_req_complete(req, status); 1218 return false; 1219 } 1220 EXPORT_SYMBOL_GPL(nvmet_req_init); 1221 1222 void nvmet_req_uninit(struct nvmet_req *req) 1223 { 1224 percpu_ref_put(&req->sq->ref); 1225 if (req->pc_ref) 1226 nvmet_pr_put_ns_pc_ref(req->pc_ref); 1227 if (req->ns) 1228 nvmet_put_namespace(req->ns); 1229 } 1230 EXPORT_SYMBOL_GPL(nvmet_req_uninit); 1231 1232 size_t nvmet_req_transfer_len(struct nvmet_req *req) 1233 { 1234 if (likely(req->sq->qid != 0)) 1235 return nvmet_io_cmd_transfer_len(req); 1236 if (unlikely(!req->sq->ctrl)) 1237 return nvmet_connect_cmd_data_len(req); 1238 return nvmet_admin_cmd_data_len(req); 1239 } 1240 EXPORT_SYMBOL_GPL(nvmet_req_transfer_len); 1241 1242 bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len) 1243 { 1244 if (unlikely(len != req->transfer_len)) { 1245 u16 status; 1246 1247 req->error_loc = offsetof(struct nvme_common_command, dptr); 1248 if (req->cmd->common.flags & NVME_CMD_SGL_ALL) 1249 status = NVME_SC_SGL_INVALID_DATA; 1250 else 1251 status = NVME_SC_INVALID_FIELD; 1252 nvmet_req_complete(req, status | NVME_STATUS_DNR); 1253 return false; 1254 } 1255 1256 return true; 1257 } 1258 EXPORT_SYMBOL_GPL(nvmet_check_transfer_len); 1259 1260 bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len) 1261 { 1262 if (unlikely(data_len > req->transfer_len)) { 1263 u16 status; 1264 1265 req->error_loc = offsetof(struct nvme_common_command, dptr); 1266 if (req->cmd->common.flags & NVME_CMD_SGL_ALL) 1267 status = NVME_SC_SGL_INVALID_DATA; 1268 else 1269 status = NVME_SC_INVALID_FIELD; 1270 nvmet_req_complete(req, status | NVME_STATUS_DNR); 1271 return false; 1272 } 1273 1274 return true; 1275 } 1276 1277 static unsigned int nvmet_data_transfer_len(struct nvmet_req *req) 1278 { 1279 return req->transfer_len - req->metadata_len; 1280 } 1281 1282 static int nvmet_req_alloc_p2pmem_sgls(struct pci_dev *p2p_dev, 1283 struct nvmet_req *req) 1284 { 1285 req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt, 1286 nvmet_data_transfer_len(req)); 1287 if (!req->sg) 1288 goto out_err; 1289 1290 if (req->metadata_len) { 1291 req->metadata_sg = pci_p2pmem_alloc_sgl(p2p_dev, 1292 &req->metadata_sg_cnt, req->metadata_len); 1293 if (!req->metadata_sg) 1294 goto out_free_sg; 1295 } 1296 1297 req->p2p_dev = p2p_dev; 1298 1299 return 0; 1300 out_free_sg: 1301 pci_p2pmem_free_sgl(req->p2p_dev, req->sg); 1302 out_err: 1303 return -ENOMEM; 1304 } 1305 1306 static struct pci_dev *nvmet_req_find_p2p_dev(struct nvmet_req *req) 1307 { 1308 if (!IS_ENABLED(CONFIG_PCI_P2PDMA) || 1309 !req->sq->ctrl || !req->sq->qid || !req->ns) 1310 return NULL; 1311 return radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, req->ns->nsid); 1312 } 1313 1314 int nvmet_req_alloc_sgls(struct nvmet_req *req) 1315 { 1316 struct pci_dev *p2p_dev = nvmet_req_find_p2p_dev(req); 1317 1318 if (p2p_dev && !nvmet_req_alloc_p2pmem_sgls(p2p_dev, req)) 1319 return 0; 1320 1321 req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL, 1322 &req->sg_cnt); 1323 if (unlikely(!req->sg)) 1324 goto out; 1325 1326 if (req->metadata_len) { 1327 req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL, 1328 &req->metadata_sg_cnt); 1329 if (unlikely(!req->metadata_sg)) 1330 goto out_free; 1331 } 1332 1333 return 0; 1334 out_free: 1335 sgl_free(req->sg); 1336 out: 1337 return -ENOMEM; 1338 } 1339 EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls); 1340 1341 void nvmet_req_free_sgls(struct nvmet_req *req) 1342 { 1343 if (req->p2p_dev) { 1344 pci_p2pmem_free_sgl(req->p2p_dev, req->sg); 1345 if (req->metadata_sg) 1346 pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg); 1347 req->p2p_dev = NULL; 1348 } else { 1349 sgl_free(req->sg); 1350 if (req->metadata_sg) 1351 sgl_free(req->metadata_sg); 1352 } 1353 1354 req->sg = NULL; 1355 req->metadata_sg = NULL; 1356 req->sg_cnt = 0; 1357 req->metadata_sg_cnt = 0; 1358 } 1359 EXPORT_SYMBOL_GPL(nvmet_req_free_sgls); 1360 1361 static inline bool nvmet_css_supported(u8 cc_css) 1362 { 1363 switch (cc_css << NVME_CC_CSS_SHIFT) { 1364 case NVME_CC_CSS_NVM: 1365 case NVME_CC_CSS_CSI: 1366 return true; 1367 default: 1368 return false; 1369 } 1370 } 1371 1372 static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) 1373 { 1374 lockdep_assert_held(&ctrl->lock); 1375 1376 /* 1377 * Only I/O controllers should verify iosqes,iocqes. 1378 * Strictly speaking, the spec says a discovery controller 1379 * should verify iosqes,iocqes are zeroed, however that 1380 * would break backwards compatibility, so don't enforce it. 1381 */ 1382 if (!nvmet_is_disc_subsys(ctrl->subsys) && 1383 (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES || 1384 nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES)) { 1385 ctrl->csts = NVME_CSTS_CFS; 1386 return; 1387 } 1388 1389 if (nvmet_cc_mps(ctrl->cc) != 0 || 1390 nvmet_cc_ams(ctrl->cc) != 0 || 1391 !nvmet_css_supported(nvmet_cc_css(ctrl->cc))) { 1392 ctrl->csts = NVME_CSTS_CFS; 1393 return; 1394 } 1395 1396 ctrl->csts = NVME_CSTS_RDY; 1397 1398 /* 1399 * Controllers that are not yet enabled should not really enforce the 1400 * keep alive timeout, but we still want to track a timeout and cleanup 1401 * in case a host died before it enabled the controller. Hence, simply 1402 * reset the keep alive timer when the controller is enabled. 1403 */ 1404 if (ctrl->kato) 1405 mod_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ); 1406 } 1407 1408 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl) 1409 { 1410 lockdep_assert_held(&ctrl->lock); 1411 1412 /* XXX: tear down queues? */ 1413 ctrl->csts &= ~NVME_CSTS_RDY; 1414 ctrl->cc = 0; 1415 } 1416 1417 void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new) 1418 { 1419 u32 old; 1420 1421 mutex_lock(&ctrl->lock); 1422 old = ctrl->cc; 1423 ctrl->cc = new; 1424 1425 if (nvmet_cc_en(new) && !nvmet_cc_en(old)) 1426 nvmet_start_ctrl(ctrl); 1427 if (!nvmet_cc_en(new) && nvmet_cc_en(old)) 1428 nvmet_clear_ctrl(ctrl); 1429 if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) { 1430 nvmet_clear_ctrl(ctrl); 1431 ctrl->csts |= NVME_CSTS_SHST_CMPLT; 1432 } 1433 if (!nvmet_cc_shn(new) && nvmet_cc_shn(old)) 1434 ctrl->csts &= ~NVME_CSTS_SHST_CMPLT; 1435 mutex_unlock(&ctrl->lock); 1436 } 1437 EXPORT_SYMBOL_GPL(nvmet_update_cc); 1438 1439 static void nvmet_init_cap(struct nvmet_ctrl *ctrl) 1440 { 1441 /* command sets supported: NVMe command set: */ 1442 ctrl->cap = (1ULL << 37); 1443 /* Controller supports one or more I/O Command Sets */ 1444 ctrl->cap |= (1ULL << 43); 1445 /* CC.EN timeout in 500msec units: */ 1446 ctrl->cap |= (15ULL << 24); 1447 /* maximum queue entries supported: */ 1448 if (ctrl->ops->get_max_queue_size) 1449 ctrl->cap |= min_t(u16, ctrl->ops->get_max_queue_size(ctrl), 1450 ctrl->port->max_queue_size) - 1; 1451 else 1452 ctrl->cap |= ctrl->port->max_queue_size - 1; 1453 1454 if (nvmet_is_passthru_subsys(ctrl->subsys)) 1455 nvmet_passthrough_override_cap(ctrl); 1456 } 1457 1458 struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn, 1459 const char *hostnqn, u16 cntlid, 1460 struct nvmet_req *req) 1461 { 1462 struct nvmet_ctrl *ctrl = NULL; 1463 struct nvmet_subsys *subsys; 1464 1465 subsys = nvmet_find_get_subsys(req->port, subsysnqn); 1466 if (!subsys) { 1467 pr_warn("connect request for invalid subsystem %s!\n", 1468 subsysnqn); 1469 req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); 1470 goto out; 1471 } 1472 1473 mutex_lock(&subsys->lock); 1474 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 1475 if (ctrl->cntlid == cntlid) { 1476 if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) { 1477 pr_warn("hostnqn mismatch.\n"); 1478 continue; 1479 } 1480 if (!kref_get_unless_zero(&ctrl->ref)) 1481 continue; 1482 1483 /* ctrl found */ 1484 goto found; 1485 } 1486 } 1487 1488 ctrl = NULL; /* ctrl not found */ 1489 pr_warn("could not find controller %d for subsys %s / host %s\n", 1490 cntlid, subsysnqn, hostnqn); 1491 req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); 1492 1493 found: 1494 mutex_unlock(&subsys->lock); 1495 nvmet_subsys_put(subsys); 1496 out: 1497 return ctrl; 1498 } 1499 1500 u16 nvmet_check_ctrl_status(struct nvmet_req *req) 1501 { 1502 if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { 1503 pr_err("got cmd %d while CC.EN == 0 on qid = %d\n", 1504 req->cmd->common.opcode, req->sq->qid); 1505 return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR; 1506 } 1507 1508 if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { 1509 pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n", 1510 req->cmd->common.opcode, req->sq->qid); 1511 return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR; 1512 } 1513 1514 if (unlikely(!nvmet_check_auth_status(req))) { 1515 pr_warn("qid %d not authenticated\n", req->sq->qid); 1516 return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR; 1517 } 1518 return 0; 1519 } 1520 1521 bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn) 1522 { 1523 struct nvmet_host_link *p; 1524 1525 lockdep_assert_held(&nvmet_config_sem); 1526 1527 if (subsys->allow_any_host) 1528 return true; 1529 1530 if (nvmet_is_disc_subsys(subsys)) /* allow all access to disc subsys */ 1531 return true; 1532 1533 list_for_each_entry(p, &subsys->hosts, entry) { 1534 if (!strcmp(nvmet_host_name(p->host), hostnqn)) 1535 return true; 1536 } 1537 1538 return false; 1539 } 1540 1541 static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl, 1542 struct device *p2p_client) 1543 { 1544 struct nvmet_ns *ns; 1545 unsigned long idx; 1546 1547 lockdep_assert_held(&ctrl->subsys->lock); 1548 1549 if (!p2p_client) 1550 return; 1551 1552 ctrl->p2p_client = get_device(p2p_client); 1553 1554 nvmet_for_each_enabled_ns(&ctrl->subsys->namespaces, idx, ns) 1555 nvmet_p2pmem_ns_add_p2p(ctrl, ns); 1556 } 1557 1558 static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl) 1559 { 1560 struct radix_tree_iter iter; 1561 void __rcu **slot; 1562 1563 lockdep_assert_held(&ctrl->subsys->lock); 1564 1565 radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0) 1566 pci_dev_put(radix_tree_deref_slot(slot)); 1567 1568 put_device(ctrl->p2p_client); 1569 } 1570 1571 static void nvmet_fatal_error_handler(struct work_struct *work) 1572 { 1573 struct nvmet_ctrl *ctrl = 1574 container_of(work, struct nvmet_ctrl, fatal_err_work); 1575 1576 pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid); 1577 ctrl->ops->delete_ctrl(ctrl); 1578 } 1579 1580 struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args) 1581 { 1582 struct nvmet_subsys *subsys; 1583 struct nvmet_ctrl *ctrl; 1584 u32 kato = args->kato; 1585 u8 dhchap_status; 1586 int ret; 1587 1588 args->status = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR; 1589 subsys = nvmet_find_get_subsys(args->port, args->subsysnqn); 1590 if (!subsys) { 1591 pr_warn("connect request for invalid subsystem %s!\n", 1592 args->subsysnqn); 1593 args->result = IPO_IATTR_CONNECT_DATA(subsysnqn); 1594 args->error_loc = offsetof(struct nvme_common_command, dptr); 1595 return NULL; 1596 } 1597 1598 down_read(&nvmet_config_sem); 1599 if (!nvmet_host_allowed(subsys, args->hostnqn)) { 1600 pr_info("connect by host %s for subsystem %s not allowed\n", 1601 args->hostnqn, args->subsysnqn); 1602 args->result = IPO_IATTR_CONNECT_DATA(hostnqn); 1603 up_read(&nvmet_config_sem); 1604 args->status = NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR; 1605 args->error_loc = offsetof(struct nvme_common_command, dptr); 1606 goto out_put_subsystem; 1607 } 1608 up_read(&nvmet_config_sem); 1609 1610 args->status = NVME_SC_INTERNAL; 1611 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); 1612 if (!ctrl) 1613 goto out_put_subsystem; 1614 mutex_init(&ctrl->lock); 1615 1616 ctrl->port = args->port; 1617 ctrl->ops = args->ops; 1618 1619 #ifdef CONFIG_NVME_TARGET_PASSTHRU 1620 /* By default, set loop targets to clear IDS by default */ 1621 if (ctrl->port->disc_addr.trtype == NVMF_TRTYPE_LOOP) 1622 subsys->clear_ids = 1; 1623 #endif 1624 1625 INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work); 1626 INIT_LIST_HEAD(&ctrl->async_events); 1627 INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL); 1628 INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); 1629 INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer); 1630 1631 memcpy(ctrl->subsysnqn, args->subsysnqn, NVMF_NQN_SIZE); 1632 memcpy(ctrl->hostnqn, args->hostnqn, NVMF_NQN_SIZE); 1633 1634 kref_init(&ctrl->ref); 1635 ctrl->subsys = subsys; 1636 ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support; 1637 nvmet_init_cap(ctrl); 1638 WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL); 1639 1640 ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES, 1641 sizeof(__le32), GFP_KERNEL); 1642 if (!ctrl->changed_ns_list) 1643 goto out_free_ctrl; 1644 1645 ctrl->sqs = kcalloc(subsys->max_qid + 1, 1646 sizeof(struct nvmet_sq *), 1647 GFP_KERNEL); 1648 if (!ctrl->sqs) 1649 goto out_free_changed_ns_list; 1650 1651 ctrl->cqs = kcalloc(subsys->max_qid + 1, sizeof(struct nvmet_cq *), 1652 GFP_KERNEL); 1653 if (!ctrl->cqs) 1654 goto out_free_sqs; 1655 1656 ret = ida_alloc_range(&cntlid_ida, 1657 subsys->cntlid_min, subsys->cntlid_max, 1658 GFP_KERNEL); 1659 if (ret < 0) { 1660 args->status = NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR; 1661 goto out_free_cqs; 1662 } 1663 ctrl->cntlid = ret; 1664 1665 /* 1666 * Discovery controllers may use some arbitrary high value 1667 * in order to cleanup stale discovery sessions 1668 */ 1669 if (nvmet_is_disc_subsys(ctrl->subsys) && !kato) 1670 kato = NVMET_DISC_KATO_MS; 1671 1672 /* keep-alive timeout in seconds */ 1673 ctrl->kato = DIV_ROUND_UP(kato, 1000); 1674 1675 ctrl->err_counter = 0; 1676 spin_lock_init(&ctrl->error_lock); 1677 1678 nvmet_start_keep_alive_timer(ctrl); 1679 1680 mutex_lock(&subsys->lock); 1681 ret = nvmet_ctrl_init_pr(ctrl); 1682 if (ret) 1683 goto init_pr_fail; 1684 list_add_tail(&ctrl->subsys_entry, &subsys->ctrls); 1685 nvmet_setup_p2p_ns_map(ctrl, args->p2p_client); 1686 nvmet_debugfs_ctrl_setup(ctrl); 1687 mutex_unlock(&subsys->lock); 1688 1689 if (args->hostid) 1690 uuid_copy(&ctrl->hostid, args->hostid); 1691 1692 dhchap_status = nvmet_setup_auth(ctrl, args->sq); 1693 if (dhchap_status) { 1694 pr_err("Failed to setup authentication, dhchap status %u\n", 1695 dhchap_status); 1696 nvmet_ctrl_put(ctrl); 1697 if (dhchap_status == NVME_AUTH_DHCHAP_FAILURE_FAILED) 1698 args->status = 1699 NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR; 1700 else 1701 args->status = NVME_SC_INTERNAL; 1702 return NULL; 1703 } 1704 1705 args->status = NVME_SC_SUCCESS; 1706 1707 pr_info("Created %s controller %d for subsystem %s for NQN %s%s%s%s.\n", 1708 nvmet_is_disc_subsys(ctrl->subsys) ? "discovery" : "nvm", 1709 ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn, 1710 ctrl->pi_support ? " T10-PI is enabled" : "", 1711 nvmet_has_auth(ctrl, args->sq) ? " with DH-HMAC-CHAP" : "", 1712 nvmet_queue_tls_keyid(args->sq) ? ", TLS" : ""); 1713 1714 return ctrl; 1715 1716 init_pr_fail: 1717 mutex_unlock(&subsys->lock); 1718 nvmet_stop_keep_alive_timer(ctrl); 1719 ida_free(&cntlid_ida, ctrl->cntlid); 1720 out_free_cqs: 1721 kfree(ctrl->cqs); 1722 out_free_sqs: 1723 kfree(ctrl->sqs); 1724 out_free_changed_ns_list: 1725 kfree(ctrl->changed_ns_list); 1726 out_free_ctrl: 1727 kfree(ctrl); 1728 out_put_subsystem: 1729 nvmet_subsys_put(subsys); 1730 return NULL; 1731 } 1732 EXPORT_SYMBOL_GPL(nvmet_alloc_ctrl); 1733 1734 static void nvmet_ctrl_free(struct kref *ref) 1735 { 1736 struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref); 1737 struct nvmet_subsys *subsys = ctrl->subsys; 1738 1739 mutex_lock(&subsys->lock); 1740 nvmet_ctrl_destroy_pr(ctrl); 1741 nvmet_release_p2p_ns_map(ctrl); 1742 list_del(&ctrl->subsys_entry); 1743 mutex_unlock(&subsys->lock); 1744 1745 nvmet_stop_keep_alive_timer(ctrl); 1746 1747 flush_work(&ctrl->async_event_work); 1748 cancel_work_sync(&ctrl->fatal_err_work); 1749 1750 nvmet_destroy_auth(ctrl); 1751 1752 nvmet_debugfs_ctrl_free(ctrl); 1753 1754 ida_free(&cntlid_ida, ctrl->cntlid); 1755 1756 nvmet_async_events_free(ctrl); 1757 kfree(ctrl->sqs); 1758 kfree(ctrl->cqs); 1759 kfree(ctrl->changed_ns_list); 1760 kfree(ctrl); 1761 1762 nvmet_subsys_put(subsys); 1763 } 1764 1765 void nvmet_ctrl_put(struct nvmet_ctrl *ctrl) 1766 { 1767 kref_put(&ctrl->ref, nvmet_ctrl_free); 1768 } 1769 EXPORT_SYMBOL_GPL(nvmet_ctrl_put); 1770 1771 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl) 1772 { 1773 mutex_lock(&ctrl->lock); 1774 if (!(ctrl->csts & NVME_CSTS_CFS)) { 1775 ctrl->csts |= NVME_CSTS_CFS; 1776 queue_work(nvmet_wq, &ctrl->fatal_err_work); 1777 } 1778 mutex_unlock(&ctrl->lock); 1779 } 1780 EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error); 1781 1782 ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl, 1783 char *traddr, size_t traddr_len) 1784 { 1785 if (!ctrl->ops->host_traddr) 1786 return -EOPNOTSUPP; 1787 return ctrl->ops->host_traddr(ctrl, traddr, traddr_len); 1788 } 1789 1790 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port, 1791 const char *subsysnqn) 1792 { 1793 struct nvmet_subsys_link *p; 1794 1795 if (!port) 1796 return NULL; 1797 1798 if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) { 1799 if (!kref_get_unless_zero(&nvmet_disc_subsys->ref)) 1800 return NULL; 1801 return nvmet_disc_subsys; 1802 } 1803 1804 down_read(&nvmet_config_sem); 1805 if (!strncmp(nvmet_disc_subsys->subsysnqn, subsysnqn, 1806 NVMF_NQN_SIZE)) { 1807 if (kref_get_unless_zero(&nvmet_disc_subsys->ref)) { 1808 up_read(&nvmet_config_sem); 1809 return nvmet_disc_subsys; 1810 } 1811 } 1812 list_for_each_entry(p, &port->subsystems, entry) { 1813 if (!strncmp(p->subsys->subsysnqn, subsysnqn, 1814 NVMF_NQN_SIZE)) { 1815 if (!kref_get_unless_zero(&p->subsys->ref)) 1816 break; 1817 up_read(&nvmet_config_sem); 1818 return p->subsys; 1819 } 1820 } 1821 up_read(&nvmet_config_sem); 1822 return NULL; 1823 } 1824 1825 struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, 1826 enum nvme_subsys_type type) 1827 { 1828 struct nvmet_subsys *subsys; 1829 char serial[NVMET_SN_MAX_SIZE / 2]; 1830 int ret; 1831 1832 subsys = kzalloc(sizeof(*subsys), GFP_KERNEL); 1833 if (!subsys) 1834 return ERR_PTR(-ENOMEM); 1835 1836 subsys->ver = NVMET_DEFAULT_VS; 1837 /* generate a random serial number as our controllers are ephemeral: */ 1838 get_random_bytes(&serial, sizeof(serial)); 1839 bin2hex(subsys->serial, &serial, sizeof(serial)); 1840 1841 subsys->model_number = kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL); 1842 if (!subsys->model_number) { 1843 ret = -ENOMEM; 1844 goto free_subsys; 1845 } 1846 1847 subsys->ieee_oui = 0; 1848 1849 subsys->firmware_rev = kstrndup(UTS_RELEASE, NVMET_FR_MAX_SIZE, GFP_KERNEL); 1850 if (!subsys->firmware_rev) { 1851 ret = -ENOMEM; 1852 goto free_mn; 1853 } 1854 1855 switch (type) { 1856 case NVME_NQN_NVME: 1857 subsys->max_qid = NVMET_NR_QUEUES; 1858 break; 1859 case NVME_NQN_DISC: 1860 case NVME_NQN_CURR: 1861 subsys->max_qid = 0; 1862 break; 1863 default: 1864 pr_err("%s: Unknown Subsystem type - %d\n", __func__, type); 1865 ret = -EINVAL; 1866 goto free_fr; 1867 } 1868 subsys->type = type; 1869 subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE, 1870 GFP_KERNEL); 1871 if (!subsys->subsysnqn) { 1872 ret = -ENOMEM; 1873 goto free_fr; 1874 } 1875 subsys->cntlid_min = NVME_CNTLID_MIN; 1876 subsys->cntlid_max = NVME_CNTLID_MAX; 1877 kref_init(&subsys->ref); 1878 1879 mutex_init(&subsys->lock); 1880 xa_init(&subsys->namespaces); 1881 INIT_LIST_HEAD(&subsys->ctrls); 1882 INIT_LIST_HEAD(&subsys->hosts); 1883 1884 ret = nvmet_debugfs_subsys_setup(subsys); 1885 if (ret) 1886 goto free_subsysnqn; 1887 1888 return subsys; 1889 1890 free_subsysnqn: 1891 kfree(subsys->subsysnqn); 1892 free_fr: 1893 kfree(subsys->firmware_rev); 1894 free_mn: 1895 kfree(subsys->model_number); 1896 free_subsys: 1897 kfree(subsys); 1898 return ERR_PTR(ret); 1899 } 1900 1901 static void nvmet_subsys_free(struct kref *ref) 1902 { 1903 struct nvmet_subsys *subsys = 1904 container_of(ref, struct nvmet_subsys, ref); 1905 1906 WARN_ON_ONCE(!xa_empty(&subsys->namespaces)); 1907 1908 nvmet_debugfs_subsys_free(subsys); 1909 1910 xa_destroy(&subsys->namespaces); 1911 nvmet_passthru_subsys_free(subsys); 1912 1913 kfree(subsys->subsysnqn); 1914 kfree(subsys->model_number); 1915 kfree(subsys->firmware_rev); 1916 kfree(subsys); 1917 } 1918 1919 void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys) 1920 { 1921 struct nvmet_ctrl *ctrl; 1922 1923 mutex_lock(&subsys->lock); 1924 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) 1925 ctrl->ops->delete_ctrl(ctrl); 1926 mutex_unlock(&subsys->lock); 1927 } 1928 1929 void nvmet_subsys_put(struct nvmet_subsys *subsys) 1930 { 1931 kref_put(&subsys->ref, nvmet_subsys_free); 1932 } 1933 1934 static int __init nvmet_init(void) 1935 { 1936 int error = -ENOMEM; 1937 1938 nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1; 1939 1940 nvmet_bvec_cache = kmem_cache_create("nvmet-bvec", 1941 NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), 0, 1942 SLAB_HWCACHE_ALIGN, NULL); 1943 if (!nvmet_bvec_cache) 1944 return -ENOMEM; 1945 1946 zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0); 1947 if (!zbd_wq) 1948 goto out_destroy_bvec_cache; 1949 1950 buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq", 1951 WQ_MEM_RECLAIM, 0); 1952 if (!buffered_io_wq) 1953 goto out_free_zbd_work_queue; 1954 1955 nvmet_wq = alloc_workqueue("nvmet-wq", 1956 WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_SYSFS, 0); 1957 if (!nvmet_wq) 1958 goto out_free_buffered_work_queue; 1959 1960 error = nvmet_init_debugfs(); 1961 if (error) 1962 goto out_free_nvmet_work_queue; 1963 1964 error = nvmet_init_discovery(); 1965 if (error) 1966 goto out_exit_debugfs; 1967 1968 error = nvmet_init_configfs(); 1969 if (error) 1970 goto out_exit_discovery; 1971 1972 return 0; 1973 1974 out_exit_discovery: 1975 nvmet_exit_discovery(); 1976 out_exit_debugfs: 1977 nvmet_exit_debugfs(); 1978 out_free_nvmet_work_queue: 1979 destroy_workqueue(nvmet_wq); 1980 out_free_buffered_work_queue: 1981 destroy_workqueue(buffered_io_wq); 1982 out_free_zbd_work_queue: 1983 destroy_workqueue(zbd_wq); 1984 out_destroy_bvec_cache: 1985 kmem_cache_destroy(nvmet_bvec_cache); 1986 return error; 1987 } 1988 1989 static void __exit nvmet_exit(void) 1990 { 1991 nvmet_exit_configfs(); 1992 nvmet_exit_discovery(); 1993 nvmet_exit_debugfs(); 1994 ida_destroy(&cntlid_ida); 1995 destroy_workqueue(nvmet_wq); 1996 destroy_workqueue(buffered_io_wq); 1997 destroy_workqueue(zbd_wq); 1998 kmem_cache_destroy(nvmet_bvec_cache); 1999 2000 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024); 2001 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024); 2002 } 2003 2004 module_init(nvmet_init); 2005 module_exit(nvmet_exit); 2006 2007 MODULE_DESCRIPTION("NVMe target core framework"); 2008 MODULE_LICENSE("GPL v2"); 2009