// SPDX-License-Identifier: GPL-2.0
/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/hex.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/pci-p2pdma.h>
#include <linux/scatterlist.h>

#include <generated/utsrelease.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#include "nvmet.h"
#include "debugfs.h"

struct kmem_cache *nvmet_bvec_cache;
struct workqueue_struct *buffered_io_wq;
struct workqueue_struct *zbd_wq;
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

struct workqueue_struct *nvmet_wq;
EXPORT_SYMBOL_GPL(nvmet_wq);
struct workqueue_struct *nvmet_aen_wq;
EXPORT_SYMBOL_GPL(nvmet_aen_wq);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources to be protected by this semaphore is:
 *
 *  - subsystems list
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures, the write lock should be
 * obtained, while when reading (populating the discovery log page or checking
 * a host-subsystem link) the read lock is obtained to allow concurrent reads.
 */
DECLARE_RWSEM(nvmet_config_sem);

u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
u64 nvmet_ana_chgcnt;
DECLARE_RWSEM(nvmet_ana_sem);

inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
{
	switch (errno) {
	case 0:
		return NVME_SC_SUCCESS;
	case -ENOSPC:
		req->error_loc = offsetof(struct nvme_rw_command, length);
		return NVME_SC_CAP_EXCEEDED | NVME_STATUS_DNR;
	case -EREMOTEIO:
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		return NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
	case -EOPNOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
	case -ENODATA:
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		return NVME_SC_ACCESS_DENIED;
	case -EIO:
		fallthrough;
	default:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;
	}
}

u16 nvmet_report_invalid_opcode(struct nvmet_req *req)
{
	pr_debug("unhandled cmd %d on qid %d\n", req->cmd->common.opcode,
		 req->sq->qid);

	req->error_loc = offsetof(struct nvme_common_command, opcode);
	return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
	}
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
	}
	return 0;
}
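/*
 * Example (illustrative sketch, not part of this driver): a command handler
 * that returns a fixed-size structure to the host would bounce it through
 * nvmet_copy_to_sgl(), which maps any short copy to an SGL error status.
 * The handler name and log structure below are hypothetical:
 *
 *	static void nvmet_execute_get_foo_log(struct nvmet_req *req)
 *	{
 *		struct foo_log log = { };
 *		u16 status;
 *
 *		status = nvmet_copy_to_sgl(req, 0, &log, sizeof(log));
 *		nvmet_req_complete(req, status);
 *	}
 */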
u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
	if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
	}
	return 0;
}

static u32 nvmet_max_nsid(struct nvmet_subsys *subsys)
{
	struct nvmet_ns *cur;
	unsigned long idx;
	u32 nsid = 0;

	nvmet_for_each_enabled_ns(&subsys->namespaces, idx, cur)
		nsid = cur->nsid;

	return nsid;
}

static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	mutex_lock(&ctrl->lock);
	while (ctrl->nr_async_event_cmds) {
		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_STATUS_DNR);
		mutex_lock(&ctrl->lock);
	}
	mutex_unlock(&ctrl->lock);
}

static void nvmet_async_events_process(struct nvmet_ctrl *ctrl)
{
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	mutex_lock(&ctrl->lock);
	while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) {
		aen = list_first_entry(&ctrl->async_events,
				       struct nvmet_async_event, entry);
		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		trace_nvmet_async_event(ctrl, req->cqe->result.u32);
		nvmet_req_complete(req, 0);
		mutex_lock(&ctrl->lock);
	}
	mutex_unlock(&ctrl->lock);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_async_event *aen, *tmp;

	mutex_lock(&ctrl->lock);
	list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) {
		list_del(&aen->entry);
		kfree(aen);
	}
	mutex_unlock(&ctrl->lock);
}

static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);

	nvmet_async_events_process(ctrl);
}

void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	queue_work(nvmet_aen_wq, &ctrl->async_event_work);
}

static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	u32 i;

	mutex_lock(&ctrl->lock);
	if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
		goto out_unlock;

	for (i = 0; i < ctrl->nr_changed_ns; i++) {
		if (ctrl->changed_ns_list[i] == nsid)
			goto out_unlock;
	}

	if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
		ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
		ctrl->nr_changed_ns = U32_MAX;
		goto out_unlock;
	}

	ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
out_unlock:
	mutex_unlock(&ctrl->lock);
}
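/*
 * Worked example (for illustration): the AEN completion result packs the
 * event as (event_type | event_info << 8 | log_page << 16), so a namespace
 * attribute notice raised as
 *
 *	nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
 *			      NVME_AER_NOTICE_NS_CHANGED, NVME_LOG_CHANGED_NS);
 *
 * with NVME_AER_NOTICE = 0x2, NVME_AER_NOTICE_NS_CHANGED = 0x0 and
 * NVME_LOG_CHANGED_NS = 0x4 yields a completion result of 0x00040002.
 */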
void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ctrl *ctrl;

	lockdep_assert_held(&subsys->lock);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
				NVME_AER_NOTICE_NS_CHANGED,
				NVME_LOG_CHANGED_NS);
	}
}

void nvmet_send_ana_event(struct nvmet_subsys *subsys,
			  struct nvmet_port *port)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (port && ctrl->port != port)
			continue;
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
				NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
	}
	mutex_unlock(&subsys->lock);
}

void nvmet_port_send_ana_event(struct nvmet_port *port)
{
	struct nvmet_subsys_link *p;

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry)
		nvmet_send_ana_event(p->subsys, port);
	up_read(&nvmet_config_sem);
}

int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
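/*
 * Example (illustrative sketch): a fabrics transport module registers a
 * filled-in nvmet_fabrics_ops at init time and unregisters it on exit.
 * The ops structure and callbacks below are hypothetical placeholders; a
 * real transport must also implement queue_response, delete_ctrl, etc.:
 *
 *	static const struct nvmet_fabrics_ops nvmet_foo_ops = {
 *		.owner		= THIS_MODULE,
 *		.type		= NVMF_TRTYPE_TCP,
 *		.add_port	= nvmet_foo_add_port,
 *		.remove_port	= nvmet_foo_remove_port,
 *		.queue_response	= nvmet_foo_queue_response,
 *		.delete_ctrl	= nvmet_foo_delete_ctrl,
 *	};
 *
 *	static int __init nvmet_foo_init(void)
 *	{
 *		return nvmet_register_transport(&nvmet_foo_ops);
 *	}
 */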
void nvmet_port_del_ctrls(struct nvmet_port *port, struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->port == port)
			ctrl->ops->delete_ctrl(ctrl);
	}
	mutex_unlock(&subsys->lock);
}

int nvmet_enable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	if (port->disc_addr.trtype == NVMF_TRTYPE_MAX)
		return -EINVAL;

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	/*
	 * If the user requested PI support and the transport isn't pi capable,
	 * don't enable the port.
	 */
	if (port->pi_enable && !(ops->flags & NVMF_METADATA_SUPPORTED)) {
		pr_err("T10-PI is not supported by transport type %d\n",
			port->disc_addr.trtype);
		ret = -EINVAL;
		goto out_put;
	}

	ret = ops->add_port(port);
	if (ret)
		goto out_put;

	/* If the transport didn't set inline_data_size, then disable it. */
	if (port->inline_data_size < 0)
		port->inline_data_size = 0;

	/*
	 * If the transport didn't set the max_queue_size properly, then clamp
	 * it to the target limits. Also set default values in case the
	 * transport didn't set it at all.
	 */
	if (port->max_queue_size < 0)
		port->max_queue_size = NVMET_MAX_QUEUE_SIZE;
	else
		port->max_queue_size = clamp_t(int, port->max_queue_size,
					       NVMET_MIN_QUEUE_SIZE,
					       NVMET_MAX_QUEUE_SIZE);

	/*
	 * If the transport didn't set the mdts properly, then clamp it to the
	 * target limits. Also set default values in case the transport didn't
	 * set it at all.
	 */
	if (port->mdts < 0 || port->mdts > NVMET_MAX_MDTS)
		port->mdts = 0;

	port->enabled = true;
	port->tr_ops = ops;
	return 0;

out_put:
	module_put(ops->owner);
	return ret;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;
	port->tr_ops = NULL;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);
	bool reset_tbkas = ctrl->reset_tbkas;

	ctrl->reset_tbkas = false;
	if (reset_tbkas) {
		pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
			ctrl->cntlid);
		queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
		return;
	}

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	if (unlikely(ctrl->kato == 0))
		return;

	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	if (unlikely(ctrl->kato == 0))
		return;

	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}
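/*
 * Example (illustrative): nvmet_enable_port() above autoloads transports by
 * numeric type via request_module("nvmet-transport-%d", ...).  For that to
 * work, a transport module declares a matching alias; e.g. the in-tree TCP
 * transport (trtype 3) carries:
 *
 *	MODULE_ALIAS("nvmet-transport-3");
 */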
u16 nvmet_req_find_ns(struct nvmet_req *req)
{
	u32 nsid = le32_to_cpu(req->cmd->common.nsid);
	struct nvmet_subsys *subsys = nvmet_req_subsys(req);

	req->ns = xa_load(&subsys->namespaces, nsid);
	if (unlikely(!req->ns || !req->ns->enabled)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		if (!req->ns) /* ns doesn't exist! */
			return NVME_SC_INVALID_NS | NVME_STATUS_DNR;

		/* ns exists but it's disabled */
		req->ns = NULL;
		return NVME_SC_INTERNAL_PATH_ERROR;
	}

	percpu_ref_get(&req->ns->ref);
	return NVME_SC_SUCCESS;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
{
	nvmet_bdev_ns_disable(ns);
	nvmet_file_ns_disable(ns);
}

static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
{
	int ret;
	struct pci_dev *p2p_dev;

	if (!ns->use_p2pmem)
		return 0;

	if (!ns->bdev) {
		pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
		return -EINVAL;
	}

	if (!blk_queue_pci_p2pdma(ns->bdev->bd_disk->queue)) {
		pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
		       ns->device_path);
		return -EINVAL;
	}

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
		if (ret < 0)
			return -EINVAL;
	} else {
		/*
		 * Right now we just check that there is p2pmem available so
		 * we can report an error to the user right away if there
		 * is not. We'll find the actual device to use once we
		 * setup the controller when the port's device is available.
		 */

		p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available for %s\n",
			       ns->device_path);
			return -EINVAL;
		}

		pci_dev_put(p2p_dev);
	}

	return 0;
}

static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
				    struct nvmet_ns *ns)
{
	struct device *clients[2];
	struct pci_dev *p2p_dev;
	int ret;

	lockdep_assert_held(&ctrl->subsys->lock);

	if (!ctrl->p2p_client || !ns->use_p2pmem)
		return;

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
		if (ret < 0)
			return;

		p2p_dev = pci_dev_get(ns->p2p_dev);
	} else {
		clients[0] = ctrl->p2p_client;
		clients[1] = nvmet_ns_dev(ns);

		p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
			       dev_name(ctrl->p2p_client), ns->device_path);
			return;
		}
	}

	ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
	if (ret < 0)
		pci_dev_put(p2p_dev);

	pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
		ns->nsid);
}

bool nvmet_ns_revalidate(struct nvmet_ns *ns)
{
	loff_t oldsize = ns->size;

	if (ns->bdev)
		nvmet_bdev_ns_revalidate(ns);
	else
		nvmet_file_ns_revalidate(ns);

	return oldsize != ns->size;
}
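/*
 * Example (illustrative sketch): command parsers pin the target namespace
 * with nvmet_req_find_ns() above, which takes a percpu reference on
 * success; the matching nvmet_put_namespace() happens automatically in
 * __nvmet_req_complete().  A hypothetical parser step:
 *
 *	u16 status = nvmet_req_find_ns(req);
 *
 *	if (status)
 *		return status;	// invalid or disabled nsid
 *	// req->ns is now valid and pinned until the request completes
 */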
int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;
	int ret;

	mutex_lock(&subsys->lock);
	ret = 0;

	if (nvmet_is_passthru_subsys(subsys)) {
		pr_info("cannot enable both passthru and regular namespaces for a single subsystem\n");
		goto out_unlock;
	}

	if (ns->enabled)
		goto out_unlock;

	ret = nvmet_bdev_ns_enable(ns);
	if (ret == -ENOTBLK)
		ret = nvmet_file_ns_enable(ns);
	if (ret)
		goto out_unlock;

	ret = nvmet_p2pmem_ns_enable(ns);
	if (ret)
		goto out_dev_disable;

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);

	if (ns->pr.enable) {
		ret = nvmet_pr_init_ns(ns);
		if (ret)
			goto out_dev_put;
	}

	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0,
			      GFP_KERNEL);
	if (ret)
		goto out_pr_exit;

	nvmet_ns_changed(subsys, ns->nsid);
	ns->enabled = true;
	xa_set_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED);
	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
out_pr_exit:
	if (ns->pr.enable)
		nvmet_pr_exit_ns(ns);
out_dev_put:
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
out_dev_disable:
	nvmet_ns_dev_disable(ns);
	goto out_unlock;
}

void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	if (!ns->enabled)
		goto out_unlock;

	ns->enabled = false;
	xa_clear_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));

	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespaces from the lookup list, we
	 * can kill the per_cpu ref and wait for any remaining references
	 * to be dropped, as well as a RCU grace period for anyone only
	 * using the namespace under rcu_read_lock(). Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	if (ns->pr.enable)
		nvmet_pr_exit_ns(ns);

	mutex_lock(&subsys->lock);
	nvmet_ns_changed(subsys, ns->nsid);
	nvmet_ns_dev_disable(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;

	nvmet_ns_disable(ns);

	mutex_lock(&subsys->lock);

	xa_erase(&subsys->namespaces, ns->nsid);
	if (ns->nsid == subsys->max_nsid)
		subsys->max_nsid = nvmet_max_nsid(subsys);

	subsys->nr_namespaces--;
	mutex_unlock(&subsys->lock);

	down_write(&nvmet_ana_sem);
	nvmet_ana_group_enabled[ns->anagrpid]--;
	up_write(&nvmet_ana_sem);

	kfree(ns->device_path);
	kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	mutex_lock(&subsys->lock);

	if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
		goto out_unlock;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		goto out_unlock;

	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = nsid;

	if (xa_insert(&subsys->namespaces, ns->nsid, ns, GFP_KERNEL))
		goto out_exit;

	subsys->nr_namespaces++;

	mutex_unlock(&subsys->lock);

	down_write(&nvmet_ana_sem);
	ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
	nvmet_ana_group_enabled[ns->anagrpid]++;
	up_write(&nvmet_ana_sem);

	uuid_gen(&ns->uuid);
	ns->buffered_io = false;
	ns->csi = NVME_CSI_NVM;

	return ns;
out_exit:
	subsys->max_nsid = nvmet_max_nsid(subsys);
	kfree(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
	return NULL;
}
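/*
 * Example (illustrative): the configfs glue drives the namespace lifecycle
 * built from the helpers above, roughly:
 *
 *	ns = nvmet_ns_alloc(subsys, nsid);	// mkdir namespaces/<nsid>
 *	// user sets ns->device_path, ANA group, etc. via attributes
 *	nvmet_ns_enable(ns);			// echo 1 > enable
 *	...
 *	nvmet_ns_disable(ns);			// echo 0 > enable
 *	nvmet_ns_free(ns);			// rmdir namespaces/<nsid>
 *
 * nvmet_ns_disable() quiesces in-flight I/O by killing the namespace's
 * percpu_ref and waiting for disable_done before tearing down the backend.
 */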
static void nvmet_update_sq_head(struct nvmet_req *req)
{
	if (req->sq->size) {
		u32 old_sqhd, new_sqhd;

		old_sqhd = READ_ONCE(req->sq->sqhd);
		do {
			new_sqhd = (old_sqhd + 1) % req->sq->size;
		} while (!try_cmpxchg(&req->sq->sqhd, &old_sqhd, new_sqhd));
	}
	req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
}

static void nvmet_set_error(struct nvmet_req *req, u16 status)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvme_error_slot *new_error_slot;
	unsigned long flags;

	req->cqe->status = cpu_to_le16(status << 1);

	if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC)
		return;

	spin_lock_irqsave(&ctrl->error_lock, flags);
	ctrl->err_counter++;
	new_error_slot =
		&ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS];

	new_error_slot->error_count = cpu_to_le64(ctrl->err_counter);
	new_error_slot->sqid = cpu_to_le16(req->sq->qid);
	new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id);
	new_error_slot->status_field = cpu_to_le16(status << 1);
	new_error_slot->param_error_location = cpu_to_le16(req->error_loc);
	new_error_slot->lba = cpu_to_le64(req->error_slba);
	new_error_slot->nsid = req->cmd->common.nsid;
	spin_unlock_irqrestore(&ctrl->error_lock, flags);

	/* set the more bit for this request */
	req->cqe->status |= cpu_to_le16(1 << 14);
}

static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	struct nvmet_ns *ns = req->ns;
	struct nvmet_pr_per_ctrl_ref *pc_ref = req->pc_ref;

	if (!req->sq->sqhd_disabled)
		nvmet_update_sq_head(req);
	req->cqe->sq_id = cpu_to_le16(req->sq->qid);
	req->cqe->command_id = req->cmd->common.command_id;

	if (unlikely(status))
		nvmet_set_error(req, status);

	trace_nvmet_req_complete(req);

	req->ops->queue_response(req);

	if (pc_ref)
		nvmet_pr_put_ns_pc_ref(pc_ref);
	if (ns)
		nvmet_put_namespace(ns);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	struct nvmet_sq *sq = req->sq;

	__nvmet_req_complete(req, status);
	percpu_ref_put(&sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_init(struct nvmet_cq *cq)
{
	refcount_set(&cq->ref, 1);
}
EXPORT_SYMBOL_GPL(nvmet_cq_init);

bool nvmet_cq_get(struct nvmet_cq *cq)
{
	return refcount_inc_not_zero(&cq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_cq_get);

void nvmet_cq_put(struct nvmet_cq *cq)
{
	if (refcount_dec_and_test(&cq->ref))
		nvmet_cq_destroy(cq);
}
EXPORT_SYMBOL_GPL(nvmet_cq_put);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

void nvmet_cq_destroy(struct nvmet_cq *cq)
{
	struct nvmet_ctrl *ctrl = cq->ctrl;

	if (ctrl) {
		ctrl->cqs[cq->qid] = NULL;
		nvmet_ctrl_put(cq->ctrl);
		cq->ctrl = NULL;
	}
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->sqhd = 0;
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

static void nvmet_confirm_sq(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->confirm_done);
}
u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create)
{
	if (!ctrl->cqs)
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;

	if (cqid > ctrl->subsys->max_qid)
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;

	if ((create && ctrl->cqs[cqid]) || (!create && !ctrl->cqs[cqid]))
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;

	return NVME_SC_SUCCESS;
}

u16 nvmet_check_io_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create)
{
	if (!cqid)
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
	return nvmet_check_cqid(ctrl, cqid, create);
}

bool nvmet_cq_in_use(struct nvmet_cq *cq)
{
	return refcount_read(&cq->ref) > 1;
}
EXPORT_SYMBOL_GPL(nvmet_cq_in_use);

u16 nvmet_cq_create(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	u16 status;

	status = nvmet_check_cqid(ctrl, qid, true);
	if (status != NVME_SC_SUCCESS)
		return status;

	if (!kref_get_unless_zero(&ctrl->ref))
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;
	cq->ctrl = ctrl;

	nvmet_cq_init(cq);
	nvmet_cq_setup(ctrl, cq, qid, size);

	return NVME_SC_SUCCESS;
}
EXPORT_SYMBOL_GPL(nvmet_cq_create);

u16 nvmet_check_sqid(struct nvmet_ctrl *ctrl, u16 sqid,
		bool create)
{
	if (!ctrl->sqs)
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;

	if (sqid > ctrl->subsys->max_qid)
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;

	if ((create && ctrl->sqs[sqid]) ||
	    (!create && !ctrl->sqs[sqid]))
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;

	return NVME_SC_SUCCESS;
}

u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		struct nvmet_cq *cq, u16 sqid, u16 size)
{
	u16 status;
	int ret;

	if (!kref_get_unless_zero(&ctrl->ref))
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;

	status = nvmet_check_sqid(ctrl, sqid, true);
	if (status != NVME_SC_SUCCESS)
		goto ctrl_put;

	ret = nvmet_sq_init(sq, cq);
	if (ret) {
		status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
		goto ctrl_put;
	}

	nvmet_sq_setup(ctrl, sq, sqid, size);
	sq->ctrl = ctrl;

	return NVME_SC_SUCCESS;

ctrl_put:
	nvmet_ctrl_put(ctrl);
	return status;
}
EXPORT_SYMBOL_GPL(nvmet_sq_create);
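/*
 * Example (illustrative sketch): a transport that services Create I/O
 * CQ/SQ commands itself (e.g. a PCI-endpoint style target) pairs the
 * helpers above roughly as follows; cqid/sqid and the queue object are
 * hypothetical transport state:
 *
 *	status = nvmet_cq_create(ctrl, &queue->cq, cqid, cq_size);
 *	if (status != NVME_SC_SUCCESS)
 *		return status;
 *
 *	status = nvmet_sq_create(ctrl, &queue->sq, &queue->cq, sqid, sq_size);
 *	// on success the SQ holds a reference on both the CQ and the ctrl
 */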
void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	struct nvmet_ctrl *ctrl = sq->ctrl;

	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
	 */
	if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
		nvmet_async_events_failall(ctrl);
	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
	wait_for_completion(&sq->confirm_done);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);
	nvmet_auth_sq_free(sq);
	nvmet_cq_put(sq->cq);

	/*
	 * We must reference the ctrl again after waiting for inflight IO
	 * to complete, because an admin connect may have sneaked in after
	 * we stored sq->ctrl locally but before we killed the percpu_ref.
	 * The admin connect allocates and assigns sq->ctrl, which now needs
	 * a final ref put, as this ctrl is going away.
	 */
	ctrl = sq->ctrl;

	if (ctrl) {
		/*
		 * The teardown flow may take some time, and the host may not
		 * send us keep-alive during this period, hence reset the
		 * traffic based keep-alive timer so we don't trigger a
		 * controller teardown as a result of a keep-alive expiration.
		 */
		ctrl->reset_tbkas = true;
		sq->ctrl->sqs[sq->qid] = NULL;
		nvmet_ctrl_put(ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq, struct nvmet_cq *cq)
{
	int ret;

	if (!nvmet_cq_get(cq))
		return -EINVAL;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		nvmet_cq_put(cq);
		return ret;
	}
	init_completion(&sq->free_done);
	init_completion(&sq->confirm_done);
	nvmet_auth_sq_init(sq);
	sq->cq = cq;

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
		struct nvmet_ns *ns)
{
	enum nvme_ana_state state = port->ana_state[ns->anagrpid];

	if (unlikely(state == NVME_ANA_INACCESSIBLE))
		return NVME_SC_ANA_INACCESSIBLE;
	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
		return NVME_SC_ANA_PERSISTENT_LOSS;
	if (unlikely(state == NVME_ANA_CHANGE))
		return NVME_SC_ANA_TRANSITION;
	return 0;
}

static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
{
	if (unlikely(req->ns->readonly)) {
		switch (req->cmd->common.opcode) {
		case nvme_cmd_read:
		case nvme_cmd_flush:
			break;
		default:
			return NVME_SC_NS_WRITE_PROTECTED;
		}
	}

	return 0;
}

static u32 nvmet_io_cmd_transfer_len(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u32 metadata_len = 0;

	if (nvme_is_fabrics(cmd))
		return nvmet_fabrics_io_cmd_data_len(req);

	if (!req->ns)
		return 0;

	switch (req->cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
	case nvme_cmd_zone_append:
		if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
			metadata_len = nvmet_rw_metadata_len(req);
		return nvmet_rw_data_len(req) + metadata_len;
	case nvme_cmd_dsm:
		return nvmet_dsm_len(req);
	case nvme_cmd_zone_mgmt_recv:
		return (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2;
	default:
		return 0;
	}
}
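/*
 * Worked example: Zone Management Receive carries a 0's-based dword count
 * (numd), so the transfer length above is (numd + 1) * 4 bytes.  A host
 * asking for a 4 KiB zone report encodes numd = 1023, and
 * (1023 + 1) << 2 == 4096.
 */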
static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u16 ret;

	if (nvme_is_fabrics(cmd))
		return nvmet_parse_fabrics_io_cmd(req);

	if (unlikely(!nvmet_check_auth_status(req)))
		return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;

	ret = nvmet_check_ctrl_status(req);
	if (unlikely(ret))
		return ret;

	if (nvmet_is_passthru_req(req))
		return nvmet_parse_passthru_io_cmd(req);

	ret = nvmet_req_find_ns(req);
	if (unlikely(ret))
		return ret;

	ret = nvmet_check_ana_state(req->port, req->ns);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}
	ret = nvmet_io_cmd_check_access(req);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}

	if (req->ns->pr.enable) {
		ret = nvmet_parse_pr_cmd(req);
		if (!ret)
			return ret;
	}

	switch (req->ns->csi) {
	case NVME_CSI_NVM:
		if (req->ns->file)
			ret = nvmet_file_parse_io_cmd(req);
		else
			ret = nvmet_bdev_parse_io_cmd(req);
		break;
	case NVME_CSI_ZNS:
		if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
			ret = nvmet_bdev_zns_parse_io_cmd(req);
		else
			ret = NVME_SC_INVALID_IO_CMD_SET;
		break;
	default:
		ret = NVME_SC_INVALID_IO_CMD_SET;
	}
	if (ret)
		return ret;

	if (req->ns->pr.enable) {
		ret = nvmet_pr_check_cmd_access(req);
		if (ret)
			return ret;

		ret = nvmet_pr_get_ns_pc_ref(req);
	}
	return ret;
}

bool nvmet_req_init(struct nvmet_req *req, struct nvmet_sq *sq,
		const struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = sq->cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->metadata_sg = NULL;
	req->sg_cnt = 0;
	req->metadata_sg_cnt = 0;
	req->transfer_len = 0;
	req->metadata_len = 0;
	req->cqe->result.u64 = 0;
	req->cqe->status = 0;
	req->cqe->sq_head = 0;
	req->ns = NULL;
	req->error_loc = NVMET_NO_ERROR_LOC;
	req->error_slba = 0;
	req->pc_ref = NULL;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		req->error_loc = offsetof(struct nvme_common_command, flags);
		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
		goto fail;
	}

	/*
	 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
	 * contains an address of a single contiguous physical buffer that is
	 * byte aligned. For PCI controllers, this is optional so not enforced.
	 */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
		if (!req->sq->ctrl || !nvmet_is_pci_ctrl(req->sq->ctrl)) {
			req->error_loc =
				offsetof(struct nvme_common_command, flags);
			status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
			goto fail;
		}
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	trace_nvmet_req_init(req, req->cmd);

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
		goto fail;
	}

	if (sq->ctrl)
		sq->ctrl->reset_tbkas = true;

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

void nvmet_req_uninit(struct nvmet_req *req)
{
	percpu_ref_put(&req->sq->ref);
	if (req->pc_ref)
		nvmet_pr_put_ns_pc_ref(req->pc_ref);
	if (req->ns)
		nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

size_t nvmet_req_transfer_len(struct nvmet_req *req)
{
	if (likely(req->sq->qid != 0))
		return nvmet_io_cmd_transfer_len(req);
	if (unlikely(!req->sq->ctrl))
		return nvmet_connect_cmd_data_len(req);
	return nvmet_admin_cmd_data_len(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_transfer_len);
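/*
 * Example (illustrative sketch): a transport's per-command path built on
 * the exported helpers above; names outside this file are hypothetical:
 *
 *	if (!nvmet_req_init(req, queue->sq, &nvmet_foo_ops))
 *		return;		// request already completed with an error
 *
 *	req->transfer_len = nvmet_req_transfer_len(req);
 *	if (nvmet_req_alloc_sgls(req) < 0) {
 *		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_STATUS_DNR);
 *		return;
 *	}
 *	// ... DMA the data described by req->sg, then:
 *	req->execute(req);	// completes via nvmet_req_complete()
 */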
bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len)
{
	if (unlikely(len != req->transfer_len)) {
		u16 status;

		req->error_loc = offsetof(struct nvme_common_command, dptr);
		if (req->cmd->common.flags & NVME_CMD_SGL_ALL)
			status = NVME_SC_SGL_INVALID_DATA;
		else
			status = NVME_SC_INVALID_FIELD;
		nvmet_req_complete(req, status | NVME_STATUS_DNR);
		return false;
	}

	return true;
}
EXPORT_SYMBOL_GPL(nvmet_check_transfer_len);

bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
{
	if (unlikely(data_len > req->transfer_len)) {
		u16 status;

		req->error_loc = offsetof(struct nvme_common_command, dptr);
		if (req->cmd->common.flags & NVME_CMD_SGL_ALL)
			status = NVME_SC_SGL_INVALID_DATA;
		else
			status = NVME_SC_INVALID_FIELD;
		nvmet_req_complete(req, status | NVME_STATUS_DNR);
		return false;
	}

	return true;
}

static unsigned int nvmet_data_transfer_len(struct nvmet_req *req)
{
	return req->transfer_len - req->metadata_len;
}

static int nvmet_req_alloc_p2pmem_sgls(struct pci_dev *p2p_dev,
		struct nvmet_req *req)
{
	req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
			nvmet_data_transfer_len(req));
	if (!req->sg)
		goto out_err;

	if (req->metadata_len) {
		req->metadata_sg = pci_p2pmem_alloc_sgl(p2p_dev,
				&req->metadata_sg_cnt, req->metadata_len);
		if (!req->metadata_sg)
			goto out_free_sg;
	}

	req->p2p_dev = p2p_dev;

	return 0;
out_free_sg:
	pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
out_err:
	return -ENOMEM;
}

static struct pci_dev *nvmet_req_find_p2p_dev(struct nvmet_req *req)
{
	if (!IS_ENABLED(CONFIG_PCI_P2PDMA) ||
	    !req->sq->ctrl || !req->sq->qid || !req->ns)
		return NULL;
	return radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, req->ns->nsid);
}

int nvmet_req_alloc_sgls(struct nvmet_req *req)
{
	struct pci_dev *p2p_dev = nvmet_req_find_p2p_dev(req);

	if (p2p_dev && !nvmet_req_alloc_p2pmem_sgls(p2p_dev, req))
		return 0;

	req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL,
			    &req->sg_cnt);
	if (unlikely(!req->sg))
		goto out;

	if (req->metadata_len) {
		req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL,
					     &req->metadata_sg_cnt);
		if (unlikely(!req->metadata_sg))
			goto out_free;
	}

	return 0;
out_free:
	sgl_free(req->sg);
out:
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls);

void nvmet_req_free_sgls(struct nvmet_req *req)
{
	if (req->p2p_dev) {
		pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
		if (req->metadata_sg)
			pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg);
		req->p2p_dev = NULL;
	} else {
		sgl_free(req->sg);
		if (req->metadata_sg)
			sgl_free(req->metadata_sg);
	}

	req->sg = NULL;
	req->metadata_sg = NULL;
	req->sg_cnt = 0;
	req->metadata_sg_cnt = 0;
}
EXPORT_SYMBOL_GPL(nvmet_req_free_sgls);

static inline bool nvmet_css_supported(u8 cc_css)
{
	switch (cc_css << NVME_CC_CSS_SHIFT) {
	case NVME_CC_CSS_NVM:
	case NVME_CC_CSS_CSI:
		return true;
	default:
		return false;
	}
}
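/*
 * Worked example (illustrative): nvmet_start_ctrl() below validates the
 * fields a host writes into CC.  A typical enable value for an I/O
 * controller is 0x00460001: EN=1, CSS=0 (NVM), MPS=0, AMS=0, IOSQES=6
 * (64-byte SQEs) and IOCQES=4 (16-byte CQEs), i.e.
 * (6 << 16) | (4 << 20) | 1.
 */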
static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/*
	 * Only I/O controllers should verify iosqes,iocqes.
	 * Strictly speaking, the spec says a discovery controller
	 * should verify iosqes,iocqes are zeroed, however that
	 * would break backwards compatibility, so don't enforce it.
	 */
	if (!nvmet_is_disc_subsys(ctrl->subsys) &&
	    (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	     nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES)) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	if (nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    !nvmet_css_supported(nvmet_cc_css(ctrl->cc))) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;

	/*
	 * Controllers that are not yet enabled should not really enforce the
	 * keep alive timeout, but we still want to track a timeout and cleanup
	 * in case a host died before it enabled the controller. Hence, simply
	 * reset the keep alive timer when the controller is enabled.
	 */
	if (ctrl->kato)
		mod_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
	u32 old;

	mutex_lock(&ctrl->lock);
	old = ctrl->cc;
	ctrl->cc = new;

	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
		nvmet_start_ctrl(ctrl);
	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
		nvmet_clear_ctrl(ctrl);
	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
		nvmet_clear_ctrl(ctrl);
		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
	}
	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_update_cc);

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
	/* command sets supported: NVMe command set: */
	ctrl->cap = (1ULL << 37);
	/* Controller supports one or more I/O Command Sets */
	ctrl->cap |= (1ULL << 43);
	/* CC.EN timeout in 500msec units: */
	ctrl->cap |= (15ULL << 24);
	/* maximum queue entries supported: */
	if (ctrl->ops->get_max_queue_size)
		ctrl->cap |= min_t(u16, ctrl->ops->get_max_queue_size(ctrl),
				   ctrl->port->max_queue_size) - 1;
	else
		ctrl->cap |= ctrl->port->max_queue_size - 1;

	if (nvmet_is_passthru_subsys(ctrl->subsys))
		nvmet_passthrough_override_cap(ctrl);
}
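/*
 * Worked example (illustrative): with the bits set in nvmet_init_cap()
 * above, CAP advertises CSS.NVM (bit 37), CSS.CSI (bit 43) and TO = 15,
 * i.e. a CC.EN transition timeout of 15 * 500 ms = 7.5 seconds.  MQES
 * occupies bits 15:0 and is 0's based, so a 1024-entry queue limit is
 * encoded as 1023.
 */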
struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
				       const char *hostnqn, u16 cntlid,
				       struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = NULL;
	struct nvmet_subsys *subsys;

	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		goto out;
	}

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->cntlid == cntlid) {
			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
				pr_warn("hostnqn mismatch.\n");
				continue;
			}
			if (!kref_get_unless_zero(&ctrl->ref))
				continue;

			/* ctrl found */
			goto found;
		}
	}

	ctrl = NULL; /* ctrl not found */
	pr_warn("could not find controller %d for subsys %s / host %s\n",
		cntlid, subsysnqn, hostnqn);
	req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);

found:
	mutex_unlock(&subsys->lock);
	nvmet_subsys_put(subsys);
out:
	return ctrl;
}

u16 nvmet_check_ctrl_status(struct nvmet_req *req)
{
	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
		pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
		       req->cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
	}

	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
		pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
		       req->cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
	}

	if (unlikely(!nvmet_check_auth_status(req))) {
		pr_warn("qid %d not authenticated\n", req->sq->qid);
		return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;
	}
	return 0;
}

bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn)
{
	struct nvmet_host_link *p;

	lockdep_assert_held(&nvmet_config_sem);

	if (subsys->allow_any_host)
		return true;

	if (nvmet_is_disc_subsys(subsys)) /* allow all access to disc subsys */
		return true;

	list_for_each_entry(p, &subsys->hosts, entry) {
		if (!strcmp(nvmet_host_name(p->host), hostnqn))
			return true;
	}

	return false;
}

static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
		struct device *p2p_client)
{
	struct nvmet_ns *ns;
	unsigned long idx;

	lockdep_assert_held(&ctrl->subsys->lock);

	if (!p2p_client)
		return;

	ctrl->p2p_client = get_device(p2p_client);

	nvmet_for_each_enabled_ns(&ctrl->subsys->namespaces, idx, ns)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);
}

static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
{
	struct radix_tree_iter iter;
	void __rcu **slot;

	lockdep_assert_held(&ctrl->subsys->lock);

	radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0)
		pci_dev_put(radix_tree_deref_slot(slot));

	put_device(ctrl->p2p_client);
}
static void nvmet_fatal_error_handler(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
			container_of(work, struct nvmet_ctrl, fatal_err_work);

	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
	ctrl->ops->delete_ctrl(ctrl);
}

struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	u32 kato = args->kato;
	u8 dhchap_status;
	int ret;

	args->status = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
	subsys = nvmet_find_get_subsys(args->port, args->subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			args->subsysnqn);
		args->result = IPO_IATTR_CONNECT_DATA(subsysnqn);
		args->error_loc = offsetof(struct nvme_common_command, dptr);
		return NULL;
	}

	down_read(&nvmet_config_sem);
	if (!nvmet_host_allowed(subsys, args->hostnqn)) {
		pr_info("connect by host %s for subsystem %s not allowed\n",
			args->hostnqn, args->subsysnqn);
		args->result = IPO_IATTR_CONNECT_DATA(hostnqn);
		up_read(&nvmet_config_sem);
		args->status = NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR;
		args->error_loc = offsetof(struct nvme_common_command, dptr);
		goto out_put_subsystem;
	}
	up_read(&nvmet_config_sem);

	args->status = NVME_SC_INTERNAL;
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		goto out_put_subsystem;
	mutex_init(&ctrl->lock);

	ctrl->port = args->port;
	ctrl->ops = args->ops;

#ifdef CONFIG_NVME_TARGET_PASSTHRU
	/* loop targets clear the IDs by default */
	if (ctrl->port->disc_addr.trtype == NVMF_TRTYPE_LOOP)
		subsys->clear_ids = 1;
#endif

	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);
	INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
	INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);

	memcpy(ctrl->hostnqn, args->hostnqn, NVMF_NQN_SIZE);

	kref_init(&ctrl->ref);
	ctrl->subsys = subsys;
	ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support;
	nvmet_init_cap(ctrl);
	WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);

	ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
			sizeof(__le32), GFP_KERNEL);
	if (!ctrl->changed_ns_list)
		goto out_free_ctrl;

	ctrl->sqs = kcalloc(subsys->max_qid + 1,
			    sizeof(struct nvmet_sq *), GFP_KERNEL);
	if (!ctrl->sqs)
		goto out_free_changed_ns_list;

	ctrl->cqs = kcalloc(subsys->max_qid + 1,
			    sizeof(struct nvmet_cq *), GFP_KERNEL);
	if (!ctrl->cqs)
		goto out_free_sqs;

	ret = ida_alloc_range(&cntlid_ida,
			     subsys->cntlid_min, subsys->cntlid_max,
			     GFP_KERNEL);
	if (ret < 0) {
		args->status = NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR;
		goto out_free_cqs;
	}
	ctrl->cntlid = ret;

	/*
	 * Discovery controllers may use some arbitrary high value
	 * in order to cleanup stale discovery sessions
	 */
	if (nvmet_is_disc_subsys(ctrl->subsys) && !kato)
		kato = NVMET_DISC_KATO_MS;

	/* keep-alive timeout in seconds */
	ctrl->kato = DIV_ROUND_UP(kato, 1000);

	ctrl->err_counter = 0;
	spin_lock_init(&ctrl->error_lock);

	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	ret = nvmet_ctrl_init_pr(ctrl);
	if (ret)
		goto init_pr_fail;
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	nvmet_setup_p2p_ns_map(ctrl, args->p2p_client);
	nvmet_debugfs_ctrl_setup(ctrl);
	mutex_unlock(&subsys->lock);

	if (args->hostid)
		uuid_copy(&ctrl->hostid, args->hostid);

	dhchap_status = nvmet_setup_auth(ctrl, args->sq, false);
	if (dhchap_status) {
		pr_err("Failed to setup authentication, dhchap status %u\n",
		       dhchap_status);
		nvmet_ctrl_put(ctrl);
		if (dhchap_status == NVME_AUTH_DHCHAP_FAILURE_FAILED)
			args->status =
				NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR;
		else
			args->status = NVME_SC_INTERNAL;
		return NULL;
	}

	args->status = NVME_SC_SUCCESS;

	pr_info("Created %s controller %d for subsystem %s for NQN %s%s%s%s.\n",
		nvmet_is_disc_subsys(ctrl->subsys) ? "discovery" : "nvm",
		ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn,
		ctrl->pi_support ? " T10-PI is enabled" : "",
		nvmet_has_auth(ctrl, args->sq) ? " with DH-HMAC-CHAP" : "",
		nvmet_queue_tls_keyid(args->sq) ? ", TLS" : "");

	return ctrl;

init_pr_fail:
	mutex_unlock(&subsys->lock);
	nvmet_stop_keep_alive_timer(ctrl);
	ida_free(&cntlid_ida, ctrl->cntlid);
out_free_cqs:
	kfree(ctrl->cqs);
out_free_sqs:
	kfree(ctrl->sqs);
out_free_changed_ns_list:
	kfree(ctrl->changed_ns_list);
out_free_ctrl:
	kfree(ctrl);
out_put_subsystem:
	nvmet_subsys_put(subsys);
	return NULL;
}
EXPORT_SYMBOL_GPL(nvmet_alloc_ctrl);
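/*
 * Worked example (illustrative): the connect command carries KATO in
 * milliseconds while the target timer above runs in seconds, rounded up:
 * a host KATO of 120000 ms becomes ctrl->kato = 120, and a KATO of 1 ms
 * still arms a 1 second timer.  A discovery connect with KATO == 0 is
 * bumped to NVMET_DISC_KATO_MS first so stale sessions get reaped.
 */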
static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	mutex_lock(&subsys->lock);
	nvmet_ctrl_destroy_pr(ctrl);
	nvmet_release_p2p_ns_map(ctrl);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	nvmet_stop_keep_alive_timer(ctrl);

	cancel_work_sync(&ctrl->async_event_work);
	cancel_work_sync(&ctrl->fatal_err_work);

	nvmet_destroy_auth(ctrl);

	nvmet_debugfs_ctrl_free(ctrl);

	ida_free(&cntlid_ida, ctrl->cntlid);

	nvmet_async_events_free(ctrl);
	kfree(ctrl->sqs);
	kfree(ctrl->cqs);
	kfree(ctrl->changed_ns_list);
	kfree(ctrl);

	nvmet_subsys_put(subsys);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvmet_ctrl_free);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_put);

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
	mutex_lock(&ctrl->lock);
	if (!(ctrl->csts & NVME_CSTS_CFS)) {
		ctrl->csts |= NVME_CSTS_CFS;
		queue_work(nvmet_wq, &ctrl->fatal_err_work);
	}
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);

ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl,
		char *traddr, size_t traddr_len)
{
	if (!ctrl->ops->host_traddr)
		return -EOPNOTSUPP;
	return ctrl->ops->host_traddr(ctrl, traddr, traddr_len);
}

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn)
{
	struct nvmet_subsys_link *p;

	if (!port)
		return NULL;

	if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
			return NULL;
		return nvmet_disc_subsys;
	}

	down_read(&nvmet_config_sem);
	if (!strncmp(nvmet_disc_subsys->subsysnqn, subsysnqn,
		     NVMF_NQN_SIZE)) {
		if (kref_get_unless_zero(&nvmet_disc_subsys->ref)) {
			up_read(&nvmet_config_sem);
			return nvmet_disc_subsys;
		}
	}
	list_for_each_entry(p, &port->subsystems, entry) {
		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
			     NVMF_NQN_SIZE)) {
			if (!kref_get_unless_zero(&p->subsys->ref))
				break;
			up_read(&nvmet_config_sem);
			return p->subsys;
		}
	}
	up_read(&nvmet_config_sem);
	return NULL;
}
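/*
 * Worked example (for illustration): nvmet_subsys_alloc() below seeds the
 * subsystem serial number from NVMET_SN_MAX_SIZE / 2 random bytes and
 * expands them with bin2hex(), since each byte becomes two hex digits.
 * Assuming NVMET_SN_MAX_SIZE == 20, that is 10 random bytes, e.g.:
 *
 *	0x3a 0x07 ...  ->  "3a07..." (20-character serial)
 */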
struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type)
{
	struct nvmet_subsys *subsys;
	char serial[NVMET_SN_MAX_SIZE / 2];
	int ret;

	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
	if (!subsys)
		return ERR_PTR(-ENOMEM);

	subsys->ver = NVMET_DEFAULT_VS;
	/* generate a random serial number as our controllers are ephemeral: */
	get_random_bytes(&serial, sizeof(serial));
	bin2hex(subsys->serial, &serial, sizeof(serial));

	subsys->model_number = kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL);
	if (!subsys->model_number) {
		ret = -ENOMEM;
		goto free_subsys;
	}

	subsys->ieee_oui = 0;

	subsys->firmware_rev = kstrndup(UTS_RELEASE, NVMET_FR_MAX_SIZE,
					GFP_KERNEL);
	if (!subsys->firmware_rev) {
		ret = -ENOMEM;
		goto free_mn;
	}

	switch (type) {
	case NVME_NQN_NVME:
		subsys->max_qid = NVMET_NR_QUEUES;
		break;
	case NVME_NQN_DISC:
	case NVME_NQN_CURR:
		subsys->max_qid = 0;
		break;
	default:
		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
		ret = -EINVAL;
		goto free_fr;
	}
	subsys->type = type;
	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
			GFP_KERNEL);
	if (!subsys->subsysnqn) {
		ret = -ENOMEM;
		goto free_fr;
	}
	subsys->cntlid_min = NVME_CNTLID_MIN;
	subsys->cntlid_max = NVME_CNTLID_MAX;
	kref_init(&subsys->ref);

	mutex_init(&subsys->lock);
	xa_init(&subsys->namespaces);
	INIT_LIST_HEAD(&subsys->ctrls);
	INIT_LIST_HEAD(&subsys->hosts);

	ret = nvmet_debugfs_subsys_setup(subsys);
	if (ret)
		goto free_subsysnqn;

	return subsys;

free_subsysnqn:
	kfree(subsys->subsysnqn);
free_fr:
	kfree(subsys->firmware_rev);
free_mn:
	kfree(subsys->model_number);
free_subsys:
	kfree(subsys);
	return ERR_PTR(ret);
}

static void nvmet_subsys_free(struct kref *ref)
{
	struct nvmet_subsys *subsys =
		container_of(ref, struct nvmet_subsys, ref);

	WARN_ON_ONCE(!list_empty(&subsys->ctrls));
	WARN_ON_ONCE(!list_empty(&subsys->hosts));
	WARN_ON_ONCE(!xa_empty(&subsys->namespaces));

	nvmet_debugfs_subsys_free(subsys);

	xa_destroy(&subsys->namespaces);
	nvmet_passthru_subsys_free(subsys);

	kfree(subsys->subsysnqn);
	kfree(subsys->model_number);
	kfree(subsys->firmware_rev);
	kfree(subsys);
}

void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		ctrl->ops->delete_ctrl(ctrl);
	mutex_unlock(&subsys->lock);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
	kref_put(&subsys->ref, nvmet_subsys_free);
}
static int __init nvmet_init(void)
{
	int error = -ENOMEM;

	nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;

	nvmet_bvec_cache = kmem_cache_create("nvmet-bvec",
			NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), 0,
			SLAB_HWCACHE_ALIGN, NULL);
	if (!nvmet_bvec_cache)
		return -ENOMEM;

	zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM | WQ_PERCPU,
				 0);
	if (!zbd_wq)
		goto out_destroy_bvec_cache;

	buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
			WQ_MEM_RECLAIM | WQ_PERCPU, 0);
	if (!buffered_io_wq)
		goto out_free_zbd_work_queue;

	nvmet_wq = alloc_workqueue("nvmet-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nvmet_wq)
		goto out_free_buffered_work_queue;

	nvmet_aen_wq = alloc_workqueue("nvmet-aen-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
	if (!nvmet_aen_wq)
		goto out_free_nvmet_work_queue;

	error = nvmet_init_debugfs();
	if (error)
		goto out_free_nvmet_aen_work_queue;

	error = nvmet_init_discovery();
	if (error)
		goto out_exit_debugfs;

	error = nvmet_init_configfs();
	if (error)
		goto out_exit_discovery;

	return 0;

out_exit_discovery:
	nvmet_exit_discovery();
out_exit_debugfs:
	nvmet_exit_debugfs();
out_free_nvmet_aen_work_queue:
	destroy_workqueue(nvmet_aen_wq);
out_free_nvmet_work_queue:
	destroy_workqueue(nvmet_wq);
out_free_buffered_work_queue:
	destroy_workqueue(buffered_io_wq);
out_free_zbd_work_queue:
	destroy_workqueue(zbd_wq);
out_destroy_bvec_cache:
	kmem_cache_destroy(nvmet_bvec_cache);
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();
	nvmet_exit_debugfs();
	ida_destroy(&cntlid_ida);
	destroy_workqueue(nvmet_aen_wq);
	destroy_workqueue(nvmet_wq);
	destroy_workqueue(buffered_io_wq);
	destroy_workqueue(zbd_wq);
	kmem_cache_destroy(nvmet_bvec_cache);

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_DESCRIPTION("NVMe target core framework");
MODULE_LICENSE("GPL v2");