// SPDX-License-Identifier: GPL-2.0
/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/hex.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/pci-p2pdma.h>
#include <linux/scatterlist.h>

#include <generated/utsrelease.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#include "nvmet.h"
#include "debugfs.h"

struct kmem_cache *nvmet_bvec_cache;
struct workqueue_struct *buffered_io_wq;
struct workqueue_struct *zbd_wq;
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

struct workqueue_struct *nvmet_wq;
EXPORT_SYMBOL_GPL(nvmet_wq);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources to be protected by this semaphore is:
 *
 *  - subsystems list
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures, the write lock should be
 * obtained, while the read lock is taken when reading (populating the
 * discovery log page or checking a host-subsystem link) to allow concurrent
 * readers.
 */
DECLARE_RWSEM(nvmet_config_sem);

u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
u64 nvmet_ana_chgcnt;
DECLARE_RWSEM(nvmet_ana_sem);

inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
{
	switch (errno) {
	case 0:
		return NVME_SC_SUCCESS;
	case -ENOSPC:
		req->error_loc = offsetof(struct nvme_rw_command, length);
		return NVME_SC_CAP_EXCEEDED | NVME_STATUS_DNR;
	case -EREMOTEIO:
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		return NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
	case -EOPNOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
	case -ENODATA:
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		return NVME_SC_ACCESS_DENIED;
	case -EIO:
		fallthrough;
	default:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;
	}
}

u16 nvmet_report_invalid_opcode(struct nvmet_req *req)
{
	pr_debug("unhandled cmd %d on qid %d\n", req->cmd->common.opcode,
		 req->sq->qid);

	req->error_loc = offsetof(struct nvme_common_command, opcode);
	return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
	}
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
	}
	return 0;
}

u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
	if
(sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) { 112 req->error_loc = offsetof(struct nvme_common_command, dptr); 113 return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR; 114 } 115 return 0; 116 } 117 118 static u32 nvmet_max_nsid(struct nvmet_subsys *subsys) 119 { 120 struct nvmet_ns *cur; 121 unsigned long idx; 122 u32 nsid = 0; 123 124 nvmet_for_each_enabled_ns(&subsys->namespaces, idx, cur) 125 nsid = cur->nsid; 126 127 return nsid; 128 } 129 130 static u32 nvmet_async_event_result(struct nvmet_async_event *aen) 131 { 132 return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16); 133 } 134 135 static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl) 136 { 137 struct nvmet_req *req; 138 139 mutex_lock(&ctrl->lock); 140 while (ctrl->nr_async_event_cmds) { 141 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; 142 mutex_unlock(&ctrl->lock); 143 nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_STATUS_DNR); 144 mutex_lock(&ctrl->lock); 145 } 146 mutex_unlock(&ctrl->lock); 147 } 148 149 static void nvmet_async_events_process(struct nvmet_ctrl *ctrl) 150 { 151 struct nvmet_async_event *aen; 152 struct nvmet_req *req; 153 154 mutex_lock(&ctrl->lock); 155 while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) { 156 aen = list_first_entry(&ctrl->async_events, 157 struct nvmet_async_event, entry); 158 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; 159 nvmet_set_result(req, nvmet_async_event_result(aen)); 160 161 list_del(&aen->entry); 162 kfree(aen); 163 164 mutex_unlock(&ctrl->lock); 165 trace_nvmet_async_event(ctrl, req->cqe->result.u32); 166 nvmet_req_complete(req, 0); 167 mutex_lock(&ctrl->lock); 168 } 169 mutex_unlock(&ctrl->lock); 170 } 171 172 static void nvmet_async_events_free(struct nvmet_ctrl *ctrl) 173 { 174 struct nvmet_async_event *aen, *tmp; 175 176 mutex_lock(&ctrl->lock); 177 list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) { 178 list_del(&aen->entry); 179 kfree(aen); 180 } 181 mutex_unlock(&ctrl->lock); 182 } 183 184 static void nvmet_async_event_work(struct work_struct *work) 185 { 186 struct nvmet_ctrl *ctrl = 187 container_of(work, struct nvmet_ctrl, async_event_work); 188 189 nvmet_async_events_process(ctrl); 190 } 191 192 void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, 193 u8 event_info, u8 log_page) 194 { 195 struct nvmet_async_event *aen; 196 197 aen = kmalloc_obj(*aen); 198 if (!aen) 199 return; 200 201 aen->event_type = event_type; 202 aen->event_info = event_info; 203 aen->log_page = log_page; 204 205 mutex_lock(&ctrl->lock); 206 list_add_tail(&aen->entry, &ctrl->async_events); 207 mutex_unlock(&ctrl->lock); 208 209 queue_work(nvmet_wq, &ctrl->async_event_work); 210 } 211 212 static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid) 213 { 214 u32 i; 215 216 mutex_lock(&ctrl->lock); 217 if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES) 218 goto out_unlock; 219 220 for (i = 0; i < ctrl->nr_changed_ns; i++) { 221 if (ctrl->changed_ns_list[i] == nsid) 222 goto out_unlock; 223 } 224 225 if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) { 226 ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff); 227 ctrl->nr_changed_ns = U32_MAX; 228 goto out_unlock; 229 } 230 231 ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid; 232 out_unlock: 233 mutex_unlock(&ctrl->lock); 234 } 235 236 void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid) 237 { 238 struct nvmet_ctrl *ctrl; 239 240 lockdep_assert_held(&subsys->lock); 241 242 list_for_each_entry(ctrl, 
&subsys->ctrls, subsys_entry) { 243 nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid)); 244 if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR)) 245 continue; 246 nvmet_add_async_event(ctrl, NVME_AER_NOTICE, 247 NVME_AER_NOTICE_NS_CHANGED, 248 NVME_LOG_CHANGED_NS); 249 } 250 } 251 252 void nvmet_send_ana_event(struct nvmet_subsys *subsys, 253 struct nvmet_port *port) 254 { 255 struct nvmet_ctrl *ctrl; 256 257 mutex_lock(&subsys->lock); 258 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 259 if (port && ctrl->port != port) 260 continue; 261 if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE)) 262 continue; 263 nvmet_add_async_event(ctrl, NVME_AER_NOTICE, 264 NVME_AER_NOTICE_ANA, NVME_LOG_ANA); 265 } 266 mutex_unlock(&subsys->lock); 267 } 268 269 void nvmet_port_send_ana_event(struct nvmet_port *port) 270 { 271 struct nvmet_subsys_link *p; 272 273 down_read(&nvmet_config_sem); 274 list_for_each_entry(p, &port->subsystems, entry) 275 nvmet_send_ana_event(p->subsys, port); 276 up_read(&nvmet_config_sem); 277 } 278 279 int nvmet_register_transport(const struct nvmet_fabrics_ops *ops) 280 { 281 int ret = 0; 282 283 down_write(&nvmet_config_sem); 284 if (nvmet_transports[ops->type]) 285 ret = -EINVAL; 286 else 287 nvmet_transports[ops->type] = ops; 288 up_write(&nvmet_config_sem); 289 290 return ret; 291 } 292 EXPORT_SYMBOL_GPL(nvmet_register_transport); 293 294 void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops) 295 { 296 down_write(&nvmet_config_sem); 297 nvmet_transports[ops->type] = NULL; 298 up_write(&nvmet_config_sem); 299 } 300 EXPORT_SYMBOL_GPL(nvmet_unregister_transport); 301 302 void nvmet_port_del_ctrls(struct nvmet_port *port, struct nvmet_subsys *subsys) 303 { 304 struct nvmet_ctrl *ctrl; 305 306 mutex_lock(&subsys->lock); 307 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 308 if (ctrl->port == port) 309 ctrl->ops->delete_ctrl(ctrl); 310 } 311 mutex_unlock(&subsys->lock); 312 } 313 314 int nvmet_enable_port(struct nvmet_port *port) 315 { 316 const struct nvmet_fabrics_ops *ops; 317 int ret; 318 319 lockdep_assert_held(&nvmet_config_sem); 320 321 if (port->disc_addr.trtype == NVMF_TRTYPE_MAX) 322 return -EINVAL; 323 324 ops = nvmet_transports[port->disc_addr.trtype]; 325 if (!ops) { 326 up_write(&nvmet_config_sem); 327 request_module("nvmet-transport-%d", port->disc_addr.trtype); 328 down_write(&nvmet_config_sem); 329 ops = nvmet_transports[port->disc_addr.trtype]; 330 if (!ops) { 331 pr_err("transport type %d not supported\n", 332 port->disc_addr.trtype); 333 return -EINVAL; 334 } 335 } 336 337 if (!try_module_get(ops->owner)) 338 return -EINVAL; 339 340 /* 341 * If the user requested PI support and the transport isn't pi capable, 342 * don't enable the port. 343 */ 344 if (port->pi_enable && !(ops->flags & NVMF_METADATA_SUPPORTED)) { 345 pr_err("T10-PI is not supported by transport type %d\n", 346 port->disc_addr.trtype); 347 ret = -EINVAL; 348 goto out_put; 349 } 350 351 ret = ops->add_port(port); 352 if (ret) 353 goto out_put; 354 355 /* If the transport didn't set inline_data_size, then disable it. */ 356 if (port->inline_data_size < 0) 357 port->inline_data_size = 0; 358 359 /* 360 * If the transport didn't set the max_queue_size properly, then clamp 361 * it to the target limits. Also set default values in case the 362 * transport didn't set it at all. 
363 */ 364 if (port->max_queue_size < 0) 365 port->max_queue_size = NVMET_MAX_QUEUE_SIZE; 366 else 367 port->max_queue_size = clamp_t(int, port->max_queue_size, 368 NVMET_MIN_QUEUE_SIZE, 369 NVMET_MAX_QUEUE_SIZE); 370 371 /* 372 * If the transport didn't set the mdts properly, then clamp it to the 373 * target limits. Also set default values in case the transport didn't 374 * set it at all. 375 */ 376 if (port->mdts < 0 || port->mdts > NVMET_MAX_MDTS) 377 port->mdts = 0; 378 379 port->enabled = true; 380 port->tr_ops = ops; 381 return 0; 382 383 out_put: 384 module_put(ops->owner); 385 return ret; 386 } 387 388 void nvmet_disable_port(struct nvmet_port *port) 389 { 390 const struct nvmet_fabrics_ops *ops; 391 392 lockdep_assert_held(&nvmet_config_sem); 393 394 port->enabled = false; 395 port->tr_ops = NULL; 396 397 ops = nvmet_transports[port->disc_addr.trtype]; 398 ops->remove_port(port); 399 module_put(ops->owner); 400 } 401 402 static void nvmet_keep_alive_timer(struct work_struct *work) 403 { 404 struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work), 405 struct nvmet_ctrl, ka_work); 406 bool reset_tbkas = ctrl->reset_tbkas; 407 408 ctrl->reset_tbkas = false; 409 if (reset_tbkas) { 410 pr_debug("ctrl %d reschedule traffic based keep-alive timer\n", 411 ctrl->cntlid); 412 queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ); 413 return; 414 } 415 416 pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n", 417 ctrl->cntlid, ctrl->kato); 418 419 nvmet_ctrl_fatal_error(ctrl); 420 } 421 422 void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) 423 { 424 if (unlikely(ctrl->kato == 0)) 425 return; 426 427 pr_debug("ctrl %d start keep-alive timer for %d secs\n", 428 ctrl->cntlid, ctrl->kato); 429 430 queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ); 431 } 432 433 void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl) 434 { 435 if (unlikely(ctrl->kato == 0)) 436 return; 437 438 pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid); 439 440 cancel_delayed_work_sync(&ctrl->ka_work); 441 } 442 443 u16 nvmet_req_find_ns(struct nvmet_req *req) 444 { 445 u32 nsid = le32_to_cpu(req->cmd->common.nsid); 446 struct nvmet_subsys *subsys = nvmet_req_subsys(req); 447 448 req->ns = xa_load(&subsys->namespaces, nsid); 449 if (unlikely(!req->ns || !req->ns->enabled)) { 450 req->error_loc = offsetof(struct nvme_common_command, nsid); 451 if (!req->ns) /* ns doesn't exist! 
*/ 452 return NVME_SC_INVALID_NS | NVME_STATUS_DNR; 453 454 /* ns exists but it's disabled */ 455 req->ns = NULL; 456 return NVME_SC_INTERNAL_PATH_ERROR; 457 } 458 459 percpu_ref_get(&req->ns->ref); 460 return NVME_SC_SUCCESS; 461 } 462 463 static void nvmet_destroy_namespace(struct percpu_ref *ref) 464 { 465 struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref); 466 467 complete(&ns->disable_done); 468 } 469 470 void nvmet_put_namespace(struct nvmet_ns *ns) 471 { 472 percpu_ref_put(&ns->ref); 473 } 474 475 static void nvmet_ns_dev_disable(struct nvmet_ns *ns) 476 { 477 nvmet_bdev_ns_disable(ns); 478 nvmet_file_ns_disable(ns); 479 } 480 481 static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns) 482 { 483 int ret; 484 struct pci_dev *p2p_dev; 485 486 if (!ns->use_p2pmem) 487 return 0; 488 489 if (!ns->bdev) { 490 pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n"); 491 return -EINVAL; 492 } 493 494 if (!blk_queue_pci_p2pdma(ns->bdev->bd_disk->queue)) { 495 pr_err("peer-to-peer DMA is not supported by the driver of %s\n", 496 ns->device_path); 497 return -EINVAL; 498 } 499 500 if (ns->p2p_dev) { 501 ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true); 502 if (ret < 0) 503 return -EINVAL; 504 } else { 505 /* 506 * Right now we just check that there is p2pmem available so 507 * we can report an error to the user right away if there 508 * is not. We'll find the actual device to use once we 509 * setup the controller when the port's device is available. 510 */ 511 512 p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns)); 513 if (!p2p_dev) { 514 pr_err("no peer-to-peer memory is available for %s\n", 515 ns->device_path); 516 return -EINVAL; 517 } 518 519 pci_dev_put(p2p_dev); 520 } 521 522 return 0; 523 } 524 525 static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl, 526 struct nvmet_ns *ns) 527 { 528 struct device *clients[2]; 529 struct pci_dev *p2p_dev; 530 int ret; 531 532 lockdep_assert_held(&ctrl->subsys->lock); 533 534 if (!ctrl->p2p_client || !ns->use_p2pmem) 535 return; 536 537 if (ns->p2p_dev) { 538 ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true); 539 if (ret < 0) 540 return; 541 542 p2p_dev = pci_dev_get(ns->p2p_dev); 543 } else { 544 clients[0] = ctrl->p2p_client; 545 clients[1] = nvmet_ns_dev(ns); 546 547 p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients)); 548 if (!p2p_dev) { 549 pr_err("no peer-to-peer memory is available that's supported by %s and %s\n", 550 dev_name(ctrl->p2p_client), ns->device_path); 551 return; 552 } 553 } 554 555 ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev); 556 if (ret < 0) 557 pci_dev_put(p2p_dev); 558 559 pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev), 560 ns->nsid); 561 } 562 563 bool nvmet_ns_revalidate(struct nvmet_ns *ns) 564 { 565 loff_t oldsize = ns->size; 566 567 if (ns->bdev) 568 nvmet_bdev_ns_revalidate(ns); 569 else 570 nvmet_file_ns_revalidate(ns); 571 572 return oldsize != ns->size; 573 } 574 575 int nvmet_ns_enable(struct nvmet_ns *ns) 576 { 577 struct nvmet_subsys *subsys = ns->subsys; 578 struct nvmet_ctrl *ctrl; 579 int ret; 580 581 mutex_lock(&subsys->lock); 582 ret = 0; 583 584 if (nvmet_is_passthru_subsys(subsys)) { 585 pr_info("cannot enable both passthru and regular namespaces for a single subsystem"); 586 goto out_unlock; 587 } 588 589 if (ns->enabled) 590 goto out_unlock; 591 592 ret = nvmet_bdev_ns_enable(ns); 593 if (ret == -ENOTBLK) 594 ret = nvmet_file_ns_enable(ns); 595 if (ret) 596 goto out_unlock; 597 598 ret = 
nvmet_p2pmem_ns_enable(ns); 599 if (ret) 600 goto out_dev_disable; 601 602 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) 603 nvmet_p2pmem_ns_add_p2p(ctrl, ns); 604 605 if (ns->pr.enable) { 606 ret = nvmet_pr_init_ns(ns); 607 if (ret) 608 goto out_dev_put; 609 } 610 611 if (percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL)) 612 goto out_pr_exit; 613 614 nvmet_ns_changed(subsys, ns->nsid); 615 ns->enabled = true; 616 xa_set_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED); 617 ret = 0; 618 out_unlock: 619 mutex_unlock(&subsys->lock); 620 return ret; 621 out_pr_exit: 622 if (ns->pr.enable) 623 nvmet_pr_exit_ns(ns); 624 out_dev_put: 625 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) 626 pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid)); 627 out_dev_disable: 628 nvmet_ns_dev_disable(ns); 629 goto out_unlock; 630 } 631 632 void nvmet_ns_disable(struct nvmet_ns *ns) 633 { 634 struct nvmet_subsys *subsys = ns->subsys; 635 struct nvmet_ctrl *ctrl; 636 637 mutex_lock(&subsys->lock); 638 if (!ns->enabled) 639 goto out_unlock; 640 641 ns->enabled = false; 642 xa_clear_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED); 643 644 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) 645 pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid)); 646 647 mutex_unlock(&subsys->lock); 648 649 /* 650 * Now that we removed the namespaces from the lookup list, we 651 * can kill the per_cpu ref and wait for any remaining references 652 * to be dropped, as well as a RCU grace period for anyone only 653 * using the namespace under rcu_read_lock(). Note that we can't 654 * use call_rcu here as we need to ensure the namespaces have 655 * been fully destroyed before unloading the module. 656 */ 657 percpu_ref_kill(&ns->ref); 658 synchronize_rcu(); 659 wait_for_completion(&ns->disable_done); 660 percpu_ref_exit(&ns->ref); 661 662 if (ns->pr.enable) 663 nvmet_pr_exit_ns(ns); 664 665 mutex_lock(&subsys->lock); 666 nvmet_ns_changed(subsys, ns->nsid); 667 nvmet_ns_dev_disable(ns); 668 out_unlock: 669 mutex_unlock(&subsys->lock); 670 } 671 672 void nvmet_ns_free(struct nvmet_ns *ns) 673 { 674 struct nvmet_subsys *subsys = ns->subsys; 675 676 nvmet_ns_disable(ns); 677 678 mutex_lock(&subsys->lock); 679 680 xa_erase(&subsys->namespaces, ns->nsid); 681 if (ns->nsid == subsys->max_nsid) 682 subsys->max_nsid = nvmet_max_nsid(subsys); 683 684 subsys->nr_namespaces--; 685 mutex_unlock(&subsys->lock); 686 687 down_write(&nvmet_ana_sem); 688 nvmet_ana_group_enabled[ns->anagrpid]--; 689 up_write(&nvmet_ana_sem); 690 691 kfree(ns->device_path); 692 kfree(ns); 693 } 694 695 struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) 696 { 697 struct nvmet_ns *ns; 698 699 mutex_lock(&subsys->lock); 700 701 if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES) 702 goto out_unlock; 703 704 ns = kzalloc_obj(*ns); 705 if (!ns) 706 goto out_unlock; 707 708 init_completion(&ns->disable_done); 709 710 ns->nsid = nsid; 711 ns->subsys = subsys; 712 713 if (ns->nsid > subsys->max_nsid) 714 subsys->max_nsid = nsid; 715 716 if (xa_insert(&subsys->namespaces, ns->nsid, ns, GFP_KERNEL)) 717 goto out_exit; 718 719 subsys->nr_namespaces++; 720 721 mutex_unlock(&subsys->lock); 722 723 down_write(&nvmet_ana_sem); 724 ns->anagrpid = NVMET_DEFAULT_ANA_GRPID; 725 nvmet_ana_group_enabled[ns->anagrpid]++; 726 up_write(&nvmet_ana_sem); 727 728 uuid_gen(&ns->uuid); 729 ns->buffered_io = false; 730 ns->csi = NVME_CSI_NVM; 731 732 return ns; 733 out_exit: 734 subsys->max_nsid = 
nvmet_max_nsid(subsys); 735 kfree(ns); 736 out_unlock: 737 mutex_unlock(&subsys->lock); 738 return NULL; 739 } 740 741 static void nvmet_update_sq_head(struct nvmet_req *req) 742 { 743 if (req->sq->size) { 744 u32 old_sqhd, new_sqhd; 745 746 old_sqhd = READ_ONCE(req->sq->sqhd); 747 do { 748 new_sqhd = (old_sqhd + 1) % req->sq->size; 749 } while (!try_cmpxchg(&req->sq->sqhd, &old_sqhd, new_sqhd)); 750 } 751 req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF); 752 } 753 754 static void nvmet_set_error(struct nvmet_req *req, u16 status) 755 { 756 struct nvmet_ctrl *ctrl = req->sq->ctrl; 757 struct nvme_error_slot *new_error_slot; 758 unsigned long flags; 759 760 req->cqe->status = cpu_to_le16(status << 1); 761 762 if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC) 763 return; 764 765 spin_lock_irqsave(&ctrl->error_lock, flags); 766 ctrl->err_counter++; 767 new_error_slot = 768 &ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS]; 769 770 new_error_slot->error_count = cpu_to_le64(ctrl->err_counter); 771 new_error_slot->sqid = cpu_to_le16(req->sq->qid); 772 new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id); 773 new_error_slot->status_field = cpu_to_le16(status << 1); 774 new_error_slot->param_error_location = cpu_to_le16(req->error_loc); 775 new_error_slot->lba = cpu_to_le64(req->error_slba); 776 new_error_slot->nsid = req->cmd->common.nsid; 777 spin_unlock_irqrestore(&ctrl->error_lock, flags); 778 779 /* set the more bit for this request */ 780 req->cqe->status |= cpu_to_le16(1 << 14); 781 } 782 783 static void __nvmet_req_complete(struct nvmet_req *req, u16 status) 784 { 785 struct nvmet_ns *ns = req->ns; 786 struct nvmet_pr_per_ctrl_ref *pc_ref = req->pc_ref; 787 788 if (!req->sq->sqhd_disabled) 789 nvmet_update_sq_head(req); 790 req->cqe->sq_id = cpu_to_le16(req->sq->qid); 791 req->cqe->command_id = req->cmd->common.command_id; 792 793 if (unlikely(status)) 794 nvmet_set_error(req, status); 795 796 trace_nvmet_req_complete(req); 797 798 req->ops->queue_response(req); 799 800 if (pc_ref) 801 nvmet_pr_put_ns_pc_ref(pc_ref); 802 if (ns) 803 nvmet_put_namespace(ns); 804 } 805 806 void nvmet_req_complete(struct nvmet_req *req, u16 status) 807 { 808 struct nvmet_sq *sq = req->sq; 809 810 __nvmet_req_complete(req, status); 811 percpu_ref_put(&sq->ref); 812 } 813 EXPORT_SYMBOL_GPL(nvmet_req_complete); 814 815 void nvmet_cq_init(struct nvmet_cq *cq) 816 { 817 refcount_set(&cq->ref, 1); 818 } 819 EXPORT_SYMBOL_GPL(nvmet_cq_init); 820 821 bool nvmet_cq_get(struct nvmet_cq *cq) 822 { 823 return refcount_inc_not_zero(&cq->ref); 824 } 825 EXPORT_SYMBOL_GPL(nvmet_cq_get); 826 827 void nvmet_cq_put(struct nvmet_cq *cq) 828 { 829 if (refcount_dec_and_test(&cq->ref)) 830 nvmet_cq_destroy(cq); 831 } 832 EXPORT_SYMBOL_GPL(nvmet_cq_put); 833 834 void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, 835 u16 qid, u16 size) 836 { 837 cq->qid = qid; 838 cq->size = size; 839 840 ctrl->cqs[qid] = cq; 841 } 842 843 void nvmet_cq_destroy(struct nvmet_cq *cq) 844 { 845 struct nvmet_ctrl *ctrl = cq->ctrl; 846 847 if (ctrl) { 848 ctrl->cqs[cq->qid] = NULL; 849 nvmet_ctrl_put(cq->ctrl); 850 cq->ctrl = NULL; 851 } 852 } 853 854 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, 855 u16 qid, u16 size) 856 { 857 sq->sqhd = 0; 858 sq->qid = qid; 859 sq->size = size; 860 861 ctrl->sqs[qid] = sq; 862 } 863 864 static void nvmet_confirm_sq(struct percpu_ref *ref) 865 { 866 struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref); 867 868 complete(&sq->confirm_done); 869 } 
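/*
 * Illustrative sketch (not part of the upstream file): a minimal example of
 * how a block-device backend completion handler could use the helpers above,
 * mapping a block-layer status to an NVMe status with errno_to_nvme_status()
 * and finishing the command with nvmet_req_complete(), which also drops the
 * submission queue reference taken at request initialization.  The name
 * nvmet_example_bio_done() is hypothetical; the real bdev backend lives in
 * io-cmd-bdev.c and translates blk_status_t values directly.
 */
#if 0	/* example only, never compiled */
static void nvmet_example_bio_done(struct bio *bio)
{
	struct nvmet_req *req = bio->bi_private;
	int err = blk_status_to_errno(bio->bi_status);

	nvmet_req_complete(req, errno_to_nvme_status(req, err));
	bio_put(bio);
}
#endif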
870 871 u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create) 872 { 873 if (!ctrl->cqs) 874 return NVME_SC_INTERNAL | NVME_STATUS_DNR; 875 876 if (cqid > ctrl->subsys->max_qid) 877 return NVME_SC_QID_INVALID | NVME_STATUS_DNR; 878 879 if ((create && ctrl->cqs[cqid]) || (!create && !ctrl->cqs[cqid])) 880 return NVME_SC_QID_INVALID | NVME_STATUS_DNR; 881 882 return NVME_SC_SUCCESS; 883 } 884 885 u16 nvmet_check_io_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create) 886 { 887 if (!cqid) 888 return NVME_SC_QID_INVALID | NVME_STATUS_DNR; 889 return nvmet_check_cqid(ctrl, cqid, create); 890 } 891 892 bool nvmet_cq_in_use(struct nvmet_cq *cq) 893 { 894 return refcount_read(&cq->ref) > 1; 895 } 896 EXPORT_SYMBOL_GPL(nvmet_cq_in_use); 897 898 u16 nvmet_cq_create(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, 899 u16 qid, u16 size) 900 { 901 u16 status; 902 903 status = nvmet_check_cqid(ctrl, qid, true); 904 if (status != NVME_SC_SUCCESS) 905 return status; 906 907 if (!kref_get_unless_zero(&ctrl->ref)) 908 return NVME_SC_INTERNAL | NVME_STATUS_DNR; 909 cq->ctrl = ctrl; 910 911 nvmet_cq_init(cq); 912 nvmet_cq_setup(ctrl, cq, qid, size); 913 914 return NVME_SC_SUCCESS; 915 } 916 EXPORT_SYMBOL_GPL(nvmet_cq_create); 917 918 u16 nvmet_check_sqid(struct nvmet_ctrl *ctrl, u16 sqid, 919 bool create) 920 { 921 if (!ctrl->sqs) 922 return NVME_SC_INTERNAL | NVME_STATUS_DNR; 923 924 if (sqid > ctrl->subsys->max_qid) 925 return NVME_SC_QID_INVALID | NVME_STATUS_DNR; 926 927 if ((create && ctrl->sqs[sqid]) || 928 (!create && !ctrl->sqs[sqid])) 929 return NVME_SC_QID_INVALID | NVME_STATUS_DNR; 930 931 return NVME_SC_SUCCESS; 932 } 933 934 u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, 935 struct nvmet_cq *cq, u16 sqid, u16 size) 936 { 937 u16 status; 938 int ret; 939 940 if (!kref_get_unless_zero(&ctrl->ref)) 941 return NVME_SC_INTERNAL | NVME_STATUS_DNR; 942 943 status = nvmet_check_sqid(ctrl, sqid, true); 944 if (status != NVME_SC_SUCCESS) 945 return status; 946 947 ret = nvmet_sq_init(sq, cq); 948 if (ret) { 949 status = NVME_SC_INTERNAL | NVME_STATUS_DNR; 950 goto ctrl_put; 951 } 952 953 nvmet_sq_setup(ctrl, sq, sqid, size); 954 sq->ctrl = ctrl; 955 956 return NVME_SC_SUCCESS; 957 958 ctrl_put: 959 nvmet_ctrl_put(ctrl); 960 return status; 961 } 962 EXPORT_SYMBOL_GPL(nvmet_sq_create); 963 964 void nvmet_sq_destroy(struct nvmet_sq *sq) 965 { 966 struct nvmet_ctrl *ctrl = sq->ctrl; 967 968 /* 969 * If this is the admin queue, complete all AERs so that our 970 * queue doesn't have outstanding requests on it. 971 */ 972 if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) 973 nvmet_async_events_failall(ctrl); 974 percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); 975 wait_for_completion(&sq->confirm_done); 976 wait_for_completion(&sq->free_done); 977 percpu_ref_exit(&sq->ref); 978 nvmet_auth_sq_free(sq); 979 nvmet_cq_put(sq->cq); 980 981 /* 982 * we must reference the ctrl again after waiting for inflight IO 983 * to complete. Because admin connect may have sneaked in after we 984 * store sq->ctrl locally, but before we killed the percpu_ref. the 985 * admin connect allocates and assigns sq->ctrl, which now needs a 986 * final ref put, as this ctrl is going away. 987 */ 988 ctrl = sq->ctrl; 989 990 if (ctrl) { 991 /* 992 * The teardown flow may take some time, and the host may not 993 * send us keep-alive during this period, hence reset the 994 * traffic based keep-alive timer so we don't trigger a 995 * controller teardown as a result of a keep-alive expiration. 
996 */ 997 ctrl->reset_tbkas = true; 998 sq->ctrl->sqs[sq->qid] = NULL; 999 nvmet_ctrl_put(ctrl); 1000 sq->ctrl = NULL; /* allows reusing the queue later */ 1001 } 1002 } 1003 EXPORT_SYMBOL_GPL(nvmet_sq_destroy); 1004 1005 static void nvmet_sq_free(struct percpu_ref *ref) 1006 { 1007 struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref); 1008 1009 complete(&sq->free_done); 1010 } 1011 1012 int nvmet_sq_init(struct nvmet_sq *sq, struct nvmet_cq *cq) 1013 { 1014 int ret; 1015 1016 if (!nvmet_cq_get(cq)) 1017 return -EINVAL; 1018 1019 ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL); 1020 if (ret) { 1021 pr_err("percpu_ref init failed!\n"); 1022 nvmet_cq_put(cq); 1023 return ret; 1024 } 1025 init_completion(&sq->free_done); 1026 init_completion(&sq->confirm_done); 1027 nvmet_auth_sq_init(sq); 1028 sq->cq = cq; 1029 1030 return 0; 1031 } 1032 EXPORT_SYMBOL_GPL(nvmet_sq_init); 1033 1034 static inline u16 nvmet_check_ana_state(struct nvmet_port *port, 1035 struct nvmet_ns *ns) 1036 { 1037 enum nvme_ana_state state = port->ana_state[ns->anagrpid]; 1038 1039 if (unlikely(state == NVME_ANA_INACCESSIBLE)) 1040 return NVME_SC_ANA_INACCESSIBLE; 1041 if (unlikely(state == NVME_ANA_PERSISTENT_LOSS)) 1042 return NVME_SC_ANA_PERSISTENT_LOSS; 1043 if (unlikely(state == NVME_ANA_CHANGE)) 1044 return NVME_SC_ANA_TRANSITION; 1045 return 0; 1046 } 1047 1048 static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req) 1049 { 1050 if (unlikely(req->ns->readonly)) { 1051 switch (req->cmd->common.opcode) { 1052 case nvme_cmd_read: 1053 case nvme_cmd_flush: 1054 break; 1055 default: 1056 return NVME_SC_NS_WRITE_PROTECTED; 1057 } 1058 } 1059 1060 return 0; 1061 } 1062 1063 static u32 nvmet_io_cmd_transfer_len(struct nvmet_req *req) 1064 { 1065 struct nvme_command *cmd = req->cmd; 1066 u32 metadata_len = 0; 1067 1068 if (nvme_is_fabrics(cmd)) 1069 return nvmet_fabrics_io_cmd_data_len(req); 1070 1071 if (!req->ns) 1072 return 0; 1073 1074 switch (req->cmd->common.opcode) { 1075 case nvme_cmd_read: 1076 case nvme_cmd_write: 1077 case nvme_cmd_zone_append: 1078 if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns)) 1079 metadata_len = nvmet_rw_metadata_len(req); 1080 return nvmet_rw_data_len(req) + metadata_len; 1081 case nvme_cmd_dsm: 1082 return nvmet_dsm_len(req); 1083 case nvme_cmd_zone_mgmt_recv: 1084 return (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2; 1085 default: 1086 return 0; 1087 } 1088 } 1089 1090 static u16 nvmet_parse_io_cmd(struct nvmet_req *req) 1091 { 1092 struct nvme_command *cmd = req->cmd; 1093 u16 ret; 1094 1095 if (nvme_is_fabrics(cmd)) 1096 return nvmet_parse_fabrics_io_cmd(req); 1097 1098 if (unlikely(!nvmet_check_auth_status(req))) 1099 return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR; 1100 1101 ret = nvmet_check_ctrl_status(req); 1102 if (unlikely(ret)) 1103 return ret; 1104 1105 if (nvmet_is_passthru_req(req)) 1106 return nvmet_parse_passthru_io_cmd(req); 1107 1108 ret = nvmet_req_find_ns(req); 1109 if (unlikely(ret)) 1110 return ret; 1111 1112 ret = nvmet_check_ana_state(req->port, req->ns); 1113 if (unlikely(ret)) { 1114 req->error_loc = offsetof(struct nvme_common_command, nsid); 1115 return ret; 1116 } 1117 ret = nvmet_io_cmd_check_access(req); 1118 if (unlikely(ret)) { 1119 req->error_loc = offsetof(struct nvme_common_command, nsid); 1120 return ret; 1121 } 1122 1123 if (req->ns->pr.enable) { 1124 ret = nvmet_parse_pr_cmd(req); 1125 if (!ret) 1126 return ret; 1127 } 1128 1129 switch (req->ns->csi) { 1130 case NVME_CSI_NVM: 1131 if (req->ns->file) 1132 ret = 
nvmet_file_parse_io_cmd(req); 1133 else 1134 ret = nvmet_bdev_parse_io_cmd(req); 1135 break; 1136 case NVME_CSI_ZNS: 1137 if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) 1138 ret = nvmet_bdev_zns_parse_io_cmd(req); 1139 else 1140 ret = NVME_SC_INVALID_IO_CMD_SET; 1141 break; 1142 default: 1143 ret = NVME_SC_INVALID_IO_CMD_SET; 1144 } 1145 if (ret) 1146 return ret; 1147 1148 if (req->ns->pr.enable) { 1149 ret = nvmet_pr_check_cmd_access(req); 1150 if (ret) 1151 return ret; 1152 1153 ret = nvmet_pr_get_ns_pc_ref(req); 1154 } 1155 return ret; 1156 } 1157 1158 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_sq *sq, 1159 const struct nvmet_fabrics_ops *ops) 1160 { 1161 u8 flags = req->cmd->common.flags; 1162 u16 status; 1163 1164 req->cq = sq->cq; 1165 req->sq = sq; 1166 req->ops = ops; 1167 req->sg = NULL; 1168 req->metadata_sg = NULL; 1169 req->sg_cnt = 0; 1170 req->metadata_sg_cnt = 0; 1171 req->transfer_len = 0; 1172 req->metadata_len = 0; 1173 req->cqe->result.u64 = 0; 1174 req->cqe->status = 0; 1175 req->cqe->sq_head = 0; 1176 req->ns = NULL; 1177 req->error_loc = NVMET_NO_ERROR_LOC; 1178 req->error_slba = 0; 1179 req->pc_ref = NULL; 1180 1181 /* no support for fused commands yet */ 1182 if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) { 1183 req->error_loc = offsetof(struct nvme_common_command, flags); 1184 status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 1185 goto fail; 1186 } 1187 1188 /* 1189 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that 1190 * contains an address of a single contiguous physical buffer that is 1191 * byte aligned. For PCI controllers, this is optional so not enforced. 1192 */ 1193 if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) { 1194 if (!req->sq->ctrl || !nvmet_is_pci_ctrl(req->sq->ctrl)) { 1195 req->error_loc = 1196 offsetof(struct nvme_common_command, flags); 1197 status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 1198 goto fail; 1199 } 1200 } 1201 1202 if (unlikely(!req->sq->ctrl)) 1203 /* will return an error for any non-connect command: */ 1204 status = nvmet_parse_connect_cmd(req); 1205 else if (likely(req->sq->qid != 0)) 1206 status = nvmet_parse_io_cmd(req); 1207 else 1208 status = nvmet_parse_admin_cmd(req); 1209 1210 if (status) 1211 goto fail; 1212 1213 trace_nvmet_req_init(req, req->cmd); 1214 1215 if (unlikely(!percpu_ref_tryget_live(&sq->ref))) { 1216 status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 1217 goto fail; 1218 } 1219 1220 if (sq->ctrl) 1221 sq->ctrl->reset_tbkas = true; 1222 1223 return true; 1224 1225 fail: 1226 __nvmet_req_complete(req, status); 1227 return false; 1228 } 1229 EXPORT_SYMBOL_GPL(nvmet_req_init); 1230 1231 void nvmet_req_uninit(struct nvmet_req *req) 1232 { 1233 percpu_ref_put(&req->sq->ref); 1234 if (req->pc_ref) 1235 nvmet_pr_put_ns_pc_ref(req->pc_ref); 1236 if (req->ns) 1237 nvmet_put_namespace(req->ns); 1238 } 1239 EXPORT_SYMBOL_GPL(nvmet_req_uninit); 1240 1241 size_t nvmet_req_transfer_len(struct nvmet_req *req) 1242 { 1243 if (likely(req->sq->qid != 0)) 1244 return nvmet_io_cmd_transfer_len(req); 1245 if (unlikely(!req->sq->ctrl)) 1246 return nvmet_connect_cmd_data_len(req); 1247 return nvmet_admin_cmd_data_len(req); 1248 } 1249 EXPORT_SYMBOL_GPL(nvmet_req_transfer_len); 1250 1251 bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len) 1252 { 1253 if (unlikely(len != req->transfer_len)) { 1254 u16 status; 1255 1256 req->error_loc = offsetof(struct nvme_common_command, dptr); 1257 if (req->cmd->common.flags & NVME_CMD_SGL_ALL) 1258 status = 
NVME_SC_SGL_INVALID_DATA; 1259 else 1260 status = NVME_SC_INVALID_FIELD; 1261 nvmet_req_complete(req, status | NVME_STATUS_DNR); 1262 return false; 1263 } 1264 1265 return true; 1266 } 1267 EXPORT_SYMBOL_GPL(nvmet_check_transfer_len); 1268 1269 bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len) 1270 { 1271 if (unlikely(data_len > req->transfer_len)) { 1272 u16 status; 1273 1274 req->error_loc = offsetof(struct nvme_common_command, dptr); 1275 if (req->cmd->common.flags & NVME_CMD_SGL_ALL) 1276 status = NVME_SC_SGL_INVALID_DATA; 1277 else 1278 status = NVME_SC_INVALID_FIELD; 1279 nvmet_req_complete(req, status | NVME_STATUS_DNR); 1280 return false; 1281 } 1282 1283 return true; 1284 } 1285 1286 static unsigned int nvmet_data_transfer_len(struct nvmet_req *req) 1287 { 1288 return req->transfer_len - req->metadata_len; 1289 } 1290 1291 static int nvmet_req_alloc_p2pmem_sgls(struct pci_dev *p2p_dev, 1292 struct nvmet_req *req) 1293 { 1294 req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt, 1295 nvmet_data_transfer_len(req)); 1296 if (!req->sg) 1297 goto out_err; 1298 1299 if (req->metadata_len) { 1300 req->metadata_sg = pci_p2pmem_alloc_sgl(p2p_dev, 1301 &req->metadata_sg_cnt, req->metadata_len); 1302 if (!req->metadata_sg) 1303 goto out_free_sg; 1304 } 1305 1306 req->p2p_dev = p2p_dev; 1307 1308 return 0; 1309 out_free_sg: 1310 pci_p2pmem_free_sgl(req->p2p_dev, req->sg); 1311 out_err: 1312 return -ENOMEM; 1313 } 1314 1315 static struct pci_dev *nvmet_req_find_p2p_dev(struct nvmet_req *req) 1316 { 1317 if (!IS_ENABLED(CONFIG_PCI_P2PDMA) || 1318 !req->sq->ctrl || !req->sq->qid || !req->ns) 1319 return NULL; 1320 return radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, req->ns->nsid); 1321 } 1322 1323 int nvmet_req_alloc_sgls(struct nvmet_req *req) 1324 { 1325 struct pci_dev *p2p_dev = nvmet_req_find_p2p_dev(req); 1326 1327 if (p2p_dev && !nvmet_req_alloc_p2pmem_sgls(p2p_dev, req)) 1328 return 0; 1329 1330 req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL, 1331 &req->sg_cnt); 1332 if (unlikely(!req->sg)) 1333 goto out; 1334 1335 if (req->metadata_len) { 1336 req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL, 1337 &req->metadata_sg_cnt); 1338 if (unlikely(!req->metadata_sg)) 1339 goto out_free; 1340 } 1341 1342 return 0; 1343 out_free: 1344 sgl_free(req->sg); 1345 out: 1346 return -ENOMEM; 1347 } 1348 EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls); 1349 1350 void nvmet_req_free_sgls(struct nvmet_req *req) 1351 { 1352 if (req->p2p_dev) { 1353 pci_p2pmem_free_sgl(req->p2p_dev, req->sg); 1354 if (req->metadata_sg) 1355 pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg); 1356 req->p2p_dev = NULL; 1357 } else { 1358 sgl_free(req->sg); 1359 if (req->metadata_sg) 1360 sgl_free(req->metadata_sg); 1361 } 1362 1363 req->sg = NULL; 1364 req->metadata_sg = NULL; 1365 req->sg_cnt = 0; 1366 req->metadata_sg_cnt = 0; 1367 } 1368 EXPORT_SYMBOL_GPL(nvmet_req_free_sgls); 1369 1370 static inline bool nvmet_css_supported(u8 cc_css) 1371 { 1372 switch (cc_css << NVME_CC_CSS_SHIFT) { 1373 case NVME_CC_CSS_NVM: 1374 case NVME_CC_CSS_CSI: 1375 return true; 1376 default: 1377 return false; 1378 } 1379 } 1380 1381 static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) 1382 { 1383 lockdep_assert_held(&ctrl->lock); 1384 1385 /* 1386 * Only I/O controllers should verify iosqes,iocqes. 1387 * Strictly speaking, the spec says a discovery controller 1388 * should verify iosqes,iocqes are zeroed, however that 1389 * would break backwards compatibility, so don't enforce it. 
1390 */ 1391 if (!nvmet_is_disc_subsys(ctrl->subsys) && 1392 (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES || 1393 nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES)) { 1394 ctrl->csts = NVME_CSTS_CFS; 1395 return; 1396 } 1397 1398 if (nvmet_cc_mps(ctrl->cc) != 0 || 1399 nvmet_cc_ams(ctrl->cc) != 0 || 1400 !nvmet_css_supported(nvmet_cc_css(ctrl->cc))) { 1401 ctrl->csts = NVME_CSTS_CFS; 1402 return; 1403 } 1404 1405 ctrl->csts = NVME_CSTS_RDY; 1406 1407 /* 1408 * Controllers that are not yet enabled should not really enforce the 1409 * keep alive timeout, but we still want to track a timeout and cleanup 1410 * in case a host died before it enabled the controller. Hence, simply 1411 * reset the keep alive timer when the controller is enabled. 1412 */ 1413 if (ctrl->kato) 1414 mod_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ); 1415 } 1416 1417 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl) 1418 { 1419 lockdep_assert_held(&ctrl->lock); 1420 1421 /* XXX: tear down queues? */ 1422 ctrl->csts &= ~NVME_CSTS_RDY; 1423 ctrl->cc = 0; 1424 } 1425 1426 void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new) 1427 { 1428 u32 old; 1429 1430 mutex_lock(&ctrl->lock); 1431 old = ctrl->cc; 1432 ctrl->cc = new; 1433 1434 if (nvmet_cc_en(new) && !nvmet_cc_en(old)) 1435 nvmet_start_ctrl(ctrl); 1436 if (!nvmet_cc_en(new) && nvmet_cc_en(old)) 1437 nvmet_clear_ctrl(ctrl); 1438 if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) { 1439 nvmet_clear_ctrl(ctrl); 1440 ctrl->csts |= NVME_CSTS_SHST_CMPLT; 1441 } 1442 if (!nvmet_cc_shn(new) && nvmet_cc_shn(old)) 1443 ctrl->csts &= ~NVME_CSTS_SHST_CMPLT; 1444 mutex_unlock(&ctrl->lock); 1445 } 1446 EXPORT_SYMBOL_GPL(nvmet_update_cc); 1447 1448 static void nvmet_init_cap(struct nvmet_ctrl *ctrl) 1449 { 1450 /* command sets supported: NVMe command set: */ 1451 ctrl->cap = (1ULL << 37); 1452 /* Controller supports one or more I/O Command Sets */ 1453 ctrl->cap |= (1ULL << 43); 1454 /* CC.EN timeout in 500msec units: */ 1455 ctrl->cap |= (15ULL << 24); 1456 /* maximum queue entries supported: */ 1457 if (ctrl->ops->get_max_queue_size) 1458 ctrl->cap |= min_t(u16, ctrl->ops->get_max_queue_size(ctrl), 1459 ctrl->port->max_queue_size) - 1; 1460 else 1461 ctrl->cap |= ctrl->port->max_queue_size - 1; 1462 1463 if (nvmet_is_passthru_subsys(ctrl->subsys)) 1464 nvmet_passthrough_override_cap(ctrl); 1465 } 1466 1467 struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn, 1468 const char *hostnqn, u16 cntlid, 1469 struct nvmet_req *req) 1470 { 1471 struct nvmet_ctrl *ctrl = NULL; 1472 struct nvmet_subsys *subsys; 1473 1474 subsys = nvmet_find_get_subsys(req->port, subsysnqn); 1475 if (!subsys) { 1476 pr_warn("connect request for invalid subsystem %s!\n", 1477 subsysnqn); 1478 req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); 1479 goto out; 1480 } 1481 1482 mutex_lock(&subsys->lock); 1483 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 1484 if (ctrl->cntlid == cntlid) { 1485 if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) { 1486 pr_warn("hostnqn mismatch.\n"); 1487 continue; 1488 } 1489 if (!kref_get_unless_zero(&ctrl->ref)) 1490 continue; 1491 1492 /* ctrl found */ 1493 goto found; 1494 } 1495 } 1496 1497 ctrl = NULL; /* ctrl not found */ 1498 pr_warn("could not find controller %d for subsys %s / host %s\n", 1499 cntlid, subsysnqn, hostnqn); 1500 req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); 1501 1502 found: 1503 mutex_unlock(&subsys->lock); 1504 nvmet_subsys_put(subsys); 1505 out: 1506 return ctrl; 1507 } 1508 1509 u16 
nvmet_check_ctrl_status(struct nvmet_req *req) 1510 { 1511 if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { 1512 pr_err("got cmd %d while CC.EN == 0 on qid = %d\n", 1513 req->cmd->common.opcode, req->sq->qid); 1514 return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR; 1515 } 1516 1517 if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { 1518 pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n", 1519 req->cmd->common.opcode, req->sq->qid); 1520 return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR; 1521 } 1522 1523 if (unlikely(!nvmet_check_auth_status(req))) { 1524 pr_warn("qid %d not authenticated\n", req->sq->qid); 1525 return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR; 1526 } 1527 return 0; 1528 } 1529 1530 bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn) 1531 { 1532 struct nvmet_host_link *p; 1533 1534 lockdep_assert_held(&nvmet_config_sem); 1535 1536 if (subsys->allow_any_host) 1537 return true; 1538 1539 if (nvmet_is_disc_subsys(subsys)) /* allow all access to disc subsys */ 1540 return true; 1541 1542 list_for_each_entry(p, &subsys->hosts, entry) { 1543 if (!strcmp(nvmet_host_name(p->host), hostnqn)) 1544 return true; 1545 } 1546 1547 return false; 1548 } 1549 1550 static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl, 1551 struct device *p2p_client) 1552 { 1553 struct nvmet_ns *ns; 1554 unsigned long idx; 1555 1556 lockdep_assert_held(&ctrl->subsys->lock); 1557 1558 if (!p2p_client) 1559 return; 1560 1561 ctrl->p2p_client = get_device(p2p_client); 1562 1563 nvmet_for_each_enabled_ns(&ctrl->subsys->namespaces, idx, ns) 1564 nvmet_p2pmem_ns_add_p2p(ctrl, ns); 1565 } 1566 1567 static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl) 1568 { 1569 struct radix_tree_iter iter; 1570 void __rcu **slot; 1571 1572 lockdep_assert_held(&ctrl->subsys->lock); 1573 1574 radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0) 1575 pci_dev_put(radix_tree_deref_slot(slot)); 1576 1577 put_device(ctrl->p2p_client); 1578 } 1579 1580 static void nvmet_fatal_error_handler(struct work_struct *work) 1581 { 1582 struct nvmet_ctrl *ctrl = 1583 container_of(work, struct nvmet_ctrl, fatal_err_work); 1584 1585 pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid); 1586 ctrl->ops->delete_ctrl(ctrl); 1587 } 1588 1589 struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args) 1590 { 1591 struct nvmet_subsys *subsys; 1592 struct nvmet_ctrl *ctrl; 1593 u32 kato = args->kato; 1594 u8 dhchap_status; 1595 int ret; 1596 1597 args->status = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR; 1598 subsys = nvmet_find_get_subsys(args->port, args->subsysnqn); 1599 if (!subsys) { 1600 pr_warn("connect request for invalid subsystem %s!\n", 1601 args->subsysnqn); 1602 args->result = IPO_IATTR_CONNECT_DATA(subsysnqn); 1603 args->error_loc = offsetof(struct nvme_common_command, dptr); 1604 return NULL; 1605 } 1606 1607 down_read(&nvmet_config_sem); 1608 if (!nvmet_host_allowed(subsys, args->hostnqn)) { 1609 pr_info("connect by host %s for subsystem %s not allowed\n", 1610 args->hostnqn, args->subsysnqn); 1611 args->result = IPO_IATTR_CONNECT_DATA(hostnqn); 1612 up_read(&nvmet_config_sem); 1613 args->status = NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR; 1614 args->error_loc = offsetof(struct nvme_common_command, dptr); 1615 goto out_put_subsystem; 1616 } 1617 up_read(&nvmet_config_sem); 1618 1619 args->status = NVME_SC_INTERNAL; 1620 ctrl = kzalloc_obj(*ctrl); 1621 if (!ctrl) 1622 goto out_put_subsystem; 1623 mutex_init(&ctrl->lock); 1624 1625 ctrl->port = args->port; 1626 ctrl->ops 
= args->ops; 1627 1628 #ifdef CONFIG_NVME_TARGET_PASSTHRU 1629 /* By default, set loop targets to clear IDS by default */ 1630 if (ctrl->port->disc_addr.trtype == NVMF_TRTYPE_LOOP) 1631 subsys->clear_ids = 1; 1632 #endif 1633 1634 INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work); 1635 INIT_LIST_HEAD(&ctrl->async_events); 1636 INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL); 1637 INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); 1638 INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer); 1639 1640 memcpy(ctrl->hostnqn, args->hostnqn, NVMF_NQN_SIZE); 1641 1642 kref_init(&ctrl->ref); 1643 ctrl->subsys = subsys; 1644 ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support; 1645 nvmet_init_cap(ctrl); 1646 WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL); 1647 1648 ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES, 1649 sizeof(__le32), GFP_KERNEL); 1650 if (!ctrl->changed_ns_list) 1651 goto out_free_ctrl; 1652 1653 ctrl->sqs = kzalloc_objs(struct nvmet_sq *, subsys->max_qid + 1); 1654 if (!ctrl->sqs) 1655 goto out_free_changed_ns_list; 1656 1657 ctrl->cqs = kzalloc_objs(struct nvmet_cq *, subsys->max_qid + 1); 1658 if (!ctrl->cqs) 1659 goto out_free_sqs; 1660 1661 ret = ida_alloc_range(&cntlid_ida, 1662 subsys->cntlid_min, subsys->cntlid_max, 1663 GFP_KERNEL); 1664 if (ret < 0) { 1665 args->status = NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR; 1666 goto out_free_cqs; 1667 } 1668 ctrl->cntlid = ret; 1669 1670 /* 1671 * Discovery controllers may use some arbitrary high value 1672 * in order to cleanup stale discovery sessions 1673 */ 1674 if (nvmet_is_disc_subsys(ctrl->subsys) && !kato) 1675 kato = NVMET_DISC_KATO_MS; 1676 1677 /* keep-alive timeout in seconds */ 1678 ctrl->kato = DIV_ROUND_UP(kato, 1000); 1679 1680 ctrl->err_counter = 0; 1681 spin_lock_init(&ctrl->error_lock); 1682 1683 nvmet_start_keep_alive_timer(ctrl); 1684 1685 mutex_lock(&subsys->lock); 1686 ret = nvmet_ctrl_init_pr(ctrl); 1687 if (ret) 1688 goto init_pr_fail; 1689 list_add_tail(&ctrl->subsys_entry, &subsys->ctrls); 1690 nvmet_setup_p2p_ns_map(ctrl, args->p2p_client); 1691 nvmet_debugfs_ctrl_setup(ctrl); 1692 mutex_unlock(&subsys->lock); 1693 1694 if (args->hostid) 1695 uuid_copy(&ctrl->hostid, args->hostid); 1696 1697 dhchap_status = nvmet_setup_auth(ctrl, args->sq, false); 1698 if (dhchap_status) { 1699 pr_err("Failed to setup authentication, dhchap status %u\n", 1700 dhchap_status); 1701 nvmet_ctrl_put(ctrl); 1702 if (dhchap_status == NVME_AUTH_DHCHAP_FAILURE_FAILED) 1703 args->status = 1704 NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR; 1705 else 1706 args->status = NVME_SC_INTERNAL; 1707 return NULL; 1708 } 1709 1710 args->status = NVME_SC_SUCCESS; 1711 1712 pr_info("Created %s controller %d for subsystem %s for NQN %s%s%s%s.\n", 1713 nvmet_is_disc_subsys(ctrl->subsys) ? "discovery" : "nvm", 1714 ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn, 1715 ctrl->pi_support ? " T10-PI is enabled" : "", 1716 nvmet_has_auth(ctrl, args->sq) ? " with DH-HMAC-CHAP" : "", 1717 nvmet_queue_tls_keyid(args->sq) ? 
", TLS" : ""); 1718 1719 return ctrl; 1720 1721 init_pr_fail: 1722 mutex_unlock(&subsys->lock); 1723 nvmet_stop_keep_alive_timer(ctrl); 1724 ida_free(&cntlid_ida, ctrl->cntlid); 1725 out_free_cqs: 1726 kfree(ctrl->cqs); 1727 out_free_sqs: 1728 kfree(ctrl->sqs); 1729 out_free_changed_ns_list: 1730 kfree(ctrl->changed_ns_list); 1731 out_free_ctrl: 1732 kfree(ctrl); 1733 out_put_subsystem: 1734 nvmet_subsys_put(subsys); 1735 return NULL; 1736 } 1737 EXPORT_SYMBOL_GPL(nvmet_alloc_ctrl); 1738 1739 static void nvmet_ctrl_free(struct kref *ref) 1740 { 1741 struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref); 1742 struct nvmet_subsys *subsys = ctrl->subsys; 1743 1744 mutex_lock(&subsys->lock); 1745 nvmet_ctrl_destroy_pr(ctrl); 1746 nvmet_release_p2p_ns_map(ctrl); 1747 list_del(&ctrl->subsys_entry); 1748 mutex_unlock(&subsys->lock); 1749 1750 nvmet_stop_keep_alive_timer(ctrl); 1751 1752 cancel_work_sync(&ctrl->async_event_work); 1753 cancel_work_sync(&ctrl->fatal_err_work); 1754 1755 nvmet_destroy_auth(ctrl); 1756 1757 nvmet_debugfs_ctrl_free(ctrl); 1758 1759 ida_free(&cntlid_ida, ctrl->cntlid); 1760 1761 nvmet_async_events_free(ctrl); 1762 kfree(ctrl->sqs); 1763 kfree(ctrl->cqs); 1764 kfree(ctrl->changed_ns_list); 1765 kfree(ctrl); 1766 1767 nvmet_subsys_put(subsys); 1768 } 1769 1770 void nvmet_ctrl_put(struct nvmet_ctrl *ctrl) 1771 { 1772 kref_put(&ctrl->ref, nvmet_ctrl_free); 1773 } 1774 EXPORT_SYMBOL_GPL(nvmet_ctrl_put); 1775 1776 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl) 1777 { 1778 mutex_lock(&ctrl->lock); 1779 if (!(ctrl->csts & NVME_CSTS_CFS)) { 1780 ctrl->csts |= NVME_CSTS_CFS; 1781 queue_work(nvmet_wq, &ctrl->fatal_err_work); 1782 } 1783 mutex_unlock(&ctrl->lock); 1784 } 1785 EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error); 1786 1787 ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl, 1788 char *traddr, size_t traddr_len) 1789 { 1790 if (!ctrl->ops->host_traddr) 1791 return -EOPNOTSUPP; 1792 return ctrl->ops->host_traddr(ctrl, traddr, traddr_len); 1793 } 1794 1795 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port, 1796 const char *subsysnqn) 1797 { 1798 struct nvmet_subsys_link *p; 1799 1800 if (!port) 1801 return NULL; 1802 1803 if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) { 1804 if (!kref_get_unless_zero(&nvmet_disc_subsys->ref)) 1805 return NULL; 1806 return nvmet_disc_subsys; 1807 } 1808 1809 down_read(&nvmet_config_sem); 1810 if (!strncmp(nvmet_disc_subsys->subsysnqn, subsysnqn, 1811 NVMF_NQN_SIZE)) { 1812 if (kref_get_unless_zero(&nvmet_disc_subsys->ref)) { 1813 up_read(&nvmet_config_sem); 1814 return nvmet_disc_subsys; 1815 } 1816 } 1817 list_for_each_entry(p, &port->subsystems, entry) { 1818 if (!strncmp(p->subsys->subsysnqn, subsysnqn, 1819 NVMF_NQN_SIZE)) { 1820 if (!kref_get_unless_zero(&p->subsys->ref)) 1821 break; 1822 up_read(&nvmet_config_sem); 1823 return p->subsys; 1824 } 1825 } 1826 up_read(&nvmet_config_sem); 1827 return NULL; 1828 } 1829 1830 struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, 1831 enum nvme_subsys_type type) 1832 { 1833 struct nvmet_subsys *subsys; 1834 char serial[NVMET_SN_MAX_SIZE / 2]; 1835 int ret; 1836 1837 subsys = kzalloc_obj(*subsys); 1838 if (!subsys) 1839 return ERR_PTR(-ENOMEM); 1840 1841 subsys->ver = NVMET_DEFAULT_VS; 1842 /* generate a random serial number as our controllers are ephemeral: */ 1843 get_random_bytes(&serial, sizeof(serial)); 1844 bin2hex(subsys->serial, &serial, sizeof(serial)); 1845 1846 subsys->model_number = kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL); 1847 
if (!subsys->model_number) { 1848 ret = -ENOMEM; 1849 goto free_subsys; 1850 } 1851 1852 subsys->ieee_oui = 0; 1853 1854 subsys->firmware_rev = kstrndup(UTS_RELEASE, NVMET_FR_MAX_SIZE, GFP_KERNEL); 1855 if (!subsys->firmware_rev) { 1856 ret = -ENOMEM; 1857 goto free_mn; 1858 } 1859 1860 switch (type) { 1861 case NVME_NQN_NVME: 1862 subsys->max_qid = NVMET_NR_QUEUES; 1863 break; 1864 case NVME_NQN_DISC: 1865 case NVME_NQN_CURR: 1866 subsys->max_qid = 0; 1867 break; 1868 default: 1869 pr_err("%s: Unknown Subsystem type - %d\n", __func__, type); 1870 ret = -EINVAL; 1871 goto free_fr; 1872 } 1873 subsys->type = type; 1874 subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE, 1875 GFP_KERNEL); 1876 if (!subsys->subsysnqn) { 1877 ret = -ENOMEM; 1878 goto free_fr; 1879 } 1880 subsys->cntlid_min = NVME_CNTLID_MIN; 1881 subsys->cntlid_max = NVME_CNTLID_MAX; 1882 kref_init(&subsys->ref); 1883 1884 mutex_init(&subsys->lock); 1885 xa_init(&subsys->namespaces); 1886 INIT_LIST_HEAD(&subsys->ctrls); 1887 INIT_LIST_HEAD(&subsys->hosts); 1888 1889 ret = nvmet_debugfs_subsys_setup(subsys); 1890 if (ret) 1891 goto free_subsysnqn; 1892 1893 return subsys; 1894 1895 free_subsysnqn: 1896 kfree(subsys->subsysnqn); 1897 free_fr: 1898 kfree(subsys->firmware_rev); 1899 free_mn: 1900 kfree(subsys->model_number); 1901 free_subsys: 1902 kfree(subsys); 1903 return ERR_PTR(ret); 1904 } 1905 1906 static void nvmet_subsys_free(struct kref *ref) 1907 { 1908 struct nvmet_subsys *subsys = 1909 container_of(ref, struct nvmet_subsys, ref); 1910 1911 WARN_ON_ONCE(!list_empty(&subsys->ctrls)); 1912 WARN_ON_ONCE(!list_empty(&subsys->hosts)); 1913 WARN_ON_ONCE(!xa_empty(&subsys->namespaces)); 1914 1915 nvmet_debugfs_subsys_free(subsys); 1916 1917 xa_destroy(&subsys->namespaces); 1918 nvmet_passthru_subsys_free(subsys); 1919 1920 kfree(subsys->subsysnqn); 1921 kfree(subsys->model_number); 1922 kfree(subsys->firmware_rev); 1923 kfree(subsys); 1924 } 1925 1926 void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys) 1927 { 1928 struct nvmet_ctrl *ctrl; 1929 1930 mutex_lock(&subsys->lock); 1931 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) 1932 ctrl->ops->delete_ctrl(ctrl); 1933 mutex_unlock(&subsys->lock); 1934 } 1935 1936 void nvmet_subsys_put(struct nvmet_subsys *subsys) 1937 { 1938 kref_put(&subsys->ref, nvmet_subsys_free); 1939 } 1940 1941 static int __init nvmet_init(void) 1942 { 1943 int error = -ENOMEM; 1944 1945 nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1; 1946 1947 nvmet_bvec_cache = kmem_cache_create("nvmet-bvec", 1948 NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), 0, 1949 SLAB_HWCACHE_ALIGN, NULL); 1950 if (!nvmet_bvec_cache) 1951 return -ENOMEM; 1952 1953 zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM | WQ_PERCPU, 1954 0); 1955 if (!zbd_wq) 1956 goto out_destroy_bvec_cache; 1957 1958 buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq", 1959 WQ_MEM_RECLAIM | WQ_PERCPU, 0); 1960 if (!buffered_io_wq) 1961 goto out_free_zbd_work_queue; 1962 1963 nvmet_wq = alloc_workqueue("nvmet-wq", 1964 WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_SYSFS, 0); 1965 if (!nvmet_wq) 1966 goto out_free_buffered_work_queue; 1967 1968 error = nvmet_init_debugfs(); 1969 if (error) 1970 goto out_free_nvmet_work_queue; 1971 1972 error = nvmet_init_discovery(); 1973 if (error) 1974 goto out_exit_debugfs; 1975 1976 error = nvmet_init_configfs(); 1977 if (error) 1978 goto out_exit_discovery; 1979 1980 return 0; 1981 1982 out_exit_discovery: 1983 nvmet_exit_discovery(); 1984 out_exit_debugfs: 1985 nvmet_exit_debugfs(); 1986 
out_free_nvmet_work_queue:
	destroy_workqueue(nvmet_wq);
out_free_buffered_work_queue:
	destroy_workqueue(buffered_io_wq);
out_free_zbd_work_queue:
	destroy_workqueue(zbd_wq);
out_destroy_bvec_cache:
	kmem_cache_destroy(nvmet_bvec_cache);
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();
	nvmet_exit_debugfs();
	ida_destroy(&cntlid_ida);
	destroy_workqueue(nvmet_wq);
	destroy_workqueue(buffered_io_wq);
	destroy_workqueue(zbd_wq);
	kmem_cache_destroy(nvmet_bvec_cache);

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_DESCRIPTION("NVMe target core framework");
MODULE_LICENSE("GPL v2");
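/*
 * Illustrative sketch (not part of the upstream file): how a fabrics
 * transport module typically plugs into this core.  It fills in a
 * struct nvmet_fabrics_ops and registers it for its transport type, so
 * nvmet_enable_port() can find it in the nvmet_transports[] array
 * (request_module("nvmet-transport-%d") resolves the module alias when
 * the transport is not loaded yet).  All nvmet_example_* names below are
 * hypothetical stubs; real transports such as nvmet-tcp implement many
 * more callbacks.
 */
#if 0	/* example only, never compiled */
static const struct nvmet_fabrics_ops nvmet_example_ops = {
	.owner		= THIS_MODULE,
	.type		= NVMF_TRTYPE_TCP,
	.add_port	= nvmet_example_add_port,
	.remove_port	= nvmet_example_remove_port,
	.queue_response	= nvmet_example_queue_response,
	.delete_ctrl	= nvmet_example_delete_ctrl,
};

static int __init nvmet_example_init(void)
{
	/* Fails with -EINVAL if another transport already owns this type. */
	return nvmet_register_transport(&nvmet_example_ops);
}

static void __exit nvmet_example_exit(void)
{
	nvmet_unregister_transport(&nvmet_example_ops);
}

module_init(nvmet_example_init);
module_exit(nvmet_example_exit);
#endif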