// SPDX-License-Identifier: GPL-2.0
/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/hex.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/pci-p2pdma.h>
#include <linux/scatterlist.h>

#include <generated/utsrelease.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#include "nvmet.h"
#include "debugfs.h"

struct kmem_cache *nvmet_bvec_cache;
struct workqueue_struct *buffered_io_wq;
struct workqueue_struct *zbd_wq;
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

struct workqueue_struct *nvmet_wq;
EXPORT_SYMBOL_GPL(nvmet_wq);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources to be protected by this semaphore is:
 *
 *  - subsystems list
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures write lock should be obtained,
 * while when reading (populating discovery log page or checking host-subsystem
 * link) read lock is obtained to allow concurrent reads.
 */
DECLARE_RWSEM(nvmet_config_sem);

u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
u64 nvmet_ana_chgcnt;
DECLARE_RWSEM(nvmet_ana_sem);

inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
{
	switch (errno) {
	case 0:
		return NVME_SC_SUCCESS;
	case -ENOSPC:
		req->error_loc = offsetof(struct nvme_rw_command, length);
		return NVME_SC_CAP_EXCEEDED | NVME_STATUS_DNR;
	case -EREMOTEIO:
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		return NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
	case -EOPNOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
	case -ENODATA:
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		return NVME_SC_ACCESS_DENIED;
	case -EIO:
		fallthrough;
	default:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;
	}
}

u16 nvmet_report_invalid_opcode(struct nvmet_req *req)
{
	pr_debug("unhandled cmd %d on qid %d\n", req->cmd->common.opcode,
		 req->sq->qid);

	req->error_loc = offsetof(struct nvme_common_command, opcode);
	return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
	}
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
	}
	return 0;
}

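/*
 * Zero "len" bytes of the request's data SGL starting at byte offset "off".
 * Returns an SGL error status if the range does not fit within the SGL.
 */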
u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
	if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
	}
	return 0;
}

static u32 nvmet_max_nsid(struct nvmet_subsys *subsys)
{
	struct nvmet_ns *cur;
	unsigned long idx;
	u32 nsid = 0;

	nvmet_for_each_enabled_ns(&subsys->namespaces, idx, cur)
		nsid = cur->nsid;

	return nsid;
}

static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	mutex_lock(&ctrl->lock);
	while (ctrl->nr_async_event_cmds) {
		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_STATUS_DNR);
		mutex_lock(&ctrl->lock);
	}
	mutex_unlock(&ctrl->lock);
}

static void nvmet_async_events_process(struct nvmet_ctrl *ctrl)
{
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	mutex_lock(&ctrl->lock);
	while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) {
		aen = list_first_entry(&ctrl->async_events,
				       struct nvmet_async_event, entry);
		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		trace_nvmet_async_event(ctrl, req->cqe->result.u32);
		nvmet_req_complete(req, 0);
		mutex_lock(&ctrl->lock);
	}
	mutex_unlock(&ctrl->lock);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_async_event *aen, *tmp;

	mutex_lock(&ctrl->lock);
	list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) {
		list_del(&aen->entry);
		kfree(aen);
	}
	mutex_unlock(&ctrl->lock);
}

static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);

	nvmet_async_events_process(ctrl);
}

void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	queue_work(nvmet_wq, &ctrl->async_event_work);
}

static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	u32 i;

	mutex_lock(&ctrl->lock);
	if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
		goto out_unlock;

	for (i = 0; i < ctrl->nr_changed_ns; i++) {
		if (ctrl->changed_ns_list[i] == nsid)
			goto out_unlock;
	}

	if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
		ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
		ctrl->nr_changed_ns = U32_MAX;
		goto out_unlock;
	}

	ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
out_unlock:
	mutex_unlock(&ctrl->lock);
}

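/*
 * Note a namespace change for every controller in the subsystem: record the
 * NSID in each controller's Changed Namespace List and, unless the host has
 * masked the AEN, queue a Namespace Attribute Changed asynchronous event.
 */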
void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ctrl *ctrl;

	lockdep_assert_held(&subsys->lock);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
				NVME_AER_NOTICE_NS_CHANGED,
				NVME_LOG_CHANGED_NS);
	}
}

void nvmet_send_ana_event(struct nvmet_subsys *subsys,
		struct nvmet_port *port)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (port && ctrl->port != port)
			continue;
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
				NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
	}
	mutex_unlock(&subsys->lock);
}

void nvmet_port_send_ana_event(struct nvmet_port *port)
{
	struct nvmet_subsys_link *p;

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry)
		nvmet_send_ana_event(p->subsys, port);
	up_read(&nvmet_config_sem);
}

int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);

void nvmet_port_del_ctrls(struct nvmet_port *port, struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->port == port)
			ctrl->ops->delete_ctrl(ctrl);
	}
	mutex_unlock(&subsys->lock);
}

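/*
 * Enable a port: resolve the transport (loading its module on demand), reject
 * configurations the transport cannot support, register the port with the
 * transport, and apply target defaults for inline data and queue sizes.
 */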
int nvmet_enable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	if (port->disc_addr.trtype == NVMF_TRTYPE_MAX)
		return -EINVAL;

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	/*
	 * If the user requested PI support and the transport isn't pi capable,
	 * don't enable the port.
	 */
	if (port->pi_enable && !(ops->flags & NVMF_METADATA_SUPPORTED)) {
		pr_err("T10-PI is not supported by transport type %d\n",
			port->disc_addr.trtype);
		ret = -EINVAL;
		goto out_put;
	}

	ret = ops->add_port(port);
	if (ret)
		goto out_put;

	/* If the transport didn't set inline_data_size, then disable it. */
	if (port->inline_data_size < 0)
		port->inline_data_size = 0;

	/*
	 * If the transport didn't set the max_queue_size properly, then clamp
	 * it to the target limits. Also set default values in case the
	 * transport didn't set it at all.
	 */
	if (port->max_queue_size < 0)
		port->max_queue_size = NVMET_MAX_QUEUE_SIZE;
	else
		port->max_queue_size = clamp_t(int, port->max_queue_size,
					       NVMET_MIN_QUEUE_SIZE,
					       NVMET_MAX_QUEUE_SIZE);

	port->enabled = true;
	port->tr_ops = ops;
	return 0;

out_put:
	module_put(ops->owner);
	return ret;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;
	port->tr_ops = NULL;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);
	bool reset_tbkas = ctrl->reset_tbkas;

	ctrl->reset_tbkas = false;
	if (reset_tbkas) {
		pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
			ctrl->cntlid);
		queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
		return;
	}

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	if (unlikely(ctrl->kato == 0))
		return;

	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	if (unlikely(ctrl->kato == 0))
		return;

	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}

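/*
 * Resolve the namespace referenced by the command's NSID and take a percpu
 * reference on it. Returns Invalid Namespace if the NSID does not exist and
 * Internal Path Error if the namespace exists but is currently disabled.
 */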
u16 nvmet_req_find_ns(struct nvmet_req *req)
{
	u32 nsid = le32_to_cpu(req->cmd->common.nsid);
	struct nvmet_subsys *subsys = nvmet_req_subsys(req);

	req->ns = xa_load(&subsys->namespaces, nsid);
	if (unlikely(!req->ns || !req->ns->enabled)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		if (!req->ns) /* ns doesn't exist! */
			return NVME_SC_INVALID_NS | NVME_STATUS_DNR;

		/* ns exists but it's disabled */
		req->ns = NULL;
		return NVME_SC_INTERNAL_PATH_ERROR;
	}

	percpu_ref_get(&req->ns->ref);
	return NVME_SC_SUCCESS;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
{
	nvmet_bdev_ns_disable(ns);
	nvmet_file_ns_disable(ns);
}

static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
{
	int ret;
	struct pci_dev *p2p_dev;

	if (!ns->use_p2pmem)
		return 0;

	if (!ns->bdev) {
		pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
		return -EINVAL;
	}

	if (!blk_queue_pci_p2pdma(ns->bdev->bd_disk->queue)) {
		pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
		       ns->device_path);
		return -EINVAL;
	}

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
		if (ret < 0)
			return -EINVAL;
	} else {
		/*
		 * Right now we just check that there is p2pmem available so
		 * we can report an error to the user right away if there
		 * is not. We'll find the actual device to use once we
		 * setup the controller when the port's device is available.
		 */

		p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available for %s\n",
			       ns->device_path);
			return -EINVAL;
		}

		pci_dev_put(p2p_dev);
	}

	return 0;
}

static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
				    struct nvmet_ns *ns)
{
	struct device *clients[2];
	struct pci_dev *p2p_dev;
	int ret;

	lockdep_assert_held(&ctrl->subsys->lock);

	if (!ctrl->p2p_client || !ns->use_p2pmem)
		return;

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
		if (ret < 0)
			return;

		p2p_dev = pci_dev_get(ns->p2p_dev);
	} else {
		clients[0] = ctrl->p2p_client;
		clients[1] = nvmet_ns_dev(ns);

		p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
			       dev_name(ctrl->p2p_client), ns->device_path);
			return;
		}
	}

	ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
	if (ret < 0)
		pci_dev_put(p2p_dev);

	pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
		ns->nsid);
}

bool nvmet_ns_revalidate(struct nvmet_ns *ns)
{
	loff_t oldsize = ns->size;

	if (ns->bdev)
		nvmet_bdev_ns_revalidate(ns);
	else
		nvmet_file_ns_revalidate(ns);

	return oldsize != ns->size;
}

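/*
 * Enable a namespace: open the backing block device or file, set up optional
 * peer-to-peer memory and persistent reservation state, and mark the
 * namespace visible to all controllers of the subsystem.
 */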
int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;
	int ret;

	mutex_lock(&subsys->lock);
	ret = 0;

	if (nvmet_is_passthru_subsys(subsys)) {
		pr_info("cannot enable both passthru and regular namespaces for a single subsystem\n");
		goto out_unlock;
	}

	if (ns->enabled)
		goto out_unlock;

	ret = nvmet_bdev_ns_enable(ns);
	if (ret == -ENOTBLK)
		ret = nvmet_file_ns_enable(ns);
	if (ret)
		goto out_unlock;

	ret = nvmet_p2pmem_ns_enable(ns);
	if (ret)
		goto out_dev_disable;

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);

	if (ns->pr.enable) {
		ret = nvmet_pr_init_ns(ns);
		if (ret)
			goto out_dev_put;
	}

	if (percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL))
		goto out_pr_exit;

	nvmet_ns_changed(subsys, ns->nsid);
	ns->enabled = true;
	xa_set_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED);
	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
out_pr_exit:
	if (ns->pr.enable)
		nvmet_pr_exit_ns(ns);
out_dev_put:
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
out_dev_disable:
	nvmet_ns_dev_disable(ns);
	goto out_unlock;
}

void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	if (!ns->enabled)
		goto out_unlock;

	ns->enabled = false;
	xa_clear_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));

	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespaces from the lookup list, we
	 * can kill the per_cpu ref and wait for any remaining references
	 * to be dropped, as well as a RCU grace period for anyone only
	 * using the namespace under rcu_read_lock().  Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	if (ns->pr.enable)
		nvmet_pr_exit_ns(ns);

	mutex_lock(&subsys->lock);
	nvmet_ns_changed(subsys, ns->nsid);
	nvmet_ns_dev_disable(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;

	nvmet_ns_disable(ns);

	mutex_lock(&subsys->lock);

	xa_erase(&subsys->namespaces, ns->nsid);
	if (ns->nsid == subsys->max_nsid)
		subsys->max_nsid = nvmet_max_nsid(subsys);

	subsys->nr_namespaces--;
	mutex_unlock(&subsys->lock);

	down_write(&nvmet_ana_sem);
	nvmet_ana_group_enabled[ns->anagrpid]--;
	up_write(&nvmet_ana_sem);

	kfree(ns->device_path);
	kfree(ns);
}

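/*
 * Allocate a namespace shell for the given NSID, insert it into the
 * subsystem's namespace XArray and place it in the default ANA group.
 * The namespace stays disabled until nvmet_ns_enable() is called.
 */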
struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	mutex_lock(&subsys->lock);

	if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
		goto out_unlock;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		goto out_unlock;

	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = nsid;

	if (xa_insert(&subsys->namespaces, ns->nsid, ns, GFP_KERNEL))
		goto out_exit;

	subsys->nr_namespaces++;

	mutex_unlock(&subsys->lock);

	down_write(&nvmet_ana_sem);
	ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
	nvmet_ana_group_enabled[ns->anagrpid]++;
	up_write(&nvmet_ana_sem);

	uuid_gen(&ns->uuid);
	ns->buffered_io = false;
	ns->csi = NVME_CSI_NVM;

	return ns;
out_exit:
	subsys->max_nsid = nvmet_max_nsid(subsys);
	kfree(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
	return NULL;
}

static void nvmet_update_sq_head(struct nvmet_req *req)
{
	if (req->sq->size) {
		u32 old_sqhd, new_sqhd;

		old_sqhd = READ_ONCE(req->sq->sqhd);
		do {
			new_sqhd = (old_sqhd + 1) % req->sq->size;
		} while (!try_cmpxchg(&req->sq->sqhd, &old_sqhd, new_sqhd));
	}
	req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
}

static void nvmet_set_error(struct nvmet_req *req, u16 status)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvme_error_slot *new_error_slot;
	unsigned long flags;

	req->cqe->status = cpu_to_le16(status << 1);

	if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC)
		return;

	spin_lock_irqsave(&ctrl->error_lock, flags);
	ctrl->err_counter++;
	new_error_slot =
		&ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS];

	new_error_slot->error_count = cpu_to_le64(ctrl->err_counter);
	new_error_slot->sqid = cpu_to_le16(req->sq->qid);
	new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id);
	new_error_slot->status_field = cpu_to_le16(status << 1);
	new_error_slot->param_error_location = cpu_to_le16(req->error_loc);
	new_error_slot->lba = cpu_to_le64(req->error_slba);
	new_error_slot->nsid = req->cmd->common.nsid;
	spin_unlock_irqrestore(&ctrl->error_lock, flags);

	/* set the more bit for this request */
	req->cqe->status |= cpu_to_le16(1 << 14);
}

static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	struct nvmet_ns *ns = req->ns;
	struct nvmet_pr_per_ctrl_ref *pc_ref = req->pc_ref;

	if (!req->sq->sqhd_disabled)
		nvmet_update_sq_head(req);
	req->cqe->sq_id = cpu_to_le16(req->sq->qid);
	req->cqe->command_id = req->cmd->common.command_id;

	if (unlikely(status))
		nvmet_set_error(req, status);

	trace_nvmet_req_complete(req);

	req->ops->queue_response(req);

	if (pc_ref)
		nvmet_pr_put_ns_pc_ref(pc_ref);
	if (ns)
		nvmet_put_namespace(ns);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	struct nvmet_sq *sq = req->sq;

	__nvmet_req_complete(req, status);
	percpu_ref_put(&sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_init(struct nvmet_cq *cq)
{
	refcount_set(&cq->ref, 1);
}
EXPORT_SYMBOL_GPL(nvmet_cq_init);

bool nvmet_cq_get(struct nvmet_cq *cq)
{
	return refcount_inc_not_zero(&cq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_cq_get);

void nvmet_cq_put(struct nvmet_cq *cq)
{
	if (refcount_dec_and_test(&cq->ref))
		nvmet_cq_destroy(cq);
}
EXPORT_SYMBOL_GPL(nvmet_cq_put);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

void nvmet_cq_destroy(struct nvmet_cq *cq)
{
	struct nvmet_ctrl *ctrl = cq->ctrl;

	if (ctrl) {
		ctrl->cqs[cq->qid] = NULL;
		nvmet_ctrl_put(cq->ctrl);
		cq->ctrl = NULL;
	}
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->sqhd = 0;
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

static void nvmet_confirm_sq(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->confirm_done);
}

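/*
 * Validate a completion queue ID: it must be within the subsystem's queue
 * range, unused when creating a new CQ, and already created when referenced
 * by an existing queue.
 */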
u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create)
{
	if (!ctrl->cqs)
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;

	if (cqid > ctrl->subsys->max_qid)
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;

	if ((create && ctrl->cqs[cqid]) || (!create && !ctrl->cqs[cqid]))
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;

	return NVME_SC_SUCCESS;
}

u16 nvmet_check_io_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create)
{
	if (!cqid)
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
	return nvmet_check_cqid(ctrl, cqid, create);
}

bool nvmet_cq_in_use(struct nvmet_cq *cq)
{
	return refcount_read(&cq->ref) > 1;
}
EXPORT_SYMBOL_GPL(nvmet_cq_in_use);

u16 nvmet_cq_create(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		    u16 qid, u16 size)
{
	u16 status;

	status = nvmet_check_cqid(ctrl, qid, true);
	if (status != NVME_SC_SUCCESS)
		return status;

	if (!kref_get_unless_zero(&ctrl->ref))
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;
	cq->ctrl = ctrl;

	nvmet_cq_init(cq);
	nvmet_cq_setup(ctrl, cq, qid, size);

	return NVME_SC_SUCCESS;
}
EXPORT_SYMBOL_GPL(nvmet_cq_create);

u16 nvmet_check_sqid(struct nvmet_ctrl *ctrl, u16 sqid,
		     bool create)
{
	if (!ctrl->sqs)
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;

	if (sqid > ctrl->subsys->max_qid)
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;

	if ((create && ctrl->sqs[sqid]) ||
	    (!create && !ctrl->sqs[sqid]))
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;

	return NVME_SC_SUCCESS;
}

u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		    struct nvmet_cq *cq, u16 sqid, u16 size)
{
	u16 status;
	int ret;

	if (!kref_get_unless_zero(&ctrl->ref))
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;

	status = nvmet_check_sqid(ctrl, sqid, true);
	if (status != NVME_SC_SUCCESS)
		return status;

	ret = nvmet_sq_init(sq, cq);
	if (ret) {
		status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
		goto ctrl_put;
	}

	nvmet_sq_setup(ctrl, sq, sqid, size);
	sq->ctrl = ctrl;

	return NVME_SC_SUCCESS;

ctrl_put:
	nvmet_ctrl_put(ctrl);
	return status;
}
EXPORT_SYMBOL_GPL(nvmet_sq_create);

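/*
 * Tear down a submission queue: fail any outstanding AER commands on the
 * admin queue, wait for all inflight requests to drain, and drop the
 * references the queue holds on its CQ and controller.
 */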
void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	struct nvmet_ctrl *ctrl = sq->ctrl;

	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
	 */
	if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
		nvmet_async_events_failall(ctrl);
	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
	wait_for_completion(&sq->confirm_done);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);
	nvmet_auth_sq_free(sq);
	nvmet_cq_put(sq->cq);

	/*
	 * we must reference the ctrl again after waiting for inflight IO
	 * to complete. Because admin connect may have sneaked in after we
	 * store sq->ctrl locally, but before we killed the percpu_ref. the
	 * admin connect allocates and assigns sq->ctrl, which now needs a
	 * final ref put, as this ctrl is going away.
	 */
	ctrl = sq->ctrl;

	if (ctrl) {
		/*
		 * The teardown flow may take some time, and the host may not
		 * send us keep-alive during this period, hence reset the
		 * traffic based keep-alive timer so we don't trigger a
		 * controller teardown as a result of a keep-alive expiration.
		 */
		ctrl->reset_tbkas = true;
		sq->ctrl->sqs[sq->qid] = NULL;
		nvmet_ctrl_put(ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq, struct nvmet_cq *cq)
{
	int ret;

	if (!nvmet_cq_get(cq))
		return -EINVAL;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		nvmet_cq_put(cq);
		return ret;
	}
	init_completion(&sq->free_done);
	init_completion(&sq->confirm_done);
	nvmet_auth_sq_init(sq);
	sq->cq = cq;

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
		struct nvmet_ns *ns)
{
	enum nvme_ana_state state = port->ana_state[ns->anagrpid];

	if (unlikely(state == NVME_ANA_INACCESSIBLE))
		return NVME_SC_ANA_INACCESSIBLE;
	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
		return NVME_SC_ANA_PERSISTENT_LOSS;
	if (unlikely(state == NVME_ANA_CHANGE))
		return NVME_SC_ANA_TRANSITION;
	return 0;
}

static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
{
	if (unlikely(req->ns->readonly)) {
		switch (req->cmd->common.opcode) {
		case nvme_cmd_read:
		case nvme_cmd_flush:
			break;
		default:
			return NVME_SC_NS_WRITE_PROTECTED;
		}
	}

	return 0;
}

static u32 nvmet_io_cmd_transfer_len(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u32 metadata_len = 0;

	if (nvme_is_fabrics(cmd))
		return nvmet_fabrics_io_cmd_data_len(req);

	if (!req->ns)
		return 0;

	switch (req->cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
	case nvme_cmd_zone_append:
		if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
			metadata_len = nvmet_rw_metadata_len(req);
		return nvmet_rw_data_len(req) + metadata_len;
	case nvme_cmd_dsm:
		return nvmet_dsm_len(req);
	case nvme_cmd_zone_mgmt_recv:
		return (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2;
	default:
		return 0;
	}
}

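/*
 * Parse an I/O command: verify controller and authentication state, resolve
 * the namespace, check ANA and write-protect state, and dispatch to the
 * block, file, zoned or persistent-reservation command parsers.
 */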
static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u16 ret;

	if (nvme_is_fabrics(cmd))
		return nvmet_parse_fabrics_io_cmd(req);

	if (unlikely(!nvmet_check_auth_status(req)))
		return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;

	ret = nvmet_check_ctrl_status(req);
	if (unlikely(ret))
		return ret;

	if (nvmet_is_passthru_req(req))
		return nvmet_parse_passthru_io_cmd(req);

	ret = nvmet_req_find_ns(req);
	if (unlikely(ret))
		return ret;

	ret = nvmet_check_ana_state(req->port, req->ns);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}
	ret = nvmet_io_cmd_check_access(req);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}

	if (req->ns->pr.enable) {
		ret = nvmet_parse_pr_cmd(req);
		if (!ret)
			return ret;
	}

	switch (req->ns->csi) {
	case NVME_CSI_NVM:
		if (req->ns->file)
			ret = nvmet_file_parse_io_cmd(req);
		else
			ret = nvmet_bdev_parse_io_cmd(req);
		break;
	case NVME_CSI_ZNS:
		if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
			ret = nvmet_bdev_zns_parse_io_cmd(req);
		else
			ret = NVME_SC_INVALID_IO_CMD_SET;
		break;
	default:
		ret = NVME_SC_INVALID_IO_CMD_SET;
	}
	if (ret)
		return ret;

	if (req->ns->pr.enable) {
		ret = nvmet_pr_check_cmd_access(req);
		if (ret)
			return ret;

		ret = nvmet_pr_get_ns_pc_ref(req);
	}
	return ret;
}

bool nvmet_req_init(struct nvmet_req *req, struct nvmet_sq *sq,
		const struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = sq->cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->metadata_sg = NULL;
	req->sg_cnt = 0;
	req->metadata_sg_cnt = 0;
	req->transfer_len = 0;
	req->metadata_len = 0;
	req->cqe->result.u64 = 0;
	req->cqe->status = 0;
	req->cqe->sq_head = 0;
	req->ns = NULL;
	req->error_loc = NVMET_NO_ERROR_LOC;
	req->error_slba = 0;
	req->pc_ref = NULL;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		req->error_loc = offsetof(struct nvme_common_command, flags);
		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
		goto fail;
	}

	/*
	 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
	 * contains an address of a single contiguous physical buffer that is
	 * byte aligned. For PCI controllers, this is optional so not enforced.
	 */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
		if (!req->sq->ctrl || !nvmet_is_pci_ctrl(req->sq->ctrl)) {
			req->error_loc =
				offsetof(struct nvme_common_command, flags);
			status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
			goto fail;
		}
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	trace_nvmet_req_init(req, req->cmd);

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
		goto fail;
	}

	if (sq->ctrl)
		sq->ctrl->reset_tbkas = true;

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

void nvmet_req_uninit(struct nvmet_req *req)
{
	percpu_ref_put(&req->sq->ref);
	if (req->pc_ref)
		nvmet_pr_put_ns_pc_ref(req->pc_ref);
	if (req->ns)
		nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

size_t nvmet_req_transfer_len(struct nvmet_req *req)
{
	if (likely(req->sq->qid != 0))
		return nvmet_io_cmd_transfer_len(req);
	if (unlikely(!req->sq->ctrl))
		return nvmet_connect_cmd_data_len(req);
	return nvmet_admin_cmd_data_len(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_transfer_len);

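/*
 * Verify that the transport supplied exactly the amount of data the command
 * requires; complete the request with an SGL or field error if it did not.
 */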
bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len)
{
	if (unlikely(len != req->transfer_len)) {
		u16 status;

		req->error_loc = offsetof(struct nvme_common_command, dptr);
		if (req->cmd->common.flags & NVME_CMD_SGL_ALL)
			status = NVME_SC_SGL_INVALID_DATA;
		else
			status = NVME_SC_INVALID_FIELD;
		nvmet_req_complete(req, status | NVME_STATUS_DNR);
		return false;
	}

	return true;
}
EXPORT_SYMBOL_GPL(nvmet_check_transfer_len);

bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
{
	if (unlikely(data_len > req->transfer_len)) {
		u16 status;

		req->error_loc = offsetof(struct nvme_common_command, dptr);
		if (req->cmd->common.flags & NVME_CMD_SGL_ALL)
			status = NVME_SC_SGL_INVALID_DATA;
		else
			status = NVME_SC_INVALID_FIELD;
		nvmet_req_complete(req, status | NVME_STATUS_DNR);
		return false;
	}

	return true;
}

static unsigned int nvmet_data_transfer_len(struct nvmet_req *req)
{
	return req->transfer_len - req->metadata_len;
}

static int nvmet_req_alloc_p2pmem_sgls(struct pci_dev *p2p_dev,
		struct nvmet_req *req)
{
	req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
			nvmet_data_transfer_len(req));
	if (!req->sg)
		goto out_err;

	if (req->metadata_len) {
		req->metadata_sg = pci_p2pmem_alloc_sgl(p2p_dev,
				&req->metadata_sg_cnt, req->metadata_len);
		if (!req->metadata_sg)
			goto out_free_sg;
	}

	req->p2p_dev = p2p_dev;

	return 0;
out_free_sg:
	pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
out_err:
	return -ENOMEM;
}

static struct pci_dev *nvmet_req_find_p2p_dev(struct nvmet_req *req)
{
	if (!IS_ENABLED(CONFIG_PCI_P2PDMA) ||
	    !req->sq->ctrl || !req->sq->qid || !req->ns)
		return NULL;
	return radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, req->ns->nsid);
}

int nvmet_req_alloc_sgls(struct nvmet_req *req)
{
	struct pci_dev *p2p_dev = nvmet_req_find_p2p_dev(req);

	if (p2p_dev && !nvmet_req_alloc_p2pmem_sgls(p2p_dev, req))
		return 0;

	req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL,
			    &req->sg_cnt);
	if (unlikely(!req->sg))
		goto out;

	if (req->metadata_len) {
		req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL,
					     &req->metadata_sg_cnt);
		if (unlikely(!req->metadata_sg))
			goto out_free;
	}

	return 0;
out_free:
	sgl_free(req->sg);
out:
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls);

void nvmet_req_free_sgls(struct nvmet_req *req)
{
	if (req->p2p_dev) {
		pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
		if (req->metadata_sg)
			pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg);
		req->p2p_dev = NULL;
	} else {
		sgl_free(req->sg);
		if (req->metadata_sg)
			sgl_free(req->metadata_sg);
	}

	req->sg = NULL;
	req->metadata_sg = NULL;
	req->sg_cnt = 0;
	req->metadata_sg_cnt = 0;
}
EXPORT_SYMBOL_GPL(nvmet_req_free_sgls);

static inline bool nvmet_css_supported(u8 cc_css)
{
	switch (cc_css << NVME_CC_CSS_SHIFT) {
	case NVME_CC_CSS_NVM:
	case NVME_CC_CSS_CSI:
		return true;
	default:
		return false;
	}
}

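/*
 * Handle a CC.EN 0 -> 1 transition: validate the queue entry sizes, memory
 * page size, arbitration mechanism and command set selected in CC, then
 * report the controller ready and (re)arm the keep-alive timer.
 */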
static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/*
	 * Only I/O controllers should verify iosqes,iocqes.
	 * Strictly speaking, the spec says a discovery controller
	 * should verify iosqes,iocqes are zeroed, however that
	 * would break backwards compatibility, so don't enforce it.
	 */
	if (!nvmet_is_disc_subsys(ctrl->subsys) &&
	    (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	     nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES)) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	if (nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    !nvmet_css_supported(nvmet_cc_css(ctrl->cc))) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;

	/*
	 * Controllers that are not yet enabled should not really enforce the
	 * keep alive timeout, but we still want to track a timeout and cleanup
	 * in case a host died before it enabled the controller.  Hence, simply
	 * reset the keep alive timer when the controller is enabled.
	 */
	if (ctrl->kato)
		mod_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
	u32 old;

	mutex_lock(&ctrl->lock);
	old = ctrl->cc;
	ctrl->cc = new;

	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
		nvmet_start_ctrl(ctrl);
	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
		nvmet_clear_ctrl(ctrl);
	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
		nvmet_clear_ctrl(ctrl);
		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
	}
	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_update_cc);

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
	/* command sets supported: NVMe command set: */
	ctrl->cap = (1ULL << 37);
	/* Controller supports one or more I/O Command Sets */
	ctrl->cap |= (1ULL << 43);
	/* CC.EN timeout in 500msec units: */
	ctrl->cap |= (15ULL << 24);
	/* maximum queue entries supported: */
	if (ctrl->ops->get_max_queue_size)
		ctrl->cap |= min_t(u16, ctrl->ops->get_max_queue_size(ctrl),
				   ctrl->port->max_queue_size) - 1;
	else
		ctrl->cap |= ctrl->port->max_queue_size - 1;

	if (nvmet_is_passthru_subsys(ctrl->subsys))
		nvmet_passthrough_override_cap(ctrl);
}

struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
				       const char *hostnqn, u16 cntlid,
				       struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = NULL;
	struct nvmet_subsys *subsys;

	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		goto out;
	}

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->cntlid == cntlid) {
			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
				pr_warn("hostnqn mismatch.\n");
				continue;
			}
			if (!kref_get_unless_zero(&ctrl->ref))
				continue;

			/* ctrl found */
			goto found;
		}
	}

	ctrl = NULL; /* ctrl not found */
	pr_warn("could not find controller %d for subsys %s / host %s\n",
		cntlid, subsysnqn, hostnqn);
	req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);

found:
	mutex_unlock(&subsys->lock);
	nvmet_subsys_put(subsys);
out:
	return ctrl;
}

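/*
 * Reject commands that arrive while the controller is not enabled and ready,
 * or on a queue that has not completed authentication.
 */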
u16 nvmet_check_ctrl_status(struct nvmet_req *req)
{
	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
		pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
		       req->cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
	}

	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
		pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
		       req->cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
	}

	if (unlikely(!nvmet_check_auth_status(req))) {
		pr_warn("qid %d not authenticated\n", req->sq->qid);
		return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;
	}
	return 0;
}

bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn)
{
	struct nvmet_host_link *p;

	lockdep_assert_held(&nvmet_config_sem);

	if (subsys->allow_any_host)
		return true;

	if (nvmet_is_disc_subsys(subsys)) /* allow all access to disc subsys */
		return true;

	list_for_each_entry(p, &subsys->hosts, entry) {
		if (!strcmp(nvmet_host_name(p->host), hostnqn))
			return true;
	}

	return false;
}

static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
		struct device *p2p_client)
{
	struct nvmet_ns *ns;
	unsigned long idx;

	lockdep_assert_held(&ctrl->subsys->lock);

	if (!p2p_client)
		return;

	ctrl->p2p_client = get_device(p2p_client);

	nvmet_for_each_enabled_ns(&ctrl->subsys->namespaces, idx, ns)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);
}

static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
{
	struct radix_tree_iter iter;
	void __rcu **slot;

	lockdep_assert_held(&ctrl->subsys->lock);

	radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0)
		pci_dev_put(radix_tree_deref_slot(slot));

	put_device(ctrl->p2p_client);
}

static void nvmet_fatal_error_handler(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
			container_of(work, struct nvmet_ctrl, fatal_err_work);

	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
	ctrl->ops->delete_ctrl(ctrl);
}

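/*
 * Allocate and initialize a controller for a connect request: enforce host
 * access control, allocate a controller ID, set up queues, AEN state, the
 * keep-alive timer and authentication, and link the controller into the
 * subsystem.
 */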
struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	u32 kato = args->kato;
	u8 dhchap_status;
	int ret;

	args->status = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
	subsys = nvmet_find_get_subsys(args->port, args->subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			args->subsysnqn);
		args->result = IPO_IATTR_CONNECT_DATA(subsysnqn);
		args->error_loc = offsetof(struct nvme_common_command, dptr);
		return NULL;
	}

	down_read(&nvmet_config_sem);
	if (!nvmet_host_allowed(subsys, args->hostnqn)) {
		pr_info("connect by host %s for subsystem %s not allowed\n",
			args->hostnqn, args->subsysnqn);
		args->result = IPO_IATTR_CONNECT_DATA(hostnqn);
		up_read(&nvmet_config_sem);
		args->status = NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR;
		args->error_loc = offsetof(struct nvme_common_command, dptr);
		goto out_put_subsystem;
	}
	up_read(&nvmet_config_sem);

	args->status = NVME_SC_INTERNAL;
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		goto out_put_subsystem;
	mutex_init(&ctrl->lock);

	ctrl->port = args->port;
	ctrl->ops = args->ops;

#ifdef CONFIG_NVME_TARGET_PASSTHRU
	/* Set loop targets to clear IDs by default */
	if (ctrl->port->disc_addr.trtype == NVMF_TRTYPE_LOOP)
		subsys->clear_ids = 1;
#endif

	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);
	INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
	INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);

	memcpy(ctrl->hostnqn, args->hostnqn, NVMF_NQN_SIZE);

	kref_init(&ctrl->ref);
	ctrl->subsys = subsys;
	ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support;
	nvmet_init_cap(ctrl);
	WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);

	ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
			sizeof(__le32), GFP_KERNEL);
	if (!ctrl->changed_ns_list)
		goto out_free_ctrl;

	ctrl->sqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_sq *),
			GFP_KERNEL);
	if (!ctrl->sqs)
		goto out_free_changed_ns_list;

	ctrl->cqs = kcalloc(subsys->max_qid + 1, sizeof(struct nvmet_cq *),
			    GFP_KERNEL);
	if (!ctrl->cqs)
		goto out_free_sqs;

	ret = ida_alloc_range(&cntlid_ida,
			      subsys->cntlid_min, subsys->cntlid_max,
			      GFP_KERNEL);
	if (ret < 0) {
		args->status = NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR;
		goto out_free_cqs;
	}
	ctrl->cntlid = ret;

	/*
	 * Discovery controllers may use some arbitrary high value
	 * in order to cleanup stale discovery sessions
	 */
	if (nvmet_is_disc_subsys(ctrl->subsys) && !kato)
		kato = NVMET_DISC_KATO_MS;

	/* keep-alive timeout in seconds */
	ctrl->kato = DIV_ROUND_UP(kato, 1000);

	ctrl->err_counter = 0;
	spin_lock_init(&ctrl->error_lock);

	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	ret = nvmet_ctrl_init_pr(ctrl);
	if (ret)
		goto init_pr_fail;
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	nvmet_setup_p2p_ns_map(ctrl, args->p2p_client);
	nvmet_debugfs_ctrl_setup(ctrl);
	mutex_unlock(&subsys->lock);

	if (args->hostid)
		uuid_copy(&ctrl->hostid, args->hostid);

	dhchap_status = nvmet_setup_auth(ctrl, args->sq);
	if (dhchap_status) {
		pr_err("Failed to setup authentication, dhchap status %u\n",
		       dhchap_status);
		nvmet_ctrl_put(ctrl);
		if (dhchap_status == NVME_AUTH_DHCHAP_FAILURE_FAILED)
			args->status =
				NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR;
		else
			args->status = NVME_SC_INTERNAL;
		return NULL;
	}

	args->status = NVME_SC_SUCCESS;

	pr_info("Created %s controller %d for subsystem %s for NQN %s%s%s%s.\n",
		nvmet_is_disc_subsys(ctrl->subsys) ? "discovery" : "nvm",
		ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn,
		ctrl->pi_support ? " T10-PI is enabled" : "",
		nvmet_has_auth(ctrl, args->sq) ? " with DH-HMAC-CHAP" : "",
		nvmet_queue_tls_keyid(args->sq) ? ", TLS" : "");

	return ctrl;

init_pr_fail:
	mutex_unlock(&subsys->lock);
	nvmet_stop_keep_alive_timer(ctrl);
	ida_free(&cntlid_ida, ctrl->cntlid);
out_free_cqs:
	kfree(ctrl->cqs);
out_free_sqs:
	kfree(ctrl->sqs);
out_free_changed_ns_list:
	kfree(ctrl->changed_ns_list);
out_free_ctrl:
	kfree(ctrl);
out_put_subsystem:
	nvmet_subsys_put(subsys);
	return NULL;
}
EXPORT_SYMBOL_GPL(nvmet_alloc_ctrl);

static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	mutex_lock(&subsys->lock);
	nvmet_ctrl_destroy_pr(ctrl);
	nvmet_release_p2p_ns_map(ctrl);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	nvmet_stop_keep_alive_timer(ctrl);

	flush_work(&ctrl->async_event_work);
	cancel_work_sync(&ctrl->fatal_err_work);

	nvmet_destroy_auth(ctrl);

	nvmet_debugfs_ctrl_free(ctrl);

	ida_free(&cntlid_ida, ctrl->cntlid);

	nvmet_async_events_free(ctrl);
	kfree(ctrl->sqs);
	kfree(ctrl->cqs);
	kfree(ctrl->changed_ns_list);
	kfree(ctrl);

	nvmet_subsys_put(subsys);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvmet_ctrl_free);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_put);

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
	mutex_lock(&ctrl->lock);
	if (!(ctrl->csts & NVME_CSTS_CFS)) {
		ctrl->csts |= NVME_CSTS_CFS;
		queue_work(nvmet_wq, &ctrl->fatal_err_work);
	}
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);

ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl,
		char *traddr, size_t traddr_len)
{
	if (!ctrl->ops->host_traddr)
		return -EOPNOTSUPP;
	return ctrl->ops->host_traddr(ctrl, traddr, traddr_len);
}

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn)
{
	struct nvmet_subsys_link *p;

	if (!port)
		return NULL;

	if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
			return NULL;
		return nvmet_disc_subsys;
	}

	down_read(&nvmet_config_sem);
	if (!strncmp(nvmet_disc_subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
		if (kref_get_unless_zero(&nvmet_disc_subsys->ref)) {
			up_read(&nvmet_config_sem);
			return nvmet_disc_subsys;
		}
	}
	list_for_each_entry(p, &port->subsystems, entry) {
		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
			if (!kref_get_unless_zero(&p->subsys->ref))
				break;
			up_read(&nvmet_config_sem);
			return p->subsys;
		}
	}
	up_read(&nvmet_config_sem);
	return NULL;
}

struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type)
{
	struct nvmet_subsys *subsys;
	char serial[NVMET_SN_MAX_SIZE / 2];
	int ret;

	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
	if (!subsys)
		return ERR_PTR(-ENOMEM);

	subsys->ver = NVMET_DEFAULT_VS;
	/* generate a random serial number as our controllers are ephemeral: */
	get_random_bytes(&serial, sizeof(serial));
	bin2hex(subsys->serial, &serial, sizeof(serial));

	subsys->model_number = kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL);
	if (!subsys->model_number) {
		ret = -ENOMEM;
		goto free_subsys;
	}

	subsys->ieee_oui = 0;

	subsys->firmware_rev = kstrndup(UTS_RELEASE, NVMET_FR_MAX_SIZE, GFP_KERNEL);
	if (!subsys->firmware_rev) {
		ret = -ENOMEM;
		goto free_mn;
	}

	switch (type) {
	case NVME_NQN_NVME:
		subsys->max_qid = NVMET_NR_QUEUES;
		break;
	case NVME_NQN_DISC:
	case NVME_NQN_CURR:
		subsys->max_qid = 0;
		break;
	default:
		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
		ret = -EINVAL;
		goto free_fr;
	}
	subsys->type = type;
	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
			GFP_KERNEL);
	if (!subsys->subsysnqn) {
		ret = -ENOMEM;
		goto free_fr;
	}
	subsys->cntlid_min = NVME_CNTLID_MIN;
	subsys->cntlid_max = NVME_CNTLID_MAX;
	kref_init(&subsys->ref);

	mutex_init(&subsys->lock);
	xa_init(&subsys->namespaces);
	INIT_LIST_HEAD(&subsys->ctrls);
	INIT_LIST_HEAD(&subsys->hosts);

	ret = nvmet_debugfs_subsys_setup(subsys);
	if (ret)
		goto free_subsysnqn;

	return subsys;

free_subsysnqn:
	kfree(subsys->subsysnqn);
free_fr:
	kfree(subsys->firmware_rev);
free_mn:
	kfree(subsys->model_number);
free_subsys:
	kfree(subsys);
	return ERR_PTR(ret);
}

static void nvmet_subsys_free(struct kref *ref)
{
	struct nvmet_subsys *subsys =
		container_of(ref, struct nvmet_subsys, ref);

	WARN_ON_ONCE(!list_empty(&subsys->ctrls));
	WARN_ON_ONCE(!list_empty(&subsys->hosts));
	WARN_ON_ONCE(!xa_empty(&subsys->namespaces));

	nvmet_debugfs_subsys_free(subsys);

	xa_destroy(&subsys->namespaces);
	nvmet_passthru_subsys_free(subsys);

	kfree(subsys->subsysnqn);
	kfree(subsys->model_number);
	kfree(subsys->firmware_rev);
	kfree(subsys);
}

void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		ctrl->ops->delete_ctrl(ctrl);
	mutex_unlock(&subsys->lock);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
	kref_put(&subsys->ref, nvmet_subsys_free);
}

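/*
 * Module init: create the bvec cache and the zoned, buffered-io and generic
 * target workqueues, then register the debugfs, discovery and configfs
 * interfaces, unwinding in reverse order on failure.
 */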
static int __init nvmet_init(void)
{
	int error = -ENOMEM;

	nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;

	nvmet_bvec_cache = kmem_cache_create("nvmet-bvec",
			NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), 0,
			SLAB_HWCACHE_ALIGN, NULL);
	if (!nvmet_bvec_cache)
		return -ENOMEM;

	zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0);
	if (!zbd_wq)
		goto out_destroy_bvec_cache;

	buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
			WQ_MEM_RECLAIM, 0);
	if (!buffered_io_wq)
		goto out_free_zbd_work_queue;

	nvmet_wq = alloc_workqueue("nvmet-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nvmet_wq)
		goto out_free_buffered_work_queue;

	error = nvmet_init_debugfs();
	if (error)
		goto out_free_nvmet_work_queue;

	error = nvmet_init_discovery();
	if (error)
		goto out_exit_debugfs;

	error = nvmet_init_configfs();
	if (error)
		goto out_exit_discovery;

	return 0;

out_exit_discovery:
	nvmet_exit_discovery();
out_exit_debugfs:
	nvmet_exit_debugfs();
out_free_nvmet_work_queue:
	destroy_workqueue(nvmet_wq);
out_free_buffered_work_queue:
	destroy_workqueue(buffered_io_wq);
out_free_zbd_work_queue:
	destroy_workqueue(zbd_wq);
out_destroy_bvec_cache:
	kmem_cache_destroy(nvmet_bvec_cache);
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();
	nvmet_exit_debugfs();
	ida_destroy(&cntlid_ida);
	destroy_workqueue(nvmet_wq);
	destroy_workqueue(buffered_io_wq);
	destroy_workqueue(zbd_wq);
	kmem_cache_destroy(nvmet_bvec_cache);

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_DESCRIPTION("NVMe target core framework");
MODULE_LICENSE("GPL v2");