// SPDX-License-Identifier: GPL-2.0
/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/pci-p2pdma.h>
#include <linux/scatterlist.h>

#include <generated/utsrelease.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#include "nvmet.h"
#include "debugfs.h"

struct kmem_cache *nvmet_bvec_cache;
struct workqueue_struct *buffered_io_wq;
struct workqueue_struct *zbd_wq;
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

struct workqueue_struct *nvmet_wq;
EXPORT_SYMBOL_GPL(nvmet_wq);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources protected by this semaphore is:
 *
 * - subsystems list
 * - per-subsystem allowed hosts list
 * - allow_any_host subsystem attribute
 * - nvmet_genctr
 * - the nvmet_transports array
 *
 * When updating any of those lists/structures, the write lock should be
 * obtained, while when reading (populating the discovery log page or checking
 * a host-subsystem link) the read lock is obtained to allow concurrent reads.
 */
DECLARE_RWSEM(nvmet_config_sem);

u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
u64 nvmet_ana_chgcnt;
DECLARE_RWSEM(nvmet_ana_sem);

inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
{
	switch (errno) {
	case 0:
		return NVME_SC_SUCCESS;
	case -ENOSPC:
		req->error_loc = offsetof(struct nvme_rw_command, length);
		return NVME_SC_CAP_EXCEEDED | NVME_STATUS_DNR;
	case -EREMOTEIO:
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		return NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
	case -EOPNOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
	case -ENODATA:
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		return NVME_SC_ACCESS_DENIED;
	case -EIO:
		fallthrough;
	default:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;
	}
}

u16 nvmet_report_invalid_opcode(struct nvmet_req *req)
{
	pr_debug("unhandled cmd %d on qid %d\n", req->cmd->common.opcode,
		 req->sq->qid);

	req->error_loc = offsetof(struct nvme_common_command, opcode);
	return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
	}
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
	}
	return 0;
}

u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
	if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
	}
	return 0;
}

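/*
 * Return the highest enabled NSID in the subsystem; used to refresh
 * subsys->max_nsid when a namespace is removed or fails to be inserted.
 */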
static u32 nvmet_max_nsid(struct nvmet_subsys *subsys)
{
	struct nvmet_ns *cur;
	unsigned long idx;
	u32 nsid = 0;

	nvmet_for_each_enabled_ns(&subsys->namespaces, idx, cur)
		nsid = cur->nsid;

	return nsid;
}

static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	mutex_lock(&ctrl->lock);
	while (ctrl->nr_async_event_cmds) {
		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_STATUS_DNR);
		mutex_lock(&ctrl->lock);
	}
	mutex_unlock(&ctrl->lock);
}

static void nvmet_async_events_process(struct nvmet_ctrl *ctrl)
{
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	mutex_lock(&ctrl->lock);
	while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) {
		aen = list_first_entry(&ctrl->async_events,
				       struct nvmet_async_event, entry);
		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		trace_nvmet_async_event(ctrl, req->cqe->result.u32);
		nvmet_req_complete(req, 0);
		mutex_lock(&ctrl->lock);
	}
	mutex_unlock(&ctrl->lock);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_async_event *aen, *tmp;

	mutex_lock(&ctrl->lock);
	list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) {
		list_del(&aen->entry);
		kfree(aen);
	}
	mutex_unlock(&ctrl->lock);
}

static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);

	nvmet_async_events_process(ctrl);
}

void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	queue_work(nvmet_wq, &ctrl->async_event_work);
}

static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	u32 i;

	mutex_lock(&ctrl->lock);
	if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
		goto out_unlock;

	for (i = 0; i < ctrl->nr_changed_ns; i++) {
		if (ctrl->changed_ns_list[i] == nsid)
			goto out_unlock;
	}

	if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
		ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
		ctrl->nr_changed_ns = U32_MAX;
		goto out_unlock;
	}

	ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
out_unlock:
	mutex_unlock(&ctrl->lock);
}

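/*
 * Record the changed NSID in the Changed Namespace List of every controller
 * on the subsystem and raise an AEN for those controllers that have the
 * namespace attribute notice enabled.
 */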
void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ctrl *ctrl;

	lockdep_assert_held(&subsys->lock);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
				NVME_AER_NOTICE_NS_CHANGED,
				NVME_LOG_CHANGED_NS);
	}
}

void nvmet_send_ana_event(struct nvmet_subsys *subsys,
		struct nvmet_port *port)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (port && ctrl->port != port)
			continue;
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
				NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
	}
	mutex_unlock(&subsys->lock);
}

void nvmet_port_send_ana_event(struct nvmet_port *port)
{
	struct nvmet_subsys_link *p;

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry)
		nvmet_send_ana_event(p->subsys, port);
	up_read(&nvmet_config_sem);
}

int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);

void nvmet_port_del_ctrls(struct nvmet_port *port, struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->port == port)
			ctrl->ops->delete_ctrl(ctrl);
	}
	mutex_unlock(&subsys->lock);
}

int nvmet_enable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	if (port->disc_addr.trtype == NVMF_TRTYPE_MAX)
		return -EINVAL;

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	/*
	 * If the user requested PI support and the transport isn't pi capable,
	 * don't enable the port.
	 */
	if (port->pi_enable && !(ops->flags & NVMF_METADATA_SUPPORTED)) {
		pr_err("T10-PI is not supported by transport type %d\n",
		       port->disc_addr.trtype);
		ret = -EINVAL;
		goto out_put;
	}

	ret = ops->add_port(port);
	if (ret)
		goto out_put;

	/* If the transport didn't set inline_data_size, then disable it. */
	if (port->inline_data_size < 0)
		port->inline_data_size = 0;

	/*
	 * If the transport didn't set the max_queue_size properly, then clamp
	 * it to the target limits. Also set default values in case the
	 * transport didn't set it at all.
	 */
	if (port->max_queue_size < 0)
		port->max_queue_size = NVMET_MAX_QUEUE_SIZE;
	else
		port->max_queue_size = clamp_t(int, port->max_queue_size,
					       NVMET_MIN_QUEUE_SIZE,
					       NVMET_MAX_QUEUE_SIZE);

	port->enabled = true;
	port->tr_ops = ops;
	return 0;

out_put:
	module_put(ops->owner);
	return ret;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;
	port->tr_ops = NULL;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);
	bool reset_tbkas = ctrl->reset_tbkas;

	ctrl->reset_tbkas = false;
	if (reset_tbkas) {
		pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
			ctrl->cntlid);
		queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
		return;
	}

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	if (unlikely(ctrl->kato == 0))
		return;

	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	if (unlikely(ctrl->kato == 0))
		return;

	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}

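/*
 * Resolve the command's NSID to an enabled namespace and take a reference
 * on it; the reference is dropped again in __nvmet_req_complete().
 */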
u16 nvmet_req_find_ns(struct nvmet_req *req)
{
	u32 nsid = le32_to_cpu(req->cmd->common.nsid);
	struct nvmet_subsys *subsys = nvmet_req_subsys(req);

	req->ns = xa_load(&subsys->namespaces, nsid);
	if (unlikely(!req->ns || !req->ns->enabled)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		if (!req->ns) /* ns doesn't exist! */
			return NVME_SC_INVALID_NS | NVME_STATUS_DNR;

		/* ns exists but it's disabled */
		req->ns = NULL;
		return NVME_SC_INTERNAL_PATH_ERROR;
	}

	percpu_ref_get(&req->ns->ref);
	return NVME_SC_SUCCESS;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
{
	nvmet_bdev_ns_disable(ns);
	nvmet_file_ns_disable(ns);
}

static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
{
	int ret;
	struct pci_dev *p2p_dev;

	if (!ns->use_p2pmem)
		return 0;

	if (!ns->bdev) {
		pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
		return -EINVAL;
	}

	if (!blk_queue_pci_p2pdma(ns->bdev->bd_disk->queue)) {
		pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
		       ns->device_path);
		return -EINVAL;
	}

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
		if (ret < 0)
			return -EINVAL;
	} else {
		/*
		 * Right now we just check that there is p2pmem available so
		 * we can report an error to the user right away if there
		 * is not. We'll find the actual device to use once we
		 * set up the controller when the port's device is available.
		 */

		p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available for %s\n",
			       ns->device_path);
			return -EINVAL;
		}

		pci_dev_put(p2p_dev);
	}

	return 0;
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
				    struct nvmet_ns *ns)
{
	struct device *clients[2];
	struct pci_dev *p2p_dev;
	int ret;

	if (!ctrl->p2p_client || !ns->use_p2pmem)
		return;

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
		if (ret < 0)
			return;

		p2p_dev = pci_dev_get(ns->p2p_dev);
	} else {
		clients[0] = ctrl->p2p_client;
		clients[1] = nvmet_ns_dev(ns);

		p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
			       dev_name(ctrl->p2p_client), ns->device_path);
			return;
		}
	}

	ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
	if (ret < 0)
		pci_dev_put(p2p_dev);

	pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
		ns->nsid);
}

bool nvmet_ns_revalidate(struct nvmet_ns *ns)
{
	loff_t oldsize = ns->size;

	if (ns->bdev)
		nvmet_bdev_ns_revalidate(ns);
	else
		nvmet_file_ns_revalidate(ns);

	return oldsize != ns->size;
}

int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;
	int ret;

	mutex_lock(&subsys->lock);
	ret = 0;

	if (nvmet_is_passthru_subsys(subsys)) {
		pr_info("cannot enable both passthru and regular namespaces for a single subsystem");
		goto out_unlock;
	}

	if (ns->enabled)
		goto out_unlock;

	ret = nvmet_bdev_ns_enable(ns);
	if (ret == -ENOTBLK)
		ret = nvmet_file_ns_enable(ns);
	if (ret)
		goto out_unlock;

	ret = nvmet_p2pmem_ns_enable(ns);
	if (ret)
		goto out_dev_disable;

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);

	if (ns->pr.enable) {
		ret = nvmet_pr_init_ns(ns);
		if (ret)
			goto out_dev_put;
	}

	if (percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL))
		goto out_pr_exit;

	nvmet_ns_changed(subsys, ns->nsid);
	ns->enabled = true;
	xa_set_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED);
	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
out_pr_exit:
	if (ns->pr.enable)
		nvmet_pr_exit_ns(ns);
out_dev_put:
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
out_dev_disable:
	nvmet_ns_dev_disable(ns);
	goto out_unlock;
}

void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	if (!ns->enabled)
		goto out_unlock;

	ns->enabled = false;
	xa_clear_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));

	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespaces from the lookup list, we
	 * can kill the per_cpu ref and wait for any remaining references
	 * to be dropped, as well as a RCU grace period for anyone only
	 * using the namespace under rcu_read_lock().  Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	if (ns->pr.enable)
		nvmet_pr_exit_ns(ns);

	mutex_lock(&subsys->lock);
	nvmet_ns_changed(subsys, ns->nsid);
	nvmet_ns_dev_disable(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;

	nvmet_ns_disable(ns);

	mutex_lock(&subsys->lock);

	xa_erase(&subsys->namespaces, ns->nsid);
	if (ns->nsid == subsys->max_nsid)
		subsys->max_nsid = nvmet_max_nsid(subsys);

	subsys->nr_namespaces--;
	mutex_unlock(&subsys->lock);

	down_write(&nvmet_ana_sem);
	nvmet_ana_group_enabled[ns->anagrpid]--;
	up_write(&nvmet_ana_sem);

	kfree(ns->device_path);
	kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	mutex_lock(&subsys->lock);

	if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
		goto out_unlock;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		goto out_unlock;

	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = nsid;

	if (xa_insert(&subsys->namespaces, ns->nsid, ns, GFP_KERNEL))
		goto out_exit;

	subsys->nr_namespaces++;

	mutex_unlock(&subsys->lock);

	down_write(&nvmet_ana_sem);
	ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
	nvmet_ana_group_enabled[ns->anagrpid]++;
	up_write(&nvmet_ana_sem);

	uuid_gen(&ns->uuid);
	ns->buffered_io = false;
	ns->csi = NVME_CSI_NVM;

	return ns;
out_exit:
	subsys->max_nsid = nvmet_max_nsid(subsys);
	kfree(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
	return NULL;
}

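/*
 * Advance the shadow SQ head pointer reported in the completion entry; the
 * cmpxchg loop lets completions from different contexts update sqhd without
 * losing increments.
 */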
static void nvmet_update_sq_head(struct nvmet_req *req)
{
	if (req->sq->size) {
		u32 old_sqhd, new_sqhd;

		old_sqhd = READ_ONCE(req->sq->sqhd);
		do {
			new_sqhd = (old_sqhd + 1) % req->sq->size;
		} while (!try_cmpxchg(&req->sq->sqhd, &old_sqhd, new_sqhd));
	}
	req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
}

static void nvmet_set_error(struct nvmet_req *req, u16 status)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvme_error_slot *new_error_slot;
	unsigned long flags;

	req->cqe->status = cpu_to_le16(status << 1);

	if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC)
		return;

	spin_lock_irqsave(&ctrl->error_lock, flags);
	ctrl->err_counter++;
	new_error_slot =
		&ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS];

	new_error_slot->error_count = cpu_to_le64(ctrl->err_counter);
	new_error_slot->sqid = cpu_to_le16(req->sq->qid);
	new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id);
	new_error_slot->status_field = cpu_to_le16(status << 1);
	new_error_slot->param_error_location = cpu_to_le16(req->error_loc);
	new_error_slot->lba = cpu_to_le64(req->error_slba);
	new_error_slot->nsid = req->cmd->common.nsid;
	spin_unlock_irqrestore(&ctrl->error_lock, flags);

	/* set the more bit for this request */
	req->cqe->status |= cpu_to_le16(1 << 14);
}

static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	struct nvmet_ns *ns = req->ns;
	struct nvmet_pr_per_ctrl_ref *pc_ref = req->pc_ref;

	if (!req->sq->sqhd_disabled)
		nvmet_update_sq_head(req);
	req->cqe->sq_id = cpu_to_le16(req->sq->qid);
	req->cqe->command_id = req->cmd->common.command_id;

	if (unlikely(status))
		nvmet_set_error(req, status);

	trace_nvmet_req_complete(req);

	req->ops->queue_response(req);

	if (pc_ref)
		nvmet_pr_put_ns_pc_ref(pc_ref);
	if (ns)
		nvmet_put_namespace(ns);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	struct nvmet_sq *sq = req->sq;

	__nvmet_req_complete(req, status);
	percpu_ref_put(&sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_init(struct nvmet_cq *cq)
{
	refcount_set(&cq->ref, 1);
}
EXPORT_SYMBOL_GPL(nvmet_cq_init);

bool nvmet_cq_get(struct nvmet_cq *cq)
{
	return refcount_inc_not_zero(&cq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_cq_get);

void nvmet_cq_put(struct nvmet_cq *cq)
{
	if (refcount_dec_and_test(&cq->ref))
		nvmet_cq_destroy(cq);
}
EXPORT_SYMBOL_GPL(nvmet_cq_put);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		    u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

void nvmet_cq_destroy(struct nvmet_cq *cq)
{
	struct nvmet_ctrl *ctrl = cq->ctrl;

	if (ctrl) {
		ctrl->cqs[cq->qid] = NULL;
		nvmet_ctrl_put(cq->ctrl);
		cq->ctrl = NULL;
	}
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		    u16 qid, u16 size)
{
	sq->sqhd = 0;
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

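/*
 * Confirmation callback for percpu_ref_kill_and_confirm(); wakes up
 * nvmet_sq_destroy(), which waits on confirm_done before draining the
 * remaining requests on the queue.
 */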
static void nvmet_confirm_sq(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->confirm_done);
}

u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create)
{
	if (!ctrl->cqs)
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;

	if (cqid > ctrl->subsys->max_qid)
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;

	if ((create && ctrl->cqs[cqid]) || (!create && !ctrl->cqs[cqid]))
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;

	return NVME_SC_SUCCESS;
}

u16 nvmet_check_io_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create)
{
	if (!cqid)
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
	return nvmet_check_cqid(ctrl, cqid, create);
}

bool nvmet_cq_in_use(struct nvmet_cq *cq)
{
	return refcount_read(&cq->ref) > 1;
}
EXPORT_SYMBOL_GPL(nvmet_cq_in_use);

u16 nvmet_cq_create(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		    u16 qid, u16 size)
{
	u16 status;

	status = nvmet_check_cqid(ctrl, qid, true);
	if (status != NVME_SC_SUCCESS)
		return status;

	if (!kref_get_unless_zero(&ctrl->ref))
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;
	cq->ctrl = ctrl;

	nvmet_cq_init(cq);
	nvmet_cq_setup(ctrl, cq, qid, size);

	return NVME_SC_SUCCESS;
}
EXPORT_SYMBOL_GPL(nvmet_cq_create);

u16 nvmet_check_sqid(struct nvmet_ctrl *ctrl, u16 sqid,
		     bool create)
{
	if (!ctrl->sqs)
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;

	if (sqid > ctrl->subsys->max_qid)
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;

	if ((create && ctrl->sqs[sqid]) ||
	    (!create && !ctrl->sqs[sqid]))
		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;

	return NVME_SC_SUCCESS;
}

u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		    struct nvmet_cq *cq, u16 sqid, u16 size)
{
	u16 status;
	int ret;

	if (!kref_get_unless_zero(&ctrl->ref))
		return NVME_SC_INTERNAL | NVME_STATUS_DNR;

	status = nvmet_check_sqid(ctrl, sqid, true);
	if (status != NVME_SC_SUCCESS)
		return status;

	ret = nvmet_sq_init(sq, cq);
	if (ret) {
		status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
		goto ctrl_put;
	}

	nvmet_sq_setup(ctrl, sq, sqid, size);
	sq->ctrl = ctrl;

	return NVME_SC_SUCCESS;

ctrl_put:
	nvmet_ctrl_put(ctrl);
	return status;
}
EXPORT_SYMBOL_GPL(nvmet_sq_create);

void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	struct nvmet_ctrl *ctrl = sq->ctrl;

	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
	 */
	if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
		nvmet_async_events_failall(ctrl);
	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
	wait_for_completion(&sq->confirm_done);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);
	nvmet_auth_sq_free(sq);
	nvmet_cq_put(sq->cq);

	/*
	 * We must reference the ctrl again after waiting for inflight IO
	 * to complete: an admin connect may have sneaked in after we stored
	 * sq->ctrl locally, but before we killed the percpu_ref.  That
	 * connect allocates and assigns sq->ctrl, which now needs a final
	 * ref put, as this ctrl is going away.
	 */
	ctrl = sq->ctrl;

	if (ctrl) {
		/*
		 * The teardown flow may take some time, and the host may not
		 * send us keep-alive during this period, hence reset the
		 * traffic based keep-alive timer so we don't trigger a
		 * controller teardown as a result of a keep-alive expiration.
		 */
		ctrl->reset_tbkas = true;
		sq->ctrl->sqs[sq->qid] = NULL;
		nvmet_ctrl_put(ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq, struct nvmet_cq *cq)
{
	int ret;

	if (!nvmet_cq_get(cq))
		return -EINVAL;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		nvmet_cq_put(cq);
		return ret;
	}
	init_completion(&sq->free_done);
	init_completion(&sq->confirm_done);
	nvmet_auth_sq_init(sq);
	sq->cq = cq;

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
		struct nvmet_ns *ns)
{
	enum nvme_ana_state state = port->ana_state[ns->anagrpid];

	if (unlikely(state == NVME_ANA_INACCESSIBLE))
		return NVME_SC_ANA_INACCESSIBLE;
	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
		return NVME_SC_ANA_PERSISTENT_LOSS;
	if (unlikely(state == NVME_ANA_CHANGE))
		return NVME_SC_ANA_TRANSITION;
	return 0;
}

static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
{
	if (unlikely(req->ns->readonly)) {
		switch (req->cmd->common.opcode) {
		case nvme_cmd_read:
		case nvme_cmd_flush:
			break;
		default:
			return NVME_SC_NS_WRITE_PROTECTED;
		}
	}

	return 0;
}

static u32 nvmet_io_cmd_transfer_len(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u32 metadata_len = 0;

	if (nvme_is_fabrics(cmd))
		return nvmet_fabrics_io_cmd_data_len(req);

	if (!req->ns)
		return 0;

	switch (req->cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
	case nvme_cmd_zone_append:
		if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
			metadata_len = nvmet_rw_metadata_len(req);
		return nvmet_rw_data_len(req) + metadata_len;
	case nvme_cmd_dsm:
		return nvmet_dsm_len(req);
	case nvme_cmd_zone_mgmt_recv:
		return (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2;
	default:
		return 0;
	}
}

static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u16 ret;

	if (nvme_is_fabrics(cmd))
		return nvmet_parse_fabrics_io_cmd(req);

	if (unlikely(!nvmet_check_auth_status(req)))
		return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;

	ret = nvmet_check_ctrl_status(req);
	if (unlikely(ret))
		return ret;

	if (nvmet_is_passthru_req(req))
		return nvmet_parse_passthru_io_cmd(req);

	ret = nvmet_req_find_ns(req);
	if (unlikely(ret))
		return ret;

	ret = nvmet_check_ana_state(req->port, req->ns);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}
	ret = nvmet_io_cmd_check_access(req);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}

	if (req->ns->pr.enable) {
		ret = nvmet_parse_pr_cmd(req);
		if (!ret)
			return ret;
	}

	switch (req->ns->csi) {
	case NVME_CSI_NVM:
		if (req->ns->file)
			ret = nvmet_file_parse_io_cmd(req);
		else
			ret = nvmet_bdev_parse_io_cmd(req);
		break;
	case NVME_CSI_ZNS:
		if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
			ret = nvmet_bdev_zns_parse_io_cmd(req);
		else
			ret = NVME_SC_INVALID_IO_CMD_SET;
		break;
	default:
		ret = NVME_SC_INVALID_IO_CMD_SET;
	}
	if (ret)
		return ret;

	if (req->ns->pr.enable) {
		ret = nvmet_pr_check_cmd_access(req);
		if (ret)
			return ret;

		ret = nvmet_pr_get_ns_pc_ref(req);
	}
	return ret;
}

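/*
 * Initialize a request handed over by the transport driver and parse the
 * command.  Returns false and completes the request with an error status if
 * it cannot be executed; on success the request holds a reference on the SQ
 * that is dropped again by nvmet_req_complete() or nvmet_req_uninit().
 */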
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_sq *sq,
		const struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = sq->cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->metadata_sg = NULL;
	req->sg_cnt = 0;
	req->metadata_sg_cnt = 0;
	req->transfer_len = 0;
	req->metadata_len = 0;
	req->cqe->result.u64 = 0;
	req->cqe->status = 0;
	req->cqe->sq_head = 0;
	req->ns = NULL;
	req->error_loc = NVMET_NO_ERROR_LOC;
	req->error_slba = 0;
	req->pc_ref = NULL;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		req->error_loc = offsetof(struct nvme_common_command, flags);
		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
		goto fail;
	}

	/*
	 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
	 * contains an address of a single contiguous physical buffer that is
	 * byte aligned. For PCI controllers, this is optional so not enforced.
	 */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
		if (!req->sq->ctrl || !nvmet_is_pci_ctrl(req->sq->ctrl)) {
			req->error_loc =
				offsetof(struct nvme_common_command, flags);
			status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
			goto fail;
		}
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	trace_nvmet_req_init(req, req->cmd);

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
		goto fail;
	}

	if (sq->ctrl)
		sq->ctrl->reset_tbkas = true;

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

void nvmet_req_uninit(struct nvmet_req *req)
{
	percpu_ref_put(&req->sq->ref);
	if (req->pc_ref)
		nvmet_pr_put_ns_pc_ref(req->pc_ref);
	if (req->ns)
		nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

size_t nvmet_req_transfer_len(struct nvmet_req *req)
{
	if (likely(req->sq->qid != 0))
		return nvmet_io_cmd_transfer_len(req);
	if (unlikely(!req->sq->ctrl))
		return nvmet_connect_cmd_data_len(req);
	return nvmet_admin_cmd_data_len(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_transfer_len);

bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len)
{
	if (unlikely(len != req->transfer_len)) {
		u16 status;

		req->error_loc = offsetof(struct nvme_common_command, dptr);
		if (req->cmd->common.flags & NVME_CMD_SGL_ALL)
			status = NVME_SC_SGL_INVALID_DATA;
		else
			status = NVME_SC_INVALID_FIELD;
		nvmet_req_complete(req, status | NVME_STATUS_DNR);
		return false;
	}

	return true;
}
EXPORT_SYMBOL_GPL(nvmet_check_transfer_len);

bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
{
	if (unlikely(data_len > req->transfer_len)) {
		u16 status;

		req->error_loc = offsetof(struct nvme_common_command, dptr);
		if (req->cmd->common.flags & NVME_CMD_SGL_ALL)
			status = NVME_SC_SGL_INVALID_DATA;
		else
			status = NVME_SC_INVALID_FIELD;
		nvmet_req_complete(req, status | NVME_STATUS_DNR);
		return false;
	}

	return true;
}

static unsigned int nvmet_data_transfer_len(struct nvmet_req *req)
{
	return req->transfer_len - req->metadata_len;
}

static int nvmet_req_alloc_p2pmem_sgls(struct pci_dev *p2p_dev,
		struct nvmet_req *req)
{
	req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
			nvmet_data_transfer_len(req));
	if (!req->sg)
		goto out_err;

	if (req->metadata_len) {
		req->metadata_sg = pci_p2pmem_alloc_sgl(p2p_dev,
				&req->metadata_sg_cnt, req->metadata_len);
		if (!req->metadata_sg)
			goto out_free_sg;
	}

	req->p2p_dev = p2p_dev;

	return 0;
out_free_sg:
	pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
out_err:
	return -ENOMEM;
}

static struct pci_dev *nvmet_req_find_p2p_dev(struct nvmet_req *req)
{
	if (!IS_ENABLED(CONFIG_PCI_P2PDMA) ||
	    !req->sq->ctrl || !req->sq->qid || !req->ns)
		return NULL;
	return radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, req->ns->nsid);
}

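/*
 * Allocate the data and, if needed, metadata scatterlists for a request,
 * preferring peer-to-peer memory when a P2P device has been set up for this
 * controller/namespace pair.
 */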
int nvmet_req_alloc_sgls(struct nvmet_req *req)
{
	struct pci_dev *p2p_dev = nvmet_req_find_p2p_dev(req);

	if (p2p_dev && !nvmet_req_alloc_p2pmem_sgls(p2p_dev, req))
		return 0;

	req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL,
			    &req->sg_cnt);
	if (unlikely(!req->sg))
		goto out;

	if (req->metadata_len) {
		req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL,
					     &req->metadata_sg_cnt);
		if (unlikely(!req->metadata_sg))
			goto out_free;
	}

	return 0;
out_free:
	sgl_free(req->sg);
out:
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls);

void nvmet_req_free_sgls(struct nvmet_req *req)
{
	if (req->p2p_dev) {
		pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
		if (req->metadata_sg)
			pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg);
		req->p2p_dev = NULL;
	} else {
		sgl_free(req->sg);
		if (req->metadata_sg)
			sgl_free(req->metadata_sg);
	}

	req->sg = NULL;
	req->metadata_sg = NULL;
	req->sg_cnt = 0;
	req->metadata_sg_cnt = 0;
}
EXPORT_SYMBOL_GPL(nvmet_req_free_sgls);

static inline bool nvmet_css_supported(u8 cc_css)
{
	switch (cc_css << NVME_CC_CSS_SHIFT) {
	case NVME_CC_CSS_NVM:
	case NVME_CC_CSS_CSI:
		return true;
	default:
		return false;
	}
}

static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/*
	 * Only I/O controllers should verify iosqes,iocqes.
	 * Strictly speaking, the spec says a discovery controller
	 * should verify iosqes,iocqes are zeroed, however that
	 * would break backwards compatibility, so don't enforce it.
	 */
	if (!nvmet_is_disc_subsys(ctrl->subsys) &&
	    (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	     nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES)) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	if (nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    !nvmet_css_supported(nvmet_cc_css(ctrl->cc))) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;

	/*
	 * Controllers that are not yet enabled should not really enforce the
	 * keep alive timeout, but we still want to track a timeout and clean
	 * up in case a host died before it enabled the controller.  Hence,
	 * simply reset the keep alive timer when the controller is enabled.
	 */
	if (ctrl->kato)
		mod_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
	u32 old;

	mutex_lock(&ctrl->lock);
	old = ctrl->cc;
	ctrl->cc = new;

	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
		nvmet_start_ctrl(ctrl);
	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
		nvmet_clear_ctrl(ctrl);
	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
		nvmet_clear_ctrl(ctrl);
		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
	}
	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_update_cc);

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
	/* command sets supported: NVMe command set: */
	ctrl->cap = (1ULL << 37);
	/* Controller supports one or more I/O Command Sets */
	ctrl->cap |= (1ULL << 43);
	/* CC.EN timeout in 500msec units: */
	ctrl->cap |= (15ULL << 24);
	/* maximum queue entries supported: */
	if (ctrl->ops->get_max_queue_size)
		ctrl->cap |= min_t(u16, ctrl->ops->get_max_queue_size(ctrl),
				   ctrl->port->max_queue_size) - 1;
	else
		ctrl->cap |= ctrl->port->max_queue_size - 1;

	if (nvmet_is_passthru_subsys(ctrl->subsys))
		nvmet_passthrough_override_cap(ctrl);
}

struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
				       const char *hostnqn, u16 cntlid,
				       struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = NULL;
	struct nvmet_subsys *subsys;

	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		goto out;
	}

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->cntlid == cntlid) {
			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
				pr_warn("hostnqn mismatch.\n");
				continue;
			}
			if (!kref_get_unless_zero(&ctrl->ref))
				continue;

			/* ctrl found */
			goto found;
		}
	}

	ctrl = NULL; /* ctrl not found */
	pr_warn("could not find controller %d for subsys %s / host %s\n",
		cntlid, subsysnqn, hostnqn);
	req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);

found:
	mutex_unlock(&subsys->lock);
	nvmet_subsys_put(subsys);
out:
	return ctrl;
}

u16 nvmet_check_ctrl_status(struct nvmet_req *req)
{
	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
		pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
		       req->cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
	}

	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
		pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
		       req->cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
	}

	if (unlikely(!nvmet_check_auth_status(req))) {
		pr_warn("qid %d not authenticated\n", req->sq->qid);
		return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;
	}
	return 0;
}

bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn)
{
	struct nvmet_host_link *p;

	lockdep_assert_held(&nvmet_config_sem);

	if (subsys->allow_any_host)
		return true;

	if (nvmet_is_disc_subsys(subsys)) /* allow all access to disc subsys */
		return true;

	list_for_each_entry(p, &subsys->hosts, entry) {
		if (!strcmp(nvmet_host_name(p->host), hostnqn))
			return true;
	}

	return false;
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
		struct device *p2p_client)
{
	struct nvmet_ns *ns;
	unsigned long idx;

	if (!p2p_client)
		return;

	ctrl->p2p_client = get_device(p2p_client);

	nvmet_for_each_enabled_ns(&ctrl->subsys->namespaces, idx, ns)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
{
	struct radix_tree_iter iter;
	void __rcu **slot;

	radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0)
		pci_dev_put(radix_tree_deref_slot(slot));

	put_device(ctrl->p2p_client);
}

static void nvmet_fatal_error_handler(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
			container_of(work, struct nvmet_ctrl, fatal_err_work);

	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
	ctrl->ops->delete_ctrl(ctrl);
}

struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	u32 kato = args->kato;
	u8 dhchap_status;
	int ret;

	args->status = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
	subsys = nvmet_find_get_subsys(args->port, args->subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			args->subsysnqn);
		args->result = IPO_IATTR_CONNECT_DATA(subsysnqn);
		args->error_loc = offsetof(struct nvme_common_command, dptr);
		return NULL;
	}

	down_read(&nvmet_config_sem);
	if (!nvmet_host_allowed(subsys, args->hostnqn)) {
		pr_info("connect by host %s for subsystem %s not allowed\n",
			args->hostnqn, args->subsysnqn);
		args->result = IPO_IATTR_CONNECT_DATA(hostnqn);
		up_read(&nvmet_config_sem);
		args->status = NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR;
		args->error_loc = offsetof(struct nvme_common_command, dptr);
		goto out_put_subsystem;
	}
	up_read(&nvmet_config_sem);

	args->status = NVME_SC_INTERNAL;
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		goto out_put_subsystem;
	mutex_init(&ctrl->lock);

	ctrl->port = args->port;
	ctrl->ops = args->ops;

#ifdef CONFIG_NVME_TARGET_PASSTHRU
	/* Loop targets clear the IDs by default */
	if (ctrl->port->disc_addr.trtype == NVMF_TRTYPE_LOOP)
		subsys->clear_ids = 1;
#endif

	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);
	INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
	INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);

	memcpy(ctrl->subsysnqn, args->subsysnqn, NVMF_NQN_SIZE);
	memcpy(ctrl->hostnqn, args->hostnqn, NVMF_NQN_SIZE);

	kref_init(&ctrl->ref);
	ctrl->subsys = subsys;
	ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support;
	nvmet_init_cap(ctrl);
	WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);

	ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
			sizeof(__le32), GFP_KERNEL);
	if (!ctrl->changed_ns_list)
		goto out_free_ctrl;

	ctrl->sqs = kcalloc(subsys->max_qid + 1,
			    sizeof(struct nvmet_sq *),
			    GFP_KERNEL);
	if (!ctrl->sqs)
		goto out_free_changed_ns_list;

	ctrl->cqs = kcalloc(subsys->max_qid + 1, sizeof(struct nvmet_cq *),
			    GFP_KERNEL);
	if (!ctrl->cqs)
		goto out_free_sqs;

	ret = ida_alloc_range(&cntlid_ida,
			      subsys->cntlid_min, subsys->cntlid_max,
			      GFP_KERNEL);
	if (ret < 0) {
		args->status = NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR;
		goto out_free_cqs;
	}
	ctrl->cntlid = ret;

	/*
	 * Discovery controllers may use some arbitrary high value
	 * in order to clean up stale discovery sessions
	 */
	if (nvmet_is_disc_subsys(ctrl->subsys) && !kato)
		kato = NVMET_DISC_KATO_MS;

	/* keep-alive timeout in seconds */
	ctrl->kato = DIV_ROUND_UP(kato, 1000);

	ctrl->err_counter = 0;
	spin_lock_init(&ctrl->error_lock);

	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	ret = nvmet_ctrl_init_pr(ctrl);
	if (ret)
		goto init_pr_fail;
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	nvmet_setup_p2p_ns_map(ctrl, args->p2p_client);
	nvmet_debugfs_ctrl_setup(ctrl);
	mutex_unlock(&subsys->lock);

	if (args->hostid)
		uuid_copy(&ctrl->hostid, args->hostid);

	dhchap_status = nvmet_setup_auth(ctrl, args->sq);
	if (dhchap_status) {
		pr_err("Failed to setup authentication, dhchap status %u\n",
		       dhchap_status);
		nvmet_ctrl_put(ctrl);
		if (dhchap_status == NVME_AUTH_DHCHAP_FAILURE_FAILED)
			args->status =
				NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR;
		else
			args->status = NVME_SC_INTERNAL;
		return NULL;
	}

	args->status = NVME_SC_SUCCESS;

	pr_info("Created %s controller %d for subsystem %s for NQN %s%s%s%s.\n",
		nvmet_is_disc_subsys(ctrl->subsys) ? "discovery" : "nvm",
		ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn,
		ctrl->pi_support ? " T10-PI is enabled" : "",
		nvmet_has_auth(ctrl, args->sq) ? " with DH-HMAC-CHAP" : "",
		nvmet_queue_tls_keyid(args->sq) ? ", TLS" : "");

	return ctrl;

init_pr_fail:
	mutex_unlock(&subsys->lock);
	nvmet_stop_keep_alive_timer(ctrl);
	ida_free(&cntlid_ida, ctrl->cntlid);
out_free_cqs:
	kfree(ctrl->cqs);
out_free_sqs:
	kfree(ctrl->sqs);
out_free_changed_ns_list:
	kfree(ctrl->changed_ns_list);
out_free_ctrl:
	kfree(ctrl);
out_put_subsystem:
	nvmet_subsys_put(subsys);
	return NULL;
}
EXPORT_SYMBOL_GPL(nvmet_alloc_ctrl);

static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	mutex_lock(&subsys->lock);
	nvmet_ctrl_destroy_pr(ctrl);
	nvmet_release_p2p_ns_map(ctrl);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	nvmet_stop_keep_alive_timer(ctrl);

	flush_work(&ctrl->async_event_work);
	cancel_work_sync(&ctrl->fatal_err_work);

	nvmet_destroy_auth(ctrl);

	nvmet_debugfs_ctrl_free(ctrl);

	ida_free(&cntlid_ida, ctrl->cntlid);

	nvmet_async_events_free(ctrl);
	kfree(ctrl->sqs);
	kfree(ctrl->cqs);
	kfree(ctrl->changed_ns_list);
	kfree(ctrl);

	nvmet_subsys_put(subsys);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvmet_ctrl_free);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_put);

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
	mutex_lock(&ctrl->lock);
	if (!(ctrl->csts & NVME_CSTS_CFS)) {
		ctrl->csts |= NVME_CSTS_CFS;
		queue_work(nvmet_wq, &ctrl->fatal_err_work);
	}
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);

ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl,
		char *traddr, size_t traddr_len)
{
	if (!ctrl->ops->host_traddr)
		return -EOPNOTSUPP;
	return ctrl->ops->host_traddr(ctrl, traddr, traddr_len);
}

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn)
{
	struct nvmet_subsys_link *p;

	if (!port)
		return NULL;

	if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
			return NULL;
		return nvmet_disc_subsys;
	}

	down_read(&nvmet_config_sem);
	if (!strncmp(nvmet_disc_subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
		if (kref_get_unless_zero(&nvmet_disc_subsys->ref)) {
			up_read(&nvmet_config_sem);
			return nvmet_disc_subsys;
		}
	}
	list_for_each_entry(p, &port->subsystems, entry) {
		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
			if (!kref_get_unless_zero(&p->subsys->ref))
				break;
			up_read(&nvmet_config_sem);
			return p->subsys;
		}
	}
	up_read(&nvmet_config_sem);
	return NULL;
}

struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type)
{
	struct nvmet_subsys *subsys;
	char serial[NVMET_SN_MAX_SIZE / 2];
	int ret;

	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
	if (!subsys)
		return ERR_PTR(-ENOMEM);

	subsys->ver = NVMET_DEFAULT_VS;
	/* generate a random serial number as our controllers are ephemeral: */
	get_random_bytes(&serial, sizeof(serial));
	bin2hex(subsys->serial, &serial, sizeof(serial));

	subsys->model_number = kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL);
	if (!subsys->model_number) {
		ret = -ENOMEM;
		goto free_subsys;
	}

	subsys->ieee_oui = 0;

	subsys->firmware_rev = kstrndup(UTS_RELEASE, NVMET_FR_MAX_SIZE, GFP_KERNEL);
	if (!subsys->firmware_rev) {
		ret = -ENOMEM;
		goto free_mn;
	}

	switch (type) {
	case NVME_NQN_NVME:
		subsys->max_qid = NVMET_NR_QUEUES;
		break;
	case NVME_NQN_DISC:
	case NVME_NQN_CURR:
		subsys->max_qid = 0;
		break;
	default:
		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
		ret = -EINVAL;
		goto free_fr;
	}
	subsys->type = type;
	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
			GFP_KERNEL);
	if (!subsys->subsysnqn) {
		ret = -ENOMEM;
		goto free_fr;
	}
	subsys->cntlid_min = NVME_CNTLID_MIN;
	subsys->cntlid_max = NVME_CNTLID_MAX;
	kref_init(&subsys->ref);

	mutex_init(&subsys->lock);
	xa_init(&subsys->namespaces);
	INIT_LIST_HEAD(&subsys->ctrls);
	INIT_LIST_HEAD(&subsys->hosts);

	ret = nvmet_debugfs_subsys_setup(subsys);
	if (ret)
		goto free_subsysnqn;

	return subsys;

free_subsysnqn:
	kfree(subsys->subsysnqn);
free_fr:
	kfree(subsys->firmware_rev);
free_mn:
	kfree(subsys->model_number);
free_subsys:
	kfree(subsys);
	return ERR_PTR(ret);
}

static void nvmet_subsys_free(struct kref *ref)
{
	struct nvmet_subsys *subsys =
		container_of(ref, struct nvmet_subsys, ref);

	WARN_ON_ONCE(!xa_empty(&subsys->namespaces));

	nvmet_debugfs_subsys_free(subsys);

	xa_destroy(&subsys->namespaces);
	nvmet_passthru_subsys_free(subsys);

	kfree(subsys->subsysnqn);
	kfree(subsys->model_number);
	kfree(subsys->firmware_rev);
	kfree(subsys);
}

void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		ctrl->ops->delete_ctrl(ctrl);
	mutex_unlock(&subsys->lock);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
	kref_put(&subsys->ref, nvmet_subsys_free);
}

static int __init nvmet_init(void)
{
	int error = -ENOMEM;

	nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;

	nvmet_bvec_cache = kmem_cache_create("nvmet-bvec",
			NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), 0,
			SLAB_HWCACHE_ALIGN, NULL);
	if (!nvmet_bvec_cache)
		return -ENOMEM;

	zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0);
	if (!zbd_wq)
		goto out_destroy_bvec_cache;

	buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
			WQ_MEM_RECLAIM, 0);
	if (!buffered_io_wq)
		goto out_free_zbd_work_queue;

	nvmet_wq = alloc_workqueue("nvmet-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nvmet_wq)
		goto out_free_buffered_work_queue;

	error = nvmet_init_debugfs();
	if (error)
		goto out_free_nvmet_work_queue;

	error = nvmet_init_discovery();
	if (error)
		goto out_exit_debugfs;

	error = nvmet_init_configfs();
	if (error)
		goto out_exit_discovery;

	return 0;

out_exit_discovery:
	nvmet_exit_discovery();
out_exit_debugfs:
	nvmet_exit_debugfs();
out_free_nvmet_work_queue:
	destroy_workqueue(nvmet_wq);
out_free_buffered_work_queue:
	destroy_workqueue(buffered_io_wq);
out_free_zbd_work_queue:
	destroy_workqueue(zbd_wq);
out_destroy_bvec_cache:
	kmem_cache_destroy(nvmet_bvec_cache);
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();
	nvmet_exit_debugfs();
	ida_destroy(&cntlid_ida);
	destroy_workqueue(nvmet_wq);
	destroy_workqueue(buffered_io_wq);
	destroy_workqueue(zbd_wq);
	kmem_cache_destroy(nvmet_bvec_cache);

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_DESCRIPTION("NVMe target core framework");
MODULE_LICENSE("GPL v2");