Lines Matching +full:multi +full:- +full:ported

1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2017-2018 Christoph Hellwig.
6 #include <linux/backing-dev.h>
18 bool *arg = kp->arg; in multipath_param_set()
27 return -EINVAL; in multipath_param_set()
46 bool *arg = kp->arg; in multipath_always_on_set()
66 …"create multipath node always except for private namespace with non-unique nsid; note that this al…
70 [NVME_IOPOLICY_RR] = "round-robin",
71 [NVME_IOPOLICY_QD] = "queue-depth",
79 return -EINVAL; in nvme_set_iopolicy()
82 else if (!strncmp(val, "round-robin", 11)) in nvme_set_iopolicy()
84 else if (!strncmp(val, "queue-depth", 11)) in nvme_set_iopolicy()
87 return -EINVAL; in nvme_set_iopolicy()
100 "Default multipath I/O policy; 'numa' (default), 'round-robin' or 'queue-depth'");
104 subsys->iopolicy = iopolicy; in nvme_mpath_default_iopolicy()
111 lockdep_assert_held(&subsys->lock); in nvme_mpath_unfreeze()
112 list_for_each_entry(h, &subsys->nsheads, entry) in nvme_mpath_unfreeze()
113 if (h->disk) in nvme_mpath_unfreeze()
114 blk_mq_unfreeze_queue_nomemrestore(h->disk->queue); in nvme_mpath_unfreeze()
121 lockdep_assert_held(&subsys->lock); in nvme_mpath_wait_freeze()
122 list_for_each_entry(h, &subsys->nsheads, entry) in nvme_mpath_wait_freeze()
123 if (h->disk) in nvme_mpath_wait_freeze()
124 blk_mq_freeze_queue_wait(h->disk->queue); in nvme_mpath_wait_freeze()
131 lockdep_assert_held(&subsys->lock); in nvme_mpath_start_freeze()
132 list_for_each_entry(h, &subsys->nsheads, entry) in nvme_mpath_start_freeze()
133 if (h->disk) in nvme_mpath_start_freeze()
134 blk_freeze_queue_start(h->disk->queue); in nvme_mpath_start_freeze()
139 struct nvme_ns *ns = req->q->queuedata; in nvme_failover_req()
140 u16 status = nvme_req(req)->status & NVME_SCT_SC_MASK; in nvme_failover_req()
148 * ready to serve this namespace. Kick off a re-read of the ANA in nvme_failover_req()
151 if (nvme_is_ana_error(status) && ns->ctrl->ana_log_buf) { in nvme_failover_req()
152 set_bit(NVME_NS_ANA_PENDING, &ns->flags); in nvme_failover_req()
153 queue_work(nvme_wq, &ns->ctrl->ana_work); in nvme_failover_req()
156 spin_lock_irqsave(&ns->head->requeue_lock, flags); in nvme_failover_req()
157 for (bio = req->bio; bio; bio = bio->bi_next) { in nvme_failover_req()
158 bio_set_dev(bio, ns->head->disk->part0); in nvme_failover_req()
159 if (bio->bi_opf & REQ_POLLED) { in nvme_failover_req()
160 bio->bi_opf &= ~REQ_POLLED; in nvme_failover_req()
161 bio->bi_cookie = BLK_QC_T_NONE; in nvme_failover_req()
170 bio->bi_opf &= ~REQ_NOWAIT; in nvme_failover_req()
172 blk_steal_bios(&ns->head->requeue_list, req); in nvme_failover_req()
173 spin_unlock_irqrestore(&ns->head->requeue_lock, flags); in nvme_failover_req()
175 nvme_req(req)->status = 0; in nvme_failover_req()
177 kblockd_schedule_work(&ns->head->requeue_work); in nvme_failover_req()
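
The failover fragments above boil down to: on an ANA error, kick the controller's ana_work, retarget each bio at the multipath node, steal the bios onto the head's requeue list under requeue_lock, and schedule the requeue worker. A minimal userspace sketch of that steal-and-requeue pattern (hypothetical types and names, not the kernel code):

#include <pthread.h>

struct io { struct io *next; };

struct mpath_head {
    pthread_mutex_t requeue_lock;
    struct io *requeue_list;      /* parked I/O, resubmitted later by a worker */
};

/* Steal every I/O still queued on a failed path and park it on the head. */
static void failover_requeue(struct mpath_head *h, struct io **failed_list)
{
    pthread_mutex_lock(&h->requeue_lock);
    while (*failed_list) {
        struct io *io = *failed_list;

        *failed_list = io->next;
        io->next = h->requeue_list;
        h->requeue_list = io;
    }
    pthread_mutex_unlock(&h->requeue_lock);
    /* a real implementation would now kick the requeue worker */
}
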
182 struct nvme_ns *ns = rq->q->queuedata; in nvme_mpath_start_request()
183 struct gendisk *disk = ns->head->disk; in nvme_mpath_start_request()
185 if ((READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD) && in nvme_mpath_start_request()
186 !(nvme_req(rq)->flags & NVME_MPATH_CNT_ACTIVE)) { in nvme_mpath_start_request()
187 atomic_inc(&ns->ctrl->nr_active); in nvme_mpath_start_request()
188 nvme_req(rq)->flags |= NVME_MPATH_CNT_ACTIVE; in nvme_mpath_start_request()
191 if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq) || in nvme_mpath_start_request()
192 (nvme_req(rq)->flags & NVME_MPATH_IO_STATS)) in nvme_mpath_start_request()
195 nvme_req(rq)->flags |= NVME_MPATH_IO_STATS; in nvme_mpath_start_request()
196 nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq), in nvme_mpath_start_request()
203 struct nvme_ns *ns = rq->q->queuedata; in nvme_mpath_end_request()
205 if (nvme_req(rq)->flags & NVME_MPATH_CNT_ACTIVE) in nvme_mpath_end_request()
206 atomic_dec_if_positive(&ns->ctrl->nr_active); in nvme_mpath_end_request()
208 if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS)) in nvme_mpath_end_request()
210 bdev_end_io_acct(ns->head->disk->part0, req_op(rq), in nvme_mpath_end_request()
212 nvme_req(rq)->start_time); in nvme_mpath_end_request()
220 srcu_idx = srcu_read_lock(&ctrl->srcu); in nvme_kick_requeue_lists()
221 list_for_each_entry_srcu(ns, &ctrl->namespaces, list, in nvme_kick_requeue_lists()
222 srcu_read_lock_held(&ctrl->srcu)) { in nvme_kick_requeue_lists()
223 if (!ns->head->disk) in nvme_kick_requeue_lists()
225 kblockd_schedule_work(&ns->head->requeue_work); in nvme_kick_requeue_lists()
226 if (nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE) in nvme_kick_requeue_lists()
227 disk_uevent(ns->head->disk, KOBJ_CHANGE); in nvme_kick_requeue_lists()
229 srcu_read_unlock(&ctrl->srcu, srcu_idx); in nvme_kick_requeue_lists()
235 [NVME_ANA_NONOPTIMIZED] = "non-optimized",
237 [NVME_ANA_PERSISTENT_LOSS] = "persistent-loss",
243 struct nvme_ns_head *head = ns->head; in nvme_mpath_clear_current_path()
251 if (ns == rcu_access_pointer(head->current_path[node])) { in nvme_mpath_clear_current_path()
252 rcu_assign_pointer(head->current_path[node], NULL); in nvme_mpath_clear_current_path()
265 srcu_idx = srcu_read_lock(&ctrl->srcu); in nvme_mpath_clear_ctrl_paths()
266 list_for_each_entry_srcu(ns, &ctrl->namespaces, list, in nvme_mpath_clear_ctrl_paths()
267 srcu_read_lock_held(&ctrl->srcu)) { in nvme_mpath_clear_ctrl_paths()
269 kblockd_schedule_work(&ns->head->requeue_work); in nvme_mpath_clear_ctrl_paths()
271 srcu_read_unlock(&ctrl->srcu, srcu_idx); in nvme_mpath_clear_ctrl_paths()
276 struct nvme_ns_head *head = ns->head; in nvme_mpath_revalidate_paths()
277 sector_t capacity = get_capacity(head->disk); in nvme_mpath_revalidate_paths()
281 srcu_idx = srcu_read_lock(&head->srcu); in nvme_mpath_revalidate_paths()
282 list_for_each_entry_srcu(ns, &head->list, siblings, in nvme_mpath_revalidate_paths()
283 srcu_read_lock_held(&head->srcu)) { in nvme_mpath_revalidate_paths()
284 if (capacity != get_capacity(ns->disk)) in nvme_mpath_revalidate_paths()
285 clear_bit(NVME_NS_READY, &ns->flags); in nvme_mpath_revalidate_paths()
287 srcu_read_unlock(&head->srcu, srcu_idx); in nvme_mpath_revalidate_paths()
290 rcu_assign_pointer(head->current_path[node], NULL); in nvme_mpath_revalidate_paths()
291 kblockd_schedule_work(&head->requeue_work); in nvme_mpath_revalidate_paths()
296 enum nvme_ctrl_state state = nvme_ctrl_state(ns->ctrl); in nvme_path_is_disabled()
305 if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) || in nvme_path_is_disabled()
306 !test_bit(NVME_NS_READY, &ns->flags)) in nvme_path_is_disabled()
316 list_for_each_entry_srcu(ns, &head->list, siblings, in __nvme_find_path()
317 srcu_read_lock_held(&head->srcu)) { in __nvme_find_path()
321 if (ns->ctrl->numa_node != NUMA_NO_NODE && in __nvme_find_path()
322 READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA) in __nvme_find_path()
323 distance = node_distance(node, ns->ctrl->numa_node); in __nvme_find_path()
327 switch (ns->ana_state) { in __nvme_find_path()
348 rcu_assign_pointer(head->current_path[node], found); in __nvme_find_path()
355 ns = list_next_or_null_rcu(&head->list, &ns->siblings, struct nvme_ns, in nvme_next_ns()
359 return list_first_or_null_rcu(&head->list, struct nvme_ns, siblings); in nvme_next_ns()
366 struct nvme_ns *old = srcu_dereference(head->current_path[node], in nvme_round_robin_path()
367 &head->srcu); in nvme_round_robin_path()
372 if (list_is_singular(&head->list)) { in nvme_round_robin_path()
384 if (ns->ana_state == NVME_ANA_OPTIMIZED) { in nvme_round_robin_path()
388 if (ns->ana_state == NVME_ANA_NONOPTIMIZED) in nvme_round_robin_path()
393 * The loop above skips the current path for round-robin semantics. in nvme_round_robin_path()
395 * - no other optimized path found and current is optimized, in nvme_round_robin_path()
396 * - no other usable path found and current is usable. in nvme_round_robin_path()
399 (old->ana_state == NVME_ANA_OPTIMIZED || in nvme_round_robin_path()
400 (!found && old->ana_state == NVME_ANA_NONOPTIMIZED))) in nvme_round_robin_path()
406 rcu_assign_pointer(head->current_path[node], found); in nvme_round_robin_path()
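
The comment above states the round-robin fallback rule: after the scan has skipped the current path, fall back to it only if nothing better was found. A hedged sketch of just that decision, with simplified stand-in types (not the kernel's):

enum ana_state { ANA_OPTIMIZED, ANA_NONOPTIMIZED, ANA_INACCESSIBLE };

struct rr_path {
    enum ana_state ana_state;
    int disabled;
};

/*
 * "found" is the best non-optimized candidate from the scan (an optimized
 * one would already have been returned); "old" is the path the scan skipped.
 */
static struct rr_path *rr_fallback(struct rr_path *found, struct rr_path *old)
{
    if (old && !old->disabled &&
        (old->ana_state == ANA_OPTIMIZED ||
         (!found && old->ana_state == ANA_NONOPTIMIZED)))
        return old;
    return found;
}
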
416 list_for_each_entry_srcu(ns, &head->list, siblings, in nvme_queue_depth_path()
417 srcu_read_lock_held(&head->srcu)) { in nvme_queue_depth_path()
421 depth = atomic_read(&ns->ctrl->nr_active); in nvme_queue_depth_path()
423 switch (ns->ana_state) { in nvme_queue_depth_path()
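
nvme_queue_depth_path() picks, among usable paths, the ANA-optimized path with the fewest in-flight requests (ctrl->nr_active), falling back to the least-loaded non-optimized path. A simplified, self-contained sketch of that selection (stand-in types, not the kernel's):

#include <limits.h>

struct qd_path {
    int optimized;    /* ANA optimized vs. non-optimized */
    int nr_active;    /* in-flight requests on this path's controller */
    int disabled;
};

/* Returns the index of the chosen path, or -1 if none is usable. */
static int pick_queue_depth_path(const struct qd_path *p, int n)
{
    int best = -1, best_opt = -1;
    int depth = INT_MAX, depth_opt = INT_MAX;

    for (int i = 0; i < n; i++) {
        if (p[i].disabled)
            continue;
        if (p[i].optimized && p[i].nr_active < depth_opt) {
            depth_opt = p[i].nr_active;
            best_opt = i;
        } else if (!p[i].optimized && p[i].nr_active < depth) {
            depth = p[i].nr_active;
            best = i;
        }
    }
    return best_opt >= 0 ? best_opt : best;
}
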
449 return nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE && in nvme_path_is_optimized()
450 ns->ana_state == NVME_ANA_OPTIMIZED; in nvme_path_is_optimized()
458 ns = srcu_dereference(head->current_path[node], &head->srcu); in nvme_numa_path()
468 switch (READ_ONCE(head->subsys->iopolicy)) { in nvme_find_path()
482 if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) in nvme_available_path()
485 list_for_each_entry_srcu(ns, &head->list, siblings, in nvme_available_path()
486 srcu_read_lock_held(&head->srcu)) { in nvme_available_path()
487 if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags)) in nvme_available_path()
489 switch (nvme_ctrl_state(ns->ctrl)) { in nvme_available_path()
500 * If "head->delayed_removal_secs" is configured (i.e., non-zero), do in nvme_available_path()
506 * non-zero, this flag is set to true. When zero, the flag is cleared. in nvme_available_path()
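
The comment above says that a configured head->delayed_removal_secs sets a queue-if-no-path flag so that I/O is requeued rather than failed while no path is currently usable. A hedged model of that decision (assumed simplification, not the kernel function):

#include <stdbool.h>

struct head_state {
    bool disk_live;          /* multipath node still registered */
    bool path_may_recover;   /* some controller is live/resetting/connecting */
    bool queue_if_no_path;   /* set while delayed_removal_secs != 0 */
};

static bool should_requeue(const struct head_state *h)
{
    if (!h->disk_live)
        return false;        /* head is being torn down: fail the I/O */
    return h->path_may_recover || h->queue_if_no_path;
}
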
513 struct nvme_ns_head *head = bio->bi_bdev->bd_disk->private_data; in nvme_ns_head_submit_bio()
514 struct device *dev = disk_to_dev(head->disk); in nvme_ns_head_submit_bio()
527 srcu_idx = srcu_read_lock(&head->srcu); in nvme_ns_head_submit_bio()
530 bio_set_dev(bio, ns->disk->part0); in nvme_ns_head_submit_bio()
531 bio->bi_opf |= REQ_NVME_MPATH; in nvme_ns_head_submit_bio()
532 trace_block_bio_remap(bio, disk_devt(ns->head->disk), in nvme_ns_head_submit_bio()
533 bio->bi_iter.bi_sector); in nvme_ns_head_submit_bio()
536 dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n"); in nvme_ns_head_submit_bio()
538 spin_lock_irq(&head->requeue_lock); in nvme_ns_head_submit_bio()
539 bio_list_add(&head->requeue_list, bio); in nvme_ns_head_submit_bio()
540 spin_unlock_irq(&head->requeue_lock); in nvme_ns_head_submit_bio()
542 dev_warn_ratelimited(dev, "no available path - failing I/O\n"); in nvme_ns_head_submit_bio()
547 srcu_read_unlock(&head->srcu, srcu_idx); in nvme_ns_head_submit_bio()
552 if (!nvme_tryget_ns_head(disk->private_data)) in nvme_ns_head_open()
553 return -ENXIO; in nvme_ns_head_open()
559 nvme_put_ns_head(disk->private_data); in nvme_ns_head_release()
565 struct nvme_ns_head *head = disk->private_data; in nvme_ns_head_get_unique_id()
567 int srcu_idx, ret = -EWOULDBLOCK; in nvme_ns_head_get_unique_id()
569 srcu_idx = srcu_read_lock(&head->srcu); in nvme_ns_head_get_unique_id()
573 srcu_read_unlock(&head->srcu, srcu_idx); in nvme_ns_head_get_unique_id()
581 struct nvme_ns_head *head = disk->private_data; in nvme_ns_head_report_zones()
583 int srcu_idx, ret = -EWOULDBLOCK; in nvme_ns_head_report_zones()
585 srcu_idx = srcu_read_lock(&head->srcu); in nvme_ns_head_report_zones()
589 srcu_read_unlock(&head->srcu, srcu_idx); in nvme_ns_head_report_zones()
616 if (!nvme_tryget_ns_head(cdev_to_ns_head(inode->i_cdev))) in nvme_ns_head_chr_open()
617 return -ENXIO; in nvme_ns_head_chr_open()
623 nvme_put_ns_head(cdev_to_ns_head(inode->i_cdev)); in nvme_ns_head_chr_release()
641 head->cdev_device.parent = &head->subsys->dev; in nvme_add_ns_head_cdev()
642 ret = dev_set_name(&head->cdev_device, "ng%dn%d", in nvme_add_ns_head_cdev()
643 head->subsys->instance, head->instance); in nvme_add_ns_head_cdev()
646 ret = nvme_cdev_add(&head->cdev, &head->cdev_device, in nvme_add_ns_head_cdev()
657 &head->disk->state))) in nvme_partition_scan_work()
660 mutex_lock(&head->disk->open_mutex); in nvme_partition_scan_work()
661 bdev_disk_changed(head->disk, false); in nvme_partition_scan_work()
662 mutex_unlock(&head->disk->open_mutex); in nvme_partition_scan_work()
671 spin_lock_irq(&head->requeue_lock); in nvme_requeue_work()
672 next = bio_list_get(&head->requeue_list); in nvme_requeue_work()
673 spin_unlock_irq(&head->requeue_lock); in nvme_requeue_work()
676 next = bio->bi_next; in nvme_requeue_work()
677 bio->bi_next = NULL; in nvme_requeue_work()
685 if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { in nvme_remove_head()
690 kblockd_schedule_work(&head->requeue_work); in nvme_remove_head()
692 nvme_cdev_del(&head->cdev, &head->cdev_device); in nvme_remove_head()
693 synchronize_srcu(&head->srcu); in nvme_remove_head()
694 del_gendisk(head->disk); in nvme_remove_head()
705 mutex_lock(&head->subsys->lock); in nvme_remove_head_work()
706 if (list_empty(&head->list)) { in nvme_remove_head_work()
707 list_del_init(&head->entry); in nvme_remove_head_work()
710 mutex_unlock(&head->subsys->lock); in nvme_remove_head_work()
721 mutex_init(&head->lock); in nvme_mpath_alloc_disk()
722 bio_list_init(&head->requeue_list); in nvme_mpath_alloc_disk()
723 spin_lock_init(&head->requeue_lock); in nvme_mpath_alloc_disk()
724 INIT_WORK(&head->requeue_work, nvme_requeue_work); in nvme_mpath_alloc_disk()
725 INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work); in nvme_mpath_alloc_disk()
726 INIT_DELAYED_WORK(&head->remove_work, nvme_remove_head_work); in nvme_mpath_alloc_disk()
727 head->delayed_removal_secs = 0; in nvme_mpath_alloc_disk()
731 * regardless of whether the disk is single/multi ported, and whether in nvme_mpath_alloc_disk()
739 if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || in nvme_mpath_alloc_disk()
751 if (head->ids.csi == NVME_CSI_ZNS) in nvme_mpath_alloc_disk()
754 head->disk = blk_alloc_disk(&lim, ctrl->numa_node); in nvme_mpath_alloc_disk()
755 if (IS_ERR(head->disk)) in nvme_mpath_alloc_disk()
756 return PTR_ERR(head->disk); in nvme_mpath_alloc_disk()
757 head->disk->fops = &nvme_ns_head_ops; in nvme_mpath_alloc_disk()
758 head->disk->private_data = head; in nvme_mpath_alloc_disk()
768 set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state); in nvme_mpath_alloc_disk()
769 sprintf(head->disk->disk_name, "nvme%dn%d", in nvme_mpath_alloc_disk()
770 ctrl->subsys->instance, head->instance); in nvme_mpath_alloc_disk()
777 struct nvme_ns_head *head = ns->head; in nvme_mpath_set_live()
780 if (!head->disk) in nvme_mpath_set_live()
788 if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { in nvme_mpath_set_live()
789 rc = device_add_disk(&head->subsys->dev, head->disk, in nvme_mpath_set_live()
792 clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags); in nvme_mpath_set_live()
796 kblockd_schedule_work(&head->partition_scan_work); in nvme_mpath_set_live()
799 nvme_mpath_add_sysfs_link(ns->head); in nvme_mpath_set_live()
801 mutex_lock(&head->lock); in nvme_mpath_set_live()
805 srcu_idx = srcu_read_lock(&head->srcu); in nvme_mpath_set_live()
808 srcu_read_unlock(&head->srcu, srcu_idx); in nvme_mpath_set_live()
810 mutex_unlock(&head->lock); in nvme_mpath_set_live()
812 synchronize_srcu(&head->srcu); in nvme_mpath_set_live()
813 kblockd_schedule_work(&head->requeue_work); in nvme_mpath_set_live()
820 void *base = ctrl->ana_log_buf; in nvme_parse_ana_log()
824 lockdep_assert_held(&ctrl->ana_lock); in nvme_parse_ana_log()
826 for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) { in nvme_parse_ana_log()
831 if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc))) in nvme_parse_ana_log()
832 return -EINVAL; in nvme_parse_ana_log()
834 nr_nsids = le32_to_cpu(desc->nnsids); in nvme_parse_ana_log()
837 if (WARN_ON_ONCE(desc->grpid == 0)) in nvme_parse_ana_log()
838 return -EINVAL; in nvme_parse_ana_log()
839 if (WARN_ON_ONCE(le32_to_cpu(desc->grpid) > ctrl->anagrpmax)) in nvme_parse_ana_log()
840 return -EINVAL; in nvme_parse_ana_log()
841 if (WARN_ON_ONCE(desc->state == 0)) in nvme_parse_ana_log()
842 return -EINVAL; in nvme_parse_ana_log()
843 if (WARN_ON_ONCE(desc->state > NVME_ANA_CHANGE)) in nvme_parse_ana_log()
844 return -EINVAL; in nvme_parse_ana_log()
847 if (WARN_ON_ONCE(offset > ctrl->ana_log_size - nsid_buf_size)) in nvme_parse_ana_log()
848 return -EINVAL; in nvme_parse_ana_log()
868 ns->ana_grpid = le32_to_cpu(desc->grpid); in nvme_update_ns_ana_state()
869 ns->ana_state = desc->state; in nvme_update_ns_ana_state()
870 clear_bit(NVME_NS_ANA_PENDING, &ns->flags); in nvme_update_ns_ana_state()
880 if (nvme_state_is_live(ns->ana_state) && in nvme_update_ns_ana_state()
881 nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE) in nvme_update_ns_ana_state()
888 * or non-optimized) while we alloc the ns then sysfs link would in nvme_update_ns_ana_state()
899 if (test_bit(NVME_NSHEAD_DISK_LIVE, &ns->head->flags)) in nvme_update_ns_ana_state()
900 nvme_mpath_add_sysfs_link(ns->head); in nvme_update_ns_ana_state()
907 u32 nr_nsids = le32_to_cpu(desc->nnsids), n = 0; in nvme_update_ana_state()
912 dev_dbg(ctrl->device, "ANA group %d: %s.\n", in nvme_update_ana_state()
913 le32_to_cpu(desc->grpid), in nvme_update_ana_state()
914 nvme_ana_state_names[desc->state]); in nvme_update_ana_state()
916 if (desc->state == NVME_ANA_CHANGE) in nvme_update_ana_state()
922 srcu_idx = srcu_read_lock(&ctrl->srcu); in nvme_update_ana_state()
923 list_for_each_entry_srcu(ns, &ctrl->namespaces, list, in nvme_update_ana_state()
924 srcu_read_lock_held(&ctrl->srcu)) { in nvme_update_ana_state()
927 nsid = le32_to_cpu(desc->nsids[n]); in nvme_update_ana_state()
928 if (ns->head->ns_id < nsid) in nvme_update_ana_state()
930 if (ns->head->ns_id == nsid) in nvme_update_ana_state()
934 if (ns->head->ns_id > nsid) in nvme_update_ana_state()
937 srcu_read_unlock(&ctrl->srcu, srcu_idx); in nvme_update_ana_state()
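
nvme_update_ana_state() walks the controller's namespace list (kept sorted by NSID) against the group descriptor's NSID array (also ascending), updating only the namespaces the descriptor mentions. A hedged sketch of the same merge-style matching with simplified types, not the kernel's control flow:

#include <stddef.h>

static void update_matching(const unsigned int *ns_ids, size_t ns_count,
                            const unsigned int *desc_nsids, size_t nr_nsids,
                            void (*update)(unsigned int nsid))
{
    size_t n = 0;

    for (size_t i = 0; i < ns_count && n < nr_nsids; i++) {
        /* skip descriptor NSIDs for which no local namespace exists */
        while (n < nr_nsids && desc_nsids[n] < ns_ids[i])
            n++;
        if (n < nr_nsids && desc_nsids[n] == ns_ids[i])
            update(ns_ids[i]);
    }
}
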
946 mutex_lock(&ctrl->ana_lock); in nvme_read_ana_log()
948 ctrl->ana_log_buf, ctrl->ana_log_size, 0); in nvme_read_ana_log()
950 dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error); in nvme_read_ana_log()
971 mod_timer(&ctrl->anatt_timer, ctrl->anatt * HZ * 2 + jiffies); in nvme_read_ana_log()
973 timer_delete_sync(&ctrl->anatt_timer); in nvme_read_ana_log()
975 mutex_unlock(&ctrl->ana_lock); in nvme_read_ana_log()
993 if (!ctrl->ana_log_buf) in nvme_mpath_update()
996 mutex_lock(&ctrl->ana_lock); in nvme_mpath_update()
998 mutex_unlock(&ctrl->ana_lock); in nvme_mpath_update()
1005 dev_info(ctrl->device, "ANATT timeout, resetting controller.\n"); in nvme_anatt_timeout()
1013 timer_delete_sync(&ctrl->anatt_timer); in nvme_mpath_stop()
1014 cancel_work_sync(&ctrl->ana_work); in nvme_mpath_stop()
1028 nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]); in nvme_subsys_iopolicy_show()
1035 int old_iopolicy = READ_ONCE(subsys->iopolicy); in nvme_subsys_iopolicy_update()
1040 WRITE_ONCE(subsys->iopolicy, iopolicy); in nvme_subsys_iopolicy_update()
1044 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) in nvme_subsys_iopolicy_update()
1049 subsys->subnqn, in nvme_subsys_iopolicy_update()
1068 return -EINVAL; in nvme_subsys_iopolicy_store()
1076 return sysfs_emit(buf, "%d\n", nvme_get_ns_from_dev(dev)->ana_grpid); in ana_grpid_show()
1085 return sysfs_emit(buf, "%s\n", nvme_ana_state_names[ns->ana_state]); in ana_state_show()
1094 if (ns->head->subsys->iopolicy != NVME_IOPOLICY_QD) in queue_depth_show()
1097 return sysfs_emit(buf, "%d\n", atomic_read(&ns->ctrl->nr_active)); in queue_depth_show()
1108 struct nvme_ns_head *head = ns->head; in numa_nodes_show()
1110 if (head->subsys->iopolicy != NVME_IOPOLICY_NUMA) in numa_nodes_show()
1115 srcu_idx = srcu_read_lock(&head->srcu); in numa_nodes_show()
1117 current_ns = srcu_dereference(head->current_path[node], in numa_nodes_show()
1118 &head->srcu); in numa_nodes_show()
1122 srcu_read_unlock(&head->srcu, srcu_idx); in numa_nodes_show()
1132 struct nvme_ns_head *head = disk->private_data; in delayed_removal_secs_show()
1135 mutex_lock(&head->subsys->lock); in delayed_removal_secs_show()
1136 ret = sysfs_emit(buf, "%u\n", head->delayed_removal_secs); in delayed_removal_secs_show()
1137 mutex_unlock(&head->subsys->lock); in delayed_removal_secs_show()
1145 struct nvme_ns_head *head = disk->private_data; in delayed_removal_secs_store()
1153 mutex_lock(&head->subsys->lock); in delayed_removal_secs_store()
1154 head->delayed_removal_secs = sec; in delayed_removal_secs_store()
1156 set_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags); in delayed_removal_secs_store()
1158 clear_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags); in delayed_removal_secs_store()
1159 mutex_unlock(&head->subsys->lock); in delayed_removal_secs_store()
1164 synchronize_srcu(&head->srcu); in delayed_removal_secs_store()
1176 if (desc->grpid != dst->grpid) in nvme_lookup_ana_group_desc()
1180 return -ENXIO; /* just break out of the loop */ in nvme_lookup_ana_group_desc()
1194 if (!test_bit(GD_ADDED, &head->disk->state)) in nvme_mpath_add_sysfs_link()
1197 kobj = &disk_to_dev(head->disk)->kobj; in nvme_mpath_add_sysfs_link()
1200 * loop through each ns chained through the head->list and create the in nvme_mpath_add_sysfs_link()
1203 srcu_idx = srcu_read_lock(&head->srcu); in nvme_mpath_add_sysfs_link()
1205 list_for_each_entry_srcu(ns, &head->list, siblings, in nvme_mpath_add_sysfs_link()
1206 srcu_read_lock_held(&head->srcu)) { in nvme_mpath_add_sysfs_link()
1211 if (!test_bit(GD_ADDED, &ns->disk->state)) in nvme_mpath_add_sysfs_link()
1216 * When path ana state transitions from optimized to non- in nvme_mpath_add_sysfs_link()
1217 * optimized or vice-versa, the nvme_mpath_set_live() is in nvme_mpath_add_sysfs_link()
1219 * link already exists for the given path and we attempt to re- in nvme_mpath_add_sysfs_link()
1226 if (test_and_set_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags)) in nvme_mpath_add_sysfs_link()
1229 target = disk_to_dev(ns->disk); in nvme_mpath_add_sysfs_link()
1232 * ns path gendisk kobject @target->kobj. in nvme_mpath_add_sysfs_link()
1235 &target->kobj, dev_name(target)); in nvme_mpath_add_sysfs_link()
1237 dev_err(disk_to_dev(ns->head->disk), in nvme_mpath_add_sysfs_link()
1240 clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags); in nvme_mpath_add_sysfs_link()
1244 srcu_read_unlock(&head->srcu, srcu_idx); in nvme_mpath_add_sysfs_link()
1252 if (!test_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags)) in nvme_mpath_remove_sysfs_link()
1255 target = disk_to_dev(ns->disk); in nvme_mpath_remove_sysfs_link()
1256 kobj = &disk_to_dev(ns->head->disk)->kobj; in nvme_mpath_remove_sysfs_link()
1259 clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags); in nvme_mpath_remove_sysfs_link()
1264 if (nvme_ctrl_use_ana(ns->ctrl)) { in nvme_mpath_add_disk()
1270 mutex_lock(&ns->ctrl->ana_lock); in nvme_mpath_add_disk()
1271 ns->ana_grpid = le32_to_cpu(anagrpid); in nvme_mpath_add_disk()
1272 nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc); in nvme_mpath_add_disk()
1273 mutex_unlock(&ns->ctrl->ana_lock); in nvme_mpath_add_disk()
1278 /* group desc not found: trigger a re-read */ in nvme_mpath_add_disk()
1279 set_bit(NVME_NS_ANA_PENDING, &ns->flags); in nvme_mpath_add_disk()
1280 queue_work(nvme_wq, &ns->ctrl->ana_work); in nvme_mpath_add_disk()
1283 ns->ana_state = NVME_ANA_OPTIMIZED; in nvme_mpath_add_disk()
1288 if (blk_queue_is_zoned(ns->queue) && ns->head->disk) in nvme_mpath_add_disk()
1289 ns->head->disk->nr_zones = ns->disk->nr_zones; in nvme_mpath_add_disk()
1297 if (!head->disk) in nvme_mpath_remove_disk()
1300 mutex_lock(&head->subsys->lock); in nvme_mpath_remove_disk()
1303 * head->list is expected to be empty. However, nvme_remove_ns() and in nvme_mpath_remove_disk()
1304 * nvme_init_ns_head() can run concurrently and so if head->delayed_ in nvme_mpath_remove_disk()
1306 * this point, head->list may no longer be empty. Therefore, we recheck in nvme_mpath_remove_disk()
1307 * head->list here. If it is no longer empty then we skip enqueuing the in nvme_mpath_remove_disk()
1310 if (!list_empty(&head->list)) in nvme_mpath_remove_disk()
1313 if (head->delayed_removal_secs) { in nvme_mpath_remove_disk()
1320 mod_delayed_work(nvme_wq, &head->remove_work, in nvme_mpath_remove_disk()
1321 head->delayed_removal_secs * HZ); in nvme_mpath_remove_disk()
1323 list_del_init(&head->entry); in nvme_mpath_remove_disk()
1327 mutex_unlock(&head->subsys->lock); in nvme_mpath_remove_disk()
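
nvme_mpath_remove_disk() illustrates a check-then-recheck pattern: head->list emptiness is re-evaluated under subsys->lock before committing to (possibly delayed) removal, so a path re-added concurrently by nvme_init_ns_head() cancels it. A hedged userspace sketch of that pattern (hypothetical names):

#include <pthread.h>
#include <stdbool.h>

struct removal_ctx {
    pthread_mutex_t lock;             /* stand-in for subsys->lock */
    int nr_paths;                     /* stand-in for list_empty(&head->list) */
    unsigned int delayed_removal_secs;
};

/* Returns true if (delayed) removal was committed. */
static bool maybe_remove_head(struct removal_ctx *c,
                              void (*schedule_delayed)(unsigned int secs),
                              void (*remove_now)(void))
{
    bool committed = false;

    pthread_mutex_lock(&c->lock);
    if (c->nr_paths == 0) {           /* recheck now that the lock is held */
        if (c->delayed_removal_secs)
            schedule_delayed(c->delayed_removal_secs);
        else
            remove_now();
        committed = true;
    }
    pthread_mutex_unlock(&c->lock);
    return committed;
}
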
1334 if (!head->disk) in nvme_mpath_put_disk()
1337 kblockd_schedule_work(&head->requeue_work); in nvme_mpath_put_disk()
1338 flush_work(&head->requeue_work); in nvme_mpath_put_disk()
1339 flush_work(&head->partition_scan_work); in nvme_mpath_put_disk()
1340 put_disk(head->disk); in nvme_mpath_put_disk()
1345 mutex_init(&ctrl->ana_lock); in nvme_mpath_init_ctrl()
1346 timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0); in nvme_mpath_init_ctrl()
1347 INIT_WORK(&ctrl->ana_work, nvme_ana_work); in nvme_mpath_init_ctrl()
1352 size_t max_transfer_size = ctrl->max_hw_sectors << SECTOR_SHIFT; in nvme_mpath_init_identify()
1357 if (!multipath || !ctrl->subsys || in nvme_mpath_init_identify()
1358 !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)) in nvme_mpath_init_identify()
1362 atomic_set(&ctrl->nr_active, 0); in nvme_mpath_init_identify()
1364 if (!ctrl->max_namespaces || in nvme_mpath_init_identify()
1365 ctrl->max_namespaces > le32_to_cpu(id->nn)) { in nvme_mpath_init_identify()
1366 dev_err(ctrl->device, in nvme_mpath_init_identify()
1367 "Invalid MNAN value %u\n", ctrl->max_namespaces); in nvme_mpath_init_identify()
1368 return -EINVAL; in nvme_mpath_init_identify()
1371 ctrl->anacap = id->anacap; in nvme_mpath_init_identify()
1372 ctrl->anatt = id->anatt; in nvme_mpath_init_identify()
1373 ctrl->nanagrpid = le32_to_cpu(id->nanagrpid); in nvme_mpath_init_identify()
1374 ctrl->anagrpmax = le32_to_cpu(id->anagrpmax); in nvme_mpath_init_identify()
1377 ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc) + in nvme_mpath_init_identify()
1378 ctrl->max_namespaces * sizeof(__le32); in nvme_mpath_init_identify()
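
For scale, assuming the spec-defined sizes (16-byte ANA log header on the elided line above, 32-byte group descriptors, 4-byte NSID entries): a controller reporting NANAGRPID = 32 and MNAN = 1024 would need 16 + 32 * 32 + 1024 * 4 = 5136 bytes of ANA log buffer.
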
1380 dev_err(ctrl->device, in nvme_mpath_init_identify()
1383 dev_err(ctrl->device, "disabling ANA support.\n"); in nvme_mpath_init_identify()
1386 if (ana_log_size > ctrl->ana_log_size) { in nvme_mpath_init_identify()
1389 ctrl->ana_log_buf = kvmalloc(ana_log_size, GFP_KERNEL); in nvme_mpath_init_identify()
1390 if (!ctrl->ana_log_buf) in nvme_mpath_init_identify()
1391 return -ENOMEM; in nvme_mpath_init_identify()
1393 ctrl->ana_log_size = ana_log_size; in nvme_mpath_init_identify()
1406 kvfree(ctrl->ana_log_buf); in nvme_mpath_uninit()
1407 ctrl->ana_log_buf = NULL; in nvme_mpath_uninit()
1408 ctrl->ana_log_size = 0; in nvme_mpath_uninit()