Lines matching "ns" in drivers/nvme/host/multipath.c (code-search listing: each match is prefixed with its file line number and suffixed with the enclosing function)

1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2017-2018 Christoph Hellwig.
6 #include <linux/backing-dev.h>
18 bool *arg = kp->arg; in multipath_param_set()
27 return -EINVAL; in multipath_param_set()
46 bool *arg = kp->arg; in multipath_always_on_set()
66 …"create multipath node always except for private namespace with non-unique nsid; note that this al…
70 [NVME_IOPOLICY_RR] = "round-robin",
71 [NVME_IOPOLICY_QD] = "queue-depth",
79 return -EINVAL; in nvme_set_iopolicy()
82 else if (!strncmp(val, "round-robin", 11)) in nvme_set_iopolicy()
84 else if (!strncmp(val, "queue-depth", 11)) in nvme_set_iopolicy()
87 return -EINVAL; in nvme_set_iopolicy()
100 "Default multipath I/O policy; 'numa' (default), 'round-robin' or 'queue-depth'");
104 subsys->iopolicy = iopolicy; in nvme_mpath_default_iopolicy()
111 lockdep_assert_held(&subsys->lock); in nvme_mpath_unfreeze()
112 list_for_each_entry(h, &subsys->nsheads, entry) in nvme_mpath_unfreeze()
113 if (h->disk) in nvme_mpath_unfreeze()
114 blk_mq_unfreeze_queue_nomemrestore(h->disk->queue); in nvme_mpath_unfreeze()
121 lockdep_assert_held(&subsys->lock); in nvme_mpath_wait_freeze()
122 list_for_each_entry(h, &subsys->nsheads, entry) in nvme_mpath_wait_freeze()
123 if (h->disk) in nvme_mpath_wait_freeze()
124 blk_mq_freeze_queue_wait(h->disk->queue); in nvme_mpath_wait_freeze()
131 lockdep_assert_held(&subsys->lock); in nvme_mpath_start_freeze()
132 list_for_each_entry(h, &subsys->nsheads, entry) in nvme_mpath_start_freeze()
133 if (h->disk) in nvme_mpath_start_freeze()
134 blk_freeze_queue_start(h->disk->queue); in nvme_mpath_start_freeze()
139 struct nvme_ns *ns = req->q->queuedata; in nvme_failover_req() local
140 u16 status = nvme_req(req)->status & NVME_SCT_SC_MASK; in nvme_failover_req()
144 nvme_mpath_clear_current_path(ns); in nvme_failover_req()
148 * ready to serve this namespace. Kick off a re-read of the ANA in nvme_failover_req()
151 if (nvme_is_ana_error(status) && ns->ctrl->ana_log_buf) { in nvme_failover_req()
152 set_bit(NVME_NS_ANA_PENDING, &ns->flags); in nvme_failover_req()
153 queue_work(nvme_wq, &ns->ctrl->ana_work); in nvme_failover_req()
156 spin_lock_irqsave(&ns->head->requeue_lock, flags); in nvme_failover_req()
157 for (bio = req->bio; bio; bio = bio->bi_next) { in nvme_failover_req()
158 bio_set_dev(bio, ns->head->disk->part0); in nvme_failover_req()
159 if (bio->bi_opf & REQ_POLLED) { in nvme_failover_req()
160 bio->bi_opf &= ~REQ_POLLED; in nvme_failover_req()
161 bio->bi_cookie = BLK_QC_T_NONE; in nvme_failover_req()
170 bio->bi_opf &= ~REQ_NOWAIT; in nvme_failover_req()
172 blk_steal_bios(&ns->head->requeue_list, req); in nvme_failover_req()
173 spin_unlock_irqrestore(&ns->head->requeue_lock, flags); in nvme_failover_req()
175 nvme_req(req)->status = 0; in nvme_failover_req()
177 kblockd_schedule_work(&ns->head->requeue_work); in nvme_failover_req()
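A minimal userspace model of the failover idea shown above: retarget each parked I/O to the shared multipath node, drop flags that only make sense on the original submission (polled, nowait), and splice everything onto the head's requeue list under its lock. All names and types here (struct io, struct head, failover) are illustrative, not the kernel API:

#include <pthread.h>
#include <stddef.h>

#define F_POLLED  (1u << 0)	/* illustrative flag bits, not the kernel's */
#define F_NOWAIT  (1u << 1)

struct io {
	struct io *next;
	unsigned int flags;
	void *target;		/* device the I/O is aimed at */
};

struct head {
	pthread_mutex_t requeue_lock;
	struct io *requeue_list;
	void *shared_dev;	/* the multipath node all paths share */
};

/* Move every I/O of a failed request back to the shared node for retry. */
static void failover(struct head *h, struct io *list)
{
	pthread_mutex_lock(&h->requeue_lock);
	while (list) {
		struct io *io = list;

		list = io->next;
		io->target = h->shared_dev;		/* retarget to the head */
		io->flags &= ~(F_POLLED | F_NOWAIT);	/* retried from a worker, not polled */
		io->next = h->requeue_list;		/* park for the requeue worker */
		h->requeue_list = io;
	}
	pthread_mutex_unlock(&h->requeue_lock);
	/* a real implementation would now schedule the requeue worker */
}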
182 struct nvme_ns *ns = rq->q->queuedata; in nvme_mpath_start_request() local
183 struct gendisk *disk = ns->head->disk; in nvme_mpath_start_request()
185 if (READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD) { in nvme_mpath_start_request()
186 atomic_inc(&ns->ctrl->nr_active); in nvme_mpath_start_request()
187 nvme_req(rq)->flags |= NVME_MPATH_CNT_ACTIVE; in nvme_mpath_start_request()
190 if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq)) in nvme_mpath_start_request()
193 nvme_req(rq)->flags |= NVME_MPATH_IO_STATS; in nvme_mpath_start_request()
194 nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq), in nvme_mpath_start_request()
201 struct nvme_ns *ns = rq->q->queuedata; in nvme_mpath_end_request() local
203 if (nvme_req(rq)->flags & NVME_MPATH_CNT_ACTIVE) in nvme_mpath_end_request()
204 atomic_dec_if_positive(&ns->ctrl->nr_active); in nvme_mpath_end_request()
206 if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS)) in nvme_mpath_end_request()
208 bdev_end_io_acct(ns->head->disk->part0, req_op(rq), in nvme_mpath_end_request()
210 nvme_req(rq)->start_time); in nvme_mpath_end_request()
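A sketch of the per-controller active-request counter used by the queue-depth policy: increment when a request starts under that policy, decrement on completion without ever going below zero (the effect of atomic_dec_if_positive). The kernel also tags each counted request so only those are decremented; this userspace model omits that detail:

#include <stdatomic.h>
#include <stdbool.h>

struct ctrl {
	atomic_int nr_active;	/* outstanding requests on this controller */
};

static void start_request(struct ctrl *c, bool queue_depth_policy)
{
	if (queue_depth_policy)
		atomic_fetch_add(&c->nr_active, 1);
}

/* Decrement, but never let the counter go negative (dec_if_positive). */
static void end_request(struct ctrl *c)
{
	int old = atomic_load(&c->nr_active);

	while (old > 0 &&
	       !atomic_compare_exchange_weak(&c->nr_active, &old, old - 1))
		;	/* 'old' is reloaded by the failed CAS */
}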
215 struct nvme_ns *ns; in nvme_kick_requeue_lists() local
218 srcu_idx = srcu_read_lock(&ctrl->srcu); in nvme_kick_requeue_lists()
219 list_for_each_entry_srcu(ns, &ctrl->namespaces, list, in nvme_kick_requeue_lists()
220 srcu_read_lock_held(&ctrl->srcu)) { in nvme_kick_requeue_lists()
221 if (!ns->head->disk) in nvme_kick_requeue_lists()
223 kblockd_schedule_work(&ns->head->requeue_work); in nvme_kick_requeue_lists()
224 if (nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE) in nvme_kick_requeue_lists()
225 disk_uevent(ns->head->disk, KOBJ_CHANGE); in nvme_kick_requeue_lists()
227 srcu_read_unlock(&ctrl->srcu, srcu_idx); in nvme_kick_requeue_lists()
233 [NVME_ANA_NONOPTIMIZED] = "non-optimized",
235 [NVME_ANA_PERSISTENT_LOSS] = "persistent-loss",
239 bool nvme_mpath_clear_current_path(struct nvme_ns *ns) in nvme_mpath_clear_current_path() argument
241 struct nvme_ns_head *head = ns->head; in nvme_mpath_clear_current_path()
249 if (ns == rcu_access_pointer(head->current_path[node])) { in nvme_mpath_clear_current_path()
250 rcu_assign_pointer(head->current_path[node], NULL); in nvme_mpath_clear_current_path()
260 struct nvme_ns *ns; in nvme_mpath_clear_ctrl_paths() local
263 srcu_idx = srcu_read_lock(&ctrl->srcu); in nvme_mpath_clear_ctrl_paths()
264 list_for_each_entry_srcu(ns, &ctrl->namespaces, list, in nvme_mpath_clear_ctrl_paths()
265 srcu_read_lock_held(&ctrl->srcu)) { in nvme_mpath_clear_ctrl_paths()
266 nvme_mpath_clear_current_path(ns); in nvme_mpath_clear_ctrl_paths()
267 kblockd_schedule_work(&ns->head->requeue_work); in nvme_mpath_clear_ctrl_paths()
269 srcu_read_unlock(&ctrl->srcu, srcu_idx); in nvme_mpath_clear_ctrl_paths()
272 void nvme_mpath_revalidate_paths(struct nvme_ns *ns) in nvme_mpath_revalidate_paths() argument
274 struct nvme_ns_head *head = ns->head; in nvme_mpath_revalidate_paths()
275 sector_t capacity = get_capacity(head->disk); in nvme_mpath_revalidate_paths()
279 srcu_idx = srcu_read_lock(&head->srcu); in nvme_mpath_revalidate_paths()
280 list_for_each_entry_srcu(ns, &head->list, siblings, in nvme_mpath_revalidate_paths()
281 srcu_read_lock_held(&head->srcu)) { in nvme_mpath_revalidate_paths()
282 if (capacity != get_capacity(ns->disk)) in nvme_mpath_revalidate_paths()
283 clear_bit(NVME_NS_READY, &ns->flags); in nvme_mpath_revalidate_paths()
285 srcu_read_unlock(&head->srcu, srcu_idx); in nvme_mpath_revalidate_paths()
288 rcu_assign_pointer(head->current_path[node], NULL); in nvme_mpath_revalidate_paths()
289 kblockd_schedule_work(&head->requeue_work); in nvme_mpath_revalidate_paths()
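A sketch of the per-NUMA-node cached-path table that the helpers above maintain: clear only the entries pointing at a departing path, or clear every entry when all paths must be re-evaluated. Plain pointers stand in for the kernel's RCU-protected pointers, and MAX_NODES is an illustrative constant:

#include <stddef.h>

#define MAX_NODES 4	/* illustrative; the kernel sizes this by node count */

struct path;		/* one controller/namespace path */

struct head {
	struct path *current_path[MAX_NODES];	/* per-node cached choice */
};

/* A path went away or became unusable: forget it wherever it was cached. */
static int clear_current_path(struct head *h, struct path *p)
{
	int changed = 0;

	for (int node = 0; node < MAX_NODES; node++) {
		if (h->current_path[node] == p) {
			h->current_path[node] = NULL;
			changed = 1;
		}
	}
	return changed;
}

/* Something fundamental (e.g. capacity) changed: force re-selection everywhere. */
static void revalidate_paths(struct head *h)
{
	for (int node = 0; node < MAX_NODES; node++)
		h->current_path[node] = NULL;
	/* a real implementation would now kick the requeue worker */
}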
292 static bool nvme_path_is_disabled(struct nvme_ns *ns) in nvme_path_is_disabled() argument
294 enum nvme_ctrl_state state = nvme_ctrl_state(ns->ctrl); in nvme_path_is_disabled()
303 if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) || in nvme_path_is_disabled()
304 !test_bit(NVME_NS_READY, &ns->flags)) in nvme_path_is_disabled()
312 struct nvme_ns *found = NULL, *fallback = NULL, *ns; in __nvme_find_path() local
314 list_for_each_entry_srcu(ns, &head->list, siblings, in __nvme_find_path()
315 srcu_read_lock_held(&head->srcu)) { in __nvme_find_path()
316 if (nvme_path_is_disabled(ns)) in __nvme_find_path()
319 if (ns->ctrl->numa_node != NUMA_NO_NODE && in __nvme_find_path()
320 READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA) in __nvme_find_path()
321 distance = node_distance(node, ns->ctrl->numa_node); in __nvme_find_path()
325 switch (ns->ana_state) { in __nvme_find_path()
329 found = ns; in __nvme_find_path()
335 fallback = ns; in __nvme_find_path()
346 rcu_assign_pointer(head->current_path[node], found); in __nvme_find_path()
351 struct nvme_ns *ns) in nvme_next_ns() argument
353 ns = list_next_or_null_rcu(&head->list, &ns->siblings, struct nvme_ns, in nvme_next_ns()
355 if (ns) in nvme_next_ns()
356 return ns; in nvme_next_ns()
357 return list_first_or_null_rcu(&head->list, struct nvme_ns, siblings); in nvme_next_ns()
362 struct nvme_ns *ns, *found = NULL; in nvme_round_robin_path() local
364 struct nvme_ns *old = srcu_dereference(head->current_path[node], in nvme_round_robin_path()
365 &head->srcu); in nvme_round_robin_path()
370 if (list_is_singular(&head->list)) { in nvme_round_robin_path()
376 for (ns = nvme_next_ns(head, old); in nvme_round_robin_path()
377 ns && ns != old; in nvme_round_robin_path()
378 ns = nvme_next_ns(head, ns)) { in nvme_round_robin_path()
379 if (nvme_path_is_disabled(ns)) in nvme_round_robin_path()
382 if (ns->ana_state == NVME_ANA_OPTIMIZED) { in nvme_round_robin_path()
383 found = ns; in nvme_round_robin_path()
386 if (ns->ana_state == NVME_ANA_NONOPTIMIZED) in nvme_round_robin_path()
387 found = ns; in nvme_round_robin_path()
391 * The loop above skips the current path for round-robin semantics. in nvme_round_robin_path()
392 * Fall back to the current path if either: in nvme_round_robin_path()
393 * - no other optimized path found and current is optimized, in nvme_round_robin_path()
394 * - no other usable path found and current is usable. in nvme_round_robin_path()
397 (old->ana_state == NVME_ANA_OPTIMIZED || in nvme_round_robin_path()
398 (!found && old->ana_state == NVME_ANA_NONOPTIMIZED))) in nvme_round_robin_path()
404 rcu_assign_pointer(head->current_path[node], found); in nvme_round_robin_path()
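A sketch of the round-robin scan above, using an array index in place of the kernel's RCU list walk: start just after the previously used path, take the first optimized path found, remember a non-optimized one as fallback, and keep the old path only when nothing better exists:

#include <stdbool.h>

enum ana_state { ANA_OPTIMIZED, ANA_NONOPTIMIZED, ANA_INACCESSIBLE };

struct path {
	enum ana_state ana_state;
	bool disabled;
};

/*
 * Pick the next path after index 'old' (0 <= old < n) in a circular scan of
 * paths[0..n-1].  Returns the chosen index, or -1 if no path is usable.
 */
static int round_robin_pick(const struct path *paths, int n, int old)
{
	int found = -1;

	if (n <= 0)
		return -1;
	for (int step = 1; step <= n; step++) {
		int i = (old + step) % n;

		if (i == old || paths[i].disabled)
			continue;
		if (paths[i].ana_state == ANA_OPTIMIZED)
			return i;		/* best case: stop here */
		if (paths[i].ana_state == ANA_NONOPTIMIZED)
			found = i;		/* remember a fallback */
	}
	/* Fall back to the old path if it is still the best usable option. */
	if (!paths[old].disabled &&
	    (paths[old].ana_state == ANA_OPTIMIZED ||
	     (found < 0 && paths[old].ana_state == ANA_NONOPTIMIZED)))
		return old;
	return found;
}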
410 struct nvme_ns *best_opt = NULL, *best_nonopt = NULL, *ns; in nvme_queue_depth_path() local
414 list_for_each_entry_srcu(ns, &head->list, siblings, in nvme_queue_depth_path()
415 srcu_read_lock_held(&head->srcu)) { in nvme_queue_depth_path()
416 if (nvme_path_is_disabled(ns)) in nvme_queue_depth_path()
419 depth = atomic_read(&ns->ctrl->nr_active); in nvme_queue_depth_path()
421 switch (ns->ana_state) { in nvme_queue_depth_path()
425 best_opt = ns; in nvme_queue_depth_path()
431 best_nonopt = ns; in nvme_queue_depth_path()
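A sketch of the queue-depth policy above: among usable paths, pick the one with the fewest outstanding requests, preferring ANA-optimized paths and falling back to non-optimized ones only when no optimized path is usable. Arrays and field names are illustrative:

#include <limits.h>

enum ana_state { ANA_OPTIMIZED, ANA_NONOPTIMIZED, ANA_INACCESSIBLE };

struct path {
	enum ana_state ana_state;
	int disabled;
	int nr_active;		/* outstanding requests on this path */
};

/* Return the index of the least-loaded usable path, or -1 if none. */
static int queue_depth_pick(const struct path *paths, int n)
{
	int best_opt = -1, best_nonopt = -1;
	int min_opt = INT_MAX, min_nonopt = INT_MAX;

	for (int i = 0; i < n; i++) {
		if (paths[i].disabled)
			continue;
		switch (paths[i].ana_state) {
		case ANA_OPTIMIZED:
			if (paths[i].nr_active < min_opt) {
				min_opt = paths[i].nr_active;
				best_opt = i;
			}
			break;
		case ANA_NONOPTIMIZED:
			if (paths[i].nr_active < min_nonopt) {
				min_nonopt = paths[i].nr_active;
				best_nonopt = i;
			}
			break;
		default:
			break;
		}
	}
	return best_opt >= 0 ? best_opt : best_nonopt;
}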
445 static inline bool nvme_path_is_optimized(struct nvme_ns *ns) in nvme_path_is_optimized() argument
447 return nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE && in nvme_path_is_optimized()
448 ns->ana_state == NVME_ANA_OPTIMIZED; in nvme_path_is_optimized()
454 struct nvme_ns *ns; in nvme_numa_path() local
456 ns = srcu_dereference(head->current_path[node], &head->srcu); in nvme_numa_path()
457 if (unlikely(!ns)) in nvme_numa_path()
459 if (unlikely(!nvme_path_is_optimized(ns))) in nvme_numa_path()
461 return ns; in nvme_numa_path()
466 switch (READ_ONCE(head->subsys->iopolicy)) { in nvme_find_path()
478 struct nvme_ns *ns; in nvme_available_path() local
480 if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) in nvme_available_path()
483 list_for_each_entry_srcu(ns, &head->list, siblings, in nvme_available_path()
484 srcu_read_lock_held(&head->srcu)) { in nvme_available_path()
485 if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags)) in nvme_available_path()
487 switch (nvme_ctrl_state(ns->ctrl)) { in nvme_available_path()
498 * If "head->delayed_removal_secs" is configured (i.e., non-zero), do in nvme_available_path()
504 * non-zero, this flag is set to true. When zero, the flag is cleared. in nvme_available_path()
511 struct nvme_ns_head *head = bio->bi_bdev->bd_disk->private_data; in nvme_ns_head_submit_bio()
512 struct device *dev = disk_to_dev(head->disk); in nvme_ns_head_submit_bio()
513 struct nvme_ns *ns; in nvme_ns_head_submit_bio() local
525 srcu_idx = srcu_read_lock(&head->srcu); in nvme_ns_head_submit_bio()
526 ns = nvme_find_path(head); in nvme_ns_head_submit_bio()
527 if (likely(ns)) { in nvme_ns_head_submit_bio()
528 bio_set_dev(bio, ns->disk->part0); in nvme_ns_head_submit_bio()
529 bio->bi_opf |= REQ_NVME_MPATH; in nvme_ns_head_submit_bio()
530 trace_block_bio_remap(bio, disk_devt(ns->head->disk), in nvme_ns_head_submit_bio()
531 bio->bi_iter.bi_sector); in nvme_ns_head_submit_bio()
534 dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n"); in nvme_ns_head_submit_bio()
536 spin_lock_irq(&head->requeue_lock); in nvme_ns_head_submit_bio()
537 bio_list_add(&head->requeue_list, bio); in nvme_ns_head_submit_bio()
538 spin_unlock_irq(&head->requeue_lock); in nvme_ns_head_submit_bio()
540 dev_warn_ratelimited(dev, "no available path - failing I/O\n"); in nvme_ns_head_submit_bio()
545 srcu_read_unlock(&head->srcu, srcu_idx); in nvme_ns_head_submit_bio()
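A sketch of the submission decision visible above: redirect the I/O to a usable path if one exists; if none is usable but a path is expected to come back, park the I/O on the requeue list; otherwise fail it. The fixed-size arrays and boolean path states are a loose simplification of the kernel's controller states, used only for illustration:

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct io { struct io *next; int target; };

struct head {
	pthread_mutex_t requeue_lock;
	struct io *requeue_list;	/* I/O parked until a path comes back */
	int nr_paths;
	bool usable[8];			/* path currently able to serve I/O */
	bool may_recover[8];		/* path down but expected to return */
};

/* Returns 0 if the I/O was dispatched or parked, -1 if it had to be failed. */
static int head_submit(struct head *h, struct io *io)
{
	for (int i = 0; i < h->nr_paths; i++) {
		if (h->usable[i]) {
			io->target = i;	/* a real implementation submits to this path */
			return 0;
		}
	}
	for (int i = 0; i < h->nr_paths; i++) {
		if (h->may_recover[i]) {
			pthread_mutex_lock(&h->requeue_lock);
			io->next = h->requeue_list;	/* park for the requeue worker */
			h->requeue_list = io;
			pthread_mutex_unlock(&h->requeue_lock);
			return 0;
		}
	}
	return -1;	/* no usable path and none expected back: fail the I/O */
}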
550 if (!nvme_tryget_ns_head(disk->private_data)) in nvme_ns_head_open()
551 return -ENXIO; in nvme_ns_head_open()
557 nvme_put_ns_head(disk->private_data); in nvme_ns_head_release()
563 struct nvme_ns_head *head = disk->private_data; in nvme_ns_head_get_unique_id()
564 struct nvme_ns *ns; in nvme_ns_head_get_unique_id() local
565 int srcu_idx, ret = -EWOULDBLOCK; in nvme_ns_head_get_unique_id()
567 srcu_idx = srcu_read_lock(&head->srcu); in nvme_ns_head_get_unique_id()
568 ns = nvme_find_path(head); in nvme_ns_head_get_unique_id()
569 if (ns) in nvme_ns_head_get_unique_id()
570 ret = nvme_ns_get_unique_id(ns, id, type); in nvme_ns_head_get_unique_id()
571 srcu_read_unlock(&head->srcu, srcu_idx); in nvme_ns_head_get_unique_id()
579 struct nvme_ns_head *head = disk->private_data; in nvme_ns_head_report_zones()
580 struct nvme_ns *ns; in nvme_ns_head_report_zones() local
581 int srcu_idx, ret = -EWOULDBLOCK; in nvme_ns_head_report_zones()
583 srcu_idx = srcu_read_lock(&head->srcu); in nvme_ns_head_report_zones()
584 ns = nvme_find_path(head); in nvme_ns_head_report_zones()
585 if (ns) in nvme_ns_head_report_zones()
586 ret = nvme_ns_report_zones(ns, sector, nr_zones, cb, data); in nvme_ns_head_report_zones()
587 srcu_read_unlock(&head->srcu, srcu_idx); in nvme_ns_head_report_zones()
614 if (!nvme_tryget_ns_head(cdev_to_ns_head(inode->i_cdev))) in nvme_ns_head_chr_open()
615 return -ENXIO; in nvme_ns_head_chr_open()
621 nvme_put_ns_head(cdev_to_ns_head(inode->i_cdev)); in nvme_ns_head_chr_release()
639 head->cdev_device.parent = &head->subsys->dev; in nvme_add_ns_head_cdev()
640 ret = dev_set_name(&head->cdev_device, "ng%dn%d", in nvme_add_ns_head_cdev()
641 head->subsys->instance, head->instance); in nvme_add_ns_head_cdev()
644 ret = nvme_cdev_add(&head->cdev, &head->cdev_device, in nvme_add_ns_head_cdev()
655 &head->disk->state))) in nvme_partition_scan_work()
658 mutex_lock(&head->disk->open_mutex); in nvme_partition_scan_work()
659 bdev_disk_changed(head->disk, false); in nvme_partition_scan_work()
660 mutex_unlock(&head->disk->open_mutex); in nvme_partition_scan_work()
669 spin_lock_irq(&head->requeue_lock); in nvme_requeue_work()
670 next = bio_list_get(&head->requeue_list); in nvme_requeue_work()
671 spin_unlock_irq(&head->requeue_lock); in nvme_requeue_work()
674 next = bio->bi_next; in nvme_requeue_work()
675 bio->bi_next = NULL; in nvme_requeue_work()
683 if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { in nvme_remove_head()
688 kblockd_schedule_work(&head->requeue_work); in nvme_remove_head()
690 nvme_cdev_del(&head->cdev, &head->cdev_device); in nvme_remove_head()
691 synchronize_srcu(&head->srcu); in nvme_remove_head()
692 del_gendisk(head->disk); in nvme_remove_head()
703 mutex_lock(&head->subsys->lock); in nvme_remove_head_work()
704 if (list_empty(&head->list)) { in nvme_remove_head_work()
705 list_del_init(&head->entry); in nvme_remove_head_work()
708 mutex_unlock(&head->subsys->lock); in nvme_remove_head_work()
719 mutex_init(&head->lock); in nvme_mpath_alloc_disk()
720 bio_list_init(&head->requeue_list); in nvme_mpath_alloc_disk()
721 spin_lock_init(&head->requeue_lock); in nvme_mpath_alloc_disk()
722 INIT_WORK(&head->requeue_work, nvme_requeue_work); in nvme_mpath_alloc_disk()
723 INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work); in nvme_mpath_alloc_disk()
724 INIT_DELAYED_WORK(&head->remove_work, nvme_remove_head_work); in nvme_mpath_alloc_disk()
725 head->delayed_removal_secs = 0; in nvme_mpath_alloc_disk()
737 if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || in nvme_mpath_alloc_disk()
749 if (head->ids.csi == NVME_CSI_ZNS) in nvme_mpath_alloc_disk()
752 head->disk = blk_alloc_disk(&lim, ctrl->numa_node); in nvme_mpath_alloc_disk()
753 if (IS_ERR(head->disk)) in nvme_mpath_alloc_disk()
754 return PTR_ERR(head->disk); in nvme_mpath_alloc_disk()
755 head->disk->fops = &nvme_ns_head_ops; in nvme_mpath_alloc_disk()
756 head->disk->private_data = head; in nvme_mpath_alloc_disk()
766 set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state); in nvme_mpath_alloc_disk()
767 sprintf(head->disk->disk_name, "nvme%dn%d", in nvme_mpath_alloc_disk()
768 ctrl->subsys->instance, head->instance); in nvme_mpath_alloc_disk()
773 static void nvme_mpath_set_live(struct nvme_ns *ns) in nvme_mpath_set_live() argument
775 struct nvme_ns_head *head = ns->head; in nvme_mpath_set_live()
778 if (!head->disk) in nvme_mpath_set_live()
786 if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { in nvme_mpath_set_live()
787 rc = device_add_disk(&head->subsys->dev, head->disk, in nvme_mpath_set_live()
790 clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags); in nvme_mpath_set_live()
794 kblockd_schedule_work(&head->partition_scan_work); in nvme_mpath_set_live()
797 nvme_mpath_add_sysfs_link(ns->head); in nvme_mpath_set_live()
799 mutex_lock(&head->lock); in nvme_mpath_set_live()
800 if (nvme_path_is_optimized(ns)) { in nvme_mpath_set_live()
803 srcu_idx = srcu_read_lock(&head->srcu); in nvme_mpath_set_live()
806 srcu_read_unlock(&head->srcu, srcu_idx); in nvme_mpath_set_live()
808 mutex_unlock(&head->lock); in nvme_mpath_set_live()
810 synchronize_srcu(&head->srcu); in nvme_mpath_set_live()
811 kblockd_schedule_work(&head->requeue_work); in nvme_mpath_set_live()
818 void *base = ctrl->ana_log_buf; in nvme_parse_ana_log()
822 lockdep_assert_held(&ctrl->ana_lock); in nvme_parse_ana_log()
824 for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) { in nvme_parse_ana_log()
829 if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc))) in nvme_parse_ana_log()
830 return -EINVAL; in nvme_parse_ana_log()
832 nr_nsids = le32_to_cpu(desc->nnsids); in nvme_parse_ana_log()
835 if (WARN_ON_ONCE(desc->grpid == 0)) in nvme_parse_ana_log()
836 return -EINVAL; in nvme_parse_ana_log()
837 if (WARN_ON_ONCE(le32_to_cpu(desc->grpid) > ctrl->anagrpmax)) in nvme_parse_ana_log()
838 return -EINVAL; in nvme_parse_ana_log()
839 if (WARN_ON_ONCE(desc->state == 0)) in nvme_parse_ana_log()
840 return -EINVAL; in nvme_parse_ana_log()
841 if (WARN_ON_ONCE(desc->state > NVME_ANA_CHANGE)) in nvme_parse_ana_log()
842 return -EINVAL; in nvme_parse_ana_log()
845 if (WARN_ON_ONCE(offset > ctrl->ana_log_size - nsid_buf_size)) in nvme_parse_ana_log()
846 return -EINVAL; in nvme_parse_ana_log()
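A sketch of the log walk that the bounds checks above protect: a header carrying a group count, then per group a fixed-size descriptor immediately followed by that group's 32-bit namespace IDs, with every read checked against the buffer length first. The struct layouts below are simplified stand-ins, not the on-the-wire ANA log format, and the kernel validates more fields (grpid range, state range) than shown here:

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Simplified stand-ins for the ANA log header and group descriptor. */
struct ana_hdr  { uint16_t ngrps; };
struct ana_desc { uint32_t grpid; uint32_t nnsids; uint8_t state; };

/*
 * Walk 'buf' of 'len' bytes: header, then per group a descriptor followed by
 * desc.nnsids packed little-endian 32-bit namespace IDs.  Returns 0 on
 * success, -1 on a short or malformed buffer.
 */
static int parse_ana_log(const uint8_t *buf, size_t len,
			 void (*cb)(const struct ana_desc *desc,
				    const uint8_t *nsids))
{
	struct ana_hdr hdr;
	size_t off = sizeof(hdr);

	if (len < sizeof(hdr))
		return -1;
	memcpy(&hdr, buf, sizeof(hdr));

	for (unsigned int i = 0; i < hdr.ngrps; i++) {
		struct ana_desc desc;

		if (sizeof(desc) > len - off)	/* descriptor must fit */
			return -1;
		memcpy(&desc, buf + off, sizeof(desc));
		if (desc.grpid == 0)		/* group ID 0 is never valid */
			return -1;
		off += sizeof(desc);

		if (desc.nnsids > (len - off) / sizeof(uint32_t))
			return -1;		/* nsid array must fit */
		cb(&desc, buf + off);
		off += (size_t)desc.nnsids * sizeof(uint32_t);
	}
	return 0;
}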
864 struct nvme_ns *ns) in nvme_update_ns_ana_state() argument
866 ns->ana_grpid = le32_to_cpu(desc->grpid); in nvme_update_ns_ana_state()
867 ns->ana_state = desc->state; in nvme_update_ns_ana_state()
868 clear_bit(NVME_NS_ANA_PENDING, &ns->flags); in nvme_update_ns_ana_state()
878 if (nvme_state_is_live(ns->ana_state) && in nvme_update_ns_ana_state()
879 nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE) in nvme_update_ns_ana_state()
880 nvme_mpath_set_live(ns); in nvme_update_ns_ana_state()
886 * or non-optimized) while we alloc the ns then sysfs link would in nvme_update_ns_ana_state()
897 if (test_bit(NVME_NSHEAD_DISK_LIVE, &ns->head->flags)) in nvme_update_ns_ana_state()
898 nvme_mpath_add_sysfs_link(ns->head); in nvme_update_ns_ana_state()
905 u32 nr_nsids = le32_to_cpu(desc->nnsids), n = 0; in nvme_update_ana_state()
907 struct nvme_ns *ns; in nvme_update_ana_state() local
910 dev_dbg(ctrl->device, "ANA group %d: %s.\n", in nvme_update_ana_state()
911 le32_to_cpu(desc->grpid), in nvme_update_ana_state()
912 nvme_ana_state_names[desc->state]); in nvme_update_ana_state()
914 if (desc->state == NVME_ANA_CHANGE) in nvme_update_ana_state()
920 srcu_idx = srcu_read_lock(&ctrl->srcu); in nvme_update_ana_state()
921 list_for_each_entry_srcu(ns, &ctrl->namespaces, list, in nvme_update_ana_state()
922 srcu_read_lock_held(&ctrl->srcu)) { in nvme_update_ana_state()
925 nsid = le32_to_cpu(desc->nsids[n]); in nvme_update_ana_state()
926 if (ns->head->ns_id < nsid) in nvme_update_ana_state()
928 if (ns->head->ns_id == nsid) in nvme_update_ana_state()
929 nvme_update_ns_ana_state(desc, ns); in nvme_update_ana_state()
932 if (ns->head->ns_id > nsid) in nvme_update_ana_state()
935 srcu_read_unlock(&ctrl->srcu, srcu_idx); in nvme_update_ana_state()
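A sketch of the matching loop above: the controller's namespace list and the descriptor's nsid array are both sorted by namespace ID, so a single linear merge pass finds every namespace named by the group. This simplified version uses arrays instead of the kernel's SRCU-protected list:

#include <stdint.h>
#include <stddef.h>

struct ns { uint32_t ns_id; int ana_state; };

/*
 * Apply 'state' to every namespace whose ID appears in nsids[0..nr-1].
 * Both arrays are sorted ascending by namespace ID, so one pass suffices.
 */
static void update_ana_state(struct ns *namespaces, size_t nr_ns,
			     const uint32_t *nsids, size_t nr, int state)
{
	size_t n = 0;

	for (size_t i = 0; i < nr_ns && n < nr; i++) {
		while (n < nr && nsids[n] < namespaces[i].ns_id)
			n++;			/* skip IDs with no attached namespace */
		if (n < nr && nsids[n] == namespaces[i].ns_id) {
			namespaces[i].ana_state = state;
			n++;
		}
	}
}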
944 mutex_lock(&ctrl->ana_lock); in nvme_read_ana_log()
946 ctrl->ana_log_buf, ctrl->ana_log_size, 0); in nvme_read_ana_log()
948 dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error); in nvme_read_ana_log()
969 mod_timer(&ctrl->anatt_timer, ctrl->anatt * HZ * 2 + jiffies); in nvme_read_ana_log()
971 timer_delete_sync(&ctrl->anatt_timer); in nvme_read_ana_log()
973 mutex_unlock(&ctrl->ana_lock); in nvme_read_ana_log()
991 if (!ctrl->ana_log_buf) in nvme_mpath_update()
994 mutex_lock(&ctrl->ana_lock); in nvme_mpath_update()
996 mutex_unlock(&ctrl->ana_lock); in nvme_mpath_update()
1003 dev_info(ctrl->device, "ANATT timeout, resetting controller.\n"); in nvme_anatt_timeout()
1011 timer_delete_sync(&ctrl->anatt_timer); in nvme_mpath_stop()
1012 cancel_work_sync(&ctrl->ana_work); in nvme_mpath_stop()
1026 nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]); in nvme_subsys_iopolicy_show()
1033 int old_iopolicy = READ_ONCE(subsys->iopolicy); in nvme_subsys_iopolicy_update()
1038 WRITE_ONCE(subsys->iopolicy, iopolicy); in nvme_subsys_iopolicy_update()
1042 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) in nvme_subsys_iopolicy_update()
1047 subsys->subnqn, in nvme_subsys_iopolicy_update()
1066 return -EINVAL; in nvme_subsys_iopolicy_store()
1074 return sysfs_emit(buf, "%d\n", nvme_get_ns_from_dev(dev)->ana_grpid); in ana_grpid_show()
1081 struct nvme_ns *ns = nvme_get_ns_from_dev(dev); in ana_state_show() local
1083 return sysfs_emit(buf, "%s\n", nvme_ana_state_names[ns->ana_state]); in ana_state_show()
1090 struct nvme_ns *ns = nvme_get_ns_from_dev(dev); in queue_depth_show() local
1092 if (ns->head->subsys->iopolicy != NVME_IOPOLICY_QD) in queue_depth_show()
1095 return sysfs_emit(buf, "%d\n", atomic_read(&ns->ctrl->nr_active)); in queue_depth_show()
1105 struct nvme_ns *ns = nvme_get_ns_from_dev(dev); in numa_nodes_show() local
1106 struct nvme_ns_head *head = ns->head; in numa_nodes_show()
1108 if (head->subsys->iopolicy != NVME_IOPOLICY_NUMA) in numa_nodes_show()
1113 srcu_idx = srcu_read_lock(&head->srcu); in numa_nodes_show()
1115 current_ns = srcu_dereference(head->current_path[node], in numa_nodes_show()
1116 &head->srcu); in numa_nodes_show()
1117 if (ns == current_ns) in numa_nodes_show()
1120 srcu_read_unlock(&head->srcu, srcu_idx); in numa_nodes_show()
1130 struct nvme_ns_head *head = disk->private_data; in delayed_removal_secs_show()
1133 mutex_lock(&head->subsys->lock); in delayed_removal_secs_show()
1134 ret = sysfs_emit(buf, "%u\n", head->delayed_removal_secs); in delayed_removal_secs_show()
1135 mutex_unlock(&head->subsys->lock); in delayed_removal_secs_show()
1143 struct nvme_ns_head *head = disk->private_data; in delayed_removal_secs_store()
1151 mutex_lock(&head->subsys->lock); in delayed_removal_secs_store()
1152 head->delayed_removal_secs = sec; in delayed_removal_secs_store()
1154 set_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags); in delayed_removal_secs_store()
1156 clear_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags); in delayed_removal_secs_store()
1157 mutex_unlock(&head->subsys->lock); in delayed_removal_secs_store()
1162 synchronize_srcu(&head->srcu); in delayed_removal_secs_store()
1174 if (desc->grpid != dst->grpid) in nvme_lookup_ana_group_desc()
1178 return -ENXIO; /* just break out of the loop */ in nvme_lookup_ana_group_desc()
1185 struct nvme_ns *ns; in nvme_mpath_add_sysfs_link() local
1192 if (!test_bit(GD_ADDED, &head->disk->state)) in nvme_mpath_add_sysfs_link()
1195 kobj = &disk_to_dev(head->disk)->kobj; in nvme_mpath_add_sysfs_link()
1198 * loop through each ns chained through the head->list and create the in nvme_mpath_add_sysfs_link()
1199 * sysfs link from head node to the ns path node in nvme_mpath_add_sysfs_link()
1201 srcu_idx = srcu_read_lock(&head->srcu); in nvme_mpath_add_sysfs_link()
1203 list_for_each_entry_srcu(ns, &head->list, siblings, in nvme_mpath_add_sysfs_link()
1204 srcu_read_lock_held(&head->srcu)) { in nvme_mpath_add_sysfs_link()
1206 * Ensure that ns path disk node is already added otherwise we in nvme_mpath_add_sysfs_link()
1209 if (!test_bit(GD_ADDED, &ns->disk->state)) in nvme_mpath_add_sysfs_link()
1214 * When path ana state transitions from optimized to non- in nvme_mpath_add_sysfs_link()
1215 * optimized or vice-versa, the nvme_mpath_set_live() is in nvme_mpath_add_sysfs_link()
1217 * link already exists for the given path and we attempt to re- in nvme_mpath_add_sysfs_link()
1224 if (test_and_set_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags)) in nvme_mpath_add_sysfs_link()
1227 target = disk_to_dev(ns->disk); in nvme_mpath_add_sysfs_link()
1230 * ns path gendisk kobject @target->kobj. in nvme_mpath_add_sysfs_link()
1233 &target->kobj, dev_name(target)); in nvme_mpath_add_sysfs_link()
1235 dev_err(disk_to_dev(ns->head->disk), in nvme_mpath_add_sysfs_link()
1238 clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags); in nvme_mpath_add_sysfs_link()
1242 srcu_read_unlock(&head->srcu, srcu_idx); in nvme_mpath_add_sysfs_link()
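A sketch of the idempotency guard used above: a per-path "linked" bit is atomically test-and-set before creating the sysfs link, so repeated ANA transitions do not try to recreate an existing link, and the bit is cleared again on failure so creation can be retried. create_link() is an illustrative stub, not a kernel function:

#include <stdatomic.h>
#include <stdbool.h>

struct path {
	atomic_bool linked;	/* set once the head-to-path link exists */
};

/* Illustrative stub standing in for the actual sysfs link creation. */
static int create_link(struct path *p) { (void)p; return 0; }

static void add_links(struct path *paths, int n)
{
	for (int i = 0; i < n; i++) {
		/* test-and-set: skip paths that are already linked */
		if (atomic_exchange(&paths[i].linked, true))
			continue;
		if (create_link(&paths[i]) != 0)
			atomic_store(&paths[i].linked, false);	/* allow a retry */
	}
}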
1245 void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns) in nvme_mpath_remove_sysfs_link() argument
1250 if (!test_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags)) in nvme_mpath_remove_sysfs_link()
1253 target = disk_to_dev(ns->disk); in nvme_mpath_remove_sysfs_link()
1254 kobj = &disk_to_dev(ns->head->disk)->kobj; in nvme_mpath_remove_sysfs_link()
1257 clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags); in nvme_mpath_remove_sysfs_link()
1260 void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid) in nvme_mpath_add_disk() argument
1262 if (nvme_ctrl_use_ana(ns->ctrl)) { in nvme_mpath_add_disk()
1268 mutex_lock(&ns->ctrl->ana_lock); in nvme_mpath_add_disk()
1269 ns->ana_grpid = le32_to_cpu(anagrpid); in nvme_mpath_add_disk()
1270 nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc); in nvme_mpath_add_disk()
1271 mutex_unlock(&ns->ctrl->ana_lock); in nvme_mpath_add_disk()
1274 nvme_update_ns_ana_state(&desc, ns); in nvme_mpath_add_disk()
1276 /* group desc not found: trigger a re-read */ in nvme_mpath_add_disk()
1277 set_bit(NVME_NS_ANA_PENDING, &ns->flags); in nvme_mpath_add_disk()
1278 queue_work(nvme_wq, &ns->ctrl->ana_work); in nvme_mpath_add_disk()
1281 ns->ana_state = NVME_ANA_OPTIMIZED; in nvme_mpath_add_disk()
1282 nvme_mpath_set_live(ns); in nvme_mpath_add_disk()
1286 if (blk_queue_is_zoned(ns->queue) && ns->head->disk) in nvme_mpath_add_disk()
1287 ns->head->disk->nr_zones = ns->disk->nr_zones; in nvme_mpath_add_disk()
1295 if (!head->disk) in nvme_mpath_remove_disk()
1298 mutex_lock(&head->subsys->lock); in nvme_mpath_remove_disk()
1301 * head->list is expected to be empty. However, nvme_remove_ns() and in nvme_mpath_remove_disk()
1302 * nvme_init_ns_head() can run concurrently and so if head->delayed_ in nvme_mpath_remove_disk()
1304 * this point, head->list may no longer be empty. Therefore, we recheck in nvme_mpath_remove_disk()
1305 * head->list here. If it is no longer empty then we skip enqueuing the in nvme_mpath_remove_disk()
1308 if (!list_empty(&head->list)) in nvme_mpath_remove_disk()
1311 if (head->delayed_removal_secs) { in nvme_mpath_remove_disk()
1318 mod_delayed_work(nvme_wq, &head->remove_work, in nvme_mpath_remove_disk()
1319 head->delayed_removal_secs * HZ); in nvme_mpath_remove_disk()
1321 list_del_init(&head->entry); in nvme_mpath_remove_disk()
1325 mutex_unlock(&head->subsys->lock); in nvme_mpath_remove_disk()
1332 if (!head->disk) in nvme_mpath_put_disk()
1335 kblockd_schedule_work(&head->requeue_work); in nvme_mpath_put_disk()
1336 flush_work(&head->requeue_work); in nvme_mpath_put_disk()
1337 flush_work(&head->partition_scan_work); in nvme_mpath_put_disk()
1338 put_disk(head->disk); in nvme_mpath_put_disk()
1343 mutex_init(&ctrl->ana_lock); in nvme_mpath_init_ctrl()
1344 timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0); in nvme_mpath_init_ctrl()
1345 INIT_WORK(&ctrl->ana_work, nvme_ana_work); in nvme_mpath_init_ctrl()
1350 size_t max_transfer_size = ctrl->max_hw_sectors << SECTOR_SHIFT; in nvme_mpath_init_identify()
1355 if (!multipath || !ctrl->subsys || in nvme_mpath_init_identify()
1356 !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)) in nvme_mpath_init_identify()
1360 atomic_set(&ctrl->nr_active, 0); in nvme_mpath_init_identify()
1362 if (!ctrl->max_namespaces || in nvme_mpath_init_identify()
1363 ctrl->max_namespaces > le32_to_cpu(id->nn)) { in nvme_mpath_init_identify()
1364 dev_err(ctrl->device, in nvme_mpath_init_identify()
1365 "Invalid MNAN value %u\n", ctrl->max_namespaces); in nvme_mpath_init_identify()
1366 return -EINVAL; in nvme_mpath_init_identify()
1369 ctrl->anacap = id->anacap; in nvme_mpath_init_identify()
1370 ctrl->anatt = id->anatt; in nvme_mpath_init_identify()
1371 ctrl->nanagrpid = le32_to_cpu(id->nanagrpid); in nvme_mpath_init_identify()
1372 ctrl->anagrpmax = le32_to_cpu(id->anagrpmax); in nvme_mpath_init_identify()
1375 ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc) + in nvme_mpath_init_identify()
1376 ctrl->max_namespaces * sizeof(__le32); in nvme_mpath_init_identify()
1378 dev_err(ctrl->device, in nvme_mpath_init_identify()
1381 dev_err(ctrl->device, "disabling ANA support.\n"); in nvme_mpath_init_identify()
1384 if (ana_log_size > ctrl->ana_log_size) { in nvme_mpath_init_identify()
1387 ctrl->ana_log_buf = kvmalloc(ana_log_size, GFP_KERNEL); in nvme_mpath_init_identify()
1388 if (!ctrl->ana_log_buf) in nvme_mpath_init_identify()
1389 return -ENOMEM; in nvme_mpath_init_identify()
1391 ctrl->ana_log_size = ana_log_size; in nvme_mpath_init_identify()
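A sketch of the buffer sizing above: the worst-case ANA log is a fixed header plus one group descriptor per ANA group plus one 32-bit NSID per namespace, and the buffer is reallocated only when that required size grows. The header and descriptor sizes here are illustrative constants, and malloc/free stand in for the kernel's kvmalloc/kvfree:

#include <stdint.h>
#include <stdlib.h>

#define ANA_HDR_SIZE   16u	/* illustrative header size */
#define ANA_DESC_SIZE  32u	/* illustrative per-group descriptor size */

struct ctrl {
	uint32_t nanagrpid;	/* number of ANA groups */
	uint32_t max_namespaces;
	size_t	 ana_log_size;
	void	*ana_log_buf;
};

/* Grow the ANA log buffer if the worst-case log no longer fits. */
static int resize_ana_log(struct ctrl *c)
{
	size_t need = ANA_HDR_SIZE +
		      (size_t)c->nanagrpid * ANA_DESC_SIZE +
		      (size_t)c->max_namespaces * sizeof(uint32_t);

	if (need <= c->ana_log_size)
		return 0;		/* current buffer is already big enough */

	free(c->ana_log_buf);
	c->ana_log_buf = malloc(need);
	if (!c->ana_log_buf) {
		c->ana_log_size = 0;
		return -1;
	}
	c->ana_log_size = need;
	return 0;
}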
1404 kvfree(ctrl->ana_log_buf); in nvme_mpath_uninit()
1405 ctrl->ana_log_buf = NULL; in nvme_mpath_uninit()
1406 ctrl->ana_log_size = 0; in nvme_mpath_uninit()