Lines matching "wait-queue"

1 // SPDX-License-Identifier: GPL-2.0
5 * Copyright (C) 2013-2014 Jens Axboe
6 * Copyright (C) 2013-2014 Christoph Hellwig
10 #include <linux/backing-dev.h>
13 #include <linux/blk-integrity.h>
29 #include <linux/blk-crypto.h>
35 #include <linux/t10-pi.h>
37 #include "blk-mq.h"
38 #include "blk-mq-debugfs.h"
39 #include "blk-pm.h"
40 #include "blk-stat.h"
41 #include "blk-mq-sched.h"
42 #include "blk-rq-qos.h"
58 * have pending work in this hardware queue.
62 return !list_empty_careful(&hctx->dispatch) || in blk_mq_hctx_has_pending()
63 sbitmap_any_bit_set(&hctx->ctx_map) || in blk_mq_hctx_has_pending()
68 * Mark this ctx as having pending work in this hardware queue
73 const int bit = ctx->index_hw[hctx->type]; in blk_mq_hctx_mark_pending()
75 if (!sbitmap_test_bit(&hctx->ctx_map, bit)) in blk_mq_hctx_mark_pending()
76 sbitmap_set_bit(&hctx->ctx_map, bit); in blk_mq_hctx_mark_pending()
82 const int bit = ctx->index_hw[hctx->type]; in blk_mq_hctx_clear_pending()
84 sbitmap_clear_bit(&hctx->ctx_map, bit); in blk_mq_hctx_clear_pending()
96 if (rq->rq_flags & RQF_IO_STAT && in blk_mq_check_inflight()
97 (!bdev_is_partition(mi->part) || rq->part == mi->part) && in blk_mq_check_inflight()
99 mi->inflight[rq_data_dir(rq)]++; in blk_mq_check_inflight()
131 if (!q->mq_freeze_depth) { in blk_freeze_set_owner()
132 q->mq_freeze_owner = owner; in blk_freeze_set_owner()
133 q->mq_freeze_owner_depth = 1; in blk_freeze_set_owner()
137 if (owner == q->mq_freeze_owner) in blk_freeze_set_owner()
138 q->mq_freeze_owner_depth += 1; in blk_freeze_set_owner()
145 if (!q->mq_freeze_owner) in blk_unfreeze_check_owner()
147 if (q->mq_freeze_owner != current) in blk_unfreeze_check_owner()
149 if (--q->mq_freeze_owner_depth == 0) { in blk_unfreeze_check_owner()
150 q->mq_freeze_owner = NULL; in blk_unfreeze_check_owner()
175 mutex_lock(&q->mq_freeze_lock); in __blk_freeze_queue_start()
177 if (++q->mq_freeze_depth == 1) { in __blk_freeze_queue_start()
178 percpu_ref_kill(&q->q_usage_counter); in __blk_freeze_queue_start()
179 mutex_unlock(&q->mq_freeze_lock); in __blk_freeze_queue_start()
183 mutex_unlock(&q->mq_freeze_lock); in __blk_freeze_queue_start()
198 wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); in blk_mq_freeze_queue_wait()
205 return wait_event_timeout(q->mq_freeze_wq, in blk_mq_freeze_queue_wait_timeout()
206 percpu_ref_is_zero(&q->q_usage_counter), in blk_mq_freeze_queue_wait_timeout()
222 mutex_lock(&q->mq_freeze_lock); in __blk_mq_unfreeze_queue()
224 q->q_usage_counter.data->force_atomic = true; in __blk_mq_unfreeze_queue()
225 q->mq_freeze_depth--; in __blk_mq_unfreeze_queue()
226 WARN_ON_ONCE(q->mq_freeze_depth < 0); in __blk_mq_unfreeze_queue()
227 if (!q->mq_freeze_depth) { in __blk_mq_unfreeze_queue()
228 percpu_ref_resurrect(&q->q_usage_counter); in __blk_mq_unfreeze_queue()
229 wake_up_all(&q->mq_freeze_wq); in __blk_mq_unfreeze_queue()
232 mutex_unlock(&q->mq_freeze_lock); in __blk_mq_unfreeze_queue()
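The freeze path above parks new submitters on q_usage_counter and mq_freeze_wq. For orientation, a minimal sketch of the usual caller-side pairing (hypothetical function; assuming the classic void-returning blk_mq_freeze_queue()/blk_mq_unfreeze_queue() helpers, which newer trees may extend with a memalloc-flags cookie):

    /* Hypothetical caller: drain all I/O before touching queue state. */
    static void example_update_queue(struct request_queue *q)
    {
            blk_mq_freeze_queue(q);         /* kill q_usage_counter, wait on mq_freeze_wq */
            /* ... no request can enter or be in flight here ... */
            blk_mq_unfreeze_queue(q);       /* resurrect the counter, wake waiters */
    }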
247 * Unlike blk_freeze_queue_start, the queue doesn't need to be unfrozen
272 spin_lock_irqsave(&q->queue_lock, flags); in blk_mq_quiesce_queue_nowait()
273 if (!q->quiesce_depth++) in blk_mq_quiesce_queue_nowait()
275 spin_unlock_irqrestore(&q->queue_lock, flags); in blk_mq_quiesce_queue_nowait()
280 * blk_mq_wait_quiesce_done() - wait until in-progress quiesce is done
281 * @set: tag_set to wait on
290 if (set->flags & BLK_MQ_F_BLOCKING) in blk_mq_wait_quiesce_done()
291 synchronize_srcu(set->srcu); in blk_mq_wait_quiesce_done()
298 * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
299 * @q: request queue.
303 * sure no dispatch can happen until the queue is unquiesced via
309 /* nothing to wait for non-mq queues */ in blk_mq_quiesce_queue()
311 blk_mq_wait_quiesce_done(q->tag_set); in blk_mq_quiesce_queue()
316 * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
317 * @q: request queue.
319 * This function recovers queue into the state before quiescing
327 spin_lock_irqsave(&q->queue_lock, flags); in blk_mq_unquiesce_queue()
328 if (WARN_ON_ONCE(q->quiesce_depth <= 0)) { in blk_mq_unquiesce_queue()
330 } else if (!--q->quiesce_depth) { in blk_mq_unquiesce_queue()
334 spin_unlock_irqrestore(&q->queue_lock, flags); in blk_mq_unquiesce_queue()
346 mutex_lock(&set->tag_list_lock); in blk_mq_quiesce_tagset()
347 list_for_each_entry(q, &set->tag_list, tag_set_list) { in blk_mq_quiesce_tagset()
351 mutex_unlock(&set->tag_list_lock); in blk_mq_quiesce_tagset()
361 mutex_lock(&set->tag_list_lock); in blk_mq_unquiesce_tagset()
362 list_for_each_entry(q, &set->tag_list, tag_set_list) { in blk_mq_unquiesce_tagset()
366 mutex_unlock(&set->tag_list_lock); in blk_mq_unquiesce_tagset()
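Quiescing only stops dispatch; it does not drain allocations the way freezing does. A hedged sketch of the usual pairing around work that must not race with ->queue_rq() (hypothetical driver function):

    /* Hypothetical: reconfigure the device with no dispatches in progress. */
    static void example_reconfigure(struct request_queue *q)
    {
            blk_mq_quiesce_queue(q);        /* bumps quiesce_depth, waits via (S)RCU */
            /* ... no new ->queue_rq() calls can start here ... */
            blk_mq_unquiesce_queue(q);      /* drops quiesce_depth, reruns hw queues */
    }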
377 blk_mq_tag_wakeup_all(hctx->tags, true); in blk_mq_wake_waiters()
384 INIT_LIST_HEAD(&rq->queuelist); in blk_rq_init()
385 rq->q = q; in blk_rq_init()
386 rq->__sector = (sector_t) -1; in blk_rq_init()
387 INIT_HLIST_NODE(&rq->hash); in blk_rq_init()
388 RB_CLEAR_NODE(&rq->rb_node); in blk_rq_init()
389 rq->tag = BLK_MQ_NO_TAG; in blk_rq_init()
390 rq->internal_tag = BLK_MQ_NO_TAG; in blk_rq_init()
391 rq->start_time_ns = blk_time_get_ns(); in blk_rq_init()
400 if (blk_queue_rq_alloc_time(rq->q)) in blk_mq_rq_time_init()
401 rq->alloc_time_ns = alloc_time_ns; in blk_mq_rq_time_init()
403 rq->alloc_time_ns = 0; in blk_mq_rq_time_init()
410 struct blk_mq_ctx *ctx = data->ctx; in blk_mq_rq_ctx_init()
411 struct blk_mq_hw_ctx *hctx = data->hctx; in blk_mq_rq_ctx_init()
412 struct request_queue *q = data->q; in blk_mq_rq_ctx_init()
413 struct request *rq = tags->static_rqs[tag]; in blk_mq_rq_ctx_init()
415 rq->q = q; in blk_mq_rq_ctx_init()
416 rq->mq_ctx = ctx; in blk_mq_rq_ctx_init()
417 rq->mq_hctx = hctx; in blk_mq_rq_ctx_init()
418 rq->cmd_flags = data->cmd_flags; in blk_mq_rq_ctx_init()
420 if (data->flags & BLK_MQ_REQ_PM) in blk_mq_rq_ctx_init()
421 data->rq_flags |= RQF_PM; in blk_mq_rq_ctx_init()
422 rq->rq_flags = data->rq_flags; in blk_mq_rq_ctx_init()
424 if (data->rq_flags & RQF_SCHED_TAGS) { in blk_mq_rq_ctx_init()
425 rq->tag = BLK_MQ_NO_TAG; in blk_mq_rq_ctx_init()
426 rq->internal_tag = tag; in blk_mq_rq_ctx_init()
428 rq->tag = tag; in blk_mq_rq_ctx_init()
429 rq->internal_tag = BLK_MQ_NO_TAG; in blk_mq_rq_ctx_init()
431 rq->timeout = 0; in blk_mq_rq_ctx_init()
433 rq->part = NULL; in blk_mq_rq_ctx_init()
434 rq->io_start_time_ns = 0; in blk_mq_rq_ctx_init()
435 rq->stats_sectors = 0; in blk_mq_rq_ctx_init()
436 rq->nr_phys_segments = 0; in blk_mq_rq_ctx_init()
437 rq->nr_integrity_segments = 0; in blk_mq_rq_ctx_init()
438 rq->end_io = NULL; in blk_mq_rq_ctx_init()
439 rq->end_io_data = NULL; in blk_mq_rq_ctx_init()
442 INIT_LIST_HEAD(&rq->queuelist); in blk_mq_rq_ctx_init()
444 WRITE_ONCE(rq->deadline, 0); in blk_mq_rq_ctx_init()
447 if (rq->rq_flags & RQF_USE_SCHED) { in blk_mq_rq_ctx_init()
448 struct elevator_queue *e = data->q->elevator; in blk_mq_rq_ctx_init()
450 INIT_HLIST_NODE(&rq->hash); in blk_mq_rq_ctx_init()
451 RB_CLEAR_NODE(&rq->rb_node); in blk_mq_rq_ctx_init()
453 if (e->type->ops.prepare_request) in blk_mq_rq_ctx_init()
454 e->type->ops.prepare_request(rq); in blk_mq_rq_ctx_init()
469 tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset); in __blk_mq_alloc_requests_batch()
478 prefetch(tags->static_rqs[tag]); in __blk_mq_alloc_requests_batch()
481 rq_list_add_head(data->cached_rqs, rq); in __blk_mq_alloc_requests_batch()
484 if (!(data->rq_flags & RQF_SCHED_TAGS)) in __blk_mq_alloc_requests_batch()
485 blk_mq_add_active_requests(data->hctx, nr); in __blk_mq_alloc_requests_batch()
487 percpu_ref_get_many(&data->q->q_usage_counter, nr - 1); in __blk_mq_alloc_requests_batch()
488 data->nr_tags -= nr; in __blk_mq_alloc_requests_batch()
490 return rq_list_pop(data->cached_rqs); in __blk_mq_alloc_requests_batch()
495 struct request_queue *q = data->q; in __blk_mq_alloc_requests()
504 if (data->cmd_flags & REQ_NOWAIT) in __blk_mq_alloc_requests()
505 data->flags |= BLK_MQ_REQ_NOWAIT; in __blk_mq_alloc_requests()
508 data->ctx = blk_mq_get_ctx(q); in __blk_mq_alloc_requests()
509 data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx); in __blk_mq_alloc_requests()
511 if (q->elevator) { in __blk_mq_alloc_requests()
514 * enabled for the queue. in __blk_mq_alloc_requests()
516 data->rq_flags |= RQF_SCHED_TAGS; in __blk_mq_alloc_requests()
522 if ((data->cmd_flags & REQ_OP_MASK) != REQ_OP_FLUSH && in __blk_mq_alloc_requests()
523 !blk_op_is_passthrough(data->cmd_flags)) { in __blk_mq_alloc_requests()
524 struct elevator_mq_ops *ops = &q->elevator->type->ops; in __blk_mq_alloc_requests()
526 WARN_ON_ONCE(data->flags & BLK_MQ_REQ_RESERVED); in __blk_mq_alloc_requests()
528 data->rq_flags |= RQF_USE_SCHED; in __blk_mq_alloc_requests()
529 if (ops->limit_depth) in __blk_mq_alloc_requests()
530 ops->limit_depth(data->cmd_flags, data); in __blk_mq_alloc_requests()
533 blk_mq_tag_busy(data->hctx); in __blk_mq_alloc_requests()
536 if (data->flags & BLK_MQ_REQ_RESERVED) in __blk_mq_alloc_requests()
537 data->rq_flags |= RQF_RESV; in __blk_mq_alloc_requests()
542 if (data->nr_tags > 1) { in __blk_mq_alloc_requests()
548 data->nr_tags = 1; in __blk_mq_alloc_requests()
558 if (data->flags & BLK_MQ_REQ_NOWAIT) in __blk_mq_alloc_requests()
570 if (!(data->rq_flags & RQF_SCHED_TAGS)) in __blk_mq_alloc_requests()
571 blk_mq_inc_active_requests(data->hctx); in __blk_mq_alloc_requests()
586 .nr_tags = plug->nr_ios, in blk_mq_rq_cache_fill()
587 .cached_rqs = &plug->cached_rqs, in blk_mq_rq_cache_fill()
594 plug->nr_ios = 1; in blk_mq_rq_cache_fill()
606 struct blk_plug *plug = current->plug; in blk_mq_alloc_cached_request()
612 if (rq_list_empty(&plug->cached_rqs)) { in blk_mq_alloc_cached_request()
613 if (plug->nr_ios == 1) in blk_mq_alloc_cached_request()
619 rq = rq_list_peek(&plug->cached_rqs); in blk_mq_alloc_cached_request()
620 if (!rq || rq->q != q) in blk_mq_alloc_cached_request()
623 if (blk_mq_get_hctx_type(opf) != rq->mq_hctx->type) in blk_mq_alloc_cached_request()
625 if (op_is_flush(rq->cmd_flags) != op_is_flush(opf)) in blk_mq_alloc_cached_request()
628 rq_list_pop(&plug->cached_rqs); in blk_mq_alloc_cached_request()
632 rq->cmd_flags = opf; in blk_mq_alloc_cached_request()
633 INIT_LIST_HEAD(&rq->queuelist); in blk_mq_alloc_cached_request()
660 rq->__data_len = 0; in blk_mq_alloc_request()
661 rq->__sector = (sector_t) -1; in blk_mq_alloc_request()
662 rq->bio = rq->biotail = NULL; in blk_mq_alloc_request()
666 return ERR_PTR(-EWOULDBLOCK); in blk_mq_alloc_request()
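For reference, a minimal sketch of allocating and freeing a passthrough request with the helper above (opcode and flags illustrative only):

    struct request *rq;

    rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_NOWAIT);
    if (IS_ERR(rq))
            return PTR_ERR(rq);     /* -EWOULDBLOCK when no tag was available */
    /* ... fill in the driver payload, issue it, then ... */
    blk_mq_free_request(rq);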
693 * a specific queue. in blk_mq_alloc_request_hctx()
697 return ERR_PTR(-EINVAL); in blk_mq_alloc_request_hctx()
699 if (hctx_idx >= q->nr_hw_queues) in blk_mq_alloc_request_hctx()
700 return ERR_PTR(-EIO); in blk_mq_alloc_request_hctx()
708 * If not, tell the caller that it should skip this queue. in blk_mq_alloc_request_hctx()
710 ret = -EXDEV; in blk_mq_alloc_request_hctx()
711 data.hctx = xa_load(&q->hctx_table, hctx_idx); in blk_mq_alloc_request_hctx()
714 cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask); in blk_mq_alloc_request_hctx()
719 if (q->elevator) in blk_mq_alloc_request_hctx()
727 ret = -EWOULDBLOCK; in blk_mq_alloc_request_hctx()
735 rq->__data_len = 0; in blk_mq_alloc_request_hctx()
736 rq->__sector = (sector_t) -1; in blk_mq_alloc_request_hctx()
737 rq->bio = rq->biotail = NULL; in blk_mq_alloc_request_hctx()
748 struct request_queue *q = rq->q; in blk_mq_finish_request()
752 if (rq->rq_flags & RQF_USE_SCHED) { in blk_mq_finish_request()
753 q->elevator->type->ops.finish_request(rq); in blk_mq_finish_request()
759 rq->rq_flags &= ~RQF_USE_SCHED; in blk_mq_finish_request()
765 struct request_queue *q = rq->q; in __blk_mq_free_request()
766 struct blk_mq_ctx *ctx = rq->mq_ctx; in __blk_mq_free_request()
767 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in __blk_mq_free_request()
768 const int sched_tag = rq->internal_tag; in __blk_mq_free_request()
772 rq->mq_hctx = NULL; in __blk_mq_free_request()
774 if (rq->tag != BLK_MQ_NO_TAG) { in __blk_mq_free_request()
776 blk_mq_put_tag(hctx->tags, ctx, rq->tag); in __blk_mq_free_request()
779 blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag); in __blk_mq_free_request()
786 struct request_queue *q = rq->q; in blk_mq_free_request()
791 laptop_io_completion(q->disk->bdi); in blk_mq_free_request()
795 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in blk_mq_free_request()
805 while ((rq = rq_list_pop(&plug->cached_rqs)) != NULL) in blk_mq_free_plug_rqs()
812 rq->q->disk ? rq->q->disk->disk_name : "?", in blk_dump_rq_flags()
813 (__force unsigned long long) rq->cmd_flags); in blk_dump_rq_flags()
819 rq->bio, rq->biotail, blk_rq_bytes(rq)); in blk_dump_rq_flags()
825 if (req->rq_flags & RQF_IO_STAT) { in blk_account_io_completion()
829 part_stat_add(req->part, sectors[sgrp], bytes >> 9); in blk_account_io_completion()
840 req->q->disk ? req->q->disk->disk_name : "?", in blk_print_req_error()
843 (__force u32)(req->cmd_flags & ~REQ_OP_MASK), in blk_print_req_error()
844 req->nr_phys_segments, in blk_print_req_error()
854 const bool is_flush = (req->rq_flags & RQF_FLUSH_SEQ) != 0; in blk_complete_request()
856 struct bio *bio = req->bio; in blk_complete_request()
875 struct bio *next = bio->bi_next; in blk_complete_request()
892 if (!req->end_io) { in blk_complete_request()
893 req->bio = NULL; in blk_complete_request()
894 req->__data_len = 0; in blk_complete_request()
899 * blk_update_request - Complete multiple bytes without completing the request
917 * %false - this request doesn't have any more data
918 * %true - this request has more data
923 bool is_flush = req->rq_flags & RQF_FLUSH_SEQ; in blk_update_request()
924 bool quiet = req->rq_flags & RQF_QUIET; in blk_update_request()
929 if (!req->bio) in blk_update_request()
944 !test_bit(GD_DEAD, &req->q->disk->state)) { in blk_update_request()
952 while (req->bio) { in blk_update_request()
953 struct bio *bio = req->bio; in blk_update_request()
954 unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes); in blk_update_request()
957 bio->bi_status = error; in blk_update_request()
959 if (bio_bytes == bio->bi_iter.bi_size) { in blk_update_request()
960 req->bio = bio->bi_next; in blk_update_request()
967 bio->bi_status = BLK_STS_IOERR; in blk_update_request()
978 if (!bio->bi_iter.bi_size) { in blk_update_request()
985 nr_bytes -= bio_bytes; in blk_update_request()
994 if (!req->bio) { in blk_update_request()
1000 req->__data_len = 0; in blk_update_request()
1004 req->__data_len -= total_bytes; in blk_update_request()
1008 req->__sector += total_bytes >> 9; in blk_update_request()
1011 if (req->rq_flags & RQF_MIXED_MERGE) { in blk_update_request()
1012 req->cmd_flags &= ~REQ_FAILFAST_MASK; in blk_update_request()
1013 req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK; in blk_update_request()
1016 if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) { in blk_update_request()
1023 req->__data_len = blk_rq_cur_bytes(req); in blk_update_request()
1027 req->nr_phys_segments = blk_recalc_rq_segments(req); in blk_update_request()
1043 if ((req->rq_flags & (RQF_IO_STAT|RQF_FLUSH_SEQ)) == RQF_IO_STAT) { in blk_account_io_done()
1047 update_io_ticks(req->part, jiffies, true); in blk_account_io_done()
1048 part_stat_inc(req->part, ios[sgrp]); in blk_account_io_done()
1049 part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns); in blk_account_io_done()
1050 part_stat_local_dec(req->part, in blk_account_io_done()
1058 struct bio *bio = req->bio; in blk_rq_passthrough_stats()
1060 if (!blk_queue_passthrough_stat(req->q)) in blk_rq_passthrough_stats()
1072 if (!bio->bi_bdev) in blk_rq_passthrough_stats()
1081 if (blk_rq_bytes(req) & (bdev_logical_block_size(bio->bi_bdev) - 1)) in blk_rq_passthrough_stats()
1090 if (!blk_queue_io_stat(req->q)) in blk_account_io_start()
1095 req->rq_flags |= RQF_IO_STAT; in blk_account_io_start()
1096 req->start_time_ns = blk_time_get_ns(); in blk_account_io_start()
1099 * All non-passthrough requests are created from a bio with one in blk_account_io_start()
1101 * generated by the state machine in blk-flush.c is cloned onto the in blk_account_io_start()
1102 * lower device by dm-multipath we can get here without a bio. in blk_account_io_start()
1104 if (req->bio) in blk_account_io_start()
1105 req->part = req->bio->bi_bdev; in blk_account_io_start()
1107 req->part = req->q->disk->part0; in blk_account_io_start()
1110 update_io_ticks(req->part, jiffies, false); in blk_account_io_start()
1111 part_stat_local_inc(req->part, in_flight[op_is_write(req_op(req))]); in blk_account_io_start()
1117 if (rq->rq_flags & RQF_STATS) in __blk_mq_end_request_acct()
1131 if (rq->end_io) { in __blk_mq_end_request()
1132 rq_qos_done(rq->q, rq); in __blk_mq_end_request()
1133 if (rq->end_io(rq, error) == RQ_END_IO_FREE) in __blk_mq_end_request()
1154 struct request_queue *q = hctx->queue; in blk_mq_flush_tag_batch()
1158 blk_mq_put_tags(hctx->tags, tag_array, nr_tags); in blk_mq_flush_tag_batch()
1159 percpu_ref_put_many(&q->q_usage_counter, nr_tags); in blk_mq_flush_tag_batch()
1169 if (iob->need_ts) in blk_mq_end_request_batch()
1172 while ((rq = rq_list_pop(&iob->req_list)) != NULL) { in blk_mq_end_request_batch()
1173 prefetch(rq->bio); in blk_mq_end_request_batch()
1174 prefetch(rq->rq_next); in blk_mq_end_request_batch()
1177 if (iob->need_ts) in blk_mq_end_request_batch()
1182 rq_qos_done(rq->q, rq); in blk_mq_end_request_batch()
1188 if (rq->end_io && rq->end_io(rq, 0) == RQ_END_IO_NONE) in blk_mq_end_request_batch()
1191 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in blk_mq_end_request_batch()
1198 if (nr_tags == TAG_COMP_BATCH || cur_hctx != rq->mq_hctx) { in blk_mq_end_request_batch()
1202 cur_hctx = rq->mq_hctx; in blk_mq_end_request_batch()
1204 tags[nr_tags++] = rq->tag; in blk_mq_end_request_batch()
1218 rq->q->mq_ops->complete(rq); in blk_complete_reqs()
1242 !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) in blk_mq_complete_need_ipi()
1254 if (cpu == rq->mq_ctx->cpu || in blk_mq_complete_need_ipi()
1255 (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) && in blk_mq_complete_need_ipi()
1256 cpus_share_cache(cpu, rq->mq_ctx->cpu) && in blk_mq_complete_need_ipi()
1257 cpus_equal_capacity(cpu, rq->mq_ctx->cpu))) in blk_mq_complete_need_ipi()
1261 return cpu_online(rq->mq_ctx->cpu); in blk_mq_complete_need_ipi()
1268 cpu = rq->mq_ctx->cpu; in blk_mq_complete_send_ipi()
1269 if (llist_add(&rq->ipi_list, &per_cpu(blk_cpu_done, cpu))) in blk_mq_complete_send_ipi()
1279 if (llist_add(&rq->ipi_list, list)) in blk_mq_raise_softirq()
1286 WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); in blk_mq_complete_request_remote()
1293 if ((rq->mq_hctx->nr_ctx == 1 && in blk_mq_complete_request_remote()
1294 rq->mq_ctx->cpu == raw_smp_processor_id()) || in blk_mq_complete_request_remote()
1295 rq->cmd_flags & REQ_POLLED) in blk_mq_complete_request_remote()
1303 if (rq->q->nr_hw_queues == 1) { in blk_mq_complete_request_remote()
1312 * blk_mq_complete_request - end I/O on a request
1316 * Complete a request by scheduling the ->complete_rq operation.
1321 rq->q->mq_ops->complete(rq); in blk_mq_complete_request()
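blk_mq_complete_request() is the driver-facing entry point for completions; the remote/IPI decision above is hidden behind it. A hedged sketch of where it sits in a completion handler (fetch helper hypothetical):

    /* Hypothetical interrupt handler fragment. */
    static irqreturn_t example_irq(int irq, void *data)
    {
            struct request *rq = example_fetch_completed(data);    /* hypothetical */

            if (rq)
                    blk_mq_complete_request(rq);    /* ->complete() runs locally or via IPI/softirq */
            return IRQ_HANDLED;
    }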
1326 * blk_mq_start_request - Start processing a request
1335 struct request_queue *q = rq->q; in blk_mq_start_request()
1339 if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags) && in blk_mq_start_request()
1341 rq->io_start_time_ns = blk_time_get_ns(); in blk_mq_start_request()
1342 rq->stats_sectors = blk_rq_sectors(rq); in blk_mq_start_request()
1343 rq->rq_flags |= RQF_STATS; in blk_mq_start_request()
1350 WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT); in blk_mq_start_request()
1351 rq->mq_hctx->tags->rqs[rq->tag] = rq; in blk_mq_start_request()
1356 if (rq->bio && rq->bio->bi_opf & REQ_POLLED) in blk_mq_start_request()
1357 WRITE_ONCE(rq->bio->bi_cookie, rq->mq_hctx->queue_num); in blk_mq_start_request()
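blk_mq_start_request() has to run before the request is handed to hardware so that timeouts, stats and polling see it in flight. A hedged sketch of its place in a typical ->queue_rq() handler (submit helper hypothetical):

    static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
                                         const struct blk_mq_queue_data *bd)
    {
            struct request *rq = bd->rq;

            blk_mq_start_request(rq);       /* MQ_RQ_IN_FLIGHT, deadline armed */
            if (example_hw_submit(rq))      /* hypothetical device-specific submit */
                    return BLK_STS_RESOURCE;        /* blk-mq requeues and reruns */
            return BLK_STS_OK;
    }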
1362 * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
1368 if (plug->multiple_queues) in blk_plug_max_rq_count()
1375 struct request *last = rq_list_peek(&plug->mq_list); in blk_add_rq_to_plug()
1377 if (!plug->rq_count) { in blk_add_rq_to_plug()
1378 trace_block_plug(rq->q); in blk_add_rq_to_plug()
1379 } else if (plug->rq_count >= blk_plug_max_rq_count(plug) || in blk_add_rq_to_plug()
1380 (!blk_queue_nomerges(rq->q) && in blk_add_rq_to_plug()
1384 trace_block_plug(rq->q); in blk_add_rq_to_plug()
1387 if (!plug->multiple_queues && last && last->q != rq->q) in blk_add_rq_to_plug()
1388 plug->multiple_queues = true; in blk_add_rq_to_plug()
1391 * ->queue_rqs() directly in blk_add_rq_to_plug()
1393 if (!plug->has_elevator && (rq->rq_flags & RQF_SCHED_TAGS)) in blk_add_rq_to_plug()
1394 plug->has_elevator = true; in blk_add_rq_to_plug()
1395 rq_list_add_tail(&plug->mq_list, rq); in blk_add_rq_to_plug()
1396 plug->rq_count++; in blk_add_rq_to_plug()
1400 * blk_execute_rq_nowait - insert a request to I/O scheduler for execution
1402 * @at_head: insert request at head or tail of queue
1405 * Insert a fully prepared request at the back of the I/O scheduler queue
1406 * for execution. Don't wait for completion.
1409 * This function will invoke @done directly if the queue is dead.
1413 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_execute_rq_nowait()
1420 if (current->plug && !at_head) { in blk_execute_rq_nowait()
1421 blk_add_rq_to_plug(current->plug, rq); in blk_execute_rq_nowait()
1426 blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING); in blk_execute_rq_nowait()
1437 struct blk_rq_wait *wait = rq->end_io_data; in blk_end_sync_rq() local
1439 wait->ret = ret; in blk_end_sync_rq()
1440 complete(&wait->done); in blk_end_sync_rq()
1446 if (!rq->mq_hctx) in blk_rq_is_poll()
1448 if (rq->mq_hctx->type != HCTX_TYPE_POLL) in blk_rq_is_poll()
1454 static void blk_rq_poll_completion(struct request *rq, struct completion *wait) in blk_rq_poll_completion() argument
1457 blk_hctx_poll(rq->q, rq->mq_hctx, NULL, 0); in blk_rq_poll_completion()
1459 } while (!completion_done(wait)); in blk_rq_poll_completion()
1463 * blk_execute_rq - insert a request into queue for execution
1465 * @at_head: insert request at head or tail of queue
1468 * Insert a fully prepared request at the back of the I/O scheduler queue
1469 * for execution and wait for completion.
1474 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_execute_rq()
1475 struct blk_rq_wait wait = { in blk_execute_rq() local
1476 .done = COMPLETION_INITIALIZER_ONSTACK(wait.done), in blk_execute_rq()
1482 rq->end_io_data = &wait; in blk_execute_rq()
1483 rq->end_io = blk_end_sync_rq; in blk_execute_rq()
1490 blk_rq_poll_completion(rq, &wait.done); in blk_execute_rq()
1492 blk_wait_io(&wait.done); in blk_execute_rq()
1494 return wait.ret; in blk_execute_rq()
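Putting the execute helpers together, a hedged sketch of the common synchronous passthrough pattern (allocation details and data mapping trimmed; opcode illustrative):

    struct request *rq;
    blk_status_t status;

    rq = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
    if (IS_ERR(rq))
            return PTR_ERR(rq);
    /* ... attach data, e.g. via blk_rq_map_kern(), if the command carries any ... */
    status = blk_execute_rq(rq, false);     /* inserts, then waits on blk_rq_wait.done */
    blk_mq_free_request(rq);
    return blk_status_to_errno(status);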
1500 struct request_queue *q = rq->q; in __blk_mq_requeue_request()
1508 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in __blk_mq_requeue_request()
1509 rq->rq_flags &= ~RQF_TIMED_OUT; in __blk_mq_requeue_request()
1515 struct request_queue *q = rq->q; in blk_mq_requeue_request()
1520 /* this request will be re-inserted to io scheduler queue */ in blk_mq_requeue_request()
1523 spin_lock_irqsave(&q->requeue_lock, flags); in blk_mq_requeue_request()
1524 list_add_tail(&rq->queuelist, &q->requeue_list); in blk_mq_requeue_request()
1525 spin_unlock_irqrestore(&q->requeue_lock, flags); in blk_mq_requeue_request()
1540 spin_lock_irq(&q->requeue_lock); in blk_mq_requeue_work()
1541 list_splice_init(&q->requeue_list, &rq_list); in blk_mq_requeue_work()
1542 list_splice_init(&q->flush_list, &flush_list); in blk_mq_requeue_work()
1543 spin_unlock_irq(&q->requeue_lock); in blk_mq_requeue_work()
1547 list_del_init(&rq->queuelist); in blk_mq_requeue_work()
1550 * driver already and might have driver-specific data allocated in blk_mq_requeue_work()
1554 if (rq->rq_flags & RQF_DONTPREP) in blk_mq_requeue_work()
1562 list_del_init(&rq->queuelist); in blk_mq_requeue_work()
1571 kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 0); in blk_mq_kick_requeue_list()
1578 kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, in blk_mq_delay_kick_requeue_list()
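A hedged sketch of how a driver typically drives this requeue machinery when the device temporarily cannot accept work (function names hypothetical):

    /* Hypothetical error path: park the request on q->requeue_list for now. */
    static void example_defer(struct request *rq)
    {
            blk_mq_requeue_request(rq, false);      /* don't kick the list yet */
    }

    /* Once the device recovers, reinsert everything that was parked. */
    static void example_resume(struct request_queue *q)
    {
            blk_mq_kick_requeue_list(q);    /* schedules blk_mq_requeue_work() */
    }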
1585 return (rq->rq_flags & RQF_FLUSH_SEQ) && !is_flush_rq(rq); in blk_is_flush_data_rq()
1591 * If we find a request that isn't idle we know the queue is busy in blk_mq_rq_inflight()
1595 * In case of queue quiesce, if one flush data request is completed, in blk_mq_rq_inflight()
1600 if (blk_mq_request_started(rq) && !(blk_queue_quiesced(rq->q) && in blk_mq_rq_inflight()
1623 req->rq_flags |= RQF_TIMED_OUT; in blk_mq_rq_timed_out()
1624 if (req->q->mq_ops->timeout) { in blk_mq_rq_timed_out()
1627 ret = req->q->mq_ops->timeout(req); in blk_mq_rq_timed_out()
1648 if (rq->rq_flags & RQF_TIMED_OUT) in blk_mq_req_expired()
1651 deadline = READ_ONCE(rq->deadline); in blk_mq_req_expired()
1652 if (time_after_eq(expired->timeout_start, deadline)) in blk_mq_req_expired()
1655 if (expired->next == 0) in blk_mq_req_expired()
1656 expired->next = deadline; in blk_mq_req_expired()
1657 else if (time_after(expired->next, deadline)) in blk_mq_req_expired()
1658 expired->next = deadline; in blk_mq_req_expired()
1665 if (rq->end_io(rq, 0) == RQ_END_IO_FREE) in blk_mq_put_rq_ref()
1684 expired->has_timedout_rq = true; in blk_mq_check_expired()
1710 * timeout at the same time a queue freeze is waiting in blk_mq_timeout_work()
1712 * acquire the queue reference here. in blk_mq_timeout_work()
1716 * obtain a reference even in the short window between the queue in blk_mq_timeout_work()
1722 if (!percpu_ref_tryget(&q->q_usage_counter)) in blk_mq_timeout_work()
1725 /* check if there is any timed-out request */ in blk_mq_timeout_work()
1731 * uses srcu or rcu, wait for a synchronization point to in blk_mq_timeout_work()
1734 blk_mq_wait_quiesce_done(q->tag_set); in blk_mq_timeout_work()
1741 mod_timer(&q->timeout, expired.next); in blk_mq_timeout_work()
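The scan above ends up calling the driver's ->timeout() method via blk_mq_rq_timed_out(). A hedged sketch of such a handler (the policy and the hardware check are purely illustrative):

    /* Hypothetical ->timeout() implementation. */
    static enum blk_eh_timer_return example_timeout(struct request *rq)
    {
            if (example_request_still_in_hw(rq))    /* hypothetical check */
                    return BLK_EH_RESET_TIMER;      /* re-arm the deadline, keep waiting */

            blk_mq_complete_request(rq);            /* give up and complete it ourselves */
            return BLK_EH_DONE;
    }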
1766 struct blk_mq_hw_ctx *hctx = flush_data->hctx; in flush_busy_ctx()
1767 struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; in flush_busy_ctx()
1768 enum hctx_type type = hctx->type; in flush_busy_ctx()
1770 spin_lock(&ctx->lock); in flush_busy_ctx()
1771 list_splice_tail_init(&ctx->rq_lists[type], flush_data->list); in flush_busy_ctx()
1773 spin_unlock(&ctx->lock); in flush_busy_ctx()
1779 * to the for-dispatch
1788 sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data); in blk_mq_flush_busy_ctxs()
1800 struct blk_mq_hw_ctx *hctx = dispatch_data->hctx; in dispatch_rq_from_ctx()
1801 struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; in dispatch_rq_from_ctx()
1802 enum hctx_type type = hctx->type; in dispatch_rq_from_ctx()
1804 spin_lock(&ctx->lock); in dispatch_rq_from_ctx()
1805 if (!list_empty(&ctx->rq_lists[type])) { in dispatch_rq_from_ctx()
1806 dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next); in dispatch_rq_from_ctx()
1807 list_del_init(&dispatch_data->rq->queuelist); in dispatch_rq_from_ctx()
1808 if (list_empty(&ctx->rq_lists[type])) in dispatch_rq_from_ctx()
1811 spin_unlock(&ctx->lock); in dispatch_rq_from_ctx()
1813 return !dispatch_data->rq; in dispatch_rq_from_ctx()
1819 unsigned off = start ? start->index_hw[hctx->type] : 0; in blk_mq_dequeue_from_ctx()
1825 __sbitmap_for_each_set(&hctx->ctx_map, off, in blk_mq_dequeue_from_ctx()
1833 struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; in __blk_mq_alloc_driver_tag()
1834 unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags; in __blk_mq_alloc_driver_tag()
1837 blk_mq_tag_busy(rq->mq_hctx); in __blk_mq_alloc_driver_tag()
1839 if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) { in __blk_mq_alloc_driver_tag()
1840 bt = &rq->mq_hctx->tags->breserved_tags; in __blk_mq_alloc_driver_tag()
1843 if (!hctx_may_queue(rq->mq_hctx, bt)) in __blk_mq_alloc_driver_tag()
1851 rq->tag = tag + tag_offset; in __blk_mq_alloc_driver_tag()
1852 blk_mq_inc_active_requests(rq->mq_hctx); in __blk_mq_alloc_driver_tag()
1856 static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, in blk_mq_dispatch_wake() argument
1861 hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait); in blk_mq_dispatch_wake()
1863 spin_lock(&hctx->dispatch_wait_lock); in blk_mq_dispatch_wake()
1864 if (!list_empty(&wait->entry)) { in blk_mq_dispatch_wake()
1867 list_del_init(&wait->entry); in blk_mq_dispatch_wake()
1868 sbq = &hctx->tags->bitmap_tags; in blk_mq_dispatch_wake()
1869 atomic_dec(&sbq->ws_active); in blk_mq_dispatch_wake()
1871 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_dispatch_wake()
1879 * the tag wakeups. For non-shared tags, we can simply mark us needing a
1888 wait_queue_entry_t *wait; in blk_mq_mark_tag_wait() local
1891 if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) && in blk_mq_mark_tag_wait()
1892 !(blk_mq_is_shared_tags(hctx->flags))) { in blk_mq_mark_tag_wait()
1897 * allocation failure and adding the hardware queue to the wait in blk_mq_mark_tag_wait()
1898 * queue. in blk_mq_mark_tag_wait()
1901 * At most this will cost an extra queue run. in blk_mq_mark_tag_wait()
1906 wait = &hctx->dispatch_wait; in blk_mq_mark_tag_wait()
1907 if (!list_empty_careful(&wait->entry)) in blk_mq_mark_tag_wait()
1910 if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) in blk_mq_mark_tag_wait()
1911 sbq = &hctx->tags->breserved_tags; in blk_mq_mark_tag_wait()
1913 sbq = &hctx->tags->bitmap_tags; in blk_mq_mark_tag_wait()
1914 wq = &bt_wait_ptr(sbq, hctx)->wait; in blk_mq_mark_tag_wait()
1916 spin_lock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1917 spin_lock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1918 if (!list_empty(&wait->entry)) { in blk_mq_mark_tag_wait()
1919 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1920 spin_unlock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1924 atomic_inc(&sbq->ws_active); in blk_mq_mark_tag_wait()
1925 wait->flags &= ~WQ_FLAG_EXCLUSIVE; in blk_mq_mark_tag_wait()
1926 __add_wait_queue(wq, wait); in blk_mq_mark_tag_wait()
1932 * Order adding us to wait queue and allocating driver tag. in blk_mq_mark_tag_wait()
1938 * Otherwise, re-order of adding wait queue and getting driver tag in blk_mq_mark_tag_wait()
1940 * the waitqueue_active() may not observe us in wait queue. in blk_mq_mark_tag_wait()
1946 * allocation failure and adding the hardware queue to the wait in blk_mq_mark_tag_wait()
1947 * queue. in blk_mq_mark_tag_wait()
1951 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1952 spin_unlock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1957 * We got a tag, remove ourselves from the wait queue to ensure in blk_mq_mark_tag_wait()
1960 list_del_init(&wait->entry); in blk_mq_mark_tag_wait()
1961 atomic_dec(&sbq->ws_active); in blk_mq_mark_tag_wait()
1962 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1963 spin_unlock_irq(&wq->lock); in blk_mq_mark_tag_wait()
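The lock ordering above is the classic add-then-recheck recipe for avoiding a lost wakeup. Reduced to its skeleton (a generic sketch, not the exact blk-mq code; try_get_resource() is a stand-in for the driver-tag allocation):

    add_wait_queue(wq, wait);               /* 1) become visible to the waker first */
    smp_mb();                               /* 2) order the add against the retry */
    if (try_get_resource()) {               /* 3) retry; a racing free now wakes us */
            remove_wait_queue(wq, wait);    /*    got it after all: back out */
            return true;
    }
    return false;                           /* 4) genuinely out: stay on the queue */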
1972 * - EWMA is one simple way to compute running average value
1973 * - weight(7/8 and 1/8) is applied so that it can decrease exponentially
1974 * - take 4 as factor to avoid getting a too small (0) result, and this
1981 ewma = hctx->dispatch_busy; in blk_mq_update_dispatch_busy()
1986 ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1; in blk_mq_update_dispatch_busy()
1991 hctx->dispatch_busy = ewma; in blk_mq_update_dispatch_busy()
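Concretely, with the 7/8 weight and factor of 4 named in the comment above, each sample updates dispatch_busy as ewma = (ewma * 7 + (busy ? 1 << 4 : 0)) / 8: starting from 0, a single busy dispatch yields 2, a sustained busy streak converges toward 16, and each idle sample then decays the value by roughly one eighth (the factor of 4 keeps intermediate results from truncating straight to 0 in integer math).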
1999 list_add(&rq->queuelist, list); in blk_mq_handle_dev_resource()
2012 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_prep_dispatch_rq()
2013 int budget_token = -1; in blk_mq_prep_dispatch_rq()
2016 budget_token = blk_mq_get_dispatch_budget(rq->q); in blk_mq_prep_dispatch_rq()
2027 * rerun the hardware queue when a tag is freed. The in blk_mq_prep_dispatch_rq()
2028 * waitqueue takes care of that. If the queue is run in blk_mq_prep_dispatch_rq()
2030 * we'll re-run it below. in blk_mq_prep_dispatch_rq()
2038 blk_mq_put_dispatch_budget(rq->q, budget_token); in blk_mq_prep_dispatch_rq()
2061 * blk_mq_commit_rqs will notify driver using bd->last that there is no
2065 * 1) did not queue everything initially scheduled to queue
2066 * 2) the last attempt to queue a request failed
2071 if (hctx->queue->mq_ops->commit_rqs && queued) { in blk_mq_commit_rqs()
2072 trace_block_unplug(hctx->queue, queued, !from_schedule); in blk_mq_commit_rqs()
2073 hctx->queue->mq_ops->commit_rqs(hctx); in blk_mq_commit_rqs()
2084 struct request_queue *q = hctx->queue; in blk_mq_dispatch_rq_list()
2102 WARN_ON_ONCE(hctx != rq->mq_hctx); in blk_mq_dispatch_rq_list()
2107 list_del_init(&rq->queuelist); in blk_mq_dispatch_rq_list()
2117 nr_budgets--; in blk_mq_dispatch_rq_list()
2118 ret = q->mq_ops->queue_rq(hctx, &bd); in blk_mq_dispatch_rq_list()
2141 * Any items that need requeuing? Stuff them into hctx->dispatch, in blk_mq_dispatch_rq_list()
2142 * that is where we will continue on next queue run. in blk_mq_dispatch_rq_list()
2146 /* For non-shared tags, the RESTART check will suffice */ in blk_mq_dispatch_rq_list()
2148 ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) || in blk_mq_dispatch_rq_list()
2149 blk_mq_is_shared_tags(hctx->flags)); in blk_mq_dispatch_rq_list()
2154 spin_lock(&hctx->lock); in blk_mq_dispatch_rq_list()
2155 list_splice_tail_init(list, &hctx->dispatch); in blk_mq_dispatch_rq_list()
2156 spin_unlock(&hctx->lock); in blk_mq_dispatch_rq_list()
2159 * Order adding requests to hctx->dispatch and checking in blk_mq_dispatch_rq_list()
2162 * miss the new added requests to hctx->dispatch, meantime in blk_mq_dispatch_rq_list()
2170 * thread and hence that a queue rerun is needed. in blk_mq_dispatch_rq_list()
2174 * waitqueue is no longer active, ensure that we run the queue in blk_mq_dispatch_rq_list()
2178 * the hardware queue got stopped and restarted before requests in blk_mq_dispatch_rq_list()
2179 * were pushed back onto the dispatch list. Rerun the queue to in blk_mq_dispatch_rq_list()
2181 * - blk_mq_run_hw_queue() checks whether or not a queue has in blk_mq_dispatch_rq_list()
2182 * been stopped before rerunning a queue. in blk_mq_dispatch_rq_list()
2183 * - Some but not all block drivers stop a queue before in blk_mq_dispatch_rq_list()
2184 * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq in blk_mq_dispatch_rq_list()
2185 * and dm-rq. in blk_mq_dispatch_rq_list()
2188 * bit is set, run queue after a delay to avoid IO stalls in blk_mq_dispatch_rq_list()
2189 * that could otherwise occur if the queue is idle. We'll do in blk_mq_dispatch_rq_list()
2197 (no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) in blk_mq_dispatch_rq_list()
2212 int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask); in blk_mq_first_mapped_cpu()
2215 cpu = cpumask_first(hctx->cpumask); in blk_mq_first_mapped_cpu()
2220 * ->next_cpu is always calculated from hctx->cpumask, so simply use
2225 return hctx->next_cpu >= nr_cpu_ids; in blk_mq_hctx_empty_cpumask()
2231 * For now we just round-robin here, switching for every
2237 int next_cpu = hctx->next_cpu; in blk_mq_hctx_next_cpu()
2240 if (hctx->queue->nr_hw_queues == 1 || blk_mq_hctx_empty_cpumask(hctx)) in blk_mq_hctx_next_cpu()
2243 if (--hctx->next_cpu_batch <= 0) { in blk_mq_hctx_next_cpu()
2245 next_cpu = cpumask_next_and(next_cpu, hctx->cpumask, in blk_mq_hctx_next_cpu()
2249 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; in blk_mq_hctx_next_cpu()
2263 * Make sure to re-select CPU next time once after CPUs in blk_mq_hctx_next_cpu()
2264 * in hctx->cpumask become online again. in blk_mq_hctx_next_cpu()
2266 hctx->next_cpu = next_cpu; in blk_mq_hctx_next_cpu()
2267 hctx->next_cpu_batch = 1; in blk_mq_hctx_next_cpu()
2271 hctx->next_cpu = next_cpu; in blk_mq_hctx_next_cpu()
2276 * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously.
2277 * @hctx: Pointer to the hardware queue to run.
2278 * @msecs: Milliseconds of delay to wait before running the queue.
2280 * Run a hardware queue asynchronously with a delay of @msecs.
2286 kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work, in blk_mq_delay_run_hw_queue()
2296 * When queue is quiesced, we may be switching io scheduler, or in blk_mq_hw_queue_need_run()
2297 * updating nr_hw_queues, or other things, and we can't run queue in blk_mq_hw_queue_need_run()
2300 * And queue will be rerun in blk_mq_unquiesce_queue() if it is in blk_mq_hw_queue_need_run()
2303 __blk_mq_run_dispatch_ops(hctx->queue, false, in blk_mq_hw_queue_need_run()
2304 need_run = !blk_queue_quiesced(hctx->queue) && in blk_mq_hw_queue_need_run()
2310 * blk_mq_run_hw_queue - Start to run a hardware queue.
2311 * @hctx: Pointer to the hardware queue to run.
2312 * @async: If we want to run the queue asynchronously.
2314 * Check if the request queue is not in a quiesced state and if there are
2315 * pending requests to be sent. If this is true, run the queue to send requests
2323 * We can't run the queue inline with interrupts disabled. in blk_mq_run_hw_queue()
2327 might_sleep_if(!async && hctx->flags & BLK_MQ_F_BLOCKING); in blk_mq_run_hw_queue()
2335 * if hw queue is quiesced locklessly above, we need the use in blk_mq_run_hw_queue()
2336 * ->queue_lock to make sure we see the up-to-date status to in blk_mq_run_hw_queue()
2337 * not miss rerunning the hw queue. in blk_mq_run_hw_queue()
2339 spin_lock_irqsave(&hctx->queue->queue_lock, flags); in blk_mq_run_hw_queue()
2341 spin_unlock_irqrestore(&hctx->queue->queue_lock, flags); in blk_mq_run_hw_queue()
2347 if (async || !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) { in blk_mq_run_hw_queue()
2352 blk_mq_run_dispatch_ops(hctx->queue, in blk_mq_run_hw_queue()
2358 * Return preferred queue to dispatch from (if any) for non-mq aware IO
2371 struct blk_mq_hw_ctx *hctx = ctx->hctxs[HCTX_TYPE_DEFAULT]; in blk_mq_get_sq_hctx()
2379 * blk_mq_run_hw_queues - Run all hardware queues in a request queue.
2380 * @q: Pointer to the request queue to run.
2381 * @async: If we want to run the queue asynchronously.
2400 !list_empty_careful(&hctx->dispatch)) in blk_mq_run_hw_queues()
2407 * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously.
2408 * @q: Pointer to the request queue to run.
2409 * @msecs: Milliseconds of delay to wait before running the queues.
2425 * if another hctx is re-delaying the other's work in blk_mq_delay_run_hw_queues()
2428 if (delayed_work_pending(&hctx->run_work)) in blk_mq_delay_run_hw_queues()
2436 !list_empty_careful(&hctx->dispatch)) in blk_mq_delay_run_hw_queues()
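Drivers lean on these run helpers when a resource shortage clears; a hedged sketch of the two common call shapes (the delay value is illustrative):

    /* After freeing whatever made ->queue_rq() return BLK_STS_RESOURCE: */
    blk_mq_run_hw_queues(q, true);          /* rerun all hw queues, asynchronously */

    /* Or, when the shortage is expected to clear on its own shortly: */
    blk_mq_delay_run_hw_queue(hctx, 3);     /* rerun this hctx in ~3 ms */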
2453 cancel_delayed_work(&hctx->run_work); in blk_mq_stop_hw_queue()
2455 set_bit(BLK_MQ_S_STOPPED, &hctx->state); in blk_mq_stop_hw_queue()
2480 clear_bit(BLK_MQ_S_STOPPED, &hctx->state); in blk_mq_start_hw_queue()
2482 blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING); in blk_mq_start_hw_queue()
2501 clear_bit(BLK_MQ_S_STOPPED, &hctx->state); in blk_mq_start_stopped_hw_queue()
2519 (hctx->flags & BLK_MQ_F_BLOCKING)); in blk_mq_start_stopped_hw_queues()
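The stop/start pair is normally driven by device-side flow control; a hedged sketch of the usual pattern (the full-ring condition is hypothetical):

    /* In ->queue_rq(), when the device ring is full: */
    blk_mq_stop_hw_queue(hctx);
    return BLK_STS_DEV_RESOURCE;    /* request goes back to hctx->dispatch until restarted */

    /* In the completion path, once ring space is available again: */
    blk_mq_start_stopped_hw_queues(q, true);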
2528 blk_mq_run_dispatch_ops(hctx->queue, in blk_mq_run_work_fn()
2533 * blk_mq_request_bypass_insert - Insert a request at dispatch list.
2542 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_request_bypass_insert()
2544 spin_lock(&hctx->lock); in blk_mq_request_bypass_insert()
2546 list_add(&rq->queuelist, &hctx->dispatch); in blk_mq_request_bypass_insert()
2548 list_add_tail(&rq->queuelist, &hctx->dispatch); in blk_mq_request_bypass_insert()
2549 spin_unlock(&hctx->lock); in blk_mq_request_bypass_insert()
2557 enum hctx_type type = hctx->type; in blk_mq_insert_requests()
2560 * Try to issue requests directly if the hw queue isn't busy to save an in blk_mq_insert_requests()
2561 * extra enqueue & dequeue to the sw queue. in blk_mq_insert_requests()
2563 if (!hctx->dispatch_busy && !run_queue_async) { in blk_mq_insert_requests()
2564 blk_mq_run_dispatch_ops(hctx->queue, in blk_mq_insert_requests()
2571 * preemption doesn't flush plug list, so it's possible ctx->cpu is in blk_mq_insert_requests()
2575 BUG_ON(rq->mq_ctx != ctx); in blk_mq_insert_requests()
2577 if (rq->cmd_flags & REQ_NOWAIT) in blk_mq_insert_requests()
2581 spin_lock(&ctx->lock); in blk_mq_insert_requests()
2582 list_splice_tail_init(list, &ctx->rq_lists[type]); in blk_mq_insert_requests()
2584 spin_unlock(&ctx->lock); in blk_mq_insert_requests()
2591 struct request_queue *q = rq->q; in blk_mq_insert_request()
2592 struct blk_mq_ctx *ctx = rq->mq_ctx; in blk_mq_insert_request()
2593 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_insert_request()
2597 * Passthrough request have to be added to hctx->dispatch in blk_mq_insert_request()
2600 * them, which gets them added to hctx->dispatch. in blk_mq_insert_request()
2603 * and it is added to the scheduler queue, there is no chance to in blk_mq_insert_request()
2604 * dispatch it given we prioritize requests in hctx->dispatch. in blk_mq_insert_request()
2609 * Firstly normal IO request is inserted to scheduler queue or in blk_mq_insert_request()
2610 * sw queue, meantime we add flush request to dispatch queue( in blk_mq_insert_request()
2611 * hctx->dispatch) directly and there is at most one in-flight in blk_mq_insert_request()
2612 * flush request for each hw queue, so it doesn't matter to add in blk_mq_insert_request()
2613 * flush request to tail or front of the dispatch queue. in blk_mq_insert_request()
2615 * Secondly in case of NCQ, flush request belongs to non-NCQ in blk_mq_insert_request()
2617 * in-flight normal IO request(NCQ command). When adding flush in blk_mq_insert_request()
2618 * rq to the front of hctx->dispatch, it is easier to introduce in blk_mq_insert_request()
2620 * compared with adding to the tail of dispatch queue, then in blk_mq_insert_request()
2624 * drive when adding flush rq to the front of hctx->dispatch. in blk_mq_insert_request()
2626 * Simply queue flush rq to the front of hctx->dispatch so that in blk_mq_insert_request()
2630 } else if (q->elevator) { in blk_mq_insert_request()
2633 WARN_ON_ONCE(rq->tag != BLK_MQ_NO_TAG); in blk_mq_insert_request()
2635 list_add(&rq->queuelist, &list); in blk_mq_insert_request()
2636 q->elevator->type->ops.insert_requests(hctx, &list, flags); in blk_mq_insert_request()
2640 spin_lock(&ctx->lock); in blk_mq_insert_request()
2642 list_add(&rq->queuelist, &ctx->rq_lists[hctx->type]); in blk_mq_insert_request()
2644 list_add_tail(&rq->queuelist, in blk_mq_insert_request()
2645 &ctx->rq_lists[hctx->type]); in blk_mq_insert_request()
2647 spin_unlock(&ctx->lock); in blk_mq_insert_request()
2656 if (bio->bi_opf & REQ_RAHEAD) in blk_mq_bio_to_request()
2657 rq->cmd_flags |= REQ_FAILFAST_MASK; in blk_mq_bio_to_request()
2659 rq->__sector = bio->bi_iter.bi_sector; in blk_mq_bio_to_request()
2662 rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q, in blk_mq_bio_to_request()
2675 struct request_queue *q = rq->q; in __blk_mq_issue_directly()
2683 * For OK queue, we are done. For error, caller may kill it. in __blk_mq_issue_directly()
2687 ret = q->mq_ops->queue_rq(hctx, &bd); in __blk_mq_issue_directly()
2709 budget_token = blk_mq_get_dispatch_budget(rq->q); in blk_mq_get_budget_and_tag()
2714 blk_mq_put_dispatch_budget(rq->q, budget_token); in blk_mq_get_budget_and_tag()
2721 * blk_mq_try_issue_directly - Try to send a request directly to device driver.
2722 * @hctx: Pointer of the associated hardware queue.
2726 * request directly to device driver. Else, insert at hctx->dispatch queue, so
2728 * queue have higher priority.
2735 if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { in blk_mq_try_issue_directly()
2741 if ((rq->rq_flags & RQF_USE_SCHED) || !blk_mq_get_budget_and_tag(rq)) { in blk_mq_try_issue_directly()
2743 blk_mq_run_hw_queue(hctx, rq->cmd_flags & REQ_NOWAIT); in blk_mq_try_issue_directly()
2764 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_request_issue_directly()
2766 if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { in blk_mq_request_issue_directly()
2784 while ((rq = rq_list_pop(&plug->mq_list))) { in blk_mq_plug_issue_direct()
2785 bool last = rq_list_empty(&plug->mq_list); in blk_mq_plug_issue_direct()
2787 if (hctx != rq->mq_hctx) { in blk_mq_plug_issue_direct()
2792 hctx = rq->mq_hctx; in blk_mq_plug_issue_direct()
2821 q->mq_ops->queue_rqs(&plug->mq_list); in __blk_mq_flush_plug_list()
2834 struct request *rq = rq_list_pop(&plug->mq_list); in blk_mq_dispatch_plug_list()
2837 this_hctx = rq->mq_hctx; in blk_mq_dispatch_plug_list()
2838 this_ctx = rq->mq_ctx; in blk_mq_dispatch_plug_list()
2840 } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx || in blk_mq_dispatch_plug_list()
2845 list_add_tail(&rq->queuelist, &list); in blk_mq_dispatch_plug_list()
2847 } while (!rq_list_empty(&plug->mq_list)); in blk_mq_dispatch_plug_list()
2849 plug->mq_list = requeue_list; in blk_mq_dispatch_plug_list()
2850 trace_block_unplug(this_hctx->queue, depth, !from_sched); in blk_mq_dispatch_plug_list()
2852 percpu_ref_get(&this_hctx->queue->q_usage_counter); in blk_mq_dispatch_plug_list()
2855 spin_lock(&this_hctx->lock); in blk_mq_dispatch_plug_list()
2856 list_splice_tail_init(&list, &this_hctx->dispatch); in blk_mq_dispatch_plug_list()
2857 spin_unlock(&this_hctx->lock); in blk_mq_dispatch_plug_list()
2859 } else if (this_hctx->queue->elevator) { in blk_mq_dispatch_plug_list()
2860 this_hctx->queue->elevator->type->ops.insert_requests(this_hctx, in blk_mq_dispatch_plug_list()
2866 percpu_ref_put(&this_hctx->queue->q_usage_counter); in blk_mq_dispatch_plug_list()
2876 * plug->mq_list via a schedule() in the driver's queue_rq() callback. in blk_mq_flush_plug_list()
2881 if (plug->rq_count == 0) in blk_mq_flush_plug_list()
2883 depth = plug->rq_count; in blk_mq_flush_plug_list()
2884 plug->rq_count = 0; in blk_mq_flush_plug_list()
2886 if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) { in blk_mq_flush_plug_list()
2889 rq = rq_list_peek(&plug->mq_list); in blk_mq_flush_plug_list()
2890 q = rq->q; in blk_mq_flush_plug_list()
2894 * Peek first request and see if we have a ->queue_rqs() hook. in blk_mq_flush_plug_list()
2897 * same queue, caller must ensure that's the case. in blk_mq_flush_plug_list()
2899 if (q->mq_ops->queue_rqs) { in blk_mq_flush_plug_list()
2902 if (rq_list_empty(&plug->mq_list)) in blk_mq_flush_plug_list()
2908 if (rq_list_empty(&plug->mq_list)) in blk_mq_flush_plug_list()
2914 } while (!rq_list_empty(&plug->mq_list)); in blk_mq_flush_plug_list()
2927 list_del_init(&rq->queuelist); in blk_mq_try_issue_list_directly()
2970 .cmd_flags = bio->bi_opf, in blk_mq_get_new_requests()
2977 data.nr_tags = plug->nr_ios; in blk_mq_get_new_requests()
2978 plug->nr_ios = 1; in blk_mq_get_new_requests()
2979 data.cached_rqs = &plug->cached_rqs; in blk_mq_get_new_requests()
2986 if (bio->bi_opf & REQ_NOWAIT) in blk_mq_get_new_requests()
3002 rq = rq_list_peek(&plug->cached_rqs); in blk_mq_peek_cached_request()
3003 if (!rq || rq->q != q) in blk_mq_peek_cached_request()
3005 if (type != rq->mq_hctx->type && in blk_mq_peek_cached_request()
3006 (type != HCTX_TYPE_READ || rq->mq_hctx->type != HCTX_TYPE_DEFAULT)) in blk_mq_peek_cached_request()
3008 if (op_is_flush(rq->cmd_flags) != op_is_flush(opf)) in blk_mq_peek_cached_request()
3016 if (rq_list_pop(&plug->cached_rqs) != rq) in blk_mq_use_cached_rq()
3020 * If any qos ->throttle() end up blocking, we will have flushed the in blk_mq_use_cached_rq()
3024 rq_qos_throttle(rq->q, bio); in blk_mq_use_cached_rq()
3027 rq->cmd_flags = bio->bi_opf; in blk_mq_use_cached_rq()
3028 INIT_LIST_HEAD(&rq->queuelist); in blk_mq_use_cached_rq()
3033 unsigned int bs_mask = queue_logical_block_size(q) - 1; in bio_unaligned()
3036 if ((bio->bi_iter.bi_size & bs_mask) || in bio_unaligned()
3037 ((bio->bi_iter.bi_sector << SECTOR_SHIFT) & bs_mask)) in bio_unaligned()
3043 * blk_mq_submit_bio - Create and send a request to block device.
3049 * * We want to place request at plug queue for possible future merging
3050 * * There is an IO scheduler active at this queue
3052 * It will not queue the request if there is an error with the bio, or at the
3057 struct request_queue *q = bdev_get_queue(bio->bi_bdev); in blk_mq_submit_bio()
3058 struct blk_plug *plug = current->plug; in blk_mq_submit_bio()
3059 const int is_sync = op_is_sync(bio->bi_opf); in blk_mq_submit_bio()
3066 * If the plug has a cached request for this queue, try to use it. in blk_mq_submit_bio()
3068 rq = blk_mq_peek_cached_request(plug, q, bio->bi_opf); in blk_mq_submit_bio()
3073 * on the queue usage counter, and is the only write BIO in-flight for in blk_mq_submit_bio()
3077 nr_segs = bio->__bi_nr_segments; in blk_mq_submit_bio()
3096 * check has to be done with queue usage counter held in blk_mq_submit_bio()
3103 bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); in blk_mq_submit_bio()
3133 bio->bi_status = ret; in blk_mq_submit_bio()
3142 if (op_is_flush(bio->bi_opf) && blk_insert_flush(rq)) in blk_mq_submit_bio()
3150 hctx = rq->mq_hctx; in blk_mq_submit_bio()
3151 if ((rq->rq_flags & RQF_USE_SCHED) || in blk_mq_submit_bio()
3152 (hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) { in blk_mq_submit_bio()
3162 * Don't drop the queue reference if we were trying to use a cached in blk_mq_submit_bio()
3171 * blk_insert_cloned_request - Helper for stacking drivers to submit a request
3176 struct request_queue *q = rq->q; in blk_insert_cloned_request()
3185 * a non-read/write command (discard, write same, etc.) the in blk_insert_cloned_request()
3186 * low-level device driver will set the relevant queue limit to in blk_insert_cloned_request()
3187 * 0 to prevent blk-lib from issuing more of the offending in blk_insert_cloned_request()
3188 * operations. Commands queued prior to the queue limit being in blk_insert_cloned_request()
3201 * The queue settings related to segment counting may differ from the in blk_insert_cloned_request()
3202 * original queue. in blk_insert_cloned_request()
3204 rq->nr_phys_segments = blk_recalc_rq_segments(rq); in blk_insert_cloned_request()
3205 if (rq->nr_phys_segments > max_segments) { in blk_insert_cloned_request()
3207 __func__, rq->nr_phys_segments, max_segments); in blk_insert_cloned_request()
3211 if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq))) in blk_insert_cloned_request()
3234 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
3244 while ((bio = rq->bio) != NULL) { in blk_rq_unprep_clone()
3245 rq->bio = bio->bi_next; in blk_rq_unprep_clone()
3253 * blk_rq_prep_clone - Helper function to setup clone request
3280 struct bio *bio = bio_alloc_clone(rq->q->disk->part0, bio_src, in blk_rq_prep_clone()
3290 if (rq->bio) { in blk_rq_prep_clone()
3291 rq->biotail->bi_next = bio; in blk_rq_prep_clone()
3292 rq->biotail = bio; in blk_rq_prep_clone()
3294 rq->bio = rq->biotail = bio; in blk_rq_prep_clone()
3299 rq->__sector = blk_rq_pos(rq_src); in blk_rq_prep_clone()
3300 rq->__data_len = blk_rq_bytes(rq_src); in blk_rq_prep_clone()
3301 if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) { in blk_rq_prep_clone()
3302 rq->rq_flags |= RQF_SPECIAL_PAYLOAD; in blk_rq_prep_clone()
3303 rq->special_vec = rq_src->special_vec; in blk_rq_prep_clone()
3305 rq->nr_phys_segments = rq_src->nr_phys_segments; in blk_rq_prep_clone()
3307 if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0) in blk_rq_prep_clone()
3315 return -ENOMEM; in blk_rq_prep_clone()
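blk_rq_prep_clone()/blk_rq_unprep_clone() are the request-stacking helpers (dm-multipath style). A heavily hedged sketch of the call shape, with the bio constructor callback and its data entirely hypothetical:

    /* Hypothetical stacking-driver setup: mirror rq_src's bios onto the clone. */
    ret = blk_rq_prep_clone(clone, rq_src, bs, GFP_ATOMIC,
                            example_bio_ctr, ctr_data);
    if (ret)
            return ret;
    /* ... issue the clone on the lower device; when tearing it down: */
    blk_rq_unprep_clone(clone);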
3326 if (rq->bio) { in blk_steal_bios()
3327 if (list->tail) in blk_steal_bios()
3328 list->tail->bi_next = rq->bio; in blk_steal_bios()
3330 list->head = rq->bio; in blk_steal_bios()
3331 list->tail = rq->biotail; in blk_steal_bios()
3333 rq->bio = NULL; in blk_steal_bios()
3334 rq->biotail = NULL; in blk_steal_bios()
3337 rq->__data_len = 0; in blk_steal_bios()
3360 list_for_each_entry(page, &tags->page_list, lru) { in blk_mq_clear_rq_mapping()
3362 unsigned long end = start + order_to_size(page->private); in blk_mq_clear_rq_mapping()
3365 for (i = 0; i < drv_tags->nr_tags; i++) { in blk_mq_clear_rq_mapping()
3366 struct request *rq = drv_tags->rqs[i]; in blk_mq_clear_rq_mapping()
3371 cmpxchg(&drv_tags->rqs[i], rq, NULL); in blk_mq_clear_rq_mapping()
3377 * Wait until all pending iteration is done. in blk_mq_clear_rq_mapping()
3380 * after the ->lock is released. in blk_mq_clear_rq_mapping()
3382 spin_lock_irqsave(&drv_tags->lock, flags); in blk_mq_clear_rq_mapping()
3383 spin_unlock_irqrestore(&drv_tags->lock, flags); in blk_mq_clear_rq_mapping()
3392 if (list_empty(&tags->page_list)) in blk_mq_free_rqs()
3395 if (blk_mq_is_shared_tags(set->flags)) in blk_mq_free_rqs()
3396 drv_tags = set->shared_tags; in blk_mq_free_rqs()
3398 drv_tags = set->tags[hctx_idx]; in blk_mq_free_rqs()
3400 if (tags->static_rqs && set->ops->exit_request) { in blk_mq_free_rqs()
3403 for (i = 0; i < tags->nr_tags; i++) { in blk_mq_free_rqs()
3404 struct request *rq = tags->static_rqs[i]; in blk_mq_free_rqs()
3408 set->ops->exit_request(set, rq, hctx_idx); in blk_mq_free_rqs()
3409 tags->static_rqs[i] = NULL; in blk_mq_free_rqs()
3415 while (!list_empty(&tags->page_list)) { in blk_mq_free_rqs()
3416 page = list_first_entry(&tags->page_list, struct page, lru); in blk_mq_free_rqs()
3417 list_del_init(&page->lru); in blk_mq_free_rqs()
3423 __free_pages(page, page->private); in blk_mq_free_rqs()
3429 kfree(tags->rqs); in blk_mq_free_rq_map()
3430 tags->rqs = NULL; in blk_mq_free_rq_map()
3431 kfree(tags->static_rqs); in blk_mq_free_rq_map()
3432 tags->static_rqs = NULL; in blk_mq_free_rq_map()
3442 for (i = 0; i < set->nr_maps; i++) { in hctx_idx_to_type()
3443 unsigned int start = set->map[i].queue_offset; in hctx_idx_to_type()
3444 unsigned int end = start + set->map[i].nr_queues; in hctx_idx_to_type()
3450 if (i >= set->nr_maps) in hctx_idx_to_type()
3461 return blk_mq_hw_queue_to_node(&set->map[type], hctx_idx); in blk_mq_get_hctx_node()
3473 node = set->numa_node; in blk_mq_alloc_rq_map()
3476 BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); in blk_mq_alloc_rq_map()
3480 tags->rqs = kcalloc_node(nr_tags, sizeof(struct request *), in blk_mq_alloc_rq_map()
3483 if (!tags->rqs) in blk_mq_alloc_rq_map()
3486 tags->static_rqs = kcalloc_node(nr_tags, sizeof(struct request *), in blk_mq_alloc_rq_map()
3489 if (!tags->static_rqs) in blk_mq_alloc_rq_map()
3495 kfree(tags->rqs); in blk_mq_alloc_rq_map()
3506 if (set->ops->init_request) { in blk_mq_init_request()
3507 ret = set->ops->init_request(set, rq, hctx_idx, node); in blk_mq_init_request()
3512 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in blk_mq_init_request()
3525 node = set->numa_node; in blk_mq_alloc_rqs()
3527 INIT_LIST_HEAD(&tags->page_list); in blk_mq_alloc_rqs()
3533 rq_size = round_up(sizeof(struct request) + set->cmd_size, in blk_mq_alloc_rqs()
3543 while (this_order && left < order_to_size(this_order - 1)) in blk_mq_alloc_rqs()
3544 this_order--; in blk_mq_alloc_rqs()
3552 if (!this_order--) in blk_mq_alloc_rqs()
3561 page->private = this_order; in blk_mq_alloc_rqs()
3562 list_add_tail(&page->lru, &tags->page_list); in blk_mq_alloc_rqs()
3567 * to additional allocations like via ops->init_request(). in blk_mq_alloc_rqs()
3571 to_do = min(entries_per_page, depth - i); in blk_mq_alloc_rqs()
3572 left -= to_do * rq_size; in blk_mq_alloc_rqs()
3576 tags->static_rqs[i] = rq; in blk_mq_alloc_rqs()
3578 tags->static_rqs[i] = NULL; in blk_mq_alloc_rqs()
3590 return -ENOMEM; in blk_mq_alloc_rqs()
3602 if (rq->mq_hctx != iter_data->hctx) in blk_mq_has_request()
3604 iter_data->has_rq = true; in blk_mq_has_request()
3610 struct blk_mq_tags *tags = hctx->sched_tags ? in blk_mq_hctx_has_requests()
3611 hctx->sched_tags : hctx->tags; in blk_mq_hctx_has_requests()
3623 enum hctx_type type = hctx->type; in blk_mq_hctx_has_online_cpu()
3627 * hctx->cpumask has to rule out isolated CPUs, but userspace still in blk_mq_hctx_has_online_cpu()
3628 * might submit IOs on these isolated CPUs, so use the queue map to in blk_mq_hctx_has_online_cpu()
3632 struct blk_mq_hw_ctx *h = blk_mq_map_queue_type(hctx->queue, in blk_mq_hctx_has_online_cpu()
3661 set_bit(BLK_MQ_S_INACTIVE, &hctx->state); in blk_mq_hctx_notify_offline()
3665 * Try to grab a reference to the queue and wait for any outstanding in blk_mq_hctx_notify_offline()
3666 * requests. If we could not grab a reference the queue has been in blk_mq_hctx_notify_offline()
3669 if (percpu_ref_tryget(&hctx->queue->q_usage_counter)) { in blk_mq_hctx_notify_offline()
3672 percpu_ref_put(&hctx->queue->q_usage_counter); in blk_mq_hctx_notify_offline()
3681 * Isolated CPUs have been ruled out from hctx->cpumask, which is supposed
3688 struct blk_mq_hw_ctx *mapped_hctx = blk_mq_map_queue_type(hctx->queue, in blk_mq_cpu_mapped_to_hctx()
3689 hctx->type, cpu); in blk_mq_cpu_mapped_to_hctx()
3700 clear_bit(BLK_MQ_S_INACTIVE, &hctx->state); in blk_mq_hctx_notify_online()
3706 * software queue to the hw queue dispatch list, and ensure that it
3720 ctx = __blk_mq_get_ctx(hctx->queue, cpu); in blk_mq_hctx_notify_dead()
3721 type = hctx->type; in blk_mq_hctx_notify_dead()
3723 spin_lock(&ctx->lock); in blk_mq_hctx_notify_dead()
3724 if (!list_empty(&ctx->rq_lists[type])) { in blk_mq_hctx_notify_dead()
3725 list_splice_init(&ctx->rq_lists[type], &tmp); in blk_mq_hctx_notify_dead()
3728 spin_unlock(&ctx->lock); in blk_mq_hctx_notify_dead()
3733 spin_lock(&hctx->lock); in blk_mq_hctx_notify_dead()
3734 list_splice_tail_init(&tmp, &hctx->dispatch); in blk_mq_hctx_notify_dead()
3735 spin_unlock(&hctx->lock); in blk_mq_hctx_notify_dead()
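/*
 * A minimal userspace sketch of the two-step hand-off above, with a
 * hypothetical singly-linked queue and pthread mutexes standing in for the
 * ctx and hctx spinlocks: drain the dead CPU's list into a local list under
 * the first lock, then append it to the dispatch list under the second lock,
 * so the two locks are never held at the same time.
 */
#include <pthread.h>
#include <stddef.h>

struct node { struct node *next; };

struct slist {
	struct node *head, *tail;
	pthread_mutex_t lock;
};

static void slist_splice_tail(struct slist *dst, struct node *head,
			      struct node *tail)
{
	if (!head)
		return;
	if (dst->tail)
		dst->tail->next = head;
	else
		dst->head = head;
	dst->tail = tail;
}

/* move everything queued on @percpu over to @dispatch */
static void requeue_dead_cpu(struct slist *percpu, struct slist *dispatch)
{
	struct node *head, *tail;

	pthread_mutex_lock(&percpu->lock);
	head = percpu->head;
	tail = percpu->tail;
	percpu->head = percpu->tail = NULL;
	pthread_mutex_unlock(&percpu->lock);

	if (!head)
		return;

	pthread_mutex_lock(&dispatch->lock);
	slist_splice_tail(dispatch, head, tail);
	pthread_mutex_unlock(&dispatch->lock);
	/* the kernel then runs the hw queue to flush the moved requests */
}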
3745 if (!(hctx->flags & BLK_MQ_F_STACKING) && in __blk_mq_remove_cpuhp()
3746 !hlist_unhashed(&hctx->cpuhp_online)) { in __blk_mq_remove_cpuhp()
3748 &hctx->cpuhp_online); in __blk_mq_remove_cpuhp()
3749 INIT_HLIST_NODE(&hctx->cpuhp_online); in __blk_mq_remove_cpuhp()
3752 if (!hlist_unhashed(&hctx->cpuhp_dead)) { in __blk_mq_remove_cpuhp()
3754 &hctx->cpuhp_dead); in __blk_mq_remove_cpuhp()
3755 INIT_HLIST_NODE(&hctx->cpuhp_dead); in __blk_mq_remove_cpuhp()
3770 if (!(hctx->flags & BLK_MQ_F_STACKING) && in __blk_mq_add_cpuhp()
3771 hlist_unhashed(&hctx->cpuhp_online)) in __blk_mq_add_cpuhp()
3773 &hctx->cpuhp_online); in __blk_mq_add_cpuhp()
3775 if (hlist_unhashed(&hctx->cpuhp_dead)) in __blk_mq_add_cpuhp()
3777 &hctx->cpuhp_dead); in __blk_mq_add_cpuhp()
3799 spin_lock(&q->unused_hctx_lock); in blk_mq_remove_hw_queues_cpuhp()
3800 list_splice_init(&q->unused_hctx_list, &hctx_list); in blk_mq_remove_hw_queues_cpuhp()
3801 spin_unlock(&q->unused_hctx_lock); in blk_mq_remove_hw_queues_cpuhp()
3807 spin_lock(&q->unused_hctx_lock); in blk_mq_remove_hw_queues_cpuhp()
3808 list_splice(&hctx_list, &q->unused_hctx_list); in blk_mq_remove_hw_queues_cpuhp()
3809 spin_unlock(&q->unused_hctx_lock); in blk_mq_remove_hw_queues_cpuhp()
 3829  * Before freeing the hw queue, clear the flush request reference in
 3830  * tags->rqs[] to avoid a potential use-after-free.
3838 /* The hw queue may not be mapped yet */ in blk_mq_clear_flush_rq_mapping()
3845 cmpxchg(&tags->rqs[i], flush_rq, NULL); in blk_mq_clear_flush_rq_mapping()
3848 * Wait until all pending iteration is done. in blk_mq_clear_flush_rq_mapping()
3851 * after the ->lock is released. in blk_mq_clear_flush_rq_mapping()
3853 spin_lock_irqsave(&tags->lock, flags); in blk_mq_clear_flush_rq_mapping()
3854 spin_unlock_irqrestore(&tags->lock, flags); in blk_mq_clear_flush_rq_mapping()
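/*
 * A minimal userspace sketch of the idea above, assuming C11 atomics and a
 * pthread mutex in place of cmpxchg() and tags->lock: clear every slot that
 * still points at the stale request, then take and drop the iterators' lock
 * once so that any walk which started before the clear has finished by the
 * time this returns.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

struct tag_table {
	_Atomic(void *) *slots;
	size_t nr;
	pthread_mutex_t iter_lock;	/* held by slot iterators */
};

static void clear_stale_mapping(struct tag_table *t, void *stale)
{
	for (size_t i = 0; i < t->nr; i++) {
		void *expected = stale;

		/* only clear slots that still reference @stale */
		atomic_compare_exchange_strong(&t->slots[i], &expected, NULL);
	}

	/*
	 * Pairs with iterators holding iter_lock: anything that sampled a
	 * slot before the clear has dropped the lock by the time we return.
	 */
	pthread_mutex_lock(&t->iter_lock);
	pthread_mutex_unlock(&t->iter_lock);
}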
3857 /* hctx->ctxs will be freed in queue's release handler */
3862 struct request *flush_rq = hctx->fq->flush_rq; in blk_mq_exit_hctx()
3868 blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx], in blk_mq_exit_hctx()
3869 set->queue_depth, flush_rq); in blk_mq_exit_hctx()
3870 if (set->ops->exit_request) in blk_mq_exit_hctx()
3871 set->ops->exit_request(set, flush_rq, hctx_idx); in blk_mq_exit_hctx()
3873 if (set->ops->exit_hctx) in blk_mq_exit_hctx()
3874 set->ops->exit_hctx(hctx, hctx_idx); in blk_mq_exit_hctx()
3876 xa_erase(&q->hctx_table, hctx_idx); in blk_mq_exit_hctx()
3878 spin_lock(&q->unused_hctx_lock); in blk_mq_exit_hctx()
3879 list_add(&hctx->hctx_list, &q->unused_hctx_list); in blk_mq_exit_hctx()
3880 spin_unlock(&q->unused_hctx_lock); in blk_mq_exit_hctx()
3901 hctx->queue_num = hctx_idx; in blk_mq_init_hctx()
3903 hctx->tags = set->tags[hctx_idx]; in blk_mq_init_hctx()
3905 if (set->ops->init_hctx && in blk_mq_init_hctx()
3906 set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) in blk_mq_init_hctx()
3909 if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, in blk_mq_init_hctx()
3910 hctx->numa_node)) in blk_mq_init_hctx()
3913 if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL)) in blk_mq_init_hctx()
3919 if (set->ops->exit_request) in blk_mq_init_hctx()
3920 set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx); in blk_mq_init_hctx()
3922 if (set->ops->exit_hctx) in blk_mq_init_hctx()
3923 set->ops->exit_hctx(hctx, hctx_idx); in blk_mq_init_hctx()
3925 return -1; in blk_mq_init_hctx()
3939 if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node)) in blk_mq_alloc_hctx()
3942 atomic_set(&hctx->nr_active, 0); in blk_mq_alloc_hctx()
3944 node = set->numa_node; in blk_mq_alloc_hctx()
3945 hctx->numa_node = node; in blk_mq_alloc_hctx()
3947 INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); in blk_mq_alloc_hctx()
3948 spin_lock_init(&hctx->lock); in blk_mq_alloc_hctx()
3949 INIT_LIST_HEAD(&hctx->dispatch); in blk_mq_alloc_hctx()
3950 INIT_HLIST_NODE(&hctx->cpuhp_dead); in blk_mq_alloc_hctx()
3951 INIT_HLIST_NODE(&hctx->cpuhp_online); in blk_mq_alloc_hctx()
3952 hctx->queue = q; in blk_mq_alloc_hctx()
3953 hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED; in blk_mq_alloc_hctx()
3955 INIT_LIST_HEAD(&hctx->hctx_list); in blk_mq_alloc_hctx()
3961 hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *), in blk_mq_alloc_hctx()
3963 if (!hctx->ctxs) in blk_mq_alloc_hctx()
3966 if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), in blk_mq_alloc_hctx()
3969 hctx->nr_ctx = 0; in blk_mq_alloc_hctx()
3971 spin_lock_init(&hctx->dispatch_wait_lock); in blk_mq_alloc_hctx()
3972 init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake); in blk_mq_alloc_hctx()
3973 INIT_LIST_HEAD(&hctx->dispatch_wait.entry); in blk_mq_alloc_hctx()
3975 hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp); in blk_mq_alloc_hctx()
3976 if (!hctx->fq) in blk_mq_alloc_hctx()
3984 sbitmap_free(&hctx->ctx_map); in blk_mq_alloc_hctx()
3986 kfree(hctx->ctxs); in blk_mq_alloc_hctx()
3988 free_cpumask_var(hctx->cpumask); in blk_mq_alloc_hctx()
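/*
 * A minimal sketch of the error-unwind style used above, with made-up
 * resources standing in for the cpumask, ctxs array, ctx_map and flush
 * queue: each allocation gets a matching label, and a failure jumps to the
 * label that frees everything allocated so far, in reverse order.
 */
#include <stdlib.h>

struct fake_hctx {
	void *cpumask;
	void *ctxs;
	void *ctx_map;
	void *fq;
};

static struct fake_hctx *alloc_fake_hctx(void)
{
	struct fake_hctx *hctx = calloc(1, sizeof(*hctx));

	if (!hctx)
		return NULL;

	hctx->cpumask = malloc(64);
	if (!hctx->cpumask)
		goto free_hctx;
	hctx->ctxs = malloc(256);
	if (!hctx->ctxs)
		goto free_cpumask;
	hctx->ctx_map = malloc(64);
	if (!hctx->ctx_map)
		goto free_ctxs;
	hctx->fq = malloc(128);
	if (!hctx->fq)
		goto free_ctx_map;

	return hctx;

 free_ctx_map:
	free(hctx->ctx_map);
 free_ctxs:
	free(hctx->ctxs);
 free_cpumask:
	free(hctx->cpumask);
 free_hctx:
	free(hctx);
	return NULL;
}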
3998 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_init_cpu_queues()
4002 struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); in blk_mq_init_cpu_queues()
4006 __ctx->cpu = i; in blk_mq_init_cpu_queues()
4007 spin_lock_init(&__ctx->lock); in blk_mq_init_cpu_queues()
4009 INIT_LIST_HEAD(&__ctx->rq_lists[k]); in blk_mq_init_cpu_queues()
4011 __ctx->queue = q; in blk_mq_init_cpu_queues()
4014 * Set local node, IFF we have more than one hw queue. If in blk_mq_init_cpu_queues()
4017 for (j = 0; j < set->nr_maps; j++) { in blk_mq_init_cpu_queues()
4019 if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE) in blk_mq_init_cpu_queues()
4020 hctx->numa_node = cpu_to_node(i); in blk_mq_init_cpu_queues()
4032 tags = blk_mq_alloc_rq_map(set, hctx_idx, depth, set->reserved_tags); in blk_mq_alloc_map_and_rqs()
4048 if (blk_mq_is_shared_tags(set->flags)) { in __blk_mq_alloc_map_and_rqs()
4049 set->tags[hctx_idx] = set->shared_tags; in __blk_mq_alloc_map_and_rqs()
4054 set->tags[hctx_idx] = blk_mq_alloc_map_and_rqs(set, hctx_idx, in __blk_mq_alloc_map_and_rqs()
4055 set->queue_depth); in __blk_mq_alloc_map_and_rqs()
4057 return set->tags[hctx_idx]; in __blk_mq_alloc_map_and_rqs()
4073 if (!blk_mq_is_shared_tags(set->flags)) in __blk_mq_free_map_and_rqs()
4074 blk_mq_free_map_and_rqs(set, set->tags[hctx_idx], hctx_idx); in __blk_mq_free_map_and_rqs()
4076 set->tags[hctx_idx] = NULL; in __blk_mq_free_map_and_rqs()
4085 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_map_swqueue()
4088 cpumask_clear(hctx->cpumask); in blk_mq_map_swqueue()
4089 hctx->nr_ctx = 0; in blk_mq_map_swqueue()
4090 hctx->dispatch_from = NULL; in blk_mq_map_swqueue()
4100 ctx = per_cpu_ptr(q->queue_ctx, i); in blk_mq_map_swqueue()
4101 for (j = 0; j < set->nr_maps; j++) { in blk_mq_map_swqueue()
4102 if (!set->map[j].nr_queues) { in blk_mq_map_swqueue()
4103 ctx->hctxs[j] = blk_mq_map_queue_type(q, in blk_mq_map_swqueue()
4107 hctx_idx = set->map[j].mq_map[i]; in blk_mq_map_swqueue()
 4108			/* an unmapped hw queue can be remapped after the CPU topology changes */  in blk_mq_map_swqueue()
4109 if (!set->tags[hctx_idx] && in blk_mq_map_swqueue()
4117 set->map[j].mq_map[i] = 0; in blk_mq_map_swqueue()
4121 ctx->hctxs[j] = hctx; in blk_mq_map_swqueue()
4125 * devices share queues across queue maps. in blk_mq_map_swqueue()
4127 if (cpumask_test_cpu(i, hctx->cpumask)) in blk_mq_map_swqueue()
4130 cpumask_set_cpu(i, hctx->cpumask); in blk_mq_map_swqueue()
4131 hctx->type = j; in blk_mq_map_swqueue()
4132 ctx->index_hw[hctx->type] = hctx->nr_ctx; in blk_mq_map_swqueue()
4133 hctx->ctxs[hctx->nr_ctx++] = ctx; in blk_mq_map_swqueue()
4139 BUG_ON(!hctx->nr_ctx); in blk_mq_map_swqueue()
4143 ctx->hctxs[j] = blk_mq_map_queue_type(q, in blk_mq_map_swqueue()
4151 * If no software queues are mapped to this hardware queue, in blk_mq_map_swqueue()
4154 if (!hctx->nr_ctx) { in blk_mq_map_swqueue()
4155 /* Never unmap queue 0. We need it as a in blk_mq_map_swqueue()
4162 hctx->tags = NULL; in blk_mq_map_swqueue()
4166 hctx->tags = set->tags[i]; in blk_mq_map_swqueue()
4167 WARN_ON(!hctx->tags); in blk_mq_map_swqueue()
4174 sbitmap_resize(&hctx->ctx_map, hctx->nr_ctx); in blk_mq_map_swqueue()
4177 * Rule out isolated CPUs from hctx->cpumask to avoid in blk_mq_map_swqueue()
4180 for_each_cpu(cpu, hctx->cpumask) { in blk_mq_map_swqueue()
4182 cpumask_clear_cpu(cpu, hctx->cpumask); in blk_mq_map_swqueue()
4188 hctx->next_cpu = blk_mq_first_mapped_cpu(hctx); in blk_mq_map_swqueue()
4189 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; in blk_mq_map_swqueue()
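/*
 * A minimal runnable sketch of the mapping step above, assuming a plain
 * cpu_to_queue[] array in place of set->map[].mq_map[]: walk every CPU,
 * look up its hardware queue, and record both directions of the mapping,
 * the CPU's slot index within that queue (like ctx->index_hw[]) and the
 * queue's list of CPUs (like hctx->ctxs[] and hctx->nr_ctx).
 */
#include <stdio.h>

#define NR_CPUS   8
#define NR_QUEUES 2

int main(void)
{
	int cpu_to_queue[NR_CPUS] = { 0, 0, 0, 0, 1, 1, 1, 1 };
	int queue_cpus[NR_QUEUES][NR_CPUS];	/* CPUs served by each queue */
	int nr_ctx[NR_QUEUES] = { 0 };		/* like hctx->nr_ctx */
	int index_hw[NR_CPUS];			/* like ctx->index_hw[] */

	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		int q = cpu_to_queue[cpu];

		index_hw[cpu] = nr_ctx[q];	/* CPU's slot within queue q */
		queue_cpus[q][nr_ctx[q]++] = cpu;
	}

	for (int q = 0; q < NR_QUEUES; q++) {
		printf("queue %d:", q);
		for (int i = 0; i < nr_ctx[q]; i++)
			printf(" cpu%d (slot %d)", queue_cpus[q][i],
			       index_hw[queue_cpus[q][i]]);
		printf("\n");
	}
	return 0;
}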
4195 * the queue isn't live yet.
4204 hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; in queue_set_hctx_shared()
4207 hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; in queue_set_hctx_shared()
4217 lockdep_assert_held(&set->tag_list_lock); in blk_mq_update_tag_set_shared()
4219 list_for_each_entry(q, &set->tag_list, tag_set_list) { in blk_mq_update_tag_set_shared()
4228 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_del_queue_tag_set()
4230 mutex_lock(&set->tag_list_lock); in blk_mq_del_queue_tag_set()
4231 list_del(&q->tag_set_list); in blk_mq_del_queue_tag_set()
4232 if (list_is_singular(&set->tag_list)) { in blk_mq_del_queue_tag_set()
4234 set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; in blk_mq_del_queue_tag_set()
4235 /* update existing queue */ in blk_mq_del_queue_tag_set()
4238 mutex_unlock(&set->tag_list_lock); in blk_mq_del_queue_tag_set()
4239 INIT_LIST_HEAD(&q->tag_set_list); in blk_mq_del_queue_tag_set()
4245 mutex_lock(&set->tag_list_lock); in blk_mq_add_queue_tag_set()
4250 if (!list_empty(&set->tag_list) && in blk_mq_add_queue_tag_set()
4251 !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { in blk_mq_add_queue_tag_set()
4252 set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; in blk_mq_add_queue_tag_set()
4253 /* update existing queue */ in blk_mq_add_queue_tag_set()
4256 if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED) in blk_mq_add_queue_tag_set()
4258 list_add_tail(&q->tag_set_list, &set->tag_list); in blk_mq_add_queue_tag_set()
4260 mutex_unlock(&set->tag_list_lock); in blk_mq_add_queue_tag_set()
4263 /* All allocations will be freed in release handler of q->mq_kobj */
4271 return -ENOMEM; in blk_mq_alloc_ctxs()
4273 ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx); in blk_mq_alloc_ctxs()
4274 if (!ctxs->queue_ctx) in blk_mq_alloc_ctxs()
4278 struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu); in blk_mq_alloc_ctxs()
4279 ctx->ctxs = ctxs; in blk_mq_alloc_ctxs()
4282 q->mq_kobj = &ctxs->kobj; in blk_mq_alloc_ctxs()
4283 q->queue_ctx = ctxs->queue_ctx; in blk_mq_alloc_ctxs()
4288 return -ENOMEM; in blk_mq_alloc_ctxs()
 4293  * request queue's release handler to avoid use-after-free
 4294  * and headaches, because q->mq_kobj shouldn't have been introduced,
4303 WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list)); in blk_mq_release()
4306 list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) { in blk_mq_release()
4307 list_del_init(&hctx->hctx_list); in blk_mq_release()
4308 kobject_put(&hctx->kobj); in blk_mq_release()
4311 xa_destroy(&q->hctx_table); in blk_mq_release()
 4314	 * release .mq_kobj and the sw queues' kobjects now because  in blk_mq_release()
 4315	 * both share their lifetime with the request queue.  in blk_mq_release()
4322 return set->nr_maps > HCTX_TYPE_POLL && in blk_mq_can_poll()
4323 set->map[HCTX_TYPE_POLL].nr_queues; in blk_mq_can_poll()
4335 lim->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT; in blk_mq_alloc_queue()
4337 lim->features |= BLK_FEAT_POLL; in blk_mq_alloc_queue()
4339 q = blk_alloc_queue(lim, set->numa_node); in blk_mq_alloc_queue()
4342 q->queuedata = queuedata; in blk_mq_alloc_queue()
4353 * blk_mq_destroy_queue - shutdown a request queue
4354 * @q: request queue to shutdown
4356 * This shuts down a request queue allocated by blk_mq_alloc_queue(). All future
4357 * requests will be failed with -ENODEV. The caller is responsible for dropping
4390 disk = __alloc_disk_node(q, set->numa_node, lkclass); in __blk_mq_alloc_disk()
4394 return ERR_PTR(-ENOMEM); in __blk_mq_alloc_disk()
4396 set_bit(GD_OWNS_QUEUE, &disk->state); in __blk_mq_alloc_disk()
4420 return hlist_unhashed(&hctx->cpuhp_online) && in blk_mq_hctx_is_reusable()
4421 hlist_unhashed(&hctx->cpuhp_dead); in blk_mq_hctx_is_reusable()
4431 spin_lock(&q->unused_hctx_lock); in blk_mq_alloc_and_init_hctx()
4432 list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) { in blk_mq_alloc_and_init_hctx()
4433 if (tmp->numa_node == node && blk_mq_hctx_is_reusable(tmp)) { in blk_mq_alloc_and_init_hctx()
4439 list_del_init(&hctx->hctx_list); in blk_mq_alloc_and_init_hctx()
4440 spin_unlock(&q->unused_hctx_lock); in blk_mq_alloc_and_init_hctx()
4453 kobject_put(&hctx->kobj); in blk_mq_alloc_and_init_hctx()
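/*
 * A minimal sketch of the reuse-or-allocate pattern above, with a
 * hypothetical node type and a pthread mutex in place of unused_hctx_lock:
 * prefer an entry on the free list that matches the requested NUMA node,
 * and fall back to a fresh allocation only when none qualifies.
 */
#include <pthread.h>
#include <stdlib.h>

struct cached_ctx {
	struct cached_ctx *next;
	int numa_node;
};

struct ctx_cache {
	struct cached_ctx *unused;
	pthread_mutex_t lock;
};

static struct cached_ctx *get_ctx(struct ctx_cache *cache, int node)
{
	struct cached_ctx **pp, *ctx = NULL;

	pthread_mutex_lock(&cache->lock);
	for (pp = &cache->unused; *pp; pp = &(*pp)->next) {
		if ((*pp)->numa_node == node) {
			ctx = *pp;
			*pp = ctx->next;	/* unlink from the free list */
			break;
		}
	}
	pthread_mutex_unlock(&cache->lock);

	if (!ctx) {
		ctx = calloc(1, sizeof(*ctx));
		if (ctx)
			ctx->numa_node = node;
	}
	return ctx;
}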
4465 mutex_lock(&q->sysfs_lock); in blk_mq_realloc_hw_ctxs()
4466 for (i = 0; i < set->nr_hw_queues; i++) { in blk_mq_realloc_hw_ctxs()
4469 struct blk_mq_hw_ctx *old_hctx = xa_load(&q->hctx_table, i); in blk_mq_realloc_hw_ctxs()
4472 old_node = old_hctx->numa_node; in blk_mq_realloc_hw_ctxs()
4487 * hctxs and keep the previous q->nr_hw_queues. in blk_mq_realloc_hw_ctxs()
4489 if (i != set->nr_hw_queues) { in blk_mq_realloc_hw_ctxs()
4490 j = q->nr_hw_queues; in blk_mq_realloc_hw_ctxs()
4493 q->nr_hw_queues = set->nr_hw_queues; in blk_mq_realloc_hw_ctxs()
4496 xa_for_each_start(&q->hctx_table, j, hctx, j) in blk_mq_realloc_hw_ctxs()
4498 mutex_unlock(&q->sysfs_lock); in blk_mq_realloc_hw_ctxs()
4510 /* mark the queue as mq asap */ in blk_mq_init_allocated_queue()
4511 q->mq_ops = set->ops; in blk_mq_init_allocated_queue()
 4514	 * ->tag_set has to be set up before initializing hctx, since the cpuhp  in blk_mq_init_allocated_queue()
 4515	 * handler needs it to check the queue mapping  in blk_mq_init_allocated_queue()
4517 q->tag_set = set; in blk_mq_init_allocated_queue()
4522 /* init q->mq_kobj and sw queues' kobjects */ in blk_mq_init_allocated_queue()
4525 INIT_LIST_HEAD(&q->unused_hctx_list); in blk_mq_init_allocated_queue()
4526 spin_lock_init(&q->unused_hctx_lock); in blk_mq_init_allocated_queue()
4528 xa_init(&q->hctx_table); in blk_mq_init_allocated_queue()
4531 if (!q->nr_hw_queues) in blk_mq_init_allocated_queue()
4534 INIT_WORK(&q->timeout_work, blk_mq_timeout_work); in blk_mq_init_allocated_queue()
4535 blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ); in blk_mq_init_allocated_queue()
4537 q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; in blk_mq_init_allocated_queue()
4539 INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work); in blk_mq_init_allocated_queue()
4540 INIT_LIST_HEAD(&q->flush_list); in blk_mq_init_allocated_queue()
4541 INIT_LIST_HEAD(&q->requeue_list); in blk_mq_init_allocated_queue()
4542 spin_lock_init(&q->requeue_lock); in blk_mq_init_allocated_queue()
4544 q->nr_requests = set->queue_depth; in blk_mq_init_allocated_queue()
4546 blk_mq_init_cpu_queues(q, set->nr_hw_queues); in blk_mq_init_allocated_queue()
4554 q->mq_ops = NULL; in blk_mq_init_allocated_queue()
4555 return -ENOMEM; in blk_mq_init_allocated_queue()
4562 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_exit_queue()
4564 /* Checks hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED. */ in blk_mq_exit_queue()
4565 blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); in blk_mq_exit_queue()
4566 /* May clear BLK_MQ_F_TAG_QUEUE_SHARED in hctx->flags. */ in blk_mq_exit_queue()
4574 if (blk_mq_is_shared_tags(set->flags)) { in __blk_mq_alloc_rq_maps()
4575 set->shared_tags = blk_mq_alloc_map_and_rqs(set, in __blk_mq_alloc_rq_maps()
4577 set->queue_depth); in __blk_mq_alloc_rq_maps()
4578 if (!set->shared_tags) in __blk_mq_alloc_rq_maps()
4579 return -ENOMEM; in __blk_mq_alloc_rq_maps()
4582 for (i = 0; i < set->nr_hw_queues; i++) { in __blk_mq_alloc_rq_maps()
4591 while (--i >= 0) in __blk_mq_alloc_rq_maps()
4594 if (blk_mq_is_shared_tags(set->flags)) { in __blk_mq_alloc_rq_maps()
4595 blk_mq_free_map_and_rqs(set, set->shared_tags, in __blk_mq_alloc_rq_maps()
4599 return -ENOMEM; in __blk_mq_alloc_rq_maps()
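/*
 * A minimal sketch of the partial-unwind loop above (while (--i >= 0) ...),
 * with malloc() standing in for the per-hw-queue allocation: on the first
 * failure, free everything allocated so far in reverse order and report the
 * whole operation as failed.
 */
#include <stdlib.h>

static int alloc_per_queue(void **maps, int nr_queues, size_t size)
{
	int i;

	for (i = 0; i < nr_queues; i++) {
		maps[i] = malloc(size);
		if (!maps[i])
			goto out_unwind;
	}
	return 0;

 out_unwind:
	while (--i >= 0) {
		free(maps[i]);
		maps[i] = NULL;
	}
	return -1;	/* -ENOMEM in the kernel */
}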
4604 * may reduce the depth asked for, if memory is tight. set->queue_depth
4612 depth = set->queue_depth; in blk_mq_alloc_set_map_and_rqs()
4618 set->queue_depth >>= 1; in blk_mq_alloc_set_map_and_rqs()
4619 if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) { in blk_mq_alloc_set_map_and_rqs()
4620 err = -ENOMEM; in blk_mq_alloc_set_map_and_rqs()
4623 } while (set->queue_depth); in blk_mq_alloc_set_map_and_rqs()
4625 if (!set->queue_depth || err) { in blk_mq_alloc_set_map_and_rqs()
4626 pr_err("blk-mq: failed to allocate request map\n"); in blk_mq_alloc_set_map_and_rqs()
4627 return -ENOMEM; in blk_mq_alloc_set_map_and_rqs()
4630 if (depth != set->queue_depth) in blk_mq_alloc_set_map_and_rqs()
4631 pr_info("blk-mq: reduced tag depth (%u -> %u)\n", in blk_mq_alloc_set_map_and_rqs()
4632 depth, set->queue_depth); in blk_mq_alloc_set_map_and_rqs()
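/*
 * A minimal runnable sketch of the retry loop above, with a made-up
 * try_alloc() that only succeeds below a certain depth: halve the requested
 * queue depth until the allocation fits, and give up once the depth would
 * drop below a reserved minimum.
 */
#include <stdio.h>

#define RESERVED_MIN 4

/* pretend the system only has room for 256 requests */
static int try_alloc(unsigned int depth)
{
	return depth <= 256 ? 0 : -1;
}

static int alloc_with_backoff(unsigned int *depth)
{
	unsigned int asked = *depth;

	do {
		if (!try_alloc(*depth))
			break;
		*depth >>= 1;
		if (*depth < RESERVED_MIN)
			return -1;	/* -ENOMEM in the kernel */
	} while (*depth);

	if (asked != *depth)
		printf("reduced depth (%u -> %u)\n", asked, *depth);
	return 0;
}

int main(void)
{
	unsigned int depth = 1024;

	if (!alloc_with_backoff(&depth))
		printf("final depth %u\n", depth);
	return 0;
}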
4641 * expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the in blk_mq_update_queue_map()
4644 if (set->nr_maps == 1) in blk_mq_update_queue_map()
4645 set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues; in blk_mq_update_queue_map()
4647 if (set->ops->map_queues) { in blk_mq_update_queue_map()
4654 * for (queue = 0; queue < set->nr_hw_queues; queue++) { in blk_mq_update_queue_map()
4655 * mask = get_cpu_mask(queue) in blk_mq_update_queue_map()
4657 * set->map[x].mq_map[cpu] = queue; in blk_mq_update_queue_map()
4662 * to any hw queue. in blk_mq_update_queue_map()
4664 for (i = 0; i < set->nr_maps; i++) in blk_mq_update_queue_map()
4665 blk_mq_clear_mq_map(&set->map[i]); in blk_mq_update_queue_map()
4667 set->ops->map_queues(set); in blk_mq_update_queue_map()
4669 BUG_ON(set->nr_maps > 1); in blk_mq_update_queue_map()
4670 blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); in blk_mq_update_queue_map()
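/*
 * A minimal runnable sketch of the fallback mapping the comment above
 * describes for ->map_queues(): every possible CPU must end up assigned to
 * some hardware queue. A simple modulo spread is used here; the real
 * blk_mq_map_queues() is topology aware, so this only shows the shape of
 * the contract.
 */
#include <stdio.h>

#define NR_CPUS 8

static void map_queues(unsigned int *mq_map, unsigned int nr_queues)
{
	for (unsigned int cpu = 0; cpu < NR_CPUS; cpu++)
		mq_map[cpu] = cpu % nr_queues;	/* no CPU is left unmapped */
}

int main(void)
{
	unsigned int mq_map[NR_CPUS];

	map_queues(mq_map, 3);
	for (unsigned int cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu %u -> hw queue %u\n", cpu, mq_map[cpu]);
	return 0;
}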
4680 if (set->nr_hw_queues >= new_nr_hw_queues) in blk_mq_realloc_tag_set_tags()
4684 GFP_KERNEL, set->numa_node); in blk_mq_realloc_tag_set_tags()
4686 return -ENOMEM; in blk_mq_realloc_tag_set_tags()
4688 if (set->tags) in blk_mq_realloc_tag_set_tags()
4689 memcpy(new_tags, set->tags, set->nr_hw_queues * in blk_mq_realloc_tag_set_tags()
4690 sizeof(*set->tags)); in blk_mq_realloc_tag_set_tags()
4691 kfree(set->tags); in blk_mq_realloc_tag_set_tags()
4692 set->tags = new_tags; in blk_mq_realloc_tag_set_tags()
4694 for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) { in blk_mq_realloc_tag_set_tags()
4696 while (--i >= set->nr_hw_queues) in blk_mq_realloc_tag_set_tags()
4698 return -ENOMEM; in blk_mq_realloc_tag_set_tags()
4704 set->nr_hw_queues = new_nr_hw_queues; in blk_mq_realloc_tag_set_tags()
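/*
 * A minimal sketch of the grow-only reallocation above, with calloc() and
 * memcpy() standing in for kcalloc_node(): the array of per-queue pointers
 * only ever grows, existing entries are copied across, and a shrink request
 * is a no-op (stale entries are torn down elsewhere).
 */
#include <stdlib.h>
#include <string.h>

static int grow_tags(void ***tags, unsigned int old_nr, unsigned int new_nr)
{
	void **new_tags;

	if (old_nr >= new_nr)
		return 0;			/* never shrink here */

	new_tags = calloc(new_nr, sizeof(*new_tags));
	if (!new_tags)
		return -1;			/* -ENOMEM in the kernel */

	if (*tags)
		memcpy(new_tags, *tags, old_nr * sizeof(*new_tags));
	free(*tags);
	*tags = new_tags;
	return 0;
}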
4712 * value will be stored in set->queue_depth.
4720 if (!set->nr_hw_queues) in blk_mq_alloc_tag_set()
4721 return -EINVAL; in blk_mq_alloc_tag_set()
4722 if (!set->queue_depth) in blk_mq_alloc_tag_set()
4723 return -EINVAL; in blk_mq_alloc_tag_set()
4724 if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) in blk_mq_alloc_tag_set()
4725 return -EINVAL; in blk_mq_alloc_tag_set()
4727 if (!set->ops->queue_rq) in blk_mq_alloc_tag_set()
4728 return -EINVAL; in blk_mq_alloc_tag_set()
4730 if (!set->ops->get_budget ^ !set->ops->put_budget) in blk_mq_alloc_tag_set()
4731 return -EINVAL; in blk_mq_alloc_tag_set()
4733 if (set->queue_depth > BLK_MQ_MAX_DEPTH) { in blk_mq_alloc_tag_set()
4734 pr_info("blk-mq: reduced tag depth to %u\n", in blk_mq_alloc_tag_set()
4736 set->queue_depth = BLK_MQ_MAX_DEPTH; in blk_mq_alloc_tag_set()
4739 if (!set->nr_maps) in blk_mq_alloc_tag_set()
4740 set->nr_maps = 1; in blk_mq_alloc_tag_set()
4741 else if (set->nr_maps > HCTX_MAX_TYPES) in blk_mq_alloc_tag_set()
4742 return -EINVAL; in blk_mq_alloc_tag_set()
4750 set->queue_depth = min(64U, set->queue_depth); in blk_mq_alloc_tag_set()
4756 if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids) in blk_mq_alloc_tag_set()
4757 set->nr_hw_queues = nr_cpu_ids; in blk_mq_alloc_tag_set()
4759 if (set->flags & BLK_MQ_F_BLOCKING) { in blk_mq_alloc_tag_set()
4760 set->srcu = kmalloc(sizeof(*set->srcu), GFP_KERNEL); in blk_mq_alloc_tag_set()
4761 if (!set->srcu) in blk_mq_alloc_tag_set()
4762 return -ENOMEM; in blk_mq_alloc_tag_set()
4763 ret = init_srcu_struct(set->srcu); in blk_mq_alloc_tag_set()
4768 ret = -ENOMEM; in blk_mq_alloc_tag_set()
4769 set->tags = kcalloc_node(set->nr_hw_queues, in blk_mq_alloc_tag_set()
4771 set->numa_node); in blk_mq_alloc_tag_set()
4772 if (!set->tags) in blk_mq_alloc_tag_set()
4775 for (i = 0; i < set->nr_maps; i++) { in blk_mq_alloc_tag_set()
4776 set->map[i].mq_map = kcalloc_node(nr_cpu_ids, in blk_mq_alloc_tag_set()
4777 sizeof(set->map[i].mq_map[0]), in blk_mq_alloc_tag_set()
4778 GFP_KERNEL, set->numa_node); in blk_mq_alloc_tag_set()
4779 if (!set->map[i].mq_map) in blk_mq_alloc_tag_set()
4781 set->map[i].nr_queues = set->nr_hw_queues; in blk_mq_alloc_tag_set()
4790 mutex_init(&set->tag_list_lock); in blk_mq_alloc_tag_set()
4791 INIT_LIST_HEAD(&set->tag_list); in blk_mq_alloc_tag_set()
4796 for (i = 0; i < set->nr_maps; i++) { in blk_mq_alloc_tag_set()
4797 kfree(set->map[i].mq_map); in blk_mq_alloc_tag_set()
4798 set->map[i].mq_map = NULL; in blk_mq_alloc_tag_set()
4800 kfree(set->tags); in blk_mq_alloc_tag_set()
4801 set->tags = NULL; in blk_mq_alloc_tag_set()
4803 if (set->flags & BLK_MQ_F_BLOCKING) in blk_mq_alloc_tag_set()
4804 cleanup_srcu_struct(set->srcu); in blk_mq_alloc_tag_set()
4806 if (set->flags & BLK_MQ_F_BLOCKING) in blk_mq_alloc_tag_set()
4807 kfree(set->srcu); in blk_mq_alloc_tag_set()
4812 /* allocate and initialize a tagset for a simple single-queue device */
4818 set->ops = ops; in blk_mq_alloc_sq_tag_set()
4819 set->nr_hw_queues = 1; in blk_mq_alloc_sq_tag_set()
4820 set->nr_maps = 1; in blk_mq_alloc_sq_tag_set()
4821 set->queue_depth = queue_depth; in blk_mq_alloc_sq_tag_set()
4822 set->numa_node = NUMA_NO_NODE; in blk_mq_alloc_sq_tag_set()
4823 set->flags = set_flags; in blk_mq_alloc_sq_tag_set()
4832 for (i = 0; i < set->nr_hw_queues; i++) in blk_mq_free_tag_set()
4835 if (blk_mq_is_shared_tags(set->flags)) { in blk_mq_free_tag_set()
4836 blk_mq_free_map_and_rqs(set, set->shared_tags, in blk_mq_free_tag_set()
4840 for (j = 0; j < set->nr_maps; j++) { in blk_mq_free_tag_set()
4841 kfree(set->map[j].mq_map); in blk_mq_free_tag_set()
4842 set->map[j].mq_map = NULL; in blk_mq_free_tag_set()
4845 kfree(set->tags); in blk_mq_free_tag_set()
4846 set->tags = NULL; in blk_mq_free_tag_set()
4847 if (set->flags & BLK_MQ_F_BLOCKING) { in blk_mq_free_tag_set()
4848 cleanup_srcu_struct(set->srcu); in blk_mq_free_tag_set()
4849 kfree(set->srcu); in blk_mq_free_tag_set()
4856 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_update_nr_requests()
4861 if (WARN_ON_ONCE(!q->mq_freeze_depth)) in blk_mq_update_nr_requests()
4862 return -EINVAL; in blk_mq_update_nr_requests()
4865 return -EINVAL; in blk_mq_update_nr_requests()
4867 if (q->nr_requests == nr) in blk_mq_update_nr_requests()
4874 if (!hctx->tags) in blk_mq_update_nr_requests()
4878 * queue depth. This is similar to what the old code would do. in blk_mq_update_nr_requests()
4880 if (hctx->sched_tags) { in blk_mq_update_nr_requests()
4881 ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, in blk_mq_update_nr_requests()
4884 ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr, in blk_mq_update_nr_requests()
4889 if (q->elevator && q->elevator->type->ops.depth_updated) in blk_mq_update_nr_requests()
4890 q->elevator->type->ops.depth_updated(hctx); in blk_mq_update_nr_requests()
4893 q->nr_requests = nr; in blk_mq_update_nr_requests()
4894 if (blk_mq_is_shared_tags(set->flags)) { in blk_mq_update_nr_requests()
4895 if (q->elevator) in blk_mq_update_nr_requests()
4931 /* q->elevator needs protection from ->sysfs_lock */ in blk_mq_elv_switch_none()
4932 mutex_lock(&q->sysfs_lock); in blk_mq_elv_switch_none()
4935 if (!q->elevator) { in blk_mq_elv_switch_none()
4940 INIT_LIST_HEAD(&qe->node); in blk_mq_elv_switch_none()
4941 qe->q = q; in blk_mq_elv_switch_none()
4942 qe->type = q->elevator->type; in blk_mq_elv_switch_none()
4944 __elevator_get(qe->type); in blk_mq_elv_switch_none()
4945 list_add(&qe->node, head); in blk_mq_elv_switch_none()
4948 mutex_unlock(&q->sysfs_lock); in blk_mq_elv_switch_none()
4959 if (qe->q == q) in blk_lookup_qe_pair()
4974 t = qe->type; in blk_mq_elv_switch_back()
4975 list_del(&qe->node); in blk_mq_elv_switch_back()
4978 mutex_lock(&q->sysfs_lock); in blk_mq_elv_switch_back()
4982 mutex_unlock(&q->sysfs_lock); in blk_mq_elv_switch_back()
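/*
 * A minimal sketch of the save/restore pattern above, assuming a
 * hypothetical scheduler name per queue instead of an elevator_type
 * reference: before the topology change each queue's scheduler is
 * remembered on a list, and afterwards the queue is looked up so the saved
 * scheduler can be put back.
 */
#include <stdlib.h>
#include <string.h>

struct qe_pair {
	struct qe_pair *next;
	void *q;			/* the queue this entry belongs to */
	char sched[32];			/* saved scheduler name */
};

static int save_sched(struct qe_pair **head, void *q, const char *sched)
{
	struct qe_pair *qe = malloc(sizeof(*qe));

	if (!qe)
		return -1;
	qe->q = q;
	strncpy(qe->sched, sched, sizeof(qe->sched) - 1);
	qe->sched[sizeof(qe->sched) - 1] = '\0';
	qe->next = *head;
	*head = qe;
	return 0;
}

/* returns the saved entry (caller restores and frees it), or NULL if none */
static struct qe_pair *lookup_and_unlink(struct qe_pair **head, void *q)
{
	for (struct qe_pair **pp = head; *pp; pp = &(*pp)->next) {
		if ((*pp)->q == q) {
			struct qe_pair *qe = *pp;

			*pp = qe->next;
			return qe;
		}
	}
	return NULL;
}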
4990 int prev_nr_hw_queues = set->nr_hw_queues; in __blk_mq_update_nr_hw_queues()
4993 lockdep_assert_held(&set->tag_list_lock); in __blk_mq_update_nr_hw_queues()
4995 if (set->nr_maps == 1 && nr_hw_queues > nr_cpu_ids) in __blk_mq_update_nr_hw_queues()
4999 if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues) in __blk_mq_update_nr_hw_queues()
5002 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
5007 * updating the new sw to hw queue mappings. in __blk_mq_update_nr_hw_queues()
5009 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
5013 list_for_each_entry(q, &set->tag_list, tag_set_list) { in __blk_mq_update_nr_hw_queues()
5023 list_for_each_entry(q, &set->tag_list, tag_set_list) { in __blk_mq_update_nr_hw_queues()
5028 if (q->nr_hw_queues != set->nr_hw_queues) { in __blk_mq_update_nr_hw_queues()
5033 for (; i < set->nr_hw_queues; i++) in __blk_mq_update_nr_hw_queues()
5036 set->nr_hw_queues = prev_nr_hw_queues; in __blk_mq_update_nr_hw_queues()
5050 list_for_each_entry(q, &set->tag_list, tag_set_list) { in __blk_mq_update_nr_hw_queues()
5056 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
5059 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
5063 for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++) in __blk_mq_update_nr_hw_queues()
5069 mutex_lock(&set->tag_list_lock); in blk_mq_update_nr_hw_queues()
5071 mutex_unlock(&set->tag_list_lock); in blk_mq_update_nr_hw_queues()
5082 ret = q->mq_ops->poll(hctx, iob); in blk_hctx_poll()
5105 struct blk_mq_hw_ctx *hctx = xa_load(&q->hctx_table, cookie); in blk_mq_poll()
5113 struct request_queue *q = rq->q; in blk_rq_poll()
5118 if (!percpu_ref_tryget(&q->q_usage_counter)) in blk_rq_poll()
5121 ret = blk_hctx_poll(q, rq->mq_hctx, iob, poll_flags); in blk_rq_poll()
5130 return rq->mq_ctx->cpu; in blk_mq_rq_cpu()
5139 cancel_delayed_work_sync(&q->requeue_work); in blk_mq_cancel_work_sync()
5142 cancel_delayed_work_sync(&hctx->run_work); in blk_mq_cancel_work_sync()