Lines matching "wait-queue"

1 // SPDX-License-Identifier: GPL-2.0
5 * Copyright (C) 2013-2014 Jens Axboe
6 * Copyright (C) 2013-2014 Christoph Hellwig
10 #include <linux/backing-dev.h>
13 #include <linux/blk-integrity.h>
29 #include <linux/blk-crypto.h>
35 #include <linux/t10-pi.h>
37 #include "blk-mq.h"
38 #include "blk-mq-debugfs.h"
39 #include "blk-pm.h"
40 #include "blk-stat.h"
41 #include "blk-mq-sched.h"
42 #include "blk-rq-qos.h"
58 * have pending work in this hardware queue.
62 return !list_empty_careful(&hctx->dispatch) || in blk_mq_hctx_has_pending()
63 sbitmap_any_bit_set(&hctx->ctx_map) || in blk_mq_hctx_has_pending()
68 * Mark this ctx as having pending work in this hardware queue
73 const int bit = ctx->index_hw[hctx->type]; in blk_mq_hctx_mark_pending()
75 if (!sbitmap_test_bit(&hctx->ctx_map, bit)) in blk_mq_hctx_mark_pending()
76 sbitmap_set_bit(&hctx->ctx_map, bit); in blk_mq_hctx_mark_pending()
82 const int bit = ctx->index_hw[hctx->type]; in blk_mq_hctx_clear_pending()
84 sbitmap_clear_bit(&hctx->ctx_map, bit); in blk_mq_hctx_clear_pending()
96 if (rq->rq_flags & RQF_IO_STAT && in blk_mq_check_inflight()
97 (!bdev_is_partition(mi->part) || rq->part == mi->part) && in blk_mq_check_inflight()
99 mi->inflight[rq_data_dir(rq)]++; in blk_mq_check_inflight()
131 if (!q->mq_freeze_depth) { in blk_freeze_set_owner()
132 q->mq_freeze_owner = owner; in blk_freeze_set_owner()
133 q->mq_freeze_owner_depth = 1; in blk_freeze_set_owner()
137 if (owner == q->mq_freeze_owner) in blk_freeze_set_owner()
138 q->mq_freeze_owner_depth += 1; in blk_freeze_set_owner()
145 if (!q->mq_freeze_owner) in blk_unfreeze_check_owner()
147 if (q->mq_freeze_owner != current) in blk_unfreeze_check_owner()
149 if (--q->mq_freeze_owner_depth == 0) { in blk_unfreeze_check_owner()
150 q->mq_freeze_owner = NULL; in blk_unfreeze_check_owner()
175 mutex_lock(&q->mq_freeze_lock); in __blk_freeze_queue_start()
177 if (++q->mq_freeze_depth == 1) { in __blk_freeze_queue_start()
178 percpu_ref_kill(&q->q_usage_counter); in __blk_freeze_queue_start()
179 mutex_unlock(&q->mq_freeze_lock); in __blk_freeze_queue_start()
183 mutex_unlock(&q->mq_freeze_lock); in __blk_freeze_queue_start()
198 wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); in blk_mq_freeze_queue_wait()
205 return wait_event_timeout(q->mq_freeze_wq, in blk_mq_freeze_queue_wait_timeout()
206 percpu_ref_is_zero(&q->q_usage_counter), in blk_mq_freeze_queue_wait_timeout()
222 mutex_lock(&q->mq_freeze_lock); in __blk_mq_unfreeze_queue()
224 q->q_usage_counter.data->force_atomic = true; in __blk_mq_unfreeze_queue()
225 q->mq_freeze_depth--; in __blk_mq_unfreeze_queue()
226 WARN_ON_ONCE(q->mq_freeze_depth < 0); in __blk_mq_unfreeze_queue()
227 if (!q->mq_freeze_depth) { in __blk_mq_unfreeze_queue()
228 percpu_ref_resurrect(&q->q_usage_counter); in __blk_mq_unfreeze_queue()
229 wake_up_all(&q->mq_freeze_wq); in __blk_mq_unfreeze_queue()
232 mutex_unlock(&q->mq_freeze_lock); in __blk_mq_unfreeze_queue()
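The freeze path above parks new submitters on q_usage_counter and mq_freeze_wq. For orientation, a minimal sketch of the usual caller-side pairing (hypothetical function; assuming the classic void-returning blk_mq_freeze_queue()/blk_mq_unfreeze_queue() helpers, which newer trees may extend with a memalloc-flags cookie):

    /* Hypothetical caller: drain all I/O before touching queue state. */
    static void example_update_queue(struct request_queue *q)
    {
            blk_mq_freeze_queue(q);         /* kill q_usage_counter, wait on mq_freeze_wq */
            /* ... no request can enter or be in flight here ... */
            blk_mq_unfreeze_queue(q);       /* resurrect the counter, wake waiters */
    }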
247 * Unlike blk_freeze_queue_start, the queue doesn't need to be unfrozen
272 spin_lock_irqsave(&q->queue_lock, flags); in blk_mq_quiesce_queue_nowait()
273 if (!q->quiesce_depth++) in blk_mq_quiesce_queue_nowait()
275 spin_unlock_irqrestore(&q->queue_lock, flags); in blk_mq_quiesce_queue_nowait()
280 * blk_mq_wait_quiesce_done() - wait until in-progress quiesce is done
281 * @set: tag_set to wait on
290 if (set->flags & BLK_MQ_F_BLOCKING) in blk_mq_wait_quiesce_done()
291 synchronize_srcu(set->srcu); in blk_mq_wait_quiesce_done()
298 * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
299 * @q: request queue.
303 * sure no dispatch can happen until the queue is unquiesced via
309 /* nothing to wait for non-mq queues */ in blk_mq_quiesce_queue()
311 blk_mq_wait_quiesce_done(q->tag_set); in blk_mq_quiesce_queue()
316 * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
317 * @q: request queue.
319 * This function recovers queue into the state before quiescing
327 spin_lock_irqsave(&q->queue_lock, flags); in blk_mq_unquiesce_queue()
328 if (WARN_ON_ONCE(q->quiesce_depth <= 0)) { in blk_mq_unquiesce_queue()
330 } else if (!--q->quiesce_depth) { in blk_mq_unquiesce_queue()
334 spin_unlock_irqrestore(&q->queue_lock, flags); in blk_mq_unquiesce_queue()
346 mutex_lock(&set->tag_list_lock); in blk_mq_quiesce_tagset()
347 list_for_each_entry(q, &set->tag_list, tag_set_list) { in blk_mq_quiesce_tagset()
351 mutex_unlock(&set->tag_list_lock); in blk_mq_quiesce_tagset()
361 mutex_lock(&set->tag_list_lock); in blk_mq_unquiesce_tagset()
362 list_for_each_entry(q, &set->tag_list, tag_set_list) { in blk_mq_unquiesce_tagset()
366 mutex_unlock(&set->tag_list_lock); in blk_mq_unquiesce_tagset()
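Quiescing only stops dispatch; it does not drain allocations the way freezing does. A hedged sketch of the usual pairing around work that must not race with ->queue_rq() (hypothetical driver function):

    /* Hypothetical: reconfigure the device with no dispatches in progress. */
    static void example_reconfigure(struct request_queue *q)
    {
            blk_mq_quiesce_queue(q);        /* bumps quiesce_depth, waits via (S)RCU */
            /* ... no new ->queue_rq() calls can start here ... */
            blk_mq_unquiesce_queue(q);      /* drops quiesce_depth, reruns hw queues */
    }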
377 blk_mq_tag_wakeup_all(hctx->tags, true); in blk_mq_wake_waiters()
384 INIT_LIST_HEAD(&rq->queuelist); in blk_rq_init()
385 rq->q = q; in blk_rq_init()
386 rq->__sector = (sector_t) -1; in blk_rq_init()
387 INIT_HLIST_NODE(&rq->hash); in blk_rq_init()
388 RB_CLEAR_NODE(&rq->rb_node); in blk_rq_init()
389 rq->tag = BLK_MQ_NO_TAG; in blk_rq_init()
390 rq->internal_tag = BLK_MQ_NO_TAG; in blk_rq_init()
391 rq->start_time_ns = blk_time_get_ns(); in blk_rq_init()
400 if (blk_queue_rq_alloc_time(rq->q)) in blk_mq_rq_time_init()
401 rq->alloc_time_ns = alloc_time_ns; in blk_mq_rq_time_init()
403 rq->alloc_time_ns = 0; in blk_mq_rq_time_init()
410 struct blk_mq_ctx *ctx = data->ctx; in blk_mq_rq_ctx_init()
411 struct blk_mq_hw_ctx *hctx = data->hctx; in blk_mq_rq_ctx_init()
412 struct request_queue *q = data->q; in blk_mq_rq_ctx_init()
413 struct request *rq = tags->static_rqs[tag]; in blk_mq_rq_ctx_init()
415 rq->q = q; in blk_mq_rq_ctx_init()
416 rq->mq_ctx = ctx; in blk_mq_rq_ctx_init()
417 rq->mq_hctx = hctx; in blk_mq_rq_ctx_init()
418 rq->cmd_flags = data->cmd_flags; in blk_mq_rq_ctx_init()
420 if (data->flags & BLK_MQ_REQ_PM) in blk_mq_rq_ctx_init()
421 data->rq_flags |= RQF_PM; in blk_mq_rq_ctx_init()
422 rq->rq_flags = data->rq_flags; in blk_mq_rq_ctx_init()
424 if (data->rq_flags & RQF_SCHED_TAGS) { in blk_mq_rq_ctx_init()
425 rq->tag = BLK_MQ_NO_TAG; in blk_mq_rq_ctx_init()
426 rq->internal_tag = tag; in blk_mq_rq_ctx_init()
428 rq->tag = tag; in blk_mq_rq_ctx_init()
429 rq->internal_tag = BLK_MQ_NO_TAG; in blk_mq_rq_ctx_init()
431 rq->timeout = 0; in blk_mq_rq_ctx_init()
433 rq->part = NULL; in blk_mq_rq_ctx_init()
434 rq->io_start_time_ns = 0; in blk_mq_rq_ctx_init()
435 rq->stats_sectors = 0; in blk_mq_rq_ctx_init()
436 rq->nr_phys_segments = 0; in blk_mq_rq_ctx_init()
437 rq->nr_integrity_segments = 0; in blk_mq_rq_ctx_init()
438 rq->end_io = NULL; in blk_mq_rq_ctx_init()
439 rq->end_io_data = NULL; in blk_mq_rq_ctx_init()
442 INIT_LIST_HEAD(&rq->queuelist); in blk_mq_rq_ctx_init()
444 WRITE_ONCE(rq->deadline, 0); in blk_mq_rq_ctx_init()
447 if (rq->rq_flags & RQF_USE_SCHED) { in blk_mq_rq_ctx_init()
448 struct elevator_queue *e = data->q->elevator; in blk_mq_rq_ctx_init()
450 INIT_HLIST_NODE(&rq->hash); in blk_mq_rq_ctx_init()
451 RB_CLEAR_NODE(&rq->rb_node); in blk_mq_rq_ctx_init()
453 if (e->type->ops.prepare_request) in blk_mq_rq_ctx_init()
454 e->type->ops.prepare_request(rq); in blk_mq_rq_ctx_init()
469 tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset); in __blk_mq_alloc_requests_batch()
478 prefetch(tags->static_rqs[tag]); in __blk_mq_alloc_requests_batch()
481 rq_list_add_head(data->cached_rqs, rq); in __blk_mq_alloc_requests_batch()
484 if (!(data->rq_flags & RQF_SCHED_TAGS)) in __blk_mq_alloc_requests_batch()
485 blk_mq_add_active_requests(data->hctx, nr); in __blk_mq_alloc_requests_batch()
487 percpu_ref_get_many(&data->q->q_usage_counter, nr - 1); in __blk_mq_alloc_requests_batch()
488 data->nr_tags -= nr; in __blk_mq_alloc_requests_batch()
490 return rq_list_pop(data->cached_rqs); in __blk_mq_alloc_requests_batch()
495 struct request_queue *q = data->q; in __blk_mq_alloc_requests()
504 if (data->cmd_flags & REQ_NOWAIT) in __blk_mq_alloc_requests()
505 data->flags |= BLK_MQ_REQ_NOWAIT; in __blk_mq_alloc_requests()
508 data->ctx = blk_mq_get_ctx(q); in __blk_mq_alloc_requests()
509 data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx); in __blk_mq_alloc_requests()
511 if (q->elevator) { in __blk_mq_alloc_requests()
514 * enabled for the queue. in __blk_mq_alloc_requests()
516 data->rq_flags |= RQF_SCHED_TAGS; in __blk_mq_alloc_requests()
522 if ((data->cmd_flags & REQ_OP_MASK) != REQ_OP_FLUSH && in __blk_mq_alloc_requests()
523 !blk_op_is_passthrough(data->cmd_flags)) { in __blk_mq_alloc_requests()
524 struct elevator_mq_ops *ops = &q->elevator->type->ops; in __blk_mq_alloc_requests()
526 WARN_ON_ONCE(data->flags & BLK_MQ_REQ_RESERVED); in __blk_mq_alloc_requests()
528 data->rq_flags |= RQF_USE_SCHED; in __blk_mq_alloc_requests()
529 if (ops->limit_depth) in __blk_mq_alloc_requests()
530 ops->limit_depth(data->cmd_flags, data); in __blk_mq_alloc_requests()
533 blk_mq_tag_busy(data->hctx); in __blk_mq_alloc_requests()
536 if (data->flags & BLK_MQ_REQ_RESERVED) in __blk_mq_alloc_requests()
537 data->rq_flags |= RQF_RESV; in __blk_mq_alloc_requests()
542 if (data->nr_tags > 1) { in __blk_mq_alloc_requests()
548 data->nr_tags = 1; in __blk_mq_alloc_requests()
558 if (data->flags & BLK_MQ_REQ_NOWAIT) in __blk_mq_alloc_requests()
570 if (!(data->rq_flags & RQF_SCHED_TAGS)) in __blk_mq_alloc_requests()
571 blk_mq_inc_active_requests(data->hctx); in __blk_mq_alloc_requests()
586 .nr_tags = plug->nr_ios, in blk_mq_rq_cache_fill()
587 .cached_rqs = &plug->cached_rqs, in blk_mq_rq_cache_fill()
594 plug->nr_ios = 1; in blk_mq_rq_cache_fill()
606 struct blk_plug *plug = current->plug; in blk_mq_alloc_cached_request()
612 if (rq_list_empty(&plug->cached_rqs)) { in blk_mq_alloc_cached_request()
613 if (plug->nr_ios == 1) in blk_mq_alloc_cached_request()
619 rq = rq_list_peek(&plug->cached_rqs); in blk_mq_alloc_cached_request()
620 if (!rq || rq->q != q) in blk_mq_alloc_cached_request()
623 if (blk_mq_get_hctx_type(opf) != rq->mq_hctx->type) in blk_mq_alloc_cached_request()
625 if (op_is_flush(rq->cmd_flags) != op_is_flush(opf)) in blk_mq_alloc_cached_request()
628 rq_list_pop(&plug->cached_rqs); in blk_mq_alloc_cached_request()
632 rq->cmd_flags = opf; in blk_mq_alloc_cached_request()
633 INIT_LIST_HEAD(&rq->queuelist); in blk_mq_alloc_cached_request()
660 rq->__data_len = 0; in blk_mq_alloc_request()
661 rq->__sector = (sector_t) -1; in blk_mq_alloc_request()
662 rq->bio = rq->biotail = NULL; in blk_mq_alloc_request()
666 return ERR_PTR(-EWOULDBLOCK); in blk_mq_alloc_request()
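For reference, a minimal sketch of allocating and freeing a passthrough request with the helper above (opcode and flags illustrative only):

    struct request *rq;

    rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_NOWAIT);
    if (IS_ERR(rq))
            return PTR_ERR(rq);     /* -EWOULDBLOCK when no tag was available */
    /* ... fill in the driver payload, issue it, then ... */
    blk_mq_free_request(rq);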
693 * a specific queue. in blk_mq_alloc_request_hctx()
697 return ERR_PTR(-EINVAL); in blk_mq_alloc_request_hctx()
699 if (hctx_idx >= q->nr_hw_queues) in blk_mq_alloc_request_hctx()
700 return ERR_PTR(-EIO); in blk_mq_alloc_request_hctx()
708 * If not, tell the caller that it should skip this queue. in blk_mq_alloc_request_hctx()
710 ret = -EXDEV; in blk_mq_alloc_request_hctx()
711 data.hctx = xa_load(&q->hctx_table, hctx_idx); in blk_mq_alloc_request_hctx()
714 cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask); in blk_mq_alloc_request_hctx()
719 if (q->elevator) in blk_mq_alloc_request_hctx()
727 ret = -EWOULDBLOCK; in blk_mq_alloc_request_hctx()
735 rq->__data_len = 0; in blk_mq_alloc_request_hctx()
736 rq->__sector = (sector_t) -1; in blk_mq_alloc_request_hctx()
737 rq->bio = rq->biotail = NULL; in blk_mq_alloc_request_hctx()
748 struct request_queue *q = rq->q; in blk_mq_finish_request()
752 if (rq->rq_flags & RQF_USE_SCHED) { in blk_mq_finish_request()
753 q->elevator->type->ops.finish_request(rq); in blk_mq_finish_request()
759 rq->rq_flags &= ~RQF_USE_SCHED; in blk_mq_finish_request()
765 struct request_queue *q = rq->q; in __blk_mq_free_request()
766 struct blk_mq_ctx *ctx = rq->mq_ctx; in __blk_mq_free_request()
767 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in __blk_mq_free_request()
768 const int sched_tag = rq->internal_tag; in __blk_mq_free_request()
772 rq->mq_hctx = NULL; in __blk_mq_free_request()
774 if (rq->tag != BLK_MQ_NO_TAG) { in __blk_mq_free_request()
776 blk_mq_put_tag(hctx->tags, ctx, rq->tag); in __blk_mq_free_request()
779 blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag); in __blk_mq_free_request()
786 struct request_queue *q = rq->q; in blk_mq_free_request()
791 laptop_io_completion(q->disk->bdi); in blk_mq_free_request()
795 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in blk_mq_free_request()
805 while ((rq = rq_list_pop(&plug->cached_rqs)) != NULL) in blk_mq_free_plug_rqs()
812 rq->q->disk ? rq->q->disk->disk_name : "?", in blk_dump_rq_flags()
813 (__force unsigned long long) rq->cmd_flags); in blk_dump_rq_flags()
819 rq->bio, rq->biotail, blk_rq_bytes(rq)); in blk_dump_rq_flags()
825 if (req->rq_flags & RQF_IO_STAT) { in blk_account_io_completion()
829 part_stat_add(req->part, sectors[sgrp], bytes >> 9); in blk_account_io_completion()
840 req->q->disk ? req->q->disk->disk_name : "?", in blk_print_req_error()
843 (__force u32)(req->cmd_flags & ~REQ_OP_MASK), in blk_print_req_error()
844 req->nr_phys_segments, in blk_print_req_error()
854 const bool is_flush = (req->rq_flags & RQF_FLUSH_SEQ) != 0; in blk_complete_request()
856 struct bio *bio = req->bio; in blk_complete_request()
875 struct bio *next = bio->bi_next; in blk_complete_request()
892 if (!req->end_io) { in blk_complete_request()
893 req->bio = NULL; in blk_complete_request()
894 req->__data_len = 0; in blk_complete_request()
899 * blk_update_request - Complete multiple bytes without completing the request
917 * %false - this request doesn't have any more data
918 * %true - this request has more data
923 bool is_flush = req->rq_flags & RQF_FLUSH_SEQ; in blk_update_request()
924 bool quiet = req->rq_flags & RQF_QUIET; in blk_update_request()
929 if (!req->bio) in blk_update_request()
944 !test_bit(GD_DEAD, &req->q->disk->state)) { in blk_update_request()
952 while (req->bio) { in blk_update_request()
953 struct bio *bio = req->bio; in blk_update_request()
954 unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes); in blk_update_request()
957 bio->bi_status = error; in blk_update_request()
959 if (bio_bytes == bio->bi_iter.bi_size) { in blk_update_request()
960 req->bio = bio->bi_next; in blk_update_request()
967 bio->bi_status = BLK_STS_IOERR; in blk_update_request()
978 if (!bio->bi_iter.bi_size) { in blk_update_request()
985 nr_bytes -= bio_bytes; in blk_update_request()
994 if (!req->bio) { in blk_update_request()
1000 req->__data_len = 0; in blk_update_request()
1004 req->__data_len -= total_bytes; in blk_update_request()
1008 req->__sector += total_bytes >> 9; in blk_update_request()
1011 if (req->rq_flags & RQF_MIXED_MERGE) { in blk_update_request()
1012 req->cmd_flags &= ~REQ_FAILFAST_MASK; in blk_update_request()
1013 req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK; in blk_update_request()
1016 if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) { in blk_update_request()
1023 req->__data_len = blk_rq_cur_bytes(req); in blk_update_request()
1027 req->nr_phys_segments = blk_recalc_rq_segments(req); in blk_update_request()
1043 if ((req->rq_flags & (RQF_IO_STAT|RQF_FLUSH_SEQ)) == RQF_IO_STAT) { in blk_account_io_done()
1047 update_io_ticks(req->part, jiffies, true); in blk_account_io_done()
1048 part_stat_inc(req->part, ios[sgrp]); in blk_account_io_done()
1049 part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns); in blk_account_io_done()
1050 part_stat_local_dec(req->part, in blk_account_io_done()
1058 struct bio *bio = req->bio; in blk_rq_passthrough_stats()
1060 if (!blk_queue_passthrough_stat(req->q)) in blk_rq_passthrough_stats()
1072 if (!bio->bi_bdev) in blk_rq_passthrough_stats()
1081 if (blk_rq_bytes(req) & (bdev_logical_block_size(bio->bi_bdev) - 1)) in blk_rq_passthrough_stats()
1090 if (!blk_queue_io_stat(req->q)) in blk_account_io_start()
1095 req->rq_flags |= RQF_IO_STAT; in blk_account_io_start()
1096 req->start_time_ns = blk_time_get_ns(); in blk_account_io_start()
1099 * All non-passthrough requests are created from a bio with one in blk_account_io_start()
1101 * generated by the state machine in blk-flush.c is cloned onto the in blk_account_io_start()
1102 * lower device by dm-multipath we can get here without a bio. in blk_account_io_start()
1104 if (req->bio) in blk_account_io_start()
1105 req->part = req->bio->bi_bdev; in blk_account_io_start()
1107 req->part = req->q->disk->part0; in blk_account_io_start()
1110 update_io_ticks(req->part, jiffies, false); in blk_account_io_start()
1111 part_stat_local_inc(req->part, in_flight[op_is_write(req_op(req))]); in blk_account_io_start()
1117 if (rq->rq_flags & RQF_STATS) in __blk_mq_end_request_acct()
1131 if (rq->end_io) { in __blk_mq_end_request()
1132 rq_qos_done(rq->q, rq); in __blk_mq_end_request()
1133 if (rq->end_io(rq, error) == RQ_END_IO_FREE) in __blk_mq_end_request()
1154 struct request_queue *q = hctx->queue; in blk_mq_flush_tag_batch()
1158 blk_mq_put_tags(hctx->tags, tag_array, nr_tags); in blk_mq_flush_tag_batch()
1159 percpu_ref_put_many(&q->q_usage_counter, nr_tags); in blk_mq_flush_tag_batch()
1169 if (iob->need_ts) in blk_mq_end_request_batch()
1172 while ((rq = rq_list_pop(&iob->req_list)) != NULL) { in blk_mq_end_request_batch()
1173 prefetch(rq->bio); in blk_mq_end_request_batch()
1174 prefetch(rq->rq_next); in blk_mq_end_request_batch()
1177 if (iob->need_ts) in blk_mq_end_request_batch()
1182 rq_qos_done(rq->q, rq); in blk_mq_end_request_batch()
1188 if (rq->end_io && rq->end_io(rq, 0) == RQ_END_IO_NONE) in blk_mq_end_request_batch()
1191 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in blk_mq_end_request_batch()
1198 if (nr_tags == TAG_COMP_BATCH || cur_hctx != rq->mq_hctx) { in blk_mq_end_request_batch()
1202 cur_hctx = rq->mq_hctx; in blk_mq_end_request_batch()
1204 tags[nr_tags++] = rq->tag; in blk_mq_end_request_batch()
1218 rq->q->mq_ops->complete(rq); in blk_complete_reqs()
1242 !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) in blk_mq_complete_need_ipi()
1254 if (cpu == rq->mq_ctx->cpu || in blk_mq_complete_need_ipi()
1255 (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) && in blk_mq_complete_need_ipi()
1256 cpus_share_cache(cpu, rq->mq_ctx->cpu) && in blk_mq_complete_need_ipi()
1257 cpus_equal_capacity(cpu, rq->mq_ctx->cpu))) in blk_mq_complete_need_ipi()
1261 return cpu_online(rq->mq_ctx->cpu); in blk_mq_complete_need_ipi()
1268 cpu = rq->mq_ctx->cpu; in blk_mq_complete_send_ipi()
1269 if (llist_add(&rq->ipi_list, &per_cpu(blk_cpu_done, cpu))) in blk_mq_complete_send_ipi()
1279 if (llist_add(&rq->ipi_list, list)) in blk_mq_raise_softirq()
1286 WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); in blk_mq_complete_request_remote()
1293 if ((rq->mq_hctx->nr_ctx == 1 && in blk_mq_complete_request_remote()
1294 rq->mq_ctx->cpu == raw_smp_processor_id()) || in blk_mq_complete_request_remote()
1295 rq->cmd_flags & REQ_POLLED) in blk_mq_complete_request_remote()
1303 if (rq->q->nr_hw_queues == 1) { in blk_mq_complete_request_remote()
1312 * blk_mq_complete_request - end I/O on a request
1316 * Complete a request by scheduling the ->complete_rq operation.
1321 rq->q->mq_ops->complete(rq); in blk_mq_complete_request()
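blk_mq_complete_request() is the driver-facing entry point for completions; the remote/IPI decision above is hidden behind it. A hedged sketch of where it sits in a completion handler (fetch helper hypothetical):

    /* Hypothetical interrupt handler fragment. */
    static irqreturn_t example_irq(int irq, void *data)
    {
            struct request *rq = example_fetch_completed(data);    /* hypothetical */

            if (rq)
                    blk_mq_complete_request(rq);    /* ->complete() runs locally or via IPI/softirq */
            return IRQ_HANDLED;
    }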
1326 * blk_mq_start_request - Start processing a request
1335 struct request_queue *q = rq->q; in blk_mq_start_request()
1339 if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags) && in blk_mq_start_request()
1341 rq->io_start_time_ns = blk_time_get_ns(); in blk_mq_start_request()
1342 rq->stats_sectors = blk_rq_sectors(rq); in blk_mq_start_request()
1343 rq->rq_flags |= RQF_STATS; in blk_mq_start_request()
1350 WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT); in blk_mq_start_request()
1351 rq->mq_hctx->tags->rqs[rq->tag] = rq; in blk_mq_start_request()
1356 if (rq->bio && rq->bio->bi_opf & REQ_POLLED) in blk_mq_start_request()
1357 WRITE_ONCE(rq->bio->bi_cookie, rq->mq_hctx->queue_num); in blk_mq_start_request()
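blk_mq_start_request() has to run before the request is handed to hardware so that timeouts, stats and polling see it in flight. A hedged sketch of its place in a typical ->queue_rq() handler (submit helper hypothetical):

    static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
                                         const struct blk_mq_queue_data *bd)
    {
            struct request *rq = bd->rq;

            blk_mq_start_request(rq);       /* MQ_RQ_IN_FLIGHT, deadline armed */
            if (example_hw_submit(rq))      /* hypothetical device-specific submit */
                    return BLK_STS_RESOURCE;        /* blk-mq requeues and reruns */
            return BLK_STS_OK;
    }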
1362 * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
1368 if (plug->multiple_queues) in blk_plug_max_rq_count()
1375 struct request *last = rq_list_peek(&plug->mq_list); in blk_add_rq_to_plug()
1377 if (!plug->rq_count) { in blk_add_rq_to_plug()
1378 trace_block_plug(rq->q); in blk_add_rq_to_plug()
1379 } else if (plug->rq_count >= blk_plug_max_rq_count(plug) || in blk_add_rq_to_plug()
1380 (!blk_queue_nomerges(rq->q) && in blk_add_rq_to_plug()
1384 trace_block_plug(rq->q); in blk_add_rq_to_plug()
1387 if (!plug->multiple_queues && last && last->q != rq->q) in blk_add_rq_to_plug()
1388 plug->multiple_queues = true; in blk_add_rq_to_plug()
1391 * ->queue_rqs() directly in blk_add_rq_to_plug()
1393 if (!plug->has_elevator && (rq->rq_flags & RQF_SCHED_TAGS)) in blk_add_rq_to_plug()
1394 plug->has_elevator = true; in blk_add_rq_to_plug()
1395 rq_list_add_tail(&plug->mq_list, rq); in blk_add_rq_to_plug()
1396 plug->rq_count++; in blk_add_rq_to_plug()
1400 * blk_execute_rq_nowait - insert a request to I/O scheduler for execution
1402 * @at_head: insert request at head or tail of queue
1405 * Insert a fully prepared request at the back of the I/O scheduler queue
1406 * for execution. Don't wait for completion.
1409 * This function will invoke @done directly if the queue is dead.
1413 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_execute_rq_nowait()
1420 if (current->plug && !at_head) { in blk_execute_rq_nowait()
1421 blk_add_rq_to_plug(current->plug, rq); in blk_execute_rq_nowait()
1426 blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING); in blk_execute_rq_nowait()
1437 struct blk_rq_wait *wait = rq->end_io_data; in blk_end_sync_rq() local
1439 wait->ret = ret; in blk_end_sync_rq()
1440 complete(&wait->done); in blk_end_sync_rq()
1446 if (!rq->mq_hctx) in blk_rq_is_poll()
1448 if (rq->mq_hctx->type != HCTX_TYPE_POLL) in blk_rq_is_poll()
1454 static void blk_rq_poll_completion(struct request *rq, struct completion *wait) in blk_rq_poll_completion() argument
1457 blk_hctx_poll(rq->q, rq->mq_hctx, NULL, 0); in blk_rq_poll_completion()
1459 } while (!completion_done(wait)); in blk_rq_poll_completion()
1463 * blk_execute_rq - insert a request into queue for execution
1465 * @at_head: insert request at head or tail of queue
1468 * Insert a fully prepared request at the back of the I/O scheduler queue
1469 * for execution and wait for completion.
1474 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_execute_rq()
1475 struct blk_rq_wait wait = { in blk_execute_rq() local
1476 .done = COMPLETION_INITIALIZER_ONSTACK(wait.done), in blk_execute_rq()
1482 rq->end_io_data = &wait; in blk_execute_rq()
1483 rq->end_io = blk_end_sync_rq; in blk_execute_rq()
1490 blk_rq_poll_completion(rq, &wait.done); in blk_execute_rq()
1492 blk_wait_io(&wait.done); in blk_execute_rq()
1494 return wait.ret; in blk_execute_rq()
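Putting the execute helpers together, a hedged sketch of the common synchronous passthrough pattern (allocation details and data mapping trimmed; opcode illustrative):

    struct request *rq;
    blk_status_t status;

    rq = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
    if (IS_ERR(rq))
            return PTR_ERR(rq);
    /* ... attach data, e.g. via blk_rq_map_kern(), if the command carries any ... */
    status = blk_execute_rq(rq, false);     /* inserts, then waits on blk_rq_wait.done */
    blk_mq_free_request(rq);
    return blk_status_to_errno(status);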
1500 struct request_queue *q = rq->q; in __blk_mq_requeue_request()
1508 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in __blk_mq_requeue_request()
1509 rq->rq_flags &= ~RQF_TIMED_OUT; in __blk_mq_requeue_request()
1515 struct request_queue *q = rq->q; in blk_mq_requeue_request()
1520 /* this request will be re-inserted to io scheduler queue */ in blk_mq_requeue_request()
1523 spin_lock_irqsave(&q->requeue_lock, flags); in blk_mq_requeue_request()
1524 list_add_tail(&rq->queuelist, &q->requeue_list); in blk_mq_requeue_request()
1525 spin_unlock_irqrestore(&q->requeue_lock, flags); in blk_mq_requeue_request()
1540 spin_lock_irq(&q->requeue_lock); in blk_mq_requeue_work()
1541 list_splice_init(&q->requeue_list, &rq_list); in blk_mq_requeue_work()
1542 list_splice_init(&q->flush_list, &flush_list); in blk_mq_requeue_work()
1543 spin_unlock_irq(&q->requeue_lock); in blk_mq_requeue_work()
1547 list_del_init(&rq->queuelist); in blk_mq_requeue_work()
1550 * driver already and might have driver-specific data allocated in blk_mq_requeue_work()
1554 if (rq->rq_flags & RQF_DONTPREP) in blk_mq_requeue_work()
1562 list_del_init(&rq->queuelist); in blk_mq_requeue_work()
1571 kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 0); in blk_mq_kick_requeue_list()
1578 kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, in blk_mq_delay_kick_requeue_list()
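A hedged sketch of how a driver typically drives this requeue machinery when the device temporarily cannot accept work (function names hypothetical):

    /* Hypothetical error path: park the request on q->requeue_list for now. */
    static void example_defer(struct request *rq)
    {
            blk_mq_requeue_request(rq, false);      /* don't kick the list yet */
    }

    /* Once the device recovers, reinsert everything that was parked. */
    static void example_resume(struct request_queue *q)
    {
            blk_mq_kick_requeue_list(q);    /* schedules blk_mq_requeue_work() */
    }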
1585 return (rq->rq_flags & RQF_FLUSH_SEQ) && !is_flush_rq(rq); in blk_is_flush_data_rq()
1591 * If we find a request that isn't idle we know the queue is busy in blk_mq_rq_inflight()
1595 * In case of queue quiesce, if one flush data request is completed, in blk_mq_rq_inflight()
1600 if (blk_mq_request_started(rq) && !(blk_queue_quiesced(rq->q) && in blk_mq_rq_inflight()
1623 req->rq_flags |= RQF_TIMED_OUT; in blk_mq_rq_timed_out()
1624 if (req->q->mq_ops->timeout) { in blk_mq_rq_timed_out()
1627 ret = req->q->mq_ops->timeout(req); in blk_mq_rq_timed_out()
1648 if (rq->rq_flags & RQF_TIMED_OUT) in blk_mq_req_expired()
1651 deadline = READ_ONCE(rq->deadline); in blk_mq_req_expired()
1652 if (time_after_eq(expired->timeout_start, deadline)) in blk_mq_req_expired()
1655 if (expired->next == 0) in blk_mq_req_expired()
1656 expired->next = deadline; in blk_mq_req_expired()
1657 else if (time_after(expired->next, deadline)) in blk_mq_req_expired()
1658 expired->next = deadline; in blk_mq_req_expired()
1665 if (rq->end_io(rq, 0) == RQ_END_IO_FREE) in blk_mq_put_rq_ref()
1684 expired->has_timedout_rq = true; in blk_mq_check_expired()
1710 * timeout at the same time a queue freeze is waiting in blk_mq_timeout_work()
1712 * acquire the queue reference here. in blk_mq_timeout_work()
1716 * obtain a reference even in the short window between the queue in blk_mq_timeout_work()
1722 if (!percpu_ref_tryget(&q->q_usage_counter)) in blk_mq_timeout_work()
1725 /* check if there is any timed-out request */ in blk_mq_timeout_work()
1731 * uses srcu or rcu, wait for a synchronization point to in blk_mq_timeout_work()
1734 blk_mq_wait_quiesce_done(q->tag_set); in blk_mq_timeout_work()
1741 mod_timer(&q->timeout, expired.next); in blk_mq_timeout_work()
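The scan above ends up calling the driver's ->timeout() method via blk_mq_rq_timed_out(). A hedged sketch of such a handler (the policy and the hardware check are purely illustrative):

    /* Hypothetical ->timeout() implementation. */
    static enum blk_eh_timer_return example_timeout(struct request *rq)
    {
            if (example_request_still_in_hw(rq))    /* hypothetical check */
                    return BLK_EH_RESET_TIMER;      /* re-arm the deadline, keep waiting */

            blk_mq_complete_request(rq);            /* give up and complete it ourselves */
            return BLK_EH_DONE;
    }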
1766 struct blk_mq_hw_ctx *hctx = flush_data->hctx; in flush_busy_ctx()
1767 struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; in flush_busy_ctx()
1768 enum hctx_type type = hctx->type; in flush_busy_ctx()
1770 spin_lock(&ctx->lock); in flush_busy_ctx()
1771 list_splice_tail_init(&ctx->rq_lists[type], flush_data->list); in flush_busy_ctx()
1773 spin_unlock(&ctx->lock); in flush_busy_ctx()
1779 * to the for-dispatch
1788 sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data); in blk_mq_flush_busy_ctxs()
1800 struct blk_mq_hw_ctx *hctx = dispatch_data->hctx; in dispatch_rq_from_ctx()
1801 struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; in dispatch_rq_from_ctx()
1802 enum hctx_type type = hctx->type; in dispatch_rq_from_ctx()
1804 spin_lock(&ctx->lock); in dispatch_rq_from_ctx()
1805 if (!list_empty(&ctx->rq_lists[type])) { in dispatch_rq_from_ctx()
1806 dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next); in dispatch_rq_from_ctx()
1807 list_del_init(&dispatch_data->rq->queuelist); in dispatch_rq_from_ctx()
1808 if (list_empty(&ctx->rq_lists[type])) in dispatch_rq_from_ctx()
1811 spin_unlock(&ctx->lock); in dispatch_rq_from_ctx()
1813 return !dispatch_data->rq; in dispatch_rq_from_ctx()
1819 unsigned off = start ? start->index_hw[hctx->type] : 0; in blk_mq_dequeue_from_ctx()
1825 __sbitmap_for_each_set(&hctx->ctx_map, off, in blk_mq_dequeue_from_ctx()
1833 struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; in __blk_mq_alloc_driver_tag()
1834 unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags; in __blk_mq_alloc_driver_tag()
1837 blk_mq_tag_busy(rq->mq_hctx); in __blk_mq_alloc_driver_tag()
1839 if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) { in __blk_mq_alloc_driver_tag()
1840 bt = &rq->mq_hctx->tags->breserved_tags; in __blk_mq_alloc_driver_tag()
1843 if (!hctx_may_queue(rq->mq_hctx, bt)) in __blk_mq_alloc_driver_tag()
1851 rq->tag = tag + tag_offset; in __blk_mq_alloc_driver_tag()
1852 blk_mq_inc_active_requests(rq->mq_hctx); in __blk_mq_alloc_driver_tag()
1856 static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, in blk_mq_dispatch_wake() argument
1861 hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait); in blk_mq_dispatch_wake()
1863 spin_lock(&hctx->dispatch_wait_lock); in blk_mq_dispatch_wake()
1864 if (!list_empty(&wait->entry)) { in blk_mq_dispatch_wake()
1867 list_del_init(&wait->entry); in blk_mq_dispatch_wake()
1868 sbq = &hctx->tags->bitmap_tags; in blk_mq_dispatch_wake()
1869 atomic_dec(&sbq->ws_active); in blk_mq_dispatch_wake()
1871 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_dispatch_wake()
1879 * the tag wakeups. For non-shared tags, we can simply mark us needing a
1888 wait_queue_entry_t *wait; in blk_mq_mark_tag_wait() local
1891 if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) && in blk_mq_mark_tag_wait()
1892 !(blk_mq_is_shared_tags(hctx->flags))) { in blk_mq_mark_tag_wait()
1897 * allocation failure and adding the hardware queue to the wait in blk_mq_mark_tag_wait()
1898 * queue. in blk_mq_mark_tag_wait()
1901 * At most this will cost an extra queue run. in blk_mq_mark_tag_wait()
1906 wait = &hctx->dispatch_wait; in blk_mq_mark_tag_wait()
1907 if (!list_empty_careful(&wait->entry)) in blk_mq_mark_tag_wait()
1910 if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) in blk_mq_mark_tag_wait()
1911 sbq = &hctx->tags->breserved_tags; in blk_mq_mark_tag_wait()
1913 sbq = &hctx->tags->bitmap_tags; in blk_mq_mark_tag_wait()
1914 wq = &bt_wait_ptr(sbq, hctx)->wait; in blk_mq_mark_tag_wait()
1916 spin_lock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1917 spin_lock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1918 if (!list_empty(&wait->entry)) { in blk_mq_mark_tag_wait()
1919 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1920 spin_unlock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1924 atomic_inc(&sbq->ws_active); in blk_mq_mark_tag_wait()
1925 wait->flags &= ~WQ_FLAG_EXCLUSIVE; in blk_mq_mark_tag_wait()
1926 __add_wait_queue(wq, wait); in blk_mq_mark_tag_wait()
1932 * Order adding us to wait queue and allocating driver tag. in blk_mq_mark_tag_wait()
1938 * Otherwise, re-order of adding wait queue and getting driver tag in blk_mq_mark_tag_wait()
1940 * the waitqueue_active() may not observe us in wait queue. in blk_mq_mark_tag_wait()
1946 * allocation failure and adding the hardware queue to the wait in blk_mq_mark_tag_wait()
1947 * queue. in blk_mq_mark_tag_wait()
1951 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1952 spin_unlock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1957 * We got a tag, remove ourselves from the wait queue to ensure in blk_mq_mark_tag_wait()
1960 list_del_init(&wait->entry); in blk_mq_mark_tag_wait()
1961 atomic_dec(&sbq->ws_active); in blk_mq_mark_tag_wait()
1962 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1963 spin_unlock_irq(&wq->lock); in blk_mq_mark_tag_wait()
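The lock ordering above is the classic add-then-recheck recipe for avoiding a lost wakeup. Reduced to its skeleton (a generic sketch, not the exact blk-mq code; try_get_resource() is a stand-in for the driver-tag allocation):

    add_wait_queue(wq, wait);               /* 1) become visible to the waker first */
    smp_mb();                               /* 2) order the add against the retry */
    if (try_get_resource()) {               /* 3) retry; a racing free now wakes us */
            remove_wait_queue(wq, wait);    /*    got it after all: back out */
            return true;
    }
    return false;                           /* 4) genuinely out: stay on the queue */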
1972 * - EWMA is one simple way to compute running average value
1973 * - weight(7/8 and 1/8) is applied so that it can decrease exponentially
1974 * - take 4 as factor to avoid getting a too small (0) result, and this
1981 ewma = hctx->dispatch_busy; in blk_mq_update_dispatch_busy()
1986 ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1; in blk_mq_update_dispatch_busy()
1991 hctx->dispatch_busy = ewma; in blk_mq_update_dispatch_busy()
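Concretely, with the 7/8 weight and factor of 4 named in the comment above, each sample updates dispatch_busy as ewma = (ewma * 7 + (busy ? 1 << 4 : 0)) / 8: starting from 0, a single busy dispatch yields 2, a sustained busy streak converges toward 16, and each idle sample then decays the value by roughly one eighth (the factor of 4 keeps intermediate results from truncating straight to 0 in integer math).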
1999 list_add(&rq->queuelist, list); in blk_mq_handle_dev_resource()
2012 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_prep_dispatch_rq()
2013 int budget_token = -1; in blk_mq_prep_dispatch_rq()
2016 budget_token = blk_mq_get_dispatch_budget(rq->q); in blk_mq_prep_dispatch_rq()
2027 * rerun the hardware queue when a tag is freed. The in blk_mq_prep_dispatch_rq()
2028 * waitqueue takes care of that. If the queue is run in blk_mq_prep_dispatch_rq()
2030 * we'll re-run it below. in blk_mq_prep_dispatch_rq()
2038 blk_mq_put_dispatch_budget(rq->q, budget_token); in blk_mq_prep_dispatch_rq()
2061 * blk_mq_commit_rqs will notify driver using bd->last that there is no
2065 * 1) did not queue everything initially scheduled to queue
2066 * 2) the last attempt to queue a request failed
2071 if (hctx->queue->mq_ops->commit_rqs && queued) { in blk_mq_commit_rqs()
2072 trace_block_unplug(hctx->queue, queued, !from_schedule); in blk_mq_commit_rqs()
2073 hctx->queue->mq_ops->commit_rqs(hctx); in blk_mq_commit_rqs()
2084 struct request_queue *q = hctx->queue; in blk_mq_dispatch_rq_list()
2102 WARN_ON_ONCE(hctx != rq->mq_hctx); in blk_mq_dispatch_rq_list()
2107 list_del_init(&rq->queuelist); in blk_mq_dispatch_rq_list()
2117 nr_budgets--; in blk_mq_dispatch_rq_list()
2118 ret = q->mq_ops->queue_rq(hctx, &bd); in blk_mq_dispatch_rq_list()
2141 * Any items that need requeuing? Stuff them into hctx->dispatch, in blk_mq_dispatch_rq_list()
2142 * that is where we will continue on next queue run. in blk_mq_dispatch_rq_list()
2146 /* For non-shared tags, the RESTART check will suffice */ in blk_mq_dispatch_rq_list()
2148 ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) || in blk_mq_dispatch_rq_list()
2149 blk_mq_is_shared_tags(hctx->flags)); in blk_mq_dispatch_rq_list()
2154 spin_lock(&hctx->lock); in blk_mq_dispatch_rq_list()
2155 list_splice_tail_init(list, &hctx->dispatch); in blk_mq_dispatch_rq_list()
2156 spin_unlock(&hctx->lock); in blk_mq_dispatch_rq_list()
2159 * Order adding requests to hctx->dispatch and checking in blk_mq_dispatch_rq_list()
2162 * miss the new added requests to hctx->dispatch, meantime in blk_mq_dispatch_rq_list()
2170 * thread and hence that a queue rerun is needed. in blk_mq_dispatch_rq_list()
2174 * waitqueue is no longer active, ensure that we run the queue in blk_mq_dispatch_rq_list()
2178 * the hardware queue got stopped and restarted before requests in blk_mq_dispatch_rq_list()
2179 * were pushed back onto the dispatch list. Rerun the queue to in blk_mq_dispatch_rq_list()
2181 * - blk_mq_run_hw_queue() checks whether or not a queue has in blk_mq_dispatch_rq_list()
2182 * been stopped before rerunning a queue. in blk_mq_dispatch_rq_list()
2183 * - Some but not all block drivers stop a queue before in blk_mq_dispatch_rq_list()
2184 * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq in blk_mq_dispatch_rq_list()
2185 * and dm-rq. in blk_mq_dispatch_rq_list()
2188 * bit is set, run queue after a delay to avoid IO stalls in blk_mq_dispatch_rq_list()
2189 * that could otherwise occur if the queue is idle. We'll do in blk_mq_dispatch_rq_list()
2197 (no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) in blk_mq_dispatch_rq_list()
2212 int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask); in blk_mq_first_mapped_cpu()
2215 cpu = cpumask_first(hctx->cpumask); in blk_mq_first_mapped_cpu()
2220 * ->next_cpu is always calculated from hctx->cpumask, so simply use
2225 return hctx->next_cpu >= nr_cpu_ids; in blk_mq_hctx_empty_cpumask()
2231 * For now we just round-robin here, switching for every
2237 int next_cpu = hctx->next_cpu; in blk_mq_hctx_next_cpu()
2240 if (hctx->queue->nr_hw_queues == 1 || blk_mq_hctx_empty_cpumask(hctx)) in blk_mq_hctx_next_cpu()
2243 if (--hctx->next_cpu_batch <= 0) { in blk_mq_hctx_next_cpu()
2245 next_cpu = cpumask_next_and(next_cpu, hctx->cpumask, in blk_mq_hctx_next_cpu()
2249 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; in blk_mq_hctx_next_cpu()
2263 * Make sure to re-select CPU next time once after CPUs in blk_mq_hctx_next_cpu()
2264 * in hctx->cpumask become online again. in blk_mq_hctx_next_cpu()
2266 hctx->next_cpu = next_cpu; in blk_mq_hctx_next_cpu()
2267 hctx->next_cpu_batch = 1; in blk_mq_hctx_next_cpu()
2271 hctx->next_cpu = next_cpu; in blk_mq_hctx_next_cpu()
2276 * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously.
2277 * @hctx: Pointer to the hardware queue to run.
2278 * @msecs: Milliseconds of delay to wait before running the queue.
2280 * Run a hardware queue asynchronously with a delay of @msecs.
2286 kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work, in blk_mq_delay_run_hw_queue()
2296 * When queue is quiesced, we may be switching io scheduler, or in blk_mq_hw_queue_need_run()
2297 * updating nr_hw_queues, or other things, and we can't run queue in blk_mq_hw_queue_need_run()
2300 * And queue will be rerun in blk_mq_unquiesce_queue() if it is in blk_mq_hw_queue_need_run()
2303 __blk_mq_run_dispatch_ops(hctx->queue, false, in blk_mq_hw_queue_need_run()
2304 need_run = !blk_queue_quiesced(hctx->queue) && in blk_mq_hw_queue_need_run()
2310 * blk_mq_run_hw_queue - Start to run a hardware queue.
2311 * @hctx: Pointer to the hardware queue to run.
2312 * @async: If we want to run the queue asynchronously.
2314 * Check if the request queue is not in a quiesced state and if there are
2315 * pending requests to be sent. If this is true, run the queue to send requests
2323 * We can't run the queue inline with interrupts disabled. in blk_mq_run_hw_queue()
2327 might_sleep_if(!async && hctx->flags & BLK_MQ_F_BLOCKING); in blk_mq_run_hw_queue()
2335 * if hw queue is quiesced locklessly above, we need the use in blk_mq_run_hw_queue()
2336 * ->queue_lock to make sure we see the up-to-date status to in blk_mq_run_hw_queue()
2337 * not miss rerunning the hw queue. in blk_mq_run_hw_queue()
2339 spin_lock_irqsave(&hctx->queue->queue_lock, flags); in blk_mq_run_hw_queue()
2341 spin_unlock_irqrestore(&hctx->queue->queue_lock, flags); in blk_mq_run_hw_queue()
2347 if (async || !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) { in blk_mq_run_hw_queue()
2352 blk_mq_run_dispatch_ops(hctx->queue, in blk_mq_run_hw_queue()
2358 * Return preferred queue to dispatch from (if any) for non-mq aware IO
2371 struct blk_mq_hw_ctx *hctx = ctx->hctxs[HCTX_TYPE_DEFAULT]; in blk_mq_get_sq_hctx()
2379 * blk_mq_run_hw_queues - Run all hardware queues in a request queue.
2380 * @q: Pointer to the request queue to run.
2381 * @async: If we want to run the queue asynchronously.
2400 !list_empty_careful(&hctx->dispatch)) in blk_mq_run_hw_queues()
2407 * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously.
2408 * @q: Pointer to the request queue to run.
2409 * @msecs: Milliseconds of delay to wait before running the queues.
2425 * if another hctx is re-delaying the other's work in blk_mq_delay_run_hw_queues()
2428 if (delayed_work_pending(&hctx->run_work)) in blk_mq_delay_run_hw_queues()
2436 !list_empty_careful(&hctx->dispatch)) in blk_mq_delay_run_hw_queues()
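Drivers lean on these run helpers when a resource shortage clears; a hedged sketch of the two common call shapes (the delay value is illustrative):

    /* After freeing whatever made ->queue_rq() return BLK_STS_RESOURCE: */
    blk_mq_run_hw_queues(q, true);          /* rerun all hw queues, asynchronously */

    /* Or, when the shortage is expected to clear on its own shortly: */
    blk_mq_delay_run_hw_queue(hctx, 3);     /* rerun this hctx in ~3 ms */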
2453 cancel_delayed_work(&hctx->run_work); in blk_mq_stop_hw_queue()
2455 set_bit(BLK_MQ_S_STOPPED, &hctx->state); in blk_mq_stop_hw_queue()
2480 clear_bit(BLK_MQ_S_STOPPED, &hctx->state); in blk_mq_start_hw_queue()
2482 blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING); in blk_mq_start_hw_queue()
2501 clear_bit(BLK_MQ_S_STOPPED, &hctx->state); in blk_mq_start_stopped_hw_queue()
2519 (hctx->flags & BLK_MQ_F_BLOCKING)); in blk_mq_start_stopped_hw_queues()
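The stop/start pair is normally driven by device-side flow control; a hedged sketch of the usual pattern (the full-ring condition is hypothetical):

    /* In ->queue_rq(), when the device ring is full: */
    blk_mq_stop_hw_queue(hctx);
    return BLK_STS_DEV_RESOURCE;    /* request goes back to hctx->dispatch until restarted */

    /* In the completion path, once ring space is available again: */
    blk_mq_start_stopped_hw_queues(q, true);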
2528 blk_mq_run_dispatch_ops(hctx->queue, in blk_mq_run_work_fn()
2533 * blk_mq_request_bypass_insert - Insert a request at dispatch list.
2542 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_request_bypass_insert()
2544 spin_lock(&hctx->lock); in blk_mq_request_bypass_insert()
2546 list_add(&rq->queuelist, &hctx->dispatch); in blk_mq_request_bypass_insert()
2548 list_add_tail(&rq->queuelist, &hctx->dispatch); in blk_mq_request_bypass_insert()
2549 spin_unlock(&hctx->lock); in blk_mq_request_bypass_insert()
2557 enum hctx_type type = hctx->type; in blk_mq_insert_requests()
2560 * Try to issue requests directly if the hw queue isn't busy to save an in blk_mq_insert_requests()
2561 * extra enqueue & dequeue to the sw queue. in blk_mq_insert_requests()
2563 if (!hctx->dispatch_busy && !run_queue_async) { in blk_mq_insert_requests()
2564 blk_mq_run_dispatch_ops(hctx->queue, in blk_mq_insert_requests()
2571 * preemption doesn't flush plug list, so it's possible ctx->cpu is in blk_mq_insert_requests()
2575 BUG_ON(rq->mq_ctx != ctx); in blk_mq_insert_requests()
2577 if (rq->cmd_flags & REQ_NOWAIT) in blk_mq_insert_requests()
2581 spin_lock(&ctx->lock); in blk_mq_insert_requests()
2582 list_splice_tail_init(list, &ctx->rq_lists[type]); in blk_mq_insert_requests()
2584 spin_unlock(&ctx->lock); in blk_mq_insert_requests()
2591 struct request_queue *q = rq->q; in blk_mq_insert_request()
2592 struct blk_mq_ctx *ctx = rq->mq_ctx; in blk_mq_insert_request()
2593 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_insert_request()
2597 * Passthrough request have to be added to hctx->dispatch in blk_mq_insert_request()
2600 * them, which gets them added to hctx->dispatch. in blk_mq_insert_request()
2603 * and it is added to the scheduler queue, there is no chance to in blk_mq_insert_request()
2604 * dispatch it given we prioritize requests in hctx->dispatch. in blk_mq_insert_request()
2609 * Firstly normal IO request is inserted to scheduler queue or in blk_mq_insert_request()
2610 * sw queue, meantime we add flush request to dispatch queue( in blk_mq_insert_request()
2611 * hctx->dispatch) directly and there is at most one in-flight in blk_mq_insert_request()
2612 * flush request for each hw queue, so it doesn't matter to add in blk_mq_insert_request()
2613 * flush request to tail or front of the dispatch queue. in blk_mq_insert_request()
2615 * Secondly in case of NCQ, flush request belongs to non-NCQ in blk_mq_insert_request()
2617 * in-flight normal IO request(NCQ command). When adding flush in blk_mq_insert_request()
2618 * rq to the front of hctx->dispatch, it is easier to introduce in blk_mq_insert_request()
2620 * compared with adding to the tail of dispatch queue, then in blk_mq_insert_request()
2624 * drive when adding flush rq to the front of hctx->dispatch. in blk_mq_insert_request()
2626 * Simply queue flush rq to the front of hctx->dispatch so that in blk_mq_insert_request()
2630 } else if (q->elevator) { in blk_mq_insert_request()
2633 WARN_ON_ONCE(rq->tag != BLK_MQ_NO_TAG); in blk_mq_insert_request()
2635 list_add(&rq->queuelist, &list); in blk_mq_insert_request()
2636 q->elevator->type->ops.insert_requests(hctx, &list, flags); in blk_mq_insert_request()
2640 spin_lock(&ctx->lock); in blk_mq_insert_request()
2642 list_add(&rq->queuelist, &ctx->rq_lists[hctx->type]); in blk_mq_insert_request()
2644 list_add_tail(&rq->queuelist, in blk_mq_insert_request()
2645 &ctx->rq_lists[hctx->type]); in blk_mq_insert_request()
2647 spin_unlock(&ctx->lock); in blk_mq_insert_request()
2656 if (bio->bi_opf & REQ_RAHEAD) in blk_mq_bio_to_request()
2657 rq->cmd_flags |= REQ_FAILFAST_MASK; in blk_mq_bio_to_request()
2659 rq->__sector = bio->bi_iter.bi_sector; in blk_mq_bio_to_request()
2662 rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q, in blk_mq_bio_to_request()
2675 struct request_queue *q = rq->q; in __blk_mq_issue_directly()
2683 * For OK queue, we are done. For error, caller may kill it. in __blk_mq_issue_directly()
2687 ret = q->mq_ops->queue_rq(hctx, &bd); in __blk_mq_issue_directly()
2709 budget_token = blk_mq_get_dispatch_budget(rq->q); in blk_mq_get_budget_and_tag()
2714 blk_mq_put_dispatch_budget(rq->q, budget_token); in blk_mq_get_budget_and_tag()
2721 * blk_mq_try_issue_directly - Try to send a request directly to device driver.
2722 * @hctx: Pointer of the associated hardware queue.
2726 * request directly to device driver. Else, insert at hctx->dispatch queue, so
2728 * queue have higher priority.
2735 if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { in blk_mq_try_issue_directly()
2741 if ((rq->rq_flags & RQF_USE_SCHED) || !blk_mq_get_budget_and_tag(rq)) { in blk_mq_try_issue_directly()
2743 blk_mq_run_hw_queue(hctx, rq->cmd_flags & REQ_NOWAIT); in blk_mq_try_issue_directly()
2764 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_request_issue_directly()
2766 if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { in blk_mq_request_issue_directly()
2784 while ((rq = rq_list_pop(&plug->mq_list))) { in blk_mq_plug_issue_direct()
2785 bool last = rq_list_empty(&plug->mq_list); in blk_mq_plug_issue_direct()
2787 if (hctx != rq->mq_hctx) { in blk_mq_plug_issue_direct()
2792 hctx = rq->mq_hctx; in blk_mq_plug_issue_direct()
2821 q->mq_ops->queue_rqs(&plug->mq_list); in __blk_mq_flush_plug_list()
2834 struct request *rq = rq_list_pop(&plug->mq_list); in blk_mq_dispatch_plug_list()
2837 this_hctx = rq->mq_hctx; in blk_mq_dispatch_plug_list()
2838 this_ctx = rq->mq_ctx; in blk_mq_dispatch_plug_list()
2840 } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx || in blk_mq_dispatch_plug_list()
2845 list_add_tail(&rq->queuelist, &list); in blk_mq_dispatch_plug_list()
2847 } while (!rq_list_empty(&plug->mq_list)); in blk_mq_dispatch_plug_list()
2849 plug->mq_list = requeue_list; in blk_mq_dispatch_plug_list()
2850 trace_block_unplug(this_hctx->queue, depth, !from_sched); in blk_mq_dispatch_plug_list()
2852 percpu_ref_get(&this_hctx->queue->q_usage_counter); in blk_mq_dispatch_plug_list()
2855 spin_lock(&this_hctx->lock); in blk_mq_dispatch_plug_list()
2856 list_splice_tail_init(&list, &this_hctx->dispatch); in blk_mq_dispatch_plug_list()
2857 spin_unlock(&this_hctx->lock); in blk_mq_dispatch_plug_list()
2859 } else if (this_hctx->queue->elevator) { in blk_mq_dispatch_plug_list()
2860 this_hctx->queue->elevator->type->ops.insert_requests(this_hctx, in blk_mq_dispatch_plug_list()
2866 percpu_ref_put(&this_hctx->queue->q_usage_counter); in blk_mq_dispatch_plug_list()
2876 * plug->mq_list via a schedule() in the driver's queue_rq() callback. in blk_mq_flush_plug_list()
2881 if (plug->rq_count == 0) in blk_mq_flush_plug_list()
2883 depth = plug->rq_count; in blk_mq_flush_plug_list()
2884 plug->rq_count = 0; in blk_mq_flush_plug_list()
2886 if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) { in blk_mq_flush_plug_list()
2889 rq = rq_list_peek(&plug->mq_list); in blk_mq_flush_plug_list()
2890 q = rq->q; in blk_mq_flush_plug_list()
2894 * Peek first request and see if we have a ->queue_rqs() hook. in blk_mq_flush_plug_list()
2897 * same queue, caller must ensure that's the case. in blk_mq_flush_plug_list()
2899 if (q->mq_ops->queue_rqs) { in blk_mq_flush_plug_list()
2902 if (rq_list_empty(&plug->mq_list)) in blk_mq_flush_plug_list()
2908 if (rq_list_empty(&plug->mq_list)) in blk_mq_flush_plug_list()
2914 } while (!rq_list_empty(&plug->mq_list)); in blk_mq_flush_plug_list()
2927 list_del_init(&rq->queuelist); in blk_mq_try_issue_list_directly()
2970 .cmd_flags = bio->bi_opf, in blk_mq_get_new_requests()
2977 data.nr_tags = plug->nr_ios; in blk_mq_get_new_requests()
2978 plug->nr_ios = 1; in blk_mq_get_new_requests()
2979 data.cached_rqs = &plug->cached_rqs; in blk_mq_get_new_requests()
2986 if (bio->bi_opf & REQ_NOWAIT) in blk_mq_get_new_requests()
3002 rq = rq_list_peek(&plug->cached_rqs); in blk_mq_peek_cached_request()
3003 if (!rq || rq->q != q) in blk_mq_peek_cached_request()
3005 if (type != rq->mq_hctx->type && in blk_mq_peek_cached_request()
3006 (type != HCTX_TYPE_READ || rq->mq_hctx->type != HCTX_TYPE_DEFAULT)) in blk_mq_peek_cached_request()
3008 if (op_is_flush(rq->cmd_flags) != op_is_flush(opf)) in blk_mq_peek_cached_request()
3016 if (rq_list_pop(&plug->cached_rqs) != rq) in blk_mq_use_cached_rq()
3020 * If any qos ->throttle() end up blocking, we will have flushed the in blk_mq_use_cached_rq()
3024 rq_qos_throttle(rq->q, bio); in blk_mq_use_cached_rq()
3027 rq->cmd_flags = bio->bi_opf; in blk_mq_use_cached_rq()
3028 INIT_LIST_HEAD(&rq->queuelist); in blk_mq_use_cached_rq()
3033 unsigned int bs_mask = queue_logical_block_size(q) - 1; in bio_unaligned()
3036 if ((bio->bi_iter.bi_size & bs_mask) || in bio_unaligned()
3037 ((bio->bi_iter.bi_sector << SECTOR_SHIFT) & bs_mask)) in bio_unaligned()
3043 * blk_mq_submit_bio - Create and send a request to block device.
3049 * * We want to place request at plug queue for possible future merging
3050 * * There is an IO scheduler active at this queue
3052 * It will not queue the request if there is an error with the bio, or at the
3057 struct request_queue *q = bdev_get_queue(bio->bi_bdev); in blk_mq_submit_bio()
3058 struct blk_plug *plug = current->plug; in blk_mq_submit_bio()
3059 const int is_sync = op_is_sync(bio->bi_opf); in blk_mq_submit_bio()
3066 * If the plug has a cached request for this queue, try to use it. in blk_mq_submit_bio()
3068 rq = blk_mq_peek_cached_request(plug, q, bio->bi_opf); in blk_mq_submit_bio()
3073 * on the queue usage counter, and is the only write BIO in-flight for in blk_mq_submit_bio()
3077 nr_segs = bio->__bi_nr_segments; in blk_mq_submit_bio()
3096 * check has to be done with queue usage counter held in blk_mq_submit_bio()
3103 bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); in blk_mq_submit_bio()
3133 bio->bi_status = ret; in blk_mq_submit_bio()
3142 if (op_is_flush(bio->bi_opf) && blk_insert_flush(rq)) in blk_mq_submit_bio()
3150 hctx = rq->mq_hctx; in blk_mq_submit_bio()
3151 if ((rq->rq_flags & RQF_USE_SCHED) || in blk_mq_submit_bio()
3152 (hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) { in blk_mq_submit_bio()
3162 * Don't drop the queue reference if we were trying to use a cached in blk_mq_submit_bio()
3171 * blk_insert_cloned_request - Helper for stacking drivers to submit a request
3176 struct request_queue *q = rq->q; in blk_insert_cloned_request()
3185 * a non-read/write command (discard, write same, etc.) the in blk_insert_cloned_request()
3186 * low-level device driver will set the relevant queue limit to in blk_insert_cloned_request()
3187 * 0 to prevent blk-lib from issuing more of the offending in blk_insert_cloned_request()
3188 * operations. Commands queued prior to the queue limit being in blk_insert_cloned_request()
3201 * The queue settings related to segment counting may differ from the in blk_insert_cloned_request()
3202 * original queue. in blk_insert_cloned_request()
3204 rq->nr_phys_segments = blk_recalc_rq_segments(rq); in blk_insert_cloned_request()
3205 if (rq->nr_phys_segments > max_segments) { in blk_insert_cloned_request()
3207 __func__, rq->nr_phys_segments, max_segments); in blk_insert_cloned_request()
3211 if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq))) in blk_insert_cloned_request()
3234 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
3244 while ((bio = rq->bio) != NULL) { in blk_rq_unprep_clone()
3245 rq->bio = bio->bi_next; in blk_rq_unprep_clone()
3253 * blk_rq_prep_clone - Helper function to setup clone request
3280 struct bio *bio = bio_alloc_clone(rq->q->disk->part0, bio_src, in blk_rq_prep_clone()
3290 if (rq->bio) { in blk_rq_prep_clone()
3291 rq->biotail->bi_next = bio; in blk_rq_prep_clone()
3292 rq->biotail = bio; in blk_rq_prep_clone()
3294 rq->bio = rq->biotail = bio; in blk_rq_prep_clone()
3299 rq->__sector = blk_rq_pos(rq_src); in blk_rq_prep_clone()
3300 rq->__data_len = blk_rq_bytes(rq_src); in blk_rq_prep_clone()
3301 if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) { in blk_rq_prep_clone()
3302 rq->rq_flags |= RQF_SPECIAL_PAYLOAD; in blk_rq_prep_clone()
3303 rq->special_vec = rq_src->special_vec; in blk_rq_prep_clone()
3305 rq->nr_phys_segments = rq_src->nr_phys_segments; in blk_rq_prep_clone()
3307 if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0) in blk_rq_prep_clone()
3315 return -ENOMEM; in blk_rq_prep_clone()
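blk_rq_prep_clone()/blk_rq_unprep_clone() are the request-stacking helpers (dm-multipath style). A heavily hedged sketch of the call shape, with the bio constructor callback and its data entirely hypothetical:

    /* Hypothetical stacking-driver setup: mirror rq_src's bios onto the clone. */
    ret = blk_rq_prep_clone(clone, rq_src, bs, GFP_ATOMIC,
                            example_bio_ctr, ctr_data);
    if (ret)
            return ret;
    /* ... issue the clone on the lower device; when tearing it down: */
    blk_rq_unprep_clone(clone);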
3326 if (rq->bio) { in blk_steal_bios()
3327 if (list->tail) in blk_steal_bios()
3328 list->tail->bi_next = rq->bio; in blk_steal_bios()
3330 list->head = rq->bio; in blk_steal_bios()
3331 list->tail = rq->biotail; in blk_steal_bios()
3333 rq->bio = NULL; in blk_steal_bios()
3334 rq->biotail = NULL; in blk_steal_bios()
3337 rq->__data_len = 0; in blk_steal_bios()
3360 list_for_each_entry(page, &tags->page_list, lru) { in blk_mq_clear_rq_mapping()
3362 unsigned long end = start + order_to_size(page->private); in blk_mq_clear_rq_mapping()
3365 for (i = 0; i < drv_tags->nr_tags; i++) { in blk_mq_clear_rq_mapping()
3366 struct request *rq = drv_tags->rqs[i]; in blk_mq_clear_rq_mapping()
3371 cmpxchg(&drv_tags->rqs[i], rq, NULL); in blk_mq_clear_rq_mapping()
3377 * Wait until all pending iteration is done. in blk_mq_clear_rq_mapping()
3380 * after the ->lock is released. in blk_mq_clear_rq_mapping()
3382 spin_lock_irqsave(&drv_tags->lock, flags); in blk_mq_clear_rq_mapping()
3383 spin_unlock_irqrestore(&drv_tags->lock, flags); in blk_mq_clear_rq_mapping()
3392 if (list_empty(&tags->page_list)) in blk_mq_free_rqs()
3395 if (blk_mq_is_shared_tags(set->flags)) in blk_mq_free_rqs()
3396 drv_tags = set->shared_tags; in blk_mq_free_rqs()
3398 drv_tags = set->tags[hctx_idx]; in blk_mq_free_rqs()
3400 if (tags->static_rqs && set->ops->exit_request) { in blk_mq_free_rqs()
3403 for (i = 0; i < tags->nr_tags; i++) { in blk_mq_free_rqs()
3404 struct request *rq = tags->static_rqs[i]; in blk_mq_free_rqs()
3408 set->ops->exit_request(set, rq, hctx_idx); in blk_mq_free_rqs()
3409 tags->static_rqs[i] = NULL; in blk_mq_free_rqs()
3415 while (!list_empty(&tags->page_list)) { in blk_mq_free_rqs()
3416 page = list_first_entry(&tags->page_list, struct page, lru); in blk_mq_free_rqs()
3417 list_del_init(&page->lru); in blk_mq_free_rqs()
3423 __free_pages(page, page->private); in blk_mq_free_rqs()
3429 kfree(tags->rqs); in blk_mq_free_rq_map()
3430 tags->rqs = NULL; in blk_mq_free_rq_map()
3431 kfree(tags->static_rqs); in blk_mq_free_rq_map()
3432 tags->static_rqs = NULL; in blk_mq_free_rq_map()
3442 for (i = 0; i < set->nr_maps; i++) { in hctx_idx_to_type()
3443 unsigned int start = set->map[i].queue_offset; in hctx_idx_to_type()
3444 unsigned int end = start + set->map[i].nr_queues; in hctx_idx_to_type()
3450 if (i >= set->nr_maps) in hctx_idx_to_type()
3461 return blk_mq_hw_queue_to_node(&set->map[type], hctx_idx); in blk_mq_get_hctx_node()
3473 node = set->numa_node; in blk_mq_alloc_rq_map()
3476 BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); in blk_mq_alloc_rq_map()
3480 tags->rqs = kcalloc_node(nr_tags, sizeof(struct request *), in blk_mq_alloc_rq_map()
3483 if (!tags->rqs) in blk_mq_alloc_rq_map()
3486 tags->static_rqs = kcalloc_node(nr_tags, sizeof(struct request *), in blk_mq_alloc_rq_map()
3489 if (!tags->static_rqs) in blk_mq_alloc_rq_map()
3495 kfree(tags->rqs); in blk_mq_alloc_rq_map()
3506 if (set->ops->init_request) { in blk_mq_init_request()
3507 ret = set->ops->init_request(set, rq, hctx_idx, node); in blk_mq_init_request()
3512 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in blk_mq_init_request()
3525 node = set->numa_node; in blk_mq_alloc_rqs()
3527 INIT_LIST_HEAD(&tags->page_list); in blk_mq_alloc_rqs()
3533 rq_size = round_up(sizeof(struct request) + set->cmd_size, in blk_mq_alloc_rqs()
3543 while (this_order && left < order_to_size(this_order - 1)) in blk_mq_alloc_rqs()
3544 this_order--; in blk_mq_alloc_rqs()
3552 if (!this_order--) in blk_mq_alloc_rqs()
3561 page->private = this_order; in blk_mq_alloc_rqs()
3562 list_add_tail(&page->lru, &tags->page_list); in blk_mq_alloc_rqs()
3567 * to additional allocations like via ops->init_request(). in blk_mq_alloc_rqs()
3571 to_do = min(entries_per_page, depth - i); in blk_mq_alloc_rqs()
3572 left -= to_do * rq_size; in blk_mq_alloc_rqs()
3576 tags->static_rqs[i] = rq; in blk_mq_alloc_rqs()
3578 tags->static_rqs[i] = NULL; in blk_mq_alloc_rqs()
3590 return -ENOMEM; in blk_mq_alloc_rqs()
3602 if (rq->mq_hctx != iter_data->hctx) in blk_mq_has_request()
3604 iter_data->has_rq = true; in blk_mq_has_request()
3610 struct blk_mq_tags *tags = hctx->sched_tags ? in blk_mq_hctx_has_requests()
3611 hctx->sched_tags : hctx->tags; in blk_mq_hctx_has_requests()
3623 enum hctx_type type = hctx->type; in blk_mq_hctx_has_online_cpu()
3627 * hctx->cpumask has to rule out isolated CPUs, but userspace still in blk_mq_hctx_has_online_cpu()
3628 * might submit IOs on these isolated CPUs, so use the queue map to in blk_mq_hctx_has_online_cpu()
3632 struct blk_mq_hw_ctx *h = blk_mq_map_queue_type(hctx->queue, in blk_mq_hctx_has_online_cpu()
3661 set_bit(BLK_MQ_S_INACTIVE, &hctx->state); in blk_mq_hctx_notify_offline()
3665 * Try to grab a reference to the queue and wait for any outstanding in blk_mq_hctx_notify_offline()
3666 * requests. If we could not grab a reference the queue has been in blk_mq_hctx_notify_offline()
3669 if (percpu_ref_tryget(&hctx->queue->q_usage_counter)) { in blk_mq_hctx_notify_offline()
3672 percpu_ref_put(&hctx->queue->q_usage_counter); in blk_mq_hctx_notify_offline()
3681 * Isolated CPUs have been ruled out from hctx->cpumask, which is supposed
3688 struct blk_mq_hw_ctx *mapped_hctx = blk_mq_map_queue_type(hctx->queue, in blk_mq_cpu_mapped_to_hctx()
3689 hctx->type, cpu); in blk_mq_cpu_mapped_to_hctx()
3700 clear_bit(BLK_MQ_S_INACTIVE, &hctx->state); in blk_mq_hctx_notify_online()
3706 * software queue to the hw queue dispatch list, and ensure that it
3720 ctx = __blk_mq_get_ctx(hctx->queue, cpu); in blk_mq_hctx_notify_dead()
3721 type = hctx->type; in blk_mq_hctx_notify_dead()
3723 spin_lock(&ctx->lock); in blk_mq_hctx_notify_dead()
3724 if (!list_empty(&ctx->rq_lists[type])) { in blk_mq_hctx_notify_dead()
3725 list_splice_init(&ctx->rq_lists[type], &tmp); in blk_mq_hctx_notify_dead()
3728 spin_unlock(&ctx->lock); in blk_mq_hctx_notify_dead()
3733 spin_lock(&hctx->lock); in blk_mq_hctx_notify_dead()
3734 list_splice_tail_init(&tmp, &hctx->dispatch); in blk_mq_hctx_notify_dead()
3735 spin_unlock(&hctx->lock); in blk_mq_hctx_notify_dead()
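/*
 * A minimal userspace sketch of the two-step hand-off above, with a
 * hypothetical singly-linked queue and pthread mutexes standing in for the
 * ctx and hctx spinlocks: drain the dead CPU's list into a local list under
 * the first lock, then append it to the dispatch list under the second lock,
 * so the two locks are never held at the same time.
 */
#include <pthread.h>
#include <stddef.h>

struct node { struct node *next; };

struct slist {
	struct node *head, *tail;
	pthread_mutex_t lock;
};

static void slist_splice_tail(struct slist *dst, struct node *head,
			      struct node *tail)
{
	if (!head)
		return;
	if (dst->tail)
		dst->tail->next = head;
	else
		dst->head = head;
	dst->tail = tail;
}

/* move everything queued on @percpu over to @dispatch */
static void requeue_dead_cpu(struct slist *percpu, struct slist *dispatch)
{
	struct node *head, *tail;

	pthread_mutex_lock(&percpu->lock);
	head = percpu->head;
	tail = percpu->tail;
	percpu->head = percpu->tail = NULL;
	pthread_mutex_unlock(&percpu->lock);

	if (!head)
		return;

	pthread_mutex_lock(&dispatch->lock);
	slist_splice_tail(dispatch, head, tail);
	pthread_mutex_unlock(&dispatch->lock);
	/* the kernel then runs the hw queue to flush the moved requests */
}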
3745 if (!(hctx->flags & BLK_MQ_F_STACKING) && in __blk_mq_remove_cpuhp()
3746 !hlist_unhashed(&hctx->cpuhp_online)) { in __blk_mq_remove_cpuhp()
3748 &hctx->cpuhp_online); in __blk_mq_remove_cpuhp()
3749 INIT_HLIST_NODE(&hctx->cpuhp_online); in __blk_mq_remove_cpuhp()
3752 if (!hlist_unhashed(&hctx->cpuhp_dead)) { in __blk_mq_remove_cpuhp()
3754 &hctx->cpuhp_dead); in __blk_mq_remove_cpuhp()
3755 INIT_HLIST_NODE(&hctx->cpuhp_dead); in __blk_mq_remove_cpuhp()
3770 if (!(hctx->flags & BLK_MQ_F_STACKING) && in __blk_mq_add_cpuhp()
3771 hlist_unhashed(&hctx->cpuhp_online)) in __blk_mq_add_cpuhp()
3773 &hctx->cpuhp_online); in __blk_mq_add_cpuhp()
3775 if (hlist_unhashed(&hctx->cpuhp_dead)) in __blk_mq_add_cpuhp()
3777 &hctx->cpuhp_dead); in __blk_mq_add_cpuhp()
3799 spin_lock(&q->unused_hctx_lock); in blk_mq_remove_hw_queues_cpuhp()
3800 list_splice_init(&q->unused_hctx_list, &hctx_list); in blk_mq_remove_hw_queues_cpuhp()
3801 spin_unlock(&q->unused_hctx_lock); in blk_mq_remove_hw_queues_cpuhp()
3807 spin_lock(&q->unused_hctx_lock); in blk_mq_remove_hw_queues_cpuhp()
3808 list_splice(&hctx_list, &q->unused_hctx_list); in blk_mq_remove_hw_queues_cpuhp()
3809 spin_unlock(&q->unused_hctx_lock); in blk_mq_remove_hw_queues_cpuhp()
 3829  * Before freeing the hw queue, clear the flush request reference in
 3830  * tags->rqs[] to avoid a potential use-after-free.
3838 /* The hw queue may not be mapped yet */ in blk_mq_clear_flush_rq_mapping()
3845 cmpxchg(&tags->rqs[i], flush_rq, NULL); in blk_mq_clear_flush_rq_mapping()
3848 * Wait until all pending iteration is done. in blk_mq_clear_flush_rq_mapping()
3851 * after the ->lock is released. in blk_mq_clear_flush_rq_mapping()
3853 spin_lock_irqsave(&tags->lock, flags); in blk_mq_clear_flush_rq_mapping()
3854 spin_unlock_irqrestore(&tags->lock, flags); in blk_mq_clear_flush_rq_mapping()
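/*
 * A minimal userspace sketch of the idea above, assuming C11 atomics and a
 * pthread mutex in place of cmpxchg() and tags->lock: clear every slot that
 * still points at the stale request, then take and drop the iterators' lock
 * once so that any walk which started before the clear has finished by the
 * time this returns.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

struct tag_table {
	_Atomic(void *) *slots;
	size_t nr;
	pthread_mutex_t iter_lock;	/* held by slot iterators */
};

static void clear_stale_mapping(struct tag_table *t, void *stale)
{
	for (size_t i = 0; i < t->nr; i++) {
		void *expected = stale;

		/* only clear slots that still reference @stale */
		atomic_compare_exchange_strong(&t->slots[i], &expected, NULL);
	}

	/*
	 * Pairs with iterators holding iter_lock: anything that sampled a
	 * slot before the clear has dropped the lock by the time we return.
	 */
	pthread_mutex_lock(&t->iter_lock);
	pthread_mutex_unlock(&t->iter_lock);
}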
3857 /* hctx->ctxs will be freed in queue's release handler */
3862 struct request *flush_rq = hctx->fq->flush_rq; in blk_mq_exit_hctx()
3868 blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx], in blk_mq_exit_hctx()
3869 set->queue_depth, flush_rq); in blk_mq_exit_hctx()
3870 if (set->ops->exit_request) in blk_mq_exit_hctx()
3871 set->ops->exit_request(set, flush_rq, hctx_idx); in blk_mq_exit_hctx()
3873 if (set->ops->exit_hctx) in blk_mq_exit_hctx()
3874 set->ops->exit_hctx(hctx, hctx_idx); in blk_mq_exit_hctx()
3876 xa_erase(&q->hctx_table, hctx_idx); in blk_mq_exit_hctx()
3878 spin_lock(&q->unused_hctx_lock); in blk_mq_exit_hctx()
3879 list_add(&hctx->hctx_list, &q->unused_hctx_list); in blk_mq_exit_hctx()
3880 spin_unlock(&q->unused_hctx_lock); in blk_mq_exit_hctx()
3901 hctx->queue_num = hctx_idx; in blk_mq_init_hctx()
3903 hctx->tags = set->tags[hctx_idx]; in blk_mq_init_hctx()
3905 if (set->ops->init_hctx && in blk_mq_init_hctx()
3906 set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) in blk_mq_init_hctx()
3909 if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, in blk_mq_init_hctx()
3910 hctx->numa_node)) in blk_mq_init_hctx()
3913 if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL)) in blk_mq_init_hctx()
3919 if (set->ops->exit_request) in blk_mq_init_hctx()
3920 set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx); in blk_mq_init_hctx()
3922 if (set->ops->exit_hctx) in blk_mq_init_hctx()
3923 set->ops->exit_hctx(hctx, hctx_idx); in blk_mq_init_hctx()
3925 return -1; in blk_mq_init_hctx()
3939 if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node)) in blk_mq_alloc_hctx()
3942 atomic_set(&hctx->nr_active, 0); in blk_mq_alloc_hctx()
3944 node = set->numa_node; in blk_mq_alloc_hctx()
3945 hctx->numa_node = node; in blk_mq_alloc_hctx()
3947 INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); in blk_mq_alloc_hctx()
3948 spin_lock_init(&hctx->lock); in blk_mq_alloc_hctx()
3949 INIT_LIST_HEAD(&hctx->dispatch); in blk_mq_alloc_hctx()
3950 INIT_HLIST_NODE(&hctx->cpuhp_dead); in blk_mq_alloc_hctx()
3951 INIT_HLIST_NODE(&hctx->cpuhp_online); in blk_mq_alloc_hctx()
3952 hctx->queue = q; in blk_mq_alloc_hctx()
3953 hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED; in blk_mq_alloc_hctx()
3955 INIT_LIST_HEAD(&hctx->hctx_list); in blk_mq_alloc_hctx()
3961 hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *), in blk_mq_alloc_hctx()
3963 if (!hctx->ctxs) in blk_mq_alloc_hctx()
3966 if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), in blk_mq_alloc_hctx()
3969 hctx->nr_ctx = 0; in blk_mq_alloc_hctx()
3971 spin_lock_init(&hctx->dispatch_wait_lock); in blk_mq_alloc_hctx()
3972 init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake); in blk_mq_alloc_hctx()
3973 INIT_LIST_HEAD(&hctx->dispatch_wait.entry); in blk_mq_alloc_hctx()
3975 hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp); in blk_mq_alloc_hctx()
3976 if (!hctx->fq) in blk_mq_alloc_hctx()
3984 sbitmap_free(&hctx->ctx_map); in blk_mq_alloc_hctx()
3986 kfree(hctx->ctxs); in blk_mq_alloc_hctx()
3988 free_cpumask_var(hctx->cpumask); in blk_mq_alloc_hctx()
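/*
 * A minimal sketch of the error-unwind style used above, with made-up
 * resources standing in for the cpumask, ctxs array, ctx_map and flush
 * queue: each allocation gets a matching label, and a failure jumps to the
 * label that frees everything allocated so far, in reverse order.
 */
#include <stdlib.h>

struct fake_hctx {
	void *cpumask;
	void *ctxs;
	void *ctx_map;
	void *fq;
};

static struct fake_hctx *alloc_fake_hctx(void)
{
	struct fake_hctx *hctx = calloc(1, sizeof(*hctx));

	if (!hctx)
		return NULL;

	hctx->cpumask = malloc(64);
	if (!hctx->cpumask)
		goto free_hctx;
	hctx->ctxs = malloc(256);
	if (!hctx->ctxs)
		goto free_cpumask;
	hctx->ctx_map = malloc(64);
	if (!hctx->ctx_map)
		goto free_ctxs;
	hctx->fq = malloc(128);
	if (!hctx->fq)
		goto free_ctx_map;

	return hctx;

 free_ctx_map:
	free(hctx->ctx_map);
 free_ctxs:
	free(hctx->ctxs);
 free_cpumask:
	free(hctx->cpumask);
 free_hctx:
	free(hctx);
	return NULL;
}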
3998 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_init_cpu_queues()
4002 struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); in blk_mq_init_cpu_queues()
4006 __ctx->cpu = i; in blk_mq_init_cpu_queues()
4007 spin_lock_init(&__ctx->lock); in blk_mq_init_cpu_queues()
4009 INIT_LIST_HEAD(&__ctx->rq_lists[k]); in blk_mq_init_cpu_queues()
4011 __ctx->queue = q; in blk_mq_init_cpu_queues()
4014 * Set local node, IFF we have more than one hw queue. If in blk_mq_init_cpu_queues()
4017 for (j = 0; j < set->nr_maps; j++) { in blk_mq_init_cpu_queues()
4019 if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE) in blk_mq_init_cpu_queues()
4020 hctx->numa_node = cpu_to_node(i); in blk_mq_init_cpu_queues()
4032 tags = blk_mq_alloc_rq_map(set, hctx_idx, depth, set->reserved_tags); in blk_mq_alloc_map_and_rqs()
4048 if (blk_mq_is_shared_tags(set->flags)) { in __blk_mq_alloc_map_and_rqs()
4049 set->tags[hctx_idx] = set->shared_tags; in __blk_mq_alloc_map_and_rqs()
4054 set->tags[hctx_idx] = blk_mq_alloc_map_and_rqs(set, hctx_idx, in __blk_mq_alloc_map_and_rqs()
4055 set->queue_depth); in __blk_mq_alloc_map_and_rqs()
4057 return set->tags[hctx_idx]; in __blk_mq_alloc_map_and_rqs()
4073 if (!blk_mq_is_shared_tags(set->flags)) in __blk_mq_free_map_and_rqs()
4074 blk_mq_free_map_and_rqs(set, set->tags[hctx_idx], hctx_idx); in __blk_mq_free_map_and_rqs()
4076 set->tags[hctx_idx] = NULL; in __blk_mq_free_map_and_rqs()
4085 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_map_swqueue()
4088 cpumask_clear(hctx->cpumask); in blk_mq_map_swqueue()
4089 hctx->nr_ctx = 0; in blk_mq_map_swqueue()
4090 hctx->dispatch_from = NULL; in blk_mq_map_swqueue()
4100 ctx = per_cpu_ptr(q->queue_ctx, i); in blk_mq_map_swqueue()
4101 for (j = 0; j < set->nr_maps; j++) { in blk_mq_map_swqueue()
4102 if (!set->map[j].nr_queues) { in blk_mq_map_swqueue()
4103 ctx->hctxs[j] = blk_mq_map_queue_type(q, in blk_mq_map_swqueue()
4107 hctx_idx = set->map[j].mq_map[i]; in blk_mq_map_swqueue()
 4108			/* an unmapped hw queue can be remapped after the CPU topology changes */  in blk_mq_map_swqueue()
4109 if (!set->tags[hctx_idx] && in blk_mq_map_swqueue()
4117 set->map[j].mq_map[i] = 0; in blk_mq_map_swqueue()
4121 ctx->hctxs[j] = hctx; in blk_mq_map_swqueue()
4125 * devices share queues across queue maps. in blk_mq_map_swqueue()
4127 if (cpumask_test_cpu(i, hctx->cpumask)) in blk_mq_map_swqueue()
4130 cpumask_set_cpu(i, hctx->cpumask); in blk_mq_map_swqueue()
4131 hctx->type = j; in blk_mq_map_swqueue()
4132 ctx->index_hw[hctx->type] = hctx->nr_ctx; in blk_mq_map_swqueue()
4133 hctx->ctxs[hctx->nr_ctx++] = ctx; in blk_mq_map_swqueue()
4139 BUG_ON(!hctx->nr_ctx); in blk_mq_map_swqueue()
4143 ctx->hctxs[j] = blk_mq_map_queue_type(q, in blk_mq_map_swqueue()
4151 * If no software queues are mapped to this hardware queue, in blk_mq_map_swqueue()
4154 if (!hctx->nr_ctx) { in blk_mq_map_swqueue()
4155 /* Never unmap queue 0. We need it as a in blk_mq_map_swqueue()
4162 hctx->tags = NULL; in blk_mq_map_swqueue()
4166 hctx->tags = set->tags[i]; in blk_mq_map_swqueue()
4167 WARN_ON(!hctx->tags); in blk_mq_map_swqueue()
4174 sbitmap_resize(&hctx->ctx_map, hctx->nr_ctx); in blk_mq_map_swqueue()
4177 * Rule out isolated CPUs from hctx->cpumask to avoid in blk_mq_map_swqueue()
4180 for_each_cpu(cpu, hctx->cpumask) { in blk_mq_map_swqueue()
4182 cpumask_clear_cpu(cpu, hctx->cpumask); in blk_mq_map_swqueue()
4188 hctx->next_cpu = blk_mq_first_mapped_cpu(hctx); in blk_mq_map_swqueue()
4189 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; in blk_mq_map_swqueue()
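/*
 * A minimal runnable sketch of the mapping step above, assuming a plain
 * cpu_to_queue[] array in place of set->map[].mq_map[]: walk every CPU,
 * look up its hardware queue, and record both directions of the mapping,
 * the CPU's slot index within that queue (like ctx->index_hw[]) and the
 * queue's list of CPUs (like hctx->ctxs[] and hctx->nr_ctx).
 */
#include <stdio.h>

#define NR_CPUS   8
#define NR_QUEUES 2

int main(void)
{
	int cpu_to_queue[NR_CPUS] = { 0, 0, 0, 0, 1, 1, 1, 1 };
	int queue_cpus[NR_QUEUES][NR_CPUS];	/* CPUs served by each queue */
	int nr_ctx[NR_QUEUES] = { 0 };		/* like hctx->nr_ctx */
	int index_hw[NR_CPUS];			/* like ctx->index_hw[] */

	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		int q = cpu_to_queue[cpu];

		index_hw[cpu] = nr_ctx[q];	/* CPU's slot within queue q */
		queue_cpus[q][nr_ctx[q]++] = cpu;
	}

	for (int q = 0; q < NR_QUEUES; q++) {
		printf("queue %d:", q);
		for (int i = 0; i < nr_ctx[q]; i++)
			printf(" cpu%d (slot %d)", queue_cpus[q][i],
			       index_hw[queue_cpus[q][i]]);
		printf("\n");
	}
	return 0;
}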
4195 * the queue isn't live yet.
4204 hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; in queue_set_hctx_shared()
4207 hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; in queue_set_hctx_shared()
4217 lockdep_assert_held(&set->tag_list_lock); in blk_mq_update_tag_set_shared()
4219 list_for_each_entry(q, &set->tag_list, tag_set_list) { in blk_mq_update_tag_set_shared()
4228 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_del_queue_tag_set()
4230 mutex_lock(&set->tag_list_lock); in blk_mq_del_queue_tag_set()
4231 list_del(&q->tag_set_list); in blk_mq_del_queue_tag_set()
4232 if (list_is_singular(&set->tag_list)) { in blk_mq_del_queue_tag_set()
4234 set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; in blk_mq_del_queue_tag_set()
4235 /* update existing queue */ in blk_mq_del_queue_tag_set()
4238 mutex_unlock(&set->tag_list_lock); in blk_mq_del_queue_tag_set()
4239 INIT_LIST_HEAD(&q->tag_set_list); in blk_mq_del_queue_tag_set()
4245 mutex_lock(&set->tag_list_lock); in blk_mq_add_queue_tag_set()
4250 if (!list_empty(&set->tag_list) && in blk_mq_add_queue_tag_set()
4251 !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { in blk_mq_add_queue_tag_set()
4252 set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; in blk_mq_add_queue_tag_set()
4253 /* update existing queue */ in blk_mq_add_queue_tag_set()
4256 if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED) in blk_mq_add_queue_tag_set()
4258 list_add_tail(&q->tag_set_list, &set->tag_list); in blk_mq_add_queue_tag_set()
4260 mutex_unlock(&set->tag_list_lock); in blk_mq_add_queue_tag_set()
4263 /* All allocations will be freed in release handler of q->mq_kobj */
4271 return -ENOMEM; in blk_mq_alloc_ctxs()
4273 ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx); in blk_mq_alloc_ctxs()
4274 if (!ctxs->queue_ctx) in blk_mq_alloc_ctxs()
4278 struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu); in blk_mq_alloc_ctxs()
4279 ctx->ctxs = ctxs; in blk_mq_alloc_ctxs()
4282 q->mq_kobj = &ctxs->kobj; in blk_mq_alloc_ctxs()
4283 q->queue_ctx = ctxs->queue_ctx; in blk_mq_alloc_ctxs()
4288 return -ENOMEM; in blk_mq_alloc_ctxs()
 4293  * request queue's release handler to avoid use-after-free
 4294  * and headaches, because q->mq_kobj shouldn't have been introduced,
4303 WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list)); in blk_mq_release()
4306 list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) { in blk_mq_release()
4307 list_del_init(&hctx->hctx_list); in blk_mq_release()
4308 kobject_put(&hctx->kobj); in blk_mq_release()
4311 xa_destroy(&q->hctx_table); in blk_mq_release()
 4314	 * release .mq_kobj and the sw queues' kobjects now because  in blk_mq_release()
 4315	 * both share their lifetime with the request queue.  in blk_mq_release()
4322 return set->nr_maps > HCTX_TYPE_POLL && in blk_mq_can_poll()
4323 set->map[HCTX_TYPE_POLL].nr_queues; in blk_mq_can_poll()
4335 lim->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT; in blk_mq_alloc_queue()
4337 lim->features |= BLK_FEAT_POLL; in blk_mq_alloc_queue()
4339 q = blk_alloc_queue(lim, set->numa_node); in blk_mq_alloc_queue()
4342 q->queuedata = queuedata; in blk_mq_alloc_queue()
4353 * blk_mq_destroy_queue - shutdown a request queue
4354 * @q: request queue to shutdown
4356 * This shuts down a request queue allocated by blk_mq_alloc_queue(). All future
4357 * requests will be failed with -ENODEV. The caller is responsible for dropping
4390 disk = __alloc_disk_node(q, set->numa_node, lkclass); in __blk_mq_alloc_disk()
4394 return ERR_PTR(-ENOMEM); in __blk_mq_alloc_disk()
4396 set_bit(GD_OWNS_QUEUE, &disk->state); in __blk_mq_alloc_disk()
4420 return hlist_unhashed(&hctx->cpuhp_online) && in blk_mq_hctx_is_reusable()
4421 hlist_unhashed(&hctx->cpuhp_dead); in blk_mq_hctx_is_reusable()
4431 spin_lock(&q->unused_hctx_lock); in blk_mq_alloc_and_init_hctx()
4432 list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) { in blk_mq_alloc_and_init_hctx()
4433 if (tmp->numa_node == node && blk_mq_hctx_is_reusable(tmp)) { in blk_mq_alloc_and_init_hctx()
4439 list_del_init(&hctx->hctx_list); in blk_mq_alloc_and_init_hctx()
4440 spin_unlock(&q->unused_hctx_lock); in blk_mq_alloc_and_init_hctx()
4453 kobject_put(&hctx->kobj); in blk_mq_alloc_and_init_hctx()
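/*
 * A minimal sketch of the reuse-or-allocate pattern above, with a
 * hypothetical node type and a pthread mutex in place of unused_hctx_lock:
 * prefer an entry on the free list that matches the requested NUMA node,
 * and fall back to a fresh allocation only when none qualifies.
 */
#include <pthread.h>
#include <stdlib.h>

struct cached_ctx {
	struct cached_ctx *next;
	int numa_node;
};

struct ctx_cache {
	struct cached_ctx *unused;
	pthread_mutex_t lock;
};

static struct cached_ctx *get_ctx(struct ctx_cache *cache, int node)
{
	struct cached_ctx **pp, *ctx = NULL;

	pthread_mutex_lock(&cache->lock);
	for (pp = &cache->unused; *pp; pp = &(*pp)->next) {
		if ((*pp)->numa_node == node) {
			ctx = *pp;
			*pp = ctx->next;	/* unlink from the free list */
			break;
		}
	}
	pthread_mutex_unlock(&cache->lock);

	if (!ctx) {
		ctx = calloc(1, sizeof(*ctx));
		if (ctx)
			ctx->numa_node = node;
	}
	return ctx;
}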
4465 mutex_lock(&q->sysfs_lock); in blk_mq_realloc_hw_ctxs()
4466 for (i = 0; i < set->nr_hw_queues; i++) { in blk_mq_realloc_hw_ctxs()
4469 struct blk_mq_hw_ctx *old_hctx = xa_load(&q->hctx_table, i); in blk_mq_realloc_hw_ctxs()
4472 old_node = old_hctx->numa_node; in blk_mq_realloc_hw_ctxs()
4487 * hctxs and keep the previous q->nr_hw_queues. in blk_mq_realloc_hw_ctxs()
4489 if (i != set->nr_hw_queues) { in blk_mq_realloc_hw_ctxs()
4490 j = q->nr_hw_queues; in blk_mq_realloc_hw_ctxs()
4493 q->nr_hw_queues = set->nr_hw_queues; in blk_mq_realloc_hw_ctxs()
4496 xa_for_each_start(&q->hctx_table, j, hctx, j) in blk_mq_realloc_hw_ctxs()
4498 mutex_unlock(&q->sysfs_lock); in blk_mq_realloc_hw_ctxs()
4510 /* mark the queue as mq asap */ in blk_mq_init_allocated_queue()
4511 q->mq_ops = set->ops; in blk_mq_init_allocated_queue()
 4514	 * ->tag_set has to be set up before initializing hctx, since the cpuhp  in blk_mq_init_allocated_queue()
 4515	 * handler needs it to check the queue mapping  in blk_mq_init_allocated_queue()
4517 q->tag_set = set; in blk_mq_init_allocated_queue()
4522 /* init q->mq_kobj and sw queues' kobjects */ in blk_mq_init_allocated_queue()
4525 INIT_LIST_HEAD(&q->unused_hctx_list); in blk_mq_init_allocated_queue()
4526 spin_lock_init(&q->unused_hctx_lock); in blk_mq_init_allocated_queue()
4528 xa_init(&q->hctx_table); in blk_mq_init_allocated_queue()
4531 if (!q->nr_hw_queues) in blk_mq_init_allocated_queue()
4534 INIT_WORK(&q->timeout_work, blk_mq_timeout_work); in blk_mq_init_allocated_queue()
4535 blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ); in blk_mq_init_allocated_queue()
4537 q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; in blk_mq_init_allocated_queue()
4539 INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work); in blk_mq_init_allocated_queue()
4540 INIT_LIST_HEAD(&q->flush_list); in blk_mq_init_allocated_queue()
4541 INIT_LIST_HEAD(&q->requeue_list); in blk_mq_init_allocated_queue()
4542 spin_lock_init(&q->requeue_lock); in blk_mq_init_allocated_queue()
4544 q->nr_requests = set->queue_depth; in blk_mq_init_allocated_queue()
4546 blk_mq_init_cpu_queues(q, set->nr_hw_queues); in blk_mq_init_allocated_queue()
4554 q->mq_ops = NULL; in blk_mq_init_allocated_queue()
4555 return -ENOMEM; in blk_mq_init_allocated_queue()
4562 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_exit_queue()
4564 /* Checks hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED. */ in blk_mq_exit_queue()
4565 blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); in blk_mq_exit_queue()
4566 /* May clear BLK_MQ_F_TAG_QUEUE_SHARED in hctx->flags. */ in blk_mq_exit_queue()
4574 if (blk_mq_is_shared_tags(set->flags)) { in __blk_mq_alloc_rq_maps()
4575 set->shared_tags = blk_mq_alloc_map_and_rqs(set, in __blk_mq_alloc_rq_maps()
4577 set->queue_depth); in __blk_mq_alloc_rq_maps()
4578 if (!set->shared_tags) in __blk_mq_alloc_rq_maps()
4579 return -ENOMEM; in __blk_mq_alloc_rq_maps()
4582 for (i = 0; i < set->nr_hw_queues; i++) { in __blk_mq_alloc_rq_maps()
4591 while (--i >= 0) in __blk_mq_alloc_rq_maps()
4594 if (blk_mq_is_shared_tags(set->flags)) { in __blk_mq_alloc_rq_maps()
4595 blk_mq_free_map_and_rqs(set, set->shared_tags, in __blk_mq_alloc_rq_maps()
4599 return -ENOMEM; in __blk_mq_alloc_rq_maps()
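/*
 * A minimal sketch of the partial-unwind loop above (while (--i >= 0) ...),
 * with malloc() standing in for the per-hw-queue allocation: on the first
 * failure, free everything allocated so far in reverse order and report the
 * whole operation as failed.
 */
#include <stdlib.h>

static int alloc_per_queue(void **maps, int nr_queues, size_t size)
{
	int i;

	for (i = 0; i < nr_queues; i++) {
		maps[i] = malloc(size);
		if (!maps[i])
			goto out_unwind;
	}
	return 0;

 out_unwind:
	while (--i >= 0) {
		free(maps[i]);
		maps[i] = NULL;
	}
	return -1;	/* -ENOMEM in the kernel */
}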
4604 * may reduce the depth asked for, if memory is tight. set->queue_depth
4612 depth = set->queue_depth; in blk_mq_alloc_set_map_and_rqs()
4618 set->queue_depth >>= 1; in blk_mq_alloc_set_map_and_rqs()
4619 if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) { in blk_mq_alloc_set_map_and_rqs()
4620 err = -ENOMEM; in blk_mq_alloc_set_map_and_rqs()
4623 } while (set->queue_depth); in blk_mq_alloc_set_map_and_rqs()
4625 if (!set->queue_depth || err) { in blk_mq_alloc_set_map_and_rqs()
4626 pr_err("blk-mq: failed to allocate request map\n"); in blk_mq_alloc_set_map_and_rqs()
4627 return -ENOMEM; in blk_mq_alloc_set_map_and_rqs()
4630 if (depth != set->queue_depth) in blk_mq_alloc_set_map_and_rqs()
4631 pr_info("blk-mq: reduced tag depth (%u -> %u)\n", in blk_mq_alloc_set_map_and_rqs()
4632 depth, set->queue_depth); in blk_mq_alloc_set_map_and_rqs()
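/*
 * A minimal runnable sketch of the retry loop above, with a made-up
 * try_alloc() that only succeeds below a certain depth: halve the requested
 * queue depth until the allocation fits, and give up once the depth would
 * drop below a reserved minimum.
 */
#include <stdio.h>

#define RESERVED_MIN 4

/* pretend the system only has room for 256 requests */
static int try_alloc(unsigned int depth)
{
	return depth <= 256 ? 0 : -1;
}

static int alloc_with_backoff(unsigned int *depth)
{
	unsigned int asked = *depth;

	do {
		if (!try_alloc(*depth))
			break;
		*depth >>= 1;
		if (*depth < RESERVED_MIN)
			return -1;	/* -ENOMEM in the kernel */
	} while (*depth);

	if (asked != *depth)
		printf("reduced depth (%u -> %u)\n", asked, *depth);
	return 0;
}

int main(void)
{
	unsigned int depth = 1024;

	if (!alloc_with_backoff(&depth))
		printf("final depth %u\n", depth);
	return 0;
}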
4641 * expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the in blk_mq_update_queue_map()
4644 if (set->nr_maps == 1) in blk_mq_update_queue_map()
4645 set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues; in blk_mq_update_queue_map()
4647 if (set->ops->map_queues) { in blk_mq_update_queue_map()
4654 * for (queue = 0; queue < set->nr_hw_queues; queue++) { in blk_mq_update_queue_map()
4655 * mask = get_cpu_mask(queue) in blk_mq_update_queue_map()
4657 * set->map[x].mq_map[cpu] = queue; in blk_mq_update_queue_map()
4662 * to any hw queue. in blk_mq_update_queue_map()
4664 for (i = 0; i < set->nr_maps; i++) in blk_mq_update_queue_map()
4665 blk_mq_clear_mq_map(&set->map[i]); in blk_mq_update_queue_map()
4667 set->ops->map_queues(set); in blk_mq_update_queue_map()
4669 BUG_ON(set->nr_maps > 1); in blk_mq_update_queue_map()
4670 blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); in blk_mq_update_queue_map()
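/*
 * A minimal runnable sketch of the fallback mapping the comment above
 * describes for ->map_queues(): every possible CPU must end up assigned to
 * some hardware queue. A simple modulo spread is used here; the real
 * blk_mq_map_queues() is topology aware, so this only shows the shape of
 * the contract.
 */
#include <stdio.h>

#define NR_CPUS 8

static void map_queues(unsigned int *mq_map, unsigned int nr_queues)
{
	for (unsigned int cpu = 0; cpu < NR_CPUS; cpu++)
		mq_map[cpu] = cpu % nr_queues;	/* no CPU is left unmapped */
}

int main(void)
{
	unsigned int mq_map[NR_CPUS];

	map_queues(mq_map, 3);
	for (unsigned int cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu %u -> hw queue %u\n", cpu, mq_map[cpu]);
	return 0;
}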
4680 if (set->nr_hw_queues >= new_nr_hw_queues) in blk_mq_realloc_tag_set_tags()
4684 GFP_KERNEL, set->numa_node); in blk_mq_realloc_tag_set_tags()
4686 return -ENOMEM; in blk_mq_realloc_tag_set_tags()
4688 if (set->tags) in blk_mq_realloc_tag_set_tags()
4689 memcpy(new_tags, set->tags, set->nr_hw_queues * in blk_mq_realloc_tag_set_tags()
4690 sizeof(*set->tags)); in blk_mq_realloc_tag_set_tags()
4691 kfree(set->tags); in blk_mq_realloc_tag_set_tags()
4692 set->tags = new_tags; in blk_mq_realloc_tag_set_tags()
4694 for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) { in blk_mq_realloc_tag_set_tags()
4696 while (--i >= set->nr_hw_queues) in blk_mq_realloc_tag_set_tags()
4698 return -ENOMEM; in blk_mq_realloc_tag_set_tags()
4704 set->nr_hw_queues = new_nr_hw_queues; in blk_mq_realloc_tag_set_tags()
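/*
 * A minimal sketch of the grow-only reallocation above, with calloc() and
 * memcpy() standing in for kcalloc_node(): the array of per-queue pointers
 * only ever grows, existing entries are copied across, and a shrink request
 * is a no-op (stale entries are torn down elsewhere).
 */
#include <stdlib.h>
#include <string.h>

static int grow_tags(void ***tags, unsigned int old_nr, unsigned int new_nr)
{
	void **new_tags;

	if (old_nr >= new_nr)
		return 0;			/* never shrink here */

	new_tags = calloc(new_nr, sizeof(*new_tags));
	if (!new_tags)
		return -1;			/* -ENOMEM in the kernel */

	if (*tags)
		memcpy(new_tags, *tags, old_nr * sizeof(*new_tags));
	free(*tags);
	*tags = new_tags;
	return 0;
}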
4712 * value will be stored in set->queue_depth.
4720 if (!set->nr_hw_queues) in blk_mq_alloc_tag_set()
4721 return -EINVAL; in blk_mq_alloc_tag_set()
4722 if (!set->queue_depth) in blk_mq_alloc_tag_set()
4723 return -EINVAL; in blk_mq_alloc_tag_set()
4724 if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) in blk_mq_alloc_tag_set()
4725 return -EINVAL; in blk_mq_alloc_tag_set()
4727 if (!set->ops->queue_rq) in blk_mq_alloc_tag_set()
4728 return -EINVAL; in blk_mq_alloc_tag_set()
4730 if (!set->ops->get_budget ^ !set->ops->put_budget) in blk_mq_alloc_tag_set()
4731 return -EINVAL; in blk_mq_alloc_tag_set()
4733 if (set->queue_depth > BLK_MQ_MAX_DEPTH) { in blk_mq_alloc_tag_set()
4734 pr_info("blk-mq: reduced tag depth to %u\n", in blk_mq_alloc_tag_set()
4736 set->queue_depth = BLK_MQ_MAX_DEPTH; in blk_mq_alloc_tag_set()
4739 if (!set->nr_maps) in blk_mq_alloc_tag_set()
4740 set->nr_maps = 1; in blk_mq_alloc_tag_set()
4741 else if (set->nr_maps > HCTX_MAX_TYPES) in blk_mq_alloc_tag_set()
4742 return -EINVAL; in blk_mq_alloc_tag_set()
4750 set->queue_depth = min(64U, set->queue_depth); in blk_mq_alloc_tag_set()
4756 if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids) in blk_mq_alloc_tag_set()
4757 set->nr_hw_queues = nr_cpu_ids; in blk_mq_alloc_tag_set()
4759 if (set->flags & BLK_MQ_F_BLOCKING) { in blk_mq_alloc_tag_set()
4760 set->srcu = kmalloc(sizeof(*set->srcu), GFP_KERNEL); in blk_mq_alloc_tag_set()
4761 if (!set->srcu) in blk_mq_alloc_tag_set()
4762 return -ENOMEM; in blk_mq_alloc_tag_set()
4763 ret = init_srcu_struct(set->srcu); in blk_mq_alloc_tag_set()
4768 ret = -ENOMEM; in blk_mq_alloc_tag_set()
4769 set->tags = kcalloc_node(set->nr_hw_queues, in blk_mq_alloc_tag_set()
4771 set->numa_node); in blk_mq_alloc_tag_set()
4772 if (!set->tags) in blk_mq_alloc_tag_set()
4775 for (i = 0; i < set->nr_maps; i++) { in blk_mq_alloc_tag_set()
4776 set->map[i].mq_map = kcalloc_node(nr_cpu_ids, in blk_mq_alloc_tag_set()
4777 sizeof(set->map[i].mq_map[0]), in blk_mq_alloc_tag_set()
4778 GFP_KERNEL, set->numa_node); in blk_mq_alloc_tag_set()
4779 if (!set->map[i].mq_map) in blk_mq_alloc_tag_set()
4781 set->map[i].nr_queues = set->nr_hw_queues; in blk_mq_alloc_tag_set()
4790 mutex_init(&set->tag_list_lock); in blk_mq_alloc_tag_set()
4791 INIT_LIST_HEAD(&set->tag_list); in blk_mq_alloc_tag_set()
4796 for (i = 0; i < set->nr_maps; i++) { in blk_mq_alloc_tag_set()
4797 kfree(set->map[i].mq_map); in blk_mq_alloc_tag_set()
4798 set->map[i].mq_map = NULL; in blk_mq_alloc_tag_set()
4800 kfree(set->tags); in blk_mq_alloc_tag_set()
4801 set->tags = NULL; in blk_mq_alloc_tag_set()
4803 if (set->flags & BLK_MQ_F_BLOCKING) in blk_mq_alloc_tag_set()
4804 cleanup_srcu_struct(set->srcu); in blk_mq_alloc_tag_set()
4806 if (set->flags & BLK_MQ_F_BLOCKING) in blk_mq_alloc_tag_set()
4807 kfree(set->srcu); in blk_mq_alloc_tag_set()
4812 /* allocate and initialize a tagset for a simple single-queue device */
4818 set->ops = ops; in blk_mq_alloc_sq_tag_set()
4819 set->nr_hw_queues = 1; in blk_mq_alloc_sq_tag_set()
4820 set->nr_maps = 1; in blk_mq_alloc_sq_tag_set()
4821 set->queue_depth = queue_depth; in blk_mq_alloc_sq_tag_set()
4822 set->numa_node = NUMA_NO_NODE; in blk_mq_alloc_sq_tag_set()
4823 set->flags = set_flags; in blk_mq_alloc_sq_tag_set()
4832 for (i = 0; i < set->nr_hw_queues; i++) in blk_mq_free_tag_set()
4835 if (blk_mq_is_shared_tags(set->flags)) { in blk_mq_free_tag_set()
4836 blk_mq_free_map_and_rqs(set, set->shared_tags, in blk_mq_free_tag_set()
4840 for (j = 0; j < set->nr_maps; j++) { in blk_mq_free_tag_set()
4841 kfree(set->map[j].mq_map); in blk_mq_free_tag_set()
4842 set->map[j].mq_map = NULL; in blk_mq_free_tag_set()
4845 kfree(set->tags); in blk_mq_free_tag_set()
4846 set->tags = NULL; in blk_mq_free_tag_set()
4847 if (set->flags & BLK_MQ_F_BLOCKING) { in blk_mq_free_tag_set()
4848 cleanup_srcu_struct(set->srcu); in blk_mq_free_tag_set()
4849 kfree(set->srcu); in blk_mq_free_tag_set()
4856 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_update_nr_requests()
4861 if (WARN_ON_ONCE(!q->mq_freeze_depth)) in blk_mq_update_nr_requests()
4862 return -EINVAL; in blk_mq_update_nr_requests()
4865 return -EINVAL; in blk_mq_update_nr_requests()
4867 if (q->nr_requests == nr) in blk_mq_update_nr_requests()
4874 if (!hctx->tags) in blk_mq_update_nr_requests()
4878 * queue depth. This is similar to what the old code would do. in blk_mq_update_nr_requests()
4880 if (hctx->sched_tags) { in blk_mq_update_nr_requests()
4881 ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, in blk_mq_update_nr_requests()
4884 ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr, in blk_mq_update_nr_requests()
4889 if (q->elevator && q->elevator->type->ops.depth_updated) in blk_mq_update_nr_requests()
4890 q->elevator->type->ops.depth_updated(hctx); in blk_mq_update_nr_requests()
4893 q->nr_requests = nr; in blk_mq_update_nr_requests()
4894 if (blk_mq_is_shared_tags(set->flags)) { in blk_mq_update_nr_requests()
4895 if (q->elevator) in blk_mq_update_nr_requests()
4931 /* q->elevator needs protection from ->sysfs_lock */ in blk_mq_elv_switch_none()
4932 mutex_lock(&q->sysfs_lock); in blk_mq_elv_switch_none()
4935 if (!q->elevator) { in blk_mq_elv_switch_none()
4940 INIT_LIST_HEAD(&qe->node); in blk_mq_elv_switch_none()
4941 qe->q = q; in blk_mq_elv_switch_none()
4942 qe->type = q->elevator->type; in blk_mq_elv_switch_none()
4944 __elevator_get(qe->type); in blk_mq_elv_switch_none()
4945 list_add(&qe->node, head); in blk_mq_elv_switch_none()
4948 mutex_unlock(&q->sysfs_lock); in blk_mq_elv_switch_none()
4959 if (qe->q == q) in blk_lookup_qe_pair()
4974 t = qe->type; in blk_mq_elv_switch_back()
4975 list_del(&qe->node); in blk_mq_elv_switch_back()
4978 mutex_lock(&q->sysfs_lock); in blk_mq_elv_switch_back()
4982 mutex_unlock(&q->sysfs_lock); in blk_mq_elv_switch_back()
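/*
 * A minimal sketch of the save/restore pattern above, assuming a
 * hypothetical scheduler name per queue instead of an elevator_type
 * reference: before the topology change each queue's scheduler is
 * remembered on a list, and afterwards the queue is looked up so the saved
 * scheduler can be put back.
 */
#include <stdlib.h>
#include <string.h>

struct qe_pair {
	struct qe_pair *next;
	void *q;			/* the queue this entry belongs to */
	char sched[32];			/* saved scheduler name */
};

static int save_sched(struct qe_pair **head, void *q, const char *sched)
{
	struct qe_pair *qe = malloc(sizeof(*qe));

	if (!qe)
		return -1;
	qe->q = q;
	strncpy(qe->sched, sched, sizeof(qe->sched) - 1);
	qe->sched[sizeof(qe->sched) - 1] = '\0';
	qe->next = *head;
	*head = qe;
	return 0;
}

/* returns the saved entry (caller restores and frees it), or NULL if none */
static struct qe_pair *lookup_and_unlink(struct qe_pair **head, void *q)
{
	for (struct qe_pair **pp = head; *pp; pp = &(*pp)->next) {
		if ((*pp)->q == q) {
			struct qe_pair *qe = *pp;

			*pp = qe->next;
			return qe;
		}
	}
	return NULL;
}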
4990 int prev_nr_hw_queues = set->nr_hw_queues; in __blk_mq_update_nr_hw_queues()
4993 lockdep_assert_held(&set->tag_list_lock); in __blk_mq_update_nr_hw_queues()
4995 if (set->nr_maps == 1 && nr_hw_queues > nr_cpu_ids) in __blk_mq_update_nr_hw_queues()
4999 if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues) in __blk_mq_update_nr_hw_queues()
5002 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
5007 * updating the new sw to hw queue mappings. in __blk_mq_update_nr_hw_queues()
5009 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
5013 list_for_each_entry(q, &set->tag_list, tag_set_list) { in __blk_mq_update_nr_hw_queues()
5023 list_for_each_entry(q, &set->tag_list, tag_set_list) { in __blk_mq_update_nr_hw_queues()
5028 if (q->nr_hw_queues != set->nr_hw_queues) { in __blk_mq_update_nr_hw_queues()
5033 for (; i < set->nr_hw_queues; i++) in __blk_mq_update_nr_hw_queues()
5036 set->nr_hw_queues = prev_nr_hw_queues; in __blk_mq_update_nr_hw_queues()
5050 list_for_each_entry(q, &set->tag_list, tag_set_list) { in __blk_mq_update_nr_hw_queues()
5056 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
5059 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
5063 for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++) in __blk_mq_update_nr_hw_queues()
5069 mutex_lock(&set->tag_list_lock); in blk_mq_update_nr_hw_queues()
5071 mutex_unlock(&set->tag_list_lock); in blk_mq_update_nr_hw_queues()
5082 ret = q->mq_ops->poll(hctx, iob); in blk_hctx_poll()
5105 struct blk_mq_hw_ctx *hctx = xa_load(&q->hctx_table, cookie); in blk_mq_poll()
5113 struct request_queue *q = rq->q; in blk_rq_poll()
5118 if (!percpu_ref_tryget(&q->q_usage_counter)) in blk_rq_poll()
5121 ret = blk_hctx_poll(q, rq->mq_hctx, iob, poll_flags); in blk_rq_poll()
5130 return rq->mq_ctx->cpu; in blk_mq_rq_cpu()
5139 cancel_delayed_work_sync(&q->requeue_work); in blk_mq_cancel_work_sync()
5142 cancel_delayed_work_sync(&hctx->run_work); in blk_mq_cancel_work_sync()