blk-mq.c (old: 4aef2ec9022b217f74d0f4c9b84081f07cc223d9) vs. blk-mq.c (new: a892c8d52c02284076fbbacae6692aa5c5807d11)
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Block multiqueue core code
4 *
5 * Copyright (C) 2013-2014 Jens Axboe
6 * Copyright (C) 2013-2014 Christoph Hellwig
7 */
8#include <linux/kernel.h>

--- 12 unchanged lines hidden ---

21#include <linux/cpu.h>
22#include <linux/cache.h>
23#include <linux/sched/sysctl.h>
24#include <linux/sched/topology.h>
25#include <linux/sched/signal.h>
26#include <linux/delay.h>
27#include <linux/crash_dump.h>
28#include <linux/prefetch.h>
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Block multiqueue core code
4 *
5 * Copyright (C) 2013-2014 Jens Axboe
6 * Copyright (C) 2013-2014 Christoph Hellwig
7 */
8#include <linux/kernel.h>

--- 12 unchanged lines hidden ---

21#include <linux/cpu.h>
22#include <linux/cache.h>
23#include <linux/sched/sysctl.h>
24#include <linux/sched/topology.h>
25#include <linux/sched/signal.h>
26#include <linux/delay.h>
27#include <linux/crash_dump.h>
28#include <linux/prefetch.h>
29#include <linux/blk-crypto.h>
29
30#include <trace/events/block.h>
31
32#include <linux/blk-mq.h>
33#include <linux/t10-pi.h>
34#include "blk.h"
35#include "blk-mq.h"
36#include "blk-mq-debugfs.h"

--- 275 unchanged lines hidden ---

312 else
313 rq->start_time_ns = 0;
314 rq->io_start_time_ns = 0;
315 rq->stats_sectors = 0;
316 rq->nr_phys_segments = 0;
317#if defined(CONFIG_BLK_DEV_INTEGRITY)
318 rq->nr_integrity_segments = 0;
319#endif
30
31#include <trace/events/block.h>
32
33#include <linux/blk-mq.h>
34#include <linux/t10-pi.h>
35#include "blk.h"
36#include "blk-mq.h"
37#include "blk-mq-debugfs.h"

--- 275 unchanged lines hidden ---

313 else
314 rq->start_time_ns = 0;
315 rq->io_start_time_ns = 0;
316 rq->stats_sectors = 0;
317 rq->nr_phys_segments = 0;
318#if defined(CONFIG_BLK_DEV_INTEGRITY)
319 rq->nr_integrity_segments = 0;
320#endif
321 blk_crypto_rq_set_defaults(rq);
320 /* tag was already set */
322 /* tag was already set */
321 rq->extra_len = 0;
322 WRITE_ONCE(rq->deadline, 0);
323
324 rq->timeout = 0;
325
326 rq->end_io = NULL;
327 rq->end_io_data = NULL;
328
329 data->ctx->rq_dispatched[op_is_sync(op)]++;

--- 139 unchanged lines hidden ---

469
470static void __blk_mq_free_request(struct request *rq)
471{
472 struct request_queue *q = rq->q;
473 struct blk_mq_ctx *ctx = rq->mq_ctx;
474 struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
475 const int sched_tag = rq->internal_tag;
476
323 WRITE_ONCE(rq->deadline, 0);
324
325 rq->timeout = 0;
326
327 rq->end_io = NULL;
328 rq->end_io_data = NULL;
329
330 data->ctx->rq_dispatched[op_is_sync(op)]++;

--- 139 unchanged lines hidden ---

470
471static void __blk_mq_free_request(struct request *rq)
472{
473 struct request_queue *q = rq->q;
474 struct blk_mq_ctx *ctx = rq->mq_ctx;
475 struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
476 const int sched_tag = rq->internal_tag;
477
478 blk_crypto_free_request(rq);
477 blk_pm_mark_last_busy(rq);
478 rq->mq_hctx = NULL;
479 if (rq->tag != -1)
480 blk_mq_put_tag(hctx->tags, ctx, rq->tag);
481 if (sched_tag != -1)
482 blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag);
483 blk_mq_sched_restart(hctx);
484 blk_queue_exit(q);

--- 177 unchanged lines hidden ---

662 rq_qos_issue(q, rq);
663 }
664
665 WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);
666
667 blk_add_timer(rq);
668 WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);
669
479 blk_pm_mark_last_busy(rq);
480 rq->mq_hctx = NULL;
481 if (rq->tag != -1)
482 blk_mq_put_tag(hctx->tags, ctx, rq->tag);
483 if (sched_tag != -1)
484 blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag);
485 blk_mq_sched_restart(hctx);
486 blk_queue_exit(q);
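The new side pairs blk_crypto_rq_set_defaults() in blk_mq_rq_ctx_init() with blk_crypto_free_request() in __blk_mq_free_request(), so inline-encryption state lives exactly as long as the request it rides on. A minimal userspace sketch of that pairing, with invented names and types (not the kernel structures):

#include <stdio.h>
#include <stdlib.h>

struct crypt_ctx { int keyslot; };

struct request {
	int tag;
	struct crypt_ctx *crypt;	/* NULL unless the bio carried an encryption context */
};

/* stands in for blk_crypto_rq_set_defaults() called from blk_mq_rq_ctx_init() */
static void rq_ctx_init(struct request *rq, int tag)
{
	rq->tag = tag;
	rq->crypt = NULL;
}

/* stands in for attaching the bio's crypto context to the request */
static int rq_attach_crypt(struct request *rq, int keyslot)
{
	rq->crypt = malloc(sizeof(*rq->crypt));
	if (!rq->crypt)
		return -1;
	rq->crypt->keyslot = keyslot;
	return 0;
}

/* stands in for blk_crypto_free_request() called from __blk_mq_free_request() */
static void rq_free(struct request *rq)
{
	free(rq->crypt);
	rq->crypt = NULL;
	printf("request %d freed\n", rq->tag);
}

int main(void)
{
	struct request rq;

	rq_ctx_init(&rq, 42);
	if (rq_attach_crypt(&rq, 3))
		return 1;
	rq_free(&rq);
	return 0;
}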

--- 177 unchanged lines hidden ---

664 rq_qos_issue(q, rq);
665 }
666
667 WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);
668
669 blk_add_timer(rq);
670 WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);
671
670 if (q->dma_drain_size && blk_rq_bytes(rq)) {
671 /*
672 * Make sure space for the drain appears. We know we can do
673 * this because max_hw_segments has been adjusted to be one
674 * fewer than the device can handle.
675 */
676 rq->nr_phys_segments++;
677 }
678
679#ifdef CONFIG_BLK_DEV_INTEGRITY
680 if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
681 q->integrity.profile->prepare_fn(rq);
682#endif
683}
684EXPORT_SYMBOL(blk_mq_start_request);
685
686static void __blk_mq_requeue_request(struct request *rq)
687{
688 struct request_queue *q = rq->q;
689
690 blk_mq_put_driver_tag(rq);
691
692 trace_block_rq_requeue(q, rq);
693 rq_qos_requeue(q, rq);
694
695 if (blk_mq_request_started(rq)) {
696 WRITE_ONCE(rq->state, MQ_RQ_IDLE);
697 rq->rq_flags &= ~RQF_TIMED_OUT;
672#ifdef CONFIG_BLK_DEV_INTEGRITY
673 if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
674 q->integrity.profile->prepare_fn(rq);
675#endif
676}
677EXPORT_SYMBOL(blk_mq_start_request);
678
679static void __blk_mq_requeue_request(struct request *rq)
680{
681 struct request_queue *q = rq->q;
682
683 blk_mq_put_driver_tag(rq);
684
685 trace_block_rq_requeue(q, rq);
686 rq_qos_requeue(q, rq);
687
688 if (blk_mq_request_started(rq)) {
689 WRITE_ONCE(rq->state, MQ_RQ_IDLE);
690 rq->rq_flags &= ~RQF_TIMED_OUT;
698 if (q->dma_drain_size && blk_rq_bytes(rq))
699 rq->nr_phys_segments--;
700 }
701}
702
703void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
704{
705 __blk_mq_requeue_request(rq);
706
707 /* this request will be re-inserted to io scheduler queue */

--- 482 unchanged lines hidden ---

1190 */
1191 if (next)
1192 blk_mq_put_driver_tag(next);
1193
1194 list_add(&rq->queuelist, list);
1195 __blk_mq_requeue_request(rq);
1196}
1197
691 }
692}
693
694void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
695{
696 __blk_mq_requeue_request(rq);
697
698 /* this request will be re-inserted to io scheduler queue */

--- 482 unchanged lines hidden ---

1181 */
1182 if (next)
1183 blk_mq_put_driver_tag(next);
1184
1185 list_add(&rq->queuelist, list);
1186 __blk_mq_requeue_request(rq);
1187}
1188
1189static void blk_mq_handle_zone_resource(struct request *rq,
1190 struct list_head *zone_list)
1191{
1192 /*
1193 * If we end up here it is because we cannot dispatch a request to a
1194 * specific zone due to LLD level zone-write locking or other zone
1195 * related resource not being available. In this case, set the request
1196 * aside in zone_list for retrying it later.
1197 */
1198 list_add(&rq->queuelist, zone_list);
1199 __blk_mq_requeue_request(rq);
1200}
1201
1198/*
1199 * Returns true if we did some work AND can potentially do more.
1200 */
1201bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
1202 bool got_budget)
1203{
1204 struct blk_mq_hw_ctx *hctx;
1205 struct request *rq, *nxt;
1206 bool no_tag = false;
1207 int errors, queued;
1208 blk_status_t ret = BLK_STS_OK;
1202/*
1203 * Returns true if we did some work AND can potentially do more.
1204 */
1205bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
1206 bool got_budget)
1207{
1208 struct blk_mq_hw_ctx *hctx;
1209 struct request *rq, *nxt;
1210 bool no_tag = false;
1211 int errors, queued;
1212 blk_status_t ret = BLK_STS_OK;
1213 bool no_budget_avail = false;
1214 LIST_HEAD(zone_list);
1209
1210 if (list_empty(list))
1211 return false;
1212
1213 WARN_ON(!list_is_singular(list) && got_budget);
1214
1215 /*
1216 * Now process all the entries, sending them to the driver.
1217 */
1218 errors = queued = 0;
1219 do {
1220 struct blk_mq_queue_data bd;
1221
1222 rq = list_first_entry(list, struct request, queuelist);
1223
1224 hctx = rq->mq_hctx;
1225 if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) {
1226 blk_mq_put_driver_tag(rq);
1215
1216 if (list_empty(list))
1217 return false;
1218
1219 WARN_ON(!list_is_singular(list) && got_budget);
1220
1221 /*
1222 * Now process all the entries, sending them to the driver.
1223 */
1224 errors = queued = 0;
1225 do {
1226 struct blk_mq_queue_data bd;
1227
1228 rq = list_first_entry(list, struct request, queuelist);
1229
1230 hctx = rq->mq_hctx;
1231 if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) {
1232 blk_mq_put_driver_tag(rq);
1233 no_budget_avail = true;
1227 break;
1228 }
1229
1230 if (!blk_mq_get_driver_tag(rq)) {
1231 /*
1232 * The initial allocation attempt failed, so we need to
1233 * rerun the hardware queue when a tag is freed. The
1234 * waitqueue takes care of that. If the queue is run

--- 26 unchanged lines hidden ---

1261 nxt = list_first_entry(list, struct request, queuelist);
1262 bd.last = !blk_mq_get_driver_tag(nxt);
1263 }
1264
1265 ret = q->mq_ops->queue_rq(hctx, &bd);
1266 if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
1267 blk_mq_handle_dev_resource(rq, list);
1268 break;
1234 break;
1235 }
1236
1237 if (!blk_mq_get_driver_tag(rq)) {
1238 /*
1239 * The initial allocation attempt failed, so we need to
1240 * rerun the hardware queue when a tag is freed. The
1241 * waitqueue takes care of that. If the queue is run

--- 26 unchanged lines hidden ---

1268 nxt = list_first_entry(list, struct request, queuelist);
1269 bd.last = !blk_mq_get_driver_tag(nxt);
1270 }
1271
1272 ret = q->mq_ops->queue_rq(hctx, &bd);
1273 if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
1274 blk_mq_handle_dev_resource(rq, list);
1275 break;
1276 } else if (ret == BLK_STS_ZONE_RESOURCE) {
1277 /*
1278 * Move the request to zone_list and keep going through
1279 * the dispatch list to find more requests the drive can
1280 * accept.
1281 */
1282 blk_mq_handle_zone_resource(rq, &zone_list);
1283 if (list_empty(list))
1284 break;
1285 continue;
1269 }
1270
1271 if (unlikely(ret != BLK_STS_OK)) {
1272 errors++;
1273 blk_mq_end_request(rq, BLK_STS_IOERR);
1274 continue;
1275 }
1276
1277 queued++;
1278 } while (!list_empty(list));
1279
1286 }
1287
1288 if (unlikely(ret != BLK_STS_OK)) {
1289 errors++;
1290 blk_mq_end_request(rq, BLK_STS_IOERR);
1291 continue;
1292 }
1293
1294 queued++;
1295 } while (!list_empty(list));
1296
1297 if (!list_empty(&zone_list))
1298 list_splice_tail_init(&zone_list, list);
1299
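The BLK_STS_ZONE_RESOURCE handling added above parks requests that hit a write-locked zone on a local zone_list and splices them back onto the dispatch list once the pass is over, instead of aborting the whole run. A self-contained sketch of that set-aside-and-retry pattern, using plain C arrays as a stand-in for the kernel's list_head lists:

#include <stdio.h>

#define NREQ 5

int main(void)
{
	int dispatch[NREQ] = { 1, 2, 3, 4, 5 };		/* request ids on the dispatch list */
	int zone_locked[NREQ] = { 0, 1, 0, 1, 0 };	/* 1: target zone is write-locked */
	int zone_list[NREQ];
	int nr_parked = 0;

	for (int i = 0; i < NREQ; i++) {
		if (zone_locked[i]) {
			/* blk_mq_handle_zone_resource(): park it and keep scanning */
			zone_list[nr_parked++] = dispatch[i];
			continue;
		}
		printf("dispatched request %d\n", dispatch[i]);
	}

	/* list_splice_tail_init(&zone_list, list): retry the parked requests on a later run */
	for (int i = 0; i < nr_parked; i++)
		printf("requeued request %d\n", zone_list[i]);

	return 0;
}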
1280 hctx->dispatched[queued_to_index(queued)]++;
1281
1282 /*
1283 * Any items that need requeuing? Stuff them into hctx->dispatch,
1284 * that is where we will continue on next queue run.
1285 */
1286 if (!list_empty(list)) {
1287 bool needs_restart;

--- 27 unchanged lines hidden ---

1315 * - blk_mq_run_hw_queue() checks whether or not a queue has
1316 * been stopped before rerunning a queue.
1317 * - Some but not all block drivers stop a queue before
1318 * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
1319 * and dm-rq.
1320 *
1321 * If driver returns BLK_STS_RESOURCE and SCHED_RESTART
1322 * bit is set, run queue after a delay to avoid IO stalls
1300 hctx->dispatched[queued_to_index(queued)]++;
1301
1302 /*
1303 * Any items that need requeuing? Stuff them into hctx->dispatch,
1304 * that is where we will continue on next queue run.
1305 */
1306 if (!list_empty(list)) {
1307 bool needs_restart;

--- 27 unchanged lines hidden ---

1335 * - blk_mq_run_hw_queue() checks whether or not a queue has
1336 * been stopped before rerunning a queue.
1337 * - Some but not all block drivers stop a queue before
1338 * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
1339 * and dm-rq.
1340 *
1341 * If driver returns BLK_STS_RESOURCE and SCHED_RESTART
1342 * bit is set, run queue after a delay to avoid IO stalls
1323 * that could otherwise occur if the queue is idle.
1343 * that could otherwise occur if the queue is idle. We'll do
1344 * similar if we couldn't get budget and SCHED_RESTART is set.
1324 */
1325 needs_restart = blk_mq_sched_needs_restart(hctx);
1326 if (!needs_restart ||
1327 (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
1328 blk_mq_run_hw_queue(hctx, true);
1345 */
1346 needs_restart = blk_mq_sched_needs_restart(hctx);
1347 if (!needs_restart ||
1348 (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
1349 blk_mq_run_hw_queue(hctx, true);
1329 else if (needs_restart && (ret == BLK_STS_RESOURCE))
1350 else if (needs_restart && (ret == BLK_STS_RESOURCE ||
1351 no_budget_avail))
1330 blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
1331
1332 blk_mq_update_dispatch_busy(hctx, true);
1333 return false;
1334 } else
1335 blk_mq_update_dispatch_busy(hctx, false);
1336
1337 /*

--- 199 unchanged lines hidden ---

1537 continue;
1538
1539 blk_mq_run_hw_queue(hctx, async);
1540 }
1541}
1542EXPORT_SYMBOL(blk_mq_run_hw_queues);
1543
1544/**
1352 blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
1353
1354 blk_mq_update_dispatch_busy(hctx, true);
1355 return false;
1356 } else
1357 blk_mq_update_dispatch_busy(hctx, false);
1358
1359 /*

--- 199 unchanged lines hidden ---

1559 continue;
1560
1561 blk_mq_run_hw_queue(hctx, async);
1562 }
1563}
1564EXPORT_SYMBOL(blk_mq_run_hw_queues);
1565
1566/**
1567 * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously.
1568 * @q: Pointer to the request queue to run.
1569 * @msecs: Milliseconds of delay to wait before running the queues.
1570 */
1571void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
1572{
1573 struct blk_mq_hw_ctx *hctx;
1574 int i;
1575
1576 queue_for_each_hw_ctx(q, hctx, i) {
1577 if (blk_mq_hctx_stopped(hctx))
1578 continue;
1579
1580 blk_mq_delay_run_hw_queue(hctx, msecs);
1581 }
1582}
1583EXPORT_SYMBOL(blk_mq_delay_run_hw_queues);
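blk_mq_delay_run_hw_queues() is a convenience wrapper: walk every hardware queue, skip the stopped ones, and schedule the rest to run after @msecs milliseconds. A minimal sketch of that loop, assuming a simplified userspace model rather than the kernel's hctx and workqueue machinery:

#include <stdio.h>
#include <stdbool.h>

struct hw_ctx {
	int id;
	bool stopped;
};

/* stands in for blk_mq_delay_run_hw_queue(), which arms delayed kblockd work */
static void delay_run_hw_queue(struct hw_ctx *hctx, unsigned long msecs)
{
	printf("hctx %d: run scheduled in %lu ms\n", hctx->id, msecs);
}

static void delay_run_hw_queues(struct hw_ctx *hctxs, int nr, unsigned long msecs)
{
	for (int i = 0; i < nr; i++) {
		if (hctxs[i].stopped)
			continue;
		delay_run_hw_queue(&hctxs[i], msecs);
	}
}

int main(void)
{
	struct hw_ctx hctxs[3] = { { 0, false }, { 1, true }, { 2, false } };

	delay_run_hw_queues(hctxs, 3, 100);
	return 0;
}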
1584
1585/**
1545 * blk_mq_queue_stopped() - check whether one or more hctxs have been stopped
1546 * @q: request queue.
1547 *
1548 * The caller is responsible for serializing this function against
1549 * blk_mq_{start,stop}_hw_queue().
1550 */
1551bool blk_mq_queue_stopped(struct request_queue *q)
1552{

--- 224 unchanged lines hidden ---

1777 unsigned int nr_segs)
1778{
1779 if (bio->bi_opf & REQ_RAHEAD)
1780 rq->cmd_flags |= REQ_FAILFAST_MASK;
1781
1782 rq->__sector = bio->bi_iter.bi_sector;
1783 rq->write_hint = bio->bi_write_hint;
1784 blk_rq_bio_prep(rq, bio, nr_segs);
1586 * blk_mq_queue_stopped() - check whether one or more hctxs have been stopped
1587 * @q: request queue.
1588 *
1589 * The caller is responsible for serializing this function against
1590 * blk_mq_{start,stop}_hw_queue().
1591 */
1592bool blk_mq_queue_stopped(struct request_queue *q)
1593{

--- 224 unchanged lines hidden ---

1818 unsigned int nr_segs)
1819{
1820 if (bio->bi_opf & REQ_RAHEAD)
1821 rq->cmd_flags |= REQ_FAILFAST_MASK;
1822
1823 rq->__sector = bio->bi_iter.bi_sector;
1824 rq->write_hint = bio->bi_write_hint;
1825 blk_rq_bio_prep(rq, bio, nr_segs);
1826 blk_crypto_rq_bio_prep(rq, bio, GFP_NOIO);
1785
1786 blk_account_io_start(rq, true);
1787}
1788
1789static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
1790 struct request *rq,
1791 blk_qc_t *cookie, bool last)
1792{

--- 175 unchanged lines hidden ---

1968 * * We want to place request at plug queue for possible future merging
1969 * * There is an IO scheduler active at this queue
1970 *
1971 * It will not queue the request if there is an error with the bio, or at the
1972 * request creation.
1973 *
1974 * Returns: Request queue cookie.
1975 */
1827
1828 blk_account_io_start(rq, true);
1829}
1830
1831static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
1832 struct request *rq,
1833 blk_qc_t *cookie, bool last)
1834{

--- 175 unchanged lines hidden ---

2010 * * We want to place request at plug queue for possible future merging
2011 * * There is an IO scheduler active at this queue
2012 *
2013 * It will not queue the request if there is an error with the bio, or at the
2014 * request creation.
2015 *
2016 * Returns: Request queue cookie.
2017 */
1976static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
2018blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1977{
1978 const int is_sync = op_is_sync(bio->bi_opf);
1979 const int is_flush_fua = op_is_flush(bio->bi_opf);
1980 struct blk_mq_alloc_data data = { .flags = 0};
1981 struct request *rq;
1982 struct blk_plug *plug;
1983 struct request *same_queue_rq = NULL;
1984 unsigned int nr_segs;
1985 blk_qc_t cookie;
2019{
2020 const int is_sync = op_is_sync(bio->bi_opf);
2021 const int is_flush_fua = op_is_flush(bio->bi_opf);
2022 struct blk_mq_alloc_data data = { .flags = 0};
2023 struct request *rq;
2024 struct blk_plug *plug;
2025 struct request *same_queue_rq = NULL;
2026 unsigned int nr_segs;
2027 blk_qc_t cookie;
2028 blk_status_t ret;
1986
1987 blk_queue_bounce(q, &bio);
1988 __blk_queue_split(q, &bio, &nr_segs);
1989
1990 if (!bio_integrity_prep(bio))
1991 return BLK_QC_T_NONE;
1992
1993 if (!is_flush_fua && !blk_queue_nomerges(q) &&

--- 17 unchanged lines hidden ---

2011 trace_block_getrq(q, bio, bio->bi_opf);
2012
2013 rq_qos_track(q, rq, bio);
2014
2015 cookie = request_to_qc_t(data.hctx, rq);
2016
2017 blk_mq_bio_to_request(rq, bio, nr_segs);
2018
2029
2030 blk_queue_bounce(q, &bio);
2031 __blk_queue_split(q, &bio, &nr_segs);
2032
2033 if (!bio_integrity_prep(bio))
2034 return BLK_QC_T_NONE;
2035
2036 if (!is_flush_fua && !blk_queue_nomerges(q) &&

--- 17 unchanged lines hidden ---

2054 trace_block_getrq(q, bio, bio->bi_opf);
2055
2056 rq_qos_track(q, rq, bio);
2057
2058 cookie = request_to_qc_t(data.hctx, rq);
2059
2060 blk_mq_bio_to_request(rq, bio, nr_segs);
2061
2062 ret = blk_crypto_init_request(rq);
2063 if (ret != BLK_STS_OK) {
2064 bio->bi_status = ret;
2065 bio_endio(bio);
2066 blk_mq_free_request(rq);
2067 return BLK_QC_T_NONE;
2068 }
2069
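When blk_crypto_init_request() fails, the new code unwinds in a fixed order: record the status on the bio, complete the bio, free the just-allocated request, and return BLK_QC_T_NONE. A simplified sketch of that unwind order, with invented userspace types (not the kernel API):

#include <stdio.h>

enum blk_status { BLK_STS_OK = 0, BLK_STS_IOERR = 10 };

struct bio { enum blk_status bi_status; };
struct request { int tag; };

static void bio_endio(struct bio *bio)
{
	printf("bio completed, status %d\n", bio->bi_status);
}

static void free_request(struct request *rq)
{
	printf("request %d returned to the tag pool\n", rq->tag);
}

/* pretend keyslot programming failed */
static enum blk_status crypto_init_request(struct request *rq)
{
	(void)rq;
	return BLK_STS_IOERR;
}

/* returns a queue cookie, or -1 standing in for BLK_QC_T_NONE */
static int make_request(struct request *rq, struct bio *bio)
{
	enum blk_status ret = crypto_init_request(rq);

	if (ret != BLK_STS_OK) {
		bio->bi_status = ret;
		bio_endio(bio);
		free_request(rq);
		return -1;
	}
	return rq->tag;
}

int main(void)
{
	struct bio bio = { BLK_STS_OK };
	struct request rq = { 7 };

	printf("cookie = %d\n", make_request(&rq, &bio));
	return 0;
}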
2019 plug = blk_mq_plug(q, bio);
2020 if (unlikely(is_flush_fua)) {
2021 /* Bypass scheduler for flush requests */
2022 blk_insert_flush(rq);
2023 blk_mq_run_hw_queue(data.hctx, true);
2024 } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs ||
2025 !blk_queue_nonrot(q))) {
2026 /*

--- 53 unchanged lines hidden ---

2080 blk_mq_try_issue_directly(data.hctx, rq, &cookie);
2081 } else {
2082 /* Default case. */
2083 blk_mq_sched_insert_request(rq, false, true, true);
2084 }
2085
2086 return cookie;
2087}
2070 plug = blk_mq_plug(q, bio);
2071 if (unlikely(is_flush_fua)) {
2072 /* Bypass scheduler for flush requests */
2073 blk_insert_flush(rq);
2074 blk_mq_run_hw_queue(data.hctx, true);
2075 } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs ||
2076 !blk_queue_nonrot(q))) {
2077 /*

--- 53 unchanged lines hidden ---

2131 blk_mq_try_issue_directly(data.hctx, rq, &cookie);
2132 } else {
2133 /* Default case. */
2134 blk_mq_sched_insert_request(rq, false, true, true);
2135 }
2136
2137 return cookie;
2138}
2139EXPORT_SYMBOL_GPL(blk_mq_make_request); /* only for request based dm */
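The new side drops the q->make_request_fn assignment for blk-mq queues (see the removal near blk_mq_init_allocated_queue further below) and instead exports blk_mq_make_request() so request-based dm can call it directly. A rough userspace model of dispatching without the per-queue function pointer; the real kernel keys this decision off q->mq_ops, and the NULL-pointer check here is only an illustration:

#include <stdio.h>

struct queue;
typedef unsigned int (*make_request_fn)(struct queue *q);

struct queue {
	make_request_fn make_request_fn;	/* set only by bio-based drivers */
	const char *name;
};

static unsigned int mq_make_request(struct queue *q)
{
	printf("%s: handled by the multiqueue path\n", q->name);
	return 1;
}

static unsigned int submit_bio_to_queue(struct queue *q)
{
	if (q->make_request_fn)			/* bio-based driver override */
		return q->make_request_fn(q);
	return mq_make_request(q);		/* blk-mq queues take the direct path */
}

int main(void)
{
	struct queue mq_disk = { NULL, "mq-disk" };

	return submit_bio_to_queue(&mq_disk) ? 0 : 1;
}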
2088
2089void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
2090 unsigned int hctx_idx)
2091{
2092 struct page *page;
2093
2094 if (tags->rqs && set->ops->exit_request) {
2095 int i;

--- 372 unchanged lines hidden ---

2468 for (j = 0; j < set->nr_maps; j++) {
2469 hctx = blk_mq_map_queue_type(q, j, i);
2470 if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
2471 hctx->numa_node = local_memory_node(cpu_to_node(i));
2472 }
2473 }
2474}
2475
2140
2141void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
2142 unsigned int hctx_idx)
2143{
2144 struct page *page;
2145
2146 if (tags->rqs && set->ops->exit_request) {
2147 int i;

--- 372 unchanged lines hidden ---

2520 for (j = 0; j < set->nr_maps; j++) {
2521 hctx = blk_mq_map_queue_type(q, j, i);
2522 if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
2523 hctx->numa_node = local_memory_node(cpu_to_node(i));
2524 }
2525 }
2526}
2527
2476static bool __blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, int hctx_idx)
2528static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set,
2529 int hctx_idx)
2477{
2478 int ret = 0;
2479
2480 set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx,
2481 set->queue_depth, set->reserved_tags);
2482 if (!set->tags[hctx_idx])
2483 return false;
2484

--- 31 unchanged lines hidden ---

2516 }
2517
2518 /*
2519 * Map software to hardware queues.
2520 *
2521 * If the cpu isn't present, the cpu is mapped to first hctx.
2522 */
2523 for_each_possible_cpu(i) {
2530{
2531 int ret = 0;
2532
2533 set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx,
2534 set->queue_depth, set->reserved_tags);
2535 if (!set->tags[hctx_idx])
2536 return false;
2537

--- 31 unchanged lines hidden ---

2569 }
2570
2571 /*
2572 * Map software to hardware queues.
2573 *
2574 * If the cpu isn't present, the cpu is mapped to first hctx.
2575 */
2576 for_each_possible_cpu(i) {
2524 hctx_idx = set->map[HCTX_TYPE_DEFAULT].mq_map[i];
2525 /* unmapped hw queue can be remapped after CPU topo changed */
2526 if (!set->tags[hctx_idx] &&
2527 !__blk_mq_alloc_rq_map(set, hctx_idx)) {
2528 /*
2529 * If tags initialization fails for some hctx,
2530 * that hctx won't be brought online. In this
2531 * case, remap the current ctx to hctx[0] which
2532 * is guaranteed to always have tags allocated
2533 */
2534 set->map[HCTX_TYPE_DEFAULT].mq_map[i] = 0;
2535 }
2536
2537 ctx = per_cpu_ptr(q->queue_ctx, i);
2538 for (j = 0; j < set->nr_maps; j++) {
2539 if (!set->map[j].nr_queues) {
2540 ctx->hctxs[j] = blk_mq_map_queue_type(q,
2541 HCTX_TYPE_DEFAULT, i);
2542 continue;
2543 }
2577
2578 ctx = per_cpu_ptr(q->queue_ctx, i);
2579 for (j = 0; j < set->nr_maps; j++) {
2580 if (!set->map[j].nr_queues) {
2581 ctx->hctxs[j] = blk_mq_map_queue_type(q,
2582 HCTX_TYPE_DEFAULT, i);
2583 continue;
2584 }
2585 hctx_idx = set->map[j].mq_map[i];
2586 /* unmapped hw queue can be remapped after CPU topo changed */
2587 if (!set->tags[hctx_idx] &&
2588 !__blk_mq_alloc_map_and_request(set, hctx_idx)) {
2589 /*
2590 * If tags initialization fails for some hctx,
2591 * that hctx won't be brought online. In this
2592 * case, remap the current ctx to hctx[0] which
2593 * is guaranteed to always have tags allocated
2594 */
2595 set->map[j].mq_map[i] = 0;
2596 }
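In the remapped loop above, the allocation check now runs per queue map: for each map j and CPU i, if the hardware queue that CPU maps to has no tags and __blk_mq_alloc_map_and_request() fails, the CPU is pointed back at hctx 0, which is guaranteed to have tags. A small standalone sketch of that remap-on-failure rule (single map, invented helper):

#include <stdio.h>
#include <stdbool.h>

#define NR_CPUS 4
#define NR_HCTX 3

/* stands in for __blk_mq_alloc_map_and_request(); pretend hctx 2 is out of memory */
static bool alloc_map_and_request(bool tags[], int hctx_idx)
{
	if (hctx_idx == 2)
		return false;
	tags[hctx_idx] = true;
	return true;
}

int main(void)
{
	int mq_map[NR_CPUS] = { 0, 1, 2, 2 };	/* cpu -> hardware queue */
	bool tags[NR_HCTX] = { false };

	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		int hctx_idx = mq_map[cpu];

		if (!tags[hctx_idx] && !alloc_map_and_request(tags, hctx_idx))
			mq_map[cpu] = 0;	/* remap to hctx 0, which always has tags */

		printf("cpu %d -> hctx %d\n", cpu, mq_map[cpu]);
	}
	return 0;
}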
2544
2545 hctx = blk_mq_map_queue_type(q, j, i);
2546 ctx->hctxs[j] = hctx;
2547 /*
2548 * If the CPU is already set in the mask, then we've
2549 * mapped this one already. This can happen if
2550 * devices share queues across queue maps.
2551 */

--- 387 unchanged lines hidden ---

2939 blk_queue_flag_set(QUEUE_FLAG_POLL, q);
2940
2941 q->sg_reserved_size = INT_MAX;
2942
2943 INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
2944 INIT_LIST_HEAD(&q->requeue_list);
2945 spin_lock_init(&q->requeue_lock);
2946
2597
2598 hctx = blk_mq_map_queue_type(q, j, i);
2599 ctx->hctxs[j] = hctx;
2600 /*
2601 * If the CPU is already set in the mask, then we've
2602 * mapped this one already. This can happen if
2603 * devices share queues across queue maps.
2604 */

--- 387 unchanged lines hidden ---

2992 blk_queue_flag_set(QUEUE_FLAG_POLL, q);
2993
2994 q->sg_reserved_size = INT_MAX;
2995
2996 INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
2997 INIT_LIST_HEAD(&q->requeue_list);
2998 spin_lock_init(&q->requeue_lock);
2999
2947 q->make_request_fn = blk_mq_make_request;
2948 q->nr_requests = set->queue_depth;
2949
2950 /*
2951 * Default to classic polling
2952 */
2953 q->poll_nsec = BLK_MQ_POLL_CLASSIC;
2954
2955 blk_mq_init_cpu_queues(q, set->nr_hw_queues);

--- 27 unchanged lines hidden ---

2983 blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
2984}
2985
2986static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
2987{
2988 int i;
2989
2990 for (i = 0; i < set->nr_hw_queues; i++)
3000 q->nr_requests = set->queue_depth;
3001
3002 /*
3003 * Default to classic polling
3004 */
3005 q->poll_nsec = BLK_MQ_POLL_CLASSIC;
3006
3007 blk_mq_init_cpu_queues(q, set->nr_hw_queues);

--- 27 unchanged lines hidden ---

3035 blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
3036}
3037
3038static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
3039{
3040 int i;
3041
3042 for (i = 0; i < set->nr_hw_queues; i++)
2991 if (!__blk_mq_alloc_rq_map(set, i))
3043 if (!__blk_mq_alloc_map_and_request(set, i))
2992 goto out_unwind;
2993
2994 return 0;
2995
2996out_unwind:
2997 while (--i >= 0)
3044 goto out_unwind;
3045
3046 return 0;
3047
3048out_unwind:
3049 while (--i >= 0)
2998 blk_mq_free_rq_map(set->tags[i]);
3050 blk_mq_free_map_and_requests(set, i);
2999
3000 return -ENOMEM;
3001}
3002
3003/*
3004 * Allocate the request maps associated with this tag_set. Note that this
3005 * may reduce the depth asked for, if memory is tight. set->queue_depth
3006 * will be updated to reflect the allocated depth.
3007 */
3051
3052 return -ENOMEM;
3053}
3054
3055/*
3056 * Allocate the request maps associated with this tag_set. Note that this
3057 * may reduce the depth asked for, if memory is tight. set->queue_depth
3058 * will be updated to reflect the allocated depth.
3059 */
3008static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
3060static int blk_mq_alloc_map_and_requests(struct blk_mq_tag_set *set)
3009{
3010 unsigned int depth;
3011 int err;
3012
3013 depth = set->queue_depth;
3014 do {
3015 err = __blk_mq_alloc_rq_maps(set);
3016 if (!err)

--- 143 unchanged lines hidden ---

3160 goto out_free_mq_map;
3161 set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues;
3162 }
3163
3164 ret = blk_mq_update_queue_map(set);
3165 if (ret)
3166 goto out_free_mq_map;
3167
3061{
3062 unsigned int depth;
3063 int err;
3064
3065 depth = set->queue_depth;
3066 do {
3067 err = __blk_mq_alloc_rq_maps(set);
3068 if (!err)

--- 143 unchanged lines hidden ---

3212 goto out_free_mq_map;
3213 set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues;
3214 }
3215
3216 ret = blk_mq_update_queue_map(set);
3217 if (ret)
3218 goto out_free_mq_map;
3219
3168 ret = blk_mq_alloc_rq_maps(set);
3220 ret = blk_mq_alloc_map_and_requests(set);
3169 if (ret)
3170 goto out_free_mq_map;
3171
3172 mutex_init(&set->tag_list_lock);
3173 INIT_LIST_HEAD(&set->tag_list);
3174
3175 return 0;
3176

--- 165 unchanged lines hidden ---

3342 if (!blk_mq_elv_switch_none(&head, q))
3343 goto switch_back;
3344
3345 list_for_each_entry(q, &set->tag_list, tag_set_list) {
3346 blk_mq_debugfs_unregister_hctxs(q);
3347 blk_mq_sysfs_unregister(q);
3348 }
3349
3221 if (ret)
3222 goto out_free_mq_map;
3223
3224 mutex_init(&set->tag_list_lock);
3225 INIT_LIST_HEAD(&set->tag_list);
3226
3227 return 0;
3228

--- 165 unchanged lines hidden ---

3394 if (!blk_mq_elv_switch_none(&head, q))
3395 goto switch_back;
3396
3397 list_for_each_entry(q, &set->tag_list, tag_set_list) {
3398 blk_mq_debugfs_unregister_hctxs(q);
3399 blk_mq_sysfs_unregister(q);
3400 }
3401
3402 prev_nr_hw_queues = set->nr_hw_queues;
3350 if (blk_mq_realloc_tag_set_tags(set, set->nr_hw_queues, nr_hw_queues) <
3351 0)
3352 goto reregister;
3353
3403 if (blk_mq_realloc_tag_set_tags(set, set->nr_hw_queues, nr_hw_queues) <
3404 0)
3405 goto reregister;
3406
3354 prev_nr_hw_queues = set->nr_hw_queues;
3355 set->nr_hw_queues = nr_hw_queues;
3407 set->nr_hw_queues = nr_hw_queues;
3356 blk_mq_update_queue_map(set);
3357fallback:
3408fallback:
3409 blk_mq_update_queue_map(set);
3358 list_for_each_entry(q, &set->tag_list, tag_set_list) {
3359 blk_mq_realloc_hw_ctxs(set, q);
3360 if (q->nr_hw_queues != set->nr_hw_queues) {
3361 pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n",
3362 nr_hw_queues, prev_nr_hw_queues);
3363 set->nr_hw_queues = prev_nr_hw_queues;
3364 blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
3365 goto fallback;

--- 249 unchanged lines hidden ---
3410 list_for_each_entry(q, &set->tag_list, tag_set_list) {
3411 blk_mq_realloc_hw_ctxs(set, q);
3412 if (q->nr_hw_queues != set->nr_hw_queues) {
3413 pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n",
3414 nr_hw_queues, prev_nr_hw_queues);
3415 set->nr_hw_queues = prev_nr_hw_queues;
3416 blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
3417 goto fallback;
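One behavioural detail of the reordering above: blk_mq_update_queue_map() now sits under the fallback: label, so the retry with prev_nr_hw_queues rebuilds the CPU-to-queue map as well. A compact sketch of that try-then-fall-back loop, assuming a toy realloc helper:

#include <stdio.h>

/* pretend only 4 hardware queues can actually be allocated */
static int realloc_hw_ctxs(int want)
{
	return want > 4 ? 4 : want;
}

int main(void)
{
	int prev_nr_hw_queues = 2;
	int nr_hw_queues = 8;
	int set_nr = nr_hw_queues;

fallback:
	/* blk_mq_update_queue_map() runs on every pass, including the retry */
	printf("mapping CPUs over %d hardware queue(s)\n", set_nr);
	if (realloc_hw_ctxs(set_nr) != set_nr) {
		printf("increasing nr_hw_queues to %d fails, fallback to %d\n",
		       nr_hw_queues, prev_nr_hw_queues);
		set_nr = prev_nr_hw_queues;
		goto fallback;
	}
	printf("running with %d hardware queue(s)\n", set_nr);
	return 0;
}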

--- 249 unchanged lines hidden ---