// SPDX-License-Identifier: GPL-2.0

#include "blk-rq-qos.h"

/*
 * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
 * false if 'v' + 1 would be bigger than 'below'.
 */
static bool atomic_inc_below(atomic_t *v, unsigned int below)
{
	unsigned int cur = atomic_read(v);

	for (;;) {
		unsigned int old;

		if (cur >= below)
			return false;
		old = atomic_cmpxchg(v, cur, cur + 1);
		if (old == cur)
			break;
		cur = old;
	}

	return true;
}

bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit)
{
	return atomic_inc_below(&rq_wait->inflight, limit);
}

void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio)
{
	do {
		if (rqos->ops->cleanup)
			rqos->ops->cleanup(rqos, bio);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_done(struct rq_qos *rqos, struct request *rq)
{
	do {
		if (rqos->ops->done)
			rqos->ops->done(rqos, rq);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_issue(struct rq_qos *rqos, struct request *rq)
{
	do {
		if (rqos->ops->issue)
			rqos->ops->issue(rqos, rq);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_requeue(struct rq_qos *rqos, struct request *rq)
{
	do {
		if (rqos->ops->requeue)
			rqos->ops->requeue(rqos, rq);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_throttle(struct rq_qos *rqos, struct bio *bio)
{
	do {
		if (rqos->ops->throttle)
			rqos->ops->throttle(rqos, bio);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio)
{
	do {
		if (rqos->ops->track)
			rqos->ops->track(rqos, rq, bio);
		rqos = rqos->next;
	} while (rqos);
}

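/*
 * Note: each __rq_qos_*() walker above and below is the out-of-line slow
 * path behind an inline wrapper in blk-rq-qos.h that first checks whether
 * any policy is attached to the queue at all. A minimal sketch of that
 * wrapper shape (illustrative; see the header for the real definitions):
 *
 *	static inline void rq_qos_track(struct request_queue *q,
 *					struct request *rq, struct bio *bio)
 *	{
 *		if (q->rq_qos)
 *			__rq_qos_track(q->rq_qos, rq, bio);
 *	}
 */
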
void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio)
{
	do {
		if (rqos->ops->done_bio)
			rqos->ops->done_bio(rqos, bio);
		rqos = rqos->next;
	} while (rqos);
}

/*
 * Return true if we can't increase the depth further by scaling
 */
bool rq_depth_calc_max_depth(struct rq_depth *rqd)
{
	unsigned int depth;
	bool ret = false;

	/*
	 * For QD=1 devices, this is a special case. It's important for those
	 * to have one request ready when one completes, so force a depth of
	 * 2 for those devices. On the backend, it'll be a depth of 1 anyway,
	 * since the device can't have more than that in flight. If we're
	 * scaling down, then keep a setting of 1/1/1.
	 */
	if (rqd->queue_depth == 1) {
		if (rqd->scale_step > 0)
			rqd->max_depth = 1;
		else {
			rqd->max_depth = 2;
			ret = true;
		}
	} else {
		/*
		 * scale_step == 0 is our default state. If we have suffered
		 * latency spikes, step will be > 0, and we shrink the
		 * allowed write depths. If step is < 0, we're only doing
		 * writes, and we allow a temporarily higher depth to
		 * increase performance.
		 */
		depth = min_t(unsigned int, rqd->default_depth,
			      rqd->queue_depth);
		if (rqd->scale_step > 0)
			depth = 1 + ((depth - 1) >> min(31, rqd->scale_step));
		else if (rqd->scale_step < 0) {
			unsigned int maxd = 3 * rqd->queue_depth / 4;

			depth = 1 + ((depth - 1) << -rqd->scale_step);
			if (depth > maxd) {
				depth = maxd;
				ret = true;
			}
		}

		rqd->max_depth = depth;
	}

	return ret;
}

void rq_depth_scale_up(struct rq_depth *rqd)
{
	/*
	 * Hit max in previous round, stop here
	 */
	if (rqd->scaled_max)
		return;

	rqd->scale_step--;

	rqd->scaled_max = rq_depth_calc_max_depth(rqd);
}

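/*
 * Worked example (illustrative numbers, not taken from any driver): with
 * default_depth = 64 and queue_depth = 128, the base depth is
 * min(64, 128) = 64.  After two scale-down steps (scale_step == 2) the
 * allowed depth becomes 1 + ((64 - 1) >> 2) = 16.  After one scale-up
 * step past the default (scale_step == -1) it becomes
 * 1 + ((64 - 1) << 1) = 127, which is clamped to 3 * 128 / 4 = 96, and
 * rq_depth_calc_max_depth() reports that the maximum has been reached.
 */
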
/*
 * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
 * had a latency violation.
 */
void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
{
	/*
	 * Stop scaling down when we've hit the limit. This also prevents
	 * ->scale_step from going to crazy values, if the device can't
	 * keep up.
	 */
	if (rqd->max_depth == 1)
		return;

	if (rqd->scale_step < 0 && hard_throttle)
		rqd->scale_step = 0;
	else
		rqd->scale_step++;

	rqd->scaled_max = false;
	rq_depth_calc_max_depth(rqd);
}

struct rq_qos_wait_data {
	struct wait_queue_entry wq;
	struct task_struct *task;
	struct rq_wait *rqw;
	acquire_inflight_cb_t *cb;
	void *private_data;
	bool got_token;
};

static int rq_qos_wake_function(struct wait_queue_entry *curr,
				unsigned int mode, int wake_flags, void *key)
{
	struct rq_qos_wait_data *data = container_of(curr,
						     struct rq_qos_wait_data,
						     wq);

	/*
	 * If we fail to get a budget, return -1 to interrupt the wake up loop
	 * in __wake_up_common.
	 */
	if (!data->cb(data->rqw, data->private_data))
		return -1;

	data->got_token = true;
	list_del_init(&curr->entry);
	wake_up_process(data->task);
	return 1;
}

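/*
 * Sketch of the callback pair a caller typically hands to rq_qos_wait()
 * below, modelled loosely on blk-wbt.c; the my_* names and my_wait_data
 * structure are illustrative only, not real kernel symbols:
 *
 *	static bool my_inflight_cb(struct rq_wait *rqw, void *private_data)
 *	{
 *		struct my_wait_data *data = private_data;
 *
 *		return rq_wait_inc_below(rqw, my_limit(data));
 *	}
 *
 *	static void my_cleanup_cb(struct rq_wait *rqw, void *private_data)
 *	{
 *		my_put_token(rqw, private_data);
 *	}
 *
 *	rq_qos_wait(rqw, &data, my_inflight_cb, my_cleanup_cb);
 *
 * my_cleanup_cb() only runs when we raced with a waker and ended up with
 * an extra token that has to be released again.
 */
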
/**
 * rq_qos_wait - throttle on a rqw if we need to
 * @rqw: rqw to throttle on
 * @private_data: caller provided specific data
 * @acquire_inflight_cb: inc the rqw->inflight counter if we can
 * @cleanup_cb: the callback to cleanup in case we race with a waker
 *
 * This provides a uniform place for the rq_qos users to do their throttling.
 * Since you can end up with a lot of things sleeping at once, this manages the
 * waking up based on the resources available. The acquire_inflight_cb should
 * inc the rqw->inflight if we have the ability to do so, or return false if
 * not, in which case we will sleep until the room becomes available.
 *
 * cleanup_cb is in case we race with a waker and need to clean up the
 * inflight count accordingly.
 */
void rq_qos_wait(struct rq_wait *rqw, void *private_data,
		 acquire_inflight_cb_t *acquire_inflight_cb,
		 cleanup_cb_t *cleanup_cb)
{
	struct rq_qos_wait_data data = {
		.wq = {
			.func	= rq_qos_wake_function,
			.entry	= LIST_HEAD_INIT(data.wq.entry),
		},
		.task = current,
		.rqw = rqw,
		.cb = acquire_inflight_cb,
		.private_data = private_data,
	};
	bool has_sleeper;

	has_sleeper = wq_has_sleeper(&rqw->wait);
	if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
		return;

	prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);
	do {
		if (data.got_token)
			break;
		if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
			finish_wait(&rqw->wait, &data.wq);

			/*
			 * We raced with rq_qos_wake_function() getting a
			 * token, which means we now have two. Put our local
			 * token and wake anyone else potentially waiting for
			 * one.
			 */
			if (data.got_token)
				cleanup_cb(rqw, private_data);
			break;
		}
		io_schedule();
		has_sleeper = false;
	} while (1);
	finish_wait(&rqw->wait, &data.wq);
}

void rq_qos_exit(struct request_queue *q)
{
	blk_mq_debugfs_unregister_queue_rqos(q);

	while (q->rq_qos) {
		struct rq_qos *rqos = q->rq_qos;
		q->rq_qos = rqos->next;
		rqos->ops->exit(rqos);
	}
}

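/*
 * For reference, the chain torn down above is built up at registration
 * time. A minimal sketch of how a policy links itself onto q->rq_qos
 * (cf. rq_qos_add() in blk-rq-qos.h; my_rqos_ops is illustrative only):
 *
 *	rqos->ops = &my_rqos_ops;
 *	rqos->q = q;
 *	rqos->next = q->rq_qos;
 *	q->rq_qos = rqos;
 *	blk_mq_debugfs_register_rqos(rqos);
 *
 * rq_qos_exit() walks that singly linked list, unhooks each element and
 * calls its ->exit() callback so the policy can free its state.
 */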