// SPDX-License-Identifier: GPL-2.0

#include "blk-rq-qos.h"

/*
 * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
 * false if 'v' + 1 would be bigger than 'below'.
 */
static bool atomic_inc_below(atomic_t *v, unsigned int below)
{
        unsigned int cur = atomic_read(v);

        do {
                if (cur >= below)
                        return false;
        } while (!atomic_try_cmpxchg(v, &cur, cur + 1));

        return true;
}

bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit)
{
        return atomic_inc_below(&rq_wait->inflight, limit);
}

void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio)
{
        do {
                if (rqos->ops->cleanup)
                        rqos->ops->cleanup(rqos, bio);
                rqos = rqos->next;
        } while (rqos);
}

void __rq_qos_done(struct rq_qos *rqos, struct request *rq)
{
        do {
                if (rqos->ops->done)
                        rqos->ops->done(rqos, rq);
                rqos = rqos->next;
        } while (rqos);
}

void __rq_qos_issue(struct rq_qos *rqos, struct request *rq)
{
        do {
                if (rqos->ops->issue)
                        rqos->ops->issue(rqos, rq);
                rqos = rqos->next;
        } while (rqos);
}

void __rq_qos_requeue(struct rq_qos *rqos, struct request *rq)
{
        do {
                if (rqos->ops->requeue)
                        rqos->ops->requeue(rqos, rq);
                rqos = rqos->next;
        } while (rqos);
}

void __rq_qos_throttle(struct rq_qos *rqos, struct bio *bio)
{
        do {
                if (rqos->ops->throttle)
                        rqos->ops->throttle(rqos, bio);
                rqos = rqos->next;
        } while (rqos);
}

void __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio)
{
        do {
                if (rqos->ops->track)
                        rqos->ops->track(rqos, rq, bio);
                rqos = rqos->next;
        } while (rqos);
}

void __rq_qos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio)
{
        do {
                if (rqos->ops->merge)
                        rqos->ops->merge(rqos, rq, bio);
                rqos = rqos->next;
        } while (rqos);
}
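/*
 * Illustrative sketch, not part of this file: the real fast-path wrappers
 * are static inlines in blk-rq-qos.h (and may do extra work, e.g. bio
 * flagging). The point is only that callers test q->rq_qos first, so a
 * queue with no QoS policy attached pays a single pointer check before
 * any of the walkers above run.
 */
static inline void example_rq_qos_throttle(struct request_queue *q,
                                           struct bio *bio)
{
        if (q->rq_qos)
                __rq_qos_throttle(q->rq_qos, bio);
}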
void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio)
{
        do {
                if (rqos->ops->done_bio)
                        rqos->ops->done_bio(rqos, bio);
                rqos = rqos->next;
        } while (rqos);
}

void __rq_qos_queue_depth_changed(struct rq_qos *rqos)
{
        do {
                if (rqos->ops->queue_depth_changed)
                        rqos->ops->queue_depth_changed(rqos);
                rqos = rqos->next;
        } while (rqos);
}

/*
 * Return true if we can't increase the depth further by scaling.
 */
bool rq_depth_calc_max_depth(struct rq_depth *rqd)
{
        unsigned int depth;
        bool ret = false;

        /*
         * For QD=1 devices, this is a special case. It's important for those
         * to have one request ready when one completes, so force a depth of
         * 2 for those devices. On the backend, it'll be a depth of 1 anyway,
         * since the device can't have more than that in flight. If we're
         * scaling down, then keep a setting of 1/1/1.
         */
        if (rqd->queue_depth == 1) {
                if (rqd->scale_step > 0)
                        rqd->max_depth = 1;
                else {
                        rqd->max_depth = 2;
                        ret = true;
                }
        } else {
                /*
                 * scale_step == 0 is our default state. If we have suffered
                 * latency spikes, step will be > 0, and we shrink the
                 * allowed write depths. If step is < 0, we're only doing
                 * writes, and we allow a temporarily higher depth to
                 * increase performance.
                 */
                depth = min_t(unsigned int, rqd->default_depth,
                              rqd->queue_depth);
                if (rqd->scale_step > 0)
                        depth = 1 + ((depth - 1) >> min(31, rqd->scale_step));
                else if (rqd->scale_step < 0) {
                        unsigned int maxd = 3 * rqd->queue_depth / 4;

                        depth = 1 + ((depth - 1) << -rqd->scale_step);
                        if (depth > maxd) {
                                depth = maxd;
                                ret = true;
                        }
                }

                rqd->max_depth = depth;
        }

        return ret;
}
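/*
 * Worked example (illustrative numbers only, not from any particular
 * device): with default_depth = 64 and queue_depth = 128, the base depth
 * is min(64, 128) = 64.
 *
 *   scale_step =  2 (throttled): depth = 1 + ((64 - 1) >> 2) = 16
 *   scale_step = -1 (boosted):   depth = 1 + ((64 - 1) << 1) = 127, which
 *                                is capped at 3 * 128 / 4 = 96 and makes
 *                                rq_depth_calc_max_depth() return true,
 *                                i.e. scaling up further is pointless.
 */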
/* Returns true on success and false if scaling up wasn't possible. */
bool rq_depth_scale_up(struct rq_depth *rqd)
{
        /*
         * Hit max in previous round, stop here
         */
        if (rqd->scaled_max)
                return false;

        rqd->scale_step--;

        rqd->scaled_max = rq_depth_calc_max_depth(rqd);
        return true;
}

/*
 * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
 * had a latency violation. Returns true on success and false if scaling
 * down wasn't possible.
 */
bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
{
        /*
         * Stop scaling down when we've hit the limit. This also prevents
         * ->scale_step from going to crazy values, if the device can't
         * keep up.
         */
        if (rqd->max_depth == 1)
                return false;

        if (rqd->scale_step < 0 && hard_throttle)
                rqd->scale_step = 0;
        else
                rqd->scale_step++;

        rqd->scaled_max = false;
        rq_depth_calc_max_depth(rqd);
        return true;
}
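/*
 * Illustrative sketch, not part of this file: a latency-tracking policy
 * would typically drive the two helpers above from its timer or window
 * callback. Only struct rq_depth, rq_depth_scale_up() and
 * rq_depth_scale_down() are real; everything prefixed my_ is made up.
 */
struct my_depth_example {
        struct rq_depth rqd;
};

static void my_depth_adjust(struct my_depth_example *mq, bool latency_ok,
                            bool hard_violation)
{
        if (latency_ok)
                rq_depth_scale_up(&mq->rqd);
        else
                rq_depth_scale_down(&mq->rqd, hard_violation);

        /*
         * mq->rqd.max_depth now bounds the inflight count a throttler
         * (e.g. one built on rq_wait_inc_below()) should allow.
         */
}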
struct rq_qos_wait_data {
        struct wait_queue_entry wq;
        struct task_struct *task;
        struct rq_wait *rqw;
        acquire_inflight_cb_t *cb;
        void *private_data;
        bool got_token;
};

static int rq_qos_wake_function(struct wait_queue_entry *curr,
                                unsigned int mode, int wake_flags, void *key)
{
        struct rq_qos_wait_data *data = container_of(curr,
                                                     struct rq_qos_wait_data,
                                                     wq);

        /*
         * If we fail to get a budget, return -1 to interrupt the wake up loop
         * in __wake_up_common.
         */
        if (!data->cb(data->rqw, data->private_data))
                return -1;

        data->got_token = true;
        smp_wmb();
        list_del_init(&curr->entry);
        wake_up_process(data->task);
        return 1;
}

/**
 * rq_qos_wait - throttle on a rqw if we need to
 * @rqw: rqw to throttle on
 * @private_data: caller provided specific data
 * @acquire_inflight_cb: inc the rqw->inflight counter if we can
 * @cleanup_cb: the callback to cleanup in case we race with a waker
 *
 * This provides a uniform place for the rq_qos users to do their throttling.
 * Since you can end up with a lot of things sleeping at once, this manages the
 * waking up based on the resources available. The acquire_inflight_cb should
 * inc the rqw->inflight if we have the ability to do so, or return false if
 * not, and then we will sleep until the room becomes available.
 *
 * cleanup_cb is for the case where we race with a waker and need to clean up
 * the inflight count accordingly.
 */
void rq_qos_wait(struct rq_wait *rqw, void *private_data,
                 acquire_inflight_cb_t *acquire_inflight_cb,
                 cleanup_cb_t *cleanup_cb)
{
        struct rq_qos_wait_data data = {
                .wq = {
                        .func   = rq_qos_wake_function,
                        .entry  = LIST_HEAD_INIT(data.wq.entry),
                },
                .task = current,
                .rqw = rqw,
                .cb = acquire_inflight_cb,
                .private_data = private_data,
        };
        bool has_sleeper;

        has_sleeper = wq_has_sleeper(&rqw->wait);
        if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
                return;

        has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq,
                                                 TASK_UNINTERRUPTIBLE);
        do {
                /* The memory barrier in set_current_state() saves us here. */
                if (data.got_token)
                        break;
                if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
                        finish_wait(&rqw->wait, &data.wq);

                        /*
                         * We raced with rq_qos_wake_function() getting a
                         * token, which means we now have two. Put our local
                         * token and wake anyone else potentially waiting for
                         * one.
                         */
                        smp_rmb();
                        if (data.got_token)
                                cleanup_cb(rqw, private_data);
                        break;
                }
                io_schedule();
                has_sleeper = true;
                set_current_state(TASK_UNINTERRUPTIBLE);
        } while (1);
        finish_wait(&rqw->wait, &data.wq);
}
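/*
 * Illustrative sketch, not part of this file: a typical rq_qos_wait() user
 * pairs it with an acquire callback built on rq_wait_inc_below() and a
 * cleanup callback that returns the extra budget taken when we race with a
 * waker. Everything prefixed my_ is hypothetical; rq_qos_wait(),
 * rq_wait_inc_below() and struct rq_wait are the real API being exercised.
 */
struct my_throttle_data {
        struct rq_wait rqw;
        unsigned int limit;
};

static bool my_acquire_inflight_cb(struct rq_wait *rqw, void *private_data)
{
        struct my_throttle_data *td = private_data;

        return rq_wait_inc_below(rqw, td->limit);
}

static void my_cleanup_cb(struct rq_wait *rqw, void *private_data)
{
        /* we won a token from a waker we no longer need: give it back */
        atomic_dec(&rqw->inflight);
        wake_up(&rqw->wait);
}

static void my_throttle(struct my_throttle_data *td)
{
        rq_qos_wait(&td->rqw, td, my_acquire_inflight_cb, my_cleanup_cb);
}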
void rq_qos_exit(struct request_queue *q)
{
        while (q->rq_qos) {
                struct rq_qos *rqos = q->rq_qos;
                q->rq_qos = rqos->next;
                rqos->ops->exit(rqos);
        }
}

int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
                const struct rq_qos_ops *ops)
{
        struct request_queue *q = disk->queue;

        rqos->disk = disk;
        rqos->id = id;
        rqos->ops = ops;

        /*
         * No IO can be in-flight when adding rqos, so freeze queue, which
         * is fine since we only support rq_qos for blk-mq queue.
         *
         * Reuse ->queue_lock for protecting against other concurrent
         * rq_qos adding/deleting
         */
        blk_mq_freeze_queue(q);

        spin_lock_irq(&q->queue_lock);
        if (rq_qos_id(q, rqos->id))
                goto ebusy;
        rqos->next = q->rq_qos;
        q->rq_qos = rqos;
        spin_unlock_irq(&q->queue_lock);

        blk_mq_unfreeze_queue(q);

        if (rqos->ops->debugfs_attrs) {
                mutex_lock(&q->debugfs_mutex);
                blk_mq_debugfs_register_rqos(rqos);
                mutex_unlock(&q->debugfs_mutex);
        }

        return 0;
ebusy:
        spin_unlock_irq(&q->queue_lock);
        blk_mq_unfreeze_queue(q);
        return -EBUSY;
}

void rq_qos_del(struct rq_qos *rqos)
{
        struct request_queue *q = rqos->disk->queue;
        struct rq_qos **cur;

        /*
         * See comment in rq_qos_add() about freezing queue & using
         * ->queue_lock.
         */
        blk_mq_freeze_queue(q);

        spin_lock_irq(&q->queue_lock);
        for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
                if (*cur == rqos) {
                        *cur = rqos->next;
                        break;
                }
        }
        spin_unlock_irq(&q->queue_lock);

        blk_mq_unfreeze_queue(q);

        mutex_lock(&q->debugfs_mutex);
        blk_mq_debugfs_unregister_rqos(rqos);
        mutex_unlock(&q->debugfs_mutex);
}
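/*
 * Illustrative sketch, not part of this file: how a policy might wire
 * itself up with rq_qos_add()/rq_qos_del(). All my_* names are made up and
 * RQ_QOS_WBT is used purely as an example id; the rq_qos_ops hooks shown
 * are the ones dispatched by the walkers at the top of this file, and
 * ->exit() is what rq_qos_exit() invokes when the queue goes away.
 */
struct my_qos {
        struct rq_qos rqos;
};

static void my_qos_throttle(struct rq_qos *rqos, struct bio *bio)
{
        /* sleep here when over budget, e.g. by calling rq_qos_wait() */
}

static void my_qos_exit(struct rq_qos *rqos)
{
        struct my_qos *mq = container_of(rqos, struct my_qos, rqos);

        kfree(mq);
}

static const struct rq_qos_ops my_qos_ops = {
        .throttle       = my_qos_throttle,
        .exit           = my_qos_exit,
};

static int my_qos_init(struct gendisk *disk)
{
        struct my_qos *mq;
        int ret;

        mq = kzalloc(sizeof(*mq), GFP_KERNEL);
        if (!mq)
                return -ENOMEM;

        ret = rq_qos_add(&mq->rqos, disk, RQ_QOS_WBT, &my_qos_ops);
        if (ret)
                kfree(mq);
        return ret;
}

static void my_qos_remove(struct my_qos *mq)
{
        rq_qos_del(&mq->rqos);
        kfree(mq);
}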