1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef INT_BLK_MQ_H 3 #define INT_BLK_MQ_H 4 5 #include <linux/blk-mq.h> 6 #include "blk-stat.h" 7 8 struct blk_mq_tag_set; 9 struct elevator_tags; 10 11 struct blk_mq_ctxs { 12 struct kobject kobj; 13 struct blk_mq_ctx __percpu *queue_ctx; 14 }; 15 16 /** 17 * struct blk_mq_ctx - State for a software queue facing the submitting CPUs 18 */ 19 struct blk_mq_ctx { 20 struct { 21 spinlock_t lock; 22 struct list_head rq_lists[HCTX_MAX_TYPES]; 23 } ____cacheline_aligned_in_smp; 24 25 unsigned int cpu; 26 unsigned short index_hw[HCTX_MAX_TYPES]; 27 struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; 28 29 struct request_queue *queue; 30 struct blk_mq_ctxs *ctxs; 31 struct kobject kobj; 32 } ____cacheline_aligned_in_smp; 33 34 enum { 35 BLK_MQ_NO_TAG = -1U, 36 BLK_MQ_TAG_MIN = 1, 37 BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1, 38 }; 39 40 #define BLK_MQ_CPU_WORK_BATCH (8) 41 42 typedef unsigned int __bitwise blk_insert_t; 43 #define BLK_MQ_INSERT_AT_HEAD ((__force blk_insert_t)0x01) 44 45 void blk_mq_submit_bio(struct bio *bio); 46 int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob, 47 unsigned int flags); 48 void blk_mq_exit_queue(struct request_queue *q); 49 struct elevator_tags *blk_mq_update_nr_requests(struct request_queue *q, 50 struct elevator_tags *tags, 51 unsigned int nr); 52 void blk_mq_wake_waiters(struct request_queue *q); 53 bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *, 54 bool); 55 void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); 56 struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, 57 struct blk_mq_ctx *start); 58 void blk_mq_put_rq_ref(struct request *rq); 59 60 /* 61 * Internal helpers for allocating/freeing the request map 62 */ 63 void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, 64 unsigned int hctx_idx); 65 void blk_mq_free_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags); 66 struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set, 67 unsigned int hctx_idx, unsigned int depth); 68 void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set, 69 struct blk_mq_tags *tags, 70 unsigned int hctx_idx); 71 72 /* 73 * CPU -> queue mappings 74 */ 75 extern int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int); 76 77 /* 78 * blk_mq_map_queue_type() - map (hctx_type,cpu) to hardware queue 79 * @q: request queue 80 * @type: the hctx type index 81 * @cpu: CPU 82 */ 83 static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, 84 enum hctx_type type, 85 unsigned int cpu) 86 { 87 return xa_load(&q->hctx_table, q->tag_set->map[type].mq_map[cpu]); 88 } 89 90 static inline enum hctx_type blk_mq_get_hctx_type(blk_opf_t opf) 91 { 92 enum hctx_type type = HCTX_TYPE_DEFAULT; 93 94 /* 95 * The caller ensure that if REQ_POLLED, poll must be enabled. 96 */ 97 if (opf & REQ_POLLED) 98 type = HCTX_TYPE_POLL; 99 else if ((opf & REQ_OP_MASK) == REQ_OP_READ) 100 type = HCTX_TYPE_READ; 101 return type; 102 } 103 104 /* 105 * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue 106 * @opf: operation type (REQ_OP_*) and flags (e.g. REQ_POLLED). 107 * @ctx: software queue cpu ctx 108 */ 109 static inline struct blk_mq_hw_ctx *blk_mq_map_queue(blk_opf_t opf, 110 struct blk_mq_ctx *ctx) 111 { 112 return ctx->hctxs[blk_mq_get_hctx_type(opf)]; 113 } 114 115 /* 116 * Default to double of smaller one between hw queue_depth and 117 * 128, since we don't split into sync/async like the old code 118 * did. Additionally, this is a per-hw queue depth. 119 */ 120 static inline unsigned int blk_mq_default_nr_requests( 121 struct blk_mq_tag_set *set) 122 { 123 return 2 * min_t(unsigned int, set->queue_depth, BLKDEV_DEFAULT_RQ); 124 } 125 126 /* 127 * sysfs helpers 128 */ 129 extern void blk_mq_sysfs_init(struct request_queue *q); 130 extern void blk_mq_sysfs_deinit(struct request_queue *q); 131 int blk_mq_sysfs_register(struct gendisk *disk); 132 void blk_mq_sysfs_unregister(struct gendisk *disk); 133 int blk_mq_sysfs_register_hctxs(struct request_queue *q); 134 void blk_mq_sysfs_unregister_hctxs(struct request_queue *q); 135 extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); 136 void blk_mq_free_plug_rqs(struct blk_plug *plug); 137 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); 138 139 void blk_mq_cancel_work_sync(struct request_queue *q); 140 141 void blk_mq_release(struct request_queue *q); 142 143 static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, 144 unsigned int cpu) 145 { 146 return per_cpu_ptr(q->queue_ctx, cpu); 147 } 148 149 /* 150 * This assumes per-cpu software queueing queues. They could be per-node 151 * as well, for instance. For now this is hardcoded as-is. Note that we don't 152 * care about preemption, since we know the ctx's are persistent. This does 153 * mean that we can't rely on ctx always matching the currently running CPU. 154 */ 155 static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q) 156 { 157 return __blk_mq_get_ctx(q, raw_smp_processor_id()); 158 } 159 160 struct blk_mq_alloc_data { 161 /* input parameter */ 162 struct request_queue *q; 163 blk_mq_req_flags_t flags; 164 unsigned int shallow_depth; 165 blk_opf_t cmd_flags; 166 req_flags_t rq_flags; 167 168 /* allocate multiple requests/tags in one go */ 169 unsigned int nr_tags; 170 struct rq_list *cached_rqs; 171 172 /* input & output parameter */ 173 struct blk_mq_ctx *ctx; 174 struct blk_mq_hw_ctx *hctx; 175 }; 176 177 struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, 178 unsigned int reserved_tags, unsigned int flags, int node); 179 void blk_mq_free_tags(struct blk_mq_tag_set *set, struct blk_mq_tags *tags); 180 181 unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); 182 unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags, 183 unsigned int *offset); 184 void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, 185 unsigned int tag); 186 void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags); 187 void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, 188 unsigned int size); 189 void blk_mq_tag_update_sched_shared_tags(struct request_queue *q); 190 191 void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); 192 void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn, 193 void *priv); 194 void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, 195 void *priv); 196 197 static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt, 198 struct blk_mq_hw_ctx *hctx) 199 { 200 if (!hctx) 201 return &bt->ws[0]; 202 return sbq_wait_ptr(bt, &hctx->wait_index); 203 } 204 205 void __blk_mq_tag_busy(struct blk_mq_hw_ctx *); 206 void __blk_mq_tag_idle(struct blk_mq_hw_ctx *); 207 208 static inline void blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) 209 { 210 if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) 211 __blk_mq_tag_busy(hctx); 212 } 213 214 static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) 215 { 216 if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) 217 __blk_mq_tag_idle(hctx); 218 } 219 220 static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags, 221 unsigned int tag) 222 { 223 return tag < tags->nr_reserved_tags; 224 } 225 226 static inline bool blk_mq_is_shared_tags(unsigned int flags) 227 { 228 return flags & BLK_MQ_F_TAG_HCTX_SHARED; 229 } 230 231 static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) 232 { 233 if (data->rq_flags & RQF_SCHED_TAGS) 234 return data->hctx->sched_tags; 235 return data->hctx->tags; 236 } 237 238 static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx) 239 { 240 /* Fast path: hardware queue is not stopped most of the time. */ 241 if (likely(!test_bit(BLK_MQ_S_STOPPED, &hctx->state))) 242 return false; 243 244 /* 245 * This barrier is used to order adding of dispatch list before and 246 * the test of BLK_MQ_S_STOPPED below. Pairs with the memory barrier 247 * in blk_mq_start_stopped_hw_queue() so that dispatch code could 248 * either see BLK_MQ_S_STOPPED is cleared or dispatch list is not 249 * empty to avoid missing dispatching requests. 250 */ 251 smp_mb(); 252 253 return test_bit(BLK_MQ_S_STOPPED, &hctx->state); 254 } 255 256 static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx) 257 { 258 return hctx->nr_ctx && hctx->tags; 259 } 260 261 void blk_mq_in_driver_rw(struct block_device *part, unsigned int inflight[2]); 262 263 static inline void blk_mq_put_dispatch_budget(struct request_queue *q, 264 int budget_token) 265 { 266 if (q->mq_ops->put_budget) 267 q->mq_ops->put_budget(q, budget_token); 268 } 269 270 static inline int blk_mq_get_dispatch_budget(struct request_queue *q) 271 { 272 if (q->mq_ops->get_budget) 273 return q->mq_ops->get_budget(q); 274 return 0; 275 } 276 277 static inline void blk_mq_set_rq_budget_token(struct request *rq, int token) 278 { 279 if (token < 0) 280 return; 281 282 if (rq->q->mq_ops->set_rq_budget_token) 283 rq->q->mq_ops->set_rq_budget_token(rq, token); 284 } 285 286 static inline int blk_mq_get_rq_budget_token(struct request *rq) 287 { 288 if (rq->q->mq_ops->get_rq_budget_token) 289 return rq->q->mq_ops->get_rq_budget_token(rq); 290 return -1; 291 } 292 293 static inline void __blk_mq_add_active_requests(struct blk_mq_hw_ctx *hctx, 294 int val) 295 { 296 if (blk_mq_is_shared_tags(hctx->flags)) 297 atomic_add(val, &hctx->queue->nr_active_requests_shared_tags); 298 else 299 atomic_add(val, &hctx->nr_active); 300 } 301 302 static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx) 303 { 304 __blk_mq_add_active_requests(hctx, 1); 305 } 306 307 static inline void __blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx, 308 int val) 309 { 310 if (blk_mq_is_shared_tags(hctx->flags)) 311 atomic_sub(val, &hctx->queue->nr_active_requests_shared_tags); 312 else 313 atomic_sub(val, &hctx->nr_active); 314 } 315 316 static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx) 317 { 318 __blk_mq_sub_active_requests(hctx, 1); 319 } 320 321 static inline void blk_mq_add_active_requests(struct blk_mq_hw_ctx *hctx, 322 int val) 323 { 324 if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) 325 __blk_mq_add_active_requests(hctx, val); 326 } 327 328 static inline void blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx) 329 { 330 if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) 331 __blk_mq_inc_active_requests(hctx); 332 } 333 334 static inline void blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx, 335 int val) 336 { 337 if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) 338 __blk_mq_sub_active_requests(hctx, val); 339 } 340 341 static inline void blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx) 342 { 343 if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) 344 __blk_mq_dec_active_requests(hctx); 345 } 346 347 static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx) 348 { 349 if (blk_mq_is_shared_tags(hctx->flags)) 350 return atomic_read(&hctx->queue->nr_active_requests_shared_tags); 351 return atomic_read(&hctx->nr_active); 352 } 353 static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, 354 struct request *rq) 355 { 356 blk_mq_dec_active_requests(hctx); 357 blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag); 358 rq->tag = BLK_MQ_NO_TAG; 359 } 360 361 static inline void blk_mq_put_driver_tag(struct request *rq) 362 { 363 if (rq->tag == BLK_MQ_NO_TAG || rq->internal_tag == BLK_MQ_NO_TAG) 364 return; 365 366 __blk_mq_put_driver_tag(rq->mq_hctx, rq); 367 } 368 369 bool __blk_mq_alloc_driver_tag(struct request *rq); 370 371 static inline bool blk_mq_get_driver_tag(struct request *rq) 372 { 373 if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq)) 374 return false; 375 376 return true; 377 } 378 379 static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap) 380 { 381 int cpu; 382 383 for_each_possible_cpu(cpu) 384 qmap->mq_map[cpu] = 0; 385 } 386 387 /* Free all requests on the list */ 388 static inline void blk_mq_free_requests(struct list_head *list) 389 { 390 while (!list_empty(list)) { 391 struct request *rq = list_entry_rq(list->next); 392 393 list_del_init(&rq->queuelist); 394 blk_mq_free_request(rq); 395 } 396 } 397 398 /* 399 * For shared tag users, we track the number of currently active users 400 * and attempt to provide a fair share of the tag depth for each of them. 401 */ 402 static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, 403 struct sbitmap_queue *bt) 404 { 405 unsigned int depth, users; 406 407 if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) 408 return true; 409 410 /* 411 * Don't try dividing an ant 412 */ 413 if (bt->sb.depth == 1) 414 return true; 415 416 if (blk_mq_is_shared_tags(hctx->flags)) { 417 struct request_queue *q = hctx->queue; 418 419 if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) 420 return true; 421 } else { 422 if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) 423 return true; 424 } 425 426 users = READ_ONCE(hctx->tags->active_queues); 427 if (!users) 428 return true; 429 430 /* 431 * Allow at least some tags 432 */ 433 depth = max((bt->sb.depth + users - 1) / users, 4U); 434 return __blk_mq_active_requests(hctx) < depth; 435 } 436 437 /* run the code block in @dispatch_ops with rcu/srcu read lock held */ 438 #define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops) \ 439 do { \ 440 if ((q)->tag_set->flags & BLK_MQ_F_BLOCKING) { \ 441 struct blk_mq_tag_set *__tag_set = (q)->tag_set; \ 442 int srcu_idx; \ 443 \ 444 might_sleep_if(check_sleep); \ 445 srcu_idx = srcu_read_lock(__tag_set->srcu); \ 446 (dispatch_ops); \ 447 srcu_read_unlock(__tag_set->srcu, srcu_idx); \ 448 } else { \ 449 rcu_read_lock(); \ 450 (dispatch_ops); \ 451 rcu_read_unlock(); \ 452 } \ 453 } while (0) 454 455 #define blk_mq_run_dispatch_ops(q, dispatch_ops) \ 456 __blk_mq_run_dispatch_ops(q, true, dispatch_ops) \ 457 458 static inline bool blk_mq_can_poll(struct request_queue *q) 459 { 460 return (q->limits.features & BLK_FEAT_POLL) && 461 q->tag_set->map[HCTX_TYPE_POLL].nr_queues; 462 } 463 464 #endif 465