1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef INT_BLK_MQ_H 3 #define INT_BLK_MQ_H 4 5 #include <linux/blk-mq.h> 6 #include "blk-stat.h" 7 8 struct blk_mq_tag_set; 9 struct elevator_tags; 10 11 struct blk_mq_ctxs { 12 struct kobject kobj; 13 struct blk_mq_ctx __percpu *queue_ctx; 14 }; 15 16 /** 17 * struct blk_mq_ctx - State for a software queue facing the submitting CPUs 18 */ 19 struct blk_mq_ctx { 20 struct { 21 spinlock_t lock; 22 struct list_head rq_lists[HCTX_MAX_TYPES]; 23 } ____cacheline_aligned_in_smp; 24 25 unsigned int cpu; 26 unsigned short index_hw[HCTX_MAX_TYPES]; 27 struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; 28 29 struct request_queue *queue; 30 struct blk_mq_ctxs *ctxs; 31 struct kobject kobj; 32 } ____cacheline_aligned_in_smp; 33 34 enum { 35 BLK_MQ_NO_TAG = -1U, 36 BLK_MQ_TAG_MIN = 1, 37 BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1, 38 }; 39 40 #define BLK_MQ_CPU_WORK_BATCH (8) 41 42 typedef unsigned int __bitwise blk_insert_t; 43 #define BLK_MQ_INSERT_AT_HEAD ((__force blk_insert_t)0x01) 44 45 void blk_mq_submit_bio(struct bio *bio); 46 int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob, 47 unsigned int flags); 48 void blk_mq_exit_queue(struct request_queue *q); 49 struct elevator_tags *blk_mq_update_nr_requests(struct request_queue *q, 50 struct elevator_tags *tags, 51 unsigned int nr); 52 void blk_mq_wake_waiters(struct request_queue *q); 53 bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *, 54 bool); 55 void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); 56 struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, 57 struct blk_mq_ctx *start); 58 void blk_mq_put_rq_ref(struct request *rq); 59 60 /* 61 * Internal helpers for allocating/freeing the request map 62 */ 63 void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, 64 unsigned int hctx_idx); 65 void blk_mq_free_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags); 66 struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set, 67 unsigned int hctx_idx, unsigned int depth); 68 void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set, 69 struct blk_mq_tags *tags, 70 unsigned int hctx_idx); 71 72 /* 73 * CPU -> queue mappings 74 */ 75 extern int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int); 76 77 /* 78 * blk_mq_map_queue_type() - map (hctx_type,cpu) to hardware queue 79 * @q: request queue 80 * @type: the hctx type index 81 * @cpu: CPU 82 */ 83 static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, 84 enum hctx_type type, 85 unsigned int cpu) 86 { 87 return xa_load(&q->hctx_table, q->tag_set->map[type].mq_map[cpu]); 88 } 89 90 static inline enum hctx_type blk_mq_get_hctx_type(blk_opf_t opf) 91 { 92 enum hctx_type type = HCTX_TYPE_DEFAULT; 93 94 /* 95 * The caller ensure that if REQ_POLLED, poll must be enabled. 96 */ 97 if (opf & REQ_POLLED) 98 type = HCTX_TYPE_POLL; 99 else if ((opf & REQ_OP_MASK) == REQ_OP_READ) 100 type = HCTX_TYPE_READ; 101 return type; 102 } 103 104 /* 105 * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue 106 * @opf: operation type (REQ_OP_*) and flags (e.g. REQ_POLLED). 107 * @ctx: software queue cpu ctx 108 */ 109 static inline struct blk_mq_hw_ctx *blk_mq_map_queue(blk_opf_t opf, 110 struct blk_mq_ctx *ctx) 111 { 112 return ctx->hctxs[blk_mq_get_hctx_type(opf)]; 113 } 114 115 /* 116 * Default to double of smaller one between hw queue_depth and 117 * 128, since we don't split into sync/async like the old code 118 * did. Additionally, this is a per-hw queue depth. 119 */ 120 static inline unsigned int blk_mq_default_nr_requests( 121 struct blk_mq_tag_set *set) 122 { 123 return 2 * min_t(unsigned int, set->queue_depth, BLKDEV_DEFAULT_RQ); 124 } 125 126 /* 127 * sysfs helpers 128 */ 129 extern void blk_mq_sysfs_init(struct request_queue *q); 130 extern void blk_mq_sysfs_deinit(struct request_queue *q); 131 int blk_mq_sysfs_register(struct gendisk *disk); 132 void blk_mq_sysfs_unregister(struct gendisk *disk); 133 int blk_mq_sysfs_register_hctxs(struct request_queue *q); 134 void blk_mq_sysfs_unregister_hctxs(struct request_queue *q); 135 extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); 136 void blk_mq_free_plug_rqs(struct blk_plug *plug); 137 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); 138 139 void blk_mq_cancel_work_sync(struct request_queue *q); 140 141 void blk_mq_release(struct request_queue *q); 142 143 static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, 144 unsigned int cpu) 145 { 146 return per_cpu_ptr(q->queue_ctx, cpu); 147 } 148 149 /* 150 * This assumes per-cpu software queueing queues. They could be per-node 151 * as well, for instance. For now this is hardcoded as-is. Note that we don't 152 * care about preemption, since we know the ctx's are persistent. This does 153 * mean that we can't rely on ctx always matching the currently running CPU. 154 */ 155 static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q) 156 { 157 return __blk_mq_get_ctx(q, raw_smp_processor_id()); 158 } 159 160 struct blk_mq_alloc_data { 161 /* input parameter */ 162 struct request_queue *q; 163 blk_mq_req_flags_t flags; 164 unsigned int shallow_depth; 165 blk_opf_t cmd_flags; 166 req_flags_t rq_flags; 167 168 /* allocate multiple requests/tags in one go */ 169 unsigned int nr_tags; 170 struct rq_list *cached_rqs; 171 172 /* input & output parameter */ 173 struct blk_mq_ctx *ctx; 174 struct blk_mq_hw_ctx *hctx; 175 }; 176 177 struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, 178 unsigned int reserved_tags, unsigned int flags, int node); 179 void blk_mq_free_tags(struct blk_mq_tag_set *set, struct blk_mq_tags *tags); 180 181 unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); 182 unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags, 183 unsigned int *offset); 184 void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, 185 unsigned int tag); 186 void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags); 187 void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, 188 unsigned int size); 189 void blk_mq_tag_update_sched_shared_tags(struct request_queue *q, 190 unsigned int nr); 191 192 void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); 193 void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn, 194 void *priv); 195 void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, 196 void *priv); 197 198 static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt, 199 struct blk_mq_hw_ctx *hctx) 200 { 201 if (!hctx) 202 return &bt->ws[0]; 203 return sbq_wait_ptr(bt, &hctx->wait_index); 204 } 205 206 void __blk_mq_tag_busy(struct blk_mq_hw_ctx *); 207 void __blk_mq_tag_idle(struct blk_mq_hw_ctx *); 208 209 static inline void blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) 210 { 211 if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) 212 __blk_mq_tag_busy(hctx); 213 } 214 215 static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) 216 { 217 if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) 218 __blk_mq_tag_idle(hctx); 219 } 220 221 static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags, 222 unsigned int tag) 223 { 224 return tag < tags->nr_reserved_tags; 225 } 226 227 static inline bool blk_mq_is_shared_tags(unsigned int flags) 228 { 229 return flags & BLK_MQ_F_TAG_HCTX_SHARED; 230 } 231 232 static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) 233 { 234 if (data->rq_flags & RQF_SCHED_TAGS) 235 return data->hctx->sched_tags; 236 return data->hctx->tags; 237 } 238 239 static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx) 240 { 241 /* Fast path: hardware queue is not stopped most of the time. */ 242 if (likely(!test_bit(BLK_MQ_S_STOPPED, &hctx->state))) 243 return false; 244 245 /* 246 * This barrier is used to order adding of dispatch list before and 247 * the test of BLK_MQ_S_STOPPED below. Pairs with the memory barrier 248 * in blk_mq_start_stopped_hw_queue() so that dispatch code could 249 * either see BLK_MQ_S_STOPPED is cleared or dispatch list is not 250 * empty to avoid missing dispatching requests. 251 */ 252 smp_mb(); 253 254 return test_bit(BLK_MQ_S_STOPPED, &hctx->state); 255 } 256 257 static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx) 258 { 259 return hctx->nr_ctx && hctx->tags; 260 } 261 262 void blk_mq_in_driver_rw(struct block_device *part, unsigned int inflight[2]); 263 264 static inline void blk_mq_put_dispatch_budget(struct request_queue *q, 265 int budget_token) 266 { 267 if (q->mq_ops->put_budget) 268 q->mq_ops->put_budget(q, budget_token); 269 } 270 271 static inline int blk_mq_get_dispatch_budget(struct request_queue *q) 272 { 273 if (q->mq_ops->get_budget) 274 return q->mq_ops->get_budget(q); 275 return 0; 276 } 277 278 static inline void blk_mq_set_rq_budget_token(struct request *rq, int token) 279 { 280 if (token < 0) 281 return; 282 283 if (rq->q->mq_ops->set_rq_budget_token) 284 rq->q->mq_ops->set_rq_budget_token(rq, token); 285 } 286 287 static inline int blk_mq_get_rq_budget_token(struct request *rq) 288 { 289 if (rq->q->mq_ops->get_rq_budget_token) 290 return rq->q->mq_ops->get_rq_budget_token(rq); 291 return -1; 292 } 293 294 static inline void __blk_mq_add_active_requests(struct blk_mq_hw_ctx *hctx, 295 int val) 296 { 297 if (blk_mq_is_shared_tags(hctx->flags)) 298 atomic_add(val, &hctx->queue->nr_active_requests_shared_tags); 299 else 300 atomic_add(val, &hctx->nr_active); 301 } 302 303 static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx) 304 { 305 __blk_mq_add_active_requests(hctx, 1); 306 } 307 308 static inline void __blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx, 309 int val) 310 { 311 if (blk_mq_is_shared_tags(hctx->flags)) 312 atomic_sub(val, &hctx->queue->nr_active_requests_shared_tags); 313 else 314 atomic_sub(val, &hctx->nr_active); 315 } 316 317 static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx) 318 { 319 __blk_mq_sub_active_requests(hctx, 1); 320 } 321 322 static inline void blk_mq_add_active_requests(struct blk_mq_hw_ctx *hctx, 323 int val) 324 { 325 if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) 326 __blk_mq_add_active_requests(hctx, val); 327 } 328 329 static inline void blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx) 330 { 331 if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) 332 __blk_mq_inc_active_requests(hctx); 333 } 334 335 static inline void blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx, 336 int val) 337 { 338 if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) 339 __blk_mq_sub_active_requests(hctx, val); 340 } 341 342 static inline void blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx) 343 { 344 if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) 345 __blk_mq_dec_active_requests(hctx); 346 } 347 348 static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx) 349 { 350 if (blk_mq_is_shared_tags(hctx->flags)) 351 return atomic_read(&hctx->queue->nr_active_requests_shared_tags); 352 return atomic_read(&hctx->nr_active); 353 } 354 static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, 355 struct request *rq) 356 { 357 blk_mq_dec_active_requests(hctx); 358 blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag); 359 rq->tag = BLK_MQ_NO_TAG; 360 } 361 362 static inline void blk_mq_put_driver_tag(struct request *rq) 363 { 364 if (rq->tag == BLK_MQ_NO_TAG || rq->internal_tag == BLK_MQ_NO_TAG) 365 return; 366 367 __blk_mq_put_driver_tag(rq->mq_hctx, rq); 368 } 369 370 bool __blk_mq_alloc_driver_tag(struct request *rq); 371 372 static inline bool blk_mq_get_driver_tag(struct request *rq) 373 { 374 if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq)) 375 return false; 376 377 return true; 378 } 379 380 static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap) 381 { 382 int cpu; 383 384 for_each_possible_cpu(cpu) 385 qmap->mq_map[cpu] = 0; 386 } 387 388 /* Free all requests on the list */ 389 static inline void blk_mq_free_requests(struct list_head *list) 390 { 391 while (!list_empty(list)) { 392 struct request *rq = list_entry_rq(list->next); 393 394 list_del_init(&rq->queuelist); 395 blk_mq_free_request(rq); 396 } 397 } 398 399 /* 400 * For shared tag users, we track the number of currently active users 401 * and attempt to provide a fair share of the tag depth for each of them. 402 */ 403 static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, 404 struct sbitmap_queue *bt) 405 { 406 unsigned int depth, users; 407 408 if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) 409 return true; 410 411 /* 412 * Don't try dividing an ant 413 */ 414 if (bt->sb.depth == 1) 415 return true; 416 417 if (blk_mq_is_shared_tags(hctx->flags)) { 418 struct request_queue *q = hctx->queue; 419 420 if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) 421 return true; 422 } else { 423 if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) 424 return true; 425 } 426 427 users = READ_ONCE(hctx->tags->active_queues); 428 if (!users) 429 return true; 430 431 /* 432 * Allow at least some tags 433 */ 434 depth = max((bt->sb.depth + users - 1) / users, 4U); 435 return __blk_mq_active_requests(hctx) < depth; 436 } 437 438 /* run the code block in @dispatch_ops with rcu/srcu read lock held */ 439 #define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops) \ 440 do { \ 441 if ((q)->tag_set->flags & BLK_MQ_F_BLOCKING) { \ 442 struct blk_mq_tag_set *__tag_set = (q)->tag_set; \ 443 int srcu_idx; \ 444 \ 445 might_sleep_if(check_sleep); \ 446 srcu_idx = srcu_read_lock(__tag_set->srcu); \ 447 (dispatch_ops); \ 448 srcu_read_unlock(__tag_set->srcu, srcu_idx); \ 449 } else { \ 450 rcu_read_lock(); \ 451 (dispatch_ops); \ 452 rcu_read_unlock(); \ 453 } \ 454 } while (0) 455 456 #define blk_mq_run_dispatch_ops(q, dispatch_ops) \ 457 __blk_mq_run_dispatch_ops(q, true, dispatch_ops) \ 458 459 static inline bool blk_mq_can_poll(struct request_queue *q) 460 { 461 return (q->limits.features & BLK_FEAT_POLL) && 462 q->tag_set->map[HCTX_TYPE_POLL].nr_queues; 463 } 464 465 #endif 466