/*
 * Block device elevator/IO-scheduler.
 *
 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@kernel.dk> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 * when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq-sched.h"

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * Merge hash stuff.
 */
#define rq_hash_key(rq)		(blk_rq_pos(rq) + blk_rq_sectors(rq))

/*
 * Query io scheduler to see if the current process issuing bio may be
 * merged with rq.
 */
static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;

	if (e->uses_mq && e->type->ops.mq.allow_merge)
		return e->type->ops.mq.allow_merge(q, rq, bio);
	else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn)
		return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio);

	return 1;
}

/*
 * can we safely merge with this request?
 */
bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
{
	if (!blk_rq_merge_ok(rq, bio))
		return false;

	if (!elv_iosched_allow_bio_merge(rq, bio))
		return false;

	return true;
}
EXPORT_SYMBOL(elv_bio_merge_ok);

static struct elevator_type *elevator_find(const char *name)
{
	struct elevator_type *e;

	list_for_each_entry(e, &elv_list, list) {
		if (!strcmp(e->elevator_name, name))
			return e;
	}

	return NULL;
}

static void elevator_put(struct elevator_type *e)
{
	module_put(e->elevator_owner);
}

static struct elevator_type *elevator_get(const char *name, bool try_loading)
{
	struct elevator_type *e;

	spin_lock(&elv_list_lock);

	e = elevator_find(name);
	if (!e && try_loading) {
		spin_unlock(&elv_list_lock);
		request_module("%s-iosched", name);
		spin_lock(&elv_list_lock);
		e = elevator_find(name);
	}

	if (e && !try_module_get(e->elevator_owner))
		e = NULL;

	spin_unlock(&elv_list_lock);

	return e;
}

static char chosen_elevator[ELV_NAME_MAX];

static int __init elevator_setup(char *str)
{
	/*
	 * Be backwards-compatible with previous kernels, so users
	 * won't get the wrong elevator.
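	 * Note that the chosen elevator is only applied at init time to
	 * legacy (non-mq) queues; see the !q->mq_ops check in
	 * elevator_init().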
	 */
	strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
	return 1;
}

__setup("elevator=", elevator_setup);

/* called during boot to load the elevator chosen by the elevator param */
void __init load_default_elevator_module(void)
{
	struct elevator_type *e;

	if (!chosen_elevator[0])
		return;

	spin_lock(&elv_list_lock);
	e = elevator_find(chosen_elevator);
	spin_unlock(&elv_list_lock);

	if (!e)
		request_module("%s-iosched", chosen_elevator);
}

static struct kobj_type elv_ktype;

struct elevator_queue *elevator_alloc(struct request_queue *q,
				  struct elevator_type *e)
{
	struct elevator_queue *eq;

	eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
	if (unlikely(!eq))
		return NULL;

	eq->type = e;
	kobject_init(&eq->kobj, &elv_ktype);
	mutex_init(&eq->sysfs_lock);
	hash_init(eq->hash);
	eq->uses_mq = e->uses_mq;

	return eq;
}
EXPORT_SYMBOL(elevator_alloc);

static void elevator_release(struct kobject *kobj)
{
	struct elevator_queue *e;

	e = container_of(kobj, struct elevator_queue, kobj);
	elevator_put(e->type);
	kfree(e);
}

int elevator_init(struct request_queue *q, char *name)
{
	struct elevator_type *e = NULL;
	int err;

	/*
	 * q->sysfs_lock must be held to provide mutual exclusion between
	 * elevator_switch() and here.
	 */
	lockdep_assert_held(&q->sysfs_lock);

	if (unlikely(q->elevator))
		return 0;

	INIT_LIST_HEAD(&q->queue_head);
	q->last_merge = NULL;
	q->end_sector = 0;
	q->boundary_rq = NULL;

	if (name) {
		e = elevator_get(name, true);
		if (!e)
			return -EINVAL;
	}

	/*
	 * Use the default elevator specified by the boot parameter for
	 * non-mq devices, or by config option. Don't try to load modules,
	 * as we could be running from an async context and request_module()
	 * isn't allowed from async.
	 */
	if (!e && !q->mq_ops && *chosen_elevator) {
		e = elevator_get(chosen_elevator, false);
		if (!e)
			printk(KERN_ERR "I/O scheduler %s not found\n",
			       chosen_elevator);
	}

	if (!e) {
		if (q->mq_ops && q->nr_hw_queues == 1)
			e = elevator_get(CONFIG_DEFAULT_SQ_IOSCHED, false);
		else if (q->mq_ops)
			e = elevator_get(CONFIG_DEFAULT_MQ_IOSCHED, false);
		else
			e = elevator_get(CONFIG_DEFAULT_IOSCHED, false);

		if (!e) {
			printk(KERN_ERR
				"Default I/O scheduler not found. " \
				"Using noop/none.\n");
			e = elevator_get("noop", false);
		}
	}

	if (e->uses_mq) {
		err = blk_mq_sched_setup(q);
		if (!err)
			err = e->ops.mq.init_sched(q, e);
	} else
		err = e->ops.sq.elevator_init_fn(q, e);
	if (err) {
		if (e->uses_mq)
			blk_mq_sched_teardown(q);
		elevator_put(e);
	}
	return err;
}
EXPORT_SYMBOL(elevator_init);

void elevator_exit(struct elevator_queue *e)
{
	mutex_lock(&e->sysfs_lock);
	if (e->uses_mq && e->type->ops.mq.exit_sched)
		e->type->ops.mq.exit_sched(e);
	else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn)
		e->type->ops.sq.elevator_exit_fn(e);
	mutex_unlock(&e->sysfs_lock);

	kobject_put(&e->kobj);
}
EXPORT_SYMBOL(elevator_exit);

static inline void __elv_rqhash_del(struct request *rq)
{
	hash_del(&rq->hash);
	rq->rq_flags &= ~RQF_HASHED;
}

void elv_rqhash_del(struct request_queue *q, struct request *rq)
{
	if (ELV_ON_HASH(rq))
		__elv_rqhash_del(rq);
}
EXPORT_SYMBOL_GPL(elv_rqhash_del);

void elv_rqhash_add(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	BUG_ON(ELV_ON_HASH(rq));
	hash_add(e->hash, &rq->hash, rq_hash_key(rq));
	rq->rq_flags |= RQF_HASHED;
}
EXPORT_SYMBOL_GPL(elv_rqhash_add);

void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
{
	__elv_rqhash_del(rq);
	elv_rqhash_add(q, rq);
}

struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
{
	struct elevator_queue *e = q->elevator;
	struct hlist_node *next;
	struct request *rq;

	hash_for_each_possible_safe(e->hash, rq, next, hash, offset) {
		BUG_ON(!ELV_ON_HASH(rq));

		if (unlikely(!rq_mergeable(rq))) {
			__elv_rqhash_del(rq);
			continue;
		}

		if (rq_hash_key(rq) == offset)
			return rq;
	}

	return NULL;
}

/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
void elv_rb_add(struct rb_root *root, struct request *rq)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct request *__rq;

	while (*p) {
		parent = *p;
		__rq = rb_entry(parent, struct request, rb_node);

		if (blk_rq_pos(rq) < blk_rq_pos(__rq))
			p = &(*p)->rb_left;
		else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
			p = &(*p)->rb_right;
	}

	rb_link_node(&rq->rb_node, parent, p);
	rb_insert_color(&rq->rb_node, root);
}
EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
	rb_erase(&rq->rb_node, root);
	RB_CLEAR_NODE(&rq->rb_node);
}
EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
	struct rb_node *n = root->rb_node;
	struct request *rq;

	while (n) {
		rq = rb_entry(n, struct request, rb_node);

		if (sector < blk_rq_pos(rq))
			n = n->rb_left;
		else if (sector > blk_rq_pos(rq))
			n = n->rb_right;
		else
			return rq;
	}

	return NULL;
}
EXPORT_SYMBOL(elv_rb_find);

/*
 * Insert rq into dispatch queue of q. Queue lock must be held on
 * entry. rq is sorted into the dispatch queue. To be used by
 * specific elevators.
 */
void elv_dispatch_sort(struct request_queue *q, struct request *rq)
{
	sector_t boundary;
	struct list_head *entry;

	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	boundary = q->end_sector;
	list_for_each_prev(entry, &q->queue_head) {
		struct request *pos = list_entry_rq(entry);

		if (req_op(rq) != req_op(pos))
			break;
		if (rq_data_dir(rq) != rq_data_dir(pos))
			break;
		if (pos->rq_flags & (RQF_STARTED | RQF_SOFTBARRIER))
			break;
		if (blk_rq_pos(rq) >= boundary) {
			if (blk_rq_pos(pos) < boundary)
				continue;
		} else {
			if (blk_rq_pos(pos) >= boundary)
				break;
		}
		if (blk_rq_pos(rq) >= blk_rq_pos(pos))
			break;
	}

	list_add(&rq->queuelist, entry);
}
EXPORT_SYMBOL(elv_dispatch_sort);

/*
 * Insert rq into dispatch queue of q. Queue lock must be held on
 * entry. rq is added to the back of the dispatch queue. To be used by
 * specific elevators.
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	q->end_sector = rq_end_sector(rq);
	q->boundary_rq = rq;
	list_add_tail(&rq->queuelist, &q->queue_head);
}
EXPORT_SYMBOL(elv_dispatch_add_tail);

int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
	struct elevator_queue *e = q->elevator;
	struct request *__rq;
	int ret;

	/*
	 * Levels of merges:
	 *	nomerges:  No merges at all attempted
	 *	noxmerges: Only simple one-hit cache try
	 *	merges:	   All merge tries attempted
	 */
	if (blk_queue_nomerges(q) || !bio_mergeable(bio))
		return ELEVATOR_NO_MERGE;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
		ret = blk_try_merge(q->last_merge, bio);
		if (ret != ELEVATOR_NO_MERGE) {
			*req = q->last_merge;
			return ret;
		}
	}

	if (blk_queue_noxmerges(q))
		return ELEVATOR_NO_MERGE;

	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
	if (__rq && elv_bio_merge_ok(__rq, bio)) {
		*req = __rq;
		return ELEVATOR_BACK_MERGE;
	}

	if (e->uses_mq && e->type->ops.mq.request_merge)
		return e->type->ops.mq.request_merge(q, req, bio);
	else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn)
		return e->type->ops.sq.elevator_merge_fn(q, req, bio);

	return ELEVATOR_NO_MERGE;
}

/*
 * Attempt to do an insertion back merge. Only check for the case where
 * we can append 'rq' to an existing request, so we can throw 'rq' away
 * afterwards.
 *
 * Returns true if we merged, false otherwise
 */
bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
{
	struct request *__rq;
	bool ret;

	if (blk_queue_nomerges(q))
		return false;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
		return true;

	if (blk_queue_noxmerges(q))
		return false;

	ret = false;
	/*
	 * See if our hash lookup can find a potential backmerge.
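	 * Each successful merge disposes of the current rq and leaves the
	 * surviving request as the new candidate, which may itself now be
	 * mergeable, so keep retrying until no further merge is possible.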
	 */
	while (1) {
		__rq = elv_rqhash_find(q, blk_rq_pos(rq));
		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
			break;

		/* The merged request could be merged with others, try again */
		ret = true;
		rq = __rq;
	}

	return ret;
}

void elv_merged_request(struct request_queue *q, struct request *rq, int type)
{
	struct elevator_queue *e = q->elevator;

	if (e->uses_mq && e->type->ops.mq.request_merged)
		e->type->ops.mq.request_merged(q, rq, type);
	else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn)
		e->type->ops.sq.elevator_merged_fn(q, rq, type);

	if (type == ELEVATOR_BACK_MERGE)
		elv_rqhash_reposition(q, rq);

	q->last_merge = rq;
}

void elv_merge_requests(struct request_queue *q, struct request *rq,
			struct request *next)
{
	struct elevator_queue *e = q->elevator;
	bool next_sorted = false;

	if (e->uses_mq && e->type->ops.mq.requests_merged)
		e->type->ops.mq.requests_merged(q, rq, next);
	else if (e->type->ops.sq.elevator_merge_req_fn) {
		next_sorted = next->rq_flags & RQF_SORTED;
		if (next_sorted)
			e->type->ops.sq.elevator_merge_req_fn(q, rq, next);
	}

	elv_rqhash_reposition(q, rq);

	if (next_sorted) {
		elv_rqhash_del(q, next);
		q->nr_sorted--;
	}

	q->last_merge = rq;
}

void elv_bio_merged(struct request_queue *q, struct request *rq,
			struct bio *bio)
{
	struct elevator_queue *e = q->elevator;

	if (WARN_ON_ONCE(e->uses_mq))
		return;

	if (e->type->ops.sq.elevator_bio_merged_fn)
		e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio);
}

#ifdef CONFIG_PM
static void blk_pm_requeue_request(struct request *rq)
{
	if (rq->q->dev && !(rq->rq_flags & RQF_PM))
		rq->q->nr_pending--;
}

static void blk_pm_add_request(struct request_queue *q, struct request *rq)
{
	if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 &&
	    (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
		pm_request_resume(q->dev);
}
#else
static inline void blk_pm_requeue_request(struct request *rq) {}
static inline void blk_pm_add_request(struct request_queue *q,
				      struct request *rq)
{
}
#endif

void elv_requeue_request(struct request_queue *q, struct request *rq)
{
	/*
	 * it already went through dequeue, we need to decrement the
	 * in_flight count again
	 */
	if (blk_account_rq(rq)) {
		q->in_flight[rq_is_sync(rq)]--;
		if (rq->rq_flags & RQF_SORTED)
			elv_deactivate_rq(q, rq);
	}

	rq->rq_flags &= ~RQF_STARTED;

	blk_pm_requeue_request(rq);

	__elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
}

void elv_drain_elevator(struct request_queue *q)
{
	struct elevator_queue *e = q->elevator;
	static int printed;

	if (WARN_ON_ONCE(e->uses_mq))
		return;

	lockdep_assert_held(q->queue_lock);

	while (e->type->ops.sq.elevator_dispatch_fn(q, 1))
		;
	if (q->nr_sorted && printed++ < 10) {
		printk(KERN_ERR "%s: forced dispatching is broken "
		       "(nr_sorted=%u), please report this\n",
		       q->elevator->type->elevator_name, q->nr_sorted);
	}
}

void __elv_add_request(struct request_queue *q, struct request *rq, int where)
{
	trace_block_rq_insert(q, rq);

	blk_pm_add_request(q, rq);

	rq->q = q;

	if (rq->rq_flags & RQF_SOFTBARRIER) {
		/* barriers are scheduling boundary, update end_sector */
		if (rq->cmd_type == REQ_TYPE_FS) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = rq;
		}
	} else if (!(rq->rq_flags & RQF_ELVPRIV) &&
		    (where == ELEVATOR_INSERT_SORT ||
		     where == ELEVATOR_INSERT_SORT_MERGE))
		where = ELEVATOR_INSERT_BACK;

	switch (where) {
	case ELEVATOR_INSERT_REQUEUE:
	case ELEVATOR_INSERT_FRONT:
		rq->rq_flags |= RQF_SOFTBARRIER;
		list_add(&rq->queuelist, &q->queue_head);
		break;

	case ELEVATOR_INSERT_BACK:
		rq->rq_flags |= RQF_SOFTBARRIER;
		elv_drain_elevator(q);
		list_add_tail(&rq->queuelist, &q->queue_head);
		/*
		 * We kick the queue here for the following reasons.
		 * - The elevator might have returned NULL previously
		 *   to delay requests and returned them now. As the
		 *   queue wasn't empty before this request, ll_rw_blk
		 *   won't run the queue on return, resulting in a hang.
		 * - Usually, back inserted requests won't be merged
		 *   with anything. There's no point in delaying queue
		 *   processing.
		 */
		__blk_run_queue(q);
		break;

	case ELEVATOR_INSERT_SORT_MERGE:
		/*
		 * If we succeed in merging this request with one in the
		 * queue already, we are done - rq has now been freed,
		 * so no need to do anything further.
		 */
		if (elv_attempt_insert_merge(q, rq))
			break;
	case ELEVATOR_INSERT_SORT:
		BUG_ON(rq->cmd_type != REQ_TYPE_FS);
		rq->rq_flags |= RQF_SORTED;
		q->nr_sorted++;
		if (rq_mergeable(rq)) {
			elv_rqhash_add(q, rq);
			if (!q->last_merge)
				q->last_merge = rq;
		}

		/*
		 * Some ioscheds (cfq) run q->request_fn directly, so
		 * rq cannot be accessed after calling
		 * elevator_add_req_fn.
		 */
		q->elevator->type->ops.sq.elevator_add_req_fn(q, rq);
		break;

	case ELEVATOR_INSERT_FLUSH:
		rq->rq_flags |= RQF_SOFTBARRIER;
		blk_insert_flush(rq);
		break;
	default:
		printk(KERN_ERR "%s: bad insertion point %d\n",
		       __func__, where);
		BUG();
	}
}
EXPORT_SYMBOL(__elv_add_request);

void elv_add_request(struct request_queue *q, struct request *rq, int where)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__elv_add_request(q, rq, where);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(elv_add_request);

struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (e->uses_mq && e->type->ops.mq.next_request)
		return e->type->ops.mq.next_request(q, rq);
	else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn)
		return e->type->ops.sq.elevator_latter_req_fn(q, rq);

	return NULL;
}

struct request *elv_former_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (e->uses_mq && e->type->ops.mq.former_request)
		return e->type->ops.mq.former_request(q, rq);
	if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn)
		return e->type->ops.sq.elevator_former_req_fn(q, rq);
	return NULL;
}

int elv_set_request(struct request_queue *q, struct request *rq,
		    struct bio *bio, gfp_t gfp_mask)
{
	struct elevator_queue *e = q->elevator;

	if (WARN_ON_ONCE(e->uses_mq))
		return 0;

	if (e->type->ops.sq.elevator_set_req_fn)
		return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask);
	return 0;
}

void elv_put_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (WARN_ON_ONCE(e->uses_mq))
		return;

	if (e->type->ops.sq.elevator_put_req_fn)
		e->type->ops.sq.elevator_put_req_fn(rq);
}

int elv_may_queue(struct request_queue *q, unsigned int op)
{
	struct elevator_queue *e = q->elevator;

	if (WARN_ON_ONCE(e->uses_mq))
		return 0;

	if (e->type->ops.sq.elevator_may_queue_fn)
		return e->type->ops.sq.elevator_may_queue_fn(q, op);

	return ELV_MQUEUE_MAY;
}

void elv_completed_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (WARN_ON_ONCE(e->uses_mq))
		return;

	/*
	 * request is released from the driver, io must be done
	 */
	if (blk_account_rq(rq)) {
		q->in_flight[rq_is_sync(rq)]--;
		if ((rq->rq_flags & RQF_SORTED) &&
		    e->type->ops.sq.elevator_completed_req_fn)
			e->type->ops.sq.elevator_completed_req_fn(q, rq);
	}
}

#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	struct elv_fs_entry *entry = to_elv(attr);
	struct elevator_queue *e;
	ssize_t error;

	if (!entry->show)
		return -EIO;

	e = container_of(kobj, struct elevator_queue, kobj);
	mutex_lock(&e->sysfs_lock);
	error = e->type ? entry->show(e, page) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
	       const char *page, size_t length)
{
	struct elv_fs_entry *entry = to_elv(attr);
	struct elevator_queue *e;
	ssize_t error;

	if (!entry->store)
		return -EIO;

	e = container_of(kobj, struct elevator_queue, kobj);
	mutex_lock(&e->sysfs_lock);
	error = e->type ? entry->store(e, page, length) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static const struct sysfs_ops elv_sysfs_ops = {
	.show	= elv_attr_show,
	.store	= elv_attr_store,
};

static struct kobj_type elv_ktype = {
	.sysfs_ops	= &elv_sysfs_ops,
	.release	= elevator_release,
};

int elv_register_queue(struct request_queue *q)
{
	struct elevator_queue *e = q->elevator;
	int error;

	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
	if (!error) {
		struct elv_fs_entry *attr = e->type->elevator_attrs;
		if (attr) {
			while (attr->attr.name) {
				if (sysfs_create_file(&e->kobj, &attr->attr))
					break;
				attr++;
			}
		}
		kobject_uevent(&e->kobj, KOBJ_ADD);
		e->registered = 1;
		if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn)
			e->type->ops.sq.elevator_registered_fn(q);
	}
	return error;
}
EXPORT_SYMBOL(elv_register_queue);

void elv_unregister_queue(struct request_queue *q)
{
	if (q) {
		struct elevator_queue *e = q->elevator;

		kobject_uevent(&e->kobj, KOBJ_REMOVE);
		kobject_del(&e->kobj);
		e->registered = 0;
	}
}
EXPORT_SYMBOL(elv_unregister_queue);

int elv_register(struct elevator_type *e)
{
	char *def = "";

	/* create icq_cache if requested */
	if (e->icq_size) {
		if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
		    WARN_ON(e->icq_align < __alignof__(struct io_cq)))
			return -EINVAL;

		snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
			 "%s_io_cq", e->elevator_name);
		e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
						 e->icq_align, 0, NULL);
		if (!e->icq_cache)
			return -ENOMEM;
	}

	/* register, don't allow duplicate names */
	spin_lock(&elv_list_lock);
	if (elevator_find(e->elevator_name)) {
		spin_unlock(&elv_list_lock);
		if (e->icq_cache)
			kmem_cache_destroy(e->icq_cache);
		return -EBUSY;
	}
	list_add_tail(&e->list, &elv_list);
	spin_unlock(&elv_list_lock);

	/* print pretty message */
	if (!strcmp(e->elevator_name, chosen_elevator) ||
	    (!*chosen_elevator &&
	     !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
		def = " (default)";

	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
	       def);
	return 0;
}
EXPORT_SYMBOL_GPL(elv_register);

void elv_unregister(struct elevator_type *e)
{
	/* unregister */
	spin_lock(&elv_list_lock);
	list_del_init(&e->list);
	spin_unlock(&elv_list_lock);

	/*
	 * Destroy icq_cache if it exists. icq's are RCU managed. Make
	 * sure all RCU operations are complete before proceeding.
	 */
	if (e->icq_cache) {
		rcu_barrier();
		kmem_cache_destroy(e->icq_cache);
		e->icq_cache = NULL;
	}
}
EXPORT_SYMBOL_GPL(elv_unregister);

/*
 * Switch to new_e io scheduler. Be careful not to introduce deadlocks -
 * we don't free the old io scheduler before we have allocated what we
 * need for the new one. This way we have a chance of going back to the old
 * one if the new one fails init for some reason.
 */
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
	struct elevator_queue *old = q->elevator;
	bool old_registered = false;
	int err;

	if (q->mq_ops) {
		blk_mq_freeze_queue(q);
		blk_mq_quiesce_queue(q);
	}

	/*
	 * Turn on BYPASS and drain all requests w/ elevator private data.
	 * Block layer doesn't call into a quiesced elevator - all requests
	 * are directly put on the dispatch list without elevator data
	 * using INSERT_BACK. All requests have SOFTBARRIER set and no
	 * merge happens either.
	 */
	if (old) {
		old_registered = old->registered;

		if (old->uses_mq)
			blk_mq_sched_teardown(q);

		if (!q->mq_ops)
			blk_queue_bypass_start(q);

		/* unregister and clear all auxiliary data of the old elevator */
		if (old_registered)
			elv_unregister_queue(q);

		spin_lock_irq(q->queue_lock);
		ioc_clear_queue(q);
		spin_unlock_irq(q->queue_lock);
	}

	/* allocate, init and register new elevator */
	if (new_e) {
		if (new_e->uses_mq) {
			err = blk_mq_sched_setup(q);
			if (!err)
				err = new_e->ops.mq.init_sched(q, new_e);
		} else
			err = new_e->ops.sq.elevator_init_fn(q, new_e);
		if (err)
			goto fail_init;

		err = elv_register_queue(q);
		if (err)
			goto fail_register;
	} else
		q->elevator = NULL;

	/* done, kill the old one and finish */
	if (old) {
		elevator_exit(old);
		if (!q->mq_ops)
			blk_queue_bypass_end(q);
	}

	if (q->mq_ops) {
		blk_mq_unfreeze_queue(q);
		blk_mq_start_stopped_hw_queues(q, true);
	}

	if (new_e)
		blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
	else
		blk_add_trace_msg(q, "elv switch: none");

	return 0;

fail_register:
	if (q->mq_ops)
		blk_mq_sched_teardown(q);
	elevator_exit(q->elevator);
fail_init:
	/* switch failed, restore and re-register old elevator */
	if (old) {
		q->elevator = old;
		elv_register_queue(q);
		if (!q->mq_ops)
			blk_queue_bypass_end(q);
	}
	if (q->mq_ops) {
		blk_mq_unfreeze_queue(q);
		blk_mq_start_stopped_hw_queues(q, true);
	}

	return err;
}

/*
 * Switch this queue to the given IO scheduler.
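 * For blk-mq queues the special name "none" removes the scheduler entirely;
 * otherwise the named scheduler must match the queue type (mq vs. legacy
 * request_fn), and a mismatch fails with -EINVAL.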
 */
static int __elevator_change(struct request_queue *q, const char *name)
{
	char elevator_name[ELV_NAME_MAX];
	struct elevator_type *e;

	/*
	 * Special case for mq, turn off scheduling
	 */
	if (q->mq_ops && !strncmp(name, "none", 4))
		return elevator_switch(q, NULL);

	strlcpy(elevator_name, name, sizeof(elevator_name));
	e = elevator_get(strstrip(elevator_name), true);
	if (!e) {
		printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
		return -EINVAL;
	}

	if (q->elevator &&
	    !strcmp(elevator_name, q->elevator->type->elevator_name)) {
		elevator_put(e);
		return 0;
	}

	if (!e->uses_mq && q->mq_ops) {
		elevator_put(e);
		return -EINVAL;
	}
	if (e->uses_mq && !q->mq_ops) {
		elevator_put(e);
		return -EINVAL;
	}

	return elevator_switch(q, e);
}

int elevator_change(struct request_queue *q, const char *name)
{
	int ret;

	/* Protect q->elevator from elevator_init() */
	mutex_lock(&q->sysfs_lock);
	ret = __elevator_change(q, name);
	mutex_unlock(&q->sysfs_lock);

	return ret;
}
EXPORT_SYMBOL(elevator_change);

ssize_t elv_iosched_store(struct request_queue *q, const char *name,
			  size_t count)
{
	int ret;

	if (!(q->mq_ops || q->request_fn))
		return count;

	ret = __elevator_change(q, name);
	if (!ret)
		return count;

	printk(KERN_ERR "elevator: switch to %s failed\n", name);
	return ret;
}

ssize_t elv_iosched_show(struct request_queue *q, char *name)
{
	struct elevator_queue *e = q->elevator;
	struct elevator_type *elv = NULL;
	struct elevator_type *__e;
	int len = 0;

	if (!blk_queue_stackable(q))
		return sprintf(name, "none\n");

	if (!q->elevator)
		len += sprintf(name+len, "[none] ");
	else
		elv = e->type;

	spin_lock(&elv_list_lock);
	list_for_each_entry(__e, &elv_list, list) {
		if (elv && !strcmp(elv->elevator_name, __e->elevator_name)) {
			len += sprintf(name+len, "[%s] ", elv->elevator_name);
			continue;
		}
		if (__e->uses_mq && q->mq_ops)
			len += sprintf(name+len, "%s ", __e->elevator_name);
		else if (!__e->uses_mq && !q->mq_ops)
			len += sprintf(name+len, "%s ", __e->elevator_name);
	}
	spin_unlock(&elv_list_lock);

	if (q->mq_ops && q->elevator)
		len += sprintf(name+len, "none");

	len += sprintf(len+name, "\n");
	return len;
}

struct request *elv_rb_former_request(struct request_queue *q,
				      struct request *rq)
{
	struct rb_node *rbprev = rb_prev(&rq->rb_node);

	if (rbprev)
		return rb_entry_rq(rbprev);

	return NULL;
}
EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(struct request_queue *q,
				      struct request *rq)
{
	struct rb_node *rbnext = rb_next(&rq->rb_node);

	if (rbnext)
		return rb_entry_rq(rbnext);

	return NULL;
}
EXPORT_SYMBOL(elv_rb_latter_request);
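
/*
 * Illustrative sketch (not part of this file): a minimal legacy
 * (single-queue) scheduler registers itself with elv_register() from its
 * module init and removes itself with elv_unregister() on exit, roughly:
 *
 *	static struct elevator_type example_iosched = {
 *		.ops.sq = {
 *			.elevator_init_fn	= example_init_queue,
 *			.elevator_exit_fn	= example_exit_queue,
 *			.elevator_add_req_fn	= example_add_request,
 *			.elevator_dispatch_fn	= example_dispatch,
 *		},
 *		.elevator_name	= "example",
 *		.elevator_owner	= THIS_MODULE,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return elv_register(&example_iosched);
 *	}
 *
 *	static void __exit example_exit(void)
 *	{
 *		elv_unregister(&example_iosched);
 *	}
 *
 * The example_* callbacks above are hypothetical; see noop-iosched.c for a
 * real minimal implementation.
 */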