/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@kernel.dk> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 * when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>

#include <asm/uaccess.h>

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * Merge hash stuff.
 */
static const int elv_hash_shift = 6;
#define ELV_HASH_BLOCK(sec)     ((sec) >> 3)
#define ELV_HASH_FN(sec)        (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
#define ELV_HASH_ENTRIES        (1 << elv_hash_shift)
#define rq_hash_key(rq)         ((rq)->sector + (rq)->nr_sectors)
#define ELV_ON_HASH(rq)         (!hlist_unhashed(&(rq)->hash))

/*
 * Query io scheduler to see if the current process issuing bio may be
 * merged with rq.
 */
static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
{
        request_queue_t *q = rq->q;
        elevator_t *e = q->elevator;

        if (e->ops->elevator_allow_merge_fn)
                return e->ops->elevator_allow_merge_fn(q, rq, bio);

        return 1;
}

/*
 * can we safely merge with this request?
 */
inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
        if (!rq_mergeable(rq))
                return 0;

        /*
         * different data direction or already started, don't merge
         */
        if (bio_data_dir(bio) != rq_data_dir(rq))
                return 0;

        /*
         * must be same device and not a special request
         */
        if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
                return 0;

        if (!elv_iosched_allow_merge(rq, bio))
                return 0;

        return 1;
}
EXPORT_SYMBOL(elv_rq_merge_ok);

static inline int elv_try_merge(struct request *__rq, struct bio *bio)
{
        int ret = ELEVATOR_NO_MERGE;

        /*
         * we can merge and sequence is ok, check if it's possible
         */
        if (elv_rq_merge_ok(__rq, bio)) {
                if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
                        ret = ELEVATOR_BACK_MERGE;
                else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
                        ret = ELEVATOR_FRONT_MERGE;
        }

        return ret;
}

static struct elevator_type *elevator_find(const char *name)
{
        struct elevator_type *e;
        struct list_head *entry;

        list_for_each(entry, &elv_list) {

                e = list_entry(entry, struct elevator_type, list);

                if (!strcmp(e->elevator_name, name))
                        return e;
        }

        return NULL;
}

static void elevator_put(struct elevator_type *e)
{
        module_put(e->elevator_owner);
}

static struct elevator_type *elevator_get(const char *name)
{
        struct elevator_type *e;

        spin_lock_irq(&elv_list_lock);

        e = elevator_find(name);
        if (e && !try_module_get(e->elevator_owner))
                e = NULL;

        spin_unlock_irq(&elv_list_lock);

        return e;
}

static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
{
        return eq->ops->elevator_init_fn(q);
}

static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
                            void *data)
{
        q->elevator = eq;
        eq->elevator_data = data;
}

static char chosen_elevator[16];

static int __init elevator_setup(char *str)
{
        /*
         * Be backwards-compatible with previous kernels, so users
         * won't get the wrong elevator.
         */
        if (!strcmp(str, "as"))
                strcpy(chosen_elevator, "anticipatory");
        else
                strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
        return 1;
}

__setup("elevator=", elevator_setup);

static struct kobj_type elv_ktype;

static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e)
{
        elevator_t *eq;
        int i;

        eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node);
        if (unlikely(!eq))
                goto err;

        memset(eq, 0, sizeof(*eq));
        eq->ops = &e->ops;
        eq->elevator_type = e;
        kobject_init(&eq->kobj);
        snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
        eq->kobj.ktype = &elv_ktype;
        mutex_init(&eq->sysfs_lock);

        eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
                                GFP_KERNEL, q->node);
        if (!eq->hash)
                goto err;

        for (i = 0; i < ELV_HASH_ENTRIES; i++)
                INIT_HLIST_HEAD(&eq->hash[i]);

        return eq;
err:
        kfree(eq);
        elevator_put(e);
        return NULL;
}

static void elevator_release(struct kobject *kobj)
{
        elevator_t *e = container_of(kobj, elevator_t, kobj);

        elevator_put(e->elevator_type);
        kfree(e->hash);
        kfree(e);
}

int elevator_init(request_queue_t *q, char *name)
{
        struct elevator_type *e = NULL;
        struct elevator_queue *eq;
        int ret = 0;
        void *data;

        INIT_LIST_HEAD(&q->queue_head);
        q->last_merge = NULL;
        q->end_sector = 0;
        q->boundary_rq = NULL;

        if (name && !(e = elevator_get(name)))
                return -EINVAL;

        if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
                printk("I/O scheduler %s not found\n", chosen_elevator);

        if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
                printk("Default I/O scheduler not found, using no-op\n");
                e = elevator_get("noop");
        }

        eq = elevator_alloc(q, e);
        if (!eq)
                return -ENOMEM;

        data = elevator_init_queue(q, eq);
        if (!data) {
                kobject_put(&eq->kobj);
                return -ENOMEM;
        }

        elevator_attach(q, eq, data);
        return ret;
}

EXPORT_SYMBOL(elevator_init);

void elevator_exit(elevator_t *e)
{
        mutex_lock(&e->sysfs_lock);
        if (e->ops->elevator_exit_fn)
                e->ops->elevator_exit_fn(e);
        e->ops = NULL;
        mutex_unlock(&e->sysfs_lock);

        kobject_put(&e->kobj);
}

EXPORT_SYMBOL(elevator_exit);

static inline void __elv_rqhash_del(struct request *rq)
{
        hlist_del_init(&rq->hash);
}

static void elv_rqhash_del(request_queue_t *q, struct request *rq)
{
        if (ELV_ON_HASH(rq))
                __elv_rqhash_del(rq);
}

static void elv_rqhash_add(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        BUG_ON(ELV_ON_HASH(rq));
        hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
}

static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
{
        __elv_rqhash_del(rq);
        elv_rqhash_add(q, rq);
}

static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
{
        elevator_t *e = q->elevator;
        struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
        struct hlist_node *entry, *next;
        struct request *rq;

        hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
                BUG_ON(!ELV_ON_HASH(rq));

                if (unlikely(!rq_mergeable(rq))) {
                        __elv_rqhash_del(rq);
                        continue;
                }

                if (rq_hash_key(rq) == offset)
                        return rq;
        }

        return NULL;
}

/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
struct request *elv_rb_add(struct rb_root *root, struct request *rq)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct request *__rq;

        while (*p) {
                parent = *p;
                __rq = rb_entry(parent, struct request, rb_node);

                if (rq->sector < __rq->sector)
                        p = &(*p)->rb_left;
                else if (rq->sector > __rq->sector)
                        p = &(*p)->rb_right;
                else
                        return __rq;
        }

        rb_link_node(&rq->rb_node, parent, p);
        rb_insert_color(&rq->rb_node, root);
        return NULL;
}

EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
        BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
        rb_erase(&rq->rb_node, root);
        RB_CLEAR_NODE(&rq->rb_node);
}

EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
        struct rb_node *n = root->rb_node;
        struct request *rq;

        while (n) {
                rq = rb_entry(n, struct request, rb_node);

                if (sector < rq->sector)
                        n = n->rb_left;
                else if (sector > rq->sector)
                        n = n->rb_right;
                else
                        return rq;
        }

        return NULL;
}

EXPORT_SYMBOL(elv_rb_find);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is sort-inserted into the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_sort(request_queue_t *q, struct request *rq)
{
        sector_t boundary;
        struct list_head *entry;

        if (q->last_merge == rq)
                q->last_merge = NULL;

        elv_rqhash_del(q, rq);

        q->nr_sorted--;

        boundary = q->end_sector;

        list_for_each_prev(entry, &q->queue_head) {
                struct request *pos = list_entry_rq(entry);

                if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
                        break;
                if (rq->sector >= boundary) {
                        if (pos->sector < boundary)
                                continue;
                } else {
                        if (pos->sector >= boundary)
                                break;
                }
                if (rq->sector >= pos->sector)
                        break;
        }

        list_add(&rq->queuelist, entry);
}

EXPORT_SYMBOL(elv_dispatch_sort);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is added to the back of the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
        if (q->last_merge == rq)
                q->last_merge = NULL;

        elv_rqhash_del(q, rq);

        q->nr_sorted--;

        q->end_sector = rq_end_sector(rq);
        q->boundary_rq = rq;
        list_add_tail(&rq->queuelist, &q->queue_head);
}

EXPORT_SYMBOL(elv_dispatch_add_tail);

int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
        elevator_t *e = q->elevator;
        struct request *__rq;
        int ret;

        /*
         * First try one-hit cache.
         */
        if (q->last_merge) {
                ret = elv_try_merge(q->last_merge, bio);
                if (ret != ELEVATOR_NO_MERGE) {
                        *req = q->last_merge;
                        return ret;
                }
        }

        /*
         * See if our hash lookup can find a potential backmerge.
         */
        __rq = elv_rqhash_find(q, bio->bi_sector);
        if (__rq && elv_rq_merge_ok(__rq, bio)) {
                *req = __rq;
                return ELEVATOR_BACK_MERGE;
        }

        if (e->ops->elevator_merge_fn)
                return e->ops->elevator_merge_fn(q, req, bio);

        return ELEVATOR_NO_MERGE;
}

void elv_merged_request(request_queue_t *q, struct request *rq, int type)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_merged_fn)
                e->ops->elevator_merged_fn(q, rq, type);

        if (type == ELEVATOR_BACK_MERGE)
                elv_rqhash_reposition(q, rq);

        q->last_merge = rq;
}

void elv_merge_requests(request_queue_t *q, struct request *rq,
                        struct request *next)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_merge_req_fn)
                e->ops->elevator_merge_req_fn(q, rq, next);

        elv_rqhash_reposition(q, rq);
        elv_rqhash_del(q, next);

        q->nr_sorted--;
        q->last_merge = rq;
}

void elv_requeue_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        /*
         * it already went through dequeue, we need to decrement the
         * in_flight count again
         */
        if (blk_account_rq(rq)) {
                q->in_flight--;
                if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
                        e->ops->elevator_deactivate_req_fn(q, rq);
        }

        rq->cmd_flags &= ~REQ_STARTED;

        elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
}

static void elv_drain_elevator(request_queue_t *q)
{
        static int printed;

        while (q->elevator->ops->elevator_dispatch_fn(q, 1))
                ;
        if (q->nr_sorted == 0)
                return;
        if (printed++ < 10) {
                printk(KERN_ERR "%s: forced dispatching is broken "
                       "(nr_sorted=%u), please report this\n",
                       q->elevator->elevator_type->elevator_name, q->nr_sorted);
        }
}

void elv_insert(request_queue_t *q, struct request *rq, int where)
{
        struct list_head *pos;
        unsigned ordseq;
        int unplug_it = 1;

        blk_add_trace_rq(q, rq, BLK_TA_INSERT);

        rq->q = q;

        switch (where) {
        case ELEVATOR_INSERT_FRONT:
                rq->cmd_flags |= REQ_SOFTBARRIER;

                list_add(&rq->queuelist, &q->queue_head);
                break;

        case ELEVATOR_INSERT_BACK:
                rq->cmd_flags |= REQ_SOFTBARRIER;
                elv_drain_elevator(q);
                list_add_tail(&rq->queuelist, &q->queue_head);
                /*
                 * We kick the queue here for the following reasons.
                 * - The elevator might have returned NULL previously
                 *   to delay requests and returned them now.  As the
                 *   queue wasn't empty before this request, ll_rw_blk
                 *   won't run the queue on return, resulting in a hang.
                 * - Usually, back inserted requests won't be merged
                 *   with anything.  There's no point in delaying queue
                 *   processing.
                 */
                blk_remove_plug(q);
                q->request_fn(q);
                break;

        case ELEVATOR_INSERT_SORT:
                BUG_ON(!blk_fs_request(rq));
                rq->cmd_flags |= REQ_SORTED;
                q->nr_sorted++;
                if (rq_mergeable(rq)) {
                        elv_rqhash_add(q, rq);
                        if (!q->last_merge)
                                q->last_merge = rq;
                }

                /*
                 * Some ioscheds (cfq) run q->request_fn directly, so
                 * rq cannot be accessed after calling
                 * elevator_add_req_fn.
                 */
                q->elevator->ops->elevator_add_req_fn(q, rq);
                break;

        case ELEVATOR_INSERT_REQUEUE:
                /*
                 * If ordered flush isn't in progress, we do front
                 * insertion; otherwise, requests should be requeued
                 * in ordseq order.
                 */
                rq->cmd_flags |= REQ_SOFTBARRIER;

                /*
                 * Most requeues happen because of a busy condition,
                 * don't force unplug of the queue for that case.
                 */
                unplug_it = 0;

                if (q->ordseq == 0) {
                        list_add(&rq->queuelist, &q->queue_head);
                        break;
                }

                ordseq = blk_ordered_req_seq(rq);

                list_for_each(pos, &q->queue_head) {
                        struct request *pos_rq = list_entry_rq(pos);
                        if (ordseq <= blk_ordered_req_seq(pos_rq))
                                break;
                }

                list_add_tail(&rq->queuelist, pos);
                break;

        default:
                printk(KERN_ERR "%s: bad insertion point %d\n",
                       __FUNCTION__, where);
                BUG();
        }

        if (unplug_it && blk_queue_plugged(q)) {
                int nrq = q->rq.count[READ] + q->rq.count[WRITE]
                        - q->in_flight;

                if (nrq >= q->unplug_thresh)
                        __generic_unplug_device(q);
        }
}

void __elv_add_request(request_queue_t *q, struct request *rq, int where,
                       int plug)
{
        if (q->ordcolor)
                rq->cmd_flags |= REQ_ORDERED_COLOR;

        if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
                /*
                 * toggle ordered color
                 */
                if (blk_barrier_rq(rq))
                        q->ordcolor ^= 1;

                /*
                 * barriers implicitly indicate back insertion
                 */
                if (where == ELEVATOR_INSERT_SORT)
                        where = ELEVATOR_INSERT_BACK;

                /*
                 * this request is a scheduling boundary, update
                 * end_sector
                 */
                if (blk_fs_request(rq)) {
                        q->end_sector = rq_end_sector(rq);
                        q->boundary_rq = rq;
                }
        } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
                where = ELEVATOR_INSERT_BACK;

        if (plug)
                blk_plug_device(q);

        elv_insert(q, rq, where);
}

EXPORT_SYMBOL(__elv_add_request);

void elv_add_request(request_queue_t *q, struct request *rq, int where,
                     int plug)
{
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        __elv_add_request(q, rq, where, plug);
        spin_unlock_irqrestore(q->queue_lock, flags);
}

EXPORT_SYMBOL(elv_add_request);

static inline struct request *__elv_next_request(request_queue_t *q)
{
        struct request *rq;

        while (1) {
                while (!list_empty(&q->queue_head)) {
                        rq = list_entry_rq(q->queue_head.next);
                        if (blk_do_ordered(q, &rq))
                                return rq;
                }

                if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
                        return NULL;
        }
}

struct request *elv_next_request(request_queue_t *q)
{
        struct request *rq;
        int ret;

        while ((rq = __elv_next_request(q)) != NULL) {
                if (!(rq->cmd_flags & REQ_STARTED)) {
                        elevator_t *e = q->elevator;

                        /*
                         * This is the first time the device driver
                         * sees this request (possibly after
                         * requeueing).  Notify IO scheduler.
                         */
                        if (blk_sorted_rq(rq) &&
                            e->ops->elevator_activate_req_fn)
                                e->ops->elevator_activate_req_fn(q, rq);

                        /*
                         * just mark as started even if we don't start
                         * it, a request that has been delayed should
                         * not be passed by new incoming requests
                         */
                        rq->cmd_flags |= REQ_STARTED;
                        blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
                }

                if (!q->boundary_rq || q->boundary_rq == rq) {
                        q->end_sector = rq_end_sector(rq);
                        q->boundary_rq = NULL;
                }

                if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
                        break;

                ret = q->prep_rq_fn(q, rq);
                if (ret == BLKPREP_OK) {
                        break;
                } else if (ret == BLKPREP_DEFER) {
                        /*
                         * the request may have been (partially) prepped.
                         * we need to keep this request in the front to
                         * avoid resource deadlock.  REQ_STARTED will
                         * prevent other fs requests from passing this one.
                         */
                        rq = NULL;
                        break;
                } else if (ret == BLKPREP_KILL) {
                        int nr_bytes = rq->hard_nr_sectors << 9;

                        if (!nr_bytes)
                                nr_bytes = rq->data_len;

                        blkdev_dequeue_request(rq);
                        rq->cmd_flags |= REQ_QUIET;
                        end_that_request_chunk(rq, 0, nr_bytes);
                        end_that_request_last(rq, 0);
                } else {
                        printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
                               ret);
                        break;
                }
        }

        return rq;
}

EXPORT_SYMBOL(elv_next_request);

void elv_dequeue_request(request_queue_t *q, struct request *rq)
{
        BUG_ON(list_empty(&rq->queuelist));
        BUG_ON(ELV_ON_HASH(rq));

        list_del_init(&rq->queuelist);

        /*
         * the time frame between a request being removed from the lists
         * and when it is freed is accounted as io that is in progress at
         * the driver side.
         */
        if (blk_account_rq(rq))
                q->in_flight++;
}

EXPORT_SYMBOL(elv_dequeue_request);

int elv_queue_empty(request_queue_t *q)
{
        elevator_t *e = q->elevator;

        if (!list_empty(&q->queue_head))
                return 0;

        if (e->ops->elevator_queue_empty_fn)
                return e->ops->elevator_queue_empty_fn(q);

        return 1;
}

EXPORT_SYMBOL(elv_queue_empty);

struct request *elv_latter_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_latter_req_fn)
                return e->ops->elevator_latter_req_fn(q, rq);
        return NULL;
}

struct request *elv_former_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_former_req_fn)
                return e->ops->elevator_former_req_fn(q, rq);
        return NULL;
}

int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_set_req_fn)
                return e->ops->elevator_set_req_fn(q, rq, gfp_mask);

        rq->elevator_private = NULL;
        return 0;
}

void elv_put_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_put_req_fn)
                e->ops->elevator_put_req_fn(rq);
}

int elv_may_queue(request_queue_t *q, int rw)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_may_queue_fn)
                return e->ops->elevator_may_queue_fn(q, rw);

        return ELV_MQUEUE_MAY;
}

void elv_completed_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        /*
         * request is released from the driver, io must be done
         */
        if (blk_account_rq(rq)) {
                q->in_flight--;
                if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
                        e->ops->elevator_completed_req_fn(q, rq);
        }

        /*
         * Check if the queue is waiting for fs requests to be
         * drained for flush sequence.
         */
        if (unlikely(q->ordseq)) {
                struct request *first_rq = list_entry_rq(q->queue_head.next);

                if (q->in_flight == 0 &&
                    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
                    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
                        blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
                        q->request_fn(q);
                }
        }
}

#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
        elevator_t *e = container_of(kobj, elevator_t, kobj);
        struct elv_fs_entry *entry = to_elv(attr);
        ssize_t error;

        if (!entry->show)
                return -EIO;

        mutex_lock(&e->sysfs_lock);
        error = e->ops ? entry->show(e, page) : -ENOENT;
        mutex_unlock(&e->sysfs_lock);
        return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
               const char *page, size_t length)
{
        elevator_t *e = container_of(kobj, elevator_t, kobj);
        struct elv_fs_entry *entry = to_elv(attr);
        ssize_t error;

        if (!entry->store)
                return -EIO;

        mutex_lock(&e->sysfs_lock);
        error = e->ops ? entry->store(e, page, length) : -ENOENT;
        mutex_unlock(&e->sysfs_lock);
        return error;
}

static struct sysfs_ops elv_sysfs_ops = {
        .show   = elv_attr_show,
        .store  = elv_attr_store,
};

static struct kobj_type elv_ktype = {
        .sysfs_ops      = &elv_sysfs_ops,
        .release        = elevator_release,
};

int elv_register_queue(struct request_queue *q)
{
        elevator_t *e = q->elevator;
        int error;

        e->kobj.parent = &q->kobj;

        error = kobject_add(&e->kobj);
        if (!error) {
                struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;

                if (attr) {
                        while (attr->attr.name) {
                                if (sysfs_create_file(&e->kobj, &attr->attr))
                                        break;
                                attr++;
                        }
                }
                kobject_uevent(&e->kobj, KOBJ_ADD);
        }
        return error;
}

static void __elv_unregister_queue(elevator_t *e)
{
        kobject_uevent(&e->kobj, KOBJ_REMOVE);
        kobject_del(&e->kobj);
}

void elv_unregister_queue(struct request_queue *q)
{
        if (q)
                __elv_unregister_queue(q->elevator);
}

int elv_register(struct elevator_type *e)
{
        spin_lock_irq(&elv_list_lock);
        BUG_ON(elevator_find(e->elevator_name));
        list_add_tail(&e->list, &elv_list);
        spin_unlock_irq(&elv_list_lock);

        printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
        if (!strcmp(e->elevator_name, chosen_elevator) ||
            (!*chosen_elevator &&
             !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
                printk(" (default)");
        printk("\n");
        return 0;
}
EXPORT_SYMBOL_GPL(elv_register);

void elv_unregister(struct elevator_type *e)
{
        struct task_struct *g, *p;

        /*
         * Iterate every thread in the process to remove the io contexts.
         */
        if (e->ops.trim) {
                read_lock(&tasklist_lock);
                do_each_thread(g, p) {
                        task_lock(p);
                        if (p->io_context)
                                e->ops.trim(p->io_context);
                        task_unlock(p);
                } while_each_thread(g, p);
                read_unlock(&tasklist_lock);
        }

        spin_lock_irq(&elv_list_lock);
        list_del_init(&e->list);
        spin_unlock_irq(&elv_list_lock);
}
EXPORT_SYMBOL_GPL(elv_unregister);

/*
 * switch to new_e io scheduler.  be careful not to introduce deadlocks -
 * we don't free the old io scheduler before we have allocated what we
 * need for the new one.
 * This way we have a chance of going back to the old one, if the new one
 * fails init for some reason.
 */
static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
{
        elevator_t *old_elevator, *e;
        void *data;

        /*
         * Allocate new elevator
         */
        e = elevator_alloc(q, new_e);
        if (!e)
                return 0;

        data = elevator_init_queue(q, e);
        if (!data) {
                kobject_put(&e->kobj);
                return 0;
        }

        /*
         * Turn on BYPASS and drain all requests w/ elevator private data
         */
        spin_lock_irq(q->queue_lock);

        set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

        elv_drain_elevator(q);

        while (q->rq.elvpriv) {
                blk_remove_plug(q);
                q->request_fn(q);
                spin_unlock_irq(q->queue_lock);
                msleep(10);
                spin_lock_irq(q->queue_lock);
                elv_drain_elevator(q);
        }

        /*
         * Remember old elevator.
         */
        old_elevator = q->elevator;

        /*
         * attach and start new elevator
         */
        elevator_attach(q, e, data);

        spin_unlock_irq(q->queue_lock);

        __elv_unregister_queue(old_elevator);

        if (elv_register_queue(q))
                goto fail_register;

        /*
         * finally exit old elevator and turn off BYPASS.
         */
        elevator_exit(old_elevator);
        clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
        return 1;

fail_register:
        /*
         * switch failed, exit the new io scheduler and reattach the old
         * one again (along with re-adding the sysfs dir)
         */
        elevator_exit(e);
        q->elevator = old_elevator;
        elv_register_queue(q);
        clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
        return 0;
}

ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
{
        char elevator_name[ELV_NAME_MAX];
        size_t len;
        struct elevator_type *e;

        elevator_name[sizeof(elevator_name) - 1] = '\0';
        strncpy(elevator_name, name, sizeof(elevator_name) - 1);
        len = strlen(elevator_name);

        if (len && elevator_name[len - 1] == '\n')
                elevator_name[len - 1] = '\0';

        e = elevator_get(elevator_name);
        if (!e) {
                printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
                return -EINVAL;
        }

        if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
                elevator_put(e);
                return count;
        }

        if (!elevator_switch(q, e))
                printk(KERN_ERR "elevator: switch to %s failed\n", elevator_name);
        return count;
}

ssize_t elv_iosched_show(request_queue_t *q, char *name)
{
        elevator_t *e = q->elevator;
        struct elevator_type *elv = e->elevator_type;
        struct list_head *entry;
        int len = 0;

        spin_lock_irq(&elv_list_lock);
        list_for_each(entry, &elv_list) {
                struct elevator_type *__e;

                __e = list_entry(entry, struct elevator_type, list);
                if (!strcmp(elv->elevator_name, __e->elevator_name))
                        len += sprintf(name+len, "[%s] ", elv->elevator_name);
                else
                        len += sprintf(name+len, "%s ", __e->elevator_name);
        }
        spin_unlock_irq(&elv_list_lock);

        len += sprintf(len+name, "\n");
        return len;
}

struct request *elv_rb_former_request(request_queue_t *q, struct request *rq)
{
        struct rb_node *rbprev = rb_prev(&rq->rb_node);

        if (rbprev)
                return rb_entry_rq(rbprev);

        return NULL;
}

EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq)
{
        struct rb_node *rbnext = rb_next(&rq->rb_node);

        if (rbnext)
                return rb_entry_rq(rbnext);

        return NULL;
}

EXPORT_SYMBOL(elv_rb_latter_request);
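
/*
 * Usage sketch (illustrative only, kept inside #if 0 so it does not affect
 * the build): a minimal FIFO-style I/O scheduler wired into the hooks this
 * file calls.  The elevator_ops callbacks shown are the ones invoked above
 * (elevator_init_fn, elevator_add_req_fn, elevator_dispatch_fn, ...); the
 * "example_" names and the private queue are hypothetical, and a real
 * scheduler would live in its own module (compare noop-iosched.c).  The
 * resulting scheduler would be selectable via the "elevator=" boot
 * parameter handled by elevator_setup() or via the sysfs attribute served
 * by elv_iosched_store()/elv_iosched_show().
 */
#if 0   /* sketch, assuming the callback signatures used in this file */
struct example_data {
        struct list_head queue;         /* simple FIFO of pending requests */
};

static void *example_init_queue(request_queue_t *q)
{
        struct example_data *ed;

        ed = kmalloc_node(sizeof(*ed), GFP_KERNEL, q->node);
        if (!ed)
                return NULL;
        INIT_LIST_HEAD(&ed->queue);
        return ed;                      /* stored as eq->elevator_data */
}

static void example_exit_queue(elevator_t *e)
{
        struct example_data *ed = e->elevator_data;

        BUG_ON(!list_empty(&ed->queue));
        kfree(ed);
}

static void example_add_request(request_queue_t *q, struct request *rq)
{
        struct example_data *ed = q->elevator->elevator_data;

        list_add_tail(&rq->queuelist, &ed->queue);
}

static int example_dispatch(request_queue_t *q, int force)
{
        struct example_data *ed = q->elevator->elevator_data;
        struct request *rq;

        if (list_empty(&ed->queue))
                return 0;

        rq = list_entry_rq(ed->queue.next);
        list_del_init(&rq->queuelist);
        /* hand one request to the dispatch queue, sector-sorted */
        elv_dispatch_sort(q, rq);
        return 1;
}

static int example_queue_empty(request_queue_t *q)
{
        struct example_data *ed = q->elevator->elevator_data;

        return list_empty(&ed->queue);
}

static struct elevator_type elevator_example = {
        .ops = {
                .elevator_dispatch_fn           = example_dispatch,
                .elevator_add_req_fn            = example_add_request,
                .elevator_queue_empty_fn        = example_queue_empty,
                .elevator_init_fn               = example_init_queue,
                .elevator_exit_fn               = example_exit_queue,
        },
        .elevator_name  = "example",
        .elevator_owner = THIS_MODULE,
};

static int __init example_init(void)
{
        return elv_register(&elevator_example);  /* found later by elevator_get() */
}

static void __exit example_exit(void)
{
        elv_unregister(&elevator_example);
}

module_init(example_init);
module_exit(example_exit);
#endif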