/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@kernel.dk> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 *  when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - loose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>

#include <asm/uaccess.h>

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * Merge hash stuff.
 */
static const int elv_hash_shift = 6;
#define ELV_HASH_BLOCK(sec)	((sec) >> 3)
#define ELV_HASH_FN(sec)	(hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
#define ELV_HASH_ENTRIES	(1 << elv_hash_shift)
#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
#define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))

/*
 * can we safely merge with this request?
 */
inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq))
		return 0;

	/*
	 * different data direction or already started, don't merge
	 */
	if (bio_data_dir(bio) != rq_data_dir(rq))
		return 0;

	/*
	 * same device and no special stuff set, merge is ok
	 */
	if (rq->rq_disk == bio->bi_bdev->bd_disk && !rq->special)
		return 1;

	return 0;
}
EXPORT_SYMBOL(elv_rq_merge_ok);

static inline int elv_try_merge(struct request *__rq, struct bio *bio)
{
	int ret = ELEVATOR_NO_MERGE;

	/*
	 * we can merge and sequence is ok, check if it's possible
	 */
	if (elv_rq_merge_ok(__rq, bio)) {
		if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
			ret = ELEVATOR_BACK_MERGE;
		else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
			ret = ELEVATOR_FRONT_MERGE;
	}

	return ret;
}

static struct elevator_type *elevator_find(const char *name)
{
	struct elevator_type *e;
	struct list_head *entry;

	list_for_each(entry, &elv_list) {

		e = list_entry(entry, struct elevator_type, list);

		if (!strcmp(e->elevator_name, name))
			return e;
	}

	return NULL;
}

static void elevator_put(struct elevator_type *e)
{
	module_put(e->elevator_owner);
}

static struct elevator_type *elevator_get(const char *name)
{
	struct elevator_type *e;

	spin_lock_irq(&elv_list_lock);

	e = elevator_find(name);
	if (e && !try_module_get(e->elevator_owner))
		e = NULL;

	spin_unlock_irq(&elv_list_lock);

	return e;
}

static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
{
	return eq->ops->elevator_init_fn(q);
}
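/*
 * ->elevator_init_fn() returns the scheduler's private data (NULL on
 * failure); elevator_attach() below publishes the elevator on the queue
 * and stores that data in eq->elevator_data for later ops calls.
 */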
static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
			    void *data)
{
	q->elevator = eq;
	eq->elevator_data = data;
}

static char chosen_elevator[16];

static int __init elevator_setup(char *str)
{
	/*
	 * Be backwards-compatible with previous kernels, so users
	 * won't get the wrong elevator.
	 */
	if (!strcmp(str, "as"))
		strcpy(chosen_elevator, "anticipatory");
	else
		strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
	return 1;
}

__setup("elevator=", elevator_setup);

static struct kobj_type elv_ktype;

static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e)
{
	elevator_t *eq;
	int i;

	eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node);
	if (unlikely(!eq))
		goto err;

	memset(eq, 0, sizeof(*eq));
	eq->ops = &e->ops;
	eq->elevator_type = e;
	kobject_init(&eq->kobj);
	snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
	eq->kobj.ktype = &elv_ktype;
	mutex_init(&eq->sysfs_lock);

	eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
				GFP_KERNEL, q->node);
	if (!eq->hash)
		goto err;

	for (i = 0; i < ELV_HASH_ENTRIES; i++)
		INIT_HLIST_HEAD(&eq->hash[i]);

	return eq;
err:
	kfree(eq);
	elevator_put(e);
	return NULL;
}

static void elevator_release(struct kobject *kobj)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);

	elevator_put(e->elevator_type);
	kfree(e->hash);
	kfree(e);
}

int elevator_init(request_queue_t *q, char *name)
{
	struct elevator_type *e = NULL;
	struct elevator_queue *eq;
	int ret = 0;
	void *data;

	INIT_LIST_HEAD(&q->queue_head);
	q->last_merge = NULL;
	q->end_sector = 0;
	q->boundary_rq = NULL;

	if (name && !(e = elevator_get(name)))
		return -EINVAL;

	if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
		printk("I/O scheduler %s not found\n", chosen_elevator);

	if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
		printk("Default I/O scheduler not found, using no-op\n");
		e = elevator_get("noop");
	}

	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;

	data = elevator_init_queue(q, eq);
	if (!data) {
		kobject_put(&eq->kobj);
		return -ENOMEM;
	}

	elevator_attach(q, eq, data);
	return ret;
}

EXPORT_SYMBOL(elevator_init);

void elevator_exit(elevator_t *e)
{
	mutex_lock(&e->sysfs_lock);
	if (e->ops->elevator_exit_fn)
		e->ops->elevator_exit_fn(e);
	e->ops = NULL;
	mutex_unlock(&e->sysfs_lock);

	kobject_put(&e->kobj);
}

EXPORT_SYMBOL(elevator_exit);

static inline void __elv_rqhash_del(struct request *rq)
{
	hlist_del_init(&rq->hash);
}

static void elv_rqhash_del(request_queue_t *q, struct request *rq)
{
	if (ELV_ON_HASH(rq))
		__elv_rqhash_del(rq);
}

static void elv_rqhash_add(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	BUG_ON(ELV_ON_HASH(rq));
	hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
}

static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
{
	__elv_rqhash_del(rq);
	elv_rqhash_add(q, rq);
}
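/*
 * The merge hash indexes requests by their *end* sector (rq_hash_key()),
 * so elv_merge() can find back-merge candidates directly from a bio's
 * start sector.  For example, a request covering sectors 2048..2055
 * (sector 2048, nr_sectors 8) hashes under key 2056; a new bio starting
 * at sector 2056 looks up the same key and is back-merged onto it.
 */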
static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
{
	elevator_t *e = q->elevator;
	struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
	struct hlist_node *entry, *next;
	struct request *rq;

	hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
		BUG_ON(!ELV_ON_HASH(rq));

		if (unlikely(!rq_mergeable(rq))) {
			__elv_rqhash_del(rq);
			continue;
		}

		if (rq_hash_key(rq) == offset)
			return rq;
	}

	return NULL;
}

/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
struct request *elv_rb_add(struct rb_root *root, struct request *rq)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct request *__rq;

	while (*p) {
		parent = *p;
		__rq = rb_entry(parent, struct request, rb_node);

		if (rq->sector < __rq->sector)
			p = &(*p)->rb_left;
		else if (rq->sector > __rq->sector)
			p = &(*p)->rb_right;
		else
			return __rq;
	}

	rb_link_node(&rq->rb_node, parent, p);
	rb_insert_color(&rq->rb_node, root);
	return NULL;
}

EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
	rb_erase(&rq->rb_node, root);
	RB_CLEAR_NODE(&rq->rb_node);
}

EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
	struct rb_node *n = root->rb_node;
	struct request *rq;

	while (n) {
		rq = rb_entry(n, struct request, rb_node);

		if (sector < rq->sector)
			n = n->rb_left;
		else if (sector > rq->sector)
			n = n->rb_right;
		else
			return rq;
	}

	return NULL;
}

EXPORT_SYMBOL(elv_rb_find);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is sorted into the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_sort(request_queue_t *q, struct request *rq)
{
	sector_t boundary;
	struct list_head *entry;

	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	boundary = q->end_sector;

	list_for_each_prev(entry, &q->queue_head) {
		struct request *pos = list_entry_rq(entry);

		if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
			break;
		if (rq->sector >= boundary) {
			if (pos->sector < boundary)
				continue;
		} else {
			if (pos->sector >= boundary)
				break;
		}
		if (rq->sector >= pos->sector)
			break;
	}

	list_add(&rq->queuelist, entry);
}

EXPORT_SYMBOL(elv_dispatch_sort);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is added to the back of the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	q->end_sector = rq_end_sector(rq);
	q->boundary_rq = rq;
	list_add_tail(&rq->queuelist, &q->queue_head);
}

EXPORT_SYMBOL(elv_dispatch_add_tail);
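/*
 * A scheduler's ->elevator_dispatch_fn() hands requests back to the block
 * layer with one of the two helpers above.  An illustrative sketch (not
 * from this file) of a FIFO-style dispatch callback:
 *
 *	static int foo_dispatch(request_queue_t *q, int force)
 *	{
 *		struct foo_data *fd = q->elevator->elevator_data;
 *
 *		if (!list_empty(&fd->fifo)) {
 *			struct request *rq = list_entry_rq(fd->fifo.next);
 *			list_del_init(&rq->queuelist);
 *			elv_dispatch_sort(q, rq);
 *			return 1;
 *		}
 *		return 0;
 *	}
 *
 * returning 1 when a request was moved and 0 when the scheduler is empty.
 */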
int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
	elevator_t *e = q->elevator;
	struct request *__rq;
	int ret;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge) {
		ret = elv_try_merge(q->last_merge, bio);
		if (ret != ELEVATOR_NO_MERGE) {
			*req = q->last_merge;
			return ret;
		}
	}

	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	__rq = elv_rqhash_find(q, bio->bi_sector);
	if (__rq && elv_rq_merge_ok(__rq, bio)) {
		*req = __rq;
		return ELEVATOR_BACK_MERGE;
	}

	if (e->ops->elevator_merge_fn)
		return e->ops->elevator_merge_fn(q, req, bio);

	return ELEVATOR_NO_MERGE;
}

void elv_merged_request(request_queue_t *q, struct request *rq, int type)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_merged_fn)
		e->ops->elevator_merged_fn(q, rq, type);

	if (type == ELEVATOR_BACK_MERGE)
		elv_rqhash_reposition(q, rq);

	q->last_merge = rq;
}

void elv_merge_requests(request_queue_t *q, struct request *rq,
			struct request *next)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_merge_req_fn)
		e->ops->elevator_merge_req_fn(q, rq, next);

	elv_rqhash_reposition(q, rq);
	elv_rqhash_del(q, next);

	q->nr_sorted--;
	q->last_merge = rq;
}

void elv_requeue_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	/*
	 * it already went through dequeue, we need to decrement the
	 * in_flight count again
	 */
	if (blk_account_rq(rq)) {
		q->in_flight--;
		if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
			e->ops->elevator_deactivate_req_fn(q, rq);
	}

	rq->cmd_flags &= ~REQ_STARTED;

	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
}

static void elv_drain_elevator(request_queue_t *q)
{
	static int printed;
	while (q->elevator->ops->elevator_dispatch_fn(q, 1))
		;
	if (q->nr_sorted == 0)
		return;
	if (printed++ < 10) {
		printk(KERN_ERR "%s: forced dispatching is broken "
		       "(nr_sorted=%u), please report this\n",
		       q->elevator->elevator_type->elevator_name, q->nr_sorted);
	}
}
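/*
 * elv_insert() below places a request according to 'where':
 * ELEVATOR_INSERT_FRONT and _BACK go straight onto q->queue_head (the
 * back case first force-drains the scheduler and kicks the queue),
 * ELEVATOR_INSERT_SORT hands the request to the scheduler's
 * ->elevator_add_req_fn(), and ELEVATOR_INSERT_REQUEUE front-inserts
 * unless an ordered flush is in progress, in which case the request is
 * placed in ordseq order.
 */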
void elv_insert(request_queue_t *q, struct request *rq, int where)
{
	struct list_head *pos;
	unsigned ordseq;
	int unplug_it = 1;

	blk_add_trace_rq(q, rq, BLK_TA_INSERT);

	rq->q = q;

	switch (where) {
	case ELEVATOR_INSERT_FRONT:
		rq->cmd_flags |= REQ_SOFTBARRIER;

		list_add(&rq->queuelist, &q->queue_head);
		break;

	case ELEVATOR_INSERT_BACK:
		rq->cmd_flags |= REQ_SOFTBARRIER;
		elv_drain_elevator(q);
		list_add_tail(&rq->queuelist, &q->queue_head);
		/*
		 * We kick the queue here for the following reasons.
		 * - The elevator might have returned NULL previously
		 *   to delay requests and returned them now.  As the
		 *   queue wasn't empty before this request, ll_rw_blk
		 *   won't run the queue on return, resulting in a hang.
		 * - Usually, back inserted requests won't be merged
		 *   with anything.  There's no point in delaying queue
		 *   processing.
		 */
		blk_remove_plug(q);
		q->request_fn(q);
		break;

	case ELEVATOR_INSERT_SORT:
		BUG_ON(!blk_fs_request(rq));
		rq->cmd_flags |= REQ_SORTED;
		q->nr_sorted++;
		if (rq_mergeable(rq)) {
			elv_rqhash_add(q, rq);
			if (!q->last_merge)
				q->last_merge = rq;
		}

		/*
		 * Some ioscheds (cfq) run q->request_fn directly, so
		 * rq cannot be accessed after calling
		 * elevator_add_req_fn.
		 */
		q->elevator->ops->elevator_add_req_fn(q, rq);
		break;

	case ELEVATOR_INSERT_REQUEUE:
		/*
		 * If ordered flush isn't in progress, we do front
		 * insertion; otherwise, requests should be requeued
		 * in ordseq order.
		 */
		rq->cmd_flags |= REQ_SOFTBARRIER;

		if (q->ordseq == 0) {
			list_add(&rq->queuelist, &q->queue_head);
			break;
		}

		ordseq = blk_ordered_req_seq(rq);

		list_for_each(pos, &q->queue_head) {
			struct request *pos_rq = list_entry_rq(pos);
			if (ordseq <= blk_ordered_req_seq(pos_rq))
				break;
		}

		list_add_tail(&rq->queuelist, pos);
		/*
		 * most requeues happen because of a busy condition, don't
		 * force unplug of the queue for that case.
		 */
		unplug_it = 0;
		break;

	default:
		printk(KERN_ERR "%s: bad insertion point %d\n",
		       __FUNCTION__, where);
		BUG();
	}

	if (unplug_it && blk_queue_plugged(q)) {
		int nrq = q->rq.count[READ] + q->rq.count[WRITE]
			- q->in_flight;

		if (nrq >= q->unplug_thresh)
			__generic_unplug_device(q);
	}
}

void __elv_add_request(request_queue_t *q, struct request *rq, int where,
		       int plug)
{
	if (q->ordcolor)
		rq->cmd_flags |= REQ_ORDERED_COLOR;

	if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
		/*
		 * toggle ordered color
		 */
		if (blk_barrier_rq(rq))
			q->ordcolor ^= 1;

		/*
		 * barriers implicitly indicate back insertion
		 */
		if (where == ELEVATOR_INSERT_SORT)
			where = ELEVATOR_INSERT_BACK;

		/*
		 * this request is scheduling boundary, update
		 * end_sector
		 */
		if (blk_fs_request(rq)) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = rq;
		}
	} else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
		where = ELEVATOR_INSERT_BACK;

	if (plug)
		blk_plug_device(q);

	elv_insert(q, rq, where);
}

EXPORT_SYMBOL(__elv_add_request);

void elv_add_request(request_queue_t *q, struct request *rq, int where,
		     int plug)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__elv_add_request(q, rq, where, plug);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

EXPORT_SYMBOL(elv_add_request);

static inline struct request *__elv_next_request(request_queue_t *q)
{
	struct request *rq;

	while (1) {
		while (!list_empty(&q->queue_head)) {
			rq = list_entry_rq(q->queue_head.next);
			if (blk_do_ordered(q, &rq))
				return rq;
		}

		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
			return NULL;
	}
}
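/*
 * __elv_next_request() above keeps pulling from q->queue_head, asking the
 * scheduler to dispatch more work whenever the list runs dry, and lets
 * blk_do_ordered() substitute or swallow requests that are part of a
 * barrier sequence.  elv_next_request() below adds the driver-visible
 * bookkeeping: activation notification, REQ_STARTED marking and request
 * preparation via q->prep_rq_fn.
 */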
struct request *elv_next_request(request_queue_t *q)
{
	struct request *rq;
	int ret;

	while ((rq = __elv_next_request(q)) != NULL) {
		if (!(rq->cmd_flags & REQ_STARTED)) {
			elevator_t *e = q->elevator;

			/*
			 * This is the first time the device driver
			 * sees this request (possibly after
			 * requeueing).  Notify IO scheduler.
			 */
			if (blk_sorted_rq(rq) &&
			    e->ops->elevator_activate_req_fn)
				e->ops->elevator_activate_req_fn(q, rq);

			/*
			 * just mark as started even if we don't start
			 * it, a request that has been delayed should
			 * not be passed by new incoming requests
			 */
			rq->cmd_flags |= REQ_STARTED;
			blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
		}

		if (!q->boundary_rq || q->boundary_rq == rq) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = NULL;
		}

		if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
			break;

		ret = q->prep_rq_fn(q, rq);
		if (ret == BLKPREP_OK) {
			break;
		} else if (ret == BLKPREP_DEFER) {
			/*
			 * the request may have been (partially) prepped.
			 * we need to keep this request in the front to
			 * avoid resource deadlock.  REQ_STARTED will
			 * prevent other fs requests from passing this one.
			 */
			rq = NULL;
			break;
		} else if (ret == BLKPREP_KILL) {
			int nr_bytes = rq->hard_nr_sectors << 9;

			if (!nr_bytes)
				nr_bytes = rq->data_len;

			blkdev_dequeue_request(rq);
			rq->cmd_flags |= REQ_QUIET;
			end_that_request_chunk(rq, 0, nr_bytes);
			end_that_request_last(rq, 0);
		} else {
			printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
			       ret);
			break;
		}
	}

	return rq;
}

EXPORT_SYMBOL(elv_next_request);

void elv_dequeue_request(request_queue_t *q, struct request *rq)
{
	BUG_ON(list_empty(&rq->queuelist));
	BUG_ON(ELV_ON_HASH(rq));

	list_del_init(&rq->queuelist);

	/*
	 * the time frame between a request being removed from the lists
	 * and when it is freed is accounted as io that is in progress at
	 * the driver side.
	 */
	if (blk_account_rq(rq))
		q->in_flight++;
}

EXPORT_SYMBOL(elv_dequeue_request);

int elv_queue_empty(request_queue_t *q)
{
	elevator_t *e = q->elevator;

	if (!list_empty(&q->queue_head))
		return 0;

	if (e->ops->elevator_queue_empty_fn)
		return e->ops->elevator_queue_empty_fn(q);

	return 1;
}

EXPORT_SYMBOL(elv_queue_empty);

struct request *elv_latter_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_latter_req_fn)
		return e->ops->elevator_latter_req_fn(q, rq);
	return NULL;
}

struct request *elv_former_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_former_req_fn)
		return e->ops->elevator_former_req_fn(q, rq);
	return NULL;
}

int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_set_req_fn)
		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);

	rq->elevator_private = NULL;
	return 0;
}

void elv_put_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_put_req_fn)
		e->ops->elevator_put_req_fn(rq);
}

int elv_may_queue(request_queue_t *q, int rw)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_may_queue_fn)
		return e->ops->elevator_may_queue_fn(q, rw);

	return ELV_MQUEUE_MAY;
}
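/*
 * q->in_flight accounting is symmetric: elv_dequeue_request() above
 * increments it when the driver takes ownership of a request, while
 * elv_completed_request() below (and elv_requeue_request()) decrement
 * it once the request is released back to the block layer.
 */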
void elv_completed_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	/*
	 * request is released from the driver, io must be done
	 */
	if (blk_account_rq(rq)) {
		q->in_flight--;
		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
			e->ops->elevator_completed_req_fn(q, rq);
	}

	/*
	 * Check if the queue is waiting for fs requests to be
	 * drained for flush sequence.
	 */
	if (unlikely(q->ordseq)) {
		struct request *first_rq = list_entry_rq(q->queue_head.next);
		if (q->in_flight == 0 &&
		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
		    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
			q->request_fn(q);
		}
	}
}

#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);
	struct elv_fs_entry *entry = to_elv(attr);
	ssize_t error;

	if (!entry->show)
		return -EIO;

	mutex_lock(&e->sysfs_lock);
	error = e->ops ? entry->show(e, page) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
	       const char *page, size_t length)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);
	struct elv_fs_entry *entry = to_elv(attr);
	ssize_t error;

	if (!entry->store)
		return -EIO;

	mutex_lock(&e->sysfs_lock);
	error = e->ops ? entry->store(e, page, length) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static struct sysfs_ops elv_sysfs_ops = {
	.show	= elv_attr_show,
	.store	= elv_attr_store,
};

static struct kobj_type elv_ktype = {
	.sysfs_ops	= &elv_sysfs_ops,
	.release	= elevator_release,
};

int elv_register_queue(struct request_queue *q)
{
	elevator_t *e = q->elevator;
	int error;

	e->kobj.parent = &q->kobj;

	error = kobject_add(&e->kobj);
	if (!error) {
		struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
		if (attr) {
			while (attr->attr.name) {
				if (sysfs_create_file(&e->kobj, &attr->attr))
					break;
				attr++;
			}
		}
		kobject_uevent(&e->kobj, KOBJ_ADD);
	}
	return error;
}

static void __elv_unregister_queue(elevator_t *e)
{
	kobject_uevent(&e->kobj, KOBJ_REMOVE);
	kobject_del(&e->kobj);
}

void elv_unregister_queue(struct request_queue *q)
{
	if (q)
		__elv_unregister_queue(q->elevator);
}

int elv_register(struct elevator_type *e)
{
	spin_lock_irq(&elv_list_lock);
	BUG_ON(elevator_find(e->elevator_name));
	list_add_tail(&e->list, &elv_list);
	spin_unlock_irq(&elv_list_lock);

	printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
	if (!strcmp(e->elevator_name, chosen_elevator) ||
	    (!*chosen_elevator &&
	     !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
		printk(" (default)");
	printk("\n");
	return 0;
}
EXPORT_SYMBOL_GPL(elv_register);
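/*
 * An illustrative sketch (not from this file): an io scheduler module
 * typically registers itself from its module_init with something like
 *
 *	static struct elevator_type iosched_foo = {
 *		.ops = {
 *			.elevator_merge_fn	= foo_merge,
 *			.elevator_dispatch_fn	= foo_dispatch,
 *			.elevator_add_req_fn	= foo_add_request,
 *			.elevator_init_fn	= foo_init_queue,
 *			.elevator_exit_fn	= foo_exit_queue,
 *		},
 *		.elevator_name	= "foo",
 *		.elevator_owner	= THIS_MODULE,
 *	};
 *
 *	elv_register(&iosched_foo);
 *
 * and calls elv_unregister() from its module_exit.
 */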
void elv_unregister(struct elevator_type *e)
{
	struct task_struct *g, *p;

	/*
	 * Iterate every thread in the process to remove the io contexts.
	 */
	if (e->ops.trim) {
		read_lock(&tasklist_lock);
		do_each_thread(g, p) {
			task_lock(p);
			if (p->io_context)
				e->ops.trim(p->io_context);
			task_unlock(p);
		} while_each_thread(g, p);
		read_unlock(&tasklist_lock);
	}

	spin_lock_irq(&elv_list_lock);
	list_del_init(&e->list);
	spin_unlock_irq(&elv_list_lock);
}
EXPORT_SYMBOL_GPL(elv_unregister);
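/*
 * While QUEUE_FLAG_ELVSWITCH is set in elevator_switch() below, the
 * request allocator is expected to hand out requests without
 * elevator-private data, so q->rq.elvpriv can drain to zero and the old
 * scheduler can be torn down safely.
 */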
/*
 * switch to new_e io scheduler. be careful not to introduce deadlocks -
 * we don't free the old io scheduler before we have allocated what we
 * need for the new one. this way we have a chance of going back to the old
 * one, if the new one fails init for some reason.
 */
static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
{
	elevator_t *old_elevator, *e;
	void *data;

	/*
	 * Allocate new elevator
	 */
	e = elevator_alloc(q, new_e);
	if (!e)
		return 0;

	data = elevator_init_queue(q, e);
	if (!data) {
		kobject_put(&e->kobj);
		return 0;
	}

	/*
	 * Turn on BYPASS and drain all requests w/ elevator private data
	 */
	spin_lock_irq(q->queue_lock);

	set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

	elv_drain_elevator(q);

	while (q->rq.elvpriv) {
		blk_remove_plug(q);
		q->request_fn(q);
		spin_unlock_irq(q->queue_lock);
		msleep(10);
		spin_lock_irq(q->queue_lock);
		elv_drain_elevator(q);
	}

	/*
	 * Remember old elevator.
	 */
	old_elevator = q->elevator;

	/*
	 * attach and start new elevator
	 */
	elevator_attach(q, e, data);

	spin_unlock_irq(q->queue_lock);

	__elv_unregister_queue(old_elevator);

	if (elv_register_queue(q))
		goto fail_register;

	/*
	 * finally exit old elevator and turn off BYPASS.
	 */
	elevator_exit(old_elevator);
	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	return 1;

fail_register:
	/*
	 * switch failed, exit the new io scheduler and reattach the old
	 * one again (along with re-adding the sysfs dir)
	 */
	elevator_exit(e);
	q->elevator = old_elevator;
	elv_register_queue(q);
	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	return 0;
}

ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
{
	char elevator_name[ELV_NAME_MAX];
	size_t len;
	struct elevator_type *e;

	elevator_name[sizeof(elevator_name) - 1] = '\0';
	strncpy(elevator_name, name, sizeof(elevator_name) - 1);
	len = strlen(elevator_name);

	if (len && elevator_name[len - 1] == '\n')
		elevator_name[len - 1] = '\0';

	e = elevator_get(elevator_name);
	if (!e) {
		printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
		return -EINVAL;
	}

	if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
		elevator_put(e);
		return count;
	}

	if (!elevator_switch(q, e))
		printk(KERN_ERR "elevator: switch to %s failed\n", elevator_name);
	return count;
}

ssize_t elv_iosched_show(request_queue_t *q, char *name)
{
	elevator_t *e = q->elevator;
	struct elevator_type *elv = e->elevator_type;
	struct list_head *entry;
	int len = 0;

	spin_lock_irq(&elv_list_lock);
	list_for_each(entry, &elv_list) {
		struct elevator_type *__e;

		__e = list_entry(entry, struct elevator_type, list);
		if (!strcmp(elv->elevator_name, __e->elevator_name))
			len += sprintf(name+len, "[%s] ", elv->elevator_name);
		else
			len += sprintf(name+len, "%s ", __e->elevator_name);
	}
	spin_unlock_irq(&elv_list_lock);

	len += sprintf(len+name, "\n");
	return len;
}

struct request *elv_rb_former_request(request_queue_t *q, struct request *rq)
{
	struct rb_node *rbprev = rb_prev(&rq->rb_node);

	if (rbprev)
		return rb_entry_rq(rbprev);

	return NULL;
}

EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq)
{
	struct rb_node *rbnext = rb_next(&rq->rb_node);

	if (rbnext)
		return rb_entry_rq(rbnext);

	return NULL;
}

EXPORT_SYMBOL(elv_rb_latter_request);