xref: /linux/block/elevator.c (revision 6dfafbd0299a60bfb5d5e277fdf100037c7ded07)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Block device elevator/IO-scheduler.
4  *
5  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
6  *
7  * 30042000 Jens Axboe <axboe@kernel.dk> :
8  *
9  * Split the elevator a bit so that it is possible to choose a different
10  * one or even write a new "plug in". There are three pieces:
11  * - elevator_fn, inserts a new request in the queue list
12  * - elevator_merge_fn, decides whether a new buffer can be merged with
13  *   an existing request
14  * - elevator_dequeue_fn, called when a request is taken off the active list
15  *
16  * 20082000 Dave Jones <davej@suse.de> :
17  * Removed tests for max-bomb-segments, which was breaking elvtune
18  *  when run without -bN
19  *
20  * Jens:
21  * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is right now
23  * - completely modularize elevator setup and teardown
24  *
25  */
26 #include <linux/kernel.h>
27 #include <linux/fs.h>
28 #include <linux/blkdev.h>
29 #include <linux/bio.h>
30 #include <linux/module.h>
31 #include <linux/slab.h>
32 #include <linux/init.h>
33 #include <linux/compiler.h>
34 #include <linux/blktrace_api.h>
35 #include <linux/hash.h>
36 #include <linux/uaccess.h>
37 #include <linux/pm_runtime.h>
38 
39 #include <trace/events/block.h>
40 
41 #include "elevator.h"
42 #include "blk.h"
43 #include "blk-mq-sched.h"
44 #include "blk-pm.h"
45 #include "blk-wbt.h"
46 #include "blk-cgroup.h"
47 
/*
 * elv_list holds every registered elevator type (entries are added by
 * elv_register() and removed by elv_unregister()); elv_list_lock is taken
 * around all walks and modifications of the list in this file.
 */
static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);
50 
/*
 * Merge hash stuff.
 *
 * A request is hashed by the sector just past its end (start sector plus
 * length in sectors), so looking up the sector at which a new bio or
 * request starts finds a request it could be appended to.
 */
#define rq_hash_key(rq)		(blk_rq_pos(rq) + blk_rq_sectors(rq))
55 
56 /*
57  * Query io scheduler to see if the current process issuing bio may be
58  * merged with rq.
59  */
60 static bool elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
61 {
62 	struct request_queue *q = rq->q;
63 	struct elevator_queue *e = q->elevator;
64 
65 	if (e->type->ops.allow_merge)
66 		return e->type->ops.allow_merge(q, rq, bio);
67 
68 	return true;
69 }
70 
71 /*
72  * can we safely merge with this request?
73  */
74 bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
75 {
76 	if (!blk_rq_merge_ok(rq, bio))
77 		return false;
78 
79 	if (!elv_iosched_allow_bio_merge(rq, bio))
80 		return false;
81 
82 	return true;
83 }
84 EXPORT_SYMBOL(elv_bio_merge_ok);
85 
86 /**
87  * elevator_match - Check whether @e's name or alias matches @name
88  * @e: Scheduler to test
89  * @name: Elevator name to test
90  *
91  * Return true if the elevator @e's name or alias matches @name.
92  */
93 static bool elevator_match(const struct elevator_type *e, const char *name)
94 {
95 	return !strcmp(e->elevator_name, name) ||
96 		(e->elevator_alias && !strcmp(e->elevator_alias, name));
97 }
98 
99 static struct elevator_type *__elevator_find(const char *name)
100 {
101 	struct elevator_type *e;
102 
103 	list_for_each_entry(e, &elv_list, list)
104 		if (elevator_match(e, name))
105 			return e;
106 	return NULL;
107 }
108 
109 static struct elevator_type *elevator_find_get(const char *name)
110 {
111 	struct elevator_type *e;
112 
113 	spin_lock(&elv_list_lock);
114 	e = __elevator_find(name);
115 	if (e && (!elevator_tryget(e)))
116 		e = NULL;
117 	spin_unlock(&elv_list_lock);
118 	return e;
119 }
120 
/* Defined further down; elevator_alloc() needs it for kobject_init(). */
static const struct kobj_type elv_ktype;
122 
123 struct elevator_queue *elevator_alloc(struct request_queue *q,
124 		struct elevator_type *e, struct elevator_resources *res)
125 {
126 	struct elevator_queue *eq;
127 
128 	eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
129 	if (unlikely(!eq))
130 		return NULL;
131 
132 	__elevator_get(e);
133 	eq->type = e;
134 	kobject_init(&eq->kobj, &elv_ktype);
135 	mutex_init(&eq->sysfs_lock);
136 	hash_init(eq->hash);
137 	eq->et = res->et;
138 	eq->elevator_data = res->data;
139 
140 	return eq;
141 }
142 
143 static void elevator_release(struct kobject *kobj)
144 {
145 	struct elevator_queue *e;
146 
147 	e = container_of(kobj, struct elevator_queue, kobj);
148 	elevator_put(e->type);
149 	kfree(e);
150 }
151 
152 static void elevator_exit(struct request_queue *q)
153 {
154 	struct elevator_queue *e = q->elevator;
155 
156 	lockdep_assert_held(&q->elevator_lock);
157 
158 	ioc_clear_queue(q);
159 
160 	mutex_lock(&e->sysfs_lock);
161 	blk_mq_exit_sched(q, e);
162 	mutex_unlock(&e->sysfs_lock);
163 }
164 
165 static inline void __elv_rqhash_del(struct request *rq)
166 {
167 	hash_del(&rq->hash);
168 	rq->rq_flags &= ~RQF_HASHED;
169 }
170 
/*
 * Remove @rq from the merge hash if ELV_ON_HASH() says it is hashed;
 * otherwise do nothing.  @q is not used.
 */
void elv_rqhash_del(struct request_queue *q, struct request *rq)
{
	if (!ELV_ON_HASH(rq))
		return;

	__elv_rqhash_del(rq);
}
EXPORT_SYMBOL_GPL(elv_rqhash_del);
177 
178 void elv_rqhash_add(struct request_queue *q, struct request *rq)
179 {
180 	struct elevator_queue *e = q->elevator;
181 
182 	BUG_ON(ELV_ON_HASH(rq));
183 	hash_add(e->hash, &rq->hash, rq_hash_key(rq));
184 	rq->rq_flags |= RQF_HASHED;
185 }
186 EXPORT_SYMBOL_GPL(elv_rqhash_add);
187 
/*
 * Re-hash @rq: take it out of the merge hash and insert it again under
 * the freshly computed rq_hash_key().
 *
 * Unlike elv_rqhash_del(), the removal is not guarded by ELV_ON_HASH().
 */
void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
{
	__elv_rqhash_del(rq);
	elv_rqhash_add(q, rq);
}
193 
194 struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
195 {
196 	struct elevator_queue *e = q->elevator;
197 	struct hlist_node *next;
198 	struct request *rq;
199 
200 	hash_for_each_possible_safe(e->hash, rq, next, hash, offset) {
201 		BUG_ON(!ELV_ON_HASH(rq));
202 
203 		if (unlikely(!rq_mergeable(rq))) {
204 			__elv_rqhash_del(rq);
205 			continue;
206 		}
207 
208 		if (rq_hash_key(rq) == offset)
209 			return rq;
210 	}
211 
212 	return NULL;
213 }
214 
215 /*
216  * RB-tree support functions for inserting/lookup/removal of requests
217  * in a sorted RB tree.
218  */
219 void elv_rb_add(struct rb_root *root, struct request *rq)
220 {
221 	struct rb_node **p = &root->rb_node;
222 	struct rb_node *parent = NULL;
223 	struct request *__rq;
224 
225 	while (*p) {
226 		parent = *p;
227 		__rq = rb_entry(parent, struct request, rb_node);
228 
229 		if (blk_rq_pos(rq) < blk_rq_pos(__rq))
230 			p = &(*p)->rb_left;
231 		else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
232 			p = &(*p)->rb_right;
233 	}
234 
235 	rb_link_node(&rq->rb_node, parent, p);
236 	rb_insert_color(&rq->rb_node, root);
237 }
238 EXPORT_SYMBOL(elv_rb_add);
239 
240 void elv_rb_del(struct rb_root *root, struct request *rq)
241 {
242 	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
243 	rb_erase(&rq->rb_node, root);
244 	RB_CLEAR_NODE(&rq->rb_node);
245 }
246 EXPORT_SYMBOL(elv_rb_del);
247 
248 struct request *elv_rb_find(struct rb_root *root, sector_t sector)
249 {
250 	struct rb_node *n = root->rb_node;
251 	struct request *rq;
252 
253 	while (n) {
254 		rq = rb_entry(n, struct request, rb_node);
255 
256 		if (sector < blk_rq_pos(rq))
257 			n = n->rb_left;
258 		else if (sector > blk_rq_pos(rq))
259 			n = n->rb_right;
260 		else
261 			return rq;
262 	}
263 
264 	return NULL;
265 }
266 EXPORT_SYMBOL(elv_rb_find);
267 
268 enum elv_merge elv_merge(struct request_queue *q, struct request **req,
269 		struct bio *bio)
270 {
271 	struct elevator_queue *e = q->elevator;
272 	struct request *__rq;
273 
274 	/*
275 	 * Levels of merges:
276 	 * 	nomerges:  No merges at all attempted
277 	 * 	noxmerges: Only simple one-hit cache try
278 	 * 	merges:	   All merge tries attempted
279 	 */
280 	if (blk_queue_nomerges(q) || !bio_mergeable(bio))
281 		return ELEVATOR_NO_MERGE;
282 
283 	/*
284 	 * First try one-hit cache.
285 	 */
286 	if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
287 		enum elv_merge ret = blk_try_merge(q->last_merge, bio);
288 
289 		if (ret != ELEVATOR_NO_MERGE) {
290 			*req = q->last_merge;
291 			return ret;
292 		}
293 	}
294 
295 	if (blk_queue_noxmerges(q))
296 		return ELEVATOR_NO_MERGE;
297 
298 	/*
299 	 * See if our hash lookup can find a potential backmerge.
300 	 */
301 	__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
302 	if (__rq && elv_bio_merge_ok(__rq, bio)) {
303 		*req = __rq;
304 
305 		if (blk_discard_mergable(__rq))
306 			return ELEVATOR_DISCARD_MERGE;
307 		return ELEVATOR_BACK_MERGE;
308 	}
309 
310 	if (e->type->ops.request_merge)
311 		return e->type->ops.request_merge(q, req, bio);
312 
313 	return ELEVATOR_NO_MERGE;
314 }
315 
316 /*
317  * Attempt to do an insertion back merge. Only check for the case where
318  * we can append 'rq' to an existing request, so we can throw 'rq' away
319  * afterwards.
320  *
321  * Returns true if we merged, false otherwise. 'free' will contain all
322  * requests that need to be freed.
323  */
324 bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq,
325 			      struct list_head *free)
326 {
327 	struct request *__rq;
328 	bool ret;
329 
330 	if (blk_queue_nomerges(q))
331 		return false;
332 
333 	/*
334 	 * First try one-hit cache.
335 	 */
336 	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq)) {
337 		list_add(&rq->queuelist, free);
338 		return true;
339 	}
340 
341 	if (blk_queue_noxmerges(q))
342 		return false;
343 
344 	ret = false;
345 	/*
346 	 * See if our hash lookup can find a potential backmerge.
347 	 */
348 	while (1) {
349 		__rq = elv_rqhash_find(q, blk_rq_pos(rq));
350 		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
351 			break;
352 
353 		list_add(&rq->queuelist, free);
354 		/* The merged request could be merged with others, try again */
355 		ret = true;
356 		rq = __rq;
357 	}
358 
359 	return ret;
360 }
361 
362 void elv_merged_request(struct request_queue *q, struct request *rq,
363 		enum elv_merge type)
364 {
365 	struct elevator_queue *e = q->elevator;
366 
367 	if (e->type->ops.request_merged)
368 		e->type->ops.request_merged(q, rq, type);
369 
370 	if (type == ELEVATOR_BACK_MERGE)
371 		elv_rqhash_reposition(q, rq);
372 
373 	q->last_merge = rq;
374 }
375 
376 void elv_merge_requests(struct request_queue *q, struct request *rq,
377 			     struct request *next)
378 {
379 	struct elevator_queue *e = q->elevator;
380 
381 	if (e->type->ops.requests_merged)
382 		e->type->ops.requests_merged(q, rq, next);
383 
384 	elv_rqhash_reposition(q, rq);
385 	q->last_merge = rq;
386 }
387 
388 struct request *elv_latter_request(struct request_queue *q, struct request *rq)
389 {
390 	struct elevator_queue *e = q->elevator;
391 
392 	if (e->type->ops.next_request)
393 		return e->type->ops.next_request(q, rq);
394 
395 	return NULL;
396 }
397 
398 struct request *elv_former_request(struct request_queue *q, struct request *rq)
399 {
400 	struct elevator_queue *e = q->elevator;
401 
402 	if (e->type->ops.former_request)
403 		return e->type->ops.former_request(q, rq);
404 
405 	return NULL;
406 }
407 
/* Map a struct attribute embedded in a struct elv_fs_entry back to that entry. */
#define to_elv(atr) container_of_const((atr), struct elv_fs_entry, attr)
409 
410 static ssize_t
411 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
412 {
413 	const struct elv_fs_entry *entry = to_elv(attr);
414 	struct elevator_queue *e;
415 	ssize_t error = -ENODEV;
416 
417 	if (!entry->show)
418 		return -EIO;
419 
420 	e = container_of(kobj, struct elevator_queue, kobj);
421 	mutex_lock(&e->sysfs_lock);
422 	if (!test_bit(ELEVATOR_FLAG_DYING, &e->flags))
423 		error = entry->show(e, page);
424 	mutex_unlock(&e->sysfs_lock);
425 	return error;
426 }
427 
428 static ssize_t
429 elv_attr_store(struct kobject *kobj, struct attribute *attr,
430 	       const char *page, size_t length)
431 {
432 	const struct elv_fs_entry *entry = to_elv(attr);
433 	struct elevator_queue *e;
434 	ssize_t error = -ENODEV;
435 
436 	if (!entry->store)
437 		return -EIO;
438 
439 	e = container_of(kobj, struct elevator_queue, kobj);
440 	mutex_lock(&e->sysfs_lock);
441 	if (!test_bit(ELEVATOR_FLAG_DYING, &e->flags))
442 		error = entry->store(e, page, length);
443 	mutex_unlock(&e->sysfs_lock);
444 	return error;
445 }
446 
/* sysfs show/store entry points for kobjects that use elv_ktype. */
static const struct sysfs_ops elv_sysfs_ops = {
	.show	= elv_attr_show,
	.store	= elv_attr_store,
};
451 
/*
 * kobject type of the kobject embedded in struct elevator_queue (set up
 * by kobject_init() in elevator_alloc()): attribute accesses go through
 * elv_sysfs_ops and elevator_release() is the release callback.
 */
static const struct kobj_type elv_ktype = {
	.sysfs_ops	= &elv_sysfs_ops,
	.release	= elevator_release,
};
456 
457 static int elv_register_queue(struct request_queue *q,
458 			      struct elevator_queue *e,
459 			      bool uevent)
460 {
461 	int error;
462 
463 	error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
464 	if (!error) {
465 		const struct elv_fs_entry *attr = e->type->elevator_attrs;
466 		if (attr) {
467 			while (attr->attr.name) {
468 				if (sysfs_create_file(&e->kobj, &attr->attr))
469 					break;
470 				attr++;
471 			}
472 		}
473 		if (uevent)
474 			kobject_uevent(&e->kobj, KOBJ_ADD);
475 
476 		/*
477 		 * Sched is initialized, it is ready to export it via
478 		 * debugfs
479 		 */
480 		blk_mq_sched_reg_debugfs(q);
481 		set_bit(ELEVATOR_FLAG_REGISTERED, &e->flags);
482 	}
483 	return error;
484 }
485 
486 static void elv_unregister_queue(struct request_queue *q,
487 				 struct elevator_queue *e)
488 {
489 	if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
490 		kobject_uevent(&e->kobj, KOBJ_REMOVE);
491 		kobject_del(&e->kobj);
492 
493 		/* unexport via debugfs before exiting sched */
494 		blk_mq_sched_unreg_debugfs(q);
495 	}
496 }
497 
498 int elv_register(struct elevator_type *e)
499 {
500 	/* finish request is mandatory */
501 	if (WARN_ON_ONCE(!e->ops.finish_request))
502 		return -EINVAL;
503 	/* insert_requests and dispatch_request are mandatory */
504 	if (WARN_ON_ONCE(!e->ops.insert_requests || !e->ops.dispatch_request))
505 		return -EINVAL;
506 
507 	/* create icq_cache if requested */
508 	if (e->icq_size) {
509 		if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
510 		    WARN_ON(e->icq_align < __alignof__(struct io_cq)))
511 			return -EINVAL;
512 
513 		snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
514 			 "%s_io_cq", e->elevator_name);
515 		e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
516 						 e->icq_align, 0, NULL);
517 		if (!e->icq_cache)
518 			return -ENOMEM;
519 	}
520 
521 	/* register, don't allow duplicate names */
522 	spin_lock(&elv_list_lock);
523 	if (__elevator_find(e->elevator_name)) {
524 		spin_unlock(&elv_list_lock);
525 		kmem_cache_destroy(e->icq_cache);
526 		return -EBUSY;
527 	}
528 	list_add_tail(&e->list, &elv_list);
529 	spin_unlock(&elv_list_lock);
530 
531 	printk(KERN_INFO "io scheduler %s registered\n", e->elevator_name);
532 
533 	return 0;
534 }
535 EXPORT_SYMBOL_GPL(elv_register);
536 
537 void elv_unregister(struct elevator_type *e)
538 {
539 	/* unregister */
540 	spin_lock(&elv_list_lock);
541 	list_del_init(&e->list);
542 	spin_unlock(&elv_list_lock);
543 
544 	/*
545 	 * Destroy icq_cache if it exists.  icq's are RCU managed.  Make
546 	 * sure all RCU operations are complete before proceeding.
547 	 */
548 	if (e->icq_cache) {
549 		rcu_barrier();
550 		kmem_cache_destroy(e->icq_cache);
551 		e->icq_cache = NULL;
552 	}
553 }
554 EXPORT_SYMBOL_GPL(elv_unregister);
555 
556 /*
557  * Switch to new_e io scheduler.
558  *
559  * If switching fails, we are most likely running out of memory and not able
560  * to restore the old io scheduler, so leaving the io scheduler being none.
561  */
562 static int elevator_switch(struct request_queue *q, struct elv_change_ctx *ctx)
563 {
564 	struct elevator_type *new_e = NULL;
565 	int ret = 0;
566 
567 	WARN_ON_ONCE(q->mq_freeze_depth == 0);
568 	lockdep_assert_held(&q->elevator_lock);
569 
570 	if (strncmp(ctx->name, "none", 4)) {
571 		new_e = elevator_find_get(ctx->name);
572 		if (!new_e)
573 			return -EINVAL;
574 	}
575 
576 	blk_mq_quiesce_queue(q);
577 
578 	if (q->elevator) {
579 		ctx->old = q->elevator;
580 		elevator_exit(q);
581 	}
582 
583 	if (new_e) {
584 		ret = blk_mq_init_sched(q, new_e, &ctx->res);
585 		if (ret)
586 			goto out_unfreeze;
587 		ctx->new = q->elevator;
588 	} else {
589 		blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
590 		q->elevator = NULL;
591 		q->nr_requests = q->tag_set->queue_depth;
592 	}
593 	blk_add_trace_msg(q, "elv switch: %s", ctx->name);
594 
595 out_unfreeze:
596 	blk_mq_unquiesce_queue(q);
597 
598 	if (ret) {
599 		pr_warn("elv: switch to \"%s\" failed, falling back to \"none\"\n",
600 			new_e->elevator_name);
601 	}
602 
603 	if (new_e)
604 		elevator_put(new_e);
605 	return ret;
606 }
607 
608 static void elv_exit_and_release(struct elv_change_ctx *ctx,
609 		struct request_queue *q)
610 {
611 	struct elevator_queue *e;
612 	unsigned memflags;
613 
614 	memflags = blk_mq_freeze_queue(q);
615 	mutex_lock(&q->elevator_lock);
616 	e = q->elevator;
617 	elevator_exit(q);
618 	mutex_unlock(&q->elevator_lock);
619 	blk_mq_unfreeze_queue(q, memflags);
620 	if (e) {
621 		blk_mq_free_sched_res(&ctx->res, ctx->type, q->tag_set);
622 		kobject_put(&e->kobj);
623 	}
624 }
625 
626 static int elevator_change_done(struct request_queue *q,
627 				struct elv_change_ctx *ctx)
628 {
629 	int ret = 0;
630 
631 	if (ctx->old) {
632 		struct elevator_resources res = {
633 			.et = ctx->old->et,
634 			.data = ctx->old->elevator_data
635 		};
636 		bool enable_wbt = test_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT,
637 				&ctx->old->flags);
638 
639 		elv_unregister_queue(q, ctx->old);
640 		blk_mq_free_sched_res(&res, ctx->old->type, q->tag_set);
641 		kobject_put(&ctx->old->kobj);
642 		if (enable_wbt)
643 			wbt_enable_default(q->disk);
644 	}
645 	if (ctx->new) {
646 		ret = elv_register_queue(q, ctx->new, !ctx->no_uevent);
647 		if (ret)
648 			elv_exit_and_release(ctx, q);
649 	}
650 	return ret;
651 }
652 
653 /*
654  * Switch this queue to the given IO scheduler.
655  */
656 static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
657 {
658 	unsigned int memflags;
659 	struct blk_mq_tag_set *set = q->tag_set;
660 	int ret = 0;
661 
662 	lockdep_assert_held(&set->update_nr_hwq_lock);
663 
664 	if (strncmp(ctx->name, "none", 4)) {
665 		ret = blk_mq_alloc_sched_res(q, ctx->type, &ctx->res,
666 				set->nr_hw_queues);
667 		if (ret)
668 			return ret;
669 	}
670 
671 	memflags = blk_mq_freeze_queue(q);
672 	/*
673 	 * May be called before adding disk, when there isn't any FS I/O,
674 	 * so freezing queue plus canceling dispatch work is enough to
675 	 * drain any dispatch activities originated from passthrough
676 	 * requests, then no need to quiesce queue which may add long boot
677 	 * latency, especially when lots of disks are involved.
678 	 *
679 	 * Disk isn't added yet, so verifying queue lock only manually.
680 	 */
681 	blk_mq_cancel_work_sync(q);
682 	mutex_lock(&q->elevator_lock);
683 	if (!(q->elevator && elevator_match(q->elevator->type, ctx->name)))
684 		ret = elevator_switch(q, ctx);
685 	mutex_unlock(&q->elevator_lock);
686 	blk_mq_unfreeze_queue(q, memflags);
687 	if (!ret)
688 		ret = elevator_change_done(q, ctx);
689 
690 	/*
691 	 * Free sched resource if it's allocated but we couldn't switch elevator.
692 	 */
693 	if (!ctx->new)
694 		blk_mq_free_sched_res(&ctx->res, ctx->type, set);
695 
696 	return ret;
697 }
698 
699 /*
700  * The I/O scheduler depends on the number of hardware queues, this forces a
701  * reattachment when nr_hw_queues changes.
702  */
703 void elv_update_nr_hw_queues(struct request_queue *q,
704 		struct elv_change_ctx *ctx)
705 {
706 	struct blk_mq_tag_set *set = q->tag_set;
707 	int ret = -ENODEV;
708 
709 	WARN_ON_ONCE(q->mq_freeze_depth == 0);
710 
711 	if (ctx->type && !blk_queue_dying(q) && blk_queue_registered(q)) {
712 		mutex_lock(&q->elevator_lock);
713 		/* force to reattach elevator after nr_hw_queue is updated */
714 		ret = elevator_switch(q, ctx);
715 		mutex_unlock(&q->elevator_lock);
716 	}
717 	blk_mq_unfreeze_queue_nomemrestore(q);
718 	if (!ret)
719 		WARN_ON_ONCE(elevator_change_done(q, ctx));
720 
721 	/*
722 	 * Free sched resource if it's allocated but we couldn't switch elevator.
723 	 */
724 	if (!ctx->new)
725 		blk_mq_free_sched_res(&ctx->res, ctx->type, set);
726 }
727 
728 /*
729  * Use the default elevator settings. If the chosen elevator initialization
730  * fails, fall back to the "none" elevator (no elevator).
731  */
732 void elevator_set_default(struct request_queue *q)
733 {
734 	struct elv_change_ctx ctx = {
735 		.name = "mq-deadline",
736 		.no_uevent = true,
737 	};
738 	int err;
739 
740 	/* now we allow to switch elevator */
741 	blk_queue_flag_clear(QUEUE_FLAG_NO_ELV_SWITCH, q);
742 
743 	if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
744 		return;
745 
746 	/*
747 	 * For single queue devices, default to using mq-deadline. If we
748 	 * have multiple queues or mq-deadline is not available, default
749 	 * to "none".
750 	 */
751 	ctx.type = elevator_find_get(ctx.name);
752 	if (!ctx.type)
753 		return;
754 
755 	if ((q->nr_hw_queues == 1 ||
756 			blk_mq_is_shared_tags(q->tag_set->flags))) {
757 		err = elevator_change(q, &ctx);
758 		if (err < 0)
759 			pr_warn("\"%s\" elevator initialization, failed %d, falling back to \"none\"\n",
760 					ctx.name, err);
761 	}
762 	elevator_put(ctx.type);
763 }
764 
765 void elevator_set_none(struct request_queue *q)
766 {
767 	struct elv_change_ctx ctx = {
768 		.name	= "none",
769 	};
770 	int err;
771 
772 	err = elevator_change(q, &ctx);
773 	if (err < 0)
774 		pr_warn("%s: set none elevator failed %d\n", __func__, err);
775 }
776 
777 static void elv_iosched_load_module(const char *elevator_name)
778 {
779 	struct elevator_type *found;
780 
781 	spin_lock(&elv_list_lock);
782 	found = __elevator_find(elevator_name);
783 	spin_unlock(&elv_list_lock);
784 
785 	if (!found)
786 		request_module("%s-iosched", elevator_name);
787 }
788 
789 ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
790 			  size_t count)
791 {
792 	char elevator_name[ELV_NAME_MAX];
793 	struct elv_change_ctx ctx = {};
794 	int ret;
795 	struct request_queue *q = disk->queue;
796 	struct blk_mq_tag_set *set = q->tag_set;
797 
798 	/* Make sure queue is not in the middle of being removed */
799 	if (!blk_queue_registered(q))
800 		return -ENOENT;
801 
802 	/*
803 	 * If the attribute needs to load a module, do it before freezing the
804 	 * queue to ensure that the module file can be read when the request
805 	 * queue is the one for the device storing the module file.
806 	 */
807 	strscpy(elevator_name, buf, sizeof(elevator_name));
808 	ctx.name = strstrip(elevator_name);
809 
810 	elv_iosched_load_module(ctx.name);
811 	ctx.type = elevator_find_get(ctx.name);
812 
813 	down_read(&set->update_nr_hwq_lock);
814 	if (!blk_queue_no_elv_switch(q)) {
815 		ret = elevator_change(q, &ctx);
816 		if (!ret)
817 			ret = count;
818 	} else {
819 		ret = -ENOENT;
820 	}
821 	up_read(&set->update_nr_hwq_lock);
822 
823 	if (ctx.type)
824 		elevator_put(ctx.type);
825 	return ret;
826 }
827 
828 ssize_t elv_iosched_show(struct gendisk *disk, char *name)
829 {
830 	struct request_queue *q = disk->queue;
831 	struct elevator_type *cur = NULL, *e;
832 	int len = 0;
833 
834 	mutex_lock(&q->elevator_lock);
835 	if (!q->elevator) {
836 		len += sprintf(name+len, "[none] ");
837 	} else {
838 		len += sprintf(name+len, "none ");
839 		cur = q->elevator->type;
840 	}
841 
842 	spin_lock(&elv_list_lock);
843 	list_for_each_entry(e, &elv_list, list) {
844 		if (e == cur)
845 			len += sprintf(name+len, "[%s] ", e->elevator_name);
846 		else
847 			len += sprintf(name+len, "%s ", e->elevator_name);
848 	}
849 	spin_unlock(&elv_list_lock);
850 
851 	len += sprintf(name+len, "\n");
852 	mutex_unlock(&q->elevator_lock);
853 
854 	return len;
855 }
856 
857 struct request *elv_rb_former_request(struct request_queue *q,
858 				      struct request *rq)
859 {
860 	struct rb_node *rbprev = rb_prev(&rq->rb_node);
861 
862 	if (rbprev)
863 		return rb_entry_rq(rbprev);
864 
865 	return NULL;
866 }
867 EXPORT_SYMBOL(elv_rb_former_request);
868 
869 struct request *elv_rb_latter_request(struct request_queue *q,
870 				      struct request *rq)
871 {
872 	struct rb_node *rbnext = rb_next(&rq->rb_node);
873 
874 	if (rbnext)
875 		return rb_entry_rq(rbnext);
876 
877 	return NULL;
878 }
879 EXPORT_SYMBOL(elv_rb_latter_request);
880 
/*
 * Handler for the "elevator=" kernel command line parameter.  The value
 * passed in @str is ignored: the handler only prints a warning that
 * points users at sysfs, and returns 1.
 */
static int __init elevator_setup(char *str)
{
	pr_warn("Kernel parameter elevator= does not have any effect anymore.\n"
		"Please use sysfs to set IO scheduler for individual devices.\n");
	return 1;
}

__setup("elevator=", elevator_setup);
889