xref: /linux/block/elevator.c (revision 67d85b062dcb49af9c903a58842a4ed7281f57b8)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Block device elevator/IO-scheduler.
4  *
5  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
6  *
7  * 30042000 Jens Axboe <axboe@kernel.dk> :
8  *
9  * Split the elevator a bit so that it is possible to choose a different
10  * one or even write a new "plug in". There are three pieces:
11  * - elevator_fn, inserts a new request in the queue list
12  * - elevator_merge_fn, decides whether a new buffer can be merged with
13  *   an existing request
14  * - elevator_dequeue_fn, called when a request is taken off the active list
15  *
16  * 20082000 Dave Jones <davej@suse.de> :
17  * Removed tests for max-bomb-segments, which was breaking elvtune
18  *  when run without -bN
19  *
20  * Jens:
21  * - Rework again to work with bio instead of buffer_heads
22  * - loose bi_dev comparisons, partition handling is right now
23  * - completely modularize elevator setup and teardown
24  *
25  */
26 #include <linux/kernel.h>
27 #include <linux/fs.h>
28 #include <linux/blkdev.h>
29 #include <linux/bio.h>
30 #include <linux/module.h>
31 #include <linux/slab.h>
32 #include <linux/init.h>
33 #include <linux/compiler.h>
34 #include <linux/blktrace_api.h>
35 #include <linux/hash.h>
36 #include <linux/uaccess.h>
37 #include <linux/pm_runtime.h>
38 
39 #include <trace/events/block.h>
40 
41 #include "elevator.h"
42 #include "blk.h"
43 #include "blk-mq-sched.h"
44 #include "blk-pm.h"
45 #include "blk-wbt.h"
46 #include "blk-cgroup.h"
47 
/* Registry of the io scheduler types added through elv_register(). */
static LIST_HEAD(elv_list);
/* Held around every walk and every modification of elv_list in this file. */
static DEFINE_SPINLOCK(elv_list_lock);
50 
/*
 * Merge hash key of a request: blk_rq_pos() plus blk_rq_sectors().
 *
 * elv_merge() probes the hash with the starting sector of a bio to find
 * back-merge candidates, so the key is presumably the sector that
 * immediately follows the request (TODO confirm against blk_rq_pos() /
 * blk_rq_sectors()).
 */
#define rq_hash_key(rq)		(blk_rq_sectors(rq) + blk_rq_pos(rq))
55 
56 /*
57  * Query io scheduler to see if the current process issuing bio may be
58  * merged with rq.
59  */
60 static bool elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
61 {
62 	struct request_queue *q = rq->q;
63 	struct elevator_queue *e = q->elevator;
64 
65 	if (e->type->ops.allow_merge)
66 		return e->type->ops.allow_merge(q, rq, bio);
67 
68 	return true;
69 }
70 
71 /*
72  * can we safely merge with this request?
73  */
74 bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
75 {
76 	if (!blk_rq_merge_ok(rq, bio))
77 		return false;
78 
79 	if (!elv_iosched_allow_bio_merge(rq, bio))
80 		return false;
81 
82 	return true;
83 }
84 EXPORT_SYMBOL(elv_bio_merge_ok);
85 
86 /**
87  * elevator_match - Check whether @e's name or alias matches @name
88  * @e: Scheduler to test
89  * @name: Elevator name to test
90  *
91  * Return true if the elevator @e's name or alias matches @name.
92  */
93 static bool elevator_match(const struct elevator_type *e, const char *name)
94 {
95 	return !strcmp(e->elevator_name, name) ||
96 		(e->elevator_alias && !strcmp(e->elevator_alias, name));
97 }
98 
99 static struct elevator_type *__elevator_find(const char *name)
100 {
101 	struct elevator_type *e;
102 
103 	list_for_each_entry(e, &elv_list, list)
104 		if (elevator_match(e, name))
105 			return e;
106 	return NULL;
107 }
108 
109 static struct elevator_type *elevator_find_get(const char *name)
110 {
111 	struct elevator_type *e;
112 
113 	spin_lock(&elv_list_lock);
114 	e = __elevator_find(name);
115 	if (e && (!elevator_tryget(e)))
116 		e = NULL;
117 	spin_unlock(&elv_list_lock);
118 	return e;
119 }
120 
121 static const struct kobj_type elv_ktype;
122 
123 struct elevator_queue *elevator_alloc(struct request_queue *q,
124 		struct elevator_type *e, struct elevator_resources *res)
125 {
126 	struct elevator_queue *eq;
127 
128 	eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
129 	if (unlikely(!eq))
130 		return NULL;
131 
132 	__elevator_get(e);
133 	eq->type = e;
134 	kobject_init(&eq->kobj, &elv_ktype);
135 	mutex_init(&eq->sysfs_lock);
136 	hash_init(eq->hash);
137 	eq->et = res->et;
138 	eq->elevator_data = res->data;
139 
140 	return eq;
141 }
142 
143 static void elevator_release(struct kobject *kobj)
144 {
145 	struct elevator_queue *e;
146 
147 	e = container_of(kobj, struct elevator_queue, kobj);
148 	elevator_put(e->type);
149 	kfree(e);
150 }
151 
152 static void elevator_exit(struct request_queue *q)
153 {
154 	struct elevator_queue *e = q->elevator;
155 
156 	lockdep_assert_held(&q->elevator_lock);
157 
158 	ioc_clear_queue(q);
159 
160 	mutex_lock(&e->sysfs_lock);
161 	blk_mq_exit_sched(q, e);
162 	mutex_unlock(&e->sysfs_lock);
163 }
164 
165 static inline void __elv_rqhash_del(struct request *rq)
166 {
167 	hash_del(&rq->hash);
168 	rq->rq_flags &= ~RQF_HASHED;
169 }
170 
/*
 * Remove @rq from the merge hash if it is on it; a request that is not
 * hashed is left untouched. @q is not used.
 */
void elv_rqhash_del(struct request_queue *q, struct request *rq)
{
	if (!ELV_ON_HASH(rq))
		return;

	__elv_rqhash_del(rq);
}
EXPORT_SYMBOL_GPL(elv_rqhash_del);
177 
178 void elv_rqhash_add(struct request_queue *q, struct request *rq)
179 {
180 	struct elevator_queue *e = q->elevator;
181 
182 	BUG_ON(ELV_ON_HASH(rq));
183 	hash_add(e->hash, &rq->hash, rq_hash_key(rq));
184 	rq->rq_flags |= RQF_HASHED;
185 }
186 EXPORT_SYMBOL_GPL(elv_rqhash_add);
187 
/*
 * Re-hash @rq after its hash key changed: take it off the merge hash and
 * add it back under its current rq_hash_key(). The removal is
 * unconditional, so @rq is expected to be hashed on entry.
 */
void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
{
	__elv_rqhash_del(rq);
	elv_rqhash_add(q, rq);
}
193 
194 struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
195 {
196 	struct elevator_queue *e = q->elevator;
197 	struct hlist_node *next;
198 	struct request *rq;
199 
200 	hash_for_each_possible_safe(e->hash, rq, next, hash, offset) {
201 		BUG_ON(!ELV_ON_HASH(rq));
202 
203 		if (unlikely(!rq_mergeable(rq))) {
204 			__elv_rqhash_del(rq);
205 			continue;
206 		}
207 
208 		if (rq_hash_key(rq) == offset)
209 			return rq;
210 	}
211 
212 	return NULL;
213 }
214 
215 /*
216  * RB-tree support functions for inserting/lookup/removal of requests
217  * in a sorted RB tree.
218  */
219 void elv_rb_add(struct rb_root *root, struct request *rq)
220 {
221 	struct rb_node **p = &root->rb_node;
222 	struct rb_node *parent = NULL;
223 	struct request *__rq;
224 
225 	while (*p) {
226 		parent = *p;
227 		__rq = rb_entry(parent, struct request, rb_node);
228 
229 		if (blk_rq_pos(rq) < blk_rq_pos(__rq))
230 			p = &(*p)->rb_left;
231 		else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
232 			p = &(*p)->rb_right;
233 	}
234 
235 	rb_link_node(&rq->rb_node, parent, p);
236 	rb_insert_color(&rq->rb_node, root);
237 }
238 EXPORT_SYMBOL(elv_rb_add);
239 
240 void elv_rb_del(struct rb_root *root, struct request *rq)
241 {
242 	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
243 	rb_erase(&rq->rb_node, root);
244 	RB_CLEAR_NODE(&rq->rb_node);
245 }
246 EXPORT_SYMBOL(elv_rb_del);
247 
248 struct request *elv_rb_find(struct rb_root *root, sector_t sector)
249 {
250 	struct rb_node *n = root->rb_node;
251 	struct request *rq;
252 
253 	while (n) {
254 		rq = rb_entry(n, struct request, rb_node);
255 
256 		if (sector < blk_rq_pos(rq))
257 			n = n->rb_left;
258 		else if (sector > blk_rq_pos(rq))
259 			n = n->rb_right;
260 		else
261 			return rq;
262 	}
263 
264 	return NULL;
265 }
266 EXPORT_SYMBOL(elv_rb_find);
267 
268 enum elv_merge elv_merge(struct request_queue *q, struct request **req,
269 		struct bio *bio)
270 {
271 	struct elevator_queue *e = q->elevator;
272 	struct request *__rq;
273 
274 	/*
275 	 * Levels of merges:
276 	 * 	nomerges:  No merges at all attempted
277 	 * 	noxmerges: Only simple one-hit cache try
278 	 * 	merges:	   All merge tries attempted
279 	 */
280 	if (blk_queue_nomerges(q) || !bio_mergeable(bio))
281 		return ELEVATOR_NO_MERGE;
282 
283 	/*
284 	 * First try one-hit cache.
285 	 */
286 	if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
287 		enum elv_merge ret = blk_try_merge(q->last_merge, bio);
288 
289 		if (ret != ELEVATOR_NO_MERGE) {
290 			*req = q->last_merge;
291 			return ret;
292 		}
293 	}
294 
295 	if (blk_queue_noxmerges(q))
296 		return ELEVATOR_NO_MERGE;
297 
298 	/*
299 	 * See if our hash lookup can find a potential backmerge.
300 	 */
301 	__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
302 	if (__rq && elv_bio_merge_ok(__rq, bio)) {
303 		*req = __rq;
304 
305 		if (blk_discard_mergable(__rq))
306 			return ELEVATOR_DISCARD_MERGE;
307 		return ELEVATOR_BACK_MERGE;
308 	}
309 
310 	if (e->type->ops.request_merge)
311 		return e->type->ops.request_merge(q, req, bio);
312 
313 	return ELEVATOR_NO_MERGE;
314 }
315 
316 /*
317  * Attempt to do an insertion back merge. Only check for the case where
318  * we can append 'rq' to an existing request, so we can throw 'rq' away
319  * afterwards.
320  *
321  * Returns true if we merged, false otherwise. 'free' will contain all
322  * requests that need to be freed.
323  */
324 bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq,
325 			      struct list_head *free)
326 {
327 	struct request *__rq;
328 	bool ret;
329 
330 	if (blk_queue_nomerges(q))
331 		return false;
332 
333 	/*
334 	 * First try one-hit cache.
335 	 */
336 	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq)) {
337 		list_add(&rq->queuelist, free);
338 		return true;
339 	}
340 
341 	if (blk_queue_noxmerges(q))
342 		return false;
343 
344 	ret = false;
345 	/*
346 	 * See if our hash lookup can find a potential backmerge.
347 	 */
348 	while (1) {
349 		__rq = elv_rqhash_find(q, blk_rq_pos(rq));
350 		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
351 			break;
352 
353 		list_add(&rq->queuelist, free);
354 		/* The merged request could be merged with others, try again */
355 		ret = true;
356 		rq = __rq;
357 	}
358 
359 	return ret;
360 }
361 
362 void elv_merged_request(struct request_queue *q, struct request *rq,
363 		enum elv_merge type)
364 {
365 	struct elevator_queue *e = q->elevator;
366 
367 	if (e->type->ops.request_merged)
368 		e->type->ops.request_merged(q, rq, type);
369 
370 	if (type == ELEVATOR_BACK_MERGE)
371 		elv_rqhash_reposition(q, rq);
372 
373 	q->last_merge = rq;
374 }
375 
376 void elv_merge_requests(struct request_queue *q, struct request *rq,
377 			     struct request *next)
378 {
379 	struct elevator_queue *e = q->elevator;
380 
381 	if (e->type->ops.requests_merged)
382 		e->type->ops.requests_merged(q, rq, next);
383 
384 	elv_rqhash_reposition(q, rq);
385 	q->last_merge = rq;
386 }
387 
388 struct request *elv_latter_request(struct request_queue *q, struct request *rq)
389 {
390 	struct elevator_queue *e = q->elevator;
391 
392 	if (e->type->ops.next_request)
393 		return e->type->ops.next_request(q, rq);
394 
395 	return NULL;
396 }
397 
398 struct request *elv_former_request(struct request_queue *q, struct request *rq)
399 {
400 	struct elevator_queue *e = q->elevator;
401 
402 	if (e->type->ops.former_request)
403 		return e->type->ops.former_request(q, rq);
404 
405 	return NULL;
406 }
407 
/* Map a struct attribute pointer to the elv_fs_entry that embeds it. */
#define to_elv(a) container_of_const((a), struct elv_fs_entry, attr)
409 
410 static ssize_t
411 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
412 {
413 	const struct elv_fs_entry *entry = to_elv(attr);
414 	struct elevator_queue *e;
415 	ssize_t error = -ENODEV;
416 
417 	if (!entry->show)
418 		return -EIO;
419 
420 	e = container_of(kobj, struct elevator_queue, kobj);
421 	mutex_lock(&e->sysfs_lock);
422 	if (!test_bit(ELEVATOR_FLAG_DYING, &e->flags))
423 		error = entry->show(e, page);
424 	mutex_unlock(&e->sysfs_lock);
425 	return error;
426 }
427 
428 static ssize_t
429 elv_attr_store(struct kobject *kobj, struct attribute *attr,
430 	       const char *page, size_t length)
431 {
432 	const struct elv_fs_entry *entry = to_elv(attr);
433 	struct elevator_queue *e;
434 	ssize_t error = -ENODEV;
435 
436 	if (!entry->store)
437 		return -EIO;
438 
439 	e = container_of(kobj, struct elevator_queue, kobj);
440 	mutex_lock(&e->sysfs_lock);
441 	if (!test_bit(ELEVATOR_FLAG_DYING, &e->flags))
442 		error = entry->store(e, page, length);
443 	mutex_unlock(&e->sysfs_lock);
444 	return error;
445 }
446 
447 static const struct sysfs_ops elv_sysfs_ops = {
448 	.show	= elv_attr_show,
449 	.store	= elv_attr_store,
450 };
451 
452 static const struct kobj_type elv_ktype = {
453 	.sysfs_ops	= &elv_sysfs_ops,
454 	.release	= elevator_release,
455 };
456 
457 static int elv_register_queue(struct request_queue *q,
458 			      struct elevator_queue *e,
459 			      bool uevent)
460 {
461 	int error;
462 
463 	error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
464 	if (!error) {
465 		const struct elv_fs_entry *attr = e->type->elevator_attrs;
466 		if (attr) {
467 			while (attr->attr.name) {
468 				if (sysfs_create_file(&e->kobj, &attr->attr))
469 					break;
470 				attr++;
471 			}
472 		}
473 		if (uevent)
474 			kobject_uevent(&e->kobj, KOBJ_ADD);
475 
476 		/*
477 		 * Sched is initialized, it is ready to export it via
478 		 * debugfs
479 		 */
480 		blk_mq_sched_reg_debugfs(q);
481 		set_bit(ELEVATOR_FLAG_REGISTERED, &e->flags);
482 	}
483 	return error;
484 }
485 
486 static void elv_unregister_queue(struct request_queue *q,
487 				 struct elevator_queue *e)
488 {
489 	if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
490 		kobject_uevent(&e->kobj, KOBJ_REMOVE);
491 		kobject_del(&e->kobj);
492 
493 		/* unexport via debugfs before exiting sched */
494 		blk_mq_sched_unreg_debugfs(q);
495 	}
496 }
497 
498 int elv_register(struct elevator_type *e)
499 {
500 	/* finish request is mandatory */
501 	if (WARN_ON_ONCE(!e->ops.finish_request))
502 		return -EINVAL;
503 	/* insert_requests and dispatch_request are mandatory */
504 	if (WARN_ON_ONCE(!e->ops.insert_requests || !e->ops.dispatch_request))
505 		return -EINVAL;
506 
507 	/* create icq_cache if requested */
508 	if (e->icq_size) {
509 		if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
510 		    WARN_ON(e->icq_align < __alignof__(struct io_cq)))
511 			return -EINVAL;
512 
513 		snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
514 			 "%s_io_cq", e->elevator_name);
515 		e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
516 						 e->icq_align, 0, NULL);
517 		if (!e->icq_cache)
518 			return -ENOMEM;
519 	}
520 
521 	/* register, don't allow duplicate names */
522 	spin_lock(&elv_list_lock);
523 	if (__elevator_find(e->elevator_name)) {
524 		spin_unlock(&elv_list_lock);
525 		kmem_cache_destroy(e->icq_cache);
526 		return -EBUSY;
527 	}
528 	list_add_tail(&e->list, &elv_list);
529 	spin_unlock(&elv_list_lock);
530 
531 	printk(KERN_INFO "io scheduler %s registered\n", e->elevator_name);
532 
533 	return 0;
534 }
535 EXPORT_SYMBOL_GPL(elv_register);
536 
537 void elv_unregister(struct elevator_type *e)
538 {
539 	/* unregister */
540 	spin_lock(&elv_list_lock);
541 	list_del_init(&e->list);
542 	spin_unlock(&elv_list_lock);
543 
544 	/*
545 	 * Destroy icq_cache if it exists.  icq's are RCU managed.  Make
546 	 * sure all RCU operations are complete before proceeding.
547 	 */
548 	if (e->icq_cache) {
549 		rcu_barrier();
550 		kmem_cache_destroy(e->icq_cache);
551 		e->icq_cache = NULL;
552 	}
553 }
554 EXPORT_SYMBOL_GPL(elv_unregister);
555 
556 /*
557  * Switch to new_e io scheduler.
558  *
559  * If switching fails, we are most likely running out of memory and not able
560  * to restore the old io scheduler, so leaving the io scheduler being none.
561  */
562 static int elevator_switch(struct request_queue *q, struct elv_change_ctx *ctx)
563 {
564 	struct elevator_type *new_e = NULL;
565 	int ret = 0;
566 
567 	WARN_ON_ONCE(q->mq_freeze_depth == 0);
568 	lockdep_assert_held(&q->elevator_lock);
569 
570 	if (strncmp(ctx->name, "none", 4)) {
571 		new_e = elevator_find_get(ctx->name);
572 		if (!new_e)
573 			return -EINVAL;
574 	}
575 
576 	blk_mq_quiesce_queue(q);
577 
578 	if (q->elevator) {
579 		ctx->old = q->elevator;
580 		elevator_exit(q);
581 	}
582 
583 	if (new_e) {
584 		ret = blk_mq_init_sched(q, new_e, &ctx->res);
585 		if (ret)
586 			goto out_unfreeze;
587 		ctx->new = q->elevator;
588 	} else {
589 		blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
590 		q->elevator = NULL;
591 		q->nr_requests = q->tag_set->queue_depth;
592 	}
593 	blk_add_trace_msg(q, "elv switch: %s", ctx->name);
594 
595 out_unfreeze:
596 	blk_mq_unquiesce_queue(q);
597 
598 	if (ret) {
599 		pr_warn("elv: switch to \"%s\" failed, falling back to \"none\"\n",
600 			new_e->elevator_name);
601 	}
602 
603 	if (new_e)
604 		elevator_put(new_e);
605 	return ret;
606 }
607 
608 static void elv_exit_and_release(struct elv_change_ctx *ctx,
609 		struct request_queue *q)
610 {
611 	struct elevator_queue *e;
612 	unsigned memflags;
613 
614 	memflags = blk_mq_freeze_queue(q);
615 	mutex_lock(&q->elevator_lock);
616 	e = q->elevator;
617 	elevator_exit(q);
618 	mutex_unlock(&q->elevator_lock);
619 	blk_mq_unfreeze_queue(q, memflags);
620 	if (e) {
621 		blk_mq_free_sched_res(&ctx->res, ctx->type, q->tag_set);
622 		kobject_put(&e->kobj);
623 	}
624 }
625 
626 static int elevator_change_done(struct request_queue *q,
627 				struct elv_change_ctx *ctx)
628 {
629 	int ret = 0;
630 
631 	if (ctx->old) {
632 		struct elevator_resources res = {
633 			.et = ctx->old->et,
634 			.data = ctx->old->elevator_data
635 		};
636 
637 		elv_unregister_queue(q, ctx->old);
638 		blk_mq_free_sched_res(&res, ctx->old->type, q->tag_set);
639 		kobject_put(&ctx->old->kobj);
640 	}
641 	if (ctx->new) {
642 		ret = elv_register_queue(q, ctx->new, !ctx->no_uevent);
643 		if (ret)
644 			elv_exit_and_release(ctx, q);
645 	}
646 	return ret;
647 }
648 
649 /*
650  * Switch this queue to the given IO scheduler.
651  */
652 static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
653 {
654 	unsigned int memflags;
655 	struct blk_mq_tag_set *set = q->tag_set;
656 	int ret = 0;
657 
658 	lockdep_assert_held(&set->update_nr_hwq_lock);
659 
660 	if (strncmp(ctx->name, "none", 4)) {
661 		ret = blk_mq_alloc_sched_res(q, ctx->type, &ctx->res,
662 				set->nr_hw_queues);
663 		if (ret)
664 			return ret;
665 	}
666 
667 	memflags = blk_mq_freeze_queue(q);
668 	/*
669 	 * May be called before adding disk, when there isn't any FS I/O,
670 	 * so freezing queue plus canceling dispatch work is enough to
671 	 * drain any dispatch activities originated from passthrough
672 	 * requests, then no need to quiesce queue which may add long boot
673 	 * latency, especially when lots of disks are involved.
674 	 *
675 	 * Disk isn't added yet, so verifying queue lock only manually.
676 	 */
677 	blk_mq_cancel_work_sync(q);
678 	mutex_lock(&q->elevator_lock);
679 	if (!(q->elevator && elevator_match(q->elevator->type, ctx->name)))
680 		ret = elevator_switch(q, ctx);
681 	mutex_unlock(&q->elevator_lock);
682 	blk_mq_unfreeze_queue(q, memflags);
683 	if (!ret)
684 		ret = elevator_change_done(q, ctx);
685 
686 	/*
687 	 * Free sched resource if it's allocated but we couldn't switch elevator.
688 	 */
689 	if (!ctx->new)
690 		blk_mq_free_sched_res(&ctx->res, ctx->type, set);
691 
692 	return ret;
693 }
694 
695 /*
696  * The I/O scheduler depends on the number of hardware queues, this forces a
697  * reattachment when nr_hw_queues changes.
698  */
699 void elv_update_nr_hw_queues(struct request_queue *q,
700 		struct elv_change_ctx *ctx)
701 {
702 	struct blk_mq_tag_set *set = q->tag_set;
703 	int ret = -ENODEV;
704 
705 	WARN_ON_ONCE(q->mq_freeze_depth == 0);
706 
707 	if (ctx->type && !blk_queue_dying(q) && blk_queue_registered(q)) {
708 		mutex_lock(&q->elevator_lock);
709 		/* force to reattach elevator after nr_hw_queue is updated */
710 		ret = elevator_switch(q, ctx);
711 		mutex_unlock(&q->elevator_lock);
712 	}
713 	blk_mq_unfreeze_queue_nomemrestore(q);
714 	if (!ret)
715 		WARN_ON_ONCE(elevator_change_done(q, ctx));
716 
717 	/*
718 	 * Free sched resource if it's allocated but we couldn't switch elevator.
719 	 */
720 	if (!ctx->new)
721 		blk_mq_free_sched_res(&ctx->res, ctx->type, set);
722 }
723 
724 /*
725  * Use the default elevator settings. If the chosen elevator initialization
726  * fails, fall back to the "none" elevator (no elevator).
727  */
728 void elevator_set_default(struct request_queue *q)
729 {
730 	struct elv_change_ctx ctx = {
731 		.name = "mq-deadline",
732 		.no_uevent = true,
733 	};
734 	int err;
735 
736 	/* now we allow to switch elevator */
737 	blk_queue_flag_clear(QUEUE_FLAG_NO_ELV_SWITCH, q);
738 
739 	if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
740 		return;
741 
742 	/*
743 	 * For single queue devices, default to using mq-deadline. If we
744 	 * have multiple queues or mq-deadline is not available, default
745 	 * to "none".
746 	 */
747 	ctx.type = elevator_find_get(ctx.name);
748 	if (!ctx.type)
749 		return;
750 
751 	if ((q->nr_hw_queues == 1 ||
752 			blk_mq_is_shared_tags(q->tag_set->flags))) {
753 		err = elevator_change(q, &ctx);
754 		if (err < 0)
755 			pr_warn("\"%s\" elevator initialization, failed %d, falling back to \"none\"\n",
756 					ctx.name, err);
757 	}
758 	elevator_put(ctx.type);
759 }
760 
761 void elevator_set_none(struct request_queue *q)
762 {
763 	struct elv_change_ctx ctx = {
764 		.name	= "none",
765 	};
766 	int err;
767 
768 	err = elevator_change(q, &ctx);
769 	if (err < 0)
770 		pr_warn("%s: set none elevator failed %d\n", __func__, err);
771 }
772 
773 static void elv_iosched_load_module(const char *elevator_name)
774 {
775 	struct elevator_type *found;
776 
777 	spin_lock(&elv_list_lock);
778 	found = __elevator_find(elevator_name);
779 	spin_unlock(&elv_list_lock);
780 
781 	if (!found)
782 		request_module("%s-iosched", elevator_name);
783 }
784 
785 ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
786 			  size_t count)
787 {
788 	char elevator_name[ELV_NAME_MAX];
789 	struct elv_change_ctx ctx = {};
790 	int ret;
791 	struct request_queue *q = disk->queue;
792 	struct blk_mq_tag_set *set = q->tag_set;
793 
794 	/* Make sure queue is not in the middle of being removed */
795 	if (!blk_queue_registered(q))
796 		return -ENOENT;
797 
798 	/*
799 	 * If the attribute needs to load a module, do it before freezing the
800 	 * queue to ensure that the module file can be read when the request
801 	 * queue is the one for the device storing the module file.
802 	 */
803 	strscpy(elevator_name, buf, sizeof(elevator_name));
804 	ctx.name = strstrip(elevator_name);
805 
806 	elv_iosched_load_module(ctx.name);
807 	ctx.type = elevator_find_get(ctx.name);
808 
809 	down_read(&set->update_nr_hwq_lock);
810 	if (!blk_queue_no_elv_switch(q)) {
811 		ret = elevator_change(q, &ctx);
812 		if (!ret)
813 			ret = count;
814 	} else {
815 		ret = -ENOENT;
816 	}
817 	up_read(&set->update_nr_hwq_lock);
818 
819 	if (ctx.type)
820 		elevator_put(ctx.type);
821 	return ret;
822 }
823 
824 ssize_t elv_iosched_show(struct gendisk *disk, char *name)
825 {
826 	struct request_queue *q = disk->queue;
827 	struct elevator_type *cur = NULL, *e;
828 	int len = 0;
829 
830 	mutex_lock(&q->elevator_lock);
831 	if (!q->elevator) {
832 		len += sprintf(name+len, "[none] ");
833 	} else {
834 		len += sprintf(name+len, "none ");
835 		cur = q->elevator->type;
836 	}
837 
838 	spin_lock(&elv_list_lock);
839 	list_for_each_entry(e, &elv_list, list) {
840 		if (e == cur)
841 			len += sprintf(name+len, "[%s] ", e->elevator_name);
842 		else
843 			len += sprintf(name+len, "%s ", e->elevator_name);
844 	}
845 	spin_unlock(&elv_list_lock);
846 
847 	len += sprintf(name+len, "\n");
848 	mutex_unlock(&q->elevator_lock);
849 
850 	return len;
851 }
852 
853 struct request *elv_rb_former_request(struct request_queue *q,
854 				      struct request *rq)
855 {
856 	struct rb_node *rbprev = rb_prev(&rq->rb_node);
857 
858 	if (rbprev)
859 		return rb_entry_rq(rbprev);
860 
861 	return NULL;
862 }
863 EXPORT_SYMBOL(elv_rb_former_request);
864 
865 struct request *elv_rb_latter_request(struct request_queue *q,
866 				      struct request *rq)
867 {
868 	struct rb_node *rbnext = rb_next(&rq->rb_node);
869 
870 	if (rbnext)
871 		return rb_entry_rq(rbnext);
872 
873 	return NULL;
874 }
875 EXPORT_SYMBOL(elv_rb_latter_request);
876 
877 static int __init elevator_setup(char *str)
878 {
879 	pr_warn("Kernel parameter elevator= does not have any effect anymore.\n"
880 		"Please use sysfs to set IO scheduler for individual devices.\n");
881 	return 1;
882 }
883 
884 __setup("elevator=", elevator_setup);
885