xref: /linux/block/blk-sysfs.c (revision 4b99990cdf9560e8a071640baf19f312e6ae02f4)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Functions related to sysfs handling
4  */
5 #include <linux/kernel.h>
6 #include <linux/slab.h>
7 #include <linux/module.h>
8 #include <linux/bio.h>
9 #include <linux/blkdev.h>
10 #include <linux/backing-dev.h>
11 #include <linux/blktrace_api.h>
12 #include <linux/debugfs.h>
13 
14 #include "blk.h"
15 #include "blk-mq.h"
16 #include "blk-mq-debugfs.h"
17 #include "blk-mq-sched.h"
18 #include "blk-rq-qos.h"
19 #include "blk-wbt.h"
20 #include "blk-cgroup.h"
21 #include "blk-throttle.h"
22 #include "error-injection.h"
23 
24 struct queue_sysfs_entry {
25 	struct attribute attr;
26 	ssize_t (*show)(struct gendisk *disk, char *page);
27 	ssize_t (*show_limit)(struct gendisk *disk, char *page);
28 
29 	ssize_t (*store)(struct gendisk *disk, const char *page, size_t count);
30 	int (*store_limit)(struct gendisk *disk, const char *page,
31 			size_t count, struct queue_limits *lim);
32 };
33 
34 static ssize_t
35 queue_var_show(unsigned long var, char *page)
36 {
37 	return sysfs_emit(page, "%lu\n", var);
38 }
39 
40 static ssize_t
41 queue_var_store(unsigned long *var, const char *page, size_t count)
42 {
43 	int err;
44 	unsigned long v;
45 
46 	err = kstrtoul(page, 10, &v);
47 	if (err || v > UINT_MAX)
48 		return -EINVAL;
49 
50 	*var = v;
51 
52 	return count;
53 }
54 
55 static ssize_t queue_requests_show(struct gendisk *disk, char *page)
56 {
57 	ssize_t ret;
58 
59 	mutex_lock(&disk->queue->elevator_lock);
60 	ret = queue_var_show(disk->queue->nr_requests, page);
61 	mutex_unlock(&disk->queue->elevator_lock);
62 	return ret;
63 }
64 
65 static ssize_t
66 queue_requests_store(struct gendisk *disk, const char *page, size_t count)
67 {
68 	struct request_queue *q = disk->queue;
69 	struct blk_mq_tag_set *set = q->tag_set;
70 	struct elevator_tags *et = NULL;
71 	unsigned int memflags;
72 	unsigned long nr;
73 	int ret;
74 
75 	ret = queue_var_store(&nr, page, count);
76 	if (ret < 0)
77 		return ret;
78 
79 	/*
80 	 * Serialize updating nr_requests with concurrent queue_requests_store()
81 	 * and switching elevator.
82 	 *
83 	 * Use trylock to avoid circular lock dependency with kernfs active
84 	 * reference during concurrent disk deletion:
85 	 *   update_nr_hwq_lock -> kn->active (via del_gendisk -> kobject_del)
86 	 *   kn->active -> update_nr_hwq_lock (via this sysfs write path)
87 	 */
88 	if (!down_write_trylock(&set->update_nr_hwq_lock))
89 		return -EBUSY;
90 
91 	if (nr == q->nr_requests)
92 		goto unlock;
93 
94 	if (nr < BLKDEV_MIN_RQ)
95 		nr = BLKDEV_MIN_RQ;
96 
97 	/*
98 	 * Switching elevator is protected by update_nr_hwq_lock:
99 	 *  - read lock is held from elevator sysfs attribute;
100 	 *  - write lock is held from updating nr_hw_queues;
101 	 * Hence it's safe to access q->elevator here with write lock held.
102 	 */
103 	if (nr <= set->reserved_tags ||
104 	    (q->elevator && nr > MAX_SCHED_RQ) ||
105 	    (!q->elevator && nr > set->queue_depth)) {
106 		ret = -EINVAL;
107 		goto unlock;
108 	}
109 
110 	if (!blk_mq_is_shared_tags(set->flags) && q->elevator &&
111 	    nr > q->elevator->et->nr_requests) {
112 		/*
113 		 * Tags will grow, allocate memory before freezing queue to
114 		 * prevent deadlock.
115 		 */
116 		et = blk_mq_alloc_sched_tags(set, q->nr_hw_queues, nr);
117 		if (!et) {
118 			ret = -ENOMEM;
119 			goto unlock;
120 		}
121 	}
122 
123 	memflags = blk_mq_freeze_queue(q);
124 	mutex_lock(&q->elevator_lock);
125 	et = blk_mq_update_nr_requests(q, et, nr);
126 	mutex_unlock(&q->elevator_lock);
127 	blk_mq_unfreeze_queue(q, memflags);
128 
129 	if (et)
130 		blk_mq_free_sched_tags(et, set);
131 
132 unlock:
133 	up_write(&set->update_nr_hwq_lock);
134 	return ret;
135 }
136 
137 static ssize_t queue_async_depth_show(struct gendisk *disk, char *page)
138 {
139 	guard(mutex)(&disk->queue->elevator_lock);
140 
141 	return queue_var_show(disk->queue->async_depth, page);
142 }
143 
144 static ssize_t
145 queue_async_depth_store(struct gendisk *disk, const char *page, size_t count)
146 {
147 	struct request_queue *q = disk->queue;
148 	unsigned int memflags;
149 	unsigned long nr;
150 	int ret;
151 
152 	if (!queue_is_mq(q))
153 		return -EINVAL;
154 
155 	ret = queue_var_store(&nr, page, count);
156 	if (ret < 0)
157 		return ret;
158 
159 	if (nr == 0)
160 		return -EINVAL;
161 
162 	memflags = blk_mq_freeze_queue(q);
163 	scoped_guard(mutex, &q->elevator_lock) {
164 		if (q->elevator) {
165 			q->async_depth = min(q->nr_requests, nr);
166 			if (q->elevator->type->ops.depth_updated)
167 				q->elevator->type->ops.depth_updated(q);
168 		} else {
169 			ret = -EINVAL;
170 		}
171 	}
172 	blk_mq_unfreeze_queue(q, memflags);
173 
174 	return ret;
175 }
176 
177 static ssize_t queue_ra_show(struct gendisk *disk, char *page)
178 {
179 	ssize_t ret;
180 
181 	mutex_lock(&disk->queue->limits_lock);
182 	ret = queue_var_show(disk->bdi->ra_pages << (PAGE_SHIFT - 10), page);
183 	mutex_unlock(&disk->queue->limits_lock);
184 
185 	return ret;
186 }
187 
188 static ssize_t
189 queue_ra_store(struct gendisk *disk, const char *page, size_t count)
190 {
191 	unsigned long ra_kb;
192 	ssize_t ret;
193 	struct request_queue *q = disk->queue;
194 
195 	ret = queue_var_store(&ra_kb, page, count);
196 	if (ret < 0)
197 		return ret;
198 	/*
199 	 * The ->ra_pages change below is protected by ->limits_lock because it
200 	 * is usually calculated from the queue limits by
201 	 * queue_limits_commit_update().
202 	 *
203 	 * bdi->ra_pages reads are not serialized against bdi->ra_pages writes.
204 	 * Use WRITE_ONCE() to write bdi->ra_pages once.
205 	 */
206 	mutex_lock(&q->limits_lock);
207 	WRITE_ONCE(disk->bdi->ra_pages, ra_kb >> (PAGE_SHIFT - 10));
208 	mutex_unlock(&q->limits_lock);
209 
210 	return ret;
211 }
212 
213 #define QUEUE_SYSFS_LIMIT_SHOW(_field)					\
214 static ssize_t queue_##_field##_show(struct gendisk *disk, char *page)	\
215 {									\
216 	return queue_var_show(disk->queue->limits._field, page);	\
217 }
218 
219 QUEUE_SYSFS_LIMIT_SHOW(max_segments)
220 QUEUE_SYSFS_LIMIT_SHOW(max_discard_segments)
221 QUEUE_SYSFS_LIMIT_SHOW(max_integrity_segments)
222 QUEUE_SYSFS_LIMIT_SHOW(max_segment_size)
223 QUEUE_SYSFS_LIMIT_SHOW(max_write_streams)
224 QUEUE_SYSFS_LIMIT_SHOW(write_stream_granularity)
225 QUEUE_SYSFS_LIMIT_SHOW(logical_block_size)
226 QUEUE_SYSFS_LIMIT_SHOW(physical_block_size)
227 QUEUE_SYSFS_LIMIT_SHOW(chunk_sectors)
228 QUEUE_SYSFS_LIMIT_SHOW(io_min)
229 QUEUE_SYSFS_LIMIT_SHOW(io_opt)
230 QUEUE_SYSFS_LIMIT_SHOW(discard_granularity)
231 QUEUE_SYSFS_LIMIT_SHOW(zone_write_granularity)
232 QUEUE_SYSFS_LIMIT_SHOW(virt_boundary_mask)
233 QUEUE_SYSFS_LIMIT_SHOW(dma_alignment)
234 QUEUE_SYSFS_LIMIT_SHOW(max_open_zones)
235 QUEUE_SYSFS_LIMIT_SHOW(max_active_zones)
236 QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_min)
237 QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_max)
238 
239 #define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(_field)			\
240 static ssize_t queue_##_field##_show(struct gendisk *disk, char *page)	\
241 {									\
242 	return sysfs_emit(page, "%llu\n",				\
243 		(unsigned long long)disk->queue->limits._field <<	\
244 			SECTOR_SHIFT);					\
245 }
246 
247 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_discard_sectors)
248 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_discard_sectors)
249 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_write_zeroes_sectors)
250 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_wzeroes_unmap_sectors)
251 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_wzeroes_unmap_sectors)
252 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_max_sectors)
253 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_boundary_sectors)
254 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_zone_append_sectors)
255 
256 #define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(_field)			\
257 static ssize_t queue_##_field##_show(struct gendisk *disk, char *page)	\
258 {									\
259 	return queue_var_show(disk->queue->limits._field >> 1, page);	\
260 }
261 
262 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_sectors)
263 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_hw_sectors)
264 
265 #define QUEUE_SYSFS_SHOW_CONST(_name, _val)				\
266 static ssize_t queue_##_name##_show(struct gendisk *disk, char *page)	\
267 {									\
268 	return sysfs_emit(page, "%d\n", _val);				\
269 }
270 
271 /* deprecated fields */
272 QUEUE_SYSFS_SHOW_CONST(discard_zeroes_data, 0)
273 QUEUE_SYSFS_SHOW_CONST(write_same_max, 0)
274 QUEUE_SYSFS_SHOW_CONST(poll_delay, -1)
275 
276 static int queue_max_discard_sectors_store(struct gendisk *disk,
277 		const char *page, size_t count, struct queue_limits *lim)
278 {
279 	unsigned long max_discard_bytes;
280 	ssize_t ret;
281 
282 	ret = queue_var_store(&max_discard_bytes, page, count);
283 	if (ret < 0)
284 		return ret;
285 
286 	if (max_discard_bytes & (disk->queue->limits.discard_granularity - 1))
287 		return -EINVAL;
288 
289 	if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX)
290 		return -EINVAL;
291 
292 	lim->max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT;
293 	return 0;
294 }
295 
296 static int queue_max_wzeroes_unmap_sectors_store(struct gendisk *disk,
297 		const char *page, size_t count, struct queue_limits *lim)
298 {
299 	unsigned long max_zeroes_bytes, max_hw_zeroes_bytes;
300 	ssize_t ret;
301 
302 	ret = queue_var_store(&max_zeroes_bytes, page, count);
303 	if (ret < 0)
304 		return ret;
305 
306 	max_hw_zeroes_bytes = lim->max_hw_wzeroes_unmap_sectors << SECTOR_SHIFT;
307 	if (max_zeroes_bytes != 0 && max_zeroes_bytes != max_hw_zeroes_bytes)
308 		return -EINVAL;
309 
310 	lim->max_user_wzeroes_unmap_sectors = max_zeroes_bytes >> SECTOR_SHIFT;
311 	return 0;
312 }
313 
314 static int
315 queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count,
316 		struct queue_limits *lim)
317 {
318 	unsigned long max_sectors_kb;
319 	ssize_t ret;
320 
321 	ret = queue_var_store(&max_sectors_kb, page, count);
322 	if (ret < 0)
323 		return ret;
324 
325 	lim->max_user_sectors = max_sectors_kb << 1;
326 	return 0;
327 }
328 
329 static ssize_t queue_feature_store(struct gendisk *disk, const char *page,
330 		size_t count, struct queue_limits *lim, blk_features_t feature)
331 {
332 	unsigned long val;
333 	ssize_t ret;
334 
335 	ret = queue_var_store(&val, page, count);
336 	if (ret < 0)
337 		return ret;
338 
339 	if (val)
340 		lim->features |= feature;
341 	else
342 		lim->features &= ~feature;
343 	return 0;
344 }
345 
346 #define QUEUE_SYSFS_FEATURE(_name, _feature)				\
347 static ssize_t queue_##_name##_show(struct gendisk *disk, char *page)	\
348 {									\
349 	return sysfs_emit(page, "%u\n",					\
350 		!!(disk->queue->limits.features & _feature));		\
351 }									\
352 static int queue_##_name##_store(struct gendisk *disk,			\
353 		const char *page, size_t count, struct queue_limits *lim) \
354 {									\
355 	return queue_feature_store(disk, page, count, lim, _feature);	\
356 }
357 
358 QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL)
359 QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM)
360 QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT)
361 QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES);
362 
363 #define QUEUE_SYSFS_FEATURE_SHOW(_name, _feature)			\
364 static ssize_t queue_##_name##_show(struct gendisk *disk, char *page)	\
365 {									\
366 	return sysfs_emit(page, "%u\n",					\
367 		!!(disk->queue->limits.features & _feature));		\
368 }
369 
370 QUEUE_SYSFS_FEATURE_SHOW(fua, BLK_FEAT_FUA);
371 QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX);
372 
373 static ssize_t queue_poll_show(struct gendisk *disk, char *page)
374 {
375 	if (queue_is_mq(disk->queue))
376 		return sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue));
377 
378 	return sysfs_emit(page, "%u\n",
379 			!!(disk->queue->limits.features & BLK_FEAT_POLL));
380 }
381 
382 static ssize_t queue_zoned_show(struct gendisk *disk, char *page)
383 {
384 	if (blk_queue_is_zoned(disk->queue))
385 		return sysfs_emit(page, "host-managed\n");
386 	return sysfs_emit(page, "none\n");
387 }
388 
389 static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
390 {
391 	return queue_var_show(disk_nr_zones(disk), page);
392 }
393 
394 static ssize_t queue_zoned_qd1_writes_show(struct gendisk *disk, char *page)
395 {
396 	return queue_var_show(!!blk_queue_zoned_qd1_writes(disk->queue),
397 			      page);
398 }
399 
400 static ssize_t queue_zoned_qd1_writes_store(struct gendisk *disk,
401 					    const char *page, size_t count)
402 {
403 	struct request_queue *q = disk->queue;
404 	unsigned long qd1_writes;
405 	unsigned int memflags;
406 	ssize_t ret;
407 
408 	ret = queue_var_store(&qd1_writes, page, count);
409 	if (ret < 0)
410 		return ret;
411 
412 	memflags = blk_mq_freeze_queue(q);
413 	blk_mq_quiesce_queue(q);
414 	if (qd1_writes)
415 		blk_queue_flag_set(QUEUE_FLAG_ZONED_QD1_WRITES, q);
416 	else
417 		blk_queue_flag_clear(QUEUE_FLAG_ZONED_QD1_WRITES, q);
418 	blk_mq_unquiesce_queue(q);
419 	blk_mq_unfreeze_queue(q, memflags);
420 
421 	return count;
422 }
423 
424 static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page)
425 {
426 	return queue_var_show(!!blk_queue_passthrough_stat(disk->queue), page);
427 }
428 
429 static int queue_iostats_passthrough_store(struct gendisk *disk,
430 		const char *page, size_t count, struct queue_limits *lim)
431 {
432 	unsigned long ios;
433 	ssize_t ret;
434 
435 	ret = queue_var_store(&ios, page, count);
436 	if (ret < 0)
437 		return ret;
438 
439 	if (ios)
440 		lim->flags |= BLK_FLAG_IOSTATS_PASSTHROUGH;
441 	else
442 		lim->flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH;
443 	return 0;
444 }
445 
446 static ssize_t queue_nomerges_show(struct gendisk *disk, char *page)
447 {
448 	return queue_var_show((blk_queue_nomerges(disk->queue) << 1) |
449 			       blk_queue_noxmerges(disk->queue), page);
450 }
451 
452 static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
453 				    size_t count)
454 {
455 	unsigned long nm;
456 	struct request_queue *q = disk->queue;
457 	ssize_t ret = queue_var_store(&nm, page, count);
458 
459 	if (ret < 0)
460 		return ret;
461 
462 	blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
463 	blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
464 	if (nm == 2)
465 		blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
466 	else if (nm)
467 		blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
468 
469 	return ret;
470 }
471 
472 static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page)
473 {
474 	bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags);
475 	bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags);
476 
477 	return queue_var_show(set << force, page);
478 }
479 
480 static ssize_t
481 queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
482 {
483 	ssize_t ret = -EINVAL;
484 #ifdef CONFIG_SMP
485 	struct request_queue *q = disk->queue;
486 	unsigned long val;
487 
488 	ret = queue_var_store(&val, page, count);
489 	if (ret < 0)
490 		return ret;
491 
492 	/*
493 	 * Here we update two queue flags each using atomic bitops, although
494 	 * updating two flags isn't atomic it should be harmless as those flags
495 	 * are accessed individually using atomic test_bit operation. So we
496 	 * don't grab any lock while updating these flags.
497 	 */
498 	if (val == 2) {
499 		blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
500 		blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
501 	} else if (val == 1) {
502 		blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
503 		blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
504 	} else if (val == 0) {
505 		blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
506 		blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
507 	}
508 #endif
509 	return ret;
510 }
511 
512 static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page,
513 				size_t count)
514 {
515 	return count;
516 }
517 
518 static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
519 				size_t count)
520 {
521 	ssize_t ret = count;
522 	struct request_queue *q = disk->queue;
523 
524 	if (!(q->limits.features & BLK_FEAT_POLL)) {
525 		ret = -EINVAL;
526 		goto out;
527 	}
528 
529 	pr_info_ratelimited("writes to the poll attribute are ignored.\n");
530 	pr_info_ratelimited("please use driver specific parameters instead.\n");
531 out:
532 	return ret;
533 }
534 
535 static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page)
536 {
537 	return sysfs_emit(page, "%u\n",
538 			jiffies_to_msecs(READ_ONCE(disk->queue->rq_timeout)));
539 }
540 
541 static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
542 				  size_t count)
543 {
544 	unsigned int val;
545 	int err;
546 	struct request_queue *q = disk->queue;
547 
548 	err = kstrtou32(page, 10, &val);
549 	if (err || val == 0)
550 		return -EINVAL;
551 
552 	blk_queue_rq_timeout(q, msecs_to_jiffies(val));
553 
554 	return count;
555 }
556 
557 static ssize_t queue_wc_show(struct gendisk *disk, char *page)
558 {
559 	if (blk_queue_write_cache(disk->queue))
560 		return sysfs_emit(page, "write back\n");
561 	return sysfs_emit(page, "write through\n");
562 }
563 
564 static int queue_wc_store(struct gendisk *disk, const char *page,
565 		size_t count, struct queue_limits *lim)
566 {
567 	bool disable;
568 
569 	if (!strncmp(page, "write back", 10)) {
570 		disable = false;
571 	} else if (!strncmp(page, "write through", 13) ||
572 		   !strncmp(page, "none", 4)) {
573 		disable = true;
574 	} else {
575 		return -EINVAL;
576 	}
577 
578 	if (disable)
579 		lim->flags |= BLK_FLAG_WRITE_CACHE_DISABLED;
580 	else
581 		lim->flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED;
582 	return 0;
583 }
584 
/*
 * Helpers that define a "<prefix>_entry" queue_sysfs_entry wired to the
 * "<prefix>_show" / "<prefix>_store" handlers, exposed under sysfs name
 * @_name.
 *
 *  - QUEUE_RO_ENTRY / QUEUE_RW_ENTRY use the ->show / ->store callbacks.
 *  - QUEUE_LIM_RO_ENTRY / QUEUE_LIM_RW_ENTRY use the ->show_limit /
 *    ->store_limit callbacks.
 *
 * None of the macros ends in a semicolon; the invocation supplies it.
 */
#define QUEUE_RO_ENTRY(_prefix, _name)				\
static const struct queue_sysfs_entry _prefix##_entry = {	\
	.attr	= { .name = _name, .mode = 0444 },		\
	.show	= _prefix##_show,				\
}

#define QUEUE_RW_ENTRY(_prefix, _name)				\
static const struct queue_sysfs_entry _prefix##_entry = {	\
	.attr	= { .name = _name, .mode = 0644 },		\
	.show	= _prefix##_show,				\
	.store	= _prefix##_store,				\
}

#define QUEUE_LIM_RO_ENTRY(_prefix, _name)			\
static const struct queue_sysfs_entry _prefix##_entry = {	\
	.attr		= { .name = _name, .mode = 0444 },	\
	.show_limit	= _prefix##_show,			\
}

#define QUEUE_LIM_RW_ENTRY(_prefix, _name)			\
static const struct queue_sysfs_entry _prefix##_entry = {	\
	.attr		= { .name = _name, .mode = 0644 },	\
	.show_limit	= _prefix##_show,			\
	.store_limit	= _prefix##_store,			\
}
610 
611 QUEUE_RW_ENTRY(queue_requests, "nr_requests");
612 QUEUE_RW_ENTRY(queue_async_depth, "async_depth");
613 QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb");
614 QUEUE_LIM_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
615 QUEUE_LIM_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
616 QUEUE_LIM_RO_ENTRY(queue_max_segments, "max_segments");
617 QUEUE_LIM_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
618 QUEUE_LIM_RO_ENTRY(queue_max_segment_size, "max_segment_size");
619 QUEUE_LIM_RO_ENTRY(queue_max_write_streams, "max_write_streams");
620 QUEUE_LIM_RO_ENTRY(queue_write_stream_granularity, "write_stream_granularity");
621 QUEUE_RW_ENTRY(elv_iosched, "scheduler");
622 
623 QUEUE_LIM_RO_ENTRY(queue_logical_block_size, "logical_block_size");
624 QUEUE_LIM_RO_ENTRY(queue_physical_block_size, "physical_block_size");
625 QUEUE_LIM_RO_ENTRY(queue_chunk_sectors, "chunk_sectors");
626 QUEUE_LIM_RO_ENTRY(queue_io_min, "minimum_io_size");
627 QUEUE_LIM_RO_ENTRY(queue_io_opt, "optimal_io_size");
628 
629 QUEUE_LIM_RO_ENTRY(queue_max_discard_segments, "max_discard_segments");
630 QUEUE_LIM_RO_ENTRY(queue_discard_granularity, "discard_granularity");
631 QUEUE_LIM_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes");
632 QUEUE_LIM_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes");
633 QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");
634 
635 QUEUE_LIM_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes");
636 QUEUE_LIM_RO_ENTRY(queue_atomic_write_boundary_sectors,
637 		"atomic_write_boundary_bytes");
638 QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes");
639 QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes");
640 
641 QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
642 QUEUE_LIM_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes");
643 QUEUE_LIM_RO_ENTRY(queue_max_hw_wzeroes_unmap_sectors,
644 		"write_zeroes_unmap_max_hw_bytes");
645 QUEUE_LIM_RW_ENTRY(queue_max_wzeroes_unmap_sectors,
646 		"write_zeroes_unmap_max_bytes");
647 QUEUE_LIM_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes");
648 QUEUE_LIM_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
649 
650 QUEUE_LIM_RO_ENTRY(queue_zoned, "zoned");
651 QUEUE_RW_ENTRY(queue_zoned_qd1_writes, "zoned_qd1_writes");
652 QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
653 QUEUE_LIM_RO_ENTRY(queue_max_open_zones, "max_open_zones");
654 QUEUE_LIM_RO_ENTRY(queue_max_active_zones, "max_active_zones");
655 
656 QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
657 QUEUE_LIM_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough");
658 QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
659 QUEUE_RW_ENTRY(queue_poll, "io_poll");
660 QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay");
661 QUEUE_LIM_RW_ENTRY(queue_wc, "write_cache");
662 QUEUE_LIM_RO_ENTRY(queue_fua, "fua");
663 QUEUE_LIM_RO_ENTRY(queue_dax, "dax");
664 QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
665 QUEUE_LIM_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
666 QUEUE_LIM_RO_ENTRY(queue_dma_alignment, "dma_alignment");
667 
668 /* legacy alias for logical_block_size: */
669 static const struct queue_sysfs_entry queue_hw_sector_size_entry = {
670 	.attr		= {.name = "hw_sector_size", .mode = 0444 },
671 	.show_limit	= queue_logical_block_size_show,
672 };
673 
674 QUEUE_LIM_RW_ENTRY(queue_rotational, "rotational");
675 QUEUE_LIM_RW_ENTRY(queue_iostats, "iostats");
676 QUEUE_LIM_RW_ENTRY(queue_add_random, "add_random");
677 QUEUE_LIM_RW_ENTRY(queue_stable_writes, "stable_writes");
678 
679 #ifdef CONFIG_BLK_WBT
680 static ssize_t queue_var_store64(s64 *var, const char *page)
681 {
682 	int err;
683 	s64 v;
684 
685 	err = kstrtos64(page, 10, &v);
686 	if (err < 0)
687 		return err;
688 
689 	*var = v;
690 	return 0;
691 }
692 
693 static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page)
694 {
695 	ssize_t ret;
696 	struct request_queue *q = disk->queue;
697 
698 	mutex_lock(&disk->rqos_state_mutex);
699 	if (!wbt_rq_qos(q)) {
700 		ret = -EINVAL;
701 		goto out;
702 	}
703 
704 	if (wbt_disabled(q)) {
705 		ret = sysfs_emit(page, "0\n");
706 		goto out;
707 	}
708 
709 	ret = sysfs_emit(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
710 out:
711 	mutex_unlock(&disk->rqos_state_mutex);
712 	return ret;
713 }
714 
715 static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
716 				  size_t count)
717 {
718 	ssize_t ret;
719 	s64 val;
720 
721 	ret = queue_var_store64(&val, page);
722 	if (ret < 0)
723 		return ret;
724 	if (val < -1)
725 		return -EINVAL;
726 
727 	ret = wbt_set_lat(disk, val);
728 	return ret ? ret : count;
729 }
730 
731 QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
732 #endif
733 
734 /* Common attributes for bio-based and request-based queues. */
735 static const struct attribute *const queue_attrs[] = {
736 	/*
737 	 * Attributes which are protected with q->limits_lock.
738 	 */
739 	&queue_max_hw_sectors_entry.attr,
740 	&queue_max_sectors_entry.attr,
741 	&queue_max_segments_entry.attr,
742 	&queue_max_discard_segments_entry.attr,
743 	&queue_max_integrity_segments_entry.attr,
744 	&queue_max_segment_size_entry.attr,
745 	&queue_max_write_streams_entry.attr,
746 	&queue_write_stream_granularity_entry.attr,
747 	&queue_hw_sector_size_entry.attr,
748 	&queue_logical_block_size_entry.attr,
749 	&queue_physical_block_size_entry.attr,
750 	&queue_chunk_sectors_entry.attr,
751 	&queue_io_min_entry.attr,
752 	&queue_io_opt_entry.attr,
753 	&queue_discard_granularity_entry.attr,
754 	&queue_max_discard_sectors_entry.attr,
755 	&queue_max_hw_discard_sectors_entry.attr,
756 	&queue_atomic_write_max_sectors_entry.attr,
757 	&queue_atomic_write_boundary_sectors_entry.attr,
758 	&queue_atomic_write_unit_min_entry.attr,
759 	&queue_atomic_write_unit_max_entry.attr,
760 	&queue_max_write_zeroes_sectors_entry.attr,
761 	&queue_max_hw_wzeroes_unmap_sectors_entry.attr,
762 	&queue_max_wzeroes_unmap_sectors_entry.attr,
763 	&queue_max_zone_append_sectors_entry.attr,
764 	&queue_zone_write_granularity_entry.attr,
765 	&queue_rotational_entry.attr,
766 	&queue_zoned_entry.attr,
767 	&queue_max_open_zones_entry.attr,
768 	&queue_max_active_zones_entry.attr,
769 	&queue_iostats_passthrough_entry.attr,
770 	&queue_iostats_entry.attr,
771 	&queue_stable_writes_entry.attr,
772 	&queue_add_random_entry.attr,
773 	&queue_wc_entry.attr,
774 	&queue_fua_entry.attr,
775 	&queue_dax_entry.attr,
776 	&queue_virt_boundary_mask_entry.attr,
777 	&queue_dma_alignment_entry.attr,
778 	&queue_ra_entry.attr,
779 
780 	/*
781 	 * Attributes which don't require locking.
782 	 */
783 	&queue_discard_zeroes_data_entry.attr,
784 	&queue_write_same_max_entry.attr,
785 	&queue_nr_zones_entry.attr,
786 	&queue_nomerges_entry.attr,
787 	&queue_poll_entry.attr,
788 	&queue_poll_delay_entry.attr,
789 	&queue_zoned_qd1_writes_entry.attr,
790 
791 	NULL,
792 };
793 
794 /* Request-based queue attributes that are not relevant for bio-based queues. */
795 static const struct attribute *const blk_mq_queue_attrs[] = {
796 	/*
797 	 * Attributes which require some form of locking other than
798 	 * q->sysfs_lock.
799 	 */
800 	&elv_iosched_entry.attr,
801 	&queue_requests_entry.attr,
802 	&queue_async_depth_entry.attr,
803 #ifdef CONFIG_BLK_WBT
804 	&queue_wb_lat_entry.attr,
805 #endif
806 	/*
807 	 * Attributes which don't require locking.
808 	 */
809 	&queue_rq_affinity_entry.attr,
810 	&queue_io_timeout_entry.attr,
811 
812 	NULL,
813 };
814 
815 static umode_t queue_attr_visible(struct kobject *kobj, const struct attribute *attr,
816 				int n)
817 {
818 	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
819 	struct request_queue *q = disk->queue;
820 
821 	if ((attr == &queue_max_open_zones_entry.attr ||
822 	     attr == &queue_max_active_zones_entry.attr ||
823 	     attr == &queue_zoned_qd1_writes_entry.attr) &&
824 	    !blk_queue_is_zoned(q))
825 		return 0;
826 
827 	return attr->mode;
828 }
829 
830 static umode_t blk_mq_queue_attr_visible(struct kobject *kobj,
831 					 const struct attribute *attr, int n)
832 {
833 	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
834 	struct request_queue *q = disk->queue;
835 
836 	if (!queue_is_mq(q))
837 		return 0;
838 
839 	if (attr == &queue_io_timeout_entry.attr && !q->mq_ops->timeout)
840 		return 0;
841 
842 	return attr->mode;
843 }
844 
845 static const struct attribute_group queue_attr_group = {
846 	.attrs_const = queue_attrs,
847 	.is_visible_const = queue_attr_visible,
848 };
849 
850 static const struct attribute_group blk_mq_queue_attr_group = {
851 	.attrs_const = blk_mq_queue_attrs,
852 	.is_visible_const = blk_mq_queue_attr_visible,
853 };
854 
/* Map a struct attribute pointer back to its enclosing queue_sysfs_entry. */
#define to_queue(_a) \
	container_of_const((_a), struct queue_sysfs_entry, attr)
856 
857 static ssize_t
858 queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
859 {
860 	struct queue_sysfs_entry *entry = to_queue(attr);
861 	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
862 
863 	if (!entry->show && !entry->show_limit)
864 		return -EIO;
865 
866 	if (entry->show_limit) {
867 		ssize_t res;
868 
869 		mutex_lock(&disk->queue->limits_lock);
870 		res = entry->show_limit(disk, page);
871 		mutex_unlock(&disk->queue->limits_lock);
872 		return res;
873 	}
874 
875 	return entry->show(disk, page);
876 }
877 
878 static ssize_t
879 queue_attr_store(struct kobject *kobj, struct attribute *attr,
880 		    const char *page, size_t length)
881 {
882 	struct queue_sysfs_entry *entry = to_queue(attr);
883 	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
884 	struct request_queue *q = disk->queue;
885 
886 	if (!entry->store_limit && !entry->store)
887 		return -EIO;
888 
889 	if (entry->store_limit) {
890 		ssize_t res;
891 
892 		struct queue_limits lim = queue_limits_start_update(q);
893 
894 		res = entry->store_limit(disk, page, length, &lim);
895 		if (res < 0) {
896 			queue_limits_cancel_update(q);
897 			return res;
898 		}
899 
900 		res = queue_limits_commit_update_frozen(q, &lim);
901 		if (res)
902 			return res;
903 		return length;
904 	}
905 
906 	return entry->store(disk, page, length);
907 }
908 
909 static const struct sysfs_ops queue_sysfs_ops = {
910 	.show	= queue_attr_show,
911 	.store	= queue_attr_store,
912 };
913 
914 static const struct attribute_group *blk_queue_attr_groups[] = {
915 	&queue_attr_group,
916 	&blk_mq_queue_attr_group,
917 	NULL
918 };
919 
/*
 * kobject release callback for the queue kobject.  Intentionally empty:
 * all data is associated with the parent gendisk.
 */
static void blk_queue_release(struct kobject *kobj)
{
}
924 
925 const struct kobj_type blk_queue_ktype = {
926 	.default_groups = blk_queue_attr_groups,
927 	.sysfs_ops	= &queue_sysfs_ops,
928 	.release	= blk_queue_release,
929 };
930 
931 static void blk_debugfs_remove(struct gendisk *disk)
932 {
933 	struct request_queue *q = disk->queue;
934 
935 	blk_debugfs_lock_nomemsave(q);
936 	blk_trace_shutdown(q);
937 	if (IS_ENABLED(CONFIG_BLK_ERROR_INJECTION))
938 		blk_error_injection_exit(disk);
939 	debugfs_remove_recursive(q->debugfs_dir);
940 	q->debugfs_dir = NULL;
941 	q->sched_debugfs_dir = NULL;
942 	q->rqos_debugfs_dir = NULL;
943 	blk_debugfs_unlock_nomemrestore(q);
944 }
945 
946 /**
947  * blk_register_queue - register a block layer queue with sysfs
948  * @disk: Disk of which the request queue should be registered with sysfs.
949  */
950 int blk_register_queue(struct gendisk *disk)
951 {
952 	struct request_queue *q = disk->queue;
953 	unsigned int memflags;
954 	int ret;
955 
956 	ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
957 	if (ret < 0)
958 		return ret;
959 
960 	if (queue_is_mq(q)) {
961 		ret = blk_mq_sysfs_register(disk);
962 		if (ret)
963 			goto out_del_queue_kobj;
964 	}
965 	mutex_lock(&q->sysfs_lock);
966 
967 	memflags = blk_debugfs_lock(q);
968 	q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root);
969 	if (IS_ENABLED(CONFIG_BLK_ERROR_INJECTION))
970 		blk_error_injection_init(disk);
971 	if (queue_is_mq(q))
972 		blk_mq_debugfs_register(q);
973 	blk_debugfs_unlock(q, memflags);
974 
975 	/*
976 	 * For blk-mq rotational zoned devices, default to using QD=1
977 	 * writes. For non-mq rotational zoned devices, the device driver can
978 	 * set an appropriate default.
979 	 */
980 	if (queue_is_mq(q) && blk_queue_rot(q) && blk_queue_is_zoned(q))
981 		blk_queue_flag_set(QUEUE_FLAG_ZONED_QD1_WRITES, q);
982 
983 	ret = disk_register_independent_access_ranges(disk);
984 	if (ret)
985 		goto out_debugfs_remove;
986 
987 	ret = blk_crypto_sysfs_register(disk);
988 	if (ret)
989 		goto out_unregister_ia_ranges;
990 
991 	if (queue_is_mq(q))
992 		elevator_set_default(q);
993 
994 	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
995 	wbt_init_enable_default(disk);
996 
997 	/* Now everything is ready and send out KOBJ_ADD uevent */
998 	kobject_uevent(&disk->queue_kobj, KOBJ_ADD);
999 	if (q->elevator)
1000 		kobject_uevent(&q->elevator->kobj, KOBJ_ADD);
1001 	mutex_unlock(&q->sysfs_lock);
1002 
1003 	/*
1004 	 * SCSI probing may synchronously create and destroy a lot of
1005 	 * request_queues for non-existent devices.  Shutting down a fully
1006 	 * functional queue takes measureable wallclock time as RCU grace
1007 	 * periods are involved.  To avoid excessive latency in these
1008 	 * cases, a request_queue starts out in a degraded mode which is
1009 	 * faster to shut down and is made fully functional here as
1010 	 * request_queues for non-existent devices never get registered.
1011 	 */
1012 	blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q);
1013 	percpu_ref_switch_to_percpu(&q->q_usage_counter);
1014 
1015 	return ret;
1016 
1017 out_unregister_ia_ranges:
1018 	disk_unregister_independent_access_ranges(disk);
1019 out_debugfs_remove:
1020 	blk_debugfs_remove(disk);
1021 	mutex_unlock(&q->sysfs_lock);
1022 	if (queue_is_mq(q))
1023 		blk_mq_sysfs_unregister(disk);
1024 out_del_queue_kobj:
1025 	kobject_del(&disk->queue_kobj);
1026 	return ret;
1027 }
1028 
1029 /**
1030  * blk_unregister_queue - counterpart of blk_register_queue()
1031  * @disk: Disk of which the request queue should be unregistered from sysfs.
1032  *
1033  * Note: the caller is responsible for guaranteeing that this function is called
1034  * after blk_register_queue() has finished.
1035  */
1036 void blk_unregister_queue(struct gendisk *disk)
1037 {
1038 	struct request_queue *q = disk->queue;
1039 
1040 	if (WARN_ON(!q))
1041 		return;
1042 
1043 	/* Return early if disk->queue was never registered. */
1044 	if (!blk_queue_registered(q))
1045 		return;
1046 
1047 	/*
1048 	 * Since sysfs_remove_dir() prevents adding new directory entries
1049 	 * before removal of existing entries starts, protect against
1050 	 * concurrent elv_iosched_store() calls.
1051 	 */
1052 	mutex_lock(&q->sysfs_lock);
1053 	blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q);
1054 	mutex_unlock(&q->sysfs_lock);
1055 
1056 	/*
1057 	 * Remove the sysfs attributes before unregistering the queue data
1058 	 * structures that can be modified through sysfs.
1059 	 */
1060 	if (queue_is_mq(q))
1061 		blk_mq_sysfs_unregister(disk);
1062 	blk_crypto_sysfs_unregister(disk);
1063 
1064 	mutex_lock(&q->sysfs_lock);
1065 	disk_unregister_independent_access_ranges(disk);
1066 	mutex_unlock(&q->sysfs_lock);
1067 
1068 	/* Now that we've deleted all child objects, we can delete the queue. */
1069 	kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE);
1070 	kobject_del(&disk->queue_kobj);
1071 
1072 	if (queue_is_mq(q))
1073 		elevator_set_none(q);
1074 
1075 	blk_debugfs_remove(disk);
1076 }
1077