// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to sysfs handling
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/blktrace_api.h>
#include <linux/debugfs.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
#include "blk-wbt.h"
#include "blk-cgroup.h"
#include "blk-throttle.h"

struct queue_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct gendisk *disk, char *page);
	ssize_t (*show_limit)(struct gendisk *disk, char *page);

	ssize_t (*store)(struct gendisk *disk, const char *page, size_t count);
	int (*store_limit)(struct gendisk *disk, const char *page,
			size_t count, struct queue_limits *lim);
};

static ssize_t
queue_var_show(unsigned long var, char *page)
{
	return sysfs_emit(page, "%lu\n", var);
}

static ssize_t
queue_var_store(unsigned long *var, const char *page, size_t count)
{
	int err;
	unsigned long v;

	err = kstrtoul(page, 10, &v);
	if (err || v > UINT_MAX)
		return -EINVAL;

	*var = v;

	return count;
}

static ssize_t queue_requests_show(struct gendisk *disk, char *page)
{
	ssize_t ret;

	mutex_lock(&disk->queue->elevator_lock);
	ret = queue_var_show(disk->queue->nr_requests, page);
	mutex_unlock(&disk->queue->elevator_lock);
	return ret;
}
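
/*
 * Illustrative usage (the device name is hypothetical, not from this
 * source file):
 *
 *	# cat /sys/block/sda/queue/nr_requests
 *	256
 *	# echo 512 > /sys/block/sda/queue/nr_requests
 *
 * The store handler below rounds values below BLKDEV_MIN_RQ up and
 * rejects depths that do not fit the tag set with -EINVAL.
 */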

static ssize_t
queue_requests_store(struct gendisk *disk, const char *page, size_t count)
{
	struct request_queue *q = disk->queue;
	struct blk_mq_tag_set *set = q->tag_set;
	struct elevator_tags *et = NULL;
	unsigned int memflags;
	unsigned long nr;
	int ret;

	ret = queue_var_store(&nr, page, count);
	if (ret < 0)
		return ret;

	/*
	 * Serialize updating nr_requests with concurrent queue_requests_store()
	 * and switching elevator.
	 *
	 * Use trylock to avoid a circular lock dependency with the kernfs
	 * active reference during concurrent disk deletion:
	 * update_nr_hwq_lock -> kn->active (via del_gendisk -> kobject_del)
	 * kn->active -> update_nr_hwq_lock (via this sysfs write path)
	 */
	if (!down_write_trylock(&set->update_nr_hwq_lock))
		return -EBUSY;

	if (nr == q->nr_requests)
		goto unlock;

	if (nr < BLKDEV_MIN_RQ)
		nr = BLKDEV_MIN_RQ;

	/*
	 * Switching elevator is protected by update_nr_hwq_lock:
	 * - read lock is held from the elevator sysfs attribute;
	 * - write lock is held from updating nr_hw_queues;
	 * Hence it's safe to access q->elevator here with the write lock held.
	 */
	if (nr <= set->reserved_tags ||
	    (q->elevator && nr > MAX_SCHED_RQ) ||
	    (!q->elevator && nr > set->queue_depth)) {
		ret = -EINVAL;
		goto unlock;
	}

	if (!blk_mq_is_shared_tags(set->flags) && q->elevator &&
	    nr > q->elevator->et->nr_requests) {
		/*
		 * Tags will grow, allocate memory before freezing the queue
		 * to prevent deadlock.
		 */
		et = blk_mq_alloc_sched_tags(set, q->nr_hw_queues, nr);
		if (!et) {
			ret = -ENOMEM;
			goto unlock;
		}
	}

	memflags = blk_mq_freeze_queue(q);
	mutex_lock(&q->elevator_lock);
	et = blk_mq_update_nr_requests(q, et, nr);
	mutex_unlock(&q->elevator_lock);
	blk_mq_unfreeze_queue(q, memflags);

	if (et)
		blk_mq_free_sched_tags(et, set);

unlock:
	up_write(&set->update_nr_hwq_lock);
	return ret;
}

static ssize_t queue_async_depth_show(struct gendisk *disk, char *page)
{
	guard(mutex)(&disk->queue->elevator_lock);

	return queue_var_show(disk->queue->async_depth, page);
}

static ssize_t
queue_async_depth_store(struct gendisk *disk, const char *page, size_t count)
{
	struct request_queue *q = disk->queue;
	unsigned int memflags;
	unsigned long nr;
	int ret;

	if (!queue_is_mq(q))
		return -EINVAL;

	ret = queue_var_store(&nr, page, count);
	if (ret < 0)
		return ret;

	if (nr == 0)
		return -EINVAL;

	memflags = blk_mq_freeze_queue(q);
	scoped_guard(mutex, &q->elevator_lock) {
		if (q->elevator) {
			q->async_depth = min(q->nr_requests, nr);
			if (q->elevator->type->ops.depth_updated)
				q->elevator->type->ops.depth_updated(q);
		} else {
			ret = -EINVAL;
		}
	}
	blk_mq_unfreeze_queue(q, memflags);

	return ret;
}

static ssize_t queue_ra_show(struct gendisk *disk, char *page)
{
	ssize_t ret;

	mutex_lock(&disk->queue->limits_lock);
	ret = queue_var_show(disk->bdi->ra_pages << (PAGE_SHIFT - 10), page);
	mutex_unlock(&disk->queue->limits_lock);

	return ret;
}

static ssize_t
queue_ra_store(struct gendisk *disk, const char *page, size_t count)
{
	unsigned long ra_kb;
	ssize_t ret;
	struct request_queue *q = disk->queue;

	ret = queue_var_store(&ra_kb, page, count);
	if (ret < 0)
		return ret;
	/*
	 * The ->ra_pages change below is protected by ->limits_lock because it
	 * is usually calculated from the queue limits by
	 * queue_limits_commit_update().
	 *
	 * bdi->ra_pages reads are not serialized against bdi->ra_pages writes.
	 * Use WRITE_ONCE() so lockless readers observe the update as a single
	 * store.
	 */
	mutex_lock(&q->limits_lock);
	WRITE_ONCE(disk->bdi->ra_pages, ra_kb >> (PAGE_SHIFT - 10));
	mutex_unlock(&q->limits_lock);

	return ret;
}
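
/*
 * Illustrative arithmetic for the read_ahead_kb conversions above: with
 * 4 KiB pages (PAGE_SHIFT == 12), writing 128 stores 128 >> 2 = 32 in
 * bdi->ra_pages, and reading converts back as 32 << 2 = 128.
 */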

#define QUEUE_SYSFS_LIMIT_SHOW(_field)					\
static ssize_t queue_##_field##_show(struct gendisk *disk, char *page)	\
{									\
	return queue_var_show(disk->queue->limits._field, page);	\
}

QUEUE_SYSFS_LIMIT_SHOW(max_segments)
QUEUE_SYSFS_LIMIT_SHOW(max_discard_segments)
QUEUE_SYSFS_LIMIT_SHOW(max_integrity_segments)
QUEUE_SYSFS_LIMIT_SHOW(max_segment_size)
QUEUE_SYSFS_LIMIT_SHOW(max_write_streams)
QUEUE_SYSFS_LIMIT_SHOW(write_stream_granularity)
QUEUE_SYSFS_LIMIT_SHOW(logical_block_size)
QUEUE_SYSFS_LIMIT_SHOW(physical_block_size)
QUEUE_SYSFS_LIMIT_SHOW(chunk_sectors)
QUEUE_SYSFS_LIMIT_SHOW(io_min)
QUEUE_SYSFS_LIMIT_SHOW(io_opt)
QUEUE_SYSFS_LIMIT_SHOW(discard_granularity)
QUEUE_SYSFS_LIMIT_SHOW(zone_write_granularity)
QUEUE_SYSFS_LIMIT_SHOW(virt_boundary_mask)
QUEUE_SYSFS_LIMIT_SHOW(dma_alignment)
QUEUE_SYSFS_LIMIT_SHOW(max_open_zones)
QUEUE_SYSFS_LIMIT_SHOW(max_active_zones)
QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_min)
QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_max)

#define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(_field)			\
static ssize_t queue_##_field##_show(struct gendisk *disk, char *page)	\
{									\
	return sysfs_emit(page, "%llu\n",				\
		(unsigned long long)disk->queue->limits._field <<	\
			SECTOR_SHIFT);					\
}

QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_discard_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_discard_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_write_zeroes_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_wzeroes_unmap_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_wzeroes_unmap_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_max_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_boundary_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_zone_append_sectors)

#define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(_field)			\
static ssize_t queue_##_field##_show(struct gendisk *disk, char *page)	\
{									\
	return queue_var_show(disk->queue->limits._field >> 1, page);	\
}

QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_hw_sectors)

#define QUEUE_SYSFS_SHOW_CONST(_name, _val)				\
static ssize_t queue_##_name##_show(struct gendisk *disk, char *page)	\
{									\
	return sysfs_emit(page, "%d\n", _val);				\
}

/* deprecated fields */
QUEUE_SYSFS_SHOW_CONST(discard_zeroes_data, 0)
QUEUE_SYSFS_SHOW_CONST(write_same_max, 0)
QUEUE_SYSFS_SHOW_CONST(poll_delay, -1)

static int queue_max_discard_sectors_store(struct gendisk *disk,
		const char *page, size_t count, struct queue_limits *lim)
{
	unsigned long max_discard_bytes;
	ssize_t ret;

	ret = queue_var_store(&max_discard_bytes, page, count);
	if (ret < 0)
		return ret;

	if (max_discard_bytes & (disk->queue->limits.discard_granularity - 1))
		return -EINVAL;

	if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX)
		return -EINVAL;

	lim->max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT;
	return 0;
}
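
/*
 * Illustrative note: the alignment check above relies on
 * discard_granularity being a power of two, so that
 * (bytes & (granularity - 1)) == 0 tests divisibility. For example, with
 * a 4096-byte granularity, writing 1048576 (1 MiB) to discard_max_bytes
 * stores 1048576 >> SECTOR_SHIFT = 2048 sectors.
 */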

static int queue_max_wzeroes_unmap_sectors_store(struct gendisk *disk,
		const char *page, size_t count, struct queue_limits *lim)
{
	unsigned long max_zeroes_bytes, max_hw_zeroes_bytes;
	ssize_t ret;

	ret = queue_var_store(&max_zeroes_bytes, page, count);
	if (ret < 0)
		return ret;

	max_hw_zeroes_bytes = lim->max_hw_wzeroes_unmap_sectors << SECTOR_SHIFT;
	if (max_zeroes_bytes != 0 && max_zeroes_bytes != max_hw_zeroes_bytes)
		return -EINVAL;

	lim->max_user_wzeroes_unmap_sectors = max_zeroes_bytes >> SECTOR_SHIFT;
	return 0;
}

static int
queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count,
		struct queue_limits *lim)
{
	unsigned long max_sectors_kb;
	ssize_t ret;

	ret = queue_var_store(&max_sectors_kb, page, count);
	if (ret < 0)
		return ret;

	lim->max_user_sectors = max_sectors_kb << 1;
	return 0;
}

static ssize_t queue_feature_store(struct gendisk *disk, const char *page,
		size_t count, struct queue_limits *lim, blk_features_t feature)
{
	unsigned long val;
	ssize_t ret;

	ret = queue_var_store(&val, page, count);
	if (ret < 0)
		return ret;

	if (val)
		lim->features |= feature;
	else
		lim->features &= ~feature;
	return 0;
}

#define QUEUE_SYSFS_FEATURE(_name, _feature)				\
static ssize_t queue_##_name##_show(struct gendisk *disk, char *page)	\
{									\
	return sysfs_emit(page, "%u\n",					\
		!!(disk->queue->limits.features & _feature));		\
}									\
static int queue_##_name##_store(struct gendisk *disk,			\
		const char *page, size_t count, struct queue_limits *lim) \
{									\
	return queue_feature_store(disk, page, count, lim, _feature);	\
}

QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL)
QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM)
QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT)
QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES)

#define QUEUE_SYSFS_FEATURE_SHOW(_name, _feature)			\
static ssize_t queue_##_name##_show(struct gendisk *disk, char *page)	\
{									\
	return sysfs_emit(page, "%u\n",					\
		!!(disk->queue->limits.features & _feature));		\
}

QUEUE_SYSFS_FEATURE_SHOW(fua, BLK_FEAT_FUA)
QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX)

static ssize_t queue_poll_show(struct gendisk *disk, char *page)
{
	if (queue_is_mq(disk->queue))
		return sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue));

	return sysfs_emit(page, "%u\n",
		!!(disk->queue->limits.features & BLK_FEAT_POLL));
}

static ssize_t queue_zoned_show(struct gendisk *disk, char *page)
{
	if (blk_queue_is_zoned(disk->queue))
		return sysfs_emit(page, "host-managed\n");
	return sysfs_emit(page, "none\n");
}

static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
{
	return queue_var_show(disk_nr_zones(disk), page);
}

static ssize_t queue_zoned_qd1_writes_show(struct gendisk *disk, char *page)
{
	return queue_var_show(!!blk_queue_zoned_qd1_writes(disk->queue),
			      page);
}
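
/*
 * Illustrative note: the store handler below both freezes and quiesces
 * the queue before flipping the flag. Freezing waits for in-flight
 * requests and blocks new submissions; quiescing additionally ensures
 * that no dispatch path observes the flag mid-update.
 */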
static ssize_t queue_zoned_qd1_writes_store(struct gendisk *disk,
					    const char *page, size_t count)
{
	struct request_queue *q = disk->queue;
	unsigned long qd1_writes;
	unsigned int memflags;
	ssize_t ret;

	ret = queue_var_store(&qd1_writes, page, count);
	if (ret < 0)
		return ret;

	memflags = blk_mq_freeze_queue(q);
	blk_mq_quiesce_queue(q);
	if (qd1_writes)
		blk_queue_flag_set(QUEUE_FLAG_ZONED_QD1_WRITES, q);
	else
		blk_queue_flag_clear(QUEUE_FLAG_ZONED_QD1_WRITES, q);
	blk_mq_unquiesce_queue(q);
	blk_mq_unfreeze_queue(q, memflags);

	return count;
}

static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page)
{
	return queue_var_show(!!blk_queue_passthrough_stat(disk->queue), page);
}

static int queue_iostats_passthrough_store(struct gendisk *disk,
		const char *page, size_t count, struct queue_limits *lim)
{
	unsigned long ios;
	ssize_t ret;

	ret = queue_var_store(&ios, page, count);
	if (ret < 0)
		return ret;

	if (ios)
		lim->flags |= BLK_FLAG_IOSTATS_PASSTHROUGH;
	else
		lim->flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH;
	return 0;
}

static ssize_t queue_nomerges_show(struct gendisk *disk, char *page)
{
	return queue_var_show((blk_queue_nomerges(disk->queue) << 1) |
			       blk_queue_noxmerges(disk->queue), page);
}

static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
				    size_t count)
{
	unsigned long nm;
	struct request_queue *q = disk->queue;
	ssize_t ret = queue_var_store(&nm, page, count);

	if (ret < 0)
		return ret;

	blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
	blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
	if (nm == 2)
		blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
	else if (nm)
		blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);

	return ret;
}

static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page)
{
	bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags);
	bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags);

	return queue_var_show(set << force, page);
}
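
/*
 * Illustrative decoding of rq_affinity, matching the "set << force"
 * encoding above:
 *	0 - completions may run on any CPU
 *	1 - complete on a CPU in the same group as the submitter
 *	    (QUEUE_FLAG_SAME_COMP)
 *	2 - force completion on the submitting CPU
 *	    (QUEUE_FLAG_SAME_COMP plus QUEUE_FLAG_SAME_FORCE)
 */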
496 */ 497 if (val == 2) { 498 blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q); 499 blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); 500 } else if (val == 1) { 501 blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q); 502 blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); 503 } else if (val == 0) { 504 blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); 505 blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); 506 } 507 #endif 508 return ret; 509 } 510 511 static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page, 512 size_t count) 513 { 514 return count; 515 } 516 517 static ssize_t queue_poll_store(struct gendisk *disk, const char *page, 518 size_t count) 519 { 520 ssize_t ret = count; 521 struct request_queue *q = disk->queue; 522 523 if (!(q->limits.features & BLK_FEAT_POLL)) { 524 ret = -EINVAL; 525 goto out; 526 } 527 528 pr_info_ratelimited("writes to the poll attribute are ignored.\n"); 529 pr_info_ratelimited("please use driver specific parameters instead.\n"); 530 out: 531 return ret; 532 } 533 534 static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page) 535 { 536 return sysfs_emit(page, "%u\n", 537 jiffies_to_msecs(READ_ONCE(disk->queue->rq_timeout))); 538 } 539 540 static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page, 541 size_t count) 542 { 543 unsigned int val; 544 int err; 545 struct request_queue *q = disk->queue; 546 547 err = kstrtou32(page, 10, &val); 548 if (err || val == 0) 549 return -EINVAL; 550 551 blk_queue_rq_timeout(q, msecs_to_jiffies(val)); 552 553 return count; 554 } 555 556 static ssize_t queue_wc_show(struct gendisk *disk, char *page) 557 { 558 if (blk_queue_write_cache(disk->queue)) 559 return sysfs_emit(page, "write back\n"); 560 return sysfs_emit(page, "write through\n"); 561 } 562 563 static int queue_wc_store(struct gendisk *disk, const char *page, 564 size_t count, struct queue_limits *lim) 565 { 566 bool disable; 567 568 if (!strncmp(page, "write back", 10)) { 569 disable = false; 570 } else if (!strncmp(page, "write through", 13) || 571 !strncmp(page, "none", 4)) { 572 disable = true; 573 } else { 574 return -EINVAL; 575 } 576 577 if (disable) 578 lim->flags |= BLK_FLAG_WRITE_CACHE_DISABLED; 579 else 580 lim->flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED; 581 return 0; 582 } 583 584 #define QUEUE_RO_ENTRY(_prefix, _name) \ 585 static const struct queue_sysfs_entry _prefix##_entry = { \ 586 .attr = { .name = _name, .mode = 0444 }, \ 587 .show = _prefix##_show, \ 588 }; 589 590 #define QUEUE_RW_ENTRY(_prefix, _name) \ 591 static const struct queue_sysfs_entry _prefix##_entry = { \ 592 .attr = { .name = _name, .mode = 0644 }, \ 593 .show = _prefix##_show, \ 594 .store = _prefix##_store, \ 595 }; 596 597 #define QUEUE_LIM_RO_ENTRY(_prefix, _name) \ 598 static const struct queue_sysfs_entry _prefix##_entry = { \ 599 .attr = { .name = _name, .mode = 0444 }, \ 600 .show_limit = _prefix##_show, \ 601 } 602 603 #define QUEUE_LIM_RW_ENTRY(_prefix, _name) \ 604 static const struct queue_sysfs_entry _prefix##_entry = { \ 605 .attr = { .name = _name, .mode = 0644 }, \ 606 .show_limit = _prefix##_show, \ 607 .store_limit = _prefix##_store, \ 608 } 609 610 QUEUE_RW_ENTRY(queue_requests, "nr_requests"); 611 QUEUE_RW_ENTRY(queue_async_depth, "async_depth"); 612 QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb"); 613 QUEUE_LIM_RW_ENTRY(queue_max_sectors, "max_sectors_kb"); 614 QUEUE_LIM_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb"); 615 QUEUE_LIM_RO_ENTRY(queue_max_segments, "max_segments"); 616 QUEUE_LIM_RO_ENTRY(queue_max_integrity_segments, 
"max_integrity_segments"); 617 QUEUE_LIM_RO_ENTRY(queue_max_segment_size, "max_segment_size"); 618 QUEUE_LIM_RO_ENTRY(queue_max_write_streams, "max_write_streams"); 619 QUEUE_LIM_RO_ENTRY(queue_write_stream_granularity, "write_stream_granularity"); 620 QUEUE_RW_ENTRY(elv_iosched, "scheduler"); 621 622 QUEUE_LIM_RO_ENTRY(queue_logical_block_size, "logical_block_size"); 623 QUEUE_LIM_RO_ENTRY(queue_physical_block_size, "physical_block_size"); 624 QUEUE_LIM_RO_ENTRY(queue_chunk_sectors, "chunk_sectors"); 625 QUEUE_LIM_RO_ENTRY(queue_io_min, "minimum_io_size"); 626 QUEUE_LIM_RO_ENTRY(queue_io_opt, "optimal_io_size"); 627 628 QUEUE_LIM_RO_ENTRY(queue_max_discard_segments, "max_discard_segments"); 629 QUEUE_LIM_RO_ENTRY(queue_discard_granularity, "discard_granularity"); 630 QUEUE_LIM_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes"); 631 QUEUE_LIM_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes"); 632 QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data"); 633 634 QUEUE_LIM_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes"); 635 QUEUE_LIM_RO_ENTRY(queue_atomic_write_boundary_sectors, 636 "atomic_write_boundary_bytes"); 637 QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes"); 638 QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes"); 639 640 QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes"); 641 QUEUE_LIM_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes"); 642 QUEUE_LIM_RO_ENTRY(queue_max_hw_wzeroes_unmap_sectors, 643 "write_zeroes_unmap_max_hw_bytes"); 644 QUEUE_LIM_RW_ENTRY(queue_max_wzeroes_unmap_sectors, 645 "write_zeroes_unmap_max_bytes"); 646 QUEUE_LIM_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes"); 647 QUEUE_LIM_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity"); 648 649 QUEUE_LIM_RO_ENTRY(queue_zoned, "zoned"); 650 QUEUE_RW_ENTRY(queue_zoned_qd1_writes, "zoned_qd1_writes"); 651 QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones"); 652 QUEUE_LIM_RO_ENTRY(queue_max_open_zones, "max_open_zones"); 653 QUEUE_LIM_RO_ENTRY(queue_max_active_zones, "max_active_zones"); 654 655 QUEUE_RW_ENTRY(queue_nomerges, "nomerges"); 656 QUEUE_LIM_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough"); 657 QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity"); 658 QUEUE_RW_ENTRY(queue_poll, "io_poll"); 659 QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay"); 660 QUEUE_LIM_RW_ENTRY(queue_wc, "write_cache"); 661 QUEUE_LIM_RO_ENTRY(queue_fua, "fua"); 662 QUEUE_LIM_RO_ENTRY(queue_dax, "dax"); 663 QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout"); 664 QUEUE_LIM_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask"); 665 QUEUE_LIM_RO_ENTRY(queue_dma_alignment, "dma_alignment"); 666 667 /* legacy alias for logical_block_size: */ 668 static const struct queue_sysfs_entry queue_hw_sector_size_entry = { 669 .attr = {.name = "hw_sector_size", .mode = 0444 }, 670 .show_limit = queue_logical_block_size_show, 671 }; 672 673 QUEUE_LIM_RW_ENTRY(queue_rotational, "rotational"); 674 QUEUE_LIM_RW_ENTRY(queue_iostats, "iostats"); 675 QUEUE_LIM_RW_ENTRY(queue_add_random, "add_random"); 676 QUEUE_LIM_RW_ENTRY(queue_stable_writes, "stable_writes"); 677 678 #ifdef CONFIG_BLK_WBT 679 static ssize_t queue_var_store64(s64 *var, const char *page) 680 { 681 int err; 682 s64 v; 683 684 err = kstrtos64(page, 10, &v); 685 if (err < 0) 686 return err; 687 688 *var = v; 689 return 0; 690 } 691 692 static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page) 693 { 694 ssize_t ret; 695 struct 
#ifdef CONFIG_BLK_WBT
static ssize_t queue_var_store64(s64 *var, const char *page)
{
	int err;
	s64 v;

	err = kstrtos64(page, 10, &v);
	if (err < 0)
		return err;

	*var = v;
	return 0;
}

static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page)
{
	ssize_t ret;
	struct request_queue *q = disk->queue;

	mutex_lock(&disk->rqos_state_mutex);
	if (!wbt_rq_qos(q)) {
		ret = -EINVAL;
		goto out;
	}

	if (wbt_disabled(q)) {
		ret = sysfs_emit(page, "0\n");
		goto out;
	}

	ret = sysfs_emit(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
out:
	mutex_unlock(&disk->rqos_state_mutex);
	return ret;
}

static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
				  size_t count)
{
	ssize_t ret;
	s64 val;

	ret = queue_var_store64(&val, page);
	if (ret < 0)
		return ret;
	if (val < -1)
		return -EINVAL;

	ret = wbt_set_lat(disk, val);
	return ret ? ret : count;
}

QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
#endif

/* Common attributes for bio-based and request-based queues. */
static const struct attribute *const queue_attrs[] = {
	/*
	 * Attributes which are protected with q->limits_lock.
	 */
	&queue_max_hw_sectors_entry.attr,
	&queue_max_sectors_entry.attr,
	&queue_max_segments_entry.attr,
	&queue_max_discard_segments_entry.attr,
	&queue_max_integrity_segments_entry.attr,
	&queue_max_segment_size_entry.attr,
	&queue_max_write_streams_entry.attr,
	&queue_write_stream_granularity_entry.attr,
	&queue_hw_sector_size_entry.attr,
	&queue_logical_block_size_entry.attr,
	&queue_physical_block_size_entry.attr,
	&queue_chunk_sectors_entry.attr,
	&queue_io_min_entry.attr,
	&queue_io_opt_entry.attr,
	&queue_discard_granularity_entry.attr,
	&queue_max_discard_sectors_entry.attr,
	&queue_max_hw_discard_sectors_entry.attr,
	&queue_atomic_write_max_sectors_entry.attr,
	&queue_atomic_write_boundary_sectors_entry.attr,
	&queue_atomic_write_unit_min_entry.attr,
	&queue_atomic_write_unit_max_entry.attr,
	&queue_max_write_zeroes_sectors_entry.attr,
	&queue_max_hw_wzeroes_unmap_sectors_entry.attr,
	&queue_max_wzeroes_unmap_sectors_entry.attr,
	&queue_max_zone_append_sectors_entry.attr,
	&queue_zone_write_granularity_entry.attr,
	&queue_rotational_entry.attr,
	&queue_zoned_entry.attr,
	&queue_max_open_zones_entry.attr,
	&queue_max_active_zones_entry.attr,
	&queue_iostats_passthrough_entry.attr,
	&queue_iostats_entry.attr,
	&queue_stable_writes_entry.attr,
	&queue_add_random_entry.attr,
	&queue_wc_entry.attr,
	&queue_fua_entry.attr,
	&queue_dax_entry.attr,
	&queue_virt_boundary_mask_entry.attr,
	&queue_dma_alignment_entry.attr,
	&queue_ra_entry.attr,

	/*
	 * Attributes which don't require locking.
	 */
	&queue_discard_zeroes_data_entry.attr,
	&queue_write_same_max_entry.attr,
	&queue_nr_zones_entry.attr,
	&queue_nomerges_entry.attr,
	&queue_poll_entry.attr,
	&queue_poll_delay_entry.attr,
	&queue_zoned_qd1_writes_entry.attr,

	NULL,
};
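
/*
 * Illustrative note: the queue_attrs[] array above is further filtered
 * by queue_attr_visible() below, which hides the zone-related
 * attributes on non-zoned devices.
 */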
/* Request-based queue attributes that are not relevant for bio-based queues. */
static const struct attribute *const blk_mq_queue_attrs[] = {
	/*
	 * Attributes which require some form of locking other than
	 * q->sysfs_lock.
	 */
	&elv_iosched_entry.attr,
	&queue_requests_entry.attr,
	&queue_async_depth_entry.attr,
#ifdef CONFIG_BLK_WBT
	&queue_wb_lat_entry.attr,
#endif
	/*
	 * Attributes which don't require locking.
	 */
	&queue_rq_affinity_entry.attr,
	&queue_io_timeout_entry.attr,

	NULL,
};

static umode_t queue_attr_visible(struct kobject *kobj,
				  const struct attribute *attr, int n)
{
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
	struct request_queue *q = disk->queue;

	if ((attr == &queue_max_open_zones_entry.attr ||
	     attr == &queue_max_active_zones_entry.attr ||
	     attr == &queue_zoned_qd1_writes_entry.attr) &&
	    !blk_queue_is_zoned(q))
		return 0;

	return attr->mode;
}

static umode_t blk_mq_queue_attr_visible(struct kobject *kobj,
					 const struct attribute *attr, int n)
{
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
	struct request_queue *q = disk->queue;

	if (!queue_is_mq(q))
		return 0;

	if (attr == &queue_io_timeout_entry.attr && !q->mq_ops->timeout)
		return 0;

	return attr->mode;
}

static const struct attribute_group queue_attr_group = {
	.attrs_const	= queue_attrs,
	.is_visible_const = queue_attr_visible,
};

static const struct attribute_group blk_mq_queue_attr_group = {
	.attrs_const	= blk_mq_queue_attrs,
	.is_visible_const = blk_mq_queue_attr_visible,
};

#define to_queue(atr) container_of_const((atr), struct queue_sysfs_entry, attr)

static ssize_t
queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	struct queue_sysfs_entry *entry = to_queue(attr);
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);

	if (!entry->show && !entry->show_limit)
		return -EIO;

	if (entry->show_limit) {
		ssize_t res;

		mutex_lock(&disk->queue->limits_lock);
		res = entry->show_limit(disk, page);
		mutex_unlock(&disk->queue->limits_lock);
		return res;
	}

	return entry->show(disk, page);
}

static ssize_t
queue_attr_store(struct kobject *kobj, struct attribute *attr,
		 const char *page, size_t length)
{
	struct queue_sysfs_entry *entry = to_queue(attr);
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
	struct request_queue *q = disk->queue;

	if (!entry->store_limit && !entry->store)
		return -EIO;

	if (entry->store_limit) {
		struct queue_limits lim = queue_limits_start_update(q);
		ssize_t res;

		res = entry->store_limit(disk, page, length, &lim);
		if (res < 0) {
			queue_limits_cancel_update(q);
			return res;
		}

		res = queue_limits_commit_update_frozen(q, &lim);
		if (res)
			return res;
		return length;
	}

	return entry->store(disk, page, length);
}
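
/*
 * Illustrative note: store_limit handlers never publish limits
 * themselves. queue_attr_store() above snapshots the current limits with
 * queue_limits_start_update(), lets the handler modify the copy, and
 * then either cancels the update on error or commits it with the queue
 * frozen, so readers never observe a half-updated queue_limits.
 */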

static const struct sysfs_ops queue_sysfs_ops = {
	.show	= queue_attr_show,
	.store	= queue_attr_store,
};

static const struct attribute_group *blk_queue_attr_groups[] = {
	&queue_attr_group,
	&blk_mq_queue_attr_group,
	NULL
};

static void blk_queue_release(struct kobject *kobj)
{
	/* nothing to do here, all data is associated with the parent gendisk */
}

const struct kobj_type blk_queue_ktype = {
	.default_groups = blk_queue_attr_groups,
	.sysfs_ops	= &queue_sysfs_ops,
	.release	= blk_queue_release,
};

static void blk_debugfs_remove(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;

	blk_debugfs_lock_nomemsave(q);
	blk_trace_shutdown(q);
	debugfs_remove_recursive(q->debugfs_dir);
	q->debugfs_dir = NULL;
	q->sched_debugfs_dir = NULL;
	q->rqos_debugfs_dir = NULL;
	blk_debugfs_unlock_nomemrestore(q);
}

/**
 * blk_register_queue - register a block layer queue with sysfs
 * @disk: Disk of which the request queue should be registered with sysfs.
 */
int blk_register_queue(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;
	unsigned int memflags;
	int ret;

	ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
	if (ret < 0)
		return ret;

	if (queue_is_mq(q)) {
		ret = blk_mq_sysfs_register(disk);
		if (ret)
			goto out_del_queue_kobj;
	}
	mutex_lock(&q->sysfs_lock);

	memflags = blk_debugfs_lock(q);
	q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root);
	if (queue_is_mq(q))
		blk_mq_debugfs_register(q);
	blk_debugfs_unlock(q, memflags);

	/*
	 * For blk-mq rotational zoned devices, default to using QD=1
	 * writes. For non-mq rotational zoned devices, the device driver can
	 * set an appropriate default.
	 */
	if (queue_is_mq(q) && blk_queue_rot(q) && blk_queue_is_zoned(q))
		blk_queue_flag_set(QUEUE_FLAG_ZONED_QD1_WRITES, q);

	ret = disk_register_independent_access_ranges(disk);
	if (ret)
		goto out_debugfs_remove;

	ret = blk_crypto_sysfs_register(disk);
	if (ret)
		goto out_unregister_ia_ranges;

	if (queue_is_mq(q))
		elevator_set_default(q);

	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
	wbt_init_enable_default(disk);

	/* Now everything is ready and send out the KOBJ_ADD uevent */
	kobject_uevent(&disk->queue_kobj, KOBJ_ADD);
	if (q->elevator)
		kobject_uevent(&q->elevator->kobj, KOBJ_ADD);
	mutex_unlock(&q->sysfs_lock);

	/*
	 * SCSI probing may synchronously create and destroy a lot of
	 * request_queues for non-existent devices. Shutting down a fully
	 * functional queue takes measurable wallclock time as RCU grace
	 * periods are involved. To avoid excessive latency in these
	 * cases, a request_queue starts out in a degraded mode which is
	 * faster to shut down and is made fully functional here as
	 * request_queues for non-existent devices never get registered.
	 */
	blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q);
	percpu_ref_switch_to_percpu(&q->q_usage_counter);

	return ret;

out_unregister_ia_ranges:
	disk_unregister_independent_access_ranges(disk);
out_debugfs_remove:
	blk_debugfs_remove(disk);
	mutex_unlock(&q->sysfs_lock);
	if (queue_is_mq(q))
		blk_mq_sysfs_unregister(disk);
out_del_queue_kobj:
	kobject_del(&disk->queue_kobj);
	return ret;
}
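
/*
 * Illustrative note (assumption about typical usage): drivers normally
 * do not call blk_register_queue()/blk_unregister_queue() directly;
 * they are invoked from the add_disk() and del_gendisk() paths
 * respectively.
 */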
1046 */ 1047 mutex_lock(&q->sysfs_lock); 1048 blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q); 1049 mutex_unlock(&q->sysfs_lock); 1050 1051 /* 1052 * Remove the sysfs attributes before unregistering the queue data 1053 * structures that can be modified through sysfs. 1054 */ 1055 if (queue_is_mq(q)) 1056 blk_mq_sysfs_unregister(disk); 1057 blk_crypto_sysfs_unregister(disk); 1058 1059 mutex_lock(&q->sysfs_lock); 1060 disk_unregister_independent_access_ranges(disk); 1061 mutex_unlock(&q->sysfs_lock); 1062 1063 /* Now that we've deleted all child objects, we can delete the queue. */ 1064 kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE); 1065 kobject_del(&disk->queue_kobj); 1066 1067 if (queue_is_mq(q)) 1068 elevator_set_none(q); 1069 1070 blk_debugfs_remove(disk); 1071 } 1072