1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Functions related to sysfs handling 4 */ 5 #include <linux/kernel.h> 6 #include <linux/slab.h> 7 #include <linux/module.h> 8 #include <linux/bio.h> 9 #include <linux/blkdev.h> 10 #include <linux/backing-dev.h> 11 #include <linux/blktrace_api.h> 12 #include <linux/debugfs.h> 13 14 #include "blk.h" 15 #include "blk-mq.h" 16 #include "blk-mq-debugfs.h" 17 #include "blk-mq-sched.h" 18 #include "blk-rq-qos.h" 19 #include "blk-wbt.h" 20 #include "blk-cgroup.h" 21 #include "blk-throttle.h" 22 #include "error-injection.h" 23 24 struct queue_sysfs_entry { 25 struct attribute attr; 26 ssize_t (*show)(struct gendisk *disk, char *page); 27 ssize_t (*show_limit)(struct gendisk *disk, char *page); 28 29 ssize_t (*store)(struct gendisk *disk, const char *page, size_t count); 30 int (*store_limit)(struct gendisk *disk, const char *page, 31 size_t count, struct queue_limits *lim); 32 }; 33 34 static ssize_t 35 queue_var_show(unsigned long var, char *page) 36 { 37 return sysfs_emit(page, "%lu\n", var); 38 } 39 40 static ssize_t 41 queue_var_store(unsigned long *var, const char *page, size_t count) 42 { 43 int err; 44 unsigned long v; 45 46 err = kstrtoul(page, 10, &v); 47 if (err || v > UINT_MAX) 48 return -EINVAL; 49 50 *var = v; 51 52 return count; 53 } 54 55 static ssize_t queue_requests_show(struct gendisk *disk, char *page) 56 { 57 ssize_t ret; 58 59 mutex_lock(&disk->queue->elevator_lock); 60 ret = queue_var_show(disk->queue->nr_requests, page); 61 mutex_unlock(&disk->queue->elevator_lock); 62 return ret; 63 } 64 65 static ssize_t 66 queue_requests_store(struct gendisk *disk, const char *page, size_t count) 67 { 68 struct request_queue *q = disk->queue; 69 struct blk_mq_tag_set *set = q->tag_set; 70 struct elevator_tags *et = NULL; 71 unsigned int memflags; 72 unsigned long nr; 73 int ret; 74 75 ret = queue_var_store(&nr, page, count); 76 if (ret < 0) 77 return ret; 78 79 /* 80 * Serialize updating nr_requests with concurrent queue_requests_store() 81 * and switching elevator. 82 * 83 * Use trylock to avoid circular lock dependency with kernfs active 84 * reference during concurrent disk deletion: 85 * update_nr_hwq_lock -> kn->active (via del_gendisk -> kobject_del) 86 * kn->active -> update_nr_hwq_lock (via this sysfs write path) 87 */ 88 if (!down_write_trylock(&set->update_nr_hwq_lock)) 89 return -EBUSY; 90 91 if (nr == q->nr_requests) 92 goto unlock; 93 94 if (nr < BLKDEV_MIN_RQ) 95 nr = BLKDEV_MIN_RQ; 96 97 /* 98 * Switching elevator is protected by update_nr_hwq_lock: 99 * - read lock is held from elevator sysfs attribute; 100 * - write lock is held from updating nr_hw_queues; 101 * Hence it's safe to access q->elevator here with write lock held. 102 */ 103 if (nr <= set->reserved_tags || 104 (q->elevator && nr > MAX_SCHED_RQ) || 105 (!q->elevator && nr > set->queue_depth)) { 106 ret = -EINVAL; 107 goto unlock; 108 } 109 110 if (!blk_mq_is_shared_tags(set->flags) && q->elevator && 111 nr > q->elevator->et->nr_requests) { 112 /* 113 * Tags will grow, allocate memory before freezing queue to 114 * prevent deadlock. 115 */ 116 et = blk_mq_alloc_sched_tags(set, q->nr_hw_queues, nr); 117 if (!et) { 118 ret = -ENOMEM; 119 goto unlock; 120 } 121 } 122 123 memflags = blk_mq_freeze_queue(q); 124 mutex_lock(&q->elevator_lock); 125 et = blk_mq_update_nr_requests(q, et, nr); 126 mutex_unlock(&q->elevator_lock); 127 blk_mq_unfreeze_queue(q, memflags); 128 129 if (et) 130 blk_mq_free_sched_tags(et, set); 131 132 unlock: 133 up_write(&set->update_nr_hwq_lock); 134 return ret; 135 } 136 137 static ssize_t queue_async_depth_show(struct gendisk *disk, char *page) 138 { 139 guard(mutex)(&disk->queue->elevator_lock); 140 141 return queue_var_show(disk->queue->async_depth, page); 142 } 143 144 static ssize_t 145 queue_async_depth_store(struct gendisk *disk, const char *page, size_t count) 146 { 147 struct request_queue *q = disk->queue; 148 unsigned int memflags; 149 unsigned long nr; 150 int ret; 151 152 if (!queue_is_mq(q)) 153 return -EINVAL; 154 155 ret = queue_var_store(&nr, page, count); 156 if (ret < 0) 157 return ret; 158 159 if (nr == 0) 160 return -EINVAL; 161 162 memflags = blk_mq_freeze_queue(q); 163 scoped_guard(mutex, &q->elevator_lock) { 164 if (q->elevator) { 165 q->async_depth = min(q->nr_requests, nr); 166 if (q->elevator->type->ops.depth_updated) 167 q->elevator->type->ops.depth_updated(q); 168 } else { 169 ret = -EINVAL; 170 } 171 } 172 blk_mq_unfreeze_queue(q, memflags); 173 174 return ret; 175 } 176 177 static ssize_t queue_ra_show(struct gendisk *disk, char *page) 178 { 179 ssize_t ret; 180 181 mutex_lock(&disk->queue->limits_lock); 182 ret = queue_var_show(disk->bdi->ra_pages << (PAGE_SHIFT - 10), page); 183 mutex_unlock(&disk->queue->limits_lock); 184 185 return ret; 186 } 187 188 static ssize_t 189 queue_ra_store(struct gendisk *disk, const char *page, size_t count) 190 { 191 unsigned long ra_kb; 192 ssize_t ret; 193 struct request_queue *q = disk->queue; 194 195 ret = queue_var_store(&ra_kb, page, count); 196 if (ret < 0) 197 return ret; 198 /* 199 * The ->ra_pages change below is protected by ->limits_lock because it 200 * is usually calculated from the queue limits by 201 * queue_limits_commit_update(). 202 * 203 * bdi->ra_pages reads are not serialized against bdi->ra_pages writes. 204 * Use WRITE_ONCE() to write bdi->ra_pages once. 205 */ 206 mutex_lock(&q->limits_lock); 207 WRITE_ONCE(disk->bdi->ra_pages, ra_kb >> (PAGE_SHIFT - 10)); 208 mutex_unlock(&q->limits_lock); 209 210 return ret; 211 } 212 213 #define QUEUE_SYSFS_LIMIT_SHOW(_field) \ 214 static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \ 215 { \ 216 return queue_var_show(disk->queue->limits._field, page); \ 217 } 218 219 QUEUE_SYSFS_LIMIT_SHOW(max_segments) 220 QUEUE_SYSFS_LIMIT_SHOW(max_discard_segments) 221 QUEUE_SYSFS_LIMIT_SHOW(max_integrity_segments) 222 QUEUE_SYSFS_LIMIT_SHOW(max_segment_size) 223 QUEUE_SYSFS_LIMIT_SHOW(max_write_streams) 224 QUEUE_SYSFS_LIMIT_SHOW(write_stream_granularity) 225 QUEUE_SYSFS_LIMIT_SHOW(logical_block_size) 226 QUEUE_SYSFS_LIMIT_SHOW(physical_block_size) 227 QUEUE_SYSFS_LIMIT_SHOW(chunk_sectors) 228 QUEUE_SYSFS_LIMIT_SHOW(io_min) 229 QUEUE_SYSFS_LIMIT_SHOW(io_opt) 230 QUEUE_SYSFS_LIMIT_SHOW(discard_granularity) 231 QUEUE_SYSFS_LIMIT_SHOW(zone_write_granularity) 232 QUEUE_SYSFS_LIMIT_SHOW(virt_boundary_mask) 233 QUEUE_SYSFS_LIMIT_SHOW(dma_alignment) 234 QUEUE_SYSFS_LIMIT_SHOW(max_open_zones) 235 QUEUE_SYSFS_LIMIT_SHOW(max_active_zones) 236 QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_min) 237 QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_max) 238 239 #define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(_field) \ 240 static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \ 241 { \ 242 return sysfs_emit(page, "%llu\n", \ 243 (unsigned long long)disk->queue->limits._field << \ 244 SECTOR_SHIFT); \ 245 } 246 247 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_discard_sectors) 248 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_discard_sectors) 249 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_write_zeroes_sectors) 250 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_wzeroes_unmap_sectors) 251 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_wzeroes_unmap_sectors) 252 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_max_sectors) 253 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_boundary_sectors) 254 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_zone_append_sectors) 255 256 #define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(_field) \ 257 static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \ 258 { \ 259 return queue_var_show(disk->queue->limits._field >> 1, page); \ 260 } 261 262 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_sectors) 263 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_hw_sectors) 264 265 #define QUEUE_SYSFS_SHOW_CONST(_name, _val) \ 266 static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ 267 { \ 268 return sysfs_emit(page, "%d\n", _val); \ 269 } 270 271 /* deprecated fields */ 272 QUEUE_SYSFS_SHOW_CONST(discard_zeroes_data, 0) 273 QUEUE_SYSFS_SHOW_CONST(write_same_max, 0) 274 QUEUE_SYSFS_SHOW_CONST(poll_delay, -1) 275 276 static int queue_max_discard_sectors_store(struct gendisk *disk, 277 const char *page, size_t count, struct queue_limits *lim) 278 { 279 unsigned long max_discard_bytes; 280 ssize_t ret; 281 282 ret = queue_var_store(&max_discard_bytes, page, count); 283 if (ret < 0) 284 return ret; 285 286 if (max_discard_bytes & (disk->queue->limits.discard_granularity - 1)) 287 return -EINVAL; 288 289 if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX) 290 return -EINVAL; 291 292 lim->max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT; 293 return 0; 294 } 295 296 static int queue_max_wzeroes_unmap_sectors_store(struct gendisk *disk, 297 const char *page, size_t count, struct queue_limits *lim) 298 { 299 unsigned long max_zeroes_bytes, max_hw_zeroes_bytes; 300 ssize_t ret; 301 302 ret = queue_var_store(&max_zeroes_bytes, page, count); 303 if (ret < 0) 304 return ret; 305 306 max_hw_zeroes_bytes = lim->max_hw_wzeroes_unmap_sectors << SECTOR_SHIFT; 307 if (max_zeroes_bytes != 0 && max_zeroes_bytes != max_hw_zeroes_bytes) 308 return -EINVAL; 309 310 lim->max_user_wzeroes_unmap_sectors = max_zeroes_bytes >> SECTOR_SHIFT; 311 return 0; 312 } 313 314 static int 315 queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count, 316 struct queue_limits *lim) 317 { 318 unsigned long max_sectors_kb; 319 ssize_t ret; 320 321 ret = queue_var_store(&max_sectors_kb, page, count); 322 if (ret < 0) 323 return ret; 324 325 lim->max_user_sectors = max_sectors_kb << 1; 326 return 0; 327 } 328 329 static ssize_t queue_feature_store(struct gendisk *disk, const char *page, 330 size_t count, struct queue_limits *lim, blk_features_t feature) 331 { 332 unsigned long val; 333 ssize_t ret; 334 335 ret = queue_var_store(&val, page, count); 336 if (ret < 0) 337 return ret; 338 339 if (val) 340 lim->features |= feature; 341 else 342 lim->features &= ~feature; 343 return 0; 344 } 345 346 #define QUEUE_SYSFS_FEATURE(_name, _feature) \ 347 static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ 348 { \ 349 return sysfs_emit(page, "%u\n", \ 350 !!(disk->queue->limits.features & _feature)); \ 351 } \ 352 static int queue_##_name##_store(struct gendisk *disk, \ 353 const char *page, size_t count, struct queue_limits *lim) \ 354 { \ 355 return queue_feature_store(disk, page, count, lim, _feature); \ 356 } 357 358 QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL) 359 QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM) 360 QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT) 361 QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES); 362 363 #define QUEUE_SYSFS_FEATURE_SHOW(_name, _feature) \ 364 static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ 365 { \ 366 return sysfs_emit(page, "%u\n", \ 367 !!(disk->queue->limits.features & _feature)); \ 368 } 369 370 QUEUE_SYSFS_FEATURE_SHOW(fua, BLK_FEAT_FUA); 371 QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX); 372 373 static ssize_t queue_poll_show(struct gendisk *disk, char *page) 374 { 375 if (queue_is_mq(disk->queue)) 376 return sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue)); 377 378 return sysfs_emit(page, "%u\n", 379 !!(disk->queue->limits.features & BLK_FEAT_POLL)); 380 } 381 382 static ssize_t queue_zoned_show(struct gendisk *disk, char *page) 383 { 384 if (blk_queue_is_zoned(disk->queue)) 385 return sysfs_emit(page, "host-managed\n"); 386 return sysfs_emit(page, "none\n"); 387 } 388 389 static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page) 390 { 391 return queue_var_show(disk_nr_zones(disk), page); 392 } 393 394 static ssize_t queue_zoned_qd1_writes_show(struct gendisk *disk, char *page) 395 { 396 return queue_var_show(!!blk_queue_zoned_qd1_writes(disk->queue), 397 page); 398 } 399 400 static ssize_t queue_zoned_qd1_writes_store(struct gendisk *disk, 401 const char *page, size_t count) 402 { 403 struct request_queue *q = disk->queue; 404 unsigned long qd1_writes; 405 unsigned int memflags; 406 ssize_t ret; 407 408 ret = queue_var_store(&qd1_writes, page, count); 409 if (ret < 0) 410 return ret; 411 412 memflags = blk_mq_freeze_queue(q); 413 blk_mq_quiesce_queue(q); 414 if (qd1_writes) 415 blk_queue_flag_set(QUEUE_FLAG_ZONED_QD1_WRITES, q); 416 else 417 blk_queue_flag_clear(QUEUE_FLAG_ZONED_QD1_WRITES, q); 418 blk_mq_unquiesce_queue(q); 419 blk_mq_unfreeze_queue(q, memflags); 420 421 return count; 422 } 423 424 static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page) 425 { 426 return queue_var_show(!!blk_queue_passthrough_stat(disk->queue), page); 427 } 428 429 static int queue_iostats_passthrough_store(struct gendisk *disk, 430 const char *page, size_t count, struct queue_limits *lim) 431 { 432 unsigned long ios; 433 ssize_t ret; 434 435 ret = queue_var_store(&ios, page, count); 436 if (ret < 0) 437 return ret; 438 439 if (ios) 440 lim->flags |= BLK_FLAG_IOSTATS_PASSTHROUGH; 441 else 442 lim->flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH; 443 return 0; 444 } 445 446 static ssize_t queue_nomerges_show(struct gendisk *disk, char *page) 447 { 448 return queue_var_show((blk_queue_nomerges(disk->queue) << 1) | 449 blk_queue_noxmerges(disk->queue), page); 450 } 451 452 static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page, 453 size_t count) 454 { 455 unsigned long nm; 456 struct request_queue *q = disk->queue; 457 ssize_t ret = queue_var_store(&nm, page, count); 458 459 if (ret < 0) 460 return ret; 461 462 blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q); 463 blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); 464 if (nm == 2) 465 blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); 466 else if (nm) 467 blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); 468 469 return ret; 470 } 471 472 static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page) 473 { 474 bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags); 475 bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags); 476 477 return queue_var_show(set << force, page); 478 } 479 480 static ssize_t 481 queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count) 482 { 483 ssize_t ret = -EINVAL; 484 #ifdef CONFIG_SMP 485 struct request_queue *q = disk->queue; 486 unsigned long val; 487 488 ret = queue_var_store(&val, page, count); 489 if (ret < 0) 490 return ret; 491 492 /* 493 * Here we update two queue flags each using atomic bitops, although 494 * updating two flags isn't atomic it should be harmless as those flags 495 * are accessed individually using atomic test_bit operation. So we 496 * don't grab any lock while updating these flags. 497 */ 498 if (val == 2) { 499 blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q); 500 blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); 501 } else if (val == 1) { 502 blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q); 503 blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); 504 } else if (val == 0) { 505 blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); 506 blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); 507 } 508 #endif 509 return ret; 510 } 511 512 static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page, 513 size_t count) 514 { 515 return count; 516 } 517 518 static ssize_t queue_poll_store(struct gendisk *disk, const char *page, 519 size_t count) 520 { 521 ssize_t ret = count; 522 struct request_queue *q = disk->queue; 523 524 if (!(q->limits.features & BLK_FEAT_POLL)) { 525 ret = -EINVAL; 526 goto out; 527 } 528 529 pr_info_ratelimited("writes to the poll attribute are ignored.\n"); 530 pr_info_ratelimited("please use driver specific parameters instead.\n"); 531 out: 532 return ret; 533 } 534 535 static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page) 536 { 537 return sysfs_emit(page, "%u\n", 538 jiffies_to_msecs(READ_ONCE(disk->queue->rq_timeout))); 539 } 540 541 static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page, 542 size_t count) 543 { 544 unsigned int val; 545 int err; 546 struct request_queue *q = disk->queue; 547 548 err = kstrtou32(page, 10, &val); 549 if (err || val == 0) 550 return -EINVAL; 551 552 blk_queue_rq_timeout(q, msecs_to_jiffies(val)); 553 554 return count; 555 } 556 557 static ssize_t queue_wc_show(struct gendisk *disk, char *page) 558 { 559 if (blk_queue_write_cache(disk->queue)) 560 return sysfs_emit(page, "write back\n"); 561 return sysfs_emit(page, "write through\n"); 562 } 563 564 static int queue_wc_store(struct gendisk *disk, const char *page, 565 size_t count, struct queue_limits *lim) 566 { 567 bool disable; 568 569 if (!strncmp(page, "write back", 10)) { 570 disable = false; 571 } else if (!strncmp(page, "write through", 13) || 572 !strncmp(page, "none", 4)) { 573 disable = true; 574 } else { 575 return -EINVAL; 576 } 577 578 if (disable) 579 lim->flags |= BLK_FLAG_WRITE_CACHE_DISABLED; 580 else 581 lim->flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED; 582 return 0; 583 } 584 585 #define QUEUE_RO_ENTRY(_prefix, _name) \ 586 static const struct queue_sysfs_entry _prefix##_entry = { \ 587 .attr = { .name = _name, .mode = 0444 }, \ 588 .show = _prefix##_show, \ 589 }; 590 591 #define QUEUE_RW_ENTRY(_prefix, _name) \ 592 static const struct queue_sysfs_entry _prefix##_entry = { \ 593 .attr = { .name = _name, .mode = 0644 }, \ 594 .show = _prefix##_show, \ 595 .store = _prefix##_store, \ 596 }; 597 598 #define QUEUE_LIM_RO_ENTRY(_prefix, _name) \ 599 static const struct queue_sysfs_entry _prefix##_entry = { \ 600 .attr = { .name = _name, .mode = 0444 }, \ 601 .show_limit = _prefix##_show, \ 602 } 603 604 #define QUEUE_LIM_RW_ENTRY(_prefix, _name) \ 605 static const struct queue_sysfs_entry _prefix##_entry = { \ 606 .attr = { .name = _name, .mode = 0644 }, \ 607 .show_limit = _prefix##_show, \ 608 .store_limit = _prefix##_store, \ 609 } 610 611 QUEUE_RW_ENTRY(queue_requests, "nr_requests"); 612 QUEUE_RW_ENTRY(queue_async_depth, "async_depth"); 613 QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb"); 614 QUEUE_LIM_RW_ENTRY(queue_max_sectors, "max_sectors_kb"); 615 QUEUE_LIM_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb"); 616 QUEUE_LIM_RO_ENTRY(queue_max_segments, "max_segments"); 617 QUEUE_LIM_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments"); 618 QUEUE_LIM_RO_ENTRY(queue_max_segment_size, "max_segment_size"); 619 QUEUE_LIM_RO_ENTRY(queue_max_write_streams, "max_write_streams"); 620 QUEUE_LIM_RO_ENTRY(queue_write_stream_granularity, "write_stream_granularity"); 621 QUEUE_RW_ENTRY(elv_iosched, "scheduler"); 622 623 QUEUE_LIM_RO_ENTRY(queue_logical_block_size, "logical_block_size"); 624 QUEUE_LIM_RO_ENTRY(queue_physical_block_size, "physical_block_size"); 625 QUEUE_LIM_RO_ENTRY(queue_chunk_sectors, "chunk_sectors"); 626 QUEUE_LIM_RO_ENTRY(queue_io_min, "minimum_io_size"); 627 QUEUE_LIM_RO_ENTRY(queue_io_opt, "optimal_io_size"); 628 629 QUEUE_LIM_RO_ENTRY(queue_max_discard_segments, "max_discard_segments"); 630 QUEUE_LIM_RO_ENTRY(queue_discard_granularity, "discard_granularity"); 631 QUEUE_LIM_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes"); 632 QUEUE_LIM_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes"); 633 QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data"); 634 635 QUEUE_LIM_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes"); 636 QUEUE_LIM_RO_ENTRY(queue_atomic_write_boundary_sectors, 637 "atomic_write_boundary_bytes"); 638 QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes"); 639 QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes"); 640 641 QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes"); 642 QUEUE_LIM_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes"); 643 QUEUE_LIM_RO_ENTRY(queue_max_hw_wzeroes_unmap_sectors, 644 "write_zeroes_unmap_max_hw_bytes"); 645 QUEUE_LIM_RW_ENTRY(queue_max_wzeroes_unmap_sectors, 646 "write_zeroes_unmap_max_bytes"); 647 QUEUE_LIM_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes"); 648 QUEUE_LIM_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity"); 649 650 QUEUE_LIM_RO_ENTRY(queue_zoned, "zoned"); 651 QUEUE_RW_ENTRY(queue_zoned_qd1_writes, "zoned_qd1_writes"); 652 QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones"); 653 QUEUE_LIM_RO_ENTRY(queue_max_open_zones, "max_open_zones"); 654 QUEUE_LIM_RO_ENTRY(queue_max_active_zones, "max_active_zones"); 655 656 QUEUE_RW_ENTRY(queue_nomerges, "nomerges"); 657 QUEUE_LIM_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough"); 658 QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity"); 659 QUEUE_RW_ENTRY(queue_poll, "io_poll"); 660 QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay"); 661 QUEUE_LIM_RW_ENTRY(queue_wc, "write_cache"); 662 QUEUE_LIM_RO_ENTRY(queue_fua, "fua"); 663 QUEUE_LIM_RO_ENTRY(queue_dax, "dax"); 664 QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout"); 665 QUEUE_LIM_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask"); 666 QUEUE_LIM_RO_ENTRY(queue_dma_alignment, "dma_alignment"); 667 668 /* legacy alias for logical_block_size: */ 669 static const struct queue_sysfs_entry queue_hw_sector_size_entry = { 670 .attr = {.name = "hw_sector_size", .mode = 0444 }, 671 .show_limit = queue_logical_block_size_show, 672 }; 673 674 QUEUE_LIM_RW_ENTRY(queue_rotational, "rotational"); 675 QUEUE_LIM_RW_ENTRY(queue_iostats, "iostats"); 676 QUEUE_LIM_RW_ENTRY(queue_add_random, "add_random"); 677 QUEUE_LIM_RW_ENTRY(queue_stable_writes, "stable_writes"); 678 679 #ifdef CONFIG_BLK_WBT 680 static ssize_t queue_var_store64(s64 *var, const char *page) 681 { 682 int err; 683 s64 v; 684 685 err = kstrtos64(page, 10, &v); 686 if (err < 0) 687 return err; 688 689 *var = v; 690 return 0; 691 } 692 693 static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page) 694 { 695 ssize_t ret; 696 struct request_queue *q = disk->queue; 697 698 mutex_lock(&disk->rqos_state_mutex); 699 if (!wbt_rq_qos(q)) { 700 ret = -EINVAL; 701 goto out; 702 } 703 704 if (wbt_disabled(q)) { 705 ret = sysfs_emit(page, "0\n"); 706 goto out; 707 } 708 709 ret = sysfs_emit(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000)); 710 out: 711 mutex_unlock(&disk->rqos_state_mutex); 712 return ret; 713 } 714 715 static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page, 716 size_t count) 717 { 718 ssize_t ret; 719 s64 val; 720 721 ret = queue_var_store64(&val, page); 722 if (ret < 0) 723 return ret; 724 if (val < -1) 725 return -EINVAL; 726 727 ret = wbt_set_lat(disk, val); 728 return ret ? ret : count; 729 } 730 731 QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec"); 732 #endif 733 734 /* Common attributes for bio-based and request-based queues. */ 735 static const struct attribute *const queue_attrs[] = { 736 /* 737 * Attributes which are protected with q->limits_lock. 738 */ 739 &queue_max_hw_sectors_entry.attr, 740 &queue_max_sectors_entry.attr, 741 &queue_max_segments_entry.attr, 742 &queue_max_discard_segments_entry.attr, 743 &queue_max_integrity_segments_entry.attr, 744 &queue_max_segment_size_entry.attr, 745 &queue_max_write_streams_entry.attr, 746 &queue_write_stream_granularity_entry.attr, 747 &queue_hw_sector_size_entry.attr, 748 &queue_logical_block_size_entry.attr, 749 &queue_physical_block_size_entry.attr, 750 &queue_chunk_sectors_entry.attr, 751 &queue_io_min_entry.attr, 752 &queue_io_opt_entry.attr, 753 &queue_discard_granularity_entry.attr, 754 &queue_max_discard_sectors_entry.attr, 755 &queue_max_hw_discard_sectors_entry.attr, 756 &queue_atomic_write_max_sectors_entry.attr, 757 &queue_atomic_write_boundary_sectors_entry.attr, 758 &queue_atomic_write_unit_min_entry.attr, 759 &queue_atomic_write_unit_max_entry.attr, 760 &queue_max_write_zeroes_sectors_entry.attr, 761 &queue_max_hw_wzeroes_unmap_sectors_entry.attr, 762 &queue_max_wzeroes_unmap_sectors_entry.attr, 763 &queue_max_zone_append_sectors_entry.attr, 764 &queue_zone_write_granularity_entry.attr, 765 &queue_rotational_entry.attr, 766 &queue_zoned_entry.attr, 767 &queue_max_open_zones_entry.attr, 768 &queue_max_active_zones_entry.attr, 769 &queue_iostats_passthrough_entry.attr, 770 &queue_iostats_entry.attr, 771 &queue_stable_writes_entry.attr, 772 &queue_add_random_entry.attr, 773 &queue_wc_entry.attr, 774 &queue_fua_entry.attr, 775 &queue_dax_entry.attr, 776 &queue_virt_boundary_mask_entry.attr, 777 &queue_dma_alignment_entry.attr, 778 &queue_ra_entry.attr, 779 780 /* 781 * Attributes which don't require locking. 782 */ 783 &queue_discard_zeroes_data_entry.attr, 784 &queue_write_same_max_entry.attr, 785 &queue_nr_zones_entry.attr, 786 &queue_nomerges_entry.attr, 787 &queue_poll_entry.attr, 788 &queue_poll_delay_entry.attr, 789 &queue_zoned_qd1_writes_entry.attr, 790 791 NULL, 792 }; 793 794 /* Request-based queue attributes that are not relevant for bio-based queues. */ 795 static const struct attribute *const blk_mq_queue_attrs[] = { 796 /* 797 * Attributes which require some form of locking other than 798 * q->sysfs_lock. 799 */ 800 &elv_iosched_entry.attr, 801 &queue_requests_entry.attr, 802 &queue_async_depth_entry.attr, 803 #ifdef CONFIG_BLK_WBT 804 &queue_wb_lat_entry.attr, 805 #endif 806 /* 807 * Attributes which don't require locking. 808 */ 809 &queue_rq_affinity_entry.attr, 810 &queue_io_timeout_entry.attr, 811 812 NULL, 813 }; 814 815 static umode_t queue_attr_visible(struct kobject *kobj, const struct attribute *attr, 816 int n) 817 { 818 struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); 819 struct request_queue *q = disk->queue; 820 821 if ((attr == &queue_max_open_zones_entry.attr || 822 attr == &queue_max_active_zones_entry.attr || 823 attr == &queue_zoned_qd1_writes_entry.attr) && 824 !blk_queue_is_zoned(q)) 825 return 0; 826 827 return attr->mode; 828 } 829 830 static umode_t blk_mq_queue_attr_visible(struct kobject *kobj, 831 const struct attribute *attr, int n) 832 { 833 struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); 834 struct request_queue *q = disk->queue; 835 836 if (!queue_is_mq(q)) 837 return 0; 838 839 if (attr == &queue_io_timeout_entry.attr && !q->mq_ops->timeout) 840 return 0; 841 842 return attr->mode; 843 } 844 845 static const struct attribute_group queue_attr_group = { 846 .attrs_const = queue_attrs, 847 .is_visible_const = queue_attr_visible, 848 }; 849 850 static const struct attribute_group blk_mq_queue_attr_group = { 851 .attrs_const = blk_mq_queue_attrs, 852 .is_visible_const = blk_mq_queue_attr_visible, 853 }; 854 855 #define to_queue(atr) container_of_const((atr), struct queue_sysfs_entry, attr) 856 857 static ssize_t 858 queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 859 { 860 struct queue_sysfs_entry *entry = to_queue(attr); 861 struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); 862 863 if (!entry->show && !entry->show_limit) 864 return -EIO; 865 866 if (entry->show_limit) { 867 ssize_t res; 868 869 mutex_lock(&disk->queue->limits_lock); 870 res = entry->show_limit(disk, page); 871 mutex_unlock(&disk->queue->limits_lock); 872 return res; 873 } 874 875 return entry->show(disk, page); 876 } 877 878 static ssize_t 879 queue_attr_store(struct kobject *kobj, struct attribute *attr, 880 const char *page, size_t length) 881 { 882 struct queue_sysfs_entry *entry = to_queue(attr); 883 struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); 884 struct request_queue *q = disk->queue; 885 886 if (!entry->store_limit && !entry->store) 887 return -EIO; 888 889 if (entry->store_limit) { 890 ssize_t res; 891 892 struct queue_limits lim = queue_limits_start_update(q); 893 894 res = entry->store_limit(disk, page, length, &lim); 895 if (res < 0) { 896 queue_limits_cancel_update(q); 897 return res; 898 } 899 900 res = queue_limits_commit_update_frozen(q, &lim); 901 if (res) 902 return res; 903 return length; 904 } 905 906 return entry->store(disk, page, length); 907 } 908 909 static const struct sysfs_ops queue_sysfs_ops = { 910 .show = queue_attr_show, 911 .store = queue_attr_store, 912 }; 913 914 static const struct attribute_group *blk_queue_attr_groups[] = { 915 &queue_attr_group, 916 &blk_mq_queue_attr_group, 917 NULL 918 }; 919 920 static void blk_queue_release(struct kobject *kobj) 921 { 922 /* nothing to do here, all data is associated with the parent gendisk */ 923 } 924 925 const struct kobj_type blk_queue_ktype = { 926 .default_groups = blk_queue_attr_groups, 927 .sysfs_ops = &queue_sysfs_ops, 928 .release = blk_queue_release, 929 }; 930 931 static void blk_debugfs_remove(struct gendisk *disk) 932 { 933 struct request_queue *q = disk->queue; 934 935 blk_debugfs_lock_nomemsave(q); 936 blk_trace_shutdown(q); 937 if (IS_ENABLED(CONFIG_BLK_ERROR_INJECTION)) 938 blk_error_injection_exit(disk); 939 debugfs_remove_recursive(q->debugfs_dir); 940 q->debugfs_dir = NULL; 941 q->sched_debugfs_dir = NULL; 942 q->rqos_debugfs_dir = NULL; 943 blk_debugfs_unlock_nomemrestore(q); 944 } 945 946 /** 947 * blk_register_queue - register a block layer queue with sysfs 948 * @disk: Disk of which the request queue should be registered with sysfs. 949 */ 950 int blk_register_queue(struct gendisk *disk) 951 { 952 struct request_queue *q = disk->queue; 953 unsigned int memflags; 954 int ret; 955 956 ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue"); 957 if (ret < 0) 958 return ret; 959 960 if (queue_is_mq(q)) { 961 ret = blk_mq_sysfs_register(disk); 962 if (ret) 963 goto out_del_queue_kobj; 964 } 965 mutex_lock(&q->sysfs_lock); 966 967 memflags = blk_debugfs_lock(q); 968 q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root); 969 if (IS_ENABLED(CONFIG_BLK_ERROR_INJECTION)) 970 blk_error_injection_init(disk); 971 if (queue_is_mq(q)) 972 blk_mq_debugfs_register(q); 973 blk_debugfs_unlock(q, memflags); 974 975 /* 976 * For blk-mq rotational zoned devices, default to using QD=1 977 * writes. For non-mq rotational zoned devices, the device driver can 978 * set an appropriate default. 979 */ 980 if (queue_is_mq(q) && blk_queue_rot(q) && blk_queue_is_zoned(q)) 981 blk_queue_flag_set(QUEUE_FLAG_ZONED_QD1_WRITES, q); 982 983 ret = disk_register_independent_access_ranges(disk); 984 if (ret) 985 goto out_debugfs_remove; 986 987 ret = blk_crypto_sysfs_register(disk); 988 if (ret) 989 goto out_unregister_ia_ranges; 990 991 if (queue_is_mq(q)) 992 elevator_set_default(q); 993 994 blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); 995 wbt_init_enable_default(disk); 996 997 /* Now everything is ready and send out KOBJ_ADD uevent */ 998 kobject_uevent(&disk->queue_kobj, KOBJ_ADD); 999 if (q->elevator) 1000 kobject_uevent(&q->elevator->kobj, KOBJ_ADD); 1001 mutex_unlock(&q->sysfs_lock); 1002 1003 /* 1004 * SCSI probing may synchronously create and destroy a lot of 1005 * request_queues for non-existent devices. Shutting down a fully 1006 * functional queue takes measureable wallclock time as RCU grace 1007 * periods are involved. To avoid excessive latency in these 1008 * cases, a request_queue starts out in a degraded mode which is 1009 * faster to shut down and is made fully functional here as 1010 * request_queues for non-existent devices never get registered. 1011 */ 1012 blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q); 1013 percpu_ref_switch_to_percpu(&q->q_usage_counter); 1014 1015 return ret; 1016 1017 out_unregister_ia_ranges: 1018 disk_unregister_independent_access_ranges(disk); 1019 out_debugfs_remove: 1020 blk_debugfs_remove(disk); 1021 mutex_unlock(&q->sysfs_lock); 1022 if (queue_is_mq(q)) 1023 blk_mq_sysfs_unregister(disk); 1024 out_del_queue_kobj: 1025 kobject_del(&disk->queue_kobj); 1026 return ret; 1027 } 1028 1029 /** 1030 * blk_unregister_queue - counterpart of blk_register_queue() 1031 * @disk: Disk of which the request queue should be unregistered from sysfs. 1032 * 1033 * Note: the caller is responsible for guaranteeing that this function is called 1034 * after blk_register_queue() has finished. 1035 */ 1036 void blk_unregister_queue(struct gendisk *disk) 1037 { 1038 struct request_queue *q = disk->queue; 1039 1040 if (WARN_ON(!q)) 1041 return; 1042 1043 /* Return early if disk->queue was never registered. */ 1044 if (!blk_queue_registered(q)) 1045 return; 1046 1047 /* 1048 * Since sysfs_remove_dir() prevents adding new directory entries 1049 * before removal of existing entries starts, protect against 1050 * concurrent elv_iosched_store() calls. 1051 */ 1052 mutex_lock(&q->sysfs_lock); 1053 blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q); 1054 mutex_unlock(&q->sysfs_lock); 1055 1056 /* 1057 * Remove the sysfs attributes before unregistering the queue data 1058 * structures that can be modified through sysfs. 1059 */ 1060 if (queue_is_mq(q)) 1061 blk_mq_sysfs_unregister(disk); 1062 blk_crypto_sysfs_unregister(disk); 1063 1064 mutex_lock(&q->sysfs_lock); 1065 disk_unregister_independent_access_ranges(disk); 1066 mutex_unlock(&q->sysfs_lock); 1067 1068 /* Now that we've deleted all child objects, we can delete the queue. */ 1069 kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE); 1070 kobject_del(&disk->queue_kobj); 1071 1072 if (queue_is_mq(q)) 1073 elevator_set_none(q); 1074 1075 blk_debugfs_remove(disk); 1076 } 1077