1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Functions related to sysfs handling 4 */ 5 #include <linux/kernel.h> 6 #include <linux/slab.h> 7 #include <linux/module.h> 8 #include <linux/bio.h> 9 #include <linux/blkdev.h> 10 #include <linux/backing-dev.h> 11 #include <linux/blktrace_api.h> 12 #include <linux/debugfs.h> 13 14 #include "blk.h" 15 #include "blk-mq.h" 16 #include "blk-mq-debugfs.h" 17 #include "blk-mq-sched.h" 18 #include "blk-rq-qos.h" 19 #include "blk-wbt.h" 20 #include "blk-cgroup.h" 21 #include "blk-throttle.h" 22 23 struct queue_sysfs_entry { 24 struct attribute attr; 25 ssize_t (*show)(struct gendisk *disk, char *page); 26 ssize_t (*show_limit)(struct gendisk *disk, char *page); 27 28 ssize_t (*store)(struct gendisk *disk, const char *page, size_t count); 29 int (*store_limit)(struct gendisk *disk, const char *page, 30 size_t count, struct queue_limits *lim); 31 }; 32 33 static ssize_t 34 queue_var_show(unsigned long var, char *page) 35 { 36 return sysfs_emit(page, "%lu\n", var); 37 } 38 39 static ssize_t 40 queue_var_store(unsigned long *var, const char *page, size_t count) 41 { 42 int err; 43 unsigned long v; 44 45 err = kstrtoul(page, 10, &v); 46 if (err || v > UINT_MAX) 47 return -EINVAL; 48 49 *var = v; 50 51 return count; 52 } 53 54 static ssize_t queue_requests_show(struct gendisk *disk, char *page) 55 { 56 ssize_t ret; 57 58 mutex_lock(&disk->queue->elevator_lock); 59 ret = queue_var_show(disk->queue->nr_requests, page); 60 mutex_unlock(&disk->queue->elevator_lock); 61 return ret; 62 } 63 64 static ssize_t 65 queue_requests_store(struct gendisk *disk, const char *page, size_t count) 66 { 67 struct request_queue *q = disk->queue; 68 struct blk_mq_tag_set *set = q->tag_set; 69 struct elevator_tags *et = NULL; 70 unsigned int memflags; 71 unsigned long nr; 72 int ret; 73 74 ret = queue_var_store(&nr, page, count); 75 if (ret < 0) 76 return ret; 77 78 /* 79 * Serialize updating nr_requests with concurrent queue_requests_store() 80 * and switching elevator. 81 */ 82 down_write(&set->update_nr_hwq_lock); 83 84 if (nr == q->nr_requests) 85 goto unlock; 86 87 if (nr < BLKDEV_MIN_RQ) 88 nr = BLKDEV_MIN_RQ; 89 90 /* 91 * Switching elevator is protected by update_nr_hwq_lock: 92 * - read lock is held from elevator sysfs attribute; 93 * - write lock is held from updating nr_hw_queues; 94 * Hence it's safe to access q->elevator here with write lock held. 95 */ 96 if (nr <= set->reserved_tags || 97 (q->elevator && nr > MAX_SCHED_RQ) || 98 (!q->elevator && nr > set->queue_depth)) { 99 ret = -EINVAL; 100 goto unlock; 101 } 102 103 if (!blk_mq_is_shared_tags(set->flags) && q->elevator && 104 nr > q->elevator->et->nr_requests) { 105 /* 106 * Tags will grow, allocate memory before freezing queue to 107 * prevent deadlock. 108 */ 109 et = blk_mq_alloc_sched_tags(set, q->nr_hw_queues, nr); 110 if (!et) { 111 ret = -ENOMEM; 112 goto unlock; 113 } 114 } 115 116 memflags = blk_mq_freeze_queue(q); 117 mutex_lock(&q->elevator_lock); 118 et = blk_mq_update_nr_requests(q, et, nr); 119 mutex_unlock(&q->elevator_lock); 120 blk_mq_unfreeze_queue(q, memflags); 121 122 if (et) 123 blk_mq_free_sched_tags(et, set); 124 125 unlock: 126 up_write(&set->update_nr_hwq_lock); 127 return ret; 128 } 129 130 static ssize_t queue_async_depth_show(struct gendisk *disk, char *page) 131 { 132 guard(mutex)(&disk->queue->elevator_lock); 133 134 return queue_var_show(disk->queue->async_depth, page); 135 } 136 137 static ssize_t 138 queue_async_depth_store(struct gendisk *disk, const char *page, size_t count) 139 { 140 struct request_queue *q = disk->queue; 141 unsigned int memflags; 142 unsigned long nr; 143 int ret; 144 145 if (!queue_is_mq(q)) 146 return -EINVAL; 147 148 ret = queue_var_store(&nr, page, count); 149 if (ret < 0) 150 return ret; 151 152 if (nr == 0) 153 return -EINVAL; 154 155 memflags = blk_mq_freeze_queue(q); 156 scoped_guard(mutex, &q->elevator_lock) { 157 if (q->elevator) { 158 q->async_depth = min(q->nr_requests, nr); 159 if (q->elevator->type->ops.depth_updated) 160 q->elevator->type->ops.depth_updated(q); 161 } else { 162 ret = -EINVAL; 163 } 164 } 165 blk_mq_unfreeze_queue(q, memflags); 166 167 return ret; 168 } 169 170 static ssize_t queue_ra_show(struct gendisk *disk, char *page) 171 { 172 ssize_t ret; 173 174 mutex_lock(&disk->queue->limits_lock); 175 ret = queue_var_show(disk->bdi->ra_pages << (PAGE_SHIFT - 10), page); 176 mutex_unlock(&disk->queue->limits_lock); 177 178 return ret; 179 } 180 181 static ssize_t 182 queue_ra_store(struct gendisk *disk, const char *page, size_t count) 183 { 184 unsigned long ra_kb; 185 ssize_t ret; 186 struct request_queue *q = disk->queue; 187 188 ret = queue_var_store(&ra_kb, page, count); 189 if (ret < 0) 190 return ret; 191 /* 192 * The ->ra_pages change below is protected by ->limits_lock because it 193 * is usually calculated from the queue limits by 194 * queue_limits_commit_update(). 195 * 196 * bdi->ra_pages reads are not serialized against bdi->ra_pages writes. 197 * Use WRITE_ONCE() to write bdi->ra_pages once. 198 */ 199 mutex_lock(&q->limits_lock); 200 WRITE_ONCE(disk->bdi->ra_pages, ra_kb >> (PAGE_SHIFT - 10)); 201 mutex_unlock(&q->limits_lock); 202 203 return ret; 204 } 205 206 #define QUEUE_SYSFS_LIMIT_SHOW(_field) \ 207 static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \ 208 { \ 209 return queue_var_show(disk->queue->limits._field, page); \ 210 } 211 212 QUEUE_SYSFS_LIMIT_SHOW(max_segments) 213 QUEUE_SYSFS_LIMIT_SHOW(max_discard_segments) 214 QUEUE_SYSFS_LIMIT_SHOW(max_integrity_segments) 215 QUEUE_SYSFS_LIMIT_SHOW(max_segment_size) 216 QUEUE_SYSFS_LIMIT_SHOW(max_write_streams) 217 QUEUE_SYSFS_LIMIT_SHOW(write_stream_granularity) 218 QUEUE_SYSFS_LIMIT_SHOW(logical_block_size) 219 QUEUE_SYSFS_LIMIT_SHOW(physical_block_size) 220 QUEUE_SYSFS_LIMIT_SHOW(chunk_sectors) 221 QUEUE_SYSFS_LIMIT_SHOW(io_min) 222 QUEUE_SYSFS_LIMIT_SHOW(io_opt) 223 QUEUE_SYSFS_LIMIT_SHOW(discard_granularity) 224 QUEUE_SYSFS_LIMIT_SHOW(zone_write_granularity) 225 QUEUE_SYSFS_LIMIT_SHOW(virt_boundary_mask) 226 QUEUE_SYSFS_LIMIT_SHOW(dma_alignment) 227 QUEUE_SYSFS_LIMIT_SHOW(max_open_zones) 228 QUEUE_SYSFS_LIMIT_SHOW(max_active_zones) 229 QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_min) 230 QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_max) 231 232 #define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(_field) \ 233 static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \ 234 { \ 235 return sysfs_emit(page, "%llu\n", \ 236 (unsigned long long)disk->queue->limits._field << \ 237 SECTOR_SHIFT); \ 238 } 239 240 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_discard_sectors) 241 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_discard_sectors) 242 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_write_zeroes_sectors) 243 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_wzeroes_unmap_sectors) 244 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_wzeroes_unmap_sectors) 245 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_max_sectors) 246 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_boundary_sectors) 247 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_zone_append_sectors) 248 249 #define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(_field) \ 250 static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \ 251 { \ 252 return queue_var_show(disk->queue->limits._field >> 1, page); \ 253 } 254 255 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_sectors) 256 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_hw_sectors) 257 258 #define QUEUE_SYSFS_SHOW_CONST(_name, _val) \ 259 static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ 260 { \ 261 return sysfs_emit(page, "%d\n", _val); \ 262 } 263 264 /* deprecated fields */ 265 QUEUE_SYSFS_SHOW_CONST(discard_zeroes_data, 0) 266 QUEUE_SYSFS_SHOW_CONST(write_same_max, 0) 267 QUEUE_SYSFS_SHOW_CONST(poll_delay, -1) 268 269 static int queue_max_discard_sectors_store(struct gendisk *disk, 270 const char *page, size_t count, struct queue_limits *lim) 271 { 272 unsigned long max_discard_bytes; 273 ssize_t ret; 274 275 ret = queue_var_store(&max_discard_bytes, page, count); 276 if (ret < 0) 277 return ret; 278 279 if (max_discard_bytes & (disk->queue->limits.discard_granularity - 1)) 280 return -EINVAL; 281 282 if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX) 283 return -EINVAL; 284 285 lim->max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT; 286 return 0; 287 } 288 289 static int queue_max_wzeroes_unmap_sectors_store(struct gendisk *disk, 290 const char *page, size_t count, struct queue_limits *lim) 291 { 292 unsigned long max_zeroes_bytes, max_hw_zeroes_bytes; 293 ssize_t ret; 294 295 ret = queue_var_store(&max_zeroes_bytes, page, count); 296 if (ret < 0) 297 return ret; 298 299 max_hw_zeroes_bytes = lim->max_hw_wzeroes_unmap_sectors << SECTOR_SHIFT; 300 if (max_zeroes_bytes != 0 && max_zeroes_bytes != max_hw_zeroes_bytes) 301 return -EINVAL; 302 303 lim->max_user_wzeroes_unmap_sectors = max_zeroes_bytes >> SECTOR_SHIFT; 304 return 0; 305 } 306 307 static int 308 queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count, 309 struct queue_limits *lim) 310 { 311 unsigned long max_sectors_kb; 312 ssize_t ret; 313 314 ret = queue_var_store(&max_sectors_kb, page, count); 315 if (ret < 0) 316 return ret; 317 318 lim->max_user_sectors = max_sectors_kb << 1; 319 return 0; 320 } 321 322 static ssize_t queue_feature_store(struct gendisk *disk, const char *page, 323 size_t count, struct queue_limits *lim, blk_features_t feature) 324 { 325 unsigned long val; 326 ssize_t ret; 327 328 ret = queue_var_store(&val, page, count); 329 if (ret < 0) 330 return ret; 331 332 if (val) 333 lim->features |= feature; 334 else 335 lim->features &= ~feature; 336 return 0; 337 } 338 339 #define QUEUE_SYSFS_FEATURE(_name, _feature) \ 340 static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ 341 { \ 342 return sysfs_emit(page, "%u\n", \ 343 !!(disk->queue->limits.features & _feature)); \ 344 } \ 345 static int queue_##_name##_store(struct gendisk *disk, \ 346 const char *page, size_t count, struct queue_limits *lim) \ 347 { \ 348 return queue_feature_store(disk, page, count, lim, _feature); \ 349 } 350 351 QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL) 352 QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM) 353 QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT) 354 QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES); 355 356 #define QUEUE_SYSFS_FEATURE_SHOW(_name, _feature) \ 357 static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ 358 { \ 359 return sysfs_emit(page, "%u\n", \ 360 !!(disk->queue->limits.features & _feature)); \ 361 } 362 363 QUEUE_SYSFS_FEATURE_SHOW(fua, BLK_FEAT_FUA); 364 QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX); 365 366 static ssize_t queue_poll_show(struct gendisk *disk, char *page) 367 { 368 if (queue_is_mq(disk->queue)) 369 return sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue)); 370 371 return sysfs_emit(page, "%u\n", 372 !!(disk->queue->limits.features & BLK_FEAT_POLL)); 373 } 374 375 static ssize_t queue_zoned_show(struct gendisk *disk, char *page) 376 { 377 if (blk_queue_is_zoned(disk->queue)) 378 return sysfs_emit(page, "host-managed\n"); 379 return sysfs_emit(page, "none\n"); 380 } 381 382 static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page) 383 { 384 return queue_var_show(disk_nr_zones(disk), page); 385 } 386 387 static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page) 388 { 389 return queue_var_show(!!blk_queue_passthrough_stat(disk->queue), page); 390 } 391 392 static int queue_iostats_passthrough_store(struct gendisk *disk, 393 const char *page, size_t count, struct queue_limits *lim) 394 { 395 unsigned long ios; 396 ssize_t ret; 397 398 ret = queue_var_store(&ios, page, count); 399 if (ret < 0) 400 return ret; 401 402 if (ios) 403 lim->flags |= BLK_FLAG_IOSTATS_PASSTHROUGH; 404 else 405 lim->flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH; 406 return 0; 407 } 408 409 static ssize_t queue_nomerges_show(struct gendisk *disk, char *page) 410 { 411 return queue_var_show((blk_queue_nomerges(disk->queue) << 1) | 412 blk_queue_noxmerges(disk->queue), page); 413 } 414 415 static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page, 416 size_t count) 417 { 418 unsigned long nm; 419 struct request_queue *q = disk->queue; 420 ssize_t ret = queue_var_store(&nm, page, count); 421 422 if (ret < 0) 423 return ret; 424 425 blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q); 426 blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); 427 if (nm == 2) 428 blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); 429 else if (nm) 430 blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); 431 432 return ret; 433 } 434 435 static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page) 436 { 437 bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags); 438 bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags); 439 440 return queue_var_show(set << force, page); 441 } 442 443 static ssize_t 444 queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count) 445 { 446 ssize_t ret = -EINVAL; 447 #ifdef CONFIG_SMP 448 struct request_queue *q = disk->queue; 449 unsigned long val; 450 451 ret = queue_var_store(&val, page, count); 452 if (ret < 0) 453 return ret; 454 455 /* 456 * Here we update two queue flags each using atomic bitops, although 457 * updating two flags isn't atomic it should be harmless as those flags 458 * are accessed individually using atomic test_bit operation. So we 459 * don't grab any lock while updating these flags. 460 */ 461 if (val == 2) { 462 blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q); 463 blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); 464 } else if (val == 1) { 465 blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q); 466 blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); 467 } else if (val == 0) { 468 blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); 469 blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); 470 } 471 #endif 472 return ret; 473 } 474 475 static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page, 476 size_t count) 477 { 478 return count; 479 } 480 481 static ssize_t queue_poll_store(struct gendisk *disk, const char *page, 482 size_t count) 483 { 484 ssize_t ret = count; 485 struct request_queue *q = disk->queue; 486 487 if (!(q->limits.features & BLK_FEAT_POLL)) { 488 ret = -EINVAL; 489 goto out; 490 } 491 492 pr_info_ratelimited("writes to the poll attribute are ignored.\n"); 493 pr_info_ratelimited("please use driver specific parameters instead.\n"); 494 out: 495 return ret; 496 } 497 498 static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page) 499 { 500 return sysfs_emit(page, "%u\n", 501 jiffies_to_msecs(READ_ONCE(disk->queue->rq_timeout))); 502 } 503 504 static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page, 505 size_t count) 506 { 507 unsigned int val; 508 int err; 509 struct request_queue *q = disk->queue; 510 511 err = kstrtou32(page, 10, &val); 512 if (err || val == 0) 513 return -EINVAL; 514 515 blk_queue_rq_timeout(q, msecs_to_jiffies(val)); 516 517 return count; 518 } 519 520 static ssize_t queue_wc_show(struct gendisk *disk, char *page) 521 { 522 if (blk_queue_write_cache(disk->queue)) 523 return sysfs_emit(page, "write back\n"); 524 return sysfs_emit(page, "write through\n"); 525 } 526 527 static int queue_wc_store(struct gendisk *disk, const char *page, 528 size_t count, struct queue_limits *lim) 529 { 530 bool disable; 531 532 if (!strncmp(page, "write back", 10)) { 533 disable = false; 534 } else if (!strncmp(page, "write through", 13) || 535 !strncmp(page, "none", 4)) { 536 disable = true; 537 } else { 538 return -EINVAL; 539 } 540 541 if (disable) 542 lim->flags |= BLK_FLAG_WRITE_CACHE_DISABLED; 543 else 544 lim->flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED; 545 return 0; 546 } 547 548 #define QUEUE_RO_ENTRY(_prefix, _name) \ 549 static struct queue_sysfs_entry _prefix##_entry = { \ 550 .attr = { .name = _name, .mode = 0444 }, \ 551 .show = _prefix##_show, \ 552 }; 553 554 #define QUEUE_RW_ENTRY(_prefix, _name) \ 555 static struct queue_sysfs_entry _prefix##_entry = { \ 556 .attr = { .name = _name, .mode = 0644 }, \ 557 .show = _prefix##_show, \ 558 .store = _prefix##_store, \ 559 }; 560 561 #define QUEUE_LIM_RO_ENTRY(_prefix, _name) \ 562 static struct queue_sysfs_entry _prefix##_entry = { \ 563 .attr = { .name = _name, .mode = 0444 }, \ 564 .show_limit = _prefix##_show, \ 565 } 566 567 #define QUEUE_LIM_RW_ENTRY(_prefix, _name) \ 568 static struct queue_sysfs_entry _prefix##_entry = { \ 569 .attr = { .name = _name, .mode = 0644 }, \ 570 .show_limit = _prefix##_show, \ 571 .store_limit = _prefix##_store, \ 572 } 573 574 QUEUE_RW_ENTRY(queue_requests, "nr_requests"); 575 QUEUE_RW_ENTRY(queue_async_depth, "async_depth"); 576 QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb"); 577 QUEUE_LIM_RW_ENTRY(queue_max_sectors, "max_sectors_kb"); 578 QUEUE_LIM_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb"); 579 QUEUE_LIM_RO_ENTRY(queue_max_segments, "max_segments"); 580 QUEUE_LIM_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments"); 581 QUEUE_LIM_RO_ENTRY(queue_max_segment_size, "max_segment_size"); 582 QUEUE_LIM_RO_ENTRY(queue_max_write_streams, "max_write_streams"); 583 QUEUE_LIM_RO_ENTRY(queue_write_stream_granularity, "write_stream_granularity"); 584 QUEUE_RW_ENTRY(elv_iosched, "scheduler"); 585 586 QUEUE_LIM_RO_ENTRY(queue_logical_block_size, "logical_block_size"); 587 QUEUE_LIM_RO_ENTRY(queue_physical_block_size, "physical_block_size"); 588 QUEUE_LIM_RO_ENTRY(queue_chunk_sectors, "chunk_sectors"); 589 QUEUE_LIM_RO_ENTRY(queue_io_min, "minimum_io_size"); 590 QUEUE_LIM_RO_ENTRY(queue_io_opt, "optimal_io_size"); 591 592 QUEUE_LIM_RO_ENTRY(queue_max_discard_segments, "max_discard_segments"); 593 QUEUE_LIM_RO_ENTRY(queue_discard_granularity, "discard_granularity"); 594 QUEUE_LIM_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes"); 595 QUEUE_LIM_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes"); 596 QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data"); 597 598 QUEUE_LIM_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes"); 599 QUEUE_LIM_RO_ENTRY(queue_atomic_write_boundary_sectors, 600 "atomic_write_boundary_bytes"); 601 QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes"); 602 QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes"); 603 604 QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes"); 605 QUEUE_LIM_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes"); 606 QUEUE_LIM_RO_ENTRY(queue_max_hw_wzeroes_unmap_sectors, 607 "write_zeroes_unmap_max_hw_bytes"); 608 QUEUE_LIM_RW_ENTRY(queue_max_wzeroes_unmap_sectors, 609 "write_zeroes_unmap_max_bytes"); 610 QUEUE_LIM_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes"); 611 QUEUE_LIM_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity"); 612 613 QUEUE_LIM_RO_ENTRY(queue_zoned, "zoned"); 614 QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones"); 615 QUEUE_LIM_RO_ENTRY(queue_max_open_zones, "max_open_zones"); 616 QUEUE_LIM_RO_ENTRY(queue_max_active_zones, "max_active_zones"); 617 618 QUEUE_RW_ENTRY(queue_nomerges, "nomerges"); 619 QUEUE_LIM_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough"); 620 QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity"); 621 QUEUE_RW_ENTRY(queue_poll, "io_poll"); 622 QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay"); 623 QUEUE_LIM_RW_ENTRY(queue_wc, "write_cache"); 624 QUEUE_LIM_RO_ENTRY(queue_fua, "fua"); 625 QUEUE_LIM_RO_ENTRY(queue_dax, "dax"); 626 QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout"); 627 QUEUE_LIM_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask"); 628 QUEUE_LIM_RO_ENTRY(queue_dma_alignment, "dma_alignment"); 629 630 /* legacy alias for logical_block_size: */ 631 static struct queue_sysfs_entry queue_hw_sector_size_entry = { 632 .attr = {.name = "hw_sector_size", .mode = 0444 }, 633 .show_limit = queue_logical_block_size_show, 634 }; 635 636 QUEUE_LIM_RW_ENTRY(queue_rotational, "rotational"); 637 QUEUE_LIM_RW_ENTRY(queue_iostats, "iostats"); 638 QUEUE_LIM_RW_ENTRY(queue_add_random, "add_random"); 639 QUEUE_LIM_RW_ENTRY(queue_stable_writes, "stable_writes"); 640 641 #ifdef CONFIG_BLK_WBT 642 static ssize_t queue_var_store64(s64 *var, const char *page) 643 { 644 int err; 645 s64 v; 646 647 err = kstrtos64(page, 10, &v); 648 if (err < 0) 649 return err; 650 651 *var = v; 652 return 0; 653 } 654 655 static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page) 656 { 657 ssize_t ret; 658 struct request_queue *q = disk->queue; 659 660 mutex_lock(&disk->rqos_state_mutex); 661 if (!wbt_rq_qos(q)) { 662 ret = -EINVAL; 663 goto out; 664 } 665 666 if (wbt_disabled(q)) { 667 ret = sysfs_emit(page, "0\n"); 668 goto out; 669 } 670 671 ret = sysfs_emit(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000)); 672 out: 673 mutex_unlock(&disk->rqos_state_mutex); 674 return ret; 675 } 676 677 static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page, 678 size_t count) 679 { 680 ssize_t ret; 681 s64 val; 682 683 ret = queue_var_store64(&val, page); 684 if (ret < 0) 685 return ret; 686 if (val < -1) 687 return -EINVAL; 688 689 ret = wbt_set_lat(disk, val); 690 return ret ? ret : count; 691 } 692 693 QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec"); 694 #endif 695 696 /* Common attributes for bio-based and request-based queues. */ 697 static struct attribute *queue_attrs[] = { 698 /* 699 * Attributes which are protected with q->limits_lock. 700 */ 701 &queue_max_hw_sectors_entry.attr, 702 &queue_max_sectors_entry.attr, 703 &queue_max_segments_entry.attr, 704 &queue_max_discard_segments_entry.attr, 705 &queue_max_integrity_segments_entry.attr, 706 &queue_max_segment_size_entry.attr, 707 &queue_max_write_streams_entry.attr, 708 &queue_write_stream_granularity_entry.attr, 709 &queue_hw_sector_size_entry.attr, 710 &queue_logical_block_size_entry.attr, 711 &queue_physical_block_size_entry.attr, 712 &queue_chunk_sectors_entry.attr, 713 &queue_io_min_entry.attr, 714 &queue_io_opt_entry.attr, 715 &queue_discard_granularity_entry.attr, 716 &queue_max_discard_sectors_entry.attr, 717 &queue_max_hw_discard_sectors_entry.attr, 718 &queue_atomic_write_max_sectors_entry.attr, 719 &queue_atomic_write_boundary_sectors_entry.attr, 720 &queue_atomic_write_unit_min_entry.attr, 721 &queue_atomic_write_unit_max_entry.attr, 722 &queue_max_write_zeroes_sectors_entry.attr, 723 &queue_max_hw_wzeroes_unmap_sectors_entry.attr, 724 &queue_max_wzeroes_unmap_sectors_entry.attr, 725 &queue_max_zone_append_sectors_entry.attr, 726 &queue_zone_write_granularity_entry.attr, 727 &queue_rotational_entry.attr, 728 &queue_zoned_entry.attr, 729 &queue_max_open_zones_entry.attr, 730 &queue_max_active_zones_entry.attr, 731 &queue_iostats_passthrough_entry.attr, 732 &queue_iostats_entry.attr, 733 &queue_stable_writes_entry.attr, 734 &queue_add_random_entry.attr, 735 &queue_wc_entry.attr, 736 &queue_fua_entry.attr, 737 &queue_dax_entry.attr, 738 &queue_virt_boundary_mask_entry.attr, 739 &queue_dma_alignment_entry.attr, 740 &queue_ra_entry.attr, 741 742 /* 743 * Attributes which don't require locking. 744 */ 745 &queue_discard_zeroes_data_entry.attr, 746 &queue_write_same_max_entry.attr, 747 &queue_nr_zones_entry.attr, 748 &queue_nomerges_entry.attr, 749 &queue_poll_entry.attr, 750 &queue_poll_delay_entry.attr, 751 752 NULL, 753 }; 754 755 /* Request-based queue attributes that are not relevant for bio-based queues. */ 756 static struct attribute *blk_mq_queue_attrs[] = { 757 /* 758 * Attributes which require some form of locking other than 759 * q->sysfs_lock. 760 */ 761 &elv_iosched_entry.attr, 762 &queue_requests_entry.attr, 763 &queue_async_depth_entry.attr, 764 #ifdef CONFIG_BLK_WBT 765 &queue_wb_lat_entry.attr, 766 #endif 767 /* 768 * Attributes which don't require locking. 769 */ 770 &queue_rq_affinity_entry.attr, 771 &queue_io_timeout_entry.attr, 772 773 NULL, 774 }; 775 776 static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr, 777 int n) 778 { 779 struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); 780 struct request_queue *q = disk->queue; 781 782 if ((attr == &queue_max_open_zones_entry.attr || 783 attr == &queue_max_active_zones_entry.attr) && 784 !blk_queue_is_zoned(q)) 785 return 0; 786 787 return attr->mode; 788 } 789 790 static umode_t blk_mq_queue_attr_visible(struct kobject *kobj, 791 struct attribute *attr, int n) 792 { 793 struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); 794 struct request_queue *q = disk->queue; 795 796 if (!queue_is_mq(q)) 797 return 0; 798 799 if (attr == &queue_io_timeout_entry.attr && !q->mq_ops->timeout) 800 return 0; 801 802 return attr->mode; 803 } 804 805 static struct attribute_group queue_attr_group = { 806 .attrs = queue_attrs, 807 .is_visible = queue_attr_visible, 808 }; 809 810 static struct attribute_group blk_mq_queue_attr_group = { 811 .attrs = blk_mq_queue_attrs, 812 .is_visible = blk_mq_queue_attr_visible, 813 }; 814 815 #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) 816 817 static ssize_t 818 queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 819 { 820 struct queue_sysfs_entry *entry = to_queue(attr); 821 struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); 822 823 if (!entry->show && !entry->show_limit) 824 return -EIO; 825 826 if (entry->show_limit) { 827 ssize_t res; 828 829 mutex_lock(&disk->queue->limits_lock); 830 res = entry->show_limit(disk, page); 831 mutex_unlock(&disk->queue->limits_lock); 832 return res; 833 } 834 835 return entry->show(disk, page); 836 } 837 838 static ssize_t 839 queue_attr_store(struct kobject *kobj, struct attribute *attr, 840 const char *page, size_t length) 841 { 842 struct queue_sysfs_entry *entry = to_queue(attr); 843 struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); 844 struct request_queue *q = disk->queue; 845 846 if (!entry->store_limit && !entry->store) 847 return -EIO; 848 849 if (entry->store_limit) { 850 ssize_t res; 851 852 struct queue_limits lim = queue_limits_start_update(q); 853 854 res = entry->store_limit(disk, page, length, &lim); 855 if (res < 0) { 856 queue_limits_cancel_update(q); 857 return res; 858 } 859 860 res = queue_limits_commit_update_frozen(q, &lim); 861 if (res) 862 return res; 863 return length; 864 } 865 866 return entry->store(disk, page, length); 867 } 868 869 static const struct sysfs_ops queue_sysfs_ops = { 870 .show = queue_attr_show, 871 .store = queue_attr_store, 872 }; 873 874 static const struct attribute_group *blk_queue_attr_groups[] = { 875 &queue_attr_group, 876 &blk_mq_queue_attr_group, 877 NULL 878 }; 879 880 static void blk_queue_release(struct kobject *kobj) 881 { 882 /* nothing to do here, all data is associated with the parent gendisk */ 883 } 884 885 const struct kobj_type blk_queue_ktype = { 886 .default_groups = blk_queue_attr_groups, 887 .sysfs_ops = &queue_sysfs_ops, 888 .release = blk_queue_release, 889 }; 890 891 static void blk_debugfs_remove(struct gendisk *disk) 892 { 893 struct request_queue *q = disk->queue; 894 895 mutex_lock(&q->debugfs_mutex); 896 blk_trace_shutdown(q); 897 debugfs_remove_recursive(q->debugfs_dir); 898 q->debugfs_dir = NULL; 899 q->sched_debugfs_dir = NULL; 900 q->rqos_debugfs_dir = NULL; 901 mutex_unlock(&q->debugfs_mutex); 902 } 903 904 /** 905 * blk_register_queue - register a block layer queue with sysfs 906 * @disk: Disk of which the request queue should be registered with sysfs. 907 */ 908 int blk_register_queue(struct gendisk *disk) 909 { 910 struct request_queue *q = disk->queue; 911 int ret; 912 913 ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue"); 914 if (ret < 0) 915 return ret; 916 917 if (queue_is_mq(q)) { 918 ret = blk_mq_sysfs_register(disk); 919 if (ret) 920 goto out_del_queue_kobj; 921 } 922 mutex_lock(&q->sysfs_lock); 923 924 mutex_lock(&q->debugfs_mutex); 925 q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root); 926 if (queue_is_mq(q)) 927 blk_mq_debugfs_register(q); 928 mutex_unlock(&q->debugfs_mutex); 929 930 ret = disk_register_independent_access_ranges(disk); 931 if (ret) 932 goto out_debugfs_remove; 933 934 ret = blk_crypto_sysfs_register(disk); 935 if (ret) 936 goto out_unregister_ia_ranges; 937 938 if (queue_is_mq(q)) 939 elevator_set_default(q); 940 941 blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); 942 wbt_init_enable_default(disk); 943 944 /* Now everything is ready and send out KOBJ_ADD uevent */ 945 kobject_uevent(&disk->queue_kobj, KOBJ_ADD); 946 if (q->elevator) 947 kobject_uevent(&q->elevator->kobj, KOBJ_ADD); 948 mutex_unlock(&q->sysfs_lock); 949 950 /* 951 * SCSI probing may synchronously create and destroy a lot of 952 * request_queues for non-existent devices. Shutting down a fully 953 * functional queue takes measureable wallclock time as RCU grace 954 * periods are involved. To avoid excessive latency in these 955 * cases, a request_queue starts out in a degraded mode which is 956 * faster to shut down and is made fully functional here as 957 * request_queues for non-existent devices never get registered. 958 */ 959 blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q); 960 percpu_ref_switch_to_percpu(&q->q_usage_counter); 961 962 return ret; 963 964 out_unregister_ia_ranges: 965 disk_unregister_independent_access_ranges(disk); 966 out_debugfs_remove: 967 blk_debugfs_remove(disk); 968 mutex_unlock(&q->sysfs_lock); 969 if (queue_is_mq(q)) 970 blk_mq_sysfs_unregister(disk); 971 out_del_queue_kobj: 972 kobject_del(&disk->queue_kobj); 973 return ret; 974 } 975 976 /** 977 * blk_unregister_queue - counterpart of blk_register_queue() 978 * @disk: Disk of which the request queue should be unregistered from sysfs. 979 * 980 * Note: the caller is responsible for guaranteeing that this function is called 981 * after blk_register_queue() has finished. 982 */ 983 void blk_unregister_queue(struct gendisk *disk) 984 { 985 struct request_queue *q = disk->queue; 986 987 if (WARN_ON(!q)) 988 return; 989 990 /* Return early if disk->queue was never registered. */ 991 if (!blk_queue_registered(q)) 992 return; 993 994 /* 995 * Since sysfs_remove_dir() prevents adding new directory entries 996 * before removal of existing entries starts, protect against 997 * concurrent elv_iosched_store() calls. 998 */ 999 mutex_lock(&q->sysfs_lock); 1000 blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q); 1001 mutex_unlock(&q->sysfs_lock); 1002 1003 /* 1004 * Remove the sysfs attributes before unregistering the queue data 1005 * structures that can be modified through sysfs. 1006 */ 1007 if (queue_is_mq(q)) 1008 blk_mq_sysfs_unregister(disk); 1009 blk_crypto_sysfs_unregister(disk); 1010 1011 mutex_lock(&q->sysfs_lock); 1012 disk_unregister_independent_access_ranges(disk); 1013 mutex_unlock(&q->sysfs_lock); 1014 1015 /* Now that we've deleted all child objects, we can delete the queue. */ 1016 kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE); 1017 kobject_del(&disk->queue_kobj); 1018 1019 if (queue_is_mq(q)) 1020 elevator_set_none(q); 1021 1022 blk_debugfs_remove(disk); 1023 } 1024