1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Functions related to sysfs handling 4 */ 5 #include <linux/kernel.h> 6 #include <linux/slab.h> 7 #include <linux/module.h> 8 #include <linux/bio.h> 9 #include <linux/blkdev.h> 10 #include <linux/backing-dev.h> 11 #include <linux/blktrace_api.h> 12 #include <linux/debugfs.h> 13 14 #include "blk.h" 15 #include "blk-mq.h" 16 #include "blk-mq-debugfs.h" 17 #include "blk-mq-sched.h" 18 #include "blk-rq-qos.h" 19 #include "blk-wbt.h" 20 #include "blk-cgroup.h" 21 #include "blk-throttle.h" 22 23 struct queue_sysfs_entry { 24 struct attribute attr; 25 ssize_t (*show)(struct gendisk *disk, char *page); 26 ssize_t (*show_limit)(struct gendisk *disk, char *page); 27 28 ssize_t (*store)(struct gendisk *disk, const char *page, size_t count); 29 int (*store_limit)(struct gendisk *disk, const char *page, 30 size_t count, struct queue_limits *lim); 31 }; 32 33 static ssize_t 34 queue_var_show(unsigned long var, char *page) 35 { 36 return sysfs_emit(page, "%lu\n", var); 37 } 38 39 static ssize_t 40 queue_var_store(unsigned long *var, const char *page, size_t count) 41 { 42 int err; 43 unsigned long v; 44 45 err = kstrtoul(page, 10, &v); 46 if (err || v > UINT_MAX) 47 return -EINVAL; 48 49 *var = v; 50 51 return count; 52 } 53 54 static ssize_t queue_requests_show(struct gendisk *disk, char *page) 55 { 56 ssize_t ret; 57 58 mutex_lock(&disk->queue->elevator_lock); 59 ret = queue_var_show(disk->queue->nr_requests, page); 60 mutex_unlock(&disk->queue->elevator_lock); 61 return ret; 62 } 63 64 static ssize_t 65 queue_requests_store(struct gendisk *disk, const char *page, size_t count) 66 { 67 struct request_queue *q = disk->queue; 68 struct blk_mq_tag_set *set = q->tag_set; 69 struct elevator_tags *et = NULL; 70 unsigned int memflags; 71 unsigned long nr; 72 int ret; 73 74 ret = queue_var_store(&nr, page, count); 75 if (ret < 0) 76 return ret; 77 78 /* 79 * Serialize updating nr_requests with concurrent queue_requests_store() 80 * and switching elevator. 81 */ 82 down_write(&set->update_nr_hwq_lock); 83 84 if (nr == q->nr_requests) 85 goto unlock; 86 87 if (nr < BLKDEV_MIN_RQ) 88 nr = BLKDEV_MIN_RQ; 89 90 /* 91 * Switching elevator is protected by update_nr_hwq_lock: 92 * - read lock is held from elevator sysfs attribute; 93 * - write lock is held from updating nr_hw_queues; 94 * Hence it's safe to access q->elevator here with write lock held. 95 */ 96 if (nr <= set->reserved_tags || 97 (q->elevator && nr > MAX_SCHED_RQ) || 98 (!q->elevator && nr > set->queue_depth)) { 99 ret = -EINVAL; 100 goto unlock; 101 } 102 103 if (!blk_mq_is_shared_tags(set->flags) && q->elevator && 104 nr > q->elevator->et->nr_requests) { 105 /* 106 * Tags will grow, allocate memory before freezing queue to 107 * prevent deadlock. 108 */ 109 et = blk_mq_alloc_sched_tags(set, q->nr_hw_queues, nr); 110 if (!et) { 111 ret = -ENOMEM; 112 goto unlock; 113 } 114 } 115 116 memflags = blk_mq_freeze_queue(q); 117 mutex_lock(&q->elevator_lock); 118 et = blk_mq_update_nr_requests(q, et, nr); 119 mutex_unlock(&q->elevator_lock); 120 blk_mq_unfreeze_queue(q, memflags); 121 122 if (et) 123 blk_mq_free_sched_tags(et, set); 124 125 unlock: 126 up_write(&set->update_nr_hwq_lock); 127 return ret; 128 } 129 130 static ssize_t queue_ra_show(struct gendisk *disk, char *page) 131 { 132 ssize_t ret; 133 134 mutex_lock(&disk->queue->limits_lock); 135 ret = queue_var_show(disk->bdi->ra_pages << (PAGE_SHIFT - 10), page); 136 mutex_unlock(&disk->queue->limits_lock); 137 138 return ret; 139 } 140 141 static ssize_t 142 queue_ra_store(struct gendisk *disk, const char *page, size_t count) 143 { 144 unsigned long ra_kb; 145 ssize_t ret; 146 struct request_queue *q = disk->queue; 147 148 ret = queue_var_store(&ra_kb, page, count); 149 if (ret < 0) 150 return ret; 151 /* 152 * The ->ra_pages change below is protected by ->limits_lock because it 153 * is usually calculated from the queue limits by 154 * queue_limits_commit_update(). 155 * 156 * bdi->ra_pages reads are not serialized against bdi->ra_pages writes. 157 * Use WRITE_ONCE() to write bdi->ra_pages once. 158 */ 159 mutex_lock(&q->limits_lock); 160 WRITE_ONCE(disk->bdi->ra_pages, ra_kb >> (PAGE_SHIFT - 10)); 161 mutex_unlock(&q->limits_lock); 162 163 return ret; 164 } 165 166 #define QUEUE_SYSFS_LIMIT_SHOW(_field) \ 167 static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \ 168 { \ 169 return queue_var_show(disk->queue->limits._field, page); \ 170 } 171 172 QUEUE_SYSFS_LIMIT_SHOW(max_segments) 173 QUEUE_SYSFS_LIMIT_SHOW(max_discard_segments) 174 QUEUE_SYSFS_LIMIT_SHOW(max_integrity_segments) 175 QUEUE_SYSFS_LIMIT_SHOW(max_segment_size) 176 QUEUE_SYSFS_LIMIT_SHOW(max_write_streams) 177 QUEUE_SYSFS_LIMIT_SHOW(write_stream_granularity) 178 QUEUE_SYSFS_LIMIT_SHOW(logical_block_size) 179 QUEUE_SYSFS_LIMIT_SHOW(physical_block_size) 180 QUEUE_SYSFS_LIMIT_SHOW(chunk_sectors) 181 QUEUE_SYSFS_LIMIT_SHOW(io_min) 182 QUEUE_SYSFS_LIMIT_SHOW(io_opt) 183 QUEUE_SYSFS_LIMIT_SHOW(discard_granularity) 184 QUEUE_SYSFS_LIMIT_SHOW(zone_write_granularity) 185 QUEUE_SYSFS_LIMIT_SHOW(virt_boundary_mask) 186 QUEUE_SYSFS_LIMIT_SHOW(dma_alignment) 187 QUEUE_SYSFS_LIMIT_SHOW(max_open_zones) 188 QUEUE_SYSFS_LIMIT_SHOW(max_active_zones) 189 QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_min) 190 QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_max) 191 192 #define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(_field) \ 193 static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \ 194 { \ 195 return sysfs_emit(page, "%llu\n", \ 196 (unsigned long long)disk->queue->limits._field << \ 197 SECTOR_SHIFT); \ 198 } 199 200 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_discard_sectors) 201 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_discard_sectors) 202 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_write_zeroes_sectors) 203 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_wzeroes_unmap_sectors) 204 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_wzeroes_unmap_sectors) 205 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_max_sectors) 206 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_boundary_sectors) 207 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_zone_append_sectors) 208 209 #define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(_field) \ 210 static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \ 211 { \ 212 return queue_var_show(disk->queue->limits._field >> 1, page); \ 213 } 214 215 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_sectors) 216 QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_hw_sectors) 217 218 #define QUEUE_SYSFS_SHOW_CONST(_name, _val) \ 219 static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ 220 { \ 221 return sysfs_emit(page, "%d\n", _val); \ 222 } 223 224 /* deprecated fields */ 225 QUEUE_SYSFS_SHOW_CONST(discard_zeroes_data, 0) 226 QUEUE_SYSFS_SHOW_CONST(write_same_max, 0) 227 QUEUE_SYSFS_SHOW_CONST(poll_delay, -1) 228 229 static int queue_max_discard_sectors_store(struct gendisk *disk, 230 const char *page, size_t count, struct queue_limits *lim) 231 { 232 unsigned long max_discard_bytes; 233 ssize_t ret; 234 235 ret = queue_var_store(&max_discard_bytes, page, count); 236 if (ret < 0) 237 return ret; 238 239 if (max_discard_bytes & (disk->queue->limits.discard_granularity - 1)) 240 return -EINVAL; 241 242 if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX) 243 return -EINVAL; 244 245 lim->max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT; 246 return 0; 247 } 248 249 static int queue_max_wzeroes_unmap_sectors_store(struct gendisk *disk, 250 const char *page, size_t count, struct queue_limits *lim) 251 { 252 unsigned long max_zeroes_bytes, max_hw_zeroes_bytes; 253 ssize_t ret; 254 255 ret = queue_var_store(&max_zeroes_bytes, page, count); 256 if (ret < 0) 257 return ret; 258 259 max_hw_zeroes_bytes = lim->max_hw_wzeroes_unmap_sectors << SECTOR_SHIFT; 260 if (max_zeroes_bytes != 0 && max_zeroes_bytes != max_hw_zeroes_bytes) 261 return -EINVAL; 262 263 lim->max_user_wzeroes_unmap_sectors = max_zeroes_bytes >> SECTOR_SHIFT; 264 return 0; 265 } 266 267 static int 268 queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count, 269 struct queue_limits *lim) 270 { 271 unsigned long max_sectors_kb; 272 ssize_t ret; 273 274 ret = queue_var_store(&max_sectors_kb, page, count); 275 if (ret < 0) 276 return ret; 277 278 lim->max_user_sectors = max_sectors_kb << 1; 279 return 0; 280 } 281 282 static ssize_t queue_feature_store(struct gendisk *disk, const char *page, 283 size_t count, struct queue_limits *lim, blk_features_t feature) 284 { 285 unsigned long val; 286 ssize_t ret; 287 288 ret = queue_var_store(&val, page, count); 289 if (ret < 0) 290 return ret; 291 292 if (val) 293 lim->features |= feature; 294 else 295 lim->features &= ~feature; 296 return 0; 297 } 298 299 #define QUEUE_SYSFS_FEATURE(_name, _feature) \ 300 static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ 301 { \ 302 return sysfs_emit(page, "%u\n", \ 303 !!(disk->queue->limits.features & _feature)); \ 304 } \ 305 static int queue_##_name##_store(struct gendisk *disk, \ 306 const char *page, size_t count, struct queue_limits *lim) \ 307 { \ 308 return queue_feature_store(disk, page, count, lim, _feature); \ 309 } 310 311 QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL) 312 QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM) 313 QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT) 314 QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES); 315 316 #define QUEUE_SYSFS_FEATURE_SHOW(_name, _feature) \ 317 static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ 318 { \ 319 return sysfs_emit(page, "%u\n", \ 320 !!(disk->queue->limits.features & _feature)); \ 321 } 322 323 QUEUE_SYSFS_FEATURE_SHOW(fua, BLK_FEAT_FUA); 324 QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX); 325 326 static ssize_t queue_poll_show(struct gendisk *disk, char *page) 327 { 328 if (queue_is_mq(disk->queue)) 329 return sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue)); 330 331 return sysfs_emit(page, "%u\n", 332 !!(disk->queue->limits.features & BLK_FEAT_POLL)); 333 } 334 335 static ssize_t queue_zoned_show(struct gendisk *disk, char *page) 336 { 337 if (blk_queue_is_zoned(disk->queue)) 338 return sysfs_emit(page, "host-managed\n"); 339 return sysfs_emit(page, "none\n"); 340 } 341 342 static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page) 343 { 344 return queue_var_show(disk_nr_zones(disk), page); 345 } 346 347 static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page) 348 { 349 return queue_var_show(!!blk_queue_passthrough_stat(disk->queue), page); 350 } 351 352 static int queue_iostats_passthrough_store(struct gendisk *disk, 353 const char *page, size_t count, struct queue_limits *lim) 354 { 355 unsigned long ios; 356 ssize_t ret; 357 358 ret = queue_var_store(&ios, page, count); 359 if (ret < 0) 360 return ret; 361 362 if (ios) 363 lim->flags |= BLK_FLAG_IOSTATS_PASSTHROUGH; 364 else 365 lim->flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH; 366 return 0; 367 } 368 369 static ssize_t queue_nomerges_show(struct gendisk *disk, char *page) 370 { 371 return queue_var_show((blk_queue_nomerges(disk->queue) << 1) | 372 blk_queue_noxmerges(disk->queue), page); 373 } 374 375 static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page, 376 size_t count) 377 { 378 unsigned long nm; 379 struct request_queue *q = disk->queue; 380 ssize_t ret = queue_var_store(&nm, page, count); 381 382 if (ret < 0) 383 return ret; 384 385 blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q); 386 blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); 387 if (nm == 2) 388 blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); 389 else if (nm) 390 blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); 391 392 return ret; 393 } 394 395 static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page) 396 { 397 bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags); 398 bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags); 399 400 return queue_var_show(set << force, page); 401 } 402 403 static ssize_t 404 queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count) 405 { 406 ssize_t ret = -EINVAL; 407 #ifdef CONFIG_SMP 408 struct request_queue *q = disk->queue; 409 unsigned long val; 410 411 ret = queue_var_store(&val, page, count); 412 if (ret < 0) 413 return ret; 414 415 /* 416 * Here we update two queue flags each using atomic bitops, although 417 * updating two flags isn't atomic it should be harmless as those flags 418 * are accessed individually using atomic test_bit operation. So we 419 * don't grab any lock while updating these flags. 420 */ 421 if (val == 2) { 422 blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q); 423 blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); 424 } else if (val == 1) { 425 blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q); 426 blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); 427 } else if (val == 0) { 428 blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); 429 blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); 430 } 431 #endif 432 return ret; 433 } 434 435 static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page, 436 size_t count) 437 { 438 return count; 439 } 440 441 static ssize_t queue_poll_store(struct gendisk *disk, const char *page, 442 size_t count) 443 { 444 ssize_t ret = count; 445 struct request_queue *q = disk->queue; 446 447 if (!(q->limits.features & BLK_FEAT_POLL)) { 448 ret = -EINVAL; 449 goto out; 450 } 451 452 pr_info_ratelimited("writes to the poll attribute are ignored.\n"); 453 pr_info_ratelimited("please use driver specific parameters instead.\n"); 454 out: 455 return ret; 456 } 457 458 static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page) 459 { 460 return sysfs_emit(page, "%u\n", 461 jiffies_to_msecs(READ_ONCE(disk->queue->rq_timeout))); 462 } 463 464 static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page, 465 size_t count) 466 { 467 unsigned int val; 468 int err; 469 struct request_queue *q = disk->queue; 470 471 err = kstrtou32(page, 10, &val); 472 if (err || val == 0) 473 return -EINVAL; 474 475 blk_queue_rq_timeout(q, msecs_to_jiffies(val)); 476 477 return count; 478 } 479 480 static ssize_t queue_wc_show(struct gendisk *disk, char *page) 481 { 482 if (blk_queue_write_cache(disk->queue)) 483 return sysfs_emit(page, "write back\n"); 484 return sysfs_emit(page, "write through\n"); 485 } 486 487 static int queue_wc_store(struct gendisk *disk, const char *page, 488 size_t count, struct queue_limits *lim) 489 { 490 bool disable; 491 492 if (!strncmp(page, "write back", 10)) { 493 disable = false; 494 } else if (!strncmp(page, "write through", 13) || 495 !strncmp(page, "none", 4)) { 496 disable = true; 497 } else { 498 return -EINVAL; 499 } 500 501 if (disable) 502 lim->flags |= BLK_FLAG_WRITE_CACHE_DISABLED; 503 else 504 lim->flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED; 505 return 0; 506 } 507 508 #define QUEUE_RO_ENTRY(_prefix, _name) \ 509 static struct queue_sysfs_entry _prefix##_entry = { \ 510 .attr = { .name = _name, .mode = 0444 }, \ 511 .show = _prefix##_show, \ 512 }; 513 514 #define QUEUE_RW_ENTRY(_prefix, _name) \ 515 static struct queue_sysfs_entry _prefix##_entry = { \ 516 .attr = { .name = _name, .mode = 0644 }, \ 517 .show = _prefix##_show, \ 518 .store = _prefix##_store, \ 519 }; 520 521 #define QUEUE_LIM_RO_ENTRY(_prefix, _name) \ 522 static struct queue_sysfs_entry _prefix##_entry = { \ 523 .attr = { .name = _name, .mode = 0444 }, \ 524 .show_limit = _prefix##_show, \ 525 } 526 527 #define QUEUE_LIM_RW_ENTRY(_prefix, _name) \ 528 static struct queue_sysfs_entry _prefix##_entry = { \ 529 .attr = { .name = _name, .mode = 0644 }, \ 530 .show_limit = _prefix##_show, \ 531 .store_limit = _prefix##_store, \ 532 } 533 534 QUEUE_RW_ENTRY(queue_requests, "nr_requests"); 535 QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb"); 536 QUEUE_LIM_RW_ENTRY(queue_max_sectors, "max_sectors_kb"); 537 QUEUE_LIM_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb"); 538 QUEUE_LIM_RO_ENTRY(queue_max_segments, "max_segments"); 539 QUEUE_LIM_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments"); 540 QUEUE_LIM_RO_ENTRY(queue_max_segment_size, "max_segment_size"); 541 QUEUE_LIM_RO_ENTRY(queue_max_write_streams, "max_write_streams"); 542 QUEUE_LIM_RO_ENTRY(queue_write_stream_granularity, "write_stream_granularity"); 543 QUEUE_RW_ENTRY(elv_iosched, "scheduler"); 544 545 QUEUE_LIM_RO_ENTRY(queue_logical_block_size, "logical_block_size"); 546 QUEUE_LIM_RO_ENTRY(queue_physical_block_size, "physical_block_size"); 547 QUEUE_LIM_RO_ENTRY(queue_chunk_sectors, "chunk_sectors"); 548 QUEUE_LIM_RO_ENTRY(queue_io_min, "minimum_io_size"); 549 QUEUE_LIM_RO_ENTRY(queue_io_opt, "optimal_io_size"); 550 551 QUEUE_LIM_RO_ENTRY(queue_max_discard_segments, "max_discard_segments"); 552 QUEUE_LIM_RO_ENTRY(queue_discard_granularity, "discard_granularity"); 553 QUEUE_LIM_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes"); 554 QUEUE_LIM_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes"); 555 QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data"); 556 557 QUEUE_LIM_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes"); 558 QUEUE_LIM_RO_ENTRY(queue_atomic_write_boundary_sectors, 559 "atomic_write_boundary_bytes"); 560 QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes"); 561 QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes"); 562 563 QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes"); 564 QUEUE_LIM_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes"); 565 QUEUE_LIM_RO_ENTRY(queue_max_hw_wzeroes_unmap_sectors, 566 "write_zeroes_unmap_max_hw_bytes"); 567 QUEUE_LIM_RW_ENTRY(queue_max_wzeroes_unmap_sectors, 568 "write_zeroes_unmap_max_bytes"); 569 QUEUE_LIM_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes"); 570 QUEUE_LIM_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity"); 571 572 QUEUE_LIM_RO_ENTRY(queue_zoned, "zoned"); 573 QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones"); 574 QUEUE_LIM_RO_ENTRY(queue_max_open_zones, "max_open_zones"); 575 QUEUE_LIM_RO_ENTRY(queue_max_active_zones, "max_active_zones"); 576 577 QUEUE_RW_ENTRY(queue_nomerges, "nomerges"); 578 QUEUE_LIM_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough"); 579 QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity"); 580 QUEUE_RW_ENTRY(queue_poll, "io_poll"); 581 QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay"); 582 QUEUE_LIM_RW_ENTRY(queue_wc, "write_cache"); 583 QUEUE_LIM_RO_ENTRY(queue_fua, "fua"); 584 QUEUE_LIM_RO_ENTRY(queue_dax, "dax"); 585 QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout"); 586 QUEUE_LIM_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask"); 587 QUEUE_LIM_RO_ENTRY(queue_dma_alignment, "dma_alignment"); 588 589 /* legacy alias for logical_block_size: */ 590 static struct queue_sysfs_entry queue_hw_sector_size_entry = { 591 .attr = {.name = "hw_sector_size", .mode = 0444 }, 592 .show_limit = queue_logical_block_size_show, 593 }; 594 595 QUEUE_LIM_RW_ENTRY(queue_rotational, "rotational"); 596 QUEUE_LIM_RW_ENTRY(queue_iostats, "iostats"); 597 QUEUE_LIM_RW_ENTRY(queue_add_random, "add_random"); 598 QUEUE_LIM_RW_ENTRY(queue_stable_writes, "stable_writes"); 599 600 #ifdef CONFIG_BLK_WBT 601 static ssize_t queue_var_store64(s64 *var, const char *page) 602 { 603 int err; 604 s64 v; 605 606 err = kstrtos64(page, 10, &v); 607 if (err < 0) 608 return err; 609 610 *var = v; 611 return 0; 612 } 613 614 static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page) 615 { 616 ssize_t ret; 617 struct request_queue *q = disk->queue; 618 619 mutex_lock(&disk->rqos_state_mutex); 620 if (!wbt_rq_qos(q)) { 621 ret = -EINVAL; 622 goto out; 623 } 624 625 if (wbt_disabled(q)) { 626 ret = sysfs_emit(page, "0\n"); 627 goto out; 628 } 629 630 ret = sysfs_emit(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000)); 631 out: 632 mutex_unlock(&disk->rqos_state_mutex); 633 return ret; 634 } 635 636 static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page, 637 size_t count) 638 { 639 struct request_queue *q = disk->queue; 640 struct rq_qos *rqos; 641 ssize_t ret; 642 s64 val; 643 unsigned int memflags; 644 645 ret = queue_var_store64(&val, page); 646 if (ret < 0) 647 return ret; 648 if (val < -1) 649 return -EINVAL; 650 651 /* 652 * Ensure that the queue is idled, in case the latency update 653 * ends up either enabling or disabling wbt completely. We can't 654 * have IO inflight if that happens. 655 */ 656 memflags = blk_mq_freeze_queue(q); 657 658 rqos = wbt_rq_qos(q); 659 if (!rqos) { 660 ret = wbt_init(disk); 661 if (ret) 662 goto out; 663 } 664 665 ret = count; 666 if (val == -1) 667 val = wbt_default_latency_nsec(q); 668 else if (val >= 0) 669 val *= 1000ULL; 670 671 if (wbt_get_min_lat(q) == val) 672 goto out; 673 674 blk_mq_quiesce_queue(q); 675 676 mutex_lock(&disk->rqos_state_mutex); 677 wbt_set_min_lat(q, val); 678 mutex_unlock(&disk->rqos_state_mutex); 679 680 blk_mq_unquiesce_queue(q); 681 out: 682 blk_mq_unfreeze_queue(q, memflags); 683 684 return ret; 685 } 686 687 QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec"); 688 #endif 689 690 /* Common attributes for bio-based and request-based queues. */ 691 static struct attribute *queue_attrs[] = { 692 /* 693 * Attributes which are protected with q->limits_lock. 694 */ 695 &queue_max_hw_sectors_entry.attr, 696 &queue_max_sectors_entry.attr, 697 &queue_max_segments_entry.attr, 698 &queue_max_discard_segments_entry.attr, 699 &queue_max_integrity_segments_entry.attr, 700 &queue_max_segment_size_entry.attr, 701 &queue_max_write_streams_entry.attr, 702 &queue_write_stream_granularity_entry.attr, 703 &queue_hw_sector_size_entry.attr, 704 &queue_logical_block_size_entry.attr, 705 &queue_physical_block_size_entry.attr, 706 &queue_chunk_sectors_entry.attr, 707 &queue_io_min_entry.attr, 708 &queue_io_opt_entry.attr, 709 &queue_discard_granularity_entry.attr, 710 &queue_max_discard_sectors_entry.attr, 711 &queue_max_hw_discard_sectors_entry.attr, 712 &queue_atomic_write_max_sectors_entry.attr, 713 &queue_atomic_write_boundary_sectors_entry.attr, 714 &queue_atomic_write_unit_min_entry.attr, 715 &queue_atomic_write_unit_max_entry.attr, 716 &queue_max_write_zeroes_sectors_entry.attr, 717 &queue_max_hw_wzeroes_unmap_sectors_entry.attr, 718 &queue_max_wzeroes_unmap_sectors_entry.attr, 719 &queue_max_zone_append_sectors_entry.attr, 720 &queue_zone_write_granularity_entry.attr, 721 &queue_rotational_entry.attr, 722 &queue_zoned_entry.attr, 723 &queue_max_open_zones_entry.attr, 724 &queue_max_active_zones_entry.attr, 725 &queue_iostats_passthrough_entry.attr, 726 &queue_iostats_entry.attr, 727 &queue_stable_writes_entry.attr, 728 &queue_add_random_entry.attr, 729 &queue_wc_entry.attr, 730 &queue_fua_entry.attr, 731 &queue_dax_entry.attr, 732 &queue_virt_boundary_mask_entry.attr, 733 &queue_dma_alignment_entry.attr, 734 &queue_ra_entry.attr, 735 736 /* 737 * Attributes which don't require locking. 738 */ 739 &queue_discard_zeroes_data_entry.attr, 740 &queue_write_same_max_entry.attr, 741 &queue_nr_zones_entry.attr, 742 &queue_nomerges_entry.attr, 743 &queue_poll_entry.attr, 744 &queue_poll_delay_entry.attr, 745 746 NULL, 747 }; 748 749 /* Request-based queue attributes that are not relevant for bio-based queues. */ 750 static struct attribute *blk_mq_queue_attrs[] = { 751 /* 752 * Attributes which require some form of locking other than 753 * q->sysfs_lock. 754 */ 755 &elv_iosched_entry.attr, 756 &queue_requests_entry.attr, 757 #ifdef CONFIG_BLK_WBT 758 &queue_wb_lat_entry.attr, 759 #endif 760 /* 761 * Attributes which don't require locking. 762 */ 763 &queue_rq_affinity_entry.attr, 764 &queue_io_timeout_entry.attr, 765 766 NULL, 767 }; 768 769 static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr, 770 int n) 771 { 772 struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); 773 struct request_queue *q = disk->queue; 774 775 if ((attr == &queue_max_open_zones_entry.attr || 776 attr == &queue_max_active_zones_entry.attr) && 777 !blk_queue_is_zoned(q)) 778 return 0; 779 780 return attr->mode; 781 } 782 783 static umode_t blk_mq_queue_attr_visible(struct kobject *kobj, 784 struct attribute *attr, int n) 785 { 786 struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); 787 struct request_queue *q = disk->queue; 788 789 if (!queue_is_mq(q)) 790 return 0; 791 792 if (attr == &queue_io_timeout_entry.attr && !q->mq_ops->timeout) 793 return 0; 794 795 return attr->mode; 796 } 797 798 static struct attribute_group queue_attr_group = { 799 .attrs = queue_attrs, 800 .is_visible = queue_attr_visible, 801 }; 802 803 static struct attribute_group blk_mq_queue_attr_group = { 804 .attrs = blk_mq_queue_attrs, 805 .is_visible = blk_mq_queue_attr_visible, 806 }; 807 808 #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) 809 810 static ssize_t 811 queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 812 { 813 struct queue_sysfs_entry *entry = to_queue(attr); 814 struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); 815 816 if (!entry->show && !entry->show_limit) 817 return -EIO; 818 819 if (entry->show_limit) { 820 ssize_t res; 821 822 mutex_lock(&disk->queue->limits_lock); 823 res = entry->show_limit(disk, page); 824 mutex_unlock(&disk->queue->limits_lock); 825 return res; 826 } 827 828 return entry->show(disk, page); 829 } 830 831 static ssize_t 832 queue_attr_store(struct kobject *kobj, struct attribute *attr, 833 const char *page, size_t length) 834 { 835 struct queue_sysfs_entry *entry = to_queue(attr); 836 struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); 837 struct request_queue *q = disk->queue; 838 839 if (!entry->store_limit && !entry->store) 840 return -EIO; 841 842 if (entry->store_limit) { 843 ssize_t res; 844 845 struct queue_limits lim = queue_limits_start_update(q); 846 847 res = entry->store_limit(disk, page, length, &lim); 848 if (res < 0) { 849 queue_limits_cancel_update(q); 850 return res; 851 } 852 853 res = queue_limits_commit_update_frozen(q, &lim); 854 if (res) 855 return res; 856 return length; 857 } 858 859 return entry->store(disk, page, length); 860 } 861 862 static const struct sysfs_ops queue_sysfs_ops = { 863 .show = queue_attr_show, 864 .store = queue_attr_store, 865 }; 866 867 static const struct attribute_group *blk_queue_attr_groups[] = { 868 &queue_attr_group, 869 &blk_mq_queue_attr_group, 870 NULL 871 }; 872 873 static void blk_queue_release(struct kobject *kobj) 874 { 875 /* nothing to do here, all data is associated with the parent gendisk */ 876 } 877 878 const struct kobj_type blk_queue_ktype = { 879 .default_groups = blk_queue_attr_groups, 880 .sysfs_ops = &queue_sysfs_ops, 881 .release = blk_queue_release, 882 }; 883 884 static void blk_debugfs_remove(struct gendisk *disk) 885 { 886 struct request_queue *q = disk->queue; 887 888 mutex_lock(&q->debugfs_mutex); 889 blk_trace_shutdown(q); 890 debugfs_remove_recursive(q->debugfs_dir); 891 q->debugfs_dir = NULL; 892 q->sched_debugfs_dir = NULL; 893 q->rqos_debugfs_dir = NULL; 894 mutex_unlock(&q->debugfs_mutex); 895 } 896 897 /** 898 * blk_register_queue - register a block layer queue with sysfs 899 * @disk: Disk of which the request queue should be registered with sysfs. 900 */ 901 int blk_register_queue(struct gendisk *disk) 902 { 903 struct request_queue *q = disk->queue; 904 int ret; 905 906 ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue"); 907 if (ret < 0) 908 return ret; 909 910 if (queue_is_mq(q)) { 911 ret = blk_mq_sysfs_register(disk); 912 if (ret) 913 goto out_del_queue_kobj; 914 } 915 mutex_lock(&q->sysfs_lock); 916 917 mutex_lock(&q->debugfs_mutex); 918 q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root); 919 if (queue_is_mq(q)) 920 blk_mq_debugfs_register(q); 921 mutex_unlock(&q->debugfs_mutex); 922 923 ret = disk_register_independent_access_ranges(disk); 924 if (ret) 925 goto out_debugfs_remove; 926 927 ret = blk_crypto_sysfs_register(disk); 928 if (ret) 929 goto out_unregister_ia_ranges; 930 931 if (queue_is_mq(q)) 932 elevator_set_default(q); 933 934 blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); 935 wbt_enable_default(disk); 936 937 /* Now everything is ready and send out KOBJ_ADD uevent */ 938 kobject_uevent(&disk->queue_kobj, KOBJ_ADD); 939 if (q->elevator) 940 kobject_uevent(&q->elevator->kobj, KOBJ_ADD); 941 mutex_unlock(&q->sysfs_lock); 942 943 /* 944 * SCSI probing may synchronously create and destroy a lot of 945 * request_queues for non-existent devices. Shutting down a fully 946 * functional queue takes measureable wallclock time as RCU grace 947 * periods are involved. To avoid excessive latency in these 948 * cases, a request_queue starts out in a degraded mode which is 949 * faster to shut down and is made fully functional here as 950 * request_queues for non-existent devices never get registered. 951 */ 952 blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q); 953 percpu_ref_switch_to_percpu(&q->q_usage_counter); 954 955 return ret; 956 957 out_unregister_ia_ranges: 958 disk_unregister_independent_access_ranges(disk); 959 out_debugfs_remove: 960 blk_debugfs_remove(disk); 961 mutex_unlock(&q->sysfs_lock); 962 if (queue_is_mq(q)) 963 blk_mq_sysfs_unregister(disk); 964 out_del_queue_kobj: 965 kobject_del(&disk->queue_kobj); 966 return ret; 967 } 968 969 /** 970 * blk_unregister_queue - counterpart of blk_register_queue() 971 * @disk: Disk of which the request queue should be unregistered from sysfs. 972 * 973 * Note: the caller is responsible for guaranteeing that this function is called 974 * after blk_register_queue() has finished. 975 */ 976 void blk_unregister_queue(struct gendisk *disk) 977 { 978 struct request_queue *q = disk->queue; 979 980 if (WARN_ON(!q)) 981 return; 982 983 /* Return early if disk->queue was never registered. */ 984 if (!blk_queue_registered(q)) 985 return; 986 987 /* 988 * Since sysfs_remove_dir() prevents adding new directory entries 989 * before removal of existing entries starts, protect against 990 * concurrent elv_iosched_store() calls. 991 */ 992 mutex_lock(&q->sysfs_lock); 993 blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q); 994 mutex_unlock(&q->sysfs_lock); 995 996 /* 997 * Remove the sysfs attributes before unregistering the queue data 998 * structures that can be modified through sysfs. 999 */ 1000 if (queue_is_mq(q)) 1001 blk_mq_sysfs_unregister(disk); 1002 blk_crypto_sysfs_unregister(disk); 1003 1004 mutex_lock(&q->sysfs_lock); 1005 disk_unregister_independent_access_ranges(disk); 1006 mutex_unlock(&q->sysfs_lock); 1007 1008 /* Now that we've deleted all child objects, we can delete the queue. */ 1009 kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE); 1010 kobject_del(&disk->queue_kobj); 1011 1012 if (queue_is_mq(q)) 1013 elevator_set_none(q); 1014 1015 blk_debugfs_remove(disk); 1016 } 1017