// SPDX-License-Identifier: GPL-2.0
/*
 *  gendisk handling
 *
 * Portions Copyright (C) 2020 Christoph Hellwig
 */

#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/kdev_t.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/mutex.h>
#include <linux/idr.h>
#include <linux/log2.h>
#include <linux/pm_runtime.h>
#include <linux/badblocks.h>

#include "blk.h"

static struct kobject *block_depr;

/* for extended dynamic devt allocation, currently only one major is used */
#define NR_EXT_DEVT		(1 << MINORBITS)
static DEFINE_IDA(ext_devt_ida);

static void disk_check_events(struct disk_events *ev,
			      unsigned int *clearing_ptr);
static void disk_alloc_events(struct gendisk *disk);
static void disk_add_events(struct gendisk *disk);
static void disk_del_events(struct gendisk *disk);
static void disk_release_events(struct gendisk *disk);

void set_capacity(struct gendisk *disk, sector_t sectors)
{
	struct block_device *bdev = disk->part0;

	spin_lock(&bdev->bd_size_lock);
	i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
	spin_unlock(&bdev->bd_size_lock);
}
EXPORT_SYMBOL(set_capacity);

/*
 * Set disk capacity and notify if the size is not currently zero and will not
 * be set to zero.  Returns true if a uevent was sent, otherwise false.
 */
bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
{
	sector_t capacity = get_capacity(disk);
	char *envp[] = { "RESIZE=1", NULL };

	set_capacity(disk, size);

	/*
	 * Only print a message and send a uevent if the gendisk is user
	 * visible and alive.  This avoids spamming the log and udev when
	 * setting the initial capacity during probing.
	 */
	if (size == capacity ||
	    (disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
		return false;

	pr_info("%s: detected capacity change from %lld to %lld\n",
		disk->disk_name, capacity, size);

	/*
	 * Historically we did not send a uevent for changes to/from an empty
	 * device.
	 */
	if (!capacity || !size)
		return false;
	kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
	return true;
}
EXPORT_SYMBOL_GPL(set_capacity_and_notify);
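/*
 * Usage example (illustrative sketch only, not part of this file): a
 * virtual block driver reacting to a backend resize.  "my_vdev" and its
 * fields are hypothetical.
 *
 *	static void my_vdev_resize(struct my_vdev *vd, sector_t new_sectors)
 *	{
 *		if (set_capacity_and_notify(vd->disk, new_sectors))
 *			pr_debug("%s: userspace notified of resize\n",
 *				 vd->disk->disk_name);
 *	}
 */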
/*
 * Format the device name of the indicated disk into the supplied buffer and
 * return a pointer to that same buffer for convenience.
 */
char *disk_name(struct gendisk *hd, int partno, char *buf)
{
	if (!partno)
		snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
	else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
		snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
	else
		snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);

	return buf;
}

const char *bdevname(struct block_device *bdev, char *buf)
{
	return disk_name(bdev->bd_disk, bdev->bd_partno, buf);
}
EXPORT_SYMBOL(bdevname);

static void part_stat_read_all(struct block_device *part,
			       struct disk_stats *stat)
{
	int cpu;

	memset(stat, 0, sizeof(struct disk_stats));
	for_each_possible_cpu(cpu) {
		struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu);
		int group;

		for (group = 0; group < NR_STAT_GROUPS; group++) {
			stat->nsecs[group] += ptr->nsecs[group];
			stat->sectors[group] += ptr->sectors[group];
			stat->ios[group] += ptr->ios[group];
			stat->merges[group] += ptr->merges[group];
		}

		stat->io_ticks += ptr->io_ticks;
	}
}

static unsigned int part_in_flight(struct block_device *part)
{
	unsigned int inflight = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) +
			    part_stat_local_read_cpu(part, in_flight[1], cpu);
	}
	if ((int)inflight < 0)
		inflight = 0;

	return inflight;
}

static void part_in_flight_rw(struct block_device *part,
			      unsigned int inflight[2])
{
	int cpu;

	inflight[0] = 0;
	inflight[1] = 0;
	for_each_possible_cpu(cpu) {
		inflight[0] += part_stat_local_read_cpu(part, in_flight[0], cpu);
		inflight[1] += part_stat_local_read_cpu(part, in_flight[1], cpu);
	}
	if ((int)inflight[0] < 0)
		inflight[0] = 0;
	if ((int)inflight[1] < 0)
		inflight[1] = 0;
}

/*
 * Can be deleted altogether. Later.
 */
#define BLKDEV_MAJOR_HASH_SIZE 255
static struct blk_major_name {
	struct blk_major_name *next;
	int major;
	char name[16];
	void (*probe)(dev_t devt);
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
static DEFINE_MUTEX(major_names_lock);

/* index in the above - for now: assume no multimajor ranges */
static inline int major_to_index(unsigned major)
{
	return major % BLKDEV_MAJOR_HASH_SIZE;
}

#ifdef CONFIG_PROC_FS
void blkdev_show(struct seq_file *seqf, off_t offset)
{
	struct blk_major_name *dp;

	mutex_lock(&major_names_lock);
	for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next)
		if (dp->major == offset)
			seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
	mutex_unlock(&major_names_lock);
}
#endif /* CONFIG_PROC_FS */
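/*
 * Output example (illustrative): blkdev_show() backs the "Block devices:"
 * section of /proc/devices, emitting one "%3d %s" line per registered
 * major, e.g.:
 *
 *	  8 sd
 *	259 blkext
 */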
/**
 * __register_blkdev - register a new block device
 *
 * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If
 *         @major = 0, try to allocate any unused major number.
 * @name: the name of the new block device as a zero terminated string
 * @probe: callback that is called on access to any minor number of @major
 *
 * The @name must be unique within the system.
 *
 * The return value depends on the @major input parameter:
 *
 *  - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1]
 *    then the function returns zero on success, or a negative error code
 *  - if any unused major number was requested with @major = 0 parameter
 *    then the return value is the allocated major number in range
 *    [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise
 *
 * See Documentation/admin-guide/devices.txt for the list of allocated
 * major numbers.
 *
 * Use register_blkdev instead for any new code.
 */
int __register_blkdev(unsigned int major, const char *name,
		void (*probe)(dev_t devt))
{
	struct blk_major_name **n, *p;
	int index, ret = 0;

	mutex_lock(&major_names_lock);

	/* temporary */
	if (major == 0) {
		for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
			if (major_names[index] == NULL)
				break;
		}

		if (index == 0) {
			printk("%s: failed to get major for %s\n",
			       __func__, name);
			ret = -EBUSY;
			goto out;
		}
		major = index;
		ret = major;
	}

	if (major >= BLKDEV_MAJOR_MAX) {
		pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n",
		       __func__, major, BLKDEV_MAJOR_MAX-1, name);

		ret = -EINVAL;
		goto out;
	}

	p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
	if (p == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	p->major = major;
	p->probe = probe;
	strlcpy(p->name, name, sizeof(p->name));
	p->next = NULL;
	index = major_to_index(major);

	for (n = &major_names[index]; *n; n = &(*n)->next) {
		if ((*n)->major == major)
			break;
	}
	if (!*n)
		*n = p;
	else
		ret = -EBUSY;

	if (ret < 0) {
		printk("register_blkdev: cannot get major %u for %s\n",
		       major, name);
		kfree(p);
	}
out:
	mutex_unlock(&major_names_lock);
	return ret;
}
EXPORT_SYMBOL(__register_blkdev);

void unregister_blkdev(unsigned int major, const char *name)
{
	struct blk_major_name **n;
	struct blk_major_name *p = NULL;
	int index = major_to_index(major);

	mutex_lock(&major_names_lock);
	for (n = &major_names[index]; *n; n = &(*n)->next)
		if ((*n)->major == major)
			break;
	if (!*n || strcmp((*n)->name, name)) {
		WARN_ON(1);
	} else {
		p = *n;
		*n = p->next;
	}
	mutex_unlock(&major_names_lock);
	kfree(p);
}

EXPORT_SYMBOL(unregister_blkdev);
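/*
 * Registration example (illustrative sketch only): a module init path for
 * a hypothetical "mydrv" driver.  With @major == 0 the allocated major is
 * returned, so the result must be saved for unregister_blkdev().
 *
 *	static int mydrv_major;
 *
 *	static void mydrv_probe(dev_t devt)
 *	{
 *		// instantiate the device backing MINOR(devt) on first access
 *	}
 *
 *	static int __init mydrv_init(void)
 *	{
 *		mydrv_major = __register_blkdev(0, "mydrv", mydrv_probe);
 *		return mydrv_major < 0 ? mydrv_major : 0;
 *	}
 */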
/**
 * blk_mangle_minor - scatter minor numbers apart
 * @minor: minor number to mangle
 *
 * Scatter consecutively allocated @minor number apart if MANGLE_DEVT
 * is enabled.  Mangling twice gives the original value.
 *
 * RETURNS:
 * Mangled value.
 *
 * CONTEXT:
 * Don't care.
 */
static int blk_mangle_minor(int minor)
{
#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
	int i;

	for (i = 0; i < MINORBITS / 2; i++) {
		int low = minor & (1 << i);
		int high = minor & (1 << (MINORBITS - 1 - i));
		int distance = MINORBITS - 1 - 2 * i;

		minor ^= low | high;	/* clear both bits */
		low <<= distance;	/* swap the positions */
		high >>= distance;
		minor |= low | high;	/* and set */
	}
#endif
	return minor;
}

/**
 * blk_alloc_devt - allocate a dev_t for a block device
 * @bdev: block device to allocate dev_t for
 * @devt: out parameter for resulting dev_t
 *
 * Allocate a dev_t for block device.
 *
 * RETURNS:
 * 0 on success, allocated dev_t is returned in *@devt.  -errno on
 * failure.
 *
 * CONTEXT:
 * Might sleep.
 */
int blk_alloc_devt(struct block_device *bdev, dev_t *devt)
{
	struct gendisk *disk = bdev->bd_disk;
	int idx;

	/* in consecutive minor range? */
	if (bdev->bd_partno < disk->minors) {
		*devt = MKDEV(disk->major, disk->first_minor + bdev->bd_partno);
		return 0;
	}

	idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL);
	if (idx < 0)
		return idx == -ENOSPC ? -EBUSY : idx;

	*devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
	return 0;
}

/**
 * blk_free_devt - free a dev_t
 * @devt: dev_t to free
 *
 * Free @devt which was allocated using blk_alloc_devt().
 *
 * CONTEXT:
 * Might sleep.
 */
void blk_free_devt(dev_t devt)
{
	if (MAJOR(devt) == BLOCK_EXT_MAJOR)
		ida_free(&ext_devt_ida, blk_mangle_minor(MINOR(devt)));
}
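/*
 * Worked example (illustrative): blk_alloc_devt() either packs the disk's
 * own major/minor range into a dev_t, e.g. partition 3 of a disk at major
 * 8, first minor 0 is MKDEV(8, 3), or falls back to BLOCK_EXT_MAJOR with
 * an IDA-allocated minor.  When CONFIG_DEBUG_BLOCK_EXT_DEVT is enabled
 * (otherwise blk_mangle_minor() is the identity), the mangling swaps bit
 * i with bit (MINORBITS - 1 - i), so applying it twice restores the
 * input; this is why blk_free_devt() recovers the IDA index by mangling
 * the minor again.
 *
 *	WARN_ON(blk_mangle_minor(blk_mangle_minor(42)) != 42);
 */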
static char *bdevt_str(dev_t devt, char *buf)
{
	if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
		char tbuf[BDEVT_SIZE];
		snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
		snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
	} else
		snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));

	return buf;
}

void disk_uevent(struct gendisk *disk, enum kobject_action action)
{
	struct block_device *part;
	unsigned long idx;

	rcu_read_lock();
	xa_for_each(&disk->part_tbl, idx, part) {
		if (bdev_is_partition(part) && !bdev_nr_sectors(part))
			continue;
		if (!bdgrab(part))
			continue;

		rcu_read_unlock();
		kobject_uevent(bdev_kobj(part), action);
		bdput(part);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(disk_uevent);

static void disk_scan_partitions(struct gendisk *disk)
{
	struct block_device *bdev;

	if (!get_capacity(disk) || !disk_part_scan_enabled(disk))
		return;

	set_bit(GD_NEED_PART_SCAN, &disk->state);
	bdev = blkdev_get_by_dev(disk_devt(disk), FMODE_READ, NULL);
	if (!IS_ERR(bdev))
		blkdev_put(bdev, FMODE_READ);
}

static void register_disk(struct device *parent, struct gendisk *disk,
			  const struct attribute_group **groups)
{
	struct device *ddev = disk_to_dev(disk);
	int err;

	ddev->parent = parent;

	dev_set_name(ddev, "%s", disk->disk_name);

	/* delay uevents, until we scanned partition table */
	dev_set_uevent_suppress(ddev, 1);

	if (groups) {
		WARN_ON(ddev->groups);
		ddev->groups = groups;
	}
	if (device_add(ddev))
		return;
	if (!sysfs_deprecated) {
		err = sysfs_create_link(block_depr, &ddev->kobj,
					kobject_name(&ddev->kobj));
		if (err) {
			device_del(ddev);
			return;
		}
	}

	/*
	 * avoid probable deadlock caused by allocating memory with
	 * GFP_KERNEL in runtime_resume callback of its all ancestor
	 * devices
	 */
	pm_runtime_set_memalloc_noio(ddev, true);

	disk->part0->bd_holder_dir =
		kobject_create_and_add("holders", &ddev->kobj);
	disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);

	if (disk->flags & GENHD_FL_HIDDEN)
		return;

	disk_scan_partitions(disk);

	/* announce the disk and partitions after all partitions are created */
	dev_set_uevent_suppress(ddev, 0);
	disk_uevent(disk, KOBJ_ADD);

	if (disk->queue->backing_dev_info->dev) {
		err = sysfs_create_link(&ddev->kobj,
			  &disk->queue->backing_dev_info->dev->kobj,
			  "bdi");
		WARN_ON(err);
	}
}

/**
 * __device_add_disk - add disk information to kernel list
 * @parent: parent device for the disk
 * @disk: per-device partitioning information
 * @groups: Additional per-device sysfs groups
 * @register_queue: register the queue if set to true
 *
 * This function registers the partitioning information in @disk
 * with the kernel.
 *
 * FIXME: error handling
 */
static void __device_add_disk(struct device *parent, struct gendisk *disk,
			      const struct attribute_group **groups,
			      bool register_queue)
{
	dev_t devt;
	int retval;

	/*
	 * The disk queue should now be all set with enough information about
	 * the device for the elevator code to pick an adequate default
	 * elevator if one is needed, that is, for devices requesting queue
	 * registration.
	 */
	if (register_queue)
		elevator_init_mq(disk->queue);

	/*
	 * minors == 0 indicates to use ext devt from part0 and should
	 * be accompanied with EXT_DEVT flag.  Make sure all
	 * parameters make sense.
	 */
	WARN_ON(disk->minors && !(disk->major || disk->first_minor));
	WARN_ON(!disk->minors &&
		!(disk->flags & (GENHD_FL_EXT_DEVT | GENHD_FL_HIDDEN)));

	disk->flags |= GENHD_FL_UP;

	retval = blk_alloc_devt(disk->part0, &devt);
	if (retval) {
		WARN_ON(1);
		return;
	}
	disk->major = MAJOR(devt);
	disk->first_minor = MINOR(devt);

	disk_alloc_events(disk);

	if (disk->flags & GENHD_FL_HIDDEN) {
		/*
		 * Don't let hidden disks show up in /proc/partitions,
		 * and don't bother scanning for partitions either.
		 */
		disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
		disk->flags |= GENHD_FL_NO_PART_SCAN;
	} else {
		struct backing_dev_info *bdi = disk->queue->backing_dev_info;
		struct device *dev = disk_to_dev(disk);
		int ret;

		/* Register BDI before referencing it from bdev */
		dev->devt = devt;
		ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt));
		WARN_ON(ret);
		bdi_set_owner(bdi, dev);
		bdev_add(disk->part0, devt);
	}
	register_disk(parent, disk, groups);
	if (register_queue)
		blk_register_queue(disk);

	/*
	 * Take an extra ref on queue which will be put on disk_release()
	 * so that it sticks around as long as @disk is there.
	 */
	WARN_ON_ONCE(!blk_get_queue(disk->queue));

	disk_add_events(disk);
	blk_integrity_add(disk);
}

void device_add_disk(struct device *parent, struct gendisk *disk,
		     const struct attribute_group **groups)
{
	__device_add_disk(parent, disk, groups, true);
}
EXPORT_SYMBOL(device_add_disk);

void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk)
{
	__device_add_disk(parent, disk, NULL, false);
}
EXPORT_SYMBOL(device_add_disk_no_queue_reg);
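/*
 * Probe example (illustrative sketch only): the usual driver sequence is
 * to allocate the gendisk, fill in naming/fops/queue, set the capacity,
 * and only then publish it with device_add_disk().  "my_fops", "my_queue",
 * "my_parent", "mydrv_major" and "nr_sectors" are hypothetical.
 *
 *	struct gendisk *disk = alloc_disk(1);
 *
 *	if (!disk)
 *		return -ENOMEM;
 *	strscpy(disk->disk_name, "mydrv0", sizeof(disk->disk_name));
 *	disk->major = mydrv_major;
 *	disk->first_minor = 0;
 *	disk->fops = &my_fops;
 *	disk->queue = my_queue;
 *	set_capacity(disk, nr_sectors);
 *	device_add_disk(my_parent, disk, NULL);
 */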
/**
 * del_gendisk - remove the gendisk
 * @disk: the struct gendisk to remove
 *
 * Removes the gendisk and all its associated resources. This deletes the
 * partitions associated with the gendisk, and unregisters the associated
 * request_queue.
 *
 * This is the counterpart to the respective __device_add_disk() call.
 *
 * The final removal of the struct gendisk happens when its refcount reaches 0
 * with put_disk(), which should be called after del_gendisk(), if
 * __device_add_disk() was used.
 *
 * Drivers exist which depend on the release of the gendisk to be synchronous;
 * it should not be deferred.
 *
 * Context: can sleep
 */
void del_gendisk(struct gendisk *disk)
{
	might_sleep();

	if (WARN_ON_ONCE(!disk->queue))
		return;

	blk_integrity_del(disk);
	disk_del_events(disk);

	mutex_lock(&disk->part0->bd_mutex);
	disk->flags &= ~GENHD_FL_UP;
	blk_drop_partitions(disk);
	mutex_unlock(&disk->part0->bd_mutex);

	fsync_bdev(disk->part0);
	__invalidate_device(disk->part0, true);

	/*
	 * Unhash the bdev inode for this device so that it can't be looked
	 * up any more even if openers still hold references to it.
	 */
	remove_inode_hash(disk->part0->bd_inode);

	set_capacity(disk, 0);

	if (!(disk->flags & GENHD_FL_HIDDEN)) {
		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");

		/*
		 * Unregister bdi before releasing device numbers (as they can
		 * get reused and we'd get clashes in sysfs).
		 */
		bdi_unregister(disk->queue->backing_dev_info);
	}

	blk_unregister_queue(disk);

	kobject_put(disk->part0->bd_holder_dir);
	kobject_put(disk->slave_dir);

	part_stat_set_all(disk->part0, 0);
	disk->part0->bd_stamp = 0;
	if (!sysfs_deprecated)
		sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
	device_del(disk_to_dev(disk));
}
EXPORT_SYMBOL(del_gendisk);

/* sysfs access to bad-blocks list. */
static ssize_t disk_badblocks_show(struct device *dev,
					struct device_attribute *attr,
					char *page)
{
	struct gendisk *disk = dev_to_disk(dev);

	if (!disk->bb)
		return sprintf(page, "\n");

	return badblocks_show(disk->bb, page, 0);
}

static ssize_t disk_badblocks_store(struct device *dev,
					struct device_attribute *attr,
					const char *page, size_t len)
{
	struct gendisk *disk = dev_to_disk(dev);

	if (!disk->bb)
		return -ENXIO;

	return badblocks_store(disk->bb, page, len, 0);
}

void blk_request_module(dev_t devt)
{
	unsigned int major = MAJOR(devt);
	struct blk_major_name **n;

	mutex_lock(&major_names_lock);
	for (n = &major_names[major_to_index(major)]; *n; n = &(*n)->next) {
		if ((*n)->major == major && (*n)->probe) {
			(*n)->probe(devt);
			mutex_unlock(&major_names_lock);
			return;
		}
	}
	mutex_unlock(&major_names_lock);

	if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
		/* Make old-style 2.4 aliases work */
		request_module("block-major-%d", MAJOR(devt));
}
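/*
 * Module alias example (illustrative): for blk_request_module() to load a
 * driver on first access to its dev_t, the driver declares an alias that
 * matches the "block-major-%d-%d" request.  For a hypothetical driver
 * owning major 43:
 *
 *	MODULE_ALIAS_BLOCKDEV_MAJOR(43);	// "block-major-43-*"
 */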
/**
 * bdget_disk - do bdget() by gendisk and partition number
 * @disk: gendisk of interest
 * @partno: partition number
 *
 * Find partition @partno from @disk, do bdget() on it.
 *
 * CONTEXT:
 * Don't care.
 *
 * RETURNS:
 * Resulting block_device on success, NULL on failure.
 */
struct block_device *bdget_disk(struct gendisk *disk, int partno)
{
	struct block_device *bdev = NULL;

	rcu_read_lock();
	bdev = xa_load(&disk->part_tbl, partno);
	if (bdev && !bdgrab(bdev))
		bdev = NULL;
	rcu_read_unlock();

	return bdev;
}

/*
 * print a full list of all partitions - intended for places where the root
 * filesystem can't be mounted and thus to give the victim some idea of what
 * went wrong
 */
void __init printk_all_partitions(void)
{
	struct class_dev_iter iter;
	struct device *dev;

	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
	while ((dev = class_dev_iter_next(&iter))) {
		struct gendisk *disk = dev_to_disk(dev);
		struct block_device *part;
		char name_buf[BDEVNAME_SIZE];
		char devt_buf[BDEVT_SIZE];
		unsigned long idx;

		/*
		 * Don't show empty devices or things that have been
		 * suppressed
		 */
		if (get_capacity(disk) == 0 ||
		    (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
			continue;

		/*
		 * Note, unlike /proc/partitions, I am showing the numbers in
		 * hex - the same format as the root= option takes.
		 */
		rcu_read_lock();
		xa_for_each(&disk->part_tbl, idx, part) {
			if (!bdev_nr_sectors(part))
				continue;
			printk("%s%s %10llu %s %s",
			       bdev_is_partition(part) ? "  " : "",
			       bdevt_str(part->bd_dev, devt_buf),
			       bdev_nr_sectors(part) >> 1,
			       disk_name(disk, part->bd_partno, name_buf),
			       part->bd_meta_info ?
					part->bd_meta_info->uuid : "");
			if (bdev_is_partition(part))
				printk("\n");
			else if (dev->parent && dev->parent->driver)
				printk(" driver: %s\n",
					dev->parent->driver->name);
			else
				printk(" (driver?)\n");
		}
		rcu_read_unlock();
	}
	class_dev_iter_exit(&iter);
}

#ifdef CONFIG_PROC_FS
/* iterator */
static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
{
	loff_t skip = *pos;
	struct class_dev_iter *iter;
	struct device *dev;

	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter)
		return ERR_PTR(-ENOMEM);

	seqf->private = iter;
	class_dev_iter_init(iter, &block_class, NULL, &disk_type);
	do {
		dev = class_dev_iter_next(iter);
		if (!dev)
			return NULL;
	} while (skip--);

	return dev_to_disk(dev);
}

static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
{
	struct device *dev;

	(*pos)++;
	dev = class_dev_iter_next(seqf->private);
	if (dev)
		return dev_to_disk(dev);

	return NULL;
}

static void disk_seqf_stop(struct seq_file *seqf, void *v)
{
	struct class_dev_iter *iter = seqf->private;

	/* stop is called even after start failed :-( */
	if (iter) {
		class_dev_iter_exit(iter);
		kfree(iter);
		seqf->private = NULL;
	}
}
static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
{
	void *p;

	p = disk_seqf_start(seqf, pos);
	if (!IS_ERR_OR_NULL(p) && !*pos)
		seq_puts(seqf, "major minor #blocks name\n\n");
	return p;
}

static int show_partition(struct seq_file *seqf, void *v)
{
	struct gendisk *sgp = v;
	struct block_device *part;
	unsigned long idx;
	char buf[BDEVNAME_SIZE];

	/* Don't show non-partitionable removable devices or empty devices */
	if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
				   (sgp->flags & GENHD_FL_REMOVABLE)))
		return 0;
	if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
		return 0;

	rcu_read_lock();
	xa_for_each(&sgp->part_tbl, idx, part) {
		if (!bdev_nr_sectors(part))
			continue;
		seq_printf(seqf, "%4d %7d %10llu %s\n",
			   MAJOR(part->bd_dev), MINOR(part->bd_dev),
			   bdev_nr_sectors(part) >> 1,
			   disk_name(sgp, part->bd_partno, buf));
	}
	rcu_read_unlock();
	return 0;
}

static const struct seq_operations partitions_op = {
	.start	= show_partition_start,
	.next	= disk_seqf_next,
	.stop	= disk_seqf_stop,
	.show	= show_partition
};
#endif

static int __init genhd_device_init(void)
{
	int error;

	block_class.dev_kobj = sysfs_dev_block_kobj;
	error = class_register(&block_class);
	if (unlikely(error))
		return error;
	blk_dev_init();

	register_blkdev(BLOCK_EXT_MAJOR, "blkext");

	/* create top-level block dir */
	if (!sysfs_deprecated)
		block_depr = kobject_create_and_add("block", NULL);
	return 0;
}

subsys_initcall(genhd_device_init);

static ssize_t disk_range_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%d\n", disk->minors);
}

static ssize_t disk_ext_range_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%d\n", disk_max_parts(disk));
}

static ssize_t disk_removable_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%d\n",
		       (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
}

static ssize_t disk_hidden_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%d\n",
		       (disk->flags & GENHD_FL_HIDDEN ? 1 : 0));
}

static ssize_t disk_ro_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
}
ssize_t part_size_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev)));
}

ssize_t part_stat_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	struct block_device *bdev = dev_to_bdev(dev);
	struct request_queue *q = bdev->bd_disk->queue;
	struct disk_stats stat;
	unsigned int inflight;

	part_stat_read_all(bdev, &stat);
	if (queue_is_mq(q))
		inflight = blk_mq_in_flight(q, bdev);
	else
		inflight = part_in_flight(bdev);

	return sprintf(buf,
		"%8lu %8lu %8llu %8u "
		"%8lu %8lu %8llu %8u "
		"%8u %8u %8u "
		"%8lu %8lu %8llu %8u "
		"%8lu %8u"
		"\n",
		stat.ios[STAT_READ],
		stat.merges[STAT_READ],
		(unsigned long long)stat.sectors[STAT_READ],
		(unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
		stat.ios[STAT_WRITE],
		stat.merges[STAT_WRITE],
		(unsigned long long)stat.sectors[STAT_WRITE],
		(unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
		inflight,
		jiffies_to_msecs(stat.io_ticks),
		(unsigned int)div_u64(stat.nsecs[STAT_READ] +
				      stat.nsecs[STAT_WRITE] +
				      stat.nsecs[STAT_DISCARD] +
				      stat.nsecs[STAT_FLUSH],
				      NSEC_PER_MSEC),
		stat.ios[STAT_DISCARD],
		stat.merges[STAT_DISCARD],
		(unsigned long long)stat.sectors[STAT_DISCARD],
		(unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
		stat.ios[STAT_FLUSH],
		(unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
}

ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct block_device *bdev = dev_to_bdev(dev);
	struct request_queue *q = bdev->bd_disk->queue;
	unsigned int inflight[2];

	if (queue_is_mq(q))
		blk_mq_in_flight_rw(q, bdev, inflight);
	else
		part_in_flight_rw(bdev, inflight);

	return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
}

static ssize_t disk_capability_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%x\n", disk->flags);
}

static ssize_t disk_alignment_offset_show(struct device *dev,
					  struct device_attribute *attr,
					  char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
}

static ssize_t disk_discard_alignment_show(struct device *dev,
					   struct device_attribute *attr,
					   char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
}

static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL);
static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL);
static DEVICE_ATTR(size, 0444, part_size_show, NULL);
static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL);
static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL);
static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
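/*
 * Field-order note (illustrative): part_stat_show() emits the seventeen
 * columns documented in Documentation/block/stat.rst - read
 * ios/merges/sectors/ticks, write ios/merges/sectors/ticks, in_flight,
 * io_ticks, time_in_queue, then the discard and flush groups.  A quick
 * way to eyeball it from userspace:
 *
 *	$ cat /sys/block/sda/stat
 */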
#ifdef CONFIG_FAIL_MAKE_REQUEST
ssize_t part_fail_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
}

ssize_t part_fail_store(struct device *dev,
			struct device_attribute *attr,
			const char *buf, size_t count)
{
	int i;

	if (count > 0 && sscanf(buf, "%d", &i) > 0)
		dev_to_bdev(dev)->bd_make_it_fail = i;

	return count;
}

static struct device_attribute dev_attr_fail =
	__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
#endif /* CONFIG_FAIL_MAKE_REQUEST */

#ifdef CONFIG_FAIL_IO_TIMEOUT
static struct device_attribute dev_attr_fail_timeout =
	__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
#endif

static struct attribute *disk_attrs[] = {
	&dev_attr_range.attr,
	&dev_attr_ext_range.attr,
	&dev_attr_removable.attr,
	&dev_attr_hidden.attr,
	&dev_attr_ro.attr,
	&dev_attr_size.attr,
	&dev_attr_alignment_offset.attr,
	&dev_attr_discard_alignment.attr,
	&dev_attr_capability.attr,
	&dev_attr_stat.attr,
	&dev_attr_inflight.attr,
	&dev_attr_badblocks.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
	&dev_attr_fail.attr,
#endif
#ifdef CONFIG_FAIL_IO_TIMEOUT
	&dev_attr_fail_timeout.attr,
#endif
	NULL
};

static umode_t disk_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, typeof(*dev), kobj);
	struct gendisk *disk = dev_to_disk(dev);

	if (a == &dev_attr_badblocks.attr && !disk->bb)
		return 0;
	return a->mode;
}

static struct attribute_group disk_attr_group = {
	.attrs = disk_attrs,
	.is_visible = disk_visible,
};

static const struct attribute_group *disk_attr_groups[] = {
	&disk_attr_group,
	NULL
};
/**
 * disk_release - releases all allocated resources of the gendisk
 * @dev: the device representing this disk
 *
 * This function releases all allocated resources of the gendisk.
 *
 * Drivers which used __device_add_disk() have a gendisk with a request_queue
 * assigned. Since the request_queue sits on top of the gendisk for these
 * drivers we also call blk_put_queue() for them, and we expect the
 * request_queue refcount to reach 0 at this point, and so the request_queue
 * will also be freed prior to the disk.
 *
 * Context: can sleep
 */
static void disk_release(struct device *dev)
{
	struct gendisk *disk = dev_to_disk(dev);

	might_sleep();

	blk_free_devt(dev->devt);
	disk_release_events(disk);
	kfree(disk->random);
	xa_destroy(&disk->part_tbl);
	bdput(disk->part0);
	if (disk->queue)
		blk_put_queue(disk->queue);
	kfree(disk);
}

struct class block_class = {
	.name		= "block",
};

static char *block_devnode(struct device *dev, umode_t *mode,
			   kuid_t *uid, kgid_t *gid)
{
	struct gendisk *disk = dev_to_disk(dev);

	if (disk->fops->devnode)
		return disk->fops->devnode(disk, mode);
	return NULL;
}

const struct device_type disk_type = {
	.name		= "disk",
	.groups		= disk_attr_groups,
	.release	= disk_release,
	.devnode	= block_devnode,
};

#ifdef CONFIG_PROC_FS
/*
 * aggregate disk stat collector.  Uses the same stats that the sysfs
 * entries do, above, but makes them available through one seq_file.
 *
 * The output looks suspiciously like /proc/partitions with a bunch of
 * extra fields.
 */
static int diskstats_show(struct seq_file *seqf, void *v)
{
	struct gendisk *gp = v;
	struct block_device *hd;
	char buf[BDEVNAME_SIZE];
	unsigned int inflight;
	struct disk_stats stat;
	unsigned long idx;

	/*
	if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
		seq_puts(seqf,	"major minor name"
				"     rio rmerge rsect ruse wio wmerge "
				"wsect wuse running use aveq"
				"\n\n");
	*/

	rcu_read_lock();
	xa_for_each(&gp->part_tbl, idx, hd) {
		if (bdev_is_partition(hd) && !bdev_nr_sectors(hd))
			continue;
		part_stat_read_all(hd, &stat);
		if (queue_is_mq(gp->queue))
			inflight = blk_mq_in_flight(gp->queue, hd);
		else
			inflight = part_in_flight(hd);

		seq_printf(seqf, "%4d %7d %s "
			   "%lu %lu %lu %u "
			   "%lu %lu %lu %u "
			   "%u %u %u "
			   "%lu %lu %lu %u "
			   "%lu %u"
			   "\n",
			   MAJOR(hd->bd_dev), MINOR(hd->bd_dev),
			   disk_name(gp, hd->bd_partno, buf),
			   stat.ios[STAT_READ],
			   stat.merges[STAT_READ],
			   stat.sectors[STAT_READ],
			   (unsigned int)div_u64(stat.nsecs[STAT_READ],
						 NSEC_PER_MSEC),
			   stat.ios[STAT_WRITE],
			   stat.merges[STAT_WRITE],
			   stat.sectors[STAT_WRITE],
			   (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
						 NSEC_PER_MSEC),
			   inflight,
			   jiffies_to_msecs(stat.io_ticks),
			   (unsigned int)div_u64(stat.nsecs[STAT_READ] +
						 stat.nsecs[STAT_WRITE] +
						 stat.nsecs[STAT_DISCARD] +
						 stat.nsecs[STAT_FLUSH],
						 NSEC_PER_MSEC),
			   stat.ios[STAT_DISCARD],
			   stat.merges[STAT_DISCARD],
			   stat.sectors[STAT_DISCARD],
			   (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
						 NSEC_PER_MSEC),
			   stat.ios[STAT_FLUSH],
			   (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
						 NSEC_PER_MSEC)
			);
	}
	rcu_read_unlock();

	return 0;
}

static const struct seq_operations diskstats_op = {
	.start	= disk_seqf_start,
	.next	= disk_seqf_next,
	.stop	= disk_seqf_stop,
	.show	= diskstats_show
};

static int __init proc_genhd_init(void)
{
	proc_create_seq("diskstats", 0, NULL, &diskstats_op);
	proc_create_seq("partitions", 0, NULL, &partitions_op);
	return 0;
}
module_init(proc_genhd_init);
#endif /* CONFIG_PROC_FS */
dev_t blk_lookup_devt(const char *name, int partno)
{
	dev_t devt = MKDEV(0, 0);
	struct class_dev_iter iter;
	struct device *dev;

	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
	while ((dev = class_dev_iter_next(&iter))) {
		struct gendisk *disk = dev_to_disk(dev);
		struct block_device *part;

		if (strcmp(dev_name(dev), name))
			continue;

		if (partno < disk->minors) {
			/*
			 * We need to return the right devno, even
			 * if the partition doesn't exist yet.
			 */
			devt = MKDEV(MAJOR(dev->devt),
				     MINOR(dev->devt) + partno);
			break;
		}
		part = bdget_disk(disk, partno);
		if (part) {
			devt = part->bd_dev;
			bdput(part);
			break;
		}
	}
	class_dev_iter_exit(&iter);
	return devt;
}

struct gendisk *__alloc_disk_node(int minors, int node_id)
{
	struct gendisk *disk;

	if (minors > DISK_MAX_PARTS) {
		printk(KERN_ERR
			"block: can't allocate more than %d partitions\n",
			DISK_MAX_PARTS);
		minors = DISK_MAX_PARTS;
	}

	disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
	if (!disk)
		return NULL;

	disk->part0 = bdev_alloc(disk, 0);
	if (!disk->part0)
		goto out_free_disk;

	disk->node_id = node_id;
	xa_init(&disk->part_tbl);
	if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
		goto out_destroy_part_tbl;

	disk->minors = minors;
	rand_initialize_disk(disk);
	disk_to_dev(disk)->class = &block_class;
	disk_to_dev(disk)->type = &disk_type;
	device_initialize(disk_to_dev(disk));
	return disk;

out_destroy_part_tbl:
	xa_destroy(&disk->part_tbl);
	bdput(disk->part0);
out_free_disk:
	kfree(disk);
	return NULL;
}
EXPORT_SYMBOL(__alloc_disk_node);

/**
 * put_disk - decrements the gendisk refcount
 * @disk: the struct gendisk to decrement the refcount for
 *
 * This decrements the refcount for the struct gendisk. When this reaches 0
 * we'll have disk_release() called.
 *
 * Context: Any context, but the last reference must not be dropped from
 * atomic context.
 */
void put_disk(struct gendisk *disk)
{
	if (disk)
		put_device(disk_to_dev(disk));
}
EXPORT_SYMBOL(put_disk);
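/*
 * Lifecycle example (illustrative sketch only): a disk that fails later
 * setup is dropped with put_disk(), which frees it through disk_release()
 * once the last reference goes away.  "my_setup" is a hypothetical helper.
 *
 *	struct gendisk *disk = __alloc_disk_node(16, NUMA_NO_NODE);
 *
 *	if (!disk)
 *		return -ENOMEM;
 *	if (my_setup(disk) < 0) {
 *		put_disk(disk);
 *		return -EIO;
 *	}
 */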
static void set_disk_ro_uevent(struct gendisk *gd, int ro)
{
	char event[] = "DISK_RO=1";
	char *envp[] = { event, NULL };

	if (!ro)
		event[8] = '0';
	kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
}

/**
 * set_disk_ro - set a gendisk read-only
 * @disk: gendisk to operate on
 * @read_only: %true to set the disk read-only, %false set the disk read/write
 *
 * This function is used to indicate whether a given disk device should have
 * its read-only flag set. set_disk_ro() is typically used by device drivers
 * to indicate whether the underlying physical device is write-protected.
 */
void set_disk_ro(struct gendisk *disk, bool read_only)
{
	if (read_only) {
		if (test_and_set_bit(GD_READ_ONLY, &disk->state))
			return;
	} else {
		if (!test_and_clear_bit(GD_READ_ONLY, &disk->state))
			return;
	}
	set_disk_ro_uevent(disk, read_only);
}
EXPORT_SYMBOL(set_disk_ro);

int bdev_read_only(struct block_device *bdev)
{
	return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
}
EXPORT_SYMBOL(bdev_read_only);

/*
 * Disk events - monitor disk events like media change and eject request.
 */
struct disk_events {
	struct list_head	node;		/* all disk_event's */
	struct gendisk		*disk;		/* the associated disk */
	spinlock_t		lock;

	struct mutex		block_mutex;	/* protects blocking */
	int			block;		/* event blocking depth */
	unsigned int		pending;	/* events already sent out */
	unsigned int		clearing;	/* events being cleared */

	long			poll_msecs;	/* interval, -1 for default */
	struct delayed_work	dwork;
};

static const char *disk_events_strs[] = {
	[ilog2(DISK_EVENT_MEDIA_CHANGE)]	= "media_change",
	[ilog2(DISK_EVENT_EJECT_REQUEST)]	= "eject_request",
};

static char *disk_uevents[] = {
	[ilog2(DISK_EVENT_MEDIA_CHANGE)]	= "DISK_MEDIA_CHANGE=1",
	[ilog2(DISK_EVENT_EJECT_REQUEST)]	= "DISK_EJECT_REQUEST=1",
};

/* list of all disk_events */
static DEFINE_MUTEX(disk_events_mutex);
static LIST_HEAD(disk_events);

/* disable in-kernel polling by default */
static unsigned long disk_events_dfl_poll_msecs;

static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
{
	struct disk_events *ev = disk->ev;
	long intv_msecs = 0;

	/*
	 * If device-specific poll interval is set, always use it.  If
	 * the default is being used, poll if the POLL flag is set.
	 */
	if (ev->poll_msecs >= 0)
		intv_msecs = ev->poll_msecs;
	else if (disk->event_flags & DISK_EVENT_FLAG_POLL)
		intv_msecs = disk_events_dfl_poll_msecs;

	return msecs_to_jiffies(intv_msecs);
}
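/*
 * Wiring example (illustrative sketch only): a removable-media driver
 * advertises the events it can report and supplies a ->check_events()
 * hook; "my_check_events" and "my_media_changed" are hypothetical.
 *
 *	static unsigned int my_check_events(struct gendisk *disk,
 *					    unsigned int clearing)
 *	{
 *		return my_media_changed() ? DISK_EVENT_MEDIA_CHANGE : 0;
 *	}
 *
 *	disk->events = DISK_EVENT_MEDIA_CHANGE;
 *	disk->event_flags = DISK_EVENT_FLAG_UEVENT | DISK_EVENT_FLAG_POLL;
 */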
/**
 * disk_block_events - block and flush disk event checking
 * @disk: disk to block events for
 *
 * On return from this function, it is guaranteed that event checking
 * isn't in progress and won't happen until unblocked by
 * disk_unblock_events().  Events blocking is counted and the actual
 * unblocking happens after the matching number of unblocks are done.
 *
 * Note that this intentionally does not block event checking from
 * disk_clear_events().
 *
 * CONTEXT:
 * Might sleep.
 */
void disk_block_events(struct gendisk *disk)
{
	struct disk_events *ev = disk->ev;
	unsigned long flags;
	bool cancel;

	if (!ev)
		return;

	/*
	 * Outer mutex ensures that the first blocker completes canceling
	 * the event work before further blockers are allowed to finish.
	 */
	mutex_lock(&ev->block_mutex);

	spin_lock_irqsave(&ev->lock, flags);
	cancel = !ev->block++;
	spin_unlock_irqrestore(&ev->lock, flags);

	if (cancel)
		cancel_delayed_work_sync(&disk->ev->dwork);

	mutex_unlock(&ev->block_mutex);
}

static void __disk_unblock_events(struct gendisk *disk, bool check_now)
{
	struct disk_events *ev = disk->ev;
	unsigned long intv;
	unsigned long flags;

	spin_lock_irqsave(&ev->lock, flags);

	if (WARN_ON_ONCE(ev->block <= 0))
		goto out_unlock;

	if (--ev->block)
		goto out_unlock;

	intv = disk_events_poll_jiffies(disk);
	if (check_now)
		queue_delayed_work(system_freezable_power_efficient_wq,
				&ev->dwork, 0);
	else if (intv)
		queue_delayed_work(system_freezable_power_efficient_wq,
				&ev->dwork, intv);
out_unlock:
	spin_unlock_irqrestore(&ev->lock, flags);
}

/**
 * disk_unblock_events - unblock disk event checking
 * @disk: disk to unblock events for
 *
 * Undo disk_block_events().  When the block count reaches zero, it
 * starts events polling if configured.
 *
 * CONTEXT:
 * Don't care.  Safe to call from irq context.
 */
void disk_unblock_events(struct gendisk *disk)
{
	if (disk->ev)
		__disk_unblock_events(disk, false);
}

/**
 * disk_flush_events - schedule immediate event checking and flushing
 * @disk: disk to check and flush events for
 * @mask: events to flush
 *
 * Schedule immediate event checking on @disk if not blocked.  Events in
 * @mask are scheduled to be cleared from the driver.  Note that this
 * doesn't clear the events from @disk->ev.
 *
 * CONTEXT:
 * If @mask is non-zero must be called with bdev->bd_mutex held.
 */
void disk_flush_events(struct gendisk *disk, unsigned int mask)
{
	struct disk_events *ev = disk->ev;

	if (!ev)
		return;

	spin_lock_irq(&ev->lock);
	ev->clearing |= mask;
	if (!ev->block)
		mod_delayed_work(system_freezable_power_efficient_wq,
				&ev->dwork, 0);
	spin_unlock_irq(&ev->lock);
}
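/*
 * Pairing example (illustrative sketch only): blocking nests, so every
 * disk_block_events() needs a matching disk_unblock_events(), typically
 * bracketing a section that must not race with the event worker.
 * "my_reconfigure_media" is a hypothetical critical section.
 *
 *	disk_block_events(disk);
 *	my_reconfigure_media(disk);
 *	disk_unblock_events(disk);
 */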
/**
 * disk_clear_events - synchronously check, clear and return pending events
 * @disk: disk to fetch and clear events from
 * @mask: mask of events to be fetched and cleared
 *
 * Disk events are synchronously checked and pending events in @mask
 * are cleared and returned.  This ignores the block count.
 *
 * CONTEXT:
 * Might sleep.
 */
static unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
{
	struct disk_events *ev = disk->ev;
	unsigned int pending;
	unsigned int clearing = mask;

	if (!ev)
		return 0;

	disk_block_events(disk);

	/*
	 * store the union of mask and ev->clearing on the stack so that the
	 * race with disk_flush_events does not cause ambiguity (ev->clearing
	 * can still be modified even if events are blocked).
	 */
	spin_lock_irq(&ev->lock);
	clearing |= ev->clearing;
	ev->clearing = 0;
	spin_unlock_irq(&ev->lock);

	disk_check_events(ev, &clearing);
	/*
	 * if ev->clearing is not 0, the disk_flush_events got called in the
	 * middle of this function, so we want to run the workfn without delay.
	 */
	__disk_unblock_events(disk, ev->clearing ? true : false);

	/* then, fetch and clear pending events */
	spin_lock_irq(&ev->lock);
	pending = ev->pending & mask;
	ev->pending &= ~mask;
	spin_unlock_irq(&ev->lock);
	WARN_ON_ONCE(clearing & ~mask);

	return pending;
}

/**
 * bdev_check_media_change - check if a removable media has been changed
 * @bdev: block device to check
 *
 * Check whether a removable media has been changed, and attempt to free all
 * dentries and inodes and invalidates all block device page cache entries in
 * that case.
 *
 * Returns %true if the block device changed, or %false if not.
 */
bool bdev_check_media_change(struct block_device *bdev)
{
	unsigned int events;

	events = disk_clear_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE |
				   DISK_EVENT_EJECT_REQUEST);
	if (!(events & DISK_EVENT_MEDIA_CHANGE))
		return false;

	if (__invalidate_device(bdev, true))
		pr_warn("VFS: busy inodes on changed media %s\n",
			bdev->bd_disk->disk_name);
	set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
	return true;
}
EXPORT_SYMBOL(bdev_check_media_change);

/*
 * Separate this part out so that a different pointer for clearing_ptr can be
 * passed in for disk_clear_events.
 */
static void disk_events_workfn(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct disk_events *ev = container_of(dwork, struct disk_events, dwork);

	disk_check_events(ev, &ev->clearing);
}

static void disk_check_events(struct disk_events *ev,
			      unsigned int *clearing_ptr)
{
	struct gendisk *disk = ev->disk;
	char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
	unsigned int clearing = *clearing_ptr;
	unsigned int events;
	unsigned long intv;
	int nr_events = 0, i;

	/* check events */
	events = disk->fops->check_events(disk, clearing);

	/* accumulate pending events and schedule next poll if necessary */
	spin_lock_irq(&ev->lock);

	events &= ~ev->pending;
	ev->pending |= events;
	*clearing_ptr &= ~clearing;

	intv = disk_events_poll_jiffies(disk);
	if (!ev->block && intv)
		queue_delayed_work(system_freezable_power_efficient_wq,
				&ev->dwork, intv);

	spin_unlock_irq(&ev->lock);

	/*
	 * Tell userland about new events.  Only the events listed in
	 * @disk->events are reported, and only if DISK_EVENT_FLAG_UEVENT
	 * is set.  Otherwise, events are processed internally but never
	 * get reported to userland.
	 */
	for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
		if ((events & disk->events & (1 << i)) &&
		    (disk->event_flags & DISK_EVENT_FLAG_UEVENT))
			envp[nr_events++] = disk_uevents[i];

	if (nr_events)
		kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
}
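/*
 * Usage example (illustrative sketch only): a removable-media driver's
 * ->open() typically revalidates when bdev_check_media_change() reports
 * a change; "my_open" and "my_revalidate_media" are hypothetical.
 *
 *	static int my_open(struct block_device *bdev, fmode_t mode)
 *	{
 *		if (bdev_check_media_change(bdev))
 *			my_revalidate_media(bdev->bd_disk);
 *		return 0;
 *	}
 */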
/*
 * A disk events enabled device has the following sysfs nodes under
 * its /sys/block/X/ directory.
 *
 * events		: list of all supported events
 * events_async		: list of events which can be detected w/o polling
 *			  (always empty, only for backwards compatibility)
 * events_poll_msecs	: polling interval, 0: disable, -1: system default
 */
static ssize_t __disk_events_show(unsigned int events, char *buf)
{
	const char *delim = "";
	ssize_t pos = 0;
	int i;

	for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++)
		if (events & (1 << i)) {
			pos += sprintf(buf + pos, "%s%s",
				       delim, disk_events_strs[i]);
			delim = " ";
		}
	if (pos)
		pos += sprintf(buf + pos, "\n");
	return pos;
}

static ssize_t disk_events_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	if (!(disk->event_flags & DISK_EVENT_FLAG_UEVENT))
		return 0;

	return __disk_events_show(disk->events, buf);
}

static ssize_t disk_events_async_show(struct device *dev,
				      struct device_attribute *attr, char *buf)
{
	return 0;
}

static ssize_t disk_events_poll_msecs_show(struct device *dev,
					   struct device_attribute *attr,
					   char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	if (!disk->ev)
		return sprintf(buf, "-1\n");

	return sprintf(buf, "%ld\n", disk->ev->poll_msecs);
}

static ssize_t disk_events_poll_msecs_store(struct device *dev,
					    struct device_attribute *attr,
					    const char *buf, size_t count)
{
	struct gendisk *disk = dev_to_disk(dev);
	long intv;

	if (!count || !sscanf(buf, "%ld", &intv))
		return -EINVAL;

	if (intv < 0 && intv != -1)
		return -EINVAL;

	if (!disk->ev)
		return -ENODEV;

	disk_block_events(disk);
	disk->ev->poll_msecs = intv;
	__disk_unblock_events(disk, true);

	return count;
}

static const DEVICE_ATTR(events, 0444, disk_events_show, NULL);
static const DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL);
static const DEVICE_ATTR(events_poll_msecs, 0644,
			 disk_events_poll_msecs_show,
			 disk_events_poll_msecs_store);

static const struct attribute *disk_events_attrs[] = {
	&dev_attr_events.attr,
	&dev_attr_events_async.attr,
	&dev_attr_events_poll_msecs.attr,
	NULL,
};

/*
 * The default polling interval can be specified by the kernel
 * parameter block.events_dfl_poll_msecs which defaults to 0
 * (disable).  This can also be modified runtime by writing to
 * /sys/module/block/parameters/events_dfl_poll_msecs.
 */
static int disk_events_set_dfl_poll_msecs(const char *val,
					  const struct kernel_param *kp)
{
	struct disk_events *ev;
	int ret;

	ret = param_set_ulong(val, kp);
	if (ret < 0)
		return ret;

	mutex_lock(&disk_events_mutex);

	list_for_each_entry(ev, &disk_events, node)
		disk_flush_events(ev->disk, 0);

	mutex_unlock(&disk_events_mutex);

	return 0;
}

static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = {
	.set	= disk_events_set_dfl_poll_msecs,
	.get	= param_get_ulong,
};

#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX	"block."
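/*
 * Tuning example (illustrative): with MODULE_PARAM_PREFIX set to "block."
 * the parameter registered below is exposed as block.events_dfl_poll_msecs,
 * so a 2-second default poll interval can be requested on the kernel
 * command line with block.events_dfl_poll_msecs=2000 or at runtime via
 * /sys/module/block/parameters/events_dfl_poll_msecs.
 */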
module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
		&disk_events_dfl_poll_msecs, 0644);

/*
 * disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
 */
static void disk_alloc_events(struct gendisk *disk)
{
	struct disk_events *ev;

	if (!disk->fops->check_events || !disk->events)
		return;

	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
	if (!ev) {
		pr_warn("%s: failed to initialize events\n", disk->disk_name);
		return;
	}

	INIT_LIST_HEAD(&ev->node);
	ev->disk = disk;
	spin_lock_init(&ev->lock);
	mutex_init(&ev->block_mutex);
	ev->block = 1;
	ev->poll_msecs = -1;
	INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);

	disk->ev = ev;
}

static void disk_add_events(struct gendisk *disk)
{
	/* FIXME: error handling */
	if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0)
		pr_warn("%s: failed to create sysfs files for events\n",
			disk->disk_name);

	if (!disk->ev)
		return;

	mutex_lock(&disk_events_mutex);
	list_add_tail(&disk->ev->node, &disk_events);
	mutex_unlock(&disk_events_mutex);

	/*
	 * Block count is initialized to 1 and the following initial
	 * unblock kicks it into action.
	 */
	__disk_unblock_events(disk, true);
}

static void disk_del_events(struct gendisk *disk)
{
	if (disk->ev) {
		disk_block_events(disk);

		mutex_lock(&disk_events_mutex);
		list_del_init(&disk->ev->node);
		mutex_unlock(&disk_events_mutex);
	}

	sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs);
}

static void disk_release_events(struct gendisk *disk)
{
	/* the block count should be 1 from disk_del_events() */
	WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
	kfree(disk->ev);
}