1 /* 2 * gendisk handling 3 */ 4 5 #include <linux/config.h> 6 #include <linux/module.h> 7 #include <linux/fs.h> 8 #include <linux/genhd.h> 9 #include <linux/kernel.h> 10 #include <linux/blkdev.h> 11 #include <linux/init.h> 12 #include <linux/spinlock.h> 13 #include <linux/seq_file.h> 14 #include <linux/slab.h> 15 #include <linux/kmod.h> 16 #include <linux/kobj_map.h> 17 #include <linux/buffer_head.h> 18 #include <linux/mutex.h> 19 20 static struct subsystem block_subsys; 21 22 static DEFINE_MUTEX(block_subsys_lock); 23 24 /* 25 * Can be deleted altogether. Later. 26 * 27 */ 28 static struct blk_major_name { 29 struct blk_major_name *next; 30 int major; 31 char name[16]; 32 } *major_names[BLKDEV_MAJOR_HASH_SIZE]; 33 34 /* index in the above - for now: assume no multimajor ranges */ 35 static inline int major_to_index(int major) 36 { 37 return major % BLKDEV_MAJOR_HASH_SIZE; 38 } 39 40 #ifdef CONFIG_PROC_FS 41 42 void blkdev_show(struct seq_file *f, off_t offset) 43 { 44 struct blk_major_name *dp; 45 46 if (offset < BLKDEV_MAJOR_HASH_SIZE) { 47 mutex_lock(&block_subsys_lock); 48 for (dp = major_names[offset]; dp; dp = dp->next) 49 seq_printf(f, "%3d %s\n", dp->major, dp->name); 50 mutex_unlock(&block_subsys_lock); 51 } 52 } 53 54 #endif /* CONFIG_PROC_FS */ 55 56 int register_blkdev(unsigned int major, const char *name) 57 { 58 struct blk_major_name **n, *p; 59 int index, ret = 0; 60 61 mutex_lock(&block_subsys_lock); 62 63 /* temporary */ 64 if (major == 0) { 65 for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) { 66 if (major_names[index] == NULL) 67 break; 68 } 69 70 if (index == 0) { 71 printk("register_blkdev: failed to get major for %s\n", 72 name); 73 ret = -EBUSY; 74 goto out; 75 } 76 major = index; 77 ret = major; 78 } 79 80 p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL); 81 if (p == NULL) { 82 ret = -ENOMEM; 83 goto out; 84 } 85 86 p->major = major; 87 strlcpy(p->name, name, sizeof(p->name)); 88 p->next = NULL; 89 index = major_to_index(major); 90 91 for (n = &major_names[index]; *n; n = &(*n)->next) { 92 if ((*n)->major == major) 93 break; 94 } 95 if (!*n) 96 *n = p; 97 else 98 ret = -EBUSY; 99 100 if (ret < 0) { 101 printk("register_blkdev: cannot get major %d for %s\n", 102 major, name); 103 kfree(p); 104 } 105 out: 106 mutex_unlock(&block_subsys_lock); 107 return ret; 108 } 109 110 EXPORT_SYMBOL(register_blkdev); 111 112 /* todo: make void - error printk here */ 113 int unregister_blkdev(unsigned int major, const char *name) 114 { 115 struct blk_major_name **n; 116 struct blk_major_name *p = NULL; 117 int index = major_to_index(major); 118 int ret = 0; 119 120 mutex_lock(&block_subsys_lock); 121 for (n = &major_names[index]; *n; n = &(*n)->next) 122 if ((*n)->major == major) 123 break; 124 if (!*n || strcmp((*n)->name, name)) 125 ret = -EINVAL; 126 else { 127 p = *n; 128 *n = p->next; 129 } 130 mutex_unlock(&block_subsys_lock); 131 kfree(p); 132 133 return ret; 134 } 135 136 EXPORT_SYMBOL(unregister_blkdev); 137 138 static struct kobj_map *bdev_map; 139 140 /* 141 * Register device numbers dev..(dev+range-1) 142 * range must be nonzero 143 * The hash chain is sorted on range, so that subranges can override. 144 */ 145 void blk_register_region(dev_t dev, unsigned long range, struct module *module, 146 struct kobject *(*probe)(dev_t, int *, void *), 147 int (*lock)(dev_t, void *), void *data) 148 { 149 kobj_map(bdev_map, dev, range, module, probe, lock, data); 150 } 151 152 EXPORT_SYMBOL(blk_register_region); 153 154 void blk_unregister_region(dev_t dev, unsigned long range) 155 { 156 kobj_unmap(bdev_map, dev, range); 157 } 158 159 EXPORT_SYMBOL(blk_unregister_region); 160 161 static struct kobject *exact_match(dev_t dev, int *part, void *data) 162 { 163 struct gendisk *p = data; 164 return &p->kobj; 165 } 166 167 static int exact_lock(dev_t dev, void *data) 168 { 169 struct gendisk *p = data; 170 171 if (!get_disk(p)) 172 return -1; 173 return 0; 174 } 175 176 /** 177 * add_disk - add partitioning information to kernel list 178 * @disk: per-device partitioning information 179 * 180 * This function registers the partitioning information in @disk 181 * with the kernel. 182 */ 183 void add_disk(struct gendisk *disk) 184 { 185 disk->flags |= GENHD_FL_UP; 186 blk_register_region(MKDEV(disk->major, disk->first_minor), 187 disk->minors, NULL, exact_match, exact_lock, disk); 188 register_disk(disk); 189 blk_register_queue(disk); 190 } 191 192 EXPORT_SYMBOL(add_disk); 193 EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */ 194 195 void unlink_gendisk(struct gendisk *disk) 196 { 197 blk_unregister_queue(disk); 198 blk_unregister_region(MKDEV(disk->major, disk->first_minor), 199 disk->minors); 200 } 201 202 #define to_disk(obj) container_of(obj,struct gendisk,kobj) 203 204 /** 205 * get_gendisk - get partitioning information for a given device 206 * @dev: device to get partitioning information for 207 * 208 * This function gets the structure containing partitioning 209 * information for the given device @dev. 210 */ 211 struct gendisk *get_gendisk(dev_t dev, int *part) 212 { 213 struct kobject *kobj = kobj_lookup(bdev_map, dev, part); 214 return kobj ? to_disk(kobj) : NULL; 215 } 216 217 #ifdef CONFIG_PROC_FS 218 /* iterator */ 219 static void *part_start(struct seq_file *part, loff_t *pos) 220 { 221 struct list_head *p; 222 loff_t l = *pos; 223 224 mutex_lock(&block_subsys_lock); 225 list_for_each(p, &block_subsys.kset.list) 226 if (!l--) 227 return list_entry(p, struct gendisk, kobj.entry); 228 return NULL; 229 } 230 231 static void *part_next(struct seq_file *part, void *v, loff_t *pos) 232 { 233 struct list_head *p = ((struct gendisk *)v)->kobj.entry.next; 234 ++*pos; 235 return p==&block_subsys.kset.list ? NULL : 236 list_entry(p, struct gendisk, kobj.entry); 237 } 238 239 static void part_stop(struct seq_file *part, void *v) 240 { 241 mutex_unlock(&block_subsys_lock); 242 } 243 244 static int show_partition(struct seq_file *part, void *v) 245 { 246 struct gendisk *sgp = v; 247 int n; 248 char buf[BDEVNAME_SIZE]; 249 250 if (&sgp->kobj.entry == block_subsys.kset.list.next) 251 seq_puts(part, "major minor #blocks name\n\n"); 252 253 /* Don't show non-partitionable removeable devices or empty devices */ 254 if (!get_capacity(sgp) || 255 (sgp->minors == 1 && (sgp->flags & GENHD_FL_REMOVABLE))) 256 return 0; 257 if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) 258 return 0; 259 260 /* show the full disk and all non-0 size partitions of it */ 261 seq_printf(part, "%4d %4d %10llu %s\n", 262 sgp->major, sgp->first_minor, 263 (unsigned long long)get_capacity(sgp) >> 1, 264 disk_name(sgp, 0, buf)); 265 for (n = 0; n < sgp->minors - 1; n++) { 266 if (!sgp->part[n]) 267 continue; 268 if (sgp->part[n]->nr_sects == 0) 269 continue; 270 seq_printf(part, "%4d %4d %10llu %s\n", 271 sgp->major, n + 1 + sgp->first_minor, 272 (unsigned long long)sgp->part[n]->nr_sects >> 1 , 273 disk_name(sgp, n + 1, buf)); 274 } 275 276 return 0; 277 } 278 279 struct seq_operations partitions_op = { 280 .start =part_start, 281 .next = part_next, 282 .stop = part_stop, 283 .show = show_partition 284 }; 285 #endif 286 287 288 extern int blk_dev_init(void); 289 290 static struct kobject *base_probe(dev_t dev, int *part, void *data) 291 { 292 if (request_module("block-major-%d-%d", MAJOR(dev), MINOR(dev)) > 0) 293 /* Make old-style 2.4 aliases work */ 294 request_module("block-major-%d", MAJOR(dev)); 295 return NULL; 296 } 297 298 static int __init genhd_device_init(void) 299 { 300 bdev_map = kobj_map_init(base_probe, &block_subsys_lock); 301 blk_dev_init(); 302 subsystem_register(&block_subsys); 303 return 0; 304 } 305 306 subsys_initcall(genhd_device_init); 307 308 309 310 /* 311 * kobject & sysfs bindings for block devices 312 */ 313 static ssize_t disk_attr_show(struct kobject *kobj, struct attribute *attr, 314 char *page) 315 { 316 struct gendisk *disk = to_disk(kobj); 317 struct disk_attribute *disk_attr = 318 container_of(attr,struct disk_attribute,attr); 319 ssize_t ret = -EIO; 320 321 if (disk_attr->show) 322 ret = disk_attr->show(disk,page); 323 return ret; 324 } 325 326 static ssize_t disk_attr_store(struct kobject * kobj, struct attribute * attr, 327 const char *page, size_t count) 328 { 329 struct gendisk *disk = to_disk(kobj); 330 struct disk_attribute *disk_attr = 331 container_of(attr,struct disk_attribute,attr); 332 ssize_t ret = 0; 333 334 if (disk_attr->store) 335 ret = disk_attr->store(disk, page, count); 336 return ret; 337 } 338 339 static struct sysfs_ops disk_sysfs_ops = { 340 .show = &disk_attr_show, 341 .store = &disk_attr_store, 342 }; 343 344 static ssize_t disk_uevent_store(struct gendisk * disk, 345 const char *buf, size_t count) 346 { 347 kobject_uevent(&disk->kobj, KOBJ_ADD); 348 return count; 349 } 350 static ssize_t disk_dev_read(struct gendisk * disk, char *page) 351 { 352 dev_t base = MKDEV(disk->major, disk->first_minor); 353 return print_dev_t(page, base); 354 } 355 static ssize_t disk_range_read(struct gendisk * disk, char *page) 356 { 357 return sprintf(page, "%d\n", disk->minors); 358 } 359 static ssize_t disk_removable_read(struct gendisk * disk, char *page) 360 { 361 return sprintf(page, "%d\n", 362 (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0)); 363 364 } 365 static ssize_t disk_size_read(struct gendisk * disk, char *page) 366 { 367 return sprintf(page, "%llu\n", (unsigned long long)get_capacity(disk)); 368 } 369 370 static ssize_t disk_stats_read(struct gendisk * disk, char *page) 371 { 372 preempt_disable(); 373 disk_round_stats(disk); 374 preempt_enable(); 375 return sprintf(page, 376 "%8lu %8lu %8llu %8u " 377 "%8lu %8lu %8llu %8u " 378 "%8u %8u %8u" 379 "\n", 380 disk_stat_read(disk, ios[READ]), 381 disk_stat_read(disk, merges[READ]), 382 (unsigned long long)disk_stat_read(disk, sectors[READ]), 383 jiffies_to_msecs(disk_stat_read(disk, ticks[READ])), 384 disk_stat_read(disk, ios[WRITE]), 385 disk_stat_read(disk, merges[WRITE]), 386 (unsigned long long)disk_stat_read(disk, sectors[WRITE]), 387 jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])), 388 disk->in_flight, 389 jiffies_to_msecs(disk_stat_read(disk, io_ticks)), 390 jiffies_to_msecs(disk_stat_read(disk, time_in_queue))); 391 } 392 static struct disk_attribute disk_attr_uevent = { 393 .attr = {.name = "uevent", .mode = S_IWUSR }, 394 .store = disk_uevent_store 395 }; 396 static struct disk_attribute disk_attr_dev = { 397 .attr = {.name = "dev", .mode = S_IRUGO }, 398 .show = disk_dev_read 399 }; 400 static struct disk_attribute disk_attr_range = { 401 .attr = {.name = "range", .mode = S_IRUGO }, 402 .show = disk_range_read 403 }; 404 static struct disk_attribute disk_attr_removable = { 405 .attr = {.name = "removable", .mode = S_IRUGO }, 406 .show = disk_removable_read 407 }; 408 static struct disk_attribute disk_attr_size = { 409 .attr = {.name = "size", .mode = S_IRUGO }, 410 .show = disk_size_read 411 }; 412 static struct disk_attribute disk_attr_stat = { 413 .attr = {.name = "stat", .mode = S_IRUGO }, 414 .show = disk_stats_read 415 }; 416 417 static struct attribute * default_attrs[] = { 418 &disk_attr_uevent.attr, 419 &disk_attr_dev.attr, 420 &disk_attr_range.attr, 421 &disk_attr_removable.attr, 422 &disk_attr_size.attr, 423 &disk_attr_stat.attr, 424 NULL, 425 }; 426 427 static void disk_release(struct kobject * kobj) 428 { 429 struct gendisk *disk = to_disk(kobj); 430 kfree(disk->random); 431 kfree(disk->part); 432 free_disk_stats(disk); 433 kfree(disk); 434 } 435 436 static struct kobj_type ktype_block = { 437 .release = disk_release, 438 .sysfs_ops = &disk_sysfs_ops, 439 .default_attrs = default_attrs, 440 }; 441 442 extern struct kobj_type ktype_part; 443 444 static int block_uevent_filter(struct kset *kset, struct kobject *kobj) 445 { 446 struct kobj_type *ktype = get_ktype(kobj); 447 448 return ((ktype == &ktype_block) || (ktype == &ktype_part)); 449 } 450 451 static int block_uevent(struct kset *kset, struct kobject *kobj, char **envp, 452 int num_envp, char *buffer, int buffer_size) 453 { 454 struct kobj_type *ktype = get_ktype(kobj); 455 struct device *physdev; 456 struct gendisk *disk; 457 struct hd_struct *part; 458 int length = 0; 459 int i = 0; 460 461 if (ktype == &ktype_block) { 462 disk = container_of(kobj, struct gendisk, kobj); 463 add_uevent_var(envp, num_envp, &i, buffer, buffer_size, 464 &length, "MINOR=%u", disk->first_minor); 465 } else if (ktype == &ktype_part) { 466 disk = container_of(kobj->parent, struct gendisk, kobj); 467 part = container_of(kobj, struct hd_struct, kobj); 468 add_uevent_var(envp, num_envp, &i, buffer, buffer_size, 469 &length, "MINOR=%u", 470 disk->first_minor + part->partno); 471 } else 472 return 0; 473 474 add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, 475 "MAJOR=%u", disk->major); 476 477 /* add physical device, backing this device */ 478 physdev = disk->driverfs_dev; 479 if (physdev) { 480 char *path = kobject_get_path(&physdev->kobj, GFP_KERNEL); 481 482 add_uevent_var(envp, num_envp, &i, buffer, buffer_size, 483 &length, "PHYSDEVPATH=%s", path); 484 kfree(path); 485 486 if (physdev->bus) 487 add_uevent_var(envp, num_envp, &i, 488 buffer, buffer_size, &length, 489 "PHYSDEVBUS=%s", 490 physdev->bus->name); 491 492 if (physdev->driver) 493 add_uevent_var(envp, num_envp, &i, 494 buffer, buffer_size, &length, 495 "PHYSDEVDRIVER=%s", 496 physdev->driver->name); 497 } 498 499 /* terminate, set to next free slot, shrink available space */ 500 envp[i] = NULL; 501 envp = &envp[i]; 502 num_envp -= i; 503 buffer = &buffer[length]; 504 buffer_size -= length; 505 506 return 0; 507 } 508 509 static struct kset_uevent_ops block_uevent_ops = { 510 .filter = block_uevent_filter, 511 .uevent = block_uevent, 512 }; 513 514 /* declare block_subsys. */ 515 static decl_subsys(block, &ktype_block, &block_uevent_ops); 516 517 518 /* 519 * aggregate disk stat collector. Uses the same stats that the sysfs 520 * entries do, above, but makes them available through one seq_file. 521 * Watching a few disks may be efficient through sysfs, but watching 522 * all of them will be more efficient through this interface. 523 * 524 * The output looks suspiciously like /proc/partitions with a bunch of 525 * extra fields. 526 */ 527 528 /* iterator */ 529 static void *diskstats_start(struct seq_file *part, loff_t *pos) 530 { 531 loff_t k = *pos; 532 struct list_head *p; 533 534 mutex_lock(&block_subsys_lock); 535 list_for_each(p, &block_subsys.kset.list) 536 if (!k--) 537 return list_entry(p, struct gendisk, kobj.entry); 538 return NULL; 539 } 540 541 static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos) 542 { 543 struct list_head *p = ((struct gendisk *)v)->kobj.entry.next; 544 ++*pos; 545 return p==&block_subsys.kset.list ? NULL : 546 list_entry(p, struct gendisk, kobj.entry); 547 } 548 549 static void diskstats_stop(struct seq_file *part, void *v) 550 { 551 mutex_unlock(&block_subsys_lock); 552 } 553 554 static int diskstats_show(struct seq_file *s, void *v) 555 { 556 struct gendisk *gp = v; 557 char buf[BDEVNAME_SIZE]; 558 int n = 0; 559 560 /* 561 if (&sgp->kobj.entry == block_subsys.kset.list.next) 562 seq_puts(s, "major minor name" 563 " rio rmerge rsect ruse wio wmerge " 564 "wsect wuse running use aveq" 565 "\n\n"); 566 */ 567 568 preempt_disable(); 569 disk_round_stats(gp); 570 preempt_enable(); 571 seq_printf(s, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", 572 gp->major, n + gp->first_minor, disk_name(gp, n, buf), 573 disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), 574 (unsigned long long)disk_stat_read(gp, sectors[0]), 575 jiffies_to_msecs(disk_stat_read(gp, ticks[0])), 576 disk_stat_read(gp, ios[1]), disk_stat_read(gp, merges[1]), 577 (unsigned long long)disk_stat_read(gp, sectors[1]), 578 jiffies_to_msecs(disk_stat_read(gp, ticks[1])), 579 gp->in_flight, 580 jiffies_to_msecs(disk_stat_read(gp, io_ticks)), 581 jiffies_to_msecs(disk_stat_read(gp, time_in_queue))); 582 583 /* now show all non-0 size partitions of it */ 584 for (n = 0; n < gp->minors - 1; n++) { 585 struct hd_struct *hd = gp->part[n]; 586 587 if (hd && hd->nr_sects) 588 seq_printf(s, "%4d %4d %s %u %u %u %u\n", 589 gp->major, n + gp->first_minor + 1, 590 disk_name(gp, n + 1, buf), 591 hd->ios[0], hd->sectors[0], 592 hd->ios[1], hd->sectors[1]); 593 } 594 595 return 0; 596 } 597 598 struct seq_operations diskstats_op = { 599 .start = diskstats_start, 600 .next = diskstats_next, 601 .stop = diskstats_stop, 602 .show = diskstats_show 603 }; 604 605 struct gendisk *alloc_disk(int minors) 606 { 607 return alloc_disk_node(minors, -1); 608 } 609 610 struct gendisk *alloc_disk_node(int minors, int node_id) 611 { 612 struct gendisk *disk; 613 614 disk = kmalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id); 615 if (disk) { 616 memset(disk, 0, sizeof(struct gendisk)); 617 if (!init_disk_stats(disk)) { 618 kfree(disk); 619 return NULL; 620 } 621 if (minors > 1) { 622 int size = (minors - 1) * sizeof(struct hd_struct *); 623 disk->part = kmalloc_node(size, GFP_KERNEL, node_id); 624 if (!disk->part) { 625 kfree(disk); 626 return NULL; 627 } 628 memset(disk->part, 0, size); 629 } 630 disk->minors = minors; 631 kobj_set_kset_s(disk,block_subsys); 632 kobject_init(&disk->kobj); 633 rand_initialize_disk(disk); 634 } 635 return disk; 636 } 637 638 EXPORT_SYMBOL(alloc_disk); 639 EXPORT_SYMBOL(alloc_disk_node); 640 641 struct kobject *get_disk(struct gendisk *disk) 642 { 643 struct module *owner; 644 struct kobject *kobj; 645 646 if (!disk->fops) 647 return NULL; 648 owner = disk->fops->owner; 649 if (owner && !try_module_get(owner)) 650 return NULL; 651 kobj = kobject_get(&disk->kobj); 652 if (kobj == NULL) { 653 module_put(owner); 654 return NULL; 655 } 656 return kobj; 657 658 } 659 660 EXPORT_SYMBOL(get_disk); 661 662 void put_disk(struct gendisk *disk) 663 { 664 if (disk) 665 kobject_put(&disk->kobj); 666 } 667 668 EXPORT_SYMBOL(put_disk); 669 670 void set_device_ro(struct block_device *bdev, int flag) 671 { 672 if (bdev->bd_contains != bdev) 673 bdev->bd_part->policy = flag; 674 else 675 bdev->bd_disk->policy = flag; 676 } 677 678 EXPORT_SYMBOL(set_device_ro); 679 680 void set_disk_ro(struct gendisk *disk, int flag) 681 { 682 int i; 683 disk->policy = flag; 684 for (i = 0; i < disk->minors - 1; i++) 685 if (disk->part[i]) disk->part[i]->policy = flag; 686 } 687 688 EXPORT_SYMBOL(set_disk_ro); 689 690 int bdev_read_only(struct block_device *bdev) 691 { 692 if (!bdev) 693 return 0; 694 else if (bdev->bd_contains != bdev) 695 return bdev->bd_part->policy; 696 else 697 return bdev->bd_disk->policy; 698 } 699 700 EXPORT_SYMBOL(bdev_read_only); 701 702 int invalidate_partition(struct gendisk *disk, int index) 703 { 704 int res = 0; 705 struct block_device *bdev = bdget_disk(disk, index); 706 if (bdev) { 707 fsync_bdev(bdev); 708 res = __invalidate_device(bdev); 709 bdput(bdev); 710 } 711 return res; 712 } 713 714 EXPORT_SYMBOL(invalidate_partition); 715