1 /* 2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) 3 * Licensed under the GPL 4 */ 5 6 /* 2001-09-28...2002-04-17 7 * Partition stuff by James_McMechan@hotmail.com 8 * old style ubd by setting UBD_SHIFT to 0 9 * 2002-09-27...2002-10-18 massive tinkering for 2.5 10 * partitions have changed in 2.5 11 * 2003-01-29 more tinkering for 2.5.59-1 12 * This should now address the sysfs problems and has 13 * the symlink for devfs to allow for booting with 14 * the common /dev/ubd/discX/... names rather than 15 * only /dev/ubdN/discN this version also has lots of 16 * clean ups preparing for ubd-many. 17 * James McMechan 18 */ 19 20 #define MAJOR_NR UBD_MAJOR 21 #define UBD_SHIFT 4 22 23 #include "linux/module.h" 24 #include "linux/blkdev.h" 25 #include "linux/hdreg.h" 26 #include "linux/init.h" 27 #include "linux/cdrom.h" 28 #include "linux/proc_fs.h" 29 #include "linux/ctype.h" 30 #include "linux/capability.h" 31 #include "linux/mm.h" 32 #include "linux/vmalloc.h" 33 #include "linux/blkpg.h" 34 #include "linux/genhd.h" 35 #include "linux/spinlock.h" 36 #include "linux/platform_device.h" 37 #include "asm/segment.h" 38 #include "asm/uaccess.h" 39 #include "asm/irq.h" 40 #include "asm/types.h" 41 #include "asm/tlbflush.h" 42 #include "user_util.h" 43 #include "mem_user.h" 44 #include "kern_util.h" 45 #include "kern.h" 46 #include "mconsole_kern.h" 47 #include "init.h" 48 #include "irq_user.h" 49 #include "irq_kern.h" 50 #include "ubd_user.h" 51 #include "os.h" 52 #include "mem.h" 53 #include "mem_kern.h" 54 #include "cow.h" 55 56 enum ubd_req { UBD_READ, UBD_WRITE }; 57 58 struct io_thread_req { 59 struct request *req; 60 enum ubd_req op; 61 int fds[2]; 62 unsigned long offsets[2]; 63 unsigned long long offset; 64 unsigned long length; 65 char *buffer; 66 int sectorsize; 67 unsigned long sector_mask; 68 unsigned long long cow_offset; 69 unsigned long bitmap_words[2]; 70 int error; 71 }; 72 73 extern int open_ubd_file(char *file, struct openflags *openflags, int shared, 74 char **backing_file_out, int *bitmap_offset_out, 75 unsigned long *bitmap_len_out, int *data_offset_out, 76 int *create_cow_out); 77 extern int create_cow_file(char *cow_file, char *backing_file, 78 struct openflags flags, int sectorsize, 79 int alignment, int *bitmap_offset_out, 80 unsigned long *bitmap_len_out, 81 int *data_offset_out); 82 extern int read_cow_bitmap(int fd, void *buf, int offset, int len); 83 extern void do_io(struct io_thread_req *req); 84 85 static inline int ubd_test_bit(__u64 bit, unsigned char *data) 86 { 87 __u64 n; 88 int bits, off; 89 90 bits = sizeof(data[0]) * 8; 91 n = bit / bits; 92 off = bit % bits; 93 return((data[n] & (1 << off)) != 0); 94 } 95 96 static inline void ubd_set_bit(__u64 bit, unsigned char *data) 97 { 98 __u64 n; 99 int bits, off; 100 101 bits = sizeof(data[0]) * 8; 102 n = bit / bits; 103 off = bit % bits; 104 data[n] |= (1 << off); 105 } 106 /*End stuff from ubd_user.h*/ 107 108 #define DRIVER_NAME "uml-blkdev" 109 110 static DEFINE_MUTEX(ubd_lock); 111 112 /* XXX - this made sense in 2.4 days, now it's only used as a boolean, and 113 * probably it doesn't make sense even for that. */ 114 static int do_ubd; 115 116 static int ubd_open(struct inode * inode, struct file * filp); 117 static int ubd_release(struct inode * inode, struct file * file); 118 static int ubd_ioctl(struct inode * inode, struct file * file, 119 unsigned int cmd, unsigned long arg); 120 static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo); 121 122 #define MAX_DEV (16) 123 124 static struct block_device_operations ubd_blops = { 125 .owner = THIS_MODULE, 126 .open = ubd_open, 127 .release = ubd_release, 128 .ioctl = ubd_ioctl, 129 .getgeo = ubd_getgeo, 130 }; 131 132 /* Protected by ubd_lock */ 133 static int fake_major = MAJOR_NR; 134 static struct gendisk *ubd_gendisk[MAX_DEV]; 135 static struct gendisk *fake_gendisk[MAX_DEV]; 136 137 #ifdef CONFIG_BLK_DEV_UBD_SYNC 138 #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \ 139 .cl = 1 }) 140 #else 141 #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \ 142 .cl = 1 }) 143 #endif 144 static struct openflags global_openflags = OPEN_FLAGS; 145 146 struct cow { 147 /* backing file name */ 148 char *file; 149 /* backing file fd */ 150 int fd; 151 unsigned long *bitmap; 152 unsigned long bitmap_len; 153 int bitmap_offset; 154 int data_offset; 155 }; 156 157 struct ubd { 158 /* name (and fd, below) of the file opened for writing, either the 159 * backing or the cow file. */ 160 char *file; 161 int count; 162 int fd; 163 __u64 size; 164 struct openflags boot_openflags; 165 struct openflags openflags; 166 unsigned shared:1; 167 unsigned no_cow:1; 168 struct cow cow; 169 struct platform_device pdev; 170 struct request_queue *queue; 171 spinlock_t lock; 172 }; 173 174 #define DEFAULT_COW { \ 175 .file = NULL, \ 176 .fd = -1, \ 177 .bitmap = NULL, \ 178 .bitmap_offset = 0, \ 179 .data_offset = 0, \ 180 } 181 182 #define DEFAULT_UBD { \ 183 .file = NULL, \ 184 .count = 0, \ 185 .fd = -1, \ 186 .size = -1, \ 187 .boot_openflags = OPEN_FLAGS, \ 188 .openflags = OPEN_FLAGS, \ 189 .no_cow = 0, \ 190 .shared = 0, \ 191 .cow = DEFAULT_COW, \ 192 .lock = SPIN_LOCK_UNLOCKED, \ 193 } 194 195 /* Protected by ubd_lock */ 196 struct ubd ubd_devs[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD }; 197 198 /* Only changed by fake_ide_setup which is a setup */ 199 static int fake_ide = 0; 200 static struct proc_dir_entry *proc_ide_root = NULL; 201 static struct proc_dir_entry *proc_ide = NULL; 202 203 static void make_proc_ide(void) 204 { 205 proc_ide_root = proc_mkdir("ide", NULL); 206 proc_ide = proc_mkdir("ide0", proc_ide_root); 207 } 208 209 static int proc_ide_read_media(char *page, char **start, off_t off, int count, 210 int *eof, void *data) 211 { 212 int len; 213 214 strcpy(page, "disk\n"); 215 len = strlen("disk\n"); 216 len -= off; 217 if (len < count){ 218 *eof = 1; 219 if (len <= 0) return 0; 220 } 221 else len = count; 222 *start = page + off; 223 return len; 224 } 225 226 static void make_ide_entries(char *dev_name) 227 { 228 struct proc_dir_entry *dir, *ent; 229 char name[64]; 230 231 if(proc_ide_root == NULL) make_proc_ide(); 232 233 dir = proc_mkdir(dev_name, proc_ide); 234 if(!dir) return; 235 236 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir); 237 if(!ent) return; 238 ent->data = NULL; 239 ent->read_proc = proc_ide_read_media; 240 ent->write_proc = NULL; 241 sprintf(name,"ide0/%s", dev_name); 242 proc_symlink(dev_name, proc_ide_root, name); 243 } 244 245 static int fake_ide_setup(char *str) 246 { 247 fake_ide = 1; 248 return(1); 249 } 250 251 __setup("fake_ide", fake_ide_setup); 252 253 __uml_help(fake_ide_setup, 254 "fake_ide\n" 255 " Create ide0 entries that map onto ubd devices.\n\n" 256 ); 257 258 static int parse_unit(char **ptr) 259 { 260 char *str = *ptr, *end; 261 int n = -1; 262 263 if(isdigit(*str)) { 264 n = simple_strtoul(str, &end, 0); 265 if(end == str) 266 return(-1); 267 *ptr = end; 268 } 269 else if (('a' <= *str) && (*str <= 'z')) { 270 n = *str - 'a'; 271 str++; 272 *ptr = str; 273 } 274 return(n); 275 } 276 277 /* If *index_out == -1 at exit, the passed option was a general one; 278 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it 279 * should not be freed on exit. 280 */ 281 static int ubd_setup_common(char *str, int *index_out, char **error_out) 282 { 283 struct ubd *ubd_dev; 284 struct openflags flags = global_openflags; 285 char *backing_file; 286 int n, err = 0, i; 287 288 if(index_out) *index_out = -1; 289 n = *str; 290 if(n == '='){ 291 char *end; 292 int major; 293 294 str++; 295 if(!strcmp(str, "sync")){ 296 global_openflags = of_sync(global_openflags); 297 goto out1; 298 } 299 300 err = -EINVAL; 301 major = simple_strtoul(str, &end, 0); 302 if((*end != '\0') || (end == str)){ 303 *error_out = "Didn't parse major number"; 304 goto out1; 305 } 306 307 mutex_lock(&ubd_lock); 308 if(fake_major != MAJOR_NR){ 309 *error_out = "Can't assign a fake major twice"; 310 goto out1; 311 } 312 313 fake_major = major; 314 315 printk(KERN_INFO "Setting extra ubd major number to %d\n", 316 major); 317 err = 0; 318 out1: 319 mutex_unlock(&ubd_lock); 320 return err; 321 } 322 323 n = parse_unit(&str); 324 if(n < 0){ 325 *error_out = "Couldn't parse device number"; 326 return -EINVAL; 327 } 328 if(n >= MAX_DEV){ 329 *error_out = "Device number out of range"; 330 return 1; 331 } 332 333 err = -EBUSY; 334 mutex_lock(&ubd_lock); 335 336 ubd_dev = &ubd_devs[n]; 337 if(ubd_dev->file != NULL){ 338 *error_out = "Device is already configured"; 339 goto out; 340 } 341 342 if (index_out) 343 *index_out = n; 344 345 err = -EINVAL; 346 for (i = 0; i < sizeof("rscd="); i++) { 347 switch (*str) { 348 case 'r': 349 flags.w = 0; 350 break; 351 case 's': 352 flags.s = 1; 353 break; 354 case 'd': 355 ubd_dev->no_cow = 1; 356 break; 357 case 'c': 358 ubd_dev->shared = 1; 359 break; 360 case '=': 361 str++; 362 goto break_loop; 363 default: 364 *error_out = "Expected '=' or flag letter " 365 "(r, s, c, or d)"; 366 goto out; 367 } 368 str++; 369 } 370 371 if (*str == '=') 372 *error_out = "Too many flags specified"; 373 else 374 *error_out = "Missing '='"; 375 goto out; 376 377 break_loop: 378 backing_file = strchr(str, ','); 379 380 if (backing_file == NULL) 381 backing_file = strchr(str, ':'); 382 383 if(backing_file != NULL){ 384 if(ubd_dev->no_cow){ 385 *error_out = "Can't specify both 'd' and a cow file"; 386 goto out; 387 } 388 else { 389 *backing_file = '\0'; 390 backing_file++; 391 } 392 } 393 err = 0; 394 ubd_dev->file = str; 395 ubd_dev->cow.file = backing_file; 396 ubd_dev->boot_openflags = flags; 397 out: 398 mutex_unlock(&ubd_lock); 399 return err; 400 } 401 402 static int ubd_setup(char *str) 403 { 404 char *error; 405 int err; 406 407 err = ubd_setup_common(str, NULL, &error); 408 if(err) 409 printk(KERN_ERR "Failed to initialize device with \"%s\" : " 410 "%s\n", str, error); 411 return 1; 412 } 413 414 __setup("ubd", ubd_setup); 415 __uml_help(ubd_setup, 416 "ubd<n><flags>=<filename>[(:|,)<filename2>]\n" 417 " This is used to associate a device with a file in the underlying\n" 418 " filesystem. When specifying two filenames, the first one is the\n" 419 " COW name and the second is the backing file name. As separator you can\n" 420 " use either a ':' or a ',': the first one allows writing things like;\n" 421 " ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n" 422 " while with a ',' the shell would not expand the 2nd '~'.\n" 423 " When using only one filename, UML will detect whether to treat it like\n" 424 " a COW file or a backing file. To override this detection, add the 'd'\n" 425 " flag:\n" 426 " ubd0d=BackingFile\n" 427 " Usually, there is a filesystem in the file, but \n" 428 " that's not required. Swap devices containing swap files can be\n" 429 " specified like this. Also, a file which doesn't contain a\n" 430 " filesystem can have its contents read in the virtual \n" 431 " machine by running 'dd' on the device. <n> must be in the range\n" 432 " 0 to 7. Appending an 'r' to the number will cause that device\n" 433 " to be mounted read-only. For example ubd1r=./ext_fs. Appending\n" 434 " an 's' will cause data to be written to disk on the host immediately.\n\n" 435 ); 436 437 static int udb_setup(char *str) 438 { 439 printk("udb%s specified on command line is almost certainly a ubd -> " 440 "udb TYPO\n", str); 441 return(1); 442 } 443 444 __setup("udb", udb_setup); 445 __uml_help(udb_setup, 446 "udb\n" 447 " This option is here solely to catch ubd -> udb typos, which can be\n" 448 " to impossible to catch visually unless you specifically look for\n" 449 " them. The only result of any option starting with 'udb' is an error\n" 450 " in the boot output.\n\n" 451 ); 452 453 static int fakehd_set = 0; 454 static int fakehd(char *str) 455 { 456 printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n"); 457 fakehd_set = 1; 458 return 1; 459 } 460 461 __setup("fakehd", fakehd); 462 __uml_help(fakehd, 463 "fakehd\n" 464 " Change the ubd device name to \"hd\".\n\n" 465 ); 466 467 static void do_ubd_request(request_queue_t * q); 468 469 /* Only changed by ubd_init, which is an initcall. */ 470 int thread_fd = -1; 471 472 /* call ubd_finish if you need to serialize */ 473 static void __ubd_finish(struct request *req, int error) 474 { 475 int nsect; 476 477 if(error){ 478 end_request(req, 0); 479 return; 480 } 481 nsect = req->current_nr_sectors; 482 req->sector += nsect; 483 req->buffer += nsect << 9; 484 req->errors = 0; 485 req->nr_sectors -= nsect; 486 req->current_nr_sectors = 0; 487 end_request(req, 1); 488 } 489 490 /* Callable only from interrupt context - otherwise you need to do 491 * spin_lock_irq()/spin_lock_irqsave() */ 492 static inline void ubd_finish(struct request *req, int error) 493 { 494 struct ubd *dev = req->rq_disk->private_data; 495 496 spin_lock(&dev->lock); 497 __ubd_finish(req, error); 498 spin_unlock(&dev->lock); 499 } 500 501 /* XXX - move this inside ubd_intr. */ 502 /* Called without dev->lock held, and only in interrupt context. */ 503 static void ubd_handler(void) 504 { 505 struct io_thread_req req; 506 struct request *rq; 507 struct ubd *dev; 508 int n; 509 510 do_ubd = 0; 511 n = os_read_file(thread_fd, &req, sizeof(req)); 512 if(n != sizeof(req)){ 513 printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, " 514 "err = %d\n", os_getpid(), -n); 515 return; 516 } 517 518 rq = req.req; 519 dev = rq->rq_disk->private_data; 520 521 ubd_finish(rq, req.error); 522 reactivate_fd(thread_fd, UBD_IRQ); 523 spin_lock(&dev->lock); 524 do_ubd_request(dev->queue); 525 spin_unlock(&dev->lock); 526 } 527 528 static irqreturn_t ubd_intr(int irq, void *dev) 529 { 530 ubd_handler(); 531 return(IRQ_HANDLED); 532 } 533 534 /* Only changed by ubd_init, which is an initcall. */ 535 static int io_pid = -1; 536 537 void kill_io_thread(void) 538 { 539 if(io_pid != -1) 540 os_kill_process(io_pid, 1); 541 } 542 543 __uml_exitcall(kill_io_thread); 544 545 static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out) 546 { 547 char *file; 548 549 file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file; 550 return(os_file_size(file, size_out)); 551 } 552 553 static void ubd_close_dev(struct ubd *ubd_dev) 554 { 555 os_close_file(ubd_dev->fd); 556 if(ubd_dev->cow.file == NULL) 557 return; 558 559 os_close_file(ubd_dev->cow.fd); 560 vfree(ubd_dev->cow.bitmap); 561 ubd_dev->cow.bitmap = NULL; 562 } 563 564 static int ubd_open_dev(struct ubd *ubd_dev) 565 { 566 struct openflags flags; 567 char **back_ptr; 568 int err, create_cow, *create_ptr; 569 int fd; 570 571 ubd_dev->openflags = ubd_dev->boot_openflags; 572 create_cow = 0; 573 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL; 574 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file; 575 576 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared, 577 back_ptr, &ubd_dev->cow.bitmap_offset, 578 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset, 579 create_ptr); 580 581 if((fd == -ENOENT) && create_cow){ 582 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file, 583 ubd_dev->openflags, 1 << 9, PAGE_SIZE, 584 &ubd_dev->cow.bitmap_offset, 585 &ubd_dev->cow.bitmap_len, 586 &ubd_dev->cow.data_offset); 587 if(fd >= 0){ 588 printk(KERN_INFO "Creating \"%s\" as COW file for " 589 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file); 590 } 591 } 592 593 if(fd < 0){ 594 printk("Failed to open '%s', errno = %d\n", ubd_dev->file, 595 -fd); 596 return fd; 597 } 598 ubd_dev->fd = fd; 599 600 if(ubd_dev->cow.file != NULL){ 601 err = -ENOMEM; 602 ubd_dev->cow.bitmap = (void *) vmalloc(ubd_dev->cow.bitmap_len); 603 if(ubd_dev->cow.bitmap == NULL){ 604 printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); 605 goto error; 606 } 607 flush_tlb_kernel_vm(); 608 609 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap, 610 ubd_dev->cow.bitmap_offset, 611 ubd_dev->cow.bitmap_len); 612 if(err < 0) 613 goto error; 614 615 flags = ubd_dev->openflags; 616 flags.w = 0; 617 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL, 618 NULL, NULL, NULL, NULL); 619 if(err < 0) goto error; 620 ubd_dev->cow.fd = err; 621 } 622 return(0); 623 error: 624 os_close_file(ubd_dev->fd); 625 return(err); 626 } 627 628 static int ubd_disk_register(int major, u64 size, int unit, 629 struct gendisk **disk_out) 630 { 631 struct gendisk *disk; 632 633 disk = alloc_disk(1 << UBD_SHIFT); 634 if(disk == NULL) 635 return(-ENOMEM); 636 637 disk->major = major; 638 disk->first_minor = unit << UBD_SHIFT; 639 disk->fops = &ubd_blops; 640 set_capacity(disk, size / 512); 641 if(major == MAJOR_NR) 642 sprintf(disk->disk_name, "ubd%c", 'a' + unit); 643 else 644 sprintf(disk->disk_name, "ubd_fake%d", unit); 645 646 /* sysfs register (not for ide fake devices) */ 647 if (major == MAJOR_NR) { 648 ubd_devs[unit].pdev.id = unit; 649 ubd_devs[unit].pdev.name = DRIVER_NAME; 650 platform_device_register(&ubd_devs[unit].pdev); 651 disk->driverfs_dev = &ubd_devs[unit].pdev.dev; 652 } 653 654 disk->private_data = &ubd_devs[unit]; 655 disk->queue = ubd_devs[unit].queue; 656 add_disk(disk); 657 658 *disk_out = disk; 659 return 0; 660 } 661 662 #define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9)) 663 664 static int ubd_add(int n, char **error_out) 665 { 666 struct ubd *ubd_dev = &ubd_devs[n]; 667 int err = 0; 668 669 if(ubd_dev->file == NULL) 670 goto out; 671 672 err = ubd_file_size(ubd_dev, &ubd_dev->size); 673 if(err < 0){ 674 *error_out = "Couldn't determine size of device's file"; 675 goto out; 676 } 677 678 ubd_dev->size = ROUND_BLOCK(ubd_dev->size); 679 680 err = -ENOMEM; 681 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock); 682 if (ubd_dev->queue == NULL) { 683 *error_out = "Failed to initialize device queue"; 684 goto out; 685 } 686 ubd_dev->queue->queuedata = ubd_dev; 687 688 err = ubd_disk_register(MAJOR_NR, ubd_dev->size, n, &ubd_gendisk[n]); 689 if(err){ 690 *error_out = "Failed to register device"; 691 goto out_cleanup; 692 } 693 694 if(fake_major != MAJOR_NR) 695 ubd_disk_register(fake_major, ubd_dev->size, n, 696 &fake_gendisk[n]); 697 698 /* perhaps this should also be under the "if (fake_major)" above */ 699 /* using the fake_disk->disk_name and also the fakehd_set name */ 700 if (fake_ide) 701 make_ide_entries(ubd_gendisk[n]->disk_name); 702 703 err = 0; 704 out: 705 return err; 706 707 out_cleanup: 708 blk_cleanup_queue(ubd_dev->queue); 709 goto out; 710 } 711 712 static int ubd_config(char *str, char **error_out) 713 { 714 int n, ret; 715 716 /* This string is possibly broken up and stored, so it's only 717 * freed if ubd_setup_common fails, or if only general options 718 * were set. 719 */ 720 str = kstrdup(str, GFP_KERNEL); 721 if (str == NULL) { 722 *error_out = "Failed to allocate memory"; 723 return -ENOMEM; 724 } 725 726 ret = ubd_setup_common(str, &n, error_out); 727 if (ret) 728 goto err_free; 729 730 if (n == -1) { 731 ret = 0; 732 goto err_free; 733 } 734 735 mutex_lock(&ubd_lock); 736 ret = ubd_add(n, error_out); 737 if (ret) 738 ubd_devs[n].file = NULL; 739 mutex_unlock(&ubd_lock); 740 741 out: 742 return ret; 743 744 err_free: 745 kfree(str); 746 goto out; 747 } 748 749 static int ubd_get_config(char *name, char *str, int size, char **error_out) 750 { 751 struct ubd *ubd_dev; 752 int n, len = 0; 753 754 n = parse_unit(&name); 755 if((n >= MAX_DEV) || (n < 0)){ 756 *error_out = "ubd_get_config : device number out of range"; 757 return(-1); 758 } 759 760 ubd_dev = &ubd_devs[n]; 761 mutex_lock(&ubd_lock); 762 763 if(ubd_dev->file == NULL){ 764 CONFIG_CHUNK(str, size, len, "", 1); 765 goto out; 766 } 767 768 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0); 769 770 if(ubd_dev->cow.file != NULL){ 771 CONFIG_CHUNK(str, size, len, ",", 0); 772 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1); 773 } 774 else CONFIG_CHUNK(str, size, len, "", 1); 775 776 out: 777 mutex_unlock(&ubd_lock); 778 return(len); 779 } 780 781 static int ubd_id(char **str, int *start_out, int *end_out) 782 { 783 int n; 784 785 n = parse_unit(str); 786 *start_out = 0; 787 *end_out = MAX_DEV - 1; 788 return n; 789 } 790 791 static int ubd_remove(int n, char **error_out) 792 { 793 struct ubd *ubd_dev; 794 int err = -ENODEV; 795 796 mutex_lock(&ubd_lock); 797 798 if(ubd_gendisk[n] == NULL) 799 goto out; 800 801 ubd_dev = &ubd_devs[n]; 802 803 if(ubd_dev->file == NULL) 804 goto out; 805 806 /* you cannot remove a open disk */ 807 err = -EBUSY; 808 if(ubd_dev->count > 0) 809 goto out; 810 811 del_gendisk(ubd_gendisk[n]); 812 put_disk(ubd_gendisk[n]); 813 ubd_gendisk[n] = NULL; 814 815 if(fake_gendisk[n] != NULL){ 816 del_gendisk(fake_gendisk[n]); 817 put_disk(fake_gendisk[n]); 818 fake_gendisk[n] = NULL; 819 } 820 821 blk_cleanup_queue(ubd_dev->queue); 822 platform_device_unregister(&ubd_dev->pdev); 823 *ubd_dev = ((struct ubd) DEFAULT_UBD); 824 err = 0; 825 out: 826 mutex_unlock(&ubd_lock); 827 return err; 828 } 829 830 /* All these are called by mconsole in process context and without 831 * ubd-specific locks. The structure itself is const except for .list. 832 */ 833 static struct mc_device ubd_mc = { 834 .list = LIST_HEAD_INIT(ubd_mc.list), 835 .name = "ubd", 836 .config = ubd_config, 837 .get_config = ubd_get_config, 838 .id = ubd_id, 839 .remove = ubd_remove, 840 }; 841 842 static int __init ubd_mc_init(void) 843 { 844 mconsole_register_dev(&ubd_mc); 845 return 0; 846 } 847 848 __initcall(ubd_mc_init); 849 850 static int __init ubd0_init(void) 851 { 852 struct ubd *ubd_dev = &ubd_devs[0]; 853 854 mutex_lock(&ubd_lock); 855 if(ubd_dev->file == NULL) 856 ubd_dev->file = "root_fs"; 857 mutex_unlock(&ubd_lock); 858 859 return(0); 860 } 861 862 __initcall(ubd0_init); 863 864 /* Used in ubd_init, which is an initcall */ 865 static struct platform_driver ubd_driver = { 866 .driver = { 867 .name = DRIVER_NAME, 868 }, 869 }; 870 871 static int __init ubd_init(void) 872 { 873 char *error; 874 int i, err; 875 876 if (register_blkdev(MAJOR_NR, "ubd")) 877 return -1; 878 879 if (fake_major != MAJOR_NR) { 880 char name[sizeof("ubd_nnn\0")]; 881 882 snprintf(name, sizeof(name), "ubd_%d", fake_major); 883 if (register_blkdev(fake_major, "ubd")) 884 return -1; 885 } 886 platform_driver_register(&ubd_driver); 887 mutex_lock(&ubd_lock); 888 for (i = 0; i < MAX_DEV; i++){ 889 err = ubd_add(i, &error); 890 if(err) 891 printk(KERN_ERR "Failed to initialize ubd device %d :" 892 "%s\n", i, error); 893 } 894 mutex_unlock(&ubd_lock); 895 return 0; 896 } 897 898 late_initcall(ubd_init); 899 900 static int __init ubd_driver_init(void){ 901 unsigned long stack; 902 int err; 903 904 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ 905 if(global_openflags.s){ 906 printk(KERN_INFO "ubd: Synchronous mode\n"); 907 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is 908 * enough. So use anyway the io thread. */ 909 } 910 stack = alloc_stack(0, 0); 911 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), 912 &thread_fd); 913 if(io_pid < 0){ 914 printk(KERN_ERR 915 "ubd : Failed to start I/O thread (errno = %d) - " 916 "falling back to synchronous I/O\n", -io_pid); 917 io_pid = -1; 918 return(0); 919 } 920 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, 921 IRQF_DISABLED, "ubd", ubd_devs); 922 if(err != 0) 923 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); 924 return 0; 925 } 926 927 device_initcall(ubd_driver_init); 928 929 static int ubd_open(struct inode *inode, struct file *filp) 930 { 931 struct gendisk *disk = inode->i_bdev->bd_disk; 932 struct ubd *ubd_dev = disk->private_data; 933 int err = 0; 934 935 if(ubd_dev->count == 0){ 936 err = ubd_open_dev(ubd_dev); 937 if(err){ 938 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n", 939 disk->disk_name, ubd_dev->file, -err); 940 goto out; 941 } 942 } 943 ubd_dev->count++; 944 set_disk_ro(disk, !ubd_dev->openflags.w); 945 946 /* This should no more be needed. And it didn't work anyway to exclude 947 * read-write remounting of filesystems.*/ 948 /*if((filp->f_mode & FMODE_WRITE) && !ubd_dev->openflags.w){ 949 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev); 950 err = -EROFS; 951 }*/ 952 out: 953 return(err); 954 } 955 956 static int ubd_release(struct inode * inode, struct file * file) 957 { 958 struct gendisk *disk = inode->i_bdev->bd_disk; 959 struct ubd *ubd_dev = disk->private_data; 960 961 if(--ubd_dev->count == 0) 962 ubd_close_dev(ubd_dev); 963 return(0); 964 } 965 966 static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, 967 __u64 *cow_offset, unsigned long *bitmap, 968 __u64 bitmap_offset, unsigned long *bitmap_words, 969 __u64 bitmap_len) 970 { 971 __u64 sector = io_offset >> 9; 972 int i, update_bitmap = 0; 973 974 for(i = 0; i < length >> 9; i++){ 975 if(cow_mask != NULL) 976 ubd_set_bit(i, (unsigned char *) cow_mask); 977 if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) 978 continue; 979 980 update_bitmap = 1; 981 ubd_set_bit(sector + i, (unsigned char *) bitmap); 982 } 983 984 if(!update_bitmap) 985 return; 986 987 *cow_offset = sector / (sizeof(unsigned long) * 8); 988 989 /* This takes care of the case where we're exactly at the end of the 990 * device, and *cow_offset + 1 is off the end. So, just back it up 991 * by one word. Thanks to Lynn Kerby for the fix and James McMechan 992 * for the original diagnosis. 993 */ 994 if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) / 995 sizeof(unsigned long) - 1)) 996 (*cow_offset)--; 997 998 bitmap_words[0] = bitmap[*cow_offset]; 999 bitmap_words[1] = bitmap[*cow_offset + 1]; 1000 1001 *cow_offset *= sizeof(unsigned long); 1002 *cow_offset += bitmap_offset; 1003 } 1004 1005 static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, 1006 __u64 bitmap_offset, __u64 bitmap_len) 1007 { 1008 __u64 sector = req->offset >> 9; 1009 int i; 1010 1011 if(req->length > (sizeof(req->sector_mask) * 8) << 9) 1012 panic("Operation too long"); 1013 1014 if(req->op == UBD_READ) { 1015 for(i = 0; i < req->length >> 9; i++){ 1016 if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) 1017 ubd_set_bit(i, (unsigned char *) 1018 &req->sector_mask); 1019 } 1020 } 1021 else cowify_bitmap(req->offset, req->length, &req->sector_mask, 1022 &req->cow_offset, bitmap, bitmap_offset, 1023 req->bitmap_words, bitmap_len); 1024 } 1025 1026 /* Called with dev->lock held */ 1027 static int prepare_request(struct request *req, struct io_thread_req *io_req) 1028 { 1029 struct gendisk *disk = req->rq_disk; 1030 struct ubd *ubd_dev = disk->private_data; 1031 __u64 offset; 1032 int len; 1033 1034 /* This should be impossible now */ 1035 if((rq_data_dir(req) == WRITE) && !ubd_dev->openflags.w){ 1036 printk("Write attempted on readonly ubd device %s\n", 1037 disk->disk_name); 1038 end_request(req, 0); 1039 return(1); 1040 } 1041 1042 offset = ((__u64) req->sector) << 9; 1043 len = req->current_nr_sectors << 9; 1044 1045 io_req->req = req; 1046 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd : ubd_dev->fd; 1047 io_req->fds[1] = ubd_dev->fd; 1048 io_req->cow_offset = -1; 1049 io_req->offset = offset; 1050 io_req->length = len; 1051 io_req->error = 0; 1052 io_req->sector_mask = 0; 1053 1054 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; 1055 io_req->offsets[0] = 0; 1056 io_req->offsets[1] = ubd_dev->cow.data_offset; 1057 io_req->buffer = req->buffer; 1058 io_req->sectorsize = 1 << 9; 1059 1060 if(ubd_dev->cow.file != NULL) 1061 cowify_req(io_req, ubd_dev->cow.bitmap, ubd_dev->cow.bitmap_offset, 1062 ubd_dev->cow.bitmap_len); 1063 1064 return(0); 1065 } 1066 1067 /* Called with dev->lock held */ 1068 static void do_ubd_request(request_queue_t *q) 1069 { 1070 struct io_thread_req io_req; 1071 struct request *req; 1072 int err, n; 1073 1074 if(thread_fd == -1){ 1075 while((req = elv_next_request(q)) != NULL){ 1076 err = prepare_request(req, &io_req); 1077 if(!err){ 1078 do_io(&io_req); 1079 __ubd_finish(req, io_req.error); 1080 } 1081 } 1082 } 1083 else { 1084 if(do_ubd || (req = elv_next_request(q)) == NULL) 1085 return; 1086 err = prepare_request(req, &io_req); 1087 if(!err){ 1088 do_ubd = 1; 1089 n = os_write_file(thread_fd, (char *) &io_req, 1090 sizeof(io_req)); 1091 if(n != sizeof(io_req)) 1092 printk("write to io thread failed, " 1093 "errno = %d\n", -n); 1094 } 1095 } 1096 } 1097 1098 static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) 1099 { 1100 struct ubd *ubd_dev = bdev->bd_disk->private_data; 1101 1102 geo->heads = 128; 1103 geo->sectors = 32; 1104 geo->cylinders = ubd_dev->size / (128 * 32 * 512); 1105 return 0; 1106 } 1107 1108 static int ubd_ioctl(struct inode * inode, struct file * file, 1109 unsigned int cmd, unsigned long arg) 1110 { 1111 struct ubd *ubd_dev = inode->i_bdev->bd_disk->private_data; 1112 struct hd_driveid ubd_id = { 1113 .cyls = 0, 1114 .heads = 128, 1115 .sectors = 32, 1116 }; 1117 1118 switch (cmd) { 1119 struct cdrom_volctrl volume; 1120 case HDIO_GET_IDENTITY: 1121 ubd_id.cyls = ubd_dev->size / (128 * 32 * 512); 1122 if(copy_to_user((char __user *) arg, (char *) &ubd_id, 1123 sizeof(ubd_id))) 1124 return(-EFAULT); 1125 return(0); 1126 1127 case CDROMVOLREAD: 1128 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume))) 1129 return(-EFAULT); 1130 volume.channel0 = 255; 1131 volume.channel1 = 255; 1132 volume.channel2 = 255; 1133 volume.channel3 = 255; 1134 if(copy_to_user((char __user *) arg, &volume, sizeof(volume))) 1135 return(-EFAULT); 1136 return(0); 1137 } 1138 return(-EINVAL); 1139 } 1140 1141 static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow) 1142 { 1143 struct uml_stat buf1, buf2; 1144 int err; 1145 1146 if(from_cmdline == NULL) 1147 return 0; 1148 if(!strcmp(from_cmdline, from_cow)) 1149 return 0; 1150 1151 err = os_stat_file(from_cmdline, &buf1); 1152 if(err < 0){ 1153 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err); 1154 return 0; 1155 } 1156 err = os_stat_file(from_cow, &buf2); 1157 if(err < 0){ 1158 printk("Couldn't stat '%s', err = %d\n", from_cow, -err); 1159 return 1; 1160 } 1161 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino)) 1162 return 0; 1163 1164 printk("Backing file mismatch - \"%s\" requested,\n" 1165 "\"%s\" specified in COW header of \"%s\"\n", 1166 from_cmdline, from_cow, cow); 1167 return 1; 1168 } 1169 1170 static int backing_file_mismatch(char *file, __u64 size, time_t mtime) 1171 { 1172 unsigned long modtime; 1173 unsigned long long actual; 1174 int err; 1175 1176 err = os_file_modtime(file, &modtime); 1177 if(err < 0){ 1178 printk("Failed to get modification time of backing file " 1179 "\"%s\", err = %d\n", file, -err); 1180 return(err); 1181 } 1182 1183 err = os_file_size(file, &actual); 1184 if(err < 0){ 1185 printk("Failed to get size of backing file \"%s\", " 1186 "err = %d\n", file, -err); 1187 return(err); 1188 } 1189 1190 if(actual != size){ 1191 /*__u64 can be a long on AMD64 and with %lu GCC complains; so 1192 * the typecast.*/ 1193 printk("Size mismatch (%llu vs %llu) of COW header vs backing " 1194 "file\n", (unsigned long long) size, actual); 1195 return(-EINVAL); 1196 } 1197 if(modtime != mtime){ 1198 printk("mtime mismatch (%ld vs %ld) of COW header vs backing " 1199 "file\n", mtime, modtime); 1200 return(-EINVAL); 1201 } 1202 return(0); 1203 } 1204 1205 int read_cow_bitmap(int fd, void *buf, int offset, int len) 1206 { 1207 int err; 1208 1209 err = os_seek_file(fd, offset); 1210 if(err < 0) 1211 return(err); 1212 1213 err = os_read_file(fd, buf, len); 1214 if(err < 0) 1215 return(err); 1216 1217 return(0); 1218 } 1219 1220 int open_ubd_file(char *file, struct openflags *openflags, int shared, 1221 char **backing_file_out, int *bitmap_offset_out, 1222 unsigned long *bitmap_len_out, int *data_offset_out, 1223 int *create_cow_out) 1224 { 1225 time_t mtime; 1226 unsigned long long size; 1227 __u32 version, align; 1228 char *backing_file; 1229 int fd, err, sectorsize, asked_switch, mode = 0644; 1230 1231 fd = os_open_file(file, *openflags, mode); 1232 if (fd < 0) { 1233 if ((fd == -ENOENT) && (create_cow_out != NULL)) 1234 *create_cow_out = 1; 1235 if (!openflags->w || 1236 ((fd != -EROFS) && (fd != -EACCES))) 1237 return fd; 1238 openflags->w = 0; 1239 fd = os_open_file(file, *openflags, mode); 1240 if (fd < 0) 1241 return fd; 1242 } 1243 1244 if(shared) 1245 printk("Not locking \"%s\" on the host\n", file); 1246 else { 1247 err = os_lock_file(fd, openflags->w); 1248 if(err < 0){ 1249 printk("Failed to lock '%s', err = %d\n", file, -err); 1250 goto out_close; 1251 } 1252 } 1253 1254 /* Successful return case! */ 1255 if(backing_file_out == NULL) 1256 return(fd); 1257 1258 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, 1259 &size, §orsize, &align, bitmap_offset_out); 1260 if(err && (*backing_file_out != NULL)){ 1261 printk("Failed to read COW header from COW file \"%s\", " 1262 "errno = %d\n", file, -err); 1263 goto out_close; 1264 } 1265 if(err) 1266 return(fd); 1267 1268 asked_switch = path_requires_switch(*backing_file_out, backing_file, file); 1269 1270 /* Allow switching only if no mismatch. */ 1271 if (asked_switch && !backing_file_mismatch(*backing_file_out, size, mtime)) { 1272 printk("Switching backing file to '%s'\n", *backing_file_out); 1273 err = write_cow_header(file, fd, *backing_file_out, 1274 sectorsize, align, &size); 1275 if (err) { 1276 printk("Switch failed, errno = %d\n", -err); 1277 goto out_close; 1278 } 1279 } else { 1280 *backing_file_out = backing_file; 1281 err = backing_file_mismatch(*backing_file_out, size, mtime); 1282 if (err) 1283 goto out_close; 1284 } 1285 1286 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, 1287 bitmap_len_out, data_offset_out); 1288 1289 return fd; 1290 out_close: 1291 os_close_file(fd); 1292 return err; 1293 } 1294 1295 int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, 1296 int sectorsize, int alignment, int *bitmap_offset_out, 1297 unsigned long *bitmap_len_out, int *data_offset_out) 1298 { 1299 int err, fd; 1300 1301 flags.c = 1; 1302 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL); 1303 if(fd < 0){ 1304 err = fd; 1305 printk("Open of COW file '%s' failed, errno = %d\n", cow_file, 1306 -err); 1307 goto out; 1308 } 1309 1310 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment, 1311 bitmap_offset_out, bitmap_len_out, 1312 data_offset_out); 1313 if(!err) 1314 return(fd); 1315 os_close_file(fd); 1316 out: 1317 return(err); 1318 } 1319 1320 static int update_bitmap(struct io_thread_req *req) 1321 { 1322 int n; 1323 1324 if(req->cow_offset == -1) 1325 return(0); 1326 1327 n = os_seek_file(req->fds[1], req->cow_offset); 1328 if(n < 0){ 1329 printk("do_io - bitmap lseek failed : err = %d\n", -n); 1330 return(1); 1331 } 1332 1333 n = os_write_file(req->fds[1], &req->bitmap_words, 1334 sizeof(req->bitmap_words)); 1335 if(n != sizeof(req->bitmap_words)){ 1336 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, 1337 req->fds[1]); 1338 return(1); 1339 } 1340 1341 return(0); 1342 } 1343 1344 void do_io(struct io_thread_req *req) 1345 { 1346 char *buf; 1347 unsigned long len; 1348 int n, nsectors, start, end, bit; 1349 int err; 1350 __u64 off; 1351 1352 nsectors = req->length / req->sectorsize; 1353 start = 0; 1354 do { 1355 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); 1356 end = start; 1357 while((end < nsectors) && 1358 (ubd_test_bit(end, (unsigned char *) 1359 &req->sector_mask) == bit)) 1360 end++; 1361 1362 off = req->offset + req->offsets[bit] + 1363 start * req->sectorsize; 1364 len = (end - start) * req->sectorsize; 1365 buf = &req->buffer[start * req->sectorsize]; 1366 1367 err = os_seek_file(req->fds[bit], off); 1368 if(err < 0){ 1369 printk("do_io - lseek failed : err = %d\n", -err); 1370 req->error = 1; 1371 return; 1372 } 1373 if(req->op == UBD_READ){ 1374 n = 0; 1375 do { 1376 buf = &buf[n]; 1377 len -= n; 1378 n = os_read_file(req->fds[bit], buf, len); 1379 if (n < 0) { 1380 printk("do_io - read failed, err = %d " 1381 "fd = %d\n", -n, req->fds[bit]); 1382 req->error = 1; 1383 return; 1384 } 1385 } while((n < len) && (n != 0)); 1386 if (n < len) memset(&buf[n], 0, len - n); 1387 } else { 1388 n = os_write_file(req->fds[bit], buf, len); 1389 if(n != len){ 1390 printk("do_io - write failed err = %d " 1391 "fd = %d\n", -n, req->fds[bit]); 1392 req->error = 1; 1393 return; 1394 } 1395 } 1396 1397 start = end; 1398 } while(start < nsectors); 1399 1400 req->error = update_bitmap(req); 1401 } 1402 1403 /* Changed in start_io_thread, which is serialized by being called only 1404 * from ubd_init, which is an initcall. 1405 */ 1406 int kernel_fd = -1; 1407 1408 /* Only changed by the io thread. XXX: currently unused. */ 1409 static int io_count = 0; 1410 1411 int io_thread(void *arg) 1412 { 1413 struct io_thread_req req; 1414 int n; 1415 1416 ignore_sigwinch_sig(); 1417 while(1){ 1418 n = os_read_file(kernel_fd, &req, sizeof(req)); 1419 if(n != sizeof(req)){ 1420 if(n < 0) 1421 printk("io_thread - read failed, fd = %d, " 1422 "err = %d\n", kernel_fd, -n); 1423 else { 1424 printk("io_thread - short read, fd = %d, " 1425 "length = %d\n", kernel_fd, n); 1426 } 1427 continue; 1428 } 1429 io_count++; 1430 do_io(&req); 1431 n = os_write_file(kernel_fd, &req, sizeof(req)); 1432 if(n != sizeof(req)) 1433 printk("io_thread - write failed, fd = %d, err = %d\n", 1434 kernel_fd, -n); 1435 } 1436 1437 return 0; 1438 } 1439