xref: /linux/arch/um/drivers/ubd_kern.c (revision 2624f124b3b5d550ab2fbef7ee3bc0e1fed09722)
1 /*
2  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3  * Licensed under the GPL
4  */
5 
6 /* 2001-09-28...2002-04-17
7  * Partition stuff by James_McMechan@hotmail.com
8  * old style ubd by setting UBD_SHIFT to 0
9  * 2002-09-27...2002-10-18 massive tinkering for 2.5
10  * partitions have changed in 2.5
11  * 2003-01-29 more tinkering for 2.5.59-1
12  * This should now address the sysfs problems and has
13  * the symlink for devfs to allow for booting with
14  * the common /dev/ubd/discX/... names rather than
15  * only /dev/ubdN/discN this version also has lots of
16  * clean ups preparing for ubd-many.
17  * James McMechan
18  */
19 
20 #define MAJOR_NR UBD_MAJOR
21 #define UBD_SHIFT 4
22 
23 #include "linux/config.h"
24 #include "linux/module.h"
25 #include "linux/blkdev.h"
26 #include "linux/hdreg.h"
27 #include "linux/init.h"
28 #include "linux/devfs_fs_kernel.h"
29 #include "linux/cdrom.h"
30 #include "linux/proc_fs.h"
31 #include "linux/ctype.h"
32 #include "linux/capability.h"
33 #include "linux/mm.h"
34 #include "linux/vmalloc.h"
35 #include "linux/blkpg.h"
36 #include "linux/genhd.h"
37 #include "linux/spinlock.h"
38 #include "asm/atomic.h"
39 #include "asm/segment.h"
40 #include "asm/uaccess.h"
41 #include "asm/irq.h"
42 #include "asm/types.h"
43 #include "asm/tlbflush.h"
44 #include "user_util.h"
45 #include "mem_user.h"
46 #include "kern_util.h"
47 #include "kern.h"
48 #include "mconsole_kern.h"
49 #include "init.h"
50 #include "irq_user.h"
51 #include "irq_kern.h"
52 #include "ubd_user.h"
53 #include "os.h"
54 #include "mem.h"
55 #include "mem_kern.h"
56 #include "cow.h"
57 #include "aio.h"
58 
59 enum ubd_req { UBD_READ, UBD_WRITE };
60 
/* Description of a single I/O operation handed to do_io().  The fields are
 * filled in by prepare_request() for each scatterlist segment of a request.
 */
61 struct io_thread_req {
62 	enum aio_type op;		/* AIO_READ or AIO_WRITE, from the request direction */
63 	int fds[2];			/* [0]: backing file fd if there is one, else the device fd; [1]: the device fd */
64 	unsigned long offsets[2];	/* [0]: 0; [1]: the COW data offset */
65 	unsigned long long offset;	/* byte offset of this segment on the device */
66 	unsigned long length;		/* length of this segment in bytes */
67 	char *buffer;			/* kernel address of the segment's data */
68 	int sectorsize;			/* always 1 << 9 here */
69 	int bitmap_offset;		/* copied from the device's cow.bitmap_offset */
70 	long bitmap_start;		/* -1, or first sector newly marked by cowify_bitmap() */
71 	long bitmap_end;		/* -1, or one past the last sector newly marked */
72 	int error;			/* initialized to 0 by prepare_request() */
73 };
74 
75 extern int open_ubd_file(char *file, struct openflags *openflags,
76 			 char **backing_file_out, int *bitmap_offset_out,
77 			 unsigned long *bitmap_len_out, int *data_offset_out,
78 			 int *create_cow_out);
79 extern int create_cow_file(char *cow_file, char *backing_file,
80 			   struct openflags flags, int sectorsize,
81 			   int alignment, int *bitmap_offset_out,
82 			   unsigned long *bitmap_len_out,
83 			   int *data_offset_out);
84 extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
85 extern void do_io(struct io_thread_req *req, struct request *r,
86 		  unsigned long *bitmap);
87 
/* Report whether bit number 'bit' is set in the bitmap at 'data'.  The bitmap
 * is addressed as an array of bytes, with bits counted from the least
 * significant bit of each byte.  Returns 1 if the bit is set, 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, void *data)
{
	unsigned char *bytes = data;
	int bits_per_byte = sizeof(bytes[0]) * 8;
	__u64 index = bit / bits_per_byte;
	int shift = bit % bits_per_byte;

	if (bytes[index] & (1 << shift))
		return 1;
	return 0;
}
99 
/* Set bit number 'bit' in the bitmap at 'data', using the same byte-wise,
 * least-significant-bit-first numbering as ubd_test_bit().
 */
static inline void ubd_set_bit(__u64 bit, void *data)
{
	unsigned char *bytes = data;
	int bits_per_byte = sizeof(bytes[0]) * 8;
	__u64 index = bit / bits_per_byte;
	int shift = bit % bits_per_byte;

	bytes[index] |= (1 << shift);
}
111 /*End stuff from ubd_user.h*/
112 
113 #define DRIVER_NAME "uml-blkdev"
114 
115 static DEFINE_SPINLOCK(ubd_io_lock);	/* passed to blk_init_queue() in ubd_init(); also taken by ubd_finish() */
116 static DEFINE_SPINLOCK(ubd_lock);	/* taken around fake_major and ubd_dev[] configuration changes */
117 
118 static int ubd_open(struct inode * inode, struct file * filp);
119 static int ubd_release(struct inode * inode, struct file * file);
120 static int ubd_ioctl(struct inode * inode, struct file * file,
121 		     unsigned int cmd, unsigned long arg);
122 
123 #define MAX_DEV (8)
124 
/* Block device operations installed on every gendisk by ubd_new_disk(). */
125 static struct block_device_operations ubd_blops = {
126         .owner		= THIS_MODULE,
127         .open		= ubd_open,
128         .release	= ubd_release,
129         .ioctl		= ubd_ioctl,
130 };
131 
132 /* Protected by the queue_lock */
133 static request_queue_t *ubd_queue;
134 
135 /* Protected by ubd_lock */
136 static int fake_major = MAJOR_NR;
137 
138 static struct gendisk *ubd_gendisk[MAX_DEV];
139 static struct gendisk *fake_gendisk[MAX_DEV];
140 
/* Default open flags for ubd files.  The two variants differ only in .s,
 * which is set when CONFIG_BLK_DEV_UBD_SYNC is configured; it is the same
 * flag that the per-device 's' option turns on in ubd_setup_common().
 */
141 #ifdef CONFIG_BLK_DEV_UBD_SYNC
142 #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
143 					 .cl = 1 })
144 #else
145 #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
146 					 .cl = 1 })
147 #endif
148 
149 /* Not protected - changed only in ubd_setup_common and then only
150  * to enable O_SYNC.
151  */
152 static struct openflags global_openflags = OPEN_FLAGS;
153 
/* Copy-on-write state of a device; only meaningful when file is non-NULL. */
153 struct cow {
154 	/* This is the backing file, actually */
155 	char *file;
156 	int fd;				/* backing file descriptor, opened with .w cleared in ubd_open_dev() */
157 	unsigned long *bitmap;		/* vmalloc'ed in ubd_open_dev(), freed in ubd_close() */
158 	unsigned long bitmap_len;	/* size passed to vmalloc() and read_cow_bitmap() */
159 	int bitmap_offset;		/* offset of the bitmap within the COW file */
160         int data_offset;		/* used as offsets[1] of each io_thread_req */
161 };
163 
164 #define MAX_SG 64
165 
/* Per-device state; one entry of ubd_dev[] per unit. */
165 struct ubd {
166 	char *file;			/* host file for the device; NULL means unconfigured */
167 	int count;			/* open count: ubd_open() increments, ubd_release() decrements */
168 	int fd;				/* descriptor for file, valid while the device is open */
169 	__u64 size;			/* device size in bytes, rounded up by ROUND_BLOCK in ubd_add() */
170 	struct openflags boot_openflags;	/* flags chosen at configuration time */
171 	struct openflags openflags;	/* copy of boot_openflags used for the current open */
172 	int no_cow;			/* set by the 'd' flag in ubd_setup_common() */
173 	struct cow cow;
174 	struct platform_device pdev;	/* registered in ubd_new_disk() for the real major */
175         struct scatterlist sg[MAX_SG];	/* filled by blk_rq_map_sg() in do_ubd_request() */
176 };
178 
/* Initializer for an unused struct cow: no backing file, no bitmap. */
179 #define DEFAULT_COW { \
180 	.file =			NULL, \
181         .fd =			-1, \
182         .bitmap =		NULL, \
183 	.bitmap_offset =	0, \
184         .data_offset =		0, \
185 }
186 
/* Initializer for an unconfigured device.  ubd_remove() assigns it to a
 * device to return it to this state.
 */
187 #define DEFAULT_UBD { \
188 	.file = 		NULL, \
189 	.count =		0, \
190 	.fd =			-1, \
191 	.size =			-1, \
192 	.boot_openflags =	OPEN_FLAGS, \
193 	.openflags =		OPEN_FLAGS, \
194         .no_cow =               0, \
195         .cow =			DEFAULT_COW, \
196 }
197 
/* The device table, indexed by unit number; every slot starts unconfigured. */
198 struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
199 
200 static int ubd0_init(void)
201 {
202 	struct ubd *dev = &ubd_dev[0];
203 
204 	if(dev->file == NULL)
205 		dev->file = "root_fs";
206 	return(0);
207 }
208 
209 __initcall(ubd0_init);
210 
211 /* Only changed by fake_ide_setup which is a setup */
212 static int fake_ide = 0;
213 static struct proc_dir_entry *proc_ide_root = NULL;
214 static struct proc_dir_entry *proc_ide = NULL;
215 
216 static void make_proc_ide(void)
217 {
218 	proc_ide_root = proc_mkdir("ide", NULL);
219 	proc_ide = proc_mkdir("ide0", proc_ide_root);
220 }
221 
/* read_proc handler for the fake "media" entry: the content is always
 * "disk\n".
 *
 * The string is copied to page; the return value is the number of bytes
 * available from offset off, limited to count.  *eof is set to 1 when fewer
 * than count bytes remain, and *start is pointed at page + off whenever a
 * positive-length result (or a full count) is returned.  data is unused.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	static const char media[] = "disk\n";
	int remaining;

	strcpy(page, media);
	remaining = strlen(media);
	remaining -= off;

	if (remaining >= count) {
		remaining = count;
	}
	else {
		*eof = 1;
		if (remaining <= 0)
			return 0;
	}

	*start = page + off;
	return remaining;
}
238 
239 static void make_ide_entries(char *dev_name)
240 {
241 	struct proc_dir_entry *dir, *ent;
242 	char name[64];
243 
244 	if(proc_ide_root == NULL) make_proc_ide();
245 
246 	dir = proc_mkdir(dev_name, proc_ide);
247 	if(!dir) return;
248 
249 	ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
250 	if(!ent) return;
251 	ent->nlink = 1;
252 	ent->data = NULL;
253 	ent->read_proc = proc_ide_read_media;
254 	ent->write_proc = NULL;
255 	sprintf(name,"ide0/%s", dev_name);
256 	proc_symlink(dev_name, proc_ide_root, name);
257 }
258 
259 static int fake_ide_setup(char *str)
260 {
261 	fake_ide = 1;
262 	return(1);
263 }
264 
265 __setup("fake_ide", fake_ide_setup);
266 
267 __uml_help(fake_ide_setup,
268 "fake_ide\n"
269 "    Create ide0 entries that map onto ubd devices.\n\n"
270 );
271 
/* Parse a unit specifier at the start of *ptr.
 *
 * Accepted forms are a number (any base understood by simple_strtoul with
 * base 0) or a single letter 'a'..'h', which maps to 0..7.  On success *ptr
 * is advanced past the specifier and the unit number is returned.
 *
 * Returns -1, leaving *ptr untouched, when no unit can be parsed or when the
 * number does not fit in a non-negative int.  Without that range check a
 * very large number would be truncated and could alias a valid unit.
 */
static int parse_unit(char **ptr)
{
	char *str = *ptr, *end;
	unsigned long val;
	int n = -1;

	if(isdigit(*str)) {
		val = simple_strtoul(str, &end, 0);
		if(end == str)
			return(-1);

		n = val;
		if((n < 0) || ((unsigned long) n != val))
			return(-1);
		*ptr = end;
	}
	else if (('a' <= *str) && (*str <= 'h')) {
		n = *str - 'a';
		str++;
		*ptr = str;
	}
	return(n);
}
290 
291 static int ubd_setup_common(char *str, int *index_out)
292 {
293 	struct ubd *dev;
294 	struct openflags flags = global_openflags;
295 	char *backing_file;
296 	int n, err, i;
297 
298 	if(index_out) *index_out = -1;
299 	n = *str;
300 	if(n == '='){
301 		char *end;
302 		int major;
303 
304 		str++;
305 		if(!strcmp(str, "sync")){
306 			global_openflags = of_sync(global_openflags);
307 			return(0);
308 		}
309 		major = simple_strtoul(str, &end, 0);
310 		if((*end != '\0') || (end == str)){
311 			printk(KERN_ERR
312 			       "ubd_setup : didn't parse major number\n");
313 			return(1);
314 		}
315 
316 		err = 1;
317  		spin_lock(&ubd_lock);
318  		if(fake_major != MAJOR_NR){
319  			printk(KERN_ERR "Can't assign a fake major twice\n");
320  			goto out1;
321  		}
322 
323  		fake_major = major;
324 
325 		printk(KERN_INFO "Setting extra ubd major number to %d\n",
326 		       major);
327  		err = 0;
328  	out1:
329  		spin_unlock(&ubd_lock);
330 		return(err);
331 	}
332 
333 	n = parse_unit(&str);
334 	if(n < 0){
335 		printk(KERN_ERR "ubd_setup : couldn't parse unit number "
336 		       "'%s'\n", str);
337 		return(1);
338 	}
339 	if(n >= MAX_DEV){
340 		printk(KERN_ERR "ubd_setup : index %d out of range "
341 		       "(%d devices, from 0 to %d)\n", n, MAX_DEV, MAX_DEV - 1);
342 		return(1);
343 	}
344 
345 	err = 1;
346 	spin_lock(&ubd_lock);
347 
348 	dev = &ubd_dev[n];
349 	if(dev->file != NULL){
350 		printk(KERN_ERR "ubd_setup : device already configured\n");
351 		goto out;
352 	}
353 
354 	if (index_out)
355 		*index_out = n;
356 
357 	for (i = 0; i < 4; i++) {
358 		switch (*str) {
359 		case 'r':
360 			flags.w = 0;
361 			break;
362 		case 's':
363 			flags.s = 1;
364 			break;
365 		case 'd':
366 			dev->no_cow = 1;
367 			break;
368 		case '=':
369 			str++;
370 			goto break_loop;
371 		default:
372 			printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r,s or d)\n");
373 			goto out;
374 		}
375 		str++;
376 	}
377 
378         if (*str == '=')
379 		printk(KERN_ERR "ubd_setup : Too many flags specified\n");
380         else
381 		printk(KERN_ERR "ubd_setup : Expected '='\n");
382 	goto out;
383 
384 break_loop:
385 	err = 0;
386 	backing_file = strchr(str, ',');
387 
388 	if (!backing_file) {
389 		backing_file = strchr(str, ':');
390 	}
391 
392 	if(backing_file){
393 		if(dev->no_cow)
394 			printk(KERN_ERR "Can't specify both 'd' and a "
395 			       "cow file\n");
396 		else {
397 			*backing_file = '\0';
398 			backing_file++;
399 		}
400 	}
401 	dev->file = str;
402 	dev->cow.file = backing_file;
403 	dev->boot_openflags = flags;
404 out:
405 	spin_unlock(&ubd_lock);
406 	return(err);
407 }
408 
409 static int ubd_setup(char *str)
410 {
411 	ubd_setup_common(str, NULL);
412 	return(1);
413 }
414 
415 __setup("ubd", ubd_setup);
416 __uml_help(ubd_setup,
417 "ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
418 "    This is used to associate a device with a file in the underlying\n"
419 "    filesystem. When specifying two filenames, the first one is the\n"
420 "    COW name and the second is the backing file name. As separator you can\n"
421 "    use either a ':' or a ',': the first one allows writing things like;\n"
422 "	ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
423 "    while with a ',' the shell would not expand the 2nd '~'.\n"
424 "    When using only one filename, UML will detect whether to thread it like\n"
425 "    a COW file or a backing file. To override this detection, add the 'd'\n"
426 "    flag:\n"
427 "	ubd0d=BackingFile\n"
428 "    Usually, there is a filesystem in the file, but \n"
429 "    that's not required. Swap devices containing swap files can be\n"
430 "    specified like this. Also, a file which doesn't contain a\n"
431 "    filesystem can have its contents read in the virtual \n"
432 "    machine by running 'dd' on the device. <n> must be in the range\n"
433 "    0 to 7. Appending an 'r' to the number will cause that device\n"
434 "    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
435 "    an 's' will cause data to be written to disk on the host immediately.\n\n"
436 );
437 
438 static int udb_setup(char *str)
439 {
440 	printk("udb%s specified on command line is almost certainly a ubd -> "
441 	       "udb TYPO\n", str);
442 	return(1);
443 }
444 
445 __setup("udb", udb_setup);
446 __uml_help(udb_setup,
447 "udb\n"
448 "    This option is here solely to catch ubd -> udb typos, which can be\n"
449 "    to impossible to catch visually unless you specifically look for\n"
450 "    them.  The only result of any option starting with 'udb' is an error\n"
451 "    in the boot output.\n\n"
452 );
453 
454 static int fakehd_set = 0;
455 static int fakehd(char *str)
456 {
457 	printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
458 	fakehd_set = 1;
459 	return 1;
460 }
461 
462 __setup("fakehd", fakehd);
463 __uml_help(fakehd,
464 "fakehd\n"
465 "    Change the ubd device name to \"hd\".\n\n"
466 );
467 
468 static void do_ubd_request(request_queue_t * q);
469 static int in_ubd;
470 
471 /* Changed by ubd_handler, which is serialized because interrupts only
472  * happen on CPU 0.
473  */
474 int intr_count = 0;
475 
476 static void ubd_end_request(struct request *req, int bytes, int uptodate)
477 {
478 	if (!end_that_request_first(req, uptodate, bytes >> 9)) {
479 		add_disk_randomness(req->rq_disk);
480 		end_that_request_last(req);
481 	}
482 }
483 
/* Finish req without taking ubd_io_lock; use ubd_finish when serialization
 * is needed.  A negative byte count marks the request as failed, otherwise
 * 'bytes' bytes are completed successfully.
 */
static void __ubd_finish(struct request *req, int bytes)
{
	if (bytes >= 0)
		ubd_end_request(req, bytes, 1);
	else
		ubd_end_request(req, 0, 0);
}
494 
495 static inline void ubd_finish(struct request *req, int bytes)
496 {
497    	spin_lock(&ubd_io_lock);
498 	__ubd_finish(req, bytes);
499   	spin_unlock(&ubd_io_lock);
500 }
501 
/* Bitmap write that is held back until the data writes referring to it have
 * completed; see ubd_intr().
 */
501 struct bitmap_io {
502         atomic_t count;			/* decremented per completed aio; the bitmap aio is submitted at zero */
503         struct aio_context aio;		/* the aio copied into the ubd_aio and submitted by ubd_intr() */
504 };
505 
/* One outstanding aio operation belonging to a block request. */
506 struct ubd_aio {
507         struct aio_context aio;		/* ubd_intr() recovers the ubd_aio from this via container_of() */
508         struct request *req;		/* the block request this operation is part of */
509         int len;			/* bytes covered; subtracted (in sectors) from req->nr_sectors on completion */
510         struct bitmap_io *bitmap;	/* NULL when no bitmap write is pending for this operation */
511         void *bitmap_buf;		/* freed together with the ubd_aio */
512 };
514 
515 static int ubd_reply_fd = -1;
516 
517 static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
518 {
519 	struct aio_thread_reply reply;
520 	struct ubd_aio *aio;
521 	struct request *req;
522 	int err, n, fd = (int) (long) dev;
523 
524 	while(1){
525 		err = os_read_file(fd, &reply, sizeof(reply));
526 		if(err == -EAGAIN)
527 			break;
528 		if(err < 0){
529 			printk("ubd_aio_handler - read returned err %d\n",
530 			       -err);
531 			break;
532 		}
533 
534                 aio = container_of(reply.data, struct ubd_aio, aio);
535                 n = reply.err;
536 
537 		if(n == 0){
538 			req = aio->req;
539 			req->nr_sectors -= aio->len >> 9;
540 
541 			if((aio->bitmap != NULL) &&
542 			   (atomic_dec_and_test(&aio->bitmap->count))){
543                                 aio->aio = aio->bitmap->aio;
544                                 aio->len = 0;
545                                 kfree(aio->bitmap);
546                                 aio->bitmap = NULL;
547                                 submit_aio(&aio->aio);
548 			}
549 			else {
550 				if((req->nr_sectors == 0) &&
551                                    (aio->bitmap == NULL)){
552 					int len = req->hard_nr_sectors << 9;
553 					ubd_finish(req, len);
554 				}
555 
556                                 if(aio->bitmap_buf != NULL)
557                                         kfree(aio->bitmap_buf);
558 				kfree(aio);
559 			}
560 		}
561                 else if(n < 0){
562                         ubd_finish(aio->req, n);
563                         if(aio->bitmap != NULL)
564                                 kfree(aio->bitmap);
565                         if(aio->bitmap_buf != NULL)
566                                 kfree(aio->bitmap_buf);
567                         kfree(aio);
568                 }
569 	}
570 	reactivate_fd(fd, UBD_IRQ);
571 
572         do_ubd_request(ubd_queue);
573 
574 	return(IRQ_HANDLED);
575 }
576 
577 static int ubd_file_size(struct ubd *dev, __u64 *size_out)
578 {
579 	char *file;
580 
581 	file = dev->cow.file ? dev->cow.file : dev->file;
582 	return(os_file_size(file, size_out));
583 }
584 
585 static void ubd_close(struct ubd *dev)
586 {
587 	os_close_file(dev->fd);
588 	if(dev->cow.file == NULL)
589 		return;
590 
591 	os_close_file(dev->cow.fd);
592 	vfree(dev->cow.bitmap);
593 	dev->cow.bitmap = NULL;
594 }
595 
596 static int ubd_open_dev(struct ubd *dev)
597 {
598 	struct openflags flags;
599 	char **back_ptr;
600 	int err, create_cow, *create_ptr;
601 
602 	dev->openflags = dev->boot_openflags;
603 	create_cow = 0;
604 	create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
605 	back_ptr = dev->no_cow ? NULL : &dev->cow.file;
606 	dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr,
607 				&dev->cow.bitmap_offset, &dev->cow.bitmap_len,
608 				&dev->cow.data_offset, create_ptr);
609 
610 	if((dev->fd == -ENOENT) && create_cow){
611 		dev->fd = create_cow_file(dev->file, dev->cow.file,
612 					  dev->openflags, 1 << 9, PAGE_SIZE,
613 					  &dev->cow.bitmap_offset,
614 					  &dev->cow.bitmap_len,
615 					  &dev->cow.data_offset);
616 		if(dev->fd >= 0){
617 			printk(KERN_INFO "Creating \"%s\" as COW file for "
618 			       "\"%s\"\n", dev->file, dev->cow.file);
619 		}
620 	}
621 
622 	if(dev->fd < 0){
623 		printk("Failed to open '%s', errno = %d\n", dev->file,
624 		       -dev->fd);
625 		return(dev->fd);
626 	}
627 
628 	if(dev->cow.file != NULL){
629 		err = -ENOMEM;
630 		dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len);
631 		if(dev->cow.bitmap == NULL){
632 			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
633 			goto error;
634 		}
635 		flush_tlb_kernel_vm();
636 
637 		err = read_cow_bitmap(dev->fd, dev->cow.bitmap,
638 				      dev->cow.bitmap_offset,
639 				      dev->cow.bitmap_len);
640 		if(err < 0)
641 			goto error;
642 
643 		flags = dev->openflags;
644 		flags.w = 0;
645 		err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL,
646 				    NULL, NULL);
647 		if(err < 0) goto error;
648 		dev->cow.fd = err;
649 	}
650 	return(0);
651  error:
652 	os_close_file(dev->fd);
653 	return(err);
654 }
655 
656 static int ubd_new_disk(int major, u64 size, int unit,
657 			struct gendisk **disk_out)
658 
659 {
660 	struct gendisk *disk;
661 	char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")];
662 	int err;
663 
664 	disk = alloc_disk(1 << UBD_SHIFT);
665 	if(disk == NULL)
666 		return(-ENOMEM);
667 
668 	disk->major = major;
669 	disk->first_minor = unit << UBD_SHIFT;
670 	disk->fops = &ubd_blops;
671 	set_capacity(disk, size / 512);
672 	if(major == MAJOR_NR){
673 		sprintf(disk->disk_name, "ubd%c", 'a' + unit);
674 		sprintf(disk->devfs_name, "ubd/disc%d", unit);
675 		sprintf(from, "ubd/%d", unit);
676 		sprintf(to, "disc%d/disc", unit);
677 		err = devfs_mk_symlink(from, to);
678 		if(err)
679 			printk("ubd_new_disk failed to make link from %s to "
680 			       "%s, error = %d\n", from, to, err);
681 	}
682 	else {
683 		sprintf(disk->disk_name, "ubd_fake%d", unit);
684 		sprintf(disk->devfs_name, "ubd_fake/disc%d", unit);
685 	}
686 
687 	/* sysfs register (not for ide fake devices) */
688 	if (major == MAJOR_NR) {
689 		ubd_dev[unit].pdev.id   = unit;
690 		ubd_dev[unit].pdev.name = DRIVER_NAME;
691 		platform_device_register(&ubd_dev[unit].pdev);
692 		disk->driverfs_dev = &ubd_dev[unit].pdev.dev;
693 	}
694 
695 	disk->private_data = &ubd_dev[unit];
696 	disk->queue = ubd_queue;
697 	add_disk(disk);
698 
699 	*disk_out = disk;
700 	return 0;
701 }
702 
/* Round n up to the next multiple of 512 bytes (one sector).  The argument
 * is parenthesized so any expression can be passed, and the mask is formed
 * with ~ instead of left-shifting a negative value.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
704 
705 static int ubd_add(int n)
706 {
707 	struct ubd *dev = &ubd_dev[n];
708 	int err;
709 
710 	err = -ENODEV;
711 	if(dev->file == NULL)
712 		goto out;
713 
714 	if (ubd_open_dev(dev))
715 		goto out;
716 
717 	err = ubd_file_size(dev, &dev->size);
718 	if(err < 0)
719 		goto out_close;
720 
721 	dev->size = ROUND_BLOCK(dev->size);
722 
723 	err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]);
724 	if(err)
725 		goto out_close;
726 
727 	if(fake_major != MAJOR_NR)
728 		ubd_new_disk(fake_major, dev->size, n,
729 			     &fake_gendisk[n]);
730 
731 	/* perhaps this should also be under the "if (fake_major)" above */
732 	/* using the fake_disk->disk_name and also the fakehd_set name */
733 	if (fake_ide)
734 		make_ide_entries(ubd_gendisk[n]->disk_name);
735 
736 	err = 0;
737 out_close:
738 	ubd_close(dev);
739 out:
740 	return err;
741 }
742 
743 static int ubd_config(char *str)
744 {
745 	int n, err;
746 
747 	str = uml_strdup(str);
748 	if(str == NULL){
749 		printk(KERN_ERR "ubd_config failed to strdup string\n");
750 		return(1);
751 	}
752 	err = ubd_setup_common(str, &n);
753 	if(err){
754 		kfree(str);
755 		return(-1);
756 	}
757 	if(n == -1) return(0);
758 
759  	spin_lock(&ubd_lock);
760 	err = ubd_add(n);
761 	if(err)
762 		ubd_dev[n].file = NULL;
763  	spin_unlock(&ubd_lock);
764 
765 	return(err);
766 }
767 
768 static int ubd_get_config(char *name, char *str, int size, char **error_out)
769 {
770 	struct ubd *dev;
771 	int n, len = 0;
772 
773 	n = parse_unit(&name);
774 	if((n >= MAX_DEV) || (n < 0)){
775 		*error_out = "ubd_get_config : device number out of range";
776 		return(-1);
777 	}
778 
779 	dev = &ubd_dev[n];
780 	spin_lock(&ubd_lock);
781 
782 	if(dev->file == NULL){
783 		CONFIG_CHUNK(str, size, len, "", 1);
784 		goto out;
785 	}
786 
787 	CONFIG_CHUNK(str, size, len, dev->file, 0);
788 
789 	if(dev->cow.file != NULL){
790 		CONFIG_CHUNK(str, size, len, ",", 0);
791 		CONFIG_CHUNK(str, size, len, dev->cow.file, 1);
792 	}
793 	else CONFIG_CHUNK(str, size, len, "", 1);
794 
795  out:
796 	spin_unlock(&ubd_lock);
797 	return(len);
798 }
799 
800 static int ubd_id(char **str, int *start_out, int *end_out)
801 {
802         int n;
803 
804 	n = parse_unit(str);
805         *start_out = 0;
806         *end_out = MAX_DEV - 1;
807         return n;
808 }
809 
810 static int ubd_remove(int n)
811 {
812 	struct ubd *dev;
813 	int err = -ENODEV;
814 
815 	spin_lock(&ubd_lock);
816 
817 	if(ubd_gendisk[n] == NULL)
818 		goto out;
819 
820 	dev = &ubd_dev[n];
821 
822 	if(dev->file == NULL)
823 		goto out;
824 
825 	/* you cannot remove a open disk */
826 	err = -EBUSY;
827 	if(dev->count > 0)
828 		goto out;
829 
830 	del_gendisk(ubd_gendisk[n]);
831 	put_disk(ubd_gendisk[n]);
832 	ubd_gendisk[n] = NULL;
833 
834 	if(fake_gendisk[n] != NULL){
835 		del_gendisk(fake_gendisk[n]);
836 		put_disk(fake_gendisk[n]);
837 		fake_gendisk[n] = NULL;
838 	}
839 
840 	platform_device_unregister(&dev->pdev);
841 	*dev = ((struct ubd) DEFAULT_UBD);
842 	err = 0;
843 out:
844 	spin_unlock(&ubd_lock);
845 	return err;
846 }
847 
/* mconsole device description for "ubd"; registered by ubd_mc_init(). */
848 static struct mc_device ubd_mc = {
849 	.name		= "ubd",
850 	.config		= ubd_config,
851  	.get_config	= ubd_get_config,
852 	.id		= ubd_id,
853 	.remove		= ubd_remove,
854 };
855 
856 static int ubd_mc_init(void)
857 {
858 	mconsole_register_dev(&ubd_mc);
859 	return 0;
860 }
861 
862 __initcall(ubd_mc_init);
863 
/* Platform-bus driver with the same name that ubd_new_disk() gives to the
 * per-device platform devices; registered in ubd_init().
 */
864 static struct device_driver ubd_driver = {
865 	.name  = DRIVER_NAME,
866 	.bus   = &platform_bus_type,
867 };
868 
869 int ubd_init(void)
870 {
871         int i;
872 
873 	ubd_reply_fd = init_aio_irq(UBD_IRQ, "ubd", ubd_intr);
874 	if(ubd_reply_fd < 0)
875 		printk("Setting up ubd AIO failed, err = %d\n", ubd_reply_fd);
876 
877 	devfs_mk_dir("ubd");
878 	if (register_blkdev(MAJOR_NR, "ubd"))
879 		return -1;
880 
881 	ubd_queue = blk_init_queue(do_ubd_request, &ubd_io_lock);
882 	if (!ubd_queue) {
883 		unregister_blkdev(MAJOR_NR, "ubd");
884 		return -1;
885 	}
886 
887 	blk_queue_max_hw_segments(ubd_queue, MAX_SG);
888 	if (fake_major != MAJOR_NR) {
889 		char name[sizeof("ubd_nnn\0")];
890 
891 		snprintf(name, sizeof(name), "ubd_%d", fake_major);
892 		devfs_mk_dir(name);
893 		if (register_blkdev(fake_major, "ubd"))
894 			return -1;
895 	}
896 	driver_register(&ubd_driver);
897 	for (i = 0; i < MAX_DEV; i++)
898 		ubd_add(i);
899 
900 	return 0;
901 }
902 
903 late_initcall(ubd_init);
904 
905 static int ubd_open(struct inode *inode, struct file *filp)
906 {
907 	struct gendisk *disk = inode->i_bdev->bd_disk;
908 	struct ubd *dev = disk->private_data;
909 	int err = 0;
910 
911 	if(dev->count == 0){
912 		err = ubd_open_dev(dev);
913 		if(err){
914 			printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
915 			       disk->disk_name, dev->file, -err);
916 			goto out;
917 		}
918 	}
919 	dev->count++;
920 	set_disk_ro(disk, !dev->openflags.w);
921 
922 	/* This should no more be needed. And it didn't work anyway to exclude
923 	 * read-write remounting of filesystems.*/
924 	/*if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){
925 	        if(--dev->count == 0) ubd_close(dev);
926 	        err = -EROFS;
927 	}*/
928  out:
929 	return(err);
930 }
931 
932 static int ubd_release(struct inode * inode, struct file * file)
933 {
934 	struct gendisk *disk = inode->i_bdev->bd_disk;
935 	struct ubd *dev = disk->private_data;
936 
937 	if(--dev->count == 0)
938 		ubd_close(dev);
939 	return(0);
940 }
941 
942 static void cowify_bitmap(struct io_thread_req *req, unsigned long *bitmap)
943 {
944         __u64 sector = req->offset / req->sectorsize;
945         int i;
946 
947         for(i = 0; i < req->length / req->sectorsize; i++){
948                 if(ubd_test_bit(sector + i, bitmap))
949                         continue;
950 
951                 if(req->bitmap_start == -1)
952                         req->bitmap_start = sector + i;
953                 req->bitmap_end = sector + i + 1;
954 
955                 ubd_set_bit(sector + i, bitmap);
956         }
957 }
958 
959 /* Called with ubd_io_lock held */
960 static int prepare_request(struct request *req, struct io_thread_req *io_req,
961                            unsigned long long offset, int page_offset,
962                            int len, struct page *page)
963 {
964 	struct gendisk *disk = req->rq_disk;
965 	struct ubd *dev = disk->private_data;
966 
967 	/* This should be impossible now */
968 	if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
969 		printk("Write attempted on readonly ubd device %s\n",
970 		       disk->disk_name);
971                 ubd_end_request(req, 0, 0);
972 		return(1);
973 	}
974 
975 	io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
976 	io_req->fds[1] = dev->fd;
977 	io_req->offset = offset;
978 	io_req->length = len;
979 	io_req->error = 0;
980 	io_req->op = (rq_data_dir(req) == READ) ? AIO_READ : AIO_WRITE;
981 	io_req->offsets[0] = 0;
982 	io_req->offsets[1] = dev->cow.data_offset;
983         io_req->buffer = page_address(page) + page_offset;
984 	io_req->sectorsize = 1 << 9;
985         io_req->bitmap_offset = dev->cow.bitmap_offset;
986         io_req->bitmap_start = -1;
987         io_req->bitmap_end = -1;
988 
989         if((dev->cow.file != NULL) && (io_req->op == UBD_WRITE))
990                 cowify_bitmap(io_req, dev->cow.bitmap);
991 	return(0);
992 }
993 
994 /* Called with ubd_io_lock held */
995 static void do_ubd_request(request_queue_t *q)
996 {
997 	struct io_thread_req io_req;
998 	struct request *req;
999 	__u64 sector;
1000 	int err;
1001 
1002 	if(in_ubd)
1003 		return;
1004 	in_ubd = 1;
1005 	while((req = elv_next_request(q)) != NULL){
1006 		struct gendisk *disk = req->rq_disk;
1007 		struct ubd *dev = disk->private_data;
1008 		int n, i;
1009 
1010 		blkdev_dequeue_request(req);
1011 
1012 		sector = req->sector;
1013 		n = blk_rq_map_sg(q, req, dev->sg);
1014 
1015 		for(i = 0; i < n; i++){
1016 			struct scatterlist *sg = &dev->sg[i];
1017 
1018 			err = prepare_request(req, &io_req, sector << 9,
1019 					      sg->offset, sg->length,
1020 					      sg->page);
1021 			if(err)
1022 				continue;
1023 
1024 			sector += sg->length >> 9;
1025 			do_io(&io_req, req, dev->cow.bitmap);
1026 		}
1027 	}
1028 	in_ubd = 0;
1029 }
1030 
1031 static int ubd_ioctl(struct inode * inode, struct file * file,
1032 		     unsigned int cmd, unsigned long arg)
1033 {
1034 	struct hd_geometry __user *loc = (struct hd_geometry __user *) arg;
1035 	struct ubd *dev = inode->i_bdev->bd_disk->private_data;
1036 	struct hd_driveid ubd_id = {
1037 		.cyls		= 0,
1038 		.heads		= 128,
1039 		.sectors	= 32,
1040 	};
1041 
1042 	switch (cmd) {
1043 	        struct hd_geometry g;
1044 		struct cdrom_volctrl volume;
1045 	case HDIO_GETGEO:
1046 		if(!loc) return(-EINVAL);
1047 		g.heads = 128;
1048 		g.sectors = 32;
1049 		g.cylinders = dev->size / (128 * 32 * 512);
1050 		g.start = get_start_sect(inode->i_bdev);
1051 		return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0);
1052 
1053 	case HDIO_GET_IDENTITY:
1054 		ubd_id.cyls = dev->size / (128 * 32 * 512);
1055 		if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1056 				 sizeof(ubd_id)))
1057 			return(-EFAULT);
1058 		return(0);
1059 
1060 	case CDROMVOLREAD:
1061 		if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1062 			return(-EFAULT);
1063 		volume.channel0 = 255;
1064 		volume.channel1 = 255;
1065 		volume.channel2 = 255;
1066 		volume.channel3 = 255;
1067 		if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1068 			return(-EFAULT);
1069 		return(0);
1070 	}
1071 	return(-EINVAL);
1072 }
1073 
1074 static int same_backing_files(char *from_cmdline, char *from_cow, char *cow)
1075 {
1076 	struct uml_stat buf1, buf2;
1077 	int err;
1078 
1079 	if(from_cmdline == NULL) return(1);
1080 	if(!strcmp(from_cmdline, from_cow)) return(1);
1081 
1082 	err = os_stat_file(from_cmdline, &buf1);
1083 	if(err < 0){
1084 		printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
1085 		return(1);
1086 	}
1087 	err = os_stat_file(from_cow, &buf2);
1088 	if(err < 0){
1089 		printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
1090 		return(1);
1091 	}
1092 	if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
1093 		return(1);
1094 
1095 	printk("Backing file mismatch - \"%s\" requested,\n"
1096 	       "\"%s\" specified in COW header of \"%s\"\n",
1097 	       from_cmdline, from_cow, cow);
1098 	return(0);
1099 }
1100 
1101 static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1102 {
1103 	unsigned long modtime;
1104 	long long actual;
1105 	int err;
1106 
1107 	err = os_file_modtime(file, &modtime);
1108 	if(err < 0){
1109 		printk("Failed to get modification time of backing file "
1110 		       "\"%s\", err = %d\n", file, -err);
1111 		return(err);
1112 	}
1113 
1114 	err = os_file_size(file, &actual);
1115 	if(err < 0){
1116 		printk("Failed to get size of backing file \"%s\", "
1117 		       "err = %d\n", file, -err);
1118 		return(err);
1119 	}
1120 
1121   	if(actual != size){
1122 		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
1123 		 * the typecast.*/
1124 		printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1125 		       "file\n", (unsigned long long) size, actual);
1126 		return(-EINVAL);
1127 	}
1128 	if(modtime != mtime){
1129 		printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1130 		       "file\n", mtime, modtime);
1131 		return(-EINVAL);
1132 	}
1133 	return(0);
1134 }
1135 
/* Read len bytes of COW bitmap from fd, starting at file offset 'offset',
 * into buf.  Returns 0 on success or the negative error from the seek or
 * the read.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int err = os_seek_file(fd, offset);

	if (err >= 0)
		err = os_read_file(fd, buf, len);

	return (err < 0) ? err : 0;
}
1150 
1151 int open_ubd_file(char *file, struct openflags *openflags,
1152 		  char **backing_file_out, int *bitmap_offset_out,
1153 		  unsigned long *bitmap_len_out, int *data_offset_out,
1154 		  int *create_cow_out)
1155 {
1156 	time_t mtime;
1157 	unsigned long long size;
1158 	__u32 version, align;
1159 	char *backing_file;
1160 	int fd, err, sectorsize, same, mode = 0644;
1161 
1162 	fd = os_open_file(file, *openflags, mode);
1163 	if(fd < 0){
1164 		if((fd == -ENOENT) && (create_cow_out != NULL))
1165 			*create_cow_out = 1;
1166                 if(!openflags->w ||
1167                    ((fd != -EROFS) && (fd != -EACCES))) return(fd);
1168 		openflags->w = 0;
1169 		fd = os_open_file(file, *openflags, mode);
1170 		if(fd < 0)
1171 			return(fd);
1172         }
1173 
1174 	err = os_lock_file(fd, openflags->w);
1175 	if(err < 0){
1176 		printk("Failed to lock '%s', err = %d\n", file, -err);
1177 		goto out_close;
1178 	}
1179 
1180 	if(backing_file_out == NULL) return(fd);
1181 
1182 	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1183 			      &size, &sectorsize, &align, bitmap_offset_out);
1184 	if(err && (*backing_file_out != NULL)){
1185 		printk("Failed to read COW header from COW file \"%s\", "
1186 		       "errno = %d\n", file, -err);
1187 		goto out_close;
1188 	}
1189 	if(err) return(fd);
1190 
1191 	if(backing_file_out == NULL) return(fd);
1192 
1193 	same = same_backing_files(*backing_file_out, backing_file, file);
1194 
1195 	if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){
1196 		printk("Switching backing file to '%s'\n", *backing_file_out);
1197 		err = write_cow_header(file, fd, *backing_file_out,
1198 				       sectorsize, align, &size);
1199 		if(err){
1200 			printk("Switch failed, errno = %d\n", -err);
1201 			return(err);
1202 		}
1203 	}
1204 	else {
1205 		*backing_file_out = backing_file;
1206 		err = backing_file_mismatch(*backing_file_out, size, mtime);
1207 		if(err) goto out_close;
1208 	}
1209 
1210 	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1211 		  bitmap_len_out, data_offset_out);
1212 
1213         return(fd);
1214  out_close:
1215 	os_close_file(fd);
1216 	return(err);
1217 }
1218 
1219 int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1220 		    int sectorsize, int alignment, int *bitmap_offset_out,
1221 		    unsigned long *bitmap_len_out, int *data_offset_out)
1222 {
1223 	int err, fd;
1224 
1225 	flags.c = 1;
1226 	fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL);
1227 	if(fd < 0){
1228 		err = fd;
1229 		printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1230 		       -err);
1231 		goto out;
1232 	}
1233 
1234 	err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1235 			    bitmap_offset_out, bitmap_len_out,
1236 			    data_offset_out);
1237 	if(!err)
1238 		return(fd);
1239 	os_close_file(fd);
1240  out:
1241 	return(err);
1242 }
1243 
1244 void do_io(struct io_thread_req *req, struct request *r, unsigned long *bitmap)
1245 {
1246         struct ubd_aio *aio;
1247         struct bitmap_io *bitmap_io = NULL;
1248         char *buf;
1249         void *bitmap_buf = NULL;
1250         unsigned long len, sector;
1251         int nsectors, start, end, bit, err;
1252         __u64 off;
1253 
1254         if(req->bitmap_start != -1){
1255                 /* Round up to the nearest word */
1256                 int round = sizeof(unsigned long);
1257                 len = (req->bitmap_end - req->bitmap_start +
1258                        round * 8 - 1) / (round * 8);
1259                 len *= round;
1260 
1261                 off = req->bitmap_start / (8 * round);
1262                 off *= round;
1263 
1264                 bitmap_io = kmalloc(sizeof(*bitmap_io), GFP_KERNEL);
1265                 if(bitmap_io == NULL){
1266                         printk("Failed to kmalloc bitmap IO\n");
1267                         req->error = 1;
1268                         return;
1269                 }
1270 
1271                 bitmap_buf = kmalloc(len, GFP_KERNEL);
1272                 if(bitmap_buf == NULL){
1273                         printk("do_io : kmalloc of bitmap chunk "
1274                                "failed\n");
1275                         kfree(bitmap_io);
1276                         req->error = 1;
1277                         return;
1278                 }
1279                 memcpy(bitmap_buf, &bitmap[off / sizeof(bitmap[0])], len);
1280 
1281                 *bitmap_io = ((struct bitmap_io)
1282                         { .count	= ATOMIC_INIT(0),
1283                           .aio		= INIT_AIO(AIO_WRITE, req->fds[1],
1284                                                    bitmap_buf, len,
1285                                                    req->bitmap_offset + off,
1286                                                    ubd_reply_fd) } );
1287         }
1288 
1289         nsectors = req->length / req->sectorsize;
1290         start = 0;
1291         end = nsectors;
1292         bit = 0;
1293         do {
1294                 if(bitmap != NULL){
1295                         sector = req->offset / req->sectorsize;
1296                         bit = ubd_test_bit(sector + start, bitmap);
1297                         end = start;
1298                         while((end < nsectors) &&
1299                               (ubd_test_bit(sector + end, bitmap) == bit))
1300                                 end++;
1301                 }
1302 
1303                 off = req->offsets[bit] + req->offset +
1304                         start * req->sectorsize;
1305                 len = (end - start) * req->sectorsize;
1306                 buf = &req->buffer[start * req->sectorsize];
1307 
1308                 aio = kmalloc(sizeof(*aio), GFP_KERNEL);
1309                 if(aio == NULL){
1310                         req->error = 1;
1311                         return;
1312                 }
1313 
1314                 *aio = ((struct ubd_aio)
1315                         { .aio		= INIT_AIO(req->op, req->fds[bit], buf,
1316                                                    len, off, ubd_reply_fd),
1317                           .len		= len,
1318                           .req		= r,
1319                           .bitmap	= bitmap_io,
1320                           .bitmap_buf 	= bitmap_buf });
1321 
1322                 if(aio->bitmap != NULL)
1323                         atomic_inc(&aio->bitmap->count);
1324 
1325                 err = submit_aio(&aio->aio);
1326                 if(err){
1327                         printk("do_io - submit_aio failed, "
1328                                "err = %d\n", err);
1329                         req->error = 1;
1330                         return;
1331                 }
1332 
1333                 start = end;
1334         } while(start < nsectors);
1335 }
1336