xref: /freebsd/sys/dev/md/md.c (revision 8f8def9e2c957ac49cff3622ddc3119a38212fcb)
100a6a3c6SPoul-Henning Kamp /*
200a6a3c6SPoul-Henning Kamp  * ----------------------------------------------------------------------------
300a6a3c6SPoul-Henning Kamp  * "THE BEER-WARE LICENSE" (Revision 42):
400a6a3c6SPoul-Henning Kamp  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
500a6a3c6SPoul-Henning Kamp  * can do whatever you want with this stuff. If we meet some day, and you think
600a6a3c6SPoul-Henning Kamp  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
700a6a3c6SPoul-Henning Kamp  * ----------------------------------------------------------------------------
800a6a3c6SPoul-Henning Kamp  *
900a6a3c6SPoul-Henning Kamp  * $FreeBSD$
1000a6a3c6SPoul-Henning Kamp  *
1100a6a3c6SPoul-Henning Kamp  */
1200a6a3c6SPoul-Henning Kamp 
1371e4fff8SPoul-Henning Kamp #include "opt_mfs.h"		/* We have adopted some tasks from MFS */
143f54a085SPoul-Henning Kamp #include "opt_md.h"
1571e4fff8SPoul-Henning Kamp 
1600a6a3c6SPoul-Henning Kamp #include <sys/param.h>
1700a6a3c6SPoul-Henning Kamp #include <sys/systm.h>
189626b608SPoul-Henning Kamp #include <sys/bio.h>
1900a6a3c6SPoul-Henning Kamp #include <sys/conf.h>
2000a6a3c6SPoul-Henning Kamp #include <sys/devicestat.h>
2127068b01SBrian Feldman #include <sys/disk.h>
2227068b01SBrian Feldman #include <sys/kernel.h>
2327068b01SBrian Feldman #include <sys/malloc.h>
2427068b01SBrian Feldman #include <sys/sysctl.h>
2595f1a897SPoul-Henning Kamp #include <sys/linker.h>
263f54a085SPoul-Henning Kamp #include <sys/queue.h>
278f8def9eSPoul-Henning Kamp #include <sys/mdioctl.h>
288f8def9eSPoul-Henning Kamp #include <sys/vnode.h>
298f8def9eSPoul-Henning Kamp #include <sys/namei.h>
308f8def9eSPoul-Henning Kamp #include <sys/fcntl.h>
318f8def9eSPoul-Henning Kamp #include <sys/proc.h>
328f8def9eSPoul-Henning Kamp #include <machine/atomic.h>
338f8def9eSPoul-Henning Kamp 
348f8def9eSPoul-Henning Kamp #include <vm/vm.h>
358f8def9eSPoul-Henning Kamp #include <vm/vm_object.h>
368f8def9eSPoul-Henning Kamp #include <vm/vm_page.h>
378f8def9eSPoul-Henning Kamp #include <vm/vm_pager.h>
388f8def9eSPoul-Henning Kamp #include <vm/vm_zone.h>
398f8def9eSPoul-Henning Kamp #include <vm/swap_pager.h>
403f54a085SPoul-Henning Kamp 
/*
 * Default value for MD_NSECT, used only when nothing defined it earlier
 * (presumably the kernel option header opt_md.h -- confirm).
 */
#ifndef MD_NSECT
#define MD_NSECT (10000 * 2)
#endif

/*
 * Malloc types used by this driver: M_MD for softc structures and the
 * sector pointer tables, M_MDSECT for the per-sector data buffers.
 */
MALLOC_DEFINE(M_MD, "MD disk", "Memory Disk");
MALLOC_DEFINE(M_MDSECT, "MD sectors", "Memory Disk Sectors");

/*
 * Debug verbosity, adjustable at run time through the read/write
 * "mddebug" sysctl under the debug tree.  Non-zero traces open/ioctl,
 * > 1 traces every strategy call, > 2 traces every sector handled by
 * the malloc backend.
 */
static int md_debug;
SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0, "");

/*
 * Backward compatibility: accept the old MFS_ROOT / MFS_ROOT_SIZE option
 * names as aliases for MD_ROOT / MD_ROOT_SIZE, with a build-time warning.
 */
#if defined(MFS_ROOT) && !defined(MD_ROOT)
#define MD_ROOT MFS_ROOT
#warning "option MFS_ROOT has been superceeded by MD_ROOT"
#endif

#if defined(MFS_ROOT_SIZE) && !defined(MD_ROOT_SIZE)
#define MD_ROOT_SIZE MFS_ROOT_SIZE
#warning "option MFS_ROOT_SIZE has been superceeded by MD_ROOT_SIZE"
#endif

#if defined(MD_ROOT) && defined(MD_ROOT_SIZE)
/*
 * Statically reserved space (MD_ROOT_SIZE kilobytes) for an embedded
 * filesystem image.  The string initializers act as recognizable start
 * and end markers in the compiled object.
 */
/* Image gets put here: */
static u_char mfs_root[MD_ROOT_SIZE*1024] = "MFS Filesystem goes here";
static u_char end_mfs_root[] __unused = "MFS Filesystem had better STOP here";
#endif

/* Set to 1 by mdcreate_preload() when the preloaded image becomes unit 0. */
static int mdrootready;
static int mdunits;
690cfaeeeeSPoul-Henning Kamp 
/* Character device major number used by both device switch tables. */
#define CDEV_MAJOR	95

/* Forward declarations of the entry points referenced by the switch tables. */
static d_strategy_t mdstrategy;
static d_open_t mdopen;
static d_ioctl_t mdioctl, mdctlioctl;

/*
 * Device switch for the memory disk units themselves.  It is handed to
 * disk_create() by mdinit().  Reads and writes go through physread/
 * physwrite and end up in mdstrategy().
 */
static struct cdevsw md_cdevsw = {
        /* open */      mdopen,
        /* close */     nullclose,
        /* read */      physread,
        /* write */     physwrite,
        /* ioctl */     mdioctl,
        /* poll */      nopoll,
        /* mmap */      nommap,
        /* strategy */  mdstrategy,
        /* name */      "md",
        /* maj */       CDEV_MAJOR,
        /* dump */      nodump,
        /* psize */     nopsize,
        /* flags */     D_DISK | D_CANFREE | D_MEMDISK,
};
918f8def9eSPoul-Henning Kamp 
/*
 * Device switch for the control device: it supports only ioctl
 * (mdctlioctl); read, write and strategy are the "no" stubs.  The
 * members after "maj" are not listed and are therefore zero-initialized.
 */
static struct cdevsw mdctl_cdevsw = {
        /* open */      nullopen,
        /* close */     nullclose,
        /* read */      noread,
        /* write */     nowrite,
        /* ioctl */     mdctlioctl,
        /* poll */      nopoll,
        /* mmap */      nommap,
        /* strategy */  nostrategy,
        /* name */      "md",
        /* maj */       CDEV_MAJOR
};

/* Second cdevsw passed to disk_create() in mdinit(); not initialized here. */
static struct cdevsw mddisk_cdevsw;

/* List of all existing memory disk softcs; see mdfind(), mdnew(), mddelete(). */
static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(&md_softc_list);
1083f54a085SPoul-Henning Kamp 
/*
 * Per-unit software state for one memory disk.  Instances are allocated
 * by mdnew() and linked on md_softc_list.
 */
struct md_s {
	int unit;			/* unit number; mdnew() keeps it unique on the list */
	LIST_ENTRY(md_s) list;		/* linkage on md_softc_list */
	struct devstat stats;		/* devstat accounting record */
	struct bio_queue_head bio_queue; /* requests queued by mdstrategy() */
	struct disk disk;		/* handed to disk_create(); holds the disklabel */
	dev_t dev;			/* device returned by disk_create() */
	int busy;			/* set while a mdstart_*() routine drains bio_queue */
	enum md_types type;		/* backing store kind; selects the mdstart_*() routine */
	unsigned nsect;			/* size of the disk in sectors */
	unsigned secsize;		/* sector size in bytes */
	unsigned flags;			/* MD_* option bits (MD_COMPRESS, MD_RESERVE, MD_READONLY) */

	/* MD_MALLOC related fields */
	unsigned nsecp;			/* number of entries in secp[] */
	u_char **secp;			/* per-sector table: buffer pointer, or a fill byte if <= 255 */

	/* MD_PRELOAD related fields */
	u_char *pl_ptr;			/* start of the preloaded image */
	unsigned pl_len;		/* length of the preloaded image in bytes */

	/* MD_VNODE related fields */
	struct vnode *vnode;		/* backing file */
	struct ucred *cred;		/* credentials used for VOP_READ/VOP_WRITE */

	/* MD_SWAP related fields */
	vm_object_t object;		/* VM object given to vm_pager_strategy() */
};
13700a6a3c6SPoul-Henning Kamp 
13800a6a3c6SPoul-Henning Kamp static int
13900a6a3c6SPoul-Henning Kamp mdopen(dev_t dev, int flag, int fmt, struct proc *p)
14000a6a3c6SPoul-Henning Kamp {
14100a6a3c6SPoul-Henning Kamp 	struct md_s *sc;
14200a6a3c6SPoul-Henning Kamp 	struct disklabel *dl;
14300a6a3c6SPoul-Henning Kamp 
14400a6a3c6SPoul-Henning Kamp 	if (md_debug)
14500a6a3c6SPoul-Henning Kamp 		printf("mdopen(%s %x %x %p)\n",
14600a6a3c6SPoul-Henning Kamp 			devtoname(dev), flag, fmt, p);
14700a6a3c6SPoul-Henning Kamp 
14800a6a3c6SPoul-Henning Kamp 	sc = dev->si_drv1;
14900a6a3c6SPoul-Henning Kamp 
15000a6a3c6SPoul-Henning Kamp 	dl = &sc->disk.d_label;
15100a6a3c6SPoul-Henning Kamp 	bzero(dl, sizeof(*dl));
1528f8def9eSPoul-Henning Kamp 	dl->d_secsize = sc->secsize;
1538f8def9eSPoul-Henning Kamp 	if (sc->nsect > 1024)
1548f8def9eSPoul-Henning Kamp 	dl->d_nsectors = sc->nsect > 1024 ? 1024 : sc->nsect;
15500a6a3c6SPoul-Henning Kamp 	dl->d_ntracks = 1;
15695f1a897SPoul-Henning Kamp 	dl->d_secpercyl = dl->d_nsectors * dl->d_ntracks;
15700a6a3c6SPoul-Henning Kamp 	dl->d_secperunit = sc->nsect;
15800a6a3c6SPoul-Henning Kamp 	dl->d_ncylinders = dl->d_secperunit / dl->d_secpercyl;
15900a6a3c6SPoul-Henning Kamp 	return (0);
16000a6a3c6SPoul-Henning Kamp }
16100a6a3c6SPoul-Henning Kamp 
16200a6a3c6SPoul-Henning Kamp static int
16300a6a3c6SPoul-Henning Kamp mdioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
16400a6a3c6SPoul-Henning Kamp {
16500a6a3c6SPoul-Henning Kamp 
16600a6a3c6SPoul-Henning Kamp 	if (md_debug)
16700a6a3c6SPoul-Henning Kamp 		printf("mdioctl(%s %lx %p %x %p)\n",
16800a6a3c6SPoul-Henning Kamp 			devtoname(dev), cmd, addr, flags, p);
16900a6a3c6SPoul-Henning Kamp 
17000a6a3c6SPoul-Henning Kamp 	return (ENOIOCTL);
17100a6a3c6SPoul-Henning Kamp }
17200a6a3c6SPoul-Henning Kamp 
/*
 * Service all queued requests for a malloc-backed disk.
 *
 * Requests are taken off sc->bio_queue one at a time and processed in
 * DEV_BSIZE sized sectors.  Each sector is described by one entry of
 * sc->secp[]:
 *   - a value > 255 is a pointer to a DEV_BSIZE buffer holding the data;
 *   - a value <= 255 means every byte of the sector has that value
 *     (0 therefore also stands for a never-written, all-zero sector).
 * Sectors at or beyond sc->nsecp have no table entry and read as zeroes.
 */
static void
mdstart_malloc(struct md_s *sc)
{
	int i;
	struct bio *bp;
	devstat_trans_flags dop;
	u_char *secp, **secpp, *dst;
	unsigned secno, nsec, secval, uc;

	for (;;) {
		/* XXX: LOCK(unique unit numbers) */
		bp = bioq_first(&sc->bio_queue);
		if (bp)
			bioq_remove(&sc->bio_queue, bp);
		/* XXX: UNLOCK(unique unit numbers) */
		if (!bp)
			break;

		devstat_start_transaction(&sc->stats);

		/* NOTE: dop is assigned here but not used further in this function. */
		if (bp->bio_cmd == BIO_DELETE)
			dop = DEVSTAT_NO_DATA;
		else if (bp->bio_cmd == BIO_READ)
			dop = DEVSTAT_READ;
		else
			dop = DEVSTAT_WRITE;

		nsec = bp->bio_bcount / DEV_BSIZE;
		secno = bp->bio_pblkno;
		dst = bp->bio_data;
		while (nsec--) {
			/*
			 * Decode the table entry for this sector:
			 * secpp = address of the entry (0 if beyond the table),
			 * secp  = data buffer (0 if none),
			 * secval = fill byte when there is no buffer.
			 */
			if (secno < sc->nsecp) {
				secpp = &sc->secp[secno];
				if ((uintptr_t)*secpp > 255) {
					secp = *secpp;
					secval = 0;
				} else {
					secp = 0;
					secval = (uintptr_t) *secpp;
				}
			} else {
				secpp = 0;
				secp = 0;
				secval = 0;
			}
			if (md_debug > 2)
				printf("%x %p %p %d\n",
				    bp->bio_flags, secpp, secp, secval);

			if (bp->bio_cmd == BIO_DELETE) {
				/* Release the sector unless the space is reserved. */
				if (secpp && !(sc->flags & MD_RESERVE)) {
					if (secp)
						FREE(secp, M_MDSECT);
					*secpp = 0;
				}
			} else if (bp->bio_cmd == BIO_READ) {
				/* Copy the buffer, replicate the fill byte, or return zeroes. */
				if (secp) {
					bcopy(secp, dst, DEV_BSIZE);
				} else if (secval) {
					for (i = 0; i < DEV_BSIZE; i++)
						dst[i] = secval;
				} else {
					bzero(dst, DEV_BSIZE);
				}
			} else {
				/*
				 * Write.  With MD_COMPRESS, check whether all bytes
				 * of the sector equal the first one; i == DEV_BSIZE
				 * afterwards means the sector is uniform with value uc.
				 * Without MD_COMPRESS, i = 0 forces a real buffer.
				 */
				if (sc->flags & MD_COMPRESS) {
					uc = dst[0];
					for (i = 1; i < DEV_BSIZE; i++)
						if (dst[i] != uc)
							break;
				} else {
					i = 0;
					uc = 0;
				}
				if (i == DEV_BSIZE && !uc) {
					/*
					 * All-zero sector: drop any buffer and clear
					 * the entry.  No table growth is needed since
					 * a missing entry already reads as zeroes.
					 */
					if (secp)
						FREE(secp, M_MDSECT);
					if (secpp)
						*secpp = (u_char *)(uintptr_t)uc;
				} else {
					if (!secpp) {
						/*
						 * Grow the table.  nsec has already been
						 * decremented for this sector, so
						 * secno + nsec + 1 covers up to the last
						 * sector of this request.
						 */
						MALLOC(secpp, u_char **, (secno + nsec + 1) * sizeof(u_char *), M_MD, M_WAITOK | M_ZERO);
						bcopy(sc->secp, secpp, sc->nsecp * sizeof(u_char *));
						FREE(sc->secp, M_MD);
						sc->secp = secpp;
						sc->nsecp = secno + nsec + 1;
						secpp = &sc->secp[secno];
					}
					if (i == DEV_BSIZE) {
						/* Uniform non-zero sector: store just the byte. */
						if (secp)
							FREE(secp, M_MDSECT);
						*secpp = (u_char *)(uintptr_t)uc;
					} else {
						/* General case: keep the data in a buffer. */
						if (!secp)
							MALLOC(secp, u_char *, DEV_BSIZE, M_MDSECT, M_WAITOK);
						bcopy(dst, secp, DEV_BSIZE);
						*secpp = secp;
					}
				}
			}
			secno++;
			dst += DEV_BSIZE;
		}
		bp->bio_resid = 0;
		devstat_end_transaction_bio(&sc->stats, bp);
		biodone(bp);
	}
	return;
}
28200a6a3c6SPoul-Henning Kamp 
28371e4fff8SPoul-Henning Kamp 
28495f1a897SPoul-Henning Kamp static void
2858f8def9eSPoul-Henning Kamp mdstart_preload(struct md_s *sc)
28671e4fff8SPoul-Henning Kamp {
2878f8def9eSPoul-Henning Kamp 	struct bio *bp;
28871e4fff8SPoul-Henning Kamp 	devstat_trans_flags dop;
28971e4fff8SPoul-Henning Kamp 
2908f8def9eSPoul-Henning Kamp 	for (;;) {
2918f8def9eSPoul-Henning Kamp 		/* XXX: LOCK(unique unit numbers) */
2928177437dSPoul-Henning Kamp 		bp = bioq_first(&sc->bio_queue);
29371e4fff8SPoul-Henning Kamp 		if (bp)
2948177437dSPoul-Henning Kamp 			bioq_remove(&sc->bio_queue, bp);
2958f8def9eSPoul-Henning Kamp 		/* XXX: UNLOCK(unique unit numbers) */
29671e4fff8SPoul-Henning Kamp 		if (!bp)
29771e4fff8SPoul-Henning Kamp 			break;
29871e4fff8SPoul-Henning Kamp 
29971e4fff8SPoul-Henning Kamp 		devstat_start_transaction(&sc->stats);
30071e4fff8SPoul-Henning Kamp 
3018177437dSPoul-Henning Kamp 		if (bp->bio_cmd == BIO_DELETE) {
30271e4fff8SPoul-Henning Kamp 			dop = DEVSTAT_NO_DATA;
3038177437dSPoul-Henning Kamp 		} else if (bp->bio_cmd == BIO_READ) {
30471e4fff8SPoul-Henning Kamp 			dop = DEVSTAT_READ;
3058177437dSPoul-Henning Kamp 			bcopy(sc->pl_ptr + (bp->bio_pblkno << DEV_BSHIFT), bp->bio_data, bp->bio_bcount);
30671e4fff8SPoul-Henning Kamp 		} else {
30771e4fff8SPoul-Henning Kamp 			dop = DEVSTAT_WRITE;
3088177437dSPoul-Henning Kamp 			bcopy(bp->bio_data, sc->pl_ptr + (bp->bio_pblkno << DEV_BSHIFT), bp->bio_bcount);
30971e4fff8SPoul-Henning Kamp 		}
3108177437dSPoul-Henning Kamp 		bp->bio_resid = 0;
3118177437dSPoul-Henning Kamp 		devstat_end_transaction_bio(&sc->stats, bp);
31271e4fff8SPoul-Henning Kamp 		biodone(bp);
31371e4fff8SPoul-Henning Kamp 	}
31471e4fff8SPoul-Henning Kamp 	return;
31571e4fff8SPoul-Henning Kamp }
31671e4fff8SPoul-Henning Kamp 
3178f8def9eSPoul-Henning Kamp static void
3188f8def9eSPoul-Henning Kamp mdstart_vnode(struct md_s *sc)
3198f8def9eSPoul-Henning Kamp {
3208f8def9eSPoul-Henning Kamp 	int error;
3218f8def9eSPoul-Henning Kamp 	struct bio *bp;
3228f8def9eSPoul-Henning Kamp 	struct uio auio;
3238f8def9eSPoul-Henning Kamp 	struct iovec aiov;
3248f8def9eSPoul-Henning Kamp 	struct mount *mp;
3258f8def9eSPoul-Henning Kamp 
3268f8def9eSPoul-Henning Kamp 	/*
3278f8def9eSPoul-Henning Kamp 	 * VNODE I/O
3288f8def9eSPoul-Henning Kamp 	 *
3298f8def9eSPoul-Henning Kamp 	 * If an error occurs, we set BIO_ERROR but we do not set
3308f8def9eSPoul-Henning Kamp 	 * B_INVAL because (for a write anyway), the buffer is
3318f8def9eSPoul-Henning Kamp 	 * still valid.
3328f8def9eSPoul-Henning Kamp 	 */
3338f8def9eSPoul-Henning Kamp 
3348f8def9eSPoul-Henning Kamp 	for (;;) {
3358f8def9eSPoul-Henning Kamp 		/* XXX: LOCK(unique unit numbers) */
3368f8def9eSPoul-Henning Kamp 		bp = bioq_first(&sc->bio_queue);
3378f8def9eSPoul-Henning Kamp 		if (bp)
3388f8def9eSPoul-Henning Kamp 			bioq_remove(&sc->bio_queue, bp);
3398f8def9eSPoul-Henning Kamp 		/* XXX: UNLOCK(unique unit numbers) */
3408f8def9eSPoul-Henning Kamp 		if (!bp)
3418f8def9eSPoul-Henning Kamp 			break;
3428f8def9eSPoul-Henning Kamp 
3438f8def9eSPoul-Henning Kamp 		devstat_start_transaction(&sc->stats);
3448f8def9eSPoul-Henning Kamp 
3458f8def9eSPoul-Henning Kamp 		bzero(&auio, sizeof(auio));
3468f8def9eSPoul-Henning Kamp 
3478f8def9eSPoul-Henning Kamp 		aiov.iov_base = bp->bio_data;
3488f8def9eSPoul-Henning Kamp 		aiov.iov_len = bp->bio_bcount;
3498f8def9eSPoul-Henning Kamp 		auio.uio_iov = &aiov;
3508f8def9eSPoul-Henning Kamp 		auio.uio_iovcnt = 1;
3518f8def9eSPoul-Henning Kamp 		auio.uio_offset = (vm_ooffset_t)bp->bio_pblkno * sc->secsize;
3528f8def9eSPoul-Henning Kamp 		auio.uio_segflg = UIO_SYSSPACE;
3538f8def9eSPoul-Henning Kamp 		if(bp->bio_cmd == BIO_READ)
3548f8def9eSPoul-Henning Kamp 			auio.uio_rw = UIO_READ;
3558f8def9eSPoul-Henning Kamp 		else
3568f8def9eSPoul-Henning Kamp 			auio.uio_rw = UIO_WRITE;
3578f8def9eSPoul-Henning Kamp 		auio.uio_resid = bp->bio_bcount;
3588f8def9eSPoul-Henning Kamp 		auio.uio_procp = curproc;
3598f8def9eSPoul-Henning Kamp 		if (VOP_ISLOCKED(sc->vnode, NULL))
3608f8def9eSPoul-Henning Kamp 			vprint("unexpected vn driver lock", sc->vnode);
3618f8def9eSPoul-Henning Kamp 		if (bp->bio_cmd == BIO_READ) {
3628f8def9eSPoul-Henning Kamp 			vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curproc);
3638f8def9eSPoul-Henning Kamp 			error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
3648f8def9eSPoul-Henning Kamp 		} else {
3658f8def9eSPoul-Henning Kamp 			(void) vn_start_write(sc->vnode, &mp, V_WAIT);
3668f8def9eSPoul-Henning Kamp 			vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curproc);
3678f8def9eSPoul-Henning Kamp 			error = VOP_WRITE(sc->vnode, &auio, 0, sc->cred);
3688f8def9eSPoul-Henning Kamp 			vn_finished_write(mp);
3698f8def9eSPoul-Henning Kamp 		}
3708f8def9eSPoul-Henning Kamp 		VOP_UNLOCK(sc->vnode, 0, curproc);
3718f8def9eSPoul-Henning Kamp 		bp->bio_resid = auio.uio_resid;
3728f8def9eSPoul-Henning Kamp 
3738f8def9eSPoul-Henning Kamp 		if (error) {
3748f8def9eSPoul-Henning Kamp 			bp->bio_error = error;
3758f8def9eSPoul-Henning Kamp 			bp->bio_flags |= BIO_ERROR;
3768f8def9eSPoul-Henning Kamp 		}
3778f8def9eSPoul-Henning Kamp 		devstat_end_transaction_bio(&sc->stats, bp);
3788f8def9eSPoul-Henning Kamp 		biodone(bp);
3798f8def9eSPoul-Henning Kamp 	}
3808f8def9eSPoul-Henning Kamp 	return;
3818f8def9eSPoul-Henning Kamp }
3828f8def9eSPoul-Henning Kamp 
3838f8def9eSPoul-Henning Kamp static void
3848f8def9eSPoul-Henning Kamp mdstart_swap(struct md_s *sc)
3858f8def9eSPoul-Henning Kamp {
3868f8def9eSPoul-Henning Kamp 	struct bio *bp;
3878f8def9eSPoul-Henning Kamp 
3888f8def9eSPoul-Henning Kamp 	for (;;) {
3898f8def9eSPoul-Henning Kamp 		/* XXX: LOCK(unique unit numbers) */
3908f8def9eSPoul-Henning Kamp 		bp = bioq_first(&sc->bio_queue);
3918f8def9eSPoul-Henning Kamp 		if (bp)
3928f8def9eSPoul-Henning Kamp 			bioq_remove(&sc->bio_queue, bp);
3938f8def9eSPoul-Henning Kamp 		/* XXX: UNLOCK(unique unit numbers) */
3948f8def9eSPoul-Henning Kamp 		if (!bp)
3958f8def9eSPoul-Henning Kamp 			break;
3968f8def9eSPoul-Henning Kamp 
3978f8def9eSPoul-Henning Kamp #if 0
3988f8def9eSPoul-Henning Kamp 		devstat_start_transaction(&sc->stats);
3998f8def9eSPoul-Henning Kamp #endif
4008f8def9eSPoul-Henning Kamp 
4018f8def9eSPoul-Henning Kamp 		if ((bp->bio_cmd == BIO_DELETE) && (sc->flags & MD_RESERVE))
4028f8def9eSPoul-Henning Kamp 			biodone(bp);
4038f8def9eSPoul-Henning Kamp 		else
4048f8def9eSPoul-Henning Kamp 			vm_pager_strategy(sc->object, bp);
4058f8def9eSPoul-Henning Kamp 
4068f8def9eSPoul-Henning Kamp #if 0
4078f8def9eSPoul-Henning Kamp 		devstat_end_transaction_bio(&sc->stats, bp);
4088f8def9eSPoul-Henning Kamp #endif
4098f8def9eSPoul-Henning Kamp 	}
4108f8def9eSPoul-Henning Kamp 	return;
4118f8def9eSPoul-Henning Kamp }
4128f8def9eSPoul-Henning Kamp 
4138f8def9eSPoul-Henning Kamp static void
4148f8def9eSPoul-Henning Kamp mdstrategy(struct bio *bp)
41500a6a3c6SPoul-Henning Kamp {
41600a6a3c6SPoul-Henning Kamp 	struct md_s *sc;
41700a6a3c6SPoul-Henning Kamp 
4188f8def9eSPoul-Henning Kamp 	if (md_debug > 1)
4198f8def9eSPoul-Henning Kamp 		printf("mdstrategy(%p) %s %x, %d, %ld, %p)\n",
4208f8def9eSPoul-Henning Kamp 		    bp, devtoname(bp->bio_dev), bp->bio_flags, bp->bio_blkno,
4218f8def9eSPoul-Henning Kamp 		    bp->bio_bcount / DEV_BSIZE, bp->bio_data);
4228f8def9eSPoul-Henning Kamp 
4238f8def9eSPoul-Henning Kamp 	sc = bp->bio_dev->si_drv1;
4248f8def9eSPoul-Henning Kamp 
4258f8def9eSPoul-Henning Kamp 	/* XXX: LOCK(sc->lock) */
4268f8def9eSPoul-Henning Kamp 	bioqdisksort(&sc->bio_queue, bp);
4278f8def9eSPoul-Henning Kamp 	/* XXX: UNLOCK(sc->lock) */
4288f8def9eSPoul-Henning Kamp 
4298f8def9eSPoul-Henning Kamp 	if (atomic_cmpset_int(&sc->busy, 0, 1) == 0)
4308f8def9eSPoul-Henning Kamp 		return;
4318f8def9eSPoul-Henning Kamp 
4328f8def9eSPoul-Henning Kamp 	switch (sc->type) {
4338f8def9eSPoul-Henning Kamp 	case MD_MALLOC:
4348f8def9eSPoul-Henning Kamp 		mdstart_malloc(sc);
4358f8def9eSPoul-Henning Kamp 		break;
4368f8def9eSPoul-Henning Kamp 	case MD_PRELOAD:
4378f8def9eSPoul-Henning Kamp 		mdstart_preload(sc);
4388f8def9eSPoul-Henning Kamp 		break;
4398f8def9eSPoul-Henning Kamp 	case MD_VNODE:
4408f8def9eSPoul-Henning Kamp 		mdstart_vnode(sc);
4418f8def9eSPoul-Henning Kamp 		break;
4428f8def9eSPoul-Henning Kamp 	case MD_SWAP:
4438f8def9eSPoul-Henning Kamp 		mdstart_swap(sc);
4448f8def9eSPoul-Henning Kamp 		break;
4458f8def9eSPoul-Henning Kamp 	default:
4468f8def9eSPoul-Henning Kamp 		panic("Impossible md(type)");
4478f8def9eSPoul-Henning Kamp 		break;
4488f8def9eSPoul-Henning Kamp 	}
4498f8def9eSPoul-Henning Kamp 	sc->busy = 0;
4508f8def9eSPoul-Henning Kamp }
4518f8def9eSPoul-Henning Kamp 
4528f8def9eSPoul-Henning Kamp static struct md_s *
4538f8def9eSPoul-Henning Kamp mdfind(int unit)
4548f8def9eSPoul-Henning Kamp {
4558f8def9eSPoul-Henning Kamp 	struct md_s *sc;
4568f8def9eSPoul-Henning Kamp 
4578f8def9eSPoul-Henning Kamp 	/* XXX: LOCK(unique unit numbers) */
4583f54a085SPoul-Henning Kamp 	LIST_FOREACH(sc, &md_softc_list, list) {
4593f54a085SPoul-Henning Kamp 		if (sc->unit == unit)
4608f8def9eSPoul-Henning Kamp 			break;
4618f8def9eSPoul-Henning Kamp 	}
4628f8def9eSPoul-Henning Kamp 	/* XXX: UNLOCK(unique unit numbers) */
4638f8def9eSPoul-Henning Kamp 	return (sc);
4648f8def9eSPoul-Henning Kamp }
4658f8def9eSPoul-Henning Kamp 
4668f8def9eSPoul-Henning Kamp static struct md_s *
4678f8def9eSPoul-Henning Kamp mdnew(int unit)
4688f8def9eSPoul-Henning Kamp {
4698f8def9eSPoul-Henning Kamp 	struct md_s *sc;
4708f8def9eSPoul-Henning Kamp 	int max = -1;
4718f8def9eSPoul-Henning Kamp 
4728f8def9eSPoul-Henning Kamp 	/* XXX: LOCK(unique unit numbers) */
4738f8def9eSPoul-Henning Kamp 	LIST_FOREACH(sc, &md_softc_list, list) {
4748f8def9eSPoul-Henning Kamp 		if (sc->unit == unit) {
4758f8def9eSPoul-Henning Kamp 			/* XXX: UNLOCK(unique unit numbers) */
4763f54a085SPoul-Henning Kamp 			return (NULL);
4773f54a085SPoul-Henning Kamp 		}
4788f8def9eSPoul-Henning Kamp 		if (sc->unit > max)
4798f8def9eSPoul-Henning Kamp 			max = sc->unit;
4808f8def9eSPoul-Henning Kamp 	}
4818f8def9eSPoul-Henning Kamp 	if (unit == -1)
4828f8def9eSPoul-Henning Kamp 		unit = max + 1;
4838f8def9eSPoul-Henning Kamp 	if (unit > DKMAXUNIT)
4848f8def9eSPoul-Henning Kamp 		return (NULL);
4857cc0979fSDavid Malone 	MALLOC(sc, struct md_s *,sizeof(*sc), M_MD, M_WAITOK | M_ZERO);
4863f54a085SPoul-Henning Kamp 	sc->unit = unit;
4878f8def9eSPoul-Henning Kamp 	LIST_INSERT_HEAD(&md_softc_list, sc, list);
4888f8def9eSPoul-Henning Kamp 	/* XXX: UNLOCK(unique unit numbers) */
4898f8def9eSPoul-Henning Kamp 	return (sc);
4908f8def9eSPoul-Henning Kamp }
4918f8def9eSPoul-Henning Kamp 
4928f8def9eSPoul-Henning Kamp static void
4938f8def9eSPoul-Henning Kamp mddelete(struct md_s *sc)
4948f8def9eSPoul-Henning Kamp {
4958f8def9eSPoul-Henning Kamp 
4968f8def9eSPoul-Henning Kamp 	/* XXX: LOCK(unique unit numbers) */
4978f8def9eSPoul-Henning Kamp 	LIST_REMOVE(sc, list);
4988f8def9eSPoul-Henning Kamp 	/* XXX: UNLOCK(unique unit numbers) */
4998f8def9eSPoul-Henning Kamp 	FREE(sc, M_MD);
5008f8def9eSPoul-Henning Kamp }
5018f8def9eSPoul-Henning Kamp 
5028f8def9eSPoul-Henning Kamp static void
5038f8def9eSPoul-Henning Kamp mdinit(struct md_s *sc)
5048f8def9eSPoul-Henning Kamp {
5058f8def9eSPoul-Henning Kamp 
5068177437dSPoul-Henning Kamp 	bioq_init(&sc->bio_queue);
50795f1a897SPoul-Henning Kamp 	devstat_add_entry(&sc->stats, "md", sc->unit, DEV_BSIZE,
50895f1a897SPoul-Henning Kamp 		DEVSTAT_NO_ORDERED_TAGS,
50971e4fff8SPoul-Henning Kamp 		DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
51071e4fff8SPoul-Henning Kamp 		DEVSTAT_PRIORITY_OTHER);
5110cfaeeeeSPoul-Henning Kamp 	sc->dev = disk_create(sc->unit, &sc->disk, 0, &md_cdevsw, &mddisk_cdevsw);
51295f1a897SPoul-Henning Kamp 	sc->dev->si_drv1 = sc;
51371e4fff8SPoul-Henning Kamp }
51471e4fff8SPoul-Henning Kamp 
51571e4fff8SPoul-Henning Kamp static void
51671e4fff8SPoul-Henning Kamp mdcreate_preload(u_char *image, unsigned length)
51771e4fff8SPoul-Henning Kamp {
51871e4fff8SPoul-Henning Kamp 	struct md_s *sc;
51971e4fff8SPoul-Henning Kamp 
5208f8def9eSPoul-Henning Kamp 	sc = mdnew(-1);
5218f8def9eSPoul-Henning Kamp 	if (sc == NULL)
5228f8def9eSPoul-Henning Kamp 		return;
52366c16191SPoul-Henning Kamp 	sc->type = MD_PRELOAD;
5248f8def9eSPoul-Henning Kamp 	sc->secsize = DEV_BSIZE;
52595f1a897SPoul-Henning Kamp 	sc->nsect = length / DEV_BSIZE;
52695f1a897SPoul-Henning Kamp 	sc->pl_ptr = image;
52795f1a897SPoul-Henning Kamp 	sc->pl_len = length;
52871e4fff8SPoul-Henning Kamp 	if (sc->unit == 0)
52971e4fff8SPoul-Henning Kamp 		mdrootready = 1;
5308f8def9eSPoul-Henning Kamp 	mdinit(sc);
53195f1a897SPoul-Henning Kamp }
53295f1a897SPoul-Henning Kamp 
5338f8def9eSPoul-Henning Kamp static int
5348f8def9eSPoul-Henning Kamp mdcreate_malloc(struct md_ioctl *mdio)
53595f1a897SPoul-Henning Kamp {
53695f1a897SPoul-Henning Kamp 	struct md_s *sc;
5378f8def9eSPoul-Henning Kamp 	unsigned u;
53895f1a897SPoul-Henning Kamp 
5398f8def9eSPoul-Henning Kamp 	if (mdio->md_size == 0)
5408f8def9eSPoul-Henning Kamp 		return(EINVAL);
5418f8def9eSPoul-Henning Kamp 	if (mdio->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE))
5428f8def9eSPoul-Henning Kamp 		return(EINVAL);
5438f8def9eSPoul-Henning Kamp 	/* Compression doesn't make sense if we have reserved space */
5448f8def9eSPoul-Henning Kamp 	if (mdio->md_options & MD_RESERVE)
5458f8def9eSPoul-Henning Kamp 		mdio->md_options &= ~MD_COMPRESS;
5468f8def9eSPoul-Henning Kamp 	if (mdio->md_options & MD_AUTOUNIT) {
5478f8def9eSPoul-Henning Kamp 		sc = mdnew(-1);
5483f54a085SPoul-Henning Kamp 		if (sc == NULL)
5498f8def9eSPoul-Henning Kamp 			return (ENOMEM);
5508f8def9eSPoul-Henning Kamp 		mdio->md_unit = sc->unit;
5518f8def9eSPoul-Henning Kamp 	} else {
5528f8def9eSPoul-Henning Kamp 		sc = mdnew(mdio->md_unit);
5538f8def9eSPoul-Henning Kamp 		if (sc == NULL)
5548f8def9eSPoul-Henning Kamp 			return (EBUSY);
5558f8def9eSPoul-Henning Kamp 	}
55666c16191SPoul-Henning Kamp 	sc->type = MD_MALLOC;
5578f8def9eSPoul-Henning Kamp 	sc->secsize = DEV_BSIZE;
5588f8def9eSPoul-Henning Kamp 	sc->nsect = mdio->md_size;
5598f8def9eSPoul-Henning Kamp 	sc->flags = mdio->md_options & MD_COMPRESS;
5608f8def9eSPoul-Henning Kamp 	if (!(mdio->md_options & MD_RESERVE)) {
5617cc0979fSDavid Malone 		MALLOC(sc->secp, u_char **, sizeof(u_char *), M_MD, M_WAITOK | M_ZERO);
56200a6a3c6SPoul-Henning Kamp 		sc->nsecp = 1;
5638f8def9eSPoul-Henning Kamp 	} else {
5648f8def9eSPoul-Henning Kamp 		MALLOC(sc->secp, u_char **, sc->nsect * sizeof(u_char *), M_MD, M_WAITOK | M_ZERO);
5658f8def9eSPoul-Henning Kamp 		sc->nsecp = sc->nsect;
5668f8def9eSPoul-Henning Kamp 		for (u = 0; u < sc->nsect; u++)
5678f8def9eSPoul-Henning Kamp 			MALLOC(sc->secp[u], u_char *, DEV_BSIZE, M_MDSECT, M_WAITOK | M_ZERO);
5688f8def9eSPoul-Henning Kamp 	}
5690cfaeeeeSPoul-Henning Kamp 	printf("md%d: Malloc disk\n", sc->unit);
5708f8def9eSPoul-Henning Kamp 	mdinit(sc);
5718f8def9eSPoul-Henning Kamp 	return (0);
57200a6a3c6SPoul-Henning Kamp }
57300a6a3c6SPoul-Henning Kamp 
5743f54a085SPoul-Henning Kamp 
5758f8def9eSPoul-Henning Kamp static int
5768f8def9eSPoul-Henning Kamp mdsetcred(struct md_s *sc, struct ucred *cred)
5778f8def9eSPoul-Henning Kamp {
5788f8def9eSPoul-Henning Kamp 	char *tmpbuf;
5798f8def9eSPoul-Henning Kamp 	int error = 0;
5808f8def9eSPoul-Henning Kamp 
5813f54a085SPoul-Henning Kamp 	/*
5828f8def9eSPoul-Henning Kamp 	 * Set credits in our softc
5833f54a085SPoul-Henning Kamp 	 */
5848f8def9eSPoul-Henning Kamp 
5858f8def9eSPoul-Henning Kamp 	if (sc->cred)
5868f8def9eSPoul-Henning Kamp 		crfree(sc->cred);
5878f8def9eSPoul-Henning Kamp 	sc->cred = crdup(cred);
5888f8def9eSPoul-Henning Kamp 
5898f8def9eSPoul-Henning Kamp 	/*
5908f8def9eSPoul-Henning Kamp 	 * Horrible kludge to establish credentials for NFS  XXX.
5918f8def9eSPoul-Henning Kamp 	 */
5928f8def9eSPoul-Henning Kamp 
5938f8def9eSPoul-Henning Kamp 	if (sc->vnode) {
5948f8def9eSPoul-Henning Kamp 		struct uio auio;
5958f8def9eSPoul-Henning Kamp 		struct iovec aiov;
5968f8def9eSPoul-Henning Kamp 
5978f8def9eSPoul-Henning Kamp 		tmpbuf = malloc(sc->secsize, M_TEMP, M_WAITOK);
5988f8def9eSPoul-Henning Kamp 		bzero(&auio, sizeof(auio));
5998f8def9eSPoul-Henning Kamp 
6008f8def9eSPoul-Henning Kamp 		aiov.iov_base = tmpbuf;
6018f8def9eSPoul-Henning Kamp 		aiov.iov_len = sc->secsize;
6028f8def9eSPoul-Henning Kamp 		auio.uio_iov = &aiov;
6038f8def9eSPoul-Henning Kamp 		auio.uio_iovcnt = 1;
6048f8def9eSPoul-Henning Kamp 		auio.uio_offset = 0;
6058f8def9eSPoul-Henning Kamp 		auio.uio_rw = UIO_READ;
6068f8def9eSPoul-Henning Kamp 		auio.uio_segflg = UIO_SYSSPACE;
6078f8def9eSPoul-Henning Kamp 		auio.uio_resid = aiov.iov_len;
6088f8def9eSPoul-Henning Kamp 		vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curproc);
6098f8def9eSPoul-Henning Kamp 		error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
6108f8def9eSPoul-Henning Kamp 		VOP_UNLOCK(sc->vnode, 0, curproc);
6118f8def9eSPoul-Henning Kamp 		free(tmpbuf, M_TEMP);
6128f8def9eSPoul-Henning Kamp 	}
6138f8def9eSPoul-Henning Kamp 	return (error);
6148f8def9eSPoul-Henning Kamp }
6158f8def9eSPoul-Henning Kamp 
6168f8def9eSPoul-Henning Kamp static int
6178f8def9eSPoul-Henning Kamp mdcreate_vnode(struct md_ioctl *mdio, struct proc *p)
6188f8def9eSPoul-Henning Kamp {
6198f8def9eSPoul-Henning Kamp 	struct md_s *sc;
6208f8def9eSPoul-Henning Kamp 	struct vattr vattr;
6218f8def9eSPoul-Henning Kamp 	struct nameidata nd;
6228f8def9eSPoul-Henning Kamp 	int error, flags;
6238f8def9eSPoul-Henning Kamp 
6248f8def9eSPoul-Henning Kamp 	if (mdio->md_options & MD_AUTOUNIT) {
6258f8def9eSPoul-Henning Kamp 		sc = mdnew(-1);
6268f8def9eSPoul-Henning Kamp 		mdio->md_unit = sc->unit;
6278f8def9eSPoul-Henning Kamp 	} else {
6288f8def9eSPoul-Henning Kamp 		sc = mdnew(mdio->md_unit);
6298f8def9eSPoul-Henning Kamp 	}
6308f8def9eSPoul-Henning Kamp 	if (sc == NULL)
6318f8def9eSPoul-Henning Kamp 		return (EBUSY);
6328f8def9eSPoul-Henning Kamp 
6338f8def9eSPoul-Henning Kamp 	sc->type = MD_VNODE;
6348f8def9eSPoul-Henning Kamp 
6358f8def9eSPoul-Henning Kamp 	flags = FREAD|FWRITE;
6368f8def9eSPoul-Henning Kamp 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, mdio->md_file, p);
6378f8def9eSPoul-Henning Kamp 	error = vn_open(&nd, &flags, 0);
6388f8def9eSPoul-Henning Kamp 	if (error) {
6398f8def9eSPoul-Henning Kamp 		if (error != EACCES && error != EPERM && error != EROFS)
6408f8def9eSPoul-Henning Kamp 			return (error);
6418f8def9eSPoul-Henning Kamp 		flags &= ~FWRITE;
6428f8def9eSPoul-Henning Kamp 		sc->flags |= MD_READONLY;
6438f8def9eSPoul-Henning Kamp 		NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, mdio->md_file, p);
6448f8def9eSPoul-Henning Kamp 		error = vn_open(&nd, &flags, 0);
6458f8def9eSPoul-Henning Kamp 		if (error)
6468f8def9eSPoul-Henning Kamp 			return (error);
6478f8def9eSPoul-Henning Kamp 	}
6488f8def9eSPoul-Henning Kamp 	NDFREE(&nd, NDF_ONLY_PNBUF);
6498f8def9eSPoul-Henning Kamp 	if (nd.ni_vp->v_type != VREG ||
6508f8def9eSPoul-Henning Kamp 	    (error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p))) {
6518f8def9eSPoul-Henning Kamp 		VOP_UNLOCK(nd.ni_vp, 0, p);
6528f8def9eSPoul-Henning Kamp 		(void) vn_close(nd.ni_vp, flags, p->p_ucred, p);
6538f8def9eSPoul-Henning Kamp 		return (error ? error : EINVAL);
6548f8def9eSPoul-Henning Kamp 	}
6558f8def9eSPoul-Henning Kamp 	VOP_UNLOCK(nd.ni_vp, 0, p);
6568f8def9eSPoul-Henning Kamp 	sc->secsize = DEV_BSIZE;
6578f8def9eSPoul-Henning Kamp 	sc->vnode = nd.ni_vp;
6588f8def9eSPoul-Henning Kamp 
6598f8def9eSPoul-Henning Kamp 	/*
6608f8def9eSPoul-Henning Kamp 	 * If the size is specified, override the file attributes.
6618f8def9eSPoul-Henning Kamp 	 */
6628f8def9eSPoul-Henning Kamp 	if (mdio->md_size)
6638f8def9eSPoul-Henning Kamp 		sc->nsect = mdio->md_size;
6648f8def9eSPoul-Henning Kamp 	else
6658f8def9eSPoul-Henning Kamp 		sc->nsect = vattr.va_size / sc->secsize; /* XXX: round up ? */
6668f8def9eSPoul-Henning Kamp 	error = mdsetcred(sc, p->p_ucred);
6678f8def9eSPoul-Henning Kamp 	if (error) {
6688f8def9eSPoul-Henning Kamp 		(void) vn_close(nd.ni_vp, flags, p->p_ucred, p);
6698f8def9eSPoul-Henning Kamp 		return(error);
6708f8def9eSPoul-Henning Kamp 	}
6718f8def9eSPoul-Henning Kamp 	mdinit(sc);
6728f8def9eSPoul-Henning Kamp 	return (0);
6738f8def9eSPoul-Henning Kamp }
6748f8def9eSPoul-Henning Kamp 
6758f8def9eSPoul-Henning Kamp static int
6768f8def9eSPoul-Henning Kamp mddestroy(struct md_s *sc, struct md_ioctl *mdio, struct proc *p)
6778f8def9eSPoul-Henning Kamp {
6788f8def9eSPoul-Henning Kamp 	unsigned u;
6798f8def9eSPoul-Henning Kamp 
6808f8def9eSPoul-Henning Kamp 	if (sc->dev != NULL)
6818f8def9eSPoul-Henning Kamp 		disk_destroy(sc->dev);
6828f8def9eSPoul-Henning Kamp 	if (sc->vnode != NULL)
6838f8def9eSPoul-Henning Kamp 		(void)vn_close(sc->vnode, sc->flags & MD_READONLY ?  FREAD : (FREAD|FWRITE), sc->cred, p);
6848f8def9eSPoul-Henning Kamp 	if (sc->cred != NULL)
6858f8def9eSPoul-Henning Kamp 		crfree(sc->cred);
6868f8def9eSPoul-Henning Kamp 	if (sc->object != NULL)
6878f8def9eSPoul-Henning Kamp 		vm_pager_deallocate(sc->object);
6888f8def9eSPoul-Henning Kamp 	if (sc->secp != NULL) {
6898f8def9eSPoul-Henning Kamp 		for (u = 0; u < sc->nsecp; u++)
6908f8def9eSPoul-Henning Kamp 			if ((uintptr_t)sc->secp[u] > 255)
6918f8def9eSPoul-Henning Kamp 				FREE(sc->secp[u], M_MDSECT);
6928f8def9eSPoul-Henning Kamp 		FREE(sc->secp, M_MD);
6938f8def9eSPoul-Henning Kamp 	}
6948f8def9eSPoul-Henning Kamp 	mddelete(sc);
6958f8def9eSPoul-Henning Kamp 	return (0);
6968f8def9eSPoul-Henning Kamp }
6978f8def9eSPoul-Henning Kamp 
6988f8def9eSPoul-Henning Kamp static int
6998f8def9eSPoul-Henning Kamp mdcreate_swap(struct md_ioctl *mdio, struct proc *p)
7008f8def9eSPoul-Henning Kamp {
7018f8def9eSPoul-Henning Kamp 	int error;
7028f8def9eSPoul-Henning Kamp 	struct md_s *sc;
7038f8def9eSPoul-Henning Kamp 
7048f8def9eSPoul-Henning Kamp 	if (mdio->md_options & MD_AUTOUNIT) {
7058f8def9eSPoul-Henning Kamp 		sc = mdnew(-1);
7068f8def9eSPoul-Henning Kamp 		mdio->md_unit = sc->unit;
7078f8def9eSPoul-Henning Kamp 	} else {
7088f8def9eSPoul-Henning Kamp 		sc = mdnew(mdio->md_unit);
7098f8def9eSPoul-Henning Kamp 	}
7108f8def9eSPoul-Henning Kamp 	if (sc == NULL)
7118f8def9eSPoul-Henning Kamp 		return (EBUSY);
7128f8def9eSPoul-Henning Kamp 
7138f8def9eSPoul-Henning Kamp 	sc->type = MD_SWAP;
7148f8def9eSPoul-Henning Kamp 
7158f8def9eSPoul-Henning Kamp 	/*
7168f8def9eSPoul-Henning Kamp 	 * Range check.  Disallow negative sizes or any size less then the
7178f8def9eSPoul-Henning Kamp 	 * size of a page.  Then round to a page.
7188f8def9eSPoul-Henning Kamp 	 */
7198f8def9eSPoul-Henning Kamp 
7208f8def9eSPoul-Henning Kamp 	if (mdio->md_size == 0)
7218f8def9eSPoul-Henning Kamp 		return(EDOM);
7228f8def9eSPoul-Henning Kamp 
7238f8def9eSPoul-Henning Kamp 	/*
7248f8def9eSPoul-Henning Kamp 	 * Allocate an OBJT_SWAP object.
7258f8def9eSPoul-Henning Kamp 	 *
7268f8def9eSPoul-Henning Kamp 	 * sc_secsize is PAGE_SIZE'd
7278f8def9eSPoul-Henning Kamp 	 *
7288f8def9eSPoul-Henning Kamp 	 * mdio->size is in DEV_BSIZE'd chunks.
7298f8def9eSPoul-Henning Kamp 	 * Note the truncation.
7308f8def9eSPoul-Henning Kamp 	 */
7318f8def9eSPoul-Henning Kamp 
7328f8def9eSPoul-Henning Kamp 	sc->secsize = PAGE_SIZE;
7338f8def9eSPoul-Henning Kamp 	sc->nsect = mdio->md_size / (PAGE_SIZE / DEV_BSIZE);
7348f8def9eSPoul-Henning Kamp 	sc->object = vm_pager_allocate(OBJT_SWAP, NULL, sc->secsize * (vm_offset_t)sc->nsect, VM_PROT_DEFAULT, 0);
7358f8def9eSPoul-Henning Kamp 	if (mdio->md_options & MD_RESERVE) {
7368f8def9eSPoul-Henning Kamp 		if (swap_pager_reserve(sc->object, 0, sc->nsect) < 0) {
7378f8def9eSPoul-Henning Kamp 			vm_pager_deallocate(sc->object);
7388f8def9eSPoul-Henning Kamp 			sc->object = NULL;
7398f8def9eSPoul-Henning Kamp 			return(EDOM);
7408f8def9eSPoul-Henning Kamp 		}
7418f8def9eSPoul-Henning Kamp 	}
7428f8def9eSPoul-Henning Kamp 	error = mdsetcred(sc, p->p_ucred);
7438f8def9eSPoul-Henning Kamp 	if (error)
7448f8def9eSPoul-Henning Kamp 		mddestroy(sc, mdio, p);
7458f8def9eSPoul-Henning Kamp 	else
7468f8def9eSPoul-Henning Kamp 		mdinit(sc);
7478f8def9eSPoul-Henning Kamp 	return(error);
7488f8def9eSPoul-Henning Kamp }
7498f8def9eSPoul-Henning Kamp 
7508f8def9eSPoul-Henning Kamp static int
7518f8def9eSPoul-Henning Kamp mdctlioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
7528f8def9eSPoul-Henning Kamp {
7538f8def9eSPoul-Henning Kamp 	struct md_ioctl *mdio;
7548f8def9eSPoul-Henning Kamp 	struct md_s *sc;
7558f8def9eSPoul-Henning Kamp 
7568f8def9eSPoul-Henning Kamp 	if (md_debug)
7578f8def9eSPoul-Henning Kamp 		printf("mdctlioctl(%s %lx %p %x %p)\n",
7588f8def9eSPoul-Henning Kamp 			devtoname(dev), cmd, addr, flags, p);
7598f8def9eSPoul-Henning Kamp 
7608f8def9eSPoul-Henning Kamp 	mdio = (struct md_ioctl *)addr;
7618f8def9eSPoul-Henning Kamp 	if (mdio->md_type != MD_VNODE)
7628f8def9eSPoul-Henning Kamp 		mdio->md_file[0] = '\0';
7638f8def9eSPoul-Henning Kamp 	switch (cmd) {
7648f8def9eSPoul-Henning Kamp 	case MDIOCATTACH:
7658f8def9eSPoul-Henning Kamp 		printf("A: u %u t %d n %s s %u o %u\n", mdio->md_unit,
7668f8def9eSPoul-Henning Kamp 		    mdio->md_type, mdio->md_file, mdio->md_size,
7678f8def9eSPoul-Henning Kamp 		    mdio->md_options);
7688f8def9eSPoul-Henning Kamp 		switch (mdio->md_type) {
7698f8def9eSPoul-Henning Kamp 		case MD_MALLOC:
7708f8def9eSPoul-Henning Kamp 			return(mdcreate_malloc(mdio));
7718f8def9eSPoul-Henning Kamp 		case MD_PRELOAD:
7728f8def9eSPoul-Henning Kamp 			return (EINVAL);
7738f8def9eSPoul-Henning Kamp 		case MD_VNODE:
7748f8def9eSPoul-Henning Kamp 			return(mdcreate_vnode(mdio, p));
7758f8def9eSPoul-Henning Kamp 		case MD_SWAP:
7768f8def9eSPoul-Henning Kamp 			return(mdcreate_swap(mdio, p));
7778f8def9eSPoul-Henning Kamp 		default:
7788f8def9eSPoul-Henning Kamp 			return (EINVAL);
7798f8def9eSPoul-Henning Kamp 		}
7808f8def9eSPoul-Henning Kamp 	case MDIOCDETACH:
7818f8def9eSPoul-Henning Kamp 		printf("D: u %u t %d n %s s %u o %u\n", mdio->md_unit,
7828f8def9eSPoul-Henning Kamp 		    mdio->md_type, mdio->md_file, mdio->md_size,
7838f8def9eSPoul-Henning Kamp 		    mdio->md_options);
7848f8def9eSPoul-Henning Kamp 		if (*mdio->md_file != '\0')
7858f8def9eSPoul-Henning Kamp 			return(EINVAL);
7868f8def9eSPoul-Henning Kamp 		if (mdio->md_size != 0)
7878f8def9eSPoul-Henning Kamp 			return(EINVAL);
7888f8def9eSPoul-Henning Kamp 		if (mdio->md_options != 0)
7898f8def9eSPoul-Henning Kamp 			return(EINVAL);
7908f8def9eSPoul-Henning Kamp 		sc = mdfind(mdio->md_unit);
7918f8def9eSPoul-Henning Kamp 		if (sc == NULL)
7928f8def9eSPoul-Henning Kamp 			return (ENOENT);
7938f8def9eSPoul-Henning Kamp 		switch(sc->type) {
7948f8def9eSPoul-Henning Kamp 		case MD_VNODE:
7958f8def9eSPoul-Henning Kamp 		case MD_SWAP:
7968f8def9eSPoul-Henning Kamp 		case MD_MALLOC:
7978f8def9eSPoul-Henning Kamp 			return(mddestroy(sc, mdio, p));
7988f8def9eSPoul-Henning Kamp 		default:
7998f8def9eSPoul-Henning Kamp 			return (EOPNOTSUPP);
8008f8def9eSPoul-Henning Kamp 		}
8018f8def9eSPoul-Henning Kamp 	default:
8028f8def9eSPoul-Henning Kamp 		return (ENOIOCTL);
8038f8def9eSPoul-Henning Kamp 	};
8048f8def9eSPoul-Henning Kamp 	return (ENOIOCTL);
8053f54a085SPoul-Henning Kamp }
8063f54a085SPoul-Henning Kamp 
80700a6a3c6SPoul-Henning Kamp static void
80800a6a3c6SPoul-Henning Kamp md_drvinit(void *unused)
80900a6a3c6SPoul-Henning Kamp {
81000a6a3c6SPoul-Henning Kamp 
81195f1a897SPoul-Henning Kamp 	caddr_t mod;
81295f1a897SPoul-Henning Kamp 	caddr_t c;
81395f1a897SPoul-Henning Kamp 	u_char *ptr, *name, *type;
81495f1a897SPoul-Henning Kamp 	unsigned len;
81595f1a897SPoul-Henning Kamp 
81671e4fff8SPoul-Henning Kamp #ifdef MD_ROOT_SIZE
81771e4fff8SPoul-Henning Kamp 	mdcreate_preload(mfs_root, MD_ROOT_SIZE*1024);
81871e4fff8SPoul-Henning Kamp #endif
81995f1a897SPoul-Henning Kamp 	mod = NULL;
82095f1a897SPoul-Henning Kamp 	while ((mod = preload_search_next_name(mod)) != NULL) {
82195f1a897SPoul-Henning Kamp 		name = (char *)preload_search_info(mod, MODINFO_NAME);
82295f1a897SPoul-Henning Kamp 		type = (char *)preload_search_info(mod, MODINFO_TYPE);
82395f1a897SPoul-Henning Kamp 		if (name == NULL)
82495f1a897SPoul-Henning Kamp 			continue;
82595f1a897SPoul-Henning Kamp 		if (type == NULL)
82695f1a897SPoul-Henning Kamp 			continue;
82771e4fff8SPoul-Henning Kamp 		if (strcmp(type, "md_image") && strcmp(type, "mfs_root"))
82895f1a897SPoul-Henning Kamp 			continue;
82995f1a897SPoul-Henning Kamp 		c = preload_search_info(mod, MODINFO_ADDR);
83095f1a897SPoul-Henning Kamp 		ptr = *(u_char **)c;
83195f1a897SPoul-Henning Kamp 		c = preload_search_info(mod, MODINFO_SIZE);
83295f1a897SPoul-Henning Kamp 		len = *(unsigned *)c;
83395f1a897SPoul-Henning Kamp 		printf("md%d: Preloaded image <%s> %d bytes at %p\n",
83495f1a897SPoul-Henning Kamp 		   mdunits, name, len, ptr);
83595f1a897SPoul-Henning Kamp 		mdcreate_preload(ptr, len);
83695f1a897SPoul-Henning Kamp 	}
8378f8def9eSPoul-Henning Kamp 	make_dev(&mdctl_cdevsw, 0xffff00ff, UID_ROOT, GID_WHEEL, 0600, "mdctl");
83800a6a3c6SPoul-Henning Kamp }
83900a6a3c6SPoul-Henning Kamp 
84071e4fff8SPoul-Henning Kamp SYSINIT(mddev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR, md_drvinit,NULL)
84100a6a3c6SPoul-Henning Kamp 
#ifdef MD_ROOT
/*
 * Run from SYSINIT at SI_SUB_MOUNT_ROOT: if mdrootready is set, make
 * "ufs:/dev/md0c" the first entry in rootdevnames[].
 */
static void
md_takeroot(void *junk)
{
	if (!mdrootready)
		return;

	rootdevnames[0] = "ufs:/dev/md0c";
}

SYSINIT(md_root, SI_SUB_MOUNT_ROOT, SI_ORDER_FIRST, md_takeroot, NULL);
#endif
8523f54a085SPoul-Henning Kamp 
853