xref: /freebsd/sys/dev/md/md.c (revision 57e9624ec9c9570ddbc14cf8b37f9dd1b03b5f55)
100a6a3c6SPoul-Henning Kamp /*
200a6a3c6SPoul-Henning Kamp  * ----------------------------------------------------------------------------
300a6a3c6SPoul-Henning Kamp  * "THE BEER-WARE LICENSE" (Revision 42):
400a6a3c6SPoul-Henning Kamp  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
500a6a3c6SPoul-Henning Kamp  * can do whatever you want with this stuff. If we meet some day, and you think
600a6a3c6SPoul-Henning Kamp  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
700a6a3c6SPoul-Henning Kamp  * ----------------------------------------------------------------------------
800a6a3c6SPoul-Henning Kamp  *
900a6a3c6SPoul-Henning Kamp  * $FreeBSD$
1000a6a3c6SPoul-Henning Kamp  *
1100a6a3c6SPoul-Henning Kamp  */
1200a6a3c6SPoul-Henning Kamp 
13637f671aSPoul-Henning Kamp /*
14637f671aSPoul-Henning Kamp  * The following functions are based in the vn(4) driver: mdstart_swap(),
15637f671aSPoul-Henning Kamp  * mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(),
16637f671aSPoul-Henning Kamp  * and as such under the following copyright:
17637f671aSPoul-Henning Kamp  *
18637f671aSPoul-Henning Kamp  * Copyright (c) 1988 University of Utah.
19637f671aSPoul-Henning Kamp  * Copyright (c) 1990, 1993
20637f671aSPoul-Henning Kamp  *	The Regents of the University of California.  All rights reserved.
21637f671aSPoul-Henning Kamp  *
22637f671aSPoul-Henning Kamp  * This code is derived from software contributed to Berkeley by
23637f671aSPoul-Henning Kamp  * the Systems Programming Group of the University of Utah Computer
24637f671aSPoul-Henning Kamp  * Science Department.
25637f671aSPoul-Henning Kamp  *
26637f671aSPoul-Henning Kamp  * Redistribution and use in source and binary forms, with or without
27637f671aSPoul-Henning Kamp  * modification, are permitted provided that the following conditions
28637f671aSPoul-Henning Kamp  * are met:
29637f671aSPoul-Henning Kamp  * 1. Redistributions of source code must retain the above copyright
30637f671aSPoul-Henning Kamp  *    notice, this list of conditions and the following disclaimer.
31637f671aSPoul-Henning Kamp  * 2. Redistributions in binary form must reproduce the above copyright
32637f671aSPoul-Henning Kamp  *    notice, this list of conditions and the following disclaimer in the
33637f671aSPoul-Henning Kamp  *    documentation and/or other materials provided with the distribution.
34637f671aSPoul-Henning Kamp  * 3. All advertising materials mentioning features or use of this software
35637f671aSPoul-Henning Kamp  *    must display the following acknowledgement:
36637f671aSPoul-Henning Kamp  *	This product includes software developed by the University of
37637f671aSPoul-Henning Kamp  *	California, Berkeley and its contributors.
38637f671aSPoul-Henning Kamp  * 4. Neither the name of the University nor the names of its contributors
39637f671aSPoul-Henning Kamp  *    may be used to endorse or promote products derived from this software
40637f671aSPoul-Henning Kamp  *    without specific prior written permission.
41637f671aSPoul-Henning Kamp  *
42637f671aSPoul-Henning Kamp  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
43637f671aSPoul-Henning Kamp  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44637f671aSPoul-Henning Kamp  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45637f671aSPoul-Henning Kamp  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
46637f671aSPoul-Henning Kamp  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47637f671aSPoul-Henning Kamp  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48637f671aSPoul-Henning Kamp  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49637f671aSPoul-Henning Kamp  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50637f671aSPoul-Henning Kamp  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51637f671aSPoul-Henning Kamp  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52637f671aSPoul-Henning Kamp  * SUCH DAMAGE.
53637f671aSPoul-Henning Kamp  *
54637f671aSPoul-Henning Kamp  * from: Utah Hdr: vn.c 1.13 94/04/02
55637f671aSPoul-Henning Kamp  *
56637f671aSPoul-Henning Kamp  *	from: @(#)vn.c	8.6 (Berkeley) 4/1/94
57637f671aSPoul-Henning Kamp  * From: src/sys/dev/vn/vn.c,v 1.122 2000/12/16 16:06:03
58637f671aSPoul-Henning Kamp  */
59637f671aSPoul-Henning Kamp 
6071e4fff8SPoul-Henning Kamp #include "opt_mfs.h"		/* We have adopted some tasks from MFS */
613f54a085SPoul-Henning Kamp #include "opt_md.h"
6271e4fff8SPoul-Henning Kamp 
6300a6a3c6SPoul-Henning Kamp #include <sys/param.h>
6400a6a3c6SPoul-Henning Kamp #include <sys/systm.h>
659626b608SPoul-Henning Kamp #include <sys/bio.h>
6600a6a3c6SPoul-Henning Kamp #include <sys/conf.h>
6700a6a3c6SPoul-Henning Kamp #include <sys/devicestat.h>
6827068b01SBrian Feldman #include <sys/disk.h>
6927068b01SBrian Feldman #include <sys/kernel.h>
7027068b01SBrian Feldman #include <sys/malloc.h>
7127068b01SBrian Feldman #include <sys/sysctl.h>
7295f1a897SPoul-Henning Kamp #include <sys/linker.h>
733f54a085SPoul-Henning Kamp #include <sys/queue.h>
748f8def9eSPoul-Henning Kamp #include <sys/mdioctl.h>
758f8def9eSPoul-Henning Kamp #include <sys/vnode.h>
768f8def9eSPoul-Henning Kamp #include <sys/namei.h>
778f8def9eSPoul-Henning Kamp #include <sys/fcntl.h>
788f8def9eSPoul-Henning Kamp #include <sys/proc.h>
798f8def9eSPoul-Henning Kamp #include <machine/atomic.h>
808f8def9eSPoul-Henning Kamp 
818f8def9eSPoul-Henning Kamp #include <vm/vm.h>
828f8def9eSPoul-Henning Kamp #include <vm/vm_object.h>
838f8def9eSPoul-Henning Kamp #include <vm/vm_page.h>
848f8def9eSPoul-Henning Kamp #include <vm/vm_pager.h>
858f8def9eSPoul-Henning Kamp #include <vm/vm_zone.h>
868f8def9eSPoul-Henning Kamp #include <vm/swap_pager.h>
873f54a085SPoul-Henning Kamp 
8857e9624eSPoul-Henning Kamp #define MD_MODVER 1
8957e9624eSPoul-Henning Kamp 
90f2744793SSheldon Hearn #ifndef MD_NSECT
91f2744793SSheldon Hearn #define MD_NSECT (10000 * 2)
9233edfabeSPoul-Henning Kamp #endif
9333edfabeSPoul-Henning Kamp 
9400a6a3c6SPoul-Henning Kamp MALLOC_DEFINE(M_MD, "MD disk", "Memory Disk");
9500a6a3c6SPoul-Henning Kamp MALLOC_DEFINE(M_MDSECT, "MD sectors", "Memory Disk Sectors");
9600a6a3c6SPoul-Henning Kamp 
9771e4fff8SPoul-Henning Kamp static int md_debug;
9800a6a3c6SPoul-Henning Kamp SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0, "");
9900a6a3c6SPoul-Henning Kamp 
/*
 * Compatibility with the older option names: MFS_ROOT and MFS_ROOT_SIZE are
 * mapped onto MD_ROOT and MD_ROOT_SIZE (with a build-time warning) unless
 * the new names have been defined already.
 */
#if defined(MFS_ROOT) && !defined(MD_ROOT)
#define MD_ROOT MFS_ROOT
#warning "option MFS_ROOT has been superceeded by MD_ROOT"
#endif

#if defined(MFS_ROOT_SIZE) && !defined(MD_ROOT_SIZE)
#define MD_ROOT_SIZE MFS_ROOT_SIZE
#warning "option MFS_ROOT_SIZE has been superceeded by MD_ROOT_SIZE"
#endif

#if defined(MD_ROOT) && defined(MD_ROOT_SIZE)
/*
 * Statically reserved room for a filesystem image: MD_ROOT_SIZE kilobytes
 * that start with a recognizable marker string, followed by a second marker
 * object.
 * NOTE(review): presumably an external tool locates the markers and writes
 * the image into the built kernel -- confirm against the build tooling.
 */
static u_char mfs_root[MD_ROOT_SIZE * 1024] = "MFS Filesystem goes here";
static u_char end_mfs_root[] __unused = "MFS Filesystem had better STOP here";
#endif
11571e4fff8SPoul-Henning Kamp 
11671e4fff8SPoul-Henning Kamp static int	mdrootready;
1178f8def9eSPoul-Henning Kamp static int	mdunits;
11857e9624eSPoul-Henning Kamp static dev_t	status_dev = 0;
11957e9624eSPoul-Henning Kamp 
1200cfaeeeeSPoul-Henning Kamp 
12100a6a3c6SPoul-Henning Kamp #define CDEV_MAJOR	95
12200a6a3c6SPoul-Henning Kamp 
12300a6a3c6SPoul-Henning Kamp static d_strategy_t mdstrategy;
12400a6a3c6SPoul-Henning Kamp static d_open_t mdopen;
1258f8def9eSPoul-Henning Kamp static d_ioctl_t mdioctl, mdctlioctl;
12600a6a3c6SPoul-Henning Kamp 
12700a6a3c6SPoul-Henning Kamp static struct cdevsw md_cdevsw = {
12800a6a3c6SPoul-Henning Kamp         /* open */      mdopen,
12900a6a3c6SPoul-Henning Kamp         /* close */     nullclose,
13000a6a3c6SPoul-Henning Kamp         /* read */      physread,
13100a6a3c6SPoul-Henning Kamp         /* write */     physwrite,
13200a6a3c6SPoul-Henning Kamp         /* ioctl */     mdioctl,
13300a6a3c6SPoul-Henning Kamp         /* poll */      nopoll,
13400a6a3c6SPoul-Henning Kamp         /* mmap */      nommap,
13500a6a3c6SPoul-Henning Kamp         /* strategy */  mdstrategy,
13600a6a3c6SPoul-Henning Kamp         /* name */      "md",
13700a6a3c6SPoul-Henning Kamp         /* maj */       CDEV_MAJOR,
13800a6a3c6SPoul-Henning Kamp         /* dump */      nodump,
13900a6a3c6SPoul-Henning Kamp         /* psize */     nopsize,
14071e4fff8SPoul-Henning Kamp         /* flags */     D_DISK | D_CANFREE | D_MEMDISK,
1418f8def9eSPoul-Henning Kamp };
1428f8def9eSPoul-Henning Kamp 
1438f8def9eSPoul-Henning Kamp static struct cdevsw mdctl_cdevsw = {
1448f8def9eSPoul-Henning Kamp         /* open */      nullopen,
1458f8def9eSPoul-Henning Kamp         /* close */     nullclose,
1468f8def9eSPoul-Henning Kamp         /* read */      noread,
1478f8def9eSPoul-Henning Kamp         /* write */     nowrite,
1488f8def9eSPoul-Henning Kamp         /* ioctl */     mdctlioctl,
1498f8def9eSPoul-Henning Kamp         /* poll */      nopoll,
1508f8def9eSPoul-Henning Kamp         /* mmap */      nommap,
1518f8def9eSPoul-Henning Kamp         /* strategy */  nostrategy,
1528f8def9eSPoul-Henning Kamp         /* name */      "md",
1538f8def9eSPoul-Henning Kamp         /* maj */       CDEV_MAJOR
15400a6a3c6SPoul-Henning Kamp };
15500a6a3c6SPoul-Henning Kamp 
1560cfaeeeeSPoul-Henning Kamp static struct cdevsw mddisk_cdevsw;
1570cfaeeeeSPoul-Henning Kamp 
1583f54a085SPoul-Henning Kamp static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(&md_softc_list);
1593f54a085SPoul-Henning Kamp 
16000a6a3c6SPoul-Henning Kamp struct md_s {
16100a6a3c6SPoul-Henning Kamp 	int unit;
1623f54a085SPoul-Henning Kamp 	LIST_ENTRY(md_s) list;
16300a6a3c6SPoul-Henning Kamp 	struct devstat stats;
1648177437dSPoul-Henning Kamp 	struct bio_queue_head bio_queue;
16500a6a3c6SPoul-Henning Kamp 	struct disk disk;
16600a6a3c6SPoul-Henning Kamp 	dev_t dev;
16795f1a897SPoul-Henning Kamp 	int busy;
1688f8def9eSPoul-Henning Kamp 	enum md_types type;
16900a6a3c6SPoul-Henning Kamp 	unsigned nsect;
1708f8def9eSPoul-Henning Kamp 	unsigned secsize;
1718f8def9eSPoul-Henning Kamp 	unsigned flags;
17295f1a897SPoul-Henning Kamp 
17395f1a897SPoul-Henning Kamp 	/* MD_MALLOC related fields */
17400a6a3c6SPoul-Henning Kamp 	u_char **secp;
17500a6a3c6SPoul-Henning Kamp 
17695f1a897SPoul-Henning Kamp 	/* MD_PRELOAD related fields */
17795f1a897SPoul-Henning Kamp 	u_char *pl_ptr;
17895f1a897SPoul-Henning Kamp 	unsigned pl_len;
17900a6a3c6SPoul-Henning Kamp 
1808f8def9eSPoul-Henning Kamp 	/* MD_VNODE related fields */
1818f8def9eSPoul-Henning Kamp 	struct vnode *vnode;
1828f8def9eSPoul-Henning Kamp 	struct ucred *cred;
1838f8def9eSPoul-Henning Kamp 
1848f8def9eSPoul-Henning Kamp 	/* MD_OBJET related fields */
1858f8def9eSPoul-Henning Kamp 	vm_object_t object;
1868f8def9eSPoul-Henning Kamp };
18700a6a3c6SPoul-Henning Kamp 
18800a6a3c6SPoul-Henning Kamp static int
18900a6a3c6SPoul-Henning Kamp mdopen(dev_t dev, int flag, int fmt, struct proc *p)
19000a6a3c6SPoul-Henning Kamp {
19100a6a3c6SPoul-Henning Kamp 	struct md_s *sc;
19200a6a3c6SPoul-Henning Kamp 	struct disklabel *dl;
19300a6a3c6SPoul-Henning Kamp 
19400a6a3c6SPoul-Henning Kamp 	if (md_debug)
19500a6a3c6SPoul-Henning Kamp 		printf("mdopen(%s %x %x %p)\n",
19600a6a3c6SPoul-Henning Kamp 			devtoname(dev), flag, fmt, p);
19700a6a3c6SPoul-Henning Kamp 
19800a6a3c6SPoul-Henning Kamp 	sc = dev->si_drv1;
19900a6a3c6SPoul-Henning Kamp 
20000a6a3c6SPoul-Henning Kamp 	dl = &sc->disk.d_label;
20100a6a3c6SPoul-Henning Kamp 	bzero(dl, sizeof(*dl));
2028f8def9eSPoul-Henning Kamp 	dl->d_secsize = sc->secsize;
2038f8def9eSPoul-Henning Kamp 	dl->d_nsectors = sc->nsect > 1024 ? 1024 : sc->nsect;
20400a6a3c6SPoul-Henning Kamp 	dl->d_ntracks = 1;
20595f1a897SPoul-Henning Kamp 	dl->d_secpercyl = dl->d_nsectors * dl->d_ntracks;
20600a6a3c6SPoul-Henning Kamp 	dl->d_secperunit = sc->nsect;
20700a6a3c6SPoul-Henning Kamp 	dl->d_ncylinders = dl->d_secperunit / dl->d_secpercyl;
20800a6a3c6SPoul-Henning Kamp 	return (0);
20900a6a3c6SPoul-Henning Kamp }
21000a6a3c6SPoul-Henning Kamp 
21100a6a3c6SPoul-Henning Kamp static int
21200a6a3c6SPoul-Henning Kamp mdioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
21300a6a3c6SPoul-Henning Kamp {
21400a6a3c6SPoul-Henning Kamp 
21500a6a3c6SPoul-Henning Kamp 	if (md_debug)
21600a6a3c6SPoul-Henning Kamp 		printf("mdioctl(%s %lx %p %x %p)\n",
21700a6a3c6SPoul-Henning Kamp 			devtoname(dev), cmd, addr, flags, p);
21800a6a3c6SPoul-Henning Kamp 
21900a6a3c6SPoul-Henning Kamp 	return (ENOIOCTL);
22000a6a3c6SPoul-Henning Kamp }
22100a6a3c6SPoul-Henning Kamp 
22200a6a3c6SPoul-Henning Kamp static void
2238f8def9eSPoul-Henning Kamp mdstart_malloc(struct md_s *sc)
22400a6a3c6SPoul-Henning Kamp {
2258f8def9eSPoul-Henning Kamp 	int i;
2268f8def9eSPoul-Henning Kamp 	struct bio *bp;
22700a6a3c6SPoul-Henning Kamp 	devstat_trans_flags dop;
22800a6a3c6SPoul-Henning Kamp 	u_char *secp, **secpp, *dst;
22900a6a3c6SPoul-Henning Kamp 	unsigned secno, nsec, secval, uc;
23000a6a3c6SPoul-Henning Kamp 
2318f8def9eSPoul-Henning Kamp 	for (;;) {
2328f8def9eSPoul-Henning Kamp 		/* XXX: LOCK(unique unit numbers) */
2338177437dSPoul-Henning Kamp 		bp = bioq_first(&sc->bio_queue);
23400a6a3c6SPoul-Henning Kamp 		if (bp)
2358177437dSPoul-Henning Kamp 			bioq_remove(&sc->bio_queue, bp);
2368f8def9eSPoul-Henning Kamp 		/* XXX: UNLOCK(unique unit numbers) */
23700a6a3c6SPoul-Henning Kamp 		if (!bp)
23800a6a3c6SPoul-Henning Kamp 			break;
23900a6a3c6SPoul-Henning Kamp 
24000a6a3c6SPoul-Henning Kamp 		devstat_start_transaction(&sc->stats);
24100a6a3c6SPoul-Henning Kamp 
2428177437dSPoul-Henning Kamp 		if (bp->bio_cmd == BIO_DELETE)
24300a6a3c6SPoul-Henning Kamp 			dop = DEVSTAT_NO_DATA;
2448177437dSPoul-Henning Kamp 		else if (bp->bio_cmd == BIO_READ)
24500a6a3c6SPoul-Henning Kamp 			dop = DEVSTAT_READ;
24600a6a3c6SPoul-Henning Kamp 		else
24700a6a3c6SPoul-Henning Kamp 			dop = DEVSTAT_WRITE;
24800a6a3c6SPoul-Henning Kamp 
24996b6a55fSPoul-Henning Kamp 		nsec = bp->bio_bcount / sc->secsize;
2508177437dSPoul-Henning Kamp 		secno = bp->bio_pblkno;
2518177437dSPoul-Henning Kamp 		dst = bp->bio_data;
25200a6a3c6SPoul-Henning Kamp 		while (nsec--) {
25300a6a3c6SPoul-Henning Kamp 			secpp = &sc->secp[secno];
25421c3015aSDoug Rabson 			if ((uintptr_t)*secpp > 255) {
25500a6a3c6SPoul-Henning Kamp 				secp = *secpp;
25600a6a3c6SPoul-Henning Kamp 				secval = 0;
25700a6a3c6SPoul-Henning Kamp 			} else {
25896b6a55fSPoul-Henning Kamp 				secp = NULL;
25921c3015aSDoug Rabson 				secval = (uintptr_t) *secpp;
26000a6a3c6SPoul-Henning Kamp 			}
26196b6a55fSPoul-Henning Kamp 
26233edfabeSPoul-Henning Kamp 			if (md_debug > 2)
2638177437dSPoul-Henning Kamp 				printf("%x %p %p %d\n",
2648177437dSPoul-Henning Kamp 				    bp->bio_flags, secpp, secp, secval);
26500a6a3c6SPoul-Henning Kamp 
2668177437dSPoul-Henning Kamp 			if (bp->bio_cmd == BIO_DELETE) {
26796b6a55fSPoul-Henning Kamp 				if (!(sc->flags & MD_RESERVE) && secp != NULL) {
26800a6a3c6SPoul-Henning Kamp 					FREE(secp, M_MDSECT);
26900a6a3c6SPoul-Henning Kamp 					*secpp = 0;
27000a6a3c6SPoul-Henning Kamp 				}
2718177437dSPoul-Henning Kamp 			} else if (bp->bio_cmd == BIO_READ) {
27296b6a55fSPoul-Henning Kamp 				if (secp != NULL) {
27396b6a55fSPoul-Henning Kamp 					bcopy(secp, dst, sc->secsize);
27400a6a3c6SPoul-Henning Kamp 				} else if (secval) {
27596b6a55fSPoul-Henning Kamp 					for (i = 0; i < sc->secsize; i++)
27600a6a3c6SPoul-Henning Kamp 						dst[i] = secval;
27700a6a3c6SPoul-Henning Kamp 				} else {
27896b6a55fSPoul-Henning Kamp 					bzero(dst, sc->secsize);
27900a6a3c6SPoul-Henning Kamp 				}
28000a6a3c6SPoul-Henning Kamp 			} else {
2818f8def9eSPoul-Henning Kamp 				if (sc->flags & MD_COMPRESS) {
28200a6a3c6SPoul-Henning Kamp 					uc = dst[0];
28396b6a55fSPoul-Henning Kamp 					for (i = 1; i < sc->secsize; i++)
28400a6a3c6SPoul-Henning Kamp 						if (dst[i] != uc)
28500a6a3c6SPoul-Henning Kamp 							break;
2868f8def9eSPoul-Henning Kamp 				} else {
2878f8def9eSPoul-Henning Kamp 					i = 0;
2888f8def9eSPoul-Henning Kamp 					uc = 0;
2898f8def9eSPoul-Henning Kamp 				}
29096b6a55fSPoul-Henning Kamp 				if (i == sc->secsize) {
29100a6a3c6SPoul-Henning Kamp 					if (secp)
29200a6a3c6SPoul-Henning Kamp 						FREE(secp, M_MDSECT);
29321c3015aSDoug Rabson 					*secpp = (u_char *)(uintptr_t)uc;
29400a6a3c6SPoul-Henning Kamp 				} else {
29596b6a55fSPoul-Henning Kamp 					if (secp == NULL)
29696b6a55fSPoul-Henning Kamp 						MALLOC(secp, u_char *, sc->secsize, M_MDSECT, M_WAITOK);
29796b6a55fSPoul-Henning Kamp 					bcopy(dst, secp, sc->secsize);
29800a6a3c6SPoul-Henning Kamp 					*secpp = secp;
29900a6a3c6SPoul-Henning Kamp 				}
30000a6a3c6SPoul-Henning Kamp 			}
30100a6a3c6SPoul-Henning Kamp 			secno++;
30296b6a55fSPoul-Henning Kamp 			dst += sc->secsize;
30300a6a3c6SPoul-Henning Kamp 		}
3048177437dSPoul-Henning Kamp 		bp->bio_resid = 0;
3058177437dSPoul-Henning Kamp 		devstat_end_transaction_bio(&sc->stats, bp);
30600a6a3c6SPoul-Henning Kamp 		biodone(bp);
30700a6a3c6SPoul-Henning Kamp 	}
30800a6a3c6SPoul-Henning Kamp 	return;
30900a6a3c6SPoul-Henning Kamp }
31000a6a3c6SPoul-Henning Kamp 
31171e4fff8SPoul-Henning Kamp 
31295f1a897SPoul-Henning Kamp static void
3138f8def9eSPoul-Henning Kamp mdstart_preload(struct md_s *sc)
31471e4fff8SPoul-Henning Kamp {
3158f8def9eSPoul-Henning Kamp 	struct bio *bp;
31671e4fff8SPoul-Henning Kamp 	devstat_trans_flags dop;
31771e4fff8SPoul-Henning Kamp 
3188f8def9eSPoul-Henning Kamp 	for (;;) {
3198f8def9eSPoul-Henning Kamp 		/* XXX: LOCK(unique unit numbers) */
3208177437dSPoul-Henning Kamp 		bp = bioq_first(&sc->bio_queue);
32171e4fff8SPoul-Henning Kamp 		if (bp)
3228177437dSPoul-Henning Kamp 			bioq_remove(&sc->bio_queue, bp);
3238f8def9eSPoul-Henning Kamp 		/* XXX: UNLOCK(unique unit numbers) */
32471e4fff8SPoul-Henning Kamp 		if (!bp)
32571e4fff8SPoul-Henning Kamp 			break;
32671e4fff8SPoul-Henning Kamp 
32771e4fff8SPoul-Henning Kamp 		devstat_start_transaction(&sc->stats);
32871e4fff8SPoul-Henning Kamp 
3298177437dSPoul-Henning Kamp 		if (bp->bio_cmd == BIO_DELETE) {
33071e4fff8SPoul-Henning Kamp 			dop = DEVSTAT_NO_DATA;
3318177437dSPoul-Henning Kamp 		} else if (bp->bio_cmd == BIO_READ) {
33271e4fff8SPoul-Henning Kamp 			dop = DEVSTAT_READ;
3338177437dSPoul-Henning Kamp 			bcopy(sc->pl_ptr + (bp->bio_pblkno << DEV_BSHIFT), bp->bio_data, bp->bio_bcount);
33471e4fff8SPoul-Henning Kamp 		} else {
33571e4fff8SPoul-Henning Kamp 			dop = DEVSTAT_WRITE;
3368177437dSPoul-Henning Kamp 			bcopy(bp->bio_data, sc->pl_ptr + (bp->bio_pblkno << DEV_BSHIFT), bp->bio_bcount);
33771e4fff8SPoul-Henning Kamp 		}
3388177437dSPoul-Henning Kamp 		bp->bio_resid = 0;
3398177437dSPoul-Henning Kamp 		devstat_end_transaction_bio(&sc->stats, bp);
34071e4fff8SPoul-Henning Kamp 		biodone(bp);
34171e4fff8SPoul-Henning Kamp 	}
34271e4fff8SPoul-Henning Kamp 	return;
34371e4fff8SPoul-Henning Kamp }
34471e4fff8SPoul-Henning Kamp 
3458f8def9eSPoul-Henning Kamp static void
3468f8def9eSPoul-Henning Kamp mdstart_vnode(struct md_s *sc)
3478f8def9eSPoul-Henning Kamp {
3488f8def9eSPoul-Henning Kamp 	int error;
3498f8def9eSPoul-Henning Kamp 	struct bio *bp;
3508f8def9eSPoul-Henning Kamp 	struct uio auio;
3518f8def9eSPoul-Henning Kamp 	struct iovec aiov;
3528f8def9eSPoul-Henning Kamp 	struct mount *mp;
3538f8def9eSPoul-Henning Kamp 
3548f8def9eSPoul-Henning Kamp 	/*
3558f8def9eSPoul-Henning Kamp 	 * VNODE I/O
3568f8def9eSPoul-Henning Kamp 	 *
3578f8def9eSPoul-Henning Kamp 	 * If an error occurs, we set BIO_ERROR but we do not set
3588f8def9eSPoul-Henning Kamp 	 * B_INVAL because (for a write anyway), the buffer is
3598f8def9eSPoul-Henning Kamp 	 * still valid.
3608f8def9eSPoul-Henning Kamp 	 */
3618f8def9eSPoul-Henning Kamp 
3628f8def9eSPoul-Henning Kamp 	for (;;) {
3638f8def9eSPoul-Henning Kamp 		/* XXX: LOCK(unique unit numbers) */
3648f8def9eSPoul-Henning Kamp 		bp = bioq_first(&sc->bio_queue);
3658f8def9eSPoul-Henning Kamp 		if (bp)
3668f8def9eSPoul-Henning Kamp 			bioq_remove(&sc->bio_queue, bp);
3678f8def9eSPoul-Henning Kamp 		/* XXX: UNLOCK(unique unit numbers) */
3688f8def9eSPoul-Henning Kamp 		if (!bp)
3698f8def9eSPoul-Henning Kamp 			break;
3708f8def9eSPoul-Henning Kamp 
3718f8def9eSPoul-Henning Kamp 		devstat_start_transaction(&sc->stats);
3728f8def9eSPoul-Henning Kamp 
3738f8def9eSPoul-Henning Kamp 		bzero(&auio, sizeof(auio));
3748f8def9eSPoul-Henning Kamp 
3758f8def9eSPoul-Henning Kamp 		aiov.iov_base = bp->bio_data;
3768f8def9eSPoul-Henning Kamp 		aiov.iov_len = bp->bio_bcount;
3778f8def9eSPoul-Henning Kamp 		auio.uio_iov = &aiov;
3788f8def9eSPoul-Henning Kamp 		auio.uio_iovcnt = 1;
3798f8def9eSPoul-Henning Kamp 		auio.uio_offset = (vm_ooffset_t)bp->bio_pblkno * sc->secsize;
3808f8def9eSPoul-Henning Kamp 		auio.uio_segflg = UIO_SYSSPACE;
3818f8def9eSPoul-Henning Kamp 		if(bp->bio_cmd == BIO_READ)
3828f8def9eSPoul-Henning Kamp 			auio.uio_rw = UIO_READ;
3838f8def9eSPoul-Henning Kamp 		else
3848f8def9eSPoul-Henning Kamp 			auio.uio_rw = UIO_WRITE;
3858f8def9eSPoul-Henning Kamp 		auio.uio_resid = bp->bio_bcount;
3868f8def9eSPoul-Henning Kamp 		auio.uio_procp = curproc;
3878f8def9eSPoul-Henning Kamp 		if (VOP_ISLOCKED(sc->vnode, NULL))
3888f8def9eSPoul-Henning Kamp 			vprint("unexpected vn driver lock", sc->vnode);
3898f8def9eSPoul-Henning Kamp 		if (bp->bio_cmd == BIO_READ) {
3908f8def9eSPoul-Henning Kamp 			vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curproc);
3918f8def9eSPoul-Henning Kamp 			error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
3928f8def9eSPoul-Henning Kamp 		} else {
3938f8def9eSPoul-Henning Kamp 			(void) vn_start_write(sc->vnode, &mp, V_WAIT);
3948f8def9eSPoul-Henning Kamp 			vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curproc);
3958f8def9eSPoul-Henning Kamp 			error = VOP_WRITE(sc->vnode, &auio, 0, sc->cred);
3968f8def9eSPoul-Henning Kamp 			vn_finished_write(mp);
3978f8def9eSPoul-Henning Kamp 		}
3988f8def9eSPoul-Henning Kamp 		VOP_UNLOCK(sc->vnode, 0, curproc);
3998f8def9eSPoul-Henning Kamp 		bp->bio_resid = auio.uio_resid;
4008f8def9eSPoul-Henning Kamp 
4018f8def9eSPoul-Henning Kamp 		if (error) {
4028f8def9eSPoul-Henning Kamp 			bp->bio_error = error;
4038f8def9eSPoul-Henning Kamp 			bp->bio_flags |= BIO_ERROR;
4048f8def9eSPoul-Henning Kamp 		}
4058f8def9eSPoul-Henning Kamp 		devstat_end_transaction_bio(&sc->stats, bp);
4068f8def9eSPoul-Henning Kamp 		biodone(bp);
4078f8def9eSPoul-Henning Kamp 	}
4088f8def9eSPoul-Henning Kamp 	return;
4098f8def9eSPoul-Henning Kamp }
4108f8def9eSPoul-Henning Kamp 
4118f8def9eSPoul-Henning Kamp static void
4128f8def9eSPoul-Henning Kamp mdstart_swap(struct md_s *sc)
4138f8def9eSPoul-Henning Kamp {
4148f8def9eSPoul-Henning Kamp 	struct bio *bp;
4158f8def9eSPoul-Henning Kamp 
4168f8def9eSPoul-Henning Kamp 	for (;;) {
4178f8def9eSPoul-Henning Kamp 		/* XXX: LOCK(unique unit numbers) */
4188f8def9eSPoul-Henning Kamp 		bp = bioq_first(&sc->bio_queue);
4198f8def9eSPoul-Henning Kamp 		if (bp)
4208f8def9eSPoul-Henning Kamp 			bioq_remove(&sc->bio_queue, bp);
4218f8def9eSPoul-Henning Kamp 		/* XXX: UNLOCK(unique unit numbers) */
4228f8def9eSPoul-Henning Kamp 		if (!bp)
4238f8def9eSPoul-Henning Kamp 			break;
4248f8def9eSPoul-Henning Kamp 
4258f8def9eSPoul-Henning Kamp #if 0
4268f8def9eSPoul-Henning Kamp 		devstat_start_transaction(&sc->stats);
4278f8def9eSPoul-Henning Kamp #endif
4288f8def9eSPoul-Henning Kamp 
4298f8def9eSPoul-Henning Kamp 		if ((bp->bio_cmd == BIO_DELETE) && (sc->flags & MD_RESERVE))
4308f8def9eSPoul-Henning Kamp 			biodone(bp);
4318f8def9eSPoul-Henning Kamp 		else
4328f8def9eSPoul-Henning Kamp 			vm_pager_strategy(sc->object, bp);
4338f8def9eSPoul-Henning Kamp 
4348f8def9eSPoul-Henning Kamp #if 0
4358f8def9eSPoul-Henning Kamp 		devstat_end_transaction_bio(&sc->stats, bp);
4368f8def9eSPoul-Henning Kamp #endif
4378f8def9eSPoul-Henning Kamp 	}
4388f8def9eSPoul-Henning Kamp 	return;
4398f8def9eSPoul-Henning Kamp }
4408f8def9eSPoul-Henning Kamp 
4418f8def9eSPoul-Henning Kamp static void
4428f8def9eSPoul-Henning Kamp mdstrategy(struct bio *bp)
44300a6a3c6SPoul-Henning Kamp {
44400a6a3c6SPoul-Henning Kamp 	struct md_s *sc;
44500a6a3c6SPoul-Henning Kamp 
4468f8def9eSPoul-Henning Kamp 	if (md_debug > 1)
4478f8def9eSPoul-Henning Kamp 		printf("mdstrategy(%p) %s %x, %d, %ld, %p)\n",
4488f8def9eSPoul-Henning Kamp 		    bp, devtoname(bp->bio_dev), bp->bio_flags, bp->bio_blkno,
4498f8def9eSPoul-Henning Kamp 		    bp->bio_bcount / DEV_BSIZE, bp->bio_data);
4508f8def9eSPoul-Henning Kamp 
4518f8def9eSPoul-Henning Kamp 	sc = bp->bio_dev->si_drv1;
4528f8def9eSPoul-Henning Kamp 
4538f8def9eSPoul-Henning Kamp 	/* XXX: LOCK(sc->lock) */
4548f8def9eSPoul-Henning Kamp 	bioqdisksort(&sc->bio_queue, bp);
4558f8def9eSPoul-Henning Kamp 	/* XXX: UNLOCK(sc->lock) */
4568f8def9eSPoul-Henning Kamp 
4578f8def9eSPoul-Henning Kamp 	if (atomic_cmpset_int(&sc->busy, 0, 1) == 0)
4588f8def9eSPoul-Henning Kamp 		return;
4598f8def9eSPoul-Henning Kamp 
4608f8def9eSPoul-Henning Kamp 	switch (sc->type) {
4618f8def9eSPoul-Henning Kamp 	case MD_MALLOC:
4628f8def9eSPoul-Henning Kamp 		mdstart_malloc(sc);
4638f8def9eSPoul-Henning Kamp 		break;
4648f8def9eSPoul-Henning Kamp 	case MD_PRELOAD:
4658f8def9eSPoul-Henning Kamp 		mdstart_preload(sc);
4668f8def9eSPoul-Henning Kamp 		break;
4678f8def9eSPoul-Henning Kamp 	case MD_VNODE:
4688f8def9eSPoul-Henning Kamp 		mdstart_vnode(sc);
4698f8def9eSPoul-Henning Kamp 		break;
4708f8def9eSPoul-Henning Kamp 	case MD_SWAP:
4718f8def9eSPoul-Henning Kamp 		mdstart_swap(sc);
4728f8def9eSPoul-Henning Kamp 		break;
4738f8def9eSPoul-Henning Kamp 	default:
4748f8def9eSPoul-Henning Kamp 		panic("Impossible md(type)");
4758f8def9eSPoul-Henning Kamp 		break;
4768f8def9eSPoul-Henning Kamp 	}
4778f8def9eSPoul-Henning Kamp 	sc->busy = 0;
4788f8def9eSPoul-Henning Kamp }
4798f8def9eSPoul-Henning Kamp 
4808f8def9eSPoul-Henning Kamp static struct md_s *
4818f8def9eSPoul-Henning Kamp mdfind(int unit)
4828f8def9eSPoul-Henning Kamp {
4838f8def9eSPoul-Henning Kamp 	struct md_s *sc;
4848f8def9eSPoul-Henning Kamp 
4858f8def9eSPoul-Henning Kamp 	/* XXX: LOCK(unique unit numbers) */
4863f54a085SPoul-Henning Kamp 	LIST_FOREACH(sc, &md_softc_list, list) {
4873f54a085SPoul-Henning Kamp 		if (sc->unit == unit)
4888f8def9eSPoul-Henning Kamp 			break;
4898f8def9eSPoul-Henning Kamp 	}
4908f8def9eSPoul-Henning Kamp 	/* XXX: UNLOCK(unique unit numbers) */
4918f8def9eSPoul-Henning Kamp 	return (sc);
4928f8def9eSPoul-Henning Kamp }
4938f8def9eSPoul-Henning Kamp 
4948f8def9eSPoul-Henning Kamp static struct md_s *
4958f8def9eSPoul-Henning Kamp mdnew(int unit)
4968f8def9eSPoul-Henning Kamp {
4978f8def9eSPoul-Henning Kamp 	struct md_s *sc;
4988f8def9eSPoul-Henning Kamp 	int max = -1;
4998f8def9eSPoul-Henning Kamp 
5008f8def9eSPoul-Henning Kamp 	/* XXX: LOCK(unique unit numbers) */
5018f8def9eSPoul-Henning Kamp 	LIST_FOREACH(sc, &md_softc_list, list) {
5028f8def9eSPoul-Henning Kamp 		if (sc->unit == unit) {
5038f8def9eSPoul-Henning Kamp 			/* XXX: UNLOCK(unique unit numbers) */
5043f54a085SPoul-Henning Kamp 			return (NULL);
5053f54a085SPoul-Henning Kamp 		}
5068f8def9eSPoul-Henning Kamp 		if (sc->unit > max)
5078f8def9eSPoul-Henning Kamp 			max = sc->unit;
5088f8def9eSPoul-Henning Kamp 	}
5098f8def9eSPoul-Henning Kamp 	if (unit == -1)
5108f8def9eSPoul-Henning Kamp 		unit = max + 1;
5118f8def9eSPoul-Henning Kamp 	if (unit > DKMAXUNIT)
5128f8def9eSPoul-Henning Kamp 		return (NULL);
5137cc0979fSDavid Malone 	MALLOC(sc, struct md_s *,sizeof(*sc), M_MD, M_WAITOK | M_ZERO);
5143f54a085SPoul-Henning Kamp 	sc->unit = unit;
5158f8def9eSPoul-Henning Kamp 	LIST_INSERT_HEAD(&md_softc_list, sc, list);
5168f8def9eSPoul-Henning Kamp 	/* XXX: UNLOCK(unique unit numbers) */
5178f8def9eSPoul-Henning Kamp 	return (sc);
5188f8def9eSPoul-Henning Kamp }
5198f8def9eSPoul-Henning Kamp 
5208f8def9eSPoul-Henning Kamp static void
5218f8def9eSPoul-Henning Kamp mddelete(struct md_s *sc)
5228f8def9eSPoul-Henning Kamp {
5238f8def9eSPoul-Henning Kamp 
524c9384920SPoul-Henning Kamp 	devstat_remove_entry(&sc->stats);
5258f8def9eSPoul-Henning Kamp 	/* XXX: LOCK(unique unit numbers) */
5268f8def9eSPoul-Henning Kamp 	LIST_REMOVE(sc, list);
5278f8def9eSPoul-Henning Kamp 	/* XXX: UNLOCK(unique unit numbers) */
5288f8def9eSPoul-Henning Kamp 	FREE(sc, M_MD);
5298f8def9eSPoul-Henning Kamp }
5308f8def9eSPoul-Henning Kamp 
5318f8def9eSPoul-Henning Kamp static void
5328f8def9eSPoul-Henning Kamp mdinit(struct md_s *sc)
5338f8def9eSPoul-Henning Kamp {
5348f8def9eSPoul-Henning Kamp 
5358177437dSPoul-Henning Kamp 	bioq_init(&sc->bio_queue);
53696b6a55fSPoul-Henning Kamp 	devstat_add_entry(&sc->stats, "md", sc->unit, sc->secsize,
53795f1a897SPoul-Henning Kamp 		DEVSTAT_NO_ORDERED_TAGS,
53871e4fff8SPoul-Henning Kamp 		DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
53971e4fff8SPoul-Henning Kamp 		DEVSTAT_PRIORITY_OTHER);
5400cfaeeeeSPoul-Henning Kamp 	sc->dev = disk_create(sc->unit, &sc->disk, 0, &md_cdevsw, &mddisk_cdevsw);
54195f1a897SPoul-Henning Kamp 	sc->dev->si_drv1 = sc;
54271e4fff8SPoul-Henning Kamp }
54371e4fff8SPoul-Henning Kamp 
54496b6a55fSPoul-Henning Kamp /*
54596b6a55fSPoul-Henning Kamp  * XXX: we should check that the range they feed us is mapped.
54696b6a55fSPoul-Henning Kamp  * XXX: we should implement read-only.
54796b6a55fSPoul-Henning Kamp  */
54896b6a55fSPoul-Henning Kamp 
549637f671aSPoul-Henning Kamp static int
550637f671aSPoul-Henning Kamp mdcreate_preload(struct md_ioctl *mdio)
55171e4fff8SPoul-Henning Kamp {
55271e4fff8SPoul-Henning Kamp 	struct md_s *sc;
55371e4fff8SPoul-Henning Kamp 
554637f671aSPoul-Henning Kamp 	if (mdio->md_size == 0)
555637f671aSPoul-Henning Kamp 		return(EINVAL);
556637f671aSPoul-Henning Kamp 	if (mdio->md_options & ~(MD_AUTOUNIT))
557637f671aSPoul-Henning Kamp 		return(EINVAL);
558637f671aSPoul-Henning Kamp 	if (mdio->md_options & MD_AUTOUNIT) {
5598f8def9eSPoul-Henning Kamp 		sc = mdnew(-1);
5608f8def9eSPoul-Henning Kamp 		if (sc == NULL)
561637f671aSPoul-Henning Kamp 			return (ENOMEM);
562637f671aSPoul-Henning Kamp 		mdio->md_unit = sc->unit;
563637f671aSPoul-Henning Kamp 	} else {
564637f671aSPoul-Henning Kamp 		sc = mdnew(mdio->md_unit);
565637f671aSPoul-Henning Kamp 		if (sc == NULL)
566637f671aSPoul-Henning Kamp 			return (EBUSY);
567637f671aSPoul-Henning Kamp 	}
56866c16191SPoul-Henning Kamp 	sc->type = MD_PRELOAD;
5698f8def9eSPoul-Henning Kamp 	sc->secsize = DEV_BSIZE;
570637f671aSPoul-Henning Kamp 	sc->nsect = mdio->md_size;
57196b6a55fSPoul-Henning Kamp 	/* Cast to pointer size, then to pointer to avoid warning */
572dc57d7c6SPeter Wemm 	sc->pl_ptr = (u_char *)(uintptr_t)mdio->md_base;
573637f671aSPoul-Henning Kamp 	sc->pl_len = (mdio->md_size << DEV_BSHIFT);
5748f8def9eSPoul-Henning Kamp 	mdinit(sc);
575637f671aSPoul-Henning Kamp 	return (0);
57695f1a897SPoul-Henning Kamp }
57795f1a897SPoul-Henning Kamp 
578637f671aSPoul-Henning Kamp 
5798f8def9eSPoul-Henning Kamp static int
5808f8def9eSPoul-Henning Kamp mdcreate_malloc(struct md_ioctl *mdio)
58195f1a897SPoul-Henning Kamp {
58295f1a897SPoul-Henning Kamp 	struct md_s *sc;
5838f8def9eSPoul-Henning Kamp 	unsigned u;
58495f1a897SPoul-Henning Kamp 
5858f8def9eSPoul-Henning Kamp 	if (mdio->md_size == 0)
5868f8def9eSPoul-Henning Kamp 		return(EINVAL);
5878f8def9eSPoul-Henning Kamp 	if (mdio->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE))
5888f8def9eSPoul-Henning Kamp 		return(EINVAL);
5898f8def9eSPoul-Henning Kamp 	/* Compression doesn't make sense if we have reserved space */
5908f8def9eSPoul-Henning Kamp 	if (mdio->md_options & MD_RESERVE)
5918f8def9eSPoul-Henning Kamp 		mdio->md_options &= ~MD_COMPRESS;
5928f8def9eSPoul-Henning Kamp 	if (mdio->md_options & MD_AUTOUNIT) {
5938f8def9eSPoul-Henning Kamp 		sc = mdnew(-1);
5943f54a085SPoul-Henning Kamp 		if (sc == NULL)
5958f8def9eSPoul-Henning Kamp 			return (ENOMEM);
5968f8def9eSPoul-Henning Kamp 		mdio->md_unit = sc->unit;
5978f8def9eSPoul-Henning Kamp 	} else {
5988f8def9eSPoul-Henning Kamp 		sc = mdnew(mdio->md_unit);
5998f8def9eSPoul-Henning Kamp 		if (sc == NULL)
6008f8def9eSPoul-Henning Kamp 			return (EBUSY);
6018f8def9eSPoul-Henning Kamp 	}
60266c16191SPoul-Henning Kamp 	sc->type = MD_MALLOC;
6038f8def9eSPoul-Henning Kamp 	sc->secsize = DEV_BSIZE;
6048f8def9eSPoul-Henning Kamp 	sc->nsect = mdio->md_size;
6058f8def9eSPoul-Henning Kamp 	sc->flags = mdio->md_options & MD_COMPRESS;
6068f8def9eSPoul-Henning Kamp 	MALLOC(sc->secp, u_char **, sc->nsect * sizeof(u_char *), M_MD, M_WAITOK | M_ZERO);
60796b6a55fSPoul-Henning Kamp 	if (mdio->md_options & MD_RESERVE) {
6088f8def9eSPoul-Henning Kamp 		for (u = 0; u < sc->nsect; u++)
6098f8def9eSPoul-Henning Kamp 			MALLOC(sc->secp[u], u_char *, DEV_BSIZE, M_MDSECT, M_WAITOK | M_ZERO);
6108f8def9eSPoul-Henning Kamp 	}
6110cfaeeeeSPoul-Henning Kamp 	printf("md%d: Malloc disk\n", sc->unit);
6128f8def9eSPoul-Henning Kamp 	mdinit(sc);
6138f8def9eSPoul-Henning Kamp 	return (0);
61400a6a3c6SPoul-Henning Kamp }
61500a6a3c6SPoul-Henning Kamp 
6163f54a085SPoul-Henning Kamp 
6178f8def9eSPoul-Henning Kamp static int
6188f8def9eSPoul-Henning Kamp mdsetcred(struct md_s *sc, struct ucred *cred)
6198f8def9eSPoul-Henning Kamp {
6208f8def9eSPoul-Henning Kamp 	char *tmpbuf;
6218f8def9eSPoul-Henning Kamp 	int error = 0;
6228f8def9eSPoul-Henning Kamp 
6233f54a085SPoul-Henning Kamp 	/*
6248f8def9eSPoul-Henning Kamp 	 * Set credits in our softc
6253f54a085SPoul-Henning Kamp 	 */
6268f8def9eSPoul-Henning Kamp 
6278f8def9eSPoul-Henning Kamp 	if (sc->cred)
6288f8def9eSPoul-Henning Kamp 		crfree(sc->cred);
6298f8def9eSPoul-Henning Kamp 	sc->cred = crdup(cred);
6308f8def9eSPoul-Henning Kamp 
6318f8def9eSPoul-Henning Kamp 	/*
6328f8def9eSPoul-Henning Kamp 	 * Horrible kludge to establish credentials for NFS  XXX.
6338f8def9eSPoul-Henning Kamp 	 */
6348f8def9eSPoul-Henning Kamp 
6358f8def9eSPoul-Henning Kamp 	if (sc->vnode) {
6368f8def9eSPoul-Henning Kamp 		struct uio auio;
6378f8def9eSPoul-Henning Kamp 		struct iovec aiov;
6388f8def9eSPoul-Henning Kamp 
6398f8def9eSPoul-Henning Kamp 		tmpbuf = malloc(sc->secsize, M_TEMP, M_WAITOK);
6408f8def9eSPoul-Henning Kamp 		bzero(&auio, sizeof(auio));
6418f8def9eSPoul-Henning Kamp 
6428f8def9eSPoul-Henning Kamp 		aiov.iov_base = tmpbuf;
6438f8def9eSPoul-Henning Kamp 		aiov.iov_len = sc->secsize;
6448f8def9eSPoul-Henning Kamp 		auio.uio_iov = &aiov;
6458f8def9eSPoul-Henning Kamp 		auio.uio_iovcnt = 1;
6468f8def9eSPoul-Henning Kamp 		auio.uio_offset = 0;
6478f8def9eSPoul-Henning Kamp 		auio.uio_rw = UIO_READ;
6488f8def9eSPoul-Henning Kamp 		auio.uio_segflg = UIO_SYSSPACE;
6498f8def9eSPoul-Henning Kamp 		auio.uio_resid = aiov.iov_len;
6508f8def9eSPoul-Henning Kamp 		vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curproc);
6518f8def9eSPoul-Henning Kamp 		error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
6528f8def9eSPoul-Henning Kamp 		VOP_UNLOCK(sc->vnode, 0, curproc);
6538f8def9eSPoul-Henning Kamp 		free(tmpbuf, M_TEMP);
6548f8def9eSPoul-Henning Kamp 	}
6558f8def9eSPoul-Henning Kamp 	return (error);
6568f8def9eSPoul-Henning Kamp }
6578f8def9eSPoul-Henning Kamp 
6588f8def9eSPoul-Henning Kamp static int
6598f8def9eSPoul-Henning Kamp mdcreate_vnode(struct md_ioctl *mdio, struct proc *p)
6608f8def9eSPoul-Henning Kamp {
6618f8def9eSPoul-Henning Kamp 	struct md_s *sc;
6628f8def9eSPoul-Henning Kamp 	struct vattr vattr;
6638f8def9eSPoul-Henning Kamp 	struct nameidata nd;
6648f8def9eSPoul-Henning Kamp 	int error, flags;
6658f8def9eSPoul-Henning Kamp 
6668f8def9eSPoul-Henning Kamp 	if (mdio->md_options & MD_AUTOUNIT) {
6678f8def9eSPoul-Henning Kamp 		sc = mdnew(-1);
6688f8def9eSPoul-Henning Kamp 		mdio->md_unit = sc->unit;
6698f8def9eSPoul-Henning Kamp 	} else {
6708f8def9eSPoul-Henning Kamp 		sc = mdnew(mdio->md_unit);
6718f8def9eSPoul-Henning Kamp 	}
6728f8def9eSPoul-Henning Kamp 	if (sc == NULL)
6738f8def9eSPoul-Henning Kamp 		return (EBUSY);
6748f8def9eSPoul-Henning Kamp 
6758f8def9eSPoul-Henning Kamp 	sc->type = MD_VNODE;
6768f8def9eSPoul-Henning Kamp 
6778f8def9eSPoul-Henning Kamp 	flags = FREAD|FWRITE;
678637f671aSPoul-Henning Kamp 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, mdio->md_file, p);
6798f8def9eSPoul-Henning Kamp 	error = vn_open(&nd, &flags, 0);
6808f8def9eSPoul-Henning Kamp 	if (error) {
6818f8def9eSPoul-Henning Kamp 		if (error != EACCES && error != EPERM && error != EROFS)
6828f8def9eSPoul-Henning Kamp 			return (error);
6838f8def9eSPoul-Henning Kamp 		flags &= ~FWRITE;
6848f8def9eSPoul-Henning Kamp 		sc->flags |= MD_READONLY;
685637f671aSPoul-Henning Kamp 		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, mdio->md_file, p);
6868f8def9eSPoul-Henning Kamp 		error = vn_open(&nd, &flags, 0);
6878f8def9eSPoul-Henning Kamp 		if (error)
6888f8def9eSPoul-Henning Kamp 			return (error);
6898f8def9eSPoul-Henning Kamp 	}
6908f8def9eSPoul-Henning Kamp 	NDFREE(&nd, NDF_ONLY_PNBUF);
6918f8def9eSPoul-Henning Kamp 	if (nd.ni_vp->v_type != VREG ||
6928f8def9eSPoul-Henning Kamp 	    (error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p))) {
6938f8def9eSPoul-Henning Kamp 		VOP_UNLOCK(nd.ni_vp, 0, p);
6948f8def9eSPoul-Henning Kamp 		(void) vn_close(nd.ni_vp, flags, p->p_ucred, p);
6958f8def9eSPoul-Henning Kamp 		return (error ? error : EINVAL);
6968f8def9eSPoul-Henning Kamp 	}
6978f8def9eSPoul-Henning Kamp 	VOP_UNLOCK(nd.ni_vp, 0, p);
6988f8def9eSPoul-Henning Kamp 	sc->secsize = DEV_BSIZE;
6998f8def9eSPoul-Henning Kamp 	sc->vnode = nd.ni_vp;
7008f8def9eSPoul-Henning Kamp 
7018f8def9eSPoul-Henning Kamp 	/*
7028f8def9eSPoul-Henning Kamp 	 * If the size is specified, override the file attributes.
7038f8def9eSPoul-Henning Kamp 	 */
7048f8def9eSPoul-Henning Kamp 	if (mdio->md_size)
7058f8def9eSPoul-Henning Kamp 		sc->nsect = mdio->md_size;
7068f8def9eSPoul-Henning Kamp 	else
7078f8def9eSPoul-Henning Kamp 		sc->nsect = vattr.va_size / sc->secsize; /* XXX: round up ? */
7088f8def9eSPoul-Henning Kamp 	error = mdsetcred(sc, p->p_ucred);
7098f8def9eSPoul-Henning Kamp 	if (error) {
7108f8def9eSPoul-Henning Kamp 		(void) vn_close(nd.ni_vp, flags, p->p_ucred, p);
7118f8def9eSPoul-Henning Kamp 		return(error);
7128f8def9eSPoul-Henning Kamp 	}
7138f8def9eSPoul-Henning Kamp 	mdinit(sc);
7148f8def9eSPoul-Henning Kamp 	return (0);
7158f8def9eSPoul-Henning Kamp }
7168f8def9eSPoul-Henning Kamp 
7178f8def9eSPoul-Henning Kamp static int
7188f8def9eSPoul-Henning Kamp mddestroy(struct md_s *sc, struct md_ioctl *mdio, struct proc *p)
7198f8def9eSPoul-Henning Kamp {
7208f8def9eSPoul-Henning Kamp 	unsigned u;
7218f8def9eSPoul-Henning Kamp 
7228f8def9eSPoul-Henning Kamp 	if (sc->dev != NULL)
7238f8def9eSPoul-Henning Kamp 		disk_destroy(sc->dev);
7248f8def9eSPoul-Henning Kamp 	if (sc->vnode != NULL)
7258f8def9eSPoul-Henning Kamp 		(void)vn_close(sc->vnode, sc->flags & MD_READONLY ?  FREAD : (FREAD|FWRITE), sc->cred, p);
7268f8def9eSPoul-Henning Kamp 	if (sc->cred != NULL)
7278f8def9eSPoul-Henning Kamp 		crfree(sc->cred);
7288f8def9eSPoul-Henning Kamp 	if (sc->object != NULL)
7298f8def9eSPoul-Henning Kamp 		vm_pager_deallocate(sc->object);
7308f8def9eSPoul-Henning Kamp 	if (sc->secp != NULL) {
73196b6a55fSPoul-Henning Kamp 		for (u = 0; u < sc->nsect; u++)
7328f8def9eSPoul-Henning Kamp 			if ((uintptr_t)sc->secp[u] > 255)
7338f8def9eSPoul-Henning Kamp 				FREE(sc->secp[u], M_MDSECT);
7348f8def9eSPoul-Henning Kamp 		FREE(sc->secp, M_MD);
7358f8def9eSPoul-Henning Kamp 	}
7368f8def9eSPoul-Henning Kamp 	mddelete(sc);
7378f8def9eSPoul-Henning Kamp 	return (0);
7388f8def9eSPoul-Henning Kamp }
7398f8def9eSPoul-Henning Kamp 
7408f8def9eSPoul-Henning Kamp static int
7418f8def9eSPoul-Henning Kamp mdcreate_swap(struct md_ioctl *mdio, struct proc *p)
7428f8def9eSPoul-Henning Kamp {
7438f8def9eSPoul-Henning Kamp 	int error;
7448f8def9eSPoul-Henning Kamp 	struct md_s *sc;
7458f8def9eSPoul-Henning Kamp 
7468f8def9eSPoul-Henning Kamp 	if (mdio->md_options & MD_AUTOUNIT) {
7478f8def9eSPoul-Henning Kamp 		sc = mdnew(-1);
7488f8def9eSPoul-Henning Kamp 		mdio->md_unit = sc->unit;
7498f8def9eSPoul-Henning Kamp 	} else {
7508f8def9eSPoul-Henning Kamp 		sc = mdnew(mdio->md_unit);
7518f8def9eSPoul-Henning Kamp 	}
7528f8def9eSPoul-Henning Kamp 	if (sc == NULL)
7538f8def9eSPoul-Henning Kamp 		return (EBUSY);
7548f8def9eSPoul-Henning Kamp 
7558f8def9eSPoul-Henning Kamp 	sc->type = MD_SWAP;
7568f8def9eSPoul-Henning Kamp 
7578f8def9eSPoul-Henning Kamp 	/*
7588f8def9eSPoul-Henning Kamp 	 * Range check.  Disallow negative sizes or any size less then the
7598f8def9eSPoul-Henning Kamp 	 * size of a page.  Then round to a page.
7608f8def9eSPoul-Henning Kamp 	 */
7618f8def9eSPoul-Henning Kamp 
7628f8def9eSPoul-Henning Kamp 	if (mdio->md_size == 0)
7638f8def9eSPoul-Henning Kamp 		return(EDOM);
7648f8def9eSPoul-Henning Kamp 
7658f8def9eSPoul-Henning Kamp 	/*
7668f8def9eSPoul-Henning Kamp 	 * Allocate an OBJT_SWAP object.
7678f8def9eSPoul-Henning Kamp 	 *
7688f8def9eSPoul-Henning Kamp 	 * sc_secsize is PAGE_SIZE'd
7698f8def9eSPoul-Henning Kamp 	 *
7708f8def9eSPoul-Henning Kamp 	 * mdio->size is in DEV_BSIZE'd chunks.
7718f8def9eSPoul-Henning Kamp 	 * Note the truncation.
7728f8def9eSPoul-Henning Kamp 	 */
7738f8def9eSPoul-Henning Kamp 
7748f8def9eSPoul-Henning Kamp 	sc->secsize = PAGE_SIZE;
7758f8def9eSPoul-Henning Kamp 	sc->nsect = mdio->md_size / (PAGE_SIZE / DEV_BSIZE);
7768f8def9eSPoul-Henning Kamp 	sc->object = vm_pager_allocate(OBJT_SWAP, NULL, sc->secsize * (vm_offset_t)sc->nsect, VM_PROT_DEFAULT, 0);
7778f8def9eSPoul-Henning Kamp 	if (mdio->md_options & MD_RESERVE) {
7788f8def9eSPoul-Henning Kamp 		if (swap_pager_reserve(sc->object, 0, sc->nsect) < 0) {
7798f8def9eSPoul-Henning Kamp 			vm_pager_deallocate(sc->object);
7808f8def9eSPoul-Henning Kamp 			sc->object = NULL;
7818f8def9eSPoul-Henning Kamp 			return(EDOM);
7828f8def9eSPoul-Henning Kamp 		}
7838f8def9eSPoul-Henning Kamp 	}
7848f8def9eSPoul-Henning Kamp 	error = mdsetcred(sc, p->p_ucred);
7858f8def9eSPoul-Henning Kamp 	if (error)
7868f8def9eSPoul-Henning Kamp 		mddestroy(sc, mdio, p);
7878f8def9eSPoul-Henning Kamp 	else
7888f8def9eSPoul-Henning Kamp 		mdinit(sc);
7898f8def9eSPoul-Henning Kamp 	return(error);
7908f8def9eSPoul-Henning Kamp }
7918f8def9eSPoul-Henning Kamp 
7928f8def9eSPoul-Henning Kamp static int
7938f8def9eSPoul-Henning Kamp mdctlioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
7948f8def9eSPoul-Henning Kamp {
7958f8def9eSPoul-Henning Kamp 	struct md_ioctl *mdio;
7968f8def9eSPoul-Henning Kamp 	struct md_s *sc;
7978f8def9eSPoul-Henning Kamp 
7988f8def9eSPoul-Henning Kamp 	if (md_debug)
7998f8def9eSPoul-Henning Kamp 		printf("mdctlioctl(%s %lx %p %x %p)\n",
8008f8def9eSPoul-Henning Kamp 			devtoname(dev), cmd, addr, flags, p);
8018f8def9eSPoul-Henning Kamp 
8028f8def9eSPoul-Henning Kamp 	mdio = (struct md_ioctl *)addr;
8038f8def9eSPoul-Henning Kamp 	switch (cmd) {
8048f8def9eSPoul-Henning Kamp 	case MDIOCATTACH:
8058f8def9eSPoul-Henning Kamp 		switch (mdio->md_type) {
8068f8def9eSPoul-Henning Kamp 		case MD_MALLOC:
8078f8def9eSPoul-Henning Kamp 			return(mdcreate_malloc(mdio));
8088f8def9eSPoul-Henning Kamp 		case MD_PRELOAD:
809637f671aSPoul-Henning Kamp 			return(mdcreate_preload(mdio));
8108f8def9eSPoul-Henning Kamp 		case MD_VNODE:
8118f8def9eSPoul-Henning Kamp 			return(mdcreate_vnode(mdio, p));
8128f8def9eSPoul-Henning Kamp 		case MD_SWAP:
8138f8def9eSPoul-Henning Kamp 			return(mdcreate_swap(mdio, p));
8148f8def9eSPoul-Henning Kamp 		default:
8158f8def9eSPoul-Henning Kamp 			return (EINVAL);
8168f8def9eSPoul-Henning Kamp 		}
8178f8def9eSPoul-Henning Kamp 	case MDIOCDETACH:
818637f671aSPoul-Henning Kamp 		if (mdio->md_file != NULL)
8198f8def9eSPoul-Henning Kamp 			return(EINVAL);
8208f8def9eSPoul-Henning Kamp 		if (mdio->md_size != 0)
8218f8def9eSPoul-Henning Kamp 			return(EINVAL);
8228f8def9eSPoul-Henning Kamp 		if (mdio->md_options != 0)
8238f8def9eSPoul-Henning Kamp 			return(EINVAL);
8248f8def9eSPoul-Henning Kamp 		sc = mdfind(mdio->md_unit);
8258f8def9eSPoul-Henning Kamp 		if (sc == NULL)
8268f8def9eSPoul-Henning Kamp 			return (ENOENT);
8278f8def9eSPoul-Henning Kamp 		switch(sc->type) {
8288f8def9eSPoul-Henning Kamp 		case MD_VNODE:
8298f8def9eSPoul-Henning Kamp 		case MD_SWAP:
8308f8def9eSPoul-Henning Kamp 		case MD_MALLOC:
831637f671aSPoul-Henning Kamp 		case MD_PRELOAD:
8328f8def9eSPoul-Henning Kamp 			return(mddestroy(sc, mdio, p));
8338f8def9eSPoul-Henning Kamp 		default:
8348f8def9eSPoul-Henning Kamp 			return (EOPNOTSUPP);
8358f8def9eSPoul-Henning Kamp 		}
8368f8def9eSPoul-Henning Kamp 	default:
8378f8def9eSPoul-Henning Kamp 		return (ENOIOCTL);
8388f8def9eSPoul-Henning Kamp 	};
8398f8def9eSPoul-Henning Kamp 	return (ENOIOCTL);
8403f54a085SPoul-Henning Kamp }
8413f54a085SPoul-Henning Kamp 
84200a6a3c6SPoul-Henning Kamp static void
843637f671aSPoul-Henning Kamp md_preloaded(u_char *image, unsigned length)
844637f671aSPoul-Henning Kamp {
845637f671aSPoul-Henning Kamp 	struct md_s *sc;
846637f671aSPoul-Henning Kamp 
847637f671aSPoul-Henning Kamp 	sc = mdnew(-1);
848637f671aSPoul-Henning Kamp 	if (sc == NULL)
849637f671aSPoul-Henning Kamp 		return;
850637f671aSPoul-Henning Kamp 	sc->type = MD_PRELOAD;
851637f671aSPoul-Henning Kamp 	sc->secsize = DEV_BSIZE;
852637f671aSPoul-Henning Kamp 	sc->nsect = length / DEV_BSIZE;
853637f671aSPoul-Henning Kamp 	sc->pl_ptr = image;
854637f671aSPoul-Henning Kamp 	sc->pl_len = length;
855637f671aSPoul-Henning Kamp 	if (sc->unit == 0)
856637f671aSPoul-Henning Kamp 		mdrootready = 1;
857637f671aSPoul-Henning Kamp 	mdinit(sc);
858637f671aSPoul-Henning Kamp }
859637f671aSPoul-Henning Kamp 
860637f671aSPoul-Henning Kamp static void
86100a6a3c6SPoul-Henning Kamp md_drvinit(void *unused)
86200a6a3c6SPoul-Henning Kamp {
86300a6a3c6SPoul-Henning Kamp 
86495f1a897SPoul-Henning Kamp 	caddr_t mod;
86595f1a897SPoul-Henning Kamp 	caddr_t c;
86695f1a897SPoul-Henning Kamp 	u_char *ptr, *name, *type;
86795f1a897SPoul-Henning Kamp 	unsigned len;
86895f1a897SPoul-Henning Kamp 
86971e4fff8SPoul-Henning Kamp #ifdef MD_ROOT_SIZE
870637f671aSPoul-Henning Kamp 	md_preloaded(mfs_root, MD_ROOT_SIZE*1024);
87171e4fff8SPoul-Henning Kamp #endif
87295f1a897SPoul-Henning Kamp 	mod = NULL;
87395f1a897SPoul-Henning Kamp 	while ((mod = preload_search_next_name(mod)) != NULL) {
87495f1a897SPoul-Henning Kamp 		name = (char *)preload_search_info(mod, MODINFO_NAME);
87595f1a897SPoul-Henning Kamp 		type = (char *)preload_search_info(mod, MODINFO_TYPE);
87695f1a897SPoul-Henning Kamp 		if (name == NULL)
87795f1a897SPoul-Henning Kamp 			continue;
87895f1a897SPoul-Henning Kamp 		if (type == NULL)
87995f1a897SPoul-Henning Kamp 			continue;
88071e4fff8SPoul-Henning Kamp 		if (strcmp(type, "md_image") && strcmp(type, "mfs_root"))
88195f1a897SPoul-Henning Kamp 			continue;
88295f1a897SPoul-Henning Kamp 		c = preload_search_info(mod, MODINFO_ADDR);
88395f1a897SPoul-Henning Kamp 		ptr = *(u_char **)c;
88495f1a897SPoul-Henning Kamp 		c = preload_search_info(mod, MODINFO_SIZE);
88595f1a897SPoul-Henning Kamp 		len = *(unsigned *)c;
88695f1a897SPoul-Henning Kamp 		printf("md%d: Preloaded image <%s> %d bytes at %p\n",
88795f1a897SPoul-Henning Kamp 		   mdunits, name, len, ptr);
888637f671aSPoul-Henning Kamp 		md_preloaded(ptr, len);
88995f1a897SPoul-Henning Kamp 	}
89057e9624eSPoul-Henning Kamp 	status_dev = make_dev(&mdctl_cdevsw, 0xffff00ff, UID_ROOT, GID_WHEEL, 0600, "mdctl");
89100a6a3c6SPoul-Henning Kamp }
89200a6a3c6SPoul-Henning Kamp 
89357e9624eSPoul-Henning Kamp static int
89457e9624eSPoul-Henning Kamp md_modevent(module_t mod, int type, void *data)
89557e9624eSPoul-Henning Kamp {
89657e9624eSPoul-Henning Kamp         switch (type) {
89757e9624eSPoul-Henning Kamp         case MOD_LOAD:
89857e9624eSPoul-Henning Kamp 		md_drvinit(NULL);
89957e9624eSPoul-Henning Kamp                 break;
90057e9624eSPoul-Henning Kamp         case MOD_UNLOAD:
90157e9624eSPoul-Henning Kamp 		if (!LIST_EMPTY(&md_softc_list))
90257e9624eSPoul-Henning Kamp 			return EBUSY;
90357e9624eSPoul-Henning Kamp                 if (status_dev)
90457e9624eSPoul-Henning Kamp                         destroy_dev(status_dev);
90557e9624eSPoul-Henning Kamp                 status_dev = 0;
90657e9624eSPoul-Henning Kamp                 break;
90757e9624eSPoul-Henning Kamp         default:
90857e9624eSPoul-Henning Kamp                 break;
90957e9624eSPoul-Henning Kamp         }
91057e9624eSPoul-Henning Kamp         return 0;
91157e9624eSPoul-Henning Kamp }
91257e9624eSPoul-Henning Kamp 
91357e9624eSPoul-Henning Kamp static moduledata_t md_mod = {
91457e9624eSPoul-Henning Kamp         "md",
91557e9624eSPoul-Henning Kamp         md_modevent,
91657e9624eSPoul-Henning Kamp         NULL
91757e9624eSPoul-Henning Kamp };
91857e9624eSPoul-Henning Kamp DECLARE_MODULE(md, md_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE+CDEV_MAJOR);
91957e9624eSPoul-Henning Kamp MODULE_VERSION(md, MD_MODVER);
92057e9624eSPoul-Henning Kamp 
92100a6a3c6SPoul-Henning Kamp 
#ifdef MD_ROOT
/*
 * Registered at SI_SUB_MOUNT_ROOT / SI_ORDER_FIRST: when a preloaded image
 * has claimed unit 0 (mdrootready is set), make "ufs:/dev/md0c" the first
 * root device candidate.  `junk' is ignored.
 */
static void
md_takeroot(void *junk)
{
	if (!mdrootready)
		return;
	rootdevnames[0] = "ufs:/dev/md0c";
}

SYSINIT(md_root, SI_SUB_MOUNT_ROOT, SI_ORDER_FIRST, md_takeroot, NULL);
#endif
9323f54a085SPoul-Henning Kamp 
933