xref: /titanic_53/usr/src/uts/common/os/move.c (revision 6f5f1c638c7bce3a35e88526a88fc78bdfd58ffe)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
517169044Sbrutus  * Common Development and Distribution License (the "License").
617169044Sbrutus  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
2217169044Sbrutus  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
277c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate /*
307c478bd9Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
317c478bd9Sstevel@tonic-gate  * The Regents of the University of California
327c478bd9Sstevel@tonic-gate  * All Rights Reserved
337c478bd9Sstevel@tonic-gate  *
347c478bd9Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
357c478bd9Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
367c478bd9Sstevel@tonic-gate  * contributors.
377c478bd9Sstevel@tonic-gate  */
387c478bd9Sstevel@tonic-gate 
397c478bd9Sstevel@tonic-gate #include <sys/types.h>
407c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
417c478bd9Sstevel@tonic-gate #include <sys/param.h>
427c478bd9Sstevel@tonic-gate #include <sys/systm.h>
437c478bd9Sstevel@tonic-gate #include <sys/uio.h>
447c478bd9Sstevel@tonic-gate #include <sys/errno.h>
4517169044Sbrutus #include <sys/vmsystm.h>
4617169044Sbrutus #include <sys/cmn_err.h>
4717169044Sbrutus #include <vm/as.h>
4817169044Sbrutus #include <vm/page.h>
4917169044Sbrutus 
5017169044Sbrutus #include <sys/dcopy.h>
5117169044Sbrutus 
/* Slots of uioa_t.uioa_hwst[] holding the dcopy channel and last command. */
#define	UIO_DCOPY_CHANNEL	0
#define	UIO_DCOPY_CMD		1

/*
 * Wait policy applied by uioafini() to the last posted dcopy command:
 *	< 0	never block, only poll
 *	  0	always block
 *	> 0	poll first, block once the poll counter reaches this value
 * uioamove() requests a completion interrupt whenever this is >= 0.
 */
int64_t uioa_maxpoll = -1;
557c478bd9Sstevel@tonic-gate 
567c478bd9Sstevel@tonic-gate /*
577c478bd9Sstevel@tonic-gate  * Move "n" bytes at byte address "p"; "rw" indicates the direction
587c478bd9Sstevel@tonic-gate  * of the move, and the I/O parameters are provided in "uio", which is
597c478bd9Sstevel@tonic-gate  * update to reflect the data which was moved.  Returns 0 on success or
607c478bd9Sstevel@tonic-gate  * a non-zero errno on failure.
617c478bd9Sstevel@tonic-gate  */
627c478bd9Sstevel@tonic-gate int
637c478bd9Sstevel@tonic-gate uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
647c478bd9Sstevel@tonic-gate {
657c478bd9Sstevel@tonic-gate 	struct iovec *iov;
667c478bd9Sstevel@tonic-gate 	ulong_t cnt;
677c478bd9Sstevel@tonic-gate 	int error;
687c478bd9Sstevel@tonic-gate 
697c478bd9Sstevel@tonic-gate 	while (n && uio->uio_resid) {
707c478bd9Sstevel@tonic-gate 		iov = uio->uio_iov;
717c478bd9Sstevel@tonic-gate 		cnt = MIN(iov->iov_len, n);
727c478bd9Sstevel@tonic-gate 		if (cnt == 0l) {
737c478bd9Sstevel@tonic-gate 			uio->uio_iov++;
747c478bd9Sstevel@tonic-gate 			uio->uio_iovcnt--;
757c478bd9Sstevel@tonic-gate 			continue;
767c478bd9Sstevel@tonic-gate 		}
777c478bd9Sstevel@tonic-gate 		switch (uio->uio_segflg) {
787c478bd9Sstevel@tonic-gate 
797c478bd9Sstevel@tonic-gate 		case UIO_USERSPACE:
807c478bd9Sstevel@tonic-gate 		case UIO_USERISPACE:
817c478bd9Sstevel@tonic-gate 			if (rw == UIO_READ) {
827c478bd9Sstevel@tonic-gate 				error = xcopyout_nta(p, iov->iov_base, cnt,
837c478bd9Sstevel@tonic-gate 				    (uio->uio_extflg & UIO_COPY_CACHED));
847c478bd9Sstevel@tonic-gate 			} else {
857c478bd9Sstevel@tonic-gate 				error = xcopyin_nta(iov->iov_base, p, cnt,
867c478bd9Sstevel@tonic-gate 				    (uio->uio_extflg & UIO_COPY_CACHED));
877c478bd9Sstevel@tonic-gate 			}
887c478bd9Sstevel@tonic-gate 
897c478bd9Sstevel@tonic-gate 			if (error)
907c478bd9Sstevel@tonic-gate 				return (error);
917c478bd9Sstevel@tonic-gate 			break;
927c478bd9Sstevel@tonic-gate 
937c478bd9Sstevel@tonic-gate 		case UIO_SYSSPACE:
947c478bd9Sstevel@tonic-gate 			if (rw == UIO_READ)
957c478bd9Sstevel@tonic-gate 				error = kcopy_nta(p, iov->iov_base, cnt,
967c478bd9Sstevel@tonic-gate 				    (uio->uio_extflg & UIO_COPY_CACHED));
977c478bd9Sstevel@tonic-gate 			else
987c478bd9Sstevel@tonic-gate 				error = kcopy_nta(iov->iov_base, p, cnt,
997c478bd9Sstevel@tonic-gate 				    (uio->uio_extflg & UIO_COPY_CACHED));
1007c478bd9Sstevel@tonic-gate 			if (error)
1017c478bd9Sstevel@tonic-gate 				return (error);
1027c478bd9Sstevel@tonic-gate 			break;
1037c478bd9Sstevel@tonic-gate 		}
1047c478bd9Sstevel@tonic-gate 		iov->iov_base += cnt;
1057c478bd9Sstevel@tonic-gate 		iov->iov_len -= cnt;
1067c478bd9Sstevel@tonic-gate 		uio->uio_resid -= cnt;
1077c478bd9Sstevel@tonic-gate 		uio->uio_loffset += cnt;
1087c478bd9Sstevel@tonic-gate 		p = (caddr_t)p + cnt;
1097c478bd9Sstevel@tonic-gate 		n -= cnt;
1107c478bd9Sstevel@tonic-gate 	}
1117c478bd9Sstevel@tonic-gate 	return (0);
1127c478bd9Sstevel@tonic-gate }
1137c478bd9Sstevel@tonic-gate 
1147c478bd9Sstevel@tonic-gate /*
115*6f5f1c63SDonghai Qiao  * Fault in the pages of the first n bytes specified by the uio structure.
116*6f5f1c63SDonghai Qiao  * 1 byte in each page is touched and the uio struct is unmodified. Any
117*6f5f1c63SDonghai Qiao  * error will terminate the process as this is only a best attempt to get
118*6f5f1c63SDonghai Qiao  * the pages resident.
119*6f5f1c63SDonghai Qiao  */
120*6f5f1c63SDonghai Qiao void
121*6f5f1c63SDonghai Qiao uio_prefaultpages(ssize_t n, struct uio *uio)
122*6f5f1c63SDonghai Qiao {
123*6f5f1c63SDonghai Qiao 	struct iovec *iov;
124*6f5f1c63SDonghai Qiao 	ulong_t cnt, incr;
125*6f5f1c63SDonghai Qiao 	caddr_t p;
126*6f5f1c63SDonghai Qiao 	uint8_t tmp;
127*6f5f1c63SDonghai Qiao 	int iovcnt;
128*6f5f1c63SDonghai Qiao 
129*6f5f1c63SDonghai Qiao 	iov = uio->uio_iov;
130*6f5f1c63SDonghai Qiao 	iovcnt = uio->uio_iovcnt;
131*6f5f1c63SDonghai Qiao 
132*6f5f1c63SDonghai Qiao 	while ((n > 0) && (iovcnt > 0)) {
133*6f5f1c63SDonghai Qiao 		cnt = MIN(iov->iov_len, n);
134*6f5f1c63SDonghai Qiao 		if (cnt == 0) {
135*6f5f1c63SDonghai Qiao 			/* empty iov entry */
136*6f5f1c63SDonghai Qiao 			iov++;
137*6f5f1c63SDonghai Qiao 			iovcnt--;
138*6f5f1c63SDonghai Qiao 			continue;
139*6f5f1c63SDonghai Qiao 		}
140*6f5f1c63SDonghai Qiao 		n -= cnt;
141*6f5f1c63SDonghai Qiao 		/*
142*6f5f1c63SDonghai Qiao 		 * touch each page in this segment.
143*6f5f1c63SDonghai Qiao 		 */
144*6f5f1c63SDonghai Qiao 		p = iov->iov_base;
145*6f5f1c63SDonghai Qiao 		while (cnt) {
146*6f5f1c63SDonghai Qiao 			switch (uio->uio_segflg) {
147*6f5f1c63SDonghai Qiao 			case UIO_USERSPACE:
148*6f5f1c63SDonghai Qiao 			case UIO_USERISPACE:
149*6f5f1c63SDonghai Qiao 				if (fuword8(p, &tmp))
150*6f5f1c63SDonghai Qiao 					return;
151*6f5f1c63SDonghai Qiao 				break;
152*6f5f1c63SDonghai Qiao 			case UIO_SYSSPACE:
153*6f5f1c63SDonghai Qiao 				if (kcopy(p, &tmp, 1))
154*6f5f1c63SDonghai Qiao 					return;
155*6f5f1c63SDonghai Qiao 				break;
156*6f5f1c63SDonghai Qiao 			}
157*6f5f1c63SDonghai Qiao 			incr = MIN(cnt, PAGESIZE);
158*6f5f1c63SDonghai Qiao 			p += incr;
159*6f5f1c63SDonghai Qiao 			cnt -= incr;
160*6f5f1c63SDonghai Qiao 		}
161*6f5f1c63SDonghai Qiao 		/*
162*6f5f1c63SDonghai Qiao 		 * touch the last byte in case it straddles a page.
163*6f5f1c63SDonghai Qiao 		 */
164*6f5f1c63SDonghai Qiao 		p--;
165*6f5f1c63SDonghai Qiao 		switch (uio->uio_segflg) {
166*6f5f1c63SDonghai Qiao 		case UIO_USERSPACE:
167*6f5f1c63SDonghai Qiao 		case UIO_USERISPACE:
168*6f5f1c63SDonghai Qiao 			if (fuword8(p, &tmp))
169*6f5f1c63SDonghai Qiao 				return;
170*6f5f1c63SDonghai Qiao 			break;
171*6f5f1c63SDonghai Qiao 		case UIO_SYSSPACE:
172*6f5f1c63SDonghai Qiao 			if (kcopy(p, &tmp, 1))
173*6f5f1c63SDonghai Qiao 				return;
174*6f5f1c63SDonghai Qiao 			break;
175*6f5f1c63SDonghai Qiao 		}
176*6f5f1c63SDonghai Qiao 		iov++;
177*6f5f1c63SDonghai Qiao 		iovcnt--;
178*6f5f1c63SDonghai Qiao 	}
179*6f5f1c63SDonghai Qiao }
180*6f5f1c63SDonghai Qiao 
181*6f5f1c63SDonghai Qiao /*
1827c478bd9Sstevel@tonic-gate  * transfer a character value into the address space
1837c478bd9Sstevel@tonic-gate  * delineated by a uio and update fields within the
1847c478bd9Sstevel@tonic-gate  * uio for next character. Return 0 for success, EFAULT
1857c478bd9Sstevel@tonic-gate  * for error.
1867c478bd9Sstevel@tonic-gate  */
1877c478bd9Sstevel@tonic-gate int
1887c478bd9Sstevel@tonic-gate ureadc(int val, struct uio *uiop)
1897c478bd9Sstevel@tonic-gate {
1907c478bd9Sstevel@tonic-gate 	struct iovec *iovp;
1917c478bd9Sstevel@tonic-gate 	unsigned char c;
1927c478bd9Sstevel@tonic-gate 
1937c478bd9Sstevel@tonic-gate 	/*
1947c478bd9Sstevel@tonic-gate 	 * first determine if uio is valid.  uiop should be
1957c478bd9Sstevel@tonic-gate 	 * non-NULL and the resid count > 0.
1967c478bd9Sstevel@tonic-gate 	 */
1977c478bd9Sstevel@tonic-gate 	if (!(uiop && uiop->uio_resid > 0))
1987c478bd9Sstevel@tonic-gate 		return (EFAULT);
1997c478bd9Sstevel@tonic-gate 
2007c478bd9Sstevel@tonic-gate 	/*
2017c478bd9Sstevel@tonic-gate 	 * scan through iovecs until one is found that is non-empty.
2027c478bd9Sstevel@tonic-gate 	 * Return EFAULT if none found.
2037c478bd9Sstevel@tonic-gate 	 */
2047c478bd9Sstevel@tonic-gate 	while (uiop->uio_iovcnt > 0) {
2057c478bd9Sstevel@tonic-gate 		iovp = uiop->uio_iov;
2067c478bd9Sstevel@tonic-gate 		if (iovp->iov_len <= 0) {
2077c478bd9Sstevel@tonic-gate 			uiop->uio_iovcnt--;
2087c478bd9Sstevel@tonic-gate 			uiop->uio_iov++;
2097c478bd9Sstevel@tonic-gate 		} else
2107c478bd9Sstevel@tonic-gate 			break;
2117c478bd9Sstevel@tonic-gate 	}
2127c478bd9Sstevel@tonic-gate 
2137c478bd9Sstevel@tonic-gate 	if (uiop->uio_iovcnt <= 0)
2147c478bd9Sstevel@tonic-gate 		return (EFAULT);
2157c478bd9Sstevel@tonic-gate 
2167c478bd9Sstevel@tonic-gate 	/*
2177c478bd9Sstevel@tonic-gate 	 * Transfer character to uio space.
2187c478bd9Sstevel@tonic-gate 	 */
2197c478bd9Sstevel@tonic-gate 
2207c478bd9Sstevel@tonic-gate 	c = (unsigned char) (val & 0xFF);
2217c478bd9Sstevel@tonic-gate 
2227c478bd9Sstevel@tonic-gate 	switch (uiop->uio_segflg) {
2237c478bd9Sstevel@tonic-gate 
2247c478bd9Sstevel@tonic-gate 	case UIO_USERISPACE:
2257c478bd9Sstevel@tonic-gate 	case UIO_USERSPACE:
2267c478bd9Sstevel@tonic-gate 		if (copyout(&c, iovp->iov_base, sizeof (unsigned char)))
2277c478bd9Sstevel@tonic-gate 			return (EFAULT);
2287c478bd9Sstevel@tonic-gate 		break;
2297c478bd9Sstevel@tonic-gate 
2307c478bd9Sstevel@tonic-gate 	case UIO_SYSSPACE: /* can do direct copy since kernel-kernel */
2317c478bd9Sstevel@tonic-gate 		*iovp->iov_base = c;
2327c478bd9Sstevel@tonic-gate 		break;
2337c478bd9Sstevel@tonic-gate 
2347c478bd9Sstevel@tonic-gate 	default:
2357c478bd9Sstevel@tonic-gate 		return (EFAULT); /* invalid segflg value */
2367c478bd9Sstevel@tonic-gate 	}
2377c478bd9Sstevel@tonic-gate 
2387c478bd9Sstevel@tonic-gate 	/*
2397c478bd9Sstevel@tonic-gate 	 * bump up/down iovec and uio members to reflect transfer.
2407c478bd9Sstevel@tonic-gate 	 */
2417c478bd9Sstevel@tonic-gate 	iovp->iov_base++;
2427c478bd9Sstevel@tonic-gate 	iovp->iov_len--;
2437c478bd9Sstevel@tonic-gate 	uiop->uio_resid--;
2447c478bd9Sstevel@tonic-gate 	uiop->uio_loffset++;
2457c478bd9Sstevel@tonic-gate 	return (0); /* success */
2467c478bd9Sstevel@tonic-gate }
2477c478bd9Sstevel@tonic-gate 
2487c478bd9Sstevel@tonic-gate /*
2497c478bd9Sstevel@tonic-gate  * return a character value from the address space
2507c478bd9Sstevel@tonic-gate  * delineated by a uio and update fields within the
2517c478bd9Sstevel@tonic-gate  * uio for next character. Return the character for success,
2527c478bd9Sstevel@tonic-gate  * -1 for error.
2537c478bd9Sstevel@tonic-gate  */
2547c478bd9Sstevel@tonic-gate int
2557c478bd9Sstevel@tonic-gate uwritec(struct uio *uiop)
2567c478bd9Sstevel@tonic-gate {
2577c478bd9Sstevel@tonic-gate 	struct iovec *iovp;
2587c478bd9Sstevel@tonic-gate 	unsigned char c;
2597c478bd9Sstevel@tonic-gate 
2607c478bd9Sstevel@tonic-gate 	/*
2617c478bd9Sstevel@tonic-gate 	 * verify we were passed a valid uio structure.
2627c478bd9Sstevel@tonic-gate 	 * (1) non-NULL uiop, (2) positive resid count
2637c478bd9Sstevel@tonic-gate 	 * (3) there is an iovec with positive length
2647c478bd9Sstevel@tonic-gate 	 */
2657c478bd9Sstevel@tonic-gate 
2667c478bd9Sstevel@tonic-gate 	if (!(uiop && uiop->uio_resid > 0))
2677c478bd9Sstevel@tonic-gate 		return (-1);
2687c478bd9Sstevel@tonic-gate 
2697c478bd9Sstevel@tonic-gate 	while (uiop->uio_iovcnt > 0) {
2707c478bd9Sstevel@tonic-gate 		iovp = uiop->uio_iov;
2717c478bd9Sstevel@tonic-gate 		if (iovp->iov_len <= 0) {
2727c478bd9Sstevel@tonic-gate 			uiop->uio_iovcnt--;
2737c478bd9Sstevel@tonic-gate 			uiop->uio_iov++;
2747c478bd9Sstevel@tonic-gate 		} else
2757c478bd9Sstevel@tonic-gate 			break;
2767c478bd9Sstevel@tonic-gate 	}
2777c478bd9Sstevel@tonic-gate 
2787c478bd9Sstevel@tonic-gate 	if (uiop->uio_iovcnt <= 0)
2797c478bd9Sstevel@tonic-gate 		return (-1);
2807c478bd9Sstevel@tonic-gate 
2817c478bd9Sstevel@tonic-gate 	/*
2827c478bd9Sstevel@tonic-gate 	 * Get the character from the uio address space.
2837c478bd9Sstevel@tonic-gate 	 */
2847c478bd9Sstevel@tonic-gate 	switch (uiop->uio_segflg) {
2857c478bd9Sstevel@tonic-gate 
2867c478bd9Sstevel@tonic-gate 	case UIO_USERISPACE:
2877c478bd9Sstevel@tonic-gate 	case UIO_USERSPACE:
2887c478bd9Sstevel@tonic-gate 		if (copyin(iovp->iov_base, &c, sizeof (unsigned char)))
2897c478bd9Sstevel@tonic-gate 			return (-1);
2907c478bd9Sstevel@tonic-gate 		break;
2917c478bd9Sstevel@tonic-gate 
2927c478bd9Sstevel@tonic-gate 	case UIO_SYSSPACE:
2937c478bd9Sstevel@tonic-gate 		c = *iovp->iov_base;
2947c478bd9Sstevel@tonic-gate 		break;
2957c478bd9Sstevel@tonic-gate 
2967c478bd9Sstevel@tonic-gate 	default:
2977c478bd9Sstevel@tonic-gate 		return (-1); /* invalid segflg */
2987c478bd9Sstevel@tonic-gate 	}
2997c478bd9Sstevel@tonic-gate 
3007c478bd9Sstevel@tonic-gate 	/*
3017c478bd9Sstevel@tonic-gate 	 * Adjust fields of iovec and uio appropriately.
3027c478bd9Sstevel@tonic-gate 	 */
3037c478bd9Sstevel@tonic-gate 	iovp->iov_base++;
3047c478bd9Sstevel@tonic-gate 	iovp->iov_len--;
3057c478bd9Sstevel@tonic-gate 	uiop->uio_resid--;
3067c478bd9Sstevel@tonic-gate 	uiop->uio_loffset++;
3077c478bd9Sstevel@tonic-gate 	return ((int)c & 0xFF); /* success */
3087c478bd9Sstevel@tonic-gate }
3097c478bd9Sstevel@tonic-gate 
3107c478bd9Sstevel@tonic-gate /*
3117c478bd9Sstevel@tonic-gate  * Drop the next n chars out of *uiop.
3127c478bd9Sstevel@tonic-gate  */
3137c478bd9Sstevel@tonic-gate void
3147c478bd9Sstevel@tonic-gate uioskip(uio_t *uiop, size_t n)
3157c478bd9Sstevel@tonic-gate {
3167c478bd9Sstevel@tonic-gate 	if (n > uiop->uio_resid)
3177c478bd9Sstevel@tonic-gate 		return;
3187c478bd9Sstevel@tonic-gate 	while (n != 0) {
3197c478bd9Sstevel@tonic-gate 		register iovec_t	*iovp = uiop->uio_iov;
3207c478bd9Sstevel@tonic-gate 		register size_t		niovb = MIN(iovp->iov_len, n);
3217c478bd9Sstevel@tonic-gate 
3227c478bd9Sstevel@tonic-gate 		if (niovb == 0) {
3237c478bd9Sstevel@tonic-gate 			uiop->uio_iov++;
3247c478bd9Sstevel@tonic-gate 			uiop->uio_iovcnt--;
3257c478bd9Sstevel@tonic-gate 			continue;
3267c478bd9Sstevel@tonic-gate 		}
3277c478bd9Sstevel@tonic-gate 		iovp->iov_base += niovb;
3287c478bd9Sstevel@tonic-gate 		uiop->uio_loffset += niovb;
3297c478bd9Sstevel@tonic-gate 		iovp->iov_len -= niovb;
3307c478bd9Sstevel@tonic-gate 		uiop->uio_resid -= niovb;
3317c478bd9Sstevel@tonic-gate 		n -= niovb;
3327c478bd9Sstevel@tonic-gate 	}
3337c478bd9Sstevel@tonic-gate }
3347c478bd9Sstevel@tonic-gate 
3357c478bd9Sstevel@tonic-gate /*
3367c478bd9Sstevel@tonic-gate  * Dup the suio into the duio and diovec of size diov_cnt. If diov
3377c478bd9Sstevel@tonic-gate  * is too small to dup suio then an error will be returned, else 0.
3387c478bd9Sstevel@tonic-gate  */
3397c478bd9Sstevel@tonic-gate int
3407c478bd9Sstevel@tonic-gate uiodup(uio_t *suio, uio_t *duio, iovec_t *diov, int diov_cnt)
3417c478bd9Sstevel@tonic-gate {
3427c478bd9Sstevel@tonic-gate 	int ix;
3437c478bd9Sstevel@tonic-gate 	iovec_t *siov = suio->uio_iov;
3447c478bd9Sstevel@tonic-gate 
3457c478bd9Sstevel@tonic-gate 	*duio = *suio;
3467c478bd9Sstevel@tonic-gate 	for (ix = 0; ix < suio->uio_iovcnt; ix++) {
3477c478bd9Sstevel@tonic-gate 		diov[ix] = siov[ix];
3487c478bd9Sstevel@tonic-gate 		if (ix >= diov_cnt)
3497c478bd9Sstevel@tonic-gate 			return (1);
3507c478bd9Sstevel@tonic-gate 	}
3517c478bd9Sstevel@tonic-gate 	duio->uio_iov = diov;
3527c478bd9Sstevel@tonic-gate 	return (0);
3537c478bd9Sstevel@tonic-gate }
35417169044Sbrutus 
35517169044Sbrutus /*
35617169044Sbrutus  * Shadow state for checking if a platform has hardware asynchronous
35717169044Sbrutus  * copy capability and minimum copy size, e.g. Intel's I/OAT dma engine,
35817169044Sbrutus  *
35917169044Sbrutus  * Dcopy does a call-back to uioa_dcopy_enable() when a dma device calls
36017169044Sbrutus  * into dcopy to register and uioa_dcopy_disable() when the device calls
36117169044Sbrutus  * into dcopy to unregister.
36217169044Sbrutus  */
36317169044Sbrutus uioasync_t uioasync = {B_FALSE, 1024};
36417169044Sbrutus 
36517169044Sbrutus void
36617169044Sbrutus uioa_dcopy_enable()
36717169044Sbrutus {
36817169044Sbrutus 	uioasync.enabled = B_TRUE;
36917169044Sbrutus }
37017169044Sbrutus 
37117169044Sbrutus void
37217169044Sbrutus uioa_dcopy_disable()
37317169044Sbrutus {
37417169044Sbrutus 	uioasync.enabled = B_FALSE;
37517169044Sbrutus }
37617169044Sbrutus 
37717169044Sbrutus /*
37817169044Sbrutus  * Schedule an asynchronous move of "n" bytes at byte address "p",
37917169044Sbrutus  * "rw" indicates the direction of the move, I/O parameters and
38017169044Sbrutus  * async state are provided in "uioa" which is update to reflect
38117169044Sbrutus  * the data which is to be moved.
38217169044Sbrutus  *
38317169044Sbrutus  * Returns 0 on success or a non-zero errno on failure.
38417169044Sbrutus  *
38517169044Sbrutus  * Note, while the uioasync APIs are general purpose in design
38617169044Sbrutus  * the current implementation is Intel I/OAT specific.
38717169044Sbrutus  */
38817169044Sbrutus int
38917169044Sbrutus uioamove(void *p, size_t n, enum uio_rw rw, uioa_t *uioa)
39017169044Sbrutus {
39117169044Sbrutus 	int		soff, doff;
39217169044Sbrutus 	uint64_t	pa;
39317169044Sbrutus 	int		cnt;
39417169044Sbrutus 	iovec_t		*iov;
39517169044Sbrutus 	dcopy_handle_t	channel;
39617169044Sbrutus 	dcopy_cmd_t	cmd;
39717169044Sbrutus 	int		ret = 0;
39817169044Sbrutus 	int		dcopy_flags;
39917169044Sbrutus 
40017169044Sbrutus 	if (!(uioa->uioa_state & UIOA_ENABLED)) {
40117169044Sbrutus 		/* The uioa_t isn't enabled */
40217169044Sbrutus 		return (ENXIO);
40317169044Sbrutus 	}
40417169044Sbrutus 
40517169044Sbrutus 	if (uioa->uio_segflg != UIO_USERSPACE || rw != UIO_READ) {
40617169044Sbrutus 		/* Only support to user-land from kernel */
40717169044Sbrutus 		return (ENOTSUP);
40817169044Sbrutus 	}
40917169044Sbrutus 
41017169044Sbrutus 
41117169044Sbrutus 	channel = uioa->uioa_hwst[UIO_DCOPY_CHANNEL];
41217169044Sbrutus 	cmd = uioa->uioa_hwst[UIO_DCOPY_CMD];
41317169044Sbrutus 	dcopy_flags = DCOPY_NOSLEEP;
41417169044Sbrutus 
41517169044Sbrutus 	/*
41617169044Sbrutus 	 * While source bytes and destination bytes.
41717169044Sbrutus 	 */
41817169044Sbrutus 	while (n > 0 && uioa->uio_resid > 0) {
41917169044Sbrutus 		iov = uioa->uio_iov;
42017169044Sbrutus 		if (iov->iov_len == 0l) {
42117169044Sbrutus 			uioa->uio_iov++;
42217169044Sbrutus 			uioa->uio_iovcnt--;
42317169044Sbrutus 			uioa->uioa_lcur++;
42417169044Sbrutus 			uioa->uioa_lppp = uioa->uioa_lcur->uioa_ppp;
42517169044Sbrutus 			continue;
42617169044Sbrutus 		}
42717169044Sbrutus 		/*
42817169044Sbrutus 		 * While source bytes schedule an async
42917169044Sbrutus 		 * dma for destination page by page.
43017169044Sbrutus 		 */
43117169044Sbrutus 		while (n > 0) {
43217169044Sbrutus 			/* Addr offset in page src/dst */
43317169044Sbrutus 			soff = (uintptr_t)p & PAGEOFFSET;
43417169044Sbrutus 			doff = (uintptr_t)iov->iov_base & PAGEOFFSET;
43517169044Sbrutus 			/* Min copy count src and dst and page sized */
43617169044Sbrutus 			cnt = MIN(n, iov->iov_len);
43717169044Sbrutus 			cnt = MIN(cnt, PAGESIZE - soff);
43817169044Sbrutus 			cnt = MIN(cnt, PAGESIZE - doff);
43917169044Sbrutus 			/* XXX if next page(s) contiguous could use multipage */
44017169044Sbrutus 
44117169044Sbrutus 			/*
44217169044Sbrutus 			 * if we have an old command, we want to link all
44317169044Sbrutus 			 * other commands to the next command we alloced so
44417169044Sbrutus 			 * we only need to track the last command but can
44517169044Sbrutus 			 * still free them all.
44617169044Sbrutus 			 */
44717169044Sbrutus 			if (cmd != NULL) {
44817169044Sbrutus 				dcopy_flags |= DCOPY_ALLOC_LINK;
44917169044Sbrutus 			}
45017169044Sbrutus 			ret = dcopy_cmd_alloc(channel, dcopy_flags, &cmd);
45117169044Sbrutus 			if (ret != DCOPY_SUCCESS) {
45217169044Sbrutus 				/* Error of some sort */
45317169044Sbrutus 				return (EIO);
45417169044Sbrutus 			}
45517169044Sbrutus 			uioa->uioa_hwst[UIO_DCOPY_CMD] = cmd;
45617169044Sbrutus 
45717169044Sbrutus 			ASSERT(cmd->dp_version == DCOPY_CMD_V0);
45817169044Sbrutus 			if (uioa_maxpoll >= 0) {
45917169044Sbrutus 				/* Blocking (>0 may be) used in uioafini() */
46017169044Sbrutus 				cmd->dp_flags = DCOPY_CMD_INTR;
46117169044Sbrutus 			} else {
46217169044Sbrutus 				/* Non blocking uioafini() so no intr */
46317169044Sbrutus 				cmd->dp_flags = DCOPY_CMD_NOFLAGS;
46417169044Sbrutus 			}
46517169044Sbrutus 			cmd->dp_cmd = DCOPY_CMD_COPY;
46617169044Sbrutus 			pa = ptob((uint64_t)hat_getpfnum(kas.a_hat, p));
46717169044Sbrutus 			cmd->dp.copy.cc_source = pa + soff;
46817169044Sbrutus 			if (uioa->uioa_lcur->uioa_pfncnt == 0) {
46917169044Sbrutus 				/* Have a (page_t **) */
47017169044Sbrutus 				pa = ptob((uint64_t)(
47117169044Sbrutus 				    *(page_t **)uioa->uioa_lppp)->p_pagenum);
47217169044Sbrutus 			} else {
47317169044Sbrutus 				/* Have a (pfn_t *) */
47417169044Sbrutus 				pa = ptob((uint64_t)(
47517169044Sbrutus 				    *(pfn_t *)uioa->uioa_lppp));
47617169044Sbrutus 			}
47717169044Sbrutus 			cmd->dp.copy.cc_dest = pa + doff;
47817169044Sbrutus 			cmd->dp.copy.cc_size = cnt;
47917169044Sbrutus 			ret = dcopy_cmd_post(cmd);
48017169044Sbrutus 			if (ret != DCOPY_SUCCESS) {
48117169044Sbrutus 				/* Error of some sort */
48217169044Sbrutus 				return (EIO);
48317169044Sbrutus 			}
48417169044Sbrutus 			ret = 0;
48517169044Sbrutus 
48617169044Sbrutus 			/* If UIOA_POLL not set, set it */
48717169044Sbrutus 			if (!(uioa->uioa_state & UIOA_POLL))
48817169044Sbrutus 				uioa->uioa_state |= UIOA_POLL;
48917169044Sbrutus 
49017169044Sbrutus 			/* Update iov, uio, and local pointers/counters */
49117169044Sbrutus 			iov->iov_base += cnt;
49217169044Sbrutus 			iov->iov_len -= cnt;
49317169044Sbrutus 			uioa->uio_resid -= cnt;
49496e0e3daSYu Xiangning 			uioa->uioa_mbytes += cnt;
49517169044Sbrutus 			uioa->uio_loffset += cnt;
49617169044Sbrutus 			p = (caddr_t)p + cnt;
49717169044Sbrutus 			n -= cnt;
49817169044Sbrutus 
49917169044Sbrutus 			/* End of iovec? */
50017169044Sbrutus 			if (iov->iov_len == 0) {
50117169044Sbrutus 				/* Yup, next iovec */
50217169044Sbrutus 				break;
50317169044Sbrutus 			}
50417169044Sbrutus 
50517169044Sbrutus 			/* Next dst addr page? */
50617169044Sbrutus 			if (doff + cnt == PAGESIZE) {
50717169044Sbrutus 				/* Yup, next page_t */
50817169044Sbrutus 				uioa->uioa_lppp++;
50917169044Sbrutus 			}
51017169044Sbrutus 		}
51117169044Sbrutus 	}
51217169044Sbrutus 
51317169044Sbrutus 	return (ret);
51417169044Sbrutus }
51517169044Sbrutus 
51617169044Sbrutus /*
51717169044Sbrutus  * Initialize a uioa_t for a given uio_t for the current user context,
51817169044Sbrutus  * copy the common uio_t to the uioa_t, walk the shared iovec_t and
51917169044Sbrutus  * lock down the user-land page(s) containing iovec_t data, then mapin
52017169044Sbrutus  * user-land pages using segkpm.
52117169044Sbrutus  */
52217169044Sbrutus int
52317169044Sbrutus uioainit(uio_t *uiop, uioa_t *uioap)
52417169044Sbrutus {
52517169044Sbrutus 	caddr_t	addr;
52617169044Sbrutus 	page_t		**pages;
52717169044Sbrutus 	int		off;
52817169044Sbrutus 	int		len;
52917169044Sbrutus 	proc_t		*procp = ttoproc(curthread);
53017169044Sbrutus 	struct as	*as = procp->p_as;
53117169044Sbrutus 	iovec_t		*iov = uiop->uio_iov;
53217169044Sbrutus 	int32_t		iovcnt = uiop->uio_iovcnt;
53317169044Sbrutus 	uioa_page_t	*locked = uioap->uioa_locked;
53417169044Sbrutus 	dcopy_handle_t	channel;
53517169044Sbrutus 	int		error;
53617169044Sbrutus 
53717169044Sbrutus 	if (! (uioap->uioa_state & UIOA_ALLOC)) {
53817169044Sbrutus 		/* Can only init() a freshly allocated uioa_t */
53917169044Sbrutus 		return (EINVAL);
54017169044Sbrutus 	}
54117169044Sbrutus 
54217169044Sbrutus 	error = dcopy_alloc(DCOPY_NOSLEEP, &channel);
54317169044Sbrutus 	if (error == DCOPY_NORESOURCES) {
54417169044Sbrutus 		/* Turn off uioa */
54517169044Sbrutus 		uioasync.enabled = B_FALSE;
54617169044Sbrutus 		return (ENODEV);
54717169044Sbrutus 	}
54817169044Sbrutus 	if (error != DCOPY_SUCCESS) {
54917169044Sbrutus 		/* Alloc failed */
55017169044Sbrutus 		return (EIO);
55117169044Sbrutus 	}
55217169044Sbrutus 
55317169044Sbrutus 	uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = channel;
55417169044Sbrutus 	uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL;
55517169044Sbrutus 
55617169044Sbrutus 	/* Indicate uioa_t (will be) initialized */
55717169044Sbrutus 	uioap->uioa_state = UIOA_INIT;
55817169044Sbrutus 
55996e0e3daSYu Xiangning 	uioap->uioa_mbytes = 0;
56096e0e3daSYu Xiangning 
56196e0e3daSYu Xiangning 	uioap->uioa_mbytes = 0;
56296e0e3daSYu Xiangning 
56317169044Sbrutus 	/* uio_t/uioa_t uio_t common struct copy */
56417169044Sbrutus 	*((uio_t *)uioap) = *uiop;
56517169044Sbrutus 
56617169044Sbrutus 	/* initialize *uiop->uio_iov */
56717169044Sbrutus 	if (iovcnt > UIOA_IOV_MAX) {
56817169044Sbrutus 		/* Too big? */
56917169044Sbrutus 		return (E2BIG);
57017169044Sbrutus 	}
57117169044Sbrutus 	uioap->uio_iov = iov;
57217169044Sbrutus 	uioap->uio_iovcnt = iovcnt;
57317169044Sbrutus 
57417169044Sbrutus 	/* Mark the uioap as such */
57517169044Sbrutus 	uioap->uio_extflg |= UIO_ASYNC;
57617169044Sbrutus 
57717169044Sbrutus 	/*
57817169044Sbrutus 	 * For each iovec_t, lock-down the page(s) backing the iovec_t
57917169044Sbrutus 	 * and save the page_t list for phys addr use in uioamove().
58017169044Sbrutus 	 */
58117169044Sbrutus 	iov = uiop->uio_iov;
58217169044Sbrutus 	iovcnt = uiop->uio_iovcnt;
58317169044Sbrutus 	while (iovcnt > 0) {
58417169044Sbrutus 		addr = iov->iov_base;
58517169044Sbrutus 		off = (uintptr_t)addr & PAGEOFFSET;
58617169044Sbrutus 		addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
58717169044Sbrutus 		len = iov->iov_len + off;
58817169044Sbrutus 
58917169044Sbrutus 		/* Lock down page(s) for the iov span */
59017169044Sbrutus 		if ((error = as_pagelock(as, &pages,
59117169044Sbrutus 		    iov->iov_base, iov->iov_len, S_WRITE)) != 0) {
59217169044Sbrutus 			/* Error */
59317169044Sbrutus 			goto cleanup;
59417169044Sbrutus 		}
59517169044Sbrutus 
59617169044Sbrutus 		if (pages == NULL) {
59717169044Sbrutus 			/*
59817169044Sbrutus 			 * Need page_t list, really only need
59917169044Sbrutus 			 * a pfn list so build one.
60017169044Sbrutus 			 */
60117169044Sbrutus 			pfn_t   *pfnp;
60217169044Sbrutus 			int	pcnt = len >> PAGESHIFT;
60317169044Sbrutus 
60417169044Sbrutus 			if (off)
60517169044Sbrutus 				pcnt++;
60617169044Sbrutus 			if ((pfnp = kmem_alloc(pcnt * sizeof (pfnp),
60717169044Sbrutus 			    KM_NOSLEEP)) == NULL) {
60817169044Sbrutus 				error = ENOMEM;
60917169044Sbrutus 				goto cleanup;
61017169044Sbrutus 			}
61117169044Sbrutus 			locked->uioa_ppp = (void **)pfnp;
61217169044Sbrutus 			locked->uioa_pfncnt = pcnt;
61317169044Sbrutus 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
61417169044Sbrutus 			while (pcnt-- > 0) {
61517169044Sbrutus 				*pfnp++ = hat_getpfnum(as->a_hat, addr);
61617169044Sbrutus 				addr += PAGESIZE;
61717169044Sbrutus 			}
61817169044Sbrutus 			AS_LOCK_EXIT(as, &as->a_lock);
61917169044Sbrutus 		} else {
62017169044Sbrutus 			/* Have a page_t list, save it */
62117169044Sbrutus 			locked->uioa_ppp = (void **)pages;
62217169044Sbrutus 			locked->uioa_pfncnt = 0;
62317169044Sbrutus 		}
62417169044Sbrutus 		/* Save for as_pageunlock() in uioafini() */
62517169044Sbrutus 		locked->uioa_base = iov->iov_base;
62617169044Sbrutus 		locked->uioa_len = iov->iov_len;
62717169044Sbrutus 		locked++;
62817169044Sbrutus 
62917169044Sbrutus 		/* Next iovec_t */
63017169044Sbrutus 		iov++;
63117169044Sbrutus 		iovcnt--;
63217169044Sbrutus 	}
63317169044Sbrutus 	/* Initialize curret pointer into uioa_locked[] and it's uioa_ppp */
63417169044Sbrutus 	uioap->uioa_lcur = uioap->uioa_locked;
63517169044Sbrutus 	uioap->uioa_lppp = uioap->uioa_lcur->uioa_ppp;
63617169044Sbrutus 	return (0);
63717169044Sbrutus 
63817169044Sbrutus cleanup:
63917169044Sbrutus 	/* Unlock any previously locked page_t(s) */
64017169044Sbrutus 	while (locked > uioap->uioa_locked) {
64117169044Sbrutus 		locked--;
64217169044Sbrutus 		as_pageunlock(as, (page_t **)locked->uioa_ppp,
64317169044Sbrutus 		    locked->uioa_base, locked->uioa_len, S_WRITE);
64417169044Sbrutus 	}
64517169044Sbrutus 
64617169044Sbrutus 	/* Last indicate uioa_t still in alloc state */
64717169044Sbrutus 	uioap->uioa_state = UIOA_ALLOC;
64896e0e3daSYu Xiangning 	uioap->uioa_mbytes = 0;
64917169044Sbrutus 
65017169044Sbrutus 	return (error);
65117169044Sbrutus }
65217169044Sbrutus 
65317169044Sbrutus /*
65417169044Sbrutus  * Finish processing of a uioa_t by cleanup any pending "uioap" actions.
65517169044Sbrutus  */
65617169044Sbrutus int
65717169044Sbrutus uioafini(uio_t *uiop, uioa_t *uioap)
65817169044Sbrutus {
65917169044Sbrutus 	int32_t		iovcnt = uiop->uio_iovcnt;
66017169044Sbrutus 	uioa_page_t	*locked = uioap->uioa_locked;
66117169044Sbrutus 	struct as	*as = ttoproc(curthread)->p_as;
66217169044Sbrutus 	dcopy_handle_t	channel;
66317169044Sbrutus 	dcopy_cmd_t	cmd;
66417169044Sbrutus 	int		ret = 0;
66517169044Sbrutus 
66617169044Sbrutus 	ASSERT(uioap->uio_extflg & UIO_ASYNC);
66717169044Sbrutus 
66817169044Sbrutus 	if (!(uioap->uioa_state & (UIOA_ENABLED|UIOA_FINI))) {
66917169044Sbrutus 		/* Must be an active uioa_t */
67017169044Sbrutus 		return (EINVAL);
67117169044Sbrutus 	}
67217169044Sbrutus 
67317169044Sbrutus 	channel = uioap->uioa_hwst[UIO_DCOPY_CHANNEL];
67417169044Sbrutus 	cmd = uioap->uioa_hwst[UIO_DCOPY_CMD];
67517169044Sbrutus 
67617169044Sbrutus 	/* XXX - why do we get cmd == NULL sometimes? */
67717169044Sbrutus 	if (cmd != NULL) {
67817169044Sbrutus 		if (uioap->uioa_state & UIOA_POLL) {
67917169044Sbrutus 			/* Wait for last dcopy() to finish */
68017169044Sbrutus 			int64_t poll = 1;
68117169044Sbrutus 			int poll_flag = DCOPY_POLL_NOFLAGS;
68217169044Sbrutus 
68317169044Sbrutus 			do {
68417169044Sbrutus 				if (uioa_maxpoll == 0 ||
68517169044Sbrutus 				    (uioa_maxpoll > 0 &&
68617169044Sbrutus 				    poll >= uioa_maxpoll)) {
68717169044Sbrutus 					/* Always block or after maxpoll */
68817169044Sbrutus 					poll_flag = DCOPY_POLL_BLOCK;
68917169044Sbrutus 				} else {
69017169044Sbrutus 					/* No block, poll */
69117169044Sbrutus 					poll++;
69217169044Sbrutus 				}
69317169044Sbrutus 				ret = dcopy_cmd_poll(cmd, poll_flag);
69417169044Sbrutus 			} while (ret == DCOPY_PENDING);
69517169044Sbrutus 
69617169044Sbrutus 			if (ret == DCOPY_COMPLETED) {
69717169044Sbrutus 				/* Poll/block succeeded */
69817169044Sbrutus 				ret = 0;
69917169044Sbrutus 			} else {
70017169044Sbrutus 				/* Poll/block failed */
70117169044Sbrutus 				ret = EIO;
70217169044Sbrutus 			}
70317169044Sbrutus 		}
70417169044Sbrutus 		dcopy_cmd_free(&cmd);
70517169044Sbrutus 	}
70617169044Sbrutus 
70717169044Sbrutus 	dcopy_free(&channel);
70817169044Sbrutus 
70917169044Sbrutus 	/* Unlock all page(s) iovec_t by iovec_t */
71017169044Sbrutus 	while (iovcnt-- > 0) {
71117169044Sbrutus 		page_t **pages;
71217169044Sbrutus 
71317169044Sbrutus 		if (locked->uioa_pfncnt == 0) {
71417169044Sbrutus 			/* A as_pagelock() returned (page_t **) */
71517169044Sbrutus 			pages = (page_t **)locked->uioa_ppp;
71617169044Sbrutus 		} else {
71717169044Sbrutus 			/* Our pfn_t array */
71817169044Sbrutus 			pages = NULL;
71917169044Sbrutus 			kmem_free(locked->uioa_ppp, locked->uioa_pfncnt *
72017169044Sbrutus 			    sizeof (pfn_t *));
72117169044Sbrutus 		}
72217169044Sbrutus 		as_pageunlock(as, pages, locked->uioa_base, locked->uioa_len,
72317169044Sbrutus 		    S_WRITE);
72417169044Sbrutus 
72517169044Sbrutus 		locked++;
72617169044Sbrutus 	}
72717169044Sbrutus 	/* uioa_t->uio_t common struct copy */
72817169044Sbrutus 	*uiop = *((uio_t *)uioap);
72917169044Sbrutus 
73017169044Sbrutus 	/*
73117169044Sbrutus 	 * Last, reset uioa state to alloc.
73217169044Sbrutus 	 *
73317169044Sbrutus 	 * Note, we only initialize the state here, all other members
73417169044Sbrutus 	 * will be initialized in a subsequent uioainit().
73517169044Sbrutus 	 */
73617169044Sbrutus 	uioap->uioa_state = UIOA_ALLOC;
73796e0e3daSYu Xiangning 	uioap->uioa_mbytes = 0;
73817169044Sbrutus 
73917169044Sbrutus 	uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL;
74017169044Sbrutus 	uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = NULL;
74117169044Sbrutus 
74217169044Sbrutus 	return (ret);
74317169044Sbrutus }
744