xref: /freebsd/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c (revision 7a7741af18d6c8a804cc643cb7ecda9d730c6aa6)
1184c1b94SMartin Matuska /*
2184c1b94SMartin Matuska  * CDDL HEADER START
3184c1b94SMartin Matuska  *
4184c1b94SMartin Matuska  * The contents of this file are subject to the terms of the
5184c1b94SMartin Matuska  * Common Development and Distribution License (the "License").
6184c1b94SMartin Matuska  * You may not use this file except in compliance with the License.
7184c1b94SMartin Matuska  *
8184c1b94SMartin Matuska  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9271171e0SMartin Matuska  * or https://opensource.org/licenses/CDDL-1.0.
10184c1b94SMartin Matuska  * See the License for the specific language governing permissions
11184c1b94SMartin Matuska  * and limitations under the License.
12184c1b94SMartin Matuska  *
13184c1b94SMartin Matuska  * When distributing Covered Code, include this CDDL HEADER in each
14184c1b94SMartin Matuska  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15184c1b94SMartin Matuska  * If applicable, add the following below this CDDL HEADER, with the
16184c1b94SMartin Matuska  * fields enclosed by brackets "[]" replaced with your own identifying
17184c1b94SMartin Matuska  * information: Portions Copyright [yyyy] [name of copyright owner]
18184c1b94SMartin Matuska  *
19184c1b94SMartin Matuska  * CDDL HEADER END
20184c1b94SMartin Matuska  */
21184c1b94SMartin Matuska /*
22184c1b94SMartin Matuska  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23184c1b94SMartin Matuska  * Use is subject to license terms.
24184c1b94SMartin Matuska  */
25184c1b94SMartin Matuska 
26184c1b94SMartin Matuska /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27184c1b94SMartin Matuska /*	  All Rights Reserved	*/
28184c1b94SMartin Matuska 
29184c1b94SMartin Matuska /*
30184c1b94SMartin Matuska  * University Copyright- Copyright (c) 1982, 1986, 1988
31184c1b94SMartin Matuska  * The Regents of the University of California
32184c1b94SMartin Matuska  * All Rights Reserved
33184c1b94SMartin Matuska  *
34184c1b94SMartin Matuska  * University Acknowledgment- Portions of this document are derived from
35184c1b94SMartin Matuska  * software developed by the University of California, Berkeley, and its
36184c1b94SMartin Matuska  * contributors.
37184c1b94SMartin Matuska  */
38184c1b94SMartin Matuska /*
39184c1b94SMartin Matuska  * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
40184c1b94SMartin Matuska  */
41184c1b94SMartin Matuska 
42184c1b94SMartin Matuska #ifdef _KERNEL
43184c1b94SMartin Matuska 
44*7a7741afSMartin Matuska #include <sys/errno.h>
45*7a7741afSMartin Matuska #include <sys/vmem.h>
46*7a7741afSMartin Matuska #include <sys/sysmacros.h>
47184c1b94SMartin Matuska #include <sys/types.h>
48184c1b94SMartin Matuska #include <sys/uio_impl.h>
49184c1b94SMartin Matuska #include <sys/sysmacros.h>
50da5137abSMartin Matuska #include <sys/string.h>
51*7a7741afSMartin Matuska #include <sys/zfs_refcount.h>
52*7a7741afSMartin Matuska #include <sys/zfs_debug.h>
53184c1b94SMartin Matuska #include <linux/kmap_compat.h>
54184c1b94SMartin Matuska #include <linux/uaccess.h>
55*7a7741afSMartin Matuska #include <linux/pagemap.h>
56*7a7741afSMartin Matuska #include <linux/mman.h>
57184c1b94SMartin Matuska 
58184c1b94SMartin Matuska /*
59184c1b94SMartin Matuska  * Move "n" bytes at byte address "p"; "rw" indicates the direction
60184c1b94SMartin Matuska  * of the move, and the I/O parameters are provided in "uio", which is
61184c1b94SMartin Matuska  * update to reflect the data which was moved.  Returns 0 on success or
62184c1b94SMartin Matuska  * a non-zero errno on failure.
63184c1b94SMartin Matuska  */
static int
zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	const struct iovec *iov = uio->uio_iov;
	size_t skip = uio->uio_skip;
	ulong_t cnt;

	while (n && uio->uio_resid) {
		/* Bytes to move within the current iovec segment. */
		cnt = MIN(iov->iov_len - skip, n);
		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
			/*
			 * p = kernel data pointer
			 * iov->iov_base = user data pointer
			 */
			if (rw == UIO_READ) {
				if (copy_to_user(iov->iov_base+skip, p, cnt))
					return (EFAULT);
			} else {
				unsigned long b_left = 0;
				if (uio->uio_fault_disable) {
					if (!zfs_access_ok(VERIFY_READ,
					    (iov->iov_base + skip), cnt)) {
						return (EFAULT);
					}
					/*
					 * Caller disallowed page faults: copy
					 * atomically so a missing page yields
					 * a short copy instead of sleeping.
					 */
					pagefault_disable();
					b_left =
					    __copy_from_user_inatomic(p,
					    (iov->iov_base + skip), cnt);
					pagefault_enable();
				} else {
					b_left =
					    copy_from_user(p,
					    (iov->iov_base + skip), cnt);
				}
				if (b_left > 0) {
					/*
					 * Partial copy: account for the bytes
					 * that did transfer before returning
					 * EFAULT, so the caller can fault in
					 * the pages and resume where we left
					 * off.
					 */
					unsigned long c_bytes =
					    cnt - b_left;
					uio->uio_skip += c_bytes;
					ASSERT3U(uio->uio_skip, <,
					    iov->iov_len);
					uio->uio_resid -= c_bytes;
					uio->uio_loffset += c_bytes;
					return (EFAULT);
				}
			}
			break;
		case UIO_SYSSPACE:
			if (rw == UIO_READ)
				memcpy(iov->iov_base + skip, p, cnt);
			else
				memcpy(p, iov->iov_base + skip, cnt);
			break;
		default:
			ASSERT(0);
		}
		/* Advance to the next iovec once this one is consumed. */
		skip += cnt;
		if (skip == iov->iov_len) {
			skip = 0;
			uio->uio_iov = (++iov);
			uio->uio_iovcnt--;
		}
		uio->uio_skip = skip;
		uio->uio_resid -= cnt;
		uio->uio_loffset += cnt;
		p = (caddr_t)p + cnt;
		n -= cnt;
	}
	return (0);
}
134184c1b94SMartin Matuska 
135184c1b94SMartin Matuska static int
zfs_uiomove_bvec_impl(void * p,size_t n,zfs_uio_rw_t rw,zfs_uio_t * uio)1361f1e2261SMartin Matuska zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
137184c1b94SMartin Matuska {
138184c1b94SMartin Matuska 	const struct bio_vec *bv = uio->uio_bvec;
139184c1b94SMartin Matuska 	size_t skip = uio->uio_skip;
140184c1b94SMartin Matuska 	ulong_t cnt;
141184c1b94SMartin Matuska 
142184c1b94SMartin Matuska 	while (n && uio->uio_resid) {
143184c1b94SMartin Matuska 		void *paddr;
144184c1b94SMartin Matuska 		cnt = MIN(bv->bv_len - skip, n);
145184c1b94SMartin Matuska 
14675e1fea6SMartin Matuska 		paddr = zfs_kmap_local(bv->bv_page);
1471f1e2261SMartin Matuska 		if (rw == UIO_READ) {
1481f1e2261SMartin Matuska 			/* Copy from buffer 'p' to the bvec data */
149da5137abSMartin Matuska 			memcpy(paddr + bv->bv_offset + skip, p, cnt);
1501f1e2261SMartin Matuska 		} else {
1511f1e2261SMartin Matuska 			/* Copy from bvec data to buffer 'p' */
152da5137abSMartin Matuska 			memcpy(p, paddr + bv->bv_offset + skip, cnt);
1531f1e2261SMartin Matuska 		}
15475e1fea6SMartin Matuska 		zfs_kunmap_local(paddr);
155184c1b94SMartin Matuska 
156184c1b94SMartin Matuska 		skip += cnt;
157184c1b94SMartin Matuska 		if (skip == bv->bv_len) {
158184c1b94SMartin Matuska 			skip = 0;
159184c1b94SMartin Matuska 			uio->uio_bvec = (++bv);
160184c1b94SMartin Matuska 			uio->uio_iovcnt--;
161184c1b94SMartin Matuska 		}
162184c1b94SMartin Matuska 		uio->uio_skip = skip;
163184c1b94SMartin Matuska 		uio->uio_resid -= cnt;
164184c1b94SMartin Matuska 		uio->uio_loffset += cnt;
165184c1b94SMartin Matuska 		p = (caddr_t)p + cnt;
166184c1b94SMartin Matuska 		n -= cnt;
167184c1b94SMartin Matuska 	}
168184c1b94SMartin Matuska 	return (0);
169184c1b94SMartin Matuska }
170184c1b94SMartin Matuska 
1711f1e2261SMartin Matuska static void
zfs_copy_bvec(void * p,size_t skip,size_t cnt,zfs_uio_rw_t rw,struct bio_vec * bv)1721f1e2261SMartin Matuska zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw,
1731f1e2261SMartin Matuska     struct bio_vec *bv)
1741f1e2261SMartin Matuska {
1751f1e2261SMartin Matuska 	void *paddr;
1761f1e2261SMartin Matuska 
17775e1fea6SMartin Matuska 	paddr = zfs_kmap_local(bv->bv_page);
1781f1e2261SMartin Matuska 	if (rw == UIO_READ) {
1791f1e2261SMartin Matuska 		/* Copy from buffer 'p' to the bvec data */
1801f1e2261SMartin Matuska 		memcpy(paddr + bv->bv_offset + skip, p, cnt);
1811f1e2261SMartin Matuska 	} else {
1821f1e2261SMartin Matuska 		/* Copy from bvec data to buffer 'p' */
1831f1e2261SMartin Matuska 		memcpy(p, paddr + bv->bv_offset + skip, cnt);
1841f1e2261SMartin Matuska 	}
18575e1fea6SMartin Matuska 	zfs_kunmap_local(paddr);
1861f1e2261SMartin Matuska }
1871f1e2261SMartin Matuska 
/*
 * Copy 'n' bytes of data between the buffer p[] and the data represented
 * by the request in the uio.
 */
static int
zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	struct request *rq = uio->rq;
	struct bio_vec bv;
	struct req_iterator iter;
	size_t this_seg_start;	/* logical offset */
	size_t this_seg_end;		/* logical offset */
	size_t skip_in_seg;
	size_t copy_from_seg;
	size_t orig_loffset;
	int copied = 0;

	/*
	 * Get the original logical offset of this entire request (because
	 * uio->uio_loffset will be modified over time).
	 */
	orig_loffset = io_offset(NULL, rq);
	this_seg_start = orig_loffset;

	rq_for_each_segment(bv, rq, iter) {
		/*
		 * Lookup what the logical offset of the last byte of this
		 * segment is.
		 */
		this_seg_end = this_seg_start + bv.bv_len - 1;

		/*
		 * We only need to operate on segments that have data we're
		 * copying.
		 */
		if (uio->uio_loffset >= this_seg_start &&
		    uio->uio_loffset <= this_seg_end) {
			/*
			 * Some, or all, of the data in this segment needs to be
			 * copied.
			 */

			/*
			 * We may be not be copying from the first byte in the
			 * segment.  Figure out how many bytes to skip copying
			 * from the beginning of this segment.
			 */
			skip_in_seg = uio->uio_loffset - this_seg_start;

			/*
			 * Calculate the total number of bytes from this
			 * segment that we will be copying.
			 */
			copy_from_seg = MIN(bv.bv_len - skip_in_seg, n);

			/* Copy the bytes */
			zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv);
			p = ((char *)p) + copy_from_seg;

			n -= copy_from_seg;
			uio->uio_resid -= copy_from_seg;
			uio->uio_loffset += copy_from_seg;
			copied = 1;	/* We copied some data */
		}

		/* The next segment starts right after this one ends. */
		this_seg_start = this_seg_end + 1;
	}

	if (!copied) {
		/*
		 * Didn't copy anything.  Zeroing uio_resid signals the
		 * request as fully consumed to the caller; presumably this
		 * only happens when the uio's offset lies past every
		 * segment — NOTE(review): confirm against callers.
		 */
		uio->uio_resid = 0;
	}
	return (0);
}
2621f1e2261SMartin Matuska 
2631f1e2261SMartin Matuska static int
zfs_uiomove_bvec(void * p,size_t n,zfs_uio_rw_t rw,zfs_uio_t * uio)2641f1e2261SMartin Matuska zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
2651f1e2261SMartin Matuska {
2661f1e2261SMartin Matuska 	if (uio->rq != NULL)
2671f1e2261SMartin Matuska 		return (zfs_uiomove_bvec_rq(p, n, rw, uio));
2681f1e2261SMartin Matuska 	return (zfs_uiomove_bvec_impl(p, n, rw, uio));
2691f1e2261SMartin Matuska }
2701f1e2261SMartin Matuska 
271184c1b94SMartin Matuska #if defined(HAVE_VFS_IOV_ITER)
272184c1b94SMartin Matuska static int
zfs_uiomove_iter(void * p,size_t n,zfs_uio_rw_t rw,zfs_uio_t * uio,boolean_t revert)273184c1b94SMartin Matuska zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
274184c1b94SMartin Matuska     boolean_t revert)
275184c1b94SMartin Matuska {
276184c1b94SMartin Matuska 	size_t cnt = MIN(n, uio->uio_resid);
277184c1b94SMartin Matuska 
278184c1b94SMartin Matuska 	if (uio->uio_skip)
279184c1b94SMartin Matuska 		iov_iter_advance(uio->uio_iter, uio->uio_skip);
280184c1b94SMartin Matuska 
281184c1b94SMartin Matuska 	if (rw == UIO_READ)
282184c1b94SMartin Matuska 		cnt = copy_to_iter(p, cnt, uio->uio_iter);
283184c1b94SMartin Matuska 	else
284184c1b94SMartin Matuska 		cnt = copy_from_iter(p, cnt, uio->uio_iter);
285184c1b94SMartin Matuska 
286184c1b94SMartin Matuska 	/*
287184c1b94SMartin Matuska 	 * When operating on a full pipe no bytes are processed.
288184c1b94SMartin Matuska 	 * In which case return EFAULT which is converted to EAGAIN
289184c1b94SMartin Matuska 	 * by the kernel's generic_file_splice_read() function.
290184c1b94SMartin Matuska 	 */
291184c1b94SMartin Matuska 	if (cnt == 0)
292184c1b94SMartin Matuska 		return (EFAULT);
293184c1b94SMartin Matuska 
294184c1b94SMartin Matuska 	/*
295184c1b94SMartin Matuska 	 * Revert advancing the uio_iter.  This is set by zfs_uiocopy()
296184c1b94SMartin Matuska 	 * to avoid consuming the uio and its iov_iter structure.
297184c1b94SMartin Matuska 	 */
298184c1b94SMartin Matuska 	if (revert)
299184c1b94SMartin Matuska 		iov_iter_revert(uio->uio_iter, cnt);
300184c1b94SMartin Matuska 
301184c1b94SMartin Matuska 	uio->uio_resid -= cnt;
302184c1b94SMartin Matuska 	uio->uio_loffset += cnt;
303184c1b94SMartin Matuska 
304184c1b94SMartin Matuska 	return (0);
305184c1b94SMartin Matuska }
306184c1b94SMartin Matuska #endif
307184c1b94SMartin Matuska 
308184c1b94SMartin Matuska int
zfs_uiomove(void * p,size_t n,zfs_uio_rw_t rw,zfs_uio_t * uio)309184c1b94SMartin Matuska zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
310184c1b94SMartin Matuska {
311184c1b94SMartin Matuska 	if (uio->uio_segflg == UIO_BVEC)
312184c1b94SMartin Matuska 		return (zfs_uiomove_bvec(p, n, rw, uio));
313184c1b94SMartin Matuska #if defined(HAVE_VFS_IOV_ITER)
314184c1b94SMartin Matuska 	else if (uio->uio_segflg == UIO_ITER)
315184c1b94SMartin Matuska 		return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE));
316184c1b94SMartin Matuska #endif
317184c1b94SMartin Matuska 	else
318184c1b94SMartin Matuska 		return (zfs_uiomove_iov(p, n, rw, uio));
319184c1b94SMartin Matuska }
320184c1b94SMartin Matuska EXPORT_SYMBOL(zfs_uiomove);
321184c1b94SMartin Matuska 
322184c1b94SMartin Matuska /*
323184c1b94SMartin Matuska  * Fault in the pages of the first n bytes specified by the uio structure.
324184c1b94SMartin Matuska  * 1 byte in each page is touched and the uio struct is unmodified. Any
325184c1b94SMartin Matuska  * error will terminate the process as this is only a best attempt to get
326184c1b94SMartin Matuska  * the pages resident.
327184c1b94SMartin Matuska  */
int
zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
{
	if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC ||
	    (uio->uio_extflg & UIO_DIRECT)) {
		/*
		 * There's never a need to fault in kernel pages or Direct I/O
		 * write pages. Direct I/O write pages have been pinned in so
		 * there is never a time for these pages a fault will occur.
		 */
		return (0);
#if defined(HAVE_VFS_IOV_ITER)
	} else if (uio->uio_segflg == UIO_ITER) {
		/*
		 * At least a Linux 4.9 kernel, iov_iter_fault_in_readable()
		 * can be relied on to fault in user pages when referenced.
		 */
		if (iov_iter_fault_in_readable(uio->uio_iter, n))
			return (EFAULT);
#endif
	} else {
		/* Fault in all user pages */
		ASSERT3S(uio->uio_segflg, ==, UIO_USERSPACE);
		const struct iovec *iov = uio->uio_iov;
		int iovcnt = uio->uio_iovcnt;
		size_t skip = uio->uio_skip;
		uint8_t tmp;
		caddr_t p;

		for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
			ulong_t cnt = MIN(iov->iov_len - skip, n);
			/* empty iov */
			if (cnt == 0)
				continue;
			n -= cnt;
			/* touch each page in this segment. */
			p = iov->iov_base + skip;
			while (cnt) {
				/* Reading one byte faults in the full page. */
				if (copy_from_user(&tmp, p, 1))
					return (EFAULT);
				ulong_t incr = MIN(cnt, PAGESIZE);
				p += incr;
				cnt -= incr;
			}
			/* touch the last byte in case it straddles a page. */
			p--;
			if (copy_from_user(&tmp, p, 1))
				return (EFAULT);
		}
	}

	return (0);
}
381184c1b94SMartin Matuska EXPORT_SYMBOL(zfs_uio_prefaultpages);
382184c1b94SMartin Matuska 
383184c1b94SMartin Matuska /*
384184c1b94SMartin Matuska  * The same as zfs_uiomove() but doesn't modify uio structure.
385184c1b94SMartin Matuska  * return in cbytes how many bytes were copied.
386184c1b94SMartin Matuska  */
387184c1b94SMartin Matuska int
zfs_uiocopy(void * p,size_t n,zfs_uio_rw_t rw,zfs_uio_t * uio,size_t * cbytes)388184c1b94SMartin Matuska zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
389184c1b94SMartin Matuska {
390184c1b94SMartin Matuska 	zfs_uio_t uio_copy;
391184c1b94SMartin Matuska 	int ret;
392184c1b94SMartin Matuska 
393da5137abSMartin Matuska 	memcpy(&uio_copy, uio, sizeof (zfs_uio_t));
394184c1b94SMartin Matuska 
395184c1b94SMartin Matuska 	if (uio->uio_segflg == UIO_BVEC)
396184c1b94SMartin Matuska 		ret = zfs_uiomove_bvec(p, n, rw, &uio_copy);
397184c1b94SMartin Matuska #if defined(HAVE_VFS_IOV_ITER)
398184c1b94SMartin Matuska 	else if (uio->uio_segflg == UIO_ITER)
399184c1b94SMartin Matuska 		ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE);
400184c1b94SMartin Matuska #endif
401184c1b94SMartin Matuska 	else
402184c1b94SMartin Matuska 		ret = zfs_uiomove_iov(p, n, rw, &uio_copy);
403184c1b94SMartin Matuska 
404184c1b94SMartin Matuska 	*cbytes = uio->uio_resid - uio_copy.uio_resid;
405184c1b94SMartin Matuska 
406184c1b94SMartin Matuska 	return (ret);
407184c1b94SMartin Matuska }
408184c1b94SMartin Matuska EXPORT_SYMBOL(zfs_uiocopy);
409184c1b94SMartin Matuska 
410184c1b94SMartin Matuska /*
411184c1b94SMartin Matuska  * Drop the next n chars out of *uio.
412184c1b94SMartin Matuska  */
void
zfs_uioskip(zfs_uio_t *uio, size_t n)
{
	/* Skipping beyond the remaining bytes is silently ignored. */
	if (n > uio->uio_resid)
		return;
	/*
	 * When using a uio with a struct request, we simply
	 * use uio_loffset as a pointer to the next logical byte to
	 * copy in the request.  We don't have to do any fancy
	 * accounting with uio_bvec/uio_iovcnt since we don't use
	 * them.
	 */
	if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) {
		/* Walk forward through the bio_vec array. */
		uio->uio_skip += n;
		while (uio->uio_iovcnt &&
		    uio->uio_skip >= uio->uio_bvec->bv_len) {
			uio->uio_skip -= uio->uio_bvec->bv_len;
			uio->uio_bvec++;
			uio->uio_iovcnt--;
		}
#if defined(HAVE_VFS_IOV_ITER)
	} else if (uio->uio_segflg == UIO_ITER) {
		/* The iterator tracks its own position. */
		iov_iter_advance(uio->uio_iter, n);
#endif
	} else {
		/* Walk forward through the iovec array. */
		uio->uio_skip += n;
		while (uio->uio_iovcnt &&
		    uio->uio_skip >= uio->uio_iov->iov_len) {
			uio->uio_skip -= uio->uio_iov->iov_len;
			uio->uio_iov++;
			uio->uio_iovcnt--;
		}
	}

	uio->uio_loffset += n;
	uio->uio_resid -= n;
}
450184c1b94SMartin Matuska EXPORT_SYMBOL(zfs_uioskip);
451184c1b94SMartin Matuska 
452*7a7741afSMartin Matuska /*
453*7a7741afSMartin Matuska  * Check if the uio is page-aligned in memory.
454*7a7741afSMartin Matuska  */
455*7a7741afSMartin Matuska boolean_t
zfs_uio_page_aligned(zfs_uio_t * uio)456*7a7741afSMartin Matuska zfs_uio_page_aligned(zfs_uio_t *uio)
457*7a7741afSMartin Matuska {
458*7a7741afSMartin Matuska 	boolean_t aligned = B_TRUE;
459*7a7741afSMartin Matuska 
460*7a7741afSMartin Matuska 	if (uio->uio_segflg == UIO_USERSPACE ||
461*7a7741afSMartin Matuska 	    uio->uio_segflg == UIO_SYSSPACE) {
462*7a7741afSMartin Matuska 		const struct iovec *iov = uio->uio_iov;
463*7a7741afSMartin Matuska 		size_t skip = uio->uio_skip;
464*7a7741afSMartin Matuska 
465*7a7741afSMartin Matuska 		for (int i = uio->uio_iovcnt; i > 0; iov++, i--) {
466*7a7741afSMartin Matuska 			uintptr_t addr = (uintptr_t)(iov->iov_base + skip);
467*7a7741afSMartin Matuska 			size_t size = iov->iov_len - skip;
468*7a7741afSMartin Matuska 			if ((addr & (PAGE_SIZE - 1)) ||
469*7a7741afSMartin Matuska 			    (size & (PAGE_SIZE - 1))) {
470*7a7741afSMartin Matuska 				aligned = B_FALSE;
471*7a7741afSMartin Matuska 				break;
472*7a7741afSMartin Matuska 			}
473*7a7741afSMartin Matuska 			skip = 0;
474*7a7741afSMartin Matuska 		}
475*7a7741afSMartin Matuska #if defined(HAVE_VFS_IOV_ITER)
476*7a7741afSMartin Matuska 	} else if (uio->uio_segflg == UIO_ITER) {
477*7a7741afSMartin Matuska 		unsigned long alignment =
478*7a7741afSMartin Matuska 		    iov_iter_alignment(uio->uio_iter);
479*7a7741afSMartin Matuska 		aligned = IS_P2ALIGNED(alignment, PAGE_SIZE);
480*7a7741afSMartin Matuska #endif
481*7a7741afSMartin Matuska 	} else {
482*7a7741afSMartin Matuska 		/* Currently not supported */
483*7a7741afSMartin Matuska 		aligned = B_FALSE;
484*7a7741afSMartin Matuska 	}
485*7a7741afSMartin Matuska 
486*7a7741afSMartin Matuska 	return (aligned);
487*7a7741afSMartin Matuska }
488*7a7741afSMartin Matuska 
489*7a7741afSMartin Matuska 
490*7a7741afSMartin Matuska #if defined(HAVE_ZERO_PAGE_GPL_ONLY) || !defined(_LP64)
491*7a7741afSMartin Matuska #define	ZFS_MARKEED_PAGE	0x0
492*7a7741afSMartin Matuska #define	IS_ZFS_MARKED_PAGE(_p)	0
493*7a7741afSMartin Matuska #define	zfs_mark_page(_p)
494*7a7741afSMartin Matuska #define	zfs_unmark_page(_p)
495*7a7741afSMartin Matuska #define	IS_ZERO_PAGE(_p)	0
496*7a7741afSMartin Matuska 
497*7a7741afSMartin Matuska #else
498*7a7741afSMartin Matuska /*
499*7a7741afSMartin Matuska  * Mark pages to know if they were allocated to replace ZERO_PAGE() for
500*7a7741afSMartin Matuska  * Direct I/O writes.
501*7a7741afSMartin Matuska  */
502*7a7741afSMartin Matuska #define	ZFS_MARKED_PAGE		0x5a465350414745 /* ASCII: ZFSPAGE */
503*7a7741afSMartin Matuska #define	IS_ZFS_MARKED_PAGE(_p) \
504*7a7741afSMartin Matuska 	(page_private(_p) == (unsigned long)ZFS_MARKED_PAGE)
505*7a7741afSMartin Matuska #define	IS_ZERO_PAGE(_p) ((_p) == ZERO_PAGE(0))
506*7a7741afSMartin Matuska 
507*7a7741afSMartin Matuska static inline void
zfs_mark_page(struct page * page)508*7a7741afSMartin Matuska zfs_mark_page(struct page *page)
509*7a7741afSMartin Matuska {
510*7a7741afSMartin Matuska 	ASSERT3P(page, !=, NULL);
511*7a7741afSMartin Matuska 	get_page(page);
512*7a7741afSMartin Matuska 	SetPagePrivate(page);
513*7a7741afSMartin Matuska 	set_page_private(page, ZFS_MARKED_PAGE);
514*7a7741afSMartin Matuska }
515*7a7741afSMartin Matuska 
516*7a7741afSMartin Matuska static inline void
zfs_unmark_page(struct page * page)517*7a7741afSMartin Matuska zfs_unmark_page(struct page *page)
518*7a7741afSMartin Matuska {
519*7a7741afSMartin Matuska 	ASSERT3P(page, !=, NULL);
520*7a7741afSMartin Matuska 	set_page_private(page, 0UL);
521*7a7741afSMartin Matuska 	ClearPagePrivate(page);
522*7a7741afSMartin Matuska 	put_page(page);
523*7a7741afSMartin Matuska }
524*7a7741afSMartin Matuska #endif /* HAVE_ZERO_PAGE_GPL_ONLY || !_LP64 */
525*7a7741afSMartin Matuska 
526*7a7741afSMartin Matuska static void
zfs_uio_dio_check_for_zero_page(zfs_uio_t * uio)527*7a7741afSMartin Matuska zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio)
528*7a7741afSMartin Matuska {
529*7a7741afSMartin Matuska 	ASSERT3P(uio->uio_dio.pages, !=, NULL);
530*7a7741afSMartin Matuska 
531*7a7741afSMartin Matuska 	for (long i = 0; i < uio->uio_dio.npages; i++) {
532*7a7741afSMartin Matuska 		struct page *p = uio->uio_dio.pages[i];
533*7a7741afSMartin Matuska 		lock_page(p);
534*7a7741afSMartin Matuska 
535*7a7741afSMartin Matuska 		if (IS_ZERO_PAGE(p)) {
536*7a7741afSMartin Matuska 			/*
537*7a7741afSMartin Matuska 			 * If the user page points the kernels ZERO_PAGE() a
538*7a7741afSMartin Matuska 			 * new zero filled page will just be allocated so the
539*7a7741afSMartin Matuska 			 * contents of the page can not be changed by the user
540*7a7741afSMartin Matuska 			 * while a Direct I/O write is taking place.
541*7a7741afSMartin Matuska 			 */
542*7a7741afSMartin Matuska 			gfp_t gfp_zero_page  = __GFP_NOWARN | GFP_NOIO |
543*7a7741afSMartin Matuska 			    __GFP_ZERO | GFP_KERNEL;
544*7a7741afSMartin Matuska 
545*7a7741afSMartin Matuska 			ASSERT0(IS_ZFS_MARKED_PAGE(p));
546*7a7741afSMartin Matuska 			unlock_page(p);
547*7a7741afSMartin Matuska 			put_page(p);
548*7a7741afSMartin Matuska 
549*7a7741afSMartin Matuska 			p = __page_cache_alloc(gfp_zero_page);
550*7a7741afSMartin Matuska 			zfs_mark_page(p);
551*7a7741afSMartin Matuska 		} else {
552*7a7741afSMartin Matuska 			unlock_page(p);
553*7a7741afSMartin Matuska 		}
554*7a7741afSMartin Matuska 	}
555*7a7741afSMartin Matuska }
556*7a7741afSMartin Matuska 
557*7a7741afSMartin Matuska void
zfs_uio_free_dio_pages(zfs_uio_t * uio,zfs_uio_rw_t rw)558*7a7741afSMartin Matuska zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
559*7a7741afSMartin Matuska {
560*7a7741afSMartin Matuska 
561*7a7741afSMartin Matuska 	ASSERT(uio->uio_extflg & UIO_DIRECT);
562*7a7741afSMartin Matuska 	ASSERT3P(uio->uio_dio.pages, !=, NULL);
563*7a7741afSMartin Matuska 
564*7a7741afSMartin Matuska 	for (long i = 0; i < uio->uio_dio.npages; i++) {
565*7a7741afSMartin Matuska 		struct page *p = uio->uio_dio.pages[i];
566*7a7741afSMartin Matuska 
567*7a7741afSMartin Matuska 		if (IS_ZFS_MARKED_PAGE(p)) {
568*7a7741afSMartin Matuska 			zfs_unmark_page(p);
569*7a7741afSMartin Matuska 			__free_page(p);
570*7a7741afSMartin Matuska 			continue;
571*7a7741afSMartin Matuska 		}
572*7a7741afSMartin Matuska 
573*7a7741afSMartin Matuska 		put_page(p);
574*7a7741afSMartin Matuska 	}
575*7a7741afSMartin Matuska 
576*7a7741afSMartin Matuska 	vmem_free(uio->uio_dio.pages,
577*7a7741afSMartin Matuska 	    uio->uio_dio.npages * sizeof (struct page *));
578*7a7741afSMartin Matuska }
579*7a7741afSMartin Matuska 
580*7a7741afSMartin Matuska /*
581*7a7741afSMartin Matuska  * zfs_uio_iov_step() is just a modified version of the STEP function of Linux's
582*7a7741afSMartin Matuska  * iov_iter_get_pages().
583*7a7741afSMartin Matuska  */
584*7a7741afSMartin Matuska static int
zfs_uio_iov_step(struct iovec v,zfs_uio_rw_t rw,zfs_uio_t * uio,long * numpages)585*7a7741afSMartin Matuska zfs_uio_iov_step(struct iovec v, zfs_uio_rw_t rw, zfs_uio_t *uio,
586*7a7741afSMartin Matuska     long *numpages)
587*7a7741afSMartin Matuska {
588*7a7741afSMartin Matuska 	unsigned long addr = (unsigned long)(v.iov_base);
589*7a7741afSMartin Matuska 	size_t len = v.iov_len;
590*7a7741afSMartin Matuska 	unsigned long n = DIV_ROUND_UP(len, PAGE_SIZE);
591*7a7741afSMartin Matuska 
592*7a7741afSMartin Matuska 	/*
593*7a7741afSMartin Matuska 	 * read returning FOLL_WRITE is due to the fact that we are stating
594*7a7741afSMartin Matuska 	 * that the kernel will have write access to the user pages. So, when a
595*7a7741afSMartin Matuska 	 * Direct I/O read request is issued, the kernel must write to the user
596*7a7741afSMartin Matuska 	 * pages.
597*7a7741afSMartin Matuska 	 */
598*7a7741afSMartin Matuska 	long res = get_user_pages_unlocked(
599*7a7741afSMartin Matuska 	    P2ALIGN_TYPED(addr, PAGE_SIZE, unsigned long), n,
600*7a7741afSMartin Matuska 	    &uio->uio_dio.pages[uio->uio_dio.npages],
601*7a7741afSMartin Matuska 	    rw == UIO_READ ? FOLL_WRITE : 0);
602*7a7741afSMartin Matuska 	if (res < 0) {
603*7a7741afSMartin Matuska 		return (SET_ERROR(-res));
604*7a7741afSMartin Matuska 	} else if (len != (res * PAGE_SIZE)) {
605*7a7741afSMartin Matuska 		return (SET_ERROR(EFAULT));
606*7a7741afSMartin Matuska 	}
607*7a7741afSMartin Matuska 
608*7a7741afSMartin Matuska 	ASSERT3S(len, ==, res * PAGE_SIZE);
609*7a7741afSMartin Matuska 	*numpages = res;
610*7a7741afSMartin Matuska 	return (0);
611*7a7741afSMartin Matuska }
612*7a7741afSMartin Matuska 
613*7a7741afSMartin Matuska static int
zfs_uio_get_dio_pages_iov(zfs_uio_t * uio,zfs_uio_rw_t rw)614*7a7741afSMartin Matuska zfs_uio_get_dio_pages_iov(zfs_uio_t *uio, zfs_uio_rw_t rw)
615*7a7741afSMartin Matuska {
616*7a7741afSMartin Matuska 	const struct iovec *iovp = uio->uio_iov;
617*7a7741afSMartin Matuska 	size_t skip = uio->uio_skip;
618*7a7741afSMartin Matuska 	size_t len = uio->uio_resid - skip;
619*7a7741afSMartin Matuska 
620*7a7741afSMartin Matuska 	ASSERT(uio->uio_segflg != UIO_SYSSPACE);
621*7a7741afSMartin Matuska 
622*7a7741afSMartin Matuska 	for (int i = 0; i < uio->uio_iovcnt; i++) {
623*7a7741afSMartin Matuska 		struct iovec iov;
624*7a7741afSMartin Matuska 		long numpages = 0;
625*7a7741afSMartin Matuska 
626*7a7741afSMartin Matuska 		if (iovp->iov_len == 0) {
627*7a7741afSMartin Matuska 			iovp++;
628*7a7741afSMartin Matuska 			skip = 0;
629*7a7741afSMartin Matuska 			continue;
630*7a7741afSMartin Matuska 		}
631*7a7741afSMartin Matuska 		iov.iov_len = MIN(len, iovp->iov_len - skip);
632*7a7741afSMartin Matuska 		iov.iov_base = iovp->iov_base + skip;
633*7a7741afSMartin Matuska 		int error = zfs_uio_iov_step(iov, rw, uio, &numpages);
634*7a7741afSMartin Matuska 
635*7a7741afSMartin Matuska 		if (error)
636*7a7741afSMartin Matuska 			return (error);
637*7a7741afSMartin Matuska 
638*7a7741afSMartin Matuska 		uio->uio_dio.npages += numpages;
639*7a7741afSMartin Matuska 		len -= iov.iov_len;
640*7a7741afSMartin Matuska 		skip = 0;
641*7a7741afSMartin Matuska 		iovp++;
642*7a7741afSMartin Matuska 	}
643*7a7741afSMartin Matuska 
644*7a7741afSMartin Matuska 	ASSERT0(len);
645*7a7741afSMartin Matuska 
646*7a7741afSMartin Matuska 	return (0);
647*7a7741afSMartin Matuska }
648*7a7741afSMartin Matuska 
649*7a7741afSMartin Matuska #if defined(HAVE_VFS_IOV_ITER)
650*7a7741afSMartin Matuska static int
zfs_uio_get_dio_pages_iov_iter(zfs_uio_t * uio,zfs_uio_rw_t rw)651*7a7741afSMartin Matuska zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw)
652*7a7741afSMartin Matuska {
653*7a7741afSMartin Matuska 	size_t skip = uio->uio_skip;
654*7a7741afSMartin Matuska 	size_t wanted = uio->uio_resid - uio->uio_skip;
655*7a7741afSMartin Matuska 	ssize_t rollback = 0;
656*7a7741afSMartin Matuska 	ssize_t cnt;
657*7a7741afSMartin Matuska 	unsigned maxpages = DIV_ROUND_UP(wanted, PAGE_SIZE);
658*7a7741afSMartin Matuska 
659*7a7741afSMartin Matuska 	while (wanted) {
660*7a7741afSMartin Matuska #if defined(HAVE_IOV_ITER_GET_PAGES2)
661*7a7741afSMartin Matuska 		cnt = iov_iter_get_pages2(uio->uio_iter,
662*7a7741afSMartin Matuska 		    &uio->uio_dio.pages[uio->uio_dio.npages],
663*7a7741afSMartin Matuska 		    wanted, maxpages, &skip);
664*7a7741afSMartin Matuska #else
665*7a7741afSMartin Matuska 		cnt = iov_iter_get_pages(uio->uio_iter,
666*7a7741afSMartin Matuska 		    &uio->uio_dio.pages[uio->uio_dio.npages],
667*7a7741afSMartin Matuska 		    wanted, maxpages, &skip);
668*7a7741afSMartin Matuska #endif
669*7a7741afSMartin Matuska 		if (cnt < 0) {
670*7a7741afSMartin Matuska 			iov_iter_revert(uio->uio_iter, rollback);
671*7a7741afSMartin Matuska 			return (SET_ERROR(-cnt));
672*7a7741afSMartin Matuska 		}
673*7a7741afSMartin Matuska 		uio->uio_dio.npages += DIV_ROUND_UP(cnt, PAGE_SIZE);
674*7a7741afSMartin Matuska 		rollback += cnt;
675*7a7741afSMartin Matuska 		wanted -= cnt;
676*7a7741afSMartin Matuska 		skip = 0;
677*7a7741afSMartin Matuska #if !defined(HAVE_IOV_ITER_GET_PAGES2)
678*7a7741afSMartin Matuska 		/*
679*7a7741afSMartin Matuska 		 * iov_iter_get_pages2() advances the iov_iter on success.
680*7a7741afSMartin Matuska 		 */
681*7a7741afSMartin Matuska 		iov_iter_advance(uio->uio_iter, cnt);
682*7a7741afSMartin Matuska #endif
683*7a7741afSMartin Matuska 
684*7a7741afSMartin Matuska 	}
685*7a7741afSMartin Matuska 	ASSERT3U(rollback, ==, uio->uio_resid - uio->uio_skip);
686*7a7741afSMartin Matuska 	iov_iter_revert(uio->uio_iter, rollback);
687*7a7741afSMartin Matuska 
688*7a7741afSMartin Matuska 	return (0);
689*7a7741afSMartin Matuska }
690*7a7741afSMartin Matuska #endif /* HAVE_VFS_IOV_ITER */
691*7a7741afSMartin Matuska 
692*7a7741afSMartin Matuska /*
693*7a7741afSMartin Matuska  * This function pins user pages. In the event that the user pages were not
694*7a7741afSMartin Matuska  * successfully pinned an error value is returned.
695*7a7741afSMartin Matuska  *
696*7a7741afSMartin Matuska  * On success, 0 is returned.
697*7a7741afSMartin Matuska  */
698*7a7741afSMartin Matuska int
zfs_uio_get_dio_pages_alloc(zfs_uio_t * uio,zfs_uio_rw_t rw)699*7a7741afSMartin Matuska zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
700*7a7741afSMartin Matuska {
701*7a7741afSMartin Matuska 	int error = 0;
702*7a7741afSMartin Matuska 	long npages = DIV_ROUND_UP(uio->uio_resid, PAGE_SIZE);
703*7a7741afSMartin Matuska 	size_t size = npages * sizeof (struct page *);
704*7a7741afSMartin Matuska 
705*7a7741afSMartin Matuska 	if (uio->uio_segflg == UIO_USERSPACE) {
706*7a7741afSMartin Matuska 		uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP);
707*7a7741afSMartin Matuska 		error = zfs_uio_get_dio_pages_iov(uio, rw);
708*7a7741afSMartin Matuska #if defined(HAVE_VFS_IOV_ITER)
709*7a7741afSMartin Matuska 	} else if (uio->uio_segflg == UIO_ITER) {
710*7a7741afSMartin Matuska 		uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP);
711*7a7741afSMartin Matuska 		error = zfs_uio_get_dio_pages_iov_iter(uio, rw);
712*7a7741afSMartin Matuska #endif
713*7a7741afSMartin Matuska 	} else {
714*7a7741afSMartin Matuska 		return (SET_ERROR(EOPNOTSUPP));
715*7a7741afSMartin Matuska 	}
716*7a7741afSMartin Matuska 
717*7a7741afSMartin Matuska 	ASSERT3S(uio->uio_dio.npages, >=, 0);
718*7a7741afSMartin Matuska 
719*7a7741afSMartin Matuska 	if (error) {
720*7a7741afSMartin Matuska 		for (long i = 0; i < uio->uio_dio.npages; i++)
721*7a7741afSMartin Matuska 			put_page(uio->uio_dio.pages[i]);
722*7a7741afSMartin Matuska 		vmem_free(uio->uio_dio.pages, size);
723*7a7741afSMartin Matuska 		return (error);
724*7a7741afSMartin Matuska 	} else {
725*7a7741afSMartin Matuska 		ASSERT3S(uio->uio_dio.npages, ==, npages);
726*7a7741afSMartin Matuska 	}
727*7a7741afSMartin Matuska 
728*7a7741afSMartin Matuska 	if (rw == UIO_WRITE) {
729*7a7741afSMartin Matuska 		zfs_uio_dio_check_for_zero_page(uio);
730*7a7741afSMartin Matuska 	}
731*7a7741afSMartin Matuska 
732*7a7741afSMartin Matuska 	uio->uio_extflg |= UIO_DIRECT;
733*7a7741afSMartin Matuska 
734*7a7741afSMartin Matuska 	return (0);
735*7a7741afSMartin Matuska }
736*7a7741afSMartin Matuska 
737184c1b94SMartin Matuska #endif /* _KERNEL */
738