xref: /freebsd/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c (revision 397e83df75e0fcd0d3fcb95ae4d794cb7600fc89)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved	*/
28 
29 /*
30  * University Copyright- Copyright (c) 1982, 1986, 1988
31  * The Regents of the University of California
32  * All Rights Reserved
33  *
34  * University Acknowledgment- Portions of this document are derived from
35  * software developed by the University of California, Berkeley, and its
36  * contributors.
37  */
38 /*
39  * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
40  */
41 
42 #ifdef _KERNEL
43 
44 #include <sys/types.h>
45 #include <sys/uio_impl.h>
46 #include <sys/sysmacros.h>
47 #include <sys/string.h>
48 #include <linux/kmap_compat.h>
49 #include <linux/uaccess.h>
50 
51 /*
52  * Move "n" bytes at byte address "p"; "rw" indicates the direction
53  * of the move, and the I/O parameters are provided in "uio", which is
54  * update to reflect the data which was moved.  Returns 0 on success or
55  * a non-zero errno on failure.
56  */
57 static int
58 zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
59 {
60 	const struct iovec *iov = uio->uio_iov;
61 	size_t skip = uio->uio_skip;
62 	ulong_t cnt;
63 
64 	while (n && uio->uio_resid) {
65 		cnt = MIN(iov->iov_len - skip, n);
66 		switch (uio->uio_segflg) {
67 		case UIO_USERSPACE:
68 			/*
69 			 * p = kernel data pointer
70 			 * iov->iov_base = user data pointer
71 			 */
72 			if (rw == UIO_READ) {
73 				if (copy_to_user(iov->iov_base+skip, p, cnt))
74 					return (EFAULT);
75 			} else {
76 				unsigned long b_left = 0;
77 				if (uio->uio_fault_disable) {
78 					if (!zfs_access_ok(VERIFY_READ,
79 					    (iov->iov_base + skip), cnt)) {
80 						return (EFAULT);
81 					}
82 					pagefault_disable();
83 					b_left =
84 					    __copy_from_user_inatomic(p,
85 					    (iov->iov_base + skip), cnt);
86 					pagefault_enable();
87 				} else {
88 					b_left =
89 					    copy_from_user(p,
90 					    (iov->iov_base + skip), cnt);
91 				}
92 				if (b_left > 0) {
93 					unsigned long c_bytes =
94 					    cnt - b_left;
95 					uio->uio_skip += c_bytes;
96 					ASSERT3U(uio->uio_skip, <,
97 					    iov->iov_len);
98 					uio->uio_resid -= c_bytes;
99 					uio->uio_loffset += c_bytes;
100 					return (EFAULT);
101 				}
102 			}
103 			break;
104 		case UIO_SYSSPACE:
105 			if (rw == UIO_READ)
106 				memcpy(iov->iov_base + skip, p, cnt);
107 			else
108 				memcpy(p, iov->iov_base + skip, cnt);
109 			break;
110 		default:
111 			ASSERT(0);
112 		}
113 		skip += cnt;
114 		if (skip == iov->iov_len) {
115 			skip = 0;
116 			uio->uio_iov = (++iov);
117 			uio->uio_iovcnt--;
118 		}
119 		uio->uio_skip = skip;
120 		uio->uio_resid -= cnt;
121 		uio->uio_loffset += cnt;
122 		p = (caddr_t)p + cnt;
123 		n -= cnt;
124 	}
125 	return (0);
126 }
127 
128 static int
129 zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
130 {
131 	const struct bio_vec *bv = uio->uio_bvec;
132 	size_t skip = uio->uio_skip;
133 	ulong_t cnt;
134 
135 	while (n && uio->uio_resid) {
136 		void *paddr;
137 		cnt = MIN(bv->bv_len - skip, n);
138 
139 		paddr = zfs_kmap_atomic(bv->bv_page);
140 		if (rw == UIO_READ) {
141 			/* Copy from buffer 'p' to the bvec data */
142 			memcpy(paddr + bv->bv_offset + skip, p, cnt);
143 		} else {
144 			/* Copy from bvec data to buffer 'p' */
145 			memcpy(p, paddr + bv->bv_offset + skip, cnt);
146 		}
147 		zfs_kunmap_atomic(paddr);
148 
149 		skip += cnt;
150 		if (skip == bv->bv_len) {
151 			skip = 0;
152 			uio->uio_bvec = (++bv);
153 			uio->uio_iovcnt--;
154 		}
155 		uio->uio_skip = skip;
156 		uio->uio_resid -= cnt;
157 		uio->uio_loffset += cnt;
158 		p = (caddr_t)p + cnt;
159 		n -= cnt;
160 	}
161 	return (0);
162 }
163 
164 #ifdef HAVE_BLK_MQ
165 static void
166 zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw,
167     struct bio_vec *bv)
168 {
169 	void *paddr;
170 
171 	paddr = zfs_kmap_atomic(bv->bv_page);
172 	if (rw == UIO_READ) {
173 		/* Copy from buffer 'p' to the bvec data */
174 		memcpy(paddr + bv->bv_offset + skip, p, cnt);
175 	} else {
176 		/* Copy from bvec data to buffer 'p' */
177 		memcpy(p, paddr + bv->bv_offset + skip, cnt);
178 	}
179 	zfs_kunmap_atomic(paddr);
180 }
181 
182 /*
183  * Copy 'n' bytes of data between the buffer p[] and the data represented
184  * by the request in the uio.
185  */
186 static int
187 zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
188 {
189 	struct request *rq = uio->rq;
190 	struct bio_vec bv;
191 	struct req_iterator iter;
192 	size_t this_seg_start;	/* logical offset */
193 	size_t this_seg_end;		/* logical offset */
194 	size_t skip_in_seg;
195 	size_t copy_from_seg;
196 	size_t orig_loffset;
197 	int copied = 0;
198 
199 	/*
200 	 * Get the original logical offset of this entire request (because
201 	 * uio->uio_loffset will be modified over time).
202 	 */
203 	orig_loffset = io_offset(NULL, rq);
204 	this_seg_start = orig_loffset;
205 
206 	rq_for_each_segment(bv, rq, iter) {
207 		/*
208 		 * Lookup what the logical offset of the last byte of this
209 		 * segment is.
210 		 */
211 		this_seg_end = this_seg_start + bv.bv_len - 1;
212 
213 		/*
214 		 * We only need to operate on segments that have data we're
215 		 * copying.
216 		 */
217 		if (uio->uio_loffset >= this_seg_start &&
218 		    uio->uio_loffset <= this_seg_end) {
219 			/*
220 			 * Some, or all, of the data in this segment needs to be
221 			 * copied.
222 			 */
223 
224 			/*
225 			 * We may be not be copying from the first byte in the
226 			 * segment.  Figure out how many bytes to skip copying
227 			 * from the beginning of this segment.
228 			 */
229 			skip_in_seg = uio->uio_loffset - this_seg_start;
230 
231 			/*
232 			 * Calculate the total number of bytes from this
233 			 * segment that we will be copying.
234 			 */
235 			copy_from_seg = MIN(bv.bv_len - skip_in_seg, n);
236 
237 			/* Copy the bytes */
238 			zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv);
239 			p = ((char *)p) + copy_from_seg;
240 
241 			n -= copy_from_seg;
242 			uio->uio_resid -= copy_from_seg;
243 			uio->uio_loffset += copy_from_seg;
244 			copied = 1;	/* We copied some data */
245 		}
246 
247 		this_seg_start = this_seg_end + 1;
248 	}
249 
250 	if (!copied) {
251 		/* Didn't copy anything */
252 		uio->uio_resid = 0;
253 	}
254 	return (0);
255 }
256 #endif
257 
258 static int
259 zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
260 {
261 #ifdef HAVE_BLK_MQ
262 	if (uio->rq != NULL)
263 		return (zfs_uiomove_bvec_rq(p, n, rw, uio));
264 #else
265 	ASSERT3P(uio->rq, ==, NULL);
266 #endif
267 	return (zfs_uiomove_bvec_impl(p, n, rw, uio));
268 }
269 
270 #if defined(HAVE_VFS_IOV_ITER)
271 static int
272 zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
273     boolean_t revert)
274 {
275 	size_t cnt = MIN(n, uio->uio_resid);
276 
277 	if (uio->uio_skip)
278 		iov_iter_advance(uio->uio_iter, uio->uio_skip);
279 
280 	if (rw == UIO_READ)
281 		cnt = copy_to_iter(p, cnt, uio->uio_iter);
282 	else
283 		cnt = copy_from_iter(p, cnt, uio->uio_iter);
284 
285 	/*
286 	 * When operating on a full pipe no bytes are processed.
287 	 * In which case return EFAULT which is converted to EAGAIN
288 	 * by the kernel's generic_file_splice_read() function.
289 	 */
290 	if (cnt == 0)
291 		return (EFAULT);
292 
293 	/*
294 	 * Revert advancing the uio_iter.  This is set by zfs_uiocopy()
295 	 * to avoid consuming the uio and its iov_iter structure.
296 	 */
297 	if (revert)
298 		iov_iter_revert(uio->uio_iter, cnt);
299 
300 	uio->uio_resid -= cnt;
301 	uio->uio_loffset += cnt;
302 
303 	return (0);
304 }
305 #endif
306 
307 int
308 zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
309 {
310 	if (uio->uio_segflg == UIO_BVEC)
311 		return (zfs_uiomove_bvec(p, n, rw, uio));
312 #if defined(HAVE_VFS_IOV_ITER)
313 	else if (uio->uio_segflg == UIO_ITER)
314 		return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE));
315 #endif
316 	else
317 		return (zfs_uiomove_iov(p, n, rw, uio));
318 }
319 EXPORT_SYMBOL(zfs_uiomove);
320 
321 /*
322  * Fault in the pages of the first n bytes specified by the uio structure.
323  * 1 byte in each page is touched and the uio struct is unmodified. Any
324  * error will terminate the process as this is only a best attempt to get
325  * the pages resident.
326  */
327 int
328 zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
329 {
330 	if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC) {
331 		/* There's never a need to fault in kernel pages */
332 		return (0);
333 #if defined(HAVE_VFS_IOV_ITER)
334 	} else if (uio->uio_segflg == UIO_ITER) {
335 		/*
336 		 * At least a Linux 4.9 kernel, iov_iter_fault_in_readable()
337 		 * can be relied on to fault in user pages when referenced.
338 		 */
339 		if (iov_iter_fault_in_readable(uio->uio_iter, n))
340 			return (EFAULT);
341 #endif
342 	} else {
343 		/* Fault in all user pages */
344 		ASSERT3S(uio->uio_segflg, ==, UIO_USERSPACE);
345 		const struct iovec *iov = uio->uio_iov;
346 		int iovcnt = uio->uio_iovcnt;
347 		size_t skip = uio->uio_skip;
348 		uint8_t tmp;
349 		caddr_t p;
350 
351 		for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
352 			ulong_t cnt = MIN(iov->iov_len - skip, n);
353 			/* empty iov */
354 			if (cnt == 0)
355 				continue;
356 			n -= cnt;
357 			/* touch each page in this segment. */
358 			p = iov->iov_base + skip;
359 			while (cnt) {
360 				if (copy_from_user(&tmp, p, 1))
361 					return (EFAULT);
362 				ulong_t incr = MIN(cnt, PAGESIZE);
363 				p += incr;
364 				cnt -= incr;
365 			}
366 			/* touch the last byte in case it straddles a page. */
367 			p--;
368 			if (copy_from_user(&tmp, p, 1))
369 				return (EFAULT);
370 		}
371 	}
372 
373 	return (0);
374 }
375 EXPORT_SYMBOL(zfs_uio_prefaultpages);
376 
377 /*
378  * The same as zfs_uiomove() but doesn't modify uio structure.
379  * return in cbytes how many bytes were copied.
380  */
381 int
382 zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
383 {
384 	zfs_uio_t uio_copy;
385 	int ret;
386 
387 	memcpy(&uio_copy, uio, sizeof (zfs_uio_t));
388 
389 	if (uio->uio_segflg == UIO_BVEC)
390 		ret = zfs_uiomove_bvec(p, n, rw, &uio_copy);
391 #if defined(HAVE_VFS_IOV_ITER)
392 	else if (uio->uio_segflg == UIO_ITER)
393 		ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE);
394 #endif
395 	else
396 		ret = zfs_uiomove_iov(p, n, rw, &uio_copy);
397 
398 	*cbytes = uio->uio_resid - uio_copy.uio_resid;
399 
400 	return (ret);
401 }
402 EXPORT_SYMBOL(zfs_uiocopy);
403 
404 /*
405  * Drop the next n chars out of *uio.
406  */
407 void
408 zfs_uioskip(zfs_uio_t *uio, size_t n)
409 {
410 	if (n > uio->uio_resid)
411 		return;
412 	/*
413 	 * When using a uio with a struct request, we simply
414 	 * use uio_loffset as a pointer to the next logical byte to
415 	 * copy in the request.  We don't have to do any fancy
416 	 * accounting with uio_bvec/uio_iovcnt since we don't use
417 	 * them.
418 	 */
419 	if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) {
420 		uio->uio_skip += n;
421 		while (uio->uio_iovcnt &&
422 		    uio->uio_skip >= uio->uio_bvec->bv_len) {
423 			uio->uio_skip -= uio->uio_bvec->bv_len;
424 			uio->uio_bvec++;
425 			uio->uio_iovcnt--;
426 		}
427 #if defined(HAVE_VFS_IOV_ITER)
428 	} else if (uio->uio_segflg == UIO_ITER) {
429 		iov_iter_advance(uio->uio_iter, n);
430 #endif
431 	} else {
432 		uio->uio_skip += n;
433 		while (uio->uio_iovcnt &&
434 		    uio->uio_skip >= uio->uio_iov->iov_len) {
435 			uio->uio_skip -= uio->uio_iov->iov_len;
436 			uio->uio_iov++;
437 			uio->uio_iovcnt--;
438 		}
439 	}
440 	uio->uio_loffset += n;
441 	uio->uio_resid -= n;
442 }
443 EXPORT_SYMBOL(zfs_uioskip);
444 
445 #endif /* _KERNEL */
446