xref: /freebsd/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c (revision a2464ee12761660f50d0b6f59f233949ebcacc87)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved	*/
28 
29 /*
30  * University Copyright- Copyright (c) 1982, 1986, 1988
31  * The Regents of the University of California
32  * All Rights Reserved
33  *
34  * University Acknowledgment- Portions of this document are derived from
35  * software developed by the University of California, Berkeley, and its
36  * contributors.
37  */
38 /*
39  * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
40  */
41 
42 #ifdef _KERNEL
43 
44 #include <sys/types.h>
45 #include <sys/uio_impl.h>
46 #include <sys/sysmacros.h>
47 #include <sys/string.h>
48 #include <linux/kmap_compat.h>
49 #include <linux/uaccess.h>
50 
51 /*
52  * Move "n" bytes at byte address "p"; "rw" indicates the direction
53  * of the move, and the I/O parameters are provided in "uio", which is
54  * update to reflect the data which was moved.  Returns 0 on success or
55  * a non-zero errno on failure.
56  */
57 static int
58 zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
59 {
60 	const struct iovec *iov = uio->uio_iov;
61 	size_t skip = uio->uio_skip;
62 	ulong_t cnt;
63 
64 	while (n && uio->uio_resid) {
65 		cnt = MIN(iov->iov_len - skip, n);
66 		switch (uio->uio_segflg) {
67 		case UIO_USERSPACE:
68 			/*
69 			 * p = kernel data pointer
70 			 * iov->iov_base = user data pointer
71 			 */
72 			if (rw == UIO_READ) {
73 				if (copy_to_user(iov->iov_base+skip, p, cnt))
74 					return (EFAULT);
75 			} else {
76 				unsigned long b_left = 0;
77 				if (uio->uio_fault_disable) {
78 					if (!zfs_access_ok(VERIFY_READ,
79 					    (iov->iov_base + skip), cnt)) {
80 						return (EFAULT);
81 					}
82 					pagefault_disable();
83 					b_left =
84 					    __copy_from_user_inatomic(p,
85 					    (iov->iov_base + skip), cnt);
86 					pagefault_enable();
87 				} else {
88 					b_left =
89 					    copy_from_user(p,
90 					    (iov->iov_base + skip), cnt);
91 				}
92 				if (b_left > 0) {
93 					unsigned long c_bytes =
94 					    cnt - b_left;
95 					uio->uio_skip += c_bytes;
96 					ASSERT3U(uio->uio_skip, <,
97 					    iov->iov_len);
98 					uio->uio_resid -= c_bytes;
99 					uio->uio_loffset += c_bytes;
100 					return (EFAULT);
101 				}
102 			}
103 			break;
104 		case UIO_SYSSPACE:
105 			if (rw == UIO_READ)
106 				memcpy(iov->iov_base + skip, p, cnt);
107 			else
108 				memcpy(p, iov->iov_base + skip, cnt);
109 			break;
110 		default:
111 			ASSERT(0);
112 		}
113 		skip += cnt;
114 		if (skip == iov->iov_len) {
115 			skip = 0;
116 			uio->uio_iov = (++iov);
117 			uio->uio_iovcnt--;
118 		}
119 		uio->uio_skip = skip;
120 		uio->uio_resid -= cnt;
121 		uio->uio_loffset += cnt;
122 		p = (caddr_t)p + cnt;
123 		n -= cnt;
124 	}
125 	return (0);
126 }
127 
128 static int
129 zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
130 {
131 	const struct bio_vec *bv = uio->uio_bvec;
132 	size_t skip = uio->uio_skip;
133 	ulong_t cnt;
134 
135 	while (n && uio->uio_resid) {
136 		void *paddr;
137 		cnt = MIN(bv->bv_len - skip, n);
138 
139 		paddr = zfs_kmap_atomic(bv->bv_page);
140 		if (rw == UIO_READ) {
141 			/* Copy from buffer 'p' to the bvec data */
142 			memcpy(paddr + bv->bv_offset + skip, p, cnt);
143 		} else {
144 			/* Copy from bvec data to buffer 'p' */
145 			memcpy(p, paddr + bv->bv_offset + skip, cnt);
146 		}
147 		zfs_kunmap_atomic(paddr);
148 
149 		skip += cnt;
150 		if (skip == bv->bv_len) {
151 			skip = 0;
152 			uio->uio_bvec = (++bv);
153 			uio->uio_iovcnt--;
154 		}
155 		uio->uio_skip = skip;
156 		uio->uio_resid -= cnt;
157 		uio->uio_loffset += cnt;
158 		p = (caddr_t)p + cnt;
159 		n -= cnt;
160 	}
161 	return (0);
162 }
163 
164 #ifdef HAVE_BLK_MQ
165 static void
166 zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw,
167     struct bio_vec *bv)
168 {
169 	void *paddr;
170 
171 	paddr = zfs_kmap_atomic(bv->bv_page);
172 	if (rw == UIO_READ) {
173 		/* Copy from buffer 'p' to the bvec data */
174 		memcpy(paddr + bv->bv_offset + skip, p, cnt);
175 	} else {
176 		/* Copy from bvec data to buffer 'p' */
177 		memcpy(p, paddr + bv->bv_offset + skip, cnt);
178 	}
179 	zfs_kunmap_atomic(paddr);
180 }
181 
182 /*
183  * Copy 'n' bytes of data between the buffer p[] and the data represented
184  * by the request in the uio.
185  */
186 static int
187 zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
188 {
189 	struct request *rq = uio->rq;
190 	struct bio_vec bv;
191 	struct req_iterator iter;
192 	size_t this_seg_start;	/* logical offset */
193 	size_t this_seg_end;		/* logical offset */
194 	size_t skip_in_seg;
195 	size_t copy_from_seg;
196 	size_t orig_loffset;
197 	int copied = 0;
198 
199 	/*
200 	 * Get the original logical offset of this entire request (because
201 	 * uio->uio_loffset will be modified over time).
202 	 */
203 	orig_loffset = io_offset(NULL, rq);
204 	this_seg_start = orig_loffset;
205 
206 	rq_for_each_segment(bv, rq, iter) {
207 		if (uio->iter.bio) {
208 			/*
209 			 * If uio->iter.bio is present, then we know we've saved
210 			 * uio->iter from a previous call to this function, and
211 			 * we can skip ahead in this rq_for_each_segment() loop
212 			 * to where we last left off.  That way, we don't need
213 			 * to iterate over tons of segments we've already
214 			 * processed - we can just restore the "saved state".
215 			 */
216 			iter = uio->iter;
217 			bv = uio->bv;
218 			this_seg_start = uio->uio_loffset;
219 			memset(&uio->iter, 0, sizeof (uio->iter));
220 			continue;
221 		}
222 
223 		/*
224 		 * Lookup what the logical offset of the last byte of this
225 		 * segment is.
226 		 */
227 		this_seg_end = this_seg_start + bv.bv_len - 1;
228 
229 		/*
230 		 * We only need to operate on segments that have data we're
231 		 * copying.
232 		 */
233 		if (uio->uio_loffset >= this_seg_start &&
234 		    uio->uio_loffset <= this_seg_end) {
235 			/*
236 			 * Some, or all, of the data in this segment needs to be
237 			 * copied.
238 			 */
239 
240 			/*
241 			 * We may be not be copying from the first byte in the
242 			 * segment.  Figure out how many bytes to skip copying
243 			 * from the beginning of this segment.
244 			 */
245 			skip_in_seg = uio->uio_loffset - this_seg_start;
246 
247 			/*
248 			 * Calculate the total number of bytes from this
249 			 * segment that we will be copying.
250 			 */
251 			copy_from_seg = MIN(bv.bv_len - skip_in_seg, n);
252 
253 			/* Copy the bytes */
254 			zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv);
255 			p = ((char *)p) + copy_from_seg;
256 
257 			n -= copy_from_seg;
258 			uio->uio_resid -= copy_from_seg;
259 			uio->uio_loffset += copy_from_seg;
260 			copied = 1;	/* We copied some data */
261 		}
262 
263 		if (n == 0) {
264 			/*
265 			 * All done copying.  Save our 'iter' value to the uio.
266 			 * This allows us to "save our state" and skip ahead in
267 			 * the rq_for_each_segment() loop the next time we call
268 			 * call zfs_uiomove_bvec_rq() on this uio (which we
269 			 * will be doing for any remaining data in the uio).
270 			 */
271 			uio->iter = iter; /* make a copy of the struct data */
272 			uio->bv = bv;
273 			return (0);
274 		}
275 
276 		this_seg_start = this_seg_end + 1;
277 	}
278 
279 	if (!copied) {
280 		/* Didn't copy anything */
281 		uio->uio_resid = 0;
282 	}
283 	return (0);
284 }
285 #endif
286 
287 static int
288 zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
289 {
290 #ifdef HAVE_BLK_MQ
291 	if (uio->rq != NULL)
292 		return (zfs_uiomove_bvec_rq(p, n, rw, uio));
293 #else
294 	ASSERT3P(uio->rq, ==, NULL);
295 #endif
296 	return (zfs_uiomove_bvec_impl(p, n, rw, uio));
297 }
298 
299 #if defined(HAVE_VFS_IOV_ITER)
300 static int
301 zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
302     boolean_t revert)
303 {
304 	size_t cnt = MIN(n, uio->uio_resid);
305 
306 	if (uio->uio_skip)
307 		iov_iter_advance(uio->uio_iter, uio->uio_skip);
308 
309 	if (rw == UIO_READ)
310 		cnt = copy_to_iter(p, cnt, uio->uio_iter);
311 	else
312 		cnt = copy_from_iter(p, cnt, uio->uio_iter);
313 
314 	/*
315 	 * When operating on a full pipe no bytes are processed.
316 	 * In which case return EFAULT which is converted to EAGAIN
317 	 * by the kernel's generic_file_splice_read() function.
318 	 */
319 	if (cnt == 0)
320 		return (EFAULT);
321 
322 	/*
323 	 * Revert advancing the uio_iter.  This is set by zfs_uiocopy()
324 	 * to avoid consuming the uio and its iov_iter structure.
325 	 */
326 	if (revert)
327 		iov_iter_revert(uio->uio_iter, cnt);
328 
329 	uio->uio_resid -= cnt;
330 	uio->uio_loffset += cnt;
331 
332 	return (0);
333 }
334 #endif
335 
336 int
337 zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
338 {
339 	if (uio->uio_segflg == UIO_BVEC)
340 		return (zfs_uiomove_bvec(p, n, rw, uio));
341 #if defined(HAVE_VFS_IOV_ITER)
342 	else if (uio->uio_segflg == UIO_ITER)
343 		return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE));
344 #endif
345 	else
346 		return (zfs_uiomove_iov(p, n, rw, uio));
347 }
348 EXPORT_SYMBOL(zfs_uiomove);
349 
350 /*
351  * Fault in the pages of the first n bytes specified by the uio structure.
352  * 1 byte in each page is touched and the uio struct is unmodified. Any
353  * error will terminate the process as this is only a best attempt to get
354  * the pages resident.
355  */
356 int
357 zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
358 {
359 	if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC) {
360 		/* There's never a need to fault in kernel pages */
361 		return (0);
362 #if defined(HAVE_VFS_IOV_ITER)
363 	} else if (uio->uio_segflg == UIO_ITER) {
364 		/*
365 		 * At least a Linux 4.9 kernel, iov_iter_fault_in_readable()
366 		 * can be relied on to fault in user pages when referenced.
367 		 */
368 		if (iov_iter_fault_in_readable(uio->uio_iter, n))
369 			return (EFAULT);
370 #endif
371 	} else {
372 		/* Fault in all user pages */
373 		ASSERT3S(uio->uio_segflg, ==, UIO_USERSPACE);
374 		const struct iovec *iov = uio->uio_iov;
375 		int iovcnt = uio->uio_iovcnt;
376 		size_t skip = uio->uio_skip;
377 		uint8_t tmp;
378 		caddr_t p;
379 
380 		for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
381 			ulong_t cnt = MIN(iov->iov_len - skip, n);
382 			/* empty iov */
383 			if (cnt == 0)
384 				continue;
385 			n -= cnt;
386 			/* touch each page in this segment. */
387 			p = iov->iov_base + skip;
388 			while (cnt) {
389 				if (copy_from_user(&tmp, p, 1))
390 					return (EFAULT);
391 				ulong_t incr = MIN(cnt, PAGESIZE);
392 				p += incr;
393 				cnt -= incr;
394 			}
395 			/* touch the last byte in case it straddles a page. */
396 			p--;
397 			if (copy_from_user(&tmp, p, 1))
398 				return (EFAULT);
399 		}
400 	}
401 
402 	return (0);
403 }
404 EXPORT_SYMBOL(zfs_uio_prefaultpages);
405 
406 /*
407  * The same as zfs_uiomove() but doesn't modify uio structure.
408  * return in cbytes how many bytes were copied.
409  */
410 int
411 zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
412 {
413 	zfs_uio_t uio_copy;
414 	int ret;
415 
416 	memcpy(&uio_copy, uio, sizeof (zfs_uio_t));
417 
418 	if (uio->uio_segflg == UIO_BVEC)
419 		ret = zfs_uiomove_bvec(p, n, rw, &uio_copy);
420 #if defined(HAVE_VFS_IOV_ITER)
421 	else if (uio->uio_segflg == UIO_ITER)
422 		ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE);
423 #endif
424 	else
425 		ret = zfs_uiomove_iov(p, n, rw, &uio_copy);
426 
427 	*cbytes = uio->uio_resid - uio_copy.uio_resid;
428 
429 	return (ret);
430 }
431 EXPORT_SYMBOL(zfs_uiocopy);
432 
433 /*
434  * Drop the next n chars out of *uio.
435  */
436 void
437 zfs_uioskip(zfs_uio_t *uio, size_t n)
438 {
439 	if (n > uio->uio_resid)
440 		return;
441 	/*
442 	 * When using a uio with a struct request, we simply
443 	 * use uio_loffset as a pointer to the next logical byte to
444 	 * copy in the request.  We don't have to do any fancy
445 	 * accounting with uio_bvec/uio_iovcnt since we don't use
446 	 * them.
447 	 */
448 	if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) {
449 		uio->uio_skip += n;
450 		while (uio->uio_iovcnt &&
451 		    uio->uio_skip >= uio->uio_bvec->bv_len) {
452 			uio->uio_skip -= uio->uio_bvec->bv_len;
453 			uio->uio_bvec++;
454 			uio->uio_iovcnt--;
455 		}
456 #if defined(HAVE_VFS_IOV_ITER)
457 	} else if (uio->uio_segflg == UIO_ITER) {
458 		iov_iter_advance(uio->uio_iter, n);
459 #endif
460 	} else {
461 		uio->uio_skip += n;
462 		while (uio->uio_iovcnt &&
463 		    uio->uio_skip >= uio->uio_iov->iov_len) {
464 			uio->uio_skip -= uio->uio_iov->iov_len;
465 			uio->uio_iov++;
466 			uio->uio_iovcnt--;
467 		}
468 	}
469 	uio->uio_loffset += n;
470 	uio->uio_resid -= n;
471 }
472 EXPORT_SYMBOL(zfs_uioskip);
473 
474 #endif /* _KERNEL */
475