xref: /freebsd/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c (revision dd21556857e8d40f66bf5ad54754d9d52669ebf7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved	*/
28 
29 /*
30  * University Copyright- Copyright (c) 1982, 1986, 1988
31  * The Regents of the University of California
32  * All Rights Reserved
33  *
34  * University Acknowledgment- Portions of this document are derived from
35  * software developed by the University of California, Berkeley, and its
36  * contributors.
37  */
38 /*
39  * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
40  */
41 
42 #ifdef _KERNEL
43 
44 #include <sys/errno.h>
45 #include <sys/vmem.h>
46 #include <sys/sysmacros.h>
47 #include <sys/types.h>
48 #include <sys/uio_impl.h>
49 #include <sys/sysmacros.h>
50 #include <sys/string.h>
51 #include <sys/zfs_refcount.h>
52 #include <sys/zfs_debug.h>
53 #include <linux/kmap_compat.h>
54 #include <linux/uaccess.h>
55 #include <linux/pagemap.h>
56 #include <linux/mman.h>
57 
58 /*
59  * Move "n" bytes at byte address "p"; "rw" indicates the direction
60  * of the move, and the I/O parameters are provided in "uio", which is
61  * update to reflect the data which was moved.  Returns 0 on success or
62  * a non-zero errno on failure.
63  */
64 static int
zfs_uiomove_iov(void * p,size_t n,zfs_uio_rw_t rw,zfs_uio_t * uio)65 zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
66 {
67 	const struct iovec *iov = uio->uio_iov;
68 	size_t skip = uio->uio_skip;
69 	ulong_t cnt;
70 
71 	ASSERT3S(uio->uio_segflg, ==, UIO_SYSSPACE);
72 	while (n && uio->uio_resid) {
73 		cnt = MIN(iov->iov_len - skip, n);
74 		if (rw == UIO_READ)
75 			memcpy(iov->iov_base + skip, p, cnt);
76 		else
77 			memcpy(p, iov->iov_base + skip, cnt);
78 		skip += cnt;
79 		if (skip == iov->iov_len) {
80 			skip = 0;
81 			uio->uio_iov = (++iov);
82 			uio->uio_iovcnt--;
83 		}
84 		uio->uio_skip = skip;
85 		uio->uio_resid -= cnt;
86 		uio->uio_loffset += cnt;
87 		p = (caddr_t)p + cnt;
88 		n -= cnt;
89 	}
90 	return (0);
91 }
92 
93 static int
zfs_uiomove_bvec_impl(void * p,size_t n,zfs_uio_rw_t rw,zfs_uio_t * uio)94 zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
95 {
96 	const struct bio_vec *bv = uio->uio_bvec;
97 	size_t skip = uio->uio_skip;
98 	ulong_t cnt;
99 
100 	while (n && uio->uio_resid) {
101 		void *paddr;
102 		cnt = MIN(bv->bv_len - skip, n);
103 
104 		paddr = zfs_kmap_local(bv->bv_page);
105 		if (rw == UIO_READ) {
106 			/* Copy from buffer 'p' to the bvec data */
107 			memcpy(paddr + bv->bv_offset + skip, p, cnt);
108 		} else {
109 			/* Copy from bvec data to buffer 'p' */
110 			memcpy(p, paddr + bv->bv_offset + skip, cnt);
111 		}
112 		zfs_kunmap_local(paddr);
113 
114 		skip += cnt;
115 		if (skip == bv->bv_len) {
116 			skip = 0;
117 			uio->uio_bvec = (++bv);
118 			uio->uio_iovcnt--;
119 		}
120 		uio->uio_skip = skip;
121 		uio->uio_resid -= cnt;
122 		uio->uio_loffset += cnt;
123 		p = (caddr_t)p + cnt;
124 		n -= cnt;
125 	}
126 	return (0);
127 }
128 
129 static void
zfs_copy_bvec(void * p,size_t skip,size_t cnt,zfs_uio_rw_t rw,struct bio_vec * bv)130 zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw,
131     struct bio_vec *bv)
132 {
133 	void *paddr;
134 
135 	paddr = zfs_kmap_local(bv->bv_page);
136 	if (rw == UIO_READ) {
137 		/* Copy from buffer 'p' to the bvec data */
138 		memcpy(paddr + bv->bv_offset + skip, p, cnt);
139 	} else {
140 		/* Copy from bvec data to buffer 'p' */
141 		memcpy(p, paddr + bv->bv_offset + skip, cnt);
142 	}
143 	zfs_kunmap_local(paddr);
144 }
145 
146 /*
147  * Copy 'n' bytes of data between the buffer p[] and the data represented
148  * by the request in the uio.
149  */
150 static int
zfs_uiomove_bvec_rq(void * p,size_t n,zfs_uio_rw_t rw,zfs_uio_t * uio)151 zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
152 {
153 	struct request *rq = uio->rq;
154 	struct bio_vec bv;
155 	struct req_iterator iter;
156 	size_t this_seg_start;	/* logical offset */
157 	size_t this_seg_end;		/* logical offset */
158 	size_t skip_in_seg;
159 	size_t copy_from_seg;
160 	size_t orig_loffset;
161 	int copied = 0;
162 
163 	/*
164 	 * Get the original logical offset of this entire request (because
165 	 * uio->uio_loffset will be modified over time).
166 	 */
167 	orig_loffset = io_offset(NULL, rq);
168 	this_seg_start = orig_loffset;
169 
170 	rq_for_each_segment(bv, rq, iter) {
171 		/*
172 		 * Lookup what the logical offset of the last byte of this
173 		 * segment is.
174 		 */
175 		this_seg_end = this_seg_start + bv.bv_len - 1;
176 
177 		/*
178 		 * We only need to operate on segments that have data we're
179 		 * copying.
180 		 */
181 		if (uio->uio_loffset >= this_seg_start &&
182 		    uio->uio_loffset <= this_seg_end) {
183 			/*
184 			 * Some, or all, of the data in this segment needs to be
185 			 * copied.
186 			 */
187 
188 			/*
189 			 * We may be not be copying from the first byte in the
190 			 * segment.  Figure out how many bytes to skip copying
191 			 * from the beginning of this segment.
192 			 */
193 			skip_in_seg = uio->uio_loffset - this_seg_start;
194 
195 			/*
196 			 * Calculate the total number of bytes from this
197 			 * segment that we will be copying.
198 			 */
199 			copy_from_seg = MIN(bv.bv_len - skip_in_seg, n);
200 
201 			/* Copy the bytes */
202 			zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv);
203 			p = ((char *)p) + copy_from_seg;
204 
205 			n -= copy_from_seg;
206 			uio->uio_resid -= copy_from_seg;
207 			uio->uio_loffset += copy_from_seg;
208 			copied = 1;	/* We copied some data */
209 		}
210 
211 		this_seg_start = this_seg_end + 1;
212 	}
213 
214 	if (!copied) {
215 		/* Didn't copy anything */
216 		uio->uio_resid = 0;
217 	}
218 	return (0);
219 }
220 
221 static int
zfs_uiomove_bvec(void * p,size_t n,zfs_uio_rw_t rw,zfs_uio_t * uio)222 zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
223 {
224 	if (uio->rq != NULL)
225 		return (zfs_uiomove_bvec_rq(p, n, rw, uio));
226 	return (zfs_uiomove_bvec_impl(p, n, rw, uio));
227 }
228 
229 static int
zfs_uiomove_iter(void * p,size_t n,zfs_uio_rw_t rw,zfs_uio_t * uio,boolean_t revert)230 zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
231     boolean_t revert)
232 {
233 	size_t cnt = MIN(n, uio->uio_resid);
234 
235 	if (uio->uio_skip)
236 		iov_iter_advance(uio->uio_iter, uio->uio_skip);
237 
238 	if (rw == UIO_READ)
239 		cnt = copy_to_iter(p, cnt, uio->uio_iter);
240 	else
241 		cnt = copy_from_iter(p, cnt, uio->uio_iter);
242 
243 	/*
244 	 * When operating on a full pipe no bytes are processed.
245 	 * In which case return EFAULT which is converted to EAGAIN
246 	 * by the kernel's generic_file_splice_read() function.
247 	 */
248 	if (cnt == 0)
249 		return (EFAULT);
250 
251 	/*
252 	 * Revert advancing the uio_iter.  This is set by zfs_uiocopy()
253 	 * to avoid consuming the uio and its iov_iter structure.
254 	 */
255 	if (revert)
256 		iov_iter_revert(uio->uio_iter, cnt);
257 
258 	uio->uio_resid -= cnt;
259 	uio->uio_loffset += cnt;
260 
261 	return (0);
262 }
263 
264 int
zfs_uiomove(void * p,size_t n,zfs_uio_rw_t rw,zfs_uio_t * uio)265 zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
266 {
267 	if (uio->uio_segflg == UIO_BVEC)
268 		return (zfs_uiomove_bvec(p, n, rw, uio));
269 	else if (uio->uio_segflg == UIO_ITER)
270 		return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE));
271 	else
272 		return (zfs_uiomove_iov(p, n, rw, uio));
273 }
274 EXPORT_SYMBOL(zfs_uiomove);
275 
276 /*
277  * Fault in the pages of the first n bytes specified by the uio structure.
278  * 1 byte in each page is touched and the uio struct is unmodified. Any
279  * error will terminate the process as this is only a best attempt to get
280  * the pages resident.
281  */
282 int
zfs_uio_prefaultpages(ssize_t n,zfs_uio_t * uio)283 zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
284 {
285 	if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC ||
286 	    (uio->uio_extflg & UIO_DIRECT)) {
287 		/*
288 		 * There's never a need to fault in kernel pages or Direct I/O
289 		 * write pages. Direct I/O write pages have been pinned in so
290 		 * there is never a time for these pages a fault will occur.
291 		 */
292 		return (0);
293 	} else  {
294 		ASSERT3S(uio->uio_segflg, ==, UIO_ITER);
295 		/*
296 		 * At least a Linux 4.18 kernel, iov_iter_fault_in_readable()
297 		 * can be relied on to fault in user pages when referenced.
298 		 */
299 		if (iov_iter_fault_in_readable(uio->uio_iter, n))
300 			return (EFAULT);
301 	}
302 
303 	return (0);
304 }
305 EXPORT_SYMBOL(zfs_uio_prefaultpages);
306 
307 /*
308  * The same as zfs_uiomove() but doesn't modify uio structure.
309  * return in cbytes how many bytes were copied.
310  */
311 int
zfs_uiocopy(void * p,size_t n,zfs_uio_rw_t rw,zfs_uio_t * uio,size_t * cbytes)312 zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
313 {
314 	zfs_uio_t uio_copy;
315 	int ret;
316 
317 	memcpy(&uio_copy, uio, sizeof (zfs_uio_t));
318 
319 	if (uio->uio_segflg == UIO_BVEC)
320 		ret = zfs_uiomove_bvec(p, n, rw, &uio_copy);
321 	else if (uio->uio_segflg == UIO_ITER)
322 		ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE);
323 	else
324 		ret = zfs_uiomove_iov(p, n, rw, &uio_copy);
325 
326 	*cbytes = uio->uio_resid - uio_copy.uio_resid;
327 
328 	return (ret);
329 }
330 EXPORT_SYMBOL(zfs_uiocopy);
331 
332 /*
333  * Drop the next n chars out of *uio.
334  */
335 void
zfs_uioskip(zfs_uio_t * uio,size_t n)336 zfs_uioskip(zfs_uio_t *uio, size_t n)
337 {
338 	if (n > uio->uio_resid)
339 		return;
340 	/*
341 	 * When using a uio with a struct request, we simply
342 	 * use uio_loffset as a pointer to the next logical byte to
343 	 * copy in the request.  We don't have to do any fancy
344 	 * accounting with uio_bvec/uio_iovcnt since we don't use
345 	 * them.
346 	 */
347 	if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) {
348 		uio->uio_skip += n;
349 		while (uio->uio_iovcnt &&
350 		    uio->uio_skip >= uio->uio_bvec->bv_len) {
351 			uio->uio_skip -= uio->uio_bvec->bv_len;
352 			uio->uio_bvec++;
353 			uio->uio_iovcnt--;
354 		}
355 	} else if (uio->uio_segflg == UIO_ITER) {
356 		iov_iter_advance(uio->uio_iter, n);
357 	} else {
358 		ASSERT3S(uio->uio_segflg, ==, UIO_SYSSPACE);
359 		uio->uio_skip += n;
360 		while (uio->uio_iovcnt &&
361 		    uio->uio_skip >= uio->uio_iov->iov_len) {
362 			uio->uio_skip -= uio->uio_iov->iov_len;
363 			uio->uio_iov++;
364 			uio->uio_iovcnt--;
365 		}
366 	}
367 
368 	uio->uio_loffset += n;
369 	uio->uio_resid -= n;
370 }
371 EXPORT_SYMBOL(zfs_uioskip);
372 
373 /*
374  * Check if the uio is page-aligned in memory.
375  */
376 boolean_t
zfs_uio_page_aligned(zfs_uio_t * uio)377 zfs_uio_page_aligned(zfs_uio_t *uio)
378 {
379 	boolean_t aligned = B_TRUE;
380 
381 	if (uio->uio_segflg == UIO_SYSSPACE) {
382 		const struct iovec *iov = uio->uio_iov;
383 		size_t skip = uio->uio_skip;
384 
385 		for (int i = uio->uio_iovcnt; i > 0; iov++, i--) {
386 			uintptr_t addr = (uintptr_t)(iov->iov_base + skip);
387 			size_t size = iov->iov_len - skip;
388 			if ((addr & (PAGE_SIZE - 1)) ||
389 			    (size & (PAGE_SIZE - 1))) {
390 				aligned = B_FALSE;
391 				break;
392 			}
393 			skip = 0;
394 		}
395 	} else if (uio->uio_segflg == UIO_ITER) {
396 		unsigned long alignment =
397 		    iov_iter_alignment(uio->uio_iter);
398 		aligned = IS_P2ALIGNED(alignment, PAGE_SIZE);
399 	} else {
400 		/* Currently not supported */
401 		aligned = B_FALSE;
402 	}
403 
404 	return (aligned);
405 }
406 
407 
408 #if defined(HAVE_ZERO_PAGE_GPL_ONLY) || !defined(_LP64)
409 #define	ZFS_MARKEED_PAGE	0x0
410 #define	IS_ZFS_MARKED_PAGE(_p)	0
411 #define	zfs_mark_page(_p)
412 #define	zfs_unmark_page(_p)
413 #define	IS_ZERO_PAGE(_p)	0
414 
415 #else
416 /*
417  * Mark pages to know if they were allocated to replace ZERO_PAGE() for
418  * Direct I/O writes.
419  */
420 #define	ZFS_MARKED_PAGE		0x5a465350414745 /* ASCII: ZFSPAGE */
421 #define	IS_ZFS_MARKED_PAGE(_p) \
422 	(page_private(_p) == (unsigned long)ZFS_MARKED_PAGE)
423 #define	IS_ZERO_PAGE(_p) ((_p) == ZERO_PAGE(0))
424 
425 static inline void
zfs_mark_page(struct page * page)426 zfs_mark_page(struct page *page)
427 {
428 	ASSERT3P(page, !=, NULL);
429 	get_page(page);
430 	SetPagePrivate(page);
431 	set_page_private(page, ZFS_MARKED_PAGE);
432 }
433 
434 static inline void
zfs_unmark_page(struct page * page)435 zfs_unmark_page(struct page *page)
436 {
437 	ASSERT3P(page, !=, NULL);
438 	set_page_private(page, 0UL);
439 	ClearPagePrivate(page);
440 	put_page(page);
441 }
442 #endif /* HAVE_ZERO_PAGE_GPL_ONLY || !_LP64 */
443 
444 #if !defined(HAVE_PIN_USER_PAGES_UNLOCKED)
445 static void
zfs_uio_dio_check_for_zero_page(zfs_uio_t * uio)446 zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio)
447 {
448 	ASSERT3P(uio->uio_dio.pages, !=, NULL);
449 
450 	for (long i = 0; i < uio->uio_dio.npages; i++) {
451 		struct page *p = uio->uio_dio.pages[i];
452 		lock_page(p);
453 
454 		if (IS_ZERO_PAGE(p)) {
455 			/*
456 			 * If the user page points the kernels ZERO_PAGE() a
457 			 * new zero filled page will just be allocated so the
458 			 * contents of the page can not be changed by the user
459 			 * while a Direct I/O write is taking place.
460 			 */
461 			gfp_t gfp_zero_page  = __GFP_NOWARN | GFP_NOIO |
462 			    __GFP_ZERO | GFP_KERNEL;
463 
464 			ASSERT0(IS_ZFS_MARKED_PAGE(p));
465 			unlock_page(p);
466 			put_page(p);
467 
468 			uio->uio_dio.pages[i] =
469 			    __page_cache_alloc(gfp_zero_page);
470 			zfs_mark_page(uio->uio_dio.pages[i]);
471 		} else {
472 			unlock_page(p);
473 		}
474 	}
475 }
476 #endif
477 
478 void
zfs_uio_free_dio_pages(zfs_uio_t * uio,zfs_uio_rw_t rw)479 zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
480 {
481 
482 	ASSERT(uio->uio_extflg & UIO_DIRECT);
483 	ASSERT3P(uio->uio_dio.pages, !=, NULL);
484 
485 #if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
486 	unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages);
487 #else
488 	for (long i = 0; i < uio->uio_dio.npages; i++) {
489 		struct page *p = uio->uio_dio.pages[i];
490 
491 		if (IS_ZFS_MARKED_PAGE(p)) {
492 			zfs_unmark_page(p);
493 			__free_page(p);
494 			continue;
495 		}
496 
497 		put_page(p);
498 	}
499 #endif
500 	vmem_free(uio->uio_dio.pages,
501 	    uio->uio_dio.npages * sizeof (struct page *));
502 }
503 
504 #if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
505 static int
zfs_uio_pin_user_pages(zfs_uio_t * uio,zfs_uio_rw_t rw)506 zfs_uio_pin_user_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
507 {
508 	long res;
509 	size_t skip = uio->uio_skip;
510 	size_t len = uio->uio_resid - skip;
511 	unsigned int gup_flags = 0;
512 	unsigned long addr;
513 	unsigned long nr_pages;
514 
515 	/*
516 	 * Kernel 6.2 introduced the FOLL_PCI_P2PDMA flag. This flag could
517 	 * possibly be used here in the future to allow for P2P operations with
518 	 * user pages.
519 	 */
520 	if (rw == UIO_READ)
521 		gup_flags = FOLL_WRITE;
522 
523 	if (len == 0)
524 		return (0);
525 
526 #if defined(HAVE_ITER_IS_UBUF)
527 	if (iter_is_ubuf(uio->uio_iter)) {
528 		nr_pages = DIV_ROUND_UP(len, PAGE_SIZE);
529 		addr = (unsigned long)uio->uio_iter->ubuf + skip;
530 		res = pin_user_pages_unlocked(addr, nr_pages,
531 		    &uio->uio_dio.pages[uio->uio_dio.npages], gup_flags);
532 		if (res < 0) {
533 			return (SET_ERROR(-res));
534 		} else if (len != (res * PAGE_SIZE)) {
535 			uio->uio_dio.npages += res;
536 			return (SET_ERROR(EFAULT));
537 		}
538 		uio->uio_dio.npages += res;
539 		return (0);
540 	}
541 #endif
542 	const struct iovec *iovp = zfs_uio_iter_iov(uio->uio_iter);
543 	for (int i = 0; i < uio->uio_iovcnt; i++) {
544 		size_t amt = iovp->iov_len - skip;
545 		if (amt == 0) {
546 			iovp++;
547 			skip = 0;
548 			continue;
549 		}
550 
551 		addr = (unsigned long)iovp->iov_base + skip;
552 		nr_pages = DIV_ROUND_UP(amt, PAGE_SIZE);
553 		res = pin_user_pages_unlocked(addr, nr_pages,
554 		    &uio->uio_dio.pages[uio->uio_dio.npages], gup_flags);
555 		if (res < 0) {
556 			return (SET_ERROR(-res));
557 		} else if (amt != (res * PAGE_SIZE)) {
558 			uio->uio_dio.npages += res;
559 			return (SET_ERROR(EFAULT));
560 		}
561 
562 		len -= amt;
563 		uio->uio_dio.npages += res;
564 		skip = 0;
565 		iovp++;
566 	};
567 
568 	ASSERT0(len);
569 
570 	return (0);
571 }
572 
573 #else
574 static int
zfs_uio_get_dio_pages_iov_iter(zfs_uio_t * uio,zfs_uio_rw_t rw)575 zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw)
576 {
577 	size_t start;
578 	size_t wanted = uio->uio_resid - uio->uio_skip;
579 	ssize_t rollback = 0;
580 	ssize_t cnt;
581 	unsigned maxpages = DIV_ROUND_UP(wanted, PAGE_SIZE);
582 
583 	while (wanted) {
584 		cnt = iov_iter_get_pages(uio->uio_iter,
585 		    &uio->uio_dio.pages[uio->uio_dio.npages],
586 		    wanted, maxpages, &start);
587 		if (cnt < 0) {
588 			iov_iter_revert(uio->uio_iter, rollback);
589 			return (SET_ERROR(-cnt));
590 		}
591 		/*
592 		 * All Direct I/O operations must be page aligned.
593 		 */
594 		ASSERT(IS_P2ALIGNED(start, PAGE_SIZE));
595 		uio->uio_dio.npages += DIV_ROUND_UP(cnt, PAGE_SIZE);
596 		rollback += cnt;
597 		wanted -= cnt;
598 		iov_iter_advance(uio->uio_iter, cnt);
599 
600 	}
601 	ASSERT3U(rollback, ==, uio->uio_resid - uio->uio_skip);
602 	iov_iter_revert(uio->uio_iter, rollback);
603 
604 	return (0);
605 }
606 #endif /* HAVE_PIN_USER_PAGES_UNLOCKED */
607 
608 /*
609  * This function pins user pages. In the event that the user pages were not
610  * successfully pinned an error value is returned.
611  *
612  * On success, 0 is returned.
613  */
614 int
zfs_uio_get_dio_pages_alloc(zfs_uio_t * uio,zfs_uio_rw_t rw)615 zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
616 {
617 	int error = 0;
618 	long npages = DIV_ROUND_UP(uio->uio_resid, PAGE_SIZE);
619 	size_t size = npages * sizeof (struct page *);
620 
621 	if (uio->uio_segflg == UIO_ITER) {
622 		uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP);
623 #if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
624 		error = zfs_uio_pin_user_pages(uio, rw);
625 #else
626 		error = zfs_uio_get_dio_pages_iov_iter(uio, rw);
627 #endif
628 	} else {
629 		return (SET_ERROR(EOPNOTSUPP));
630 	}
631 
632 	ASSERT3S(uio->uio_dio.npages, >=, 0);
633 
634 	if (error) {
635 #if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
636 		unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages);
637 #else
638 		for (long i = 0; i < uio->uio_dio.npages; i++)
639 			put_page(uio->uio_dio.pages[i]);
640 #endif
641 		vmem_free(uio->uio_dio.pages, size);
642 		return (error);
643 	} else {
644 		ASSERT3S(uio->uio_dio.npages, ==, npages);
645 	}
646 
647 #if !defined(HAVE_PIN_USER_PAGES_UNLOCKED)
648 	if (rw == UIO_WRITE)
649 		zfs_uio_dio_check_for_zero_page(uio);
650 #endif
651 
652 	uio->uio_extflg |= UIO_DIRECT;
653 
654 	return (0);
655 }
656 
657 #endif /* _KERNEL */
658