/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
/*
 * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
 */
#ifdef _KERNEL

#include <sys/errno.h>
#include <sys/vmem.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/uio_impl.h>
#include <sys/string.h>
#include <sys/zfs_refcount.h>
#include <sys/zfs_debug.h>
#include <linux/kmap_compat.h>
#include <linux/uaccess.h>
#include <linux/pagemap.h>
#include <linux/mman.h>

/*
 * Move "n" bytes at byte address "p"; "rw" indicates the direction
 * of the move, and the I/O parameters are provided in "uio", which is
 * updated to reflect the data which was moved. Returns 0 on success or
 * a non-zero errno on failure.
 */
static int
zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	const struct iovec *iov = uio->uio_iov;
	size_t skip = uio->uio_skip;
	ulong_t cnt;

	while (n && uio->uio_resid) {
		cnt = MIN(iov->iov_len - skip, n);
		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
			/*
			 * p = kernel data pointer
			 * iov->iov_base = user data pointer
			 */
			if (rw == UIO_READ) {
				if (copy_to_user(iov->iov_base + skip, p, cnt))
					return (EFAULT);
			} else {
				unsigned long b_left = 0;
				if (uio->uio_fault_disable) {
					if (!zfs_access_ok(VERIFY_READ,
					    (iov->iov_base + skip), cnt)) {
						return (EFAULT);
					}
					pagefault_disable();
					b_left =
					    __copy_from_user_inatomic(p,
					    (iov->iov_base + skip), cnt);
					pagefault_enable();
				} else {
					b_left =
					    copy_from_user(p,
					    (iov->iov_base + skip), cnt);
				}
				if (b_left > 0) {
					unsigned long c_bytes =
					    cnt - b_left;
					uio->uio_skip += c_bytes;
					ASSERT3U(uio->uio_skip, <,
					    iov->iov_len);
					uio->uio_resid -= c_bytes;
					uio->uio_loffset += c_bytes;
					return (EFAULT);
				}
			}
			break;
		case UIO_SYSSPACE:
			if (rw == UIO_READ)
				memcpy(iov->iov_base + skip, p, cnt);
			else
				memcpy(p, iov->iov_base + skip, cnt);
			break;
		default:
			ASSERT(0);
		}
		skip += cnt;
		if (skip == iov->iov_len) {
			skip = 0;
			uio->uio_iov = (++iov);
			uio->uio_iovcnt--;
		}
		uio->uio_skip = skip;
		uio->uio_resid -= cnt;
		uio->uio_loffset += cnt;
		p = (caddr_t)p + cnt;
		n -= cnt;
	}
	return (0);
}

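/*
 * Move data between the buffer "p" and the bvec pages described by the
 * uio (used when the uio does not wrap a struct request).
 */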
static int
zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	const struct bio_vec *bv = uio->uio_bvec;
	size_t skip = uio->uio_skip;
	ulong_t cnt;

	while (n && uio->uio_resid) {
		void *paddr;
		cnt = MIN(bv->bv_len - skip, n);

		paddr = zfs_kmap_local(bv->bv_page);
		if (rw == UIO_READ) {
			/* Copy from buffer 'p' to the bvec data */
			memcpy(paddr + bv->bv_offset + skip, p, cnt);
		} else {
			/* Copy from bvec data to buffer 'p' */
			memcpy(p, paddr + bv->bv_offset + skip, cnt);
		}
		zfs_kunmap_local(paddr);

		skip += cnt;
		if (skip == bv->bv_len) {
			skip = 0;
			uio->uio_bvec = (++bv);
			uio->uio_iovcnt--;
		}
		uio->uio_skip = skip;
		uio->uio_resid -= cnt;
		uio->uio_loffset += cnt;
		p = (caddr_t)p + cnt;
		n -= cnt;
	}
	return (0);
}

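/*
 * Copy "cnt" bytes between the buffer "p" and a single bvec, mapping the
 * bvec page for the duration of the copy.
 */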
static void
zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw,
    struct bio_vec *bv)
{
	void *paddr;

	paddr = zfs_kmap_local(bv->bv_page);
	if (rw == UIO_READ) {
		/* Copy from buffer 'p' to the bvec data */
		memcpy(paddr + bv->bv_offset + skip, p, cnt);
	} else {
		/* Copy from bvec data to buffer 'p' */
		memcpy(p, paddr + bv->bv_offset + skip, cnt);
	}
	zfs_kunmap_local(paddr);
}

/*
 * Copy 'n' bytes of data between the buffer p[] and the data represented
 * by the request in the uio.
 */
static int
zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	struct request *rq = uio->rq;
	struct bio_vec bv;
	struct req_iterator iter;
	size_t this_seg_start;	/* logical offset */
	size_t this_seg_end;	/* logical offset */
	size_t skip_in_seg;
	size_t copy_from_seg;
	size_t orig_loffset;
	int copied = 0;

	/*
	 * Get the original logical offset of this entire request (because
	 * uio->uio_loffset will be modified over time).
	 */
	orig_loffset = io_offset(NULL, rq);
	this_seg_start = orig_loffset;

	rq_for_each_segment(bv, rq, iter) {
		/*
		 * Lookup what the logical offset of the last byte of this
		 * segment is.
		 */
		this_seg_end = this_seg_start + bv.bv_len - 1;

		/*
		 * We only need to operate on segments that have data we're
		 * copying.
		 */
		if (uio->uio_loffset >= this_seg_start &&
		    uio->uio_loffset <= this_seg_end) {
			/*
			 * Some, or all, of the data in this segment needs
			 * to be copied.
			 */

			/*
			 * We may not be copying from the first byte in the
			 * segment. Figure out how many bytes to skip copying
			 * from the beginning of this segment.
			 */
			skip_in_seg = uio->uio_loffset - this_seg_start;

			/*
			 * Calculate the total number of bytes from this
			 * segment that we will be copying.
			 */
			copy_from_seg = MIN(bv.bv_len - skip_in_seg, n);

			/* Copy the bytes */
			zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv);
			p = ((char *)p) + copy_from_seg;

			n -= copy_from_seg;
			uio->uio_resid -= copy_from_seg;
			uio->uio_loffset += copy_from_seg;
			copied = 1;	/* We copied some data */
		}

		this_seg_start = this_seg_end + 1;
	}

	if (!copied) {
		/* Didn't copy anything */
		uio->uio_resid = 0;
	}
	return (0);
}

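/*
 * Dispatch a bvec-backed uio to the struct request implementation when
 * present, otherwise to the plain bvec implementation.
 */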
static int
zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	if (uio->rq != NULL)
		return (zfs_uiomove_bvec_rq(p, n, rw, uio));
	return (zfs_uiomove_bvec_impl(p, n, rw, uio));
}

#if defined(HAVE_VFS_IOV_ITER)
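/*
 * Move data between the buffer "p" and an iov_iter based uio. When "revert"
 * is set the iov_iter is rolled back after the copy so the caller's iterator
 * is left unmodified.
 */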
static int
zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
    boolean_t revert)
{
	size_t cnt = MIN(n, uio->uio_resid);

	if (uio->uio_skip)
		iov_iter_advance(uio->uio_iter, uio->uio_skip);

	if (rw == UIO_READ)
		cnt = copy_to_iter(p, cnt, uio->uio_iter);
	else
		cnt = copy_from_iter(p, cnt, uio->uio_iter);

	/*
	 * When operating on a full pipe no bytes are processed.
	 * In this case return EFAULT, which is converted to EAGAIN
	 * by the kernel's generic_file_splice_read() function.
	 */
	if (cnt == 0)
		return (EFAULT);

	/*
	 * Revert advancing the uio_iter. This is set by zfs_uiocopy()
	 * to avoid consuming the uio and its iov_iter structure.
	 */
	if (revert)
		iov_iter_revert(uio->uio_iter, cnt);

	uio->uio_resid -= cnt;
	uio->uio_loffset += cnt;

	return (0);
}
#endif

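/*
 * Move "n" bytes between the buffer "p" and the uio, dispatching on the
 * uio segment type.
 */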
int
zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	if (uio->uio_segflg == UIO_BVEC)
		return (zfs_uiomove_bvec(p, n, rw, uio));
#if defined(HAVE_VFS_IOV_ITER)
	else if (uio->uio_segflg == UIO_ITER)
		return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE));
#endif
	else
		return (zfs_uiomove_iov(p, n, rw, uio));
}
EXPORT_SYMBOL(zfs_uiomove);

/*
 * Fault in the pages of the first n bytes specified by the uio structure.
 * 1 byte in each page is touched and the uio struct is unmodified. Any
 * error terminates the prefault attempt, as this is only a best effort to
 * get the pages resident.
 */
int
zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
{
	if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC ||
	    (uio->uio_extflg & UIO_DIRECT)) {
		/*
		 * There's never a need to fault in kernel pages or Direct I/O
		 * write pages. Direct I/O write pages have already been
		 * pinned, so a fault can never occur for them.
		 */
		return (0);
#if defined(HAVE_VFS_IOV_ITER)
	} else if (uio->uio_segflg == UIO_ITER) {
		/*
		 * Since at least the Linux 4.9 kernel,
		 * iov_iter_fault_in_readable() can be relied on to fault in
		 * user pages when referenced.
		 */
		if (iov_iter_fault_in_readable(uio->uio_iter, n))
			return (EFAULT);
#endif
	} else {
		/* Fault in all user pages */
		ASSERT3S(uio->uio_segflg, ==, UIO_USERSPACE);
		const struct iovec *iov = uio->uio_iov;
		int iovcnt = uio->uio_iovcnt;
		size_t skip = uio->uio_skip;
		uint8_t tmp;
		caddr_t p;

		for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
			ulong_t cnt = MIN(iov->iov_len - skip, n);
			/* empty iov */
			if (cnt == 0)
				continue;
			n -= cnt;
			/* touch each page in this segment. */
			p = iov->iov_base + skip;
			while (cnt) {
				if (copy_from_user(&tmp, p, 1))
					return (EFAULT);
				ulong_t incr = MIN(cnt, PAGESIZE);
				p += incr;
				cnt -= incr;
			}
			/* touch the last byte in case it straddles a page. */
			p--;
			if (copy_from_user(&tmp, p, 1))
				return (EFAULT);
		}
	}

	return (0);
}
EXPORT_SYMBOL(zfs_uio_prefaultpages);

/*
 * The same as zfs_uiomove() but doesn't modify the uio structure.
 * Returns in cbytes how many bytes were copied.
 */
int
zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
{
	zfs_uio_t uio_copy;
	int ret;

	memcpy(&uio_copy, uio, sizeof (zfs_uio_t));

	if (uio->uio_segflg == UIO_BVEC)
		ret = zfs_uiomove_bvec(p, n, rw, &uio_copy);
#if defined(HAVE_VFS_IOV_ITER)
	else if (uio->uio_segflg == UIO_ITER)
		ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE);
#endif
	else
		ret = zfs_uiomove_iov(p, n, rw, &uio_copy);

	*cbytes = uio->uio_resid - uio_copy.uio_resid;

	return (ret);
}
EXPORT_SYMBOL(zfs_uiocopy);

/*
 * Drop the next n chars out of *uio.
 */
void
zfs_uioskip(zfs_uio_t *uio, size_t n)
{
	if (n > uio->uio_resid)
		return;
	/*
	 * When using a uio with a struct request, we simply
	 * use uio_loffset as a pointer to the next logical byte to
	 * copy in the request. We don't have to do any fancy
	 * accounting with uio_bvec/uio_iovcnt since we don't use
	 * them.
	 */
	if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) {
		uio->uio_skip += n;
		while (uio->uio_iovcnt &&
		    uio->uio_skip >= uio->uio_bvec->bv_len) {
			uio->uio_skip -= uio->uio_bvec->bv_len;
			uio->uio_bvec++;
			uio->uio_iovcnt--;
		}
#if defined(HAVE_VFS_IOV_ITER)
	} else if (uio->uio_segflg == UIO_ITER) {
		iov_iter_advance(uio->uio_iter, n);
#endif
	} else {
		uio->uio_skip += n;
		while (uio->uio_iovcnt &&
		    uio->uio_skip >= uio->uio_iov->iov_len) {
			uio->uio_skip -= uio->uio_iov->iov_len;
			uio->uio_iov++;
			uio->uio_iovcnt--;
		}
	}

	uio->uio_loffset += n;
	uio->uio_resid -= n;
}
EXPORT_SYMBOL(zfs_uioskip);

/*
 * Check if the uio is page-aligned in memory.
 */
boolean_t
zfs_uio_page_aligned(zfs_uio_t *uio)
{
	boolean_t aligned = B_TRUE;

	if (uio->uio_segflg == UIO_USERSPACE ||
	    uio->uio_segflg == UIO_SYSSPACE) {
		const struct iovec *iov = uio->uio_iov;
		size_t skip = uio->uio_skip;

		for (int i = uio->uio_iovcnt; i > 0; iov++, i--) {
			uintptr_t addr = (uintptr_t)(iov->iov_base + skip);
			size_t size = iov->iov_len - skip;
			if ((addr & (PAGE_SIZE - 1)) ||
			    (size & (PAGE_SIZE - 1))) {
				aligned = B_FALSE;
				break;
			}
			skip = 0;
		}
#if defined(HAVE_VFS_IOV_ITER)
	} else if (uio->uio_segflg == UIO_ITER) {
		unsigned long alignment =
		    iov_iter_alignment(uio->uio_iter);
		aligned = IS_P2ALIGNED(alignment, PAGE_SIZE);
#endif
	} else {
		/* Currently not supported */
		aligned = B_FALSE;
	}

	return (aligned);
}

#if defined(HAVE_ZERO_PAGE_GPL_ONLY) || !defined(_LP64)
#define	ZFS_MARKED_PAGE		0x0
#define	IS_ZFS_MARKED_PAGE(_p)	0
#define	zfs_mark_page(_p)
#define	zfs_unmark_page(_p)
#define	IS_ZERO_PAGE(_p)	0

#else
/*
 * Mark pages to know if they were allocated to replace ZERO_PAGE() for
 * Direct I/O writes.
 */
#define	ZFS_MARKED_PAGE		0x5a465350414745 /* ASCII: ZFSPAGE */
#define	IS_ZFS_MARKED_PAGE(_p) \
	(page_private(_p) == (unsigned long)ZFS_MARKED_PAGE)
#define	IS_ZERO_PAGE(_p) ((_p) == ZERO_PAGE(0))

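/*
 * Take a reference on the page and tag it as a ZFS-allocated replacement
 * page via its private field.
 */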
static inline void
zfs_mark_page(struct page *page)
{
	ASSERT3P(page, !=, NULL);
	get_page(page);
	SetPagePrivate(page);
	set_page_private(page, ZFS_MARKED_PAGE);
}

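/*
 * Clear the ZFS marking from the page and drop the reference taken in
 * zfs_mark_page().
 */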
static inline void
zfs_unmark_page(struct page *page)
{
	ASSERT3P(page, !=, NULL);
	set_page_private(page, 0UL);
	ClearPagePrivate(page);
	put_page(page);
}
#endif /* HAVE_ZERO_PAGE_GPL_ONLY || !_LP64 */

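/*
 * Replace any pinned references to the kernel's shared ZERO_PAGE() with
 * freshly allocated zero-filled pages before a Direct I/O write.
 */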
static void
zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio)
{
	ASSERT3P(uio->uio_dio.pages, !=, NULL);

	for (long i = 0; i < uio->uio_dio.npages; i++) {
		struct page *p = uio->uio_dio.pages[i];
		lock_page(p);

		if (IS_ZERO_PAGE(p)) {
			/*
			 * If the user page points to the kernel's ZERO_PAGE(),
			 * a new zero-filled page is allocated in its place so
			 * the contents of the page cannot be changed by the
			 * user while a Direct I/O write is taking place.
			 */
			gfp_t gfp_zero_page = __GFP_NOWARN | GFP_NOIO |
			    __GFP_ZERO | GFP_KERNEL;

			ASSERT0(IS_ZFS_MARKED_PAGE(p));
			unlock_page(p);
			put_page(p);

			/* Record the replacement page in the pinned array */
			uio->uio_dio.pages[i] =
			    __page_cache_alloc(gfp_zero_page);
			zfs_mark_page(uio->uio_dio.pages[i]);
		} else {
			unlock_page(p);
		}
	}
}

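/*
 * Release the pages pinned for a Direct I/O request, freeing any marked
 * replacement pages, and free the page array itself.
 */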
void
zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	ASSERT(uio->uio_extflg & UIO_DIRECT);
	ASSERT3P(uio->uio_dio.pages, !=, NULL);

	for (long i = 0; i < uio->uio_dio.npages; i++) {
		struct page *p = uio->uio_dio.pages[i];

		if (IS_ZFS_MARKED_PAGE(p)) {
			zfs_unmark_page(p);
			__free_page(p);
			continue;
		}

		put_page(p);
	}

	vmem_free(uio->uio_dio.pages,
	    uio->uio_dio.npages * sizeof (struct page *));
}

/*
 * zfs_uio_iov_step() is just a modified version of the STEP function of
 * Linux's iov_iter_get_pages().
 */
static int
zfs_uio_iov_step(struct iovec v, zfs_uio_rw_t rw, zfs_uio_t *uio,
    long *numpages)
{
	unsigned long addr = (unsigned long)(v.iov_base);
	size_t len = v.iov_len;
	unsigned long n = DIV_ROUND_UP(len, PAGE_SIZE);

	/*
	 * A read passes FOLL_WRITE because we are stating that the kernel
	 * will have write access to the user pages: when a Direct I/O read
	 * request is issued, the kernel must write to the user pages.
	 */
	long res = get_user_pages_unlocked(
	    P2ALIGN_TYPED(addr, PAGE_SIZE, unsigned long), n,
	    &uio->uio_dio.pages[uio->uio_dio.npages],
	    rw == UIO_READ ? FOLL_WRITE : 0);
	if (res < 0) {
		return (SET_ERROR(-res));
	} else if (len != (res * PAGE_SIZE)) {
		return (SET_ERROR(EFAULT));
	}

	ASSERT3S(len, ==, res * PAGE_SIZE);
	*numpages = res;
	return (0);
}

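/*
 * Pin the user pages for an iovec-based uio by walking its iovecs and
 * storing the pages in uio->uio_dio.pages.
 */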
static int
zfs_uio_get_dio_pages_iov(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	const struct iovec *iovp = uio->uio_iov;
	size_t skip = uio->uio_skip;
	size_t len = uio->uio_resid - skip;

	ASSERT(uio->uio_segflg != UIO_SYSSPACE);

	for (int i = 0; i < uio->uio_iovcnt; i++) {
		struct iovec iov;
		long numpages = 0;

		if (iovp->iov_len == 0) {
			iovp++;
			skip = 0;
			continue;
		}
		iov.iov_len = MIN(len, iovp->iov_len - skip);
		iov.iov_base = iovp->iov_base + skip;
		int error = zfs_uio_iov_step(iov, rw, uio, &numpages);

		if (error)
			return (error);

		uio->uio_dio.npages += numpages;
		len -= iov.iov_len;
		skip = 0;
		iovp++;
	}

	ASSERT0(len);

	return (0);
}

#if defined(HAVE_VFS_IOV_ITER)
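/*
 * Pin the user pages for an iov_iter based uio, reverting the iterator
 * once all pages have been collected.
 */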
static int
zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	size_t skip = uio->uio_skip;
	size_t wanted = uio->uio_resid - uio->uio_skip;
	ssize_t rollback = 0;
	ssize_t cnt;
	unsigned maxpages = DIV_ROUND_UP(wanted, PAGE_SIZE);

	while (wanted) {
#if defined(HAVE_IOV_ITER_GET_PAGES2)
		cnt = iov_iter_get_pages2(uio->uio_iter,
		    &uio->uio_dio.pages[uio->uio_dio.npages],
		    wanted, maxpages, &skip);
#else
		cnt = iov_iter_get_pages(uio->uio_iter,
		    &uio->uio_dio.pages[uio->uio_dio.npages],
		    wanted, maxpages, &skip);
#endif
		if (cnt < 0) {
			iov_iter_revert(uio->uio_iter, rollback);
			return (SET_ERROR(-cnt));
		}
		uio->uio_dio.npages += DIV_ROUND_UP(cnt, PAGE_SIZE);
		rollback += cnt;
		wanted -= cnt;
		skip = 0;
#if !defined(HAVE_IOV_ITER_GET_PAGES2)
		/*
		 * Unlike iov_iter_get_pages2(), iov_iter_get_pages() does
		 * not advance the iov_iter on success, so advance it
		 * manually.
		 */
		iov_iter_advance(uio->uio_iter, cnt);
#endif
	}
	ASSERT3U(rollback, ==, uio->uio_resid - uio->uio_skip);
	iov_iter_revert(uio->uio_iter, rollback);

	return (0);
}
#endif /* HAVE_VFS_IOV_ITER */

/*
 * This function pins user pages. In the event that the user pages were not
 * successfully pinned an error value is returned.
 *
 * On success, 0 is returned.
 */
int
zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	int error = 0;
	long npages = DIV_ROUND_UP(uio->uio_resid, PAGE_SIZE);
	size_t size = npages * sizeof (struct page *);

	if (uio->uio_segflg == UIO_USERSPACE) {
		uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP);
		error = zfs_uio_get_dio_pages_iov(uio, rw);
#if defined(HAVE_VFS_IOV_ITER)
	} else if (uio->uio_segflg == UIO_ITER) {
		uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP);
		error = zfs_uio_get_dio_pages_iov_iter(uio, rw);
#endif
	} else {
		return (SET_ERROR(EOPNOTSUPP));
	}

	ASSERT3S(uio->uio_dio.npages, >=, 0);

	if (error) {
		for (long i = 0; i < uio->uio_dio.npages; i++)
			put_page(uio->uio_dio.pages[i]);
		vmem_free(uio->uio_dio.pages, size);
		return (error);
	} else {
		ASSERT3S(uio->uio_dio.npages, ==, npages);
	}

	if (rw == UIO_WRITE) {
		zfs_uio_dio_check_for_zero_page(uio);
	}

	uio->uio_extflg |= UIO_DIRECT;

	return (0);
}

#endif /* _KERNEL */