xref: /linux/lib/iov_iter.c (revision ebf68996de0ab250c5d520eb2291ab65643e9a1e)
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/export.h>
3 #include <linux/bvec.h>
4 #include <linux/uio.h>
5 #include <linux/pagemap.h>
6 #include <linux/slab.h>
7 #include <linux/vmalloc.h>
8 #include <linux/splice.h>
9 #include <net/checksum.h>
10 #include <linux/scatterlist.h>
11 
12 #define PIPE_PARANOIA /* for now */
13 
/*
 * iterate_iovec - walk up to @n bytes of the user iovec array of @i.
 *
 * @i:    iterator; only i->iov is read here.
 * @n:    lvalue; bytes requested on entry, bytes actually processed on exit.
 * @__v:  caller-declared struct iovec describing the current chunk for STEP.
 * @__p:  caller-declared cursor; left pointing at the last segment visited.
 * @skip: lvalue; offset into the first segment on entry, offset just past
 *        the last processed byte within *__p on exit.
 * @STEP: expression evaluated once per non-empty chunk.  Its value is the
 *        number of bytes of __v that were NOT processed; a non-zero value
 *        stops the walk.
 *
 * Zero-length segments are stepped over.  The macro itself never consults
 * i->nr_segs or i->count, so keeping @n within bounds is up to the caller.
 */
#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}
41 
/*
 * iterate_kvec - walk @n bytes of the kvec array of @i.
 *
 * Same shape as iterate_iovec(), but the value of STEP is discarded, so
 * the walk always covers the whole request: @n is counted down to zero
 * while iterating and then restored to its entry value.
 *
 * @i:    iterator; only i->kvec is read here.
 * @n:    lvalue; bytes requested (unchanged once the macro completes).
 * @__v:  caller-declared struct kvec describing the current chunk for STEP.
 * @__p:  caller-declared cursor; left pointing at the last segment visited.
 * @skip: lvalue; offset into the first segment on entry, offset just past
 *        the last processed byte within *__p on exit.
 * @STEP: expression evaluated once per non-empty chunk; result ignored.
 */
#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}
64 
/*
 * iterate_bvec - walk @n bytes of the bio_vec array of @i.
 *
 * Builds a starting bvec_iter (index 0, bi_bvec_done = @skip, bi_size = @n)
 * and runs for_each_bvec() over i->bvec.  Zero-length chunks are skipped and
 * the value of STEP is discarded.  Neither @n nor @skip is written by this
 * macro; the final position is left in the caller-declared @__bi.
 *
 * @__v:  caller-declared struct bio_vec describing the current chunk.
 * @__bi: caller-declared struct bvec_iter used as the loop cursor.
 */
#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;			\
	__start.bi_size = n;				\
	__start.bi_bvec_done = skip;			\
	__start.bi_idx = 0;				\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		if (!__v.bv_len)			\
			continue;			\
		(void)(STEP);				\
	}						\
}
76 
/*
 * iterate_all_kinds - run a per-chunk step over @n bytes of @i without
 * modifying the iterator itself.
 *
 * Does nothing when @n is zero.  Otherwise dispatches on the bits of
 * i->type: ITER_BVEC runs @B via iterate_bvec(), ITER_KVEC runs @K via
 * iterate_kvec(), ITER_DISCARD does nothing, and anything else is treated
 * as a user iovec and runs @I via iterate_iovec().
 *
 * @v is the name under which the current chunk is visible to the step
 * expressions (struct bio_vec, struct kvec or struct iovec respectively).
 * The starting offset is taken from i->iov_offset into a local, so the
 * iterator's fields are only read.  In the iovec case @n is rewritten by
 * iterate_iovec() to the number of bytes actually processed.
 */
#define iterate_all_kinds(i, n, v, I, B, K) {			\
	if (likely(n)) {					\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
		}						\
	}							\
}
96 
/*
 * iterate_and_advance - run a per-chunk step over @n bytes of @i and move
 * the iterator past whatever was processed.
 *
 * @n is first clamped to i->count.  If i->count is non-zero the macro
 * dispatches on i->type exactly like iterate_all_kinds() and afterwards
 * updates the iterator:
 *
 *   ITER_BVEC:    i->bvec is moved to the bio_vec selected by the final
 *                 bvec_iter, i->nr_segs is reduced by the distance moved,
 *                 and the new offset is __bi.bi_bvec_done.
 *   ITER_KVEC /
 *   iovec:        if the walk ended exactly at the end of a segment the
 *                 cursor steps to the next one and the offset resets to 0;
 *                 i->nr_segs and i->kvec / i->iov are updated to match.
 *   ITER_DISCARD: only the offset is bumped by @n.
 *
 * Finally i->count is reduced by @n and i->iov_offset is stored.  In the
 * iovec case @n has been rewritten by iterate_iovec() to the number of
 * bytes actually processed, so a short step yields a short advance.
 */
#define iterate_and_advance(i, n, v, I, B, K) {			\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;		\
			skip = __bi.bi_bvec_done;		\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {		\
				kvec++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
			skip += n;				\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {		\
				iov++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}
137 
138 static int copyout(void __user *to, const void *from, size_t n)
139 {
140 	if (access_ok(to, n)) {
141 		kasan_check_read(from, n);
142 		n = raw_copy_to_user(to, from, n);
143 	}
144 	return n;
145 }
146 
147 static int copyin(void *to, const void __user *from, size_t n)
148 {
149 	if (access_ok(from, n)) {
150 		kasan_check_write(to, n);
151 		n = raw_copy_from_user(to, from, n);
152 	}
153 	return n;
154 }
155 
156 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
157 			 struct iov_iter *i)
158 {
159 	size_t skip, copy, left, wanted;
160 	const struct iovec *iov;
161 	char __user *buf;
162 	void *kaddr, *from;
163 
164 	if (unlikely(bytes > i->count))
165 		bytes = i->count;
166 
167 	if (unlikely(!bytes))
168 		return 0;
169 
170 	might_fault();
171 	wanted = bytes;
172 	iov = i->iov;
173 	skip = i->iov_offset;
174 	buf = iov->iov_base + skip;
175 	copy = min(bytes, iov->iov_len - skip);
176 
177 	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
178 		kaddr = kmap_atomic(page);
179 		from = kaddr + offset;
180 
181 		/* first chunk, usually the only one */
182 		left = copyout(buf, from, copy);
183 		copy -= left;
184 		skip += copy;
185 		from += copy;
186 		bytes -= copy;
187 
188 		while (unlikely(!left && bytes)) {
189 			iov++;
190 			buf = iov->iov_base;
191 			copy = min(bytes, iov->iov_len);
192 			left = copyout(buf, from, copy);
193 			copy -= left;
194 			skip = copy;
195 			from += copy;
196 			bytes -= copy;
197 		}
198 		if (likely(!bytes)) {
199 			kunmap_atomic(kaddr);
200 			goto done;
201 		}
202 		offset = from - kaddr;
203 		buf += copy;
204 		kunmap_atomic(kaddr);
205 		copy = min(bytes, iov->iov_len - skip);
206 	}
207 	/* Too bad - revert to non-atomic kmap */
208 
209 	kaddr = kmap(page);
210 	from = kaddr + offset;
211 	left = copyout(buf, from, copy);
212 	copy -= left;
213 	skip += copy;
214 	from += copy;
215 	bytes -= copy;
216 	while (unlikely(!left && bytes)) {
217 		iov++;
218 		buf = iov->iov_base;
219 		copy = min(bytes, iov->iov_len);
220 		left = copyout(buf, from, copy);
221 		copy -= left;
222 		skip = copy;
223 		from += copy;
224 		bytes -= copy;
225 	}
226 	kunmap(page);
227 
228 done:
229 	if (skip == iov->iov_len) {
230 		iov++;
231 		skip = 0;
232 	}
233 	i->count -= wanted - bytes;
234 	i->nr_segs -= iov - i->iov;
235 	i->iov = iov;
236 	i->iov_offset = skip;
237 	return wanted - bytes;
238 }
239 
240 static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
241 			 struct iov_iter *i)
242 {
243 	size_t skip, copy, left, wanted;
244 	const struct iovec *iov;
245 	char __user *buf;
246 	void *kaddr, *to;
247 
248 	if (unlikely(bytes > i->count))
249 		bytes = i->count;
250 
251 	if (unlikely(!bytes))
252 		return 0;
253 
254 	might_fault();
255 	wanted = bytes;
256 	iov = i->iov;
257 	skip = i->iov_offset;
258 	buf = iov->iov_base + skip;
259 	copy = min(bytes, iov->iov_len - skip);
260 
261 	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
262 		kaddr = kmap_atomic(page);
263 		to = kaddr + offset;
264 
265 		/* first chunk, usually the only one */
266 		left = copyin(to, buf, copy);
267 		copy -= left;
268 		skip += copy;
269 		to += copy;
270 		bytes -= copy;
271 
272 		while (unlikely(!left && bytes)) {
273 			iov++;
274 			buf = iov->iov_base;
275 			copy = min(bytes, iov->iov_len);
276 			left = copyin(to, buf, copy);
277 			copy -= left;
278 			skip = copy;
279 			to += copy;
280 			bytes -= copy;
281 		}
282 		if (likely(!bytes)) {
283 			kunmap_atomic(kaddr);
284 			goto done;
285 		}
286 		offset = to - kaddr;
287 		buf += copy;
288 		kunmap_atomic(kaddr);
289 		copy = min(bytes, iov->iov_len - skip);
290 	}
291 	/* Too bad - revert to non-atomic kmap */
292 
293 	kaddr = kmap(page);
294 	to = kaddr + offset;
295 	left = copyin(to, buf, copy);
296 	copy -= left;
297 	skip += copy;
298 	to += copy;
299 	bytes -= copy;
300 	while (unlikely(!left && bytes)) {
301 		iov++;
302 		buf = iov->iov_base;
303 		copy = min(bytes, iov->iov_len);
304 		left = copyin(to, buf, copy);
305 		copy -= left;
306 		skip = copy;
307 		to += copy;
308 		bytes -= copy;
309 	}
310 	kunmap(page);
311 
312 done:
313 	if (skip == iov->iov_len) {
314 		iov++;
315 		skip = 0;
316 	}
317 	i->count -= wanted - bytes;
318 	i->nr_segs -= iov - i->iov;
319 	i->iov = iov;
320 	i->iov_offset = skip;
321 	return wanted - bytes;
322 }
323 
#ifdef PIPE_PARANOIA
/*
 * Check that a pipe-backed iterator still points at the tail of its pipe.
 *
 * With a non-zero iov_offset the iterator must sit on the last occupied
 * buffer, exactly at the end of that buffer's data.  With a zero offset it
 * must sit on the slot right after the last occupied buffer.
 *
 * On a mismatch the iterator and pipe state are dumped, a warning is
 * raised and false is returned.
 */
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	int idx = i->idx;
	int next = pipe->curbuf + pipe->nrbufs;
	bool ok;
	int k;

	if (!i->iov_offset) {
		/* must be right after the last buffer */
		ok = !(idx != (next & (pipe->buffers - 1)));
	} else if (unlikely(!pipe->nrbufs)) {
		/* pipe must be non-empty */
		ok = false;
	} else if (unlikely(idx != ((next - 1) & (pipe->buffers - 1)))) {
		/* must be at the last buffer... */
		ok = false;
	} else {
		struct pipe_buffer *p = &pipe->bufs[idx];

		/* ... at the end of segment */
		ok = !unlikely(p->offset + p->len != i->iov_offset);
	}
	if (ok)
		return true;

	printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
			pipe->curbuf, pipe->nrbufs, pipe->buffers);
	for (k = 0; k < pipe->buffers; k++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[k].ops,
			pipe->bufs[k].page,
			pipe->bufs[k].offset,
			pipe->bufs[k].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif
361 
362 static inline int next_idx(int idx, struct pipe_inode_info *pipe)
363 {
364 	return (idx + 1) & (pipe->buffers - 1);
365 }
366 
367 static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
368 			 struct iov_iter *i)
369 {
370 	struct pipe_inode_info *pipe = i->pipe;
371 	struct pipe_buffer *buf;
372 	size_t off;
373 	int idx;
374 
375 	if (unlikely(bytes > i->count))
376 		bytes = i->count;
377 
378 	if (unlikely(!bytes))
379 		return 0;
380 
381 	if (!sanity(i))
382 		return 0;
383 
384 	off = i->iov_offset;
385 	idx = i->idx;
386 	buf = &pipe->bufs[idx];
387 	if (off) {
388 		if (offset == off && buf->page == page) {
389 			/* merge with the last one */
390 			buf->len += bytes;
391 			i->iov_offset += bytes;
392 			goto out;
393 		}
394 		idx = next_idx(idx, pipe);
395 		buf = &pipe->bufs[idx];
396 	}
397 	if (idx == pipe->curbuf && pipe->nrbufs)
398 		return 0;
399 	pipe->nrbufs++;
400 	buf->ops = &page_cache_pipe_buf_ops;
401 	get_page(buf->page = page);
402 	buf->offset = offset;
403 	buf->len = bytes;
404 	i->iov_offset = offset + bytes;
405 	i->idx = idx;
406 out:
407 	i->count -= bytes;
408 	return bytes;
409 }
410 
411 /*
412  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
413  * bytes.  For each iovec, fault in each page that constitutes the iovec.
414  *
415  * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
416  * because it is an invalid address).
417  */
418 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
419 {
420 	size_t skip = i->iov_offset;
421 	const struct iovec *iov;
422 	int err;
423 	struct iovec v;
424 
425 	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
426 		iterate_iovec(i, bytes, v, iov, skip, ({
427 			err = fault_in_pages_readable(v.iov_base, v.iov_len);
428 			if (unlikely(err))
429 			return err;
430 		0;}))
431 	}
432 	return 0;
433 }
434 EXPORT_SYMBOL(iov_iter_fault_in_readable);
435 
436 void iov_iter_init(struct iov_iter *i, unsigned int direction,
437 			const struct iovec *iov, unsigned long nr_segs,
438 			size_t count)
439 {
440 	WARN_ON(direction & ~(READ | WRITE));
441 	direction &= READ | WRITE;
442 
443 	/* It will get better.  Eventually... */
444 	if (uaccess_kernel()) {
445 		i->type = ITER_KVEC | direction;
446 		i->kvec = (struct kvec *)iov;
447 	} else {
448 		i->type = ITER_IOVEC | direction;
449 		i->iov = iov;
450 	}
451 	i->nr_segs = nr_segs;
452 	i->iov_offset = 0;
453 	i->count = count;
454 }
455 EXPORT_SYMBOL(iov_iter_init);
456 
457 static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
458 {
459 	char *from = kmap_atomic(page);
460 	memcpy(to, from + offset, len);
461 	kunmap_atomic(from);
462 }
463 
464 static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
465 {
466 	char *to = kmap_atomic(page);
467 	memcpy(to + offset, from, len);
468 	kunmap_atomic(to);
469 }
470 
471 static void memzero_page(struct page *page, size_t offset, size_t len)
472 {
473 	char *addr = kmap_atomic(page);
474 	memset(addr + offset, 0, len);
475 	kunmap_atomic(addr);
476 }
477 
478 static inline bool allocated(struct pipe_buffer *buf)
479 {
480 	return buf->ops == &default_pipe_buf_ops;
481 }
482 
483 static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
484 {
485 	size_t off = i->iov_offset;
486 	int idx = i->idx;
487 	if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
488 		idx = next_idx(idx, i->pipe);
489 		off = 0;
490 	}
491 	*idxp = idx;
492 	*offp = off;
493 }
494 
495 static size_t push_pipe(struct iov_iter *i, size_t size,
496 			int *idxp, size_t *offp)
497 {
498 	struct pipe_inode_info *pipe = i->pipe;
499 	size_t off;
500 	int idx;
501 	ssize_t left;
502 
503 	if (unlikely(size > i->count))
504 		size = i->count;
505 	if (unlikely(!size))
506 		return 0;
507 
508 	left = size;
509 	data_start(i, &idx, &off);
510 	*idxp = idx;
511 	*offp = off;
512 	if (off) {
513 		left -= PAGE_SIZE - off;
514 		if (left <= 0) {
515 			pipe->bufs[idx].len += size;
516 			return size;
517 		}
518 		pipe->bufs[idx].len = PAGE_SIZE;
519 		idx = next_idx(idx, pipe);
520 	}
521 	while (idx != pipe->curbuf || !pipe->nrbufs) {
522 		struct page *page = alloc_page(GFP_USER);
523 		if (!page)
524 			break;
525 		pipe->nrbufs++;
526 		pipe->bufs[idx].ops = &default_pipe_buf_ops;
527 		pipe->bufs[idx].page = page;
528 		pipe->bufs[idx].offset = 0;
529 		if (left <= PAGE_SIZE) {
530 			pipe->bufs[idx].len = left;
531 			return size;
532 		}
533 		pipe->bufs[idx].len = PAGE_SIZE;
534 		left -= PAGE_SIZE;
535 		idx = next_idx(idx, pipe);
536 	}
537 	return size - left;
538 }
539 
540 static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
541 				struct iov_iter *i)
542 {
543 	struct pipe_inode_info *pipe = i->pipe;
544 	size_t n, off;
545 	int idx;
546 
547 	if (!sanity(i))
548 		return 0;
549 
550 	bytes = n = push_pipe(i, bytes, &idx, &off);
551 	if (unlikely(!n))
552 		return 0;
553 	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
554 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
555 		memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
556 		i->idx = idx;
557 		i->iov_offset = off + chunk;
558 		n -= chunk;
559 		addr += chunk;
560 	}
561 	i->count -= bytes;
562 	return bytes;
563 }
564 
565 static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
566 			      __wsum sum, size_t off)
567 {
568 	__wsum next = csum_partial_copy_nocheck(from, to, len, 0);
569 	return csum_block_add(sum, next, off);
570 }
571 
572 static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
573 				__wsum *csum, struct iov_iter *i)
574 {
575 	struct pipe_inode_info *pipe = i->pipe;
576 	size_t n, r;
577 	size_t off = 0;
578 	__wsum sum = *csum;
579 	int idx;
580 
581 	if (!sanity(i))
582 		return 0;
583 
584 	bytes = n = push_pipe(i, bytes, &idx, &r);
585 	if (unlikely(!n))
586 		return 0;
587 	for ( ; n; idx = next_idx(idx, pipe), r = 0) {
588 		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
589 		char *p = kmap_atomic(pipe->bufs[idx].page);
590 		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
591 		kunmap_atomic(p);
592 		i->idx = idx;
593 		i->iov_offset = r + chunk;
594 		n -= chunk;
595 		off += chunk;
596 		addr += chunk;
597 	}
598 	i->count -= bytes;
599 	*csum = sum;
600 	return bytes;
601 }
602 
603 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
604 {
605 	const char *from = addr;
606 	if (unlikely(iov_iter_is_pipe(i)))
607 		return copy_pipe_to_iter(addr, bytes, i);
608 	if (iter_is_iovec(i))
609 		might_fault();
610 	iterate_and_advance(i, bytes, v,
611 		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
612 		memcpy_to_page(v.bv_page, v.bv_offset,
613 			       (from += v.bv_len) - v.bv_len, v.bv_len),
614 		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
615 	)
616 
617 	return bytes;
618 }
619 EXPORT_SYMBOL(_copy_to_iter);
620 
621 #ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
622 static int copyout_mcsafe(void __user *to, const void *from, size_t n)
623 {
624 	if (access_ok(to, n)) {
625 		kasan_check_read(from, n);
626 		n = copy_to_user_mcsafe((__force void *) to, from, n);
627 	}
628 	return n;
629 }
630 
631 static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
632 		const char *from, size_t len)
633 {
634 	unsigned long ret;
635 	char *to;
636 
637 	to = kmap_atomic(page);
638 	ret = memcpy_mcsafe(to + offset, from, len);
639 	kunmap_atomic(to);
640 
641 	return ret;
642 }
643 
644 static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
645 				struct iov_iter *i)
646 {
647 	struct pipe_inode_info *pipe = i->pipe;
648 	size_t n, off, xfer = 0;
649 	int idx;
650 
651 	if (!sanity(i))
652 		return 0;
653 
654 	bytes = n = push_pipe(i, bytes, &idx, &off);
655 	if (unlikely(!n))
656 		return 0;
657 	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
658 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
659 		unsigned long rem;
660 
661 		rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr,
662 				chunk);
663 		i->idx = idx;
664 		i->iov_offset = off + chunk - rem;
665 		xfer += chunk - rem;
666 		if (rem)
667 			break;
668 		n -= chunk;
669 		addr += chunk;
670 	}
671 	i->count -= xfer;
672 	return xfer;
673 }
674 
675 /**
676  * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
677  * @addr: source kernel address
678  * @bytes: total transfer length
679  * @iter: destination iterator
680  *
681  * The pmem driver arranges for filesystem-dax to use this facility via
682  * dax_copy_to_iter() for protecting read/write to persistent memory.
683  * Unless / until an architecture can guarantee identical performance
684  * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
685  * performance regression to switch more users to the mcsafe version.
686  *
687  * Otherwise, the main differences between this and typical _copy_to_iter().
688  *
689  * * Typical tail/residue handling after a fault retries the copy
690  *   byte-by-byte until the fault happens again. Re-triggering machine
691  *   checks is potentially fatal so the implementation uses source
692  *   alignment and poison alignment assumptions to avoid re-triggering
693  *   hardware exceptions.
694  *
695  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
696  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
697  *   a short copy.
698  *
699  * See MCSAFE_TEST for self-test.
700  */
701 size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
702 {
703 	const char *from = addr;
704 	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
705 
706 	if (unlikely(iov_iter_is_pipe(i)))
707 		return copy_pipe_to_iter_mcsafe(addr, bytes, i);
708 	if (iter_is_iovec(i))
709 		might_fault();
710 	iterate_and_advance(i, bytes, v,
711 		copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
712 		({
713 		rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
714                                (from += v.bv_len) - v.bv_len, v.bv_len);
715 		if (rem) {
716 			curr_addr = (unsigned long) from;
717 			bytes = curr_addr - s_addr - rem;
718 			return bytes;
719 		}
720 		}),
721 		({
722 		rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
723 				v.iov_len);
724 		if (rem) {
725 			curr_addr = (unsigned long) from;
726 			bytes = curr_addr - s_addr - rem;
727 			return bytes;
728 		}
729 		})
730 	)
731 
732 	return bytes;
733 }
734 EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
735 #endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
736 
737 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
738 {
739 	char *to = addr;
740 	if (unlikely(iov_iter_is_pipe(i))) {
741 		WARN_ON(1);
742 		return 0;
743 	}
744 	if (iter_is_iovec(i))
745 		might_fault();
746 	iterate_and_advance(i, bytes, v,
747 		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
748 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
749 				 v.bv_offset, v.bv_len),
750 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
751 	)
752 
753 	return bytes;
754 }
755 EXPORT_SYMBOL(_copy_from_iter);
756 
757 bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
758 {
759 	char *to = addr;
760 	if (unlikely(iov_iter_is_pipe(i))) {
761 		WARN_ON(1);
762 		return false;
763 	}
764 	if (unlikely(i->count < bytes))
765 		return false;
766 
767 	if (iter_is_iovec(i))
768 		might_fault();
769 	iterate_all_kinds(i, bytes, v, ({
770 		if (copyin((to += v.iov_len) - v.iov_len,
771 				      v.iov_base, v.iov_len))
772 			return false;
773 		0;}),
774 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
775 				 v.bv_offset, v.bv_len),
776 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
777 	)
778 
779 	iov_iter_advance(i, bytes);
780 	return true;
781 }
782 EXPORT_SYMBOL(_copy_from_iter_full);
783 
784 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
785 {
786 	char *to = addr;
787 	if (unlikely(iov_iter_is_pipe(i))) {
788 		WARN_ON(1);
789 		return 0;
790 	}
791 	iterate_and_advance(i, bytes, v,
792 		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
793 					 v.iov_base, v.iov_len),
794 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
795 				 v.bv_offset, v.bv_len),
796 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
797 	)
798 
799 	return bytes;
800 }
801 EXPORT_SYMBOL(_copy_from_iter_nocache);
802 
803 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
804 /**
805  * _copy_from_iter_flushcache - write destination through cpu cache
806  * @addr: destination kernel address
807  * @bytes: total transfer length
808  * @iter: source iterator
809  *
810  * The pmem driver arranges for filesystem-dax to use this facility via
811  * dax_copy_from_iter() for ensuring that writes to persistent memory
812  * are flushed through the CPU cache. It is differentiated from
813  * _copy_from_iter_nocache() in that guarantees all data is flushed for
814  * all iterator types. The _copy_from_iter_nocache() only attempts to
815  * bypass the cache for the ITER_IOVEC case, and on some archs may use
816  * instructions that strand dirty-data in the cache.
817  */
818 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
819 {
820 	char *to = addr;
821 	if (unlikely(iov_iter_is_pipe(i))) {
822 		WARN_ON(1);
823 		return 0;
824 	}
825 	iterate_and_advance(i, bytes, v,
826 		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
827 					 v.iov_base, v.iov_len),
828 		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
829 				 v.bv_offset, v.bv_len),
830 		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
831 			v.iov_len)
832 	)
833 
834 	return bytes;
835 }
836 EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
837 #endif
838 
839 bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
840 {
841 	char *to = addr;
842 	if (unlikely(iov_iter_is_pipe(i))) {
843 		WARN_ON(1);
844 		return false;
845 	}
846 	if (unlikely(i->count < bytes))
847 		return false;
848 	iterate_all_kinds(i, bytes, v, ({
849 		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
850 					     v.iov_base, v.iov_len))
851 			return false;
852 		0;}),
853 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
854 				 v.bv_offset, v.bv_len),
855 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
856 	)
857 
858 	iov_iter_advance(i, bytes);
859 	return true;
860 }
861 EXPORT_SYMBOL(_copy_from_iter_full_nocache);
862 
863 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
864 {
865 	struct page *head;
866 	size_t v = n + offset;
867 
868 	/*
869 	 * The general case needs to access the page order in order
870 	 * to compute the page size.
871 	 * However, we mostly deal with order-0 pages and thus can
872 	 * avoid a possible cache line miss for requests that fit all
873 	 * page orders.
874 	 */
875 	if (n <= v && v <= PAGE_SIZE)
876 		return true;
877 
878 	head = compound_head(page);
879 	v += (page - head) << PAGE_SHIFT;
880 
881 	if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head))))
882 		return true;
883 	WARN_ON(1);
884 	return false;
885 }
886 
887 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
888 			 struct iov_iter *i)
889 {
890 	if (unlikely(!page_copy_sane(page, offset, bytes)))
891 		return 0;
892 	if (i->type & (ITER_BVEC|ITER_KVEC)) {
893 		void *kaddr = kmap_atomic(page);
894 		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
895 		kunmap_atomic(kaddr);
896 		return wanted;
897 	} else if (unlikely(iov_iter_is_discard(i)))
898 		return bytes;
899 	else if (likely(!iov_iter_is_pipe(i)))
900 		return copy_page_to_iter_iovec(page, offset, bytes, i);
901 	else
902 		return copy_page_to_iter_pipe(page, offset, bytes, i);
903 }
904 EXPORT_SYMBOL(copy_page_to_iter);
905 
906 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
907 			 struct iov_iter *i)
908 {
909 	if (unlikely(!page_copy_sane(page, offset, bytes)))
910 		return 0;
911 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
912 		WARN_ON(1);
913 		return 0;
914 	}
915 	if (i->type & (ITER_BVEC|ITER_KVEC)) {
916 		void *kaddr = kmap_atomic(page);
917 		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
918 		kunmap_atomic(kaddr);
919 		return wanted;
920 	} else
921 		return copy_page_from_iter_iovec(page, offset, bytes, i);
922 }
923 EXPORT_SYMBOL(copy_page_from_iter);
924 
925 static size_t pipe_zero(size_t bytes, struct iov_iter *i)
926 {
927 	struct pipe_inode_info *pipe = i->pipe;
928 	size_t n, off;
929 	int idx;
930 
931 	if (!sanity(i))
932 		return 0;
933 
934 	bytes = n = push_pipe(i, bytes, &idx, &off);
935 	if (unlikely(!n))
936 		return 0;
937 
938 	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
939 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
940 		memzero_page(pipe->bufs[idx].page, off, chunk);
941 		i->idx = idx;
942 		i->iov_offset = off + chunk;
943 		n -= chunk;
944 	}
945 	i->count -= bytes;
946 	return bytes;
947 }
948 
949 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
950 {
951 	if (unlikely(iov_iter_is_pipe(i)))
952 		return pipe_zero(bytes, i);
953 	iterate_and_advance(i, bytes, v,
954 		clear_user(v.iov_base, v.iov_len),
955 		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
956 		memset(v.iov_base, 0, v.iov_len)
957 	)
958 
959 	return bytes;
960 }
961 EXPORT_SYMBOL(iov_iter_zero);
962 
963 size_t iov_iter_copy_from_user_atomic(struct page *page,
964 		struct iov_iter *i, unsigned long offset, size_t bytes)
965 {
966 	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
967 	if (unlikely(!page_copy_sane(page, offset, bytes))) {
968 		kunmap_atomic(kaddr);
969 		return 0;
970 	}
971 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
972 		kunmap_atomic(kaddr);
973 		WARN_ON(1);
974 		return 0;
975 	}
976 	iterate_all_kinds(i, bytes, v,
977 		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
978 		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
979 				 v.bv_offset, v.bv_len),
980 		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
981 	)
982 	kunmap_atomic(kaddr);
983 	return bytes;
984 }
985 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
986 
987 static inline void pipe_truncate(struct iov_iter *i)
988 {
989 	struct pipe_inode_info *pipe = i->pipe;
990 	if (pipe->nrbufs) {
991 		size_t off = i->iov_offset;
992 		int idx = i->idx;
993 		int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1);
994 		if (off) {
995 			pipe->bufs[idx].len = off - pipe->bufs[idx].offset;
996 			idx = next_idx(idx, pipe);
997 			nrbufs++;
998 		}
999 		while (pipe->nrbufs > nrbufs) {
1000 			pipe_buf_release(pipe, &pipe->bufs[idx]);
1001 			idx = next_idx(idx, pipe);
1002 			pipe->nrbufs--;
1003 		}
1004 	}
1005 }
1006 
1007 static void pipe_advance(struct iov_iter *i, size_t size)
1008 {
1009 	struct pipe_inode_info *pipe = i->pipe;
1010 	if (unlikely(i->count < size))
1011 		size = i->count;
1012 	if (size) {
1013 		struct pipe_buffer *buf;
1014 		size_t off = i->iov_offset, left = size;
1015 		int idx = i->idx;
1016 		if (off) /* make it relative to the beginning of buffer */
1017 			left += off - pipe->bufs[idx].offset;
1018 		while (1) {
1019 			buf = &pipe->bufs[idx];
1020 			if (left <= buf->len)
1021 				break;
1022 			left -= buf->len;
1023 			idx = next_idx(idx, pipe);
1024 		}
1025 		i->idx = idx;
1026 		i->iov_offset = buf->offset + left;
1027 	}
1028 	i->count -= size;
1029 	/* ... and discard everything past that point */
1030 	pipe_truncate(i);
1031 }
1032 
1033 void iov_iter_advance(struct iov_iter *i, size_t size)
1034 {
1035 	if (unlikely(iov_iter_is_pipe(i))) {
1036 		pipe_advance(i, size);
1037 		return;
1038 	}
1039 	if (unlikely(iov_iter_is_discard(i))) {
1040 		i->count -= size;
1041 		return;
1042 	}
1043 	iterate_and_advance(i, size, v, 0, 0, 0)
1044 }
1045 EXPORT_SYMBOL(iov_iter_advance);
1046 
1047 void iov_iter_revert(struct iov_iter *i, size_t unroll)
1048 {
1049 	if (!unroll)
1050 		return;
1051 	if (WARN_ON(unroll > MAX_RW_COUNT))
1052 		return;
1053 	i->count += unroll;
1054 	if (unlikely(iov_iter_is_pipe(i))) {
1055 		struct pipe_inode_info *pipe = i->pipe;
1056 		int idx = i->idx;
1057 		size_t off = i->iov_offset;
1058 		while (1) {
1059 			size_t n = off - pipe->bufs[idx].offset;
1060 			if (unroll < n) {
1061 				off -= unroll;
1062 				break;
1063 			}
1064 			unroll -= n;
1065 			if (!unroll && idx == i->start_idx) {
1066 				off = 0;
1067 				break;
1068 			}
1069 			if (!idx--)
1070 				idx = pipe->buffers - 1;
1071 			off = pipe->bufs[idx].offset + pipe->bufs[idx].len;
1072 		}
1073 		i->iov_offset = off;
1074 		i->idx = idx;
1075 		pipe_truncate(i);
1076 		return;
1077 	}
1078 	if (unlikely(iov_iter_is_discard(i)))
1079 		return;
1080 	if (unroll <= i->iov_offset) {
1081 		i->iov_offset -= unroll;
1082 		return;
1083 	}
1084 	unroll -= i->iov_offset;
1085 	if (iov_iter_is_bvec(i)) {
1086 		const struct bio_vec *bvec = i->bvec;
1087 		while (1) {
1088 			size_t n = (--bvec)->bv_len;
1089 			i->nr_segs++;
1090 			if (unroll <= n) {
1091 				i->bvec = bvec;
1092 				i->iov_offset = n - unroll;
1093 				return;
1094 			}
1095 			unroll -= n;
1096 		}
1097 	} else { /* same logics for iovec and kvec */
1098 		const struct iovec *iov = i->iov;
1099 		while (1) {
1100 			size_t n = (--iov)->iov_len;
1101 			i->nr_segs++;
1102 			if (unroll <= n) {
1103 				i->iov = iov;
1104 				i->iov_offset = n - unroll;
1105 				return;
1106 			}
1107 			unroll -= n;
1108 		}
1109 	}
1110 }
1111 EXPORT_SYMBOL(iov_iter_revert);
1112 
1113 /*
1114  * Return the count of just the current iov_iter segment.
1115  */
1116 size_t iov_iter_single_seg_count(const struct iov_iter *i)
1117 {
1118 	if (unlikely(iov_iter_is_pipe(i)))
1119 		return i->count;	// it is a silly place, anyway
1120 	if (i->nr_segs == 1)
1121 		return i->count;
1122 	if (unlikely(iov_iter_is_discard(i)))
1123 		return i->count;
1124 	else if (iov_iter_is_bvec(i))
1125 		return min(i->count, i->bvec->bv_len - i->iov_offset);
1126 	else
1127 		return min(i->count, i->iov->iov_len - i->iov_offset);
1128 }
1129 EXPORT_SYMBOL(iov_iter_single_seg_count);
1130 
1131 void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
1132 			const struct kvec *kvec, unsigned long nr_segs,
1133 			size_t count)
1134 {
1135 	WARN_ON(direction & ~(READ | WRITE));
1136 	i->type = ITER_KVEC | (direction & (READ | WRITE));
1137 	i->kvec = kvec;
1138 	i->nr_segs = nr_segs;
1139 	i->iov_offset = 0;
1140 	i->count = count;
1141 }
1142 EXPORT_SYMBOL(iov_iter_kvec);
1143 
1144 void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
1145 			const struct bio_vec *bvec, unsigned long nr_segs,
1146 			size_t count)
1147 {
1148 	WARN_ON(direction & ~(READ | WRITE));
1149 	i->type = ITER_BVEC | (direction & (READ | WRITE));
1150 	i->bvec = bvec;
1151 	i->nr_segs = nr_segs;
1152 	i->iov_offset = 0;
1153 	i->count = count;
1154 }
1155 EXPORT_SYMBOL(iov_iter_bvec);
1156 
1157 void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
1158 			struct pipe_inode_info *pipe,
1159 			size_t count)
1160 {
1161 	BUG_ON(direction != READ);
1162 	WARN_ON(pipe->nrbufs == pipe->buffers);
1163 	i->type = ITER_PIPE | READ;
1164 	i->pipe = pipe;
1165 	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1166 	i->iov_offset = 0;
1167 	i->count = count;
1168 	i->start_idx = i->idx;
1169 }
1170 EXPORT_SYMBOL(iov_iter_pipe);
1171 
1172 /**
1173  * iov_iter_discard - Initialise an I/O iterator that discards data
1174  * @i: The iterator to initialise.
1175  * @direction: The direction of the transfer.
1176  * @count: The size of the I/O buffer in bytes.
1177  *
1178  * Set up an I/O iterator that just discards everything that's written to it.
1179  * It's only available as a READ iterator.
1180  */
1181 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1182 {
1183 	BUG_ON(direction != READ);
1184 	i->type = ITER_DISCARD | READ;
1185 	i->count = count;
1186 	i->iov_offset = 0;
1187 }
1188 EXPORT_SYMBOL(iov_iter_discard);
1189 
1190 unsigned long iov_iter_alignment(const struct iov_iter *i)
1191 {
1192 	unsigned long res = 0;
1193 	size_t size = i->count;
1194 
1195 	if (unlikely(iov_iter_is_pipe(i))) {
1196 		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
1197 			return size | i->iov_offset;
1198 		return size;
1199 	}
1200 	iterate_all_kinds(i, size, v,
1201 		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
1202 		res |= v.bv_offset | v.bv_len,
1203 		res |= (unsigned long)v.iov_base | v.iov_len
1204 	)
1205 	return res;
1206 }
1207 EXPORT_SYMBOL(iov_iter_alignment);
1208 
1209 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1210 {
1211 	unsigned long res = 0;
1212 	size_t size = i->count;
1213 
1214 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1215 		WARN_ON(1);
1216 		return ~0U;
1217 	}
1218 
1219 	iterate_all_kinds(i, size, v,
1220 		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
1221 			(size != v.iov_len ? size : 0), 0),
1222 		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1223 			(size != v.bv_len ? size : 0)),
1224 		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
1225 			(size != v.iov_len ? size : 0))
1226 		);
1227 	return res;
1228 }
1229 EXPORT_SYMBOL(iov_iter_gap_alignment);
1230 
1231 static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1232 				size_t maxsize,
1233 				struct page **pages,
1234 				int idx,
1235 				size_t *start)
1236 {
1237 	struct pipe_inode_info *pipe = i->pipe;
1238 	ssize_t n = push_pipe(i, maxsize, &idx, start);
1239 	if (!n)
1240 		return -EFAULT;
1241 
1242 	maxsize = n;
1243 	n += *start;
1244 	while (n > 0) {
1245 		get_page(*pages++ = pipe->bufs[idx].page);
1246 		idx = next_idx(idx, pipe);
1247 		n -= PAGE_SIZE;
1248 	}
1249 
1250 	return maxsize;
1251 }
1252 
1253 static ssize_t pipe_get_pages(struct iov_iter *i,
1254 		   struct page **pages, size_t maxsize, unsigned maxpages,
1255 		   size_t *start)
1256 {
1257 	unsigned npages;
1258 	size_t capacity;
1259 	int idx;
1260 
1261 	if (!maxsize)
1262 		return 0;
1263 
1264 	if (!sanity(i))
1265 		return -EFAULT;
1266 
1267 	data_start(i, &idx, start);
1268 	/* some of this one + all after this one */
1269 	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
1270 	capacity = min(npages,maxpages) * PAGE_SIZE - *start;
1271 
1272 	return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
1273 }
1274 
1275 ssize_t iov_iter_get_pages(struct iov_iter *i,
1276 		   struct page **pages, size_t maxsize, unsigned maxpages,
1277 		   size_t *start)
1278 {
1279 	if (maxsize > i->count)
1280 		maxsize = i->count;
1281 
1282 	if (unlikely(iov_iter_is_pipe(i)))
1283 		return pipe_get_pages(i, pages, maxsize, maxpages, start);
1284 	if (unlikely(iov_iter_is_discard(i)))
1285 		return -EFAULT;
1286 
1287 	iterate_all_kinds(i, maxsize, v, ({
1288 		unsigned long addr = (unsigned long)v.iov_base;
1289 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1290 		int n;
1291 		int res;
1292 
1293 		if (len > maxpages * PAGE_SIZE)
1294 			len = maxpages * PAGE_SIZE;
1295 		addr &= ~(PAGE_SIZE - 1);
1296 		n = DIV_ROUND_UP(len, PAGE_SIZE);
1297 		res = get_user_pages_fast(addr, n,
1298 				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
1299 				pages);
1300 		if (unlikely(res < 0))
1301 			return res;
1302 		return (res == n ? len : res * PAGE_SIZE) - *start;
1303 	0;}),({
1304 		/* can't be more than PAGE_SIZE */
1305 		*start = v.bv_offset;
1306 		get_page(*pages = v.bv_page);
1307 		return v.bv_len;
1308 	}),({
1309 		return -EFAULT;
1310 	})
1311 	)
1312 	return 0;
1313 }
1314 EXPORT_SYMBOL(iov_iter_get_pages);
1315 
1316 static struct page **get_pages_array(size_t n)
1317 {
1318 	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1319 }
1320 
1321 static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1322 		   struct page ***pages, size_t maxsize,
1323 		   size_t *start)
1324 {
1325 	struct page **p;
1326 	ssize_t n;
1327 	int idx;
1328 	int npages;
1329 
1330 	if (!maxsize)
1331 		return 0;
1332 
1333 	if (!sanity(i))
1334 		return -EFAULT;
1335 
1336 	data_start(i, &idx, start);
1337 	/* some of this one + all after this one */
1338 	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
1339 	n = npages * PAGE_SIZE - *start;
1340 	if (maxsize > n)
1341 		maxsize = n;
1342 	else
1343 		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1344 	p = get_pages_array(npages);
1345 	if (!p)
1346 		return -ENOMEM;
1347 	n = __pipe_get_pages(i, maxsize, p, idx, start);
1348 	if (n > 0)
1349 		*pages = p;
1350 	else
1351 		kvfree(p);
1352 	return n;
1353 }
1354 
1355 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1356 		   struct page ***pages, size_t maxsize,
1357 		   size_t *start)
1358 {
1359 	struct page **p;
1360 
1361 	if (maxsize > i->count)
1362 		maxsize = i->count;
1363 
1364 	if (unlikely(iov_iter_is_pipe(i)))
1365 		return pipe_get_pages_alloc(i, pages, maxsize, start);
1366 	if (unlikely(iov_iter_is_discard(i)))
1367 		return -EFAULT;
1368 
1369 	iterate_all_kinds(i, maxsize, v, ({
1370 		unsigned long addr = (unsigned long)v.iov_base;
1371 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1372 		int n;
1373 		int res;
1374 
1375 		addr &= ~(PAGE_SIZE - 1);
1376 		n = DIV_ROUND_UP(len, PAGE_SIZE);
1377 		p = get_pages_array(n);
1378 		if (!p)
1379 			return -ENOMEM;
1380 		res = get_user_pages_fast(addr, n,
1381 				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
1382 		if (unlikely(res < 0)) {
1383 			kvfree(p);
1384 			return res;
1385 		}
1386 		*pages = p;
1387 		return (res == n ? len : res * PAGE_SIZE) - *start;
1388 	0;}),({
1389 		/* can't be more than PAGE_SIZE */
1390 		*start = v.bv_offset;
1391 		*pages = p = get_pages_array(1);
1392 		if (!p)
1393 			return -ENOMEM;
1394 		get_page(*p = v.bv_page);
1395 		return v.bv_len;
1396 	}),({
1397 		return -EFAULT;
1398 	})
1399 	)
1400 	return 0;
1401 }
1402 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1403 
1404 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1405 			       struct iov_iter *i)
1406 {
1407 	char *to = addr;
1408 	__wsum sum, next;
1409 	size_t off = 0;
1410 	sum = *csum;
1411 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1412 		WARN_ON(1);
1413 		return 0;
1414 	}
1415 	iterate_and_advance(i, bytes, v, ({
1416 		int err = 0;
1417 		next = csum_and_copy_from_user(v.iov_base,
1418 					       (to += v.iov_len) - v.iov_len,
1419 					       v.iov_len, 0, &err);
1420 		if (!err) {
1421 			sum = csum_block_add(sum, next, off);
1422 			off += v.iov_len;
1423 		}
1424 		err ? v.iov_len : 0;
1425 	}), ({
1426 		char *p = kmap_atomic(v.bv_page);
1427 		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1428 				      p + v.bv_offset, v.bv_len,
1429 				      sum, off);
1430 		kunmap_atomic(p);
1431 		off += v.bv_len;
1432 	}),({
1433 		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1434 				      v.iov_base, v.iov_len,
1435 				      sum, off);
1436 		off += v.iov_len;
1437 	})
1438 	)
1439 	*csum = sum;
1440 	return bytes;
1441 }
1442 EXPORT_SYMBOL(csum_and_copy_from_iter);
1443 
1444 bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
1445 			       struct iov_iter *i)
1446 {
1447 	char *to = addr;
1448 	__wsum sum, next;
1449 	size_t off = 0;
1450 	sum = *csum;
1451 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1452 		WARN_ON(1);
1453 		return false;
1454 	}
1455 	if (unlikely(i->count < bytes))
1456 		return false;
1457 	iterate_all_kinds(i, bytes, v, ({
1458 		int err = 0;
1459 		next = csum_and_copy_from_user(v.iov_base,
1460 					       (to += v.iov_len) - v.iov_len,
1461 					       v.iov_len, 0, &err);
1462 		if (err)
1463 			return false;
1464 		sum = csum_block_add(sum, next, off);
1465 		off += v.iov_len;
1466 		0;
1467 	}), ({
1468 		char *p = kmap_atomic(v.bv_page);
1469 		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1470 				      p + v.bv_offset, v.bv_len,
1471 				      sum, off);
1472 		kunmap_atomic(p);
1473 		off += v.bv_len;
1474 	}),({
1475 		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1476 				      v.iov_base, v.iov_len,
1477 				      sum, off);
1478 		off += v.iov_len;
1479 	})
1480 	)
1481 	*csum = sum;
1482 	iov_iter_advance(i, bytes);
1483 	return true;
1484 }
1485 EXPORT_SYMBOL(csum_and_copy_from_iter_full);
1486 
1487 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
1488 			     struct iov_iter *i)
1489 {
1490 	const char *from = addr;
1491 	__wsum *csum = csump;
1492 	__wsum sum, next;
1493 	size_t off = 0;
1494 
1495 	if (unlikely(iov_iter_is_pipe(i)))
1496 		return csum_and_copy_to_pipe_iter(addr, bytes, csum, i);
1497 
1498 	sum = *csum;
1499 	if (unlikely(iov_iter_is_discard(i))) {
1500 		WARN_ON(1);	/* for now */
1501 		return 0;
1502 	}
1503 	iterate_and_advance(i, bytes, v, ({
1504 		int err = 0;
1505 		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1506 					     v.iov_base,
1507 					     v.iov_len, 0, &err);
1508 		if (!err) {
1509 			sum = csum_block_add(sum, next, off);
1510 			off += v.iov_len;
1511 		}
1512 		err ? v.iov_len : 0;
1513 	}), ({
1514 		char *p = kmap_atomic(v.bv_page);
1515 		sum = csum_and_memcpy(p + v.bv_offset,
1516 				      (from += v.bv_len) - v.bv_len,
1517 				      v.bv_len, sum, off);
1518 		kunmap_atomic(p);
1519 		off += v.bv_len;
1520 	}),({
1521 		sum = csum_and_memcpy(v.iov_base,
1522 				     (from += v.iov_len) - v.iov_len,
1523 				     v.iov_len, sum, off);
1524 		off += v.iov_len;
1525 	})
1526 	)
1527 	*csum = sum;
1528 	return bytes;
1529 }
1530 EXPORT_SYMBOL(csum_and_copy_to_iter);
1531 
1532 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1533 		struct iov_iter *i)
1534 {
1535 #ifdef CONFIG_CRYPTO
1536 	struct ahash_request *hash = hashp;
1537 	struct scatterlist sg;
1538 	size_t copied;
1539 
1540 	copied = copy_to_iter(addr, bytes, i);
1541 	sg_init_one(&sg, addr, copied);
1542 	ahash_request_set_crypt(hash, &sg, NULL, copied);
1543 	crypto_ahash_update(hash);
1544 	return copied;
1545 #else
1546 	return 0;
1547 #endif
1548 }
1549 EXPORT_SYMBOL(hash_and_copy_to_iter);
1550 
1551 int iov_iter_npages(const struct iov_iter *i, int maxpages)
1552 {
1553 	size_t size = i->count;
1554 	int npages = 0;
1555 
1556 	if (!size)
1557 		return 0;
1558 	if (unlikely(iov_iter_is_discard(i)))
1559 		return 0;
1560 
1561 	if (unlikely(iov_iter_is_pipe(i))) {
1562 		struct pipe_inode_info *pipe = i->pipe;
1563 		size_t off;
1564 		int idx;
1565 
1566 		if (!sanity(i))
1567 			return 0;
1568 
1569 		data_start(i, &idx, &off);
1570 		/* some of this one + all after this one */
1571 		npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
1572 		if (npages >= maxpages)
1573 			return maxpages;
1574 	} else iterate_all_kinds(i, size, v, ({
1575 		unsigned long p = (unsigned long)v.iov_base;
1576 		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1577 			- p / PAGE_SIZE;
1578 		if (npages >= maxpages)
1579 			return maxpages;
1580 	0;}),({
1581 		npages++;
1582 		if (npages >= maxpages)
1583 			return maxpages;
1584 	}),({
1585 		unsigned long p = (unsigned long)v.iov_base;
1586 		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1587 			- p / PAGE_SIZE;
1588 		if (npages >= maxpages)
1589 			return maxpages;
1590 	})
1591 	)
1592 	return npages;
1593 }
1594 EXPORT_SYMBOL(iov_iter_npages);
1595 
1596 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1597 {
1598 	*new = *old;
1599 	if (unlikely(iov_iter_is_pipe(new))) {
1600 		WARN_ON(1);
1601 		return NULL;
1602 	}
1603 	if (unlikely(iov_iter_is_discard(new)))
1604 		return NULL;
1605 	if (iov_iter_is_bvec(new))
1606 		return new->bvec = kmemdup(new->bvec,
1607 				    new->nr_segs * sizeof(struct bio_vec),
1608 				    flags);
1609 	else
1610 		/* iovec and kvec have identical layout */
1611 		return new->iov = kmemdup(new->iov,
1612 				   new->nr_segs * sizeof(struct iovec),
1613 				   flags);
1614 }
1615 EXPORT_SYMBOL(dup_iter);
1616 
1617 /**
1618  * import_iovec() - Copy an array of &struct iovec from userspace
1619  *     into the kernel, check that it is valid, and initialize a new
1620  *     &struct iov_iter iterator to access it.
1621  *
1622  * @type: One of %READ or %WRITE.
1623  * @uvector: Pointer to the userspace array.
1624  * @nr_segs: Number of elements in userspace array.
1625  * @fast_segs: Number of elements in @iov.
1626  * @iov: (input and output parameter) Pointer to pointer to (usually small
1627  *     on-stack) kernel array.
1628  * @i: Pointer to iterator that will be initialized on success.
1629  *
1630  * If the array pointed to by *@iov is large enough to hold all @nr_segs,
1631  * then this function places %NULL in *@iov on return. Otherwise, a new
1632  * array will be allocated and the result placed in *@iov. This means that
1633  * the caller may call kfree() on *@iov regardless of whether the small
1634  * on-stack array was used or not (and regardless of whether this function
1635  * returns an error or not).
1636  *
1637  * Return: 0 on success or negative error code on error.
1638  */
1639 int import_iovec(int type, const struct iovec __user * uvector,
1640 		 unsigned nr_segs, unsigned fast_segs,
1641 		 struct iovec **iov, struct iov_iter *i)
1642 {
1643 	ssize_t n;
1644 	struct iovec *p;
1645 	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1646 				  *iov, &p);
1647 	if (n < 0) {
1648 		if (p != *iov)
1649 			kfree(p);
1650 		*iov = NULL;
1651 		return n;
1652 	}
1653 	iov_iter_init(i, type, p, nr_segs, n);
1654 	*iov = p == *iov ? NULL : p;
1655 	return 0;
1656 }
1657 EXPORT_SYMBOL(import_iovec);
1658 
1659 #ifdef CONFIG_COMPAT
1660 #include <linux/compat.h>
1661 
1662 int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
1663 		 unsigned nr_segs, unsigned fast_segs,
1664 		 struct iovec **iov, struct iov_iter *i)
1665 {
1666 	ssize_t n;
1667 	struct iovec *p;
1668 	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1669 				  *iov, &p);
1670 	if (n < 0) {
1671 		if (p != *iov)
1672 			kfree(p);
1673 		*iov = NULL;
1674 		return n;
1675 	}
1676 	iov_iter_init(i, type, p, nr_segs, n);
1677 	*iov = p == *iov ? NULL : p;
1678 	return 0;
1679 }
1680 #endif
1681 
1682 int import_single_range(int rw, void __user *buf, size_t len,
1683 		 struct iovec *iov, struct iov_iter *i)
1684 {
1685 	if (len > MAX_RW_COUNT)
1686 		len = MAX_RW_COUNT;
1687 	if (unlikely(!access_ok(buf, len)))
1688 		return -EFAULT;
1689 
1690 	iov->iov_base = buf;
1691 	iov->iov_len = len;
1692 	iov_iter_init(i, rw, iov, 1, len);
1693 	return 0;
1694 }
1695 EXPORT_SYMBOL(import_single_range);
1696 
1697 int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
1698 			    int (*f)(struct kvec *vec, void *context),
1699 			    void *context)
1700 {
1701 	struct kvec w;
1702 	int err = -EINVAL;
1703 	if (!bytes)
1704 		return 0;
1705 
1706 	iterate_all_kinds(i, bytes, v, -EINVAL, ({
1707 		w.iov_base = kmap(v.bv_page) + v.bv_offset;
1708 		w.iov_len = v.bv_len;
1709 		err = f(&w, context);
1710 		kunmap(v.bv_page);
1711 		err;}), ({
1712 		w = v;
1713 		err = f(&w, context);})
1714 	)
1715 	return err;
1716 }
1717 EXPORT_SYMBOL(iov_iter_for_each_range);
1718