xref: /illumos-gate/usr/src/uts/common/syscall/sendfile.c (revision e116a42f06336d303f479428ee38eaf8006bb105)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/t_lock.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/buf.h>
32 #include <sys/conf.h>
33 #include <sys/cred.h>
34 #include <sys/kmem.h>
35 #include <sys/sysmacros.h>
36 #include <sys/vfs.h>
37 #include <sys/vnode.h>
38 #include <sys/debug.h>
39 #include <sys/errno.h>
40 #include <sys/time.h>
41 #include <sys/file.h>
42 #include <sys/open.h>
43 #include <sys/user.h>
44 #include <sys/termios.h>
45 #include <sys/stream.h>
46 #include <sys/strsubr.h>
47 #include <sys/sunddi.h>
48 #include <sys/esunddi.h>
49 #include <sys/flock.h>
50 #include <sys/modctl.h>
51 #include <sys/cmn_err.h>
52 #include <sys/vmsystm.h>
53 
54 #include <sys/socket.h>
55 #include <sys/socketvar.h>
56 
57 #include <netinet/in.h>
58 #include <sys/sendfile.h>
59 #include <sys/un.h>
60 #include <sys/tihdr.h>
61 #include <sys/atomic.h>
62 
63 #include <inet/common.h>
64 #include <inet/ip.h>
65 #include <inet/ip6.h>
66 #include <inet/tcp.h>
67 
/*
 * Routines used by the sendfile code below but defined outside this
 * file; only their prototypes are visible here.
 */
extern int sosendfile64(file_t *, file_t *, const struct ksendfilevec64 *,
		ssize32_t *);
extern int nl7c_sendfilev(struct sonode *, u_offset_t *, struct sendfilevec *,
		int, ssize_t *);
extern int snf_segmap(file_t *, vnode_t *, u_offset_t, u_offset_t, ssize_t *,
		boolean_t);

/*
 * Lock modes handed to VOP_RWLOCK()/VOP_RWUNLOCK() in this file:
 * readflg is used for the source (read) vnodes, rwflag is used by
 * sendvec64() for the output vnode.
 */
#define	readflg	(V_WRITELOCK_FALSE)
#define	rwflag	(V_WRITELOCK_TRUE)
77 
78 /*
79  * kstrwritemp() has very similar semantics as that of strwrite().
80  * The main difference is it obtains mblks from the caller and also
81  * does not do any copy as done in strwrite() from user buffers to
82  * kernel buffers.
83  *
84  * Currently, this routine is used by sendfile to send data allocated
85  * within the kernel without any copying. This interface does not use the
86  * synchronous stream interface as synch. stream interface implies
87  * copying.
88  */
89 int
90 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode)
91 {
92 	struct stdata *stp;
93 	struct queue *wqp;
94 	mblk_t *newmp;
95 	char waitflag;
96 	int tempmode;
97 	int error = 0;
98 	int done = 0;
99 	struct sonode *so;
100 	boolean_t direct;
101 
102 	ASSERT(vp->v_stream);
103 	stp = vp->v_stream;
104 
105 	so = VTOSO(vp);
106 	direct = (so->so_state & SS_DIRECT);
107 
108 	/*
109 	 * This is the sockfs direct fast path. canputnext() need
110 	 * not be accurate so we don't grab the sd_lock here. If
111 	 * we get flow-controlled, we grab sd_lock just before the
112 	 * do..while loop below to emulate what strwrite() does.
113 	 */
114 	wqp = stp->sd_wrq;
115 	if (canputnext(wqp) && direct &&
116 	    !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
117 		return (sostream_direct(so, NULL, mp, CRED()));
118 	} else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
119 		/* Fast check of flags before acquiring the lock */
120 		mutex_enter(&stp->sd_lock);
121 		error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
122 		mutex_exit(&stp->sd_lock);
123 		if (error != 0) {
124 			if (!(stp->sd_flag & STPLEX) &&
125 			    (stp->sd_wput_opt & SW_SIGPIPE)) {
126 				tsignal(curthread, SIGPIPE);
127 				error = EPIPE;
128 			}
129 			return (error);
130 		}
131 	}
132 
133 	waitflag = WRITEWAIT;
134 	if (stp->sd_flag & OLDNDELAY)
135 		tempmode = fmode & ~FNDELAY;
136 	else
137 		tempmode = fmode;
138 
139 	mutex_enter(&stp->sd_lock);
140 	do {
141 		if (canputnext(wqp)) {
142 			mutex_exit(&stp->sd_lock);
143 			if (stp->sd_wputdatafunc != NULL) {
144 				newmp = (stp->sd_wputdatafunc)(vp, mp, NULL,
145 				    NULL, NULL, NULL);
146 				if (newmp == NULL) {
147 					/* The caller will free mp */
148 					return (ECOMM);
149 				}
150 				mp = newmp;
151 			}
152 			putnext(wqp, mp);
153 			return (0);
154 		}
155 		error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1,
156 		    &done);
157 	} while (error == 0 && !done);
158 
159 	mutex_exit(&stp->sd_lock);
160 	/*
161 	 * EAGAIN tells the application to try again. ENOMEM
162 	 * is returned only if the memory allocation size
163 	 * exceeds the physical limits of the system. ENOMEM
164 	 * can't be true here.
165 	 */
166 	if (error == ENOMEM)
167 		error = EAGAIN;
168 	return (error);
169 }
170 
/*
 * Number of sendfilevec entries copied in from the caller and processed
 * per pass; also the size of the on-stack vector arrays.
 */
#define	SEND_MAX_CHUNK	16
172 
173 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
174 /*
175  * 64 bit offsets for 32 bit applications only running either on
176  * 64 bit kernel or 32 bit kernel. For 32 bit apps, we can't transfer
177  * more than 2GB of data.
178  */
179 int
180 sendvec_chunk64(file_t *fp, u_offset_t *fileoff, struct ksendfilevec64 *sfv,
181     int copy_cnt, ssize32_t *count)
182 {
183 	struct vnode *vp;
184 	ushort_t fflag;
185 	int ioflag;
186 	size32_t cnt;
187 	ssize32_t sfv_len;
188 	ssize32_t tmpcount;
189 	u_offset_t sfv_off;
190 	struct uio auio;
191 	struct iovec aiov;
192 	int i, error;
193 
194 	fflag = fp->f_flag;
195 	vp = fp->f_vnode;
196 	for (i = 0; i < copy_cnt; i++) {
197 
198 		if (ISSIG(curthread, JUSTLOOKING))
199 			return (EINTR);
200 
201 		/*
202 		 * Do similar checks as "write" as we are writing
203 		 * sfv_len bytes into "vp".
204 		 */
205 		sfv_len = (ssize32_t)sfv->sfv_len;
206 
207 		if (sfv_len == 0) {
208 			sfv++;
209 			continue;
210 		}
211 
212 		if (sfv_len < 0)
213 			return (EINVAL);
214 
215 		if (vp->v_type == VREG) {
216 			if (*fileoff >= curproc->p_fsz_ctl) {
217 				mutex_enter(&curproc->p_lock);
218 				(void) rctl_action(
219 				    rctlproc_legacy[RLIMIT_FSIZE],
220 				    curproc->p_rctls, curproc, RCA_SAFE);
221 				mutex_exit(&curproc->p_lock);
222 				return (EFBIG);
223 			}
224 
225 			if (*fileoff >= OFFSET_MAX(fp))
226 				return (EFBIG);
227 
228 			if (*fileoff + sfv_len > OFFSET_MAX(fp))
229 				return (EINVAL);
230 		}
231 
232 		tmpcount = *count + sfv_len;
233 		if (tmpcount < 0)
234 			return (EINVAL);
235 
236 		sfv_off = sfv->sfv_off;
237 
238 		auio.uio_extflg = UIO_COPY_DEFAULT;
239 		if (sfv->sfv_fd == SFV_FD_SELF) {
240 			aiov.iov_len = sfv_len;
241 			aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
242 			auio.uio_loffset = *fileoff;
243 			auio.uio_iovcnt = 1;
244 			auio.uio_resid = sfv_len;
245 			auio.uio_iov = &aiov;
246 			auio.uio_segflg = UIO_USERSPACE;
247 			auio.uio_llimit = curproc->p_fsz_ctl;
248 			auio.uio_fmode = fflag;
249 			ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
250 			while (sfv_len > 0) {
251 				error = VOP_WRITE(vp, &auio, ioflag,
252 				    fp->f_cred, NULL);
253 				cnt = sfv_len - auio.uio_resid;
254 				sfv_len -= cnt;
255 				ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
256 				if (vp->v_type == VREG)
257 					*fileoff += cnt;
258 				*count += cnt;
259 				if (error != 0)
260 					return (error);
261 			}
262 		} else {
263 			file_t	*ffp;
264 			vnode_t	*readvp;
265 			size_t	size;
266 			caddr_t	ptr;
267 
268 			if ((ffp = getf(sfv->sfv_fd)) == NULL)
269 				return (EBADF);
270 
271 			if ((ffp->f_flag & FREAD) == 0) {
272 				releasef(sfv->sfv_fd);
273 				return (EBADF);
274 			}
275 
276 			readvp = ffp->f_vnode;
277 			if (readvp->v_type != VREG) {
278 				releasef(sfv->sfv_fd);
279 				return (EINVAL);
280 			}
281 
282 			/*
283 			 * No point reading and writing to same vp,
284 			 * as long as both are regular files. readvp is not
285 			 * locked; but since we got it from an open file the
286 			 * contents will be valid during the time of access.
287 			 */
288 			if (vn_compare(vp, readvp)) {
289 				releasef(sfv->sfv_fd);
290 				return (EINVAL);
291 			}
292 
293 			/*
294 			 * Note: we assume readvp != vp. "vp" is already
295 			 * locked, and "readvp" must not be.
296 			 */
297 			(void) VOP_RWLOCK(readvp, readflg, NULL);
298 
299 			/*
300 			 * Same checks as in pread64.
301 			 */
302 			if (sfv_off > MAXOFFSET_T) {
303 				VOP_RWUNLOCK(readvp, readflg, NULL);
304 				releasef(sfv->sfv_fd);
305 				return (EINVAL);
306 			}
307 
308 			if (sfv_off + sfv_len > MAXOFFSET_T)
309 				sfv_len = (ssize32_t)(MAXOFFSET_T - sfv_off);
310 
311 			/* Find the native blocksize to transfer data */
312 			size = MIN(vp->v_vfsp->vfs_bsize,
313 			    readvp->v_vfsp->vfs_bsize);
314 			size = sfv_len < size ? sfv_len : size;
315 			ptr = kmem_alloc(size, KM_SLEEP);
316 
317 			while (sfv_len > 0) {
318 				size_t	iov_len;
319 
320 				iov_len = MIN(size, sfv_len);
321 				aiov.iov_base = ptr;
322 				aiov.iov_len = iov_len;
323 				auio.uio_loffset = sfv_off;
324 				auio.uio_iov = &aiov;
325 				auio.uio_iovcnt = 1;
326 				auio.uio_resid = iov_len;
327 				auio.uio_segflg = UIO_SYSSPACE;
328 				auio.uio_llimit = MAXOFFSET_T;
329 				auio.uio_fmode = ffp->f_flag;
330 				ioflag = auio.uio_fmode &
331 				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
332 
333 				/*
334 				 * If read sync is not asked for,
335 				 * filter sync flags
336 				 */
337 				if ((ioflag & FRSYNC) == 0)
338 					ioflag &= ~(FSYNC|FDSYNC);
339 				error = VOP_READ(readvp, &auio, ioflag,
340 				    fp->f_cred, NULL);
341 				if (error) {
342 					kmem_free(ptr, size);
343 					VOP_RWUNLOCK(readvp, readflg, NULL);
344 					releasef(sfv->sfv_fd);
345 					return (error);
346 				}
347 
348 				/*
349 				 * Check how must data was really read.
350 				 * Decrement the 'len' and increment the
351 				 * 'off' appropriately.
352 				 */
353 				cnt = iov_len - auio.uio_resid;
354 				if (cnt == 0) {
355 					/*
356 					 * If we were reading a pipe (currently
357 					 * not implemented), we may now lose
358 					 * data.
359 					 */
360 					kmem_free(ptr, size);
361 					VOP_RWUNLOCK(readvp, readflg, NULL);
362 					releasef(sfv->sfv_fd);
363 					return (EINVAL);
364 				}
365 				sfv_len -= cnt;
366 				sfv_off += cnt;
367 
368 				aiov.iov_base = ptr;
369 				aiov.iov_len = cnt;
370 				auio.uio_loffset = *fileoff;
371 				auio.uio_iov = &aiov;
372 				auio.uio_iovcnt = 1;
373 				auio.uio_resid = cnt;
374 				auio.uio_segflg = UIO_SYSSPACE;
375 				auio.uio_llimit = curproc->p_fsz_ctl;
376 				auio.uio_fmode = fflag;
377 				ioflag = auio.uio_fmode &
378 				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
379 				error = VOP_WRITE(vp, &auio, ioflag,
380 				    fp->f_cred, NULL);
381 
382 				/*
383 				 * Check how much data was written. Increment
384 				 * the 'len' and decrement the 'off' if all
385 				 * the data was not written.
386 				 */
387 				cnt -= auio.uio_resid;
388 				sfv_len += auio.uio_resid;
389 				sfv_off -= auio.uio_resid;
390 				ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
391 				if (vp->v_type == VREG)
392 					*fileoff += cnt;
393 				*count += cnt;
394 				if (error != 0) {
395 					kmem_free(ptr, size);
396 					VOP_RWUNLOCK(readvp, readflg, NULL);
397 					releasef(sfv->sfv_fd);
398 					return (error);
399 				}
400 			}
401 			VOP_RWUNLOCK(readvp, readflg, NULL);
402 			releasef(sfv->sfv_fd);
403 			kmem_free(ptr, size);
404 		}
405 		sfv++;
406 	}
407 	return (0);
408 }
409 
410 ssize32_t
411 sendvec64(file_t *fp, const struct ksendfilevec64 *vec, int sfvcnt,
412 	size32_t *xferred, int fildes)
413 {
414 	u_offset_t		fileoff;
415 	int			copy_cnt;
416 	const struct ksendfilevec64 *copy_vec;
417 	struct ksendfilevec64 sfv[SEND_MAX_CHUNK];
418 	struct vnode *vp;
419 	int error;
420 	ssize32_t count = 0;
421 
422 	vp = fp->f_vnode;
423 	(void) VOP_RWLOCK(vp, rwflag, NULL);
424 
425 	copy_vec = vec;
426 	fileoff = fp->f_offset;
427 
428 	do {
429 		copy_cnt = MIN(sfvcnt, SEND_MAX_CHUNK);
430 		if (copyin(copy_vec, sfv, copy_cnt *
431 		    sizeof (struct ksendfilevec64))) {
432 			error = EFAULT;
433 			break;
434 		}
435 
436 		/*
437 		 * Optimize the regular file over
438 		 * the socket case.
439 		 */
440 		if (vp->v_type == VSOCK && sfv->sfv_fd != SFV_FD_SELF) {
441 			file_t *rfp;
442 			vnode_t *rvp;
443 
444 			if ((rfp = getf(sfv->sfv_fd)) == NULL) {
445 				error = EBADF;
446 				break;
447 			}
448 			if ((rfp->f_flag & FREAD) == 0) {
449 				releasef(sfv->sfv_fd);
450 				error = EBADF;
451 				break;
452 			}
453 			rvp = rfp->f_vnode;
454 			if (rvp->v_type == VREG) {
455 				error = sosendfile64(fp, rfp, sfv, &count);
456 				if (error)
457 					break;
458 				copy_vec++;
459 				sfvcnt--;
460 				continue;
461 			}
462 			releasef(sfv->sfv_fd);
463 		}
464 		error = sendvec_chunk64(fp, &fileoff, sfv, copy_cnt, &count);
465 		if (error != 0)
466 			break;
467 
468 		copy_vec += copy_cnt;
469 		sfvcnt -= copy_cnt;
470 	} while (sfvcnt > 0);
471 
472 	if (vp->v_type == VREG)
473 		fp->f_offset += count;
474 
475 	VOP_RWUNLOCK(vp, rwflag, NULL);
476 	if (copyout(&count, xferred, sizeof (count)))
477 		error = EFAULT;
478 	releasef(fildes);
479 	if (error != 0)
480 		return (set_errno(error));
481 	return (count);
482 }
483 #endif
484 
485 int
486 sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
487     int copy_cnt, ssize_t total_size, int maxblk, ssize_t *count)
488 {
489 	struct vnode *vp;
490 	struct uio auio;
491 	struct iovec aiov;
492 	ushort_t fflag;
493 	int ioflag;
494 	int i, error;
495 	size_t cnt;
496 	ssize_t sfv_len;
497 	u_offset_t sfv_off;
498 #ifdef _SYSCALL32_IMPL
499 	model_t model = get_udatamodel();
500 	u_offset_t maxoff = (model == DATAMODEL_ILP32) ?
501 	    MAXOFF32_T : MAXOFFSET_T;
502 #else
503 	const u_offset_t maxoff = MAXOFF32_T;
504 #endif
505 	mblk_t *dmp = NULL;
506 	int wroff;
507 	int buf_left = 0;
508 	size_t	iov_len;
509 	mblk_t  *head, *tmp;
510 	size_t  size = total_size;
511 	size_t  extra;
512 	int tail_len;
513 
514 	fflag = fp->f_flag;
515 	vp = fp->f_vnode;
516 
517 	ASSERT(vp->v_type == VSOCK);
518 	ASSERT(maxblk > 0);
519 
520 	/* If nothing to send, return */
521 	if (total_size == 0)
522 		return (0);
523 
524 	wroff = (int)vp->v_stream->sd_wroff;
525 	tail_len = (int)vp->v_stream->sd_tail;
526 	extra = wroff + tail_len;
527 
528 	buf_left = MIN(total_size, maxblk);
529 	head = dmp = allocb(buf_left + extra, BPRI_HI);
530 	if (head == NULL)
531 		return (ENOMEM);
532 	head->b_wptr = head->b_rptr = head->b_rptr + wroff;
533 
534 	auio.uio_extflg = UIO_COPY_DEFAULT;
535 	for (i = 0; i < copy_cnt; i++) {
536 		if (ISSIG(curthread, JUSTLOOKING)) {
537 			freemsg(head);
538 			return (EINTR);
539 		}
540 
541 		/*
542 		 * Do similar checks as "write" as we are writing
543 		 * sfv_len bytes into "vp".
544 		 */
545 		sfv_len = (ssize_t)sfv->sfv_len;
546 
547 		if (sfv_len == 0) {
548 			sfv++;
549 			continue;
550 		}
551 
552 		/* Check for overflow */
553 #ifdef _SYSCALL32_IMPL
554 		if (model == DATAMODEL_ILP32) {
555 			if (((ssize32_t)(*count + sfv_len)) < 0) {
556 				freemsg(head);
557 				return (EINVAL);
558 			}
559 		} else
560 #endif
561 		if ((*count + sfv_len) < 0) {
562 			freemsg(head);
563 			return (EINVAL);
564 		}
565 
566 		sfv_off = (u_offset_t)(ulong_t)sfv->sfv_off;
567 
568 		if (sfv->sfv_fd == SFV_FD_SELF) {
569 			while (sfv_len > 0) {
570 				if (buf_left == 0) {
571 					tmp = dmp;
572 					buf_left = MIN(total_size, maxblk);
573 					iov_len = MIN(buf_left, sfv_len);
574 					dmp = allocb(buf_left + extra, BPRI_HI);
575 					if (dmp == NULL) {
576 						freemsg(head);
577 						return (ENOMEM);
578 					}
579 					dmp->b_wptr = dmp->b_rptr =
580 					    dmp->b_rptr + wroff;
581 					tmp->b_cont = dmp;
582 				} else {
583 					iov_len = MIN(buf_left, sfv_len);
584 				}
585 
586 				aiov.iov_len = iov_len;
587 				aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
588 				auio.uio_loffset = *fileoff;
589 				auio.uio_iovcnt = 1;
590 				auio.uio_resid = iov_len;
591 				auio.uio_iov = &aiov;
592 				auio.uio_segflg = UIO_USERSPACE;
593 				auio.uio_llimit = curproc->p_fsz_ctl;
594 				auio.uio_fmode = fflag;
595 
596 				buf_left -= iov_len;
597 				total_size -= iov_len;
598 				sfv_len -= iov_len;
599 				sfv_off += iov_len;
600 
601 				error = uiomove((caddr_t)dmp->b_wptr,
602 				    iov_len, UIO_WRITE, &auio);
603 				if (error != 0) {
604 					freemsg(head);
605 					return (error);
606 				}
607 				dmp->b_wptr += iov_len;
608 			}
609 		} else {
610 			file_t	*ffp;
611 			vnode_t	*readvp;
612 
613 			if ((ffp = getf(sfv->sfv_fd)) == NULL) {
614 				freemsg(head);
615 				return (EBADF);
616 			}
617 
618 			if ((ffp->f_flag & FREAD) == 0) {
619 				releasef(sfv->sfv_fd);
620 				freemsg(head);
621 				return (EACCES);
622 			}
623 
624 			readvp = ffp->f_vnode;
625 			if (readvp->v_type != VREG) {
626 				releasef(sfv->sfv_fd);
627 				freemsg(head);
628 				return (EINVAL);
629 			}
630 
631 			/*
632 			 * No point reading and writing to same vp,
633 			 * as long as both are regular files. readvp is not
634 			 * locked; but since we got it from an open file the
635 			 * contents will be valid during the time of access.
636 			 */
637 
638 			if (vn_compare(vp, readvp)) {
639 				releasef(sfv->sfv_fd);
640 				freemsg(head);
641 				return (EINVAL);
642 			}
643 
644 			/*
645 			 * Note: we assume readvp != vp. "vp" is already
646 			 * locked, and "readvp" must not be.
647 			 */
648 
649 			(void) VOP_RWLOCK(readvp, readflg, NULL);
650 
651 			/* Same checks as in pread */
652 			if (sfv_off > maxoff) {
653 				VOP_RWUNLOCK(readvp, readflg, NULL);
654 				releasef(sfv->sfv_fd);
655 				freemsg(head);
656 				return (EINVAL);
657 			}
658 			if (sfv_off + sfv_len > maxoff) {
659 				total_size -= (sfv_off + sfv_len - maxoff);
660 				sfv_len = (ssize_t)((offset_t)maxoff -
661 				    sfv_off);
662 			}
663 
664 			while (sfv_len > 0) {
665 				if (buf_left == 0) {
666 					tmp = dmp;
667 					buf_left = MIN(total_size, maxblk);
668 					iov_len = MIN(buf_left, sfv_len);
669 					dmp = allocb(buf_left + extra, BPRI_HI);
670 					if (dmp == NULL) {
671 						VOP_RWUNLOCK(readvp, readflg,
672 						    NULL);
673 						releasef(sfv->sfv_fd);
674 						freemsg(head);
675 						return (ENOMEM);
676 					}
677 					dmp->b_wptr = dmp->b_rptr =
678 					    dmp->b_rptr + wroff;
679 					tmp->b_cont = dmp;
680 				} else {
681 					iov_len = MIN(buf_left, sfv_len);
682 				}
683 				aiov.iov_base = (caddr_t)dmp->b_wptr;
684 				aiov.iov_len = iov_len;
685 				auio.uio_loffset = sfv_off;
686 				auio.uio_iov = &aiov;
687 				auio.uio_iovcnt = 1;
688 				auio.uio_resid = iov_len;
689 				auio.uio_segflg = UIO_SYSSPACE;
690 				auio.uio_llimit = MAXOFFSET_T;
691 				auio.uio_fmode = ffp->f_flag;
692 				ioflag = auio.uio_fmode &
693 				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
694 
695 				/*
696 				 * If read sync is not asked for,
697 				 * filter sync flags
698 				 */
699 				if ((ioflag & FRSYNC) == 0)
700 					ioflag &= ~(FSYNC|FDSYNC);
701 				error = VOP_READ(readvp, &auio, ioflag,
702 				    fp->f_cred, NULL);
703 				if (error != 0) {
704 					/*
705 					 * If we were reading a pipe (currently
706 					 * not implemented), we may now loose
707 					 * data.
708 					 */
709 					VOP_RWUNLOCK(readvp, readflg, NULL);
710 					releasef(sfv->sfv_fd);
711 					freemsg(head);
712 					return (error);
713 				}
714 
715 				/*
716 				 * Check how much data was really read.
717 				 * Decrement the 'len' and increment the
718 				 * 'off' appropriately.
719 				 */
720 				cnt = iov_len - auio.uio_resid;
721 				if (cnt == 0) {
722 					VOP_RWUNLOCK(readvp, readflg, NULL);
723 					releasef(sfv->sfv_fd);
724 					freemsg(head);
725 					return (EINVAL);
726 				}
727 				sfv_len -= cnt;
728 				sfv_off += cnt;
729 				total_size -= cnt;
730 				buf_left -= cnt;
731 
732 				dmp->b_wptr += cnt;
733 			}
734 			VOP_RWUNLOCK(readvp, readflg, NULL);
735 			releasef(sfv->sfv_fd);
736 		}
737 		sfv++;
738 	}
739 
740 	ASSERT(total_size == 0);
741 	error = kstrwritemp(vp, head, fflag);
742 	if (error != 0) {
743 		freemsg(head);
744 		return (error);
745 	}
746 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)size;
747 	*count += size;
748 
749 	return (0);
750 }
751 
752 
753 int
754 sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
755     int copy_cnt, ssize_t *count)
756 {
757 	struct vnode *vp;
758 	struct uio auio;
759 	struct iovec aiov;
760 	ushort_t fflag;
761 	int ioflag;
762 	int i, error;
763 	size_t cnt;
764 	ssize_t sfv_len;
765 	u_offset_t sfv_off;
766 #ifdef _SYSCALL32_IMPL
767 	model_t model = get_udatamodel();
768 	u_offset_t maxoff = (model == DATAMODEL_ILP32) ?
769 	    MAXOFF32_T : MAXOFFSET_T;
770 #else
771 	const u_offset_t maxoff = MAXOFF32_T;
772 #endif
773 	mblk_t	*dmp = NULL;
774 	char	*buf = NULL;
775 	size_t  extra;
776 	int maxblk, wroff, tail_len;
777 	struct sonode *so;
778 	stdata_t *stp;
779 
780 	fflag = fp->f_flag;
781 	vp = fp->f_vnode;
782 
783 	if (vp->v_type == VSOCK) {
784 		so = VTOSO(vp);
785 		stp = vp->v_stream;
786 		wroff = (int)stp->sd_wroff;
787 		tail_len = (int)stp->sd_tail;
788 		maxblk = (int)stp->sd_maxblk;
789 		extra = wroff + tail_len;
790 	}
791 
792 	auio.uio_extflg = UIO_COPY_DEFAULT;
793 	for (i = 0; i < copy_cnt; i++) {
794 		if (ISSIG(curthread, JUSTLOOKING))
795 			return (EINTR);
796 
797 		/*
798 		 * Do similar checks as "write" as we are writing
799 		 * sfv_len bytes into "vp".
800 		 */
801 		sfv_len = (ssize_t)sfv->sfv_len;
802 
803 		if (sfv_len == 0) {
804 			sfv++;
805 			continue;
806 		}
807 
808 		if (vp->v_type == VREG) {
809 			if (*fileoff >= curproc->p_fsz_ctl) {
810 				mutex_enter(&curproc->p_lock);
811 				(void) rctl_action(
812 				    rctlproc_legacy[RLIMIT_FSIZE],
813 				    curproc->p_rctls, curproc, RCA_SAFE);
814 				mutex_exit(&curproc->p_lock);
815 
816 				return (EFBIG);
817 			}
818 
819 			if (*fileoff >= maxoff)
820 				return (EFBIG);
821 
822 			if (*fileoff + sfv_len > maxoff)
823 				return (EINVAL);
824 		}
825 
826 		/* Check for overflow */
827 #ifdef _SYSCALL32_IMPL
828 		if (model == DATAMODEL_ILP32) {
829 			if (((ssize32_t)(*count + sfv_len)) < 0)
830 				return (EINVAL);
831 		} else
832 #endif
833 		if ((*count + sfv_len) < 0)
834 			return (EINVAL);
835 
836 		sfv_off = (u_offset_t)(ulong_t)sfv->sfv_off;
837 
838 		if (sfv->sfv_fd == SFV_FD_SELF) {
839 			if (vp->v_type == VSOCK) {
840 				while (sfv_len > 0) {
841 					size_t iov_len;
842 
843 					iov_len = sfv_len;
844 					if (so->so_kssl_ctx != NULL)
845 						iov_len = MIN(iov_len, maxblk);
846 
847 					aiov.iov_len = iov_len;
848 					aiov.iov_base =
849 					    (caddr_t)(uintptr_t)sfv_off;
850 
851 					auio.uio_iov = &aiov;
852 					auio.uio_iovcnt = 1;
853 					auio.uio_loffset = *fileoff;
854 					auio.uio_segflg = UIO_USERSPACE;
855 					auio.uio_fmode = fflag;
856 					auio.uio_llimit = curproc->p_fsz_ctl;
857 					auio.uio_resid = iov_len;
858 
859 					dmp = allocb(iov_len + extra, BPRI_HI);
860 					if (dmp == NULL)
861 						return (ENOMEM);
862 					dmp->b_wptr = dmp->b_rptr =
863 					    dmp->b_rptr + wroff;
864 					error = uiomove((caddr_t)dmp->b_wptr,
865 					    iov_len, UIO_WRITE, &auio);
866 					if (error != 0) {
867 						freeb(dmp);
868 						return (error);
869 					}
870 					dmp->b_wptr += iov_len;
871 					error = kstrwritemp(vp, dmp, fflag);
872 					if (error != 0) {
873 						freeb(dmp);
874 						return (error);
875 					}
876 					ttolwp(curthread)->lwp_ru.ioch +=
877 					    (ulong_t)iov_len;
878 					*count += iov_len;
879 					sfv_len -= iov_len;
880 					sfv_off += iov_len;
881 				}
882 			} else {
883 				aiov.iov_len = sfv_len;
884 				aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
885 
886 				auio.uio_iov = &aiov;
887 				auio.uio_iovcnt = 1;
888 				auio.uio_loffset = *fileoff;
889 				auio.uio_segflg = UIO_USERSPACE;
890 				auio.uio_fmode = fflag;
891 				auio.uio_llimit = curproc->p_fsz_ctl;
892 				auio.uio_resid = sfv_len;
893 
894 				ioflag = auio.uio_fmode &
895 				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
896 				while (sfv_len > 0) {
897 					error = VOP_WRITE(vp, &auio, ioflag,
898 					    fp->f_cred, NULL);
899 					cnt = sfv_len - auio.uio_resid;
900 					sfv_len -= cnt;
901 					ttolwp(curthread)->lwp_ru.ioch +=
902 					    (ulong_t)cnt;
903 					*fileoff += cnt;
904 					*count += cnt;
905 					if (error != 0)
906 						return (error);
907 				}
908 			}
909 		} else {
910 			int segmapit = 0;
911 			file_t	*ffp;
912 			vnode_t	*readvp;
913 			struct vnode *realvp;
914 			size_t	size;
915 			caddr_t	ptr;
916 
917 			if ((ffp = getf(sfv->sfv_fd)) == NULL)
918 				return (EBADF);
919 
920 			if ((ffp->f_flag & FREAD) == 0) {
921 				releasef(sfv->sfv_fd);
922 				return (EBADF);
923 			}
924 
925 			readvp = ffp->f_vnode;
926 			if (VOP_REALVP(readvp, &realvp, NULL) == 0)
927 				readvp = realvp;
928 			if (readvp->v_type != VREG) {
929 				releasef(sfv->sfv_fd);
930 				return (EINVAL);
931 			}
932 
933 			/*
934 			 * No point reading and writing to same vp,
935 			 * as long as both are regular files. readvp is not
936 			 * locked; but since we got it from an open file the
937 			 * contents will be valid during the time of access.
938 			 */
939 			if (vn_compare(vp, readvp)) {
940 				releasef(sfv->sfv_fd);
941 				return (EINVAL);
942 			}
943 
944 			/*
945 			 * Note: we assume readvp != vp. "vp" is already
946 			 * locked, and "readvp" must not be.
947 			 */
948 			(void) VOP_RWLOCK(readvp, readflg, NULL);
949 
950 			/* Same checks as in pread */
951 			if (sfv_off > maxoff) {
952 				VOP_RWUNLOCK(readvp, readflg, NULL);
953 				releasef(sfv->sfv_fd);
954 				return (EINVAL);
955 			}
956 			if (sfv_off + sfv_len > maxoff) {
957 				sfv_len = (ssize_t)((offset_t)maxoff -
958 				    sfv_off);
959 			}
960 			/* Find the native blocksize to transfer data */
961 			size = MIN(vp->v_vfsp->vfs_bsize,
962 			    readvp->v_vfsp->vfs_bsize);
963 			size = sfv_len < size ? sfv_len : size;
964 
965 			if (vp->v_type != VSOCK) {
966 				segmapit = 0;
967 				buf = kmem_alloc(size, KM_NOSLEEP);
968 				if (buf == NULL) {
969 					VOP_RWUNLOCK(readvp, readflg, NULL);
970 					releasef(sfv->sfv_fd);
971 					return (ENOMEM);
972 				}
973 			} else {
974 				/*
975 				 * For sockets acting as an SSL proxy, we
976 				 * need to adjust the size to the maximum
977 				 * SSL record size set in the stream head.
978 				 */
979 				if (so->so_kssl_ctx != NULL)
980 					size = MIN(size, maxblk);
981 
982 				if (vn_has_flocks(readvp) ||
983 				    readvp->v_flag & VNOMAP ||
984 				    stp->sd_copyflag & STZCVMUNSAFE) {
985 					segmapit = 0;
986 				} else if (stp->sd_copyflag & STZCVMSAFE) {
987 					segmapit = 1;
988 				} else {
989 					int on = 1;
990 					if (SOP_SETSOCKOPT(VTOSO(vp),
991 					    SOL_SOCKET, SO_SND_COPYAVOID,
992 					    &on, sizeof (on)) == 0)
993 					segmapit = 1;
994 				}
995 			}
996 
997 			if (segmapit) {
998 				boolean_t nowait;
999 
1000 				nowait = (sfv->sfv_flag & SFV_NOWAIT) != 0;
1001 				error = snf_segmap(fp, readvp, sfv_off,
1002 				    (u_offset_t)sfv_len, (ssize_t *)&cnt,
1003 				    nowait);
1004 				releasef(sfv->sfv_fd);
1005 				*count += cnt;
1006 				if (error)
1007 					return (error);
1008 				sfv++;
1009 				continue;
1010 			}
1011 
1012 			while (sfv_len > 0) {
1013 				size_t	iov_len;
1014 
1015 				iov_len = MIN(size, sfv_len);
1016 
1017 				if (vp->v_type == VSOCK) {
1018 					dmp = allocb(iov_len + extra, BPRI_HI);
1019 					if (dmp == NULL) {
1020 						VOP_RWUNLOCK(readvp, readflg,
1021 						    NULL);
1022 						releasef(sfv->sfv_fd);
1023 						return (ENOMEM);
1024 					}
1025 					dmp->b_wptr = dmp->b_rptr =
1026 					    dmp->b_rptr + wroff;
1027 					ptr = (caddr_t)dmp->b_rptr;
1028 				} else {
1029 					ptr = buf;
1030 				}
1031 
1032 				aiov.iov_base = ptr;
1033 				aiov.iov_len = iov_len;
1034 				auio.uio_loffset = sfv_off;
1035 				auio.uio_iov = &aiov;
1036 				auio.uio_iovcnt = 1;
1037 				auio.uio_resid = iov_len;
1038 				auio.uio_segflg = UIO_SYSSPACE;
1039 				auio.uio_llimit = MAXOFFSET_T;
1040 				auio.uio_fmode = ffp->f_flag;
1041 				ioflag = auio.uio_fmode &
1042 				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1043 
1044 				/*
1045 				 * If read sync is not asked for,
1046 				 * filter sync flags
1047 				 */
1048 				if ((ioflag & FRSYNC) == 0)
1049 					ioflag &= ~(FSYNC|FDSYNC);
1050 				error = VOP_READ(readvp, &auio, ioflag,
1051 				    fp->f_cred, NULL);
1052 				if (error != 0) {
1053 					/*
1054 					 * If we were reading a pipe (currently
1055 					 * not implemented), we may now lose
1056 					 * data.
1057 					 */
1058 					if (vp->v_type == VSOCK)
1059 						freeb(dmp);
1060 					else
1061 						kmem_free(buf, size);
1062 					VOP_RWUNLOCK(readvp, readflg, NULL);
1063 					releasef(sfv->sfv_fd);
1064 					return (error);
1065 				}
1066 
1067 				/*
1068 				 * Check how much data was really read.
1069 				 * Decrement the 'len' and increment the
1070 				 * 'off' appropriately.
1071 				 */
1072 				cnt = iov_len - auio.uio_resid;
1073 				if (cnt == 0) {
1074 					if (vp->v_type == VSOCK)
1075 						freeb(dmp);
1076 					else
1077 						kmem_free(buf, size);
1078 					VOP_RWUNLOCK(readvp, readflg, NULL);
1079 					releasef(sfv->sfv_fd);
1080 					return (EINVAL);
1081 				}
1082 				sfv_len -= cnt;
1083 				sfv_off += cnt;
1084 
1085 				if (vp->v_type == VSOCK) {
1086 					dmp->b_wptr = dmp->b_rptr + cnt;
1087 
1088 					error = kstrwritemp(vp, dmp, fflag);
1089 					if (error != 0) {
1090 						freeb(dmp);
1091 						VOP_RWUNLOCK(readvp, readflg,
1092 						    NULL);
1093 						releasef(sfv->sfv_fd);
1094 						return (error);
1095 					}
1096 
1097 					ttolwp(curthread)->lwp_ru.ioch +=
1098 					    (ulong_t)cnt;
1099 					*count += cnt;
1100 				} else {
1101 
1102 					aiov.iov_base = ptr;
1103 					aiov.iov_len = cnt;
1104 					auio.uio_loffset = *fileoff;
1105 					auio.uio_resid = cnt;
1106 					auio.uio_iov = &aiov;
1107 					auio.uio_iovcnt = 1;
1108 					auio.uio_segflg = UIO_SYSSPACE;
1109 					auio.uio_llimit = curproc->p_fsz_ctl;
1110 					auio.uio_fmode = fflag;
1111 					ioflag = auio.uio_fmode &
1112 					    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1113 					error = VOP_WRITE(vp, &auio, ioflag,
1114 					    fp->f_cred, NULL);
1115 
1116 					/*
1117 					 * Check how much data was written.
1118 					 * Increment the 'len' and decrement the
1119 					 * 'off' if all the data was not
1120 					 * written.
1121 					 */
1122 					cnt -= auio.uio_resid;
1123 					sfv_len += auio.uio_resid;
1124 					sfv_off -= auio.uio_resid;
1125 					ttolwp(curthread)->lwp_ru.ioch +=
1126 					    (ulong_t)cnt;
1127 					*fileoff += cnt;
1128 					*count += cnt;
1129 					if (error != 0) {
1130 						kmem_free(buf, size);
1131 						VOP_RWUNLOCK(readvp, readflg,
1132 						    NULL);
1133 						releasef(sfv->sfv_fd);
1134 						return (error);
1135 					}
1136 				}
1137 			}
1138 			if (buf) {
1139 				kmem_free(buf, size);
1140 				buf = NULL;
1141 			}
1142 			VOP_RWUNLOCK(readvp, readflg, NULL);
1143 			releasef(sfv->sfv_fd);
1144 		}
1145 		sfv++;
1146 	}
1147 	return (0);
1148 }
1149 
1150 ssize_t
1151 sendfilev(int opcode, int fildes, const struct sendfilevec *vec, int sfvcnt,
1152     size_t *xferred)
1153 {
1154 	int error = 0;
1155 	int first_vector_error = 0;
1156 	file_t *fp;
1157 	struct vnode *vp;
1158 	struct sonode *so;
1159 	u_offset_t fileoff;
1160 	int copy_cnt;
1161 	const struct sendfilevec *copy_vec;
1162 	struct sendfilevec sfv[SEND_MAX_CHUNK];
1163 	ssize_t count = 0;
1164 #ifdef _SYSCALL32_IMPL
1165 	struct ksendfilevec32 sfv32[SEND_MAX_CHUNK];
1166 #endif
1167 	ssize_t total_size;
1168 	int i;
1169 	boolean_t is_sock = B_FALSE;
1170 	int maxblk = 0;
1171 
1172 	if (sfvcnt <= 0)
1173 		return (set_errno(EINVAL));
1174 
1175 	if ((fp = getf(fildes)) == NULL)
1176 		return (set_errno(EBADF));
1177 
1178 	if (((fp->f_flag) & FWRITE) == 0) {
1179 		error = EBADF;
1180 		goto err;
1181 	}
1182 
1183 	fileoff = fp->f_offset;
1184 	vp = fp->f_vnode;
1185 
1186 	switch (vp->v_type) {
1187 	case VSOCK:
1188 		so = VTOSO(vp);
1189 		/* sendfile not supported for SCTP */
1190 		if (so->so_protocol == IPPROTO_SCTP) {
1191 			error = EPROTONOSUPPORT;
1192 			goto err;
1193 		}
1194 		is_sock = B_TRUE;
1195 		switch (so->so_family) {
1196 		case AF_INET:
1197 		case AF_INET6:
1198 			/*
1199 			 * Make similar checks done in SOP_WRITE().
1200 			 */
1201 			if (so->so_state & SS_CANTSENDMORE) {
1202 				tsignal(curthread, SIGPIPE);
1203 				error = EPIPE;
1204 				goto err;
1205 			}
1206 			if (so->so_type != SOCK_STREAM) {
1207 				error = EOPNOTSUPP;
1208 				goto err;
1209 			}
1210 
1211 			if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) !=
1212 			    (SS_ISCONNECTED|SS_ISBOUND)) {
1213 				error = ENOTCONN;
1214 				goto err;
1215 			}
1216 
1217 			if ((so->so_state & SS_DIRECT) &&
1218 			    (so->so_priv != NULL) &&
1219 			    (so->so_kssl_ctx == NULL)) {
1220 				maxblk = ((tcp_t *)so->so_priv)->tcp_mss;
1221 			} else {
1222 				maxblk = (int)vp->v_stream->sd_maxblk;
1223 			}
1224 			break;
1225 		default:
1226 			error = EAFNOSUPPORT;
1227 			goto err;
1228 		}
1229 		break;
1230 	case VREG:
1231 		break;
1232 	default:
1233 		error = EINVAL;
1234 		goto err;
1235 	}
1236 
1237 	switch (opcode) {
1238 	case SENDFILEV :
1239 		break;
1240 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1241 	case SENDFILEV64 :
1242 		return (sendvec64(fp, (struct ksendfilevec64 *)vec, sfvcnt,
1243 		    (size32_t *)xferred, fildes));
1244 #endif
1245 	default :
1246 		error = ENOSYS;
1247 		break;
1248 	}
1249 
1250 	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1251 	copy_vec = vec;
1252 
1253 	do {
1254 		total_size = 0;
1255 		copy_cnt = MIN(sfvcnt, SEND_MAX_CHUNK);
1256 #ifdef _SYSCALL32_IMPL
1257 		/* 32-bit callers need to have their iovec expanded. */
1258 		if (get_udatamodel() == DATAMODEL_ILP32) {
1259 			if (copyin(copy_vec, sfv32,
1260 			    copy_cnt * sizeof (ksendfilevec32_t))) {
1261 				error = EFAULT;
1262 				break;
1263 			}
1264 
1265 			for (i = 0; i < copy_cnt; i++) {
1266 				sfv[i].sfv_fd = sfv32[i].sfv_fd;
1267 				sfv[i].sfv_off =
1268 				    (off_t)(uint32_t)sfv32[i].sfv_off;
1269 				sfv[i].sfv_len = (size_t)sfv32[i].sfv_len;
1270 				total_size += sfv[i].sfv_len;
1271 				sfv[i].sfv_flag = sfv32[i].sfv_flag;
1272 				/*
1273 				 * Individual elements of the vector must not
1274 				 * wrap or overflow, as later math is signed.
1275 				 * Equally total_size needs to be checked after
1276 				 * each vector is added in, to be sure that
1277 				 * rogue values haven't overflowed the counter.
1278 				 */
1279 				if (((ssize32_t)sfv[i].sfv_len < 0) ||
1280 				    ((ssize32_t)total_size < 0)) {
1281 					/*
1282 					 * Truncate the vector to send data
1283 					 * described by elements before the
1284 					 * error.
1285 					 */
1286 					copy_cnt = i;
1287 					first_vector_error = EINVAL;
1288 					/* total_size can't be trusted */
1289 					if ((ssize32_t)total_size < 0)
1290 						error = EINVAL;
1291 					break;
1292 				}
1293 			}
1294 			/* Nothing to do, process errors */
1295 			if (copy_cnt == 0)
1296 				break;
1297 
1298 		} else {
1299 #endif
1300 			if (copyin(copy_vec, sfv,
1301 			    copy_cnt * sizeof (sendfilevec_t))) {
1302 				error = EFAULT;
1303 				break;
1304 			}
1305 
1306 			for (i = 0; i < copy_cnt; i++) {
1307 				total_size += sfv[i].sfv_len;
1308 				/*
1309 				 * Individual elements of the vector must not
1310 				 * wrap or overflow, as later math is signed.
1311 				 * Equally total_size needs to be checked after
1312 				 * each vector is added in, to be sure that
1313 				 * rogue values haven't overflowed the counter.
1314 				 */
1315 				if (((ssize_t)sfv[i].sfv_len < 0) ||
1316 				    (total_size < 0)) {
1317 					/*
1318 					 * Truncate the vector to send data
1319 					 * described by elements before the
1320 					 * error.
1321 					 */
1322 					copy_cnt = i;
1323 					first_vector_error = EINVAL;
1324 					/* total_size can't be trusted */
1325 					if (total_size < 0)
1326 						error = EINVAL;
1327 					break;
1328 				}
1329 			}
1330 			/* Nothing to do, process errors */
1331 			if (copy_cnt == 0)
1332 				break;
1333 #ifdef _SYSCALL32_IMPL
1334 		}
1335 #endif
1336 
1337 		/*
1338 		 * The task between deciding to use sendvec_small_chunk
1339 		 * and sendvec_chunk is dependant on multiple things:
1340 		 *
1341 		 * i) latency is important for smaller files. So if the
1342 		 * data is smaller than 'tcp_slow_start_initial' times
1343 		 * maxblk, then use sendvec_small_chunk which creates
1344 		 * maxblk size mblks and chains them together and sends
1345 		 * them to TCP in one shot. It also leaves 'wroff' size
1346 		 * space for the headers in each mblk.
1347 		 *
1348 		 * ii) for total size bigger than 'tcp_slow_start_initial'
1349 		 * time maxblk, its probably real file data which is
1350 		 * dominating. So its better to use sendvec_chunk because
1351 		 * performance goes to dog if we don't do pagesize reads.
1352 		 * sendvec_chunk will do pagesize reads and write them
1353 		 * in pagesize mblks to TCP.
1354 		 *
1355 		 * Side Notes: A write to file has not been optimized.
1356 		 * Future zero copy code will plugin into sendvec_chunk
1357 		 * only because doing zero copy for files smaller then
1358 		 * pagesize is useless.
1359 		 *
1360 		 * Note, if socket has NL7C enabled then call NL7C's
1361 		 * senfilev() function to consume the sfv[].
1362 		 */
1363 		if (is_sock) {
1364 			switch (so->so_family) {
1365 			case AF_INET:
1366 			case AF_INET6:
1367 				if (so->so_nl7c_flags != 0)
1368 					error = nl7c_sendfilev(so, &fileoff,
1369 					    sfv, copy_cnt, &count);
1370 				else if ((total_size <= (4 * maxblk)) &&
1371 				    error == 0)
1372 					error = sendvec_small_chunk(fp,
1373 					    &fileoff, sfv, copy_cnt,
1374 					    total_size, maxblk, &count);
1375 				else
1376 					error = sendvec_chunk(fp, &fileoff,
1377 					    sfv, copy_cnt, &count);
1378 				break;
1379 			}
1380 		} else {
1381 			ASSERT(vp->v_type == VREG);
1382 			error = sendvec_chunk(fp, &fileoff, sfv, copy_cnt,
1383 			    &count);
1384 		}
1385 
1386 
1387 #ifdef _SYSCALL32_IMPL
1388 	if (get_udatamodel() == DATAMODEL_ILP32)
1389 		copy_vec = (const struct sendfilevec *)((char *)copy_vec +
1390 		    (copy_cnt * sizeof (ksendfilevec32_t)));
1391 	else
1392 #endif
1393 		copy_vec += copy_cnt;
1394 		sfvcnt -= copy_cnt;
1395 
1396 	/* Process all vector members up to first error */
1397 	} while ((sfvcnt > 0) && first_vector_error == 0 && error == 0);
1398 
1399 	if (vp->v_type == VREG)
1400 		fp->f_offset += count;
1401 
1402 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1403 
1404 #ifdef _SYSCALL32_IMPL
1405 	if (get_udatamodel() == DATAMODEL_ILP32) {
1406 		ssize32_t count32 = (ssize32_t)count;
1407 		if (copyout(&count32, xferred, sizeof (count32)))
1408 			error = EFAULT;
1409 		releasef(fildes);
1410 		if (error != 0)
1411 			return (set_errno(error));
1412 		if (first_vector_error != 0)
1413 			return (set_errno(first_vector_error));
1414 		return (count32);
1415 	}
1416 #endif
1417 	if (copyout(&count, xferred, sizeof (count)))
1418 		error = EFAULT;
1419 	releasef(fildes);
1420 	if (error != 0)
1421 		return (set_errno(error));
1422 	if (first_vector_error != 0)
1423 		return (set_errno(first_vector_error));
1424 	return (count);
1425 err:
1426 	ASSERT(error != 0);
1427 	releasef(fildes);
1428 	return (set_errno(error));
1429 }
1430