xref: /titanic_52/usr/src/uts/common/syscall/sendfile.c (revision 4bff34e37def8a90f9194d81bc345c52ba20086a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/t_lock.h>
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/buf.h>
34 #include <sys/conf.h>
35 #include <sys/cred.h>
36 #include <sys/kmem.h>
37 #include <sys/sysmacros.h>
38 #include <sys/vfs.h>
39 #include <sys/vnode.h>
40 #include <sys/debug.h>
41 #include <sys/errno.h>
42 #include <sys/time.h>
43 #include <sys/file.h>
44 #include <sys/open.h>
45 #include <sys/user.h>
46 #include <sys/termios.h>
47 #include <sys/stream.h>
48 #include <sys/strsubr.h>
49 #include <sys/sunddi.h>
50 #include <sys/esunddi.h>
51 #include <sys/flock.h>
52 #include <sys/modctl.h>
53 #include <sys/cmn_err.h>
54 #include <sys/vmsystm.h>
55 
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 /* swilly code in sys/socketvar.h turns off DEBUG */
59 #ifdef __lint
60 #define	DEBUG
61 #endif
62 
63 #include <netinet/in.h>
64 #include <sys/sendfile.h>
65 #include <sys/un.h>
66 #include <sys/tihdr.h>
67 #include <sys/atomic.h>
68 
69 #include <inet/common.h>
70 #include <inet/ip.h>
71 #include <inet/ip6.h>
72 #include <inet/tcp.h>
73 
74 extern int sosendfile64(file_t *, file_t *, const struct ksendfilevec64 *,
75 		ssize32_t *);
76 extern int nl7c_sendfilev(struct sonode *, u_offset_t *, struct sendfilevec *,
77 		int, ssize_t *);
78 extern int snf_segmap(file_t *, vnode_t *, u_offset_t, u_offset_t, uint_t,
79 		ssize_t *, boolean_t);
80 
81 #define	readflg	(V_WRITELOCK_FALSE)
82 #define	rwflag	(V_WRITELOCK_TRUE)
83 
84 /*
85  * kstrwritemp() has very similar semantics as that of strwrite().
86  * The main difference is it obtains mblks from the caller and also
87  * does not do any copy as done in strwrite() from user buffers to
88  * kernel buffers.
89  *
90  * Currently, this routine is used by sendfile to send data allocated
91  * within the kernel without any copying. This interface does not use the
92  * synchronous stream interface as synch. stream interface implies
93  * copying.
94  */
95 int
96 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode)
97 {
98 	struct stdata *stp;
99 	struct queue *wqp;
100 	mblk_t *newmp;
101 	char waitflag;
102 	int tempmode;
103 	int error = 0;
104 	int done = 0;
105 	struct sonode *so;
106 	boolean_t direct;
107 
108 	ASSERT(vp->v_stream);
109 	stp = vp->v_stream;
110 
111 	so = VTOSO(vp);
112 	direct = (so->so_state & SS_DIRECT);
113 
114 	/*
115 	 * This is the sockfs direct fast path. canputnext() need
116 	 * not be accurate so we don't grab the sd_lock here. If
117 	 * we get flow-controlled, we grab sd_lock just before the
118 	 * do..while loop below to emulate what strwrite() does.
119 	 */
120 	wqp = stp->sd_wrq;
121 	if (canputnext(wqp) && direct &&
122 	    !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
123 		return (sostream_direct(so, NULL, mp, CRED()));
124 	} else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
125 		/* Fast check of flags before acquiring the lock */
126 		mutex_enter(&stp->sd_lock);
127 		error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
128 		mutex_exit(&stp->sd_lock);
129 		if (error != 0) {
130 			if (!(stp->sd_flag & STPLEX) &&
131 			    (stp->sd_wput_opt & SW_SIGPIPE)) {
132 				tsignal(curthread, SIGPIPE);
133 				error = EPIPE;
134 			}
135 			return (error);
136 		}
137 	}
138 
139 	waitflag = WRITEWAIT;
140 	if (stp->sd_flag & OLDNDELAY)
141 		tempmode = fmode & ~FNDELAY;
142 	else
143 		tempmode = fmode;
144 
145 	mutex_enter(&stp->sd_lock);
146 	do {
147 		if (canputnext(wqp)) {
148 			mutex_exit(&stp->sd_lock);
149 			if (stp->sd_wputdatafunc != NULL) {
150 				newmp = (stp->sd_wputdatafunc)(vp, mp, NULL,
151 				    NULL, NULL, NULL);
152 				if (newmp == NULL) {
153 					/* The caller will free mp */
154 					return (ECOMM);
155 				}
156 				mp = newmp;
157 			}
158 			putnext(wqp, mp);
159 			return (0);
160 		}
161 		error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1,
162 		    &done);
163 	} while (error == 0 && !done);
164 
165 	mutex_exit(&stp->sd_lock);
166 	/*
167 	 * EAGAIN tells the application to try again. ENOMEM
168 	 * is returned only if the memory allocation size
169 	 * exceeds the physical limits of the system. ENOMEM
170 	 * can't be true here.
171 	 */
172 	if (error == ENOMEM)
173 		error = EAGAIN;
174 	return (error);
175 }
176 
177 #define	SEND_MAX_CHUNK	16
178 
179 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
180 /*
181  * 64 bit offsets for 32 bit applications only running either on
182  * 64 bit kernel or 32 bit kernel. For 32 bit apps, we can't transfer
183  * more than 2GB of data.
184  */
185 int
186 sendvec_chunk64(file_t *fp, u_offset_t *fileoff, struct ksendfilevec64 *sfv,
187     int copy_cnt, ssize32_t *count)
188 {
189 	struct vnode *vp;
190 	ushort_t fflag;
191 	int ioflag;
192 	size32_t cnt;
193 	ssize32_t sfv_len;
194 	ssize32_t tmpcount;
195 	u_offset_t sfv_off;
196 	struct uio auio;
197 	struct iovec aiov;
198 	int i, error;
199 
200 	fflag = fp->f_flag;
201 	vp = fp->f_vnode;
202 	for (i = 0; i < copy_cnt; i++) {
203 
204 		if (ISSIG(curthread, JUSTLOOKING))
205 			return (EINTR);
206 
207 		/*
208 		 * Do similar checks as "write" as we are writing
209 		 * sfv_len bytes into "vp".
210 		 */
211 		sfv_len = (ssize32_t)sfv->sfv_len;
212 
213 		if (sfv_len == 0)
214 			continue;
215 
216 		if (sfv_len < 0)
217 			return (EINVAL);
218 
219 		if (vp->v_type == VREG) {
220 			if (*fileoff >= curproc->p_fsz_ctl) {
221 				mutex_enter(&curproc->p_lock);
222 				(void) rctl_action(
223 				    rctlproc_legacy[RLIMIT_FSIZE],
224 				    curproc->p_rctls, curproc, RCA_SAFE);
225 				mutex_exit(&curproc->p_lock);
226 				return (EFBIG);
227 			}
228 
229 			if (*fileoff >= OFFSET_MAX(fp))
230 				return (EFBIG);
231 
232 			if (*fileoff + sfv_len > OFFSET_MAX(fp))
233 				return (EINVAL);
234 		}
235 
236 		tmpcount = *count + sfv_len;
237 		if (tmpcount < 0)
238 			return (EINVAL);
239 
240 		sfv_off = sfv->sfv_off;
241 
242 		auio.uio_extflg = UIO_COPY_DEFAULT;
243 		if (sfv->sfv_fd == SFV_FD_SELF) {
244 			aiov.iov_len = sfv_len;
245 			aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
246 			auio.uio_loffset = *fileoff;
247 			auio.uio_iovcnt = 1;
248 			auio.uio_resid = sfv_len;
249 			auio.uio_iov = &aiov;
250 			auio.uio_segflg = UIO_USERSPACE;
251 			auio.uio_llimit = curproc->p_fsz_ctl;
252 			auio.uio_fmode = fflag;
253 			ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
254 			while (sfv_len > 0) {
255 				error = VOP_WRITE(vp, &auio, ioflag,
256 				    fp->f_cred, NULL);
257 				cnt = sfv_len - auio.uio_resid;
258 				sfv_len -= cnt;
259 				ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
260 				if (vp->v_type == VREG)
261 					*fileoff += cnt;
262 				*count += cnt;
263 				if (error != 0)
264 					return (error);
265 			}
266 		} else {
267 			file_t	*ffp;
268 			vnode_t	*readvp;
269 			size_t	size;
270 			caddr_t	ptr;
271 
272 			if ((ffp = getf(sfv->sfv_fd)) == NULL)
273 				return (EBADF);
274 
275 			if ((ffp->f_flag & FREAD) == 0) {
276 				releasef(sfv->sfv_fd);
277 				return (EBADF);
278 			}
279 
280 			readvp = ffp->f_vnode;
281 			if (readvp->v_type != VREG) {
282 				releasef(sfv->sfv_fd);
283 				return (EINVAL);
284 			}
285 
286 			/*
287 			 * No point reading and writing to same vp,
288 			 * as long as both are regular files. readvp is not
289 			 * locked; but since we got it from an open file the
290 			 * contents will be valid during the time of access.
291 			 */
292 			if (vn_compare(vp, readvp)) {
293 				releasef(sfv->sfv_fd);
294 				return (EINVAL);
295 			}
296 
297 			/*
298 			 * Note: we assume readvp != vp. "vp" is already
299 			 * locked, and "readvp" must not be.
300 			 */
301 			(void) VOP_RWLOCK(readvp, readflg, NULL);
302 
303 			/*
304 			 * Same checks as in pread64.
305 			 */
306 			if (sfv_off > MAXOFFSET_T) {
307 				VOP_RWUNLOCK(readvp, readflg, NULL);
308 				releasef(sfv->sfv_fd);
309 				return (EINVAL);
310 			}
311 
312 			if (sfv_off + sfv_len > MAXOFFSET_T)
313 				sfv_len = (ssize32_t)(MAXOFFSET_T - sfv_off);
314 
315 			/* Find the native blocksize to transfer data */
316 			size = MIN(vp->v_vfsp->vfs_bsize,
317 			    readvp->v_vfsp->vfs_bsize);
318 			size = sfv_len < size ? sfv_len : size;
319 			ptr = kmem_alloc(size, KM_SLEEP);
320 
321 			while (sfv_len > 0) {
322 				size_t	iov_len;
323 
324 				iov_len = MIN(size, sfv_len);
325 				aiov.iov_base = ptr;
326 				aiov.iov_len = iov_len;
327 				auio.uio_loffset = sfv_off;
328 				auio.uio_iov = &aiov;
329 				auio.uio_iovcnt = 1;
330 				auio.uio_resid = iov_len;
331 				auio.uio_segflg = UIO_SYSSPACE;
332 				auio.uio_llimit = MAXOFFSET_T;
333 				auio.uio_fmode = ffp->f_flag;
334 				ioflag = auio.uio_fmode &
335 				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
336 
337 				/*
338 				 * If read sync is not asked for,
339 				 * filter sync flags
340 				 */
341 				if ((ioflag & FRSYNC) == 0)
342 					ioflag &= ~(FSYNC|FDSYNC);
343 				error = VOP_READ(readvp, &auio, ioflag,
344 				    fp->f_cred, NULL);
345 				if (error) {
346 					kmem_free(ptr, size);
347 					VOP_RWUNLOCK(readvp, readflg, NULL);
348 					releasef(sfv->sfv_fd);
349 					return (error);
350 				}
351 
352 				/*
353 				 * Check how must data was really read.
354 				 * Decrement the 'len' and increment the
355 				 * 'off' appropriately.
356 				 */
357 				cnt = iov_len - auio.uio_resid;
358 				if (cnt == 0) {
359 					/*
360 					 * If we were reading a pipe (currently
361 					 * not implemented), we may now lose
362 					 * data.
363 					 */
364 					kmem_free(ptr, size);
365 					VOP_RWUNLOCK(readvp, readflg, NULL);
366 					releasef(sfv->sfv_fd);
367 					return (EINVAL);
368 				}
369 				sfv_len -= cnt;
370 				sfv_off += cnt;
371 
372 				aiov.iov_base = ptr;
373 				aiov.iov_len = cnt;
374 				auio.uio_loffset = *fileoff;
375 				auio.uio_resid = cnt;
376 				auio.uio_segflg = UIO_SYSSPACE;
377 				auio.uio_llimit = curproc->p_fsz_ctl;
378 				auio.uio_fmode = fflag;
379 				ioflag = auio.uio_fmode &
380 				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
381 				error = VOP_WRITE(vp, &auio, ioflag,
382 				    fp->f_cred, NULL);
383 
384 				/*
385 				 * Check how much data was written. Increment
386 				 * the 'len' and decrement the 'off' if all
387 				 * the data was not written.
388 				 */
389 				cnt -= auio.uio_resid;
390 				sfv_len += auio.uio_resid;
391 				sfv_off -= auio.uio_resid;
392 				ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
393 				if (vp->v_type == VREG)
394 					*fileoff += cnt;
395 				*count += cnt;
396 				if (error != 0) {
397 					kmem_free(ptr, size);
398 					VOP_RWUNLOCK(readvp, readflg, NULL);
399 					releasef(sfv->sfv_fd);
400 					return (error);
401 				}
402 			}
403 			VOP_RWUNLOCK(readvp, readflg, NULL);
404 			releasef(sfv->sfv_fd);
405 			kmem_free(ptr, size);
406 		}
407 		sfv++;
408 	}
409 	return (0);
410 }
411 
412 ssize32_t
413 sendvec64(file_t *fp, const struct ksendfilevec64 *vec, int sfvcnt,
414 	size32_t *xferred, int fildes)
415 {
416 	u_offset_t		fileoff;
417 	int			copy_cnt;
418 	const struct ksendfilevec64 *copy_vec;
419 	struct ksendfilevec64 sfv[SEND_MAX_CHUNK];
420 	struct vnode *vp;
421 	int error;
422 	ssize32_t count = 0;
423 
424 	vp = fp->f_vnode;
425 	(void) VOP_RWLOCK(vp, rwflag, NULL);
426 
427 	copy_vec = vec;
428 	fileoff = fp->f_offset;
429 
430 	do {
431 		copy_cnt = MIN(sfvcnt, SEND_MAX_CHUNK);
432 		if (copyin(copy_vec, sfv, copy_cnt *
433 		    sizeof (struct ksendfilevec64))) {
434 			error = EFAULT;
435 			break;
436 		}
437 
438 		/*
439 		 * Optimize the regular file over
440 		 * the socket case.
441 		 */
442 		if (vp->v_type == VSOCK && sfv->sfv_fd != SFV_FD_SELF) {
443 			file_t *rfp;
444 			vnode_t *rvp;
445 
446 			if ((rfp = getf(sfv->sfv_fd)) == NULL) {
447 				error = EBADF;
448 				break;
449 			}
450 			if ((rfp->f_flag & FREAD) == 0) {
451 				releasef(sfv->sfv_fd);
452 				error = EBADF;
453 				break;
454 			}
455 			rvp = rfp->f_vnode;
456 			if (rvp->v_type == VREG) {
457 				error = sosendfile64(fp, rfp, sfv, &count);
458 				if (error)
459 					break;
460 				copy_vec++;
461 				sfvcnt--;
462 				continue;
463 			}
464 			releasef(sfv->sfv_fd);
465 		}
466 		error = sendvec_chunk64(fp, &fileoff, sfv, copy_cnt, &count);
467 		if (error != 0)
468 			break;
469 
470 		copy_vec += copy_cnt;
471 		sfvcnt -= copy_cnt;
472 	} while (sfvcnt > 0);
473 
474 	if (vp->v_type == VREG)
475 		fp->f_offset += count;
476 
477 	VOP_RWUNLOCK(vp, rwflag, NULL);
478 	if (copyout(&count, xferred, sizeof (count)))
479 		error = EFAULT;
480 	releasef(fildes);
481 	if (error != 0)
482 		return (set_errno(error));
483 	return (count);
484 }
485 #endif
486 
487 int
488 sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
489     int copy_cnt, ssize_t total_size, int maxblk, ssize_t *count)
490 {
491 	struct vnode *vp;
492 	struct uio auio;
493 	struct iovec aiov;
494 	ushort_t fflag;
495 	int ioflag;
496 	int i, error;
497 	size_t cnt;
498 	ssize_t sfv_len;
499 	u_offset_t sfv_off;
500 #ifdef _SYSCALL32_IMPL
501 	model_t model = get_udatamodel();
502 	u_offset_t maxoff = (model == DATAMODEL_ILP32) ?
503 		MAXOFF32_T : MAXOFFSET_T;
504 #else
505 	const u_offset_t maxoff = MAXOFF32_T;
506 #endif
507 	mblk_t *dmp = NULL;
508 	int wroff;
509 	int buf_left = 0;
510 	size_t	iov_len;
511 	mblk_t  *head, *tmp;
512 	size_t  size = total_size;
513 	size_t  extra;
514 	int tail_len;
515 
516 	fflag = fp->f_flag;
517 	vp = fp->f_vnode;
518 
519 	ASSERT(vp->v_type == VSOCK);
520 	ASSERT(maxblk > 0);
521 
522 	wroff = (int)vp->v_stream->sd_wroff;
523 	tail_len = (int)vp->v_stream->sd_tail;
524 	extra = wroff + tail_len;
525 
526 	buf_left = MIN(total_size, maxblk);
527 	head = dmp = allocb(buf_left + extra, BPRI_HI);
528 	if (head == NULL)
529 		return (ENOMEM);
530 	head->b_wptr = head->b_rptr = head->b_rptr + wroff;
531 
532 	auio.uio_extflg = UIO_COPY_DEFAULT;
533 	for (i = 0; i < copy_cnt; i++) {
534 		if (ISSIG(curthread, JUSTLOOKING))
535 			return (EINTR);
536 
537 		/*
538 		 * Do similar checks as "write" as we are writing
539 		 * sfv_len bytes into "vp".
540 		 */
541 		sfv_len = (ssize_t)sfv->sfv_len;
542 
543 		if (sfv_len == 0) {
544 			sfv++;
545 			continue;
546 		}
547 
548 		/* Make sure sfv_len is not negative */
549 #ifdef _SYSCALL32_IMPL
550 		if (model == DATAMODEL_ILP32) {
551 			if ((ssize32_t)sfv_len < 0)
552 				return (EINVAL);
553 		} else
554 #endif
555 		if (sfv_len < 0)
556 			return (EINVAL);
557 
558 		/* Check for overflow */
559 #ifdef _SYSCALL32_IMPL
560 		if (model == DATAMODEL_ILP32) {
561 			if (((ssize32_t)(*count + sfv_len)) < 0)
562 				return (EINVAL);
563 		} else
564 #endif
565 		if ((*count + sfv_len) < 0)
566 			return (EINVAL);
567 
568 		sfv_off = (u_offset_t)(ulong_t)sfv->sfv_off;
569 
570 		if (sfv->sfv_fd == SFV_FD_SELF) {
571 			while (sfv_len > 0) {
572 				if (buf_left == 0) {
573 					tmp = dmp;
574 					buf_left = MIN(total_size, maxblk);
575 					iov_len = MIN(buf_left, sfv_len);
576 					dmp = allocb(buf_left + extra, BPRI_HI);
577 					if (dmp == NULL) {
578 						freemsg(head);
579 						return (ENOMEM);
580 					}
581 					dmp->b_wptr = dmp->b_rptr =
582 					    dmp->b_rptr + wroff;
583 					tmp->b_cont = dmp;
584 				} else {
585 					iov_len = MIN(buf_left, sfv_len);
586 				}
587 
588 				aiov.iov_len = iov_len;
589 				aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
590 				auio.uio_loffset = *fileoff;
591 				auio.uio_iovcnt = 1;
592 				auio.uio_resid = iov_len;
593 				auio.uio_iov = &aiov;
594 				auio.uio_segflg = UIO_USERSPACE;
595 				auio.uio_llimit = curproc->p_fsz_ctl;
596 				auio.uio_fmode = fflag;
597 
598 				buf_left -= iov_len;
599 				total_size -= iov_len;
600 				sfv_len -= iov_len;
601 				sfv_off += iov_len;
602 
603 				error = uiomove((caddr_t)dmp->b_wptr,
604 				    iov_len, UIO_WRITE, &auio);
605 				if (error != 0) {
606 					freemsg(head);
607 					return (error);
608 				}
609 				dmp->b_wptr += iov_len;
610 			}
611 		} else {
612 			file_t	*ffp;
613 			vnode_t	*readvp;
614 
615 			if ((ffp = getf(sfv->sfv_fd)) == NULL) {
616 				freemsg(head);
617 				return (EBADF);
618 			}
619 
620 			if ((ffp->f_flag & FREAD) == 0) {
621 				releasef(sfv->sfv_fd);
622 				freemsg(head);
623 				return (EACCES);
624 			}
625 
626 			readvp = ffp->f_vnode;
627 			if (readvp->v_type != VREG) {
628 				releasef(sfv->sfv_fd);
629 				freemsg(head);
630 				return (EINVAL);
631 			}
632 
633 			/*
634 			 * No point reading and writing to same vp,
635 			 * as long as both are regular files. readvp is not
636 			 * locked; but since we got it from an open file the
637 			 * contents will be valid during the time of access.
638 			 */
639 
640 			if (vn_compare(vp, readvp)) {
641 				releasef(sfv->sfv_fd);
642 				freemsg(head);
643 				return (EINVAL);
644 			}
645 
646 			/*
647 			 * Note: we assume readvp != vp. "vp" is already
648 			 * locked, and "readvp" must not be.
649 			 */
650 
651 			(void) VOP_RWLOCK(readvp, readflg, NULL);
652 
653 			/* Same checks as in pread */
654 			if (sfv_off > maxoff) {
655 				VOP_RWUNLOCK(readvp, readflg, NULL);
656 				releasef(sfv->sfv_fd);
657 				freemsg(head);
658 				return (EINVAL);
659 			}
660 			if (sfv_off + sfv_len > maxoff) {
661 				total_size -= (sfv_off + sfv_len - maxoff);
662 				sfv_len = (ssize_t)((offset_t)maxoff -
663 				    sfv_off);
664 			}
665 
666 			while (sfv_len > 0) {
667 				if (buf_left == 0) {
668 					tmp = dmp;
669 					buf_left = MIN(total_size, maxblk);
670 					iov_len = MIN(buf_left, sfv_len);
671 					dmp = allocb(buf_left + extra, BPRI_HI);
672 					if (dmp == NULL) {
673 						VOP_RWUNLOCK(readvp, readflg,
674 									NULL);
675 						releasef(sfv->sfv_fd);
676 						freemsg(head);
677 						return (ENOMEM);
678 					}
679 					dmp->b_wptr = dmp->b_rptr =
680 					    dmp->b_rptr + wroff;
681 					tmp->b_cont = dmp;
682 				} else {
683 					iov_len = MIN(buf_left, sfv_len);
684 				}
685 				aiov.iov_base = (caddr_t)dmp->b_wptr;
686 				aiov.iov_len = iov_len;
687 				auio.uio_loffset = sfv_off;
688 				auio.uio_iov = &aiov;
689 				auio.uio_iovcnt = 1;
690 				auio.uio_resid = iov_len;
691 				auio.uio_segflg = UIO_SYSSPACE;
692 				auio.uio_llimit = MAXOFFSET_T;
693 				auio.uio_fmode = ffp->f_flag;
694 				ioflag = auio.uio_fmode &
695 				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
696 
697 				/*
698 				 * If read sync is not asked for,
699 				 * filter sync flags
700 				 */
701 				if ((ioflag & FRSYNC) == 0)
702 					ioflag &= ~(FSYNC|FDSYNC);
703 				error = VOP_READ(readvp, &auio, ioflag,
704 				    fp->f_cred, NULL);
705 				if (error != 0) {
706 					/*
707 					 * If we were reading a pipe (currently
708 					 * not implemented), we may now loose
709 					 * data.
710 					 */
711 					VOP_RWUNLOCK(readvp, readflg, NULL);
712 					releasef(sfv->sfv_fd);
713 					freemsg(head);
714 					return (error);
715 				}
716 
717 				/*
718 				 * Check how much data was really read.
719 				 * Decrement the 'len' and increment the
720 				 * 'off' appropriately.
721 				 */
722 				cnt = iov_len - auio.uio_resid;
723 				if (cnt == 0) {
724 					VOP_RWUNLOCK(readvp, readflg, NULL);
725 					releasef(sfv->sfv_fd);
726 					freemsg(head);
727 					return (EINVAL);
728 				}
729 				sfv_len -= cnt;
730 				sfv_off += cnt;
731 				total_size -= cnt;
732 				buf_left -= cnt;
733 
734 				dmp->b_wptr += cnt;
735 			}
736 			VOP_RWUNLOCK(readvp, readflg, NULL);
737 			releasef(sfv->sfv_fd);
738 		}
739 		sfv++;
740 	}
741 
742 	ASSERT(total_size == 0);
743 	error = kstrwritemp(vp, head, fflag);
744 	if (error != 0) {
745 		freemsg(head);
746 		return (error);
747 	}
748 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)size;
749 	*count += size;
750 
751 	return (0);
752 }
753 
754 
755 int
756 sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
757     int copy_cnt, ssize_t *count)
758 {
759 	struct vnode *vp;
760 	struct uio auio;
761 	struct iovec aiov;
762 	ushort_t fflag;
763 	int ioflag;
764 	int i, error;
765 	size_t cnt;
766 	ssize_t sfv_len;
767 	u_offset_t sfv_off;
768 #ifdef _SYSCALL32_IMPL
769 	model_t model = get_udatamodel();
770 	u_offset_t maxoff = (model == DATAMODEL_ILP32) ?
771 		MAXOFF32_T : MAXOFFSET_T;
772 #else
773 	const u_offset_t maxoff = MAXOFF32_T;
774 #endif
775 	mblk_t	*dmp = NULL;
776 	char	*buf = NULL;
777 	size_t  extra;
778 	int maxblk, wroff, tail_len;
779 	struct sonode *so;
780 	stdata_t *stp;
781 
782 	fflag = fp->f_flag;
783 	vp = fp->f_vnode;
784 
785 	if (vp->v_type == VSOCK) {
786 		so = VTOSO(vp);
787 		stp = vp->v_stream;
788 		wroff = (int)stp->sd_wroff;
789 		tail_len = (int)stp->sd_tail;
790 		maxblk = (int)stp->sd_maxblk;
791 		extra = wroff + tail_len;
792 	}
793 
794 	auio.uio_extflg = UIO_COPY_DEFAULT;
795 	for (i = 0; i < copy_cnt; i++) {
796 		if (ISSIG(curthread, JUSTLOOKING))
797 			return (EINTR);
798 
799 		/*
800 		 * Do similar checks as "write" as we are writing
801 		 * sfv_len bytes into "vp".
802 		 */
803 		sfv_len = (ssize_t)sfv->sfv_len;
804 
805 		if (sfv_len == 0) {
806 			sfv++;
807 			continue;
808 		}
809 
810 		/* Make sure sfv_len is not negative */
811 #ifdef _SYSCALL32_IMPL
812 		if (model == DATAMODEL_ILP32) {
813 			if ((ssize32_t)sfv_len < 0)
814 				return (EINVAL);
815 		} else
816 #endif
817 		if (sfv_len < 0)
818 			return (EINVAL);
819 
820 		if (vp->v_type == VREG) {
821 			if (*fileoff >= curproc->p_fsz_ctl) {
822 				mutex_enter(&curproc->p_lock);
823 				(void) rctl_action(
824 				    rctlproc_legacy[RLIMIT_FSIZE],
825 				    curproc->p_rctls, curproc, RCA_SAFE);
826 				mutex_exit(&curproc->p_lock);
827 
828 				return (EFBIG);
829 			}
830 
831 			if (*fileoff >= maxoff)
832 				return (EFBIG);
833 
834 			if (*fileoff + sfv_len > maxoff)
835 				return (EINVAL);
836 		}
837 
838 		/* Check for overflow */
839 #ifdef _SYSCALL32_IMPL
840 		if (model == DATAMODEL_ILP32) {
841 			if (((ssize32_t)(*count + sfv_len)) < 0)
842 				return (EINVAL);
843 		} else
844 #endif
845 		if ((*count + sfv_len) < 0)
846 			return (EINVAL);
847 
848 		sfv_off = (u_offset_t)(ulong_t)sfv->sfv_off;
849 
850 		if (sfv->sfv_fd == SFV_FD_SELF) {
851 			aiov.iov_len = sfv_len;
852 			aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
853 			auio.uio_loffset = *fileoff;
854 			auio.uio_iovcnt = 1;
855 			auio.uio_resid = sfv_len;
856 			auio.uio_iov = &aiov;
857 			auio.uio_segflg = UIO_USERSPACE;
858 			auio.uio_llimit = curproc->p_fsz_ctl;
859 			auio.uio_fmode = fflag;
860 
861 			if (vp->v_type == VSOCK) {
862 
863 				/*
864 				 * Optimize for the socket case
865 				 */
866 
867 				dmp = allocb(sfv_len + extra, BPRI_HI);
868 				if (dmp == NULL)
869 					return (ENOMEM);
870 				dmp->b_wptr = dmp->b_rptr = dmp->b_rptr + wroff;
871 				error = uiomove((caddr_t)dmp->b_wptr,
872 				    sfv_len, UIO_WRITE, &auio);
873 				if (error != 0) {
874 					freeb(dmp);
875 					return (error);
876 				}
877 				dmp->b_wptr += sfv_len;
878 				error = kstrwritemp(vp, dmp, fflag);
879 				if (error != 0) {
880 					freeb(dmp);
881 					return (error);
882 				}
883 				ttolwp(curthread)->lwp_ru.ioch +=
884 				    (ulong_t)sfv_len;
885 				*count += sfv_len;
886 			} else {
887 				ioflag = auio.uio_fmode &
888 				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
889 				while (sfv_len > 0) {
890 					error = VOP_WRITE(vp, &auio, ioflag,
891 					    fp->f_cred, NULL);
892 					cnt = sfv_len - auio.uio_resid;
893 					sfv_len -= cnt;
894 					ttolwp(curthread)->lwp_ru.ioch +=
895 					    (ulong_t)cnt;
896 					*fileoff += cnt;
897 					*count += cnt;
898 					if (error != 0)
899 						return (error);
900 				}
901 			}
902 		} else {
903 			int segmapit = 0;
904 			file_t	*ffp;
905 			vnode_t	*readvp;
906 			struct vnode *realvp;
907 			size_t	size;
908 			caddr_t	ptr;
909 
910 			if ((ffp = getf(sfv->sfv_fd)) == NULL)
911 				return (EBADF);
912 
913 			if ((ffp->f_flag & FREAD) == 0) {
914 				releasef(sfv->sfv_fd);
915 				return (EBADF);
916 			}
917 
918 			readvp = ffp->f_vnode;
919 			if (VOP_REALVP(readvp, &realvp, NULL) == 0)
920 				readvp = realvp;
921 			if (readvp->v_type != VREG) {
922 				releasef(sfv->sfv_fd);
923 				return (EINVAL);
924 			}
925 
926 			/*
927 			 * No point reading and writing to same vp,
928 			 * as long as both are regular files. readvp is not
929 			 * locked; but since we got it from an open file the
930 			 * contents will be valid during the time of access.
931 			 */
932 			if (vn_compare(vp, readvp)) {
933 				releasef(sfv->sfv_fd);
934 				return (EINVAL);
935 			}
936 
937 			/*
938 			 * Note: we assume readvp != vp. "vp" is already
939 			 * locked, and "readvp" must not be.
940 			 */
941 			(void) VOP_RWLOCK(readvp, readflg, NULL);
942 
943 			/* Same checks as in pread */
944 			if (sfv_off > maxoff) {
945 				VOP_RWUNLOCK(readvp, readflg, NULL);
946 				releasef(sfv->sfv_fd);
947 				return (EINVAL);
948 			}
949 			if (sfv_off + sfv_len > maxoff) {
950 				sfv_len = (ssize_t)((offset_t)maxoff -
951 				    sfv_off);
952 			}
953 			/* Find the native blocksize to transfer data */
954 			size = MIN(vp->v_vfsp->vfs_bsize,
955 			    readvp->v_vfsp->vfs_bsize);
956 			size = sfv_len < size ? sfv_len : size;
957 
958 			if (vp->v_type != VSOCK) {
959 				segmapit = 0;
960 				buf = kmem_alloc(size, KM_NOSLEEP);
961 				if (buf == NULL) {
962 					VOP_RWUNLOCK(readvp, readflg, NULL);
963 					releasef(sfv->sfv_fd);
964 					return (ENOMEM);
965 				}
966 			} else {
967 				/*
968 				 * For sockets acting as an SSL proxy, we
969 				 * need to adjust the size to the maximum
970 				 * SSL record size set in the stream head.
971 				 */
972 				if (so->so_kssl_ctx != NULL)
973 					size = MIN(size, maxblk);
974 
975 				if (vn_has_flocks(readvp) ||
976 				    readvp->v_flag & VNOMAP ||
977 				    stp->sd_copyflag & STZCVMUNSAFE) {
978 					segmapit = 0;
979 				} else if (stp->sd_copyflag & STZCVMSAFE) {
980 					segmapit = 1;
981 				} else {
982 					int on = 1;
983 					if (SOP_SETSOCKOPT(VTOSO(vp),
984 					    SOL_SOCKET, SO_SND_COPYAVOID,
985 					    &on, sizeof (on)) == 0)
986 					segmapit = 1;
987 				}
988 			}
989 
990 			if (segmapit) {
991 				boolean_t nowait;
992 				uint_t maxpsz;
993 
994 				nowait = (sfv->sfv_flag & SFV_NOWAIT) != 0;
995 				maxpsz = stp->sd_qn_maxpsz;
996 				if (maxpsz == INFPSZ)
997 					maxpsz = maxphys;
998 				maxpsz = roundup(maxpsz, MAXBSIZE);
999 				error = snf_segmap(fp, readvp, sfv_off,
1000 					    (u_offset_t)sfv_len, maxpsz,
1001 					    (ssize_t *)&cnt, nowait);
1002 				releasef(sfv->sfv_fd);
1003 				*count += cnt;
1004 				if (error)
1005 					return (error);
1006 				sfv++;
1007 				continue;
1008 			}
1009 
1010 			while (sfv_len > 0) {
1011 				size_t	iov_len;
1012 
1013 				iov_len = MIN(size, sfv_len);
1014 
1015 				if (vp->v_type == VSOCK) {
1016 					dmp = allocb(iov_len + extra, BPRI_HI);
1017 					if (dmp == NULL) {
1018 						VOP_RWUNLOCK(readvp, readflg,
1019 						    NULL);
1020 						releasef(sfv->sfv_fd);
1021 						return (ENOMEM);
1022 					}
1023 					dmp->b_wptr = dmp->b_rptr =
1024 					    dmp->b_rptr + wroff;
1025 					ptr = (caddr_t)dmp->b_rptr;
1026 				} else {
1027 					ptr = buf;
1028 				}
1029 
1030 				aiov.iov_base = ptr;
1031 				aiov.iov_len = iov_len;
1032 				auio.uio_loffset = sfv_off;
1033 				auio.uio_iov = &aiov;
1034 				auio.uio_iovcnt = 1;
1035 				auio.uio_resid = iov_len;
1036 				auio.uio_segflg = UIO_SYSSPACE;
1037 				auio.uio_llimit = MAXOFFSET_T;
1038 				auio.uio_fmode = ffp->f_flag;
1039 				ioflag = auio.uio_fmode &
1040 				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1041 
1042 				/*
1043 				 * If read sync is not asked for,
1044 				 * filter sync flags
1045 				 */
1046 				if ((ioflag & FRSYNC) == 0)
1047 					ioflag &= ~(FSYNC|FDSYNC);
1048 				error = VOP_READ(readvp, &auio, ioflag,
1049 				    fp->f_cred, NULL);
1050 				if (error != 0) {
1051 					/*
1052 					 * If we were reading a pipe (currently
1053 					 * not implemented), we may now lose
1054 					 * data.
1055 					 */
1056 					if (vp->v_type == VSOCK)
1057 						freeb(dmp);
1058 					else
1059 						kmem_free(buf, size);
1060 					VOP_RWUNLOCK(readvp, readflg, NULL);
1061 					releasef(sfv->sfv_fd);
1062 					return (error);
1063 				}
1064 
1065 				/*
1066 				 * Check how much data was really read.
1067 				 * Decrement the 'len' and increment the
1068 				 * 'off' appropriately.
1069 				 */
1070 				cnt = iov_len - auio.uio_resid;
1071 				if (cnt == 0) {
1072 					if (vp->v_type == VSOCK)
1073 						freeb(dmp);
1074 					else
1075 						kmem_free(buf, size);
1076 					VOP_RWUNLOCK(readvp, readflg, NULL);
1077 					releasef(sfv->sfv_fd);
1078 					return (EINVAL);
1079 				}
1080 				sfv_len -= cnt;
1081 				sfv_off += cnt;
1082 
1083 				if (vp->v_type == VSOCK) {
1084 					dmp->b_wptr = dmp->b_rptr + cnt;
1085 
1086 					error = kstrwritemp(vp, dmp, fflag);
1087 					if (error != 0) {
1088 						freeb(dmp);
1089 						VOP_RWUNLOCK(readvp, readflg,
1090 									NULL);
1091 						releasef(sfv->sfv_fd);
1092 						return (error);
1093 					}
1094 
1095 					ttolwp(curthread)->lwp_ru.ioch +=
1096 					    (ulong_t)cnt;
1097 					*count += cnt;
1098 				} else {
1099 
1100 					aiov.iov_base = ptr;
1101 					aiov.iov_len = cnt;
1102 					auio.uio_loffset = *fileoff;
1103 					auio.uio_resid = cnt;
1104 					auio.uio_segflg = UIO_SYSSPACE;
1105 					auio.uio_llimit = curproc->p_fsz_ctl;
1106 					auio.uio_fmode = fflag;
1107 					ioflag = auio.uio_fmode &
1108 					    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1109 					error = VOP_WRITE(vp, &auio, ioflag,
1110 					    fp->f_cred, NULL);
1111 
1112 					/*
1113 					 * Check how much data was written.
1114 					 * Increment the 'len' and decrement the
1115 					 * 'off' if all the data was not
1116 					 * written.
1117 					 */
1118 					cnt -= auio.uio_resid;
1119 					sfv_len += auio.uio_resid;
1120 					sfv_off -= auio.uio_resid;
1121 					ttolwp(curthread)->lwp_ru.ioch +=
1122 					    (ulong_t)cnt;
1123 					*fileoff += cnt;
1124 					*count += cnt;
1125 					if (error != 0) {
1126 						kmem_free(buf, size);
1127 						VOP_RWUNLOCK(readvp, readflg,
1128 									NULL);
1129 						releasef(sfv->sfv_fd);
1130 						return (error);
1131 					}
1132 				}
1133 			}
1134 			if (buf) {
1135 				kmem_free(buf, size);
1136 				buf = NULL;
1137 			}
1138 			VOP_RWUNLOCK(readvp, readflg, NULL);
1139 			releasef(sfv->sfv_fd);
1140 		}
1141 		sfv++;
1142 	}
1143 	return (0);
1144 }
1145 
1146 ssize_t
1147 sendfilev(int opcode, int fildes, const struct sendfilevec *vec, int sfvcnt,
1148     size_t *xferred)
1149 {
1150 	int error;
1151 	file_t *fp;
1152 	struct vnode *vp;
1153 	struct sonode *so;
1154 	u_offset_t fileoff;
1155 	int copy_cnt;
1156 	const struct sendfilevec *copy_vec;
1157 	struct sendfilevec sfv[SEND_MAX_CHUNK];
1158 	ssize_t count = 0;
1159 #ifdef _SYSCALL32_IMPL
1160 	struct ksendfilevec32 sfv32[SEND_MAX_CHUNK];
1161 #endif
1162 	ssize_t total_size;
1163 	int i;
1164 	boolean_t is_sock = B_FALSE;
1165 	int maxblk = 0;
1166 
1167 	if (sfvcnt <= 0)
1168 		return (set_errno(EINVAL));
1169 
1170 	if ((fp = getf(fildes)) == NULL)
1171 		return (set_errno(EBADF));
1172 
1173 	if (((fp->f_flag) & FWRITE) == 0) {
1174 		error = EBADF;
1175 		goto err;
1176 	}
1177 
1178 	fileoff = fp->f_offset;
1179 	vp = fp->f_vnode;
1180 
1181 	switch (vp->v_type) {
1182 	case VSOCK:
1183 		so = VTOSO(vp);
1184 		/* sendfile not supported for SCTP */
1185 		if (so->so_protocol == IPPROTO_SCTP) {
1186 			error = EPROTONOSUPPORT;
1187 			goto err;
1188 		}
1189 		is_sock = B_TRUE;
1190 		switch (so->so_family) {
1191 		case AF_INET:
1192 		case AF_INET6:
1193 			/*
1194 			 * Make similar checks done in SOP_WRITE().
1195 			 */
1196 			if (so->so_state & SS_CANTSENDMORE) {
1197 				tsignal(curthread, SIGPIPE);
1198 				error = EPIPE;
1199 				goto err;
1200 			}
1201 			if (so->so_type != SOCK_STREAM) {
1202 				error = EOPNOTSUPP;
1203 				goto err;
1204 			}
1205 
1206 			if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) !=
1207 			    (SS_ISCONNECTED|SS_ISBOUND)) {
1208 				error = ENOTCONN;
1209 				goto err;
1210 			}
1211 
1212 			if ((so->so_state & SS_DIRECT) &&
1213 			    (so->so_priv != NULL) &&
1214 			    (so->so_kssl_ctx == NULL)) {
1215 				maxblk = ((tcp_t *)so->so_priv)->tcp_mss;
1216 			} else {
1217 				maxblk = (int)vp->v_stream->sd_maxblk;
1218 			}
1219 			break;
1220 		default:
1221 			error = EAFNOSUPPORT;
1222 			goto err;
1223 		}
1224 		break;
1225 	case VREG:
1226 		break;
1227 	default:
1228 		error = EINVAL;
1229 		goto err;
1230 	}
1231 
1232 	switch (opcode) {
1233 	case SENDFILEV :
1234 		break;
1235 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1236 	case SENDFILEV64 :
1237 		return (sendvec64(fp, (struct ksendfilevec64 *)vec, sfvcnt,
1238 		    (size32_t *)xferred, fildes));
1239 #endif
1240 	default :
1241 		error = ENOSYS;
1242 		break;
1243 	}
1244 
1245 	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1246 	copy_vec = vec;
1247 
1248 	do {
1249 		total_size = 0;
1250 		copy_cnt = MIN(sfvcnt, SEND_MAX_CHUNK);
1251 #ifdef _SYSCALL32_IMPL
1252 		/* 32-bit callers need to have their iovec expanded. */
1253 		if (get_udatamodel() == DATAMODEL_ILP32) {
1254 			if (copyin(copy_vec, sfv32,
1255 			    copy_cnt * sizeof (ksendfilevec32_t))) {
1256 				error = EFAULT;
1257 				break;
1258 			}
1259 
1260 			for (i = 0; i < copy_cnt; i++) {
1261 				sfv[i].sfv_fd = sfv32[i].sfv_fd;
1262 				sfv[i].sfv_off =
1263 					(off_t)(uint32_t)sfv32[i].sfv_off;
1264 				sfv[i].sfv_len = (size_t)sfv32[i].sfv_len;
1265 				total_size += sfv[i].sfv_len;
1266 				sfv[i].sfv_flag = sfv32[i].sfv_flag;
1267 			}
1268 		} else {
1269 #endif
1270 			if (copyin(copy_vec, sfv,
1271 			    copy_cnt * sizeof (sendfilevec_t))) {
1272 				error = EFAULT;
1273 				break;
1274 			}
1275 
1276 			for (i = 0; i < copy_cnt; i++) {
1277 				total_size += sfv[i].sfv_len;
1278 			}
1279 #ifdef _SYSCALL32_IMPL
1280 		}
1281 #endif
1282 
1283 		/*
1284 		 * The task between deciding to use sendvec_small_chunk
1285 		 * and sendvec_chunk is dependant on multiple things:
1286 		 *
1287 		 * i) latency is important for smaller files. So if the
1288 		 * data is smaller than 'tcp_slow_start_initial' times
1289 		 * maxblk, then use sendvec_small_chunk which creates
1290 		 * maxblk size mblks and chains then together and sends
1291 		 * them to TCP in one shot. It also leaves 'wroff' size
1292 		 * space for the headers in each mblk.
1293 		 *
1294 		 * ii) for total size bigger than 'tcp_slow_start_initial'
1295 		 * time maxblk, its probably real file data which is
1296 		 * dominating. So its better to use sendvec_chunk because
1297 		 * performance goes to dog if we don't do pagesize reads.
1298 		 * sendvec_chunk will do pagesize reads and write them
1299 		 * in pagesize mblks to TCP.
1300 		 *
1301 		 * Side Notes: A write to file has not been optimized.
1302 		 * Future zero copy code will plugin into sendvec_chunk
1303 		 * only because doing zero copy for files smaller then
1304 		 * pagesize is useless.
1305 		 *
1306 		 * Note, if socket has NL7C enabled then call NL7C's
1307 		 * senfilev() function to consume the sfv[].
1308 		 */
1309 		if (is_sock) {
1310 			switch (so->so_family) {
1311 			case AF_INET:
1312 			case AF_INET6:
1313 				if (so->so_nl7c_flags != 0)
1314 					error = nl7c_sendfilev(so, &fileoff,
1315 					    sfv, copy_cnt, &count);
1316 				else if (total_size <= (4 * maxblk))
1317 					error = sendvec_small_chunk(fp,
1318 					    &fileoff, sfv, copy_cnt,
1319 					    total_size, maxblk, &count);
1320 				else
1321 					error = sendvec_chunk(fp, &fileoff,
1322 					    sfv, copy_cnt, &count);
1323 				break;
1324 			}
1325 		} else {
1326 			ASSERT(vp->v_type == VREG);
1327 			error = sendvec_chunk(fp, &fileoff, sfv, copy_cnt,
1328 			    &count);
1329 		}
1330 
1331 
1332 #ifdef _SYSCALL32_IMPL
1333 	if (get_udatamodel() == DATAMODEL_ILP32)
1334 		copy_vec = (const struct sendfilevec *)((char *)copy_vec +
1335 		    (copy_cnt * sizeof (ksendfilevec32_t)));
1336 	else
1337 #endif
1338 		copy_vec += copy_cnt;
1339 		sfvcnt -= copy_cnt;
1340 	} while (sfvcnt > 0);
1341 
1342 	if (vp->v_type == VREG)
1343 		fp->f_offset += count;
1344 
1345 
1346 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1347 
1348 #ifdef _SYSCALL32_IMPL
1349 	if (get_udatamodel() == DATAMODEL_ILP32) {
1350 		ssize32_t count32 = (ssize32_t)count;
1351 		if (copyout(&count32, xferred, sizeof (count32)))
1352 			error = EFAULT;
1353 		releasef(fildes);
1354 		if (error != 0)
1355 			return (set_errno(error));
1356 		return (count32);
1357 	}
1358 #endif
1359 	if (copyout(&count, xferred, sizeof (count)))
1360 		error = EFAULT;
1361 	releasef(fildes);
1362 	if (error != 0)
1363 		return (set_errno(error));
1364 	return (count);
1365 err:
1366 	ASSERT(error != 0);
1367 	releasef(fildes);
1368 	return (set_errno(error));
1369 }
1370