xref: /titanic_51/usr/src/uts/common/syscall/rw.c (revision fca543ca45b12c44a243625bce68b645ba8ed791)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
513506d1eSmaybee  * Common Development and Distribution License (the "License").
613506d1eSmaybee  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
214d86dd30Sraf 
227c478bd9Sstevel@tonic-gate /*
234d86dd30Sraf  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*fca543caSDJ Hoffman  * Copyright (c) 2015, Joyent, Inc.  All rights reserved.
267c478bd9Sstevel@tonic-gate  */
277c478bd9Sstevel@tonic-gate 
287c478bd9Sstevel@tonic-gate /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
297c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
307c478bd9Sstevel@tonic-gate 
317c478bd9Sstevel@tonic-gate /*
327c478bd9Sstevel@tonic-gate  * Portions of this source code were derived from Berkeley 4.3 BSD
337c478bd9Sstevel@tonic-gate  * under license from the Regents of the University of California.
347c478bd9Sstevel@tonic-gate  */
357c478bd9Sstevel@tonic-gate 
367c478bd9Sstevel@tonic-gate #include <sys/param.h>
377c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h>
387c478bd9Sstevel@tonic-gate #include <sys/types.h>
397c478bd9Sstevel@tonic-gate #include <sys/inttypes.h>
407c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
417c478bd9Sstevel@tonic-gate #include <sys/cred.h>
427c478bd9Sstevel@tonic-gate #include <sys/user.h>
437c478bd9Sstevel@tonic-gate #include <sys/systm.h>
447c478bd9Sstevel@tonic-gate #include <sys/errno.h>
457c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
467c478bd9Sstevel@tonic-gate #include <sys/file.h>
477c478bd9Sstevel@tonic-gate #include <sys/proc.h>
487c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
497c478bd9Sstevel@tonic-gate #include <sys/uio.h>
507c478bd9Sstevel@tonic-gate #include <sys/debug.h>
517c478bd9Sstevel@tonic-gate #include <sys/rctl.h>
527c478bd9Sstevel@tonic-gate #include <sys/nbmlock.h>
537c478bd9Sstevel@tonic-gate 
/*
 * Default threshold, in bytes, used by read(): transfers of at most this
 * many bytes request UIO_COPY_CACHED, larger ones UIO_COPY_DEFAULT.
 */
#define	COPYOUT_MAX_CACHE	(1<<17)		/* 128K */

/* Run-time copy of the threshold; global so it's patchable. */
size_t copyout_max_cached = COPYOUT_MAX_CACHE;
577c478bd9Sstevel@tonic-gate 
/*
 * read, write, pread, pwrite, readv, and writev syscalls.
 *
 * Large Files: the behaviour of read depends on whether the fd
 *		corresponds to a large-file open or not.
 * 64-bit open:	all opens are large-file opens.
 * 32-bit open:	FOFFMAX flag not set.
 *		Reads proceed up to offset MAXOFF32_T - 1.  A read at
 *		MAXOFF32_T returns EOVERFLOW if count is non-zero and
 *		the size of the file is > MAXOFF32_T.  If the size of
 *		the file is <= MAXOFF32_T, a read at an offset
 *		>= MAXOFF32_T returns EOF.
 */
707c478bd9Sstevel@tonic-gate 
717c478bd9Sstevel@tonic-gate /*
727c478bd9Sstevel@tonic-gate  * Native system call
737c478bd9Sstevel@tonic-gate  */
747c478bd9Sstevel@tonic-gate ssize_t
757c478bd9Sstevel@tonic-gate read(int fdes, void *cbuf, size_t count)
767c478bd9Sstevel@tonic-gate {
777c478bd9Sstevel@tonic-gate 	struct uio auio;
787c478bd9Sstevel@tonic-gate 	struct iovec aiov;
797c478bd9Sstevel@tonic-gate 	file_t *fp;
807c478bd9Sstevel@tonic-gate 	register vnode_t *vp;
817c478bd9Sstevel@tonic-gate 	struct cpu *cp;
827c478bd9Sstevel@tonic-gate 	int fflag, ioflag, rwflag;
837c478bd9Sstevel@tonic-gate 	ssize_t cnt, bcount;
847c478bd9Sstevel@tonic-gate 	int error = 0;
857c478bd9Sstevel@tonic-gate 	u_offset_t fileoff;
867c478bd9Sstevel@tonic-gate 	int in_crit = 0;
877c478bd9Sstevel@tonic-gate 
887c478bd9Sstevel@tonic-gate 	if ((cnt = (ssize_t)count) < 0)
897c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
907c478bd9Sstevel@tonic-gate 	if ((fp = getf(fdes)) == NULL)
917c478bd9Sstevel@tonic-gate 		return (set_errno(EBADF));
927c478bd9Sstevel@tonic-gate 	if (((fflag = fp->f_flag) & FREAD) == 0) {
937c478bd9Sstevel@tonic-gate 		error = EBADF;
947c478bd9Sstevel@tonic-gate 		goto out;
957c478bd9Sstevel@tonic-gate 	}
967c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
977c478bd9Sstevel@tonic-gate 
987c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG && cnt == 0) {
997c478bd9Sstevel@tonic-gate 		goto out;
1007c478bd9Sstevel@tonic-gate 	}
1017c478bd9Sstevel@tonic-gate 
1027c478bd9Sstevel@tonic-gate 	rwflag = 0;
1037c478bd9Sstevel@tonic-gate 	aiov.iov_base = cbuf;
1047c478bd9Sstevel@tonic-gate 	aiov.iov_len = cnt;
1057c478bd9Sstevel@tonic-gate 
1067c478bd9Sstevel@tonic-gate 	/*
1077c478bd9Sstevel@tonic-gate 	 * We have to enter the critical region before calling VOP_RWLOCK
1087c478bd9Sstevel@tonic-gate 	 * to avoid a deadlock with write() calls.
1097c478bd9Sstevel@tonic-gate 	 */
1107c478bd9Sstevel@tonic-gate 	if (nbl_need_check(vp)) {
1117c478bd9Sstevel@tonic-gate 		int svmand;
1127c478bd9Sstevel@tonic-gate 
1137c478bd9Sstevel@tonic-gate 		nbl_start_crit(vp, RW_READER);
1147c478bd9Sstevel@tonic-gate 		in_crit = 1;
1157c478bd9Sstevel@tonic-gate 		error = nbl_svmand(vp, fp->f_cred, &svmand);
1167c478bd9Sstevel@tonic-gate 		if (error != 0)
1177c478bd9Sstevel@tonic-gate 			goto out;
118da6c28aaSamw 		if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand,
119da6c28aaSamw 		    NULL)) {
1207c478bd9Sstevel@tonic-gate 			error = EACCES;
1217c478bd9Sstevel@tonic-gate 			goto out;
1227c478bd9Sstevel@tonic-gate 		}
1237c478bd9Sstevel@tonic-gate 	}
1247c478bd9Sstevel@tonic-gate 
1257c478bd9Sstevel@tonic-gate 	(void) VOP_RWLOCK(vp, rwflag, NULL);
1267c478bd9Sstevel@tonic-gate 
1277c478bd9Sstevel@tonic-gate 	/*
1287c478bd9Sstevel@tonic-gate 	 * We do the following checks inside VOP_RWLOCK so as to
1297c478bd9Sstevel@tonic-gate 	 * prevent file size from changing while these checks are
1307c478bd9Sstevel@tonic-gate 	 * being done. Also, we load fp's offset to the local
1317c478bd9Sstevel@tonic-gate 	 * variable fileoff because we can have a parallel lseek
1327c478bd9Sstevel@tonic-gate 	 * going on (f_offset is not protected by any lock) which
1337c478bd9Sstevel@tonic-gate 	 * could change f_offset. We need to see the value only
1347c478bd9Sstevel@tonic-gate 	 * once here and take a decision. Seeing it more than once
1357c478bd9Sstevel@tonic-gate 	 * can lead to incorrect functionality.
1367c478bd9Sstevel@tonic-gate 	 */
1377c478bd9Sstevel@tonic-gate 
1387c478bd9Sstevel@tonic-gate 	fileoff = (u_offset_t)fp->f_offset;
1397c478bd9Sstevel@tonic-gate 	if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) {
1407c478bd9Sstevel@tonic-gate 		struct vattr va;
1417c478bd9Sstevel@tonic-gate 		va.va_mask = AT_SIZE;
142da6c28aaSamw 		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL)))  {
1437c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
1447c478bd9Sstevel@tonic-gate 			goto out;
1457c478bd9Sstevel@tonic-gate 		}
1467c478bd9Sstevel@tonic-gate 		if (fileoff >= va.va_size) {
1477c478bd9Sstevel@tonic-gate 			cnt = 0;
1487c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
1497c478bd9Sstevel@tonic-gate 			goto out;
1507c478bd9Sstevel@tonic-gate 		} else {
1517c478bd9Sstevel@tonic-gate 			error = EOVERFLOW;
1527c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
1537c478bd9Sstevel@tonic-gate 			goto out;
1547c478bd9Sstevel@tonic-gate 		}
1557c478bd9Sstevel@tonic-gate 	}
1567c478bd9Sstevel@tonic-gate 	if ((vp->v_type == VREG) &&
1577c478bd9Sstevel@tonic-gate 	    (fileoff + cnt > OFFSET_MAX(fp))) {
1587c478bd9Sstevel@tonic-gate 		cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1597c478bd9Sstevel@tonic-gate 	}
1607c478bd9Sstevel@tonic-gate 	auio.uio_loffset = fileoff;
1617c478bd9Sstevel@tonic-gate 	auio.uio_iov = &aiov;
1627c478bd9Sstevel@tonic-gate 	auio.uio_iovcnt = 1;
1637c478bd9Sstevel@tonic-gate 	auio.uio_resid = bcount = cnt;
1647c478bd9Sstevel@tonic-gate 	auio.uio_segflg = UIO_USERSPACE;
1657c478bd9Sstevel@tonic-gate 	auio.uio_llimit = MAXOFFSET_T;
1667c478bd9Sstevel@tonic-gate 	auio.uio_fmode = fflag;
1677c478bd9Sstevel@tonic-gate 	/*
1687c478bd9Sstevel@tonic-gate 	 * Only use bypass caches when the count is large enough
1697c478bd9Sstevel@tonic-gate 	 */
17013506d1eSmaybee 	if (bcount <= copyout_max_cached)
1717c478bd9Sstevel@tonic-gate 		auio.uio_extflg = UIO_COPY_CACHED;
1727c478bd9Sstevel@tonic-gate 	else
1737c478bd9Sstevel@tonic-gate 		auio.uio_extflg = UIO_COPY_DEFAULT;
1747c478bd9Sstevel@tonic-gate 
1757c478bd9Sstevel@tonic-gate 	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1767c478bd9Sstevel@tonic-gate 
1777c478bd9Sstevel@tonic-gate 	/* If read sync is not asked for, filter sync flags */
1787c478bd9Sstevel@tonic-gate 	if ((ioflag & FRSYNC) == 0)
1797c478bd9Sstevel@tonic-gate 		ioflag &= ~(FSYNC|FDSYNC);
1807c478bd9Sstevel@tonic-gate 	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1817c478bd9Sstevel@tonic-gate 	cnt -= auio.uio_resid;
1827c478bd9Sstevel@tonic-gate 	CPU_STATS_ENTER_K();
1837c478bd9Sstevel@tonic-gate 	cp = CPU;
1847c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, sysread, 1);
1857c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt);
1867c478bd9Sstevel@tonic-gate 	CPU_STATS_EXIT_K();
1877c478bd9Sstevel@tonic-gate 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
1887c478bd9Sstevel@tonic-gate 
1897c478bd9Sstevel@tonic-gate 	if (vp->v_type == VFIFO)	/* Backward compatibility */
1907c478bd9Sstevel@tonic-gate 		fp->f_offset = cnt;
1917c478bd9Sstevel@tonic-gate 	else if (((fp->f_flag & FAPPEND) == 0) ||
1927c478bd9Sstevel@tonic-gate 	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
1937c478bd9Sstevel@tonic-gate 		fp->f_offset = auio.uio_loffset;
1947c478bd9Sstevel@tonic-gate 	VOP_RWUNLOCK(vp, rwflag, NULL);
1957c478bd9Sstevel@tonic-gate 
1967c478bd9Sstevel@tonic-gate 	if (error == EINTR && cnt != 0)
1977c478bd9Sstevel@tonic-gate 		error = 0;
1987c478bd9Sstevel@tonic-gate out:
1997c478bd9Sstevel@tonic-gate 	if (in_crit)
2007c478bd9Sstevel@tonic-gate 		nbl_end_crit(vp);
2017c478bd9Sstevel@tonic-gate 	releasef(fdes);
2027c478bd9Sstevel@tonic-gate 	if (error)
2037c478bd9Sstevel@tonic-gate 		return (set_errno(error));
2047c478bd9Sstevel@tonic-gate 	return (cnt);
2057c478bd9Sstevel@tonic-gate }
2067c478bd9Sstevel@tonic-gate 
2077c478bd9Sstevel@tonic-gate /*
2087c478bd9Sstevel@tonic-gate  * Native system call
2097c478bd9Sstevel@tonic-gate  */
2107c478bd9Sstevel@tonic-gate ssize_t
2117c478bd9Sstevel@tonic-gate write(int fdes, void *cbuf, size_t count)
2127c478bd9Sstevel@tonic-gate {
2137c478bd9Sstevel@tonic-gate 	struct uio auio;
2147c478bd9Sstevel@tonic-gate 	struct iovec aiov;
2157c478bd9Sstevel@tonic-gate 	file_t *fp;
2167c478bd9Sstevel@tonic-gate 	register vnode_t *vp;
2177c478bd9Sstevel@tonic-gate 	struct cpu *cp;
2187c478bd9Sstevel@tonic-gate 	int fflag, ioflag, rwflag;
2197c478bd9Sstevel@tonic-gate 	ssize_t cnt, bcount;
2207c478bd9Sstevel@tonic-gate 	int error = 0;
2217c478bd9Sstevel@tonic-gate 	u_offset_t fileoff;
2227c478bd9Sstevel@tonic-gate 	int in_crit = 0;
2237c478bd9Sstevel@tonic-gate 
2247c478bd9Sstevel@tonic-gate 	if ((cnt = (ssize_t)count) < 0)
2257c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
2267c478bd9Sstevel@tonic-gate 	if ((fp = getf(fdes)) == NULL)
2277c478bd9Sstevel@tonic-gate 		return (set_errno(EBADF));
2287c478bd9Sstevel@tonic-gate 	if (((fflag = fp->f_flag) & FWRITE) == 0) {
2297c478bd9Sstevel@tonic-gate 		error = EBADF;
2307c478bd9Sstevel@tonic-gate 		goto out;
2317c478bd9Sstevel@tonic-gate 	}
2327c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
2337c478bd9Sstevel@tonic-gate 
2347c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG && cnt == 0) {
2357c478bd9Sstevel@tonic-gate 		goto out;
2367c478bd9Sstevel@tonic-gate 	}
2377c478bd9Sstevel@tonic-gate 
2387c478bd9Sstevel@tonic-gate 	rwflag = 1;
2397c478bd9Sstevel@tonic-gate 	aiov.iov_base = cbuf;
2407c478bd9Sstevel@tonic-gate 	aiov.iov_len = cnt;
2417c478bd9Sstevel@tonic-gate 
2427c478bd9Sstevel@tonic-gate 	/*
2437c478bd9Sstevel@tonic-gate 	 * We have to enter the critical region before calling VOP_RWLOCK
2447c478bd9Sstevel@tonic-gate 	 * to avoid a deadlock with ufs.
2457c478bd9Sstevel@tonic-gate 	 */
2467c478bd9Sstevel@tonic-gate 	if (nbl_need_check(vp)) {
2477c478bd9Sstevel@tonic-gate 		int svmand;
2487c478bd9Sstevel@tonic-gate 
2497c478bd9Sstevel@tonic-gate 		nbl_start_crit(vp, RW_READER);
2507c478bd9Sstevel@tonic-gate 		in_crit = 1;
2517c478bd9Sstevel@tonic-gate 		error = nbl_svmand(vp, fp->f_cred, &svmand);
2527c478bd9Sstevel@tonic-gate 		if (error != 0)
2537c478bd9Sstevel@tonic-gate 			goto out;
254da6c28aaSamw 		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand,
255da6c28aaSamw 		    NULL)) {
2567c478bd9Sstevel@tonic-gate 			error = EACCES;
2577c478bd9Sstevel@tonic-gate 			goto out;
2587c478bd9Sstevel@tonic-gate 		}
2597c478bd9Sstevel@tonic-gate 	}
2607c478bd9Sstevel@tonic-gate 
2617c478bd9Sstevel@tonic-gate 	(void) VOP_RWLOCK(vp, rwflag, NULL);
2627c478bd9Sstevel@tonic-gate 
2637c478bd9Sstevel@tonic-gate 	fileoff = fp->f_offset;
2647c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG) {
2657c478bd9Sstevel@tonic-gate 
2667c478bd9Sstevel@tonic-gate 		/*
2677c478bd9Sstevel@tonic-gate 		 * We raise psignal if write for >0 bytes causes
2687c478bd9Sstevel@tonic-gate 		 * it to exceed the ulimit.
2697c478bd9Sstevel@tonic-gate 		 */
2707c478bd9Sstevel@tonic-gate 		if (fileoff >= curproc->p_fsz_ctl) {
2717c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
2727c478bd9Sstevel@tonic-gate 
2737c478bd9Sstevel@tonic-gate 			mutex_enter(&curproc->p_lock);
2747c478bd9Sstevel@tonic-gate 			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
2757c478bd9Sstevel@tonic-gate 			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
2767c478bd9Sstevel@tonic-gate 			mutex_exit(&curproc->p_lock);
2777c478bd9Sstevel@tonic-gate 
2787c478bd9Sstevel@tonic-gate 			error = EFBIG;
2797c478bd9Sstevel@tonic-gate 			goto out;
2807c478bd9Sstevel@tonic-gate 		}
2817c478bd9Sstevel@tonic-gate 		/*
2827c478bd9Sstevel@tonic-gate 		 * We return EFBIG if write is done at an offset
2837c478bd9Sstevel@tonic-gate 		 * greater than the offset maximum for this file structure.
2847c478bd9Sstevel@tonic-gate 		 */
2857c478bd9Sstevel@tonic-gate 
2867c478bd9Sstevel@tonic-gate 		if (fileoff >= OFFSET_MAX(fp)) {
2877c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
2887c478bd9Sstevel@tonic-gate 			error = EFBIG;
2897c478bd9Sstevel@tonic-gate 			goto out;
2907c478bd9Sstevel@tonic-gate 		}
2917c478bd9Sstevel@tonic-gate 		/*
2927c478bd9Sstevel@tonic-gate 		 * Limit the bytes to be written  upto offset maximum for
2937c478bd9Sstevel@tonic-gate 		 * this open file structure.
2947c478bd9Sstevel@tonic-gate 		 */
2957c478bd9Sstevel@tonic-gate 		if (fileoff + cnt > OFFSET_MAX(fp))
2967c478bd9Sstevel@tonic-gate 			cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
2977c478bd9Sstevel@tonic-gate 	}
2987c478bd9Sstevel@tonic-gate 	auio.uio_loffset = fileoff;
2997c478bd9Sstevel@tonic-gate 	auio.uio_iov = &aiov;
3007c478bd9Sstevel@tonic-gate 	auio.uio_iovcnt = 1;
3017c478bd9Sstevel@tonic-gate 	auio.uio_resid = bcount = cnt;
3027c478bd9Sstevel@tonic-gate 	auio.uio_segflg = UIO_USERSPACE;
3037c478bd9Sstevel@tonic-gate 	auio.uio_llimit = curproc->p_fsz_ctl;
3047c478bd9Sstevel@tonic-gate 	auio.uio_fmode = fflag;
3057c478bd9Sstevel@tonic-gate 	auio.uio_extflg = UIO_COPY_DEFAULT;
3067c478bd9Sstevel@tonic-gate 
3077c478bd9Sstevel@tonic-gate 	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
3087c478bd9Sstevel@tonic-gate 
3097c478bd9Sstevel@tonic-gate 	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
3107c478bd9Sstevel@tonic-gate 	cnt -= auio.uio_resid;
3117c478bd9Sstevel@tonic-gate 	CPU_STATS_ENTER_K();
3127c478bd9Sstevel@tonic-gate 	cp = CPU;
3137c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
3147c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt);
3157c478bd9Sstevel@tonic-gate 	CPU_STATS_EXIT_K();
3167c478bd9Sstevel@tonic-gate 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
3177c478bd9Sstevel@tonic-gate 
3187c478bd9Sstevel@tonic-gate 	if (vp->v_type == VFIFO)	/* Backward compatibility */
3197c478bd9Sstevel@tonic-gate 		fp->f_offset = cnt;
3207c478bd9Sstevel@tonic-gate 	else if (((fp->f_flag & FAPPEND) == 0) ||
3217c478bd9Sstevel@tonic-gate 	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
3227c478bd9Sstevel@tonic-gate 		fp->f_offset = auio.uio_loffset;
3237c478bd9Sstevel@tonic-gate 	VOP_RWUNLOCK(vp, rwflag, NULL);
3247c478bd9Sstevel@tonic-gate 
3257c478bd9Sstevel@tonic-gate 	if (error == EINTR && cnt != 0)
3267c478bd9Sstevel@tonic-gate 		error = 0;
3277c478bd9Sstevel@tonic-gate out:
3287c478bd9Sstevel@tonic-gate 	if (in_crit)
3297c478bd9Sstevel@tonic-gate 		nbl_end_crit(vp);
3307c478bd9Sstevel@tonic-gate 	releasef(fdes);
3317c478bd9Sstevel@tonic-gate 	if (error)
3327c478bd9Sstevel@tonic-gate 		return (set_errno(error));
3337c478bd9Sstevel@tonic-gate 	return (cnt);
3347c478bd9Sstevel@tonic-gate }
3357c478bd9Sstevel@tonic-gate 
3367c478bd9Sstevel@tonic-gate ssize_t
3377c478bd9Sstevel@tonic-gate pread(int fdes, void *cbuf, size_t count, off_t offset)
3387c478bd9Sstevel@tonic-gate {
3397c478bd9Sstevel@tonic-gate 	struct uio auio;
3407c478bd9Sstevel@tonic-gate 	struct iovec aiov;
3417c478bd9Sstevel@tonic-gate 	file_t *fp;
3427c478bd9Sstevel@tonic-gate 	register vnode_t *vp;
3437c478bd9Sstevel@tonic-gate 	struct cpu *cp;
3447c478bd9Sstevel@tonic-gate 	int fflag, ioflag, rwflag;
3457c478bd9Sstevel@tonic-gate 	ssize_t bcount;
3467c478bd9Sstevel@tonic-gate 	int error = 0;
3477c478bd9Sstevel@tonic-gate 	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
3487c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
3497c478bd9Sstevel@tonic-gate 	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
3507c478bd9Sstevel@tonic-gate 	    MAXOFF32_T : MAXOFFSET_T;
3517c478bd9Sstevel@tonic-gate #else
3527c478bd9Sstevel@tonic-gate 	const u_offset_t maxoff = MAXOFF32_T;
3537c478bd9Sstevel@tonic-gate #endif
3547c478bd9Sstevel@tonic-gate 	int in_crit = 0;
3557c478bd9Sstevel@tonic-gate 
3567c478bd9Sstevel@tonic-gate 	if ((bcount = (ssize_t)count) < 0)
3577c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
3587c478bd9Sstevel@tonic-gate 
3597c478bd9Sstevel@tonic-gate 	if ((fp = getf(fdes)) == NULL)
3607c478bd9Sstevel@tonic-gate 		return (set_errno(EBADF));
3617c478bd9Sstevel@tonic-gate 	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
3627c478bd9Sstevel@tonic-gate 		error = EBADF;
3637c478bd9Sstevel@tonic-gate 		goto out;
3647c478bd9Sstevel@tonic-gate 	}
3657c478bd9Sstevel@tonic-gate 
3667c478bd9Sstevel@tonic-gate 	rwflag = 0;
3677c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
3687c478bd9Sstevel@tonic-gate 
3697c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG) {
3707c478bd9Sstevel@tonic-gate 
3717c478bd9Sstevel@tonic-gate 		if (bcount == 0)
3727c478bd9Sstevel@tonic-gate 			goto out;
3737c478bd9Sstevel@tonic-gate 
3747c478bd9Sstevel@tonic-gate 		/*
3757c478bd9Sstevel@tonic-gate 		 * Return EINVAL if an invalid offset comes to pread.
3767c478bd9Sstevel@tonic-gate 		 * Negative offset from user will cause this error.
3777c478bd9Sstevel@tonic-gate 		 */
3787c478bd9Sstevel@tonic-gate 
3797c478bd9Sstevel@tonic-gate 		if (fileoff > maxoff) {
3807c478bd9Sstevel@tonic-gate 			error = EINVAL;
3817c478bd9Sstevel@tonic-gate 			goto out;
3827c478bd9Sstevel@tonic-gate 		}
3837c478bd9Sstevel@tonic-gate 		/*
3847c478bd9Sstevel@tonic-gate 		 * Limit offset such that we don't read or write
3857c478bd9Sstevel@tonic-gate 		 * a file beyond the maximum offset representable in
3867c478bd9Sstevel@tonic-gate 		 * an off_t structure.
3877c478bd9Sstevel@tonic-gate 		 */
3887c478bd9Sstevel@tonic-gate 		if (fileoff + bcount > maxoff)
3897c478bd9Sstevel@tonic-gate 			bcount = (ssize_t)((offset_t)maxoff - fileoff);
3907c478bd9Sstevel@tonic-gate 	} else if (vp->v_type == VFIFO) {
3917c478bd9Sstevel@tonic-gate 		error = ESPIPE;
3927c478bd9Sstevel@tonic-gate 		goto out;
3937c478bd9Sstevel@tonic-gate 	}
3947c478bd9Sstevel@tonic-gate 
3957c478bd9Sstevel@tonic-gate 	/*
3967c478bd9Sstevel@tonic-gate 	 * We have to enter the critical region before calling VOP_RWLOCK
3977c478bd9Sstevel@tonic-gate 	 * to avoid a deadlock with ufs.
3987c478bd9Sstevel@tonic-gate 	 */
3997c478bd9Sstevel@tonic-gate 	if (nbl_need_check(vp)) {
4007c478bd9Sstevel@tonic-gate 		int svmand;
4017c478bd9Sstevel@tonic-gate 
4027c478bd9Sstevel@tonic-gate 		nbl_start_crit(vp, RW_READER);
4037c478bd9Sstevel@tonic-gate 		in_crit = 1;
4047c478bd9Sstevel@tonic-gate 		error = nbl_svmand(vp, fp->f_cred, &svmand);
4057c478bd9Sstevel@tonic-gate 		if (error != 0)
4067c478bd9Sstevel@tonic-gate 			goto out;
407da6c28aaSamw 		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
408da6c28aaSamw 		    NULL)) {
4097c478bd9Sstevel@tonic-gate 			error = EACCES;
4107c478bd9Sstevel@tonic-gate 			goto out;
4117c478bd9Sstevel@tonic-gate 		}
4127c478bd9Sstevel@tonic-gate 	}
4137c478bd9Sstevel@tonic-gate 
4147c478bd9Sstevel@tonic-gate 	aiov.iov_base = cbuf;
4157c478bd9Sstevel@tonic-gate 	aiov.iov_len = bcount;
4167c478bd9Sstevel@tonic-gate 	(void) VOP_RWLOCK(vp, rwflag, NULL);
4177c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG && fileoff == (u_offset_t)maxoff) {
4187c478bd9Sstevel@tonic-gate 		struct vattr va;
4197c478bd9Sstevel@tonic-gate 		va.va_mask = AT_SIZE;
420da6c28aaSamw 		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
4217c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
4227c478bd9Sstevel@tonic-gate 			goto out;
4237c478bd9Sstevel@tonic-gate 		}
4247c478bd9Sstevel@tonic-gate 		VOP_RWUNLOCK(vp, rwflag, NULL);
4257c478bd9Sstevel@tonic-gate 
4267c478bd9Sstevel@tonic-gate 		/*
4277c478bd9Sstevel@tonic-gate 		 * We have to return EOF if fileoff is >= file size.
4287c478bd9Sstevel@tonic-gate 		 */
4297c478bd9Sstevel@tonic-gate 		if (fileoff >= va.va_size) {
4307c478bd9Sstevel@tonic-gate 			bcount = 0;
4317c478bd9Sstevel@tonic-gate 			goto out;
4327c478bd9Sstevel@tonic-gate 		}
4337c478bd9Sstevel@tonic-gate 
4347c478bd9Sstevel@tonic-gate 		/*
4357c478bd9Sstevel@tonic-gate 		 * File is greater than or equal to maxoff and therefore
4367c478bd9Sstevel@tonic-gate 		 * we return EOVERFLOW.
4377c478bd9Sstevel@tonic-gate 		 */
4387c478bd9Sstevel@tonic-gate 		error = EOVERFLOW;
4397c478bd9Sstevel@tonic-gate 		goto out;
4407c478bd9Sstevel@tonic-gate 	}
4417c478bd9Sstevel@tonic-gate 	auio.uio_loffset = fileoff;
4427c478bd9Sstevel@tonic-gate 	auio.uio_iov = &aiov;
4437c478bd9Sstevel@tonic-gate 	auio.uio_iovcnt = 1;
4447c478bd9Sstevel@tonic-gate 	auio.uio_resid = bcount;
4457c478bd9Sstevel@tonic-gate 	auio.uio_segflg = UIO_USERSPACE;
4467c478bd9Sstevel@tonic-gate 	auio.uio_llimit = MAXOFFSET_T;
4477c478bd9Sstevel@tonic-gate 	auio.uio_fmode = fflag;
4487c478bd9Sstevel@tonic-gate 	auio.uio_extflg = UIO_COPY_CACHED;
4497c478bd9Sstevel@tonic-gate 
4507c478bd9Sstevel@tonic-gate 	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
4517c478bd9Sstevel@tonic-gate 
4527c478bd9Sstevel@tonic-gate 	/* If read sync is not asked for, filter sync flags */
4537c478bd9Sstevel@tonic-gate 	if ((ioflag & FRSYNC) == 0)
4547c478bd9Sstevel@tonic-gate 		ioflag &= ~(FSYNC|FDSYNC);
4557c478bd9Sstevel@tonic-gate 	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
4567c478bd9Sstevel@tonic-gate 	bcount -= auio.uio_resid;
4577c478bd9Sstevel@tonic-gate 	CPU_STATS_ENTER_K();
4587c478bd9Sstevel@tonic-gate 	cp = CPU;
4597c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, sysread, 1);
4607c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
4617c478bd9Sstevel@tonic-gate 	CPU_STATS_EXIT_K();
4627c478bd9Sstevel@tonic-gate 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
4637c478bd9Sstevel@tonic-gate 	VOP_RWUNLOCK(vp, rwflag, NULL);
4647c478bd9Sstevel@tonic-gate 
4657c478bd9Sstevel@tonic-gate 	if (error == EINTR && bcount != 0)
4667c478bd9Sstevel@tonic-gate 		error = 0;
4677c478bd9Sstevel@tonic-gate out:
4687c478bd9Sstevel@tonic-gate 	if (in_crit)
4697c478bd9Sstevel@tonic-gate 		nbl_end_crit(vp);
4707c478bd9Sstevel@tonic-gate 	releasef(fdes);
4717c478bd9Sstevel@tonic-gate 	if (error)
4727c478bd9Sstevel@tonic-gate 		return (set_errno(error));
4737c478bd9Sstevel@tonic-gate 	return (bcount);
4747c478bd9Sstevel@tonic-gate }
4757c478bd9Sstevel@tonic-gate 
4767c478bd9Sstevel@tonic-gate ssize_t
4777c478bd9Sstevel@tonic-gate pwrite(int fdes, void *cbuf, size_t count, off_t offset)
4787c478bd9Sstevel@tonic-gate {
4797c478bd9Sstevel@tonic-gate 	struct uio auio;
4807c478bd9Sstevel@tonic-gate 	struct iovec aiov;
4817c478bd9Sstevel@tonic-gate 	file_t *fp;
4827c478bd9Sstevel@tonic-gate 	register vnode_t *vp;
4837c478bd9Sstevel@tonic-gate 	struct cpu *cp;
4847c478bd9Sstevel@tonic-gate 	int fflag, ioflag, rwflag;
4857c478bd9Sstevel@tonic-gate 	ssize_t bcount;
4867c478bd9Sstevel@tonic-gate 	int error = 0;
4877c478bd9Sstevel@tonic-gate 	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
4887c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
4897c478bd9Sstevel@tonic-gate 	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
4907c478bd9Sstevel@tonic-gate 	    MAXOFF32_T : MAXOFFSET_T;
4917c478bd9Sstevel@tonic-gate #else
4927c478bd9Sstevel@tonic-gate 	const u_offset_t maxoff = MAXOFF32_T;
4937c478bd9Sstevel@tonic-gate #endif
4947c478bd9Sstevel@tonic-gate 	int in_crit = 0;
4957c478bd9Sstevel@tonic-gate 
4967c478bd9Sstevel@tonic-gate 	if ((bcount = (ssize_t)count) < 0)
4977c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
4987c478bd9Sstevel@tonic-gate 	if ((fp = getf(fdes)) == NULL)
4997c478bd9Sstevel@tonic-gate 		return (set_errno(EBADF));
5007c478bd9Sstevel@tonic-gate 	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
5017c478bd9Sstevel@tonic-gate 		error = EBADF;
5027c478bd9Sstevel@tonic-gate 		goto out;
5037c478bd9Sstevel@tonic-gate 	}
5047c478bd9Sstevel@tonic-gate 
5057c478bd9Sstevel@tonic-gate 	rwflag = 1;
5067c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
5077c478bd9Sstevel@tonic-gate 
5087c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG) {
5097c478bd9Sstevel@tonic-gate 
5107c478bd9Sstevel@tonic-gate 		if (bcount == 0)
5117c478bd9Sstevel@tonic-gate 			goto out;
5127c478bd9Sstevel@tonic-gate 
5137c478bd9Sstevel@tonic-gate 		/*
5147c478bd9Sstevel@tonic-gate 		 * return EINVAL for offsets that cannot be
5157c478bd9Sstevel@tonic-gate 		 * represented in an off_t.
5167c478bd9Sstevel@tonic-gate 		 */
5177c478bd9Sstevel@tonic-gate 		if (fileoff > maxoff) {
5187c478bd9Sstevel@tonic-gate 			error = EINVAL;
5197c478bd9Sstevel@tonic-gate 			goto out;
5207c478bd9Sstevel@tonic-gate 		}
5217c478bd9Sstevel@tonic-gate 		/*
5227c478bd9Sstevel@tonic-gate 		 * Take appropriate action if we are trying to write above the
5237c478bd9Sstevel@tonic-gate 		 * resource limit.
5247c478bd9Sstevel@tonic-gate 		 */
5257c478bd9Sstevel@tonic-gate 		if (fileoff >= curproc->p_fsz_ctl) {
5267c478bd9Sstevel@tonic-gate 			mutex_enter(&curproc->p_lock);
5277c478bd9Sstevel@tonic-gate 			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
5287c478bd9Sstevel@tonic-gate 			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
5297c478bd9Sstevel@tonic-gate 			mutex_exit(&curproc->p_lock);
5307c478bd9Sstevel@tonic-gate 
5317c478bd9Sstevel@tonic-gate 			error = EFBIG;
5327c478bd9Sstevel@tonic-gate 			goto out;
5337c478bd9Sstevel@tonic-gate 		}
5347c478bd9Sstevel@tonic-gate 		/*
5357c478bd9Sstevel@tonic-gate 		 * Don't allow pwrite to cause file sizes to exceed
5367c478bd9Sstevel@tonic-gate 		 * maxoff.
5377c478bd9Sstevel@tonic-gate 		 */
5387c478bd9Sstevel@tonic-gate 		if (fileoff == maxoff) {
5397c478bd9Sstevel@tonic-gate 			error = EFBIG;
5407c478bd9Sstevel@tonic-gate 			goto out;
5417c478bd9Sstevel@tonic-gate 		}
5427c478bd9Sstevel@tonic-gate 		if (fileoff + count > maxoff)
5437c478bd9Sstevel@tonic-gate 			bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
5447c478bd9Sstevel@tonic-gate 	} else if (vp->v_type == VFIFO) {
5457c478bd9Sstevel@tonic-gate 		error = ESPIPE;
5467c478bd9Sstevel@tonic-gate 		goto out;
5477c478bd9Sstevel@tonic-gate 	}
5487c478bd9Sstevel@tonic-gate 
5497c478bd9Sstevel@tonic-gate 	/*
5507c478bd9Sstevel@tonic-gate 	 * We have to enter the critical region before calling VOP_RWLOCK
5517c478bd9Sstevel@tonic-gate 	 * to avoid a deadlock with ufs.
5527c478bd9Sstevel@tonic-gate 	 */
5537c478bd9Sstevel@tonic-gate 	if (nbl_need_check(vp)) {
5547c478bd9Sstevel@tonic-gate 		int svmand;
5557c478bd9Sstevel@tonic-gate 
5567c478bd9Sstevel@tonic-gate 		nbl_start_crit(vp, RW_READER);
5577c478bd9Sstevel@tonic-gate 		in_crit = 1;
5587c478bd9Sstevel@tonic-gate 		error = nbl_svmand(vp, fp->f_cred, &svmand);
5597c478bd9Sstevel@tonic-gate 		if (error != 0)
5607c478bd9Sstevel@tonic-gate 			goto out;
561da6c28aaSamw 		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
562da6c28aaSamw 		    NULL)) {
5637c478bd9Sstevel@tonic-gate 			error = EACCES;
5647c478bd9Sstevel@tonic-gate 			goto out;
5657c478bd9Sstevel@tonic-gate 		}
5667c478bd9Sstevel@tonic-gate 	}
5677c478bd9Sstevel@tonic-gate 
5687c478bd9Sstevel@tonic-gate 	aiov.iov_base = cbuf;
5697c478bd9Sstevel@tonic-gate 	aiov.iov_len = bcount;
5707c478bd9Sstevel@tonic-gate 	(void) VOP_RWLOCK(vp, rwflag, NULL);
5717c478bd9Sstevel@tonic-gate 	auio.uio_loffset = fileoff;
5727c478bd9Sstevel@tonic-gate 	auio.uio_iov = &aiov;
5737c478bd9Sstevel@tonic-gate 	auio.uio_iovcnt = 1;
5747c478bd9Sstevel@tonic-gate 	auio.uio_resid = bcount;
5757c478bd9Sstevel@tonic-gate 	auio.uio_segflg = UIO_USERSPACE;
5767c478bd9Sstevel@tonic-gate 	auio.uio_llimit = curproc->p_fsz_ctl;
5777c478bd9Sstevel@tonic-gate 	auio.uio_fmode = fflag;
5787c478bd9Sstevel@tonic-gate 	auio.uio_extflg = UIO_COPY_CACHED;
5797c478bd9Sstevel@tonic-gate 
5804d86dd30Sraf 	/*
5814d86dd30Sraf 	 * The SUSv4 POSIX specification states:
5824d86dd30Sraf 	 *	The pwrite() function shall be equivalent to write(), except
5834d86dd30Sraf 	 *	that it writes into a given position and does not change
5844d86dd30Sraf 	 *	the file offset (regardless of whether O_APPEND is set).
5854d86dd30Sraf 	 * To make this be true, we omit the FAPPEND flag from ioflag.
5864d86dd30Sraf 	 */
5874d86dd30Sraf 	ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
5887c478bd9Sstevel@tonic-gate 
5897c478bd9Sstevel@tonic-gate 	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
5907c478bd9Sstevel@tonic-gate 	bcount -= auio.uio_resid;
5917c478bd9Sstevel@tonic-gate 	CPU_STATS_ENTER_K();
5927c478bd9Sstevel@tonic-gate 	cp = CPU;
5937c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
5947c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
5957c478bd9Sstevel@tonic-gate 	CPU_STATS_EXIT_K();
5967c478bd9Sstevel@tonic-gate 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
5977c478bd9Sstevel@tonic-gate 	VOP_RWUNLOCK(vp, rwflag, NULL);
5987c478bd9Sstevel@tonic-gate 
5997c478bd9Sstevel@tonic-gate 	if (error == EINTR && bcount != 0)
6007c478bd9Sstevel@tonic-gate 		error = 0;
6017c478bd9Sstevel@tonic-gate out:
6027c478bd9Sstevel@tonic-gate 	if (in_crit)
6037c478bd9Sstevel@tonic-gate 		nbl_end_crit(vp);
6047c478bd9Sstevel@tonic-gate 	releasef(fdes);
6057c478bd9Sstevel@tonic-gate 	if (error)
6067c478bd9Sstevel@tonic-gate 		return (set_errno(error));
6077c478bd9Sstevel@tonic-gate 	return (bcount);
6087c478bd9Sstevel@tonic-gate }
6097c478bd9Sstevel@tonic-gate 
6107c478bd9Sstevel@tonic-gate /*
 * XXX -- The SVID refers to IOV_MAX but does not define it.  However,
 * SVVS expects readv() and writev() to fail if iovcnt > 16 (the value
 * is hard-coded in the SVVS source), so 16 is treated as the de facto
 * "interface" limit.
6157c478bd9Sstevel@tonic-gate  */
6167c478bd9Sstevel@tonic-gate #define	DEF_IOV_MAX	16
6177c478bd9Sstevel@tonic-gate 
6187c478bd9Sstevel@tonic-gate ssize_t
6197c478bd9Sstevel@tonic-gate readv(int fdes, struct iovec *iovp, int iovcnt)
6207c478bd9Sstevel@tonic-gate {
6217c478bd9Sstevel@tonic-gate 	struct uio auio;
6227c478bd9Sstevel@tonic-gate 	struct iovec aiov[DEF_IOV_MAX];
6237c478bd9Sstevel@tonic-gate 	file_t *fp;
6247c478bd9Sstevel@tonic-gate 	register vnode_t *vp;
6257c478bd9Sstevel@tonic-gate 	struct cpu *cp;
6267c478bd9Sstevel@tonic-gate 	int fflag, ioflag, rwflag;
6277c478bd9Sstevel@tonic-gate 	ssize_t count, bcount;
6287c478bd9Sstevel@tonic-gate 	int error = 0;
6297c478bd9Sstevel@tonic-gate 	int i;
6307c478bd9Sstevel@tonic-gate 	u_offset_t fileoff;
6317c478bd9Sstevel@tonic-gate 	int in_crit = 0;
6327c478bd9Sstevel@tonic-gate 
6337c478bd9Sstevel@tonic-gate 	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
6347c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
6357c478bd9Sstevel@tonic-gate 
6367c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
6377c478bd9Sstevel@tonic-gate 	/*
6387c478bd9Sstevel@tonic-gate 	 * 32-bit callers need to have their iovec expanded,
6397c478bd9Sstevel@tonic-gate 	 * while ensuring that they can't move more than 2Gbytes
6407c478bd9Sstevel@tonic-gate 	 * of data in a single call.
6417c478bd9Sstevel@tonic-gate 	 */
6427c478bd9Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_ILP32) {
6437c478bd9Sstevel@tonic-gate 		struct iovec32 aiov32[DEF_IOV_MAX];
6447c478bd9Sstevel@tonic-gate 		ssize32_t count32;
6457c478bd9Sstevel@tonic-gate 
6467c478bd9Sstevel@tonic-gate 		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
6477c478bd9Sstevel@tonic-gate 			return (set_errno(EFAULT));
6487c478bd9Sstevel@tonic-gate 
6497c478bd9Sstevel@tonic-gate 		count32 = 0;
6507c478bd9Sstevel@tonic-gate 		for (i = 0; i < iovcnt; i++) {
6517c478bd9Sstevel@tonic-gate 			ssize32_t iovlen32 = aiov32[i].iov_len;
6527c478bd9Sstevel@tonic-gate 			count32 += iovlen32;
6537c478bd9Sstevel@tonic-gate 			if (iovlen32 < 0 || count32 < 0)
6547c478bd9Sstevel@tonic-gate 				return (set_errno(EINVAL));
6557c478bd9Sstevel@tonic-gate 			aiov[i].iov_len = iovlen32;
6567c478bd9Sstevel@tonic-gate 			aiov[i].iov_base =
6577c478bd9Sstevel@tonic-gate 			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
6587c478bd9Sstevel@tonic-gate 		}
6597c478bd9Sstevel@tonic-gate 	} else
6607c478bd9Sstevel@tonic-gate #endif
6617c478bd9Sstevel@tonic-gate 	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
6627c478bd9Sstevel@tonic-gate 		return (set_errno(EFAULT));
6637c478bd9Sstevel@tonic-gate 
6647c478bd9Sstevel@tonic-gate 	count = 0;
6657c478bd9Sstevel@tonic-gate 	for (i = 0; i < iovcnt; i++) {
6667c478bd9Sstevel@tonic-gate 		ssize_t iovlen = aiov[i].iov_len;
6677c478bd9Sstevel@tonic-gate 		count += iovlen;
6687c478bd9Sstevel@tonic-gate 		if (iovlen < 0 || count < 0)
6697c478bd9Sstevel@tonic-gate 			return (set_errno(EINVAL));
6707c478bd9Sstevel@tonic-gate 	}
6717c478bd9Sstevel@tonic-gate 	if ((fp = getf(fdes)) == NULL)
6727c478bd9Sstevel@tonic-gate 		return (set_errno(EBADF));
6737c478bd9Sstevel@tonic-gate 	if (((fflag = fp->f_flag) & FREAD) == 0) {
6747c478bd9Sstevel@tonic-gate 		error = EBADF;
6757c478bd9Sstevel@tonic-gate 		goto out;
6767c478bd9Sstevel@tonic-gate 	}
6777c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
6787c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG && count == 0) {
6797c478bd9Sstevel@tonic-gate 		goto out;
6807c478bd9Sstevel@tonic-gate 	}
6817c478bd9Sstevel@tonic-gate 
6827c478bd9Sstevel@tonic-gate 	rwflag = 0;
6837c478bd9Sstevel@tonic-gate 
6847c478bd9Sstevel@tonic-gate 	/*
6857c478bd9Sstevel@tonic-gate 	 * We have to enter the critical region before calling VOP_RWLOCK
6867c478bd9Sstevel@tonic-gate 	 * to avoid a deadlock with ufs.
6877c478bd9Sstevel@tonic-gate 	 */
6887c478bd9Sstevel@tonic-gate 	if (nbl_need_check(vp)) {
6897c478bd9Sstevel@tonic-gate 		int svmand;
6907c478bd9Sstevel@tonic-gate 
6917c478bd9Sstevel@tonic-gate 		nbl_start_crit(vp, RW_READER);
6927c478bd9Sstevel@tonic-gate 		in_crit = 1;
6937c478bd9Sstevel@tonic-gate 		error = nbl_svmand(vp, fp->f_cred, &svmand);
6947c478bd9Sstevel@tonic-gate 		if (error != 0)
6957c478bd9Sstevel@tonic-gate 			goto out;
696da6c28aaSamw 		if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand,
697da6c28aaSamw 		    NULL)) {
6987c478bd9Sstevel@tonic-gate 			error = EACCES;
6997c478bd9Sstevel@tonic-gate 			goto out;
7007c478bd9Sstevel@tonic-gate 		}
7017c478bd9Sstevel@tonic-gate 	}
7027c478bd9Sstevel@tonic-gate 
7037c478bd9Sstevel@tonic-gate 	(void) VOP_RWLOCK(vp, rwflag, NULL);
7047c478bd9Sstevel@tonic-gate 	fileoff = fp->f_offset;
7057c478bd9Sstevel@tonic-gate 
7067c478bd9Sstevel@tonic-gate 	/*
7077c478bd9Sstevel@tonic-gate 	 * Behaviour is same as read. Please see comments in read.
7087c478bd9Sstevel@tonic-gate 	 */
7097c478bd9Sstevel@tonic-gate 
7107c478bd9Sstevel@tonic-gate 	if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
7117c478bd9Sstevel@tonic-gate 		struct vattr va;
7127c478bd9Sstevel@tonic-gate 		va.va_mask = AT_SIZE;
713da6c28aaSamw 		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL)))  {
7147c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
7157c478bd9Sstevel@tonic-gate 			goto out;
7167c478bd9Sstevel@tonic-gate 		}
7177c478bd9Sstevel@tonic-gate 		if (fileoff >= va.va_size) {
7187c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
7197c478bd9Sstevel@tonic-gate 			count = 0;
7207c478bd9Sstevel@tonic-gate 			goto out;
7217c478bd9Sstevel@tonic-gate 		} else {
7227c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
7237c478bd9Sstevel@tonic-gate 			error = EOVERFLOW;
7247c478bd9Sstevel@tonic-gate 			goto out;
7257c478bd9Sstevel@tonic-gate 		}
7267c478bd9Sstevel@tonic-gate 	}
7277c478bd9Sstevel@tonic-gate 	if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) {
7287c478bd9Sstevel@tonic-gate 		count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
7297c478bd9Sstevel@tonic-gate 	}
7307c478bd9Sstevel@tonic-gate 	auio.uio_loffset = fileoff;
7317c478bd9Sstevel@tonic-gate 	auio.uio_iov = aiov;
7327c478bd9Sstevel@tonic-gate 	auio.uio_iovcnt = iovcnt;
7337c478bd9Sstevel@tonic-gate 	auio.uio_resid = bcount = count;
7347c478bd9Sstevel@tonic-gate 	auio.uio_segflg = UIO_USERSPACE;
7357c478bd9Sstevel@tonic-gate 	auio.uio_llimit = MAXOFFSET_T;
7367c478bd9Sstevel@tonic-gate 	auio.uio_fmode = fflag;
73713506d1eSmaybee 	if (bcount <= copyout_max_cached)
7387c478bd9Sstevel@tonic-gate 		auio.uio_extflg = UIO_COPY_CACHED;
7397c478bd9Sstevel@tonic-gate 	else
7407c478bd9Sstevel@tonic-gate 		auio.uio_extflg = UIO_COPY_DEFAULT;
7417c478bd9Sstevel@tonic-gate 
7427c478bd9Sstevel@tonic-gate 
7437c478bd9Sstevel@tonic-gate 	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
7447c478bd9Sstevel@tonic-gate 
7457c478bd9Sstevel@tonic-gate 	/* If read sync is not asked for, filter sync flags */
7467c478bd9Sstevel@tonic-gate 	if ((ioflag & FRSYNC) == 0)
7477c478bd9Sstevel@tonic-gate 		ioflag &= ~(FSYNC|FDSYNC);
7487c478bd9Sstevel@tonic-gate 	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
7497c478bd9Sstevel@tonic-gate 	count -= auio.uio_resid;
7507c478bd9Sstevel@tonic-gate 	CPU_STATS_ENTER_K();
7517c478bd9Sstevel@tonic-gate 	cp = CPU;
7527c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, sysread, 1);
7537c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
7547c478bd9Sstevel@tonic-gate 	CPU_STATS_EXIT_K();
7557c478bd9Sstevel@tonic-gate 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
7567c478bd9Sstevel@tonic-gate 
7577c478bd9Sstevel@tonic-gate 	if (vp->v_type == VFIFO)	/* Backward compatibility */
7587c478bd9Sstevel@tonic-gate 		fp->f_offset = count;
7597c478bd9Sstevel@tonic-gate 	else if (((fp->f_flag & FAPPEND) == 0) ||
7607c478bd9Sstevel@tonic-gate 	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
7617c478bd9Sstevel@tonic-gate 		fp->f_offset = auio.uio_loffset;
7627c478bd9Sstevel@tonic-gate 
7637c478bd9Sstevel@tonic-gate 	VOP_RWUNLOCK(vp, rwflag, NULL);
7647c478bd9Sstevel@tonic-gate 
7657c478bd9Sstevel@tonic-gate 	if (error == EINTR && count != 0)
7667c478bd9Sstevel@tonic-gate 		error = 0;
7677c478bd9Sstevel@tonic-gate out:
7687c478bd9Sstevel@tonic-gate 	if (in_crit)
7697c478bd9Sstevel@tonic-gate 		nbl_end_crit(vp);
7707c478bd9Sstevel@tonic-gate 	releasef(fdes);
7717c478bd9Sstevel@tonic-gate 	if (error)
7727c478bd9Sstevel@tonic-gate 		return (set_errno(error));
7737c478bd9Sstevel@tonic-gate 	return (count);
7747c478bd9Sstevel@tonic-gate }
7757c478bd9Sstevel@tonic-gate 
7767c478bd9Sstevel@tonic-gate ssize_t
7777c478bd9Sstevel@tonic-gate writev(int fdes, struct iovec *iovp, int iovcnt)
7787c478bd9Sstevel@tonic-gate {
7797c478bd9Sstevel@tonic-gate 	struct uio auio;
7807c478bd9Sstevel@tonic-gate 	struct iovec aiov[DEF_IOV_MAX];
7817c478bd9Sstevel@tonic-gate 	file_t *fp;
7827c478bd9Sstevel@tonic-gate 	register vnode_t *vp;
7837c478bd9Sstevel@tonic-gate 	struct cpu *cp;
7847c478bd9Sstevel@tonic-gate 	int fflag, ioflag, rwflag;
7857c478bd9Sstevel@tonic-gate 	ssize_t count, bcount;
7867c478bd9Sstevel@tonic-gate 	int error = 0;
7877c478bd9Sstevel@tonic-gate 	int i;
7887c478bd9Sstevel@tonic-gate 	u_offset_t fileoff;
7897c478bd9Sstevel@tonic-gate 	int in_crit = 0;
7907c478bd9Sstevel@tonic-gate 
7917c478bd9Sstevel@tonic-gate 	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
7927c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
7937c478bd9Sstevel@tonic-gate 
7947c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
7957c478bd9Sstevel@tonic-gate 	/*
7967c478bd9Sstevel@tonic-gate 	 * 32-bit callers need to have their iovec expanded,
7977c478bd9Sstevel@tonic-gate 	 * while ensuring that they can't move more than 2Gbytes
7987c478bd9Sstevel@tonic-gate 	 * of data in a single call.
7997c478bd9Sstevel@tonic-gate 	 */
8007c478bd9Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_ILP32) {
8017c478bd9Sstevel@tonic-gate 		struct iovec32 aiov32[DEF_IOV_MAX];
8027c478bd9Sstevel@tonic-gate 		ssize32_t count32;
8037c478bd9Sstevel@tonic-gate 
8047c478bd9Sstevel@tonic-gate 		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
8057c478bd9Sstevel@tonic-gate 			return (set_errno(EFAULT));
8067c478bd9Sstevel@tonic-gate 
8077c478bd9Sstevel@tonic-gate 		count32 = 0;
8087c478bd9Sstevel@tonic-gate 		for (i = 0; i < iovcnt; i++) {
8097c478bd9Sstevel@tonic-gate 			ssize32_t iovlen = aiov32[i].iov_len;
8107c478bd9Sstevel@tonic-gate 			count32 += iovlen;
8117c478bd9Sstevel@tonic-gate 			if (iovlen < 0 || count32 < 0)
8127c478bd9Sstevel@tonic-gate 				return (set_errno(EINVAL));
8137c478bd9Sstevel@tonic-gate 			aiov[i].iov_len = iovlen;
8147c478bd9Sstevel@tonic-gate 			aiov[i].iov_base =
8157c478bd9Sstevel@tonic-gate 			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
8167c478bd9Sstevel@tonic-gate 		}
8177c478bd9Sstevel@tonic-gate 	} else
8187c478bd9Sstevel@tonic-gate #endif
8197c478bd9Sstevel@tonic-gate 	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
8207c478bd9Sstevel@tonic-gate 		return (set_errno(EFAULT));
8217c478bd9Sstevel@tonic-gate 
8227c478bd9Sstevel@tonic-gate 	count = 0;
8237c478bd9Sstevel@tonic-gate 	for (i = 0; i < iovcnt; i++) {
8247c478bd9Sstevel@tonic-gate 		ssize_t iovlen = aiov[i].iov_len;
8257c478bd9Sstevel@tonic-gate 		count += iovlen;
8267c478bd9Sstevel@tonic-gate 		if (iovlen < 0 || count < 0)
8277c478bd9Sstevel@tonic-gate 			return (set_errno(EINVAL));
8287c478bd9Sstevel@tonic-gate 	}
8297c478bd9Sstevel@tonic-gate 	if ((fp = getf(fdes)) == NULL)
8307c478bd9Sstevel@tonic-gate 		return (set_errno(EBADF));
8317c478bd9Sstevel@tonic-gate 	if (((fflag = fp->f_flag) & FWRITE) == 0) {
8327c478bd9Sstevel@tonic-gate 		error = EBADF;
8337c478bd9Sstevel@tonic-gate 		goto out;
8347c478bd9Sstevel@tonic-gate 	}
8357c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
8367c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG && count == 0) {
8377c478bd9Sstevel@tonic-gate 		goto out;
8387c478bd9Sstevel@tonic-gate 	}
8397c478bd9Sstevel@tonic-gate 
8407c478bd9Sstevel@tonic-gate 	rwflag = 1;
8417c478bd9Sstevel@tonic-gate 
8427c478bd9Sstevel@tonic-gate 	/*
8437c478bd9Sstevel@tonic-gate 	 * We have to enter the critical region before calling VOP_RWLOCK
8447c478bd9Sstevel@tonic-gate 	 * to avoid a deadlock with ufs.
8457c478bd9Sstevel@tonic-gate 	 */
8467c478bd9Sstevel@tonic-gate 	if (nbl_need_check(vp)) {
8477c478bd9Sstevel@tonic-gate 		int svmand;
8487c478bd9Sstevel@tonic-gate 
8497c478bd9Sstevel@tonic-gate 		nbl_start_crit(vp, RW_READER);
8507c478bd9Sstevel@tonic-gate 		in_crit = 1;
8517c478bd9Sstevel@tonic-gate 		error = nbl_svmand(vp, fp->f_cred, &svmand);
8527c478bd9Sstevel@tonic-gate 		if (error != 0)
8537c478bd9Sstevel@tonic-gate 			goto out;
854da6c28aaSamw 		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand,
855da6c28aaSamw 		    NULL)) {
8567c478bd9Sstevel@tonic-gate 			error = EACCES;
8577c478bd9Sstevel@tonic-gate 			goto out;
8587c478bd9Sstevel@tonic-gate 		}
8597c478bd9Sstevel@tonic-gate 	}
8607c478bd9Sstevel@tonic-gate 
8617c478bd9Sstevel@tonic-gate 	(void) VOP_RWLOCK(vp, rwflag, NULL);
8627c478bd9Sstevel@tonic-gate 
8637c478bd9Sstevel@tonic-gate 	fileoff = fp->f_offset;
8647c478bd9Sstevel@tonic-gate 
8657c478bd9Sstevel@tonic-gate 	/*
8667c478bd9Sstevel@tonic-gate 	 * Behaviour is same as write. Please see comments for write.
8677c478bd9Sstevel@tonic-gate 	 */
8687c478bd9Sstevel@tonic-gate 
8697c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG) {
8707c478bd9Sstevel@tonic-gate 		if (fileoff >= curproc->p_fsz_ctl) {
8717c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
8727c478bd9Sstevel@tonic-gate 			mutex_enter(&curproc->p_lock);
8737c478bd9Sstevel@tonic-gate 			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
8747c478bd9Sstevel@tonic-gate 			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
8757c478bd9Sstevel@tonic-gate 			mutex_exit(&curproc->p_lock);
8767c478bd9Sstevel@tonic-gate 			error = EFBIG;
8777c478bd9Sstevel@tonic-gate 			goto out;
8787c478bd9Sstevel@tonic-gate 		}
8797c478bd9Sstevel@tonic-gate 		if (fileoff >= OFFSET_MAX(fp)) {
8807c478bd9Sstevel@tonic-gate 			VOP_RWUNLOCK(vp, rwflag, NULL);
8817c478bd9Sstevel@tonic-gate 			error = EFBIG;
8827c478bd9Sstevel@tonic-gate 			goto out;
8837c478bd9Sstevel@tonic-gate 		}
8847c478bd9Sstevel@tonic-gate 		if (fileoff + count > OFFSET_MAX(fp))
8857c478bd9Sstevel@tonic-gate 			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
8867c478bd9Sstevel@tonic-gate 	}
8877c478bd9Sstevel@tonic-gate 	auio.uio_loffset = fileoff;
8887c478bd9Sstevel@tonic-gate 	auio.uio_iov = aiov;
8897c478bd9Sstevel@tonic-gate 	auio.uio_iovcnt = iovcnt;
8907c478bd9Sstevel@tonic-gate 	auio.uio_resid = bcount = count;
8917c478bd9Sstevel@tonic-gate 	auio.uio_segflg = UIO_USERSPACE;
8927c478bd9Sstevel@tonic-gate 	auio.uio_llimit = curproc->p_fsz_ctl;
8937c478bd9Sstevel@tonic-gate 	auio.uio_fmode = fflag;
8947c478bd9Sstevel@tonic-gate 	auio.uio_extflg = UIO_COPY_DEFAULT;
8957c478bd9Sstevel@tonic-gate 
8967c478bd9Sstevel@tonic-gate 	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
8977c478bd9Sstevel@tonic-gate 
8987c478bd9Sstevel@tonic-gate 	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
8997c478bd9Sstevel@tonic-gate 	count -= auio.uio_resid;
9007c478bd9Sstevel@tonic-gate 	CPU_STATS_ENTER_K();
9017c478bd9Sstevel@tonic-gate 	cp = CPU;
9027c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
9037c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
9047c478bd9Sstevel@tonic-gate 	CPU_STATS_EXIT_K();
9057c478bd9Sstevel@tonic-gate 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
9067c478bd9Sstevel@tonic-gate 
9077c478bd9Sstevel@tonic-gate 	if (vp->v_type == VFIFO)	/* Backward compatibility */
9087c478bd9Sstevel@tonic-gate 		fp->f_offset = count;
9097c478bd9Sstevel@tonic-gate 	else if (((fp->f_flag & FAPPEND) == 0) ||
9107c478bd9Sstevel@tonic-gate 	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
9117c478bd9Sstevel@tonic-gate 		fp->f_offset = auio.uio_loffset;
9127c478bd9Sstevel@tonic-gate 	VOP_RWUNLOCK(vp, rwflag, NULL);
9137c478bd9Sstevel@tonic-gate 
9147c478bd9Sstevel@tonic-gate 	if (error == EINTR && count != 0)
9157c478bd9Sstevel@tonic-gate 		error = 0;
9167c478bd9Sstevel@tonic-gate out:
9177c478bd9Sstevel@tonic-gate 	if (in_crit)
9187c478bd9Sstevel@tonic-gate 		nbl_end_crit(vp);
9197c478bd9Sstevel@tonic-gate 	releasef(fdes);
9207c478bd9Sstevel@tonic-gate 	if (error)
9217c478bd9Sstevel@tonic-gate 		return (set_errno(error));
9227c478bd9Sstevel@tonic-gate 	return (count);
9237c478bd9Sstevel@tonic-gate }
9247c478bd9Sstevel@tonic-gate 
925*fca543caSDJ Hoffman ssize_t
926*fca543caSDJ Hoffman preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
927*fca543caSDJ Hoffman     off_t extended_offset)
928*fca543caSDJ Hoffman {
929*fca543caSDJ Hoffman 	struct uio auio;
930*fca543caSDJ Hoffman 	struct iovec aiov[DEF_IOV_MAX];
931*fca543caSDJ Hoffman 	file_t *fp;
932*fca543caSDJ Hoffman 	register vnode_t *vp;
933*fca543caSDJ Hoffman 	struct cpu *cp;
934*fca543caSDJ Hoffman 	int fflag, ioflag, rwflag;
935*fca543caSDJ Hoffman 	ssize_t count, bcount;
936*fca543caSDJ Hoffman 	int error = 0;
937*fca543caSDJ Hoffman 	int i;
938*fca543caSDJ Hoffman 
939*fca543caSDJ Hoffman #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
940*fca543caSDJ Hoffman 	u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
941*fca543caSDJ Hoffman 	    (u_offset_t)offset;
942*fca543caSDJ Hoffman #else /* _SYSCALL32_IMPL || _ILP32 */
943*fca543caSDJ Hoffman 	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
944*fca543caSDJ Hoffman #endif /* _SYSCALL32_IMPR || _ILP32 */
945*fca543caSDJ Hoffman #ifdef _SYSCALL32_IMPL
946*fca543caSDJ Hoffman 	const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
947*fca543caSDJ Hoffman 	    extended_offset == 0?
948*fca543caSDJ Hoffman 	    MAXOFF32_T : MAXOFFSET_T;
949*fca543caSDJ Hoffman #else /* _SYSCALL32_IMPL */
950*fca543caSDJ Hoffman 	const u_offset_t maxoff = MAXOFF32_T;
951*fca543caSDJ Hoffman #endif /* _SYSCALL32_IMPL */
952*fca543caSDJ Hoffman 
953*fca543caSDJ Hoffman 	int in_crit = 0;
954*fca543caSDJ Hoffman 
955*fca543caSDJ Hoffman 	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
956*fca543caSDJ Hoffman 		return (set_errno(EINVAL));
957*fca543caSDJ Hoffman 
958*fca543caSDJ Hoffman #ifdef _SYSCALL32_IMPL
959*fca543caSDJ Hoffman 	/*
960*fca543caSDJ Hoffman 	 * 32-bit callers need to have their iovec expanded,
961*fca543caSDJ Hoffman 	 * while ensuring that they can't move more than 2Gbytes
962*fca543caSDJ Hoffman 	 * of data in a single call.
963*fca543caSDJ Hoffman 	 */
964*fca543caSDJ Hoffman 	if (get_udatamodel() == DATAMODEL_ILP32) {
965*fca543caSDJ Hoffman 		struct iovec32 aiov32[DEF_IOV_MAX];
966*fca543caSDJ Hoffman 		ssize32_t count32;
967*fca543caSDJ Hoffman 
968*fca543caSDJ Hoffman 		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
969*fca543caSDJ Hoffman 			return (set_errno(EFAULT));
970*fca543caSDJ Hoffman 
971*fca543caSDJ Hoffman 		count32 = 0;
972*fca543caSDJ Hoffman 		for (i = 0; i < iovcnt; i++) {
973*fca543caSDJ Hoffman 			ssize32_t iovlen32 = aiov32[i].iov_len;
974*fca543caSDJ Hoffman 			count32 += iovlen32;
975*fca543caSDJ Hoffman 			if (iovlen32 < 0 || count32 < 0)
976*fca543caSDJ Hoffman 				return (set_errno(EINVAL));
977*fca543caSDJ Hoffman 			aiov[i].iov_len = iovlen32;
978*fca543caSDJ Hoffman 			aiov[i].iov_base =
979*fca543caSDJ Hoffman 			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
980*fca543caSDJ Hoffman 		}
981*fca543caSDJ Hoffman 	} else
982*fca543caSDJ Hoffman #endif /* _SYSCALL32_IMPL */
983*fca543caSDJ Hoffman 		if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
984*fca543caSDJ Hoffman 			return (set_errno(EFAULT));
985*fca543caSDJ Hoffman 
986*fca543caSDJ Hoffman 	count = 0;
987*fca543caSDJ Hoffman 	for (i = 0; i < iovcnt; i++) {
988*fca543caSDJ Hoffman 		ssize_t iovlen = aiov[i].iov_len;
989*fca543caSDJ Hoffman 		count += iovlen;
990*fca543caSDJ Hoffman 		if (iovlen < 0 || count < 0)
991*fca543caSDJ Hoffman 			return (set_errno(EINVAL));
992*fca543caSDJ Hoffman 	}
993*fca543caSDJ Hoffman 
994*fca543caSDJ Hoffman 	if ((bcount = (ssize_t)count) < 0)
995*fca543caSDJ Hoffman 		return (set_errno(EINVAL));
996*fca543caSDJ Hoffman 	if ((fp = getf(fdes)) == NULL)
997*fca543caSDJ Hoffman 		return (set_errno(EBADF));
998*fca543caSDJ Hoffman 	if (((fflag = fp->f_flag) & FREAD) == 0) {
999*fca543caSDJ Hoffman 		error = EBADF;
1000*fca543caSDJ Hoffman 		goto out;
1001*fca543caSDJ Hoffman 	}
1002*fca543caSDJ Hoffman 	vp = fp->f_vnode;
1003*fca543caSDJ Hoffman 	rwflag = 0;
1004*fca543caSDJ Hoffman 	if (vp->v_type == VREG) {
1005*fca543caSDJ Hoffman 
1006*fca543caSDJ Hoffman 		if (bcount == 0)
1007*fca543caSDJ Hoffman 			goto out;
1008*fca543caSDJ Hoffman 
1009*fca543caSDJ Hoffman 		/*
1010*fca543caSDJ Hoffman 		 * return EINVAL for offsets that cannot be
1011*fca543caSDJ Hoffman 		 * represented in an off_t.
1012*fca543caSDJ Hoffman 		 */
1013*fca543caSDJ Hoffman 		if (fileoff > maxoff) {
1014*fca543caSDJ Hoffman 			error = EINVAL;
1015*fca543caSDJ Hoffman 			goto out;
1016*fca543caSDJ Hoffman 		}
1017*fca543caSDJ Hoffman 
1018*fca543caSDJ Hoffman 		if (fileoff + bcount > maxoff)
1019*fca543caSDJ Hoffman 			bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
1020*fca543caSDJ Hoffman 	} else if (vp->v_type == VFIFO) {
1021*fca543caSDJ Hoffman 		error = ESPIPE;
1022*fca543caSDJ Hoffman 		goto out;
1023*fca543caSDJ Hoffman 	}
1024*fca543caSDJ Hoffman 	/*
1025*fca543caSDJ Hoffman 	 * We have to enter the critical region before calling VOP_RWLOCK
1026*fca543caSDJ Hoffman 	 * to avoid a deadlock with ufs.
1027*fca543caSDJ Hoffman 	 */
1028*fca543caSDJ Hoffman 	if (nbl_need_check(vp)) {
1029*fca543caSDJ Hoffman 		int svmand;
1030*fca543caSDJ Hoffman 
1031*fca543caSDJ Hoffman 		nbl_start_crit(vp, RW_READER);
1032*fca543caSDJ Hoffman 		in_crit = 1;
1033*fca543caSDJ Hoffman 		error = nbl_svmand(vp, fp->f_cred, &svmand);
1034*fca543caSDJ Hoffman 		if (error != 0)
1035*fca543caSDJ Hoffman 			goto out;
1036*fca543caSDJ Hoffman 		if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
1037*fca543caSDJ Hoffman 		    NULL)) {
1038*fca543caSDJ Hoffman 			error = EACCES;
1039*fca543caSDJ Hoffman 			goto out;
1040*fca543caSDJ Hoffman 		}
1041*fca543caSDJ Hoffman 	}
1042*fca543caSDJ Hoffman 
1043*fca543caSDJ Hoffman 	(void) VOP_RWLOCK(vp, rwflag, NULL);
1044*fca543caSDJ Hoffman 
1045*fca543caSDJ Hoffman 	/*
1046*fca543caSDJ Hoffman 	 * Behaviour is same as read(2). Please see comments in
1047*fca543caSDJ Hoffman 	 * read(2).
1048*fca543caSDJ Hoffman 	 */
1049*fca543caSDJ Hoffman 
1050*fca543caSDJ Hoffman 	if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
1051*fca543caSDJ Hoffman 		struct vattr va;
1052*fca543caSDJ Hoffman 		va.va_mask = AT_SIZE;
1053*fca543caSDJ Hoffman 		if ((error =
1054*fca543caSDJ Hoffman 		    VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL)))  {
1055*fca543caSDJ Hoffman 			VOP_RWUNLOCK(vp, rwflag, NULL);
1056*fca543caSDJ Hoffman 			goto out;
1057*fca543caSDJ Hoffman 		}
1058*fca543caSDJ Hoffman 		if (fileoff >= va.va_size) {
1059*fca543caSDJ Hoffman 			VOP_RWUNLOCK(vp, rwflag, NULL);
1060*fca543caSDJ Hoffman 			count = 0;
1061*fca543caSDJ Hoffman 			goto out;
1062*fca543caSDJ Hoffman 		} else {
1063*fca543caSDJ Hoffman 			VOP_RWUNLOCK(vp, rwflag, NULL);
1064*fca543caSDJ Hoffman 			error = EOVERFLOW;
1065*fca543caSDJ Hoffman 			goto out;
1066*fca543caSDJ Hoffman 		}
1067*fca543caSDJ Hoffman 	}
1068*fca543caSDJ Hoffman 	if ((vp->v_type == VREG) &&
1069*fca543caSDJ Hoffman 	    (fileoff + count > OFFSET_MAX(fp))) {
1070*fca543caSDJ Hoffman 		count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1071*fca543caSDJ Hoffman 	}
1072*fca543caSDJ Hoffman 	auio.uio_loffset = fileoff;
1073*fca543caSDJ Hoffman 	auio.uio_iov = aiov;
1074*fca543caSDJ Hoffman 	auio.uio_iovcnt = iovcnt;
1075*fca543caSDJ Hoffman 	auio.uio_resid = bcount = count;
1076*fca543caSDJ Hoffman 	auio.uio_segflg = UIO_USERSPACE;
1077*fca543caSDJ Hoffman 	auio.uio_llimit = MAXOFFSET_T;
1078*fca543caSDJ Hoffman 	auio.uio_fmode = fflag;
1079*fca543caSDJ Hoffman 	if (bcount <= copyout_max_cached)
1080*fca543caSDJ Hoffman 		auio.uio_extflg = UIO_COPY_CACHED;
1081*fca543caSDJ Hoffman 	else
1082*fca543caSDJ Hoffman 		auio.uio_extflg = UIO_COPY_DEFAULT;
1083*fca543caSDJ Hoffman 
1084*fca543caSDJ Hoffman 	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1085*fca543caSDJ Hoffman 	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1086*fca543caSDJ Hoffman 	count -= auio.uio_resid;
1087*fca543caSDJ Hoffman 	CPU_STATS_ENTER_K();
1088*fca543caSDJ Hoffman 	cp = CPU;
1089*fca543caSDJ Hoffman 	CPU_STATS_ADDQ(cp, sys, sysread, 1);
1090*fca543caSDJ Hoffman 	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
1091*fca543caSDJ Hoffman 	CPU_STATS_EXIT_K();
1092*fca543caSDJ Hoffman 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1093*fca543caSDJ Hoffman 
1094*fca543caSDJ Hoffman 	VOP_RWUNLOCK(vp, rwflag, NULL);
1095*fca543caSDJ Hoffman 
1096*fca543caSDJ Hoffman 	if (error == EINTR && count != 0)
1097*fca543caSDJ Hoffman 		error = 0;
1098*fca543caSDJ Hoffman out:
1099*fca543caSDJ Hoffman 	if (in_crit)
1100*fca543caSDJ Hoffman 		nbl_end_crit(vp);
1101*fca543caSDJ Hoffman 	releasef(fdes);
1102*fca543caSDJ Hoffman 	if (error)
1103*fca543caSDJ Hoffman 		return (set_errno(error));
1104*fca543caSDJ Hoffman 	return (count);
1105*fca543caSDJ Hoffman }
1106*fca543caSDJ Hoffman 
1107*fca543caSDJ Hoffman ssize_t
1108*fca543caSDJ Hoffman pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
1109*fca543caSDJ Hoffman     off_t extended_offset)
1110*fca543caSDJ Hoffman {
1111*fca543caSDJ Hoffman 	struct uio auio;
1112*fca543caSDJ Hoffman 	struct iovec aiov[DEF_IOV_MAX];
1113*fca543caSDJ Hoffman 	file_t *fp;
1114*fca543caSDJ Hoffman 	register vnode_t *vp;
1115*fca543caSDJ Hoffman 	struct cpu *cp;
1116*fca543caSDJ Hoffman 	int fflag, ioflag, rwflag;
1117*fca543caSDJ Hoffman 	ssize_t count, bcount;
1118*fca543caSDJ Hoffman 	int error = 0;
1119*fca543caSDJ Hoffman 	int i;
1120*fca543caSDJ Hoffman 
1121*fca543caSDJ Hoffman #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1122*fca543caSDJ Hoffman 	u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1123*fca543caSDJ Hoffman 	    (u_offset_t)offset;
1124*fca543caSDJ Hoffman #else /* _SYSCALL32_IMPL || _ILP32 */
1125*fca543caSDJ Hoffman 	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
1126*fca543caSDJ Hoffman #endif /* _SYSCALL32_IMPR || _ILP32 */
1127*fca543caSDJ Hoffman #ifdef _SYSCALL32_IMPL
1128*fca543caSDJ Hoffman 	const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
1129*fca543caSDJ Hoffman 	    extended_offset == 0?
1130*fca543caSDJ Hoffman 	    MAXOFF32_T : MAXOFFSET_T;
1131*fca543caSDJ Hoffman #else /* _SYSCALL32_IMPL */
1132*fca543caSDJ Hoffman 	const u_offset_t maxoff = MAXOFF32_T;
1133*fca543caSDJ Hoffman #endif /* _SYSCALL32_IMPL */
1134*fca543caSDJ Hoffman 
1135*fca543caSDJ Hoffman 	int in_crit = 0;
1136*fca543caSDJ Hoffman 
1137*fca543caSDJ Hoffman 	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
1138*fca543caSDJ Hoffman 		return (set_errno(EINVAL));
1139*fca543caSDJ Hoffman 
1140*fca543caSDJ Hoffman #ifdef _SYSCALL32_IMPL
1141*fca543caSDJ Hoffman 	/*
1142*fca543caSDJ Hoffman 	 * 32-bit callers need to have their iovec expanded,
1143*fca543caSDJ Hoffman 	 * while ensuring that they can't move more than 2Gbytes
1144*fca543caSDJ Hoffman 	 * of data in a single call.
1145*fca543caSDJ Hoffman 	 */
1146*fca543caSDJ Hoffman 	if (get_udatamodel() == DATAMODEL_ILP32) {
1147*fca543caSDJ Hoffman 		struct iovec32 aiov32[DEF_IOV_MAX];
1148*fca543caSDJ Hoffman 		ssize32_t count32;
1149*fca543caSDJ Hoffman 
1150*fca543caSDJ Hoffman 		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
1151*fca543caSDJ Hoffman 			return (set_errno(EFAULT));
1152*fca543caSDJ Hoffman 
1153*fca543caSDJ Hoffman 		count32 = 0;
1154*fca543caSDJ Hoffman 		for (i = 0; i < iovcnt; i++) {
1155*fca543caSDJ Hoffman 			ssize32_t iovlen32 = aiov32[i].iov_len;
1156*fca543caSDJ Hoffman 			count32 += iovlen32;
1157*fca543caSDJ Hoffman 			if (iovlen32 < 0 || count32 < 0)
1158*fca543caSDJ Hoffman 				return (set_errno(EINVAL));
1159*fca543caSDJ Hoffman 			aiov[i].iov_len = iovlen32;
1160*fca543caSDJ Hoffman 			aiov[i].iov_base =
1161*fca543caSDJ Hoffman 			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
1162*fca543caSDJ Hoffman 		}
1163*fca543caSDJ Hoffman 	} else
1164*fca543caSDJ Hoffman #endif /* _SYSCALL32_IMPL */
1165*fca543caSDJ Hoffman 		if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
1166*fca543caSDJ Hoffman 			return (set_errno(EFAULT));
1167*fca543caSDJ Hoffman 
1168*fca543caSDJ Hoffman 	count = 0;
1169*fca543caSDJ Hoffman 	for (i = 0; i < iovcnt; i++) {
1170*fca543caSDJ Hoffman 		ssize_t iovlen = aiov[i].iov_len;
1171*fca543caSDJ Hoffman 		count += iovlen;
1172*fca543caSDJ Hoffman 		if (iovlen < 0 || count < 0)
1173*fca543caSDJ Hoffman 			return (set_errno(EINVAL));
1174*fca543caSDJ Hoffman 	}
1175*fca543caSDJ Hoffman 
1176*fca543caSDJ Hoffman 	if ((bcount = (ssize_t)count) < 0)
1177*fca543caSDJ Hoffman 		return (set_errno(EINVAL));
1178*fca543caSDJ Hoffman 	if ((fp = getf(fdes)) == NULL)
1179*fca543caSDJ Hoffman 		return (set_errno(EBADF));
1180*fca543caSDJ Hoffman 	if (((fflag = fp->f_flag) & FWRITE) == 0) {
1181*fca543caSDJ Hoffman 		error = EBADF;
1182*fca543caSDJ Hoffman 		goto out;
1183*fca543caSDJ Hoffman 	}
1184*fca543caSDJ Hoffman 	vp = fp->f_vnode;
1185*fca543caSDJ Hoffman 	rwflag = 1;
1186*fca543caSDJ Hoffman 	if (vp->v_type == VREG) {
1187*fca543caSDJ Hoffman 
1188*fca543caSDJ Hoffman 		if (bcount == 0)
1189*fca543caSDJ Hoffman 			goto out;
1190*fca543caSDJ Hoffman 
1191*fca543caSDJ Hoffman 		/*
1192*fca543caSDJ Hoffman 		 * return EINVAL for offsets that cannot be
1193*fca543caSDJ Hoffman 		 * represented in an off_t.
1194*fca543caSDJ Hoffman 		 */
1195*fca543caSDJ Hoffman 		if (fileoff > maxoff) {
1196*fca543caSDJ Hoffman 			error = EINVAL;
1197*fca543caSDJ Hoffman 			goto out;
1198*fca543caSDJ Hoffman 		}
1199*fca543caSDJ Hoffman 		/*
1200*fca543caSDJ Hoffman 		 * Take appropriate action if we are trying
1201*fca543caSDJ Hoffman 		 * to write above the resource limit.
1202*fca543caSDJ Hoffman 		 */
1203*fca543caSDJ Hoffman 		if (fileoff >= curproc->p_fsz_ctl) {
1204*fca543caSDJ Hoffman 			mutex_enter(&curproc->p_lock);
1205*fca543caSDJ Hoffman 			/*
1206*fca543caSDJ Hoffman 			 * Return value ignored because it lists
1207*fca543caSDJ Hoffman 			 * actions taken, but we are in an error case.
1208*fca543caSDJ Hoffman 			 * We don't have any actions that depend on
1209*fca543caSDJ Hoffman 			 * what could happen in this call, so we ignore
1210*fca543caSDJ Hoffman 			 * the return value.
1211*fca543caSDJ Hoffman 			 */
1212*fca543caSDJ Hoffman 			(void) rctl_action(
1213*fca543caSDJ Hoffman 			    rctlproc_legacy[RLIMIT_FSIZE],
1214*fca543caSDJ Hoffman 			    curproc->p_rctls, curproc,
1215*fca543caSDJ Hoffman 			    RCA_UNSAFE_SIGINFO);
1216*fca543caSDJ Hoffman 			mutex_exit(&curproc->p_lock);
1217*fca543caSDJ Hoffman 
1218*fca543caSDJ Hoffman 			error = EFBIG;
1219*fca543caSDJ Hoffman 			goto out;
1220*fca543caSDJ Hoffman 		}
1221*fca543caSDJ Hoffman 		/*
1222*fca543caSDJ Hoffman 		 * Don't allow pwritev to cause file sizes to exceed
1223*fca543caSDJ Hoffman 		 * maxoff.
1224*fca543caSDJ Hoffman 		 */
1225*fca543caSDJ Hoffman 		if (fileoff == maxoff) {
1226*fca543caSDJ Hoffman 			error = EFBIG;
1227*fca543caSDJ Hoffman 			goto out;
1228*fca543caSDJ Hoffman 		}
1229*fca543caSDJ Hoffman 
1230*fca543caSDJ Hoffman 		if (fileoff + bcount > maxoff)
1231*fca543caSDJ Hoffman 			bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
1232*fca543caSDJ Hoffman 	} else if (vp->v_type == VFIFO) {
1233*fca543caSDJ Hoffman 		error = ESPIPE;
1234*fca543caSDJ Hoffman 		goto out;
1235*fca543caSDJ Hoffman 	}
1236*fca543caSDJ Hoffman 	/*
1237*fca543caSDJ Hoffman 	 * We have to enter the critical region before calling VOP_RWLOCK
1238*fca543caSDJ Hoffman 	 * to avoid a deadlock with ufs.
1239*fca543caSDJ Hoffman 	 */
1240*fca543caSDJ Hoffman 	if (nbl_need_check(vp)) {
1241*fca543caSDJ Hoffman 		int svmand;
1242*fca543caSDJ Hoffman 
1243*fca543caSDJ Hoffman 		nbl_start_crit(vp, RW_READER);
1244*fca543caSDJ Hoffman 		in_crit = 1;
1245*fca543caSDJ Hoffman 		error = nbl_svmand(vp, fp->f_cred, &svmand);
1246*fca543caSDJ Hoffman 		if (error != 0)
1247*fca543caSDJ Hoffman 			goto out;
1248*fca543caSDJ Hoffman 		if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
1249*fca543caSDJ Hoffman 		    NULL)) {
1250*fca543caSDJ Hoffman 			error = EACCES;
1251*fca543caSDJ Hoffman 			goto out;
1252*fca543caSDJ Hoffman 		}
1253*fca543caSDJ Hoffman 	}
1254*fca543caSDJ Hoffman 
1255*fca543caSDJ Hoffman 	(void) VOP_RWLOCK(vp, rwflag, NULL);
1256*fca543caSDJ Hoffman 
1257*fca543caSDJ Hoffman 
1258*fca543caSDJ Hoffman 	/*
1259*fca543caSDJ Hoffman 	 * Behaviour is same as write(2). Please see comments for
1260*fca543caSDJ Hoffman 	 * write(2).
1261*fca543caSDJ Hoffman 	 */
1262*fca543caSDJ Hoffman 
1263*fca543caSDJ Hoffman 	if (vp->v_type == VREG) {
1264*fca543caSDJ Hoffman 		if (fileoff >= curproc->p_fsz_ctl) {
1265*fca543caSDJ Hoffman 			VOP_RWUNLOCK(vp, rwflag, NULL);
1266*fca543caSDJ Hoffman 			mutex_enter(&curproc->p_lock);
1267*fca543caSDJ Hoffman 			/* see above rctl_action comment */
1268*fca543caSDJ Hoffman 			(void) rctl_action(
1269*fca543caSDJ Hoffman 			    rctlproc_legacy[RLIMIT_FSIZE],
1270*fca543caSDJ Hoffman 			    curproc->p_rctls,
1271*fca543caSDJ Hoffman 			    curproc, RCA_UNSAFE_SIGINFO);
1272*fca543caSDJ Hoffman 			mutex_exit(&curproc->p_lock);
1273*fca543caSDJ Hoffman 			error = EFBIG;
1274*fca543caSDJ Hoffman 			goto out;
1275*fca543caSDJ Hoffman 		}
1276*fca543caSDJ Hoffman 		if (fileoff >= OFFSET_MAX(fp)) {
1277*fca543caSDJ Hoffman 			VOP_RWUNLOCK(vp, rwflag, NULL);
1278*fca543caSDJ Hoffman 			error = EFBIG;
1279*fca543caSDJ Hoffman 			goto out;
1280*fca543caSDJ Hoffman 		}
1281*fca543caSDJ Hoffman 		if (fileoff + count > OFFSET_MAX(fp))
1282*fca543caSDJ Hoffman 			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1283*fca543caSDJ Hoffman 	}
1284*fca543caSDJ Hoffman 
1285*fca543caSDJ Hoffman 	auio.uio_loffset = fileoff;
1286*fca543caSDJ Hoffman 	auio.uio_iov = aiov;
1287*fca543caSDJ Hoffman 	auio.uio_iovcnt = iovcnt;
1288*fca543caSDJ Hoffman 	auio.uio_resid = bcount = count;
1289*fca543caSDJ Hoffman 	auio.uio_segflg = UIO_USERSPACE;
1290*fca543caSDJ Hoffman 	auio.uio_llimit = curproc->p_fsz_ctl;
1291*fca543caSDJ Hoffman 	auio.uio_fmode = fflag;
1292*fca543caSDJ Hoffman 	auio.uio_extflg = UIO_COPY_CACHED;
1293*fca543caSDJ Hoffman 	ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
1294*fca543caSDJ Hoffman 	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
1295*fca543caSDJ Hoffman 	count -= auio.uio_resid;
1296*fca543caSDJ Hoffman 	CPU_STATS_ENTER_K();
1297*fca543caSDJ Hoffman 	cp = CPU;
1298*fca543caSDJ Hoffman 	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
1299*fca543caSDJ Hoffman 	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
1300*fca543caSDJ Hoffman 	CPU_STATS_EXIT_K();
1301*fca543caSDJ Hoffman 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1302*fca543caSDJ Hoffman 
1303*fca543caSDJ Hoffman 	VOP_RWUNLOCK(vp, rwflag, NULL);
1304*fca543caSDJ Hoffman 
1305*fca543caSDJ Hoffman 	if (error == EINTR && count != 0)
1306*fca543caSDJ Hoffman 		error = 0;
1307*fca543caSDJ Hoffman out:
1308*fca543caSDJ Hoffman 	if (in_crit)
1309*fca543caSDJ Hoffman 		nbl_end_crit(vp);
1310*fca543caSDJ Hoffman 	releasef(fdes);
1311*fca543caSDJ Hoffman 	if (error)
1312*fca543caSDJ Hoffman 		return (set_errno(error));
1313*fca543caSDJ Hoffman 	return (count);
1314*fca543caSDJ Hoffman }
1315*fca543caSDJ Hoffman 
13167c478bd9Sstevel@tonic-gate #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
13177c478bd9Sstevel@tonic-gate 
13187c478bd9Sstevel@tonic-gate /*
13197c478bd9Sstevel@tonic-gate  * This syscall supplies 64-bit file offsets to 32-bit applications only.
13207c478bd9Sstevel@tonic-gate  */
13217c478bd9Sstevel@tonic-gate ssize32_t
13227c478bd9Sstevel@tonic-gate pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
13237c478bd9Sstevel@tonic-gate     uint32_t offset_2)
13247c478bd9Sstevel@tonic-gate {
13257c478bd9Sstevel@tonic-gate 	struct uio auio;
13267c478bd9Sstevel@tonic-gate 	struct iovec aiov;
13277c478bd9Sstevel@tonic-gate 	file_t *fp;
13287c478bd9Sstevel@tonic-gate 	register vnode_t *vp;
13297c478bd9Sstevel@tonic-gate 	struct cpu *cp;
13307c478bd9Sstevel@tonic-gate 	int fflag, ioflag, rwflag;
13317c478bd9Sstevel@tonic-gate 	ssize_t bcount;
13327c478bd9Sstevel@tonic-gate 	int error = 0;
13337c478bd9Sstevel@tonic-gate 	u_offset_t fileoff;
13347c478bd9Sstevel@tonic-gate 	int in_crit = 0;
13357c478bd9Sstevel@tonic-gate 
13367c478bd9Sstevel@tonic-gate #if defined(_LITTLE_ENDIAN)
13377c478bd9Sstevel@tonic-gate 	fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
13387c478bd9Sstevel@tonic-gate #else
13397c478bd9Sstevel@tonic-gate 	fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
13407c478bd9Sstevel@tonic-gate #endif
13417c478bd9Sstevel@tonic-gate 
13427c478bd9Sstevel@tonic-gate 	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
13437c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
13447c478bd9Sstevel@tonic-gate 
13457c478bd9Sstevel@tonic-gate 	if ((fp = getf(fdes)) == NULL)
13467c478bd9Sstevel@tonic-gate 		return (set_errno(EBADF));
13477c478bd9Sstevel@tonic-gate 	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
13487c478bd9Sstevel@tonic-gate 		error = EBADF;
13497c478bd9Sstevel@tonic-gate 		goto out;
13507c478bd9Sstevel@tonic-gate 	}
13517c478bd9Sstevel@tonic-gate 
13527c478bd9Sstevel@tonic-gate 	rwflag = 0;
13537c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
13547c478bd9Sstevel@tonic-gate 
13557c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG) {
13567c478bd9Sstevel@tonic-gate 
13577c478bd9Sstevel@tonic-gate 		if (bcount == 0)
13587c478bd9Sstevel@tonic-gate 			goto out;
13597c478bd9Sstevel@tonic-gate 
13607c478bd9Sstevel@tonic-gate 		/*
13617c478bd9Sstevel@tonic-gate 		 * Same as pread. See comments in pread.
13627c478bd9Sstevel@tonic-gate 		 */
13637c478bd9Sstevel@tonic-gate 
13647c478bd9Sstevel@tonic-gate 		if (fileoff > MAXOFFSET_T) {
13657c478bd9Sstevel@tonic-gate 			error = EINVAL;
13667c478bd9Sstevel@tonic-gate 			goto out;
13677c478bd9Sstevel@tonic-gate 		}
13687c478bd9Sstevel@tonic-gate 		if (fileoff + bcount > MAXOFFSET_T)
13697c478bd9Sstevel@tonic-gate 			bcount = (ssize_t)(MAXOFFSET_T - fileoff);
13707c478bd9Sstevel@tonic-gate 	} else if (vp->v_type == VFIFO) {
13717c478bd9Sstevel@tonic-gate 		error = ESPIPE;
13727c478bd9Sstevel@tonic-gate 		goto out;
13737c478bd9Sstevel@tonic-gate 	}
13747c478bd9Sstevel@tonic-gate 
13757c478bd9Sstevel@tonic-gate 	/*
13767c478bd9Sstevel@tonic-gate 	 * We have to enter the critical region before calling VOP_RWLOCK
13777c478bd9Sstevel@tonic-gate 	 * to avoid a deadlock with ufs.
13787c478bd9Sstevel@tonic-gate 	 */
13797c478bd9Sstevel@tonic-gate 	if (nbl_need_check(vp)) {
13807c478bd9Sstevel@tonic-gate 		int svmand;
13817c478bd9Sstevel@tonic-gate 
13827c478bd9Sstevel@tonic-gate 		nbl_start_crit(vp, RW_READER);
13837c478bd9Sstevel@tonic-gate 		in_crit = 1;
13847c478bd9Sstevel@tonic-gate 		error = nbl_svmand(vp, fp->f_cred, &svmand);
13857c478bd9Sstevel@tonic-gate 		if (error != 0)
13867c478bd9Sstevel@tonic-gate 			goto out;
1387da6c28aaSamw 		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
1388da6c28aaSamw 		    NULL)) {
13897c478bd9Sstevel@tonic-gate 			error = EACCES;
13907c478bd9Sstevel@tonic-gate 			goto out;
13917c478bd9Sstevel@tonic-gate 		}
13927c478bd9Sstevel@tonic-gate 	}
13937c478bd9Sstevel@tonic-gate 
13947c478bd9Sstevel@tonic-gate 	aiov.iov_base = cbuf;
13957c478bd9Sstevel@tonic-gate 	aiov.iov_len = bcount;
13967c478bd9Sstevel@tonic-gate 	(void) VOP_RWLOCK(vp, rwflag, NULL);
13977c478bd9Sstevel@tonic-gate 	auio.uio_loffset = fileoff;
13987c478bd9Sstevel@tonic-gate 
13997c478bd9Sstevel@tonic-gate 	/*
14007c478bd9Sstevel@tonic-gate 	 * Note: File size can never be greater than MAXOFFSET_T.
14017c478bd9Sstevel@tonic-gate 	 * If ever we start supporting 128 bit files the code
14027c478bd9Sstevel@tonic-gate 	 * similar to the one in pread at this place should be here.
14037c478bd9Sstevel@tonic-gate 	 * Here we avoid the unnecessary VOP_GETATTR() when we
14047c478bd9Sstevel@tonic-gate 	 * know that fileoff == MAXOFFSET_T implies that it is always
14057c478bd9Sstevel@tonic-gate 	 * greater than or equal to file size.
14067c478bd9Sstevel@tonic-gate 	 */
14077c478bd9Sstevel@tonic-gate 	auio.uio_iov = &aiov;
14087c478bd9Sstevel@tonic-gate 	auio.uio_iovcnt = 1;
14097c478bd9Sstevel@tonic-gate 	auio.uio_resid = bcount;
14107c478bd9Sstevel@tonic-gate 	auio.uio_segflg = UIO_USERSPACE;
14117c478bd9Sstevel@tonic-gate 	auio.uio_llimit = MAXOFFSET_T;
14127c478bd9Sstevel@tonic-gate 	auio.uio_fmode = fflag;
14137c478bd9Sstevel@tonic-gate 	auio.uio_extflg = UIO_COPY_CACHED;
14147c478bd9Sstevel@tonic-gate 
14157c478bd9Sstevel@tonic-gate 	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
14167c478bd9Sstevel@tonic-gate 
14177c478bd9Sstevel@tonic-gate 	/* If read sync is not asked for, filter sync flags */
14187c478bd9Sstevel@tonic-gate 	if ((ioflag & FRSYNC) == 0)
14197c478bd9Sstevel@tonic-gate 		ioflag &= ~(FSYNC|FDSYNC);
14207c478bd9Sstevel@tonic-gate 	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
14217c478bd9Sstevel@tonic-gate 	bcount -= auio.uio_resid;
14227c478bd9Sstevel@tonic-gate 	CPU_STATS_ENTER_K();
14237c478bd9Sstevel@tonic-gate 	cp = CPU;
14247c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, sysread, 1);
14257c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
14267c478bd9Sstevel@tonic-gate 	CPU_STATS_EXIT_K();
14277c478bd9Sstevel@tonic-gate 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
14287c478bd9Sstevel@tonic-gate 	VOP_RWUNLOCK(vp, rwflag, NULL);
14297c478bd9Sstevel@tonic-gate 
14307c478bd9Sstevel@tonic-gate 	if (error == EINTR && bcount != 0)
14317c478bd9Sstevel@tonic-gate 		error = 0;
14327c478bd9Sstevel@tonic-gate out:
14337c478bd9Sstevel@tonic-gate 	if (in_crit)
14347c478bd9Sstevel@tonic-gate 		nbl_end_crit(vp);
14357c478bd9Sstevel@tonic-gate 	releasef(fdes);
14367c478bd9Sstevel@tonic-gate 	if (error)
14377c478bd9Sstevel@tonic-gate 		return (set_errno(error));
14387c478bd9Sstevel@tonic-gate 	return (bcount);
14397c478bd9Sstevel@tonic-gate }
14407c478bd9Sstevel@tonic-gate 
14417c478bd9Sstevel@tonic-gate /*
14427c478bd9Sstevel@tonic-gate  * This syscall supplies 64-bit file offsets to 32-bit applications only.
14437c478bd9Sstevel@tonic-gate  */
14447c478bd9Sstevel@tonic-gate ssize32_t
14457c478bd9Sstevel@tonic-gate pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
14467c478bd9Sstevel@tonic-gate     uint32_t offset_2)
14477c478bd9Sstevel@tonic-gate {
14487c478bd9Sstevel@tonic-gate 	struct uio auio;
14497c478bd9Sstevel@tonic-gate 	struct iovec aiov;
14507c478bd9Sstevel@tonic-gate 	file_t *fp;
14517c478bd9Sstevel@tonic-gate 	register vnode_t *vp;
14527c478bd9Sstevel@tonic-gate 	struct cpu *cp;
14537c478bd9Sstevel@tonic-gate 	int fflag, ioflag, rwflag;
14547c478bd9Sstevel@tonic-gate 	ssize_t bcount;
14557c478bd9Sstevel@tonic-gate 	int error = 0;
14567c478bd9Sstevel@tonic-gate 	u_offset_t fileoff;
14577c478bd9Sstevel@tonic-gate 	int in_crit = 0;
14587c478bd9Sstevel@tonic-gate 
14597c478bd9Sstevel@tonic-gate #if defined(_LITTLE_ENDIAN)
14607c478bd9Sstevel@tonic-gate 	fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
14617c478bd9Sstevel@tonic-gate #else
14627c478bd9Sstevel@tonic-gate 	fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
14637c478bd9Sstevel@tonic-gate #endif
14647c478bd9Sstevel@tonic-gate 
14657c478bd9Sstevel@tonic-gate 	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
14667c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
14677c478bd9Sstevel@tonic-gate 	if ((fp = getf(fdes)) == NULL)
14687c478bd9Sstevel@tonic-gate 		return (set_errno(EBADF));
14697c478bd9Sstevel@tonic-gate 	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
14707c478bd9Sstevel@tonic-gate 		error = EBADF;
14717c478bd9Sstevel@tonic-gate 		goto out;
14727c478bd9Sstevel@tonic-gate 	}
14737c478bd9Sstevel@tonic-gate 
14747c478bd9Sstevel@tonic-gate 	rwflag = 1;
14757c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
14767c478bd9Sstevel@tonic-gate 
14777c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG) {
14787c478bd9Sstevel@tonic-gate 
14797c478bd9Sstevel@tonic-gate 		if (bcount == 0)
14807c478bd9Sstevel@tonic-gate 			goto out;
14817c478bd9Sstevel@tonic-gate 
14827c478bd9Sstevel@tonic-gate 		/*
14837c478bd9Sstevel@tonic-gate 		 * See comments in pwrite.
14847c478bd9Sstevel@tonic-gate 		 */
14857c478bd9Sstevel@tonic-gate 		if (fileoff > MAXOFFSET_T) {
14867c478bd9Sstevel@tonic-gate 			error = EINVAL;
14877c478bd9Sstevel@tonic-gate 			goto out;
14887c478bd9Sstevel@tonic-gate 		}
14897c478bd9Sstevel@tonic-gate 		if (fileoff >= curproc->p_fsz_ctl) {
14907c478bd9Sstevel@tonic-gate 			mutex_enter(&curproc->p_lock);
14917c478bd9Sstevel@tonic-gate 			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
14927c478bd9Sstevel@tonic-gate 			    curproc->p_rctls, curproc, RCA_SAFE);
14937c478bd9Sstevel@tonic-gate 			mutex_exit(&curproc->p_lock);
14947c478bd9Sstevel@tonic-gate 			error = EFBIG;
14957c478bd9Sstevel@tonic-gate 			goto out;
14967c478bd9Sstevel@tonic-gate 		}
14977c478bd9Sstevel@tonic-gate 		if (fileoff == MAXOFFSET_T) {
14987c478bd9Sstevel@tonic-gate 			error = EFBIG;
14997c478bd9Sstevel@tonic-gate 			goto out;
15007c478bd9Sstevel@tonic-gate 		}
15017c478bd9Sstevel@tonic-gate 		if (fileoff + bcount > MAXOFFSET_T)
15027c478bd9Sstevel@tonic-gate 			bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff);
15037c478bd9Sstevel@tonic-gate 	} else if (vp->v_type == VFIFO) {
15047c478bd9Sstevel@tonic-gate 		error = ESPIPE;
15057c478bd9Sstevel@tonic-gate 		goto out;
15067c478bd9Sstevel@tonic-gate 	}
15077c478bd9Sstevel@tonic-gate 
15087c478bd9Sstevel@tonic-gate 	/*
15097c478bd9Sstevel@tonic-gate 	 * We have to enter the critical region before calling VOP_RWLOCK
15107c478bd9Sstevel@tonic-gate 	 * to avoid a deadlock with ufs.
15117c478bd9Sstevel@tonic-gate 	 */
15127c478bd9Sstevel@tonic-gate 	if (nbl_need_check(vp)) {
15137c478bd9Sstevel@tonic-gate 		int svmand;
15147c478bd9Sstevel@tonic-gate 
15157c478bd9Sstevel@tonic-gate 		nbl_start_crit(vp, RW_READER);
15167c478bd9Sstevel@tonic-gate 		in_crit = 1;
15177c478bd9Sstevel@tonic-gate 		error = nbl_svmand(vp, fp->f_cred, &svmand);
15187c478bd9Sstevel@tonic-gate 		if (error != 0)
15197c478bd9Sstevel@tonic-gate 			goto out;
1520da6c28aaSamw 		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
1521da6c28aaSamw 		    NULL)) {
15227c478bd9Sstevel@tonic-gate 			error = EACCES;
15237c478bd9Sstevel@tonic-gate 			goto out;
15247c478bd9Sstevel@tonic-gate 		}
15257c478bd9Sstevel@tonic-gate 	}
15267c478bd9Sstevel@tonic-gate 
15277c478bd9Sstevel@tonic-gate 	aiov.iov_base = cbuf;
15287c478bd9Sstevel@tonic-gate 	aiov.iov_len = bcount;
15297c478bd9Sstevel@tonic-gate 	(void) VOP_RWLOCK(vp, rwflag, NULL);
15307c478bd9Sstevel@tonic-gate 	auio.uio_loffset = fileoff;
15317c478bd9Sstevel@tonic-gate 	auio.uio_iov = &aiov;
15327c478bd9Sstevel@tonic-gate 	auio.uio_iovcnt = 1;
15337c478bd9Sstevel@tonic-gate 	auio.uio_resid = bcount;
15347c478bd9Sstevel@tonic-gate 	auio.uio_segflg = UIO_USERSPACE;
15357c478bd9Sstevel@tonic-gate 	auio.uio_llimit = curproc->p_fsz_ctl;
15367c478bd9Sstevel@tonic-gate 	auio.uio_fmode = fflag;
15377c478bd9Sstevel@tonic-gate 	auio.uio_extflg = UIO_COPY_CACHED;
15387c478bd9Sstevel@tonic-gate 
15394d86dd30Sraf 	/*
15404d86dd30Sraf 	 * The SUSv4 POSIX specification states:
15414d86dd30Sraf 	 *	The pwrite() function shall be equivalent to write(), except
15424d86dd30Sraf 	 *	that it writes into a given position and does not change
15434d86dd30Sraf 	 *	the file offset (regardless of whether O_APPEND is set).
15444d86dd30Sraf 	 * To make this be true, we omit the FAPPEND flag from ioflag.
15454d86dd30Sraf 	 */
15464d86dd30Sraf 	ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
15477c478bd9Sstevel@tonic-gate 
15487c478bd9Sstevel@tonic-gate 	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
15497c478bd9Sstevel@tonic-gate 	bcount -= auio.uio_resid;
15507c478bd9Sstevel@tonic-gate 	CPU_STATS_ENTER_K();
15517c478bd9Sstevel@tonic-gate 	cp = CPU;
15527c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
15537c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
15547c478bd9Sstevel@tonic-gate 	CPU_STATS_EXIT_K();
15557c478bd9Sstevel@tonic-gate 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
15567c478bd9Sstevel@tonic-gate 	VOP_RWUNLOCK(vp, rwflag, NULL);
15577c478bd9Sstevel@tonic-gate 
15587c478bd9Sstevel@tonic-gate 	if (error == EINTR && bcount != 0)
15597c478bd9Sstevel@tonic-gate 		error = 0;
15607c478bd9Sstevel@tonic-gate out:
15617c478bd9Sstevel@tonic-gate 	if (in_crit)
15627c478bd9Sstevel@tonic-gate 		nbl_end_crit(vp);
15637c478bd9Sstevel@tonic-gate 	releasef(fdes);
15647c478bd9Sstevel@tonic-gate 	if (error)
15657c478bd9Sstevel@tonic-gate 		return (set_errno(error));
15667c478bd9Sstevel@tonic-gate 	return (bcount);
15677c478bd9Sstevel@tonic-gate }
15687c478bd9Sstevel@tonic-gate 
15697c478bd9Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL || _ILP32 */
15707c478bd9Sstevel@tonic-gate 
15717c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
15727c478bd9Sstevel@tonic-gate /*
15737c478bd9Sstevel@tonic-gate  * Tail-call elimination of xxx32() down to xxx()
15747c478bd9Sstevel@tonic-gate  *
15757c478bd9Sstevel@tonic-gate  * A number of xxx32 system calls take a len (or count) argument and
15767c478bd9Sstevel@tonic-gate  * return a number in the range [0,len] or -1 on error.
15777c478bd9Sstevel@tonic-gate  * Given an ssize32_t input len, the downcall xxx() will return
15787c478bd9Sstevel@tonic-gate  * a 64-bit value that is -1 or in the range [0,len] which actually
15797c478bd9Sstevel@tonic-gate  * is a proper return value for the xxx32 call. So even if the xxx32
15807c478bd9Sstevel@tonic-gate  * calls can be considered as returning a ssize32_t, they are currently
15817c478bd9Sstevel@tonic-gate  * declared as returning a ssize_t as this enables tail-call elimination.
15827c478bd9Sstevel@tonic-gate  *
15837c478bd9Sstevel@tonic-gate  * The cast of len (or count) to ssize32_t is needed to ensure we pass
15847c478bd9Sstevel@tonic-gate  * down negative input values as such and let the downcall handle error
 * reporting. Functions covered by this comment are:
15867c478bd9Sstevel@tonic-gate  *
15877c478bd9Sstevel@tonic-gate  * rw.c:           read32, write32, pread32, pwrite32, readv32, writev32.
15887c478bd9Sstevel@tonic-gate  * socksyscall.c:  recv32, recvfrom32, send32, sendto32.
15897c478bd9Sstevel@tonic-gate  * readlink.c:     readlink32.
15907c478bd9Sstevel@tonic-gate  */
15917c478bd9Sstevel@tonic-gate 
15927c478bd9Sstevel@tonic-gate ssize_t
15937c478bd9Sstevel@tonic-gate read32(int32_t fdes, caddr32_t cbuf, size32_t count)
15947c478bd9Sstevel@tonic-gate {
15957c478bd9Sstevel@tonic-gate 	return (read(fdes,
15967c478bd9Sstevel@tonic-gate 	    (void *)(uintptr_t)cbuf, (ssize32_t)count));
15977c478bd9Sstevel@tonic-gate }
15987c478bd9Sstevel@tonic-gate 
15997c478bd9Sstevel@tonic-gate ssize_t
16007c478bd9Sstevel@tonic-gate write32(int32_t fdes, caddr32_t cbuf, size32_t count)
16017c478bd9Sstevel@tonic-gate {
16027c478bd9Sstevel@tonic-gate 	return (write(fdes,
16037c478bd9Sstevel@tonic-gate 	    (void *)(uintptr_t)cbuf, (ssize32_t)count));
16047c478bd9Sstevel@tonic-gate }
16057c478bd9Sstevel@tonic-gate 
16067c478bd9Sstevel@tonic-gate ssize_t
16077c478bd9Sstevel@tonic-gate pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
16087c478bd9Sstevel@tonic-gate {
16097c478bd9Sstevel@tonic-gate 	return (pread(fdes,
16107c478bd9Sstevel@tonic-gate 	    (void *)(uintptr_t)cbuf, (ssize32_t)count,
16117c478bd9Sstevel@tonic-gate 	    (off_t)(uint32_t)offset));
16127c478bd9Sstevel@tonic-gate }
16137c478bd9Sstevel@tonic-gate 
16147c478bd9Sstevel@tonic-gate ssize_t
16157c478bd9Sstevel@tonic-gate pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
16167c478bd9Sstevel@tonic-gate {
16177c478bd9Sstevel@tonic-gate 	return (pwrite(fdes,
16187c478bd9Sstevel@tonic-gate 	    (void *)(uintptr_t)cbuf, (ssize32_t)count,
16197c478bd9Sstevel@tonic-gate 	    (off_t)(uint32_t)offset));
16207c478bd9Sstevel@tonic-gate }
16217c478bd9Sstevel@tonic-gate 
16227c478bd9Sstevel@tonic-gate ssize_t
16237c478bd9Sstevel@tonic-gate readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
16247c478bd9Sstevel@tonic-gate {
16257c478bd9Sstevel@tonic-gate 	return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt));
16267c478bd9Sstevel@tonic-gate }
16277c478bd9Sstevel@tonic-gate 
16287c478bd9Sstevel@tonic-gate ssize_t
16297c478bd9Sstevel@tonic-gate writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
16307c478bd9Sstevel@tonic-gate {
16317c478bd9Sstevel@tonic-gate 	return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt));
16327c478bd9Sstevel@tonic-gate }
16337c478bd9Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
1634