1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* 23*7c478bd9Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
25*7c478bd9Sstevel@tonic-gate */ 26*7c478bd9Sstevel@tonic-gate 27*7c478bd9Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 28*7c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 29*7c478bd9Sstevel@tonic-gate 30*7c478bd9Sstevel@tonic-gate /* 31*7c478bd9Sstevel@tonic-gate * Portions of this source code were derived from Berkeley 4.3 BSD 32*7c478bd9Sstevel@tonic-gate * under license from the Regents of the University of California. 33*7c478bd9Sstevel@tonic-gate */ 34*7c478bd9Sstevel@tonic-gate 35*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 36*7c478bd9Sstevel@tonic-gate 37*7c478bd9Sstevel@tonic-gate #include <sys/param.h> 38*7c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h> 39*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 40*7c478bd9Sstevel@tonic-gate #include <sys/inttypes.h> 41*7c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 42*7c478bd9Sstevel@tonic-gate #include <sys/cred.h> 43*7c478bd9Sstevel@tonic-gate #include <sys/user.h> 44*7c478bd9Sstevel@tonic-gate #include <sys/systm.h> 45*7c478bd9Sstevel@tonic-gate #include <sys/errno.h> 46*7c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 47*7c478bd9Sstevel@tonic-gate #include <sys/file.h> 48*7c478bd9Sstevel@tonic-gate #include <sys/proc.h> 49*7c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 50*7c478bd9Sstevel@tonic-gate #include <sys/uio.h> 51*7c478bd9Sstevel@tonic-gate #include <sys/ioreq.h> 52*7c478bd9Sstevel@tonic-gate #include <sys/debug.h> 53*7c478bd9Sstevel@tonic-gate #include <sys/rctl.h> 54*7c478bd9Sstevel@tonic-gate #include <sys/nbmlock.h> 55*7c478bd9Sstevel@tonic-gate 56*7c478bd9Sstevel@tonic-gate #define COPYOUT_MIN_SIZE (1<<17) /* 128K */ 57*7c478bd9Sstevel@tonic-gate 58*7c478bd9Sstevel@tonic-gate static size_t copyout_min_size = COPYOUT_MIN_SIZE; 59*7c478bd9Sstevel@tonic-gate 60*7c478bd9Sstevel@tonic-gate /* 61*7c478bd9Sstevel@tonic-gate * read, write, pread, pwrite, readv, and writev syscalls. 
 *
 * 64-bit open:	all open's are large file opens.
 * Large Files: the behaviour of read depends on whether the fd
 *		corresponds to large open or not.
 * 32-bit open:	FOFFMAX flag not set.
 *		read until MAXOFF32_T - 1 and read at MAXOFF32_T returns
 *		EOVERFLOW if count is non-zero and if size of file
 *		is > MAXOFF32_T. If size of file is <= MAXOFF32_T read
 *		at >= MAXOFF32_T returns EOF.
 */

/*
 * Native system call: read(2).
 *
 * Reads up to 'count' bytes from fd 'fdes' at the file's current offset
 * into the user buffer 'cbuf'.  Returns the number of bytes transferred,
 * or sets errno and returns -1 on failure.
 */
ssize_t
read(int fdes, void *cbuf, size_t count)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t cnt, bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;	/* 1 iff we entered the nbmand critical region */

	/* A count that doesn't fit in ssize_t is rejected up front. */
	if ((cnt = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;

	/* Zero-length reads of regular files succeed without taking locks. */
	if (vp->v_type == VREG && cnt == 0) {
		goto out;
	}

	rwflag = 0;		/* reader lock for VOP_RWLOCK */
	aiov.iov_base = cbuf;
	aiov.iov_len = cnt;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write() calls.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	/*
	 * We do the following checks inside VOP_RWLOCK so as to
	 * prevent file size from changing while these checks are
	 * being done. Also, we load fp's offset to the local
	 * variable fileoff because we can have a parallel lseek
	 * going on (f_offset is not protected by any lock) which
	 * could change f_offset. We need to see the value only
	 * once here and take a decision. Seeing it more than once
	 * can lead to incorrect functionality.
	 */

	fileoff = (u_offset_t)fp->f_offset;
	if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			/* At or past EOF for this open: return 0 bytes. */
			cnt = 0;
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		} else {
			/*
			 * File extends beyond this open's offset maximum
			 * (e.g. 32-bit open of a large file).
			 */
			error = EOVERFLOW;
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
	}
	/* Clamp the transfer so it does not cross this open's offset max. */
	if ((vp->v_type == VREG) &&
	    (fileoff + cnt > OFFSET_MAX(fp))) {
		cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount = cnt;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	/*
	 * Only use bypass caches when the count is large enough
	 */
	if (bcount < copyout_min_size)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	cnt -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = cnt;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* An interrupted but partially-complete read reports success. */
	if (error == EINTR && cnt != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (cnt);
}
/*
 * Native system call: write(2).
 *
 * Writes up to 'count' bytes from the user buffer 'cbuf' to fd 'fdes'
 * at the file's current offset.  Returns the number of bytes written,
 * or sets errno and returns -1 on failure.
 */
ssize_t
write(int fdes, void *cbuf, size_t count)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t cnt, bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;	/* 1 iff we entered the nbmand critical region */

	/* A count that doesn't fit in ssize_t is rejected up front. */
	if ((cnt = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;

	/* Zero-length writes to regular files succeed without taking locks. */
	if (vp->v_type == VREG && cnt == 0) {
		goto out;
	}

	rwflag = 1;		/* writer lock for VOP_RWLOCK */
	aiov.iov_base = cbuf;
	aiov.iov_len = cnt;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	fileoff = fp->f_offset;
	if (vp->v_type == VREG) {

		/*
		 * We raise psignal if write for >0 bytes causes
		 * it to exceed the ulimit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			VOP_RWUNLOCK(vp, rwflag, NULL);

			/* Deliver SIGXFSZ via the resource-control action. */
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * We return EFBIG if write is done at an offset
		 * greater than the offset maximum for this file structure.
		 */

		if (fileoff >= OFFSET_MAX(fp)) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		/*
		 * Limit the bytes to be written upto offset maximum for
		 * this open file structure.
		 */
		if (fileoff + cnt > OFFSET_MAX(fp))
			cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount = cnt;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	cnt -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = cnt;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* An interrupted but partially-complete write reports success. */
	if (error == EINTR && cnt != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (cnt);
}

/*
 * pread(2): positioned read.  Like read(2) but transfers from the given
 * 'offset' and never updates f_offset.  Fails with ESPIPE on FIFOs.
 */
ssize_t
pread(int fdes, void *cbuf, size_t count, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
#ifdef _SYSCALL32_IMPL
	/* 32-bit callers are limited to MAXOFF32_T; 64-bit get MAXOFFSET_T. */
	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
	    MAXOFF32_T : MAXOFFSET_T;
#else
	const u_offset_t maxoff = MAXOFF32_T;
#endif
	int in_crit = 0;	/* 1 iff we entered the nbmand critical region */

	if ((bcount = (ssize_t)count) < 0)
		return (set_errno(EINVAL));

	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 0;		/* reader lock for VOP_RWLOCK */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * Return EINVAL if an invalid offset comes to pread.
		 * Negative offset from user will cause this error.
		 */

		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Limit offset such that we don't read or write
		 * a file beyond the maximum offset representable in
		 * an off_t structure.
		 */
		if (fileoff + bcount > maxoff)
			bcount = (ssize_t)((offset_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	if (vp->v_type == VREG && fileoff == (u_offset_t)maxoff) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		VOP_RWUNLOCK(vp, rwflag, NULL);

		/*
		 * We have to return EOF if fileoff is >= file size.
		 */
		if (fileoff >= va.va_size) {
			bcount = 0;
			goto out;
		}

		/*
		 * File is greater than or equal to maxoff and therefore
		 * we return EOVERFLOW.
		 */
		error = EOVERFLOW;
		goto out;
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* An interrupted but partially-complete read reports success. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

/*
 * pwrite(2): positioned write.  Like write(2) but transfers to the given
 * 'offset' and never updates f_offset.  Fails with ESPIPE on FIFOs.
 */
ssize_t
pwrite(int fdes, void *cbuf, size_t count, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
#ifdef _SYSCALL32_IMPL
	/* 32-bit callers are limited to MAXOFF32_T; 64-bit get MAXOFFSET_T. */
	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
	    MAXOFF32_T : MAXOFFSET_T;
#else
	const u_offset_t maxoff = MAXOFF32_T;
#endif
	int in_crit = 0;	/* 1 iff we entered the nbmand critical region */

	if ((bcount = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 1;		/* writer lock for VOP_RWLOCK */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * return EINVAL for offsets that cannot be
		 * represented in an off_t.
		 */
		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Take appropriate action if we are trying to write above the
		 * resource limit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			/* Deliver SIGXFSZ via the resource-control action. */
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * Don't allow pwrite to cause file sizes to exceed
		 * maxoff.
		 */
		if (fileoff == maxoff) {
			error = EFBIG;
			goto out;
		}
		if (fileoff + count > maxoff)
			bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* An interrupted but partially-complete write reports success. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

/*
 * XXX -- The SVID refers to IOV_MAX, but doesn't define it.  Grrrr....
 * XXX -- However, SVVS expects readv() and writev() to fail if
 * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source),
 * XXX -- so I guess that's the "interface".
 */
#define	DEF_IOV_MAX	16

/*
 * readv(2) system call: scatter read from fdes into up to DEF_IOV_MAX
 * user iovecs, starting at the file's current offset.
 *
 * Returns the number of bytes read, or -1 with errno set via set_errno().
 * Errors visible in this function: EINVAL (bad iovcnt, negative iovec
 * length, or total length overflow), EFAULT (iovec copyin failure),
 * EBADF (bad fd / not open for reading), EACCES (NBMAND lock conflict),
 * EOVERFLOW (data exists past this open's representable offset range).
 */
ssize_t
readv(int fdes, struct iovec *iovp, int iovcnt)
{
	struct uio auio;
	struct iovec aiov[DEF_IOV_MAX];
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;	/* count: total/transferred; bcount: requested */
	int error = 0;
	int i;
	u_offset_t fileoff;
	int in_crit = 0;	/* non-zero once nbl_start_crit() is held */

	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
		return (set_errno(EINVAL));

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 aiov32[DEF_IOV_MAX];
		ssize32_t count32;

		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
			return (set_errno(EFAULT));

		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen32 = aiov32[i].iov_len;
			count32 += iovlen32;
			/* the signed-wrap check rejects totals >= 2G */
			if (iovlen32 < 0 || count32 < 0)
				return (set_errno(EINVAL));
			aiov[i].iov_len = iovlen32;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}
	} else
#endif
	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
		return (set_errno(EFAULT));

	/* Total the transfer length, rejecting negatives and overflow. */
	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		if (iovlen < 0 || count < 0)
			return (set_errno(EINVAL));
	}
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp->v_type == VREG && count == 0) {
		/* zero-length read of a regular file succeeds, returns 0 */
		goto out;
	}

	rwflag = 0;	/* take the vnode rwlock as reader */

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);
	/* offset is sampled under the rwlock so it is stable for the I/O */
	fileoff = fp->f_offset;

	/*
	 * Behaviour is same as read. Please see comments in read.
	 */

	if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			/* at or beyond EOF: plain end-of-file, not an error */
			VOP_RWUNLOCK(vp, rwflag, NULL);
			count = 0;
			goto out;
		} else {
			/* data exists beyond this open's offset limit */
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EOVERFLOW;
			goto out;
		}
	}
	if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) {
		/* clip the transfer so it ends at the offset limit */
		count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	/*
	 * Only small transfers force cached copy; larger ones let the
	 * copy routine choose (copyout_min_size is tunable, see top of file).
	 */
	if (bcount < copyout_min_size)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;


	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;

	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* a partial transfer interrupted by a signal still succeeds */
	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (count);
}

/*
 * writev(2) system call: gather write to fdes from up to DEF_IOV_MAX
 * user iovecs, starting at the file's current offset.
 *
 * Returns the number of bytes written, or -1 with errno set.  Writing at
 * or past the process file-size limit raises the RLIMIT_FSIZE rctl action
 * and fails with EFBIG; other errors mirror readv() above with FWRITE/
 * NBL_WRITE in place of FREAD/NBL_READ.
 */
ssize_t
writev(int fdes, struct iovec *iovp, int iovcnt)
{
	struct uio auio;
	struct iovec aiov[DEF_IOV_MAX];
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;	/* count: total/transferred; bcount: requested */
	int error = 0;
	int i;
	u_offset_t fileoff;
	int in_crit = 0;	/* non-zero once nbl_start_crit() is held */

	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
		return (set_errno(EINVAL));

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 aiov32[DEF_IOV_MAX];
		ssize32_t count32;

		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
			return (set_errno(EFAULT));

		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen = aiov32[i].iov_len;
			count32 += iovlen;
			/* the signed-wrap check rejects totals >= 2G */
			if (iovlen < 0 || count32 < 0)
				return (set_errno(EINVAL));
			aiov[i].iov_len = iovlen;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}
	} else
#endif
	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
		return (set_errno(EFAULT));

	/* Total the transfer length, rejecting negatives and overflow. */
	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		if (iovlen < 0 || count < 0)
			return (set_errno(EINVAL));
	}
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp->v_type == VREG && count == 0) {
		/* zero-length write of a regular file succeeds, returns 0 */
		goto out;
	}

	rwflag = 1;	/* take the vnode rwlock as writer */

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	/* offset is sampled under the rwlock so it is stable for the I/O */
	fileoff = fp->f_offset;

	/*
	 * Behaviour is same as write. Please see comments for write.
	 */

	if (vp->v_type == VREG) {
		if (fileoff >= curproc->p_fsz_ctl) {
			/* at or past the file-size limit: signal and fail */
			VOP_RWUNLOCK(vp, rwflag, NULL);
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		if (fileoff >= OFFSET_MAX(fp)) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		if (fileoff + count > OFFSET_MAX(fp))
			/* clip the transfer so it ends at the offset limit */
			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;

	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* a partial transfer interrupted by a signal still succeeds */
	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (count);
}

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * This syscall supplies 64-bit file offsets to 32-bit applications only.
 */
/*
 * pread64(2) for 32-bit processes: positioned read that takes the 64-bit
 * file offset as two 32-bit halves; the half carrying the high bits
 * depends on endianness (see the #if below).  The file offset of the
 * descriptor is never updated.
 *
 * Returns bytes read or -1 with errno: EINVAL (count out of range or
 * offset beyond MAXOFFSET_T), EBADF, ESPIPE (FIFO), EACCES (NBMAND
 * conflict).
 */
ssize32_t
pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
    uint32_t offset_2)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;		/* requested, then transferred, byte count */
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;	/* non-zero once nbl_start_crit() is held */

#if defined(_LITTLE_ENDIAN)
	fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
#else
	fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
#endif

	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
		return (set_errno(EINVAL));

	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 0;	/* take the vnode rwlock as reader */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * Same as pread. See comments in pread.
		 */

		if (fileoff > MAXOFFSET_T) {
			error = EINVAL;
			goto out;
		}
		if (fileoff + bcount > MAXOFFSET_T)
			/* clip so the transfer ends at MAXOFFSET_T */
			bcount = (ssize_t)(MAXOFFSET_T - fileoff);
	} else if (vp->v_type == VFIFO) {
		/* positioned I/O is meaningless on a pipe */
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;

	/*
	 * Note: File size can never be greater than MAXOFFSET_T.
	 * If ever we start supporting 128 bit files the code
	 * similar to the one in pread at this place should be here.
	 * Here we avoid the unnecessary VOP_GETATTR() when we
	 * know that fileoff == MAXOFFSET_T implies that it is always
	 * greater than or equal to file size.
	 */
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* a partial transfer interrupted by a signal still succeeds */
	if (error == EINTR && bcount != 0)
		error = 0;

out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

/*
 * This syscall supplies 64-bit file offsets to 32-bit applications only.
 *
 * pwrite64(2): positioned write counterpart of pread64() above; same
 * split-offset convention, same no-update of the descriptor's offset.
 * Additionally enforces the process file-size limit (RLIMIT_FSIZE rctl,
 * EFBIG) before doing any I/O.
 */
ssize32_t
pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
    uint32_t offset_2)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;		/* requested, then transferred, byte count */
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;	/* non-zero once nbl_start_crit() is held */

#if defined(_LITTLE_ENDIAN)
	fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
#else
	fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
#endif

	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 1;	/* take the vnode rwlock as writer */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * See comments in pwrite.
		 */
		if (fileoff > MAXOFFSET_T) {
			error = EINVAL;
			goto out;
		}
		if (fileoff >= curproc->p_fsz_ctl) {
			/* at or past the file-size limit: signal and fail */
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_SAFE);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		if (fileoff == MAXOFFSET_T) {
			error = EFBIG;
			goto out;
		}
		if (fileoff + bcount > MAXOFFSET_T)
			/* clip so the transfer ends at MAXOFFSET_T */
			bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff);
	} else if (vp->v_type == VFIFO) {
		/* positioned I/O is meaningless on a pipe */
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* a partial transfer interrupted by a signal still succeeds */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

#endif	/* _SYSCALL32_IMPL || _ILP32 */

#ifdef _SYSCALL32_IMPL
/*
 * Tail-call elimination of xxx32() down to xxx()
 *
 * A number of xxx32 system calls take a len (or count) argument and
 * return a number in the range [0,len] or -1 on error.
 * Given an ssize32_t input len, the downcall xxx() will return
 * a 64-bit value that is -1 or in the range [0,len] which actually
 * is a proper return value for the xxx32 call. So even if the xxx32
 * calls can be considered as returning a ssize32_t, they are currently
 * declared as returning a ssize_t as this enables tail-call elimination.
1171*7c478bd9Sstevel@tonic-gate * 1172*7c478bd9Sstevel@tonic-gate * The cast of len (or count) to ssize32_t is needed to ensure we pass 1173*7c478bd9Sstevel@tonic-gate * down negative input values as such and let the downcall handle error 1174*7c478bd9Sstevel@tonic-gate * reporting. Functions covered by this comments are: 1175*7c478bd9Sstevel@tonic-gate * 1176*7c478bd9Sstevel@tonic-gate * rw.c: read32, write32, pread32, pwrite32, readv32, writev32. 1177*7c478bd9Sstevel@tonic-gate * socksyscall.c: recv32, recvfrom32, send32, sendto32. 1178*7c478bd9Sstevel@tonic-gate * readlink.c: readlink32. 1179*7c478bd9Sstevel@tonic-gate */ 1180*7c478bd9Sstevel@tonic-gate 1181*7c478bd9Sstevel@tonic-gate ssize_t 1182*7c478bd9Sstevel@tonic-gate read32(int32_t fdes, caddr32_t cbuf, size32_t count) 1183*7c478bd9Sstevel@tonic-gate { 1184*7c478bd9Sstevel@tonic-gate return (read(fdes, 1185*7c478bd9Sstevel@tonic-gate (void *)(uintptr_t)cbuf, (ssize32_t)count)); 1186*7c478bd9Sstevel@tonic-gate } 1187*7c478bd9Sstevel@tonic-gate 1188*7c478bd9Sstevel@tonic-gate ssize_t 1189*7c478bd9Sstevel@tonic-gate write32(int32_t fdes, caddr32_t cbuf, size32_t count) 1190*7c478bd9Sstevel@tonic-gate { 1191*7c478bd9Sstevel@tonic-gate return (write(fdes, 1192*7c478bd9Sstevel@tonic-gate (void *)(uintptr_t)cbuf, (ssize32_t)count)); 1193*7c478bd9Sstevel@tonic-gate } 1194*7c478bd9Sstevel@tonic-gate 1195*7c478bd9Sstevel@tonic-gate ssize_t 1196*7c478bd9Sstevel@tonic-gate pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) 1197*7c478bd9Sstevel@tonic-gate { 1198*7c478bd9Sstevel@tonic-gate return (pread(fdes, 1199*7c478bd9Sstevel@tonic-gate (void *)(uintptr_t)cbuf, (ssize32_t)count, 1200*7c478bd9Sstevel@tonic-gate (off_t)(uint32_t)offset)); 1201*7c478bd9Sstevel@tonic-gate } 1202*7c478bd9Sstevel@tonic-gate 1203*7c478bd9Sstevel@tonic-gate ssize_t 1204*7c478bd9Sstevel@tonic-gate pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) 1205*7c478bd9Sstevel@tonic-gate { 
1206*7c478bd9Sstevel@tonic-gate return (pwrite(fdes, 1207*7c478bd9Sstevel@tonic-gate (void *)(uintptr_t)cbuf, (ssize32_t)count, 1208*7c478bd9Sstevel@tonic-gate (off_t)(uint32_t)offset)); 1209*7c478bd9Sstevel@tonic-gate } 1210*7c478bd9Sstevel@tonic-gate 1211*7c478bd9Sstevel@tonic-gate ssize_t 1212*7c478bd9Sstevel@tonic-gate readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) 1213*7c478bd9Sstevel@tonic-gate { 1214*7c478bd9Sstevel@tonic-gate return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt)); 1215*7c478bd9Sstevel@tonic-gate } 1216*7c478bd9Sstevel@tonic-gate 1217*7c478bd9Sstevel@tonic-gate ssize_t 1218*7c478bd9Sstevel@tonic-gate writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) 1219*7c478bd9Sstevel@tonic-gate { 1220*7c478bd9Sstevel@tonic-gate return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt)); 1221*7c478bd9Sstevel@tonic-gate } 1222*7c478bd9Sstevel@tonic-gate 1223*7c478bd9Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 1224