1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * Portions of this source code were derived from Berkeley 4.3 BSD 32 * under license from the Regents of the University of California. 33 */ 34 35 #pragma ident "%Z%%M% %I% %E% SMI" 36 37 #include <sys/param.h> 38 #include <sys/isa_defs.h> 39 #include <sys/types.h> 40 #include <sys/inttypes.h> 41 #include <sys/sysmacros.h> 42 #include <sys/cred.h> 43 #include <sys/user.h> 44 #include <sys/systm.h> 45 #include <sys/errno.h> 46 #include <sys/vnode.h> 47 #include <sys/file.h> 48 #include <sys/proc.h> 49 #include <sys/cpuvar.h> 50 #include <sys/uio.h> 51 #include <sys/ioreq.h> 52 #include <sys/debug.h> 53 #include <sys/rctl.h> 54 #include <sys/nbmlock.h> 55 56 #define COPYOUT_MIN_SIZE (1<<17) /* 128K */ 57 58 static size_t copyout_min_size = COPYOUT_MIN_SIZE; 59 60 /* 61 * read, write, pread, pwrite, readv, and writev syscalls. 62 * 63 * 64-bit open: all open's are large file opens. 
 * Large Files: the behaviour of read depends on whether the fd
 *	corresponds to large open or not.
 * 32-bit open: FOFFMAX flag not set.
 *	read until MAXOFF32_T - 1 and read at MAXOFF32_T returns
 *	EOVERFLOW if count is non-zero and if size of file
 *	is > MAXOFF32_T. If size of file is <= MAXOFF32_T read
 *	at >= MAXOFF32_T returns EOF.
 */

/*
 * Native system call: read(2).
 *
 * Transfer up to 'count' bytes from the current offset of 'fdes' into
 * the user buffer 'cbuf'.  Returns the byte count actually read, or
 * sets errno and returns -1 via set_errno().
 */
ssize_t
read(int fdes, void *cbuf, size_t count)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t cnt, bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;	/* nonzero once nbl_start_crit() is held */

	/* Counts that don't fit in ssize_t are invalid. */
	if ((cnt = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;

	/* Zero-length reads of regular files succeed with no I/O. */
	if (vp->v_type == VREG && cnt == 0) {
		goto out;
	}

	rwflag = 0;	/* take the reader side of VOP_RWLOCK */
	aiov.iov_base = cbuf;
	aiov.iov_len = cnt;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write() calls.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		/* A conflicting non-blocking mandatory lock denies the read. */
		if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	/*
	 * We do the following checks inside VOP_RWLOCK so as to
	 * prevent file size from changing while these checks are
	 * being done. Also, we load fp's offset to the local
	 * variable fileoff because we can have a parallel lseek
	 * going on (f_offset is not protected by any lock) which
	 * could change f_offset. We need to see the value only
	 * once here and take a decision. Seeing it more than once
	 * can lead to incorrect functionality.
	 */

	fileoff = (u_offset_t)fp->f_offset;
	if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			/* Offset at or beyond EOF: report end-of-file. */
			cnt = 0;
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		} else {
			/*
			 * Data exists past this open's offset maximum;
			 * a non-large-file open must see EOVERFLOW.
			 */
			error = EOVERFLOW;
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
	}
	/* Clip the transfer so it cannot cross the offset maximum. */
	if ((vp->v_type == VREG) &&
	    (fileoff + cnt > OFFSET_MAX(fp))) {
		cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount = cnt;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	/*
	 * Only use bypass caches when the count is large enough
	 * (copyout_min_size); small transfers stay on the cached path.
	 */
	if (bcount < copyout_min_size)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	cnt -= auio.uio_resid;	/* cnt is now the bytes actually moved */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = cnt;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A signal after partial progress still counts as success. */
	if (error == EINTR && cnt != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (cnt);
}

/*
 * Native system call
 */
ssize_t
write(int fdes, void *cbuf, size_t count)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t cnt, bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;	/* nonzero once nbl_start_crit() is held */

	/* Counts that don't fit in ssize_t are invalid. */
	if ((cnt = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;

	/* Zero-length writes to regular files succeed with no I/O. */
	if (vp->v_type == VREG && cnt == 0) {
		goto out;
	}

	rwflag = 1;	/* take the writer side of VOP_RWLOCK */
	aiov.iov_base = cbuf;
	aiov.iov_len = cnt;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		/* A conflicting non-blocking mandatory lock denies the write. */
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	/* Snapshot f_offset once; a parallel lseek may race on it. */
	fileoff = fp->f_offset;
	if (vp->v_type == VREG) {

		/*
		 * We raise psignal if write for >0 bytes causes
		 * it to exceed the ulimit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			VOP_RWUNLOCK(vp, rwflag, NULL);

			/* Deliver SIGXFSZ via the RLIMIT_FSIZE rctl. */
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * We return EFBIG if write is done at an offset
		 * greater than the offset maximum for this file structure.
		 */

		if (fileoff >= OFFSET_MAX(fp)) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		/*
		 * Limit the bytes to be written upto offset maximum for
		 * this open file structure.
		 */
		if (fileoff + cnt > OFFSET_MAX(fp))
			cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount = cnt;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	cnt -= auio.uio_resid;	/* cnt is now the bytes actually moved */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = cnt;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A signal after partial progress still counts as success. */
	if (error == EINTR && cnt != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (cnt);
}

/*
 * pread(2): read from an explicit offset.  The file's seek pointer
 * (f_offset) is neither consulted nor updated.
 */
ssize_t
pread(int fdes, void *cbuf, size_t count, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
#ifdef _SYSCALL32_IMPL
	/* 32-bit callers are limited to MAXOFF32_T; 64-bit get MAXOFFSET_T. */
	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
	    MAXOFF32_T : MAXOFFSET_T;
#else
	const u_offset_t maxoff = MAXOFF32_T;
#endif
	int in_crit = 0;	/* nonzero once nbl_start_crit() is held */

	/* Counts that don't fit in ssize_t are invalid. */
	if ((bcount = (ssize_t)count) < 0)
		return (set_errno(EINVAL));

	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 0;	/* take the reader side of VOP_RWLOCK */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * Return EINVAL if an invalid offset comes to pread.
		 * Negative offset from user will cause this error.
		 */

		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Limit offset such that we don't read or write
		 * a file beyond the maximum offset representable in
		 * an off_t structure.
		 */
		if (fileoff + bcount > maxoff)
			bcount = (ssize_t)((offset_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		/* Pipes and FIFOs are not seekable. */
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		/* A conflicting non-blocking mandatory lock denies the read. */
		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	/* Reading exactly at maxoff: distinguish EOF from EOVERFLOW. */
	if (vp->v_type == VREG && fileoff == (u_offset_t)maxoff) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		VOP_RWUNLOCK(vp, rwflag, NULL);

		/*
		 * We have to return EOF if fileoff is >= file size.
		 */
		if (fileoff >= va.va_size) {
			bcount = 0;
			goto out;
		}

		/*
		 * File is greater than or equal to maxoff and therefore
		 * we return EOVERFLOW.
		 */
		error = EOVERFLOW;
		goto out;
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually moved */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A signal after partial progress still counts as success. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

/*
 * pwrite(2): write at an explicit offset.  The file's seek pointer
 * (f_offset) is neither consulted nor updated.
 */
ssize_t
pwrite(int fdes, void *cbuf, size_t count, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
#ifdef _SYSCALL32_IMPL
	/* 32-bit callers are limited to MAXOFF32_T; 64-bit get MAXOFFSET_T. */
	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
	    MAXOFF32_T : MAXOFFSET_T;
#else
	const u_offset_t maxoff = MAXOFF32_T;
#endif
	int in_crit = 0;	/* nonzero once nbl_start_crit() is held */

	/* Counts that don't fit in ssize_t are invalid. */
	if ((bcount = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 1;	/* take the writer side of VOP_RWLOCK */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * return EINVAL for offsets that cannot be
		 * represented in an off_t.
515 */ 516 if (fileoff > maxoff) { 517 error = EINVAL; 518 goto out; 519 } 520 /* 521 * Take appropriate action if we are trying to write above the 522 * resource limit. 523 */ 524 if (fileoff >= curproc->p_fsz_ctl) { 525 mutex_enter(&curproc->p_lock); 526 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 527 curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO); 528 mutex_exit(&curproc->p_lock); 529 530 error = EFBIG; 531 goto out; 532 } 533 /* 534 * Don't allow pwrite to cause file sizes to exceed 535 * maxoff. 536 */ 537 if (fileoff == maxoff) { 538 error = EFBIG; 539 goto out; 540 } 541 if (fileoff + count > maxoff) 542 bcount = (ssize_t)((u_offset_t)maxoff - fileoff); 543 } else if (vp->v_type == VFIFO) { 544 error = ESPIPE; 545 goto out; 546 } 547 548 /* 549 * We have to enter the critical region before calling VOP_RWLOCK 550 * to avoid a deadlock with ufs. 551 */ 552 if (nbl_need_check(vp)) { 553 int svmand; 554 555 nbl_start_crit(vp, RW_READER); 556 in_crit = 1; 557 error = nbl_svmand(vp, fp->f_cred, &svmand); 558 if (error != 0) 559 goto out; 560 if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand)) { 561 error = EACCES; 562 goto out; 563 } 564 } 565 566 aiov.iov_base = cbuf; 567 aiov.iov_len = bcount; 568 (void) VOP_RWLOCK(vp, rwflag, NULL); 569 auio.uio_loffset = fileoff; 570 auio.uio_iov = &aiov; 571 auio.uio_iovcnt = 1; 572 auio.uio_resid = bcount; 573 auio.uio_segflg = UIO_USERSPACE; 574 auio.uio_llimit = curproc->p_fsz_ctl; 575 auio.uio_fmode = fflag; 576 auio.uio_extflg = UIO_COPY_CACHED; 577 578 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 579 580 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 581 bcount -= auio.uio_resid; 582 CPU_STATS_ENTER_K(); 583 cp = CPU; 584 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 585 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount); 586 CPU_STATS_EXIT_K(); 587 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 588 VOP_RWUNLOCK(vp, rwflag, NULL); 589 590 if (error == EINTR && bcount != 0) 591 error = 0; 592 
out: 593 if (in_crit) 594 nbl_end_crit(vp); 595 releasef(fdes); 596 if (error) 597 return (set_errno(error)); 598 return (bcount); 599 } 600 601 /* 602 * XXX -- The SVID refers to IOV_MAX, but doesn't define it. Grrrr.... 603 * XXX -- However, SVVS expects readv() and writev() to fail if 604 * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source), 605 * XXX -- so I guess that's the "interface". 606 */ 607 #define DEF_IOV_MAX 16 608 609 ssize_t 610 readv(int fdes, struct iovec *iovp, int iovcnt) 611 { 612 struct uio auio; 613 struct iovec aiov[DEF_IOV_MAX]; 614 file_t *fp; 615 register vnode_t *vp; 616 struct cpu *cp; 617 int fflag, ioflag, rwflag; 618 ssize_t count, bcount; 619 int error = 0; 620 int i; 621 u_offset_t fileoff; 622 int in_crit = 0; 623 624 if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX) 625 return (set_errno(EINVAL)); 626 627 #ifdef _SYSCALL32_IMPL 628 /* 629 * 32-bit callers need to have their iovec expanded, 630 * while ensuring that they can't move more than 2Gbytes 631 * of data in a single call. 
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 aiov32[DEF_IOV_MAX];
		ssize32_t count32;

		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
			return (set_errno(EFAULT));

		/* Running total; a negative sum means 2GB overflow. */
		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen32 = aiov32[i].iov_len;
			count32 += iovlen32;
			if (iovlen32 < 0 || count32 < 0)
				return (set_errno(EINVAL));
			aiov[i].iov_len = iovlen32;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}
	} else
#endif
	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
		return (set_errno(EFAULT));

	/* Total the request; a negative sum means ssize_t overflow. */
	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		if (iovlen < 0 || count < 0)
			return (set_errno(EINVAL));
	}
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	/* Zero-length reads of regular files succeed with no I/O. */
	if (vp->v_type == VREG && count == 0) {
		goto out;
	}

	rwflag = 0;	/* take the reader side of VOP_RWLOCK */

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		/* A conflicting non-blocking mandatory lock denies the read. */
		if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);
	/* Snapshot f_offset once; a parallel lseek may race on it. */
	fileoff = fp->f_offset;

	/*
	 * Behaviour is same as read. Please see comments in read.
	 */

	if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			/* Offset at or beyond EOF: report end-of-file. */
			VOP_RWUNLOCK(vp, rwflag, NULL);
			count = 0;
			goto out;
		} else {
			/* Data beyond this open's maximum: EOVERFLOW. */
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EOVERFLOW;
			goto out;
		}
	}
	/* Clip the transfer so it cannot cross the offset maximum. */
	if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) {
		count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	/* Small transfers stay on the cached copy path; see read(). */
	if (bcount < copyout_min_size)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;


	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;	/* bytes actually moved */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;

	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A signal after partial progress still counts as success. */
	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (count);
}

/*
 * writev(2): gather write from up to DEF_IOV_MAX user iovecs.
 */
ssize_t
writev(int fdes, struct iovec *iovp, int iovcnt)
{
	struct uio auio;
	struct iovec aiov[DEF_IOV_MAX];
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;
	u_offset_t fileoff;
	int in_crit = 0;	/* nonzero once nbl_start_crit() is held */

	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
		return (set_errno(EINVAL));

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 aiov32[DEF_IOV_MAX];
		ssize32_t count32;

		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
			return (set_errno(EFAULT));

		/* Running total; a negative sum means 2GB overflow. */
		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen = aiov32[i].iov_len;
			count32 += iovlen;
			if (iovlen < 0 || count32 < 0)
				return (set_errno(EINVAL));
			aiov[i].iov_len = iovlen;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}
	} else
#endif
	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
		return (set_errno(EFAULT));

	/* Total the request; a negative sum means ssize_t overflow. */
	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		if (iovlen < 0 || count < 0)
			return (set_errno(EINVAL));
	}
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	/* Zero-length writes to regular files succeed with no I/O. */
	if (vp->v_type == VREG && count == 0) {
		goto out;
	}

	rwflag = 1;	/* take the writer side of VOP_RWLOCK */

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		/* A conflicting non-blocking mandatory lock denies the write. */
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	/* Snapshot f_offset once; a parallel lseek may race on it. */
	fileoff = fp->f_offset;

	/*
	 * Behaviour is same as write. Please see comments for write.
	 */

	if (vp->v_type == VREG) {
		if (fileoff >= curproc->p_fsz_ctl) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			/* Deliver SIGXFSZ via the RLIMIT_FSIZE rctl. */
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		if (fileoff >= OFFSET_MAX(fp)) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		/* Clip the transfer at this open's offset maximum. */
		if (fileoff + count > OFFSET_MAX(fp))
			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;	/* bytes actually moved */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A signal after partial progress still counts as success. */
	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (count);
}

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * This syscall supplies 64-bit file offsets to 32-bit applications only.
 */
ssize32_t
pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
	uint32_t offset_2)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;	/* nonzero once nbl_start_crit() is held */

	/* Reassemble the 64-bit offset from the two 32-bit halves. */
#if defined(_LITTLE_ENDIAN)
	fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
#else
	fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
#endif

	/* Counts must be non-negative and fit in a 32-bit result. */
	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
		return (set_errno(EINVAL));

	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 0;	/* take the reader side of VOP_RWLOCK */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * Same as pread. See comments in pread.
		 */

		if (fileoff > MAXOFFSET_T) {
			error = EINVAL;
			goto out;
		}
		/* Clip the transfer at the maximum representable offset. */
		if (fileoff + bcount > MAXOFFSET_T)
			bcount = (ssize_t)(MAXOFFSET_T - fileoff);
	} else if (vp->v_type == VFIFO) {
		/* Pipes and FIFOs are not seekable. */
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		/* A conflicting non-blocking mandatory lock denies the read. */
		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;

	/*
	 * Note: File size can never be greater than MAXOFFSET_T.
	 * If ever we start supporting 128 bit files the code
	 * similar to the one in pread at this place should be here.
	 * Here we avoid the unnecessary VOP_GETATTR() when we
	 * know that fileoff == MAXOFFSET_T implies that it is always
	 * greater than or equal to file size.
	 */
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually moved */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A signal after partial progress still counts as success. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

/*
 * This syscall supplies 64-bit file offsets to 32-bit applications only.
1040 */ 1041 ssize32_t 1042 pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1, 1043 uint32_t offset_2) 1044 { 1045 struct uio auio; 1046 struct iovec aiov; 1047 file_t *fp; 1048 register vnode_t *vp; 1049 struct cpu *cp; 1050 int fflag, ioflag, rwflag; 1051 ssize_t bcount; 1052 int error = 0; 1053 u_offset_t fileoff; 1054 int in_crit = 0; 1055 1056 #if defined(_LITTLE_ENDIAN) 1057 fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1; 1058 #else 1059 fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2; 1060 #endif 1061 1062 if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX) 1063 return (set_errno(EINVAL)); 1064 if ((fp = getf(fdes)) == NULL) 1065 return (set_errno(EBADF)); 1066 if (((fflag = fp->f_flag) & (FWRITE)) == 0) { 1067 error = EBADF; 1068 goto out; 1069 } 1070 1071 rwflag = 1; 1072 vp = fp->f_vnode; 1073 1074 if (vp->v_type == VREG) { 1075 1076 if (bcount == 0) 1077 goto out; 1078 1079 /* 1080 * See comments in pwrite. 1081 */ 1082 if (fileoff > MAXOFFSET_T) { 1083 error = EINVAL; 1084 goto out; 1085 } 1086 if (fileoff >= curproc->p_fsz_ctl) { 1087 mutex_enter(&curproc->p_lock); 1088 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 1089 curproc->p_rctls, curproc, RCA_SAFE); 1090 mutex_exit(&curproc->p_lock); 1091 error = EFBIG; 1092 goto out; 1093 } 1094 if (fileoff == MAXOFFSET_T) { 1095 error = EFBIG; 1096 goto out; 1097 } 1098 if (fileoff + bcount > MAXOFFSET_T) 1099 bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff); 1100 } else if (vp->v_type == VFIFO) { 1101 error = ESPIPE; 1102 goto out; 1103 } 1104 1105 /* 1106 * We have to enter the critical region before calling VOP_RWLOCK 1107 * to avoid a deadlock with ufs. 
1108 */ 1109 if (nbl_need_check(vp)) { 1110 int svmand; 1111 1112 nbl_start_crit(vp, RW_READER); 1113 in_crit = 1; 1114 error = nbl_svmand(vp, fp->f_cred, &svmand); 1115 if (error != 0) 1116 goto out; 1117 if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand)) { 1118 error = EACCES; 1119 goto out; 1120 } 1121 } 1122 1123 aiov.iov_base = cbuf; 1124 aiov.iov_len = bcount; 1125 (void) VOP_RWLOCK(vp, rwflag, NULL); 1126 auio.uio_loffset = fileoff; 1127 auio.uio_iov = &aiov; 1128 auio.uio_iovcnt = 1; 1129 auio.uio_resid = bcount; 1130 auio.uio_segflg = UIO_USERSPACE; 1131 auio.uio_llimit = curproc->p_fsz_ctl; 1132 auio.uio_fmode = fflag; 1133 auio.uio_extflg = UIO_COPY_CACHED; 1134 1135 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 1136 1137 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 1138 bcount -= auio.uio_resid; 1139 CPU_STATS_ENTER_K(); 1140 cp = CPU; 1141 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 1142 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount); 1143 CPU_STATS_EXIT_K(); 1144 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 1145 VOP_RWUNLOCK(vp, rwflag, NULL); 1146 1147 if (error == EINTR && bcount != 0) 1148 error = 0; 1149 out: 1150 if (in_crit) 1151 nbl_end_crit(vp); 1152 releasef(fdes); 1153 if (error) 1154 return (set_errno(error)); 1155 return (bcount); 1156 } 1157 1158 #endif /* _SYSCALL32_IMPL || _ILP32 */ 1159 1160 #ifdef _SYSCALL32_IMPL 1161 /* 1162 * Tail-call elimination of xxx32() down to xxx() 1163 * 1164 * A number of xxx32 system calls take a len (or count) argument and 1165 * return a number in the range [0,len] or -1 on error. 1166 * Given an ssize32_t input len, the downcall xxx() will return 1167 * a 64-bit value that is -1 or in the range [0,len] which actually 1168 * is a proper return value for the xxx32 call. So even if the xxx32 1169 * calls can be considered as returning a ssize32_t, they are currently 1170 * declared as returning a ssize_t as this enables tail-call elimination. 
1171 * 1172 * The cast of len (or count) to ssize32_t is needed to ensure we pass 1173 * down negative input values as such and let the downcall handle error 1174 * reporting. Functions covered by this comments are: 1175 * 1176 * rw.c: read32, write32, pread32, pwrite32, readv32, writev32. 1177 * socksyscall.c: recv32, recvfrom32, send32, sendto32. 1178 * readlink.c: readlink32. 1179 */ 1180 1181 ssize_t 1182 read32(int32_t fdes, caddr32_t cbuf, size32_t count) 1183 { 1184 return (read(fdes, 1185 (void *)(uintptr_t)cbuf, (ssize32_t)count)); 1186 } 1187 1188 ssize_t 1189 write32(int32_t fdes, caddr32_t cbuf, size32_t count) 1190 { 1191 return (write(fdes, 1192 (void *)(uintptr_t)cbuf, (ssize32_t)count)); 1193 } 1194 1195 ssize_t 1196 pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) 1197 { 1198 return (pread(fdes, 1199 (void *)(uintptr_t)cbuf, (ssize32_t)count, 1200 (off_t)(uint32_t)offset)); 1201 } 1202 1203 ssize_t 1204 pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) 1205 { 1206 return (pwrite(fdes, 1207 (void *)(uintptr_t)cbuf, (ssize32_t)count, 1208 (off_t)(uint32_t)offset)); 1209 } 1210 1211 ssize_t 1212 readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) 1213 { 1214 return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt)); 1215 } 1216 1217 ssize_t 1218 writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) 1219 { 1220 return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt)); 1221 } 1222 1223 #endif /* _SYSCALL32_IMPL */ 1224