1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * Portions of this source code were derived from Berkeley 4.3 BSD 31 * under license from the Regents of the University of California. 32 */ 33 34 #pragma ident "%Z%%M% %I% %E% SMI" 35 36 #include <sys/param.h> 37 #include <sys/isa_defs.h> 38 #include <sys/types.h> 39 #include <sys/inttypes.h> 40 #include <sys/sysmacros.h> 41 #include <sys/cred.h> 42 #include <sys/user.h> 43 #include <sys/systm.h> 44 #include <sys/errno.h> 45 #include <sys/vnode.h> 46 #include <sys/file.h> 47 #include <sys/proc.h> 48 #include <sys/cpuvar.h> 49 #include <sys/uio.h> 50 #include <sys/debug.h> 51 #include <sys/rctl.h> 52 #include <sys/nbmlock.h> 53 54 #define COPYOUT_MAX_CACHE (1<<17) /* 128K */ 55 56 size_t copyout_max_cached = COPYOUT_MAX_CACHE; /* global so it's patchable */ 57 58 /* 59 * read, write, pread, pwrite, readv, and writev syscalls. 60 * 61 * 64-bit open: all open's are large file opens. 
 * Large Files: the behaviour of read depends on whether the fd
 *	corresponds to large open or not.
 * 32-bit open: FOFFMAX flag not set.
 *		read until MAXOFF32_T - 1 and read at MAXOFF32_T returns
 *		EOVERFLOW if count is non-zero and if size of file
 *		is > MAXOFF32_T. If size of file is <= MAXOFF32_T read
 *		at >= MAXOFF32_T returns EOF.
 */

/*
 * Native system call
 *
 * read(2): transfer up to 'count' bytes from the file's current offset
 * into the user buffer 'cbuf'.  Returns the number of bytes actually
 * transferred (0 at EOF); on failure the syscall is completed via
 * set_errno().
 */
ssize_t
read(int fdes, void *cbuf, size_t count)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t cnt, bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;		/* nonzero once nbl_start_crit() is held */

	if ((cnt = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;

	/* Zero-byte read of a regular file is a no-op that returns 0. */
	if (vp->v_type == VREG && cnt == 0) {
		goto out;
	}

	rwflag = 0;	/* read side; passed to VOP_RWLOCK/VOP_RWUNLOCK */
	aiov.iov_base = cbuf;
	aiov.iov_len = cnt;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write() calls.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	/*
	 * We do the following checks inside VOP_RWLOCK so as to
	 * prevent file size from changing while these checks are
	 * being done. Also, we load fp's offset to the local
	 * variable fileoff because we can have a parallel lseek
	 * going on (f_offset is not protected by any lock) which
	 * could change f_offset. We need to see the value only
	 * once here and take a decision. Seeing it more than once
	 * can lead to incorrect functionality.
	 */

	fileoff = (u_offset_t)fp->f_offset;
	if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			/* Offset at or past EOF: plain EOF, not an error. */
			cnt = 0;
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		} else {
			/* File extends past this fd's offset maximum. */
			error = EOVERFLOW;
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
	}
	/* Clamp the transfer so it cannot cross this fd's offset maximum. */
	if ((vp->v_type == VREG) &&
	    (fileoff + cnt > OFFSET_MAX(fp))) {
		cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount = cnt;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	/*
	 * Only use bypass caches when the count is large enough
	 */
	if (bcount <= copyout_max_cached)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	cnt -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = cnt;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partial transfer wins over EINTR; report the bytes moved. */
	if (error == EINTR && cnt != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (cnt);
}

/*
 * Native system call
 *
 * write(2): transfer up to 'count' bytes from the user buffer 'cbuf'
 * to the file at its current offset.  Returns the number of bytes
 * actually written; on failure the syscall is completed via set_errno().
 */
ssize_t
write(int fdes, void *cbuf, size_t count)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t cnt, bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;

	if ((cnt = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;

	/* Zero-byte write of a regular file is a no-op that returns 0. */
	if (vp->v_type == VREG && cnt == 0) {
		goto out;
	}

	rwflag = 1;	/* write side; passed to VOP_RWLOCK/VOP_RWUNLOCK */
	aiov.iov_base = cbuf;
	aiov.iov_len = cnt;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	fileoff = fp->f_offset;
	if (vp->v_type == VREG) {

		/*
		 * We raise psignal if write for >0 bytes causes
		 * it to exceed the ulimit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			VOP_RWUNLOCK(vp, rwflag, NULL);

			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * We return EFBIG if write is done at an offset
		 * greater than the offset maximum for this file structure.
		 */

		if (fileoff >= OFFSET_MAX(fp)) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		/*
		 * Limit the bytes to be written upto offset maximum for
		 * this open file structure.
		 */
		if (fileoff + cnt > OFFSET_MAX(fp))
			cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount = cnt;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	cnt -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = cnt;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partial transfer wins over EINTR; report the bytes moved. */
	if (error == EINTR && cnt != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (cnt);
}

/*
 * pread(2): positioned read at 'offset'; does not use or update
 * f_offset.  ESPIPE for FIFOs.  maxoff depends on the caller's data
 * model when _SYSCALL32_IMPL is built in.
 */
ssize_t
pread(int fdes, void *cbuf, size_t count, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
#ifdef _SYSCALL32_IMPL
	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
	    MAXOFF32_T : MAXOFFSET_T;
#else
	const u_offset_t maxoff = MAXOFF32_T;
#endif
	int in_crit = 0;

	if ((bcount = (ssize_t)count) < 0)
		return (set_errno(EINVAL));

	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 0;	/* read side; passed to VOP_RWLOCK/VOP_RWUNLOCK */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * Return EINVAL if an invalid offset comes to pread.
		 * Negative offset from user will cause this error.
		 */

		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Limit offset such that we don't read or write
		 * a file beyond the maximum offset representable in
		 * an off_t structure.
		 */
		if (fileoff + bcount > maxoff)
			bcount = (ssize_t)((offset_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	if (vp->v_type == VREG && fileoff == (u_offset_t)maxoff) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		VOP_RWUNLOCK(vp, rwflag, NULL);

		/*
		 * We have to return EOF if fileoff is >= file size.
		 */
		if (fileoff >= va.va_size) {
			bcount = 0;
			goto out;
		}

		/*
		 * File is greater than or equal to maxoff and therefore
		 * we return EOVERFLOW.
		 */
		error = EOVERFLOW;
		goto out;
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partial transfer wins over EINTR; report the bytes moved. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

/*
 * pwrite(2): positioned write at 'offset'; does not use or update
 * f_offset.  ESPIPE for FIFOs.  Enforces RLIMIT_FSIZE (delivering
 * SIGXFSZ via rctl_action) before attempting the transfer.
 */
ssize_t
pwrite(int fdes, void *cbuf, size_t count, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
#ifdef _SYSCALL32_IMPL
	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
	    MAXOFF32_T : MAXOFFSET_T;
#else
	const u_offset_t maxoff = MAXOFF32_T;
#endif
	int in_crit = 0;

	if ((bcount = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 1;	/* write side; passed to VOP_RWLOCK/VOP_RWUNLOCK */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * return EINVAL for offsets that cannot be
		 * represented in an off_t.
		 */
		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Take appropriate action if we are trying to write above the
		 * resource limit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * Don't allow pwrite to cause file sizes to exceed
		 * maxoff.
		 */
		if (fileoff == maxoff) {
			error = EFBIG;
			goto out;
		}
		/* count == bcount here; both were validated non-negative above */
		if (fileoff + count > maxoff)
			bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partial transfer wins over EINTR; report the bytes moved. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

/*
 * XXX -- The SVID refers to IOV_MAX, but doesn't define it. Grrrr....
 * XXX -- However, SVVS expects readv() and writev() to fail if
 * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source),
 * XXX -- so I guess that's the "interface".
 */
#define	DEF_IOV_MAX	16

/*
 * readv(2): scatter read of up to DEF_IOV_MAX iovecs at the file's
 * current offset.  The iovec array is copied in from user space and
 * validated (no negative lengths, no total overflow) before any I/O.
 */
ssize_t
readv(int fdes, struct iovec *iovp, int iovcnt)
{
	struct uio auio;
	struct iovec aiov[DEF_IOV_MAX];
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;
	u_offset_t fileoff;
	int in_crit = 0;

	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
		return (set_errno(EINVAL));

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 aiov32[DEF_IOV_MAX];
		ssize32_t count32;

		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
			return (set_errno(EFAULT));

		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen32 = aiov32[i].iov_len;
			count32 += iovlen32;
			/* count32 < 0 catches signed overflow of the sum */
			if (iovlen32 < 0 || count32 < 0)
				return (set_errno(EINVAL));
			aiov[i].iov_len = iovlen32;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}
	} else
#endif
	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
		return (set_errno(EFAULT));

	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		/* count < 0 catches signed overflow of the sum */
		if (iovlen < 0 || count < 0)
			return (set_errno(EINVAL));
	}
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	/* Zero-byte read of a regular file is a no-op that returns 0. */
	if (vp->v_type == VREG && count == 0) {
		goto out;
	}

	rwflag = 0;	/* read side; passed to VOP_RWLOCK/VOP_RWUNLOCK */

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);
	fileoff = fp->f_offset;

	/*
	 * Behaviour is same as read. Please see comments in read.
	 */

	if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			count = 0;
			goto out;
		} else {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EOVERFLOW;
			goto out;
		}
	}
	if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) {
		count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	/* Only bypass caches when the count is large enough (cf. read()). */
	if (bcount <= copyout_max_cached)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;


	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;

	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partial transfer wins over EINTR; report the bytes moved. */
	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (count);
}

/*
 * writev(2): gather write of up to DEF_IOV_MAX iovecs at the file's
 * current offset.  Same iovec copyin/validation as readv(); limit and
 * offset handling mirror write().
 */
ssize_t
writev(int fdes, struct iovec *iovp, int iovcnt)
{
	struct uio auio;
	struct iovec aiov[DEF_IOV_MAX];
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;
	u_offset_t fileoff;
	int in_crit = 0;

	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
		return (set_errno(EINVAL));

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 aiov32[DEF_IOV_MAX];
		ssize32_t count32;

		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
			return (set_errno(EFAULT));

		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen = aiov32[i].iov_len;
			count32 += iovlen;
			/* count32 < 0 catches signed overflow of the sum */
			if (iovlen < 0 || count32 < 0)
				return (set_errno(EINVAL));
			aiov[i].iov_len = iovlen;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}
	} else
#endif
	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
		return (set_errno(EFAULT));

	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		/* count < 0 catches signed overflow of the sum */
		if (iovlen < 0 || count < 0)
			return (set_errno(EINVAL));
	}
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	/* Zero-byte write of a regular file is a no-op that returns 0. */
	if (vp->v_type == VREG && count == 0) {
		goto out;
	}

	rwflag = 1;	/* write side; passed to VOP_RWLOCK/VOP_RWUNLOCK */

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	fileoff = fp->f_offset;

	/*
	 * Behaviour is same as write. Please see comments for write.
	 */

	if (vp->v_type == VREG) {
		if (fileoff >= curproc->p_fsz_ctl) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		if (fileoff >= OFFSET_MAX(fp)) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		if (fileoff + count > OFFSET_MAX(fp))
			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;

	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partial transfer wins over EINTR; report the bytes moved. */
	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (count);
}

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * This syscall supplies 64-bit file offsets to 32-bit applications only.
 *
 * pread64(2): the 64-bit offset arrives as two 32-bit halves whose
 * order depends on endianness; they are reassembled into fileoff below.
 */
ssize32_t
pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
    uint32_t offset_2)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;

#if defined(_LITTLE_ENDIAN)
	fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
#else
	fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
#endif

	/* Result must fit in the 32-bit return; cap count at INT32_MAX. */
	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
		return (set_errno(EINVAL));

	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 0;	/* read side; passed to VOP_RWLOCK/VOP_RWUNLOCK */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * Same as pread. See comments in pread.
		 */

		if (fileoff > MAXOFFSET_T) {
			error = EINVAL;
			goto out;
		}
		if (fileoff + bcount > MAXOFFSET_T)
			bcount = (ssize_t)(MAXOFFSET_T - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;

	/*
	 * Note: File size can never be greater than MAXOFFSET_T.
	 * If ever we start supporting 128 bit files the code
	 * similar to the one in pread at this place should be here.
	 * Here we avoid the unnecessary VOP_GETATTR() when we
	 * know that fileoff == MAXOFFSET_T implies that it is always
	 * greater than or equal to file size.
	 */
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partial transfer wins over EINTR; report the bytes moved. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

/*
 * This syscall supplies 64-bit file offsets to 32-bit applications only.
 */
/*
 * pwrite64(2): 64-bit positioned write for 32-bit applications; the
 * offset arrives as two 32-bit halves reassembled per endianness below.
 * Offset/limit checks mirror pwrite() with maxoff == MAXOFFSET_T.
 */
ssize32_t
pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
    uint32_t offset_2)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;

#if defined(_LITTLE_ENDIAN)
	fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
#else
	fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
#endif

	/* Result must fit in the 32-bit return; cap count at INT32_MAX. */
	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 1;	/* write side; passed to VOP_RWLOCK/VOP_RWUNLOCK */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * See comments in pwrite.
		 */
		if (fileoff > MAXOFFSET_T) {
			error = EINVAL;
			goto out;
		}
		if (fileoff >= curproc->p_fsz_ctl) {
			mutex_enter(&curproc->p_lock);
			/*
			 * NOTE(review): pwrite() performs this same check
			 * with RCA_UNSAFE_SIGINFO; confirm RCA_SAFE is
			 * intentional here rather than an inconsistency.
			 */
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_SAFE);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		if (fileoff == MAXOFFSET_T) {
			error = EFBIG;
			goto out;
		}
		if (fileoff + bcount > MAXOFFSET_T)
			bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partial transfer wins over EINTR; report the bytes moved. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

#endif	/* _SYSCALL32_IMPL || _ILP32 */

#ifdef _SYSCALL32_IMPL
/*
 * Tail-call elimination of xxx32() down to xxx()
 *
 * A number of xxx32 system calls take a len (or count) argument and
 * return a number in the range [0,len] or -1 on error.
 * Given an ssize32_t input len, the downcall xxx() will return
 * a 64-bit value that is -1 or in the range [0,len] which actually
 * is a proper return value for the xxx32 call. So even if the xxx32
 * calls can be considered as returning a ssize32_t, they are currently
 * declared as returning a ssize_t as this enables tail-call elimination.
1177 * 1178 * The cast of len (or count) to ssize32_t is needed to ensure we pass 1179 * down negative input values as such and let the downcall handle error 1180 * reporting. Functions covered by this comments are: 1181 * 1182 * rw.c: read32, write32, pread32, pwrite32, readv32, writev32. 1183 * socksyscall.c: recv32, recvfrom32, send32, sendto32. 1184 * readlink.c: readlink32. 1185 */ 1186 1187 ssize_t 1188 read32(int32_t fdes, caddr32_t cbuf, size32_t count) 1189 { 1190 return (read(fdes, 1191 (void *)(uintptr_t)cbuf, (ssize32_t)count)); 1192 } 1193 1194 ssize_t 1195 write32(int32_t fdes, caddr32_t cbuf, size32_t count) 1196 { 1197 return (write(fdes, 1198 (void *)(uintptr_t)cbuf, (ssize32_t)count)); 1199 } 1200 1201 ssize_t 1202 pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) 1203 { 1204 return (pread(fdes, 1205 (void *)(uintptr_t)cbuf, (ssize32_t)count, 1206 (off_t)(uint32_t)offset)); 1207 } 1208 1209 ssize_t 1210 pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) 1211 { 1212 return (pwrite(fdes, 1213 (void *)(uintptr_t)cbuf, (ssize32_t)count, 1214 (off_t)(uint32_t)offset)); 1215 } 1216 1217 ssize_t 1218 readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) 1219 { 1220 return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt)); 1221 } 1222 1223 ssize_t 1224 writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) 1225 { 1226 return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt)); 1227 } 1228 1229 #endif /* _SYSCALL32_IMPL */ 1230