1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * Portions of this source code were derived from Berkeley 4.3 BSD 31 * under license from the Regents of the University of California. 32 */ 33 34 #pragma ident "%Z%%M% %I% %E% SMI" 35 36 #include <sys/param.h> 37 #include <sys/isa_defs.h> 38 #include <sys/types.h> 39 #include <sys/inttypes.h> 40 #include <sys/sysmacros.h> 41 #include <sys/cred.h> 42 #include <sys/user.h> 43 #include <sys/systm.h> 44 #include <sys/errno.h> 45 #include <sys/vnode.h> 46 #include <sys/file.h> 47 #include <sys/proc.h> 48 #include <sys/cpuvar.h> 49 #include <sys/uio.h> 50 #include <sys/debug.h> 51 #include <sys/rctl.h> 52 #include <sys/nbmlock.h> 53 54 #define COPYOUT_MAX_CACHE (1<<17) /* 128K */ 55 56 size_t copyout_max_cached = COPYOUT_MAX_CACHE; /* global so it's patchable */ 57 58 /* 59 * read, write, pread, pwrite, readv, and writev syscalls. 60 * 61 * 64-bit open: all open's are large file opens. 
 * Large Files: the behaviour of read depends on whether the fd
 *	corresponds to large open or not.
 * 32-bit open: FOFFMAX flag not set.
 *		read until MAXOFF32_T - 1 and read at MAXOFF32_T returns
 *		EOVERFLOW if count is non-zero and if size of file
 *		is > MAXOFF32_T. If size of file is <= MAXOFF32_T read
 *		at >= MAXOFF32_T returns EOF.
 */

/*
 * Native system call
 *
 * read(2): transfer up to 'count' bytes from the current offset of
 * open file 'fdes' into the user buffer 'cbuf'.  Returns the number
 * of bytes actually transferred; failures are reported through
 * set_errno().
 */
ssize_t
read(int fdes, void *cbuf, size_t count)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t cnt, bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;	/* nonzero while inside the nbl critical region */

	/* Reject counts that do not fit in ssize_t. */
	if ((cnt = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;

	/* Zero-length read of a regular file succeeds immediately. */
	if (vp->v_type == VREG && cnt == 0) {
		goto out;
	}

	rwflag = 0;	/* 0 on read paths, 1 on write paths (see write()) */
	aiov.iov_base = cbuf;
	aiov.iov_len = cnt;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write() calls.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	/*
	 * We do the following checks inside VOP_RWLOCK so as to
	 * prevent file size from changing while these checks are
	 * being done. Also, we load fp's offset to the local
	 * variable fileoff because we can have a parallel lseek
	 * going on (f_offset is not protected by any lock) which
	 * could change f_offset. We need to see the value only
	 * once here and take a decision. Seeing it more than once
	 * can lead to incorrect functionality.
	 */

	fileoff = (u_offset_t)fp->f_offset;
	if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			/* Offset is at or past EOF: plain end-of-file. */
			cnt = 0;
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		} else {
			/*
			 * File data exists beyond this fd's offset
			 * maximum (small-file open of a large file).
			 */
			error = EOVERFLOW;
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
	}
	/* Clip the transfer so it cannot pass the fd's offset maximum. */
	if ((vp->v_type == VREG) &&
	    (fileoff + cnt > OFFSET_MAX(fp))) {
		cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount = cnt;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	/*
	 * Only use bypass caches when the count is large enough
	 */
	if (bcount <= copyout_max_cached)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	cnt -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = cnt;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A transfer partially completed before a signal counts as success. */
	if (error == EINTR && cnt != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (cnt);
}

/*
 * Native system call
 */

/*
 * write(2): transfer up to 'count' bytes from the user buffer 'cbuf'
 * to open file 'fdes' at its current offset.  Returns the number of
 * bytes written; failures are reported through set_errno().
 */
ssize_t
write(int fdes, void *cbuf, size_t count)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t cnt, bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;	/* nonzero while inside the nbl critical region */

	/* Reject counts that do not fit in ssize_t. */
	if ((cnt = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;

	/* Zero-length write to a regular file succeeds immediately. */
	if (vp->v_type == VREG && cnt == 0) {
		goto out;
	}

	rwflag = 1;	/* write side of VOP_RWLOCK */
	aiov.iov_base = cbuf;
	aiov.iov_len = cnt;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	fileoff = fp->f_offset;
	if (vp->v_type == VREG) {

		/*
		 * We raise psignal if write for >0 bytes causes
		 * it to exceed the ulimit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			VOP_RWUNLOCK(vp, rwflag, NULL);

			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * We return EFBIG if write is done at an offset
		 * greater than the offset maximum for this file structure.
		 */

		if (fileoff >= OFFSET_MAX(fp)) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		/*
		 * Limit the bytes to be written upto offset maximum for
		 * this open file structure.
		 */
		if (fileoff + cnt > OFFSET_MAX(fp))
			cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount = cnt;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	cnt -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = cnt;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A transfer partially completed before a signal counts as success. */
	if (error == EINTR && cnt != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (cnt);
}

/*
 * pread(2): positioned read; like read(2) but at an explicit 'offset'
 * and without moving f_offset.  Seeking on a FIFO is meaningless, so
 * VFIFO vnodes get ESPIPE.
 */
ssize_t
pread(int fdes, void *cbuf, size_t count, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
#ifdef _SYSCALL32_IMPL
	/* Per-caller offset ceiling: 32-bit callers are capped at MAXOFF32_T. */
	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
	    MAXOFF32_T : MAXOFFSET_T;
#else
	const u_offset_t maxoff = MAXOFF32_T;
#endif
	int in_crit = 0;	/* nonzero while inside the nbl critical region */

	if ((bcount = (ssize_t)count) < 0)
		return (set_errno(EINVAL));

	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 0;
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * Return EINVAL if an invalid offset comes to pread.
		 * Negative offset from user will cause this error.
		 */

		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Limit offset such that we don't read or write
		 * a file beyond the maximum offset representable in
		 * an off_t structure.
		 */
		if (fileoff + bcount > maxoff)
			bcount = (ssize_t)((offset_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	if (vp->v_type == VREG && fileoff == (u_offset_t)maxoff) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		VOP_RWUNLOCK(vp, rwflag, NULL);

		/*
		 * We have to return EOF if fileoff is >= file size.
		 */
		if (fileoff >= va.va_size) {
			bcount = 0;
			goto out;
		}

		/*
		 * File is greater than or equal to maxoff and therefore
		 * we return EOVERFLOW.
		 */
		error = EOVERFLOW;
		goto out;
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A transfer partially completed before a signal counts as success. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

/*
 * pwrite(2): positioned write; like write(2) but at an explicit
 * 'offset' and without moving f_offset.  VFIFO vnodes get ESPIPE.
 */
ssize_t
pwrite(int fdes, void *cbuf, size_t count, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
#ifdef _SYSCALL32_IMPL
	/* Per-caller offset ceiling: 32-bit callers are capped at MAXOFF32_T. */
	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
	    MAXOFF32_T : MAXOFFSET_T;
#else
	const u_offset_t maxoff = MAXOFF32_T;
#endif
	int in_crit = 0;	/* nonzero while inside the nbl critical region */

	if ((bcount = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 1;
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * return EINVAL for offsets that cannot be
		 * represented in an off_t.
		 */
		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Take appropriate action if we are trying to write above the
		 * resource limit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * Don't allow pwrite to cause file sizes to exceed
		 * maxoff.
		 */
		if (fileoff == maxoff) {
			error = EFBIG;
			goto out;
		}
		/*
		 * NOTE(review): 'count' is used here where pread uses
		 * 'bcount'; the two are equal on this path (bcount was
		 * initialized from count and is not modified before
		 * this point), so behavior is the same either way.
		 */
		if (fileoff + count > maxoff)
			bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A transfer partially completed before a signal counts as success. */
	if (error == EINTR && bcount != 0)
		error = 0;

out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

/*
 * XXX -- The SVID refers to IOV_MAX, but doesn't define it. Grrrr....
 * XXX -- However, SVVS expects readv() and writev() to fail if
 * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source),
 * XXX -- so I guess that's the "interface".
 */
#define	DEF_IOV_MAX	16

/*
 * readv(2): scatter read into up to DEF_IOV_MAX user iovecs.
 * The iovec array is validated (no negative lengths, no signed
 * overflow of the total) before any I/O is attempted.
 */
ssize_t
readv(int fdes, struct iovec *iovp, int iovcnt)
{
	struct uio auio;
	struct iovec aiov[DEF_IOV_MAX];
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;
	u_offset_t fileoff;
	int in_crit = 0;	/* nonzero while inside the nbl critical region */

	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
		return (set_errno(EINVAL));

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 aiov32[DEF_IOV_MAX];
		ssize32_t count32;

		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
			return (set_errno(EFAULT));

		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen32 = aiov32[i].iov_len;
			count32 += iovlen32;
			if (iovlen32 < 0 || count32 < 0)
				return (set_errno(EINVAL));
			aiov[i].iov_len = iovlen32;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}
	} else
#endif
	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
		return (set_errno(EFAULT));

	/* Total the request, rejecting negative lengths and overflow. */
	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		if (iovlen < 0 || count < 0)
			return (set_errno(EINVAL));
	}
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp->v_type == VREG && count == 0) {
		goto out;
	}

	rwflag = 0;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);
	fileoff = fp->f_offset;

	/*
	 * Behaviour is same as read. Please see comments in read.
	 */

	if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			count = 0;
			goto out;
		} else {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EOVERFLOW;
			goto out;
		}
	}
	if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) {
		count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	if (bcount <= copyout_max_cached)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;


	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;

	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A transfer partially completed before a signal counts as success. */
	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (count);
}

/*
 * writev(2): gather write from up to DEF_IOV_MAX user iovecs.
 * Validation of the iovec array mirrors readv().
 */
ssize_t
writev(int fdes, struct iovec *iovp, int iovcnt)
{
	struct uio auio;
	struct iovec aiov[DEF_IOV_MAX];
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;
	u_offset_t fileoff;
	int in_crit = 0;	/* nonzero while inside the nbl critical region */

	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
		return (set_errno(EINVAL));

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 aiov32[DEF_IOV_MAX];
		ssize32_t count32;

		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
			return (set_errno(EFAULT));

		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen = aiov32[i].iov_len;
			count32 += iovlen;
			if (iovlen < 0 || count32 < 0)
				return (set_errno(EINVAL));
			aiov[i].iov_len = iovlen;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}
	} else
#endif
	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
		return (set_errno(EFAULT));

	/* Total the request, rejecting negative lengths and overflow. */
	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		if (iovlen < 0 || count < 0)
			return (set_errno(EINVAL));
	}
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp->v_type == VREG && count == 0) {
		goto out;
	}

	rwflag = 1;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	fileoff = fp->f_offset;

	/*
	 * Behaviour is same as write. Please see comments for write.
	 */

	if (vp->v_type == VREG) {
		if (fileoff >= curproc->p_fsz_ctl) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		if (fileoff >= OFFSET_MAX(fp)) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		if (fileoff + count > OFFSET_MAX(fp))
			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A transfer partially completed before a signal counts as success. */
	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (count);
}

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * This syscall supplies 64-bit file offsets to 32-bit applications only.
 */
ssize32_t
pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
	uint32_t offset_2)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;	/* nonzero while inside the nbl critical region */

	/* Reassemble the 64-bit offset from its two 32-bit halves. */
#if defined(_LITTLE_ENDIAN)
	fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
#else
	fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
#endif

	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
		return (set_errno(EINVAL));

	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 0;
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * Same as pread. See comments in pread.
		 */

		if (fileoff > MAXOFFSET_T) {
			error = EINVAL;
			goto out;
		}
		if (fileoff + bcount > MAXOFFSET_T)
			bcount = (ssize_t)(MAXOFFSET_T - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;

	/*
	 * Note: File size can never be greater than MAXOFFSET_T.
	 * If ever we start supporting 128 bit files the code
	 * similar to the one in pread at this place should be here.
	 * Here we avoid the unnecessary VOP_GETATTR() when we
	 * know that fileoff == MAXOFFSET_T implies that it is always
	 * greater than or equal to file size.
	 */
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A transfer partially completed before a signal counts as success. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

/*
 * This syscall supplies 64-bit file offsets to 32-bit applications only.
1038 */ 1039 ssize32_t 1040 pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1, 1041 uint32_t offset_2) 1042 { 1043 struct uio auio; 1044 struct iovec aiov; 1045 file_t *fp; 1046 register vnode_t *vp; 1047 struct cpu *cp; 1048 int fflag, ioflag, rwflag; 1049 ssize_t bcount; 1050 int error = 0; 1051 u_offset_t fileoff; 1052 int in_crit = 0; 1053 1054 #if defined(_LITTLE_ENDIAN) 1055 fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1; 1056 #else 1057 fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2; 1058 #endif 1059 1060 if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX) 1061 return (set_errno(EINVAL)); 1062 if ((fp = getf(fdes)) == NULL) 1063 return (set_errno(EBADF)); 1064 if (((fflag = fp->f_flag) & (FWRITE)) == 0) { 1065 error = EBADF; 1066 goto out; 1067 } 1068 1069 rwflag = 1; 1070 vp = fp->f_vnode; 1071 1072 if (vp->v_type == VREG) { 1073 1074 if (bcount == 0) 1075 goto out; 1076 1077 /* 1078 * See comments in pwrite. 1079 */ 1080 if (fileoff > MAXOFFSET_T) { 1081 error = EINVAL; 1082 goto out; 1083 } 1084 if (fileoff >= curproc->p_fsz_ctl) { 1085 mutex_enter(&curproc->p_lock); 1086 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 1087 curproc->p_rctls, curproc, RCA_SAFE); 1088 mutex_exit(&curproc->p_lock); 1089 error = EFBIG; 1090 goto out; 1091 } 1092 if (fileoff == MAXOFFSET_T) { 1093 error = EFBIG; 1094 goto out; 1095 } 1096 if (fileoff + bcount > MAXOFFSET_T) 1097 bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff); 1098 } else if (vp->v_type == VFIFO) { 1099 error = ESPIPE; 1100 goto out; 1101 } 1102 1103 /* 1104 * We have to enter the critical region before calling VOP_RWLOCK 1105 * to avoid a deadlock with ufs. 
1106 */ 1107 if (nbl_need_check(vp)) { 1108 int svmand; 1109 1110 nbl_start_crit(vp, RW_READER); 1111 in_crit = 1; 1112 error = nbl_svmand(vp, fp->f_cred, &svmand); 1113 if (error != 0) 1114 goto out; 1115 if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand)) { 1116 error = EACCES; 1117 goto out; 1118 } 1119 } 1120 1121 aiov.iov_base = cbuf; 1122 aiov.iov_len = bcount; 1123 (void) VOP_RWLOCK(vp, rwflag, NULL); 1124 auio.uio_loffset = fileoff; 1125 auio.uio_iov = &aiov; 1126 auio.uio_iovcnt = 1; 1127 auio.uio_resid = bcount; 1128 auio.uio_segflg = UIO_USERSPACE; 1129 auio.uio_llimit = curproc->p_fsz_ctl; 1130 auio.uio_fmode = fflag; 1131 auio.uio_extflg = UIO_COPY_CACHED; 1132 1133 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 1134 1135 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 1136 bcount -= auio.uio_resid; 1137 CPU_STATS_ENTER_K(); 1138 cp = CPU; 1139 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 1140 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount); 1141 CPU_STATS_EXIT_K(); 1142 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 1143 VOP_RWUNLOCK(vp, rwflag, NULL); 1144 1145 if (error == EINTR && bcount != 0) 1146 error = 0; 1147 out: 1148 if (in_crit) 1149 nbl_end_crit(vp); 1150 releasef(fdes); 1151 if (error) 1152 return (set_errno(error)); 1153 return (bcount); 1154 } 1155 1156 #endif /* _SYSCALL32_IMPL || _ILP32 */ 1157 1158 #ifdef _SYSCALL32_IMPL 1159 /* 1160 * Tail-call elimination of xxx32() down to xxx() 1161 * 1162 * A number of xxx32 system calls take a len (or count) argument and 1163 * return a number in the range [0,len] or -1 on error. 1164 * Given an ssize32_t input len, the downcall xxx() will return 1165 * a 64-bit value that is -1 or in the range [0,len] which actually 1166 * is a proper return value for the xxx32 call. So even if the xxx32 1167 * calls can be considered as returning a ssize32_t, they are currently 1168 * declared as returning a ssize_t as this enables tail-call elimination. 
1169 * 1170 * The cast of len (or count) to ssize32_t is needed to ensure we pass 1171 * down negative input values as such and let the downcall handle error 1172 * reporting. Functions covered by this comments are: 1173 * 1174 * rw.c: read32, write32, pread32, pwrite32, readv32, writev32. 1175 * socksyscall.c: recv32, recvfrom32, send32, sendto32. 1176 * readlink.c: readlink32. 1177 */ 1178 1179 ssize_t 1180 read32(int32_t fdes, caddr32_t cbuf, size32_t count) 1181 { 1182 return (read(fdes, 1183 (void *)(uintptr_t)cbuf, (ssize32_t)count)); 1184 } 1185 1186 ssize_t 1187 write32(int32_t fdes, caddr32_t cbuf, size32_t count) 1188 { 1189 return (write(fdes, 1190 (void *)(uintptr_t)cbuf, (ssize32_t)count)); 1191 } 1192 1193 ssize_t 1194 pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) 1195 { 1196 return (pread(fdes, 1197 (void *)(uintptr_t)cbuf, (ssize32_t)count, 1198 (off_t)(uint32_t)offset)); 1199 } 1200 1201 ssize_t 1202 pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) 1203 { 1204 return (pwrite(fdes, 1205 (void *)(uintptr_t)cbuf, (ssize32_t)count, 1206 (off_t)(uint32_t)offset)); 1207 } 1208 1209 ssize_t 1210 readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) 1211 { 1212 return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt)); 1213 } 1214 1215 ssize_t 1216 writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) 1217 { 1218 return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt)); 1219 } 1220 1221 #endif /* _SYSCALL32_IMPL */ 1222