/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/isa_defs.h>
#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/sysmacros.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/cpuvar.h>
#include <sys/uio.h>
#include <sys/debug.h>
#include <sys/rctl.h>
#include <sys/nbmlock.h>

#define	COPYOUT_MAX_CACHE	(1<<17)		/* 128K */

size_t copyout_max_cached = COPYOUT_MAX_CACHE;	/* global so it's patchable */

/*
 * read, write, pread, pwrite, readv, and writev syscalls.
 *
 * 64-bit open:	all opens are large-file opens.
 * Large Files:	the behaviour of read depends on whether the fd
 *		corresponds to a large-file open or not.
 * 32-bit open:	FOFFMAX flag not set.
 *		Reads are allowed up to MAXOFF32_T - 1; a read at
 *		MAXOFF32_T returns EOVERFLOW if count is non-zero and
 *		the file size is > MAXOFF32_T.  If the file size is
 *		<= MAXOFF32_T, a read at >= MAXOFF32_T returns EOF.
 */

/*
 * Native system call
 */
ssize_t
read(int fdes, void *cbuf, size_t count)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t cnt, bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;

	if ((cnt = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;

	if (vp->v_type == VREG && cnt == 0) {
		goto out;
	}

	rwflag = 0;
	aiov.iov_base = cbuf;
	aiov.iov_len = cnt;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write() calls.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	/*
	 * We do the following checks while holding VOP_RWLOCK to prevent
	 * the file size from changing while they are being done.  Also,
	 * we load fp's offset into the local variable fileoff because a
	 * parallel lseek may be going on (f_offset is not protected by
	 * any lock) and could change f_offset.  We need to read the value
	 * only once here and base our decision on it; reading it more
	 * than once can lead to incorrect behaviour.
	 */

	fileoff = (u_offset_t)fp->f_offset;
	if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			cnt = 0;
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		} else {
			error = EOVERFLOW;
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
	}
	if ((vp->v_type == VREG) &&
	    (fileoff + cnt > OFFSET_MAX(fp))) {
		cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount = cnt;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	/*
	 * Only bypass the cache when the count is large enough.
	 */
	if (bcount <= copyout_max_cached)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	cnt -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = cnt;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	if (error == EINTR && cnt != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (cnt);
}

/*
 * Native system call
 */
ssize_t
write(int fdes, void *cbuf, size_t count)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t cnt, bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;

	if ((cnt = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;

	if (vp->v_type == VREG && cnt == 0) {
		goto out;
	}

	rwflag = 1;
	aiov.iov_base = cbuf;
	aiov.iov_len = cnt;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	fileoff = fp->f_offset;
	if (vp->v_type == VREG) {

		/*
		 * Raise the file-size resource-control signal if a write
		 * of > 0 bytes would begin at or beyond the ulimit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			VOP_RWUNLOCK(vp, rwflag, NULL);

			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * We return EFBIG if the write begins at or beyond the
		 * offset maximum for this file structure.
		 */

		if (fileoff >= OFFSET_MAX(fp)) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		/*
		 * Limit the number of bytes written to the offset maximum
		 * for this open file structure.
		 */
		if (fileoff + cnt > OFFSET_MAX(fp))
			cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount = cnt;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	cnt -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = cnt;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	if (error == EINTR && cnt != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (cnt);
}

ssize_t
pread(int fdes, void *cbuf, size_t count, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
#ifdef _SYSCALL32_IMPL
	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
	    MAXOFF32_T : MAXOFFSET_T;
#else
	const u_offset_t maxoff = MAXOFF32_T;
#endif
	int in_crit = 0;

	if ((bcount = (ssize_t)count) < 0)
		return (set_errno(EINVAL));

	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 0;
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * Return EINVAL if an invalid offset comes to pread.
		 * A negative offset from the user will cause this error.
		 */

		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Limit the offset such that we don't read or write a
		 * file beyond the maximum offset representable in an
		 * off_t.
		 */
		if (fileoff + bcount > maxoff)
			bcount = (ssize_t)((offset_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	if (vp->v_type == VREG && fileoff == (u_offset_t)maxoff) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		VOP_RWUNLOCK(vp, rwflag, NULL);

		/*
		 * We have to return EOF if fileoff is >= file size.
		 */
		if (fileoff >= va.va_size) {
			bcount = 0;
			goto out;
		}

		/*
		 * The file size is greater than or equal to maxoff,
		 * so we return EOVERFLOW.
		 */
		error = EOVERFLOW;
		goto out;
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

ssize_t
pwrite(int fdes, void *cbuf, size_t count, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
#ifdef _SYSCALL32_IMPL
	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
	    MAXOFF32_T : MAXOFFSET_T;
#else
	const u_offset_t maxoff = MAXOFF32_T;
#endif
	int in_crit = 0;

	if ((bcount = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 1;
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * Return EINVAL for offsets that cannot be
		 * represented in an off_t.
		 */
		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Take appropriate action if we are trying to write above
		 * the resource limit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * Don't allow pwrite to cause file sizes to exceed
		 * maxoff.
		 */
		if (fileoff == maxoff) {
			error = EFBIG;
			goto out;
		}
		if (fileoff + count > maxoff)
			bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	/*
	 * The SUSv4 POSIX specification states:
	 *	The pwrite() function shall be equivalent to write(), except
	 *	that it writes into a given position and does not change
	 *	the file offset (regardless of whether O_APPEND is set).
	 * To make this be true, we omit the FAPPEND flag from ioflag.
	 */
	ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

/*
 * XXX -- The SVID refers to IOV_MAX, but doesn't define it.  Grrrr....
 * XXX -- However, SVVS expects readv() and writev() to fail if
 * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source),
 * XXX -- so I guess that's the "interface".
 */
#define	DEF_IOV_MAX	16

ssize_t
readv(int fdes, struct iovec *iovp, int iovcnt)
{
	struct uio auio;
	struct iovec aiov[DEF_IOV_MAX];
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;
	u_offset_t fileoff;
	int in_crit = 0;

	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
		return (set_errno(EINVAL));

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 aiov32[DEF_IOV_MAX];
		ssize32_t count32;

		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
			return (set_errno(EFAULT));

		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen32 = aiov32[i].iov_len;
			count32 += iovlen32;
			if (iovlen32 < 0 || count32 < 0)
				return (set_errno(EINVAL));
			aiov[i].iov_len = iovlen32;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}
	} else
#endif
	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
		return (set_errno(EFAULT));

	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		if (iovlen < 0 || count < 0)
			return (set_errno(EINVAL));
	}
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp->v_type == VREG && count == 0) {
		goto out;
	}

	rwflag = 0;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);
	fileoff = fp->f_offset;

	/*
	 * Behaviour is the same as read().  Please see the comments in
	 * read().
	 */

	if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			count = 0;
			goto out;
		} else {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EOVERFLOW;
			goto out;
		}
	}
	if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) {
		count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	if (bcount <= copyout_max_cached)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;

	VOP_RWUNLOCK(vp, rwflag, NULL);

	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (count);
}

ssize_t
writev(int fdes, struct iovec *iovp, int iovcnt)
{
	struct uio auio;
	struct iovec aiov[DEF_IOV_MAX];
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;
	u_offset_t fileoff;
	int in_crit = 0;

	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
		return (set_errno(EINVAL));

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 aiov32[DEF_IOV_MAX];
		ssize32_t count32;

		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
			return (set_errno(EFAULT));

		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen = aiov32[i].iov_len;
			count32 += iovlen;
			if (iovlen < 0 || count32 < 0)
				return (set_errno(EINVAL));
			aiov[i].iov_len = iovlen;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}
	} else
#endif
	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
		return (set_errno(EFAULT));

	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		if (iovlen < 0 || count < 0)
			return (set_errno(EINVAL));
	}
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp->v_type == VREG && count == 0) {
		goto out;
	}

	rwflag = 1;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	fileoff = fp->f_offset;

	/*
	 * Behaviour is the same as write().  Please see the comments for
	 * write().
	 */

	if (vp->v_type == VREG) {
		if (fileoff >= curproc->p_fsz_ctl) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		if (fileoff >= OFFSET_MAX(fp)) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		if (fileoff + count > OFFSET_MAX(fp))
			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (count);
}

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * This syscall supplies 64-bit file offsets to 32-bit applications only.
 */
ssize32_t
pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
    uint32_t offset_2)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;

#if defined(_LITTLE_ENDIAN)
	fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
#else
	fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
#endif

	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
		return (set_errno(EINVAL));

	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 0;
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * Same as pread.  See comments in pread.
		 */

		if (fileoff > MAXOFFSET_T) {
			error = EINVAL;
			goto out;
		}
		if (fileoff + bcount > MAXOFFSET_T)
			bcount = (ssize_t)(MAXOFFSET_T - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;

	/*
	 * Note: the file size can never be greater than MAXOFFSET_T.
	 * If we ever start supporting 128-bit files, code similar to
	 * that in pread belongs here.  For now we avoid the unnecessary
	 * VOP_GETATTR(), because fileoff == MAXOFFSET_T implies that the
	 * offset is always greater than or equal to the file size.
	 */
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

/*
 * This syscall supplies 64-bit file offsets to 32-bit applications only.
 */
ssize32_t
pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
    uint32_t offset_2)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;

#if defined(_LITTLE_ENDIAN)
	fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
#else
	fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
#endif

	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 1;
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * See comments in pwrite.
		 */
		if (fileoff > MAXOFFSET_T) {
			error = EINVAL;
			goto out;
		}
		if (fileoff >= curproc->p_fsz_ctl) {
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_SAFE);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		if (fileoff == MAXOFFSET_T) {
			error = EFBIG;
			goto out;
		}
		if (fileoff + bcount > MAXOFFSET_T)
			bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	/*
	 * The SUSv4 POSIX specification states:
	 *	The pwrite() function shall be equivalent to write(), except
	 *	that it writes into a given position and does not change
	 *	the file offset (regardless of whether O_APPEND is set).
	 * To make this be true, we omit the FAPPEND flag from ioflag.
	 */
	ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

#endif	/* _SYSCALL32_IMPL || _ILP32 */

#ifdef _SYSCALL32_IMPL
/*
 * Tail-call elimination of xxx32() down to xxx()
 *
 * A number of xxx32 system calls take a len (or count) argument and
 * return a number in the range [0, len] or -1 on error.
 * Given an ssize32_t input len, the downcall xxx() will return
 * a 64-bit value that is -1 or in the range [0, len], which actually
 * is a proper return value for the xxx32 call.  So even if the xxx32
 * calls can be considered as returning an ssize32_t, they are currently
 * declared as returning an ssize_t, as this enables tail-call elimination.
 *
 * The cast of len (or count) to ssize32_t is needed to ensure we pass
 * down negative input values as such and let the downcall handle error
 * reporting.  Functions covered by this comment are:
 *
 * rw.c:           read32, write32, pread32, pwrite32, readv32, writev32.
 * socksyscall.c:  recv32, recvfrom32, send32, sendto32.
 * readlink.c:     readlink32.
 */

ssize_t
read32(int32_t fdes, caddr32_t cbuf, size32_t count)
{
	return (read(fdes,
	    (void *)(uintptr_t)cbuf, (ssize32_t)count));
}

ssize_t
write32(int32_t fdes, caddr32_t cbuf, size32_t count)
{
	return (write(fdes,
	    (void *)(uintptr_t)cbuf, (ssize32_t)count));
}

ssize_t
pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
{
	return (pread(fdes,
	    (void *)(uintptr_t)cbuf, (ssize32_t)count,
	    (off_t)(uint32_t)offset));
}

ssize_t
pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
{
	return (pwrite(fdes,
	    (void *)(uintptr_t)cbuf, (ssize32_t)count,
	    (off_t)(uint32_t)offset));
}

ssize_t
readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
{
	return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt));
}

ssize_t
writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
{
	return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt));
}

#endif	/* _SYSCALL32_IMPL */