1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * Copyright 2015, Joyent, Inc. All rights reserved. 26 */ 27 28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 29 /* All Rights Reserved */ 30 31 /* 32 * Portions of this source code were derived from Berkeley 4.3 BSD 33 * under license from the Regents of the University of California. 34 */ 35 36 #include <sys/param.h> 37 #include <sys/isa_defs.h> 38 #include <sys/types.h> 39 #include <sys/inttypes.h> 40 #include <sys/sysmacros.h> 41 #include <sys/cred.h> 42 #include <sys/user.h> 43 #include <sys/systm.h> 44 #include <sys/errno.h> 45 #include <sys/vnode.h> 46 #include <sys/file.h> 47 #include <sys/proc.h> 48 #include <sys/cpuvar.h> 49 #include <sys/uio.h> 50 #include <sys/debug.h> 51 #include <sys/rctl.h> 52 #include <sys/nbmlock.h> 53 #include <sys/limits.h> 54 55 #define COPYOUT_MAX_CACHE (1<<17) /* 128K */ 56 57 size_t copyout_max_cached = COPYOUT_MAX_CACHE; /* global so it's patchable */ 58 59 /* 60 * read, write, pread, pwrite, readv, and writev syscalls. 61 * 62 * 64-bit open: all open's are large file opens. 63 * Large Files: the behaviour of read depends on whether the fd 64 * corresponds to large open or not. 65 * 32-bit open: FOFFMAX flag not set. 66 * read until MAXOFF32_T - 1 and read at MAXOFF32_T returns 67 * EOVERFLOW if count is non-zero and if size of file 68 * is > MAXOFF32_T. If size of file is <= MAXOFF32_T read 69 * at >= MAXOFF32_T returns EOF. 70 */ 71 72 /* 73 * Native system call 74 */ 75 ssize_t 76 read(int fdes, void *cbuf, size_t count) 77 { 78 struct uio auio; 79 struct iovec aiov; 80 file_t *fp; 81 register vnode_t *vp; 82 struct cpu *cp; 83 int fflag, ioflag, rwflag; 84 ssize_t cnt, bcount; 85 int error = 0; 86 u_offset_t fileoff; 87 int in_crit = 0; 88 89 if ((cnt = (ssize_t)count) < 0) 90 return (set_errno(EINVAL)); 91 if ((fp = getf(fdes)) == NULL) 92 return (set_errno(EBADF)); 93 if (((fflag = fp->f_flag) & FREAD) == 0) { 94 error = EBADF; 95 goto out; 96 } 97 vp = fp->f_vnode; 98 99 if (vp->v_type == VREG && cnt == 0) { 100 goto out; 101 } 102 103 rwflag = 0; 104 aiov.iov_base = cbuf; 105 aiov.iov_len = cnt; 106 107 /* 108 * We have to enter the critical region before calling VOP_RWLOCK 109 * to avoid a deadlock with write() calls. 110 */ 111 if (nbl_need_check(vp)) { 112 int svmand; 113 114 nbl_start_crit(vp, RW_READER); 115 in_crit = 1; 116 error = nbl_svmand(vp, fp->f_cred, &svmand); 117 if (error != 0) 118 goto out; 119 if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand, 120 NULL)) { 121 error = EACCES; 122 goto out; 123 } 124 } 125 126 (void) VOP_RWLOCK(vp, rwflag, NULL); 127 128 /* 129 * We do the following checks inside VOP_RWLOCK so as to 130 * prevent file size from changing while these checks are 131 * being done. Also, we load fp's offset to the local 132 * variable fileoff because we can have a parallel lseek 133 * going on (f_offset is not protected by any lock) which 134 * could change f_offset. We need to see the value only 135 * once here and take a decision. Seeing it more than once 136 * can lead to incorrect functionality. 137 */ 138 139 fileoff = (u_offset_t)fp->f_offset; 140 if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) { 141 struct vattr va; 142 va.va_mask = AT_SIZE; 143 if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) { 144 VOP_RWUNLOCK(vp, rwflag, NULL); 145 goto out; 146 } 147 if (fileoff >= va.va_size) { 148 cnt = 0; 149 VOP_RWUNLOCK(vp, rwflag, NULL); 150 goto out; 151 } else { 152 error = EOVERFLOW; 153 VOP_RWUNLOCK(vp, rwflag, NULL); 154 goto out; 155 } 156 } 157 if ((vp->v_type == VREG) && 158 (fileoff + cnt > OFFSET_MAX(fp))) { 159 cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff); 160 } 161 auio.uio_loffset = fileoff; 162 auio.uio_iov = &aiov; 163 auio.uio_iovcnt = 1; 164 auio.uio_resid = bcount = cnt; 165 auio.uio_segflg = UIO_USERSPACE; 166 auio.uio_llimit = MAXOFFSET_T; 167 auio.uio_fmode = fflag; 168 /* 169 * Only use bypass caches when the count is large enough 170 */ 171 if (bcount <= copyout_max_cached) 172 auio.uio_extflg = UIO_COPY_CACHED; 173 else 174 auio.uio_extflg = UIO_COPY_DEFAULT; 175 176 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 177 178 /* If read sync is not asked for, filter sync flags */ 179 if ((ioflag & FRSYNC) == 0) 180 ioflag &= ~(FSYNC|FDSYNC); 181 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 182 cnt -= auio.uio_resid; 183 CPU_STATS_ENTER_K(); 184 cp = CPU; 185 CPU_STATS_ADDQ(cp, sys, sysread, 1); 186 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt); 187 CPU_STATS_EXIT_K(); 188 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt; 189 190 if (vp->v_type == VFIFO) /* Backward compatibility */ 191 fp->f_offset = cnt; 192 else if (((fp->f_flag & FAPPEND) == 0) || 193 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ 194 fp->f_offset = auio.uio_loffset; 195 VOP_RWUNLOCK(vp, rwflag, NULL); 196 197 if (error == EINTR && cnt != 0) 198 error = 0; 199 out: 200 if (in_crit) 201 nbl_end_crit(vp); 202 releasef(fdes); 203 if (error) 204 return (set_errno(error)); 205 return (cnt); 206 } 207 208 /* 209 * Native system call 210 */ 211 ssize_t 212 write(int fdes, void *cbuf, size_t count) 213 { 214 struct uio auio; 215 struct iovec aiov; 216 file_t *fp; 217 register vnode_t *vp; 218 struct cpu *cp; 219 int fflag, ioflag, rwflag; 220 ssize_t cnt, bcount; 221 int error = 0; 222 u_offset_t fileoff; 223 int in_crit = 0; 224 225 if ((cnt = (ssize_t)count) < 0) 226 return (set_errno(EINVAL)); 227 if ((fp = getf(fdes)) == NULL) 228 return (set_errno(EBADF)); 229 if (((fflag = fp->f_flag) & FWRITE) == 0) { 230 error = EBADF; 231 goto out; 232 } 233 vp = fp->f_vnode; 234 235 if (vp->v_type == VREG && cnt == 0) { 236 goto out; 237 } 238 239 rwflag = 1; 240 aiov.iov_base = cbuf; 241 aiov.iov_len = cnt; 242 243 /* 244 * We have to enter the critical region before calling VOP_RWLOCK 245 * to avoid a deadlock with ufs. 246 */ 247 if (nbl_need_check(vp)) { 248 int svmand; 249 250 nbl_start_crit(vp, RW_READER); 251 in_crit = 1; 252 error = nbl_svmand(vp, fp->f_cred, &svmand); 253 if (error != 0) 254 goto out; 255 if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand, 256 NULL)) { 257 error = EACCES; 258 goto out; 259 } 260 } 261 262 (void) VOP_RWLOCK(vp, rwflag, NULL); 263 264 fileoff = fp->f_offset; 265 if (vp->v_type == VREG) { 266 267 /* 268 * We raise psignal if write for >0 bytes causes 269 * it to exceed the ulimit. 270 */ 271 if (fileoff >= curproc->p_fsz_ctl) { 272 VOP_RWUNLOCK(vp, rwflag, NULL); 273 274 mutex_enter(&curproc->p_lock); 275 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 276 curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO); 277 mutex_exit(&curproc->p_lock); 278 279 error = EFBIG; 280 goto out; 281 } 282 /* 283 * We return EFBIG if write is done at an offset 284 * greater than the offset maximum for this file structure. 285 */ 286 287 if (fileoff >= OFFSET_MAX(fp)) { 288 VOP_RWUNLOCK(vp, rwflag, NULL); 289 error = EFBIG; 290 goto out; 291 } 292 /* 293 * Limit the bytes to be written upto offset maximum for 294 * this open file structure. 295 */ 296 if (fileoff + cnt > OFFSET_MAX(fp)) 297 cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff); 298 } 299 auio.uio_loffset = fileoff; 300 auio.uio_iov = &aiov; 301 auio.uio_iovcnt = 1; 302 auio.uio_resid = bcount = cnt; 303 auio.uio_segflg = UIO_USERSPACE; 304 auio.uio_llimit = curproc->p_fsz_ctl; 305 auio.uio_fmode = fflag; 306 auio.uio_extflg = UIO_COPY_DEFAULT; 307 308 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 309 310 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 311 cnt -= auio.uio_resid; 312 CPU_STATS_ENTER_K(); 313 cp = CPU; 314 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 315 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt); 316 CPU_STATS_EXIT_K(); 317 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt; 318 319 if (vp->v_type == VFIFO) /* Backward compatibility */ 320 fp->f_offset = cnt; 321 else if (((fp->f_flag & FAPPEND) == 0) || 322 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ 323 fp->f_offset = auio.uio_loffset; 324 VOP_RWUNLOCK(vp, rwflag, NULL); 325 326 if (error == EINTR && cnt != 0) 327 error = 0; 328 out: 329 if (in_crit) 330 nbl_end_crit(vp); 331 releasef(fdes); 332 if (error) 333 return (set_errno(error)); 334 return (cnt); 335 } 336 337 ssize_t 338 pread(int fdes, void *cbuf, size_t count, off_t offset) 339 { 340 struct uio auio; 341 struct iovec aiov; 342 file_t *fp; 343 register vnode_t *vp; 344 struct cpu *cp; 345 int fflag, ioflag, rwflag; 346 ssize_t bcount; 347 int error = 0; 348 u_offset_t fileoff = (u_offset_t)(ulong_t)offset; 349 #ifdef _SYSCALL32_IMPL 350 u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ? 351 MAXOFF32_T : MAXOFFSET_T; 352 #else 353 const u_offset_t maxoff = MAXOFF32_T; 354 #endif 355 int in_crit = 0; 356 357 if ((bcount = (ssize_t)count) < 0) 358 return (set_errno(EINVAL)); 359 360 if ((fp = getf(fdes)) == NULL) 361 return (set_errno(EBADF)); 362 if (((fflag = fp->f_flag) & (FREAD)) == 0) { 363 error = EBADF; 364 goto out; 365 } 366 367 rwflag = 0; 368 vp = fp->f_vnode; 369 370 if (vp->v_type == VREG) { 371 372 if (bcount == 0) 373 goto out; 374 375 /* 376 * Return EINVAL if an invalid offset comes to pread. 377 * Negative offset from user will cause this error. 378 */ 379 380 if (fileoff > maxoff) { 381 error = EINVAL; 382 goto out; 383 } 384 /* 385 * Limit offset such that we don't read or write 386 * a file beyond the maximum offset representable in 387 * an off_t structure. 388 */ 389 if (fileoff + bcount > maxoff) 390 bcount = (ssize_t)((offset_t)maxoff - fileoff); 391 } else if (vp->v_type == VFIFO) { 392 error = ESPIPE; 393 goto out; 394 } 395 396 /* 397 * We have to enter the critical region before calling VOP_RWLOCK 398 * to avoid a deadlock with ufs. 399 */ 400 if (nbl_need_check(vp)) { 401 int svmand; 402 403 nbl_start_crit(vp, RW_READER); 404 in_crit = 1; 405 error = nbl_svmand(vp, fp->f_cred, &svmand); 406 if (error != 0) 407 goto out; 408 if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand, 409 NULL)) { 410 error = EACCES; 411 goto out; 412 } 413 } 414 415 aiov.iov_base = cbuf; 416 aiov.iov_len = bcount; 417 (void) VOP_RWLOCK(vp, rwflag, NULL); 418 if (vp->v_type == VREG && fileoff == (u_offset_t)maxoff) { 419 struct vattr va; 420 va.va_mask = AT_SIZE; 421 if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) { 422 VOP_RWUNLOCK(vp, rwflag, NULL); 423 goto out; 424 } 425 VOP_RWUNLOCK(vp, rwflag, NULL); 426 427 /* 428 * We have to return EOF if fileoff is >= file size. 429 */ 430 if (fileoff >= va.va_size) { 431 bcount = 0; 432 goto out; 433 } 434 435 /* 436 * File is greater than or equal to maxoff and therefore 437 * we return EOVERFLOW. 438 */ 439 error = EOVERFLOW; 440 goto out; 441 } 442 auio.uio_loffset = fileoff; 443 auio.uio_iov = &aiov; 444 auio.uio_iovcnt = 1; 445 auio.uio_resid = bcount; 446 auio.uio_segflg = UIO_USERSPACE; 447 auio.uio_llimit = MAXOFFSET_T; 448 auio.uio_fmode = fflag; 449 auio.uio_extflg = UIO_COPY_CACHED; 450 451 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 452 453 /* If read sync is not asked for, filter sync flags */ 454 if ((ioflag & FRSYNC) == 0) 455 ioflag &= ~(FSYNC|FDSYNC); 456 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 457 bcount -= auio.uio_resid; 458 CPU_STATS_ENTER_K(); 459 cp = CPU; 460 CPU_STATS_ADDQ(cp, sys, sysread, 1); 461 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount); 462 CPU_STATS_EXIT_K(); 463 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 464 VOP_RWUNLOCK(vp, rwflag, NULL); 465 466 if (error == EINTR && bcount != 0) 467 error = 0; 468 out: 469 if (in_crit) 470 nbl_end_crit(vp); 471 releasef(fdes); 472 if (error) 473 return (set_errno(error)); 474 return (bcount); 475 } 476 477 ssize_t 478 pwrite(int fdes, void *cbuf, size_t count, off_t offset) 479 { 480 struct uio auio; 481 struct iovec aiov; 482 file_t *fp; 483 register vnode_t *vp; 484 struct cpu *cp; 485 int fflag, ioflag, rwflag; 486 ssize_t bcount; 487 int error = 0; 488 u_offset_t fileoff = (u_offset_t)(ulong_t)offset; 489 #ifdef _SYSCALL32_IMPL 490 u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ? 491 MAXOFF32_T : MAXOFFSET_T; 492 #else 493 const u_offset_t maxoff = MAXOFF32_T; 494 #endif 495 int in_crit = 0; 496 497 if ((bcount = (ssize_t)count) < 0) 498 return (set_errno(EINVAL)); 499 if ((fp = getf(fdes)) == NULL) 500 return (set_errno(EBADF)); 501 if (((fflag = fp->f_flag) & (FWRITE)) == 0) { 502 error = EBADF; 503 goto out; 504 } 505 506 rwflag = 1; 507 vp = fp->f_vnode; 508 509 if (vp->v_type == VREG) { 510 511 if (bcount == 0) 512 goto out; 513 514 /* 515 * return EINVAL for offsets that cannot be 516 * represented in an off_t. 517 */ 518 if (fileoff > maxoff) { 519 error = EINVAL; 520 goto out; 521 } 522 /* 523 * Take appropriate action if we are trying to write above the 524 * resource limit. 525 */ 526 if (fileoff >= curproc->p_fsz_ctl) { 527 mutex_enter(&curproc->p_lock); 528 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 529 curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO); 530 mutex_exit(&curproc->p_lock); 531 532 error = EFBIG; 533 goto out; 534 } 535 /* 536 * Don't allow pwrite to cause file sizes to exceed 537 * maxoff. 538 */ 539 if (fileoff == maxoff) { 540 error = EFBIG; 541 goto out; 542 } 543 if (fileoff + count > maxoff) 544 bcount = (ssize_t)((u_offset_t)maxoff - fileoff); 545 } else if (vp->v_type == VFIFO) { 546 error = ESPIPE; 547 goto out; 548 } 549 550 /* 551 * We have to enter the critical region before calling VOP_RWLOCK 552 * to avoid a deadlock with ufs. 553 */ 554 if (nbl_need_check(vp)) { 555 int svmand; 556 557 nbl_start_crit(vp, RW_READER); 558 in_crit = 1; 559 error = nbl_svmand(vp, fp->f_cred, &svmand); 560 if (error != 0) 561 goto out; 562 if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand, 563 NULL)) { 564 error = EACCES; 565 goto out; 566 } 567 } 568 569 aiov.iov_base = cbuf; 570 aiov.iov_len = bcount; 571 (void) VOP_RWLOCK(vp, rwflag, NULL); 572 auio.uio_loffset = fileoff; 573 auio.uio_iov = &aiov; 574 auio.uio_iovcnt = 1; 575 auio.uio_resid = bcount; 576 auio.uio_segflg = UIO_USERSPACE; 577 auio.uio_llimit = curproc->p_fsz_ctl; 578 auio.uio_fmode = fflag; 579 auio.uio_extflg = UIO_COPY_CACHED; 580 581 /* 582 * The SUSv4 POSIX specification states: 583 * The pwrite() function shall be equivalent to write(), except 584 * that it writes into a given position and does not change 585 * the file offset (regardless of whether O_APPEND is set). 586 * To make this be true, we omit the FAPPEND flag from ioflag. 587 */ 588 ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC); 589 590 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 591 bcount -= auio.uio_resid; 592 CPU_STATS_ENTER_K(); 593 cp = CPU; 594 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 595 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount); 596 CPU_STATS_EXIT_K(); 597 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 598 VOP_RWUNLOCK(vp, rwflag, NULL); 599 600 if (error == EINTR && bcount != 0) 601 error = 0; 602 out: 603 if (in_crit) 604 nbl_end_crit(vp); 605 releasef(fdes); 606 if (error) 607 return (set_errno(error)); 608 return (bcount); 609 } 610 611 ssize_t 612 readv(int fdes, struct iovec *iovp, int iovcnt) 613 { 614 struct uio auio; 615 struct iovec buf[IOV_MAX_STACK], *aiov = buf; 616 int aiovlen = 0; 617 file_t *fp; 618 register vnode_t *vp; 619 struct cpu *cp; 620 int fflag, ioflag, rwflag; 621 ssize_t count, bcount; 622 int error = 0; 623 int i; 624 u_offset_t fileoff; 625 int in_crit = 0; 626 627 if (iovcnt <= 0 || iovcnt > IOV_MAX) 628 return (set_errno(EINVAL)); 629 630 if (iovcnt > IOV_MAX_STACK) { 631 aiovlen = iovcnt * sizeof (iovec_t); 632 aiov = kmem_alloc(aiovlen, KM_SLEEP); 633 } 634 635 #ifdef _SYSCALL32_IMPL 636 /* 637 * 32-bit callers need to have their iovec expanded, 638 * while ensuring that they can't move more than 2Gbytes 639 * of data in a single call. 640 */ 641 if (get_udatamodel() == DATAMODEL_ILP32) { 642 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32; 643 int aiov32len; 644 ssize32_t count32; 645 646 aiov32len = iovcnt * sizeof (iovec32_t); 647 if (aiovlen != 0) 648 aiov32 = kmem_alloc(aiov32len, KM_SLEEP); 649 650 if (copyin(iovp, aiov32, aiov32len)) { 651 if (aiovlen != 0) { 652 kmem_free(aiov32, aiov32len); 653 kmem_free(aiov, aiovlen); 654 } 655 return (set_errno(EFAULT)); 656 } 657 658 count32 = 0; 659 for (i = 0; i < iovcnt; i++) { 660 ssize32_t iovlen32 = aiov32[i].iov_len; 661 count32 += iovlen32; 662 if (iovlen32 < 0 || count32 < 0) { 663 if (aiovlen != 0) { 664 kmem_free(aiov32, aiov32len); 665 kmem_free(aiov, aiovlen); 666 } 667 return (set_errno(EINVAL)); 668 } 669 aiov[i].iov_len = iovlen32; 670 aiov[i].iov_base = 671 (caddr_t)(uintptr_t)aiov32[i].iov_base; 672 } 673 674 if (aiovlen != 0) 675 kmem_free(aiov32, aiov32len); 676 } else 677 #endif 678 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) { 679 if (aiovlen != 0) 680 kmem_free(aiov, aiovlen); 681 return (set_errno(EFAULT)); 682 } 683 684 count = 0; 685 for (i = 0; i < iovcnt; i++) { 686 ssize_t iovlen = aiov[i].iov_len; 687 count += iovlen; 688 if (iovlen < 0 || count < 0) { 689 if (aiovlen != 0) 690 kmem_free(aiov, aiovlen); 691 return (set_errno(EINVAL)); 692 } 693 } 694 if ((fp = getf(fdes)) == NULL) { 695 if (aiovlen != 0) 696 kmem_free(aiov, aiovlen); 697 return (set_errno(EBADF)); 698 } 699 if (((fflag = fp->f_flag) & FREAD) == 0) { 700 error = EBADF; 701 goto out; 702 } 703 vp = fp->f_vnode; 704 if (vp->v_type == VREG && count == 0) { 705 goto out; 706 } 707 708 rwflag = 0; 709 710 /* 711 * We have to enter the critical region before calling VOP_RWLOCK 712 * to avoid a deadlock with ufs. 713 */ 714 if (nbl_need_check(vp)) { 715 int svmand; 716 717 nbl_start_crit(vp, RW_READER); 718 in_crit = 1; 719 error = nbl_svmand(vp, fp->f_cred, &svmand); 720 if (error != 0) 721 goto out; 722 if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand, 723 NULL)) { 724 error = EACCES; 725 goto out; 726 } 727 } 728 729 (void) VOP_RWLOCK(vp, rwflag, NULL); 730 fileoff = fp->f_offset; 731 732 /* 733 * Behaviour is same as read. Please see comments in read. 734 */ 735 736 if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) { 737 struct vattr va; 738 va.va_mask = AT_SIZE; 739 if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) { 740 VOP_RWUNLOCK(vp, rwflag, NULL); 741 goto out; 742 } 743 if (fileoff >= va.va_size) { 744 VOP_RWUNLOCK(vp, rwflag, NULL); 745 count = 0; 746 goto out; 747 } else { 748 VOP_RWUNLOCK(vp, rwflag, NULL); 749 error = EOVERFLOW; 750 goto out; 751 } 752 } 753 if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) { 754 count = (ssize_t)(OFFSET_MAX(fp) - fileoff); 755 } 756 auio.uio_loffset = fileoff; 757 auio.uio_iov = aiov; 758 auio.uio_iovcnt = iovcnt; 759 auio.uio_resid = bcount = count; 760 auio.uio_segflg = UIO_USERSPACE; 761 auio.uio_llimit = MAXOFFSET_T; 762 auio.uio_fmode = fflag; 763 if (bcount <= copyout_max_cached) 764 auio.uio_extflg = UIO_COPY_CACHED; 765 else 766 auio.uio_extflg = UIO_COPY_DEFAULT; 767 768 769 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 770 771 /* If read sync is not asked for, filter sync flags */ 772 if ((ioflag & FRSYNC) == 0) 773 ioflag &= ~(FSYNC|FDSYNC); 774 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 775 count -= auio.uio_resid; 776 CPU_STATS_ENTER_K(); 777 cp = CPU; 778 CPU_STATS_ADDQ(cp, sys, sysread, 1); 779 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count); 780 CPU_STATS_EXIT_K(); 781 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count; 782 783 if (vp->v_type == VFIFO) /* Backward compatibility */ 784 fp->f_offset = count; 785 else if (((fp->f_flag & FAPPEND) == 0) || 786 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ 787 fp->f_offset = auio.uio_loffset; 788 789 VOP_RWUNLOCK(vp, rwflag, NULL); 790 791 if (error == EINTR && count != 0) 792 error = 0; 793 out: 794 if (in_crit) 795 nbl_end_crit(vp); 796 releasef(fdes); 797 if (aiovlen != 0) 798 kmem_free(aiov, aiovlen); 799 if (error) 800 return (set_errno(error)); 801 return (count); 802 } 803 804 ssize_t 805 writev(int fdes, struct iovec *iovp, int iovcnt) 806 { 807 struct uio auio; 808 struct iovec buf[IOV_MAX_STACK], *aiov = buf; 809 int aiovlen = 0; 810 file_t *fp; 811 register vnode_t *vp; 812 struct cpu *cp; 813 int fflag, ioflag, rwflag; 814 ssize_t count, bcount; 815 int error = 0; 816 int i; 817 u_offset_t fileoff; 818 int in_crit = 0; 819 820 if (iovcnt <= 0 || iovcnt > IOV_MAX) 821 return (set_errno(EINVAL)); 822 823 if (iovcnt > IOV_MAX_STACK) { 824 aiovlen = iovcnt * sizeof (iovec_t); 825 aiov = kmem_alloc(aiovlen, KM_SLEEP); 826 } 827 828 #ifdef _SYSCALL32_IMPL 829 /* 830 * 32-bit callers need to have their iovec expanded, 831 * while ensuring that they can't move more than 2Gbytes 832 * of data in a single call. 833 */ 834 if (get_udatamodel() == DATAMODEL_ILP32) { 835 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32; 836 int aiov32len; 837 ssize32_t count32; 838 839 aiov32len = iovcnt * sizeof (iovec32_t); 840 if (aiovlen != 0) 841 aiov32 = kmem_alloc(aiov32len, KM_SLEEP); 842 843 if (copyin(iovp, aiov32, aiov32len)) { 844 if (aiovlen != 0) { 845 kmem_free(aiov32, aiov32len); 846 kmem_free(aiov, aiovlen); 847 } 848 return (set_errno(EFAULT)); 849 } 850 851 count32 = 0; 852 for (i = 0; i < iovcnt; i++) { 853 ssize32_t iovlen = aiov32[i].iov_len; 854 count32 += iovlen; 855 if (iovlen < 0 || count32 < 0) { 856 if (aiovlen != 0) { 857 kmem_free(aiov32, aiov32len); 858 kmem_free(aiov, aiovlen); 859 } 860 return (set_errno(EINVAL)); 861 } 862 aiov[i].iov_len = iovlen; 863 aiov[i].iov_base = 864 (caddr_t)(uintptr_t)aiov32[i].iov_base; 865 } 866 if (aiovlen != 0) 867 kmem_free(aiov32, aiov32len); 868 } else 869 #endif 870 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) { 871 if (aiovlen != 0) 872 kmem_free(aiov, aiovlen); 873 return (set_errno(EFAULT)); 874 } 875 876 count = 0; 877 for (i = 0; i < iovcnt; i++) { 878 ssize_t iovlen = aiov[i].iov_len; 879 count += iovlen; 880 if (iovlen < 0 || count < 0) { 881 if (aiovlen != 0) 882 kmem_free(aiov, aiovlen); 883 return (set_errno(EINVAL)); 884 } 885 } 886 if ((fp = getf(fdes)) == NULL) { 887 if (aiovlen != 0) 888 kmem_free(aiov, aiovlen); 889 return (set_errno(EBADF)); 890 } 891 if (((fflag = fp->f_flag) & FWRITE) == 0) { 892 error = EBADF; 893 goto out; 894 } 895 vp = fp->f_vnode; 896 if (vp->v_type == VREG && count == 0) { 897 goto out; 898 } 899 900 rwflag = 1; 901 902 /* 903 * We have to enter the critical region before calling VOP_RWLOCK 904 * to avoid a deadlock with ufs. 905 */ 906 if (nbl_need_check(vp)) { 907 int svmand; 908 909 nbl_start_crit(vp, RW_READER); 910 in_crit = 1; 911 error = nbl_svmand(vp, fp->f_cred, &svmand); 912 if (error != 0) 913 goto out; 914 if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand, 915 NULL)) { 916 error = EACCES; 917 goto out; 918 } 919 } 920 921 (void) VOP_RWLOCK(vp, rwflag, NULL); 922 923 fileoff = fp->f_offset; 924 925 /* 926 * Behaviour is same as write. Please see comments for write. 927 */ 928 929 if (vp->v_type == VREG) { 930 if (fileoff >= curproc->p_fsz_ctl) { 931 VOP_RWUNLOCK(vp, rwflag, NULL); 932 mutex_enter(&curproc->p_lock); 933 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 934 curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO); 935 mutex_exit(&curproc->p_lock); 936 error = EFBIG; 937 goto out; 938 } 939 if (fileoff >= OFFSET_MAX(fp)) { 940 VOP_RWUNLOCK(vp, rwflag, NULL); 941 error = EFBIG; 942 goto out; 943 } 944 if (fileoff + count > OFFSET_MAX(fp)) 945 count = (ssize_t)(OFFSET_MAX(fp) - fileoff); 946 } 947 auio.uio_loffset = fileoff; 948 auio.uio_iov = aiov; 949 auio.uio_iovcnt = iovcnt; 950 auio.uio_resid = bcount = count; 951 auio.uio_segflg = UIO_USERSPACE; 952 auio.uio_llimit = curproc->p_fsz_ctl; 953 auio.uio_fmode = fflag; 954 auio.uio_extflg = UIO_COPY_DEFAULT; 955 956 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 957 958 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 959 count -= auio.uio_resid; 960 CPU_STATS_ENTER_K(); 961 cp = CPU; 962 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 963 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count); 964 CPU_STATS_EXIT_K(); 965 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count; 966 967 if (vp->v_type == VFIFO) /* Backward compatibility */ 968 fp->f_offset = count; 969 else if (((fp->f_flag & FAPPEND) == 0) || 970 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ 971 fp->f_offset = auio.uio_loffset; 972 VOP_RWUNLOCK(vp, rwflag, NULL); 973 974 if (error == EINTR && count != 0) 975 error = 0; 976 out: 977 if (in_crit) 978 nbl_end_crit(vp); 979 releasef(fdes); 980 if (aiovlen != 0) 981 kmem_free(aiov, aiovlen); 982 if (error) 983 return (set_errno(error)); 984 return (count); 985 } 986 987 ssize_t 988 preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset, 989 off_t extended_offset) 990 { 991 struct uio auio; 992 struct iovec buf[IOV_MAX_STACK], *aiov = buf; 993 int aiovlen = 0; 994 file_t *fp; 995 register vnode_t *vp; 996 struct cpu *cp; 997 int fflag, ioflag, rwflag; 998 ssize_t count, bcount; 999 int error = 0; 1000 int i; 1001 1002 #if defined(_SYSCALL32_IMPL) || defined(_ILP32) 1003 u_offset_t fileoff = ((u_offset_t)extended_offset << 32) | 1004 (u_offset_t)offset; 1005 #else /* _SYSCALL32_IMPL || _ILP32 */ 1006 u_offset_t fileoff = (u_offset_t)(ulong_t)offset; 1007 #endif /* _SYSCALL32_IMPR || _ILP32 */ 1008 #ifdef _SYSCALL32_IMPL 1009 const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 && 1010 extended_offset == 0? 1011 MAXOFF32_T : MAXOFFSET_T; 1012 #else /* _SYSCALL32_IMPL */ 1013 const u_offset_t maxoff = MAXOFF32_T; 1014 #endif /* _SYSCALL32_IMPL */ 1015 1016 int in_crit = 0; 1017 1018 if (iovcnt <= 0 || iovcnt > IOV_MAX) 1019 return (set_errno(EINVAL)); 1020 1021 if (iovcnt > IOV_MAX_STACK) { 1022 aiovlen = iovcnt * sizeof (iovec_t); 1023 aiov = kmem_alloc(aiovlen, KM_SLEEP); 1024 } 1025 1026 #ifdef _SYSCALL32_IMPL 1027 /* 1028 * 32-bit callers need to have their iovec expanded, 1029 * while ensuring that they can't move more than 2Gbytes 1030 * of data in a single call. 1031 */ 1032 if (get_udatamodel() == DATAMODEL_ILP32) { 1033 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32; 1034 int aiov32len; 1035 ssize32_t count32; 1036 1037 aiov32len = iovcnt * sizeof (iovec32_t); 1038 if (aiovlen != 0) 1039 aiov32 = kmem_alloc(aiov32len, KM_SLEEP); 1040 1041 if (copyin(iovp, aiov32, aiov32len)) { 1042 if (aiovlen != 0) { 1043 kmem_free(aiov32, aiov32len); 1044 kmem_free(aiov, aiovlen); 1045 } 1046 return (set_errno(EFAULT)); 1047 } 1048 1049 count32 = 0; 1050 for (i = 0; i < iovcnt; i++) { 1051 ssize32_t iovlen32 = aiov32[i].iov_len; 1052 count32 += iovlen32; 1053 if (iovlen32 < 0 || count32 < 0) { 1054 if (aiovlen != 0) { 1055 kmem_free(aiov32, aiov32len); 1056 kmem_free(aiov, aiovlen); 1057 } 1058 return (set_errno(EINVAL)); 1059 } 1060 aiov[i].iov_len = iovlen32; 1061 aiov[i].iov_base = 1062 (caddr_t)(uintptr_t)aiov32[i].iov_base; 1063 } 1064 if (aiovlen != 0) 1065 kmem_free(aiov32, aiov32len); 1066 } else 1067 #endif /* _SYSCALL32_IMPL */ 1068 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) { 1069 if (aiovlen != 0) 1070 kmem_free(aiov, aiovlen); 1071 return (set_errno(EFAULT)); 1072 } 1073 1074 count = 0; 1075 for (i = 0; i < iovcnt; i++) { 1076 ssize_t iovlen = aiov[i].iov_len; 1077 count += iovlen; 1078 if (iovlen < 0 || count < 0) { 1079 if (aiovlen != 0) 1080 kmem_free(aiov, aiovlen); 1081 return (set_errno(EINVAL)); 1082 } 1083 } 1084 1085 if ((bcount = (ssize_t)count) < 0) { 1086 if (aiovlen != 0) 1087 kmem_free(aiov, aiovlen); 1088 return (set_errno(EINVAL)); 1089 } 1090 if ((fp = getf(fdes)) == NULL) { 1091 if (aiovlen != 0) 1092 kmem_free(aiov, aiovlen); 1093 return (set_errno(EBADF)); 1094 } 1095 if (((fflag = fp->f_flag) & FREAD) == 0) { 1096 error = EBADF; 1097 goto out; 1098 } 1099 vp = fp->f_vnode; 1100 rwflag = 0; 1101 if (vp->v_type == VREG) { 1102 1103 if (bcount == 0) 1104 goto out; 1105 1106 /* 1107 * return EINVAL for offsets that cannot be 1108 * represented in an off_t. 1109 */ 1110 if (fileoff > maxoff) { 1111 error = EINVAL; 1112 goto out; 1113 } 1114 1115 if (fileoff + bcount > maxoff) 1116 bcount = (ssize_t)((u_offset_t)maxoff - fileoff); 1117 } else if (vp->v_type == VFIFO) { 1118 error = ESPIPE; 1119 goto out; 1120 } 1121 /* 1122 * We have to enter the critical region before calling VOP_RWLOCK 1123 * to avoid a deadlock with ufs. 1124 */ 1125 if (nbl_need_check(vp)) { 1126 int svmand; 1127 1128 nbl_start_crit(vp, RW_READER); 1129 in_crit = 1; 1130 error = nbl_svmand(vp, fp->f_cred, &svmand); 1131 if (error != 0) 1132 goto out; 1133 if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, 1134 NULL)) { 1135 error = EACCES; 1136 goto out; 1137 } 1138 } 1139 1140 (void) VOP_RWLOCK(vp, rwflag, NULL); 1141 1142 /* 1143 * Behaviour is same as read(2). Please see comments in 1144 * read(2). 1145 */ 1146 1147 if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) { 1148 struct vattr va; 1149 va.va_mask = AT_SIZE; 1150 if ((error = 1151 VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) { 1152 VOP_RWUNLOCK(vp, rwflag, NULL); 1153 goto out; 1154 } 1155 if (fileoff >= va.va_size) { 1156 VOP_RWUNLOCK(vp, rwflag, NULL); 1157 count = 0; 1158 goto out; 1159 } else { 1160 VOP_RWUNLOCK(vp, rwflag, NULL); 1161 error = EOVERFLOW; 1162 goto out; 1163 } 1164 } 1165 if ((vp->v_type == VREG) && 1166 (fileoff + count > OFFSET_MAX(fp))) { 1167 count = (ssize_t)(OFFSET_MAX(fp) - fileoff); 1168 } 1169 auio.uio_loffset = fileoff; 1170 auio.uio_iov = aiov; 1171 auio.uio_iovcnt = iovcnt; 1172 auio.uio_resid = bcount = count; 1173 auio.uio_segflg = UIO_USERSPACE; 1174 auio.uio_llimit = MAXOFFSET_T; 1175 auio.uio_fmode = fflag; 1176 if (bcount <= copyout_max_cached) 1177 auio.uio_extflg = UIO_COPY_CACHED; 1178 else 1179 auio.uio_extflg = UIO_COPY_DEFAULT; 1180 1181 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 1182 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 1183 count -= auio.uio_resid; 1184 CPU_STATS_ENTER_K(); 1185 cp = CPU; 1186 CPU_STATS_ADDQ(cp, sys, sysread, 1); 1187 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count); 1188 CPU_STATS_EXIT_K(); 1189 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count; 1190 1191 VOP_RWUNLOCK(vp, rwflag, NULL); 1192 1193 if (error == EINTR && count != 0) 1194 error = 0; 1195 out: 1196 if (in_crit) 1197 nbl_end_crit(vp); 1198 releasef(fdes); 1199 if (aiovlen != 0) 1200 kmem_free(aiov, aiovlen); 1201 if (error) 1202 return (set_errno(error)); 1203 return (count); 1204 } 1205 1206 ssize_t 1207 pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset, 1208 off_t extended_offset) 1209 { 1210 struct uio auio; 1211 struct iovec buf[IOV_MAX_STACK], *aiov = buf; 1212 int aiovlen = 0; 1213 file_t *fp; 1214 register vnode_t *vp; 1215 struct cpu *cp; 1216 int fflag, ioflag, rwflag; 1217 ssize_t count, bcount; 1218 int error = 0; 1219 int i; 1220 1221 #if defined(_SYSCALL32_IMPL) || defined(_ILP32) 1222 u_offset_t fileoff = ((u_offset_t)extended_offset << 32) | 1223 (u_offset_t)offset; 1224 #else /* _SYSCALL32_IMPL || _ILP32 */ 1225 u_offset_t fileoff = (u_offset_t)(ulong_t)offset; 1226 #endif /* _SYSCALL32_IMPR || _ILP32 */ 1227 #ifdef _SYSCALL32_IMPL 1228 const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 && 1229 extended_offset == 0? 1230 MAXOFF32_T : MAXOFFSET_T; 1231 #else /* _SYSCALL32_IMPL */ 1232 const u_offset_t maxoff = MAXOFF32_T; 1233 #endif /* _SYSCALL32_IMPL */ 1234 1235 int in_crit = 0; 1236 1237 if (iovcnt <= 0 || iovcnt > IOV_MAX) 1238 return (set_errno(EINVAL)); 1239 1240 if (iovcnt > IOV_MAX_STACK) { 1241 aiovlen = iovcnt * sizeof (iovec_t); 1242 aiov = kmem_alloc(aiovlen, KM_SLEEP); 1243 } 1244 1245 #ifdef _SYSCALL32_IMPL 1246 /* 1247 * 32-bit callers need to have their iovec expanded, 1248 * while ensuring that they can't move more than 2Gbytes 1249 * of data in a single call. 1250 */ 1251 if (get_udatamodel() == DATAMODEL_ILP32) { 1252 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32; 1253 int aiov32len; 1254 ssize32_t count32; 1255 1256 aiov32len = iovcnt * sizeof (iovec32_t); 1257 if (aiovlen != 0) 1258 aiov32 = kmem_alloc(aiov32len, KM_SLEEP); 1259 1260 if (copyin(iovp, aiov32, aiov32len)) { 1261 if (aiovlen != 0) { 1262 kmem_free(aiov32, aiov32len); 1263 kmem_free(aiov, aiovlen); 1264 } 1265 return (set_errno(EFAULT)); 1266 } 1267 1268 count32 = 0; 1269 for (i = 0; i < iovcnt; i++) { 1270 ssize32_t iovlen32 = aiov32[i].iov_len; 1271 count32 += iovlen32; 1272 if (iovlen32 < 0 || count32 < 0) { 1273 if (aiovlen != 0) { 1274 kmem_free(aiov32, aiov32len); 1275 kmem_free(aiov, aiovlen); 1276 } 1277 return (set_errno(EINVAL)); 1278 } 1279 aiov[i].iov_len = iovlen32; 1280 aiov[i].iov_base = 1281 (caddr_t)(uintptr_t)aiov32[i].iov_base; 1282 } 1283 if (aiovlen != 0) 1284 kmem_free(aiov32, aiov32len); 1285 } else 1286 #endif /* _SYSCALL32_IMPL */ 1287 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) { 1288 if (aiovlen != 0) 1289 kmem_free(aiov, aiovlen); 1290 return (set_errno(EFAULT)); 1291 } 1292 1293 count = 0; 1294 for (i = 0; i < iovcnt; i++) { 1295 ssize_t iovlen = aiov[i].iov_len; 1296 count += iovlen; 1297 if (iovlen < 0 || count < 0) { 1298 if (aiovlen != 0) 1299 kmem_free(aiov, aiovlen); 1300 return (set_errno(EINVAL)); 1301 } 1302 } 1303 1304 if ((bcount = (ssize_t)count) < 0) { 1305 if (aiovlen != 0) 1306 kmem_free(aiov, aiovlen); 1307 return (set_errno(EINVAL)); 1308 } 1309 if ((fp = getf(fdes)) == NULL) { 1310 if (aiovlen != 0) 1311 kmem_free(aiov, aiovlen); 1312 return (set_errno(EBADF)); 1313 } 1314 if (((fflag = fp->f_flag) & FWRITE) == 0) { 1315 error = EBADF; 1316 goto out; 1317 } 1318 vp = fp->f_vnode; 1319 rwflag = 1; 1320 if (vp->v_type == VREG) { 1321 1322 if (bcount == 0) 1323 goto out; 1324 1325 /* 1326 * return EINVAL for offsets that cannot be 1327 * represented in an off_t. 1328 */ 1329 if (fileoff > maxoff) { 1330 error = EINVAL; 1331 goto out; 1332 } 1333 /* 1334 * Take appropriate action if we are trying 1335 * to write above the resource limit. 1336 */ 1337 if (fileoff >= curproc->p_fsz_ctl) { 1338 mutex_enter(&curproc->p_lock); 1339 /* 1340 * Return value ignored because it lists 1341 * actions taken, but we are in an error case. 1342 * We don't have any actions that depend on 1343 * what could happen in this call, so we ignore 1344 * the return value. 1345 */ 1346 (void) rctl_action( 1347 rctlproc_legacy[RLIMIT_FSIZE], 1348 curproc->p_rctls, curproc, 1349 RCA_UNSAFE_SIGINFO); 1350 mutex_exit(&curproc->p_lock); 1351 1352 error = EFBIG; 1353 goto out; 1354 } 1355 /* 1356 * Don't allow pwritev to cause file sizes to exceed 1357 * maxoff. 1358 */ 1359 if (fileoff == maxoff) { 1360 error = EFBIG; 1361 goto out; 1362 } 1363 1364 if (fileoff + bcount > maxoff) 1365 bcount = (ssize_t)((u_offset_t)maxoff - fileoff); 1366 } else if (vp->v_type == VFIFO) { 1367 error = ESPIPE; 1368 goto out; 1369 } 1370 /* 1371 * We have to enter the critical region before calling VOP_RWLOCK 1372 * to avoid a deadlock with ufs. 1373 */ 1374 if (nbl_need_check(vp)) { 1375 int svmand; 1376 1377 nbl_start_crit(vp, RW_READER); 1378 in_crit = 1; 1379 error = nbl_svmand(vp, fp->f_cred, &svmand); 1380 if (error != 0) 1381 goto out; 1382 if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, 1383 NULL)) { 1384 error = EACCES; 1385 goto out; 1386 } 1387 } 1388 1389 (void) VOP_RWLOCK(vp, rwflag, NULL); 1390 1391 1392 /* 1393 * Behaviour is same as write(2). Please see comments for 1394 * write(2). 1395 */ 1396 1397 if (vp->v_type == VREG) { 1398 if (fileoff >= curproc->p_fsz_ctl) { 1399 VOP_RWUNLOCK(vp, rwflag, NULL); 1400 mutex_enter(&curproc->p_lock); 1401 /* see above rctl_action comment */ 1402 (void) rctl_action( 1403 rctlproc_legacy[RLIMIT_FSIZE], 1404 curproc->p_rctls, 1405 curproc, RCA_UNSAFE_SIGINFO); 1406 mutex_exit(&curproc->p_lock); 1407 error = EFBIG; 1408 goto out; 1409 } 1410 if (fileoff >= OFFSET_MAX(fp)) { 1411 VOP_RWUNLOCK(vp, rwflag, NULL); 1412 error = EFBIG; 1413 goto out; 1414 } 1415 if (fileoff + count > OFFSET_MAX(fp)) 1416 count = (ssize_t)(OFFSET_MAX(fp) - fileoff); 1417 } 1418 1419 auio.uio_loffset = fileoff; 1420 auio.uio_iov = aiov; 1421 auio.uio_iovcnt = iovcnt; 1422 auio.uio_resid = bcount = count; 1423 auio.uio_segflg = UIO_USERSPACE; 1424 auio.uio_llimit = curproc->p_fsz_ctl; 1425 auio.uio_fmode = fflag; 1426 auio.uio_extflg = UIO_COPY_CACHED; 1427 ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC); 1428 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 1429 count -= auio.uio_resid; 1430 CPU_STATS_ENTER_K(); 1431 cp = CPU; 1432 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 1433 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count); 1434 CPU_STATS_EXIT_K(); 1435 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count; 1436 1437 VOP_RWUNLOCK(vp, rwflag, NULL); 1438 1439 if (error == EINTR && count != 0) 1440 error = 0; 1441 out: 1442 if (in_crit) 1443 nbl_end_crit(vp); 1444 releasef(fdes); 1445 if (aiovlen != 0) 1446 kmem_free(aiov, aiovlen); 1447 if (error) 1448 return (set_errno(error)); 1449 return (count); 1450 } 1451 1452 #if defined(_SYSCALL32_IMPL) || defined(_ILP32) 1453 1454 /* 1455 * This syscall supplies 64-bit file offsets to 32-bit applications only. 1456 */ 1457 ssize32_t 1458 pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1, 1459 uint32_t offset_2) 1460 { 1461 struct uio auio; 1462 struct iovec aiov; 1463 file_t *fp; 1464 register vnode_t *vp; 1465 struct cpu *cp; 1466 int fflag, ioflag, rwflag; 1467 ssize_t bcount; 1468 int error = 0; 1469 u_offset_t fileoff; 1470 int in_crit = 0; 1471 1472 #if defined(_LITTLE_ENDIAN) 1473 fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1; 1474 #else 1475 fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2; 1476 #endif 1477 1478 if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX) 1479 return (set_errno(EINVAL)); 1480 1481 if ((fp = getf(fdes)) == NULL) 1482 return (set_errno(EBADF)); 1483 if (((fflag = fp->f_flag) & (FREAD)) == 0) { 1484 error = EBADF; 1485 goto out; 1486 } 1487 1488 rwflag = 0; 1489 vp = fp->f_vnode; 1490 1491 if (vp->v_type == VREG) { 1492 1493 if (bcount == 0) 1494 goto out; 1495 1496 /* 1497 * Same as pread. See comments in pread. 1498 */ 1499 1500 if (fileoff > MAXOFFSET_T) { 1501 error = EINVAL; 1502 goto out; 1503 } 1504 if (fileoff + bcount > MAXOFFSET_T) 1505 bcount = (ssize_t)(MAXOFFSET_T - fileoff); 1506 } else if (vp->v_type == VFIFO) { 1507 error = ESPIPE; 1508 goto out; 1509 } 1510 1511 /* 1512 * We have to enter the critical region before calling VOP_RWLOCK 1513 * to avoid a deadlock with ufs. 1514 */ 1515 if (nbl_need_check(vp)) { 1516 int svmand; 1517 1518 nbl_start_crit(vp, RW_READER); 1519 in_crit = 1; 1520 error = nbl_svmand(vp, fp->f_cred, &svmand); 1521 if (error != 0) 1522 goto out; 1523 if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand, 1524 NULL)) { 1525 error = EACCES; 1526 goto out; 1527 } 1528 } 1529 1530 aiov.iov_base = cbuf; 1531 aiov.iov_len = bcount; 1532 (void) VOP_RWLOCK(vp, rwflag, NULL); 1533 auio.uio_loffset = fileoff; 1534 1535 /* 1536 * Note: File size can never be greater than MAXOFFSET_T. 1537 * If ever we start supporting 128 bit files the code 1538 * similar to the one in pread at this place should be here. 1539 * Here we avoid the unnecessary VOP_GETATTR() when we 1540 * know that fileoff == MAXOFFSET_T implies that it is always 1541 * greater than or equal to file size. 1542 */ 1543 auio.uio_iov = &aiov; 1544 auio.uio_iovcnt = 1; 1545 auio.uio_resid = bcount; 1546 auio.uio_segflg = UIO_USERSPACE; 1547 auio.uio_llimit = MAXOFFSET_T; 1548 auio.uio_fmode = fflag; 1549 auio.uio_extflg = UIO_COPY_CACHED; 1550 1551 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 1552 1553 /* If read sync is not asked for, filter sync flags */ 1554 if ((ioflag & FRSYNC) == 0) 1555 ioflag &= ~(FSYNC|FDSYNC); 1556 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 1557 bcount -= auio.uio_resid; 1558 CPU_STATS_ENTER_K(); 1559 cp = CPU; 1560 CPU_STATS_ADDQ(cp, sys, sysread, 1); 1561 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount); 1562 CPU_STATS_EXIT_K(); 1563 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 1564 VOP_RWUNLOCK(vp, rwflag, NULL); 1565 1566 if (error == EINTR && bcount != 0) 1567 error = 0; 1568 out: 1569 if (in_crit) 1570 nbl_end_crit(vp); 1571 releasef(fdes); 1572 if (error) 1573 return (set_errno(error)); 1574 return (bcount); 1575 } 1576 1577 /* 1578 * This syscall supplies 64-bit file offsets to 32-bit applications only. 1579 */ 1580 ssize32_t 1581 pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1, 1582 uint32_t offset_2) 1583 { 1584 struct uio auio; 1585 struct iovec aiov; 1586 file_t *fp; 1587 register vnode_t *vp; 1588 struct cpu *cp; 1589 int fflag, ioflag, rwflag; 1590 ssize_t bcount; 1591 int error = 0; 1592 u_offset_t fileoff; 1593 int in_crit = 0; 1594 1595 #if defined(_LITTLE_ENDIAN) 1596 fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1; 1597 #else 1598 fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2; 1599 #endif 1600 1601 if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX) 1602 return (set_errno(EINVAL)); 1603 if ((fp = getf(fdes)) == NULL) 1604 return (set_errno(EBADF)); 1605 if (((fflag = fp->f_flag) & (FWRITE)) == 0) { 1606 error = EBADF; 1607 goto out; 1608 } 1609 1610 rwflag = 1; 1611 vp = fp->f_vnode; 1612 1613 if (vp->v_type == VREG) { 1614 1615 if (bcount == 0) 1616 goto out; 1617 1618 /* 1619 * See comments in pwrite. 1620 */ 1621 if (fileoff > MAXOFFSET_T) { 1622 error = EINVAL; 1623 goto out; 1624 } 1625 if (fileoff >= curproc->p_fsz_ctl) { 1626 mutex_enter(&curproc->p_lock); 1627 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 1628 curproc->p_rctls, curproc, RCA_SAFE); 1629 mutex_exit(&curproc->p_lock); 1630 error = EFBIG; 1631 goto out; 1632 } 1633 if (fileoff == MAXOFFSET_T) { 1634 error = EFBIG; 1635 goto out; 1636 } 1637 if (fileoff + bcount > MAXOFFSET_T) 1638 bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff); 1639 } else if (vp->v_type == VFIFO) { 1640 error = ESPIPE; 1641 goto out; 1642 } 1643 1644 /* 1645 * We have to enter the critical region before calling VOP_RWLOCK 1646 * to avoid a deadlock with ufs. 1647 */ 1648 if (nbl_need_check(vp)) { 1649 int svmand; 1650 1651 nbl_start_crit(vp, RW_READER); 1652 in_crit = 1; 1653 error = nbl_svmand(vp, fp->f_cred, &svmand); 1654 if (error != 0) 1655 goto out; 1656 if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand, 1657 NULL)) { 1658 error = EACCES; 1659 goto out; 1660 } 1661 } 1662 1663 aiov.iov_base = cbuf; 1664 aiov.iov_len = bcount; 1665 (void) VOP_RWLOCK(vp, rwflag, NULL); 1666 auio.uio_loffset = fileoff; 1667 auio.uio_iov = &aiov; 1668 auio.uio_iovcnt = 1; 1669 auio.uio_resid = bcount; 1670 auio.uio_segflg = UIO_USERSPACE; 1671 auio.uio_llimit = curproc->p_fsz_ctl; 1672 auio.uio_fmode = fflag; 1673 auio.uio_extflg = UIO_COPY_CACHED; 1674 1675 /* 1676 * The SUSv4 POSIX specification states: 1677 * The pwrite() function shall be equivalent to write(), except 1678 * that it writes into a given position and does not change 1679 * the file offset (regardless of whether O_APPEND is set). 1680 * To make this be true, we omit the FAPPEND flag from ioflag. 1681 */ 1682 ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC); 1683 1684 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 1685 bcount -= auio.uio_resid; 1686 CPU_STATS_ENTER_K(); 1687 cp = CPU; 1688 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 1689 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount); 1690 CPU_STATS_EXIT_K(); 1691 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 1692 VOP_RWUNLOCK(vp, rwflag, NULL); 1693 1694 if (error == EINTR && bcount != 0) 1695 error = 0; 1696 out: 1697 if (in_crit) 1698 nbl_end_crit(vp); 1699 releasef(fdes); 1700 if (error) 1701 return (set_errno(error)); 1702 return (bcount); 1703 } 1704 1705 #endif /* _SYSCALL32_IMPL || _ILP32 */ 1706 1707 #ifdef _SYSCALL32_IMPL 1708 /* 1709 * Tail-call elimination of xxx32() down to xxx() 1710 * 1711 * A number of xxx32 system calls take a len (or count) argument and 1712 * return a number in the range [0,len] or -1 on error. 1713 * Given an ssize32_t input len, the downcall xxx() will return 1714 * a 64-bit value that is -1 or in the range [0,len] which actually 1715 * is a proper return value for the xxx32 call. So even if the xxx32 1716 * calls can be considered as returning a ssize32_t, they are currently 1717 * declared as returning a ssize_t as this enables tail-call elimination. 1718 * 1719 * The cast of len (or count) to ssize32_t is needed to ensure we pass 1720 * down negative input values as such and let the downcall handle error 1721 * reporting. Functions covered by this comments are: 1722 * 1723 * rw.c: read32, write32, pread32, pwrite32, readv32, writev32. 1724 * socksyscall.c: recv32, recvfrom32, send32, sendto32. 1725 * readlink.c: readlink32. 1726 */ 1727 1728 ssize_t 1729 read32(int32_t fdes, caddr32_t cbuf, size32_t count) 1730 { 1731 return (read(fdes, 1732 (void *)(uintptr_t)cbuf, (ssize32_t)count)); 1733 } 1734 1735 ssize_t 1736 write32(int32_t fdes, caddr32_t cbuf, size32_t count) 1737 { 1738 return (write(fdes, 1739 (void *)(uintptr_t)cbuf, (ssize32_t)count)); 1740 } 1741 1742 ssize_t 1743 pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) 1744 { 1745 return (pread(fdes, 1746 (void *)(uintptr_t)cbuf, (ssize32_t)count, 1747 (off_t)(uint32_t)offset)); 1748 } 1749 1750 ssize_t 1751 pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) 1752 { 1753 return (pwrite(fdes, 1754 (void *)(uintptr_t)cbuf, (ssize32_t)count, 1755 (off_t)(uint32_t)offset)); 1756 } 1757 1758 ssize_t 1759 readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) 1760 { 1761 return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt)); 1762 } 1763 1764 ssize_t 1765 writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) 1766 { 1767 return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt)); 1768 } 1769 #endif /* _SYSCALL32_IMPL */ 1770