1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * Copyright (c) 2015, Joyent, Inc. All rights reserved. 26 */ 27 28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 29 /* All Rights Reserved */ 30 31 /* 32 * Portions of this source code were derived from Berkeley 4.3 BSD 33 * under license from the Regents of the University of California. 34 */ 35 36 #include <sys/param.h> 37 #include <sys/isa_defs.h> 38 #include <sys/types.h> 39 #include <sys/inttypes.h> 40 #include <sys/sysmacros.h> 41 #include <sys/cred.h> 42 #include <sys/user.h> 43 #include <sys/systm.h> 44 #include <sys/errno.h> 45 #include <sys/vnode.h> 46 #include <sys/file.h> 47 #include <sys/proc.h> 48 #include <sys/cpuvar.h> 49 #include <sys/uio.h> 50 #include <sys/debug.h> 51 #include <sys/rctl.h> 52 #include <sys/nbmlock.h> 53 54 #define COPYOUT_MAX_CACHE (1<<17) /* 128K */ 55 56 size_t copyout_max_cached = COPYOUT_MAX_CACHE; /* global so it's patchable */ 57 58 /* 59 * read, write, pread, pwrite, readv, and writev syscalls. 60 * 61 * 64-bit open: all open's are large file opens. 62 * Large Files: the behaviour of read depends on whether the fd 63 * corresponds to large open or not. 64 * 32-bit open: FOFFMAX flag not set. 65 * read until MAXOFF32_T - 1 and read at MAXOFF32_T returns 66 * EOVERFLOW if count is non-zero and if size of file 67 * is > MAXOFF32_T. If size of file is <= MAXOFF32_T read 68 * at >= MAXOFF32_T returns EOF. 69 */ 70 71 /* 72 * Native system call 73 */ 74 ssize_t 75 read(int fdes, void *cbuf, size_t count) 76 { 77 struct uio auio; 78 struct iovec aiov; 79 file_t *fp; 80 register vnode_t *vp; 81 struct cpu *cp; 82 int fflag, ioflag, rwflag; 83 ssize_t cnt, bcount; 84 int error = 0; 85 u_offset_t fileoff; 86 int in_crit = 0; 87 88 if ((cnt = (ssize_t)count) < 0) 89 return (set_errno(EINVAL)); 90 if ((fp = getf(fdes)) == NULL) 91 return (set_errno(EBADF)); 92 if (((fflag = fp->f_flag) & FREAD) == 0) { 93 error = EBADF; 94 goto out; 95 } 96 vp = fp->f_vnode; 97 98 if (vp->v_type == VREG && cnt == 0) { 99 goto out; 100 } 101 102 rwflag = 0; 103 aiov.iov_base = cbuf; 104 aiov.iov_len = cnt; 105 106 /* 107 * We have to enter the critical region before calling VOP_RWLOCK 108 * to avoid a deadlock with write() calls. 109 */ 110 if (nbl_need_check(vp)) { 111 int svmand; 112 113 nbl_start_crit(vp, RW_READER); 114 in_crit = 1; 115 error = nbl_svmand(vp, fp->f_cred, &svmand); 116 if (error != 0) 117 goto out; 118 if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand, 119 NULL)) { 120 error = EACCES; 121 goto out; 122 } 123 } 124 125 (void) VOP_RWLOCK(vp, rwflag, NULL); 126 127 /* 128 * We do the following checks inside VOP_RWLOCK so as to 129 * prevent file size from changing while these checks are 130 * being done. Also, we load fp's offset to the local 131 * variable fileoff because we can have a parallel lseek 132 * going on (f_offset is not protected by any lock) which 133 * could change f_offset. We need to see the value only 134 * once here and take a decision. Seeing it more than once 135 * can lead to incorrect functionality. 136 */ 137 138 fileoff = (u_offset_t)fp->f_offset; 139 if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) { 140 struct vattr va; 141 va.va_mask = AT_SIZE; 142 if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) { 143 VOP_RWUNLOCK(vp, rwflag, NULL); 144 goto out; 145 } 146 if (fileoff >= va.va_size) { 147 cnt = 0; 148 VOP_RWUNLOCK(vp, rwflag, NULL); 149 goto out; 150 } else { 151 error = EOVERFLOW; 152 VOP_RWUNLOCK(vp, rwflag, NULL); 153 goto out; 154 } 155 } 156 if ((vp->v_type == VREG) && 157 (fileoff + cnt > OFFSET_MAX(fp))) { 158 cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff); 159 } 160 auio.uio_loffset = fileoff; 161 auio.uio_iov = &aiov; 162 auio.uio_iovcnt = 1; 163 auio.uio_resid = bcount = cnt; 164 auio.uio_segflg = UIO_USERSPACE; 165 auio.uio_llimit = MAXOFFSET_T; 166 auio.uio_fmode = fflag; 167 /* 168 * Only use bypass caches when the count is large enough 169 */ 170 if (bcount <= copyout_max_cached) 171 auio.uio_extflg = UIO_COPY_CACHED; 172 else 173 auio.uio_extflg = UIO_COPY_DEFAULT; 174 175 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 176 177 /* If read sync is not asked for, filter sync flags */ 178 if ((ioflag & FRSYNC) == 0) 179 ioflag &= ~(FSYNC|FDSYNC); 180 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 181 cnt -= auio.uio_resid; 182 CPU_STATS_ENTER_K(); 183 cp = CPU; 184 CPU_STATS_ADDQ(cp, sys, sysread, 1); 185 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt); 186 CPU_STATS_EXIT_K(); 187 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt; 188 189 if (vp->v_type == VFIFO) /* Backward compatibility */ 190 fp->f_offset = cnt; 191 else if (((fp->f_flag & FAPPEND) == 0) || 192 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ 193 fp->f_offset = auio.uio_loffset; 194 VOP_RWUNLOCK(vp, rwflag, NULL); 195 196 if (error == EINTR && cnt != 0) 197 error = 0; 198 out: 199 if (in_crit) 200 nbl_end_crit(vp); 201 releasef(fdes); 202 if (error) 203 return (set_errno(error)); 204 return (cnt); 205 } 206 207 /* 208 * Native system call 209 */ 210 ssize_t 211 write(int fdes, void *cbuf, size_t count) 212 { 213 struct uio auio; 214 struct iovec aiov; 215 file_t *fp; 216 register vnode_t *vp; 217 struct cpu *cp; 218 int fflag, ioflag, rwflag; 219 ssize_t cnt, bcount; 220 int error = 0; 221 u_offset_t fileoff; 222 int in_crit = 0; 223 224 if ((cnt = (ssize_t)count) < 0) 225 return (set_errno(EINVAL)); 226 if ((fp = getf(fdes)) == NULL) 227 return (set_errno(EBADF)); 228 if (((fflag = fp->f_flag) & FWRITE) == 0) { 229 error = EBADF; 230 goto out; 231 } 232 vp = fp->f_vnode; 233 234 if (vp->v_type == VREG && cnt == 0) { 235 goto out; 236 } 237 238 rwflag = 1; 239 aiov.iov_base = cbuf; 240 aiov.iov_len = cnt; 241 242 /* 243 * We have to enter the critical region before calling VOP_RWLOCK 244 * to avoid a deadlock with ufs. 245 */ 246 if (nbl_need_check(vp)) { 247 int svmand; 248 249 nbl_start_crit(vp, RW_READER); 250 in_crit = 1; 251 error = nbl_svmand(vp, fp->f_cred, &svmand); 252 if (error != 0) 253 goto out; 254 if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand, 255 NULL)) { 256 error = EACCES; 257 goto out; 258 } 259 } 260 261 (void) VOP_RWLOCK(vp, rwflag, NULL); 262 263 fileoff = fp->f_offset; 264 if (vp->v_type == VREG) { 265 266 /* 267 * We raise psignal if write for >0 bytes causes 268 * it to exceed the ulimit. 269 */ 270 if (fileoff >= curproc->p_fsz_ctl) { 271 VOP_RWUNLOCK(vp, rwflag, NULL); 272 273 mutex_enter(&curproc->p_lock); 274 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 275 curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO); 276 mutex_exit(&curproc->p_lock); 277 278 error = EFBIG; 279 goto out; 280 } 281 /* 282 * We return EFBIG if write is done at an offset 283 * greater than the offset maximum for this file structure. 284 */ 285 286 if (fileoff >= OFFSET_MAX(fp)) { 287 VOP_RWUNLOCK(vp, rwflag, NULL); 288 error = EFBIG; 289 goto out; 290 } 291 /* 292 * Limit the bytes to be written upto offset maximum for 293 * this open file structure. 294 */ 295 if (fileoff + cnt > OFFSET_MAX(fp)) 296 cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff); 297 } 298 auio.uio_loffset = fileoff; 299 auio.uio_iov = &aiov; 300 auio.uio_iovcnt = 1; 301 auio.uio_resid = bcount = cnt; 302 auio.uio_segflg = UIO_USERSPACE; 303 auio.uio_llimit = curproc->p_fsz_ctl; 304 auio.uio_fmode = fflag; 305 auio.uio_extflg = UIO_COPY_DEFAULT; 306 307 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 308 309 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 310 cnt -= auio.uio_resid; 311 CPU_STATS_ENTER_K(); 312 cp = CPU; 313 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 314 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt); 315 CPU_STATS_EXIT_K(); 316 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt; 317 318 if (vp->v_type == VFIFO) /* Backward compatibility */ 319 fp->f_offset = cnt; 320 else if (((fp->f_flag & FAPPEND) == 0) || 321 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ 322 fp->f_offset = auio.uio_loffset; 323 VOP_RWUNLOCK(vp, rwflag, NULL); 324 325 if (error == EINTR && cnt != 0) 326 error = 0; 327 out: 328 if (in_crit) 329 nbl_end_crit(vp); 330 releasef(fdes); 331 if (error) 332 return (set_errno(error)); 333 return (cnt); 334 } 335 336 ssize_t 337 pread(int fdes, void *cbuf, size_t count, off_t offset) 338 { 339 struct uio auio; 340 struct iovec aiov; 341 file_t *fp; 342 register vnode_t *vp; 343 struct cpu *cp; 344 int fflag, ioflag, rwflag; 345 ssize_t bcount; 346 int error = 0; 347 u_offset_t fileoff = (u_offset_t)(ulong_t)offset; 348 #ifdef _SYSCALL32_IMPL 349 u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ? 350 MAXOFF32_T : MAXOFFSET_T; 351 #else 352 const u_offset_t maxoff = MAXOFF32_T; 353 #endif 354 int in_crit = 0; 355 356 if ((bcount = (ssize_t)count) < 0) 357 return (set_errno(EINVAL)); 358 359 if ((fp = getf(fdes)) == NULL) 360 return (set_errno(EBADF)); 361 if (((fflag = fp->f_flag) & (FREAD)) == 0) { 362 error = EBADF; 363 goto out; 364 } 365 366 rwflag = 0; 367 vp = fp->f_vnode; 368 369 if (vp->v_type == VREG) { 370 371 if (bcount == 0) 372 goto out; 373 374 /* 375 * Return EINVAL if an invalid offset comes to pread. 376 * Negative offset from user will cause this error. 377 */ 378 379 if (fileoff > maxoff) { 380 error = EINVAL; 381 goto out; 382 } 383 /* 384 * Limit offset such that we don't read or write 385 * a file beyond the maximum offset representable in 386 * an off_t structure. 387 */ 388 if (fileoff + bcount > maxoff) 389 bcount = (ssize_t)((offset_t)maxoff - fileoff); 390 } else if (vp->v_type == VFIFO) { 391 error = ESPIPE; 392 goto out; 393 } 394 395 /* 396 * We have to enter the critical region before calling VOP_RWLOCK 397 * to avoid a deadlock with ufs. 398 */ 399 if (nbl_need_check(vp)) { 400 int svmand; 401 402 nbl_start_crit(vp, RW_READER); 403 in_crit = 1; 404 error = nbl_svmand(vp, fp->f_cred, &svmand); 405 if (error != 0) 406 goto out; 407 if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand, 408 NULL)) { 409 error = EACCES; 410 goto out; 411 } 412 } 413 414 aiov.iov_base = cbuf; 415 aiov.iov_len = bcount; 416 (void) VOP_RWLOCK(vp, rwflag, NULL); 417 if (vp->v_type == VREG && fileoff == (u_offset_t)maxoff) { 418 struct vattr va; 419 va.va_mask = AT_SIZE; 420 if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) { 421 VOP_RWUNLOCK(vp, rwflag, NULL); 422 goto out; 423 } 424 VOP_RWUNLOCK(vp, rwflag, NULL); 425 426 /* 427 * We have to return EOF if fileoff is >= file size. 428 */ 429 if (fileoff >= va.va_size) { 430 bcount = 0; 431 goto out; 432 } 433 434 /* 435 * File is greater than or equal to maxoff and therefore 436 * we return EOVERFLOW. 437 */ 438 error = EOVERFLOW; 439 goto out; 440 } 441 auio.uio_loffset = fileoff; 442 auio.uio_iov = &aiov; 443 auio.uio_iovcnt = 1; 444 auio.uio_resid = bcount; 445 auio.uio_segflg = UIO_USERSPACE; 446 auio.uio_llimit = MAXOFFSET_T; 447 auio.uio_fmode = fflag; 448 auio.uio_extflg = UIO_COPY_CACHED; 449 450 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 451 452 /* If read sync is not asked for, filter sync flags */ 453 if ((ioflag & FRSYNC) == 0) 454 ioflag &= ~(FSYNC|FDSYNC); 455 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 456 bcount -= auio.uio_resid; 457 CPU_STATS_ENTER_K(); 458 cp = CPU; 459 CPU_STATS_ADDQ(cp, sys, sysread, 1); 460 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount); 461 CPU_STATS_EXIT_K(); 462 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 463 VOP_RWUNLOCK(vp, rwflag, NULL); 464 465 if (error == EINTR && bcount != 0) 466 error = 0; 467 out: 468 if (in_crit) 469 nbl_end_crit(vp); 470 releasef(fdes); 471 if (error) 472 return (set_errno(error)); 473 return (bcount); 474 } 475 476 ssize_t 477 pwrite(int fdes, void *cbuf, size_t count, off_t offset) 478 { 479 struct uio auio; 480 struct iovec aiov; 481 file_t *fp; 482 register vnode_t *vp; 483 struct cpu *cp; 484 int fflag, ioflag, rwflag; 485 ssize_t bcount; 486 int error = 0; 487 u_offset_t fileoff = (u_offset_t)(ulong_t)offset; 488 #ifdef _SYSCALL32_IMPL 489 u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ? 490 MAXOFF32_T : MAXOFFSET_T; 491 #else 492 const u_offset_t maxoff = MAXOFF32_T; 493 #endif 494 int in_crit = 0; 495 496 if ((bcount = (ssize_t)count) < 0) 497 return (set_errno(EINVAL)); 498 if ((fp = getf(fdes)) == NULL) 499 return (set_errno(EBADF)); 500 if (((fflag = fp->f_flag) & (FWRITE)) == 0) { 501 error = EBADF; 502 goto out; 503 } 504 505 rwflag = 1; 506 vp = fp->f_vnode; 507 508 if (vp->v_type == VREG) { 509 510 if (bcount == 0) 511 goto out; 512 513 /* 514 * return EINVAL for offsets that cannot be 515 * represented in an off_t. 516 */ 517 if (fileoff > maxoff) { 518 error = EINVAL; 519 goto out; 520 } 521 /* 522 * Take appropriate action if we are trying to write above the 523 * resource limit. 524 */ 525 if (fileoff >= curproc->p_fsz_ctl) { 526 mutex_enter(&curproc->p_lock); 527 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 528 curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO); 529 mutex_exit(&curproc->p_lock); 530 531 error = EFBIG; 532 goto out; 533 } 534 /* 535 * Don't allow pwrite to cause file sizes to exceed 536 * maxoff. 537 */ 538 if (fileoff == maxoff) { 539 error = EFBIG; 540 goto out; 541 } 542 if (fileoff + count > maxoff) 543 bcount = (ssize_t)((u_offset_t)maxoff - fileoff); 544 } else if (vp->v_type == VFIFO) { 545 error = ESPIPE; 546 goto out; 547 } 548 549 /* 550 * We have to enter the critical region before calling VOP_RWLOCK 551 * to avoid a deadlock with ufs. 552 */ 553 if (nbl_need_check(vp)) { 554 int svmand; 555 556 nbl_start_crit(vp, RW_READER); 557 in_crit = 1; 558 error = nbl_svmand(vp, fp->f_cred, &svmand); 559 if (error != 0) 560 goto out; 561 if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand, 562 NULL)) { 563 error = EACCES; 564 goto out; 565 } 566 } 567 568 aiov.iov_base = cbuf; 569 aiov.iov_len = bcount; 570 (void) VOP_RWLOCK(vp, rwflag, NULL); 571 auio.uio_loffset = fileoff; 572 auio.uio_iov = &aiov; 573 auio.uio_iovcnt = 1; 574 auio.uio_resid = bcount; 575 auio.uio_segflg = UIO_USERSPACE; 576 auio.uio_llimit = curproc->p_fsz_ctl; 577 auio.uio_fmode = fflag; 578 auio.uio_extflg = UIO_COPY_CACHED; 579 580 /* 581 * The SUSv4 POSIX specification states: 582 * The pwrite() function shall be equivalent to write(), except 583 * that it writes into a given position and does not change 584 * the file offset (regardless of whether O_APPEND is set). 585 * To make this be true, we omit the FAPPEND flag from ioflag. 586 */ 587 ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC); 588 589 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 590 bcount -= auio.uio_resid; 591 CPU_STATS_ENTER_K(); 592 cp = CPU; 593 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 594 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount); 595 CPU_STATS_EXIT_K(); 596 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 597 VOP_RWUNLOCK(vp, rwflag, NULL); 598 599 if (error == EINTR && bcount != 0) 600 error = 0; 601 out: 602 if (in_crit) 603 nbl_end_crit(vp); 604 releasef(fdes); 605 if (error) 606 return (set_errno(error)); 607 return (bcount); 608 } 609 610 /* 611 * XXX -- The SVID refers to IOV_MAX, but doesn't define it. Grrrr.... 612 * XXX -- However, SVVS expects readv() and writev() to fail if 613 * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source), 614 * XXX -- so I guess that's the "interface". 615 */ 616 #define DEF_IOV_MAX 16 617 618 ssize_t 619 readv(int fdes, struct iovec *iovp, int iovcnt) 620 { 621 struct uio auio; 622 struct iovec aiov[DEF_IOV_MAX]; 623 file_t *fp; 624 register vnode_t *vp; 625 struct cpu *cp; 626 int fflag, ioflag, rwflag; 627 ssize_t count, bcount; 628 int error = 0; 629 int i; 630 u_offset_t fileoff; 631 int in_crit = 0; 632 633 if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX) 634 return (set_errno(EINVAL)); 635 636 #ifdef _SYSCALL32_IMPL 637 /* 638 * 32-bit callers need to have their iovec expanded, 639 * while ensuring that they can't move more than 2Gbytes 640 * of data in a single call. 641 */ 642 if (get_udatamodel() == DATAMODEL_ILP32) { 643 struct iovec32 aiov32[DEF_IOV_MAX]; 644 ssize32_t count32; 645 646 if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32))) 647 return (set_errno(EFAULT)); 648 649 count32 = 0; 650 for (i = 0; i < iovcnt; i++) { 651 ssize32_t iovlen32 = aiov32[i].iov_len; 652 count32 += iovlen32; 653 if (iovlen32 < 0 || count32 < 0) 654 return (set_errno(EINVAL)); 655 aiov[i].iov_len = iovlen32; 656 aiov[i].iov_base = 657 (caddr_t)(uintptr_t)aiov32[i].iov_base; 658 } 659 } else 660 #endif 661 if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec))) 662 return (set_errno(EFAULT)); 663 664 count = 0; 665 for (i = 0; i < iovcnt; i++) { 666 ssize_t iovlen = aiov[i].iov_len; 667 count += iovlen; 668 if (iovlen < 0 || count < 0) 669 return (set_errno(EINVAL)); 670 } 671 if ((fp = getf(fdes)) == NULL) 672 return (set_errno(EBADF)); 673 if (((fflag = fp->f_flag) & FREAD) == 0) { 674 error = EBADF; 675 goto out; 676 } 677 vp = fp->f_vnode; 678 if (vp->v_type == VREG && count == 0) { 679 goto out; 680 } 681 682 rwflag = 0; 683 684 /* 685 * We have to enter the critical region before calling VOP_RWLOCK 686 * to avoid a deadlock with ufs. 687 */ 688 if (nbl_need_check(vp)) { 689 int svmand; 690 691 nbl_start_crit(vp, RW_READER); 692 in_crit = 1; 693 error = nbl_svmand(vp, fp->f_cred, &svmand); 694 if (error != 0) 695 goto out; 696 if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand, 697 NULL)) { 698 error = EACCES; 699 goto out; 700 } 701 } 702 703 (void) VOP_RWLOCK(vp, rwflag, NULL); 704 fileoff = fp->f_offset; 705 706 /* 707 * Behaviour is same as read. Please see comments in read. 708 */ 709 710 if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) { 711 struct vattr va; 712 va.va_mask = AT_SIZE; 713 if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) { 714 VOP_RWUNLOCK(vp, rwflag, NULL); 715 goto out; 716 } 717 if (fileoff >= va.va_size) { 718 VOP_RWUNLOCK(vp, rwflag, NULL); 719 count = 0; 720 goto out; 721 } else { 722 VOP_RWUNLOCK(vp, rwflag, NULL); 723 error = EOVERFLOW; 724 goto out; 725 } 726 } 727 if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) { 728 count = (ssize_t)(OFFSET_MAX(fp) - fileoff); 729 } 730 auio.uio_loffset = fileoff; 731 auio.uio_iov = aiov; 732 auio.uio_iovcnt = iovcnt; 733 auio.uio_resid = bcount = count; 734 auio.uio_segflg = UIO_USERSPACE; 735 auio.uio_llimit = MAXOFFSET_T; 736 auio.uio_fmode = fflag; 737 if (bcount <= copyout_max_cached) 738 auio.uio_extflg = UIO_COPY_CACHED; 739 else 740 auio.uio_extflg = UIO_COPY_DEFAULT; 741 742 743 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 744 745 /* If read sync is not asked for, filter sync flags */ 746 if ((ioflag & FRSYNC) == 0) 747 ioflag &= ~(FSYNC|FDSYNC); 748 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 749 count -= auio.uio_resid; 750 CPU_STATS_ENTER_K(); 751 cp = CPU; 752 CPU_STATS_ADDQ(cp, sys, sysread, 1); 753 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count); 754 CPU_STATS_EXIT_K(); 755 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count; 756 757 if (vp->v_type == VFIFO) /* Backward compatibility */ 758 fp->f_offset = count; 759 else if (((fp->f_flag & FAPPEND) == 0) || 760 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ 761 fp->f_offset = auio.uio_loffset; 762 763 VOP_RWUNLOCK(vp, rwflag, NULL); 764 765 if (error == EINTR && count != 0) 766 error = 0; 767 out: 768 if (in_crit) 769 nbl_end_crit(vp); 770 releasef(fdes); 771 if (error) 772 return (set_errno(error)); 773 return (count); 774 } 775 776 ssize_t 777 writev(int fdes, struct iovec *iovp, int iovcnt) 778 { 779 struct uio auio; 780 struct iovec aiov[DEF_IOV_MAX]; 781 file_t *fp; 782 register vnode_t *vp; 783 struct cpu *cp; 784 int fflag, ioflag, rwflag; 785 ssize_t count, bcount; 786 int error = 0; 787 int i; 788 u_offset_t fileoff; 789 int in_crit = 0; 790 791 if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX) 792 return (set_errno(EINVAL)); 793 794 #ifdef _SYSCALL32_IMPL 795 /* 796 * 32-bit callers need to have their iovec expanded, 797 * while ensuring that they can't move more than 2Gbytes 798 * of data in a single call. 799 */ 800 if (get_udatamodel() == DATAMODEL_ILP32) { 801 struct iovec32 aiov32[DEF_IOV_MAX]; 802 ssize32_t count32; 803 804 if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32))) 805 return (set_errno(EFAULT)); 806 807 count32 = 0; 808 for (i = 0; i < iovcnt; i++) { 809 ssize32_t iovlen = aiov32[i].iov_len; 810 count32 += iovlen; 811 if (iovlen < 0 || count32 < 0) 812 return (set_errno(EINVAL)); 813 aiov[i].iov_len = iovlen; 814 aiov[i].iov_base = 815 (caddr_t)(uintptr_t)aiov32[i].iov_base; 816 } 817 } else 818 #endif 819 if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec))) 820 return (set_errno(EFAULT)); 821 822 count = 0; 823 for (i = 0; i < iovcnt; i++) { 824 ssize_t iovlen = aiov[i].iov_len; 825 count += iovlen; 826 if (iovlen < 0 || count < 0) 827 return (set_errno(EINVAL)); 828 } 829 if ((fp = getf(fdes)) == NULL) 830 return (set_errno(EBADF)); 831 if (((fflag = fp->f_flag) & FWRITE) == 0) { 832 error = EBADF; 833 goto out; 834 } 835 vp = fp->f_vnode; 836 if (vp->v_type == VREG && count == 0) { 837 goto out; 838 } 839 840 rwflag = 1; 841 842 /* 843 * We have to enter the critical region before calling VOP_RWLOCK 844 * to avoid a deadlock with ufs. 845 */ 846 if (nbl_need_check(vp)) { 847 int svmand; 848 849 nbl_start_crit(vp, RW_READER); 850 in_crit = 1; 851 error = nbl_svmand(vp, fp->f_cred, &svmand); 852 if (error != 0) 853 goto out; 854 if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand, 855 NULL)) { 856 error = EACCES; 857 goto out; 858 } 859 } 860 861 (void) VOP_RWLOCK(vp, rwflag, NULL); 862 863 fileoff = fp->f_offset; 864 865 /* 866 * Behaviour is same as write. Please see comments for write. 867 */ 868 869 if (vp->v_type == VREG) { 870 if (fileoff >= curproc->p_fsz_ctl) { 871 VOP_RWUNLOCK(vp, rwflag, NULL); 872 mutex_enter(&curproc->p_lock); 873 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 874 curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO); 875 mutex_exit(&curproc->p_lock); 876 error = EFBIG; 877 goto out; 878 } 879 if (fileoff >= OFFSET_MAX(fp)) { 880 VOP_RWUNLOCK(vp, rwflag, NULL); 881 error = EFBIG; 882 goto out; 883 } 884 if (fileoff + count > OFFSET_MAX(fp)) 885 count = (ssize_t)(OFFSET_MAX(fp) - fileoff); 886 } 887 auio.uio_loffset = fileoff; 888 auio.uio_iov = aiov; 889 auio.uio_iovcnt = iovcnt; 890 auio.uio_resid = bcount = count; 891 auio.uio_segflg = UIO_USERSPACE; 892 auio.uio_llimit = curproc->p_fsz_ctl; 893 auio.uio_fmode = fflag; 894 auio.uio_extflg = UIO_COPY_DEFAULT; 895 896 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 897 898 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 899 count -= auio.uio_resid; 900 CPU_STATS_ENTER_K(); 901 cp = CPU; 902 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 903 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count); 904 CPU_STATS_EXIT_K(); 905 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count; 906 907 if (vp->v_type == VFIFO) /* Backward compatibility */ 908 fp->f_offset = count; 909 else if (((fp->f_flag & FAPPEND) == 0) || 910 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ 911 fp->f_offset = auio.uio_loffset; 912 VOP_RWUNLOCK(vp, rwflag, NULL); 913 914 if (error == EINTR && count != 0) 915 error = 0; 916 out: 917 if (in_crit) 918 nbl_end_crit(vp); 919 releasef(fdes); 920 if (error) 921 return (set_errno(error)); 922 return (count); 923 } 924 925 ssize_t 926 preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset, 927 off_t extended_offset) 928 { 929 struct uio auio; 930 struct iovec aiov[DEF_IOV_MAX]; 931 file_t *fp; 932 register vnode_t *vp; 933 struct cpu *cp; 934 int fflag, ioflag, rwflag; 935 ssize_t count, bcount; 936 int error = 0; 937 int i; 938 939 #if defined(_SYSCALL32_IMPL) || defined(_ILP32) 940 u_offset_t fileoff = ((u_offset_t)extended_offset << 32) | 941 (u_offset_t)offset; 942 #else /* _SYSCALL32_IMPL || _ILP32 */ 943 u_offset_t fileoff = (u_offset_t)(ulong_t)offset; 944 #endif /* _SYSCALL32_IMPR || _ILP32 */ 945 #ifdef _SYSCALL32_IMPL 946 const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 && 947 extended_offset == 0? 948 MAXOFF32_T : MAXOFFSET_T; 949 #else /* _SYSCALL32_IMPL */ 950 const u_offset_t maxoff = MAXOFF32_T; 951 #endif /* _SYSCALL32_IMPL */ 952 953 int in_crit = 0; 954 955 if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX) 956 return (set_errno(EINVAL)); 957 958 #ifdef _SYSCALL32_IMPL 959 /* 960 * 32-bit callers need to have their iovec expanded, 961 * while ensuring that they can't move more than 2Gbytes 962 * of data in a single call. 963 */ 964 if (get_udatamodel() == DATAMODEL_ILP32) { 965 struct iovec32 aiov32[DEF_IOV_MAX]; 966 ssize32_t count32; 967 968 if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32))) 969 return (set_errno(EFAULT)); 970 971 count32 = 0; 972 for (i = 0; i < iovcnt; i++) { 973 ssize32_t iovlen32 = aiov32[i].iov_len; 974 count32 += iovlen32; 975 if (iovlen32 < 0 || count32 < 0) 976 return (set_errno(EINVAL)); 977 aiov[i].iov_len = iovlen32; 978 aiov[i].iov_base = 979 (caddr_t)(uintptr_t)aiov32[i].iov_base; 980 } 981 } else 982 #endif /* _SYSCALL32_IMPL */ 983 if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec))) 984 return (set_errno(EFAULT)); 985 986 count = 0; 987 for (i = 0; i < iovcnt; i++) { 988 ssize_t iovlen = aiov[i].iov_len; 989 count += iovlen; 990 if (iovlen < 0 || count < 0) 991 return (set_errno(EINVAL)); 992 } 993 994 if ((bcount = (ssize_t)count) < 0) 995 return (set_errno(EINVAL)); 996 if ((fp = getf(fdes)) == NULL) 997 return (set_errno(EBADF)); 998 if (((fflag = fp->f_flag) & FREAD) == 0) { 999 error = EBADF; 1000 goto out; 1001 } 1002 vp = fp->f_vnode; 1003 rwflag = 0; 1004 if (vp->v_type == VREG) { 1005 1006 if (bcount == 0) 1007 goto out; 1008 1009 /* 1010 * return EINVAL for offsets that cannot be 1011 * represented in an off_t. 1012 */ 1013 if (fileoff > maxoff) { 1014 error = EINVAL; 1015 goto out; 1016 } 1017 1018 if (fileoff + bcount > maxoff) 1019 bcount = (ssize_t)((u_offset_t)maxoff - fileoff); 1020 } else if (vp->v_type == VFIFO) { 1021 error = ESPIPE; 1022 goto out; 1023 } 1024 /* 1025 * We have to enter the critical region before calling VOP_RWLOCK 1026 * to avoid a deadlock with ufs. 1027 */ 1028 if (nbl_need_check(vp)) { 1029 int svmand; 1030 1031 nbl_start_crit(vp, RW_READER); 1032 in_crit = 1; 1033 error = nbl_svmand(vp, fp->f_cred, &svmand); 1034 if (error != 0) 1035 goto out; 1036 if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, 1037 NULL)) { 1038 error = EACCES; 1039 goto out; 1040 } 1041 } 1042 1043 (void) VOP_RWLOCK(vp, rwflag, NULL); 1044 1045 /* 1046 * Behaviour is same as read(2). Please see comments in 1047 * read(2). 1048 */ 1049 1050 if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) { 1051 struct vattr va; 1052 va.va_mask = AT_SIZE; 1053 if ((error = 1054 VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) { 1055 VOP_RWUNLOCK(vp, rwflag, NULL); 1056 goto out; 1057 } 1058 if (fileoff >= va.va_size) { 1059 VOP_RWUNLOCK(vp, rwflag, NULL); 1060 count = 0; 1061 goto out; 1062 } else { 1063 VOP_RWUNLOCK(vp, rwflag, NULL); 1064 error = EOVERFLOW; 1065 goto out; 1066 } 1067 } 1068 if ((vp->v_type == VREG) && 1069 (fileoff + count > OFFSET_MAX(fp))) { 1070 count = (ssize_t)(OFFSET_MAX(fp) - fileoff); 1071 } 1072 auio.uio_loffset = fileoff; 1073 auio.uio_iov = aiov; 1074 auio.uio_iovcnt = iovcnt; 1075 auio.uio_resid = bcount = count; 1076 auio.uio_segflg = UIO_USERSPACE; 1077 auio.uio_llimit = MAXOFFSET_T; 1078 auio.uio_fmode = fflag; 1079 if (bcount <= copyout_max_cached) 1080 auio.uio_extflg = UIO_COPY_CACHED; 1081 else 1082 auio.uio_extflg = UIO_COPY_DEFAULT; 1083 1084 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 1085 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 1086 count -= auio.uio_resid; 1087 CPU_STATS_ENTER_K(); 1088 cp = CPU; 1089 CPU_STATS_ADDQ(cp, sys, sysread, 1); 1090 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count); 1091 CPU_STATS_EXIT_K(); 1092 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count; 1093 1094 VOP_RWUNLOCK(vp, rwflag, NULL); 1095 1096 if (error == EINTR && count != 0) 1097 error = 0; 1098 out: 1099 if (in_crit) 1100 nbl_end_crit(vp); 1101 releasef(fdes); 1102 if (error) 1103 return (set_errno(error)); 1104 return (count); 1105 } 1106 1107 ssize_t 1108 pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset, 1109 off_t extended_offset) 1110 { 1111 struct uio auio; 1112 struct iovec aiov[DEF_IOV_MAX]; 1113 file_t *fp; 1114 register vnode_t *vp; 1115 struct cpu *cp; 1116 int fflag, ioflag, rwflag; 1117 ssize_t count, bcount; 1118 int error = 0; 1119 int i; 1120 1121 #if defined(_SYSCALL32_IMPL) || defined(_ILP32) 1122 u_offset_t fileoff = ((u_offset_t)extended_offset << 32) | 1123 (u_offset_t)offset; 1124 #else /* _SYSCALL32_IMPL || _ILP32 */ 1125 u_offset_t fileoff = (u_offset_t)(ulong_t)offset; 1126 #endif /* _SYSCALL32_IMPR || _ILP32 */ 1127 #ifdef _SYSCALL32_IMPL 1128 const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 && 1129 extended_offset == 0? 1130 MAXOFF32_T : MAXOFFSET_T; 1131 #else /* _SYSCALL32_IMPL */ 1132 const u_offset_t maxoff = MAXOFF32_T; 1133 #endif /* _SYSCALL32_IMPL */ 1134 1135 int in_crit = 0; 1136 1137 if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX) 1138 return (set_errno(EINVAL)); 1139 1140 #ifdef _SYSCALL32_IMPL 1141 /* 1142 * 32-bit callers need to have their iovec expanded, 1143 * while ensuring that they can't move more than 2Gbytes 1144 * of data in a single call. 1145 */ 1146 if (get_udatamodel() == DATAMODEL_ILP32) { 1147 struct iovec32 aiov32[DEF_IOV_MAX]; 1148 ssize32_t count32; 1149 1150 if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32))) 1151 return (set_errno(EFAULT)); 1152 1153 count32 = 0; 1154 for (i = 0; i < iovcnt; i++) { 1155 ssize32_t iovlen32 = aiov32[i].iov_len; 1156 count32 += iovlen32; 1157 if (iovlen32 < 0 || count32 < 0) 1158 return (set_errno(EINVAL)); 1159 aiov[i].iov_len = iovlen32; 1160 aiov[i].iov_base = 1161 (caddr_t)(uintptr_t)aiov32[i].iov_base; 1162 } 1163 } else 1164 #endif /* _SYSCALL32_IMPL */ 1165 if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec))) 1166 return (set_errno(EFAULT)); 1167 1168 count = 0; 1169 for (i = 0; i < iovcnt; i++) { 1170 ssize_t iovlen = aiov[i].iov_len; 1171 count += iovlen; 1172 if (iovlen < 0 || count < 0) 1173 return (set_errno(EINVAL)); 1174 } 1175 1176 if ((bcount = (ssize_t)count) < 0) 1177 return (set_errno(EINVAL)); 1178 if ((fp = getf(fdes)) == NULL) 1179 return (set_errno(EBADF)); 1180 if (((fflag = fp->f_flag) & FWRITE) == 0) { 1181 error = EBADF; 1182 goto out; 1183 } 1184 vp = fp->f_vnode; 1185 rwflag = 1; 1186 if (vp->v_type == VREG) { 1187 1188 if (bcount == 0) 1189 goto out; 1190 1191 /* 1192 * return EINVAL for offsets that cannot be 1193 * represented in an off_t. 1194 */ 1195 if (fileoff > maxoff) { 1196 error = EINVAL; 1197 goto out; 1198 } 1199 /* 1200 * Take appropriate action if we are trying 1201 * to write above the resource limit. 1202 */ 1203 if (fileoff >= curproc->p_fsz_ctl) { 1204 mutex_enter(&curproc->p_lock); 1205 /* 1206 * Return value ignored because it lists 1207 * actions taken, but we are in an error case. 1208 * We don't have any actions that depend on 1209 * what could happen in this call, so we ignore 1210 * the return value. 1211 */ 1212 (void) rctl_action( 1213 rctlproc_legacy[RLIMIT_FSIZE], 1214 curproc->p_rctls, curproc, 1215 RCA_UNSAFE_SIGINFO); 1216 mutex_exit(&curproc->p_lock); 1217 1218 error = EFBIG; 1219 goto out; 1220 } 1221 /* 1222 * Don't allow pwritev to cause file sizes to exceed 1223 * maxoff. 1224 */ 1225 if (fileoff == maxoff) { 1226 error = EFBIG; 1227 goto out; 1228 } 1229 1230 if (fileoff + bcount > maxoff) 1231 bcount = (ssize_t)((u_offset_t)maxoff - fileoff); 1232 } else if (vp->v_type == VFIFO) { 1233 error = ESPIPE; 1234 goto out; 1235 } 1236 /* 1237 * We have to enter the critical region before calling VOP_RWLOCK 1238 * to avoid a deadlock with ufs. 1239 */ 1240 if (nbl_need_check(vp)) { 1241 int svmand; 1242 1243 nbl_start_crit(vp, RW_READER); 1244 in_crit = 1; 1245 error = nbl_svmand(vp, fp->f_cred, &svmand); 1246 if (error != 0) 1247 goto out; 1248 if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, 1249 NULL)) { 1250 error = EACCES; 1251 goto out; 1252 } 1253 } 1254 1255 (void) VOP_RWLOCK(vp, rwflag, NULL); 1256 1257 1258 /* 1259 * Behaviour is same as write(2). Please see comments for 1260 * write(2). 1261 */ 1262 1263 if (vp->v_type == VREG) { 1264 if (fileoff >= curproc->p_fsz_ctl) { 1265 VOP_RWUNLOCK(vp, rwflag, NULL); 1266 mutex_enter(&curproc->p_lock); 1267 /* see above rctl_action comment */ 1268 (void) rctl_action( 1269 rctlproc_legacy[RLIMIT_FSIZE], 1270 curproc->p_rctls, 1271 curproc, RCA_UNSAFE_SIGINFO); 1272 mutex_exit(&curproc->p_lock); 1273 error = EFBIG; 1274 goto out; 1275 } 1276 if (fileoff >= OFFSET_MAX(fp)) { 1277 VOP_RWUNLOCK(vp, rwflag, NULL); 1278 error = EFBIG; 1279 goto out; 1280 } 1281 if (fileoff + count > OFFSET_MAX(fp)) 1282 count = (ssize_t)(OFFSET_MAX(fp) - fileoff); 1283 } 1284 1285 auio.uio_loffset = fileoff; 1286 auio.uio_iov = aiov; 1287 auio.uio_iovcnt = iovcnt; 1288 auio.uio_resid = bcount = count; 1289 auio.uio_segflg = UIO_USERSPACE; 1290 auio.uio_llimit = curproc->p_fsz_ctl; 1291 auio.uio_fmode = fflag; 1292 auio.uio_extflg = UIO_COPY_CACHED; 1293 ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC); 1294 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 1295 count -= auio.uio_resid; 1296 CPU_STATS_ENTER_K(); 1297 cp = CPU; 1298 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 1299 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count); 1300 CPU_STATS_EXIT_K(); 1301 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count; 1302 1303 VOP_RWUNLOCK(vp, rwflag, NULL); 1304 1305 if (error == EINTR && count != 0) 1306 error = 0; 1307 out: 1308 if (in_crit) 1309 nbl_end_crit(vp); 1310 releasef(fdes); 1311 if (error) 1312 return (set_errno(error)); 1313 return (count); 1314 } 1315 1316 #if defined(_SYSCALL32_IMPL) || defined(_ILP32) 1317 1318 /* 1319 * This syscall supplies 64-bit file offsets to 32-bit applications only. 1320 */ 1321 ssize32_t 1322 pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1, 1323 uint32_t offset_2) 1324 { 1325 struct uio auio; 1326 struct iovec aiov; 1327 file_t *fp; 1328 register vnode_t *vp; 1329 struct cpu *cp; 1330 int fflag, ioflag, rwflag; 1331 ssize_t bcount; 1332 int error = 0; 1333 u_offset_t fileoff; 1334 int in_crit = 0; 1335 1336 #if defined(_LITTLE_ENDIAN) 1337 fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1; 1338 #else 1339 fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2; 1340 #endif 1341 1342 if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX) 1343 return (set_errno(EINVAL)); 1344 1345 if ((fp = getf(fdes)) == NULL) 1346 return (set_errno(EBADF)); 1347 if (((fflag = fp->f_flag) & (FREAD)) == 0) { 1348 error = EBADF; 1349 goto out; 1350 } 1351 1352 rwflag = 0; 1353 vp = fp->f_vnode; 1354 1355 if (vp->v_type == VREG) { 1356 1357 if (bcount == 0) 1358 goto out; 1359 1360 /* 1361 * Same as pread. See comments in pread. 1362 */ 1363 1364 if (fileoff > MAXOFFSET_T) { 1365 error = EINVAL; 1366 goto out; 1367 } 1368 if (fileoff + bcount > MAXOFFSET_T) 1369 bcount = (ssize_t)(MAXOFFSET_T - fileoff); 1370 } else if (vp->v_type == VFIFO) { 1371 error = ESPIPE; 1372 goto out; 1373 } 1374 1375 /* 1376 * We have to enter the critical region before calling VOP_RWLOCK 1377 * to avoid a deadlock with ufs. 1378 */ 1379 if (nbl_need_check(vp)) { 1380 int svmand; 1381 1382 nbl_start_crit(vp, RW_READER); 1383 in_crit = 1; 1384 error = nbl_svmand(vp, fp->f_cred, &svmand); 1385 if (error != 0) 1386 goto out; 1387 if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand, 1388 NULL)) { 1389 error = EACCES; 1390 goto out; 1391 } 1392 } 1393 1394 aiov.iov_base = cbuf; 1395 aiov.iov_len = bcount; 1396 (void) VOP_RWLOCK(vp, rwflag, NULL); 1397 auio.uio_loffset = fileoff; 1398 1399 /* 1400 * Note: File size can never be greater than MAXOFFSET_T. 1401 * If ever we start supporting 128 bit files the code 1402 * similar to the one in pread at this place should be here. 1403 * Here we avoid the unnecessary VOP_GETATTR() when we 1404 * know that fileoff == MAXOFFSET_T implies that it is always 1405 * greater than or equal to file size. 1406 */ 1407 auio.uio_iov = &aiov; 1408 auio.uio_iovcnt = 1; 1409 auio.uio_resid = bcount; 1410 auio.uio_segflg = UIO_USERSPACE; 1411 auio.uio_llimit = MAXOFFSET_T; 1412 auio.uio_fmode = fflag; 1413 auio.uio_extflg = UIO_COPY_CACHED; 1414 1415 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 1416 1417 /* If read sync is not asked for, filter sync flags */ 1418 if ((ioflag & FRSYNC) == 0) 1419 ioflag &= ~(FSYNC|FDSYNC); 1420 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 1421 bcount -= auio.uio_resid; 1422 CPU_STATS_ENTER_K(); 1423 cp = CPU; 1424 CPU_STATS_ADDQ(cp, sys, sysread, 1); 1425 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount); 1426 CPU_STATS_EXIT_K(); 1427 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 1428 VOP_RWUNLOCK(vp, rwflag, NULL); 1429 1430 if (error == EINTR && bcount != 0) 1431 error = 0; 1432 out: 1433 if (in_crit) 1434 nbl_end_crit(vp); 1435 releasef(fdes); 1436 if (error) 1437 return (set_errno(error)); 1438 return (bcount); 1439 } 1440 1441 /* 1442 * This syscall supplies 64-bit file offsets to 32-bit applications only. 1443 */ 1444 ssize32_t 1445 pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1, 1446 uint32_t offset_2) 1447 { 1448 struct uio auio; 1449 struct iovec aiov; 1450 file_t *fp; 1451 register vnode_t *vp; 1452 struct cpu *cp; 1453 int fflag, ioflag, rwflag; 1454 ssize_t bcount; 1455 int error = 0; 1456 u_offset_t fileoff; 1457 int in_crit = 0; 1458 1459 #if defined(_LITTLE_ENDIAN) 1460 fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1; 1461 #else 1462 fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2; 1463 #endif 1464 1465 if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX) 1466 return (set_errno(EINVAL)); 1467 if ((fp = getf(fdes)) == NULL) 1468 return (set_errno(EBADF)); 1469 if (((fflag = fp->f_flag) & (FWRITE)) == 0) { 1470 error = EBADF; 1471 goto out; 1472 } 1473 1474 rwflag = 1; 1475 vp = fp->f_vnode; 1476 1477 if (vp->v_type == VREG) { 1478 1479 if (bcount == 0) 1480 goto out; 1481 1482 /* 1483 * See comments in pwrite. 1484 */ 1485 if (fileoff > MAXOFFSET_T) { 1486 error = EINVAL; 1487 goto out; 1488 } 1489 if (fileoff >= curproc->p_fsz_ctl) { 1490 mutex_enter(&curproc->p_lock); 1491 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 1492 curproc->p_rctls, curproc, RCA_SAFE); 1493 mutex_exit(&curproc->p_lock); 1494 error = EFBIG; 1495 goto out; 1496 } 1497 if (fileoff == MAXOFFSET_T) { 1498 error = EFBIG; 1499 goto out; 1500 } 1501 if (fileoff + bcount > MAXOFFSET_T) 1502 bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff); 1503 } else if (vp->v_type == VFIFO) { 1504 error = ESPIPE; 1505 goto out; 1506 } 1507 1508 /* 1509 * We have to enter the critical region before calling VOP_RWLOCK 1510 * to avoid a deadlock with ufs. 1511 */ 1512 if (nbl_need_check(vp)) { 1513 int svmand; 1514 1515 nbl_start_crit(vp, RW_READER); 1516 in_crit = 1; 1517 error = nbl_svmand(vp, fp->f_cred, &svmand); 1518 if (error != 0) 1519 goto out; 1520 if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand, 1521 NULL)) { 1522 error = EACCES; 1523 goto out; 1524 } 1525 } 1526 1527 aiov.iov_base = cbuf; 1528 aiov.iov_len = bcount; 1529 (void) VOP_RWLOCK(vp, rwflag, NULL); 1530 auio.uio_loffset = fileoff; 1531 auio.uio_iov = &aiov; 1532 auio.uio_iovcnt = 1; 1533 auio.uio_resid = bcount; 1534 auio.uio_segflg = UIO_USERSPACE; 1535 auio.uio_llimit = curproc->p_fsz_ctl; 1536 auio.uio_fmode = fflag; 1537 auio.uio_extflg = UIO_COPY_CACHED; 1538 1539 /* 1540 * The SUSv4 POSIX specification states: 1541 * The pwrite() function shall be equivalent to write(), except 1542 * that it writes into a given position and does not change 1543 * the file offset (regardless of whether O_APPEND is set). 1544 * To make this be true, we omit the FAPPEND flag from ioflag. 1545 */ 1546 ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC); 1547 1548 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 1549 bcount -= auio.uio_resid; 1550 CPU_STATS_ENTER_K(); 1551 cp = CPU; 1552 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 1553 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount); 1554 CPU_STATS_EXIT_K(); 1555 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 1556 VOP_RWUNLOCK(vp, rwflag, NULL); 1557 1558 if (error == EINTR && bcount != 0) 1559 error = 0; 1560 out: 1561 if (in_crit) 1562 nbl_end_crit(vp); 1563 releasef(fdes); 1564 if (error) 1565 return (set_errno(error)); 1566 return (bcount); 1567 } 1568 1569 #endif /* _SYSCALL32_IMPL || _ILP32 */ 1570 1571 #ifdef _SYSCALL32_IMPL 1572 /* 1573 * Tail-call elimination of xxx32() down to xxx() 1574 * 1575 * A number of xxx32 system calls take a len (or count) argument and 1576 * return a number in the range [0,len] or -1 on error. 1577 * Given an ssize32_t input len, the downcall xxx() will return 1578 * a 64-bit value that is -1 or in the range [0,len] which actually 1579 * is a proper return value for the xxx32 call. So even if the xxx32 1580 * calls can be considered as returning a ssize32_t, they are currently 1581 * declared as returning a ssize_t as this enables tail-call elimination. 1582 * 1583 * The cast of len (or count) to ssize32_t is needed to ensure we pass 1584 * down negative input values as such and let the downcall handle error 1585 * reporting. Functions covered by this comments are: 1586 * 1587 * rw.c: read32, write32, pread32, pwrite32, readv32, writev32. 1588 * socksyscall.c: recv32, recvfrom32, send32, sendto32. 1589 * readlink.c: readlink32. 1590 */ 1591 1592 ssize_t 1593 read32(int32_t fdes, caddr32_t cbuf, size32_t count) 1594 { 1595 return (read(fdes, 1596 (void *)(uintptr_t)cbuf, (ssize32_t)count)); 1597 } 1598 1599 ssize_t 1600 write32(int32_t fdes, caddr32_t cbuf, size32_t count) 1601 { 1602 return (write(fdes, 1603 (void *)(uintptr_t)cbuf, (ssize32_t)count)); 1604 } 1605 1606 ssize_t 1607 pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) 1608 { 1609 return (pread(fdes, 1610 (void *)(uintptr_t)cbuf, (ssize32_t)count, 1611 (off_t)(uint32_t)offset)); 1612 } 1613 1614 ssize_t 1615 pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) 1616 { 1617 return (pwrite(fdes, 1618 (void *)(uintptr_t)cbuf, (ssize32_t)count, 1619 (off_t)(uint32_t)offset)); 1620 } 1621 1622 ssize_t 1623 readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) 1624 { 1625 return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt)); 1626 } 1627 1628 ssize_t 1629 writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) 1630 { 1631 return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt)); 1632 } 1633 #endif /* _SYSCALL32_IMPL */ 1634