1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * Portions of this source code were derived from Berkeley 4.3 BSD 32 * under license from the Regents of the University of California. 33 */ 34 35 #pragma ident "%Z%%M% %I% %E% SMI" 36 37 #include <sys/param.h> 38 #include <sys/isa_defs.h> 39 #include <sys/types.h> 40 #include <sys/inttypes.h> 41 #include <sys/sysmacros.h> 42 #include <sys/cred.h> 43 #include <sys/user.h> 44 #include <sys/systm.h> 45 #include <sys/errno.h> 46 #include <sys/vnode.h> 47 #include <sys/file.h> 48 #include <sys/proc.h> 49 #include <sys/cpuvar.h> 50 #include <sys/uio.h> 51 #include <sys/debug.h> 52 #include <sys/rctl.h> 53 #include <sys/nbmlock.h> 54 55 #define COPYOUT_MIN_SIZE (1<<17) /* 128K */ 56 57 static size_t copyout_min_size = COPYOUT_MIN_SIZE; 58 59 /* 60 * read, write, pread, pwrite, readv, and writev syscalls. 61 * 62 * 64-bit open: all open's are large file opens. 63 * Large Files: the behaviour of read depends on whether the fd 64 * corresponds to large open or not. 65 * 32-bit open: FOFFMAX flag not set. 66 * read until MAXOFF32_T - 1 and read at MAXOFF32_T returns 67 * EOVERFLOW if count is non-zero and if size of file 68 * is > MAXOFF32_T. If size of file is <= MAXOFF32_T read 69 * at >= MAXOFF32_T returns EOF. 70 */ 71 72 /* 73 * Native system call 74 */ 75 ssize_t 76 read(int fdes, void *cbuf, size_t count) 77 { 78 struct uio auio; 79 struct iovec aiov; 80 file_t *fp; 81 register vnode_t *vp; 82 struct cpu *cp; 83 int fflag, ioflag, rwflag; 84 ssize_t cnt, bcount; 85 int error = 0; 86 u_offset_t fileoff; 87 int in_crit = 0; 88 89 if ((cnt = (ssize_t)count) < 0) 90 return (set_errno(EINVAL)); 91 if ((fp = getf(fdes)) == NULL) 92 return (set_errno(EBADF)); 93 if (((fflag = fp->f_flag) & FREAD) == 0) { 94 error = EBADF; 95 goto out; 96 } 97 vp = fp->f_vnode; 98 99 if (vp->v_type == VREG && cnt == 0) { 100 goto out; 101 } 102 103 rwflag = 0; 104 aiov.iov_base = cbuf; 105 aiov.iov_len = cnt; 106 107 /* 108 * We have to enter the critical region before calling VOP_RWLOCK 109 * to avoid a deadlock with write() calls. 110 */ 111 if (nbl_need_check(vp)) { 112 int svmand; 113 114 nbl_start_crit(vp, RW_READER); 115 in_crit = 1; 116 error = nbl_svmand(vp, fp->f_cred, &svmand); 117 if (error != 0) 118 goto out; 119 if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand)) { 120 error = EACCES; 121 goto out; 122 } 123 } 124 125 (void) VOP_RWLOCK(vp, rwflag, NULL); 126 127 /* 128 * We do the following checks inside VOP_RWLOCK so as to 129 * prevent file size from changing while these checks are 130 * being done. Also, we load fp's offset to the local 131 * variable fileoff because we can have a parallel lseek 132 * going on (f_offset is not protected by any lock) which 133 * could change f_offset. We need to see the value only 134 * once here and take a decision. Seeing it more than once 135 * can lead to incorrect functionality. 136 */ 137 138 fileoff = (u_offset_t)fp->f_offset; 139 if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) { 140 struct vattr va; 141 va.va_mask = AT_SIZE; 142 if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) { 143 VOP_RWUNLOCK(vp, rwflag, NULL); 144 goto out; 145 } 146 if (fileoff >= va.va_size) { 147 cnt = 0; 148 VOP_RWUNLOCK(vp, rwflag, NULL); 149 goto out; 150 } else { 151 error = EOVERFLOW; 152 VOP_RWUNLOCK(vp, rwflag, NULL); 153 goto out; 154 } 155 } 156 if ((vp->v_type == VREG) && 157 (fileoff + cnt > OFFSET_MAX(fp))) { 158 cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff); 159 } 160 auio.uio_loffset = fileoff; 161 auio.uio_iov = &aiov; 162 auio.uio_iovcnt = 1; 163 auio.uio_resid = bcount = cnt; 164 auio.uio_segflg = UIO_USERSPACE; 165 auio.uio_llimit = MAXOFFSET_T; 166 auio.uio_fmode = fflag; 167 /* 168 * Only use bypass caches when the count is large enough 169 */ 170 if (bcount < copyout_min_size) 171 auio.uio_extflg = UIO_COPY_CACHED; 172 else 173 auio.uio_extflg = UIO_COPY_DEFAULT; 174 175 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 176 177 /* If read sync is not asked for, filter sync flags */ 178 if ((ioflag & FRSYNC) == 0) 179 ioflag &= ~(FSYNC|FDSYNC); 180 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 181 cnt -= auio.uio_resid; 182 CPU_STATS_ENTER_K(); 183 cp = CPU; 184 CPU_STATS_ADDQ(cp, sys, sysread, 1); 185 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt); 186 CPU_STATS_EXIT_K(); 187 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt; 188 189 if (vp->v_type == VFIFO) /* Backward compatibility */ 190 fp->f_offset = cnt; 191 else if (((fp->f_flag & FAPPEND) == 0) || 192 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ 193 fp->f_offset = auio.uio_loffset; 194 VOP_RWUNLOCK(vp, rwflag, NULL); 195 196 if (error == EINTR && cnt != 0) 197 error = 0; 198 out: 199 if (in_crit) 200 nbl_end_crit(vp); 201 releasef(fdes); 202 if (error) 203 return (set_errno(error)); 204 return (cnt); 205 } 206 207 /* 208 * Native system call 209 */ 210 ssize_t 211 write(int fdes, void *cbuf, size_t count) 212 { 213 struct uio auio; 214 struct iovec aiov; 215 file_t *fp; 216 register vnode_t *vp; 217 struct cpu *cp; 218 int fflag, ioflag, rwflag; 219 ssize_t cnt, bcount; 220 int error = 0; 221 u_offset_t fileoff; 222 int in_crit = 0; 223 224 if ((cnt = (ssize_t)count) < 0) 225 return (set_errno(EINVAL)); 226 if ((fp = getf(fdes)) == NULL) 227 return (set_errno(EBADF)); 228 if (((fflag = fp->f_flag) & FWRITE) == 0) { 229 error = EBADF; 230 goto out; 231 } 232 vp = fp->f_vnode; 233 234 if (vp->v_type == VREG && cnt == 0) { 235 goto out; 236 } 237 238 rwflag = 1; 239 aiov.iov_base = cbuf; 240 aiov.iov_len = cnt; 241 242 /* 243 * We have to enter the critical region before calling VOP_RWLOCK 244 * to avoid a deadlock with ufs. 245 */ 246 if (nbl_need_check(vp)) { 247 int svmand; 248 249 nbl_start_crit(vp, RW_READER); 250 in_crit = 1; 251 error = nbl_svmand(vp, fp->f_cred, &svmand); 252 if (error != 0) 253 goto out; 254 if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand)) { 255 error = EACCES; 256 goto out; 257 } 258 } 259 260 (void) VOP_RWLOCK(vp, rwflag, NULL); 261 262 fileoff = fp->f_offset; 263 if (vp->v_type == VREG) { 264 265 /* 266 * We raise psignal if write for >0 bytes causes 267 * it to exceed the ulimit. 268 */ 269 if (fileoff >= curproc->p_fsz_ctl) { 270 VOP_RWUNLOCK(vp, rwflag, NULL); 271 272 mutex_enter(&curproc->p_lock); 273 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 274 curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO); 275 mutex_exit(&curproc->p_lock); 276 277 error = EFBIG; 278 goto out; 279 } 280 /* 281 * We return EFBIG if write is done at an offset 282 * greater than the offset maximum for this file structure. 283 */ 284 285 if (fileoff >= OFFSET_MAX(fp)) { 286 VOP_RWUNLOCK(vp, rwflag, NULL); 287 error = EFBIG; 288 goto out; 289 } 290 /* 291 * Limit the bytes to be written upto offset maximum for 292 * this open file structure. 293 */ 294 if (fileoff + cnt > OFFSET_MAX(fp)) 295 cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff); 296 } 297 auio.uio_loffset = fileoff; 298 auio.uio_iov = &aiov; 299 auio.uio_iovcnt = 1; 300 auio.uio_resid = bcount = cnt; 301 auio.uio_segflg = UIO_USERSPACE; 302 auio.uio_llimit = curproc->p_fsz_ctl; 303 auio.uio_fmode = fflag; 304 auio.uio_extflg = UIO_COPY_DEFAULT; 305 306 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 307 308 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 309 cnt -= auio.uio_resid; 310 CPU_STATS_ENTER_K(); 311 cp = CPU; 312 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 313 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt); 314 CPU_STATS_EXIT_K(); 315 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt; 316 317 if (vp->v_type == VFIFO) /* Backward compatibility */ 318 fp->f_offset = cnt; 319 else if (((fp->f_flag & FAPPEND) == 0) || 320 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ 321 fp->f_offset = auio.uio_loffset; 322 VOP_RWUNLOCK(vp, rwflag, NULL); 323 324 if (error == EINTR && cnt != 0) 325 error = 0; 326 out: 327 if (in_crit) 328 nbl_end_crit(vp); 329 releasef(fdes); 330 if (error) 331 return (set_errno(error)); 332 return (cnt); 333 } 334 335 ssize_t 336 pread(int fdes, void *cbuf, size_t count, off_t offset) 337 { 338 struct uio auio; 339 struct iovec aiov; 340 file_t *fp; 341 register vnode_t *vp; 342 struct cpu *cp; 343 int fflag, ioflag, rwflag; 344 ssize_t bcount; 345 int error = 0; 346 u_offset_t fileoff = (u_offset_t)(ulong_t)offset; 347 #ifdef _SYSCALL32_IMPL 348 u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ? 349 MAXOFF32_T : MAXOFFSET_T; 350 #else 351 const u_offset_t maxoff = MAXOFF32_T; 352 #endif 353 int in_crit = 0; 354 355 if ((bcount = (ssize_t)count) < 0) 356 return (set_errno(EINVAL)); 357 358 if ((fp = getf(fdes)) == NULL) 359 return (set_errno(EBADF)); 360 if (((fflag = fp->f_flag) & (FREAD)) == 0) { 361 error = EBADF; 362 goto out; 363 } 364 365 rwflag = 0; 366 vp = fp->f_vnode; 367 368 if (vp->v_type == VREG) { 369 370 if (bcount == 0) 371 goto out; 372 373 /* 374 * Return EINVAL if an invalid offset comes to pread. 375 * Negative offset from user will cause this error. 376 */ 377 378 if (fileoff > maxoff) { 379 error = EINVAL; 380 goto out; 381 } 382 /* 383 * Limit offset such that we don't read or write 384 * a file beyond the maximum offset representable in 385 * an off_t structure. 386 */ 387 if (fileoff + bcount > maxoff) 388 bcount = (ssize_t)((offset_t)maxoff - fileoff); 389 } else if (vp->v_type == VFIFO) { 390 error = ESPIPE; 391 goto out; 392 } 393 394 /* 395 * We have to enter the critical region before calling VOP_RWLOCK 396 * to avoid a deadlock with ufs. 397 */ 398 if (nbl_need_check(vp)) { 399 int svmand; 400 401 nbl_start_crit(vp, RW_READER); 402 in_crit = 1; 403 error = nbl_svmand(vp, fp->f_cred, &svmand); 404 if (error != 0) 405 goto out; 406 if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand)) { 407 error = EACCES; 408 goto out; 409 } 410 } 411 412 aiov.iov_base = cbuf; 413 aiov.iov_len = bcount; 414 (void) VOP_RWLOCK(vp, rwflag, NULL); 415 if (vp->v_type == VREG && fileoff == (u_offset_t)maxoff) { 416 struct vattr va; 417 va.va_mask = AT_SIZE; 418 if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) { 419 VOP_RWUNLOCK(vp, rwflag, NULL); 420 goto out; 421 } 422 VOP_RWUNLOCK(vp, rwflag, NULL); 423 424 /* 425 * We have to return EOF if fileoff is >= file size. 426 */ 427 if (fileoff >= va.va_size) { 428 bcount = 0; 429 goto out; 430 } 431 432 /* 433 * File is greater than or equal to maxoff and therefore 434 * we return EOVERFLOW. 435 */ 436 error = EOVERFLOW; 437 goto out; 438 } 439 auio.uio_loffset = fileoff; 440 auio.uio_iov = &aiov; 441 auio.uio_iovcnt = 1; 442 auio.uio_resid = bcount; 443 auio.uio_segflg = UIO_USERSPACE; 444 auio.uio_llimit = MAXOFFSET_T; 445 auio.uio_fmode = fflag; 446 auio.uio_extflg = UIO_COPY_CACHED; 447 448 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 449 450 /* If read sync is not asked for, filter sync flags */ 451 if ((ioflag & FRSYNC) == 0) 452 ioflag &= ~(FSYNC|FDSYNC); 453 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 454 bcount -= auio.uio_resid; 455 CPU_STATS_ENTER_K(); 456 cp = CPU; 457 CPU_STATS_ADDQ(cp, sys, sysread, 1); 458 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount); 459 CPU_STATS_EXIT_K(); 460 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 461 VOP_RWUNLOCK(vp, rwflag, NULL); 462 463 if (error == EINTR && bcount != 0) 464 error = 0; 465 out: 466 if (in_crit) 467 nbl_end_crit(vp); 468 releasef(fdes); 469 if (error) 470 return (set_errno(error)); 471 return (bcount); 472 } 473 474 ssize_t 475 pwrite(int fdes, void *cbuf, size_t count, off_t offset) 476 { 477 struct uio auio; 478 struct iovec aiov; 479 file_t *fp; 480 register vnode_t *vp; 481 struct cpu *cp; 482 int fflag, ioflag, rwflag; 483 ssize_t bcount; 484 int error = 0; 485 u_offset_t fileoff = (u_offset_t)(ulong_t)offset; 486 #ifdef _SYSCALL32_IMPL 487 u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ? 488 MAXOFF32_T : MAXOFFSET_T; 489 #else 490 const u_offset_t maxoff = MAXOFF32_T; 491 #endif 492 int in_crit = 0; 493 494 if ((bcount = (ssize_t)count) < 0) 495 return (set_errno(EINVAL)); 496 if ((fp = getf(fdes)) == NULL) 497 return (set_errno(EBADF)); 498 if (((fflag = fp->f_flag) & (FWRITE)) == 0) { 499 error = EBADF; 500 goto out; 501 } 502 503 rwflag = 1; 504 vp = fp->f_vnode; 505 506 if (vp->v_type == VREG) { 507 508 if (bcount == 0) 509 goto out; 510 511 /* 512 * return EINVAL for offsets that cannot be 513 * represented in an off_t. 514 */ 515 if (fileoff > maxoff) { 516 error = EINVAL; 517 goto out; 518 } 519 /* 520 * Take appropriate action if we are trying to write above the 521 * resource limit. 522 */ 523 if (fileoff >= curproc->p_fsz_ctl) { 524 mutex_enter(&curproc->p_lock); 525 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 526 curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO); 527 mutex_exit(&curproc->p_lock); 528 529 error = EFBIG; 530 goto out; 531 } 532 /* 533 * Don't allow pwrite to cause file sizes to exceed 534 * maxoff. 535 */ 536 if (fileoff == maxoff) { 537 error = EFBIG; 538 goto out; 539 } 540 if (fileoff + count > maxoff) 541 bcount = (ssize_t)((u_offset_t)maxoff - fileoff); 542 } else if (vp->v_type == VFIFO) { 543 error = ESPIPE; 544 goto out; 545 } 546 547 /* 548 * We have to enter the critical region before calling VOP_RWLOCK 549 * to avoid a deadlock with ufs. 550 */ 551 if (nbl_need_check(vp)) { 552 int svmand; 553 554 nbl_start_crit(vp, RW_READER); 555 in_crit = 1; 556 error = nbl_svmand(vp, fp->f_cred, &svmand); 557 if (error != 0) 558 goto out; 559 if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand)) { 560 error = EACCES; 561 goto out; 562 } 563 } 564 565 aiov.iov_base = cbuf; 566 aiov.iov_len = bcount; 567 (void) VOP_RWLOCK(vp, rwflag, NULL); 568 auio.uio_loffset = fileoff; 569 auio.uio_iov = &aiov; 570 auio.uio_iovcnt = 1; 571 auio.uio_resid = bcount; 572 auio.uio_segflg = UIO_USERSPACE; 573 auio.uio_llimit = curproc->p_fsz_ctl; 574 auio.uio_fmode = fflag; 575 auio.uio_extflg = UIO_COPY_CACHED; 576 577 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 578 579 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 580 bcount -= auio.uio_resid; 581 CPU_STATS_ENTER_K(); 582 cp = CPU; 583 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 584 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount); 585 CPU_STATS_EXIT_K(); 586 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 587 VOP_RWUNLOCK(vp, rwflag, NULL); 588 589 if (error == EINTR && bcount != 0) 590 error = 0; 591 out: 592 if (in_crit) 593 nbl_end_crit(vp); 594 releasef(fdes); 595 if (error) 596 return (set_errno(error)); 597 return (bcount); 598 } 599 600 /* 601 * XXX -- The SVID refers to IOV_MAX, but doesn't define it. Grrrr.... 602 * XXX -- However, SVVS expects readv() and writev() to fail if 603 * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source), 604 * XXX -- so I guess that's the "interface". 605 */ 606 #define DEF_IOV_MAX 16 607 608 ssize_t 609 readv(int fdes, struct iovec *iovp, int iovcnt) 610 { 611 struct uio auio; 612 struct iovec aiov[DEF_IOV_MAX]; 613 file_t *fp; 614 register vnode_t *vp; 615 struct cpu *cp; 616 int fflag, ioflag, rwflag; 617 ssize_t count, bcount; 618 int error = 0; 619 int i; 620 u_offset_t fileoff; 621 int in_crit = 0; 622 623 if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX) 624 return (set_errno(EINVAL)); 625 626 #ifdef _SYSCALL32_IMPL 627 /* 628 * 32-bit callers need to have their iovec expanded, 629 * while ensuring that they can't move more than 2Gbytes 630 * of data in a single call. 631 */ 632 if (get_udatamodel() == DATAMODEL_ILP32) { 633 struct iovec32 aiov32[DEF_IOV_MAX]; 634 ssize32_t count32; 635 636 if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32))) 637 return (set_errno(EFAULT)); 638 639 count32 = 0; 640 for (i = 0; i < iovcnt; i++) { 641 ssize32_t iovlen32 = aiov32[i].iov_len; 642 count32 += iovlen32; 643 if (iovlen32 < 0 || count32 < 0) 644 return (set_errno(EINVAL)); 645 aiov[i].iov_len = iovlen32; 646 aiov[i].iov_base = 647 (caddr_t)(uintptr_t)aiov32[i].iov_base; 648 } 649 } else 650 #endif 651 if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec))) 652 return (set_errno(EFAULT)); 653 654 count = 0; 655 for (i = 0; i < iovcnt; i++) { 656 ssize_t iovlen = aiov[i].iov_len; 657 count += iovlen; 658 if (iovlen < 0 || count < 0) 659 return (set_errno(EINVAL)); 660 } 661 if ((fp = getf(fdes)) == NULL) 662 return (set_errno(EBADF)); 663 if (((fflag = fp->f_flag) & FREAD) == 0) { 664 error = EBADF; 665 goto out; 666 } 667 vp = fp->f_vnode; 668 if (vp->v_type == VREG && count == 0) { 669 goto out; 670 } 671 672 rwflag = 0; 673 674 /* 675 * We have to enter the critical region before calling VOP_RWLOCK 676 * to avoid a deadlock with ufs. 677 */ 678 if (nbl_need_check(vp)) { 679 int svmand; 680 681 nbl_start_crit(vp, RW_READER); 682 in_crit = 1; 683 error = nbl_svmand(vp, fp->f_cred, &svmand); 684 if (error != 0) 685 goto out; 686 if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand)) { 687 error = EACCES; 688 goto out; 689 } 690 } 691 692 (void) VOP_RWLOCK(vp, rwflag, NULL); 693 fileoff = fp->f_offset; 694 695 /* 696 * Behaviour is same as read. Please see comments in read. 697 */ 698 699 if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) { 700 struct vattr va; 701 va.va_mask = AT_SIZE; 702 if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) { 703 VOP_RWUNLOCK(vp, rwflag, NULL); 704 goto out; 705 } 706 if (fileoff >= va.va_size) { 707 VOP_RWUNLOCK(vp, rwflag, NULL); 708 count = 0; 709 goto out; 710 } else { 711 VOP_RWUNLOCK(vp, rwflag, NULL); 712 error = EOVERFLOW; 713 goto out; 714 } 715 } 716 if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) { 717 count = (ssize_t)(OFFSET_MAX(fp) - fileoff); 718 } 719 auio.uio_loffset = fileoff; 720 auio.uio_iov = aiov; 721 auio.uio_iovcnt = iovcnt; 722 auio.uio_resid = bcount = count; 723 auio.uio_segflg = UIO_USERSPACE; 724 auio.uio_llimit = MAXOFFSET_T; 725 auio.uio_fmode = fflag; 726 if (bcount < copyout_min_size) 727 auio.uio_extflg = UIO_COPY_CACHED; 728 else 729 auio.uio_extflg = UIO_COPY_DEFAULT; 730 731 732 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 733 734 /* If read sync is not asked for, filter sync flags */ 735 if ((ioflag & FRSYNC) == 0) 736 ioflag &= ~(FSYNC|FDSYNC); 737 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 738 count -= auio.uio_resid; 739 CPU_STATS_ENTER_K(); 740 cp = CPU; 741 CPU_STATS_ADDQ(cp, sys, sysread, 1); 742 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count); 743 CPU_STATS_EXIT_K(); 744 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count; 745 746 if (vp->v_type == VFIFO) /* Backward compatibility */ 747 fp->f_offset = count; 748 else if (((fp->f_flag & FAPPEND) == 0) || 749 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ 750 fp->f_offset = auio.uio_loffset; 751 752 VOP_RWUNLOCK(vp, rwflag, NULL); 753 754 if (error == EINTR && count != 0) 755 error = 0; 756 out: 757 if (in_crit) 758 nbl_end_crit(vp); 759 releasef(fdes); 760 if (error) 761 return (set_errno(error)); 762 return (count); 763 } 764 765 ssize_t 766 writev(int fdes, struct iovec *iovp, int iovcnt) 767 { 768 struct uio auio; 769 struct iovec aiov[DEF_IOV_MAX]; 770 file_t *fp; 771 register vnode_t *vp; 772 struct cpu *cp; 773 int fflag, ioflag, rwflag; 774 ssize_t count, bcount; 775 int error = 0; 776 int i; 777 u_offset_t fileoff; 778 int in_crit = 0; 779 780 if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX) 781 return (set_errno(EINVAL)); 782 783 #ifdef _SYSCALL32_IMPL 784 /* 785 * 32-bit callers need to have their iovec expanded, 786 * while ensuring that they can't move more than 2Gbytes 787 * of data in a single call. 788 */ 789 if (get_udatamodel() == DATAMODEL_ILP32) { 790 struct iovec32 aiov32[DEF_IOV_MAX]; 791 ssize32_t count32; 792 793 if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32))) 794 return (set_errno(EFAULT)); 795 796 count32 = 0; 797 for (i = 0; i < iovcnt; i++) { 798 ssize32_t iovlen = aiov32[i].iov_len; 799 count32 += iovlen; 800 if (iovlen < 0 || count32 < 0) 801 return (set_errno(EINVAL)); 802 aiov[i].iov_len = iovlen; 803 aiov[i].iov_base = 804 (caddr_t)(uintptr_t)aiov32[i].iov_base; 805 } 806 } else 807 #endif 808 if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec))) 809 return (set_errno(EFAULT)); 810 811 count = 0; 812 for (i = 0; i < iovcnt; i++) { 813 ssize_t iovlen = aiov[i].iov_len; 814 count += iovlen; 815 if (iovlen < 0 || count < 0) 816 return (set_errno(EINVAL)); 817 } 818 if ((fp = getf(fdes)) == NULL) 819 return (set_errno(EBADF)); 820 if (((fflag = fp->f_flag) & FWRITE) == 0) { 821 error = EBADF; 822 goto out; 823 } 824 vp = fp->f_vnode; 825 if (vp->v_type == VREG && count == 0) { 826 goto out; 827 } 828 829 rwflag = 1; 830 831 /* 832 * We have to enter the critical region before calling VOP_RWLOCK 833 * to avoid a deadlock with ufs. 834 */ 835 if (nbl_need_check(vp)) { 836 int svmand; 837 838 nbl_start_crit(vp, RW_READER); 839 in_crit = 1; 840 error = nbl_svmand(vp, fp->f_cred, &svmand); 841 if (error != 0) 842 goto out; 843 if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand)) { 844 error = EACCES; 845 goto out; 846 } 847 } 848 849 (void) VOP_RWLOCK(vp, rwflag, NULL); 850 851 fileoff = fp->f_offset; 852 853 /* 854 * Behaviour is same as write. Please see comments for write. 855 */ 856 857 if (vp->v_type == VREG) { 858 if (fileoff >= curproc->p_fsz_ctl) { 859 VOP_RWUNLOCK(vp, rwflag, NULL); 860 mutex_enter(&curproc->p_lock); 861 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 862 curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO); 863 mutex_exit(&curproc->p_lock); 864 error = EFBIG; 865 goto out; 866 } 867 if (fileoff >= OFFSET_MAX(fp)) { 868 VOP_RWUNLOCK(vp, rwflag, NULL); 869 error = EFBIG; 870 goto out; 871 } 872 if (fileoff + count > OFFSET_MAX(fp)) 873 count = (ssize_t)(OFFSET_MAX(fp) - fileoff); 874 } 875 auio.uio_loffset = fileoff; 876 auio.uio_iov = aiov; 877 auio.uio_iovcnt = iovcnt; 878 auio.uio_resid = bcount = count; 879 auio.uio_segflg = UIO_USERSPACE; 880 auio.uio_llimit = curproc->p_fsz_ctl; 881 auio.uio_fmode = fflag; 882 auio.uio_extflg = UIO_COPY_DEFAULT; 883 884 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 885 886 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 887 count -= auio.uio_resid; 888 CPU_STATS_ENTER_K(); 889 cp = CPU; 890 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 891 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count); 892 CPU_STATS_EXIT_K(); 893 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count; 894 895 if (vp->v_type == VFIFO) /* Backward compatibility */ 896 fp->f_offset = count; 897 else if (((fp->f_flag & FAPPEND) == 0) || 898 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ 899 fp->f_offset = auio.uio_loffset; 900 VOP_RWUNLOCK(vp, rwflag, NULL); 901 902 if (error == EINTR && count != 0) 903 error = 0; 904 out: 905 if (in_crit) 906 nbl_end_crit(vp); 907 releasef(fdes); 908 if (error) 909 return (set_errno(error)); 910 return (count); 911 } 912 913 #if defined(_SYSCALL32_IMPL) || defined(_ILP32) 914 915 /* 916 * This syscall supplies 64-bit file offsets to 32-bit applications only. 917 */ 918 ssize32_t 919 pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1, 920 uint32_t offset_2) 921 { 922 struct uio auio; 923 struct iovec aiov; 924 file_t *fp; 925 register vnode_t *vp; 926 struct cpu *cp; 927 int fflag, ioflag, rwflag; 928 ssize_t bcount; 929 int error = 0; 930 u_offset_t fileoff; 931 int in_crit = 0; 932 933 #if defined(_LITTLE_ENDIAN) 934 fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1; 935 #else 936 fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2; 937 #endif 938 939 if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX) 940 return (set_errno(EINVAL)); 941 942 if ((fp = getf(fdes)) == NULL) 943 return (set_errno(EBADF)); 944 if (((fflag = fp->f_flag) & (FREAD)) == 0) { 945 error = EBADF; 946 goto out; 947 } 948 949 rwflag = 0; 950 vp = fp->f_vnode; 951 952 if (vp->v_type == VREG) { 953 954 if (bcount == 0) 955 goto out; 956 957 /* 958 * Same as pread. See comments in pread. 959 */ 960 961 if (fileoff > MAXOFFSET_T) { 962 error = EINVAL; 963 goto out; 964 } 965 if (fileoff + bcount > MAXOFFSET_T) 966 bcount = (ssize_t)(MAXOFFSET_T - fileoff); 967 } else if (vp->v_type == VFIFO) { 968 error = ESPIPE; 969 goto out; 970 } 971 972 /* 973 * We have to enter the critical region before calling VOP_RWLOCK 974 * to avoid a deadlock with ufs. 975 */ 976 if (nbl_need_check(vp)) { 977 int svmand; 978 979 nbl_start_crit(vp, RW_READER); 980 in_crit = 1; 981 error = nbl_svmand(vp, fp->f_cred, &svmand); 982 if (error != 0) 983 goto out; 984 if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand)) { 985 error = EACCES; 986 goto out; 987 } 988 } 989 990 aiov.iov_base = cbuf; 991 aiov.iov_len = bcount; 992 (void) VOP_RWLOCK(vp, rwflag, NULL); 993 auio.uio_loffset = fileoff; 994 995 /* 996 * Note: File size can never be greater than MAXOFFSET_T. 997 * If ever we start supporting 128 bit files the code 998 * similar to the one in pread at this place should be here. 999 * Here we avoid the unnecessary VOP_GETATTR() when we 1000 * know that fileoff == MAXOFFSET_T implies that it is always 1001 * greater than or equal to file size. 1002 */ 1003 auio.uio_iov = &aiov; 1004 auio.uio_iovcnt = 1; 1005 auio.uio_resid = bcount; 1006 auio.uio_segflg = UIO_USERSPACE; 1007 auio.uio_llimit = MAXOFFSET_T; 1008 auio.uio_fmode = fflag; 1009 auio.uio_extflg = UIO_COPY_CACHED; 1010 1011 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 1012 1013 /* If read sync is not asked for, filter sync flags */ 1014 if ((ioflag & FRSYNC) == 0) 1015 ioflag &= ~(FSYNC|FDSYNC); 1016 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 1017 bcount -= auio.uio_resid; 1018 CPU_STATS_ENTER_K(); 1019 cp = CPU; 1020 CPU_STATS_ADDQ(cp, sys, sysread, 1); 1021 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount); 1022 CPU_STATS_EXIT_K(); 1023 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 1024 VOP_RWUNLOCK(vp, rwflag, NULL); 1025 1026 if (error == EINTR && bcount != 0) 1027 error = 0; 1028 out: 1029 if (in_crit) 1030 nbl_end_crit(vp); 1031 releasef(fdes); 1032 if (error) 1033 return (set_errno(error)); 1034 return (bcount); 1035 } 1036 1037 /* 1038 * This syscall supplies 64-bit file offsets to 32-bit applications only. 1039 */ 1040 ssize32_t 1041 pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1, 1042 uint32_t offset_2) 1043 { 1044 struct uio auio; 1045 struct iovec aiov; 1046 file_t *fp; 1047 register vnode_t *vp; 1048 struct cpu *cp; 1049 int fflag, ioflag, rwflag; 1050 ssize_t bcount; 1051 int error = 0; 1052 u_offset_t fileoff; 1053 int in_crit = 0; 1054 1055 #if defined(_LITTLE_ENDIAN) 1056 fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1; 1057 #else 1058 fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2; 1059 #endif 1060 1061 if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX) 1062 return (set_errno(EINVAL)); 1063 if ((fp = getf(fdes)) == NULL) 1064 return (set_errno(EBADF)); 1065 if (((fflag = fp->f_flag) & (FWRITE)) == 0) { 1066 error = EBADF; 1067 goto out; 1068 } 1069 1070 rwflag = 1; 1071 vp = fp->f_vnode; 1072 1073 if (vp->v_type == VREG) { 1074 1075 if (bcount == 0) 1076 goto out; 1077 1078 /* 1079 * See comments in pwrite. 1080 */ 1081 if (fileoff > MAXOFFSET_T) { 1082 error = EINVAL; 1083 goto out; 1084 } 1085 if (fileoff >= curproc->p_fsz_ctl) { 1086 mutex_enter(&curproc->p_lock); 1087 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 1088 curproc->p_rctls, curproc, RCA_SAFE); 1089 mutex_exit(&curproc->p_lock); 1090 error = EFBIG; 1091 goto out; 1092 } 1093 if (fileoff == MAXOFFSET_T) { 1094 error = EFBIG; 1095 goto out; 1096 } 1097 if (fileoff + bcount > MAXOFFSET_T) 1098 bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff); 1099 } else if (vp->v_type == VFIFO) { 1100 error = ESPIPE; 1101 goto out; 1102 } 1103 1104 /* 1105 * We have to enter the critical region before calling VOP_RWLOCK 1106 * to avoid a deadlock with ufs. 1107 */ 1108 if (nbl_need_check(vp)) { 1109 int svmand; 1110 1111 nbl_start_crit(vp, RW_READER); 1112 in_crit = 1; 1113 error = nbl_svmand(vp, fp->f_cred, &svmand); 1114 if (error != 0) 1115 goto out; 1116 if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand)) { 1117 error = EACCES; 1118 goto out; 1119 } 1120 } 1121 1122 aiov.iov_base = cbuf; 1123 aiov.iov_len = bcount; 1124 (void) VOP_RWLOCK(vp, rwflag, NULL); 1125 auio.uio_loffset = fileoff; 1126 auio.uio_iov = &aiov; 1127 auio.uio_iovcnt = 1; 1128 auio.uio_resid = bcount; 1129 auio.uio_segflg = UIO_USERSPACE; 1130 auio.uio_llimit = curproc->p_fsz_ctl; 1131 auio.uio_fmode = fflag; 1132 auio.uio_extflg = UIO_COPY_CACHED; 1133 1134 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 1135 1136 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 1137 bcount -= auio.uio_resid; 1138 CPU_STATS_ENTER_K(); 1139 cp = CPU; 1140 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 1141 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount); 1142 CPU_STATS_EXIT_K(); 1143 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 1144 VOP_RWUNLOCK(vp, rwflag, NULL); 1145 1146 if (error == EINTR && bcount != 0) 1147 error = 0; 1148 out: 1149 if (in_crit) 1150 nbl_end_crit(vp); 1151 releasef(fdes); 1152 if (error) 1153 return (set_errno(error)); 1154 return (bcount); 1155 } 1156 1157 #endif /* _SYSCALL32_IMPL || _ILP32 */ 1158 1159 #ifdef _SYSCALL32_IMPL 1160 /* 1161 * Tail-call elimination of xxx32() down to xxx() 1162 * 1163 * A number of xxx32 system calls take a len (or count) argument and 1164 * return a number in the range [0,len] or -1 on error. 1165 * Given an ssize32_t input len, the downcall xxx() will return 1166 * a 64-bit value that is -1 or in the range [0,len] which actually 1167 * is a proper return value for the xxx32 call. So even if the xxx32 1168 * calls can be considered as returning a ssize32_t, they are currently 1169 * declared as returning a ssize_t as this enables tail-call elimination. 1170 * 1171 * The cast of len (or count) to ssize32_t is needed to ensure we pass 1172 * down negative input values as such and let the downcall handle error 1173 * reporting. Functions covered by this comments are: 1174 * 1175 * rw.c: read32, write32, pread32, pwrite32, readv32, writev32. 1176 * socksyscall.c: recv32, recvfrom32, send32, sendto32. 1177 * readlink.c: readlink32. 1178 */ 1179 1180 ssize_t 1181 read32(int32_t fdes, caddr32_t cbuf, size32_t count) 1182 { 1183 return (read(fdes, 1184 (void *)(uintptr_t)cbuf, (ssize32_t)count)); 1185 } 1186 1187 ssize_t 1188 write32(int32_t fdes, caddr32_t cbuf, size32_t count) 1189 { 1190 return (write(fdes, 1191 (void *)(uintptr_t)cbuf, (ssize32_t)count)); 1192 } 1193 1194 ssize_t 1195 pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) 1196 { 1197 return (pread(fdes, 1198 (void *)(uintptr_t)cbuf, (ssize32_t)count, 1199 (off_t)(uint32_t)offset)); 1200 } 1201 1202 ssize_t 1203 pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) 1204 { 1205 return (pwrite(fdes, 1206 (void *)(uintptr_t)cbuf, (ssize32_t)count, 1207 (off_t)(uint32_t)offset)); 1208 } 1209 1210 ssize_t 1211 readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) 1212 { 1213 return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt)); 1214 } 1215 1216 ssize_t 1217 writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) 1218 { 1219 return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt)); 1220 } 1221 1222 #endif /* _SYSCALL32_IMPL */ 1223