1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright (c) 2015, Joyent, Inc. All rights reserved.
26 */
27
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31 /*
32 * Portions of this source code were derived from Berkeley 4.3 BSD
33 * under license from the Regents of the University of California.
34 */
35
36 #include <sys/param.h>
37 #include <sys/isa_defs.h>
38 #include <sys/types.h>
39 #include <sys/inttypes.h>
40 #include <sys/sysmacros.h>
41 #include <sys/cred.h>
42 #include <sys/user.h>
43 #include <sys/systm.h>
44 #include <sys/errno.h>
45 #include <sys/vnode.h>
46 #include <sys/file.h>
47 #include <sys/proc.h>
48 #include <sys/cpuvar.h>
49 #include <sys/uio.h>
50 #include <sys/debug.h>
51 #include <sys/rctl.h>
52 #include <sys/nbmlock.h>
53
/*
 * Transfers of at most copyout_max_cached bytes request UIO_COPY_CACHED
 * copyout in read() and readv(); larger transfers use UIO_COPY_DEFAULT
 * (see the uio_extflg assignments below).
 */
#define	COPYOUT_MAX_CACHE	(1<<17)		/* 128K */

size_t copyout_max_cached = COPYOUT_MAX_CACHE;	/* global so it's patchable */
57
58 /*
59 * read, write, pread, pwrite, readv, and writev syscalls.
60 *
61 * 64-bit open: all open's are large file opens.
62 * Large Files: the behaviour of read depends on whether the fd
63 * corresponds to large open or not.
64 * 32-bit open: FOFFMAX flag not set.
65 * read until MAXOFF32_T - 1 and read at MAXOFF32_T returns
66 * EOVERFLOW if count is non-zero and if size of file
67 * is > MAXOFF32_T. If size of file is <= MAXOFF32_T read
68 * at >= MAXOFF32_T returns EOF.
69 */
70
/*
 * Native system call
 *
 * read(2): read up to 'count' bytes from the current offset of the
 * open file 'fdes' into the user buffer 'cbuf'.  Returns the number
 * of bytes transferred, or -1 with errno set on failure.
 */
ssize_t
read(int fdes, void *cbuf, size_t count)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t cnt, bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;	/* inside an nbmand critical region? */

	/* A count that does not fit in ssize_t is invalid. */
	if ((cnt = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;

	/* Zero-length read of a regular file: return 0 immediately. */
	if (vp->v_type == VREG && cnt == 0) {
		goto out;
	}

	rwflag = 0;		/* reader side of VOP_RWLOCK */
	aiov.iov_base = cbuf;
	aiov.iov_len = cnt;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write() calls.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	/*
	 * We do the following checks inside VOP_RWLOCK so as to
	 * prevent file size from changing while these checks are
	 * being done. Also, we load fp's offset to the local
	 * variable fileoff because we can have a parallel lseek
	 * going on (f_offset is not protected by any lock) which
	 * could change f_offset. We need to see the value only
	 * once here and take a decision. Seeing it more than once
	 * can lead to incorrect functionality.
	 */

	fileoff = (u_offset_t)fp->f_offset;
	if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			/* At or past EOF: plain EOF, not an error. */
			cnt = 0;
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		} else {
			/*
			 * File has data beyond this open's offset
			 * maximum; the caller cannot see it.
			 */
			error = EOVERFLOW;
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
	}
	if ((vp->v_type == VREG) &&
	    (fileoff + cnt > OFFSET_MAX(fp))) {
		/* Trim the transfer so it stops at the offset maximum. */
		cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount = cnt;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	/*
	 * Only use bypass caches when the count is large enough
	 */
	if (bcount <= copyout_max_cached)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	cnt -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = cnt;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* An interrupted read that moved data is a short read, not EINTR. */
	if (error == EINTR && cnt != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (cnt);
}
206
/*
 * Native system call
 *
 * write(2): write up to 'count' bytes from the user buffer 'cbuf' to
 * the open file 'fdes' at its current offset (FAPPEND is passed down
 * in ioflag so the VOP layer applies O_APPEND semantics).
 */
ssize_t
write(int fdes, void *cbuf, size_t count)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t cnt, bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;	/* inside an nbmand critical region? */

	if ((cnt = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;

	if (vp->v_type == VREG && cnt == 0) {
		goto out;
	}

	rwflag = 1;		/* writer side of VOP_RWLOCK */
	aiov.iov_base = cbuf;
	aiov.iov_len = cnt;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	fileoff = fp->f_offset;
	if (vp->v_type == VREG) {

		/*
		 * We raise psignal if write for >0 bytes causes
		 * it to exceed the ulimit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			VOP_RWUNLOCK(vp, rwflag, NULL);

			/* Trigger the RLIMIT_FSIZE rctl action (signal). */
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * We return EFBIG if write is done at an offset
		 * greater than the offset maximum for this file structure.
		 */

		if (fileoff >= OFFSET_MAX(fp)) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		/*
		 * Limit the bytes to be written upto offset maximum for
		 * this open file structure.
		 */
		if (fileoff + cnt > OFFSET_MAX(fp))
			cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount = cnt;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	cnt -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = cnt;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* An interrupted write that moved data is a short write. */
	if (error == EINTR && cnt != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (cnt);
}
335
/*
 * pread(2): read from an explicit file offset; the file's seek offset
 * (fp->f_offset) is neither consulted nor updated.  Not supported on
 * pipes (ESPIPE).
 */
ssize_t
pread(int fdes, void *cbuf, size_t count, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
#ifdef _SYSCALL32_IMPL
	/* ILP32 callers are bounded by MAXOFF32_T, LP64 by MAXOFFSET_T. */
	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
	    MAXOFF32_T : MAXOFFSET_T;
#else
	const u_offset_t maxoff = MAXOFF32_T;
#endif
	int in_crit = 0;	/* inside an nbmand critical region? */

	if ((bcount = (ssize_t)count) < 0)
		return (set_errno(EINVAL));

	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 0;		/* reader side of VOP_RWLOCK */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * Return EINVAL if an invalid offset comes to pread.
		 * Negative offset from user will cause this error.
		 */

		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Limit offset such that we don't read or write
		 * a file beyond the maximum offset representable in
		 * an off_t structure.
		 */
		if (fileoff + bcount > maxoff)
			bcount = (ssize_t)((offset_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	if (vp->v_type == VREG && fileoff == (u_offset_t)maxoff) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		VOP_RWUNLOCK(vp, rwflag, NULL);

		/*
		 * We have to return EOF if fileoff is >= file size.
		 */
		if (fileoff >= va.va_size) {
			bcount = 0;
			goto out;
		}

		/*
		 * File is greater than or equal to maxoff and therefore
		 * we return EOVERFLOW.
		 */
		error = EOVERFLOW;
		goto out;
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* Positioned read: fp->f_offset is deliberately left unchanged. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}
475
/*
 * pwrite(2): write at an explicit file offset; the file's seek offset
 * is neither consulted nor updated, and FAPPEND is deliberately
 * excluded from ioflag (see the SUSv4 note below).  Not supported on
 * pipes (ESPIPE).
 */
ssize_t
pwrite(int fdes, void *cbuf, size_t count, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
#ifdef _SYSCALL32_IMPL
	/* ILP32 callers are bounded by MAXOFF32_T, LP64 by MAXOFFSET_T. */
	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
	    MAXOFF32_T : MAXOFFSET_T;
#else
	const u_offset_t maxoff = MAXOFF32_T;
#endif
	int in_crit = 0;	/* inside an nbmand critical region? */

	if ((bcount = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 1;		/* writer side of VOP_RWLOCK */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * return EINVAL for offsets that cannot be
		 * represented in an off_t.
		 */
		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Take appropriate action if we are trying to write above the
		 * resource limit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			/* Trigger the RLIMIT_FSIZE rctl action (signal). */
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * Don't allow pwrite to cause file sizes to exceed
		 * maxoff.
		 */
		if (fileoff == maxoff) {
			error = EFBIG;
			goto out;
		}
		if (fileoff + count > maxoff)
			bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	/*
	 * The SUSv4 POSIX specification states:
	 * The pwrite() function shall be equivalent to write(), except
	 * that it writes into a given position and does not change
	 * the file offset (regardless of whether O_APPEND is set).
	 * To make this be true, we omit the FAPPEND flag from ioflag.
	 */
	ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* An interrupted write that moved data is a short write. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}
609
610 /*
611 * XXX -- The SVID refers to IOV_MAX, but doesn't define it. Grrrr....
612 * XXX -- However, SVVS expects readv() and writev() to fail if
613 * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source),
614 * XXX -- so I guess that's the "interface".
615 */
616 #define DEF_IOV_MAX 16
617
/*
 * readv(2): scatter read into up to DEF_IOV_MAX user buffers at the
 * file's current offset; the combined length must not overflow a
 * (32-bit, for ILP32 callers) signed size.
 */
ssize_t
readv(int fdes, struct iovec *iovp, int iovcnt)
{
	struct uio auio;
	struct iovec aiov[DEF_IOV_MAX];
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;
	u_offset_t fileoff;
	int in_crit = 0;	/* inside an nbmand critical region? */

	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
		return (set_errno(EINVAL));

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 aiov32[DEF_IOV_MAX];
		ssize32_t count32;

		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
			return (set_errno(EFAULT));

		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen32 = aiov32[i].iov_len;
			count32 += iovlen32;
			/* Negative length or total overflow is invalid. */
			if (iovlen32 < 0 || count32 < 0)
				return (set_errno(EINVAL));
			aiov[i].iov_len = iovlen32;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}
	} else
#endif
	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
		return (set_errno(EFAULT));

	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		/* Negative length or total overflow is invalid. */
		if (iovlen < 0 || count < 0)
			return (set_errno(EINVAL));
	}
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp->v_type == VREG && count == 0) {
		goto out;
	}

	rwflag = 0;		/* reader side of VOP_RWLOCK */

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);
	fileoff = fp->f_offset;

	/*
	 * Behaviour is same as read. Please see comments in read.
	 */

	if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			count = 0;
			goto out;
		} else {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EOVERFLOW;
			goto out;
		}
	}
	if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) {
		/* Trim the transfer so it stops at the offset maximum. */
		count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	/* Only use cached copyout for small transfers; see read(). */
	if (bcount <= copyout_max_cached)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;


	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;

	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* An interrupted read that moved data is a short read, not EINTR. */
	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (count);
}
775
/*
 * writev(2): gather write from up to DEF_IOV_MAX user buffers at the
 * file's current offset.
 */
ssize_t
writev(int fdes, struct iovec *iovp, int iovcnt)
{
	struct uio auio;
	struct iovec aiov[DEF_IOV_MAX];
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;
	u_offset_t fileoff;
	int in_crit = 0;	/* inside an nbmand critical region? */

	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
		return (set_errno(EINVAL));

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 aiov32[DEF_IOV_MAX];
		ssize32_t count32;

		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
			return (set_errno(EFAULT));

		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen = aiov32[i].iov_len;
			count32 += iovlen;
			/* Negative length or total overflow is invalid. */
			if (iovlen < 0 || count32 < 0)
				return (set_errno(EINVAL));
			aiov[i].iov_len = iovlen;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}
	} else
#endif
	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
		return (set_errno(EFAULT));

	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		/* Negative length or total overflow is invalid. */
		if (iovlen < 0 || count < 0)
			return (set_errno(EINVAL));
	}
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp->v_type == VREG && count == 0) {
		goto out;
	}

	rwflag = 1;		/* writer side of VOP_RWLOCK */

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	fileoff = fp->f_offset;

	/*
	 * Behaviour is same as write. Please see comments for write.
	 */

	if (vp->v_type == VREG) {
		if (fileoff >= curproc->p_fsz_ctl) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			/* Trigger the RLIMIT_FSIZE rctl action (signal). */
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		if (fileoff >= OFFSET_MAX(fp)) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		if (fileoff + count > OFFSET_MAX(fp))
			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;	/* bytes actually transferred */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* An interrupted write that moved data is a short write. */
	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (count);
}
924
925 ssize_t
preadv(int fdes,struct iovec * iovp,int iovcnt,off_t offset,off_t extended_offset)926 preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
927 off_t extended_offset)
928 {
929 struct uio auio;
930 struct iovec aiov[DEF_IOV_MAX];
931 file_t *fp;
932 register vnode_t *vp;
933 struct cpu *cp;
934 int fflag, ioflag, rwflag;
935 ssize_t count, bcount;
936 int error = 0;
937 int i;
938
939 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
940 u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
941 (u_offset_t)offset;
942 #else /* _SYSCALL32_IMPL || _ILP32 */
943 u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
944 #endif /* _SYSCALL32_IMPR || _ILP32 */
945 #ifdef _SYSCALL32_IMPL
946 const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
947 extended_offset == 0?
948 MAXOFF32_T : MAXOFFSET_T;
949 #else /* _SYSCALL32_IMPL */
950 const u_offset_t maxoff = MAXOFF32_T;
951 #endif /* _SYSCALL32_IMPL */
952
953 int in_crit = 0;
954
955 if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
956 return (set_errno(EINVAL));
957
958 #ifdef _SYSCALL32_IMPL
959 /*
960 * 32-bit callers need to have their iovec expanded,
961 * while ensuring that they can't move more than 2Gbytes
962 * of data in a single call.
963 */
964 if (get_udatamodel() == DATAMODEL_ILP32) {
965 struct iovec32 aiov32[DEF_IOV_MAX];
966 ssize32_t count32;
967
968 if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
969 return (set_errno(EFAULT));
970
971 count32 = 0;
972 for (i = 0; i < iovcnt; i++) {
973 ssize32_t iovlen32 = aiov32[i].iov_len;
974 count32 += iovlen32;
975 if (iovlen32 < 0 || count32 < 0)
976 return (set_errno(EINVAL));
977 aiov[i].iov_len = iovlen32;
978 aiov[i].iov_base =
979 (caddr_t)(uintptr_t)aiov32[i].iov_base;
980 }
981 } else
982 #endif /* _SYSCALL32_IMPL */
983 if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
984 return (set_errno(EFAULT));
985
986 count = 0;
987 for (i = 0; i < iovcnt; i++) {
988 ssize_t iovlen = aiov[i].iov_len;
989 count += iovlen;
990 if (iovlen < 0 || count < 0)
991 return (set_errno(EINVAL));
992 }
993
994 if ((bcount = (ssize_t)count) < 0)
995 return (set_errno(EINVAL));
996 if ((fp = getf(fdes)) == NULL)
997 return (set_errno(EBADF));
998 if (((fflag = fp->f_flag) & FREAD) == 0) {
999 error = EBADF;
1000 goto out;
1001 }
1002 vp = fp->f_vnode;
1003 rwflag = 0;
1004 if (vp->v_type == VREG) {
1005
1006 if (bcount == 0)
1007 goto out;
1008
1009 /*
1010 * return EINVAL for offsets that cannot be
1011 * represented in an off_t.
1012 */
1013 if (fileoff > maxoff) {
1014 error = EINVAL;
1015 goto out;
1016 }
1017
1018 if (fileoff + bcount > maxoff)
1019 bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
1020 } else if (vp->v_type == VFIFO) {
1021 error = ESPIPE;
1022 goto out;
1023 }
1024 /*
1025 * We have to enter the critical region before calling VOP_RWLOCK
1026 * to avoid a deadlock with ufs.
1027 */
1028 if (nbl_need_check(vp)) {
1029 int svmand;
1030
1031 nbl_start_crit(vp, RW_READER);
1032 in_crit = 1;
1033 error = nbl_svmand(vp, fp->f_cred, &svmand);
1034 if (error != 0)
1035 goto out;
1036 if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
1037 NULL)) {
1038 error = EACCES;
1039 goto out;
1040 }
1041 }
1042
1043 (void) VOP_RWLOCK(vp, rwflag, NULL);
1044
1045 /*
1046 * Behaviour is same as read(2). Please see comments in
1047 * read(2).
1048 */
1049
1050 if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
1051 struct vattr va;
1052 va.va_mask = AT_SIZE;
1053 if ((error =
1054 VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
1055 VOP_RWUNLOCK(vp, rwflag, NULL);
1056 goto out;
1057 }
1058 if (fileoff >= va.va_size) {
1059 VOP_RWUNLOCK(vp, rwflag, NULL);
1060 count = 0;
1061 goto out;
1062 } else {
1063 VOP_RWUNLOCK(vp, rwflag, NULL);
1064 error = EOVERFLOW;
1065 goto out;
1066 }
1067 }
1068 if ((vp->v_type == VREG) &&
1069 (fileoff + count > OFFSET_MAX(fp))) {
1070 count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1071 }
1072 auio.uio_loffset = fileoff;
1073 auio.uio_iov = aiov;
1074 auio.uio_iovcnt = iovcnt;
1075 auio.uio_resid = bcount = count;
1076 auio.uio_segflg = UIO_USERSPACE;
1077 auio.uio_llimit = MAXOFFSET_T;
1078 auio.uio_fmode = fflag;
1079 if (bcount <= copyout_max_cached)
1080 auio.uio_extflg = UIO_COPY_CACHED;
1081 else
1082 auio.uio_extflg = UIO_COPY_DEFAULT;
1083
1084 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1085 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1086 count -= auio.uio_resid;
1087 CPU_STATS_ENTER_K();
1088 cp = CPU;
1089 CPU_STATS_ADDQ(cp, sys, sysread, 1);
1090 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
1091 CPU_STATS_EXIT_K();
1092 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1093
1094 VOP_RWUNLOCK(vp, rwflag, NULL);
1095
1096 if (error == EINTR && count != 0)
1097 error = 0;
1098 out:
1099 if (in_crit)
1100 nbl_end_crit(vp);
1101 releasef(fdes);
1102 if (error)
1103 return (set_errno(error));
1104 return (count);
1105 }
1106
1107 ssize_t
pwritev(int fdes,struct iovec * iovp,int iovcnt,off_t offset,off_t extended_offset)1108 pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
1109 off_t extended_offset)
1110 {
1111 struct uio auio;
1112 struct iovec aiov[DEF_IOV_MAX];
1113 file_t *fp;
1114 register vnode_t *vp;
1115 struct cpu *cp;
1116 int fflag, ioflag, rwflag;
1117 ssize_t count, bcount;
1118 int error = 0;
1119 int i;
1120
1121 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1122 u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1123 (u_offset_t)offset;
1124 #else /* _SYSCALL32_IMPL || _ILP32 */
1125 u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
1126 #endif /* _SYSCALL32_IMPR || _ILP32 */
1127 #ifdef _SYSCALL32_IMPL
1128 const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
1129 extended_offset == 0?
1130 MAXOFF32_T : MAXOFFSET_T;
1131 #else /* _SYSCALL32_IMPL */
1132 const u_offset_t maxoff = MAXOFF32_T;
1133 #endif /* _SYSCALL32_IMPL */
1134
1135 int in_crit = 0;
1136
1137 if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
1138 return (set_errno(EINVAL));
1139
1140 #ifdef _SYSCALL32_IMPL
1141 /*
1142 * 32-bit callers need to have their iovec expanded,
1143 * while ensuring that they can't move more than 2Gbytes
1144 * of data in a single call.
1145 */
1146 if (get_udatamodel() == DATAMODEL_ILP32) {
1147 struct iovec32 aiov32[DEF_IOV_MAX];
1148 ssize32_t count32;
1149
1150 if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
1151 return (set_errno(EFAULT));
1152
1153 count32 = 0;
1154 for (i = 0; i < iovcnt; i++) {
1155 ssize32_t iovlen32 = aiov32[i].iov_len;
1156 count32 += iovlen32;
1157 if (iovlen32 < 0 || count32 < 0)
1158 return (set_errno(EINVAL));
1159 aiov[i].iov_len = iovlen32;
1160 aiov[i].iov_base =
1161 (caddr_t)(uintptr_t)aiov32[i].iov_base;
1162 }
1163 } else
1164 #endif /* _SYSCALL32_IMPL */
1165 if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
1166 return (set_errno(EFAULT));
1167
1168 count = 0;
1169 for (i = 0; i < iovcnt; i++) {
1170 ssize_t iovlen = aiov[i].iov_len;
1171 count += iovlen;
1172 if (iovlen < 0 || count < 0)
1173 return (set_errno(EINVAL));
1174 }
1175
1176 if ((bcount = (ssize_t)count) < 0)
1177 return (set_errno(EINVAL));
1178 if ((fp = getf(fdes)) == NULL)
1179 return (set_errno(EBADF));
1180 if (((fflag = fp->f_flag) & FWRITE) == 0) {
1181 error = EBADF;
1182 goto out;
1183 }
1184 vp = fp->f_vnode;
1185 rwflag = 1;
1186 if (vp->v_type == VREG) {
1187
1188 if (bcount == 0)
1189 goto out;
1190
1191 /*
1192 * return EINVAL for offsets that cannot be
1193 * represented in an off_t.
1194 */
1195 if (fileoff > maxoff) {
1196 error = EINVAL;
1197 goto out;
1198 }
1199 /*
1200 * Take appropriate action if we are trying
1201 * to write above the resource limit.
1202 */
1203 if (fileoff >= curproc->p_fsz_ctl) {
1204 mutex_enter(&curproc->p_lock);
1205 /*
1206 * Return value ignored because it lists
1207 * actions taken, but we are in an error case.
1208 * We don't have any actions that depend on
1209 * what could happen in this call, so we ignore
1210 * the return value.
1211 */
1212 (void) rctl_action(
1213 rctlproc_legacy[RLIMIT_FSIZE],
1214 curproc->p_rctls, curproc,
1215 RCA_UNSAFE_SIGINFO);
1216 mutex_exit(&curproc->p_lock);
1217
1218 error = EFBIG;
1219 goto out;
1220 }
1221 /*
1222 * Don't allow pwritev to cause file sizes to exceed
1223 * maxoff.
1224 */
1225 if (fileoff == maxoff) {
1226 error = EFBIG;
1227 goto out;
1228 }
1229
1230 if (fileoff + bcount > maxoff)
1231 bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
1232 } else if (vp->v_type == VFIFO) {
1233 error = ESPIPE;
1234 goto out;
1235 }
1236 /*
1237 * We have to enter the critical region before calling VOP_RWLOCK
1238 * to avoid a deadlock with ufs.
1239 */
1240 if (nbl_need_check(vp)) {
1241 int svmand;
1242
1243 nbl_start_crit(vp, RW_READER);
1244 in_crit = 1;
1245 error = nbl_svmand(vp, fp->f_cred, &svmand);
1246 if (error != 0)
1247 goto out;
1248 if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
1249 NULL)) {
1250 error = EACCES;
1251 goto out;
1252 }
1253 }
1254
1255 (void) VOP_RWLOCK(vp, rwflag, NULL);
1256
1257
1258 /*
1259 * Behaviour is same as write(2). Please see comments for
1260 * write(2).
1261 */
1262
1263 if (vp->v_type == VREG) {
1264 if (fileoff >= curproc->p_fsz_ctl) {
1265 VOP_RWUNLOCK(vp, rwflag, NULL);
1266 mutex_enter(&curproc->p_lock);
1267 /* see above rctl_action comment */
1268 (void) rctl_action(
1269 rctlproc_legacy[RLIMIT_FSIZE],
1270 curproc->p_rctls,
1271 curproc, RCA_UNSAFE_SIGINFO);
1272 mutex_exit(&curproc->p_lock);
1273 error = EFBIG;
1274 goto out;
1275 }
1276 if (fileoff >= OFFSET_MAX(fp)) {
1277 VOP_RWUNLOCK(vp, rwflag, NULL);
1278 error = EFBIG;
1279 goto out;
1280 }
1281 if (fileoff + count > OFFSET_MAX(fp))
1282 count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1283 }
1284
1285 auio.uio_loffset = fileoff;
1286 auio.uio_iov = aiov;
1287 auio.uio_iovcnt = iovcnt;
1288 auio.uio_resid = bcount = count;
1289 auio.uio_segflg = UIO_USERSPACE;
1290 auio.uio_llimit = curproc->p_fsz_ctl;
1291 auio.uio_fmode = fflag;
1292 auio.uio_extflg = UIO_COPY_CACHED;
1293 ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
1294 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
1295 count -= auio.uio_resid;
1296 CPU_STATS_ENTER_K();
1297 cp = CPU;
1298 CPU_STATS_ADDQ(cp, sys, syswrite, 1);
1299 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
1300 CPU_STATS_EXIT_K();
1301 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1302
1303 VOP_RWUNLOCK(vp, rwflag, NULL);
1304
1305 if (error == EINTR && count != 0)
1306 error = 0;
1307 out:
1308 if (in_crit)
1309 nbl_end_crit(vp);
1310 releasef(fdes);
1311 if (error)
1312 return (set_errno(error));
1313 return (count);
1314 }
1315
1316 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1317
1318 /*
1319 * This syscall supplies 64-bit file offsets to 32-bit applications only.
1320 */
/*
 * pread64(2): positional read supplying a 64-bit file offset to 32-bit
 * applications only.  The 64-bit offset arrives split across two 32-bit
 * syscall arguments (which half is which depends on endianness).  The
 * seek pointer associated with fdes is neither consulted nor updated.
 *
 * Returns the number of bytes transferred, or sets errno and returns -1.
 */
ssize32_t
pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
	uint32_t offset_2)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;	/* nonzero once inside the nbmand crit region */

	/* Reassemble the 64-bit offset from its two 32-bit halves. */
#if defined(_LITTLE_ENDIAN)
	fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
#else
	fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
#endif

	/* Reject negative sizes and transfers larger than INT32_MAX. */
	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
		return (set_errno(EINVAL));

	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 0;		/* 0 == reader for VOP_RWLOCK */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		/* Zero-length read of a regular file: succeed trivially. */
		if (bcount == 0)
			goto out;

		/*
		 * Same as pread. See comments in pread.
		 */

		if (fileoff > MAXOFFSET_T) {
			error = EINVAL;
			goto out;
		}
		/* Clip the transfer so it cannot run past MAXOFFSET_T. */
		if (fileoff + bcount > MAXOFFSET_T)
			bcount = (ssize_t)(MAXOFFSET_T - fileoff);
	} else if (vp->v_type == VFIFO) {
		/* Positional I/O is undefined on pipes/FIFOs. */
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		/* Fail if an NBMAND lock/share conflicts with this read. */
		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;

	/*
	 * Note: File size can never be greater than MAXOFFSET_T.
	 * If ever we start supporting 128 bit files the code
	 * similar to the one in pread at this place should be here.
	 * Here we avoid the unnecessary VOP_GETATTR() when we
	 * know that fileoff == MAXOFFSET_T implies that it is always
	 * greater than or equal to file size.
	 */
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	/* Residual count gives bytes actually transferred. */
	bcount -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partial transfer interrupted by a signal still succeeds. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}
1440
1441 /*
1442 * This syscall supplies 64-bit file offsets to 32-bit applications only.
1443 */
/*
 * pwrite64(2): positional write supplying a 64-bit file offset to 32-bit
 * applications only.  The 64-bit offset arrives split across two 32-bit
 * syscall arguments (which half is which depends on endianness).  The
 * seek pointer associated with fdes is neither consulted nor updated,
 * and FAPPEND is deliberately ignored (see the SUSv4 note below).
 *
 * Returns the number of bytes transferred, or sets errno and returns -1.
 */
ssize32_t
pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
	uint32_t offset_2)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;	/* nonzero once inside the nbmand crit region */

	/* Reassemble the 64-bit offset from its two 32-bit halves. */
#if defined(_LITTLE_ENDIAN)
	fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
#else
	fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
#endif

	/* Reject negative sizes and transfers larger than INT32_MAX. */
	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 1;		/* 1 == writer for VOP_RWLOCK */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		/* Zero-length write of a regular file: succeed trivially. */
		if (bcount == 0)
			goto out;

		/*
		 * See comments in pwrite.
		 */
		if (fileoff > MAXOFFSET_T) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Writing at or beyond the file size resource limit:
		 * fire the rctl action (ignoring the action list since we
		 * are already in an error path) and fail with EFBIG.
		 *
		 * NOTE(review): the other write paths in this file pass
		 * RCA_UNSAFE_SIGINFO to rctl_action; confirm RCA_SAFE is
		 * intentional here.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_SAFE);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		/* No room left at all past the maximum offset. */
		if (fileoff == MAXOFFSET_T) {
			error = EFBIG;
			goto out;
		}
		/* Clip the transfer so it cannot run past MAXOFFSET_T. */
		if (fileoff + bcount > MAXOFFSET_T)
			bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff);
	} else if (vp->v_type == VFIFO) {
		/* Positional I/O is undefined on pipes/FIFOs. */
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		/* Fail if an NBMAND lock/share conflicts with this write. */
		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	/*
	 * The SUSv4 POSIX specification states:
	 *	The pwrite() function shall be equivalent to write(), except
	 *	that it writes into a given position and does not change
	 *	the file offset (regardless of whether O_APPEND is set).
	 * To make this be true, we omit the FAPPEND flag from ioflag.
	 */
	ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	/* Residual count gives bytes actually transferred. */
	bcount -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partial transfer interrupted by a signal still succeeds. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}
1568
1569 #endif /* _SYSCALL32_IMPL || _ILP32 */
1570
1571 #ifdef _SYSCALL32_IMPL
1572 /*
1573 * Tail-call elimination of xxx32() down to xxx()
1574 *
1575 * A number of xxx32 system calls take a len (or count) argument and
1576 * return a number in the range [0,len] or -1 on error.
1577 * Given an ssize32_t input len, the downcall xxx() will return
1578 * a 64-bit value that is -1 or in the range [0,len] which actually
1579 * is a proper return value for the xxx32 call. So even if the xxx32
1580 * calls can be considered as returning a ssize32_t, they are currently
1581 * declared as returning a ssize_t as this enables tail-call elimination.
1582 *
1583 * The cast of len (or count) to ssize32_t is needed to ensure we pass
1584 * down negative input values as such and let the downcall handle error
1585 * reporting. Functions covered by this comments are:
1586 *
1587 * rw.c: read32, write32, pread32, pwrite32, readv32, writev32.
1588 * socksyscall.c: recv32, recvfrom32, send32, sendto32.
1589 * readlink.c: readlink32.
1590 */
1591
1592 ssize_t
read32(int32_t fdes,caddr32_t cbuf,size32_t count)1593 read32(int32_t fdes, caddr32_t cbuf, size32_t count)
1594 {
1595 return (read(fdes,
1596 (void *)(uintptr_t)cbuf, (ssize32_t)count));
1597 }
1598
1599 ssize_t
write32(int32_t fdes,caddr32_t cbuf,size32_t count)1600 write32(int32_t fdes, caddr32_t cbuf, size32_t count)
1601 {
1602 return (write(fdes,
1603 (void *)(uintptr_t)cbuf, (ssize32_t)count));
1604 }
1605
1606 ssize_t
pread32(int32_t fdes,caddr32_t cbuf,size32_t count,off32_t offset)1607 pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
1608 {
1609 return (pread(fdes,
1610 (void *)(uintptr_t)cbuf, (ssize32_t)count,
1611 (off_t)(uint32_t)offset));
1612 }
1613
1614 ssize_t
pwrite32(int32_t fdes,caddr32_t cbuf,size32_t count,off32_t offset)1615 pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
1616 {
1617 return (pwrite(fdes,
1618 (void *)(uintptr_t)cbuf, (ssize32_t)count,
1619 (off_t)(uint32_t)offset));
1620 }
1621
1622 ssize_t
readv32(int32_t fdes,caddr32_t iovp,int32_t iovcnt)1623 readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
1624 {
1625 return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt));
1626 }
1627
1628 ssize_t
writev32(int32_t fdes,caddr32_t iovp,int32_t iovcnt)1629 writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
1630 {
1631 return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt));
1632 }
1633 #endif /* _SYSCALL32_IMPL */
1634