1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
25 * Copyright 2018, Joyent, Inc.
26 * Copyright 2024 Oxide Computer Company
27 */
28
29 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
30 /* All Rights Reserved */
31
32 /*
33 * Portions of this source code were derived from Berkeley 4.3 BSD
34 * under license from the Regents of the University of California.
35 */
36
37
38 #include <sys/param.h>
39 #include <sys/isa_defs.h>
40 #include <sys/types.h>
41 #include <sys/sysmacros.h>
42 #include <sys/systm.h>
43 #include <sys/errno.h>
44 #include <sys/fcntl.h>
45 #include <sys/flock.h>
46 #include <sys/vnode.h>
47 #include <sys/file.h>
48 #include <sys/mode.h>
49 #include <sys/proc.h>
50 #include <sys/filio.h>
51 #include <sys/share.h>
52 #include <sys/debug.h>
53 #include <sys/rctl.h>
54 #include <sys/nbmlock.h>
55
56 #include <sys/cmn_err.h>
57
58 static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
59 static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
60 static void fd_too_big(proc_t *);
61
/*
 * File control.
 *
 * Implements the fcntl() commands.  fdes is the descriptor being operated
 * on and cmd selects the operation.  arg is interpreted per command: it is
 * truncated to an int for the descriptor-oriented commands and used as a
 * user address (via copyin()/copyout()) for the structure-oriented ones.
 * arg1 is only accepted for F_DUP3FD, where it carries FD_CLOEXEC and/or
 * FD_CLOFORK; for every other command it must be zero.
 *
 * Commands are processed in three tiers:
 *   1. commands that do not need getf() on fdes (F_GETFD, F_SETFD,
 *      F_GETFL, F_GETXFL, F_BADFD);
 *   2. commands that need getf() but no copyin (F_DUPFD*, F_DUP2FD*,
 *      F_DUP3FD, F_SETFL);
 *   3. everything else (record/OFD/flock locking, F_ALLOCSP/F_FREESP,
 *      share reservations).
 *
 * Exit labels: "done" calls releasef(fdes) and falls through to "out";
 * "out" is jumped to directly whenever no getf() hold is outstanding.
 *
 * Returns retval on success; on failure returns the result of
 * set_errno(error).
 */
int
fcntl(int fdes, int cmd, intptr_t arg, intptr_t arg1)
{
	int iarg;
	int error = 0;
	int retval;
	proc_t *p;
	file_t *fp;
	vnode_t *vp;
	u_offset_t offset;
	u_offset_t start;
	struct vattr vattr;
	int in_crit;
	int flag;
	struct flock sbf;
	struct flock64 bf;
	struct o_flock obf;
	struct flock64_32 bf64_32;
	struct fshare fsh;
	struct shrlock shr;
	struct shr_locowner shr_own;
	offset_t maxoffset;
	model_t datamodel;
	int fdres;

	/*
	 * Sanity-check the structure size relationships that the copyin and
	 * copyout conversions below rely on.
	 */
#if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32)
	ASSERT(sizeof (struct flock) == sizeof (struct flock32));
	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32));
#endif
#if defined(_LP64) && !defined(lint) && defined(_SYSCALL32)
	ASSERT(sizeof (struct flock) == sizeof (struct flock64_64));
	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64));
#endif

	/*
	 * Most fcntl() calls take either 2 or 3 arguments. The introduction of
	 * F_DUP3FD added a version that takes a 4th argument (referred to as
	 * arg1). While fcntl() traditionally has had loose validation, we
	 * strictly validate this new arg.
	 */
	switch (cmd) {
	case F_DUP3FD:
		/* Only the two descriptor flags may be passed. */
		if ((arg1 & ~(FD_CLOEXEC | FD_CLOFORK)) != 0) {
			error = EINVAL;
			goto out;
		}
		break;
	default:
		if (arg1 != 0) {
			error = EINVAL;
			goto out;
		}
		break;
	}

	/*
	 * First, for speed, deal with the subset of cases
	 * that do not require getf() / releasef().
	 * Every case here leaves via "out" because no hold was taken.
	 */
	switch (cmd) {
	case F_GETFD:
		if ((error = f_getfd_error(fdes, &flag)) == 0)
			retval = flag;
		goto out;

	case F_SETFD:
		error = f_setfd_error(fdes, (int)arg);
		retval = 0;
		goto out;

	case F_GETFL:
		if ((error = f_getfl(fdes, &flag)) == 0) {
			/*
			 * Report only the FMASK and FASYNC bits.  FOPEN is
			 * added unless the file was opened FSEARCH or FEXEC,
			 * in which case those bits are reported instead.
			 * F_SETFL below subtracts FOPEN again.
			 */
			retval = (flag & (FMASK | FASYNC));
			if ((flag & (FSEARCH | FEXEC)) == 0)
				retval += FOPEN;
			else
				retval |= (flag & (FSEARCH | FEXEC));
		}
		goto out;

	case F_GETXFL:
		/* Like F_GETFL, but the flag word is returned unmasked. */
		if ((error = f_getfl(fdes, &flag)) == 0) {
			retval = flag;
			if ((flag & (FSEARCH | FEXEC)) == 0)
				retval += FOPEN;
		}
		goto out;

	case F_BADFD:
		if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0)
			retval = fdres;
		goto out;
	}

	/*
	 * Second, for speed, deal with the subset of cases that
	 * require getf() / releasef() but do not require copyin.
	 */
	if ((fp = getf(fdes)) == NULL) {
		error = EBADF;
		goto out;
	}
	iarg = (int)arg;

	switch (cmd) {
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
	case F_DUPFD_CLOFORK:
		p = curproc;
		/*
		 * The unsigned comparison also rejects a negative iarg, but
		 * the rctl action is only taken for non-negative values.
		 */
		if ((uint_t)iarg >= p->p_fno_ctl) {
			if (iarg >= 0)
				fd_too_big(p);
			error = EINVAL;
			goto done;
		}
		/*
		 * We need to increment the f_count reference counter
		 * before allocating a new file descriptor.
		 * Doing it other way round opens a window for race condition
		 * with closeandsetf() on the target file descriptor which can
		 * close the file still referenced by the original
		 * file descriptor.
		 */
		mutex_enter(&fp->f_tlock);
		fp->f_count++;
		mutex_exit(&fp->f_tlock);
		if ((retval = ufalloc_file(iarg, fp)) == -1) {
			/*
			 * New file descriptor can't be allocated.
			 * Revert the reference count.
			 */
			mutex_enter(&fp->f_tlock);
			fp->f_count--;
			mutex_exit(&fp->f_tlock);
			error = EMFILE;
		} else {
			if (cmd == F_DUPFD_CLOEXEC) {
				f_setfd_or(retval, FD_CLOEXEC);
			}

			if (cmd == F_DUPFD_CLOFORK) {
				f_setfd_or(retval, FD_CLOFORK);
			}
		}
		goto done;

	case F_DUP2FD_CLOEXEC:
	case F_DUP2FD_CLOFORK:
		/*
		 * Unlike F_DUP2FD and F_DUP3FD below, these two commands
		 * fail when the source and target descriptors are the same.
		 */
		if (fdes == iarg) {
			error = EINVAL;
			goto done;
		}

		/*FALLTHROUGH*/

	case F_DUP2FD:
	case F_DUP3FD:
		p = curproc;
		if (fdes == iarg) {
			retval = iarg;
		} else if ((uint_t)iarg >= p->p_fno_ctl) {
			if (iarg >= 0)
				fd_too_big(p);
			error = EBADF;
		} else {
			/*
			 * We can't hold our getf(fdes) across the call to
			 * closeandsetf() because it creates a window for
			 * deadlock: if one thread is doing dup2(a, b) while
			 * another is doing dup2(b, a), each one will block
			 * waiting for the other to call releasef().  The
			 * solution is to increment the file reference count
			 * (which we have to do anyway), then releasef(fdes),
			 * then closeandsetf().  Incrementing f_count ensures
			 * that fp won't disappear after we call releasef().
			 * When closeandsetf() fails, we try avoid calling
			 * closef() because of all the side effects.
			 */
			mutex_enter(&fp->f_tlock);
			fp->f_count++;
			mutex_exit(&fp->f_tlock);
			releasef(fdes);
			if ((error = closeandsetf(iarg, fp)) == 0) {
				if (cmd == F_DUP2FD_CLOEXEC) {
					f_setfd_or(iarg, FD_CLOEXEC);
				} else if (cmd == F_DUP2FD_CLOFORK) {
					f_setfd_or(iarg, FD_CLOFORK);
				} else if (cmd == F_DUP3FD) {
					/* arg1 was validated at entry. */
					f_setfd_or(iarg, (int)arg1);
				}
				retval = iarg;
			} else {
				/*
				 * Drop the extra reference taken above; only
				 * fall back to closef() if ours is the last
				 * one.
				 */
				mutex_enter(&fp->f_tlock);
				if (fp->f_count > 1) {
					fp->f_count--;
					mutex_exit(&fp->f_tlock);
				} else {
					mutex_exit(&fp->f_tlock);
					(void) closef(fp);
				}
			}
			/* releasef() was already called; skip "done". */
			goto out;
		}
		goto done;

	case F_SETFL:
		vp = fp->f_vnode;
		flag = fp->f_flag;
		/* If both are requested, FNONBLOCK wins over FNDELAY. */
		if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
			iarg &= ~FNDELAY;
		if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred, NULL)) ==
		    0) {
			/*
			 * Replace the FMASK bits of f_flag with the new
			 * setting, leaving FREAD and FWRITE as they were.
			 */
			iarg &= FMASK;
			mutex_enter(&fp->f_tlock);
			fp->f_flag &= ~FMASK | (FREAD|FWRITE);
			fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE);
			mutex_exit(&fp->f_tlock);
		}
		retval = 0;
		goto done;
	}

	/*
	 * Finally, deal with the expensive cases.
	 */
	retval = 0;
	/* Set once nbl_start_crit() is called, so the exit path can undo it. */
	in_crit = 0;
	maxoffset = MAXOFF_T;
	datamodel = DATAMODEL_NATIVE;
#if defined(_SYSCALL32_IMPL)
	if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
		maxoffset = MAXOFF32_T;
#endif

	vp = fp->f_vnode;
	flag = fp->f_flag;
	offset = fp->f_offset;

	switch (cmd) {
	/*
	 * The file system and vnode layers understand and implement
	 * locking with flock64 structures. So here once we pass through
	 * the test for compatibility as defined by LFS API, (for F_SETLK,
	 * F_SETLKW, F_GETLK, F_GETLKW, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW,
	 * F_FREESP) we transform the flock structure to a flock64 structure
	 * and send it to the lower layers. Similarly in case of GETLK and
	 * OFD_GETLK the returned flock64 structure is transformed to a flock
	 * structure if everything fits in nicely, otherwise we return
	 * EOVERFLOW.
	 */

	case F_GETLK:
	case F_O_GETLK:
	case F_SETLK:
	case F_SETLKW:
	case F_SETLK_NBMAND:
	case F_OFD_GETLK:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
	case F_FLOCK:
	case F_FLOCKW:

		/*
		 * Copy in input fields only.
		 */

		if (cmd == F_O_GETLK) {
			/* The SVR3 form is only accepted from ILP32 callers. */
			if (datamodel != DATAMODEL_ILP32) {
				error = EINVAL;
				break;
			}

			if (copyin((void *)arg, &obf, sizeof (obf))) {
				error = EFAULT;
				break;
			}
			bf.l_type = obf.l_type;
			bf.l_whence = obf.l_whence;
			bf.l_start = (off64_t)obf.l_start;
			bf.l_len = (off64_t)obf.l_len;
			bf.l_sysid = (int)obf.l_sysid;
			bf.l_pid = obf.l_pid;
		} else if (datamodel == DATAMODEL_NATIVE) {
			if (copyin((void *)arg, &sbf, sizeof (sbf))) {
				error = EFAULT;
				break;
			}
			/*
			 * XXX	In an LP64 kernel with an LP64 application
			 *	there's no need to do a structure copy here
			 *	struct flock == struct flock64. However,
			 *	we did it this way to avoid more conditional
			 *	compilation.
			 */
			bf.l_type = sbf.l_type;
			bf.l_whence = sbf.l_whence;
			bf.l_start = (off64_t)sbf.l_start;
			bf.l_len = (off64_t)sbf.l_len;
			bf.l_sysid = sbf.l_sysid;
			bf.l_pid = sbf.l_pid;
		}
#if defined(_SYSCALL32_IMPL)
		else {
			/* Non-native caller: convert from struct flock32. */
			struct flock32 sbf32;
			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
				error = EFAULT;
				break;
			}
			bf.l_type = sbf32.l_type;
			bf.l_whence = sbf32.l_whence;
			bf.l_start = (off64_t)sbf32.l_start;
			bf.l_len = (off64_t)sbf32.l_len;
			bf.l_sysid = sbf32.l_sysid;
			bf.l_pid = sbf32.l_pid;
		}
#endif /* _SYSCALL32_IMPL */

		/*
		 * 64-bit support: check for overflow for 32-bit lock ops
		 */
		if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
			break;

		if (cmd == F_FLOCK || cmd == F_FLOCKW) {
			/* FLOCK* locking is always over the entire file. */
			if (bf.l_whence != 0 || bf.l_start != 0 ||
			    bf.l_len != 0) {
				error = EINVAL;
				break;
			}
			if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
				error = EINVAL;
				break;
			}
		}

		if (cmd == F_OFD_GETLK || cmd == F_OFD_SETLK ||
		    cmd == F_OFD_SETLKW) {
			/*
			 * TBD OFD-style locking is currently limited to
			 * covering the entire file.
			 */
			if (bf.l_whence != 0 || bf.l_start != 0 ||
			    bf.l_len != 0) {
				error = EINVAL;
				break;
			}
		}

		/*
		 * Not all of the filesystems understand F_O_GETLK, and
		 * there's no need for them to know.  Map it to F_GETLK.
		 *
		 * The *_frlock functions in the various file systems basically
		 * do some validation and then funnel everything through the
		 * fs_frlock function. For OFD-style locks fs_frlock will do
		 * nothing so that once control returns here we can call the
		 * ofdlock function with the correct fp. For OFD-style locks
		 * the unsupported remote file systems, such as NFS, detect and
		 * reject the OFD-style cmd argument.
		 */
		if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd,
		    &bf, flag, offset, NULL, fp->f_cred, NULL)) != 0)
			break;

		if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
		    cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
			/*
			 * This is an OFD-style lock so we need to handle it
			 * here. Because OFD-style locks are associated with
			 * the file_t we didn't have enough info down the
			 * VOP_FRLOCK path immediately above.
			 */
			if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
				break;
		}

		/*
		 * If command is GETLK and no lock is found, only
		 * the type field is changed.
		 */
		if ((cmd == F_O_GETLK || cmd == F_GETLK ||
		    cmd == F_OFD_GETLK) && bf.l_type == F_UNLCK) {
			/* l_type always first entry, always a short */
			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
			    sizeof (bf.l_type)))
				error = EFAULT;
			break;
		}

		if (cmd == F_O_GETLK) {
			/*
			 * Return an SVR3 flock structure to the user.
			 */
			obf.l_type = (int16_t)bf.l_type;
			obf.l_whence = (int16_t)bf.l_whence;
			obf.l_start = (int32_t)bf.l_start;
			obf.l_len = (int32_t)bf.l_len;
			if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) {
				/*
				 * One or both values for the above fields
				 * is too large to store in an SVR3 flock
				 * structure.
				 */
				error = EOVERFLOW;
				break;
			}
			obf.l_sysid = (int16_t)bf.l_sysid;
			obf.l_pid = (int16_t)bf.l_pid;
			if (copyout(&obf, (void *)arg, sizeof (obf)))
				error = EFAULT;
		} else if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
			/*
			 * Copy out SVR4 flock.
			 */
			int i;

			/* The result must fit the caller's offset range. */
			if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
				error = EOVERFLOW;
				break;
			}

			if (datamodel == DATAMODEL_NATIVE) {
				for (i = 0; i < 4; i++)
					sbf.l_pad[i] = 0;
				/*
				 * XXX	In an LP64 kernel with an LP64
				 *	application there's no need to do a
				 *	structure copy here as currently
				 *	struct flock == struct flock64.
				 *	We did it this way to avoid more
				 *	conditional compilation.
				 */
				sbf.l_type = bf.l_type;
				sbf.l_whence = bf.l_whence;
				sbf.l_start = (off_t)bf.l_start;
				sbf.l_len = (off_t)bf.l_len;
				sbf.l_sysid = bf.l_sysid;
				sbf.l_pid = bf.l_pid;
				if (copyout(&sbf, (void *)arg, sizeof (sbf)))
					error = EFAULT;
			}
#if defined(_SYSCALL32_IMPL)
			else {
				struct flock32 sbf32;
				if (bf.l_start > MAXOFF32_T ||
				    bf.l_len > MAXOFF32_T) {
					error = EOVERFLOW;
					break;
				}
				for (i = 0; i < 4; i++)
					sbf32.l_pad[i] = 0;
				sbf32.l_type = (int16_t)bf.l_type;
				sbf32.l_whence = (int16_t)bf.l_whence;
				sbf32.l_start = (off32_t)bf.l_start;
				sbf32.l_len = (off32_t)bf.l_len;
				sbf32.l_sysid = (int32_t)bf.l_sysid;
				sbf32.l_pid = (pid32_t)bf.l_pid;
				if (copyout(&sbf32,
				    (void *)arg, sizeof (sbf32)))
					error = EFAULT;
			}
#endif
		}
		break;

	case F_CHKFL:
		/*
		 * This is for internal use only, to allow the vnode layer
		 * to validate a flags setting before applying it.  User
		 * programs can't issue it.
		 */
		error = EINVAL;
		break;

	case F_ALLOCSP:
	case F_FREESP:
	case F_ALLOCSP64:
	case F_FREESP64:
		/*
		 * Test for not-a-regular-file (and returning EINVAL)
		 * before testing for open-for-writing (and returning EBADF).
		 * This is relied upon by posix_fallocate() in libc.
		 */
		if (vp->v_type != VREG) {
			error = EINVAL;
			break;
		}

		if ((flag & FWRITE) == 0) {
			error = EBADF;
			break;
		}

		/* The *64 variants are only accepted from ILP32 callers. */
		if (datamodel != DATAMODEL_ILP32 &&
		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
			error = EINVAL;
			break;
		}

		/*
		 * Exactly one of the three copyin blocks below applies,
		 * selected by the caller's data model and the command.
		 */
#if defined(_ILP32) || defined(_SYSCALL32_IMPL)
		if (datamodel == DATAMODEL_ILP32 &&
		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
			struct flock32 sbf32;
			/*
			 * For compatibility we overlay an SVR3 flock on an SVR4
			 * flock.  This works because the input field offsets
			 * in "struct flock" were preserved.
			 */
			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
				error = EFAULT;
				break;
			} else {
				bf.l_type = sbf32.l_type;
				bf.l_whence = sbf32.l_whence;
				bf.l_start = (off64_t)sbf32.l_start;
				bf.l_len = (off64_t)sbf32.l_len;
				bf.l_sysid = sbf32.l_sysid;
				bf.l_pid = sbf32.l_pid;
			}
		}
#endif /* _ILP32 || _SYSCALL32_IMPL */

#if defined(_LP64)
		if (datamodel == DATAMODEL_LP64 &&
		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
			/* Copied straight into the flock64 without conversion. */
			if (copyin((void *)arg, &bf, sizeof (bf))) {
				error = EFAULT;
				break;
			}
		}
#endif /* defined(_LP64) */

#if !defined(_LP64) || defined(_SYSCALL32_IMPL)
		if (datamodel == DATAMODEL_ILP32 &&
		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
			if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
				error = EFAULT;
				break;
			} else {
				/*
				 * Note that the size of flock64 is different in
				 * the ILP32 and LP64 models, due to the l_pad
				 * field. We do not want to assume that the
				 * flock64 structure is laid out the same in
				 * ILP32 and LP64 environments, so we will
				 * copy in the ILP32 version of flock64
				 * explicitly and copy it to the native
				 * flock64 structure.
				 */
				bf.l_type = (short)bf64_32.l_type;
				bf.l_whence = (short)bf64_32.l_whence;
				bf.l_start = bf64_32.l_start;
				bf.l_len = bf64_32.l_len;
				bf.l_sysid = (int)bf64_32.l_sysid;
				bf.l_pid = (pid_t)bf64_32.l_pid;
			}
		}
#endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */

		/*
		 * The non-64 commands are bounded by the caller's maxoffset;
		 * the *64 commands by MAXOFFSET_T.
		 */
		if (cmd == F_ALLOCSP || cmd == F_FREESP)
			error = flock_check(vp, &bf, offset, maxoffset);
		else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64)
			error = flock_check(vp, &bf, offset, MAXOFFSET_T);
		if (error)
			break;

		if (vp->v_type == VREG && bf.l_len == 0 &&
		    bf.l_start > OFFSET_MAX(fp)) {
			error = EFBIG;
			break;
		}

		/*
		 * Make sure that there are no conflicting non-blocking
		 * mandatory locks in the region being manipulated. If
		 * there are such locks then return EACCES.
		 */
		if ((error = flock_get_start(vp, &bf, offset, &start)) != 0)
			break;

		if (nbl_need_check(vp)) {
			u_offset_t begin;
			ssize_t length;

			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
			vattr.va_mask = AT_SIZE;
			if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			    != 0)
				break;
			/*
			 * The range checked lies between the requested start
			 * and the current file size, whichever is lower.
			 */
			begin = start > vattr.va_size ? vattr.va_size : start;
			length = vattr.va_size > start ? vattr.va_size - start :
			    start - vattr.va_size;
			if (nbl_conflict(vp, NBL_WRITE, begin, length, 0,
			    NULL)) {
				error = EACCES;
				break;
			}
		}

		/* VOP_SPACE() is only handed the non-64 command values. */
		if (cmd == F_ALLOCSP64)
			cmd = F_ALLOCSP;
		else if (cmd == F_FREESP64)
			cmd = F_FREESP;

		error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL);

		break;

#if !defined(_LP64) || defined(_SYSCALL32_IMPL)
	case F_GETLK64:
	case F_SETLK64:
	case F_SETLKW64:
	case F_SETLK64_NBMAND:
	case F_OFD_GETLK64:
	case F_OFD_SETLK64:
	case F_OFD_SETLKW64:
	case F_FLOCK64:
	case F_FLOCKW64:
		/*
		 * Large Files: Here we set cmd as *LK and send it to
		 * lower layers. *LK64 is only for the user land.
		 * Most of the comments described above for F_SETLK
		 * applies here too.
		 * Large File support is only needed for ILP32 apps!
		 */
		if (datamodel != DATAMODEL_ILP32) {
			error = EINVAL;
			break;
		}

		if (cmd == F_GETLK64)
			cmd = F_GETLK;
		else if (cmd == F_SETLK64)
			cmd = F_SETLK;
		else if (cmd == F_SETLKW64)
			cmd = F_SETLKW;
		else if (cmd == F_SETLK64_NBMAND)
			cmd = F_SETLK_NBMAND;
		else if (cmd == F_OFD_GETLK64)
			cmd = F_OFD_GETLK;
		else if (cmd == F_OFD_SETLK64)
			cmd = F_OFD_SETLK;
		else if (cmd == F_OFD_SETLKW64)
			cmd = F_OFD_SETLKW;
		else if (cmd == F_FLOCK64)
			cmd = F_FLOCK;
		else if (cmd == F_FLOCKW64)
			cmd = F_FLOCKW;

		/*
		 * Note that the size of flock64 is different in the ILP32
		 * and LP64 models, due to the l_pad field.
		 * We do not want to assume that the flock64 structure is
		 * laid out in the same in ILP32 and LP64 environments, so
		 * we will copy in the ILP32 version of flock64 explicitly
		 * and copy it to the native flock64 structure.
		 */

		if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
			error = EFAULT;
			break;
		}

		bf.l_type = (short)bf64_32.l_type;
		bf.l_whence = (short)bf64_32.l_whence;
		bf.l_start = bf64_32.l_start;
		bf.l_len = bf64_32.l_len;
		bf.l_sysid = (int)bf64_32.l_sysid;
		bf.l_pid = (pid_t)bf64_32.l_pid;

		if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
			break;

		if (cmd == F_FLOCK || cmd == F_FLOCKW) {
			/* FLOCK* locking is always over the entire file. */
			if (bf.l_whence != 0 || bf.l_start != 0 ||
			    bf.l_len != 0) {
				error = EINVAL;
				break;
			}
			if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
				error = EINVAL;
				break;
			}
		}

		if (cmd == F_OFD_GETLK || cmd == F_OFD_SETLK ||
		    cmd == F_OFD_SETLKW) {
			/*
			 * TBD OFD-style locking is currently limited to
			 * covering the entire file.
			 */
			if (bf.l_whence != 0 || bf.l_start != 0 ||
			    bf.l_len != 0) {
				error = EINVAL;
				break;
			}
		}

		/*
		 * The *_frlock functions in the various file systems basically
		 * do some validation and then funnel everything through the
		 * fs_frlock function. For OFD-style locks fs_frlock will do
		 * nothing so that once control returns here we can call the
		 * ofdlock function with the correct fp. For OFD-style locks
		 * the unsupported remote file systems, such as NFS, detect and
		 * reject the OFD-style cmd argument.
		 */
		if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset,
		    NULL, fp->f_cred, NULL)) != 0)
			break;

		if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
		    cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
			/*
			 * This is an OFD-style lock so we need to handle it
			 * here. Because OFD-style locks are associated with
			 * the file_t we didn't have enough info down the
			 * VOP_FRLOCK path immediately above.
			 */
			if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
				break;
		}

		/* No lock found: only the type field is written back. */
		if ((cmd == F_GETLK || cmd == F_OFD_GETLK) &&
		    bf.l_type == F_UNLCK) {
			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
			    sizeof (bf.l_type)))
				error = EFAULT;
			break;
		}

		if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
			int i;

			/*
			 * We do not want to assume that the flock64 structure
			 * is laid out in the same in ILP32 and LP64
			 * environments, so we will copy out the ILP32 version
			 * of flock64 explicitly after copying the native
			 * flock64 structure to it.
			 */
			for (i = 0; i < 4; i++)
				bf64_32.l_pad[i] = 0;
			bf64_32.l_type = (int16_t)bf.l_type;
			bf64_32.l_whence = (int16_t)bf.l_whence;
			bf64_32.l_start = bf.l_start;
			bf64_32.l_len = bf.l_len;
			bf64_32.l_sysid = (int32_t)bf.l_sysid;
			bf64_32.l_pid = (pid32_t)bf.l_pid;
			if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32)))
				error = EFAULT;
		}
		break;
#endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */

	case F_SHARE:
	case F_SHARE_NBMAND:
	case F_UNSHARE:

		/*
		 * Copy in input fields only.
		 */
		if (copyin((void *)arg, &fsh, sizeof (fsh))) {
			error = EFAULT;
			break;
		}

		/*
		 * Local share reservations always have this simple form
		 */
		shr.s_access = fsh.f_access;
		shr.s_deny = fsh.f_deny;
		shr.s_sysid = 0;
		shr.s_pid = ttoproc(curthread)->p_pid;
		/* The owner is identified by the pid plus the caller's f_id. */
		shr_own.sl_pid = shr.s_pid;
		shr_own.sl_id = fsh.f_id;
		shr.s_own_len = sizeof (shr_own);
		shr.s_owner = (caddr_t)&shr_own;
		error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred, NULL);
		break;

	default:
		error = EINVAL;
		break;
	}

	/* Leave the critical region entered in the F_ALLOCSP/F_FREESP path. */
	if (in_crit)
		nbl_end_crit(vp);

done:
	/* Drop the hold taken by getf(fdes). */
	releasef(fdes);
out:
	if (error)
		return (set_errno(error));
	return (retval);
}
864
865 int
flock_check(vnode_t * vp,flock64_t * flp,offset_t offset,offset_t max)866 flock_check(vnode_t *vp, flock64_t *flp, offset_t offset, offset_t max)
867 {
868 struct vattr vattr;
869 int error;
870 u_offset_t start, end;
871
872 /*
873 * Determine the starting point of the request
874 */
875 switch (flp->l_whence) {
876 case 0: /* SEEK_SET */
877 start = (u_offset_t)flp->l_start;
878 if (start > max)
879 return (EINVAL);
880 break;
881 case 1: /* SEEK_CUR */
882 if (flp->l_start > (max - offset))
883 return (EOVERFLOW);
884 start = (u_offset_t)(flp->l_start + offset);
885 if (start > max)
886 return (EINVAL);
887 break;
888 case 2: /* SEEK_END */
889 vattr.va_mask = AT_SIZE;
890 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
891 return (error);
892 if (flp->l_start > (max - (offset_t)vattr.va_size))
893 return (EOVERFLOW);
894 start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
895 if (start > max)
896 return (EINVAL);
897 break;
898 default:
899 return (EINVAL);
900 }
901
902 /*
903 * Determine the range covered by the request.
904 */
905 if (flp->l_len == 0)
906 end = MAXEND;
907 else if ((offset_t)flp->l_len > 0) {
908 if (flp->l_len > (max - start + 1))
909 return (EOVERFLOW);
910 end = (u_offset_t)(start + (flp->l_len - 1));
911 ASSERT(end <= max);
912 } else {
913 /*
914 * Negative length; why do we even allow this ?
915 * Because this allows easy specification of
916 * the last n bytes of the file.
917 */
918 end = start;
919 start += (u_offset_t)flp->l_len;
920 (start)++;
921 if (start > max)
922 return (EINVAL);
923 ASSERT(end <= max);
924 }
925 ASSERT(start <= max);
926 if (flp->l_type == F_UNLCK && flp->l_len > 0 &&
927 end == (offset_t)max) {
928 flp->l_len = 0;
929 }
930 if (start > end)
931 return (EINVAL);
932 return (0);
933 }
934
935 static int
flock_get_start(vnode_t * vp,flock64_t * flp,offset_t offset,u_offset_t * start)936 flock_get_start(vnode_t *vp, flock64_t *flp, offset_t offset, u_offset_t *start)
937 {
938 struct vattr vattr;
939 int error;
940
941 /*
942 * Determine the starting point of the request. Assume that it is
943 * a valid starting point.
944 */
945 switch (flp->l_whence) {
946 case 0: /* SEEK_SET */
947 *start = (u_offset_t)flp->l_start;
948 break;
949 case 1: /* SEEK_CUR */
950 *start = (u_offset_t)(flp->l_start + offset);
951 break;
952 case 2: /* SEEK_END */
953 vattr.va_mask = AT_SIZE;
954 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
955 return (error);
956 *start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
957 break;
958 default:
959 return (EINVAL);
960 }
961
962 return (0);
963 }
964
965 /*
966 * Take rctl action when the requested file descriptor is too big.
967 */
968 static void
fd_too_big(proc_t * p)969 fd_too_big(proc_t *p)
970 {
971 mutex_enter(&p->p_lock);
972 (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
973 p->p_rctls, p, RCA_SAFE);
974 mutex_exit(&p->p_lock);
975 }
976