xref: /illumos-gate/usr/src/uts/common/syscall/fcntl.c (revision b1e2e3fb17324e9ddf43db264a0c64da7756d9e6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
25  * Copyright 2018, Joyent, Inc.
26  */
27 
28 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved  	*/
30 
31 /*
32  * Portions of this source code were derived from Berkeley 4.3 BSD
33  * under license from the Regents of the University of California.
34  */
35 
36 
37 #include <sys/param.h>
38 #include <sys/isa_defs.h>
39 #include <sys/types.h>
40 #include <sys/sysmacros.h>
41 #include <sys/systm.h>
42 #include <sys/errno.h>
43 #include <sys/fcntl.h>
44 #include <sys/flock.h>
45 #include <sys/vnode.h>
46 #include <sys/file.h>
47 #include <sys/mode.h>
48 #include <sys/proc.h>
49 #include <sys/filio.h>
50 #include <sys/share.h>
51 #include <sys/debug.h>
52 #include <sys/rctl.h>
53 #include <sys/nbmlock.h>
54 
55 #include <sys/cmn_err.h>
56 
57 static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
58 static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
59 static void fd_too_big(proc_t *);
60 
61 /*
62  * File control.
    *
    * Carry out the fcntl command cmd on file descriptor fdes.  Depending on
    * cmd, arg is used either as an integer or as the user address of a
    * structure to copy in (and, for the GETLK family, back out).
    *
    * The work is staged in three switch statements, cheapest first:
    *   1. commands that are handled without getf()/releasef();
    *   2. commands that need the file_t but no copyin();
    *   3. everything else (record locks, space allocation and freeing,
    *      share reservations).
    *
    * On success the command-specific value in retval is returned; on
    * failure the result of set_errno(error) is returned.  Code that still
    * holds the getf() reference leaves through "done", which calls
    * releasef(); code with no reference outstanding leaves through "out".
63  */
64 int
65 fcntl(int fdes, int cmd, intptr_t arg)
66 {
67 	int iarg;
68 	int error = 0;
69 	int retval;
70 	proc_t *p;
71 	file_t *fp;
72 	vnode_t *vp;
73 	u_offset_t offset;
74 	u_offset_t start;
75 	struct vattr vattr;
76 	int in_crit;
77 	int flag;
78 	struct flock sbf;
79 	struct flock64 bf;
80 	struct o_flock obf;
81 	struct flock64_32 bf64_32;
82 	struct fshare fsh;
83 	struct shrlock shr;
84 	struct shr_locowner shr_own;
85 	offset_t maxoffset;
86 	model_t datamodel;
87 	int fdres;
88 
89 #if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32)
90 	ASSERT(sizeof (struct flock) == sizeof (struct flock32));
91 	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32));
92 #endif
93 #if defined(_LP64) && !defined(lint) && defined(_SYSCALL32)
94 	ASSERT(sizeof (struct flock) == sizeof (struct flock64_64));
95 	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64));
96 #endif
97 
98 	/*
99 	 * First, for speed, deal with the subset of cases
100 	 * that do not require getf() / releasef().
101 	 */
102 	switch (cmd) {
103 	case F_GETFD:
104 		if ((error = f_getfd_error(fdes, &flag)) == 0)
105 			retval = flag;
106 		goto out;
107 
108 	case F_SETFD:
109 		error = f_setfd_error(fdes, (int)arg);
110 		retval = 0;
111 		goto out;
112 
113 	case F_GETFL:
114 		if ((error = f_getfl(fdes, &flag)) == 0) {
			/*
			 * Report the FMASK and FASYNC bits.  If neither
			 * FSEARCH nor FEXEC is set, FOPEN is added to the
			 * result (F_SETFL below subtracts FOPEN again);
			 * otherwise those two bits are passed through.
			 */
115 			retval = (flag & (FMASK | FASYNC));
116 			if ((flag & (FSEARCH | FEXEC)) == 0)
117 				retval += FOPEN;
118 			else
119 				retval |= (flag & (FSEARCH | FEXEC));
120 		}
121 		goto out;
122 
123 	case F_GETXFL:
124 		if ((error = f_getfl(fdes, &flag)) == 0) {
			/* As F_GETFL, but the flag word is not masked. */
125 			retval = flag;
126 			if ((flag & (FSEARCH | FEXEC)) == 0)
127 				retval += FOPEN;
128 		}
129 		goto out;
130 
131 	case F_BADFD:
132 		if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0)
133 			retval = fdres;
134 		goto out;
135 	}
136 
137 	/*
138 	 * Second, for speed, deal with the subset of cases that
139 	 * require getf() / releasef() but do not require copyin.
140 	 */
141 	if ((fp = getf(fdes)) == NULL) {
142 		error = EBADF;
143 		goto out;
144 	}
	/*
	 * A reference on fdes is now held: leave through "done" from here
	 * on unless releasef() has already been called.
	 */
145 	iarg = (int)arg;
146 
147 	switch (cmd) {
148 	case F_DUPFD:
149 	case F_DUPFD_CLOEXEC:
150 		p = curproc;
		/*
		 * The unsigned comparison also rejects a negative iarg;
		 * the rctl action is only taken for non-negative values.
		 */
151 		if ((uint_t)iarg >= p->p_fno_ctl) {
152 			if (iarg >= 0)
153 				fd_too_big(p);
154 			error = EINVAL;
155 			goto done;
156 		}
157 		/*
158 		 * We need to increment the f_count reference counter
159 		 * before allocating a new file descriptor.
160 		 * Doing it other way round opens a window for race condition
161 		 * with closeandsetf() on the target file descriptor which can
162 		 * close the file still referenced by the original
163 		 * file descriptor.
164 		 */
165 		mutex_enter(&fp->f_tlock);
166 		fp->f_count++;
167 		mutex_exit(&fp->f_tlock);
168 		if ((retval = ufalloc_file(iarg, fp)) == -1) {
169 			/*
170 			 * New file descriptor can't be allocated.
171 			 * Revert the reference count.
172 			 */
173 			mutex_enter(&fp->f_tlock);
174 			fp->f_count--;
175 			mutex_exit(&fp->f_tlock);
176 			error = EMFILE;
177 		} else {
178 			if (cmd == F_DUPFD_CLOEXEC) {
179 				f_setfd(retval, FD_CLOEXEC);
180 			}
181 		}
182 		goto done;
183 
184 	case F_DUP2FD_CLOEXEC:
		/* Unlike F_DUP2FD, fdes == iarg is an error here. */
185 		if (fdes == iarg) {
186 			error = EINVAL;
187 			goto done;
188 		}
189 
190 		/*FALLTHROUGH*/
191 
192 	case F_DUP2FD:
193 		p = curproc;
		/*
		 * fdes == iarg can only be F_DUP2FD here (F_DUP2FD_CLOEXEC
		 * rejected it above); it is a no-op that returns iarg.
		 */
194 		if (fdes == iarg) {
195 			retval = iarg;
196 		} else if ((uint_t)iarg >= p->p_fno_ctl) {
197 			if (iarg >= 0)
198 				fd_too_big(p);
199 			error = EBADF;
200 		} else {
201 			/*
202 			 * We can't hold our getf(fdes) across the call to
203 			 * closeandsetf() because it creates a window for
204 			 * deadlock: if one thread is doing dup2(a, b) while
205 			 * another is doing dup2(b, a), each one will block
206 			 * waiting for the other to call releasef().  The
207 			 * solution is to increment the file reference count
208 			 * (which we have to do anyway), then releasef(fdes),
209 			 * then closeandsetf().  Incrementing f_count ensures
210 			 * that fp won't disappear after we call releasef().
211 			 * When closeandsetf() fails, we try avoid calling
212 			 * closef() because of all the side effects.
213 			 */
214 			mutex_enter(&fp->f_tlock);
215 			fp->f_count++;
216 			mutex_exit(&fp->f_tlock);
217 			releasef(fdes);
218 			if ((error = closeandsetf(iarg, fp)) == 0) {
219 				if (cmd == F_DUP2FD_CLOEXEC) {
220 					f_setfd(iarg, FD_CLOEXEC);
221 				}
222 				retval = iarg;
223 			} else {
				/*
				 * Undo the extra reference taken above.  Only
				 * if it turns out to be the last one is
				 * closef() used to drop it.
				 */
224 				mutex_enter(&fp->f_tlock);
225 				if (fp->f_count > 1) {
226 					fp->f_count--;
227 					mutex_exit(&fp->f_tlock);
228 				} else {
229 					mutex_exit(&fp->f_tlock);
230 					(void) closef(fp);
231 				}
232 			}
			/* releasef() was already called, so skip "done". */
233 			goto out;
234 		}
235 		goto done;
236 
237 	case F_SETFL:
238 		vp = fp->f_vnode;
239 		flag = fp->f_flag;
		/* When both are requested, FNONBLOCK wins over FNDELAY. */
240 		if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
241 			iarg &= ~FNDELAY;
242 		if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred, NULL)) ==
243 		    0) {
			/*
			 * Keep FREAD/FWRITE as they are, clear the rest of
			 * FMASK, then install the caller's bits (minus the
			 * FOPEN bias), again excluding FREAD/FWRITE.
			 */
244 			iarg &= FMASK;
245 			mutex_enter(&fp->f_tlock);
246 			fp->f_flag &= ~FMASK | (FREAD|FWRITE);
247 			fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE);
248 			mutex_exit(&fp->f_tlock);
249 		}
250 		retval = 0;
251 		goto done;
252 	}
253 
254 	/*
255 	 * Finally, deal with the expensive cases.
256 	 */
257 	retval = 0;
258 	in_crit = 0;
	/*
	 * maxoffset is the largest offset for the caller's data model:
	 * MAXOFF_T by default, MAXOFF32_T for an ILP32 caller when
	 * _SYSCALL32_IMPL is defined.
	 */
259 	maxoffset = MAXOFF_T;
260 	datamodel = DATAMODEL_NATIVE;
261 #if defined(_SYSCALL32_IMPL)
262 	if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
263 		maxoffset = MAXOFF32_T;
264 #endif
265 
	/* Values read once here and used by all of the cases below. */
266 	vp = fp->f_vnode;
267 	flag = fp->f_flag;
268 	offset = fp->f_offset;
269 
270 	switch (cmd) {
271 	/*
272 	 * The file system and vnode layers understand and implement
273 	 * locking with flock64 structures. So here once we pass through
274 	 * the test for compatibility as defined by LFS API, (for F_SETLK,
275 	 * F_SETLKW, F_GETLK, F_GETLKW, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW,
276 	 * F_FREESP) we transform the flock structure to a flock64 structure
277 	 * and send it to the lower layers. Similarly in case of GETLK and
278 	 * OFD_GETLK the returned flock64 structure is transformed to a flock
279 	 * structure if everything fits in nicely, otherwise we return
280 	 * EOVERFLOW.
281 	 */
282 
283 	case F_GETLK:
284 	case F_O_GETLK:
285 	case F_SETLK:
286 	case F_SETLKW:
287 	case F_SETLK_NBMAND:
288 	case F_OFD_GETLK:
289 	case F_OFD_SETLK:
290 	case F_OFD_SETLKW:
291 	case F_FLOCK:
292 	case F_FLOCKW:
293 
294 		/*
295 		 * Copy in input fields only.
296 		 */
297 
298 		if (cmd == F_O_GETLK) {
			/* F_O_GETLK is only accepted from ILP32 callers. */
299 			if (datamodel != DATAMODEL_ILP32) {
300 				error = EINVAL;
301 				break;
302 			}
303 
304 			if (copyin((void *)arg, &obf, sizeof (obf))) {
305 				error = EFAULT;
306 				break;
307 			}
308 			bf.l_type = obf.l_type;
309 			bf.l_whence = obf.l_whence;
310 			bf.l_start = (off64_t)obf.l_start;
311 			bf.l_len = (off64_t)obf.l_len;
312 			bf.l_sysid = (int)obf.l_sysid;
313 			bf.l_pid = obf.l_pid;
314 		} else if (datamodel == DATAMODEL_NATIVE) {
315 			if (copyin((void *)arg, &sbf, sizeof (sbf))) {
316 				error = EFAULT;
317 				break;
318 			}
319 			/*
320 			 * XXX	In an LP64 kernel with an LP64 application
321 			 *	there's no need to do a structure copy here
322 			 *	struct flock == struct flock64. However,
323 			 *	we did it this way to avoid more conditional
324 			 *	compilation.
325 			 */
326 			bf.l_type = sbf.l_type;
327 			bf.l_whence = sbf.l_whence;
328 			bf.l_start = (off64_t)sbf.l_start;
329 			bf.l_len = (off64_t)sbf.l_len;
330 			bf.l_sysid = sbf.l_sysid;
331 			bf.l_pid = sbf.l_pid;
332 		}
333 #if defined(_SYSCALL32_IMPL)
334 		else {
335 			struct flock32 sbf32;
336 			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
337 				error = EFAULT;
338 				break;
339 			}
340 			bf.l_type = sbf32.l_type;
341 			bf.l_whence = sbf32.l_whence;
342 			bf.l_start = (off64_t)sbf32.l_start;
343 			bf.l_len = (off64_t)sbf32.l_len;
344 			bf.l_sysid = sbf32.l_sysid;
345 			bf.l_pid = sbf32.l_pid;
346 		}
347 #endif /* _SYSCALL32_IMPL */
348 
349 		/*
350 		 * 64-bit support: check for overflow for 32-bit lock ops
351 		 */
352 		if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
353 			break;
354 
355 		if (cmd == F_FLOCK || cmd == F_FLOCKW) {
356 			/* FLOCK* locking is always over the entire file. */
357 			if (bf.l_whence != 0 || bf.l_start != 0 ||
358 			    bf.l_len != 0) {
359 				error = EINVAL;
360 				break;
361 			}
362 			if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
363 				error = EINVAL;
364 				break;
365 			}
366 		}
367 
368 		if (cmd == F_OFD_GETLK || cmd == F_OFD_SETLK ||
369 		    cmd == F_OFD_SETLKW) {
370 			/*
371 			 * TBD OFD-style locking is currently limited to
372 			 * covering the entire file.
373 			 */
374 			if (bf.l_whence != 0 || bf.l_start != 0 ||
375 			    bf.l_len != 0) {
376 				error = EINVAL;
377 				break;
378 			}
379 		}
380 
381 		/*
382 		 * Not all of the filesystems understand F_O_GETLK, and
383 		 * there's no need for them to know.  Map it to F_GETLK.
384 		 *
385 		 * The *_frlock functions in the various file systems basically
386 		 * do some validation and then funnel everything through the
387 		 * fs_frlock function. For OFD-style locks fs_frlock will do
388 		 * nothing so that once control returns here we can call the
389 		 * ofdlock function with the correct fp. For OFD-style locks
390 		 * the unsupported remote file systems, such as NFS, detect and
391 		 * reject the OFD-style cmd argument.
392 		 */
393 		if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd,
394 		    &bf, flag, offset, NULL, fp->f_cred, NULL)) != 0)
395 			break;
396 
397 		if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
398 		    cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
399 			/*
400 			 * This is an OFD-style lock so we need to handle it
401 			 * here. Because OFD-style locks are associated with
402 			 * the file_t we didn't have enough info down the
403 			 * VOP_FRLOCK path immediately above.
404 			 */
405 			if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
406 				break;
407 		}
408 
409 		/*
410 		 * If command is GETLK and no lock is found, only
411 		 * the type field is changed.
412 		 */
413 		if ((cmd == F_O_GETLK || cmd == F_GETLK ||
414 		    cmd == F_OFD_GETLK) && bf.l_type == F_UNLCK) {
415 			/* l_type always first entry, always a short */
416 			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
417 			    sizeof (bf.l_type)))
418 				error = EFAULT;
419 			break;
420 		}
421 
422 		if (cmd == F_O_GETLK) {
423 			/*
424 			 * Return an SVR3 flock structure to the user.
425 			 */
426 			obf.l_type = (int16_t)bf.l_type;
427 			obf.l_whence = (int16_t)bf.l_whence;
428 			obf.l_start = (int32_t)bf.l_start;
429 			obf.l_len = (int32_t)bf.l_len;
430 			if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) {
431 				/*
432 				 * One or both values for the above fields
433 				 * is too large to store in an SVR3 flock
434 				 * structure.
435 				 */
436 				error = EOVERFLOW;
437 				break;
438 			}
439 			obf.l_sysid = (int16_t)bf.l_sysid;
440 			obf.l_pid = (int16_t)bf.l_pid;
441 			if (copyout(&obf, (void *)arg, sizeof (obf)))
442 				error = EFAULT;
443 		} else if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
444 			/*
445 			 * Copy out SVR4 flock.
446 			 */
447 			int i;
448 
449 			if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
450 				error = EOVERFLOW;
451 				break;
452 			}
453 
454 			if (datamodel == DATAMODEL_NATIVE) {
455 				for (i = 0; i < 4; i++)
456 					sbf.l_pad[i] = 0;
457 				/*
458 				 * XXX	In an LP64 kernel with an LP64
459 				 *	application there's no need to do a
460 				 *	structure copy here as currently
461 				 *	struct flock == struct flock64.
462 				 *	We did it this way to avoid more
463 				 *	conditional compilation.
464 				 */
465 				sbf.l_type = bf.l_type;
466 				sbf.l_whence = bf.l_whence;
467 				sbf.l_start = (off_t)bf.l_start;
468 				sbf.l_len = (off_t)bf.l_len;
469 				sbf.l_sysid = bf.l_sysid;
470 				sbf.l_pid = bf.l_pid;
471 				if (copyout(&sbf, (void *)arg, sizeof (sbf)))
472 					error = EFAULT;
473 			}
474 #if defined(_SYSCALL32_IMPL)
475 			else {
476 				struct flock32 sbf32;
477 				if (bf.l_start > MAXOFF32_T ||
478 				    bf.l_len > MAXOFF32_T) {
479 					error = EOVERFLOW;
480 					break;
481 				}
482 				for (i = 0; i < 4; i++)
483 					sbf32.l_pad[i] = 0;
484 				sbf32.l_type = (int16_t)bf.l_type;
485 				sbf32.l_whence = (int16_t)bf.l_whence;
486 				sbf32.l_start = (off32_t)bf.l_start;
487 				sbf32.l_len = (off32_t)bf.l_len;
488 				sbf32.l_sysid = (int32_t)bf.l_sysid;
489 				sbf32.l_pid = (pid32_t)bf.l_pid;
490 				if (copyout(&sbf32,
491 				    (void *)arg, sizeof (sbf32)))
492 					error = EFAULT;
493 			}
494 #endif
495 		}
496 		break;
497 
498 	case F_CHKFL:
499 		/*
500 		 * This is for internal use only, to allow the vnode layer
501 		 * to validate a flags setting before applying it.  User
502 		 * programs can't issue it.
503 		 */
504 		error = EINVAL;
505 		break;
506 
507 	case F_ALLOCSP:
508 	case F_FREESP:
509 	case F_ALLOCSP64:
510 	case F_FREESP64:
511 		/*
512 		 * Test for not-a-regular-file (and returning EINVAL)
513 		 * before testing for open-for-writing (and returning EBADF).
514 		 * This is relied upon by posix_fallocate() in libc.
515 		 */
516 		if (vp->v_type != VREG) {
517 			error = EINVAL;
518 			break;
519 		}
520 
521 		if ((flag & FWRITE) == 0) {
522 			error = EBADF;
523 			break;
524 		}
525 
		/* The *64 space commands are only accepted from ILP32 callers. */
526 		if (datamodel != DATAMODEL_ILP32 &&
527 		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
528 			error = EINVAL;
529 			break;
530 		}
531 
532 #if defined(_ILP32) || defined(_SYSCALL32_IMPL)
533 		if (datamodel == DATAMODEL_ILP32 &&
534 		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
535 			struct flock32 sbf32;
536 			/*
537 			 * For compatibility we overlay an SVR3 flock on an SVR4
538 			 * flock.  This works because the input field offsets
539 			 * in "struct flock" were preserved.
540 			 */
541 			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
542 				error = EFAULT;
543 				break;
544 			} else {
545 				bf.l_type = sbf32.l_type;
546 				bf.l_whence = sbf32.l_whence;
547 				bf.l_start = (off64_t)sbf32.l_start;
548 				bf.l_len = (off64_t)sbf32.l_len;
549 				bf.l_sysid = sbf32.l_sysid;
550 				bf.l_pid = sbf32.l_pid;
551 			}
552 		}
553 #endif /* _ILP32 || _SYSCALL32_IMPL */
554 
555 #if defined(_LP64)
556 		if (datamodel == DATAMODEL_LP64 &&
557 		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
558 			if (copyin((void *)arg, &bf, sizeof (bf))) {
559 				error = EFAULT;
560 				break;
561 			}
562 		}
563 #endif /* defined(_LP64) */
564 
565 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
566 		if (datamodel == DATAMODEL_ILP32 &&
567 		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
568 			if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
569 				error = EFAULT;
570 				break;
571 			} else {
572 				/*
573 				 * Note that the size of flock64 is different in
574 				 * the ILP32 and LP64 models, due to the l_pad
575 				 * field. We do not want to assume that the
576 				 * flock64 structure is laid out the same in
577 				 * ILP32 and LP64 environments, so we will
578 				 * copy in the ILP32 version of flock64
579 				 * explicitly and copy it to the native
580 				 * flock64 structure.
581 				 */
582 				bf.l_type = (short)bf64_32.l_type;
583 				bf.l_whence = (short)bf64_32.l_whence;
584 				bf.l_start = bf64_32.l_start;
585 				bf.l_len = bf64_32.l_len;
586 				bf.l_sysid = (int)bf64_32.l_sysid;
587 				bf.l_pid = (pid_t)bf64_32.l_pid;
588 			}
589 		}
590 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
591 
		/*
		 * The plain commands are range-checked against the caller's
		 * maxoffset, the *64 commands against MAXOFFSET_T.
		 */
592 		if (cmd == F_ALLOCSP || cmd == F_FREESP)
593 			error = flock_check(vp, &bf, offset, maxoffset);
594 		else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64)
595 			error = flock_check(vp, &bf, offset, MAXOFFSET_T);
596 		if (error)
597 			break;
598 
599 		if (vp->v_type == VREG && bf.l_len == 0 &&
600 		    bf.l_start > OFFSET_MAX(fp)) {
601 			error = EFBIG;
602 			break;
603 		}
604 
605 		/*
606 		 * Make sure that there are no conflicting non-blocking
607 		 * mandatory locks in the region being manipulated. If
608 		 * there are such locks then return EACCES.
609 		 */
610 		if ((error = flock_get_start(vp, &bf, offset, &start)) != 0)
611 			break;
612 
613 		if (nbl_need_check(vp)) {
614 			u_offset_t	begin;
615 			ssize_t		length;
616 
617 			nbl_start_crit(vp, RW_READER);
618 			in_crit = 1;
			/* From here, nbl_end_crit() runs after the switch. */
619 			vattr.va_mask = AT_SIZE;
620 			if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
621 			    != 0)
622 				break;
			/*
			 * The region checked runs from the lower of start
			 * and the current file size, for the distance
			 * between the two.
			 */
623 			begin = start > vattr.va_size ? vattr.va_size : start;
624 			length = vattr.va_size > start ? vattr.va_size - start :
625 			    start - vattr.va_size;
626 			if (nbl_conflict(vp, NBL_WRITE, begin, length, 0,
627 			    NULL)) {
628 				error = EACCES;
629 				break;
630 			}
631 		}
632 
		/* The 64-bit command names are not passed to VOP_SPACE(). */
633 		if (cmd == F_ALLOCSP64)
634 			cmd = F_ALLOCSP;
635 		else if (cmd == F_FREESP64)
636 			cmd = F_FREESP;
637 
638 		error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL);
639 
640 		break;
641 
642 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
643 	case F_GETLK64:
644 	case F_SETLK64:
645 	case F_SETLKW64:
646 	case F_SETLK64_NBMAND:
647 	case F_OFD_GETLK64:
648 	case F_OFD_SETLK64:
649 	case F_OFD_SETLKW64:
650 	case F_FLOCK64:
651 	case F_FLOCKW64:
652 		/*
653 		 * Large Files: Here we set cmd as *LK and send it to
654 		 * lower layers. *LK64 is only for the user land.
655 		 * Most of the comments described above for F_SETLK
656 		 * applies here too.
657 		 * Large File support is only needed for ILP32 apps!
658 		 */
659 		if (datamodel != DATAMODEL_ILP32) {
660 			error = EINVAL;
661 			break;
662 		}
663 
664 		if (cmd == F_GETLK64)
665 			cmd = F_GETLK;
666 		else if (cmd == F_SETLK64)
667 			cmd = F_SETLK;
668 		else if (cmd == F_SETLKW64)
669 			cmd = F_SETLKW;
670 		else if (cmd == F_SETLK64_NBMAND)
671 			cmd = F_SETLK_NBMAND;
672 		else if (cmd == F_OFD_GETLK64)
673 			cmd = F_OFD_GETLK;
674 		else if (cmd == F_OFD_SETLK64)
675 			cmd = F_OFD_SETLK;
676 		else if (cmd == F_OFD_SETLKW64)
677 			cmd = F_OFD_SETLKW;
678 		else if (cmd == F_FLOCK64)
679 			cmd = F_FLOCK;
680 		else if (cmd == F_FLOCKW64)
681 			cmd = F_FLOCKW;
682 
683 		/*
684 		 * Note that the size of flock64 is different in the ILP32
685 		 * and LP64 models, due to the sucking l_pad field.
686 		 * We do not want to assume that the flock64 structure is
687 		 * laid out in the same in ILP32 and LP64 environments, so
688 		 * we will copy in the ILP32 version of flock64 explicitly
689 		 * and copy it to the native flock64 structure.
690 		 */
691 
692 		if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
693 			error = EFAULT;
694 			break;
695 		}
696 
697 		bf.l_type = (short)bf64_32.l_type;
698 		bf.l_whence = (short)bf64_32.l_whence;
699 		bf.l_start = bf64_32.l_start;
700 		bf.l_len = bf64_32.l_len;
701 		bf.l_sysid = (int)bf64_32.l_sysid;
702 		bf.l_pid = (pid_t)bf64_32.l_pid;
703 
704 		if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
705 			break;
706 
707 		if (cmd == F_FLOCK || cmd == F_FLOCKW) {
708 			/* FLOCK* locking is always over the entire file. */
709 			if (bf.l_whence != 0 || bf.l_start != 0 ||
710 			    bf.l_len != 0) {
711 				error = EINVAL;
712 				break;
713 			}
714 			if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
715 				error = EINVAL;
716 				break;
717 			}
718 		}
719 
720 		if (cmd == F_OFD_GETLK || cmd == F_OFD_SETLK ||
721 		    cmd == F_OFD_SETLKW) {
722 			/*
723 			 * TBD OFD-style locking is currently limited to
724 			 * covering the entire file.
725 			 */
726 			if (bf.l_whence != 0 || bf.l_start != 0 ||
727 			    bf.l_len != 0) {
728 				error = EINVAL;
729 				break;
730 			}
731 		}
732 
733 		/*
734 		 * The *_frlock functions in the various file systems basically
735 		 * do some validation and then funnel everything through the
736 		 * fs_frlock function. For OFD-style locks fs_frlock will do
737 		 * nothing so that once control returns here we can call the
738 		 * ofdlock function with the correct fp. For OFD-style locks
739 		 * the unsupported remote file systems, such as NFS, detect and
740 		 * reject the OFD-style cmd argument.
741 		 */
742 		if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset,
743 		    NULL, fp->f_cred, NULL)) != 0)
744 			break;
745 
746 		if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
747 		    cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
748 			/*
749 			 * This is an OFD-style lock so we need to handle it
750 			 * here. Because OFD-style locks are associated with
751 			 * the file_t we didn't have enough info down the
752 			 * VOP_FRLOCK path immediately above.
753 			 */
754 			if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
755 				break;
756 		}
757 
		/* As above: when no lock is found only l_type is copied out. */
758 		if ((cmd == F_GETLK || cmd == F_OFD_GETLK) &&
759 		    bf.l_type == F_UNLCK) {
760 			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
761 			    sizeof (bf.l_type)))
762 				error = EFAULT;
763 			break;
764 		}
765 
766 		if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
767 			int i;
768 
769 			/*
770 			 * We do not want to assume that the flock64 structure
771 			 * is laid out in the same in ILP32 and LP64
772 			 * environments, so we will copy out the ILP32 version
773 			 * of flock64 explicitly after copying the native
774 			 * flock64 structure to it.
775 			 */
776 			for (i = 0; i < 4; i++)
777 				bf64_32.l_pad[i] = 0;
778 			bf64_32.l_type = (int16_t)bf.l_type;
779 			bf64_32.l_whence = (int16_t)bf.l_whence;
780 			bf64_32.l_start = bf.l_start;
781 			bf64_32.l_len = bf.l_len;
782 			bf64_32.l_sysid = (int32_t)bf.l_sysid;
783 			bf64_32.l_pid = (pid32_t)bf.l_pid;
784 			if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32)))
785 				error = EFAULT;
786 		}
787 		break;
788 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
789 
790 	case F_SHARE:
791 	case F_SHARE_NBMAND:
792 	case F_UNSHARE:
793 
794 		/*
795 		 * Copy in input fields only.
796 		 */
797 		if (copyin((void *)arg, &fsh, sizeof (fsh))) {
798 			error = EFAULT;
799 			break;
800 		}
801 
802 		/*
803 		 * Local share reservations always have this simple form
804 		 */
805 		shr.s_access = fsh.f_access;
806 		shr.s_deny = fsh.f_deny;
807 		shr.s_sysid = 0;
808 		shr.s_pid = ttoproc(curthread)->p_pid;
809 		shr_own.sl_pid = shr.s_pid;
810 		shr_own.sl_id = fsh.f_id;
811 		shr.s_own_len = sizeof (shr_own);
812 		shr.s_owner = (caddr_t)&shr_own;
813 		error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred, NULL);
814 		break;
815 
816 	default:
817 		error = EINVAL;
818 		break;
819 	}
820 
	/* Leave the critical region if nbl_start_crit() was called above. */
821 	if (in_crit)
822 		nbl_end_crit(vp);
823 
	/* "done": the getf() reference is still held and must be released. */
824 done:
825 	releasef(fdes);
	/* "out": no reference is held (never taken, or already released). */
826 out:
827 	if (error)
828 		return (set_errno(error));
829 	return (retval);
830 }
831 
/*
 * Validate the byte range described by *flp against max, the largest
 * offset the caller may address.
 *
 * The start of the range is l_start taken relative to l_whence:
 * 0 (SEEK_SET) is absolute, 1 (SEEK_CUR) is relative to offset, and
 * 2 (SEEK_END) is relative to the file size obtained with VOP_GETATTR().
 * An l_len of 0 extends the range to MAXEND; a negative l_len makes the
 * range end at the computed start and begin -l_len - 1 bytes before it.
 *
 * Returns 0 if the range is acceptable, otherwise:
 *   EINVAL	unknown l_whence, start beyond max, or start after end;
 *   EOVERFLOW	l_start or a positive l_len does not fit below max;
 *   or the error returned by VOP_GETATTR().
 *
 * Side effect: an F_UNLCK request with a positive length that ends
 * exactly at max has its l_len rewritten to 0.
 *
 * The forward declaration of this function is static, so the definition
 * is marked static as well; the linkage is unchanged.
 */
832 static int
833 flock_check(vnode_t *vp, flock64_t *flp, offset_t offset, offset_t max)
834 {
835 	struct vattr	vattr;
836 	int	error;
837 	u_offset_t start, end;
838 
839 	/*
840 	 * Determine the starting point of the request
	 *
	 * NOTE(review): start is a u_offset_t, so a negative result
	 * presumably compares as a very large value and is caught by
	 * the "start > max" tests below -- confirm against the type
	 * definitions.
841 	 */
842 	switch (flp->l_whence) {
843 	case 0:		/* SEEK_SET */
844 		start = (u_offset_t)flp->l_start;
845 		if (start > max)
846 			return (EINVAL);
847 		break;
848 	case 1:		/* SEEK_CUR */
849 		if (flp->l_start > (max - offset))
850 			return (EOVERFLOW);
851 		start = (u_offset_t)(flp->l_start + offset);
852 		if (start > max)
853 			return (EINVAL);
854 		break;
855 	case 2:		/* SEEK_END */
856 		vattr.va_mask = AT_SIZE;
857 		if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
858 			return (error);
859 		if (flp->l_start > (max - (offset_t)vattr.va_size))
860 			return (EOVERFLOW);
861 		start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
862 		if (start > max)
863 			return (EINVAL);
864 		break;
865 	default:
866 		return (EINVAL);
867 	}
868 
869 	/*
870 	 * Determine the range covered by the request.
871 	 */
872 	if (flp->l_len == 0)
873 		end = MAXEND;
874 	else if ((offset_t)flp->l_len > 0) {
875 		if (flp->l_len > (max - start + 1))
876 			return (EOVERFLOW);
877 		end = (u_offset_t)(start + (flp->l_len - 1));
878 		ASSERT(end <= max);
879 	} else {
880 		/*
881 		 * Negative length; why do we even allow this ?
882 		 * Because this allows easy specification of
883 		 * the last n bytes of the file.
		 *
		 * The range becomes [start + l_len + 1, start].
884 		 */
885 		end = start;
886 		start += (u_offset_t)flp->l_len;
887 		start++;
888 		if (start > max)
889 			return (EINVAL);
890 		ASSERT(end <= max);
891 	}
892 	ASSERT(start <= max);
	/*
	 * An unlock of a positive-length range that ends exactly at max
	 * is rewritten to l_len == 0, the form for which end is taken to
	 * be MAXEND above.
	 */
893 	if (flp->l_type == F_UNLCK && flp->l_len > 0 &&
894 	    end == (offset_t)max) {
895 		flp->l_len = 0;
896 	}
897 	if (start > end)
898 		return (EINVAL);
899 	return (0);
900 }
901 
/*
 * Compute the absolute starting offset of the range described by *flp
 * and store it in *start.
 *
 * l_start is taken relative to l_whence: 0 (SEEK_SET) is absolute,
 * 1 (SEEK_CUR) is relative to offset, and 2 (SEEK_END) is relative to
 * the file size obtained with VOP_GETATTR().  No range checking is done
 * here; fcntl() calls flock_check() on the same structure first.
 *
 * Returns 0 on success, EINVAL for an unknown l_whence, or the error
 * returned by VOP_GETATTR().  *start is not written on failure.
 */
902 static int
903 flock_get_start(vnode_t *vp, flock64_t *flp, offset_t offset, u_offset_t *start)
904 {
905 	struct vattr	vattr;
906 	int	error;
907 
908 	/*
909 	 * Determine the starting point of the request. Assume that it is
910 	 * a valid starting point.
911 	 */
912 	switch (flp->l_whence) {
913 	case 0:		/* SEEK_SET */
914 		*start = (u_offset_t)flp->l_start;
915 		break;
916 	case 1:		/* SEEK_CUR */
917 		*start = (u_offset_t)(flp->l_start + offset);
918 		break;
919 	case 2:		/* SEEK_END */
920 		vattr.va_mask = AT_SIZE;
921 		if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
922 			return (error);
923 		*start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
924 		break;
925 	default:
926 		return (EINVAL);
927 	}
928 
929 	return (0);
930 }
931 
932 /*
933  * Run the legacy RLIMIT_NOFILE resource-control action against process p.
    * fcntl() uses this when a non-negative descriptor argument is not below
    * p->p_fno_ctl.  p->p_lock is held around the call, and the value that
    * rctl_action() returns is discarded.
934  */
935 static void
936 fd_too_big(proc_t *p)
937 {
938 	mutex_enter(&p->p_lock);
939 	(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE], p->p_rctls, p,
940 	    RCA_SAFE);
941 	mutex_exit(&p->p_lock);
942 }
943