xref: /titanic_52/usr/src/uts/common/syscall/fcntl.c (revision 4c1177a46d4d850e30806d4e27d635527bba8e90)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
25  * Copyright 2015, Joyent, Inc.
26  */
27 
28 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved  	*/
30 
31 /*
32  * Portions of this source code were derived from Berkeley 4.3 BSD
33  * under license from the Regents of the University of California.
34  */
35 
36 
37 #include <sys/param.h>
38 #include <sys/isa_defs.h>
39 #include <sys/types.h>
40 #include <sys/sysmacros.h>
41 #include <sys/systm.h>
42 #include <sys/errno.h>
43 #include <sys/fcntl.h>
44 #include <sys/flock.h>
45 #include <sys/vnode.h>
46 #include <sys/file.h>
47 #include <sys/mode.h>
48 #include <sys/proc.h>
49 #include <sys/filio.h>
50 #include <sys/share.h>
51 #include <sys/debug.h>
52 #include <sys/rctl.h>
53 #include <sys/nbmlock.h>
54 
55 #include <sys/cmn_err.h>
56 
57 static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
58 static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
59 static void fd_too_big(proc_t *);
60 
61 /*
62  * File control.
 *
 * Dispatches on cmd in three stages: commands that need no getf() hold,
 * commands that need the hold but no copyin, and finally the remaining
 * commands (record locks, space allocation/freeing, share reservations),
 * most of which copy a structure in from the user address held in arg.
 *
 * Returns retval on success.  On failure the result of set_errno(error)
 * is returned instead.
63  */
64 int
65 fcntl(int fdes, int cmd, intptr_t arg)
66 {
67 	int iarg;
68 	int error = 0;
69 	int retval;
70 	proc_t *p;
71 	file_t *fp;
72 	vnode_t *vp;
73 	u_offset_t offset;
74 	u_offset_t start;
75 	struct vattr vattr;
76 	int in_crit;
77 	int flag;
78 	struct flock sbf;
79 	struct flock64 bf;
80 	struct o_flock obf;
81 	struct flock64_32 bf64_32;
82 	struct fshare fsh;
83 	struct shrlock shr;
84 	struct shr_locowner shr_own;
85 	offset_t maxoffset;
86 	model_t datamodel;
87 	int fdres;
88 
89 #if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32)
90 	ASSERT(sizeof (struct flock) == sizeof (struct flock32));
91 	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32));
92 #endif
93 #if defined(_LP64) && !defined(lint) && defined(_SYSCALL32)
94 	ASSERT(sizeof (struct flock) == sizeof (struct flock64_64));
95 	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64));
96 #endif
97 
98 	/*
99 	 * First, for speed, deal with the subset of cases
100 	 * that do not require getf() / releasef().
 	 * Each of these jumps straight to "out", bypassing releasef().
101 	 */
102 	switch (cmd) {
103 	case F_GETFD:
104 		if ((error = f_getfd_error(fdes, &flag)) == 0)
105 			retval = flag;
106 		goto out;
107 
108 	case F_SETFD:
109 		error = f_setfd_error(fdes, (int)arg);
110 		retval = 0;
111 		goto out;
112 
113 	case F_GETFL:
114 		if ((error = f_getfl(fdes, &flag)) == 0) {
115 			retval = (flag & (FMASK | FASYNC));
116 			if ((flag & (FSEARCH | FEXEC)) == 0)
117 				retval += FOPEN;
118 			else
119 				retval |= (flag & (FSEARCH | FEXEC));
120 		}
121 		goto out;
122 
123 	case F_GETXFL:
124 		if ((error = f_getfl(fdes, &flag)) == 0) {
125 			retval = flag;
126 			if ((flag & (FSEARCH | FEXEC)) == 0)
127 				retval += FOPEN;
128 		}
129 		goto out;
130 
131 	case F_BADFD:
132 		if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0)
133 			retval = fdres;
134 		goto out;
135 	}
136 
137 	/*
138 	 * Second, for speed, deal with the subset of cases that
139 	 * require getf() / releasef() but do not require copyin.
140 	 */
141 	if ((fp = getf(fdes)) == NULL) {
142 		error = EBADF;
143 		goto out;
144 	}
	/* The commands in this stage interpret arg as a plain int. */
145 	iarg = (int)arg;
146 
147 	switch (cmd) {
148 	case F_DUPFD:
149 	case F_DUPFD_CLOEXEC:
150 		p = curproc;
		/*
		 * The unsigned comparison also rejects a negative iarg;
		 * fd_too_big() is only invoked for non-negative values.
		 */
151 		if ((uint_t)iarg >= p->p_fno_ctl) {
152 			if (iarg >= 0)
153 				fd_too_big(p);
154 			error = EINVAL;
155 			goto done;
156 		}
157 		/*
158 		 * We need to increment the f_count reference counter
159 		 * before allocating a new file descriptor.
160 		 * Doing it the other way round opens a window for a race
161 		 * with closeandsetf() on the target file descriptor which can
162 		 * close the file still referenced by the original
163 		 * file descriptor.
164 		 */
165 		mutex_enter(&fp->f_tlock);
166 		fp->f_count++;
167 		mutex_exit(&fp->f_tlock);
168 		if ((retval = ufalloc_file(iarg, fp)) == -1) {
169 			/*
170 			 * New file descriptor can't be allocated.
171 			 * Revert the reference count.
172 			 */
173 			mutex_enter(&fp->f_tlock);
174 			fp->f_count--;
175 			mutex_exit(&fp->f_tlock);
176 			error = EMFILE;
177 		} else {
178 			if (cmd == F_DUPFD_CLOEXEC) {
179 				f_setfd(retval, FD_CLOEXEC);
180 			}
181 		}
182 		goto done;
183 
184 	case F_DUP2FD_CLOEXEC:
		/* Unlike F_DUP2FD, duplicating a descriptor onto itself fails. */
185 		if (fdes == iarg) {
186 			error = EINVAL;
187 			goto done;
188 		}
189 
190 		/*FALLTHROUGH*/
191 
192 	case F_DUP2FD:
193 		p = curproc;
194 		if (fdes == iarg) {
195 			retval = iarg;
196 		} else if ((uint_t)iarg >= p->p_fno_ctl) {
197 			if (iarg >= 0)
198 				fd_too_big(p);
199 			error = EBADF;
200 		} else {
201 			/*
202 			 * We can't hold our getf(fdes) across the call to
203 			 * closeandsetf() because it creates a window for
204 			 * deadlock: if one thread is doing dup2(a, b) while
205 			 * another is doing dup2(b, a), each one will block
206 			 * waiting for the other to call releasef().  The
207 			 * solution is to increment the file reference count
208 			 * (which we have to do anyway), then releasef(fdes),
209 			 * then closeandsetf().  Incrementing f_count ensures
210 			 * that fp won't disappear after we call releasef().
211 			 * When closeandsetf() fails, we try to avoid calling
212 			 * closef() because of all the side effects.
213 			 */
214 			mutex_enter(&fp->f_tlock);
215 			fp->f_count++;
216 			mutex_exit(&fp->f_tlock);
217 			releasef(fdes);
218 			if ((error = closeandsetf(iarg, fp)) == 0) {
219 				if (cmd == F_DUP2FD_CLOEXEC) {
220 					f_setfd(iarg, FD_CLOEXEC);
221 				}
222 				retval = iarg;
223 			} else {
224 				mutex_enter(&fp->f_tlock);
225 				if (fp->f_count > 1) {
226 					fp->f_count--;
227 					mutex_exit(&fp->f_tlock);
228 				} else {
229 					mutex_exit(&fp->f_tlock);
230 					(void) closef(fp);
231 				}
232 			}
			/* releasef(fdes) was already called above; skip "done". */
233 			goto out;
234 		}
235 		goto done;
236 
237 	case F_SETFL:
238 		vp = fp->f_vnode;
239 		flag = fp->f_flag;
		/* If both FNONBLOCK and FNDELAY are requested, drop FNDELAY. */
240 		if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
241 			iarg &= ~FNDELAY;
242 		if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred, NULL)) ==
243 		    0) {
244 			iarg &= FMASK;
			/*
			 * Clear every FMASK bit except FREAD/FWRITE, then
			 * install the requested bits; FREAD/FWRITE are left
			 * as they were.
			 */
245 			mutex_enter(&fp->f_tlock);
246 			fp->f_flag &= ~FMASK | (FREAD|FWRITE);
247 			fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE);
248 			mutex_exit(&fp->f_tlock);
249 		}
250 		retval = 0;
251 		goto done;
252 	}
253 
254 	/*
255 	 * Finally, deal with the expensive cases.
256 	 */
257 	retval = 0;
258 	in_crit = 0;
259 	maxoffset = MAXOFF_T;
260 	datamodel = DATAMODEL_NATIVE;
261 #if defined(_SYSCALL32_IMPL)
262 	if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
263 		maxoffset = MAXOFF32_T;
264 #endif
265 
266 	vp = fp->f_vnode;
267 	flag = fp->f_flag;
268 	offset = fp->f_offset;
269 
270 	switch (cmd) {
271 	/*
272 	 * The file system and vnode layers understand and implement
273 	 * locking with flock64 structures. So here once we pass through
274 	 * the test for compatibility as defined by LFS API, (for F_SETLK,
275 	 * F_SETLKW, F_GETLK, F_GETLKW, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW,
276 	 * F_FREESP) we transform the flock structure to a flock64 structure
277 	 * and send it to the lower layers. Similarly in case of GETLK and
278 	 * OFD_GETLK the returned flock64 structure is transformed to a flock
279 	 * structure if everything fits in nicely, otherwise we return
280 	 * EOVERFLOW.
281 	 */
282 
283 	case F_GETLK:
284 	case F_O_GETLK:
285 	case F_SETLK:
286 	case F_SETLKW:
287 	case F_SETLK_NBMAND:
288 	case F_OFD_GETLK:
289 	case F_OFD_SETLK:
290 	case F_OFD_SETLKW:
291 	case F_FLOCK:
292 	case F_FLOCKW:
293 
294 		/*
295 		 * Copy in input fields only.
296 		 */
297 
298 		if (cmd == F_O_GETLK) {
299 			if (datamodel != DATAMODEL_ILP32) {
300 				error = EINVAL;
301 				break;
302 			}
303 
304 			if (copyin((void *)arg, &obf, sizeof (obf))) {
305 				error = EFAULT;
306 				break;
307 			}
308 			bf.l_type = obf.l_type;
309 			bf.l_whence = obf.l_whence;
310 			bf.l_start = (off64_t)obf.l_start;
311 			bf.l_len = (off64_t)obf.l_len;
312 			bf.l_sysid = (int)obf.l_sysid;
313 			bf.l_pid = obf.l_pid;
314 		} else if (datamodel == DATAMODEL_NATIVE) {
315 			if (copyin((void *)arg, &sbf, sizeof (sbf))) {
316 				error = EFAULT;
317 				break;
318 			}
319 			/*
320 			 * XXX	In an LP64 kernel with an LP64 application
321 			 *	there's no need to do a structure copy here
322 			 *	struct flock == struct flock64. However,
323 			 *	we did it this way to avoid more conditional
324 			 *	compilation.
325 			 */
326 			bf.l_type = sbf.l_type;
327 			bf.l_whence = sbf.l_whence;
328 			bf.l_start = (off64_t)sbf.l_start;
329 			bf.l_len = (off64_t)sbf.l_len;
330 			bf.l_sysid = sbf.l_sysid;
331 			bf.l_pid = sbf.l_pid;
332 		}
333 #if defined(_SYSCALL32_IMPL)
334 		else {
335 			struct flock32 sbf32;
336 			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
337 				error = EFAULT;
338 				break;
339 			}
340 			bf.l_type = sbf32.l_type;
341 			bf.l_whence = sbf32.l_whence;
342 			bf.l_start = (off64_t)sbf32.l_start;
343 			bf.l_len = (off64_t)sbf32.l_len;
344 			bf.l_sysid = sbf32.l_sysid;
345 			bf.l_pid = sbf32.l_pid;
346 		}
347 #endif /* _SYSCALL32_IMPL */
348 
349 		/*
350 		 * 64-bit support: check for overflow for 32-bit lock ops
351 		 */
352 		if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
353 			break;
354 
355 		if (cmd == F_FLOCK || cmd == F_FLOCKW) {
356 			/* FLOCK* locking is always over the entire file. */
357 			if (bf.l_whence != 0 || bf.l_start != 0 ||
358 			    bf.l_len != 0) {
359 				error = EINVAL;
360 				break;
361 			}
362 			if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
363 				error = EINVAL;
364 				break;
365 			}
366 		}
367 
368 		if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
369 			/*
370 			 * TBD OFD-style locking is currently limited to
371 			 * covering the entire file.
372 			 */
373 			if (bf.l_whence != 0 || bf.l_start != 0 ||
374 			    bf.l_len != 0) {
375 				error = EINVAL;
376 				break;
377 			}
378 		}
379 
380 		/*
381 		 * Not all of the filesystems understand F_O_GETLK, and
382 		 * there's no need for them to know.  Map it to F_GETLK.
383 		 *
384 		 * The *_frlock functions in the various file systems basically
385 		 * do some validation and then funnel everything through the
386 		 * fs_frlock function. For OFD-style locks fs_frlock will do
387 		 * nothing so that once control returns here we can call the
388 		 * ofdlock function with the correct fp. For OFD-style locks
389 		 * the unsupported remote file systems, such as NFS, detect and
390 		 * reject the OFD-style cmd argument.
391 		 */
392 		if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd,
393 		    &bf, flag, offset, NULL, fp->f_cred, NULL)) != 0)
394 			break;
395 
396 		if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
397 		    cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
398 			/*
399 			 * This is an OFD-style lock so we need to handle it
400 			 * here. Because OFD-style locks are associated with
401 			 * the file_t we didn't have enough info down the
402 			 * VOP_FRLOCK path immediately above.
403 			 */
404 			if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
405 				break;
406 		}
407 
408 		/*
409 		 * If command is GETLK and no lock is found, only
410 		 * the type field is changed.
411 		 */
412 		if ((cmd == F_O_GETLK || cmd == F_GETLK ||
413 		    cmd == F_OFD_GETLK) && bf.l_type == F_UNLCK) {
414 			/* l_type always first entry, always a short */
415 			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
416 			    sizeof (bf.l_type)))
417 				error = EFAULT;
418 			break;
419 		}
420 
421 		if (cmd == F_O_GETLK) {
422 			/*
423 			 * Return an SVR3 flock structure to the user.
424 			 */
425 			obf.l_type = (int16_t)bf.l_type;
426 			obf.l_whence = (int16_t)bf.l_whence;
427 			obf.l_start = (int32_t)bf.l_start;
428 			obf.l_len = (int32_t)bf.l_len;
429 			if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) {
430 				/*
431 				 * One or both values for the above fields
432 				 * is too large to store in an SVR3 flock
433 				 * structure.
434 				 */
435 				error = EOVERFLOW;
436 				break;
437 			}
438 			obf.l_sysid = (int16_t)bf.l_sysid;
439 			obf.l_pid = (int16_t)bf.l_pid;
440 			if (copyout(&obf, (void *)arg, sizeof (obf)))
441 				error = EFAULT;
442 		} else if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
443 			/*
444 			 * Copy out SVR4 flock.
445 			 */
446 			int i;
447 
448 			if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
449 				error = EOVERFLOW;
450 				break;
451 			}
452 
453 			if (datamodel == DATAMODEL_NATIVE) {
454 				for (i = 0; i < 4; i++)
455 					sbf.l_pad[i] = 0;
456 				/*
457 				 * XXX	In an LP64 kernel with an LP64
458 				 *	application there's no need to do a
459 				 *	structure copy here as currently
460 				 *	struct flock == struct flock64.
461 				 *	We did it this way to avoid more
462 				 *	conditional compilation.
463 				 */
464 				sbf.l_type = bf.l_type;
465 				sbf.l_whence = bf.l_whence;
466 				sbf.l_start = (off_t)bf.l_start;
467 				sbf.l_len = (off_t)bf.l_len;
468 				sbf.l_sysid = bf.l_sysid;
469 				sbf.l_pid = bf.l_pid;
470 				if (copyout(&sbf, (void *)arg, sizeof (sbf)))
471 					error = EFAULT;
472 			}
473 #if defined(_SYSCALL32_IMPL)
474 			else {
475 				struct flock32 sbf32;
476 				if (bf.l_start > MAXOFF32_T ||
477 				    bf.l_len > MAXOFF32_T) {
478 					error = EOVERFLOW;
479 					break;
480 				}
481 				for (i = 0; i < 4; i++)
482 					sbf32.l_pad[i] = 0;
483 				sbf32.l_type = (int16_t)bf.l_type;
484 				sbf32.l_whence = (int16_t)bf.l_whence;
485 				sbf32.l_start = (off32_t)bf.l_start;
486 				sbf32.l_len = (off32_t)bf.l_len;
487 				sbf32.l_sysid = (int32_t)bf.l_sysid;
488 				sbf32.l_pid = (pid32_t)bf.l_pid;
489 				if (copyout(&sbf32,
490 				    (void *)arg, sizeof (sbf32)))
491 					error = EFAULT;
492 			}
493 #endif
494 		}
495 		break;
496 
497 	case F_CHKFL:
498 		/*
499 		 * This is for internal use only, to allow the vnode layer
500 		 * to validate a flags setting before applying it.  User
501 		 * programs can't issue it.
502 		 */
503 		error = EINVAL;
504 		break;
505 
506 	case F_ALLOCSP:
507 	case F_FREESP:
508 	case F_ALLOCSP64:
509 	case F_FREESP64:
510 		/*
511 		 * Test for not-a-regular-file (and returning EINVAL)
512 		 * before testing for open-for-writing (and returning EBADF).
513 		 * This is relied upon by posix_fallocate() in libc.
514 		 */
515 		if (vp->v_type != VREG) {
516 			error = EINVAL;
517 			break;
518 		}
519 
520 		if ((flag & FWRITE) == 0) {
521 			error = EBADF;
522 			break;
523 		}
524 
		/* The *64 variants are only accepted from ILP32 callers. */
525 		if (datamodel != DATAMODEL_ILP32 &&
526 		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
527 			error = EINVAL;
528 			break;
529 		}
530 
531 #if defined(_ILP32) || defined(_SYSCALL32_IMPL)
532 		if (datamodel == DATAMODEL_ILP32 &&
533 		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
534 			struct flock32 sbf32;
535 			/*
536 			 * For compatibility we overlay an SVR3 flock on an SVR4
537 			 * flock.  This works because the input field offsets
538 			 * in "struct flock" were preserved.
539 			 */
540 			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
541 				error = EFAULT;
542 				break;
543 			} else {
544 				bf.l_type = sbf32.l_type;
545 				bf.l_whence = sbf32.l_whence;
546 				bf.l_start = (off64_t)sbf32.l_start;
547 				bf.l_len = (off64_t)sbf32.l_len;
548 				bf.l_sysid = sbf32.l_sysid;
549 				bf.l_pid = sbf32.l_pid;
550 			}
551 		}
552 #endif /* _ILP32 || _SYSCALL32_IMPL */
553 
554 #if defined(_LP64)
555 		if (datamodel == DATAMODEL_LP64 &&
556 		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
557 			if (copyin((void *)arg, &bf, sizeof (bf))) {
558 				error = EFAULT;
559 				break;
560 			}
561 		}
562 #endif /* defined(_LP64) */
563 
564 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
565 		if (datamodel == DATAMODEL_ILP32 &&
566 		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
567 			if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
568 				error = EFAULT;
569 				break;
570 			} else {
571 				/*
572 				 * Note that the size of flock64 is different in
573 				 * the ILP32 and LP64 models, due to the l_pad
574 				 * field. We do not want to assume that the
575 				 * flock64 structure is laid out the same in
576 				 * ILP32 and LP64 environments, so we will
577 				 * copy in the ILP32 version of flock64
578 				 * explicitly and copy it to the native
579 				 * flock64 structure.
580 				 */
581 				bf.l_type = (short)bf64_32.l_type;
582 				bf.l_whence = (short)bf64_32.l_whence;
583 				bf.l_start = bf64_32.l_start;
584 				bf.l_len = bf64_32.l_len;
585 				bf.l_sysid = (int)bf64_32.l_sysid;
586 				bf.l_pid = (pid_t)bf64_32.l_pid;
587 			}
588 		}
589 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
590 
		/*
		 * The plain commands are range-checked against the caller's
		 * data-model limit, the *64 commands against MAXOFFSET_T.
		 */
591 		if (cmd == F_ALLOCSP || cmd == F_FREESP)
592 			error = flock_check(vp, &bf, offset, maxoffset);
593 		else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64)
594 			error = flock_check(vp, &bf, offset, MAXOFFSET_T);
595 		if (error)
596 			break;
597 
598 		if (vp->v_type == VREG && bf.l_len == 0 &&
599 		    bf.l_start > OFFSET_MAX(fp)) {
600 			error = EFBIG;
601 			break;
602 		}
603 
604 		/*
605 		 * Make sure that there are no conflicting non-blocking
606 		 * mandatory locks in the region being manipulated. If
607 		 * there are such locks then return EACCES.
608 		 */
609 		if ((error = flock_get_start(vp, &bf, offset, &start)) != 0)
610 			break;
611 
612 		if (nbl_need_check(vp)) {
613 			u_offset_t	begin;
614 			ssize_t		length;
615 
616 			nbl_start_crit(vp, RW_READER);
			/* Tells the code after the switch to call nbl_end_crit(). */
617 			in_crit = 1;
618 			vattr.va_mask = AT_SIZE;
619 			if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
620 			    != 0)
621 				break;
			/*
			 * The region checked lies between the current file
			 * size and the requested start, whichever is lower
			 * first.
			 */
622 			begin = start > vattr.va_size ? vattr.va_size : start;
623 			length = vattr.va_size > start ? vattr.va_size - start :
624 			    start - vattr.va_size;
625 			if (nbl_conflict(vp, NBL_WRITE, begin, length, 0,
626 			    NULL)) {
627 				error = EACCES;
628 				break;
629 			}
630 		}
631 
		/* VOP_SPACE() is only ever handed the non-64 command names. */
632 		if (cmd == F_ALLOCSP64)
633 			cmd = F_ALLOCSP;
634 		else if (cmd == F_FREESP64)
635 			cmd = F_FREESP;
636 
637 		error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL);
638 
639 		break;
640 
641 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
642 	case F_GETLK64:
643 	case F_SETLK64:
644 	case F_SETLKW64:
645 	case F_SETLK64_NBMAND:
646 	case F_OFD_GETLK64:
647 	case F_OFD_SETLK64:
648 	case F_OFD_SETLKW64:
649 	case F_FLOCK64:
650 	case F_FLOCKW64:
651 		/*
652 		 * Large Files: Here we set cmd as *LK and send it to
653 		 * lower layers. *LK64 is only for the user land.
654 		 * Most of the comments described above for F_SETLK
655 		 * applies here too.
656 		 * Large File support is only needed for ILP32 apps!
657 		 */
658 		if (datamodel != DATAMODEL_ILP32) {
659 			error = EINVAL;
660 			break;
661 		}
662 
663 		if (cmd == F_GETLK64)
664 			cmd = F_GETLK;
665 		else if (cmd == F_SETLK64)
666 			cmd = F_SETLK;
667 		else if (cmd == F_SETLKW64)
668 			cmd = F_SETLKW;
669 		else if (cmd == F_SETLK64_NBMAND)
670 			cmd = F_SETLK_NBMAND;
671 		else if (cmd == F_OFD_GETLK64)
672 			cmd = F_OFD_GETLK;
673 		else if (cmd == F_OFD_SETLK64)
674 			cmd = F_OFD_SETLK;
675 		else if (cmd == F_OFD_SETLKW64)
676 			cmd = F_OFD_SETLKW;
677 		else if (cmd == F_FLOCK64)
678 			cmd = F_FLOCK;
679 		else if (cmd == F_FLOCKW64)
680 			cmd = F_FLOCKW;
681 
682 		/*
683 		 * Note that the size of flock64 is different in the ILP32
684 		 * and LP64 models, due to the l_pad field.
685 		 * We do not want to assume that the flock64 structure is
686 		 * laid out the same in ILP32 and LP64 environments, so
687 		 * we will copy in the ILP32 version of flock64 explicitly
688 		 * and copy it to the native flock64 structure.
689 		 */
690 
691 		if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
692 			error = EFAULT;
693 			break;
694 		}
695 
696 		bf.l_type = (short)bf64_32.l_type;
697 		bf.l_whence = (short)bf64_32.l_whence;
698 		bf.l_start = bf64_32.l_start;
699 		bf.l_len = bf64_32.l_len;
700 		bf.l_sysid = (int)bf64_32.l_sysid;
701 		bf.l_pid = (pid_t)bf64_32.l_pid;
702 
703 		if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
704 			break;
705 
706 		if (cmd == F_FLOCK || cmd == F_FLOCKW) {
707 			/* FLOCK* locking is always over the entire file. */
708 			if (bf.l_whence != 0 || bf.l_start != 0 ||
709 			    bf.l_len != 0) {
710 				error = EINVAL;
711 				break;
712 			}
713 			if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
714 				error = EINVAL;
715 				break;
716 			}
717 		}
718 
719 		if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
720 			/*
721 			 * TBD OFD-style locking is currently limited to
722 			 * covering the entire file.
723 			 */
724 			if (bf.l_whence != 0 || bf.l_start != 0 ||
725 			    bf.l_len != 0) {
726 				error = EINVAL;
727 				break;
728 			}
729 		}
730 
731 		/*
732 		 * The *_frlock functions in the various file systems basically
733 		 * do some validation and then funnel everything through the
734 		 * fs_frlock function. For OFD-style locks fs_frlock will do
735 		 * nothing so that once control returns here we can call the
736 		 * ofdlock function with the correct fp. For OFD-style locks
737 		 * the unsupported remote file systems, such as NFS, detect and
738 		 * reject the OFD-style cmd argument.
739 		 */
740 		if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset,
741 		    NULL, fp->f_cred, NULL)) != 0)
742 			break;
743 
744 		if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
745 		    cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
746 			/*
747 			 * This is an OFD-style lock so we need to handle it
748 			 * here. Because OFD-style locks are associated with
749 			 * the file_t we didn't have enough info down the
750 			 * VOP_FRLOCK path immediately above.
751 			 */
752 			if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
753 				break;
754 		}
755 
		/* As above: with no lock found, only l_type is copied out. */
756 		if ((cmd == F_GETLK || cmd == F_OFD_GETLK) &&
757 		    bf.l_type == F_UNLCK) {
758 			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
759 			    sizeof (bf.l_type)))
760 				error = EFAULT;
761 			break;
762 		}
763 
764 		if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
765 			int i;
766 
767 			/*
768 			 * We do not want to assume that the flock64 structure
769 			 * is laid out the same in ILP32 and LP64
770 			 * environments, so we will copy out the ILP32 version
771 			 * of flock64 explicitly after copying the native
772 			 * flock64 structure to it.
773 			 */
774 			for (i = 0; i < 4; i++)
775 				bf64_32.l_pad[i] = 0;
776 			bf64_32.l_type = (int16_t)bf.l_type;
777 			bf64_32.l_whence = (int16_t)bf.l_whence;
778 			bf64_32.l_start = bf.l_start;
779 			bf64_32.l_len = bf.l_len;
780 			bf64_32.l_sysid = (int32_t)bf.l_sysid;
781 			bf64_32.l_pid = (pid32_t)bf.l_pid;
782 			if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32)))
783 				error = EFAULT;
784 		}
785 		break;
786 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
787 
788 	case F_SHARE:
789 	case F_SHARE_NBMAND:
790 	case F_UNSHARE:
791 
792 		/*
793 		 * Copy in input fields only.
794 		 */
795 		if (copyin((void *)arg, &fsh, sizeof (fsh))) {
796 			error = EFAULT;
797 			break;
798 		}
799 
800 		/*
801 		 * Local share reservations always have this simple form
802 		 */
803 		shr.s_access = fsh.f_access;
804 		shr.s_deny = fsh.f_deny;
805 		shr.s_sysid = 0;
806 		shr.s_pid = ttoproc(curthread)->p_pid;
807 		shr_own.sl_pid = shr.s_pid;
808 		shr_own.sl_id = fsh.f_id;
809 		shr.s_own_len = sizeof (shr_own);
810 		shr.s_owner = (caddr_t)&shr_own;
811 		error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred, NULL);
812 		break;
813 
814 	default:
815 		error = EINVAL;
816 		break;
817 	}
818 
	/* Only the space commands set in_crit, after nbl_start_crit(). */
819 	if (in_crit)
820 		nbl_end_crit(vp);
821 
	/* "done": drop the hold taken by getf(fdes), then fall into "out". */
822 done:
823 	releasef(fdes);
	/*
	 * "out": reached directly by paths that hold no getf() reference
	 * (the first-stage commands, a failed getf(), and F_DUP2FD after it
	 * has called releasef() itself).
	 */
824 out:
825 	if (error)
826 		return (set_errno(error));
827 	return (retval);
828 }
829 
830 int
831 flock_check(vnode_t *vp, flock64_t *flp, offset_t offset, offset_t max)
832 {
833 	struct vattr	vattr;
834 	int	error;
835 	u_offset_t start, end;
836 
837 	/*
838 	 * Determine the starting point of the request
839 	 */
840 	switch (flp->l_whence) {
841 	case 0:		/* SEEK_SET */
842 		start = (u_offset_t)flp->l_start;
843 		if (start > max)
844 			return (EINVAL);
845 		break;
846 	case 1:		/* SEEK_CUR */
847 		if (flp->l_start > (max - offset))
848 			return (EOVERFLOW);
849 		start = (u_offset_t)(flp->l_start + offset);
850 		if (start > max)
851 			return (EINVAL);
852 		break;
853 	case 2:		/* SEEK_END */
854 		vattr.va_mask = AT_SIZE;
855 		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
856 			return (error);
857 		if (flp->l_start > (max - (offset_t)vattr.va_size))
858 			return (EOVERFLOW);
859 		start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
860 		if (start > max)
861 			return (EINVAL);
862 		break;
863 	default:
864 		return (EINVAL);
865 	}
866 
867 	/*
868 	 * Determine the range covered by the request.
869 	 */
870 	if (flp->l_len == 0)
871 		end = MAXEND;
872 	else if ((offset_t)flp->l_len > 0) {
873 		if (flp->l_len > (max - start + 1))
874 			return (EOVERFLOW);
875 		end = (u_offset_t)(start + (flp->l_len - 1));
876 		ASSERT(end <= max);
877 	} else {
878 		/*
879 		 * Negative length; why do we even allow this ?
880 		 * Because this allows easy specification of
881 		 * the last n bytes of the file.
882 		 */
883 		end = start;
884 		start += (u_offset_t)flp->l_len;
885 		(start)++;
886 		if (start > max)
887 			return (EINVAL);
888 		ASSERT(end <= max);
889 	}
890 	ASSERT(start <= max);
891 	if (flp->l_type == F_UNLCK && flp->l_len > 0 &&
892 	    end == (offset_t)max) {
893 		flp->l_len = 0;
894 	}
895 	if (start  > end)
896 		return (EINVAL);
897 	return (0);
898 }
899 
900 static int
901 flock_get_start(vnode_t *vp, flock64_t *flp, offset_t offset, u_offset_t *start)
902 {
903 	struct vattr	vattr;
904 	int	error;
905 
906 	/*
907 	 * Determine the starting point of the request. Assume that it is
908 	 * a valid starting point.
909 	 */
910 	switch (flp->l_whence) {
911 	case 0:		/* SEEK_SET */
912 		*start = (u_offset_t)flp->l_start;
913 		break;
914 	case 1:		/* SEEK_CUR */
915 		*start = (u_offset_t)(flp->l_start + offset);
916 		break;
917 	case 2:		/* SEEK_END */
918 		vattr.va_mask = AT_SIZE;
919 		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
920 			return (error);
921 		*start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
922 		break;
923 	default:
924 		return (EINVAL);
925 	}
926 
927 	return (0);
928 }
929 
930 /*
931  * Take rctl action when the requested file descriptor is too big.
932  */
933 static void
934 fd_too_big(proc_t *p)
935 {
936 	mutex_enter(&p->p_lock);
937 	(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
938 	    p->p_rctls, p, RCA_SAFE);
939 	mutex_exit(&p->p_lock);
940 }
941