xref: /titanic_52/usr/src/uts/common/syscall/fcntl.c (revision 9d12795f87b63c2e39e87bff369182edd34677d3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
25  */
26 
27 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * Portions of this source code were derived from Berkeley 4.3 BSD
32  * under license from the Regents of the University of California.
33  */
34 
35 
36 #include <sys/param.h>
37 #include <sys/isa_defs.h>
38 #include <sys/types.h>
39 #include <sys/sysmacros.h>
40 #include <sys/systm.h>
41 #include <sys/errno.h>
42 #include <sys/fcntl.h>
43 #include <sys/flock.h>
44 #include <sys/vnode.h>
45 #include <sys/file.h>
46 #include <sys/mode.h>
47 #include <sys/proc.h>
48 #include <sys/filio.h>
49 #include <sys/share.h>
50 #include <sys/debug.h>
51 #include <sys/rctl.h>
52 #include <sys/nbmlock.h>
53 
54 #include <sys/cmn_err.h>
55 
56 static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
57 static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
58 static void fd_too_big(proc_t *);
59 
60 /*
61  * File control.
62  */
63 int
64 fcntl(int fdes, int cmd, intptr_t arg)
65 {
66 	int iarg;
67 	int error = 0;
68 	int retval;
69 	proc_t *p;
70 	file_t *fp;
71 	vnode_t *vp;
72 	u_offset_t offset;
73 	u_offset_t start;
74 	struct vattr vattr;
75 	int in_crit;
76 	int flag;
77 	struct flock sbf;
78 	struct flock64 bf;
79 	struct o_flock obf;
80 	struct flock64_32 bf64_32;
81 	struct fshare fsh;
82 	struct shrlock shr;
83 	struct shr_locowner shr_own;
84 	offset_t maxoffset;
85 	model_t datamodel;
86 	int fdres;
87 
88 #if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32)
89 	ASSERT(sizeof (struct flock) == sizeof (struct flock32));
90 	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32));
91 #endif
92 #if defined(_LP64) && !defined(lint) && defined(_SYSCALL32)
93 	ASSERT(sizeof (struct flock) == sizeof (struct flock64_64));
94 	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64));
95 #endif
96 
97 	/*
98 	 * First, for speed, deal with the subset of cases
99 	 * that do not require getf() / releasef().
100 	 */
101 	switch (cmd) {
102 	case F_GETFD:
103 		if ((error = f_getfd_error(fdes, &flag)) == 0)
104 			retval = flag;
105 		goto out;
106 
107 	case F_SETFD:
108 		error = f_setfd_error(fdes, (int)arg);
109 		retval = 0;
110 		goto out;
111 
112 	case F_GETFL:
113 		if ((error = f_getfl(fdes, &flag)) == 0) {
114 			retval = (flag & (FMASK | FASYNC));
115 			if ((flag & (FSEARCH | FEXEC)) == 0)
116 				retval += FOPEN;
117 			else
118 				retval |= (flag & (FSEARCH | FEXEC));
119 		}
120 		goto out;
121 
122 	case F_GETXFL:
123 		if ((error = f_getfl(fdes, &flag)) == 0) {
124 			retval = flag;
125 			if ((flag & (FSEARCH | FEXEC)) == 0)
126 				retval += FOPEN;
127 		}
128 		goto out;
129 
130 	case F_BADFD:
131 		if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0)
132 			retval = fdres;
133 		goto out;
134 	}
135 
136 	/*
137 	 * Second, for speed, deal with the subset of cases that
138 	 * require getf() / releasef() but do not require copyin.
139 	 */
140 	if ((fp = getf(fdes)) == NULL) {
141 		error = EBADF;
142 		goto out;
143 	}
144 	iarg = (int)arg;
145 
146 	switch (cmd) {
147 	case F_DUPFD:
148 	case F_DUPFD_CLOEXEC:
149 		p = curproc;
150 		if ((uint_t)iarg >= p->p_fno_ctl) {
151 			if (iarg >= 0)
152 				fd_too_big(p);
153 			error = EINVAL;
154 			goto done;
155 		}
156 		/*
157 		 * We need to increment the f_count reference counter
158 		 * before allocating a new file descriptor.
159 		 * Doing it other way round opens a window for race condition
160 		 * with closeandsetf() on the target file descriptor which can
161 		 * close the file still referenced by the original
162 		 * file descriptor.
163 		 */
164 		mutex_enter(&fp->f_tlock);
165 		fp->f_count++;
166 		mutex_exit(&fp->f_tlock);
167 		if ((retval = ufalloc_file(iarg, fp)) == -1) {
168 			/*
169 			 * New file descriptor can't be allocated.
170 			 * Revert the reference count.
171 			 */
172 			mutex_enter(&fp->f_tlock);
173 			fp->f_count--;
174 			mutex_exit(&fp->f_tlock);
175 			error = EMFILE;
176 		} else {
177 			if (cmd == F_DUPFD_CLOEXEC) {
178 				f_setfd(retval, FD_CLOEXEC);
179 			}
180 		}
181 		goto done;
182 
183 	case F_DUP2FD_CLOEXEC:
184 		if (fdes == iarg) {
185 			error = EINVAL;
186 			goto done;
187 		}
188 
189 		/*FALLTHROUGH*/
190 
191 	case F_DUP2FD:
192 		p = curproc;
193 		if (fdes == iarg) {
194 			retval = iarg;
195 		} else if ((uint_t)iarg >= p->p_fno_ctl) {
196 			if (iarg >= 0)
197 				fd_too_big(p);
198 			error = EBADF;
199 		} else {
200 			/*
201 			 * We can't hold our getf(fdes) across the call to
202 			 * closeandsetf() because it creates a window for
203 			 * deadlock: if one thread is doing dup2(a, b) while
204 			 * another is doing dup2(b, a), each one will block
205 			 * waiting for the other to call releasef().  The
206 			 * solution is to increment the file reference count
207 			 * (which we have to do anyway), then releasef(fdes),
208 			 * then closeandsetf().  Incrementing f_count ensures
209 			 * that fp won't disappear after we call releasef().
210 			 * When closeandsetf() fails, we try avoid calling
211 			 * closef() because of all the side effects.
212 			 */
213 			mutex_enter(&fp->f_tlock);
214 			fp->f_count++;
215 			mutex_exit(&fp->f_tlock);
216 			releasef(fdes);
217 			if ((error = closeandsetf(iarg, fp)) == 0) {
218 				if (cmd == F_DUP2FD_CLOEXEC) {
219 					f_setfd(iarg, FD_CLOEXEC);
220 				}
221 				retval = iarg;
222 			} else {
223 				mutex_enter(&fp->f_tlock);
224 				if (fp->f_count > 1) {
225 					fp->f_count--;
226 					mutex_exit(&fp->f_tlock);
227 				} else {
228 					mutex_exit(&fp->f_tlock);
229 					(void) closef(fp);
230 				}
231 			}
232 			goto out;
233 		}
234 		goto done;
235 
236 	case F_SETFL:
237 		vp = fp->f_vnode;
238 		flag = fp->f_flag;
239 		if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
240 			iarg &= ~FNDELAY;
241 		if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred, NULL)) ==
242 		    0) {
243 			iarg &= FMASK;
244 			mutex_enter(&fp->f_tlock);
245 			fp->f_flag &= ~FMASK | (FREAD|FWRITE);
246 			fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE);
247 			mutex_exit(&fp->f_tlock);
248 		}
249 		retval = 0;
250 		goto done;
251 	}
252 
253 	/*
254 	 * Finally, deal with the expensive cases.
255 	 */
256 	retval = 0;
257 	in_crit = 0;
258 	maxoffset = MAXOFF_T;
259 	datamodel = DATAMODEL_NATIVE;
260 #if defined(_SYSCALL32_IMPL)
261 	if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
262 		maxoffset = MAXOFF32_T;
263 #endif
264 
265 	vp = fp->f_vnode;
266 	flag = fp->f_flag;
267 	offset = fp->f_offset;
268 
269 	switch (cmd) {
270 	/*
271 	 * The file system and vnode layers understand and implement
272 	 * locking with flock64 structures. So here once we pass through
273 	 * the test for compatibility as defined by LFS API, (for F_SETLK,
274 	 * F_SETLKW, F_GETLK, F_GETLKW, F_FREESP) we transform
275 	 * the flock structure to a flock64 structure and send it to the
276 	 * lower layers. Similarly in case of GETLK the returned flock64
277 	 * structure is transformed to a flock structure if everything fits
278 	 * in nicely, otherwise we return EOVERFLOW.
279 	 */
280 
281 	case F_GETLK:
282 	case F_O_GETLK:
283 	case F_SETLK:
284 	case F_SETLKW:
285 	case F_SETLK_NBMAND:
286 
287 		/*
288 		 * Copy in input fields only.
289 		 */
290 
291 		if (cmd == F_O_GETLK) {
292 			if (datamodel != DATAMODEL_ILP32) {
293 				error = EINVAL;
294 				break;
295 			}
296 
297 			if (copyin((void *)arg, &obf, sizeof (obf))) {
298 				error = EFAULT;
299 				break;
300 			}
301 			bf.l_type = obf.l_type;
302 			bf.l_whence = obf.l_whence;
303 			bf.l_start = (off64_t)obf.l_start;
304 			bf.l_len = (off64_t)obf.l_len;
305 			bf.l_sysid = (int)obf.l_sysid;
306 			bf.l_pid = obf.l_pid;
307 		} else if (datamodel == DATAMODEL_NATIVE) {
308 			if (copyin((void *)arg, &sbf, sizeof (sbf))) {
309 				error = EFAULT;
310 				break;
311 			}
312 			/*
313 			 * XXX	In an LP64 kernel with an LP64 application
314 			 *	there's no need to do a structure copy here
315 			 *	struct flock == struct flock64. However,
316 			 *	we did it this way to avoid more conditional
317 			 *	compilation.
318 			 */
319 			bf.l_type = sbf.l_type;
320 			bf.l_whence = sbf.l_whence;
321 			bf.l_start = (off64_t)sbf.l_start;
322 			bf.l_len = (off64_t)sbf.l_len;
323 			bf.l_sysid = sbf.l_sysid;
324 			bf.l_pid = sbf.l_pid;
325 		}
326 #if defined(_SYSCALL32_IMPL)
327 		else {
328 			struct flock32 sbf32;
329 			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
330 				error = EFAULT;
331 				break;
332 			}
333 			bf.l_type = sbf32.l_type;
334 			bf.l_whence = sbf32.l_whence;
335 			bf.l_start = (off64_t)sbf32.l_start;
336 			bf.l_len = (off64_t)sbf32.l_len;
337 			bf.l_sysid = sbf32.l_sysid;
338 			bf.l_pid = sbf32.l_pid;
339 		}
340 #endif /* _SYSCALL32_IMPL */
341 
342 		/*
343 		 * 64-bit support: check for overflow for 32-bit lock ops
344 		 */
345 		if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
346 			break;
347 
348 		/*
349 		 * Not all of the filesystems understand F_O_GETLK, and
350 		 * there's no need for them to know.  Map it to F_GETLK.
351 		 */
352 		if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd,
353 		    &bf, flag, offset, NULL, fp->f_cred, NULL)) != 0)
354 			break;
355 
356 		/*
357 		 * If command is GETLK and no lock is found, only
358 		 * the type field is changed.
359 		 */
360 		if ((cmd == F_O_GETLK || cmd == F_GETLK) &&
361 		    bf.l_type == F_UNLCK) {
362 			/* l_type always first entry, always a short */
363 			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
364 			    sizeof (bf.l_type)))
365 				error = EFAULT;
366 			break;
367 		}
368 
369 		if (cmd == F_O_GETLK) {
370 			/*
371 			 * Return an SVR3 flock structure to the user.
372 			 */
373 			obf.l_type = (int16_t)bf.l_type;
374 			obf.l_whence = (int16_t)bf.l_whence;
375 			obf.l_start = (int32_t)bf.l_start;
376 			obf.l_len = (int32_t)bf.l_len;
377 			if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) {
378 				/*
379 				 * One or both values for the above fields
380 				 * is too large to store in an SVR3 flock
381 				 * structure.
382 				 */
383 				error = EOVERFLOW;
384 				break;
385 			}
386 			obf.l_sysid = (int16_t)bf.l_sysid;
387 			obf.l_pid = (int16_t)bf.l_pid;
388 			if (copyout(&obf, (void *)arg, sizeof (obf)))
389 				error = EFAULT;
390 		} else if (cmd == F_GETLK) {
391 			/*
392 			 * Copy out SVR4 flock.
393 			 */
394 			int i;
395 
396 			if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
397 				error = EOVERFLOW;
398 				break;
399 			}
400 
401 			if (datamodel == DATAMODEL_NATIVE) {
402 				for (i = 0; i < 4; i++)
403 					sbf.l_pad[i] = 0;
404 				/*
405 				 * XXX	In an LP64 kernel with an LP64
406 				 *	application there's no need to do a
407 				 *	structure copy here as currently
408 				 *	struct flock == struct flock64.
409 				 *	We did it this way to avoid more
410 				 *	conditional compilation.
411 				 */
412 				sbf.l_type = bf.l_type;
413 				sbf.l_whence = bf.l_whence;
414 				sbf.l_start = (off_t)bf.l_start;
415 				sbf.l_len = (off_t)bf.l_len;
416 				sbf.l_sysid = bf.l_sysid;
417 				sbf.l_pid = bf.l_pid;
418 				if (copyout(&sbf, (void *)arg, sizeof (sbf)))
419 					error = EFAULT;
420 			}
421 #if defined(_SYSCALL32_IMPL)
422 			else {
423 				struct flock32 sbf32;
424 				if (bf.l_start > MAXOFF32_T ||
425 				    bf.l_len > MAXOFF32_T) {
426 					error = EOVERFLOW;
427 					break;
428 				}
429 				for (i = 0; i < 4; i++)
430 					sbf32.l_pad[i] = 0;
431 				sbf32.l_type = (int16_t)bf.l_type;
432 				sbf32.l_whence = (int16_t)bf.l_whence;
433 				sbf32.l_start = (off32_t)bf.l_start;
434 				sbf32.l_len = (off32_t)bf.l_len;
435 				sbf32.l_sysid = (int32_t)bf.l_sysid;
436 				sbf32.l_pid = (pid32_t)bf.l_pid;
437 				if (copyout(&sbf32,
438 				    (void *)arg, sizeof (sbf32)))
439 					error = EFAULT;
440 			}
441 #endif
442 		}
443 		break;
444 
445 	case F_CHKFL:
446 		/*
447 		 * This is for internal use only, to allow the vnode layer
448 		 * to validate a flags setting before applying it.  User
449 		 * programs can't issue it.
450 		 */
451 		error = EINVAL;
452 		break;
453 
454 	case F_ALLOCSP:
455 	case F_FREESP:
456 	case F_ALLOCSP64:
457 	case F_FREESP64:
458 		/*
459 		 * Test for not-a-regular-file (and returning EINVAL)
460 		 * before testing for open-for-writing (and returning EBADF).
461 		 * This is relied upon by posix_fallocate() in libc.
462 		 */
463 		if (vp->v_type != VREG) {
464 			error = EINVAL;
465 			break;
466 		}
467 
468 		if ((flag & FWRITE) == 0) {
469 			error = EBADF;
470 			break;
471 		}
472 
473 		if (datamodel != DATAMODEL_ILP32 &&
474 		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
475 			error = EINVAL;
476 			break;
477 		}
478 
479 #if defined(_ILP32) || defined(_SYSCALL32_IMPL)
480 		if (datamodel == DATAMODEL_ILP32 &&
481 		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
482 			struct flock32 sbf32;
483 			/*
484 			 * For compatibility we overlay an SVR3 flock on an SVR4
485 			 * flock.  This works because the input field offsets
486 			 * in "struct flock" were preserved.
487 			 */
488 			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
489 				error = EFAULT;
490 				break;
491 			} else {
492 				bf.l_type = sbf32.l_type;
493 				bf.l_whence = sbf32.l_whence;
494 				bf.l_start = (off64_t)sbf32.l_start;
495 				bf.l_len = (off64_t)sbf32.l_len;
496 				bf.l_sysid = sbf32.l_sysid;
497 				bf.l_pid = sbf32.l_pid;
498 			}
499 		}
500 #endif /* _ILP32 || _SYSCALL32_IMPL */
501 
502 #if defined(_LP64)
503 		if (datamodel == DATAMODEL_LP64 &&
504 		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
505 			if (copyin((void *)arg, &bf, sizeof (bf))) {
506 				error = EFAULT;
507 				break;
508 			}
509 		}
510 #endif /* defined(_LP64) */
511 
512 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
513 		if (datamodel == DATAMODEL_ILP32 &&
514 		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
515 			if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
516 				error = EFAULT;
517 				break;
518 			} else {
519 				/*
520 				 * Note that the size of flock64 is different in
521 				 * the ILP32 and LP64 models, due to the l_pad
522 				 * field. We do not want to assume that the
523 				 * flock64 structure is laid out the same in
524 				 * ILP32 and LP64 environments, so we will
525 				 * copy in the ILP32 version of flock64
526 				 * explicitly and copy it to the native
527 				 * flock64 structure.
528 				 */
529 				bf.l_type = (short)bf64_32.l_type;
530 				bf.l_whence = (short)bf64_32.l_whence;
531 				bf.l_start = bf64_32.l_start;
532 				bf.l_len = bf64_32.l_len;
533 				bf.l_sysid = (int)bf64_32.l_sysid;
534 				bf.l_pid = (pid_t)bf64_32.l_pid;
535 			}
536 		}
537 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
538 
539 		if (cmd == F_ALLOCSP || cmd == F_FREESP)
540 			error = flock_check(vp, &bf, offset, maxoffset);
541 		else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64)
542 			error = flock_check(vp, &bf, offset, MAXOFFSET_T);
543 		if (error)
544 			break;
545 
546 		if (vp->v_type == VREG && bf.l_len == 0 &&
547 		    bf.l_start > OFFSET_MAX(fp)) {
548 			error = EFBIG;
549 			break;
550 		}
551 
552 		/*
553 		 * Make sure that there are no conflicting non-blocking
554 		 * mandatory locks in the region being manipulated. If
555 		 * there are such locks then return EACCES.
556 		 */
557 		if ((error = flock_get_start(vp, &bf, offset, &start)) != 0)
558 			break;
559 
560 		if (nbl_need_check(vp)) {
561 			u_offset_t	begin;
562 			ssize_t		length;
563 
564 			nbl_start_crit(vp, RW_READER);
565 			in_crit = 1;
566 			vattr.va_mask = AT_SIZE;
567 			if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
568 			    != 0)
569 				break;
570 			begin = start > vattr.va_size ? vattr.va_size : start;
571 			length = vattr.va_size > start ? vattr.va_size - start :
572 			    start - vattr.va_size;
573 			if (nbl_conflict(vp, NBL_WRITE, begin, length, 0,
574 			    NULL)) {
575 				error = EACCES;
576 				break;
577 			}
578 		}
579 
580 		if (cmd == F_ALLOCSP64)
581 			cmd = F_ALLOCSP;
582 		else if (cmd == F_FREESP64)
583 			cmd = F_FREESP;
584 
585 		error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL);
586 
587 		break;
588 
589 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
590 	case F_GETLK64:
591 	case F_SETLK64:
592 	case F_SETLKW64:
593 	case F_SETLK64_NBMAND:
594 		/*
595 		 * Large Files: Here we set cmd as *LK and send it to
596 		 * lower layers. *LK64 is only for the user land.
597 		 * Most of the comments described above for F_SETLK
598 		 * applies here too.
599 		 * Large File support is only needed for ILP32 apps!
600 		 */
601 		if (datamodel != DATAMODEL_ILP32) {
602 			error = EINVAL;
603 			break;
604 		}
605 
606 		if (cmd == F_GETLK64)
607 			cmd = F_GETLK;
608 		else if (cmd == F_SETLK64)
609 			cmd = F_SETLK;
610 		else if (cmd == F_SETLKW64)
611 			cmd = F_SETLKW;
612 		else if (cmd == F_SETLK64_NBMAND)
613 			cmd = F_SETLK_NBMAND;
614 
615 		/*
616 		 * Note that the size of flock64 is different in the ILP32
617 		 * and LP64 models, due to the sucking l_pad field.
618 		 * We do not want to assume that the flock64 structure is
619 		 * laid out in the same in ILP32 and LP64 environments, so
620 		 * we will copy in the ILP32 version of flock64 explicitly
621 		 * and copy it to the native flock64 structure.
622 		 */
623 
624 		if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
625 			error = EFAULT;
626 			break;
627 		}
628 
629 		bf.l_type = (short)bf64_32.l_type;
630 		bf.l_whence = (short)bf64_32.l_whence;
631 		bf.l_start = bf64_32.l_start;
632 		bf.l_len = bf64_32.l_len;
633 		bf.l_sysid = (int)bf64_32.l_sysid;
634 		bf.l_pid = (pid_t)bf64_32.l_pid;
635 
636 		if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
637 			break;
638 
639 		if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset,
640 		    NULL, fp->f_cred, NULL)) != 0)
641 			break;
642 
643 		if ((cmd == F_GETLK) && bf.l_type == F_UNLCK) {
644 			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
645 			    sizeof (bf.l_type)))
646 				error = EFAULT;
647 			break;
648 		}
649 
650 		if (cmd == F_GETLK) {
651 			int i;
652 
653 			/*
654 			 * We do not want to assume that the flock64 structure
655 			 * is laid out in the same in ILP32 and LP64
656 			 * environments, so we will copy out the ILP32 version
657 			 * of flock64 explicitly after copying the native
658 			 * flock64 structure to it.
659 			 */
660 			for (i = 0; i < 4; i++)
661 				bf64_32.l_pad[i] = 0;
662 			bf64_32.l_type = (int16_t)bf.l_type;
663 			bf64_32.l_whence = (int16_t)bf.l_whence;
664 			bf64_32.l_start = bf.l_start;
665 			bf64_32.l_len = bf.l_len;
666 			bf64_32.l_sysid = (int32_t)bf.l_sysid;
667 			bf64_32.l_pid = (pid32_t)bf.l_pid;
668 			if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32)))
669 				error = EFAULT;
670 		}
671 		break;
672 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
673 
674 	case F_SHARE:
675 	case F_SHARE_NBMAND:
676 	case F_UNSHARE:
677 
678 		/*
679 		 * Copy in input fields only.
680 		 */
681 		if (copyin((void *)arg, &fsh, sizeof (fsh))) {
682 			error = EFAULT;
683 			break;
684 		}
685 
686 		/*
687 		 * Local share reservations always have this simple form
688 		 */
689 		shr.s_access = fsh.f_access;
690 		shr.s_deny = fsh.f_deny;
691 		shr.s_sysid = 0;
692 		shr.s_pid = ttoproc(curthread)->p_pid;
693 		shr_own.sl_pid = shr.s_pid;
694 		shr_own.sl_id = fsh.f_id;
695 		shr.s_own_len = sizeof (shr_own);
696 		shr.s_owner = (caddr_t)&shr_own;
697 		error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred, NULL);
698 		break;
699 
700 	default:
701 		error = EINVAL;
702 		break;
703 	}
704 
705 	if (in_crit)
706 		nbl_end_crit(vp);
707 
708 done:
709 	releasef(fdes);
710 out:
711 	if (error)
712 		return (set_errno(error));
713 	return (retval);
714 }
715 
716 int
717 flock_check(vnode_t *vp, flock64_t *flp, offset_t offset, offset_t max)
718 {
719 	struct vattr	vattr;
720 	int	error;
721 	u_offset_t start, end;
722 
723 	/*
724 	 * Determine the starting point of the request
725 	 */
726 	switch (flp->l_whence) {
727 	case 0:		/* SEEK_SET */
728 		start = (u_offset_t)flp->l_start;
729 		if (start > max)
730 			return (EINVAL);
731 		break;
732 	case 1:		/* SEEK_CUR */
733 		if (flp->l_start > (max - offset))
734 			return (EOVERFLOW);
735 		start = (u_offset_t)(flp->l_start + offset);
736 		if (start > max)
737 			return (EINVAL);
738 		break;
739 	case 2:		/* SEEK_END */
740 		vattr.va_mask = AT_SIZE;
741 		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
742 			return (error);
743 		if (flp->l_start > (max - (offset_t)vattr.va_size))
744 			return (EOVERFLOW);
745 		start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
746 		if (start > max)
747 			return (EINVAL);
748 		break;
749 	default:
750 		return (EINVAL);
751 	}
752 
753 	/*
754 	 * Determine the range covered by the request.
755 	 */
756 	if (flp->l_len == 0)
757 		end = MAXEND;
758 	else if ((offset_t)flp->l_len > 0) {
759 		if (flp->l_len > (max - start + 1))
760 			return (EOVERFLOW);
761 		end = (u_offset_t)(start + (flp->l_len - 1));
762 		ASSERT(end <= max);
763 	} else {
764 		/*
765 		 * Negative length; why do we even allow this ?
766 		 * Because this allows easy specification of
767 		 * the last n bytes of the file.
768 		 */
769 		end = start;
770 		start += (u_offset_t)flp->l_len;
771 		(start)++;
772 		if (start > max)
773 			return (EINVAL);
774 		ASSERT(end <= max);
775 	}
776 	ASSERT(start <= max);
777 	if (flp->l_type == F_UNLCK && flp->l_len > 0 &&
778 	    end == (offset_t)max) {
779 		flp->l_len = 0;
780 	}
781 	if (start  > end)
782 		return (EINVAL);
783 	return (0);
784 }
785 
786 static int
787 flock_get_start(vnode_t *vp, flock64_t *flp, offset_t offset, u_offset_t *start)
788 {
789 	struct vattr	vattr;
790 	int	error;
791 
792 	/*
793 	 * Determine the starting point of the request. Assume that it is
794 	 * a valid starting point.
795 	 */
796 	switch (flp->l_whence) {
797 	case 0:		/* SEEK_SET */
798 		*start = (u_offset_t)flp->l_start;
799 		break;
800 	case 1:		/* SEEK_CUR */
801 		*start = (u_offset_t)(flp->l_start + offset);
802 		break;
803 	case 2:		/* SEEK_END */
804 		vattr.va_mask = AT_SIZE;
805 		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
806 			return (error);
807 		*start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
808 		break;
809 	default:
810 		return (EINVAL);
811 	}
812 
813 	return (0);
814 }
815 
816 /*
817  * Take rctl action when the requested file descriptor is too big.
818  */
819 static void
820 fd_too_big(proc_t *p)
821 {
822 	mutex_enter(&p->p_lock);
823 	(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
824 	    p->p_rctls, p, RCA_SAFE);
825 	mutex_exit(&p->p_lock);
826 }
827