xref: /illumos-gate/usr/src/uts/common/syscall/fcntl.c (revision 8119dad84d6416f13557b0ba8e2aaf9064cbcfd3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
25  * Copyright 2018, Joyent, Inc.
26  * Copyright 2024 Oxide Computer Company
27  */
28 
29 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
30 /*	  All Rights Reserved	*/
31 
32 /*
33  * Portions of this source code were derived from Berkeley 4.3 BSD
34  * under license from the Regents of the University of California.
35  */
36 
37 
38 #include <sys/param.h>
39 #include <sys/isa_defs.h>
40 #include <sys/types.h>
41 #include <sys/sysmacros.h>
42 #include <sys/systm.h>
43 #include <sys/errno.h>
44 #include <sys/fcntl.h>
45 #include <sys/flock.h>
46 #include <sys/vnode.h>
47 #include <sys/file.h>
48 #include <sys/mode.h>
49 #include <sys/proc.h>
50 #include <sys/filio.h>
51 #include <sys/share.h>
52 #include <sys/debug.h>
53 #include <sys/rctl.h>
54 #include <sys/nbmlock.h>
55 
56 #include <sys/cmn_err.h>
57 
58 static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
59 static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
60 static void fd_too_big(proc_t *);
61 
62 /*
63  * File control.
 *
 * Performs the operation selected by cmd on file descriptor fdes.
 *
 *	fdes	file descriptor being operated on
 *	cmd	F_* command code
 *	arg	command-specific argument: used either as an integer
 *		(narrowed to int) or as a user address for copyin()/copyout()
 *	arg1	second command-specific argument; must be zero except for
 *		F_DUP3FD, where only FD_CLOEXEC and FD_CLOFORK may be set
 *
 * On success the command's result (retval) is returned; on failure the
 * return value is that of set_errno(error).
 *
 * The body is organized in three tiers: commands that need no getf() hold,
 * commands that need getf() but no copyin(), and the remaining (record
 * locking, space allocation and share reservation) commands.  Paths that
 * still hold the getf() reference exit through "done"; paths that never
 * took it, or that already called releasef(), exit through "out".
64  */
65 int
66 fcntl(int fdes, int cmd, intptr_t arg, intptr_t arg1)
67 {
68 	int iarg;
69 	int error = 0;
70 	int retval;
71 	proc_t *p;
72 	file_t *fp;
73 	vnode_t *vp;
74 	u_offset_t offset;
75 	u_offset_t start;
76 	struct vattr vattr;
77 	int in_crit;
78 	int flag;
79 	struct flock sbf;
80 	struct flock64 bf;
81 	struct o_flock obf;
82 	struct flock64_32 bf64_32;
83 	struct fshare fsh;
84 	struct shrlock shr;
85 	struct shr_locowner shr_own;
86 	offset_t maxoffset;
87 	model_t datamodel;
88 	int fdres;
89 
	/* Check the flock structure sizes expected for this build. */
90 #if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32)
91 	ASSERT(sizeof (struct flock) == sizeof (struct flock32));
92 	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32));
93 #endif
94 #if defined(_LP64) && !defined(lint) && defined(_SYSCALL32)
95 	ASSERT(sizeof (struct flock) == sizeof (struct flock64_64));
96 	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64));
97 #endif
98 
99 	/*
100 	 * Most fcntl() calls take either 2 or 3 arguments. The introduction of
101 	 * F_DUP3FD added a version that takes a 4th argument (referred to as
102 	 * arg1). While fcntl() traditionally has had loose validation, we
103 	 * strictly validate this new arg.
104 	 */
105 	switch (cmd) {
106 	case F_DUP3FD:
107 		if ((arg1 & ~(FD_CLOEXEC | FD_CLOFORK)) != 0) {
108 			error = EINVAL;
109 			goto out;
110 		}
111 		break;
112 	default:
113 		if (arg1 != 0) {
114 			error = EINVAL;
115 			goto out;
116 		}
117 		break;
118 	}
119 
120 	/*
121 	 * First, for speed, deal with the subset of cases
122 	 * that do not require getf() / releasef().
123 	 */
124 	switch (cmd) {
125 	case F_GETFD:
126 		if ((error = f_getfd_error(fdes, &flag)) == 0)
127 			retval = flag;
128 		goto out;
129 
130 	case F_SETFD:
131 		error = f_setfd_error(fdes, (int)arg);
132 		retval = 0;
133 		goto out;
134 
135 	case F_GETFL:
136 		if ((error = f_getfl(fdes, &flag)) == 0) {
137 			retval = (flag & (FMASK | FASYNC));
138 			if ((flag & (FSEARCH | FEXEC)) == 0)
139 				retval += FOPEN;
140 			else
141 				retval |= (flag & (FSEARCH | FEXEC));
142 		}
143 		goto out;
144 
145 	case F_GETXFL:
146 		if ((error = f_getfl(fdes, &flag)) == 0) {
147 			retval = flag;
148 			if ((flag & (FSEARCH | FEXEC)) == 0)
149 				retval += FOPEN;
150 		}
151 		goto out;
152 
153 	case F_BADFD:
154 		if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0)
155 			retval = fdres;
156 		goto out;
157 	}
158 
159 	/*
160 	 * Second, for speed, deal with the subset of cases that
161 	 * require getf() / releasef() but do not require copyin.
162 	 */
163 	if ((fp = getf(fdes)) == NULL) {
164 		error = EBADF;
165 		goto out;
166 	}
167 	iarg = (int)arg;
168 
169 	switch (cmd) {
170 	case F_DUPFD:
171 	case F_DUPFD_CLOEXEC:
172 	case F_DUPFD_CLOFORK:
173 		p = curproc;
		/*
		 * The unsigned comparison also rejects a negative iarg; the
		 * rctl action is only taken for a non-negative value.
		 */
174 		if ((uint_t)iarg >= p->p_fno_ctl) {
175 			if (iarg >= 0)
176 				fd_too_big(p);
177 			error = EINVAL;
178 			goto done;
179 		}
180 		/*
181 		 * We need to increment the f_count reference counter
182 		 * before allocating a new file descriptor.
183 		 * Doing it the other way round opens a window for a race
184 		 * condition with closeandsetf() on the target file descriptor
185 		 * which can close the file still referenced by the original
186 		 * file descriptor.
187 		 */
188 		mutex_enter(&fp->f_tlock);
189 		fp->f_count++;
190 		mutex_exit(&fp->f_tlock);
191 		if ((retval = ufalloc_file(iarg, fp)) == -1) {
192 			/*
193 			 * New file descriptor can't be allocated.
194 			 * Revert the reference count.
195 			 */
196 			mutex_enter(&fp->f_tlock);
197 			fp->f_count--;
198 			mutex_exit(&fp->f_tlock);
199 			error = EMFILE;
200 		} else {
201 			if (cmd == F_DUPFD_CLOEXEC) {
202 				f_setfd_or(retval, FD_CLOEXEC);
203 			}
204 
205 			if (cmd == F_DUPFD_CLOFORK) {
206 				f_setfd_or(retval, FD_CLOFORK);
207 			}
208 		}
209 		goto done;
210 
211 	case F_DUP2FD_CLOEXEC:
212 	case F_DUP2FD_CLOFORK:
		/*
		 * Unlike F_DUP2FD and F_DUP3FD below, these two commands
		 * fail with EINVAL when the source and target are the same.
		 */
213 		if (fdes == iarg) {
214 			error = EINVAL;
215 			goto done;
216 		}
217 
218 		/*FALLTHROUGH*/
219 
220 	case F_DUP2FD:
221 	case F_DUP3FD:
222 		p = curproc;
223 		if (fdes == iarg) {
224 			retval = iarg;
225 		} else if ((uint_t)iarg >= p->p_fno_ctl) {
226 			if (iarg >= 0)
227 				fd_too_big(p);
228 			error = EBADF;
229 		} else {
230 			/*
231 			 * We can't hold our getf(fdes) across the call to
232 			 * closeandsetf() because it creates a window for
233 			 * deadlock: if one thread is doing dup2(a, b) while
234 			 * another is doing dup2(b, a), each one will block
235 			 * waiting for the other to call releasef().  The
236 			 * solution is to increment the file reference count
237 			 * (which we have to do anyway), then releasef(fdes),
238 			 * then closeandsetf().  Incrementing f_count ensures
239 			 * that fp won't disappear after we call releasef().
240 			 * When closeandsetf() fails, we try to avoid calling
241 			 * closef() because of all the side effects.
242 			 */
243 			mutex_enter(&fp->f_tlock);
244 			fp->f_count++;
245 			mutex_exit(&fp->f_tlock);
246 			releasef(fdes);
247 			if ((error = closeandsetf(iarg, fp)) == 0) {
248 				if (cmd == F_DUP2FD_CLOEXEC) {
249 					f_setfd_or(iarg, FD_CLOEXEC);
250 				} else if (cmd == F_DUP2FD_CLOFORK) {
251 					f_setfd_or(iarg, FD_CLOFORK);
252 				} else if (cmd == F_DUP3FD) {
253 					f_setfd_or(iarg, (int)arg1);
254 				}
255 				retval = iarg;
256 			} else {
257 				mutex_enter(&fp->f_tlock);
258 				if (fp->f_count > 1) {
259 					fp->f_count--;
260 					mutex_exit(&fp->f_tlock);
261 				} else {
262 					mutex_exit(&fp->f_tlock);
263 					(void) closef(fp);
264 				}
265 			}
			/* releasef() was already called above; skip "done". */
266 			goto out;
267 		}
268 		goto done;
269 
270 	case F_SETFL:
271 		vp = fp->f_vnode;
272 		flag = fp->f_flag;
		/* If both FNONBLOCK and FNDELAY are requested, drop FNDELAY. */
273 		if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
274 			iarg &= ~FNDELAY;
275 		if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred, NULL)) ==
276 		    0) {
277 			iarg &= FMASK;
			/*
			 * Replace the FMASK bits of f_flag other than
			 * FREAD|FWRITE, which are left as they were.  FOPEN
			 * is subtracted from the caller's value; cf. the
			 * addition of FOPEN in F_GETFL above.
			 */
278 			mutex_enter(&fp->f_tlock);
279 			fp->f_flag &= ~FMASK | (FREAD|FWRITE);
280 			fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE);
281 			mutex_exit(&fp->f_tlock);
282 		}
283 		retval = 0;
284 		goto done;
285 	}
286 
287 	/*
288 	 * Finally, deal with the expensive cases.
	 *
	 * maxoffset starts out as MAXOFF_T and, when _SYSCALL32_IMPL is
	 * defined and the caller's data model is ILP32, is lowered to
	 * MAXOFF32_T.
289 	 */
290 	retval = 0;
291 	in_crit = 0;
292 	maxoffset = MAXOFF_T;
293 	datamodel = DATAMODEL_NATIVE;
294 #if defined(_SYSCALL32_IMPL)
295 	if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
296 		maxoffset = MAXOFF32_T;
297 #endif
298 
299 	vp = fp->f_vnode;
300 	flag = fp->f_flag;
301 	offset = fp->f_offset;
302 
303 	switch (cmd) {
304 	/*
305 	 * The file system and vnode layers understand and implement
306 	 * locking with flock64 structures. So here once we pass through
307 	 * the test for compatibility as defined by LFS API, (for F_SETLK,
308 	 * F_SETLKW, F_GETLK, F_GETLKW, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW,
309 	 * F_FREESP) we transform the flock structure to a flock64 structure
310 	 * and send it to the lower layers. Similarly in case of GETLK and
311 	 * OFD_GETLK the returned flock64 structure is transformed to a flock
312 	 * structure if everything fits in nicely, otherwise we return
313 	 * EOVERFLOW.
314 	 */
315 
316 	case F_GETLK:
317 	case F_O_GETLK:
318 	case F_SETLK:
319 	case F_SETLKW:
320 	case F_SETLK_NBMAND:
321 	case F_OFD_GETLK:
322 	case F_OFD_SETLK:
323 	case F_OFD_SETLKW:
324 	case F_FLOCK:
325 	case F_FLOCKW:
326 
327 		/*
328 		 * Copy in input fields only.
329 		 */
330 
331 		if (cmd == F_O_GETLK) {
332 			if (datamodel != DATAMODEL_ILP32) {
333 				error = EINVAL;
334 				break;
335 			}
336 
337 			if (copyin((void *)arg, &obf, sizeof (obf))) {
338 				error = EFAULT;
339 				break;
340 			}
341 			bf.l_type = obf.l_type;
342 			bf.l_whence = obf.l_whence;
343 			bf.l_start = (off64_t)obf.l_start;
344 			bf.l_len = (off64_t)obf.l_len;
345 			bf.l_sysid = (int)obf.l_sysid;
346 			bf.l_pid = obf.l_pid;
347 		} else if (datamodel == DATAMODEL_NATIVE) {
348 			if (copyin((void *)arg, &sbf, sizeof (sbf))) {
349 				error = EFAULT;
350 				break;
351 			}
352 			/*
353 			 * XXX	In an LP64 kernel with an LP64 application
354 			 *	there's no need to do a structure copy here
355 			 *	struct flock == struct flock64. However,
356 			 *	we did it this way to avoid more conditional
357 			 *	compilation.
358 			 */
359 			bf.l_type = sbf.l_type;
360 			bf.l_whence = sbf.l_whence;
361 			bf.l_start = (off64_t)sbf.l_start;
362 			bf.l_len = (off64_t)sbf.l_len;
363 			bf.l_sysid = sbf.l_sysid;
364 			bf.l_pid = sbf.l_pid;
365 		}
366 #if defined(_SYSCALL32_IMPL)
367 		else {
368 			struct flock32 sbf32;
369 			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
370 				error = EFAULT;
371 				break;
372 			}
373 			bf.l_type = sbf32.l_type;
374 			bf.l_whence = sbf32.l_whence;
375 			bf.l_start = (off64_t)sbf32.l_start;
376 			bf.l_len = (off64_t)sbf32.l_len;
377 			bf.l_sysid = sbf32.l_sysid;
378 			bf.l_pid = sbf32.l_pid;
379 		}
380 #endif /* _SYSCALL32_IMPL */
381 
382 		/*
383 		 * 64-bit support: check for overflow for 32-bit lock ops
384 		 */
385 		if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
386 			break;
387 
388 		if (cmd == F_FLOCK || cmd == F_FLOCKW) {
389 			/* FLOCK* locking is always over the entire file. */
390 			if (bf.l_whence != 0 || bf.l_start != 0 ||
391 			    bf.l_len != 0) {
392 				error = EINVAL;
393 				break;
394 			}
395 			if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
396 				error = EINVAL;
397 				break;
398 			}
399 		}
400 
401 		if (cmd == F_OFD_GETLK || cmd == F_OFD_SETLK ||
402 		    cmd == F_OFD_SETLKW) {
403 			/*
404 			 * TBD OFD-style locking is currently limited to
405 			 * covering the entire file.
406 			 */
407 			if (bf.l_whence != 0 || bf.l_start != 0 ||
408 			    bf.l_len != 0) {
409 				error = EINVAL;
410 				break;
411 			}
412 		}
413 
414 		/*
415 		 * Not all of the filesystems understand F_O_GETLK, and
416 		 * there's no need for them to know.  Map it to F_GETLK.
417 		 *
418 		 * The *_frlock functions in the various file systems basically
419 		 * do some validation and then funnel everything through the
420 		 * fs_frlock function. For OFD-style locks fs_frlock will do
421 		 * nothing so that once control returns here we can call the
422 		 * ofdlock function with the correct fp. For OFD-style locks
423 		 * the unsupported remote file systems, such as NFS, detect and
424 		 * reject the OFD-style cmd argument.
425 		 */
426 		if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd,
427 		    &bf, flag, offset, NULL, fp->f_cred, NULL)) != 0)
428 			break;
429 
430 		if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
431 		    cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
432 			/*
433 			 * This is an OFD-style lock so we need to handle it
434 			 * here. Because OFD-style locks are associated with
435 			 * the file_t we didn't have enough info down the
436 			 * VOP_FRLOCK path immediately above.
437 			 */
438 			if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
439 				break;
440 		}
441 
442 		/*
443 		 * If command is GETLK and no lock is found, only
444 		 * the type field is changed.
445 		 */
446 		if ((cmd == F_O_GETLK || cmd == F_GETLK ||
447 		    cmd == F_OFD_GETLK) && bf.l_type == F_UNLCK) {
448 			/* l_type always first entry, always a short */
449 			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
450 			    sizeof (bf.l_type)))
451 				error = EFAULT;
452 			break;
453 		}
454 
455 		if (cmd == F_O_GETLK) {
456 			/*
457 			 * Return an SVR3 flock structure to the user.
458 			 */
459 			obf.l_type = (int16_t)bf.l_type;
460 			obf.l_whence = (int16_t)bf.l_whence;
461 			obf.l_start = (int32_t)bf.l_start;
462 			obf.l_len = (int32_t)bf.l_len;
463 			if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) {
464 				/*
465 				 * One or both values for the above fields
466 				 * is too large to store in an SVR3 flock
467 				 * structure.
468 				 */
469 				error = EOVERFLOW;
470 				break;
471 			}
472 			obf.l_sysid = (int16_t)bf.l_sysid;
473 			obf.l_pid = (int16_t)bf.l_pid;
474 			if (copyout(&obf, (void *)arg, sizeof (obf)))
475 				error = EFAULT;
476 		} else if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
477 			/*
478 			 * Copy out SVR4 flock.
479 			 */
480 			int i;
481 
482 			if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
483 				error = EOVERFLOW;
484 				break;
485 			}
486 
487 			if (datamodel == DATAMODEL_NATIVE) {
488 				for (i = 0; i < 4; i++)
489 					sbf.l_pad[i] = 0;
490 				/*
491 				 * XXX	In an LP64 kernel with an LP64
492 				 *	application there's no need to do a
493 				 *	structure copy here as currently
494 				 *	struct flock == struct flock64.
495 				 *	We did it this way to avoid more
496 				 *	conditional compilation.
497 				 */
498 				sbf.l_type = bf.l_type;
499 				sbf.l_whence = bf.l_whence;
500 				sbf.l_start = (off_t)bf.l_start;
501 				sbf.l_len = (off_t)bf.l_len;
502 				sbf.l_sysid = bf.l_sysid;
503 				sbf.l_pid = bf.l_pid;
504 				if (copyout(&sbf, (void *)arg, sizeof (sbf)))
505 					error = EFAULT;
506 			}
507 #if defined(_SYSCALL32_IMPL)
508 			else {
509 				struct flock32 sbf32;
510 				if (bf.l_start > MAXOFF32_T ||
511 				    bf.l_len > MAXOFF32_T) {
512 					error = EOVERFLOW;
513 					break;
514 				}
515 				for (i = 0; i < 4; i++)
516 					sbf32.l_pad[i] = 0;
517 				sbf32.l_type = (int16_t)bf.l_type;
518 				sbf32.l_whence = (int16_t)bf.l_whence;
519 				sbf32.l_start = (off32_t)bf.l_start;
520 				sbf32.l_len = (off32_t)bf.l_len;
521 				sbf32.l_sysid = (int32_t)bf.l_sysid;
522 				sbf32.l_pid = (pid32_t)bf.l_pid;
523 				if (copyout(&sbf32,
524 				    (void *)arg, sizeof (sbf32)))
525 					error = EFAULT;
526 			}
527 #endif
528 		}
529 		break;
530 
531 	case F_CHKFL:
532 		/*
533 		 * This is for internal use only, to allow the vnode layer
534 		 * to validate a flags setting before applying it.  User
535 		 * programs can't issue it.
536 		 */
537 		error = EINVAL;
538 		break;
539 
540 	case F_ALLOCSP:
541 	case F_FREESP:
542 	case F_ALLOCSP64:
543 	case F_FREESP64:
544 		/*
545 		 * Test for not-a-regular-file (and returning EINVAL)
546 		 * before testing for open-for-writing (and returning EBADF).
547 		 * This is relied upon by posix_fallocate() in libc.
548 		 */
549 		if (vp->v_type != VREG) {
550 			error = EINVAL;
551 			break;
552 		}
553 
554 		if ((flag & FWRITE) == 0) {
555 			error = EBADF;
556 			break;
557 		}
558 
		/* The *64 commands are only accepted from ILP32 callers. */
559 		if (datamodel != DATAMODEL_ILP32 &&
560 		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
561 			error = EINVAL;
562 			break;
563 		}
564 
565 #if defined(_ILP32) || defined(_SYSCALL32_IMPL)
566 		if (datamodel == DATAMODEL_ILP32 &&
567 		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
568 			struct flock32 sbf32;
569 			/*
570 			 * For compatibility we overlay an SVR3 flock on an SVR4
571 			 * flock.  This works because the input field offsets
572 			 * in "struct flock" were preserved.
573 			 */
574 			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
575 				error = EFAULT;
576 				break;
577 			} else {
578 				bf.l_type = sbf32.l_type;
579 				bf.l_whence = sbf32.l_whence;
580 				bf.l_start = (off64_t)sbf32.l_start;
581 				bf.l_len = (off64_t)sbf32.l_len;
582 				bf.l_sysid = sbf32.l_sysid;
583 				bf.l_pid = sbf32.l_pid;
584 			}
585 		}
586 #endif /* _ILP32 || _SYSCALL32_IMPL */
587 
588 #if defined(_LP64)
589 		if (datamodel == DATAMODEL_LP64 &&
590 		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
591 			if (copyin((void *)arg, &bf, sizeof (bf))) {
592 				error = EFAULT;
593 				break;
594 			}
595 		}
596 #endif /* defined(_LP64) */
597 
598 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
599 		if (datamodel == DATAMODEL_ILP32 &&
600 		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
601 			if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
602 				error = EFAULT;
603 				break;
604 			} else {
605 				/*
606 				 * Note that the size of flock64 is different in
607 				 * the ILP32 and LP64 models, due to the l_pad
608 				 * field. We do not want to assume that the
609 				 * flock64 structure is laid out the same in
610 				 * ILP32 and LP64 environments, so we will
611 				 * copy in the ILP32 version of flock64
612 				 * explicitly and copy it to the native
613 				 * flock64 structure.
614 				 */
615 				bf.l_type = (short)bf64_32.l_type;
616 				bf.l_whence = (short)bf64_32.l_whence;
617 				bf.l_start = bf64_32.l_start;
618 				bf.l_len = bf64_32.l_len;
619 				bf.l_sysid = (int)bf64_32.l_sysid;
620 				bf.l_pid = (pid_t)bf64_32.l_pid;
621 			}
622 		}
623 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
624 
		/*
		 * The plain commands are range-checked against the caller's
		 * data-model limit; the *64 commands against MAXOFFSET_T.
		 */
625 		if (cmd == F_ALLOCSP || cmd == F_FREESP)
626 			error = flock_check(vp, &bf, offset, maxoffset);
627 		else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64)
628 			error = flock_check(vp, &bf, offset, MAXOFFSET_T);
629 		if (error)
630 			break;
631 
		/* NOTE(review): v_type was already checked to be VREG above. */
632 		if (vp->v_type == VREG && bf.l_len == 0 &&
633 		    bf.l_start > OFFSET_MAX(fp)) {
634 			error = EFBIG;
635 			break;
636 		}
637 
638 		/*
639 		 * Make sure that there are no conflicting non-blocking
640 		 * mandatory locks in the region being manipulated. If
641 		 * there are such locks then return EACCES.
642 		 */
643 		if ((error = flock_get_start(vp, &bf, offset, &start)) != 0)
644 			break;
645 
646 		if (nbl_need_check(vp)) {
647 			u_offset_t	begin;
648 			ssize_t		length;
649 
650 			nbl_start_crit(vp, RW_READER);
651 			in_crit = 1;
652 			vattr.va_mask = AT_SIZE;
653 			if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
654 			    != 0)
655 				break;
			/*
			 * Check the span between the requested start and the
			 * current file size, whichever of the two comes first.
			 */
656 			begin = start > vattr.va_size ? vattr.va_size : start;
657 			length = vattr.va_size > start ? vattr.va_size - start :
658 			    start - vattr.va_size;
659 			if (nbl_conflict(vp, NBL_WRITE, begin, length, 0,
660 			    NULL)) {
661 				error = EACCES;
662 				break;
663 			}
664 		}
665 
		/* VOP_SPACE() is only ever handed the non-64 command values. */
666 		if (cmd == F_ALLOCSP64)
667 			cmd = F_ALLOCSP;
668 		else if (cmd == F_FREESP64)
669 			cmd = F_FREESP;
670 
671 		error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL);
672 
673 		break;
674 
675 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
676 	case F_GETLK64:
677 	case F_SETLK64:
678 	case F_SETLKW64:
679 	case F_SETLK64_NBMAND:
680 	case F_OFD_GETLK64:
681 	case F_OFD_SETLK64:
682 	case F_OFD_SETLKW64:
683 	case F_FLOCK64:
684 	case F_FLOCKW64:
685 		/*
686 		 * Large Files: Here we set cmd as *LK and send it to
687 		 * lower layers. *LK64 is only for the user land.
688 		 * Most of the comments described above for F_SETLK
689 		 * applies here too.
690 		 * Large File support is only needed for ILP32 apps!
691 		 */
692 		if (datamodel != DATAMODEL_ILP32) {
693 			error = EINVAL;
694 			break;
695 		}
696 
697 		if (cmd == F_GETLK64)
698 			cmd = F_GETLK;
699 		else if (cmd == F_SETLK64)
700 			cmd = F_SETLK;
701 		else if (cmd == F_SETLKW64)
702 			cmd = F_SETLKW;
703 		else if (cmd == F_SETLK64_NBMAND)
704 			cmd = F_SETLK_NBMAND;
705 		else if (cmd == F_OFD_GETLK64)
706 			cmd = F_OFD_GETLK;
707 		else if (cmd == F_OFD_SETLK64)
708 			cmd = F_OFD_SETLK;
709 		else if (cmd == F_OFD_SETLKW64)
710 			cmd = F_OFD_SETLKW;
711 		else if (cmd == F_FLOCK64)
712 			cmd = F_FLOCK;
713 		else if (cmd == F_FLOCKW64)
714 			cmd = F_FLOCKW;
715 
716 		/*
717 		 * Note that the size of flock64 is different in the ILP32
718 		 * and LP64 models, due to the l_pad field.
719 		 * We do not want to assume that the flock64 structure is
720 		 * laid out the same in ILP32 and LP64 environments, so
721 		 * we will copy in the ILP32 version of flock64 explicitly
722 		 * and copy it to the native flock64 structure.
723 		 */
724 
725 		if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
726 			error = EFAULT;
727 			break;
728 		}
729 
730 		bf.l_type = (short)bf64_32.l_type;
731 		bf.l_whence = (short)bf64_32.l_whence;
732 		bf.l_start = bf64_32.l_start;
733 		bf.l_len = bf64_32.l_len;
734 		bf.l_sysid = (int)bf64_32.l_sysid;
735 		bf.l_pid = (pid_t)bf64_32.l_pid;
736 
737 		if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
738 			break;
739 
740 		if (cmd == F_FLOCK || cmd == F_FLOCKW) {
741 			/* FLOCK* locking is always over the entire file. */
742 			if (bf.l_whence != 0 || bf.l_start != 0 ||
743 			    bf.l_len != 0) {
744 				error = EINVAL;
745 				break;
746 			}
747 			if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
748 				error = EINVAL;
749 				break;
750 			}
751 		}
752 
753 		if (cmd == F_OFD_GETLK || cmd == F_OFD_SETLK ||
754 		    cmd == F_OFD_SETLKW) {
755 			/*
756 			 * TBD OFD-style locking is currently limited to
757 			 * covering the entire file.
758 			 */
759 			if (bf.l_whence != 0 || bf.l_start != 0 ||
760 			    bf.l_len != 0) {
761 				error = EINVAL;
762 				break;
763 			}
764 		}
765 
766 		/*
767 		 * The *_frlock functions in the various file systems basically
768 		 * do some validation and then funnel everything through the
769 		 * fs_frlock function. For OFD-style locks fs_frlock will do
770 		 * nothing so that once control returns here we can call the
771 		 * ofdlock function with the correct fp. For OFD-style locks
772 		 * the unsupported remote file systems, such as NFS, detect and
773 		 * reject the OFD-style cmd argument.
774 		 */
775 		if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset,
776 		    NULL, fp->f_cred, NULL)) != 0)
777 			break;
778 
779 		if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
780 		    cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
781 			/*
782 			 * This is an OFD-style lock so we need to handle it
783 			 * here. Because OFD-style locks are associated with
784 			 * the file_t we didn't have enough info down the
785 			 * VOP_FRLOCK path immediately above.
786 			 */
787 			if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
788 				break;
789 		}
790 
791 		if ((cmd == F_GETLK || cmd == F_OFD_GETLK) &&
792 		    bf.l_type == F_UNLCK) {
793 			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
794 			    sizeof (bf.l_type)))
795 				error = EFAULT;
796 			break;
797 		}
798 
799 		if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
800 			int i;
801 
802 			/*
803 			 * We do not want to assume that the flock64 structure
804 			 * is laid out the same in ILP32 and LP64
805 			 * environments, so we will copy out the ILP32 version
806 			 * of flock64 explicitly after copying the native
807 			 * flock64 structure to it.
808 			 */
809 			for (i = 0; i < 4; i++)
810 				bf64_32.l_pad[i] = 0;
811 			bf64_32.l_type = (int16_t)bf.l_type;
812 			bf64_32.l_whence = (int16_t)bf.l_whence;
813 			bf64_32.l_start = bf.l_start;
814 			bf64_32.l_len = bf.l_len;
815 			bf64_32.l_sysid = (int32_t)bf.l_sysid;
816 			bf64_32.l_pid = (pid32_t)bf.l_pid;
817 			if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32)))
818 				error = EFAULT;
819 		}
820 		break;
821 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
822 
823 	case F_SHARE:
824 	case F_SHARE_NBMAND:
825 	case F_UNSHARE:
826 
827 		/*
828 		 * Copy in input fields only.
829 		 */
830 		if (copyin((void *)arg, &fsh, sizeof (fsh))) {
831 			error = EFAULT;
832 			break;
833 		}
834 
835 		/*
836 		 * Local share reservations always have this simple form
837 		 */
838 		shr.s_access = fsh.f_access;
839 		shr.s_deny = fsh.f_deny;
840 		shr.s_sysid = 0;
841 		shr.s_pid = ttoproc(curthread)->p_pid;
842 		shr_own.sl_pid = shr.s_pid;
843 		shr_own.sl_id = fsh.f_id;
844 		shr.s_own_len = sizeof (shr_own);
845 		shr.s_owner = (caddr_t)&shr_own;
846 		error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred, NULL);
847 		break;
848 
849 	default:
850 		error = EINVAL;
851 		break;
852 	}
853 
	/* in_crit is only set on the F_ALLOCSP/F_FREESP path above. */
854 	if (in_crit)
855 		nbl_end_crit(vp);
856 
	/* Exit for paths that still hold the getf() reference on fdes. */
857 done:
858 	releasef(fdes);
	/* Exit for paths with no getf() hold left to release. */
859 out:
860 	if (error)
861 		return (set_errno(error));
862 	return (retval);
863 }
864 
865 int
866 flock_check(vnode_t *vp, flock64_t *flp, offset_t offset, offset_t max)
867 {
868 	struct vattr	vattr;
869 	int	error;
870 	u_offset_t start, end;
871 
872 	/*
873 	 * Determine the starting point of the request
874 	 */
875 	switch (flp->l_whence) {
876 	case 0:		/* SEEK_SET */
877 		start = (u_offset_t)flp->l_start;
878 		if (start > max)
879 			return (EINVAL);
880 		break;
881 	case 1:		/* SEEK_CUR */
882 		if (flp->l_start > (max - offset))
883 			return (EOVERFLOW);
884 		start = (u_offset_t)(flp->l_start + offset);
885 		if (start > max)
886 			return (EINVAL);
887 		break;
888 	case 2:		/* SEEK_END */
889 		vattr.va_mask = AT_SIZE;
890 		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
891 			return (error);
892 		if (flp->l_start > (max - (offset_t)vattr.va_size))
893 			return (EOVERFLOW);
894 		start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
895 		if (start > max)
896 			return (EINVAL);
897 		break;
898 	default:
899 		return (EINVAL);
900 	}
901 
902 	/*
903 	 * Determine the range covered by the request.
904 	 */
905 	if (flp->l_len == 0)
906 		end = MAXEND;
907 	else if ((offset_t)flp->l_len > 0) {
908 		if (flp->l_len > (max - start + 1))
909 			return (EOVERFLOW);
910 		end = (u_offset_t)(start + (flp->l_len - 1));
911 		ASSERT(end <= max);
912 	} else {
913 		/*
914 		 * Negative length; why do we even allow this ?
915 		 * Because this allows easy specification of
916 		 * the last n bytes of the file.
917 		 */
918 		end = start;
919 		start += (u_offset_t)flp->l_len;
920 		(start)++;
921 		if (start > max)
922 			return (EINVAL);
923 		ASSERT(end <= max);
924 	}
925 	ASSERT(start <= max);
926 	if (flp->l_type == F_UNLCK && flp->l_len > 0 &&
927 	    end == (offset_t)max) {
928 		flp->l_len = 0;
929 	}
930 	if (start  > end)
931 		return (EINVAL);
932 	return (0);
933 }
934 
935 static int
936 flock_get_start(vnode_t *vp, flock64_t *flp, offset_t offset, u_offset_t *start)
937 {
938 	struct vattr	vattr;
939 	int	error;
940 
941 	/*
942 	 * Determine the starting point of the request. Assume that it is
943 	 * a valid starting point.
944 	 */
945 	switch (flp->l_whence) {
946 	case 0:		/* SEEK_SET */
947 		*start = (u_offset_t)flp->l_start;
948 		break;
949 	case 1:		/* SEEK_CUR */
950 		*start = (u_offset_t)(flp->l_start + offset);
951 		break;
952 	case 2:		/* SEEK_END */
953 		vattr.va_mask = AT_SIZE;
954 		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
955 			return (error);
956 		*start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
957 		break;
958 	default:
959 		return (EINVAL);
960 	}
961 
962 	return (0);
963 }
964 
965 /*
966  * Take rctl action when the requested file descriptor is too big.
967  */
968 static void
969 fd_too_big(proc_t *p)
970 {
971 	mutex_enter(&p->p_lock);
972 	(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
973 	    p->p_rctls, p, RCA_SAFE);
974 	mutex_exit(&p->p_lock);
975 }
976