1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. 25 * Copyright 2018, Joyent, Inc. 26 * Copyright 2024 Oxide Computer Company 27 */ 28 29 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 30 /* All Rights Reserved */ 31 32 /* 33 * Portions of this source code were derived from Berkeley 4.3 BSD 34 * under license from the Regents of the University of California. 
 */


#include <sys/param.h>
#include <sys/isa_defs.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mode.h>
#include <sys/proc.h>
#include <sys/filio.h>
#include <sys/share.h>
#include <sys/debug.h>
#include <sys/rctl.h>
#include <sys/nbmlock.h>

#include <sys/cmn_err.h>

static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
static void fd_too_big(proc_t *);

/*
 * File control.
 *
 * fcntl(2) system call entry point.  'fdes' is the file descriptor being
 * operated on, 'cmd' selects the operation, and 'arg' is the traditional
 * third argument (an fd, flag word, or user address of a flock/fshare
 * structure, depending on cmd).  'arg1' is only meaningful for F_DUP3FD
 * and must be zero for every other command.
 *
 * Returns the command-specific value in 'retval' on success, or sets the
 * error via set_errno() on failure.
 */
int
fcntl(int fdes, int cmd, intptr_t arg, intptr_t arg1)
{
	int iarg;
	int error = 0;
	int retval;
	proc_t *p;
	file_t *fp;
	vnode_t *vp;
	u_offset_t offset;
	u_offset_t start;
	struct vattr vattr;
	int in_crit;
	int flag;
	struct flock sbf;
	struct flock64 bf;
	struct o_flock obf;
	struct flock64_32 bf64_32;
	struct fshare fsh;
	struct shrlock shr;
	struct shr_locowner shr_own;
	offset_t maxoffset;
	model_t datamodel;
	int fdres;

	/*
	 * The copyin/copyout paths below rely on these structure-size
	 * identities for the respective kernel data models.
	 */
#if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32)
	ASSERT(sizeof (struct flock) == sizeof (struct flock32));
	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32));
#endif
#if defined(_LP64) && !defined(lint) && defined(_SYSCALL32)
	ASSERT(sizeof (struct flock) == sizeof (struct flock64_64));
	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64));
#endif

	/*
	 * Most fcntl() calls take either 2 or 3 arguments. The introduction of
	 * F_DUP3FD added a version that takes a 4th argument (referred to as
	 * arg1). While fcntl() traditionally has had loose validation, we
	 * strictly validate this new arg.
	 */
	switch (cmd) {
	case F_DUP3FD:
		/* Only the fd-flag bits are accepted in arg1. */
		if ((arg1 & ~(FD_CLOEXEC | FD_CLOFORK)) != 0) {
			error = EINVAL;
			goto out;
		}
		break;
	default:
		if (arg1 != 0) {
			error = EINVAL;
			goto out;
		}
		break;
	}

	/*
	 * First, for speed, deal with the subset of cases
	 * that do not require getf() / releasef().
	 */
	switch (cmd) {
	case F_GETFD:
		if ((error = f_getfd_error(fdes, &flag)) == 0)
			retval = flag;
		goto out;

	case F_SETFD:
		error = f_setfd_error(fdes, (int)arg);
		retval = 0;
		goto out;

	case F_GETFL:
		if ((error = f_getfl(fdes, &flag)) == 0) {
			retval = (flag & (FMASK | FASYNC));
			/*
			 * Historically the open mode (O_RDONLY et al.) is
			 * biased by FOPEN; FSEARCH/FEXEC openings are
			 * reported unbiased instead.
			 */
			if ((flag & (FSEARCH | FEXEC)) == 0)
				retval += FOPEN;
			else
				retval |= (flag & (FSEARCH | FEXEC));
		}
		goto out;

	case F_GETXFL:
		if ((error = f_getfl(fdes, &flag)) == 0) {
			retval = flag;
			if ((flag & (FSEARCH | FEXEC)) == 0)
				retval += FOPEN;
		}
		goto out;

	case F_BADFD:
		if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0)
			retval = fdres;
		goto out;
	}

	/*
	 * Second, for speed, deal with the subset of cases that
	 * require getf() / releasef() but do not require copyin.
	 */
	if ((fp = getf(fdes)) == NULL) {
		error = EBADF;
		goto out;
	}
	iarg = (int)arg;

	switch (cmd) {
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
	case F_DUPFD_CLOFORK:
		p = curproc;
		if ((uint_t)iarg >= p->p_fno_ctl) {
			/* Negative iarg is plain EINVAL, not an rctl event. */
			if (iarg >= 0)
				fd_too_big(p);
			error = EINVAL;
			goto done;
		}
		/*
		 * We need to increment the f_count reference counter
		 * before allocating a new file descriptor.
		 * Doing it other way round opens a window for race condition
		 * with closeandsetf() on the target file descriptor which can
		 * close the file still referenced by the original
		 * file descriptor.
		 */
		mutex_enter(&fp->f_tlock);
		fp->f_count++;
		mutex_exit(&fp->f_tlock);
		if ((retval = ufalloc_file(iarg, fp)) == -1) {
			/*
			 * New file descriptor can't be allocated.
			 * Revert the reference count.
			 */
			mutex_enter(&fp->f_tlock);
			fp->f_count--;
			mutex_exit(&fp->f_tlock);
			error = EMFILE;
		} else {
			if (cmd == F_DUPFD_CLOEXEC) {
				f_setfd_or(retval, FD_CLOEXEC);
			}

			if (cmd == F_DUPFD_CLOFORK) {
				f_setfd_or(retval, FD_CLOFORK);
			}
		}
		goto done;

	case F_DUP2FD_CLOEXEC:
	case F_DUP2FD_CLOFORK:
		/*
		 * Unlike F_DUP2FD, the *_CLOEXEC/_CLOFORK variants treat
		 * src == dst as an error rather than a no-op success.
		 */
		if (fdes == iarg) {
			error = EINVAL;
			goto done;
		}

		/*FALLTHROUGH*/

	case F_DUP2FD:
	case F_DUP3FD:
		p = curproc;
		if (fdes == iarg) {
			retval = iarg;
		} else if ((uint_t)iarg >= p->p_fno_ctl) {
			if (iarg >= 0)
				fd_too_big(p);
			error = EBADF;
		} else {
			/*
			 * We can't hold our getf(fdes) across the call to
			 * closeandsetf() because it creates a window for
			 * deadlock: if one thread is doing dup2(a, b) while
			 * another is doing dup2(b, a), each one will block
			 * waiting for the other to call releasef().  The
			 * solution is to increment the file reference count
			 * (which we have to do anyway), then releasef(fdes),
			 * then closeandsetf().  Incrementing f_count ensures
			 * that fp won't disappear after we call releasef().
			 * When closeandsetf() fails, we try avoid calling
			 * closef() because of all the side effects.
			 */
			mutex_enter(&fp->f_tlock);
			fp->f_count++;
			mutex_exit(&fp->f_tlock);
			releasef(fdes);
			if ((error = closeandsetf(iarg, fp)) == 0) {
				if (cmd == F_DUP2FD_CLOEXEC) {
					f_setfd_or(iarg, FD_CLOEXEC);
				} else if (cmd == F_DUP2FD_CLOFORK) {
					f_setfd_or(iarg, FD_CLOFORK);
				} else if (cmd == F_DUP3FD) {
					/* arg1 was validated above. */
					f_setfd_or(iarg, (int)arg1);
				}
				retval = iarg;
			} else {
				/*
				 * Drop the reference we took; only call
				 * closef() if ours was the last reference.
				 */
				mutex_enter(&fp->f_tlock);
				if (fp->f_count > 1) {
					fp->f_count--;
					mutex_exit(&fp->f_tlock);
				} else {
					mutex_exit(&fp->f_tlock);
					(void) closef(fp);
				}
			}
			/* fdes was already releasef()'d above. */
			goto out;
		}
		goto done;

	case F_SETFL:
		vp = fp->f_vnode;
		flag = fp->f_flag;
		/* FNONBLOCK wins if both it and FNDELAY are requested. */
		if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
			iarg &= ~FNDELAY;
		if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred, NULL)) ==
		    0) {
			/* Update settable bits; FREAD/FWRITE are immutable. */
			iarg &= FMASK;
			mutex_enter(&fp->f_tlock);
			fp->f_flag &= ~FMASK | (FREAD|FWRITE);
			fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE);
			mutex_exit(&fp->f_tlock);
		}
		retval = 0;
		goto done;
	}

	/*
	 * Finally, deal with the expensive cases.
	 */
	retval = 0;
	in_crit = 0;
	maxoffset = MAXOFF_T;
	datamodel = DATAMODEL_NATIVE;
#if defined(_SYSCALL32_IMPL)
	if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
		maxoffset = MAXOFF32_T;
#endif

	vp = fp->f_vnode;
	flag = fp->f_flag;
	offset = fp->f_offset;

	switch (cmd) {
	/*
	 * The file system and vnode layers understand and implement
	 * locking with flock64 structures. So here once we pass through
	 * the test for compatibility as defined by LFS API, (for F_SETLK,
	 * F_SETLKW, F_GETLK, F_GETLKW, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW,
	 * F_FREESP) we transform the flock structure to a flock64 structure
	 * and send it to the lower layers. Similarly in case of GETLK and
	 * OFD_GETLK the returned flock64 structure is transformed to a flock
	 * structure if everything fits in nicely, otherwise we return
	 * EOVERFLOW.
	 */

	case F_GETLK:
	case F_O_GETLK:
	case F_SETLK:
	case F_SETLKW:
	case F_SETLK_NBMAND:
	case F_OFD_GETLK:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
	case F_FLOCK:
	case F_FLOCKW:

		/*
		 * Copy in input fields only.
		 */

		if (cmd == F_O_GETLK) {
			/* SVR3-compat command; only valid for ILP32 apps. */
			if (datamodel != DATAMODEL_ILP32) {
				error = EINVAL;
				break;
			}

			if (copyin((void *)arg, &obf, sizeof (obf))) {
				error = EFAULT;
				break;
			}
			bf.l_type = obf.l_type;
			bf.l_whence = obf.l_whence;
			bf.l_start = (off64_t)obf.l_start;
			bf.l_len = (off64_t)obf.l_len;
			bf.l_sysid = (int)obf.l_sysid;
			bf.l_pid = obf.l_pid;
		} else if (datamodel == DATAMODEL_NATIVE) {
			if (copyin((void *)arg, &sbf, sizeof (sbf))) {
				error = EFAULT;
				break;
			}
			/*
			 * XXX	In an LP64 kernel with an LP64 application
			 *	there's no need to do a structure copy here
			 *	struct flock == struct flock64. However,
			 *	we did it this way to avoid more conditional
			 *	compilation.
			 */
			bf.l_type = sbf.l_type;
			bf.l_whence = sbf.l_whence;
			bf.l_start = (off64_t)sbf.l_start;
			bf.l_len = (off64_t)sbf.l_len;
			bf.l_sysid = sbf.l_sysid;
			bf.l_pid = sbf.l_pid;
		}
#if defined(_SYSCALL32_IMPL)
		else {
			/* ILP32 application on an LP64 kernel. */
			struct flock32 sbf32;
			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
				error = EFAULT;
				break;
			}
			bf.l_type = sbf32.l_type;
			bf.l_whence = sbf32.l_whence;
			bf.l_start = (off64_t)sbf32.l_start;
			bf.l_len = (off64_t)sbf32.l_len;
			bf.l_sysid = sbf32.l_sysid;
			bf.l_pid = sbf32.l_pid;
		}
#endif /* _SYSCALL32_IMPL */

		/*
		 * 64-bit support: check for overflow for 32-bit lock ops
		 */
		if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
			break;

		if (cmd == F_FLOCK || cmd == F_FLOCKW) {
			/* FLOCK* locking is always over the entire file. */
			if (bf.l_whence != 0 || bf.l_start != 0 ||
			    bf.l_len != 0) {
				error = EINVAL;
				break;
			}
			if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
				error = EINVAL;
				break;
			}
		}

		if (cmd == F_OFD_GETLK || cmd == F_OFD_SETLK ||
		    cmd == F_OFD_SETLKW) {
			/*
			 * TBD OFD-style locking is currently limited to
			 * covering the entire file.
			 */
			if (bf.l_whence != 0 || bf.l_start != 0 ||
			    bf.l_len != 0) {
				error = EINVAL;
				break;
			}
		}

		/*
		 * Not all of the filesystems understand F_O_GETLK, and
		 * there's no need for them to know.  Map it to F_GETLK.
		 *
		 * The *_frlock functions in the various file systems basically
		 * do some validation and then funnel everything through the
		 * fs_frlock function. For OFD-style locks fs_frlock will do
		 * nothing so that once control returns here we can call the
		 * ofdlock function with the correct fp. For OFD-style locks
		 * the unsupported remote file systems, such as NFS, detect and
		 * reject the OFD-style cmd argument.
		 */
		if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd,
		    &bf, flag, offset, NULL, fp->f_cred, NULL)) != 0)
			break;

		if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
		    cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
			/*
			 * This is an OFD-style lock so we need to handle it
			 * here. Because OFD-style locks are associated with
			 * the file_t we didn't have enough info down the
			 * VOP_FRLOCK path immediately above.
			 */
			if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
				break;
		}

		/*
		 * If command is GETLK and no lock is found, only
		 * the type field is changed.
		 */
		if ((cmd == F_O_GETLK || cmd == F_GETLK ||
		    cmd == F_OFD_GETLK) && bf.l_type == F_UNLCK) {
			/* l_type always first entry, always a short */
			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
			    sizeof (bf.l_type)))
				error = EFAULT;
			break;
		}

		if (cmd == F_O_GETLK) {
			/*
			 * Return an SVR3 flock structure to the user.
			 */
			obf.l_type = (int16_t)bf.l_type;
			obf.l_whence = (int16_t)bf.l_whence;
			obf.l_start = (int32_t)bf.l_start;
			obf.l_len = (int32_t)bf.l_len;
			if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) {
				/*
				 * One or both values for the above fields
				 * is too large to store in an SVR3 flock
				 * structure.
				 */
				error = EOVERFLOW;
				break;
			}
			obf.l_sysid = (int16_t)bf.l_sysid;
			obf.l_pid = (int16_t)bf.l_pid;
			if (copyout(&obf, (void *)arg, sizeof (obf)))
				error = EFAULT;
		} else if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
			/*
			 * Copy out SVR4 flock.
			 */
			int i;

			if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
				error = EOVERFLOW;
				break;
			}

			if (datamodel == DATAMODEL_NATIVE) {
				for (i = 0; i < 4; i++)
					sbf.l_pad[i] = 0;
				/*
				 * XXX	In an LP64 kernel with an LP64
				 *	application there's no need to do a
				 *	structure copy here as currently
				 *	struct flock == struct flock64.
				 *	We did it this way to avoid more
				 *	conditional compilation.
				 */
				sbf.l_type = bf.l_type;
				sbf.l_whence = bf.l_whence;
				sbf.l_start = (off_t)bf.l_start;
				sbf.l_len = (off_t)bf.l_len;
				sbf.l_sysid = bf.l_sysid;
				sbf.l_pid = bf.l_pid;
				if (copyout(&sbf, (void *)arg, sizeof (sbf)))
					error = EFAULT;
			}
#if defined(_SYSCALL32_IMPL)
			else {
				struct flock32 sbf32;
				if (bf.l_start > MAXOFF32_T ||
				    bf.l_len > MAXOFF32_T) {
					error = EOVERFLOW;
					break;
				}
				for (i = 0; i < 4; i++)
					sbf32.l_pad[i] = 0;
				sbf32.l_type = (int16_t)bf.l_type;
				sbf32.l_whence = (int16_t)bf.l_whence;
				sbf32.l_start = (off32_t)bf.l_start;
				sbf32.l_len = (off32_t)bf.l_len;
				sbf32.l_sysid = (int32_t)bf.l_sysid;
				sbf32.l_pid = (pid32_t)bf.l_pid;
				if (copyout(&sbf32,
				    (void *)arg, sizeof (sbf32)))
					error = EFAULT;
			}
#endif
		}
		break;

	case F_CHKFL:
		/*
		 * This is for internal use only, to allow the vnode layer
		 * to validate a flags setting before applying it.  User
		 * programs can't issue it.
		 */
		error = EINVAL;
		break;

	case F_ALLOCSP:
	case F_FREESP:
	case F_ALLOCSP64:
	case F_FREESP64:
		/*
		 * Test for not-a-regular-file (and returning EINVAL)
		 * before testing for open-for-writing (and returning EBADF).
		 * This is relied upon by posix_fallocate() in libc.
		 */
		if (vp->v_type != VREG) {
			error = EINVAL;
			break;
		}

		if ((flag & FWRITE) == 0) {
			error = EBADF;
			break;
		}

		/* The *64 commands are only for ILP32 (large-file) apps. */
		if (datamodel != DATAMODEL_ILP32 &&
		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
			error = EINVAL;
			break;
		}

#if defined(_ILP32) || defined(_SYSCALL32_IMPL)
		if (datamodel == DATAMODEL_ILP32 &&
		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
			struct flock32 sbf32;
			/*
			 * For compatibility we overlay an SVR3 flock on an SVR4
			 * flock.  This works because the input field offsets
			 * in "struct flock" were preserved.
			 */
			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
				error = EFAULT;
				break;
			} else {
				bf.l_type = sbf32.l_type;
				bf.l_whence = sbf32.l_whence;
				bf.l_start = (off64_t)sbf32.l_start;
				bf.l_len = (off64_t)sbf32.l_len;
				bf.l_sysid = sbf32.l_sysid;
				bf.l_pid = sbf32.l_pid;
			}
		}
#endif /* _ILP32 || _SYSCALL32_IMPL */

#if defined(_LP64)
		if (datamodel == DATAMODEL_LP64 &&
		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
			if (copyin((void *)arg, &bf, sizeof (bf))) {
				error = EFAULT;
				break;
			}
		}
#endif /* defined(_LP64) */

#if !defined(_LP64) || defined(_SYSCALL32_IMPL)
		if (datamodel == DATAMODEL_ILP32 &&
		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
			if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
				error = EFAULT;
				break;
			} else {
				/*
				 * Note that the size of flock64 is different in
				 * the ILP32 and LP64 models, due to the l_pad
				 * field. We do not want to assume that the
				 * flock64 structure is laid out the same in
				 * ILP32 and LP64 environments, so we will
				 * copy in the ILP32 version of flock64
				 * explicitly and copy it to the native
				 * flock64 structure.
				 */
				bf.l_type = (short)bf64_32.l_type;
				bf.l_whence = (short)bf64_32.l_whence;
				bf.l_start = bf64_32.l_start;
				bf.l_len = bf64_32.l_len;
				bf.l_sysid = (int)bf64_32.l_sysid;
				bf.l_pid = (pid_t)bf64_32.l_pid;
			}
		}
#endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */

		if (cmd == F_ALLOCSP || cmd == F_FREESP)
			error = flock_check(vp, &bf, offset, maxoffset);
		else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64)
			error = flock_check(vp, &bf, offset, MAXOFFSET_T);
		if (error)
			break;

		if (vp->v_type == VREG && bf.l_len == 0 &&
		    bf.l_start > OFFSET_MAX(fp)) {
			error = EFBIG;
			break;
		}

		/*
		 * Make sure that there are no conflicting non-blocking
		 * mandatory locks in the region being manipulated. If
		 * there are such locks then return EACCES.
		 */
		if ((error = flock_get_start(vp, &bf, offset, &start)) != 0)
			break;

		if (nbl_need_check(vp)) {
			u_offset_t	begin;
			ssize_t		length;

			nbl_start_crit(vp, RW_READER);
			/* in_crit => nbl_end_crit() after the switch. */
			in_crit = 1;
			vattr.va_mask = AT_SIZE;
			if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			    != 0)
				break;
			begin = start > vattr.va_size ? vattr.va_size : start;
			length = vattr.va_size > start ? vattr.va_size - start :
			    start - vattr.va_size;
			if (nbl_conflict(vp, NBL_WRITE, begin, length, 0,
			    NULL)) {
				error = EACCES;
				break;
			}
		}

		/* Lower layers only know the non-64 command forms. */
		if (cmd == F_ALLOCSP64)
			cmd = F_ALLOCSP;
		else if (cmd == F_FREESP64)
			cmd = F_FREESP;

		error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL);

		break;

#if !defined(_LP64) || defined(_SYSCALL32_IMPL)
	case F_GETLK64:
	case F_SETLK64:
	case F_SETLKW64:
	case F_SETLK64_NBMAND:
	case F_OFD_GETLK64:
	case F_OFD_SETLK64:
	case F_OFD_SETLKW64:
	case F_FLOCK64:
	case F_FLOCKW64:
		/*
		 * Large Files: Here we set cmd as *LK and send it to
		 * lower layers. *LK64 is only for the user land.
		 * Most of the comments described above for F_SETLK
		 * applies here too.
		 * Large File support is only needed for ILP32 apps!
		 */
		if (datamodel != DATAMODEL_ILP32) {
			error = EINVAL;
			break;
		}

		if (cmd == F_GETLK64)
			cmd = F_GETLK;
		else if (cmd == F_SETLK64)
			cmd = F_SETLK;
		else if (cmd == F_SETLKW64)
			cmd = F_SETLKW;
		else if (cmd == F_SETLK64_NBMAND)
			cmd = F_SETLK_NBMAND;
		else if (cmd == F_OFD_GETLK64)
			cmd = F_OFD_GETLK;
		else if (cmd == F_OFD_SETLK64)
			cmd = F_OFD_SETLK;
		else if (cmd == F_OFD_SETLKW64)
			cmd = F_OFD_SETLKW;
		else if (cmd == F_FLOCK64)
			cmd = F_FLOCK;
		else if (cmd == F_FLOCKW64)
			cmd = F_FLOCKW;

		/*
		 * Note that the size of flock64 is different in the ILP32
		 * and LP64 models, due to the sucking l_pad field.
		 * We do not want to assume that the flock64 structure is
		 * laid out in the same in ILP32 and LP64 environments, so
		 * we will copy in the ILP32 version of flock64 explicitly
		 * and copy it to the native flock64 structure.
		 */

		if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
			error = EFAULT;
			break;
		}

		bf.l_type = (short)bf64_32.l_type;
		bf.l_whence = (short)bf64_32.l_whence;
		bf.l_start = bf64_32.l_start;
		bf.l_len = bf64_32.l_len;
		bf.l_sysid = (int)bf64_32.l_sysid;
		bf.l_pid = (pid_t)bf64_32.l_pid;

		if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
			break;

		if (cmd == F_FLOCK || cmd == F_FLOCKW) {
			/* FLOCK* locking is always over the entire file. */
			if (bf.l_whence != 0 || bf.l_start != 0 ||
			    bf.l_len != 0) {
				error = EINVAL;
				break;
			}
			if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
				error = EINVAL;
				break;
			}
		}

		if (cmd == F_OFD_GETLK || cmd == F_OFD_SETLK ||
		    cmd == F_OFD_SETLKW) {
			/*
			 * TBD OFD-style locking is currently limited to
			 * covering the entire file.
			 */
			if (bf.l_whence != 0 || bf.l_start != 0 ||
			    bf.l_len != 0) {
				error = EINVAL;
				break;
			}
		}

		/*
		 * The *_frlock functions in the various file systems basically
		 * do some validation and then funnel everything through the
		 * fs_frlock function. For OFD-style locks fs_frlock will do
		 * nothing so that once control returns here we can call the
		 * ofdlock function with the correct fp. For OFD-style locks
		 * the unsupported remote file systems, such as NFS, detect and
		 * reject the OFD-style cmd argument.
		 */
		if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset,
		    NULL, fp->f_cred, NULL)) != 0)
			break;

		if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
		    cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
			/*
			 * This is an OFD-style lock so we need to handle it
			 * here. Because OFD-style locks are associated with
			 * the file_t we didn't have enough info down the
			 * VOP_FRLOCK path immediately above.
			 */
			if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
				break;
		}

		/*
		 * If no lock was found, only l_type is copied back out
		 * (same convention as the non-64 GETLK path above).
		 */
		if ((cmd == F_GETLK || cmd == F_OFD_GETLK) &&
		    bf.l_type == F_UNLCK) {
			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
			    sizeof (bf.l_type)))
				error = EFAULT;
			break;
		}

		if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
			int i;

			/*
			 * We do not want to assume that the flock64 structure
			 * is laid out in the same in ILP32 and LP64
			 * environments, so we will copy out the ILP32 version
			 * of flock64 explicitly after copying the native
			 * flock64 structure to it.
			 */
			for (i = 0; i < 4; i++)
				bf64_32.l_pad[i] = 0;
			bf64_32.l_type = (int16_t)bf.l_type;
			bf64_32.l_whence = (int16_t)bf.l_whence;
			bf64_32.l_start = bf.l_start;
			bf64_32.l_len = bf.l_len;
			bf64_32.l_sysid = (int32_t)bf.l_sysid;
			bf64_32.l_pid = (pid32_t)bf.l_pid;
			if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32)))
				error = EFAULT;
		}
		break;
#endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */

	case F_SHARE:
	case F_SHARE_NBMAND:
	case F_UNSHARE:

		/*
		 * Copy in input fields only.
		 */
		if (copyin((void *)arg, &fsh, sizeof (fsh))) {
			error = EFAULT;
			break;
		}

		/*
		 * Local share reservations always have this simple form
		 */
		shr.s_access = fsh.f_access;
		shr.s_deny = fsh.f_deny;
		shr.s_sysid = 0;
		shr.s_pid = ttoproc(curthread)->p_pid;
		shr_own.sl_pid = shr.s_pid;
		shr_own.sl_id = fsh.f_id;
		shr.s_own_len = sizeof (shr_own);
		shr.s_owner = (caddr_t)&shr_own;
		error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred, NULL);
		break;

	default:
		error = EINVAL;
		break;
	}

	/* Leave any non-blocking-mandatory-lock critical section. */
	if (in_crit)
		nbl_end_crit(vp);

done:
	releasef(fdes);
out:
	if (error)
		return (set_errno(error));
	return (retval);
}

/*
 * Validate a flock64 request against the file and the given maximum
 * offset (MAXOFF32_T for 32-bit apps, larger otherwise): resolve l_whence
 * relative to 'offset' (SEEK_CUR) or the file size (SEEK_END), then verify
 * that the resulting [start, end] range neither overflows nor exceeds
 * 'max'.  As a side effect, an F_UNLCK request whose positive l_len runs
 * exactly to 'max' is normalized to l_len == 0 (to end-of-file).
 * Returns 0, or EINVAL/EOVERFLOW/VOP_GETATTR error.
 */
int
flock_check(vnode_t *vp, flock64_t *flp, offset_t offset, offset_t max)
{
	struct vattr	vattr;
	int	error;
	u_offset_t start, end;

	/*
	 * Determine the starting point of the request
	 */
	switch (flp->l_whence) {
	case 0:		/* SEEK_SET */
		start = (u_offset_t)flp->l_start;
		if (start > max)
			return (EINVAL);
		break;
	case 1:		/* SEEK_CUR */
		if (flp->l_start > (max - offset))
			return (EOVERFLOW);
		start = (u_offset_t)(flp->l_start + offset);
		if (start > max)
			return (EINVAL);
		break;
	case 2:		/* SEEK_END */
		vattr.va_mask = AT_SIZE;
		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			return (error);
		if (flp->l_start > (max - (offset_t)vattr.va_size))
			return (EOVERFLOW);
		start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
		if (start > max)
			return (EINVAL);
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Determine the range covered by the request.
	 */
	if (flp->l_len == 0)
		end = MAXEND;
	else if ((offset_t)flp->l_len > 0) {
		if (flp->l_len > (max - start + 1))
			return (EOVERFLOW);
		end = (u_offset_t)(start + (flp->l_len - 1));
		ASSERT(end <= max);
	} else {
		/*
		 * Negative length; why do we even allow this ?
		 * Because this allows easy specification of
		 * the last n bytes of the file.
		 */
		end = start;
		start += (u_offset_t)flp->l_len;
		(start)++;
		if (start > max)
			return (EINVAL);
		ASSERT(end <= max);
	}
	ASSERT(start <= max);
	if (flp->l_type == F_UNLCK && flp->l_len > 0 &&
	    end == (offset_t)max) {
		flp->l_len = 0;
	}
	if (start > end)
		return (EINVAL);
	return (0);
}

/*
 * Compute the absolute starting offset of a flock64 request.  Unlike
 * flock_check(), the start is assumed to already be valid, so no range
 * checking is done here; only an unknown l_whence (or a VOP_GETATTR
 * failure for SEEK_END) produces an error.
 */
static int
flock_get_start(vnode_t *vp, flock64_t *flp, offset_t offset, u_offset_t *start)
{
	struct vattr vattr;
	int error;

	/*
	 * Determine the starting point of the request. Assume that it is
	 * a valid starting point.
	 */
	switch (flp->l_whence) {
	case 0:		/* SEEK_SET */
		*start = (u_offset_t)flp->l_start;
		break;
	case 1:		/* SEEK_CUR */
		*start = (u_offset_t)(flp->l_start + offset);
		break;
	case 2:		/* SEEK_END */
		vattr.va_mask = AT_SIZE;
		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			return (error);
		*start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
		break;
	default:
		return (EINVAL);
	}

	return (0);
}

/*
 * Take rctl action when the requested file descriptor is too big.
 */
static void
fd_too_big(proc_t *p)
{
	/* rctl_action() is invoked with p_lock held. */
	mutex_enter(&p->p_lock);
	(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
	    p->p_rctls, p, RCA_SAFE);
	mutex_exit(&p->p_lock);
}