1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * University Copyright- Copyright (c) 1982, 1986, 1988 32 * The Regents of the University of California 33 * All Rights Reserved 34 * 35 * University Acknowledgment- Portions of this document are derived from 36 * software developed by the University of California, Berkeley, and its 37 * contributors. 
38 */ 39 40 41 #pragma ident "%Z%%M% %I% %E% SMI" 42 43 #include <sys/types.h> 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/cpuvar.h> 47 #include <sys/errno.h> 48 #include <sys/cred.h> 49 #include <sys/user.h> 50 #include <sys/uio.h> 51 #include <sys/vfs.h> 52 #include <sys/vnode.h> 53 #include <sys/pathname.h> 54 #include <sys/proc.h> 55 #include <sys/vtrace.h> 56 #include <sys/sysmacros.h> 57 #include <sys/debug.h> 58 #include <sys/dirent.h> 59 #include <c2/audit.h> 60 #include <sys/zone.h> 61 #include <sys/dnlc.h> 62 #include <sys/fs/snode.h> 63 64 /* Controls whether paths are stored with vnodes. */ 65 int vfs_vnode_path = 1; 66 67 int 68 lookupname( 69 char *fnamep, 70 enum uio_seg seg, 71 enum symfollow followlink, 72 vnode_t **dirvpp, 73 vnode_t **compvpp) 74 { 75 return (lookupnameat(fnamep, seg, followlink, dirvpp, compvpp, NULL)); 76 } 77 78 79 /* 80 * Lookup the user file name, 81 * Handle allocation and freeing of pathname buffer, return error. 82 */ 83 int 84 lookupnameat( 85 char *fnamep, /* user pathname */ 86 enum uio_seg seg, /* addr space that name is in */ 87 enum symfollow followlink, /* follow sym links */ 88 vnode_t **dirvpp, /* ret for ptr to parent dir vnode */ 89 vnode_t **compvpp, /* ret for ptr to component vnode */ 90 vnode_t *startvp) /* start path search from vp */ 91 { 92 char namebuf[TYPICALMAXPATHLEN]; 93 struct pathname lookpn; 94 int error; 95 96 error = pn_get_buf(fnamep, seg, &lookpn, namebuf, sizeof (namebuf)); 97 if (error == 0) { 98 #ifdef C2_AUDIT 99 if (audit_active) 100 audit_lookupname(); 101 #endif 102 error = lookuppnat(&lookpn, NULL, followlink, 103 dirvpp, compvpp, startvp); 104 } 105 if (error == ENAMETOOLONG) { 106 /* 107 * This thread used a pathname > TYPICALMAXPATHLEN bytes long. 
108 */ 109 if (error = pn_get(fnamep, seg, &lookpn)) 110 return (error); 111 error = lookuppnat(&lookpn, NULL, followlink, 112 dirvpp, compvpp, startvp); 113 pn_free(&lookpn); 114 } 115 116 return (error); 117 } 118 119 /* 120 * Lookup the user file name from a given vp, 121 */ 122 int 123 lookuppn( 124 struct pathname *pnp, 125 struct pathname *rpnp, 126 enum symfollow followlink, 127 vnode_t **dirvpp, 128 vnode_t **compvpp) 129 { 130 return (lookuppnat(pnp, rpnp, followlink, dirvpp, compvpp, NULL)); 131 } 132 133 int 134 lookuppnat( 135 struct pathname *pnp, /* pathname to lookup */ 136 struct pathname *rpnp, /* if non-NULL, return resolved path */ 137 enum symfollow followlink, /* (don't) follow sym links */ 138 vnode_t **dirvpp, /* ptr for parent vnode */ 139 vnode_t **compvpp, /* ptr for entry vnode */ 140 vnode_t *startvp) /* start search from this vp */ 141 { 142 vnode_t *vp; /* current directory vp */ 143 vnode_t *rootvp; 144 proc_t *p = curproc; 145 146 if (pnp->pn_pathlen == 0) 147 return (ENOENT); 148 149 mutex_enter(&p->p_lock); /* for u_rdir and u_cdir */ 150 if ((rootvp = PTOU(p)->u_rdir) == NULL) 151 rootvp = rootdir; 152 else if (rootvp != rootdir) /* no need to VN_HOLD rootdir */ 153 VN_HOLD(rootvp); 154 155 if (pnp->pn_path[0] == '/') { 156 vp = rootvp; 157 } else { 158 vp = (startvp == NULL) ? PTOU(p)->u_cdir : startvp; 159 } 160 VN_HOLD(vp); 161 mutex_exit(&p->p_lock); 162 163 /* 164 * Skip over leading slashes 165 */ 166 if (pnp->pn_path[0] == '/') { 167 do { 168 pnp->pn_path++; 169 pnp->pn_pathlen--; 170 } while (pnp->pn_path[0] == '/'); 171 } 172 173 return (lookuppnvp(pnp, rpnp, followlink, dirvpp, 174 compvpp, rootvp, vp, CRED())); 175 } 176 177 /* Private flag to do our getcwd() dirty work */ 178 #define LOOKUP_CHECKREAD 0x10 179 #define LOOKUP_MASK (~LOOKUP_CHECKREAD) 180 181 /* 182 * Starting at current directory, translate pathname pnp to end. 
183 * Leave pathname of final component in pnp, return the vnode 184 * for the final component in *compvpp, and return the vnode 185 * for the parent of the final component in dirvpp. 186 * 187 * This is the central routine in pathname translation and handles 188 * multiple components in pathnames, separating them at /'s. It also 189 * implements mounted file systems and processes symbolic links. 190 * 191 * vp is the vnode where the directory search should start. 192 * 193 * Reference counts: vp must be held prior to calling this function. rootvp 194 * should only be held if rootvp != rootdir. 195 */ 196 int 197 lookuppnvp( 198 struct pathname *pnp, /* pathname to lookup */ 199 struct pathname *rpnp, /* if non-NULL, return resolved path */ 200 int flags, /* follow symlinks */ 201 vnode_t **dirvpp, /* ptr for parent vnode */ 202 vnode_t **compvpp, /* ptr for entry vnode */ 203 vnode_t *rootvp, /* rootvp */ 204 vnode_t *vp, /* directory to start search at */ 205 cred_t *cr) /* user's credential */ 206 { 207 vnode_t *cvp; /* current component vp */ 208 vnode_t *tvp; /* addressable temp ptr */ 209 char component[MAXNAMELEN]; /* buffer for component (incl null) */ 210 int error; 211 int nlink; 212 int lookup_flags; 213 vnode_t *startvp; 214 vnode_t *zonevp = curproc->p_zone->zone_rootvp; /* zone root */ 215 int must_be_directory = 0; 216 217 CPU_STATS_ADDQ(CPU, sys, namei, 1); 218 nlink = 0; 219 cvp = NULL; 220 if (rpnp) 221 rpnp->pn_pathlen = 0; 222 lookup_flags = dirvpp ? LOOKUP_DIR : 0; 223 #ifdef C2_AUDIT 224 if (audit_active) 225 audit_anchorpath(pnp, vp == rootvp); 226 #endif 227 228 /* 229 * Eliminate any trailing slashes in the pathname. 230 * If there are any, we must follow all symlinks. 231 * Also, we must guarantee that the last component is a directory. 232 */ 233 if (pn_fixslash(pnp)) { 234 flags |= FOLLOW; 235 must_be_directory = 1; 236 } 237 238 startvp = vp; 239 next: 240 /* 241 * Make sure we have a directory. 
242 */ 243 if (vp->v_type != VDIR) { 244 error = ENOTDIR; 245 goto bad; 246 } 247 248 if (rpnp && VN_CMP(vp, rootvp)) 249 (void) pn_set(rpnp, "/"); 250 251 /* 252 * Process the next component of the pathname. 253 */ 254 if (error = pn_getcomponent(pnp, component)) { 255 #ifdef C2_AUDIT 256 if (audit_active) 257 audit_addcomponent(pnp); 258 #endif 259 goto bad; 260 } 261 262 /* 263 * Handle "..": two special cases. 264 * 1. If we're at the root directory (e.g. after chroot or 265 * zone_enter) then change ".." to "." so we can't get 266 * out of this subtree. 267 * 2. If this vnode is the root of a mounted file system, 268 * then replace it with the vnode that was mounted on 269 * so that we take the ".." in the other file system. 270 */ 271 if (component[0] == '.' && component[1] == '.' && component[2] == 0) { 272 checkforroot: 273 if (VN_CMP(vp, rootvp) || VN_CMP(vp, zonevp)) { 274 component[1] = '\0'; 275 } else if (vp->v_flag & VROOT) { 276 vfs_t *vfsp; 277 cvp = vp; 278 279 /* 280 * While we deal with the vfs pointer from the vnode 281 * the filesystem could have been forcefully unmounted 282 * and the vnode's v_vfsp could have been invalidated 283 * by VFS_UNMOUNT. Hence, we cache v_vfsp and use it 284 * with vfs_rlock_wait/vfs_unlock. 285 * It is safe to use the v_vfsp even it is freed by 286 * VFS_UNMOUNT because vfs_rlock_wait/vfs_unlock 287 * do not dereference v_vfsp. It is just used as a 288 * magic cookie. 289 * One more corner case here is the memory getting 290 * reused for another vfs structure. In this case 291 * lookuppnvp's vfs_rlock_wait will succeed, domount's 292 * vfs_lock will fail and domount will bail out with an 293 * error (EBUSY). 294 */ 295 vfsp = cvp->v_vfsp; 296 297 /* 298 * This lock is used to synchronize 299 * mounts/unmounts and lookups. 300 * Threads doing mounts/unmounts hold the 301 * writers version vfs_lock_wait(). 
302 */ 303 304 vfs_rlock_wait(vfsp); 305 306 /* 307 * If this vnode is on a file system that 308 * has been forcibly unmounted, 309 * we can't proceed. Cancel this operation 310 * and return EIO. 311 * 312 * vfs_vnodecovered is NULL if unmounted. 313 * Currently, nfs uses VFS_UNMOUNTED to 314 * check if it's a forced-umount. Keep the 315 * same checking here as well even though it 316 * may not be needed. 317 */ 318 if (((vp = cvp->v_vfsp->vfs_vnodecovered) == NULL) || 319 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) { 320 vfs_unlock(vfsp); 321 VN_RELE(cvp); 322 return (EIO); 323 } 324 VN_HOLD(vp); 325 vfs_unlock(vfsp); 326 VN_RELE(cvp); 327 cvp = NULL; 328 goto checkforroot; 329 } 330 } 331 332 /* 333 * LOOKUP_CHECKREAD is a private flag used by vnodetopath() to indicate 334 * that we need to have read permission on every directory in the entire 335 * path. This is used to ensure that a forward-lookup of a cached value 336 * has the same effect as a reverse-lookup when the cached value cannot 337 * be found. 338 */ 339 if ((flags & LOOKUP_CHECKREAD) && 340 (error = VOP_ACCESS(vp, VREAD, 0, cr)) != 0) 341 goto bad; 342 343 /* 344 * Perform a lookup in the current directory. 345 */ 346 error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags, 347 rootvp, cr); 348 cvp = tvp; 349 if (error) { 350 cvp = NULL; 351 /* 352 * On error, return hard error if 353 * (a) we're not at the end of the pathname yet, or 354 * (b) the caller didn't want the parent directory, or 355 * (c) we failed for some reason other than a missing entry. 356 */ 357 if (pn_pathleft(pnp) || dirvpp == NULL || error != ENOENT) 358 goto bad; 359 #ifdef C2_AUDIT 360 if (audit_active) { /* directory access */ 361 if (error = audit_savepath(pnp, vp, error, cr)) 362 goto bad_noaudit; 363 } 364 #endif 365 pn_setlast(pnp); 366 /* 367 * We inform the caller that the desired entry must be 368 * a directory by adding a '/' to the component name. 
369 */ 370 if (must_be_directory && (error = pn_addslash(pnp)) != 0) 371 goto bad; 372 *dirvpp = vp; 373 if (compvpp != NULL) 374 *compvpp = NULL; 375 if (rootvp != rootdir) 376 VN_RELE(rootvp); 377 return (0); 378 } 379 380 /* 381 * Traverse mount points. 382 * XXX why don't we need to hold a read lock here (call vn_vfsrlock)? 383 * What prevents a concurrent update to v_vfsmountedhere? 384 * Possible answer: if mounting, we might not see the mount 385 * if it is concurrently coming into existence, but that's 386 * really not much different from the thread running a bit slower. 387 * If unmounting, we may get into traverse() when we shouldn't, 388 * but traverse() will catch this case for us. 389 * (For this to work, fetching v_vfsmountedhere had better 390 * be atomic!) 391 */ 392 if (vn_mountedvfs(cvp) != NULL) { 393 tvp = cvp; 394 if ((error = traverse(&tvp)) != 0) { 395 /* 396 * It is required to assign cvp here, because 397 * traverse() will return a held vnode which 398 * may different than the vnode that was passed 399 * in (even in the error case). If traverse() 400 * changes the vnode it releases the original, 401 * and holds the new one. 402 */ 403 cvp = tvp; 404 goto bad; 405 } 406 cvp = tvp; 407 } 408 409 /* 410 * If we hit a symbolic link and there is more path to be 411 * translated or this operation does not wish to apply 412 * to a link, then place the contents of the link at the 413 * front of the remaining pathname. 
414 */ 415 if (cvp->v_type == VLNK && ((flags & FOLLOW) || pn_pathleft(pnp))) { 416 struct pathname linkpath; 417 #ifdef C2_AUDIT 418 if (audit_active) { 419 if (error = audit_pathcomp(pnp, cvp, cr)) 420 goto bad; 421 } 422 #endif 423 424 if (++nlink > MAXSYMLINKS) { 425 error = ELOOP; 426 goto bad; 427 } 428 pn_alloc(&linkpath); 429 if (error = pn_getsymlink(cvp, &linkpath, cr)) { 430 pn_free(&linkpath); 431 goto bad; 432 } 433 434 #ifdef C2_AUDIT 435 if (audit_active) 436 audit_symlink(pnp, &linkpath); 437 #endif /* C2_AUDIT */ 438 439 if (pn_pathleft(&linkpath) == 0) 440 (void) pn_set(&linkpath, "."); 441 error = pn_insert(pnp, &linkpath, strlen(component)); 442 pn_free(&linkpath); 443 if (error) 444 goto bad; 445 VN_RELE(cvp); 446 cvp = NULL; 447 if (pnp->pn_pathlen == 0) { 448 error = ENOENT; 449 goto bad; 450 } 451 if (pnp->pn_path[0] == '/') { 452 do { 453 pnp->pn_path++; 454 pnp->pn_pathlen--; 455 } while (pnp->pn_path[0] == '/'); 456 VN_RELE(vp); 457 vp = rootvp; 458 VN_HOLD(vp); 459 } 460 #ifdef C2_AUDIT 461 if (audit_active) 462 audit_anchorpath(pnp, vp == rootvp); 463 #endif 464 if (pn_fixslash(pnp)) { 465 flags |= FOLLOW; 466 must_be_directory = 1; 467 } 468 goto next; 469 } 470 471 /* 472 * If rpnp is non-NULL, remember the resolved path name therein. 473 * Do not include "." components. Collapse occurrences of 474 * "previous/..", so long as "previous" is not itself "..". 475 * Exhausting rpnp results in error ENAMETOOLONG. 
476 */ 477 if (rpnp && strcmp(component, ".") != 0) { 478 size_t len; 479 480 if (strcmp(component, "..") == 0 && 481 rpnp->pn_pathlen != 0 && 482 !((rpnp->pn_pathlen > 2 && 483 strncmp(rpnp->pn_path+rpnp->pn_pathlen-3, "/..", 3) == 0) || 484 (rpnp->pn_pathlen == 2 && 485 strncmp(rpnp->pn_path, "..", 2) == 0))) { 486 while (rpnp->pn_pathlen && 487 rpnp->pn_path[rpnp->pn_pathlen-1] != '/') 488 rpnp->pn_pathlen--; 489 if (rpnp->pn_pathlen > 1) 490 rpnp->pn_pathlen--; 491 rpnp->pn_path[rpnp->pn_pathlen] = '\0'; 492 } else { 493 if (rpnp->pn_pathlen != 0 && 494 rpnp->pn_path[rpnp->pn_pathlen-1] != '/') 495 rpnp->pn_path[rpnp->pn_pathlen++] = '/'; 496 error = copystr(component, 497 rpnp->pn_path + rpnp->pn_pathlen, 498 rpnp->pn_bufsize - rpnp->pn_pathlen, &len); 499 if (error) /* copystr() returns ENAMETOOLONG */ 500 goto bad; 501 rpnp->pn_pathlen += (len - 1); 502 ASSERT(rpnp->pn_bufsize > rpnp->pn_pathlen); 503 } 504 } 505 506 /* 507 * If no more components, return last directory (if wanted) and 508 * last component (if wanted). 509 */ 510 if (pn_pathleft(pnp) == 0) { 511 /* 512 * If there was a trailing slash in the pathname, 513 * make sure the last component is a directory. 514 */ 515 if (must_be_directory && cvp->v_type != VDIR) { 516 error = ENOTDIR; 517 goto bad; 518 } 519 if (dirvpp != NULL) { 520 /* 521 * Check that we have the real parent and not 522 * an alias of the last component. 
523 */ 524 if (vn_compare(vp, cvp)) { 525 #ifdef C2_AUDIT 526 if (audit_active) 527 (void) audit_savepath(pnp, cvp, 528 EINVAL, cr); 529 #endif 530 pn_setlast(pnp); 531 VN_RELE(vp); 532 VN_RELE(cvp); 533 if (rootvp != rootdir) 534 VN_RELE(rootvp); 535 return (EINVAL); 536 } 537 #ifdef C2_AUDIT 538 if (audit_active) { 539 if (error = audit_pathcomp(pnp, vp, cr)) 540 goto bad; 541 } 542 #endif 543 *dirvpp = vp; 544 } else 545 VN_RELE(vp); 546 #ifdef C2_AUDIT 547 if (audit_active) 548 (void) audit_savepath(pnp, cvp, 0, cr); 549 #endif 550 if (pnp->pn_path == pnp->pn_buf) 551 (void) pn_set(pnp, "."); 552 else 553 pn_setlast(pnp); 554 if (rpnp) { 555 if (VN_CMP(cvp, rootvp)) 556 (void) pn_set(rpnp, "/"); 557 else if (rpnp->pn_pathlen == 0) 558 (void) pn_set(rpnp, "."); 559 } 560 561 if (compvpp != NULL) 562 *compvpp = cvp; 563 else 564 VN_RELE(cvp); 565 if (rootvp != rootdir) 566 VN_RELE(rootvp); 567 return (0); 568 } 569 570 #ifdef C2_AUDIT 571 if (audit_active) { 572 if (error = audit_pathcomp(pnp, cvp, cr)) 573 goto bad; 574 } 575 #endif 576 577 /* 578 * Skip over slashes from end of last component. 579 */ 580 while (pnp->pn_path[0] == '/') { 581 pnp->pn_path++; 582 pnp->pn_pathlen--; 583 } 584 585 /* 586 * Searched through another level of directory: 587 * release previous directory handle and save new (result 588 * of lookup) as current directory. 589 */ 590 VN_RELE(vp); 591 vp = cvp; 592 cvp = NULL; 593 goto next; 594 595 bad: 596 #ifdef C2_AUDIT 597 if (audit_active) /* reached end of path */ 598 (void) audit_savepath(pnp, cvp, error, cr); 599 bad_noaudit: 600 #endif 601 /* 602 * Error. Release vnodes and return. 603 */ 604 if (cvp) 605 VN_RELE(cvp); 606 /* 607 * If the error was ESTALE and the current directory to look in 608 * was the root for this lookup, the root for a mounted file 609 * system, or the starting directory for lookups, then 610 * return ENOENT instead of ESTALE. In this case, no recovery 611 * is possible by the higher level. 
If ESTALE was returned for 612 * some intermediate directory along the path, then recovery 613 * is potentially possible and retrying from the higher level 614 * will either correct the situation by purging stale cache 615 * entries or eventually get back to the point where no recovery 616 * is possible. 617 */ 618 if (error == ESTALE && 619 (VN_CMP(vp, rootvp) || (vp->v_flag & VROOT) || vp == startvp)) 620 error = ENOENT; 621 VN_RELE(vp); 622 if (rootvp != rootdir) 623 VN_RELE(rootvp); 624 return (error); 625 } 626 627 /* 628 * Traverse a mount point. Routine accepts a vnode pointer as a reference 629 * parameter and performs the indirection, releasing the original vnode. 630 */ 631 int 632 traverse(vnode_t **cvpp) 633 { 634 int error = 0; 635 vnode_t *cvp; 636 vnode_t *tvp; 637 vfs_t *vfsp; 638 639 cvp = *cvpp; 640 641 /* 642 * If this vnode is mounted on, then we transparently indirect 643 * to the vnode which is the root of the mounted file system. 644 * Before we do this we must check that an unmount is not in 645 * progress on this vnode. 646 */ 647 648 for (;;) { 649 /* 650 * Try to read lock the vnode. If this fails because 651 * the vnode is already write locked, then check to 652 * see whether it is the current thread which locked 653 * the vnode. If it is not, then read lock the vnode 654 * by waiting to acquire the lock. 655 * 656 * The code path in domount() is an example of support 657 * which needs to look up two pathnames and locks one 658 * of them in between the two lookups. 659 */ 660 error = vn_vfsrlock(cvp); 661 if (error) { 662 if (!vn_vfswlock_held(cvp)) 663 error = vn_vfsrlock_wait(cvp); 664 if (error != 0) { 665 /* 666 * lookuppn() expects a held vnode to be 667 * returned because it promptly calls 668 * VN_RELE after the error return 669 */ 670 *cvpp = cvp; 671 return (error); 672 } 673 } 674 675 /* 676 * Reached the end of the mount chain? 
677 */ 678 vfsp = vn_mountedvfs(cvp); 679 if (vfsp == NULL) { 680 vn_vfsunlock(cvp); 681 break; 682 } 683 684 /* 685 * The read lock must be held across the call to VFS_ROOT() to 686 * prevent a concurrent unmount from destroying the vfs. 687 */ 688 error = VFS_ROOT(vfsp, &tvp); 689 vn_vfsunlock(cvp); 690 691 if (error) 692 break; 693 694 VN_RELE(cvp); 695 696 cvp = tvp; 697 } 698 699 *cvpp = cvp; 700 return (error); 701 } 702 703 /* 704 * Return the lowermost vnode if this is a mountpoint. 705 */ 706 static vnode_t * 707 vn_under(vnode_t *vp) 708 { 709 vnode_t *uvp; 710 vfs_t *vfsp; 711 712 while (vp->v_flag & VROOT) { 713 714 vfsp = vp->v_vfsp; 715 vfs_rlock_wait(vfsp); 716 if ((uvp = vfsp->vfs_vnodecovered) == NULL || 717 (vfsp->vfs_flag & VFS_UNMOUNTED)) { 718 vfs_unlock(vfsp); 719 break; 720 } 721 VN_HOLD(uvp); 722 vfs_unlock(vfsp); 723 VN_RELE(vp); 724 vp = uvp; 725 } 726 727 return (vp); 728 } 729 730 static int 731 vnode_match(vnode_t *v1, vnode_t *v2, cred_t *cr) 732 { 733 vattr_t v1attr, v2attr; 734 735 /* 736 * If we have a device file, check to see if is a cloned open of the 737 * same device. For self-cloning devices, the major numbers will match. 738 * For devices cloned through the 'clone' driver, the minor number of 739 * the source device will be the same as the major number of the cloned 740 * device. 741 */ 742 if ((v1->v_type == VCHR || v1->v_type == VBLK) && 743 v1->v_type == v2->v_type) { 744 if ((spec_is_selfclone(v1) || spec_is_selfclone(v2)) && 745 getmajor(v1->v_rdev) == getmajor(v2->v_rdev)) 746 return (1); 747 748 if (spec_is_clone(v1) && 749 getmajor(v1->v_rdev) == getminor(v2->v_rdev)) 750 return (1); 751 752 if (spec_is_clone(v2) && 753 getmajor(v2->v_rdev) == getminor(v1->v_rdev)) 754 return (1); 755 } 756 757 v1attr.va_mask = v2attr.va_mask = AT_TYPE; 758 759 /* 760 * This check for symbolic links handles the pseudo-symlinks in procfs. 761 * These particular links have v_type of VDIR, but the attributes have a 762 * type of VLNK. 
We need to avoid these links because otherwise if we 763 * are currently in '/proc/self/fd', then '/proc/self/cwd' will compare 764 * as the same vnode. 765 */ 766 if (VOP_GETATTR(v1, &v1attr, 0, cr) != 0 || 767 VOP_GETATTR(v2, &v2attr, 0, cr) != 0 || 768 v1attr.va_type == VLNK || v2attr.va_type == VLNK) 769 return (0); 770 771 v1attr.va_mask = v2attr.va_mask = AT_TYPE | AT_FSID | AT_NODEID; 772 773 if (VOP_GETATTR(v1, &v1attr, ATTR_REAL, cr) != 0 || 774 VOP_GETATTR(v2, &v2attr, ATTR_REAL, cr) != 0) 775 return (0); 776 777 return (v1attr.va_fsid == v2attr.va_fsid && 778 v1attr.va_nodeid == v2attr.va_nodeid); 779 } 780 781 782 /* 783 * Find the entry in the directory corresponding to the target vnode. 784 */ 785 int 786 dirfindvp(vnode_t *vrootp, vnode_t *dvp, vnode_t *tvp, cred_t *cr, char *dbuf, 787 size_t dlen, dirent64_t **rdp) 788 { 789 size_t dbuflen; 790 struct iovec iov; 791 struct uio uio; 792 int err; 793 int eof; 794 vnode_t *cmpvp; 795 struct dirent64 *dp; 796 pathname_t pnp; 797 798 ASSERT(dvp->v_type == VDIR); 799 800 /* 801 * This is necessary because of the strange semantics of VOP_LOOKUP(). 802 */ 803 bzero(&pnp, sizeof (pnp)); 804 805 eof = 0; 806 807 uio.uio_iov = &iov; 808 uio.uio_iovcnt = 1; 809 uio.uio_segflg = UIO_SYSSPACE; 810 uio.uio_fmode = 0; 811 uio.uio_extflg = UIO_COPY_CACHED; 812 uio.uio_loffset = 0; 813 814 if ((err = VOP_ACCESS(dvp, VREAD, 0, cr)) != 0) 815 return (err); 816 817 while (!eof) { 818 uio.uio_resid = dlen; 819 iov.iov_base = dbuf; 820 iov.iov_len = dlen; 821 822 (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL); 823 err = VOP_READDIR(dvp, &uio, cr, &eof); 824 VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL); 825 826 dbuflen = dlen - uio.uio_resid; 827 828 if (err || dbuflen == 0) 829 break; 830 831 dp = (dirent64_t *)dbuf; 832 while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) { 833 /* 834 * Ignore '.' and '..' 
entries 835 */ 836 if (strcmp(dp->d_name, ".") == 0 || 837 strcmp(dp->d_name, "..") == 0) { 838 dp = (dirent64_t *)((intptr_t)dp + 839 dp->d_reclen); 840 continue; 841 } 842 843 err = VOP_LOOKUP(dvp, dp->d_name, &cmpvp, &pnp, 0, 844 vrootp, cr); 845 846 /* 847 * We only want to bail out if there was an error other 848 * than ENOENT. Otherwise, it could be that someone 849 * just removed an entry since the readdir() call, and 850 * the entry we want is further on in the directory. 851 */ 852 if (err == 0) { 853 if (vnode_match(tvp, cmpvp, cr)) { 854 VN_RELE(cmpvp); 855 *rdp = dp; 856 return (0); 857 } 858 859 VN_RELE(cmpvp); 860 } else if (err != ENOENT) { 861 return (err); 862 } 863 864 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen); 865 } 866 } 867 868 /* 869 * Something strange has happened, this directory does not contain the 870 * specified vnode. This should never happen in the normal case, since 871 * we ensured that dvp is the parent of vp. This may be possible in 872 * some race conditions, so fail gracefully. 873 */ 874 if (err == 0) 875 err = ENOENT; 876 877 return (err); 878 } 879 880 /* 881 * Given a global path (from rootdir), and a vnode that is the current root, 882 * return the portion of the path that is beneath the current root or NULL on 883 * failure. The path MUST be a resolved path (no '..' entries or symlinks), 884 * otherwise this function will fail. 885 */ 886 static char * 887 localpath(char *path, struct vnode *vrootp, cred_t *cr) 888 { 889 vnode_t *vp; 890 vnode_t *cvp; 891 char component[MAXNAMELEN]; 892 char *ret = NULL; 893 pathname_t pn; 894 895 /* 896 * We use vn_compare() instead of VN_CMP() in order to detect lofs 897 * mounts and stacked vnodes. 
/*
 * Given a directory, return the full, resolved path.  This looks up "..",
 * searches for the given vnode in the parent, appends the component, etc.  It
 * is used to implement vnodetopath() and getcwd() when the cached path fails
 * (or vfs_vnode_path is not set).
 *
 * The path is assembled right-to-left at the tail of buf and moved to the
 * front of buf once vrootp has been reached.
 *
 *	vrootp	root against which the path is expressed
 *	vp	directory whose path is wanted (asserted to be VDIR)
 *	buf	output buffer
 *	buflen	size of buf in bytes; must be at least 2
 *	cr	credentials used for the lookups
 *
 * Returns 0 on success, otherwise an errno value:
 *	ENAMETOOLONG	buflen < 2, or a component does not fit in buf
 *	ERANGE		the mount point path shortcut does not fit in buf
 *	EPERM		rootdir was reached before vrootp
 *	ENOTDIR		the ".." of a directory is not itself a directory
 *	ENOENT		".." resolved to the same vnode, or ESTALE was
 *			remapped (see the comment at "out")
 * or whatever VOP_LOOKUP() / dirfindvp() returned.
 *
 * The caller's reference on vp is untouched: an extra hold is taken on entry
 * and the hold on whichever vnode is current is dropped on the way out.
 */
static int
dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr)
{
	pathname_t pn, rpn, emptypn;
	vnode_t *cmpvp, *pvp = NULL;
	vnode_t *startvp = vp;
	int err = 0;
	size_t complen;
	char *dbuf;
	dirent64_t *dp;
	char *bufloc;
	size_t dlen = DIRENT64_RECLEN(MAXPATHLEN);
	refstr_t *mntpt;

	/* Operation only allowed on directories */
	ASSERT(vp->v_type == VDIR);

	/* We must have at least enough space for "/" */
	if (buflen < 2)
		return (ENAMETOOLONG);

	/* Start at end of string with terminating null */
	bufloc = &buf[buflen - 1];
	*bufloc = '\0';

	pn_alloc(&pn);
	pn_alloc(&rpn);
	/* Scratch buffer handed to dirfindvp() for reading directories. */
	dbuf = kmem_alloc(dlen, KM_SLEEP);
	/* Zeroed pathname passed to VOP_LOOKUP() for the ".." lookups. */
	bzero(&emptypn, sizeof (emptypn));

	/*
	 * Begin with an additional reference on vp.  This will be decremented
	 * during the loop.
	 */
	VN_HOLD(vp);

	for (;;) {
		/*
		 * Return if we've reached the root.  If the buffer is empty,
		 * return '/'.  We explicitly don't use vn_compare(), since it
		 * compares the real vnodes.  A lofs mount of '/' would produce
		 * incorrect results otherwise.
		 */
		if (VN_CMP(vrootp, vp)) {
			if (*bufloc == '\0')
				*--bufloc = '/';
			break;
		}

		/*
		 * If we've reached the VFS root, something has gone wrong.  We
		 * should have reached the root in the above check.  The only
		 * explanation is that 'vp' is not contained within the given
		 * root, in which case we return EPERM.
		 */
		if (VN_CMP(rootdir, vp)) {
			err = EPERM;
			goto out;
		}

		/*
		 * Shortcut: see if this vnode is a mountpoint.  If so,
		 * grab the path information from the vfs_t.
		 */
		if (vp->v_flag & VROOT) {

			mntpt = vfs_getmntpoint(vp->v_vfsp);
			if ((err = pn_set(&pn, (char *)refstr_value(mntpt)))
			    == 0) {
				refstr_rele(mntpt);
				rpn.pn_path = rpn.pn_buf;

				/*
				 * Ensure the mountpoint still exists.
				 *
				 * lookuppnvp() consumes a hold on its start
				 * directory and, when the root is not rootdir,
				 * one on the root as well; vrootp is passed
				 * as both, hence up to two holds here.
				 */
				VN_HOLD(vrootp);
				if (vrootp != rootdir)
					VN_HOLD(vrootp);
				if (lookuppnvp(&pn, &rpn, 0, NULL,
				    &cmpvp, vrootp, vrootp, cr) == 0) {

					if (VN_CMP(vp, cmpvp)) {
						VN_RELE(cmpvp);

						/*
						 * The recorded mount point
						 * resolves to vp: prepend the
						 * resolved path and finish.
						 */
						complen = strlen(rpn.pn_path);
						bufloc -= complen;
						if (bufloc < buf) {
							err = ERANGE;
							goto out;
						}
						bcopy(rpn.pn_path, bufloc,
						    complen);
						break;
					} else {
						VN_RELE(cmpvp);
					}
				}
			} else {
				refstr_rele(mntpt);
			}
		}

		/*
		 * Shortcuts failed, search for this vnode in its parent.  If
		 * this is a mountpoint, then get the vnode underneath.
		 */
		if (vp->v_flag & VROOT)
			vp = vn_under(vp);
		if ((err = VOP_LOOKUP(vp, "..", &pvp, &emptypn, 0, vrootp, cr))
		    != 0)
			goto out;

		/*
		 * With extended attributes, it's possible for a directory to
		 * have a parent that is a regular file.  Check for that here.
		 */
		if (pvp->v_type != VDIR) {
			err = ENOTDIR;
			goto out;
		}

		/*
		 * If this is true, something strange has happened.  This is
		 * only true if we are the root of a filesystem, which should
		 * have been caught by the check above.
		 */
		if (VN_CMP(pvp, vp)) {
			err = ENOENT;
			goto out;
		}

		/*
		 * Search the parent directory for the entry corresponding to
		 * this vnode.
		 */
		if ((err = dirfindvp(vrootp, pvp, vp, cr, dbuf, dlen, &dp))
		    != 0)
			goto out;
		complen = strlen(dp->d_name);
		bufloc -= complen;
		/* "<=" keeps one byte free for the slash prepended below. */
		if (bufloc <= buf) {
			err = ENAMETOOLONG;
			goto out;
		}
		bcopy(dp->d_name, bufloc, complen);

		/* Prepend a slash to the current path.  */
		*--bufloc = '/';

		/* And continue with the next component */
		VN_RELE(vp);
		vp = pvp;
		pvp = NULL;
	}

	/*
	 * Place the path at the beginning of the buffer.
	 */
	if (bufloc != buf)
		ovbcopy(bufloc, buf, buflen - (bufloc - buf));

out:
	/*
	 * If the error was ESTALE and the current directory to look in
	 * was the root for this lookup, the root for a mounted file
	 * system, or the starting directory for lookups, then
	 * return ENOENT instead of ESTALE.  In this case, no recovery
	 * is possible by the higher level.  If ESTALE was returned for
	 * some intermediate directory along the path, then recovery
	 * is potentially possible and retrying from the higher level
	 * will either correct the situation by purging stale cache
	 * entries or eventually get back to the point where no recovery
	 * is possible.
	 */
	if (err == ESTALE &&
	    (VN_CMP(vp, vrootp) || (vp->v_flag & VROOT) || vp == startvp))
		err = ENOENT;

	kmem_free(dbuf, dlen);
	VN_RELE(vp);
	if (pvp)
		VN_RELE(pvp);
	pn_free(&pn);
	pn_free(&rpn);

	return (err);
}
1171 */ 1172 if (vp->v_type == VDIR && VOP_REALVP(vp, &realvp) == 0 && 1173 realvp != vp) { 1174 VN_HOLD(vp); 1175 if (VOP_OPEN(&vp, FREAD, cr) == 0) 1176 doclose = 1; 1177 else 1178 VN_RELE(vp); 1179 } 1180 1181 pn_alloc(&pn); 1182 1183 /* 1184 * Check to see if we have a cached path in the vnode. 1185 */ 1186 mutex_enter(&vp->v_lock); 1187 if (vp->v_path != NULL) { 1188 (void) pn_set(&pn, vp->v_path); 1189 mutex_exit(&vp->v_lock); 1190 1191 pn_alloc(&rpn); 1192 1193 /* We should only cache absolute paths */ 1194 ASSERT(pn.pn_buf[0] == '/'); 1195 1196 /* 1197 * If we are in a zone or a chroot environment, then we have to 1198 * take additional steps, since the path to the root might not 1199 * be readable with the current credentials, even though the 1200 * process can legitmately access the file. In this case, we 1201 * do the following: 1202 * 1203 * lookuppnvp() with all privileges to get the resolved path. 1204 * call localpath() to get the local portion of the path, and 1205 * continue as normal. 1206 * 1207 * If the the conversion to a local path fails, then we continue 1208 * as normal. This is a heuristic to make process object file 1209 * paths available from within a zone. Because lofs doesn't 1210 * support page operations, the vnode stored in the seg_t is 1211 * actually the underlying real vnode, not the lofs node itself. 1212 * Most of the time, the lofs path is the same as the underlying 1213 * vnode (for example, /usr/lib/libc.so.1). 1214 */ 1215 if (vrootp != rootdir) { 1216 char *local = NULL; 1217 VN_HOLD(rootdir); 1218 if (lookuppnvp(&pn, &rpn, FOLLOW, 1219 NULL, &compvp, rootdir, rootdir, kcred) == 0) { 1220 local = localpath(rpn.pn_path, vrootp, 1221 kcred); 1222 VN_RELE(compvp); 1223 } 1224 1225 /* 1226 * The original pn was changed through lookuppnvp(), so 1227 * reset it. 
1228 */ 1229 if (local) { 1230 (void) pn_set(&pn, local); 1231 } else { 1232 mutex_enter(&vp->v_lock); 1233 if (vp->v_path != NULL) { 1234 (void) pn_set(&pn, vp->v_path); 1235 mutex_exit(&vp->v_lock); 1236 } else { 1237 mutex_exit(&vp->v_lock); 1238 goto notcached; 1239 } 1240 } 1241 } 1242 1243 /* 1244 * We should have a local path at this point, so start the 1245 * search from the root of the current process. 1246 */ 1247 VN_HOLD(vrootp); 1248 if (vrootp != rootdir) 1249 VN_HOLD(vrootp); 1250 ret = lookuppnvp(&pn, &rpn, FOLLOW | flags, NULL, 1251 &compvp, vrootp, vrootp, cr); 1252 if (ret == 0) { 1253 /* 1254 * Check to see if the returned vnode is the same as 1255 * the one we expect. If not, give up. 1256 */ 1257 if (!vn_compare(vp, compvp) && 1258 !vnode_match(vp, compvp, cr)) { 1259 VN_RELE(compvp); 1260 goto notcached; 1261 } 1262 1263 VN_RELE(compvp); 1264 1265 /* 1266 * Return the result. 1267 */ 1268 if (buflen <= rpn.pn_pathlen) 1269 goto notcached; 1270 1271 bcopy(rpn.pn_path, buf, rpn.pn_pathlen + 1); 1272 pn_free(&pn); 1273 pn_free(&rpn); 1274 VN_RELE(vrootp); 1275 if (doclose) { 1276 (void) VOP_CLOSE(vp, FREAD, 1, 0, cr); 1277 VN_RELE(vp); 1278 } 1279 return (0); 1280 } 1281 1282 notcached: 1283 pn_free(&rpn); 1284 } else { 1285 mutex_exit(&vp->v_lock); 1286 } 1287 1288 pn_free(&pn); 1289 1290 if (vp->v_type != VDIR) { 1291 /* 1292 * If we don't have a directory, try to find it in the dnlc via 1293 * reverse lookup. Once this is found, we can use the regular 1294 * directory search to find the full path. 
1295 */ 1296 if ((pvp = dnlc_reverse_lookup(vp, path, MAXNAMELEN)) != NULL) { 1297 ret = dirtopath(vrootp, pvp, buf, buflen, cr); 1298 if (ret == 0) { 1299 len = strlen(buf); 1300 if (len + strlen(path) + 1 >= buflen) { 1301 ret = ENAMETOOLONG; 1302 } else { 1303 if (buf[len - 1] != '/') 1304 buf[len++] = '/'; 1305 bcopy(path, buf + len, 1306 strlen(path) + 1); 1307 } 1308 } 1309 1310 VN_RELE(pvp); 1311 } else 1312 ret = ENOENT; 1313 } else 1314 ret = dirtopath(vrootp, vp, buf, buflen, cr); 1315 1316 VN_RELE(vrootp); 1317 if (doclose) { 1318 (void) VOP_CLOSE(vp, FREAD, 1, 0, cr); 1319 VN_RELE(vp); 1320 } 1321 1322 return (ret); 1323 } 1324 1325 int 1326 vnodetopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr) 1327 { 1328 return (vnodetopath_common(vrootp, vp, buf, buflen, cr, 0)); 1329 } 1330 1331 int 1332 dogetcwd(char *buf, size_t buflen) 1333 { 1334 int ret; 1335 vnode_t *vp; 1336 vnode_t *compvp; 1337 refstr_t *cwd, *oldcwd; 1338 const char *value; 1339 pathname_t rpnp, pnp; 1340 proc_t *p = curproc; 1341 1342 /* 1343 * Check to see if there is a cached version of the cwd. If so, lookup 1344 * the cached value and make sure it is the same vnode. 1345 */ 1346 mutex_enter(&p->p_lock); 1347 if ((cwd = PTOU(p)->u_cwd) != NULL) 1348 refstr_hold(cwd); 1349 vp = PTOU(p)->u_cdir; 1350 VN_HOLD(vp); 1351 mutex_exit(&p->p_lock); 1352 1353 /* 1354 * Make sure we have permission to access the current directory. 
1355 */ 1356 if ((ret = VOP_ACCESS(vp, VEXEC, 0, CRED())) != 0) { 1357 if (cwd != NULL) 1358 refstr_rele(cwd); 1359 VN_RELE(vp); 1360 return (ret); 1361 } 1362 1363 if (cwd) { 1364 value = refstr_value(cwd); 1365 if ((ret = pn_get((char *)value, UIO_SYSSPACE, &pnp)) != 0) { 1366 refstr_rele(cwd); 1367 VN_RELE(vp); 1368 return (ret); 1369 } 1370 1371 pn_alloc(&rpnp); 1372 1373 if (lookuppn(&pnp, &rpnp, NO_FOLLOW, NULL, &compvp) == 0) { 1374 1375 if (VN_CMP(vp, compvp) && 1376 strcmp(value, rpnp.pn_path) == 0) { 1377 VN_RELE(compvp); 1378 VN_RELE(vp); 1379 pn_free(&pnp); 1380 pn_free(&rpnp); 1381 if (strlen(value) + 1 > buflen) { 1382 refstr_rele(cwd); 1383 return (ENAMETOOLONG); 1384 } 1385 bcopy(value, buf, strlen(value) + 1); 1386 refstr_rele(cwd); 1387 return (0); 1388 } 1389 1390 VN_RELE(compvp); 1391 } 1392 1393 pn_free(&rpnp); 1394 pn_free(&pnp); 1395 1396 refstr_rele(cwd); 1397 } 1398 1399 ret = vnodetopath_common(NULL, vp, buf, buflen, CRED(), 1400 LOOKUP_CHECKREAD); 1401 1402 VN_RELE(vp); 1403 1404 /* 1405 * Store the new cwd and replace the existing cached copy. 1406 */ 1407 if (ret == 0) 1408 cwd = refstr_alloc(buf); 1409 else 1410 cwd = NULL; 1411 1412 mutex_enter(&p->p_lock); 1413 oldcwd = PTOU(p)->u_cwd; 1414 PTOU(p)->u_cwd = cwd; 1415 mutex_exit(&p->p_lock); 1416 1417 if (oldcwd) 1418 refstr_rele(oldcwd); 1419 1420 return (ret); 1421 } 1422