1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * University Copyright- Copyright (c) 1982, 1986, 1988 32 * The Regents of the University of California 33 * All Rights Reserved 34 * 35 * University Acknowledgment- Portions of this document are derived from 36 * software developed by the University of California, Berkeley, and its 37 * contributors. 
38 */ 39 40 41 #pragma ident "%Z%%M% %I% %E% SMI" 42 43 #include <sys/types.h> 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/cpuvar.h> 47 #include <sys/errno.h> 48 #include <sys/cred.h> 49 #include <sys/user.h> 50 #include <sys/uio.h> 51 #include <sys/vfs.h> 52 #include <sys/vnode.h> 53 #include <sys/pathname.h> 54 #include <sys/proc.h> 55 #include <sys/vtrace.h> 56 #include <sys/sysmacros.h> 57 #include <sys/debug.h> 58 #include <sys/dirent.h> 59 #include <c2/audit.h> 60 #include <sys/zone.h> 61 #include <sys/dnlc.h> 62 #include <sys/fs/snode.h> 63 64 /* Controls whether paths are stored with vnodes. */ 65 int vfs_vnode_path = 1; 66 67 int 68 lookupname( 69 char *fnamep, 70 enum uio_seg seg, 71 enum symfollow followlink, 72 vnode_t **dirvpp, 73 vnode_t **compvpp) 74 { 75 return (lookupnameat(fnamep, seg, followlink, dirvpp, compvpp, NULL)); 76 } 77 78 79 /* 80 * Lookup the user file name, 81 * Handle allocation and freeing of pathname buffer, return error. 82 */ 83 int 84 lookupnameat( 85 char *fnamep, /* user pathname */ 86 enum uio_seg seg, /* addr space that name is in */ 87 enum symfollow followlink, /* follow sym links */ 88 vnode_t **dirvpp, /* ret for ptr to parent dir vnode */ 89 vnode_t **compvpp, /* ret for ptr to component vnode */ 90 vnode_t *startvp) /* start path search from vp */ 91 { 92 char namebuf[TYPICALMAXPATHLEN]; 93 struct pathname lookpn; 94 int error; 95 96 error = pn_get_buf(fnamep, seg, &lookpn, namebuf, sizeof (namebuf)); 97 if (error == 0) { 98 #ifdef C2_AUDIT 99 if (audit_active) 100 audit_lookupname(); 101 #endif 102 error = lookuppnat(&lookpn, NULL, followlink, 103 dirvpp, compvpp, startvp); 104 } 105 if (error == ENAMETOOLONG) { 106 /* 107 * This thread used a pathname > TYPICALMAXPATHLEN bytes long. 
108 */ 109 if (error = pn_get(fnamep, seg, &lookpn)) 110 return (error); 111 error = lookuppnat(&lookpn, NULL, followlink, 112 dirvpp, compvpp, startvp); 113 pn_free(&lookpn); 114 } 115 116 return (error); 117 } 118 119 /* 120 * Lookup the user file name from a given vp, 121 */ 122 int 123 lookuppn( 124 struct pathname *pnp, 125 struct pathname *rpnp, 126 enum symfollow followlink, 127 vnode_t **dirvpp, 128 vnode_t **compvpp) 129 { 130 return (lookuppnat(pnp, rpnp, followlink, dirvpp, compvpp, NULL)); 131 } 132 133 int 134 lookuppnat( 135 struct pathname *pnp, /* pathname to lookup */ 136 struct pathname *rpnp, /* if non-NULL, return resolved path */ 137 enum symfollow followlink, /* (don't) follow sym links */ 138 vnode_t **dirvpp, /* ptr for parent vnode */ 139 vnode_t **compvpp, /* ptr for entry vnode */ 140 vnode_t *startvp) /* start search from this vp */ 141 { 142 vnode_t *vp; /* current directory vp */ 143 vnode_t *rootvp; 144 proc_t *p = curproc; 145 146 if (pnp->pn_pathlen == 0) 147 return (ENOENT); 148 149 mutex_enter(&p->p_lock); /* for u_rdir and u_cdir */ 150 if ((rootvp = PTOU(p)->u_rdir) == NULL) 151 rootvp = rootdir; 152 else if (rootvp != rootdir) /* no need to VN_HOLD rootdir */ 153 VN_HOLD(rootvp); 154 155 if (pnp->pn_path[0] == '/') { 156 vp = rootvp; 157 } else { 158 vp = (startvp == NULL) ? PTOU(p)->u_cdir : startvp; 159 } 160 VN_HOLD(vp); 161 mutex_exit(&p->p_lock); 162 163 /* 164 * Skip over leading slashes 165 */ 166 if (pnp->pn_path[0] == '/') { 167 do { 168 pnp->pn_path++; 169 pnp->pn_pathlen--; 170 } while (pnp->pn_path[0] == '/'); 171 } 172 173 return (lookuppnvp(pnp, rpnp, followlink, dirvpp, 174 compvpp, rootvp, vp, CRED())); 175 } 176 177 /* Private flag to do our getcwd() dirty work */ 178 #define LOOKUP_CHECKREAD 0x10 179 #define LOOKUP_MASK (~LOOKUP_CHECKREAD) 180 181 /* 182 * Starting at current directory, translate pathname pnp to end. 
183 * Leave pathname of final component in pnp, return the vnode 184 * for the final component in *compvpp, and return the vnode 185 * for the parent of the final component in dirvpp. 186 * 187 * This is the central routine in pathname translation and handles 188 * multiple components in pathnames, separating them at /'s. It also 189 * implements mounted file systems and processes symbolic links. 190 * 191 * vp is the vnode where the directory search should start. 192 * 193 * Reference counts: vp must be held prior to calling this function. rootvp 194 * should only be held if rootvp != rootdir. 195 */ 196 int 197 lookuppnvp( 198 struct pathname *pnp, /* pathname to lookup */ 199 struct pathname *rpnp, /* if non-NULL, return resolved path */ 200 int flags, /* follow symlinks */ 201 vnode_t **dirvpp, /* ptr for parent vnode */ 202 vnode_t **compvpp, /* ptr for entry vnode */ 203 vnode_t *rootvp, /* rootvp */ 204 vnode_t *vp, /* directory to start search at */ 205 cred_t *cr) /* user's credential */ 206 { 207 vnode_t *cvp; /* current component vp */ 208 vnode_t *tvp; /* addressable temp ptr */ 209 char component[MAXNAMELEN]; /* buffer for component (incl null) */ 210 int error; 211 int nlink; 212 int lookup_flags; 213 vnode_t *startvp; 214 vnode_t *zonevp = curproc->p_zone->zone_rootvp; /* zone root */ 215 int must_be_directory = 0; 216 size_t plen; 217 218 CPU_STATS_ADDQ(CPU, sys, namei, 1); 219 nlink = 0; 220 cvp = NULL; 221 if (rpnp) 222 rpnp->pn_pathlen = 0; 223 lookup_flags = dirvpp ? LOOKUP_DIR : 0; 224 #ifdef C2_AUDIT 225 if (audit_active) 226 audit_anchorpath(pnp, vp == rootvp); 227 #endif 228 229 /* 230 * Eliminate any trailing slashes in the pathname. 231 * If there are any, we must follow all symlinks. 232 * Also, we must guarantee that the last component is a directory. 233 */ 234 if (pn_fixslash(pnp)) { 235 flags |= FOLLOW; 236 must_be_directory = 1; 237 } 238 239 startvp = vp; 240 next: 241 /* 242 * Make sure we have a directory. 
243 */ 244 if (vp->v_type != VDIR) { 245 error = ENOTDIR; 246 goto bad; 247 } 248 249 if (rpnp && VN_CMP(vp, rootvp)) 250 (void) pn_set(rpnp, "/"); 251 252 /* 253 * Process the next component of the pathname. 254 */ 255 if (error = pn_getcomponent(pnp, component)) { 256 #ifdef C2_AUDIT 257 if (audit_active) 258 audit_addcomponent(pnp); 259 #endif 260 goto bad; 261 } 262 263 /* 264 * Handle "..": two special cases. 265 * 1. If we're at the root directory (e.g. after chroot or 266 * zone_enter) then change ".." to "." so we can't get 267 * out of this subtree. 268 * 2. If this vnode is the root of a mounted file system, 269 * then replace it with the vnode that was mounted on 270 * so that we take the ".." in the other file system. 271 */ 272 if (component[0] == '.' && component[1] == '.' && component[2] == 0) { 273 checkforroot: 274 if (VN_CMP(vp, rootvp) || VN_CMP(vp, zonevp)) { 275 component[1] = '\0'; 276 } else if (vp->v_flag & VROOT) { 277 vfs_t *vfsp; 278 cvp = vp; 279 280 /* 281 * While we deal with the vfs pointer from the vnode 282 * the filesystem could have been forcefully unmounted 283 * and the vnode's v_vfsp could have been invalidated 284 * by VFS_UNMOUNT. Hence, we cache v_vfsp and use it 285 * with vfs_rlock_wait/vfs_unlock. 286 * It is safe to use the v_vfsp even it is freed by 287 * VFS_UNMOUNT because vfs_rlock_wait/vfs_unlock 288 * do not dereference v_vfsp. It is just used as a 289 * magic cookie. 290 * One more corner case here is the memory getting 291 * reused for another vfs structure. In this case 292 * lookuppnvp's vfs_rlock_wait will succeed, domount's 293 * vfs_lock will fail and domount will bail out with an 294 * error (EBUSY). 295 */ 296 vfsp = cvp->v_vfsp; 297 298 /* 299 * This lock is used to synchronize 300 * mounts/unmounts and lookups. 301 * Threads doing mounts/unmounts hold the 302 * writers version vfs_lock_wait(). 
303 */ 304 305 vfs_rlock_wait(vfsp); 306 307 /* 308 * If this vnode is on a file system that 309 * has been forcibly unmounted, 310 * we can't proceed. Cancel this operation 311 * and return EIO. 312 * 313 * vfs_vnodecovered is NULL if unmounted. 314 * Currently, nfs uses VFS_UNMOUNTED to 315 * check if it's a forced-umount. Keep the 316 * same checking here as well even though it 317 * may not be needed. 318 */ 319 if (((vp = cvp->v_vfsp->vfs_vnodecovered) == NULL) || 320 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) { 321 vfs_unlock(vfsp); 322 VN_RELE(cvp); 323 return (EIO); 324 } 325 VN_HOLD(vp); 326 vfs_unlock(vfsp); 327 VN_RELE(cvp); 328 cvp = NULL; 329 goto checkforroot; 330 } 331 } 332 333 /* 334 * LOOKUP_CHECKREAD is a private flag used by vnodetopath() to indicate 335 * that we need to have read permission on every directory in the entire 336 * path. This is used to ensure that a forward-lookup of a cached value 337 * has the same effect as a reverse-lookup when the cached value cannot 338 * be found. 339 */ 340 if ((flags & LOOKUP_CHECKREAD) && 341 (error = VOP_ACCESS(vp, VREAD, 0, cr)) != 0) 342 goto bad; 343 344 /* 345 * Perform a lookup in the current directory. 346 */ 347 error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags, 348 rootvp, cr); 349 cvp = tvp; 350 if (error) { 351 cvp = NULL; 352 /* 353 * On error, return hard error if 354 * (a) we're not at the end of the pathname yet, or 355 * (b) the caller didn't want the parent directory, or 356 * (c) we failed for some reason other than a missing entry. 357 */ 358 if (pn_pathleft(pnp) || dirvpp == NULL || error != ENOENT) 359 goto bad; 360 #ifdef C2_AUDIT 361 if (audit_active) { /* directory access */ 362 if (error = audit_savepath(pnp, vp, error, cr)) 363 goto bad_noaudit; 364 } 365 #endif 366 pn_setlast(pnp); 367 /* 368 * We inform the caller that the desired entry must be 369 * a directory by adding a '/' to the component name. 
370 */ 371 if (must_be_directory && (error = pn_addslash(pnp)) != 0) 372 goto bad; 373 *dirvpp = vp; 374 /* 375 * We cache the path of everything up to right before this 376 * component and store that in the parent directory. 377 */ 378 if (vfs_vnode_path && pnp->pn_path != pnp->pn_buf) { 379 VN_SETPATH(rootvp, startvp, vp, pnp->pn_buf, 380 pnp->pn_path - pnp->pn_buf); 381 } 382 if (compvpp != NULL) 383 *compvpp = NULL; 384 if (rootvp != rootdir) 385 VN_RELE(rootvp); 386 return (0); 387 } 388 389 /* 390 * Traverse mount points. 391 * XXX why don't we need to hold a read lock here (call vn_vfsrlock)? 392 * What prevents a concurrent update to v_vfsmountedhere? 393 * Possible answer: if mounting, we might not see the mount 394 * if it is concurrently coming into existence, but that's 395 * really not much different from the thread running a bit slower. 396 * If unmounting, we may get into traverse() when we shouldn't, 397 * but traverse() will catch this case for us. 398 * (For this to work, fetching v_vfsmountedhere had better 399 * be atomic!) 400 */ 401 if (vn_mountedvfs(cvp) != NULL) { 402 tvp = cvp; 403 if ((error = traverse(&tvp)) != 0) { 404 /* 405 * It is required to assign cvp here, because 406 * traverse() will return a held vnode which 407 * may different than the vnode that was passed 408 * in (even in the error case). If traverse() 409 * changes the vnode it releases the original, 410 * and holds the new one. 411 */ 412 cvp = tvp; 413 goto bad; 414 } 415 cvp = tvp; 416 } 417 418 /* 419 * If we hit a symbolic link and there is more path to be 420 * translated or this operation does not wish to apply 421 * to a link, then place the contents of the link at the 422 * front of the remaining pathname. 
423 */ 424 if (cvp->v_type == VLNK && ((flags & FOLLOW) || pn_pathleft(pnp))) { 425 struct pathname linkpath; 426 #ifdef C2_AUDIT 427 if (audit_active) { 428 if (error = audit_pathcomp(pnp, cvp, cr)) 429 goto bad; 430 } 431 #endif 432 433 if (++nlink > MAXSYMLINKS) { 434 error = ELOOP; 435 goto bad; 436 } 437 pn_alloc(&linkpath); 438 if (error = pn_getsymlink(cvp, &linkpath, cr)) { 439 pn_free(&linkpath); 440 goto bad; 441 } 442 443 #ifdef C2_AUDIT 444 if (audit_active) 445 audit_symlink(pnp, &linkpath); 446 #endif /* C2_AUDIT */ 447 448 if (pn_pathleft(&linkpath) == 0) 449 (void) pn_set(&linkpath, "."); 450 error = pn_insert(pnp, &linkpath, strlen(component)); 451 pn_free(&linkpath); 452 if (error) 453 goto bad; 454 VN_RELE(cvp); 455 cvp = NULL; 456 if (pnp->pn_pathlen == 0) { 457 error = ENOENT; 458 goto bad; 459 } 460 if (pnp->pn_path[0] == '/') { 461 do { 462 pnp->pn_path++; 463 pnp->pn_pathlen--; 464 } while (pnp->pn_path[0] == '/'); 465 VN_RELE(vp); 466 vp = rootvp; 467 VN_HOLD(vp); 468 } 469 #ifdef C2_AUDIT 470 if (audit_active) 471 audit_anchorpath(pnp, vp == rootvp); 472 #endif 473 if (pn_fixslash(pnp)) { 474 flags |= FOLLOW; 475 must_be_directory = 1; 476 } 477 goto next; 478 } 479 480 /* 481 * If rpnp is non-NULL, remember the resolved path name therein. 482 * Do not include "." components. Collapse occurrences of 483 * "previous/..", so long as "previous" is not itself "..". 484 * Exhausting rpnp results in error ENAMETOOLONG. 
485 */ 486 if (rpnp && strcmp(component, ".") != 0) { 487 size_t len; 488 489 if (strcmp(component, "..") == 0 && 490 rpnp->pn_pathlen != 0 && 491 !((rpnp->pn_pathlen > 2 && 492 strncmp(rpnp->pn_path+rpnp->pn_pathlen-3, "/..", 3) == 0) || 493 (rpnp->pn_pathlen == 2 && 494 strncmp(rpnp->pn_path, "..", 2) == 0))) { 495 while (rpnp->pn_pathlen && 496 rpnp->pn_path[rpnp->pn_pathlen-1] != '/') 497 rpnp->pn_pathlen--; 498 if (rpnp->pn_pathlen > 1) 499 rpnp->pn_pathlen--; 500 rpnp->pn_path[rpnp->pn_pathlen] = '\0'; 501 } else { 502 if (rpnp->pn_pathlen != 0 && 503 rpnp->pn_path[rpnp->pn_pathlen-1] != '/') 504 rpnp->pn_path[rpnp->pn_pathlen++] = '/'; 505 error = copystr(component, 506 rpnp->pn_path + rpnp->pn_pathlen, 507 rpnp->pn_bufsize - rpnp->pn_pathlen, &len); 508 if (error) /* copystr() returns ENAMETOOLONG */ 509 goto bad; 510 rpnp->pn_pathlen += (len - 1); 511 ASSERT(rpnp->pn_bufsize > rpnp->pn_pathlen); 512 } 513 } 514 515 /* 516 * If no more components, return last directory (if wanted) and 517 * last component (if wanted). 518 */ 519 if (pn_pathleft(pnp) == 0) { 520 /* 521 * If there was a trailing slash in the pathname, 522 * make sure the last component is a directory. 523 */ 524 if (must_be_directory && cvp->v_type != VDIR) { 525 error = ENOTDIR; 526 goto bad; 527 } 528 if (dirvpp != NULL) { 529 /* 530 * Check that we have the real parent and not 531 * an alias of the last component. 
532 */ 533 if (vn_compare(vp, cvp)) { 534 #ifdef C2_AUDIT 535 if (audit_active) 536 (void) audit_savepath(pnp, cvp, 537 EINVAL, cr); 538 #endif 539 pn_setlast(pnp); 540 VN_RELE(vp); 541 VN_RELE(cvp); 542 if (rootvp != rootdir) 543 VN_RELE(rootvp); 544 return (EINVAL); 545 } 546 #ifdef C2_AUDIT 547 if (audit_active) { 548 if (error = audit_pathcomp(pnp, vp, cr)) 549 goto bad; 550 } 551 #endif 552 *dirvpp = vp; 553 } else 554 VN_RELE(vp); 555 #ifdef C2_AUDIT 556 if (audit_active) 557 (void) audit_savepath(pnp, cvp, 0, cr); 558 #endif 559 if (pnp->pn_path == pnp->pn_buf) 560 (void) pn_set(pnp, "."); 561 else 562 pn_setlast(pnp); 563 if (rpnp) { 564 if (VN_CMP(cvp, rootvp)) 565 (void) pn_set(rpnp, "/"); 566 else if (rpnp->pn_pathlen == 0) 567 (void) pn_set(rpnp, "."); 568 } 569 570 /* 571 * Store the path for this vnode and/or its parent. 572 */ 573 if (vfs_vnode_path) { 574 plen = pnp->pn_path - pnp->pn_buf; 575 if (dirvpp != NULL && plen != 0) 576 VN_SETPATH(rootvp, startvp, *dirvpp, 577 pnp->pn_buf, plen); 578 VN_SETPATH(rootvp, startvp, cvp, pnp->pn_buf, 579 plen + pnp->pn_pathlen); 580 } 581 582 if (compvpp != NULL) 583 *compvpp = cvp; 584 else 585 VN_RELE(cvp); 586 if (rootvp != rootdir) 587 VN_RELE(rootvp); 588 return (0); 589 } 590 591 #ifdef C2_AUDIT 592 if (audit_active) { 593 if (error = audit_pathcomp(pnp, cvp, cr)) 594 goto bad; 595 } 596 #endif 597 598 /* 599 * Skip over slashes from end of last component. 600 */ 601 while (pnp->pn_path[0] == '/') { 602 pnp->pn_path++; 603 pnp->pn_pathlen--; 604 } 605 606 /* 607 * Searched through another level of directory: 608 * release previous directory handle and save new (result 609 * of lookup) as current directory. 610 */ 611 VN_RELE(vp); 612 vp = cvp; 613 cvp = NULL; 614 goto next; 615 616 bad: 617 #ifdef C2_AUDIT 618 if (audit_active) /* reached end of path */ 619 (void) audit_savepath(pnp, cvp, error, cr); 620 bad_noaudit: 621 #endif 622 /* 623 * Error. Release vnodes and return. 
624 */ 625 if (cvp) 626 VN_RELE(cvp); 627 /* 628 * If the error was ESTALE and the current directory to look in 629 * was the root for this lookup, the root for a mounted file 630 * system, or the starting directory for lookups, then 631 * return ENOENT instead of ESTALE. In this case, no recovery 632 * is possible by the higher level. If ESTALE was returned for 633 * some intermediate directory along the path, then recovery 634 * is potentially possible and retrying from the higher level 635 * will either correct the situation by purging stale cache 636 * entries or eventually get back to the point where no recovery 637 * is possible. 638 */ 639 if (error == ESTALE && 640 (VN_CMP(vp, rootvp) || (vp->v_flag & VROOT) || vp == startvp)) 641 error = ENOENT; 642 VN_RELE(vp); 643 if (rootvp != rootdir) 644 VN_RELE(rootvp); 645 return (error); 646 } 647 648 /* 649 * Traverse a mount point. Routine accepts a vnode pointer as a reference 650 * parameter and performs the indirection, releasing the original vnode. 651 */ 652 int 653 traverse(vnode_t **cvpp) 654 { 655 int error = 0; 656 vnode_t *cvp; 657 vnode_t *tvp; 658 vfs_t *vfsp; 659 660 cvp = *cvpp; 661 662 /* 663 * If this vnode is mounted on, then we transparently indirect 664 * to the vnode which is the root of the mounted file system. 665 * Before we do this we must check that an unmount is not in 666 * progress on this vnode. 667 */ 668 669 for (;;) { 670 /* 671 * Try to read lock the vnode. If this fails because 672 * the vnode is already write locked, then check to 673 * see whether it is the current thread which locked 674 * the vnode. If it is not, then read lock the vnode 675 * by waiting to acquire the lock. 676 * 677 * The code path in domount() is an example of support 678 * which needs to look up two pathnames and locks one 679 * of them in between the two lookups. 
680 */ 681 error = vn_vfsrlock(cvp); 682 if (error) { 683 if (!vn_vfswlock_held(cvp)) 684 error = vn_vfsrlock_wait(cvp); 685 if (error != 0) { 686 /* 687 * lookuppn() expects a held vnode to be 688 * returned because it promptly calls 689 * VN_RELE after the error return 690 */ 691 *cvpp = cvp; 692 return (error); 693 } 694 } 695 696 /* 697 * Reached the end of the mount chain? 698 */ 699 vfsp = vn_mountedvfs(cvp); 700 if (vfsp == NULL) { 701 vn_vfsunlock(cvp); 702 break; 703 } 704 705 /* 706 * The read lock must be held across the call to VFS_ROOT() to 707 * prevent a concurrent unmount from destroying the vfs. 708 */ 709 error = VFS_ROOT(vfsp, &tvp); 710 vn_vfsunlock(cvp); 711 712 if (error) 713 break; 714 715 VN_RELE(cvp); 716 717 cvp = tvp; 718 } 719 720 *cvpp = cvp; 721 return (error); 722 } 723 724 /* 725 * Return the lowermost vnode if this is a mountpoint. 726 */ 727 static vnode_t * 728 vn_under(vnode_t *vp) 729 { 730 vnode_t *uvp; 731 vfs_t *vfsp; 732 733 while (vp->v_flag & VROOT) { 734 735 vfsp = vp->v_vfsp; 736 vfs_rlock_wait(vfsp); 737 if ((uvp = vfsp->vfs_vnodecovered) == NULL || 738 (vfsp->vfs_flag & VFS_UNMOUNTED)) { 739 vfs_unlock(vfsp); 740 break; 741 } 742 VN_HOLD(uvp); 743 vfs_unlock(vfsp); 744 VN_RELE(vp); 745 vp = uvp; 746 } 747 748 return (vp); 749 } 750 751 static int 752 vnode_match(vnode_t *v1, vnode_t *v2, cred_t *cr) 753 { 754 vattr_t v1attr, v2attr; 755 756 /* 757 * If we have a device file, check to see if is a cloned open of the 758 * same device. For self-cloning devices, the major numbers will match. 759 * For devices cloned through the 'clone' driver, the minor number of 760 * the source device will be the same as the major number of the cloned 761 * device. 
762 */ 763 if ((v1->v_type == VCHR || v1->v_type == VBLK) && 764 v1->v_type == v2->v_type) { 765 if ((spec_is_selfclone(v1) || spec_is_selfclone(v2)) && 766 getmajor(v1->v_rdev) == getmajor(v2->v_rdev)) 767 return (1); 768 769 if (spec_is_clone(v1) && 770 getmajor(v1->v_rdev) == getminor(v2->v_rdev)) 771 return (1); 772 773 if (spec_is_clone(v2) && 774 getmajor(v2->v_rdev) == getminor(v1->v_rdev)) 775 return (1); 776 } 777 778 v1attr.va_mask = v2attr.va_mask = AT_TYPE; 779 780 /* 781 * This check for symbolic links handles the pseudo-symlinks in procfs. 782 * These particular links have v_type of VDIR, but the attributes have a 783 * type of VLNK. We need to avoid these links because otherwise if we 784 * are currently in '/proc/self/fd', then '/proc/self/cwd' will compare 785 * as the same vnode. 786 */ 787 if (VOP_GETATTR(v1, &v1attr, 0, cr) != 0 || 788 VOP_GETATTR(v2, &v2attr, 0, cr) != 0 || 789 v1attr.va_type == VLNK || v2attr.va_type == VLNK) 790 return (0); 791 792 v1attr.va_mask = v2attr.va_mask = AT_TYPE | AT_FSID | AT_NODEID; 793 794 if (VOP_GETATTR(v1, &v1attr, ATTR_REAL, cr) != 0 || 795 VOP_GETATTR(v2, &v2attr, ATTR_REAL, cr) != 0) 796 return (0); 797 798 return (v1attr.va_fsid == v2attr.va_fsid && 799 v1attr.va_nodeid == v2attr.va_nodeid); 800 } 801 802 803 /* 804 * Find the entry in the directory corresponding to the target vnode. 805 */ 806 int 807 dirfindvp(vnode_t *vrootp, vnode_t *dvp, vnode_t *tvp, cred_t *cr, char *dbuf, 808 size_t dlen, dirent64_t **rdp) 809 { 810 size_t dbuflen; 811 struct iovec iov; 812 struct uio uio; 813 int err; 814 int eof; 815 vnode_t *cmpvp; 816 struct dirent64 *dp; 817 pathname_t pnp; 818 819 ASSERT(dvp->v_type == VDIR); 820 821 /* 822 * This is necessary because of the strange semantics of VOP_LOOKUP(). 
823 */ 824 bzero(&pnp, sizeof (pnp)); 825 826 eof = 0; 827 828 uio.uio_iov = &iov; 829 uio.uio_iovcnt = 1; 830 uio.uio_segflg = UIO_SYSSPACE; 831 uio.uio_fmode = 0; 832 uio.uio_extflg = UIO_COPY_CACHED; 833 uio.uio_loffset = 0; 834 835 if ((err = VOP_ACCESS(dvp, VREAD, 0, cr)) != 0) 836 return (err); 837 838 while (!eof) { 839 uio.uio_resid = dlen; 840 iov.iov_base = dbuf; 841 iov.iov_len = dlen; 842 843 (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL); 844 err = VOP_READDIR(dvp, &uio, cr, &eof); 845 VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL); 846 847 dbuflen = dlen - uio.uio_resid; 848 849 if (err || dbuflen == 0) 850 break; 851 852 dp = (dirent64_t *)dbuf; 853 while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) { 854 /* 855 * Ignore '.' and '..' entries 856 */ 857 if (strcmp(dp->d_name, ".") == 0 || 858 strcmp(dp->d_name, "..") == 0) { 859 dp = (dirent64_t *)((intptr_t)dp + 860 dp->d_reclen); 861 continue; 862 } 863 864 err = VOP_LOOKUP(dvp, dp->d_name, &cmpvp, &pnp, 0, 865 vrootp, cr); 866 867 /* 868 * We only want to bail out if there was an error other 869 * than ENOENT. Otherwise, it could be that someone 870 * just removed an entry since the readdir() call, and 871 * the entry we want is further on in the directory. 872 */ 873 if (err == 0) { 874 if (vnode_match(tvp, cmpvp, cr)) { 875 VN_RELE(cmpvp); 876 *rdp = dp; 877 return (0); 878 } 879 880 VN_RELE(cmpvp); 881 } else if (err != ENOENT) { 882 return (err); 883 } 884 885 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen); 886 } 887 } 888 889 /* 890 * Something strange has happened, this directory does not contain the 891 * specified vnode. This should never happen in the normal case, since 892 * we ensured that dvp is the parent of vp. This may be possible in 893 * some race conditions, so fail gracefully. 
894 */ 895 if (err == 0) 896 err = ENOENT; 897 898 return (err); 899 } 900 901 /* 902 * Given a global path (from rootdir), and a vnode that is the current root, 903 * return the portion of the path that is beneath the current root or NULL on 904 * failure. The path MUST be a resolved path (no '..' entries or symlinks), 905 * otherwise this function will fail. 906 */ 907 static char * 908 localpath(char *path, struct vnode *vrootp, cred_t *cr) 909 { 910 vnode_t *vp; 911 vnode_t *cvp; 912 char component[MAXNAMELEN]; 913 char *ret = NULL; 914 pathname_t pn; 915 916 /* 917 * We use vn_compare() instead of VN_CMP() in order to detect lofs 918 * mounts and stacked vnodes. 919 */ 920 if (vn_compare(vrootp, rootdir)) 921 return (path); 922 923 if (pn_get(path, UIO_SYSSPACE, &pn) != 0) 924 return (NULL); 925 926 vp = rootdir; 927 VN_HOLD(vp); 928 929 while (pn_pathleft(&pn)) { 930 pn_skipslash(&pn); 931 932 if (pn_getcomponent(&pn, component) != 0) 933 break; 934 935 if (vn_ismntpt(vp) && traverse(&vp) != 0) 936 break; 937 938 if (VOP_LOOKUP(vp, component, &cvp, &pn, 0, rootdir, cr) != 0) 939 break; 940 941 VN_RELE(vp); 942 vp = cvp; 943 944 if (vn_compare(vp, vrootp)) { 945 ret = path + (pn.pn_path - pn.pn_buf); 946 break; 947 } 948 } 949 950 VN_RELE(vp); 951 pn_free(&pn); 952 953 return (ret); 954 } 955 956 /* 957 * Given a directory, return the full, resolved path. This looks up "..", 958 * searches for the given vnode in the parent, appends the component, etc. It 959 * is used to implement vnodetopath() and getcwd() when the cached path fails 960 * (or vfs_vnode_path is not set). 
961 */ 962 static int 963 dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr) 964 { 965 pathname_t pn, rpn, emptypn; 966 vnode_t *cmpvp, *pvp = NULL; 967 vnode_t *startvp = vp; 968 int err = 0; 969 size_t complen; 970 char *dbuf; 971 dirent64_t *dp; 972 char *bufloc; 973 size_t dlen = DIRENT64_RECLEN(MAXPATHLEN); 974 refstr_t *mntpt; 975 976 /* Operation only allowed on directories */ 977 ASSERT(vp->v_type == VDIR); 978 979 /* We must have at least enough space for "/" */ 980 if (buflen < 2) 981 return (ENAMETOOLONG); 982 983 /* Start at end of string with terminating null */ 984 bufloc = &buf[buflen - 1]; 985 *bufloc = '\0'; 986 987 pn_alloc(&pn); 988 pn_alloc(&rpn); 989 dbuf = kmem_alloc(dlen, KM_SLEEP); 990 bzero(&emptypn, sizeof (emptypn)); 991 992 /* 993 * Begin with an additional reference on vp. This will be decremented 994 * during the loop. 995 */ 996 VN_HOLD(vp); 997 998 for (;;) { 999 /* 1000 * Return if we've reached the root. If the buffer is empty, 1001 * return '/'. We explicitly don't use vn_compare(), since it 1002 * compares the real vnodes. A lofs mount of '/' would produce 1003 * incorrect results otherwise. 1004 */ 1005 if (VN_CMP(vrootp, vp)) { 1006 if (*bufloc == '\0') 1007 *--bufloc = '/'; 1008 break; 1009 } 1010 1011 /* 1012 * If we've reached the VFS root, something has gone wrong. We 1013 * should have reached the root in the above check. The only 1014 * explantation is that 'vp' is not contained withing the given 1015 * root, in which case we return EPERM. 1016 */ 1017 if (VN_CMP(rootdir, vp)) { 1018 err = EPERM; 1019 goto out; 1020 } 1021 1022 /* 1023 * Shortcut: see if this vnode is a mountpoint. If so, 1024 * grab the path information from the vfs_t. 
1025 */ 1026 if (vp->v_flag & VROOT) { 1027 1028 mntpt = vfs_getmntpoint(vp->v_vfsp); 1029 if ((err = pn_set(&pn, (char *)refstr_value(mntpt))) 1030 == 0) { 1031 refstr_rele(mntpt); 1032 rpn.pn_path = rpn.pn_buf; 1033 1034 /* 1035 * Ensure the mointpoint still exists. 1036 */ 1037 VN_HOLD(vrootp); 1038 if (vrootp != rootdir) 1039 VN_HOLD(vrootp); 1040 if (lookuppnvp(&pn, &rpn, 0, NULL, 1041 &cmpvp, vrootp, vrootp, cr) == 0) { 1042 1043 if (VN_CMP(vp, cmpvp)) { 1044 VN_RELE(cmpvp); 1045 1046 complen = strlen(rpn.pn_path); 1047 bufloc -= complen; 1048 if (bufloc < buf) { 1049 err = ERANGE; 1050 goto out; 1051 } 1052 bcopy(rpn.pn_path, bufloc, 1053 complen); 1054 break; 1055 } else { 1056 VN_RELE(cmpvp); 1057 } 1058 } 1059 } else { 1060 refstr_rele(mntpt); 1061 } 1062 } 1063 1064 /* 1065 * Shortcuts failed, search for this vnode in its parent. If 1066 * this is a mountpoint, then get the vnode underneath. 1067 */ 1068 if (vp->v_flag & VROOT) 1069 vp = vn_under(vp); 1070 if ((err = VOP_LOOKUP(vp, "..", &pvp, &emptypn, 0, vrootp, cr)) 1071 != 0) 1072 goto out; 1073 1074 /* 1075 * With extended attributes, it's possible for a directory to 1076 * have a parent that is a regular file. Check for that here. 1077 */ 1078 if (pvp->v_type != VDIR) { 1079 err = ENOTDIR; 1080 goto out; 1081 } 1082 1083 /* 1084 * If this is true, something strange has happened. This is 1085 * only true if we are the root of a filesystem, which should 1086 * have been caught by the check above. 1087 */ 1088 if (VN_CMP(pvp, vp)) { 1089 err = ENOENT; 1090 goto out; 1091 } 1092 1093 /* 1094 * Search the parent directory for the entry corresponding to 1095 * this vnode. 1096 */ 1097 if ((err = dirfindvp(vrootp, pvp, vp, cr, dbuf, dlen, &dp)) 1098 != 0) 1099 goto out; 1100 complen = strlen(dp->d_name); 1101 bufloc -= complen; 1102 if (bufloc <= buf) { 1103 err = ENAMETOOLONG; 1104 goto out; 1105 } 1106 bcopy(dp->d_name, bufloc, complen); 1107 1108 /* Prepend a slash to the current path. 
*/ 1109 *--bufloc = '/'; 1110 1111 /* And continue with the next component */ 1112 VN_RELE(vp); 1113 vp = pvp; 1114 pvp = NULL; 1115 } 1116 1117 /* 1118 * Place the path at the beginning of the buffer. 1119 */ 1120 if (bufloc != buf) 1121 ovbcopy(bufloc, buf, buflen - (bufloc - buf)); 1122 1123 out: 1124 /* 1125 * If the error was ESTALE and the current directory to look in 1126 * was the root for this lookup, the root for a mounted file 1127 * system, or the starting directory for lookups, then 1128 * return ENOENT instead of ESTALE. In this case, no recovery 1129 * is possible by the higher level. If ESTALE was returned for 1130 * some intermediate directory along the path, then recovery 1131 * is potentially possible and retrying from the higher level 1132 * will either correct the situation by purging stale cache 1133 * entries or eventually get back to the point where no recovery 1134 * is possible. 1135 */ 1136 if (err == ESTALE && 1137 (VN_CMP(vp, vrootp) || (vp->v_flag & VROOT) || vp == startvp)) 1138 err = ENOENT; 1139 1140 kmem_free(dbuf, dlen); 1141 VN_RELE(vp); 1142 if (pvp) 1143 VN_RELE(pvp); 1144 pn_free(&pn); 1145 pn_free(&rpn); 1146 1147 return (err); 1148 } 1149 1150 /* 1151 * The additional flag, LOOKUP_CHECKREAD, is ued to enforce artificial 1152 * constraints in order to be standards compliant. For example, if we have 1153 * the cached path of '/foo/bar', and '/foo' has permissions 100 (execute 1154 * only), then we can legitimately look up the path to the current working 1155 * directory without needing read permission. Existing standards tests, 1156 * however, assume that we are determining the path by repeatedly looking up 1157 * "..". We need to keep this behavior in order to maintain backwards 1158 * compatibility. 
 */
/*
 * vnodetopath_common() writes the path of 'vp', relative to 'vrootp', into
 * 'buf' (buflen bytes).  It first tries the path cached in the vnode,
 * verifying it with a forward lookup; if that is unavailable or does not
 * lead back to vp it falls back to dirtopath() (for a non-directory, after
 * a DNLC reverse lookup to find the parent and name).
 *
 * 'flags' is OR-ed into the flags of the verifying lookuppnvp() call.
 * Returns 0 on success or an errno value (ENOENT when a non-directory is
 * not found by the DNLC reverse lookup, ENAMETOOLONG when the result does
 * not fit).
 */
static int
vnodetopath_common(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen,
    cred_t *cr, int flags)
{
	pathname_t pn, rpn;
	int ret, len;
	vnode_t *compvp, *pvp, *realvp;
	proc_t *p = curproc;
	char path[MAXNAMELEN];
	int doclose = 0;

	/*
	 * If vrootp is NULL, get the root for curproc.  Callers with any other
	 * requirements should pass in a different vrootp.
	 * Either way we take our own hold on vrootp, released before return.
	 */
	if (vrootp == NULL) {
		mutex_enter(&p->p_lock);
		if ((vrootp = PTOU(p)->u_rdir) == NULL)
			vrootp = rootdir;
		VN_HOLD(vrootp);
		mutex_exit(&p->p_lock);
	} else {
		VN_HOLD(vrootp);
	}

	/*
	 * This is to get around an annoying artifact of the /proc filesystem,
	 * which is the behavior of {cwd/root}.  Trying to resolve this path
	 * will result in /proc/pid/cwd instead of whatever the real working
	 * directory is.  We can't rely on VOP_REALVP(), since that will break
	 * lofs.  The only difference between procfs and lofs is that opening
	 * the file will return the underlying vnode in the case of procfs.
	 */
	if (vp->v_type == VDIR && VOP_REALVP(vp, &realvp) == 0 &&
	    realvp != vp) {
		VN_HOLD(vp);
		if (VOP_OPEN(&vp, FREAD, cr) == 0)
			doclose = 1;	/* close and release vp on the way out */
		else
			VN_RELE(vp);
	}

	pn_alloc(&pn);

	/*
	 * Check to see if we have a cached path in the vnode.
	 */
	mutex_enter(&vp->v_lock);
	if (vn_path(vp) != NULL) {
		(void) pn_set(&pn, vn_path(vp));
		mutex_exit(&vp->v_lock);

		pn_alloc(&rpn);

		/* We should only cache absolute paths */
		ASSERT(pn.pn_buf[0] == '/');

		/*
		 * If we are in a zone or a chroot environment, then we have to
		 * take additional steps, since the path to the root might not
		 * be readable with the current credentials, even though the
		 * process can legitimately access the file.  In this case, we
		 * do the following:
		 *
		 * lookuppnvp() with all privileges to get the resolved path.
		 * call localpath() to get the local portion of the path, and
		 * continue as normal.
		 *
		 * If the conversion to a local path fails, then we continue
		 * as normal.  This is a heuristic to make process object file
		 * paths available from within a zone.  Because lofs doesn't
		 * support page operations, the vnode stored in the seg_t is
		 * actually the underlying real vnode, not the lofs node itself.
		 * Most of the time, the lofs path is the same as the underlying
		 * vnode (for example, /usr/lib/libc.so.1).
		 */
		if (vrootp != rootdir) {
			char *local = NULL;
			VN_HOLD(rootdir);
			if (lookuppnvp(&pn, &rpn, FOLLOW,
			    NULL, &compvp, rootdir, rootdir, kcred) == 0) {
				local = localpath(rpn.pn_path, vrootp,
				    kcred);
				VN_RELE(compvp);
			}

			/*
			 * The original pn was changed through lookuppnvp(), so
			 * reset it.
			 */
			if (local) {
				(void) pn_set(&pn, local);
			} else {
				mutex_enter(&vp->v_lock);
				if (vn_path(vp) != NULL) {
					(void) pn_set(&pn, vn_path(vp));
					mutex_exit(&vp->v_lock);
				} else {
					mutex_exit(&vp->v_lock);
					goto notcached;
				}
			}
		}

		/*
		 * We should have a local path at this point, so start the
		 * search from the root of the current process.
		 * The holds taken here are for lookuppnvp(): one for the
		 * starting directory and, if vrootp is not rootdir, one for
		 * the root.
		 */
		VN_HOLD(vrootp);
		if (vrootp != rootdir)
			VN_HOLD(vrootp);
		ret = lookuppnvp(&pn, &rpn, FOLLOW | flags, NULL,
		    &compvp, vrootp, vrootp, cr);
		if (ret == 0) {
			/*
			 * Check to see if the returned vnode is the same as
			 * the one we expect.  If not, give up.
			 */
			if (!vn_compare(vp, compvp) &&
			    !vnode_match(vp, compvp, cr)) {
				VN_RELE(compvp);
				goto notcached;
			}

			VN_RELE(compvp);

			/*
			 * Return the result.
			 */
			if (buflen <= rpn.pn_pathlen)
				goto notcached;

			bcopy(rpn.pn_path, buf, rpn.pn_pathlen + 1);
			pn_free(&pn);
			pn_free(&rpn);
			VN_RELE(vrootp);
			if (doclose) {
				(void) VOP_CLOSE(vp, FREAD, 1, 0, cr);
				VN_RELE(vp);
			}
			return (0);
		}

notcached:
		/* Cached path unusable; fall through to the slow path. */
		pn_free(&rpn);
	} else {
		mutex_exit(&vp->v_lock);
	}

	pn_free(&pn);

	if (vp->v_type != VDIR) {
		/*
		 * If we don't have a directory, try to find it in the dnlc via
		 * reverse lookup.  Once this is found, we can use the regular
		 * directory search to find the full path.
		 */
		if ((pvp = dnlc_reverse_lookup(vp, path, MAXNAMELEN)) != NULL) {
			ret = dirtopath(vrootp, pvp, buf, buflen, cr);
			if (ret == 0) {
				/* Append "/<name>" to the parent's path. */
				len = strlen(buf);
				if (len + strlen(path) + 1 >= buflen) {
					ret = ENAMETOOLONG;
				} else {
					if (buf[len - 1] != '/')
						buf[len++] = '/';
					bcopy(path, buf + len,
					    strlen(path) + 1);
				}
			}

			VN_RELE(pvp);
		} else
			ret = ENOENT;
	} else
		ret = dirtopath(vrootp, vp, buf, buflen, cr);

	VN_RELE(vrootp);
	if (doclose) {
		(void) VOP_CLOSE(vp, FREAD, 1, 0, cr);
		VN_RELE(vp);
	}

	return (ret);
}

/*
 * Public entry point: vnodetopath_common() with no extra lookup flags.
 */
int
vnodetopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr)
{
	return (vnodetopath_common(vrootp, vp, buf, buflen, cr, 0));
}

int
dogetcwd(char *buf, size_t buflen)
{
	int ret;
	vnode_t *vp;
	vnode_t *compvp;
	refstr_t *cwd, *oldcwd;
	const char *value;
	pathname_t rpnp, pnp;
	proc_t *p = curproc;

	/*
	 * Check to see if there is a cached version of the cwd.  If so, lookup
	 * the cached value and make sure it is the same vnode.
1366 */ 1367 mutex_enter(&p->p_lock); 1368 if ((cwd = PTOU(p)->u_cwd) != NULL) 1369 refstr_hold(cwd); 1370 vp = PTOU(p)->u_cdir; 1371 VN_HOLD(vp); 1372 mutex_exit(&p->p_lock); 1373 1374 /* 1375 * Make sure we have permission to access the current directory. 1376 */ 1377 if ((ret = VOP_ACCESS(vp, VEXEC, 0, CRED())) != 0) { 1378 if (cwd != NULL) 1379 refstr_rele(cwd); 1380 VN_RELE(vp); 1381 return (ret); 1382 } 1383 1384 if (cwd) { 1385 value = refstr_value(cwd); 1386 if ((ret = pn_get((char *)value, UIO_SYSSPACE, &pnp)) != 0) { 1387 refstr_rele(cwd); 1388 VN_RELE(vp); 1389 return (ret); 1390 } 1391 1392 pn_alloc(&rpnp); 1393 1394 if (lookuppn(&pnp, &rpnp, NO_FOLLOW, NULL, &compvp) == 0) { 1395 1396 if (VN_CMP(vp, compvp) && 1397 strcmp(value, rpnp.pn_path) == 0) { 1398 VN_RELE(compvp); 1399 VN_RELE(vp); 1400 pn_free(&pnp); 1401 pn_free(&rpnp); 1402 if (strlen(value) + 1 > buflen) { 1403 refstr_rele(cwd); 1404 return (ENAMETOOLONG); 1405 } 1406 bcopy(value, buf, strlen(value) + 1); 1407 refstr_rele(cwd); 1408 return (0); 1409 } 1410 1411 VN_RELE(compvp); 1412 } 1413 1414 pn_free(&rpnp); 1415 pn_free(&pnp); 1416 1417 refstr_rele(cwd); 1418 } 1419 1420 ret = vnodetopath_common(NULL, vp, buf, buflen, CRED(), 1421 LOOKUP_CHECKREAD); 1422 1423 VN_RELE(vp); 1424 1425 /* 1426 * Store the new cwd and replace the existing cached copy. 1427 */ 1428 if (ret == 0) 1429 cwd = refstr_alloc(buf); 1430 else 1431 cwd = NULL; 1432 1433 mutex_enter(&p->p_lock); 1434 oldcwd = PTOU(p)->u_cwd; 1435 PTOU(p)->u_cwd = cwd; 1436 mutex_exit(&p->p_lock); 1437 1438 if (oldcwd) 1439 refstr_rele(oldcwd); 1440 1441 return (ret); 1442 } 1443