1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * University Copyright- Copyright (c) 1982, 1986, 1988 32 * The Regents of the University of California 33 * All Rights Reserved 34 * 35 * University Acknowledgment- Portions of this document are derived from 36 * software developed by the University of California, Berkeley, and its 37 * contributors. 38 */ 39 40 41 #pragma ident "%Z%%M% %I% %E% SMI" 42 43 #include <sys/types.h> 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/cpuvar.h> 47 #include <sys/errno.h> 48 #include <sys/cred.h> 49 #include <sys/user.h> 50 #include <sys/uio.h> 51 #include <sys/vfs.h> 52 #include <sys/vnode.h> 53 #include <sys/pathname.h> 54 #include <sys/proc.h> 55 #include <sys/vtrace.h> 56 #include <sys/sysmacros.h> 57 #include <sys/debug.h> 58 #include <sys/dirent.h> 59 #include <c2/audit.h> 60 #include <sys/zone.h> 61 #include <sys/dnlc.h> 62 #include <sys/fs/snode.h> 63 64 /* Controls whether paths are stored with vnodes. */ 65 int vfs_vnode_path = 1; 66 67 int 68 lookupname( 69 char *fnamep, 70 enum uio_seg seg, 71 enum symfollow followlink, 72 vnode_t **dirvpp, 73 vnode_t **compvpp) 74 { 75 return (lookupnameat(fnamep, seg, followlink, dirvpp, compvpp, NULL)); 76 } 77 78 79 /* 80 * Lookup the user file name, 81 * Handle allocation and freeing of pathname buffer, return error. 82 */ 83 int 84 lookupnameat( 85 char *fnamep, /* user pathname */ 86 enum uio_seg seg, /* addr space that name is in */ 87 enum symfollow followlink, /* follow sym links */ 88 vnode_t **dirvpp, /* ret for ptr to parent dir vnode */ 89 vnode_t **compvpp, /* ret for ptr to component vnode */ 90 vnode_t *startvp) /* start path search from vp */ 91 { 92 char namebuf[TYPICALMAXPATHLEN]; 93 struct pathname lookpn; 94 int error; 95 96 error = pn_get_buf(fnamep, seg, &lookpn, namebuf, sizeof (namebuf)); 97 if (error == 0) { 98 #ifdef C2_AUDIT 99 if (audit_active) 100 audit_lookupname(); 101 #endif 102 error = lookuppnat(&lookpn, NULL, followlink, 103 dirvpp, compvpp, startvp); 104 } 105 if (error == ENAMETOOLONG) { 106 /* 107 * This thread used a pathname > TYPICALMAXPATHLEN bytes long. 108 */ 109 if (error = pn_get(fnamep, seg, &lookpn)) 110 return (error); 111 error = lookuppnat(&lookpn, NULL, followlink, 112 dirvpp, compvpp, startvp); 113 pn_free(&lookpn); 114 } 115 116 return (error); 117 } 118 119 /* 120 * Lookup the user file name from a given vp, 121 */ 122 int 123 lookuppn( 124 struct pathname *pnp, 125 struct pathname *rpnp, 126 enum symfollow followlink, 127 vnode_t **dirvpp, 128 vnode_t **compvpp) 129 { 130 return (lookuppnat(pnp, rpnp, followlink, dirvpp, compvpp, NULL)); 131 } 132 133 int 134 lookuppnat( 135 struct pathname *pnp, /* pathname to lookup */ 136 struct pathname *rpnp, /* if non-NULL, return resolved path */ 137 enum symfollow followlink, /* (don't) follow sym links */ 138 vnode_t **dirvpp, /* ptr for parent vnode */ 139 vnode_t **compvpp, /* ptr for entry vnode */ 140 vnode_t *startvp) /* start search from this vp */ 141 { 142 vnode_t *vp; /* current directory vp */ 143 vnode_t *rootvp; 144 proc_t *p = curproc; 145 146 if (pnp->pn_pathlen == 0) 147 return (ENOENT); 148 149 mutex_enter(&p->p_lock); /* for u_rdir and u_cdir */ 150 if ((rootvp = PTOU(p)->u_rdir) == NULL) 151 rootvp = rootdir; 152 else if (rootvp != rootdir) /* no need to VN_HOLD rootdir */ 153 VN_HOLD(rootvp); 154 155 if (pnp->pn_path[0] == '/') { 156 vp = rootvp; 157 } else { 158 vp = (startvp == NULL) ? PTOU(p)->u_cdir : startvp; 159 } 160 VN_HOLD(vp); 161 mutex_exit(&p->p_lock); 162 163 /* 164 * Skip over leading slashes 165 */ 166 if (pnp->pn_path[0] == '/') { 167 do { 168 pnp->pn_path++; 169 pnp->pn_pathlen--; 170 } while (pnp->pn_path[0] == '/'); 171 } 172 173 return (lookuppnvp(pnp, rpnp, followlink, dirvpp, 174 compvpp, rootvp, vp, CRED())); 175 } 176 177 /* Private flag to do our getcwd() dirty work */ 178 #define LOOKUP_CHECKREAD 0x10 179 #define LOOKUP_MASK (~LOOKUP_CHECKREAD) 180 181 /* 182 * Starting at current directory, translate pathname pnp to end. 183 * Leave pathname of final component in pnp, return the vnode 184 * for the final component in *compvpp, and return the vnode 185 * for the parent of the final component in dirvpp. 186 * 187 * This is the central routine in pathname translation and handles 188 * multiple components in pathnames, separating them at /'s. It also 189 * implements mounted file systems and processes symbolic links. 190 * 191 * vp is the vnode where the directory search should start. 192 * 193 * Reference counts: vp must be held prior to calling this function. rootvp 194 * should only be held if rootvp != rootdir. 195 */ 196 int 197 lookuppnvp( 198 struct pathname *pnp, /* pathname to lookup */ 199 struct pathname *rpnp, /* if non-NULL, return resolved path */ 200 int flags, /* follow symlinks */ 201 vnode_t **dirvpp, /* ptr for parent vnode */ 202 vnode_t **compvpp, /* ptr for entry vnode */ 203 vnode_t *rootvp, /* rootvp */ 204 vnode_t *vp, /* directory to start search at */ 205 cred_t *cr) /* user's credential */ 206 { 207 vnode_t *cvp; /* current component vp */ 208 vnode_t *tvp; /* addressable temp ptr */ 209 char component[MAXNAMELEN]; /* buffer for component (incl null) */ 210 int error; 211 int nlink; 212 int lookup_flags; 213 vnode_t *startvp; 214 vnode_t *zonevp = curproc->p_zone->zone_rootvp; /* zone root */ 215 int must_be_directory = 0; 216 217 CPU_STATS_ADDQ(CPU, sys, namei, 1); 218 nlink = 0; 219 cvp = NULL; 220 if (rpnp) 221 rpnp->pn_pathlen = 0; 222 lookup_flags = dirvpp ? LOOKUP_DIR : 0; 223 #ifdef C2_AUDIT 224 if (audit_active) 225 audit_anchorpath(pnp, vp == rootvp); 226 #endif 227 228 /* 229 * Eliminate any trailing slashes in the pathname. 230 * If there are any, we must follow all symlinks. 231 * Also, we must guarantee that the last component is a directory. 232 */ 233 if (pn_fixslash(pnp)) { 234 flags |= FOLLOW; 235 must_be_directory = 1; 236 } 237 238 startvp = vp; 239 next: 240 /* 241 * Make sure we have a directory. 242 */ 243 if (vp->v_type != VDIR) { 244 error = ENOTDIR; 245 goto bad; 246 } 247 248 if (rpnp && VN_CMP(vp, rootvp)) 249 (void) pn_set(rpnp, "/"); 250 251 /* 252 * Process the next component of the pathname. 253 */ 254 if (error = pn_getcomponent(pnp, component)) { 255 #ifdef C2_AUDIT 256 if (audit_active) 257 audit_addcomponent(pnp); 258 #endif 259 goto bad; 260 } 261 262 /* 263 * Handle "..": two special cases. 264 * 1. If we're at the root directory (e.g. after chroot or 265 * zone_enter) then change ".." to "." so we can't get 266 * out of this subtree. 267 * 2. If this vnode is the root of a mounted file system, 268 * then replace it with the vnode that was mounted on 269 * so that we take the ".." in the other file system. 270 */ 271 if (component[0] == '.' && component[1] == '.' && component[2] == 0) { 272 checkforroot: 273 if (VN_CMP(vp, rootvp) || VN_CMP(vp, zonevp)) { 274 component[1] = '\0'; 275 } else if (vp->v_flag & VROOT) { 276 vfs_t *vfsp; 277 cvp = vp; 278 279 /* 280 * While we deal with the vfs pointer from the vnode 281 * the filesystem could have been forcefully unmounted 282 * and the vnode's v_vfsp could have been invalidated 283 * by VFS_UNMOUNT. Hence, we cache v_vfsp and use it 284 * with vfs_rlock_wait/vfs_unlock. 285 * It is safe to use the v_vfsp even it is freed by 286 * VFS_UNMOUNT because vfs_rlock_wait/vfs_unlock 287 * do not dereference v_vfsp. It is just used as a 288 * magic cookie. 289 * One more corner case here is the memory getting 290 * reused for another vfs structure. In this case 291 * lookuppnvp's vfs_rlock_wait will succeed, domount's 292 * vfs_lock will fail and domount will bail out with an 293 * error (EBUSY). 294 */ 295 vfsp = cvp->v_vfsp; 296 297 /* 298 * This lock is used to synchronize 299 * mounts/unmounts and lookups. 300 * Threads doing mounts/unmounts hold the 301 * writers version vfs_lock_wait(). 302 */ 303 304 vfs_rlock_wait(vfsp); 305 306 /* 307 * If this vnode is on a file system that 308 * has been forcibly unmounted, 309 * we can't proceed. Cancel this operation 310 * and return EIO. 311 * 312 * vfs_vnodecovered is NULL if unmounted. 313 * Currently, nfs uses VFS_UNMOUNTED to 314 * check if it's a forced-umount. Keep the 315 * same checking here as well even though it 316 * may not be needed. 317 */ 318 if (((vp = cvp->v_vfsp->vfs_vnodecovered) == NULL) || 319 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) { 320 vfs_unlock(vfsp); 321 VN_RELE(cvp); 322 return (EIO); 323 } 324 VN_HOLD(vp); 325 vfs_unlock(vfsp); 326 VN_RELE(cvp); 327 cvp = NULL; 328 goto checkforroot; 329 } 330 } 331 332 /* 333 * LOOKUP_CHECKREAD is a private flag used by vnodetopath() to indicate 334 * that we need to have read permission on every directory in the entire 335 * path. This is used to ensure that a forward-lookup of a cached value 336 * has the same effect as a reverse-lookup when the cached value cannot 337 * be found. 338 */ 339 if ((flags & LOOKUP_CHECKREAD) && 340 (error = VOP_ACCESS(vp, VREAD, 0, cr)) != 0) 341 goto bad; 342 343 /* 344 * Perform a lookup in the current directory. 345 */ 346 error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags, 347 rootvp, cr); 348 cvp = tvp; 349 if (error) { 350 cvp = NULL; 351 /* 352 * On error, return hard error if 353 * (a) we're not at the end of the pathname yet, or 354 * (b) the caller didn't want the parent directory, or 355 * (c) we failed for some reason other than a missing entry. 356 */ 357 if (pn_pathleft(pnp) || dirvpp == NULL || error != ENOENT) 358 goto bad; 359 #ifdef C2_AUDIT 360 if (audit_active) { /* directory access */ 361 if (error = audit_savepath(pnp, vp, error, cr)) 362 goto bad_noaudit; 363 } 364 #endif 365 pn_setlast(pnp); 366 /* 367 * We inform the caller that the desired entry must be 368 * a directory by adding a '/' to the component name. 369 */ 370 if (must_be_directory && (error = pn_addslash(pnp)) != 0) 371 goto bad; 372 *dirvpp = vp; 373 if (compvpp != NULL) 374 *compvpp = NULL; 375 if (rootvp != rootdir) 376 VN_RELE(rootvp); 377 return (0); 378 } 379 380 /* 381 * Traverse mount points. 382 * XXX why don't we need to hold a read lock here (call vn_vfsrlock)? 383 * What prevents a concurrent update to v_vfsmountedhere? 384 * Possible answer: if mounting, we might not see the mount 385 * if it is concurrently coming into existence, but that's 386 * really not much different from the thread running a bit slower. 387 * If unmounting, we may get into traverse() when we shouldn't, 388 * but traverse() will catch this case for us. 389 * (For this to work, fetching v_vfsmountedhere had better 390 * be atomic!) 391 */ 392 if (vn_mountedvfs(cvp) != NULL) { 393 tvp = cvp; 394 if ((error = traverse(&tvp)) != 0) { 395 /* 396 * It is required to assign cvp here, because 397 * traverse() will return a held vnode which 398 * may different than the vnode that was passed 399 * in (even in the error case). If traverse() 400 * changes the vnode it releases the original, 401 * and holds the new one. 402 */ 403 cvp = tvp; 404 goto bad; 405 } 406 cvp = tvp; 407 } 408 409 /* 410 * If we hit a symbolic link and there is more path to be 411 * translated or this operation does not wish to apply 412 * to a link, then place the contents of the link at the 413 * front of the remaining pathname. 414 */ 415 if (cvp->v_type == VLNK && ((flags & FOLLOW) || pn_pathleft(pnp))) { 416 struct pathname linkpath; 417 #ifdef C2_AUDIT 418 if (audit_active) { 419 if (error = audit_pathcomp(pnp, cvp, cr)) 420 goto bad; 421 } 422 #endif 423 424 if (++nlink > MAXSYMLINKS) { 425 error = ELOOP; 426 goto bad; 427 } 428 pn_alloc(&linkpath); 429 if (error = pn_getsymlink(cvp, &linkpath, cr)) { 430 pn_free(&linkpath); 431 goto bad; 432 } 433 434 #ifdef C2_AUDIT 435 if (audit_active) 436 audit_symlink(pnp, &linkpath); 437 #endif /* C2_AUDIT */ 438 439 if (pn_pathleft(&linkpath) == 0) 440 (void) pn_set(&linkpath, "."); 441 error = pn_insert(pnp, &linkpath, strlen(component)); 442 pn_free(&linkpath); 443 if (error) 444 goto bad; 445 VN_RELE(cvp); 446 cvp = NULL; 447 if (pnp->pn_pathlen == 0) { 448 error = ENOENT; 449 goto bad; 450 } 451 if (pnp->pn_path[0] == '/') { 452 do { 453 pnp->pn_path++; 454 pnp->pn_pathlen--; 455 } while (pnp->pn_path[0] == '/'); 456 VN_RELE(vp); 457 vp = rootvp; 458 VN_HOLD(vp); 459 } 460 #ifdef C2_AUDIT 461 if (audit_active) 462 audit_anchorpath(pnp, vp == rootvp); 463 #endif 464 if (pn_fixslash(pnp)) { 465 flags |= FOLLOW; 466 must_be_directory = 1; 467 } 468 goto next; 469 } 470 471 /* 472 * If rpnp is non-NULL, remember the resolved path name therein. 473 * Do not include "." components. Collapse occurrences of 474 * "previous/..", so long as "previous" is not itself "..". 475 * Exhausting rpnp results in error ENAMETOOLONG. 476 */ 477 if (rpnp && strcmp(component, ".") != 0) { 478 size_t len; 479 480 if (strcmp(component, "..") == 0 && 481 rpnp->pn_pathlen != 0 && 482 !((rpnp->pn_pathlen > 2 && 483 strncmp(rpnp->pn_path+rpnp->pn_pathlen-3, "/..", 3) == 0) || 484 (rpnp->pn_pathlen == 2 && 485 strncmp(rpnp->pn_path, "..", 2) == 0))) { 486 while (rpnp->pn_pathlen && 487 rpnp->pn_path[rpnp->pn_pathlen-1] != '/') 488 rpnp->pn_pathlen--; 489 if (rpnp->pn_pathlen > 1) 490 rpnp->pn_pathlen--; 491 rpnp->pn_path[rpnp->pn_pathlen] = '\0'; 492 } else { 493 if (rpnp->pn_pathlen != 0 && 494 rpnp->pn_path[rpnp->pn_pathlen-1] != '/') 495 rpnp->pn_path[rpnp->pn_pathlen++] = '/'; 496 error = copystr(component, 497 rpnp->pn_path + rpnp->pn_pathlen, 498 rpnp->pn_bufsize - rpnp->pn_pathlen, &len); 499 if (error) /* copystr() returns ENAMETOOLONG */ 500 goto bad; 501 rpnp->pn_pathlen += (len - 1); 502 ASSERT(rpnp->pn_bufsize > rpnp->pn_pathlen); 503 } 504 } 505 506 /* 507 * If no more components, return last directory (if wanted) and 508 * last component (if wanted). 509 */ 510 if (pn_pathleft(pnp) == 0) { 511 /* 512 * If there was a trailing slash in the pathname, 513 * make sure the last component is a directory. 514 */ 515 if (must_be_directory && cvp->v_type != VDIR) { 516 error = ENOTDIR; 517 goto bad; 518 } 519 if (dirvpp != NULL) { 520 /* 521 * Check that we have the real parent and not 522 * an alias of the last component. 523 */ 524 if (vn_compare(vp, cvp)) { 525 #ifdef C2_AUDIT 526 if (audit_active) 527 (void) audit_savepath(pnp, cvp, 528 EINVAL, cr); 529 #endif 530 pn_setlast(pnp); 531 VN_RELE(vp); 532 VN_RELE(cvp); 533 if (rootvp != rootdir) 534 VN_RELE(rootvp); 535 return (EINVAL); 536 } 537 #ifdef C2_AUDIT 538 if (audit_active) { 539 if (error = audit_pathcomp(pnp, vp, cr)) 540 goto bad; 541 } 542 #endif 543 *dirvpp = vp; 544 } else 545 VN_RELE(vp); 546 #ifdef C2_AUDIT 547 if (audit_active) 548 (void) audit_savepath(pnp, cvp, 0, cr); 549 #endif 550 if (pnp->pn_path == pnp->pn_buf) 551 (void) pn_set(pnp, "."); 552 else 553 pn_setlast(pnp); 554 if (rpnp) { 555 if (VN_CMP(cvp, rootvp)) 556 (void) pn_set(rpnp, "/"); 557 else if (rpnp->pn_pathlen == 0) 558 (void) pn_set(rpnp, "."); 559 } 560 561 if (compvpp != NULL) 562 *compvpp = cvp; 563 else 564 VN_RELE(cvp); 565 if (rootvp != rootdir) 566 VN_RELE(rootvp); 567 return (0); 568 } 569 570 #ifdef C2_AUDIT 571 if (audit_active) { 572 if (error = audit_pathcomp(pnp, cvp, cr)) 573 goto bad; 574 } 575 #endif 576 577 /* 578 * Skip over slashes from end of last component. 579 */ 580 while (pnp->pn_path[0] == '/') { 581 pnp->pn_path++; 582 pnp->pn_pathlen--; 583 } 584 585 /* 586 * Searched through another level of directory: 587 * release previous directory handle and save new (result 588 * of lookup) as current directory. 589 */ 590 VN_RELE(vp); 591 vp = cvp; 592 cvp = NULL; 593 goto next; 594 595 bad: 596 #ifdef C2_AUDIT 597 if (audit_active) /* reached end of path */ 598 (void) audit_savepath(pnp, cvp, error, cr); 599 bad_noaudit: 600 #endif 601 /* 602 * Error. Release vnodes and return. 603 */ 604 if (cvp) 605 VN_RELE(cvp); 606 /* 607 * If the error was ESTALE and the current directory to look in 608 * was the root for this lookup, the root for a mounted file 609 * system, or the starting directory for lookups, then 610 * return ENOENT instead of ESTALE. In this case, no recovery 611 * is possible by the higher level. If ESTALE was returned for 612 * some intermediate directory along the path, then recovery 613 * is potentially possible and retrying from the higher level 614 * will either correct the situation by purging stale cache 615 * entries or eventually get back to the point where no recovery 616 * is possible. 617 */ 618 if (error == ESTALE && 619 (VN_CMP(vp, rootvp) || (vp->v_flag & VROOT) || vp == startvp)) 620 error = ENOENT; 621 VN_RELE(vp); 622 if (rootvp != rootdir) 623 VN_RELE(rootvp); 624 return (error); 625 } 626 627 /* 628 * Traverse a mount point. Routine accepts a vnode pointer as a reference 629 * parameter and performs the indirection, releasing the original vnode. 630 */ 631 int 632 traverse(vnode_t **cvpp) 633 { 634 int error = 0; 635 vnode_t *cvp; 636 vnode_t *tvp; 637 vfs_t *vfsp; 638 639 cvp = *cvpp; 640 641 /* 642 * If this vnode is mounted on, then we transparently indirect 643 * to the vnode which is the root of the mounted file system. 644 * Before we do this we must check that an unmount is not in 645 * progress on this vnode. 646 */ 647 648 for (;;) { 649 /* 650 * Try to read lock the vnode. If this fails because 651 * the vnode is already write locked, then check to 652 * see whether it is the current thread which locked 653 * the vnode. If it is not, then read lock the vnode 654 * by waiting to acquire the lock. 655 * 656 * The code path in domount() is an example of support 657 * which needs to look up two pathnames and locks one 658 * of them in between the two lookups. 659 */ 660 error = vn_vfsrlock(cvp); 661 if (error) { 662 if (!vn_vfswlock_held(cvp)) 663 error = vn_vfsrlock_wait(cvp); 664 if (error != 0) { 665 /* 666 * lookuppn() expects a held vnode to be 667 * returned because it promptly calls 668 * VN_RELE after the error return 669 */ 670 *cvpp = cvp; 671 return (error); 672 } 673 } 674 675 /* 676 * Reached the end of the mount chain? 677 */ 678 vfsp = vn_mountedvfs(cvp); 679 if (vfsp == NULL) { 680 vn_vfsunlock(cvp); 681 break; 682 } 683 684 /* 685 * The read lock must be held across the call to VFS_ROOT() to 686 * prevent a concurrent unmount from destroying the vfs. 687 */ 688 error = VFS_ROOT(vfsp, &tvp); 689 vn_vfsunlock(cvp); 690 691 if (error) 692 break; 693 694 VN_RELE(cvp); 695 696 cvp = tvp; 697 } 698 699 *cvpp = cvp; 700 return (error); 701 } 702 703 /* 704 * Return the lowermost vnode if this is a mountpoint. 705 */ 706 static vnode_t * 707 vn_under(vnode_t *vp) 708 { 709 vnode_t *uvp; 710 vfs_t *vfsp; 711 712 while (vp->v_flag & VROOT) { 713 714 vfsp = vp->v_vfsp; 715 vfs_rlock_wait(vfsp); 716 if ((uvp = vfsp->vfs_vnodecovered) == NULL || 717 (vfsp->vfs_flag & VFS_UNMOUNTED)) { 718 vfs_unlock(vfsp); 719 break; 720 } 721 VN_HOLD(uvp); 722 vfs_unlock(vfsp); 723 VN_RELE(vp); 724 vp = uvp; 725 } 726 727 return (vp); 728 } 729 730 static int 731 vnode_match(vnode_t *v1, vnode_t *v2, cred_t *cr) 732 { 733 vattr_t v1attr, v2attr; 734 735 /* 736 * If we have a device file, check to see if is a cloned open of the 737 * same device. For self-cloning devices, the major numbers will match. 738 * For devices cloned through the 'clone' driver, the minor number of 739 * the source device will be the same as the major number of the cloned 740 * device. 741 */ 742 if ((v1->v_type == VCHR || v1->v_type == VBLK) && 743 v1->v_type == v2->v_type) { 744 if ((spec_is_selfclone(v1) || spec_is_selfclone(v2)) && 745 getmajor(v1->v_rdev) == getmajor(v2->v_rdev)) 746 return (1); 747 748 if (spec_is_clone(v1) && 749 getmajor(v1->v_rdev) == getminor(v2->v_rdev)) 750 return (1); 751 752 if (spec_is_clone(v2) && 753 getmajor(v2->v_rdev) == getminor(v1->v_rdev)) 754 return (1); 755 } 756 757 v1attr.va_mask = v2attr.va_mask = AT_TYPE; 758 759 /* 760 * This check for symbolic links handles the pseudo-symlinks in procfs. 761 * These particular links have v_type of VDIR, but the attributes have a 762 * type of VLNK. We need to avoid these links because otherwise if we 763 * are currently in '/proc/self/fd', then '/proc/self/cwd' will compare 764 * as the same vnode. 765 */ 766 if (VOP_GETATTR(v1, &v1attr, 0, cr) != 0 || 767 VOP_GETATTR(v2, &v2attr, 0, cr) != 0 || 768 v1attr.va_type == VLNK || v2attr.va_type == VLNK) 769 return (0); 770 771 v1attr.va_mask = v2attr.va_mask = AT_TYPE | AT_FSID | AT_NODEID; 772 773 if (VOP_GETATTR(v1, &v1attr, ATTR_REAL, cr) != 0 || 774 VOP_GETATTR(v2, &v2attr, ATTR_REAL, cr) != 0) 775 return (0); 776 777 return (v1attr.va_fsid == v2attr.va_fsid && 778 v1attr.va_nodeid == v2attr.va_nodeid); 779 } 780 781 782 /* 783 * Find the entry in the directory corresponding to the target vnode. 784 */ 785 int 786 dirfindvp(vnode_t *vrootp, vnode_t *dvp, vnode_t *tvp, cred_t *cr, char *dbuf, 787 size_t dlen, dirent64_t **rdp) 788 { 789 size_t dbuflen; 790 struct iovec iov; 791 struct uio uio; 792 int error; 793 int eof; 794 vnode_t *cmpvp; 795 struct dirent64 *dp; 796 pathname_t pnp; 797 798 ASSERT(dvp->v_type == VDIR); 799 800 /* 801 * This is necessary because of the strange semantics of VOP_LOOKUP(). 802 */ 803 bzero(&pnp, sizeof (pnp)); 804 805 eof = 0; 806 807 uio.uio_iov = &iov; 808 uio.uio_iovcnt = 1; 809 uio.uio_segflg = UIO_SYSSPACE; 810 uio.uio_fmode = 0; 811 uio.uio_extflg = UIO_COPY_CACHED; 812 uio.uio_loffset = 0; 813 814 if ((error = VOP_ACCESS(dvp, VREAD, 0, cr)) != 0) 815 return (error); 816 817 while (!eof) { 818 uio.uio_resid = dlen; 819 iov.iov_base = dbuf; 820 iov.iov_len = dlen; 821 822 (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL); 823 error = VOP_READDIR(dvp, &uio, cr, &eof); 824 VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL); 825 826 dbuflen = dlen - uio.uio_resid; 827 828 if (error || dbuflen == 0) 829 break; 830 831 dp = (dirent64_t *)dbuf; 832 while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) { 833 /* 834 * Ignore '.' and '..' entries 835 */ 836 if (strcmp(dp->d_name, ".") == 0 || 837 strcmp(dp->d_name, "..") == 0) { 838 dp = (dirent64_t *)((intptr_t)dp + 839 dp->d_reclen); 840 continue; 841 } 842 843 error = VOP_LOOKUP(dvp, dp->d_name, &cmpvp, &pnp, 0, 844 vrootp, cr); 845 846 /* 847 * We only want to bail out if there was an error other 848 * than ENOENT. Otherwise, it could be that someone 849 * just removed an entry since the readdir() call, and 850 * the entry we want is further on in the directory. 851 */ 852 if (error == 0) { 853 if (vnode_match(tvp, cmpvp, cr)) { 854 VN_RELE(cmpvp); 855 *rdp = dp; 856 return (0); 857 } 858 859 VN_RELE(cmpvp); 860 } else if (error != ENOENT) { 861 return (error); 862 } 863 864 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen); 865 } 866 } 867 868 /* 869 * Something strange has happened, this directory does not contain the 870 * specified vnode. This should never happen in the normal case, since 871 * we ensured that dvp is the parent of vp. This is possible in some 872 * rare conditions (races and the special .zfs directory). 873 */ 874 if (error == 0) { 875 error = VOP_LOOKUP(dvp, ".zfs", &cmpvp, &pnp, 0, vrootp, cr); 876 if (error == 0) { 877 if (vnode_match(tvp, cmpvp, cr)) { 878 (void) strcpy(dp->d_name, ".zfs"); 879 dp->d_reclen = strlen(".zfs"); 880 dp->d_off = 2; 881 dp->d_ino = 1; 882 *rdp = dp; 883 } else { 884 error = ENOENT; 885 } 886 VN_RELE(cmpvp); 887 } 888 } 889 890 return (error); 891 } 892 893 /* 894 * Given a global path (from rootdir), and a vnode that is the current root, 895 * return the portion of the path that is beneath the current root or NULL on 896 * failure. The path MUST be a resolved path (no '..' entries or symlinks), 897 * otherwise this function will fail. 898 */ 899 static char * 900 localpath(char *path, struct vnode *vrootp, cred_t *cr) 901 { 902 vnode_t *vp; 903 vnode_t *cvp; 904 char component[MAXNAMELEN]; 905 char *ret = NULL; 906 pathname_t pn; 907 908 /* 909 * We use vn_compare() instead of VN_CMP() in order to detect lofs 910 * mounts and stacked vnodes. 911 */ 912 if (vn_compare(vrootp, rootdir)) 913 return (path); 914 915 if (pn_get(path, UIO_SYSSPACE, &pn) != 0) 916 return (NULL); 917 918 vp = rootdir; 919 VN_HOLD(vp); 920 921 while (pn_pathleft(&pn)) { 922 pn_skipslash(&pn); 923 924 if (pn_getcomponent(&pn, component) != 0) 925 break; 926 927 if (vn_ismntpt(vp) && traverse(&vp) != 0) 928 break; 929 930 if (VOP_LOOKUP(vp, component, &cvp, &pn, 0, rootdir, cr) != 0) 931 break; 932 933 VN_RELE(vp); 934 vp = cvp; 935 936 if (vn_compare(vp, vrootp)) { 937 ret = path + (pn.pn_path - pn.pn_buf); 938 break; 939 } 940 } 941 942 VN_RELE(vp); 943 pn_free(&pn); 944 945 return (ret); 946 } 947 948 /* 949 * Given a directory, return the full, resolved path. This looks up "..", 950 * searches for the given vnode in the parent, appends the component, etc. It 951 * is used to implement vnodetopath() and getcwd() when the cached path fails 952 * (or vfs_vnode_path is not set). 953 */ 954 static int 955 dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr) 956 { 957 pathname_t pn, rpn, emptypn; 958 vnode_t *cmpvp, *pvp = NULL; 959 vnode_t *startvp = vp; 960 int err = 0; 961 size_t complen; 962 char *dbuf; 963 dirent64_t *dp; 964 char *bufloc; 965 size_t dlen = DIRENT64_RECLEN(MAXPATHLEN); 966 refstr_t *mntpt; 967 968 /* Operation only allowed on directories */ 969 ASSERT(vp->v_type == VDIR); 970 971 /* We must have at least enough space for "/" */ 972 if (buflen < 2) 973 return (ENAMETOOLONG); 974 975 /* Start at end of string with terminating null */ 976 bufloc = &buf[buflen - 1]; 977 *bufloc = '\0'; 978 979 pn_alloc(&pn); 980 pn_alloc(&rpn); 981 dbuf = kmem_alloc(dlen, KM_SLEEP); 982 bzero(&emptypn, sizeof (emptypn)); 983 984 /* 985 * Begin with an additional reference on vp. This will be decremented 986 * during the loop. 987 */ 988 VN_HOLD(vp); 989 990 for (;;) { 991 /* 992 * Return if we've reached the root. If the buffer is empty, 993 * return '/'. We explicitly don't use vn_compare(), since it 994 * compares the real vnodes. A lofs mount of '/' would produce 995 * incorrect results otherwise. 996 */ 997 if (VN_CMP(vrootp, vp)) { 998 if (*bufloc == '\0') 999 *--bufloc = '/'; 1000 break; 1001 } 1002 1003 /* 1004 * If we've reached the VFS root, something has gone wrong. We 1005 * should have reached the root in the above check. The only 1006 * explantation is that 'vp' is not contained withing the given 1007 * root, in which case we return EPERM. 1008 */ 1009 if (VN_CMP(rootdir, vp)) { 1010 err = EPERM; 1011 goto out; 1012 } 1013 1014 /* 1015 * Shortcut: see if this vnode is a mountpoint. If so, 1016 * grab the path information from the vfs_t. 1017 */ 1018 if (vp->v_flag & VROOT) { 1019 1020 mntpt = vfs_getmntpoint(vp->v_vfsp); 1021 if ((err = pn_set(&pn, (char *)refstr_value(mntpt))) 1022 == 0) { 1023 refstr_rele(mntpt); 1024 rpn.pn_path = rpn.pn_buf; 1025 1026 /* 1027 * Ensure the mointpoint still exists. 1028 */ 1029 VN_HOLD(vrootp); 1030 if (vrootp != rootdir) 1031 VN_HOLD(vrootp); 1032 if (lookuppnvp(&pn, &rpn, 0, NULL, 1033 &cmpvp, vrootp, vrootp, cr) == 0) { 1034 1035 if (VN_CMP(vp, cmpvp)) { 1036 VN_RELE(cmpvp); 1037 1038 complen = strlen(rpn.pn_path); 1039 bufloc -= complen; 1040 if (bufloc < buf) { 1041 err = ERANGE; 1042 goto out; 1043 } 1044 bcopy(rpn.pn_path, bufloc, 1045 complen); 1046 break; 1047 } else { 1048 VN_RELE(cmpvp); 1049 } 1050 } 1051 } else { 1052 refstr_rele(mntpt); 1053 } 1054 } 1055 1056 /* 1057 * Shortcuts failed, search for this vnode in its parent. If 1058 * this is a mountpoint, then get the vnode underneath. 1059 */ 1060 if (vp->v_flag & VROOT) 1061 vp = vn_under(vp); 1062 if ((err = VOP_LOOKUP(vp, "..", &pvp, &emptypn, 0, vrootp, cr)) 1063 != 0) 1064 goto out; 1065 1066 /* 1067 * With extended attributes, it's possible for a directory to 1068 * have a parent that is a regular file. Check for that here. 1069 */ 1070 if (pvp->v_type != VDIR) { 1071 err = ENOTDIR; 1072 goto out; 1073 } 1074 1075 /* 1076 * If this is true, something strange has happened. This is 1077 * only true if we are the root of a filesystem, which should 1078 * have been caught by the check above. 1079 */ 1080 if (VN_CMP(pvp, vp)) { 1081 err = ENOENT; 1082 goto out; 1083 } 1084 1085 /* 1086 * Search the parent directory for the entry corresponding to 1087 * this vnode. 1088 */ 1089 if ((err = dirfindvp(vrootp, pvp, vp, cr, dbuf, dlen, &dp)) 1090 != 0) 1091 goto out; 1092 complen = strlen(dp->d_name); 1093 bufloc -= complen; 1094 if (bufloc <= buf) { 1095 err = ENAMETOOLONG; 1096 goto out; 1097 } 1098 bcopy(dp->d_name, bufloc, complen); 1099 1100 /* Prepend a slash to the current path. */ 1101 *--bufloc = '/'; 1102 1103 /* And continue with the next component */ 1104 VN_RELE(vp); 1105 vp = pvp; 1106 pvp = NULL; 1107 } 1108 1109 /* 1110 * Place the path at the beginning of the buffer. 1111 */ 1112 if (bufloc != buf) 1113 ovbcopy(bufloc, buf, buflen - (bufloc - buf)); 1114 1115 out: 1116 /* 1117 * If the error was ESTALE and the current directory to look in 1118 * was the root for this lookup, the root for a mounted file 1119 * system, or the starting directory for lookups, then 1120 * return ENOENT instead of ESTALE. In this case, no recovery 1121 * is possible by the higher level. If ESTALE was returned for 1122 * some intermediate directory along the path, then recovery 1123 * is potentially possible and retrying from the higher level 1124 * will either correct the situation by purging stale cache 1125 * entries or eventually get back to the point where no recovery 1126 * is possible. 1127 */ 1128 if (err == ESTALE && 1129 (VN_CMP(vp, vrootp) || (vp->v_flag & VROOT) || vp == startvp)) 1130 err = ENOENT; 1131 1132 kmem_free(dbuf, dlen); 1133 VN_RELE(vp); 1134 if (pvp) 1135 VN_RELE(pvp); 1136 pn_free(&pn); 1137 pn_free(&rpn); 1138 1139 return (err); 1140 } 1141 1142 /* 1143 * The additional flag, LOOKUP_CHECKREAD, is ued to enforce artificial 1144 * constraints in order to be standards compliant. For example, if we have 1145 * the cached path of '/foo/bar', and '/foo' has permissions 100 (execute 1146 * only), then we can legitimately look up the path to the current working 1147 * directory without needing read permission. Existing standards tests, 1148 * however, assume that we are determining the path by repeatedly looking up 1149 * "..". We need to keep this behavior in order to maintain backwards 1150 * compatibility. 1151 */ 1152 static int 1153 vnodetopath_common(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, 1154 cred_t *cr, int flags) 1155 { 1156 pathname_t pn, rpn; 1157 int ret, len; 1158 vnode_t *compvp, *pvp, *realvp; 1159 proc_t *p = curproc; 1160 char path[MAXNAMELEN]; 1161 int doclose = 0; 1162 1163 /* 1164 * If vrootp is NULL, get the root for curproc. Callers with any other 1165 * requirements should pass in a different vrootp. 1166 */ 1167 if (vrootp == NULL) { 1168 mutex_enter(&p->p_lock); 1169 if ((vrootp = PTOU(p)->u_rdir) == NULL) 1170 vrootp = rootdir; 1171 VN_HOLD(vrootp); 1172 mutex_exit(&p->p_lock); 1173 } else { 1174 VN_HOLD(vrootp); 1175 } 1176 1177 /* 1178 * This is to get around an annoying artifact of the /proc filesystem, 1179 * which is the behavior of {cwd/root}. Trying to resolve this path 1180 * will result in /proc/pid/cwd instead of whatever the real working 1181 * directory is. We can't rely on VOP_REALVP(), since that will break 1182 * lofs. The only difference between procfs and lofs is that opening 1183 * the file will return the underling vnode in the case of procfs. 1184 */ 1185 if (vp->v_type == VDIR && VOP_REALVP(vp, &realvp) == 0 && 1186 realvp != vp) { 1187 VN_HOLD(vp); 1188 if (VOP_OPEN(&vp, FREAD, cr) == 0) 1189 doclose = 1; 1190 else 1191 VN_RELE(vp); 1192 } 1193 1194 pn_alloc(&pn); 1195 1196 /* 1197 * Check to see if we have a cached path in the vnode. 1198 */ 1199 mutex_enter(&vp->v_lock); 1200 if (vp->v_path != NULL) { 1201 (void) pn_set(&pn, vp->v_path); 1202 mutex_exit(&vp->v_lock); 1203 1204 pn_alloc(&rpn); 1205 1206 /* We should only cache absolute paths */ 1207 ASSERT(pn.pn_buf[0] == '/'); 1208 1209 /* 1210 * If we are in a zone or a chroot environment, then we have to 1211 * take additional steps, since the path to the root might not 1212 * be readable with the current credentials, even though the 1213 * process can legitmately access the file. In this case, we 1214 * do the following: 1215 * 1216 * lookuppnvp() with all privileges to get the resolved path. 1217 * call localpath() to get the local portion of the path, and 1218 * continue as normal. 1219 * 1220 * If the the conversion to a local path fails, then we continue 1221 * as normal. This is a heuristic to make process object file 1222 * paths available from within a zone. Because lofs doesn't 1223 * support page operations, the vnode stored in the seg_t is 1224 * actually the underlying real vnode, not the lofs node itself. 1225 * Most of the time, the lofs path is the same as the underlying 1226 * vnode (for example, /usr/lib/libc.so.1). 1227 */ 1228 if (vrootp != rootdir) { 1229 char *local = NULL; 1230 VN_HOLD(rootdir); 1231 if (lookuppnvp(&pn, &rpn, FOLLOW, 1232 NULL, &compvp, rootdir, rootdir, kcred) == 0) { 1233 local = localpath(rpn.pn_path, vrootp, 1234 kcred); 1235 VN_RELE(compvp); 1236 } 1237 1238 /* 1239 * The original pn was changed through lookuppnvp(), so 1240 * reset it. 1241 */ 1242 if (local) { 1243 (void) pn_set(&pn, local); 1244 } else { 1245 mutex_enter(&vp->v_lock); 1246 if (vp->v_path != NULL) { 1247 (void) pn_set(&pn, vp->v_path); 1248 mutex_exit(&vp->v_lock); 1249 } else { 1250 mutex_exit(&vp->v_lock); 1251 goto notcached; 1252 } 1253 } 1254 } 1255 1256 /* 1257 * We should have a local path at this point, so start the 1258 * search from the root of the current process. 1259 */ 1260 VN_HOLD(vrootp); 1261 if (vrootp != rootdir) 1262 VN_HOLD(vrootp); 1263 ret = lookuppnvp(&pn, &rpn, FOLLOW | flags, NULL, 1264 &compvp, vrootp, vrootp, cr); 1265 if (ret == 0) { 1266 /* 1267 * Check to see if the returned vnode is the same as 1268 * the one we expect. If not, give up. 1269 */ 1270 if (!vn_compare(vp, compvp) && 1271 !vnode_match(vp, compvp, cr)) { 1272 VN_RELE(compvp); 1273 goto notcached; 1274 } 1275 1276 VN_RELE(compvp); 1277 1278 /* 1279 * Return the result. 1280 */ 1281 if (buflen <= rpn.pn_pathlen) 1282 goto notcached; 1283 1284 bcopy(rpn.pn_path, buf, rpn.pn_pathlen + 1); 1285 pn_free(&pn); 1286 pn_free(&rpn); 1287 VN_RELE(vrootp); 1288 if (doclose) { 1289 (void) VOP_CLOSE(vp, FREAD, 1, 0, cr); 1290 VN_RELE(vp); 1291 } 1292 return (0); 1293 } 1294 1295 notcached: 1296 pn_free(&rpn); 1297 } else { 1298 mutex_exit(&vp->v_lock); 1299 } 1300 1301 pn_free(&pn); 1302 1303 if (vp->v_type != VDIR) { 1304 /* 1305 * If we don't have a directory, try to find it in the dnlc via 1306 * reverse lookup. Once this is found, we can use the regular 1307 * directory search to find the full path. 1308 */ 1309 if ((pvp = dnlc_reverse_lookup(vp, path, MAXNAMELEN)) != NULL) { 1310 ret = dirtopath(vrootp, pvp, buf, buflen, cr); 1311 if (ret == 0) { 1312 len = strlen(buf); 1313 if (len + strlen(path) + 1 >= buflen) { 1314 ret = ENAMETOOLONG; 1315 } else { 1316 if (buf[len - 1] != '/') 1317 buf[len++] = '/'; 1318 bcopy(path, buf + len, 1319 strlen(path) + 1); 1320 } 1321 } 1322 1323 VN_RELE(pvp); 1324 } else 1325 ret = ENOENT; 1326 } else 1327 ret = dirtopath(vrootp, vp, buf, buflen, cr); 1328 1329 VN_RELE(vrootp); 1330 if (doclose) { 1331 (void) VOP_CLOSE(vp, FREAD, 1, 0, cr); 1332 VN_RELE(vp); 1333 } 1334 1335 return (ret); 1336 } 1337 1338 int 1339 vnodetopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr) 1340 { 1341 return (vnodetopath_common(vrootp, vp, buf, buflen, cr, 0)); 1342 } 1343 1344 int 1345 dogetcwd(char *buf, size_t buflen) 1346 { 1347 int ret; 1348 vnode_t *vp; 1349 vnode_t *compvp; 1350 refstr_t *cwd, *oldcwd; 1351 const char *value; 1352 pathname_t rpnp, pnp; 1353 proc_t *p = curproc; 1354 1355 /* 1356 * Check to see if there is a cached version of the cwd. If so, lookup 1357 * the cached value and make sure it is the same vnode. 1358 */ 1359 mutex_enter(&p->p_lock); 1360 if ((cwd = PTOU(p)->u_cwd) != NULL) 1361 refstr_hold(cwd); 1362 vp = PTOU(p)->u_cdir; 1363 VN_HOLD(vp); 1364 mutex_exit(&p->p_lock); 1365 1366 /* 1367 * Make sure we have permission to access the current directory. 1368 */ 1369 if ((ret = VOP_ACCESS(vp, VEXEC, 0, CRED())) != 0) { 1370 if (cwd != NULL) 1371 refstr_rele(cwd); 1372 VN_RELE(vp); 1373 return (ret); 1374 } 1375 1376 if (cwd) { 1377 value = refstr_value(cwd); 1378 if ((ret = pn_get((char *)value, UIO_SYSSPACE, &pnp)) != 0) { 1379 refstr_rele(cwd); 1380 VN_RELE(vp); 1381 return (ret); 1382 } 1383 1384 pn_alloc(&rpnp); 1385 1386 if (lookuppn(&pnp, &rpnp, NO_FOLLOW, NULL, &compvp) == 0) { 1387 1388 if (VN_CMP(vp, compvp) && 1389 strcmp(value, rpnp.pn_path) == 0) { 1390 VN_RELE(compvp); 1391 VN_RELE(vp); 1392 pn_free(&pnp); 1393 pn_free(&rpnp); 1394 if (strlen(value) + 1 > buflen) { 1395 refstr_rele(cwd); 1396 return (ENAMETOOLONG); 1397 } 1398 bcopy(value, buf, strlen(value) + 1); 1399 refstr_rele(cwd); 1400 return (0); 1401 } 1402 1403 VN_RELE(compvp); 1404 } 1405 1406 pn_free(&rpnp); 1407 pn_free(&pnp); 1408 1409 refstr_rele(cwd); 1410 } 1411 1412 ret = vnodetopath_common(NULL, vp, buf, buflen, CRED(), 1413 LOOKUP_CHECKREAD); 1414 1415 VN_RELE(vp); 1416 1417 /* 1418 * Store the new cwd and replace the existing cached copy. 1419 */ 1420 if (ret == 0) 1421 cwd = refstr_alloc(buf); 1422 else 1423 cwd = NULL; 1424 1425 mutex_enter(&p->p_lock); 1426 oldcwd = PTOU(p)->u_cwd; 1427 PTOU(p)->u_cwd = cwd; 1428 mutex_exit(&p->p_lock); 1429 1430 if (oldcwd) 1431 refstr_rele(oldcwd); 1432 1433 return (ret); 1434 } 1435