1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_ktrace.h" 41 #include "opt_mac.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/fcntl.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/namei.h> 50 #include <sys/vnode.h> 51 #include <sys/mount.h> 52 #include <sys/filedesc.h> 53 #include <sys/proc.h> 54 #include <sys/syscallsubr.h> 55 #include <sys/sysctl.h> 56 #ifdef KTRACE 57 #include <sys/ktrace.h> 58 #endif 59 60 #include <security/audit/audit.h> 61 #include <security/mac/mac_framework.h> 62 63 #include <vm/uma.h> 64 65 #define NAMEI_DIAGNOSTIC 1 66 #undef NAMEI_DIAGNOSTIC 67 68 /* 69 * Allocation zone for namei 70 */ 71 uma_zone_t namei_zone; 72 /* 73 * Placeholder vnode for mp traversal 74 */ 75 static struct vnode *vp_crossmp; 76 77 static void 78 nameiinit(void *dummy __unused) 79 { 80 int error; 81 82 namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL, 83 UMA_ALIGN_PTR, 0); 84 error = getnewvnode("crossmp", NULL, &dead_vnodeops, &vp_crossmp); 85 if (error != 0) 86 panic("nameiinit: getnewvnode"); 87 VN_LOCK_ASHARE(vp_crossmp); 88 } 89 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL); 90 91 static int lookup_shared = 1; 92 SYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RW, &lookup_shared, 0, 93 "Enables/Disables shared locks for path name translation"); 94 TUNABLE_INT("vfs.lookup_shared", &lookup_shared); 95 96 /* 97 * Convert a pathname into a pointer to a locked vnode. 98 * 99 * The FOLLOW flag is set when symbolic links are to be followed 100 * when they occur at the end of the name translation process. 101 * Symbolic links are always followed for all other pathname 102 * components other than the last. 103 * 104 * The segflg defines whether the name is to be copied from user 105 * space or kernel space. 106 * 107 * Overall outline of namei: 108 * 109 * copy in name 110 * get starting directory 111 * while (!done && !error) { 112 * call lookup to search path. 113 * if symbolic link, massage name in buffer and continue 114 * } 115 */ 116 int 117 namei(struct nameidata *ndp) 118 { 119 struct filedesc *fdp; /* pointer to file descriptor state */ 120 char *cp; /* pointer into pathname argument */ 121 struct vnode *dp; /* the directory we are searching */ 122 struct iovec aiov; /* uio for reading symbolic links */ 123 struct uio auio; 124 int error, linklen; 125 struct componentname *cnp = &ndp->ni_cnd; 126 struct thread *td = cnp->cn_thread; 127 struct proc *p = td->td_proc; 128 int vfslocked; 129 130 KASSERT((cnp->cn_flags & MPSAFE) != 0 || mtx_owned(&Giant) != 0, 131 ("NOT MPSAFE and Giant not held")); 132 ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred; 133 KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc")); 134 KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0, 135 ("namei: nameiop contaminated with flags")); 136 KASSERT((cnp->cn_flags & OPMASK) == 0, 137 ("namei: flags contaminated with nameiops")); 138 if (!lookup_shared) 139 cnp->cn_flags &= ~LOCKSHARED; 140 fdp = p->p_fd; 141 142 /* 143 * Get a buffer for the name to be translated, and copy the 144 * name into the buffer. 145 */ 146 if ((cnp->cn_flags & HASBUF) == 0) 147 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 148 if (ndp->ni_segflg == UIO_SYSSPACE) 149 error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, 150 MAXPATHLEN, (size_t *)&ndp->ni_pathlen); 151 else 152 error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, 153 MAXPATHLEN, (size_t *)&ndp->ni_pathlen); 154 155 /* If we are auditing the kernel pathname, save the user pathname. */ 156 if (cnp->cn_flags & AUDITVNODE1) 157 AUDIT_ARG(upath, td, cnp->cn_pnbuf, ARG_UPATH1); 158 if (cnp->cn_flags & AUDITVNODE2) 159 AUDIT_ARG(upath, td, cnp->cn_pnbuf, ARG_UPATH2); 160 161 /* 162 * Don't allow empty pathnames. 163 */ 164 if (!error && *cnp->cn_pnbuf == '\0') 165 error = ENOENT; 166 167 if (error) { 168 uma_zfree(namei_zone, cnp->cn_pnbuf); 169 #ifdef DIAGNOSTIC 170 cnp->cn_pnbuf = NULL; 171 cnp->cn_nameptr = NULL; 172 #endif 173 ndp->ni_vp = NULL; 174 return (error); 175 } 176 ndp->ni_loopcnt = 0; 177 #ifdef KTRACE 178 if (KTRPOINT(td, KTR_NAMEI)) { 179 KASSERT(cnp->cn_thread == curthread, 180 ("namei not using curthread")); 181 ktrnamei(cnp->cn_pnbuf); 182 } 183 #endif 184 185 /* 186 * Get starting point for the translation. 187 */ 188 FILEDESC_SLOCK(fdp); 189 ndp->ni_rootdir = fdp->fd_rdir; 190 ndp->ni_topdir = fdp->fd_jdir; 191 192 dp = NULL; 193 if (cnp->cn_pnbuf[0] != '/') { 194 if (ndp->ni_startdir != NULL) { 195 dp = ndp->ni_startdir; 196 error = 0; 197 } else if (ndp->ni_dirfd != AT_FDCWD) 198 error = fgetvp(td, ndp->ni_dirfd, &dp); 199 if (error != 0 || dp != NULL) { 200 FILEDESC_SUNLOCK(fdp); 201 if (error == 0 && dp->v_type != VDIR) { 202 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 203 vrele(dp); 204 VFS_UNLOCK_GIANT(vfslocked); 205 error = ENOTDIR; 206 } 207 } 208 if (error) { 209 uma_zfree(namei_zone, cnp->cn_pnbuf); 210 #ifdef DIAGNOSTIC 211 cnp->cn_pnbuf = NULL; 212 cnp->cn_nameptr = NULL; 213 #endif 214 return (error); 215 } 216 } 217 if (dp == NULL) { 218 dp = fdp->fd_cdir; 219 VREF(dp); 220 FILEDESC_SUNLOCK(fdp); 221 if (ndp->ni_startdir != NULL) { 222 vfslocked = VFS_LOCK_GIANT(ndp->ni_startdir->v_mount); 223 vrele(ndp->ni_startdir); 224 VFS_UNLOCK_GIANT(vfslocked); 225 } 226 } 227 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 228 for (;;) { 229 /* 230 * Check if root directory should replace current directory. 231 * Done at start of translation and after symbolic link. 232 */ 233 cnp->cn_nameptr = cnp->cn_pnbuf; 234 if (*(cnp->cn_nameptr) == '/') { 235 vrele(dp); 236 VFS_UNLOCK_GIANT(vfslocked); 237 while (*(cnp->cn_nameptr) == '/') { 238 cnp->cn_nameptr++; 239 ndp->ni_pathlen--; 240 } 241 dp = ndp->ni_rootdir; 242 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 243 VREF(dp); 244 } 245 if (vfslocked) 246 ndp->ni_cnd.cn_flags |= GIANTHELD; 247 ndp->ni_startdir = dp; 248 error = lookup(ndp); 249 if (error) { 250 uma_zfree(namei_zone, cnp->cn_pnbuf); 251 #ifdef DIAGNOSTIC 252 cnp->cn_pnbuf = NULL; 253 cnp->cn_nameptr = NULL; 254 #endif 255 return (error); 256 } 257 vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0; 258 ndp->ni_cnd.cn_flags &= ~GIANTHELD; 259 /* 260 * Check for symbolic link 261 */ 262 if ((cnp->cn_flags & ISSYMLINK) == 0) { 263 if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) { 264 uma_zfree(namei_zone, cnp->cn_pnbuf); 265 #ifdef DIAGNOSTIC 266 cnp->cn_pnbuf = NULL; 267 cnp->cn_nameptr = NULL; 268 #endif 269 } else 270 cnp->cn_flags |= HASBUF; 271 272 if ((cnp->cn_flags & MPSAFE) == 0) { 273 VFS_UNLOCK_GIANT(vfslocked); 274 } else if (vfslocked) 275 ndp->ni_cnd.cn_flags |= GIANTHELD; 276 return (0); 277 } 278 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 279 error = ELOOP; 280 break; 281 } 282 #ifdef MAC 283 if ((cnp->cn_flags & NOMACCHECK) == 0) { 284 error = mac_vnode_check_readlink(td->td_ucred, 285 ndp->ni_vp); 286 if (error) 287 break; 288 } 289 #endif 290 if (ndp->ni_pathlen > 1) 291 cp = uma_zalloc(namei_zone, M_WAITOK); 292 else 293 cp = cnp->cn_pnbuf; 294 aiov.iov_base = cp; 295 aiov.iov_len = MAXPATHLEN; 296 auio.uio_iov = &aiov; 297 auio.uio_iovcnt = 1; 298 auio.uio_offset = 0; 299 auio.uio_rw = UIO_READ; 300 auio.uio_segflg = UIO_SYSSPACE; 301 auio.uio_td = (struct thread *)0; 302 auio.uio_resid = MAXPATHLEN; 303 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 304 if (error) { 305 if (ndp->ni_pathlen > 1) 306 uma_zfree(namei_zone, cp); 307 break; 308 } 309 linklen = MAXPATHLEN - auio.uio_resid; 310 if (linklen == 0) { 311 if (ndp->ni_pathlen > 1) 312 uma_zfree(namei_zone, cp); 313 error = ENOENT; 314 break; 315 } 316 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 317 if (ndp->ni_pathlen > 1) 318 uma_zfree(namei_zone, cp); 319 error = ENAMETOOLONG; 320 break; 321 } 322 if (ndp->ni_pathlen > 1) { 323 bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); 324 uma_zfree(namei_zone, cnp->cn_pnbuf); 325 cnp->cn_pnbuf = cp; 326 } else 327 cnp->cn_pnbuf[linklen] = '\0'; 328 ndp->ni_pathlen += linklen; 329 vput(ndp->ni_vp); 330 dp = ndp->ni_dvp; 331 } 332 uma_zfree(namei_zone, cnp->cn_pnbuf); 333 #ifdef DIAGNOSTIC 334 cnp->cn_pnbuf = NULL; 335 cnp->cn_nameptr = NULL; 336 #endif 337 vput(ndp->ni_vp); 338 ndp->ni_vp = NULL; 339 vrele(ndp->ni_dvp); 340 VFS_UNLOCK_GIANT(vfslocked); 341 return (error); 342 } 343 344 static int 345 compute_cn_lkflags(struct mount *mp, int lkflags) 346 { 347 348 if (mp == NULL || 349 ((lkflags & LK_SHARED) && !(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED))) { 350 lkflags &= ~LK_SHARED; 351 lkflags |= LK_EXCLUSIVE; 352 } 353 return (lkflags); 354 } 355 356 /* 357 * Search a pathname. 358 * This is a very central and rather complicated routine. 359 * 360 * The pathname is pointed to by ni_ptr and is of length ni_pathlen. 361 * The starting directory is taken from ni_startdir. The pathname is 362 * descended until done, or a symbolic link is encountered. The variable 363 * ni_more is clear if the path is completed; it is set to one if a 364 * symbolic link needing interpretation is encountered. 365 * 366 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on 367 * whether the name is to be looked up, created, renamed, or deleted. 368 * When CREATE, RENAME, or DELETE is specified, information usable in 369 * creating, renaming, or deleting a directory entry may be calculated. 370 * If flag has LOCKPARENT or'ed into it, the parent directory is returned 371 * locked. If flag has WANTPARENT or'ed into it, the parent directory is 372 * returned unlocked. Otherwise the parent directory is not returned. If 373 * the target of the pathname exists and LOCKLEAF is or'ed into the flag 374 * the target is returned locked, otherwise it is returned unlocked. 375 * When creating or renaming and LOCKPARENT is specified, the target may not 376 * be ".". When deleting and LOCKPARENT is specified, the target may be ".". 377 * 378 * Overall outline of lookup: 379 * 380 * dirloop: 381 * identify next component of name at ndp->ni_ptr 382 * handle degenerate case where name is null string 383 * if .. and crossing mount points and on mounted filesys, find parent 384 * call VOP_LOOKUP routine for next component name 385 * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set 386 * component vnode returned in ni_vp (if it exists), locked. 387 * if result vnode is mounted on and crossing mount points, 388 * find mounted on vnode 389 * if more components of name, do next level at dirloop 390 * return the answer in ni_vp, locked if LOCKLEAF set 391 * if LOCKPARENT set, return locked parent in ni_dvp 392 * if WANTPARENT set, return unlocked parent in ni_dvp 393 */ 394 int 395 lookup(struct nameidata *ndp) 396 { 397 char *cp; /* pointer into pathname argument */ 398 struct vnode *dp = 0; /* the directory we are searching */ 399 struct vnode *tdp; /* saved dp */ 400 struct mount *mp; /* mount table entry */ 401 int docache; /* == 0 do not cache last component */ 402 int wantparent; /* 1 => wantparent or lockparent flag */ 403 int rdonly; /* lookup read-only flag bit */ 404 int trailing_slash; 405 int error = 0; 406 int dpunlocked = 0; /* dp has already been unlocked */ 407 struct componentname *cnp = &ndp->ni_cnd; 408 struct thread *td = cnp->cn_thread; 409 int vfslocked; /* VFS Giant state for child */ 410 int dvfslocked; /* VFS Giant state for parent */ 411 int tvfslocked; 412 int lkflags_save; 413 414 /* 415 * Setup: break out flag bits into variables. 416 */ 417 dvfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0; 418 vfslocked = 0; 419 ndp->ni_cnd.cn_flags &= ~GIANTHELD; 420 wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); 421 KASSERT(cnp->cn_nameiop == LOOKUP || wantparent, 422 ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT.")); 423 docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; 424 if (cnp->cn_nameiop == DELETE || 425 (wantparent && cnp->cn_nameiop != CREATE && 426 cnp->cn_nameiop != LOOKUP)) 427 docache = 0; 428 rdonly = cnp->cn_flags & RDONLY; 429 cnp->cn_flags &= ~ISSYMLINK; 430 ndp->ni_dvp = NULL; 431 /* 432 * We use shared locks until we hit the parent of the last cn then 433 * we adjust based on the requesting flags. 434 */ 435 if (lookup_shared) 436 cnp->cn_lkflags = LK_SHARED; 437 else 438 cnp->cn_lkflags = LK_EXCLUSIVE; 439 dp = ndp->ni_startdir; 440 ndp->ni_startdir = NULLVP; 441 vn_lock(dp, 442 compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY)); 443 444 dirloop: 445 /* 446 * Search a new directory. 447 * 448 * The last component of the filename is left accessible via 449 * cnp->cn_nameptr for callers that need the name. Callers needing 450 * the name set the SAVENAME flag. When done, they assume 451 * responsibility for freeing the pathname buffer. 452 */ 453 cnp->cn_consume = 0; 454 for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) 455 continue; 456 cnp->cn_namelen = cp - cnp->cn_nameptr; 457 if (cnp->cn_namelen > NAME_MAX) { 458 error = ENAMETOOLONG; 459 goto bad; 460 } 461 #ifdef NAMEI_DIAGNOSTIC 462 { char c = *cp; 463 *cp = '\0'; 464 printf("{%s}: ", cnp->cn_nameptr); 465 *cp = c; } 466 #endif 467 ndp->ni_pathlen -= cnp->cn_namelen; 468 ndp->ni_next = cp; 469 470 /* 471 * Replace multiple slashes by a single slash and trailing slashes 472 * by a null. This must be done before VOP_LOOKUP() because some 473 * fs's don't know about trailing slashes. Remember if there were 474 * trailing slashes to handle symlinks, existing non-directories 475 * and non-existing files that won't be directories specially later. 476 */ 477 trailing_slash = 0; 478 while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { 479 cp++; 480 ndp->ni_pathlen--; 481 if (*cp == '\0') { 482 trailing_slash = 1; 483 *ndp->ni_next = '\0'; /* XXX for direnter() ... */ 484 } 485 } 486 ndp->ni_next = cp; 487 488 cnp->cn_flags |= MAKEENTRY; 489 if (*cp == '\0' && docache == 0) 490 cnp->cn_flags &= ~MAKEENTRY; 491 if (cnp->cn_namelen == 2 && 492 cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') 493 cnp->cn_flags |= ISDOTDOT; 494 else 495 cnp->cn_flags &= ~ISDOTDOT; 496 if (*ndp->ni_next == 0) 497 cnp->cn_flags |= ISLASTCN; 498 else 499 cnp->cn_flags &= ~ISLASTCN; 500 501 502 /* 503 * Check for degenerate name (e.g. / or "") 504 * which is a way of talking about a directory, 505 * e.g. like "/." or ".". 506 */ 507 if (cnp->cn_nameptr[0] == '\0') { 508 if (dp->v_type != VDIR) { 509 error = ENOTDIR; 510 goto bad; 511 } 512 if (cnp->cn_nameiop != LOOKUP) { 513 error = EISDIR; 514 goto bad; 515 } 516 if (wantparent) { 517 ndp->ni_dvp = dp; 518 VREF(dp); 519 } 520 ndp->ni_vp = dp; 521 522 if (cnp->cn_flags & AUDITVNODE1) 523 AUDIT_ARG(vnode, dp, ARG_VNODE1); 524 else if (cnp->cn_flags & AUDITVNODE2) 525 AUDIT_ARG(vnode, dp, ARG_VNODE2); 526 527 if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF))) 528 VOP_UNLOCK(dp, 0); 529 /* XXX This should probably move to the top of function. */ 530 if (cnp->cn_flags & SAVESTART) 531 panic("lookup: SAVESTART"); 532 goto success; 533 } 534 535 /* 536 * Handle "..": four special cases. 537 * 1. Return an error if this is the last component of 538 * the name and the operation is DELETE or RENAME. 539 * 2. If at root directory (e.g. after chroot) 540 * or at absolute root directory 541 * then ignore it so can't get out. 542 * 3. If this vnode is the root of a mounted 543 * filesystem, then replace it with the 544 * vnode which was mounted on so we take the 545 * .. in the other filesystem. 546 * 4. If the vnode is the top directory of 547 * the jail or chroot, don't let them out. 548 */ 549 if (cnp->cn_flags & ISDOTDOT) { 550 if ((cnp->cn_flags & ISLASTCN) != 0 && 551 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 552 error = EINVAL; 553 goto bad; 554 } 555 for (;;) { 556 if (dp == ndp->ni_rootdir || 557 dp == ndp->ni_topdir || 558 dp == rootvnode || 559 ((dp->v_vflag & VV_ROOT) != 0 && 560 (cnp->cn_flags & NOCROSSMOUNT) != 0)) { 561 ndp->ni_dvp = dp; 562 ndp->ni_vp = dp; 563 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 564 VREF(dp); 565 goto nextname; 566 } 567 if ((dp->v_vflag & VV_ROOT) == 0) 568 break; 569 if (dp->v_iflag & VI_DOOMED) { /* forced unmount */ 570 error = EBADF; 571 goto bad; 572 } 573 tdp = dp; 574 dp = dp->v_mount->mnt_vnodecovered; 575 tvfslocked = dvfslocked; 576 dvfslocked = VFS_LOCK_GIANT(dp->v_mount); 577 VREF(dp); 578 vput(tdp); 579 VFS_UNLOCK_GIANT(tvfslocked); 580 vn_lock(dp, 581 compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | 582 LK_RETRY)); 583 } 584 } 585 586 /* 587 * We now have a segment name to search for, and a directory to search. 588 */ 589 unionlookup: 590 #ifdef MAC 591 if ((cnp->cn_flags & NOMACCHECK) == 0) { 592 error = mac_vnode_check_lookup(td->td_ucred, dp, cnp); 593 if (error) 594 goto bad; 595 } 596 #endif 597 ndp->ni_dvp = dp; 598 ndp->ni_vp = NULL; 599 ASSERT_VOP_LOCKED(dp, "lookup"); 600 VNASSERT(vfslocked == 0, dp, ("lookup: vfslocked %d", vfslocked)); 601 /* 602 * If we have a shared lock we may need to upgrade the lock for the 603 * last operation. 604 */ 605 if (dp != vp_crossmp && 606 VOP_ISLOCKED(dp) == LK_SHARED && 607 (cnp->cn_flags & ISLASTCN) && (cnp->cn_flags & LOCKPARENT)) 608 vn_lock(dp, LK_UPGRADE|LK_RETRY); 609 /* 610 * If we're looking up the last component and we need an exclusive 611 * lock, adjust our lkflags. 612 */ 613 if ((cnp->cn_flags & (ISLASTCN|LOCKSHARED|LOCKLEAF)) == 614 (ISLASTCN|LOCKLEAF)) 615 cnp->cn_lkflags = LK_EXCLUSIVE; 616 #ifdef NAMEI_DIAGNOSTIC 617 vprint("lookup in", dp); 618 #endif 619 lkflags_save = cnp->cn_lkflags; 620 cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags); 621 if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) { 622 cnp->cn_lkflags = lkflags_save; 623 KASSERT(ndp->ni_vp == NULL, ("leaf should be empty")); 624 #ifdef NAMEI_DIAGNOSTIC 625 printf("not found\n"); 626 #endif 627 if ((error == ENOENT) && 628 (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) && 629 (dp->v_mount->mnt_flag & MNT_UNION)) { 630 tdp = dp; 631 dp = dp->v_mount->mnt_vnodecovered; 632 tvfslocked = dvfslocked; 633 dvfslocked = VFS_LOCK_GIANT(dp->v_mount); 634 VREF(dp); 635 vput(tdp); 636 VFS_UNLOCK_GIANT(tvfslocked); 637 vn_lock(dp, 638 compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | 639 LK_RETRY)); 640 goto unionlookup; 641 } 642 643 if (error != EJUSTRETURN) 644 goto bad; 645 /* 646 * If creating and at end of pathname, then can consider 647 * allowing file to be created. 648 */ 649 if (rdonly) { 650 error = EROFS; 651 goto bad; 652 } 653 if (*cp == '\0' && trailing_slash && 654 !(cnp->cn_flags & WILLBEDIR)) { 655 error = ENOENT; 656 goto bad; 657 } 658 if ((cnp->cn_flags & LOCKPARENT) == 0) 659 VOP_UNLOCK(dp, 0); 660 /* 661 * This is a temporary assert to make sure I know what the 662 * behavior here was. 663 */ 664 KASSERT((cnp->cn_flags & (WANTPARENT|LOCKPARENT)) != 0, 665 ("lookup: Unhandled case.")); 666 /* 667 * We return with ni_vp NULL to indicate that the entry 668 * doesn't currently exist, leaving a pointer to the 669 * (possibly locked) directory vnode in ndp->ni_dvp. 670 */ 671 if (cnp->cn_flags & SAVESTART) { 672 ndp->ni_startdir = ndp->ni_dvp; 673 VREF(ndp->ni_startdir); 674 } 675 goto success; 676 } else 677 cnp->cn_lkflags = lkflags_save; 678 #ifdef NAMEI_DIAGNOSTIC 679 printf("found\n"); 680 #endif 681 /* 682 * Take into account any additional components consumed by 683 * the underlying filesystem. 684 */ 685 if (cnp->cn_consume > 0) { 686 cnp->cn_nameptr += cnp->cn_consume; 687 ndp->ni_next += cnp->cn_consume; 688 ndp->ni_pathlen -= cnp->cn_consume; 689 cnp->cn_consume = 0; 690 } 691 692 dp = ndp->ni_vp; 693 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 694 695 /* 696 * Check to see if the vnode has been mounted on; 697 * if so find the root of the mounted filesystem. 698 */ 699 while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && 700 (cnp->cn_flags & NOCROSSMOUNT) == 0) { 701 if (vfs_busy(mp, 0)) 702 continue; 703 vput(dp); 704 VFS_UNLOCK_GIANT(vfslocked); 705 vfslocked = VFS_LOCK_GIANT(mp); 706 if (dp != ndp->ni_dvp) 707 vput(ndp->ni_dvp); 708 else 709 vrele(ndp->ni_dvp); 710 VFS_UNLOCK_GIANT(dvfslocked); 711 dvfslocked = 0; 712 vref(vp_crossmp); 713 ndp->ni_dvp = vp_crossmp; 714 error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags), &tdp, td); 715 vfs_unbusy(mp); 716 if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT)) 717 panic("vp_crossmp exclusively locked or reclaimed"); 718 if (error) { 719 dpunlocked = 1; 720 goto bad2; 721 } 722 ndp->ni_vp = dp = tdp; 723 } 724 725 /* 726 * Check for symbolic link 727 */ 728 if ((dp->v_type == VLNK) && 729 ((cnp->cn_flags & FOLLOW) || trailing_slash || 730 *ndp->ni_next == '/')) { 731 cnp->cn_flags |= ISSYMLINK; 732 if (dp->v_iflag & VI_DOOMED) { 733 /* We can't know whether the directory was mounted with 734 * NOSYMFOLLOW, so we can't follow safely. */ 735 error = EBADF; 736 goto bad2; 737 } 738 if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) { 739 error = EACCES; 740 goto bad2; 741 } 742 /* 743 * Symlink code always expects an unlocked dvp. 744 */ 745 if (ndp->ni_dvp != ndp->ni_vp) 746 VOP_UNLOCK(ndp->ni_dvp, 0); 747 goto success; 748 } 749 750 /* 751 * Check for bogus trailing slashes. 752 */ 753 if (trailing_slash && dp->v_type != VDIR) { 754 error = ENOTDIR; 755 goto bad2; 756 } 757 758 nextname: 759 /* 760 * Not a symbolic link. If more pathname, 761 * continue at next component, else return. 762 */ 763 KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/', 764 ("lookup: invalid path state.")); 765 if (*ndp->ni_next == '/') { 766 cnp->cn_nameptr = ndp->ni_next; 767 while (*cnp->cn_nameptr == '/') { 768 cnp->cn_nameptr++; 769 ndp->ni_pathlen--; 770 } 771 if (ndp->ni_dvp != dp) 772 vput(ndp->ni_dvp); 773 else 774 vrele(ndp->ni_dvp); 775 VFS_UNLOCK_GIANT(dvfslocked); 776 dvfslocked = vfslocked; /* dp becomes dvp in dirloop */ 777 vfslocked = 0; 778 goto dirloop; 779 } 780 /* 781 * Disallow directory write attempts on read-only filesystems. 782 */ 783 if (rdonly && 784 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 785 error = EROFS; 786 goto bad2; 787 } 788 if (cnp->cn_flags & SAVESTART) { 789 ndp->ni_startdir = ndp->ni_dvp; 790 VREF(ndp->ni_startdir); 791 } 792 if (!wantparent) { 793 if (ndp->ni_dvp != dp) 794 vput(ndp->ni_dvp); 795 else 796 vrele(ndp->ni_dvp); 797 VFS_UNLOCK_GIANT(dvfslocked); 798 dvfslocked = 0; 799 } else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) 800 VOP_UNLOCK(ndp->ni_dvp, 0); 801 802 if (cnp->cn_flags & AUDITVNODE1) 803 AUDIT_ARG(vnode, dp, ARG_VNODE1); 804 else if (cnp->cn_flags & AUDITVNODE2) 805 AUDIT_ARG(vnode, dp, ARG_VNODE2); 806 807 if ((cnp->cn_flags & LOCKLEAF) == 0) 808 VOP_UNLOCK(dp, 0); 809 success: 810 /* 811 * Because of lookup_shared we may have the vnode shared locked, but 812 * the caller may want it to be exclusively locked. 813 */ 814 if ((cnp->cn_flags & (ISLASTCN | LOCKSHARED | LOCKLEAF)) == 815 (ISLASTCN | LOCKLEAF) && VOP_ISLOCKED(dp) != LK_EXCLUSIVE) { 816 vn_lock(dp, LK_UPGRADE | LK_RETRY); 817 if (dp->v_iflag & VI_DOOMED) { 818 error = ENOENT; 819 goto bad2; 820 } 821 } 822 if (vfslocked && dvfslocked) 823 VFS_UNLOCK_GIANT(dvfslocked); /* Only need one */ 824 if (vfslocked || dvfslocked) 825 ndp->ni_cnd.cn_flags |= GIANTHELD; 826 return (0); 827 828 bad2: 829 if (dp != ndp->ni_dvp) 830 vput(ndp->ni_dvp); 831 else 832 vrele(ndp->ni_dvp); 833 bad: 834 if (!dpunlocked) 835 vput(dp); 836 VFS_UNLOCK_GIANT(vfslocked); 837 VFS_UNLOCK_GIANT(dvfslocked); 838 ndp->ni_cnd.cn_flags &= ~GIANTHELD; 839 ndp->ni_vp = NULL; 840 return (error); 841 } 842 843 /* 844 * relookup - lookup a path name component 845 * Used by lookup to re-acquire things. 846 */ 847 int 848 relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) 849 { 850 struct vnode *dp = 0; /* the directory we are searching */ 851 int wantparent; /* 1 => wantparent or lockparent flag */ 852 int rdonly; /* lookup read-only flag bit */ 853 int error = 0; 854 855 KASSERT(cnp->cn_flags & ISLASTCN, 856 ("relookup: Not given last component.")); 857 /* 858 * Setup: break out flag bits into variables. 859 */ 860 wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT); 861 KASSERT(wantparent, ("relookup: parent not wanted.")); 862 rdonly = cnp->cn_flags & RDONLY; 863 cnp->cn_flags &= ~ISSYMLINK; 864 dp = dvp; 865 cnp->cn_lkflags = LK_EXCLUSIVE; 866 vn_lock(dp, LK_EXCLUSIVE | LK_RETRY); 867 868 /* 869 * Search a new directory. 870 * 871 * The last component of the filename is left accessible via 872 * cnp->cn_nameptr for callers that need the name. Callers needing 873 * the name set the SAVENAME flag. When done, they assume 874 * responsibility for freeing the pathname buffer. 875 */ 876 #ifdef NAMEI_DIAGNOSTIC 877 printf("{%s}: ", cnp->cn_nameptr); 878 #endif 879 880 /* 881 * Check for degenerate name (e.g. / or "") 882 * which is a way of talking about a directory, 883 * e.g. like "/." or ".". 884 */ 885 if (cnp->cn_nameptr[0] == '\0') { 886 if (cnp->cn_nameiop != LOOKUP || wantparent) { 887 error = EISDIR; 888 goto bad; 889 } 890 if (dp->v_type != VDIR) { 891 error = ENOTDIR; 892 goto bad; 893 } 894 if (!(cnp->cn_flags & LOCKLEAF)) 895 VOP_UNLOCK(dp, 0); 896 *vpp = dp; 897 /* XXX This should probably move to the top of function. */ 898 if (cnp->cn_flags & SAVESTART) 899 panic("lookup: SAVESTART"); 900 return (0); 901 } 902 903 if (cnp->cn_flags & ISDOTDOT) 904 panic ("relookup: lookup on dot-dot"); 905 906 /* 907 * We now have a segment name to search for, and a directory to search. 908 */ 909 #ifdef NAMEI_DIAGNOSTIC 910 vprint("search in:", dp); 911 #endif 912 if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) { 913 KASSERT(*vpp == NULL, ("leaf should be empty")); 914 if (error != EJUSTRETURN) 915 goto bad; 916 /* 917 * If creating and at end of pathname, then can consider 918 * allowing file to be created. 919 */ 920 if (rdonly) { 921 error = EROFS; 922 goto bad; 923 } 924 /* ASSERT(dvp == ndp->ni_startdir) */ 925 if (cnp->cn_flags & SAVESTART) 926 VREF(dvp); 927 if ((cnp->cn_flags & LOCKPARENT) == 0) 928 VOP_UNLOCK(dp, 0); 929 /* 930 * This is a temporary assert to make sure I know what the 931 * behavior here was. 932 */ 933 KASSERT((cnp->cn_flags & (WANTPARENT|LOCKPARENT)) != 0, 934 ("relookup: Unhandled case.")); 935 /* 936 * We return with ni_vp NULL to indicate that the entry 937 * doesn't currently exist, leaving a pointer to the 938 * (possibly locked) directory vnode in ndp->ni_dvp. 939 */ 940 return (0); 941 } 942 943 dp = *vpp; 944 945 /* 946 * Disallow directory write attempts on read-only filesystems. 947 */ 948 if (rdonly && 949 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 950 if (dvp == dp) 951 vrele(dvp); 952 else 953 vput(dvp); 954 error = EROFS; 955 goto bad; 956 } 957 /* 958 * Set the parent lock/ref state to the requested state. 959 */ 960 if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) { 961 if (wantparent) 962 VOP_UNLOCK(dvp, 0); 963 else 964 vput(dvp); 965 } else if (!wantparent) 966 vrele(dvp); 967 /* 968 * Check for symbolic link 969 */ 970 KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW), 971 ("relookup: symlink found.\n")); 972 973 /* ASSERT(dvp == ndp->ni_startdir) */ 974 if (cnp->cn_flags & SAVESTART) 975 VREF(dvp); 976 977 if ((cnp->cn_flags & LOCKLEAF) == 0) 978 VOP_UNLOCK(dp, 0); 979 return (0); 980 bad: 981 vput(dp); 982 *vpp = NULL; 983 return (error); 984 } 985 986 /* 987 * Free data allocated by namei(); see namei(9) for details. 988 */ 989 void 990 NDFREE(struct nameidata *ndp, const u_int flags) 991 { 992 int unlock_dvp; 993 int unlock_vp; 994 995 unlock_dvp = 0; 996 unlock_vp = 0; 997 998 if (!(flags & NDF_NO_FREE_PNBUF) && 999 (ndp->ni_cnd.cn_flags & HASBUF)) { 1000 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 1001 ndp->ni_cnd.cn_flags &= ~HASBUF; 1002 } 1003 if (!(flags & NDF_NO_VP_UNLOCK) && 1004 (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp) 1005 unlock_vp = 1; 1006 if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) { 1007 if (unlock_vp) { 1008 vput(ndp->ni_vp); 1009 unlock_vp = 0; 1010 } else 1011 vrele(ndp->ni_vp); 1012 ndp->ni_vp = NULL; 1013 } 1014 if (unlock_vp) 1015 VOP_UNLOCK(ndp->ni_vp, 0); 1016 if (!(flags & NDF_NO_DVP_UNLOCK) && 1017 (ndp->ni_cnd.cn_flags & LOCKPARENT) && 1018 ndp->ni_dvp != ndp->ni_vp) 1019 unlock_dvp = 1; 1020 if (!(flags & NDF_NO_DVP_RELE) && 1021 (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) { 1022 if (unlock_dvp) { 1023 vput(ndp->ni_dvp); 1024 unlock_dvp = 0; 1025 } else 1026 vrele(ndp->ni_dvp); 1027 ndp->ni_dvp = NULL; 1028 } 1029 if (unlock_dvp) 1030 VOP_UNLOCK(ndp->ni_dvp, 0); 1031 if (!(flags & NDF_NO_STARTDIR_RELE) && 1032 (ndp->ni_cnd.cn_flags & SAVESTART)) { 1033 vrele(ndp->ni_startdir); 1034 ndp->ni_startdir = NULL; 1035 } 1036 } 1037 1038 /* 1039 * Determine if there is a suitable alternate filename under the specified 1040 * prefix for the specified path. If the create flag is set, then the 1041 * alternate prefix will be used so long as the parent directory exists. 1042 * This is used by the various compatiblity ABIs so that Linux binaries prefer 1043 * files under /compat/linux for example. The chosen path (whether under 1044 * the prefix or under /) is returned in a kernel malloc'd buffer pointed 1045 * to by pathbuf. The caller is responsible for free'ing the buffer from 1046 * the M_TEMP bucket if one is returned. 1047 */ 1048 int 1049 kern_alternate_path(struct thread *td, const char *prefix, const char *path, 1050 enum uio_seg pathseg, char **pathbuf, int create, int dirfd) 1051 { 1052 struct nameidata nd, ndroot; 1053 char *ptr, *buf, *cp; 1054 size_t len, sz; 1055 int error; 1056 1057 buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1058 *pathbuf = buf; 1059 1060 /* Copy the prefix into the new pathname as a starting point. */ 1061 len = strlcpy(buf, prefix, MAXPATHLEN); 1062 if (len >= MAXPATHLEN) { 1063 *pathbuf = NULL; 1064 free(buf, M_TEMP); 1065 return (EINVAL); 1066 } 1067 sz = MAXPATHLEN - len; 1068 ptr = buf + len; 1069 1070 /* Append the filename to the prefix. */ 1071 if (pathseg == UIO_SYSSPACE) 1072 error = copystr(path, ptr, sz, &len); 1073 else 1074 error = copyinstr(path, ptr, sz, &len); 1075 1076 if (error) { 1077 *pathbuf = NULL; 1078 free(buf, M_TEMP); 1079 return (error); 1080 } 1081 1082 /* Only use a prefix with absolute pathnames. */ 1083 if (*ptr != '/') { 1084 error = EINVAL; 1085 goto keeporig; 1086 } 1087 1088 if (dirfd != AT_FDCWD) { 1089 /* 1090 * We want the original because the "prefix" is 1091 * included in the already opened dirfd. 1092 */ 1093 bcopy(ptr, buf, len); 1094 return (0); 1095 } 1096 1097 /* 1098 * We know that there is a / somewhere in this pathname. 1099 * Search backwards for it, to find the file's parent dir 1100 * to see if it exists in the alternate tree. If it does, 1101 * and we want to create a file (cflag is set). We don't 1102 * need to worry about the root comparison in this case. 1103 */ 1104 1105 if (create) { 1106 for (cp = &ptr[len] - 1; *cp != '/'; cp--); 1107 *cp = '\0'; 1108 1109 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td); 1110 error = namei(&nd); 1111 *cp = '/'; 1112 if (error != 0) 1113 goto keeporig; 1114 } else { 1115 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td); 1116 1117 error = namei(&nd); 1118 if (error != 0) 1119 goto keeporig; 1120 1121 /* 1122 * We now compare the vnode of the prefix to the one 1123 * vnode asked. If they resolve to be the same, then we 1124 * ignore the match so that the real root gets used. 1125 * This avoids the problem of traversing "../.." to find the 1126 * root directory and never finding it, because "/" resolves 1127 * to the emulation root directory. This is expensive :-( 1128 */ 1129 NDINIT(&ndroot, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, prefix, 1130 td); 1131 1132 /* We shouldn't ever get an error from this namei(). */ 1133 error = namei(&ndroot); 1134 if (error == 0) { 1135 if (nd.ni_vp == ndroot.ni_vp) 1136 error = ENOENT; 1137 1138 NDFREE(&ndroot, NDF_ONLY_PNBUF); 1139 vrele(ndroot.ni_vp); 1140 VFS_UNLOCK_GIANT(NDHASGIANT(&ndroot)); 1141 } 1142 } 1143 1144 NDFREE(&nd, NDF_ONLY_PNBUF); 1145 vrele(nd.ni_vp); 1146 VFS_UNLOCK_GIANT(NDHASGIANT(&nd)); 1147 1148 keeporig: 1149 /* If there was an error, use the original path name. */ 1150 if (error) 1151 bcopy(ptr, buf, len); 1152 return (error); 1153 } 1154