1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_ktrace.h" 41 #include "opt_mac.h" 42 #include "opt_vfs.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/namei.h> 50 #include <sys/vnode.h> 51 #include <sys/mount.h> 52 #include <sys/filedesc.h> 53 #include <sys/proc.h> 54 #include <sys/syscallsubr.h> 55 #include <sys/sysctl.h> 56 #ifdef KTRACE 57 #include <sys/ktrace.h> 58 #endif 59 60 #include <security/audit/audit.h> 61 #include <security/mac/mac_framework.h> 62 63 #include <vm/uma.h> 64 65 #define NAMEI_DIAGNOSTIC 1 66 #undef NAMEI_DIAGNOSTIC 67 68 /* 69 * Allocation zone for namei 70 */ 71 uma_zone_t namei_zone; 72 /* 73 * Placeholder vnode for mp traversal 74 */ 75 static struct vnode *vp_crossmp; 76 77 static void 78 nameiinit(void *dummy __unused) 79 { 80 namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL, 81 UMA_ALIGN_PTR, 0); 82 getnewvnode("crossmp", NULL, &dead_vnodeops, &vp_crossmp); 83 vp_crossmp->v_vnlock->lk_flags &= ~LK_NOSHARE; 84 } 85 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL) 86 87 #ifdef LOOKUP_SHARED 88 static int lookup_shared = 1; 89 #else 90 static int lookup_shared = 0; 91 #endif 92 SYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RW, &lookup_shared, 0, 93 "Enables/Disables shared locks for path name translation"); 94 95 /* 96 * Convert a pathname into a pointer to a locked vnode. 97 * 98 * The FOLLOW flag is set when symbolic links are to be followed 99 * when they occur at the end of the name translation process. 100 * Symbolic links are always followed for all other pathname 101 * components other than the last. 102 * 103 * The segflg defines whether the name is to be copied from user 104 * space or kernel space. 105 * 106 * Overall outline of namei: 107 * 108 * copy in name 109 * get starting directory 110 * while (!done && !error) { 111 * call lookup to search path. 112 * if symbolic link, massage name in buffer and continue 113 * } 114 */ 115 int 116 namei(struct nameidata *ndp) 117 { 118 struct filedesc *fdp; /* pointer to file descriptor state */ 119 char *cp; /* pointer into pathname argument */ 120 struct vnode *dp; /* the directory we are searching */ 121 struct iovec aiov; /* uio for reading symbolic links */ 122 struct uio auio; 123 int error, linklen; 124 struct componentname *cnp = &ndp->ni_cnd; 125 struct thread *td = cnp->cn_thread; 126 struct proc *p = td->td_proc; 127 int vfslocked; 128 129 KASSERT((cnp->cn_flags & MPSAFE) != 0 || mtx_owned(&Giant) != 0, 130 ("NOT MPSAFE and Giant not held")); 131 ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred; 132 KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc")); 133 KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0, 134 ("namei: nameiop contaminated with flags")); 135 KASSERT((cnp->cn_flags & OPMASK) == 0, 136 ("namei: flags contaminated with nameiops")); 137 if (!lookup_shared) 138 cnp->cn_flags &= ~LOCKSHARED; 139 fdp = p->p_fd; 140 141 /* 142 * Get a buffer for the name to be translated, and copy the 143 * name into the buffer. 144 */ 145 if ((cnp->cn_flags & HASBUF) == 0) 146 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 147 if (ndp->ni_segflg == UIO_SYSSPACE) 148 error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, 149 MAXPATHLEN, (size_t *)&ndp->ni_pathlen); 150 else 151 error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, 152 MAXPATHLEN, (size_t *)&ndp->ni_pathlen); 153 154 /* If we are auditing the kernel pathname, save the user pathname. */ 155 if (cnp->cn_flags & AUDITVNODE1) 156 AUDIT_ARG(upath, td, cnp->cn_pnbuf, ARG_UPATH1); 157 if (cnp->cn_flags & AUDITVNODE2) 158 AUDIT_ARG(upath, td, cnp->cn_pnbuf, ARG_UPATH2); 159 160 /* 161 * Don't allow empty pathnames. 162 */ 163 if (!error && *cnp->cn_pnbuf == '\0') 164 error = ENOENT; 165 166 if (error) { 167 uma_zfree(namei_zone, cnp->cn_pnbuf); 168 #ifdef DIAGNOSTIC 169 cnp->cn_pnbuf = NULL; 170 cnp->cn_nameptr = NULL; 171 #endif 172 ndp->ni_vp = NULL; 173 return (error); 174 } 175 ndp->ni_loopcnt = 0; 176 #ifdef KTRACE 177 if (KTRPOINT(td, KTR_NAMEI)) { 178 KASSERT(cnp->cn_thread == curthread, 179 ("namei not using curthread")); 180 ktrnamei(cnp->cn_pnbuf); 181 } 182 #endif 183 184 /* 185 * Get starting point for the translation. 186 */ 187 FILEDESC_LOCK(fdp); 188 ndp->ni_rootdir = fdp->fd_rdir; 189 ndp->ni_topdir = fdp->fd_jdir; 190 191 dp = fdp->fd_cdir; 192 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 193 VREF(dp); 194 FILEDESC_UNLOCK(fdp); 195 for (;;) { 196 /* 197 * Check if root directory should replace current directory. 198 * Done at start of translation and after symbolic link. 199 */ 200 cnp->cn_nameptr = cnp->cn_pnbuf; 201 if (*(cnp->cn_nameptr) == '/') { 202 vrele(dp); 203 VFS_UNLOCK_GIANT(vfslocked); 204 while (*(cnp->cn_nameptr) == '/') { 205 cnp->cn_nameptr++; 206 ndp->ni_pathlen--; 207 } 208 dp = ndp->ni_rootdir; 209 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 210 VREF(dp); 211 } 212 if (vfslocked) 213 ndp->ni_cnd.cn_flags |= GIANTHELD; 214 ndp->ni_startdir = dp; 215 error = lookup(ndp); 216 if (error) { 217 uma_zfree(namei_zone, cnp->cn_pnbuf); 218 #ifdef DIAGNOSTIC 219 cnp->cn_pnbuf = NULL; 220 cnp->cn_nameptr = NULL; 221 #endif 222 return (error); 223 } 224 vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0; 225 ndp->ni_cnd.cn_flags &= ~GIANTHELD; 226 /* 227 * Check for symbolic link 228 */ 229 if ((cnp->cn_flags & ISSYMLINK) == 0) { 230 if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) { 231 uma_zfree(namei_zone, cnp->cn_pnbuf); 232 #ifdef DIAGNOSTIC 233 cnp->cn_pnbuf = NULL; 234 cnp->cn_nameptr = NULL; 235 #endif 236 } else 237 cnp->cn_flags |= HASBUF; 238 239 if ((cnp->cn_flags & MPSAFE) == 0) { 240 VFS_UNLOCK_GIANT(vfslocked); 241 } else if (vfslocked) 242 ndp->ni_cnd.cn_flags |= GIANTHELD; 243 return (0); 244 } 245 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 246 error = ELOOP; 247 break; 248 } 249 #ifdef MAC 250 if ((cnp->cn_flags & NOMACCHECK) == 0) { 251 error = mac_check_vnode_readlink(td->td_ucred, 252 ndp->ni_vp); 253 if (error) 254 break; 255 } 256 #endif 257 if (ndp->ni_pathlen > 1) 258 cp = uma_zalloc(namei_zone, M_WAITOK); 259 else 260 cp = cnp->cn_pnbuf; 261 aiov.iov_base = cp; 262 aiov.iov_len = MAXPATHLEN; 263 auio.uio_iov = &aiov; 264 auio.uio_iovcnt = 1; 265 auio.uio_offset = 0; 266 auio.uio_rw = UIO_READ; 267 auio.uio_segflg = UIO_SYSSPACE; 268 auio.uio_td = (struct thread *)0; 269 auio.uio_resid = MAXPATHLEN; 270 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 271 if (error) { 272 if (ndp->ni_pathlen > 1) 273 uma_zfree(namei_zone, cp); 274 break; 275 } 276 linklen = MAXPATHLEN - auio.uio_resid; 277 if (linklen == 0) { 278 if (ndp->ni_pathlen > 1) 279 uma_zfree(namei_zone, cp); 280 error = ENOENT; 281 break; 282 } 283 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 284 if (ndp->ni_pathlen > 1) 285 uma_zfree(namei_zone, cp); 286 error = ENAMETOOLONG; 287 break; 288 } 289 if (ndp->ni_pathlen > 1) { 290 bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); 291 uma_zfree(namei_zone, cnp->cn_pnbuf); 292 cnp->cn_pnbuf = cp; 293 } else 294 cnp->cn_pnbuf[linklen] = '\0'; 295 ndp->ni_pathlen += linklen; 296 vput(ndp->ni_vp); 297 dp = ndp->ni_dvp; 298 } 299 uma_zfree(namei_zone, cnp->cn_pnbuf); 300 #ifdef DIAGNOSTIC 301 cnp->cn_pnbuf = NULL; 302 cnp->cn_nameptr = NULL; 303 #endif 304 vput(ndp->ni_vp); 305 ndp->ni_vp = NULL; 306 vrele(ndp->ni_dvp); 307 VFS_UNLOCK_GIANT(vfslocked); 308 return (error); 309 } 310 311 static int 312 compute_cn_lkflags(struct mount *mp, int lkflags) 313 { 314 if (mp == NULL || 315 ((lkflags & LK_SHARED) && !(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED))) { 316 lkflags &= ~LK_SHARED; 317 lkflags |= LK_EXCLUSIVE; 318 } 319 return lkflags; 320 } 321 322 /* 323 * Search a pathname. 324 * This is a very central and rather complicated routine. 325 * 326 * The pathname is pointed to by ni_ptr and is of length ni_pathlen. 327 * The starting directory is taken from ni_startdir. The pathname is 328 * descended until done, or a symbolic link is encountered. The variable 329 * ni_more is clear if the path is completed; it is set to one if a 330 * symbolic link needing interpretation is encountered. 331 * 332 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on 333 * whether the name is to be looked up, created, renamed, or deleted. 334 * When CREATE, RENAME, or DELETE is specified, information usable in 335 * creating, renaming, or deleting a directory entry may be calculated. 336 * If flag has LOCKPARENT or'ed into it, the parent directory is returned 337 * locked. If flag has WANTPARENT or'ed into it, the parent directory is 338 * returned unlocked. Otherwise the parent directory is not returned. If 339 * the target of the pathname exists and LOCKLEAF is or'ed into the flag 340 * the target is returned locked, otherwise it is returned unlocked. 341 * When creating or renaming and LOCKPARENT is specified, the target may not 342 * be ".". When deleting and LOCKPARENT is specified, the target may be ".". 343 * 344 * Overall outline of lookup: 345 * 346 * dirloop: 347 * identify next component of name at ndp->ni_ptr 348 * handle degenerate case where name is null string 349 * if .. and crossing mount points and on mounted filesys, find parent 350 * call VOP_LOOKUP routine for next component name 351 * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set 352 * component vnode returned in ni_vp (if it exists), locked. 353 * if result vnode is mounted on and crossing mount points, 354 * find mounted on vnode 355 * if more components of name, do next level at dirloop 356 * return the answer in ni_vp, locked if LOCKLEAF set 357 * if LOCKPARENT set, return locked parent in ni_dvp 358 * if WANTPARENT set, return unlocked parent in ni_dvp 359 */ 360 int 361 lookup(struct nameidata *ndp) 362 { 363 char *cp; /* pointer into pathname argument */ 364 struct vnode *dp = 0; /* the directory we are searching */ 365 struct vnode *tdp; /* saved dp */ 366 struct mount *mp; /* mount table entry */ 367 int docache; /* == 0 do not cache last component */ 368 int wantparent; /* 1 => wantparent or lockparent flag */ 369 int rdonly; /* lookup read-only flag bit */ 370 int trailing_slash; 371 int error = 0; 372 int dpunlocked = 0; /* dp has already been unlocked */ 373 struct componentname *cnp = &ndp->ni_cnd; 374 struct thread *td = cnp->cn_thread; 375 int vfslocked; /* VFS Giant state for child */ 376 int dvfslocked; /* VFS Giant state for parent */ 377 int tvfslocked; 378 int lkflags_save; 379 380 /* 381 * Setup: break out flag bits into variables. 382 */ 383 dvfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0; 384 vfslocked = 0; 385 ndp->ni_cnd.cn_flags &= ~GIANTHELD; 386 wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); 387 KASSERT(cnp->cn_nameiop == LOOKUP || wantparent, 388 ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT.")); 389 docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; 390 if (cnp->cn_nameiop == DELETE || 391 (wantparent && cnp->cn_nameiop != CREATE && 392 cnp->cn_nameiop != LOOKUP)) 393 docache = 0; 394 rdonly = cnp->cn_flags & RDONLY; 395 cnp->cn_flags &= ~ISSYMLINK; 396 ndp->ni_dvp = NULL; 397 /* 398 * We use shared locks until we hit the parent of the last cn then 399 * we adjust based on the requesting flags. 400 */ 401 if (lookup_shared) 402 cnp->cn_lkflags = LK_SHARED; 403 else 404 cnp->cn_lkflags = LK_EXCLUSIVE; 405 dp = ndp->ni_startdir; 406 ndp->ni_startdir = NULLVP; 407 vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td); 408 409 dirloop: 410 /* 411 * Search a new directory. 412 * 413 * The last component of the filename is left accessible via 414 * cnp->cn_nameptr for callers that need the name. Callers needing 415 * the name set the SAVENAME flag. When done, they assume 416 * responsibility for freeing the pathname buffer. 417 */ 418 cnp->cn_consume = 0; 419 for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) 420 continue; 421 cnp->cn_namelen = cp - cnp->cn_nameptr; 422 if (cnp->cn_namelen > NAME_MAX) { 423 error = ENAMETOOLONG; 424 goto bad; 425 } 426 #ifdef NAMEI_DIAGNOSTIC 427 { char c = *cp; 428 *cp = '\0'; 429 printf("{%s}: ", cnp->cn_nameptr); 430 *cp = c; } 431 #endif 432 ndp->ni_pathlen -= cnp->cn_namelen; 433 ndp->ni_next = cp; 434 435 /* 436 * Replace multiple slashes by a single slash and trailing slashes 437 * by a null. This must be done before VOP_LOOKUP() because some 438 * fs's don't know about trailing slashes. Remember if there were 439 * trailing slashes to handle symlinks, existing non-directories 440 * and non-existing files that won't be directories specially later. 441 */ 442 trailing_slash = 0; 443 while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { 444 cp++; 445 ndp->ni_pathlen--; 446 if (*cp == '\0') { 447 trailing_slash = 1; 448 *ndp->ni_next = '\0'; /* XXX for direnter() ... */ 449 } 450 } 451 ndp->ni_next = cp; 452 453 cnp->cn_flags |= MAKEENTRY; 454 if (*cp == '\0' && docache == 0) 455 cnp->cn_flags &= ~MAKEENTRY; 456 if (cnp->cn_namelen == 2 && 457 cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') 458 cnp->cn_flags |= ISDOTDOT; 459 else 460 cnp->cn_flags &= ~ISDOTDOT; 461 if (*ndp->ni_next == 0) 462 cnp->cn_flags |= ISLASTCN; 463 else 464 cnp->cn_flags &= ~ISLASTCN; 465 466 467 /* 468 * Check for degenerate name (e.g. / or "") 469 * which is a way of talking about a directory, 470 * e.g. like "/." or ".". 471 */ 472 if (cnp->cn_nameptr[0] == '\0') { 473 if (dp->v_type != VDIR) { 474 error = ENOTDIR; 475 goto bad; 476 } 477 if (cnp->cn_nameiop != LOOKUP) { 478 error = EISDIR; 479 goto bad; 480 } 481 if (wantparent) { 482 ndp->ni_dvp = dp; 483 VREF(dp); 484 } 485 ndp->ni_vp = dp; 486 487 if (cnp->cn_flags & AUDITVNODE1) 488 AUDIT_ARG(vnode, dp, ARG_VNODE1); 489 else if (cnp->cn_flags & AUDITVNODE2) 490 AUDIT_ARG(vnode, dp, ARG_VNODE2); 491 492 if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF))) 493 VOP_UNLOCK(dp, 0, td); 494 /* XXX This should probably move to the top of function. */ 495 if (cnp->cn_flags & SAVESTART) 496 panic("lookup: SAVESTART"); 497 goto success; 498 } 499 500 /* 501 * Handle "..": four special cases. 502 * 1. Return an error if this is the last component of 503 * the name and the operation is DELETE or RENAME. 504 * 2. If at root directory (e.g. after chroot) 505 * or at absolute root directory 506 * then ignore it so can't get out. 507 * 3. If this vnode is the root of a mounted 508 * filesystem, then replace it with the 509 * vnode which was mounted on so we take the 510 * .. in the other filesystem. 511 * 4. If the vnode is the top directory of 512 * the jail or chroot, don't let them out. 513 */ 514 if (cnp->cn_flags & ISDOTDOT) { 515 if ((cnp->cn_flags & ISLASTCN) != 0 && 516 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 517 error = EINVAL; 518 goto bad; 519 } 520 for (;;) { 521 if (dp == ndp->ni_rootdir || 522 dp == ndp->ni_topdir || 523 dp == rootvnode) { 524 ndp->ni_dvp = dp; 525 ndp->ni_vp = dp; 526 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 527 VREF(dp); 528 goto nextname; 529 } 530 if ((dp->v_vflag & VV_ROOT) == 0 || 531 (cnp->cn_flags & NOCROSSMOUNT)) 532 break; 533 if (dp->v_iflag & VI_DOOMED) { /* forced unmount */ 534 error = EBADF; 535 goto bad; 536 } 537 tdp = dp; 538 dp = dp->v_mount->mnt_vnodecovered; 539 tvfslocked = dvfslocked; 540 dvfslocked = VFS_LOCK_GIANT(dp->v_mount); 541 VREF(dp); 542 vput(tdp); 543 VFS_UNLOCK_GIANT(tvfslocked); 544 vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td); 545 } 546 } 547 548 /* 549 * We now have a segment name to search for, and a directory to search. 550 */ 551 unionlookup: 552 #ifdef MAC 553 if ((cnp->cn_flags & NOMACCHECK) == 0) { 554 error = mac_check_vnode_lookup(td->td_ucred, dp, cnp); 555 if (error) 556 goto bad; 557 } 558 #endif 559 ndp->ni_dvp = dp; 560 ndp->ni_vp = NULL; 561 ASSERT_VOP_LOCKED(dp, "lookup"); 562 VNASSERT(vfslocked == 0, dp, ("lookup: vfslocked %d", vfslocked)); 563 /* 564 * If we have a shared lock we may need to upgrade the lock for the 565 * last operation. 566 */ 567 if (dp != vp_crossmp && 568 VOP_ISLOCKED(dp, td) == LK_SHARED && 569 (cnp->cn_flags & ISLASTCN) && (cnp->cn_flags & LOCKPARENT)) 570 vn_lock(dp, LK_UPGRADE|LK_RETRY, td); 571 /* 572 * If we're looking up the last component and we need an exclusive 573 * lock, adjust our lkflags. 574 */ 575 if ((cnp->cn_flags & (ISLASTCN|LOCKSHARED|LOCKLEAF)) == 576 (ISLASTCN|LOCKLEAF)) 577 cnp->cn_lkflags = LK_EXCLUSIVE; 578 #ifdef NAMEI_DIAGNOSTIC 579 vprint("lookup in", dp); 580 #endif 581 lkflags_save = cnp->cn_lkflags; 582 cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags); 583 if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) { 584 cnp->cn_lkflags = lkflags_save; 585 KASSERT(ndp->ni_vp == NULL, ("leaf should be empty")); 586 #ifdef NAMEI_DIAGNOSTIC 587 printf("not found\n"); 588 #endif 589 if ((error == ENOENT) && 590 (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) && 591 (dp->v_mount->mnt_flag & MNT_UNION)) { 592 tdp = dp; 593 dp = dp->v_mount->mnt_vnodecovered; 594 tvfslocked = dvfslocked; 595 dvfslocked = VFS_LOCK_GIANT(dp->v_mount); 596 VREF(dp); 597 vput(tdp); 598 VFS_UNLOCK_GIANT(tvfslocked); 599 vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td); 600 goto unionlookup; 601 } 602 603 if (error != EJUSTRETURN) 604 goto bad; 605 /* 606 * If creating and at end of pathname, then can consider 607 * allowing file to be created. 608 */ 609 if (rdonly) { 610 error = EROFS; 611 goto bad; 612 } 613 if (*cp == '\0' && trailing_slash && 614 !(cnp->cn_flags & WILLBEDIR)) { 615 error = ENOENT; 616 goto bad; 617 } 618 if ((cnp->cn_flags & LOCKPARENT) == 0) 619 VOP_UNLOCK(dp, 0, td); 620 /* 621 * This is a temporary assert to make sure I know what the 622 * behavior here was. 623 */ 624 KASSERT((cnp->cn_flags & (WANTPARENT|LOCKPARENT)) != 0, 625 ("lookup: Unhandled case.")); 626 /* 627 * We return with ni_vp NULL to indicate that the entry 628 * doesn't currently exist, leaving a pointer to the 629 * (possibly locked) directory vnode in ndp->ni_dvp. 630 */ 631 if (cnp->cn_flags & SAVESTART) { 632 ndp->ni_startdir = ndp->ni_dvp; 633 VREF(ndp->ni_startdir); 634 } 635 goto success; 636 } else 637 cnp->cn_lkflags = lkflags_save; 638 #ifdef NAMEI_DIAGNOSTIC 639 printf("found\n"); 640 #endif 641 /* 642 * Take into account any additional components consumed by 643 * the underlying filesystem. 644 */ 645 if (cnp->cn_consume > 0) { 646 cnp->cn_nameptr += cnp->cn_consume; 647 ndp->ni_next += cnp->cn_consume; 648 ndp->ni_pathlen -= cnp->cn_consume; 649 cnp->cn_consume = 0; 650 } 651 652 dp = ndp->ni_vp; 653 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 654 655 /* 656 * Check to see if the vnode has been mounted on; 657 * if so find the root of the mounted filesystem. 658 */ 659 while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && 660 (cnp->cn_flags & NOCROSSMOUNT) == 0) { 661 if (vfs_busy(mp, 0, 0, td)) 662 continue; 663 vput(dp); 664 VFS_UNLOCK_GIANT(vfslocked); 665 vfslocked = VFS_LOCK_GIANT(mp); 666 if (dp != ndp->ni_dvp) 667 vput(ndp->ni_dvp); 668 else 669 vrele(ndp->ni_dvp); 670 VFS_UNLOCK_GIANT(dvfslocked); 671 dvfslocked = 0; 672 vref(vp_crossmp); 673 ndp->ni_dvp = vp_crossmp; 674 error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags), &tdp, td); 675 vfs_unbusy(mp, td); 676 if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT, td)) 677 panic("vp_crossmp exclusively locked or reclaimed"); 678 if (error) { 679 dpunlocked = 1; 680 goto bad2; 681 } 682 ndp->ni_vp = dp = tdp; 683 } 684 685 /* 686 * Check for symbolic link 687 */ 688 if ((dp->v_type == VLNK) && 689 ((cnp->cn_flags & FOLLOW) || trailing_slash || 690 *ndp->ni_next == '/')) { 691 cnp->cn_flags |= ISSYMLINK; 692 if (dp->v_iflag & VI_DOOMED) { 693 /* We can't know whether the directory was mounted with 694 * NOSYMFOLLOW, so we can't follow safely. */ 695 error = EBADF; 696 goto bad2; 697 } 698 if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) { 699 error = EACCES; 700 goto bad2; 701 } 702 /* 703 * Symlink code always expects an unlocked dvp. 704 */ 705 if (ndp->ni_dvp != ndp->ni_vp) 706 VOP_UNLOCK(ndp->ni_dvp, 0, td); 707 goto success; 708 } 709 710 /* 711 * Check for bogus trailing slashes. 712 */ 713 if (trailing_slash && dp->v_type != VDIR) { 714 error = ENOTDIR; 715 goto bad2; 716 } 717 718 nextname: 719 /* 720 * Not a symbolic link. If more pathname, 721 * continue at next component, else return. 722 */ 723 KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/', 724 ("lookup: invalid path state.")); 725 if (*ndp->ni_next == '/') { 726 cnp->cn_nameptr = ndp->ni_next; 727 while (*cnp->cn_nameptr == '/') { 728 cnp->cn_nameptr++; 729 ndp->ni_pathlen--; 730 } 731 if (ndp->ni_dvp != dp) 732 vput(ndp->ni_dvp); 733 else 734 vrele(ndp->ni_dvp); 735 VFS_UNLOCK_GIANT(dvfslocked); 736 dvfslocked = vfslocked; /* dp becomes dvp in dirloop */ 737 vfslocked = 0; 738 goto dirloop; 739 } 740 /* 741 * Disallow directory write attempts on read-only filesystems. 742 */ 743 if (rdonly && 744 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 745 error = EROFS; 746 goto bad2; 747 } 748 if (cnp->cn_flags & SAVESTART) { 749 ndp->ni_startdir = ndp->ni_dvp; 750 VREF(ndp->ni_startdir); 751 } 752 if (!wantparent) { 753 if (ndp->ni_dvp != dp) 754 vput(ndp->ni_dvp); 755 else 756 vrele(ndp->ni_dvp); 757 VFS_UNLOCK_GIANT(dvfslocked); 758 dvfslocked = 0; 759 } else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) 760 VOP_UNLOCK(ndp->ni_dvp, 0, td); 761 762 if (cnp->cn_flags & AUDITVNODE1) 763 AUDIT_ARG(vnode, dp, ARG_VNODE1); 764 else if (cnp->cn_flags & AUDITVNODE2) 765 AUDIT_ARG(vnode, dp, ARG_VNODE2); 766 767 if ((cnp->cn_flags & LOCKLEAF) == 0) 768 VOP_UNLOCK(dp, 0, td); 769 success: 770 if (vfslocked && dvfslocked) 771 VFS_UNLOCK_GIANT(dvfslocked); /* Only need one */ 772 if (vfslocked || dvfslocked) 773 ndp->ni_cnd.cn_flags |= GIANTHELD; 774 return (0); 775 776 bad2: 777 if (dp != ndp->ni_dvp) 778 vput(ndp->ni_dvp); 779 else 780 vrele(ndp->ni_dvp); 781 bad: 782 if (!dpunlocked) 783 vput(dp); 784 VFS_UNLOCK_GIANT(vfslocked); 785 VFS_UNLOCK_GIANT(dvfslocked); 786 ndp->ni_cnd.cn_flags &= ~GIANTHELD; 787 ndp->ni_vp = NULL; 788 return (error); 789 } 790 791 /* 792 * relookup - lookup a path name component 793 * Used by lookup to re-aquire things. 794 */ 795 int 796 relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) 797 { 798 struct thread *td = cnp->cn_thread; 799 struct vnode *dp = 0; /* the directory we are searching */ 800 int wantparent; /* 1 => wantparent or lockparent flag */ 801 int rdonly; /* lookup read-only flag bit */ 802 int error = 0; 803 804 KASSERT(cnp->cn_flags & ISLASTCN, 805 ("relookup: Not given last component.")); 806 /* 807 * Setup: break out flag bits into variables. 808 */ 809 wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT); 810 KASSERT(wantparent, ("relookup: parent not wanted.")); 811 rdonly = cnp->cn_flags & RDONLY; 812 cnp->cn_flags &= ~ISSYMLINK; 813 dp = dvp; 814 cnp->cn_lkflags = LK_EXCLUSIVE; 815 vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, td); 816 817 /* 818 * Search a new directory. 819 * 820 * The last component of the filename is left accessible via 821 * cnp->cn_nameptr for callers that need the name. Callers needing 822 * the name set the SAVENAME flag. When done, they assume 823 * responsibility for freeing the pathname buffer. 824 */ 825 #ifdef NAMEI_DIAGNOSTIC 826 printf("{%s}: ", cnp->cn_nameptr); 827 #endif 828 829 /* 830 * Check for degenerate name (e.g. / or "") 831 * which is a way of talking about a directory, 832 * e.g. like "/." or ".". 833 */ 834 if (cnp->cn_nameptr[0] == '\0') { 835 if (cnp->cn_nameiop != LOOKUP || wantparent) { 836 error = EISDIR; 837 goto bad; 838 } 839 if (dp->v_type != VDIR) { 840 error = ENOTDIR; 841 goto bad; 842 } 843 if (!(cnp->cn_flags & LOCKLEAF)) 844 VOP_UNLOCK(dp, 0, td); 845 *vpp = dp; 846 /* XXX This should probably move to the top of function. */ 847 if (cnp->cn_flags & SAVESTART) 848 panic("lookup: SAVESTART"); 849 return (0); 850 } 851 852 if (cnp->cn_flags & ISDOTDOT) 853 panic ("relookup: lookup on dot-dot"); 854 855 /* 856 * We now have a segment name to search for, and a directory to search. 857 */ 858 #ifdef NAMEI_DIAGNOSTIC 859 vprint("search in:", dp); 860 #endif 861 if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) { 862 KASSERT(*vpp == NULL, ("leaf should be empty")); 863 if (error != EJUSTRETURN) 864 goto bad; 865 /* 866 * If creating and at end of pathname, then can consider 867 * allowing file to be created. 868 */ 869 if (rdonly) { 870 error = EROFS; 871 goto bad; 872 } 873 /* ASSERT(dvp == ndp->ni_startdir) */ 874 if (cnp->cn_flags & SAVESTART) 875 VREF(dvp); 876 if ((cnp->cn_flags & LOCKPARENT) == 0) 877 VOP_UNLOCK(dp, 0, td); 878 /* 879 * This is a temporary assert to make sure I know what the 880 * behavior here was. 881 */ 882 KASSERT((cnp->cn_flags & (WANTPARENT|LOCKPARENT)) != 0, 883 ("relookup: Unhandled case.")); 884 /* 885 * We return with ni_vp NULL to indicate that the entry 886 * doesn't currently exist, leaving a pointer to the 887 * (possibly locked) directory vnode in ndp->ni_dvp. 888 */ 889 return (0); 890 } 891 892 dp = *vpp; 893 894 /* 895 * Disallow directory write attempts on read-only filesystems. 896 */ 897 if (rdonly && 898 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 899 if (dvp == dp) 900 vrele(dvp); 901 else 902 vput(dvp); 903 error = EROFS; 904 goto bad; 905 } 906 /* 907 * Set the parent lock/ref state to the requested state. 908 */ 909 if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) { 910 if (wantparent) 911 VOP_UNLOCK(dvp, 0, td); 912 else 913 vput(dvp); 914 } else if (!wantparent) 915 vrele(dvp); 916 /* 917 * Check for symbolic link 918 */ 919 KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW), 920 ("relookup: symlink found.\n")); 921 922 /* ASSERT(dvp == ndp->ni_startdir) */ 923 if (cnp->cn_flags & SAVESTART) 924 VREF(dvp); 925 926 if ((cnp->cn_flags & LOCKLEAF) == 0) 927 VOP_UNLOCK(dp, 0, td); 928 return (0); 929 bad: 930 vput(dp); 931 *vpp = NULL; 932 return (error); 933 } 934 935 /* 936 * Free data allocated by namei(); see namei(9) for details. 937 */ 938 void 939 NDFREE(struct nameidata *ndp, const u_int flags) 940 { 941 int unlock_dvp; 942 int unlock_vp; 943 944 unlock_dvp = 0; 945 unlock_vp = 0; 946 947 if (!(flags & NDF_NO_FREE_PNBUF) && 948 (ndp->ni_cnd.cn_flags & HASBUF)) { 949 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 950 ndp->ni_cnd.cn_flags &= ~HASBUF; 951 } 952 if (!(flags & NDF_NO_VP_UNLOCK) && 953 (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp) 954 unlock_vp = 1; 955 if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) { 956 if (unlock_vp) { 957 vput(ndp->ni_vp); 958 unlock_vp = 0; 959 } else 960 vrele(ndp->ni_vp); 961 ndp->ni_vp = NULL; 962 } 963 if (unlock_vp) 964 VOP_UNLOCK(ndp->ni_vp, 0, ndp->ni_cnd.cn_thread); 965 if (!(flags & NDF_NO_DVP_UNLOCK) && 966 (ndp->ni_cnd.cn_flags & LOCKPARENT) && 967 ndp->ni_dvp != ndp->ni_vp) 968 unlock_dvp = 1; 969 if (!(flags & NDF_NO_DVP_RELE) && 970 (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) { 971 if (unlock_dvp) { 972 vput(ndp->ni_dvp); 973 unlock_dvp = 0; 974 } else 975 vrele(ndp->ni_dvp); 976 ndp->ni_dvp = NULL; 977 } 978 if (unlock_dvp) 979 VOP_UNLOCK(ndp->ni_dvp, 0, ndp->ni_cnd.cn_thread); 980 if (!(flags & NDF_NO_STARTDIR_RELE) && 981 (ndp->ni_cnd.cn_flags & SAVESTART)) { 982 vrele(ndp->ni_startdir); 983 ndp->ni_startdir = NULL; 984 } 985 } 986 987 /* 988 * Determine if there is a suitable alternate filename under the specified 989 * prefix for the specified path. If the create flag is set, then the 990 * alternate prefix will be used so long as the parent directory exists. 991 * This is used by the various compatiblity ABIs so that Linux binaries prefer 992 * files under /compat/linux for example. The chosen path (whether under 993 * the prefix or under /) is returned in a kernel malloc'd buffer pointed 994 * to by pathbuf. The caller is responsible for free'ing the buffer from 995 * the M_TEMP bucket if one is returned. 996 */ 997 int 998 kern_alternate_path(struct thread *td, const char *prefix, char *path, 999 enum uio_seg pathseg, char **pathbuf, int create) 1000 { 1001 struct nameidata nd, ndroot; 1002 char *ptr, *buf, *cp; 1003 size_t len, sz; 1004 int error; 1005 1006 buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1007 *pathbuf = buf; 1008 1009 /* Copy the prefix into the new pathname as a starting point. */ 1010 len = strlcpy(buf, prefix, MAXPATHLEN); 1011 if (len >= MAXPATHLEN) { 1012 *pathbuf = NULL; 1013 free(buf, M_TEMP); 1014 return (EINVAL); 1015 } 1016 sz = MAXPATHLEN - len; 1017 ptr = buf + len; 1018 1019 /* Append the filename to the prefix. */ 1020 if (pathseg == UIO_SYSSPACE) 1021 error = copystr(path, ptr, sz, &len); 1022 else 1023 error = copyinstr(path, ptr, sz, &len); 1024 1025 if (error) { 1026 *pathbuf = NULL; 1027 free(buf, M_TEMP); 1028 return (error); 1029 } 1030 1031 /* Only use a prefix with absolute pathnames. */ 1032 if (*ptr != '/') { 1033 error = EINVAL; 1034 goto keeporig; 1035 } 1036 1037 /* 1038 * We know that there is a / somewhere in this pathname. 1039 * Search backwards for it, to find the file's parent dir 1040 * to see if it exists in the alternate tree. If it does, 1041 * and we want to create a file (cflag is set). We don't 1042 * need to worry about the root comparison in this case. 1043 */ 1044 1045 if (create) { 1046 for (cp = &ptr[len] - 1; *cp != '/'; cp--); 1047 *cp = '\0'; 1048 1049 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td); 1050 error = namei(&nd); 1051 *cp = '/'; 1052 if (error != 0) 1053 goto keeporig; 1054 } else { 1055 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td); 1056 1057 error = namei(&nd); 1058 if (error != 0) 1059 goto keeporig; 1060 1061 /* 1062 * We now compare the vnode of the prefix to the one 1063 * vnode asked. If they resolve to be the same, then we 1064 * ignore the match so that the real root gets used. 1065 * This avoids the problem of traversing "../.." to find the 1066 * root directory and never finding it, because "/" resolves 1067 * to the emulation root directory. This is expensive :-( 1068 */ 1069 NDINIT(&ndroot, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, prefix, 1070 td); 1071 1072 /* We shouldn't ever get an error from this namei(). */ 1073 error = namei(&ndroot); 1074 if (error == 0) { 1075 if (nd.ni_vp == ndroot.ni_vp) 1076 error = ENOENT; 1077 1078 NDFREE(&ndroot, NDF_ONLY_PNBUF); 1079 vrele(ndroot.ni_vp); 1080 VFS_UNLOCK_GIANT(NDHASGIANT(&ndroot)); 1081 } 1082 } 1083 1084 NDFREE(&nd, NDF_ONLY_PNBUF); 1085 vrele(nd.ni_vp); 1086 VFS_UNLOCK_GIANT(NDHASGIANT(&nd)); 1087 1088 keeporig: 1089 /* If there was an error, use the original path name. */ 1090 if (error) 1091 bcopy(ptr, buf, len); 1092 return (error); 1093 } 1094