1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_ktrace.h" 41 #include "opt_mac.h" 42 #include "opt_vfs.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/namei.h> 50 #include <sys/vnode.h> 51 #include <sys/mount.h> 52 #include <sys/filedesc.h> 53 #include <sys/proc.h> 54 #include <sys/syscallsubr.h> 55 #include <sys/sysctl.h> 56 #ifdef KTRACE 57 #include <sys/ktrace.h> 58 #endif 59 60 #include <security/audit/audit.h> 61 #include <security/mac/mac_framework.h> 62 63 #include <vm/uma.h> 64 65 #define NAMEI_DIAGNOSTIC 1 66 #undef NAMEI_DIAGNOSTIC 67 68 /* 69 * Allocation zone for namei 70 */ 71 uma_zone_t namei_zone; 72 /* 73 * Placeholder vnode for mp traversal 74 */ 75 static struct vnode *vp_crossmp; 76 77 static void 78 nameiinit(void *dummy __unused) 79 { 80 namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL, 81 UMA_ALIGN_PTR, 0); 82 getnewvnode("crossmp", NULL, &dead_vnodeops, &vp_crossmp); 83 vp_crossmp->v_vnlock->lk_flags &= ~LK_NOSHARE; 84 } 85 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL) 86 87 #ifdef LOOKUP_SHARED 88 static int lookup_shared = 1; 89 #else 90 static int lookup_shared = 0; 91 #endif 92 SYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RW, &lookup_shared, 0, 93 "Enables/Disables shared locks for path name translation"); 94 95 /* 96 * Convert a pathname into a pointer to a locked vnode. 97 * 98 * The FOLLOW flag is set when symbolic links are to be followed 99 * when they occur at the end of the name translation process. 100 * Symbolic links are always followed for all other pathname 101 * components other than the last. 102 * 103 * The segflg defines whether the name is to be copied from user 104 * space or kernel space. 105 * 106 * Overall outline of namei: 107 * 108 * copy in name 109 * get starting directory 110 * while (!done && !error) { 111 * call lookup to search path. 112 * if symbolic link, massage name in buffer and continue 113 * } 114 */ 115 int 116 namei(struct nameidata *ndp) 117 { 118 struct filedesc *fdp; /* pointer to file descriptor state */ 119 char *cp; /* pointer into pathname argument */ 120 struct vnode *dp; /* the directory we are searching */ 121 struct iovec aiov; /* uio for reading symbolic links */ 122 struct uio auio; 123 int error, linklen; 124 struct componentname *cnp = &ndp->ni_cnd; 125 struct thread *td = cnp->cn_thread; 126 struct proc *p = td->td_proc; 127 int vfslocked; 128 129 KASSERT((cnp->cn_flags & MPSAFE) != 0 || mtx_owned(&Giant) != 0, 130 ("NOT MPSAFE and Giant not held")); 131 ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred; 132 KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc")); 133 KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0, 134 ("namei: nameiop contaminated with flags")); 135 KASSERT((cnp->cn_flags & OPMASK) == 0, 136 ("namei: flags contaminated with nameiops")); 137 if (!lookup_shared) 138 cnp->cn_flags &= ~LOCKSHARED; 139 fdp = p->p_fd; 140 141 /* 142 * Get a buffer for the name to be translated, and copy the 143 * name into the buffer. 144 */ 145 if ((cnp->cn_flags & HASBUF) == 0) 146 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 147 if (ndp->ni_segflg == UIO_SYSSPACE) 148 error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, 149 MAXPATHLEN, (size_t *)&ndp->ni_pathlen); 150 else 151 error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, 152 MAXPATHLEN, (size_t *)&ndp->ni_pathlen); 153 154 /* If we are auditing the kernel pathname, save the user pathname. */ 155 if (cnp->cn_flags & AUDITVNODE1) 156 AUDIT_ARG(upath, td, cnp->cn_pnbuf, ARG_UPATH1); 157 if (cnp->cn_flags & AUDITVNODE2) 158 AUDIT_ARG(upath, td, cnp->cn_pnbuf, ARG_UPATH2); 159 160 /* 161 * Don't allow empty pathnames. 162 */ 163 if (!error && *cnp->cn_pnbuf == '\0') 164 error = ENOENT; 165 166 if (error) { 167 uma_zfree(namei_zone, cnp->cn_pnbuf); 168 #ifdef DIAGNOSTIC 169 cnp->cn_pnbuf = NULL; 170 cnp->cn_nameptr = NULL; 171 #endif 172 ndp->ni_vp = NULL; 173 return (error); 174 } 175 ndp->ni_loopcnt = 0; 176 #ifdef KTRACE 177 if (KTRPOINT(td, KTR_NAMEI)) { 178 KASSERT(cnp->cn_thread == curthread, 179 ("namei not using curthread")); 180 ktrnamei(cnp->cn_pnbuf); 181 } 182 #endif 183 184 /* 185 * Get starting point for the translation. 186 */ 187 FILEDESC_LOCK(fdp); 188 ndp->ni_rootdir = fdp->fd_rdir; 189 ndp->ni_topdir = fdp->fd_jdir; 190 191 dp = fdp->fd_cdir; 192 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 193 VREF(dp); 194 FILEDESC_UNLOCK(fdp); 195 for (;;) { 196 /* 197 * Check if root directory should replace current directory. 198 * Done at start of translation and after symbolic link. 199 */ 200 cnp->cn_nameptr = cnp->cn_pnbuf; 201 if (*(cnp->cn_nameptr) == '/') { 202 vrele(dp); 203 VFS_UNLOCK_GIANT(vfslocked); 204 while (*(cnp->cn_nameptr) == '/') { 205 cnp->cn_nameptr++; 206 ndp->ni_pathlen--; 207 } 208 dp = ndp->ni_rootdir; 209 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 210 VREF(dp); 211 } 212 if (vfslocked) 213 ndp->ni_cnd.cn_flags |= GIANTHELD; 214 ndp->ni_startdir = dp; 215 error = lookup(ndp); 216 if (error) { 217 uma_zfree(namei_zone, cnp->cn_pnbuf); 218 #ifdef DIAGNOSTIC 219 cnp->cn_pnbuf = NULL; 220 cnp->cn_nameptr = NULL; 221 #endif 222 return (error); 223 } 224 vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0; 225 ndp->ni_cnd.cn_flags &= ~GIANTHELD; 226 /* 227 * Check for symbolic link 228 */ 229 if ((cnp->cn_flags & ISSYMLINK) == 0) { 230 if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) { 231 uma_zfree(namei_zone, cnp->cn_pnbuf); 232 #ifdef DIAGNOSTIC 233 cnp->cn_pnbuf = NULL; 234 cnp->cn_nameptr = NULL; 235 #endif 236 } else 237 cnp->cn_flags |= HASBUF; 238 239 if ((cnp->cn_flags & MPSAFE) == 0) { 240 VFS_UNLOCK_GIANT(vfslocked); 241 } else if (vfslocked) 242 ndp->ni_cnd.cn_flags |= GIANTHELD; 243 return (0); 244 } 245 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 246 error = ELOOP; 247 break; 248 } 249 #ifdef MAC 250 if ((cnp->cn_flags & NOMACCHECK) == 0) { 251 error = mac_check_vnode_readlink(td->td_ucred, 252 ndp->ni_vp); 253 if (error) 254 break; 255 } 256 #endif 257 if (ndp->ni_pathlen > 1) 258 cp = uma_zalloc(namei_zone, M_WAITOK); 259 else 260 cp = cnp->cn_pnbuf; 261 aiov.iov_base = cp; 262 aiov.iov_len = MAXPATHLEN; 263 auio.uio_iov = &aiov; 264 auio.uio_iovcnt = 1; 265 auio.uio_offset = 0; 266 auio.uio_rw = UIO_READ; 267 auio.uio_segflg = UIO_SYSSPACE; 268 auio.uio_td = (struct thread *)0; 269 auio.uio_resid = MAXPATHLEN; 270 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 271 if (error) { 272 if (ndp->ni_pathlen > 1) 273 uma_zfree(namei_zone, cp); 274 break; 275 } 276 linklen = MAXPATHLEN - auio.uio_resid; 277 if (linklen == 0) { 278 if (ndp->ni_pathlen > 1) 279 uma_zfree(namei_zone, cp); 280 error = ENOENT; 281 break; 282 } 283 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 284 if (ndp->ni_pathlen > 1) 285 uma_zfree(namei_zone, cp); 286 error = ENAMETOOLONG; 287 break; 288 } 289 if (ndp->ni_pathlen > 1) { 290 bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); 291 uma_zfree(namei_zone, cnp->cn_pnbuf); 292 cnp->cn_pnbuf = cp; 293 } else 294 cnp->cn_pnbuf[linklen] = '\0'; 295 ndp->ni_pathlen += linklen; 296 vput(ndp->ni_vp); 297 dp = ndp->ni_dvp; 298 } 299 uma_zfree(namei_zone, cnp->cn_pnbuf); 300 #ifdef DIAGNOSTIC 301 cnp->cn_pnbuf = NULL; 302 cnp->cn_nameptr = NULL; 303 #endif 304 vput(ndp->ni_vp); 305 ndp->ni_vp = NULL; 306 vrele(ndp->ni_dvp); 307 VFS_UNLOCK_GIANT(vfslocked); 308 return (error); 309 } 310 311 static int 312 compute_cn_lkflags(struct mount *mp, int lkflags) 313 { 314 if (mp == NULL || 315 ((lkflags & LK_SHARED) && !(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED))) { 316 lkflags &= ~LK_SHARED; 317 lkflags |= LK_EXCLUSIVE; 318 } 319 return lkflags; 320 } 321 322 /* 323 * Search a pathname. 324 * This is a very central and rather complicated routine. 325 * 326 * The pathname is pointed to by ni_ptr and is of length ni_pathlen. 327 * The starting directory is taken from ni_startdir. The pathname is 328 * descended until done, or a symbolic link is encountered. The variable 329 * ni_more is clear if the path is completed; it is set to one if a 330 * symbolic link needing interpretation is encountered. 331 * 332 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on 333 * whether the name is to be looked up, created, renamed, or deleted. 334 * When CREATE, RENAME, or DELETE is specified, information usable in 335 * creating, renaming, or deleting a directory entry may be calculated. 336 * If flag has LOCKPARENT or'ed into it, the parent directory is returned 337 * locked. If flag has WANTPARENT or'ed into it, the parent directory is 338 * returned unlocked. Otherwise the parent directory is not returned. If 339 * the target of the pathname exists and LOCKLEAF is or'ed into the flag 340 * the target is returned locked, otherwise it is returned unlocked. 341 * When creating or renaming and LOCKPARENT is specified, the target may not 342 * be ".". When deleting and LOCKPARENT is specified, the target may be ".". 343 * 344 * Overall outline of lookup: 345 * 346 * dirloop: 347 * identify next component of name at ndp->ni_ptr 348 * handle degenerate case where name is null string 349 * if .. and crossing mount points and on mounted filesys, find parent 350 * call VOP_LOOKUP routine for next component name 351 * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set 352 * component vnode returned in ni_vp (if it exists), locked. 353 * if result vnode is mounted on and crossing mount points, 354 * find mounted on vnode 355 * if more components of name, do next level at dirloop 356 * return the answer in ni_vp, locked if LOCKLEAF set 357 * if LOCKPARENT set, return locked parent in ni_dvp 358 * if WANTPARENT set, return unlocked parent in ni_dvp 359 */ 360 int 361 lookup(struct nameidata *ndp) 362 { 363 char *cp; /* pointer into pathname argument */ 364 struct vnode *dp = 0; /* the directory we are searching */ 365 struct vnode *tdp; /* saved dp */ 366 struct mount *mp; /* mount table entry */ 367 int docache; /* == 0 do not cache last component */ 368 int wantparent; /* 1 => wantparent or lockparent flag */ 369 int rdonly; /* lookup read-only flag bit */ 370 int trailing_slash; 371 int error = 0; 372 int dpunlocked = 0; /* dp has already been unlocked */ 373 struct componentname *cnp = &ndp->ni_cnd; 374 struct thread *td = cnp->cn_thread; 375 int vfslocked; /* VFS Giant state for child */ 376 int dvfslocked; /* VFS Giant state for parent */ 377 int tvfslocked; 378 int lkflags_save; 379 380 /* 381 * Setup: break out flag bits into variables. 382 */ 383 dvfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0; 384 vfslocked = 0; 385 ndp->ni_cnd.cn_flags &= ~GIANTHELD; 386 wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); 387 KASSERT(cnp->cn_nameiop == LOOKUP || wantparent, 388 ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT.")); 389 docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; 390 if (cnp->cn_nameiop == DELETE || 391 (wantparent && cnp->cn_nameiop != CREATE && 392 cnp->cn_nameiop != LOOKUP)) 393 docache = 0; 394 rdonly = cnp->cn_flags & RDONLY; 395 cnp->cn_flags &= ~ISSYMLINK; 396 ndp->ni_dvp = NULL; 397 /* 398 * We use shared locks until we hit the parent of the last cn then 399 * we adjust based on the requesting flags. 400 */ 401 if (lookup_shared) 402 cnp->cn_lkflags = LK_SHARED; 403 else 404 cnp->cn_lkflags = LK_EXCLUSIVE; 405 dp = ndp->ni_startdir; 406 ndp->ni_startdir = NULLVP; 407 vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td); 408 409 dirloop: 410 /* 411 * Search a new directory. 412 * 413 * The last component of the filename is left accessible via 414 * cnp->cn_nameptr for callers that need the name. Callers needing 415 * the name set the SAVENAME flag. When done, they assume 416 * responsibility for freeing the pathname buffer. 417 */ 418 cnp->cn_consume = 0; 419 for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) 420 continue; 421 cnp->cn_namelen = cp - cnp->cn_nameptr; 422 if (cnp->cn_namelen > NAME_MAX) { 423 error = ENAMETOOLONG; 424 goto bad; 425 } 426 #ifdef NAMEI_DIAGNOSTIC 427 { char c = *cp; 428 *cp = '\0'; 429 printf("{%s}: ", cnp->cn_nameptr); 430 *cp = c; } 431 #endif 432 ndp->ni_pathlen -= cnp->cn_namelen; 433 ndp->ni_next = cp; 434 435 /* 436 * Replace multiple slashes by a single slash and trailing slashes 437 * by a null. This must be done before VOP_LOOKUP() because some 438 * fs's don't know about trailing slashes. Remember if there were 439 * trailing slashes to handle symlinks, existing non-directories 440 * and non-existing files that won't be directories specially later. 441 */ 442 trailing_slash = 0; 443 while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { 444 cp++; 445 ndp->ni_pathlen--; 446 if (*cp == '\0') { 447 trailing_slash = 1; 448 *ndp->ni_next = '\0'; /* XXX for direnter() ... */ 449 } 450 } 451 ndp->ni_next = cp; 452 453 cnp->cn_flags |= MAKEENTRY; 454 if (*cp == '\0' && docache == 0) 455 cnp->cn_flags &= ~MAKEENTRY; 456 if (cnp->cn_namelen == 2 && 457 cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') 458 cnp->cn_flags |= ISDOTDOT; 459 else 460 cnp->cn_flags &= ~ISDOTDOT; 461 if (*ndp->ni_next == 0) 462 cnp->cn_flags |= ISLASTCN; 463 else 464 cnp->cn_flags &= ~ISLASTCN; 465 466 467 /* 468 * Check for degenerate name (e.g. / or "") 469 * which is a way of talking about a directory, 470 * e.g. like "/." or ".". 471 */ 472 if (cnp->cn_nameptr[0] == '\0') { 473 if (dp->v_type != VDIR) { 474 error = ENOTDIR; 475 goto bad; 476 } 477 if (cnp->cn_nameiop != LOOKUP) { 478 error = EISDIR; 479 goto bad; 480 } 481 if (wantparent) { 482 ndp->ni_dvp = dp; 483 VREF(dp); 484 } 485 ndp->ni_vp = dp; 486 487 if (cnp->cn_flags & AUDITVNODE1) 488 AUDIT_ARG(vnode, dp, ARG_VNODE1); 489 else if (cnp->cn_flags & AUDITVNODE2) 490 AUDIT_ARG(vnode, dp, ARG_VNODE2); 491 492 if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF))) 493 VOP_UNLOCK(dp, 0, td); 494 /* XXX This should probably move to the top of function. */ 495 if (cnp->cn_flags & SAVESTART) 496 panic("lookup: SAVESTART"); 497 goto success; 498 } 499 500 /* 501 * Handle "..": four special cases. 502 * 1. Return an error if this is the last component of 503 * the name and the operation is DELETE or RENAME. 504 * 2. If at root directory (e.g. after chroot) 505 * or at absolute root directory 506 * then ignore it so can't get out. 507 * 3. If this vnode is the root of a mounted 508 * filesystem, then replace it with the 509 * vnode which was mounted on so we take the 510 * .. in the other filesystem. 511 * 4. If the vnode is the top directory of 512 * the jail or chroot, don't let them out. 513 */ 514 if (cnp->cn_flags & ISDOTDOT) { 515 if ((cnp->cn_flags & ISLASTCN) != 0 && 516 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 517 error = EINVAL; 518 goto bad; 519 } 520 for (;;) { 521 if (dp == ndp->ni_rootdir || 522 dp == ndp->ni_topdir || 523 dp == rootvnode || 524 ((dp->v_vflag & VV_ROOT) != 0 && 525 (cnp->cn_flags & NOCROSSMOUNT) != 0)) { 526 ndp->ni_dvp = dp; 527 ndp->ni_vp = dp; 528 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 529 VREF(dp); 530 goto nextname; 531 } 532 if ((dp->v_vflag & VV_ROOT) == 0) 533 break; 534 if (dp->v_iflag & VI_DOOMED) { /* forced unmount */ 535 error = EBADF; 536 goto bad; 537 } 538 tdp = dp; 539 dp = dp->v_mount->mnt_vnodecovered; 540 tvfslocked = dvfslocked; 541 dvfslocked = VFS_LOCK_GIANT(dp->v_mount); 542 VREF(dp); 543 vput(tdp); 544 VFS_UNLOCK_GIANT(tvfslocked); 545 vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td); 546 } 547 } 548 549 /* 550 * We now have a segment name to search for, and a directory to search. 551 */ 552 unionlookup: 553 #ifdef MAC 554 if ((cnp->cn_flags & NOMACCHECK) == 0) { 555 error = mac_check_vnode_lookup(td->td_ucred, dp, cnp); 556 if (error) 557 goto bad; 558 } 559 #endif 560 ndp->ni_dvp = dp; 561 ndp->ni_vp = NULL; 562 ASSERT_VOP_LOCKED(dp, "lookup"); 563 VNASSERT(vfslocked == 0, dp, ("lookup: vfslocked %d", vfslocked)); 564 /* 565 * If we have a shared lock we may need to upgrade the lock for the 566 * last operation. 567 */ 568 if (dp != vp_crossmp && 569 VOP_ISLOCKED(dp, td) == LK_SHARED && 570 (cnp->cn_flags & ISLASTCN) && (cnp->cn_flags & LOCKPARENT)) 571 vn_lock(dp, LK_UPGRADE|LK_RETRY, td); 572 /* 573 * If we're looking up the last component and we need an exclusive 574 * lock, adjust our lkflags. 575 */ 576 if ((cnp->cn_flags & (ISLASTCN|LOCKSHARED|LOCKLEAF)) == 577 (ISLASTCN|LOCKLEAF)) 578 cnp->cn_lkflags = LK_EXCLUSIVE; 579 #ifdef NAMEI_DIAGNOSTIC 580 vprint("lookup in", dp); 581 #endif 582 lkflags_save = cnp->cn_lkflags; 583 cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags); 584 if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) { 585 cnp->cn_lkflags = lkflags_save; 586 KASSERT(ndp->ni_vp == NULL, ("leaf should be empty")); 587 #ifdef NAMEI_DIAGNOSTIC 588 printf("not found\n"); 589 #endif 590 if ((error == ENOENT) && 591 (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) && 592 (dp->v_mount->mnt_flag & MNT_UNION)) { 593 tdp = dp; 594 dp = dp->v_mount->mnt_vnodecovered; 595 tvfslocked = dvfslocked; 596 dvfslocked = VFS_LOCK_GIANT(dp->v_mount); 597 VREF(dp); 598 vput(tdp); 599 VFS_UNLOCK_GIANT(tvfslocked); 600 vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td); 601 goto unionlookup; 602 } 603 604 if (error != EJUSTRETURN) 605 goto bad; 606 /* 607 * If creating and at end of pathname, then can consider 608 * allowing file to be created. 609 */ 610 if (rdonly) { 611 error = EROFS; 612 goto bad; 613 } 614 if (*cp == '\0' && trailing_slash && 615 !(cnp->cn_flags & WILLBEDIR)) { 616 error = ENOENT; 617 goto bad; 618 } 619 if ((cnp->cn_flags & LOCKPARENT) == 0) 620 VOP_UNLOCK(dp, 0, td); 621 /* 622 * This is a temporary assert to make sure I know what the 623 * behavior here was. 624 */ 625 KASSERT((cnp->cn_flags & (WANTPARENT|LOCKPARENT)) != 0, 626 ("lookup: Unhandled case.")); 627 /* 628 * We return with ni_vp NULL to indicate that the entry 629 * doesn't currently exist, leaving a pointer to the 630 * (possibly locked) directory vnode in ndp->ni_dvp. 631 */ 632 if (cnp->cn_flags & SAVESTART) { 633 ndp->ni_startdir = ndp->ni_dvp; 634 VREF(ndp->ni_startdir); 635 } 636 goto success; 637 } else 638 cnp->cn_lkflags = lkflags_save; 639 #ifdef NAMEI_DIAGNOSTIC 640 printf("found\n"); 641 #endif 642 /* 643 * Take into account any additional components consumed by 644 * the underlying filesystem. 645 */ 646 if (cnp->cn_consume > 0) { 647 cnp->cn_nameptr += cnp->cn_consume; 648 ndp->ni_next += cnp->cn_consume; 649 ndp->ni_pathlen -= cnp->cn_consume; 650 cnp->cn_consume = 0; 651 } 652 653 dp = ndp->ni_vp; 654 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 655 656 /* 657 * Check to see if the vnode has been mounted on; 658 * if so find the root of the mounted filesystem. 659 */ 660 while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && 661 (cnp->cn_flags & NOCROSSMOUNT) == 0) { 662 if (vfs_busy(mp, 0, 0, td)) 663 continue; 664 vput(dp); 665 VFS_UNLOCK_GIANT(vfslocked); 666 vfslocked = VFS_LOCK_GIANT(mp); 667 if (dp != ndp->ni_dvp) 668 vput(ndp->ni_dvp); 669 else 670 vrele(ndp->ni_dvp); 671 VFS_UNLOCK_GIANT(dvfslocked); 672 dvfslocked = 0; 673 vref(vp_crossmp); 674 ndp->ni_dvp = vp_crossmp; 675 error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags), &tdp, td); 676 vfs_unbusy(mp, td); 677 if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT, td)) 678 panic("vp_crossmp exclusively locked or reclaimed"); 679 if (error) { 680 dpunlocked = 1; 681 goto bad2; 682 } 683 ndp->ni_vp = dp = tdp; 684 } 685 686 /* 687 * Check for symbolic link 688 */ 689 if ((dp->v_type == VLNK) && 690 ((cnp->cn_flags & FOLLOW) || trailing_slash || 691 *ndp->ni_next == '/')) { 692 cnp->cn_flags |= ISSYMLINK; 693 if (dp->v_iflag & VI_DOOMED) { 694 /* We can't know whether the directory was mounted with 695 * NOSYMFOLLOW, so we can't follow safely. */ 696 error = EBADF; 697 goto bad2; 698 } 699 if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) { 700 error = EACCES; 701 goto bad2; 702 } 703 /* 704 * Symlink code always expects an unlocked dvp. 705 */ 706 if (ndp->ni_dvp != ndp->ni_vp) 707 VOP_UNLOCK(ndp->ni_dvp, 0, td); 708 goto success; 709 } 710 711 /* 712 * Check for bogus trailing slashes. 713 */ 714 if (trailing_slash && dp->v_type != VDIR) { 715 error = ENOTDIR; 716 goto bad2; 717 } 718 719 nextname: 720 /* 721 * Not a symbolic link. If more pathname, 722 * continue at next component, else return. 723 */ 724 KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/', 725 ("lookup: invalid path state.")); 726 if (*ndp->ni_next == '/') { 727 cnp->cn_nameptr = ndp->ni_next; 728 while (*cnp->cn_nameptr == '/') { 729 cnp->cn_nameptr++; 730 ndp->ni_pathlen--; 731 } 732 if (ndp->ni_dvp != dp) 733 vput(ndp->ni_dvp); 734 else 735 vrele(ndp->ni_dvp); 736 VFS_UNLOCK_GIANT(dvfslocked); 737 dvfslocked = vfslocked; /* dp becomes dvp in dirloop */ 738 vfslocked = 0; 739 goto dirloop; 740 } 741 /* 742 * Disallow directory write attempts on read-only filesystems. 743 */ 744 if (rdonly && 745 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 746 error = EROFS; 747 goto bad2; 748 } 749 if (cnp->cn_flags & SAVESTART) { 750 ndp->ni_startdir = ndp->ni_dvp; 751 VREF(ndp->ni_startdir); 752 } 753 if (!wantparent) { 754 if (ndp->ni_dvp != dp) 755 vput(ndp->ni_dvp); 756 else 757 vrele(ndp->ni_dvp); 758 VFS_UNLOCK_GIANT(dvfslocked); 759 dvfslocked = 0; 760 } else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) 761 VOP_UNLOCK(ndp->ni_dvp, 0, td); 762 763 if (cnp->cn_flags & AUDITVNODE1) 764 AUDIT_ARG(vnode, dp, ARG_VNODE1); 765 else if (cnp->cn_flags & AUDITVNODE2) 766 AUDIT_ARG(vnode, dp, ARG_VNODE2); 767 768 if ((cnp->cn_flags & LOCKLEAF) == 0) 769 VOP_UNLOCK(dp, 0, td); 770 success: 771 if (vfslocked && dvfslocked) 772 VFS_UNLOCK_GIANT(dvfslocked); /* Only need one */ 773 if (vfslocked || dvfslocked) 774 ndp->ni_cnd.cn_flags |= GIANTHELD; 775 return (0); 776 777 bad2: 778 if (dp != ndp->ni_dvp) 779 vput(ndp->ni_dvp); 780 else 781 vrele(ndp->ni_dvp); 782 bad: 783 if (!dpunlocked) 784 vput(dp); 785 VFS_UNLOCK_GIANT(vfslocked); 786 VFS_UNLOCK_GIANT(dvfslocked); 787 ndp->ni_cnd.cn_flags &= ~GIANTHELD; 788 ndp->ni_vp = NULL; 789 return (error); 790 } 791 792 /* 793 * relookup - lookup a path name component 794 * Used by lookup to re-aquire things. 795 */ 796 int 797 relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) 798 { 799 struct thread *td = cnp->cn_thread; 800 struct vnode *dp = 0; /* the directory we are searching */ 801 int wantparent; /* 1 => wantparent or lockparent flag */ 802 int rdonly; /* lookup read-only flag bit */ 803 int error = 0; 804 805 KASSERT(cnp->cn_flags & ISLASTCN, 806 ("relookup: Not given last component.")); 807 /* 808 * Setup: break out flag bits into variables. 809 */ 810 wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT); 811 KASSERT(wantparent, ("relookup: parent not wanted.")); 812 rdonly = cnp->cn_flags & RDONLY; 813 cnp->cn_flags &= ~ISSYMLINK; 814 dp = dvp; 815 cnp->cn_lkflags = LK_EXCLUSIVE; 816 vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, td); 817 818 /* 819 * Search a new directory. 820 * 821 * The last component of the filename is left accessible via 822 * cnp->cn_nameptr for callers that need the name. Callers needing 823 * the name set the SAVENAME flag. When done, they assume 824 * responsibility for freeing the pathname buffer. 825 */ 826 #ifdef NAMEI_DIAGNOSTIC 827 printf("{%s}: ", cnp->cn_nameptr); 828 #endif 829 830 /* 831 * Check for degenerate name (e.g. / or "") 832 * which is a way of talking about a directory, 833 * e.g. like "/." or ".". 834 */ 835 if (cnp->cn_nameptr[0] == '\0') { 836 if (cnp->cn_nameiop != LOOKUP || wantparent) { 837 error = EISDIR; 838 goto bad; 839 } 840 if (dp->v_type != VDIR) { 841 error = ENOTDIR; 842 goto bad; 843 } 844 if (!(cnp->cn_flags & LOCKLEAF)) 845 VOP_UNLOCK(dp, 0, td); 846 *vpp = dp; 847 /* XXX This should probably move to the top of function. */ 848 if (cnp->cn_flags & SAVESTART) 849 panic("lookup: SAVESTART"); 850 return (0); 851 } 852 853 if (cnp->cn_flags & ISDOTDOT) 854 panic ("relookup: lookup on dot-dot"); 855 856 /* 857 * We now have a segment name to search for, and a directory to search. 858 */ 859 #ifdef NAMEI_DIAGNOSTIC 860 vprint("search in:", dp); 861 #endif 862 if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) { 863 KASSERT(*vpp == NULL, ("leaf should be empty")); 864 if (error != EJUSTRETURN) 865 goto bad; 866 /* 867 * If creating and at end of pathname, then can consider 868 * allowing file to be created. 869 */ 870 if (rdonly) { 871 error = EROFS; 872 goto bad; 873 } 874 /* ASSERT(dvp == ndp->ni_startdir) */ 875 if (cnp->cn_flags & SAVESTART) 876 VREF(dvp); 877 if ((cnp->cn_flags & LOCKPARENT) == 0) 878 VOP_UNLOCK(dp, 0, td); 879 /* 880 * This is a temporary assert to make sure I know what the 881 * behavior here was. 882 */ 883 KASSERT((cnp->cn_flags & (WANTPARENT|LOCKPARENT)) != 0, 884 ("relookup: Unhandled case.")); 885 /* 886 * We return with ni_vp NULL to indicate that the entry 887 * doesn't currently exist, leaving a pointer to the 888 * (possibly locked) directory vnode in ndp->ni_dvp. 889 */ 890 return (0); 891 } 892 893 dp = *vpp; 894 895 /* 896 * Disallow directory write attempts on read-only filesystems. 897 */ 898 if (rdonly && 899 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 900 if (dvp == dp) 901 vrele(dvp); 902 else 903 vput(dvp); 904 error = EROFS; 905 goto bad; 906 } 907 /* 908 * Set the parent lock/ref state to the requested state. 909 */ 910 if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) { 911 if (wantparent) 912 VOP_UNLOCK(dvp, 0, td); 913 else 914 vput(dvp); 915 } else if (!wantparent) 916 vrele(dvp); 917 /* 918 * Check for symbolic link 919 */ 920 KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW), 921 ("relookup: symlink found.\n")); 922 923 /* ASSERT(dvp == ndp->ni_startdir) */ 924 if (cnp->cn_flags & SAVESTART) 925 VREF(dvp); 926 927 if ((cnp->cn_flags & LOCKLEAF) == 0) 928 VOP_UNLOCK(dp, 0, td); 929 return (0); 930 bad: 931 vput(dp); 932 *vpp = NULL; 933 return (error); 934 } 935 936 /* 937 * Free data allocated by namei(); see namei(9) for details. 938 */ 939 void 940 NDFREE(struct nameidata *ndp, const u_int flags) 941 { 942 int unlock_dvp; 943 int unlock_vp; 944 945 unlock_dvp = 0; 946 unlock_vp = 0; 947 948 if (!(flags & NDF_NO_FREE_PNBUF) && 949 (ndp->ni_cnd.cn_flags & HASBUF)) { 950 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 951 ndp->ni_cnd.cn_flags &= ~HASBUF; 952 } 953 if (!(flags & NDF_NO_VP_UNLOCK) && 954 (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp) 955 unlock_vp = 1; 956 if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) { 957 if (unlock_vp) { 958 vput(ndp->ni_vp); 959 unlock_vp = 0; 960 } else 961 vrele(ndp->ni_vp); 962 ndp->ni_vp = NULL; 963 } 964 if (unlock_vp) 965 VOP_UNLOCK(ndp->ni_vp, 0, ndp->ni_cnd.cn_thread); 966 if (!(flags & NDF_NO_DVP_UNLOCK) && 967 (ndp->ni_cnd.cn_flags & LOCKPARENT) && 968 ndp->ni_dvp != ndp->ni_vp) 969 unlock_dvp = 1; 970 if (!(flags & NDF_NO_DVP_RELE) && 971 (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) { 972 if (unlock_dvp) { 973 vput(ndp->ni_dvp); 974 unlock_dvp = 0; 975 } else 976 vrele(ndp->ni_dvp); 977 ndp->ni_dvp = NULL; 978 } 979 if (unlock_dvp) 980 VOP_UNLOCK(ndp->ni_dvp, 0, ndp->ni_cnd.cn_thread); 981 if (!(flags & NDF_NO_STARTDIR_RELE) && 982 (ndp->ni_cnd.cn_flags & SAVESTART)) { 983 vrele(ndp->ni_startdir); 984 ndp->ni_startdir = NULL; 985 } 986 } 987 988 /* 989 * Determine if there is a suitable alternate filename under the specified 990 * prefix for the specified path. If the create flag is set, then the 991 * alternate prefix will be used so long as the parent directory exists. 992 * This is used by the various compatiblity ABIs so that Linux binaries prefer 993 * files under /compat/linux for example. The chosen path (whether under 994 * the prefix or under /) is returned in a kernel malloc'd buffer pointed 995 * to by pathbuf. The caller is responsible for free'ing the buffer from 996 * the M_TEMP bucket if one is returned. 997 */ 998 int 999 kern_alternate_path(struct thread *td, const char *prefix, char *path, 1000 enum uio_seg pathseg, char **pathbuf, int create) 1001 { 1002 struct nameidata nd, ndroot; 1003 char *ptr, *buf, *cp; 1004 size_t len, sz; 1005 int error; 1006 1007 buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1008 *pathbuf = buf; 1009 1010 /* Copy the prefix into the new pathname as a starting point. */ 1011 len = strlcpy(buf, prefix, MAXPATHLEN); 1012 if (len >= MAXPATHLEN) { 1013 *pathbuf = NULL; 1014 free(buf, M_TEMP); 1015 return (EINVAL); 1016 } 1017 sz = MAXPATHLEN - len; 1018 ptr = buf + len; 1019 1020 /* Append the filename to the prefix. */ 1021 if (pathseg == UIO_SYSSPACE) 1022 error = copystr(path, ptr, sz, &len); 1023 else 1024 error = copyinstr(path, ptr, sz, &len); 1025 1026 if (error) { 1027 *pathbuf = NULL; 1028 free(buf, M_TEMP); 1029 return (error); 1030 } 1031 1032 /* Only use a prefix with absolute pathnames. */ 1033 if (*ptr != '/') { 1034 error = EINVAL; 1035 goto keeporig; 1036 } 1037 1038 /* 1039 * We know that there is a / somewhere in this pathname. 1040 * Search backwards for it, to find the file's parent dir 1041 * to see if it exists in the alternate tree. If it does, 1042 * and we want to create a file (cflag is set). We don't 1043 * need to worry about the root comparison in this case. 1044 */ 1045 1046 if (create) { 1047 for (cp = &ptr[len] - 1; *cp != '/'; cp--); 1048 *cp = '\0'; 1049 1050 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td); 1051 error = namei(&nd); 1052 *cp = '/'; 1053 if (error != 0) 1054 goto keeporig; 1055 } else { 1056 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td); 1057 1058 error = namei(&nd); 1059 if (error != 0) 1060 goto keeporig; 1061 1062 /* 1063 * We now compare the vnode of the prefix to the one 1064 * vnode asked. If they resolve to be the same, then we 1065 * ignore the match so that the real root gets used. 1066 * This avoids the problem of traversing "../.." to find the 1067 * root directory and never finding it, because "/" resolves 1068 * to the emulation root directory. This is expensive :-( 1069 */ 1070 NDINIT(&ndroot, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, prefix, 1071 td); 1072 1073 /* We shouldn't ever get an error from this namei(). */ 1074 error = namei(&ndroot); 1075 if (error == 0) { 1076 if (nd.ni_vp == ndroot.ni_vp) 1077 error = ENOENT; 1078 1079 NDFREE(&ndroot, NDF_ONLY_PNBUF); 1080 vrele(ndroot.ni_vp); 1081 VFS_UNLOCK_GIANT(NDHASGIANT(&ndroot)); 1082 } 1083 } 1084 1085 NDFREE(&nd, NDF_ONLY_PNBUF); 1086 vrele(nd.ni_vp); 1087 VFS_UNLOCK_GIANT(NDHASGIANT(&nd)); 1088 1089 keeporig: 1090 /* If there was an error, use the original path name. */ 1091 if (error) 1092 bcopy(ptr, buf, len); 1093 return (error); 1094 } 1095