1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_kdtrace.h" 41 #include "opt_ktrace.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/fcntl.h> 47 #include <sys/jail.h> 48 #include <sys/lock.h> 49 #include <sys/mutex.h> 50 #include <sys/namei.h> 51 #include <sys/vnode.h> 52 #include <sys/mount.h> 53 #include <sys/filedesc.h> 54 #include <sys/proc.h> 55 #include <sys/sdt.h> 56 #include <sys/syscallsubr.h> 57 #include <sys/sysctl.h> 58 #ifdef KTRACE 59 #include <sys/ktrace.h> 60 #endif 61 62 #include <security/audit/audit.h> 63 #include <security/mac/mac_framework.h> 64 65 #include <vm/uma.h> 66 67 #define NAMEI_DIAGNOSTIC 1 68 #undef NAMEI_DIAGNOSTIC 69 70 SDT_PROVIDER_DECLARE(vfs); 71 SDT_PROBE_DEFINE3(vfs, namei, lookup, entry, "struct vnode *", "char *", 72 "unsigned long"); 73 SDT_PROBE_DEFINE2(vfs, namei, lookup, return, "int", "struct vnode *"); 74 75 /* 76 * Allocation zone for namei 77 */ 78 uma_zone_t namei_zone; 79 /* 80 * Placeholder vnode for mp traversal 81 */ 82 static struct vnode *vp_crossmp; 83 84 static void 85 nameiinit(void *dummy __unused) 86 { 87 int error; 88 89 namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL, 90 UMA_ALIGN_PTR, 0); 91 error = getnewvnode("crossmp", NULL, &dead_vnodeops, &vp_crossmp); 92 if (error != 0) 93 panic("nameiinit: getnewvnode"); 94 VN_LOCK_ASHARE(vp_crossmp); 95 } 96 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL); 97 98 static int lookup_shared = 1; 99 SYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RW, &lookup_shared, 0, 100 "Enables/Disables shared locks for path name translation"); 101 TUNABLE_INT("vfs.lookup_shared", &lookup_shared); 102 103 /* 104 * Convert a pathname into a pointer to a locked vnode. 105 * 106 * The FOLLOW flag is set when symbolic links are to be followed 107 * when they occur at the end of the name translation process. 108 * Symbolic links are always followed for all other pathname 109 * components other than the last. 110 * 111 * The segflg defines whether the name is to be copied from user 112 * space or kernel space. 113 * 114 * Overall outline of namei: 115 * 116 * copy in name 117 * get starting directory 118 * while (!done && !error) { 119 * call lookup to search path. 120 * if symbolic link, massage name in buffer and continue 121 * } 122 */ 123 int 124 namei(struct nameidata *ndp) 125 { 126 struct filedesc *fdp; /* pointer to file descriptor state */ 127 char *cp; /* pointer into pathname argument */ 128 struct vnode *dp; /* the directory we are searching */ 129 struct iovec aiov; /* uio for reading symbolic links */ 130 struct uio auio; 131 int error, linklen; 132 struct componentname *cnp = &ndp->ni_cnd; 133 struct thread *td = cnp->cn_thread; 134 struct proc *p = td->td_proc; 135 int vfslocked; 136 137 KASSERT((cnp->cn_flags & MPSAFE) != 0 || mtx_owned(&Giant) != 0, 138 ("NOT MPSAFE and Giant not held")); 139 ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred; 140 KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc")); 141 KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0, 142 ("namei: nameiop contaminated with flags")); 143 KASSERT((cnp->cn_flags & OPMASK) == 0, 144 ("namei: flags contaminated with nameiops")); 145 if (!lookup_shared) 146 cnp->cn_flags &= ~LOCKSHARED; 147 fdp = p->p_fd; 148 149 /* We will set this ourselves if we need it. */ 150 cnp->cn_flags &= ~TRAILINGSLASH; 151 152 /* 153 * Get a buffer for the name to be translated, and copy the 154 * name into the buffer. 155 */ 156 if ((cnp->cn_flags & HASBUF) == 0) 157 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 158 if (ndp->ni_segflg == UIO_SYSSPACE) 159 error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, 160 MAXPATHLEN, (size_t *)&ndp->ni_pathlen); 161 else 162 error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, 163 MAXPATHLEN, (size_t *)&ndp->ni_pathlen); 164 165 /* If we are auditing the kernel pathname, save the user pathname. */ 166 if (cnp->cn_flags & AUDITVNODE1) 167 AUDIT_ARG_UPATH(td, cnp->cn_pnbuf, ARG_UPATH1); 168 if (cnp->cn_flags & AUDITVNODE2) 169 AUDIT_ARG_UPATH(td, cnp->cn_pnbuf, ARG_UPATH2); 170 171 /* 172 * Don't allow empty pathnames. 173 */ 174 if (!error && *cnp->cn_pnbuf == '\0') 175 error = ENOENT; 176 177 if (error) { 178 uma_zfree(namei_zone, cnp->cn_pnbuf); 179 #ifdef DIAGNOSTIC 180 cnp->cn_pnbuf = NULL; 181 cnp->cn_nameptr = NULL; 182 #endif 183 ndp->ni_vp = NULL; 184 return (error); 185 } 186 ndp->ni_loopcnt = 0; 187 #ifdef KTRACE 188 if (KTRPOINT(td, KTR_NAMEI)) { 189 KASSERT(cnp->cn_thread == curthread, 190 ("namei not using curthread")); 191 ktrnamei(cnp->cn_pnbuf); 192 } 193 #endif 194 /* 195 * Get starting point for the translation. 196 */ 197 FILEDESC_SLOCK(fdp); 198 ndp->ni_rootdir = fdp->fd_rdir; 199 ndp->ni_topdir = fdp->fd_jdir; 200 201 dp = NULL; 202 if (cnp->cn_pnbuf[0] != '/') { 203 if (ndp->ni_startdir != NULL) { 204 dp = ndp->ni_startdir; 205 error = 0; 206 } else if (ndp->ni_dirfd != AT_FDCWD) 207 error = fgetvp(td, ndp->ni_dirfd, &dp); 208 if (error != 0 || dp != NULL) { 209 FILEDESC_SUNLOCK(fdp); 210 if (error == 0 && dp->v_type != VDIR) { 211 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 212 vrele(dp); 213 VFS_UNLOCK_GIANT(vfslocked); 214 error = ENOTDIR; 215 } 216 } 217 if (error) { 218 uma_zfree(namei_zone, cnp->cn_pnbuf); 219 #ifdef DIAGNOSTIC 220 cnp->cn_pnbuf = NULL; 221 cnp->cn_nameptr = NULL; 222 #endif 223 return (error); 224 } 225 } 226 if (dp == NULL) { 227 dp = fdp->fd_cdir; 228 VREF(dp); 229 FILEDESC_SUNLOCK(fdp); 230 if (ndp->ni_startdir != NULL) { 231 vfslocked = VFS_LOCK_GIANT(ndp->ni_startdir->v_mount); 232 vrele(ndp->ni_startdir); 233 VFS_UNLOCK_GIANT(vfslocked); 234 } 235 } 236 SDT_PROBE(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf, 237 cnp->cn_flags, 0, 0); 238 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 239 for (;;) { 240 /* 241 * Check if root directory should replace current directory. 242 * Done at start of translation and after symbolic link. 243 */ 244 cnp->cn_nameptr = cnp->cn_pnbuf; 245 if (*(cnp->cn_nameptr) == '/') { 246 vrele(dp); 247 VFS_UNLOCK_GIANT(vfslocked); 248 while (*(cnp->cn_nameptr) == '/') { 249 cnp->cn_nameptr++; 250 ndp->ni_pathlen--; 251 } 252 dp = ndp->ni_rootdir; 253 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 254 VREF(dp); 255 } 256 if (vfslocked) 257 ndp->ni_cnd.cn_flags |= GIANTHELD; 258 ndp->ni_startdir = dp; 259 error = lookup(ndp); 260 if (error) { 261 uma_zfree(namei_zone, cnp->cn_pnbuf); 262 #ifdef DIAGNOSTIC 263 cnp->cn_pnbuf = NULL; 264 cnp->cn_nameptr = NULL; 265 #endif 266 SDT_PROBE(vfs, namei, lookup, return, error, NULL, 0, 267 0, 0); 268 return (error); 269 } 270 vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0; 271 ndp->ni_cnd.cn_flags &= ~GIANTHELD; 272 /* 273 * If not a symbolic link, we're done. 274 */ 275 if ((cnp->cn_flags & ISSYMLINK) == 0) { 276 if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) { 277 uma_zfree(namei_zone, cnp->cn_pnbuf); 278 #ifdef DIAGNOSTIC 279 cnp->cn_pnbuf = NULL; 280 cnp->cn_nameptr = NULL; 281 #endif 282 } else 283 cnp->cn_flags |= HASBUF; 284 285 if ((cnp->cn_flags & MPSAFE) == 0) { 286 VFS_UNLOCK_GIANT(vfslocked); 287 } else if (vfslocked) 288 ndp->ni_cnd.cn_flags |= GIANTHELD; 289 SDT_PROBE(vfs, namei, lookup, return, 0, ndp->ni_vp, 290 0, 0, 0); 291 return (0); 292 } 293 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 294 error = ELOOP; 295 break; 296 } 297 #ifdef MAC 298 if ((cnp->cn_flags & NOMACCHECK) == 0) { 299 error = mac_vnode_check_readlink(td->td_ucred, 300 ndp->ni_vp); 301 if (error) 302 break; 303 } 304 #endif 305 if (ndp->ni_pathlen > 1) 306 cp = uma_zalloc(namei_zone, M_WAITOK); 307 else 308 cp = cnp->cn_pnbuf; 309 aiov.iov_base = cp; 310 aiov.iov_len = MAXPATHLEN; 311 auio.uio_iov = &aiov; 312 auio.uio_iovcnt = 1; 313 auio.uio_offset = 0; 314 auio.uio_rw = UIO_READ; 315 auio.uio_segflg = UIO_SYSSPACE; 316 auio.uio_td = (struct thread *)0; 317 auio.uio_resid = MAXPATHLEN; 318 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 319 if (error) { 320 if (ndp->ni_pathlen > 1) 321 uma_zfree(namei_zone, cp); 322 break; 323 } 324 linklen = MAXPATHLEN - auio.uio_resid; 325 if (linklen == 0) { 326 if (ndp->ni_pathlen > 1) 327 uma_zfree(namei_zone, cp); 328 error = ENOENT; 329 break; 330 } 331 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 332 if (ndp->ni_pathlen > 1) 333 uma_zfree(namei_zone, cp); 334 error = ENAMETOOLONG; 335 break; 336 } 337 if (ndp->ni_pathlen > 1) { 338 bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); 339 uma_zfree(namei_zone, cnp->cn_pnbuf); 340 cnp->cn_pnbuf = cp; 341 } else 342 cnp->cn_pnbuf[linklen] = '\0'; 343 ndp->ni_pathlen += linklen; 344 vput(ndp->ni_vp); 345 dp = ndp->ni_dvp; 346 } 347 uma_zfree(namei_zone, cnp->cn_pnbuf); 348 #ifdef DIAGNOSTIC 349 cnp->cn_pnbuf = NULL; 350 cnp->cn_nameptr = NULL; 351 #endif 352 vput(ndp->ni_vp); 353 ndp->ni_vp = NULL; 354 vrele(ndp->ni_dvp); 355 VFS_UNLOCK_GIANT(vfslocked); 356 SDT_PROBE(vfs, namei, lookup, return, error, NULL, 0, 0, 0); 357 return (error); 358 } 359 360 static int 361 compute_cn_lkflags(struct mount *mp, int lkflags) 362 { 363 364 if (mp == NULL || 365 ((lkflags & LK_SHARED) && !(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED))) { 366 lkflags &= ~LK_SHARED; 367 lkflags |= LK_EXCLUSIVE; 368 } 369 return (lkflags); 370 } 371 372 static __inline int 373 needs_exclusive_leaf(struct mount *mp, int flags) 374 { 375 376 /* 377 * Intermediate nodes can use shared locks, we only need to 378 * force an exclusive lock for leaf nodes. 379 */ 380 if ((flags & (ISLASTCN | LOCKLEAF)) != (ISLASTCN | LOCKLEAF)) 381 return (0); 382 383 /* Always use exclusive locks if LOCKSHARED isn't set. */ 384 if (!(flags & LOCKSHARED)) 385 return (1); 386 387 /* 388 * For lookups during open(), if the mount point supports 389 * extended shared operations, then use a shared lock for the 390 * leaf node, otherwise use an exclusive lock. 391 */ 392 if (flags & ISOPEN) { 393 if (mp != NULL && 394 (mp->mnt_kern_flag & MNTK_EXTENDED_SHARED)) 395 return (0); 396 else 397 return (1); 398 } 399 400 /* 401 * Lookup requests outside of open() that specify LOCKSHARED 402 * only need a shared lock on the leaf vnode. 403 */ 404 return (0); 405 } 406 407 /* 408 * Search a pathname. 409 * This is a very central and rather complicated routine. 410 * 411 * The pathname is pointed to by ni_ptr and is of length ni_pathlen. 412 * The starting directory is taken from ni_startdir. The pathname is 413 * descended until done, or a symbolic link is encountered. The variable 414 * ni_more is clear if the path is completed; it is set to one if a 415 * symbolic link needing interpretation is encountered. 416 * 417 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on 418 * whether the name is to be looked up, created, renamed, or deleted. 419 * When CREATE, RENAME, or DELETE is specified, information usable in 420 * creating, renaming, or deleting a directory entry may be calculated. 421 * If flag has LOCKPARENT or'ed into it, the parent directory is returned 422 * locked. If flag has WANTPARENT or'ed into it, the parent directory is 423 * returned unlocked. Otherwise the parent directory is not returned. If 424 * the target of the pathname exists and LOCKLEAF is or'ed into the flag 425 * the target is returned locked, otherwise it is returned unlocked. 426 * When creating or renaming and LOCKPARENT is specified, the target may not 427 * be ".". When deleting and LOCKPARENT is specified, the target may be ".". 428 * 429 * Overall outline of lookup: 430 * 431 * dirloop: 432 * identify next component of name at ndp->ni_ptr 433 * handle degenerate case where name is null string 434 * if .. and crossing mount points and on mounted filesys, find parent 435 * call VOP_LOOKUP routine for next component name 436 * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set 437 * component vnode returned in ni_vp (if it exists), locked. 438 * if result vnode is mounted on and crossing mount points, 439 * find mounted on vnode 440 * if more components of name, do next level at dirloop 441 * return the answer in ni_vp, locked if LOCKLEAF set 442 * if LOCKPARENT set, return locked parent in ni_dvp 443 * if WANTPARENT set, return unlocked parent in ni_dvp 444 */ 445 int 446 lookup(struct nameidata *ndp) 447 { 448 char *cp; /* pointer into pathname argument */ 449 struct vnode *dp = 0; /* the directory we are searching */ 450 struct vnode *tdp; /* saved dp */ 451 struct mount *mp; /* mount table entry */ 452 struct prison *pr; 453 int docache; /* == 0 do not cache last component */ 454 int wantparent; /* 1 => wantparent or lockparent flag */ 455 int rdonly; /* lookup read-only flag bit */ 456 int error = 0; 457 int dpunlocked = 0; /* dp has already been unlocked */ 458 struct componentname *cnp = &ndp->ni_cnd; 459 int vfslocked; /* VFS Giant state for child */ 460 int dvfslocked; /* VFS Giant state for parent */ 461 int tvfslocked; 462 int lkflags_save; 463 464 /* 465 * Setup: break out flag bits into variables. 466 */ 467 dvfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0; 468 vfslocked = 0; 469 ndp->ni_cnd.cn_flags &= ~GIANTHELD; 470 wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); 471 KASSERT(cnp->cn_nameiop == LOOKUP || wantparent, 472 ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT.")); 473 docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; 474 if (cnp->cn_nameiop == DELETE || 475 (wantparent && cnp->cn_nameiop != CREATE && 476 cnp->cn_nameiop != LOOKUP)) 477 docache = 0; 478 rdonly = cnp->cn_flags & RDONLY; 479 cnp->cn_flags &= ~ISSYMLINK; 480 ndp->ni_dvp = NULL; 481 /* 482 * We use shared locks until we hit the parent of the last cn then 483 * we adjust based on the requesting flags. 484 */ 485 if (lookup_shared) 486 cnp->cn_lkflags = LK_SHARED; 487 else 488 cnp->cn_lkflags = LK_EXCLUSIVE; 489 dp = ndp->ni_startdir; 490 ndp->ni_startdir = NULLVP; 491 vn_lock(dp, 492 compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY)); 493 494 dirloop: 495 /* 496 * Search a new directory. 497 * 498 * The last component of the filename is left accessible via 499 * cnp->cn_nameptr for callers that need the name. Callers needing 500 * the name set the SAVENAME flag. When done, they assume 501 * responsibility for freeing the pathname buffer. 502 */ 503 cnp->cn_consume = 0; 504 for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) 505 continue; 506 cnp->cn_namelen = cp - cnp->cn_nameptr; 507 if (cnp->cn_namelen > NAME_MAX) { 508 error = ENAMETOOLONG; 509 goto bad; 510 } 511 #ifdef NAMEI_DIAGNOSTIC 512 { char c = *cp; 513 *cp = '\0'; 514 printf("{%s}: ", cnp->cn_nameptr); 515 *cp = c; } 516 #endif 517 ndp->ni_pathlen -= cnp->cn_namelen; 518 ndp->ni_next = cp; 519 520 /* 521 * Replace multiple slashes by a single slash and trailing slashes 522 * by a null. This must be done before VOP_LOOKUP() because some 523 * fs's don't know about trailing slashes. Remember if there were 524 * trailing slashes to handle symlinks, existing non-directories 525 * and non-existing files that won't be directories specially later. 526 */ 527 while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { 528 cp++; 529 ndp->ni_pathlen--; 530 if (*cp == '\0') { 531 *ndp->ni_next = '\0'; 532 cnp->cn_flags |= TRAILINGSLASH; 533 } 534 } 535 ndp->ni_next = cp; 536 537 cnp->cn_flags |= MAKEENTRY; 538 if (*cp == '\0' && docache == 0) 539 cnp->cn_flags &= ~MAKEENTRY; 540 if (cnp->cn_namelen == 2 && 541 cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') 542 cnp->cn_flags |= ISDOTDOT; 543 else 544 cnp->cn_flags &= ~ISDOTDOT; 545 if (*ndp->ni_next == 0) 546 cnp->cn_flags |= ISLASTCN; 547 else 548 cnp->cn_flags &= ~ISLASTCN; 549 550 551 /* 552 * Check for degenerate name (e.g. / or "") 553 * which is a way of talking about a directory, 554 * e.g. like "/." or ".". 555 */ 556 if (cnp->cn_nameptr[0] == '\0') { 557 if (dp->v_type != VDIR) { 558 error = ENOTDIR; 559 goto bad; 560 } 561 if (cnp->cn_nameiop != LOOKUP) { 562 error = EISDIR; 563 goto bad; 564 } 565 if (wantparent) { 566 ndp->ni_dvp = dp; 567 VREF(dp); 568 } 569 ndp->ni_vp = dp; 570 571 if (cnp->cn_flags & AUDITVNODE1) 572 AUDIT_ARG_VNODE(dp, ARG_VNODE1); 573 else if (cnp->cn_flags & AUDITVNODE2) 574 AUDIT_ARG_VNODE(dp, ARG_VNODE2); 575 576 if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF))) 577 VOP_UNLOCK(dp, 0); 578 /* XXX This should probably move to the top of function. */ 579 if (cnp->cn_flags & SAVESTART) 580 panic("lookup: SAVESTART"); 581 goto success; 582 } 583 584 /* 585 * Handle "..": four special cases. 586 * 1. Return an error if this is the last component of 587 * the name and the operation is DELETE or RENAME. 588 * 2. If at root directory (e.g. after chroot) 589 * or at absolute root directory 590 * then ignore it so can't get out. 591 * 3. If this vnode is the root of a mounted 592 * filesystem, then replace it with the 593 * vnode which was mounted on so we take the 594 * .. in the other filesystem. 595 * 4. If the vnode is the top directory of 596 * the jail or chroot, don't let them out. 597 */ 598 if (cnp->cn_flags & ISDOTDOT) { 599 if ((cnp->cn_flags & ISLASTCN) != 0 && 600 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 601 error = EINVAL; 602 goto bad; 603 } 604 for (;;) { 605 for (pr = cnp->cn_cred->cr_prison; pr != NULL; 606 pr = pr->pr_parent) 607 if (dp == pr->pr_root) 608 break; 609 if (dp == ndp->ni_rootdir || 610 dp == ndp->ni_topdir || 611 dp == rootvnode || 612 pr != NULL || 613 ((dp->v_vflag & VV_ROOT) != 0 && 614 (cnp->cn_flags & NOCROSSMOUNT) != 0)) { 615 ndp->ni_dvp = dp; 616 ndp->ni_vp = dp; 617 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 618 VREF(dp); 619 goto nextname; 620 } 621 if ((dp->v_vflag & VV_ROOT) == 0) 622 break; 623 if (dp->v_iflag & VI_DOOMED) { /* forced unmount */ 624 error = ENOENT; 625 goto bad; 626 } 627 tdp = dp; 628 dp = dp->v_mount->mnt_vnodecovered; 629 tvfslocked = dvfslocked; 630 dvfslocked = VFS_LOCK_GIANT(dp->v_mount); 631 VREF(dp); 632 vput(tdp); 633 VFS_UNLOCK_GIANT(tvfslocked); 634 vn_lock(dp, 635 compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | 636 LK_RETRY)); 637 } 638 } 639 640 /* 641 * We now have a segment name to search for, and a directory to search. 642 */ 643 unionlookup: 644 #ifdef MAC 645 if ((cnp->cn_flags & NOMACCHECK) == 0) { 646 error = mac_vnode_check_lookup(cnp->cn_thread->td_ucred, dp, 647 cnp); 648 if (error) 649 goto bad; 650 } 651 #endif 652 ndp->ni_dvp = dp; 653 ndp->ni_vp = NULL; 654 ASSERT_VOP_LOCKED(dp, "lookup"); 655 VNASSERT(vfslocked == 0, dp, ("lookup: vfslocked %d", vfslocked)); 656 /* 657 * If we have a shared lock we may need to upgrade the lock for the 658 * last operation. 659 */ 660 if (dp != vp_crossmp && 661 VOP_ISLOCKED(dp) == LK_SHARED && 662 (cnp->cn_flags & ISLASTCN) && (cnp->cn_flags & LOCKPARENT)) 663 vn_lock(dp, LK_UPGRADE|LK_RETRY); 664 /* 665 * If we're looking up the last component and we need an exclusive 666 * lock, adjust our lkflags. 667 */ 668 if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags)) 669 cnp->cn_lkflags = LK_EXCLUSIVE; 670 #ifdef NAMEI_DIAGNOSTIC 671 vprint("lookup in", dp); 672 #endif 673 lkflags_save = cnp->cn_lkflags; 674 cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags); 675 if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) { 676 cnp->cn_lkflags = lkflags_save; 677 KASSERT(ndp->ni_vp == NULL, ("leaf should be empty")); 678 #ifdef NAMEI_DIAGNOSTIC 679 printf("not found\n"); 680 #endif 681 if ((error == ENOENT) && 682 (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) && 683 (dp->v_mount->mnt_flag & MNT_UNION)) { 684 tdp = dp; 685 dp = dp->v_mount->mnt_vnodecovered; 686 tvfslocked = dvfslocked; 687 dvfslocked = VFS_LOCK_GIANT(dp->v_mount); 688 VREF(dp); 689 vput(tdp); 690 VFS_UNLOCK_GIANT(tvfslocked); 691 vn_lock(dp, 692 compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | 693 LK_RETRY)); 694 goto unionlookup; 695 } 696 697 if (error != EJUSTRETURN) 698 goto bad; 699 /* 700 * At this point, we know we're at the end of the 701 * pathname. If creating / renaming, we can consider 702 * allowing the file or directory to be created / renamed, 703 * provided we're not on a read-only filesystem. 704 */ 705 if (rdonly) { 706 error = EROFS; 707 goto bad; 708 } 709 /* trailing slash only allowed for directories */ 710 if ((cnp->cn_flags & TRAILINGSLASH) && 711 !(cnp->cn_flags & WILLBEDIR)) { 712 error = ENOENT; 713 goto bad; 714 } 715 if ((cnp->cn_flags & LOCKPARENT) == 0) 716 VOP_UNLOCK(dp, 0); 717 /* 718 * We return with ni_vp NULL to indicate that the entry 719 * doesn't currently exist, leaving a pointer to the 720 * (possibly locked) directory vnode in ndp->ni_dvp. 721 */ 722 if (cnp->cn_flags & SAVESTART) { 723 ndp->ni_startdir = ndp->ni_dvp; 724 VREF(ndp->ni_startdir); 725 } 726 goto success; 727 } else 728 cnp->cn_lkflags = lkflags_save; 729 #ifdef NAMEI_DIAGNOSTIC 730 printf("found\n"); 731 #endif 732 /* 733 * Take into account any additional components consumed by 734 * the underlying filesystem. 735 */ 736 if (cnp->cn_consume > 0) { 737 cnp->cn_nameptr += cnp->cn_consume; 738 ndp->ni_next += cnp->cn_consume; 739 ndp->ni_pathlen -= cnp->cn_consume; 740 cnp->cn_consume = 0; 741 } 742 743 dp = ndp->ni_vp; 744 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 745 746 /* 747 * Check to see if the vnode has been mounted on; 748 * if so find the root of the mounted filesystem. 749 */ 750 while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && 751 (cnp->cn_flags & NOCROSSMOUNT) == 0) { 752 if (vfs_busy(mp, 0)) 753 continue; 754 vput(dp); 755 VFS_UNLOCK_GIANT(vfslocked); 756 vfslocked = VFS_LOCK_GIANT(mp); 757 if (dp != ndp->ni_dvp) 758 vput(ndp->ni_dvp); 759 else 760 vrele(ndp->ni_dvp); 761 VFS_UNLOCK_GIANT(dvfslocked); 762 dvfslocked = 0; 763 vref(vp_crossmp); 764 ndp->ni_dvp = vp_crossmp; 765 error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags), 766 &tdp); 767 vfs_unbusy(mp); 768 if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT)) 769 panic("vp_crossmp exclusively locked or reclaimed"); 770 if (error) { 771 dpunlocked = 1; 772 goto bad2; 773 } 774 ndp->ni_vp = dp = tdp; 775 } 776 777 /* 778 * Check for symbolic link 779 */ 780 if ((dp->v_type == VLNK) && 781 ((cnp->cn_flags & FOLLOW) || (cnp->cn_flags & TRAILINGSLASH) || 782 *ndp->ni_next == '/')) { 783 cnp->cn_flags |= ISSYMLINK; 784 if (dp->v_iflag & VI_DOOMED) { 785 /* 786 * We can't know whether the directory was mounted with 787 * NOSYMFOLLOW, so we can't follow safely. 788 */ 789 error = ENOENT; 790 goto bad2; 791 } 792 if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) { 793 error = EACCES; 794 goto bad2; 795 } 796 /* 797 * Symlink code always expects an unlocked dvp. 798 */ 799 if (ndp->ni_dvp != ndp->ni_vp) 800 VOP_UNLOCK(ndp->ni_dvp, 0); 801 goto success; 802 } 803 804 nextname: 805 /* 806 * Not a symbolic link that we will follow. Continue with the 807 * next component if there is any; otherwise, we're done. 808 */ 809 KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/', 810 ("lookup: invalid path state.")); 811 if (*ndp->ni_next == '/') { 812 cnp->cn_nameptr = ndp->ni_next; 813 while (*cnp->cn_nameptr == '/') { 814 cnp->cn_nameptr++; 815 ndp->ni_pathlen--; 816 } 817 if (ndp->ni_dvp != dp) 818 vput(ndp->ni_dvp); 819 else 820 vrele(ndp->ni_dvp); 821 VFS_UNLOCK_GIANT(dvfslocked); 822 dvfslocked = vfslocked; /* dp becomes dvp in dirloop */ 823 vfslocked = 0; 824 goto dirloop; 825 } 826 /* 827 * If we're processing a path with a trailing slash, 828 * check that the end result is a directory. 829 */ 830 if ((cnp->cn_flags & TRAILINGSLASH) && dp->v_type != VDIR) { 831 error = ENOTDIR; 832 goto bad2; 833 } 834 /* 835 * Disallow directory write attempts on read-only filesystems. 836 */ 837 if (rdonly && 838 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 839 error = EROFS; 840 goto bad2; 841 } 842 if (cnp->cn_flags & SAVESTART) { 843 ndp->ni_startdir = ndp->ni_dvp; 844 VREF(ndp->ni_startdir); 845 } 846 if (!wantparent) { 847 if (ndp->ni_dvp != dp) 848 vput(ndp->ni_dvp); 849 else 850 vrele(ndp->ni_dvp); 851 VFS_UNLOCK_GIANT(dvfslocked); 852 dvfslocked = 0; 853 } else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) 854 VOP_UNLOCK(ndp->ni_dvp, 0); 855 856 if (cnp->cn_flags & AUDITVNODE1) 857 AUDIT_ARG_VNODE(dp, ARG_VNODE1); 858 else if (cnp->cn_flags & AUDITVNODE2) 859 AUDIT_ARG_VNODE(dp, ARG_VNODE2); 860 861 if ((cnp->cn_flags & LOCKLEAF) == 0) 862 VOP_UNLOCK(dp, 0); 863 success: 864 /* 865 * Because of lookup_shared we may have the vnode shared locked, but 866 * the caller may want it to be exclusively locked. 867 */ 868 if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) && 869 VOP_ISLOCKED(dp) != LK_EXCLUSIVE) { 870 vn_lock(dp, LK_UPGRADE | LK_RETRY); 871 if (dp->v_iflag & VI_DOOMED) { 872 error = ENOENT; 873 goto bad2; 874 } 875 } 876 if (vfslocked && dvfslocked) 877 VFS_UNLOCK_GIANT(dvfslocked); /* Only need one */ 878 if (vfslocked || dvfslocked) 879 ndp->ni_cnd.cn_flags |= GIANTHELD; 880 return (0); 881 882 bad2: 883 if (dp != ndp->ni_dvp) 884 vput(ndp->ni_dvp); 885 else 886 vrele(ndp->ni_dvp); 887 bad: 888 if (!dpunlocked) 889 vput(dp); 890 VFS_UNLOCK_GIANT(vfslocked); 891 VFS_UNLOCK_GIANT(dvfslocked); 892 ndp->ni_cnd.cn_flags &= ~GIANTHELD; 893 ndp->ni_vp = NULL; 894 return (error); 895 } 896 897 /* 898 * relookup - lookup a path name component 899 * Used by lookup to re-acquire things. 900 */ 901 int 902 relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) 903 { 904 struct vnode *dp = 0; /* the directory we are searching */ 905 int wantparent; /* 1 => wantparent or lockparent flag */ 906 int rdonly; /* lookup read-only flag bit */ 907 int error = 0; 908 909 KASSERT(cnp->cn_flags & ISLASTCN, 910 ("relookup: Not given last component.")); 911 /* 912 * Setup: break out flag bits into variables. 913 */ 914 wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT); 915 KASSERT(wantparent, ("relookup: parent not wanted.")); 916 rdonly = cnp->cn_flags & RDONLY; 917 cnp->cn_flags &= ~ISSYMLINK; 918 dp = dvp; 919 cnp->cn_lkflags = LK_EXCLUSIVE; 920 vn_lock(dp, LK_EXCLUSIVE | LK_RETRY); 921 922 /* 923 * Search a new directory. 924 * 925 * The last component of the filename is left accessible via 926 * cnp->cn_nameptr for callers that need the name. Callers needing 927 * the name set the SAVENAME flag. When done, they assume 928 * responsibility for freeing the pathname buffer. 929 */ 930 #ifdef NAMEI_DIAGNOSTIC 931 printf("{%s}: ", cnp->cn_nameptr); 932 #endif 933 934 /* 935 * Check for degenerate name (e.g. / or "") 936 * which is a way of talking about a directory, 937 * e.g. like "/." or ".". 938 */ 939 if (cnp->cn_nameptr[0] == '\0') { 940 if (cnp->cn_nameiop != LOOKUP || wantparent) { 941 error = EISDIR; 942 goto bad; 943 } 944 if (dp->v_type != VDIR) { 945 error = ENOTDIR; 946 goto bad; 947 } 948 if (!(cnp->cn_flags & LOCKLEAF)) 949 VOP_UNLOCK(dp, 0); 950 *vpp = dp; 951 /* XXX This should probably move to the top of function. */ 952 if (cnp->cn_flags & SAVESTART) 953 panic("lookup: SAVESTART"); 954 return (0); 955 } 956 957 if (cnp->cn_flags & ISDOTDOT) 958 panic ("relookup: lookup on dot-dot"); 959 960 /* 961 * We now have a segment name to search for, and a directory to search. 962 */ 963 #ifdef NAMEI_DIAGNOSTIC 964 vprint("search in:", dp); 965 #endif 966 if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) { 967 KASSERT(*vpp == NULL, ("leaf should be empty")); 968 if (error != EJUSTRETURN) 969 goto bad; 970 /* 971 * If creating and at end of pathname, then can consider 972 * allowing file to be created. 973 */ 974 if (rdonly) { 975 error = EROFS; 976 goto bad; 977 } 978 /* ASSERT(dvp == ndp->ni_startdir) */ 979 if (cnp->cn_flags & SAVESTART) 980 VREF(dvp); 981 if ((cnp->cn_flags & LOCKPARENT) == 0) 982 VOP_UNLOCK(dp, 0); 983 /* 984 * We return with ni_vp NULL to indicate that the entry 985 * doesn't currently exist, leaving a pointer to the 986 * (possibly locked) directory vnode in ndp->ni_dvp. 987 */ 988 return (0); 989 } 990 991 dp = *vpp; 992 993 /* 994 * Disallow directory write attempts on read-only filesystems. 995 */ 996 if (rdonly && 997 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 998 if (dvp == dp) 999 vrele(dvp); 1000 else 1001 vput(dvp); 1002 error = EROFS; 1003 goto bad; 1004 } 1005 /* 1006 * Set the parent lock/ref state to the requested state. 1007 */ 1008 if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) { 1009 if (wantparent) 1010 VOP_UNLOCK(dvp, 0); 1011 else 1012 vput(dvp); 1013 } else if (!wantparent) 1014 vrele(dvp); 1015 /* 1016 * Check for symbolic link 1017 */ 1018 KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW), 1019 ("relookup: symlink found.\n")); 1020 1021 /* ASSERT(dvp == ndp->ni_startdir) */ 1022 if (cnp->cn_flags & SAVESTART) 1023 VREF(dvp); 1024 1025 if ((cnp->cn_flags & LOCKLEAF) == 0) 1026 VOP_UNLOCK(dp, 0); 1027 return (0); 1028 bad: 1029 vput(dp); 1030 *vpp = NULL; 1031 return (error); 1032 } 1033 1034 /* 1035 * Free data allocated by namei(); see namei(9) for details. 1036 */ 1037 void 1038 NDFREE(struct nameidata *ndp, const u_int flags) 1039 { 1040 int unlock_dvp; 1041 int unlock_vp; 1042 1043 unlock_dvp = 0; 1044 unlock_vp = 0; 1045 1046 if (!(flags & NDF_NO_FREE_PNBUF) && 1047 (ndp->ni_cnd.cn_flags & HASBUF)) { 1048 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 1049 ndp->ni_cnd.cn_flags &= ~HASBUF; 1050 } 1051 if (!(flags & NDF_NO_VP_UNLOCK) && 1052 (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp) 1053 unlock_vp = 1; 1054 if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) { 1055 if (unlock_vp) { 1056 vput(ndp->ni_vp); 1057 unlock_vp = 0; 1058 } else 1059 vrele(ndp->ni_vp); 1060 ndp->ni_vp = NULL; 1061 } 1062 if (unlock_vp) 1063 VOP_UNLOCK(ndp->ni_vp, 0); 1064 if (!(flags & NDF_NO_DVP_UNLOCK) && 1065 (ndp->ni_cnd.cn_flags & LOCKPARENT) && 1066 ndp->ni_dvp != ndp->ni_vp) 1067 unlock_dvp = 1; 1068 if (!(flags & NDF_NO_DVP_RELE) && 1069 (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) { 1070 if (unlock_dvp) { 1071 vput(ndp->ni_dvp); 1072 unlock_dvp = 0; 1073 } else 1074 vrele(ndp->ni_dvp); 1075 ndp->ni_dvp = NULL; 1076 } 1077 if (unlock_dvp) 1078 VOP_UNLOCK(ndp->ni_dvp, 0); 1079 if (!(flags & NDF_NO_STARTDIR_RELE) && 1080 (ndp->ni_cnd.cn_flags & SAVESTART)) { 1081 vrele(ndp->ni_startdir); 1082 ndp->ni_startdir = NULL; 1083 } 1084 } 1085 1086 /* 1087 * Determine if there is a suitable alternate filename under the specified 1088 * prefix for the specified path. If the create flag is set, then the 1089 * alternate prefix will be used so long as the parent directory exists. 1090 * This is used by the various compatiblity ABIs so that Linux binaries prefer 1091 * files under /compat/linux for example. The chosen path (whether under 1092 * the prefix or under /) is returned in a kernel malloc'd buffer pointed 1093 * to by pathbuf. The caller is responsible for free'ing the buffer from 1094 * the M_TEMP bucket if one is returned. 1095 */ 1096 int 1097 kern_alternate_path(struct thread *td, const char *prefix, const char *path, 1098 enum uio_seg pathseg, char **pathbuf, int create, int dirfd) 1099 { 1100 struct nameidata nd, ndroot; 1101 char *ptr, *buf, *cp; 1102 size_t len, sz; 1103 int error; 1104 1105 buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1106 *pathbuf = buf; 1107 1108 /* Copy the prefix into the new pathname as a starting point. */ 1109 len = strlcpy(buf, prefix, MAXPATHLEN); 1110 if (len >= MAXPATHLEN) { 1111 *pathbuf = NULL; 1112 free(buf, M_TEMP); 1113 return (EINVAL); 1114 } 1115 sz = MAXPATHLEN - len; 1116 ptr = buf + len; 1117 1118 /* Append the filename to the prefix. */ 1119 if (pathseg == UIO_SYSSPACE) 1120 error = copystr(path, ptr, sz, &len); 1121 else 1122 error = copyinstr(path, ptr, sz, &len); 1123 1124 if (error) { 1125 *pathbuf = NULL; 1126 free(buf, M_TEMP); 1127 return (error); 1128 } 1129 1130 /* Only use a prefix with absolute pathnames. */ 1131 if (*ptr != '/') { 1132 error = EINVAL; 1133 goto keeporig; 1134 } 1135 1136 if (dirfd != AT_FDCWD) { 1137 /* 1138 * We want the original because the "prefix" is 1139 * included in the already opened dirfd. 1140 */ 1141 bcopy(ptr, buf, len); 1142 return (0); 1143 } 1144 1145 /* 1146 * We know that there is a / somewhere in this pathname. 1147 * Search backwards for it, to find the file's parent dir 1148 * to see if it exists in the alternate tree. If it does, 1149 * and we want to create a file (cflag is set). We don't 1150 * need to worry about the root comparison in this case. 1151 */ 1152 1153 if (create) { 1154 for (cp = &ptr[len] - 1; *cp != '/'; cp--); 1155 *cp = '\0'; 1156 1157 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td); 1158 error = namei(&nd); 1159 *cp = '/'; 1160 if (error != 0) 1161 goto keeporig; 1162 } else { 1163 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td); 1164 1165 error = namei(&nd); 1166 if (error != 0) 1167 goto keeporig; 1168 1169 /* 1170 * We now compare the vnode of the prefix to the one 1171 * vnode asked. If they resolve to be the same, then we 1172 * ignore the match so that the real root gets used. 1173 * This avoids the problem of traversing "../.." to find the 1174 * root directory and never finding it, because "/" resolves 1175 * to the emulation root directory. This is expensive :-( 1176 */ 1177 NDINIT(&ndroot, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, prefix, 1178 td); 1179 1180 /* We shouldn't ever get an error from this namei(). */ 1181 error = namei(&ndroot); 1182 if (error == 0) { 1183 if (nd.ni_vp == ndroot.ni_vp) 1184 error = ENOENT; 1185 1186 NDFREE(&ndroot, NDF_ONLY_PNBUF); 1187 vrele(ndroot.ni_vp); 1188 VFS_UNLOCK_GIANT(NDHASGIANT(&ndroot)); 1189 } 1190 } 1191 1192 NDFREE(&nd, NDF_ONLY_PNBUF); 1193 vrele(nd.ni_vp); 1194 VFS_UNLOCK_GIANT(NDHASGIANT(&nd)); 1195 1196 keeporig: 1197 /* If there was an error, use the original path name. */ 1198 if (error) 1199 bcopy(ptr, buf, len); 1200 return (error); 1201 } 1202