1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_kdtrace.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/capability.h> 48 #include <sys/fcntl.h> 49 #include <sys/jail.h> 50 #include <sys/lock.h> 51 #include <sys/mutex.h> 52 #include <sys/namei.h> 53 #include <sys/vnode.h> 54 #include <sys/mount.h> 55 #include <sys/filedesc.h> 56 #include <sys/proc.h> 57 #include <sys/sdt.h> 58 #include <sys/syscallsubr.h> 59 #include <sys/sysctl.h> 60 #ifdef KTRACE 61 #include <sys/ktrace.h> 62 #endif 63 64 #include <security/audit/audit.h> 65 #include <security/mac/mac_framework.h> 66 67 #include <vm/uma.h> 68 69 #define NAMEI_DIAGNOSTIC 1 70 #undef NAMEI_DIAGNOSTIC 71 72 SDT_PROVIDER_DECLARE(vfs); 73 SDT_PROBE_DEFINE3(vfs, namei, lookup, entry, entry, "struct vnode *", "char *", 74 "unsigned long"); 75 SDT_PROBE_DEFINE2(vfs, namei, lookup, return, return, "int", "struct vnode *"); 76 77 /* 78 * Allocation zone for namei 79 */ 80 uma_zone_t namei_zone; 81 /* 82 * Placeholder vnode for mp traversal 83 */ 84 static struct vnode *vp_crossmp; 85 86 static void 87 nameiinit(void *dummy __unused) 88 { 89 90 namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL, 91 UMA_ALIGN_PTR, 0); 92 getnewvnode("crossmp", NULL, &dead_vnodeops, &vp_crossmp); 93 vn_lock(vp_crossmp, LK_EXCLUSIVE); 94 VN_LOCK_ASHARE(vp_crossmp); 95 VOP_UNLOCK(vp_crossmp, 0); 96 } 97 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL); 98 99 static int lookup_shared = 1; 100 SYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RW, &lookup_shared, 0, 101 "Enables/Disables shared locks for path name translation"); 102 TUNABLE_INT("vfs.lookup_shared", &lookup_shared); 103 104 /* 105 * Convert a pathname into a pointer to a locked vnode. 
106 * 107 * The FOLLOW flag is set when symbolic links are to be followed 108 * when they occur at the end of the name translation process. 109 * Symbolic links are always followed for all other pathname 110 * components other than the last. 111 * 112 * The segflg defines whether the name is to be copied from user 113 * space or kernel space. 114 * 115 * Overall outline of namei: 116 * 117 * copy in name 118 * get starting directory 119 * while (!done && !error) { 120 * call lookup to search path. 121 * if symbolic link, massage name in buffer and continue 122 * } 123 */ 124 int 125 namei(struct nameidata *ndp) 126 { 127 struct filedesc *fdp; /* pointer to file descriptor state */ 128 char *cp; /* pointer into pathname argument */ 129 struct vnode *dp; /* the directory we are searching */ 130 struct iovec aiov; /* uio for reading symbolic links */ 131 struct uio auio; 132 int error, linklen; 133 struct componentname *cnp = &ndp->ni_cnd; 134 struct thread *td = cnp->cn_thread; 135 struct proc *p = td->td_proc; 136 137 ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred; 138 KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc")); 139 KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0, 140 ("namei: nameiop contaminated with flags")); 141 KASSERT((cnp->cn_flags & OPMASK) == 0, 142 ("namei: flags contaminated with nameiops")); 143 if (!lookup_shared) 144 cnp->cn_flags &= ~LOCKSHARED; 145 fdp = p->p_fd; 146 147 /* We will set this ourselves if we need it. */ 148 cnp->cn_flags &= ~TRAILINGSLASH; 149 150 /* 151 * Get a buffer for the name to be translated, and copy the 152 * name into the buffer. 153 */ 154 if ((cnp->cn_flags & HASBUF) == 0) 155 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 156 if (ndp->ni_segflg == UIO_SYSSPACE) 157 error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, 158 MAXPATHLEN, (size_t *)&ndp->ni_pathlen); 159 else 160 error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, 161 MAXPATHLEN, (size_t *)&ndp->ni_pathlen); 162 163 /* 164 * Don't allow empty pathnames. 
165 */ 166 if (!error && *cnp->cn_pnbuf == '\0') 167 error = ENOENT; 168 169 #ifdef CAPABILITY_MODE 170 /* 171 * In capability mode, lookups must be "strictly relative" (i.e. 172 * not an absolute path, and not containing '..' components) to 173 * a real file descriptor, not the pseudo-descriptor AT_FDCWD. 174 */ 175 if (IN_CAPABILITY_MODE(td) && (cnp->cn_flags & NOCAPCHECK) == 0) { 176 ndp->ni_strictrelative = 1; 177 if (ndp->ni_dirfd == AT_FDCWD) { 178 #ifdef KTRACE 179 if (KTRPOINT(td, KTR_CAPFAIL)) 180 ktrcapfail(CAPFAIL_LOOKUP, 0, 0); 181 #endif 182 error = ECAPMODE; 183 } 184 } 185 #endif 186 if (error) { 187 uma_zfree(namei_zone, cnp->cn_pnbuf); 188 #ifdef DIAGNOSTIC 189 cnp->cn_pnbuf = NULL; 190 cnp->cn_nameptr = NULL; 191 #endif 192 ndp->ni_vp = NULL; 193 return (error); 194 } 195 ndp->ni_loopcnt = 0; 196 #ifdef KTRACE 197 if (KTRPOINT(td, KTR_NAMEI)) { 198 KASSERT(cnp->cn_thread == curthread, 199 ("namei not using curthread")); 200 ktrnamei(cnp->cn_pnbuf); 201 } 202 #endif 203 /* 204 * Get starting point for the translation. 205 */ 206 FILEDESC_SLOCK(fdp); 207 ndp->ni_rootdir = fdp->fd_rdir; 208 ndp->ni_topdir = fdp->fd_jdir; 209 210 /* 211 * If we are auditing the kernel pathname, save the user pathname. 212 */ 213 if (cnp->cn_flags & AUDITVNODE1) 214 AUDIT_ARG_UPATH1(td, ndp->ni_dirfd, cnp->cn_pnbuf); 215 if (cnp->cn_flags & AUDITVNODE2) 216 AUDIT_ARG_UPATH2(td, ndp->ni_dirfd, cnp->cn_pnbuf); 217 218 dp = NULL; 219 if (cnp->cn_pnbuf[0] != '/') { 220 if (ndp->ni_startdir != NULL) { 221 dp = ndp->ni_startdir; 222 error = 0; 223 } else if (ndp->ni_dirfd != AT_FDCWD) { 224 if (cnp->cn_flags & AUDITVNODE1) 225 AUDIT_ARG_ATFD1(ndp->ni_dirfd); 226 if (cnp->cn_flags & AUDITVNODE2) 227 AUDIT_ARG_ATFD2(ndp->ni_dirfd); 228 error = fgetvp_rights(td, ndp->ni_dirfd, 229 ndp->ni_rightsneeded | CAP_LOOKUP, 230 &(ndp->ni_baserights), &dp); 231 #ifdef CAPABILITIES 232 /* 233 * Lookups relative to a capability must also be 234 * strictly relative. 
235 * 236 * Note that a capability with rights CAP_MASK_VALID 237 * is treated exactly like a regular file descriptor. 238 */ 239 if (ndp->ni_baserights != CAP_MASK_VALID) 240 ndp->ni_strictrelative = 1; 241 #endif 242 } 243 if (error != 0 || dp != NULL) { 244 FILEDESC_SUNLOCK(fdp); 245 if (error == 0 && dp->v_type != VDIR) { 246 vrele(dp); 247 error = ENOTDIR; 248 } 249 } 250 if (error) { 251 uma_zfree(namei_zone, cnp->cn_pnbuf); 252 #ifdef DIAGNOSTIC 253 cnp->cn_pnbuf = NULL; 254 cnp->cn_nameptr = NULL; 255 #endif 256 return (error); 257 } 258 } 259 if (dp == NULL) { 260 dp = fdp->fd_cdir; 261 VREF(dp); 262 FILEDESC_SUNLOCK(fdp); 263 if (ndp->ni_startdir != NULL) 264 vrele(ndp->ni_startdir); 265 } 266 SDT_PROBE(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf, 267 cnp->cn_flags, 0, 0); 268 for (;;) { 269 /* 270 * Check if root directory should replace current directory. 271 * Done at start of translation and after symbolic link. 272 */ 273 cnp->cn_nameptr = cnp->cn_pnbuf; 274 if (*(cnp->cn_nameptr) == '/') { 275 vrele(dp); 276 if (ndp->ni_strictrelative != 0) { 277 #ifdef KTRACE 278 if (KTRPOINT(curthread, KTR_CAPFAIL)) 279 ktrcapfail(CAPFAIL_LOOKUP, 0, 0); 280 #endif 281 return (ENOTCAPABLE); 282 } 283 while (*(cnp->cn_nameptr) == '/') { 284 cnp->cn_nameptr++; 285 ndp->ni_pathlen--; 286 } 287 dp = ndp->ni_rootdir; 288 VREF(dp); 289 } 290 ndp->ni_startdir = dp; 291 error = lookup(ndp); 292 if (error) { 293 uma_zfree(namei_zone, cnp->cn_pnbuf); 294 #ifdef DIAGNOSTIC 295 cnp->cn_pnbuf = NULL; 296 cnp->cn_nameptr = NULL; 297 #endif 298 SDT_PROBE(vfs, namei, lookup, return, error, NULL, 0, 299 0, 0); 300 return (error); 301 } 302 /* 303 * If not a symbolic link, we're done. 
304 */ 305 if ((cnp->cn_flags & ISSYMLINK) == 0) { 306 if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) { 307 uma_zfree(namei_zone, cnp->cn_pnbuf); 308 #ifdef DIAGNOSTIC 309 cnp->cn_pnbuf = NULL; 310 cnp->cn_nameptr = NULL; 311 #endif 312 } else 313 cnp->cn_flags |= HASBUF; 314 315 SDT_PROBE(vfs, namei, lookup, return, 0, ndp->ni_vp, 316 0, 0, 0); 317 return (0); 318 } 319 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 320 error = ELOOP; 321 break; 322 } 323 #ifdef MAC 324 if ((cnp->cn_flags & NOMACCHECK) == 0) { 325 error = mac_vnode_check_readlink(td->td_ucred, 326 ndp->ni_vp); 327 if (error) 328 break; 329 } 330 #endif 331 if (ndp->ni_pathlen > 1) 332 cp = uma_zalloc(namei_zone, M_WAITOK); 333 else 334 cp = cnp->cn_pnbuf; 335 aiov.iov_base = cp; 336 aiov.iov_len = MAXPATHLEN; 337 auio.uio_iov = &aiov; 338 auio.uio_iovcnt = 1; 339 auio.uio_offset = 0; 340 auio.uio_rw = UIO_READ; 341 auio.uio_segflg = UIO_SYSSPACE; 342 auio.uio_td = (struct thread *)0; 343 auio.uio_resid = MAXPATHLEN; 344 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 345 if (error) { 346 if (ndp->ni_pathlen > 1) 347 uma_zfree(namei_zone, cp); 348 break; 349 } 350 linklen = MAXPATHLEN - auio.uio_resid; 351 if (linklen == 0) { 352 if (ndp->ni_pathlen > 1) 353 uma_zfree(namei_zone, cp); 354 error = ENOENT; 355 break; 356 } 357 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 358 if (ndp->ni_pathlen > 1) 359 uma_zfree(namei_zone, cp); 360 error = ENAMETOOLONG; 361 break; 362 } 363 if (ndp->ni_pathlen > 1) { 364 bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); 365 uma_zfree(namei_zone, cnp->cn_pnbuf); 366 cnp->cn_pnbuf = cp; 367 } else 368 cnp->cn_pnbuf[linklen] = '\0'; 369 ndp->ni_pathlen += linklen; 370 vput(ndp->ni_vp); 371 dp = ndp->ni_dvp; 372 } 373 uma_zfree(namei_zone, cnp->cn_pnbuf); 374 #ifdef DIAGNOSTIC 375 cnp->cn_pnbuf = NULL; 376 cnp->cn_nameptr = NULL; 377 #endif 378 vput(ndp->ni_vp); 379 ndp->ni_vp = NULL; 380 vrele(ndp->ni_dvp); 381 SDT_PROBE(vfs, namei, lookup, return, error, 
NULL, 0, 0, 0); 382 return (error); 383 } 384 385 static int 386 compute_cn_lkflags(struct mount *mp, int lkflags, int cnflags) 387 { 388 389 if (mp == NULL || ((lkflags & LK_SHARED) && 390 (!(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED) || 391 ((cnflags & ISDOTDOT) && 392 (mp->mnt_kern_flag & MNTK_LOOKUP_EXCL_DOTDOT))))) { 393 lkflags &= ~LK_SHARED; 394 lkflags |= LK_EXCLUSIVE; 395 } 396 return (lkflags); 397 } 398 399 static __inline int 400 needs_exclusive_leaf(struct mount *mp, int flags) 401 { 402 403 /* 404 * Intermediate nodes can use shared locks, we only need to 405 * force an exclusive lock for leaf nodes. 406 */ 407 if ((flags & (ISLASTCN | LOCKLEAF)) != (ISLASTCN | LOCKLEAF)) 408 return (0); 409 410 /* Always use exclusive locks if LOCKSHARED isn't set. */ 411 if (!(flags & LOCKSHARED)) 412 return (1); 413 414 /* 415 * For lookups during open(), if the mount point supports 416 * extended shared operations, then use a shared lock for the 417 * leaf node, otherwise use an exclusive lock. 418 */ 419 if (flags & ISOPEN) { 420 if (mp != NULL && 421 (mp->mnt_kern_flag & MNTK_EXTENDED_SHARED)) 422 return (0); 423 else 424 return (1); 425 } 426 427 /* 428 * Lookup requests outside of open() that specify LOCKSHARED 429 * only need a shared lock on the leaf vnode. 430 */ 431 return (0); 432 } 433 434 /* 435 * Search a pathname. 436 * This is a very central and rather complicated routine. 437 * 438 * The pathname is pointed to by ni_ptr and is of length ni_pathlen. 439 * The starting directory is taken from ni_startdir. The pathname is 440 * descended until done, or a symbolic link is encountered. The variable 441 * ni_more is clear if the path is completed; it is set to one if a 442 * symbolic link needing interpretation is encountered. 443 * 444 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on 445 * whether the name is to be looked up, created, renamed, or deleted. 
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it, the parent directory is returned
 * locked. If flag has WANTPARENT or'ed into it, the parent directory is
 * returned unlocked. Otherwise the parent directory is not returned. If
 * the target of the pathname exists and LOCKLEAF is or'ed into the flag
 * the target is returned locked, otherwise it is returned unlocked.
 * When creating or renaming and LOCKPARENT is specified, the target may not
 * be ".".  When deleting and LOCKPARENT is specified, the target may be ".".
 *
 * Overall outline of lookup:
 *
 * dirloop:
 *	identify next component of name at cnp->cn_nameptr
 *	handle degenerate case where name is null string
 *	if .. and crossing mount points and on mounted filesys, find parent
 *	call VOP_LOOKUP routine for next component name
 *	    directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
 *	    component vnode returned in ni_vp (if it exists), locked.
*	    if result vnode is mounted on and crossing mount points,
 *	        find mounted on vnode
 *	    if more components of name, do next level at dirloop
 *	return the answer in ni_vp, locked if LOCKLEAF set
 *	    if LOCKPARENT set, return locked parent in ni_dvp
 *	    if WANTPARENT set, return unlocked parent in ni_dvp
 *
 * Returns 0 on success or an errno value.  On every failure exit ni_vp
 * is set to NULL and the vnode being searched is released (see the
 * bad2: and bad: labels at the end of the function).
 */
int
lookup(struct nameidata *ndp)
{
	char *cp;			/* pointer into pathname argument */
	struct vnode *dp = 0;		/* the directory we are searching */
	struct vnode *tdp;		/* saved dp */
	struct mount *mp;		/* mount table entry */
	struct prison *pr;
	int docache;			/* == 0 do not cache last component */
	int wantparent;			/* 1 => wantparent or lockparent flag */
	int rdonly;			/* lookup read-only flag bit */
	int error = 0;
	int dpunlocked = 0;		/* dp has already been unlocked */
	struct componentname *cnp = &ndp->ni_cnd;
	int lkflags_save;
	/*
	 * State of ni_dvp for the bad2: cleanup: 0 = still locked,
	 * 1 = unlocked but still referenced, 2 = already released.
	 */
	int ni_dvp_unlocked;

	/*
	 * Setup: break out flag bits into variables.
	 */
	ni_dvp_unlocked = 0;
	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
	KASSERT(cnp->cn_nameiop == LOOKUP || wantparent,
	    ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT."));
	/* docache is non-zero exactly when NOCACHE is not set. */
	docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
	if (cnp->cn_nameiop == DELETE ||
	    (wantparent && cnp->cn_nameiop != CREATE &&
	     cnp->cn_nameiop != LOOKUP))
		docache = 0;
	rdonly = cnp->cn_flags & RDONLY;
	cnp->cn_flags &= ~ISSYMLINK;
	ndp->ni_dvp = NULL;
	/*
	 * We use shared locks until we hit the parent of the last cn then
	 * we adjust based on the requesting flags.
	 */
	if (lookup_shared)
		cnp->cn_lkflags = LK_SHARED;
	else
		cnp->cn_lkflags = LK_EXCLUSIVE;
	/* Take over the starting directory handed in through ni_startdir. */
	dp = ndp->ni_startdir;
	ndp->ni_startdir = NULLVP;
	vn_lock(dp,
	    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY,
	    cnp->cn_flags));

dirloop:
	/*
	 * Search a new directory.
	 *
	 * The last component of the filename is left accessible via
	 * cnp->cn_nameptr for callers that need the name. Callers needing
	 * the name set the SAVENAME flag. When done, they assume
	 * responsibility for freeing the pathname buffer.
	 */
	cnp->cn_consume = 0;
	/* Find the end of the current component. */
	for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++)
		continue;
	cnp->cn_namelen = cp - cnp->cn_nameptr;
	if (cnp->cn_namelen > NAME_MAX) {
		error = ENAMETOOLONG;
		goto bad;
	}
#ifdef NAMEI_DIAGNOSTIC
	{ char c = *cp;
	*cp = '\0';
	printf("{%s}: ", cnp->cn_nameptr);
	*cp = c; }
#endif
	ndp->ni_pathlen -= cnp->cn_namelen;
	ndp->ni_next = cp;

	/*
	 * Replace multiple slashes by a single slash and trailing slashes
	 * by a null.  This must be done before VOP_LOOKUP() because some
	 * fs's don't know about trailing slashes.  Remember if there were
	 * trailing slashes to handle symlinks, existing non-directories
	 * and non-existing files that won't be directories specially later.
	 */
	while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
		cp++;
		ndp->ni_pathlen--;
		if (*cp == '\0') {
			*ndp->ni_next = '\0';
			cnp->cn_flags |= TRAILINGSLASH;
		}
	}
	ndp->ni_next = cp;

	/* Classify the component: cacheable, "..", last one. */
	cnp->cn_flags |= MAKEENTRY;
	if (*cp == '\0' && docache == 0)
		cnp->cn_flags &= ~MAKEENTRY;
	if (cnp->cn_namelen == 2 &&
	    cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
		cnp->cn_flags |= ISDOTDOT;
	else
		cnp->cn_flags &= ~ISDOTDOT;
	if (*ndp->ni_next == 0)
		cnp->cn_flags |= ISLASTCN;
	else
		cnp->cn_flags &= ~ISLASTCN;

	/* A final "." component cannot be deleted or renamed. */
	if ((cnp->cn_flags & ISLASTCN) != 0 &&
	    cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = EINVAL;
		goto bad;
	}

	/*
	 * Check for degenerate name (e.g. / or "")
	 * which is a way of talking about a directory,
	 * e.g. like "/." or ".".
	 */
	if (cnp->cn_nameptr[0] == '\0') {
		if (dp->v_type != VDIR) {
			error = ENOTDIR;
			goto bad;
		}
		if (cnp->cn_nameiop != LOOKUP) {
			error = EISDIR;
			goto bad;
		}
		if (wantparent) {
			ndp->ni_dvp = dp;
			VREF(dp);
		}
		ndp->ni_vp = dp;

		if (cnp->cn_flags & AUDITVNODE1)
			AUDIT_ARG_VNODE1(dp);
		else if (cnp->cn_flags & AUDITVNODE2)
			AUDIT_ARG_VNODE2(dp);

		if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
			VOP_UNLOCK(dp, 0);
		/* XXX This should probably move to the top of function. */
		if (cnp->cn_flags & SAVESTART)
			panic("lookup: SAVESTART");
		goto success;
	}

	/*
	 * Handle "..": five special cases.
	 * 0. If doing a capability lookup, return ENOTCAPABLE (this is a
	 *    fairly conservative design choice, but it's the only one that we
	 *    are satisfied guarantees the property we're looking for).
	 * 1. Return an error if this is the last component of
	 *    the name and the operation is DELETE or RENAME.
	 * 2. If at root directory (e.g. after chroot)
	 *    or at absolute root directory
	 *    then ignore it so can't get out.
	 * 3. If this vnode is the root of a mounted
	 *    filesystem, then replace it with the
	 *    vnode which was mounted on so we take the
	 *    .. in the other filesystem.
	 * 4. If the vnode is the top directory of
	 *    the jail or chroot, don't let them out.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		if (ndp->ni_strictrelative != 0) {
#ifdef KTRACE
			if (KTRPOINT(curthread, KTR_CAPFAIL))
				ktrcapfail(CAPFAIL_LOOKUP, 0, 0);
#endif
			error = ENOTCAPABLE;
			goto bad;
		}
		if ((cnp->cn_flags & ISLASTCN) != 0 &&
		    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
			error = EINVAL;
			goto bad;
		}
		for (;;) {
			/*
			 * pr ends up non-NULL when dp is the root of the
			 * credential's prison or of one of its ancestors.
			 */
			for (pr = cnp->cn_cred->cr_prison; pr != NULL;
			     pr = pr->pr_parent)
				if (dp == pr->pr_root)
					break;
			if (dp == ndp->ni_rootdir ||
			    dp == ndp->ni_topdir ||
			    dp == rootvnode ||
			    pr != NULL ||
			    ((dp->v_vflag & VV_ROOT) != 0 &&
			     (cnp->cn_flags & NOCROSSMOUNT) != 0)) {
				/* ".." stays where it is: dp is its own parent. */
				ndp->ni_dvp = dp;
				ndp->ni_vp = dp;
				VREF(dp);
				goto nextname;
			}
			if ((dp->v_vflag & VV_ROOT) == 0)
				break;
			if (dp->v_iflag & VI_DOOMED) {	/* forced unmount */
				error = ENOENT;
				goto bad;
			}
			/* Step from a filesystem root to the covered vnode. */
			tdp = dp;
			dp = dp->v_mount->mnt_vnodecovered;
			VREF(dp);
			vput(tdp);
			vn_lock(dp,
			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
			    LK_RETRY, ISDOTDOT));
		}
	}

	/*
	 * We now have a segment name to search for, and a directory to search.
	 */
unionlookup:
#ifdef MAC
	if ((cnp->cn_flags & NOMACCHECK) == 0) {
		error = mac_vnode_check_lookup(cnp->cn_thread->td_ucred, dp,
		    cnp);
		if (error)
			goto bad;
	}
#endif
	ndp->ni_dvp = dp;
	ndp->ni_vp = NULL;
	ASSERT_VOP_LOCKED(dp, "lookup");
	/*
	 * If we have a shared lock we may need to upgrade the lock for the
	 * last operation.
	 */
	if (dp != vp_crossmp &&
	    VOP_ISLOCKED(dp) == LK_SHARED &&
	    (cnp->cn_flags & ISLASTCN) && (cnp->cn_flags & LOCKPARENT))
		vn_lock(dp, LK_UPGRADE|LK_RETRY);
	/*
	 * If we're looking up the last component and we need an exclusive
	 * lock, adjust our lkflags.
	 */
	if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags))
		cnp->cn_lkflags = LK_EXCLUSIVE;
#ifdef NAMEI_DIAGNOSTIC
	vprint("lookup in", dp);
#endif
	/*
	 * The per-mount adjustment only applies to this VOP_LOOKUP() call;
	 * cn_lkflags is restored from lkflags_save afterwards.
	 */
	lkflags_save = cnp->cn_lkflags;
	cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags,
	    cnp->cn_flags);
	if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) {
		cnp->cn_lkflags = lkflags_save;
		KASSERT(ndp->ni_vp == NULL, ("leaf should be empty"));
#ifdef NAMEI_DIAGNOSTIC
		printf("not found\n");
#endif
		/*
		 * Not found in the root of a MNT_UNION mount: retry the
		 * same component in the directory that is covered by it.
		 */
		if ((error == ENOENT) &&
		    (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) &&
		    (dp->v_mount->mnt_flag & MNT_UNION)) {
			tdp = dp;
			dp = dp->v_mount->mnt_vnodecovered;
			VREF(dp);
			vput(tdp);
			vn_lock(dp,
			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
			    LK_RETRY, cnp->cn_flags));
			goto unionlookup;
		}

		if (error != EJUSTRETURN)
			goto bad;
		/*
		 * At this point, we know we're at the end of the
		 * pathname.  If creating / renaming, we can consider
		 * allowing the file or directory to be created / renamed,
		 * provided we're not on a read-only filesystem.
		 */
		if (rdonly) {
			error = EROFS;
			goto bad;
		}
		/* trailing slash only allowed for directories */
		if ((cnp->cn_flags & TRAILINGSLASH) &&
		    !(cnp->cn_flags & WILLBEDIR)) {
			error = ENOENT;
			goto bad;
		}
		if ((cnp->cn_flags & LOCKPARENT) == 0)
			VOP_UNLOCK(dp, 0);
		/*
		 * We return with ni_vp NULL to indicate that the entry
		 * doesn't currently exist, leaving a pointer to the
		 * (possibly locked) directory vnode in ndp->ni_dvp.
		 */
		if (cnp->cn_flags & SAVESTART) {
			ndp->ni_startdir = ndp->ni_dvp;
			VREF(ndp->ni_startdir);
		}
		goto success;
	} else
		cnp->cn_lkflags = lkflags_save;
#ifdef NAMEI_DIAGNOSTIC
	printf("found\n");
#endif
	/*
	 * Take into account any additional components consumed by
	 * the underlying filesystem.
	 */
	if (cnp->cn_consume > 0) {
		cnp->cn_nameptr += cnp->cn_consume;
		ndp->ni_next += cnp->cn_consume;
		ndp->ni_pathlen -= cnp->cn_consume;
		cnp->cn_consume = 0;
	}

	dp = ndp->ni_vp;

	/*
	 * Check to see if the vnode has been mounted on;
	 * if so find the root of the mounted filesystem.
	 */
	while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
	       (cnp->cn_flags & NOCROSSMOUNT) == 0) {
		if (vfs_busy(mp, 0))
			continue;
		/*
		 * Release both the covered vnode and its parent and let
		 * the vp_crossmp placeholder stand in as ni_dvp while the
		 * root of the mounted filesystem is fetched.
		 */
		vput(dp);
		if (dp != ndp->ni_dvp)
			vput(ndp->ni_dvp);
		else
			vrele(ndp->ni_dvp);
		vref(vp_crossmp);
		ndp->ni_dvp = vp_crossmp;
		error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags,
		    cnp->cn_flags), &tdp);
		vfs_unbusy(mp);
		if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT))
			panic("vp_crossmp exclusively locked or reclaimed");
		if (error) {
			/* dp was already vput() above; bad: must skip it. */
			dpunlocked = 1;
			goto bad2;
		}
		ndp->ni_vp = dp = tdp;
	}

	/*
	 * Check for symbolic link
	 */
	if ((dp->v_type == VLNK) &&
	    ((cnp->cn_flags & FOLLOW) || (cnp->cn_flags & TRAILINGSLASH) ||
	     *ndp->ni_next == '/')) {
		cnp->cn_flags |= ISSYMLINK;
		if (dp->v_iflag & VI_DOOMED) {
			/*
			 * We can't know whether the directory was mounted with
			 * NOSYMFOLLOW, so we can't follow safely.
			 */
			error = ENOENT;
			goto bad2;
		}
		if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) {
			error = EACCES;
			goto bad2;
		}
		/*
		 * Symlink code always expects an unlocked dvp.
		 */
		if (ndp->ni_dvp != ndp->ni_vp) {
			VOP_UNLOCK(ndp->ni_dvp, 0);
			ni_dvp_unlocked = 1;
		}
		goto success;
	}

nextname:
	/*
	 * Not a symbolic link that we will follow.  Continue with the
	 * next component if there is any; otherwise, we're done.
	 */
	KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/',
	    ("lookup: invalid path state."));
	if (*ndp->ni_next == '/') {
		cnp->cn_nameptr = ndp->ni_next;
		while (*cnp->cn_nameptr == '/') {
			cnp->cn_nameptr++;
			ndp->ni_pathlen--;
		}
		/* The old parent is no longer needed; dp is searched next. */
		if (ndp->ni_dvp != dp)
			vput(ndp->ni_dvp);
		else
			vrele(ndp->ni_dvp);
		goto dirloop;
	}
	/*
	 * If we're processing a path with a trailing slash,
	 * check that the end result is a directory.
	 */
	if ((cnp->cn_flags & TRAILINGSLASH) && dp->v_type != VDIR) {
		error = ENOTDIR;
		goto bad2;
	}
	/*
	 * Disallow directory write attempts on read-only filesystems.
	 */
	if (rdonly &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = EROFS;
		goto bad2;
	}
	if (cnp->cn_flags & SAVESTART) {
		ndp->ni_startdir = ndp->ni_dvp;
		VREF(ndp->ni_startdir);
	}
	/*
	 * Put the parent into the state the caller asked for: released
	 * when not wanted, unlocked for WANTPARENT, left locked for
	 * LOCKPARENT.
	 */
	if (!wantparent) {
		ni_dvp_unlocked = 2;
		if (ndp->ni_dvp != dp)
			vput(ndp->ni_dvp);
		else
			vrele(ndp->ni_dvp);
	} else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) {
		VOP_UNLOCK(ndp->ni_dvp, 0);
		ni_dvp_unlocked = 1;
	}

	if (cnp->cn_flags & AUDITVNODE1)
		AUDIT_ARG_VNODE1(dp);
	else if (cnp->cn_flags & AUDITVNODE2)
		AUDIT_ARG_VNODE2(dp);

	if ((cnp->cn_flags & LOCKLEAF) == 0)
		VOP_UNLOCK(dp, 0);
success:
	/*
	 * Because of lookup_shared we may have the vnode shared locked, but
	 * the caller may want it to be exclusively locked.
	 */
	if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) &&
	    VOP_ISLOCKED(dp) != LK_EXCLUSIVE) {
		vn_lock(dp, LK_UPGRADE | LK_RETRY);
		if (dp->v_iflag & VI_DOOMED) {
			error = ENOENT;
			goto bad2;
		}
	}
	return (0);

	/*
	 * bad2: additionally drops ni_dvp according to ni_dvp_unlocked;
	 * bad: releases dp (unless dpunlocked) and clears ni_vp.
	 */
bad2:
	if (ni_dvp_unlocked != 2) {
		if (dp != ndp->ni_dvp && !ni_dvp_unlocked)
			vput(ndp->ni_dvp);
		else
			vrele(ndp->ni_dvp);
	}
bad:
	if (!dpunlocked)
		vput(dp);
	ndp->ni_vp = NULL;
	return (error);
}

/*
 * relookup - lookup a path name component
 *    Used by lookup to re-acquire things.
 *
 * dvp is the directory to search; it is locked exclusively here.  cnp
 * must describe a last component (ISLASTCN) with LOCKPARENT or
 * WANTPARENT set, and must not be "..".  The vnode found is stored in
 * *vpp.
 *
 * Returns 0 on success, including the case where VOP_LOOKUP() answered
 * EJUSTRETURN, i.e. the entry does not exist yet.  On failure an errno
 * value is returned and *vpp is set to NULL.
 */
int
relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	struct vnode *dp = 0;		/* the directory we are searching */
	int wantparent;			/* 1 => wantparent or lockparent flag */
	int rdonly;			/* lookup read-only flag bit */
	int error = 0;

	KASSERT(cnp->cn_flags & ISLASTCN,
	    ("relookup: Not given last component."));
	/*
	 * Setup: break out flag bits into variables.
	 */
	wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT);
	KASSERT(wantparent, ("relookup: parent not wanted."));
	rdonly = cnp->cn_flags & RDONLY;
	cnp->cn_flags &= ~ISSYMLINK;
	dp = dvp;
	cnp->cn_lkflags = LK_EXCLUSIVE;
	vn_lock(dp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Search a new directory.
	 *
	 * The last component of the filename is left accessible via
	 * cnp->cn_nameptr for callers that need the name. Callers needing
	 * the name set the SAVENAME flag. When done, they assume
	 * responsibility for freeing the pathname buffer.
	 */
#ifdef NAMEI_DIAGNOSTIC
	printf("{%s}: ", cnp->cn_nameptr);
#endif

	/*
	 * Check for "" which represents the root directory after slash
	 * removal.
	 */
	if (cnp->cn_nameptr[0] == '\0') {
		/*
		 * Support only LOOKUP for "/" because lookup()
		 * can't succeed for CREATE, DELETE and RENAME.
		 */
		KASSERT(cnp->cn_nameiop == LOOKUP, ("nameiop must be LOOKUP"));
		KASSERT(dp->v_type == VDIR, ("dp is not a directory"));

		if (!(cnp->cn_flags & LOCKLEAF))
			VOP_UNLOCK(dp, 0);
		*vpp = dp;
		/* XXX This should probably move to the top of function. */
		if (cnp->cn_flags & SAVESTART)
			panic("lookup: SAVESTART");
		return (0);
	}

	if (cnp->cn_flags & ISDOTDOT)
		panic ("relookup: lookup on dot-dot");

	/*
	 * We now have a segment name to search for, and a directory to search.
	 */
#ifdef NAMEI_DIAGNOSTIC
	vprint("search in:", dp);
#endif
	if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) {
		KASSERT(*vpp == NULL, ("leaf should be empty"));
		if (error != EJUSTRETURN)
			goto bad;
		/*
		 * If creating and at end of pathname, then can consider
		 * allowing file to be created.
		 */
		if (rdonly) {
			error = EROFS;
			goto bad;
		}
		/* ASSERT(dvp == ndp->ni_startdir) */
		if (cnp->cn_flags & SAVESTART)
			VREF(dvp);
		if ((cnp->cn_flags & LOCKPARENT) == 0)
			VOP_UNLOCK(dp, 0);
		/*
		 * We return with ni_vp NULL to indicate that the entry
		 * doesn't currently exist, leaving a pointer to the
		 * (possibly locked) directory vnode in ndp->ni_dvp.
		 */
		return (0);
	}

	dp = *vpp;

	/*
	 * Disallow directory write attempts on read-only filesystems.
	 */
	if (rdonly &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		/* Drop the parent here; bad: only releases dp. */
		if (dvp == dp)
			vrele(dvp);
		else
			vput(dvp);
		error = EROFS;
		goto bad;
	}
	/*
	 * Set the parent lock/ref state to the requested state.
	 */
	if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) {
		if (wantparent)
			VOP_UNLOCK(dvp, 0);
		else
			vput(dvp);
	} else if (!wantparent)
		vrele(dvp);
	/*
	 * Check for symbolic link
	 */
	KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW),
	    ("relookup: symlink found.\n"));

	/* ASSERT(dvp == ndp->ni_startdir) */
	if (cnp->cn_flags & SAVESTART)
		VREF(dvp);

	if ((cnp->cn_flags & LOCKLEAF) == 0)
		VOP_UNLOCK(dp, 0);
	return (0);
bad:
	vput(dp);
	*vpp = NULL;
	return (error);
}

/*
 * Free data allocated by namei(); see namei(9) for details.
 *
 * Each step below is performed unless the matching NDF_NO_* bit is set
 * in flags:
 *	NDF_NO_FREE_PNBUF	free the pathname buffer (only if HASBUF)
 *	NDF_NO_VP_UNLOCK	unlock ni_vp (only if LOCKLEAF was requested)
 *	NDF_NO_VP_RELE		release ni_vp and clear the pointer
 *	NDF_NO_DVP_UNLOCK	unlock ni_dvp (only if LOCKPARENT was
 *				requested and ni_dvp differs from ni_vp)
 *	NDF_NO_DVP_RELE		release ni_dvp (only if LOCKPARENT or
 *				WANTPARENT was requested) and clear it
 *	NDF_NO_STARTDIR_RELE	release ni_startdir (only if SAVESTART)
 * When a vnode is to be both unlocked and released, a single vput()
 * does both.
 */
void
NDFREE(struct nameidata *ndp, const u_int flags)
{
	int unlock_dvp;
	int unlock_vp;

	unlock_dvp = 0;
	unlock_vp = 0;

	if (!(flags & NDF_NO_FREE_PNBUF) &&
	    (ndp->ni_cnd.cn_flags & HASBUF)) {
		uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
		ndp->ni_cnd.cn_flags &= ~HASBUF;
	}
	if (!(flags & NDF_NO_VP_UNLOCK) &&
	    (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp)
		unlock_vp = 1;
	if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) {
		if (unlock_vp) {
			vput(ndp->ni_vp);
			unlock_vp = 0;
		} else
			vrele(ndp->ni_vp);
		ndp->ni_vp = NULL;
	}
	/* Still set only when the vnode is to be unlocked but kept. */
	if (unlock_vp)
		VOP_UNLOCK(ndp->ni_vp, 0);
	if (!(flags & NDF_NO_DVP_UNLOCK) &&
	    (ndp->ni_cnd.cn_flags & LOCKPARENT) &&
	    ndp->ni_dvp != ndp->ni_vp)
		unlock_dvp = 1;
	if (!(flags & NDF_NO_DVP_RELE) &&
	    (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) {
		if (unlock_dvp) {
			vput(ndp->ni_dvp);
			unlock_dvp = 0;
		} else
			vrele(ndp->ni_dvp);
		ndp->ni_dvp = NULL;
	}
	/* Still set only when the parent is to be unlocked but kept. */
	if (unlock_dvp)
		VOP_UNLOCK(ndp->ni_dvp, 0);
	if (!(flags & NDF_NO_STARTDIR_RELE) &&
	    (ndp->ni_cnd.cn_flags & SAVESTART)) {
		vrele(ndp->ni_startdir);
		ndp->ni_startdir = NULL;
	}
}

/*
 * Determine if there is a
suitable alternate filename under the specified 1110 * prefix for the specified path. If the create flag is set, then the 1111 * alternate prefix will be used so long as the parent directory exists. 1112 * This is used by the various compatiblity ABIs so that Linux binaries prefer 1113 * files under /compat/linux for example. The chosen path (whether under 1114 * the prefix or under /) is returned in a kernel malloc'd buffer pointed 1115 * to by pathbuf. The caller is responsible for free'ing the buffer from 1116 * the M_TEMP bucket if one is returned. 1117 */ 1118 int 1119 kern_alternate_path(struct thread *td, const char *prefix, const char *path, 1120 enum uio_seg pathseg, char **pathbuf, int create, int dirfd) 1121 { 1122 struct nameidata nd, ndroot; 1123 char *ptr, *buf, *cp; 1124 size_t len, sz; 1125 int error; 1126 1127 buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1128 *pathbuf = buf; 1129 1130 /* Copy the prefix into the new pathname as a starting point. */ 1131 len = strlcpy(buf, prefix, MAXPATHLEN); 1132 if (len >= MAXPATHLEN) { 1133 *pathbuf = NULL; 1134 free(buf, M_TEMP); 1135 return (EINVAL); 1136 } 1137 sz = MAXPATHLEN - len; 1138 ptr = buf + len; 1139 1140 /* Append the filename to the prefix. */ 1141 if (pathseg == UIO_SYSSPACE) 1142 error = copystr(path, ptr, sz, &len); 1143 else 1144 error = copyinstr(path, ptr, sz, &len); 1145 1146 if (error) { 1147 *pathbuf = NULL; 1148 free(buf, M_TEMP); 1149 return (error); 1150 } 1151 1152 /* Only use a prefix with absolute pathnames. */ 1153 if (*ptr != '/') { 1154 error = EINVAL; 1155 goto keeporig; 1156 } 1157 1158 if (dirfd != AT_FDCWD) { 1159 /* 1160 * We want the original because the "prefix" is 1161 * included in the already opened dirfd. 1162 */ 1163 bcopy(ptr, buf, len); 1164 return (0); 1165 } 1166 1167 /* 1168 * We know that there is a / somewhere in this pathname. 1169 * Search backwards for it, to find the file's parent dir 1170 * to see if it exists in the alternate tree. 
If it does, 1171 * and we want to create a file (cflag is set). We don't 1172 * need to worry about the root comparison in this case. 1173 */ 1174 1175 if (create) { 1176 for (cp = &ptr[len] - 1; *cp != '/'; cp--); 1177 *cp = '\0'; 1178 1179 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, buf, td); 1180 error = namei(&nd); 1181 *cp = '/'; 1182 if (error != 0) 1183 goto keeporig; 1184 } else { 1185 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, buf, td); 1186 1187 error = namei(&nd); 1188 if (error != 0) 1189 goto keeporig; 1190 1191 /* 1192 * We now compare the vnode of the prefix to the one 1193 * vnode asked. If they resolve to be the same, then we 1194 * ignore the match so that the real root gets used. 1195 * This avoids the problem of traversing "../.." to find the 1196 * root directory and never finding it, because "/" resolves 1197 * to the emulation root directory. This is expensive :-( 1198 */ 1199 NDINIT(&ndroot, LOOKUP, FOLLOW, UIO_SYSSPACE, prefix, 1200 td); 1201 1202 /* We shouldn't ever get an error from this namei(). */ 1203 error = namei(&ndroot); 1204 if (error == 0) { 1205 if (nd.ni_vp == ndroot.ni_vp) 1206 error = ENOENT; 1207 1208 NDFREE(&ndroot, NDF_ONLY_PNBUF); 1209 vrele(ndroot.ni_vp); 1210 } 1211 } 1212 1213 NDFREE(&nd, NDF_ONLY_PNBUF); 1214 vrele(nd.ni_vp); 1215 1216 keeporig: 1217 /* If there was an error, use the original path name. */ 1218 if (error) 1219 bcopy(ptr, buf, len); 1220 return (error); 1221 } 1222