1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_capsicum.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/kernel.h> 48 #include <sys/capsicum.h> 49 #include <sys/fcntl.h> 50 #include <sys/jail.h> 51 #include <sys/lock.h> 52 #include <sys/mutex.h> 53 #include <sys/namei.h> 54 #include <sys/vnode.h> 55 #include <sys/mount.h> 56 #include <sys/filedesc.h> 57 #include <sys/proc.h> 58 #include <sys/sdt.h> 59 #include <sys/syscallsubr.h> 60 #include <sys/sysctl.h> 61 #ifdef KTRACE 62 #include <sys/ktrace.h> 63 #endif 64 65 #include <security/audit/audit.h> 66 #include <security/mac/mac_framework.h> 67 68 #include <vm/uma.h> 69 70 #define NAMEI_DIAGNOSTIC 1 71 #undef NAMEI_DIAGNOSTIC 72 73 SDT_PROVIDER_DECLARE(vfs); 74 SDT_PROBE_DEFINE3(vfs, namei, lookup, entry, "struct vnode *", "char *", 75 "unsigned long"); 76 SDT_PROBE_DEFINE2(vfs, namei, lookup, return, "int", "struct vnode *"); 77 78 /* Allocation zone for namei. */ 79 uma_zone_t namei_zone; 80 81 /* Placeholder vnode for mp traversal. */ 82 static struct vnode *vp_crossmp; 83 84 static int 85 crossmp_vop_islocked(struct vop_islocked_args *ap) 86 { 87 88 return (LK_SHARED); 89 } 90 91 static int 92 crossmp_vop_lock1(struct vop_lock1_args *ap) 93 { 94 struct vnode *vp; 95 struct lock *lk; 96 const char *file; 97 int flags, line; 98 99 vp = ap->a_vp; 100 lk = vp->v_vnlock; 101 flags = ap->a_flags; 102 file = ap->a_file; 103 line = ap->a_line; 104 105 if ((flags & LK_SHARED) == 0) 106 panic("invalid lock request for crossmp"); 107 108 WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER, file, line, 109 flags & LK_INTERLOCK ? &VI_MTX(vp)->lock_object : NULL); 110 WITNESS_LOCK(&lk->lock_object, 0, file, line); 111 if ((flags & LK_INTERLOCK) != 0) 112 VI_UNLOCK(vp); 113 LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, ap->a_file, line); 114 return (0); 115 } 116 117 static int 118 crossmp_vop_unlock(struct vop_unlock_args *ap) 119 { 120 struct vnode *vp; 121 struct lock *lk; 122 int flags; 123 124 vp = ap->a_vp; 125 lk = vp->v_vnlock; 126 flags = ap->a_flags; 127 128 if ((flags & LK_INTERLOCK) != 0) 129 VI_UNLOCK(vp); 130 WITNESS_UNLOCK(&lk->lock_object, 0, LOCK_FILE, LOCK_LINE); 131 LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, LOCK_FILE, 132 LOCK_LINE); 133 return (0); 134 } 135 136 static struct vop_vector crossmp_vnodeops = { 137 .vop_default = &default_vnodeops, 138 .vop_islocked = crossmp_vop_islocked, 139 .vop_lock1 = crossmp_vop_lock1, 140 .vop_unlock = crossmp_vop_unlock, 141 }; 142 143 struct nameicap_tracker { 144 struct vnode *dp; 145 TAILQ_ENTRY(nameicap_tracker) nm_link; 146 }; 147 148 /* Zone for cap mode tracker elements used for dotdot capability checks. */ 149 static uma_zone_t nt_zone; 150 151 static void 152 nameiinit(void *dummy __unused) 153 { 154 155 namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL, 156 UMA_ALIGN_PTR, 0); 157 nt_zone = uma_zcreate("rentr", sizeof(struct nameicap_tracker), 158 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 159 getnewvnode("crossmp", NULL, &crossmp_vnodeops, &vp_crossmp); 160 } 161 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL); 162 163 static int lookup_shared = 1; 164 SYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RWTUN, &lookup_shared, 0, 165 "enables shared locks for path name translation"); 166 167 static int lookup_cap_dotdot = 1; 168 SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot, CTLFLAG_RWTUN, 169 &lookup_cap_dotdot, 0, 170 "enables \"..\" components in path lookup in capability mode"); 171 static int lookup_cap_dotdot_nonlocal = 1; 172 SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot_nonlocal, CTLFLAG_RWTUN, 173 &lookup_cap_dotdot_nonlocal, 0, 174 "enables \"..\" components in path lookup in capability mode " 175 "on non-local mount"); 176 177 static void 178 nameicap_tracker_add(struct nameidata *ndp, struct vnode *dp) 179 { 180 struct nameicap_tracker *nt; 181 182 if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp->v_type != VDIR) 183 return; 184 nt = uma_zalloc(nt_zone, M_WAITOK); 185 vhold(dp); 186 nt->dp = dp; 187 TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link); 188 } 189 190 static void 191 nameicap_cleanup(struct nameidata *ndp) 192 { 193 struct nameicap_tracker *nt, *nt1; 194 195 KASSERT(TAILQ_EMPTY(&ndp->ni_cap_tracker) || 196 (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, ("not strictrelative")); 197 TAILQ_FOREACH_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) { 198 TAILQ_REMOVE(&ndp->ni_cap_tracker, nt, nm_link); 199 vdrop(nt->dp); 200 uma_zfree(nt_zone, nt); 201 } 202 } 203 204 /* 205 * For dotdot lookups in capability mode, only allow the component 206 * lookup to succeed if the resulting directory was already traversed 207 * during the operation. Also fail dotdot lookups for non-local 208 * filesystems, where external agents might assist local lookups to 209 * escape the compartment. 210 */ 211 static int 212 nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp) 213 { 214 struct nameicap_tracker *nt; 215 struct mount *mp; 216 217 if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp == NULL || 218 dp->v_type != VDIR) 219 return (0); 220 mp = dp->v_mount; 221 if (lookup_cap_dotdot_nonlocal == 0 && mp != NULL && 222 (mp->mnt_flag & MNT_LOCAL) == 0) 223 return (ENOTCAPABLE); 224 TAILQ_FOREACH_REVERSE(nt, &ndp->ni_cap_tracker, nameicap_tracker_head, 225 nm_link) { 226 if (dp == nt->dp) 227 return (0); 228 } 229 return (ENOTCAPABLE); 230 } 231 232 static void 233 namei_cleanup_cnp(struct componentname *cnp) 234 { 235 236 uma_zfree(namei_zone, cnp->cn_pnbuf); 237 #ifdef DIAGNOSTIC 238 cnp->cn_pnbuf = NULL; 239 cnp->cn_nameptr = NULL; 240 #endif 241 } 242 243 static int 244 namei_handle_root(struct nameidata *ndp, struct vnode **dpp) 245 { 246 struct componentname *cnp; 247 248 cnp = &ndp->ni_cnd; 249 if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0) { 250 #ifdef KTRACE 251 if (KTRPOINT(curthread, KTR_CAPFAIL)) 252 ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); 253 #endif 254 return (ENOTCAPABLE); 255 } 256 while (*(cnp->cn_nameptr) == '/') { 257 cnp->cn_nameptr++; 258 ndp->ni_pathlen--; 259 } 260 *dpp = ndp->ni_rootdir; 261 vrefact(*dpp); 262 return (0); 263 } 264 265 /* 266 * Convert a pathname into a pointer to a locked vnode. 267 * 268 * The FOLLOW flag is set when symbolic links are to be followed 269 * when they occur at the end of the name translation process. 270 * Symbolic links are always followed for all other pathname 271 * components other than the last. 272 * 273 * The segflg defines whether the name is to be copied from user 274 * space or kernel space. 275 * 276 * Overall outline of namei: 277 * 278 * copy in name 279 * get starting directory 280 * while (!done && !error) { 281 * call lookup to search path. 282 * if symbolic link, massage name in buffer and continue 283 * } 284 */ 285 int 286 namei(struct nameidata *ndp) 287 { 288 struct filedesc *fdp; /* pointer to file descriptor state */ 289 char *cp; /* pointer into pathname argument */ 290 struct vnode *dp; /* the directory we are searching */ 291 struct iovec aiov; /* uio for reading symbolic links */ 292 struct componentname *cnp; 293 struct thread *td; 294 struct proc *p; 295 cap_rights_t rights; 296 struct uio auio; 297 int error, linklen, startdir_used; 298 299 cnp = &ndp->ni_cnd; 300 td = cnp->cn_thread; 301 p = td->td_proc; 302 ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred; 303 KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc")); 304 KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0, 305 ("namei: nameiop contaminated with flags")); 306 KASSERT((cnp->cn_flags & OPMASK) == 0, 307 ("namei: flags contaminated with nameiops")); 308 MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR || 309 ndp->ni_startdir->v_type == VBAD); 310 if (!lookup_shared) 311 cnp->cn_flags &= ~LOCKSHARED; 312 fdp = p->p_fd; 313 TAILQ_INIT(&ndp->ni_cap_tracker); 314 ndp->ni_lcf = 0; 315 316 /* We will set this ourselves if we need it. */ 317 cnp->cn_flags &= ~TRAILINGSLASH; 318 319 /* 320 * Get a buffer for the name to be translated, and copy the 321 * name into the buffer. 322 */ 323 if ((cnp->cn_flags & HASBUF) == 0) 324 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 325 if (ndp->ni_segflg == UIO_SYSSPACE) 326 error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, 327 &ndp->ni_pathlen); 328 else 329 error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, 330 &ndp->ni_pathlen); 331 332 /* 333 * Don't allow empty pathnames. 334 */ 335 if (error == 0 && *cnp->cn_pnbuf == '\0') 336 error = ENOENT; 337 338 #ifdef CAPABILITY_MODE 339 /* 340 * In capability mode, lookups must be restricted to happen in 341 * the subtree with the root specified by the file descriptor: 342 * - The root must be real file descriptor, not the pseudo-descriptor 343 * AT_FDCWD. 344 * - The passed path must be relative and not absolute. 345 * - If lookup_cap_dotdot is disabled, path must not contain the 346 * '..' components. 347 * - If lookup_cap_dotdot is enabled, we verify that all '..' 348 * components lookups result in the directories which were 349 * previously walked by us, which prevents an escape from 350 * the relative root. 351 */ 352 if (error == 0 && IN_CAPABILITY_MODE(td) && 353 (cnp->cn_flags & NOCAPCHECK) == 0) { 354 ndp->ni_lcf |= NI_LCF_STRICTRELATIVE; 355 if (ndp->ni_dirfd == AT_FDCWD) { 356 #ifdef KTRACE 357 if (KTRPOINT(td, KTR_CAPFAIL)) 358 ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); 359 #endif 360 error = ECAPMODE; 361 } 362 } 363 #endif 364 if (error != 0) { 365 namei_cleanup_cnp(cnp); 366 ndp->ni_vp = NULL; 367 return (error); 368 } 369 ndp->ni_loopcnt = 0; 370 #ifdef KTRACE 371 if (KTRPOINT(td, KTR_NAMEI)) { 372 KASSERT(cnp->cn_thread == curthread, 373 ("namei not using curthread")); 374 ktrnamei(cnp->cn_pnbuf); 375 } 376 #endif 377 /* 378 * Get starting point for the translation. 379 */ 380 FILEDESC_SLOCK(fdp); 381 ndp->ni_rootdir = fdp->fd_rdir; 382 vrefact(ndp->ni_rootdir); 383 ndp->ni_topdir = fdp->fd_jdir; 384 385 /* 386 * If we are auditing the kernel pathname, save the user pathname. 387 */ 388 if (cnp->cn_flags & AUDITVNODE1) 389 AUDIT_ARG_UPATH1(td, ndp->ni_dirfd, cnp->cn_pnbuf); 390 if (cnp->cn_flags & AUDITVNODE2) 391 AUDIT_ARG_UPATH2(td, ndp->ni_dirfd, cnp->cn_pnbuf); 392 393 startdir_used = 0; 394 dp = NULL; 395 cnp->cn_nameptr = cnp->cn_pnbuf; 396 if (cnp->cn_pnbuf[0] == '/') { 397 error = namei_handle_root(ndp, &dp); 398 } else { 399 if (ndp->ni_startdir != NULL) { 400 dp = ndp->ni_startdir; 401 startdir_used = 1; 402 } else if (ndp->ni_dirfd == AT_FDCWD) { 403 dp = fdp->fd_cdir; 404 vrefact(dp); 405 } else { 406 rights = ndp->ni_rightsneeded; 407 cap_rights_set(&rights, CAP_LOOKUP); 408 409 if (cnp->cn_flags & AUDITVNODE1) 410 AUDIT_ARG_ATFD1(ndp->ni_dirfd); 411 if (cnp->cn_flags & AUDITVNODE2) 412 AUDIT_ARG_ATFD2(ndp->ni_dirfd); 413 error = fgetvp_rights(td, ndp->ni_dirfd, 414 &rights, &ndp->ni_filecaps, &dp); 415 if (error == EINVAL) 416 error = ENOTDIR; 417 #ifdef CAPABILITIES 418 /* 419 * If file descriptor doesn't have all rights, 420 * all lookups relative to it must also be 421 * strictly relative. 422 */ 423 CAP_ALL(&rights); 424 if (!cap_rights_contains(&ndp->ni_filecaps.fc_rights, 425 &rights) || 426 ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL || 427 ndp->ni_filecaps.fc_nioctls != -1) { 428 ndp->ni_lcf |= NI_LCF_STRICTRELATIVE; 429 } 430 #endif 431 } 432 if (error == 0 && dp->v_type != VDIR) 433 error = ENOTDIR; 434 } 435 FILEDESC_SUNLOCK(fdp); 436 if (ndp->ni_startdir != NULL && !startdir_used) 437 vrele(ndp->ni_startdir); 438 if (error != 0) { 439 if (dp != NULL) 440 vrele(dp); 441 goto out; 442 } 443 if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0 && 444 lookup_cap_dotdot != 0) 445 ndp->ni_lcf |= NI_LCF_CAP_DOTDOT; 446 SDT_PROBE3(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf, 447 cnp->cn_flags); 448 for (;;) { 449 ndp->ni_startdir = dp; 450 error = lookup(ndp); 451 if (error != 0) 452 goto out; 453 /* 454 * If not a symbolic link, we're done. 455 */ 456 if ((cnp->cn_flags & ISSYMLINK) == 0) { 457 vrele(ndp->ni_rootdir); 458 if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) { 459 namei_cleanup_cnp(cnp); 460 } else 461 cnp->cn_flags |= HASBUF; 462 nameicap_cleanup(ndp); 463 SDT_PROBE2(vfs, namei, lookup, return, 0, ndp->ni_vp); 464 return (0); 465 } 466 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 467 error = ELOOP; 468 break; 469 } 470 #ifdef MAC 471 if ((cnp->cn_flags & NOMACCHECK) == 0) { 472 error = mac_vnode_check_readlink(td->td_ucred, 473 ndp->ni_vp); 474 if (error != 0) 475 break; 476 } 477 #endif 478 if (ndp->ni_pathlen > 1) 479 cp = uma_zalloc(namei_zone, M_WAITOK); 480 else 481 cp = cnp->cn_pnbuf; 482 aiov.iov_base = cp; 483 aiov.iov_len = MAXPATHLEN; 484 auio.uio_iov = &aiov; 485 auio.uio_iovcnt = 1; 486 auio.uio_offset = 0; 487 auio.uio_rw = UIO_READ; 488 auio.uio_segflg = UIO_SYSSPACE; 489 auio.uio_td = td; 490 auio.uio_resid = MAXPATHLEN; 491 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 492 if (error != 0) { 493 if (ndp->ni_pathlen > 1) 494 uma_zfree(namei_zone, cp); 495 break; 496 } 497 linklen = MAXPATHLEN - auio.uio_resid; 498 if (linklen == 0) { 499 if (ndp->ni_pathlen > 1) 500 uma_zfree(namei_zone, cp); 501 error = ENOENT; 502 break; 503 } 504 if (linklen + ndp->ni_pathlen > MAXPATHLEN) { 505 if (ndp->ni_pathlen > 1) 506 uma_zfree(namei_zone, cp); 507 error = ENAMETOOLONG; 508 break; 509 } 510 if (ndp->ni_pathlen > 1) { 511 bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); 512 uma_zfree(namei_zone, cnp->cn_pnbuf); 513 cnp->cn_pnbuf = cp; 514 } else 515 cnp->cn_pnbuf[linklen] = '\0'; 516 ndp->ni_pathlen += linklen; 517 vput(ndp->ni_vp); 518 dp = ndp->ni_dvp; 519 /* 520 * Check if root directory should replace current directory. 521 */ 522 cnp->cn_nameptr = cnp->cn_pnbuf; 523 if (*(cnp->cn_nameptr) == '/') { 524 vrele(dp); 525 error = namei_handle_root(ndp, &dp); 526 if (error != 0) 527 goto out; 528 } 529 } 530 vput(ndp->ni_vp); 531 ndp->ni_vp = NULL; 532 vrele(ndp->ni_dvp); 533 out: 534 vrele(ndp->ni_rootdir); 535 namei_cleanup_cnp(cnp); 536 nameicap_cleanup(ndp); 537 SDT_PROBE2(vfs, namei, lookup, return, error, NULL); 538 return (error); 539 } 540 541 static int 542 compute_cn_lkflags(struct mount *mp, int lkflags, int cnflags) 543 { 544 545 if (mp == NULL || ((lkflags & LK_SHARED) && 546 (!(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED) || 547 ((cnflags & ISDOTDOT) && 548 (mp->mnt_kern_flag & MNTK_LOOKUP_EXCL_DOTDOT))))) { 549 lkflags &= ~LK_SHARED; 550 lkflags |= LK_EXCLUSIVE; 551 } 552 lkflags |= LK_NODDLKTREAT; 553 return (lkflags); 554 } 555 556 static __inline int 557 needs_exclusive_leaf(struct mount *mp, int flags) 558 { 559 560 /* 561 * Intermediate nodes can use shared locks, we only need to 562 * force an exclusive lock for leaf nodes. 563 */ 564 if ((flags & (ISLASTCN | LOCKLEAF)) != (ISLASTCN | LOCKLEAF)) 565 return (0); 566 567 /* Always use exclusive locks if LOCKSHARED isn't set. */ 568 if (!(flags & LOCKSHARED)) 569 return (1); 570 571 /* 572 * For lookups during open(), if the mount point supports 573 * extended shared operations, then use a shared lock for the 574 * leaf node, otherwise use an exclusive lock. 575 */ 576 if ((flags & ISOPEN) != 0) 577 return (!MNT_EXTENDED_SHARED(mp)); 578 579 /* 580 * Lookup requests outside of open() that specify LOCKSHARED 581 * only need a shared lock on the leaf vnode. 582 */ 583 return (0); 584 } 585 586 /* 587 * Search a pathname. 588 * This is a very central and rather complicated routine. 589 * 590 * The pathname is pointed to by ni_ptr and is of length ni_pathlen. 591 * The starting directory is taken from ni_startdir. The pathname is 592 * descended until done, or a symbolic link is encountered. The variable 593 * ni_more is clear if the path is completed; it is set to one if a 594 * symbolic link needing interpretation is encountered. 595 * 596 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on 597 * whether the name is to be looked up, created, renamed, or deleted. 598 * When CREATE, RENAME, or DELETE is specified, information usable in 599 * creating, renaming, or deleting a directory entry may be calculated. 600 * If flag has LOCKPARENT or'ed into it, the parent directory is returned 601 * locked. If flag has WANTPARENT or'ed into it, the parent directory is 602 * returned unlocked. Otherwise the parent directory is not returned. If 603 * the target of the pathname exists and LOCKLEAF is or'ed into the flag 604 * the target is returned locked, otherwise it is returned unlocked. 605 * When creating or renaming and LOCKPARENT is specified, the target may not 606 * be ".". When deleting and LOCKPARENT is specified, the target may be ".". 607 * 608 * Overall outline of lookup: 609 * 610 * dirloop: 611 * identify next component of name at ndp->ni_ptr 612 * handle degenerate case where name is null string 613 * if .. and crossing mount points and on mounted filesys, find parent 614 * call VOP_LOOKUP routine for next component name 615 * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set 616 * component vnode returned in ni_vp (if it exists), locked. 617 * if result vnode is mounted on and crossing mount points, 618 * find mounted on vnode 619 * if more components of name, do next level at dirloop 620 * return the answer in ni_vp, locked if LOCKLEAF set 621 * if LOCKPARENT set, return locked parent in ni_dvp 622 * if WANTPARENT set, return unlocked parent in ni_dvp 623 */ 624 int 625 lookup(struct nameidata *ndp) 626 { 627 char *cp; /* pointer into pathname argument */ 628 char *prev_ni_next; /* saved ndp->ni_next */ 629 struct vnode *dp = NULL; /* the directory we are searching */ 630 struct vnode *tdp; /* saved dp */ 631 struct mount *mp; /* mount table entry */ 632 struct prison *pr; 633 size_t prev_ni_pathlen; /* saved ndp->ni_pathlen */ 634 int docache; /* == 0 do not cache last component */ 635 int wantparent; /* 1 => wantparent or lockparent flag */ 636 int rdonly; /* lookup read-only flag bit */ 637 int error = 0; 638 int dpunlocked = 0; /* dp has already been unlocked */ 639 int relookup = 0; /* do not consume the path component */ 640 struct componentname *cnp = &ndp->ni_cnd; 641 int lkflags_save; 642 int ni_dvp_unlocked; 643 644 /* 645 * Setup: break out flag bits into variables. 646 */ 647 ni_dvp_unlocked = 0; 648 wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); 649 KASSERT(cnp->cn_nameiop == LOOKUP || wantparent, 650 ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT.")); 651 docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; 652 if (cnp->cn_nameiop == DELETE || 653 (wantparent && cnp->cn_nameiop != CREATE && 654 cnp->cn_nameiop != LOOKUP)) 655 docache = 0; 656 rdonly = cnp->cn_flags & RDONLY; 657 cnp->cn_flags &= ~ISSYMLINK; 658 ndp->ni_dvp = NULL; 659 /* 660 * We use shared locks until we hit the parent of the last cn then 661 * we adjust based on the requesting flags. 662 */ 663 if (lookup_shared) 664 cnp->cn_lkflags = LK_SHARED; 665 else 666 cnp->cn_lkflags = LK_EXCLUSIVE; 667 dp = ndp->ni_startdir; 668 ndp->ni_startdir = NULLVP; 669 vn_lock(dp, 670 compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY, 671 cnp->cn_flags)); 672 673 dirloop: 674 /* 675 * Search a new directory. 676 * 677 * The last component of the filename is left accessible via 678 * cnp->cn_nameptr for callers that need the name. Callers needing 679 * the name set the SAVENAME flag. When done, they assume 680 * responsibility for freeing the pathname buffer. 681 */ 682 for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) 683 continue; 684 cnp->cn_namelen = cp - cnp->cn_nameptr; 685 if (cnp->cn_namelen > NAME_MAX) { 686 error = ENAMETOOLONG; 687 goto bad; 688 } 689 #ifdef NAMEI_DIAGNOSTIC 690 { char c = *cp; 691 *cp = '\0'; 692 printf("{%s}: ", cnp->cn_nameptr); 693 *cp = c; } 694 #endif 695 prev_ni_pathlen = ndp->ni_pathlen; 696 ndp->ni_pathlen -= cnp->cn_namelen; 697 KASSERT(ndp->ni_pathlen <= PATH_MAX, 698 ("%s: ni_pathlen underflow to %zd\n", __func__, ndp->ni_pathlen)); 699 prev_ni_next = ndp->ni_next; 700 ndp->ni_next = cp; 701 702 /* 703 * Replace multiple slashes by a single slash and trailing slashes 704 * by a null. This must be done before VOP_LOOKUP() because some 705 * fs's don't know about trailing slashes. Remember if there were 706 * trailing slashes to handle symlinks, existing non-directories 707 * and non-existing files that won't be directories specially later. 708 */ 709 while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { 710 cp++; 711 ndp->ni_pathlen--; 712 if (*cp == '\0') { 713 *ndp->ni_next = '\0'; 714 cnp->cn_flags |= TRAILINGSLASH; 715 } 716 } 717 ndp->ni_next = cp; 718 719 cnp->cn_flags |= MAKEENTRY; 720 if (*cp == '\0' && docache == 0) 721 cnp->cn_flags &= ~MAKEENTRY; 722 if (cnp->cn_namelen == 2 && 723 cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') 724 cnp->cn_flags |= ISDOTDOT; 725 else 726 cnp->cn_flags &= ~ISDOTDOT; 727 if (*ndp->ni_next == 0) 728 cnp->cn_flags |= ISLASTCN; 729 else 730 cnp->cn_flags &= ~ISLASTCN; 731 732 if ((cnp->cn_flags & ISLASTCN) != 0 && 733 cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' && 734 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 735 error = EINVAL; 736 goto bad; 737 } 738 739 nameicap_tracker_add(ndp, dp); 740 741 /* 742 * Check for degenerate name (e.g. / or "") 743 * which is a way of talking about a directory, 744 * e.g. like "/." or ".". 745 */ 746 if (cnp->cn_nameptr[0] == '\0') { 747 if (dp->v_type != VDIR) { 748 error = ENOTDIR; 749 goto bad; 750 } 751 if (cnp->cn_nameiop != LOOKUP) { 752 error = EISDIR; 753 goto bad; 754 } 755 if (wantparent) { 756 ndp->ni_dvp = dp; 757 VREF(dp); 758 } 759 ndp->ni_vp = dp; 760 761 if (cnp->cn_flags & AUDITVNODE1) 762 AUDIT_ARG_VNODE1(dp); 763 else if (cnp->cn_flags & AUDITVNODE2) 764 AUDIT_ARG_VNODE2(dp); 765 766 if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF))) 767 VOP_UNLOCK(dp, 0); 768 /* XXX This should probably move to the top of function. */ 769 if (cnp->cn_flags & SAVESTART) 770 panic("lookup: SAVESTART"); 771 goto success; 772 } 773 774 /* 775 * Handle "..": five special cases. 776 * 0. If doing a capability lookup and lookup_cap_dotdot is 777 * disabled, return ENOTCAPABLE. 778 * 1. Return an error if this is the last component of 779 * the name and the operation is DELETE or RENAME. 780 * 2. If at root directory (e.g. after chroot) 781 * or at absolute root directory 782 * then ignore it so can't get out. 783 * 3. If this vnode is the root of a mounted 784 * filesystem, then replace it with the 785 * vnode which was mounted on so we take the 786 * .. in the other filesystem. 787 * 4. If the vnode is the top directory of 788 * the jail or chroot, don't let them out. 789 * 5. If doing a capability lookup and lookup_cap_dotdot is 790 * enabled, return ENOTCAPABLE if the lookup would escape 791 * from the initial file descriptor directory. Checks are 792 * done by ensuring that namei() already traversed the 793 * result of dotdot lookup. 794 */ 795 if (cnp->cn_flags & ISDOTDOT) { 796 if ((ndp->ni_lcf & (NI_LCF_STRICTRELATIVE | NI_LCF_CAP_DOTDOT)) 797 == NI_LCF_STRICTRELATIVE) { 798 #ifdef KTRACE 799 if (KTRPOINT(curthread, KTR_CAPFAIL)) 800 ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); 801 #endif 802 error = ENOTCAPABLE; 803 goto bad; 804 } 805 if ((cnp->cn_flags & ISLASTCN) != 0 && 806 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 807 error = EINVAL; 808 goto bad; 809 } 810 for (;;) { 811 for (pr = cnp->cn_cred->cr_prison; pr != NULL; 812 pr = pr->pr_parent) 813 if (dp == pr->pr_root) 814 break; 815 if (dp == ndp->ni_rootdir || 816 dp == ndp->ni_topdir || 817 dp == rootvnode || 818 pr != NULL || 819 ((dp->v_vflag & VV_ROOT) != 0 && 820 (cnp->cn_flags & NOCROSSMOUNT) != 0)) { 821 ndp->ni_dvp = dp; 822 ndp->ni_vp = dp; 823 VREF(dp); 824 goto nextname; 825 } 826 if ((dp->v_vflag & VV_ROOT) == 0) 827 break; 828 if (dp->v_iflag & VI_DOOMED) { /* forced unmount */ 829 error = ENOENT; 830 goto bad; 831 } 832 tdp = dp; 833 dp = dp->v_mount->mnt_vnodecovered; 834 VREF(dp); 835 vput(tdp); 836 vn_lock(dp, 837 compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | 838 LK_RETRY, ISDOTDOT)); 839 error = nameicap_check_dotdot(ndp, dp); 840 if (error != 0) { 841 #ifdef KTRACE 842 if (KTRPOINT(curthread, KTR_CAPFAIL)) 843 ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); 844 #endif 845 goto bad; 846 } 847 } 848 } 849 850 /* 851 * We now have a segment name to search for, and a directory to search. 852 */ 853 unionlookup: 854 #ifdef MAC 855 if ((cnp->cn_flags & NOMACCHECK) == 0) { 856 error = mac_vnode_check_lookup(cnp->cn_thread->td_ucred, dp, 857 cnp); 858 if (error) 859 goto bad; 860 } 861 #endif 862 ndp->ni_dvp = dp; 863 ndp->ni_vp = NULL; 864 ASSERT_VOP_LOCKED(dp, "lookup"); 865 /* 866 * If we have a shared lock we may need to upgrade the lock for the 867 * last operation. 868 */ 869 if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN) && 870 dp != vp_crossmp && VOP_ISLOCKED(dp) == LK_SHARED) 871 vn_lock(dp, LK_UPGRADE|LK_RETRY); 872 if ((dp->v_iflag & VI_DOOMED) != 0) { 873 error = ENOENT; 874 goto bad; 875 } 876 /* 877 * If we're looking up the last component and we need an exclusive 878 * lock, adjust our lkflags. 879 */ 880 if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags)) 881 cnp->cn_lkflags = LK_EXCLUSIVE; 882 #ifdef NAMEI_DIAGNOSTIC 883 vn_printf(dp, "lookup in "); 884 #endif 885 lkflags_save = cnp->cn_lkflags; 886 cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags, 887 cnp->cn_flags); 888 error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp); 889 cnp->cn_lkflags = lkflags_save; 890 if (error != 0) { 891 KASSERT(ndp->ni_vp == NULL, ("leaf should be empty")); 892 #ifdef NAMEI_DIAGNOSTIC 893 printf("not found\n"); 894 #endif 895 if ((error == ENOENT) && 896 (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) && 897 (dp->v_mount->mnt_flag & MNT_UNION)) { 898 tdp = dp; 899 dp = dp->v_mount->mnt_vnodecovered; 900 VREF(dp); 901 vput(tdp); 902 vn_lock(dp, 903 compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | 904 LK_RETRY, cnp->cn_flags)); 905 nameicap_tracker_add(ndp, dp); 906 goto unionlookup; 907 } 908 909 if (error == ERELOOKUP) { 910 vref(dp); 911 ndp->ni_vp = dp; 912 error = 0; 913 relookup = 1; 914 goto good; 915 } 916 917 if (error != EJUSTRETURN) 918 goto bad; 919 /* 920 * At this point, we know we're at the end of the 921 * pathname. If creating / renaming, we can consider 922 * allowing the file or directory to be created / renamed, 923 * provided we're not on a read-only filesystem. 924 */ 925 if (rdonly) { 926 error = EROFS; 927 goto bad; 928 } 929 /* trailing slash only allowed for directories */ 930 if ((cnp->cn_flags & TRAILINGSLASH) && 931 !(cnp->cn_flags & WILLBEDIR)) { 932 error = ENOENT; 933 goto bad; 934 } 935 if ((cnp->cn_flags & LOCKPARENT) == 0) 936 VOP_UNLOCK(dp, 0); 937 /* 938 * We return with ni_vp NULL to indicate that the entry 939 * doesn't currently exist, leaving a pointer to the 940 * (possibly locked) directory vnode in ndp->ni_dvp. 941 */ 942 if (cnp->cn_flags & SAVESTART) { 943 ndp->ni_startdir = ndp->ni_dvp; 944 VREF(ndp->ni_startdir); 945 } 946 goto success; 947 } 948 949 good: 950 #ifdef NAMEI_DIAGNOSTIC 951 printf("found\n"); 952 #endif 953 dp = ndp->ni_vp; 954 955 /* 956 * Check to see if the vnode has been mounted on; 957 * if so find the root of the mounted filesystem. 958 */ 959 while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && 960 (cnp->cn_flags & NOCROSSMOUNT) == 0) { 961 if (vfs_busy(mp, 0)) 962 continue; 963 vput(dp); 964 if (dp != ndp->ni_dvp) 965 vput(ndp->ni_dvp); 966 else 967 vrele(ndp->ni_dvp); 968 vrefact(vp_crossmp); 969 ndp->ni_dvp = vp_crossmp; 970 error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags, 971 cnp->cn_flags), &tdp); 972 vfs_unbusy(mp); 973 if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT)) 974 panic("vp_crossmp exclusively locked or reclaimed"); 975 if (error) { 976 dpunlocked = 1; 977 goto bad2; 978 } 979 ndp->ni_vp = dp = tdp; 980 } 981 982 /* 983 * Check for symbolic link 984 */ 985 if ((dp->v_type == VLNK) && 986 ((cnp->cn_flags & FOLLOW) || (cnp->cn_flags & TRAILINGSLASH) || 987 *ndp->ni_next == '/')) { 988 cnp->cn_flags |= ISSYMLINK; 989 if (dp->v_iflag & VI_DOOMED) { 990 /* 991 * We can't know whether the directory was mounted with 992 * NOSYMFOLLOW, so we can't follow safely. 993 */ 994 error = ENOENT; 995 goto bad2; 996 } 997 if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) { 998 error = EACCES; 999 goto bad2; 1000 } 1001 /* 1002 * Symlink code always expects an unlocked dvp. 1003 */ 1004 if (ndp->ni_dvp != ndp->ni_vp) { 1005 VOP_UNLOCK(ndp->ni_dvp, 0); 1006 ni_dvp_unlocked = 1; 1007 } 1008 goto success; 1009 } 1010 1011 nextname: 1012 /* 1013 * Not a symbolic link that we will follow. Continue with the 1014 * next component if there is any; otherwise, we're done. 1015 */ 1016 KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/', 1017 ("lookup: invalid path state.")); 1018 if (relookup) { 1019 relookup = 0; 1020 ndp->ni_pathlen = prev_ni_pathlen; 1021 ndp->ni_next = prev_ni_next; 1022 if (ndp->ni_dvp != dp) 1023 vput(ndp->ni_dvp); 1024 else 1025 vrele(ndp->ni_dvp); 1026 goto dirloop; 1027 } 1028 if (cnp->cn_flags & ISDOTDOT) { 1029 error = nameicap_check_dotdot(ndp, ndp->ni_vp); 1030 if (error != 0) { 1031 #ifdef KTRACE 1032 if (KTRPOINT(curthread, KTR_CAPFAIL)) 1033 ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); 1034 #endif 1035 goto bad2; 1036 } 1037 } 1038 if (*ndp->ni_next == '/') { 1039 cnp->cn_nameptr = ndp->ni_next; 1040 while (*cnp->cn_nameptr == '/') { 1041 cnp->cn_nameptr++; 1042 ndp->ni_pathlen--; 1043 } 1044 if (ndp->ni_dvp != dp) 1045 vput(ndp->ni_dvp); 1046 else 1047 vrele(ndp->ni_dvp); 1048 goto dirloop; 1049 } 1050 /* 1051 * If we're processing a path with a trailing slash, 1052 * check that the end result is a directory. 1053 */ 1054 if ((cnp->cn_flags & TRAILINGSLASH) && dp->v_type != VDIR) { 1055 error = ENOTDIR; 1056 goto bad2; 1057 } 1058 /* 1059 * Disallow directory write attempts on read-only filesystems. 1060 */ 1061 if (rdonly && 1062 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 1063 error = EROFS; 1064 goto bad2; 1065 } 1066 if (cnp->cn_flags & SAVESTART) { 1067 ndp->ni_startdir = ndp->ni_dvp; 1068 VREF(ndp->ni_startdir); 1069 } 1070 if (!wantparent) { 1071 ni_dvp_unlocked = 2; 1072 if (ndp->ni_dvp != dp) 1073 vput(ndp->ni_dvp); 1074 else 1075 vrele(ndp->ni_dvp); 1076 } else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) { 1077 VOP_UNLOCK(ndp->ni_dvp, 0); 1078 ni_dvp_unlocked = 1; 1079 } 1080 1081 if (cnp->cn_flags & AUDITVNODE1) 1082 AUDIT_ARG_VNODE1(dp); 1083 else if (cnp->cn_flags & AUDITVNODE2) 1084 AUDIT_ARG_VNODE2(dp); 1085 1086 if ((cnp->cn_flags & LOCKLEAF) == 0) 1087 VOP_UNLOCK(dp, 0); 1088 success: 1089 /* 1090 * Because of lookup_shared we may have the vnode shared locked, but 1091 * the caller may want it to be exclusively locked. 1092 */ 1093 if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) && 1094 VOP_ISLOCKED(dp) != LK_EXCLUSIVE) { 1095 vn_lock(dp, LK_UPGRADE | LK_RETRY); 1096 if (dp->v_iflag & VI_DOOMED) { 1097 error = ENOENT; 1098 goto bad2; 1099 } 1100 } 1101 return (0); 1102 1103 bad2: 1104 if (ni_dvp_unlocked != 2) { 1105 if (dp != ndp->ni_dvp && !ni_dvp_unlocked) 1106 vput(ndp->ni_dvp); 1107 else 1108 vrele(ndp->ni_dvp); 1109 } 1110 bad: 1111 if (!dpunlocked) 1112 vput(dp); 1113 ndp->ni_vp = NULL; 1114 return (error); 1115 } 1116 1117 /* 1118 * relookup - lookup a path name component 1119 * Used by lookup to re-acquire things. 1120 */ 1121 int 1122 relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) 1123 { 1124 struct vnode *dp = NULL; /* the directory we are searching */ 1125 int wantparent; /* 1 => wantparent or lockparent flag */ 1126 int rdonly; /* lookup read-only flag bit */ 1127 int error = 0; 1128 1129 KASSERT(cnp->cn_flags & ISLASTCN, 1130 ("relookup: Not given last component.")); 1131 /* 1132 * Setup: break out flag bits into variables. 1133 */ 1134 wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT); 1135 KASSERT(wantparent, ("relookup: parent not wanted.")); 1136 rdonly = cnp->cn_flags & RDONLY; 1137 cnp->cn_flags &= ~ISSYMLINK; 1138 dp = dvp; 1139 cnp->cn_lkflags = LK_EXCLUSIVE; 1140 vn_lock(dp, LK_EXCLUSIVE | LK_RETRY); 1141 1142 /* 1143 * Search a new directory. 1144 * 1145 * The last component of the filename is left accessible via 1146 * cnp->cn_nameptr for callers that need the name. Callers needing 1147 * the name set the SAVENAME flag. When done, they assume 1148 * responsibility for freeing the pathname buffer. 1149 */ 1150 #ifdef NAMEI_DIAGNOSTIC 1151 printf("{%s}: ", cnp->cn_nameptr); 1152 #endif 1153 1154 /* 1155 * Check for "" which represents the root directory after slash 1156 * removal. 1157 */ 1158 if (cnp->cn_nameptr[0] == '\0') { 1159 /* 1160 * Support only LOOKUP for "/" because lookup() 1161 * can't succeed for CREATE, DELETE and RENAME. 1162 */ 1163 KASSERT(cnp->cn_nameiop == LOOKUP, ("nameiop must be LOOKUP")); 1164 KASSERT(dp->v_type == VDIR, ("dp is not a directory")); 1165 1166 if (!(cnp->cn_flags & LOCKLEAF)) 1167 VOP_UNLOCK(dp, 0); 1168 *vpp = dp; 1169 /* XXX This should probably move to the top of function. */ 1170 if (cnp->cn_flags & SAVESTART) 1171 panic("lookup: SAVESTART"); 1172 return (0); 1173 } 1174 1175 if (cnp->cn_flags & ISDOTDOT) 1176 panic ("relookup: lookup on dot-dot"); 1177 1178 /* 1179 * We now have a segment name to search for, and a directory to search. 1180 */ 1181 #ifdef NAMEI_DIAGNOSTIC 1182 vn_printf(dp, "search in "); 1183 #endif 1184 if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) { 1185 KASSERT(*vpp == NULL, ("leaf should be empty")); 1186 if (error != EJUSTRETURN) 1187 goto bad; 1188 /* 1189 * If creating and at end of pathname, then can consider 1190 * allowing file to be created. 1191 */ 1192 if (rdonly) { 1193 error = EROFS; 1194 goto bad; 1195 } 1196 /* ASSERT(dvp == ndp->ni_startdir) */ 1197 if (cnp->cn_flags & SAVESTART) 1198 VREF(dvp); 1199 if ((cnp->cn_flags & LOCKPARENT) == 0) 1200 VOP_UNLOCK(dp, 0); 1201 /* 1202 * We return with ni_vp NULL to indicate that the entry 1203 * doesn't currently exist, leaving a pointer to the 1204 * (possibly locked) directory vnode in ndp->ni_dvp. 1205 */ 1206 return (0); 1207 } 1208 1209 dp = *vpp; 1210 1211 /* 1212 * Disallow directory write attempts on read-only filesystems. 1213 */ 1214 if (rdonly && 1215 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 1216 if (dvp == dp) 1217 vrele(dvp); 1218 else 1219 vput(dvp); 1220 error = EROFS; 1221 goto bad; 1222 } 1223 /* 1224 * Set the parent lock/ref state to the requested state. 1225 */ 1226 if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) { 1227 if (wantparent) 1228 VOP_UNLOCK(dvp, 0); 1229 else 1230 vput(dvp); 1231 } else if (!wantparent) 1232 vrele(dvp); 1233 /* 1234 * Check for symbolic link 1235 */ 1236 KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW), 1237 ("relookup: symlink found.\n")); 1238 1239 /* ASSERT(dvp == ndp->ni_startdir) */ 1240 if (cnp->cn_flags & SAVESTART) 1241 VREF(dvp); 1242 1243 if ((cnp->cn_flags & LOCKLEAF) == 0) 1244 VOP_UNLOCK(dp, 0); 1245 return (0); 1246 bad: 1247 vput(dp); 1248 *vpp = NULL; 1249 return (error); 1250 } 1251 1252 void 1253 NDINIT_ALL(struct nameidata *ndp, u_long op, u_long flags, enum uio_seg segflg, 1254 const char *namep, int dirfd, struct vnode *startdir, cap_rights_t *rightsp, 1255 struct thread *td) 1256 { 1257 1258 ndp->ni_cnd.cn_nameiop = op; 1259 ndp->ni_cnd.cn_flags = flags; 1260 ndp->ni_segflg = segflg; 1261 ndp->ni_dirp = namep; 1262 ndp->ni_dirfd = dirfd; 1263 ndp->ni_startdir = startdir; 1264 if (rightsp != NULL) 1265 ndp->ni_rightsneeded = *rightsp; 1266 else 1267 cap_rights_init(&ndp->ni_rightsneeded); 1268 filecaps_init(&ndp->ni_filecaps); 1269 ndp->ni_cnd.cn_thread = td; 1270 } 1271 1272 /* 1273 * Free data allocated by namei(); see namei(9) for details. 1274 */ 1275 void 1276 NDFREE(struct nameidata *ndp, const u_int flags) 1277 { 1278 int unlock_dvp; 1279 int unlock_vp; 1280 1281 unlock_dvp = 0; 1282 unlock_vp = 0; 1283 1284 if (!(flags & NDF_NO_FREE_PNBUF) && 1285 (ndp->ni_cnd.cn_flags & HASBUF)) { 1286 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 1287 ndp->ni_cnd.cn_flags &= ~HASBUF; 1288 } 1289 if (!(flags & NDF_NO_VP_UNLOCK) && 1290 (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp) 1291 unlock_vp = 1; 1292 if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) { 1293 if (unlock_vp) { 1294 vput(ndp->ni_vp); 1295 unlock_vp = 0; 1296 } else 1297 vrele(ndp->ni_vp); 1298 ndp->ni_vp = NULL; 1299 } 1300 if (unlock_vp) 1301 VOP_UNLOCK(ndp->ni_vp, 0); 1302 if (!(flags & NDF_NO_DVP_UNLOCK) && 1303 (ndp->ni_cnd.cn_flags & LOCKPARENT) && 1304 ndp->ni_dvp != ndp->ni_vp) 1305 unlock_dvp = 1; 1306 if (!(flags & NDF_NO_DVP_RELE) && 1307 (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) { 1308 if (unlock_dvp) { 1309 vput(ndp->ni_dvp); 1310 unlock_dvp = 0; 1311 } else 1312 vrele(ndp->ni_dvp); 1313 ndp->ni_dvp = NULL; 1314 } 1315 if (unlock_dvp) 1316 VOP_UNLOCK(ndp->ni_dvp, 0); 1317 if (!(flags & NDF_NO_STARTDIR_RELE) && 1318 (ndp->ni_cnd.cn_flags & SAVESTART)) { 1319 vrele(ndp->ni_startdir); 1320 ndp->ni_startdir = NULL; 1321 } 1322 } 1323 1324 /* 1325 * Determine if there is a suitable alternate filename under the specified 1326 * prefix for the specified path. If the create flag is set, then the 1327 * alternate prefix will be used so long as the parent directory exists. 1328 * This is used by the various compatibility ABIs so that Linux binaries prefer 1329 * files under /compat/linux for example. The chosen path (whether under 1330 * the prefix or under /) is returned in a kernel malloc'd buffer pointed 1331 * to by pathbuf. The caller is responsible for free'ing the buffer from 1332 * the M_TEMP bucket if one is returned. 1333 */ 1334 int 1335 kern_alternate_path(struct thread *td, const char *prefix, const char *path, 1336 enum uio_seg pathseg, char **pathbuf, int create, int dirfd) 1337 { 1338 struct nameidata nd, ndroot; 1339 char *ptr, *buf, *cp; 1340 size_t len, sz; 1341 int error; 1342 1343 buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1344 *pathbuf = buf; 1345 1346 /* Copy the prefix into the new pathname as a starting point. */ 1347 len = strlcpy(buf, prefix, MAXPATHLEN); 1348 if (len >= MAXPATHLEN) { 1349 *pathbuf = NULL; 1350 free(buf, M_TEMP); 1351 return (EINVAL); 1352 } 1353 sz = MAXPATHLEN - len; 1354 ptr = buf + len; 1355 1356 /* Append the filename to the prefix. */ 1357 if (pathseg == UIO_SYSSPACE) 1358 error = copystr(path, ptr, sz, &len); 1359 else 1360 error = copyinstr(path, ptr, sz, &len); 1361 1362 if (error) { 1363 *pathbuf = NULL; 1364 free(buf, M_TEMP); 1365 return (error); 1366 } 1367 1368 /* Only use a prefix with absolute pathnames. */ 1369 if (*ptr != '/') { 1370 error = EINVAL; 1371 goto keeporig; 1372 } 1373 1374 if (dirfd != AT_FDCWD) { 1375 /* 1376 * We want the original because the "prefix" is 1377 * included in the already opened dirfd. 1378 */ 1379 bcopy(ptr, buf, len); 1380 return (0); 1381 } 1382 1383 /* 1384 * We know that there is a / somewhere in this pathname. 1385 * Search backwards for it, to find the file's parent dir 1386 * to see if it exists in the alternate tree. If it does, 1387 * and we want to create a file (cflag is set). We don't 1388 * need to worry about the root comparison in this case. 1389 */ 1390 1391 if (create) { 1392 for (cp = &ptr[len] - 1; *cp != '/'; cp--); 1393 *cp = '\0'; 1394 1395 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, buf, td); 1396 error = namei(&nd); 1397 *cp = '/'; 1398 if (error != 0) 1399 goto keeporig; 1400 } else { 1401 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, buf, td); 1402 1403 error = namei(&nd); 1404 if (error != 0) 1405 goto keeporig; 1406 1407 /* 1408 * We now compare the vnode of the prefix to the one 1409 * vnode asked. If they resolve to be the same, then we 1410 * ignore the match so that the real root gets used. 1411 * This avoids the problem of traversing "../.." to find the 1412 * root directory and never finding it, because "/" resolves 1413 * to the emulation root directory. This is expensive :-( 1414 */ 1415 NDINIT(&ndroot, LOOKUP, FOLLOW, UIO_SYSSPACE, prefix, 1416 td); 1417 1418 /* We shouldn't ever get an error from this namei(). */ 1419 error = namei(&ndroot); 1420 if (error == 0) { 1421 if (nd.ni_vp == ndroot.ni_vp) 1422 error = ENOENT; 1423 1424 NDFREE(&ndroot, NDF_ONLY_PNBUF); 1425 vrele(ndroot.ni_vp); 1426 } 1427 } 1428 1429 NDFREE(&nd, NDF_ONLY_PNBUF); 1430 vrele(nd.ni_vp); 1431 1432 keeporig: 1433 /* If there was an error, use the original path name. */ 1434 if (error) 1435 bcopy(ptr, buf, len); 1436 return (error); 1437 } 1438