xref: /titanic_44/usr/src/uts/common/fs/lookup.c (revision 09f67678c27dda8a89f87f1f408a87dd49ceb0e1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 
41 #pragma ident	"%Z%%M%	%I%	%E% SMI"
42 
43 #include <sys/types.h>
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/cpuvar.h>
47 #include <sys/errno.h>
48 #include <sys/cred.h>
49 #include <sys/user.h>
50 #include <sys/uio.h>
51 #include <sys/vfs.h>
52 #include <sys/vnode.h>
53 #include <sys/pathname.h>
54 #include <sys/proc.h>
55 #include <sys/vtrace.h>
56 #include <sys/sysmacros.h>
57 #include <sys/debug.h>
58 #include <sys/dirent.h>
59 #include <c2/audit.h>
60 #include <sys/zone.h>
61 #include <sys/dnlc.h>
62 #include <sys/fs/snode.h>
63 
64 /*
 * Controls whether paths are stored with vnodes.  When non-zero,
 * lookuppnvp() records the path it has just resolved on the resulting
 * vnode and/or its parent directory via VN_SETPATH().
 */
65 int vfs_vnode_path = 1;
66 
67 int
68 lookupname(
69 	char *fnamep,
70 	enum uio_seg seg,
71 	enum symfollow followlink,
72 	vnode_t **dirvpp,
73 	vnode_t **compvpp)
74 {
75 	return (lookupnameat(fnamep, seg, followlink, dirvpp, compvpp, NULL));
76 }
77 
78 
79 /*
80  * Lookup the user file name,
81  * Handle allocation and freeing of pathname buffer, return error.
82  */
83 int
84 lookupnameat(
85 	char *fnamep,			/* user pathname */
86 	enum uio_seg seg,		/* addr space that name is in */
87 	enum symfollow followlink,	/* follow sym links */
88 	vnode_t **dirvpp,		/* ret for ptr to parent dir vnode */
89 	vnode_t **compvpp,		/* ret for ptr to component vnode */
90 	vnode_t *startvp)		/* start path search from vp */
91 {
92 	char namebuf[TYPICALMAXPATHLEN];
93 	struct pathname lookpn;
94 	int error;
95 
96 	error = pn_get_buf(fnamep, seg, &lookpn, namebuf, sizeof (namebuf));
97 	if (error == 0) {
98 #ifdef C2_AUDIT
99 		if (audit_active)
100 			audit_lookupname();
101 #endif
102 		error = lookuppnat(&lookpn, NULL, followlink,
103 		    dirvpp, compvpp, startvp);
104 	}
105 	if (error == ENAMETOOLONG) {
106 		/*
107 		 * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
108 		 */
109 		if (error = pn_get(fnamep, seg, &lookpn))
110 			return (error);
111 		error = lookuppnat(&lookpn, NULL, followlink,
112 		    dirvpp, compvpp, startvp);
113 		pn_free(&lookpn);
114 	}
115 
116 	return (error);
117 }
118 
119 /*
120  * Lookup the user file name from a given vp,
121  */
122 int
123 lookuppn(
124 	struct pathname *pnp,
125 	struct pathname *rpnp,
126 	enum symfollow followlink,
127 	vnode_t **dirvpp,
128 	vnode_t **compvpp)
129 {
130 	return (lookuppnat(pnp, rpnp, followlink, dirvpp, compvpp, NULL));
131 }
132 
133 int
134 lookuppnat(
135 	struct pathname *pnp,		/* pathname to lookup */
136 	struct pathname *rpnp,		/* if non-NULL, return resolved path */
137 	enum symfollow followlink,	/* (don't) follow sym links */
138 	vnode_t **dirvpp,		/* ptr for parent vnode */
139 	vnode_t **compvpp,		/* ptr for entry vnode */
140 	vnode_t *startvp)		/* start search from this vp */
141 {
142 	vnode_t *vp;	/* current directory vp */
143 	vnode_t *rootvp;
144 	proc_t *p = curproc;
145 
146 	if (pnp->pn_pathlen == 0)
147 		return (ENOENT);
148 
149 	mutex_enter(&p->p_lock);	/* for u_rdir and u_cdir */
150 	if ((rootvp = PTOU(p)->u_rdir) == NULL)
151 		rootvp = rootdir;
152 	else if (rootvp != rootdir)	/* no need to VN_HOLD rootdir */
153 		VN_HOLD(rootvp);
154 
155 	if (pnp->pn_path[0] == '/') {
156 		vp = rootvp;
157 	} else {
158 		vp = (startvp == NULL) ? PTOU(p)->u_cdir : startvp;
159 	}
160 	VN_HOLD(vp);
161 	mutex_exit(&p->p_lock);
162 
163 	/*
164 	 * Skip over leading slashes
165 	 */
166 	if (pnp->pn_path[0] == '/') {
167 		do {
168 			pnp->pn_path++;
169 			pnp->pn_pathlen--;
170 		} while (pnp->pn_path[0] == '/');
171 	}
172 
173 	return (lookuppnvp(pnp, rpnp, followlink, dirvpp,
174 	    compvpp, rootvp, vp, CRED()));
175 }
176 
177 /*
 * Private flag to do our getcwd() dirty work.  When LOOKUP_CHECKREAD is
 * set in the flags given to lookuppnvp(), that routine demands VREAD
 * access (via VOP_ACCESS) on each directory it searches.  LOOKUP_MASK is
 * the complement: every bit except the private flag.
 */
178 #define	LOOKUP_CHECKREAD	0x10
179 #define	LOOKUP_MASK		(~LOOKUP_CHECKREAD)
180 
181 /*
182  * Starting at current directory, translate pathname pnp to end.
183  * Leave pathname of final component in pnp, return the vnode
184  * for the final component in *compvpp, and return the vnode
185  * for the parent of the final component in dirvpp.
186  *
187  * This is the central routine in pathname translation and handles
188  * multiple components in pathnames, separating them at /'s.  It also
189  * implements mounted file systems and processes symbolic links.
190  *
191  * vp is the vnode where the directory search should start.
192  *
193  * Reference counts: vp must be held prior to calling this function.  rootvp
194  * should only be held if rootvp != rootdir.
195  */
196 int
197 lookuppnvp(
198 	struct pathname *pnp,		/* pathname to lookup */
199 	struct pathname *rpnp,		/* if non-NULL, return resolved path */
200 	int flags,			/* follow symlinks */
201 	vnode_t **dirvpp,		/* ptr for parent vnode */
202 	vnode_t **compvpp,		/* ptr for entry vnode */
203 	vnode_t *rootvp,		/* rootvp */
204 	vnode_t *vp,			/* directory to start search at */
205 	cred_t *cr)			/* user's credential */
206 {
207 	vnode_t *cvp;	/* current component vp */
208 	vnode_t *tvp;	/* addressable temp ptr */
209 	char component[MAXNAMELEN];	/* buffer for component (incl null) */
210 	int error;
211 	int nlink;
212 	int lookup_flags;
213 	vnode_t *startvp;
214 	vnode_t *zonevp = curproc->p_zone->zone_rootvp;		/* zone root */
215 	int must_be_directory = 0;
216 	size_t plen;
217 
218 	CPU_STATS_ADDQ(CPU, sys, namei, 1);
219 	nlink = 0;
220 	cvp = NULL;
221 	if (rpnp)
222 		rpnp->pn_pathlen = 0;
223 	lookup_flags = dirvpp ? LOOKUP_DIR : 0;
224 #ifdef C2_AUDIT
225 	if (audit_active)
226 		audit_anchorpath(pnp, vp == rootvp);
227 #endif
228 
229 	/*
230 	 * Eliminate any trailing slashes in the pathname.
231 	 * If there are any, we must follow all symlinks.
232 	 * Also, we must guarantee that the last component is a directory.
233 	 */
234 	if (pn_fixslash(pnp)) {
235 		flags |= FOLLOW;
236 		must_be_directory = 1;
237 	}
238 
239 	startvp = vp;
240 next:
241 	/*
242 	 * Make sure we have a directory.
243 	 */
244 	if (vp->v_type != VDIR) {
245 		error = ENOTDIR;
246 		goto bad;
247 	}
248 
249 	if (rpnp && VN_CMP(vp, rootvp))
250 		(void) pn_set(rpnp, "/");
251 
252 	/*
253 	 * Process the next component of the pathname.
254 	 */
255 	if (error = pn_getcomponent(pnp, component)) {
256 #ifdef C2_AUDIT
257 		if (audit_active)
258 			audit_addcomponent(pnp);
259 #endif
260 		goto bad;
261 	}
262 
263 	/*
264 	 * Handle "..": two special cases.
265 	 * 1. If we're at the root directory (e.g. after chroot or
266 	 *    zone_enter) then change ".." to "." so we can't get
267 	 *    out of this subtree.
268 	 * 2. If this vnode is the root of a mounted file system,
269 	 *    then replace it with the vnode that was mounted on
270 	 *    so that we take the ".." in the other file system.
271 	 */
272 	if (component[0] == '.' && component[1] == '.' && component[2] == 0) {
273 checkforroot:
274 		if (VN_CMP(vp, rootvp) || VN_CMP(vp, zonevp)) {
275 			component[1] = '\0';
276 		} else if (vp->v_flag & VROOT) {
277 			vfs_t *vfsp;
278 			cvp = vp;
279 
280 			/*
281 			 * While we deal with the vfs pointer from the vnode
282 			 * the filesystem could have been forcefully unmounted
283 			 * and the vnode's v_vfsp could have been invalidated
284 			 * by VFS_UNMOUNT. Hence, we cache v_vfsp and use it
285 			 * with vfs_rlock_wait/vfs_unlock.
286 			 * It is safe to use the v_vfsp even it is freed by
287 			 * VFS_UNMOUNT because vfs_rlock_wait/vfs_unlock
288 			 * do not dereference v_vfsp. It is just used as a
289 			 * magic cookie.
290 			 * One more corner case here is the memory getting
291 			 * reused for another vfs structure. In this case
292 			 * lookuppnvp's vfs_rlock_wait will succeed, domount's
293 			 * vfs_lock will fail and domount will bail out with an
294 			 * error (EBUSY).
295 			 */
296 			vfsp = cvp->v_vfsp;
297 
298 			/*
299 			 * This lock is used to synchronize
300 			 * mounts/unmounts and lookups.
301 			 * Threads doing mounts/unmounts hold the
302 			 * writers version vfs_lock_wait().
303 			 */
304 
305 			vfs_rlock_wait(vfsp);
306 
307 			/*
308 			 * If this vnode is on a file system that
309 			 * has been forcibly unmounted,
310 			 * we can't proceed. Cancel this operation
311 			 * and return EIO.
312 			 *
313 			 * vfs_vnodecovered is NULL if unmounted.
314 			 * Currently, nfs uses VFS_UNMOUNTED to
315 			 * check if it's a forced-umount. Keep the
316 			 * same checking here as well even though it
317 			 * may not be needed.
318 			 */
319 			if (((vp = cvp->v_vfsp->vfs_vnodecovered) == NULL) ||
320 			    (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
321 				vfs_unlock(vfsp);
322 				VN_RELE(cvp);
323 				return (EIO);
324 			}
325 			VN_HOLD(vp);
326 			vfs_unlock(vfsp);
327 			VN_RELE(cvp);
328 			cvp = NULL;
329 			goto checkforroot;
330 		}
331 	}
332 
333 	/*
334 	 * LOOKUP_CHECKREAD is a private flag used by vnodetopath() to indicate
335 	 * that we need to have read permission on every directory in the entire
336 	 * path.  This is used to ensure that a forward-lookup of a cached value
337 	 * has the same effect as a reverse-lookup when the cached value cannot
338 	 * be found.
339 	 */
340 	if ((flags & LOOKUP_CHECKREAD) &&
341 	    (error = VOP_ACCESS(vp, VREAD, 0, cr)) != 0)
342 		goto bad;
343 
344 	/*
345 	 * Perform a lookup in the current directory.
346 	 */
347 	error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags,
348 		rootvp, cr);
349 	cvp = tvp;
350 	if (error) {
351 		cvp = NULL;
352 		/*
353 		 * On error, return hard error if
354 		 * (a) we're not at the end of the pathname yet, or
355 		 * (b) the caller didn't want the parent directory, or
356 		 * (c) we failed for some reason other than a missing entry.
357 		 */
358 		if (pn_pathleft(pnp) || dirvpp == NULL || error != ENOENT)
359 			goto bad;
360 #ifdef C2_AUDIT
361 		if (audit_active) {	/* directory access */
362 			if (error = audit_savepath(pnp, vp, error, cr))
363 				goto bad_noaudit;
364 		}
365 #endif
366 		pn_setlast(pnp);
367 		/*
368 		 * We inform the caller that the desired entry must be
369 		 * a directory by adding a '/' to the component name.
370 		 */
371 		if (must_be_directory && (error = pn_addslash(pnp)) != 0)
372 			goto bad;
373 		*dirvpp = vp;
374 		/*
375 		 * We cache the path of everything up to right before this
376 		 * component and store that in the parent directory.
377 		 */
378 		if (vfs_vnode_path && pnp->pn_path != pnp->pn_buf) {
379 			VN_SETPATH(rootvp, startvp, vp, pnp->pn_buf,
380 			    pnp->pn_path - pnp->pn_buf);
381 		}
382 		if (compvpp != NULL)
383 			*compvpp = NULL;
384 		if (rootvp != rootdir)
385 			VN_RELE(rootvp);
386 		return (0);
387 	}
388 
389 	/*
390 	 * Traverse mount points.
391 	 * XXX why don't we need to hold a read lock here (call vn_vfsrlock)?
392 	 * What prevents a concurrent update to v_vfsmountedhere?
393 	 * 	Possible answer: if mounting, we might not see the mount
394 	 *	if it is concurrently coming into existence, but that's
395 	 *	really not much different from the thread running a bit slower.
396 	 *	If unmounting, we may get into traverse() when we shouldn't,
397 	 *	but traverse() will catch this case for us.
398 	 *	(For this to work, fetching v_vfsmountedhere had better
399 	 *	be atomic!)
400 	 */
401 	if (vn_mountedvfs(cvp) != NULL) {
402 		tvp = cvp;
403 		if ((error = traverse(&tvp)) != 0) {
404 			/*
405 			 * It is required to assign cvp here, because
406 			 * traverse() will return a held vnode which
407 			 * may different than the vnode that was passed
408 			 * in (even in the error case).  If traverse()
409 			 * changes the vnode it releases the original,
410 			 * and holds the new one.
411 			 */
412 			cvp = tvp;
413 			goto bad;
414 		}
415 		cvp = tvp;
416 	}
417 
418 	/*
419 	 * If we hit a symbolic link and there is more path to be
420 	 * translated or this operation does not wish to apply
421 	 * to a link, then place the contents of the link at the
422 	 * front of the remaining pathname.
423 	 */
424 	if (cvp->v_type == VLNK && ((flags & FOLLOW) || pn_pathleft(pnp))) {
425 		struct pathname linkpath;
426 #ifdef C2_AUDIT
427 		if (audit_active) {
428 			if (error = audit_pathcomp(pnp, cvp, cr))
429 				goto bad;
430 		}
431 #endif
432 
433 		if (++nlink > MAXSYMLINKS) {
434 			error = ELOOP;
435 			goto bad;
436 		}
437 		pn_alloc(&linkpath);
438 		if (error = pn_getsymlink(cvp, &linkpath, cr)) {
439 			pn_free(&linkpath);
440 			goto bad;
441 		}
442 
443 #ifdef C2_AUDIT
444 		if (audit_active)
445 			audit_symlink(pnp, &linkpath);
446 #endif /* C2_AUDIT */
447 
448 		if (pn_pathleft(&linkpath) == 0)
449 			(void) pn_set(&linkpath, ".");
450 		error = pn_insert(pnp, &linkpath, strlen(component));
451 		pn_free(&linkpath);
452 		if (error)
453 			goto bad;
454 		VN_RELE(cvp);
455 		cvp = NULL;
456 		if (pnp->pn_pathlen == 0) {
457 			error = ENOENT;
458 			goto bad;
459 		}
460 		if (pnp->pn_path[0] == '/') {
461 			do {
462 				pnp->pn_path++;
463 				pnp->pn_pathlen--;
464 			} while (pnp->pn_path[0] == '/');
465 			VN_RELE(vp);
466 			vp = rootvp;
467 			VN_HOLD(vp);
468 		}
469 #ifdef C2_AUDIT
470 		if (audit_active)
471 			audit_anchorpath(pnp, vp == rootvp);
472 #endif
473 		if (pn_fixslash(pnp)) {
474 			flags |= FOLLOW;
475 			must_be_directory = 1;
476 		}
477 		goto next;
478 	}
479 
480 	/*
481 	 * If rpnp is non-NULL, remember the resolved path name therein.
482 	 * Do not include "." components.  Collapse occurrences of
483 	 * "previous/..", so long as "previous" is not itself "..".
484 	 * Exhausting rpnp results in error ENAMETOOLONG.
485 	 */
486 	if (rpnp && strcmp(component, ".") != 0) {
487 		size_t len;
488 
489 		if (strcmp(component, "..") == 0 &&
490 		    rpnp->pn_pathlen != 0 &&
491 		    !((rpnp->pn_pathlen > 2 &&
492 		    strncmp(rpnp->pn_path+rpnp->pn_pathlen-3, "/..", 3) == 0) ||
493 		    (rpnp->pn_pathlen == 2 &&
494 		    strncmp(rpnp->pn_path, "..", 2) == 0))) {
495 			while (rpnp->pn_pathlen &&
496 			    rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
497 				rpnp->pn_pathlen--;
498 			if (rpnp->pn_pathlen > 1)
499 				rpnp->pn_pathlen--;
500 			rpnp->pn_path[rpnp->pn_pathlen] = '\0';
501 		} else {
502 			if (rpnp->pn_pathlen != 0 &&
503 			    rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
504 				rpnp->pn_path[rpnp->pn_pathlen++] = '/';
505 			error = copystr(component,
506 			    rpnp->pn_path + rpnp->pn_pathlen,
507 			    rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
508 			if (error)	/* copystr() returns ENAMETOOLONG */
509 				goto bad;
510 			rpnp->pn_pathlen += (len - 1);
511 			ASSERT(rpnp->pn_bufsize > rpnp->pn_pathlen);
512 		}
513 	}
514 
515 	/*
516 	 * If no more components, return last directory (if wanted) and
517 	 * last component (if wanted).
518 	 */
519 	if (pn_pathleft(pnp) == 0) {
520 		/*
521 		 * If there was a trailing slash in the pathname,
522 		 * make sure the last component is a directory.
523 		 */
524 		if (must_be_directory && cvp->v_type != VDIR) {
525 			error = ENOTDIR;
526 			goto bad;
527 		}
528 		if (dirvpp != NULL) {
529 			/*
530 			 * Check that we have the real parent and not
531 			 * an alias of the last component.
532 			 */
533 			if (vn_compare(vp, cvp)) {
534 #ifdef C2_AUDIT
535 				if (audit_active)
536 					(void) audit_savepath(pnp, cvp,
537 						EINVAL, cr);
538 #endif
539 				pn_setlast(pnp);
540 				VN_RELE(vp);
541 				VN_RELE(cvp);
542 				if (rootvp != rootdir)
543 					VN_RELE(rootvp);
544 				return (EINVAL);
545 			}
546 #ifdef C2_AUDIT
547 			if (audit_active) {
548 				if (error = audit_pathcomp(pnp, vp, cr))
549 					goto bad;
550 			}
551 #endif
552 			*dirvpp = vp;
553 		} else
554 			VN_RELE(vp);
555 #ifdef C2_AUDIT
556 		if (audit_active)
557 			(void) audit_savepath(pnp, cvp, 0, cr);
558 #endif
559 		if (pnp->pn_path == pnp->pn_buf)
560 			(void) pn_set(pnp, ".");
561 		else
562 			pn_setlast(pnp);
563 		if (rpnp) {
564 			if (VN_CMP(cvp, rootvp))
565 				(void) pn_set(rpnp, "/");
566 			else if (rpnp->pn_pathlen == 0)
567 				(void) pn_set(rpnp, ".");
568 		}
569 
570 		/*
571 		 * Store the path for this vnode and/or its parent.
572 		 */
573 		if (vfs_vnode_path) {
574 			plen = pnp->pn_path - pnp->pn_buf;
575 			if (dirvpp != NULL && plen != 0)
576 				VN_SETPATH(rootvp, startvp, *dirvpp,
577 				    pnp->pn_buf, plen);
578 			VN_SETPATH(rootvp, startvp, cvp, pnp->pn_buf,
579 			    plen + pnp->pn_pathlen);
580 		}
581 
582 		if (compvpp != NULL)
583 			*compvpp = cvp;
584 		else
585 			VN_RELE(cvp);
586 		if (rootvp != rootdir)
587 			VN_RELE(rootvp);
588 		return (0);
589 	}
590 
591 #ifdef C2_AUDIT
592 	if (audit_active) {
593 		if (error = audit_pathcomp(pnp, cvp, cr))
594 			goto bad;
595 	}
596 #endif
597 
598 	/*
599 	 * Skip over slashes from end of last component.
600 	 */
601 	while (pnp->pn_path[0] == '/') {
602 		pnp->pn_path++;
603 		pnp->pn_pathlen--;
604 	}
605 
606 	/*
607 	 * Searched through another level of directory:
608 	 * release previous directory handle and save new (result
609 	 * of lookup) as current directory.
610 	 */
611 	VN_RELE(vp);
612 	vp = cvp;
613 	cvp = NULL;
614 	goto next;
615 
616 bad:
617 #ifdef C2_AUDIT
618 	if (audit_active)	/* reached end of path */
619 		(void) audit_savepath(pnp, cvp, error, cr);
620 bad_noaudit:
621 #endif
622 	/*
623 	 * Error.  Release vnodes and return.
624 	 */
625 	if (cvp)
626 		VN_RELE(cvp);
627 	/*
628 	 * If the error was ESTALE and the current directory to look in
629 	 * was the root for this lookup, the root for a mounted file
630 	 * system, or the starting directory for lookups, then
631 	 * return ENOENT instead of ESTALE.  In this case, no recovery
632 	 * is possible by the higher level.  If ESTALE was returned for
633 	 * some intermediate directory along the path, then recovery
634 	 * is potentially possible and retrying from the higher level
635 	 * will either correct the situation by purging stale cache
636 	 * entries or eventually get back to the point where no recovery
637 	 * is possible.
638 	 */
639 	if (error == ESTALE &&
640 	    (VN_CMP(vp, rootvp) || (vp->v_flag & VROOT) || vp == startvp))
641 		error = ENOENT;
642 	VN_RELE(vp);
643 	if (rootvp != rootdir)
644 		VN_RELE(rootvp);
645 	return (error);
646 }
647 
648 /*
649  * Traverse a mount point.  Routine accepts a vnode pointer as a reference
650  * parameter and performs the indirection, releasing the original vnode.
651  */
652 int
653 traverse(vnode_t **cvpp)
654 {
655 	int error = 0;
656 	vnode_t *cvp;
657 	vnode_t *tvp;
658 	vfs_t *vfsp;
659 
660 	cvp = *cvpp;
661 
662 	/*
663 	 * If this vnode is mounted on, then we transparently indirect
664 	 * to the vnode which is the root of the mounted file system.
665 	 * Before we do this we must check that an unmount is not in
666 	 * progress on this vnode.
667 	 */
668 
669 	for (;;) {
670 		/*
671 		 * Try to read lock the vnode.  If this fails because
672 		 * the vnode is already write locked, then check to
673 		 * see whether it is the current thread which locked
674 		 * the vnode.  If it is not, then read lock the vnode
675 		 * by waiting to acquire the lock.
676 		 *
677 		 * The code path in domount() is an example of support
678 		 * which needs to look up two pathnames and locks one
679 		 * of them in between the two lookups.
680 		 */
681 		error = vn_vfsrlock(cvp);
682 		if (error) {
683 			if (!vn_vfswlock_held(cvp))
684 				error = vn_vfsrlock_wait(cvp);
685 			if (error != 0) {
686 				/*
687 				 * lookuppn() expects a held vnode to be
688 				 * returned because it promptly calls
689 				 * VN_RELE after the error return
690 				 */
691 				*cvpp = cvp;
692 				return (error);
693 			}
694 		}
695 
696 		/*
697 		 * Reached the end of the mount chain?
698 		 */
699 		vfsp = vn_mountedvfs(cvp);
700 		if (vfsp == NULL) {
701 			vn_vfsunlock(cvp);
702 			break;
703 		}
704 
705 		/*
706 		 * The read lock must be held across the call to VFS_ROOT() to
707 		 * prevent a concurrent unmount from destroying the vfs.
708 		 */
709 		error = VFS_ROOT(vfsp, &tvp);
710 		vn_vfsunlock(cvp);
711 
712 		if (error)
713 			break;
714 
715 		VN_RELE(cvp);
716 
717 		cvp = tvp;
718 	}
719 
720 	*cvpp = cvp;
721 	return (error);
722 }
723 
724 /*
725  * Return the lowermost vnode if this is a mountpoint.
726  */
727 static vnode_t *
728 vn_under(vnode_t *vp)
729 {
730 	vnode_t *uvp;
731 	vfs_t *vfsp;
732 
733 	while (vp->v_flag & VROOT) {
734 
735 		vfsp = vp->v_vfsp;
736 		vfs_rlock_wait(vfsp);
737 		if ((uvp = vfsp->vfs_vnodecovered) == NULL ||
738 		    (vfsp->vfs_flag & VFS_UNMOUNTED)) {
739 			vfs_unlock(vfsp);
740 			break;
741 		}
742 		VN_HOLD(uvp);
743 		vfs_unlock(vfsp);
744 		VN_RELE(vp);
745 		vp = uvp;
746 	}
747 
748 	return (vp);
749 }
750 
751 static int
752 vnode_match(vnode_t *v1, vnode_t *v2, cred_t *cr)
753 {
754 	vattr_t	v1attr, v2attr;
755 
756 	/*
757 	 * If we have a device file, check to see if is a cloned open of the
758 	 * same device.  For self-cloning devices, the major numbers will match.
759 	 * For devices cloned through the 'clone' driver, the minor number of
760 	 * the source device will be the same as the major number of the cloned
761 	 * device.
762 	 */
763 	if ((v1->v_type == VCHR || v1->v_type == VBLK) &&
764 	    v1->v_type == v2->v_type) {
765 		if ((spec_is_selfclone(v1) || spec_is_selfclone(v2)) &&
766 		    getmajor(v1->v_rdev) == getmajor(v2->v_rdev))
767 			return (1);
768 
769 		if (spec_is_clone(v1) &&
770 		    getmajor(v1->v_rdev) == getminor(v2->v_rdev))
771 			return (1);
772 
773 		if (spec_is_clone(v2) &&
774 		    getmajor(v2->v_rdev) == getminor(v1->v_rdev))
775 			return (1);
776 	}
777 
778 	v1attr.va_mask = v2attr.va_mask = AT_TYPE;
779 
780 	/*
781 	 * This check for symbolic links handles the pseudo-symlinks in procfs.
782 	 * These particular links have v_type of VDIR, but the attributes have a
783 	 * type of VLNK.  We need to avoid these links because otherwise if we
784 	 * are currently in '/proc/self/fd', then '/proc/self/cwd' will compare
785 	 * as the same vnode.
786 	 */
787 	if (VOP_GETATTR(v1, &v1attr, 0, cr) != 0 ||
788 	    VOP_GETATTR(v2, &v2attr, 0, cr) != 0 ||
789 	    v1attr.va_type == VLNK || v2attr.va_type == VLNK)
790 		return (0);
791 
792 	v1attr.va_mask = v2attr.va_mask = AT_TYPE | AT_FSID | AT_NODEID;
793 
794 	if (VOP_GETATTR(v1, &v1attr, ATTR_REAL, cr) != 0 ||
795 	    VOP_GETATTR(v2, &v2attr, ATTR_REAL, cr) != 0)
796 		return (0);
797 
798 	return (v1attr.va_fsid == v2attr.va_fsid &&
799 	    v1attr.va_nodeid == v2attr.va_nodeid);
800 }
801 
802 
803 /*
804  * Find the entry in the directory corresponding to the target vnode.
805  */
806 int
807 dirfindvp(vnode_t *vrootp, vnode_t *dvp, vnode_t *tvp, cred_t *cr, char *dbuf,
808     size_t dlen, dirent64_t **rdp)
809 {
810 	size_t dbuflen;
811 	struct iovec iov;
812 	struct uio uio;
813 	int err;
814 	int eof;
815 	vnode_t *cmpvp;
816 	struct dirent64 *dp;
817 	pathname_t pnp;
818 
819 	ASSERT(dvp->v_type == VDIR);
820 
821 	/*
822 	 * This is necessary because of the strange semantics of VOP_LOOKUP().
823 	 */
824 	bzero(&pnp, sizeof (pnp));
825 
826 	eof = 0;
827 
828 	uio.uio_iov = &iov;
829 	uio.uio_iovcnt = 1;
830 	uio.uio_segflg = UIO_SYSSPACE;
831 	uio.uio_fmode = 0;
832 	uio.uio_extflg = UIO_COPY_CACHED;
833 	uio.uio_loffset = 0;
834 
835 	if ((err = VOP_ACCESS(dvp, VREAD, 0, cr)) != 0)
836 		return (err);
837 
838 	while (!eof) {
839 		uio.uio_resid = dlen;
840 		iov.iov_base = dbuf;
841 		iov.iov_len = dlen;
842 
843 		(void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
844 		err = VOP_READDIR(dvp, &uio, cr, &eof);
845 		VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
846 
847 		dbuflen = dlen - uio.uio_resid;
848 
849 		if (err || dbuflen == 0)
850 			break;
851 
852 		dp = (dirent64_t *)dbuf;
853 		while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) {
854 			/*
855 			 * Ignore '.' and '..' entries
856 			 */
857 			if (strcmp(dp->d_name, ".") == 0 ||
858 			    strcmp(dp->d_name, "..") == 0) {
859 				dp = (dirent64_t *)((intptr_t)dp +
860 				    dp->d_reclen);
861 				continue;
862 			}
863 
864 			err = VOP_LOOKUP(dvp, dp->d_name, &cmpvp, &pnp, 0,
865 			    vrootp, cr);
866 
867 			/*
868 			 * We only want to bail out if there was an error other
869 			 * than ENOENT.  Otherwise, it could be that someone
870 			 * just removed an entry since the readdir() call, and
871 			 * the entry we want is further on in the directory.
872 			 */
873 			if (err == 0) {
874 				if (vnode_match(tvp, cmpvp, cr)) {
875 					VN_RELE(cmpvp);
876 					*rdp = dp;
877 					return (0);
878 				}
879 
880 				VN_RELE(cmpvp);
881 			} else if (err != ENOENT) {
882 				return (err);
883 			}
884 
885 			dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen);
886 		}
887 	}
888 
889 	/*
890 	 * Something strange has happened, this directory does not contain the
891 	 * specified vnode.  This should never happen in the normal case, since
892 	 * we ensured that dvp is the parent of vp.  This may be possible in
893 	 * some race conditions, so fail gracefully.
894 	 */
895 	if (err == 0)
896 		err = ENOENT;
897 
898 	return (err);
899 }
900 
901 /*
902  * Given a global path (from rootdir), and a vnode that is the current root,
903  * return the portion of the path that is beneath the current root or NULL on
904  * failure.  The path MUST be a resolved path (no '..' entries or symlinks),
905  * otherwise this function will fail.
906  */
907 static char *
908 localpath(char *path, struct vnode *vrootp, cred_t *cr)
909 {
910 	vnode_t *vp;
911 	vnode_t *cvp;
912 	char component[MAXNAMELEN];
913 	char *ret = NULL;
914 	pathname_t pn;
915 
916 	/*
917 	 * We use vn_compare() instead of VN_CMP() in order to detect lofs
918 	 * mounts and stacked vnodes.
919 	 */
920 	if (vn_compare(vrootp, rootdir))
921 		return (path);
922 
923 	if (pn_get(path, UIO_SYSSPACE, &pn) != 0)
924 		return (NULL);
925 
926 	vp = rootdir;
927 	VN_HOLD(vp);
928 
929 	while (pn_pathleft(&pn)) {
930 		pn_skipslash(&pn);
931 
932 		if (pn_getcomponent(&pn, component) != 0)
933 			break;
934 
935 		if (vn_ismntpt(vp) && traverse(&vp) != 0)
936 			break;
937 
938 		if (VOP_LOOKUP(vp, component, &cvp, &pn, 0, rootdir, cr) != 0)
939 			break;
940 
941 		VN_RELE(vp);
942 		vp = cvp;
943 
944 		if (vn_compare(vp, vrootp)) {
945 			ret = path + (pn.pn_path - pn.pn_buf);
946 			break;
947 		}
948 	}
949 
950 	VN_RELE(vp);
951 	pn_free(&pn);
952 
953 	return (ret);
954 }
955 
956 /*
957  * Given a directory, return the full, resolved path.  This looks up "..",
958  * searches for the given vnode in the parent, appends the component, etc.  It
959  * is used to implement vnodetopath() and getcwd() when the cached path fails
960  * (or vfs_vnode_path is not set).
961  */
962 static int
963 dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr)
964 {
965 	pathname_t pn, rpn, emptypn;
966 	vnode_t *cmpvp, *pvp = NULL;
967 	vnode_t *startvp = vp;
968 	int err = 0;
969 	size_t complen;
970 	char *dbuf;
971 	dirent64_t *dp;
972 	char		*bufloc;
973 	size_t		dlen = DIRENT64_RECLEN(MAXPATHLEN);
974 	refstr_t	*mntpt;
975 
976 	/* Operation only allowed on directories */
977 	ASSERT(vp->v_type == VDIR);
978 
979 	/* We must have at least enough space for "/" */
980 	if (buflen < 2)
981 		return (ENAMETOOLONG);
982 
983 	/* Start at end of string with terminating null */
984 	bufloc = &buf[buflen - 1];
985 	*bufloc = '\0';
986 
987 	pn_alloc(&pn);
988 	pn_alloc(&rpn);
989 	dbuf = kmem_alloc(dlen, KM_SLEEP);
990 	bzero(&emptypn, sizeof (emptypn));
991 
992 	/*
993 	 * Begin with an additional reference on vp.  This will be decremented
994 	 * during the loop.
995 	 */
996 	VN_HOLD(vp);
997 
998 	for (;;) {
999 		/*
1000 		 * Return if we've reached the root.  If the buffer is empty,
1001 		 * return '/'.  We explicitly don't use vn_compare(), since it
1002 		 * compares the real vnodes.  A lofs mount of '/' would produce
1003 		 * incorrect results otherwise.
1004 		 */
1005 		if (VN_CMP(vrootp, vp)) {
1006 			if (*bufloc == '\0')
1007 				*--bufloc = '/';
1008 			break;
1009 		}
1010 
1011 		/*
1012 		 * If we've reached the VFS root, something has gone wrong.  We
1013 		 * should have reached the root in the above check.  The only
1014 		 * explantation is that 'vp' is not contained withing the given
1015 		 * root, in which case we return EPERM.
1016 		 */
1017 		if (VN_CMP(rootdir, vp)) {
1018 			err = EPERM;
1019 			goto out;
1020 		}
1021 
1022 		/*
1023 		 * Shortcut: see if this vnode is a mountpoint.  If so,
1024 		 * grab the path information from the vfs_t.
1025 		 */
1026 		if (vp->v_flag & VROOT) {
1027 
1028 			mntpt = vfs_getmntpoint(vp->v_vfsp);
1029 			if ((err = pn_set(&pn, (char *)refstr_value(mntpt)))
1030 			    == 0) {
1031 				refstr_rele(mntpt);
1032 				rpn.pn_path = rpn.pn_buf;
1033 
1034 				/*
1035 				 * Ensure the mointpoint still exists.
1036 				 */
1037 				VN_HOLD(vrootp);
1038 				if (vrootp != rootdir)
1039 					VN_HOLD(vrootp);
1040 				if (lookuppnvp(&pn, &rpn, 0, NULL,
1041 				    &cmpvp, vrootp, vrootp, cr) == 0) {
1042 
1043 					if (VN_CMP(vp, cmpvp)) {
1044 						VN_RELE(cmpvp);
1045 
1046 						complen = strlen(rpn.pn_path);
1047 						bufloc -= complen;
1048 						if (bufloc < buf) {
1049 							err = ERANGE;
1050 							goto out;
1051 						}
1052 						bcopy(rpn.pn_path, bufloc,
1053 						    complen);
1054 						break;
1055 					} else {
1056 						VN_RELE(cmpvp);
1057 					}
1058 				}
1059 			} else {
1060 				refstr_rele(mntpt);
1061 			}
1062 		}
1063 
1064 		/*
1065 		 * Shortcuts failed, search for this vnode in its parent.  If
1066 		 * this is a mountpoint, then get the vnode underneath.
1067 		 */
1068 		if (vp->v_flag & VROOT)
1069 			vp = vn_under(vp);
1070 		if ((err = VOP_LOOKUP(vp, "..", &pvp, &emptypn, 0, vrootp, cr))
1071 		    != 0)
1072 			goto out;
1073 
1074 		/*
1075 		 * With extended attributes, it's possible for a directory to
1076 		 * have a parent that is a regular file.  Check for that here.
1077 		 */
1078 		if (pvp->v_type != VDIR) {
1079 			err = ENOTDIR;
1080 			goto out;
1081 		}
1082 
1083 		/*
1084 		 * If this is true, something strange has happened.  This is
1085 		 * only true if we are the root of a filesystem, which should
1086 		 * have been caught by the check above.
1087 		 */
1088 		if (VN_CMP(pvp, vp)) {
1089 			err = ENOENT;
1090 			goto out;
1091 		}
1092 
1093 		/*
1094 		 * Search the parent directory for the entry corresponding to
1095 		 * this vnode.
1096 		 */
1097 		if ((err = dirfindvp(vrootp, pvp, vp, cr, dbuf, dlen, &dp))
1098 		    != 0)
1099 			goto out;
1100 		complen = strlen(dp->d_name);
1101 		bufloc -= complen;
1102 		if (bufloc <= buf) {
1103 			err = ENAMETOOLONG;
1104 			goto out;
1105 		}
1106 		bcopy(dp->d_name, bufloc, complen);
1107 
1108 		/* Prepend a slash to the current path.  */
1109 		*--bufloc = '/';
1110 
1111 		/* And continue with the next component */
1112 		VN_RELE(vp);
1113 		vp = pvp;
1114 		pvp = NULL;
1115 	}
1116 
1117 	/*
1118 	 * Place the path at the beginning of the buffer.
1119 	 */
1120 	if (bufloc != buf)
1121 		ovbcopy(bufloc, buf, buflen - (bufloc - buf));
1122 
1123 out:
1124 	/*
1125 	 * If the error was ESTALE and the current directory to look in
1126 	 * was the root for this lookup, the root for a mounted file
1127 	 * system, or the starting directory for lookups, then
1128 	 * return ENOENT instead of ESTALE.  In this case, no recovery
1129 	 * is possible by the higher level.  If ESTALE was returned for
1130 	 * some intermediate directory along the path, then recovery
1131 	 * is potentially possible and retrying from the higher level
1132 	 * will either correct the situation by purging stale cache
1133 	 * entries or eventually get back to the point where no recovery
1134 	 * is possible.
1135 	 */
1136 	if (err == ESTALE &&
1137 	    (VN_CMP(vp, vrootp) || (vp->v_flag & VROOT) || vp == startvp))
1138 		err = ENOENT;
1139 
1140 	kmem_free(dbuf, dlen);
1141 	VN_RELE(vp);
1142 	if (pvp)
1143 		VN_RELE(pvp);
1144 	pn_free(&pn);
1145 	pn_free(&rpn);
1146 
1147 	return (err);
1148 }
1149 
1150 /*
1151  * The additional flag, LOOKUP_CHECKREAD, is ued to enforce artificial
1152  * constraints in order to be standards compliant.  For example, if we have
1153  * the cached path of '/foo/bar', and '/foo' has permissions 100 (execute
1154  * only), then we can legitimately look up the path to the current working
1155  * directory without needing read permission.  Existing standards tests,
1156  * however, assume that we are determining the path by repeatedly looking up
1157  * "..".  We need to keep this behavior in order to maintain backwards
1158  * compatibility.
1159  */
1160 static int
1161 vnodetopath_common(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen,
1162     cred_t *cr, int flags)
1163 {
1164 	pathname_t pn, rpn;
1165 	int ret, len;
1166 	vnode_t *compvp, *pvp, *realvp;
1167 	proc_t *p = curproc;
1168 	char path[MAXNAMELEN];
1169 	int doclose = 0;
1170 
1171 	/*
1172 	 * If vrootp is NULL, get the root for curproc.  Callers with any other
1173 	 * requirements should pass in a different vrootp.
1174 	 */
1175 	if (vrootp == NULL) {
1176 		mutex_enter(&p->p_lock);
1177 		if ((vrootp = PTOU(p)->u_rdir) == NULL)
1178 			vrootp = rootdir;
1179 		VN_HOLD(vrootp);
1180 		mutex_exit(&p->p_lock);
1181 	} else {
1182 		VN_HOLD(vrootp);
1183 	}
1184 
1185 	/*
1186 	 * This is to get around an annoying artifact of the /proc filesystem,
1187 	 * which is the behavior of {cwd/root}.  Trying to resolve this path
1188 	 * will result in /proc/pid/cwd instead of whatever the real working
1189 	 * directory is.  We can't rely on VOP_REALVP(), since that will break
1190 	 * lofs.  The only difference between procfs and lofs is that opening
1191 	 * the file will return the underling vnode in the case of procfs.
1192 	 */
1193 	if (vp->v_type == VDIR && VOP_REALVP(vp, &realvp) == 0 &&
1194 	    realvp != vp) {
1195 		VN_HOLD(vp);
1196 		if (VOP_OPEN(&vp, FREAD, cr) == 0)
1197 			doclose = 1;
1198 		else
1199 			VN_RELE(vp);
1200 	}
1201 
1202 	pn_alloc(&pn);
1203 
1204 	/*
1205 	 * Check to see if we have a cached path in the vnode.
1206 	 */
1207 	mutex_enter(&vp->v_lock);
1208 	if (vn_path(vp) != NULL) {
1209 		(void) pn_set(&pn, vn_path(vp));
1210 		mutex_exit(&vp->v_lock);
1211 
1212 		pn_alloc(&rpn);
1213 
1214 		/* We should only cache absolute paths */
1215 		ASSERT(pn.pn_buf[0] == '/');
1216 
1217 		/*
1218 		 * If we are in a zone or a chroot environment, then we have to
1219 		 * take additional steps, since the path to the root might not
1220 		 * be readable with the current credentials, even though the
1221 		 * process can legitmately access the file.  In this case, we
1222 		 * do the following:
1223 		 *
1224 		 * lookuppnvp() with all privileges to get the resolved path.
1225 		 * call localpath() to get the local portion of the path, and
1226 		 * continue as normal.
1227 		 *
1228 		 * If the the conversion to a local path fails, then we continue
1229 		 * as normal.  This is a heuristic to make process object file
1230 		 * paths available from within a zone.  Because lofs doesn't
1231 		 * support page operations, the vnode stored in the seg_t is
1232 		 * actually the underlying real vnode, not the lofs node itself.
1233 		 * Most of the time, the lofs path is the same as the underlying
1234 		 * vnode (for example, /usr/lib/libc.so.1).
1235 		 */
1236 		if (vrootp != rootdir) {
1237 			char *local = NULL;
1238 			VN_HOLD(rootdir);
1239 			if (lookuppnvp(&pn, &rpn, FOLLOW,
1240 			    NULL, &compvp, rootdir, rootdir, kcred) == 0) {
1241 				local = localpath(rpn.pn_path, vrootp,
1242 				    kcred);
1243 				VN_RELE(compvp);
1244 			}
1245 
1246 			/*
1247 			 * The original pn was changed through lookuppnvp(), so
1248 			 * reset it.
1249 			 */
1250 			if (local) {
1251 				(void) pn_set(&pn, local);
1252 			} else {
1253 				mutex_enter(&vp->v_lock);
1254 				if (vn_path(vp) != NULL) {
1255 					(void) pn_set(&pn, vn_path(vp));
1256 					mutex_exit(&vp->v_lock);
1257 				} else {
1258 					mutex_exit(&vp->v_lock);
1259 					goto notcached;
1260 				}
1261 			}
1262 		}
1263 
1264 		/*
1265 		 * We should have a local path at this point, so start the
1266 		 * search from the root of the current process.
1267 		 */
1268 		VN_HOLD(vrootp);
1269 		if (vrootp != rootdir)
1270 			VN_HOLD(vrootp);
1271 		ret = lookuppnvp(&pn, &rpn, FOLLOW | flags, NULL,
1272 		    &compvp, vrootp, vrootp, cr);
1273 		if (ret == 0) {
1274 			/*
1275 			 * Check to see if the returned vnode is the same as
1276 			 * the one we expect.  If not, give up.
1277 			 */
1278 			if (!vn_compare(vp, compvp) &&
1279 			    !vnode_match(vp, compvp, cr)) {
1280 				VN_RELE(compvp);
1281 				goto notcached;
1282 			}
1283 
1284 			VN_RELE(compvp);
1285 
1286 			/*
1287 			 * Return the result.
1288 			 */
1289 			if (buflen <= rpn.pn_pathlen)
1290 				goto notcached;
1291 
1292 			bcopy(rpn.pn_path, buf, rpn.pn_pathlen + 1);
1293 			pn_free(&pn);
1294 			pn_free(&rpn);
1295 			VN_RELE(vrootp);
1296 			if (doclose) {
1297 				(void) VOP_CLOSE(vp, FREAD, 1, 0, cr);
1298 				VN_RELE(vp);
1299 			}
1300 			return (0);
1301 		}
1302 
1303 notcached:
1304 		pn_free(&rpn);
1305 	} else {
1306 		mutex_exit(&vp->v_lock);
1307 	}
1308 
1309 	pn_free(&pn);
1310 
1311 	if (vp->v_type != VDIR) {
1312 		/*
1313 		 * If we don't have a directory, try to find it in the dnlc via
1314 		 * reverse lookup.  Once this is found, we can use the regular
1315 		 * directory search to find the full path.
1316 		 */
1317 		if ((pvp = dnlc_reverse_lookup(vp, path, MAXNAMELEN)) != NULL) {
1318 			ret = dirtopath(vrootp, pvp, buf, buflen, cr);
1319 			if (ret == 0) {
1320 				len = strlen(buf);
1321 				if (len + strlen(path) + 1 >= buflen) {
1322 					ret = ENAMETOOLONG;
1323 				} else {
1324 					if (buf[len - 1] != '/')
1325 						buf[len++] = '/';
1326 					bcopy(path, buf + len,
1327 					    strlen(path) + 1);
1328 				}
1329 			}
1330 
1331 			VN_RELE(pvp);
1332 		} else
1333 			ret = ENOENT;
1334 	} else
1335 		ret = dirtopath(vrootp, vp, buf, buflen, cr);
1336 
1337 	VN_RELE(vrootp);
1338 	if (doclose) {
1339 		(void) VOP_CLOSE(vp, FREAD, 1, 0, cr);
1340 		VN_RELE(vp);
1341 	}
1342 
1343 	return (ret);
1344 }
1345 
1346 int
1347 vnodetopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr)
1348 {
1349 	return (vnodetopath_common(vrootp, vp, buf, buflen, cr, 0));
1350 }
1351 
1352 int
1353 dogetcwd(char *buf, size_t buflen)
1354 {
1355 	int ret;
1356 	vnode_t *vp;
1357 	vnode_t *compvp;
1358 	refstr_t *cwd, *oldcwd;
1359 	const char *value;
1360 	pathname_t rpnp, pnp;
1361 	proc_t *p = curproc;
1362 
1363 	/*
1364 	 * Check to see if there is a cached version of the cwd.  If so, lookup
1365 	 * the cached value and make sure it is the same vnode.
1366 	 */
1367 	mutex_enter(&p->p_lock);
1368 	if ((cwd = PTOU(p)->u_cwd) != NULL)
1369 		refstr_hold(cwd);
1370 	vp = PTOU(p)->u_cdir;
1371 	VN_HOLD(vp);
1372 	mutex_exit(&p->p_lock);
1373 
1374 	/*
1375 	 * Make sure we have permission to access the current directory.
1376 	 */
1377 	if ((ret = VOP_ACCESS(vp, VEXEC, 0, CRED())) != 0) {
1378 		if (cwd != NULL)
1379 			refstr_rele(cwd);
1380 		VN_RELE(vp);
1381 		return (ret);
1382 	}
1383 
1384 	if (cwd) {
1385 		value = refstr_value(cwd);
1386 		if ((ret = pn_get((char *)value, UIO_SYSSPACE, &pnp)) != 0) {
1387 			refstr_rele(cwd);
1388 			VN_RELE(vp);
1389 			return (ret);
1390 		}
1391 
1392 		pn_alloc(&rpnp);
1393 
1394 		if (lookuppn(&pnp, &rpnp, NO_FOLLOW, NULL, &compvp) == 0) {
1395 
1396 			if (VN_CMP(vp, compvp) &&
1397 			    strcmp(value, rpnp.pn_path) == 0) {
1398 				VN_RELE(compvp);
1399 				VN_RELE(vp);
1400 				pn_free(&pnp);
1401 				pn_free(&rpnp);
1402 				if (strlen(value) + 1 > buflen) {
1403 					refstr_rele(cwd);
1404 					return (ENAMETOOLONG);
1405 				}
1406 				bcopy(value, buf, strlen(value) + 1);
1407 				refstr_rele(cwd);
1408 				return (0);
1409 			}
1410 
1411 			VN_RELE(compvp);
1412 		}
1413 
1414 		pn_free(&rpnp);
1415 		pn_free(&pnp);
1416 
1417 		refstr_rele(cwd);
1418 	}
1419 
1420 	ret = vnodetopath_common(NULL, vp, buf, buflen, CRED(),
1421 	    LOOKUP_CHECKREAD);
1422 
1423 	VN_RELE(vp);
1424 
1425 	/*
1426 	 * Store the new cwd and replace the existing cached copy.
1427 	 */
1428 	if (ret == 0)
1429 		cwd = refstr_alloc(buf);
1430 	else
1431 		cwd = NULL;
1432 
1433 	mutex_enter(&p->p_lock);
1434 	oldcwd = PTOU(p)->u_cwd;
1435 	PTOU(p)->u_cwd = cwd;
1436 	mutex_exit(&p->p_lock);
1437 
1438 	if (oldcwd)
1439 		refstr_rele(oldcwd);
1440 
1441 	return (ret);
1442 }
1443