1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
24 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright 2016 Joyent, Inc.
26 * Copyright 2023 RackTop Systems, Inc.
27 */
28
29 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
30 /* All Rights Reserved */
31
32 /*
33 * University Copyright- Copyright (c) 1982, 1986, 1988
34 * The Regents of the University of California
35 * All Rights Reserved
36 *
37 * University Acknowledgment- Portions of this document are derived from
38 * software developed by the University of California, Berkeley, and its
39 * contributors.
40 */
41
42 #include <sys/types.h>
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/cpuvar.h>
46 #include <sys/errno.h>
47 #include <sys/cred.h>
48 #include <sys/user.h>
49 #include <sys/uio.h>
50 #include <sys/vfs.h>
51 #include <sys/vnode.h>
52 #include <sys/pathname.h>
53 #include <sys/proc.h>
54 #include <sys/vtrace.h>
55 #include <sys/sysmacros.h>
56 #include <sys/debug.h>
57 #include <sys/dirent.h>
58 #include <c2/audit.h>
59 #include <sys/zone.h>
60 #include <sys/dnlc.h>
61 #include <sys/fs/snode.h>
62
/*
 * Controls whether paths are stored with vnodes.  Initialized to 1;
 * presumably a non-zero value enables path caching (the consumers of this
 * variable are not in this part of the file -- TODO confirm).
 */
int vfs_vnode_path = 1;
65
66 int
lookupname(const char * fnamep,enum uio_seg seg,int followlink,vnode_t ** dirvpp,vnode_t ** compvpp)67 lookupname(
68 const char *fnamep,
69 enum uio_seg seg,
70 int followlink,
71 vnode_t **dirvpp,
72 vnode_t **compvpp)
73 {
74 return (lookupnameatcred(fnamep, seg, followlink, dirvpp, compvpp, NULL,
75 CRED()));
76 }
77
78 /*
79 * Lookup the user file name,
80 * Handle allocation and freeing of pathname buffer, return error.
81 */
82 int
lookupnameatcred(const char * fnamep,enum uio_seg seg,int followlink,vnode_t ** dirvpp,vnode_t ** compvpp,vnode_t * startvp,cred_t * cr)83 lookupnameatcred(
84 const char *fnamep, /* user pathname */
85 enum uio_seg seg, /* addr space that name is in */
86 int followlink, /* follow sym links */
87 vnode_t **dirvpp, /* ret for ptr to parent dir vnode */
88 vnode_t **compvpp, /* ret for ptr to component vnode */
89 vnode_t *startvp, /* start path search from vp */
90 cred_t *cr) /* credential */
91 {
92 char namebuf[TYPICALMAXPATHLEN];
93 struct pathname lookpn;
94 int error;
95
96 error = pn_get_buf(fnamep, seg, &lookpn, namebuf, sizeof (namebuf));
97 if (error == 0) {
98 error = lookuppnatcred(&lookpn, NULL, followlink,
99 dirvpp, compvpp, startvp, cr);
100 }
101 if (error == ENAMETOOLONG) {
102 /*
103 * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
104 */
105 if (error = pn_get(fnamep, seg, &lookpn))
106 return (error);
107 error = lookuppnatcred(&lookpn, NULL, followlink,
108 dirvpp, compvpp, startvp, cr);
109 pn_free(&lookpn);
110 }
111
112 return (error);
113 }
114
115 int
lookupnameat(const char * fnamep,enum uio_seg seg,int followlink,vnode_t ** dirvpp,vnode_t ** compvpp,vnode_t * startvp)116 lookupnameat(const char *fnamep, enum uio_seg seg, int followlink,
117 vnode_t **dirvpp, vnode_t **compvpp, vnode_t *startvp)
118 {
119 return (lookupnameatcred(fnamep, seg, followlink, dirvpp, compvpp,
120 startvp, CRED()));
121 }
122
123 int
lookuppn(struct pathname * pnp,struct pathname * rpnp,int followlink,vnode_t ** dirvpp,vnode_t ** compvpp)124 lookuppn(
125 struct pathname *pnp,
126 struct pathname *rpnp,
127 int followlink,
128 vnode_t **dirvpp,
129 vnode_t **compvpp)
130 {
131 return (lookuppnatcred(pnp, rpnp, followlink, dirvpp, compvpp, NULL,
132 CRED()));
133 }
134
135 /*
136 * Lookup the user file name from a given vp, using a specific credential.
137 */
138 int
lookuppnatcred(struct pathname * pnp,struct pathname * rpnp,int followlink,vnode_t ** dirvpp,vnode_t ** compvpp,vnode_t * startvp,cred_t * cr)139 lookuppnatcred(
140 struct pathname *pnp, /* pathname to lookup */
141 struct pathname *rpnp, /* if non-NULL, return resolved path */
142 int followlink, /* (don't) follow sym links */
143 vnode_t **dirvpp, /* ptr for parent vnode */
144 vnode_t **compvpp, /* ptr for entry vnode */
145 vnode_t *startvp, /* start search from this vp */
146 cred_t *cr) /* user credential */
147 {
148 vnode_t *vp; /* current directory vp */
149 vnode_t *rootvp;
150 proc_t *p = curproc;
151
152 if (pnp->pn_pathlen == 0)
153 return (ENOENT);
154
155 mutex_enter(&p->p_lock); /* for u_rdir and u_cdir */
156 if ((rootvp = PTOU(p)->u_rdir) == NULL)
157 rootvp = rootdir;
158 else if (rootvp != rootdir) /* no need to VN_HOLD rootdir */
159 VN_HOLD(rootvp);
160
161 if (pnp->pn_path[0] == '/') {
162 vp = rootvp;
163 } else {
164 vp = (startvp == NULL) ? PTOU(p)->u_cdir : startvp;
165 }
166 VN_HOLD(vp);
167 mutex_exit(&p->p_lock);
168
169 /*
170 * Skip over leading slashes
171 */
172 if (pnp->pn_path[0] == '/') {
173 do {
174 pnp->pn_path++;
175 pnp->pn_pathlen--;
176 } while (pnp->pn_path[0] == '/');
177 }
178
179 return (lookuppnvp(pnp, rpnp, followlink, dirvpp,
180 compvpp, rootvp, vp, cr));
181 }
182
183 int
lookuppnat(struct pathname * pnp,struct pathname * rpnp,int followlink,vnode_t ** dirvpp,vnode_t ** compvpp,vnode_t * startvp)184 lookuppnat(struct pathname *pnp, struct pathname *rpnp,
185 int followlink, vnode_t **dirvpp, vnode_t **compvpp,
186 vnode_t *startvp)
187 {
188 return (lookuppnatcred(pnp, rpnp, followlink, dirvpp, compvpp, startvp,
189 CRED()));
190 }
191
/*
 * Private flag to do our getcwd() dirty work.  When LOOKUP_CHECKREAD is set
 * in the flags given to lookuppnvp(), read permission is required on every
 * directory searched.  LOOKUP_MASK is the complement of that bit; presumably
 * it is used to strip the private flag from caller-supplied flags (no use is
 * visible in this part of the file -- TODO confirm).
 */
#define	LOOKUP_CHECKREAD	0x10
#define	LOOKUP_MASK	(~LOOKUP_CHECKREAD)
195
196 /*
197 * Starting at current directory, translate pathname pnp to end.
198 * Leave pathname of final component in pnp, return the vnode
199 * for the final component in *compvpp, and return the vnode
200 * for the parent of the final component in dirvpp.
201 *
202 * This is the central routine in pathname translation and handles
203 * multiple components in pathnames, separating them at /'s. It also
204 * implements mounted file systems and processes symbolic links.
205 *
206 * vp is the vnode where the directory search should start.
207 *
208 * Reference counts: vp must be held prior to calling this function. rootvp
209 * should only be held if rootvp != rootdir.
210 */
211 int
lookuppnvp(struct pathname * pnp,struct pathname * rpnp,int flags,vnode_t ** dirvpp,vnode_t ** compvpp,vnode_t * rootvp,vnode_t * vp,cred_t * cr)212 lookuppnvp(
213 struct pathname *pnp, /* pathname to lookup */
214 struct pathname *rpnp, /* if non-NULL, return resolved path */
215 int flags, /* follow symlinks */
216 vnode_t **dirvpp, /* ptr for parent vnode */
217 vnode_t **compvpp, /* ptr for entry vnode */
218 vnode_t *rootvp, /* rootvp */
219 vnode_t *vp, /* directory to start search at */
220 cred_t *cr) /* user's credential */
221 {
222 vnode_t *cvp; /* current component vp */
223 char component[MAXNAMELEN]; /* buffer for component (incl null) */
224 int error;
225 int nlink;
226 int lookup_flags;
227 struct pathname presrvd; /* case preserved name */
228 struct pathname *pp = NULL;
229 vnode_t *startvp;
230 vnode_t *zonevp = curproc->p_zone->zone_rootvp; /* zone root */
231 int must_be_directory = 0;
232 boolean_t retry_with_kcred;
233 uint32_t auditing = AU_AUDITING();
234
235 CPU_STATS_ADDQ(CPU, sys, namei, 1);
236 nlink = 0;
237 cvp = NULL;
238 if (rpnp)
239 rpnp->pn_pathlen = 0;
240
241 lookup_flags = dirvpp ? LOOKUP_DIR : 0;
242 if (flags & FIGNORECASE) {
243 lookup_flags |= FIGNORECASE;
244 pn_alloc(&presrvd);
245 pp = &presrvd;
246 }
247 if ((flags & LOOKUP_NOACLCHECK) != 0) {
248 lookup_flags |= LOOKUP_NOACLCHECK;
249 flags &= ~LOOKUP_NOACLCHECK;
250 }
251
252 if (auditing)
253 audit_anchorpath(pnp, vp == rootvp);
254
255 /*
256 * Eliminate any trailing slashes in the pathname.
257 * If there are any, we must follow all symlinks.
258 * Also, we must guarantee that the last component is a directory.
259 */
260 if (pn_fixslash(pnp)) {
261 flags |= FOLLOW;
262 must_be_directory = 1;
263 }
264
265 startvp = vp;
266 next:
267 retry_with_kcred = B_FALSE;
268
269 /*
270 * Make sure we have a directory.
271 */
272 if (vp->v_type != VDIR) {
273 error = ENOTDIR;
274 goto bad;
275 }
276
277 if (rpnp && VN_CMP(vp, rootvp))
278 (void) pn_set(rpnp, "/");
279
280 /*
281 * Process the next component of the pathname.
282 */
283 if (error = pn_getcomponent(pnp, component)) {
284 goto bad;
285 }
286
287 /*
288 * Handle "..": two special cases.
289 * 1. If we're at the root directory (e.g. after chroot or
290 * zone_enter) then change ".." to "." so we can't get
291 * out of this subtree.
292 * 2. If this vnode is the root of a mounted file system,
293 * then replace it with the vnode that was mounted on
294 * so that we take the ".." in the other file system.
295 */
296 if (component[0] == '.' && component[1] == '.' && component[2] == 0) {
297 checkforroot:
298 if (VN_CMP(vp, rootvp) || VN_CMP(vp, zonevp)) {
299 component[1] = '\0';
300 } else if (vp->v_flag & VROOT) {
301 vfs_t *vfsp;
302 cvp = vp;
303
304 /*
305 * While we deal with the vfs pointer from the vnode
306 * the filesystem could have been forcefully unmounted
307 * and the vnode's v_vfsp could have been invalidated
308 * by VFS_UNMOUNT. Hence, we cache v_vfsp and use it
309 * with vfs_rlock_wait/vfs_unlock.
310 * It is safe to use the v_vfsp even it is freed by
311 * VFS_UNMOUNT because vfs_rlock_wait/vfs_unlock
312 * do not dereference v_vfsp. It is just used as a
313 * magic cookie.
314 * One more corner case here is the memory getting
315 * reused for another vfs structure. In this case
316 * lookuppnvp's vfs_rlock_wait will succeed, domount's
317 * vfs_lock will fail and domount will bail out with an
318 * error (EBUSY).
319 */
320 vfsp = cvp->v_vfsp;
321
322 /*
323 * This lock is used to synchronize
324 * mounts/unmounts and lookups.
325 * Threads doing mounts/unmounts hold the
326 * writers version vfs_lock_wait().
327 */
328
329 vfs_rlock_wait(vfsp);
330
331 /*
332 * If this vnode is on a file system that
333 * has been forcibly unmounted,
334 * we can't proceed. Cancel this operation
335 * and return EIO.
336 *
337 * vfs_vnodecovered is NULL if unmounted.
338 * Currently, nfs uses VFS_UNMOUNTED to
339 * check if it's a forced-umount. Keep the
340 * same checking here as well even though it
341 * may not be needed.
342 */
343 if (((vp = cvp->v_vfsp->vfs_vnodecovered) == NULL) ||
344 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
345 vfs_unlock(vfsp);
346 VN_RELE(cvp);
347 if (pp)
348 pn_free(pp);
349 return (EIO);
350 }
351 VN_HOLD(vp);
352 vfs_unlock(vfsp);
353 VN_RELE(cvp);
354 cvp = NULL;
355 /*
356 * Crossing mount points. For eg: We are doing
357 * a lookup of ".." for file systems root vnode
358 * mounted here, and VOP_LOOKUP() (with covered vnode)
359 * will be on underlying file systems mount point
360 * vnode. Set retry_with_kcred flag as we might end
361 * up doing VOP_LOOKUP() with kcred if required.
362 */
363 retry_with_kcred = B_TRUE;
364 goto checkforroot;
365 }
366 }
367
368 /*
369 * LOOKUP_CHECKREAD is a private flag used by vnodetopath() to indicate
370 * that we need to have read permission on every directory in the entire
371 * path. This is used to ensure that a forward-lookup of a cached value
372 * has the same effect as a reverse-lookup when the cached value cannot
373 * be found.
374 */
375 if ((flags & LOOKUP_CHECKREAD) &&
376 (error = VOP_ACCESS(vp, VREAD, 0, cr, NULL)) != 0)
377 goto bad;
378
379 /*
380 * Perform a lookup in the current directory.
381 */
382 error = VOP_LOOKUP(vp, component, &cvp, pnp, lookup_flags,
383 rootvp, cr, NULL, NULL, pp);
384
385 /*
386 * Retry with kcred - If crossing mount points & error is EACCES.
387 *
388 * If we are crossing mount points here and doing ".." lookup,
389 * VOP_LOOKUP() might fail if the underlying file systems
390 * mount point has no execute permission. In cases like these,
391 * we retry VOP_LOOKUP() by giving as much privilage as possible
392 * by passing kcred credentials.
393 *
394 * In case of hierarchical file systems, passing kcred still may
395 * or may not work.
396 * For eg: UFS FS --> Mount NFS FS --> Again mount UFS on some
397 * directory inside NFS FS.
398 */
399 if ((error == EACCES) && retry_with_kcred)
400 error = VOP_LOOKUP(vp, component, &cvp, pnp, lookup_flags,
401 rootvp, zone_kcred(), NULL, NULL, pp);
402
403 if (error) {
404 cvp = NULL;
405 /*
406 * On error, return hard error if
407 * (a) we're not at the end of the pathname yet, or
408 * (b) the caller didn't want the parent directory, or
409 * (c) we failed for some reason other than a missing entry.
410 */
411 if (pn_pathleft(pnp) || dirvpp == NULL || error != ENOENT)
412 goto bad;
413 if (auditing) { /* directory access */
414 if (error = audit_savepath(pnp, vp, vp, error, cr))
415 goto bad_noaudit;
416 }
417
418 pn_setlast(pnp);
419 /*
420 * We inform the caller that the desired entry must be
421 * a directory by adding a '/' to the component name.
422 */
423 if (must_be_directory && (error = pn_addslash(pnp)) != 0)
424 goto bad;
425 *dirvpp = vp;
426 if (compvpp != NULL)
427 *compvpp = NULL;
428 if (rootvp != rootdir)
429 VN_RELE(rootvp);
430 if (pp)
431 pn_free(pp);
432 return (0);
433 }
434
435 /*
436 * Traverse mount points.
437 * XXX why don't we need to hold a read lock here (call vn_vfsrlock)?
438 * What prevents a concurrent update to v_vfsmountedhere?
439 * Possible answer: if mounting, we might not see the mount
440 * if it is concurrently coming into existence, but that's
441 * really not much different from the thread running a bit slower.
442 * If unmounting, we may get into traverse() when we shouldn't,
443 * but traverse() will catch this case for us.
444 * (For this to work, fetching v_vfsmountedhere had better
445 * be atomic!)
446 */
447 if (vn_mountedvfs(cvp) != NULL) {
448 if ((error = traverse(&cvp)) != 0)
449 goto bad;
450 }
451
452 /*
453 * If we hit a symbolic link and there is more path to be
454 * translated or this operation does not wish to apply
455 * to a link, then place the contents of the link at the
456 * front of the remaining pathname.
457 */
458 if (cvp->v_type == VLNK && ((flags & FOLLOW) || pn_pathleft(pnp))) {
459 struct pathname linkpath;
460
461 if (++nlink > MAXSYMLINKS) {
462 error = ELOOP;
463 goto bad;
464 }
465 pn_alloc(&linkpath);
466 if (error = pn_getsymlink(cvp, &linkpath, cr)) {
467 pn_free(&linkpath);
468 goto bad;
469 }
470
471 if (auditing)
472 audit_symlink(pnp, &linkpath);
473
474 if (pn_pathleft(&linkpath) == 0)
475 (void) pn_set(&linkpath, ".");
476 error = pn_insert(pnp, &linkpath, strlen(component));
477 pn_free(&linkpath);
478 if (error)
479 goto bad;
480 VN_RELE(cvp);
481 cvp = NULL;
482 if (pnp->pn_pathlen == 0) {
483 error = ENOENT;
484 goto bad;
485 }
486 if (pnp->pn_path[0] == '/') {
487 do {
488 pnp->pn_path++;
489 pnp->pn_pathlen--;
490 } while (pnp->pn_path[0] == '/');
491 VN_RELE(vp);
492 vp = rootvp;
493 VN_HOLD(vp);
494 }
495 if (auditing)
496 audit_anchorpath(pnp, vp == rootvp);
497 if (pn_fixslash(pnp)) {
498 flags |= FOLLOW;
499 must_be_directory = 1;
500 }
501 goto next;
502 }
503
504 /*
505 * If rpnp is non-NULL, remember the resolved path name therein.
506 * Do not include "." components. Collapse occurrences of
507 * "previous/..", so long as "previous" is not itself "..".
508 * Exhausting rpnp results in error ENAMETOOLONG.
509 */
510 if (rpnp && strcmp(component, ".") != 0) {
511 size_t len;
512
513 if (strcmp(component, "..") == 0 &&
514 rpnp->pn_pathlen != 0 &&
515 !((rpnp->pn_pathlen > 2 &&
516 strncmp(rpnp->pn_path+rpnp->pn_pathlen-3, "/..", 3) == 0) ||
517 (rpnp->pn_pathlen == 2 &&
518 strncmp(rpnp->pn_path, "..", 2) == 0))) {
519 while (rpnp->pn_pathlen &&
520 rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
521 rpnp->pn_pathlen--;
522 if (rpnp->pn_pathlen > 1)
523 rpnp->pn_pathlen--;
524 rpnp->pn_path[rpnp->pn_pathlen] = '\0';
525 } else {
526 if (rpnp->pn_pathlen != 0 &&
527 rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
528 rpnp->pn_path[rpnp->pn_pathlen++] = '/';
529 if (flags & FIGNORECASE) {
530 /*
531 * Return the case-preserved name
532 * within the resolved path.
533 */
534 error = copystr(pp->pn_buf,
535 rpnp->pn_path + rpnp->pn_pathlen,
536 rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
537 } else {
538 error = copystr(component,
539 rpnp->pn_path + rpnp->pn_pathlen,
540 rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
541 }
542 if (error) /* copystr() returns ENAMETOOLONG */
543 goto bad;
544 rpnp->pn_pathlen += (len - 1);
545 ASSERT(rpnp->pn_bufsize > rpnp->pn_pathlen);
546 }
547 }
548
549 /*
550 * If no more components, return last directory (if wanted) and
551 * last component (if wanted).
552 */
553 if (pn_pathleft(pnp) == 0) {
554 /*
555 * If there was a trailing slash in the pathname,
556 * make sure the last component is a directory.
557 */
558 if (must_be_directory && cvp->v_type != VDIR) {
559 error = ENOTDIR;
560 goto bad;
561 }
562 if (dirvpp != NULL) {
563 /*
564 * Check that we have the real parent and not
565 * an alias of the last component.
566 */
567 if (vn_compare(vp, cvp)) {
568 if (auditing)
569 (void) audit_savepath(pnp, cvp, vp,
570 EINVAL, cr);
571 pn_setlast(pnp);
572 VN_RELE(vp);
573 VN_RELE(cvp);
574 if (rootvp != rootdir)
575 VN_RELE(rootvp);
576 if (pp)
577 pn_free(pp);
578 return (EINVAL);
579 }
580 *dirvpp = vp;
581 } else
582 VN_RELE(vp);
583 if (auditing)
584 (void) audit_savepath(pnp, cvp, vp, 0, cr);
585 if (pnp->pn_path == pnp->pn_buf)
586 (void) pn_set(pnp, ".");
587 else
588 pn_setlast(pnp);
589 if (rpnp) {
590 if (VN_CMP(cvp, rootvp))
591 (void) pn_set(rpnp, "/");
592 else if (rpnp->pn_pathlen == 0)
593 (void) pn_set(rpnp, ".");
594 }
595
596 if (compvpp != NULL)
597 *compvpp = cvp;
598 else
599 VN_RELE(cvp);
600 if (rootvp != rootdir)
601 VN_RELE(rootvp);
602 if (pp)
603 pn_free(pp);
604 return (0);
605 }
606
607 /*
608 * Skip over slashes from end of last component.
609 */
610 while (pnp->pn_path[0] == '/') {
611 pnp->pn_path++;
612 pnp->pn_pathlen--;
613 }
614
615 /*
616 * Searched through another level of directory:
617 * release previous directory handle and save new (result
618 * of lookup) as current directory.
619 */
620 VN_RELE(vp);
621 vp = cvp;
622 cvp = NULL;
623 goto next;
624
625 bad:
626 if (auditing) /* reached end of path */
627 (void) audit_savepath(pnp, cvp, vp, error, cr);
628 bad_noaudit:
629 /*
630 * Error. Release vnodes and return.
631 */
632 if (cvp)
633 VN_RELE(cvp);
634 /*
635 * If the error was ESTALE and the current directory to look in
636 * was the root for this lookup, the root for a mounted file
637 * system, or the starting directory for lookups, then
638 * return ENOENT instead of ESTALE. In this case, no recovery
639 * is possible by the higher level. If ESTALE was returned for
640 * some intermediate directory along the path, then recovery
641 * is potentially possible and retrying from the higher level
642 * will either correct the situation by purging stale cache
643 * entries or eventually get back to the point where no recovery
644 * is possible.
645 */
646 if (error == ESTALE &&
647 (VN_CMP(vp, rootvp) || (vp->v_flag & VROOT) || vp == startvp))
648 error = ENOENT;
649 VN_RELE(vp);
650 if (rootvp != rootdir)
651 VN_RELE(rootvp);
652 if (pp)
653 pn_free(pp);
654 return (error);
655 }
656
657 /*
658 * Traverse a mount point. Routine accepts a vnode pointer as a reference
659 * parameter and performs the indirection, releasing the original vnode.
660 */
661 int
traverse(vnode_t ** cvpp)662 traverse(vnode_t **cvpp)
663 {
664 int error = 0;
665 vnode_t *cvp;
666 vnode_t *tvp;
667 vfs_t *vfsp;
668
669 cvp = *cvpp;
670
671 /*
672 * If this vnode is mounted on, then we transparently indirect
673 * to the vnode which is the root of the mounted file system.
674 * Before we do this we must check that an unmount is not in
675 * progress on this vnode.
676 */
677
678 for (;;) {
679 /*
680 * Try to read lock the vnode. If this fails because
681 * the vnode is already write locked, then check to
682 * see whether it is the current thread which locked
683 * the vnode. If it is not, then read lock the vnode
684 * by waiting to acquire the lock.
685 *
686 * The code path in domount() is an example of support
687 * which needs to look up two pathnames and locks one
688 * of them in between the two lookups.
689 */
690 error = vn_vfsrlock(cvp);
691 if (error) {
692 if (!vn_vfswlock_held(cvp))
693 error = vn_vfsrlock_wait(cvp);
694 if (error != 0) {
695 /*
696 * lookuppn() expects a held vnode to be
697 * returned because it promptly calls
698 * VN_RELE after the error return
699 */
700 *cvpp = cvp;
701 return (error);
702 }
703 }
704
705 /*
706 * Reached the end of the mount chain?
707 */
708 vfsp = vn_mountedvfs(cvp);
709 if (vfsp == NULL) {
710 vn_vfsunlock(cvp);
711 break;
712 }
713
714 /*
715 * The read lock must be held across the call to VFS_ROOT() to
716 * prevent a concurrent unmount from destroying the vfs.
717 */
718 error = VFS_ROOT(vfsp, &tvp);
719 vn_vfsunlock(cvp);
720
721 if (error)
722 break;
723
724 VN_RELE(cvp);
725
726 cvp = tvp;
727 }
728
729 *cvpp = cvp;
730 return (error);
731 }
732
733 /*
734 * Return the lowermost vnode if this is a mountpoint.
735 */
736 static vnode_t *
vn_under(vnode_t * vp)737 vn_under(vnode_t *vp)
738 {
739 vnode_t *uvp;
740 vfs_t *vfsp;
741
742 while (vp->v_flag & VROOT) {
743
744 vfsp = vp->v_vfsp;
745 vfs_rlock_wait(vfsp);
746 if ((uvp = vfsp->vfs_vnodecovered) == NULL ||
747 (vfsp->vfs_flag & VFS_UNMOUNTED)) {
748 vfs_unlock(vfsp);
749 break;
750 }
751 VN_HOLD(uvp);
752 vfs_unlock(vfsp);
753 VN_RELE(vp);
754 vp = uvp;
755 }
756
757 return (vp);
758 }
759
760 static int
vnode_match(vnode_t * v1,vnode_t * v2,cred_t * cr)761 vnode_match(vnode_t *v1, vnode_t *v2, cred_t *cr)
762 {
763 vattr_t v1attr, v2attr;
764
765 /*
766 * If we have a device file, check to see if is a cloned open of the
767 * same device. For self-cloning devices, the major numbers will match.
768 * For devices cloned through the 'clone' driver, the minor number of
769 * the source device will be the same as the major number of the cloned
770 * device.
771 */
772 if ((v1->v_type == VCHR || v1->v_type == VBLK) &&
773 v1->v_type == v2->v_type) {
774 if ((spec_is_selfclone(v1) || spec_is_selfclone(v2)) &&
775 getmajor(v1->v_rdev) == getmajor(v2->v_rdev))
776 return (1);
777
778 if (spec_is_clone(v1) &&
779 getmajor(v1->v_rdev) == getminor(v2->v_rdev))
780 return (1);
781
782 if (spec_is_clone(v2) &&
783 getmajor(v2->v_rdev) == getminor(v1->v_rdev))
784 return (1);
785 }
786
787 v1attr.va_mask = v2attr.va_mask = AT_TYPE;
788
789 /*
790 * This check for symbolic links handles the pseudo-symlinks in procfs.
791 * These particular links have v_type of VDIR, but the attributes have a
792 * type of VLNK. We need to avoid these links because otherwise if we
793 * are currently in '/proc/self/fd', then '/proc/self/cwd' will compare
794 * as the same vnode.
795 */
796 if (VOP_GETATTR(v1, &v1attr, 0, cr, NULL) != 0 ||
797 VOP_GETATTR(v2, &v2attr, 0, cr, NULL) != 0 ||
798 v1attr.va_type == VLNK || v2attr.va_type == VLNK)
799 return (0);
800
801 v1attr.va_mask = v2attr.va_mask = AT_TYPE | AT_FSID | AT_NODEID;
802
803 if (VOP_GETATTR(v1, &v1attr, ATTR_REAL, cr, NULL) != 0 ||
804 VOP_GETATTR(v2, &v2attr, ATTR_REAL, cr, NULL) != 0)
805 return (0);
806
807 return (v1attr.va_fsid == v2attr.va_fsid &&
808 v1attr.va_nodeid == v2attr.va_nodeid);
809 }
810
811
812 /*
813 * Find the entry in the directory corresponding to the target vnode.
814 */
815 int
dirfindvp(vnode_t * vrootp,vnode_t * dvp,vnode_t * tvp,cred_t * cr,char * dbuf,size_t dlen,dirent64_t ** rdp)816 dirfindvp(vnode_t *vrootp, vnode_t *dvp, vnode_t *tvp, cred_t *cr, char *dbuf,
817 size_t dlen, dirent64_t **rdp)
818 {
819 size_t dbuflen;
820 struct iovec iov;
821 struct uio uio;
822 int error;
823 int eof;
824 vnode_t *cmpvp;
825 struct dirent64 *dp;
826 pathname_t pnp;
827
828 ASSERT(dvp->v_type == VDIR);
829
830 /*
831 * This is necessary because of the strange semantics of VOP_LOOKUP().
832 */
833 bzero(&pnp, sizeof (pnp));
834
835 uio.uio_iov = &iov;
836 uio.uio_iovcnt = 1;
837 uio.uio_segflg = UIO_SYSSPACE;
838 uio.uio_fmode = 0;
839 uio.uio_extflg = UIO_COPY_CACHED;
840 uio.uio_loffset = 0;
841
842 if ((error = VOP_ACCESS(dvp, VREAD, 0, cr, NULL)) != 0)
843 return (error);
844
845 dp = NULL;
846 eof = 0;
847
848 while (!eof) {
849 uio.uio_resid = dlen;
850 iov.iov_base = dbuf;
851 iov.iov_len = dlen;
852
853 (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
854 error = VOP_READDIR(dvp, &uio, cr, &eof, NULL, 0);
855 VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
856
857 dbuflen = dlen - uio.uio_resid;
858
859 if (error || dbuflen == 0)
860 break;
861
862 dp = (dirent64_t *)dbuf;
863 while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) {
864 /*
865 * Ignore '.' and '..' entries
866 */
867 if (strcmp(dp->d_name, ".") == 0 ||
868 strcmp(dp->d_name, "..") == 0) {
869 dp = (dirent64_t *)((intptr_t)dp +
870 dp->d_reclen);
871 continue;
872 }
873
874 error = VOP_LOOKUP(dvp, dp->d_name, &cmpvp, &pnp, 0,
875 vrootp, cr, NULL, NULL, NULL);
876
877 /*
878 * We only want to bail out if there was an error other
879 * than ENOENT. Otherwise, it could be that someone
880 * just removed an entry since the readdir() call, and
881 * the entry we want is further on in the directory.
882 */
883 if (error == 0) {
884 if (vnode_match(tvp, cmpvp, cr)) {
885 VN_RELE(cmpvp);
886 *rdp = dp;
887 return (0);
888 }
889
890 VN_RELE(cmpvp);
891 } else if (error != ENOENT) {
892 return (error);
893 }
894
895 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen);
896 }
897 }
898
899 /*
900 * Something strange has happened, this directory does not contain the
901 * specified vnode. This should never happen in the normal case, since
902 * we ensured that dvp is the parent of vp. This is possible in some
903 * rare conditions (races and the special .zfs directory).
904 */
905 if (error == 0) {
906 error = VOP_LOOKUP(dvp, ".zfs", &cmpvp, &pnp, 0, vrootp, cr,
907 NULL, NULL, NULL);
908 if (error == 0) {
909 if (vnode_match(tvp, cmpvp, cr)) {
910 (void) strcpy(dp->d_name, ".zfs");
911 dp->d_reclen = strlen(".zfs");
912 dp->d_off = 2;
913 dp->d_ino = 1;
914 *rdp = dp;
915 } else {
916 error = ENOENT;
917 }
918 VN_RELE(cmpvp);
919 }
920 }
921
922 return (error);
923 }
924
925 /*
926 * Given a global path (from rootdir), and a vnode that is the current root,
927 * return the portion of the path that is beneath the current root or NULL on
928 * failure. The path MUST be a resolved path (no '..' entries or symlinks),
929 * otherwise this function will fail.
930 */
931 static char *
localpath(char * path,struct vnode * vrootp,cred_t * cr)932 localpath(char *path, struct vnode *vrootp, cred_t *cr)
933 {
934 vnode_t *vp;
935 vnode_t *cvp;
936 char component[MAXNAMELEN];
937 char *ret = NULL;
938 pathname_t pn;
939
940 /*
941 * We use vn_compare() instead of VN_CMP() in order to detect lofs
942 * mounts and stacked vnodes.
943 */
944 if (vn_compare(vrootp, rootdir))
945 return (path);
946
947 if (pn_get(path, UIO_SYSSPACE, &pn) != 0)
948 return (NULL);
949
950 vp = rootdir;
951 VN_HOLD(vp);
952
953 if (vn_ismntpt(vp) && traverse(&vp) != 0) {
954 VN_RELE(vp);
955 pn_free(&pn);
956 return (NULL);
957 }
958
959 while (pn_pathleft(&pn)) {
960 pn_skipslash(&pn);
961
962 if (pn_getcomponent(&pn, component) != 0)
963 break;
964
965 if (VOP_LOOKUP(vp, component, &cvp, &pn, 0, rootdir, cr,
966 NULL, NULL, NULL) != 0)
967 break;
968 VN_RELE(vp);
969 vp = cvp;
970
971 if (vn_ismntpt(vp) && traverse(&vp) != 0)
972 break;
973
974 if (vn_compare(vp, vrootp)) {
975 ret = path + (pn.pn_path - pn.pn_buf);
976 break;
977 }
978 }
979
980 VN_RELE(vp);
981 pn_free(&pn);
982
983 return (ret);
984 }
985
986 /*
987 * Clean a stale v_path from a vnode. This is only performed if the v_path has
988 * not been altered since it was found to be stale
989 */
990 static void
vnode_clear_vpath(vnode_t * vp,char * vpath_old)991 vnode_clear_vpath(vnode_t *vp, char *vpath_old)
992 {
993 mutex_enter(&vp->v_lock);
994 if (vp->v_path != vn_vpath_empty && vp->v_path == vpath_old) {
995 vp->v_path = vn_vpath_empty;
996 mutex_exit(&vp->v_lock);
997 kmem_free(vpath_old, strlen(vpath_old) + 1);
998 } else {
999 mutex_exit(&vp->v_lock);
1000 }
1001 }
1002
1003 /*
1004 * Validate that a pathname refers to a given vnode.
1005 */
1006 static int
vnode_valid_pn(vnode_t * vp,vnode_t * vrootp,pathname_t * pn,pathname_t * rpn,int flags,cred_t * cr)1007 vnode_valid_pn(vnode_t *vp, vnode_t *vrootp, pathname_t *pn, pathname_t *rpn,
1008 int flags, cred_t *cr)
1009 {
1010 vnode_t *compvp;
1011 /*
1012 * If we are in a zone or a chroot environment, then we have to
1013 * take additional steps, since the path to the root might not
1014 * be readable with the current credentials, even though the
1015 * process can legitmately access the file. In this case, we
1016 * do the following:
1017 *
1018 * lookuppnvp() with all privileges to get the resolved path.
1019 * call localpath() to get the local portion of the path, and
1020 * continue as normal.
1021 *
1022 * If the the conversion to a local path fails, then we continue
1023 * as normal. This is a heuristic to make process object file
1024 * paths available from within a zone. Because lofs doesn't
1025 * support page operations, the vnode stored in the seg_t is
1026 * actually the underlying real vnode, not the lofs node itself.
1027 * Most of the time, the lofs path is the same as the underlying
1028 * vnode (for example, /usr/lib/libc.so.1).
1029 */
1030 if (vrootp != rootdir) {
1031 char *local = NULL;
1032
1033 VN_HOLD(rootdir);
1034 if (lookuppnvp(pn, rpn, FOLLOW, NULL, &compvp, rootdir,
1035 rootdir, kcred) == 0) {
1036 local = localpath(rpn->pn_path, vrootp, kcred);
1037 VN_RELE(compvp);
1038 }
1039
1040 /*
1041 * The original pn was changed through lookuppnvp().
1042 * Set it to local for next validation attempt.
1043 */
1044 if (local) {
1045 (void) pn_set(pn, local);
1046 } else {
1047 return (1);
1048 }
1049 }
1050
1051 /*
1052 * We should have a local path at this point, so start the search from
1053 * the root of the current process.
1054 */
1055 VN_HOLD(vrootp);
1056 if (vrootp != rootdir)
1057 VN_HOLD(vrootp);
1058
1059 /*
1060 * The FOLLOW flag only determines, if the final path component
1061 * is a symlink, whether lookuppnvp will return the symlink, or its
1062 * target.
1063 *
1064 * If the vp is a VLNK, then passing the FOLLOW flag will cause
1065 * lookuppnvp to return the vnode of its target, instead of itself, and
1066 * so vn_compare will fail. Therefore, we do not pass FOLLOW when our vp
1067 * is a symlink.
1068 *
1069 * If the vp is not a VLNK, then we pass FOLLOW on the off-chance that
1070 * the stored v_path ends at a symlink, instead of the symlink's target.
1071 */
1072 if (vp->v_type != VLNK)
1073 flags |= FOLLOW;
1074 else
1075 flags &= ~FOLLOW;
1076
1077 if (lookuppnvp(pn, rpn, flags, NULL, &compvp, vrootp, vrootp,
1078 cr) == 0) {
1079 /*
1080 * Check to see if the returned vnode is the same as the one we
1081 * expect.
1082 */
1083 if (vn_compare(vp, compvp) ||
1084 vnode_match(vp, compvp, cr)) {
1085 VN_RELE(compvp);
1086 return (0);
1087 } else {
1088 VN_RELE(compvp);
1089 }
1090 }
1091
1092 return (1);
1093 }
1094
/*
 * Struct for tracking vnodes with invalidated v_path entries during a
 * dirtopath reverse lookup. By keeping adequate state, those vnodes can be
 * revisited to populate v_path.
 *
 * Entries form a singly linked list; dirtopath pushes each new entry at the
 * head, and frees every entry (dropping both vnode holds) before returning.
 */
struct dirpath_walk {
	/* Previously recorded entry, or NULL at the end of the chain. */
	struct dirpath_walk *dw_next;
	/* Vnode whose name was found in dw_pvnode; held while on the chain. */
	vnode_t *dw_vnode;
	/* Parent directory that was searched; held while on the chain. */
	vnode_t *dw_pvnode;
	/* Length of dw_name, not counting the terminating null. */
	size_t dw_len;
	/* Private copy of the entry name; allocation is dw_len + 1 bytes. */
	char *dw_name;
};
1107
1108 /*
1109 * Given a directory, return the full, resolved path. This looks up "..",
1110 * searches for the given vnode in the parent, appends the component, etc. It
1111 * is used to implement vnodetopath() and getcwd() when the cached path fails.
1112 */
1113 static int
dirtopath(vnode_t * vrootp,vnode_t * vp,char * buf,size_t buflen,int flags,cred_t * cr)1114 dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags,
1115 cred_t *cr)
1116 {
1117 pathname_t pn, rpn, emptypn;
1118 vnode_t *pvp = NULL, *startvp = vp;
1119 int err = 0;
1120 size_t complen;
1121 dirent64_t *dp;
1122 char *bufloc, *dbuf;
1123 const size_t dlen = DIRENT64_RECLEN(MAXPATHLEN);
1124 struct dirpath_walk *dw_chain = NULL, *dw_entry;
1125
1126 /* Operation only allowed on directories */
1127 ASSERT(vp->v_type == VDIR);
1128
1129 /* We must have at least enough space for "/" */
1130 if (buflen < 2)
1131 return (ENAMETOOLONG);
1132
1133 /* Start at end of string with terminating null */
1134 bufloc = &buf[buflen - 1];
1135 *bufloc = '\0';
1136
1137 pn_alloc(&pn);
1138 pn_alloc(&rpn);
1139 dbuf = kmem_alloc(dlen, KM_SLEEP);
1140 bzero(&emptypn, sizeof (emptypn));
1141
1142 /*
1143 * Begin with an additional reference on vp. This will be decremented
1144 * during the loop.
1145 */
1146 VN_HOLD(vp);
1147
1148 for (;;) {
1149 int vprivs;
1150 hrtime_t cached_stamp;
1151
1152 /*
1153 * Return if we've reached the root. If the buffer is empty,
1154 * return '/'. We explicitly don't use vn_compare(), since it
1155 * compares the real vnodes. A lofs mount of '/' would produce
1156 * incorrect results otherwise.
1157 */
1158 if (VN_CMP(vrootp, vp)) {
1159 if (*bufloc == '\0')
1160 *--bufloc = '/';
1161 break;
1162 }
1163
1164 /*
1165 * If we've reached the VFS root, something has gone wrong. We
1166 * should have reached the root in the above check. The only
1167 * explantation is that 'vp' is not contained withing the given
1168 * root, in which case we return EPERM.
1169 */
1170 if (VN_CMP(rootdir, vp)) {
1171 err = EPERM;
1172 goto out;
1173 }
1174
1175 /*
1176 * Shortcut: see if this vnode has correct v_path. If so,
1177 * we have the work done.
1178 */
1179 mutex_enter(&vp->v_lock);
1180 if (vp->v_path != vn_vpath_empty &&
1181 pn_set(&pn, vp->v_path) == 0) {
1182 cached_stamp = vp->v_path_stamp;
1183 mutex_exit(&vp->v_lock);
1184 rpn.pn_path = rpn.pn_buf;
1185
1186 /* Ensure the v_path pointing to correct vnode */
1187 if (vnode_valid_pn(vp, vrootp, &pn, &rpn, flags,
1188 cr) == 0) {
1189 complen = strlen(rpn.pn_path);
1190 bufloc -= complen;
1191 if (bufloc < buf) {
1192 err = ERANGE;
1193 goto out;
1194 }
1195 bcopy(rpn.pn_path, bufloc, complen);
1196 break;
1197 } else {
1198 /*
1199 * Immediately nuke cached v_path entries known
1200 * to be invalid.
1201 */
1202 vn_clearpath(vp, cached_stamp);
1203 }
1204 } else {
1205 mutex_exit(&vp->v_lock);
1206 }
1207
1208 /*
1209 * Shortcuts failed, search for this vnode in its parent. If
1210 * this is a mountpoint, then get the vnode underneath.
1211 */
1212 if (vp->v_flag & VROOT)
1213 vp = vn_under(vp);
1214 if ((err = VOP_LOOKUP(vp, "..", &pvp, &emptypn, 0, vrootp, cr,
1215 NULL, NULL, NULL)) != 0)
1216 goto out;
1217
1218 /*
1219 * With extended attributes, it's possible for a directory to
1220 * have a parent that is a regular file. Check for that here.
1221 */
1222 if (pvp->v_type != VDIR) {
1223 err = ENOTDIR;
1224 goto out;
1225 }
1226
1227 /*
1228 * If this is true, something strange has happened. This is
1229 * only true if we are the root of a filesystem, which should
1230 * have been caught by the check above.
1231 */
1232 if (VN_CMP(pvp, vp)) {
1233 err = ENOENT;
1234 goto out;
1235 }
1236
1237 /*
1238 * Check if we have read and search privilege so, that
1239 * we can lookup the path in the directory
1240 */
1241 vprivs = (flags & LOOKUP_CHECKREAD) ? VREAD | VEXEC : VEXEC;
1242 if ((err = VOP_ACCESS(pvp, vprivs, 0, cr, NULL)) != 0) {
1243 goto out;
1244 }
1245
1246 /*
1247 * Search the parent directory for the entry corresponding to
1248 * this vnode.
1249 */
1250 if ((err = dirfindvp(vrootp, pvp, vp, cr, dbuf, dlen, &dp))
1251 != 0)
1252 goto out;
1253 complen = strlen(dp->d_name);
1254 bufloc -= complen;
1255 if (bufloc <= buf) {
1256 err = ENAMETOOLONG;
1257 goto out;
1258 }
1259 bcopy(dp->d_name, bufloc, complen);
1260
1261 /* Prepend a slash to the current path. */
1262 *--bufloc = '/';
1263
1264 /*
1265 * Record the name and directory for later reconstruction and
1266 * link it up with the others.
1267 */
1268 dw_entry = kmem_alloc(sizeof (*dw_entry), KM_SLEEP);
1269 dw_entry->dw_name = kmem_alloc(complen + 1, KM_SLEEP);
1270 VN_HOLD(dw_entry->dw_vnode = vp);
1271 VN_HOLD(dw_entry->dw_pvnode = pvp);
1272 bcopy(dp->d_name, dw_entry->dw_name, complen + 1);
1273 dw_entry->dw_len = complen;
1274 dw_entry->dw_next = dw_chain;
1275 dw_chain = dw_entry;
1276
1277 /* And continue with the next component */
1278 VN_RELE(vp);
1279 vp = pvp;
1280 pvp = NULL;
1281 }
1282
1283 /*
1284 * Place the path at the beginning of the buffer.
1285 */
1286 if (bufloc != buf)
1287 ovbcopy(bufloc, buf, buflen - (bufloc - buf));
1288
1289 out:
1290 /*
1291 * Walk over encountered directory entries which were afflicted with a
1292 * stale or absent v_path. If the dirtopath was successful, we should
1293 * possess the necessary information to populate all of them with a
1294 * valid v_path.
1295 *
1296 * While processing this list, it is safe to call vn_setpath despite
1297 * the fact that racing vnode actions may have altered v_path entries
1298 * while the above loopwas still executing. Any updated entries will
1299 * have a newer v_path_stamp value which prevents an invalid overwrite.
1300 *
1301 * If an error was encountered during the search, freeing the chain is
1302 * still required.
1303 */
1304 dw_entry = dw_chain;
1305 while (dw_entry != NULL) {
1306 struct dirpath_walk *next = dw_entry->dw_next;
1307
1308 if (err == 0) {
1309 vn_setpath(NULL, dw_entry->dw_pvnode,
1310 dw_entry->dw_vnode, dw_entry->dw_name,
1311 dw_entry->dw_len);
1312 }
1313
1314 VN_RELE(dw_entry->dw_vnode);
1315 VN_RELE(dw_entry->dw_pvnode);
1316 kmem_free(dw_entry->dw_name, dw_entry->dw_len + 1);
1317 kmem_free(dw_entry, sizeof (*dw_entry));
1318 dw_entry = next;
1319 }
1320
1321 /*
1322 * If the error was ESTALE and the current directory to look in
1323 * was the root for this lookup, the root for a mounted file
1324 * system, or the starting directory for lookups, then
1325 * return ENOENT instead of ESTALE. In this case, no recovery
1326 * is possible by the higher level. If ESTALE was returned for
1327 * some intermediate directory along the path, then recovery
1328 * is potentially possible and retrying from the higher level
1329 * will either correct the situation by purging stale cache
1330 * entries or eventually get back to the point where no recovery
1331 * is possible.
1332 */
1333 if (err == ESTALE &&
1334 (VN_CMP(vp, vrootp) || (vp->v_flag & VROOT) || vp == startvp))
1335 err = ENOENT;
1336
1337 kmem_free(dbuf, dlen);
1338 VN_RELE(vp);
1339 if (pvp)
1340 VN_RELE(pvp);
1341 pn_free(&pn);
1342 pn_free(&rpn);
1343
1344 return (err);
1345 }
1346
1347 /*
1348 * The additional flag, LOOKUP_CHECKREAD, is used to enforce artificial
1349 * constraints in order to be standards compliant. For example, if we have
1350 * the cached path of '/foo/bar', and '/foo' has permissions 100 (execute
1351 * only), then we can legitimately look up the path to the current working
1352 * directory without needing read permission. Existing standards tests,
1353 * however, assume that we are determining the path by repeatedly looking up
1354 * "..". We need to keep this behavior in order to maintain backwards
1355 * compatibility.
1356 */
1357 static int
vnodetopath_common(vnode_t * vrootp,vnode_t * vp,char * buf,size_t buflen,cred_t * cr,int flags)1358 vnodetopath_common(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen,
1359 cred_t *cr, int flags)
1360 {
1361 pathname_t pn;
1362 int ret = 0;
1363 vnode_t *realvp;
1364 boolean_t doclose = B_FALSE;
1365
1366 /*
1367 * If vrootp is NULL, get the root for curproc. Callers with any other
1368 * requirements should pass in a different vrootp.
1369 */
1370 if (vrootp == NULL) {
1371 proc_t *p = curproc;
1372
1373 mutex_enter(&p->p_lock);
1374 if ((vrootp = PTOU(p)->u_rdir) == NULL)
1375 vrootp = rootdir;
1376 VN_HOLD(vrootp);
1377 mutex_exit(&p->p_lock);
1378 } else {
1379 VN_HOLD(vrootp);
1380 }
1381
1382 /*
1383 * This is to get around an annoying artifact of the /proc filesystem,
1384 * which is the behavior of {cwd/root}. Trying to resolve this path
1385 * will result in /proc/pid/cwd instead of whatever the real working
1386 * directory is. We can't rely on VOP_REALVP(), since that will break
1387 * lofs. The only difference between procfs and lofs is that opening
1388 * the file will return the underling vnode in the case of procfs.
1389 */
1390 if (vp->v_type == VDIR && VOP_REALVP(vp, &realvp, NULL) == 0 &&
1391 realvp != vp) {
1392 VN_HOLD(vp);
1393 if (VOP_OPEN(&vp, FREAD, cr, NULL) == 0)
1394 doclose = B_TRUE;
1395 else
1396 VN_RELE(vp);
1397 }
1398
1399 /*
1400 * Check to see if we have a valid cached path in the vnode.
1401 */
1402 pn_alloc(&pn);
1403 mutex_enter(&vp->v_lock);
1404 if (vp->v_path != vn_vpath_empty) {
1405 hrtime_t cached_stamp;
1406 pathname_t rpn;
1407
1408 cached_stamp = vp->v_path_stamp;
1409 (void) pn_set(&pn, vp->v_path);
1410 mutex_exit(&vp->v_lock);
1411
1412 /* We should only cache absolute paths */
1413 ASSERT(pn.pn_buf[0] == '/');
1414
1415 pn_alloc(&rpn);
1416 if (vnode_valid_pn(vp, vrootp, &pn, &rpn, flags, cr) == 0) {
1417 /* Return the result, if we're able. */
1418 if (buflen > rpn.pn_pathlen) {
1419 bcopy(rpn.pn_path, buf, rpn.pn_pathlen + 1);
1420 } else {
1421 ret = ENAMETOOLONG;
1422 }
1423 pn_free(&pn);
1424 pn_free(&rpn);
1425 goto out;
1426 }
1427 pn_free(&rpn);
1428 vn_clearpath(vp, cached_stamp);
1429 } else {
1430 mutex_exit(&vp->v_lock);
1431 }
1432 pn_free(&pn);
1433
1434 if (vp->v_type != VDIR) {
1435 /*
1436 * The reverse lookup tricks used by dirtopath aren't possible
1437 * for non-directory entries. The best which can be done is
1438 * clearing any stale v_path so later lookups can potentially
1439 * repopulate it with a valid path.
1440 */
1441 ret = ENOENT;
1442 } else {
1443 ret = dirtopath(vrootp, vp, buf, buflen, flags, cr);
1444 }
1445
1446 out:
1447 VN_RELE(vrootp);
1448 if (doclose) {
1449 (void) VOP_CLOSE(vp, FREAD, 1, 0, cr, NULL);
1450 VN_RELE(vp);
1451 }
1452
1453 return (ret);
1454 }
1455
1456 int
vnodetopath(vnode_t * vrootp,vnode_t * vp,char * buf,size_t buflen,cred_t * cr)1457 vnodetopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr)
1458 {
1459 return (vnodetopath_common(vrootp, vp, buf, buflen, cr, 0));
1460 }
1461
1462 int
dogetcwd(char * buf,size_t buflen)1463 dogetcwd(char *buf, size_t buflen)
1464 {
1465 int ret;
1466 vnode_t *vp;
1467 vnode_t *compvp;
1468 refstr_t *cwd, *oldcwd;
1469 const char *value;
1470 pathname_t rpnp, pnp;
1471 proc_t *p = curproc;
1472
1473 /*
1474 * Check to see if there is a cached version of the cwd. If so, lookup
1475 * the cached value and make sure it is the same vnode.
1476 */
1477 mutex_enter(&p->p_lock);
1478 if ((cwd = PTOU(p)->u_cwd) != NULL)
1479 refstr_hold(cwd);
1480 vp = PTOU(p)->u_cdir;
1481 VN_HOLD(vp);
1482 mutex_exit(&p->p_lock);
1483
1484 /*
1485 * Make sure we have permission to access the current directory.
1486 */
1487 if ((ret = VOP_ACCESS(vp, VEXEC, 0, CRED(), NULL)) != 0) {
1488 if (cwd != NULL)
1489 refstr_rele(cwd);
1490 VN_RELE(vp);
1491 return (ret);
1492 }
1493
1494 if (cwd) {
1495 value = refstr_value(cwd);
1496 if ((ret = pn_get((char *)value, UIO_SYSSPACE, &pnp)) != 0) {
1497 refstr_rele(cwd);
1498 VN_RELE(vp);
1499 return (ret);
1500 }
1501
1502 pn_alloc(&rpnp);
1503
1504 if (lookuppn(&pnp, &rpnp, NO_FOLLOW, NULL, &compvp) == 0) {
1505
1506 if (VN_CMP(vp, compvp) &&
1507 strcmp(value, rpnp.pn_path) == 0) {
1508 VN_RELE(compvp);
1509 VN_RELE(vp);
1510 pn_free(&pnp);
1511 pn_free(&rpnp);
1512 if (strlen(value) + 1 > buflen) {
1513 refstr_rele(cwd);
1514 return (ENAMETOOLONG);
1515 }
1516 bcopy(value, buf, strlen(value) + 1);
1517 refstr_rele(cwd);
1518 return (0);
1519 }
1520
1521 VN_RELE(compvp);
1522 }
1523
1524 pn_free(&rpnp);
1525 pn_free(&pnp);
1526
1527 refstr_rele(cwd);
1528 }
1529
1530 ret = vnodetopath_common(NULL, vp, buf, buflen, CRED(),
1531 LOOKUP_CHECKREAD);
1532
1533 VN_RELE(vp);
1534
1535 /*
1536 * Store the new cwd and replace the existing cached copy.
1537 */
1538 if (ret == 0)
1539 cwd = refstr_alloc(buf);
1540 else
1541 cwd = NULL;
1542
1543 mutex_enter(&p->p_lock);
1544 oldcwd = PTOU(p)->u_cwd;
1545 PTOU(p)->u_cwd = cwd;
1546 mutex_exit(&p->p_lock);
1547
1548 if (oldcwd)
1549 refstr_rele(oldcwd);
1550
1551 return (ret);
1552 }
1553