xref: /freebsd/sys/kern/vfs_syscalls.c (revision 4f29da19bd44f0e99f021510460a81bf754c21d2)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/mac.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/mutex.h>
52 #include <sys/sysproto.h>
53 #include <sys/namei.h>
54 #include <sys/filedesc.h>
55 #include <sys/kernel.h>
56 #include <sys/fcntl.h>
57 #include <sys/file.h>
58 #include <sys/limits.h>
59 #include <sys/linker.h>
60 #include <sys/stat.h>
61 #include <sys/sx.h>
62 #include <sys/unistd.h>
63 #include <sys/vnode.h>
64 #include <sys/proc.h>
65 #include <sys/dirent.h>
66 #include <sys/extattr.h>
67 #include <sys/jail.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/stdarg.h>
72 
73 #include <security/audit/audit.h>
74 
75 #include <vm/vm.h>
76 #include <vm/vm_object.h>
77 #include <vm/vm_page.h>
78 #include <vm/uma.h>
79 
/*
 * Forward declarations for helpers that are private to this file.
 */
static int chroot_refuse_vdir_fds(struct filedesc *fdp);
static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
static int setfmode(struct thread *td, struct vnode *, int);
static int setfflags(struct thread *td, struct vnode *, int);
static int setutimes(struct thread *td, struct vnode *,
    const struct timespec *, int, int);
static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
    struct thread *td);

static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
    size_t nbytes, struct thread *td);

/*
 * Globally visible function-pointer hook.
 * NOTE(review): presumably installed by the union filesystem to check
 * directories; where it is assigned and called is not established by the
 * declarations here -- confirm against its users.
 */
int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);

/*
 * The module initialization routine for POSIX asynchronous I/O will
 * set this to the version of AIO that it implements.  (Zero means
 * that it is not implemented.)  This value is used here by pathconf()
 * and in kern_descrip.c by fpathconf().
 */
int async_io_version;
102 
/*
 * Sync each mounted filesystem.
 *
 * Walks the global mount list and, for every mount that is not read-only
 * and for which both vfs_busy() and vn_start_write() succeed without
 * waiting, calls vfs_msync() and VFS_SYNC() in MNT_NOWAIT mode.  Mounts
 * that cannot be busied right away are skipped.  The 'uap' argument is
 * not used and the function always returns 0.
 */
#ifndef _SYS_SYSPROTO_H_
struct sync_args {
	int     dummy;
};
#endif

#ifdef DEBUG
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif

/* ARGSUSED */
int
sync(td, uap)
	struct thread *td;
	struct sync_args *uap;
{
	struct mount *mp, *nmp;
	int vfslocked;
	int asyncflag;

	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/*
		 * Non-blocking busy attempt.  On failure simply move on to
		 * the next entry; the loop does not re-lock mountlist_mtx on
		 * this path, so it is evidently still held.
		 */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		vfslocked = VFS_LOCK_GIANT(mp);
		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
			/*
			 * Clear MNT_ASYNC for the duration of the sync and
			 * put it back afterwards if it was set.
			 */
			asyncflag = mp->mnt_flag & MNT_ASYNC;
			mp->mnt_flag &= ~MNT_ASYNC;
			vfs_msync(mp, MNT_NOWAIT);
			VFS_SYNC(mp, MNT_NOWAIT, td);
			mp->mnt_flag |= asyncflag;
			vn_finished_write(mp);
		}
		VFS_UNLOCK_GIANT(vfslocked);
		/*
		 * Re-take the list lock before looking up the successor, and
		 * only then drop the busy reference on the current mount.
		 */
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, td);
	}
	mtx_unlock(&mountlist_mtx);
	return (0);
}
151 
152 /* XXX PRISON: could be per prison flag */
153 static int prison_quotas;
154 #if 0
155 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
156 #endif
157 
158 /*
159  * Change filesystem quotas.
160  *
161  * MP SAFE
162  */
163 #ifndef _SYS_SYSPROTO_H_
164 struct quotactl_args {
165 	char *path;
166 	int cmd;
167 	int uid;
168 	caddr_t arg;
169 };
170 #endif
171 int
172 quotactl(td, uap)
173 	struct thread *td;
174 	register struct quotactl_args /* {
175 		char *path;
176 		int cmd;
177 		int uid;
178 		caddr_t arg;
179 	} */ *uap;
180 {
181 	struct mount *mp, *vmp;
182 	int vfslocked;
183 	int error;
184 	struct nameidata nd;
185 
186 	if (jailed(td->td_ucred) && !prison_quotas)
187 		return (EPERM);
188 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
189 	   UIO_USERSPACE, uap->path, td);
190 	if ((error = namei(&nd)) != 0)
191 		return (error);
192 	vfslocked = NDHASGIANT(&nd);
193 	NDFREE(&nd, NDF_ONLY_PNBUF);
194 	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
195 	mp = nd.ni_vp->v_mount;
196 	vrele(nd.ni_vp);
197 	if (error)
198 		goto out;
199 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
200 	vn_finished_write(vmp);
201 out:
202 	VFS_UNLOCK_GIANT(vfslocked);
203 	return (error);
204 }
205 
206 /*
207  * Get filesystem statistics.
208  */
209 #ifndef _SYS_SYSPROTO_H_
210 struct statfs_args {
211 	char *path;
212 	struct statfs *buf;
213 };
214 #endif
215 int
216 statfs(td, uap)
217 	struct thread *td;
218 	register struct statfs_args /* {
219 		char *path;
220 		struct statfs *buf;
221 	} */ *uap;
222 {
223 	struct statfs sf;
224 	int error;
225 
226 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
227 	if (error == 0)
228 		error = copyout(&sf, uap->buf, sizeof(sf));
229 	return (error);
230 }
231 
232 int
233 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
234     struct statfs *buf)
235 {
236 	struct mount *mp;
237 	struct statfs *sp, sb;
238 	int vfslocked;
239 	int error;
240 	struct nameidata nd;
241 
242 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
243 	    pathseg, path, td);
244 	error = namei(&nd);
245 	if (error)
246 		return (error);
247 	vfslocked = NDHASGIANT(&nd);
248 	mp = nd.ni_vp->v_mount;
249 	vfs_ref(mp);
250 	NDFREE(&nd, NDF_ONLY_PNBUF);
251 	vput(nd.ni_vp);
252 #ifdef MAC
253 	error = mac_check_mount_stat(td->td_ucred, mp);
254 	if (error) {
255 		vfs_rel(mp);
256 		goto out;
257 	}
258 #endif
259 	/*
260 	 * Set these in case the underlying filesystem fails to do so.
261 	 */
262 	sp = &mp->mnt_stat;
263 	sp->f_version = STATFS_VERSION;
264 	sp->f_namemax = NAME_MAX;
265 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
266 	error = VFS_STATFS(mp, sp, td);
267 	vfs_rel(mp);
268 	if (error)
269 		goto out;
270 	if (suser(td)) {
271 		bcopy(sp, &sb, sizeof(sb));
272 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
273 		prison_enforce_statfs(td->td_ucred, mp, &sb);
274 		sp = &sb;
275 	}
276 	*buf = *sp;
277 out:
278 	VFS_UNLOCK_GIANT(vfslocked);
279 	if (mtx_owned(&Giant))
280 		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
281 	return (error);
282 }
283 
284 /*
285  * Get filesystem statistics.
286  */
287 #ifndef _SYS_SYSPROTO_H_
288 struct fstatfs_args {
289 	int fd;
290 	struct statfs *buf;
291 };
292 #endif
293 int
294 fstatfs(td, uap)
295 	struct thread *td;
296 	register struct fstatfs_args /* {
297 		int fd;
298 		struct statfs *buf;
299 	} */ *uap;
300 {
301 	struct statfs sf;
302 	int error;
303 
304 	error = kern_fstatfs(td, uap->fd, &sf);
305 	if (error == 0)
306 		error = copyout(&sf, uap->buf, sizeof(sf));
307 	return (error);
308 }
309 
310 int
311 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
312 {
313 	struct file *fp;
314 	struct mount *mp;
315 	struct statfs *sp, sb;
316 	int vfslocked;
317 	struct vnode *vp;
318 	int error;
319 
320 	AUDIT_ARG(fd, fd);
321 	error = getvnode(td->td_proc->p_fd, fd, &fp);
322 	if (error)
323 		return (error);
324 	vp = fp->f_vnode;
325 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
326 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
327 #ifdef AUDIT
328 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
329 #endif
330 	mp = vp->v_mount;
331 	if (mp)
332 		vfs_ref(mp);
333 	VOP_UNLOCK(vp, 0, td);
334 	fdrop(fp, td);
335 	if (vp->v_iflag & VI_DOOMED) {
336 		if (mp)
337 			vfs_rel(mp);
338 		error = EBADF;
339 		goto out;
340 	}
341 #ifdef MAC
342 	error = mac_check_mount_stat(td->td_ucred, mp);
343 	if (error) {
344 		vfs_rel(mp);
345 		goto out;
346 	}
347 #endif
348 	/*
349 	 * Set these in case the underlying filesystem fails to do so.
350 	 */
351 	sp = &mp->mnt_stat;
352 	sp->f_version = STATFS_VERSION;
353 	sp->f_namemax = NAME_MAX;
354 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
355 	error = VFS_STATFS(mp, sp, td);
356 	vfs_rel(mp);
357 	if (error)
358 		goto out;
359 	if (suser(td)) {
360 		bcopy(sp, &sb, sizeof(sb));
361 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
362 		prison_enforce_statfs(td->td_ucred, mp, &sb);
363 		sp = &sb;
364 	}
365 	*buf = *sp;
366 out:
367 	VFS_UNLOCK_GIANT(vfslocked);
368 	return (error);
369 }
370 
371 /*
372  * Get statistics on all filesystems.
373  */
374 #ifndef _SYS_SYSPROTO_H_
375 struct getfsstat_args {
376 	struct statfs *buf;
377 	long bufsize;
378 	int flags;
379 };
380 #endif
381 int
382 getfsstat(td, uap)
383 	struct thread *td;
384 	register struct getfsstat_args /* {
385 		struct statfs *buf;
386 		long bufsize;
387 		int flags;
388 	} */ *uap;
389 {
390 
391 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
392 	    uap->flags));
393 }
394 
/*
 * Collect statistics for the mounted filesystems visible to the caller.
 *
 *	td	calling thread; the number of entries is returned in
 *		td->td_retval[0]
 *	buf	in: for UIO_USERSPACE, *buf is the user buffer;
 *		out: for UIO_SYSSPACE with bufsize > 0, *buf is set to a
 *		freshly malloc'ed (M_TEMP) array
 *	bufsize	size of the result buffer in bytes; 0 means "just count"
 *	bufseg	address space of the result buffer
 *	flags	MNT_WAIT / MNT_NOWAIT / MNT_LAZY refresh policy
 *
 * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
 * 	The caller is responsible for freeing memory which will be allocated
 *	in '*buf'.
 *
 * Returns 0, or the copyout() error when writing to a user buffer fails.
 * td->td_retval[0] is the number of mounts counted, capped at the buffer
 * capacity when a buffer was supplied.
 */
int
kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
    enum uio_seg bufseg, int flags)
{
	struct mount *mp, *nmp;
	struct statfs *sfsp, *sp, sb;
	size_t count, maxcount;
	int vfslocked;
	int error;

	maxcount = bufsize / sizeof(struct statfs);
	if (bufsize == 0)
		sfsp = NULL;
	else if (bufseg == UIO_USERSPACE)
		sfsp = *buf;
	else /* if (bufseg == UIO_SYSSPACE) */ {
		/*
		 * Size the kernel buffer by the current number of mounts,
		 * but never larger than what the caller asked for.
		 */
		count = 0;
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
			count++;
		}
		mtx_unlock(&mountlist_mtx);
		if (maxcount > count)
			maxcount = count;
		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
		    M_WAITOK);
	}
	count = 0;
	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/* Mounts the caller may not see are neither copied nor counted. */
		if (prison_canseemount(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		/* Non-blocking busy attempt; skip the mount if it fails. */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		vfslocked = VFS_LOCK_GIANT(mp);
		if (sfsp && count < maxcount) {
			sp = &mp->mnt_stat;
			/*
			 * Set these in case the underlying filesystem
			 * fails to do so.
			 */
			sp->f_version = STATFS_VERSION;
			sp->f_namemax = NAME_MAX;
			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
			/*
			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
			 * overrides MNT_WAIT.
			 */
			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
			    (flags & MNT_WAIT)) &&
			    (error = VFS_STATFS(mp, sp, td))) {
				/*
				 * Refresh failed: skip this mount without
				 * counting it.
				 */
				VFS_UNLOCK_GIANT(vfslocked);
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp, td);
				continue;
			}
			if (suser(td)) {
				/*
				 * suser() failed for the caller: hand out a
				 * private copy with the fsid zeroed and
				 * passed through prison_enforce_statfs().
				 */
				bcopy(sp, &sb, sizeof(sb));
				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
				prison_enforce_statfs(td->td_ucred, mp, &sb);
				sp = &sb;
			}
			if (bufseg == UIO_SYSSPACE)
				bcopy(sp, sfsp, sizeof(*sp));
			else /* if (bufseg == UIO_USERSPACE) */ {
				error = copyout(sp, sfsp, sizeof(*sp));
				if (error) {
					/*
					 * mountlist_mtx is not re-taken on
					 * this path, so only the busy state
					 * and Giant need to be released.
					 */
					vfs_unbusy(mp, td);
					VFS_UNLOCK_GIANT(vfslocked);
					return (error);
				}
			}
			sfsp++;
		}
		VFS_UNLOCK_GIANT(vfslocked);
		/* Counted even when the buffer is already full. */
		count++;
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, td);
	}
	mtx_unlock(&mountlist_mtx);
	if (sfsp && count > maxcount)
		td->td_retval[0] = maxcount;
	else
		td->td_retval[0] = count;
	return (0);
}
499 
500 #ifdef COMPAT_FREEBSD4
501 /*
502  * Get old format filesystem statistics.
503  */
504 static void cvtstatfs(struct statfs *, struct ostatfs *);
505 
506 #ifndef _SYS_SYSPROTO_H_
507 struct freebsd4_statfs_args {
508 	char *path;
509 	struct ostatfs *buf;
510 };
511 #endif
512 int
513 freebsd4_statfs(td, uap)
514 	struct thread *td;
515 	struct freebsd4_statfs_args /* {
516 		char *path;
517 		struct ostatfs *buf;
518 	} */ *uap;
519 {
520 	struct ostatfs osb;
521 	struct statfs sf;
522 	int error;
523 
524 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
525 	if (error)
526 		return (error);
527 	cvtstatfs(&sf, &osb);
528 	return (copyout(&osb, uap->buf, sizeof(osb)));
529 }
530 
531 /*
532  * Get filesystem statistics.
533  */
534 #ifndef _SYS_SYSPROTO_H_
535 struct freebsd4_fstatfs_args {
536 	int fd;
537 	struct ostatfs *buf;
538 };
539 #endif
540 int
541 freebsd4_fstatfs(td, uap)
542 	struct thread *td;
543 	struct freebsd4_fstatfs_args /* {
544 		int fd;
545 		struct ostatfs *buf;
546 	} */ *uap;
547 {
548 	struct ostatfs osb;
549 	struct statfs sf;
550 	int error;
551 
552 	error = kern_fstatfs(td, uap->fd, &sf);
553 	if (error)
554 		return (error);
555 	cvtstatfs(&sf, &osb);
556 	return (copyout(&osb, uap->buf, sizeof(osb)));
557 }
558 
559 /*
560  * Get statistics on all filesystems.
561  */
562 #ifndef _SYS_SYSPROTO_H_
563 struct freebsd4_getfsstat_args {
564 	struct ostatfs *buf;
565 	long bufsize;
566 	int flags;
567 };
568 #endif
569 int
570 freebsd4_getfsstat(td, uap)
571 	struct thread *td;
572 	register struct freebsd4_getfsstat_args /* {
573 		struct ostatfs *buf;
574 		long bufsize;
575 		int flags;
576 	} */ *uap;
577 {
578 	struct statfs *buf, *sp;
579 	struct ostatfs osb;
580 	size_t count, size;
581 	int error;
582 
583 	count = uap->bufsize / sizeof(struct ostatfs);
584 	size = count * sizeof(struct statfs);
585 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
586 	if (size > 0) {
587 		count = td->td_retval[0];
588 		sp = buf;
589 		while (count > 0 && error == 0) {
590 			cvtstatfs(sp, &osb);
591 			error = copyout(&osb, uap->buf, sizeof(osb));
592 			sp++;
593 			uap->buf++;
594 			count--;
595 		}
596 		free(buf, M_TEMP);
597 	}
598 	return (error);
599 }
600 
601 /*
602  * Implement fstatfs() for (NFS) file handles.
603  */
604 #ifndef _SYS_SYSPROTO_H_
605 struct freebsd4_fhstatfs_args {
606 	struct fhandle *u_fhp;
607 	struct ostatfs *buf;
608 };
609 #endif
610 int
611 freebsd4_fhstatfs(td, uap)
612 	struct thread *td;
613 	struct freebsd4_fhstatfs_args /* {
614 		struct fhandle *u_fhp;
615 		struct ostatfs *buf;
616 	} */ *uap;
617 {
618 	struct ostatfs osb;
619 	struct statfs sf;
620 	fhandle_t fh;
621 	int error;
622 
623 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
624 	if (error)
625 		return (error);
626 	error = kern_fhstatfs(td, fh, &sf);
627 	if (error)
628 		return (error);
629 	cvtstatfs(&sf, &osb);
630 	return (copyout(&osb, uap->buf, sizeof(osb)));
631 }
632 
633 /*
634  * Convert a new format statfs structure to an old format statfs structure.
635  */
636 static void
637 cvtstatfs(nsp, osp)
638 	struct statfs *nsp;
639 	struct ostatfs *osp;
640 {
641 
642 	bzero(osp, sizeof(*osp));
643 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
644 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
645 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
646 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
647 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
648 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
649 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
650 	osp->f_owner = nsp->f_owner;
651 	osp->f_type = nsp->f_type;
652 	osp->f_flags = nsp->f_flags;
653 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
654 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
655 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
656 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
657 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
658 	    MIN(MFSNAMELEN, OMFSNAMELEN));
659 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
660 	    MIN(MNAMELEN, OMNAMELEN));
661 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
662 	    MIN(MNAMELEN, OMNAMELEN));
663 	osp->f_fsid = nsp->f_fsid;
664 }
665 #endif /* COMPAT_FREEBSD4 */
666 
/*
 * Change current working directory to a given file descriptor.
 *
 * The descriptor must refer to a vnode (checked by getvnode()).  The vnode
 * must be a directory that passes the MAC chdir check (when MAC is
 * configured) and a VEXEC access check.  If filesystems are mounted on the
 * directory, the root of the topmost one becomes the new working
 * directory.  Returns 0 on success or an errno from getvnode(), the
 * checks above (ENOTDIR for a non-directory) or VFS_ROOT().
 */
#ifndef _SYS_SYSPROTO_H_
struct fchdir_args {
	int	fd;
};
#endif
int
fchdir(td, uap)
	struct thread *td;
	struct fchdir_args /* {
		int fd;
	} */ *uap;
{
	register struct filedesc *fdp = td->td_proc->p_fd;
	struct vnode *vp, *tdp, *vpold;
	struct mount *mp;
	struct file *fp;
	int vfslocked;
	int error;

	AUDIT_ARG(fd, uap->fd);
	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
		return (error);
	vp = fp->f_vnode;
	/* Take our own vnode reference before giving up the file reference. */
	VREF(vp);
	fdrop(fp, td);
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	AUDIT_ARG(vnode, vp, ARG_VNODE1);
	if (vp->v_type != VDIR)
		error = ENOTDIR;
#ifdef MAC
	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
	}
#endif
	else
		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	/*
	 * Descend through anything mounted on this directory.  Each round
	 * replaces vp (and the Giant state that goes with it) by the locked
	 * root vnode of the covering filesystem.
	 */
	while (!error && (mp = vp->v_mountedhere) != NULL) {
		int tvfslocked;
		/* Busy failed: re-read v_mountedhere and try again. */
		if (vfs_busy(mp, 0, 0, td))
			continue;
		tvfslocked = VFS_LOCK_GIANT(mp);
		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
		vfs_unbusy(mp, td);
		if (error) {
			/* vp and vfslocked are released by the error path below. */
			VFS_UNLOCK_GIANT(tvfslocked);
			break;
		}
		vput(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		vp = tdp;
		vfslocked = tvfslocked;
	}
	if (error) {
		vput(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}
	/* Keep the reference but drop the lock; the reference moves to fd_cdir. */
	VOP_UNLOCK(vp, 0, td);
	VFS_UNLOCK_GIANT(vfslocked);
	FILEDESC_LOCK_FAST(fdp);
	vpold = fdp->fd_cdir;
	fdp->fd_cdir = vp;
	FILEDESC_UNLOCK_FAST(fdp);
	/* Release the previous working directory outside the filedesc lock. */
	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
	vrele(vpold);
	VFS_UNLOCK_GIANT(vfslocked);
	return (0);
}
738 
739 /*
740  * Change current working directory (``.'').
741  */
742 #ifndef _SYS_SYSPROTO_H_
743 struct chdir_args {
744 	char	*path;
745 };
746 #endif
747 int
748 chdir(td, uap)
749 	struct thread *td;
750 	struct chdir_args /* {
751 		char *path;
752 	} */ *uap;
753 {
754 
755 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
756 }
757 
758 int
759 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
760 {
761 	register struct filedesc *fdp = td->td_proc->p_fd;
762 	int error;
763 	struct nameidata nd;
764 	struct vnode *vp;
765 	int vfslocked;
766 
767 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
768 	    pathseg, path, td);
769 	if ((error = namei(&nd)) != 0)
770 		return (error);
771 	vfslocked = NDHASGIANT(&nd);
772 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
773 		vput(nd.ni_vp);
774 		VFS_UNLOCK_GIANT(vfslocked);
775 		NDFREE(&nd, NDF_ONLY_PNBUF);
776 		return (error);
777 	}
778 	VOP_UNLOCK(nd.ni_vp, 0, td);
779 	VFS_UNLOCK_GIANT(vfslocked);
780 	NDFREE(&nd, NDF_ONLY_PNBUF);
781 	FILEDESC_LOCK_FAST(fdp);
782 	vp = fdp->fd_cdir;
783 	fdp->fd_cdir = nd.ni_vp;
784 	FILEDESC_UNLOCK_FAST(fdp);
785 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
786 	vrele(vp);
787 	VFS_UNLOCK_GIANT(vfslocked);
788 	return (0);
789 }
790 
791 /*
792  * Helper function for raised chroot(2) security function:  Refuse if
793  * any filedescriptors are open directories.
794  */
795 static int
796 chroot_refuse_vdir_fds(fdp)
797 	struct filedesc *fdp;
798 {
799 	struct vnode *vp;
800 	struct file *fp;
801 	int fd;
802 
803 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
804 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
805 		fp = fget_locked(fdp, fd);
806 		if (fp == NULL)
807 			continue;
808 		if (fp->f_type == DTYPE_VNODE) {
809 			vp = fp->f_vnode;
810 			if (vp->v_type == VDIR)
811 				return (EPERM);
812 		}
813 	}
814 	return (0);
815 }
816 
/*
 * This sysctl determines if we will allow a process to chroot(2) if it
 * has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 *
 * The value is consulted by change_root(): for 0, and for 1 when the
 * process root directory is no longer rootvnode, the descriptor table is
 * scanned with chroot_refuse_vdir_fds() before the root is changed.
 * Exported read-write as kern.chroot_allow_open_directories; the default
 * is 1.
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");
829 
830 /*
831  * Change notion of root (``/'') directory.
832  */
833 #ifndef _SYS_SYSPROTO_H_
834 struct chroot_args {
835 	char	*path;
836 };
837 #endif
838 int
839 chroot(td, uap)
840 	struct thread *td;
841 	struct chroot_args /* {
842 		char *path;
843 	} */ *uap;
844 {
845 	int error;
846 	struct nameidata nd;
847 	int vfslocked;
848 
849 	error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
850 	if (error)
851 		return (error);
852 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
853 	    UIO_USERSPACE, uap->path, td);
854 	error = namei(&nd);
855 	if (error)
856 		goto error;
857 	vfslocked = NDHASGIANT(&nd);
858 	if ((error = change_dir(nd.ni_vp, td)) != 0)
859 		goto e_vunlock;
860 #ifdef MAC
861 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
862 		goto e_vunlock;
863 #endif
864 	VOP_UNLOCK(nd.ni_vp, 0, td);
865 	error = change_root(nd.ni_vp, td);
866 	vrele(nd.ni_vp);
867 	VFS_UNLOCK_GIANT(vfslocked);
868 	NDFREE(&nd, NDF_ONLY_PNBUF);
869 	return (error);
870 e_vunlock:
871 	vput(nd.ni_vp);
872 	VFS_UNLOCK_GIANT(vfslocked);
873 error:
874 	NDFREE(&nd, NDF_ONLY_PNBUF);
875 	return (error);
876 }
877 
878 /*
879  * Common routine for chroot and chdir.  Callers must provide a locked vnode
880  * instance.
881  */
882 int
883 change_dir(vp, td)
884 	struct vnode *vp;
885 	struct thread *td;
886 {
887 	int error;
888 
889 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
890 	if (vp->v_type != VDIR)
891 		return (ENOTDIR);
892 #ifdef MAC
893 	error = mac_check_vnode_chdir(td->td_ucred, vp);
894 	if (error)
895 		return (error);
896 #endif
897 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
898 	return (error);
899 }
900 
901 /*
902  * Common routine for kern_chroot() and jail_attach().  The caller is
903  * responsible for invoking suser() and mac_check_chroot() to authorize this
904  * operation.
905  */
906 int
907 change_root(vp, td)
908 	struct vnode *vp;
909 	struct thread *td;
910 {
911 	struct filedesc *fdp;
912 	struct vnode *oldvp;
913 	int vfslocked;
914 	int error;
915 
916 	VFS_ASSERT_GIANT(vp->v_mount);
917 	fdp = td->td_proc->p_fd;
918 	FILEDESC_LOCK(fdp);
919 	if (chroot_allow_open_directories == 0 ||
920 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
921 		error = chroot_refuse_vdir_fds(fdp);
922 		if (error) {
923 			FILEDESC_UNLOCK(fdp);
924 			return (error);
925 		}
926 	}
927 	oldvp = fdp->fd_rdir;
928 	fdp->fd_rdir = vp;
929 	VREF(fdp->fd_rdir);
930 	if (!fdp->fd_jdir) {
931 		fdp->fd_jdir = vp;
932 		VREF(fdp->fd_jdir);
933 	}
934 	FILEDESC_UNLOCK(fdp);
935 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
936 	vrele(oldvp);
937 	VFS_UNLOCK_GIANT(vfslocked);
938 	return (0);
939 }
940 
941 /*
942  * Check permissions, allocate an open file structure,
943  * and call the device open routine if any.
944  *
945  * MP SAFE
946  */
947 #ifndef _SYS_SYSPROTO_H_
948 struct open_args {
949 	char	*path;
950 	int	flags;
951 	int	mode;
952 };
953 #endif
954 int
955 open(td, uap)
956 	struct thread *td;
957 	register struct open_args /* {
958 		char *path;
959 		int flags;
960 		int mode;
961 	} */ *uap;
962 {
963 
964 	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
965 }
966 
/*
 * Open (and optionally create/truncate/lock) the file named by 'path' and
 * install it in a new descriptor of the calling process.
 *
 *	td	calling thread; the new descriptor index is returned in
 *		td->td_retval[0]
 *	path	pathname, in the address space named by 'pathseg'
 *	flags	open(2) flags; rejected with EINVAL when all O_ACCMODE bits
 *		are set, then converted with FFLAGS()
 *	mode	creation mode, filtered through the process umask
 *
 * Returns 0 on success or an errno.  ERESTART from vn_open() is turned
 * into EINTR.
 */
int
kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
    int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	struct vattr vat;
	struct mount *mp;
	int cmode;
	struct file *nfp;
	int type, indx, error;
	struct flock lf;
	struct nameidata nd;
	int vfslocked;

	AUDIT_ARG(fflags, flags);
	AUDIT_ARG(mode, mode);
	if ((flags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(flags);
	error = falloc(td, &nfp, &indx);
	if (error)
		return (error);
	/* An extra reference on `nfp' has been held for us by falloc(). */
	fp = nfp;
	/* Apply the umask and never pass S_ISTXT down to vn_open(). */
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open(&nd, &flags, cmode, indx);
	if (error) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wondrous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops) {
			fdrop(fp, td);
			td->td_retval[0] = indx;
			return (0);
		}

		/*
		 * release our own reference
		 */
		fdrop(fp, td);

		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 */
		if ((error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
		    (error =
			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
			td->td_retval[0] = indx;
			return (0);
		}
		/*
		 * Clean up the descriptor, but only if another thread hadn't
		 * replaced or closed it.
		 */
		fdclose(fdp, fp, indx, td);

		if (error == ERESTART)
			error = EINTR;
		return (error);
	}
	td->td_dupfd = 0;
	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * There should be 2 references on the file, one from the descriptor
	 * table, and one for us.
	 *
	 * Handle the case where someone closed the file (via its file
	 * descriptor) while we were blocked.  The end result should look
	 * like opening the file succeeded but it was immediately closed.
	 * We call vn_close() manually because we haven't yet hooked up
	 * the various 'struct file' fields.
	 */
	FILEDESC_LOCK(fdp);
	FILE_LOCK(fp);
	if (fp->f_count == 1) {
		mp = vp->v_mount;
		KASSERT(fdp->fd_ofiles[indx] != fp,
		    ("Open file descriptor lost all refs"));
		FILE_UNLOCK(fp);
		FILEDESC_UNLOCK(fdp);
		VOP_UNLOCK(vp, 0, td);
		vn_close(vp, flags & FMASK, fp->f_cred, td);
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		td->td_retval[0] = indx;
		return (0);
	}
	/*
	 * Hook the vnode up to the file.  f_data and f_ops are only filled
	 * in when they still hold their initial values, so anything
	 * installed during vn_open() is left alone.
	 */
	fp->f_vnode = vp;
	if (fp->f_data == NULL)
		fp->f_data = vp;
	fp->f_flag = flags & FMASK;
	if (fp->f_ops == &badfileops)
		fp->f_ops = &vnops;
	fp->f_seqcount = 1;
	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
	FILE_UNLOCK(fp);
	FILEDESC_UNLOCK(fdp);

	VOP_UNLOCK(vp, 0, td);
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/*
		 * Whole-file flock-style lock: write lock for O_EXLOCK,
		 * read lock otherwise; waits unless FNONBLOCK is set.
		 */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0)
			goto bad;
		fp->f_flag |= FHASLOCK;
	}
	if (flags & O_TRUNC) {
		/* Truncate by setting the size attribute to zero. */
		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
			goto bad;
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		VATTR_NULL(&vat);
		vat.va_size = 0;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
#ifdef MAC
		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
		if (error == 0)
#endif
			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
		VOP_UNLOCK(vp, 0, td);
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	VFS_UNLOCK_GIANT(vfslocked);
	/*
	 * Release our private reference, leaving the one associated with
	 * the descriptor table intact.
	 */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);
bad:
	/* Undo the descriptor installation and drop our own reference. */
	VFS_UNLOCK_GIANT(vfslocked);
	fdclose(fdp, fp, indx, td);
	fdrop(fp, td);
	return (error);
}
1126 
1127 #ifdef COMPAT_43
1128 /*
1129  * Create a file.
1130  *
1131  * MP SAFE
1132  */
1133 #ifndef _SYS_SYSPROTO_H_
1134 struct ocreat_args {
1135 	char	*path;
1136 	int	mode;
1137 };
1138 #endif
1139 int
1140 ocreat(td, uap)
1141 	struct thread *td;
1142 	register struct ocreat_args /* {
1143 		char *path;
1144 		int mode;
1145 	} */ *uap;
1146 {
1147 
1148 	return (kern_open(td, uap->path, UIO_USERSPACE,
1149 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1150 }
1151 #endif /* COMPAT_43 */
1152 
1153 /*
1154  * Create a special file.
1155  */
1156 #ifndef _SYS_SYSPROTO_H_
1157 struct mknod_args {
1158 	char	*path;
1159 	int	mode;
1160 	int	dev;
1161 };
1162 #endif
1163 int
1164 mknod(td, uap)
1165 	struct thread *td;
1166 	register struct mknod_args /* {
1167 		char *path;
1168 		int mode;
1169 		int dev;
1170 	} */ *uap;
1171 {
1172 
1173 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1174 }
1175 
1176 int
1177 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1178     int dev)
1179 {
1180 	struct vnode *vp;
1181 	struct mount *mp;
1182 	struct vattr vattr;
1183 	int error;
1184 	int whiteout = 0;
1185 	struct nameidata nd;
1186 	int vfslocked;
1187 
1188 	AUDIT_ARG(mode, mode);
1189 	AUDIT_ARG(dev, dev);
1190 	switch (mode & S_IFMT) {
1191 	case S_IFCHR:
1192 	case S_IFBLK:
1193 		error = suser(td);
1194 		break;
1195 	default:
1196 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
1197 		break;
1198 	}
1199 	if (error)
1200 		return (error);
1201 restart:
1202 	bwillwrite();
1203 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1204 	    pathseg, path, td);
1205 	if ((error = namei(&nd)) != 0)
1206 		return (error);
1207 	vfslocked = NDHASGIANT(&nd);
1208 	vp = nd.ni_vp;
1209 	if (vp != NULL) {
1210 		NDFREE(&nd, NDF_ONLY_PNBUF);
1211 		if (vp == nd.ni_dvp)
1212 			vrele(nd.ni_dvp);
1213 		else
1214 			vput(nd.ni_dvp);
1215 		vrele(vp);
1216 		VFS_UNLOCK_GIANT(vfslocked);
1217 		return (EEXIST);
1218 	} else {
1219 		VATTR_NULL(&vattr);
1220 		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1221 		vattr.va_mode = (mode & ALLPERMS) &
1222 		    ~td->td_proc->p_fd->fd_cmask;
1223 		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1224 		vattr.va_rdev = dev;
1225 		whiteout = 0;
1226 
1227 		switch (mode & S_IFMT) {
1228 		case S_IFMT:	/* used by badsect to flag bad sectors */
1229 			vattr.va_type = VBAD;
1230 			break;
1231 		case S_IFCHR:
1232 			vattr.va_type = VCHR;
1233 			break;
1234 		case S_IFBLK:
1235 			vattr.va_type = VBLK;
1236 			break;
1237 		case S_IFWHT:
1238 			whiteout = 1;
1239 			break;
1240 		default:
1241 			error = EINVAL;
1242 			break;
1243 		}
1244 	}
1245 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1246 		NDFREE(&nd, NDF_ONLY_PNBUF);
1247 		vput(nd.ni_dvp);
1248 		VFS_UNLOCK_GIANT(vfslocked);
1249 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1250 			return (error);
1251 		goto restart;
1252 	}
1253 #ifdef MAC
1254 	if (error == 0 && !whiteout)
1255 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1256 		    &nd.ni_cnd, &vattr);
1257 #endif
1258 	if (!error) {
1259 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1260 		if (whiteout)
1261 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1262 		else {
1263 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1264 						&nd.ni_cnd, &vattr);
1265 			if (error == 0)
1266 				vput(nd.ni_vp);
1267 		}
1268 	}
1269 	NDFREE(&nd, NDF_ONLY_PNBUF);
1270 	vput(nd.ni_dvp);
1271 	vn_finished_write(mp);
1272 	VFS_UNLOCK_GIANT(vfslocked);
1273 	return (error);
1274 }
1275 
1276 /*
1277  * Create a named pipe.
1278  */
1279 #ifndef _SYS_SYSPROTO_H_
1280 struct mkfifo_args {
1281 	char	*path;
1282 	int	mode;
1283 };
1284 #endif
1285 int
1286 mkfifo(td, uap)
1287 	struct thread *td;
1288 	register struct mkfifo_args /* {
1289 		char *path;
1290 		int mode;
1291 	} */ *uap;
1292 {
1293 
1294 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1295 }
1296 
1297 int
1298 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1299 {
1300 	struct mount *mp;
1301 	struct vattr vattr;
1302 	int error;
1303 	struct nameidata nd;
1304 	int vfslocked;
1305 
1306 restart:
1307 	bwillwrite();
1308 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1309 	    pathseg, path, td);
1310 	if ((error = namei(&nd)) != 0)
1311 		return (error);
1312 	vfslocked = NDHASGIANT(&nd);
1313 	if (nd.ni_vp != NULL) {
1314 		NDFREE(&nd, NDF_ONLY_PNBUF);
1315 		if (nd.ni_vp == nd.ni_dvp)
1316 			vrele(nd.ni_dvp);
1317 		else
1318 			vput(nd.ni_dvp);
1319 		vrele(nd.ni_vp);
1320 		VFS_UNLOCK_GIANT(vfslocked);
1321 		return (EEXIST);
1322 	}
1323 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1324 		NDFREE(&nd, NDF_ONLY_PNBUF);
1325 		vput(nd.ni_dvp);
1326 		VFS_UNLOCK_GIANT(vfslocked);
1327 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1328 			return (error);
1329 		goto restart;
1330 	}
1331 	VATTR_NULL(&vattr);
1332 	vattr.va_type = VFIFO;
1333 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1334 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1335 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1336 #ifdef MAC
1337 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1338 	    &vattr);
1339 	if (error)
1340 		goto out;
1341 #endif
1342 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1343 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1344 	if (error == 0)
1345 		vput(nd.ni_vp);
1346 #ifdef MAC
1347 out:
1348 #endif
1349 	vput(nd.ni_dvp);
1350 	vn_finished_write(mp);
1351 	VFS_UNLOCK_GIANT(vfslocked);
1352 	NDFREE(&nd, NDF_ONLY_PNBUF);
1353 	return (error);
1354 }
1355 
1356 /*
1357  * Make a hard file link.
1358  */
1359 #ifndef _SYS_SYSPROTO_H_
1360 struct link_args {
1361 	char	*path;
1362 	char	*link;
1363 };
1364 #endif
1365 int
1366 link(td, uap)
1367 	struct thread *td;
1368 	register struct link_args /* {
1369 		char *path;
1370 		char *link;
1371 	} */ *uap;
1372 {
1373 	int error;
1374 
1375 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1376 	return (error);
1377 }
1378 
1379 SYSCTL_DECL(_security_bsd);
1380 
1381 static int hardlink_check_uid = 0;
1382 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1383     &hardlink_check_uid, 0,
1384     "Unprivileged processes cannot create hard links to files owned by other "
1385     "users");
1386 static int hardlink_check_gid = 0;
1387 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1388     &hardlink_check_gid, 0,
1389     "Unprivileged processes cannot create hard links to files owned by other "
1390     "groups");
1391 
1392 static int
1393 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1394 {
1395 	struct vattr va;
1396 	int error;
1397 
1398 	if (suser_cred(cred, SUSER_ALLOWJAIL) == 0)
1399 		return (0);
1400 
1401 	if (!hardlink_check_uid && !hardlink_check_gid)
1402 		return (0);
1403 
1404 	error = VOP_GETATTR(vp, &va, cred, td);
1405 	if (error != 0)
1406 		return (error);
1407 
1408 	if (hardlink_check_uid) {
1409 		if (cred->cr_uid != va.va_uid)
1410 			return (EPERM);
1411 	}
1412 
1413 	if (hardlink_check_gid) {
1414 		if (!groupmember(va.va_gid, cred))
1415 			return (EPERM);
1416 	}
1417 
1418 	return (0);
1419 }
1420 
1421 int
1422 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1423 {
1424 	struct vnode *vp;
1425 	struct mount *mp;
1426 	struct nameidata nd;
1427 	int vfslocked;
1428 	int lvfslocked;
1429 	int error;
1430 
1431 	bwillwrite();
1432 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1433 	if ((error = namei(&nd)) != 0)
1434 		return (error);
1435 	vfslocked = NDHASGIANT(&nd);
1436 	NDFREE(&nd, NDF_ONLY_PNBUF);
1437 	vp = nd.ni_vp;
1438 	if (vp->v_type == VDIR) {
1439 		vrele(vp);
1440 		VFS_UNLOCK_GIANT(vfslocked);
1441 		return (EPERM);		/* POSIX */
1442 	}
1443 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1444 		vrele(vp);
1445 		VFS_UNLOCK_GIANT(vfslocked);
1446 		return (error);
1447 	}
1448 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1449 	    segflg, link, td);
1450 	if ((error = namei(&nd)) == 0) {
1451 		lvfslocked = NDHASGIANT(&nd);
1452 		if (nd.ni_vp != NULL) {
1453 			if (nd.ni_dvp == nd.ni_vp)
1454 				vrele(nd.ni_dvp);
1455 			else
1456 				vput(nd.ni_dvp);
1457 			vrele(nd.ni_vp);
1458 			error = EEXIST;
1459 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1460 		    == 0) {
1461 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1462 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1463 			error = can_hardlink(vp, td, td->td_ucred);
1464 			if (error == 0)
1465 #ifdef MAC
1466 				error = mac_check_vnode_link(td->td_ucred,
1467 				    nd.ni_dvp, vp, &nd.ni_cnd);
1468 			if (error == 0)
1469 #endif
1470 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1471 			VOP_UNLOCK(vp, 0, td);
1472 			vput(nd.ni_dvp);
1473 		}
1474 		NDFREE(&nd, NDF_ONLY_PNBUF);
1475 		VFS_UNLOCK_GIANT(lvfslocked);
1476 	}
1477 	vrele(vp);
1478 	vn_finished_write(mp);
1479 	VFS_UNLOCK_GIANT(vfslocked);
1480 	return (error);
1481 }
1482 
1483 /*
1484  * Make a symbolic link.
1485  */
1486 #ifndef _SYS_SYSPROTO_H_
1487 struct symlink_args {
1488 	char	*path;
1489 	char	*link;
1490 };
1491 #endif
1492 int
1493 symlink(td, uap)
1494 	struct thread *td;
1495 	register struct symlink_args /* {
1496 		char *path;
1497 		char *link;
1498 	} */ *uap;
1499 {
1500 
1501 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1502 }
1503 
1504 int
1505 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1506 {
1507 	struct mount *mp;
1508 	struct vattr vattr;
1509 	char *syspath;
1510 	int error;
1511 	struct nameidata nd;
1512 	int vfslocked;
1513 
1514 	if (segflg == UIO_SYSSPACE) {
1515 		syspath = path;
1516 	} else {
1517 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1518 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1519 			goto out;
1520 	}
1521 restart:
1522 	bwillwrite();
1523 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1524 	    segflg, link, td);
1525 	if ((error = namei(&nd)) != 0)
1526 		goto out;
1527 	vfslocked = NDHASGIANT(&nd);
1528 	if (nd.ni_vp) {
1529 		NDFREE(&nd, NDF_ONLY_PNBUF);
1530 		if (nd.ni_vp == nd.ni_dvp)
1531 			vrele(nd.ni_dvp);
1532 		else
1533 			vput(nd.ni_dvp);
1534 		vrele(nd.ni_vp);
1535 		VFS_UNLOCK_GIANT(vfslocked);
1536 		error = EEXIST;
1537 		goto out;
1538 	}
1539 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1540 		NDFREE(&nd, NDF_ONLY_PNBUF);
1541 		vput(nd.ni_dvp);
1542 		VFS_UNLOCK_GIANT(vfslocked);
1543 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1544 			goto out;
1545 		goto restart;
1546 	}
1547 	VATTR_NULL(&vattr);
1548 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1549 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1550 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1551 #ifdef MAC
1552 	vattr.va_type = VLNK;
1553 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1554 	    &vattr);
1555 	if (error)
1556 		goto out2;
1557 #endif
1558 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1559 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1560 	if (error == 0)
1561 		vput(nd.ni_vp);
1562 #ifdef MAC
1563 out2:
1564 #endif
1565 	NDFREE(&nd, NDF_ONLY_PNBUF);
1566 	vput(nd.ni_dvp);
1567 	vn_finished_write(mp);
1568 	VFS_UNLOCK_GIANT(vfslocked);
1569 out:
1570 	if (segflg != UIO_SYSSPACE)
1571 		uma_zfree(namei_zone, syspath);
1572 	return (error);
1573 }
1574 
1575 /*
1576  * Delete a whiteout from the filesystem.
1577  */
1578 int
1579 undelete(td, uap)
1580 	struct thread *td;
1581 	register struct undelete_args /* {
1582 		char *path;
1583 	} */ *uap;
1584 {
1585 	int error;
1586 	struct mount *mp;
1587 	struct nameidata nd;
1588 	int vfslocked;
1589 
1590 restart:
1591 	bwillwrite();
1592 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1593 	    UIO_USERSPACE, uap->path, td);
1594 	error = namei(&nd);
1595 	if (error)
1596 		return (error);
1597 	vfslocked = NDHASGIANT(&nd);
1598 
1599 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1600 		NDFREE(&nd, NDF_ONLY_PNBUF);
1601 		if (nd.ni_vp == nd.ni_dvp)
1602 			vrele(nd.ni_dvp);
1603 		else
1604 			vput(nd.ni_dvp);
1605 		if (nd.ni_vp)
1606 			vrele(nd.ni_vp);
1607 		VFS_UNLOCK_GIANT(vfslocked);
1608 		return (EEXIST);
1609 	}
1610 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1611 		NDFREE(&nd, NDF_ONLY_PNBUF);
1612 		vput(nd.ni_dvp);
1613 		VFS_UNLOCK_GIANT(vfslocked);
1614 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1615 			return (error);
1616 		goto restart;
1617 	}
1618 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1619 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1620 	NDFREE(&nd, NDF_ONLY_PNBUF);
1621 	vput(nd.ni_dvp);
1622 	vn_finished_write(mp);
1623 	VFS_UNLOCK_GIANT(vfslocked);
1624 	return (error);
1625 }
1626 
1627 /*
1628  * Delete a name from the filesystem.
1629  */
1630 #ifndef _SYS_SYSPROTO_H_
1631 struct unlink_args {
1632 	char	*path;
1633 };
1634 #endif
1635 int
1636 unlink(td, uap)
1637 	struct thread *td;
1638 	struct unlink_args /* {
1639 		char *path;
1640 	} */ *uap;
1641 {
1642 	int error;
1643 
1644 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1645 	return (error);
1646 }
1647 
1648 int
1649 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1650 {
1651 	struct mount *mp;
1652 	struct vnode *vp;
1653 	int error;
1654 	struct nameidata nd;
1655 	int vfslocked;
1656 
1657 restart:
1658 	bwillwrite();
1659 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1660 	    pathseg, path, td);
1661 	if ((error = namei(&nd)) != 0)
1662 		return (error == EINVAL ? EPERM : error);
1663 	vfslocked = NDHASGIANT(&nd);
1664 	vp = nd.ni_vp;
1665 	if (vp->v_type == VDIR)
1666 		error = EPERM;		/* POSIX */
1667 	else {
1668 		/*
1669 		 * The root of a mounted filesystem cannot be deleted.
1670 		 *
1671 		 * XXX: can this only be a VDIR case?
1672 		 */
1673 		if (vp->v_vflag & VV_ROOT)
1674 			error = EBUSY;
1675 	}
1676 	if (error == 0) {
1677 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1678 			NDFREE(&nd, NDF_ONLY_PNBUF);
1679 			vput(nd.ni_dvp);
1680 			if (vp == nd.ni_dvp)
1681 				vrele(vp);
1682 			else
1683 				vput(vp);
1684 			VFS_UNLOCK_GIANT(vfslocked);
1685 			if ((error = vn_start_write(NULL, &mp,
1686 			    V_XSLEEP | PCATCH)) != 0)
1687 				return (error);
1688 			goto restart;
1689 		}
1690 #ifdef MAC
1691 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1692 		    &nd.ni_cnd);
1693 		if (error)
1694 			goto out;
1695 #endif
1696 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1697 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1698 #ifdef MAC
1699 out:
1700 #endif
1701 		vn_finished_write(mp);
1702 	}
1703 	NDFREE(&nd, NDF_ONLY_PNBUF);
1704 	vput(nd.ni_dvp);
1705 	if (vp == nd.ni_dvp)
1706 		vrele(vp);
1707 	else
1708 		vput(vp);
1709 	VFS_UNLOCK_GIANT(vfslocked);
1710 	return (error);
1711 }
1712 
1713 /*
1714  * Reposition read/write file offset.
1715  */
1716 #ifndef _SYS_SYSPROTO_H_
1717 struct lseek_args {
1718 	int	fd;
1719 	int	pad;
1720 	off_t	offset;
1721 	int	whence;
1722 };
1723 #endif
1724 int
1725 lseek(td, uap)
1726 	struct thread *td;
1727 	register struct lseek_args /* {
1728 		int fd;
1729 		int pad;
1730 		off_t offset;
1731 		int whence;
1732 	} */ *uap;
1733 {
1734 	struct ucred *cred = td->td_ucred;
1735 	struct file *fp;
1736 	struct vnode *vp;
1737 	struct vattr vattr;
1738 	off_t offset;
1739 	int error, noneg;
1740 	int vfslocked;
1741 
1742 	if ((error = fget(td, uap->fd, &fp)) != 0)
1743 		return (error);
1744 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1745 		fdrop(fp, td);
1746 		return (ESPIPE);
1747 	}
1748 	vp = fp->f_vnode;
1749 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1750 	noneg = (vp->v_type != VCHR);
1751 	offset = uap->offset;
1752 	switch (uap->whence) {
1753 	case L_INCR:
1754 		if (noneg &&
1755 		    (fp->f_offset < 0 ||
1756 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1757 			error = EOVERFLOW;
1758 			break;
1759 		}
1760 		offset += fp->f_offset;
1761 		break;
1762 	case L_XTND:
1763 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1764 		error = VOP_GETATTR(vp, &vattr, cred, td);
1765 		VOP_UNLOCK(vp, 0, td);
1766 		if (error)
1767 			break;
1768 		if (noneg &&
1769 		    (vattr.va_size > OFF_MAX ||
1770 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1771 			error = EOVERFLOW;
1772 			break;
1773 		}
1774 		offset += vattr.va_size;
1775 		break;
1776 	case L_SET:
1777 		break;
1778 	default:
1779 		error = EINVAL;
1780 	}
1781 	if (error == 0 && noneg && offset < 0)
1782 		error = EINVAL;
1783 	if (error != 0)
1784 		goto drop;
1785 	fp->f_offset = offset;
1786 	*(off_t *)(td->td_retval) = fp->f_offset;
1787 drop:
1788 	fdrop(fp, td);
1789 	VFS_UNLOCK_GIANT(vfslocked);
1790 	return (error);
1791 }
1792 
1793 #if defined(COMPAT_43)
1794 /*
1795  * Reposition read/write file offset.
1796  */
1797 #ifndef _SYS_SYSPROTO_H_
1798 struct olseek_args {
1799 	int	fd;
1800 	long	offset;
1801 	int	whence;
1802 };
1803 #endif
1804 int
1805 olseek(td, uap)
1806 	struct thread *td;
1807 	register struct olseek_args /* {
1808 		int fd;
1809 		long offset;
1810 		int whence;
1811 	} */ *uap;
1812 {
1813 	struct lseek_args /* {
1814 		int fd;
1815 		int pad;
1816 		off_t offset;
1817 		int whence;
1818 	} */ nuap;
1819 	int error;
1820 
1821 	nuap.fd = uap->fd;
1822 	nuap.offset = uap->offset;
1823 	nuap.whence = uap->whence;
1824 	error = lseek(td, &nuap);
1825 	return (error);
1826 }
1827 #endif /* COMPAT_43 */
1828 
1829 /*
1830  * Check access permissions using passed credentials.
1831  */
1832 static int
1833 vn_access(vp, user_flags, cred, td)
1834 	struct vnode	*vp;
1835 	int		user_flags;
1836 	struct ucred	*cred;
1837 	struct thread	*td;
1838 {
1839 	int error, flags;
1840 
1841 	/* Flags == 0 means only check for existence. */
1842 	error = 0;
1843 	if (user_flags) {
1844 		flags = 0;
1845 		if (user_flags & R_OK)
1846 			flags |= VREAD;
1847 		if (user_flags & W_OK)
1848 			flags |= VWRITE;
1849 		if (user_flags & X_OK)
1850 			flags |= VEXEC;
1851 #ifdef MAC
1852 		error = mac_check_vnode_access(cred, vp, flags);
1853 		if (error)
1854 			return (error);
1855 #endif
1856 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1857 			error = VOP_ACCESS(vp, flags, cred, td);
1858 	}
1859 	return (error);
1860 }
1861 
1862 /*
1863  * Check access permissions using "real" credentials.
1864  */
1865 #ifndef _SYS_SYSPROTO_H_
1866 struct access_args {
1867 	char	*path;
1868 	int	flags;
1869 };
1870 #endif
1871 int
1872 access(td, uap)
1873 	struct thread *td;
1874 	register struct access_args /* {
1875 		char *path;
1876 		int flags;
1877 	} */ *uap;
1878 {
1879 
1880 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1881 }
1882 
1883 int
1884 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1885 {
1886 	struct ucred *cred, *tmpcred;
1887 	register struct vnode *vp;
1888 	struct nameidata nd;
1889 	int vfslocked;
1890 	int error;
1891 
1892 	/*
1893 	 * Create and modify a temporary credential instead of one that
1894 	 * is potentially shared.  This could also mess up socket
1895 	 * buffer accounting which can run in an interrupt context.
1896 	 */
1897 	cred = td->td_ucred;
1898 	tmpcred = crdup(cred);
1899 	tmpcred->cr_uid = cred->cr_ruid;
1900 	tmpcred->cr_groups[0] = cred->cr_rgid;
1901 	td->td_ucred = tmpcred;
1902 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1903 	    pathseg, path, td);
1904 	if ((error = namei(&nd)) != 0)
1905 		goto out1;
1906 	vfslocked = NDHASGIANT(&nd);
1907 	vp = nd.ni_vp;
1908 
1909 	error = vn_access(vp, flags, tmpcred, td);
1910 	NDFREE(&nd, NDF_ONLY_PNBUF);
1911 	vput(vp);
1912 	VFS_UNLOCK_GIANT(vfslocked);
1913 out1:
1914 	td->td_ucred = cred;
1915 	crfree(tmpcred);
1916 	return (error);
1917 }
1918 
1919 /*
1920  * Check access permissions using "effective" credentials.
1921  */
1922 #ifndef _SYS_SYSPROTO_H_
1923 struct eaccess_args {
1924 	char	*path;
1925 	int	flags;
1926 };
1927 #endif
1928 int
1929 eaccess(td, uap)
1930 	struct thread *td;
1931 	register struct eaccess_args /* {
1932 		char *path;
1933 		int flags;
1934 	} */ *uap;
1935 {
1936 
1937 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1938 }
1939 
1940 int
1941 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1942 {
1943 	struct nameidata nd;
1944 	struct vnode *vp;
1945 	int vfslocked;
1946 	int error;
1947 
1948 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1949 	    pathseg, path, td);
1950 	if ((error = namei(&nd)) != 0)
1951 		return (error);
1952 	vp = nd.ni_vp;
1953 	vfslocked = NDHASGIANT(&nd);
1954 	error = vn_access(vp, flags, td->td_ucred, td);
1955 	NDFREE(&nd, NDF_ONLY_PNBUF);
1956 	vput(vp);
1957 	VFS_UNLOCK_GIANT(vfslocked);
1958 	return (error);
1959 }
1960 
1961 #if defined(COMPAT_43)
1962 /*
1963  * Get file status; this version follows links.
1964  */
1965 #ifndef _SYS_SYSPROTO_H_
1966 struct ostat_args {
1967 	char	*path;
1968 	struct ostat *ub;
1969 };
1970 #endif
1971 int
1972 ostat(td, uap)
1973 	struct thread *td;
1974 	register struct ostat_args /* {
1975 		char *path;
1976 		struct ostat *ub;
1977 	} */ *uap;
1978 {
1979 	struct stat sb;
1980 	struct ostat osb;
1981 	int error;
1982 
1983 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1984 	if (error)
1985 		return (error);
1986 	cvtstat(&sb, &osb);
1987 	error = copyout(&osb, uap->ub, sizeof (osb));
1988 	return (error);
1989 }
1990 
1991 /*
1992  * Get file status; this version does not follow links.
1993  */
1994 #ifndef _SYS_SYSPROTO_H_
1995 struct olstat_args {
1996 	char	*path;
1997 	struct ostat *ub;
1998 };
1999 #endif
2000 int
2001 olstat(td, uap)
2002 	struct thread *td;
2003 	register struct olstat_args /* {
2004 		char *path;
2005 		struct ostat *ub;
2006 	} */ *uap;
2007 {
2008 	struct stat sb;
2009 	struct ostat osb;
2010 	int error;
2011 
2012 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2013 	if (error)
2014 		return (error);
2015 	cvtstat(&sb, &osb);
2016 	error = copyout(&osb, uap->ub, sizeof (osb));
2017 	return (error);
2018 }
2019 
2020 /*
2021  * Convert from an old to a new stat structure.
2022  */
2023 void
2024 cvtstat(st, ost)
2025 	struct stat *st;
2026 	struct ostat *ost;
2027 {
2028 
2029 	ost->st_dev = st->st_dev;
2030 	ost->st_ino = st->st_ino;
2031 	ost->st_mode = st->st_mode;
2032 	ost->st_nlink = st->st_nlink;
2033 	ost->st_uid = st->st_uid;
2034 	ost->st_gid = st->st_gid;
2035 	ost->st_rdev = st->st_rdev;
2036 	if (st->st_size < (quad_t)1 << 32)
2037 		ost->st_size = st->st_size;
2038 	else
2039 		ost->st_size = -2;
2040 	ost->st_atime = st->st_atime;
2041 	ost->st_mtime = st->st_mtime;
2042 	ost->st_ctime = st->st_ctime;
2043 	ost->st_blksize = st->st_blksize;
2044 	ost->st_blocks = st->st_blocks;
2045 	ost->st_flags = st->st_flags;
2046 	ost->st_gen = st->st_gen;
2047 }
2048 #endif /* COMPAT_43 */
2049 
2050 /*
2051  * Get file status; this version follows links.
2052  */
2053 #ifndef _SYS_SYSPROTO_H_
2054 struct stat_args {
2055 	char	*path;
2056 	struct stat *ub;
2057 };
2058 #endif
2059 int
2060 stat(td, uap)
2061 	struct thread *td;
2062 	register struct stat_args /* {
2063 		char *path;
2064 		struct stat *ub;
2065 	} */ *uap;
2066 {
2067 	struct stat sb;
2068 	int error;
2069 
2070 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2071 	if (error == 0)
2072 		error = copyout(&sb, uap->ub, sizeof (sb));
2073 	return (error);
2074 }
2075 
2076 int
2077 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2078 {
2079 	struct nameidata nd;
2080 	struct stat sb;
2081 	int error, vfslocked;
2082 
2083 	NDINIT(&nd, LOOKUP,
2084 	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2085 	    pathseg, path, td);
2086 	if ((error = namei(&nd)) != 0)
2087 		return (error);
2088 	vfslocked = NDHASGIANT(&nd);
2089 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2090 	NDFREE(&nd, NDF_ONLY_PNBUF);
2091 	vput(nd.ni_vp);
2092 	VFS_UNLOCK_GIANT(vfslocked);
2093 	if (mtx_owned(&Giant))
2094 		printf("stat(%d): %s\n", vfslocked, path);
2095 	if (error)
2096 		return (error);
2097 	*sbp = sb;
2098 	return (0);
2099 }
2100 
2101 /*
2102  * Get file status; this version does not follow links.
2103  */
2104 #ifndef _SYS_SYSPROTO_H_
2105 struct lstat_args {
2106 	char	*path;
2107 	struct stat *ub;
2108 };
2109 #endif
2110 int
2111 lstat(td, uap)
2112 	struct thread *td;
2113 	register struct lstat_args /* {
2114 		char *path;
2115 		struct stat *ub;
2116 	} */ *uap;
2117 {
2118 	struct stat sb;
2119 	int error;
2120 
2121 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2122 	if (error == 0)
2123 		error = copyout(&sb, uap->ub, sizeof (sb));
2124 	return (error);
2125 }
2126 
2127 int
2128 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2129 {
2130 	struct vnode *vp;
2131 	struct stat sb;
2132 	struct nameidata nd;
2133 	int error, vfslocked;
2134 
2135 	NDINIT(&nd, LOOKUP,
2136 	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2137 	    pathseg, path, td);
2138 	if ((error = namei(&nd)) != 0)
2139 		return (error);
2140 	vfslocked = NDHASGIANT(&nd);
2141 	vp = nd.ni_vp;
2142 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2143 	NDFREE(&nd, NDF_ONLY_PNBUF);
2144 	vput(vp);
2145 	VFS_UNLOCK_GIANT(vfslocked);
2146 	if (error)
2147 		return (error);
2148 	*sbp = sb;
2149 	return (0);
2150 }
2151 
2152 /*
2153  * Implementation of the NetBSD [l]stat() functions.
2154  */
2155 void
2156 cvtnstat(sb, nsb)
2157 	struct stat *sb;
2158 	struct nstat *nsb;
2159 {
2160 	bzero(nsb, sizeof *nsb);
2161 	nsb->st_dev = sb->st_dev;
2162 	nsb->st_ino = sb->st_ino;
2163 	nsb->st_mode = sb->st_mode;
2164 	nsb->st_nlink = sb->st_nlink;
2165 	nsb->st_uid = sb->st_uid;
2166 	nsb->st_gid = sb->st_gid;
2167 	nsb->st_rdev = sb->st_rdev;
2168 	nsb->st_atimespec = sb->st_atimespec;
2169 	nsb->st_mtimespec = sb->st_mtimespec;
2170 	nsb->st_ctimespec = sb->st_ctimespec;
2171 	nsb->st_size = sb->st_size;
2172 	nsb->st_blocks = sb->st_blocks;
2173 	nsb->st_blksize = sb->st_blksize;
2174 	nsb->st_flags = sb->st_flags;
2175 	nsb->st_gen = sb->st_gen;
2176 	nsb->st_birthtimespec = sb->st_birthtimespec;
2177 }
2178 
2179 #ifndef _SYS_SYSPROTO_H_
2180 struct nstat_args {
2181 	char	*path;
2182 	struct nstat *ub;
2183 };
2184 #endif
2185 int
2186 nstat(td, uap)
2187 	struct thread *td;
2188 	register struct nstat_args /* {
2189 		char *path;
2190 		struct nstat *ub;
2191 	} */ *uap;
2192 {
2193 	struct stat sb;
2194 	struct nstat nsb;
2195 	int error;
2196 
2197 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2198 	if (error)
2199 		return (error);
2200 	cvtnstat(&sb, &nsb);
2201 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2202 	return (error);
2203 }
2204 
2205 /*
2206  * NetBSD lstat.  Get file status; this version does not follow links.
2207  */
2208 #ifndef _SYS_SYSPROTO_H_
2209 struct lstat_args {
2210 	char	*path;
2211 	struct stat *ub;
2212 };
2213 #endif
2214 int
2215 nlstat(td, uap)
2216 	struct thread *td;
2217 	register struct nlstat_args /* {
2218 		char *path;
2219 		struct nstat *ub;
2220 	} */ *uap;
2221 {
2222 	struct stat sb;
2223 	struct nstat nsb;
2224 	int error;
2225 
2226 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2227 	if (error)
2228 		return (error);
2229 	cvtnstat(&sb, &nsb);
2230 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2231 	return (error);
2232 }
2233 
2234 /*
2235  * Get configurable pathname variables.
2236  */
2237 #ifndef _SYS_SYSPROTO_H_
2238 struct pathconf_args {
2239 	char	*path;
2240 	int	name;
2241 };
2242 #endif
2243 int
2244 pathconf(td, uap)
2245 	struct thread *td;
2246 	register struct pathconf_args /* {
2247 		char *path;
2248 		int name;
2249 	} */ *uap;
2250 {
2251 
2252 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2253 }
2254 
2255 int
2256 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2257 {
2258 	struct nameidata nd;
2259 	int error, vfslocked;
2260 
2261 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2262 	    pathseg, path, td);
2263 	if ((error = namei(&nd)) != 0)
2264 		return (error);
2265 	vfslocked = NDHASGIANT(&nd);
2266 	NDFREE(&nd, NDF_ONLY_PNBUF);
2267 
2268 	/* If asynchronous I/O is available, it works for all files. */
2269 	if (name == _PC_ASYNC_IO)
2270 		td->td_retval[0] = async_io_version;
2271 	else
2272 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2273 	vput(nd.ni_vp);
2274 	VFS_UNLOCK_GIANT(vfslocked);
2275 	return (error);
2276 }
2277 
2278 /*
2279  * Return target name of a symbolic link.
2280  */
2281 #ifndef _SYS_SYSPROTO_H_
2282 struct readlink_args {
2283 	char	*path;
2284 	char	*buf;
2285 	int	count;
2286 };
2287 #endif
2288 int
2289 readlink(td, uap)
2290 	struct thread *td;
2291 	register struct readlink_args /* {
2292 		char *path;
2293 		char *buf;
2294 		int count;
2295 	} */ *uap;
2296 {
2297 
2298 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2299 	    UIO_USERSPACE, uap->count));
2300 }
2301 
2302 int
2303 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2304     enum uio_seg bufseg, int count)
2305 {
2306 	register struct vnode *vp;
2307 	struct iovec aiov;
2308 	struct uio auio;
2309 	int error;
2310 	struct nameidata nd;
2311 	int vfslocked;
2312 
2313 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2314 	    pathseg, path, td);
2315 	if ((error = namei(&nd)) != 0)
2316 		return (error);
2317 	NDFREE(&nd, NDF_ONLY_PNBUF);
2318 	vfslocked = NDHASGIANT(&nd);
2319 	vp = nd.ni_vp;
2320 #ifdef MAC
2321 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2322 	if (error) {
2323 		vput(vp);
2324 		VFS_UNLOCK_GIANT(vfslocked);
2325 		return (error);
2326 	}
2327 #endif
2328 	if (vp->v_type != VLNK)
2329 		error = EINVAL;
2330 	else {
2331 		aiov.iov_base = buf;
2332 		aiov.iov_len = count;
2333 		auio.uio_iov = &aiov;
2334 		auio.uio_iovcnt = 1;
2335 		auio.uio_offset = 0;
2336 		auio.uio_rw = UIO_READ;
2337 		auio.uio_segflg = bufseg;
2338 		auio.uio_td = td;
2339 		auio.uio_resid = count;
2340 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2341 	}
2342 	vput(vp);
2343 	VFS_UNLOCK_GIANT(vfslocked);
2344 	td->td_retval[0] = count - auio.uio_resid;
2345 	return (error);
2346 }
2347 
2348 /*
2349  * Common implementation code for chflags() and fchflags().
2350  */
2351 static int
2352 setfflags(td, vp, flags)
2353 	struct thread *td;
2354 	struct vnode *vp;
2355 	int flags;
2356 {
2357 	int error;
2358 	struct mount *mp;
2359 	struct vattr vattr;
2360 
2361 	/*
2362 	 * Prevent non-root users from setting flags on devices.  When
2363 	 * a device is reused, users can retain ownership of the device
2364 	 * if they are allowed to set flags and programs assume that
2365 	 * chown can't fail when done as root.
2366 	 */
2367 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2368 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
2369 		if (error)
2370 			return (error);
2371 	}
2372 
2373 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2374 		return (error);
2375 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2376 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2377 	VATTR_NULL(&vattr);
2378 	vattr.va_flags = flags;
2379 #ifdef MAC
2380 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2381 	if (error == 0)
2382 #endif
2383 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2384 	VOP_UNLOCK(vp, 0, td);
2385 	vn_finished_write(mp);
2386 	return (error);
2387 }
2388 
2389 /*
2390  * Change flags of a file given a path name.
2391  */
2392 #ifndef _SYS_SYSPROTO_H_
2393 struct chflags_args {
2394 	char	*path;
2395 	int	flags;
2396 };
2397 #endif
2398 int
2399 chflags(td, uap)
2400 	struct thread *td;
2401 	register struct chflags_args /* {
2402 		char *path;
2403 		int flags;
2404 	} */ *uap;
2405 {
2406 	int error;
2407 	struct nameidata nd;
2408 	int vfslocked;
2409 
2410 	AUDIT_ARG(fflags, uap->flags);
2411 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2412 	    uap->path, td);
2413 	if ((error = namei(&nd)) != 0)
2414 		return (error);
2415 	NDFREE(&nd, NDF_ONLY_PNBUF);
2416 	vfslocked = NDHASGIANT(&nd);
2417 	error = setfflags(td, nd.ni_vp, uap->flags);
2418 	vrele(nd.ni_vp);
2419 	VFS_UNLOCK_GIANT(vfslocked);
2420 	return (error);
2421 }
2422 
2423 /*
2424  * Same as chflags() but doesn't follow symlinks.
2425  */
2426 int
2427 lchflags(td, uap)
2428 	struct thread *td;
2429 	register struct lchflags_args /* {
2430 		char *path;
2431 		int flags;
2432 	} */ *uap;
2433 {
2434 	int error;
2435 	struct nameidata nd;
2436 	int vfslocked;
2437 
2438 	AUDIT_ARG(fflags, uap->flags);
2439 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2440 	    uap->path, td);
2441 	if ((error = namei(&nd)) != 0)
2442 		return (error);
2443 	vfslocked = NDHASGIANT(&nd);
2444 	NDFREE(&nd, NDF_ONLY_PNBUF);
2445 	error = setfflags(td, nd.ni_vp, uap->flags);
2446 	vrele(nd.ni_vp);
2447 	VFS_UNLOCK_GIANT(vfslocked);
2448 	return (error);
2449 }
2450 
2451 /*
2452  * Change flags of a file given a file descriptor.
2453  */
2454 #ifndef _SYS_SYSPROTO_H_
2455 struct fchflags_args {
2456 	int	fd;
2457 	int	flags;
2458 };
2459 #endif
2460 int
2461 fchflags(td, uap)
2462 	struct thread *td;
2463 	register struct fchflags_args /* {
2464 		int fd;
2465 		int flags;
2466 	} */ *uap;
2467 {
2468 	struct file *fp;
2469 	int vfslocked;
2470 	int error;
2471 
2472 	AUDIT_ARG(fd, uap->fd);
2473 	AUDIT_ARG(fflags, uap->flags);
2474 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2475 		return (error);
2476 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2477 #ifdef AUDIT
2478 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2479 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2480 	VOP_UNLOCK(fp->f_vnode, 0, td);
2481 #endif
2482 	error = setfflags(td, fp->f_vnode, uap->flags);
2483 	VFS_UNLOCK_GIANT(vfslocked);
2484 	fdrop(fp, td);
2485 	return (error);
2486 }
2487 
2488 /*
2489  * Common implementation code for chmod(), lchmod() and fchmod().
2490  */
2491 static int
2492 setfmode(td, vp, mode)
2493 	struct thread *td;
2494 	struct vnode *vp;
2495 	int mode;
2496 {
2497 	int error;
2498 	struct mount *mp;
2499 	struct vattr vattr;
2500 
2501 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2502 		return (error);
2503 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2504 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2505 	VATTR_NULL(&vattr);
2506 	vattr.va_mode = mode & ALLPERMS;
2507 #ifdef MAC
2508 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2509 	if (error == 0)
2510 #endif
2511 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2512 	VOP_UNLOCK(vp, 0, td);
2513 	vn_finished_write(mp);
2514 	return (error);
2515 }
2516 
2517 /*
2518  * Change mode of a file given path name.
2519  */
2520 #ifndef _SYS_SYSPROTO_H_
2521 struct chmod_args {
2522 	char	*path;
2523 	int	mode;
2524 };
2525 #endif
2526 int
2527 chmod(td, uap)
2528 	struct thread *td;
2529 	register struct chmod_args /* {
2530 		char *path;
2531 		int mode;
2532 	} */ *uap;
2533 {
2534 
2535 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2536 }
2537 
2538 int
2539 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2540 {
2541 	int error;
2542 	struct nameidata nd;
2543 	int vfslocked;
2544 
2545 	AUDIT_ARG(mode, mode);
2546 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2547 	if ((error = namei(&nd)) != 0)
2548 		return (error);
2549 	vfslocked = NDHASGIANT(&nd);
2550 	NDFREE(&nd, NDF_ONLY_PNBUF);
2551 	error = setfmode(td, nd.ni_vp, mode);
2552 	vrele(nd.ni_vp);
2553 	VFS_UNLOCK_GIANT(vfslocked);
2554 	return (error);
2555 }
2556 
2557 /*
2558  * Change mode of a file given path name (don't follow links.)
2559  */
2560 #ifndef _SYS_SYSPROTO_H_
2561 struct lchmod_args {
2562 	char	*path;
2563 	int	mode;
2564 };
2565 #endif
2566 int
2567 lchmod(td, uap)
2568 	struct thread *td;
2569 	register struct lchmod_args /* {
2570 		char *path;
2571 		int mode;
2572 	} */ *uap;
2573 {
2574 	int error;
2575 	struct nameidata nd;
2576 	int vfslocked;
2577 
2578 	AUDIT_ARG(mode, (mode_t)uap->mode);
2579 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2580 	    uap->path, td);
2581 	if ((error = namei(&nd)) != 0)
2582 		return (error);
2583 	vfslocked = NDHASGIANT(&nd);
2584 	NDFREE(&nd, NDF_ONLY_PNBUF);
2585 	error = setfmode(td, nd.ni_vp, uap->mode);
2586 	vrele(nd.ni_vp);
2587 	VFS_UNLOCK_GIANT(vfslocked);
2588 	return (error);
2589 }
2590 
2591 /*
2592  * Change mode of a file given a file descriptor.
2593  */
2594 #ifndef _SYS_SYSPROTO_H_
2595 struct fchmod_args {
2596 	int	fd;
2597 	int	mode;
2598 };
2599 #endif
2600 int
2601 fchmod(td, uap)
2602 	struct thread *td;
2603 	register struct fchmod_args /* {
2604 		int fd;
2605 		int mode;
2606 	} */ *uap;
2607 {
2608 	struct file *fp;
2609 	int vfslocked;
2610 	int error;
2611 
2612 	AUDIT_ARG(fd, uap->fd);
2613 	AUDIT_ARG(mode, uap->mode);
2614 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2615 		return (error);
2616 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2617 #ifdef AUDIT
2618 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2619 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2620 	VOP_UNLOCK(fp->f_vnode, 0, td);
2621 #endif
2622 	error = setfmode(td, fp->f_vnode, uap->mode);
2623 	VFS_UNLOCK_GIANT(vfslocked);
2624 	fdrop(fp, td);
2625 	return (error);
2626 }
2627 
2628 /*
2629  * Common implementation for chown(), lchown(), and fchown()
2630  */
2631 static int
2632 setfown(td, vp, uid, gid)
2633 	struct thread *td;
2634 	struct vnode *vp;
2635 	uid_t uid;
2636 	gid_t gid;
2637 {
2638 	int error;
2639 	struct mount *mp;
2640 	struct vattr vattr;
2641 
2642 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2643 		return (error);
2644 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2645 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2646 	VATTR_NULL(&vattr);
2647 	vattr.va_uid = uid;
2648 	vattr.va_gid = gid;
2649 #ifdef MAC
2650 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2651 	    vattr.va_gid);
2652 	if (error == 0)
2653 #endif
2654 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2655 	VOP_UNLOCK(vp, 0, td);
2656 	vn_finished_write(mp);
2657 	return (error);
2658 }
2659 
2660 /*
2661  * Set ownership given a path name.
2662  */
2663 #ifndef _SYS_SYSPROTO_H_
2664 struct chown_args {
2665 	char	*path;
2666 	int	uid;
2667 	int	gid;
2668 };
2669 #endif
2670 int
2671 chown(td, uap)
2672 	struct thread *td;
2673 	register struct chown_args /* {
2674 		char *path;
2675 		int uid;
2676 		int gid;
2677 	} */ *uap;
2678 {
2679 
2680 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2681 }
2682 
2683 int
2684 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2685     int gid)
2686 {
2687 	int error;
2688 	struct nameidata nd;
2689 	int vfslocked;
2690 
2691 	AUDIT_ARG(owner, uid, gid);
2692 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2693 	if ((error = namei(&nd)) != 0)
2694 		return (error);
2695 	vfslocked = NDHASGIANT(&nd);
2696 	NDFREE(&nd, NDF_ONLY_PNBUF);
2697 	error = setfown(td, nd.ni_vp, uid, gid);
2698 	vrele(nd.ni_vp);
2699 	VFS_UNLOCK_GIANT(vfslocked);
2700 	return (error);
2701 }
2702 
2703 /*
2704  * Set ownership given a path name, do not cross symlinks.
2705  */
2706 #ifndef _SYS_SYSPROTO_H_
2707 struct lchown_args {
2708 	char	*path;
2709 	int	uid;
2710 	int	gid;
2711 };
2712 #endif
2713 int
2714 lchown(td, uap)
2715 	struct thread *td;
2716 	register struct lchown_args /* {
2717 		char *path;
2718 		int uid;
2719 		int gid;
2720 	} */ *uap;
2721 {
2722 
2723 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2724 }
2725 
2726 int
2727 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2728     int gid)
2729 {
2730 	int error;
2731 	struct nameidata nd;
2732 	int vfslocked;
2733 
2734 	AUDIT_ARG(owner, uid, gid);
2735 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2736 	if ((error = namei(&nd)) != 0)
2737 		return (error);
2738 	vfslocked = NDHASGIANT(&nd);
2739 	NDFREE(&nd, NDF_ONLY_PNBUF);
2740 	error = setfown(td, nd.ni_vp, uid, gid);
2741 	vrele(nd.ni_vp);
2742 	VFS_UNLOCK_GIANT(vfslocked);
2743 	return (error);
2744 }
2745 
2746 /*
2747  * Set ownership given a file descriptor.
2748  */
2749 #ifndef _SYS_SYSPROTO_H_
2750 struct fchown_args {
2751 	int	fd;
2752 	int	uid;
2753 	int	gid;
2754 };
2755 #endif
2756 int
2757 fchown(td, uap)
2758 	struct thread *td;
2759 	register struct fchown_args /* {
2760 		int fd;
2761 		int uid;
2762 		int gid;
2763 	} */ *uap;
2764 {
2765 	struct file *fp;
2766 	int vfslocked;
2767 	int error;
2768 
2769 	AUDIT_ARG(fd, uap->fd);
2770 	AUDIT_ARG(owner, uap->uid, uap->gid);
2771 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2772 		return (error);
2773 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2774 #ifdef AUDIT
2775 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2776 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2777 	VOP_UNLOCK(fp->f_vnode, 0, td);
2778 #endif
2779 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2780 	VFS_UNLOCK_GIANT(vfslocked);
2781 	fdrop(fp, td);
2782 	return (error);
2783 }
2784 
2785 /*
2786  * Common implementation code for utimes(), lutimes(), and futimes().
2787  */
2788 static int
2789 getutimes(usrtvp, tvpseg, tsp)
2790 	const struct timeval *usrtvp;
2791 	enum uio_seg tvpseg;
2792 	struct timespec *tsp;
2793 {
2794 	struct timeval tv[2];
2795 	const struct timeval *tvp;
2796 	int error;
2797 
2798 	if (usrtvp == NULL) {
2799 		microtime(&tv[0]);
2800 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2801 		tsp[1] = tsp[0];
2802 	} else {
2803 		if (tvpseg == UIO_SYSSPACE) {
2804 			tvp = usrtvp;
2805 		} else {
2806 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2807 				return (error);
2808 			tvp = tv;
2809 		}
2810 
2811 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2812 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2813 			return (EINVAL);
2814 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2815 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2816 	}
2817 	return (0);
2818 }
2819 
2820 /*
2821  * Common implementation code for utimes(), lutimes(), and futimes().
2822  */
2823 static int
2824 setutimes(td, vp, ts, numtimes, nullflag)
2825 	struct thread *td;
2826 	struct vnode *vp;
2827 	const struct timespec *ts;
2828 	int numtimes;
2829 	int nullflag;
2830 {
2831 	int error, setbirthtime;
2832 	struct mount *mp;
2833 	struct vattr vattr;
2834 
2835 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2836 		return (error);
2837 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2838 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2839 	setbirthtime = 0;
2840 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2841 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2842 		setbirthtime = 1;
2843 	VATTR_NULL(&vattr);
2844 	vattr.va_atime = ts[0];
2845 	vattr.va_mtime = ts[1];
2846 	if (setbirthtime)
2847 		vattr.va_birthtime = ts[1];
2848 	if (numtimes > 2)
2849 		vattr.va_birthtime = ts[2];
2850 	if (nullflag)
2851 		vattr.va_vaflags |= VA_UTIMES_NULL;
2852 #ifdef MAC
2853 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2854 	    vattr.va_mtime);
2855 #endif
2856 	if (error == 0)
2857 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2858 	VOP_UNLOCK(vp, 0, td);
2859 	vn_finished_write(mp);
2860 	return (error);
2861 }
2862 
2863 /*
2864  * Set the access and modification times of a file.
2865  */
2866 #ifndef _SYS_SYSPROTO_H_
2867 struct utimes_args {
2868 	char	*path;
2869 	struct	timeval *tptr;
2870 };
2871 #endif
2872 int
2873 utimes(td, uap)
2874 	struct thread *td;
2875 	register struct utimes_args /* {
2876 		char *path;
2877 		struct timeval *tptr;
2878 	} */ *uap;
2879 {
2880 
2881 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2882 	    UIO_USERSPACE));
2883 }
2884 
2885 int
2886 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2887     struct timeval *tptr, enum uio_seg tptrseg)
2888 {
2889 	struct timespec ts[2];
2890 	int error;
2891 	struct nameidata nd;
2892 	int vfslocked;
2893 
2894 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2895 		return (error);
2896 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2897 	if ((error = namei(&nd)) != 0)
2898 		return (error);
2899 	vfslocked = NDHASGIANT(&nd);
2900 	NDFREE(&nd, NDF_ONLY_PNBUF);
2901 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2902 	vrele(nd.ni_vp);
2903 	VFS_UNLOCK_GIANT(vfslocked);
2904 	return (error);
2905 }
2906 
2907 /*
2908  * Set the access and modification times of a file.
2909  */
2910 #ifndef _SYS_SYSPROTO_H_
2911 struct lutimes_args {
2912 	char	*path;
2913 	struct	timeval *tptr;
2914 };
2915 #endif
2916 int
2917 lutimes(td, uap)
2918 	struct thread *td;
2919 	register struct lutimes_args /* {
2920 		char *path;
2921 		struct timeval *tptr;
2922 	} */ *uap;
2923 {
2924 
2925 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2926 	    UIO_USERSPACE));
2927 }
2928 
2929 int
2930 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2931     struct timeval *tptr, enum uio_seg tptrseg)
2932 {
2933 	struct timespec ts[2];
2934 	int error;
2935 	struct nameidata nd;
2936 	int vfslocked;
2937 
2938 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2939 		return (error);
2940 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2941 	if ((error = namei(&nd)) != 0)
2942 		return (error);
2943 	vfslocked = NDHASGIANT(&nd);
2944 	NDFREE(&nd, NDF_ONLY_PNBUF);
2945 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2946 	vrele(nd.ni_vp);
2947 	VFS_UNLOCK_GIANT(vfslocked);
2948 	return (error);
2949 }
2950 
2951 /*
2952  * Set the access and modification times of a file.
2953  */
2954 #ifndef _SYS_SYSPROTO_H_
2955 struct futimes_args {
2956 	int	fd;
2957 	struct	timeval *tptr;
2958 };
2959 #endif
2960 int
2961 futimes(td, uap)
2962 	struct thread *td;
2963 	register struct futimes_args /* {
2964 		int  fd;
2965 		struct timeval *tptr;
2966 	} */ *uap;
2967 {
2968 
2969 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2970 }
2971 
2972 int
2973 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2974     enum uio_seg tptrseg)
2975 {
2976 	struct timespec ts[2];
2977 	struct file *fp;
2978 	int vfslocked;
2979 	int error;
2980 
2981 	AUDIT_ARG(fd, fd);
2982 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2983 		return (error);
2984 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2985 		return (error);
2986 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2987 #ifdef AUDIT
2988 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2989 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2990 	VOP_UNLOCK(fp->f_vnode, 0, td);
2991 #endif
2992 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2993 	VFS_UNLOCK_GIANT(vfslocked);
2994 	fdrop(fp, td);
2995 	return (error);
2996 }
2997 
2998 /*
2999  * Truncate a file given its path name.
3000  */
3001 #ifndef _SYS_SYSPROTO_H_
3002 struct truncate_args {
3003 	char	*path;
3004 	int	pad;
3005 	off_t	length;
3006 };
3007 #endif
3008 int
3009 truncate(td, uap)
3010 	struct thread *td;
3011 	register struct truncate_args /* {
3012 		char *path;
3013 		int pad;
3014 		off_t length;
3015 	} */ *uap;
3016 {
3017 
3018 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3019 }
3020 
3021 int
3022 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3023 {
3024 	struct mount *mp;
3025 	struct vnode *vp;
3026 	struct vattr vattr;
3027 	int error;
3028 	struct nameidata nd;
3029 	int vfslocked;
3030 
3031 	if (length < 0)
3032 		return(EINVAL);
3033 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3034 	if ((error = namei(&nd)) != 0)
3035 		return (error);
3036 	vfslocked = NDHASGIANT(&nd);
3037 	vp = nd.ni_vp;
3038 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3039 		vrele(vp);
3040 		VFS_UNLOCK_GIANT(vfslocked);
3041 		return (error);
3042 	}
3043 	NDFREE(&nd, NDF_ONLY_PNBUF);
3044 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3045 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3046 	if (vp->v_type == VDIR)
3047 		error = EISDIR;
3048 #ifdef MAC
3049 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3050 	}
3051 #endif
3052 	else if ((error = vn_writechk(vp)) == 0 &&
3053 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3054 		VATTR_NULL(&vattr);
3055 		vattr.va_size = length;
3056 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3057 	}
3058 	vput(vp);
3059 	vn_finished_write(mp);
3060 	VFS_UNLOCK_GIANT(vfslocked);
3061 	return (error);
3062 }
3063 
3064 /*
3065  * Truncate a file given a file descriptor.
3066  */
3067 #ifndef _SYS_SYSPROTO_H_
3068 struct ftruncate_args {
3069 	int	fd;
3070 	int	pad;
3071 	off_t	length;
3072 };
3073 #endif
3074 int
3075 ftruncate(td, uap)
3076 	struct thread *td;
3077 	register struct ftruncate_args /* {
3078 		int fd;
3079 		int pad;
3080 		off_t length;
3081 	} */ *uap;
3082 {
3083 	struct mount *mp;
3084 	struct vattr vattr;
3085 	struct vnode *vp;
3086 	struct file *fp;
3087 	int vfslocked;
3088 	int error;
3089 
3090 	AUDIT_ARG(fd, uap->fd);
3091 	if (uap->length < 0)
3092 		return(EINVAL);
3093 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3094 		return (error);
3095 	if ((fp->f_flag & FWRITE) == 0) {
3096 		fdrop(fp, td);
3097 		return (EINVAL);
3098 	}
3099 	vp = fp->f_vnode;
3100 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3101 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3102 		goto drop;
3103 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3104 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3105 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3106 	if (vp->v_type == VDIR)
3107 		error = EISDIR;
3108 #ifdef MAC
3109 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3110 	    vp))) {
3111 	}
3112 #endif
3113 	else if ((error = vn_writechk(vp)) == 0) {
3114 		VATTR_NULL(&vattr);
3115 		vattr.va_size = uap->length;
3116 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3117 	}
3118 	VOP_UNLOCK(vp, 0, td);
3119 	vn_finished_write(mp);
3120 drop:
3121 	VFS_UNLOCK_GIANT(vfslocked);
3122 	fdrop(fp, td);
3123 	return (error);
3124 }
3125 
3126 #if defined(COMPAT_43)
3127 /*
3128  * Truncate a file given its path name.
3129  */
3130 #ifndef _SYS_SYSPROTO_H_
3131 struct otruncate_args {
3132 	char	*path;
3133 	long	length;
3134 };
3135 #endif
3136 int
3137 otruncate(td, uap)
3138 	struct thread *td;
3139 	register struct otruncate_args /* {
3140 		char *path;
3141 		long length;
3142 	} */ *uap;
3143 {
3144 	struct truncate_args /* {
3145 		char *path;
3146 		int pad;
3147 		off_t length;
3148 	} */ nuap;
3149 
3150 	nuap.path = uap->path;
3151 	nuap.length = uap->length;
3152 	return (truncate(td, &nuap));
3153 }
3154 
3155 /*
3156  * Truncate a file given a file descriptor.
3157  */
3158 #ifndef _SYS_SYSPROTO_H_
3159 struct oftruncate_args {
3160 	int	fd;
3161 	long	length;
3162 };
3163 #endif
3164 int
3165 oftruncate(td, uap)
3166 	struct thread *td;
3167 	register struct oftruncate_args /* {
3168 		int fd;
3169 		long length;
3170 	} */ *uap;
3171 {
3172 	struct ftruncate_args /* {
3173 		int fd;
3174 		int pad;
3175 		off_t length;
3176 	} */ nuap;
3177 
3178 	nuap.fd = uap->fd;
3179 	nuap.length = uap->length;
3180 	return (ftruncate(td, &nuap));
3181 }
3182 #endif /* COMPAT_43 */
3183 
3184 /*
3185  * Sync an open file.
3186  */
3187 #ifndef _SYS_SYSPROTO_H_
3188 struct fsync_args {
3189 	int	fd;
3190 };
3191 #endif
3192 int
3193 fsync(td, uap)
3194 	struct thread *td;
3195 	struct fsync_args /* {
3196 		int fd;
3197 	} */ *uap;
3198 {
3199 	struct vnode *vp;
3200 	struct mount *mp;
3201 	struct file *fp;
3202 	int vfslocked;
3203 	int error;
3204 
3205 	AUDIT_ARG(fd, uap->fd);
3206 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3207 		return (error);
3208 	vp = fp->f_vnode;
3209 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3210 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3211 		goto drop;
3212 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3213 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3214 	if (vp->v_object != NULL) {
3215 		VM_OBJECT_LOCK(vp->v_object);
3216 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3217 		VM_OBJECT_UNLOCK(vp->v_object);
3218 	}
3219 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3220 
3221 	VOP_UNLOCK(vp, 0, td);
3222 	vn_finished_write(mp);
3223 drop:
3224 	VFS_UNLOCK_GIANT(vfslocked);
3225 	fdrop(fp, td);
3226 	return (error);
3227 }
3228 
3229 /*
3230  * Rename files.  Source and destination must either both be directories,
3231  * or both not be directories.  If target is a directory, it must be empty.
3232  */
3233 #ifndef _SYS_SYSPROTO_H_
3234 struct rename_args {
3235 	char	*from;
3236 	char	*to;
3237 };
3238 #endif
3239 int
3240 rename(td, uap)
3241 	struct thread *td;
3242 	register struct rename_args /* {
3243 		char *from;
3244 		char *to;
3245 	} */ *uap;
3246 {
3247 
3248 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3249 }
3250 
3251 int
3252 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3253 {
3254 	struct mount *mp = NULL;
3255 	struct vnode *tvp, *fvp, *tdvp;
3256 	struct nameidata fromnd, tond;
3257 	int tvfslocked;
3258 	int fvfslocked;
3259 	int error;
3260 
3261 	bwillwrite();
3262 #ifdef MAC
3263 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3264 	    AUDITVNODE1, pathseg, from, td);
3265 #else
3266 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3267 	    AUDITVNODE1, pathseg, from, td);
3268 #endif
3269 	if ((error = namei(&fromnd)) != 0)
3270 		return (error);
3271 	fvfslocked = NDHASGIANT(&fromnd);
3272 	tvfslocked = 0;
3273 #ifdef MAC
3274 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3275 	    fromnd.ni_vp, &fromnd.ni_cnd);
3276 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3277 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
3278 #endif
3279 	fvp = fromnd.ni_vp;
3280 	if (error == 0)
3281 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3282 	if (error != 0) {
3283 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3284 		vrele(fromnd.ni_dvp);
3285 		vrele(fvp);
3286 		goto out1;
3287 	}
3288 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3289 	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3290 	if (fromnd.ni_vp->v_type == VDIR)
3291 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3292 	if ((error = namei(&tond)) != 0) {
3293 		/* Translate error code for rename("dir1", "dir2/."). */
3294 		if (error == EISDIR && fvp->v_type == VDIR)
3295 			error = EINVAL;
3296 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3297 		vrele(fromnd.ni_dvp);
3298 		vrele(fvp);
3299 		vn_finished_write(mp);
3300 		goto out1;
3301 	}
3302 	tvfslocked = NDHASGIANT(&tond);
3303 	tdvp = tond.ni_dvp;
3304 	tvp = tond.ni_vp;
3305 	if (tvp != NULL) {
3306 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3307 			error = ENOTDIR;
3308 			goto out;
3309 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3310 			error = EISDIR;
3311 			goto out;
3312 		}
3313 	}
3314 	if (fvp == tdvp)
3315 		error = EINVAL;
3316 	/*
3317 	 * If the source is the same as the destination (that is, if they
3318 	 * are links to the same vnode), then there is nothing to do.
3319 	 */
3320 	if (fvp == tvp)
3321 		error = -1;
3322 #ifdef MAC
3323 	else
3324 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3325 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3326 #endif
3327 out:
3328 	if (!error) {
3329 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3330 		if (fromnd.ni_dvp != tdvp) {
3331 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3332 		}
3333 		if (tvp) {
3334 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3335 		}
3336 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3337 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3338 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3339 		NDFREE(&tond, NDF_ONLY_PNBUF);
3340 	} else {
3341 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3342 		NDFREE(&tond, NDF_ONLY_PNBUF);
3343 		if (tvp)
3344 			vput(tvp);
3345 		if (tdvp == tvp)
3346 			vrele(tdvp);
3347 		else
3348 			vput(tdvp);
3349 		vrele(fromnd.ni_dvp);
3350 		vrele(fvp);
3351 	}
3352 	vrele(tond.ni_startdir);
3353 	vn_finished_write(mp);
3354 out1:
3355 	if (fromnd.ni_startdir)
3356 		vrele(fromnd.ni_startdir);
3357 	VFS_UNLOCK_GIANT(fvfslocked);
3358 	VFS_UNLOCK_GIANT(tvfslocked);
3359 	if (error == -1)
3360 		return (0);
3361 	return (error);
3362 }
3363 
3364 /*
3365  * Make a directory file.
3366  */
3367 #ifndef _SYS_SYSPROTO_H_
3368 struct mkdir_args {
3369 	char	*path;
3370 	int	mode;
3371 };
3372 #endif
3373 int
3374 mkdir(td, uap)
3375 	struct thread *td;
3376 	register struct mkdir_args /* {
3377 		char *path;
3378 		int mode;
3379 	} */ *uap;
3380 {
3381 
3382 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3383 }
3384 
3385 int
3386 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3387 {
3388 	struct mount *mp;
3389 	struct vnode *vp;
3390 	struct vattr vattr;
3391 	int error;
3392 	struct nameidata nd;
3393 	int vfslocked;
3394 
3395 	AUDIT_ARG(mode, mode);
3396 restart:
3397 	bwillwrite();
3398 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3399 	    segflg, path, td);
3400 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3401 	if ((error = namei(&nd)) != 0)
3402 		return (error);
3403 	vfslocked = NDHASGIANT(&nd);
3404 	vp = nd.ni_vp;
3405 	if (vp != NULL) {
3406 		NDFREE(&nd, NDF_ONLY_PNBUF);
3407 		/*
3408 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3409 		 * the strange behaviour of leaving the vnode unlocked
3410 		 * if the target is the same vnode as the parent.
3411 		 */
3412 		if (vp == nd.ni_dvp)
3413 			vrele(nd.ni_dvp);
3414 		else
3415 			vput(nd.ni_dvp);
3416 		vrele(vp);
3417 		VFS_UNLOCK_GIANT(vfslocked);
3418 		return (EEXIST);
3419 	}
3420 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3421 		NDFREE(&nd, NDF_ONLY_PNBUF);
3422 		vput(nd.ni_dvp);
3423 		VFS_UNLOCK_GIANT(vfslocked);
3424 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3425 			return (error);
3426 		goto restart;
3427 	}
3428 	VATTR_NULL(&vattr);
3429 	vattr.va_type = VDIR;
3430 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3431 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3432 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3433 #ifdef MAC
3434 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3435 	    &vattr);
3436 	if (error)
3437 		goto out;
3438 #endif
3439 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3440 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3441 #ifdef MAC
3442 out:
3443 #endif
3444 	NDFREE(&nd, NDF_ONLY_PNBUF);
3445 	vput(nd.ni_dvp);
3446 	if (!error)
3447 		vput(nd.ni_vp);
3448 	vn_finished_write(mp);
3449 	VFS_UNLOCK_GIANT(vfslocked);
3450 	return (error);
3451 }
3452 
3453 /*
3454  * Remove a directory file.
3455  */
3456 #ifndef _SYS_SYSPROTO_H_
3457 struct rmdir_args {
3458 	char	*path;
3459 };
3460 #endif
3461 int
3462 rmdir(td, uap)
3463 	struct thread *td;
3464 	struct rmdir_args /* {
3465 		char *path;
3466 	} */ *uap;
3467 {
3468 
3469 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3470 }
3471 
3472 int
3473 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3474 {
3475 	struct mount *mp;
3476 	struct vnode *vp;
3477 	int error;
3478 	struct nameidata nd;
3479 	int vfslocked;
3480 
3481 restart:
3482 	bwillwrite();
3483 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3484 	    pathseg, path, td);
3485 	if ((error = namei(&nd)) != 0)
3486 		return (error);
3487 	vfslocked = NDHASGIANT(&nd);
3488 	vp = nd.ni_vp;
3489 	if (vp->v_type != VDIR) {
3490 		error = ENOTDIR;
3491 		goto out;
3492 	}
3493 	/*
3494 	 * No rmdir "." please.
3495 	 */
3496 	if (nd.ni_dvp == vp) {
3497 		error = EINVAL;
3498 		goto out;
3499 	}
3500 	/*
3501 	 * The root of a mounted filesystem cannot be deleted.
3502 	 */
3503 	if (vp->v_vflag & VV_ROOT) {
3504 		error = EBUSY;
3505 		goto out;
3506 	}
3507 #ifdef MAC
3508 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3509 	    &nd.ni_cnd);
3510 	if (error)
3511 		goto out;
3512 #endif
3513 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3514 		NDFREE(&nd, NDF_ONLY_PNBUF);
3515 		vput(vp);
3516 		if (nd.ni_dvp == vp)
3517 			vrele(nd.ni_dvp);
3518 		else
3519 			vput(nd.ni_dvp);
3520 		VFS_UNLOCK_GIANT(vfslocked);
3521 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3522 			return (error);
3523 		goto restart;
3524 	}
3525 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3526 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3527 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3528 	vn_finished_write(mp);
3529 out:
3530 	NDFREE(&nd, NDF_ONLY_PNBUF);
3531 	vput(vp);
3532 	if (nd.ni_dvp == vp)
3533 		vrele(nd.ni_dvp);
3534 	else
3535 		vput(nd.ni_dvp);
3536 	VFS_UNLOCK_GIANT(vfslocked);
3537 	return (error);
3538 }
3539 
3540 #ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 *
 * COMPAT_43 variant.  Reads up to uap->count bytes of entries from the
 * directory open on uap->fd into uap->buf, rewriting the d_type/d_namlen
 * bytes of each entry before it is copied out.  The file offset at which
 * the read started is stored through uap->basep, and the number of bytes
 * produced is returned in td->td_retval[0].
 *
 * Returns EINVAL if count exceeds 64KB or the vnode is not a directory,
 * EBADF if the descriptor is not open for reading, EIO if an entry with a
 * zero record length is encountered, or an error from getvnode(), the MAC
 * check, VOP_READDIR(), uiomove(), union_dircheckp() or copyout().
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		int fd;
		char *buf;
		u_int count;
		long *basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	/* Re-entered with a different vp when a union lower layer is used. */
unionread:
	if (vp->v_type != VDIR) {
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the caller's buffer as the final destination. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember the starting offset; it is reported through basep. */
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_check_vnode_readdir(td->td_ucred, vp);
	if (error) {
		VOP_UNLOCK(vp, 0, td);
		fdrop(fp, td);
		return (error);
	}
#endif
	/*
	 * NOTE(review): on non-little-endian hosts, mounts with
	 * mnt_maxsymlinklen <= 0 are read straight into the user buffer
	 * with no per-entry conversion; presumably their on-disk entries
	 * already have the old layout -- confirm.
	 */
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Read into a temporary kernel buffer so the entries can
		 * be rewritten before being copied out to the user.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			/* Bytes actually produced by VOP_READDIR(). */
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/* A zero record length would never advance. */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Copy out only if the whole buffer was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, td);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
	/* Nothing was transferred: check whether a union layer applies. */
	if (uap->count == auio.uio_resid) {
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			/* -1 asks for the read to be retried on the new vp. */
			if (error == -1)
				goto unionread;
			if (error) {
				fdrop(fp, td);
				return (error);
			}
		}
		/*
		 * XXX We could delay dropping the lock above but
		 * union_dircheckp complicates things.
		 */
		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
		if ((vp->v_vflag & VV_ROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * Root of a union mount: switch the open file to
			 * the covered vnode and read it from offset 0.
			 */
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_vnode = vp;
			fp->f_data = vp;
			fp->f_offset = 0;
			vput(tvp);
			goto unionread;
		}
		VOP_UNLOCK(vp, 0, td);
	}
	error = copyout(&loff, uap->basep, sizeof(long));
	fdrop(fp, td);
	/* The byte count is set even if the copyout() above failed. */
	td->td_retval[0] = uap->count - auio.uio_resid;
	return (error);
}
3693 #endif /* COMPAT_43 */
3694 
3695 /*
3696  * Read a block of directory entries in a filesystem independent format.
3697  */
3698 #ifndef _SYS_SYSPROTO_H_
3699 struct getdirentries_args {
3700 	int	fd;
3701 	char	*buf;
3702 	u_int	count;
3703 	long	*basep;
3704 };
3705 #endif
3706 int
3707 getdirentries(td, uap)
3708 	struct thread *td;
3709 	register struct getdirentries_args /* {
3710 		int fd;
3711 		char *buf;
3712 		u_int count;
3713 		long *basep;
3714 	} */ *uap;
3715 {
3716 	struct vnode *vp;
3717 	struct file *fp;
3718 	struct uio auio;
3719 	struct iovec aiov;
3720 	int vfslocked;
3721 	long loff;
3722 	int error, eofflag;
3723 
3724 	AUDIT_ARG(fd, uap->fd);
3725 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3726 		return (error);
3727 	if ((fp->f_flag & FREAD) == 0) {
3728 		fdrop(fp, td);
3729 		return (EBADF);
3730 	}
3731 	vp = fp->f_vnode;
3732 unionread:
3733 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3734 	if (vp->v_type != VDIR) {
3735 		error = EINVAL;
3736 		goto fail;
3737 	}
3738 	aiov.iov_base = uap->buf;
3739 	aiov.iov_len = uap->count;
3740 	auio.uio_iov = &aiov;
3741 	auio.uio_iovcnt = 1;
3742 	auio.uio_rw = UIO_READ;
3743 	auio.uio_segflg = UIO_USERSPACE;
3744 	auio.uio_td = td;
3745 	auio.uio_resid = uap->count;
3746 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3747 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3748 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3749 	loff = auio.uio_offset = fp->f_offset;
3750 #ifdef MAC
3751 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3752 	if (error == 0)
3753 #endif
3754 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3755 		    NULL);
3756 	fp->f_offset = auio.uio_offset;
3757 	VOP_UNLOCK(vp, 0, td);
3758 	if (error)
3759 		goto fail;
3760 	if (uap->count == auio.uio_resid) {
3761 		if (union_dircheckp) {
3762 			error = union_dircheckp(td, &vp, fp);
3763 			if (error == -1) {
3764 				VFS_UNLOCK_GIANT(vfslocked);
3765 				goto unionread;
3766 			}
3767 			if (error)
3768 				goto fail;
3769 		}
3770 		/*
3771 		 * XXX We could delay dropping the lock above but
3772 		 * union_dircheckp complicates things.
3773 		 */
3774 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3775 		if ((vp->v_vflag & VV_ROOT) &&
3776 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3777 			struct vnode *tvp = vp;
3778 			vp = vp->v_mount->mnt_vnodecovered;
3779 			VREF(vp);
3780 			fp->f_vnode = vp;
3781 			fp->f_data = vp;
3782 			fp->f_offset = 0;
3783 			vput(tvp);
3784 			VFS_UNLOCK_GIANT(vfslocked);
3785 			goto unionread;
3786 		}
3787 		VOP_UNLOCK(vp, 0, td);
3788 	}
3789 	if (uap->basep != NULL) {
3790 		error = copyout(&loff, uap->basep, sizeof(long));
3791 	}
3792 	td->td_retval[0] = uap->count - auio.uio_resid;
3793 fail:
3794 	VFS_UNLOCK_GIANT(vfslocked);
3795 	fdrop(fp, td);
3796 	return (error);
3797 }
3798 #ifndef _SYS_SYSPROTO_H_
3799 struct getdents_args {
3800 	int fd;
3801 	char *buf;
3802 	size_t count;
3803 };
3804 #endif
3805 int
3806 getdents(td, uap)
3807 	struct thread *td;
3808 	register struct getdents_args /* {
3809 		int fd;
3810 		char *buf;
3811 		u_int count;
3812 	} */ *uap;
3813 {
3814 	struct getdirentries_args ap;
3815 	ap.fd = uap->fd;
3816 	ap.buf = uap->buf;
3817 	ap.count = uap->count;
3818 	ap.basep = NULL;
3819 	return (getdirentries(td, &ap));
3820 }
3821 
3822 /*
3823  * Set the mode mask for creation of filesystem nodes.
3824  *
3825  * MP SAFE
3826  */
3827 #ifndef _SYS_SYSPROTO_H_
3828 struct umask_args {
3829 	int	newmask;
3830 };
3831 #endif
3832 int
3833 umask(td, uap)
3834 	struct thread *td;
3835 	struct umask_args /* {
3836 		int newmask;
3837 	} */ *uap;
3838 {
3839 	register struct filedesc *fdp;
3840 
3841 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3842 	fdp = td->td_proc->p_fd;
3843 	td->td_retval[0] = fdp->fd_cmask;
3844 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3845 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3846 	return (0);
3847 }
3848 
3849 /*
3850  * Void all references to file by ripping underlying filesystem
3851  * away from vnode.
3852  */
3853 #ifndef _SYS_SYSPROTO_H_
3854 struct revoke_args {
3855 	char	*path;
3856 };
3857 #endif
3858 int
3859 revoke(td, uap)
3860 	struct thread *td;
3861 	register struct revoke_args /* {
3862 		char *path;
3863 	} */ *uap;
3864 {
3865 	struct vnode *vp;
3866 	struct vattr vattr;
3867 	int error;
3868 	struct nameidata nd;
3869 	int vfslocked;
3870 
3871 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3872 	    UIO_USERSPACE, uap->path, td);
3873 	if ((error = namei(&nd)) != 0)
3874 		return (error);
3875 	vfslocked = NDHASGIANT(&nd);
3876 	vp = nd.ni_vp;
3877 	NDFREE(&nd, NDF_ONLY_PNBUF);
3878 	if (vp->v_type != VCHR) {
3879 		error = EINVAL;
3880 		goto out;
3881 	}
3882 #ifdef MAC
3883 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3884 	if (error)
3885 		goto out;
3886 #endif
3887 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3888 	if (error)
3889 		goto out;
3890 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3891 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
3892 		if (error)
3893 			goto out;
3894 	}
3895 	if (vcount(vp) > 1)
3896 		VOP_REVOKE(vp, REVOKEALL);
3897 out:
3898 	vput(vp);
3899 	VFS_UNLOCK_GIANT(vfslocked);
3900 	return (error);
3901 }
3902 
3903 /*
3904  * Convert a user file descriptor to a kernel file entry.
3905  * A reference on the file entry is held upon returning.
3906  */
3907 int
3908 getvnode(fdp, fd, fpp)
3909 	struct filedesc *fdp;
3910 	int fd;
3911 	struct file **fpp;
3912 {
3913 	int error;
3914 	struct file *fp;
3915 
3916 	fp = NULL;
3917 	if (fdp == NULL)
3918 		error = EBADF;
3919 	else {
3920 		FILEDESC_LOCK(fdp);
3921 		if ((u_int)fd >= fdp->fd_nfiles ||
3922 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3923 			error = EBADF;
3924 		else if (fp->f_vnode == NULL) {
3925 			fp = NULL;
3926 			error = EINVAL;
3927 		} else {
3928 			fhold(fp);
3929 			error = 0;
3930 		}
3931 		FILEDESC_UNLOCK(fdp);
3932 	}
3933 	*fpp = fp;
3934 	return (error);
3935 }
3936 
3937 /*
3938  * Get (NFS) file handle
3939  */
3940 #ifndef _SYS_SYSPROTO_H_
3941 struct lgetfh_args {
3942 	char	*fname;
3943 	fhandle_t *fhp;
3944 };
3945 #endif
3946 int
3947 lgetfh(td, uap)
3948 	struct thread *td;
3949 	register struct lgetfh_args *uap;
3950 {
3951 	struct nameidata nd;
3952 	fhandle_t fh;
3953 	register struct vnode *vp;
3954 	int vfslocked;
3955 	int error;
3956 
3957 	error = suser(td);
3958 	if (error)
3959 		return (error);
3960 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3961 	    UIO_USERSPACE, uap->fname, td);
3962 	error = namei(&nd);
3963 	if (error)
3964 		return (error);
3965 	vfslocked = NDHASGIANT(&nd);
3966 	NDFREE(&nd, NDF_ONLY_PNBUF);
3967 	vp = nd.ni_vp;
3968 	bzero(&fh, sizeof(fh));
3969 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3970 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3971 	vput(vp);
3972 	VFS_UNLOCK_GIANT(vfslocked);
3973 	if (error)
3974 		return (error);
3975 	error = copyout(&fh, uap->fhp, sizeof (fh));
3976 	return (error);
3977 }
3978 
3979 #ifndef _SYS_SYSPROTO_H_
3980 struct getfh_args {
3981 	char	*fname;
3982 	fhandle_t *fhp;
3983 };
3984 #endif
3985 int
3986 getfh(td, uap)
3987 	struct thread *td;
3988 	register struct getfh_args *uap;
3989 {
3990 	struct nameidata nd;
3991 	fhandle_t fh;
3992 	register struct vnode *vp;
3993 	int vfslocked;
3994 	int error;
3995 
3996 	error = suser(td);
3997 	if (error)
3998 		return (error);
3999 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4000 	    UIO_USERSPACE, uap->fname, td);
4001 	error = namei(&nd);
4002 	if (error)
4003 		return (error);
4004 	vfslocked = NDHASGIANT(&nd);
4005 	NDFREE(&nd, NDF_ONLY_PNBUF);
4006 	vp = nd.ni_vp;
4007 	bzero(&fh, sizeof(fh));
4008 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4009 	error = VFS_VPTOFH(vp, &fh.fh_fid);
4010 	vput(vp);
4011 	VFS_UNLOCK_GIANT(vfslocked);
4012 	if (error)
4013 		return (error);
4014 	error = copyout(&fh, uap->fhp, sizeof (fh));
4015 	return (error);
4016 }
4017 
/*
 * syscall for the rpc.lockd to use to translate a NFS file handle into
 * an open descriptor.
 *
 * The handle is copied in from uap->u_fhp, resolved to a mount point
 * with vfs_getvfs() and to a vnode with VFS_FHTOVP().  The vnode is then
 * put through open checks taken from vn_open and attached to a file
 * obtained from falloc(); the new descriptor index is returned in
 * td->td_retval[0].
 *
 * Fails with EINVAL if neither FREAD nor FWRITE is requested or if
 * O_CREAT is set, with ESTALE if the handle's fsid matches no mount,
 * and otherwise with the error of whichever step failed.
 *
 * warning: do not remove the suser() call or this becomes one giant
 * security hole.
 *
 * MP SAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct fhopen_args {
	const struct fhandle *u_fhp;
	int flags;
};
#endif
int
fhopen(td, uap)
	struct thread *td;
	struct fhopen_args /* {
		const struct fhandle *u_fhp;
		int flags;
	} */ *uap;
{
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	int fmode, mode, error, type;
	struct file *nfp;
	int vfslocked;
	int indx;

	error = suser(td);
	if (error)
		return (error);
	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);
	/* find the mount point */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	/* From here on every exit path goes through vfs_rel(mp). */
	vfslocked = VFS_LOCK_GIANT(mp);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	if (error)
		goto out;
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* Translate the open flags into the access mode to be checked. */
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (fmode & O_APPEND)
		mode |= VAPPEND;
#ifdef MAC
	error = mac_check_vnode_open(td->td_ucred, vp, mode);
	if (error)
		goto bad;
#endif
	if (mode) {
		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/*
		 * The vnode is unlocked around vn_start_write() and relocked
		 * afterwards; on failure only the reference is left to drop,
		 * hence vrele() and 'goto out' rather than 'goto bad'.
		 */
		VOP_UNLOCK(vp, 0, td);				/* XXX */
		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
			vrele(vp);
			goto out;
		}
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
#ifdef MAC
		/*
		 * We don't yet have fp->f_cred, so use td->td_ucred, which
		 * should be right.
		 */
		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
		if (error == 0) {
#endif
			/* Truncate by setting only the size attribute to 0. */
			VATTR_NULL(vap);
			vap->va_size = 0;
			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
#ifdef MAC
		}
#endif
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
	if (error)
		goto bad;

	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * end of vn_open code
	 */

	if ((error = falloc(td, &nfp, &indx)) != 0) {
		/* Undo the writer count taken just above. */
		if (fmode & FWRITE)
			vp->v_writecount--;
		goto bad;
	}
	/* An extra reference on `nfp' has been held for us by falloc(). */
	fp = nfp;

	nfp->f_vnode = vp;
	nfp->f_data = vp;
	nfp->f_flag = fmode & FMASK;
	nfp->f_ops = &vnops;
	nfp->f_type = DTYPE_VNODE;
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* A start and length of 0 describe a lock on the whole file. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode is not held locked across the advisory lock call. */
		VOP_UNLOCK(vp, 0, td);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0) {
			/*
			 * The lock request failed.  Normally close the
			 * descriptor but handle the case where someone might
			 * have dup()d or close()d it when we weren't looking.
			 */
			fdclose(fdp, fp, indx, td);

			/*
			 * release our private reference
			 */
			fdrop(fp, td);
			/* vp is unlocked here, so skip the vput() at 'bad'. */
			goto out;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		fp->f_flag |= FHASLOCK;
	}

	/* Success: unlock the vnode and drop the extra falloc() reference. */
	VOP_UNLOCK(vp, 0, td);
	fdrop(fp, td);
	vfs_rel(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	td->td_retval[0] = indx;
	return (0);

bad:
	/* vp is locked and referenced on every path that jumps here. */
	vput(vp);
out:
	vfs_rel(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
4212 
4213 /*
4214  * Stat an (NFS) file handle.
4215  *
4216  * MP SAFE
4217  */
4218 #ifndef _SYS_SYSPROTO_H_
4219 struct fhstat_args {
4220 	struct fhandle *u_fhp;
4221 	struct stat *sb;
4222 };
4223 #endif
4224 int
4225 fhstat(td, uap)
4226 	struct thread *td;
4227 	register struct fhstat_args /* {
4228 		struct fhandle *u_fhp;
4229 		struct stat *sb;
4230 	} */ *uap;
4231 {
4232 	struct stat sb;
4233 	fhandle_t fh;
4234 	struct mount *mp;
4235 	struct vnode *vp;
4236 	int vfslocked;
4237 	int error;
4238 
4239 	error = suser(td);
4240 	if (error)
4241 		return (error);
4242 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4243 	if (error)
4244 		return (error);
4245 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4246 		return (ESTALE);
4247 	vfslocked = VFS_LOCK_GIANT(mp);
4248 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4249 		vfs_rel(mp);
4250 		VFS_UNLOCK_GIANT(vfslocked);
4251 		return (error);
4252 	}
4253 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4254 	vput(vp);
4255 	vfs_rel(mp);
4256 	VFS_UNLOCK_GIANT(vfslocked);
4257 	if (error)
4258 		return (error);
4259 	error = copyout(&sb, uap->sb, sizeof(sb));
4260 	return (error);
4261 }
4262 
4263 /*
4264  * Implement fstatfs() for (NFS) file handles.
4265  *
4266  * MP SAFE
4267  */
4268 #ifndef _SYS_SYSPROTO_H_
4269 struct fhstatfs_args {
4270 	struct fhandle *u_fhp;
4271 	struct statfs *buf;
4272 };
4273 #endif
4274 int
4275 fhstatfs(td, uap)
4276 	struct thread *td;
4277 	struct fhstatfs_args /* {
4278 		struct fhandle *u_fhp;
4279 		struct statfs *buf;
4280 	} */ *uap;
4281 {
4282 	struct statfs sf;
4283 	fhandle_t fh;
4284 	int error;
4285 
4286 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4287 	if (error)
4288 		return (error);
4289 	error = kern_fhstatfs(td, fh, &sf);
4290 	if (error)
4291 		return (error);
4292 	return (copyout(&sf, uap->buf, sizeof(sf)));
4293 }
4294 
4295 int
4296 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4297 {
4298 	struct statfs *sp;
4299 	struct mount *mp;
4300 	struct vnode *vp;
4301 	int vfslocked;
4302 	int error;
4303 
4304 	error = suser(td);
4305 	if (error)
4306 		return (error);
4307 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4308 		return (ESTALE);
4309 	vfslocked = VFS_LOCK_GIANT(mp);
4310 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4311 	if (error) {
4312 		VFS_UNLOCK_GIANT(vfslocked);
4313 		vfs_rel(mp);
4314 		return (error);
4315 	}
4316 	vput(vp);
4317 	sp = NULL;
4318 	error = prison_canseemount(td->td_ucred, mp);
4319 	if (error)
4320 		goto out;
4321 #ifdef MAC
4322 	error = mac_check_mount_stat(td->td_ucred, mp);
4323 	if (error)
4324 		goto out;
4325 #endif
4326 	/*
4327 	 * Set these in case the underlying filesystem fails to do so.
4328 	 */
4329 	sp = &mp->mnt_stat;
4330 	sp->f_version = STATFS_VERSION;
4331 	sp->f_namemax = NAME_MAX;
4332 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4333 	error = VFS_STATFS(mp, sp, td);
4334 out:
4335 	vfs_rel(mp);
4336 	VFS_UNLOCK_GIANT(vfslocked);
4337 	if (sp)
4338 		*buf = *sp;
4339 	return (error);
4340 }
4341 
/*
 * Syscall to push extended attribute configuration information into the
 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
 * a command (int cmd), and attribute name and misc data.  For now, the
 * attribute name is left in userspace for consumption by the VFS_op.
 * It will probably be changed to be copied into sysspace by the
 * syscall in the future, once issues with various consumers of the
 * attribute code have raised their hands.
 *
 * Currently this is used only by UFS Extended Attributes.
 *
 * Both uap->filename and uap->attrname are optional (may be NULL).
 * Returns 0 on success, otherwise the error from copyinstr(), namei(),
 * vn_start_write() or VFS_EXTATTRCTL().
 */
int
extattrctl(td, uap)
	struct thread *td;
	struct extattrctl_args /* {
		const char *path;
		int cmd;
		const char *filename;
		int attrnamespace;
		const char *attrname;
	} */ *uap;
{
	struct vnode *filename_vp;
	struct nameidata nd;
	struct mount *mp, *mp_writable;
	char attrname[EXTATTR_MAXNAMELEN];
	int vfslocked, fnvfslocked, error;

	/*
	 * uap->attrname is not always defined.  We check again later when we
	 * invoke the VFS call so as to pass in NULL there if needed.
	 */
	if (uap->attrname != NULL) {
		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
		    NULL);
		if (error)
			return (error);
	}

	/*
	 * Giant state is tracked separately for the two lookups:
	 * fnvfslocked for uap->filename, vfslocked for uap->path.
	 */
	vfslocked = fnvfslocked = 0;
	/*
	 * uap->filename is not always defined.  If it is, grab a vnode lock,
	 * which VFS_EXTATTRCTL() will later release.
	 */
	filename_vp = NULL;
	if (uap->filename != NULL) {
		NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF |
		    AUDITVNODE2, UIO_USERSPACE, uap->filename, td);
		error = namei(&nd);
		if (error)
			return (error);
		fnvfslocked = NDHASGIANT(&nd);
		filename_vp = nd.ni_vp;
		/* Keep filename_vp both referenced and locked past NDFREE(). */
		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
	}

	/* uap->path is always defined. */
	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
	    uap->path, td);
	error = namei(&nd);
	if (error) {
		/* filename_vp is still locked here, hence vput() not vrele(). */
		if (filename_vp != NULL)
			vput(filename_vp);
		goto out;
	}
	vfslocked = NDHASGIANT(&nd);
	/* Only the mount of the path vnode is needed from this lookup. */
	mp = nd.ni_vp->v_mount;
	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
	NDFREE(&nd, 0);
	if (error) {
		if (filename_vp != NULL)
			vput(filename_vp);
		goto out;
	}

	/* Hand the kernel copy of the name down only if one was supplied. */
	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
	    uap->attrname != NULL ? attrname : NULL, td);

	vn_finished_write(mp_writable);
	/*
	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
	 * filename_vp, so vrele it if it is defined.
	 */
	if (filename_vp != NULL)
		vrele(filename_vp);
out:
	VFS_UNLOCK_GIANT(fnvfslocked);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
4432 
4433 /*-
4434  * Set a named extended attribute on a file or directory
4435  *
4436  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4437  *            kernelspace string pointer "attrname", userspace buffer
4438  *            pointer "data", buffer length "nbytes", thread "td".
4439  * Returns: 0 on success, an error number otherwise
4440  * Locks: none
4441  * References: vp must be a valid reference for the duration of the call
4442  */
4443 static int
4444 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4445     void *data, size_t nbytes, struct thread *td)
4446 {
4447 	struct mount *mp;
4448 	struct uio auio;
4449 	struct iovec aiov;
4450 	ssize_t cnt;
4451 	int error;
4452 
4453 	VFS_ASSERT_GIANT(vp->v_mount);
4454 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4455 	if (error)
4456 		return (error);
4457 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4458 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4459 
4460 	aiov.iov_base = data;
4461 	aiov.iov_len = nbytes;
4462 	auio.uio_iov = &aiov;
4463 	auio.uio_iovcnt = 1;
4464 	auio.uio_offset = 0;
4465 	if (nbytes > INT_MAX) {
4466 		error = EINVAL;
4467 		goto done;
4468 	}
4469 	auio.uio_resid = nbytes;
4470 	auio.uio_rw = UIO_WRITE;
4471 	auio.uio_segflg = UIO_USERSPACE;
4472 	auio.uio_td = td;
4473 	cnt = nbytes;
4474 
4475 #ifdef MAC
4476 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4477 	    attrname, &auio);
4478 	if (error)
4479 		goto done;
4480 #endif
4481 
4482 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4483 	    td->td_ucred, td);
4484 	cnt -= auio.uio_resid;
4485 	td->td_retval[0] = cnt;
4486 
4487 done:
4488 	VOP_UNLOCK(vp, 0, td);
4489 	vn_finished_write(mp);
4490 	return (error);
4491 }
4492 
4493 int
4494 extattr_set_fd(td, uap)
4495 	struct thread *td;
4496 	struct extattr_set_fd_args /* {
4497 		int fd;
4498 		int attrnamespace;
4499 		const char *attrname;
4500 		void *data;
4501 		size_t nbytes;
4502 	} */ *uap;
4503 {
4504 	struct file *fp;
4505 	char attrname[EXTATTR_MAXNAMELEN];
4506 	int vfslocked, error;
4507 
4508 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4509 	if (error)
4510 		return (error);
4511 
4512 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4513 	if (error)
4514 		return (error);
4515 
4516 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4517 	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4518 	    attrname, uap->data, uap->nbytes, td);
4519 	fdrop(fp, td);
4520 	VFS_UNLOCK_GIANT(vfslocked);
4521 
4522 	return (error);
4523 }
4524 
4525 int
4526 extattr_set_file(td, uap)
4527 	struct thread *td;
4528 	struct extattr_set_file_args /* {
4529 		const char *path;
4530 		int attrnamespace;
4531 		const char *attrname;
4532 		void *data;
4533 		size_t nbytes;
4534 	} */ *uap;
4535 {
4536 	struct nameidata nd;
4537 	char attrname[EXTATTR_MAXNAMELEN];
4538 	int vfslocked, error;
4539 
4540 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4541 	if (error)
4542 		return (error);
4543 
4544 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4545 	    uap->path, td);
4546 	error = namei(&nd);
4547 	if (error)
4548 		return (error);
4549 	NDFREE(&nd, NDF_ONLY_PNBUF);
4550 
4551 	vfslocked = NDHASGIANT(&nd);
4552 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4553 	    uap->data, uap->nbytes, td);
4554 
4555 	vrele(nd.ni_vp);
4556 	VFS_UNLOCK_GIANT(vfslocked);
4557 	return (error);
4558 }
4559 
4560 int
4561 extattr_set_link(td, uap)
4562 	struct thread *td;
4563 	struct extattr_set_link_args /* {
4564 		const char *path;
4565 		int attrnamespace;
4566 		const char *attrname;
4567 		void *data;
4568 		size_t nbytes;
4569 	} */ *uap;
4570 {
4571 	struct nameidata nd;
4572 	char attrname[EXTATTR_MAXNAMELEN];
4573 	int vfslocked, error;
4574 
4575 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4576 	if (error)
4577 		return (error);
4578 
4579 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4580 	    uap->path, td);
4581 	error = namei(&nd);
4582 	if (error)
4583 		return (error);
4584 	NDFREE(&nd, NDF_ONLY_PNBUF);
4585 
4586 	vfslocked = NDHASGIANT(&nd);
4587 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4588 	    uap->data, uap->nbytes, td);
4589 
4590 	vrele(nd.ni_vp);
4591 	VFS_UNLOCK_GIANT(vfslocked);
4592 	return (error);
4593 }
4594 
4595 /*-
4596  * Get a named extended attribute on a file or directory
4597  *
4598  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4599  *            kernelspace string pointer "attrname", userspace buffer
4600  *            pointer "data", buffer length "nbytes", thread "td".
4601  * Returns: 0 on success, an error number otherwise
4602  * Locks: none
4603  * References: vp must be a valid reference for the duration of the call
4604  */
4605 static int
4606 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4607     void *data, size_t nbytes, struct thread *td)
4608 {
4609 	struct uio auio, *auiop;
4610 	struct iovec aiov;
4611 	ssize_t cnt;
4612 	size_t size, *sizep;
4613 	int error;
4614 
4615 	VFS_ASSERT_GIANT(vp->v_mount);
4616 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4617 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4618 
4619 	/*
4620 	 * Slightly unusual semantics: if the user provides a NULL data
4621 	 * pointer, they don't want to receive the data, just the
4622 	 * maximum read length.
4623 	 */
4624 	auiop = NULL;
4625 	sizep = NULL;
4626 	cnt = 0;
4627 	if (data != NULL) {
4628 		aiov.iov_base = data;
4629 		aiov.iov_len = nbytes;
4630 		auio.uio_iov = &aiov;
4631 		auio.uio_iovcnt = 1;
4632 		auio.uio_offset = 0;
4633 		if (nbytes > INT_MAX) {
4634 			error = EINVAL;
4635 			goto done;
4636 		}
4637 		auio.uio_resid = nbytes;
4638 		auio.uio_rw = UIO_READ;
4639 		auio.uio_segflg = UIO_USERSPACE;
4640 		auio.uio_td = td;
4641 		auiop = &auio;
4642 		cnt = nbytes;
4643 	} else
4644 		sizep = &size;
4645 
4646 #ifdef MAC
4647 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4648 	    attrname, &auio);
4649 	if (error)
4650 		goto done;
4651 #endif
4652 
4653 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4654 	    td->td_ucred, td);
4655 
4656 	if (auiop != NULL) {
4657 		cnt -= auio.uio_resid;
4658 		td->td_retval[0] = cnt;
4659 	} else
4660 		td->td_retval[0] = size;
4661 
4662 done:
4663 	VOP_UNLOCK(vp, 0, td);
4664 	return (error);
4665 }
4666 
4667 int
4668 extattr_get_fd(td, uap)
4669 	struct thread *td;
4670 	struct extattr_get_fd_args /* {
4671 		int fd;
4672 		int attrnamespace;
4673 		const char *attrname;
4674 		void *data;
4675 		size_t nbytes;
4676 	} */ *uap;
4677 {
4678 	struct file *fp;
4679 	char attrname[EXTATTR_MAXNAMELEN];
4680 	int vfslocked, error;
4681 
4682 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4683 	if (error)
4684 		return (error);
4685 
4686 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4687 	if (error)
4688 		return (error);
4689 
4690 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4691 	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4692 	    attrname, uap->data, uap->nbytes, td);
4693 
4694 	fdrop(fp, td);
4695 	VFS_UNLOCK_GIANT(vfslocked);
4696 	return (error);
4697 }
4698 
4699 int
4700 extattr_get_file(td, uap)
4701 	struct thread *td;
4702 	struct extattr_get_file_args /* {
4703 		const char *path;
4704 		int attrnamespace;
4705 		const char *attrname;
4706 		void *data;
4707 		size_t nbytes;
4708 	} */ *uap;
4709 {
4710 	struct nameidata nd;
4711 	char attrname[EXTATTR_MAXNAMELEN];
4712 	int vfslocked, error;
4713 
4714 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4715 	if (error)
4716 		return (error);
4717 
4718 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4719 	    uap->path, td);
4720 	error = namei(&nd);
4721 	if (error)
4722 		return (error);
4723 	NDFREE(&nd, NDF_ONLY_PNBUF);
4724 
4725 	vfslocked = NDHASGIANT(&nd);
4726 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4727 	    uap->data, uap->nbytes, td);
4728 
4729 	vrele(nd.ni_vp);
4730 	VFS_UNLOCK_GIANT(vfslocked);
4731 	return (error);
4732 }
4733 
4734 int
4735 extattr_get_link(td, uap)
4736 	struct thread *td;
4737 	struct extattr_get_link_args /* {
4738 		const char *path;
4739 		int attrnamespace;
4740 		const char *attrname;
4741 		void *data;
4742 		size_t nbytes;
4743 	} */ *uap;
4744 {
4745 	struct nameidata nd;
4746 	char attrname[EXTATTR_MAXNAMELEN];
4747 	int vfslocked, error;
4748 
4749 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4750 	if (error)
4751 		return (error);
4752 
4753 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4754 	    uap->path, td);
4755 	error = namei(&nd);
4756 	if (error)
4757 		return (error);
4758 	NDFREE(&nd, NDF_ONLY_PNBUF);
4759 
4760 	vfslocked = NDHASGIANT(&nd);
4761 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4762 	    uap->data, uap->nbytes, td);
4763 
4764 	vrele(nd.ni_vp);
4765 	VFS_UNLOCK_GIANT(vfslocked);
4766 	return (error);
4767 }
4768 
4769 /*
4770  * extattr_delete_vp(): Delete a named extended attribute on a file or
4771  *                      directory
4772  *
4773  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4774  *            kernelspace string pointer "attrname", proc "p"
4775  * Returns: 0 on success, an error number otherwise
4776  * Locks: none
4777  * References: vp must be a valid reference for the duration of the call
4778  */
4779 static int
4780 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4781     struct thread *td)
4782 {
4783 	struct mount *mp;
4784 	int error;
4785 
4786 	VFS_ASSERT_GIANT(vp->v_mount);
4787 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4788 	if (error)
4789 		return (error);
4790 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4791 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4792 
4793 #ifdef MAC
4794 	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4795 	    attrname);
4796 	if (error)
4797 		goto done;
4798 #endif
4799 
4800 	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4801 	    td);
4802 	if (error == EOPNOTSUPP)
4803 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4804 		    td->td_ucred, td);
4805 #ifdef MAC
4806 done:
4807 #endif
4808 	VOP_UNLOCK(vp, 0, td);
4809 	vn_finished_write(mp);
4810 	return (error);
4811 }
4812 
4813 int
4814 extattr_delete_fd(td, uap)
4815 	struct thread *td;
4816 	struct extattr_delete_fd_args /* {
4817 		int fd;
4818 		int attrnamespace;
4819 		const char *attrname;
4820 	} */ *uap;
4821 {
4822 	struct file *fp;
4823 	char attrname[EXTATTR_MAXNAMELEN];
4824 	int vfslocked, error;
4825 
4826 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4827 	if (error)
4828 		return (error);
4829 
4830 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4831 	if (error)
4832 		return (error);
4833 
4834 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4835 	error = extattr_delete_vp(fp->f_vnode, uap->attrnamespace,
4836 	    attrname, td);
4837 	fdrop(fp, td);
4838 	VFS_UNLOCK_GIANT(vfslocked);
4839 	return (error);
4840 }
4841 
4842 int
4843 extattr_delete_file(td, uap)
4844 	struct thread *td;
4845 	struct extattr_delete_file_args /* {
4846 		const char *path;
4847 		int attrnamespace;
4848 		const char *attrname;
4849 	} */ *uap;
4850 {
4851 	struct nameidata nd;
4852 	char attrname[EXTATTR_MAXNAMELEN];
4853 	int vfslocked, error;
4854 
4855 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4856 	if (error)
4857 		return(error);
4858 
4859 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4860 	    uap->path, td);
4861 	error = namei(&nd);
4862 	if (error)
4863 		return(error);
4864 	NDFREE(&nd, NDF_ONLY_PNBUF);
4865 
4866 	vfslocked = NDHASGIANT(&nd);
4867 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4868 	vrele(nd.ni_vp);
4869 	VFS_UNLOCK_GIANT(vfslocked);
4870 	return(error);
4871 }
4872 
4873 int
4874 extattr_delete_link(td, uap)
4875 	struct thread *td;
4876 	struct extattr_delete_link_args /* {
4877 		const char *path;
4878 		int attrnamespace;
4879 		const char *attrname;
4880 	} */ *uap;
4881 {
4882 	struct nameidata nd;
4883 	char attrname[EXTATTR_MAXNAMELEN];
4884 	int vfslocked, error;
4885 
4886 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4887 	if (error)
4888 		return(error);
4889 
4890 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4891 	    uap->path, td);
4892 	error = namei(&nd);
4893 	if (error)
4894 		return(error);
4895 	NDFREE(&nd, NDF_ONLY_PNBUF);
4896 
4897 	vfslocked = NDHASGIANT(&nd);
4898 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4899 	vrele(nd.ni_vp);
4900 	VFS_UNLOCK_GIANT(vfslocked);
4901 	return(error);
4902 }
4903 
4904 /*-
4905  * Retrieve a list of extended attributes on a file or directory.
4906  *
4907  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4908  *            userspace buffer pointer "data", buffer length "nbytes",
4909  *            thread "td".
4910  * Returns: 0 on success, an error number otherwise
4911  * Locks: none
4912  * References: vp must be a valid reference for the duration of the call
4913  */
4914 static int
4915 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4916     size_t nbytes, struct thread *td)
4917 {
4918 	struct uio auio, *auiop;
4919 	size_t size, *sizep;
4920 	struct iovec aiov;
4921 	ssize_t cnt;
4922 	int error;
4923 
4924 	VFS_ASSERT_GIANT(vp->v_mount);
4925 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4926 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4927 
4928 	auiop = NULL;
4929 	sizep = NULL;
4930 	cnt = 0;
4931 	if (data != NULL) {
4932 		aiov.iov_base = data;
4933 		aiov.iov_len = nbytes;
4934 		auio.uio_iov = &aiov;
4935 		auio.uio_iovcnt = 1;
4936 		auio.uio_offset = 0;
4937 		if (nbytes > INT_MAX) {
4938 			error = EINVAL;
4939 			goto done;
4940 		}
4941 		auio.uio_resid = nbytes;
4942 		auio.uio_rw = UIO_READ;
4943 		auio.uio_segflg = UIO_USERSPACE;
4944 		auio.uio_td = td;
4945 		auiop = &auio;
4946 		cnt = nbytes;
4947 	} else
4948 		sizep = &size;
4949 
4950 #ifdef MAC
4951 	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4952 	if (error)
4953 		goto done;
4954 #endif
4955 
4956 	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4957 	    td->td_ucred, td);
4958 
4959 	if (auiop != NULL) {
4960 		cnt -= auio.uio_resid;
4961 		td->td_retval[0] = cnt;
4962 	} else
4963 		td->td_retval[0] = size;
4964 
4965 done:
4966 	VOP_UNLOCK(vp, 0, td);
4967 	return (error);
4968 }
4969 
4970 
4971 int
4972 extattr_list_fd(td, uap)
4973 	struct thread *td;
4974 	struct extattr_list_fd_args /* {
4975 		int fd;
4976 		int attrnamespace;
4977 		void *data;
4978 		size_t nbytes;
4979 	} */ *uap;
4980 {
4981 	struct file *fp;
4982 	int vfslocked, error;
4983 
4984 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4985 	if (error)
4986 		return (error);
4987 
4988 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4989 	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
4990 	    uap->nbytes, td);
4991 
4992 	fdrop(fp, td);
4993 	VFS_UNLOCK_GIANT(vfslocked);
4994 	return (error);
4995 }
4996 
4997 int
4998 extattr_list_file(td, uap)
4999 	struct thread*td;
5000 	struct extattr_list_file_args /* {
5001 		const char *path;
5002 		int attrnamespace;
5003 		void *data;
5004 		size_t nbytes;
5005 	} */ *uap;
5006 {
5007 	struct nameidata nd;
5008 	int vfslocked, error;
5009 
5010 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
5011 	    uap->path, td);
5012 	error = namei(&nd);
5013 	if (error)
5014 		return (error);
5015 	NDFREE(&nd, NDF_ONLY_PNBUF);
5016 
5017 	vfslocked = NDHASGIANT(&nd);
5018 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5019 	    uap->nbytes, td);
5020 
5021 	vrele(nd.ni_vp);
5022 	VFS_UNLOCK_GIANT(vfslocked);
5023 	return (error);
5024 }
5025 
5026 int
5027 extattr_list_link(td, uap)
5028 	struct thread*td;
5029 	struct extattr_list_link_args /* {
5030 		const char *path;
5031 		int attrnamespace;
5032 		void *data;
5033 		size_t nbytes;
5034 	} */ *uap;
5035 {
5036 	struct nameidata nd;
5037 	int vfslocked, error;
5038 
5039 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
5040 	    uap->path, td);
5041 	error = namei(&nd);
5042 	if (error)
5043 		return (error);
5044 	NDFREE(&nd, NDF_ONLY_PNBUF);
5045 
5046 	vfslocked = NDHASGIANT(&nd);
5047 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5048 	    uap->nbytes, td);
5049 
5050 	vrele(nd.ni_vp);
5051 	VFS_UNLOCK_GIANT(vfslocked);
5052 	return (error);
5053 }
5054