xref: /freebsd/sys/kern/vfs_syscalls.c (revision 3fe92528afe8313fecf48822dde74bad5e380f48)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/mac.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/mutex.h>
52 #include <sys/sysproto.h>
53 #include <sys/namei.h>
54 #include <sys/filedesc.h>
55 #include <sys/kernel.h>
56 #include <sys/fcntl.h>
57 #include <sys/file.h>
58 #include <sys/limits.h>
59 #include <sys/linker.h>
60 #include <sys/stat.h>
61 #include <sys/sx.h>
62 #include <sys/unistd.h>
63 #include <sys/vnode.h>
64 #include <sys/proc.h>
65 #include <sys/dirent.h>
66 #include <sys/extattr.h>
67 #include <sys/jail.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/stdarg.h>
72 
73 #include <security/audit/audit.h>
74 
75 #include <vm/vm.h>
76 #include <vm/vm_object.h>
77 #include <vm/vm_page.h>
78 #include <vm/uma.h>
79 
80 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
81 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
82 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
83 static int setfmode(struct thread *td, struct vnode *, int);
84 static int setfflags(struct thread *td, struct vnode *, int);
85 static int setutimes(struct thread *td, struct vnode *,
86     const struct timespec *, int, int);
87 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
88     struct thread *td);
89 
90 static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
91     size_t nbytes, struct thread *td);
92 
93 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
94 
95 /*
96  * The module initialization routine for POSIX asynchronous I/O will
97  * set this to the version of AIO that it implements.  (Zero means
98  * that it is not implemented.)  This value is used here by pathconf()
99  * and in kern_descrip.c by fpathconf().
100  */
101 int async_io_version;
102 
103 /*
104  * Sync each mounted filesystem.
105  */
106 #ifndef _SYS_SYSPROTO_H_
107 struct sync_args {
108 	int     dummy;
109 };
110 #endif
111 
112 #ifdef DEBUG
113 static int syncprt = 0;
114 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
115 #endif
116 
117 /* ARGSUSED */
118 int
119 sync(td, uap)
120 	struct thread *td;
121 	struct sync_args *uap;
122 {
123 	struct mount *mp, *nmp;
124 	int vfslocked;
125 	int asyncflag;
126 
127 	mtx_lock(&mountlist_mtx);
128 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
129 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
130 			nmp = TAILQ_NEXT(mp, mnt_list);
131 			continue;
132 		}
133 		vfslocked = VFS_LOCK_GIANT(mp);
134 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
135 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
136 			asyncflag = mp->mnt_flag & MNT_ASYNC;
137 			mp->mnt_flag &= ~MNT_ASYNC;
138 			vfs_msync(mp, MNT_NOWAIT);
139 			VFS_SYNC(mp, MNT_NOWAIT, td);
140 			mp->mnt_flag |= asyncflag;
141 			vn_finished_write(mp);
142 		}
143 		VFS_UNLOCK_GIANT(vfslocked);
144 		mtx_lock(&mountlist_mtx);
145 		nmp = TAILQ_NEXT(mp, mnt_list);
146 		vfs_unbusy(mp, td);
147 	}
148 	mtx_unlock(&mountlist_mtx);
149 	return (0);
150 }
151 
152 /* XXX PRISON: could be per prison flag */
153 static int prison_quotas;
154 #if 0
155 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
156 #endif
157 
158 /*
159  * Change filesystem quotas.
160  *
161  * MP SAFE
162  */
163 #ifndef _SYS_SYSPROTO_H_
164 struct quotactl_args {
165 	char *path;
166 	int cmd;
167 	int uid;
168 	caddr_t arg;
169 };
170 #endif
171 int
172 quotactl(td, uap)
173 	struct thread *td;
174 	register struct quotactl_args /* {
175 		char *path;
176 		int cmd;
177 		int uid;
178 		caddr_t arg;
179 	} */ *uap;
180 {
181 	struct mount *mp, *vmp;
182 	int vfslocked;
183 	int error;
184 	struct nameidata nd;
185 
186 	AUDIT_ARG(cmd, uap->cmd);
187 	AUDIT_ARG(uid, uap->uid);
188 	if (jailed(td->td_ucred) && !prison_quotas)
189 		return (EPERM);
190 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
191 	   UIO_USERSPACE, uap->path, td);
192 	if ((error = namei(&nd)) != 0)
193 		return (error);
194 	vfslocked = NDHASGIANT(&nd);
195 	NDFREE(&nd, NDF_ONLY_PNBUF);
196 	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
197 	mp = nd.ni_vp->v_mount;
198 	vrele(nd.ni_vp);
199 	if (error)
200 		goto out;
201 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
202 	vn_finished_write(vmp);
203 out:
204 	VFS_UNLOCK_GIANT(vfslocked);
205 	return (error);
206 }
207 
208 /*
209  * Get filesystem statistics.
210  */
211 #ifndef _SYS_SYSPROTO_H_
212 struct statfs_args {
213 	char *path;
214 	struct statfs *buf;
215 };
216 #endif
217 int
218 statfs(td, uap)
219 	struct thread *td;
220 	register struct statfs_args /* {
221 		char *path;
222 		struct statfs *buf;
223 	} */ *uap;
224 {
225 	struct statfs sf;
226 	int error;
227 
228 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
229 	if (error == 0)
230 		error = copyout(&sf, uap->buf, sizeof(sf));
231 	return (error);
232 }
233 
234 int
235 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
236     struct statfs *buf)
237 {
238 	struct mount *mp;
239 	struct statfs *sp, sb;
240 	int vfslocked;
241 	int error;
242 	struct nameidata nd;
243 
244 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
245 	    pathseg, path, td);
246 	error = namei(&nd);
247 	if (error)
248 		return (error);
249 	vfslocked = NDHASGIANT(&nd);
250 	mp = nd.ni_vp->v_mount;
251 	vfs_ref(mp);
252 	NDFREE(&nd, NDF_ONLY_PNBUF);
253 	vput(nd.ni_vp);
254 #ifdef MAC
255 	error = mac_check_mount_stat(td->td_ucred, mp);
256 	if (error)
257 		goto out;
258 #endif
259 	/*
260 	 * Set these in case the underlying filesystem fails to do so.
261 	 */
262 	sp = &mp->mnt_stat;
263 	sp->f_version = STATFS_VERSION;
264 	sp->f_namemax = NAME_MAX;
265 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
266 	error = VFS_STATFS(mp, sp, td);
267 	if (error)
268 		goto out;
269 	if (suser(td)) {
270 		bcopy(sp, &sb, sizeof(sb));
271 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
272 		prison_enforce_statfs(td->td_ucred, mp, &sb);
273 		sp = &sb;
274 	}
275 	*buf = *sp;
276 out:
277 	vfs_rel(mp);
278 	VFS_UNLOCK_GIANT(vfslocked);
279 	if (mtx_owned(&Giant))
280 		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
281 	return (error);
282 }
283 
284 /*
285  * Get filesystem statistics.
286  */
287 #ifndef _SYS_SYSPROTO_H_
288 struct fstatfs_args {
289 	int fd;
290 	struct statfs *buf;
291 };
292 #endif
293 int
294 fstatfs(td, uap)
295 	struct thread *td;
296 	register struct fstatfs_args /* {
297 		int fd;
298 		struct statfs *buf;
299 	} */ *uap;
300 {
301 	struct statfs sf;
302 	int error;
303 
304 	error = kern_fstatfs(td, uap->fd, &sf);
305 	if (error == 0)
306 		error = copyout(&sf, uap->buf, sizeof(sf));
307 	return (error);
308 }
309 
310 int
311 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
312 {
313 	struct file *fp;
314 	struct mount *mp;
315 	struct statfs *sp, sb;
316 	int vfslocked;
317 	struct vnode *vp;
318 	int error;
319 
320 	AUDIT_ARG(fd, fd);
321 	error = getvnode(td->td_proc->p_fd, fd, &fp);
322 	if (error)
323 		return (error);
324 	vp = fp->f_vnode;
325 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
326 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
327 #ifdef AUDIT
328 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
329 #endif
330 	mp = vp->v_mount;
331 	if (mp)
332 		vfs_ref(mp);
333 	VOP_UNLOCK(vp, 0, td);
334 	fdrop(fp, td);
335 	if (vp->v_iflag & VI_DOOMED) {
336 		error = EBADF;
337 		goto out;
338 	}
339 #ifdef MAC
340 	error = mac_check_mount_stat(td->td_ucred, mp);
341 	if (error)
342 		goto out;
343 #endif
344 	/*
345 	 * Set these in case the underlying filesystem fails to do so.
346 	 */
347 	sp = &mp->mnt_stat;
348 	sp->f_version = STATFS_VERSION;
349 	sp->f_namemax = NAME_MAX;
350 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
351 	error = VFS_STATFS(mp, sp, td);
352 	if (error)
353 		goto out;
354 	if (suser(td)) {
355 		bcopy(sp, &sb, sizeof(sb));
356 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
357 		prison_enforce_statfs(td->td_ucred, mp, &sb);
358 		sp = &sb;
359 	}
360 	*buf = *sp;
361 out:
362 	if (mp)
363 		vfs_rel(mp);
364 	VFS_UNLOCK_GIANT(vfslocked);
365 	return (error);
366 }
367 
368 /*
369  * Get statistics on all filesystems.
370  */
371 #ifndef _SYS_SYSPROTO_H_
372 struct getfsstat_args {
373 	struct statfs *buf;
374 	long bufsize;
375 	int flags;
376 };
377 #endif
378 int
379 getfsstat(td, uap)
380 	struct thread *td;
381 	register struct getfsstat_args /* {
382 		struct statfs *buf;
383 		long bufsize;
384 		int flags;
385 	} */ *uap;
386 {
387 
388 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
389 	    uap->flags));
390 }
391 
392 /*
393  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
394  * 	The caller is responsible for freeing memory which will be allocated
395  *	in '*buf'.
396  */
397 int
398 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
399     enum uio_seg bufseg, int flags)
400 {
401 	struct mount *mp, *nmp;
402 	struct statfs *sfsp, *sp, sb;
403 	size_t count, maxcount;
404 	int vfslocked;
405 	int error;
406 
407 	maxcount = bufsize / sizeof(struct statfs);
408 	if (bufsize == 0)
409 		sfsp = NULL;
410 	else if (bufseg == UIO_USERSPACE)
411 		sfsp = *buf;
412 	else /* if (bufseg == UIO_SYSSPACE) */ {
413 		count = 0;
414 		mtx_lock(&mountlist_mtx);
415 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
416 			count++;
417 		}
418 		mtx_unlock(&mountlist_mtx);
419 		if (maxcount > count)
420 			maxcount = count;
421 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
422 		    M_WAITOK);
423 	}
424 	count = 0;
425 	mtx_lock(&mountlist_mtx);
426 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
427 		if (prison_canseemount(td->td_ucred, mp) != 0) {
428 			nmp = TAILQ_NEXT(mp, mnt_list);
429 			continue;
430 		}
431 #ifdef MAC
432 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
433 			nmp = TAILQ_NEXT(mp, mnt_list);
434 			continue;
435 		}
436 #endif
437 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
438 			nmp = TAILQ_NEXT(mp, mnt_list);
439 			continue;
440 		}
441 		vfslocked = VFS_LOCK_GIANT(mp);
442 		if (sfsp && count < maxcount) {
443 			sp = &mp->mnt_stat;
444 			/*
445 			 * Set these in case the underlying filesystem
446 			 * fails to do so.
447 			 */
448 			sp->f_version = STATFS_VERSION;
449 			sp->f_namemax = NAME_MAX;
450 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
451 			/*
452 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
453 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
454 			 * overrides MNT_WAIT.
455 			 */
456 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
457 			    (flags & MNT_WAIT)) &&
458 			    (error = VFS_STATFS(mp, sp, td))) {
459 				VFS_UNLOCK_GIANT(vfslocked);
460 				mtx_lock(&mountlist_mtx);
461 				nmp = TAILQ_NEXT(mp, mnt_list);
462 				vfs_unbusy(mp, td);
463 				continue;
464 			}
465 			if (suser(td)) {
466 				bcopy(sp, &sb, sizeof(sb));
467 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
468 				prison_enforce_statfs(td->td_ucred, mp, &sb);
469 				sp = &sb;
470 			}
471 			if (bufseg == UIO_SYSSPACE)
472 				bcopy(sp, sfsp, sizeof(*sp));
473 			else /* if (bufseg == UIO_USERSPACE) */ {
474 				error = copyout(sp, sfsp, sizeof(*sp));
475 				if (error) {
476 					vfs_unbusy(mp, td);
477 					VFS_UNLOCK_GIANT(vfslocked);
478 					return (error);
479 				}
480 			}
481 			sfsp++;
482 		}
483 		VFS_UNLOCK_GIANT(vfslocked);
484 		count++;
485 		mtx_lock(&mountlist_mtx);
486 		nmp = TAILQ_NEXT(mp, mnt_list);
487 		vfs_unbusy(mp, td);
488 	}
489 	mtx_unlock(&mountlist_mtx);
490 	if (sfsp && count > maxcount)
491 		td->td_retval[0] = maxcount;
492 	else
493 		td->td_retval[0] = count;
494 	return (0);
495 }
496 
497 #ifdef COMPAT_FREEBSD4
498 /*
499  * Get old format filesystem statistics.
500  */
501 static void cvtstatfs(struct statfs *, struct ostatfs *);
502 
503 #ifndef _SYS_SYSPROTO_H_
504 struct freebsd4_statfs_args {
505 	char *path;
506 	struct ostatfs *buf;
507 };
508 #endif
509 int
510 freebsd4_statfs(td, uap)
511 	struct thread *td;
512 	struct freebsd4_statfs_args /* {
513 		char *path;
514 		struct ostatfs *buf;
515 	} */ *uap;
516 {
517 	struct ostatfs osb;
518 	struct statfs sf;
519 	int error;
520 
521 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
522 	if (error)
523 		return (error);
524 	cvtstatfs(&sf, &osb);
525 	return (copyout(&osb, uap->buf, sizeof(osb)));
526 }
527 
528 /*
529  * Get filesystem statistics.
530  */
531 #ifndef _SYS_SYSPROTO_H_
532 struct freebsd4_fstatfs_args {
533 	int fd;
534 	struct ostatfs *buf;
535 };
536 #endif
537 int
538 freebsd4_fstatfs(td, uap)
539 	struct thread *td;
540 	struct freebsd4_fstatfs_args /* {
541 		int fd;
542 		struct ostatfs *buf;
543 	} */ *uap;
544 {
545 	struct ostatfs osb;
546 	struct statfs sf;
547 	int error;
548 
549 	error = kern_fstatfs(td, uap->fd, &sf);
550 	if (error)
551 		return (error);
552 	cvtstatfs(&sf, &osb);
553 	return (copyout(&osb, uap->buf, sizeof(osb)));
554 }
555 
556 /*
557  * Get statistics on all filesystems.
558  */
559 #ifndef _SYS_SYSPROTO_H_
560 struct freebsd4_getfsstat_args {
561 	struct ostatfs *buf;
562 	long bufsize;
563 	int flags;
564 };
565 #endif
566 int
567 freebsd4_getfsstat(td, uap)
568 	struct thread *td;
569 	register struct freebsd4_getfsstat_args /* {
570 		struct ostatfs *buf;
571 		long bufsize;
572 		int flags;
573 	} */ *uap;
574 {
575 	struct statfs *buf, *sp;
576 	struct ostatfs osb;
577 	size_t count, size;
578 	int error;
579 
580 	count = uap->bufsize / sizeof(struct ostatfs);
581 	size = count * sizeof(struct statfs);
582 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
583 	if (size > 0) {
584 		count = td->td_retval[0];
585 		sp = buf;
586 		while (count > 0 && error == 0) {
587 			cvtstatfs(sp, &osb);
588 			error = copyout(&osb, uap->buf, sizeof(osb));
589 			sp++;
590 			uap->buf++;
591 			count--;
592 		}
593 		free(buf, M_TEMP);
594 	}
595 	return (error);
596 }
597 
598 /*
599  * Implement fstatfs() for (NFS) file handles.
600  */
601 #ifndef _SYS_SYSPROTO_H_
602 struct freebsd4_fhstatfs_args {
603 	struct fhandle *u_fhp;
604 	struct ostatfs *buf;
605 };
606 #endif
607 int
608 freebsd4_fhstatfs(td, uap)
609 	struct thread *td;
610 	struct freebsd4_fhstatfs_args /* {
611 		struct fhandle *u_fhp;
612 		struct ostatfs *buf;
613 	} */ *uap;
614 {
615 	struct ostatfs osb;
616 	struct statfs sf;
617 	fhandle_t fh;
618 	int error;
619 
620 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
621 	if (error)
622 		return (error);
623 	error = kern_fhstatfs(td, fh, &sf);
624 	if (error)
625 		return (error);
626 	cvtstatfs(&sf, &osb);
627 	return (copyout(&osb, uap->buf, sizeof(osb)));
628 }
629 
630 /*
631  * Convert a new format statfs structure to an old format statfs structure.
632  */
633 static void
634 cvtstatfs(nsp, osp)
635 	struct statfs *nsp;
636 	struct ostatfs *osp;
637 {
638 
639 	bzero(osp, sizeof(*osp));
640 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
641 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
642 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
643 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
644 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
645 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
646 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
647 	osp->f_owner = nsp->f_owner;
648 	osp->f_type = nsp->f_type;
649 	osp->f_flags = nsp->f_flags;
650 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
651 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
652 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
653 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
654 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
655 	    MIN(MFSNAMELEN, OMFSNAMELEN));
656 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
657 	    MIN(MNAMELEN, OMNAMELEN));
658 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
659 	    MIN(MNAMELEN, OMNAMELEN));
660 	osp->f_fsid = nsp->f_fsid;
661 }
662 #endif /* COMPAT_FREEBSD4 */
663 
664 /*
665  * Change current working directory to a given file descriptor.
666  */
667 #ifndef _SYS_SYSPROTO_H_
668 struct fchdir_args {
669 	int	fd;
670 };
671 #endif
672 int
673 fchdir(td, uap)
674 	struct thread *td;
675 	struct fchdir_args /* {
676 		int fd;
677 	} */ *uap;
678 {
679 	register struct filedesc *fdp = td->td_proc->p_fd;
680 	struct vnode *vp, *tdp, *vpold;
681 	struct mount *mp;
682 	struct file *fp;
683 	int vfslocked;
684 	int error;
685 
686 	AUDIT_ARG(fd, uap->fd);
687 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
688 		return (error);
689 	vp = fp->f_vnode;
690 	VREF(vp);
691 	fdrop(fp, td);
692 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
693 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
694 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
695 	error = change_dir(vp, td);
696 	while (!error && (mp = vp->v_mountedhere) != NULL) {
697 		int tvfslocked;
698 		if (vfs_busy(mp, 0, 0, td))
699 			continue;
700 		tvfslocked = VFS_LOCK_GIANT(mp);
701 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
702 		vfs_unbusy(mp, td);
703 		if (error) {
704 			VFS_UNLOCK_GIANT(tvfslocked);
705 			break;
706 		}
707 		vput(vp);
708 		VFS_UNLOCK_GIANT(vfslocked);
709 		vp = tdp;
710 		vfslocked = tvfslocked;
711 	}
712 	if (error) {
713 		vput(vp);
714 		VFS_UNLOCK_GIANT(vfslocked);
715 		return (error);
716 	}
717 	VOP_UNLOCK(vp, 0, td);
718 	VFS_UNLOCK_GIANT(vfslocked);
719 	FILEDESC_LOCK_FAST(fdp);
720 	vpold = fdp->fd_cdir;
721 	fdp->fd_cdir = vp;
722 	FILEDESC_UNLOCK_FAST(fdp);
723 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
724 	vrele(vpold);
725 	VFS_UNLOCK_GIANT(vfslocked);
726 	return (0);
727 }
728 
729 /*
730  * Change current working directory (``.'').
731  */
732 #ifndef _SYS_SYSPROTO_H_
733 struct chdir_args {
734 	char	*path;
735 };
736 #endif
737 int
738 chdir(td, uap)
739 	struct thread *td;
740 	struct chdir_args /* {
741 		char *path;
742 	} */ *uap;
743 {
744 
745 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
746 }
747 
748 int
749 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
750 {
751 	register struct filedesc *fdp = td->td_proc->p_fd;
752 	int error;
753 	struct nameidata nd;
754 	struct vnode *vp;
755 	int vfslocked;
756 
757 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
758 	    pathseg, path, td);
759 	if ((error = namei(&nd)) != 0)
760 		return (error);
761 	vfslocked = NDHASGIANT(&nd);
762 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
763 		vput(nd.ni_vp);
764 		VFS_UNLOCK_GIANT(vfslocked);
765 		NDFREE(&nd, NDF_ONLY_PNBUF);
766 		return (error);
767 	}
768 	VOP_UNLOCK(nd.ni_vp, 0, td);
769 	VFS_UNLOCK_GIANT(vfslocked);
770 	NDFREE(&nd, NDF_ONLY_PNBUF);
771 	FILEDESC_LOCK_FAST(fdp);
772 	vp = fdp->fd_cdir;
773 	fdp->fd_cdir = nd.ni_vp;
774 	FILEDESC_UNLOCK_FAST(fdp);
775 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
776 	vrele(vp);
777 	VFS_UNLOCK_GIANT(vfslocked);
778 	return (0);
779 }
780 
781 /*
782  * Helper function for raised chroot(2) security function:  Refuse if
783  * any filedescriptors are open directories.
784  */
785 static int
786 chroot_refuse_vdir_fds(fdp)
787 	struct filedesc *fdp;
788 {
789 	struct vnode *vp;
790 	struct file *fp;
791 	int fd;
792 
793 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
794 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
795 		fp = fget_locked(fdp, fd);
796 		if (fp == NULL)
797 			continue;
798 		if (fp->f_type == DTYPE_VNODE) {
799 			vp = fp->f_vnode;
800 			if (vp->v_type == VDIR)
801 				return (EPERM);
802 		}
803 	}
804 	return (0);
805 }
806 
807 /*
808  * This sysctl determines if we will allow a process to chroot(2) if it
809  * has a directory open:
810  *	0: disallowed for all processes.
811  *	1: allowed for processes that were not already chroot(2)'ed.
812  *	2: allowed for all processes.
813  */
814 
815 static int chroot_allow_open_directories = 1;
816 
817 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
818      &chroot_allow_open_directories, 0, "");
819 
820 /*
821  * Change notion of root (``/'') directory.
822  */
823 #ifndef _SYS_SYSPROTO_H_
824 struct chroot_args {
825 	char	*path;
826 };
827 #endif
828 int
829 chroot(td, uap)
830 	struct thread *td;
831 	struct chroot_args /* {
832 		char *path;
833 	} */ *uap;
834 {
835 	int error;
836 	struct nameidata nd;
837 	int vfslocked;
838 
839 	error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
840 	if (error)
841 		return (error);
842 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
843 	    UIO_USERSPACE, uap->path, td);
844 	error = namei(&nd);
845 	if (error)
846 		goto error;
847 	vfslocked = NDHASGIANT(&nd);
848 	if ((error = change_dir(nd.ni_vp, td)) != 0)
849 		goto e_vunlock;
850 #ifdef MAC
851 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
852 		goto e_vunlock;
853 #endif
854 	VOP_UNLOCK(nd.ni_vp, 0, td);
855 	error = change_root(nd.ni_vp, td);
856 	vrele(nd.ni_vp);
857 	VFS_UNLOCK_GIANT(vfslocked);
858 	NDFREE(&nd, NDF_ONLY_PNBUF);
859 	return (error);
860 e_vunlock:
861 	vput(nd.ni_vp);
862 	VFS_UNLOCK_GIANT(vfslocked);
863 error:
864 	NDFREE(&nd, NDF_ONLY_PNBUF);
865 	return (error);
866 }
867 
868 /*
869  * Common routine for chroot and chdir.  Callers must provide a locked vnode
870  * instance.
871  */
872 int
873 change_dir(vp, td)
874 	struct vnode *vp;
875 	struct thread *td;
876 {
877 	int error;
878 
879 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
880 	if (vp->v_type != VDIR)
881 		return (ENOTDIR);
882 #ifdef MAC
883 	error = mac_check_vnode_chdir(td->td_ucred, vp);
884 	if (error)
885 		return (error);
886 #endif
887 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
888 	return (error);
889 }
890 
891 /*
892  * Common routine for kern_chroot() and jail_attach().  The caller is
893  * responsible for invoking suser() and mac_check_chroot() to authorize this
894  * operation.
895  */
896 int
897 change_root(vp, td)
898 	struct vnode *vp;
899 	struct thread *td;
900 {
901 	struct filedesc *fdp;
902 	struct vnode *oldvp;
903 	int vfslocked;
904 	int error;
905 
906 	VFS_ASSERT_GIANT(vp->v_mount);
907 	fdp = td->td_proc->p_fd;
908 	FILEDESC_LOCK(fdp);
909 	if (chroot_allow_open_directories == 0 ||
910 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
911 		error = chroot_refuse_vdir_fds(fdp);
912 		if (error) {
913 			FILEDESC_UNLOCK(fdp);
914 			return (error);
915 		}
916 	}
917 	oldvp = fdp->fd_rdir;
918 	fdp->fd_rdir = vp;
919 	VREF(fdp->fd_rdir);
920 	if (!fdp->fd_jdir) {
921 		fdp->fd_jdir = vp;
922 		VREF(fdp->fd_jdir);
923 	}
924 	FILEDESC_UNLOCK(fdp);
925 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
926 	vrele(oldvp);
927 	VFS_UNLOCK_GIANT(vfslocked);
928 	return (0);
929 }
930 
931 /*
932  * Check permissions, allocate an open file structure,
933  * and call the device open routine if any.
934  *
935  * MP SAFE
936  */
937 #ifndef _SYS_SYSPROTO_H_
938 struct open_args {
939 	char	*path;
940 	int	flags;
941 	int	mode;
942 };
943 #endif
944 int
945 open(td, uap)
946 	struct thread *td;
947 	register struct open_args /* {
948 		char *path;
949 		int flags;
950 		int mode;
951 	} */ *uap;
952 {
953 
954 	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
955 }
956 
957 int
958 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
959     int mode)
960 {
961 	struct proc *p = td->td_proc;
962 	struct filedesc *fdp = p->p_fd;
963 	struct file *fp;
964 	struct vnode *vp;
965 	struct vattr vat;
966 	struct mount *mp;
967 	int cmode;
968 	struct file *nfp;
969 	int type, indx, error;
970 	struct flock lf;
971 	struct nameidata nd;
972 	int vfslocked;
973 
974 	AUDIT_ARG(fflags, flags);
975 	AUDIT_ARG(mode, mode);
976 	if ((flags & O_ACCMODE) == O_ACCMODE)
977 		return (EINVAL);
978 	flags = FFLAGS(flags);
979 	error = falloc(td, &nfp, &indx);
980 	if (error)
981 		return (error);
982 	/* An extra reference on `nfp' has been held for us by falloc(). */
983 	fp = nfp;
984 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
985 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
986 	td->td_dupfd = -1;		/* XXX check for fdopen */
987 	error = vn_open(&nd, &flags, cmode, indx);
988 	if (error) {
989 		/*
990 		 * If the vn_open replaced the method vector, something
991 		 * wonderous happened deep below and we just pass it up
992 		 * pretending we know what we do.
993 		 */
994 		if (error == ENXIO && fp->f_ops != &badfileops) {
995 			fdrop(fp, td);
996 			td->td_retval[0] = indx;
997 			return (0);
998 		}
999 
1000 		/*
1001 		 * release our own reference
1002 		 */
1003 		fdrop(fp, td);
1004 
1005 		/*
1006 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1007 		 * responsible for dropping the old contents of ofiles[indx]
1008 		 * if it succeeds.
1009 		 */
1010 		if ((error == ENODEV || error == ENXIO) &&
1011 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1012 		    (error =
1013 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1014 			td->td_retval[0] = indx;
1015 			return (0);
1016 		}
1017 		/*
1018 		 * Clean up the descriptor, but only if another thread hadn't
1019 		 * replaced or closed it.
1020 		 */
1021 		fdclose(fdp, fp, indx, td);
1022 
1023 		if (error == ERESTART)
1024 			error = EINTR;
1025 		return (error);
1026 	}
1027 	td->td_dupfd = 0;
1028 	vfslocked = NDHASGIANT(&nd);
1029 	NDFREE(&nd, NDF_ONLY_PNBUF);
1030 	vp = nd.ni_vp;
1031 
1032 	/*
1033 	 * There should be 2 references on the file, one from the descriptor
1034 	 * table, and one for us.
1035 	 *
1036 	 * Handle the case where someone closed the file (via its file
1037 	 * descriptor) while we were blocked.  The end result should look
1038 	 * like opening the file succeeded but it was immediately closed.
1039 	 * We call vn_close() manually because we haven't yet hooked up
1040 	 * the various 'struct file' fields.
1041 	 */
1042 	FILEDESC_LOCK(fdp);
1043 	FILE_LOCK(fp);
1044 	if (fp->f_count == 1) {
1045 		mp = vp->v_mount;
1046 		KASSERT(fdp->fd_ofiles[indx] != fp,
1047 		    ("Open file descriptor lost all refs"));
1048 		FILE_UNLOCK(fp);
1049 		FILEDESC_UNLOCK(fdp);
1050 		VOP_UNLOCK(vp, 0, td);
1051 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1052 		VFS_UNLOCK_GIANT(vfslocked);
1053 		fdrop(fp, td);
1054 		td->td_retval[0] = indx;
1055 		return (0);
1056 	}
1057 	fp->f_vnode = vp;
1058 	if (fp->f_data == NULL)
1059 		fp->f_data = vp;
1060 	fp->f_flag = flags & FMASK;
1061 	if (fp->f_ops == &badfileops)
1062 		fp->f_ops = &vnops;
1063 	fp->f_seqcount = 1;
1064 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1065 	FILE_UNLOCK(fp);
1066 	FILEDESC_UNLOCK(fdp);
1067 
1068 	VOP_UNLOCK(vp, 0, td);
1069 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1070 		lf.l_whence = SEEK_SET;
1071 		lf.l_start = 0;
1072 		lf.l_len = 0;
1073 		if (flags & O_EXLOCK)
1074 			lf.l_type = F_WRLCK;
1075 		else
1076 			lf.l_type = F_RDLCK;
1077 		type = F_FLOCK;
1078 		if ((flags & FNONBLOCK) == 0)
1079 			type |= F_WAIT;
1080 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1081 			    type)) != 0)
1082 			goto bad;
1083 		fp->f_flag |= FHASLOCK;
1084 	}
1085 	if (flags & O_TRUNC) {
1086 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1087 			goto bad;
1088 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1089 		VATTR_NULL(&vat);
1090 		vat.va_size = 0;
1091 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1092 #ifdef MAC
1093 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1094 		if (error == 0)
1095 #endif
1096 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1097 		VOP_UNLOCK(vp, 0, td);
1098 		vn_finished_write(mp);
1099 		if (error)
1100 			goto bad;
1101 	}
1102 	VFS_UNLOCK_GIANT(vfslocked);
1103 	/*
1104 	 * Release our private reference, leaving the one associated with
1105 	 * the descriptor table intact.
1106 	 */
1107 	fdrop(fp, td);
1108 	td->td_retval[0] = indx;
1109 	return (0);
1110 bad:
1111 	VFS_UNLOCK_GIANT(vfslocked);
1112 	fdclose(fdp, fp, indx, td);
1113 	fdrop(fp, td);
1114 	return (error);
1115 }
1116 
1117 #ifdef COMPAT_43
1118 /*
1119  * Create a file.
1120  *
1121  * MP SAFE
1122  */
1123 #ifndef _SYS_SYSPROTO_H_
1124 struct ocreat_args {
1125 	char	*path;
1126 	int	mode;
1127 };
1128 #endif
1129 int
1130 ocreat(td, uap)
1131 	struct thread *td;
1132 	register struct ocreat_args /* {
1133 		char *path;
1134 		int mode;
1135 	} */ *uap;
1136 {
1137 
1138 	return (kern_open(td, uap->path, UIO_USERSPACE,
1139 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1140 }
1141 #endif /* COMPAT_43 */
1142 
1143 /*
1144  * Create a special file.
1145  */
1146 #ifndef _SYS_SYSPROTO_H_
1147 struct mknod_args {
1148 	char	*path;
1149 	int	mode;
1150 	int	dev;
1151 };
1152 #endif
1153 int
1154 mknod(td, uap)
1155 	struct thread *td;
1156 	register struct mknod_args /* {
1157 		char *path;
1158 		int mode;
1159 		int dev;
1160 	} */ *uap;
1161 {
1162 
1163 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1164 }
1165 
/*
 * Create a special file (device node, bad-sector marker or whiteout) named
 * by path.
 *
 *	td	calling thread; its credential and its process umask are used
 *	path	name of the node to create, interpreted according to pathseg
 *	pathseg	address space that path lives in
 *	mode	file type (S_IFMT bits) plus permission bits
 *	dev	device number stored in the new node's va_rdev
 *
 * Returns 0 on success, otherwise an errno value: the result of the
 * privilege check, a namei() error, EEXIST if the name already exists,
 * EINVAL for a file type not handled below, or an error from
 * vn_start_write(), the MAC check, VOP_WHITEOUT() or VOP_MKNOD().
 */
int
kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
    int dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	int error;
	int whiteout = 0;
	struct nameidata nd;
	int vfslocked;

	AUDIT_ARG(mode, mode);
	AUDIT_ARG(dev, dev);
	/*
	 * Privilege check: character and block devices use suser(), every
	 * other type uses suser_cred() with SUSER_ALLOWJAIL.
	 */
	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = suser(td);
		break;
	default:
		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
		break;
	}
	if (error)
		return (error);
	/* The lookup is redone from here after sleeping for write access. */
restart:
	bwillwrite();
	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
	    pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* The name already exists: release everything and fail. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (EEXIST);
	} else {
		VATTR_NULL(&vattr);
		/* Apply the process umask while holding the filedesc lock. */
		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_fd->fd_cmask;
		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
		vattr.va_rdev = dev;
		whiteout = 0;

		/* Translate the requested file type into a vnode type. */
		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			/*
			 * Unsupported type.  The error is only recorded
			 * here; the common exit path below releases the
			 * lookup state.
			 */
			error = EINVAL;
			break;
		}
	}
	/*
	 * Try to start the write without sleeping.  On failure drop the
	 * lookup state, wait (V_XSLEEP) and redo the lookup.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	/* The MAC create check is skipped for whiteouts. */
	if (error == 0 && !whiteout)
		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (!error) {
		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
						&nd.ni_cnd, &vattr);
			/* On success the new vnode is released right away. */
			if (error == 0)
				vput(nd.ni_vp);
		}
	}
	/* Common exit: release lookup state and end the write. */
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
1265 
1266 /*
1267  * Create a named pipe.
1268  */
1269 #ifndef _SYS_SYSPROTO_H_
1270 struct mkfifo_args {
1271 	char	*path;
1272 	int	mode;
1273 };
1274 #endif
1275 int
1276 mkfifo(td, uap)
1277 	struct thread *td;
1278 	register struct mkfifo_args /* {
1279 		char *path;
1280 		int mode;
1281 	} */ *uap;
1282 {
1283 
1284 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1285 }
1286 
/*
 * Create a named pipe (vnode type VFIFO) at path.
 *
 *	td	calling thread; its credential and its process umask are used
 *	path	name of the FIFO to create, interpreted according to pathseg
 *	pathseg	address space that path lives in
 *	mode	permission bits (masked with ALLPERMS and the umask)
 *
 * Returns 0 on success, otherwise an errno value: a namei() error, EEXIST
 * if the name already exists, or an error from vn_start_write(), the MAC
 * check or VOP_MKNOD().
 */
int
kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
{
	struct mount *mp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	int vfslocked;

	AUDIT_ARG(mode, mode);
	/* The lookup is redone from here after sleeping for write access. */
restart:
	bwillwrite();
	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
	    pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	if (nd.ni_vp != NULL) {
		/* The name already exists: release everything and fail. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (EEXIST);
	}
	/*
	 * Try to start the write without sleeping.  On failure drop the
	 * lookup state, wait (V_XSLEEP) and redo the lookup.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	/* Apply the process umask while holding the filedesc lock. */
	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
#ifdef MAC
	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error)
		goto out;
#endif
	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	/* On success the new vnode is released right away. */
	if (error == 0)
		vput(nd.ni_vp);
#ifdef MAC
out:
#endif
	/* Common exit: release the parent, end the write, free the name. */
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	return (error);
}
1346 
1347 /*
1348  * Make a hard file link.
1349  */
1350 #ifndef _SYS_SYSPROTO_H_
1351 struct link_args {
1352 	char	*path;
1353 	char	*link;
1354 };
1355 #endif
1356 int
1357 link(td, uap)
1358 	struct thread *td;
1359 	register struct link_args /* {
1360 		char *path;
1361 		char *link;
1362 	} */ *uap;
1363 {
1364 	int error;
1365 
1366 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1367 	return (error);
1368 }
1369 
/*
 * Run-time policy knobs consulted by can_hardlink().  Both default to 0
 * (check disabled) and are exported read/write under the _security_bsd
 * sysctl node.
 */
/* When non-zero, the credential's uid must match the file's owner. */
static int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
/* When non-zero, the credential must be a member of the file's group. */
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");
1380 
1381 static int
1382 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1383 {
1384 	struct vattr va;
1385 	int error;
1386 
1387 	if (suser_cred(cred, SUSER_ALLOWJAIL) == 0)
1388 		return (0);
1389 
1390 	if (!hardlink_check_uid && !hardlink_check_gid)
1391 		return (0);
1392 
1393 	error = VOP_GETATTR(vp, &va, cred, td);
1394 	if (error != 0)
1395 		return (error);
1396 
1397 	if (hardlink_check_uid) {
1398 		if (cred->cr_uid != va.va_uid)
1399 			return (EPERM);
1400 	}
1401 
1402 	if (hardlink_check_gid) {
1403 		if (!groupmember(va.va_gid, cred))
1404 			return (EPERM);
1405 	}
1406 
1407 	return (0);
1408 }
1409 
/*
 * Create a hard link named link that refers to the existing file path.
 *
 *	td	calling thread
 *	path	name of the existing file (symbolic links are followed)
 *	link	name of the new directory entry
 *	segflg	address space both names live in
 *
 * Returns 0 on success, otherwise an errno value: a namei() error for
 * either name, EPERM if path is a directory, EEXIST if link already
 * exists, or an error from vn_start_write(), can_hardlink(), the MAC
 * check or VOP_LINK().
 */
int
kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
{
	struct vnode *vp;
	struct mount *mp;
	struct nameidata nd;
	int vfslocked;
	int lvfslocked;
	int error;

	bwillwrite();
	/* First lookup: the existing file that is being linked to. */
	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	if (vp->v_type == VDIR) {
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (EPERM);		/* POSIX */
	}
	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}
	/*
	 * Second lookup: the new name.  nd is reused; the Giant state of
	 * this lookup is tracked separately in lvfslocked.
	 */
	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
	    segflg, link, td);
	if ((error = namei(&nd)) == 0) {
		lvfslocked = NDHASGIANT(&nd);
		if (nd.ni_vp != NULL) {
			/* The new name already exists. */
			if (nd.ni_dvp == nd.ni_vp)
				vrele(nd.ni_dvp);
			else
				vput(nd.ni_dvp);
			vrele(nd.ni_vp);
			error = EEXIST;
		/*
		 * NOTE(review): if vn_lock() failed here nd.ni_dvp would
		 * not be released.  Other callers in this file ignore the
		 * result of vn_lock() when LK_RETRY is set -- confirm that
		 * it cannot fail with these flags.
		 */
		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
		    == 0) {
			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
			error = can_hardlink(vp, td, td->td_ucred);
			/*
			 * With MAC compiled in, the first test guards the
			 * MAC check and the second guards VOP_LINK();
			 * without MAC the first test guards VOP_LINK().
			 */
			if (error == 0)
#ifdef MAC
				error = mac_check_vnode_link(td->td_ucred,
				    nd.ni_dvp, vp, &nd.ni_cnd);
			if (error == 0)
#endif
				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
			VOP_UNLOCK(vp, 0, td);
			vput(nd.ni_dvp);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);
		VFS_UNLOCK_GIANT(lvfslocked);
	}
	/* Release the state taken for the existing file. */
	vrele(vp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
1471 
1472 /*
1473  * Make a symbolic link.
1474  */
1475 #ifndef _SYS_SYSPROTO_H_
1476 struct symlink_args {
1477 	char	*path;
1478 	char	*link;
1479 };
1480 #endif
1481 int
1482 symlink(td, uap)
1483 	struct thread *td;
1484 	register struct symlink_args /* {
1485 		char *path;
1486 		char *link;
1487 	} */ *uap;
1488 {
1489 
1490 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1491 }
1492 
/*
 * Create a symbolic link named link whose contents are the string path.
 *
 *	td	calling thread; its credential and its process umask are used
 *	path	link contents (the target string)
 *	link	name of the symbolic link to create
 *	segflg	address space both strings live in
 *
 * Returns 0 on success, otherwise an errno value: a copyinstr() or
 * namei() error, EEXIST if link already exists, or an error from
 * vn_start_write(), the MAC check or VOP_SYMLINK().
 */
int
kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
{
	struct mount *mp;
	struct vattr vattr;
	char *syspath;
	int error;
	struct nameidata nd;
	int vfslocked;

	/*
	 * Obtain a kernel copy of the target string.  A kernel-space path
	 * is used directly; otherwise it is copied into a namei_zone
	 * buffer that is freed at "out".
	 */
	if (segflg == UIO_SYSSPACE) {
		syspath = path;
	} else {
		syspath = uma_zalloc(namei_zone, M_WAITOK);
		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
			goto out;
	}
	AUDIT_ARG(text, syspath);
	/* The lookup is redone from here after sleeping for write access. */
restart:
	bwillwrite();
	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
	    segflg, link, td);
	if ((error = namei(&nd)) != 0)
		goto out;
	vfslocked = NDHASGIANT(&nd);
	if (nd.ni_vp) {
		/* The name already exists: release everything and fail. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		VFS_UNLOCK_GIANT(vfslocked);
		error = EEXIST;
		goto out;
	}
	/*
	 * Try to start the write without sleeping.  On failure drop the
	 * lookup state, wait (V_XSLEEP) and redo the lookup.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			goto out;
		goto restart;
	}
	VATTR_NULL(&vattr);
	/* Apply the process umask while holding the filedesc lock. */
	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
#ifdef MAC
	/* va_type is only filled in here, for the MAC check. */
	vattr.va_type = VLNK;
	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error)
		goto out2;
#endif
	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
	/* On success the new vnode is released right away. */
	if (error == 0)
		vput(nd.ni_vp);
#ifdef MAC
out2:
#endif
	/* Release lookup state and end the write. */
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
out:
	/* Free the copied-in target string, if one was allocated. */
	if (segflg != UIO_SYSSPACE)
		uma_zfree(namei_zone, syspath);
	return (error);
}
1564 
/*
 * Delete a whiteout from the filesystem.
 *
 * The user-space path in uap->path is looked up with DOWHITEOUT.  The
 * request is only carried out when the lookup finds no vnode and flags
 * the name as a whiteout (ISWHITEOUT); in every other case EEXIST is
 * returned.
 *
 * Returns 0 on success, otherwise an errno value: a namei() error,
 * EEXIST as described above, or an error from vn_start_write() or
 * VOP_WHITEOUT().
 */
int
undelete(td, uap)
	struct thread *td;
	register struct undelete_args /* {
		char *path;
	} */ *uap;
{
	int error;
	struct mount *mp;
	struct nameidata nd;
	int vfslocked;

	/* The lookup is redone from here after sleeping for write access. */
restart:
	bwillwrite();
	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
	    UIO_USERSPACE, uap->path, td);
	error = namei(&nd);
	if (error)
		return (error);
	vfslocked = NDHASGIANT(&nd);

	/* Reject anything that is not a bare whiteout entry. */
	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (nd.ni_vp)
			vrele(nd.ni_vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (EEXIST);
	}
	/*
	 * Try to start the write without sleeping.  On failure drop the
	 * lookup state, wait (V_XSLEEP) and redo the lookup.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
1616 
1617 /*
1618  * Delete a name from the filesystem.
1619  */
1620 #ifndef _SYS_SYSPROTO_H_
1621 struct unlink_args {
1622 	char	*path;
1623 };
1624 #endif
1625 int
1626 unlink(td, uap)
1627 	struct thread *td;
1628 	struct unlink_args /* {
1629 		char *path;
1630 	} */ *uap;
1631 {
1632 	int error;
1633 
1634 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1635 	return (error);
1636 }
1637 
/*
 * Remove the directory entry named by path.
 *
 *	td	calling thread
 *	path	name to remove, interpreted according to pathseg
 *	pathseg	address space that path lives in
 *
 * Returns 0 on success, otherwise an errno value: a namei() error (with
 * EINVAL reported as EPERM), EPERM if the name is a directory, EBUSY if
 * it is the root of a mounted filesystem, or an error from
 * vn_start_write(), the MAC check or VOP_REMOVE().
 */
int
kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
{
	struct mount *mp;
	struct vnode *vp;
	int error;
	struct nameidata nd;
	int vfslocked;

	/* The lookup is redone from here after sleeping for write access. */
restart:
	bwillwrite();
	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
	    pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error == EINVAL ? EPERM : error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	/* error is 0 here; the checks below may set it. */
	if (vp->v_type == VDIR)
		error = EPERM;		/* POSIX */
	else {
		/*
		 * The root of a mounted filesystem cannot be deleted.
		 *
		 * XXX: can this only be a VDIR case?
		 */
		if (vp->v_vflag & VV_ROOT)
			error = EBUSY;
	}
	if (error == 0) {
		/*
		 * Try to start the write without sleeping.  On failure
		 * drop both vnodes, wait (V_XSLEEP) and redo the lookup.
		 */
		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vput(nd.ni_dvp);
			if (vp == nd.ni_dvp)
				vrele(vp);
			else
				vput(vp);
			VFS_UNLOCK_GIANT(vfslocked);
			if ((error = vn_start_write(NULL, &mp,
			    V_XSLEEP | PCATCH)) != 0)
				return (error);
			goto restart;
		}
#ifdef MAC
		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
		    &nd.ni_cnd);
		if (error)
			goto out;
#endif
		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
#ifdef MAC
out:
#endif
		vn_finished_write(mp);
	}
	/*
	 * Common exit.  When the leaf and its parent are the same vnode
	 * only a reference is dropped for the leaf, since vput() on the
	 * parent has already been issued for it.
	 */
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	if (vp == nd.ni_dvp)
		vrele(vp);
	else
		vput(vp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
1702 
/*
 * Reposition read/write file offset.
 *
 * uap->whence selects how uap->offset is interpreted: L_SET (absolute),
 * L_INCR (relative to the current offset) or L_XTND (relative to the
 * file size obtained with VOP_GETATTR()).  On success the new offset is
 * stored in the file and returned through td->td_retval.
 *
 * Returns 0 on success, otherwise an errno value: an fget() error,
 * ESPIPE if the file is not seekable, EOVERFLOW if the computed offset
 * would overflow, EINVAL for an unknown whence or a negative result, or
 * an error from VOP_GETATTR().
 */
#ifndef _SYS_SYSPROTO_H_
struct lseek_args {
	int	fd;
	int	pad;
	off_t	offset;
	int	whence;
};
#endif
int
lseek(td, uap)
	struct thread *td;
	register struct lseek_args /* {
		int fd;
		int pad;
		off_t offset;
		int whence;
	} */ *uap;
{
	struct ucred *cred = td->td_ucred;
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	off_t offset;
	int error, noneg;
	int vfslocked;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);
	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
		fdrop(fp, td);
		return (ESPIPE);
	}
	vp = fp->f_vnode;
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	/* Negative offsets are only permitted on character devices. */
	noneg = (vp->v_type != VCHR);
	offset = uap->offset;
	switch (uap->whence) {
	case L_INCR:
		/* Refuse an addition that would exceed OFF_MAX. */
		if (noneg &&
		    (fp->f_offset < 0 ||
		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
			error = EOVERFLOW;
			break;
		}
		offset += fp->f_offset;
		break;
	case L_XTND:
		/* The vnode is locked only around the attribute fetch. */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		error = VOP_GETATTR(vp, &vattr, cred, td);
		VOP_UNLOCK(vp, 0, td);
		if (error)
			break;
		/* Refuse an addition that would exceed OFF_MAX. */
		if (noneg &&
		    (vattr.va_size > OFF_MAX ||
		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
			error = EOVERFLOW;
			break;
		}
		offset += vattr.va_size;
		break;
	case L_SET:
		break;
	default:
		error = EINVAL;
	}
	if (error == 0 && noneg && offset < 0)
		error = EINVAL;
	if (error != 0)
		goto drop;
	fp->f_offset = offset;
	/* The result is an off_t stored over the td_retval array. */
	*(off_t *)(td->td_retval) = fp->f_offset;
drop:
	fdrop(fp, td);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
1782 
1783 #if defined(COMPAT_43)
1784 /*
1785  * Reposition read/write file offset.
1786  */
1787 #ifndef _SYS_SYSPROTO_H_
1788 struct olseek_args {
1789 	int	fd;
1790 	long	offset;
1791 	int	whence;
1792 };
1793 #endif
1794 int
1795 olseek(td, uap)
1796 	struct thread *td;
1797 	register struct olseek_args /* {
1798 		int fd;
1799 		long offset;
1800 		int whence;
1801 	} */ *uap;
1802 {
1803 	struct lseek_args /* {
1804 		int fd;
1805 		int pad;
1806 		off_t offset;
1807 		int whence;
1808 	} */ nuap;
1809 	int error;
1810 
1811 	nuap.fd = uap->fd;
1812 	nuap.offset = uap->offset;
1813 	nuap.whence = uap->whence;
1814 	error = lseek(td, &nuap);
1815 	return (error);
1816 }
1817 #endif /* COMPAT_43 */
1818 
1819 /*
1820  * Check access permissions using passed credentials.
1821  */
1822 static int
1823 vn_access(vp, user_flags, cred, td)
1824 	struct vnode	*vp;
1825 	int		user_flags;
1826 	struct ucred	*cred;
1827 	struct thread	*td;
1828 {
1829 	int error, flags;
1830 
1831 	/* Flags == 0 means only check for existence. */
1832 	error = 0;
1833 	if (user_flags) {
1834 		flags = 0;
1835 		if (user_flags & R_OK)
1836 			flags |= VREAD;
1837 		if (user_flags & W_OK)
1838 			flags |= VWRITE;
1839 		if (user_flags & X_OK)
1840 			flags |= VEXEC;
1841 #ifdef MAC
1842 		error = mac_check_vnode_access(cred, vp, flags);
1843 		if (error)
1844 			return (error);
1845 #endif
1846 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1847 			error = VOP_ACCESS(vp, flags, cred, td);
1848 	}
1849 	return (error);
1850 }
1851 
1852 /*
1853  * Check access permissions using "real" credentials.
1854  */
1855 #ifndef _SYS_SYSPROTO_H_
1856 struct access_args {
1857 	char	*path;
1858 	int	flags;
1859 };
1860 #endif
1861 int
1862 access(td, uap)
1863 	struct thread *td;
1864 	register struct access_args /* {
1865 		char *path;
1866 		int flags;
1867 	} */ *uap;
1868 {
1869 
1870 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1871 }
1872 
1873 int
1874 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1875 {
1876 	struct ucred *cred, *tmpcred;
1877 	register struct vnode *vp;
1878 	struct nameidata nd;
1879 	int vfslocked;
1880 	int error;
1881 
1882 	/*
1883 	 * Create and modify a temporary credential instead of one that
1884 	 * is potentially shared.  This could also mess up socket
1885 	 * buffer accounting which can run in an interrupt context.
1886 	 */
1887 	cred = td->td_ucred;
1888 	tmpcred = crdup(cred);
1889 	tmpcred->cr_uid = cred->cr_ruid;
1890 	tmpcred->cr_groups[0] = cred->cr_rgid;
1891 	td->td_ucred = tmpcred;
1892 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1893 	    pathseg, path, td);
1894 	if ((error = namei(&nd)) != 0)
1895 		goto out1;
1896 	vfslocked = NDHASGIANT(&nd);
1897 	vp = nd.ni_vp;
1898 
1899 	error = vn_access(vp, flags, tmpcred, td);
1900 	NDFREE(&nd, NDF_ONLY_PNBUF);
1901 	vput(vp);
1902 	VFS_UNLOCK_GIANT(vfslocked);
1903 out1:
1904 	td->td_ucred = cred;
1905 	crfree(tmpcred);
1906 	return (error);
1907 }
1908 
1909 /*
1910  * Check access permissions using "effective" credentials.
1911  */
1912 #ifndef _SYS_SYSPROTO_H_
1913 struct eaccess_args {
1914 	char	*path;
1915 	int	flags;
1916 };
1917 #endif
1918 int
1919 eaccess(td, uap)
1920 	struct thread *td;
1921 	register struct eaccess_args /* {
1922 		char *path;
1923 		int flags;
1924 	} */ *uap;
1925 {
1926 
1927 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1928 }
1929 
1930 int
1931 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1932 {
1933 	struct nameidata nd;
1934 	struct vnode *vp;
1935 	int vfslocked;
1936 	int error;
1937 
1938 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1939 	    pathseg, path, td);
1940 	if ((error = namei(&nd)) != 0)
1941 		return (error);
1942 	vp = nd.ni_vp;
1943 	vfslocked = NDHASGIANT(&nd);
1944 	error = vn_access(vp, flags, td->td_ucred, td);
1945 	NDFREE(&nd, NDF_ONLY_PNBUF);
1946 	vput(vp);
1947 	VFS_UNLOCK_GIANT(vfslocked);
1948 	return (error);
1949 }
1950 
1951 #if defined(COMPAT_43)
1952 /*
1953  * Get file status; this version follows links.
1954  */
1955 #ifndef _SYS_SYSPROTO_H_
1956 struct ostat_args {
1957 	char	*path;
1958 	struct ostat *ub;
1959 };
1960 #endif
1961 int
1962 ostat(td, uap)
1963 	struct thread *td;
1964 	register struct ostat_args /* {
1965 		char *path;
1966 		struct ostat *ub;
1967 	} */ *uap;
1968 {
1969 	struct stat sb;
1970 	struct ostat osb;
1971 	int error;
1972 
1973 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1974 	if (error)
1975 		return (error);
1976 	cvtstat(&sb, &osb);
1977 	error = copyout(&osb, uap->ub, sizeof (osb));
1978 	return (error);
1979 }
1980 
1981 /*
1982  * Get file status; this version does not follow links.
1983  */
1984 #ifndef _SYS_SYSPROTO_H_
1985 struct olstat_args {
1986 	char	*path;
1987 	struct ostat *ub;
1988 };
1989 #endif
1990 int
1991 olstat(td, uap)
1992 	struct thread *td;
1993 	register struct olstat_args /* {
1994 		char *path;
1995 		struct ostat *ub;
1996 	} */ *uap;
1997 {
1998 	struct stat sb;
1999 	struct ostat osb;
2000 	int error;
2001 
2002 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2003 	if (error)
2004 		return (error);
2005 	cvtstat(&sb, &osb);
2006 	error = copyout(&osb, uap->ub, sizeof (osb));
2007 	return (error);
2008 }
2009 
2010 /*
2011  * Convert from an old to a new stat structure.
2012  */
2013 void
2014 cvtstat(st, ost)
2015 	struct stat *st;
2016 	struct ostat *ost;
2017 {
2018 
2019 	ost->st_dev = st->st_dev;
2020 	ost->st_ino = st->st_ino;
2021 	ost->st_mode = st->st_mode;
2022 	ost->st_nlink = st->st_nlink;
2023 	ost->st_uid = st->st_uid;
2024 	ost->st_gid = st->st_gid;
2025 	ost->st_rdev = st->st_rdev;
2026 	if (st->st_size < (quad_t)1 << 32)
2027 		ost->st_size = st->st_size;
2028 	else
2029 		ost->st_size = -2;
2030 	ost->st_atime = st->st_atime;
2031 	ost->st_mtime = st->st_mtime;
2032 	ost->st_ctime = st->st_ctime;
2033 	ost->st_blksize = st->st_blksize;
2034 	ost->st_blocks = st->st_blocks;
2035 	ost->st_flags = st->st_flags;
2036 	ost->st_gen = st->st_gen;
2037 }
2038 #endif /* COMPAT_43 */
2039 
2040 /*
2041  * Get file status; this version follows links.
2042  */
2043 #ifndef _SYS_SYSPROTO_H_
2044 struct stat_args {
2045 	char	*path;
2046 	struct stat *ub;
2047 };
2048 #endif
2049 int
2050 stat(td, uap)
2051 	struct thread *td;
2052 	register struct stat_args /* {
2053 		char *path;
2054 		struct stat *ub;
2055 	} */ *uap;
2056 {
2057 	struct stat sb;
2058 	int error;
2059 
2060 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2061 	if (error == 0)
2062 		error = copyout(&sb, uap->ub, sizeof (sb));
2063 	return (error);
2064 }
2065 
2066 int
2067 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2068 {
2069 	struct nameidata nd;
2070 	struct stat sb;
2071 	int error, vfslocked;
2072 
2073 	NDINIT(&nd, LOOKUP,
2074 	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2075 	    pathseg, path, td);
2076 	if ((error = namei(&nd)) != 0)
2077 		return (error);
2078 	vfslocked = NDHASGIANT(&nd);
2079 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2080 	NDFREE(&nd, NDF_ONLY_PNBUF);
2081 	vput(nd.ni_vp);
2082 	VFS_UNLOCK_GIANT(vfslocked);
2083 	if (mtx_owned(&Giant))
2084 		printf("stat(%d): %s\n", vfslocked, path);
2085 	if (error)
2086 		return (error);
2087 	*sbp = sb;
2088 	return (0);
2089 }
2090 
2091 /*
2092  * Get file status; this version does not follow links.
2093  */
2094 #ifndef _SYS_SYSPROTO_H_
2095 struct lstat_args {
2096 	char	*path;
2097 	struct stat *ub;
2098 };
2099 #endif
2100 int
2101 lstat(td, uap)
2102 	struct thread *td;
2103 	register struct lstat_args /* {
2104 		char *path;
2105 		struct stat *ub;
2106 	} */ *uap;
2107 {
2108 	struct stat sb;
2109 	int error;
2110 
2111 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2112 	if (error == 0)
2113 		error = copyout(&sb, uap->ub, sizeof (sb));
2114 	return (error);
2115 }
2116 
2117 int
2118 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2119 {
2120 	struct vnode *vp;
2121 	struct stat sb;
2122 	struct nameidata nd;
2123 	int error, vfslocked;
2124 
2125 	NDINIT(&nd, LOOKUP,
2126 	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2127 	    pathseg, path, td);
2128 	if ((error = namei(&nd)) != 0)
2129 		return (error);
2130 	vfslocked = NDHASGIANT(&nd);
2131 	vp = nd.ni_vp;
2132 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2133 	NDFREE(&nd, NDF_ONLY_PNBUF);
2134 	vput(vp);
2135 	VFS_UNLOCK_GIANT(vfslocked);
2136 	if (error)
2137 		return (error);
2138 	*sbp = sb;
2139 	return (0);
2140 }
2141 
2142 /*
2143  * Implementation of the NetBSD [l]stat() functions.
2144  */
2145 void
2146 cvtnstat(sb, nsb)
2147 	struct stat *sb;
2148 	struct nstat *nsb;
2149 {
2150 	bzero(nsb, sizeof *nsb);
2151 	nsb->st_dev = sb->st_dev;
2152 	nsb->st_ino = sb->st_ino;
2153 	nsb->st_mode = sb->st_mode;
2154 	nsb->st_nlink = sb->st_nlink;
2155 	nsb->st_uid = sb->st_uid;
2156 	nsb->st_gid = sb->st_gid;
2157 	nsb->st_rdev = sb->st_rdev;
2158 	nsb->st_atimespec = sb->st_atimespec;
2159 	nsb->st_mtimespec = sb->st_mtimespec;
2160 	nsb->st_ctimespec = sb->st_ctimespec;
2161 	nsb->st_size = sb->st_size;
2162 	nsb->st_blocks = sb->st_blocks;
2163 	nsb->st_blksize = sb->st_blksize;
2164 	nsb->st_flags = sb->st_flags;
2165 	nsb->st_gen = sb->st_gen;
2166 	nsb->st_birthtimespec = sb->st_birthtimespec;
2167 }
2168 
2169 #ifndef _SYS_SYSPROTO_H_
2170 struct nstat_args {
2171 	char	*path;
2172 	struct nstat *ub;
2173 };
2174 #endif
2175 int
2176 nstat(td, uap)
2177 	struct thread *td;
2178 	register struct nstat_args /* {
2179 		char *path;
2180 		struct nstat *ub;
2181 	} */ *uap;
2182 {
2183 	struct stat sb;
2184 	struct nstat nsb;
2185 	int error;
2186 
2187 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2188 	if (error)
2189 		return (error);
2190 	cvtnstat(&sb, &nsb);
2191 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2192 	return (error);
2193 }
2194 
2195 /*
2196  * NetBSD lstat.  Get file status; this version does not follow links.
2197  */
2198 #ifndef _SYS_SYSPROTO_H_
2199 struct lstat_args {
2200 	char	*path;
2201 	struct stat *ub;
2202 };
2203 #endif
2204 int
2205 nlstat(td, uap)
2206 	struct thread *td;
2207 	register struct nlstat_args /* {
2208 		char *path;
2209 		struct nstat *ub;
2210 	} */ *uap;
2211 {
2212 	struct stat sb;
2213 	struct nstat nsb;
2214 	int error;
2215 
2216 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2217 	if (error)
2218 		return (error);
2219 	cvtnstat(&sb, &nsb);
2220 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2221 	return (error);
2222 }
2223 
2224 /*
2225  * Get configurable pathname variables.
2226  */
2227 #ifndef _SYS_SYSPROTO_H_
2228 struct pathconf_args {
2229 	char	*path;
2230 	int	name;
2231 };
2232 #endif
2233 int
2234 pathconf(td, uap)
2235 	struct thread *td;
2236 	register struct pathconf_args /* {
2237 		char *path;
2238 		int name;
2239 	} */ *uap;
2240 {
2241 
2242 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2243 }
2244 
2245 int
2246 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2247 {
2248 	struct nameidata nd;
2249 	int error, vfslocked;
2250 
2251 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2252 	    pathseg, path, td);
2253 	if ((error = namei(&nd)) != 0)
2254 		return (error);
2255 	vfslocked = NDHASGIANT(&nd);
2256 	NDFREE(&nd, NDF_ONLY_PNBUF);
2257 
2258 	/* If asynchronous I/O is available, it works for all files. */
2259 	if (name == _PC_ASYNC_IO)
2260 		td->td_retval[0] = async_io_version;
2261 	else
2262 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2263 	vput(nd.ni_vp);
2264 	VFS_UNLOCK_GIANT(vfslocked);
2265 	return (error);
2266 }
2267 
2268 /*
2269  * Return target name of a symbolic link.
2270  */
2271 #ifndef _SYS_SYSPROTO_H_
2272 struct readlink_args {
2273 	char	*path;
2274 	char	*buf;
2275 	int	count;
2276 };
2277 #endif
2278 int
2279 readlink(td, uap)
2280 	struct thread *td;
2281 	register struct readlink_args /* {
2282 		char *path;
2283 		char *buf;
2284 		int count;
2285 	} */ *uap;
2286 {
2287 
2288 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2289 	    UIO_USERSPACE, uap->count));
2290 }
2291 
2292 int
2293 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2294     enum uio_seg bufseg, int count)
2295 {
2296 	register struct vnode *vp;
2297 	struct iovec aiov;
2298 	struct uio auio;
2299 	int error;
2300 	struct nameidata nd;
2301 	int vfslocked;
2302 
2303 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2304 	    pathseg, path, td);
2305 	if ((error = namei(&nd)) != 0)
2306 		return (error);
2307 	NDFREE(&nd, NDF_ONLY_PNBUF);
2308 	vfslocked = NDHASGIANT(&nd);
2309 	vp = nd.ni_vp;
2310 #ifdef MAC
2311 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2312 	if (error) {
2313 		vput(vp);
2314 		VFS_UNLOCK_GIANT(vfslocked);
2315 		return (error);
2316 	}
2317 #endif
2318 	if (vp->v_type != VLNK)
2319 		error = EINVAL;
2320 	else {
2321 		aiov.iov_base = buf;
2322 		aiov.iov_len = count;
2323 		auio.uio_iov = &aiov;
2324 		auio.uio_iovcnt = 1;
2325 		auio.uio_offset = 0;
2326 		auio.uio_rw = UIO_READ;
2327 		auio.uio_segflg = bufseg;
2328 		auio.uio_td = td;
2329 		auio.uio_resid = count;
2330 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2331 	}
2332 	vput(vp);
2333 	VFS_UNLOCK_GIANT(vfslocked);
2334 	td->td_retval[0] = count - auio.uio_resid;
2335 	return (error);
2336 }
2337 
2338 /*
2339  * Common implementation code for chflags() and fchflags().
2340  */
2341 static int
2342 setfflags(td, vp, flags)
2343 	struct thread *td;
2344 	struct vnode *vp;
2345 	int flags;
2346 {
2347 	int error;
2348 	struct mount *mp;
2349 	struct vattr vattr;
2350 
2351 	/*
2352 	 * Prevent non-root users from setting flags on devices.  When
2353 	 * a device is reused, users can retain ownership of the device
2354 	 * if they are allowed to set flags and programs assume that
2355 	 * chown can't fail when done as root.
2356 	 */
2357 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2358 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
2359 		if (error)
2360 			return (error);
2361 	}
2362 
2363 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2364 		return (error);
2365 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2366 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2367 	VATTR_NULL(&vattr);
2368 	vattr.va_flags = flags;
2369 #ifdef MAC
2370 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2371 	if (error == 0)
2372 #endif
2373 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2374 	VOP_UNLOCK(vp, 0, td);
2375 	vn_finished_write(mp);
2376 	return (error);
2377 }
2378 
2379 /*
2380  * Change flags of a file given a path name.
2381  */
2382 #ifndef _SYS_SYSPROTO_H_
2383 struct chflags_args {
2384 	char	*path;
2385 	int	flags;
2386 };
2387 #endif
2388 int
2389 chflags(td, uap)
2390 	struct thread *td;
2391 	register struct chflags_args /* {
2392 		char *path;
2393 		int flags;
2394 	} */ *uap;
2395 {
2396 	int error;
2397 	struct nameidata nd;
2398 	int vfslocked;
2399 
2400 	AUDIT_ARG(fflags, uap->flags);
2401 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2402 	    uap->path, td);
2403 	if ((error = namei(&nd)) != 0)
2404 		return (error);
2405 	NDFREE(&nd, NDF_ONLY_PNBUF);
2406 	vfslocked = NDHASGIANT(&nd);
2407 	error = setfflags(td, nd.ni_vp, uap->flags);
2408 	vrele(nd.ni_vp);
2409 	VFS_UNLOCK_GIANT(vfslocked);
2410 	return (error);
2411 }
2412 
2413 /*
2414  * Same as chflags() but doesn't follow symlinks.
2415  */
2416 int
2417 lchflags(td, uap)
2418 	struct thread *td;
2419 	register struct lchflags_args /* {
2420 		char *path;
2421 		int flags;
2422 	} */ *uap;
2423 {
2424 	int error;
2425 	struct nameidata nd;
2426 	int vfslocked;
2427 
2428 	AUDIT_ARG(fflags, uap->flags);
2429 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2430 	    uap->path, td);
2431 	if ((error = namei(&nd)) != 0)
2432 		return (error);
2433 	vfslocked = NDHASGIANT(&nd);
2434 	NDFREE(&nd, NDF_ONLY_PNBUF);
2435 	error = setfflags(td, nd.ni_vp, uap->flags);
2436 	vrele(nd.ni_vp);
2437 	VFS_UNLOCK_GIANT(vfslocked);
2438 	return (error);
2439 }
2440 
2441 /*
2442  * Change flags of a file given a file descriptor.
2443  */
2444 #ifndef _SYS_SYSPROTO_H_
2445 struct fchflags_args {
2446 	int	fd;
2447 	int	flags;
2448 };
2449 #endif
2450 int
2451 fchflags(td, uap)
2452 	struct thread *td;
2453 	register struct fchflags_args /* {
2454 		int fd;
2455 		int flags;
2456 	} */ *uap;
2457 {
2458 	struct file *fp;
2459 	int vfslocked;
2460 	int error;
2461 
2462 	AUDIT_ARG(fd, uap->fd);
2463 	AUDIT_ARG(fflags, uap->flags);
2464 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2465 		return (error);
2466 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2467 #ifdef AUDIT
2468 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2469 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2470 	VOP_UNLOCK(fp->f_vnode, 0, td);
2471 #endif
2472 	error = setfflags(td, fp->f_vnode, uap->flags);
2473 	VFS_UNLOCK_GIANT(vfslocked);
2474 	fdrop(fp, td);
2475 	return (error);
2476 }
2477 
2478 /*
2479  * Common implementation code for chmod(), lchmod() and fchmod().
2480  */
2481 static int
2482 setfmode(td, vp, mode)
2483 	struct thread *td;
2484 	struct vnode *vp;
2485 	int mode;
2486 {
2487 	int error;
2488 	struct mount *mp;
2489 	struct vattr vattr;
2490 
2491 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2492 		return (error);
2493 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2494 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2495 	VATTR_NULL(&vattr);
2496 	vattr.va_mode = mode & ALLPERMS;
2497 #ifdef MAC
2498 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2499 	if (error == 0)
2500 #endif
2501 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2502 	VOP_UNLOCK(vp, 0, td);
2503 	vn_finished_write(mp);
2504 	return (error);
2505 }
2506 
2507 /*
2508  * Change mode of a file given path name.
2509  */
2510 #ifndef _SYS_SYSPROTO_H_
2511 struct chmod_args {
2512 	char	*path;
2513 	int	mode;
2514 };
2515 #endif
2516 int
2517 chmod(td, uap)
2518 	struct thread *td;
2519 	register struct chmod_args /* {
2520 		char *path;
2521 		int mode;
2522 	} */ *uap;
2523 {
2524 
2525 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2526 }
2527 
2528 int
2529 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2530 {
2531 	int error;
2532 	struct nameidata nd;
2533 	int vfslocked;
2534 
2535 	AUDIT_ARG(mode, mode);
2536 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2537 	if ((error = namei(&nd)) != 0)
2538 		return (error);
2539 	vfslocked = NDHASGIANT(&nd);
2540 	NDFREE(&nd, NDF_ONLY_PNBUF);
2541 	error = setfmode(td, nd.ni_vp, mode);
2542 	vrele(nd.ni_vp);
2543 	VFS_UNLOCK_GIANT(vfslocked);
2544 	return (error);
2545 }
2546 
2547 /*
2548  * Change mode of a file given path name (don't follow links.)
2549  */
2550 #ifndef _SYS_SYSPROTO_H_
2551 struct lchmod_args {
2552 	char	*path;
2553 	int	mode;
2554 };
2555 #endif
2556 int
2557 lchmod(td, uap)
2558 	struct thread *td;
2559 	register struct lchmod_args /* {
2560 		char *path;
2561 		int mode;
2562 	} */ *uap;
2563 {
2564 	int error;
2565 	struct nameidata nd;
2566 	int vfslocked;
2567 
2568 	AUDIT_ARG(mode, (mode_t)uap->mode);
2569 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2570 	    uap->path, td);
2571 	if ((error = namei(&nd)) != 0)
2572 		return (error);
2573 	vfslocked = NDHASGIANT(&nd);
2574 	NDFREE(&nd, NDF_ONLY_PNBUF);
2575 	error = setfmode(td, nd.ni_vp, uap->mode);
2576 	vrele(nd.ni_vp);
2577 	VFS_UNLOCK_GIANT(vfslocked);
2578 	return (error);
2579 }
2580 
2581 /*
2582  * Change mode of a file given a file descriptor.
2583  */
2584 #ifndef _SYS_SYSPROTO_H_
2585 struct fchmod_args {
2586 	int	fd;
2587 	int	mode;
2588 };
2589 #endif
2590 int
2591 fchmod(td, uap)
2592 	struct thread *td;
2593 	register struct fchmod_args /* {
2594 		int fd;
2595 		int mode;
2596 	} */ *uap;
2597 {
2598 	struct file *fp;
2599 	int vfslocked;
2600 	int error;
2601 
2602 	AUDIT_ARG(fd, uap->fd);
2603 	AUDIT_ARG(mode, uap->mode);
2604 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2605 		return (error);
2606 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2607 #ifdef AUDIT
2608 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2609 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2610 	VOP_UNLOCK(fp->f_vnode, 0, td);
2611 #endif
2612 	error = setfmode(td, fp->f_vnode, uap->mode);
2613 	VFS_UNLOCK_GIANT(vfslocked);
2614 	fdrop(fp, td);
2615 	return (error);
2616 }
2617 
2618 /*
2619  * Common implementation for chown(), lchown(), and fchown()
2620  */
2621 static int
2622 setfown(td, vp, uid, gid)
2623 	struct thread *td;
2624 	struct vnode *vp;
2625 	uid_t uid;
2626 	gid_t gid;
2627 {
2628 	int error;
2629 	struct mount *mp;
2630 	struct vattr vattr;
2631 
2632 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2633 		return (error);
2634 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2635 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2636 	VATTR_NULL(&vattr);
2637 	vattr.va_uid = uid;
2638 	vattr.va_gid = gid;
2639 #ifdef MAC
2640 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2641 	    vattr.va_gid);
2642 	if (error == 0)
2643 #endif
2644 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2645 	VOP_UNLOCK(vp, 0, td);
2646 	vn_finished_write(mp);
2647 	return (error);
2648 }
2649 
2650 /*
2651  * Set ownership given a path name.
2652  */
2653 #ifndef _SYS_SYSPROTO_H_
2654 struct chown_args {
2655 	char	*path;
2656 	int	uid;
2657 	int	gid;
2658 };
2659 #endif
2660 int
2661 chown(td, uap)
2662 	struct thread *td;
2663 	register struct chown_args /* {
2664 		char *path;
2665 		int uid;
2666 		int gid;
2667 	} */ *uap;
2668 {
2669 
2670 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2671 }
2672 
2673 int
2674 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2675     int gid)
2676 {
2677 	int error;
2678 	struct nameidata nd;
2679 	int vfslocked;
2680 
2681 	AUDIT_ARG(owner, uid, gid);
2682 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2683 	if ((error = namei(&nd)) != 0)
2684 		return (error);
2685 	vfslocked = NDHASGIANT(&nd);
2686 	NDFREE(&nd, NDF_ONLY_PNBUF);
2687 	error = setfown(td, nd.ni_vp, uid, gid);
2688 	vrele(nd.ni_vp);
2689 	VFS_UNLOCK_GIANT(vfslocked);
2690 	return (error);
2691 }
2692 
2693 /*
2694  * Set ownership given a path name, do not cross symlinks.
2695  */
2696 #ifndef _SYS_SYSPROTO_H_
2697 struct lchown_args {
2698 	char	*path;
2699 	int	uid;
2700 	int	gid;
2701 };
2702 #endif
2703 int
2704 lchown(td, uap)
2705 	struct thread *td;
2706 	register struct lchown_args /* {
2707 		char *path;
2708 		int uid;
2709 		int gid;
2710 	} */ *uap;
2711 {
2712 
2713 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2714 }
2715 
2716 int
2717 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2718     int gid)
2719 {
2720 	int error;
2721 	struct nameidata nd;
2722 	int vfslocked;
2723 
2724 	AUDIT_ARG(owner, uid, gid);
2725 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2726 	if ((error = namei(&nd)) != 0)
2727 		return (error);
2728 	vfslocked = NDHASGIANT(&nd);
2729 	NDFREE(&nd, NDF_ONLY_PNBUF);
2730 	error = setfown(td, nd.ni_vp, uid, gid);
2731 	vrele(nd.ni_vp);
2732 	VFS_UNLOCK_GIANT(vfslocked);
2733 	return (error);
2734 }
2735 
2736 /*
2737  * Set ownership given a file descriptor.
2738  */
2739 #ifndef _SYS_SYSPROTO_H_
2740 struct fchown_args {
2741 	int	fd;
2742 	int	uid;
2743 	int	gid;
2744 };
2745 #endif
2746 int
2747 fchown(td, uap)
2748 	struct thread *td;
2749 	register struct fchown_args /* {
2750 		int fd;
2751 		int uid;
2752 		int gid;
2753 	} */ *uap;
2754 {
2755 	struct file *fp;
2756 	int vfslocked;
2757 	int error;
2758 
2759 	AUDIT_ARG(fd, uap->fd);
2760 	AUDIT_ARG(owner, uap->uid, uap->gid);
2761 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2762 		return (error);
2763 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2764 #ifdef AUDIT
2765 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2766 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2767 	VOP_UNLOCK(fp->f_vnode, 0, td);
2768 #endif
2769 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2770 	VFS_UNLOCK_GIANT(vfslocked);
2771 	fdrop(fp, td);
2772 	return (error);
2773 }
2774 
2775 /*
2776  * Common implementation code for utimes(), lutimes(), and futimes().
2777  */
2778 static int
2779 getutimes(usrtvp, tvpseg, tsp)
2780 	const struct timeval *usrtvp;
2781 	enum uio_seg tvpseg;
2782 	struct timespec *tsp;
2783 {
2784 	struct timeval tv[2];
2785 	const struct timeval *tvp;
2786 	int error;
2787 
2788 	if (usrtvp == NULL) {
2789 		microtime(&tv[0]);
2790 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2791 		tsp[1] = tsp[0];
2792 	} else {
2793 		if (tvpseg == UIO_SYSSPACE) {
2794 			tvp = usrtvp;
2795 		} else {
2796 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2797 				return (error);
2798 			tvp = tv;
2799 		}
2800 
2801 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2802 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2803 			return (EINVAL);
2804 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2805 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2806 	}
2807 	return (0);
2808 }
2809 
2810 /*
2811  * Common implementation code for utimes(), lutimes(), and futimes().
2812  */
2813 static int
2814 setutimes(td, vp, ts, numtimes, nullflag)
2815 	struct thread *td;
2816 	struct vnode *vp;
2817 	const struct timespec *ts;
2818 	int numtimes;
2819 	int nullflag;
2820 {
2821 	int error, setbirthtime;
2822 	struct mount *mp;
2823 	struct vattr vattr;
2824 
2825 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2826 		return (error);
2827 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2828 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2829 	setbirthtime = 0;
2830 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2831 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2832 		setbirthtime = 1;
2833 	VATTR_NULL(&vattr);
2834 	vattr.va_atime = ts[0];
2835 	vattr.va_mtime = ts[1];
2836 	if (setbirthtime)
2837 		vattr.va_birthtime = ts[1];
2838 	if (numtimes > 2)
2839 		vattr.va_birthtime = ts[2];
2840 	if (nullflag)
2841 		vattr.va_vaflags |= VA_UTIMES_NULL;
2842 #ifdef MAC
2843 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2844 	    vattr.va_mtime);
2845 #endif
2846 	if (error == 0)
2847 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2848 	VOP_UNLOCK(vp, 0, td);
2849 	vn_finished_write(mp);
2850 	return (error);
2851 }
2852 
2853 /*
2854  * Set the access and modification times of a file.
2855  */
2856 #ifndef _SYS_SYSPROTO_H_
2857 struct utimes_args {
2858 	char	*path;
2859 	struct	timeval *tptr;
2860 };
2861 #endif
2862 int
2863 utimes(td, uap)
2864 	struct thread *td;
2865 	register struct utimes_args /* {
2866 		char *path;
2867 		struct timeval *tptr;
2868 	} */ *uap;
2869 {
2870 
2871 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2872 	    UIO_USERSPACE));
2873 }
2874 
2875 int
2876 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2877     struct timeval *tptr, enum uio_seg tptrseg)
2878 {
2879 	struct timespec ts[2];
2880 	int error;
2881 	struct nameidata nd;
2882 	int vfslocked;
2883 
2884 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2885 		return (error);
2886 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2887 	if ((error = namei(&nd)) != 0)
2888 		return (error);
2889 	vfslocked = NDHASGIANT(&nd);
2890 	NDFREE(&nd, NDF_ONLY_PNBUF);
2891 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2892 	vrele(nd.ni_vp);
2893 	VFS_UNLOCK_GIANT(vfslocked);
2894 	return (error);
2895 }
2896 
2897 /*
2898  * Set the access and modification times of a file.
2899  */
2900 #ifndef _SYS_SYSPROTO_H_
2901 struct lutimes_args {
2902 	char	*path;
2903 	struct	timeval *tptr;
2904 };
2905 #endif
2906 int
2907 lutimes(td, uap)
2908 	struct thread *td;
2909 	register struct lutimes_args /* {
2910 		char *path;
2911 		struct timeval *tptr;
2912 	} */ *uap;
2913 {
2914 
2915 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2916 	    UIO_USERSPACE));
2917 }
2918 
2919 int
2920 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2921     struct timeval *tptr, enum uio_seg tptrseg)
2922 {
2923 	struct timespec ts[2];
2924 	int error;
2925 	struct nameidata nd;
2926 	int vfslocked;
2927 
2928 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2929 		return (error);
2930 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2931 	if ((error = namei(&nd)) != 0)
2932 		return (error);
2933 	vfslocked = NDHASGIANT(&nd);
2934 	NDFREE(&nd, NDF_ONLY_PNBUF);
2935 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2936 	vrele(nd.ni_vp);
2937 	VFS_UNLOCK_GIANT(vfslocked);
2938 	return (error);
2939 }
2940 
2941 /*
2942  * Set the access and modification times of a file.
2943  */
2944 #ifndef _SYS_SYSPROTO_H_
2945 struct futimes_args {
2946 	int	fd;
2947 	struct	timeval *tptr;
2948 };
2949 #endif
2950 int
2951 futimes(td, uap)
2952 	struct thread *td;
2953 	register struct futimes_args /* {
2954 		int  fd;
2955 		struct timeval *tptr;
2956 	} */ *uap;
2957 {
2958 
2959 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2960 }
2961 
2962 int
2963 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2964     enum uio_seg tptrseg)
2965 {
2966 	struct timespec ts[2];
2967 	struct file *fp;
2968 	int vfslocked;
2969 	int error;
2970 
2971 	AUDIT_ARG(fd, fd);
2972 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2973 		return (error);
2974 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2975 		return (error);
2976 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2977 #ifdef AUDIT
2978 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2979 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2980 	VOP_UNLOCK(fp->f_vnode, 0, td);
2981 #endif
2982 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2983 	VFS_UNLOCK_GIANT(vfslocked);
2984 	fdrop(fp, td);
2985 	return (error);
2986 }
2987 
2988 /*
2989  * Truncate a file given its path name.
2990  */
2991 #ifndef _SYS_SYSPROTO_H_
2992 struct truncate_args {
2993 	char	*path;
2994 	int	pad;
2995 	off_t	length;
2996 };
2997 #endif
2998 int
2999 truncate(td, uap)
3000 	struct thread *td;
3001 	register struct truncate_args /* {
3002 		char *path;
3003 		int pad;
3004 		off_t length;
3005 	} */ *uap;
3006 {
3007 
3008 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3009 }
3010 
/*
 * Set the size of the file named by path (resolved with FOLLOW) to length.
 *
 * Returns EINVAL for a negative length, EISDIR if the path names a
 * directory, or the first error reported by namei(), vn_start_write(),
 * the MAC write check (when compiled in), vn_writechk(), VOP_ACCESS()
 * or VOP_SETATTR().
 */
int
kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
{
	struct mount *mp;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	int vfslocked;

	/* Reject negative sizes before doing any lookup work. */
	if (length < 0)
		return(EINVAL);
	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
		/* vp has not been locked yet, so vrele() rather than vput(). */
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/*
	 * The checks below form a single if/else-if chain; with MAC
	 * compiled in, a failing MAC check terminates the chain with an
	 * empty body so that nothing further is attempted.
	 */
	if (vp->v_type == VDIR)
		error = EISDIR;
#ifdef MAC
	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
	}
#endif
	else if ((error = vn_writechk(vp)) == 0 &&
	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
		/* Only va_size is set; every other attribute stays VNOVAL. */
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
	}
	/* vput() both unlocks vp and drops the reference from namei(). */
	vput(vp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
3053 
/*
 * ftruncate system call: set the size of the file behind descriptor
 * uap->fd to uap->length.
 *
 * Returns EINVAL for a negative length or when the descriptor was not
 * opened with FWRITE, EISDIR for a directory, or the first error from
 * getvnode(), vn_start_write(), the MAC write check (when compiled in),
 * vn_writechk() or VOP_SETATTR().  Unlike kern_truncate(), no
 * VOP_ACCESS() check is made and the credential passed to VOP_SETATTR()
 * is the one stored in the open file (fp->f_cred).
 */
#ifndef _SYS_SYSPROTO_H_
struct ftruncate_args {
	int	fd;
	int	pad;
	off_t	length;
};
#endif
int
ftruncate(td, uap)
	struct thread *td;
	register struct ftruncate_args /* {
		int fd;
		int pad;
		off_t length;
	} */ *uap;
{
	struct mount *mp;
	struct vattr vattr;
	struct vnode *vp;
	struct file *fp;
	int vfslocked;
	int error;

	AUDIT_ARG(fd, uap->fd);
	if (uap->length < 0)
		return(EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
		return (error);
	/* The descriptor itself must have been opened for writing. */
	if ((fp->f_flag & FWRITE) == 0) {
		fdrop(fp, td);
		return (EINVAL);
	}
	vp = fp->f_vnode;
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
		goto drop;
	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	AUDIT_ARG(vnode, vp, ARG_VNODE1);
	/*
	 * Single if/else-if chain; with MAC compiled in, a failing MAC
	 * check ends the chain through its empty body.
	 */
	if (vp->v_type == VDIR)
		error = EISDIR;
#ifdef MAC
	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
	    vp))) {
	}
#endif
	else if ((error = vn_writechk(vp)) == 0) {
		/* Only va_size is set; every other attribute stays VNOVAL. */
		VATTR_NULL(&vattr);
		vattr.va_size = uap->length;
		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
	}
	VOP_UNLOCK(vp, 0, td);
	vn_finished_write(mp);
drop:
	/* Common exit: reached with vp unlocked and fp still referenced. */
	VFS_UNLOCK_GIANT(vfslocked);
	fdrop(fp, td);
	return (error);
}
3115 
3116 #if defined(COMPAT_43)
3117 /*
3118  * Truncate a file given its path name.
3119  */
3120 #ifndef _SYS_SYSPROTO_H_
3121 struct otruncate_args {
3122 	char	*path;
3123 	long	length;
3124 };
3125 #endif
3126 int
3127 otruncate(td, uap)
3128 	struct thread *td;
3129 	register struct otruncate_args /* {
3130 		char *path;
3131 		long length;
3132 	} */ *uap;
3133 {
3134 	struct truncate_args /* {
3135 		char *path;
3136 		int pad;
3137 		off_t length;
3138 	} */ nuap;
3139 
3140 	nuap.path = uap->path;
3141 	nuap.length = uap->length;
3142 	return (truncate(td, &nuap));
3143 }
3144 
3145 /*
3146  * Truncate a file given a file descriptor.
3147  */
3148 #ifndef _SYS_SYSPROTO_H_
3149 struct oftruncate_args {
3150 	int	fd;
3151 	long	length;
3152 };
3153 #endif
3154 int
3155 oftruncate(td, uap)
3156 	struct thread *td;
3157 	register struct oftruncate_args /* {
3158 		int fd;
3159 		long length;
3160 	} */ *uap;
3161 {
3162 	struct ftruncate_args /* {
3163 		int fd;
3164 		int pad;
3165 		off_t length;
3166 	} */ nuap;
3167 
3168 	nuap.fd = uap->fd;
3169 	nuap.length = uap->length;
3170 	return (ftruncate(td, &nuap));
3171 }
3172 #endif /* COMPAT_43 */
3173 
3174 /*
3175  * Sync an open file.
3176  */
3177 #ifndef _SYS_SYSPROTO_H_
3178 struct fsync_args {
3179 	int	fd;
3180 };
3181 #endif
3182 int
3183 fsync(td, uap)
3184 	struct thread *td;
3185 	struct fsync_args /* {
3186 		int fd;
3187 	} */ *uap;
3188 {
3189 	struct vnode *vp;
3190 	struct mount *mp;
3191 	struct file *fp;
3192 	int vfslocked;
3193 	int error;
3194 
3195 	AUDIT_ARG(fd, uap->fd);
3196 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3197 		return (error);
3198 	vp = fp->f_vnode;
3199 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3200 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3201 		goto drop;
3202 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3203 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3204 	if (vp->v_object != NULL) {
3205 		VM_OBJECT_LOCK(vp->v_object);
3206 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3207 		VM_OBJECT_UNLOCK(vp->v_object);
3208 	}
3209 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3210 
3211 	VOP_UNLOCK(vp, 0, td);
3212 	vn_finished_write(mp);
3213 drop:
3214 	VFS_UNLOCK_GIANT(vfslocked);
3215 	fdrop(fp, td);
3216 	return (error);
3217 }
3218 
3219 /*
3220  * Rename files.  Source and destination must either both be directories,
3221  * or both not be directories.  If target is a directory, it must be empty.
3222  */
3223 #ifndef _SYS_SYSPROTO_H_
3224 struct rename_args {
3225 	char	*from;
3226 	char	*to;
3227 };
3228 #endif
3229 int
3230 rename(td, uap)
3231 	struct thread *td;
3232 	register struct rename_args /* {
3233 		char *from;
3234 		char *to;
3235 	} */ *uap;
3236 {
3237 
3238 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3239 }
3240 
3241 int
3242 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3243 {
3244 	struct mount *mp = NULL;
3245 	struct vnode *tvp, *fvp, *tdvp;
3246 	struct nameidata fromnd, tond;
3247 	int tvfslocked;
3248 	int fvfslocked;
3249 	int error;
3250 
3251 	bwillwrite();
3252 #ifdef MAC
3253 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3254 	    AUDITVNODE1, pathseg, from, td);
3255 #else
3256 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3257 	    AUDITVNODE1, pathseg, from, td);
3258 #endif
3259 	if ((error = namei(&fromnd)) != 0)
3260 		return (error);
3261 	fvfslocked = NDHASGIANT(&fromnd);
3262 	tvfslocked = 0;
3263 #ifdef MAC
3264 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3265 	    fromnd.ni_vp, &fromnd.ni_cnd);
3266 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3267 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
3268 #endif
3269 	fvp = fromnd.ni_vp;
3270 	if (error == 0)
3271 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3272 	if (error != 0) {
3273 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3274 		vrele(fromnd.ni_dvp);
3275 		vrele(fvp);
3276 		goto out1;
3277 	}
3278 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3279 	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3280 	if (fromnd.ni_vp->v_type == VDIR)
3281 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3282 	if ((error = namei(&tond)) != 0) {
3283 		/* Translate error code for rename("dir1", "dir2/."). */
3284 		if (error == EISDIR && fvp->v_type == VDIR)
3285 			error = EINVAL;
3286 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3287 		vrele(fromnd.ni_dvp);
3288 		vrele(fvp);
3289 		vn_finished_write(mp);
3290 		goto out1;
3291 	}
3292 	tvfslocked = NDHASGIANT(&tond);
3293 	tdvp = tond.ni_dvp;
3294 	tvp = tond.ni_vp;
3295 	if (tvp != NULL) {
3296 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3297 			error = ENOTDIR;
3298 			goto out;
3299 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3300 			error = EISDIR;
3301 			goto out;
3302 		}
3303 	}
3304 	if (fvp == tdvp)
3305 		error = EINVAL;
3306 	/*
3307 	 * If the source is the same as the destination (that is, if they
3308 	 * are links to the same vnode), then there is nothing to do.
3309 	 */
3310 	if (fvp == tvp)
3311 		error = -1;
3312 #ifdef MAC
3313 	else
3314 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3315 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3316 #endif
3317 out:
3318 	if (!error) {
3319 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3320 		if (fromnd.ni_dvp != tdvp) {
3321 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3322 		}
3323 		if (tvp) {
3324 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3325 		}
3326 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3327 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3328 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3329 		NDFREE(&tond, NDF_ONLY_PNBUF);
3330 	} else {
3331 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3332 		NDFREE(&tond, NDF_ONLY_PNBUF);
3333 		if (tvp)
3334 			vput(tvp);
3335 		if (tdvp == tvp)
3336 			vrele(tdvp);
3337 		else
3338 			vput(tdvp);
3339 		vrele(fromnd.ni_dvp);
3340 		vrele(fvp);
3341 	}
3342 	vrele(tond.ni_startdir);
3343 	vn_finished_write(mp);
3344 out1:
3345 	if (fromnd.ni_startdir)
3346 		vrele(fromnd.ni_startdir);
3347 	VFS_UNLOCK_GIANT(fvfslocked);
3348 	VFS_UNLOCK_GIANT(tvfslocked);
3349 	if (error == -1)
3350 		return (0);
3351 	return (error);
3352 }
3353 
3354 /*
3355  * Make a directory file.
3356  */
3357 #ifndef _SYS_SYSPROTO_H_
3358 struct mkdir_args {
3359 	char	*path;
3360 	int	mode;
3361 };
3362 #endif
3363 int
3364 mkdir(td, uap)
3365 	struct thread *td;
3366 	register struct mkdir_args /* {
3367 		char *path;
3368 		int mode;
3369 	} */ *uap;
3370 {
3371 
3372 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3373 }
3374 
/*
 * Create the directory named by path; segflg tells namei() which address
 * space path lives in.
 *
 * The new directory's mode is (mode & ACCESSPERMS) with the bits of the
 * process file-creation mask (fd_cmask) cleared.  Returns EEXIST if the
 * name already exists, otherwise 0 or the first error from namei(),
 * vn_start_write(), the MAC create check (when compiled in) or
 * VOP_MKDIR().
 */
int
kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
{
	struct mount *mp;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	int vfslocked;

	AUDIT_ARG(mode, mode);
	/* The whole lookup is redone from here if the write start is refused. */
restart:
	bwillwrite();
	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
	    segflg, path, td);
	nd.ni_cnd.cn_flags |= WILLBEDIR;
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	/* A non-NULL leaf means the name is already taken. */
	if (vp != NULL) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		/*
		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
		 * the strange behaviour of leaving the vnode unlocked
		 * if the target is the same vnode as the parent.
		 */
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (EEXIST);
	}
	/*
	 * Try to start the write without sleeping.  On failure release
	 * everything obtained by the lookup, wait with V_XSLEEP, and
	 * start over.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	/* fd_cmask is read under the filedesc lock. */
	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
#ifdef MAC
	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error)
		goto out;
#endif
	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
#ifdef MAC
out:
#endif
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	/* nd.ni_vp is released only when VOP_MKDIR() succeeded. */
	if (!error)
		vput(nd.ni_vp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
3442 
3443 /*
3444  * Remove a directory file.
3445  */
3446 #ifndef _SYS_SYSPROTO_H_
3447 struct rmdir_args {
3448 	char	*path;
3449 };
3450 #endif
3451 int
3452 rmdir(td, uap)
3453 	struct thread *td;
3454 	struct rmdir_args /* {
3455 		char *path;
3456 	} */ *uap;
3457 {
3458 
3459 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3460 }
3461 
/*
 * Remove the directory named by path; pathseg tells namei() which address
 * space path lives in.
 *
 * Returns ENOTDIR if the name is not a directory, EINVAL for an attempt
 * to remove ".", EBUSY for the root of a mounted filesystem, or the
 * first error from namei(), the MAC delete check (when compiled in),
 * vn_start_write() or VOP_RMDIR().
 */
int
kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
{
	struct mount *mp;
	struct vnode *vp;
	int error;
	struct nameidata nd;
	int vfslocked;

	/* The whole lookup is redone from here if the write start is refused. */
restart:
	bwillwrite();
	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
	    pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}
	/*
	 * No rmdir "." please.
	 */
	if (nd.ni_dvp == vp) {
		error = EINVAL;
		goto out;
	}
	/*
	 * The root of a mounted filesystem cannot be deleted.
	 */
	if (vp->v_vflag & VV_ROOT) {
		error = EBUSY;
		goto out;
	}
#ifdef MAC
	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
	    &nd.ni_cnd);
	if (error)
		goto out;
#endif
	/*
	 * Try to start the write without sleeping.  On failure release
	 * everything obtained by the lookup, wait with V_XSLEEP, and
	 * start over.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(vp);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
	vn_finished_write(mp);
out:
	/*
	 * Common exit for success and failure.  When parent and leaf are
	 * the same vnode (the "." case) the single lock is dropped by
	 * vput(vp) and only the extra reference is released.
	 */
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(vp);
	if (nd.ni_dvp == vp)
		vrele(nd.ni_dvp);
	else
		vput(nd.ni_dvp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
3529 
3530 #ifdef COMPAT_43
/*
 * COMPAT_43 getdirentries: read a block of directory entries from the
 * directory open on uap->fd into the user buffer uap->buf, presenting
 * them in the old layout where d_type/d_namlen formed one 16-bit
 * name-length field.
 *
 * uap->count is limited to 64KB (EINVAL above that).  The descriptor
 * must be open for reading (EBADF) and refer to a directory (EINVAL).
 * On success the file offset from before the read is copied out to
 * uap->basep and the number of bytes transferred is returned in
 * td->td_retval[0].  A record with d_reclen == 0 yields EIO.
 *
 * When nothing was read, union handling is attempted: first through the
 * union_dircheckp hook, then by switching the descriptor to the covered
 * vnode of a MNT_UNION mount root; in both cases the read is retried.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		int fd;
		char *buf;
		u_int count;
		long *basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt, vfslocked;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	/* Re-entered with a different vp when a union layer is descended. */
unionread:
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	if (vp->v_type != VDIR) {
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (EINVAL);
	}
	/* auio describes the caller's buffer in user space. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember the starting offset; it is reported through basep. */
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_check_vnode_readdir(td->td_ucred, vp);
	if (error) {
		VOP_UNLOCK(vp, 0, td);
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (error);
	}
#endif
	/*
	 * On non-little-endian machines a filesystem with
	 * mnt_maxsymlinklen <= 0 is read straight into the user buffer
	 * with no conversion; everything else goes through the kernel
	 * bounce buffer below.
	 */
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/* kuio is a copy of auio redirected at a kernel buffer. */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			/* Rewrite every record in place, then copy out. */
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					/* Zero-length record: stop, dp stays < edp. */
					error = EIO;
					break;
				}
			}
			/* Only copy out if the walk consumed the whole read. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, td);
	if (error) {
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (error);
	}
	/* Nothing was transferred: see whether a union layer lies below. */
	if (uap->count == auio.uio_resid) {
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			/* -1 from the hook means "retry with the updated vp". */
			if (error == -1) {
				VFS_UNLOCK_GIANT(vfslocked);
				goto unionread;
			}
			if (error) {
				VFS_UNLOCK_GIANT(vfslocked);
				fdrop(fp, td);
				return (error);
			}
		}
		/*
		 * XXX We could delay dropping the lock above but
		 * union_dircheckp complicates things.
		 */
		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
		if ((vp->v_vflag & VV_ROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * Root of a union mount: point the descriptor at
			 * the covered vnode, rewind, and read again.
			 */
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_vnode = vp;
			fp->f_data = vp;
			fp->f_offset = 0;
			vput(tvp);
			VFS_UNLOCK_GIANT(vfslocked);
			goto unionread;
		}
		VOP_UNLOCK(vp, 0, td);
	}
	VFS_UNLOCK_GIANT(vfslocked);
	error = copyout(&loff, uap->basep, sizeof(long));
	fdrop(fp, td);
	/* The byte count is stored even if the copyout above failed. */
	td->td_retval[0] = uap->count - auio.uio_resid;
	return (error);
}
3692 #endif /* COMPAT_43 */
3693 
3694 /*
3695  * Read a block of directory entries in a filesystem independent format.
3696  */
3697 #ifndef _SYS_SYSPROTO_H_
3698 struct getdirentries_args {
3699 	int	fd;
3700 	char	*buf;
3701 	u_int	count;
3702 	long	*basep;
3703 };
3704 #endif
3705 int
3706 getdirentries(td, uap)
3707 	struct thread *td;
3708 	register struct getdirentries_args /* {
3709 		int fd;
3710 		char *buf;
3711 		u_int count;
3712 		long *basep;
3713 	} */ *uap;
3714 {
3715 	struct vnode *vp;
3716 	struct file *fp;
3717 	struct uio auio;
3718 	struct iovec aiov;
3719 	int vfslocked;
3720 	long loff;
3721 	int error, eofflag;
3722 
3723 	AUDIT_ARG(fd, uap->fd);
3724 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3725 		return (error);
3726 	if ((fp->f_flag & FREAD) == 0) {
3727 		fdrop(fp, td);
3728 		return (EBADF);
3729 	}
3730 	vp = fp->f_vnode;
3731 unionread:
3732 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3733 	if (vp->v_type != VDIR) {
3734 		error = EINVAL;
3735 		goto fail;
3736 	}
3737 	aiov.iov_base = uap->buf;
3738 	aiov.iov_len = uap->count;
3739 	auio.uio_iov = &aiov;
3740 	auio.uio_iovcnt = 1;
3741 	auio.uio_rw = UIO_READ;
3742 	auio.uio_segflg = UIO_USERSPACE;
3743 	auio.uio_td = td;
3744 	auio.uio_resid = uap->count;
3745 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3746 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3747 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3748 	loff = auio.uio_offset = fp->f_offset;
3749 #ifdef MAC
3750 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3751 	if (error == 0)
3752 #endif
3753 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3754 		    NULL);
3755 	fp->f_offset = auio.uio_offset;
3756 	VOP_UNLOCK(vp, 0, td);
3757 	if (error)
3758 		goto fail;
3759 	if (uap->count == auio.uio_resid) {
3760 		if (union_dircheckp) {
3761 			error = union_dircheckp(td, &vp, fp);
3762 			if (error == -1) {
3763 				VFS_UNLOCK_GIANT(vfslocked);
3764 				goto unionread;
3765 			}
3766 			if (error)
3767 				goto fail;
3768 		}
3769 		/*
3770 		 * XXX We could delay dropping the lock above but
3771 		 * union_dircheckp complicates things.
3772 		 */
3773 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3774 		if ((vp->v_vflag & VV_ROOT) &&
3775 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3776 			struct vnode *tvp = vp;
3777 			vp = vp->v_mount->mnt_vnodecovered;
3778 			VREF(vp);
3779 			fp->f_vnode = vp;
3780 			fp->f_data = vp;
3781 			fp->f_offset = 0;
3782 			vput(tvp);
3783 			VFS_UNLOCK_GIANT(vfslocked);
3784 			goto unionread;
3785 		}
3786 		VOP_UNLOCK(vp, 0, td);
3787 	}
3788 	if (uap->basep != NULL) {
3789 		error = copyout(&loff, uap->basep, sizeof(long));
3790 	}
3791 	td->td_retval[0] = uap->count - auio.uio_resid;
3792 fail:
3793 	VFS_UNLOCK_GIANT(vfslocked);
3794 	fdrop(fp, td);
3795 	return (error);
3796 }
3797 #ifndef _SYS_SYSPROTO_H_
3798 struct getdents_args {
3799 	int fd;
3800 	char *buf;
3801 	size_t count;
3802 };
3803 #endif
3804 int
3805 getdents(td, uap)
3806 	struct thread *td;
3807 	register struct getdents_args /* {
3808 		int fd;
3809 		char *buf;
3810 		u_int count;
3811 	} */ *uap;
3812 {
3813 	struct getdirentries_args ap;
3814 	ap.fd = uap->fd;
3815 	ap.buf = uap->buf;
3816 	ap.count = uap->count;
3817 	ap.basep = NULL;
3818 	return (getdirentries(td, &ap));
3819 }
3820 
3821 /*
3822  * Set the mode mask for creation of filesystem nodes.
3823  *
3824  * MP SAFE
3825  */
3826 #ifndef _SYS_SYSPROTO_H_
3827 struct umask_args {
3828 	int	newmask;
3829 };
3830 #endif
3831 int
3832 umask(td, uap)
3833 	struct thread *td;
3834 	struct umask_args /* {
3835 		int newmask;
3836 	} */ *uap;
3837 {
3838 	register struct filedesc *fdp;
3839 
3840 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3841 	fdp = td->td_proc->p_fd;
3842 	td->td_retval[0] = fdp->fd_cmask;
3843 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3844 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3845 	return (0);
3846 }
3847 
3848 /*
3849  * Void all references to file by ripping underlying filesystem
3850  * away from vnode.
3851  */
3852 #ifndef _SYS_SYSPROTO_H_
3853 struct revoke_args {
3854 	char	*path;
3855 };
3856 #endif
3857 int
3858 revoke(td, uap)
3859 	struct thread *td;
3860 	register struct revoke_args /* {
3861 		char *path;
3862 	} */ *uap;
3863 {
3864 	struct vnode *vp;
3865 	struct vattr vattr;
3866 	int error;
3867 	struct nameidata nd;
3868 	int vfslocked;
3869 
3870 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3871 	    UIO_USERSPACE, uap->path, td);
3872 	if ((error = namei(&nd)) != 0)
3873 		return (error);
3874 	vfslocked = NDHASGIANT(&nd);
3875 	vp = nd.ni_vp;
3876 	NDFREE(&nd, NDF_ONLY_PNBUF);
3877 	if (vp->v_type != VCHR) {
3878 		error = EINVAL;
3879 		goto out;
3880 	}
3881 #ifdef MAC
3882 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3883 	if (error)
3884 		goto out;
3885 #endif
3886 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3887 	if (error)
3888 		goto out;
3889 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3890 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
3891 		if (error)
3892 			goto out;
3893 	}
3894 	if (vcount(vp) > 1)
3895 		VOP_REVOKE(vp, REVOKEALL);
3896 out:
3897 	vput(vp);
3898 	VFS_UNLOCK_GIANT(vfslocked);
3899 	return (error);
3900 }
3901 
3902 /*
3903  * Convert a user file descriptor to a kernel file entry.
3904  * A reference on the file entry is held upon returning.
3905  */
3906 int
3907 getvnode(fdp, fd, fpp)
3908 	struct filedesc *fdp;
3909 	int fd;
3910 	struct file **fpp;
3911 {
3912 	int error;
3913 	struct file *fp;
3914 
3915 	fp = NULL;
3916 	if (fdp == NULL)
3917 		error = EBADF;
3918 	else {
3919 		FILEDESC_LOCK(fdp);
3920 		if ((u_int)fd >= fdp->fd_nfiles ||
3921 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3922 			error = EBADF;
3923 		else if (fp->f_vnode == NULL) {
3924 			fp = NULL;
3925 			error = EINVAL;
3926 		} else {
3927 			fhold(fp);
3928 			error = 0;
3929 		}
3930 		FILEDESC_UNLOCK(fdp);
3931 	}
3932 	*fpp = fp;
3933 	return (error);
3934 }
3935 
3936 /*
3937  * Get (NFS) file handle
3938  */
3939 #ifndef _SYS_SYSPROTO_H_
3940 struct lgetfh_args {
3941 	char	*fname;
3942 	fhandle_t *fhp;
3943 };
3944 #endif
3945 int
3946 lgetfh(td, uap)
3947 	struct thread *td;
3948 	register struct lgetfh_args *uap;
3949 {
3950 	struct nameidata nd;
3951 	fhandle_t fh;
3952 	register struct vnode *vp;
3953 	int vfslocked;
3954 	int error;
3955 
3956 	error = suser(td);
3957 	if (error)
3958 		return (error);
3959 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3960 	    UIO_USERSPACE, uap->fname, td);
3961 	error = namei(&nd);
3962 	if (error)
3963 		return (error);
3964 	vfslocked = NDHASGIANT(&nd);
3965 	NDFREE(&nd, NDF_ONLY_PNBUF);
3966 	vp = nd.ni_vp;
3967 	bzero(&fh, sizeof(fh));
3968 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3969 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3970 	vput(vp);
3971 	VFS_UNLOCK_GIANT(vfslocked);
3972 	if (error)
3973 		return (error);
3974 	error = copyout(&fh, uap->fhp, sizeof (fh));
3975 	return (error);
3976 }
3977 
3978 #ifndef _SYS_SYSPROTO_H_
3979 struct getfh_args {
3980 	char	*fname;
3981 	fhandle_t *fhp;
3982 };
3983 #endif
3984 int
3985 getfh(td, uap)
3986 	struct thread *td;
3987 	register struct getfh_args *uap;
3988 {
3989 	struct nameidata nd;
3990 	fhandle_t fh;
3991 	register struct vnode *vp;
3992 	int vfslocked;
3993 	int error;
3994 
3995 	error = suser(td);
3996 	if (error)
3997 		return (error);
3998 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3999 	    UIO_USERSPACE, uap->fname, td);
4000 	error = namei(&nd);
4001 	if (error)
4002 		return (error);
4003 	vfslocked = NDHASGIANT(&nd);
4004 	NDFREE(&nd, NDF_ONLY_PNBUF);
4005 	vp = nd.ni_vp;
4006 	bzero(&fh, sizeof(fh));
4007 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4008 	error = VFS_VPTOFH(vp, &fh.fh_fid);
4009 	vput(vp);
4010 	VFS_UNLOCK_GIANT(vfslocked);
4011 	if (error)
4012 		return (error);
4013 	error = copyout(&fh, uap->fhp, sizeof (fh));
4014 	return (error);
4015 }
4016 
4017 /*
4018  * syscall for the rpc.lockd to use to translate a NFS file handle into
4019  * an open descriptor.
4020  *
4021  * warning: do not remove the suser() call or this becomes one giant
4022  * security hole.
4023  *
4024  * MP SAFE
4025  */
4026 #ifndef _SYS_SYSPROTO_H_
4027 struct fhopen_args {
4028 	const struct fhandle *u_fhp;
4029 	int flags;
4030 };
4031 #endif
4032 int
4033 fhopen(td, uap)
4034 	struct thread *td;
4035 	struct fhopen_args /* {
4036 		const struct fhandle *u_fhp;
4037 		int flags;
4038 	} */ *uap;
4039 {
4040 	struct proc *p = td->td_proc;
4041 	struct mount *mp;
4042 	struct vnode *vp;
4043 	struct fhandle fhp;
4044 	struct vattr vat;
4045 	struct vattr *vap = &vat;
4046 	struct flock lf;
4047 	struct file *fp;
4048 	register struct filedesc *fdp = p->p_fd;
4049 	int fmode, mode, error, type;
4050 	struct file *nfp;
4051 	int vfslocked;
4052 	int indx;
4053 
4054 	error = suser(td);
4055 	if (error)
4056 		return (error);
4057 	fmode = FFLAGS(uap->flags);
4058 	/* why not allow a non-read/write open for our lockd? */
4059 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4060 		return (EINVAL);
4061 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4062 	if (error)
4063 		return(error);
4064 	/* find the mount point */
4065 	mp = vfs_getvfs(&fhp.fh_fsid);
4066 	if (mp == NULL)
4067 		return (ESTALE);
4068 	vfslocked = VFS_LOCK_GIANT(mp);
4069 	/* now give me my vnode, it gets returned to me locked */
4070 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4071 	if (error)
4072 		goto out;
4073 	/*
4074 	 * from now on we have to make sure not
4075 	 * to forget about the vnode
4076 	 * any error that causes an abort must vput(vp)
4077 	 * just set error = err and 'goto bad;'.
4078 	 */
4079 
4080 	/*
4081 	 * from vn_open
4082 	 */
4083 	if (vp->v_type == VLNK) {
4084 		error = EMLINK;
4085 		goto bad;
4086 	}
4087 	if (vp->v_type == VSOCK) {
4088 		error = EOPNOTSUPP;
4089 		goto bad;
4090 	}
4091 	mode = 0;
4092 	if (fmode & (FWRITE | O_TRUNC)) {
4093 		if (vp->v_type == VDIR) {
4094 			error = EISDIR;
4095 			goto bad;
4096 		}
4097 		error = vn_writechk(vp);
4098 		if (error)
4099 			goto bad;
4100 		mode |= VWRITE;
4101 	}
4102 	if (fmode & FREAD)
4103 		mode |= VREAD;
4104 	if (fmode & O_APPEND)
4105 		mode |= VAPPEND;
4106 #ifdef MAC
4107 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4108 	if (error)
4109 		goto bad;
4110 #endif
4111 	if (mode) {
4112 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4113 		if (error)
4114 			goto bad;
4115 	}
4116 	if (fmode & O_TRUNC) {
4117 		VOP_UNLOCK(vp, 0, td);				/* XXX */
4118 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4119 			vrele(vp);
4120 			goto out;
4121 		}
4122 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4123 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4124 #ifdef MAC
4125 		/*
4126 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4127 		 * should be right.
4128 		 */
4129 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4130 		if (error == 0) {
4131 #endif
4132 			VATTR_NULL(vap);
4133 			vap->va_size = 0;
4134 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4135 #ifdef MAC
4136 		}
4137 #endif
4138 		vn_finished_write(mp);
4139 		if (error)
4140 			goto bad;
4141 	}
4142 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4143 	if (error)
4144 		goto bad;
4145 
4146 	if (fmode & FWRITE)
4147 		vp->v_writecount++;
4148 
4149 	/*
4150 	 * end of vn_open code
4151 	 */
4152 
4153 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4154 		if (fmode & FWRITE)
4155 			vp->v_writecount--;
4156 		goto bad;
4157 	}
4158 	/* An extra reference on `nfp' has been held for us by falloc(). */
4159 	fp = nfp;
4160 
4161 	nfp->f_vnode = vp;
4162 	nfp->f_data = vp;
4163 	nfp->f_flag = fmode & FMASK;
4164 	nfp->f_ops = &vnops;
4165 	nfp->f_type = DTYPE_VNODE;
4166 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4167 		lf.l_whence = SEEK_SET;
4168 		lf.l_start = 0;
4169 		lf.l_len = 0;
4170 		if (fmode & O_EXLOCK)
4171 			lf.l_type = F_WRLCK;
4172 		else
4173 			lf.l_type = F_RDLCK;
4174 		type = F_FLOCK;
4175 		if ((fmode & FNONBLOCK) == 0)
4176 			type |= F_WAIT;
4177 		VOP_UNLOCK(vp, 0, td);
4178 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4179 			    type)) != 0) {
4180 			/*
4181 			 * The lock request failed.  Normally close the
4182 			 * descriptor but handle the case where someone might
4183 			 * have dup()d or close()d it when we weren't looking.
4184 			 */
4185 			fdclose(fdp, fp, indx, td);
4186 
4187 			/*
4188 			 * release our private reference
4189 			 */
4190 			fdrop(fp, td);
4191 			goto out;
4192 		}
4193 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4194 		fp->f_flag |= FHASLOCK;
4195 	}
4196 
4197 	VOP_UNLOCK(vp, 0, td);
4198 	fdrop(fp, td);
4199 	vfs_rel(mp);
4200 	VFS_UNLOCK_GIANT(vfslocked);
4201 	td->td_retval[0] = indx;
4202 	return (0);
4203 
4204 bad:
4205 	vput(vp);
4206 out:
4207 	vfs_rel(mp);
4208 	VFS_UNLOCK_GIANT(vfslocked);
4209 	return (error);
4210 }
4211 
4212 /*
4213  * Stat an (NFS) file handle.
4214  *
4215  * MP SAFE
4216  */
4217 #ifndef _SYS_SYSPROTO_H_
4218 struct fhstat_args {
4219 	struct fhandle *u_fhp;
4220 	struct stat *sb;
4221 };
4222 #endif
4223 int
4224 fhstat(td, uap)
4225 	struct thread *td;
4226 	register struct fhstat_args /* {
4227 		struct fhandle *u_fhp;
4228 		struct stat *sb;
4229 	} */ *uap;
4230 {
4231 	struct stat sb;
4232 	fhandle_t fh;
4233 	struct mount *mp;
4234 	struct vnode *vp;
4235 	int vfslocked;
4236 	int error;
4237 
4238 	error = suser(td);
4239 	if (error)
4240 		return (error);
4241 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4242 	if (error)
4243 		return (error);
4244 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4245 		return (ESTALE);
4246 	vfslocked = VFS_LOCK_GIANT(mp);
4247 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4248 		vfs_rel(mp);
4249 		VFS_UNLOCK_GIANT(vfslocked);
4250 		return (error);
4251 	}
4252 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4253 	vput(vp);
4254 	vfs_rel(mp);
4255 	VFS_UNLOCK_GIANT(vfslocked);
4256 	if (error)
4257 		return (error);
4258 	error = copyout(&sb, uap->sb, sizeof(sb));
4259 	return (error);
4260 }
4261 
4262 /*
4263  * Implement fstatfs() for (NFS) file handles.
4264  *
4265  * MP SAFE
4266  */
4267 #ifndef _SYS_SYSPROTO_H_
4268 struct fhstatfs_args {
4269 	struct fhandle *u_fhp;
4270 	struct statfs *buf;
4271 };
4272 #endif
4273 int
4274 fhstatfs(td, uap)
4275 	struct thread *td;
4276 	struct fhstatfs_args /* {
4277 		struct fhandle *u_fhp;
4278 		struct statfs *buf;
4279 	} */ *uap;
4280 {
4281 	struct statfs sf;
4282 	fhandle_t fh;
4283 	int error;
4284 
4285 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4286 	if (error)
4287 		return (error);
4288 	error = kern_fhstatfs(td, fh, &sf);
4289 	if (error)
4290 		return (error);
4291 	return (copyout(&sf, uap->buf, sizeof(sf)));
4292 }
4293 
4294 int
4295 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4296 {
4297 	struct statfs *sp;
4298 	struct mount *mp;
4299 	struct vnode *vp;
4300 	int vfslocked;
4301 	int error;
4302 
4303 	error = suser(td);
4304 	if (error)
4305 		return (error);
4306 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4307 		return (ESTALE);
4308 	vfslocked = VFS_LOCK_GIANT(mp);
4309 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4310 	if (error) {
4311 		VFS_UNLOCK_GIANT(vfslocked);
4312 		vfs_rel(mp);
4313 		return (error);
4314 	}
4315 	vput(vp);
4316 	sp = NULL;
4317 	error = prison_canseemount(td->td_ucred, mp);
4318 	if (error)
4319 		goto out;
4320 #ifdef MAC
4321 	error = mac_check_mount_stat(td->td_ucred, mp);
4322 	if (error)
4323 		goto out;
4324 #endif
4325 	/*
4326 	 * Set these in case the underlying filesystem fails to do so.
4327 	 */
4328 	sp = &mp->mnt_stat;
4329 	sp->f_version = STATFS_VERSION;
4330 	sp->f_namemax = NAME_MAX;
4331 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4332 	error = VFS_STATFS(mp, sp, td);
4333 out:
4334 	vfs_rel(mp);
4335 	VFS_UNLOCK_GIANT(vfslocked);
4336 	if (sp)
4337 		*buf = *sp;
4338 	return (error);
4339 }
4340 
4341 /*
4342  * Syscall to push extended attribute configuration information into the
4343  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4344  * a command (int cmd), and attribute name and misc data.  For now, the
4345  * attribute name is left in userspace for consumption by the VFS_op.
4346  * It will probably be changed to be copied into sysspace by the
4347  * syscall in the future, once issues with various consumers of the
4348  * attribute code have raised their hands.
4349  *
4350  * Currently this is used only by UFS Extended Attributes.
4351  */
4352 int
4353 extattrctl(td, uap)
4354 	struct thread *td;
4355 	struct extattrctl_args /* {
4356 		const char *path;
4357 		int cmd;
4358 		const char *filename;
4359 		int attrnamespace;
4360 		const char *attrname;
4361 	} */ *uap;
4362 {
4363 	struct vnode *filename_vp;
4364 	struct nameidata nd;
4365 	struct mount *mp, *mp_writable;
4366 	char attrname[EXTATTR_MAXNAMELEN];
4367 	int vfslocked, fnvfslocked, error;
4368 
4369 	AUDIT_ARG(cmd, uap->cmd);
4370 	AUDIT_ARG(value, uap->attrnamespace);
4371 	/*
4372 	 * uap->attrname is not always defined.  We check again later when we
4373 	 * invoke the VFS call so as to pass in NULL there if needed.
4374 	 */
4375 	if (uap->attrname != NULL) {
4376 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4377 		    NULL);
4378 		if (error)
4379 			return (error);
4380 	}
4381 	AUDIT_ARG(text, attrname);
4382 
4383 	vfslocked = fnvfslocked = 0;
4384 	/*
4385 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4386 	 * which VFS_EXTATTRCTL() will later release.
4387 	 */
4388 	filename_vp = NULL;
4389 	if (uap->filename != NULL) {
4390 		NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF |
4391 		    AUDITVNODE2, UIO_USERSPACE, uap->filename, td);
4392 		error = namei(&nd);
4393 		if (error)
4394 			return (error);
4395 		fnvfslocked = NDHASGIANT(&nd);
4396 		filename_vp = nd.ni_vp;
4397 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4398 	}
4399 
4400 	/* uap->path is always defined. */
4401 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4402 	    uap->path, td);
4403 	error = namei(&nd);
4404 	if (error) {
4405 		if (filename_vp != NULL)
4406 			vput(filename_vp);
4407 		goto out;
4408 	}
4409 	vfslocked = NDHASGIANT(&nd);
4410 	mp = nd.ni_vp->v_mount;
4411 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4412 	NDFREE(&nd, 0);
4413 	if (error) {
4414 		if (filename_vp != NULL)
4415 			vput(filename_vp);
4416 		goto out;
4417 	}
4418 
4419 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
4420 	    uap->attrname != NULL ? attrname : NULL, td);
4421 
4422 	vn_finished_write(mp_writable);
4423 	/*
4424 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4425 	 * filename_vp, so vrele it if it is defined.
4426 	 */
4427 	if (filename_vp != NULL)
4428 		vrele(filename_vp);
4429 out:
4430 	VFS_UNLOCK_GIANT(fnvfslocked);
4431 	VFS_UNLOCK_GIANT(vfslocked);
4432 	return (error);
4433 }
4434 
4435 /*-
4436  * Set a named extended attribute on a file or directory
4437  *
4438  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4439  *            kernelspace string pointer "attrname", userspace buffer
4440  *            pointer "data", buffer length "nbytes", thread "td".
4441  * Returns: 0 on success, an error number otherwise
4442  * Locks: none
4443  * References: vp must be a valid reference for the duration of the call
4444  */
4445 static int
4446 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4447     void *data, size_t nbytes, struct thread *td)
4448 {
4449 	struct mount *mp;
4450 	struct uio auio;
4451 	struct iovec aiov;
4452 	ssize_t cnt;
4453 	int error;
4454 
4455 	VFS_ASSERT_GIANT(vp->v_mount);
4456 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4457 	if (error)
4458 		return (error);
4459 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4460 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4461 
4462 	aiov.iov_base = data;
4463 	aiov.iov_len = nbytes;
4464 	auio.uio_iov = &aiov;
4465 	auio.uio_iovcnt = 1;
4466 	auio.uio_offset = 0;
4467 	if (nbytes > INT_MAX) {
4468 		error = EINVAL;
4469 		goto done;
4470 	}
4471 	auio.uio_resid = nbytes;
4472 	auio.uio_rw = UIO_WRITE;
4473 	auio.uio_segflg = UIO_USERSPACE;
4474 	auio.uio_td = td;
4475 	cnt = nbytes;
4476 
4477 #ifdef MAC
4478 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4479 	    attrname, &auio);
4480 	if (error)
4481 		goto done;
4482 #endif
4483 
4484 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4485 	    td->td_ucred, td);
4486 	cnt -= auio.uio_resid;
4487 	td->td_retval[0] = cnt;
4488 
4489 done:
4490 	VOP_UNLOCK(vp, 0, td);
4491 	vn_finished_write(mp);
4492 	return (error);
4493 }
4494 
4495 int
4496 extattr_set_fd(td, uap)
4497 	struct thread *td;
4498 	struct extattr_set_fd_args /* {
4499 		int fd;
4500 		int attrnamespace;
4501 		const char *attrname;
4502 		void *data;
4503 		size_t nbytes;
4504 	} */ *uap;
4505 {
4506 	struct file *fp;
4507 	char attrname[EXTATTR_MAXNAMELEN];
4508 	int vfslocked, error;
4509 
4510 	AUDIT_ARG(fd, uap->fd);
4511 	AUDIT_ARG(value, uap->attrnamespace);
4512 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4513 	if (error)
4514 		return (error);
4515 	AUDIT_ARG(text, attrname);
4516 
4517 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4518 	if (error)
4519 		return (error);
4520 
4521 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4522 	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4523 	    attrname, uap->data, uap->nbytes, td);
4524 	fdrop(fp, td);
4525 	VFS_UNLOCK_GIANT(vfslocked);
4526 
4527 	return (error);
4528 }
4529 
4530 int
4531 extattr_set_file(td, uap)
4532 	struct thread *td;
4533 	struct extattr_set_file_args /* {
4534 		const char *path;
4535 		int attrnamespace;
4536 		const char *attrname;
4537 		void *data;
4538 		size_t nbytes;
4539 	} */ *uap;
4540 {
4541 	struct nameidata nd;
4542 	char attrname[EXTATTR_MAXNAMELEN];
4543 	int vfslocked, error;
4544 
4545 	AUDIT_ARG(value, uap->attrnamespace);
4546 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4547 	if (error)
4548 		return (error);
4549 	AUDIT_ARG(text, attrname);
4550 
4551 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4552 	    uap->path, td);
4553 	error = namei(&nd);
4554 	if (error)
4555 		return (error);
4556 	NDFREE(&nd, NDF_ONLY_PNBUF);
4557 
4558 	vfslocked = NDHASGIANT(&nd);
4559 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4560 	    uap->data, uap->nbytes, td);
4561 
4562 	vrele(nd.ni_vp);
4563 	VFS_UNLOCK_GIANT(vfslocked);
4564 	return (error);
4565 }
4566 
4567 int
4568 extattr_set_link(td, uap)
4569 	struct thread *td;
4570 	struct extattr_set_link_args /* {
4571 		const char *path;
4572 		int attrnamespace;
4573 		const char *attrname;
4574 		void *data;
4575 		size_t nbytes;
4576 	} */ *uap;
4577 {
4578 	struct nameidata nd;
4579 	char attrname[EXTATTR_MAXNAMELEN];
4580 	int vfslocked, error;
4581 
4582 	AUDIT_ARG(value, uap->attrnamespace);
4583 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4584 	if (error)
4585 		return (error);
4586 	AUDIT_ARG(text, attrname);
4587 
4588 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4589 	    uap->path, td);
4590 	error = namei(&nd);
4591 	if (error)
4592 		return (error);
4593 	NDFREE(&nd, NDF_ONLY_PNBUF);
4594 
4595 	vfslocked = NDHASGIANT(&nd);
4596 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4597 	    uap->data, uap->nbytes, td);
4598 
4599 	vrele(nd.ni_vp);
4600 	VFS_UNLOCK_GIANT(vfslocked);
4601 	return (error);
4602 }
4603 
4604 /*-
4605  * Get a named extended attribute on a file or directory
4606  *
4607  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4608  *            kernelspace string pointer "attrname", userspace buffer
4609  *            pointer "data", buffer length "nbytes", thread "td".
4610  * Returns: 0 on success, an error number otherwise
4611  * Locks: none
4612  * References: vp must be a valid reference for the duration of the call
4613  */
4614 static int
4615 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4616     void *data, size_t nbytes, struct thread *td)
4617 {
4618 	struct uio auio, *auiop;
4619 	struct iovec aiov;
4620 	ssize_t cnt;
4621 	size_t size, *sizep;
4622 	int error;
4623 
4624 	VFS_ASSERT_GIANT(vp->v_mount);
4625 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4626 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4627 
4628 	/*
4629 	 * Slightly unusual semantics: if the user provides a NULL data
4630 	 * pointer, they don't want to receive the data, just the
4631 	 * maximum read length.
4632 	 */
4633 	auiop = NULL;
4634 	sizep = NULL;
4635 	cnt = 0;
4636 	if (data != NULL) {
4637 		aiov.iov_base = data;
4638 		aiov.iov_len = nbytes;
4639 		auio.uio_iov = &aiov;
4640 		auio.uio_iovcnt = 1;
4641 		auio.uio_offset = 0;
4642 		if (nbytes > INT_MAX) {
4643 			error = EINVAL;
4644 			goto done;
4645 		}
4646 		auio.uio_resid = nbytes;
4647 		auio.uio_rw = UIO_READ;
4648 		auio.uio_segflg = UIO_USERSPACE;
4649 		auio.uio_td = td;
4650 		auiop = &auio;
4651 		cnt = nbytes;
4652 	} else
4653 		sizep = &size;
4654 
4655 #ifdef MAC
4656 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4657 	    attrname, &auio);
4658 	if (error)
4659 		goto done;
4660 #endif
4661 
4662 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4663 	    td->td_ucred, td);
4664 
4665 	if (auiop != NULL) {
4666 		cnt -= auio.uio_resid;
4667 		td->td_retval[0] = cnt;
4668 	} else
4669 		td->td_retval[0] = size;
4670 
4671 done:
4672 	VOP_UNLOCK(vp, 0, td);
4673 	return (error);
4674 }
4675 
4676 int
4677 extattr_get_fd(td, uap)
4678 	struct thread *td;
4679 	struct extattr_get_fd_args /* {
4680 		int fd;
4681 		int attrnamespace;
4682 		const char *attrname;
4683 		void *data;
4684 		size_t nbytes;
4685 	} */ *uap;
4686 {
4687 	struct file *fp;
4688 	char attrname[EXTATTR_MAXNAMELEN];
4689 	int vfslocked, error;
4690 
4691 	AUDIT_ARG(fd, uap->fd);
4692 	AUDIT_ARG(value, uap->attrnamespace);
4693 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4694 	if (error)
4695 		return (error);
4696 	AUDIT_ARG(text, attrname);
4697 
4698 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4699 	if (error)
4700 		return (error);
4701 
4702 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4703 	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4704 	    attrname, uap->data, uap->nbytes, td);
4705 
4706 	fdrop(fp, td);
4707 	VFS_UNLOCK_GIANT(vfslocked);
4708 	return (error);
4709 }
4710 
4711 int
4712 extattr_get_file(td, uap)
4713 	struct thread *td;
4714 	struct extattr_get_file_args /* {
4715 		const char *path;
4716 		int attrnamespace;
4717 		const char *attrname;
4718 		void *data;
4719 		size_t nbytes;
4720 	} */ *uap;
4721 {
4722 	struct nameidata nd;
4723 	char attrname[EXTATTR_MAXNAMELEN];
4724 	int vfslocked, error;
4725 
4726 	AUDIT_ARG(value, uap->attrnamespace);
4727 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4728 	if (error)
4729 		return (error);
4730 	AUDIT_ARG(text, attrname);
4731 
4732 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4733 	    uap->path, td);
4734 	error = namei(&nd);
4735 	if (error)
4736 		return (error);
4737 	NDFREE(&nd, NDF_ONLY_PNBUF);
4738 
4739 	vfslocked = NDHASGIANT(&nd);
4740 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4741 	    uap->data, uap->nbytes, td);
4742 
4743 	vrele(nd.ni_vp);
4744 	VFS_UNLOCK_GIANT(vfslocked);
4745 	return (error);
4746 }
4747 
4748 int
4749 extattr_get_link(td, uap)
4750 	struct thread *td;
4751 	struct extattr_get_link_args /* {
4752 		const char *path;
4753 		int attrnamespace;
4754 		const char *attrname;
4755 		void *data;
4756 		size_t nbytes;
4757 	} */ *uap;
4758 {
4759 	struct nameidata nd;
4760 	char attrname[EXTATTR_MAXNAMELEN];
4761 	int vfslocked, error;
4762 
4763 	AUDIT_ARG(value, uap->attrnamespace);
4764 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4765 	if (error)
4766 		return (error);
4767 	AUDIT_ARG(text, attrname);
4768 
4769 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4770 	    uap->path, td);
4771 	error = namei(&nd);
4772 	if (error)
4773 		return (error);
4774 	NDFREE(&nd, NDF_ONLY_PNBUF);
4775 
4776 	vfslocked = NDHASGIANT(&nd);
4777 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4778 	    uap->data, uap->nbytes, td);
4779 
4780 	vrele(nd.ni_vp);
4781 	VFS_UNLOCK_GIANT(vfslocked);
4782 	return (error);
4783 }
4784 
4785 /*
4786  * extattr_delete_vp(): Delete a named extended attribute on a file or
4787  *                      directory
4788  *
4789  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4790  *            kernelspace string pointer "attrname", proc "p"
4791  * Returns: 0 on success, an error number otherwise
4792  * Locks: none
4793  * References: vp must be a valid reference for the duration of the call
4794  */
4795 static int
4796 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4797     struct thread *td)
4798 {
4799 	struct mount *mp;
4800 	int error;
4801 
4802 	VFS_ASSERT_GIANT(vp->v_mount);
4803 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4804 	if (error)
4805 		return (error);
4806 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4807 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4808 
4809 #ifdef MAC
4810 	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4811 	    attrname);
4812 	if (error)
4813 		goto done;
4814 #endif
4815 
4816 	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4817 	    td);
4818 	if (error == EOPNOTSUPP)
4819 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4820 		    td->td_ucred, td);
4821 #ifdef MAC
4822 done:
4823 #endif
4824 	VOP_UNLOCK(vp, 0, td);
4825 	vn_finished_write(mp);
4826 	return (error);
4827 }
4828 
4829 int
4830 extattr_delete_fd(td, uap)
4831 	struct thread *td;
4832 	struct extattr_delete_fd_args /* {
4833 		int fd;
4834 		int attrnamespace;
4835 		const char *attrname;
4836 	} */ *uap;
4837 {
4838 	struct file *fp;
4839 	char attrname[EXTATTR_MAXNAMELEN];
4840 	int vfslocked, error;
4841 
4842 	AUDIT_ARG(fd, uap->fd);
4843 	AUDIT_ARG(value, uap->attrnamespace);
4844 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4845 	if (error)
4846 		return (error);
4847 	AUDIT_ARG(text, attrname);
4848 
4849 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4850 	if (error)
4851 		return (error);
4852 
4853 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4854 	error = extattr_delete_vp(fp->f_vnode, uap->attrnamespace,
4855 	    attrname, td);
4856 	fdrop(fp, td);
4857 	VFS_UNLOCK_GIANT(vfslocked);
4858 	return (error);
4859 }
4860 
4861 int
4862 extattr_delete_file(td, uap)
4863 	struct thread *td;
4864 	struct extattr_delete_file_args /* {
4865 		const char *path;
4866 		int attrnamespace;
4867 		const char *attrname;
4868 	} */ *uap;
4869 {
4870 	struct nameidata nd;
4871 	char attrname[EXTATTR_MAXNAMELEN];
4872 	int vfslocked, error;
4873 
4874 	AUDIT_ARG(value, uap->attrnamespace);
4875 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4876 	if (error)
4877 		return(error);
4878 	AUDIT_ARG(text, attrname);
4879 
4880 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4881 	    uap->path, td);
4882 	error = namei(&nd);
4883 	if (error)
4884 		return(error);
4885 	NDFREE(&nd, NDF_ONLY_PNBUF);
4886 
4887 	vfslocked = NDHASGIANT(&nd);
4888 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4889 	vrele(nd.ni_vp);
4890 	VFS_UNLOCK_GIANT(vfslocked);
4891 	return(error);
4892 }
4893 
4894 int
4895 extattr_delete_link(td, uap)
4896 	struct thread *td;
4897 	struct extattr_delete_link_args /* {
4898 		const char *path;
4899 		int attrnamespace;
4900 		const char *attrname;
4901 	} */ *uap;
4902 {
4903 	struct nameidata nd;
4904 	char attrname[EXTATTR_MAXNAMELEN];
4905 	int vfslocked, error;
4906 
4907 	AUDIT_ARG(value, uap->attrnamespace);
4908 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4909 	if (error)
4910 		return(error);
4911 	AUDIT_ARG(text, attrname);
4912 
4913 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4914 	    uap->path, td);
4915 	error = namei(&nd);
4916 	if (error)
4917 		return(error);
4918 	NDFREE(&nd, NDF_ONLY_PNBUF);
4919 
4920 	vfslocked = NDHASGIANT(&nd);
4921 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4922 	vrele(nd.ni_vp);
4923 	VFS_UNLOCK_GIANT(vfslocked);
4924 	return(error);
4925 }
4926 
4927 /*-
4928  * Retrieve a list of extended attributes on a file or directory.
4929  *
4930  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4931  *            userspace buffer pointer "data", buffer length "nbytes",
4932  *            thread "td".
4933  * Returns: 0 on success, an error number otherwise
4934  * Locks: none
4935  * References: vp must be a valid reference for the duration of the call
4936  */
4937 static int
4938 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4939     size_t nbytes, struct thread *td)
4940 {
4941 	struct uio auio, *auiop;
4942 	size_t size, *sizep;
4943 	struct iovec aiov;
4944 	ssize_t cnt;
4945 	int error;
4946 
4947 	VFS_ASSERT_GIANT(vp->v_mount);
4948 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4949 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4950 
4951 	auiop = NULL;
4952 	sizep = NULL;
4953 	cnt = 0;
4954 	if (data != NULL) {
4955 		aiov.iov_base = data;
4956 		aiov.iov_len = nbytes;
4957 		auio.uio_iov = &aiov;
4958 		auio.uio_iovcnt = 1;
4959 		auio.uio_offset = 0;
4960 		if (nbytes > INT_MAX) {
4961 			error = EINVAL;
4962 			goto done;
4963 		}
4964 		auio.uio_resid = nbytes;
4965 		auio.uio_rw = UIO_READ;
4966 		auio.uio_segflg = UIO_USERSPACE;
4967 		auio.uio_td = td;
4968 		auiop = &auio;
4969 		cnt = nbytes;
4970 	} else
4971 		sizep = &size;
4972 
4973 #ifdef MAC
4974 	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4975 	if (error)
4976 		goto done;
4977 #endif
4978 
4979 	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4980 	    td->td_ucred, td);
4981 
4982 	if (auiop != NULL) {
4983 		cnt -= auio.uio_resid;
4984 		td->td_retval[0] = cnt;
4985 	} else
4986 		td->td_retval[0] = size;
4987 
4988 done:
4989 	VOP_UNLOCK(vp, 0, td);
4990 	return (error);
4991 }
4992 
4993 
4994 int
4995 extattr_list_fd(td, uap)
4996 	struct thread *td;
4997 	struct extattr_list_fd_args /* {
4998 		int fd;
4999 		int attrnamespace;
5000 		void *data;
5001 		size_t nbytes;
5002 	} */ *uap;
5003 {
5004 	struct file *fp;
5005 	int vfslocked, error;
5006 
5007 	AUDIT_ARG(fd, uap->fd);
5008 	AUDIT_ARG(value, uap->attrnamespace);
5009 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
5010 	if (error)
5011 		return (error);
5012 
5013 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
5014 	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
5015 	    uap->nbytes, td);
5016 
5017 	fdrop(fp, td);
5018 	VFS_UNLOCK_GIANT(vfslocked);
5019 	return (error);
5020 }
5021 
5022 int
5023 extattr_list_file(td, uap)
5024 	struct thread*td;
5025 	struct extattr_list_file_args /* {
5026 		const char *path;
5027 		int attrnamespace;
5028 		void *data;
5029 		size_t nbytes;
5030 	} */ *uap;
5031 {
5032 	struct nameidata nd;
5033 	int vfslocked, error;
5034 
5035 	AUDIT_ARG(value, uap->attrnamespace);
5036 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
5037 	    uap->path, td);
5038 	error = namei(&nd);
5039 	if (error)
5040 		return (error);
5041 	NDFREE(&nd, NDF_ONLY_PNBUF);
5042 
5043 	vfslocked = NDHASGIANT(&nd);
5044 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5045 	    uap->nbytes, td);
5046 
5047 	vrele(nd.ni_vp);
5048 	VFS_UNLOCK_GIANT(vfslocked);
5049 	return (error);
5050 }
5051 
5052 int
5053 extattr_list_link(td, uap)
5054 	struct thread*td;
5055 	struct extattr_list_link_args /* {
5056 		const char *path;
5057 		int attrnamespace;
5058 		void *data;
5059 		size_t nbytes;
5060 	} */ *uap;
5061 {
5062 	struct nameidata nd;
5063 	int vfslocked, error;
5064 
5065 	AUDIT_ARG(value, uap->attrnamespace);
5066 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
5067 	    uap->path, td);
5068 	error = namei(&nd);
5069 	if (error)
5070 		return (error);
5071 	NDFREE(&nd, NDF_ONLY_PNBUF);
5072 
5073 	vfslocked = NDHASGIANT(&nd);
5074 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5075 	    uap->nbytes, td);
5076 
5077 	vrele(nd.ni_vp);
5078 	VFS_UNLOCK_GIANT(vfslocked);
5079 	return (error);
5080 }
5081