xref: /freebsd/sys/kern/vfs_extattr.c (revision d2387d42b8da231a5b95cbc313825fb2aadf26f6)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  */
40 
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
44 /* For 4.3 integer FS ID compatibility */
45 #include "opt_compat.h"
46 #include "opt_mac.h"
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/bio.h>
51 #include <sys/buf.h>
52 #include <sys/sysent.h>
53 #include <sys/mac.h>
54 #include <sys/malloc.h>
55 #include <sys/mount.h>
56 #include <sys/mutex.h>
57 #include <sys/sysproto.h>
58 #include <sys/namei.h>
59 #include <sys/filedesc.h>
60 #include <sys/kernel.h>
61 #include <sys/fcntl.h>
62 #include <sys/file.h>
63 #include <sys/limits.h>
64 #include <sys/linker.h>
65 #include <sys/stat.h>
66 #include <sys/sx.h>
67 #include <sys/unistd.h>
68 #include <sys/vnode.h>
69 #include <sys/proc.h>
70 #include <sys/dirent.h>
71 #include <sys/extattr.h>
72 #include <sys/jail.h>
73 #include <sys/syscallsubr.h>
74 #include <sys/sysctl.h>
75 
76 #include <machine/stdarg.h>
77 
78 #include <vm/vm.h>
79 #include <vm/vm_object.h>
80 #include <vm/vm_page.h>
81 #include <vm/uma.h>
82 
/* Prototypes for helper routines that are private to this file. */
static int chroot_refuse_vdir_fds(struct filedesc *fdp);
static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
static int setfmode(struct thread *td, struct vnode *, int);
static int setfflags(struct thread *td, struct vnode *, int);
static int setutimes(struct thread *td, struct vnode *,
    const struct timespec *, int, int);
static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
    struct thread *td);

static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
    size_t nbytes, struct thread *td);

/*
 * Globally visible function-pointer hooks.  Nothing in the visible part of
 * this file assigns them -- NOTE(review): presumably they are filled in by
 * the code that implements them; confirm against their users.
 */
int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
int (*softdep_fsync_hook)(struct vnode *);
98 
99 /*
100  * The module initialization routine for POSIX asynchronous I/O will
101  * set this to the version of AIO that it implements.  (Zero means
102  * that it is not implemented.)  This value is used here by pathconf()
103  * and in kern_descrip.c by fpathconf().
104  */
105 int async_io_version;
106 
107 /*
108  * Sync each mounted filesystem.
109  */
110 #ifndef _SYS_SYSPROTO_H_
111 struct sync_args {
112 	int     dummy;
113 };
114 #endif
115 
116 #ifdef DEBUG
117 static int syncprt = 0;
118 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
119 #endif
120 
121 /* ARGSUSED */
122 int
123 sync(td, uap)
124 	struct thread *td;
125 	struct sync_args *uap;
126 {
127 	struct mount *mp, *nmp;
128 	int asyncflag;
129 
130 	mtx_lock(&mountlist_mtx);
131 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
132 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
133 			nmp = TAILQ_NEXT(mp, mnt_list);
134 			continue;
135 		}
136 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
137 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
138 			asyncflag = mp->mnt_flag & MNT_ASYNC;
139 			mp->mnt_flag &= ~MNT_ASYNC;
140 			vfs_msync(mp, MNT_NOWAIT);
141 			VFS_SYNC(mp, MNT_NOWAIT,
142 			    ((td != NULL) ? td->td_ucred : NOCRED), td);
143 			mp->mnt_flag |= asyncflag;
144 			vn_finished_write(mp);
145 		}
146 		mtx_lock(&mountlist_mtx);
147 		nmp = TAILQ_NEXT(mp, mnt_list);
148 		vfs_unbusy(mp, td);
149 	}
150 	mtx_unlock(&mountlist_mtx);
151 #if 0
152 /*
153  * XXX don't call vfs_bufstats() yet because that routine
154  * was not imported in the Lite2 merge.
155  */
156 #ifdef DIAGNOSTIC
157 	if (syncprt)
158 		vfs_bufstats();
159 #endif /* DIAGNOSTIC */
160 #endif
161 	return (0);
162 }
163 
164 /* XXX PRISON: could be per prison flag */
165 static int prison_quotas;
166 #if 0
167 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
168 #endif
169 
170 /*
171  * Change filesystem quotas.
172  */
173 #ifndef _SYS_SYSPROTO_H_
174 struct quotactl_args {
175 	char *path;
176 	int cmd;
177 	int uid;
178 	caddr_t arg;
179 };
180 #endif
181 /* ARGSUSED */
182 int
183 quotactl(td, uap)
184 	struct thread *td;
185 	register struct quotactl_args /* {
186 		char *path;
187 		int cmd;
188 		int uid;
189 		caddr_t arg;
190 	} */ *uap;
191 {
192 	struct mount *mp;
193 	int error;
194 	struct nameidata nd;
195 
196 	if (jailed(td->td_ucred) && !prison_quotas)
197 		return (EPERM);
198 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
199 	if ((error = namei(&nd)) != 0)
200 		return (error);
201 	NDFREE(&nd, NDF_ONLY_PNBUF);
202 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
203 	vrele(nd.ni_vp);
204 	if (error)
205 		return (error);
206 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
207 	vn_finished_write(mp);
208 	return (error);
209 }
210 
211 /*
212  * Get filesystem statistics.
213  */
214 #ifndef _SYS_SYSPROTO_H_
215 struct statfs_args {
216 	char *path;
217 	struct statfs *buf;
218 };
219 #endif
220 /* ARGSUSED */
221 int
222 statfs(td, uap)
223 	struct thread *td;
224 	register struct statfs_args /* {
225 		char *path;
226 		struct statfs *buf;
227 	} */ *uap;
228 {
229 	struct mount *mp;
230 	struct statfs *sp, sb;
231 	int error;
232 	struct nameidata nd;
233 
234 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
235 	if ((error = namei(&nd)) != 0)
236 		return (error);
237 	mp = nd.ni_vp->v_mount;
238 	sp = &mp->mnt_stat;
239 	NDFREE(&nd, NDF_ONLY_PNBUF);
240 	vrele(nd.ni_vp);
241 #ifdef MAC
242 	error = mac_check_mount_stat(td->td_ucred, mp);
243 	if (error)
244 		return (error);
245 #endif
246 	/*
247 	 * Set these in case the underlying filesystem fails to do so.
248 	 */
249 	sp->f_version = STATFS_VERSION;
250 	sp->f_namemax = NAME_MAX;
251 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
252 	error = VFS_STATFS(mp, sp, td);
253 	if (error)
254 		return (error);
255 	if (suser(td)) {
256 		bcopy(sp, &sb, sizeof(sb));
257 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
258 		sp = &sb;
259 	}
260 	return (copyout(sp, uap->buf, sizeof(*sp)));
261 }
262 
263 /*
264  * Get filesystem statistics.
265  */
266 #ifndef _SYS_SYSPROTO_H_
267 struct fstatfs_args {
268 	int fd;
269 	struct statfs *buf;
270 };
271 #endif
272 /* ARGSUSED */
273 int
274 fstatfs(td, uap)
275 	struct thread *td;
276 	register struct fstatfs_args /* {
277 		int fd;
278 		struct statfs *buf;
279 	} */ *uap;
280 {
281 	struct file *fp;
282 	struct mount *mp;
283 	struct statfs *sp, sb;
284 	int error;
285 
286 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
287 		return (error);
288 	mp = fp->f_vnode->v_mount;
289 	fdrop(fp, td);
290 	if (mp == NULL)
291 		return (EBADF);
292 #ifdef MAC
293 	error = mac_check_mount_stat(td->td_ucred, mp);
294 	if (error)
295 		return (error);
296 #endif
297 	sp = &mp->mnt_stat;
298 	/*
299 	 * Set these in case the underlying filesystem fails to do so.
300 	 */
301 	sp->f_version = STATFS_VERSION;
302 	sp->f_namemax = NAME_MAX;
303 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
304 	error = VFS_STATFS(mp, sp, td);
305 	if (error)
306 		return (error);
307 	if (suser(td)) {
308 		bcopy(sp, &sb, sizeof(sb));
309 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
310 		sp = &sb;
311 	}
312 	return (copyout(sp, uap->buf, sizeof(*sp)));
313 }
314 
/*
 * Get statistics on all filesystems.
 *
 * Walks the mount list and copies one struct statfs per mount into the
 * user buffer uap->buf, for at most uap->bufsize / sizeof(struct statfs)
 * entries.  Mounts rejected by prison_check_mount() (or, under MAC, by
 * mac_check_mount_stat()), mounts that cannot be busied without waiting,
 * and mounts whose VFS_STATFS() refresh fails are skipped and not counted.
 *
 * On success td->td_retval[0] is the number of mounts counted, capped at
 * the buffer capacity when a buffer was supplied; with a NULL buffer it is
 * the plain count.  Returns 0, or the error from copyout().
 */
#ifndef _SYS_SYSPROTO_H_
struct getfsstat_args {
	struct statfs *buf;
	long bufsize;
	int flags;
};
#endif
int
getfsstat(td, uap)
	struct thread *td;
	register struct getfsstat_args /* {
		struct statfs *buf;
		long bufsize;
		int flags;
	} */ *uap;
{
	struct mount *mp, *nmp;
	struct statfs *sp, sb;
	caddr_t sfsp;
	long count, maxcount, error;

	/* Number of whole statfs records that fit in the user buffer. */
	maxcount = uap->bufsize / sizeof(struct statfs);
	sfsp = (caddr_t)uap->buf;
	count = 0;
	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/* Skip mounts this credential is not allowed to see. */
		if (!prison_check_mount(td->td_ucred, mp)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		/*
		 * Skip mounts that cannot be busied without waiting.  Every
		 * path below re-takes mountlist_mtx before touching the list
		 * again, so a successful vfs_busy() evidently returns with
		 * the mutex released.
		 */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		/* Only produce a record while there is buffer space left. */
		if (sfsp && count < maxcount) {
			sp = &mp->mnt_stat;
			/*
			 * Set these in case the underlying filesystem
			 * fails to do so.
			 */
			sp->f_version = STATFS_VERSION;
			sp->f_namemax = NAME_MAX;
			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
			/*
			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
			 * overrides MNT_WAIT.
			 */
			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
			    (uap->flags & MNT_WAIT)) &&
			    (error = VFS_STATFS(mp, sp, td))) {
				/* Refresh failed: drop this mount, keep going. */
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp, td);
				continue;
			}
			/*
			 * When suser() fails for the caller, copy out a
			 * private copy with the filesystem ID zeroed.
			 */
			if (suser(td)) {
				bcopy(sp, &sb, sizeof(sb));
				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
				sp = &sb;
			}
			error = copyout(sp, sfsp, sizeof(*sp));
			if (error) {
				vfs_unbusy(mp, td);
				return (error);
			}
			sfsp += sizeof(*sp);
		}
		/* Counted even when no record was written for this mount. */
		count++;
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, td);
	}
	mtx_unlock(&mountlist_mtx);
	if (sfsp && count > maxcount)
		td->td_retval[0] = maxcount;
	else
		td->td_retval[0] = count;
	return (0);
}
404 
405 #ifdef COMPAT_FREEBSD4
406 /*
407  * Get old format filesystem statistics.
408  */
409 static void cvtstatfs(struct thread *, struct statfs *, struct ostatfs *);
410 
411 #ifndef _SYS_SYSPROTO_H_
412 struct freebsd4_statfs_args {
413 	char *path;
414 	struct ostatfs *buf;
415 };
416 #endif
417 /* ARGSUSED */
418 int
419 freebsd4_statfs(td, uap)
420 	struct thread *td;
421 	struct freebsd4_statfs_args /* {
422 		char *path;
423 		struct ostatfs *buf;
424 	} */ *uap;
425 {
426 	struct mount *mp;
427 	struct statfs *sp;
428 	struct ostatfs osb;
429 	int error;
430 	struct nameidata nd;
431 
432 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
433 	if ((error = namei(&nd)) != 0)
434 		return (error);
435 	mp = nd.ni_vp->v_mount;
436 	sp = &mp->mnt_stat;
437 	NDFREE(&nd, NDF_ONLY_PNBUF);
438 	vrele(nd.ni_vp);
439 #ifdef MAC
440 	error = mac_check_mount_stat(td->td_ucred, mp);
441 	if (error)
442 		return (error);
443 #endif
444 	error = VFS_STATFS(mp, sp, td);
445 	if (error)
446 		return (error);
447 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
448 	cvtstatfs(td, sp, &osb);
449 	return (copyout(&osb, uap->buf, sizeof(osb)));
450 }
451 
452 /*
453  * Get filesystem statistics.
454  */
455 #ifndef _SYS_SYSPROTO_H_
456 struct freebsd4_fstatfs_args {
457 	int fd;
458 	struct ostatfs *buf;
459 };
460 #endif
461 /* ARGSUSED */
462 int
463 freebsd4_fstatfs(td, uap)
464 	struct thread *td;
465 	struct freebsd4_fstatfs_args /* {
466 		int fd;
467 		struct ostatfs *buf;
468 	} */ *uap;
469 {
470 	struct file *fp;
471 	struct mount *mp;
472 	struct statfs *sp;
473 	struct ostatfs osb;
474 	int error;
475 
476 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
477 		return (error);
478 	mp = fp->f_vnode->v_mount;
479 	fdrop(fp, td);
480 	if (mp == NULL)
481 		return (EBADF);
482 #ifdef MAC
483 	error = mac_check_mount_stat(td->td_ucred, mp);
484 	if (error)
485 		return (error);
486 #endif
487 	sp = &mp->mnt_stat;
488 	error = VFS_STATFS(mp, sp, td);
489 	if (error)
490 		return (error);
491 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
492 	cvtstatfs(td, sp, &osb);
493 	return (copyout(&osb, uap->buf, sizeof(osb)));
494 }
495 
/*
 * Get statistics on all filesystems, in the old (struct ostatfs) format.
 *
 * Walks the mount list and copies one converted record per mount into the
 * user buffer uap->buf, for at most uap->bufsize / sizeof(struct ostatfs)
 * entries.  Mounts rejected by prison_check_mount() (or, under MAC, by
 * mac_check_mount_stat()), mounts that cannot be busied without waiting,
 * and mounts whose VFS_STATFS() refresh fails are skipped and not counted.
 *
 * On success td->td_retval[0] is the number of mounts counted, capped at
 * the buffer capacity when a buffer was supplied.  Returns 0, or the
 * error from copyout().
 */
#ifndef _SYS_SYSPROTO_H_
struct freebsd4_getfsstat_args {
	struct ostatfs *buf;
	long bufsize;
	int flags;
};
#endif
int
freebsd4_getfsstat(td, uap)
	struct thread *td;
	register struct freebsd4_getfsstat_args /* {
		struct ostatfs *buf;
		long bufsize;
		int flags;
	} */ *uap;
{
	struct mount *mp, *nmp;
	struct statfs *sp;
	struct ostatfs osb;
	caddr_t sfsp;
	long count, maxcount, error;

	/* Number of whole ostatfs records that fit in the user buffer. */
	maxcount = uap->bufsize / sizeof(struct ostatfs);
	sfsp = (caddr_t)uap->buf;
	count = 0;
	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/* Skip mounts this credential is not allowed to see. */
		if (!prison_check_mount(td->td_ucred, mp)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		/*
		 * Skip mounts that cannot be busied without waiting.  Every
		 * path below re-takes mountlist_mtx before touching the list
		 * again, so a successful vfs_busy() evidently returns with
		 * the mutex released.
		 */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		/* Only produce a record while there is buffer space left. */
		if (sfsp && count < maxcount) {
			sp = &mp->mnt_stat;
			/*
			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
			 * overrides MNT_WAIT.
			 */
			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
			    (uap->flags & MNT_WAIT)) &&
			    (error = VFS_STATFS(mp, sp, td))) {
				/* Refresh failed: drop this mount, keep going. */
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp, td);
				continue;
			}
			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
			/* Convert to the old layout before copying out. */
			cvtstatfs(td, sp, &osb);
			error = copyout(&osb, sfsp, sizeof(osb));
			if (error) {
				vfs_unbusy(mp, td);
				return (error);
			}
			sfsp += sizeof(osb);
		}
		/* Counted even when no record was written for this mount. */
		count++;
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, td);
	}
	mtx_unlock(&mountlist_mtx);
	if (sfsp && count > maxcount)
		td->td_retval[0] = maxcount;
	else
		td->td_retval[0] = count;
	return (0);
}
576 
577 /*
578  * Implement fstatfs() for (NFS) file handles.
579  */
580 #ifndef _SYS_SYSPROTO_H_
581 struct freebsd4_fhstatfs_args {
582 	struct fhandle *u_fhp;
583 	struct ostatfs *buf;
584 };
585 #endif
586 int
587 freebsd4_fhstatfs(td, uap)
588 	struct thread *td;
589 	struct freebsd4_fhstatfs_args /* {
590 		struct fhandle *u_fhp;
591 		struct ostatfs *buf;
592 	} */ *uap;
593 {
594 	struct statfs *sp;
595 	struct mount *mp;
596 	struct vnode *vp;
597 	struct ostatfs osb;
598 	fhandle_t fh;
599 	int error;
600 
601 	/*
602 	 * Must be super user
603 	 */
604 	error = suser(td);
605 	if (error)
606 		return (error);
607 
608 	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
609 		return (error);
610 
611 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
612 		return (ESTALE);
613 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
614 		return (error);
615 	mp = vp->v_mount;
616 	sp = &mp->mnt_stat;
617 	vput(vp);
618 #ifdef MAC
619 	error = mac_check_mount_stat(td->td_ucred, mp);
620 	if (error)
621 		return (error);
622 #endif
623 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
624 		return (error);
625 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
626 	cvtstatfs(td, sp, &osb);
627 	return (copyout(&osb, uap->buf, sizeof(osb)));
628 }
629 
630 /*
631  * Convert a new format statfs structure to an old format statfs structure.
632  */
633 static void
634 cvtstatfs(td, nsp, osp)
635 	struct thread *td;
636 	struct statfs *nsp;
637 	struct ostatfs *osp;
638 {
639 
640 	bzero(osp, sizeof(*osp));
641 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
642 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
643 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
644 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
645 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
646 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
647 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
648 	osp->f_owner = nsp->f_owner;
649 	osp->f_type = nsp->f_type;
650 	osp->f_flags = nsp->f_flags;
651 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
652 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
653 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
654 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
655 	bcopy(nsp->f_fstypename, osp->f_fstypename,
656 	    MIN(MFSNAMELEN, OMNAMELEN));
657 	bcopy(nsp->f_mntonname, osp->f_mntonname,
658 	    MIN(MFSNAMELEN, OMNAMELEN));
659 	bcopy(nsp->f_mntfromname, osp->f_mntfromname,
660 	    MIN(MFSNAMELEN, OMNAMELEN));
661 	if (suser(td)) {
662 		osp->f_fsid.val[0] = osp->f_fsid.val[1] = 0;
663 	} else {
664 		osp->f_fsid = nsp->f_fsid;
665 	}
666 }
667 #endif /* COMPAT_FREEBSD4 */
668 
669 /*
670  * Change current working directory to a given file descriptor.
671  */
672 #ifndef _SYS_SYSPROTO_H_
673 struct fchdir_args {
674 	int	fd;
675 };
676 #endif
677 /* ARGSUSED */
678 int
679 fchdir(td, uap)
680 	struct thread *td;
681 	struct fchdir_args /* {
682 		int fd;
683 	} */ *uap;
684 {
685 	register struct filedesc *fdp = td->td_proc->p_fd;
686 	struct vnode *vp, *tdp, *vpold;
687 	struct mount *mp;
688 	struct file *fp;
689 	int error;
690 
691 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
692 		return (error);
693 	vp = fp->f_vnode;
694 	VREF(vp);
695 	fdrop(fp, td);
696 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
697 	if (vp->v_type != VDIR)
698 		error = ENOTDIR;
699 #ifdef MAC
700 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
701 	}
702 #endif
703 	else
704 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
705 	while (!error && (mp = vp->v_mountedhere) != NULL) {
706 		if (vfs_busy(mp, 0, 0, td))
707 			continue;
708 		error = VFS_ROOT(mp, &tdp);
709 		vfs_unbusy(mp, td);
710 		if (error)
711 			break;
712 		vput(vp);
713 		vp = tdp;
714 	}
715 	if (error) {
716 		vput(vp);
717 		return (error);
718 	}
719 	VOP_UNLOCK(vp, 0, td);
720 	FILEDESC_LOCK(fdp);
721 	vpold = fdp->fd_cdir;
722 	fdp->fd_cdir = vp;
723 	FILEDESC_UNLOCK(fdp);
724 	vrele(vpold);
725 	return (0);
726 }
727 
728 /*
729  * Change current working directory (``.'').
730  */
731 #ifndef _SYS_SYSPROTO_H_
732 struct chdir_args {
733 	char	*path;
734 };
735 #endif
736 /* ARGSUSED */
737 int
738 chdir(td, uap)
739 	struct thread *td;
740 	struct chdir_args /* {
741 		char *path;
742 	} */ *uap;
743 {
744 
745 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
746 }
747 
748 int
749 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
750 {
751 	register struct filedesc *fdp = td->td_proc->p_fd;
752 	int error;
753 	struct nameidata nd;
754 	struct vnode *vp;
755 
756 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td);
757 	if ((error = namei(&nd)) != 0)
758 		return (error);
759 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
760 		vput(nd.ni_vp);
761 		NDFREE(&nd, NDF_ONLY_PNBUF);
762 		return (error);
763 	}
764 	VOP_UNLOCK(nd.ni_vp, 0, td);
765 	NDFREE(&nd, NDF_ONLY_PNBUF);
766 	FILEDESC_LOCK(fdp);
767 	vp = fdp->fd_cdir;
768 	fdp->fd_cdir = nd.ni_vp;
769 	FILEDESC_UNLOCK(fdp);
770 	vrele(vp);
771 	return (0);
772 }
773 
774 /*
775  * Helper function for raised chroot(2) security function:  Refuse if
776  * any filedescriptors are open directories.
777  */
778 static int
779 chroot_refuse_vdir_fds(fdp)
780 	struct filedesc *fdp;
781 {
782 	struct vnode *vp;
783 	struct file *fp;
784 	int fd;
785 
786 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
787 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
788 		fp = fget_locked(fdp, fd);
789 		if (fp == NULL)
790 			continue;
791 		if (fp->f_type == DTYPE_VNODE) {
792 			vp = fp->f_vnode;
793 			if (vp->v_type == VDIR)
794 				return (EPERM);
795 		}
796 	}
797 	return (0);
798 }
799 
/*
 * This sysctl determines if we will allow a process to chroot(2) if it
 * has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 *
 * The value is consulted by change_root(); the compiled-in default is 1.
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");
812 
813 /*
814  * Change notion of root (``/'') directory.
815  */
816 #ifndef _SYS_SYSPROTO_H_
817 struct chroot_args {
818 	char	*path;
819 };
820 #endif
821 /* ARGSUSED */
822 int
823 chroot(td, uap)
824 	struct thread *td;
825 	struct chroot_args /* {
826 		char *path;
827 	} */ *uap;
828 {
829 	int error;
830 	struct nameidata nd;
831 
832 	error = suser_cred(td->td_ucred, PRISON_ROOT);
833 	if (error)
834 		return (error);
835 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
836 	mtx_lock(&Giant);
837 	error = namei(&nd);
838 	if (error)
839 		goto error;
840 	if ((error = change_dir(nd.ni_vp, td)) != 0)
841 		goto e_vunlock;
842 #ifdef MAC
843 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
844 		goto e_vunlock;
845 #endif
846 	VOP_UNLOCK(nd.ni_vp, 0, td);
847 	error = change_root(nd.ni_vp, td);
848 	vrele(nd.ni_vp);
849 	NDFREE(&nd, NDF_ONLY_PNBUF);
850 	mtx_unlock(&Giant);
851 	return (error);
852 e_vunlock:
853 	vput(nd.ni_vp);
854 error:
855 	mtx_unlock(&Giant);
856 	NDFREE(&nd, NDF_ONLY_PNBUF);
857 	return (error);
858 }
859 
860 /*
861  * Common routine for chroot and chdir.  Callers must provide a locked vnode
862  * instance.
863  */
864 int
865 change_dir(vp, td)
866 	struct vnode *vp;
867 	struct thread *td;
868 {
869 	int error;
870 
871 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
872 	if (vp->v_type != VDIR)
873 		return (ENOTDIR);
874 #ifdef MAC
875 	error = mac_check_vnode_chdir(td->td_ucred, vp);
876 	if (error)
877 		return (error);
878 #endif
879 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
880 	return (error);
881 }
882 
883 /*
884  * Common routine for kern_chroot() and jail_attach().  The caller is
885  * responsible for invoking suser() and mac_check_chroot() to authorize this
886  * operation.
887  */
888 int
889 change_root(vp, td)
890 	struct vnode *vp;
891 	struct thread *td;
892 {
893 	struct filedesc *fdp;
894 	struct vnode *oldvp;
895 	int error;
896 
897 	mtx_assert(&Giant, MA_OWNED);
898 	fdp = td->td_proc->p_fd;
899 	FILEDESC_LOCK(fdp);
900 	if (chroot_allow_open_directories == 0 ||
901 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
902 		error = chroot_refuse_vdir_fds(fdp);
903 		if (error) {
904 			FILEDESC_UNLOCK(fdp);
905 			return (error);
906 		}
907 	}
908 	oldvp = fdp->fd_rdir;
909 	fdp->fd_rdir = vp;
910 	VREF(fdp->fd_rdir);
911 	if (!fdp->fd_jdir) {
912 		fdp->fd_jdir = vp;
913 		VREF(fdp->fd_jdir);
914 	}
915 	FILEDESC_UNLOCK(fdp);
916 	vrele(oldvp);
917 	return (0);
918 }
919 
920 /*
921  * Check permissions, allocate an open file structure,
922  * and call the device open routine if any.
923  *
924  * MP SAFE
925  */
926 #ifndef _SYS_SYSPROTO_H_
927 struct open_args {
928 	char	*path;
929 	int	flags;
930 	int	mode;
931 };
932 #endif
933 int
934 open(td, uap)
935 	struct thread *td;
936 	register struct open_args /* {
937 		char *path;
938 		int flags;
939 		int mode;
940 	} */ *uap;
941 {
942 
943 	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
944 }
945 
/*
 * Kernel implementation behind open() and ocreat(): open the file named
 * by `path', which resides in the address space given by `pathseg', with
 * open flags `flags' and creation mode `mode'.
 *
 * On success the new descriptor index is stored in td->td_retval[0] and
 * 0 is returned.  On failure an errno value is returned and the
 * descriptor slot reserved by falloc() is cleared again, unless another
 * thread has already replaced or closed it.  ERESTART from the
 * vn_open() failure path is reported as EINTR.
 */
int
kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
    int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	struct vattr vat;
	struct mount *mp;
	int cmode;
	struct file *nfp;
	int type, indx, error;
	struct flock lf;
	struct nameidata nd;

	/* An access mode with every O_ACCMODE bit set is not valid. */
	if ((flags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(flags);
	error = falloc(td, &nfp, &indx);
	if (error)
		return (error);
	/* An extra reference on `nfp' has been held for us by falloc(). */
	fp = nfp;
	/* Mask the mode with fd_cmask, keep permission bits, drop S_ISTXT. */
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	mtx_lock(&Giant);
	error = vn_open(&nd, &flags, cmode, indx);
	if (error) {
		mtx_unlock(&Giant);

		/*
		 * If the vn_open replaced the method vector, something
		 * wondrous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops) {
			fdrop(fp, td);
			td->td_retval[0] = indx;
			return (0);
		}

		/*
		 * release our own reference
		 */
		fdrop(fp, td);

		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 */
		if ((error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
		    (error =
			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
			td->td_retval[0] = indx;
			return (0);
		}
		/*
		 * Clean up the descriptor, but only if another thread hadn't
		 * replaced or closed it.
		 */
		FILEDESC_LOCK(fdp);
		if (fdp->fd_ofiles[indx] == fp) {
			fdp->fd_ofiles[indx] = NULL;
			fdunused(fdp, indx);
			FILEDESC_UNLOCK(fdp);
			/* Drop the reference the descriptor table held. */
			fdrop(fp, td);
		} else {
			FILEDESC_UNLOCK(fdp);
		}

		if (error == ERESTART)
			error = EINTR;
		return (error);
	}
	td->td_dupfd = 0;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * There should be 2 references on the file, one from the descriptor
	 * table, and one for us.
	 *
	 * Handle the case where someone closed the file (via its file
	 * descriptor) while we were blocked.  The end result should look
	 * like opening the file succeeded but it was immediately closed.
	 * We call vn_close() manually because we haven't yet hooked up
	 * the various 'struct file' fields.
	 */
	FILEDESC_LOCK(fdp);
	FILE_LOCK(fp);
	if (fp->f_count == 1) {
		KASSERT(fdp->fd_ofiles[indx] != fp,
		    ("Open file descriptor lost all refs"));
		FILEDESC_UNLOCK(fdp);
		FILE_UNLOCK(fp);
		VOP_UNLOCK(vp, 0, td);
		vn_close(vp, flags & FMASK, fp->f_cred, td);
		mtx_unlock(&Giant);
		fdrop(fp, td);
		td->td_retval[0] = indx;
		return (0);
	}
	/* Hook the opened vnode up to the file structure. */
	fp->f_vnode = vp;
	fp->f_data = vp;
	fp->f_flag = flags & FMASK;
	fp->f_ops = &vnops;
	fp->f_seqcount = 1;
	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
	FILEDESC_UNLOCK(fdp);
	FILE_UNLOCK(fp);

	/* assert that vn_open created a backing object if one is needed */
	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
		("open: vmio vnode has no backing object after vn_open"));

	VOP_UNLOCK(vp, 0, td);
	/*
	 * O_EXLOCK / O_SHLOCK: take a whole-file flock-style lock, write
	 * lock for O_EXLOCK and read lock otherwise, waiting for it unless
	 * FNONBLOCK is set.
	 */
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0)
			goto bad;
		fp->f_flag |= FHASLOCK;
	}
	/* O_TRUNC: set the file size to zero through VOP_SETATTR(). */
	if (flags & O_TRUNC) {
		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
			goto bad;
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		VATTR_NULL(&vat);
		vat.va_size = 0;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
#ifdef MAC
		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
		if (error == 0)
#endif
			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
		VOP_UNLOCK(vp, 0, td);
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	mtx_unlock(&Giant);
	/*
	 * Release our private reference, leaving the one associated with
	 * the descriptor table intact.
	 */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);
bad:
	/*
	 * Failure after the file was set up: clear the descriptor slot if it
	 * still points at our file (dropping the table's reference), then
	 * drop our own reference.
	 */
	mtx_unlock(&Giant);
	FILEDESC_LOCK(fdp);
	if (fdp->fd_ofiles[indx] == fp) {
		fdp->fd_ofiles[indx] = NULL;
		fdunused(fdp, indx);
		FILEDESC_UNLOCK(fdp);
		fdrop(fp, td);
	} else {
		FILEDESC_UNLOCK(fdp);
	}
	fdrop(fp, td);
	return (error);
}
1121 
1122 #ifdef COMPAT_43
1123 /*
1124  * Create a file.
1125  *
1126  * MP SAFE
1127  */
1128 #ifndef _SYS_SYSPROTO_H_
1129 struct ocreat_args {
1130 	char	*path;
1131 	int	mode;
1132 };
1133 #endif
1134 int
1135 ocreat(td, uap)
1136 	struct thread *td;
1137 	register struct ocreat_args /* {
1138 		char *path;
1139 		int mode;
1140 	} */ *uap;
1141 {
1142 
1143 	return (kern_open(td, uap->path, UIO_USERSPACE,
1144 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1145 }
1146 #endif /* COMPAT_43 */
1147 
1148 /*
1149  * Create a special file.
1150  */
1151 #ifndef _SYS_SYSPROTO_H_
1152 struct mknod_args {
1153 	char	*path;
1154 	int	mode;
1155 	int	dev;
1156 };
1157 #endif
1158 /* ARGSUSED */
1159 int
1160 mknod(td, uap)
1161 	struct thread *td;
1162 	register struct mknod_args /* {
1163 		char *path;
1164 		int mode;
1165 		int dev;
1166 	} */ *uap;
1167 {
1168 
1169 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1170 }
1171 
1172 int
1173 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1174     int dev)
1175 {
1176 	struct vnode *vp;
1177 	struct mount *mp;
1178 	struct vattr vattr;
1179 	int error;
1180 	int whiteout = 0;
1181 	struct nameidata nd;
1182 
1183 	switch (mode & S_IFMT) {
1184 	case S_IFCHR:
1185 	case S_IFBLK:
1186 		error = suser(td);
1187 		break;
1188 	default:
1189 		error = suser_cred(td->td_ucred, PRISON_ROOT);
1190 		break;
1191 	}
1192 	if (error)
1193 		return (error);
1194 restart:
1195 	bwillwrite();
1196 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
1197 	if ((error = namei(&nd)) != 0)
1198 		return (error);
1199 	vp = nd.ni_vp;
1200 	if (vp != NULL) {
1201 		NDFREE(&nd, NDF_ONLY_PNBUF);
1202 		vrele(vp);
1203 		if (vp == nd.ni_dvp)
1204 			vrele(nd.ni_dvp);
1205 		else
1206 			vput(nd.ni_dvp);
1207 		return (EEXIST);
1208 	} else {
1209 		VATTR_NULL(&vattr);
1210 		FILEDESC_LOCK(td->td_proc->p_fd);
1211 		vattr.va_mode = (mode & ALLPERMS) &
1212 		    ~td->td_proc->p_fd->fd_cmask;
1213 		FILEDESC_UNLOCK(td->td_proc->p_fd);
1214 		vattr.va_rdev = dev;
1215 		whiteout = 0;
1216 
1217 		switch (mode & S_IFMT) {
1218 		case S_IFMT:	/* used by badsect to flag bad sectors */
1219 			vattr.va_type = VBAD;
1220 			break;
1221 		case S_IFCHR:
1222 			vattr.va_type = VCHR;
1223 			break;
1224 		case S_IFBLK:
1225 			vattr.va_type = VBLK;
1226 			break;
1227 		case S_IFWHT:
1228 			whiteout = 1;
1229 			break;
1230 		default:
1231 			error = EINVAL;
1232 			break;
1233 		}
1234 	}
1235 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1236 		NDFREE(&nd, NDF_ONLY_PNBUF);
1237 		vput(nd.ni_dvp);
1238 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1239 			return (error);
1240 		goto restart;
1241 	}
1242 #ifdef MAC
1243 	if (error == 0 && !whiteout)
1244 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1245 		    &nd.ni_cnd, &vattr);
1246 #endif
1247 	if (!error) {
1248 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1249 		if (whiteout)
1250 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1251 		else {
1252 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1253 						&nd.ni_cnd, &vattr);
1254 			if (error == 0)
1255 				vput(nd.ni_vp);
1256 		}
1257 	}
1258 	NDFREE(&nd, NDF_ONLY_PNBUF);
1259 	vput(nd.ni_dvp);
1260 	vn_finished_write(mp);
1261 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1262 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1263 	return (error);
1264 }
1265 
1266 /*
1267  * Create a named pipe.
1268  */
1269 #ifndef _SYS_SYSPROTO_H_
1270 struct mkfifo_args {
1271 	char	*path;
1272 	int	mode;
1273 };
1274 #endif
1275 /* ARGSUSED */
1276 int
1277 mkfifo(td, uap)
1278 	struct thread *td;
1279 	register struct mkfifo_args /* {
1280 		char *path;
1281 		int mode;
1282 	} */ *uap;
1283 {
1284 
1285 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1286 }
1287 
1288 int
1289 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1290 {
1291 	struct mount *mp;
1292 	struct vattr vattr;
1293 	int error;
1294 	struct nameidata nd;
1295 
1296 restart:
1297 	bwillwrite();
1298 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
1299 	if ((error = namei(&nd)) != 0)
1300 		return (error);
1301 	if (nd.ni_vp != NULL) {
1302 		NDFREE(&nd, NDF_ONLY_PNBUF);
1303 		vrele(nd.ni_vp);
1304 		if (nd.ni_vp == nd.ni_dvp)
1305 			vrele(nd.ni_dvp);
1306 		else
1307 			vput(nd.ni_dvp);
1308 		return (EEXIST);
1309 	}
1310 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1311 		NDFREE(&nd, NDF_ONLY_PNBUF);
1312 		vput(nd.ni_dvp);
1313 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1314 			return (error);
1315 		goto restart;
1316 	}
1317 	VATTR_NULL(&vattr);
1318 	vattr.va_type = VFIFO;
1319 	FILEDESC_LOCK(td->td_proc->p_fd);
1320 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1321 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1322 #ifdef MAC
1323 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1324 	    &vattr);
1325 	if (error)
1326 		goto out;
1327 #endif
1328 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1329 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1330 	if (error == 0)
1331 		vput(nd.ni_vp);
1332 #ifdef MAC
1333 out:
1334 #endif
1335 	NDFREE(&nd, NDF_ONLY_PNBUF);
1336 	vput(nd.ni_dvp);
1337 	vn_finished_write(mp);
1338 	return (error);
1339 }
1340 
1341 /*
1342  * Make a hard file link.
1343  */
1344 #ifndef _SYS_SYSPROTO_H_
1345 struct link_args {
1346 	char	*path;
1347 	char	*link;
1348 };
1349 #endif
1350 /* ARGSUSED */
1351 int
1352 link(td, uap)
1353 	struct thread *td;
1354 	register struct link_args /* {
1355 		char *path;
1356 		char *link;
1357 	} */ *uap;
1358 {
1359 
1360 	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1361 }
1362 
1363 SYSCTL_DECL(_security_bsd);
1364 
1365 static int hardlink_check_uid = 0;
1366 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1367     &hardlink_check_uid, 0,
1368     "Unprivileged processes cannot create hard links to files owned by other "
1369     "users");
1370 static int hardlink_check_gid = 0;
1371 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1372     &hardlink_check_gid, 0,
1373     "Unprivileged processes cannot create hard links to files owned by other "
1374     "groups");
1375 
1376 static int
1377 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1378 {
1379 	struct vattr va;
1380 	int error;
1381 
1382 	if (suser_cred(cred, PRISON_ROOT) == 0)
1383 		return (0);
1384 
1385 	if (!hardlink_check_uid && !hardlink_check_gid)
1386 		return (0);
1387 
1388 	error = VOP_GETATTR(vp, &va, cred, td);
1389 	if (error != 0)
1390 		return (error);
1391 
1392 	if (hardlink_check_uid) {
1393 		if (cred->cr_uid != va.va_uid)
1394 			return (EPERM);
1395 	}
1396 
1397 	if (hardlink_check_gid) {
1398 		if (!groupmember(va.va_gid, cred))
1399 			return (EPERM);
1400 	}
1401 
1402 	return (0);
1403 }
1404 
1405 int
1406 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1407 {
1408 	struct vnode *vp;
1409 	struct mount *mp;
1410 	struct nameidata nd;
1411 	int error;
1412 
1413 	bwillwrite();
1414 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td);
1415 	if ((error = namei(&nd)) != 0)
1416 		return (error);
1417 	NDFREE(&nd, NDF_ONLY_PNBUF);
1418 	vp = nd.ni_vp;
1419 	if (vp->v_type == VDIR) {
1420 		vrele(vp);
1421 		return (EPERM);		/* POSIX */
1422 	}
1423 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1424 		vrele(vp);
1425 		return (error);
1426 	}
1427 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1428 	if ((error = namei(&nd)) == 0) {
1429 		if (nd.ni_vp != NULL) {
1430 			vrele(nd.ni_vp);
1431 			if (nd.ni_dvp == nd.ni_vp)
1432 				vrele(nd.ni_dvp);
1433 			else
1434 				vput(nd.ni_dvp);
1435 			error = EEXIST;
1436 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1437 		    == 0) {
1438 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1439 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1440 			error = can_hardlink(vp, td, td->td_ucred);
1441 			if (error == 0)
1442 #ifdef MAC
1443 				error = mac_check_vnode_link(td->td_ucred,
1444 				    nd.ni_dvp, vp, &nd.ni_cnd);
1445 			if (error == 0)
1446 #endif
1447 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1448 			VOP_UNLOCK(vp, 0, td);
1449 			vput(nd.ni_dvp);
1450 		}
1451 		NDFREE(&nd, NDF_ONLY_PNBUF);
1452 	}
1453 	vrele(vp);
1454 	vn_finished_write(mp);
1455 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1456 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1457 	return (error);
1458 }
1459 
1460 /*
1461  * Make a symbolic link.
1462  */
1463 #ifndef _SYS_SYSPROTO_H_
1464 struct symlink_args {
1465 	char	*path;
1466 	char	*link;
1467 };
1468 #endif
1469 /* ARGSUSED */
1470 int
1471 symlink(td, uap)
1472 	struct thread *td;
1473 	register struct symlink_args /* {
1474 		char *path;
1475 		char *link;
1476 	} */ *uap;
1477 {
1478 
1479 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1480 }
1481 
1482 int
1483 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1484 {
1485 	struct mount *mp;
1486 	struct vattr vattr;
1487 	char *syspath;
1488 	int error;
1489 	struct nameidata nd;
1490 
1491 	if (segflg == UIO_SYSSPACE) {
1492 		syspath = path;
1493 	} else {
1494 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1495 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1496 			goto out;
1497 	}
1498 restart:
1499 	bwillwrite();
1500 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1501 	if ((error = namei(&nd)) != 0)
1502 		goto out;
1503 	if (nd.ni_vp) {
1504 		NDFREE(&nd, NDF_ONLY_PNBUF);
1505 		vrele(nd.ni_vp);
1506 		if (nd.ni_vp == nd.ni_dvp)
1507 			vrele(nd.ni_dvp);
1508 		else
1509 			vput(nd.ni_dvp);
1510 		error = EEXIST;
1511 		goto out;
1512 	}
1513 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1514 		NDFREE(&nd, NDF_ONLY_PNBUF);
1515 		vput(nd.ni_dvp);
1516 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1517 			return (error);
1518 		goto restart;
1519 	}
1520 	VATTR_NULL(&vattr);
1521 	FILEDESC_LOCK(td->td_proc->p_fd);
1522 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1523 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1524 #ifdef MAC
1525 	vattr.va_type = VLNK;
1526 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1527 	    &vattr);
1528 	if (error)
1529 		goto out2;
1530 #endif
1531 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1532 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1533 	if (error == 0)
1534 		vput(nd.ni_vp);
1535 #ifdef MAC
1536 out2:
1537 #endif
1538 	NDFREE(&nd, NDF_ONLY_PNBUF);
1539 	vput(nd.ni_dvp);
1540 	vn_finished_write(mp);
1541 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1542 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1543 out:
1544 	if (segflg != UIO_SYSSPACE)
1545 		uma_zfree(namei_zone, syspath);
1546 	return (error);
1547 }
1548 
1549 /*
1550  * Delete a whiteout from the filesystem.
1551  */
1552 /* ARGSUSED */
1553 int
1554 undelete(td, uap)
1555 	struct thread *td;
1556 	register struct undelete_args /* {
1557 		char *path;
1558 	} */ *uap;
1559 {
1560 	int error;
1561 	struct mount *mp;
1562 	struct nameidata nd;
1563 
1564 restart:
1565 	bwillwrite();
1566 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1567 	    uap->path, td);
1568 	error = namei(&nd);
1569 	if (error)
1570 		return (error);
1571 
1572 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1573 		NDFREE(&nd, NDF_ONLY_PNBUF);
1574 		if (nd.ni_vp)
1575 			vrele(nd.ni_vp);
1576 		if (nd.ni_vp == nd.ni_dvp)
1577 			vrele(nd.ni_dvp);
1578 		else
1579 			vput(nd.ni_dvp);
1580 		return (EEXIST);
1581 	}
1582 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1583 		NDFREE(&nd, NDF_ONLY_PNBUF);
1584 		vput(nd.ni_dvp);
1585 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1586 			return (error);
1587 		goto restart;
1588 	}
1589 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1590 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1591 	NDFREE(&nd, NDF_ONLY_PNBUF);
1592 	vput(nd.ni_dvp);
1593 	vn_finished_write(mp);
1594 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1595 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1596 	return (error);
1597 }
1598 
1599 /*
1600  * Delete a name from the filesystem.
1601  */
1602 #ifndef _SYS_SYSPROTO_H_
1603 struct unlink_args {
1604 	char	*path;
1605 };
1606 #endif
1607 /* ARGSUSED */
1608 int
1609 unlink(td, uap)
1610 	struct thread *td;
1611 	struct unlink_args /* {
1612 		char *path;
1613 	} */ *uap;
1614 {
1615 
1616 	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1617 }
1618 
1619 int
1620 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1621 {
1622 	struct mount *mp;
1623 	struct vnode *vp;
1624 	int error;
1625 	struct nameidata nd;
1626 
1627 restart:
1628 	bwillwrite();
1629 	NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td);
1630 	if ((error = namei(&nd)) != 0)
1631 		return (error);
1632 	vp = nd.ni_vp;
1633 	if (vp->v_type == VDIR)
1634 		error = EPERM;		/* POSIX */
1635 	else {
1636 		/*
1637 		 * The root of a mounted filesystem cannot be deleted.
1638 		 *
1639 		 * XXX: can this only be a VDIR case?
1640 		 */
1641 		if (vp->v_vflag & VV_ROOT)
1642 			error = EBUSY;
1643 	}
1644 	if (error == 0) {
1645 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1646 			NDFREE(&nd, NDF_ONLY_PNBUF);
1647 			if (vp == nd.ni_dvp)
1648 				vrele(vp);
1649 			else
1650 				vput(vp);
1651 			vput(nd.ni_dvp);
1652 			if ((error = vn_start_write(NULL, &mp,
1653 			    V_XSLEEP | PCATCH)) != 0)
1654 				return (error);
1655 			goto restart;
1656 		}
1657 #ifdef MAC
1658 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1659 		    &nd.ni_cnd);
1660 		if (error)
1661 			goto out;
1662 #endif
1663 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1664 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1665 #ifdef MAC
1666 out:
1667 #endif
1668 		vn_finished_write(mp);
1669 	}
1670 	NDFREE(&nd, NDF_ONLY_PNBUF);
1671 	if (vp == nd.ni_dvp)
1672 		vrele(vp);
1673 	else
1674 		vput(vp);
1675 	vput(nd.ni_dvp);
1676 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1677 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1678 	return (error);
1679 }
1680 
1681 /*
1682  * Reposition read/write file offset.
1683  */
1684 #ifndef _SYS_SYSPROTO_H_
1685 struct lseek_args {
1686 	int	fd;
1687 	int	pad;
1688 	off_t	offset;
1689 	int	whence;
1690 };
1691 #endif
1692 int
1693 lseek(td, uap)
1694 	struct thread *td;
1695 	register struct lseek_args /* {
1696 		int fd;
1697 		int pad;
1698 		off_t offset;
1699 		int whence;
1700 	} */ *uap;
1701 {
1702 	struct ucred *cred = td->td_ucred;
1703 	struct file *fp;
1704 	struct vnode *vp;
1705 	struct vattr vattr;
1706 	off_t offset;
1707 	int error, noneg;
1708 
1709 	if ((error = fget(td, uap->fd, &fp)) != 0)
1710 		return (error);
1711 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1712 		fdrop(fp, td);
1713 		return (ESPIPE);
1714 	}
1715 	vp = fp->f_vnode;
1716 	noneg = (vp->v_type != VCHR);
1717 	offset = uap->offset;
1718 	switch (uap->whence) {
1719 	case L_INCR:
1720 		if (noneg &&
1721 		    (fp->f_offset < 0 ||
1722 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1723 			error = EOVERFLOW;
1724 			break;
1725 		}
1726 		offset += fp->f_offset;
1727 		break;
1728 	case L_XTND:
1729 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1730 		error = VOP_GETATTR(vp, &vattr, cred, td);
1731 		VOP_UNLOCK(vp, 0, td);
1732 		if (error)
1733 			break;
1734 		if (noneg &&
1735 		    (vattr.va_size > OFF_MAX ||
1736 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1737 			error = EOVERFLOW;
1738 			break;
1739 		}
1740 		offset += vattr.va_size;
1741 		break;
1742 	case L_SET:
1743 		break;
1744 	default:
1745 		error = EINVAL;
1746 	}
1747 	if (error == 0 && noneg && offset < 0)
1748 		error = EINVAL;
1749 	if (error != 0) {
1750 		fdrop(fp, td);
1751 		return (error);
1752 	}
1753 	fp->f_offset = offset;
1754 	*(off_t *)(td->td_retval) = fp->f_offset;
1755 	fdrop(fp, td);
1756 	return (0);
1757 }
1758 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Reposition read/write file offset.
 *
 * Old entry point taking a `long' offset.  The arguments are repackaged
 * into a struct lseek_args and passed to lseek(), whose result is
 * returned.
 */
#ifndef _SYS_SYSPROTO_H_
struct olseek_args {
	int	fd;
	long	offset;
	int	whence;
};
#endif
int
olseek(struct thread *td, struct olseek_args *uap)
{
	struct lseek_args largs;

	/* Only fd, offset and whence are filled in; lseek() reads no others. */
	largs.fd = uap->fd;
	largs.offset = uap->offset;
	largs.whence = uap->whence;
	return (lseek(td, &largs));
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
1794 
1795 /*
1796  * Check access permissions using passed credentials.
1797  */
1798 static int
1799 vn_access(vp, user_flags, cred, td)
1800 	struct vnode	*vp;
1801 	int		user_flags;
1802 	struct ucred	*cred;
1803 	struct thread	*td;
1804 {
1805 	int error, flags;
1806 
1807 	/* Flags == 0 means only check for existence. */
1808 	error = 0;
1809 	if (user_flags) {
1810 		flags = 0;
1811 		if (user_flags & R_OK)
1812 			flags |= VREAD;
1813 		if (user_flags & W_OK)
1814 			flags |= VWRITE;
1815 		if (user_flags & X_OK)
1816 			flags |= VEXEC;
1817 #ifdef MAC
1818 		error = mac_check_vnode_access(cred, vp, flags);
1819 		if (error)
1820 			return (error);
1821 #endif
1822 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1823 			error = VOP_ACCESS(vp, flags, cred, td);
1824 	}
1825 	return (error);
1826 }
1827 
1828 /*
1829  * Check access permissions using "real" credentials.
1830  */
1831 #ifndef _SYS_SYSPROTO_H_
1832 struct access_args {
1833 	char	*path;
1834 	int	flags;
1835 };
1836 #endif
1837 int
1838 access(td, uap)
1839 	struct thread *td;
1840 	register struct access_args /* {
1841 		char *path;
1842 		int flags;
1843 	} */ *uap;
1844 {
1845 
1846 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1847 }
1848 
1849 int
1850 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1851 {
1852 	struct ucred *cred, *tmpcred;
1853 	register struct vnode *vp;
1854 	int error;
1855 	struct nameidata nd;
1856 
1857 	/*
1858 	 * Create and modify a temporary credential instead of one that
1859 	 * is potentially shared.  This could also mess up socket
1860 	 * buffer accounting which can run in an interrupt context.
1861 	 *
1862 	 * XXX - Depending on how "threads" are finally implemented, it
1863 	 * may be better to explicitly pass the credential to namei()
1864 	 * rather than to modify the potentially shared process structure.
1865 	 */
1866 	cred = td->td_ucred;
1867 	tmpcred = crdup(cred);
1868 	tmpcred->cr_uid = cred->cr_ruid;
1869 	tmpcred->cr_groups[0] = cred->cr_rgid;
1870 	td->td_ucred = tmpcred;
1871 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1872 	if ((error = namei(&nd)) != 0)
1873 		goto out1;
1874 	vp = nd.ni_vp;
1875 
1876 	error = vn_access(vp, flags, tmpcred, td);
1877 	NDFREE(&nd, NDF_ONLY_PNBUF);
1878 	vput(vp);
1879 out1:
1880 	td->td_ucred = cred;
1881 	crfree(tmpcred);
1882 	return (error);
1883 }
1884 
1885 /*
1886  * Check access permissions using "effective" credentials.
1887  */
1888 #ifndef _SYS_SYSPROTO_H_
1889 struct eaccess_args {
1890 	char	*path;
1891 	int	flags;
1892 };
1893 #endif
1894 int
1895 eaccess(td, uap)
1896 	struct thread *td;
1897 	register struct eaccess_args /* {
1898 		char *path;
1899 		int flags;
1900 	} */ *uap;
1901 {
1902 	struct nameidata nd;
1903 	struct vnode *vp;
1904 	int error;
1905 
1906 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1907 	    uap->path, td);
1908 	if ((error = namei(&nd)) != 0)
1909 		return (error);
1910 	vp = nd.ni_vp;
1911 
1912 	error = vn_access(vp, uap->flags, td->td_ucred, td);
1913 	NDFREE(&nd, NDF_ONLY_PNBUF);
1914 	vput(vp);
1915 	return (error);
1916 }
1917 
1918 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1919 /*
1920  * Get file status; this version follows links.
1921  */
1922 #ifndef _SYS_SYSPROTO_H_
1923 struct ostat_args {
1924 	char	*path;
1925 	struct ostat *ub;
1926 };
1927 #endif
1928 /* ARGSUSED */
1929 int
1930 ostat(td, uap)
1931 	struct thread *td;
1932 	register struct ostat_args /* {
1933 		char *path;
1934 		struct ostat *ub;
1935 	} */ *uap;
1936 {
1937 	struct stat sb;
1938 	struct ostat osb;
1939 	int error;
1940 	struct nameidata nd;
1941 
1942 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1943 	    uap->path, td);
1944 	if ((error = namei(&nd)) != 0)
1945 		return (error);
1946 	NDFREE(&nd, NDF_ONLY_PNBUF);
1947 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1948 	vput(nd.ni_vp);
1949 	if (error)
1950 		return (error);
1951 	cvtstat(&sb, &osb);
1952 	error = copyout(&osb, uap->ub, sizeof (osb));
1953 	return (error);
1954 }
1955 
1956 /*
1957  * Get file status; this version does not follow links.
1958  */
1959 #ifndef _SYS_SYSPROTO_H_
1960 struct olstat_args {
1961 	char	*path;
1962 	struct ostat *ub;
1963 };
1964 #endif
1965 /* ARGSUSED */
1966 int
1967 olstat(td, uap)
1968 	struct thread *td;
1969 	register struct olstat_args /* {
1970 		char *path;
1971 		struct ostat *ub;
1972 	} */ *uap;
1973 {
1974 	struct vnode *vp;
1975 	struct stat sb;
1976 	struct ostat osb;
1977 	int error;
1978 	struct nameidata nd;
1979 
1980 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1981 	    uap->path, td);
1982 	if ((error = namei(&nd)) != 0)
1983 		return (error);
1984 	vp = nd.ni_vp;
1985 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1986 	NDFREE(&nd, NDF_ONLY_PNBUF);
1987 	vput(vp);
1988 	if (error)
1989 		return (error);
1990 	cvtstat(&sb, &osb);
1991 	error = copyout(&osb, uap->ub, sizeof (osb));
1992 	return (error);
1993 }
1994 
1995 /*
1996  * Convert from an old to a new stat structure.
1997  */
1998 void
1999 cvtstat(st, ost)
2000 	struct stat *st;
2001 	struct ostat *ost;
2002 {
2003 
2004 	ost->st_dev = st->st_dev;
2005 	ost->st_ino = st->st_ino;
2006 	ost->st_mode = st->st_mode;
2007 	ost->st_nlink = st->st_nlink;
2008 	ost->st_uid = st->st_uid;
2009 	ost->st_gid = st->st_gid;
2010 	ost->st_rdev = st->st_rdev;
2011 	if (st->st_size < (quad_t)1 << 32)
2012 		ost->st_size = st->st_size;
2013 	else
2014 		ost->st_size = -2;
2015 	ost->st_atime = st->st_atime;
2016 	ost->st_mtime = st->st_mtime;
2017 	ost->st_ctime = st->st_ctime;
2018 	ost->st_blksize = st->st_blksize;
2019 	ost->st_blocks = st->st_blocks;
2020 	ost->st_flags = st->st_flags;
2021 	ost->st_gen = st->st_gen;
2022 }
2023 #endif /* COMPAT_43 || COMPAT_SUNOS */
2024 
2025 /*
2026  * Get file status; this version follows links.
2027  */
2028 #ifndef _SYS_SYSPROTO_H_
2029 struct stat_args {
2030 	char	*path;
2031 	struct stat *ub;
2032 };
2033 #endif
2034 /* ARGSUSED */
2035 int
2036 stat(td, uap)
2037 	struct thread *td;
2038 	register struct stat_args /* {
2039 		char *path;
2040 		struct stat *ub;
2041 	} */ *uap;
2042 {
2043 	struct stat sb;
2044 	int error;
2045 	struct nameidata nd;
2046 
2047 #ifdef LOOKUP_SHARED
2048 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
2049 	    UIO_USERSPACE, uap->path, td);
2050 #else
2051 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2052 	    uap->path, td);
2053 #endif
2054 	if ((error = namei(&nd)) != 0)
2055 		return (error);
2056 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2057 	NDFREE(&nd, NDF_ONLY_PNBUF);
2058 	vput(nd.ni_vp);
2059 	if (error)
2060 		return (error);
2061 	error = copyout(&sb, uap->ub, sizeof (sb));
2062 	return (error);
2063 }
2064 
2065 /*
2066  * Get file status; this version does not follow links.
2067  */
2068 #ifndef _SYS_SYSPROTO_H_
2069 struct lstat_args {
2070 	char	*path;
2071 	struct stat *ub;
2072 };
2073 #endif
2074 /* ARGSUSED */
2075 int
2076 lstat(td, uap)
2077 	struct thread *td;
2078 	register struct lstat_args /* {
2079 		char *path;
2080 		struct stat *ub;
2081 	} */ *uap;
2082 {
2083 	int error;
2084 	struct vnode *vp;
2085 	struct stat sb;
2086 	struct nameidata nd;
2087 
2088 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2089 	    uap->path, td);
2090 	if ((error = namei(&nd)) != 0)
2091 		return (error);
2092 	vp = nd.ni_vp;
2093 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2094 	NDFREE(&nd, NDF_ONLY_PNBUF);
2095 	vput(vp);
2096 	if (error)
2097 		return (error);
2098 	error = copyout(&sb, uap->ub, sizeof (sb));
2099 	return (error);
2100 }
2101 
2102 /*
2103  * Implementation of the NetBSD stat() function.
2104  * XXX This should probably be collapsed with the FreeBSD version,
2105  * as the differences are only due to vn_stat() clearing spares at
2106  * the end of the structures.  vn_stat could be split to avoid this,
2107  * and thus collapse the following to close to zero code.
2108  */
2109 void
2110 cvtnstat(sb, nsb)
2111 	struct stat *sb;
2112 	struct nstat *nsb;
2113 {
2114 	bzero(nsb, sizeof *nsb);
2115 	nsb->st_dev = sb->st_dev;
2116 	nsb->st_ino = sb->st_ino;
2117 	nsb->st_mode = sb->st_mode;
2118 	nsb->st_nlink = sb->st_nlink;
2119 	nsb->st_uid = sb->st_uid;
2120 	nsb->st_gid = sb->st_gid;
2121 	nsb->st_rdev = sb->st_rdev;
2122 	nsb->st_atimespec = sb->st_atimespec;
2123 	nsb->st_mtimespec = sb->st_mtimespec;
2124 	nsb->st_ctimespec = sb->st_ctimespec;
2125 	nsb->st_size = sb->st_size;
2126 	nsb->st_blocks = sb->st_blocks;
2127 	nsb->st_blksize = sb->st_blksize;
2128 	nsb->st_flags = sb->st_flags;
2129 	nsb->st_gen = sb->st_gen;
2130 	nsb->st_birthtimespec = sb->st_birthtimespec;
2131 }
2132 
2133 #ifndef _SYS_SYSPROTO_H_
2134 struct nstat_args {
2135 	char	*path;
2136 	struct nstat *ub;
2137 };
2138 #endif
2139 /* ARGSUSED */
2140 int
2141 nstat(td, uap)
2142 	struct thread *td;
2143 	register struct nstat_args /* {
2144 		char *path;
2145 		struct nstat *ub;
2146 	} */ *uap;
2147 {
2148 	struct stat sb;
2149 	struct nstat nsb;
2150 	int error;
2151 	struct nameidata nd;
2152 
2153 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2154 	    uap->path, td);
2155 	if ((error = namei(&nd)) != 0)
2156 		return (error);
2157 	NDFREE(&nd, NDF_ONLY_PNBUF);
2158 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2159 	vput(nd.ni_vp);
2160 	if (error)
2161 		return (error);
2162 	cvtnstat(&sb, &nsb);
2163 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2164 	return (error);
2165 }
2166 
2167 /*
2168  * NetBSD lstat.  Get file status; this version does not follow links.
2169  */
2170 #ifndef _SYS_SYSPROTO_H_
2171 struct lstat_args {
2172 	char	*path;
2173 	struct stat *ub;
2174 };
2175 #endif
2176 /* ARGSUSED */
2177 int
2178 nlstat(td, uap)
2179 	struct thread *td;
2180 	register struct nlstat_args /* {
2181 		char *path;
2182 		struct nstat *ub;
2183 	} */ *uap;
2184 {
2185 	int error;
2186 	struct vnode *vp;
2187 	struct stat sb;
2188 	struct nstat nsb;
2189 	struct nameidata nd;
2190 
2191 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2192 	    uap->path, td);
2193 	if ((error = namei(&nd)) != 0)
2194 		return (error);
2195 	vp = nd.ni_vp;
2196 	NDFREE(&nd, NDF_ONLY_PNBUF);
2197 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2198 	vput(vp);
2199 	if (error)
2200 		return (error);
2201 	cvtnstat(&sb, &nsb);
2202 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2203 	return (error);
2204 }
2205 
2206 /*
2207  * Get configurable pathname variables.
2208  */
2209 #ifndef _SYS_SYSPROTO_H_
2210 struct pathconf_args {
2211 	char	*path;
2212 	int	name;
2213 };
2214 #endif
2215 /* ARGSUSED */
2216 int
2217 pathconf(td, uap)
2218 	struct thread *td;
2219 	register struct pathconf_args /* {
2220 		char *path;
2221 		int name;
2222 	} */ *uap;
2223 {
2224 	int error;
2225 	struct nameidata nd;
2226 
2227 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2228 	    uap->path, td);
2229 	if ((error = namei(&nd)) != 0)
2230 		return (error);
2231 	NDFREE(&nd, NDF_ONLY_PNBUF);
2232 
2233 	/* If asynchronous I/O is available, it works for all files. */
2234 	if (uap->name == _PC_ASYNC_IO)
2235 		td->td_retval[0] = async_io_version;
2236 	else
2237 		error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval);
2238 	vput(nd.ni_vp);
2239 	return (error);
2240 }
2241 
2242 /*
2243  * Return target name of a symbolic link.
2244  */
2245 #ifndef _SYS_SYSPROTO_H_
2246 struct readlink_args {
2247 	char	*path;
2248 	char	*buf;
2249 	int	count;
2250 };
2251 #endif
2252 /* ARGSUSED */
2253 int
2254 readlink(td, uap)
2255 	struct thread *td;
2256 	register struct readlink_args /* {
2257 		char *path;
2258 		char *buf;
2259 		int count;
2260 	} */ *uap;
2261 {
2262 
2263 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2264 	    UIO_USERSPACE, uap->count));
2265 }
2266 
2267 int
2268 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2269     enum uio_seg bufseg, int count)
2270 {
2271 	register struct vnode *vp;
2272 	struct iovec aiov;
2273 	struct uio auio;
2274 	int error;
2275 	struct nameidata nd;
2276 
2277 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
2278 	if ((error = namei(&nd)) != 0)
2279 		return (error);
2280 	NDFREE(&nd, NDF_ONLY_PNBUF);
2281 	vp = nd.ni_vp;
2282 #ifdef MAC
2283 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2284 	if (error) {
2285 		vput(vp);
2286 		return (error);
2287 	}
2288 #endif
2289 	if (vp->v_type != VLNK)
2290 		error = EINVAL;
2291 	else {
2292 		aiov.iov_base = buf;
2293 		aiov.iov_len = count;
2294 		auio.uio_iov = &aiov;
2295 		auio.uio_iovcnt = 1;
2296 		auio.uio_offset = 0;
2297 		auio.uio_rw = UIO_READ;
2298 		auio.uio_segflg = bufseg;
2299 		auio.uio_td = td;
2300 		auio.uio_resid = count;
2301 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2302 	}
2303 	vput(vp);
2304 	td->td_retval[0] = count - auio.uio_resid;
2305 	return (error);
2306 }
2307 
2308 /*
2309  * Common implementation code for chflags() and fchflags().
2310  */
2311 static int
2312 setfflags(td, vp, flags)
2313 	struct thread *td;
2314 	struct vnode *vp;
2315 	int flags;
2316 {
2317 	int error;
2318 	struct mount *mp;
2319 	struct vattr vattr;
2320 
2321 	/*
2322 	 * Prevent non-root users from setting flags on devices.  When
2323 	 * a device is reused, users can retain ownership of the device
2324 	 * if they are allowed to set flags and programs assume that
2325 	 * chown can't fail when done as root.
2326 	 */
2327 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2328 		error = suser_cred(td->td_ucred, PRISON_ROOT);
2329 		if (error)
2330 			return (error);
2331 	}
2332 
2333 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2334 		return (error);
2335 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2336 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2337 	VATTR_NULL(&vattr);
2338 	vattr.va_flags = flags;
2339 #ifdef MAC
2340 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2341 	if (error == 0)
2342 #endif
2343 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2344 	VOP_UNLOCK(vp, 0, td);
2345 	vn_finished_write(mp);
2346 	return (error);
2347 }
2348 
2349 /*
2350  * Change flags of a file given a path name.
2351  */
2352 #ifndef _SYS_SYSPROTO_H_
2353 struct chflags_args {
2354 	char	*path;
2355 	int	flags;
2356 };
2357 #endif
2358 /* ARGSUSED */
2359 int
2360 chflags(td, uap)
2361 	struct thread *td;
2362 	register struct chflags_args /* {
2363 		char *path;
2364 		int flags;
2365 	} */ *uap;
2366 {
2367 	int error;
2368 	struct nameidata nd;
2369 
2370 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
2371 	if ((error = namei(&nd)) != 0)
2372 		return (error);
2373 	NDFREE(&nd, NDF_ONLY_PNBUF);
2374 	error = setfflags(td, nd.ni_vp, uap->flags);
2375 	vrele(nd.ni_vp);
2376 	return (error);
2377 }
2378 
2379 /*
2380  * Same as chflags() but doesn't follow symlinks.
2381  */
2382 int
2383 lchflags(td, uap)
2384 	struct thread *td;
2385 	register struct lchflags_args /* {
2386 		char *path;
2387 		int flags;
2388 	} */ *uap;
2389 {
2390 	int error;
2391 	struct nameidata nd;
2392 
2393 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2394 	if ((error = namei(&nd)) != 0)
2395 		return (error);
2396 	NDFREE(&nd, NDF_ONLY_PNBUF);
2397 	error = setfflags(td, nd.ni_vp, uap->flags);
2398 	vrele(nd.ni_vp);
2399 	return (error);
2400 }
2401 
2402 /*
2403  * Change flags of a file given a file descriptor.
2404  */
2405 #ifndef _SYS_SYSPROTO_H_
2406 struct fchflags_args {
2407 	int	fd;
2408 	int	flags;
2409 };
2410 #endif
2411 /* ARGSUSED */
2412 int
2413 fchflags(td, uap)
2414 	struct thread *td;
2415 	register struct fchflags_args /* {
2416 		int fd;
2417 		int flags;
2418 	} */ *uap;
2419 {
2420 	struct file *fp;
2421 	int error;
2422 
2423 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2424 		return (error);
2425 	error = setfflags(td, fp->f_vnode, uap->flags);
2426 	fdrop(fp, td);
2427 	return (error);
2428 }
2429 
2430 /*
2431  * Common implementation code for chmod(), lchmod() and fchmod().
2432  */
2433 static int
2434 setfmode(td, vp, mode)
2435 	struct thread *td;
2436 	struct vnode *vp;
2437 	int mode;
2438 {
2439 	int error;
2440 	struct mount *mp;
2441 	struct vattr vattr;
2442 
2443 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2444 		return (error);
2445 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2446 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2447 	VATTR_NULL(&vattr);
2448 	vattr.va_mode = mode & ALLPERMS;
2449 #ifdef MAC
2450 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2451 	if (error == 0)
2452 #endif
2453 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2454 	VOP_UNLOCK(vp, 0, td);
2455 	vn_finished_write(mp);
2456 	return (error);
2457 }
2458 
2459 /*
2460  * Change mode of a file given path name.
2461  */
2462 #ifndef _SYS_SYSPROTO_H_
2463 struct chmod_args {
2464 	char	*path;
2465 	int	mode;
2466 };
2467 #endif
2468 /* ARGSUSED */
2469 int
2470 chmod(td, uap)
2471 	struct thread *td;
2472 	register struct chmod_args /* {
2473 		char *path;
2474 		int mode;
2475 	} */ *uap;
2476 {
2477 
2478 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2479 }
2480 
2481 int
2482 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2483 {
2484 	int error;
2485 	struct nameidata nd;
2486 
2487 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2488 	if ((error = namei(&nd)) != 0)
2489 		return (error);
2490 	NDFREE(&nd, NDF_ONLY_PNBUF);
2491 	error = setfmode(td, nd.ni_vp, mode);
2492 	vrele(nd.ni_vp);
2493 	return (error);
2494 }
2495 
2496 /*
2497  * Change mode of a file given path name (don't follow links.)
2498  */
2499 #ifndef _SYS_SYSPROTO_H_
2500 struct lchmod_args {
2501 	char	*path;
2502 	int	mode;
2503 };
2504 #endif
2505 /* ARGSUSED */
2506 int
2507 lchmod(td, uap)
2508 	struct thread *td;
2509 	register struct lchmod_args /* {
2510 		char *path;
2511 		int mode;
2512 	} */ *uap;
2513 {
2514 	int error;
2515 	struct nameidata nd;
2516 
2517 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2518 	if ((error = namei(&nd)) != 0)
2519 		return (error);
2520 	NDFREE(&nd, NDF_ONLY_PNBUF);
2521 	error = setfmode(td, nd.ni_vp, uap->mode);
2522 	vrele(nd.ni_vp);
2523 	return (error);
2524 }
2525 
2526 /*
2527  * Change mode of a file given a file descriptor.
2528  */
2529 #ifndef _SYS_SYSPROTO_H_
2530 struct fchmod_args {
2531 	int	fd;
2532 	int	mode;
2533 };
2534 #endif
2535 /* ARGSUSED */
2536 int
2537 fchmod(td, uap)
2538 	struct thread *td;
2539 	register struct fchmod_args /* {
2540 		int fd;
2541 		int mode;
2542 	} */ *uap;
2543 {
2544 	struct file *fp;
2545 	int error;
2546 
2547 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2548 		return (error);
2549 	error = setfmode(td, fp->f_vnode, uap->mode);
2550 	fdrop(fp, td);
2551 	return (error);
2552 }
2553 
2554 /*
2555  * Common implementation for chown(), lchown(), and fchown()
2556  */
2557 static int
2558 setfown(td, vp, uid, gid)
2559 	struct thread *td;
2560 	struct vnode *vp;
2561 	uid_t uid;
2562 	gid_t gid;
2563 {
2564 	int error;
2565 	struct mount *mp;
2566 	struct vattr vattr;
2567 
2568 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2569 		return (error);
2570 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2571 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2572 	VATTR_NULL(&vattr);
2573 	vattr.va_uid = uid;
2574 	vattr.va_gid = gid;
2575 #ifdef MAC
2576 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2577 	    vattr.va_gid);
2578 	if (error == 0)
2579 #endif
2580 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2581 	VOP_UNLOCK(vp, 0, td);
2582 	vn_finished_write(mp);
2583 	return (error);
2584 }
2585 
2586 /*
2587  * Set ownership given a path name.
2588  */
2589 #ifndef _SYS_SYSPROTO_H_
2590 struct chown_args {
2591 	char	*path;
2592 	int	uid;
2593 	int	gid;
2594 };
2595 #endif
2596 /* ARGSUSED */
2597 int
2598 chown(td, uap)
2599 	struct thread *td;
2600 	register struct chown_args /* {
2601 		char *path;
2602 		int uid;
2603 		int gid;
2604 	} */ *uap;
2605 {
2606 
2607 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2608 }
2609 
2610 int
2611 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2612     int gid)
2613 {
2614 	int error;
2615 	struct nameidata nd;
2616 
2617 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2618 	if ((error = namei(&nd)) != 0)
2619 		return (error);
2620 	NDFREE(&nd, NDF_ONLY_PNBUF);
2621 	error = setfown(td, nd.ni_vp, uid, gid);
2622 	vrele(nd.ni_vp);
2623 	return (error);
2624 }
2625 
2626 /*
2627  * Set ownership given a path name, do not cross symlinks.
2628  */
2629 #ifndef _SYS_SYSPROTO_H_
2630 struct lchown_args {
2631 	char	*path;
2632 	int	uid;
2633 	int	gid;
2634 };
2635 #endif
2636 /* ARGSUSED */
2637 int
2638 lchown(td, uap)
2639 	struct thread *td;
2640 	register struct lchown_args /* {
2641 		char *path;
2642 		int uid;
2643 		int gid;
2644 	} */ *uap;
2645 {
2646 
2647 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2648 }
2649 
2650 int
2651 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2652     int gid)
2653 {
2654 	int error;
2655 	struct nameidata nd;
2656 
2657 	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2658 	if ((error = namei(&nd)) != 0)
2659 		return (error);
2660 	NDFREE(&nd, NDF_ONLY_PNBUF);
2661 	error = setfown(td, nd.ni_vp, uid, gid);
2662 	vrele(nd.ni_vp);
2663 	return (error);
2664 }
2665 
2666 /*
2667  * Set ownership given a file descriptor.
2668  */
2669 #ifndef _SYS_SYSPROTO_H_
2670 struct fchown_args {
2671 	int	fd;
2672 	int	uid;
2673 	int	gid;
2674 };
2675 #endif
2676 /* ARGSUSED */
2677 int
2678 fchown(td, uap)
2679 	struct thread *td;
2680 	register struct fchown_args /* {
2681 		int fd;
2682 		int uid;
2683 		int gid;
2684 	} */ *uap;
2685 {
2686 	struct file *fp;
2687 	int error;
2688 
2689 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2690 		return (error);
2691 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2692 	fdrop(fp, td);
2693 	return (error);
2694 }
2695 
2696 /*
2697  * Common implementation code for utimes(), lutimes(), and futimes().
2698  */
2699 static int
2700 getutimes(usrtvp, tvpseg, tsp)
2701 	const struct timeval *usrtvp;
2702 	enum uio_seg tvpseg;
2703 	struct timespec *tsp;
2704 {
2705 	struct timeval tv[2];
2706 	const struct timeval *tvp;
2707 	int error;
2708 
2709 	if (usrtvp == NULL) {
2710 		microtime(&tv[0]);
2711 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2712 		tsp[1] = tsp[0];
2713 	} else {
2714 		if (tvpseg == UIO_SYSSPACE) {
2715 			tvp = usrtvp;
2716 		} else {
2717 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2718 				return (error);
2719 			tvp = tv;
2720 		}
2721 
2722 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2723 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2724 	}
2725 	return (0);
2726 }
2727 
2728 /*
2729  * Common implementation code for utimes(), lutimes(), and futimes().
2730  */
2731 static int
2732 setutimes(td, vp, ts, numtimes, nullflag)
2733 	struct thread *td;
2734 	struct vnode *vp;
2735 	const struct timespec *ts;
2736 	int numtimes;
2737 	int nullflag;
2738 {
2739 	int error, setbirthtime;
2740 	struct mount *mp;
2741 	struct vattr vattr;
2742 
2743 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2744 		return (error);
2745 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2746 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2747 	setbirthtime = 0;
2748 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2749 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2750 		setbirthtime = 1;
2751 	VATTR_NULL(&vattr);
2752 	vattr.va_atime = ts[0];
2753 	vattr.va_mtime = ts[1];
2754 	if (setbirthtime)
2755 		vattr.va_birthtime = ts[1];
2756 	if (numtimes > 2)
2757 		vattr.va_birthtime = ts[2];
2758 	if (nullflag)
2759 		vattr.va_vaflags |= VA_UTIMES_NULL;
2760 #ifdef MAC
2761 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2762 	    vattr.va_mtime);
2763 #endif
2764 	if (error == 0)
2765 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2766 	VOP_UNLOCK(vp, 0, td);
2767 	vn_finished_write(mp);
2768 	return (error);
2769 }
2770 
2771 /*
2772  * Set the access and modification times of a file.
2773  */
2774 #ifndef _SYS_SYSPROTO_H_
2775 struct utimes_args {
2776 	char	*path;
2777 	struct	timeval *tptr;
2778 };
2779 #endif
2780 /* ARGSUSED */
2781 int
2782 utimes(td, uap)
2783 	struct thread *td;
2784 	register struct utimes_args /* {
2785 		char *path;
2786 		struct timeval *tptr;
2787 	} */ *uap;
2788 {
2789 
2790 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2791 	    UIO_USERSPACE));
2792 }
2793 
2794 int
2795 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2796     struct timeval *tptr, enum uio_seg tptrseg)
2797 {
2798 	struct timespec ts[2];
2799 	int error;
2800 	struct nameidata nd;
2801 
2802 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2803 		return (error);
2804 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2805 	if ((error = namei(&nd)) != 0)
2806 		return (error);
2807 	NDFREE(&nd, NDF_ONLY_PNBUF);
2808 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2809 	vrele(nd.ni_vp);
2810 	return (error);
2811 }
2812 
2813 /*
2814  * Set the access and modification times of a file.
2815  */
2816 #ifndef _SYS_SYSPROTO_H_
2817 struct lutimes_args {
2818 	char	*path;
2819 	struct	timeval *tptr;
2820 };
2821 #endif
2822 /* ARGSUSED */
2823 int
2824 lutimes(td, uap)
2825 	struct thread *td;
2826 	register struct lutimes_args /* {
2827 		char *path;
2828 		struct timeval *tptr;
2829 	} */ *uap;
2830 {
2831 
2832 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2833 	    UIO_USERSPACE));
2834 }
2835 
2836 int
2837 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2838     struct timeval *tptr, enum uio_seg tptrseg)
2839 {
2840 	struct timespec ts[2];
2841 	int error;
2842 	struct nameidata nd;
2843 
2844 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2845 		return (error);
2846 	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2847 	if ((error = namei(&nd)) != 0)
2848 		return (error);
2849 	NDFREE(&nd, NDF_ONLY_PNBUF);
2850 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2851 	vrele(nd.ni_vp);
2852 	return (error);
2853 }
2854 
2855 /*
2856  * Set the access and modification times of a file.
2857  */
2858 #ifndef _SYS_SYSPROTO_H_
2859 struct futimes_args {
2860 	int	fd;
2861 	struct	timeval *tptr;
2862 };
2863 #endif
2864 /* ARGSUSED */
2865 int
2866 futimes(td, uap)
2867 	struct thread *td;
2868 	register struct futimes_args /* {
2869 		int  fd;
2870 		struct timeval *tptr;
2871 	} */ *uap;
2872 {
2873 
2874 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2875 }
2876 
2877 int
2878 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2879     enum uio_seg tptrseg)
2880 {
2881 	struct timespec ts[2];
2882 	struct file *fp;
2883 	int error;
2884 
2885 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2886 		return (error);
2887 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2888 		return (error);
2889 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2890 	fdrop(fp, td);
2891 	return (error);
2892 }
2893 
2894 /*
2895  * Truncate a file given its path name.
2896  */
2897 #ifndef _SYS_SYSPROTO_H_
2898 struct truncate_args {
2899 	char	*path;
2900 	int	pad;
2901 	off_t	length;
2902 };
2903 #endif
2904 /* ARGSUSED */
2905 int
2906 truncate(td, uap)
2907 	struct thread *td;
2908 	register struct truncate_args /* {
2909 		char *path;
2910 		int pad;
2911 		off_t length;
2912 	} */ *uap;
2913 {
2914 
2915 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2916 }
2917 
2918 int
2919 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2920 {
2921 	struct mount *mp;
2922 	struct vnode *vp;
2923 	struct vattr vattr;
2924 	int error;
2925 	struct nameidata nd;
2926 
2927 	if (length < 0)
2928 		return(EINVAL);
2929 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2930 	if ((error = namei(&nd)) != 0)
2931 		return (error);
2932 	vp = nd.ni_vp;
2933 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2934 		vrele(vp);
2935 		return (error);
2936 	}
2937 	NDFREE(&nd, NDF_ONLY_PNBUF);
2938 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2939 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2940 	if (vp->v_type == VDIR)
2941 		error = EISDIR;
2942 #ifdef MAC
2943 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2944 	}
2945 #endif
2946 	else if ((error = vn_writechk(vp)) == 0 &&
2947 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2948 		VATTR_NULL(&vattr);
2949 		vattr.va_size = length;
2950 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2951 	}
2952 	vput(vp);
2953 	vn_finished_write(mp);
2954 	return (error);
2955 }
2956 
2957 /*
2958  * Truncate a file given a file descriptor.
2959  */
2960 #ifndef _SYS_SYSPROTO_H_
2961 struct ftruncate_args {
2962 	int	fd;
2963 	int	pad;
2964 	off_t	length;
2965 };
2966 #endif
2967 /* ARGSUSED */
2968 int
2969 ftruncate(td, uap)
2970 	struct thread *td;
2971 	register struct ftruncate_args /* {
2972 		int fd;
2973 		int pad;
2974 		off_t length;
2975 	} */ *uap;
2976 {
2977 	struct mount *mp;
2978 	struct vattr vattr;
2979 	struct vnode *vp;
2980 	struct file *fp;
2981 	int error;
2982 
2983 	if (uap->length < 0)
2984 		return(EINVAL);
2985 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2986 		return (error);
2987 	if ((fp->f_flag & FWRITE) == 0) {
2988 		fdrop(fp, td);
2989 		return (EINVAL);
2990 	}
2991 	vp = fp->f_vnode;
2992 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2993 		fdrop(fp, td);
2994 		return (error);
2995 	}
2996 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2997 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2998 	if (vp->v_type == VDIR)
2999 		error = EISDIR;
3000 #ifdef MAC
3001 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3002 	    vp))) {
3003 	}
3004 #endif
3005 	else if ((error = vn_writechk(vp)) == 0) {
3006 		VATTR_NULL(&vattr);
3007 		vattr.va_size = uap->length;
3008 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3009 	}
3010 	VOP_UNLOCK(vp, 0, td);
3011 	vn_finished_write(mp);
3012 	fdrop(fp, td);
3013 	return (error);
3014 }
3015 
3016 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
3017 /*
3018  * Truncate a file given its path name.
3019  */
3020 #ifndef _SYS_SYSPROTO_H_
3021 struct otruncate_args {
3022 	char	*path;
3023 	long	length;
3024 };
3025 #endif
3026 /* ARGSUSED */
3027 int
3028 otruncate(td, uap)
3029 	struct thread *td;
3030 	register struct otruncate_args /* {
3031 		char *path;
3032 		long length;
3033 	} */ *uap;
3034 {
3035 	struct truncate_args /* {
3036 		char *path;
3037 		int pad;
3038 		off_t length;
3039 	} */ nuap;
3040 
3041 	nuap.path = uap->path;
3042 	nuap.length = uap->length;
3043 	return (truncate(td, &nuap));
3044 }
3045 
3046 /*
3047  * Truncate a file given a file descriptor.
3048  */
3049 #ifndef _SYS_SYSPROTO_H_
3050 struct oftruncate_args {
3051 	int	fd;
3052 	long	length;
3053 };
3054 #endif
3055 /* ARGSUSED */
3056 int
3057 oftruncate(td, uap)
3058 	struct thread *td;
3059 	register struct oftruncate_args /* {
3060 		int fd;
3061 		long length;
3062 	} */ *uap;
3063 {
3064 	struct ftruncate_args /* {
3065 		int fd;
3066 		int pad;
3067 		off_t length;
3068 	} */ nuap;
3069 
3070 	nuap.fd = uap->fd;
3071 	nuap.length = uap->length;
3072 	return (ftruncate(td, &nuap));
3073 }
3074 #endif /* COMPAT_43 || COMPAT_SUNOS */
3075 
3076 /*
3077  * Sync an open file.
3078  */
3079 #ifndef _SYS_SYSPROTO_H_
3080 struct fsync_args {
3081 	int	fd;
3082 };
3083 #endif
3084 /* ARGSUSED */
3085 int
3086 fsync(td, uap)
3087 	struct thread *td;
3088 	struct fsync_args /* {
3089 		int fd;
3090 	} */ *uap;
3091 {
3092 	struct vnode *vp;
3093 	struct mount *mp;
3094 	struct file *fp;
3095 	vm_object_t obj;
3096 	int error;
3097 
3098 	GIANT_REQUIRED;
3099 
3100 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3101 		return (error);
3102 	vp = fp->f_vnode;
3103 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3104 		fdrop(fp, td);
3105 		return (error);
3106 	}
3107 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3108 	if (VOP_GETVOBJECT(vp, &obj) == 0) {
3109 		VM_OBJECT_LOCK(obj);
3110 		vm_object_page_clean(obj, 0, 0, 0);
3111 		VM_OBJECT_UNLOCK(obj);
3112 	}
3113 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
3114 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
3115 	    && softdep_fsync_hook != NULL)
3116 		error = (*softdep_fsync_hook)(vp);
3117 
3118 	VOP_UNLOCK(vp, 0, td);
3119 	vn_finished_write(mp);
3120 	fdrop(fp, td);
3121 	return (error);
3122 }
3123 
3124 /*
3125  * Rename files.  Source and destination must either both be directories,
3126  * or both not be directories.  If target is a directory, it must be empty.
3127  */
3128 #ifndef _SYS_SYSPROTO_H_
3129 struct rename_args {
3130 	char	*from;
3131 	char	*to;
3132 };
3133 #endif
3134 /* ARGSUSED */
3135 int
3136 rename(td, uap)
3137 	struct thread *td;
3138 	register struct rename_args /* {
3139 		char *from;
3140 		char *to;
3141 	} */ *uap;
3142 {
3143 
3144 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3145 }
3146 
3147 int
3148 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3149 {
3150 	struct mount *mp = NULL;
3151 	struct vnode *tvp, *fvp, *tdvp;
3152 	struct nameidata fromnd, tond;
3153 	int error;
3154 
3155 	bwillwrite();
3156 #ifdef MAC
3157 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg,
3158 	    from, td);
3159 #else
3160 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td);
3161 #endif
3162 	if ((error = namei(&fromnd)) != 0)
3163 		return (error);
3164 #ifdef MAC
3165 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3166 	    fromnd.ni_vp, &fromnd.ni_cnd);
3167 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3168 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
3169 #endif
3170 	fvp = fromnd.ni_vp;
3171 	if (error == 0)
3172 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3173 	if (error != 0) {
3174 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3175 		vrele(fromnd.ni_dvp);
3176 		vrele(fvp);
3177 		goto out1;
3178 	}
3179 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3180 	    NOOBJ, pathseg, to, td);
3181 	if (fromnd.ni_vp->v_type == VDIR)
3182 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3183 	if ((error = namei(&tond)) != 0) {
3184 		/* Translate error code for rename("dir1", "dir2/."). */
3185 		if (error == EISDIR && fvp->v_type == VDIR)
3186 			error = EINVAL;
3187 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3188 		vrele(fromnd.ni_dvp);
3189 		vrele(fvp);
3190 		goto out1;
3191 	}
3192 	tdvp = tond.ni_dvp;
3193 	tvp = tond.ni_vp;
3194 	if (tvp != NULL) {
3195 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3196 			error = ENOTDIR;
3197 			goto out;
3198 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3199 			error = EISDIR;
3200 			goto out;
3201 		}
3202 	}
3203 	if (fvp == tdvp)
3204 		error = EINVAL;
3205 	/*
3206 	 * If the source is the same as the destination (that is, if they
3207 	 * are links to the same vnode), then there is nothing to do.
3208 	 */
3209 	if (fvp == tvp)
3210 		error = -1;
3211 #ifdef MAC
3212 	else
3213 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3214 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3215 #endif
3216 out:
3217 	if (!error) {
3218 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3219 		if (fromnd.ni_dvp != tdvp) {
3220 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3221 		}
3222 		if (tvp) {
3223 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3224 		}
3225 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3226 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3227 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3228 		NDFREE(&tond, NDF_ONLY_PNBUF);
3229 	} else {
3230 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3231 		NDFREE(&tond, NDF_ONLY_PNBUF);
3232 		if (tdvp == tvp)
3233 			vrele(tdvp);
3234 		else
3235 			vput(tdvp);
3236 		if (tvp)
3237 			vput(tvp);
3238 		vrele(fromnd.ni_dvp);
3239 		vrele(fvp);
3240 	}
3241 	vrele(tond.ni_startdir);
3242 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
3243 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
3244 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
3245 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
3246 out1:
3247 	vn_finished_write(mp);
3248 	if (fromnd.ni_startdir)
3249 		vrele(fromnd.ni_startdir);
3250 	if (error == -1)
3251 		return (0);
3252 	return (error);
3253 }
3254 
3255 /*
3256  * Make a directory file.
3257  */
3258 #ifndef _SYS_SYSPROTO_H_
3259 struct mkdir_args {
3260 	char	*path;
3261 	int	mode;
3262 };
3263 #endif
3264 /* ARGSUSED */
3265 int
3266 mkdir(td, uap)
3267 	struct thread *td;
3268 	register struct mkdir_args /* {
3269 		char *path;
3270 		int mode;
3271 	} */ *uap;
3272 {
3273 
3274 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3275 }
3276 
3277 int
3278 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3279 {
3280 	struct mount *mp;
3281 	struct vnode *vp;
3282 	struct vattr vattr;
3283 	int error;
3284 	struct nameidata nd;
3285 
3286 restart:
3287 	bwillwrite();
3288 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
3289 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3290 	if ((error = namei(&nd)) != 0)
3291 		return (error);
3292 	vp = nd.ni_vp;
3293 	if (vp != NULL) {
3294 		NDFREE(&nd, NDF_ONLY_PNBUF);
3295 		vrele(vp);
3296 		/*
3297 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3298 		 * the strange behaviour of leaving the vnode unlocked
3299 		 * if the target is the same vnode as the parent.
3300 		 */
3301 		if (vp == nd.ni_dvp)
3302 			vrele(nd.ni_dvp);
3303 		else
3304 			vput(nd.ni_dvp);
3305 		return (EEXIST);
3306 	}
3307 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3308 		NDFREE(&nd, NDF_ONLY_PNBUF);
3309 		vput(nd.ni_dvp);
3310 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3311 			return (error);
3312 		goto restart;
3313 	}
3314 	VATTR_NULL(&vattr);
3315 	vattr.va_type = VDIR;
3316 	FILEDESC_LOCK(td->td_proc->p_fd);
3317 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3318 	FILEDESC_UNLOCK(td->td_proc->p_fd);
3319 #ifdef MAC
3320 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3321 	    &vattr);
3322 	if (error)
3323 		goto out;
3324 #endif
3325 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3326 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3327 #ifdef MAC
3328 out:
3329 #endif
3330 	NDFREE(&nd, NDF_ONLY_PNBUF);
3331 	vput(nd.ni_dvp);
3332 	if (!error)
3333 		vput(nd.ni_vp);
3334 	vn_finished_write(mp);
3335 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3336 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3337 	return (error);
3338 }
3339 
3340 /*
3341  * Remove a directory file.
3342  */
3343 #ifndef _SYS_SYSPROTO_H_
3344 struct rmdir_args {
3345 	char	*path;
3346 };
3347 #endif
3348 /* ARGSUSED */
3349 int
3350 rmdir(td, uap)
3351 	struct thread *td;
3352 	struct rmdir_args /* {
3353 		char *path;
3354 	} */ *uap;
3355 {
3356 
3357 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3358 }
3359 
3360 int
3361 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3362 {
3363 	struct mount *mp;
3364 	struct vnode *vp;
3365 	int error;
3366 	struct nameidata nd;
3367 
3368 restart:
3369 	bwillwrite();
3370 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td);
3371 	if ((error = namei(&nd)) != 0)
3372 		return (error);
3373 	vp = nd.ni_vp;
3374 	if (vp->v_type != VDIR) {
3375 		error = ENOTDIR;
3376 		goto out;
3377 	}
3378 	/*
3379 	 * No rmdir "." please.
3380 	 */
3381 	if (nd.ni_dvp == vp) {
3382 		error = EINVAL;
3383 		goto out;
3384 	}
3385 	/*
3386 	 * The root of a mounted filesystem cannot be deleted.
3387 	 */
3388 	if (vp->v_vflag & VV_ROOT) {
3389 		error = EBUSY;
3390 		goto out;
3391 	}
3392 #ifdef MAC
3393 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3394 	    &nd.ni_cnd);
3395 	if (error)
3396 		goto out;
3397 #endif
3398 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3399 		NDFREE(&nd, NDF_ONLY_PNBUF);
3400 		if (nd.ni_dvp == vp)
3401 			vrele(nd.ni_dvp);
3402 		else
3403 			vput(nd.ni_dvp);
3404 		vput(vp);
3405 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3406 			return (error);
3407 		goto restart;
3408 	}
3409 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3410 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3411 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3412 	vn_finished_write(mp);
3413 out:
3414 	NDFREE(&nd, NDF_ONLY_PNBUF);
3415 	if (nd.ni_dvp == vp)
3416 		vrele(nd.ni_dvp);
3417 	else
3418 		vput(nd.ni_dvp);
3419 	vput(vp);
3420 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3421 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3422 	return (error);
3423 }
3424 
3425 #ifdef COMPAT_43
3426 /*
3427  * Read a block of directory entries in a filesystem independent format.
3428  */
3429 #ifndef _SYS_SYSPROTO_H_
3430 struct ogetdirentries_args {
3431 	int	fd;
3432 	char	*buf;
3433 	u_int	count;
3434 	long	*basep;
3435 };
3436 #endif
3437 int
3438 ogetdirentries(td, uap)
3439 	struct thread *td;
3440 	register struct ogetdirentries_args /* {
3441 		int fd;
3442 		char *buf;
3443 		u_int count;
3444 		long *basep;
3445 	} */ *uap;
3446 {
3447 	struct vnode *vp;
3448 	struct file *fp;
3449 	struct uio auio, kuio;
3450 	struct iovec aiov, kiov;
3451 	struct dirent *dp, *edp;
3452 	caddr_t dirbuf;
3453 	int error, eofflag, readcnt;
3454 	long loff;
3455 
3456 	/* XXX arbitrary sanity limit on `count'. */
3457 	if (uap->count > 64 * 1024)
3458 		return (EINVAL);
3459 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3460 		return (error);
3461 	if ((fp->f_flag & FREAD) == 0) {
3462 		fdrop(fp, td);
3463 		return (EBADF);
3464 	}
3465 	vp = fp->f_vnode;
3466 unionread:
3467 	if (vp->v_type != VDIR) {
3468 		fdrop(fp, td);
3469 		return (EINVAL);
3470 	}
3471 	aiov.iov_base = uap->buf;
3472 	aiov.iov_len = uap->count;
3473 	auio.uio_iov = &aiov;
3474 	auio.uio_iovcnt = 1;
3475 	auio.uio_rw = UIO_READ;
3476 	auio.uio_segflg = UIO_USERSPACE;
3477 	auio.uio_td = td;
3478 	auio.uio_resid = uap->count;
3479 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3480 	loff = auio.uio_offset = fp->f_offset;
3481 #ifdef MAC
3482 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3483 	if (error) {
3484 		VOP_UNLOCK(vp, 0, td);
3485 		fdrop(fp, td);
3486 		return (error);
3487 	}
3488 #endif
3489 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3490 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3491 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3492 			    NULL, NULL);
3493 			fp->f_offset = auio.uio_offset;
3494 		} else
3495 #	endif
3496 	{
3497 		kuio = auio;
3498 		kuio.uio_iov = &kiov;
3499 		kuio.uio_segflg = UIO_SYSSPACE;
3500 		kiov.iov_len = uap->count;
3501 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3502 		kiov.iov_base = dirbuf;
3503 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3504 			    NULL, NULL);
3505 		fp->f_offset = kuio.uio_offset;
3506 		if (error == 0) {
3507 			readcnt = uap->count - kuio.uio_resid;
3508 			edp = (struct dirent *)&dirbuf[readcnt];
3509 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3510 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3511 					/*
3512 					 * The expected low byte of
3513 					 * dp->d_namlen is our dp->d_type.
3514 					 * The high MBZ byte of dp->d_namlen
3515 					 * is our dp->d_namlen.
3516 					 */
3517 					dp->d_type = dp->d_namlen;
3518 					dp->d_namlen = 0;
3519 #				else
3520 					/*
3521 					 * The dp->d_type is the high byte
3522 					 * of the expected dp->d_namlen,
3523 					 * so must be zero'ed.
3524 					 */
3525 					dp->d_type = 0;
3526 #				endif
3527 				if (dp->d_reclen > 0) {
3528 					dp = (struct dirent *)
3529 					    ((char *)dp + dp->d_reclen);
3530 				} else {
3531 					error = EIO;
3532 					break;
3533 				}
3534 			}
3535 			if (dp >= edp)
3536 				error = uiomove(dirbuf, readcnt, &auio);
3537 		}
3538 		FREE(dirbuf, M_TEMP);
3539 	}
3540 	VOP_UNLOCK(vp, 0, td);
3541 	if (error) {
3542 		fdrop(fp, td);
3543 		return (error);
3544 	}
3545 	if (uap->count == auio.uio_resid) {
3546 		if (union_dircheckp) {
3547 			error = union_dircheckp(td, &vp, fp);
3548 			if (error == -1)
3549 				goto unionread;
3550 			if (error) {
3551 				fdrop(fp, td);
3552 				return (error);
3553 			}
3554 		}
3555 		/*
3556 		 * XXX We could delay dropping the lock above but
3557 		 * union_dircheckp complicates things.
3558 		 */
3559 		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
3560 		if ((vp->v_vflag & VV_ROOT) &&
3561 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3562 			struct vnode *tvp = vp;
3563 			vp = vp->v_mount->mnt_vnodecovered;
3564 			VREF(vp);
3565 			fp->f_vnode = vp;
3566 			fp->f_data = vp;
3567 			fp->f_offset = 0;
3568 			vput(tvp);
3569 			goto unionread;
3570 		}
3571 		VOP_UNLOCK(vp, 0, td);
3572 	}
3573 	error = copyout(&loff, uap->basep, sizeof(long));
3574 	fdrop(fp, td);
3575 	td->td_retval[0] = uap->count - auio.uio_resid;
3576 	return (error);
3577 }
3578 #endif /* COMPAT_43 */
3579 
3580 /*
3581  * Read a block of directory entries in a filesystem independent format.
3582  */
3583 #ifndef _SYS_SYSPROTO_H_
3584 struct getdirentries_args {
3585 	int	fd;
3586 	char	*buf;
3587 	u_int	count;
3588 	long	*basep;
3589 };
3590 #endif
3591 int
3592 getdirentries(td, uap)
3593 	struct thread *td;
3594 	register struct getdirentries_args /* {
3595 		int fd;
3596 		char *buf;
3597 		u_int count;
3598 		long *basep;
3599 	} */ *uap;
3600 {
3601 	struct vnode *vp;
3602 	struct file *fp;
3603 	struct uio auio;
3604 	struct iovec aiov;
3605 	long loff;
3606 	int error, eofflag;
3607 
3608 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3609 		return (error);
3610 	if ((fp->f_flag & FREAD) == 0) {
3611 		fdrop(fp, td);
3612 		return (EBADF);
3613 	}
3614 	vp = fp->f_vnode;
3615 unionread:
3616 	if (vp->v_type != VDIR) {
3617 		fdrop(fp, td);
3618 		return (EINVAL);
3619 	}
3620 	aiov.iov_base = uap->buf;
3621 	aiov.iov_len = uap->count;
3622 	auio.uio_iov = &aiov;
3623 	auio.uio_iovcnt = 1;
3624 	auio.uio_rw = UIO_READ;
3625 	auio.uio_segflg = UIO_USERSPACE;
3626 	auio.uio_td = td;
3627 	auio.uio_resid = uap->count;
3628 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3629 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3630 	loff = auio.uio_offset = fp->f_offset;
3631 #ifdef MAC
3632 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3633 	if (error == 0)
3634 #endif
3635 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3636 		    NULL);
3637 	fp->f_offset = auio.uio_offset;
3638 	VOP_UNLOCK(vp, 0, td);
3639 	if (error) {
3640 		fdrop(fp, td);
3641 		return (error);
3642 	}
3643 	if (uap->count == auio.uio_resid) {
3644 		if (union_dircheckp) {
3645 			error = union_dircheckp(td, &vp, fp);
3646 			if (error == -1)
3647 				goto unionread;
3648 			if (error) {
3649 				fdrop(fp, td);
3650 				return (error);
3651 			}
3652 		}
3653 		/*
3654 		 * XXX We could delay dropping the lock above but
3655 		 * union_dircheckp complicates things.
3656 		 */
3657 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3658 		if ((vp->v_vflag & VV_ROOT) &&
3659 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3660 			struct vnode *tvp = vp;
3661 			vp = vp->v_mount->mnt_vnodecovered;
3662 			VREF(vp);
3663 			fp->f_vnode = vp;
3664 			fp->f_data = vp;
3665 			fp->f_offset = 0;
3666 			vput(tvp);
3667 			goto unionread;
3668 		}
3669 		VOP_UNLOCK(vp, 0, td);
3670 	}
3671 	if (uap->basep != NULL) {
3672 		error = copyout(&loff, uap->basep, sizeof(long));
3673 	}
3674 	td->td_retval[0] = uap->count - auio.uio_resid;
3675 	fdrop(fp, td);
3676 	return (error);
3677 }
3678 #ifndef _SYS_SYSPROTO_H_
3679 struct getdents_args {
3680 	int fd;
3681 	char *buf;
3682 	size_t count;
3683 };
3684 #endif
3685 int
3686 getdents(td, uap)
3687 	struct thread *td;
3688 	register struct getdents_args /* {
3689 		int fd;
3690 		char *buf;
3691 		u_int count;
3692 	} */ *uap;
3693 {
3694 	struct getdirentries_args ap;
3695 	ap.fd = uap->fd;
3696 	ap.buf = uap->buf;
3697 	ap.count = uap->count;
3698 	ap.basep = NULL;
3699 	return (getdirentries(td, &ap));
3700 }
3701 
3702 /*
3703  * Set the mode mask for creation of filesystem nodes.
3704  *
3705  * MP SAFE
3706  */
3707 #ifndef _SYS_SYSPROTO_H_
3708 struct umask_args {
3709 	int	newmask;
3710 };
3711 #endif
3712 int
3713 umask(td, uap)
3714 	struct thread *td;
3715 	struct umask_args /* {
3716 		int newmask;
3717 	} */ *uap;
3718 {
3719 	register struct filedesc *fdp;
3720 
3721 	FILEDESC_LOCK(td->td_proc->p_fd);
3722 	fdp = td->td_proc->p_fd;
3723 	td->td_retval[0] = fdp->fd_cmask;
3724 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3725 	FILEDESC_UNLOCK(td->td_proc->p_fd);
3726 	return (0);
3727 }
3728 
3729 /*
3730  * Void all references to file by ripping underlying filesystem
3731  * away from vnode.
3732  */
3733 #ifndef _SYS_SYSPROTO_H_
3734 struct revoke_args {
3735 	char	*path;
3736 };
3737 #endif
3738 /* ARGSUSED */
3739 int
3740 revoke(td, uap)
3741 	struct thread *td;
3742 	register struct revoke_args /* {
3743 		char *path;
3744 	} */ *uap;
3745 {
3746 	struct mount *mp;
3747 	struct vnode *vp;
3748 	struct vattr vattr;
3749 	int error;
3750 	struct nameidata nd;
3751 
3752 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
3753 	if ((error = namei(&nd)) != 0)
3754 		return (error);
3755 	vp = nd.ni_vp;
3756 	NDFREE(&nd, NDF_ONLY_PNBUF);
3757 	if (vp->v_type != VCHR) {
3758 		vput(vp);
3759 		return (EINVAL);
3760 	}
3761 #ifdef MAC
3762 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3763 	if (error) {
3764 		vput(vp);
3765 		return (error);
3766 	}
3767 #endif
3768 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3769 	if (error) {
3770 		vput(vp);
3771 		return (error);
3772 	}
3773 	VOP_UNLOCK(vp, 0, td);
3774 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3775 		error = suser_cred(td->td_ucred, PRISON_ROOT);
3776 		if (error)
3777 			goto out;
3778 	}
3779 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3780 		goto out;
3781 	if (vcount(vp) > 1)
3782 		VOP_REVOKE(vp, REVOKEALL);
3783 	vn_finished_write(mp);
3784 out:
3785 	vrele(vp);
3786 	return (error);
3787 }
3788 
3789 /*
3790  * Convert a user file descriptor to a kernel file entry.
3791  * A reference on the file entry is held upon returning.
3792  */
3793 int
3794 getvnode(fdp, fd, fpp)
3795 	struct filedesc *fdp;
3796 	int fd;
3797 	struct file **fpp;
3798 {
3799 	int error;
3800 	struct file *fp;
3801 
3802 	fp = NULL;
3803 	if (fdp == NULL)
3804 		error = EBADF;
3805 	else {
3806 		FILEDESC_LOCK(fdp);
3807 		if ((u_int)fd >= fdp->fd_nfiles ||
3808 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3809 			error = EBADF;
3810 		else if (fp->f_vnode == NULL) {
3811 			fp = NULL;
3812 			error = EINVAL;
3813 		} else {
3814 			fhold(fp);
3815 			error = 0;
3816 		}
3817 		FILEDESC_UNLOCK(fdp);
3818 	}
3819 	*fpp = fp;
3820 	return (error);
3821 }
3822 
3823 /*
3824  * Get (NFS) file handle
3825  */
3826 #ifndef _SYS_SYSPROTO_H_
3827 struct getfh_args {
3828 	char	*fname;
3829 	fhandle_t *fhp;
3830 };
3831 #endif
3832 int
3833 getfh(td, uap)
3834 	struct thread *td;
3835 	register struct getfh_args *uap;
3836 {
3837 	struct nameidata nd;
3838 	fhandle_t fh;
3839 	register struct vnode *vp;
3840 	int error;
3841 
3842 	/*
3843 	 * Must be super user
3844 	 */
3845 	error = suser(td);
3846 	if (error)
3847 		return (error);
3848 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3849 	error = namei(&nd);
3850 	if (error)
3851 		return (error);
3852 	NDFREE(&nd, NDF_ONLY_PNBUF);
3853 	vp = nd.ni_vp;
3854 	bzero(&fh, sizeof(fh));
3855 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3856 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3857 	vput(vp);
3858 	if (error)
3859 		return (error);
3860 	error = copyout(&fh, uap->fhp, sizeof (fh));
3861 	return (error);
3862 }
3863 
3864 /*
3865  * syscall for the rpc.lockd to use to translate a NFS file handle into
3866  * an open descriptor.
3867  *
3868  * warning: do not remove the suser() call or this becomes one giant
3869  * security hole.
3870  */
3871 #ifndef _SYS_SYSPROTO_H_
3872 struct fhopen_args {
3873 	const struct fhandle *u_fhp;
3874 	int flags;
3875 };
3876 #endif
3877 int
3878 fhopen(td, uap)
3879 	struct thread *td;
3880 	struct fhopen_args /* {
3881 		const struct fhandle *u_fhp;
3882 		int flags;
3883 	} */ *uap;
3884 {
3885 	struct proc *p = td->td_proc;
3886 	struct mount *mp;
3887 	struct vnode *vp;
3888 	struct fhandle fhp;
3889 	struct vattr vat;
3890 	struct vattr *vap = &vat;
3891 	struct flock lf;
3892 	struct file *fp;
3893 	register struct filedesc *fdp = p->p_fd;
3894 	int fmode, mode, error, type;
3895 	struct file *nfp;
3896 	int indx;
3897 
3898 	/*
3899 	 * Must be super user
3900 	 */
3901 	error = suser(td);
3902 	if (error)
3903 		return (error);
3904 
3905 	fmode = FFLAGS(uap->flags);
3906 	/* why not allow a non-read/write open for our lockd? */
3907 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3908 		return (EINVAL);
3909 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
3910 	if (error)
3911 		return(error);
3912 	/* find the mount point */
3913 	mp = vfs_getvfs(&fhp.fh_fsid);
3914 	if (mp == NULL)
3915 		return (ESTALE);
3916 	/* now give me my vnode, it gets returned to me locked */
3917 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3918 	if (error)
3919 		return (error);
3920 	/*
3921 	 * from now on we have to make sure not
3922 	 * to forget about the vnode
3923 	 * any error that causes an abort must vput(vp)
3924 	 * just set error = err and 'goto bad;'.
3925 	 */
3926 
3927 	/*
3928 	 * from vn_open
3929 	 */
3930 	if (vp->v_type == VLNK) {
3931 		error = EMLINK;
3932 		goto bad;
3933 	}
3934 	if (vp->v_type == VSOCK) {
3935 		error = EOPNOTSUPP;
3936 		goto bad;
3937 	}
3938 	mode = 0;
3939 	if (fmode & (FWRITE | O_TRUNC)) {
3940 		if (vp->v_type == VDIR) {
3941 			error = EISDIR;
3942 			goto bad;
3943 		}
3944 		error = vn_writechk(vp);
3945 		if (error)
3946 			goto bad;
3947 		mode |= VWRITE;
3948 	}
3949 	if (fmode & FREAD)
3950 		mode |= VREAD;
3951 	if (fmode & O_APPEND)
3952 		mode |= VAPPEND;
3953 #ifdef MAC
3954 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
3955 	if (error)
3956 		goto bad;
3957 #endif
3958 	if (mode) {
3959 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
3960 		if (error)
3961 			goto bad;
3962 	}
3963 	if (fmode & O_TRUNC) {
3964 		VOP_UNLOCK(vp, 0, td);				/* XXX */
3965 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3966 			vrele(vp);
3967 			return (error);
3968 		}
3969 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3970 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
3971 #ifdef MAC
3972 		/*
3973 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
3974 		 * should be right.
3975 		 */
3976 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
3977 		if (error == 0) {
3978 #endif
3979 			VATTR_NULL(vap);
3980 			vap->va_size = 0;
3981 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
3982 #ifdef MAC
3983 		}
3984 #endif
3985 		vn_finished_write(mp);
3986 		if (error)
3987 			goto bad;
3988 	}
3989 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
3990 	if (error)
3991 		goto bad;
3992 	/*
3993 	 * Make sure that a VM object is created for VMIO support.
3994 	 */
3995 	if (vn_canvmio(vp) == TRUE) {
3996 		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
3997 			goto bad;
3998 	}
3999 	if (fmode & FWRITE)
4000 		vp->v_writecount++;
4001 
4002 	/*
4003 	 * end of vn_open code
4004 	 */
4005 
4006 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4007 		if (fmode & FWRITE)
4008 			vp->v_writecount--;
4009 		goto bad;
4010 	}
4011 	/* An extra reference on `nfp' has been held for us by falloc(). */
4012 	fp = nfp;
4013 
4014 	nfp->f_vnode = vp;
4015 	nfp->f_data = vp;
4016 	nfp->f_flag = fmode & FMASK;
4017 	nfp->f_ops = &vnops;
4018 	nfp->f_type = DTYPE_VNODE;
4019 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4020 		lf.l_whence = SEEK_SET;
4021 		lf.l_start = 0;
4022 		lf.l_len = 0;
4023 		if (fmode & O_EXLOCK)
4024 			lf.l_type = F_WRLCK;
4025 		else
4026 			lf.l_type = F_RDLCK;
4027 		type = F_FLOCK;
4028 		if ((fmode & FNONBLOCK) == 0)
4029 			type |= F_WAIT;
4030 		VOP_UNLOCK(vp, 0, td);
4031 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4032 			    type)) != 0) {
4033 			/*
4034 			 * The lock request failed.  Normally close the
4035 			 * descriptor but handle the case where someone might
4036 			 * have dup()d or close()d it when we weren't looking.
4037 			 */
4038 			FILEDESC_LOCK(fdp);
4039 			if (fdp->fd_ofiles[indx] == fp) {
4040 				fdp->fd_ofiles[indx] = NULL;
4041 				fdunused(fdp, indx);
4042 				FILEDESC_UNLOCK(fdp);
4043 				fdrop(fp, td);
4044 			} else {
4045 				FILEDESC_UNLOCK(fdp);
4046 			}
4047 			/*
4048 			 * release our private reference
4049 			 */
4050 			fdrop(fp, td);
4051 			return(error);
4052 		}
4053 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4054 		fp->f_flag |= FHASLOCK;
4055 	}
4056 	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
4057 		vfs_object_create(vp, td, td->td_ucred);
4058 
4059 	VOP_UNLOCK(vp, 0, td);
4060 	fdrop(fp, td);
4061 	td->td_retval[0] = indx;
4062 	return (0);
4063 
4064 bad:
4065 	vput(vp);
4066 	return (error);
4067 }
4068 
4069 /*
4070  * Stat an (NFS) file handle.
4071  */
4072 #ifndef _SYS_SYSPROTO_H_
4073 struct fhstat_args {
4074 	struct fhandle *u_fhp;
4075 	struct stat *sb;
4076 };
4077 #endif
4078 int
4079 fhstat(td, uap)
4080 	struct thread *td;
4081 	register struct fhstat_args /* {
4082 		struct fhandle *u_fhp;
4083 		struct stat *sb;
4084 	} */ *uap;
4085 {
4086 	struct stat sb;
4087 	fhandle_t fh;
4088 	struct mount *mp;
4089 	struct vnode *vp;
4090 	int error;
4091 
4092 	/*
4093 	 * Must be super user
4094 	 */
4095 	error = suser(td);
4096 	if (error)
4097 		return (error);
4098 
4099 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4100 	if (error)
4101 		return (error);
4102 
4103 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4104 		return (ESTALE);
4105 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4106 		return (error);
4107 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4108 	vput(vp);
4109 	if (error)
4110 		return (error);
4111 	error = copyout(&sb, uap->sb, sizeof(sb));
4112 	return (error);
4113 }
4114 
4115 /*
4116  * Implement fstatfs() for (NFS) file handles.
4117  */
4118 #ifndef _SYS_SYSPROTO_H_
4119 struct fhstatfs_args {
4120 	struct fhandle *u_fhp;
4121 	struct statfs *buf;
4122 };
4123 #endif
4124 int
4125 fhstatfs(td, uap)
4126 	struct thread *td;
4127 	struct fhstatfs_args /* {
4128 		struct fhandle *u_fhp;
4129 		struct statfs *buf;
4130 	} */ *uap;
4131 {
4132 	struct statfs *sp, sb;
4133 	struct mount *mp;
4134 	struct vnode *vp;
4135 	fhandle_t fh;
4136 	int error;
4137 
4138 	/*
4139 	 * Must be super user
4140 	 */
4141 	error = suser(td);
4142 	if (error)
4143 		return (error);
4144 
4145 	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
4146 		return (error);
4147 
4148 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4149 		return (ESTALE);
4150 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4151 		return (error);
4152 	mp = vp->v_mount;
4153 	sp = &mp->mnt_stat;
4154 	vput(vp);
4155 #ifdef MAC
4156 	error = mac_check_mount_stat(td->td_ucred, mp);
4157 	if (error)
4158 		return (error);
4159 #endif
4160 	/*
4161 	 * Set these in case the underlying filesystem fails to do so.
4162 	 */
4163 	sp->f_version = STATFS_VERSION;
4164 	sp->f_namemax = NAME_MAX;
4165 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4166 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
4167 		return (error);
4168 	if (suser(td)) {
4169 		bcopy(sp, &sb, sizeof(sb));
4170 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
4171 		sp = &sb;
4172 	}
4173 	return (copyout(sp, uap->buf, sizeof(*sp)));
4174 }
4175 
4176 /*
4177  * Syscall to push extended attribute configuration information into the
4178  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4179  * a command (int cmd), and attribute name and misc data.  For now, the
4180  * attribute name is left in userspace for consumption by the VFS_op.
4181  * It will probably be changed to be copied into sysspace by the
4182  * syscall in the future, once issues with various consumers of the
4183  * attribute code have raised their hands.
4184  *
4185  * Currently this is used only by UFS Extended Attributes.
4186  */
4187 int
4188 extattrctl(td, uap)
4189 	struct thread *td;
4190 	struct extattrctl_args /* {
4191 		const char *path;
4192 		int cmd;
4193 		const char *filename;
4194 		int attrnamespace;
4195 		const char *attrname;
4196 	} */ *uap;
4197 {
4198 	struct vnode *filename_vp;
4199 	struct nameidata nd;
4200 	struct mount *mp, *mp_writable;
4201 	char attrname[EXTATTR_MAXNAMELEN];
4202 	int error;
4203 
4204 	/*
4205 	 * uap->attrname is not always defined.  We check again later when we
4206 	 * invoke the VFS call so as to pass in NULL there if needed.
4207 	 */
4208 	if (uap->attrname != NULL) {
4209 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4210 		    NULL);
4211 		if (error)
4212 			return (error);
4213 	}
4214 
4215 	/*
4216 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4217 	 * which VFS_EXTATTRCTL() will later release.
4218 	 */
4219 	filename_vp = NULL;
4220 	if (uap->filename != NULL) {
4221 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
4222 		    uap->filename, td);
4223 		error = namei(&nd);
4224 		if (error)
4225 			return (error);
4226 		filename_vp = nd.ni_vp;
4227 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4228 	}
4229 
4230 	/* uap->path is always defined. */
4231 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4232 	error = namei(&nd);
4233 	if (error) {
4234 		if (filename_vp != NULL)
4235 			vput(filename_vp);
4236 		return (error);
4237 	}
4238 	mp = nd.ni_vp->v_mount;
4239 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4240 	NDFREE(&nd, 0);
4241 	if (error) {
4242 		if (filename_vp != NULL)
4243 			vput(filename_vp);
4244 		return (error);
4245 	}
4246 
4247 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
4248 	    uap->attrname != NULL ? attrname : NULL, td);
4249 
4250 	vn_finished_write(mp_writable);
4251 	/*
4252 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4253 	 * filename_vp, so vrele it if it is defined.
4254 	 */
4255 	if (filename_vp != NULL)
4256 		vrele(filename_vp);
4257 	return (error);
4258 }
4259 
4260 /*-
4261  * Set a named extended attribute on a file or directory
4262  *
4263  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4264  *            kernelspace string pointer "attrname", userspace buffer
4265  *            pointer "data", buffer length "nbytes", thread "td".
4266  * Returns: 0 on success, an error number otherwise
4267  * Locks: none
4268  * References: vp must be a valid reference for the duration of the call
4269  */
4270 static int
4271 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4272     void *data, size_t nbytes, struct thread *td)
4273 {
4274 	struct mount *mp;
4275 	struct uio auio;
4276 	struct iovec aiov;
4277 	ssize_t cnt;
4278 	int error;
4279 
4280 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4281 	if (error)
4282 		return (error);
4283 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4284 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4285 
4286 	aiov.iov_base = data;
4287 	aiov.iov_len = nbytes;
4288 	auio.uio_iov = &aiov;
4289 	auio.uio_iovcnt = 1;
4290 	auio.uio_offset = 0;
4291 	if (nbytes > INT_MAX) {
4292 		error = EINVAL;
4293 		goto done;
4294 	}
4295 	auio.uio_resid = nbytes;
4296 	auio.uio_rw = UIO_WRITE;
4297 	auio.uio_segflg = UIO_USERSPACE;
4298 	auio.uio_td = td;
4299 	cnt = nbytes;
4300 
4301 #ifdef MAC
4302 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4303 	    attrname, &auio);
4304 	if (error)
4305 		goto done;
4306 #endif
4307 
4308 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4309 	    td->td_ucred, td);
4310 	cnt -= auio.uio_resid;
4311 	td->td_retval[0] = cnt;
4312 
4313 done:
4314 	VOP_UNLOCK(vp, 0, td);
4315 	vn_finished_write(mp);
4316 	return (error);
4317 }
4318 
4319 int
4320 extattr_set_fd(td, uap)
4321 	struct thread *td;
4322 	struct extattr_set_fd_args /* {
4323 		int fd;
4324 		int attrnamespace;
4325 		const char *attrname;
4326 		void *data;
4327 		size_t nbytes;
4328 	} */ *uap;
4329 {
4330 	struct file *fp;
4331 	char attrname[EXTATTR_MAXNAMELEN];
4332 	int error;
4333 
4334 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4335 	if (error)
4336 		return (error);
4337 
4338 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4339 	if (error)
4340 		return (error);
4341 
4342 	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4343 	    attrname, uap->data, uap->nbytes, td);
4344 	fdrop(fp, td);
4345 
4346 	return (error);
4347 }
4348 
4349 int
4350 extattr_set_file(td, uap)
4351 	struct thread *td;
4352 	struct extattr_set_file_args /* {
4353 		const char *path;
4354 		int attrnamespace;
4355 		const char *attrname;
4356 		void *data;
4357 		size_t nbytes;
4358 	} */ *uap;
4359 {
4360 	struct nameidata nd;
4361 	char attrname[EXTATTR_MAXNAMELEN];
4362 	int error;
4363 
4364 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4365 	if (error)
4366 		return (error);
4367 
4368 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4369 	error = namei(&nd);
4370 	if (error)
4371 		return (error);
4372 	NDFREE(&nd, NDF_ONLY_PNBUF);
4373 
4374 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4375 	    uap->data, uap->nbytes, td);
4376 
4377 	vrele(nd.ni_vp);
4378 	return (error);
4379 }
4380 
4381 int
4382 extattr_set_link(td, uap)
4383 	struct thread *td;
4384 	struct extattr_set_link_args /* {
4385 		const char *path;
4386 		int attrnamespace;
4387 		const char *attrname;
4388 		void *data;
4389 		size_t nbytes;
4390 	} */ *uap;
4391 {
4392 	struct nameidata nd;
4393 	char attrname[EXTATTR_MAXNAMELEN];
4394 	int error;
4395 
4396 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4397 	if (error)
4398 		return (error);
4399 
4400 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4401 	error = namei(&nd);
4402 	if (error)
4403 		return (error);
4404 	NDFREE(&nd, NDF_ONLY_PNBUF);
4405 
4406 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4407 	    uap->data, uap->nbytes, td);
4408 
4409 	vrele(nd.ni_vp);
4410 	return (error);
4411 }
4412 
4413 /*-
4414  * Get a named extended attribute on a file or directory
4415  *
4416  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4417  *            kernelspace string pointer "attrname", userspace buffer
4418  *            pointer "data", buffer length "nbytes", thread "td".
4419  * Returns: 0 on success, an error number otherwise
4420  * Locks: none
4421  * References: vp must be a valid reference for the duration of the call
4422  */
4423 static int
4424 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4425     void *data, size_t nbytes, struct thread *td)
4426 {
4427 	struct uio auio, *auiop;
4428 	struct iovec aiov;
4429 	ssize_t cnt;
4430 	size_t size, *sizep;
4431 	int error;
4432 
4433 	/*
4434 	 * XXX: Temporary API compatibility for applications that know
4435 	 * about this hack ("" means list), but haven't been updated
4436 	 * for the extattr_list_*() system calls yet.  This will go
4437 	 * away for FreeBSD 5.3.
4438 	 */
4439 	if (strlen(attrname) == 0)
4440 		return (extattr_list_vp(vp, attrnamespace, data, nbytes, td));
4441 
4442 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4443 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4444 
4445 	/*
4446 	 * Slightly unusual semantics: if the user provides a NULL data
4447 	 * pointer, they don't want to receive the data, just the
4448 	 * maximum read length.
4449 	 */
4450 	auiop = NULL;
4451 	sizep = NULL;
4452 	cnt = 0;
4453 	if (data != NULL) {
4454 		aiov.iov_base = data;
4455 		aiov.iov_len = nbytes;
4456 		auio.uio_iov = &aiov;
4457 		auio.uio_offset = 0;
4458 		if (nbytes > INT_MAX) {
4459 			error = EINVAL;
4460 			goto done;
4461 		}
4462 		auio.uio_resid = nbytes;
4463 		auio.uio_rw = UIO_READ;
4464 		auio.uio_segflg = UIO_USERSPACE;
4465 		auio.uio_td = td;
4466 		auiop = &auio;
4467 		cnt = nbytes;
4468 	} else {
4469 		sizep = &size;
4470 	}
4471 
4472 #ifdef MAC
4473 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4474 	    attrname, &auio);
4475 	if (error)
4476 		goto done;
4477 #endif
4478 
4479 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4480 	    td->td_ucred, td);
4481 
4482 	if (auiop != NULL) {
4483 		cnt -= auio.uio_resid;
4484 		td->td_retval[0] = cnt;
4485 	} else {
4486 		td->td_retval[0] = size;
4487 	}
4488 
4489 done:
4490 	VOP_UNLOCK(vp, 0, td);
4491 	return (error);
4492 }
4493 
4494 int
4495 extattr_get_fd(td, uap)
4496 	struct thread *td;
4497 	struct extattr_get_fd_args /* {
4498 		int fd;
4499 		int attrnamespace;
4500 		const char *attrname;
4501 		void *data;
4502 		size_t nbytes;
4503 	} */ *uap;
4504 {
4505 	struct file *fp;
4506 	char attrname[EXTATTR_MAXNAMELEN];
4507 	int error;
4508 
4509 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4510 	if (error)
4511 		return (error);
4512 
4513 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4514 	if (error)
4515 		return (error);
4516 
4517 	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4518 	    attrname, uap->data, uap->nbytes, td);
4519 
4520 	fdrop(fp, td);
4521 	return (error);
4522 }
4523 
4524 int
4525 extattr_get_file(td, uap)
4526 	struct thread *td;
4527 	struct extattr_get_file_args /* {
4528 		const char *path;
4529 		int attrnamespace;
4530 		const char *attrname;
4531 		void *data;
4532 		size_t nbytes;
4533 	} */ *uap;
4534 {
4535 	struct nameidata nd;
4536 	char attrname[EXTATTR_MAXNAMELEN];
4537 	int error;
4538 
4539 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4540 	if (error)
4541 		return (error);
4542 
4543 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4544 	error = namei(&nd);
4545 	if (error)
4546 		return (error);
4547 	NDFREE(&nd, NDF_ONLY_PNBUF);
4548 
4549 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4550 	    uap->data, uap->nbytes, td);
4551 
4552 	vrele(nd.ni_vp);
4553 	return (error);
4554 }
4555 
4556 int
4557 extattr_get_link(td, uap)
4558 	struct thread *td;
4559 	struct extattr_get_link_args /* {
4560 		const char *path;
4561 		int attrnamespace;
4562 		const char *attrname;
4563 		void *data;
4564 		size_t nbytes;
4565 	} */ *uap;
4566 {
4567 	struct nameidata nd;
4568 	char attrname[EXTATTR_MAXNAMELEN];
4569 	int error;
4570 
4571 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4572 	if (error)
4573 		return (error);
4574 
4575 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4576 	error = namei(&nd);
4577 	if (error)
4578 		return (error);
4579 	NDFREE(&nd, NDF_ONLY_PNBUF);
4580 
4581 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4582 	    uap->data, uap->nbytes, td);
4583 
4584 	vrele(nd.ni_vp);
4585 	return (error);
4586 }
4587 
4588 /*
4589  * extattr_delete_vp(): Delete a named extended attribute on a file or
4590  *                      directory
4591  *
4592  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4593  *            kernelspace string pointer "attrname", proc "p"
4594  * Returns: 0 on success, an error number otherwise
4595  * Locks: none
4596  * References: vp must be a valid reference for the duration of the call
4597  */
4598 static int
4599 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4600     struct thread *td)
4601 {
4602 	struct mount *mp;
4603 	int error;
4604 
4605 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4606 	if (error)
4607 		return (error);
4608 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4609 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4610 
4611 #ifdef MAC
4612 	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4613 	    attrname);
4614 	if (error)
4615 		goto done;
4616 #endif
4617 
4618 	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4619 	    td);
4620 	if (error == EOPNOTSUPP)
4621 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4622 		    td->td_ucred, td);
4623 #ifdef MAC
4624 done:
4625 #endif
4626 	VOP_UNLOCK(vp, 0, td);
4627 	vn_finished_write(mp);
4628 	return (error);
4629 }
4630 
4631 int
4632 extattr_delete_fd(td, uap)
4633 	struct thread *td;
4634 	struct extattr_delete_fd_args /* {
4635 		int fd;
4636 		int attrnamespace;
4637 		const char *attrname;
4638 	} */ *uap;
4639 {
4640 	struct file *fp;
4641 	struct vnode *vp;
4642 	char attrname[EXTATTR_MAXNAMELEN];
4643 	int error;
4644 
4645 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4646 	if (error)
4647 		return (error);
4648 
4649 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4650 	if (error)
4651 		return (error);
4652 	vp = fp->f_vnode;
4653 
4654 	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4655 	fdrop(fp, td);
4656 	return (error);
4657 }
4658 
4659 int
4660 extattr_delete_file(td, uap)
4661 	struct thread *td;
4662 	struct extattr_delete_file_args /* {
4663 		const char *path;
4664 		int attrnamespace;
4665 		const char *attrname;
4666 	} */ *uap;
4667 {
4668 	struct nameidata nd;
4669 	char attrname[EXTATTR_MAXNAMELEN];
4670 	int error;
4671 
4672 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4673 	if (error)
4674 		return(error);
4675 
4676 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4677 	error = namei(&nd);
4678 	if (error)
4679 		return(error);
4680 	NDFREE(&nd, NDF_ONLY_PNBUF);
4681 
4682 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4683 	vrele(nd.ni_vp);
4684 	return(error);
4685 }
4686 
4687 int
4688 extattr_delete_link(td, uap)
4689 	struct thread *td;
4690 	struct extattr_delete_link_args /* {
4691 		const char *path;
4692 		int attrnamespace;
4693 		const char *attrname;
4694 	} */ *uap;
4695 {
4696 	struct nameidata nd;
4697 	char attrname[EXTATTR_MAXNAMELEN];
4698 	int error;
4699 
4700 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4701 	if (error)
4702 		return(error);
4703 
4704 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4705 	error = namei(&nd);
4706 	if (error)
4707 		return(error);
4708 	NDFREE(&nd, NDF_ONLY_PNBUF);
4709 
4710 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4711 	vrele(nd.ni_vp);
4712 	return(error);
4713 }
4714 
4715 /*-
4716  * Retrieve a list of extended attributes on a file or directory.
4717  *
4718  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4719  *            userspace buffer pointer "data", buffer length "nbytes",
4720  *            thread "td".
4721  * Returns: 0 on success, an error number otherwise
4722  * Locks: none
4723  * References: vp must be a valid reference for the duration of the call
4724  */
4725 static int
4726 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4727     size_t nbytes, struct thread *td)
4728 {
4729 	struct uio auio, *auiop;
4730 	size_t size, *sizep;
4731 	struct iovec aiov;
4732 	ssize_t cnt;
4733 	int error;
4734 
4735 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4736 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4737 
4738 	auiop = NULL;
4739 	sizep = NULL;
4740 	cnt = 0;
4741 	if (data != NULL) {
4742 		aiov.iov_base = data;
4743 		aiov.iov_len = nbytes;
4744 		auio.uio_iov = &aiov;
4745 		auio.uio_offset = 0;
4746 		if (nbytes > INT_MAX) {
4747 			error = EINVAL;
4748 			goto done;
4749 		}
4750 		auio.uio_resid = nbytes;
4751 		auio.uio_rw = UIO_READ;
4752 		auio.uio_segflg = UIO_USERSPACE;
4753 		auio.uio_td = td;
4754 		auiop = &auio;
4755 		cnt = nbytes;
4756 	} else {
4757 		sizep = &size;
4758 	}
4759 
4760 #ifdef MAC
4761 	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4762 	if (error)
4763 		goto done;
4764 #endif
4765 
4766 	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4767 	    td->td_ucred, td);
4768 
4769 	if (auiop != NULL) {
4770 		cnt -= auio.uio_resid;
4771 		td->td_retval[0] = cnt;
4772 	} else {
4773 		td->td_retval[0] = size;
4774 	}
4775 
4776 done:
4777 	VOP_UNLOCK(vp, 0, td);
4778 	return (error);
4779 }
4780 
4781 
4782 int
4783 extattr_list_fd(td, uap)
4784 	struct thread *td;
4785 	struct extattr_list_fd_args /* {
4786 		int fd;
4787 		int attrnamespace;
4788 		void *data;
4789 		size_t nbytes;
4790 	} */ *uap;
4791 {
4792 	struct file *fp;
4793 	int error;
4794 
4795 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4796 	if (error)
4797 		return (error);
4798 
4799 	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
4800 	    uap->nbytes, td);
4801 
4802 	fdrop(fp, td);
4803 	return (error);
4804 }
4805 
4806 int
4807 extattr_list_file(td, uap)
4808 	struct thread*td;
4809 	struct extattr_list_file_args /* {
4810 		const char *path;
4811 		int attrnamespace;
4812 		void *data;
4813 		size_t nbytes;
4814 	} */ *uap;
4815 {
4816 	struct nameidata nd;
4817 	int error;
4818 
4819 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4820 	error = namei(&nd);
4821 	if (error)
4822 		return (error);
4823 	NDFREE(&nd, NDF_ONLY_PNBUF);
4824 
4825 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4826 	    uap->nbytes, td);
4827 
4828 	vrele(nd.ni_vp);
4829 	return (error);
4830 }
4831 
4832 int
4833 extattr_list_link(td, uap)
4834 	struct thread*td;
4835 	struct extattr_list_link_args /* {
4836 		const char *path;
4837 		int attrnamespace;
4838 		void *data;
4839 		size_t nbytes;
4840 	} */ *uap;
4841 {
4842 	struct nameidata nd;
4843 	int error;
4844 
4845 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4846 	error = namei(&nd);
4847 	if (error)
4848 		return (error);
4849 	NDFREE(&nd, NDF_ONLY_PNBUF);
4850 
4851 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4852 	    uap->nbytes, td);
4853 
4854 	vrele(nd.ni_vp);
4855 	return (error);
4856 }
4857