xref: /freebsd/sys/kern/vfs_syscalls.c (revision ceaec73d406831b1251babb61675df0a1aa54a31)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/mac.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/mutex.h>
52 #include <sys/sysproto.h>
53 #include <sys/namei.h>
54 #include <sys/filedesc.h>
55 #include <sys/kernel.h>
56 #include <sys/fcntl.h>
57 #include <sys/file.h>
58 #include <sys/limits.h>
59 #include <sys/linker.h>
60 #include <sys/stat.h>
61 #include <sys/sx.h>
62 #include <sys/unistd.h>
63 #include <sys/vnode.h>
64 #include <sys/proc.h>
65 #include <sys/dirent.h>
66 #include <sys/extattr.h>
67 #include <sys/jail.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/stdarg.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/uma.h>
77 
78 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
79 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
80 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
81 static int setfmode(struct thread *td, struct vnode *, int);
82 static int setfflags(struct thread *td, struct vnode *, int);
83 static int setutimes(struct thread *td, struct vnode *,
84     const struct timespec *, int, int);
85 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
86     struct thread *td);
87 
88 static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
89     size_t nbytes, struct thread *td);
90 
91 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
92 
93 /*
94  * The module initialization routine for POSIX asynchronous I/O will
95  * set this to the version of AIO that it implements.  (Zero means
96  * that it is not implemented.)  This value is used here by pathconf()
97  * and in kern_descrip.c by fpathconf().
98  */
99 int async_io_version;
100 
101 /*
102  * Sync each mounted filesystem.
103  */
104 #ifndef _SYS_SYSPROTO_H_
105 struct sync_args {
106 	int     dummy;
107 };
108 #endif
109 
110 #ifdef DEBUG
111 static int syncprt = 0;
112 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
113 #endif
114 
115 /* ARGSUSED */
116 int
117 sync(td, uap)
118 	struct thread *td;
119 	struct sync_args *uap;
120 {
121 	struct mount *mp, *nmp;
122 	int asyncflag;
123 
124 	mtx_lock(&Giant);
125 	mtx_lock(&mountlist_mtx);
126 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
127 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
128 			nmp = TAILQ_NEXT(mp, mnt_list);
129 			continue;
130 		}
131 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
132 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
133 			asyncflag = mp->mnt_flag & MNT_ASYNC;
134 			mp->mnt_flag &= ~MNT_ASYNC;
135 			vfs_msync(mp, MNT_NOWAIT);
136 			VFS_SYNC(mp, MNT_NOWAIT, td);
137 			mp->mnt_flag |= asyncflag;
138 			vn_finished_write(mp);
139 		}
140 		mtx_lock(&mountlist_mtx);
141 		nmp = TAILQ_NEXT(mp, mnt_list);
142 		vfs_unbusy(mp, td);
143 	}
144 	mtx_unlock(&mountlist_mtx);
145 #if 0
146 /*
147  * XXX don't call vfs_bufstats() yet because that routine
148  * was not imported in the Lite2 merge.
149  */
150 #ifdef DIAGNOSTIC
151 	if (syncprt)
152 		vfs_bufstats();
153 #endif /* DIAGNOSTIC */
154 #endif
155 	mtx_unlock(&Giant);
156 	return (0);
157 }
158 
159 /* XXX PRISON: could be per prison flag */
160 static int prison_quotas;
161 #if 0
162 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
163 #endif
164 
165 /*
166  * Change filesystem quotas.
167  */
168 #ifndef _SYS_SYSPROTO_H_
169 struct quotactl_args {
170 	char *path;
171 	int cmd;
172 	int uid;
173 	caddr_t arg;
174 };
175 #endif
176 int
177 quotactl(td, uap)
178 	struct thread *td;
179 	register struct quotactl_args /* {
180 		char *path;
181 		int cmd;
182 		int uid;
183 		caddr_t arg;
184 	} */ *uap;
185 {
186 	struct mount *mp, *vmp;
187 	int error;
188 	struct nameidata nd;
189 
190 	if (jailed(td->td_ucred) && !prison_quotas)
191 		return (EPERM);
192 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
193 	if ((error = namei(&nd)) != 0)
194 		return (error);
195 	NDFREE(&nd, NDF_ONLY_PNBUF);
196 	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
197 	mp = nd.ni_vp->v_mount;
198 	vrele(nd.ni_vp);
199 	if (error)
200 		return (error);
201 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
202 	vn_finished_write(vmp);
203 	return (error);
204 }
205 
206 /*
207  * Get filesystem statistics.
208  */
209 #ifndef _SYS_SYSPROTO_H_
210 struct statfs_args {
211 	char *path;
212 	struct statfs *buf;
213 };
214 #endif
215 int
216 statfs(td, uap)
217 	struct thread *td;
218 	register struct statfs_args /* {
219 		char *path;
220 		struct statfs *buf;
221 	} */ *uap;
222 {
223 	struct statfs sf;
224 	int error;
225 
226 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
227 	if (error == 0)
228 		error = copyout(&sf, uap->buf, sizeof(sf));
229 	return (error);
230 }
231 
232 int
233 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
234     struct statfs *buf)
235 {
236 	struct mount *mp;
237 	struct statfs *sp, sb;
238 	int error;
239 	struct nameidata nd;
240 
241 	mtx_lock(&Giant);
242 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
243 	if ((error = namei(&nd)) != 0) {
244 		mtx_unlock(&Giant);
245 		return (error);
246 	}
247 	mp = nd.ni_vp->v_mount;
248 	sp = &mp->mnt_stat;
249 	NDFREE(&nd, NDF_ONLY_PNBUF);
250 	vrele(nd.ni_vp);
251 #ifdef MAC
252 	error = mac_check_mount_stat(td->td_ucred, mp);
253 	if (error) {
254 		mtx_unlock(&Giant);
255 		return (error);
256 	}
257 #endif
258 	/*
259 	 * Set these in case the underlying filesystem fails to do so.
260 	 */
261 	sp->f_version = STATFS_VERSION;
262 	sp->f_namemax = NAME_MAX;
263 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
264 	error = VFS_STATFS(mp, sp, td);
265 	mtx_unlock(&Giant);
266 	if (error)
267 		return (error);
268 	if (suser(td)) {
269 		bcopy(sp, &sb, sizeof(sb));
270 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
271 		sp = &sb;
272 	}
273 	*buf = *sp;
274 	return (0);
275 }
276 
277 /*
278  * Get filesystem statistics.
279  */
280 #ifndef _SYS_SYSPROTO_H_
281 struct fstatfs_args {
282 	int fd;
283 	struct statfs *buf;
284 };
285 #endif
286 int
287 fstatfs(td, uap)
288 	struct thread *td;
289 	register struct fstatfs_args /* {
290 		int fd;
291 		struct statfs *buf;
292 	} */ *uap;
293 {
294 	struct statfs sf;
295 	int error;
296 
297 	error = kern_fstatfs(td, uap->fd, &sf);
298 	if (error == 0)
299 		error = copyout(&sf, uap->buf, sizeof(sf));
300 	return (error);
301 }
302 
303 int
304 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
305 {
306 	struct file *fp;
307 	struct mount *mp;
308 	struct statfs *sp, sb;
309 	int error;
310 
311 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
312 		return (error);
313 	mtx_lock(&Giant);
314 	mp = fp->f_vnode->v_mount;
315 	fdrop(fp, td);
316 	if (mp == NULL) {
317 		mtx_unlock(&Giant);
318 		return (EBADF);
319 	}
320 #ifdef MAC
321 	error = mac_check_mount_stat(td->td_ucred, mp);
322 	if (error) {
323 		mtx_unlock(&Giant);
324 		return (error);
325 	}
326 #endif
327 	sp = &mp->mnt_stat;
328 	/*
329 	 * Set these in case the underlying filesystem fails to do so.
330 	 */
331 	sp->f_version = STATFS_VERSION;
332 	sp->f_namemax = NAME_MAX;
333 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
334 	error = VFS_STATFS(mp, sp, td);
335 	mtx_unlock(&Giant);
336 	if (error)
337 		return (error);
338 	if (suser(td)) {
339 		bcopy(sp, &sb, sizeof(sb));
340 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
341 		sp = &sb;
342 	}
343 	*buf = *sp;
344 	return (0);
345 }
346 
347 /*
348  * Get statistics on all filesystems.
349  */
350 #ifndef _SYS_SYSPROTO_H_
351 struct getfsstat_args {
352 	struct statfs *buf;
353 	long bufsize;
354 	int flags;
355 };
356 #endif
357 int
358 getfsstat(td, uap)
359 	struct thread *td;
360 	register struct getfsstat_args /* {
361 		struct statfs *buf;
362 		long bufsize;
363 		int flags;
364 	} */ *uap;
365 {
366 	struct mount *mp, *nmp;
367 	struct statfs *sp, sb;
368 	caddr_t sfsp;
369 	long count, maxcount, error;
370 
371 	maxcount = uap->bufsize / sizeof(struct statfs);
372 	sfsp = (caddr_t)uap->buf;
373 	count = 0;
374 	mtx_lock(&Giant);
375 	mtx_lock(&mountlist_mtx);
376 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
377 		if (!prison_check_mount(td->td_ucred, mp)) {
378 			nmp = TAILQ_NEXT(mp, mnt_list);
379 			continue;
380 		}
381 #ifdef MAC
382 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
383 			nmp = TAILQ_NEXT(mp, mnt_list);
384 			continue;
385 		}
386 #endif
387 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
388 			nmp = TAILQ_NEXT(mp, mnt_list);
389 			continue;
390 		}
391 		if (sfsp && count < maxcount) {
392 			sp = &mp->mnt_stat;
393 			/*
394 			 * Set these in case the underlying filesystem
395 			 * fails to do so.
396 			 */
397 			sp->f_version = STATFS_VERSION;
398 			sp->f_namemax = NAME_MAX;
399 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
400 			/*
401 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
402 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
403 			 * overrides MNT_WAIT.
404 			 */
405 			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
406 			    (uap->flags & MNT_WAIT)) &&
407 			    (error = VFS_STATFS(mp, sp, td))) {
408 				mtx_lock(&mountlist_mtx);
409 				nmp = TAILQ_NEXT(mp, mnt_list);
410 				vfs_unbusy(mp, td);
411 				continue;
412 			}
413 			if (suser(td)) {
414 				bcopy(sp, &sb, sizeof(sb));
415 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
416 				sp = &sb;
417 			}
418 			error = copyout(sp, sfsp, sizeof(*sp));
419 			if (error) {
420 				vfs_unbusy(mp, td);
421 				mtx_unlock(&Giant);
422 				return (error);
423 			}
424 			sfsp += sizeof(*sp);
425 		}
426 		count++;
427 		mtx_lock(&mountlist_mtx);
428 		nmp = TAILQ_NEXT(mp, mnt_list);
429 		vfs_unbusy(mp, td);
430 	}
431 	mtx_unlock(&mountlist_mtx);
432 	mtx_unlock(&Giant);
433 	if (sfsp && count > maxcount)
434 		td->td_retval[0] = maxcount;
435 	else
436 		td->td_retval[0] = count;
437 	return (0);
438 }
439 
440 #ifdef COMPAT_FREEBSD4
441 /*
442  * Get old format filesystem statistics.
443  */
444 static void cvtstatfs(struct thread *, struct statfs *, struct ostatfs *);
445 
446 #ifndef _SYS_SYSPROTO_H_
447 struct freebsd4_statfs_args {
448 	char *path;
449 	struct ostatfs *buf;
450 };
451 #endif
452 int
453 freebsd4_statfs(td, uap)
454 	struct thread *td;
455 	struct freebsd4_statfs_args /* {
456 		char *path;
457 		struct ostatfs *buf;
458 	} */ *uap;
459 {
460 	struct ostatfs osb;
461 	struct statfs sf;
462 	int error;
463 
464 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
465 	if (error)
466 		return (error);
467 	cvtstatfs(td, &sf, &osb);
468 	return (copyout(&osb, uap->buf, sizeof(osb)));
469 }
470 
471 /*
472  * Get filesystem statistics.
473  */
474 #ifndef _SYS_SYSPROTO_H_
475 struct freebsd4_fstatfs_args {
476 	int fd;
477 	struct ostatfs *buf;
478 };
479 #endif
480 int
481 freebsd4_fstatfs(td, uap)
482 	struct thread *td;
483 	struct freebsd4_fstatfs_args /* {
484 		int fd;
485 		struct ostatfs *buf;
486 	} */ *uap;
487 {
488 	struct ostatfs osb;
489 	struct statfs sf;
490 	int error;
491 
492 	error = kern_fstatfs(td, uap->fd, &sf);
493 	if (error)
494 		return (error);
495 	cvtstatfs(td, &sf, &osb);
496 	return (copyout(&osb, uap->buf, sizeof(osb)));
497 }
498 
499 /*
500  * Get statistics on all filesystems.
501  */
502 #ifndef _SYS_SYSPROTO_H_
503 struct freebsd4_getfsstat_args {
504 	struct ostatfs *buf;
505 	long bufsize;
506 	int flags;
507 };
508 #endif
509 int
510 freebsd4_getfsstat(td, uap)
511 	struct thread *td;
512 	register struct freebsd4_getfsstat_args /* {
513 		struct ostatfs *buf;
514 		long bufsize;
515 		int flags;
516 	} */ *uap;
517 {
518 	struct mount *mp, *nmp;
519 	struct statfs *sp;
520 	struct ostatfs osb;
521 	caddr_t sfsp;
522 	long count, maxcount, error;
523 
524 	maxcount = uap->bufsize / sizeof(struct ostatfs);
525 	sfsp = (caddr_t)uap->buf;
526 	count = 0;
527 	mtx_lock(&mountlist_mtx);
528 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
529 		if (!prison_check_mount(td->td_ucred, mp)) {
530 			nmp = TAILQ_NEXT(mp, mnt_list);
531 			continue;
532 		}
533 #ifdef MAC
534 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
535 			nmp = TAILQ_NEXT(mp, mnt_list);
536 			continue;
537 		}
538 #endif
539 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
540 			nmp = TAILQ_NEXT(mp, mnt_list);
541 			continue;
542 		}
543 		if (sfsp && count < maxcount) {
544 			sp = &mp->mnt_stat;
545 			/*
546 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
547 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
548 			 * overrides MNT_WAIT.
549 			 */
550 			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
551 			    (uap->flags & MNT_WAIT)) &&
552 			    (error = VFS_STATFS(mp, sp, td))) {
553 				mtx_lock(&mountlist_mtx);
554 				nmp = TAILQ_NEXT(mp, mnt_list);
555 				vfs_unbusy(mp, td);
556 				continue;
557 			}
558 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
559 			cvtstatfs(td, sp, &osb);
560 			error = copyout(&osb, sfsp, sizeof(osb));
561 			if (error) {
562 				vfs_unbusy(mp, td);
563 				return (error);
564 			}
565 			sfsp += sizeof(osb);
566 		}
567 		count++;
568 		mtx_lock(&mountlist_mtx);
569 		nmp = TAILQ_NEXT(mp, mnt_list);
570 		vfs_unbusy(mp, td);
571 	}
572 	mtx_unlock(&mountlist_mtx);
573 	if (sfsp && count > maxcount)
574 		td->td_retval[0] = maxcount;
575 	else
576 		td->td_retval[0] = count;
577 	return (0);
578 }
579 
580 /*
581  * Implement fstatfs() for (NFS) file handles.
582  */
583 #ifndef _SYS_SYSPROTO_H_
584 struct freebsd4_fhstatfs_args {
585 	struct fhandle *u_fhp;
586 	struct ostatfs *buf;
587 };
588 #endif
589 int
590 freebsd4_fhstatfs(td, uap)
591 	struct thread *td;
592 	struct freebsd4_fhstatfs_args /* {
593 		struct fhandle *u_fhp;
594 		struct ostatfs *buf;
595 	} */ *uap;
596 {
597 	struct ostatfs osb;
598 	struct statfs sf;
599 	fhandle_t fh;
600 	int error;
601 
602 	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
603 		return (error);
604 	error = kern_fhstatfs(td, fh, &sf);
605 	if (error)
606 		return (error);
607 	cvtstatfs(td, &sf, &osb);
608 	return (copyout(&osb, uap->buf, sizeof(osb)));
609 }
610 
611 /*
612  * Convert a new format statfs structure to an old format statfs structure.
613  */
614 static void
615 cvtstatfs(td, nsp, osp)
616 	struct thread *td;
617 	struct statfs *nsp;
618 	struct ostatfs *osp;
619 {
620 
621 	bzero(osp, sizeof(*osp));
622 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
623 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
624 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
625 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
626 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
627 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
628 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
629 	osp->f_owner = nsp->f_owner;
630 	osp->f_type = nsp->f_type;
631 	osp->f_flags = nsp->f_flags;
632 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
633 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
634 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
635 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
636 	bcopy(nsp->f_fstypename, osp->f_fstypename,
637 	    MIN(MFSNAMELEN, OMNAMELEN));
638 	bcopy(nsp->f_mntonname, osp->f_mntonname,
639 	    MIN(MFSNAMELEN, OMNAMELEN));
640 	bcopy(nsp->f_mntfromname, osp->f_mntfromname,
641 	    MIN(MFSNAMELEN, OMNAMELEN));
642 	if (suser(td)) {
643 		osp->f_fsid.val[0] = osp->f_fsid.val[1] = 0;
644 	} else {
645 		osp->f_fsid = nsp->f_fsid;
646 	}
647 }
648 #endif /* COMPAT_FREEBSD4 */
649 
650 /*
651  * Change current working directory to a given file descriptor.
652  */
653 #ifndef _SYS_SYSPROTO_H_
654 struct fchdir_args {
655 	int	fd;
656 };
657 #endif
658 int
659 fchdir(td, uap)
660 	struct thread *td;
661 	struct fchdir_args /* {
662 		int fd;
663 	} */ *uap;
664 {
665 	register struct filedesc *fdp = td->td_proc->p_fd;
666 	struct vnode *vp, *tdp, *vpold;
667 	struct mount *mp;
668 	struct file *fp;
669 	int vfslocked;
670 	int error;
671 
672 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
673 		return (error);
674 	vp = fp->f_vnode;
675 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
676 	VREF(vp);
677 	fdrop(fp, td);
678 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
679 	if (vp->v_type != VDIR)
680 		error = ENOTDIR;
681 #ifdef MAC
682 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
683 	}
684 #endif
685 	else
686 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
687 	while (!error && (mp = vp->v_mountedhere) != NULL) {
688 		int tvfslocked;
689 		if (vfs_busy(mp, 0, 0, td))
690 			continue;
691 		tvfslocked = VFS_LOCK_GIANT(mp);
692 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
693 		vfs_unbusy(mp, td);
694 		if (error) {
695 			VFS_UNLOCK_GIANT(tvfslocked);
696 			break;
697 		}
698 		vput(vp);
699 		VFS_UNLOCK_GIANT(vfslocked);
700 		vp = tdp;
701 		vfslocked = tvfslocked;
702 	}
703 	if (error) {
704 		vput(vp);
705 		VFS_UNLOCK_GIANT(vfslocked);
706 		return (error);
707 	}
708 	VOP_UNLOCK(vp, 0, td);
709 	FILEDESC_LOCK_FAST(fdp);
710 	vpold = fdp->fd_cdir;
711 	fdp->fd_cdir = vp;
712 	FILEDESC_UNLOCK_FAST(fdp);
713 	vrele(vpold);
714 	VFS_UNLOCK_GIANT(vfslocked);
715 	return (0);
716 }
717 
718 /*
719  * Change current working directory (``.'').
720  */
721 #ifndef _SYS_SYSPROTO_H_
722 struct chdir_args {
723 	char	*path;
724 };
725 #endif
726 int
727 chdir(td, uap)
728 	struct thread *td;
729 	struct chdir_args /* {
730 		char *path;
731 	} */ *uap;
732 {
733 
734 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
735 }
736 
737 int
738 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
739 {
740 	register struct filedesc *fdp = td->td_proc->p_fd;
741 	int error;
742 	struct nameidata nd;
743 	struct vnode *vp;
744 	int vfslocked;
745 
746 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
747 	if ((error = namei(&nd)) != 0)
748 		return (error);
749 	vfslocked = NDHASGIANT(&nd);
750 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
751 		vput(nd.ni_vp);
752 		VFS_UNLOCK_GIANT(vfslocked);
753 		NDFREE(&nd, NDF_ONLY_PNBUF);
754 		return (error);
755 	}
756 	VOP_UNLOCK(nd.ni_vp, 0, td);
757 	NDFREE(&nd, NDF_ONLY_PNBUF);
758 	FILEDESC_LOCK_FAST(fdp);
759 	vp = fdp->fd_cdir;
760 	fdp->fd_cdir = nd.ni_vp;
761 	FILEDESC_UNLOCK_FAST(fdp);
762 	vrele(vp);
763 	VFS_UNLOCK_GIANT(vfslocked);
764 	return (0);
765 }
766 
767 /*
768  * Helper function for raised chroot(2) security function:  Refuse if
769  * any filedescriptors are open directories.
770  */
771 static int
772 chroot_refuse_vdir_fds(fdp)
773 	struct filedesc *fdp;
774 {
775 	struct vnode *vp;
776 	struct file *fp;
777 	int fd;
778 
779 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
780 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
781 		fp = fget_locked(fdp, fd);
782 		if (fp == NULL)
783 			continue;
784 		if (fp->f_type == DTYPE_VNODE) {
785 			vp = fp->f_vnode;
786 			if (vp->v_type == VDIR)
787 				return (EPERM);
788 		}
789 	}
790 	return (0);
791 }
792 
793 /*
794  * This sysctl determines if we will allow a process to chroot(2) if it
795  * has a directory open:
796  *	0: disallowed for all processes.
797  *	1: allowed for processes that were not already chroot(2)'ed.
798  *	2: allowed for all processes.
799  */
800 
801 static int chroot_allow_open_directories = 1;
802 
803 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
804      &chroot_allow_open_directories, 0, "");
805 
806 /*
807  * Change notion of root (``/'') directory.
808  */
809 #ifndef _SYS_SYSPROTO_H_
810 struct chroot_args {
811 	char	*path;
812 };
813 #endif
814 int
815 chroot(td, uap)
816 	struct thread *td;
817 	struct chroot_args /* {
818 		char *path;
819 	} */ *uap;
820 {
821 	int error;
822 	struct nameidata nd;
823 	int vfslocked;
824 
825 	error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
826 	if (error)
827 		return (error);
828 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE,
829 	    UIO_USERSPACE, uap->path, td);
830 	error = namei(&nd);
831 	if (error)
832 		goto error;
833 	vfslocked = NDHASGIANT(&nd);
834 	if ((error = change_dir(nd.ni_vp, td)) != 0)
835 		goto e_vunlock;
836 #ifdef MAC
837 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
838 		goto e_vunlock;
839 #endif
840 	VOP_UNLOCK(nd.ni_vp, 0, td);
841 	error = change_root(nd.ni_vp, td);
842 	vrele(nd.ni_vp);
843 	VFS_UNLOCK_GIANT(vfslocked);
844 	NDFREE(&nd, NDF_ONLY_PNBUF);
845 	return (error);
846 e_vunlock:
847 	vput(nd.ni_vp);
848 	VFS_UNLOCK_GIANT(vfslocked);
849 error:
850 	NDFREE(&nd, NDF_ONLY_PNBUF);
851 	return (error);
852 }
853 
854 /*
855  * Common routine for chroot and chdir.  Callers must provide a locked vnode
856  * instance.
857  */
858 int
859 change_dir(vp, td)
860 	struct vnode *vp;
861 	struct thread *td;
862 {
863 	int error;
864 
865 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
866 	if (vp->v_type != VDIR)
867 		return (ENOTDIR);
868 #ifdef MAC
869 	error = mac_check_vnode_chdir(td->td_ucred, vp);
870 	if (error)
871 		return (error);
872 #endif
873 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
874 	return (error);
875 }
876 
877 /*
878  * Common routine for kern_chroot() and jail_attach().  The caller is
879  * responsible for invoking suser() and mac_check_chroot() to authorize this
880  * operation.
881  */
882 int
883 change_root(vp, td)
884 	struct vnode *vp;
885 	struct thread *td;
886 {
887 	struct filedesc *fdp;
888 	struct vnode *oldvp;
889 	int error;
890 
891 	VFS_ASSERT_GIANT(vp->v_mount);
892 	fdp = td->td_proc->p_fd;
893 	FILEDESC_LOCK(fdp);
894 	if (chroot_allow_open_directories == 0 ||
895 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
896 		error = chroot_refuse_vdir_fds(fdp);
897 		if (error) {
898 			FILEDESC_UNLOCK(fdp);
899 			return (error);
900 		}
901 	}
902 	oldvp = fdp->fd_rdir;
903 	fdp->fd_rdir = vp;
904 	VREF(fdp->fd_rdir);
905 	if (!fdp->fd_jdir) {
906 		fdp->fd_jdir = vp;
907 		VREF(fdp->fd_jdir);
908 	}
909 	FILEDESC_UNLOCK(fdp);
910 	vrele(oldvp);
911 	return (0);
912 }
913 
914 /*
915  * Check permissions, allocate an open file structure,
916  * and call the device open routine if any.
917  *
918  * MP SAFE
919  */
920 #ifndef _SYS_SYSPROTO_H_
921 struct open_args {
922 	char	*path;
923 	int	flags;
924 	int	mode;
925 };
926 #endif
927 int
928 open(td, uap)
929 	struct thread *td;
930 	register struct open_args /* {
931 		char *path;
932 		int flags;
933 		int mode;
934 	} */ *uap;
935 {
936 	int error;
937 
938 	error = kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
939 	if (mtx_owned(&Giant))
940 		printf("open: %s: %d\n", uap->path, error);
941 	return (error);
942 }
943 
944 int
945 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
946     int mode)
947 {
948 	struct proc *p = td->td_proc;
949 	struct filedesc *fdp = p->p_fd;
950 	struct file *fp;
951 	struct vnode *vp;
952 	struct vattr vat;
953 	struct mount *mp;
954 	int cmode;
955 	struct file *nfp;
956 	int type, indx, error;
957 	struct flock lf;
958 	struct nameidata nd;
959 	int vfslocked;
960 
961 	if ((flags & O_ACCMODE) == O_ACCMODE)
962 		return (EINVAL);
963 	flags = FFLAGS(flags);
964 	error = falloc(td, &nfp, &indx);
965 	if (error)
966 		return (error);
967 	/* An extra reference on `nfp' has been held for us by falloc(). */
968 	fp = nfp;
969 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
970 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
971 	td->td_dupfd = -1;		/* XXX check for fdopen */
972 	error = vn_open(&nd, &flags, cmode, indx);
973 	if (error) {
974 		/*
975 		 * If the vn_open replaced the method vector, something
976 		 * wonderous happened deep below and we just pass it up
977 		 * pretending we know what we do.
978 		 */
979 		if (error == ENXIO && fp->f_ops != &badfileops) {
980 			fdrop(fp, td);
981 			td->td_retval[0] = indx;
982 			return (0);
983 		}
984 
985 		/*
986 		 * release our own reference
987 		 */
988 		fdrop(fp, td);
989 
990 		/*
991 		 * handle special fdopen() case.  bleh.  dupfdopen() is
992 		 * responsible for dropping the old contents of ofiles[indx]
993 		 * if it succeeds.
994 		 */
995 		if ((error == ENODEV || error == ENXIO) &&
996 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
997 		    (error =
998 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
999 			td->td_retval[0] = indx;
1000 			return (0);
1001 		}
1002 		/*
1003 		 * Clean up the descriptor, but only if another thread hadn't
1004 		 * replaced or closed it.
1005 		 */
1006 		fdclose(fdp, fp, indx, td);
1007 
1008 		if (error == ERESTART)
1009 			error = EINTR;
1010 		return (error);
1011 	}
1012 	td->td_dupfd = 0;
1013 	vfslocked = NDHASGIANT(&nd);
1014 	NDFREE(&nd, NDF_ONLY_PNBUF);
1015 	vp = nd.ni_vp;
1016 
1017 	/*
1018 	 * There should be 2 references on the file, one from the descriptor
1019 	 * table, and one for us.
1020 	 *
1021 	 * Handle the case where someone closed the file (via its file
1022 	 * descriptor) while we were blocked.  The end result should look
1023 	 * like opening the file succeeded but it was immediately closed.
1024 	 * We call vn_close() manually because we haven't yet hooked up
1025 	 * the various 'struct file' fields.
1026 	 */
1027 	FILEDESC_LOCK(fdp);
1028 	FILE_LOCK(fp);
1029 	if (fp->f_count == 1) {
1030 		mp = vp->v_mount;
1031 		KASSERT(fdp->fd_ofiles[indx] != fp,
1032 		    ("Open file descriptor lost all refs"));
1033 		FILE_UNLOCK(fp);
1034 		FILEDESC_UNLOCK(fdp);
1035 		VOP_UNLOCK(vp, 0, td);
1036 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1037 		VFS_UNLOCK_GIANT(vfslocked);
1038 		fdrop(fp, td);
1039 		td->td_retval[0] = indx;
1040 		return (0);
1041 	}
1042 	fp->f_vnode = vp;
1043 	if (fp->f_data == NULL)
1044 		fp->f_data = vp;
1045 	fp->f_flag = flags & FMASK;
1046 	if (fp->f_ops == &badfileops)
1047 		fp->f_ops = &vnops;
1048 	fp->f_seqcount = 1;
1049 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1050 	FILE_UNLOCK(fp);
1051 	FILEDESC_UNLOCK(fdp);
1052 
1053 	VOP_UNLOCK(vp, 0, td);
1054 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1055 		lf.l_whence = SEEK_SET;
1056 		lf.l_start = 0;
1057 		lf.l_len = 0;
1058 		if (flags & O_EXLOCK)
1059 			lf.l_type = F_WRLCK;
1060 		else
1061 			lf.l_type = F_RDLCK;
1062 		type = F_FLOCK;
1063 		if ((flags & FNONBLOCK) == 0)
1064 			type |= F_WAIT;
1065 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1066 			    type)) != 0)
1067 			goto bad;
1068 		fp->f_flag |= FHASLOCK;
1069 	}
1070 	if (flags & O_TRUNC) {
1071 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1072 			goto bad;
1073 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1074 		VATTR_NULL(&vat);
1075 		vat.va_size = 0;
1076 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1077 #ifdef MAC
1078 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1079 		if (error == 0)
1080 #endif
1081 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1082 		VOP_UNLOCK(vp, 0, td);
1083 		vn_finished_write(mp);
1084 		if (error)
1085 			goto bad;
1086 	}
1087 	VFS_UNLOCK_GIANT(vfslocked);
1088 	/*
1089 	 * Release our private reference, leaving the one associated with
1090 	 * the descriptor table intact.
1091 	 */
1092 	fdrop(fp, td);
1093 	td->td_retval[0] = indx;
1094 	return (0);
1095 bad:
1096 	VFS_UNLOCK_GIANT(vfslocked);
1097 	fdclose(fdp, fp, indx, td);
1098 	fdrop(fp, td);
1099 	return (error);
1100 }
1101 
1102 #ifdef COMPAT_43
1103 /*
1104  * Create a file.
1105  *
1106  * MP SAFE
1107  */
1108 #ifndef _SYS_SYSPROTO_H_
1109 struct ocreat_args {
1110 	char	*path;
1111 	int	mode;
1112 };
1113 #endif
1114 int
1115 ocreat(td, uap)
1116 	struct thread *td;
1117 	register struct ocreat_args /* {
1118 		char *path;
1119 		int mode;
1120 	} */ *uap;
1121 {
1122 
1123 	return (kern_open(td, uap->path, UIO_USERSPACE,
1124 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1125 }
1126 #endif /* COMPAT_43 */
1127 
1128 /*
1129  * Create a special file.
1130  */
1131 #ifndef _SYS_SYSPROTO_H_
1132 struct mknod_args {
1133 	char	*path;
1134 	int	mode;
1135 	int	dev;
1136 };
1137 #endif
1138 int
1139 mknod(td, uap)
1140 	struct thread *td;
1141 	register struct mknod_args /* {
1142 		char *path;
1143 		int mode;
1144 		int dev;
1145 	} */ *uap;
1146 {
1147 
1148 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1149 }
1150 
1151 int
1152 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1153     int dev)
1154 {
1155 	struct vnode *vp;
1156 	struct mount *mp;
1157 	struct vattr vattr;
1158 	int error;
1159 	int whiteout = 0;
1160 	struct nameidata nd;
1161 	int vfslocked;
1162 
1163 	switch (mode & S_IFMT) {
1164 	case S_IFCHR:
1165 	case S_IFBLK:
1166 		error = suser(td);
1167 		break;
1168 	default:
1169 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
1170 		break;
1171 	}
1172 	if (error)
1173 		return (error);
1174 restart:
1175 	bwillwrite();
1176 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, pathseg, path, td);
1177 	if ((error = namei(&nd)) != 0)
1178 		return (error);
1179 	vfslocked = NDHASGIANT(&nd);
1180 	vp = nd.ni_vp;
1181 	if (vp != NULL) {
1182 		NDFREE(&nd, NDF_ONLY_PNBUF);
1183 		vrele(vp);
1184 		if (vp == nd.ni_dvp)
1185 			vrele(nd.ni_dvp);
1186 		else
1187 			vput(nd.ni_dvp);
1188 		VFS_UNLOCK_GIANT(vfslocked);
1189 		return (EEXIST);
1190 	} else {
1191 		VATTR_NULL(&vattr);
1192 		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1193 		vattr.va_mode = (mode & ALLPERMS) &
1194 		    ~td->td_proc->p_fd->fd_cmask;
1195 		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1196 		vattr.va_rdev = dev;
1197 		whiteout = 0;
1198 
1199 		switch (mode & S_IFMT) {
1200 		case S_IFMT:	/* used by badsect to flag bad sectors */
1201 			vattr.va_type = VBAD;
1202 			break;
1203 		case S_IFCHR:
1204 			vattr.va_type = VCHR;
1205 			break;
1206 		case S_IFBLK:
1207 			vattr.va_type = VBLK;
1208 			break;
1209 		case S_IFWHT:
1210 			whiteout = 1;
1211 			break;
1212 		default:
1213 			error = EINVAL;
1214 			break;
1215 		}
1216 	}
1217 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1218 		NDFREE(&nd, NDF_ONLY_PNBUF);
1219 		vput(nd.ni_dvp);
1220 		VFS_UNLOCK_GIANT(vfslocked);
1221 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1222 			return (error);
1223 		goto restart;
1224 	}
1225 #ifdef MAC
1226 	if (error == 0 && !whiteout)
1227 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1228 		    &nd.ni_cnd, &vattr);
1229 #endif
1230 	if (!error) {
1231 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1232 		if (whiteout)
1233 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1234 		else {
1235 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1236 						&nd.ni_cnd, &vattr);
1237 			if (error == 0)
1238 				vput(nd.ni_vp);
1239 		}
1240 	}
1241 	NDFREE(&nd, NDF_ONLY_PNBUF);
1242 	vput(nd.ni_dvp);
1243 	vn_finished_write(mp);
1244 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1245 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1246 	VFS_UNLOCK_GIANT(vfslocked);
1247 	return (error);
1248 }
1249 
1250 /*
1251  * Create a named pipe.
1252  */
1253 #ifndef _SYS_SYSPROTO_H_
1254 struct mkfifo_args {
1255 	char	*path;
1256 	int	mode;
1257 };
1258 #endif
1259 int
1260 mkfifo(td, uap)
1261 	struct thread *td;
1262 	register struct mkfifo_args /* {
1263 		char *path;
1264 		int mode;
1265 	} */ *uap;
1266 {
1267 
1268 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1269 }
1270 
1271 int
1272 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1273 {
1274 	struct mount *mp;
1275 	struct vattr vattr;
1276 	int error;
1277 	struct nameidata nd;
1278 	int vfslocked;
1279 
1280 restart:
1281 	bwillwrite();
1282 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, pathseg, path, td);
1283 	if ((error = namei(&nd)) != 0)
1284 		return (error);
1285 	vfslocked = NDHASGIANT(&nd);
1286 	if (nd.ni_vp != NULL) {
1287 		NDFREE(&nd, NDF_ONLY_PNBUF);
1288 		vrele(nd.ni_vp);
1289 		if (nd.ni_vp == nd.ni_dvp)
1290 			vrele(nd.ni_dvp);
1291 		else
1292 			vput(nd.ni_dvp);
1293 		VFS_UNLOCK_GIANT(vfslocked);
1294 		return (EEXIST);
1295 	}
1296 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1297 		NDFREE(&nd, NDF_ONLY_PNBUF);
1298 		vput(nd.ni_dvp);
1299 		VFS_UNLOCK_GIANT(vfslocked);
1300 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1301 			return (error);
1302 		goto restart;
1303 	}
1304 	VATTR_NULL(&vattr);
1305 	vattr.va_type = VFIFO;
1306 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1307 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1308 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1309 #ifdef MAC
1310 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1311 	    &vattr);
1312 	if (error)
1313 		goto out;
1314 #endif
1315 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1316 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1317 	if (error == 0)
1318 		vput(nd.ni_vp);
1319 #ifdef MAC
1320 out:
1321 #endif
1322 	vput(nd.ni_dvp);
1323 	vn_finished_write(mp);
1324 	VFS_UNLOCK_GIANT(vfslocked);
1325 	NDFREE(&nd, NDF_ONLY_PNBUF);
1326 	return (error);
1327 }
1328 
1329 /*
1330  * Make a hard file link.
1331  */
1332 #ifndef _SYS_SYSPROTO_H_
1333 struct link_args {
1334 	char	*path;
1335 	char	*link;
1336 };
1337 #endif
1338 int
1339 link(td, uap)
1340 	struct thread *td;
1341 	register struct link_args /* {
1342 		char *path;
1343 		char *link;
1344 	} */ *uap;
1345 {
1346 	int error;
1347 
1348 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1349 	return (error);
1350 }
1351 
1352 SYSCTL_DECL(_security_bsd);
1353 
1354 static int hardlink_check_uid = 0;
1355 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1356     &hardlink_check_uid, 0,
1357     "Unprivileged processes cannot create hard links to files owned by other "
1358     "users");
1359 static int hardlink_check_gid = 0;
1360 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1361     &hardlink_check_gid, 0,
1362     "Unprivileged processes cannot create hard links to files owned by other "
1363     "groups");
1364 
1365 static int
1366 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1367 {
1368 	struct vattr va;
1369 	int error;
1370 
1371 	if (suser_cred(cred, SUSER_ALLOWJAIL) == 0)
1372 		return (0);
1373 
1374 	if (!hardlink_check_uid && !hardlink_check_gid)
1375 		return (0);
1376 
1377 	error = VOP_GETATTR(vp, &va, cred, td);
1378 	if (error != 0)
1379 		return (error);
1380 
1381 	if (hardlink_check_uid) {
1382 		if (cred->cr_uid != va.va_uid)
1383 			return (EPERM);
1384 	}
1385 
1386 	if (hardlink_check_gid) {
1387 		if (!groupmember(va.va_gid, cred))
1388 			return (EPERM);
1389 	}
1390 
1391 	return (0);
1392 }
1393 
1394 int
1395 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1396 {
1397 	struct vnode *vp;
1398 	struct mount *mp;
1399 	struct nameidata nd;
1400 	int vfslocked;
1401 	int lvfslocked;
1402 	int error;
1403 
1404 	bwillwrite();
1405 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, segflg, path, td);
1406 	if ((error = namei(&nd)) != 0)
1407 		return (error);
1408 	vfslocked = NDHASGIANT(&nd);
1409 	NDFREE(&nd, NDF_ONLY_PNBUF);
1410 	vp = nd.ni_vp;
1411 	if (vp->v_type == VDIR) {
1412 		vrele(vp);
1413 		VFS_UNLOCK_GIANT(vfslocked);
1414 		return (EPERM);		/* POSIX */
1415 	}
1416 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1417 		vrele(vp);
1418 		VFS_UNLOCK_GIANT(vfslocked);
1419 		return (error);
1420 	}
1421 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, link, td);
1422 	if ((error = namei(&nd)) == 0) {
1423 		lvfslocked = NDHASGIANT(&nd);
1424 		if (nd.ni_vp != NULL) {
1425 			vrele(nd.ni_vp);
1426 			if (nd.ni_dvp == nd.ni_vp)
1427 				vrele(nd.ni_dvp);
1428 			else
1429 				vput(nd.ni_dvp);
1430 			error = EEXIST;
1431 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1432 		    == 0) {
1433 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1434 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1435 			error = can_hardlink(vp, td, td->td_ucred);
1436 			if (error == 0)
1437 #ifdef MAC
1438 				error = mac_check_vnode_link(td->td_ucred,
1439 				    nd.ni_dvp, vp, &nd.ni_cnd);
1440 			if (error == 0)
1441 #endif
1442 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1443 			VOP_UNLOCK(vp, 0, td);
1444 			vput(nd.ni_dvp);
1445 		}
1446 		NDFREE(&nd, NDF_ONLY_PNBUF);
1447 		VFS_UNLOCK_GIANT(lvfslocked);
1448 	}
1449 	vrele(vp);
1450 	vn_finished_write(mp);
1451 	VFS_UNLOCK_GIANT(vfslocked);
1452 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1453 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1454 	return (error);
1455 }
1456 
1457 /*
1458  * Make a symbolic link.
1459  */
1460 #ifndef _SYS_SYSPROTO_H_
1461 struct symlink_args {
1462 	char	*path;
1463 	char	*link;
1464 };
1465 #endif
1466 int
1467 symlink(td, uap)
1468 	struct thread *td;
1469 	register struct symlink_args /* {
1470 		char *path;
1471 		char *link;
1472 	} */ *uap;
1473 {
1474 
1475 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1476 }
1477 
1478 int
1479 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1480 {
1481 	struct mount *mp;
1482 	struct vattr vattr;
1483 	char *syspath;
1484 	int error;
1485 	struct nameidata nd;
1486 	int vfslocked;
1487 
1488 	if (segflg == UIO_SYSSPACE) {
1489 		syspath = path;
1490 	} else {
1491 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1492 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1493 			goto out;
1494 	}
1495 restart:
1496 	bwillwrite();
1497 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE,
1498 	    segflg, link, td);
1499 	if ((error = namei(&nd)) != 0)
1500 		goto out;
1501 	vfslocked = NDHASGIANT(&nd);
1502 	if (nd.ni_vp) {
1503 		NDFREE(&nd, NDF_ONLY_PNBUF);
1504 		vrele(nd.ni_vp);
1505 		if (nd.ni_vp == nd.ni_dvp)
1506 			vrele(nd.ni_dvp);
1507 		else
1508 			vput(nd.ni_dvp);
1509 		VFS_UNLOCK_GIANT(vfslocked);
1510 		error = EEXIST;
1511 		goto out;
1512 	}
1513 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1514 		NDFREE(&nd, NDF_ONLY_PNBUF);
1515 		vput(nd.ni_dvp);
1516 		VFS_UNLOCK_GIANT(vfslocked);
1517 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1518 			goto out;
1519 		goto restart;
1520 	}
1521 	VATTR_NULL(&vattr);
1522 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1523 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1524 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1525 #ifdef MAC
1526 	vattr.va_type = VLNK;
1527 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1528 	    &vattr);
1529 	if (error)
1530 		goto out2;
1531 #endif
1532 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1533 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1534 	if (error == 0)
1535 		vput(nd.ni_vp);
1536 #ifdef MAC
1537 out2:
1538 #endif
1539 	NDFREE(&nd, NDF_ONLY_PNBUF);
1540 	vput(nd.ni_dvp);
1541 	vn_finished_write(mp);
1542 	VFS_UNLOCK_GIANT(vfslocked);
1543 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1544 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1545 out:
1546 	if (segflg != UIO_SYSSPACE)
1547 		uma_zfree(namei_zone, syspath);
1548 	return (error);
1549 }
1550 
1551 /*
1552  * Delete a whiteout from the filesystem.
1553  */
1554 int
1555 undelete(td, uap)
1556 	struct thread *td;
1557 	register struct undelete_args /* {
1558 		char *path;
1559 	} */ *uap;
1560 {
1561 	int error;
1562 	struct mount *mp;
1563 	struct nameidata nd;
1564 	int vfslocked;
1565 
1566 restart:
1567 	bwillwrite();
1568 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE, UIO_USERSPACE,
1569 	    uap->path, td);
1570 	error = namei(&nd);
1571 	if (error)
1572 		return (error);
1573 	vfslocked = NDHASGIANT(&nd);
1574 
1575 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1576 		NDFREE(&nd, NDF_ONLY_PNBUF);
1577 		if (nd.ni_vp)
1578 			vrele(nd.ni_vp);
1579 		if (nd.ni_vp == nd.ni_dvp)
1580 			vrele(nd.ni_dvp);
1581 		else
1582 			vput(nd.ni_dvp);
1583 		VFS_UNLOCK_GIANT(vfslocked);
1584 		return (EEXIST);
1585 	}
1586 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1587 		NDFREE(&nd, NDF_ONLY_PNBUF);
1588 		vput(nd.ni_dvp);
1589 		VFS_UNLOCK_GIANT(vfslocked);
1590 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1591 			return (error);
1592 		goto restart;
1593 	}
1594 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1595 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1596 	NDFREE(&nd, NDF_ONLY_PNBUF);
1597 	vput(nd.ni_dvp);
1598 	vn_finished_write(mp);
1599 	VFS_UNLOCK_GIANT(vfslocked);
1600 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1601 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1602 	return (error);
1603 }
1604 
1605 /*
1606  * Delete a name from the filesystem.
1607  */
1608 #ifndef _SYS_SYSPROTO_H_
1609 struct unlink_args {
1610 	char	*path;
1611 };
1612 #endif
1613 int
1614 unlink(td, uap)
1615 	struct thread *td;
1616 	struct unlink_args /* {
1617 		char *path;
1618 	} */ *uap;
1619 {
1620 	int error;
1621 
1622 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1623 	return (error);
1624 }
1625 
1626 int
1627 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1628 {
1629 	struct mount *mp;
1630 	struct vnode *vp;
1631 	int error;
1632 	struct nameidata nd;
1633 	int vfslocked;
1634 
1635 restart:
1636 	bwillwrite();
1637 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE, pathseg, path, td);
1638 	if ((error = namei(&nd)) != 0)
1639 		return (error);
1640 	vfslocked = NDHASGIANT(&nd);
1641 	vp = nd.ni_vp;
1642 	if (vp->v_type == VDIR)
1643 		error = EPERM;		/* POSIX */
1644 	else {
1645 		/*
1646 		 * The root of a mounted filesystem cannot be deleted.
1647 		 *
1648 		 * XXX: can this only be a VDIR case?
1649 		 */
1650 		if (vp->v_vflag & VV_ROOT)
1651 			error = EBUSY;
1652 	}
1653 	if (error == 0) {
1654 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1655 			NDFREE(&nd, NDF_ONLY_PNBUF);
1656 			if (vp == nd.ni_dvp)
1657 				vrele(vp);
1658 			else
1659 				vput(vp);
1660 			vput(nd.ni_dvp);
1661 			VFS_UNLOCK_GIANT(vfslocked);
1662 			if ((error = vn_start_write(NULL, &mp,
1663 			    V_XSLEEP | PCATCH)) != 0)
1664 				return (error);
1665 			goto restart;
1666 		}
1667 #ifdef MAC
1668 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1669 		    &nd.ni_cnd);
1670 		if (error)
1671 			goto out;
1672 #endif
1673 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1674 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1675 #ifdef MAC
1676 out:
1677 #endif
1678 		vn_finished_write(mp);
1679 	}
1680 	NDFREE(&nd, NDF_ONLY_PNBUF);
1681 	if (vp == nd.ni_dvp)
1682 		vrele(vp);
1683 	else
1684 		vput(vp);
1685 	vput(nd.ni_dvp);
1686 	VFS_UNLOCK_GIANT(vfslocked);
1687 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1688 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1689 	return (error);
1690 }
1691 
1692 /*
1693  * Reposition read/write file offset.
1694  */
1695 #ifndef _SYS_SYSPROTO_H_
1696 struct lseek_args {
1697 	int	fd;
1698 	int	pad;
1699 	off_t	offset;
1700 	int	whence;
1701 };
1702 #endif
1703 int
1704 lseek(td, uap)
1705 	struct thread *td;
1706 	register struct lseek_args /* {
1707 		int fd;
1708 		int pad;
1709 		off_t offset;
1710 		int whence;
1711 	} */ *uap;
1712 {
1713 	struct ucred *cred = td->td_ucred;
1714 	struct file *fp;
1715 	struct vnode *vp;
1716 	struct vattr vattr;
1717 	off_t offset;
1718 	int error, noneg;
1719 	int vfslocked;
1720 
1721 	if ((error = fget(td, uap->fd, &fp)) != 0)
1722 		return (error);
1723 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1724 		fdrop(fp, td);
1725 		return (ESPIPE);
1726 	}
1727 	vp = fp->f_vnode;
1728 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1729 	noneg = (vp->v_type != VCHR);
1730 	offset = uap->offset;
1731 	switch (uap->whence) {
1732 	case L_INCR:
1733 		if (noneg &&
1734 		    (fp->f_offset < 0 ||
1735 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1736 			error = EOVERFLOW;
1737 			break;
1738 		}
1739 		offset += fp->f_offset;
1740 		break;
1741 	case L_XTND:
1742 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1743 		error = VOP_GETATTR(vp, &vattr, cred, td);
1744 		VOP_UNLOCK(vp, 0, td);
1745 		if (error)
1746 			break;
1747 		if (noneg &&
1748 		    (vattr.va_size > OFF_MAX ||
1749 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1750 			error = EOVERFLOW;
1751 			break;
1752 		}
1753 		offset += vattr.va_size;
1754 		break;
1755 	case L_SET:
1756 		break;
1757 	default:
1758 		error = EINVAL;
1759 	}
1760 	if (error == 0 && noneg && offset < 0)
1761 		error = EINVAL;
1762 	if (error != 0)
1763 		goto drop;
1764 	fp->f_offset = offset;
1765 	*(off_t *)(td->td_retval) = fp->f_offset;
1766 drop:
1767 	fdrop(fp, td);
1768 	VFS_UNLOCK_GIANT(vfslocked);
1769 	return (error);
1770 }
1771 
1772 #if defined(COMPAT_43)
1773 /*
1774  * Reposition read/write file offset.
1775  */
1776 #ifndef _SYS_SYSPROTO_H_
1777 struct olseek_args {
1778 	int	fd;
1779 	long	offset;
1780 	int	whence;
1781 };
1782 #endif
1783 int
1784 olseek(td, uap)
1785 	struct thread *td;
1786 	register struct olseek_args /* {
1787 		int fd;
1788 		long offset;
1789 		int whence;
1790 	} */ *uap;
1791 {
1792 	struct lseek_args /* {
1793 		int fd;
1794 		int pad;
1795 		off_t offset;
1796 		int whence;
1797 	} */ nuap;
1798 	int error;
1799 
1800 	nuap.fd = uap->fd;
1801 	nuap.offset = uap->offset;
1802 	nuap.whence = uap->whence;
1803 	error = lseek(td, &nuap);
1804 	return (error);
1805 }
1806 #endif /* COMPAT_43 */
1807 
1808 /*
1809  * Check access permissions using passed credentials.
1810  */
1811 static int
1812 vn_access(vp, user_flags, cred, td)
1813 	struct vnode	*vp;
1814 	int		user_flags;
1815 	struct ucred	*cred;
1816 	struct thread	*td;
1817 {
1818 	int error, flags;
1819 
1820 	/* Flags == 0 means only check for existence. */
1821 	error = 0;
1822 	if (user_flags) {
1823 		flags = 0;
1824 		if (user_flags & R_OK)
1825 			flags |= VREAD;
1826 		if (user_flags & W_OK)
1827 			flags |= VWRITE;
1828 		if (user_flags & X_OK)
1829 			flags |= VEXEC;
1830 #ifdef MAC
1831 		error = mac_check_vnode_access(cred, vp, flags);
1832 		if (error)
1833 			return (error);
1834 #endif
1835 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1836 			error = VOP_ACCESS(vp, flags, cred, td);
1837 	}
1838 	return (error);
1839 }
1840 
1841 /*
1842  * Check access permissions using "real" credentials.
1843  */
1844 #ifndef _SYS_SYSPROTO_H_
1845 struct access_args {
1846 	char	*path;
1847 	int	flags;
1848 };
1849 #endif
1850 int
1851 access(td, uap)
1852 	struct thread *td;
1853 	register struct access_args /* {
1854 		char *path;
1855 		int flags;
1856 	} */ *uap;
1857 {
1858 
1859 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1860 }
1861 
1862 int
1863 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1864 {
1865 	struct ucred *cred, *tmpcred;
1866 	register struct vnode *vp;
1867 	struct nameidata nd;
1868 	int vfslocked;
1869 	int error;
1870 
1871 	/*
1872 	 * Create and modify a temporary credential instead of one that
1873 	 * is potentially shared.  This could also mess up socket
1874 	 * buffer accounting which can run in an interrupt context.
1875 	 */
1876 	cred = td->td_ucred;
1877 	tmpcred = crdup(cred);
1878 	tmpcred->cr_uid = cred->cr_ruid;
1879 	tmpcred->cr_groups[0] = cred->cr_rgid;
1880 	td->td_ucred = tmpcred;
1881 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
1882 	if ((error = namei(&nd)) != 0)
1883 		goto out1;
1884 	vfslocked = NDHASGIANT(&nd);
1885 	vp = nd.ni_vp;
1886 
1887 	error = vn_access(vp, flags, tmpcred, td);
1888 	NDFREE(&nd, NDF_ONLY_PNBUF);
1889 	vput(vp);
1890 	VFS_UNLOCK_GIANT(vfslocked);
1891 out1:
1892 	td->td_ucred = cred;
1893 	crfree(tmpcred);
1894 	return (error);
1895 }
1896 
1897 /*
1898  * Check access permissions using "effective" credentials.
1899  */
1900 #ifndef _SYS_SYSPROTO_H_
1901 struct eaccess_args {
1902 	char	*path;
1903 	int	flags;
1904 };
1905 #endif
1906 int
1907 eaccess(td, uap)
1908 	struct thread *td;
1909 	register struct eaccess_args /* {
1910 		char *path;
1911 		int flags;
1912 	} */ *uap;
1913 {
1914 	struct nameidata nd;
1915 	struct vnode *vp;
1916 	int vfslocked;
1917 	int error;
1918 
1919 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE,
1920 	    uap->path, td);
1921 	if ((error = namei(&nd)) != 0)
1922 		return (error);
1923 	vp = nd.ni_vp;
1924 	vfslocked = NDHASGIANT(&nd);
1925 	error = vn_access(vp, uap->flags, td->td_ucred, td);
1926 	NDFREE(&nd, NDF_ONLY_PNBUF);
1927 	vput(vp);
1928 	VFS_UNLOCK_GIANT(vfslocked);
1929 	return (error);
1930 }
1931 
1932 #if defined(COMPAT_43)
1933 /*
1934  * Get file status; this version follows links.
1935  */
1936 #ifndef _SYS_SYSPROTO_H_
1937 struct ostat_args {
1938 	char	*path;
1939 	struct ostat *ub;
1940 };
1941 #endif
1942 int
1943 ostat(td, uap)
1944 	struct thread *td;
1945 	register struct ostat_args /* {
1946 		char *path;
1947 		struct ostat *ub;
1948 	} */ *uap;
1949 {
1950 	struct stat sb;
1951 	struct ostat osb;
1952 	int error;
1953 
1954 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1955 	if (error)
1956 		return (error);
1957 	cvtstat(&sb, &osb);
1958 	error = copyout(&osb, uap->ub, sizeof (osb));
1959 	return (error);
1960 }
1961 
1962 /*
1963  * Get file status; this version does not follow links.
1964  */
1965 #ifndef _SYS_SYSPROTO_H_
1966 struct olstat_args {
1967 	char	*path;
1968 	struct ostat *ub;
1969 };
1970 #endif
1971 int
1972 olstat(td, uap)
1973 	struct thread *td;
1974 	register struct olstat_args /* {
1975 		char *path;
1976 		struct ostat *ub;
1977 	} */ *uap;
1978 {
1979 	struct stat sb;
1980 	struct ostat osb;
1981 	int error;
1982 
1983 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
1984 	if (error)
1985 		return (error);
1986 	cvtstat(&sb, &osb);
1987 	error = copyout(&osb, uap->ub, sizeof (osb));
1988 	return (error);
1989 }
1990 
/*
 * Convert from a new stat structure to an old (ostat) one.
 */
1994 void
1995 cvtstat(st, ost)
1996 	struct stat *st;
1997 	struct ostat *ost;
1998 {
1999 
2000 	ost->st_dev = st->st_dev;
2001 	ost->st_ino = st->st_ino;
2002 	ost->st_mode = st->st_mode;
2003 	ost->st_nlink = st->st_nlink;
2004 	ost->st_uid = st->st_uid;
2005 	ost->st_gid = st->st_gid;
2006 	ost->st_rdev = st->st_rdev;
2007 	if (st->st_size < (quad_t)1 << 32)
2008 		ost->st_size = st->st_size;
2009 	else
2010 		ost->st_size = -2;
2011 	ost->st_atime = st->st_atime;
2012 	ost->st_mtime = st->st_mtime;
2013 	ost->st_ctime = st->st_ctime;
2014 	ost->st_blksize = st->st_blksize;
2015 	ost->st_blocks = st->st_blocks;
2016 	ost->st_flags = st->st_flags;
2017 	ost->st_gen = st->st_gen;
2018 }
2019 #endif /* COMPAT_43 */
2020 
2021 /*
2022  * Get file status; this version follows links.
2023  */
2024 #ifndef _SYS_SYSPROTO_H_
2025 struct stat_args {
2026 	char	*path;
2027 	struct stat *ub;
2028 };
2029 #endif
2030 int
2031 stat(td, uap)
2032 	struct thread *td;
2033 	register struct stat_args /* {
2034 		char *path;
2035 		struct stat *ub;
2036 	} */ *uap;
2037 {
2038 	struct stat sb;
2039 	int error;
2040 
2041 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2042 	if (error == 0)
2043 		error = copyout(&sb, uap->ub, sizeof (sb));
2044 	return (error);
2045 }
2046 
2047 int
2048 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2049 {
2050 	struct nameidata nd;
2051 	struct stat sb;
2052 	int error, vfslocked;
2053 
2054 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE,
2055 	    pathseg, path, td);
2056 	if ((error = namei(&nd)) != 0)
2057 		return (error);
2058 	vfslocked = NDHASGIANT(&nd);
2059 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2060 	NDFREE(&nd, NDF_ONLY_PNBUF);
2061 	vput(nd.ni_vp);
2062 	VFS_UNLOCK_GIANT(vfslocked);
2063 	if (error)
2064 		return (error);
2065 	*sbp = sb;
2066 	return (0);
2067 }
2068 
2069 /*
2070  * Get file status; this version does not follow links.
2071  */
2072 #ifndef _SYS_SYSPROTO_H_
2073 struct lstat_args {
2074 	char	*path;
2075 	struct stat *ub;
2076 };
2077 #endif
2078 int
2079 lstat(td, uap)
2080 	struct thread *td;
2081 	register struct lstat_args /* {
2082 		char *path;
2083 		struct stat *ub;
2084 	} */ *uap;
2085 {
2086 	struct stat sb;
2087 	int error;
2088 
2089 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2090 	if (error == 0)
2091 		error = copyout(&sb, uap->ub, sizeof (sb));
2092 	return (error);
2093 }
2094 
2095 int
2096 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2097 {
2098 	struct vnode *vp;
2099 	struct stat sb;
2100 	struct nameidata nd;
2101 	int error, vfslocked;
2102 
2103 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE,
2104 	    pathseg, path, td);
2105 	if ((error = namei(&nd)) != 0)
2106 		return (error);
2107 	vfslocked = NDHASGIANT(&nd);
2108 	vp = nd.ni_vp;
2109 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2110 	NDFREE(&nd, NDF_ONLY_PNBUF);
2111 	vput(vp);
2112 	VFS_UNLOCK_GIANT(vfslocked);
2113 	if (error)
2114 		return (error);
2115 	*sbp = sb;
2116 	return (0);
2117 }
2118 
2119 /*
2120  * Implementation of the NetBSD [l]stat() functions.
2121  */
2122 void
2123 cvtnstat(sb, nsb)
2124 	struct stat *sb;
2125 	struct nstat *nsb;
2126 {
2127 	bzero(nsb, sizeof *nsb);
2128 	nsb->st_dev = sb->st_dev;
2129 	nsb->st_ino = sb->st_ino;
2130 	nsb->st_mode = sb->st_mode;
2131 	nsb->st_nlink = sb->st_nlink;
2132 	nsb->st_uid = sb->st_uid;
2133 	nsb->st_gid = sb->st_gid;
2134 	nsb->st_rdev = sb->st_rdev;
2135 	nsb->st_atimespec = sb->st_atimespec;
2136 	nsb->st_mtimespec = sb->st_mtimespec;
2137 	nsb->st_ctimespec = sb->st_ctimespec;
2138 	nsb->st_size = sb->st_size;
2139 	nsb->st_blocks = sb->st_blocks;
2140 	nsb->st_blksize = sb->st_blksize;
2141 	nsb->st_flags = sb->st_flags;
2142 	nsb->st_gen = sb->st_gen;
2143 	nsb->st_birthtimespec = sb->st_birthtimespec;
2144 }
2145 
2146 #ifndef _SYS_SYSPROTO_H_
2147 struct nstat_args {
2148 	char	*path;
2149 	struct nstat *ub;
2150 };
2151 #endif
2152 int
2153 nstat(td, uap)
2154 	struct thread *td;
2155 	register struct nstat_args /* {
2156 		char *path;
2157 		struct nstat *ub;
2158 	} */ *uap;
2159 {
2160 	struct stat sb;
2161 	struct nstat nsb;
2162 	int error;
2163 
2164 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2165 	if (error)
2166 		return (error);
2167 	cvtnstat(&sb, &nsb);
2168 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2169 	return (error);
2170 }
2171 
2172 /*
2173  * NetBSD lstat.  Get file status; this version does not follow links.
2174  */
2175 #ifndef _SYS_SYSPROTO_H_
2176 struct lstat_args {
2177 	char	*path;
2178 	struct stat *ub;
2179 };
2180 #endif
2181 int
2182 nlstat(td, uap)
2183 	struct thread *td;
2184 	register struct nlstat_args /* {
2185 		char *path;
2186 		struct nstat *ub;
2187 	} */ *uap;
2188 {
2189 	struct stat sb;
2190 	struct nstat nsb;
2191 	int error;
2192 
2193 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2194 	if (error)
2195 		return (error);
2196 	cvtnstat(&sb, &nsb);
2197 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2198 	return (error);
2199 }
2200 
2201 /*
2202  * Get configurable pathname variables.
2203  */
2204 #ifndef _SYS_SYSPROTO_H_
2205 struct pathconf_args {
2206 	char	*path;
2207 	int	name;
2208 };
2209 #endif
2210 int
2211 pathconf(td, uap)
2212 	struct thread *td;
2213 	register struct pathconf_args /* {
2214 		char *path;
2215 		int name;
2216 	} */ *uap;
2217 {
2218 
2219 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2220 }
2221 
2222 int
2223 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2224 {
2225 	struct nameidata nd;
2226 	int error, vfslocked;
2227 
2228 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
2229 	if ((error = namei(&nd)) != 0)
2230 		return (error);
2231 	vfslocked = NDHASGIANT(&nd);
2232 	NDFREE(&nd, NDF_ONLY_PNBUF);
2233 
2234 	/* If asynchronous I/O is available, it works for all files. */
2235 	if (name == _PC_ASYNC_IO)
2236 		td->td_retval[0] = async_io_version;
2237 	else
2238 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2239 	vput(nd.ni_vp);
2240 	VFS_UNLOCK_GIANT(vfslocked);
2241 	return (error);
2242 }
2243 
2244 /*
2245  * Return target name of a symbolic link.
2246  */
2247 #ifndef _SYS_SYSPROTO_H_
2248 struct readlink_args {
2249 	char	*path;
2250 	char	*buf;
2251 	int	count;
2252 };
2253 #endif
2254 int
2255 readlink(td, uap)
2256 	struct thread *td;
2257 	register struct readlink_args /* {
2258 		char *path;
2259 		char *buf;
2260 		int count;
2261 	} */ *uap;
2262 {
2263 
2264 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2265 	    UIO_USERSPACE, uap->count));
2266 }
2267 
2268 int
2269 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2270     enum uio_seg bufseg, int count)
2271 {
2272 	register struct vnode *vp;
2273 	struct iovec aiov;
2274 	struct uio auio;
2275 	int error;
2276 	struct nameidata nd;
2277 	int vfslocked;
2278 
2279 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
2280 	if ((error = namei(&nd)) != 0)
2281 		return (error);
2282 	NDFREE(&nd, NDF_ONLY_PNBUF);
2283 	vfslocked = NDHASGIANT(&nd);
2284 	vp = nd.ni_vp;
2285 #ifdef MAC
2286 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2287 	if (error) {
2288 		vput(vp);
2289 		VFS_UNLOCK_GIANT(vfslocked);
2290 		return (error);
2291 	}
2292 #endif
2293 	if (vp->v_type != VLNK)
2294 		error = EINVAL;
2295 	else {
2296 		aiov.iov_base = buf;
2297 		aiov.iov_len = count;
2298 		auio.uio_iov = &aiov;
2299 		auio.uio_iovcnt = 1;
2300 		auio.uio_offset = 0;
2301 		auio.uio_rw = UIO_READ;
2302 		auio.uio_segflg = bufseg;
2303 		auio.uio_td = td;
2304 		auio.uio_resid = count;
2305 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2306 	}
2307 	vput(vp);
2308 	VFS_UNLOCK_GIANT(vfslocked);
2309 	td->td_retval[0] = count - auio.uio_resid;
2310 	return (error);
2311 }
2312 
2313 /*
2314  * Common implementation code for chflags() and fchflags().
2315  */
2316 static int
2317 setfflags(td, vp, flags)
2318 	struct thread *td;
2319 	struct vnode *vp;
2320 	int flags;
2321 {
2322 	int error;
2323 	struct mount *mp;
2324 	struct vattr vattr;
2325 
2326 	/*
2327 	 * Prevent non-root users from setting flags on devices.  When
2328 	 * a device is reused, users can retain ownership of the device
2329 	 * if they are allowed to set flags and programs assume that
2330 	 * chown can't fail when done as root.
2331 	 */
2332 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2333 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
2334 		if (error)
2335 			return (error);
2336 	}
2337 
2338 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2339 		return (error);
2340 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2341 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2342 	VATTR_NULL(&vattr);
2343 	vattr.va_flags = flags;
2344 #ifdef MAC
2345 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2346 	if (error == 0)
2347 #endif
2348 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2349 	VOP_UNLOCK(vp, 0, td);
2350 	vn_finished_write(mp);
2351 	return (error);
2352 }
2353 
2354 /*
2355  * Change flags of a file given a path name.
2356  */
2357 #ifndef _SYS_SYSPROTO_H_
2358 struct chflags_args {
2359 	char	*path;
2360 	int	flags;
2361 };
2362 #endif
2363 int
2364 chflags(td, uap)
2365 	struct thread *td;
2366 	register struct chflags_args /* {
2367 		char *path;
2368 		int flags;
2369 	} */ *uap;
2370 {
2371 	int error;
2372 	struct nameidata nd;
2373 	int vfslocked;
2374 
2375 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2376 	if ((error = namei(&nd)) != 0)
2377 		return (error);
2378 	NDFREE(&nd, NDF_ONLY_PNBUF);
2379 	vfslocked = NDHASGIANT(&nd);
2380 	error = setfflags(td, nd.ni_vp, uap->flags);
2381 	vrele(nd.ni_vp);
2382 	VFS_UNLOCK_GIANT(vfslocked);
2383 	return (error);
2384 }
2385 
2386 /*
2387  * Same as chflags() but doesn't follow symlinks.
2388  */
2389 int
2390 lchflags(td, uap)
2391 	struct thread *td;
2392 	register struct lchflags_args /* {
2393 		char *path;
2394 		int flags;
2395 	} */ *uap;
2396 {
2397 	int error;
2398 	struct nameidata nd;
2399 	int vfslocked;
2400 
2401 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2402 	if ((error = namei(&nd)) != 0)
2403 		return (error);
2404 	vfslocked = NDHASGIANT(&nd);
2405 	NDFREE(&nd, NDF_ONLY_PNBUF);
2406 	error = setfflags(td, nd.ni_vp, uap->flags);
2407 	vrele(nd.ni_vp);
2408 	VFS_UNLOCK_GIANT(vfslocked);
2409 	return (error);
2410 }
2411 
2412 /*
2413  * Change flags of a file given a file descriptor.
2414  */
2415 #ifndef _SYS_SYSPROTO_H_
2416 struct fchflags_args {
2417 	int	fd;
2418 	int	flags;
2419 };
2420 #endif
2421 int
2422 fchflags(td, uap)
2423 	struct thread *td;
2424 	register struct fchflags_args /* {
2425 		int fd;
2426 		int flags;
2427 	} */ *uap;
2428 {
2429 	struct file *fp;
2430 	int vfslocked;
2431 	int error;
2432 
2433 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2434 		return (error);
2435 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2436 	error = setfflags(td, fp->f_vnode, uap->flags);
2437 	fdrop(fp, td);
2438 	VFS_UNLOCK_GIANT(vfslocked);
2439 	return (error);
2440 }
2441 
2442 /*
2443  * Common implementation code for chmod(), lchmod() and fchmod().
2444  */
2445 static int
2446 setfmode(td, vp, mode)
2447 	struct thread *td;
2448 	struct vnode *vp;
2449 	int mode;
2450 {
2451 	int error;
2452 	struct mount *mp;
2453 	struct vattr vattr;
2454 
2455 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2456 		return (error);
2457 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2458 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2459 	VATTR_NULL(&vattr);
2460 	vattr.va_mode = mode & ALLPERMS;
2461 #ifdef MAC
2462 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2463 	if (error == 0)
2464 #endif
2465 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2466 	VOP_UNLOCK(vp, 0, td);
2467 	vn_finished_write(mp);
2468 	return (error);
2469 }
2470 
2471 /*
2472  * Change mode of a file given path name.
2473  */
2474 #ifndef _SYS_SYSPROTO_H_
2475 struct chmod_args {
2476 	char	*path;
2477 	int	mode;
2478 };
2479 #endif
2480 int
2481 chmod(td, uap)
2482 	struct thread *td;
2483 	register struct chmod_args /* {
2484 		char *path;
2485 		int mode;
2486 	} */ *uap;
2487 {
2488 
2489 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2490 }
2491 
2492 int
2493 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2494 {
2495 	int error;
2496 	struct nameidata nd;
2497 	int vfslocked;
2498 
2499 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2500 	if ((error = namei(&nd)) != 0)
2501 		return (error);
2502 	vfslocked = NDHASGIANT(&nd);
2503 	NDFREE(&nd, NDF_ONLY_PNBUF);
2504 	error = setfmode(td, nd.ni_vp, mode);
2505 	vrele(nd.ni_vp);
2506 	VFS_UNLOCK_GIANT(vfslocked);
2507 	return (error);
2508 }
2509 
2510 /*
2511  * Change mode of a file given path name (don't follow links.)
2512  */
2513 #ifndef _SYS_SYSPROTO_H_
2514 struct lchmod_args {
2515 	char	*path;
2516 	int	mode;
2517 };
2518 #endif
2519 int
2520 lchmod(td, uap)
2521 	struct thread *td;
2522 	register struct lchmod_args /* {
2523 		char *path;
2524 		int mode;
2525 	} */ *uap;
2526 {
2527 	int error;
2528 	struct nameidata nd;
2529 	int vfslocked;
2530 
2531 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2532 	if ((error = namei(&nd)) != 0)
2533 		return (error);
2534 	vfslocked = NDHASGIANT(&nd);
2535 	NDFREE(&nd, NDF_ONLY_PNBUF);
2536 	error = setfmode(td, nd.ni_vp, uap->mode);
2537 	vrele(nd.ni_vp);
2538 	VFS_UNLOCK_GIANT(vfslocked);
2539 	return (error);
2540 }
2541 
2542 /*
2543  * Change mode of a file given a file descriptor.
2544  */
2545 #ifndef _SYS_SYSPROTO_H_
2546 struct fchmod_args {
2547 	int	fd;
2548 	int	mode;
2549 };
2550 #endif
2551 int
2552 fchmod(td, uap)
2553 	struct thread *td;
2554 	register struct fchmod_args /* {
2555 		int fd;
2556 		int mode;
2557 	} */ *uap;
2558 {
2559 	struct file *fp;
2560 	int vfslocked;
2561 	int error;
2562 
2563 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2564 		return (error);
2565 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2566 	error = setfmode(td, fp->f_vnode, uap->mode);
2567 	fdrop(fp, td);
2568 	VFS_UNLOCK_GIANT(vfslocked);
2569 	return (error);
2570 }
2571 
2572 /*
2573  * Common implementation for chown(), lchown(), and fchown()
2574  */
2575 static int
2576 setfown(td, vp, uid, gid)
2577 	struct thread *td;
2578 	struct vnode *vp;
2579 	uid_t uid;
2580 	gid_t gid;
2581 {
2582 	int error;
2583 	struct mount *mp;
2584 	struct vattr vattr;
2585 
2586 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2587 		return (error);
2588 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2589 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2590 	VATTR_NULL(&vattr);
2591 	vattr.va_uid = uid;
2592 	vattr.va_gid = gid;
2593 #ifdef MAC
2594 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2595 	    vattr.va_gid);
2596 	if (error == 0)
2597 #endif
2598 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2599 	VOP_UNLOCK(vp, 0, td);
2600 	vn_finished_write(mp);
2601 	return (error);
2602 }
2603 
2604 /*
2605  * Set ownership given a path name.
2606  */
2607 #ifndef _SYS_SYSPROTO_H_
2608 struct chown_args {
2609 	char	*path;
2610 	int	uid;
2611 	int	gid;
2612 };
2613 #endif
2614 int
2615 chown(td, uap)
2616 	struct thread *td;
2617 	register struct chown_args /* {
2618 		char *path;
2619 		int uid;
2620 		int gid;
2621 	} */ *uap;
2622 {
2623 
2624 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2625 }
2626 
2627 int
2628 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2629     int gid)
2630 {
2631 	int error;
2632 	struct nameidata nd;
2633 	int vfslocked;
2634 
2635 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2636 	if ((error = namei(&nd)) != 0)
2637 		return (error);
2638 	vfslocked = NDHASGIANT(&nd);
2639 	NDFREE(&nd, NDF_ONLY_PNBUF);
2640 	error = setfown(td, nd.ni_vp, uid, gid);
2641 	vrele(nd.ni_vp);
2642 	VFS_UNLOCK_GIANT(vfslocked);
2643 	return (error);
2644 }
2645 
2646 /*
2647  * Set ownership given a path name, do not cross symlinks.
2648  */
2649 #ifndef _SYS_SYSPROTO_H_
2650 struct lchown_args {
2651 	char	*path;
2652 	int	uid;
2653 	int	gid;
2654 };
2655 #endif
2656 int
2657 lchown(td, uap)
2658 	struct thread *td;
2659 	register struct lchown_args /* {
2660 		char *path;
2661 		int uid;
2662 		int gid;
2663 	} */ *uap;
2664 {
2665 
2666 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2667 }
2668 
2669 int
2670 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2671     int gid)
2672 {
2673 	int error;
2674 	struct nameidata nd;
2675 	int vfslocked;
2676 
2677 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, pathseg, path, td);
2678 	if ((error = namei(&nd)) != 0)
2679 		return (error);
2680 	vfslocked = NDHASGIANT(&nd);
2681 	NDFREE(&nd, NDF_ONLY_PNBUF);
2682 	error = setfown(td, nd.ni_vp, uid, gid);
2683 	vrele(nd.ni_vp);
2684 	VFS_UNLOCK_GIANT(vfslocked);
2685 	return (error);
2686 }
2687 
2688 /*
2689  * Set ownership given a file descriptor.
2690  */
2691 #ifndef _SYS_SYSPROTO_H_
2692 struct fchown_args {
2693 	int	fd;
2694 	int	uid;
2695 	int	gid;
2696 };
2697 #endif
2698 int
2699 fchown(td, uap)
2700 	struct thread *td;
2701 	register struct fchown_args /* {
2702 		int fd;
2703 		int uid;
2704 		int gid;
2705 	} */ *uap;
2706 {
2707 	struct file *fp;
2708 	int vfslocked;
2709 	int error;
2710 
2711 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2712 		return (error);
2713 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2714 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2715 	fdrop(fp, td);
2716 	VFS_UNLOCK_GIANT(vfslocked);
2717 	return (error);
2718 }
2719 
2720 /*
2721  * Common implementation code for utimes(), lutimes(), and futimes().
2722  */
2723 static int
2724 getutimes(usrtvp, tvpseg, tsp)
2725 	const struct timeval *usrtvp;
2726 	enum uio_seg tvpseg;
2727 	struct timespec *tsp;
2728 {
2729 	struct timeval tv[2];
2730 	const struct timeval *tvp;
2731 	int error;
2732 
2733 	if (usrtvp == NULL) {
2734 		microtime(&tv[0]);
2735 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2736 		tsp[1] = tsp[0];
2737 	} else {
2738 		if (tvpseg == UIO_SYSSPACE) {
2739 			tvp = usrtvp;
2740 		} else {
2741 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2742 				return (error);
2743 			tvp = tv;
2744 		}
2745 
2746 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2747 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2748 	}
2749 	return (0);
2750 }
2751 
2752 /*
2753  * Common implementation code for utimes(), lutimes(), and futimes().
2754  */
2755 static int
2756 setutimes(td, vp, ts, numtimes, nullflag)
2757 	struct thread *td;
2758 	struct vnode *vp;
2759 	const struct timespec *ts;
2760 	int numtimes;
2761 	int nullflag;
2762 {
2763 	int error, setbirthtime;
2764 	struct mount *mp;
2765 	struct vattr vattr;
2766 
2767 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2768 		return (error);
2769 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2770 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2771 	setbirthtime = 0;
2772 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2773 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2774 		setbirthtime = 1;
2775 	VATTR_NULL(&vattr);
2776 	vattr.va_atime = ts[0];
2777 	vattr.va_mtime = ts[1];
2778 	if (setbirthtime)
2779 		vattr.va_birthtime = ts[1];
2780 	if (numtimes > 2)
2781 		vattr.va_birthtime = ts[2];
2782 	if (nullflag)
2783 		vattr.va_vaflags |= VA_UTIMES_NULL;
2784 #ifdef MAC
2785 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2786 	    vattr.va_mtime);
2787 #endif
2788 	if (error == 0)
2789 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2790 	VOP_UNLOCK(vp, 0, td);
2791 	vn_finished_write(mp);
2792 	return (error);
2793 }
2794 
2795 /*
2796  * Set the access and modification times of a file.
2797  */
2798 #ifndef _SYS_SYSPROTO_H_
2799 struct utimes_args {
2800 	char	*path;
2801 	struct	timeval *tptr;
2802 };
2803 #endif
2804 int
2805 utimes(td, uap)
2806 	struct thread *td;
2807 	register struct utimes_args /* {
2808 		char *path;
2809 		struct timeval *tptr;
2810 	} */ *uap;
2811 {
2812 
2813 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2814 	    UIO_USERSPACE));
2815 }
2816 
2817 int
2818 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2819     struct timeval *tptr, enum uio_seg tptrseg)
2820 {
2821 	struct timespec ts[2];
2822 	int error;
2823 	struct nameidata nd;
2824 	int vfslocked;
2825 
2826 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2827 		return (error);
2828 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2829 	if ((error = namei(&nd)) != 0)
2830 		return (error);
2831 	vfslocked = NDHASGIANT(&nd);
2832 	NDFREE(&nd, NDF_ONLY_PNBUF);
2833 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2834 	vrele(nd.ni_vp);
2835 	VFS_UNLOCK_GIANT(vfslocked);
2836 	return (error);
2837 }
2838 
2839 /*
2840  * Set the access and modification times of a file.
2841  */
2842 #ifndef _SYS_SYSPROTO_H_
2843 struct lutimes_args {
2844 	char	*path;
2845 	struct	timeval *tptr;
2846 };
2847 #endif
2848 int
2849 lutimes(td, uap)
2850 	struct thread *td;
2851 	register struct lutimes_args /* {
2852 		char *path;
2853 		struct timeval *tptr;
2854 	} */ *uap;
2855 {
2856 
2857 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2858 	    UIO_USERSPACE));
2859 }
2860 
2861 int
2862 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2863     struct timeval *tptr, enum uio_seg tptrseg)
2864 {
2865 	struct timespec ts[2];
2866 	int error;
2867 	struct nameidata nd;
2868 	int vfslocked;
2869 
2870 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2871 		return (error);
2872 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, pathseg, path, td);
2873 	if ((error = namei(&nd)) != 0)
2874 		return (error);
2875 	vfslocked = NDHASGIANT(&nd);
2876 	NDFREE(&nd, NDF_ONLY_PNBUF);
2877 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2878 	vrele(nd.ni_vp);
2879 	VFS_UNLOCK_GIANT(vfslocked);
2880 	return (error);
2881 }
2882 
2883 /*
2884  * Set the access and modification times of a file.
2885  */
2886 #ifndef _SYS_SYSPROTO_H_
2887 struct futimes_args {
2888 	int	fd;
2889 	struct	timeval *tptr;
2890 };
2891 #endif
2892 int
2893 futimes(td, uap)
2894 	struct thread *td;
2895 	register struct futimes_args /* {
2896 		int  fd;
2897 		struct timeval *tptr;
2898 	} */ *uap;
2899 {
2900 
2901 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2902 }
2903 
2904 int
2905 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2906     enum uio_seg tptrseg)
2907 {
2908 	struct timespec ts[2];
2909 	struct file *fp;
2910 	int vfslocked;
2911 	int error;
2912 
2913 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2914 		return (error);
2915 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2916 		return (error);
2917 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2918 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2919 	fdrop(fp, td);
2920 	VFS_UNLOCK_GIANT(vfslocked);
2921 	return (error);
2922 }
2923 
2924 /*
2925  * Truncate a file given its path name.
2926  */
2927 #ifndef _SYS_SYSPROTO_H_
2928 struct truncate_args {
2929 	char	*path;
2930 	int	pad;
2931 	off_t	length;
2932 };
2933 #endif
2934 int
2935 truncate(td, uap)
2936 	struct thread *td;
2937 	register struct truncate_args /* {
2938 		char *path;
2939 		int pad;
2940 		off_t length;
2941 	} */ *uap;
2942 {
2943 
2944 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2945 }
2946 
2947 int
2948 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2949 {
2950 	struct mount *mp;
2951 	struct vnode *vp;
2952 	struct vattr vattr;
2953 	int error;
2954 	struct nameidata nd;
2955 	int vfslocked;
2956 
2957 	if (length < 0)
2958 		return(EINVAL);
2959 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2960 	if ((error = namei(&nd)) != 0)
2961 		return (error);
2962 	vfslocked = NDHASGIANT(&nd);
2963 	vp = nd.ni_vp;
2964 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2965 		vrele(vp);
2966 		VFS_UNLOCK_GIANT(vfslocked);
2967 		return (error);
2968 	}
2969 	NDFREE(&nd, NDF_ONLY_PNBUF);
2970 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2971 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2972 	if (vp->v_type == VDIR)
2973 		error = EISDIR;
2974 #ifdef MAC
2975 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2976 	}
2977 #endif
2978 	else if ((error = vn_writechk(vp)) == 0 &&
2979 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2980 		VATTR_NULL(&vattr);
2981 		vattr.va_size = length;
2982 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2983 	}
2984 	vput(vp);
2985 	vn_finished_write(mp);
2986 	VFS_UNLOCK_GIANT(vfslocked);
2987 	return (error);
2988 }
2989 
2990 /*
2991  * Truncate a file given a file descriptor.
2992  */
2993 #ifndef _SYS_SYSPROTO_H_
2994 struct ftruncate_args {
2995 	int	fd;
2996 	int	pad;
2997 	off_t	length;
2998 };
2999 #endif
3000 int
3001 ftruncate(td, uap)
3002 	struct thread *td;
3003 	register struct ftruncate_args /* {
3004 		int fd;
3005 		int pad;
3006 		off_t length;
3007 	} */ *uap;
3008 {
3009 	struct mount *mp;
3010 	struct vattr vattr;
3011 	struct vnode *vp;
3012 	struct file *fp;
3013 	int vfslocked;
3014 	int error;
3015 
3016 	if (uap->length < 0)
3017 		return(EINVAL);
3018 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3019 		return (error);
3020 	if ((fp->f_flag & FWRITE) == 0) {
3021 		fdrop(fp, td);
3022 		return (EINVAL);
3023 	}
3024 	vp = fp->f_vnode;
3025 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3026 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3027 		goto drop;
3028 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3029 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3030 	if (vp->v_type == VDIR)
3031 		error = EISDIR;
3032 #ifdef MAC
3033 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3034 	    vp))) {
3035 	}
3036 #endif
3037 	else if ((error = vn_writechk(vp)) == 0) {
3038 		VATTR_NULL(&vattr);
3039 		vattr.va_size = uap->length;
3040 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3041 	}
3042 	VOP_UNLOCK(vp, 0, td);
3043 	vn_finished_write(mp);
3044 drop:
3045 	VFS_UNLOCK_GIANT(vfslocked);
3046 	fdrop(fp, td);
3047 	return (error);
3048 }
3049 
3050 #if defined(COMPAT_43)
3051 /*
3052  * Truncate a file given its path name.
3053  */
3054 #ifndef _SYS_SYSPROTO_H_
3055 struct otruncate_args {
3056 	char	*path;
3057 	long	length;
3058 };
3059 #endif
3060 int
3061 otruncate(td, uap)
3062 	struct thread *td;
3063 	register struct otruncate_args /* {
3064 		char *path;
3065 		long length;
3066 	} */ *uap;
3067 {
3068 	struct truncate_args /* {
3069 		char *path;
3070 		int pad;
3071 		off_t length;
3072 	} */ nuap;
3073 
3074 	nuap.path = uap->path;
3075 	nuap.length = uap->length;
3076 	return (truncate(td, &nuap));
3077 }
3078 
3079 /*
3080  * Truncate a file given a file descriptor.
3081  */
3082 #ifndef _SYS_SYSPROTO_H_
3083 struct oftruncate_args {
3084 	int	fd;
3085 	long	length;
3086 };
3087 #endif
3088 int
3089 oftruncate(td, uap)
3090 	struct thread *td;
3091 	register struct oftruncate_args /* {
3092 		int fd;
3093 		long length;
3094 	} */ *uap;
3095 {
3096 	struct ftruncate_args /* {
3097 		int fd;
3098 		int pad;
3099 		off_t length;
3100 	} */ nuap;
3101 
3102 	nuap.fd = uap->fd;
3103 	nuap.length = uap->length;
3104 	return (ftruncate(td, &nuap));
3105 }
3106 #endif /* COMPAT_43 */
3107 
3108 /*
3109  * Sync an open file.
3110  */
3111 #ifndef _SYS_SYSPROTO_H_
3112 struct fsync_args {
3113 	int	fd;
3114 };
3115 #endif
3116 int
3117 fsync(td, uap)
3118 	struct thread *td;
3119 	struct fsync_args /* {
3120 		int fd;
3121 	} */ *uap;
3122 {
3123 	struct vnode *vp;
3124 	struct mount *mp;
3125 	struct file *fp;
3126 	int vfslocked;
3127 	int error;
3128 
3129 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3130 		return (error);
3131 	vp = fp->f_vnode;
3132 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3133 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3134 		goto drop;
3135 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3136 	if (vp->v_object != NULL) {
3137 		VM_OBJECT_LOCK(vp->v_object);
3138 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3139 		VM_OBJECT_UNLOCK(vp->v_object);
3140 	}
3141 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3142 
3143 	VOP_UNLOCK(vp, 0, td);
3144 	vn_finished_write(mp);
3145 drop:
3146 	VFS_UNLOCK_GIANT(vfslocked);
3147 	fdrop(fp, td);
3148 	return (error);
3149 }
3150 
3151 /*
3152  * Rename files.  Source and destination must either both be directories,
3153  * or both not be directories.  If target is a directory, it must be empty.
3154  */
3155 #ifndef _SYS_SYSPROTO_H_
3156 struct rename_args {
3157 	char	*from;
3158 	char	*to;
3159 };
3160 #endif
3161 int
3162 rename(td, uap)
3163 	struct thread *td;
3164 	register struct rename_args /* {
3165 		char *from;
3166 		char *to;
3167 	} */ *uap;
3168 {
3169 
3170 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3171 }
3172 
3173 int
3174 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3175 {
3176 	struct mount *mp = NULL;
3177 	struct vnode *tvp, *fvp, *tdvp;
3178 	struct nameidata fromnd, tond;
3179 	int tvfslocked;
3180 	int fvfslocked;
3181 	int error;
3182 
3183 	bwillwrite();
3184 #ifdef MAC
3185 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE,
3186 	    pathseg, from, td);
3187 #else
3188 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE,
3189 	    pathseg, from, td);
3190 #endif
3191 	if ((error = namei(&fromnd)) != 0)
3192 		return (error);
3193 	fvfslocked = NDHASGIANT(&fromnd);
3194 	tvfslocked = 0;
3195 #ifdef MAC
3196 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3197 	    fromnd.ni_vp, &fromnd.ni_cnd);
3198 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3199 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
3200 #endif
3201 	fvp = fromnd.ni_vp;
3202 	if (error == 0)
3203 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3204 	if (error != 0) {
3205 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3206 		vrele(fromnd.ni_dvp);
3207 		vrele(fvp);
3208 		goto out1;
3209 	}
3210 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3211 	    MPSAFE, pathseg, to, td);
3212 	if (fromnd.ni_vp->v_type == VDIR)
3213 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3214 	if ((error = namei(&tond)) != 0) {
3215 		/* Translate error code for rename("dir1", "dir2/."). */
3216 		if (error == EISDIR && fvp->v_type == VDIR)
3217 			error = EINVAL;
3218 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3219 		vrele(fromnd.ni_dvp);
3220 		vrele(fvp);
3221 		goto out1;
3222 	}
3223 	tvfslocked = NDHASGIANT(&tond);
3224 	tdvp = tond.ni_dvp;
3225 	tvp = tond.ni_vp;
3226 	if (tvp != NULL) {
3227 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3228 			error = ENOTDIR;
3229 			goto out;
3230 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3231 			error = EISDIR;
3232 			goto out;
3233 		}
3234 	}
3235 	if (fvp == tdvp)
3236 		error = EINVAL;
3237 	/*
3238 	 * If the source is the same as the destination (that is, if they
3239 	 * are links to the same vnode), then there is nothing to do.
3240 	 */
3241 	if (fvp == tvp)
3242 		error = -1;
3243 #ifdef MAC
3244 	else
3245 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3246 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3247 #endif
3248 out:
3249 	if (!error) {
3250 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3251 		if (fromnd.ni_dvp != tdvp) {
3252 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3253 		}
3254 		if (tvp) {
3255 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3256 		}
3257 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3258 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3259 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3260 		NDFREE(&tond, NDF_ONLY_PNBUF);
3261 	} else {
3262 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3263 		NDFREE(&tond, NDF_ONLY_PNBUF);
3264 		if (tvp)
3265 			vput(tvp);
3266 		if (tdvp == tvp)
3267 			vrele(tdvp);
3268 		else
3269 			vput(tdvp);
3270 		vrele(fromnd.ni_dvp);
3271 		vrele(fvp);
3272 	}
3273 	vrele(tond.ni_startdir);
3274 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
3275 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
3276 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
3277 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
3278 out1:
3279 	vn_finished_write(mp);
3280 	if (fromnd.ni_startdir)
3281 		vrele(fromnd.ni_startdir);
3282 	VFS_UNLOCK_GIANT(fvfslocked);
3283 	VFS_UNLOCK_GIANT(tvfslocked);
3284 	if (error == -1)
3285 		return (0);
3286 	return (error);
3287 }
3288 
3289 /*
3290  * Make a directory file.
3291  */
3292 #ifndef _SYS_SYSPROTO_H_
3293 struct mkdir_args {
3294 	char	*path;
3295 	int	mode;
3296 };
3297 #endif
3298 int
3299 mkdir(td, uap)
3300 	struct thread *td;
3301 	register struct mkdir_args /* {
3302 		char *path;
3303 		int mode;
3304 	} */ *uap;
3305 {
3306 
3307 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3308 }
3309 
3310 int
3311 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3312 {
3313 	struct mount *mp;
3314 	struct vnode *vp;
3315 	struct vattr vattr;
3316 	int error;
3317 	struct nameidata nd;
3318 	int vfslocked;
3319 
3320 restart:
3321 	bwillwrite();
3322 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, segflg, path, td);
3323 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3324 	if ((error = namei(&nd)) != 0)
3325 		return (error);
3326 	vfslocked = NDHASGIANT(&nd);
3327 	vp = nd.ni_vp;
3328 	if (vp != NULL) {
3329 		NDFREE(&nd, NDF_ONLY_PNBUF);
3330 		vrele(vp);
3331 		/*
3332 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3333 		 * the strange behaviour of leaving the vnode unlocked
3334 		 * if the target is the same vnode as the parent.
3335 		 */
3336 		if (vp == nd.ni_dvp)
3337 			vrele(nd.ni_dvp);
3338 		else
3339 			vput(nd.ni_dvp);
3340 		VFS_UNLOCK_GIANT(vfslocked);
3341 		return (EEXIST);
3342 	}
3343 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3344 		NDFREE(&nd, NDF_ONLY_PNBUF);
3345 		vput(nd.ni_dvp);
3346 		VFS_UNLOCK_GIANT(vfslocked);
3347 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3348 			return (error);
3349 		goto restart;
3350 	}
3351 	VATTR_NULL(&vattr);
3352 	vattr.va_type = VDIR;
3353 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3354 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3355 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3356 #ifdef MAC
3357 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3358 	    &vattr);
3359 	if (error)
3360 		goto out;
3361 #endif
3362 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3363 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3364 #ifdef MAC
3365 out:
3366 #endif
3367 	NDFREE(&nd, NDF_ONLY_PNBUF);
3368 	vput(nd.ni_dvp);
3369 	if (!error)
3370 		vput(nd.ni_vp);
3371 	vn_finished_write(mp);
3372 	VFS_UNLOCK_GIANT(vfslocked);
3373 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3374 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3375 	return (error);
3376 }
3377 
3378 /*
3379  * Remove a directory file.
3380  */
3381 #ifndef _SYS_SYSPROTO_H_
3382 struct rmdir_args {
3383 	char	*path;
3384 };
3385 #endif
3386 int
3387 rmdir(td, uap)
3388 	struct thread *td;
3389 	struct rmdir_args /* {
3390 		char *path;
3391 	} */ *uap;
3392 {
3393 
3394 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3395 }
3396 
3397 int
3398 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3399 {
3400 	struct mount *mp;
3401 	struct vnode *vp;
3402 	int error;
3403 	struct nameidata nd;
3404 	int vfslocked;
3405 
3406 restart:
3407 	bwillwrite();
3408 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE, pathseg, path, td);
3409 	if ((error = namei(&nd)) != 0)
3410 		return (error);
3411 	vfslocked = NDHASGIANT(&nd);
3412 	vp = nd.ni_vp;
3413 	if (vp->v_type != VDIR) {
3414 		error = ENOTDIR;
3415 		goto out;
3416 	}
3417 	/*
3418 	 * No rmdir "." please.
3419 	 */
3420 	if (nd.ni_dvp == vp) {
3421 		error = EINVAL;
3422 		goto out;
3423 	}
3424 	/*
3425 	 * The root of a mounted filesystem cannot be deleted.
3426 	 */
3427 	if (vp->v_vflag & VV_ROOT) {
3428 		error = EBUSY;
3429 		goto out;
3430 	}
3431 #ifdef MAC
3432 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3433 	    &nd.ni_cnd);
3434 	if (error)
3435 		goto out;
3436 #endif
3437 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3438 		NDFREE(&nd, NDF_ONLY_PNBUF);
3439 		if (nd.ni_dvp == vp)
3440 			vrele(nd.ni_dvp);
3441 		else
3442 			vput(nd.ni_dvp);
3443 		vput(vp);
3444 		VFS_UNLOCK_GIANT(vfslocked);
3445 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3446 			return (error);
3447 		goto restart;
3448 	}
3449 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3450 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3451 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3452 	vn_finished_write(mp);
3453 out:
3454 	NDFREE(&nd, NDF_ONLY_PNBUF);
3455 	if (nd.ni_dvp == vp)
3456 		vrele(nd.ni_dvp);
3457 	else
3458 		vput(nd.ni_dvp);
3459 	vput(vp);
3460 	VFS_UNLOCK_GIANT(vfslocked);
3461 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3462 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3463 	return (error);
3464 }
3465 
3466 #ifdef COMPAT_43
3467 /*
3468  * Read a block of directory entries in a filesystem independent format.
3469  */
3470 #ifndef _SYS_SYSPROTO_H_
3471 struct ogetdirentries_args {
3472 	int	fd;
3473 	char	*buf;
3474 	u_int	count;
3475 	long	*basep;
3476 };
3477 #endif
3478 int
3479 ogetdirentries(td, uap)
3480 	struct thread *td;
3481 	register struct ogetdirentries_args /* {
3482 		int fd;
3483 		char *buf;
3484 		u_int count;
3485 		long *basep;
3486 	} */ *uap;
3487 {
3488 	struct vnode *vp;
3489 	struct file *fp;
3490 	struct uio auio, kuio;
3491 	struct iovec aiov, kiov;
3492 	struct dirent *dp, *edp;
3493 	caddr_t dirbuf;
3494 	int error, eofflag, readcnt;
3495 	long loff;
3496 
3497 	/* XXX arbitrary sanity limit on `count'. */
3498 	if (uap->count > 64 * 1024)
3499 		return (EINVAL);
3500 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3501 		return (error);
3502 	if ((fp->f_flag & FREAD) == 0) {
3503 		fdrop(fp, td);
3504 		return (EBADF);
3505 	}
3506 	vp = fp->f_vnode;
3507 unionread:
3508 	if (vp->v_type != VDIR) {
3509 		fdrop(fp, td);
3510 		return (EINVAL);
3511 	}
3512 	aiov.iov_base = uap->buf;
3513 	aiov.iov_len = uap->count;
3514 	auio.uio_iov = &aiov;
3515 	auio.uio_iovcnt = 1;
3516 	auio.uio_rw = UIO_READ;
3517 	auio.uio_segflg = UIO_USERSPACE;
3518 	auio.uio_td = td;
3519 	auio.uio_resid = uap->count;
3520 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3521 	loff = auio.uio_offset = fp->f_offset;
3522 #ifdef MAC
3523 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3524 	if (error) {
3525 		VOP_UNLOCK(vp, 0, td);
3526 		fdrop(fp, td);
3527 		return (error);
3528 	}
3529 #endif
3530 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3531 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3532 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3533 			    NULL, NULL);
3534 			fp->f_offset = auio.uio_offset;
3535 		} else
3536 #	endif
3537 	{
3538 		kuio = auio;
3539 		kuio.uio_iov = &kiov;
3540 		kuio.uio_segflg = UIO_SYSSPACE;
3541 		kiov.iov_len = uap->count;
3542 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3543 		kiov.iov_base = dirbuf;
3544 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3545 			    NULL, NULL);
3546 		fp->f_offset = kuio.uio_offset;
3547 		if (error == 0) {
3548 			readcnt = uap->count - kuio.uio_resid;
3549 			edp = (struct dirent *)&dirbuf[readcnt];
3550 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3551 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3552 					/*
3553 					 * The expected low byte of
3554 					 * dp->d_namlen is our dp->d_type.
3555 					 * The high MBZ byte of dp->d_namlen
3556 					 * is our dp->d_namlen.
3557 					 */
3558 					dp->d_type = dp->d_namlen;
3559 					dp->d_namlen = 0;
3560 #				else
3561 					/*
3562 					 * The dp->d_type is the high byte
3563 					 * of the expected dp->d_namlen,
3564 					 * so must be zero'ed.
3565 					 */
3566 					dp->d_type = 0;
3567 #				endif
3568 				if (dp->d_reclen > 0) {
3569 					dp = (struct dirent *)
3570 					    ((char *)dp + dp->d_reclen);
3571 				} else {
3572 					error = EIO;
3573 					break;
3574 				}
3575 			}
3576 			if (dp >= edp)
3577 				error = uiomove(dirbuf, readcnt, &auio);
3578 		}
3579 		FREE(dirbuf, M_TEMP);
3580 	}
3581 	VOP_UNLOCK(vp, 0, td);
3582 	if (error) {
3583 		fdrop(fp, td);
3584 		return (error);
3585 	}
3586 	if (uap->count == auio.uio_resid) {
3587 		if (union_dircheckp) {
3588 			error = union_dircheckp(td, &vp, fp);
3589 			if (error == -1)
3590 				goto unionread;
3591 			if (error) {
3592 				fdrop(fp, td);
3593 				return (error);
3594 			}
3595 		}
3596 		/*
3597 		 * XXX We could delay dropping the lock above but
3598 		 * union_dircheckp complicates things.
3599 		 */
3600 		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
3601 		if ((vp->v_vflag & VV_ROOT) &&
3602 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3603 			struct vnode *tvp = vp;
3604 			vp = vp->v_mount->mnt_vnodecovered;
3605 			VREF(vp);
3606 			fp->f_vnode = vp;
3607 			fp->f_data = vp;
3608 			fp->f_offset = 0;
3609 			vput(tvp);
3610 			goto unionread;
3611 		}
3612 		VOP_UNLOCK(vp, 0, td);
3613 	}
3614 	error = copyout(&loff, uap->basep, sizeof(long));
3615 	fdrop(fp, td);
3616 	td->td_retval[0] = uap->count - auio.uio_resid;
3617 	return (error);
3618 }
3619 #endif /* COMPAT_43 */
3620 
3621 /*
3622  * Read a block of directory entries in a filesystem independent format.
3623  */
3624 #ifndef _SYS_SYSPROTO_H_
3625 struct getdirentries_args {
3626 	int	fd;
3627 	char	*buf;
3628 	u_int	count;
3629 	long	*basep;
3630 };
3631 #endif
3632 int
3633 getdirentries(td, uap)
3634 	struct thread *td;
3635 	register struct getdirentries_args /* {
3636 		int fd;
3637 		char *buf;
3638 		u_int count;
3639 		long *basep;
3640 	} */ *uap;
3641 {
3642 	struct vnode *vp;
3643 	struct file *fp;
3644 	struct uio auio;
3645 	struct iovec aiov;
3646 	int vfslocked;
3647 	long loff;
3648 	int error, eofflag;
3649 
3650 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3651 		return (error);
3652 	if ((fp->f_flag & FREAD) == 0) {
3653 		fdrop(fp, td);
3654 		return (EBADF);
3655 	}
3656 	vp = fp->f_vnode;
3657 unionread:
3658 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3659 	if (vp->v_type != VDIR) {
3660 		error = EINVAL;
3661 		goto fail;
3662 	}
3663 	aiov.iov_base = uap->buf;
3664 	aiov.iov_len = uap->count;
3665 	auio.uio_iov = &aiov;
3666 	auio.uio_iovcnt = 1;
3667 	auio.uio_rw = UIO_READ;
3668 	auio.uio_segflg = UIO_USERSPACE;
3669 	auio.uio_td = td;
3670 	auio.uio_resid = uap->count;
3671 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3672 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3673 	loff = auio.uio_offset = fp->f_offset;
3674 #ifdef MAC
3675 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3676 	if (error == 0)
3677 #endif
3678 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3679 		    NULL);
3680 	fp->f_offset = auio.uio_offset;
3681 	VOP_UNLOCK(vp, 0, td);
3682 	if (error)
3683 		goto fail;
3684 	if (uap->count == auio.uio_resid) {
3685 		if (union_dircheckp) {
3686 			error = union_dircheckp(td, &vp, fp);
3687 			if (error == -1) {
3688 				VFS_UNLOCK_GIANT(vfslocked);
3689 				goto unionread;
3690 			}
3691 			if (error)
3692 				goto fail;
3693 		}
3694 		/*
3695 		 * XXX We could delay dropping the lock above but
3696 		 * union_dircheckp complicates things.
3697 		 */
3698 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3699 		if ((vp->v_vflag & VV_ROOT) &&
3700 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3701 			struct vnode *tvp = vp;
3702 			vp = vp->v_mount->mnt_vnodecovered;
3703 			VREF(vp);
3704 			fp->f_vnode = vp;
3705 			fp->f_data = vp;
3706 			fp->f_offset = 0;
3707 			vput(tvp);
3708 			VFS_UNLOCK_GIANT(vfslocked);
3709 			goto unionread;
3710 		}
3711 		VOP_UNLOCK(vp, 0, td);
3712 	}
3713 	if (uap->basep != NULL) {
3714 		error = copyout(&loff, uap->basep, sizeof(long));
3715 	}
3716 	td->td_retval[0] = uap->count - auio.uio_resid;
3717 fail:
3718 	VFS_UNLOCK_GIANT(vfslocked);
3719 	fdrop(fp, td);
3720 	return (error);
3721 }
3722 #ifndef _SYS_SYSPROTO_H_
3723 struct getdents_args {
3724 	int fd;
3725 	char *buf;
3726 	size_t count;
3727 };
3728 #endif
3729 int
3730 getdents(td, uap)
3731 	struct thread *td;
3732 	register struct getdents_args /* {
3733 		int fd;
3734 		char *buf;
3735 		u_int count;
3736 	} */ *uap;
3737 {
3738 	struct getdirentries_args ap;
3739 	ap.fd = uap->fd;
3740 	ap.buf = uap->buf;
3741 	ap.count = uap->count;
3742 	ap.basep = NULL;
3743 	return (getdirentries(td, &ap));
3744 }
3745 
3746 /*
3747  * Set the mode mask for creation of filesystem nodes.
3748  *
3749  * MP SAFE
3750  */
3751 #ifndef _SYS_SYSPROTO_H_
3752 struct umask_args {
3753 	int	newmask;
3754 };
3755 #endif
3756 int
3757 umask(td, uap)
3758 	struct thread *td;
3759 	struct umask_args /* {
3760 		int newmask;
3761 	} */ *uap;
3762 {
3763 	register struct filedesc *fdp;
3764 
3765 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3766 	fdp = td->td_proc->p_fd;
3767 	td->td_retval[0] = fdp->fd_cmask;
3768 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3769 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3770 	return (0);
3771 }
3772 
3773 /*
3774  * Void all references to file by ripping underlying filesystem
3775  * away from vnode.
3776  */
3777 #ifndef _SYS_SYSPROTO_H_
3778 struct revoke_args {
3779 	char	*path;
3780 };
3781 #endif
3782 int
3783 revoke(td, uap)
3784 	struct thread *td;
3785 	register struct revoke_args /* {
3786 		char *path;
3787 	} */ *uap;
3788 {
3789 	struct vnode *vp;
3790 	struct vattr vattr;
3791 	int error;
3792 	struct nameidata nd;
3793 	int vfslocked;
3794 
3795 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE,
3796 	    uap->path, td);
3797 	if ((error = namei(&nd)) != 0)
3798 		return (error);
3799 	vfslocked = NDHASGIANT(&nd);
3800 	vp = nd.ni_vp;
3801 	NDFREE(&nd, NDF_ONLY_PNBUF);
3802 	if (vp->v_type != VCHR) {
3803 		error = EINVAL;
3804 		goto out;
3805 	}
3806 #ifdef MAC
3807 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3808 	if (error)
3809 		goto out;
3810 #endif
3811 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3812 	if (error)
3813 		goto out;
3814 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3815 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
3816 		if (error)
3817 			goto out;
3818 	}
3819 	if (vcount(vp) > 1)
3820 		VOP_REVOKE(vp, REVOKEALL);
3821 out:
3822 	vput(vp);
3823 	VFS_UNLOCK_GIANT(vfslocked);
3824 	return (error);
3825 }
3826 
3827 /*
3828  * Convert a user file descriptor to a kernel file entry.
3829  * A reference on the file entry is held upon returning.
3830  */
3831 int
3832 getvnode(fdp, fd, fpp)
3833 	struct filedesc *fdp;
3834 	int fd;
3835 	struct file **fpp;
3836 {
3837 	int error;
3838 	struct file *fp;
3839 
3840 	fp = NULL;
3841 	if (fdp == NULL)
3842 		error = EBADF;
3843 	else {
3844 		FILEDESC_LOCK(fdp);
3845 		if ((u_int)fd >= fdp->fd_nfiles ||
3846 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3847 			error = EBADF;
3848 		else if (fp->f_vnode == NULL) {
3849 			fp = NULL;
3850 			error = EINVAL;
3851 		} else {
3852 			fhold(fp);
3853 			error = 0;
3854 		}
3855 		FILEDESC_UNLOCK(fdp);
3856 	}
3857 	*fpp = fp;
3858 	return (error);
3859 }
3860 
3861 /*
3862  * Get (NFS) file handle
3863  */
3864 #ifndef _SYS_SYSPROTO_H_
3865 struct lgetfh_args {
3866 	char	*fname;
3867 	fhandle_t *fhp;
3868 };
3869 #endif
3870 int
3871 lgetfh(td, uap)
3872 	struct thread *td;
3873 	register struct lgetfh_args *uap;
3874 {
3875 	struct nameidata nd;
3876 	fhandle_t fh;
3877 	register struct vnode *vp;
3878 	int vfslocked;
3879 	int error;
3880 
3881 	error = suser(td);
3882 	if (error)
3883 		return (error);
3884 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE,
3885 	    UIO_USERSPACE, uap->fname, td);
3886 	error = namei(&nd);
3887 	if (error)
3888 		return (error);
3889 	vfslocked = NDHASGIANT(&nd);
3890 	NDFREE(&nd, NDF_ONLY_PNBUF);
3891 	vp = nd.ni_vp;
3892 	bzero(&fh, sizeof(fh));
3893 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3894 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3895 	vput(vp);
3896 	VFS_UNLOCK_GIANT(vfslocked);
3897 	if (error)
3898 		return (error);
3899 	error = copyout(&fh, uap->fhp, sizeof (fh));
3900 	return (error);
3901 }
3902 
3903 #ifndef _SYS_SYSPROTO_H_
3904 struct getfh_args {
3905 	char	*fname;
3906 	fhandle_t *fhp;
3907 };
3908 #endif
3909 int
3910 getfh(td, uap)
3911 	struct thread *td;
3912 	register struct getfh_args *uap;
3913 {
3914 	struct nameidata nd;
3915 	fhandle_t fh;
3916 	register struct vnode *vp;
3917 	int vfslocked;
3918 	int error;
3919 
3920 	error = suser(td);
3921 	if (error)
3922 		return (error);
3923 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE,
3924 	    UIO_USERSPACE, uap->fname, td);
3925 	error = namei(&nd);
3926 	if (error)
3927 		return (error);
3928 	vfslocked = NDHASGIANT(&nd);
3929 	NDFREE(&nd, NDF_ONLY_PNBUF);
3930 	vp = nd.ni_vp;
3931 	bzero(&fh, sizeof(fh));
3932 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3933 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3934 	vput(vp);
3935 	VFS_UNLOCK_GIANT(vfslocked);
3936 	if (error)
3937 		return (error);
3938 	error = copyout(&fh, uap->fhp, sizeof (fh));
3939 	return (error);
3940 }
3941 
3942 /*
3943  * syscall for the rpc.lockd to use to translate a NFS file handle into
3944  * an open descriptor.
3945  *
3946  * warning: do not remove the suser() call or this becomes one giant
3947  * security hole.
3948  */
3949 #ifndef _SYS_SYSPROTO_H_
3950 struct fhopen_args {
3951 	const struct fhandle *u_fhp;
3952 	int flags;
3953 };
3954 #endif
3955 int
3956 fhopen(td, uap)
3957 	struct thread *td;
3958 	struct fhopen_args /* {
3959 		const struct fhandle *u_fhp;
3960 		int flags;
3961 	} */ *uap;
3962 {
3963 	struct proc *p = td->td_proc;
3964 	struct mount *mp;
3965 	struct vnode *vp;
3966 	struct fhandle fhp;
3967 	struct vattr vat;
3968 	struct vattr *vap = &vat;
3969 	struct flock lf;
3970 	struct file *fp;
3971 	register struct filedesc *fdp = p->p_fd;
3972 	int fmode, mode, error, type;
3973 	struct file *nfp;
3974 	int indx;
3975 
3976 	error = suser(td);
3977 	if (error)
3978 		return (error);
3979 	fmode = FFLAGS(uap->flags);
3980 	/* why not allow a non-read/write open for our lockd? */
3981 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3982 		return (EINVAL);
3983 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
3984 	if (error)
3985 		return(error);
3986 	/* find the mount point */
3987 	mp = vfs_getvfs(&fhp.fh_fsid);
3988 	if (mp == NULL)
3989 		return (ESTALE);
3990 	/* now give me my vnode, it gets returned to me locked */
3991 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3992 	if (error)
3993 		return (error);
3994 	/*
3995 	 * from now on we have to make sure not
3996 	 * to forget about the vnode
3997 	 * any error that causes an abort must vput(vp)
3998 	 * just set error = err and 'goto bad;'.
3999 	 */
4000 
4001 	/*
4002 	 * from vn_open
4003 	 */
4004 	if (vp->v_type == VLNK) {
4005 		error = EMLINK;
4006 		goto bad;
4007 	}
4008 	if (vp->v_type == VSOCK) {
4009 		error = EOPNOTSUPP;
4010 		goto bad;
4011 	}
4012 	mode = 0;
4013 	if (fmode & (FWRITE | O_TRUNC)) {
4014 		if (vp->v_type == VDIR) {
4015 			error = EISDIR;
4016 			goto bad;
4017 		}
4018 		error = vn_writechk(vp);
4019 		if (error)
4020 			goto bad;
4021 		mode |= VWRITE;
4022 	}
4023 	if (fmode & FREAD)
4024 		mode |= VREAD;
4025 	if (fmode & O_APPEND)
4026 		mode |= VAPPEND;
4027 #ifdef MAC
4028 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4029 	if (error)
4030 		goto bad;
4031 #endif
4032 	if (mode) {
4033 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4034 		if (error)
4035 			goto bad;
4036 	}
4037 	if (fmode & O_TRUNC) {
4038 		VOP_UNLOCK(vp, 0, td);				/* XXX */
4039 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4040 			vrele(vp);
4041 			return (error);
4042 		}
4043 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4044 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4045 #ifdef MAC
4046 		/*
4047 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4048 		 * should be right.
4049 		 */
4050 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4051 		if (error == 0) {
4052 #endif
4053 			VATTR_NULL(vap);
4054 			vap->va_size = 0;
4055 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4056 #ifdef MAC
4057 		}
4058 #endif
4059 		vn_finished_write(mp);
4060 		if (error)
4061 			goto bad;
4062 	}
4063 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4064 	if (error)
4065 		goto bad;
4066 
4067 	if (fmode & FWRITE)
4068 		vp->v_writecount++;
4069 
4070 	/*
4071 	 * end of vn_open code
4072 	 */
4073 
4074 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4075 		if (fmode & FWRITE)
4076 			vp->v_writecount--;
4077 		goto bad;
4078 	}
4079 	/* An extra reference on `nfp' has been held for us by falloc(). */
4080 	fp = nfp;
4081 
4082 	nfp->f_vnode = vp;
4083 	nfp->f_data = vp;
4084 	nfp->f_flag = fmode & FMASK;
4085 	nfp->f_ops = &vnops;
4086 	nfp->f_type = DTYPE_VNODE;
4087 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4088 		lf.l_whence = SEEK_SET;
4089 		lf.l_start = 0;
4090 		lf.l_len = 0;
4091 		if (fmode & O_EXLOCK)
4092 			lf.l_type = F_WRLCK;
4093 		else
4094 			lf.l_type = F_RDLCK;
4095 		type = F_FLOCK;
4096 		if ((fmode & FNONBLOCK) == 0)
4097 			type |= F_WAIT;
4098 		VOP_UNLOCK(vp, 0, td);
4099 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4100 			    type)) != 0) {
4101 			/*
4102 			 * The lock request failed.  Normally close the
4103 			 * descriptor but handle the case where someone might
4104 			 * have dup()d or close()d it when we weren't looking.
4105 			 */
4106 			fdclose(fdp, fp, indx, td);
4107 
4108 			/*
4109 			 * release our private reference
4110 			 */
4111 			fdrop(fp, td);
4112 			return(error);
4113 		}
4114 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4115 		fp->f_flag |= FHASLOCK;
4116 	}
4117 
4118 	VOP_UNLOCK(vp, 0, td);
4119 	fdrop(fp, td);
4120 	td->td_retval[0] = indx;
4121 	return (0);
4122 
4123 bad:
4124 	vput(vp);
4125 	return (error);
4126 }
4127 
4128 /*
4129  * Stat an (NFS) file handle.
4130  */
4131 #ifndef _SYS_SYSPROTO_H_
4132 struct fhstat_args {
4133 	struct fhandle *u_fhp;
4134 	struct stat *sb;
4135 };
4136 #endif
4137 int
4138 fhstat(td, uap)
4139 	struct thread *td;
4140 	register struct fhstat_args /* {
4141 		struct fhandle *u_fhp;
4142 		struct stat *sb;
4143 	} */ *uap;
4144 {
4145 	struct stat sb;
4146 	fhandle_t fh;
4147 	struct mount *mp;
4148 	struct vnode *vp;
4149 	int error;
4150 
4151 	error = suser(td);
4152 	if (error)
4153 		return (error);
4154 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4155 	if (error)
4156 		return (error);
4157 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4158 		return (ESTALE);
4159 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4160 		return (error);
4161 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4162 	vput(vp);
4163 	if (error)
4164 		return (error);
4165 	error = copyout(&sb, uap->sb, sizeof(sb));
4166 	return (error);
4167 }
4168 
4169 /*
4170  * Implement fstatfs() for (NFS) file handles.
4171  */
4172 #ifndef _SYS_SYSPROTO_H_
4173 struct fhstatfs_args {
4174 	struct fhandle *u_fhp;
4175 	struct statfs *buf;
4176 };
4177 #endif
4178 int
4179 fhstatfs(td, uap)
4180 	struct thread *td;
4181 	struct fhstatfs_args /* {
4182 		struct fhandle *u_fhp;
4183 		struct statfs *buf;
4184 	} */ *uap;
4185 {
4186 	struct statfs sf;
4187 	fhandle_t fh;
4188 	int error;
4189 
4190 	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
4191 		return (error);
4192 	error = kern_fhstatfs(td, fh, &sf);
4193 	if (error == 0)
4194 		error = copyout(&sf, uap->buf, sizeof(sf));
4195 	return (error);
4196 }
4197 
4198 int
4199 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4200 {
4201 	struct statfs *sp;
4202 	struct mount *mp;
4203 	struct vnode *vp;
4204 	int error;
4205 
4206 	error = suser(td);
4207 	if (error)
4208 		return (error);
4209 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4210 		return (ESTALE);
4211 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4212 		return (error);
4213 	mp = vp->v_mount;
4214 	sp = &mp->mnt_stat;
4215 	vput(vp);
4216 #ifdef MAC
4217 	error = mac_check_mount_stat(td->td_ucred, mp);
4218 	if (error)
4219 		return (error);
4220 #endif
4221 	/*
4222 	 * Set these in case the underlying filesystem fails to do so.
4223 	 */
4224 	sp->f_version = STATFS_VERSION;
4225 	sp->f_namemax = NAME_MAX;
4226 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4227 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
4228 		return (error);
4229 	*buf = *sp;
4230 	return (0);
4231 }
4232 
4233 /*
4234  * Syscall to push extended attribute configuration information into the
4235  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4236  * a command (int cmd), and attribute name and misc data.  For now, the
4237  * attribute name is left in userspace for consumption by the VFS_op.
4238  * It will probably be changed to be copied into sysspace by the
4239  * syscall in the future, once issues with various consumers of the
4240  * attribute code have raised their hands.
4241  *
4242  * Currently this is used only by UFS Extended Attributes.
4243  */
4244 int
4245 extattrctl(td, uap)
4246 	struct thread *td;
4247 	struct extattrctl_args /* {
4248 		const char *path;
4249 		int cmd;
4250 		const char *filename;
4251 		int attrnamespace;
4252 		const char *attrname;
4253 	} */ *uap;
4254 {
4255 	struct vnode *filename_vp;
4256 	struct nameidata nd;
4257 	struct mount *mp, *mp_writable;
4258 	char attrname[EXTATTR_MAXNAMELEN];
4259 	int error;
4260 
4261 	/*
4262 	 * uap->attrname is not always defined.  We check again later when we
4263 	 * invoke the VFS call so as to pass in NULL there if needed.
4264 	 */
4265 	if (uap->attrname != NULL) {
4266 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4267 		    NULL);
4268 		if (error)
4269 			return (error);
4270 	}
4271 
4272 	/*
4273 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4274 	 * which VFS_EXTATTRCTL() will later release.
4275 	 */
4276 	filename_vp = NULL;
4277 	if (uap->filename != NULL) {
4278 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
4279 		    uap->filename, td);
4280 		error = namei(&nd);
4281 		if (error)
4282 			return (error);
4283 		filename_vp = nd.ni_vp;
4284 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4285 	}
4286 
4287 	/* uap->path is always defined. */
4288 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4289 	error = namei(&nd);
4290 	if (error) {
4291 		if (filename_vp != NULL)
4292 			vput(filename_vp);
4293 		return (error);
4294 	}
4295 	mp = nd.ni_vp->v_mount;
4296 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4297 	NDFREE(&nd, 0);
4298 	if (error) {
4299 		if (filename_vp != NULL)
4300 			vput(filename_vp);
4301 		return (error);
4302 	}
4303 
4304 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
4305 	    uap->attrname != NULL ? attrname : NULL, td);
4306 
4307 	vn_finished_write(mp_writable);
4308 	/*
4309 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4310 	 * filename_vp, so vrele it if it is defined.
4311 	 */
4312 	if (filename_vp != NULL)
4313 		vrele(filename_vp);
4314 	return (error);
4315 }
4316 
4317 /*-
4318  * Set a named extended attribute on a file or directory
4319  *
4320  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4321  *            kernelspace string pointer "attrname", userspace buffer
4322  *            pointer "data", buffer length "nbytes", thread "td".
4323  * Returns: 0 on success, an error number otherwise
4324  * Locks: none
4325  * References: vp must be a valid reference for the duration of the call
4326  */
4327 static int
4328 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4329     void *data, size_t nbytes, struct thread *td)
4330 {
4331 	struct mount *mp;
4332 	struct uio auio;
4333 	struct iovec aiov;
4334 	ssize_t cnt;
4335 	int error;
4336 
4337 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4338 	if (error)
4339 		return (error);
4340 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4341 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4342 
4343 	aiov.iov_base = data;
4344 	aiov.iov_len = nbytes;
4345 	auio.uio_iov = &aiov;
4346 	auio.uio_iovcnt = 1;
4347 	auio.uio_offset = 0;
4348 	if (nbytes > INT_MAX) {
4349 		error = EINVAL;
4350 		goto done;
4351 	}
4352 	auio.uio_resid = nbytes;
4353 	auio.uio_rw = UIO_WRITE;
4354 	auio.uio_segflg = UIO_USERSPACE;
4355 	auio.uio_td = td;
4356 	cnt = nbytes;
4357 
4358 #ifdef MAC
4359 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4360 	    attrname, &auio);
4361 	if (error)
4362 		goto done;
4363 #endif
4364 
4365 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4366 	    td->td_ucred, td);
4367 	cnt -= auio.uio_resid;
4368 	td->td_retval[0] = cnt;
4369 
4370 done:
4371 	VOP_UNLOCK(vp, 0, td);
4372 	vn_finished_write(mp);
4373 	return (error);
4374 }
4375 
4376 int
4377 extattr_set_fd(td, uap)
4378 	struct thread *td;
4379 	struct extattr_set_fd_args /* {
4380 		int fd;
4381 		int attrnamespace;
4382 		const char *attrname;
4383 		void *data;
4384 		size_t nbytes;
4385 	} */ *uap;
4386 {
4387 	struct file *fp;
4388 	char attrname[EXTATTR_MAXNAMELEN];
4389 	int error;
4390 
4391 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4392 	if (error)
4393 		return (error);
4394 
4395 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4396 	if (error)
4397 		return (error);
4398 
4399 	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4400 	    attrname, uap->data, uap->nbytes, td);
4401 	fdrop(fp, td);
4402 
4403 	return (error);
4404 }
4405 
4406 int
4407 extattr_set_file(td, uap)
4408 	struct thread *td;
4409 	struct extattr_set_file_args /* {
4410 		const char *path;
4411 		int attrnamespace;
4412 		const char *attrname;
4413 		void *data;
4414 		size_t nbytes;
4415 	} */ *uap;
4416 {
4417 	struct nameidata nd;
4418 	char attrname[EXTATTR_MAXNAMELEN];
4419 	int error;
4420 
4421 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4422 	if (error)
4423 		return (error);
4424 
4425 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4426 	error = namei(&nd);
4427 	if (error)
4428 		return (error);
4429 	NDFREE(&nd, NDF_ONLY_PNBUF);
4430 
4431 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4432 	    uap->data, uap->nbytes, td);
4433 
4434 	vrele(nd.ni_vp);
4435 	return (error);
4436 }
4437 
4438 int
4439 extattr_set_link(td, uap)
4440 	struct thread *td;
4441 	struct extattr_set_link_args /* {
4442 		const char *path;
4443 		int attrnamespace;
4444 		const char *attrname;
4445 		void *data;
4446 		size_t nbytes;
4447 	} */ *uap;
4448 {
4449 	struct nameidata nd;
4450 	char attrname[EXTATTR_MAXNAMELEN];
4451 	int error;
4452 
4453 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4454 	if (error)
4455 		return (error);
4456 
4457 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4458 	error = namei(&nd);
4459 	if (error)
4460 		return (error);
4461 	NDFREE(&nd, NDF_ONLY_PNBUF);
4462 
4463 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4464 	    uap->data, uap->nbytes, td);
4465 
4466 	vrele(nd.ni_vp);
4467 	return (error);
4468 }
4469 
4470 /*-
4471  * Get a named extended attribute on a file or directory
4472  *
4473  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4474  *            kernelspace string pointer "attrname", userspace buffer
4475  *            pointer "data", buffer length "nbytes", thread "td".
4476  * Returns: 0 on success, an error number otherwise
4477  * Locks: none
4478  * References: vp must be a valid reference for the duration of the call
4479  */
4480 static int
4481 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4482     void *data, size_t nbytes, struct thread *td)
4483 {
4484 	struct uio auio, *auiop;
4485 	struct iovec aiov;
4486 	ssize_t cnt;
4487 	size_t size, *sizep;
4488 	int error;
4489 
4490 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4491 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4492 
4493 	/*
4494 	 * Slightly unusual semantics: if the user provides a NULL data
4495 	 * pointer, they don't want to receive the data, just the
4496 	 * maximum read length.
4497 	 */
4498 	auiop = NULL;
4499 	sizep = NULL;
4500 	cnt = 0;
4501 	if (data != NULL) {
4502 		aiov.iov_base = data;
4503 		aiov.iov_len = nbytes;
4504 		auio.uio_iov = &aiov;
4505 		auio.uio_offset = 0;
4506 		if (nbytes > INT_MAX) {
4507 			error = EINVAL;
4508 			goto done;
4509 		}
4510 		auio.uio_resid = nbytes;
4511 		auio.uio_rw = UIO_READ;
4512 		auio.uio_segflg = UIO_USERSPACE;
4513 		auio.uio_td = td;
4514 		auiop = &auio;
4515 		cnt = nbytes;
4516 	} else
4517 		sizep = &size;
4518 
4519 #ifdef MAC
4520 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4521 	    attrname, &auio);
4522 	if (error)
4523 		goto done;
4524 #endif
4525 
4526 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4527 	    td->td_ucred, td);
4528 
4529 	if (auiop != NULL) {
4530 		cnt -= auio.uio_resid;
4531 		td->td_retval[0] = cnt;
4532 	} else
4533 		td->td_retval[0] = size;
4534 
4535 done:
4536 	VOP_UNLOCK(vp, 0, td);
4537 	return (error);
4538 }
4539 
4540 int
4541 extattr_get_fd(td, uap)
4542 	struct thread *td;
4543 	struct extattr_get_fd_args /* {
4544 		int fd;
4545 		int attrnamespace;
4546 		const char *attrname;
4547 		void *data;
4548 		size_t nbytes;
4549 	} */ *uap;
4550 {
4551 	struct file *fp;
4552 	char attrname[EXTATTR_MAXNAMELEN];
4553 	int error;
4554 
4555 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4556 	if (error)
4557 		return (error);
4558 
4559 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4560 	if (error)
4561 		return (error);
4562 
4563 	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4564 	    attrname, uap->data, uap->nbytes, td);
4565 
4566 	fdrop(fp, td);
4567 	return (error);
4568 }
4569 
4570 int
4571 extattr_get_file(td, uap)
4572 	struct thread *td;
4573 	struct extattr_get_file_args /* {
4574 		const char *path;
4575 		int attrnamespace;
4576 		const char *attrname;
4577 		void *data;
4578 		size_t nbytes;
4579 	} */ *uap;
4580 {
4581 	struct nameidata nd;
4582 	char attrname[EXTATTR_MAXNAMELEN];
4583 	int error;
4584 
4585 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4586 	if (error)
4587 		return (error);
4588 
4589 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4590 	error = namei(&nd);
4591 	if (error)
4592 		return (error);
4593 	NDFREE(&nd, NDF_ONLY_PNBUF);
4594 
4595 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4596 	    uap->data, uap->nbytes, td);
4597 
4598 	vrele(nd.ni_vp);
4599 	return (error);
4600 }
4601 
4602 int
4603 extattr_get_link(td, uap)
4604 	struct thread *td;
4605 	struct extattr_get_link_args /* {
4606 		const char *path;
4607 		int attrnamespace;
4608 		const char *attrname;
4609 		void *data;
4610 		size_t nbytes;
4611 	} */ *uap;
4612 {
4613 	struct nameidata nd;
4614 	char attrname[EXTATTR_MAXNAMELEN];
4615 	int error;
4616 
4617 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4618 	if (error)
4619 		return (error);
4620 
4621 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4622 	error = namei(&nd);
4623 	if (error)
4624 		return (error);
4625 	NDFREE(&nd, NDF_ONLY_PNBUF);
4626 
4627 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4628 	    uap->data, uap->nbytes, td);
4629 
4630 	vrele(nd.ni_vp);
4631 	return (error);
4632 }
4633 
4634 /*
4635  * extattr_delete_vp(): Delete a named extended attribute on a file or
4636  *                      directory
4637  *
4638  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4639  *            kernelspace string pointer "attrname", proc "p"
4640  * Returns: 0 on success, an error number otherwise
4641  * Locks: none
4642  * References: vp must be a valid reference for the duration of the call
4643  */
4644 static int
4645 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4646     struct thread *td)
4647 {
4648 	struct mount *mp;
4649 	int error;
4650 
4651 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4652 	if (error)
4653 		return (error);
4654 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4655 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4656 
4657 #ifdef MAC
4658 	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4659 	    attrname);
4660 	if (error)
4661 		goto done;
4662 #endif
4663 
4664 	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4665 	    td);
4666 	if (error == EOPNOTSUPP)
4667 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4668 		    td->td_ucred, td);
4669 #ifdef MAC
4670 done:
4671 #endif
4672 	VOP_UNLOCK(vp, 0, td);
4673 	vn_finished_write(mp);
4674 	return (error);
4675 }
4676 
4677 int
4678 extattr_delete_fd(td, uap)
4679 	struct thread *td;
4680 	struct extattr_delete_fd_args /* {
4681 		int fd;
4682 		int attrnamespace;
4683 		const char *attrname;
4684 	} */ *uap;
4685 {
4686 	struct file *fp;
4687 	struct vnode *vp;
4688 	char attrname[EXTATTR_MAXNAMELEN];
4689 	int error;
4690 
4691 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4692 	if (error)
4693 		return (error);
4694 
4695 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4696 	if (error)
4697 		return (error);
4698 	vp = fp->f_vnode;
4699 
4700 	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4701 	fdrop(fp, td);
4702 	return (error);
4703 }
4704 
4705 int
4706 extattr_delete_file(td, uap)
4707 	struct thread *td;
4708 	struct extattr_delete_file_args /* {
4709 		const char *path;
4710 		int attrnamespace;
4711 		const char *attrname;
4712 	} */ *uap;
4713 {
4714 	struct nameidata nd;
4715 	char attrname[EXTATTR_MAXNAMELEN];
4716 	int error;
4717 
4718 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4719 	if (error)
4720 		return(error);
4721 
4722 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4723 	error = namei(&nd);
4724 	if (error)
4725 		return(error);
4726 	NDFREE(&nd, NDF_ONLY_PNBUF);
4727 
4728 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4729 	vrele(nd.ni_vp);
4730 	return(error);
4731 }
4732 
4733 int
4734 extattr_delete_link(td, uap)
4735 	struct thread *td;
4736 	struct extattr_delete_link_args /* {
4737 		const char *path;
4738 		int attrnamespace;
4739 		const char *attrname;
4740 	} */ *uap;
4741 {
4742 	struct nameidata nd;
4743 	char attrname[EXTATTR_MAXNAMELEN];
4744 	int error;
4745 
4746 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4747 	if (error)
4748 		return(error);
4749 
4750 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4751 	error = namei(&nd);
4752 	if (error)
4753 		return(error);
4754 	NDFREE(&nd, NDF_ONLY_PNBUF);
4755 
4756 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4757 	vrele(nd.ni_vp);
4758 	return(error);
4759 }
4760 
4761 /*-
4762  * Retrieve a list of extended attributes on a file or directory.
4763  *
4764  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4765  *            userspace buffer pointer "data", buffer length "nbytes",
4766  *            thread "td".
4767  * Returns: 0 on success, an error number otherwise
4768  * Locks: none
4769  * References: vp must be a valid reference for the duration of the call
4770  */
4771 static int
4772 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4773     size_t nbytes, struct thread *td)
4774 {
4775 	struct uio auio, *auiop;
4776 	size_t size, *sizep;
4777 	struct iovec aiov;
4778 	ssize_t cnt;
4779 	int error;
4780 
4781 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4782 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4783 
4784 	auiop = NULL;
4785 	sizep = NULL;
4786 	cnt = 0;
4787 	if (data != NULL) {
4788 		aiov.iov_base = data;
4789 		aiov.iov_len = nbytes;
4790 		auio.uio_iov = &aiov;
4791 		auio.uio_offset = 0;
4792 		if (nbytes > INT_MAX) {
4793 			error = EINVAL;
4794 			goto done;
4795 		}
4796 		auio.uio_resid = nbytes;
4797 		auio.uio_rw = UIO_READ;
4798 		auio.uio_segflg = UIO_USERSPACE;
4799 		auio.uio_td = td;
4800 		auiop = &auio;
4801 		cnt = nbytes;
4802 	} else
4803 		sizep = &size;
4804 
4805 #ifdef MAC
4806 	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4807 	if (error)
4808 		goto done;
4809 #endif
4810 
4811 	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4812 	    td->td_ucred, td);
4813 
4814 	if (auiop != NULL) {
4815 		cnt -= auio.uio_resid;
4816 		td->td_retval[0] = cnt;
4817 	} else
4818 		td->td_retval[0] = size;
4819 
4820 done:
4821 	VOP_UNLOCK(vp, 0, td);
4822 	return (error);
4823 }
4824 
4825 
4826 int
4827 extattr_list_fd(td, uap)
4828 	struct thread *td;
4829 	struct extattr_list_fd_args /* {
4830 		int fd;
4831 		int attrnamespace;
4832 		void *data;
4833 		size_t nbytes;
4834 	} */ *uap;
4835 {
4836 	struct file *fp;
4837 	int error;
4838 
4839 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4840 	if (error)
4841 		return (error);
4842 
4843 	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
4844 	    uap->nbytes, td);
4845 
4846 	fdrop(fp, td);
4847 	return (error);
4848 }
4849 
4850 int
4851 extattr_list_file(td, uap)
4852 	struct thread*td;
4853 	struct extattr_list_file_args /* {
4854 		const char *path;
4855 		int attrnamespace;
4856 		void *data;
4857 		size_t nbytes;
4858 	} */ *uap;
4859 {
4860 	struct nameidata nd;
4861 	int error;
4862 
4863 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4864 	error = namei(&nd);
4865 	if (error)
4866 		return (error);
4867 	NDFREE(&nd, NDF_ONLY_PNBUF);
4868 
4869 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4870 	    uap->nbytes, td);
4871 
4872 	vrele(nd.ni_vp);
4873 	return (error);
4874 }
4875 
4876 int
4877 extattr_list_link(td, uap)
4878 	struct thread*td;
4879 	struct extattr_list_link_args /* {
4880 		const char *path;
4881 		int attrnamespace;
4882 		void *data;
4883 		size_t nbytes;
4884 	} */ *uap;
4885 {
4886 	struct nameidata nd;
4887 	int error;
4888 
4889 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4890 	error = namei(&nd);
4891 	if (error)
4892 		return (error);
4893 	NDFREE(&nd, NDF_ONLY_PNBUF);
4894 
4895 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4896 	    uap->nbytes, td);
4897 
4898 	vrele(nd.ni_vp);
4899 	return (error);
4900 }
4901