xref: /freebsd/sys/kern/vfs_extattr.c (revision d056fa046c6a91b90cd98165face0e42a33a5173)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/mac.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/mutex.h>
52 #include <sys/sysproto.h>
53 #include <sys/namei.h>
54 #include <sys/filedesc.h>
55 #include <sys/kernel.h>
56 #include <sys/fcntl.h>
57 #include <sys/file.h>
58 #include <sys/limits.h>
59 #include <sys/linker.h>
60 #include <sys/stat.h>
61 #include <sys/sx.h>
62 #include <sys/unistd.h>
63 #include <sys/vnode.h>
64 #include <sys/proc.h>
65 #include <sys/dirent.h>
66 #include <sys/extattr.h>
67 #include <sys/jail.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/stdarg.h>
72 
73 #include <security/audit/audit.h>
74 
75 #include <vm/vm.h>
76 #include <vm/vm_object.h>
77 #include <vm/vm_page.h>
78 #include <vm/uma.h>
79 
80 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
81 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
82 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
83 static int setfmode(struct thread *td, struct vnode *, int);
84 static int setfflags(struct thread *td, struct vnode *, int);
85 static int setutimes(struct thread *td, struct vnode *,
86     const struct timespec *, int, int);
87 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
88     struct thread *td);
89 
90 static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
91     size_t nbytes, struct thread *td);
92 
93 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
94 
95 /*
96  * The module initialization routine for POSIX asynchronous I/O will
97  * set this to the version of AIO that it implements.  (Zero means
98  * that it is not implemented.)  This value is used here by pathconf()
99  * and in kern_descrip.c by fpathconf().
100  */
101 int async_io_version;
102 
103 /*
104  * Sync each mounted filesystem.
105  */
106 #ifndef _SYS_SYSPROTO_H_
107 struct sync_args {
108 	int     dummy;
109 };
110 #endif
111 
112 #ifdef DEBUG
113 static int syncprt = 0;
114 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
115 #endif
116 
117 /* ARGSUSED */
118 int
119 sync(td, uap)
120 	struct thread *td;
121 	struct sync_args *uap;
122 {
123 	struct mount *mp, *nmp;
124 	int vfslocked;
125 	int asyncflag;
126 
127 	mtx_lock(&mountlist_mtx);
128 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
129 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
130 			nmp = TAILQ_NEXT(mp, mnt_list);
131 			continue;
132 		}
133 		vfslocked = VFS_LOCK_GIANT(mp);
134 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
135 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
136 			asyncflag = mp->mnt_flag & MNT_ASYNC;
137 			mp->mnt_flag &= ~MNT_ASYNC;
138 			vfs_msync(mp, MNT_NOWAIT);
139 			VFS_SYNC(mp, MNT_NOWAIT, td);
140 			mp->mnt_flag |= asyncflag;
141 			vn_finished_write(mp);
142 		}
143 		VFS_UNLOCK_GIANT(vfslocked);
144 		mtx_lock(&mountlist_mtx);
145 		nmp = TAILQ_NEXT(mp, mnt_list);
146 		vfs_unbusy(mp, td);
147 	}
148 	mtx_unlock(&mountlist_mtx);
149 	return (0);
150 }
151 
152 /* XXX PRISON: could be per prison flag */
153 static int prison_quotas;
154 #if 0
155 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
156 #endif
157 
158 /*
159  * Change filesystem quotas.
160  *
161  * MP SAFE
162  */
163 #ifndef _SYS_SYSPROTO_H_
164 struct quotactl_args {
165 	char *path;
166 	int cmd;
167 	int uid;
168 	caddr_t arg;
169 };
170 #endif
171 int
172 quotactl(td, uap)
173 	struct thread *td;
174 	register struct quotactl_args /* {
175 		char *path;
176 		int cmd;
177 		int uid;
178 		caddr_t arg;
179 	} */ *uap;
180 {
181 	struct mount *mp, *vmp;
182 	int vfslocked;
183 	int error;
184 	struct nameidata nd;
185 
186 	AUDIT_ARG(cmd, uap->cmd);
187 	AUDIT_ARG(uid, uap->uid);
188 	if (jailed(td->td_ucred) && !prison_quotas)
189 		return (EPERM);
190 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
191 	   UIO_USERSPACE, uap->path, td);
192 	if ((error = namei(&nd)) != 0)
193 		return (error);
194 	vfslocked = NDHASGIANT(&nd);
195 	NDFREE(&nd, NDF_ONLY_PNBUF);
196 	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
197 	mp = nd.ni_vp->v_mount;
198 	vrele(nd.ni_vp);
199 	if (error)
200 		goto out;
201 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
202 	vn_finished_write(vmp);
203 out:
204 	VFS_UNLOCK_GIANT(vfslocked);
205 	return (error);
206 }
207 
208 /*
209  * Get filesystem statistics.
210  */
211 #ifndef _SYS_SYSPROTO_H_
212 struct statfs_args {
213 	char *path;
214 	struct statfs *buf;
215 };
216 #endif
217 int
218 statfs(td, uap)
219 	struct thread *td;
220 	register struct statfs_args /* {
221 		char *path;
222 		struct statfs *buf;
223 	} */ *uap;
224 {
225 	struct statfs sf;
226 	int error;
227 
228 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
229 	if (error == 0)
230 		error = copyout(&sf, uap->buf, sizeof(sf));
231 	return (error);
232 }
233 
234 int
235 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
236     struct statfs *buf)
237 {
238 	struct mount *mp;
239 	struct statfs *sp, sb;
240 	int vfslocked;
241 	int error;
242 	struct nameidata nd;
243 
244 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
245 	    pathseg, path, td);
246 	error = namei(&nd);
247 	if (error)
248 		return (error);
249 	vfslocked = NDHASGIANT(&nd);
250 	mp = nd.ni_vp->v_mount;
251 	vfs_ref(mp);
252 	NDFREE(&nd, NDF_ONLY_PNBUF);
253 	vput(nd.ni_vp);
254 #ifdef MAC
255 	error = mac_check_mount_stat(td->td_ucred, mp);
256 	if (error) {
257 		vfs_rel(mp);
258 		goto out;
259 	}
260 #endif
261 	/*
262 	 * Set these in case the underlying filesystem fails to do so.
263 	 */
264 	sp = &mp->mnt_stat;
265 	sp->f_version = STATFS_VERSION;
266 	sp->f_namemax = NAME_MAX;
267 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
268 	error = VFS_STATFS(mp, sp, td);
269 	vfs_rel(mp);
270 	if (error)
271 		goto out;
272 	if (suser(td)) {
273 		bcopy(sp, &sb, sizeof(sb));
274 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
275 		prison_enforce_statfs(td->td_ucred, mp, &sb);
276 		sp = &sb;
277 	}
278 	*buf = *sp;
279 out:
280 	VFS_UNLOCK_GIANT(vfslocked);
281 	if (mtx_owned(&Giant))
282 		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
283 	return (error);
284 }
285 
286 /*
287  * Get filesystem statistics.
288  */
289 #ifndef _SYS_SYSPROTO_H_
290 struct fstatfs_args {
291 	int fd;
292 	struct statfs *buf;
293 };
294 #endif
295 int
296 fstatfs(td, uap)
297 	struct thread *td;
298 	register struct fstatfs_args /* {
299 		int fd;
300 		struct statfs *buf;
301 	} */ *uap;
302 {
303 	struct statfs sf;
304 	int error;
305 
306 	error = kern_fstatfs(td, uap->fd, &sf);
307 	if (error == 0)
308 		error = copyout(&sf, uap->buf, sizeof(sf));
309 	return (error);
310 }
311 
312 int
313 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
314 {
315 	struct file *fp;
316 	struct mount *mp;
317 	struct statfs *sp, sb;
318 	int vfslocked;
319 	struct vnode *vp;
320 	int error;
321 
322 	AUDIT_ARG(fd, fd);
323 	error = getvnode(td->td_proc->p_fd, fd, &fp);
324 	if (error)
325 		return (error);
326 	vp = fp->f_vnode;
327 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
328 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
329 #ifdef AUDIT
330 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
331 #endif
332 	mp = vp->v_mount;
333 	if (mp)
334 		vfs_ref(mp);
335 	VOP_UNLOCK(vp, 0, td);
336 	fdrop(fp, td);
337 	if (vp->v_iflag & VI_DOOMED) {
338 		if (mp)
339 			vfs_rel(mp);
340 		error = EBADF;
341 		goto out;
342 	}
343 #ifdef MAC
344 	error = mac_check_mount_stat(td->td_ucred, mp);
345 	if (error) {
346 		vfs_rel(mp);
347 		goto out;
348 	}
349 #endif
350 	/*
351 	 * Set these in case the underlying filesystem fails to do so.
352 	 */
353 	sp = &mp->mnt_stat;
354 	sp->f_version = STATFS_VERSION;
355 	sp->f_namemax = NAME_MAX;
356 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
357 	error = VFS_STATFS(mp, sp, td);
358 	vfs_rel(mp);
359 	if (error)
360 		goto out;
361 	if (suser(td)) {
362 		bcopy(sp, &sb, sizeof(sb));
363 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
364 		prison_enforce_statfs(td->td_ucred, mp, &sb);
365 		sp = &sb;
366 	}
367 	*buf = *sp;
368 out:
369 	VFS_UNLOCK_GIANT(vfslocked);
370 	return (error);
371 }
372 
373 /*
374  * Get statistics on all filesystems.
375  */
376 #ifndef _SYS_SYSPROTO_H_
377 struct getfsstat_args {
378 	struct statfs *buf;
379 	long bufsize;
380 	int flags;
381 };
382 #endif
383 int
384 getfsstat(td, uap)
385 	struct thread *td;
386 	register struct getfsstat_args /* {
387 		struct statfs *buf;
388 		long bufsize;
389 		int flags;
390 	} */ *uap;
391 {
392 
393 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
394 	    uap->flags));
395 }
396 
397 /*
398  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
399  * 	The caller is responsible for freeing memory which will be allocated
400  *	in '*buf'.
401  */
402 int
403 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
404     enum uio_seg bufseg, int flags)
405 {
406 	struct mount *mp, *nmp;
407 	struct statfs *sfsp, *sp, sb;
408 	size_t count, maxcount;
409 	int vfslocked;
410 	int error;
411 
412 	maxcount = bufsize / sizeof(struct statfs);
413 	if (bufsize == 0)
414 		sfsp = NULL;
415 	else if (bufseg == UIO_USERSPACE)
416 		sfsp = *buf;
417 	else /* if (bufseg == UIO_SYSSPACE) */ {
418 		count = 0;
419 		mtx_lock(&mountlist_mtx);
420 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
421 			count++;
422 		}
423 		mtx_unlock(&mountlist_mtx);
424 		if (maxcount > count)
425 			maxcount = count;
426 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
427 		    M_WAITOK);
428 	}
429 	count = 0;
430 	mtx_lock(&mountlist_mtx);
431 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
432 		if (prison_canseemount(td->td_ucred, mp) != 0) {
433 			nmp = TAILQ_NEXT(mp, mnt_list);
434 			continue;
435 		}
436 #ifdef MAC
437 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
438 			nmp = TAILQ_NEXT(mp, mnt_list);
439 			continue;
440 		}
441 #endif
442 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
443 			nmp = TAILQ_NEXT(mp, mnt_list);
444 			continue;
445 		}
446 		vfslocked = VFS_LOCK_GIANT(mp);
447 		if (sfsp && count < maxcount) {
448 			sp = &mp->mnt_stat;
449 			/*
450 			 * Set these in case the underlying filesystem
451 			 * fails to do so.
452 			 */
453 			sp->f_version = STATFS_VERSION;
454 			sp->f_namemax = NAME_MAX;
455 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
456 			/*
457 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
458 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
459 			 * overrides MNT_WAIT.
460 			 */
461 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
462 			    (flags & MNT_WAIT)) &&
463 			    (error = VFS_STATFS(mp, sp, td))) {
464 				VFS_UNLOCK_GIANT(vfslocked);
465 				mtx_lock(&mountlist_mtx);
466 				nmp = TAILQ_NEXT(mp, mnt_list);
467 				vfs_unbusy(mp, td);
468 				continue;
469 			}
470 			if (suser(td)) {
471 				bcopy(sp, &sb, sizeof(sb));
472 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
473 				prison_enforce_statfs(td->td_ucred, mp, &sb);
474 				sp = &sb;
475 			}
476 			if (bufseg == UIO_SYSSPACE)
477 				bcopy(sp, sfsp, sizeof(*sp));
478 			else /* if (bufseg == UIO_USERSPACE) */ {
479 				error = copyout(sp, sfsp, sizeof(*sp));
480 				if (error) {
481 					vfs_unbusy(mp, td);
482 					VFS_UNLOCK_GIANT(vfslocked);
483 					return (error);
484 				}
485 			}
486 			sfsp++;
487 		}
488 		VFS_UNLOCK_GIANT(vfslocked);
489 		count++;
490 		mtx_lock(&mountlist_mtx);
491 		nmp = TAILQ_NEXT(mp, mnt_list);
492 		vfs_unbusy(mp, td);
493 	}
494 	mtx_unlock(&mountlist_mtx);
495 	if (sfsp && count > maxcount)
496 		td->td_retval[0] = maxcount;
497 	else
498 		td->td_retval[0] = count;
499 	return (0);
500 }
501 
502 #ifdef COMPAT_FREEBSD4
503 /*
504  * Get old format filesystem statistics.
505  */
506 static void cvtstatfs(struct statfs *, struct ostatfs *);
507 
508 #ifndef _SYS_SYSPROTO_H_
509 struct freebsd4_statfs_args {
510 	char *path;
511 	struct ostatfs *buf;
512 };
513 #endif
514 int
515 freebsd4_statfs(td, uap)
516 	struct thread *td;
517 	struct freebsd4_statfs_args /* {
518 		char *path;
519 		struct ostatfs *buf;
520 	} */ *uap;
521 {
522 	struct ostatfs osb;
523 	struct statfs sf;
524 	int error;
525 
526 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
527 	if (error)
528 		return (error);
529 	cvtstatfs(&sf, &osb);
530 	return (copyout(&osb, uap->buf, sizeof(osb)));
531 }
532 
533 /*
534  * Get filesystem statistics.
535  */
536 #ifndef _SYS_SYSPROTO_H_
537 struct freebsd4_fstatfs_args {
538 	int fd;
539 	struct ostatfs *buf;
540 };
541 #endif
542 int
543 freebsd4_fstatfs(td, uap)
544 	struct thread *td;
545 	struct freebsd4_fstatfs_args /* {
546 		int fd;
547 		struct ostatfs *buf;
548 	} */ *uap;
549 {
550 	struct ostatfs osb;
551 	struct statfs sf;
552 	int error;
553 
554 	error = kern_fstatfs(td, uap->fd, &sf);
555 	if (error)
556 		return (error);
557 	cvtstatfs(&sf, &osb);
558 	return (copyout(&osb, uap->buf, sizeof(osb)));
559 }
560 
561 /*
562  * Get statistics on all filesystems.
563  */
564 #ifndef _SYS_SYSPROTO_H_
565 struct freebsd4_getfsstat_args {
566 	struct ostatfs *buf;
567 	long bufsize;
568 	int flags;
569 };
570 #endif
571 int
572 freebsd4_getfsstat(td, uap)
573 	struct thread *td;
574 	register struct freebsd4_getfsstat_args /* {
575 		struct ostatfs *buf;
576 		long bufsize;
577 		int flags;
578 	} */ *uap;
579 {
580 	struct statfs *buf, *sp;
581 	struct ostatfs osb;
582 	size_t count, size;
583 	int error;
584 
585 	count = uap->bufsize / sizeof(struct ostatfs);
586 	size = count * sizeof(struct statfs);
587 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
588 	if (size > 0) {
589 		count = td->td_retval[0];
590 		sp = buf;
591 		while (count > 0 && error == 0) {
592 			cvtstatfs(sp, &osb);
593 			error = copyout(&osb, uap->buf, sizeof(osb));
594 			sp++;
595 			uap->buf++;
596 			count--;
597 		}
598 		free(buf, M_TEMP);
599 	}
600 	return (error);
601 }
602 
603 /*
604  * Implement fstatfs() for (NFS) file handles.
605  */
606 #ifndef _SYS_SYSPROTO_H_
607 struct freebsd4_fhstatfs_args {
608 	struct fhandle *u_fhp;
609 	struct ostatfs *buf;
610 };
611 #endif
612 int
613 freebsd4_fhstatfs(td, uap)
614 	struct thread *td;
615 	struct freebsd4_fhstatfs_args /* {
616 		struct fhandle *u_fhp;
617 		struct ostatfs *buf;
618 	} */ *uap;
619 {
620 	struct ostatfs osb;
621 	struct statfs sf;
622 	fhandle_t fh;
623 	int error;
624 
625 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
626 	if (error)
627 		return (error);
628 	error = kern_fhstatfs(td, fh, &sf);
629 	if (error)
630 		return (error);
631 	cvtstatfs(&sf, &osb);
632 	return (copyout(&osb, uap->buf, sizeof(osb)));
633 }
634 
635 /*
636  * Convert a new format statfs structure to an old format statfs structure.
637  */
638 static void
639 cvtstatfs(nsp, osp)
640 	struct statfs *nsp;
641 	struct ostatfs *osp;
642 {
643 
644 	bzero(osp, sizeof(*osp));
645 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
646 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
647 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
648 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
649 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
650 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
651 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
652 	osp->f_owner = nsp->f_owner;
653 	osp->f_type = nsp->f_type;
654 	osp->f_flags = nsp->f_flags;
655 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
656 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
657 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
658 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
659 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
660 	    MIN(MFSNAMELEN, OMFSNAMELEN));
661 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
662 	    MIN(MNAMELEN, OMNAMELEN));
663 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
664 	    MIN(MNAMELEN, OMNAMELEN));
665 	osp->f_fsid = nsp->f_fsid;
666 }
667 #endif /* COMPAT_FREEBSD4 */
668 
669 /*
670  * Change current working directory to a given file descriptor.
671  */
672 #ifndef _SYS_SYSPROTO_H_
673 struct fchdir_args {
674 	int	fd;
675 };
676 #endif
677 int
678 fchdir(td, uap)
679 	struct thread *td;
680 	struct fchdir_args /* {
681 		int fd;
682 	} */ *uap;
683 {
684 	register struct filedesc *fdp = td->td_proc->p_fd;
685 	struct vnode *vp, *tdp, *vpold;
686 	struct mount *mp;
687 	struct file *fp;
688 	int vfslocked;
689 	int error;
690 
691 	AUDIT_ARG(fd, uap->fd);
692 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
693 		return (error);
694 	vp = fp->f_vnode;
695 	VREF(vp);
696 	fdrop(fp, td);
697 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
698 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
699 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
700 	if (vp->v_type != VDIR)
701 		error = ENOTDIR;
702 #ifdef MAC
703 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
704 	}
705 #endif
706 	else
707 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
708 	while (!error && (mp = vp->v_mountedhere) != NULL) {
709 		int tvfslocked;
710 		if (vfs_busy(mp, 0, 0, td))
711 			continue;
712 		tvfslocked = VFS_LOCK_GIANT(mp);
713 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
714 		vfs_unbusy(mp, td);
715 		if (error) {
716 			VFS_UNLOCK_GIANT(tvfslocked);
717 			break;
718 		}
719 		vput(vp);
720 		VFS_UNLOCK_GIANT(vfslocked);
721 		vp = tdp;
722 		vfslocked = tvfslocked;
723 	}
724 	if (error) {
725 		vput(vp);
726 		VFS_UNLOCK_GIANT(vfslocked);
727 		return (error);
728 	}
729 	VOP_UNLOCK(vp, 0, td);
730 	VFS_UNLOCK_GIANT(vfslocked);
731 	FILEDESC_LOCK_FAST(fdp);
732 	vpold = fdp->fd_cdir;
733 	fdp->fd_cdir = vp;
734 	FILEDESC_UNLOCK_FAST(fdp);
735 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
736 	vrele(vpold);
737 	VFS_UNLOCK_GIANT(vfslocked);
738 	return (0);
739 }
740 
741 /*
742  * Change current working directory (``.'').
743  */
744 #ifndef _SYS_SYSPROTO_H_
745 struct chdir_args {
746 	char	*path;
747 };
748 #endif
749 int
750 chdir(td, uap)
751 	struct thread *td;
752 	struct chdir_args /* {
753 		char *path;
754 	} */ *uap;
755 {
756 
757 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
758 }
759 
760 int
761 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
762 {
763 	register struct filedesc *fdp = td->td_proc->p_fd;
764 	int error;
765 	struct nameidata nd;
766 	struct vnode *vp;
767 	int vfslocked;
768 
769 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
770 	    pathseg, path, td);
771 	if ((error = namei(&nd)) != 0)
772 		return (error);
773 	vfslocked = NDHASGIANT(&nd);
774 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
775 		vput(nd.ni_vp);
776 		VFS_UNLOCK_GIANT(vfslocked);
777 		NDFREE(&nd, NDF_ONLY_PNBUF);
778 		return (error);
779 	}
780 	VOP_UNLOCK(nd.ni_vp, 0, td);
781 	VFS_UNLOCK_GIANT(vfslocked);
782 	NDFREE(&nd, NDF_ONLY_PNBUF);
783 	FILEDESC_LOCK_FAST(fdp);
784 	vp = fdp->fd_cdir;
785 	fdp->fd_cdir = nd.ni_vp;
786 	FILEDESC_UNLOCK_FAST(fdp);
787 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
788 	vrele(vp);
789 	VFS_UNLOCK_GIANT(vfslocked);
790 	return (0);
791 }
792 
793 /*
794  * Helper function for raised chroot(2) security function:  Refuse if
795  * any filedescriptors are open directories.
796  */
797 static int
798 chroot_refuse_vdir_fds(fdp)
799 	struct filedesc *fdp;
800 {
801 	struct vnode *vp;
802 	struct file *fp;
803 	int fd;
804 
805 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
806 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
807 		fp = fget_locked(fdp, fd);
808 		if (fp == NULL)
809 			continue;
810 		if (fp->f_type == DTYPE_VNODE) {
811 			vp = fp->f_vnode;
812 			if (vp->v_type == VDIR)
813 				return (EPERM);
814 		}
815 	}
816 	return (0);
817 }
818 
819 /*
820  * This sysctl determines if we will allow a process to chroot(2) if it
821  * has a directory open:
822  *	0: disallowed for all processes.
823  *	1: allowed for processes that were not already chroot(2)'ed.
824  *	2: allowed for all processes.
825  */
826 
827 static int chroot_allow_open_directories = 1;
828 
829 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
830      &chroot_allow_open_directories, 0, "");
831 
832 /*
833  * Change notion of root (``/'') directory.
834  */
835 #ifndef _SYS_SYSPROTO_H_
836 struct chroot_args {
837 	char	*path;
838 };
839 #endif
840 int
841 chroot(td, uap)
842 	struct thread *td;
843 	struct chroot_args /* {
844 		char *path;
845 	} */ *uap;
846 {
847 	int error;
848 	struct nameidata nd;
849 	int vfslocked;
850 
851 	error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
852 	if (error)
853 		return (error);
854 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
855 	    UIO_USERSPACE, uap->path, td);
856 	error = namei(&nd);
857 	if (error)
858 		goto error;
859 	vfslocked = NDHASGIANT(&nd);
860 	if ((error = change_dir(nd.ni_vp, td)) != 0)
861 		goto e_vunlock;
862 #ifdef MAC
863 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
864 		goto e_vunlock;
865 #endif
866 	VOP_UNLOCK(nd.ni_vp, 0, td);
867 	error = change_root(nd.ni_vp, td);
868 	vrele(nd.ni_vp);
869 	VFS_UNLOCK_GIANT(vfslocked);
870 	NDFREE(&nd, NDF_ONLY_PNBUF);
871 	return (error);
872 e_vunlock:
873 	vput(nd.ni_vp);
874 	VFS_UNLOCK_GIANT(vfslocked);
875 error:
876 	NDFREE(&nd, NDF_ONLY_PNBUF);
877 	return (error);
878 }
879 
880 /*
881  * Common routine for chroot and chdir.  Callers must provide a locked vnode
882  * instance.
883  */
884 int
885 change_dir(vp, td)
886 	struct vnode *vp;
887 	struct thread *td;
888 {
889 	int error;
890 
891 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
892 	if (vp->v_type != VDIR)
893 		return (ENOTDIR);
894 #ifdef MAC
895 	error = mac_check_vnode_chdir(td->td_ucred, vp);
896 	if (error)
897 		return (error);
898 #endif
899 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
900 	return (error);
901 }
902 
903 /*
904  * Common routine for kern_chroot() and jail_attach().  The caller is
905  * responsible for invoking suser() and mac_check_chroot() to authorize this
906  * operation.
907  */
908 int
909 change_root(vp, td)
910 	struct vnode *vp;
911 	struct thread *td;
912 {
913 	struct filedesc *fdp;
914 	struct vnode *oldvp;
915 	int vfslocked;
916 	int error;
917 
918 	VFS_ASSERT_GIANT(vp->v_mount);
919 	fdp = td->td_proc->p_fd;
920 	FILEDESC_LOCK(fdp);
921 	if (chroot_allow_open_directories == 0 ||
922 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
923 		error = chroot_refuse_vdir_fds(fdp);
924 		if (error) {
925 			FILEDESC_UNLOCK(fdp);
926 			return (error);
927 		}
928 	}
929 	oldvp = fdp->fd_rdir;
930 	fdp->fd_rdir = vp;
931 	VREF(fdp->fd_rdir);
932 	if (!fdp->fd_jdir) {
933 		fdp->fd_jdir = vp;
934 		VREF(fdp->fd_jdir);
935 	}
936 	FILEDESC_UNLOCK(fdp);
937 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
938 	vrele(oldvp);
939 	VFS_UNLOCK_GIANT(vfslocked);
940 	return (0);
941 }
942 
943 /*
944  * Check permissions, allocate an open file structure,
945  * and call the device open routine if any.
946  *
947  * MP SAFE
948  */
949 #ifndef _SYS_SYSPROTO_H_
950 struct open_args {
951 	char	*path;
952 	int	flags;
953 	int	mode;
954 };
955 #endif
956 int
957 open(td, uap)
958 	struct thread *td;
959 	register struct open_args /* {
960 		char *path;
961 		int flags;
962 		int mode;
963 	} */ *uap;
964 {
965 
966 	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
967 }
968 
969 int
970 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
971     int mode)
972 {
973 	struct proc *p = td->td_proc;
974 	struct filedesc *fdp = p->p_fd;
975 	struct file *fp;
976 	struct vnode *vp;
977 	struct vattr vat;
978 	struct mount *mp;
979 	int cmode;
980 	struct file *nfp;
981 	int type, indx, error;
982 	struct flock lf;
983 	struct nameidata nd;
984 	int vfslocked;
985 
986 	AUDIT_ARG(fflags, flags);
987 	AUDIT_ARG(mode, mode);
988 	if ((flags & O_ACCMODE) == O_ACCMODE)
989 		return (EINVAL);
990 	flags = FFLAGS(flags);
991 	error = falloc(td, &nfp, &indx);
992 	if (error)
993 		return (error);
994 	/* An extra reference on `nfp' has been held for us by falloc(). */
995 	fp = nfp;
996 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
997 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
998 	td->td_dupfd = -1;		/* XXX check for fdopen */
999 	error = vn_open(&nd, &flags, cmode, indx);
1000 	if (error) {
1001 		/*
1002 		 * If the vn_open replaced the method vector, something
1003 		 * wonderous happened deep below and we just pass it up
1004 		 * pretending we know what we do.
1005 		 */
1006 		if (error == ENXIO && fp->f_ops != &badfileops) {
1007 			fdrop(fp, td);
1008 			td->td_retval[0] = indx;
1009 			return (0);
1010 		}
1011 
1012 		/*
1013 		 * release our own reference
1014 		 */
1015 		fdrop(fp, td);
1016 
1017 		/*
1018 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1019 		 * responsible for dropping the old contents of ofiles[indx]
1020 		 * if it succeeds.
1021 		 */
1022 		if ((error == ENODEV || error == ENXIO) &&
1023 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1024 		    (error =
1025 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1026 			td->td_retval[0] = indx;
1027 			return (0);
1028 		}
1029 		/*
1030 		 * Clean up the descriptor, but only if another thread hadn't
1031 		 * replaced or closed it.
1032 		 */
1033 		fdclose(fdp, fp, indx, td);
1034 
1035 		if (error == ERESTART)
1036 			error = EINTR;
1037 		return (error);
1038 	}
1039 	td->td_dupfd = 0;
1040 	vfslocked = NDHASGIANT(&nd);
1041 	NDFREE(&nd, NDF_ONLY_PNBUF);
1042 	vp = nd.ni_vp;
1043 
1044 	/*
1045 	 * There should be 2 references on the file, one from the descriptor
1046 	 * table, and one for us.
1047 	 *
1048 	 * Handle the case where someone closed the file (via its file
1049 	 * descriptor) while we were blocked.  The end result should look
1050 	 * like opening the file succeeded but it was immediately closed.
1051 	 * We call vn_close() manually because we haven't yet hooked up
1052 	 * the various 'struct file' fields.
1053 	 */
1054 	FILEDESC_LOCK(fdp);
1055 	FILE_LOCK(fp);
1056 	if (fp->f_count == 1) {
1057 		mp = vp->v_mount;
1058 		KASSERT(fdp->fd_ofiles[indx] != fp,
1059 		    ("Open file descriptor lost all refs"));
1060 		FILE_UNLOCK(fp);
1061 		FILEDESC_UNLOCK(fdp);
1062 		VOP_UNLOCK(vp, 0, td);
1063 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1064 		VFS_UNLOCK_GIANT(vfslocked);
1065 		fdrop(fp, td);
1066 		td->td_retval[0] = indx;
1067 		return (0);
1068 	}
1069 	fp->f_vnode = vp;
1070 	if (fp->f_data == NULL)
1071 		fp->f_data = vp;
1072 	fp->f_flag = flags & FMASK;
1073 	if (fp->f_ops == &badfileops)
1074 		fp->f_ops = &vnops;
1075 	fp->f_seqcount = 1;
1076 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1077 	FILE_UNLOCK(fp);
1078 	FILEDESC_UNLOCK(fdp);
1079 
1080 	VOP_UNLOCK(vp, 0, td);
1081 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1082 		lf.l_whence = SEEK_SET;
1083 		lf.l_start = 0;
1084 		lf.l_len = 0;
1085 		if (flags & O_EXLOCK)
1086 			lf.l_type = F_WRLCK;
1087 		else
1088 			lf.l_type = F_RDLCK;
1089 		type = F_FLOCK;
1090 		if ((flags & FNONBLOCK) == 0)
1091 			type |= F_WAIT;
1092 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1093 			    type)) != 0)
1094 			goto bad;
1095 		fp->f_flag |= FHASLOCK;
1096 	}
1097 	if (flags & O_TRUNC) {
1098 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1099 			goto bad;
1100 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1101 		VATTR_NULL(&vat);
1102 		vat.va_size = 0;
1103 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1104 #ifdef MAC
1105 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1106 		if (error == 0)
1107 #endif
1108 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1109 		VOP_UNLOCK(vp, 0, td);
1110 		vn_finished_write(mp);
1111 		if (error)
1112 			goto bad;
1113 	}
1114 	VFS_UNLOCK_GIANT(vfslocked);
1115 	/*
1116 	 * Release our private reference, leaving the one associated with
1117 	 * the descriptor table intact.
1118 	 */
1119 	fdrop(fp, td);
1120 	td->td_retval[0] = indx;
1121 	return (0);
1122 bad:
1123 	VFS_UNLOCK_GIANT(vfslocked);
1124 	fdclose(fdp, fp, indx, td);
1125 	fdrop(fp, td);
1126 	return (error);
1127 }
1128 
1129 #ifdef COMPAT_43
1130 /*
1131  * Create a file.
1132  *
1133  * MP SAFE
1134  */
1135 #ifndef _SYS_SYSPROTO_H_
1136 struct ocreat_args {
1137 	char	*path;
1138 	int	mode;
1139 };
1140 #endif
1141 int
1142 ocreat(td, uap)
1143 	struct thread *td;
1144 	register struct ocreat_args /* {
1145 		char *path;
1146 		int mode;
1147 	} */ *uap;
1148 {
1149 
1150 	return (kern_open(td, uap->path, UIO_USERSPACE,
1151 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1152 }
1153 #endif /* COMPAT_43 */
1154 
1155 /*
1156  * Create a special file.
1157  */
1158 #ifndef _SYS_SYSPROTO_H_
1159 struct mknod_args {
1160 	char	*path;
1161 	int	mode;
1162 	int	dev;
1163 };
1164 #endif
1165 int
1166 mknod(td, uap)
1167 	struct thread *td;
1168 	register struct mknod_args /* {
1169 		char *path;
1170 		int mode;
1171 		int dev;
1172 	} */ *uap;
1173 {
1174 
1175 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1176 }
1177 
1178 int
1179 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1180     int dev)
1181 {
1182 	struct vnode *vp;
1183 	struct mount *mp;
1184 	struct vattr vattr;
1185 	int error;
1186 	int whiteout = 0;
1187 	struct nameidata nd;
1188 	int vfslocked;
1189 
1190 	AUDIT_ARG(mode, mode);
1191 	AUDIT_ARG(dev, dev);
1192 	switch (mode & S_IFMT) {
1193 	case S_IFCHR:
1194 	case S_IFBLK:
1195 		error = suser(td);
1196 		break;
1197 	default:
1198 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
1199 		break;
1200 	}
1201 	if (error)
1202 		return (error);
1203 restart:
1204 	bwillwrite();
1205 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1206 	    pathseg, path, td);
1207 	if ((error = namei(&nd)) != 0)
1208 		return (error);
1209 	vfslocked = NDHASGIANT(&nd);
1210 	vp = nd.ni_vp;
1211 	if (vp != NULL) {
1212 		NDFREE(&nd, NDF_ONLY_PNBUF);
1213 		if (vp == nd.ni_dvp)
1214 			vrele(nd.ni_dvp);
1215 		else
1216 			vput(nd.ni_dvp);
1217 		vrele(vp);
1218 		VFS_UNLOCK_GIANT(vfslocked);
1219 		return (EEXIST);
1220 	} else {
1221 		VATTR_NULL(&vattr);
1222 		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1223 		vattr.va_mode = (mode & ALLPERMS) &
1224 		    ~td->td_proc->p_fd->fd_cmask;
1225 		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1226 		vattr.va_rdev = dev;
1227 		whiteout = 0;
1228 
1229 		switch (mode & S_IFMT) {
1230 		case S_IFMT:	/* used by badsect to flag bad sectors */
1231 			vattr.va_type = VBAD;
1232 			break;
1233 		case S_IFCHR:
1234 			vattr.va_type = VCHR;
1235 			break;
1236 		case S_IFBLK:
1237 			vattr.va_type = VBLK;
1238 			break;
1239 		case S_IFWHT:
1240 			whiteout = 1;
1241 			break;
1242 		default:
1243 			error = EINVAL;
1244 			break;
1245 		}
1246 	}
1247 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1248 		NDFREE(&nd, NDF_ONLY_PNBUF);
1249 		vput(nd.ni_dvp);
1250 		VFS_UNLOCK_GIANT(vfslocked);
1251 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1252 			return (error);
1253 		goto restart;
1254 	}
1255 #ifdef MAC
1256 	if (error == 0 && !whiteout)
1257 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1258 		    &nd.ni_cnd, &vattr);
1259 #endif
1260 	if (!error) {
1261 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1262 		if (whiteout)
1263 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1264 		else {
1265 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1266 						&nd.ni_cnd, &vattr);
1267 			if (error == 0)
1268 				vput(nd.ni_vp);
1269 		}
1270 	}
1271 	NDFREE(&nd, NDF_ONLY_PNBUF);
1272 	vput(nd.ni_dvp);
1273 	vn_finished_write(mp);
1274 	VFS_UNLOCK_GIANT(vfslocked);
1275 	return (error);
1276 }
1277 
1278 /*
1279  * Create a named pipe.
1280  */
1281 #ifndef _SYS_SYSPROTO_H_
1282 struct mkfifo_args {
1283 	char	*path;
1284 	int	mode;
1285 };
1286 #endif
1287 int
1288 mkfifo(td, uap)
1289 	struct thread *td;
1290 	register struct mkfifo_args /* {
1291 		char *path;
1292 		int mode;
1293 	} */ *uap;
1294 {
1295 
1296 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1297 }
1298 
1299 int
1300 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1301 {
1302 	struct mount *mp;
1303 	struct vattr vattr;
1304 	int error;
1305 	struct nameidata nd;
1306 	int vfslocked;
1307 
1308 	AUDIT_ARG(mode, mode);
1309 restart:
1310 	bwillwrite();
1311 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1312 	    pathseg, path, td);
1313 	if ((error = namei(&nd)) != 0)
1314 		return (error);
1315 	vfslocked = NDHASGIANT(&nd);
1316 	if (nd.ni_vp != NULL) {
1317 		NDFREE(&nd, NDF_ONLY_PNBUF);
1318 		if (nd.ni_vp == nd.ni_dvp)
1319 			vrele(nd.ni_dvp);
1320 		else
1321 			vput(nd.ni_dvp);
1322 		vrele(nd.ni_vp);
1323 		VFS_UNLOCK_GIANT(vfslocked);
1324 		return (EEXIST);
1325 	}
1326 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1327 		NDFREE(&nd, NDF_ONLY_PNBUF);
1328 		vput(nd.ni_dvp);
1329 		VFS_UNLOCK_GIANT(vfslocked);
1330 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1331 			return (error);
1332 		goto restart;
1333 	}
1334 	VATTR_NULL(&vattr);
1335 	vattr.va_type = VFIFO;
1336 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1337 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1338 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1339 #ifdef MAC
1340 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1341 	    &vattr);
1342 	if (error)
1343 		goto out;
1344 #endif
1345 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1346 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1347 	if (error == 0)
1348 		vput(nd.ni_vp);
1349 #ifdef MAC
1350 out:
1351 #endif
1352 	vput(nd.ni_dvp);
1353 	vn_finished_write(mp);
1354 	VFS_UNLOCK_GIANT(vfslocked);
1355 	NDFREE(&nd, NDF_ONLY_PNBUF);
1356 	return (error);
1357 }
1358 
1359 /*
1360  * Make a hard file link.
1361  */
1362 #ifndef _SYS_SYSPROTO_H_
1363 struct link_args {
1364 	char	*path;
1365 	char	*link;
1366 };
1367 #endif
1368 int
1369 link(td, uap)
1370 	struct thread *td;
1371 	register struct link_args /* {
1372 		char *path;
1373 		char *link;
1374 	} */ *uap;
1375 {
1376 	int error;
1377 
1378 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1379 	return (error);
1380 }
1381 
1382 SYSCTL_DECL(_security_bsd);
1383 
1384 static int hardlink_check_uid = 0;
1385 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1386     &hardlink_check_uid, 0,
1387     "Unprivileged processes cannot create hard links to files owned by other "
1388     "users");
1389 static int hardlink_check_gid = 0;
1390 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1391     &hardlink_check_gid, 0,
1392     "Unprivileged processes cannot create hard links to files owned by other "
1393     "groups");
1394 
1395 static int
1396 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1397 {
1398 	struct vattr va;
1399 	int error;
1400 
1401 	if (suser_cred(cred, SUSER_ALLOWJAIL) == 0)
1402 		return (0);
1403 
1404 	if (!hardlink_check_uid && !hardlink_check_gid)
1405 		return (0);
1406 
1407 	error = VOP_GETATTR(vp, &va, cred, td);
1408 	if (error != 0)
1409 		return (error);
1410 
1411 	if (hardlink_check_uid) {
1412 		if (cred->cr_uid != va.va_uid)
1413 			return (EPERM);
1414 	}
1415 
1416 	if (hardlink_check_gid) {
1417 		if (!groupmember(va.va_gid, cred))
1418 			return (EPERM);
1419 	}
1420 
1421 	return (0);
1422 }
1423 
1424 int
1425 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1426 {
1427 	struct vnode *vp;
1428 	struct mount *mp;
1429 	struct nameidata nd;
1430 	int vfslocked;
1431 	int lvfslocked;
1432 	int error;
1433 
1434 	bwillwrite();
1435 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1436 	if ((error = namei(&nd)) != 0)
1437 		return (error);
1438 	vfslocked = NDHASGIANT(&nd);
1439 	NDFREE(&nd, NDF_ONLY_PNBUF);
1440 	vp = nd.ni_vp;
1441 	if (vp->v_type == VDIR) {
1442 		vrele(vp);
1443 		VFS_UNLOCK_GIANT(vfslocked);
1444 		return (EPERM);		/* POSIX */
1445 	}
1446 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1447 		vrele(vp);
1448 		VFS_UNLOCK_GIANT(vfslocked);
1449 		return (error);
1450 	}
1451 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1452 	    segflg, link, td);
1453 	if ((error = namei(&nd)) == 0) {
1454 		lvfslocked = NDHASGIANT(&nd);
1455 		if (nd.ni_vp != NULL) {
1456 			if (nd.ni_dvp == nd.ni_vp)
1457 				vrele(nd.ni_dvp);
1458 			else
1459 				vput(nd.ni_dvp);
1460 			vrele(nd.ni_vp);
1461 			error = EEXIST;
1462 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1463 		    == 0) {
1464 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1465 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1466 			error = can_hardlink(vp, td, td->td_ucred);
1467 			if (error == 0)
1468 #ifdef MAC
1469 				error = mac_check_vnode_link(td->td_ucred,
1470 				    nd.ni_dvp, vp, &nd.ni_cnd);
1471 			if (error == 0)
1472 #endif
1473 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1474 			VOP_UNLOCK(vp, 0, td);
1475 			vput(nd.ni_dvp);
1476 		}
1477 		NDFREE(&nd, NDF_ONLY_PNBUF);
1478 		VFS_UNLOCK_GIANT(lvfslocked);
1479 	}
1480 	vrele(vp);
1481 	vn_finished_write(mp);
1482 	VFS_UNLOCK_GIANT(vfslocked);
1483 	return (error);
1484 }
1485 
1486 /*
1487  * Make a symbolic link.
1488  */
1489 #ifndef _SYS_SYSPROTO_H_
1490 struct symlink_args {
1491 	char	*path;
1492 	char	*link;
1493 };
1494 #endif
1495 int
1496 symlink(td, uap)
1497 	struct thread *td;
1498 	register struct symlink_args /* {
1499 		char *path;
1500 		char *link;
1501 	} */ *uap;
1502 {
1503 
1504 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1505 }
1506 
1507 int
1508 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1509 {
1510 	struct mount *mp;
1511 	struct vattr vattr;
1512 	char *syspath;
1513 	int error;
1514 	struct nameidata nd;
1515 	int vfslocked;
1516 
1517 	if (segflg == UIO_SYSSPACE) {
1518 		syspath = path;
1519 	} else {
1520 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1521 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1522 			goto out;
1523 	}
1524 	AUDIT_ARG(text, syspath);
1525 restart:
1526 	bwillwrite();
1527 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1528 	    segflg, link, td);
1529 	if ((error = namei(&nd)) != 0)
1530 		goto out;
1531 	vfslocked = NDHASGIANT(&nd);
1532 	if (nd.ni_vp) {
1533 		NDFREE(&nd, NDF_ONLY_PNBUF);
1534 		if (nd.ni_vp == nd.ni_dvp)
1535 			vrele(nd.ni_dvp);
1536 		else
1537 			vput(nd.ni_dvp);
1538 		vrele(nd.ni_vp);
1539 		VFS_UNLOCK_GIANT(vfslocked);
1540 		error = EEXIST;
1541 		goto out;
1542 	}
1543 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1544 		NDFREE(&nd, NDF_ONLY_PNBUF);
1545 		vput(nd.ni_dvp);
1546 		VFS_UNLOCK_GIANT(vfslocked);
1547 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1548 			goto out;
1549 		goto restart;
1550 	}
1551 	VATTR_NULL(&vattr);
1552 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1553 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1554 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1555 #ifdef MAC
1556 	vattr.va_type = VLNK;
1557 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1558 	    &vattr);
1559 	if (error)
1560 		goto out2;
1561 #endif
1562 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1563 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1564 	if (error == 0)
1565 		vput(nd.ni_vp);
1566 #ifdef MAC
1567 out2:
1568 #endif
1569 	NDFREE(&nd, NDF_ONLY_PNBUF);
1570 	vput(nd.ni_dvp);
1571 	vn_finished_write(mp);
1572 	VFS_UNLOCK_GIANT(vfslocked);
1573 out:
1574 	if (segflg != UIO_SYSSPACE)
1575 		uma_zfree(namei_zone, syspath);
1576 	return (error);
1577 }
1578 
1579 /*
1580  * Delete a whiteout from the filesystem.
1581  */
1582 int
1583 undelete(td, uap)
1584 	struct thread *td;
1585 	register struct undelete_args /* {
1586 		char *path;
1587 	} */ *uap;
1588 {
1589 	int error;
1590 	struct mount *mp;
1591 	struct nameidata nd;
1592 	int vfslocked;
1593 
1594 restart:
1595 	bwillwrite();
1596 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1597 	    UIO_USERSPACE, uap->path, td);
1598 	error = namei(&nd);
1599 	if (error)
1600 		return (error);
1601 	vfslocked = NDHASGIANT(&nd);
1602 
1603 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1604 		NDFREE(&nd, NDF_ONLY_PNBUF);
1605 		if (nd.ni_vp == nd.ni_dvp)
1606 			vrele(nd.ni_dvp);
1607 		else
1608 			vput(nd.ni_dvp);
1609 		if (nd.ni_vp)
1610 			vrele(nd.ni_vp);
1611 		VFS_UNLOCK_GIANT(vfslocked);
1612 		return (EEXIST);
1613 	}
1614 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1615 		NDFREE(&nd, NDF_ONLY_PNBUF);
1616 		vput(nd.ni_dvp);
1617 		VFS_UNLOCK_GIANT(vfslocked);
1618 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1619 			return (error);
1620 		goto restart;
1621 	}
1622 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1623 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1624 	NDFREE(&nd, NDF_ONLY_PNBUF);
1625 	vput(nd.ni_dvp);
1626 	vn_finished_write(mp);
1627 	VFS_UNLOCK_GIANT(vfslocked);
1628 	return (error);
1629 }
1630 
1631 /*
1632  * Delete a name from the filesystem.
1633  */
1634 #ifndef _SYS_SYSPROTO_H_
1635 struct unlink_args {
1636 	char	*path;
1637 };
1638 #endif
1639 int
1640 unlink(td, uap)
1641 	struct thread *td;
1642 	struct unlink_args /* {
1643 		char *path;
1644 	} */ *uap;
1645 {
1646 	int error;
1647 
1648 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1649 	return (error);
1650 }
1651 
1652 int
1653 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1654 {
1655 	struct mount *mp;
1656 	struct vnode *vp;
1657 	int error;
1658 	struct nameidata nd;
1659 	int vfslocked;
1660 
1661 restart:
1662 	bwillwrite();
1663 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1664 	    pathseg, path, td);
1665 	if ((error = namei(&nd)) != 0)
1666 		return (error == EINVAL ? EPERM : error);
1667 	vfslocked = NDHASGIANT(&nd);
1668 	vp = nd.ni_vp;
1669 	if (vp->v_type == VDIR)
1670 		error = EPERM;		/* POSIX */
1671 	else {
1672 		/*
1673 		 * The root of a mounted filesystem cannot be deleted.
1674 		 *
1675 		 * XXX: can this only be a VDIR case?
1676 		 */
1677 		if (vp->v_vflag & VV_ROOT)
1678 			error = EBUSY;
1679 	}
1680 	if (error == 0) {
1681 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1682 			NDFREE(&nd, NDF_ONLY_PNBUF);
1683 			vput(nd.ni_dvp);
1684 			if (vp == nd.ni_dvp)
1685 				vrele(vp);
1686 			else
1687 				vput(vp);
1688 			VFS_UNLOCK_GIANT(vfslocked);
1689 			if ((error = vn_start_write(NULL, &mp,
1690 			    V_XSLEEP | PCATCH)) != 0)
1691 				return (error);
1692 			goto restart;
1693 		}
1694 #ifdef MAC
1695 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1696 		    &nd.ni_cnd);
1697 		if (error)
1698 			goto out;
1699 #endif
1700 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1701 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1702 #ifdef MAC
1703 out:
1704 #endif
1705 		vn_finished_write(mp);
1706 	}
1707 	NDFREE(&nd, NDF_ONLY_PNBUF);
1708 	vput(nd.ni_dvp);
1709 	if (vp == nd.ni_dvp)
1710 		vrele(vp);
1711 	else
1712 		vput(vp);
1713 	VFS_UNLOCK_GIANT(vfslocked);
1714 	return (error);
1715 }
1716 
1717 /*
1718  * Reposition read/write file offset.
1719  */
1720 #ifndef _SYS_SYSPROTO_H_
1721 struct lseek_args {
1722 	int	fd;
1723 	int	pad;
1724 	off_t	offset;
1725 	int	whence;
1726 };
1727 #endif
1728 int
1729 lseek(td, uap)
1730 	struct thread *td;
1731 	register struct lseek_args /* {
1732 		int fd;
1733 		int pad;
1734 		off_t offset;
1735 		int whence;
1736 	} */ *uap;
1737 {
1738 	struct ucred *cred = td->td_ucred;
1739 	struct file *fp;
1740 	struct vnode *vp;
1741 	struct vattr vattr;
1742 	off_t offset;
1743 	int error, noneg;
1744 	int vfslocked;
1745 
1746 	if ((error = fget(td, uap->fd, &fp)) != 0)
1747 		return (error);
1748 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1749 		fdrop(fp, td);
1750 		return (ESPIPE);
1751 	}
1752 	vp = fp->f_vnode;
1753 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1754 	noneg = (vp->v_type != VCHR);
1755 	offset = uap->offset;
1756 	switch (uap->whence) {
1757 	case L_INCR:
1758 		if (noneg &&
1759 		    (fp->f_offset < 0 ||
1760 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1761 			error = EOVERFLOW;
1762 			break;
1763 		}
1764 		offset += fp->f_offset;
1765 		break;
1766 	case L_XTND:
1767 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1768 		error = VOP_GETATTR(vp, &vattr, cred, td);
1769 		VOP_UNLOCK(vp, 0, td);
1770 		if (error)
1771 			break;
1772 		if (noneg &&
1773 		    (vattr.va_size > OFF_MAX ||
1774 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1775 			error = EOVERFLOW;
1776 			break;
1777 		}
1778 		offset += vattr.va_size;
1779 		break;
1780 	case L_SET:
1781 		break;
1782 	default:
1783 		error = EINVAL;
1784 	}
1785 	if (error == 0 && noneg && offset < 0)
1786 		error = EINVAL;
1787 	if (error != 0)
1788 		goto drop;
1789 	fp->f_offset = offset;
1790 	*(off_t *)(td->td_retval) = fp->f_offset;
1791 drop:
1792 	fdrop(fp, td);
1793 	VFS_UNLOCK_GIANT(vfslocked);
1794 	return (error);
1795 }
1796 
1797 #if defined(COMPAT_43)
1798 /*
1799  * Reposition read/write file offset.
1800  */
1801 #ifndef _SYS_SYSPROTO_H_
1802 struct olseek_args {
1803 	int	fd;
1804 	long	offset;
1805 	int	whence;
1806 };
1807 #endif
1808 int
1809 olseek(td, uap)
1810 	struct thread *td;
1811 	register struct olseek_args /* {
1812 		int fd;
1813 		long offset;
1814 		int whence;
1815 	} */ *uap;
1816 {
1817 	struct lseek_args /* {
1818 		int fd;
1819 		int pad;
1820 		off_t offset;
1821 		int whence;
1822 	} */ nuap;
1823 	int error;
1824 
1825 	nuap.fd = uap->fd;
1826 	nuap.offset = uap->offset;
1827 	nuap.whence = uap->whence;
1828 	error = lseek(td, &nuap);
1829 	return (error);
1830 }
1831 #endif /* COMPAT_43 */
1832 
1833 /*
1834  * Check access permissions using passed credentials.
1835  */
1836 static int
1837 vn_access(vp, user_flags, cred, td)
1838 	struct vnode	*vp;
1839 	int		user_flags;
1840 	struct ucred	*cred;
1841 	struct thread	*td;
1842 {
1843 	int error, flags;
1844 
1845 	/* Flags == 0 means only check for existence. */
1846 	error = 0;
1847 	if (user_flags) {
1848 		flags = 0;
1849 		if (user_flags & R_OK)
1850 			flags |= VREAD;
1851 		if (user_flags & W_OK)
1852 			flags |= VWRITE;
1853 		if (user_flags & X_OK)
1854 			flags |= VEXEC;
1855 #ifdef MAC
1856 		error = mac_check_vnode_access(cred, vp, flags);
1857 		if (error)
1858 			return (error);
1859 #endif
1860 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1861 			error = VOP_ACCESS(vp, flags, cred, td);
1862 	}
1863 	return (error);
1864 }
1865 
1866 /*
1867  * Check access permissions using "real" credentials.
1868  */
1869 #ifndef _SYS_SYSPROTO_H_
1870 struct access_args {
1871 	char	*path;
1872 	int	flags;
1873 };
1874 #endif
1875 int
1876 access(td, uap)
1877 	struct thread *td;
1878 	register struct access_args /* {
1879 		char *path;
1880 		int flags;
1881 	} */ *uap;
1882 {
1883 
1884 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1885 }
1886 
1887 int
1888 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1889 {
1890 	struct ucred *cred, *tmpcred;
1891 	register struct vnode *vp;
1892 	struct nameidata nd;
1893 	int vfslocked;
1894 	int error;
1895 
1896 	/*
1897 	 * Create and modify a temporary credential instead of one that
1898 	 * is potentially shared.  This could also mess up socket
1899 	 * buffer accounting which can run in an interrupt context.
1900 	 */
1901 	cred = td->td_ucred;
1902 	tmpcred = crdup(cred);
1903 	tmpcred->cr_uid = cred->cr_ruid;
1904 	tmpcred->cr_groups[0] = cred->cr_rgid;
1905 	td->td_ucred = tmpcred;
1906 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1907 	    pathseg, path, td);
1908 	if ((error = namei(&nd)) != 0)
1909 		goto out1;
1910 	vfslocked = NDHASGIANT(&nd);
1911 	vp = nd.ni_vp;
1912 
1913 	error = vn_access(vp, flags, tmpcred, td);
1914 	NDFREE(&nd, NDF_ONLY_PNBUF);
1915 	vput(vp);
1916 	VFS_UNLOCK_GIANT(vfslocked);
1917 out1:
1918 	td->td_ucred = cred;
1919 	crfree(tmpcred);
1920 	return (error);
1921 }
1922 
1923 /*
1924  * Check access permissions using "effective" credentials.
1925  */
1926 #ifndef _SYS_SYSPROTO_H_
1927 struct eaccess_args {
1928 	char	*path;
1929 	int	flags;
1930 };
1931 #endif
1932 int
1933 eaccess(td, uap)
1934 	struct thread *td;
1935 	register struct eaccess_args /* {
1936 		char *path;
1937 		int flags;
1938 	} */ *uap;
1939 {
1940 
1941 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1942 }
1943 
1944 int
1945 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1946 {
1947 	struct nameidata nd;
1948 	struct vnode *vp;
1949 	int vfslocked;
1950 	int error;
1951 
1952 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1953 	    pathseg, path, td);
1954 	if ((error = namei(&nd)) != 0)
1955 		return (error);
1956 	vp = nd.ni_vp;
1957 	vfslocked = NDHASGIANT(&nd);
1958 	error = vn_access(vp, flags, td->td_ucred, td);
1959 	NDFREE(&nd, NDF_ONLY_PNBUF);
1960 	vput(vp);
1961 	VFS_UNLOCK_GIANT(vfslocked);
1962 	return (error);
1963 }
1964 
1965 #if defined(COMPAT_43)
1966 /*
1967  * Get file status; this version follows links.
1968  */
1969 #ifndef _SYS_SYSPROTO_H_
1970 struct ostat_args {
1971 	char	*path;
1972 	struct ostat *ub;
1973 };
1974 #endif
1975 int
1976 ostat(td, uap)
1977 	struct thread *td;
1978 	register struct ostat_args /* {
1979 		char *path;
1980 		struct ostat *ub;
1981 	} */ *uap;
1982 {
1983 	struct stat sb;
1984 	struct ostat osb;
1985 	int error;
1986 
1987 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1988 	if (error)
1989 		return (error);
1990 	cvtstat(&sb, &osb);
1991 	error = copyout(&osb, uap->ub, sizeof (osb));
1992 	return (error);
1993 }
1994 
1995 /*
1996  * Get file status; this version does not follow links.
1997  */
1998 #ifndef _SYS_SYSPROTO_H_
1999 struct olstat_args {
2000 	char	*path;
2001 	struct ostat *ub;
2002 };
2003 #endif
2004 int
2005 olstat(td, uap)
2006 	struct thread *td;
2007 	register struct olstat_args /* {
2008 		char *path;
2009 		struct ostat *ub;
2010 	} */ *uap;
2011 {
2012 	struct stat sb;
2013 	struct ostat osb;
2014 	int error;
2015 
2016 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2017 	if (error)
2018 		return (error);
2019 	cvtstat(&sb, &osb);
2020 	error = copyout(&osb, uap->ub, sizeof (osb));
2021 	return (error);
2022 }
2023 
2024 /*
2025  * Convert from an old to a new stat structure.
2026  */
2027 void
2028 cvtstat(st, ost)
2029 	struct stat *st;
2030 	struct ostat *ost;
2031 {
2032 
2033 	ost->st_dev = st->st_dev;
2034 	ost->st_ino = st->st_ino;
2035 	ost->st_mode = st->st_mode;
2036 	ost->st_nlink = st->st_nlink;
2037 	ost->st_uid = st->st_uid;
2038 	ost->st_gid = st->st_gid;
2039 	ost->st_rdev = st->st_rdev;
2040 	if (st->st_size < (quad_t)1 << 32)
2041 		ost->st_size = st->st_size;
2042 	else
2043 		ost->st_size = -2;
2044 	ost->st_atime = st->st_atime;
2045 	ost->st_mtime = st->st_mtime;
2046 	ost->st_ctime = st->st_ctime;
2047 	ost->st_blksize = st->st_blksize;
2048 	ost->st_blocks = st->st_blocks;
2049 	ost->st_flags = st->st_flags;
2050 	ost->st_gen = st->st_gen;
2051 }
2052 #endif /* COMPAT_43 */
2053 
2054 /*
2055  * Get file status; this version follows links.
2056  */
2057 #ifndef _SYS_SYSPROTO_H_
2058 struct stat_args {
2059 	char	*path;
2060 	struct stat *ub;
2061 };
2062 #endif
2063 int
2064 stat(td, uap)
2065 	struct thread *td;
2066 	register struct stat_args /* {
2067 		char *path;
2068 		struct stat *ub;
2069 	} */ *uap;
2070 {
2071 	struct stat sb;
2072 	int error;
2073 
2074 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2075 	if (error == 0)
2076 		error = copyout(&sb, uap->ub, sizeof (sb));
2077 	return (error);
2078 }
2079 
2080 int
2081 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2082 {
2083 	struct nameidata nd;
2084 	struct stat sb;
2085 	int error, vfslocked;
2086 
2087 	NDINIT(&nd, LOOKUP,
2088 	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2089 	    pathseg, path, td);
2090 	if ((error = namei(&nd)) != 0)
2091 		return (error);
2092 	vfslocked = NDHASGIANT(&nd);
2093 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2094 	NDFREE(&nd, NDF_ONLY_PNBUF);
2095 	vput(nd.ni_vp);
2096 	VFS_UNLOCK_GIANT(vfslocked);
2097 	if (mtx_owned(&Giant))
2098 		printf("stat(%d): %s\n", vfslocked, path);
2099 	if (error)
2100 		return (error);
2101 	*sbp = sb;
2102 	return (0);
2103 }
2104 
2105 /*
2106  * Get file status; this version does not follow links.
2107  */
2108 #ifndef _SYS_SYSPROTO_H_
2109 struct lstat_args {
2110 	char	*path;
2111 	struct stat *ub;
2112 };
2113 #endif
2114 int
2115 lstat(td, uap)
2116 	struct thread *td;
2117 	register struct lstat_args /* {
2118 		char *path;
2119 		struct stat *ub;
2120 	} */ *uap;
2121 {
2122 	struct stat sb;
2123 	int error;
2124 
2125 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2126 	if (error == 0)
2127 		error = copyout(&sb, uap->ub, sizeof (sb));
2128 	return (error);
2129 }
2130 
2131 int
2132 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2133 {
2134 	struct vnode *vp;
2135 	struct stat sb;
2136 	struct nameidata nd;
2137 	int error, vfslocked;
2138 
2139 	NDINIT(&nd, LOOKUP,
2140 	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2141 	    pathseg, path, td);
2142 	if ((error = namei(&nd)) != 0)
2143 		return (error);
2144 	vfslocked = NDHASGIANT(&nd);
2145 	vp = nd.ni_vp;
2146 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2147 	NDFREE(&nd, NDF_ONLY_PNBUF);
2148 	vput(vp);
2149 	VFS_UNLOCK_GIANT(vfslocked);
2150 	if (error)
2151 		return (error);
2152 	*sbp = sb;
2153 	return (0);
2154 }
2155 
2156 /*
2157  * Implementation of the NetBSD [l]stat() functions.
2158  */
2159 void
2160 cvtnstat(sb, nsb)
2161 	struct stat *sb;
2162 	struct nstat *nsb;
2163 {
2164 	bzero(nsb, sizeof *nsb);
2165 	nsb->st_dev = sb->st_dev;
2166 	nsb->st_ino = sb->st_ino;
2167 	nsb->st_mode = sb->st_mode;
2168 	nsb->st_nlink = sb->st_nlink;
2169 	nsb->st_uid = sb->st_uid;
2170 	nsb->st_gid = sb->st_gid;
2171 	nsb->st_rdev = sb->st_rdev;
2172 	nsb->st_atimespec = sb->st_atimespec;
2173 	nsb->st_mtimespec = sb->st_mtimespec;
2174 	nsb->st_ctimespec = sb->st_ctimespec;
2175 	nsb->st_size = sb->st_size;
2176 	nsb->st_blocks = sb->st_blocks;
2177 	nsb->st_blksize = sb->st_blksize;
2178 	nsb->st_flags = sb->st_flags;
2179 	nsb->st_gen = sb->st_gen;
2180 	nsb->st_birthtimespec = sb->st_birthtimespec;
2181 }
2182 
2183 #ifndef _SYS_SYSPROTO_H_
2184 struct nstat_args {
2185 	char	*path;
2186 	struct nstat *ub;
2187 };
2188 #endif
2189 int
2190 nstat(td, uap)
2191 	struct thread *td;
2192 	register struct nstat_args /* {
2193 		char *path;
2194 		struct nstat *ub;
2195 	} */ *uap;
2196 {
2197 	struct stat sb;
2198 	struct nstat nsb;
2199 	int error;
2200 
2201 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2202 	if (error)
2203 		return (error);
2204 	cvtnstat(&sb, &nsb);
2205 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2206 	return (error);
2207 }
2208 
2209 /*
2210  * NetBSD lstat.  Get file status; this version does not follow links.
2211  */
2212 #ifndef _SYS_SYSPROTO_H_
2213 struct lstat_args {
2214 	char	*path;
2215 	struct stat *ub;
2216 };
2217 #endif
2218 int
2219 nlstat(td, uap)
2220 	struct thread *td;
2221 	register struct nlstat_args /* {
2222 		char *path;
2223 		struct nstat *ub;
2224 	} */ *uap;
2225 {
2226 	struct stat sb;
2227 	struct nstat nsb;
2228 	int error;
2229 
2230 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2231 	if (error)
2232 		return (error);
2233 	cvtnstat(&sb, &nsb);
2234 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2235 	return (error);
2236 }
2237 
2238 /*
2239  * Get configurable pathname variables.
2240  */
2241 #ifndef _SYS_SYSPROTO_H_
2242 struct pathconf_args {
2243 	char	*path;
2244 	int	name;
2245 };
2246 #endif
2247 int
2248 pathconf(td, uap)
2249 	struct thread *td;
2250 	register struct pathconf_args /* {
2251 		char *path;
2252 		int name;
2253 	} */ *uap;
2254 {
2255 
2256 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2257 }
2258 
2259 int
2260 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2261 {
2262 	struct nameidata nd;
2263 	int error, vfslocked;
2264 
2265 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2266 	    pathseg, path, td);
2267 	if ((error = namei(&nd)) != 0)
2268 		return (error);
2269 	vfslocked = NDHASGIANT(&nd);
2270 	NDFREE(&nd, NDF_ONLY_PNBUF);
2271 
2272 	/* If asynchronous I/O is available, it works for all files. */
2273 	if (name == _PC_ASYNC_IO)
2274 		td->td_retval[0] = async_io_version;
2275 	else
2276 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2277 	vput(nd.ni_vp);
2278 	VFS_UNLOCK_GIANT(vfslocked);
2279 	return (error);
2280 }
2281 
2282 /*
2283  * Return target name of a symbolic link.
2284  */
2285 #ifndef _SYS_SYSPROTO_H_
2286 struct readlink_args {
2287 	char	*path;
2288 	char	*buf;
2289 	int	count;
2290 };
2291 #endif
2292 int
2293 readlink(td, uap)
2294 	struct thread *td;
2295 	register struct readlink_args /* {
2296 		char *path;
2297 		char *buf;
2298 		int count;
2299 	} */ *uap;
2300 {
2301 
2302 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2303 	    UIO_USERSPACE, uap->count));
2304 }
2305 
2306 int
2307 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2308     enum uio_seg bufseg, int count)
2309 {
2310 	register struct vnode *vp;
2311 	struct iovec aiov;
2312 	struct uio auio;
2313 	int error;
2314 	struct nameidata nd;
2315 	int vfslocked;
2316 
2317 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2318 	    pathseg, path, td);
2319 	if ((error = namei(&nd)) != 0)
2320 		return (error);
2321 	NDFREE(&nd, NDF_ONLY_PNBUF);
2322 	vfslocked = NDHASGIANT(&nd);
2323 	vp = nd.ni_vp;
2324 #ifdef MAC
2325 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2326 	if (error) {
2327 		vput(vp);
2328 		VFS_UNLOCK_GIANT(vfslocked);
2329 		return (error);
2330 	}
2331 #endif
2332 	if (vp->v_type != VLNK)
2333 		error = EINVAL;
2334 	else {
2335 		aiov.iov_base = buf;
2336 		aiov.iov_len = count;
2337 		auio.uio_iov = &aiov;
2338 		auio.uio_iovcnt = 1;
2339 		auio.uio_offset = 0;
2340 		auio.uio_rw = UIO_READ;
2341 		auio.uio_segflg = bufseg;
2342 		auio.uio_td = td;
2343 		auio.uio_resid = count;
2344 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2345 	}
2346 	vput(vp);
2347 	VFS_UNLOCK_GIANT(vfslocked);
2348 	td->td_retval[0] = count - auio.uio_resid;
2349 	return (error);
2350 }
2351 
2352 /*
2353  * Common implementation code for chflags() and fchflags().
2354  */
2355 static int
2356 setfflags(td, vp, flags)
2357 	struct thread *td;
2358 	struct vnode *vp;
2359 	int flags;
2360 {
2361 	int error;
2362 	struct mount *mp;
2363 	struct vattr vattr;
2364 
2365 	/*
2366 	 * Prevent non-root users from setting flags on devices.  When
2367 	 * a device is reused, users can retain ownership of the device
2368 	 * if they are allowed to set flags and programs assume that
2369 	 * chown can't fail when done as root.
2370 	 */
2371 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2372 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
2373 		if (error)
2374 			return (error);
2375 	}
2376 
2377 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2378 		return (error);
2379 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2380 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2381 	VATTR_NULL(&vattr);
2382 	vattr.va_flags = flags;
2383 #ifdef MAC
2384 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2385 	if (error == 0)
2386 #endif
2387 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2388 	VOP_UNLOCK(vp, 0, td);
2389 	vn_finished_write(mp);
2390 	return (error);
2391 }
2392 
2393 /*
2394  * Change flags of a file given a path name.
2395  */
2396 #ifndef _SYS_SYSPROTO_H_
2397 struct chflags_args {
2398 	char	*path;
2399 	int	flags;
2400 };
2401 #endif
2402 int
2403 chflags(td, uap)
2404 	struct thread *td;
2405 	register struct chflags_args /* {
2406 		char *path;
2407 		int flags;
2408 	} */ *uap;
2409 {
2410 	int error;
2411 	struct nameidata nd;
2412 	int vfslocked;
2413 
2414 	AUDIT_ARG(fflags, uap->flags);
2415 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2416 	    uap->path, td);
2417 	if ((error = namei(&nd)) != 0)
2418 		return (error);
2419 	NDFREE(&nd, NDF_ONLY_PNBUF);
2420 	vfslocked = NDHASGIANT(&nd);
2421 	error = setfflags(td, nd.ni_vp, uap->flags);
2422 	vrele(nd.ni_vp);
2423 	VFS_UNLOCK_GIANT(vfslocked);
2424 	return (error);
2425 }
2426 
2427 /*
2428  * Same as chflags() but doesn't follow symlinks.
2429  */
2430 int
2431 lchflags(td, uap)
2432 	struct thread *td;
2433 	register struct lchflags_args /* {
2434 		char *path;
2435 		int flags;
2436 	} */ *uap;
2437 {
2438 	int error;
2439 	struct nameidata nd;
2440 	int vfslocked;
2441 
2442 	AUDIT_ARG(fflags, uap->flags);
2443 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2444 	    uap->path, td);
2445 	if ((error = namei(&nd)) != 0)
2446 		return (error);
2447 	vfslocked = NDHASGIANT(&nd);
2448 	NDFREE(&nd, NDF_ONLY_PNBUF);
2449 	error = setfflags(td, nd.ni_vp, uap->flags);
2450 	vrele(nd.ni_vp);
2451 	VFS_UNLOCK_GIANT(vfslocked);
2452 	return (error);
2453 }
2454 
2455 /*
2456  * Change flags of a file given a file descriptor.
2457  */
2458 #ifndef _SYS_SYSPROTO_H_
2459 struct fchflags_args {
2460 	int	fd;
2461 	int	flags;
2462 };
2463 #endif
2464 int
2465 fchflags(td, uap)
2466 	struct thread *td;
2467 	register struct fchflags_args /* {
2468 		int fd;
2469 		int flags;
2470 	} */ *uap;
2471 {
2472 	struct file *fp;
2473 	int vfslocked;
2474 	int error;
2475 
2476 	AUDIT_ARG(fd, uap->fd);
2477 	AUDIT_ARG(fflags, uap->flags);
2478 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2479 		return (error);
2480 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2481 #ifdef AUDIT
2482 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2483 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2484 	VOP_UNLOCK(fp->f_vnode, 0, td);
2485 #endif
2486 	error = setfflags(td, fp->f_vnode, uap->flags);
2487 	VFS_UNLOCK_GIANT(vfslocked);
2488 	fdrop(fp, td);
2489 	return (error);
2490 }
2491 
2492 /*
2493  * Common implementation code for chmod(), lchmod() and fchmod().
2494  */
2495 static int
2496 setfmode(td, vp, mode)
2497 	struct thread *td;
2498 	struct vnode *vp;
2499 	int mode;
2500 {
2501 	int error;
2502 	struct mount *mp;
2503 	struct vattr vattr;
2504 
2505 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2506 		return (error);
2507 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2508 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2509 	VATTR_NULL(&vattr);
2510 	vattr.va_mode = mode & ALLPERMS;
2511 #ifdef MAC
2512 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2513 	if (error == 0)
2514 #endif
2515 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2516 	VOP_UNLOCK(vp, 0, td);
2517 	vn_finished_write(mp);
2518 	return (error);
2519 }
2520 
2521 /*
2522  * Change mode of a file given path name.
2523  */
2524 #ifndef _SYS_SYSPROTO_H_
2525 struct chmod_args {
2526 	char	*path;
2527 	int	mode;
2528 };
2529 #endif
2530 int
2531 chmod(td, uap)
2532 	struct thread *td;
2533 	register struct chmod_args /* {
2534 		char *path;
2535 		int mode;
2536 	} */ *uap;
2537 {
2538 
2539 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2540 }
2541 
2542 int
2543 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2544 {
2545 	int error;
2546 	struct nameidata nd;
2547 	int vfslocked;
2548 
2549 	AUDIT_ARG(mode, mode);
2550 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2551 	if ((error = namei(&nd)) != 0)
2552 		return (error);
2553 	vfslocked = NDHASGIANT(&nd);
2554 	NDFREE(&nd, NDF_ONLY_PNBUF);
2555 	error = setfmode(td, nd.ni_vp, mode);
2556 	vrele(nd.ni_vp);
2557 	VFS_UNLOCK_GIANT(vfslocked);
2558 	return (error);
2559 }
2560 
2561 /*
2562  * Change mode of a file given path name (don't follow links.)
2563  */
2564 #ifndef _SYS_SYSPROTO_H_
2565 struct lchmod_args {
2566 	char	*path;
2567 	int	mode;
2568 };
2569 #endif
2570 int
2571 lchmod(td, uap)
2572 	struct thread *td;
2573 	register struct lchmod_args /* {
2574 		char *path;
2575 		int mode;
2576 	} */ *uap;
2577 {
2578 	int error;
2579 	struct nameidata nd;
2580 	int vfslocked;
2581 
2582 	AUDIT_ARG(mode, (mode_t)uap->mode);
2583 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2584 	    uap->path, td);
2585 	if ((error = namei(&nd)) != 0)
2586 		return (error);
2587 	vfslocked = NDHASGIANT(&nd);
2588 	NDFREE(&nd, NDF_ONLY_PNBUF);
2589 	error = setfmode(td, nd.ni_vp, uap->mode);
2590 	vrele(nd.ni_vp);
2591 	VFS_UNLOCK_GIANT(vfslocked);
2592 	return (error);
2593 }
2594 
2595 /*
2596  * Change mode of a file given a file descriptor.
2597  */
2598 #ifndef _SYS_SYSPROTO_H_
2599 struct fchmod_args {
2600 	int	fd;
2601 	int	mode;
2602 };
2603 #endif
2604 int
2605 fchmod(td, uap)
2606 	struct thread *td;
2607 	register struct fchmod_args /* {
2608 		int fd;
2609 		int mode;
2610 	} */ *uap;
2611 {
2612 	struct file *fp;
2613 	int vfslocked;
2614 	int error;
2615 
2616 	AUDIT_ARG(fd, uap->fd);
2617 	AUDIT_ARG(mode, uap->mode);
2618 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2619 		return (error);
2620 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2621 #ifdef AUDIT
2622 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2623 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2624 	VOP_UNLOCK(fp->f_vnode, 0, td);
2625 #endif
2626 	error = setfmode(td, fp->f_vnode, uap->mode);
2627 	VFS_UNLOCK_GIANT(vfslocked);
2628 	fdrop(fp, td);
2629 	return (error);
2630 }
2631 
2632 /*
2633  * Common implementation for chown(), lchown(), and fchown()
2634  */
2635 static int
2636 setfown(td, vp, uid, gid)
2637 	struct thread *td;
2638 	struct vnode *vp;
2639 	uid_t uid;
2640 	gid_t gid;
2641 {
2642 	int error;
2643 	struct mount *mp;
2644 	struct vattr vattr;
2645 
2646 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2647 		return (error);
2648 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2649 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2650 	VATTR_NULL(&vattr);
2651 	vattr.va_uid = uid;
2652 	vattr.va_gid = gid;
2653 #ifdef MAC
2654 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2655 	    vattr.va_gid);
2656 	if (error == 0)
2657 #endif
2658 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2659 	VOP_UNLOCK(vp, 0, td);
2660 	vn_finished_write(mp);
2661 	return (error);
2662 }
2663 
2664 /*
2665  * Set ownership given a path name.
2666  */
2667 #ifndef _SYS_SYSPROTO_H_
2668 struct chown_args {
2669 	char	*path;
2670 	int	uid;
2671 	int	gid;
2672 };
2673 #endif
2674 int
2675 chown(td, uap)
2676 	struct thread *td;
2677 	register struct chown_args /* {
2678 		char *path;
2679 		int uid;
2680 		int gid;
2681 	} */ *uap;
2682 {
2683 
2684 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2685 }
2686 
2687 int
2688 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2689     int gid)
2690 {
2691 	int error;
2692 	struct nameidata nd;
2693 	int vfslocked;
2694 
2695 	AUDIT_ARG(owner, uid, gid);
2696 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2697 	if ((error = namei(&nd)) != 0)
2698 		return (error);
2699 	vfslocked = NDHASGIANT(&nd);
2700 	NDFREE(&nd, NDF_ONLY_PNBUF);
2701 	error = setfown(td, nd.ni_vp, uid, gid);
2702 	vrele(nd.ni_vp);
2703 	VFS_UNLOCK_GIANT(vfslocked);
2704 	return (error);
2705 }
2706 
2707 /*
2708  * Set ownership given a path name, do not cross symlinks.
2709  */
2710 #ifndef _SYS_SYSPROTO_H_
2711 struct lchown_args {
2712 	char	*path;
2713 	int	uid;
2714 	int	gid;
2715 };
2716 #endif
2717 int
2718 lchown(td, uap)
2719 	struct thread *td;
2720 	register struct lchown_args /* {
2721 		char *path;
2722 		int uid;
2723 		int gid;
2724 	} */ *uap;
2725 {
2726 
2727 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2728 }
2729 
2730 int
2731 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2732     int gid)
2733 {
2734 	int error;
2735 	struct nameidata nd;
2736 	int vfslocked;
2737 
2738 	AUDIT_ARG(owner, uid, gid);
2739 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2740 	if ((error = namei(&nd)) != 0)
2741 		return (error);
2742 	vfslocked = NDHASGIANT(&nd);
2743 	NDFREE(&nd, NDF_ONLY_PNBUF);
2744 	error = setfown(td, nd.ni_vp, uid, gid);
2745 	vrele(nd.ni_vp);
2746 	VFS_UNLOCK_GIANT(vfslocked);
2747 	return (error);
2748 }
2749 
2750 /*
2751  * Set ownership given a file descriptor.
2752  */
2753 #ifndef _SYS_SYSPROTO_H_
2754 struct fchown_args {
2755 	int	fd;
2756 	int	uid;
2757 	int	gid;
2758 };
2759 #endif
2760 int
2761 fchown(td, uap)
2762 	struct thread *td;
2763 	register struct fchown_args /* {
2764 		int fd;
2765 		int uid;
2766 		int gid;
2767 	} */ *uap;
2768 {
2769 	struct file *fp;
2770 	int vfslocked;
2771 	int error;
2772 
2773 	AUDIT_ARG(fd, uap->fd);
2774 	AUDIT_ARG(owner, uap->uid, uap->gid);
2775 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2776 		return (error);
2777 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2778 #ifdef AUDIT
2779 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2780 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2781 	VOP_UNLOCK(fp->f_vnode, 0, td);
2782 #endif
2783 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2784 	VFS_UNLOCK_GIANT(vfslocked);
2785 	fdrop(fp, td);
2786 	return (error);
2787 }
2788 
2789 /*
2790  * Common implementation code for utimes(), lutimes(), and futimes().
2791  */
2792 static int
2793 getutimes(usrtvp, tvpseg, tsp)
2794 	const struct timeval *usrtvp;
2795 	enum uio_seg tvpseg;
2796 	struct timespec *tsp;
2797 {
2798 	struct timeval tv[2];
2799 	const struct timeval *tvp;
2800 	int error;
2801 
2802 	if (usrtvp == NULL) {
2803 		microtime(&tv[0]);
2804 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2805 		tsp[1] = tsp[0];
2806 	} else {
2807 		if (tvpseg == UIO_SYSSPACE) {
2808 			tvp = usrtvp;
2809 		} else {
2810 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2811 				return (error);
2812 			tvp = tv;
2813 		}
2814 
2815 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2816 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2817 			return (EINVAL);
2818 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2819 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2820 	}
2821 	return (0);
2822 }
2823 
2824 /*
2825  * Common implementation code for utimes(), lutimes(), and futimes().
2826  */
2827 static int
2828 setutimes(td, vp, ts, numtimes, nullflag)
2829 	struct thread *td;
2830 	struct vnode *vp;
2831 	const struct timespec *ts;
2832 	int numtimes;
2833 	int nullflag;
2834 {
2835 	int error, setbirthtime;
2836 	struct mount *mp;
2837 	struct vattr vattr;
2838 
2839 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2840 		return (error);
2841 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2842 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2843 	setbirthtime = 0;
2844 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2845 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2846 		setbirthtime = 1;
2847 	VATTR_NULL(&vattr);
2848 	vattr.va_atime = ts[0];
2849 	vattr.va_mtime = ts[1];
2850 	if (setbirthtime)
2851 		vattr.va_birthtime = ts[1];
2852 	if (numtimes > 2)
2853 		vattr.va_birthtime = ts[2];
2854 	if (nullflag)
2855 		vattr.va_vaflags |= VA_UTIMES_NULL;
2856 #ifdef MAC
2857 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2858 	    vattr.va_mtime);
2859 #endif
2860 	if (error == 0)
2861 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2862 	VOP_UNLOCK(vp, 0, td);
2863 	vn_finished_write(mp);
2864 	return (error);
2865 }
2866 
2867 /*
2868  * Set the access and modification times of a file.
2869  */
2870 #ifndef _SYS_SYSPROTO_H_
2871 struct utimes_args {
2872 	char	*path;
2873 	struct	timeval *tptr;
2874 };
2875 #endif
2876 int
2877 utimes(td, uap)
2878 	struct thread *td;
2879 	register struct utimes_args /* {
2880 		char *path;
2881 		struct timeval *tptr;
2882 	} */ *uap;
2883 {
2884 
2885 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2886 	    UIO_USERSPACE));
2887 }
2888 
2889 int
2890 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2891     struct timeval *tptr, enum uio_seg tptrseg)
2892 {
2893 	struct timespec ts[2];
2894 	int error;
2895 	struct nameidata nd;
2896 	int vfslocked;
2897 
2898 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2899 		return (error);
2900 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2901 	if ((error = namei(&nd)) != 0)
2902 		return (error);
2903 	vfslocked = NDHASGIANT(&nd);
2904 	NDFREE(&nd, NDF_ONLY_PNBUF);
2905 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2906 	vrele(nd.ni_vp);
2907 	VFS_UNLOCK_GIANT(vfslocked);
2908 	return (error);
2909 }
2910 
2911 /*
2912  * Set the access and modification times of a file.
2913  */
2914 #ifndef _SYS_SYSPROTO_H_
2915 struct lutimes_args {
2916 	char	*path;
2917 	struct	timeval *tptr;
2918 };
2919 #endif
2920 int
2921 lutimes(td, uap)
2922 	struct thread *td;
2923 	register struct lutimes_args /* {
2924 		char *path;
2925 		struct timeval *tptr;
2926 	} */ *uap;
2927 {
2928 
2929 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2930 	    UIO_USERSPACE));
2931 }
2932 
2933 int
2934 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2935     struct timeval *tptr, enum uio_seg tptrseg)
2936 {
2937 	struct timespec ts[2];
2938 	int error;
2939 	struct nameidata nd;
2940 	int vfslocked;
2941 
2942 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2943 		return (error);
2944 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2945 	if ((error = namei(&nd)) != 0)
2946 		return (error);
2947 	vfslocked = NDHASGIANT(&nd);
2948 	NDFREE(&nd, NDF_ONLY_PNBUF);
2949 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2950 	vrele(nd.ni_vp);
2951 	VFS_UNLOCK_GIANT(vfslocked);
2952 	return (error);
2953 }
2954 
2955 /*
2956  * Set the access and modification times of a file.
2957  */
2958 #ifndef _SYS_SYSPROTO_H_
2959 struct futimes_args {
2960 	int	fd;
2961 	struct	timeval *tptr;
2962 };
2963 #endif
2964 int
2965 futimes(td, uap)
2966 	struct thread *td;
2967 	register struct futimes_args /* {
2968 		int  fd;
2969 		struct timeval *tptr;
2970 	} */ *uap;
2971 {
2972 
2973 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2974 }
2975 
2976 int
2977 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2978     enum uio_seg tptrseg)
2979 {
2980 	struct timespec ts[2];
2981 	struct file *fp;
2982 	int vfslocked;
2983 	int error;
2984 
2985 	AUDIT_ARG(fd, fd);
2986 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2987 		return (error);
2988 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2989 		return (error);
2990 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2991 #ifdef AUDIT
2992 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2993 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2994 	VOP_UNLOCK(fp->f_vnode, 0, td);
2995 #endif
2996 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2997 	VFS_UNLOCK_GIANT(vfslocked);
2998 	fdrop(fp, td);
2999 	return (error);
3000 }
3001 
3002 /*
3003  * Truncate a file given its path name.
3004  */
3005 #ifndef _SYS_SYSPROTO_H_
3006 struct truncate_args {
3007 	char	*path;
3008 	int	pad;
3009 	off_t	length;
3010 };
3011 #endif
3012 int
3013 truncate(td, uap)
3014 	struct thread *td;
3015 	register struct truncate_args /* {
3016 		char *path;
3017 		int pad;
3018 		off_t length;
3019 	} */ *uap;
3020 {
3021 
3022 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3023 }
3024 
3025 int
3026 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3027 {
3028 	struct mount *mp;
3029 	struct vnode *vp;
3030 	struct vattr vattr;
3031 	int error;
3032 	struct nameidata nd;
3033 	int vfslocked;
3034 
3035 	if (length < 0)
3036 		return(EINVAL);
3037 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3038 	if ((error = namei(&nd)) != 0)
3039 		return (error);
3040 	vfslocked = NDHASGIANT(&nd);
3041 	vp = nd.ni_vp;
3042 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3043 		vrele(vp);
3044 		VFS_UNLOCK_GIANT(vfslocked);
3045 		return (error);
3046 	}
3047 	NDFREE(&nd, NDF_ONLY_PNBUF);
3048 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3049 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3050 	if (vp->v_type == VDIR)
3051 		error = EISDIR;
3052 #ifdef MAC
3053 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3054 	}
3055 #endif
3056 	else if ((error = vn_writechk(vp)) == 0 &&
3057 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3058 		VATTR_NULL(&vattr);
3059 		vattr.va_size = length;
3060 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3061 	}
3062 	vput(vp);
3063 	vn_finished_write(mp);
3064 	VFS_UNLOCK_GIANT(vfslocked);
3065 	return (error);
3066 }
3067 
3068 /*
3069  * Truncate a file given a file descriptor.
3070  */
3071 #ifndef _SYS_SYSPROTO_H_
3072 struct ftruncate_args {
3073 	int	fd;
3074 	int	pad;
3075 	off_t	length;
3076 };
3077 #endif
3078 int
3079 ftruncate(td, uap)
3080 	struct thread *td;
3081 	register struct ftruncate_args /* {
3082 		int fd;
3083 		int pad;
3084 		off_t length;
3085 	} */ *uap;
3086 {
3087 	struct mount *mp;
3088 	struct vattr vattr;
3089 	struct vnode *vp;
3090 	struct file *fp;
3091 	int vfslocked;
3092 	int error;
3093 
3094 	AUDIT_ARG(fd, uap->fd);
3095 	if (uap->length < 0)
3096 		return(EINVAL);
3097 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3098 		return (error);
3099 	if ((fp->f_flag & FWRITE) == 0) {
3100 		fdrop(fp, td);
3101 		return (EINVAL);
3102 	}
3103 	vp = fp->f_vnode;
3104 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3105 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3106 		goto drop;
3107 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3108 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3109 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3110 	if (vp->v_type == VDIR)
3111 		error = EISDIR;
3112 #ifdef MAC
3113 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3114 	    vp))) {
3115 	}
3116 #endif
3117 	else if ((error = vn_writechk(vp)) == 0) {
3118 		VATTR_NULL(&vattr);
3119 		vattr.va_size = uap->length;
3120 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3121 	}
3122 	VOP_UNLOCK(vp, 0, td);
3123 	vn_finished_write(mp);
3124 drop:
3125 	VFS_UNLOCK_GIANT(vfslocked);
3126 	fdrop(fp, td);
3127 	return (error);
3128 }
3129 
3130 #if defined(COMPAT_43)
3131 /*
3132  * Truncate a file given its path name.
3133  */
3134 #ifndef _SYS_SYSPROTO_H_
3135 struct otruncate_args {
3136 	char	*path;
3137 	long	length;
3138 };
3139 #endif
3140 int
3141 otruncate(td, uap)
3142 	struct thread *td;
3143 	register struct otruncate_args /* {
3144 		char *path;
3145 		long length;
3146 	} */ *uap;
3147 {
3148 	struct truncate_args /* {
3149 		char *path;
3150 		int pad;
3151 		off_t length;
3152 	} */ nuap;
3153 
3154 	nuap.path = uap->path;
3155 	nuap.length = uap->length;
3156 	return (truncate(td, &nuap));
3157 }
3158 
3159 /*
3160  * Truncate a file given a file descriptor.
3161  */
3162 #ifndef _SYS_SYSPROTO_H_
3163 struct oftruncate_args {
3164 	int	fd;
3165 	long	length;
3166 };
3167 #endif
3168 int
3169 oftruncate(td, uap)
3170 	struct thread *td;
3171 	register struct oftruncate_args /* {
3172 		int fd;
3173 		long length;
3174 	} */ *uap;
3175 {
3176 	struct ftruncate_args /* {
3177 		int fd;
3178 		int pad;
3179 		off_t length;
3180 	} */ nuap;
3181 
3182 	nuap.fd = uap->fd;
3183 	nuap.length = uap->length;
3184 	return (ftruncate(td, &nuap));
3185 }
3186 #endif /* COMPAT_43 */
3187 
3188 /*
3189  * Sync an open file.
3190  */
3191 #ifndef _SYS_SYSPROTO_H_
3192 struct fsync_args {
3193 	int	fd;
3194 };
3195 #endif
3196 int
3197 fsync(td, uap)
3198 	struct thread *td;
3199 	struct fsync_args /* {
3200 		int fd;
3201 	} */ *uap;
3202 {
3203 	struct vnode *vp;
3204 	struct mount *mp;
3205 	struct file *fp;
3206 	int vfslocked;
3207 	int error;
3208 
3209 	AUDIT_ARG(fd, uap->fd);
3210 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3211 		return (error);
3212 	vp = fp->f_vnode;
3213 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3214 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3215 		goto drop;
3216 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3217 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3218 	if (vp->v_object != NULL) {
3219 		VM_OBJECT_LOCK(vp->v_object);
3220 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3221 		VM_OBJECT_UNLOCK(vp->v_object);
3222 	}
3223 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3224 
3225 	VOP_UNLOCK(vp, 0, td);
3226 	vn_finished_write(mp);
3227 drop:
3228 	VFS_UNLOCK_GIANT(vfslocked);
3229 	fdrop(fp, td);
3230 	return (error);
3231 }
3232 
3233 /*
3234  * Rename files.  Source and destination must either both be directories,
3235  * or both not be directories.  If target is a directory, it must be empty.
3236  */
3237 #ifndef _SYS_SYSPROTO_H_
3238 struct rename_args {
3239 	char	*from;
3240 	char	*to;
3241 };
3242 #endif
3243 int
3244 rename(td, uap)
3245 	struct thread *td;
3246 	register struct rename_args /* {
3247 		char *from;
3248 		char *to;
3249 	} */ *uap;
3250 {
3251 
3252 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3253 }
3254 
3255 int
3256 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3257 {
3258 	struct mount *mp = NULL;
3259 	struct vnode *tvp, *fvp, *tdvp;
3260 	struct nameidata fromnd, tond;
3261 	int tvfslocked;
3262 	int fvfslocked;
3263 	int error;
3264 
3265 	bwillwrite();
3266 #ifdef MAC
3267 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3268 	    AUDITVNODE1, pathseg, from, td);
3269 #else
3270 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3271 	    AUDITVNODE1, pathseg, from, td);
3272 #endif
3273 	if ((error = namei(&fromnd)) != 0)
3274 		return (error);
3275 	fvfslocked = NDHASGIANT(&fromnd);
3276 	tvfslocked = 0;
3277 #ifdef MAC
3278 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3279 	    fromnd.ni_vp, &fromnd.ni_cnd);
3280 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3281 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
3282 #endif
3283 	fvp = fromnd.ni_vp;
3284 	if (error == 0)
3285 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3286 	if (error != 0) {
3287 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3288 		vrele(fromnd.ni_dvp);
3289 		vrele(fvp);
3290 		goto out1;
3291 	}
3292 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3293 	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3294 	if (fromnd.ni_vp->v_type == VDIR)
3295 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3296 	if ((error = namei(&tond)) != 0) {
3297 		/* Translate error code for rename("dir1", "dir2/."). */
3298 		if (error == EISDIR && fvp->v_type == VDIR)
3299 			error = EINVAL;
3300 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3301 		vrele(fromnd.ni_dvp);
3302 		vrele(fvp);
3303 		vn_finished_write(mp);
3304 		goto out1;
3305 	}
3306 	tvfslocked = NDHASGIANT(&tond);
3307 	tdvp = tond.ni_dvp;
3308 	tvp = tond.ni_vp;
3309 	if (tvp != NULL) {
3310 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3311 			error = ENOTDIR;
3312 			goto out;
3313 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3314 			error = EISDIR;
3315 			goto out;
3316 		}
3317 	}
3318 	if (fvp == tdvp)
3319 		error = EINVAL;
3320 	/*
3321 	 * If the source is the same as the destination (that is, if they
3322 	 * are links to the same vnode), then there is nothing to do.
3323 	 */
3324 	if (fvp == tvp)
3325 		error = -1;
3326 #ifdef MAC
3327 	else
3328 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3329 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3330 #endif
3331 out:
3332 	if (!error) {
3333 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3334 		if (fromnd.ni_dvp != tdvp) {
3335 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3336 		}
3337 		if (tvp) {
3338 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3339 		}
3340 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3341 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3342 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3343 		NDFREE(&tond, NDF_ONLY_PNBUF);
3344 	} else {
3345 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3346 		NDFREE(&tond, NDF_ONLY_PNBUF);
3347 		if (tvp)
3348 			vput(tvp);
3349 		if (tdvp == tvp)
3350 			vrele(tdvp);
3351 		else
3352 			vput(tdvp);
3353 		vrele(fromnd.ni_dvp);
3354 		vrele(fvp);
3355 	}
3356 	vrele(tond.ni_startdir);
3357 	vn_finished_write(mp);
3358 out1:
3359 	if (fromnd.ni_startdir)
3360 		vrele(fromnd.ni_startdir);
3361 	VFS_UNLOCK_GIANT(fvfslocked);
3362 	VFS_UNLOCK_GIANT(tvfslocked);
3363 	if (error == -1)
3364 		return (0);
3365 	return (error);
3366 }
3367 
3368 /*
3369  * Make a directory file.
3370  */
3371 #ifndef _SYS_SYSPROTO_H_
3372 struct mkdir_args {
3373 	char	*path;
3374 	int	mode;
3375 };
3376 #endif
3377 int
3378 mkdir(td, uap)
3379 	struct thread *td;
3380 	register struct mkdir_args /* {
3381 		char *path;
3382 		int mode;
3383 	} */ *uap;
3384 {
3385 
3386 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3387 }
3388 
3389 int
3390 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3391 {
3392 	struct mount *mp;
3393 	struct vnode *vp;
3394 	struct vattr vattr;
3395 	int error;
3396 	struct nameidata nd;
3397 	int vfslocked;
3398 
3399 	AUDIT_ARG(mode, mode);
3400 restart:
3401 	bwillwrite();
3402 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3403 	    segflg, path, td);
3404 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3405 	if ((error = namei(&nd)) != 0)
3406 		return (error);
3407 	vfslocked = NDHASGIANT(&nd);
3408 	vp = nd.ni_vp;
3409 	if (vp != NULL) {
3410 		NDFREE(&nd, NDF_ONLY_PNBUF);
3411 		/*
3412 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3413 		 * the strange behaviour of leaving the vnode unlocked
3414 		 * if the target is the same vnode as the parent.
3415 		 */
3416 		if (vp == nd.ni_dvp)
3417 			vrele(nd.ni_dvp);
3418 		else
3419 			vput(nd.ni_dvp);
3420 		vrele(vp);
3421 		VFS_UNLOCK_GIANT(vfslocked);
3422 		return (EEXIST);
3423 	}
3424 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3425 		NDFREE(&nd, NDF_ONLY_PNBUF);
3426 		vput(nd.ni_dvp);
3427 		VFS_UNLOCK_GIANT(vfslocked);
3428 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3429 			return (error);
3430 		goto restart;
3431 	}
3432 	VATTR_NULL(&vattr);
3433 	vattr.va_type = VDIR;
3434 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3435 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3436 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3437 #ifdef MAC
3438 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3439 	    &vattr);
3440 	if (error)
3441 		goto out;
3442 #endif
3443 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3444 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3445 #ifdef MAC
3446 out:
3447 #endif
3448 	NDFREE(&nd, NDF_ONLY_PNBUF);
3449 	vput(nd.ni_dvp);
3450 	if (!error)
3451 		vput(nd.ni_vp);
3452 	vn_finished_write(mp);
3453 	VFS_UNLOCK_GIANT(vfslocked);
3454 	return (error);
3455 }
3456 
3457 /*
3458  * Remove a directory file.
3459  */
3460 #ifndef _SYS_SYSPROTO_H_
3461 struct rmdir_args {
3462 	char	*path;
3463 };
3464 #endif
3465 int
3466 rmdir(td, uap)
3467 	struct thread *td;
3468 	struct rmdir_args /* {
3469 		char *path;
3470 	} */ *uap;
3471 {
3472 
3473 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3474 }
3475 
3476 int
3477 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3478 {
3479 	struct mount *mp;
3480 	struct vnode *vp;
3481 	int error;
3482 	struct nameidata nd;
3483 	int vfslocked;
3484 
3485 restart:
3486 	bwillwrite();
3487 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3488 	    pathseg, path, td);
3489 	if ((error = namei(&nd)) != 0)
3490 		return (error);
3491 	vfslocked = NDHASGIANT(&nd);
3492 	vp = nd.ni_vp;
3493 	if (vp->v_type != VDIR) {
3494 		error = ENOTDIR;
3495 		goto out;
3496 	}
3497 	/*
3498 	 * No rmdir "." please.
3499 	 */
3500 	if (nd.ni_dvp == vp) {
3501 		error = EINVAL;
3502 		goto out;
3503 	}
3504 	/*
3505 	 * The root of a mounted filesystem cannot be deleted.
3506 	 */
3507 	if (vp->v_vflag & VV_ROOT) {
3508 		error = EBUSY;
3509 		goto out;
3510 	}
3511 #ifdef MAC
3512 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3513 	    &nd.ni_cnd);
3514 	if (error)
3515 		goto out;
3516 #endif
3517 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3518 		NDFREE(&nd, NDF_ONLY_PNBUF);
3519 		vput(vp);
3520 		if (nd.ni_dvp == vp)
3521 			vrele(nd.ni_dvp);
3522 		else
3523 			vput(nd.ni_dvp);
3524 		VFS_UNLOCK_GIANT(vfslocked);
3525 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3526 			return (error);
3527 		goto restart;
3528 	}
3529 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3530 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3531 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3532 	vn_finished_write(mp);
3533 out:
3534 	NDFREE(&nd, NDF_ONLY_PNBUF);
3535 	vput(vp);
3536 	if (nd.ni_dvp == vp)
3537 		vrele(nd.ni_dvp);
3538 	else
3539 		vput(nd.ni_dvp);
3540 	VFS_UNLOCK_GIANT(vfslocked);
3541 	return (error);
3542 }
3543 
3544 #ifdef COMPAT_43
3545 /*
3546  * Read a block of directory entries in a filesystem independent format.
3547  */
3548 #ifndef _SYS_SYSPROTO_H_
3549 struct ogetdirentries_args {
3550 	int	fd;
3551 	char	*buf;
3552 	u_int	count;
3553 	long	*basep;
3554 };
3555 #endif
3556 int
3557 ogetdirentries(td, uap)
3558 	struct thread *td;
3559 	register struct ogetdirentries_args /* {
3560 		int fd;
3561 		char *buf;
3562 		u_int count;
3563 		long *basep;
3564 	} */ *uap;
3565 {
3566 	struct vnode *vp;
3567 	struct file *fp;
3568 	struct uio auio, kuio;
3569 	struct iovec aiov, kiov;
3570 	struct dirent *dp, *edp;
3571 	caddr_t dirbuf;
3572 	int error, eofflag, readcnt, vfslocked;
3573 	long loff;
3574 
3575 	/* XXX arbitrary sanity limit on `count'. */
3576 	if (uap->count > 64 * 1024)
3577 		return (EINVAL);
3578 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3579 		return (error);
3580 	if ((fp->f_flag & FREAD) == 0) {
3581 		fdrop(fp, td);
3582 		return (EBADF);
3583 	}
3584 	vp = fp->f_vnode;
3585 unionread:
3586 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3587 	if (vp->v_type != VDIR) {
3588 		VFS_UNLOCK_GIANT(vfslocked);
3589 		fdrop(fp, td);
3590 		return (EINVAL);
3591 	}
3592 	aiov.iov_base = uap->buf;
3593 	aiov.iov_len = uap->count;
3594 	auio.uio_iov = &aiov;
3595 	auio.uio_iovcnt = 1;
3596 	auio.uio_rw = UIO_READ;
3597 	auio.uio_segflg = UIO_USERSPACE;
3598 	auio.uio_td = td;
3599 	auio.uio_resid = uap->count;
3600 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3601 	loff = auio.uio_offset = fp->f_offset;
3602 #ifdef MAC
3603 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3604 	if (error) {
3605 		VOP_UNLOCK(vp, 0, td);
3606 		VFS_UNLOCK_GIANT(vfslocked);
3607 		fdrop(fp, td);
3608 		return (error);
3609 	}
3610 #endif
3611 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3612 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3613 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3614 			    NULL, NULL);
3615 			fp->f_offset = auio.uio_offset;
3616 		} else
3617 #	endif
3618 	{
3619 		kuio = auio;
3620 		kuio.uio_iov = &kiov;
3621 		kuio.uio_segflg = UIO_SYSSPACE;
3622 		kiov.iov_len = uap->count;
3623 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3624 		kiov.iov_base = dirbuf;
3625 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3626 			    NULL, NULL);
3627 		fp->f_offset = kuio.uio_offset;
3628 		if (error == 0) {
3629 			readcnt = uap->count - kuio.uio_resid;
3630 			edp = (struct dirent *)&dirbuf[readcnt];
3631 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3632 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3633 					/*
3634 					 * The expected low byte of
3635 					 * dp->d_namlen is our dp->d_type.
3636 					 * The high MBZ byte of dp->d_namlen
3637 					 * is our dp->d_namlen.
3638 					 */
3639 					dp->d_type = dp->d_namlen;
3640 					dp->d_namlen = 0;
3641 #				else
3642 					/*
3643 					 * The dp->d_type is the high byte
3644 					 * of the expected dp->d_namlen,
3645 					 * so must be zero'ed.
3646 					 */
3647 					dp->d_type = 0;
3648 #				endif
3649 				if (dp->d_reclen > 0) {
3650 					dp = (struct dirent *)
3651 					    ((char *)dp + dp->d_reclen);
3652 				} else {
3653 					error = EIO;
3654 					break;
3655 				}
3656 			}
3657 			if (dp >= edp)
3658 				error = uiomove(dirbuf, readcnt, &auio);
3659 		}
3660 		FREE(dirbuf, M_TEMP);
3661 	}
3662 	VOP_UNLOCK(vp, 0, td);
3663 	if (error) {
3664 		VFS_UNLOCK_GIANT(vfslocked);
3665 		fdrop(fp, td);
3666 		return (error);
3667 	}
3668 	if (uap->count == auio.uio_resid) {
3669 		if (union_dircheckp) {
3670 			error = union_dircheckp(td, &vp, fp);
3671 			if (error == -1) {
3672 				VFS_UNLOCK_GIANT(vfslocked);
3673 				goto unionread;
3674 			}
3675 			if (error) {
3676 				VFS_UNLOCK_GIANT(vfslocked);
3677 				fdrop(fp, td);
3678 				return (error);
3679 			}
3680 		}
3681 		/*
3682 		 * XXX We could delay dropping the lock above but
3683 		 * union_dircheckp complicates things.
3684 		 */
3685 		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
3686 		if ((vp->v_vflag & VV_ROOT) &&
3687 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3688 			struct vnode *tvp = vp;
3689 			vp = vp->v_mount->mnt_vnodecovered;
3690 			VREF(vp);
3691 			fp->f_vnode = vp;
3692 			fp->f_data = vp;
3693 			fp->f_offset = 0;
3694 			vput(tvp);
3695 			VFS_UNLOCK_GIANT(vfslocked);
3696 			goto unionread;
3697 		}
3698 		VOP_UNLOCK(vp, 0, td);
3699 	}
3700 	VFS_UNLOCK_GIANT(vfslocked);
3701 	error = copyout(&loff, uap->basep, sizeof(long));
3702 	fdrop(fp, td);
3703 	td->td_retval[0] = uap->count - auio.uio_resid;
3704 	return (error);
3705 }
3706 #endif /* COMPAT_43 */
3707 
3708 /*
3709  * Read a block of directory entries in a filesystem independent format.
3710  */
3711 #ifndef _SYS_SYSPROTO_H_
3712 struct getdirentries_args {
3713 	int	fd;
3714 	char	*buf;
3715 	u_int	count;
3716 	long	*basep;
3717 };
3718 #endif
3719 int
3720 getdirentries(td, uap)
3721 	struct thread *td;
3722 	register struct getdirentries_args /* {
3723 		int fd;
3724 		char *buf;
3725 		u_int count;
3726 		long *basep;
3727 	} */ *uap;
3728 {
3729 	struct vnode *vp;
3730 	struct file *fp;
3731 	struct uio auio;
3732 	struct iovec aiov;
3733 	int vfslocked;
3734 	long loff;
3735 	int error, eofflag;
3736 
3737 	AUDIT_ARG(fd, uap->fd);
3738 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3739 		return (error);
3740 	if ((fp->f_flag & FREAD) == 0) {
3741 		fdrop(fp, td);
3742 		return (EBADF);
3743 	}
3744 	vp = fp->f_vnode;
3745 unionread:
3746 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3747 	if (vp->v_type != VDIR) {
3748 		error = EINVAL;
3749 		goto fail;
3750 	}
3751 	aiov.iov_base = uap->buf;
3752 	aiov.iov_len = uap->count;
3753 	auio.uio_iov = &aiov;
3754 	auio.uio_iovcnt = 1;
3755 	auio.uio_rw = UIO_READ;
3756 	auio.uio_segflg = UIO_USERSPACE;
3757 	auio.uio_td = td;
3758 	auio.uio_resid = uap->count;
3759 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3760 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3761 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3762 	loff = auio.uio_offset = fp->f_offset;
3763 #ifdef MAC
3764 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3765 	if (error == 0)
3766 #endif
3767 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3768 		    NULL);
3769 	fp->f_offset = auio.uio_offset;
3770 	VOP_UNLOCK(vp, 0, td);
3771 	if (error)
3772 		goto fail;
3773 	if (uap->count == auio.uio_resid) {
3774 		if (union_dircheckp) {
3775 			error = union_dircheckp(td, &vp, fp);
3776 			if (error == -1) {
3777 				VFS_UNLOCK_GIANT(vfslocked);
3778 				goto unionread;
3779 			}
3780 			if (error)
3781 				goto fail;
3782 		}
3783 		/*
3784 		 * XXX We could delay dropping the lock above but
3785 		 * union_dircheckp complicates things.
3786 		 */
3787 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3788 		if ((vp->v_vflag & VV_ROOT) &&
3789 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3790 			struct vnode *tvp = vp;
3791 			vp = vp->v_mount->mnt_vnodecovered;
3792 			VREF(vp);
3793 			fp->f_vnode = vp;
3794 			fp->f_data = vp;
3795 			fp->f_offset = 0;
3796 			vput(tvp);
3797 			VFS_UNLOCK_GIANT(vfslocked);
3798 			goto unionread;
3799 		}
3800 		VOP_UNLOCK(vp, 0, td);
3801 	}
3802 	if (uap->basep != NULL) {
3803 		error = copyout(&loff, uap->basep, sizeof(long));
3804 	}
3805 	td->td_retval[0] = uap->count - auio.uio_resid;
3806 fail:
3807 	VFS_UNLOCK_GIANT(vfslocked);
3808 	fdrop(fp, td);
3809 	return (error);
3810 }
3811 #ifndef _SYS_SYSPROTO_H_
3812 struct getdents_args {
3813 	int fd;
3814 	char *buf;
3815 	size_t count;
3816 };
3817 #endif
3818 int
3819 getdents(td, uap)
3820 	struct thread *td;
3821 	register struct getdents_args /* {
3822 		int fd;
3823 		char *buf;
3824 		u_int count;
3825 	} */ *uap;
3826 {
3827 	struct getdirentries_args ap;
3828 	ap.fd = uap->fd;
3829 	ap.buf = uap->buf;
3830 	ap.count = uap->count;
3831 	ap.basep = NULL;
3832 	return (getdirentries(td, &ap));
3833 }
3834 
3835 /*
3836  * Set the mode mask for creation of filesystem nodes.
3837  *
3838  * MP SAFE
3839  */
3840 #ifndef _SYS_SYSPROTO_H_
3841 struct umask_args {
3842 	int	newmask;
3843 };
3844 #endif
3845 int
3846 umask(td, uap)
3847 	struct thread *td;
3848 	struct umask_args /* {
3849 		int newmask;
3850 	} */ *uap;
3851 {
3852 	register struct filedesc *fdp;
3853 
3854 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3855 	fdp = td->td_proc->p_fd;
3856 	td->td_retval[0] = fdp->fd_cmask;
3857 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3858 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3859 	return (0);
3860 }
3861 
3862 /*
3863  * Void all references to file by ripping underlying filesystem
3864  * away from vnode.
3865  */
3866 #ifndef _SYS_SYSPROTO_H_
3867 struct revoke_args {
3868 	char	*path;
3869 };
3870 #endif
3871 int
3872 revoke(td, uap)
3873 	struct thread *td;
3874 	register struct revoke_args /* {
3875 		char *path;
3876 	} */ *uap;
3877 {
3878 	struct vnode *vp;
3879 	struct vattr vattr;
3880 	int error;
3881 	struct nameidata nd;
3882 	int vfslocked;
3883 
3884 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3885 	    UIO_USERSPACE, uap->path, td);
3886 	if ((error = namei(&nd)) != 0)
3887 		return (error);
3888 	vfslocked = NDHASGIANT(&nd);
3889 	vp = nd.ni_vp;
3890 	NDFREE(&nd, NDF_ONLY_PNBUF);
3891 	if (vp->v_type != VCHR) {
3892 		error = EINVAL;
3893 		goto out;
3894 	}
3895 #ifdef MAC
3896 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3897 	if (error)
3898 		goto out;
3899 #endif
3900 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3901 	if (error)
3902 		goto out;
3903 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3904 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
3905 		if (error)
3906 			goto out;
3907 	}
3908 	if (vcount(vp) > 1)
3909 		VOP_REVOKE(vp, REVOKEALL);
3910 out:
3911 	vput(vp);
3912 	VFS_UNLOCK_GIANT(vfslocked);
3913 	return (error);
3914 }
3915 
3916 /*
3917  * Convert a user file descriptor to a kernel file entry.
3918  * A reference on the file entry is held upon returning.
3919  */
3920 int
3921 getvnode(fdp, fd, fpp)
3922 	struct filedesc *fdp;
3923 	int fd;
3924 	struct file **fpp;
3925 {
3926 	int error;
3927 	struct file *fp;
3928 
3929 	fp = NULL;
3930 	if (fdp == NULL)
3931 		error = EBADF;
3932 	else {
3933 		FILEDESC_LOCK(fdp);
3934 		if ((u_int)fd >= fdp->fd_nfiles ||
3935 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3936 			error = EBADF;
3937 		else if (fp->f_vnode == NULL) {
3938 			fp = NULL;
3939 			error = EINVAL;
3940 		} else {
3941 			fhold(fp);
3942 			error = 0;
3943 		}
3944 		FILEDESC_UNLOCK(fdp);
3945 	}
3946 	*fpp = fp;
3947 	return (error);
3948 }
3949 
3950 /*
3951  * Get (NFS) file handle
3952  */
3953 #ifndef _SYS_SYSPROTO_H_
3954 struct lgetfh_args {
3955 	char	*fname;
3956 	fhandle_t *fhp;
3957 };
3958 #endif
3959 int
3960 lgetfh(td, uap)
3961 	struct thread *td;
3962 	register struct lgetfh_args *uap;
3963 {
3964 	struct nameidata nd;
3965 	fhandle_t fh;
3966 	register struct vnode *vp;
3967 	int vfslocked;
3968 	int error;
3969 
3970 	error = suser(td);
3971 	if (error)
3972 		return (error);
3973 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3974 	    UIO_USERSPACE, uap->fname, td);
3975 	error = namei(&nd);
3976 	if (error)
3977 		return (error);
3978 	vfslocked = NDHASGIANT(&nd);
3979 	NDFREE(&nd, NDF_ONLY_PNBUF);
3980 	vp = nd.ni_vp;
3981 	bzero(&fh, sizeof(fh));
3982 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3983 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3984 	vput(vp);
3985 	VFS_UNLOCK_GIANT(vfslocked);
3986 	if (error)
3987 		return (error);
3988 	error = copyout(&fh, uap->fhp, sizeof (fh));
3989 	return (error);
3990 }
3991 
3992 #ifndef _SYS_SYSPROTO_H_
3993 struct getfh_args {
3994 	char	*fname;
3995 	fhandle_t *fhp;
3996 };
3997 #endif
3998 int
3999 getfh(td, uap)
4000 	struct thread *td;
4001 	register struct getfh_args *uap;
4002 {
4003 	struct nameidata nd;
4004 	fhandle_t fh;
4005 	register struct vnode *vp;
4006 	int vfslocked;
4007 	int error;
4008 
4009 	error = suser(td);
4010 	if (error)
4011 		return (error);
4012 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4013 	    UIO_USERSPACE, uap->fname, td);
4014 	error = namei(&nd);
4015 	if (error)
4016 		return (error);
4017 	vfslocked = NDHASGIANT(&nd);
4018 	NDFREE(&nd, NDF_ONLY_PNBUF);
4019 	vp = nd.ni_vp;
4020 	bzero(&fh, sizeof(fh));
4021 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4022 	error = VFS_VPTOFH(vp, &fh.fh_fid);
4023 	vput(vp);
4024 	VFS_UNLOCK_GIANT(vfslocked);
4025 	if (error)
4026 		return (error);
4027 	error = copyout(&fh, uap->fhp, sizeof (fh));
4028 	return (error);
4029 }
4030 
4031 /*
4032  * syscall for the rpc.lockd to use to translate a NFS file handle into
4033  * an open descriptor.
4034  *
4035  * warning: do not remove the suser() call or this becomes one giant
4036  * security hole.
4037  *
4038  * MP SAFE
4039  */
4040 #ifndef _SYS_SYSPROTO_H_
4041 struct fhopen_args {
4042 	const struct fhandle *u_fhp;
4043 	int flags;
4044 };
4045 #endif
4046 int
4047 fhopen(td, uap)
4048 	struct thread *td;
4049 	struct fhopen_args /* {
4050 		const struct fhandle *u_fhp;
4051 		int flags;
4052 	} */ *uap;
4053 {
4054 	struct proc *p = td->td_proc;
4055 	struct mount *mp;
4056 	struct vnode *vp;
4057 	struct fhandle fhp;
4058 	struct vattr vat;
4059 	struct vattr *vap = &vat;
4060 	struct flock lf;
4061 	struct file *fp;
4062 	register struct filedesc *fdp = p->p_fd;
4063 	int fmode, mode, error, type;
4064 	struct file *nfp;
4065 	int vfslocked;
4066 	int indx;
4067 
4068 	error = suser(td);
4069 	if (error)
4070 		return (error);
4071 	fmode = FFLAGS(uap->flags);
4072 	/* why not allow a non-read/write open for our lockd? */
4073 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4074 		return (EINVAL);
4075 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4076 	if (error)
4077 		return(error);
4078 	/* find the mount point */
4079 	mp = vfs_getvfs(&fhp.fh_fsid);
4080 	if (mp == NULL)
4081 		return (ESTALE);
4082 	vfslocked = VFS_LOCK_GIANT(mp);
4083 	/* now give me my vnode, it gets returned to me locked */
4084 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4085 	if (error)
4086 		goto out;
4087 	/*
4088 	 * from now on we have to make sure not
4089 	 * to forget about the vnode
4090 	 * any error that causes an abort must vput(vp)
4091 	 * just set error = err and 'goto bad;'.
4092 	 */
4093 
4094 	/*
4095 	 * from vn_open
4096 	 */
4097 	if (vp->v_type == VLNK) {
4098 		error = EMLINK;
4099 		goto bad;
4100 	}
4101 	if (vp->v_type == VSOCK) {
4102 		error = EOPNOTSUPP;
4103 		goto bad;
4104 	}
4105 	mode = 0;
4106 	if (fmode & (FWRITE | O_TRUNC)) {
4107 		if (vp->v_type == VDIR) {
4108 			error = EISDIR;
4109 			goto bad;
4110 		}
4111 		error = vn_writechk(vp);
4112 		if (error)
4113 			goto bad;
4114 		mode |= VWRITE;
4115 	}
4116 	if (fmode & FREAD)
4117 		mode |= VREAD;
4118 	if (fmode & O_APPEND)
4119 		mode |= VAPPEND;
4120 #ifdef MAC
4121 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4122 	if (error)
4123 		goto bad;
4124 #endif
4125 	if (mode) {
4126 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4127 		if (error)
4128 			goto bad;
4129 	}
4130 	if (fmode & O_TRUNC) {
4131 		VOP_UNLOCK(vp, 0, td);				/* XXX */
4132 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4133 			vrele(vp);
4134 			goto out;
4135 		}
4136 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4137 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4138 #ifdef MAC
4139 		/*
4140 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4141 		 * should be right.
4142 		 */
4143 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4144 		if (error == 0) {
4145 #endif
4146 			VATTR_NULL(vap);
4147 			vap->va_size = 0;
4148 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4149 #ifdef MAC
4150 		}
4151 #endif
4152 		vn_finished_write(mp);
4153 		if (error)
4154 			goto bad;
4155 	}
4156 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4157 	if (error)
4158 		goto bad;
4159 
4160 	if (fmode & FWRITE)
4161 		vp->v_writecount++;
4162 
4163 	/*
4164 	 * end of vn_open code
4165 	 */
4166 
4167 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4168 		if (fmode & FWRITE)
4169 			vp->v_writecount--;
4170 		goto bad;
4171 	}
4172 	/* An extra reference on `nfp' has been held for us by falloc(). */
4173 	fp = nfp;
4174 
4175 	nfp->f_vnode = vp;
4176 	nfp->f_data = vp;
4177 	nfp->f_flag = fmode & FMASK;
4178 	nfp->f_ops = &vnops;
4179 	nfp->f_type = DTYPE_VNODE;
4180 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4181 		lf.l_whence = SEEK_SET;
4182 		lf.l_start = 0;
4183 		lf.l_len = 0;
4184 		if (fmode & O_EXLOCK)
4185 			lf.l_type = F_WRLCK;
4186 		else
4187 			lf.l_type = F_RDLCK;
4188 		type = F_FLOCK;
4189 		if ((fmode & FNONBLOCK) == 0)
4190 			type |= F_WAIT;
4191 		VOP_UNLOCK(vp, 0, td);
4192 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4193 			    type)) != 0) {
4194 			/*
4195 			 * The lock request failed.  Normally close the
4196 			 * descriptor but handle the case where someone might
4197 			 * have dup()d or close()d it when we weren't looking.
4198 			 */
4199 			fdclose(fdp, fp, indx, td);
4200 
4201 			/*
4202 			 * release our private reference
4203 			 */
4204 			fdrop(fp, td);
4205 			goto out;
4206 		}
4207 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4208 		fp->f_flag |= FHASLOCK;
4209 	}
4210 
4211 	VOP_UNLOCK(vp, 0, td);
4212 	fdrop(fp, td);
4213 	vfs_rel(mp);
4214 	VFS_UNLOCK_GIANT(vfslocked);
4215 	td->td_retval[0] = indx;
4216 	return (0);
4217 
4218 bad:
4219 	vput(vp);
4220 out:
4221 	vfs_rel(mp);
4222 	VFS_UNLOCK_GIANT(vfslocked);
4223 	return (error);
4224 }
4225 
4226 /*
4227  * Stat an (NFS) file handle.
4228  *
4229  * MP SAFE
4230  */
4231 #ifndef _SYS_SYSPROTO_H_
4232 struct fhstat_args {
4233 	struct fhandle *u_fhp;
4234 	struct stat *sb;
4235 };
4236 #endif
4237 int
4238 fhstat(td, uap)
4239 	struct thread *td;
4240 	register struct fhstat_args /* {
4241 		struct fhandle *u_fhp;
4242 		struct stat *sb;
4243 	} */ *uap;
4244 {
4245 	struct stat sb;
4246 	fhandle_t fh;
4247 	struct mount *mp;
4248 	struct vnode *vp;
4249 	int vfslocked;
4250 	int error;
4251 
4252 	error = suser(td);
4253 	if (error)
4254 		return (error);
4255 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4256 	if (error)
4257 		return (error);
4258 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4259 		return (ESTALE);
4260 	vfslocked = VFS_LOCK_GIANT(mp);
4261 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4262 		vfs_rel(mp);
4263 		VFS_UNLOCK_GIANT(vfslocked);
4264 		return (error);
4265 	}
4266 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4267 	vput(vp);
4268 	vfs_rel(mp);
4269 	VFS_UNLOCK_GIANT(vfslocked);
4270 	if (error)
4271 		return (error);
4272 	error = copyout(&sb, uap->sb, sizeof(sb));
4273 	return (error);
4274 }
4275 
4276 /*
4277  * Implement fstatfs() for (NFS) file handles.
4278  *
4279  * MP SAFE
4280  */
4281 #ifndef _SYS_SYSPROTO_H_
4282 struct fhstatfs_args {
4283 	struct fhandle *u_fhp;
4284 	struct statfs *buf;
4285 };
4286 #endif
4287 int
4288 fhstatfs(td, uap)
4289 	struct thread *td;
4290 	struct fhstatfs_args /* {
4291 		struct fhandle *u_fhp;
4292 		struct statfs *buf;
4293 	} */ *uap;
4294 {
4295 	struct statfs sf;
4296 	fhandle_t fh;
4297 	int error;
4298 
4299 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4300 	if (error)
4301 		return (error);
4302 	error = kern_fhstatfs(td, fh, &sf);
4303 	if (error)
4304 		return (error);
4305 	return (copyout(&sf, uap->buf, sizeof(sf)));
4306 }
4307 
4308 int
4309 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4310 {
4311 	struct statfs *sp;
4312 	struct mount *mp;
4313 	struct vnode *vp;
4314 	int vfslocked;
4315 	int error;
4316 
4317 	error = suser(td);
4318 	if (error)
4319 		return (error);
4320 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4321 		return (ESTALE);
4322 	vfslocked = VFS_LOCK_GIANT(mp);
4323 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4324 	if (error) {
4325 		VFS_UNLOCK_GIANT(vfslocked);
4326 		vfs_rel(mp);
4327 		return (error);
4328 	}
4329 	vput(vp);
4330 	sp = NULL;
4331 	error = prison_canseemount(td->td_ucred, mp);
4332 	if (error)
4333 		goto out;
4334 #ifdef MAC
4335 	error = mac_check_mount_stat(td->td_ucred, mp);
4336 	if (error)
4337 		goto out;
4338 #endif
4339 	/*
4340 	 * Set these in case the underlying filesystem fails to do so.
4341 	 */
4342 	sp = &mp->mnt_stat;
4343 	sp->f_version = STATFS_VERSION;
4344 	sp->f_namemax = NAME_MAX;
4345 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4346 	error = VFS_STATFS(mp, sp, td);
4347 out:
4348 	vfs_rel(mp);
4349 	VFS_UNLOCK_GIANT(vfslocked);
4350 	if (sp)
4351 		*buf = *sp;
4352 	return (error);
4353 }
4354 
4355 /*
4356  * Syscall to push extended attribute configuration information into the
4357  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4358  * a command (int cmd), and attribute name and misc data.  For now, the
4359  * attribute name is left in userspace for consumption by the VFS_op.
4360  * It will probably be changed to be copied into sysspace by the
4361  * syscall in the future, once issues with various consumers of the
4362  * attribute code have raised their hands.
4363  *
4364  * Currently this is used only by UFS Extended Attributes.
4365  */
4366 int
4367 extattrctl(td, uap)
4368 	struct thread *td;
4369 	struct extattrctl_args /* {
4370 		const char *path;
4371 		int cmd;
4372 		const char *filename;
4373 		int attrnamespace;
4374 		const char *attrname;
4375 	} */ *uap;
4376 {
4377 	struct vnode *filename_vp;
4378 	struct nameidata nd;
4379 	struct mount *mp, *mp_writable;
4380 	char attrname[EXTATTR_MAXNAMELEN];
4381 	int vfslocked, fnvfslocked, error;
4382 
4383 	AUDIT_ARG(cmd, uap->cmd);
4384 	AUDIT_ARG(value, uap->attrnamespace);
4385 	/*
4386 	 * uap->attrname is not always defined.  We check again later when we
4387 	 * invoke the VFS call so as to pass in NULL there if needed.
4388 	 */
4389 	if (uap->attrname != NULL) {
4390 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4391 		    NULL);
4392 		if (error)
4393 			return (error);
4394 	}
4395 	AUDIT_ARG(text, attrname);
4396 
4397 	vfslocked = fnvfslocked = 0;
4398 	/*
4399 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4400 	 * which VFS_EXTATTRCTL() will later release.
4401 	 */
4402 	filename_vp = NULL;
4403 	if (uap->filename != NULL) {
4404 		NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF |
4405 		    AUDITVNODE2, UIO_USERSPACE, uap->filename, td);
4406 		error = namei(&nd);
4407 		if (error)
4408 			return (error);
4409 		fnvfslocked = NDHASGIANT(&nd);
4410 		filename_vp = nd.ni_vp;
4411 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4412 	}
4413 
4414 	/* uap->path is always defined. */
4415 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4416 	    uap->path, td);
4417 	error = namei(&nd);
4418 	if (error) {
4419 		if (filename_vp != NULL)
4420 			vput(filename_vp);
4421 		goto out;
4422 	}
4423 	vfslocked = NDHASGIANT(&nd);
4424 	mp = nd.ni_vp->v_mount;
4425 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4426 	NDFREE(&nd, 0);
4427 	if (error) {
4428 		if (filename_vp != NULL)
4429 			vput(filename_vp);
4430 		goto out;
4431 	}
4432 
4433 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
4434 	    uap->attrname != NULL ? attrname : NULL, td);
4435 
4436 	vn_finished_write(mp_writable);
4437 	/*
4438 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4439 	 * filename_vp, so vrele it if it is defined.
4440 	 */
4441 	if (filename_vp != NULL)
4442 		vrele(filename_vp);
4443 out:
4444 	VFS_UNLOCK_GIANT(fnvfslocked);
4445 	VFS_UNLOCK_GIANT(vfslocked);
4446 	return (error);
4447 }
4448 
4449 /*-
4450  * Set a named extended attribute on a file or directory
4451  *
4452  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4453  *            kernelspace string pointer "attrname", userspace buffer
4454  *            pointer "data", buffer length "nbytes", thread "td".
4455  * Returns: 0 on success, an error number otherwise
4456  * Locks: none
4457  * References: vp must be a valid reference for the duration of the call
4458  */
4459 static int
4460 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4461     void *data, size_t nbytes, struct thread *td)
4462 {
4463 	struct mount *mp;
4464 	struct uio auio;
4465 	struct iovec aiov;
4466 	ssize_t cnt;
4467 	int error;
4468 
4469 	VFS_ASSERT_GIANT(vp->v_mount);
4470 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4471 	if (error)
4472 		return (error);
4473 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4474 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4475 
4476 	aiov.iov_base = data;
4477 	aiov.iov_len = nbytes;
4478 	auio.uio_iov = &aiov;
4479 	auio.uio_iovcnt = 1;
4480 	auio.uio_offset = 0;
4481 	if (nbytes > INT_MAX) {
4482 		error = EINVAL;
4483 		goto done;
4484 	}
4485 	auio.uio_resid = nbytes;
4486 	auio.uio_rw = UIO_WRITE;
4487 	auio.uio_segflg = UIO_USERSPACE;
4488 	auio.uio_td = td;
4489 	cnt = nbytes;
4490 
4491 #ifdef MAC
4492 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4493 	    attrname, &auio);
4494 	if (error)
4495 		goto done;
4496 #endif
4497 
4498 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4499 	    td->td_ucred, td);
4500 	cnt -= auio.uio_resid;
4501 	td->td_retval[0] = cnt;
4502 
4503 done:
4504 	VOP_UNLOCK(vp, 0, td);
4505 	vn_finished_write(mp);
4506 	return (error);
4507 }
4508 
4509 int
4510 extattr_set_fd(td, uap)
4511 	struct thread *td;
4512 	struct extattr_set_fd_args /* {
4513 		int fd;
4514 		int attrnamespace;
4515 		const char *attrname;
4516 		void *data;
4517 		size_t nbytes;
4518 	} */ *uap;
4519 {
4520 	struct file *fp;
4521 	char attrname[EXTATTR_MAXNAMELEN];
4522 	int vfslocked, error;
4523 
4524 	AUDIT_ARG(fd, uap->fd);
4525 	AUDIT_ARG(value, uap->attrnamespace);
4526 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4527 	if (error)
4528 		return (error);
4529 	AUDIT_ARG(text, attrname);
4530 
4531 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4532 	if (error)
4533 		return (error);
4534 
4535 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4536 	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4537 	    attrname, uap->data, uap->nbytes, td);
4538 	fdrop(fp, td);
4539 	VFS_UNLOCK_GIANT(vfslocked);
4540 
4541 	return (error);
4542 }
4543 
4544 int
4545 extattr_set_file(td, uap)
4546 	struct thread *td;
4547 	struct extattr_set_file_args /* {
4548 		const char *path;
4549 		int attrnamespace;
4550 		const char *attrname;
4551 		void *data;
4552 		size_t nbytes;
4553 	} */ *uap;
4554 {
4555 	struct nameidata nd;
4556 	char attrname[EXTATTR_MAXNAMELEN];
4557 	int vfslocked, error;
4558 
4559 	AUDIT_ARG(value, uap->attrnamespace);
4560 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4561 	if (error)
4562 		return (error);
4563 	AUDIT_ARG(text, attrname);
4564 
4565 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4566 	    uap->path, td);
4567 	error = namei(&nd);
4568 	if (error)
4569 		return (error);
4570 	NDFREE(&nd, NDF_ONLY_PNBUF);
4571 
4572 	vfslocked = NDHASGIANT(&nd);
4573 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4574 	    uap->data, uap->nbytes, td);
4575 
4576 	vrele(nd.ni_vp);
4577 	VFS_UNLOCK_GIANT(vfslocked);
4578 	return (error);
4579 }
4580 
4581 int
4582 extattr_set_link(td, uap)
4583 	struct thread *td;
4584 	struct extattr_set_link_args /* {
4585 		const char *path;
4586 		int attrnamespace;
4587 		const char *attrname;
4588 		void *data;
4589 		size_t nbytes;
4590 	} */ *uap;
4591 {
4592 	struct nameidata nd;
4593 	char attrname[EXTATTR_MAXNAMELEN];
4594 	int vfslocked, error;
4595 
4596 	AUDIT_ARG(value, uap->attrnamespace);
4597 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4598 	if (error)
4599 		return (error);
4600 	AUDIT_ARG(text, attrname);
4601 
4602 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4603 	    uap->path, td);
4604 	error = namei(&nd);
4605 	if (error)
4606 		return (error);
4607 	NDFREE(&nd, NDF_ONLY_PNBUF);
4608 
4609 	vfslocked = NDHASGIANT(&nd);
4610 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4611 	    uap->data, uap->nbytes, td);
4612 
4613 	vrele(nd.ni_vp);
4614 	VFS_UNLOCK_GIANT(vfslocked);
4615 	return (error);
4616 }
4617 
4618 /*-
4619  * Get a named extended attribute on a file or directory
4620  *
4621  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4622  *            kernelspace string pointer "attrname", userspace buffer
4623  *            pointer "data", buffer length "nbytes", thread "td".
4624  * Returns: 0 on success, an error number otherwise
4625  * Locks: none
4626  * References: vp must be a valid reference for the duration of the call
4627  */
4628 static int
4629 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4630     void *data, size_t nbytes, struct thread *td)
4631 {
4632 	struct uio auio, *auiop;
4633 	struct iovec aiov;
4634 	ssize_t cnt;
4635 	size_t size, *sizep;
4636 	int error;
4637 
4638 	VFS_ASSERT_GIANT(vp->v_mount);
4639 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4640 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4641 
4642 	/*
4643 	 * Slightly unusual semantics: if the user provides a NULL data
4644 	 * pointer, they don't want to receive the data, just the
4645 	 * maximum read length.
4646 	 */
4647 	auiop = NULL;
4648 	sizep = NULL;
4649 	cnt = 0;
4650 	if (data != NULL) {
4651 		aiov.iov_base = data;
4652 		aiov.iov_len = nbytes;
4653 		auio.uio_iov = &aiov;
4654 		auio.uio_iovcnt = 1;
4655 		auio.uio_offset = 0;
4656 		if (nbytes > INT_MAX) {
4657 			error = EINVAL;
4658 			goto done;
4659 		}
4660 		auio.uio_resid = nbytes;
4661 		auio.uio_rw = UIO_READ;
4662 		auio.uio_segflg = UIO_USERSPACE;
4663 		auio.uio_td = td;
4664 		auiop = &auio;
4665 		cnt = nbytes;
4666 	} else
4667 		sizep = &size;
4668 
4669 #ifdef MAC
4670 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4671 	    attrname, &auio);
4672 	if (error)
4673 		goto done;
4674 #endif
4675 
4676 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4677 	    td->td_ucred, td);
4678 
4679 	if (auiop != NULL) {
4680 		cnt -= auio.uio_resid;
4681 		td->td_retval[0] = cnt;
4682 	} else
4683 		td->td_retval[0] = size;
4684 
4685 done:
4686 	VOP_UNLOCK(vp, 0, td);
4687 	return (error);
4688 }
4689 
4690 int
4691 extattr_get_fd(td, uap)
4692 	struct thread *td;
4693 	struct extattr_get_fd_args /* {
4694 		int fd;
4695 		int attrnamespace;
4696 		const char *attrname;
4697 		void *data;
4698 		size_t nbytes;
4699 	} */ *uap;
4700 {
4701 	struct file *fp;
4702 	char attrname[EXTATTR_MAXNAMELEN];
4703 	int vfslocked, error;
4704 
4705 	AUDIT_ARG(fd, uap->fd);
4706 	AUDIT_ARG(value, uap->attrnamespace);
4707 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4708 	if (error)
4709 		return (error);
4710 	AUDIT_ARG(text, attrname);
4711 
4712 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4713 	if (error)
4714 		return (error);
4715 
4716 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4717 	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4718 	    attrname, uap->data, uap->nbytes, td);
4719 
4720 	fdrop(fp, td);
4721 	VFS_UNLOCK_GIANT(vfslocked);
4722 	return (error);
4723 }
4724 
4725 int
4726 extattr_get_file(td, uap)
4727 	struct thread *td;
4728 	struct extattr_get_file_args /* {
4729 		const char *path;
4730 		int attrnamespace;
4731 		const char *attrname;
4732 		void *data;
4733 		size_t nbytes;
4734 	} */ *uap;
4735 {
4736 	struct nameidata nd;
4737 	char attrname[EXTATTR_MAXNAMELEN];
4738 	int vfslocked, error;
4739 
4740 	AUDIT_ARG(value, uap->attrnamespace);
4741 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4742 	if (error)
4743 		return (error);
4744 	AUDIT_ARG(text, attrname);
4745 
4746 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4747 	    uap->path, td);
4748 	error = namei(&nd);
4749 	if (error)
4750 		return (error);
4751 	NDFREE(&nd, NDF_ONLY_PNBUF);
4752 
4753 	vfslocked = NDHASGIANT(&nd);
4754 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4755 	    uap->data, uap->nbytes, td);
4756 
4757 	vrele(nd.ni_vp);
4758 	VFS_UNLOCK_GIANT(vfslocked);
4759 	return (error);
4760 }
4761 
4762 int
4763 extattr_get_link(td, uap)
4764 	struct thread *td;
4765 	struct extattr_get_link_args /* {
4766 		const char *path;
4767 		int attrnamespace;
4768 		const char *attrname;
4769 		void *data;
4770 		size_t nbytes;
4771 	} */ *uap;
4772 {
4773 	struct nameidata nd;
4774 	char attrname[EXTATTR_MAXNAMELEN];
4775 	int vfslocked, error;
4776 
4777 	AUDIT_ARG(value, uap->attrnamespace);
4778 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4779 	if (error)
4780 		return (error);
4781 	AUDIT_ARG(text, attrname);
4782 
4783 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4784 	    uap->path, td);
4785 	error = namei(&nd);
4786 	if (error)
4787 		return (error);
4788 	NDFREE(&nd, NDF_ONLY_PNBUF);
4789 
4790 	vfslocked = NDHASGIANT(&nd);
4791 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4792 	    uap->data, uap->nbytes, td);
4793 
4794 	vrele(nd.ni_vp);
4795 	VFS_UNLOCK_GIANT(vfslocked);
4796 	return (error);
4797 }
4798 
4799 /*
4800  * extattr_delete_vp(): Delete a named extended attribute on a file or
4801  *                      directory
4802  *
4803  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4804  *            kernelspace string pointer "attrname", proc "p"
4805  * Returns: 0 on success, an error number otherwise
4806  * Locks: none
4807  * References: vp must be a valid reference for the duration of the call
4808  */
4809 static int
4810 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4811     struct thread *td)
4812 {
4813 	struct mount *mp;
4814 	int error;
4815 
4816 	VFS_ASSERT_GIANT(vp->v_mount);
4817 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4818 	if (error)
4819 		return (error);
4820 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4821 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4822 
4823 #ifdef MAC
4824 	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4825 	    attrname);
4826 	if (error)
4827 		goto done;
4828 #endif
4829 
4830 	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4831 	    td);
4832 	if (error == EOPNOTSUPP)
4833 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4834 		    td->td_ucred, td);
4835 #ifdef MAC
4836 done:
4837 #endif
4838 	VOP_UNLOCK(vp, 0, td);
4839 	vn_finished_write(mp);
4840 	return (error);
4841 }
4842 
4843 int
4844 extattr_delete_fd(td, uap)
4845 	struct thread *td;
4846 	struct extattr_delete_fd_args /* {
4847 		int fd;
4848 		int attrnamespace;
4849 		const char *attrname;
4850 	} */ *uap;
4851 {
4852 	struct file *fp;
4853 	char attrname[EXTATTR_MAXNAMELEN];
4854 	int vfslocked, error;
4855 
4856 	AUDIT_ARG(fd, uap->fd);
4857 	AUDIT_ARG(value, uap->attrnamespace);
4858 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4859 	if (error)
4860 		return (error);
4861 	AUDIT_ARG(text, attrname);
4862 
4863 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4864 	if (error)
4865 		return (error);
4866 
4867 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4868 	error = extattr_delete_vp(fp->f_vnode, uap->attrnamespace,
4869 	    attrname, td);
4870 	fdrop(fp, td);
4871 	VFS_UNLOCK_GIANT(vfslocked);
4872 	return (error);
4873 }
4874 
4875 int
4876 extattr_delete_file(td, uap)
4877 	struct thread *td;
4878 	struct extattr_delete_file_args /* {
4879 		const char *path;
4880 		int attrnamespace;
4881 		const char *attrname;
4882 	} */ *uap;
4883 {
4884 	struct nameidata nd;
4885 	char attrname[EXTATTR_MAXNAMELEN];
4886 	int vfslocked, error;
4887 
4888 	AUDIT_ARG(value, uap->attrnamespace);
4889 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4890 	if (error)
4891 		return(error);
4892 	AUDIT_ARG(text, attrname);
4893 
4894 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4895 	    uap->path, td);
4896 	error = namei(&nd);
4897 	if (error)
4898 		return(error);
4899 	NDFREE(&nd, NDF_ONLY_PNBUF);
4900 
4901 	vfslocked = NDHASGIANT(&nd);
4902 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4903 	vrele(nd.ni_vp);
4904 	VFS_UNLOCK_GIANT(vfslocked);
4905 	return(error);
4906 }
4907 
4908 int
4909 extattr_delete_link(td, uap)
4910 	struct thread *td;
4911 	struct extattr_delete_link_args /* {
4912 		const char *path;
4913 		int attrnamespace;
4914 		const char *attrname;
4915 	} */ *uap;
4916 {
4917 	struct nameidata nd;
4918 	char attrname[EXTATTR_MAXNAMELEN];
4919 	int vfslocked, error;
4920 
4921 	AUDIT_ARG(value, uap->attrnamespace);
4922 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4923 	if (error)
4924 		return(error);
4925 	AUDIT_ARG(text, attrname);
4926 
4927 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4928 	    uap->path, td);
4929 	error = namei(&nd);
4930 	if (error)
4931 		return(error);
4932 	NDFREE(&nd, NDF_ONLY_PNBUF);
4933 
4934 	vfslocked = NDHASGIANT(&nd);
4935 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4936 	vrele(nd.ni_vp);
4937 	VFS_UNLOCK_GIANT(vfslocked);
4938 	return(error);
4939 }
4940 
4941 /*-
4942  * Retrieve a list of extended attributes on a file or directory.
4943  *
4944  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4945  *            userspace buffer pointer "data", buffer length "nbytes",
4946  *            thread "td".
4947  * Returns: 0 on success, an error number otherwise
4948  * Locks: none
4949  * References: vp must be a valid reference for the duration of the call
4950  */
4951 static int
4952 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4953     size_t nbytes, struct thread *td)
4954 {
4955 	struct uio auio, *auiop;
4956 	size_t size, *sizep;
4957 	struct iovec aiov;
4958 	ssize_t cnt;
4959 	int error;
4960 
4961 	VFS_ASSERT_GIANT(vp->v_mount);
4962 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4963 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4964 
4965 	auiop = NULL;
4966 	sizep = NULL;
4967 	cnt = 0;
4968 	if (data != NULL) {
4969 		aiov.iov_base = data;
4970 		aiov.iov_len = nbytes;
4971 		auio.uio_iov = &aiov;
4972 		auio.uio_iovcnt = 1;
4973 		auio.uio_offset = 0;
4974 		if (nbytes > INT_MAX) {
4975 			error = EINVAL;
4976 			goto done;
4977 		}
4978 		auio.uio_resid = nbytes;
4979 		auio.uio_rw = UIO_READ;
4980 		auio.uio_segflg = UIO_USERSPACE;
4981 		auio.uio_td = td;
4982 		auiop = &auio;
4983 		cnt = nbytes;
4984 	} else
4985 		sizep = &size;
4986 
4987 #ifdef MAC
4988 	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4989 	if (error)
4990 		goto done;
4991 #endif
4992 
4993 	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4994 	    td->td_ucred, td);
4995 
4996 	if (auiop != NULL) {
4997 		cnt -= auio.uio_resid;
4998 		td->td_retval[0] = cnt;
4999 	} else
5000 		td->td_retval[0] = size;
5001 
5002 done:
5003 	VOP_UNLOCK(vp, 0, td);
5004 	return (error);
5005 }
5006 
5007 
5008 int
5009 extattr_list_fd(td, uap)
5010 	struct thread *td;
5011 	struct extattr_list_fd_args /* {
5012 		int fd;
5013 		int attrnamespace;
5014 		void *data;
5015 		size_t nbytes;
5016 	} */ *uap;
5017 {
5018 	struct file *fp;
5019 	int vfslocked, error;
5020 
5021 	AUDIT_ARG(fd, uap->fd);
5022 	AUDIT_ARG(value, uap->attrnamespace);
5023 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
5024 	if (error)
5025 		return (error);
5026 
5027 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
5028 	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
5029 	    uap->nbytes, td);
5030 
5031 	fdrop(fp, td);
5032 	VFS_UNLOCK_GIANT(vfslocked);
5033 	return (error);
5034 }
5035 
5036 int
5037 extattr_list_file(td, uap)
5038 	struct thread*td;
5039 	struct extattr_list_file_args /* {
5040 		const char *path;
5041 		int attrnamespace;
5042 		void *data;
5043 		size_t nbytes;
5044 	} */ *uap;
5045 {
5046 	struct nameidata nd;
5047 	int vfslocked, error;
5048 
5049 	AUDIT_ARG(value, uap->attrnamespace);
5050 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
5051 	    uap->path, td);
5052 	error = namei(&nd);
5053 	if (error)
5054 		return (error);
5055 	NDFREE(&nd, NDF_ONLY_PNBUF);
5056 
5057 	vfslocked = NDHASGIANT(&nd);
5058 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5059 	    uap->nbytes, td);
5060 
5061 	vrele(nd.ni_vp);
5062 	VFS_UNLOCK_GIANT(vfslocked);
5063 	return (error);
5064 }
5065 
5066 int
5067 extattr_list_link(td, uap)
5068 	struct thread*td;
5069 	struct extattr_list_link_args /* {
5070 		const char *path;
5071 		int attrnamespace;
5072 		void *data;
5073 		size_t nbytes;
5074 	} */ *uap;
5075 {
5076 	struct nameidata nd;
5077 	int vfslocked, error;
5078 
5079 	AUDIT_ARG(value, uap->attrnamespace);
5080 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
5081 	    uap->path, td);
5082 	error = namei(&nd);
5083 	if (error)
5084 		return (error);
5085 	NDFREE(&nd, NDF_ONLY_PNBUF);
5086 
5087 	vfslocked = NDHASGIANT(&nd);
5088 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5089 	    uap->nbytes, td);
5090 
5091 	vrele(nd.ni_vp);
5092 	VFS_UNLOCK_GIANT(vfslocked);
5093 	return (error);
5094 }
5095