/* xref: /freebsd/sys/kern/vfs_vnops.c (revision df7f5d4de4592a8948a25ce01e5bddfbb7ce39dc) */
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_vnops.c	8.2 (Berkeley) 1/21/94
 * $Id: vfs_vnops.c,v 1.31 1997/03/07 07:42:41 gpalmer Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/ioctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vnode_pager.h>

static int vn_closefile __P((struct file *fp, struct proc *p));
static int vn_ioctl __P((struct file *fp, int com, caddr_t data,
		struct proc *p));
static int vn_read __P((struct file *fp, struct uio *uio,
		struct ucred *cred));
static int vn_select __P((struct file *fp, int which, struct proc *p));
static int vn_write __P((struct file *fp, struct uio *uio,
		struct ucred *cred));

struct fileops vnops =
	{ vn_read, vn_write, vn_ioctl, vn_select, vn_closefile };

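/*
 * Descriptive note (not in the original source): these entries back the
 * fo_read, fo_write, fo_ioctl, fo_select and fo_close hooks that the
 * generic file descriptor code invokes for descriptors referring to vnodes.
 */
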
/*
 * Common code for vnode open operations.
 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
 */
int
vn_open(ndp, fmode, cmode)
	register struct nameidata *ndp;
	int fmode, cmode;
{
	register struct vnode *vp;
	register struct proc *p = ndp->ni_cnd.cn_proc;
	register struct ucred *cred = p->p_ucred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int error;

	if (fmode & O_CREAT) {
		ndp->ni_cnd.cn_nameiop = CREATE;
		ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
		if ((fmode & O_EXCL) == 0)
			ndp->ni_cnd.cn_flags |= FOLLOW;
		error = namei(ndp);
		if (error)
			return (error);
		if (ndp->ni_vp == NULL) {
			VATTR_NULL(vap);
			vap->va_type = VREG;
			vap->va_mode = cmode;
			if (fmode & O_EXCL)
				vap->va_vaflags |= VA_EXCLUSIVE;
			VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE);
			error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
			    &ndp->ni_cnd, vap);
			if (error)
				return (error);
			fmode &= ~O_TRUNC;
			vp = ndp->ni_vp;
		} else {
			VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
			if (ndp->ni_dvp == ndp->ni_vp)
				vrele(ndp->ni_dvp);
			else
				vput(ndp->ni_dvp);
			ndp->ni_dvp = NULL;
			vp = ndp->ni_vp;
			if (fmode & O_EXCL) {
				error = EEXIST;
				goto bad;
			}
			fmode &= ~O_CREAT;
		}
	} else {
		ndp->ni_cnd.cn_nameiop = LOOKUP;
		ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF;
		error = namei(ndp);
		if (error)
			return (error);
		vp = ndp->ni_vp;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	if ((fmode & O_CREAT) == 0) {
		if (fmode & FREAD) {
			error = VOP_ACCESS(vp, VREAD, cred, p);
			if (error)
				goto bad;
		}
		if (fmode & (FWRITE | O_TRUNC)) {
			if (vp->v_type == VDIR) {
				error = EISDIR;
				goto bad;
			}
			error = vn_writechk(vp);
			if (error)
				goto bad;
			error = VOP_ACCESS(vp, VWRITE, cred, p);
			if (error)
				goto bad;
		}
	}
	if (fmode & O_TRUNC) {
		VOP_UNLOCK(vp, 0, p);				/* XXX */
		VOP_LEASE(vp, p, cred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
		VATTR_NULL(vap);
		vap->va_size = 0;
		error = VOP_SETATTR(vp, vap, cred, p);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, cred, p);
	if (error)
		goto bad;
	/*
	 * Make sure that a VM object is created for VMIO support.
	 */
	if (vp->v_type == VREG) {
		if ((error = vfs_object_create(vp, p, cred, 1)) != 0)
			goto bad;
	}

	if (fmode & FWRITE)
		vp->v_writecount++;
	return (0);
bad:
	vput(vp);
	return (error);
}

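/*
 * Usage sketch (illustrative only, not part of the original file): a
 * caller such as the open(2) path is assumed to set up a nameidata and
 * hand it to vn_open(), e.g.
 *
 *	struct nameidata nd;
 *
 *	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p);
 *	error = vn_open(&nd, flags, cmode);
 *
 * On success the opened vnode is returned locked in nd.ni_vp.
 */
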
/*
 * Check for write permissions on the specified vnode.
 * Prototype text segments cannot be written.
 */
int
vn_writechk(vp)
	register struct vnode *vp;
{

	/*
	 * If there's shared text associated with
	 * the vnode, we can't allow writing.
	 */
	if (vp->v_flag & VTEXT)
		return (ETXTBSY);
	return (0);
}

/*
 * Vnode close call
 */
int
vn_close(vp, flags, cred, p)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
{
	int error;

	if (flags & FWRITE)
		vp->v_writecount--;
	error = VOP_CLOSE(vp, flags, cred, p);
	vrele(vp);
	return (error);
}

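/*
 * Usage sketch (illustrative only, not part of the original file): a
 * successful vn_open() is normally balanced later by something like
 *
 *	error = vn_close(vp, FWRITE, cred, p);
 *
 * using the same FREAD/FWRITE flags that were passed at open time, so
 * that v_writecount stays consistent.
 */
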
/*
 * Package up an I/O request on a vnode into a uio and do it.
 */
int
vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
	enum uio_rw rw;
	struct vnode *vp;
	caddr_t base;
	int len;
	off_t offset;
	enum uio_seg segflg;
	int ioflg;
	struct ucred *cred;
	int *aresid;
	struct proc *p;
{
	struct uio auio;
	struct iovec aiov;
	int error;

	if ((ioflg & IO_NODELOCKED) == 0)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = base;
	aiov.iov_len = len;
	auio.uio_resid = len;
	auio.uio_offset = offset;
	auio.uio_segflg = segflg;
	auio.uio_rw = rw;
	auio.uio_procp = p;
	if (rw == UIO_READ) {
		error = VOP_READ(vp, &auio, ioflg, cred);
	} else {
		error = VOP_WRITE(vp, &auio, ioflg, cred);
	}
	if (aresid)
		*aresid = auio.uio_resid;
	else
		if (auio.uio_resid && error == 0)
			error = EIO;
	if ((ioflg & IO_NODELOCKED) == 0)
		VOP_UNLOCK(vp, 0, p);
	return (error);
}

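/*
 * Usage sketch (illustrative only, not part of the original file):
 * kernel code that wants to write a buffer it already owns, without
 * building a uio by hand, is assumed to call something like
 *
 *	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, len, (off_t)0,
 *	    UIO_SYSSPACE, IO_UNIT, cred, (int *)0, p);
 *
 * Passing a non-NULL aresid returns the residual count to the caller
 * instead of turning a short transfer into EIO.
 */
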
/*
 * File table vnode read routine.
 */
static int
vn_read(fp, uio, cred)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
{
	struct vnode *vp = (struct vnode *)fp->f_data;
	struct proc *p = uio->uio_procp;
	int count, error;
	int flag;

	VOP_LEASE(vp, p, cred, LEASE_READ);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	uio->uio_offset = fp->f_offset;
	count = uio->uio_resid;
	flag = 0;
	if (fp->f_flag & FNONBLOCK)
		flag |= IO_NDELAY;

	/*
	 * Sequential read heuristic.
	 * If we have been doing sequential input,
	 * a rewind operation doesn't turn off
	 * sequential input mode.
	 */
	if (((fp->f_offset == 0) && (fp->f_seqcount > 0)) ||
		(fp->f_offset == fp->f_nextread)) {
		int tmpseq = fp->f_seqcount;
		/*
		 * XXX we assume that the filesystem block size is
		 * the default.  Not true, but still gives us a pretty
		 * good indicator of how sequential the read operations
		 * are.
		 */
		tmpseq += ((count + BKVASIZE - 1) / BKVASIZE);
		if (tmpseq >= CHAR_MAX)
			tmpseq = CHAR_MAX;
		fp->f_seqcount = tmpseq;
		flag |= (fp->f_seqcount << 16);
	} else {
		if (fp->f_seqcount > 1)
			fp->f_seqcount = 1;
		else
			fp->f_seqcount = 0;
	}
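	/*
	 * Worked example (illustrative, not in the original): assuming
	 * BKVASIZE is, say, 8192, a stream of 64KB sequential reads bumps
	 * f_seqcount by 8 per call until it saturates at CHAR_MAX; the
	 * count is passed to the filesystem in bits 16 and up of the
	 * ioflag handed to VOP_READ below.
	 */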

	error = VOP_READ(vp, uio, flag, cred);
	fp->f_offset += count - uio->uio_resid;
	fp->f_nextread = fp->f_offset;
	VOP_UNLOCK(vp, 0, p);
	return (error);
}

/*
 * File table vnode write routine.
 */
static int
vn_write(fp, uio, cred)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
{
	struct vnode *vp = (struct vnode *)fp->f_data;
	struct proc *p = uio->uio_procp;
	int count, error, ioflag = IO_UNIT;

	if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
		ioflag |= IO_APPEND;
	if (fp->f_flag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if ((fp->f_flag & O_FSYNC) ||
	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
		ioflag |= IO_SYNC;
	VOP_LEASE(vp, p, cred, LEASE_WRITE);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	uio->uio_offset = fp->f_offset;
	count = uio->uio_resid;
	error = VOP_WRITE(vp, uio, ioflag, cred);
	if (ioflag & IO_APPEND)
		fp->f_offset = uio->uio_offset;
	else
		fp->f_offset += count - uio->uio_resid;
	VOP_UNLOCK(vp, 0, p);
	return (error);
}

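/*
 * Descriptive note (not in the original source): for an append-mode
 * write the filesystem itself advances uio_offset to the end of file,
 * which is why f_offset is taken from the uio afterwards instead of
 * being advanced by the transferred byte count as in the normal case.
 */
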
/*
 * File table vnode stat routine.
 */
int
vn_stat(vp, sb, p)
	struct vnode *vp;
	register struct stat *sb;
	struct proc *p;
{
	struct vattr vattr;
	register struct vattr *vap;
	int error;
	u_short mode;

	vap = &vattr;
	error = VOP_GETATTR(vp, vap, p->p_ucred, p);
	if (error)
		return (error);
	/*
	 * Copy from vattr table
	 */
	sb->st_dev = vap->va_fsid;
	sb->st_ino = vap->va_fileid;
	mode = vap->va_mode;
	switch (vp->v_type) {
	case VREG:
		mode |= S_IFREG;
		break;
	case VDIR:
		mode |= S_IFDIR;
		break;
	case VBLK:
		mode |= S_IFBLK;
		break;
	case VCHR:
		mode |= S_IFCHR;
		break;
	case VLNK:
		mode |= S_IFLNK;
		break;
	case VSOCK:
		mode |= S_IFSOCK;
		break;
	case VFIFO:
		mode |= S_IFIFO;
		break;
	default:
		return (EBADF);
	}
	sb->st_mode = mode;
	sb->st_nlink = vap->va_nlink;
	sb->st_uid = vap->va_uid;
	sb->st_gid = vap->va_gid;
	sb->st_rdev = vap->va_rdev;
	sb->st_size = vap->va_size;
	sb->st_atimespec = vap->va_atime;
	sb->st_mtimespec = vap->va_mtime;
	sb->st_ctimespec = vap->va_ctime;
	sb->st_blksize = vap->va_blocksize;
	sb->st_flags = vap->va_flags;
	if (p->p_ucred->cr_uid != 0)
		sb->st_gen = 0;
	else
		sb->st_gen = vap->va_gen;

#if (S_BLKSIZE == 512)
	/* Optimize this case */
	sb->st_blocks = vap->va_bytes >> 9;
#else
	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
#endif
	return (0);
}

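/*
 * Worked example (illustrative, not in the original): for a directory
 * whose va_mode is 0755, the switch above ORs in S_IFDIR (0040000), so
 * the st_mode reported to userland is 040755.
 */
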
/*
 * File table vnode ioctl routine.
 */
static int
vn_ioctl(fp, com, data, p)
	struct file *fp;
	int com;
	caddr_t data;
	struct proc *p;
{
	register struct vnode *vp = ((struct vnode *)fp->f_data);
	struct vattr vattr;
	int error;

	switch (vp->v_type) {

	case VREG:
	case VDIR:
		if (com == FIONREAD) {
			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
			if (error)
				return (error);
			*(int *)data = vattr.va_size - fp->f_offset;
			return (0);
		}
		if (com == FIONBIO || com == FIOASYNC)	/* XXX */
			return (0);			/* XXX */
		/* fall into ... */

	default:
		return (ENOTTY);

	case VFIFO:
	case VCHR:
	case VBLK:
		error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
		if (error == 0 && com == TIOCSCTTY) {

			/* Do nothing if reassigning same control tty */
			if (p->p_session->s_ttyvp == vp)
				return (0);

			/* Get rid of reference to old control tty */
			if (p->p_session->s_ttyvp)
				vrele(p->p_session->s_ttyvp);

			p->p_session->s_ttyvp = vp;
			VREF(vp);
		}
		return (error);
	}
}

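/*
 * Descriptive note (not in the original source): a userland
 * ioctl(fd, FIONREAD, &n) on a regular file or directory lands in the
 * VREG/VDIR case above, and n is set to the number of bytes between the
 * current file offset and the end of the file.
 */
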
/*
 * File table vnode select routine.
 */
static int
vn_select(fp, which, p)
	struct file *fp;
	int which;
	struct proc *p;
{

	return (VOP_SELECT(((struct vnode *)fp->f_data), which, fp->f_flag,
		fp->f_cred, p));
}

/*
 * File table vnode close routine.
 */
static int
vn_closefile(fp, p)
	struct file *fp;
	struct proc *p;
{

	return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
		fp->f_cred, p));
}

/*
 * Check that the vnode is still valid, and if so
 * acquire requested lock.
 */
int
vn_lock(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	do {
		if ((flags & LK_INTERLOCK) == 0) {
			simple_lock(&vp->v_interlock);
		}
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vn_lock", 0);
			error = ENOENT;
		} else {
			error = VOP_LOCK(vp, flags | LK_INTERLOCK, p);
			if (error == 0)
				return (error);
		}
		flags &= ~LK_INTERLOCK;
	} while (flags & LK_RETRY);
	return (error);
}

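/*
 * Usage sketch (illustrative only, not part of the original file):
 * callers normally bracket a sequence of vnode operations with
 *
 *	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 *	error = VOP_SETATTR(vp, vap, cred, p);
 *	VOP_UNLOCK(vp, 0, p);
 *
 * With LK_RETRY the loop above keeps retrying, even across a vnode that
 * is being revoked (VXLOCK), until the lock is obtained; without it a
 * doomed vnode makes vn_lock() fail with ENOENT.
 */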