xref: /freebsd/sys/kern/vfs_vnops.c (revision 0de89efe5c443f213c7ea28773ef2dc6cf3af2ed)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_vnops.c	8.2 (Berkeley) 1/21/94
39  * $Id: vfs_vnops.c,v 1.37 1997/09/02 20:06:04 bde Exp $
40  */
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/fcntl.h>
45 #include <sys/file.h>
46 #include <sys/stat.h>
47 #include <sys/proc.h>
48 #include <sys/mount.h>
49 #include <sys/namei.h>
50 #include <sys/vnode.h>
51 #include <sys/filio.h>
52 #include <sys/ttycom.h>
#include <machine/limits.h>	/* CHAR_MAX for the read heuristic below (assumed not pulled in indirectly) */
53 
54 static int vn_closefile __P((struct file *fp, struct proc *p));
55 static int vn_ioctl __P((struct file *fp, int com, caddr_t data,
56 		struct proc *p));
57 static int vn_read __P((struct file *fp, struct uio *uio,
58 		struct ucred *cred));
59 static int vn_poll __P((struct file *fp, int events, struct ucred *cred,
60 		struct proc *p));
61 static int vn_write __P((struct file *fp, struct uio *uio,
62 		struct ucred *cred));
63 
64 struct fileops vnops =
65 	{ vn_read, vn_write, vn_ioctl, vn_poll, vn_closefile };
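
/*
 * Editorial sketch, not part of the original file: the generic descriptor
 * code reaches these routines through the fileops table above, so a read(2)
 * on a vnode-backed descriptor is dispatched roughly as
 *
 *	error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
 *
 * which for vnodes resolves to vn_read() below.  Member names (f_ops,
 * fo_read, f_cred) are as declared in <sys/file.h> of this era.
 */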
66 
67 /*
68  * Common code for vnode open operations.
69  * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
70  */
71 int
72 vn_open(ndp, fmode, cmode)
73 	register struct nameidata *ndp;
74 	int fmode, cmode;
75 {
76 	register struct vnode *vp;
77 	register struct proc *p = ndp->ni_cnd.cn_proc;
78 	register struct ucred *cred = p->p_ucred;
79 	struct vattr vat;
80 	struct vattr *vap = &vat;
81 	int error;
82 
83 	if (fmode & O_CREAT) {
84 		ndp->ni_cnd.cn_nameiop = CREATE;
85 		ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
86 		if ((fmode & O_EXCL) == 0)
87 			ndp->ni_cnd.cn_flags |= FOLLOW;
88 		error = namei(ndp);
89 		if (error)
90 			return (error);
91 		if (ndp->ni_vp == NULL) {
92 			VATTR_NULL(vap);
93 			vap->va_type = VREG;
94 			vap->va_mode = cmode;
95 			if (fmode & O_EXCL)
96 				vap->va_vaflags |= VA_EXCLUSIVE;
97 			VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE);
98 			if ((error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
99 			    &ndp->ni_cnd, vap)) != 0)
100 				return (error);
101 			ASSERT_VOP_UNLOCKED(ndp->ni_dvp, "create");
102 			ASSERT_VOP_LOCKED(ndp->ni_vp, "create");
103 			fmode &= ~O_TRUNC;
104 			vp = ndp->ni_vp;
105 		} else {
106 			VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
107 			if (ndp->ni_dvp == ndp->ni_vp)
108 				vrele(ndp->ni_dvp);
109 			else
110 				vput(ndp->ni_dvp);
111 			ndp->ni_dvp = NULL;
112 			vp = ndp->ni_vp;
113 			if (fmode & O_EXCL) {
114 				error = EEXIST;
115 				goto bad;
116 			}
117 			fmode &= ~O_CREAT;
118 		}
119 	} else {
120 		ndp->ni_cnd.cn_nameiop = LOOKUP;
121 		ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF;
122 		error = namei(ndp);
123 		if (error)
124 			return (error);
125 		vp = ndp->ni_vp;
126 	}
127 	if (vp->v_type == VSOCK) {
128 		error = EOPNOTSUPP;
129 		goto bad;
130 	}
131 	if ((fmode & O_CREAT) == 0) {
132 		if (fmode & FREAD) {
133 			error = VOP_ACCESS(vp, VREAD, cred, p);
134 			if (error)
135 				goto bad;
136 		}
137 		if (fmode & (FWRITE | O_TRUNC)) {
138 			if (vp->v_type == VDIR) {
139 				error = EISDIR;
140 				goto bad;
141 			}
142 			error = vn_writechk(vp);
143 			if (error)
144 				goto bad;
145 			error = VOP_ACCESS(vp, VWRITE, cred, p);
146 			if (error)
147 				goto bad;
148 		}
149 	}
150 	if (fmode & O_TRUNC) {
151 		VOP_UNLOCK(vp, 0, p);				/* XXX */
152 		VOP_LEASE(vp, p, cred, LEASE_WRITE);
153 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
154 		VATTR_NULL(vap);
155 		vap->va_size = 0;
156 		error = VOP_SETATTR(vp, vap, cred, p);
157 		if (error)
158 			goto bad;
159 	}
160 	error = VOP_OPEN(vp, fmode, cred, p);
161 	if (error)
162 		goto bad;
163 	/*
164 	 * Make sure that a VM object is created for VMIO support.
165 	 */
166 	if (vp->v_type == VREG) {
167 		if ((error = vfs_object_create(vp, p, cred, 1)) != 0)
168 			goto bad;
169 	}
170 
171 	if (fmode & FWRITE)
172 		vp->v_writecount++;
173 	return (0);
174 bad:
175 	vput(vp);
176 	return (error);
177 }
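
/*
 * Editorial usage sketch, assumed caller code rather than part of this file:
 * an open(2)-style caller prepares a nameidata and lets vn_open() do the
 * lookup.  Note that vn_open() sets cn_nameiop/cn_flags itself and returns
 * with ni_vp locked on success:
 *
 *	struct nameidata nd;
 *	int error;
 *
 *	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p);
 *	error = vn_open(&nd, FREAD, 0);
 *	if (error == 0) {
 *		... use nd.ni_vp ...
 *		VOP_UNLOCK(nd.ni_vp, 0, p);
 *		error = vn_close(nd.ni_vp, FREAD, p->p_ucred, p);
 *	}
 *
 * Here "path" is a hypothetical user-space pathname pointer and "p" the
 * current process.
 */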
178 
179 /*
180  * Check for write permissions on the specified vnode.
181  * In-use text (executable) images cannot be written.
182  */
183 int
184 vn_writechk(vp)
185 	register struct vnode *vp;
186 {
187 
188 	/*
189 	 * If the vnode is in use as a process's text
190 	 * (executable) image, writing is not allowed and
191 	 * the caller gets ETXTBSY.
192 	 */
193 	if (vp->v_flag & VTEXT)
194 		return (ETXTBSY);
195 	return (0);
196 }
197 
198 /*
199  * Vnode close call
200  */
201 int
202 vn_close(vp, flags, cred, p)
203 	register struct vnode *vp;
204 	int flags;
205 	struct ucred *cred;
206 	struct proc *p;
207 {
208 	int error;
209 
210 	if (flags & FWRITE)
211 		vp->v_writecount--;
212 	error = VOP_CLOSE(vp, flags, cred, p);
213 	vrele(vp);
214 	return (error);
215 }
216 
217 /*
218  * Package up an I/O request on a vnode into a uio and do it.
219  */
220 int
221 vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
222 	enum uio_rw rw;
223 	struct vnode *vp;
224 	caddr_t base;
225 	int len;
226 	off_t offset;
227 	enum uio_seg segflg;
228 	int ioflg;
229 	struct ucred *cred;
230 	int *aresid;
231 	struct proc *p;
232 {
233 	struct uio auio;
234 	struct iovec aiov;
235 	int error;
236 
237 	if ((ioflg & IO_NODELOCKED) == 0)
238 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
239 	auio.uio_iov = &aiov;
240 	auio.uio_iovcnt = 1;
241 	aiov.iov_base = base;
242 	aiov.iov_len = len;
243 	auio.uio_resid = len;
244 	auio.uio_offset = offset;
245 	auio.uio_segflg = segflg;
246 	auio.uio_rw = rw;
247 	auio.uio_procp = p;
248 	if (rw == UIO_READ) {
249 		error = VOP_READ(vp, &auio, ioflg, cred);
250 	} else {
251 		error = VOP_WRITE(vp, &auio, ioflg, cred);
252 	}
253 	if (aresid)
254 		*aresid = auio.uio_resid;
255 	else
256 		if (auio.uio_resid && error == 0)
257 			error = EIO;
258 	if ((ioflg & IO_NODELOCKED) == 0)
259 		VOP_UNLOCK(vp, 0, p);
260 	return (error);
261 }
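
/*
 * Editorial usage sketch, assumed caller code rather than part of this file:
 * kernel code holding a vnode reference can do simple I/O without building
 * a uio by hand, e.g. reading the first bytes of a file into a kernel
 * buffer:
 *
 *	char buf[128];
 *	int resid, error;
 *
 *	error = vn_rdwr(UIO_READ, vp, (caddr_t)buf, sizeof(buf), (off_t)0,
 *	    UIO_SYSSPACE, 0, p->p_ucred, &resid, p);
 *
 * With ioflg 0 the routine locks and unlocks vp around the VOP call;
 * IO_NODELOCKED tells it the caller already holds the vnode lock.
 */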
262 
263 /*
264  * File table vnode read routine.
265  */
266 static int
267 vn_read(fp, uio, cred)
268 	struct file *fp;
269 	struct uio *uio;
270 	struct ucred *cred;
271 {
272 	struct vnode *vp = (struct vnode *)fp->f_data;
273 	struct proc *p = uio->uio_procp;
274 	int count, error;
275 	int flag;
276 
277 	VOP_LEASE(vp, p, cred, LEASE_READ);
278 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
279 	uio->uio_offset = fp->f_offset;
280 	count = uio->uio_resid;
281 	flag = 0;
282 	if (fp->f_flag & FNONBLOCK)
283 		flag |= IO_NDELAY;
284 
285 	/*
286 	 * Sequential read heuristic.
287 	 * If we have been doing sequential input,
288 	 * a rewind operation doesn't turn off
289 	 * sequential input mode.
290 	 */
291 	if (((fp->f_offset == 0) && (fp->f_seqcount > 0)) ||
292 		(fp->f_offset == fp->f_nextread)) {
293 		int tmpseq = fp->f_seqcount;
294 		/*
295 		 * XXX we assume that the filesystem block size is
296 		 * the default.  Not true, but still gives us a pretty
297 		 * good indicator of how sequential the read operations
298 		 * are.
299 		 */
300 		tmpseq += ((count + BKVASIZE - 1) / BKVASIZE);
301 		if (tmpseq >= CHAR_MAX)
302 			tmpseq = CHAR_MAX;
303 		fp->f_seqcount = tmpseq;
304 		flag |= (fp->f_seqcount << 16);
305 	} else {
306 		if (fp->f_seqcount > 1)
307 			fp->f_seqcount = 1;
308 		else
309 			fp->f_seqcount = 0;
310 	}
311 
312 	error = VOP_READ(vp, uio, flag, cred);
313 	fp->f_offset += count - uio->uio_resid;
314 	fp->f_nextread = fp->f_offset;
315 	VOP_UNLOCK(vp, 0, p);
316 	return (error);
317 }
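
/*
 * Editorial note, not part of the original file: the heuristic above credits
 * one "sequential point" per BKVASIZE bytes requested.  Assuming the usual
 * BKVASIZE of 8192, a sequential 64KB read adds (65536 + 8191) / 8192 == 8
 * to f_seqcount; the running count, clamped at CHAR_MAX, is passed to the
 * filesystem in the upper bits of the ioflag (<< 16) so VOP_READ can scale
 * its read-ahead.
 */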
318 
319 /*
320  * File table vnode write routine.
321  */
322 static int
323 vn_write(fp, uio, cred)
324 	struct file *fp;
325 	struct uio *uio;
326 	struct ucred *cred;
327 {
328 	struct vnode *vp = (struct vnode *)fp->f_data;
329 	struct proc *p = uio->uio_procp;
330 	int count, error, ioflag = IO_UNIT;
331 
332 	if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
333 		ioflag |= IO_APPEND;
334 	if (fp->f_flag & FNONBLOCK)
335 		ioflag |= IO_NDELAY;
336 	if ((fp->f_flag & O_FSYNC) ||
337 	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
338 		ioflag |= IO_SYNC;
339 	VOP_LEASE(vp, p, cred, LEASE_WRITE);
340 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
341 	uio->uio_offset = fp->f_offset;
342 	count = uio->uio_resid;
343 	error = VOP_WRITE(vp, uio, ioflag, cred);
344 	if (ioflag & IO_APPEND)
345 		fp->f_offset = uio->uio_offset;
346 	else
347 		fp->f_offset += count - uio->uio_resid;
348 	VOP_UNLOCK(vp, 0, p);
349 	return (error);
350 }
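
/*
 * Editorial note, not part of the original file: for O_APPEND writes the new
 * file offset is taken from the uio after VOP_WRITE, because the filesystem
 * moved the write to end of file; e.g. appending 100 bytes to a 1000-byte
 * file leaves f_offset at 1100 regardless of where it pointed before.
 * Ordinary writes simply advance f_offset by the number of bytes written.
 */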
351 
352 /*
353  * File table vnode stat routine.
354  */
355 int
356 vn_stat(vp, sb, p)
357 	struct vnode *vp;
358 	register struct stat *sb;
359 	struct proc *p;
360 {
361 	struct vattr vattr;
362 	register struct vattr *vap;
363 	int error;
364 	u_short mode;
365 
366 	vap = &vattr;
367 	error = VOP_GETATTR(vp, vap, p->p_ucred, p);
368 	if (error)
369 		return (error);
370 	/*
371 	 * Copy from vattr table
372 	 */
373 	sb->st_dev = vap->va_fsid;
374 	sb->st_ino = vap->va_fileid;
375 	mode = vap->va_mode;
376 	switch (vp->v_type) {
377 	case VREG:
378 		mode |= S_IFREG;
379 		break;
380 	case VDIR:
381 		mode |= S_IFDIR;
382 		break;
383 	case VBLK:
384 		mode |= S_IFBLK;
385 		break;
386 	case VCHR:
387 		mode |= S_IFCHR;
388 		break;
389 	case VLNK:
390 		mode |= S_IFLNK;
391 		break;
392 	case VSOCK:
393 		mode |= S_IFSOCK;
394 		break;
395 	case VFIFO:
396 		mode |= S_IFIFO;
397 		break;
398 	default:
399 		return (EBADF);
400 	}
401 	sb->st_mode = mode;
402 	sb->st_nlink = vap->va_nlink;
403 	sb->st_uid = vap->va_uid;
404 	sb->st_gid = vap->va_gid;
405 	sb->st_rdev = vap->va_rdev;
406 	sb->st_size = vap->va_size;
407 	sb->st_atimespec = vap->va_atime;
408 	sb->st_mtimespec = vap->va_mtime;
409 	sb->st_ctimespec = vap->va_ctime;
410 	sb->st_blksize = vap->va_blocksize;
411 	sb->st_flags = vap->va_flags;
412 	if (p->p_ucred->cr_uid != 0)
413 		sb->st_gen = 0;
414 	else
415 		sb->st_gen = vap->va_gen;
416 
417 #if (S_BLKSIZE == 512)
418 	/* Optimize this case */
419 	sb->st_blocks = vap->va_bytes >> 9;
420 #else
421 	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
422 #endif
423 	return (0);
424 }
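
/*
 * Editorial example, not part of the original file: st_mode combines the
 * permission bits from the vattr with the file-type bits derived from
 * v_type, so a regular file (VREG) with va_mode 0644 reports
 * st_mode == S_IFREG | 0644 == 0100644.
 */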
425 
426 /*
427  * File table vnode ioctl routine.
428  */
429 static int
430 vn_ioctl(fp, com, data, p)
431 	struct file *fp;
432 	int com;
433 	caddr_t data;
434 	struct proc *p;
435 {
436 	register struct vnode *vp = ((struct vnode *)fp->f_data);
437 	struct vattr vattr;
438 	int error;
439 
440 	switch (vp->v_type) {
441 
442 	case VREG:
443 	case VDIR:
444 		if (com == FIONREAD) {
445 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
446 			if (error)
447 				return (error);
448 			*(int *)data = vattr.va_size - fp->f_offset;
449 			return (0);
450 		}
451 		if (com == FIONBIO || com == FIOASYNC)	/* XXX */
452 			return (0);			/* XXX */
453 		/* fall into ... */
454 
455 	default:
456 		return (ENOTTY);
457 
458 	case VFIFO:
459 	case VCHR:
460 	case VBLK:
461 		error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
462 		if (error == 0 && com == TIOCSCTTY) {
463 
464 			/* Do nothing if reassigning same control tty */
465 			if (p->p_session->s_ttyvp == vp)
466 				return (0);
467 
468 			/* Get rid of reference to old control tty */
469 			if (p->p_session->s_ttyvp)
470 				vrele(p->p_session->s_ttyvp);
471 
472 			p->p_session->s_ttyvp = vp;
473 			VREF(vp);
474 		}
475 		return (error);
476 	}
477 }
478 
479 /*
480  * File table vnode poll routine.
481  */
482 static int
483 vn_poll(fp, events, cred, p)
484 	struct file *fp;
485 	int events;
486 	struct ucred *cred;
487 	struct proc *p;
488 {
489 
490 	return (VOP_POLL(((struct vnode *)fp->f_data), events, cred, p));
491 }
492 
493 /*
494  * File table vnode close routine.
495  */
496 static int
497 vn_closefile(fp, p)
498 	struct file *fp;
499 	struct proc *p;
500 {
501 
502 	return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
503 		fp->f_cred, p));
504 }
505 
506 /*
507  * Check that the vnode is still valid, and if so
508  * acquire requested lock.
509  */
510 int
511 vn_lock(vp, flags, p)
512 	struct vnode *vp;
513 	int flags;
514 	struct proc *p;
515 {
516 	int error;
517 
518 	do {
519 		if ((flags & LK_INTERLOCK) == 0) {
520 			simple_lock(&vp->v_interlock);
521 		}
522 		if (vp->v_flag & VXLOCK) {
523 			vp->v_flag |= VXWANT;
524 			simple_unlock(&vp->v_interlock);
525 			tsleep((caddr_t)vp, PINOD, "vn_lock", 0);
526 			error = ENOENT;
527 		} else {
528 			error = VOP_LOCK(vp, flags | LK_INTERLOCK, p);
529 			if (error == 0)
530 				return (error);
531 		}
532 		flags &= ~LK_INTERLOCK;
533 	} while (flags & LK_RETRY);
534 	return (error);
535 }
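
/*
 * Editorial note, not part of the original file: if the vnode is being
 * revoked (VXLOCK), a plain vn_lock() sleeps until the reclaim finishes and
 * then fails with ENOENT, so callers without LK_RETRY must be prepared to
 * back out:
 *
 *	error = vn_lock(vp, LK_EXCLUSIVE, p);
 *	if (error)
 *		return (error);
 *
 * With LK_RETRY, as used throughout this file, the loop instead retries and
 * eventually returns a lock on the (by then dead) vnode.
 */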
536