xref: /freebsd/sys/kern/vfs_aio.c (revision ee877a356c36e8d790874582cc5c4d9417009e8c)
1ee877a35SJohn Dyson 
2ee877a35SJohn Dyson /*
3ee877a35SJohn Dyson  * Copyright (c) 1997 John S. Dyson.  All rights reserved.
4ee877a35SJohn Dyson  *
5ee877a35SJohn Dyson  * Redistribution and use in source and binary forms, with or without
6ee877a35SJohn Dyson  * modification, are permitted provided that the following conditions
7ee877a35SJohn Dyson  * are met:
8ee877a35SJohn Dyson  * 1. Redistributions of source code must retain the above copyright
9ee877a35SJohn Dyson  *    notice, this list of conditions and the following disclaimer.
10ee877a35SJohn Dyson  * 2. John S. Dyson's name may not be used to endorse or promote products
11ee877a35SJohn Dyson  *    derived from this software without specific prior written permission.
12ee877a35SJohn Dyson  *
13ee877a35SJohn Dyson  * DISCLAIMER:  This code isn't warranted to do anything useful.  Anything
14ee877a35SJohn Dyson  * bad that happens because of using this software isn't the responsibility
15ee877a35SJohn Dyson  * of the author.  This software is distributed AS-IS.
16ee877a35SJohn Dyson  *
17ee877a35SJohn Dyson  * $Id$
18ee877a35SJohn Dyson  */
19ee877a35SJohn Dyson 
20ee877a35SJohn Dyson /*
21ee877a35SJohn Dyson  * This file contains support for the POSIX.4 AIO facility.
22ee877a35SJohn Dyson  *
23ee877a35SJohn Dyson  * The initial version provides only the (bogus) synchronous semantics
24ee877a35SJohn Dyson  * but will support async in the future.  Note that a bit
25ee877a35SJohn Dyson  * in a private field allows the user mode subroutine to adapt
26ee877a35SJohn Dyson  * the kernel operations to true POSIX.4 for future compatibility.
27ee877a35SJohn Dyson  *
28ee877a35SJohn Dyson  * This code is used to support true POSIX.4 AIO/LIO with the help
29ee877a35SJohn Dyson  * of a user mode subroutine package.  Note that eventually more support
30ee877a35SJohn Dyson  * will be pushed into the kernel.
31ee877a35SJohn Dyson  */
32ee877a35SJohn Dyson 
33ee877a35SJohn Dyson #include <sys/param.h>
34ee877a35SJohn Dyson #include <sys/systm.h>
35ee877a35SJohn Dyson #include <sys/sysent.h>
36ee877a35SJohn Dyson #include <sys/sysproto.h>
37ee877a35SJohn Dyson #include <sys/namei.h>
38ee877a35SJohn Dyson #include <sys/filedesc.h>
39ee877a35SJohn Dyson #include <sys/kernel.h>
40ee877a35SJohn Dyson #include <sys/fcntl.h>
41ee877a35SJohn Dyson #include <sys/file.h>
42ee877a35SJohn Dyson #include <sys/stat.h>
43ee877a35SJohn Dyson #include <sys/unistd.h>
44ee877a35SJohn Dyson #include <sys/vnode.h>
45ee877a35SJohn Dyson #include <sys/mount.h>
46ee877a35SJohn Dyson #include <sys/proc.h>
47ee877a35SJohn Dyson #include <sys/uio.h>
48ee877a35SJohn Dyson #include <sys/malloc.h>
49ee877a35SJohn Dyson #include <sys/dirent.h>
50ee877a35SJohn Dyson #include <sys/signalvar.h>
51ee877a35SJohn Dyson 
52ee877a35SJohn Dyson #include <vm/vm.h>
53ee877a35SJohn Dyson #include <vm/vm_param.h>
54ee877a35SJohn Dyson #include <vm/vm_object.h>
55ee877a35SJohn Dyson #include <vm/vm_extern.h>
56ee877a35SJohn Dyson #include <sys/sysctl.h>
57ee877a35SJohn Dyson #include <sys/aio.h>
58ee877a35SJohn Dyson 
59ee877a35SJohn Dyson 
60ee877a35SJohn Dyson /*
61ee877a35SJohn Dyson  * aio_cancel at the kernel level is a NOOP right now.  It
62ee877a35SJohn Dyson  * might be possible to support it partially in user mode, or
63ee877a35SJohn Dyson  * in kernel mode later on.
64ee877a35SJohn Dyson  */
65ee877a35SJohn Dyson int
66ee877a35SJohn Dyson aio_cancel(struct proc *p, struct aio_cancel_args *uap, int *retval) {
67ee877a35SJohn Dyson 	return AIO_NOTCANCELLED;
68ee877a35SJohn Dyson }
69ee877a35SJohn Dyson 
70ee877a35SJohn Dyson 
71ee877a35SJohn Dyson /*
72ee877a35SJohn Dyson  * aio_error is implemented in the kernel level for compatibility
73ee877a35SJohn Dyson  * purposes only.  For a user mode async implementation, it would be
74ee877a35SJohn Dyson  * best to do it in a userland subroutine.
75ee877a35SJohn Dyson  */
76ee877a35SJohn Dyson int
77ee877a35SJohn Dyson aio_error(struct proc *p, struct aio_error_args *uap, int *retval) {
78ee877a35SJohn Dyson 	int activeflag, errorcode;
79ee877a35SJohn Dyson 	struct aiocb iocb;
80ee877a35SJohn Dyson 	int error;
81ee877a35SJohn Dyson 
82ee877a35SJohn Dyson 	/*
83ee877a35SJohn Dyson 	 * Get control block
84ee877a35SJohn Dyson 	 */
85ee877a35SJohn Dyson 	if (error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb))
86ee877a35SJohn Dyson 		return error;
87ee877a35SJohn Dyson 	if (iocb._aiocb_private.active == -1)
88ee877a35SJohn Dyson 		return EFAULT;
89ee877a35SJohn Dyson 
90ee877a35SJohn Dyson 	if (iocb._aiocb_private.active != AIO_PMODE_ACTIVE) {
91ee877a35SJohn Dyson 		retval[0] = EINVAL;
92ee877a35SJohn Dyson 		return(0);
93ee877a35SJohn Dyson 	}
94ee877a35SJohn Dyson 
95ee877a35SJohn Dyson 	retval[0] = iocb._aiocb_private.error;
96ee877a35SJohn Dyson 	return(0);
97ee877a35SJohn Dyson }
98ee877a35SJohn Dyson 
99ee877a35SJohn Dyson int
100ee877a35SJohn Dyson aio_read(struct proc *p, struct aio_read_args *uap, int *retval) {
101ee877a35SJohn Dyson 	struct filedesc *fdp;
102ee877a35SJohn Dyson 	struct file *fp;
103ee877a35SJohn Dyson 	struct uio auio;
104ee877a35SJohn Dyson 	struct iovec aiov;
105ee877a35SJohn Dyson 	unsigned int fd;
106ee877a35SJohn Dyson 	int cnt;
107ee877a35SJohn Dyson 	struct aiocb iocb;
108ee877a35SJohn Dyson 	int error;
109ee877a35SJohn Dyson 
110ee877a35SJohn Dyson 
111ee877a35SJohn Dyson 	/*
112ee877a35SJohn Dyson 	 * Get control block
113ee877a35SJohn Dyson 	 */
114ee877a35SJohn Dyson 	if (error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb))
115ee877a35SJohn Dyson 		return error;
116ee877a35SJohn Dyson 
117ee877a35SJohn Dyson 	/*
118ee877a35SJohn Dyson 	 * We support sync only for now.
119ee877a35SJohn Dyson 	 */
120ee877a35SJohn Dyson 	if ((iocb._aiocb_private.privatemodes & AIO_PMODE_SYNC) == 0)
121ee877a35SJohn Dyson 		return ENOSYS;
122ee877a35SJohn Dyson 
123ee877a35SJohn Dyson 	/*
124ee877a35SJohn Dyson 	 * Get the fd info for process
125ee877a35SJohn Dyson 	 */
126ee877a35SJohn Dyson 	fdp = p->p_fd;
127ee877a35SJohn Dyson 
128ee877a35SJohn Dyson 	/*
129ee877a35SJohn Dyson 	 * Range check file descriptor
130ee877a35SJohn Dyson 	 */
131ee877a35SJohn Dyson 	fd = iocb.aio_fildes;
132ee877a35SJohn Dyson 	if (fd >= fdp->fd_nfiles)
133ee877a35SJohn Dyson 		return EBADF;
134ee877a35SJohn Dyson 	fp = fdp->fd_ofiles[fd];
135ee877a35SJohn Dyson 	if ((fp == NULL) || ((fp->f_flag & FREAD) == 0))
136ee877a35SJohn Dyson 		return EBADF;
137ee877a35SJohn Dyson 	if (((int) iocb.aio_offset) == -1)
138ee877a35SJohn Dyson 		return EINVAL;
139ee877a35SJohn Dyson 
140ee877a35SJohn Dyson 	aiov.iov_base = iocb.aio_buf;
141ee877a35SJohn Dyson 	aiov.iov_len = iocb.aio_nbytes;
142ee877a35SJohn Dyson 	auio.uio_iov = &aiov;
143ee877a35SJohn Dyson 	auio.uio_iovcnt = 1;
144ee877a35SJohn Dyson 	auio.uio_offset = iocb.aio_offset;
145ee877a35SJohn Dyson 
146ee877a35SJohn Dyson 	auio.uio_resid = iocb.aio_nbytes;
147ee877a35SJohn Dyson 	if (auio.uio_resid < 0)
148ee877a35SJohn Dyson 		return (EINVAL);
149ee877a35SJohn Dyson 
150ee877a35SJohn Dyson 	auio.uio_rw = UIO_READ;
151ee877a35SJohn Dyson 	auio.uio_segflg = UIO_USERSPACE;
152ee877a35SJohn Dyson 	auio.uio_procp = p;
153ee877a35SJohn Dyson 
154ee877a35SJohn Dyson 	cnt = iocb.aio_nbytes;
155ee877a35SJohn Dyson 	error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
156ee877a35SJohn Dyson 	if (error &&
157ee877a35SJohn Dyson 		(auio.uio_resid != cnt) &&
158ee877a35SJohn Dyson 		(error == ERESTART || error == EINTR || error == EWOULDBLOCK))
159ee877a35SJohn Dyson 			error = 0;
160ee877a35SJohn Dyson 	cnt -= auio.uio_resid;
161ee877a35SJohn Dyson 	*retval = cnt;
162ee877a35SJohn Dyson 	return error;
163ee877a35SJohn Dyson }
164ee877a35SJohn Dyson 
165ee877a35SJohn Dyson 
/*
 * Return and suspend aren't supported (yet).
 *
 * aio_return is a placeholder: it looks at none of its arguments,
 * leaves retval untouched and always returns 0.
 */
int
aio_return(struct proc *p, struct aio_return_args *uap, int *retval)
{
	return 0;
}
173ee877a35SJohn Dyson 
/*
 * aio_suspend is a placeholder as well: it never waits, looks at none
 * of its arguments, leaves retval untouched and always returns 0.
 */
int
aio_suspend(struct proc *p, struct aio_suspend_args *uap, int *retval)
{
	return 0;
}
178ee877a35SJohn Dyson 
179ee877a35SJohn Dyson int
180ee877a35SJohn Dyson aio_write(struct proc *p, struct aio_write_args *uap, int *retval) {
181ee877a35SJohn Dyson 	struct filedesc *fdp;
182ee877a35SJohn Dyson 	struct file *fp;
183ee877a35SJohn Dyson 	struct uio auio;
184ee877a35SJohn Dyson 	struct iovec aiov;
185ee877a35SJohn Dyson 	unsigned int fd;
186ee877a35SJohn Dyson 	int cnt;
187ee877a35SJohn Dyson 	struct aiocb iocb;
188ee877a35SJohn Dyson 	int error;
189ee877a35SJohn Dyson 
190ee877a35SJohn Dyson 	if (error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb))
191ee877a35SJohn Dyson 		return error;
192ee877a35SJohn Dyson 
193ee877a35SJohn Dyson 	/*
194ee877a35SJohn Dyson 	 * We support sync only for now.
195ee877a35SJohn Dyson 	 */
196ee877a35SJohn Dyson 	if ((iocb._aiocb_private.privatemodes & AIO_PMODE_SYNC) == 0)
197ee877a35SJohn Dyson 		return ENOSYS;
198ee877a35SJohn Dyson 
199ee877a35SJohn Dyson 	/*
200ee877a35SJohn Dyson 	 * Get the fd info for process
201ee877a35SJohn Dyson 	 */
202ee877a35SJohn Dyson 	fdp = p->p_fd;
203ee877a35SJohn Dyson 
204ee877a35SJohn Dyson 	/*
205ee877a35SJohn Dyson 	 * Range check file descriptor
206ee877a35SJohn Dyson 	 */
207ee877a35SJohn Dyson 	fd = iocb.aio_fildes;
208ee877a35SJohn Dyson 	if (fd >= fdp->fd_nfiles)
209ee877a35SJohn Dyson 		return EBADF;
210ee877a35SJohn Dyson 	fp = fdp->fd_ofiles[fd];
211ee877a35SJohn Dyson 	if ((fp == NULL) || ((fp->f_flag & FWRITE) == 0))
212ee877a35SJohn Dyson 		return EBADF;
213ee877a35SJohn Dyson 	if (((int) iocb.aio_offset) == -1)
214ee877a35SJohn Dyson 		return EINVAL;
215ee877a35SJohn Dyson 
216ee877a35SJohn Dyson 	aiov.iov_base = iocb.aio_buf;
217ee877a35SJohn Dyson 	aiov.iov_len = iocb.aio_nbytes;
218ee877a35SJohn Dyson 	auio.uio_iov = &aiov;
219ee877a35SJohn Dyson 	auio.uio_iovcnt = 1;
220ee877a35SJohn Dyson 	auio.uio_offset = iocb.aio_offset;
221ee877a35SJohn Dyson 
222ee877a35SJohn Dyson 	auio.uio_resid = iocb.aio_nbytes;
223ee877a35SJohn Dyson 	if (auio.uio_resid < 0)
224ee877a35SJohn Dyson 		return (EINVAL);
225ee877a35SJohn Dyson 
226ee877a35SJohn Dyson 	auio.uio_rw = UIO_WRITE;
227ee877a35SJohn Dyson 	auio.uio_segflg = UIO_USERSPACE;
228ee877a35SJohn Dyson 	auio.uio_procp = p;
229ee877a35SJohn Dyson 
230ee877a35SJohn Dyson 	cnt = iocb.aio_nbytes;
231ee877a35SJohn Dyson 	error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred);
232ee877a35SJohn Dyson 	if (error) {
233ee877a35SJohn Dyson 		if (auio.uio_resid != cnt) {
234ee877a35SJohn Dyson 			if (error == ERESTART || error == EINTR || error == EWOULDBLOCK)
235ee877a35SJohn Dyson 				error = 0;
236ee877a35SJohn Dyson 			if (error == EPIPE)
237ee877a35SJohn Dyson 				psignal(p, SIGPIPE);
238ee877a35SJohn Dyson 		}
239ee877a35SJohn Dyson 	}
240ee877a35SJohn Dyson 	cnt -= auio.uio_resid;
241ee877a35SJohn Dyson 	*retval = cnt;
242ee877a35SJohn Dyson 	return error;
243ee877a35SJohn Dyson }
244ee877a35SJohn Dyson 
245ee877a35SJohn Dyson int
246ee877a35SJohn Dyson lio_listio(struct proc *p, struct lio_listio_args *uap, int *retval) {
247ee877a35SJohn Dyson 	struct filedesc *fdp;
248ee877a35SJohn Dyson 	struct file *fp;
249ee877a35SJohn Dyson 	struct uio auio;
250ee877a35SJohn Dyson 	struct iovec aiov;
251ee877a35SJohn Dyson 	unsigned int fd;
252ee877a35SJohn Dyson 	int cnt;
253ee877a35SJohn Dyson 	unsigned int iocblen, iocbcnt;
254ee877a35SJohn Dyson 	struct aiocb *iocb;
255ee877a35SJohn Dyson 	int error;
256ee877a35SJohn Dyson 	int i;
257ee877a35SJohn Dyson 
258ee877a35SJohn Dyson 	if (uap->mode == LIO_NOWAIT)
259ee877a35SJohn Dyson 		return ENOSYS;
260ee877a35SJohn Dyson 	iocbcnt = uap->nent;
261ee877a35SJohn Dyson 	if (iocbcnt > AIO_LISTIO_MAX)
262ee877a35SJohn Dyson 		return EINVAL;
263ee877a35SJohn Dyson 	return ENOSYS;
264ee877a35SJohn Dyson }
265