xref: /freebsd/sys/kern/vfs_aio.c (revision fdebd4f0fd090401e174ee8efc5d33bf97050d69)
1ee877a35SJohn Dyson /*
2ee877a35SJohn Dyson  * Copyright (c) 1997 John S. Dyson.  All rights reserved.
3ee877a35SJohn Dyson  *
4ee877a35SJohn Dyson  * Redistribution and use in source and binary forms, with or without
5ee877a35SJohn Dyson  * modification, are permitted provided that the following conditions
6ee877a35SJohn Dyson  * are met:
7ee877a35SJohn Dyson  * 1. Redistributions of source code must retain the above copyright
8ee877a35SJohn Dyson  *    notice, this list of conditions and the following disclaimer.
9ee877a35SJohn Dyson  * 2. John S. Dyson's name may not be used to endorse or promote products
10ee877a35SJohn Dyson  *    derived from this software without specific prior written permission.
11ee877a35SJohn Dyson  *
12ee877a35SJohn Dyson  * DISCLAIMER:  This code isn't warranted to do anything useful.  Anything
13ee877a35SJohn Dyson  * bad that happens because of using this software isn't the responsibility
14ee877a35SJohn Dyson  * of the author.  This software is distributed AS-IS.
15ee877a35SJohn Dyson  *
16fdebd4f0SBruce Evans  * $Id: vfs_aio.c,v 1.10 1997/11/07 08:53:03 phk Exp $
17ee877a35SJohn Dyson  */
18ee877a35SJohn Dyson 
19ee877a35SJohn Dyson /*
20ee877a35SJohn Dyson  * This file contains support for the POSIX.4 AIO facility.
21ee877a35SJohn Dyson  *
22ee877a35SJohn Dyson  * The initial version provides only the (bogus) synchronous semantics
23ee877a35SJohn Dyson  * but will support async in the future.  Note that a bit
24ee877a35SJohn Dyson  * in a private field allows the user mode subroutine to adapt
25ee877a35SJohn Dyson  * the kernel operations to true POSIX.4 for future compatibility.
26ee877a35SJohn Dyson  *
27ee877a35SJohn Dyson  * This code is used to support true POSIX.4 AIO/LIO with the help
28ee877a35SJohn Dyson  * of a user mode subroutine package.  Note that eventually more support
29ee877a35SJohn Dyson  * will be pushed into the kernel.
30ee877a35SJohn Dyson  */
31ee877a35SJohn Dyson 
32ee877a35SJohn Dyson #include <sys/param.h>
33ee877a35SJohn Dyson #include <sys/systm.h>
34ee877a35SJohn Dyson #include <sys/sysproto.h>
35ee877a35SJohn Dyson #include <sys/filedesc.h>
36ee877a35SJohn Dyson #include <sys/kernel.h>
37ee877a35SJohn Dyson #include <sys/fcntl.h>
38ee877a35SJohn Dyson #include <sys/file.h>
39fdebd4f0SBruce Evans #include <sys/lock.h>
40ee877a35SJohn Dyson #include <sys/unistd.h>
41ee877a35SJohn Dyson #include <sys/proc.h>
42ee877a35SJohn Dyson #include <sys/uio.h>
43ee877a35SJohn Dyson #include <sys/malloc.h>
44ee877a35SJohn Dyson #include <sys/signalvar.h>
45a624e84fSJohn Dyson #include <sys/sysctl.h>
46ee877a35SJohn Dyson 
47ee877a35SJohn Dyson #include <vm/vm.h>
48ee877a35SJohn Dyson #include <vm/vm_param.h>
49ee877a35SJohn Dyson #include <vm/vm_extern.h>
502244ea07SJohn Dyson #include <vm/pmap.h>
512244ea07SJohn Dyson #include <vm/vm_map.h>
52ee877a35SJohn Dyson #include <sys/aio.h>
535aaef07cSJohn Dyson #include <sys/shm.h>
545aaef07cSJohn Dyson 
555aaef07cSJohn Dyson #include <machine/cpu.h>
56ee877a35SJohn Dyson 
57a1c995b6SPoul-Henning Kamp static MALLOC_DEFINE(M_AIO, "AIO", "AIO structure(s)");
5855166637SPoul-Henning Kamp 
592244ea07SJohn Dyson #define AIOCBLIST_CANCELLED	0x1
602244ea07SJohn Dyson #define AIOCBLIST_RUNDOWN	0x4
612244ea07SJohn Dyson #define AIOCBLIST_ASYNCFREE	0x8
622244ea07SJohn Dyson #define AIOCBLIST_SUSPEND	0x10
632244ea07SJohn Dyson 
642244ea07SJohn Dyson #if 0
652244ea07SJohn Dyson #define DEBUGAIO
662244ea07SJohn Dyson #define DIAGNOSTIC
672244ea07SJohn Dyson #endif
682244ea07SJohn Dyson 
69a624e84fSJohn Dyson #define DEBUGAIO 1
70a624e84fSJohn Dyson 
712244ea07SJohn Dyson static	int jobrefid;
722244ea07SJohn Dyson 
732244ea07SJohn Dyson #define JOBST_NULL		0x0
742244ea07SJohn Dyson #define	JOBST_JOBQPROC		0x1
752244ea07SJohn Dyson #define JOBST_JOBQGLOBAL	0x2
762244ea07SJohn Dyson #define JOBST_JOBRUNNING	0x3
772244ea07SJohn Dyson #define JOBST_JOBFINISHED	0x4
782244ea07SJohn Dyson 
792244ea07SJohn Dyson #define MAX_AIO_PER_PROC	32
802244ea07SJohn Dyson #define MAX_AIO_QUEUE_PER_PROC	256 /* Bigger than AIO_LISTIO_MAX */
812244ea07SJohn Dyson #define MAX_AIO_PROCS		128
822244ea07SJohn Dyson #define	MAX_AIO_QUEUE		1024 /* Bigger than AIO_LISTIO_MAX */
832244ea07SJohn Dyson #define TARGET_AIO_PROCS	64
842244ea07SJohn Dyson 
85a624e84fSJohn Dyson int max_aio_procs = MAX_AIO_PROCS;
86a624e84fSJohn Dyson int num_aio_procs = 0;
87a624e84fSJohn Dyson int target_aio_procs = TARGET_AIO_PROCS;
88a624e84fSJohn Dyson int max_queue_count = MAX_AIO_QUEUE;
89a624e84fSJohn Dyson int num_queue_count = 0;
90a624e84fSJohn Dyson 
91a624e84fSJohn Dyson int max_aio_per_proc = MAX_AIO_PER_PROC,
92a624e84fSJohn Dyson 	max_aio_queue_per_proc=MAX_AIO_QUEUE_PER_PROC;
93a624e84fSJohn Dyson 
94a624e84fSJohn Dyson 
95a624e84fSJohn Dyson SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0, "AIO mgmt");
96a624e84fSJohn Dyson 
97a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc,
98a624e84fSJohn Dyson 	CTLFLAG_RW, &max_aio_per_proc, 0, "");
99a624e84fSJohn Dyson 
100a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc,
101a624e84fSJohn Dyson 	CTLFLAG_RW, &max_aio_queue_per_proc, 0, "");
102a624e84fSJohn Dyson 
103a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs,
104a624e84fSJohn Dyson 	CTLFLAG_RW, &max_aio_procs, 0, "");
105a624e84fSJohn Dyson 
106a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs,
107a624e84fSJohn Dyson 	CTLFLAG_RD, &num_aio_procs, 0, "");
108a624e84fSJohn Dyson 
109a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count,
110a624e84fSJohn Dyson 	CTLFLAG_RD, &num_queue_count, 0, "");
111a624e84fSJohn Dyson 
112a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue,
113a624e84fSJohn Dyson 	CTLFLAG_RW, &max_queue_count, 0, "");
114a624e84fSJohn Dyson 
115a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs,
116a624e84fSJohn Dyson 	CTLFLAG_RW, &target_aio_procs, 0, "");
117a624e84fSJohn Dyson 
118a624e84fSJohn Dyson #if DEBUGAIO > 0
119a624e84fSJohn Dyson static int debugaio;
120a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, debugaio, CTLFLAG_RW, &debugaio, 0, "");
121a624e84fSJohn Dyson #endif
122a624e84fSJohn Dyson 
1232244ea07SJohn Dyson /*
1242244ea07SJohn Dyson  * Job queue item
1252244ea07SJohn Dyson  */
1262244ea07SJohn Dyson struct aiocblist {
1272244ea07SJohn Dyson 	TAILQ_ENTRY (aiocblist) list;		/* List of jobs */
1282244ea07SJohn Dyson 	TAILQ_ENTRY (aiocblist) plist;		/* List of jobs for proc */
1292244ea07SJohn Dyson 	int	jobflags;
1302244ea07SJohn Dyson 	int	jobstate;
1312244ea07SJohn Dyson 	struct	proc *userproc;			/* User process */
1322244ea07SJohn Dyson 	struct	aioproclist	*jobaioproc;	/* AIO process descriptor */
1332244ea07SJohn Dyson 	struct	aiocb uaiocb;			/* Kernel I/O control block */
1342244ea07SJohn Dyson };
1352244ea07SJohn Dyson 
1362244ea07SJohn Dyson #define AIOP_FREE	0x1			/* proc on free queue */
1372244ea07SJohn Dyson /*
1382244ea07SJohn Dyson  * AIO process info
1392244ea07SJohn Dyson  */
1402244ea07SJohn Dyson struct aioproclist {
1412244ea07SJohn Dyson 	int aioprocflags;			/* AIO proc flags */
1422244ea07SJohn Dyson 	TAILQ_ENTRY(aioproclist) list;		/* List of processes */
1432244ea07SJohn Dyson 	struct proc *aioproc;			/* The AIO thread */
1442244ea07SJohn Dyson 	TAILQ_HEAD (,aiocblist) jobtorun;	/* suggested job to run */
1452244ea07SJohn Dyson };
1462244ea07SJohn Dyson 
1472244ea07SJohn Dyson struct kaioinfo {
1482244ea07SJohn Dyson 	int	kaio_maxactive_count;	/* maximum number of AIOs */
1492244ea07SJohn Dyson 	int	kaio_active_count;	/* number of currently used AIOs */
1502244ea07SJohn Dyson 	int	kaio_qallowed_count;	/* maxiumu size of AIO queue */
1512244ea07SJohn Dyson 	int	kaio_queue_count;	/* size of AIO queue */
1522244ea07SJohn Dyson 	TAILQ_HEAD (,aiocblist)	kaio_jobqueue;	/* job queue for process */
1532244ea07SJohn Dyson 	TAILQ_HEAD (,aiocblist)	kaio_jobdone;	/* done queue for process */
1542244ea07SJohn Dyson };
1552244ea07SJohn Dyson 
1562244ea07SJohn Dyson TAILQ_HEAD (,aioproclist) aio_freeproc, aio_activeproc;
1572244ea07SJohn Dyson TAILQ_HEAD(,aiocblist) aio_jobs;			/* Async job list */
1582244ea07SJohn Dyson TAILQ_HEAD(,aiocblist) aio_freejobs;
1592244ea07SJohn Dyson 
1602244ea07SJohn Dyson 
1612244ea07SJohn Dyson void aio_init_aioinfo(struct proc *p) ;
1625aaef07cSJohn Dyson void aio_onceonly(void *) ;
1632244ea07SJohn Dyson int aio_free_entry(struct aiocblist *aiocbe);
1642244ea07SJohn Dyson void aio_cancel_internal(struct aiocblist *aiocbe);
1652244ea07SJohn Dyson void aio_process(struct aiocblist *aiocbe);
1662244ea07SJohn Dyson void pmap_newvmspace(struct vmspace *);
1672244ea07SJohn Dyson static int aio_newproc(void) ;
1682244ea07SJohn Dyson static int aio_aqueue(struct proc *p, struct aiocb *job, int type) ;
1692244ea07SJohn Dyson static void aio_marksuspend(struct proc *p, int njobs, int *joblist, int set) ;
1702244ea07SJohn Dyson 
1712244ea07SJohn Dyson SYSINIT(aio, SI_SUB_VFS, SI_ORDER_ANY, aio_onceonly, NULL);
1722244ea07SJohn Dyson 
173a624e84fSJohn Dyson 
1742244ea07SJohn Dyson /*
1752244ea07SJohn Dyson  * Startup initialization
1762244ea07SJohn Dyson  */
1772244ea07SJohn Dyson void
1785aaef07cSJohn Dyson aio_onceonly(void *na) {
1792244ea07SJohn Dyson 	TAILQ_INIT(&aio_freeproc);
1802244ea07SJohn Dyson 	TAILQ_INIT(&aio_activeproc);
1812244ea07SJohn Dyson 	TAILQ_INIT(&aio_jobs);
1822244ea07SJohn Dyson 	TAILQ_INIT(&aio_freejobs);
1832244ea07SJohn Dyson }
1842244ea07SJohn Dyson 
1852244ea07SJohn Dyson /*
1862244ea07SJohn Dyson  * Init the per-process aioinfo structure.
1872244ea07SJohn Dyson  */
1882244ea07SJohn Dyson void
1892244ea07SJohn Dyson aio_init_aioinfo(struct proc *p) {
1902244ea07SJohn Dyson 	struct kaioinfo *ki;
1912244ea07SJohn Dyson 	if (p->p_aioinfo == NULL) {
1922244ea07SJohn Dyson 		ki = malloc(sizeof (struct kaioinfo), M_AIO, M_WAITOK);
1932244ea07SJohn Dyson 		p->p_aioinfo = ki;
194a624e84fSJohn Dyson 		ki->kaio_maxactive_count = max_aio_per_proc;
1952244ea07SJohn Dyson 		ki->kaio_active_count = 0;
196a624e84fSJohn Dyson 		ki->kaio_qallowed_count = max_aio_queue_per_proc;
1972244ea07SJohn Dyson 		ki->kaio_queue_count = 0;
1982244ea07SJohn Dyson 		TAILQ_INIT(&ki->kaio_jobdone);
1992244ea07SJohn Dyson 		TAILQ_INIT(&ki->kaio_jobqueue);
2002244ea07SJohn Dyson 	}
2012244ea07SJohn Dyson }
2022244ea07SJohn Dyson 
2032244ea07SJohn Dyson /*
2042244ea07SJohn Dyson  * Free a job entry.  Wait for completion if it is currently
2052244ea07SJohn Dyson  * active, but don't delay forever.  If we delay, we return
2062244ea07SJohn Dyson  * a flag that says that we have to restart the queue scan.
2072244ea07SJohn Dyson  */
2082244ea07SJohn Dyson int
2092244ea07SJohn Dyson aio_free_entry(struct aiocblist *aiocbe) {
2102244ea07SJohn Dyson 	struct kaioinfo *ki;
2112244ea07SJohn Dyson 	struct aioproclist *aiop;
2122244ea07SJohn Dyson 	struct proc *p;
2132244ea07SJohn Dyson 
2142244ea07SJohn Dyson 	if (aiocbe->jobstate == JOBST_NULL)
2152244ea07SJohn Dyson 		panic("aio_free_entry: freeing already free job");
2162244ea07SJohn Dyson 
2172244ea07SJohn Dyson 	p = aiocbe->userproc;
2182244ea07SJohn Dyson 	ki = p->p_aioinfo;
2192244ea07SJohn Dyson 	if (ki == NULL)
2202244ea07SJohn Dyson 		panic("aio_free_entry: missing p->p_aioinfo");
2212244ea07SJohn Dyson 
2222244ea07SJohn Dyson 	if (aiocbe->jobstate == JOBST_JOBRUNNING) {
2232244ea07SJohn Dyson 		if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE)
2242244ea07SJohn Dyson 			return 0;
2252244ea07SJohn Dyson 		aiocbe->jobflags |= AIOCBLIST_RUNDOWN;
226a624e84fSJohn Dyson 		tsleep(aiocbe, PRIBIO|PCATCH, "jobwai", 0);
227a624e84fSJohn Dyson /*
2282244ea07SJohn Dyson 		if (tsleep(aiocbe, PRIBIO|PCATCH, "jobwai", hz*5)) {
2292244ea07SJohn Dyson 			aiocbe->jobflags |= AIOCBLIST_ASYNCFREE;
2302244ea07SJohn Dyson 			aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN;
2312244ea07SJohn Dyson 			return 1;
2322244ea07SJohn Dyson 		}
2332244ea07SJohn Dyson 		aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN;
234a624e84fSJohn Dyson */
2352244ea07SJohn Dyson 	}
2362244ea07SJohn Dyson 	aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE;
2372244ea07SJohn Dyson 
2382244ea07SJohn Dyson 	if (ki->kaio_queue_count <= 0)
2392244ea07SJohn Dyson 		panic("aio_free_entry: process queue size <= 0");
2402244ea07SJohn Dyson 	if (num_queue_count <= 0)
2412244ea07SJohn Dyson 		panic("aio_free_entry: system wide queue size <= 0");
2422244ea07SJohn Dyson 
2432244ea07SJohn Dyson 	--ki->kaio_queue_count;
2442244ea07SJohn Dyson 	--num_queue_count;
245a624e84fSJohn Dyson #if DEBUGAIO > 0
246a624e84fSJohn Dyson 	if (debugaio > 0)
247a624e84fSJohn Dyson 		printf("freeing entry: %d, %d\n",
248a624e84fSJohn Dyson 			ki->kaio_queue_count, num_queue_count);
249a624e84fSJohn Dyson #endif
2502244ea07SJohn Dyson 
2512244ea07SJohn Dyson 	if ( aiocbe->jobstate == JOBST_JOBQPROC) {
2522244ea07SJohn Dyson 		aiop = aiocbe->jobaioproc;
2532244ea07SJohn Dyson 		TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list);
2542244ea07SJohn Dyson 	} else if ( aiocbe->jobstate == JOBST_JOBQGLOBAL) {
2552244ea07SJohn Dyson 		TAILQ_REMOVE(&aio_jobs, aiocbe, list);
2562244ea07SJohn Dyson 	} else if ( aiocbe->jobstate == JOBST_JOBFINISHED) {
2572244ea07SJohn Dyson 		ki = p->p_aioinfo;
2582244ea07SJohn Dyson 		TAILQ_REMOVE(&ki->kaio_jobdone, aiocbe, plist);
2592244ea07SJohn Dyson 	}
2602244ea07SJohn Dyson 	TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
2612244ea07SJohn Dyson 	aiocbe->jobstate = JOBST_NULL;
2622244ea07SJohn Dyson 	return 0;
2632244ea07SJohn Dyson }
2642244ea07SJohn Dyson 
2652244ea07SJohn Dyson /*
2662244ea07SJohn Dyson  * Rundown the jobs for a given process.
2672244ea07SJohn Dyson  */
2682244ea07SJohn Dyson void
2692244ea07SJohn Dyson aio_proc_rundown(struct proc *p) {
2702244ea07SJohn Dyson 	struct kaioinfo *ki;
2712244ea07SJohn Dyson 	struct aiocblist *aiocbe, *aiocbn;
2722244ea07SJohn Dyson 
2732244ea07SJohn Dyson 	ki = p->p_aioinfo;
2742244ea07SJohn Dyson 	if (ki == NULL)
2752244ea07SJohn Dyson 		return;
2762244ea07SJohn Dyson 
277a624e84fSJohn Dyson 	while (ki->kaio_active_count > 0) {
278a624e84fSJohn Dyson 		if (tsleep(ki, PRIBIO, "kaiowt", 60 * hz))
279a624e84fSJohn Dyson 			break;
280a624e84fSJohn Dyson 	}
281a624e84fSJohn Dyson 
282a624e84fSJohn Dyson #if DEBUGAIO > 0
283a624e84fSJohn Dyson 	if (debugaio > 0)
284a624e84fSJohn Dyson 		printf("Proc rundown: %d %d\n",
285a624e84fSJohn Dyson 			num_queue_count, ki->kaio_queue_count);
286a624e84fSJohn Dyson #endif
287a624e84fSJohn Dyson 
2882244ea07SJohn Dyson restart1:
2892244ea07SJohn Dyson 	for ( aiocbe = TAILQ_FIRST(&ki->kaio_jobdone);
2902244ea07SJohn Dyson 		aiocbe;
2912244ea07SJohn Dyson 		aiocbe = aiocbn) {
2922244ea07SJohn Dyson 		aiocbn = TAILQ_NEXT(aiocbe, plist);
2932244ea07SJohn Dyson 		if (aio_free_entry(aiocbe))
2942244ea07SJohn Dyson 			goto restart1;
2952244ea07SJohn Dyson 	}
2962244ea07SJohn Dyson 
2972244ea07SJohn Dyson restart2:
2982244ea07SJohn Dyson 	for ( aiocbe = TAILQ_FIRST(&ki->kaio_jobqueue);
2992244ea07SJohn Dyson 		aiocbe;
3002244ea07SJohn Dyson 		aiocbe = aiocbn) {
3012244ea07SJohn Dyson 		aiocbn = TAILQ_NEXT(aiocbe, plist);
3022244ea07SJohn Dyson 		if (aio_free_entry(aiocbe))
3032244ea07SJohn Dyson 			goto restart2;
3042244ea07SJohn Dyson 	}
3052244ea07SJohn Dyson 	free(ki, M_AIO);
306a624e84fSJohn Dyson 	p->p_aioinfo = NULL;
3072244ea07SJohn Dyson }
3082244ea07SJohn Dyson 
3092244ea07SJohn Dyson /*
3102244ea07SJohn Dyson  * Select a job to run (called by an AIO daemon)
3112244ea07SJohn Dyson  */
3122244ea07SJohn Dyson static struct aiocblist *
3132244ea07SJohn Dyson aio_selectjob(struct aioproclist *aiop) {
3142244ea07SJohn Dyson 
3152244ea07SJohn Dyson 	struct aiocblist *aiocbe;
3162244ea07SJohn Dyson 
3172244ea07SJohn Dyson 	aiocbe = TAILQ_FIRST(&aiop->jobtorun);
3182244ea07SJohn Dyson 	if (aiocbe) {
3192244ea07SJohn Dyson 		TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list);
3202244ea07SJohn Dyson 		return aiocbe;
3212244ea07SJohn Dyson 	}
3222244ea07SJohn Dyson 
3232244ea07SJohn Dyson 	for (aiocbe = TAILQ_FIRST(&aio_jobs);
3242244ea07SJohn Dyson 		aiocbe;
3252244ea07SJohn Dyson 		aiocbe = TAILQ_NEXT(aiocbe, list)) {
3262244ea07SJohn Dyson 		struct kaioinfo *ki;
3272244ea07SJohn Dyson 		struct proc *userp;
3282244ea07SJohn Dyson 
3292244ea07SJohn Dyson 		userp = aiocbe->userproc;
3302244ea07SJohn Dyson 		ki = userp->p_aioinfo;
3312244ea07SJohn Dyson 
3322244ea07SJohn Dyson 		if (ki->kaio_active_count < ki->kaio_maxactive_count) {
3332244ea07SJohn Dyson 			TAILQ_REMOVE(&aio_jobs, aiocbe, list);
3342244ea07SJohn Dyson 			return aiocbe;
3352244ea07SJohn Dyson 		}
3362244ea07SJohn Dyson 	}
3372244ea07SJohn Dyson 
3382244ea07SJohn Dyson 	return NULL;
3392244ea07SJohn Dyson }
3402244ea07SJohn Dyson 
3412244ea07SJohn Dyson /*
3422244ea07SJohn Dyson  * The AIO activity proper.
3432244ea07SJohn Dyson  */
3442244ea07SJohn Dyson void
3452244ea07SJohn Dyson aio_process(struct aiocblist *aiocbe) {
3462244ea07SJohn Dyson 	struct filedesc *fdp;
3472244ea07SJohn Dyson 	struct proc *userp;
3482244ea07SJohn Dyson 	struct aiocb *cb;
3492244ea07SJohn Dyson 	struct file *fp;
3502244ea07SJohn Dyson 	struct uio auio;
3512244ea07SJohn Dyson 	struct iovec aiov;
3522244ea07SJohn Dyson 	unsigned int fd;
3532244ea07SJohn Dyson 	int cnt;
3542244ea07SJohn Dyson 	int error;
355a624e84fSJohn Dyson 	off_t offset;
3562244ea07SJohn Dyson 
3572244ea07SJohn Dyson 	userp = aiocbe->userproc;
3582244ea07SJohn Dyson 	cb = &aiocbe->uaiocb;
3592244ea07SJohn Dyson 
360a624e84fSJohn Dyson #if DEBUGAIO > 0
361a624e84fSJohn Dyson 	if (debugaio > 1)
362a624e84fSJohn Dyson 		printf("AIO %s, fd: %d, offset: 0x%x, address: 0x%x, size: %d\n",
363a624e84fSJohn Dyson 			cb->aio_lio_opcode == LIO_READ?"Read":"Write",
3642244ea07SJohn Dyson 			cb->aio_fildes, (int) cb->aio_offset,
3652244ea07SJohn Dyson 				cb->aio_buf, cb->aio_nbytes);
366a624e84fSJohn Dyson #endif
367a624e84fSJohn Dyson #if SLOW
3682244ea07SJohn Dyson 	tsleep(curproc, PVM, "aioprc", hz);
3692244ea07SJohn Dyson #endif
3702244ea07SJohn Dyson 	fdp = curproc->p_fd;
3712244ea07SJohn Dyson 	/*
3722244ea07SJohn Dyson 	 * Range check file descriptor
3732244ea07SJohn Dyson 	 */
3742244ea07SJohn Dyson 	fd = cb->aio_fildes;
3752244ea07SJohn Dyson 	fp = fdp->fd_ofiles[fd];
3762244ea07SJohn Dyson 
3772244ea07SJohn Dyson 	aiov.iov_base = cb->aio_buf;
3782244ea07SJohn Dyson 	aiov.iov_len = cb->aio_nbytes;
3792244ea07SJohn Dyson 
3802244ea07SJohn Dyson 	auio.uio_iov = &aiov;
3812244ea07SJohn Dyson 	auio.uio_iovcnt = 1;
382a624e84fSJohn Dyson 	auio.uio_offset = offset = cb->aio_offset;
3832244ea07SJohn Dyson 	auio.uio_resid = cb->aio_nbytes;
3842244ea07SJohn Dyson 	cnt = cb->aio_nbytes;
3852244ea07SJohn Dyson 	auio.uio_segflg = UIO_USERSPACE;
3862244ea07SJohn Dyson 	auio.uio_procp = curproc;
3872244ea07SJohn Dyson 
3882244ea07SJohn Dyson 	if (cb->aio_lio_opcode == LIO_READ) {
3892244ea07SJohn Dyson 		auio.uio_rw = UIO_READ;
3902244ea07SJohn Dyson 		error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
3912244ea07SJohn Dyson 	} else {
3922244ea07SJohn Dyson 		auio.uio_rw = UIO_WRITE;
3932244ea07SJohn Dyson 		error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred);
3942244ea07SJohn Dyson 	}
3952244ea07SJohn Dyson 
3962244ea07SJohn Dyson 	if (error) {
3972244ea07SJohn Dyson 		if (auio.uio_resid != cnt) {
3982244ea07SJohn Dyson 			if (error == ERESTART || error == EINTR || error == EWOULDBLOCK)
3992244ea07SJohn Dyson 				error = 0;
4002244ea07SJohn Dyson 			if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE))
4012244ea07SJohn Dyson 				psignal(userp, SIGPIPE);
4022244ea07SJohn Dyson 		}
4032244ea07SJohn Dyson 	}
404a624e84fSJohn Dyson #if DEBUGAIO > 0
405a624e84fSJohn Dyson 	if (debugaio > 1)
406a624e84fSJohn Dyson 		printf("%s complete: error: %d, status: %d, nio: %d, resid: %d, offset: %d\n",
407a624e84fSJohn Dyson 	cb->aio_lio_opcode == LIO_READ?"Read":"Write",
408a624e84fSJohn Dyson error, cnt, cnt - auio.uio_resid, auio.uio_resid, (int) offset & 0xffffffff);
409a624e84fSJohn Dyson #endif
4102244ea07SJohn Dyson 
4112244ea07SJohn Dyson 	cnt -= auio.uio_resid;
4122244ea07SJohn Dyson 	cb->_aiocb_private.error = error;
4132244ea07SJohn Dyson 	cb->_aiocb_private.status = cnt;
4142244ea07SJohn Dyson 
4152244ea07SJohn Dyson 	return;
4162244ea07SJohn Dyson 
4172244ea07SJohn Dyson }
4182244ea07SJohn Dyson 
4192244ea07SJohn Dyson /*
4202244ea07SJohn Dyson  * The AIO daemon.
4212244ea07SJohn Dyson  */
4222244ea07SJohn Dyson static void
4232244ea07SJohn Dyson aio_startproc(void *uproc)
4242244ea07SJohn Dyson {
4252244ea07SJohn Dyson 	struct aioproclist *aiop;
4262244ea07SJohn Dyson 
4272244ea07SJohn Dyson 	/*
4282244ea07SJohn Dyson 	 * Allocate and ready the aio control info
4292244ea07SJohn Dyson 	 */
4302244ea07SJohn Dyson 	aiop = malloc(sizeof *aiop, M_AIO, M_WAITOK);
4312244ea07SJohn Dyson 	aiop->aioproc = curproc;
4322244ea07SJohn Dyson 	aiop->aioprocflags |= AIOP_FREE;
4332244ea07SJohn Dyson 	TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list);
4342244ea07SJohn Dyson 	TAILQ_INIT(&aiop->jobtorun);
4352244ea07SJohn Dyson 
4362244ea07SJohn Dyson 	/*
4372244ea07SJohn Dyson 	 * Get rid of current address space
4382244ea07SJohn Dyson 	 */
4392244ea07SJohn Dyson 	if (curproc->p_vmspace->vm_refcnt == 1) {
4402244ea07SJohn Dyson 		if (curproc->p_vmspace->vm_shm)
4412244ea07SJohn Dyson 			shmexit(curproc);
4422244ea07SJohn Dyson 		pmap_remove_pages(&curproc->p_vmspace->vm_pmap, 0, USRSTACK);
4432244ea07SJohn Dyson 		vm_map_remove(&curproc->p_vmspace->vm_map, 0, USRSTACK);
4442244ea07SJohn Dyson 	} else {
4452244ea07SJohn Dyson 		vmspace_exec(curproc);
4462244ea07SJohn Dyson 	}
4472244ea07SJohn Dyson 
4482244ea07SJohn Dyson 	/*
4492244ea07SJohn Dyson 	 * Make up a name for the daemon
4502244ea07SJohn Dyson 	 */
4512244ea07SJohn Dyson 	strcpy(curproc->p_comm, "aiodaemon");
4522244ea07SJohn Dyson 
4532244ea07SJohn Dyson 	/*
4542244ea07SJohn Dyson 	 * Get rid of our current filedescriptors
4552244ea07SJohn Dyson 	 */
4562244ea07SJohn Dyson 	fdfree(curproc);
4572244ea07SJohn Dyson 	curproc->p_fd = NULL;
4582244ea07SJohn Dyson 	curproc->p_ucred = crcopy(curproc->p_ucred);
4592244ea07SJohn Dyson 	curproc->p_ucred->cr_uid = 0;
4602244ea07SJohn Dyson 	curproc->p_ucred->cr_groups[0] = 1;
4612244ea07SJohn Dyson 	curproc->p_flag |= P_SYSTEM;
4622244ea07SJohn Dyson 
463a624e84fSJohn Dyson #if DEBUGAIO > 0
464a624e84fSJohn Dyson 	if (debugaio > 2)
4652244ea07SJohn Dyson 		printf("Started new process: %d\n", curproc->p_pid);
4662244ea07SJohn Dyson #endif
467a624e84fSJohn Dyson 	wakeup(&aio_freeproc);
4682244ea07SJohn Dyson 
4692244ea07SJohn Dyson 	while(1) {
4702244ea07SJohn Dyson 		struct vmspace *myvm, *tmpvm;
4712244ea07SJohn Dyson 		struct proc *cp = curproc;
4722244ea07SJohn Dyson 		struct	aiocblist *aiocbe;
4732244ea07SJohn Dyson 
4742244ea07SJohn Dyson 		if ((aiop->aioprocflags & AIOP_FREE) == 0) {
4752244ea07SJohn Dyson 			TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list);
4762244ea07SJohn Dyson 			aiop->aioprocflags |= AIOP_FREE;
4772244ea07SJohn Dyson 		}
478c4860686SJohn Dyson 		if (tsleep(cp, PRIBIO, "aiordy", hz*30)) {
479c4860686SJohn Dyson 			if ((num_aio_procs > target_aio_procs) &&
480c4860686SJohn Dyson 				(TAILQ_FIRST(&aiop->jobtorun) == NULL))
481c4860686SJohn Dyson 				exit1(curproc, 0);
482c4860686SJohn Dyson 		}
483c4860686SJohn Dyson 
4842244ea07SJohn Dyson 		if (aiop->aioprocflags & AIOP_FREE) {
4852244ea07SJohn Dyson 			TAILQ_REMOVE(&aio_freeproc, aiop, list);
4862244ea07SJohn Dyson 			TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list);
4872244ea07SJohn Dyson 			aiop->aioprocflags &= ~AIOP_FREE;
4882244ea07SJohn Dyson 		}
4892244ea07SJohn Dyson 
4902244ea07SJohn Dyson 		myvm = curproc->p_vmspace;
4912244ea07SJohn Dyson 
4922244ea07SJohn Dyson 		while ( aiocbe = aio_selectjob(aiop)) {
4932244ea07SJohn Dyson 			struct aiocb *cb;
4942244ea07SJohn Dyson 			struct kaioinfo *ki;
4952244ea07SJohn Dyson 			struct proc *userp;
4962244ea07SJohn Dyson 
4972244ea07SJohn Dyson 			cb = &aiocbe->uaiocb;
4982244ea07SJohn Dyson 			userp = aiocbe->userproc;
4992244ea07SJohn Dyson 			ki = userp->p_aioinfo;
5002244ea07SJohn Dyson 
5012244ea07SJohn Dyson 			aiocbe->jobstate = JOBST_JOBRUNNING;
5022244ea07SJohn Dyson 			if (userp != cp) {
5032244ea07SJohn Dyson 				tmpvm = curproc->p_vmspace;
5042244ea07SJohn Dyson 				curproc->p_vmspace = userp->p_vmspace;
5052244ea07SJohn Dyson 				++curproc->p_vmspace->vm_refcnt;
5062244ea07SJohn Dyson 				pmap_activate(curproc);
5072244ea07SJohn Dyson 				if (tmpvm != myvm) {
5082244ea07SJohn Dyson 					vmspace_free(tmpvm);
5092244ea07SJohn Dyson 				}
5102244ea07SJohn Dyson 				if (curproc->p_fd)
5112244ea07SJohn Dyson 					fdfree(curproc);
5122244ea07SJohn Dyson 				curproc->p_fd = fdshare(userp);
5132244ea07SJohn Dyson 				cp = userp;
5142244ea07SJohn Dyson 			}
5152244ea07SJohn Dyson 
5162244ea07SJohn Dyson 			ki->kaio_active_count++;
517a624e84fSJohn Dyson #if DEBUGAIO > 0
518a624e84fSJohn Dyson 			if (debugaio > 0)
519a624e84fSJohn Dyson 				printf("process: pid: %d(%d), active: %d, queue: %d\n",
520a624e84fSJohn Dyson 					cb->_aiocb_private.kernelinfo,
521a624e84fSJohn Dyson 					userp->p_pid, ki->kaio_active_count, ki->kaio_queue_count);
522a624e84fSJohn Dyson #endif
5232244ea07SJohn Dyson 			aiocbe->jobaioproc = aiop;
5242244ea07SJohn Dyson 			aio_process(aiocbe);
5252244ea07SJohn Dyson 			--ki->kaio_active_count;
526a624e84fSJohn Dyson 			if (ki->kaio_active_count == 0)
527a624e84fSJohn Dyson 				wakeup(ki);
528a624e84fSJohn Dyson #if DEBUGAIO > 0
529a624e84fSJohn Dyson 			if (debugaio > 0)
530a624e84fSJohn Dyson 				printf("DONE process: pid: %d(%d), active: %d, queue: %d\n",
531a624e84fSJohn Dyson 					cb->_aiocb_private.kernelinfo,
532a624e84fSJohn Dyson 					userp->p_pid, ki->kaio_active_count, ki->kaio_queue_count);
533a624e84fSJohn Dyson #endif
5342244ea07SJohn Dyson 
5352244ea07SJohn Dyson 			aiocbe->jobstate = JOBST_JOBFINISHED;
5362244ea07SJohn Dyson 
5372244ea07SJohn Dyson 			if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) {
5382244ea07SJohn Dyson 				aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE;
5392244ea07SJohn Dyson 				TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
5402244ea07SJohn Dyson 			} else {
5412244ea07SJohn Dyson 				TAILQ_REMOVE(&ki->kaio_jobqueue,
5422244ea07SJohn Dyson 					aiocbe, plist);
5432244ea07SJohn Dyson 				TAILQ_INSERT_TAIL(&ki->kaio_jobdone,
5442244ea07SJohn Dyson 					aiocbe, plist);
5452244ea07SJohn Dyson 			}
5462244ea07SJohn Dyson 
5472244ea07SJohn Dyson 			if (aiocbe->jobflags & AIOCBLIST_RUNDOWN) {
5482244ea07SJohn Dyson 				wakeup(aiocbe);
5492244ea07SJohn Dyson 				aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN;
5502244ea07SJohn Dyson 			}
5512244ea07SJohn Dyson 
5522244ea07SJohn Dyson 			if (aiocbe->jobflags & AIOCBLIST_SUSPEND) {
5532244ea07SJohn Dyson 				wakeup(userp);
5542244ea07SJohn Dyson 				aiocbe->jobflags &= ~AIOCBLIST_SUSPEND;
5552244ea07SJohn Dyson 			}
5562244ea07SJohn Dyson 
5572244ea07SJohn Dyson 			if (cb->aio_sigevent.sigev_notify == SIGEV_SIGNAL) {
5582244ea07SJohn Dyson 				psignal(userp, cb->aio_sigevent.sigev_signo);
5592244ea07SJohn Dyson 			}
5602244ea07SJohn Dyson 		}
5612244ea07SJohn Dyson 
5622244ea07SJohn Dyson 		if (cp != curproc) {
5632244ea07SJohn Dyson 			tmpvm = curproc->p_vmspace;
5642244ea07SJohn Dyson 			curproc->p_vmspace = myvm;
5652244ea07SJohn Dyson 			pmap_activate(curproc);
5662244ea07SJohn Dyson 			vmspace_free(tmpvm);
5672244ea07SJohn Dyson 			if (curproc->p_fd)
5682244ea07SJohn Dyson 				fdfree(curproc);
5692244ea07SJohn Dyson 			curproc->p_fd = NULL;
5702244ea07SJohn Dyson 			cp = curproc;
5712244ea07SJohn Dyson 		}
5722244ea07SJohn Dyson 	}
5732244ea07SJohn Dyson }
5742244ea07SJohn Dyson 
5752244ea07SJohn Dyson /*
5762244ea07SJohn Dyson  * Create a new AIO daemon.
5772244ea07SJohn Dyson  */
5782244ea07SJohn Dyson static int
5792244ea07SJohn Dyson aio_newproc() {
5802244ea07SJohn Dyson 	int error;
5812244ea07SJohn Dyson 	struct rfork_args rfa;
5822244ea07SJohn Dyson 	struct proc *p;
5832244ea07SJohn Dyson 
5842244ea07SJohn Dyson 	rfa.flags = RFMEM | RFPROC | RFCFDG;
5852244ea07SJohn Dyson 
586cb226aaaSPoul-Henning Kamp 	p = curproc;
587cb226aaaSPoul-Henning Kamp 	if (error = rfork(p, &rfa))
5882244ea07SJohn Dyson 		return error;
589cb226aaaSPoul-Henning Kamp 	cpu_set_fork_handler(p = pfind(p->p_retval[0]), aio_startproc, curproc);
5902244ea07SJohn Dyson 
591a624e84fSJohn Dyson #if DEBUGAIO > 0
592a624e84fSJohn Dyson 	if (debugaio > 2)
5932244ea07SJohn Dyson 		printf("Waiting for new process: %d, count: %d\n",
5942244ea07SJohn Dyson 			curproc->p_pid, num_aio_procs);
5952244ea07SJohn Dyson #endif
5962244ea07SJohn Dyson 
597a624e84fSJohn Dyson 	error = tsleep(&aio_freeproc, PZERO, "aiosta", 5*hz);
5982244ea07SJohn Dyson 	++num_aio_procs;
5992244ea07SJohn Dyson 
6002244ea07SJohn Dyson 	return error;
6012244ea07SJohn Dyson 
6022244ea07SJohn Dyson }
6032244ea07SJohn Dyson 
6042244ea07SJohn Dyson /*
6052244ea07SJohn Dyson  * Queue a new AIO request.
6062244ea07SJohn Dyson  */
6072244ea07SJohn Dyson static int
6082244ea07SJohn Dyson _aio_aqueue(struct proc *p, struct aiocb *job, int type) {
6092244ea07SJohn Dyson 	struct filedesc *fdp;
6102244ea07SJohn Dyson 	struct file *fp;
6112244ea07SJohn Dyson 	unsigned int fd;
6122244ea07SJohn Dyson 
6132244ea07SJohn Dyson 	int error;
6142244ea07SJohn Dyson 	int opcode;
6152244ea07SJohn Dyson 	struct aiocblist *aiocbe;
6162244ea07SJohn Dyson 	struct aioproclist *aiop;
6172244ea07SJohn Dyson 	struct kaioinfo *ki;
6182244ea07SJohn Dyson 
6192244ea07SJohn Dyson 	if (aiocbe = TAILQ_FIRST(&aio_freejobs)) {
6202244ea07SJohn Dyson 		TAILQ_REMOVE(&aio_freejobs, aiocbe, list);
6212244ea07SJohn Dyson 	} else {
6222244ea07SJohn Dyson 		aiocbe = malloc (sizeof *aiocbe, M_AIO, M_WAITOK);
6232244ea07SJohn Dyson 	}
6242244ea07SJohn Dyson 
6252244ea07SJohn Dyson 	error = copyin((caddr_t)job,
6262244ea07SJohn Dyson 		(caddr_t) &aiocbe->uaiocb, sizeof aiocbe->uaiocb);
6272244ea07SJohn Dyson 	if (error) {
628a624e84fSJohn Dyson #if DEBUGAIO > 0
629a624e84fSJohn Dyson 		if (debugaio > 0)
630a624e84fSJohn Dyson 			printf("aio_aqueue: Copyin error: %d\n", error);
631a624e84fSJohn Dyson #endif
6322244ea07SJohn Dyson 		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
6332244ea07SJohn Dyson 		return error;
6342244ea07SJohn Dyson 	}
6352244ea07SJohn Dyson 
636a624e84fSJohn Dyson 	/*
637a624e84fSJohn Dyson 	 * Get the opcode
638a624e84fSJohn Dyson 	 */
639a624e84fSJohn Dyson 	if (type != LIO_NOP) {
640a624e84fSJohn Dyson 		aiocbe->uaiocb.aio_lio_opcode = type;
641a624e84fSJohn Dyson 	}
642a624e84fSJohn Dyson 	opcode = aiocbe->uaiocb.aio_lio_opcode;
6432244ea07SJohn Dyson 
6442244ea07SJohn Dyson 	/*
6452244ea07SJohn Dyson 	 * Get the fd info for process
6462244ea07SJohn Dyson 	 */
6472244ea07SJohn Dyson 	fdp = p->p_fd;
6482244ea07SJohn Dyson 
6492244ea07SJohn Dyson 	/*
6502244ea07SJohn Dyson 	 * Range check file descriptor
6512244ea07SJohn Dyson 	 */
6522244ea07SJohn Dyson 	fd = aiocbe->uaiocb.aio_fildes;
6532244ea07SJohn Dyson 	if (fd >= fdp->fd_nfiles) {
6542244ea07SJohn Dyson 		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
6552244ea07SJohn Dyson 		if (type == 0) {
656a624e84fSJohn Dyson #if DEBUGAIO > 0
657a624e84fSJohn Dyson 			if (debugaio > 0)
658a624e84fSJohn Dyson 				printf("aio_aqueue: Null type\n");
659a624e84fSJohn Dyson #endif
6602244ea07SJohn Dyson 			suword(&job->_aiocb_private.status, -1);
6612244ea07SJohn Dyson 			suword(&job->_aiocb_private.error, EBADF);
6622244ea07SJohn Dyson 		}
6632244ea07SJohn Dyson 		return EBADF;
6642244ea07SJohn Dyson 	}
6652244ea07SJohn Dyson 
666c4860686SJohn Dyson #if DEBUGAIO > 0
667c4860686SJohn Dyson 	if (debugaio > 3)
668c4860686SJohn Dyson 		printf("aio_aqueue: fd: %d, cmd: %d, buf: %d, cnt: %d, fileoffset: %d\n",
669c4860686SJohn Dyson 			aiocbe->uaiocb.aio_fildes,
670c4860686SJohn Dyson 			aiocbe->uaiocb.aio_lio_opcode,
671c4860686SJohn Dyson 			(int) aiocbe->uaiocb.aio_buf & 0xffffffff,
672c4860686SJohn Dyson 			aiocbe->uaiocb.aio_nbytes,
673c4860686SJohn Dyson 			(int) aiocbe->uaiocb.aio_offset & 0xffffffff);
674c4860686SJohn Dyson #endif
675c4860686SJohn Dyson 
676c4860686SJohn Dyson 
6772244ea07SJohn Dyson 	fp = fdp->fd_ofiles[fd];
678a624e84fSJohn Dyson 	if ((fp == NULL) ||
679a624e84fSJohn Dyson 		((opcode == LIO_WRITE) && ((fp->f_flag & FWRITE) == 0))) {
6802244ea07SJohn Dyson 		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
6812244ea07SJohn Dyson 		if (type == 0) {
6822244ea07SJohn Dyson 			suword(&job->_aiocb_private.status, -1);
6832244ea07SJohn Dyson 			suword(&job->_aiocb_private.error, EBADF);
6842244ea07SJohn Dyson 		}
685a624e84fSJohn Dyson #if DEBUGAIO > 0
686a624e84fSJohn Dyson 		if (debugaio > 0)
687a624e84fSJohn Dyson 			printf("aio_aqueue: Bad file descriptor\n");
688a624e84fSJohn Dyson #endif
6892244ea07SJohn Dyson 		return EBADF;
6902244ea07SJohn Dyson 	}
6912244ea07SJohn Dyson 
6922244ea07SJohn Dyson 	if (aiocbe->uaiocb.aio_offset == -1LL) {
6932244ea07SJohn Dyson 		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
6942244ea07SJohn Dyson 		if (type == 0) {
6952244ea07SJohn Dyson 			suword(&job->_aiocb_private.status, -1);
6962244ea07SJohn Dyson 			suword(&job->_aiocb_private.error, EINVAL);
6972244ea07SJohn Dyson 		}
698a624e84fSJohn Dyson #if DEBUGAIO > 0
699a624e84fSJohn Dyson 		if (debugaio > 0)
700a624e84fSJohn Dyson 			printf("aio_aqueue: bad offset\n");
701a624e84fSJohn Dyson #endif
7022244ea07SJohn Dyson 		return EINVAL;
7032244ea07SJohn Dyson 	}
7042244ea07SJohn Dyson 
705a624e84fSJohn Dyson #if DEBUGAIO > 0
706a624e84fSJohn Dyson 	if (debugaio > 2)
7072244ea07SJohn Dyson 		printf("job addr: 0x%x, 0x%x, %d\n", job, &job->_aiocb_private.kernelinfo, jobrefid);
7082244ea07SJohn Dyson #endif
7092244ea07SJohn Dyson 
7102244ea07SJohn Dyson 	error = suword(&job->_aiocb_private.kernelinfo, jobrefid);
7112244ea07SJohn Dyson 	if (error) {
7122244ea07SJohn Dyson 		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
7132244ea07SJohn Dyson 		if (type == 0) {
7142244ea07SJohn Dyson 			suword(&job->_aiocb_private.status, -1);
7152244ea07SJohn Dyson 			suword(&job->_aiocb_private.error, EINVAL);
7162244ea07SJohn Dyson 		}
717a624e84fSJohn Dyson #if DEBUGAIO > 0
718a624e84fSJohn Dyson 		if (debugaio > 0)
719a624e84fSJohn Dyson 			printf("aio_aqueue: fetch of kernelinfo from user space\n");
720a624e84fSJohn Dyson #endif
7212244ea07SJohn Dyson 		return error;
7222244ea07SJohn Dyson 	}
7232244ea07SJohn Dyson 
7242244ea07SJohn Dyson 	aiocbe->uaiocb._aiocb_private.kernelinfo = (void *)jobrefid;
725a624e84fSJohn Dyson #if DEBUGAIO > 0
726a624e84fSJohn Dyson 	if (debugaio > 2)
7272244ea07SJohn Dyson 		printf("aio_aqueue: New job: %d...  ", jobrefid);
7282244ea07SJohn Dyson #endif
7292244ea07SJohn Dyson 	++jobrefid;
7302244ea07SJohn Dyson 
7312244ea07SJohn Dyson 	if (opcode == LIO_NOP) {
7322244ea07SJohn Dyson 		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
7332244ea07SJohn Dyson 		if (type == 0) {
7342244ea07SJohn Dyson 			suword(&job->_aiocb_private.status, -1);
7352244ea07SJohn Dyson 			suword(&job->_aiocb_private.error, 0);
7362244ea07SJohn Dyson 		}
7372244ea07SJohn Dyson 		return 0;
7382244ea07SJohn Dyson 	}
7392244ea07SJohn Dyson 
7402244ea07SJohn Dyson 	if ((opcode != LIO_NOP) &&
7412244ea07SJohn Dyson 		(opcode != LIO_READ) && (opcode != LIO_WRITE)) {
7422244ea07SJohn Dyson 		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
7432244ea07SJohn Dyson 		if (type == 0) {
7442244ea07SJohn Dyson 			suword(&job->_aiocb_private.status, -1);
7452244ea07SJohn Dyson 			suword(&job->_aiocb_private.error, EINVAL);
7462244ea07SJohn Dyson 		}
747a624e84fSJohn Dyson #if DEBUGAIO > 0
748a624e84fSJohn Dyson 		if (debugaio > 0)
749a624e84fSJohn Dyson 			printf("aio_aqueue: invalid LIO op: %d\n", opcode);
750a624e84fSJohn Dyson #endif
7512244ea07SJohn Dyson 		return EINVAL;
7522244ea07SJohn Dyson 	}
7532244ea07SJohn Dyson 
7542244ea07SJohn Dyson 	suword(&job->_aiocb_private.error, 0);
7552244ea07SJohn Dyson 	suword(&job->_aiocb_private.status, 0);
7562244ea07SJohn Dyson 	aiocbe->userproc = p;
7572244ea07SJohn Dyson 	aiocbe->jobflags = 0;
7582244ea07SJohn Dyson 	ki = p->p_aioinfo;
7592244ea07SJohn Dyson 	++num_queue_count;
7602244ea07SJohn Dyson 	++ki->kaio_queue_count;
7612244ea07SJohn Dyson 
7622244ea07SJohn Dyson retryproc:
7632244ea07SJohn Dyson 	if (aiop = TAILQ_FIRST(&aio_freeproc)) {
764a624e84fSJohn Dyson #if DEBUGAIO > 0
765a624e84fSJohn Dyson 		if (debugaio > 0)
7662244ea07SJohn Dyson 			printf("found a free AIO process\n");
7672244ea07SJohn Dyson #endif
7682244ea07SJohn Dyson 		TAILQ_REMOVE(&aio_freeproc, aiop, list);
7692244ea07SJohn Dyson 		TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list);
7702244ea07SJohn Dyson 		aiop->aioprocflags &= ~AIOP_FREE;
7712244ea07SJohn Dyson 		TAILQ_INSERT_TAIL(&aiop->jobtorun, aiocbe, list);
7722244ea07SJohn Dyson 		TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist);
7732244ea07SJohn Dyson 		aiocbe->jobstate = JOBST_JOBQPROC;
774a624e84fSJohn Dyson 
7752244ea07SJohn Dyson 		aiocbe->jobaioproc = aiop;
7762244ea07SJohn Dyson 		wakeup(aiop->aioproc);
7772244ea07SJohn Dyson 	} else if ((num_aio_procs < max_aio_procs) &&
7782244ea07SJohn Dyson 			(ki->kaio_active_count < ki->kaio_maxactive_count)) {
779a624e84fSJohn Dyson #if DEBUGAIO > 0
780a624e84fSJohn Dyson 		if (debugaio > 1) {
781a624e84fSJohn Dyson 			printf("aio_aqueue: starting new proc: num_aio_procs(%d), max_aio_procs(%d)\n", num_aio_procs, max_aio_procs);
782a624e84fSJohn Dyson 			printf("            ki->kaio_active_count(%d), ki->kaio_maxactive_count(%d)\n", ki->kaio_active_count, ki->kaio_maxactive_count);
783a624e84fSJohn Dyson 		}
784a624e84fSJohn Dyson #endif
7852244ea07SJohn Dyson 		if (error = aio_newproc()) {
786a624e84fSJohn Dyson #if DEBUGAIO > 0
787a624e84fSJohn Dyson 			if (debugaio > 0)
7882244ea07SJohn Dyson 				printf("aio_aqueue: problem sleeping for starting proc: %d\n",
7892244ea07SJohn Dyson 					error);
7902244ea07SJohn Dyson #endif
7912244ea07SJohn Dyson 		}
7922244ea07SJohn Dyson 		goto retryproc;
7932244ea07SJohn Dyson 	} else {
794a624e84fSJohn Dyson #if DEBUGAIO > 0
795a624e84fSJohn Dyson 		if (debugaio > 0)
7962244ea07SJohn Dyson 			printf("queuing to global queue\n");
7972244ea07SJohn Dyson #endif
7982244ea07SJohn Dyson 		TAILQ_INSERT_TAIL(&aio_jobs, aiocbe, list);
7992244ea07SJohn Dyson 		TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist);
8002244ea07SJohn Dyson 		aiocbe->jobstate = JOBST_JOBQGLOBAL;
8012244ea07SJohn Dyson 	}
8022244ea07SJohn Dyson 
8032244ea07SJohn Dyson 	return 0;
8042244ea07SJohn Dyson }
8052244ea07SJohn Dyson 
8062244ea07SJohn Dyson static int
8072244ea07SJohn Dyson aio_aqueue(struct proc *p, struct aiocb *job, int type) {
8082244ea07SJohn Dyson 	struct kaioinfo *ki;
8092244ea07SJohn Dyson 
8102244ea07SJohn Dyson 	if (p->p_aioinfo == NULL) {
8112244ea07SJohn Dyson 		aio_init_aioinfo(p);
8122244ea07SJohn Dyson 	}
8132244ea07SJohn Dyson 
8142244ea07SJohn Dyson 	if (num_queue_count >= max_queue_count)
8152244ea07SJohn Dyson 		return EAGAIN;
8162244ea07SJohn Dyson 
8172244ea07SJohn Dyson 	ki = p->p_aioinfo;
8182244ea07SJohn Dyson 	if (ki->kaio_queue_count >= ki->kaio_qallowed_count)
8192244ea07SJohn Dyson 		return EAGAIN;
8202244ea07SJohn Dyson 
8212244ea07SJohn Dyson 	return _aio_aqueue(p, job, type);
8222244ea07SJohn Dyson }
8232244ea07SJohn Dyson 
8242244ea07SJohn Dyson /*
8252244ea07SJohn Dyson  * Support the aio_return system call
8262244ea07SJohn Dyson  */
8272244ea07SJohn Dyson int
828cb226aaaSPoul-Henning Kamp aio_return(struct proc *p, struct aio_return_args *uap) {
8292244ea07SJohn Dyson 	int jobref, status;
8302244ea07SJohn Dyson 	struct aiocblist *cb;
8312244ea07SJohn Dyson 	struct kaioinfo *ki;
8322244ea07SJohn Dyson 
8332244ea07SJohn Dyson 	ki = p->p_aioinfo;
8342244ea07SJohn Dyson 	if (ki == NULL) {
8352244ea07SJohn Dyson 		return EINVAL;
8362244ea07SJohn Dyson 	}
8372244ea07SJohn Dyson 
8382244ea07SJohn Dyson 	jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo);
8392244ea07SJohn Dyson 	if (jobref == -1)
8402244ea07SJohn Dyson 		return EINVAL;
8412244ea07SJohn Dyson 
842a624e84fSJohn Dyson #if DEBUGAIO > 0
843a624e84fSJohn Dyson 	if (debugaio > 0)
844a624e84fSJohn Dyson 		printf("aio_return: jobref: %d\n", jobref);
845a624e84fSJohn Dyson #endif
846a624e84fSJohn Dyson 
8472244ea07SJohn Dyson 
8482244ea07SJohn Dyson 	for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
8492244ea07SJohn Dyson 		cb;
8502244ea07SJohn Dyson 		cb = TAILQ_NEXT(cb, plist)) {
8512244ea07SJohn Dyson 		if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
852cb226aaaSPoul-Henning Kamp 			p->p_retval[0] = cb->uaiocb._aiocb_private.status;
8532244ea07SJohn Dyson 			aio_free_entry(cb);
8542244ea07SJohn Dyson 			return 0;
8552244ea07SJohn Dyson 		}
8562244ea07SJohn Dyson 	}
8572244ea07SJohn Dyson 
8582244ea07SJohn Dyson 	status = fuword(&uap->aiocbp->_aiocb_private.status);
8592244ea07SJohn Dyson 	if (status == -1)
8602244ea07SJohn Dyson 		return 0;
8612244ea07SJohn Dyson 
8622244ea07SJohn Dyson 	return (EINVAL);
8632244ea07SJohn Dyson }
8642244ea07SJohn Dyson 
8652244ea07SJohn Dyson /*
8662244ea07SJohn Dyson  * Rundown the jobs for a given process.
8672244ea07SJohn Dyson  */
8682244ea07SJohn Dyson void
8692244ea07SJohn Dyson aio_marksuspend(struct proc *p, int njobs, int *joblist, int set) {
8702244ea07SJohn Dyson 	struct aiocblist *aiocbe;
8712244ea07SJohn Dyson 	struct kaioinfo *ki;
8722244ea07SJohn Dyson 
8732244ea07SJohn Dyson 	ki = p->p_aioinfo;
8742244ea07SJohn Dyson 	if (ki == NULL)
8752244ea07SJohn Dyson 		return;
8762244ea07SJohn Dyson 
8772244ea07SJohn Dyson 	for (aiocbe = TAILQ_FIRST(&ki->kaio_jobqueue);
8782244ea07SJohn Dyson 		aiocbe;
8792244ea07SJohn Dyson 		aiocbe = TAILQ_NEXT(aiocbe, plist)) {
8802244ea07SJohn Dyson 
8812244ea07SJohn Dyson 		if (njobs) {
8822244ea07SJohn Dyson 
8832244ea07SJohn Dyson 			int i;
8842244ea07SJohn Dyson 
8852244ea07SJohn Dyson 			for(i = 0; i < njobs; i++) {
8862244ea07SJohn Dyson 				if (((int) aiocbe->uaiocb._aiocb_private.kernelinfo) == joblist[i])
8872244ea07SJohn Dyson 					break;
8882244ea07SJohn Dyson 			}
8892244ea07SJohn Dyson 
8902244ea07SJohn Dyson 			if (i == njobs)
8912244ea07SJohn Dyson 				continue;
8922244ea07SJohn Dyson 		}
8932244ea07SJohn Dyson 
8942244ea07SJohn Dyson 		if (set)
8952244ea07SJohn Dyson 			aiocbe->jobflags |= AIOCBLIST_SUSPEND;
8962244ea07SJohn Dyson 		else
8972244ea07SJohn Dyson 			aiocbe->jobflags &= ~AIOCBLIST_SUSPEND;
8982244ea07SJohn Dyson 	}
8992244ea07SJohn Dyson }
9002244ea07SJohn Dyson 
9012244ea07SJohn Dyson /*
9022244ea07SJohn Dyson  * Allow a process to wakeup when any of the I/O requests are
9032244ea07SJohn Dyson  * completed.
9042244ea07SJohn Dyson  */
9052244ea07SJohn Dyson int
906cb226aaaSPoul-Henning Kamp aio_suspend(struct proc *p, struct aio_suspend_args *uap) {
9074a11ca4eSPoul-Henning Kamp 	struct timeval atv;
9082244ea07SJohn Dyson 	struct timespec ts;
9092244ea07SJohn Dyson 	struct aiocb *const *cbptr, *cbp;
9102244ea07SJohn Dyson 	struct kaioinfo *ki;
9112244ea07SJohn Dyson 	struct aiocblist *cb;
9122244ea07SJohn Dyson 	int i;
9132244ea07SJohn Dyson 	int error, s, timo;
9142244ea07SJohn Dyson 	int *joblist;
9152244ea07SJohn Dyson 
9162244ea07SJohn Dyson 
9172244ea07SJohn Dyson 	timo = 0;
9182244ea07SJohn Dyson 	if (uap->timeout) {
9192244ea07SJohn Dyson 		/*
9202244ea07SJohn Dyson 		 * Get timespec struct
9212244ea07SJohn Dyson 		 */
9222244ea07SJohn Dyson 		if (error = copyin((caddr_t) uap->timeout, (caddr_t) &ts, sizeof ts)) {
9232244ea07SJohn Dyson 			return error;
9242244ea07SJohn Dyson 		}
9252244ea07SJohn Dyson 
9262244ea07SJohn Dyson 		if (ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000)
9272244ea07SJohn Dyson 			return (EINVAL);
9282244ea07SJohn Dyson 
9292244ea07SJohn Dyson 		TIMESPEC_TO_TIMEVAL(&atv, &ts)
9302244ea07SJohn Dyson 		if (itimerfix(&atv))
9312244ea07SJohn Dyson 			return (EINVAL);
9322244ea07SJohn Dyson 		/*
9332244ea07SJohn Dyson 		 * XXX this is not as careful as settimeofday() about minimising
9342244ea07SJohn Dyson 		 * interrupt latency.  The hzto() interface is inconvenient as usual.
9352244ea07SJohn Dyson 		 */
9362244ea07SJohn Dyson 		s = splclock();
9372244ea07SJohn Dyson 		timevaladd(&atv, &time);
9382244ea07SJohn Dyson 		timo = hzto(&atv);
9392244ea07SJohn Dyson 		splx(s);
9402244ea07SJohn Dyson 		if (timo == 0)
9412244ea07SJohn Dyson 			timo = 1;
9422244ea07SJohn Dyson 	}
9432244ea07SJohn Dyson 
9442244ea07SJohn Dyson 	ki = p->p_aioinfo;
9452244ea07SJohn Dyson 	if (ki == NULL)
9462244ea07SJohn Dyson 		return EAGAIN;
9472244ea07SJohn Dyson 
9482244ea07SJohn Dyson 	joblist = malloc(uap->nent * sizeof(int), M_TEMP, M_WAITOK);
9492244ea07SJohn Dyson 	cbptr = uap->aiocbp;
9502244ea07SJohn Dyson 
9512244ea07SJohn Dyson 	for(i=0;i<uap->nent;i++) {
9522244ea07SJohn Dyson 		cbp = (struct aiocb *) fuword((caddr_t) &cbptr[i]);
953a624e84fSJohn Dyson #if DEBUGAIO > 1
954a624e84fSJohn Dyson 		if (debugaio > 2)
9552244ea07SJohn Dyson 			printf("cbp: %x\n", cbp);
9562244ea07SJohn Dyson #endif
9572244ea07SJohn Dyson 		joblist[i] = fuword(&cbp->_aiocb_private.kernelinfo);
9582244ea07SJohn Dyson 		cbptr++;
9592244ea07SJohn Dyson 	}
9602244ea07SJohn Dyson 
9612244ea07SJohn Dyson 
9622244ea07SJohn Dyson 	while (1) {
9632244ea07SJohn Dyson 		for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
9642244ea07SJohn Dyson 			cb;
9652244ea07SJohn Dyson 			cb = TAILQ_NEXT(cb, plist)) {
9662244ea07SJohn Dyson 			for(i=0;i<uap->nent;i++) {
9672244ea07SJohn Dyson 				if (((int) cb->uaiocb._aiocb_private.kernelinfo) == joblist[i]) {
9682244ea07SJohn Dyson 					free(joblist, M_TEMP);
9692244ea07SJohn Dyson 					return 0;
9702244ea07SJohn Dyson 				}
9712244ea07SJohn Dyson 			}
9722244ea07SJohn Dyson 		}
9732244ea07SJohn Dyson 
974a624e84fSJohn Dyson #if DEBUGAIO > 0
975a624e84fSJohn Dyson 	if (debugaio > 0) {
976a624e84fSJohn Dyson 		printf("Suspend, timeout: %d clocks, jobs:", timo);
977a624e84fSJohn Dyson 		for(i=0;i<uap->nent;i++)
978a624e84fSJohn Dyson 			printf(" %d", joblist[i]);
979a624e84fSJohn Dyson 		printf("\n");
980a624e84fSJohn Dyson 	}
981a624e84fSJohn Dyson #endif
982a624e84fSJohn Dyson 
9832244ea07SJohn Dyson 		aio_marksuspend(p, uap->nent, joblist, 1);
984a624e84fSJohn Dyson #if DEBUGAIO > 0
985a624e84fSJohn Dyson 		if (debugaio > 2) {
9862244ea07SJohn Dyson 			printf("Suspending -- waiting for all I/O's to complete: ");
9872244ea07SJohn Dyson 			for(i=0;i<uap->nent;i++)
9882244ea07SJohn Dyson 				printf(" %d", joblist[i]);
9892244ea07SJohn Dyson 			printf("\n");
990a624e84fSJohn Dyson 		}
9912244ea07SJohn Dyson #endif
9922244ea07SJohn Dyson 		error = tsleep(p, PRIBIO|PCATCH, "aiospn", timo);
9932244ea07SJohn Dyson 		aio_marksuspend(p, uap->nent, joblist, 0);
9942244ea07SJohn Dyson 
9952244ea07SJohn Dyson 		if (error == EINTR) {
996a624e84fSJohn Dyson #if DEBUGAIO > 0
997a624e84fSJohn Dyson 			if (debugaio > 2)
9982244ea07SJohn Dyson 				printf(" signal\n");
9992244ea07SJohn Dyson #endif
10002244ea07SJohn Dyson 			free(joblist, M_TEMP);
10012244ea07SJohn Dyson 			return EINTR;
10022244ea07SJohn Dyson 		} else if (error == EWOULDBLOCK) {
1003a624e84fSJohn Dyson #if DEBUGAIO > 0
1004a624e84fSJohn Dyson 			if (debugaio > 2)
10052244ea07SJohn Dyson 				printf(" timeout\n");
10062244ea07SJohn Dyson #endif
10072244ea07SJohn Dyson 			free(joblist, M_TEMP);
10082244ea07SJohn Dyson 			return EAGAIN;
10092244ea07SJohn Dyson 		}
1010a624e84fSJohn Dyson #if DEBUGAIO > 0
1011a624e84fSJohn Dyson 		if (debugaio > 2)
10122244ea07SJohn Dyson 			printf("\n");
10132244ea07SJohn Dyson #endif
10142244ea07SJohn Dyson 	}
10152244ea07SJohn Dyson 
10162244ea07SJohn Dyson /* NOTREACHED */
10172244ea07SJohn Dyson 	return EINVAL;
10182244ea07SJohn Dyson }
1019ee877a35SJohn Dyson 
1020ee877a35SJohn Dyson /*
1021ee877a35SJohn Dyson  * aio_cancel at the kernel level is a NOOP right now.  It
1022ee877a35SJohn Dyson  * might be possible to support it partially in user mode, or
1023ee877a35SJohn Dyson  * in kernel mode later on.
1024ee877a35SJohn Dyson  */
1025ee877a35SJohn Dyson int
1026cb226aaaSPoul-Henning Kamp aio_cancel(struct proc *p, struct aio_cancel_args *uap) {
1027ee877a35SJohn Dyson 	return AIO_NOTCANCELLED;
1028ee877a35SJohn Dyson }
1029ee877a35SJohn Dyson 
1030ee877a35SJohn Dyson /*
1031ee877a35SJohn Dyson  * aio_error is implemented in the kernel level for compatibility
1032ee877a35SJohn Dyson  * purposes only.  For a user mode async implementation, it would be
1033ee877a35SJohn Dyson  * best to do it in a userland subroutine.
1034ee877a35SJohn Dyson  */
1035ee877a35SJohn Dyson int
1036cb226aaaSPoul-Henning Kamp aio_error(struct proc *p, struct aio_error_args *uap) {
10372244ea07SJohn Dyson 	struct aiocblist *cb;
10382244ea07SJohn Dyson 	struct kaioinfo *ki;
10392244ea07SJohn Dyson 	int jobref;
10404a11ca4eSPoul-Henning Kamp 	int status;
1041ee877a35SJohn Dyson 
10422244ea07SJohn Dyson 	ki = p->p_aioinfo;
10432244ea07SJohn Dyson 	if (ki == NULL)
10442244ea07SJohn Dyson 		return EINVAL;
10452244ea07SJohn Dyson 
10462244ea07SJohn Dyson 	jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo);
10472244ea07SJohn Dyson 	if (jobref == -1)
1048ee877a35SJohn Dyson 		return EFAULT;
1049ee877a35SJohn Dyson 
10502244ea07SJohn Dyson 	for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
10512244ea07SJohn Dyson 		cb;
10522244ea07SJohn Dyson 		cb = TAILQ_NEXT(cb, plist)) {
10532244ea07SJohn Dyson 
10542244ea07SJohn Dyson 		if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
1055cb226aaaSPoul-Henning Kamp 			p->p_retval[0] = cb->uaiocb._aiocb_private.error;
10562244ea07SJohn Dyson 			return 0;
10572244ea07SJohn Dyson 		}
1058ee877a35SJohn Dyson 	}
1059ee877a35SJohn Dyson 
10602244ea07SJohn Dyson 	for (cb = TAILQ_FIRST(&ki->kaio_jobqueue);
10612244ea07SJohn Dyson 		cb;
10622244ea07SJohn Dyson 		cb = TAILQ_NEXT(cb, plist)) {
10632244ea07SJohn Dyson 
10642244ea07SJohn Dyson 		if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
1065cb226aaaSPoul-Henning Kamp 			p->p_retval[0] = EINPROGRESS;
10662244ea07SJohn Dyson 			return 0;
10672244ea07SJohn Dyson 		}
10682244ea07SJohn Dyson 	}
10692244ea07SJohn Dyson 
10702244ea07SJohn Dyson 	/*
10712244ea07SJohn Dyson 	 * Hack for lio
10722244ea07SJohn Dyson 	 */
10732244ea07SJohn Dyson 	status = fuword(&uap->aiocbp->_aiocb_private.status);
10742244ea07SJohn Dyson 	if (status == -1) {
10752244ea07SJohn Dyson 		return fuword(&uap->aiocbp->_aiocb_private.error);
10762244ea07SJohn Dyson 	}
10772244ea07SJohn Dyson 	return EINVAL;
1078ee877a35SJohn Dyson }
1079ee877a35SJohn Dyson 
1080ee877a35SJohn Dyson int
1081cb226aaaSPoul-Henning Kamp aio_read(struct proc *p, struct aio_read_args *uap) {
1082ee877a35SJohn Dyson 	struct filedesc *fdp;
1083ee877a35SJohn Dyson 	struct file *fp;
1084ee877a35SJohn Dyson 	struct uio auio;
1085ee877a35SJohn Dyson 	struct iovec aiov;
1086ee877a35SJohn Dyson 	unsigned int fd;
1087ee877a35SJohn Dyson 	int cnt;
1088ee877a35SJohn Dyson 	struct aiocb iocb;
10892244ea07SJohn Dyson 	int error, pmodes;
1090ee877a35SJohn Dyson 
10912244ea07SJohn Dyson 	pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes);
10922244ea07SJohn Dyson 	if ((pmodes & AIO_PMODE_SYNC) == 0) {
1093a624e84fSJohn Dyson #if DEBUGAIO > 1
1094a624e84fSJohn Dyson 		if (debugaio > 2)
1095a624e84fSJohn Dyson 			printf("queueing aio_read\n");
1096a624e84fSJohn Dyson #endif
10972244ea07SJohn Dyson 		return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ);
10982244ea07SJohn Dyson 	}
1099ee877a35SJohn Dyson 
1100ee877a35SJohn Dyson 	/*
1101ee877a35SJohn Dyson 	 * Get control block
1102ee877a35SJohn Dyson 	 */
1103ee877a35SJohn Dyson 	if (error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb))
1104ee877a35SJohn Dyson 		return error;
1105ee877a35SJohn Dyson 
1106ee877a35SJohn Dyson 	/*
1107ee877a35SJohn Dyson 	 * Get the fd info for process
1108ee877a35SJohn Dyson 	 */
1109ee877a35SJohn Dyson 	fdp = p->p_fd;
1110ee877a35SJohn Dyson 
1111ee877a35SJohn Dyson 	/*
1112ee877a35SJohn Dyson 	 * Range check file descriptor
1113ee877a35SJohn Dyson 	 */
1114ee877a35SJohn Dyson 	fd = iocb.aio_fildes;
1115ee877a35SJohn Dyson 	if (fd >= fdp->fd_nfiles)
1116ee877a35SJohn Dyson 		return EBADF;
1117ee877a35SJohn Dyson 	fp = fdp->fd_ofiles[fd];
1118ee877a35SJohn Dyson 	if ((fp == NULL) || ((fp->f_flag & FREAD) == 0))
1119ee877a35SJohn Dyson 		return EBADF;
11202244ea07SJohn Dyson 	if (iocb.aio_offset == -1LL)
1121ee877a35SJohn Dyson 		return EINVAL;
1122ee877a35SJohn Dyson 
1123ee877a35SJohn Dyson 	auio.uio_resid = iocb.aio_nbytes;
1124ee877a35SJohn Dyson 	if (auio.uio_resid < 0)
1125ee877a35SJohn Dyson 		return (EINVAL);
1126ee877a35SJohn Dyson 
11272244ea07SJohn Dyson 	/*
11282244ea07SJohn Dyson 	 * Process sync simply -- queue async request.
11292244ea07SJohn Dyson 	 */
11302244ea07SJohn Dyson 	if ((iocb._aiocb_private.privatemodes & AIO_PMODE_SYNC) == 0) {
11312244ea07SJohn Dyson 		return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ);
11322244ea07SJohn Dyson 	}
11332244ea07SJohn Dyson 
11342244ea07SJohn Dyson 	aiov.iov_base = iocb.aio_buf;
11352244ea07SJohn Dyson 	aiov.iov_len = iocb.aio_nbytes;
11362244ea07SJohn Dyson 
11372244ea07SJohn Dyson 	auio.uio_iov = &aiov;
11382244ea07SJohn Dyson 	auio.uio_iovcnt = 1;
11392244ea07SJohn Dyson 	auio.uio_offset = iocb.aio_offset;
1140ee877a35SJohn Dyson 	auio.uio_rw = UIO_READ;
1141ee877a35SJohn Dyson 	auio.uio_segflg = UIO_USERSPACE;
1142ee877a35SJohn Dyson 	auio.uio_procp = p;
1143ee877a35SJohn Dyson 
1144ee877a35SJohn Dyson 	cnt = iocb.aio_nbytes;
1145ee877a35SJohn Dyson 	error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
1146ee877a35SJohn Dyson 	if (error &&
1147ee877a35SJohn Dyson 		(auio.uio_resid != cnt) &&
1148ee877a35SJohn Dyson 		(error == ERESTART || error == EINTR || error == EWOULDBLOCK))
1149ee877a35SJohn Dyson 			error = 0;
1150ee877a35SJohn Dyson 	cnt -= auio.uio_resid;
1151cb226aaaSPoul-Henning Kamp 	p->p_retval[0] = cnt;
1152ee877a35SJohn Dyson 	return error;
1153ee877a35SJohn Dyson }
1154ee877a35SJohn Dyson 
1155ee877a35SJohn Dyson int
1156cb226aaaSPoul-Henning Kamp aio_write(struct proc *p, struct aio_write_args *uap) {
1157ee877a35SJohn Dyson 	struct filedesc *fdp;
1158ee877a35SJohn Dyson 	struct file *fp;
1159ee877a35SJohn Dyson 	struct uio auio;
1160ee877a35SJohn Dyson 	struct iovec aiov;
1161ee877a35SJohn Dyson 	unsigned int fd;
1162ee877a35SJohn Dyson 	int cnt;
1163ee877a35SJohn Dyson 	struct aiocb iocb;
1164ee877a35SJohn Dyson 	int error;
11652244ea07SJohn Dyson 	int pmodes;
11662244ea07SJohn Dyson 
11672244ea07SJohn Dyson 	/*
11682244ea07SJohn Dyson 	 * Process sync simply -- queue async request.
11692244ea07SJohn Dyson 	 */
11702244ea07SJohn Dyson 	pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes);
11712244ea07SJohn Dyson 	if ((pmodes & AIO_PMODE_SYNC) == 0) {
1172a624e84fSJohn Dyson #if DEBUGAIO > 1
1173a624e84fSJohn Dyson 		if (debugaio > 2)
1174a624e84fSJohn Dyson 			printf("queing aio_write\n");
1175a624e84fSJohn Dyson #endif
11762244ea07SJohn Dyson 		return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_WRITE);
11772244ea07SJohn Dyson 	}
1178ee877a35SJohn Dyson 
1179ee877a35SJohn Dyson 	if (error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb))
1180ee877a35SJohn Dyson 		return error;
1181ee877a35SJohn Dyson 
1182ee877a35SJohn Dyson 	/*
1183ee877a35SJohn Dyson 	 * Get the fd info for process
1184ee877a35SJohn Dyson 	 */
1185ee877a35SJohn Dyson 	fdp = p->p_fd;
1186ee877a35SJohn Dyson 
1187ee877a35SJohn Dyson 	/*
1188ee877a35SJohn Dyson 	 * Range check file descriptor
1189ee877a35SJohn Dyson 	 */
1190ee877a35SJohn Dyson 	fd = iocb.aio_fildes;
1191ee877a35SJohn Dyson 	if (fd >= fdp->fd_nfiles)
1192ee877a35SJohn Dyson 		return EBADF;
1193ee877a35SJohn Dyson 	fp = fdp->fd_ofiles[fd];
1194ee877a35SJohn Dyson 	if ((fp == NULL) || ((fp->f_flag & FWRITE) == 0))
1195ee877a35SJohn Dyson 		return EBADF;
11962244ea07SJohn Dyson 	if (iocb.aio_offset == -1LL)
1197ee877a35SJohn Dyson 		return EINVAL;
1198ee877a35SJohn Dyson 
1199ee877a35SJohn Dyson 	aiov.iov_base = iocb.aio_buf;
1200ee877a35SJohn Dyson 	aiov.iov_len = iocb.aio_nbytes;
1201ee877a35SJohn Dyson 	auio.uio_iov = &aiov;
1202ee877a35SJohn Dyson 	auio.uio_iovcnt = 1;
1203ee877a35SJohn Dyson 	auio.uio_offset = iocb.aio_offset;
1204ee877a35SJohn Dyson 
1205ee877a35SJohn Dyson 	auio.uio_resid = iocb.aio_nbytes;
1206ee877a35SJohn Dyson 	if (auio.uio_resid < 0)
1207ee877a35SJohn Dyson 		return (EINVAL);
1208ee877a35SJohn Dyson 
1209ee877a35SJohn Dyson 	auio.uio_rw = UIO_WRITE;
1210ee877a35SJohn Dyson 	auio.uio_segflg = UIO_USERSPACE;
1211ee877a35SJohn Dyson 	auio.uio_procp = p;
1212ee877a35SJohn Dyson 
1213ee877a35SJohn Dyson 	cnt = iocb.aio_nbytes;
1214ee877a35SJohn Dyson 	error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred);
1215ee877a35SJohn Dyson 	if (error) {
1216ee877a35SJohn Dyson 		if (auio.uio_resid != cnt) {
1217ee877a35SJohn Dyson 			if (error == ERESTART || error == EINTR || error == EWOULDBLOCK)
1218ee877a35SJohn Dyson 				error = 0;
1219ee877a35SJohn Dyson 			if (error == EPIPE)
1220ee877a35SJohn Dyson 				psignal(p, SIGPIPE);
1221ee877a35SJohn Dyson 		}
1222ee877a35SJohn Dyson 	}
1223ee877a35SJohn Dyson 	cnt -= auio.uio_resid;
1224cb226aaaSPoul-Henning Kamp 	p->p_retval[0] = cnt;
1225ee877a35SJohn Dyson 	return error;
1226ee877a35SJohn Dyson }
1227ee877a35SJohn Dyson 
1228ee877a35SJohn Dyson int
1229cb226aaaSPoul-Henning Kamp lio_listio(struct proc *p, struct lio_listio_args *uap) {
12304a11ca4eSPoul-Henning Kamp 	int nent, nentqueued;
12312244ea07SJohn Dyson 	struct aiocb *iocb, * const *cbptr;
12322244ea07SJohn Dyson 	struct aiocblist *cb;
12332244ea07SJohn Dyson 	struct kaioinfo *ki;
12342244ea07SJohn Dyson 	int error, runningcode;
1235ee877a35SJohn Dyson 	int i;
1236ee877a35SJohn Dyson 
1237a624e84fSJohn Dyson 	if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) {
1238a624e84fSJohn Dyson #if DEBUGAIO > 0
1239a624e84fSJohn Dyson 		if (debugaio > 0)
1240a624e84fSJohn Dyson 			printf("lio_listio: bad mode: %d\n", uap->mode);
1241a624e84fSJohn Dyson #endif
1242ee877a35SJohn Dyson 		return EINVAL;
1243a624e84fSJohn Dyson 	}
12442244ea07SJohn Dyson 
12452244ea07SJohn Dyson 	nent = uap->nent;
1246a624e84fSJohn Dyson 	if (nent > AIO_LISTIO_MAX) {
1247a624e84fSJohn Dyson #if DEBUGAIO > 0
1248a624e84fSJohn Dyson 		if (debugaio > 0)
1249a624e84fSJohn Dyson 			printf("lio_listio: nent > AIO_LISTIO_MAX: %d > %d\n", nent, AIO_LISTIO_MAX);
1250a624e84fSJohn Dyson #endif
12512244ea07SJohn Dyson 		return EINVAL;
1252a624e84fSJohn Dyson 	}
12532244ea07SJohn Dyson 
12542244ea07SJohn Dyson 	if (p->p_aioinfo == NULL) {
12552244ea07SJohn Dyson 		aio_init_aioinfo(p);
12562244ea07SJohn Dyson 	}
12572244ea07SJohn Dyson 
1258a624e84fSJohn Dyson 	if ((nent + num_queue_count) > max_queue_count) {
1259a624e84fSJohn Dyson #if DEBUGAIO > 0
1260a624e84fSJohn Dyson 		if (debugaio > 0)
1261a624e84fSJohn Dyson 			printf("lio_listio: (nent(%d) + num_queue_count(%d)) > max_queue_count(%d)\n", nent, num_queue_count, max_queue_count);
1262a624e84fSJohn Dyson #endif
12632244ea07SJohn Dyson 		return EAGAIN;
1264a624e84fSJohn Dyson 	}
12652244ea07SJohn Dyson 
12662244ea07SJohn Dyson 	ki = p->p_aioinfo;
1267a624e84fSJohn Dyson 	if ((nent + ki->kaio_queue_count) > ki->kaio_qallowed_count) {
1268a624e84fSJohn Dyson #if DEBUGAIO > 0
1269a624e84fSJohn Dyson 		if (debugaio > 0)
1270a624e84fSJohn Dyson 			printf("lio_listio: (nent(%d) + ki->kaio_queue_count(%d)) > ki->kaio_qallowed_count(%d)\n", nent, ki->kaio_queue_count, ki->kaio_qallowed_count);
1271a624e84fSJohn Dyson #endif
12722244ea07SJohn Dyson 		return EAGAIN;
1273a624e84fSJohn Dyson 	}
12742244ea07SJohn Dyson 
12752244ea07SJohn Dyson /*
12762244ea07SJohn Dyson 	num_queue_count += nent;
12772244ea07SJohn Dyson 	ki->kaio_queue_count += nent;
1278a624e84fSJohn Dyson */
12792244ea07SJohn Dyson 	nentqueued = 0;
12802244ea07SJohn Dyson 
12812244ea07SJohn Dyson /*
12822244ea07SJohn Dyson  * get pointers to the list of I/O requests
12832244ea07SJohn Dyson 	iocbvec = malloc(uap->nent * sizeof(struct aiocb *), M_TEMP, M_WAITOK);
12842244ea07SJohn Dyson  */
12852244ea07SJohn Dyson 
12862244ea07SJohn Dyson 	cbptr = uap->acb_list;
12872244ea07SJohn Dyson 	for(i = 0; i < uap->nent; i++) {
12882244ea07SJohn Dyson 		iocb = (struct aiocb *) fuword((caddr_t) &cbptr[i]);
1289a624e84fSJohn Dyson 		error = _aio_aqueue(p, iocb, 0);
12902244ea07SJohn Dyson 		if (error == 0)
12912244ea07SJohn Dyson 			nentqueued++;
12922244ea07SJohn Dyson 	}
12932244ea07SJohn Dyson 
1294a624e84fSJohn Dyson 	/*
1295a624e84fSJohn Dyson 	 * If we haven't queued any, then just return error
1296a624e84fSJohn Dyson 	 */
1297a624e84fSJohn Dyson 	if (nentqueued == 0) {
1298a624e84fSJohn Dyson #if DEBUGAIO > 0
1299a624e84fSJohn Dyson 		if (debugaio > 0)
1300a624e84fSJohn Dyson 			printf("lio_listio: none queued\n");
1301a624e84fSJohn Dyson #endif
13022244ea07SJohn Dyson 		return EIO;
1303a624e84fSJohn Dyson 	}
13042244ea07SJohn Dyson 
1305a624e84fSJohn Dyson #if DEBUGAIO > 0
1306a624e84fSJohn Dyson 	if (debugaio > 0)
1307a624e84fSJohn Dyson 		printf("lio_listio: %d queued\n", nentqueued);
1308a624e84fSJohn Dyson #endif
1309a624e84fSJohn Dyson 
1310a624e84fSJohn Dyson 	/*
1311a624e84fSJohn Dyson 	 * Calculate the appropriate error return
1312a624e84fSJohn Dyson 	 */
13132244ea07SJohn Dyson 	runningcode = 0;
13142244ea07SJohn Dyson 	if (nentqueued != nent)
13152244ea07SJohn Dyson 		runningcode = EIO;
13162244ea07SJohn Dyson 
13172244ea07SJohn Dyson 	if (uap->mode == LIO_WAIT) {
13182244ea07SJohn Dyson 		while (1) {
13192244ea07SJohn Dyson 			for(i = 0; i < uap->nent; i++) {
13202244ea07SJohn Dyson 				int found;
13212244ea07SJohn Dyson 				int jobref, command, status;
13222244ea07SJohn Dyson 
1323a624e84fSJohn Dyson 				/*
1324a624e84fSJohn Dyson 				 * Fetch address of the control buf pointer in user space
1325a624e84fSJohn Dyson 				 */
13262244ea07SJohn Dyson 				iocb = (struct aiocb *) fuword((caddr_t) &cbptr[i]);
1327a624e84fSJohn Dyson 
1328a624e84fSJohn Dyson 				/*
1329a624e84fSJohn Dyson 				 * Fetch the associated command from user space
1330a624e84fSJohn Dyson 				 */
13312244ea07SJohn Dyson 				command = fuword(&iocb->aio_lio_opcode);
13322244ea07SJohn Dyson 				if (command == LIO_NOP)
13332244ea07SJohn Dyson 					continue;
13342244ea07SJohn Dyson 
1335a624e84fSJohn Dyson 				/*
1336a624e84fSJohn Dyson 				 * If the status shows error or complete, then skip this entry.
1337a624e84fSJohn Dyson 				 */
13382244ea07SJohn Dyson 				status = fuword(&iocb->_aiocb_private.status);
1339a624e84fSJohn Dyson 				if (status != 0)
13402244ea07SJohn Dyson 					continue;
1341a624e84fSJohn Dyson 
13422244ea07SJohn Dyson 				jobref = fuword(&iocb->_aiocb_private.kernelinfo);
13432244ea07SJohn Dyson 
13442244ea07SJohn Dyson 				found = 0;
13452244ea07SJohn Dyson 				for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
13462244ea07SJohn Dyson 					cb;
13472244ea07SJohn Dyson 					cb = TAILQ_NEXT(cb, plist)) {
13482244ea07SJohn Dyson 					if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
13492244ea07SJohn Dyson 						found++;
13502244ea07SJohn Dyson 						break;
13512244ea07SJohn Dyson 					}
13522244ea07SJohn Dyson 				}
13532244ea07SJohn Dyson 				if (found == 0)
13542244ea07SJohn Dyson 					break;
13552244ea07SJohn Dyson 			}
13562244ea07SJohn Dyson 
1357a624e84fSJohn Dyson 			/*
1358a624e84fSJohn Dyson 			 * If all I/Os have been disposed of, then we can return
1359a624e84fSJohn Dyson 			 */
13602244ea07SJohn Dyson 			if (i == uap->nent) {
13612244ea07SJohn Dyson 				return runningcode;
13622244ea07SJohn Dyson 			}
13632244ea07SJohn Dyson 
13642244ea07SJohn Dyson 			aio_marksuspend(p, 0, 0, 1);
13652244ea07SJohn Dyson 			error = tsleep(p, PRIBIO|PCATCH, "aiospn", 0);
13662244ea07SJohn Dyson 			aio_marksuspend(p, 0, 0, 0);
13672244ea07SJohn Dyson 
13682244ea07SJohn Dyson 			if (error == EINTR) {
13692244ea07SJohn Dyson 				return EINTR;
13702244ea07SJohn Dyson 			} else if (error == EWOULDBLOCK) {
13712244ea07SJohn Dyson 				return EAGAIN;
13722244ea07SJohn Dyson 			}
13732244ea07SJohn Dyson 
13742244ea07SJohn Dyson 		}
13752244ea07SJohn Dyson 	}
13762244ea07SJohn Dyson 
13772244ea07SJohn Dyson 	return runningcode;
1378ee877a35SJohn Dyson }
1379