xref: /freebsd/sys/kern/vfs_aio.c (revision 9fb4ca060c29aa20b3184ef2e29ea0137e2ffdac)
/*
 * Copyright (c) 1997 John S. Dyson.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. John S. Dyson's name may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * DISCLAIMER:  This code isn't warranted to do anything useful.  Anything
 * bad that happens because of using this software isn't the responsibility
 * of the author.  This software is distributed AS-IS.
 *
 * $Id: vfs_aio.c,v 1.7 1997/10/11 18:31:25 phk Exp $
 */

/*
 * This file contains support for the POSIX.4 AIO facility.
 *
 * The initial version provides only the (bogus) synchronous semantics,
 * but asynchronous operation will be supported in the future.  Note
 * that a bit in a private field allows the user-mode subroutine package
 * to adapt the kernel operations to true POSIX.4 semantics for forward
 * compatibility.
 *
 * This code is used to support true POSIX.4 AIO/LIO with the help of a
 * user-mode subroutine package.  Eventually more of this support will
 * be pushed into the kernel.
 */
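
/*
 * Illustrative userland usage of the interface this file backs (a
 * minimal sketch, not kernel code; error handling omitted):
 *
 *	struct aiocb cb;
 *	char buf[512];
 *	int len;
 *
 *	memset(&cb, 0, sizeof cb);
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof buf;
 *	cb.aio_offset = 0;
 *	aio_read(&cb);				(queue the request)
 *	while (aio_error(&cb) == EINPROGRESS)
 *		;				(poll, or use aio_suspend())
 *	len = aio_return(&cb);			(reap the result)
 */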
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/sysproto.h>
35 #include <sys/filedesc.h>
36 #include <sys/kernel.h>
37 #include <sys/fcntl.h>
38 #include <sys/file.h>
39 #include <sys/unistd.h>
40 #include <sys/vnode.h>
41 #include <sys/proc.h>
42 #include <sys/uio.h>
43 #include <sys/malloc.h>
44 #include <sys/signalvar.h>
45 #include <sys/sysctl.h>
46 
47 #include <vm/vm.h>
48 #include <vm/vm_param.h>
49 #include <vm/vm_extern.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_map.h>
52 #include <sys/aio.h>
53 #include <sys/shm.h>
54 
55 #include <machine/cpu.h>
56 
static MALLOC_DEFINE(M_AIO, "AIO", "AIO structure(s)");

#define AIOCBLIST_CANCELLED	0x1
#define AIOCBLIST_RUNDOWN	0x4
#define AIOCBLIST_ASYNCFREE	0x8
#define AIOCBLIST_SUSPEND	0x10

#if 0
#define DEBUGAIO
#define DIAGNOSTIC
#endif

#define DEBUGAIO 1

static	int jobrefid;

#define JOBST_NULL		0x0
#define JOBST_JOBQPROC		0x1
#define JOBST_JOBQGLOBAL	0x2
#define JOBST_JOBRUNNING	0x3
#define JOBST_JOBFINISHED	0x4
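
/*
 * Job lifecycle (descriptive): an aiocblist entry moves from JOBST_NULL
 * (on aio_freejobs) to JOBST_JOBQPROC or JOBST_JOBQGLOBAL when queued,
 * to JOBST_JOBRUNNING while an AIO daemon performs the I/O, then to
 * JOBST_JOBFINISHED until aio_return()/rundown frees it back to
 * JOBST_NULL.
 */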

#define MAX_AIO_PER_PROC	32
#define MAX_AIO_QUEUE_PER_PROC	256 /* Bigger than AIO_LISTIO_MAX */
#define MAX_AIO_PROCS		128
#define MAX_AIO_QUEUE		1024 /* Bigger than AIO_LISTIO_MAX */
#define TARGET_AIO_PROCS	64

int max_aio_procs = MAX_AIO_PROCS;
int num_aio_procs = 0;
int target_aio_procs = TARGET_AIO_PROCS;
int max_queue_count = MAX_AIO_QUEUE;
int num_queue_count = 0;

int max_aio_per_proc = MAX_AIO_PER_PROC,
	max_aio_queue_per_proc = MAX_AIO_QUEUE_PER_PROC;

SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0, "AIO mgmt");

SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc,
	CTLFLAG_RW, &max_aio_per_proc, 0, "");

SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc,
	CTLFLAG_RW, &max_aio_queue_per_proc, 0, "");

SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs,
	CTLFLAG_RW, &max_aio_procs, 0, "");

SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs,
	CTLFLAG_RD, &num_aio_procs, 0, "");

SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count,
	CTLFLAG_RD, &num_queue_count, 0, "");

SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue,
	CTLFLAG_RW, &max_queue_count, 0, "");

SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs,
	CTLFLAG_RW, &target_aio_procs, 0, "");

#if DEBUGAIO > 0
static int debugaio;
SYSCTL_INT(_vfs_aio, OID_AUTO, debugaio, CTLFLAG_RW, &debugaio, 0, "");
#endif
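
/*
 * These knobs appear under the vfs.aio sysctl node; e.g. (illustrative,
 * from a root shell):
 *
 *	sysctl vfs.aio.max_aio_per_proc=64
 *	sysctl vfs.aio.num_queue_count
 */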

/*
 * Job queue item
 */
struct aiocblist {
	TAILQ_ENTRY (aiocblist) list;		/* List of jobs */
	TAILQ_ENTRY (aiocblist) plist;		/* List of jobs for proc */
	int	jobflags;
	int	jobstate;
	struct	proc *userproc;			/* User process */
	struct	aioproclist *jobaioproc;	/* AIO process descriptor */
	struct	aiocb uaiocb;			/* Kernel I/O control block */
};

#define AIOP_FREE	0x1			/* proc on free queue */
/*
 * AIO process info
 */
struct aioproclist {
	int aioprocflags;			/* AIO proc flags */
	TAILQ_ENTRY(aioproclist) list;		/* List of processes */
	struct proc *aioproc;			/* The AIO thread */
	TAILQ_HEAD (,aiocblist) jobtorun;	/* suggested job to run */
};

struct kaioinfo {
	int	kaio_maxactive_count;	/* maximum number of AIOs */
	int	kaio_active_count;	/* number of currently used AIOs */
	int	kaio_qallowed_count;	/* maximum size of AIO queue */
	int	kaio_queue_count;	/* size of AIO queue */
	TAILQ_HEAD (,aiocblist) kaio_jobqueue;	/* job queue for process */
	TAILQ_HEAD (,aiocblist) kaio_jobdone;	/* done queue for process */
};

TAILQ_HEAD (,aioproclist) aio_freeproc, aio_activeproc;
TAILQ_HEAD(,aiocblist) aio_jobs;			/* Async job list */
TAILQ_HEAD(,aiocblist) aio_freejobs;
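
/*
 * Queue roles (descriptive): aio_freeproc and aio_activeproc hold idle
 * and busy AIO daemons, aio_jobs is the global backlog of queued
 * requests, and aio_freejobs caches free aiocblist entries.  Each user
 * process also tracks its own jobs on the kaio_jobqueue and
 * kaio_jobdone lists in its struct kaioinfo.
 */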

void aio_init_aioinfo(struct proc *p);
void aio_onceonly(void *);
int aio_free_entry(struct aiocblist *aiocbe);
void aio_cancel_internal(struct aiocblist *aiocbe);
void aio_process(struct aiocblist *aiocbe);
void pmap_newvmspace(struct vmspace *);
static int aio_newproc(void);
static int aio_aqueue(struct proc *p, struct aiocb *job, int type);
static void aio_marksuspend(struct proc *p, int njobs, int *joblist, int set);

SYSINIT(aio, SI_SUB_VFS, SI_ORDER_ANY, aio_onceonly, NULL);

/*
 * Startup initialization
 */
void
aio_onceonly(void *na) {
	TAILQ_INIT(&aio_freeproc);
	TAILQ_INIT(&aio_activeproc);
	TAILQ_INIT(&aio_jobs);
	TAILQ_INIT(&aio_freejobs);
}

/*
 * Init the per-process aioinfo structure.
 */
void
aio_init_aioinfo(struct proc *p) {
	struct kaioinfo *ki;
	if (p->p_aioinfo == NULL) {
		ki = malloc(sizeof (struct kaioinfo), M_AIO, M_WAITOK);
		p->p_aioinfo = ki;
		ki->kaio_maxactive_count = max_aio_per_proc;
		ki->kaio_active_count = 0;
		ki->kaio_qallowed_count = max_aio_queue_per_proc;
		ki->kaio_queue_count = 0;
		TAILQ_INIT(&ki->kaio_jobdone);
		TAILQ_INIT(&ki->kaio_jobqueue);
	}
}

/*
 * Free a job entry.  Wait for completion if it is currently
 * active, but don't delay forever.  If we delay, we return
 * a flag that says that we have to restart the queue scan.
 */
int
aio_free_entry(struct aiocblist *aiocbe) {
	struct kaioinfo *ki;
	struct aioproclist *aiop;
	struct proc *p;

	if (aiocbe->jobstate == JOBST_NULL)
		panic("aio_free_entry: freeing already free job");

	p = aiocbe->userproc;
	ki = p->p_aioinfo;
	if (ki == NULL)
		panic("aio_free_entry: missing p->p_aioinfo");

	if (aiocbe->jobstate == JOBST_JOBRUNNING) {
		if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE)
			return 0;
		aiocbe->jobflags |= AIOCBLIST_RUNDOWN;
		tsleep(aiocbe, PRIBIO|PCATCH, "jobwai", 0);
/*
		if (tsleep(aiocbe, PRIBIO|PCATCH, "jobwai", hz*5)) {
			aiocbe->jobflags |= AIOCBLIST_ASYNCFREE;
			aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN;
			return 1;
		}
		aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN;
*/
	}
	aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE;

	if (ki->kaio_queue_count <= 0)
		panic("aio_free_entry: process queue size <= 0");
	if (num_queue_count <= 0)
		panic("aio_free_entry: system wide queue size <= 0");

	--ki->kaio_queue_count;
	--num_queue_count;
#if DEBUGAIO > 0
	if (debugaio > 0)
		printf("freeing entry: %d, %d\n",
			ki->kaio_queue_count, num_queue_count);
#endif

	if (aiocbe->jobstate == JOBST_JOBQPROC) {
		aiop = aiocbe->jobaioproc;
		TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list);
	} else if (aiocbe->jobstate == JOBST_JOBQGLOBAL) {
		TAILQ_REMOVE(&aio_jobs, aiocbe, list);
	} else if (aiocbe->jobstate == JOBST_JOBFINISHED) {
		ki = p->p_aioinfo;
		TAILQ_REMOVE(&ki->kaio_jobdone, aiocbe, plist);
	}
	TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
	aiocbe->jobstate = JOBST_NULL;
	return 0;
}

/*
 * Rundown the jobs for a given process.
 */
void
aio_proc_rundown(struct proc *p) {
	struct kaioinfo *ki;
	struct aiocblist *aiocbe, *aiocbn;

	ki = p->p_aioinfo;
	if (ki == NULL)
		return;

	while (ki->kaio_active_count > 0) {
		if (tsleep(ki, PRIBIO, "kaiowt", 60 * hz))
			break;
	}

#if DEBUGAIO > 0
	if (debugaio > 0)
		printf("Proc rundown: %d %d\n",
			num_queue_count, ki->kaio_queue_count);
#endif

restart1:
	for (aiocbe = TAILQ_FIRST(&ki->kaio_jobdone);
		aiocbe;
		aiocbe = aiocbn) {
		aiocbn = TAILQ_NEXT(aiocbe, plist);
		if (aio_free_entry(aiocbe))
			goto restart1;
	}

restart2:
	for (aiocbe = TAILQ_FIRST(&ki->kaio_jobqueue);
		aiocbe;
		aiocbe = aiocbn) {
		aiocbn = TAILQ_NEXT(aiocbe, plist);
		if (aio_free_entry(aiocbe))
			goto restart2;
	}
	free(ki, M_AIO);
	p->p_aioinfo = NULL;
}

/*
 * Select a job to run (called by an AIO daemon)
 */
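/*
 * Selection policy (descriptive): jobs handed directly to this daemon
 * (jobtorun) are taken first; otherwise the oldest global job whose
 * owning process is still below its per-process active-I/O limit is
 * chosen.
 */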
static struct aiocblist *
aio_selectjob(struct aioproclist *aiop) {

	struct aiocblist *aiocbe;

	aiocbe = TAILQ_FIRST(&aiop->jobtorun);
	if (aiocbe) {
		TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list);
		return aiocbe;
	}

	for (aiocbe = TAILQ_FIRST(&aio_jobs);
		aiocbe;
		aiocbe = TAILQ_NEXT(aiocbe, list)) {
		struct kaioinfo *ki;
		struct proc *userp;

		userp = aiocbe->userproc;
		ki = userp->p_aioinfo;

		if (ki->kaio_active_count < ki->kaio_maxactive_count) {
			TAILQ_REMOVE(&aio_jobs, aiocbe, list);
			return aiocbe;
		}
	}

	return NULL;
}

/*
 * The AIO activity proper.
 */
void
aio_process(struct aiocblist *aiocbe) {
	struct filedesc *fdp;
	struct proc *userp;
	struct aiocb *cb;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	unsigned int fd;
	int cnt;
	int error;
	off_t offset;

	userp = aiocbe->userproc;
	cb = &aiocbe->uaiocb;

#if DEBUGAIO > 0
	if (debugaio > 1)
		printf("AIO %s, fd: %d, offset: 0x%x, address: 0x%x, size: %d\n",
			cb->aio_lio_opcode == LIO_READ ? "Read" : "Write",
			cb->aio_fildes, (int) cb->aio_offset,
			cb->aio_buf, cb->aio_nbytes);
#endif
#if SLOW
	tsleep(curproc, PVM, "aioprc", hz);
#endif
	fdp = curproc->p_fd;
	/*
	 * The file descriptor was already range checked and validated
	 * when the request was queued in _aio_aqueue().
	 */
	fd = cb->aio_fildes;
	fp = fdp->fd_ofiles[fd];

	aiov.iov_base = cb->aio_buf;
	aiov.iov_len = cb->aio_nbytes;

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = offset = cb->aio_offset;
	auio.uio_resid = cb->aio_nbytes;
	cnt = cb->aio_nbytes;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = curproc;

	if (cb->aio_lio_opcode == LIO_READ) {
		auio.uio_rw = UIO_READ;
		error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
	} else {
		auio.uio_rw = UIO_WRITE;
		error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred);
	}

	if (error) {
		if (auio.uio_resid != cnt) {
			if (error == ERESTART || error == EINTR || error == EWOULDBLOCK)
				error = 0;
			if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE))
				psignal(userp, SIGPIPE);
		}
	}
#if DEBUGAIO > 0
	if (debugaio > 1)
		printf("%s complete: error: %d, status: %d, nio: %d, resid: %d, offset: %d\n",
			cb->aio_lio_opcode == LIO_READ ? "Read" : "Write",
			error, cnt, cnt - auio.uio_resid, auio.uio_resid,
			(int) offset & 0xffffffff);
#endif

	cnt -= auio.uio_resid;
	cb->_aiocb_private.error = error;
	cb->_aiocb_private.status = cnt;
}

/*
 * The AIO daemon.
 */
static void
aio_startproc(void *uproc)
{
	struct aioproclist *aiop;

	/*
	 * Allocate and ready the aio control info
	 */
	aiop = malloc(sizeof *aiop, M_AIO, M_WAITOK);
	aiop->aioproc = curproc;
	aiop->aioprocflags = AIOP_FREE;		/* malloc'ed memory is not zeroed */
	TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list);
	TAILQ_INIT(&aiop->jobtorun);

	/*
	 * Get rid of current address space
	 */
	if (curproc->p_vmspace->vm_refcnt == 1) {
		if (curproc->p_vmspace->vm_shm)
			shmexit(curproc);
		pmap_remove_pages(&curproc->p_vmspace->vm_pmap, 0, USRSTACK);
		vm_map_remove(&curproc->p_vmspace->vm_map, 0, USRSTACK);
	} else {
		vmspace_exec(curproc);
	}

	/*
	 * Make up a name for the daemon
	 */
	strcpy(curproc->p_comm, "aiodaemon");

	/*
	 * Get rid of our current filedescriptors
	 */
	fdfree(curproc);
	curproc->p_fd = NULL;
	curproc->p_ucred = crcopy(curproc->p_ucred);
	curproc->p_ucred->cr_uid = 0;
	curproc->p_ucred->cr_groups[0] = 1;
	curproc->p_flag |= P_SYSTEM;

#if DEBUGAIO > 0
	if (debugaio > 2)
		printf("Started new process: %d\n", curproc->p_pid);
#endif
	wakeup(&aio_freeproc);

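	/*
	 * Main daemon loop (descriptive): sleep on the free list until
	 * work arrives, then temporarily adopt the submitting process's
	 * vmspace and file descriptor table so the I/O is performed
	 * against the user's buffers and files, and revert to our own
	 * minimal address space when the work runs out.
	 */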
	while (1) {
		struct vmspace *myvm, *tmpvm;
		struct proc *cp = curproc;
		struct aiocblist *aiocbe;

		if ((aiop->aioprocflags & AIOP_FREE) == 0) {
			TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list);
			aiop->aioprocflags |= AIOP_FREE;
		}
		if (tsleep(cp, PRIBIO, "aiordy", hz*30)) {
			if ((num_aio_procs > target_aio_procs) &&
				(TAILQ_FIRST(&aiop->jobtorun) == NULL))
				exit1(curproc, 0);
		}

		if (aiop->aioprocflags & AIOP_FREE) {
			TAILQ_REMOVE(&aio_freeproc, aiop, list);
			TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list);
			aiop->aioprocflags &= ~AIOP_FREE;
		}

		myvm = curproc->p_vmspace;

		while ((aiocbe = aio_selectjob(aiop)) != NULL) {
			struct aiocb *cb;
			struct kaioinfo *ki;
			struct proc *userp;

			cb = &aiocbe->uaiocb;
			userp = aiocbe->userproc;
			ki = userp->p_aioinfo;

			aiocbe->jobstate = JOBST_JOBRUNNING;
			if (userp != cp) {
				tmpvm = curproc->p_vmspace;
				curproc->p_vmspace = userp->p_vmspace;
				++curproc->p_vmspace->vm_refcnt;
				pmap_activate(curproc);
				if (tmpvm != myvm) {
					vmspace_free(tmpvm);
				}
				if (curproc->p_fd)
					fdfree(curproc);
				curproc->p_fd = fdshare(userp);
				cp = userp;
			}

			ki->kaio_active_count++;
#if DEBUGAIO > 0
			if (debugaio > 0)
				printf("process: pid: %d(%d), active: %d, queue: %d\n",
					cb->_aiocb_private.kernelinfo,
					userp->p_pid, ki->kaio_active_count, ki->kaio_queue_count);
#endif
			aiocbe->jobaioproc = aiop;
			aio_process(aiocbe);
			--ki->kaio_active_count;
			if (ki->kaio_active_count == 0)
				wakeup(ki);
#if DEBUGAIO > 0
			if (debugaio > 0)
				printf("DONE process: pid: %d(%d), active: %d, queue: %d\n",
					cb->_aiocb_private.kernelinfo,
					userp->p_pid, ki->kaio_active_count, ki->kaio_queue_count);
#endif

			aiocbe->jobstate = JOBST_JOBFINISHED;

			if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) {
				aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE;
				TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
			} else {
				TAILQ_REMOVE(&ki->kaio_jobqueue,
					aiocbe, plist);
				TAILQ_INSERT_TAIL(&ki->kaio_jobdone,
					aiocbe, plist);
			}

			if (aiocbe->jobflags & AIOCBLIST_RUNDOWN) {
				wakeup(aiocbe);
				aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN;
			}

			if (aiocbe->jobflags & AIOCBLIST_SUSPEND) {
				wakeup(userp);
				aiocbe->jobflags &= ~AIOCBLIST_SUSPEND;
			}

			if (cb->aio_sigevent.sigev_notify == SIGEV_SIGNAL) {
				psignal(userp, cb->aio_sigevent.sigev_signo);
			}
		}

		if (cp != curproc) {
			tmpvm = curproc->p_vmspace;
			curproc->p_vmspace = myvm;
			pmap_activate(curproc);
			vmspace_free(tmpvm);
			if (curproc->p_fd)
				fdfree(curproc);
			curproc->p_fd = NULL;
			cp = curproc;
		}
	}
}

/*
 * Create a new AIO daemon.
 */
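/*
 * Note on the rfork flags used below (descriptive): RFPROC creates the
 * new process, RFMEM lets it initially share our address space (which
 * aio_startproc() then replaces), and RFCFDG gives it a clean file
 * descriptor table.
 */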
static int
aio_newproc() {
	int error;
	int rval[2];
	struct rfork_args rfa;
	struct proc *p;

	rfa.flags = RFMEM | RFPROC | RFCFDG;

	if ((error = rfork(curproc, &rfa, &rval[0])) != 0)
		return error;

	cpu_set_fork_handler(p = pfind(rval[0]), aio_startproc, curproc);

#if DEBUGAIO > 0
	if (debugaio > 2)
		printf("Waiting for new process: %d, count: %d\n",
			curproc->p_pid, num_aio_procs);
#endif

	error = tsleep(&aio_freeproc, PZERO, "aiosta", 5*hz);
	++num_aio_procs;

	return error;
}

/*
 * Queue a new AIO request.
 */
static int
_aio_aqueue(struct proc *p, struct aiocb *job, int type) {
	struct filedesc *fdp;
	struct file *fp;
	unsigned int fd;

	int error;
	int opcode;
	struct aiocblist *aiocbe;
	struct aioproclist *aiop;
	struct kaioinfo *ki;

	if ((aiocbe = TAILQ_FIRST(&aio_freejobs)) != NULL) {
		TAILQ_REMOVE(&aio_freejobs, aiocbe, list);
	} else {
		aiocbe = malloc(sizeof *aiocbe, M_AIO, M_WAITOK);
	}

	error = copyin((caddr_t)job,
		(caddr_t) &aiocbe->uaiocb, sizeof aiocbe->uaiocb);
	if (error) {
#if DEBUGAIO > 0
		if (debugaio > 0)
			printf("aio_aqueue: Copyin error: %d\n", error);
#endif
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		return error;
	}

	/*
	 * Get the opcode
	 */
	if (type != LIO_NOP) {
		aiocbe->uaiocb.aio_lio_opcode = type;
	}
	opcode = aiocbe->uaiocb.aio_lio_opcode;

	/*
	 * Get the fd info for process
	 */
	fdp = p->p_fd;

	/*
	 * Range check file descriptor
	 */
	fd = aiocbe->uaiocb.aio_fildes;
	if (fd >= fdp->fd_nfiles) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
#if DEBUGAIO > 0
			if (debugaio > 0)
				printf("aio_aqueue: Null type\n");
#endif
			suword(&job->_aiocb_private.status, -1);
			suword(&job->_aiocb_private.error, EBADF);
		}
		return EBADF;
	}

#if DEBUGAIO > 0
	if (debugaio > 3)
		printf("aio_aqueue: fd: %d, cmd: %d, buf: %d, cnt: %d, fileoffset: %d\n",
			aiocbe->uaiocb.aio_fildes,
			aiocbe->uaiocb.aio_lio_opcode,
			(int) aiocbe->uaiocb.aio_buf & 0xffffffff,
			aiocbe->uaiocb.aio_nbytes,
			(int) aiocbe->uaiocb.aio_offset & 0xffffffff);
#endif

	fp = fdp->fd_ofiles[fd];
	if ((fp == NULL) ||
		((opcode == LIO_WRITE) && ((fp->f_flag & FWRITE) == 0))) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.status, -1);
			suword(&job->_aiocb_private.error, EBADF);
		}
#if DEBUGAIO > 0
		if (debugaio > 0)
			printf("aio_aqueue: Bad file descriptor\n");
#endif
		return EBADF;
	}

	if (aiocbe->uaiocb.aio_offset == -1LL) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.status, -1);
			suword(&job->_aiocb_private.error, EINVAL);
		}
#if DEBUGAIO > 0
		if (debugaio > 0)
			printf("aio_aqueue: bad offset\n");
#endif
		return EINVAL;
	}

#if DEBUGAIO > 0
	if (debugaio > 2)
		printf("job addr: 0x%x, 0x%x, %d\n", job, &job->_aiocb_private.kernelinfo, jobrefid);
#endif

	error = suword(&job->_aiocb_private.kernelinfo, jobrefid);
	if (error) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.status, -1);
			suword(&job->_aiocb_private.error, EINVAL);
		}
#if DEBUGAIO > 0
		if (debugaio > 0)
			printf("aio_aqueue: store of kernelinfo to user space failed\n");
#endif
		return error;
	}

	aiocbe->uaiocb._aiocb_private.kernelinfo = (void *)jobrefid;
#if DEBUGAIO > 0
	if (debugaio > 2)
		printf("aio_aqueue: New job: %d...  ", jobrefid);
#endif
	++jobrefid;

	if (opcode == LIO_NOP) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.status, -1);
			suword(&job->_aiocb_private.error, 0);
		}
		return 0;
	}

	if ((opcode != LIO_NOP) &&
		(opcode != LIO_READ) && (opcode != LIO_WRITE)) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.status, -1);
			suword(&job->_aiocb_private.error, EINVAL);
		}
#if DEBUGAIO > 0
		if (debugaio > 0)
			printf("aio_aqueue: invalid LIO op: %d\n", opcode);
#endif
		return EINVAL;
	}

	suword(&job->_aiocb_private.error, 0);
	suword(&job->_aiocb_private.status, 0);
	aiocbe->userproc = p;
	aiocbe->jobflags = 0;
	ki = p->p_aioinfo;
	++num_queue_count;
	++ki->kaio_queue_count;

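	/*
	 * Dispatch policy (descriptive): prefer handing the job directly
	 * to an idle daemon; otherwise start a new daemon if both the
	 * global and per-process limits allow; failing that, leave the
	 * job on the global queue for a daemon to pick up later.
	 */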
retryproc:
	if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) {
#if DEBUGAIO > 0
		if (debugaio > 0)
			printf("found a free AIO process\n");
#endif
		TAILQ_REMOVE(&aio_freeproc, aiop, list);
		TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list);
		aiop->aioprocflags &= ~AIOP_FREE;
		TAILQ_INSERT_TAIL(&aiop->jobtorun, aiocbe, list);
		TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist);
		aiocbe->jobstate = JOBST_JOBQPROC;

		aiocbe->jobaioproc = aiop;
		wakeup(aiop->aioproc);
	} else if ((num_aio_procs < max_aio_procs) &&
			(ki->kaio_active_count < ki->kaio_maxactive_count)) {
#if DEBUGAIO > 0
		if (debugaio > 1) {
			printf("aio_aqueue: starting new proc: num_aio_procs(%d), max_aio_procs(%d)\n", num_aio_procs, max_aio_procs);
			printf("            ki->kaio_active_count(%d), ki->kaio_maxactive_count(%d)\n", ki->kaio_active_count, ki->kaio_maxactive_count);
		}
#endif
		if ((error = aio_newproc()) != 0) {
#if DEBUGAIO > 0
			if (debugaio > 0)
				printf("aio_aqueue: problem sleeping for starting proc: %d\n",
					error);
#endif
		}
		goto retryproc;
	} else {
#if DEBUGAIO > 0
		if (debugaio > 0)
			printf("queuing to global queue\n");
#endif
		TAILQ_INSERT_TAIL(&aio_jobs, aiocbe, list);
		TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist);
		aiocbe->jobstate = JOBST_JOBQGLOBAL;
	}

	return 0;
}

static int
aio_aqueue(struct proc *p, struct aiocb *job, int type) {
	struct kaioinfo *ki;

	if (p->p_aioinfo == NULL) {
		aio_init_aioinfo(p);
	}

	if (num_queue_count >= max_queue_count)
		return EAGAIN;

	ki = p->p_aioinfo;
	if (ki->kaio_queue_count >= ki->kaio_qallowed_count)
		return EAGAIN;

	return _aio_aqueue(p, job, type);
}

/*
 * Support the aio_return system call
 */
int
aio_return(struct proc *p, struct aio_return_args *uap, int *retval) {
	int jobref, status;
	struct aiocblist *cb;
	struct kaioinfo *ki;

	ki = p->p_aioinfo;
	if (ki == NULL) {
		return EINVAL;
	}

	jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo);
	if (jobref == -1)
		return EINVAL;

#if DEBUGAIO > 0
	if (debugaio > 0)
		printf("aio_return: jobref: %d\n", jobref);
#endif

	for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
		cb;
		cb = TAILQ_NEXT(cb, plist)) {
		if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
			retval[0] = cb->uaiocb._aiocb_private.status;
			aio_free_entry(cb);
			return 0;
		}
	}

	status = fuword(&uap->aiocbp->_aiocb_private.status);
	if (status == -1)
		return 0;

	return (EINVAL);
}

/*
 * Mark (or clear) the suspend flag on the given jobs of a process, so
 * a completing AIO daemon knows to wake up the suspended caller.
 */
static void
aio_marksuspend(struct proc *p, int njobs, int *joblist, int set) {
	struct aiocblist *aiocbe;
	struct kaioinfo *ki;

	ki = p->p_aioinfo;
	if (ki == NULL)
		return;

	for (aiocbe = TAILQ_FIRST(&ki->kaio_jobqueue);
		aiocbe;
		aiocbe = TAILQ_NEXT(aiocbe, plist)) {

		if (njobs) {

			int i;

			for (i = 0; i < njobs; i++) {
				if (((int) aiocbe->uaiocb._aiocb_private.kernelinfo) == joblist[i])
					break;
			}

			if (i == njobs)
				continue;
		}

		if (set)
			aiocbe->jobflags |= AIOCBLIST_SUSPEND;
		else
			aiocbe->jobflags &= ~AIOCBLIST_SUSPEND;
	}
}

/*
 * Allow a process to wakeup when any of the I/O requests are
 * completed.
 */
int
aio_suspend(struct proc *p, struct aio_suspend_args *uap, int *retval) {
	struct timeval atv;
	struct timespec ts;
	struct aiocb *const *cbptr, *cbp;
	struct kaioinfo *ki;
	struct aiocblist *cb;
	int i;
	int error, s, timo;
	int *joblist;

	timo = 0;
	if (uap->timeout) {
		/*
		 * Get timespec struct
		 */
		if ((error = copyin((caddr_t) uap->timeout, (caddr_t) &ts, sizeof ts)) != 0) {
			return error;
		}

		if (ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000)
			return (EINVAL);

		TIMESPEC_TO_TIMEVAL(&atv, &ts);
		if (itimerfix(&atv))
			return (EINVAL);
		/*
		 * XXX this is not as careful as settimeofday() about minimising
		 * interrupt latency.  The hzto() interface is inconvenient as usual.
		 */
		s = splclock();
		timevaladd(&atv, &time);
		timo = hzto(&atv);
		splx(s);
		if (timo == 0)
			timo = 1;
	}
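
	/*
	 * For example (illustrative): a timeout of { tv_sec = 1,
	 * tv_nsec = 500000000 } becomes a 1.5 second timeval, and
	 * hzto() then converts that absolute deadline into roughly
	 * 1.5 * hz clock ticks for tsleep() below.
	 */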
	ki = p->p_aioinfo;
	if (ki == NULL)
		return EAGAIN;

	joblist = malloc(uap->nent * sizeof(int), M_TEMP, M_WAITOK);
	cbptr = uap->aiocbp;

	for (i = 0; i < uap->nent; i++) {
		cbp = (struct aiocb *) fuword((caddr_t) &cbptr[i]);
#if DEBUGAIO > 1
		if (debugaio > 2)
			printf("cbp: %x\n", cbp);
#endif
		joblist[i] = fuword(&cbp->_aiocb_private.kernelinfo);
	}

	while (1) {
		for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
			cb;
			cb = TAILQ_NEXT(cb, plist)) {
			for (i = 0; i < uap->nent; i++) {
				if (((int) cb->uaiocb._aiocb_private.kernelinfo) == joblist[i]) {
					free(joblist, M_TEMP);
					return 0;
				}
			}
		}

#if DEBUGAIO > 0
		if (debugaio > 0) {
			printf("Suspend, timeout: %d clocks, jobs:", timo);
			for (i = 0; i < uap->nent; i++)
				printf(" %d", joblist[i]);
			printf("\n");
		}
#endif

		aio_marksuspend(p, uap->nent, joblist, 1);
#if DEBUGAIO > 0
		if (debugaio > 2) {
			printf("Suspending -- waiting for all I/O's to complete: ");
			for (i = 0; i < uap->nent; i++)
				printf(" %d", joblist[i]);
			printf("\n");
		}
#endif
		error = tsleep(p, PRIBIO|PCATCH, "aiospn", timo);
		aio_marksuspend(p, uap->nent, joblist, 0);

		if (error == EINTR) {
#if DEBUGAIO > 0
			if (debugaio > 2)
				printf(" signal\n");
#endif
			free(joblist, M_TEMP);
			return EINTR;
		} else if (error == EWOULDBLOCK) {
#if DEBUGAIO > 0
			if (debugaio > 2)
				printf(" timeout\n");
#endif
			free(joblist, M_TEMP);
			return EAGAIN;
		}
#if DEBUGAIO > 0
		if (debugaio > 2)
			printf("\n");
#endif
	}

/* NOTREACHED */
	return EINVAL;
}

/*
 * aio_cancel at the kernel level is a NOOP right now.  It
 * might be possible to support it partially in user mode, or
 * in kernel mode later on.
 */
int
aio_cancel(struct proc *p, struct aio_cancel_args *uap, int *retval) {
	return AIO_NOTCANCELLED;
}

/*
 * aio_error is implemented in the kernel level for compatibility
 * purposes only.  For a user mode async implementation, it would be
 * best to do it in a userland subroutine.
 */
int
aio_error(struct proc *p, struct aio_error_args *uap, int *retval) {
	struct aiocblist *cb;
	struct kaioinfo *ki;
	int jobref;
	int status;

	ki = p->p_aioinfo;
	if (ki == NULL)
		return EINVAL;

	jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo);
	if (jobref == -1)
		return EFAULT;

	for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
		cb;
		cb = TAILQ_NEXT(cb, plist)) {

		if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
			retval[0] = cb->uaiocb._aiocb_private.error;
			return 0;
		}
	}

	for (cb = TAILQ_FIRST(&ki->kaio_jobqueue);
		cb;
		cb = TAILQ_NEXT(cb, plist)) {

		if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
			retval[0] = EINPROGRESS;
			return 0;
		}
	}

	/*
	 * Hack for lio
	 */
	status = fuword(&uap->aiocbp->_aiocb_private.status);
	if (status == -1) {
		return fuword(&uap->aiocbp->_aiocb_private.error);
	}
	return EINVAL;
}

int
aio_read(struct proc *p, struct aio_read_args *uap, int *retval) {
	struct filedesc *fdp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	unsigned int fd;
	int cnt;
	struct aiocb iocb;
	int error, pmodes;

	pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes);
	if ((pmodes & AIO_PMODE_SYNC) == 0) {
#if DEBUGAIO > 1
		if (debugaio > 2)
			printf("queueing aio_read\n");
#endif
		return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ);
	}

	/*
	 * Get control block
	 */
	if ((error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb)) != 0)
		return error;

	/*
	 * Get the fd info for process
	 */
	fdp = p->p_fd;

	/*
	 * Range check file descriptor
	 */
	fd = iocb.aio_fildes;
	if (fd >= fdp->fd_nfiles)
		return EBADF;
	fp = fdp->fd_ofiles[fd];
	if ((fp == NULL) || ((fp->f_flag & FREAD) == 0))
		return EBADF;
	if (iocb.aio_offset == -1LL)
		return EINVAL;

	auio.uio_resid = iocb.aio_nbytes;
	if (auio.uio_resid < 0)
		return (EINVAL);

	/*
	 * Process sync simply -- queue async request.
	 */
	if ((iocb._aiocb_private.privatemodes & AIO_PMODE_SYNC) == 0) {
		return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ);
	}

	aiov.iov_base = iocb.aio_buf;
	aiov.iov_len = iocb.aio_nbytes;

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = iocb.aio_offset;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	cnt = iocb.aio_nbytes;
	error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
	if (error &&
		(auio.uio_resid != cnt) &&
		(error == ERESTART || error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
	*retval = cnt;
	return error;
}

int
aio_write(struct proc *p, struct aio_write_args *uap, int *retval) {
	struct filedesc *fdp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	unsigned int fd;
	int cnt;
	struct aiocb iocb;
	int error;
	int pmodes;

	/*
	 * Process sync simply -- queue async request.
	 */
	pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes);
	if ((pmodes & AIO_PMODE_SYNC) == 0) {
#if DEBUGAIO > 1
		if (debugaio > 2)
			printf("queueing aio_write\n");
#endif
		return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_WRITE);
	}

	if ((error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb)) != 0)
		return error;

	/*
	 * Get the fd info for process
	 */
	fdp = p->p_fd;

	/*
	 * Range check file descriptor
	 */
	fd = iocb.aio_fildes;
	if (fd >= fdp->fd_nfiles)
		return EBADF;
	fp = fdp->fd_ofiles[fd];
	if ((fp == NULL) || ((fp->f_flag & FWRITE) == 0))
		return EBADF;
	if (iocb.aio_offset == -1LL)
		return EINVAL;

	aiov.iov_base = iocb.aio_buf;
	aiov.iov_len = iocb.aio_nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = iocb.aio_offset;

	auio.uio_resid = iocb.aio_nbytes;
	if (auio.uio_resid < 0)
		return (EINVAL);

	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	cnt = iocb.aio_nbytes;
	error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred);
	if (error) {
		if (auio.uio_resid != cnt) {
			if (error == ERESTART || error == EINTR || error == EWOULDBLOCK)
				error = 0;
			if (error == EPIPE)
				psignal(p, SIGPIPE);
		}
	}
	cnt -= auio.uio_resid;
	*retval = cnt;
	return error;
}

int
lio_listio(struct proc *p, struct lio_listio_args *uap, int *retval) {
	int nent, nentqueued;
	struct aiocb *iocb, * const *cbptr;
	struct aiocblist *cb;
	struct kaioinfo *ki;
	int error, runningcode;
	int i;

	if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) {
#if DEBUGAIO > 0
		if (debugaio > 0)
			printf("lio_listio: bad mode: %d\n", uap->mode);
#endif
		return EINVAL;
	}

	nent = uap->nent;
	if (nent > AIO_LISTIO_MAX) {
#if DEBUGAIO > 0
		if (debugaio > 0)
			printf("lio_listio: nent > AIO_LISTIO_MAX: %d > %d\n", nent, AIO_LISTIO_MAX);
#endif
		return EINVAL;
	}

	if (p->p_aioinfo == NULL) {
		aio_init_aioinfo(p);
	}

	if ((nent + num_queue_count) > max_queue_count) {
#if DEBUGAIO > 0
		if (debugaio > 0)
			printf("lio_listio: (nent(%d) + num_queue_count(%d)) > max_queue_count(%d)\n", nent, num_queue_count, max_queue_count);
#endif
		return EAGAIN;
	}

	ki = p->p_aioinfo;
	if ((nent + ki->kaio_queue_count) > ki->kaio_qallowed_count) {
#if DEBUGAIO > 0
		if (debugaio > 0)
			printf("lio_listio: (nent(%d) + ki->kaio_queue_count(%d)) > ki->kaio_qallowed_count(%d)\n", nent, ki->kaio_queue_count, ki->kaio_qallowed_count);
#endif
		return EAGAIN;
	}

/*
	num_queue_count += nent;
	ki->kaio_queue_count += nent;
*/
	nentqueued = 0;

/*
 * get pointers to the list of I/O requests
	iocbvec = malloc(uap->nent * sizeof(struct aiocb *), M_TEMP, M_WAITOK);
 */

	cbptr = uap->acb_list;
	for (i = 0; i < uap->nent; i++) {
		iocb = (struct aiocb *) fuword((caddr_t) &cbptr[i]);
		error = _aio_aqueue(p, iocb, 0);
		if (error == 0)
			nentqueued++;
	}

	/*
	 * If we haven't queued any, then just return error
	 */
	if (nentqueued == 0) {
#if DEBUGAIO > 0
		if (debugaio > 0)
			printf("lio_listio: none queued\n");
#endif
		return EIO;
	}

#if DEBUGAIO > 0
	if (debugaio > 0)
		printf("lio_listio: %d queued\n", nentqueued);
#endif

	/*
	 * Calculate the appropriate error return
	 */
	runningcode = 0;
	if (nentqueued != nent)
		runningcode = EIO;

	if (uap->mode == LIO_WAIT) {
		while (1) {
			for (i = 0; i < uap->nent; i++) {
				int found;
				int jobref, command, status;

				/*
				 * Fetch address of the control buf pointer in user space
				 */
				iocb = (struct aiocb *) fuword((caddr_t) &cbptr[i]);

				/*
				 * Fetch the associated command from user space
				 */
				command = fuword(&iocb->aio_lio_opcode);
				if (command == LIO_NOP)
					continue;

				/*
				 * If the status shows error or complete, then skip this entry.
				 */
				status = fuword(&iocb->_aiocb_private.status);
				if (status != 0)
					continue;

				jobref = fuword(&iocb->_aiocb_private.kernelinfo);

				found = 0;
				for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
					cb;
					cb = TAILQ_NEXT(cb, plist)) {
					if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
						found++;
						break;
					}
				}
				if (found == 0)
					break;
			}

			/*
			 * If all I/Os have been disposed of, then we can return
			 */
			if (i == uap->nent) {
				return runningcode;
			}

			aio_marksuspend(p, 0, 0, 1);
			error = tsleep(p, PRIBIO|PCATCH, "aiospn", 0);
			aio_marksuspend(p, 0, 0, 0);

			if (error == EINTR) {
				return EINTR;
			} else if (error == EWOULDBLOCK) {
				return EAGAIN;
			}
		}
	}

	return runningcode;
}