xref: /illumos-gate/usr/src/uts/common/os/aio.c (revision 524e558aae3e99de2bdab73592f925ea489fbe07)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Kernel asynchronous I/O.
31  * This is only for raw devices now (as of Nov. 1993).
32  */
33 
34 #include <sys/types.h>
35 #include <sys/errno.h>
36 #include <sys/conf.h>
37 #include <sys/file.h>
38 #include <sys/fs/snode.h>
39 #include <sys/unistd.h>
40 #include <sys/cmn_err.h>
41 #include <vm/as.h>
42 #include <vm/faultcode.h>
43 #include <sys/sysmacros.h>
44 #include <sys/procfs.h>
45 #include <sys/kmem.h>
46 #include <sys/autoconf.h>
47 #include <sys/ddi_impldefs.h>
48 #include <sys/sunddi.h>
49 #include <sys/aio_impl.h>
50 #include <sys/debug.h>
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/vmsystm.h>
54 #include <sys/fs/pxfs_ki.h>
55 #include <sys/contract/process_impl.h>
56 
57 /*
58  * external entry point.
59  */
/*
 * kaioc() is the native entry on _LP64 kernels and receives six long
 * arguments.  kaio() receives the argument vector plus an rval_t; per the
 * sysent tables later in this file it is the 32-bit compatibility entry on
 * _LP64 kernels and the only entry otherwise.
 */
60 #ifdef _LP64
61 static int64_t kaioc(long, long, long, long, long, long);
62 #endif
63 static int kaio(ulong_t *, rval_t *);
64 
65 
/*
 * run_mode selectors that kaioc()/kaio() hand to the handlers.  In
 * aiosuspend() they pick the user aiocb layout: AIO_32 selects aiocb32_t,
 * AIO_LARGEFILE selects aiocb64_32_t and anything else in the native data
 * model selects aiocb_t.
 */
66 #define	AIO_64	0
67 #define	AIO_32	1
68 #define	AIO_LARGEFILE	2
69 
70 /*
71  * implementation specific functions (private)
72  */
73 #ifdef _LP64
74 static int alio(int, aiocb_t **, int, struct sigevent *);
75 #endif
76 static int aionotify(void);
77 static int aioinit(void);
78 static int aiostart(void);
79 static void alio_cleanup(aio_t *, aiocb_t **, int, int);
/*
 * check_vp() returns a pointer to a function of the same shape as
 * driver_aio_read()/driver_aio_write() declared below.
 */
80 static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
81     cred_t *);
82 static void lio_set_error(aio_req_t *);
83 static aio_t *aio_aiop_alloc();
84 static int aio_req_alloc(aio_req_t **, aio_result_t *);
85 static int aio_lio_alloc(aio_lio_t **);
86 static aio_req_t *aio_req_done(void *);
87 static aio_req_t *aio_req_remove(aio_req_t *);
88 static int aio_req_find(aio_result_t *, aio_req_t **);
89 static int aio_hash_insert(struct aio_req_t *, aio_t *);
90 static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
91     aio_result_t *, vnode_t *);
92 static int aio_cleanup_thread(aio_t *);
93 static aio_lio_t *aio_list_get(aio_result_t *);
94 static void lio_set_uerror(void *, int);
95 extern void aio_zerolen(aio_req_t *);
/* Handlers dispatched from kaioc()/kaio(); the trailing int is run_mode. */
96 static int aiowait(struct timeval *, int, long	*);
97 static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
98 static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
99     aio_req_t *reqlist, aio_t *aiop, model_t model);
100 static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
101 static int aiosuspend(void *, int, struct  timespec *, int,
102     long	*, int);
103 static int aliowait(int, void *, int, void *, int);
104 static int aioerror(void *, int);
105 static int aio_cancel(int, void *, long	*, int);
106 static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
107 static int aiorw(int, void *, int, int);
108 
/* List-I/O variants used by kaio() for AIOLIO64 (alioLF) and AIOLIO (alio32). */
109 static int alioLF(int, void *, int, void *);
110 static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
111     aio_result_t *, vnode_t *);
112 static int alio32(int, void *, int, void *);
113 static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
114 static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
115 
116 #ifdef  _SYSCALL32_IMPL
117 static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
118 void	aiocb_32ton(aiocb32_t *, aiocb_t *);
119 #endif /* _SYSCALL32_IMPL */
120 
121 /*
122  * implementation specific functions (external)
123  */
124 void aio_req_free(aio_t *, aio_req_t *);
125 
126 /*
127  * Event Port framework
128  */
129 
130 void aio_req_free_port(aio_t *, aio_req_t *);
131 static int aio_port_callback(void *, int *, pid_t, int, void *);
132 
133 /*
134  * This is the loadable module wrapper.
135  */
136 #include <sys/modctl.h>
137 #include <sys/syscall.h>
138 
/*
 * System call table entries for kaio.
 * NOTE(review): the first member looks like the argument count (6 matches
 * kaioc()'s six longs, 7 matches kaio() reading uap[0]..uap[6]), the second
 * the entry flags and the third the handler -- confirm against struct sysent.
 */
139 #ifdef _LP64
140 
/* _LP64 kernel: native callers enter through kaioc(). */
141 static struct sysent kaio_sysent = {
142 	6,
143 	SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
144 	(int (*)())kaioc
145 };
146 
147 #ifdef _SYSCALL32_IMPL
/* _LP64 kernel with 32-bit syscall support: 32-bit callers enter via kaio(). */
148 static struct sysent kaio_sysent32 = {
149 	7,
150 	SE_NOUNLOAD | SE_64RVAL,
151 	kaio
152 };
153 #endif  /* _SYSCALL32_IMPL */
154 
155 #else   /* _LP64 */
156 
/* Non-_LP64 kernel: kaio() is the only entry point. */
157 static struct sysent kaio_sysent = {
158 	7,
159 	SE_NOUNLOAD | SE_32RVAL1,
160 	kaio
161 };
162 
163 #endif  /* _LP64 */
164 
165 /*
166  * Module linkage information for the kernel.
167  */
168 
/* Ties kaio_sysent to the system call module operations. */
169 static struct modlsys modlsys = {
170 	&mod_syscallops,
171 	"kernel Async I/O",
172 	&kaio_sysent
173 };
174 
175 #ifdef  _SYSCALL32_IMPL
/* Same for the 32-bit compatibility entry. */
176 static struct modlsys modlsys32 = {
177 	&mod_syscallops32,
178 	"kernel Async I/O for 32 bit compatibility",
179 	&kaio_sysent32
180 };
181 #endif  /* _SYSCALL32_IMPL */
182 
183 
/*
 * NULL-terminated list of the linkage structures above; this is what
 * _init(), _fini() and _info() pass to mod_install(), mod_remove() and
 * mod_info().
 */
184 static struct modlinkage modlinkage = {
185 	MODREV_1,
186 	&modlsys,
187 #ifdef  _SYSCALL32_IMPL
188 	&modlsys32,
189 #endif
190 	NULL
191 };
192 
193 int
194 _init(void)
195 {
196 	int retval;
197 
198 	if ((retval = mod_install(&modlinkage)) != 0)
199 		return (retval);
200 
201 	return (0);
202 }
203 
204 int
205 _fini(void)
206 {
207 	int retval;
208 
209 	retval = mod_remove(&modlinkage);
210 
211 	return (retval);
212 }
213 
214 int
215 _info(struct modinfo *modinfop)
216 {
217 	return (mod_info(&modlinkage, modinfop));
218 }
219 
220 #ifdef	_LP64
221 static int64_t
222 kaioc(
223 	long	a0,
224 	long	a1,
225 	long	a2,
226 	long	a3,
227 	long	a4,
228 	long	a5)
229 {
230 	int	error;
231 	long	rval = 0;
232 
233 	switch ((int)a0 & ~AIO_POLL_BIT) {
234 	case AIOREAD:
235 		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
236 		    (offset_t)a4, (aio_result_t *)a5, FREAD);
237 		break;
238 	case AIOWRITE:
239 		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
240 		    (offset_t)a4, (aio_result_t *)a5, FWRITE);
241 		break;
242 	case AIOWAIT:
243 		error = aiowait((struct timeval *)a1, (int)a2, &rval);
244 		break;
245 	case AIOWAITN:
246 		error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
247 		    (timespec_t *)a4);
248 		break;
249 	case AIONOTIFY:
250 		error = aionotify();
251 		break;
252 	case AIOINIT:
253 		error = aioinit();
254 		break;
255 	case AIOSTART:
256 		error = aiostart();
257 		break;
258 	case AIOLIO:
259 		error = alio((int)a1, (aiocb_t **)a2, (int)a3,
260 		    (struct sigevent *)a4);
261 		break;
262 	case AIOLIOWAIT:
263 		error = aliowait((int)a1, (void *)a2, (int)a3,
264 		    (struct sigevent *)a4, AIO_64);
265 		break;
266 	case AIOSUSPEND:
267 		error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
268 		    (int)a4, &rval, AIO_64);
269 		break;
270 	case AIOERROR:
271 		error = aioerror((void *)a1, AIO_64);
272 		break;
273 	case AIOAREAD:
274 		error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
275 		break;
276 	case AIOAWRITE:
277 		error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
278 		break;
279 	case AIOCANCEL:
280 		error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
281 		break;
282 
283 	/*
284 	 * The large file related stuff is valid only for
285 	 * 32 bit kernel and not for 64 bit kernel
286 	 * On 64 bit kernel we convert large file calls
287 	 * to regular 64bit calls.
288 	 */
289 
290 	default:
291 		error = EINVAL;
292 	}
293 	if (error)
294 		return ((int64_t)set_errno(error));
295 	return (rval);
296 }
297 #endif
298 
299 static int
300 kaio(
301 	ulong_t *uap,
302 	rval_t *rvp)
303 {
304 	long rval = 0;
305 	int	error = 0;
306 	offset_t	off;
307 
308 
309 		rvp->r_vals = 0;
310 #if defined(_LITTLE_ENDIAN)
311 	off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
312 #else
313 	off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
314 #endif
315 
316 	switch (uap[0] & ~AIO_POLL_BIT) {
317 	/*
318 	 * It must be the 32 bit system call on 64 bit kernel
319 	 */
320 	case AIOREAD:
321 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
322 		    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
323 	case AIOWRITE:
324 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
325 		    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
326 	case AIOWAIT:
327 		error = aiowait((struct	timeval *)uap[1], (int)uap[2],
328 		    &rval);
329 		break;
330 	case AIOWAITN:
331 		error = aiowaitn((void *)uap[1], (uint_t)uap[2],
332 		    (uint_t *)uap[3], (timespec_t *)uap[4]);
333 		break;
334 	case AIONOTIFY:
335 		return (aionotify());
336 	case AIOINIT:
337 		return (aioinit());
338 	case AIOSTART:
339 		return (aiostart());
340 	case AIOLIO:
341 		return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
342 		    (void *)uap[4]));
343 	case AIOLIOWAIT:
344 		return (aliowait((int)uap[1], (void *)uap[2],
345 		    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
346 	case AIOSUSPEND:
347 		error = aiosuspend((void *)uap[1], (int)uap[2],
348 		    (timespec_t *)uap[3], (int)uap[4],
349 		    &rval, AIO_32);
350 		break;
351 	case AIOERROR:
352 		return (aioerror((void *)uap[1], AIO_32));
353 	case AIOAREAD:
354 		return (aiorw((int)uap[0], (void *)uap[1],
355 		    FREAD, AIO_32));
356 	case AIOAWRITE:
357 		return (aiorw((int)uap[0], (void *)uap[1],
358 		    FWRITE, AIO_32));
359 	case AIOCANCEL:
360 		error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
361 		    AIO_32));
362 		break;
363 	case AIOLIO64:
364 		return (alioLF((int)uap[1], (void *)uap[2],
365 		    (int)uap[3], (void *)uap[4]));
366 	case AIOLIOWAIT64:
367 		return (aliowait(uap[1], (void *)uap[2],
368 		    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
369 	case AIOSUSPEND64:
370 		error = aiosuspend((void *)uap[1], (int)uap[2],
371 		    (timespec_t *)uap[3], (int)uap[4], &rval,
372 		    AIO_LARGEFILE);
373 		break;
374 	case AIOERROR64:
375 		return (aioerror((void *)uap[1], AIO_LARGEFILE));
376 	case AIOAREAD64:
377 		return (aiorw((int)uap[0], (void *)uap[1], FREAD,
378 		    AIO_LARGEFILE));
379 	case AIOAWRITE64:
380 		return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
381 		    AIO_LARGEFILE));
382 	case AIOCANCEL64:
383 		error = (aio_cancel((int)uap[1], (void *)uap[2],
384 		    &rval, AIO_LARGEFILE));
385 		break;
386 	default:
387 		return (EINVAL);
388 	}
389 
390 	rvp->r_val1 = rval;
391 	return (error);
392 }
393 
394 /*
395  * wake up LWPs in this process that are sleeping in
396  * aiowait().
397  */
398 static int
399 aionotify(void)
400 {
401 	aio_t	*aiop;
402 
403 	aiop = curproc->p_aio;
404 	if (aiop == NULL)
405 		return (0);
406 
407 	mutex_enter(&aiop->aio_mutex);
408 	aiop->aio_notifycnt++;
409 	cv_broadcast(&aiop->aio_waitcv);
410 	mutex_exit(&aiop->aio_mutex);
411 
412 	return (0);
413 }
414 
415 static int
416 timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
417 	timestruc_t **rqtp, int *blocking)
418 {
419 #ifdef	_SYSCALL32_IMPL
420 	struct timeval32 wait_time_32;
421 #endif
422 	struct timeval wait_time;
423 	model_t	model = get_udatamodel();
424 
425 	*rqtp = NULL;
426 	if (timout == NULL) {		/* wait indefinitely */
427 		*blocking = 1;
428 		return (0);
429 	}
430 
431 	/*
432 	 * Need to correctly compare with the -1 passed in for a user
433 	 * address pointer, with both 32 bit and 64 bit apps.
434 	 */
435 	if (model == DATAMODEL_NATIVE) {
436 		if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */
437 			*blocking = 0;
438 			return (0);
439 		}
440 
441 		if (copyin(timout, &wait_time, sizeof (wait_time)))
442 			return (EFAULT);
443 	}
444 #ifdef	_SYSCALL32_IMPL
445 	else {
446 		/*
447 		 * -1 from a 32bit app. It will not get sign extended.
448 		 * don't wait if -1.
449 		 */
450 		if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
451 			*blocking = 0;
452 			return (0);
453 		}
454 
455 		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
456 			return (EFAULT);
457 		TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
458 	}
459 #endif  /* _SYSCALL32_IMPL */
460 
461 	if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */
462 		*blocking = 0;
463 		return (0);
464 	}
465 
466 	if (wait_time.tv_sec < 0 ||
467 	    wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
468 		return (EINVAL);
469 
470 	rqtime->tv_sec = wait_time.tv_sec;
471 	rqtime->tv_nsec = wait_time.tv_usec * 1000;
472 	*rqtp = rqtime;
473 	*blocking = 1;
474 
475 	return (0);
476 }
477 
478 static int
479 timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
480 	timestruc_t **rqtp, int *blocking)
481 {
482 #ifdef	_SYSCALL32_IMPL
483 	timespec32_t wait_time_32;
484 #endif
485 	model_t	model = get_udatamodel();
486 
487 	*rqtp = NULL;
488 	if (timout == NULL) {
489 		*blocking = 1;
490 		return (0);
491 	}
492 
493 	if (model == DATAMODEL_NATIVE) {
494 		if (copyin(timout, rqtime, sizeof (*rqtime)))
495 			return (EFAULT);
496 	}
497 #ifdef	_SYSCALL32_IMPL
498 	else {
499 		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
500 			return (EFAULT);
501 		TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
502 	}
503 #endif  /* _SYSCALL32_IMPL */
504 
505 	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
506 		*blocking = 0;
507 		return (0);
508 	}
509 
510 	if (rqtime->tv_sec < 0 ||
511 	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
512 		return (EINVAL);
513 
514 	*rqtp = rqtime;
515 	*blocking = 1;
516 
517 	return (0);
518 }
519 
520 /*ARGSUSED*/
521 static int
522 aiowait(
523 	struct timeval	*timout,
524 	int	dontblockflg,
525 	long	*rval)
526 {
527 	int 		error;
528 	aio_t		*aiop;
529 	aio_req_t	*reqp;
530 	clock_t		status;
531 	int		blocking;
532 	int		timecheck;
533 	timestruc_t	rqtime;
534 	timestruc_t	*rqtp;
535 
536 	aiop = curproc->p_aio;
537 	if (aiop == NULL)
538 		return (EINVAL);
539 
540 	/*
541 	 * Establish the absolute future time for the timeout.
542 	 */
543 	error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
544 	if (error)
545 		return (error);
546 	if (rqtp) {
547 		timestruc_t now;
548 		timecheck = timechanged;
549 		gethrestime(&now);
550 		timespecadd(rqtp, &now);
551 	}
552 
553 	mutex_enter(&aiop->aio_mutex);
554 	for (;;) {
555 		/* process requests on poll queue */
556 		if (aiop->aio_pollq) {
557 			mutex_exit(&aiop->aio_mutex);
558 			aio_cleanup(0);
559 			mutex_enter(&aiop->aio_mutex);
560 		}
561 		if ((reqp = aio_req_remove(NULL)) != NULL) {
562 			*rval = (long)reqp->aio_req_resultp;
563 			break;
564 		}
565 		/* user-level done queue might not be empty */
566 		if (aiop->aio_notifycnt > 0) {
567 			aiop->aio_notifycnt--;
568 			*rval = 1;
569 			break;
570 		}
571 		/* don't block if no outstanding aio */
572 		if (aiop->aio_outstanding == 0 && dontblockflg) {
573 			error = EINVAL;
574 			break;
575 		}
576 		if (blocking) {
577 			status = cv_waituntil_sig(&aiop->aio_waitcv,
578 			    &aiop->aio_mutex, rqtp, timecheck);
579 
580 			if (status > 0)		/* check done queue again */
581 				continue;
582 			if (status == 0) {	/* interrupted by a signal */
583 				error = EINTR;
584 				*rval = -1;
585 			} else {		/* timer expired */
586 				error = ETIME;
587 			}
588 		}
589 		break;
590 	}
591 	mutex_exit(&aiop->aio_mutex);
592 	if (reqp) {
593 		aphysio_unlock(reqp);
594 		aio_copyout_result(reqp);
595 		mutex_enter(&aiop->aio_mutex);
596 		aio_req_free(aiop, reqp);
597 		mutex_exit(&aiop->aio_mutex);
598 	}
599 	return (error);
600 }
601 
602 /*
603  * aiowaitn can be used to reap completed asynchronous requests submitted with
604  * lio_listio, aio_read or aio_write.
605  * This function only reaps asynchronous raw I/Os.
606  */
607 
608 /*ARGSUSED*/
609 static int
610 aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
611 {
612 	int 		error = 0;
613 	aio_t		*aiop;
614 	aio_req_t	*reqlist = NULL;
615 	caddr_t		iocblist = NULL;	/* array of iocb ptr's */
616 	uint_t		waitcnt, cnt = 0;	/* iocb cnt */
617 	size_t		iocbsz;			/* users iocb size */
618 	size_t		riocbsz;		/* returned iocb size */
619 	int		iocb_index = 0;
620 	model_t		model = get_udatamodel();
621 	int		blocking = 1;
622 	int		timecheck;
623 	timestruc_t	rqtime;
624 	timestruc_t	*rqtp;
625 
626 	aiop = curproc->p_aio;
627 	if (aiop == NULL)
628 		return (EINVAL);
629 
630 	if (aiop->aio_outstanding == 0)
631 		return (EAGAIN);
632 
633 	if (copyin(nwait, &waitcnt, sizeof (uint_t)))
634 		return (EFAULT);
635 
636 	/* set *nwait to zero, if we must return prematurely */
637 	if (copyout(&cnt, nwait, sizeof (uint_t)))
638 		return (EFAULT);
639 
640 	if (waitcnt == 0) {
641 		blocking = 0;
642 		rqtp = NULL;
643 		waitcnt = nent;
644 	} else {
645 		error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
646 		if (error)
647 			return (error);
648 	}
649 
650 	if (model == DATAMODEL_NATIVE)
651 		iocbsz = (sizeof (aiocb_t *) * nent);
652 #ifdef	_SYSCALL32_IMPL
653 	else
654 		iocbsz = (sizeof (caddr32_t) * nent);
655 #endif  /* _SYSCALL32_IMPL */
656 
657 	/*
658 	 * Only one aio_waitn call is allowed at a time.
659 	 * The active aio_waitn will collect all requests
660 	 * out of the "done" list and if necessary it will wait
661 	 * for some/all pending requests to fulfill the nwait
662 	 * parameter.
663 	 * A second or further aio_waitn calls will sleep here
664 	 * until the active aio_waitn finishes and leaves the kernel
665 	 * If the second call does not block (poll), then return
666 	 * immediately with the error code : EAGAIN.
667 	 * If the second call should block, then sleep here, but
668 	 * do not touch the timeout. The timeout starts when this
669 	 * aio_waitn-call becomes active.
670 	 */
671 
672 	mutex_enter(&aiop->aio_mutex);
673 
674 	while (aiop->aio_flags & AIO_WAITN) {
675 		if (blocking == 0) {
676 			mutex_exit(&aiop->aio_mutex);
677 			return (EAGAIN);
678 		}
679 
680 		/* block, no timeout */
681 		aiop->aio_flags |= AIO_WAITN_PENDING;
682 		if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
683 			mutex_exit(&aiop->aio_mutex);
684 			return (EINTR);
685 		}
686 	}
687 
688 	/*
689 	 * Establish the absolute future time for the timeout.
690 	 */
691 	if (rqtp) {
692 		timestruc_t now;
693 		timecheck = timechanged;
694 		gethrestime(&now);
695 		timespecadd(rqtp, &now);
696 	}
697 
698 	if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
699 		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
700 		aiop->aio_iocb = NULL;
701 	}
702 
703 	if (aiop->aio_iocb == NULL) {
704 		iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
705 		if (iocblist == NULL) {
706 			mutex_exit(&aiop->aio_mutex);
707 			return (ENOMEM);
708 		}
709 		aiop->aio_iocb = (aiocb_t **)iocblist;
710 		aiop->aio_iocbsz = iocbsz;
711 	} else {
712 		iocblist = (char *)aiop->aio_iocb;
713 	}
714 
715 	aiop->aio_waitncnt = waitcnt;
716 	aiop->aio_flags |= AIO_WAITN;
717 
718 	for (;;) {
719 		/* push requests on poll queue to done queue */
720 		if (aiop->aio_pollq) {
721 			mutex_exit(&aiop->aio_mutex);
722 			aio_cleanup(0);
723 			mutex_enter(&aiop->aio_mutex);
724 		}
725 
726 		/* check for requests on done queue */
727 		if (aiop->aio_doneq) {
728 			cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
729 			aiop->aio_waitncnt = waitcnt - cnt;
730 		}
731 
732 		/* user-level done queue might not be empty */
733 		if (aiop->aio_notifycnt > 0) {
734 			aiop->aio_notifycnt--;
735 			error = 0;
736 			break;
737 		}
738 
739 		/*
740 		 * if we are here second time as a result of timer
741 		 * expiration, we reset error if there are enough
742 		 * aiocb's to satisfy request.
743 		 * We return also if all requests are already done
744 		 * and we picked up the whole done queue.
745 		 */
746 
747 		if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
748 		    aiop->aio_doneq == NULL)) {
749 			error = 0;
750 			break;
751 		}
752 
753 		if ((cnt < waitcnt) && blocking) {
754 			int rval = cv_waituntil_sig(&aiop->aio_waitcv,
755 				&aiop->aio_mutex, rqtp, timecheck);
756 			if (rval > 0)
757 				continue;
758 			if (rval < 0) {
759 				error = ETIME;
760 				blocking = 0;
761 				continue;
762 			}
763 			error = EINTR;
764 		}
765 		break;
766 	}
767 
768 	mutex_exit(&aiop->aio_mutex);
769 
770 	if (cnt > 0) {
771 
772 		iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
773 		    aiop, model);
774 
775 		if (model == DATAMODEL_NATIVE)
776 			riocbsz = (sizeof (aiocb_t *) * cnt);
777 #ifdef	_SYSCALL32_IMPL
778 		else
779 			riocbsz = (sizeof (caddr32_t) * cnt);
780 #endif  /* _SYSCALL32_IMPL */
781 
782 		if (copyout(iocblist, uiocb, riocbsz) ||
783 		    copyout(&cnt, nwait, sizeof (uint_t)))
784 			error = EFAULT;
785 	}
786 
787 	if (aiop->aio_iocbsz > AIO_IOCB_MAX) {
788 		kmem_free(iocblist, aiop->aio_iocbsz);
789 		aiop->aio_iocb = NULL;
790 	}
791 
792 	/* check if there is another thread waiting for execution */
793 	mutex_enter(&aiop->aio_mutex);
794 	aiop->aio_flags &= ~AIO_WAITN;
795 	if (aiop->aio_flags & AIO_WAITN_PENDING) {
796 		aiop->aio_flags &= ~AIO_WAITN_PENDING;
797 		cv_signal(&aiop->aio_waitncv);
798 	}
799 	mutex_exit(&aiop->aio_mutex);
800 
801 	return (error);
802 }
803 
804 /*
805  * aio_unlock_requests
806  * copyouts the result of the request as well as the return value.
807  * It builds the list of completed asynchronous requests,
808  * unlocks the allocated memory ranges and
809  * put the aio request structure back into the free list.
810  */
811 
812 static int
813 aio_unlock_requests(
814 	caddr_t	iocblist,
815 	int	iocb_index,
816 	aio_req_t *reqlist,
817 	aio_t	*aiop,
818 	model_t	model)
819 {
820 	aio_req_t	*reqp, *nreqp;
821 
822 	if (model == DATAMODEL_NATIVE) {
823 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
824 			(((caddr_t *)iocblist)[iocb_index++]) =
825 			    reqp->aio_req_iocb.iocb;
826 			nreqp = reqp->aio_req_next;
827 			aphysio_unlock(reqp);
828 			aio_copyout_result(reqp);
829 			mutex_enter(&aiop->aio_mutex);
830 			aio_req_free(aiop, reqp);
831 			mutex_exit(&aiop->aio_mutex);
832 		}
833 	}
834 #ifdef	_SYSCALL32_IMPL
835 	else {
836 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
837 			((caddr32_t *)iocblist)[iocb_index++] =
838 			    reqp->aio_req_iocb.iocb32;
839 			nreqp = reqp->aio_req_next;
840 			aphysio_unlock(reqp);
841 			aio_copyout_result(reqp);
842 			mutex_enter(&aiop->aio_mutex);
843 			aio_req_free(aiop, reqp);
844 			mutex_exit(&aiop->aio_mutex);
845 		}
846 	}
847 #endif	/* _SYSCALL32_IMPL */
848 	return (iocb_index);
849 }
850 
851 /*
852  * aio_reqlist_concat
853  * moves "max" elements from the done queue to the reqlist queue and removes
854  * the AIO_DONEQ flag.
855  * - reqlist queue is a simple linked list
856  * - done queue is a double linked list
857  */
858 
859 static int
860 aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
861 {
862 	aio_req_t *q2, *q2work, *list;
863 	int count = 0;
864 
865 	list = *reqlist;
866 	q2 = aiop->aio_doneq;
867 	q2work = q2;
868 	while (max-- > 0) {
869 		q2work->aio_req_flags &= ~AIO_DONEQ;
870 		q2work = q2work->aio_req_next;
871 		count++;
872 		if (q2work == q2)
873 			break;
874 	}
875 
876 	if (q2work == q2) {
877 		/* all elements revised */
878 		q2->aio_req_prev->aio_req_next = list;
879 		list = q2;
880 		aiop->aio_doneq = NULL;
881 	} else {
882 		/*
883 		 * max < elements in the doneq
884 		 * detach only the required amount of elements
885 		 * out of the doneq
886 		 */
887 		q2work->aio_req_prev->aio_req_next = list;
888 		list = q2;
889 
890 		aiop->aio_doneq = q2work;
891 		q2work->aio_req_prev = q2->aio_req_prev;
892 		q2->aio_req_prev->aio_req_next = q2work;
893 	}
894 	*reqlist = list;
895 	return (count);
896 }
897 
898 /*ARGSUSED*/
899 static int
900 aiosuspend(
901 	void	*aiocb,
902 	int	nent,
903 	struct	timespec	*timout,
904 	int	flag,
905 	long	*rval,
906 	int	run_mode)
907 {
908 	int 		error;
909 	aio_t		*aiop;
910 	aio_req_t	*reqp, *found, *next;
911 	caddr_t		cbplist = NULL;
912 	aiocb_t		*cbp, **ucbp;
913 #ifdef	_SYSCALL32_IMPL
914 	aiocb32_t	*cbp32;
915 	caddr32_t	*ucbp32;
916 #endif  /* _SYSCALL32_IMPL */
917 	aiocb64_32_t	*cbp64;
918 	int		rv;
919 	int		i;
920 	size_t		ssize;
921 	model_t		model = get_udatamodel();
922 	int		blocking;
923 	int		timecheck;
924 	timestruc_t	rqtime;
925 	timestruc_t	*rqtp;
926 
927 	aiop = curproc->p_aio;
928 	if (aiop == NULL || nent <= 0)
929 		return (EINVAL);
930 
931 	/*
932 	 * Establish the absolute future time for the timeout.
933 	 */
934 	error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
935 	if (error)
936 		return (error);
937 	if (rqtp) {
938 		timestruc_t now;
939 		timecheck = timechanged;
940 		gethrestime(&now);
941 		timespecadd(rqtp, &now);
942 	}
943 
944 	/*
945 	 * If we are not blocking and there's no IO complete
946 	 * skip aiocb copyin.
947 	 */
948 	if (!blocking && (aiop->aio_pollq == NULL) &&
949 	    (aiop->aio_doneq == NULL)) {
950 		return (EAGAIN);
951 	}
952 
953 	if (model == DATAMODEL_NATIVE)
954 		ssize = (sizeof (aiocb_t *) * nent);
955 #ifdef	_SYSCALL32_IMPL
956 	else
957 		ssize = (sizeof (caddr32_t) * nent);
958 #endif  /* _SYSCALL32_IMPL */
959 
960 	cbplist = kmem_alloc(ssize, KM_NOSLEEP);
961 	if (cbplist == NULL)
962 		return (ENOMEM);
963 
964 	if (copyin(aiocb, cbplist, ssize)) {
965 		error = EFAULT;
966 		goto done;
967 	}
968 
969 	found = NULL;
970 	/*
971 	 * we need to get the aio_cleanupq_mutex since we call
972 	 * aio_req_done().
973 	 */
974 	mutex_enter(&aiop->aio_cleanupq_mutex);
975 	mutex_enter(&aiop->aio_mutex);
976 	for (;;) {
977 		/* push requests on poll queue to done queue */
978 		if (aiop->aio_pollq) {
979 			mutex_exit(&aiop->aio_mutex);
980 			mutex_exit(&aiop->aio_cleanupq_mutex);
981 			aio_cleanup(0);
982 			mutex_enter(&aiop->aio_cleanupq_mutex);
983 			mutex_enter(&aiop->aio_mutex);
984 		}
985 		/* check for requests on done queue */
986 		if (aiop->aio_doneq) {
987 			if (model == DATAMODEL_NATIVE)
988 				ucbp = (aiocb_t **)cbplist;
989 #ifdef	_SYSCALL32_IMPL
990 			else
991 				ucbp32 = (caddr32_t *)cbplist;
992 #endif  /* _SYSCALL32_IMPL */
993 			for (i = 0; i < nent; i++) {
994 				if (model == DATAMODEL_NATIVE) {
995 					if ((cbp = *ucbp++) == NULL)
996 						continue;
997 					if (run_mode != AIO_LARGEFILE)
998 						reqp = aio_req_done(
999 						    &cbp->aio_resultp);
1000 					else {
1001 						cbp64 = (aiocb64_32_t *)cbp;
1002 						reqp = aio_req_done(
1003 						    &cbp64->aio_resultp);
1004 					}
1005 				}
1006 #ifdef	_SYSCALL32_IMPL
1007 				else {
1008 					if (run_mode == AIO_32) {
1009 						if ((cbp32 =
1010 						    (aiocb32_t *)(uintptr_t)
1011 						    *ucbp32++) == NULL)
1012 							continue;
1013 						reqp = aio_req_done(
1014 						    &cbp32->aio_resultp);
1015 					} else if (run_mode == AIO_LARGEFILE) {
1016 						if ((cbp64 =
1017 						    (aiocb64_32_t *)(uintptr_t)
1018 						    *ucbp32++) == NULL)
1019 							continue;
1020 						    reqp = aio_req_done(
1021 							&cbp64->aio_resultp);
1022 					}
1023 
1024 				}
1025 #endif  /* _SYSCALL32_IMPL */
1026 				if (reqp) {
1027 					reqp->aio_req_next = found;
1028 					found = reqp;
1029 				}
1030 				if (aiop->aio_doneq == NULL)
1031 					break;
1032 			}
1033 			if (found)
1034 				break;
1035 		}
1036 		if (aiop->aio_notifycnt > 0) {
1037 			/*
1038 			 * nothing on the kernel's queue. the user
1039 			 * has notified the kernel that it has items
1040 			 * on a user-level queue.
1041 			 */
1042 			aiop->aio_notifycnt--;
1043 			*rval = 1;
1044 			error = 0;
1045 			break;
1046 		}
1047 		/* don't block if nothing is outstanding */
1048 		if (aiop->aio_outstanding == 0) {
1049 			error = EAGAIN;
1050 			break;
1051 		}
1052 		if (blocking) {
1053 			/*
1054 			 * drop the aio_cleanupq_mutex as we are
1055 			 * going to block.
1056 			 */
1057 			mutex_exit(&aiop->aio_cleanupq_mutex);
1058 			rv = cv_waituntil_sig(&aiop->aio_waitcv,
1059 				&aiop->aio_mutex, rqtp, timecheck);
1060 			/*
1061 			 * we have to drop aio_mutex and
1062 			 * grab it in the right order.
1063 			 */
1064 			mutex_exit(&aiop->aio_mutex);
1065 			mutex_enter(&aiop->aio_cleanupq_mutex);
1066 			mutex_enter(&aiop->aio_mutex);
1067 			if (rv > 0)	/* check done queue again */
1068 				continue;
1069 			if (rv == 0)	/* interrupted by a signal */
1070 				error = EINTR;
1071 			else		/* timer expired */
1072 				error = ETIME;
1073 		} else {
1074 			error = EAGAIN;
1075 		}
1076 		break;
1077 	}
1078 	mutex_exit(&aiop->aio_mutex);
1079 	mutex_exit(&aiop->aio_cleanupq_mutex);
1080 	for (reqp = found; reqp != NULL; reqp = next) {
1081 		next = reqp->aio_req_next;
1082 		aphysio_unlock(reqp);
1083 		aio_copyout_result(reqp);
1084 		mutex_enter(&aiop->aio_mutex);
1085 		aio_req_free(aiop, reqp);
1086 		mutex_exit(&aiop->aio_mutex);
1087 	}
1088 done:
1089 	kmem_free(cbplist, ssize);
1090 	return (error);
1091 }
1092 
1093 /*
1094  * initialize aio by allocating an aio_t struct for this
1095  * process.
1096  */
1097 static int
1098 aioinit(void)
1099 {
1100 	proc_t *p = curproc;
1101 	aio_t *aiop;
1102 	mutex_enter(&p->p_lock);
1103 	if ((aiop = p->p_aio) == NULL) {
1104 		aiop = aio_aiop_alloc();
1105 		p->p_aio = aiop;
1106 	}
1107 	mutex_exit(&p->p_lock);
1108 	if (aiop == NULL)
1109 		return (ENOMEM);
1110 	return (0);
1111 }
1112 
1113 /*
1114  * start a special thread that will cleanup after aio requests
1115  * that are preventing a segment from being unmapped. as_unmap()
1116  * blocks until all phsyio to this segment is completed. this
1117  * doesn't happen until all the pages in this segment are not
1118  * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
1119  * requests still outstanding. this special thread will make sure
1120  * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
1121  *
1122  * this function will return an error if the process has only
1123  * one LWP. the assumption is that the caller is a separate LWP
1124  * that remains blocked in the kernel for the life of this process.
1125  */
1126 static int
1127 aiostart(void)
1128 {
1129 	proc_t *p = curproc;
1130 	aio_t *aiop;
1131 	int first, error = 0;
1132 
1133 	if (p->p_lwpcnt == 1)
1134 		return (EDEADLK);
1135 	mutex_enter(&p->p_lock);
1136 	if ((aiop = p->p_aio) == NULL)
1137 		error = EINVAL;
1138 	else {
1139 		first = aiop->aio_ok;
1140 		if (aiop->aio_ok == 0)
1141 			aiop->aio_ok = 1;
1142 	}
1143 	mutex_exit(&p->p_lock);
1144 	if (error == 0 && first == 0) {
1145 		return (aio_cleanup_thread(aiop));
1146 		/* should return only to exit */
1147 	}
1148 	return (error);
1149 }
1150 
1151 /*
1152  * Associate an aiocb with a port.
1153  * This function is used by aiorw() to associate a transaction with a port.
1154  * Allocate an event port structure (port_alloc_event()) and store the
1155  * delivered user pointer (portnfy_user) in the portkev_user field of the
1156  * port_kevent_t structure..
1157  * The aio_req_portkev pointer in the aio_req_t structure was added to identify
1158  * the port association.
1159  */
1160 
1161 static int
1162 aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
1163 	aio_req_t *reqp, int event)
1164 {
1165 	port_kevent_t	*pkevp = NULL;
1166 	int		error;
1167 
1168 	error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
1169 	    PORT_SOURCE_AIO, &pkevp);
1170 	if (error) {
1171 		if ((error == ENOMEM) || (error == EAGAIN))
1172 			error = EAGAIN;
1173 		else
1174 			error = EINVAL;
1175 	} else {
1176 		port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
1177 		    aio_port_callback, reqp);
1178 		pkevp->portkev_events = event;
1179 		reqp->aio_req_portkev = pkevp;
1180 		reqp->aio_req_port = pntfy->portnfy_port;
1181 	}
1182 	return (error);
1183 }
1184 
1185 #ifdef _LP64
1186 
1187 /*
1188  * Asynchronous list IO. A chain of aiocb's are copied in
1189  * one at a time. If the aiocb is invalid, it is skipped.
1190  * For each aiocb, the appropriate driver entry point is
1191  * called. Optimize for the common case where the list
1192  * of requests is to the same file descriptor.
1193  *
1194  * One possible optimization is to define a new driver entry
1195  * point that supports a list of IO requests. Whether this
1196  * improves performance depends somewhat on the driver's
1197  * locking strategy. Processing a list could adversely impact
1198  * the driver's interrupt latency.
1199  */
1200 static int
1201 alio(
1202 	int		mode_arg,
1203 	aiocb_t		**aiocb_arg,
1204 	int		nent,
1205 	struct sigevent	*sigev)
1206 {
1207 	file_t		*fp;
1208 	file_t		*prev_fp = NULL;
1209 	int		prev_mode = -1;
1210 	struct vnode	*vp;
1211 	aio_lio_t	*head;
1212 	aio_req_t	*reqp;
1213 	aio_t		*aiop;
1214 	caddr_t		cbplist;
1215 	aiocb_t		cb;
1216 	aiocb_t		*aiocb = &cb;
1217 	aiocb_t		*cbp;
1218 	aiocb_t		**ucbp;
1219 	struct sigevent sigevk;
1220 	sigqueue_t	*sqp;
1221 	int		(*aio_func)();
1222 	int		mode;
1223 	int		error = 0;
1224 	int		aio_errors = 0;
1225 	int		i;
1226 	size_t		ssize;
1227 	int		deadhead = 0;
1228 	int		aio_notsupported = 0;
1229 	int		lio_head_port;
1230 	int		aio_port;
1231 	int		aio_thread;
1232 	port_kevent_t	*pkevtp = NULL;
1233 	port_notify_t	pnotify;
1234 	int		event;
1235 
1236 	aiop = curproc->p_aio;
1237 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
1238 		return (EINVAL);
1239 
1240 	ssize = (sizeof (aiocb_t *) * nent);
1241 	cbplist = kmem_alloc(ssize, KM_SLEEP);
1242 	ucbp = (aiocb_t **)cbplist;
1243 
1244 	if (copyin(aiocb_arg, cbplist, ssize) ||
1245 	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
1246 		kmem_free(cbplist, ssize);
1247 		return (EFAULT);
1248 	}
1249 
1250 	/* Event Ports  */
1251 	if (sigev &&
1252 	    (sigevk.sigev_notify == SIGEV_THREAD ||
1253 	    sigevk.sigev_notify == SIGEV_PORT)) {
1254 		if (sigevk.sigev_notify == SIGEV_THREAD) {
1255 			pnotify.portnfy_port = sigevk.sigev_signo;
1256 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
1257 		} else if (copyin(sigevk.sigev_value.sival_ptr,
1258 		    &pnotify, sizeof (pnotify))) {
1259 			kmem_free(cbplist, ssize);
1260 			return (EFAULT);
1261 		}
1262 		error = port_alloc_event(pnotify.portnfy_port,
1263 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
1264 		if (error) {
1265 			if (error == ENOMEM || error == EAGAIN)
1266 				error = EAGAIN;
1267 			else
1268 				error = EINVAL;
1269 			kmem_free(cbplist, ssize);
1270 			return (error);
1271 		}
1272 		lio_head_port = pnotify.portnfy_port;
1273 	}
1274 
1275 	/*
1276 	 * a list head should be allocated if notification is
1277 	 * enabled for this list.
1278 	 */
1279 	head = NULL;
1280 
1281 	if (mode_arg == LIO_WAIT || sigev) {
1282 		mutex_enter(&aiop->aio_mutex);
1283 		error = aio_lio_alloc(&head);
1284 		mutex_exit(&aiop->aio_mutex);
1285 		if (error)
1286 			goto done;
1287 		deadhead = 1;
1288 		head->lio_nent = nent;
1289 		head->lio_refcnt = nent;
1290 		head->lio_port = -1;
1291 		head->lio_portkev = NULL;
1292 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
1293 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
1294 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
1295 			if (sqp == NULL) {
1296 				error = EAGAIN;
1297 				goto done;
1298 			}
1299 			sqp->sq_func = NULL;
1300 			sqp->sq_next = NULL;
1301 			sqp->sq_info.si_code = SI_ASYNCIO;
1302 			sqp->sq_info.si_pid = curproc->p_pid;
1303 			sqp->sq_info.si_ctid = PRCTID(curproc);
1304 			sqp->sq_info.si_zoneid = getzoneid();
1305 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
1306 			sqp->sq_info.si_signo = sigevk.sigev_signo;
1307 			sqp->sq_info.si_value = sigevk.sigev_value;
1308 			head->lio_sigqp = sqp;
1309 		} else {
1310 			head->lio_sigqp = NULL;
1311 		}
1312 		if (pkevtp) {
1313 			/*
1314 			 * Prepare data to send when list of aiocb's
1315 			 * has completed.
1316 			 */
1317 			port_init_event(pkevtp, (uintptr_t)sigev,
1318 			    (void *)(uintptr_t)pnotify.portnfy_user,
1319 			    NULL, head);
1320 			pkevtp->portkev_events = AIOLIO;
1321 			head->lio_portkev = pkevtp;
1322 			head->lio_port = pnotify.portnfy_port;
1323 		}
1324 	}
1325 
1326 	for (i = 0; i < nent; i++, ucbp++) {
1327 
1328 		cbp = *ucbp;
1329 		/* skip entry if it can't be copied. */
1330 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
1331 			if (head) {
1332 				mutex_enter(&aiop->aio_mutex);
1333 				head->lio_nent--;
1334 				head->lio_refcnt--;
1335 				mutex_exit(&aiop->aio_mutex);
1336 			}
1337 			continue;
1338 		}
1339 
1340 		/* skip if opcode for aiocb is LIO_NOP */
1341 		mode = aiocb->aio_lio_opcode;
1342 		if (mode == LIO_NOP) {
1343 			cbp = NULL;
1344 			if (head) {
1345 				mutex_enter(&aiop->aio_mutex);
1346 				head->lio_nent--;
1347 				head->lio_refcnt--;
1348 				mutex_exit(&aiop->aio_mutex);
1349 			}
1350 			continue;
1351 		}
1352 
1353 		/* increment file descriptor's ref count. */
1354 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
1355 			lio_set_uerror(&cbp->aio_resultp, EBADF);
1356 			if (head) {
1357 				mutex_enter(&aiop->aio_mutex);
1358 				head->lio_nent--;
1359 				head->lio_refcnt--;
1360 				mutex_exit(&aiop->aio_mutex);
1361 			}
1362 			aio_errors++;
1363 			continue;
1364 		}
1365 
1366 		/*
1367 		 * check the permission of the partition
1368 		 */
1369 		if ((fp->f_flag & mode) == 0) {
1370 			releasef(aiocb->aio_fildes);
1371 			lio_set_uerror(&cbp->aio_resultp, EBADF);
1372 			if (head) {
1373 				mutex_enter(&aiop->aio_mutex);
1374 				head->lio_nent--;
1375 				head->lio_refcnt--;
1376 				mutex_exit(&aiop->aio_mutex);
1377 			}
1378 			aio_errors++;
1379 			continue;
1380 		}
1381 
1382 		/*
1383 		 * common case where requests are to the same fd
1384 		 * for the same r/w operation.
1385 		 * for UFS, need to set EBADFD
1386 		 */
1387 		vp = fp->f_vnode;
1388 		if (fp != prev_fp || mode != prev_mode) {
1389 			aio_func = check_vp(vp, mode);
1390 			if (aio_func == NULL) {
1391 				prev_fp = NULL;
1392 				releasef(aiocb->aio_fildes);
1393 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
1394 				aio_notsupported++;
1395 				if (head) {
1396 					mutex_enter(&aiop->aio_mutex);
1397 					head->lio_nent--;
1398 					head->lio_refcnt--;
1399 					mutex_exit(&aiop->aio_mutex);
1400 				}
1401 				continue;
1402 			} else {
1403 				prev_fp = fp;
1404 				prev_mode = mode;
1405 			}
1406 		}
1407 
1408 		error = aio_req_setup(&reqp, aiop, aiocb,
1409 		    &cbp->aio_resultp, vp);
1410 		if (error) {
1411 			releasef(aiocb->aio_fildes);
1412 			lio_set_uerror(&cbp->aio_resultp, error);
1413 			if (head) {
1414 				mutex_enter(&aiop->aio_mutex);
1415 				head->lio_nent--;
1416 				head->lio_refcnt--;
1417 				mutex_exit(&aiop->aio_mutex);
1418 			}
1419 			aio_errors++;
1420 			continue;
1421 		}
1422 
1423 		reqp->aio_req_lio = head;
1424 		deadhead = 0;
1425 
1426 		/*
1427 		 * Set the errno field now before sending the request to
1428 		 * the driver to avoid a race condition
1429 		 */
1430 		(void) suword32(&cbp->aio_resultp.aio_errno,
1431 		    EINPROGRESS);
1432 
1433 		reqp->aio_req_iocb.iocb = (caddr_t)cbp;
1434 
1435 		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
1436 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
1437 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
1438 		if (aio_port | aio_thread) {
1439 			port_kevent_t *lpkevp;
1440 			/*
1441 			 * Prepare data to send with each aiocb completed.
1442 			 */
1443 			if (aio_port) {
1444 				void *paddr =
1445 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
1446 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
1447 					error = EFAULT;
1448 			} else {	/* aio_thread */
1449 				pnotify.portnfy_port =
1450 				    aiocb->aio_sigevent.sigev_signo;
1451 				pnotify.portnfy_user =
1452 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
1453 			}
1454 			if (error)
1455 				/* EMPTY */;
1456 			else if (pkevtp != NULL &&
1457 			    pnotify.portnfy_port == lio_head_port)
1458 				error = port_dup_event(pkevtp, &lpkevp,
1459 				    PORT_ALLOC_DEFAULT);
1460 			else
1461 				error = port_alloc_event(pnotify.portnfy_port,
1462 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
1463 				    &lpkevp);
1464 			if (error == 0) {
1465 				port_init_event(lpkevp, (uintptr_t)cbp,
1466 				    (void *)(uintptr_t)pnotify.portnfy_user,
1467 				    aio_port_callback, reqp);
1468 				lpkevp->portkev_events = event;
1469 				reqp->aio_req_portkev = lpkevp;
1470 				reqp->aio_req_port = pnotify.portnfy_port;
1471 			}
1472 		}
1473 
1474 		/*
1475 		 * send the request to driver.
1476 		 */
1477 		if (error == 0) {
1478 			if (aiocb->aio_nbytes == 0) {
1479 				clear_active_fd(aiocb->aio_fildes);
1480 				aio_zerolen(reqp);
1481 				continue;
1482 			}
1483 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
1484 			    CRED());
1485 		}
1486 
1487 		/*
1488 		 * the fd's ref count is not decremented until the IO has
1489 		 * completed unless there was an error.
1490 		 */
1491 		if (error) {
1492 			releasef(aiocb->aio_fildes);
1493 			lio_set_uerror(&cbp->aio_resultp, error);
1494 			if (head) {
1495 				mutex_enter(&aiop->aio_mutex);
1496 				head->lio_nent--;
1497 				head->lio_refcnt--;
1498 				mutex_exit(&aiop->aio_mutex);
1499 			}
1500 			if (error == ENOTSUP)
1501 				aio_notsupported++;
1502 			else
1503 				aio_errors++;
1504 			lio_set_error(reqp);
1505 		} else {
1506 			clear_active_fd(aiocb->aio_fildes);
1507 		}
1508 	}
1509 
1510 	if (aio_notsupported) {
1511 		error = ENOTSUP;
1512 	} else if (aio_errors) {
1513 		/*
1514 		 * return EIO if any request failed
1515 		 */
1516 		error = EIO;
1517 	}
1518 
1519 	if (mode_arg == LIO_WAIT) {
1520 		mutex_enter(&aiop->aio_mutex);
1521 		while (head->lio_refcnt > 0) {
1522 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
1523 				mutex_exit(&aiop->aio_mutex);
1524 				error = EINTR;
1525 				goto done;
1526 			}
1527 		}
1528 		mutex_exit(&aiop->aio_mutex);
1529 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
1530 	}
1531 
1532 done:
1533 	kmem_free(cbplist, ssize);
1534 	if (deadhead) {
1535 		if (head->lio_sigqp)
1536 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
1537 		if (head->lio_portkev)
1538 			port_free_event(head->lio_portkev);
1539 		kmem_free(head, sizeof (aio_lio_t));
1540 	}
1541 	return (error);
1542 }
1543 
1544 #endif /* _LP64 */
1545 
1546 /*
1547  * Asynchronous list IO.
1548  * If list I/O is called with LIO_WAIT it can still return
1549  * before all the I/O's are completed if a signal is caught
1550  * or if the list include UFS I/O requests. If this happens,
1551  * libaio will call aliowait() to wait for the I/O's to
1552  * complete
1553  */
1554 /*ARGSUSED*/
1555 static int
1556 aliowait(
1557 	int	mode,
1558 	void	*aiocb,
1559 	int	nent,
1560 	void	*sigev,
1561 	int	run_mode)
1562 {
1563 	aio_lio_t	*head;
1564 	aio_t		*aiop;
1565 	caddr_t		cbplist;
1566 	aiocb_t		*cbp, **ucbp;
1567 #ifdef	_SYSCALL32_IMPL
1568 	aiocb32_t	*cbp32;
1569 	caddr32_t	*ucbp32;
1570 	aiocb64_32_t	*cbp64;
1571 #endif
1572 	int		error = 0;
1573 	int		i;
1574 	size_t		ssize = 0;
1575 	model_t		model = get_udatamodel();
1576 
1577 	aiop = curproc->p_aio;
1578 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
1579 		return (EINVAL);
1580 
1581 	if (model == DATAMODEL_NATIVE)
1582 		ssize = (sizeof (aiocb_t *) * nent);
1583 #ifdef	_SYSCALL32_IMPL
1584 	else
1585 		ssize = (sizeof (caddr32_t) * nent);
1586 #endif  /* _SYSCALL32_IMPL */
1587 
1588 	if (ssize == 0)
1589 		return (EINVAL);
1590 
1591 	cbplist = kmem_alloc(ssize, KM_SLEEP);
1592 
1593 	if (model == DATAMODEL_NATIVE)
1594 		ucbp = (aiocb_t **)cbplist;
1595 #ifdef	_SYSCALL32_IMPL
1596 	else
1597 		ucbp32 = (caddr32_t *)cbplist;
1598 #endif  /* _SYSCALL32_IMPL */
1599 
1600 	if (copyin(aiocb, cbplist, ssize)) {
1601 		error = EFAULT;
1602 		goto done;
1603 	}
1604 
1605 	/*
1606 	 * To find the list head, we go through the
1607 	 * list of aiocb structs, find the request
1608 	 * its for, then get the list head that reqp
1609 	 * points to
1610 	 */
1611 	head = NULL;
1612 
1613 	for (i = 0; i < nent; i++) {
1614 		if (model == DATAMODEL_NATIVE) {
1615 			/*
1616 			 * Since we are only checking for a NULL pointer
1617 			 * Following should work on both native data sizes
1618 			 * as well as for largefile aiocb.
1619 			 */
1620 			if ((cbp = *ucbp++) == NULL)
1621 				continue;
1622 			if (run_mode != AIO_LARGEFILE)
1623 				if (head = aio_list_get(&cbp->aio_resultp))
1624 					break;
1625 			else {
1626 				/*
1627 				 * This is a case when largefile call is
1628 				 * made on 32 bit kernel.
1629 				 * Treat each pointer as pointer to
1630 				 * aiocb64_32
1631 				 */
1632 				if (head = aio_list_get((aio_result_t *)
1633 				    &(((aiocb64_32_t *)cbp)->aio_resultp)))
1634 					break;
1635 			}
1636 		}
1637 #ifdef	_SYSCALL32_IMPL
1638 		else {
1639 			if (run_mode == AIO_LARGEFILE) {
1640 				if ((cbp64 = (aiocb64_32_t *)
1641 				    (uintptr_t)*ucbp32++) == NULL)
1642 					continue;
1643 				if (head = aio_list_get((aio_result_t *)
1644 				    &cbp64->aio_resultp))
1645 					break;
1646 			} else if (run_mode == AIO_32) {
1647 				if ((cbp32 = (aiocb32_t *)
1648 				    (uintptr_t)*ucbp32++) == NULL)
1649 					continue;
1650 				if (head = aio_list_get((aio_result_t *)
1651 				    &cbp32->aio_resultp))
1652 					break;
1653 			}
1654 		}
1655 #endif	/* _SYSCALL32_IMPL */
1656 	}
1657 
1658 	if (head == NULL) {
1659 		error = EINVAL;
1660 		goto done;
1661 	}
1662 
1663 	mutex_enter(&aiop->aio_mutex);
1664 	while (head->lio_refcnt > 0) {
1665 		if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
1666 			mutex_exit(&aiop->aio_mutex);
1667 			error = EINTR;
1668 			goto done;
1669 		}
1670 	}
1671 	mutex_exit(&aiop->aio_mutex);
1672 	alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);
1673 done:
1674 	kmem_free(cbplist, ssize);
1675 	return (error);
1676 }
1677 
1678 aio_lio_t *
1679 aio_list_get(aio_result_t *resultp)
1680 {
1681 	aio_lio_t	*head = NULL;
1682 	aio_t		*aiop;
1683 	aio_req_t 	**bucket;
1684 	aio_req_t 	*reqp;
1685 	long		index;
1686 
1687 	aiop = curproc->p_aio;
1688 	if (aiop == NULL)
1689 		return (NULL);
1690 
1691 	if (resultp) {
1692 		index = AIO_HASH(resultp);
1693 		bucket = &aiop->aio_hash[index];
1694 		for (reqp = *bucket; reqp != NULL;
1695 		    reqp = reqp->aio_hash_next) {
1696 			if (reqp->aio_req_resultp == resultp) {
1697 				head = reqp->aio_req_lio;
1698 				return (head);
1699 			}
1700 		}
1701 	}
1702 	return (NULL);
1703 }
1704 
1705 
1706 static void
1707 lio_set_uerror(void *resultp, int error)
1708 {
1709 	/*
1710 	 * the resultp field is a pointer to where the
1711 	 * error should be written out to the user's
1712 	 * aiocb.
1713 	 *
1714 	 */
1715 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1716 		(void) sulword(&((aio_result_t *)resultp)->aio_return,
1717 		    (ssize_t)-1);
1718 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
1719 	}
1720 #ifdef	_SYSCALL32_IMPL
1721 	else {
1722 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
1723 		    (uint_t)-1);
1724 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
1725 	}
1726 #endif  /* _SYSCALL32_IMPL */
1727 }
1728 
1729 /*
1730  * do cleanup completion for all requests in list. memory for
1731  * each request is also freed.
1732  */
1733 static void
1734 alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
1735 {
1736 	int i;
1737 	aio_req_t *reqp;
1738 	aio_result_t *resultp;
1739 	aiocb64_32_t *aiocb_64;
1740 
1741 	for (i = 0; i < nent; i++) {
1742 		if (get_udatamodel() == DATAMODEL_NATIVE) {
1743 			if (cbp[i] == NULL)
1744 				continue;
1745 			if (run_mode == AIO_LARGEFILE) {
1746 				aiocb_64 = (aiocb64_32_t *)cbp[i];
1747 				resultp = (aio_result_t *)
1748 				    &aiocb_64->aio_resultp;
1749 			} else
1750 				resultp = &cbp[i]->aio_resultp;
1751 		}
1752 #ifdef	_SYSCALL32_IMPL
1753 		else {
1754 			aiocb32_t *aiocb_32;
1755 			caddr32_t *cbp32;
1756 
1757 			cbp32 = (caddr32_t *)cbp;
1758 			if (cbp32[i] == NULL)
1759 				continue;
1760 			if (run_mode == AIO_32) {
1761 				aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i];
1762 				resultp = (aio_result_t *)&aiocb_32->
1763 				    aio_resultp;
1764 			} else if (run_mode == AIO_LARGEFILE) {
1765 				aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i];
1766 				resultp = (aio_result_t *)&aiocb_64->
1767 				    aio_resultp;
1768 			}
1769 		}
1770 #endif  /* _SYSCALL32_IMPL */
1771 		/*
1772 		 * we need to get the aio_cleanupq_mutex since we call
1773 		 * aio_req_done().
1774 		 */
1775 		mutex_enter(&aiop->aio_cleanupq_mutex);
1776 		mutex_enter(&aiop->aio_mutex);
1777 		reqp = aio_req_done(resultp);
1778 		mutex_exit(&aiop->aio_mutex);
1779 		mutex_exit(&aiop->aio_cleanupq_mutex);
1780 		if (reqp != NULL) {
1781 			aphysio_unlock(reqp);
1782 			aio_copyout_result(reqp);
1783 			mutex_enter(&aiop->aio_mutex);
1784 			aio_req_free(aiop, reqp);
1785 			mutex_exit(&aiop->aio_mutex);
1786 		}
1787 	}
1788 }
1789 
1790 /*
1791  * Write out the results for an aio request that is done.
1792  */
1793 static int
1794 aioerror(void *cb, int run_mode)
1795 {
1796 	aio_result_t *resultp;
1797 	aio_t *aiop;
1798 	aio_req_t *reqp;
1799 	int retval;
1800 
1801 	aiop = curproc->p_aio;
1802 	if (aiop == NULL || cb == NULL)
1803 		return (EINVAL);
1804 
1805 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1806 		if (run_mode == AIO_LARGEFILE)
1807 			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
1808 			    aio_resultp;
1809 		else
1810 			resultp = &((aiocb_t *)cb)->aio_resultp;
1811 	}
1812 #ifdef	_SYSCALL32_IMPL
1813 	else {
1814 		if (run_mode == AIO_LARGEFILE)
1815 			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
1816 			    aio_resultp;
1817 		else if (run_mode == AIO_32)
1818 			resultp = (aio_result_t *)&((aiocb32_t *)cb)->
1819 			    aio_resultp;
1820 	}
1821 #endif  /* _SYSCALL32_IMPL */
1822 	/*
1823 	 * we need to get the aio_cleanupq_mutex since we call
1824 	 * aio_req_find().
1825 	 */
1826 	mutex_enter(&aiop->aio_cleanupq_mutex);
1827 	mutex_enter(&aiop->aio_mutex);
1828 	retval = aio_req_find(resultp, &reqp);
1829 	mutex_exit(&aiop->aio_mutex);
1830 	mutex_exit(&aiop->aio_cleanupq_mutex);
1831 	if (retval == 0) {
1832 		aphysio_unlock(reqp);
1833 		aio_copyout_result(reqp);
1834 		mutex_enter(&aiop->aio_mutex);
1835 		aio_req_free(aiop, reqp);
1836 		mutex_exit(&aiop->aio_mutex);
1837 		return (0);
1838 	} else if (retval == 1)
1839 		return (EINPROGRESS);
1840 	else if (retval == 2)
1841 		return (EINVAL);
1842 	return (0);
1843 }
1844 
1845 /*
1846  * 	aio_cancel - if no requests outstanding,
1847  *			return AIO_ALLDONE
1848  *			else
1849  *			return AIO_NOTCANCELED
1850  */
1851 static int
1852 aio_cancel(
1853 	int	fildes,
1854 	void 	*cb,
1855 	long	*rval,
1856 	int	run_mode)
1857 {
1858 	aio_t *aiop;
1859 	void *resultp;
1860 	int index;
1861 	aio_req_t **bucket;
1862 	aio_req_t *ent;
1863 
1864 
1865 	/*
1866 	 * Verify valid file descriptor
1867 	 */
1868 	if ((getf(fildes)) == NULL) {
1869 		return (EBADF);
1870 	}
1871 	releasef(fildes);
1872 
1873 	aiop = curproc->p_aio;
1874 	if (aiop == NULL)
1875 		return (EINVAL);
1876 
1877 	if (aiop->aio_outstanding == 0) {
1878 		*rval = AIO_ALLDONE;
1879 		return (0);
1880 	}
1881 
1882 	mutex_enter(&aiop->aio_mutex);
1883 	if (cb != NULL) {
1884 		if (get_udatamodel() == DATAMODEL_NATIVE) {
1885 			if (run_mode == AIO_LARGEFILE)
1886 				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
1887 				    ->aio_resultp;
1888 			else
1889 				resultp = &((aiocb_t *)cb)->aio_resultp;
1890 		}
1891 #ifdef	_SYSCALL32_IMPL
1892 		else {
1893 			if (run_mode == AIO_LARGEFILE)
1894 				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
1895 				    ->aio_resultp;
1896 			else if (run_mode == AIO_32)
1897 				resultp = (aio_result_t *)&((aiocb32_t *)cb)
1898 				    ->aio_resultp;
1899 		}
1900 #endif  /* _SYSCALL32_IMPL */
1901 		index = AIO_HASH(resultp);
1902 		bucket = &aiop->aio_hash[index];
1903 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
1904 			if (ent->aio_req_resultp == resultp) {
1905 				if ((ent->aio_req_flags & AIO_PENDING) == 0) {
1906 					mutex_exit(&aiop->aio_mutex);
1907 					*rval = AIO_ALLDONE;
1908 					return (0);
1909 				}
1910 				mutex_exit(&aiop->aio_mutex);
1911 				*rval = AIO_NOTCANCELED;
1912 				return (0);
1913 			}
1914 		}
1915 		mutex_exit(&aiop->aio_mutex);
1916 		*rval = AIO_ALLDONE;
1917 		return (0);
1918 	}
1919 
1920 	for (index = 0; index < AIO_HASHSZ; index++) {
1921 		bucket = &aiop->aio_hash[index];
1922 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
1923 			if (ent->aio_req_fd == fildes) {
1924 				if ((ent->aio_req_flags & AIO_PENDING) != 0) {
1925 					mutex_exit(&aiop->aio_mutex);
1926 					*rval = AIO_NOTCANCELED;
1927 					return (0);
1928 				}
1929 			}
1930 		}
1931 	}
1932 	mutex_exit(&aiop->aio_mutex);
1933 	*rval = AIO_ALLDONE;
1934 	return (0);
1935 }
1936 
1937 /*
1938  * solaris version of asynchronous read and write
1939  */
1940 static int
1941 arw(
1942 	int	opcode,
1943 	int	fdes,
1944 	char	*bufp,
1945 	int	bufsize,
1946 	offset_t	offset,
1947 	aio_result_t	*resultp,
1948 	int		mode)
1949 {
1950 	file_t		*fp;
1951 	int		error;
1952 	struct vnode	*vp;
1953 	aio_req_t	*reqp;
1954 	aio_t		*aiop;
1955 	int		(*aio_func)();
1956 #ifdef _LP64
1957 	aiocb_t		aiocb;
1958 #else
1959 	aiocb64_32_t	aiocb64;
1960 #endif
1961 
1962 	aiop = curproc->p_aio;
1963 	if (aiop == NULL)
1964 		return (EINVAL);
1965 
1966 	if ((fp = getf(fdes)) == NULL) {
1967 		return (EBADF);
1968 	}
1969 
1970 	/*
1971 	 * check the permission of the partition
1972 	 */
1973 	if ((fp->f_flag & mode) == 0) {
1974 		releasef(fdes);
1975 		return (EBADF);
1976 	}
1977 
1978 	vp = fp->f_vnode;
1979 	aio_func = check_vp(vp, mode);
1980 	if (aio_func == NULL) {
1981 		releasef(fdes);
1982 		return (EBADFD);
1983 	}
1984 #ifdef _LP64
1985 	aiocb.aio_fildes = fdes;
1986 	aiocb.aio_buf = bufp;
1987 	aiocb.aio_nbytes = bufsize;
1988 	aiocb.aio_offset = offset;
1989 	aiocb.aio_sigevent.sigev_notify = 0;
1990 	error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp);
1991 #else
1992 	aiocb64.aio_fildes = fdes;
1993 	aiocb64.aio_buf = (caddr32_t)bufp;
1994 	aiocb64.aio_nbytes = bufsize;
1995 	aiocb64.aio_offset = offset;
1996 	aiocb64.aio_sigevent.sigev_notify = 0;
1997 	error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp);
1998 #endif
1999 	if (error) {
2000 		releasef(fdes);
2001 		return (error);
2002 	}
2003 
2004 	/*
2005 	 * enable polling on this request if the opcode has
2006 	 * the AIO poll bit set
2007 	 */
2008 	if (opcode & AIO_POLL_BIT)
2009 		reqp->aio_req_flags |= AIO_POLL;
2010 
2011 	if (bufsize == 0) {
2012 		clear_active_fd(fdes);
2013 		aio_zerolen(reqp);
2014 		return (0);
2015 	}
2016 	/*
2017 	 * send the request to driver.
2018 	 */
2019 	error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
2020 	/*
2021 	 * the fd is stored in the aio_req_t by aio_req_setup(), and
2022 	 * is released by the aio_cleanup_thread() when the IO has
2023 	 * completed.
2024 	 */
2025 	if (error) {
2026 		releasef(fdes);
2027 		mutex_enter(&aiop->aio_mutex);
2028 		aio_req_free(aiop, reqp);
2029 		aiop->aio_pending--;
2030 		if (aiop->aio_flags & AIO_REQ_BLOCK)
2031 			cv_signal(&aiop->aio_cleanupcv);
2032 		mutex_exit(&aiop->aio_mutex);
2033 		return (error);
2034 	}
2035 	clear_active_fd(fdes);
2036 	return (0);
2037 }
2038 
2039 /*
2040  * posix version of asynchronous read and write
2041  */
2042 static int
2043 aiorw(
2044 	int		opcode,
2045 	void		*aiocb_arg,
2046 	int		mode,
2047 	int		run_mode)
2048 {
2049 #ifdef _SYSCALL32_IMPL
2050 	aiocb32_t	aiocb32;
2051 	struct	sigevent32 *sigev32;
2052 	port_notify32_t	pntfy32;
2053 #endif
2054 	aiocb64_32_t	aiocb64;
2055 	aiocb_t		aiocb;
2056 	file_t		*fp;
2057 	int		error, fd;
2058 	size_t		bufsize;
2059 	struct vnode	*vp;
2060 	aio_req_t	*reqp;
2061 	aio_t		*aiop;
2062 	int		(*aio_func)();
2063 	aio_result_t	*resultp;
2064 	struct	sigevent *sigev;
2065 	model_t		model;
2066 	int		aio_use_port = 0;
2067 	port_notify_t	pntfy;
2068 
2069 	model = get_udatamodel();
2070 	aiop = curproc->p_aio;
2071 	if (aiop == NULL)
2072 		return (EINVAL);
2073 
2074 	if (model == DATAMODEL_NATIVE) {
2075 		if (run_mode != AIO_LARGEFILE) {
2076 			if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
2077 				return (EFAULT);
2078 			bufsize = aiocb.aio_nbytes;
2079 			resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
2080 			if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
2081 				return (EBADF);
2082 			}
2083 			sigev = &aiocb.aio_sigevent;
2084 		} else {
2085 			/*
2086 			 * We come here only when we make largefile
2087 			 * call on 32 bit kernel using 32 bit library.
2088 			 */
2089 			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
2090 				return (EFAULT);
2091 			bufsize = aiocb64.aio_nbytes;
2092 			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
2093 			    ->aio_resultp);
2094 			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
2095 				return (EBADF);
2096 			sigev = (struct sigevent *)&aiocb64.aio_sigevent;
2097 		}
2098 
2099 		if (sigev->sigev_notify == SIGEV_PORT) {
2100 			if (copyin((void *)sigev->sigev_value.sival_ptr,
2101 			    &pntfy, sizeof (port_notify_t))) {
2102 				releasef(fd);
2103 				return (EFAULT);
2104 			}
2105 			aio_use_port = 1;
2106 		} else if (sigev->sigev_notify == SIGEV_THREAD) {
2107 			pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo;
2108 			pntfy.portnfy_user =
2109 			    aiocb.aio_sigevent.sigev_value.sival_ptr;
2110 			aio_use_port = 1;
2111 		}
2112 	}
2113 #ifdef	_SYSCALL32_IMPL
2114 	else {
2115 		if (run_mode == AIO_32) {
2116 			/* 32 bit system call is being made on 64 bit kernel */
2117 			if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
2118 				return (EFAULT);
2119 
2120 			bufsize = aiocb32.aio_nbytes;
2121 			aiocb_32ton(&aiocb32, &aiocb);
2122 			resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
2123 			    aio_resultp);
2124 			if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
2125 				return (EBADF);
2126 			}
2127 			sigev32 = &aiocb32.aio_sigevent;
2128 		} else if (run_mode == AIO_LARGEFILE) {
2129 			/*
2130 			 * We come here only when we make largefile
2131 			 * call on 64 bit kernel using 32 bit library.
2132 			 */
2133 			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
2134 				return (EFAULT);
2135 			bufsize = aiocb64.aio_nbytes;
2136 			aiocb_LFton(&aiocb64, &aiocb);
2137 			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
2138 			    ->aio_resultp);
2139 			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
2140 				return (EBADF);
2141 			sigev32 = &aiocb64.aio_sigevent;
2142 		}
2143 
2144 		if (sigev32->sigev_notify == SIGEV_PORT) {
2145 			if (copyin(
2146 			    (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
2147 			    &pntfy32, sizeof (port_notify32_t))) {
2148 				releasef(fd);
2149 				return (EFAULT);
2150 			}
2151 			pntfy.portnfy_port = pntfy32.portnfy_port;
2152 			pntfy.portnfy_user = (void *)(uintptr_t)
2153 			    pntfy32.portnfy_user;
2154 			aio_use_port = 1;
2155 		} else if (sigev32->sigev_notify == SIGEV_THREAD) {
2156 			pntfy.portnfy_port = sigev32->sigev_signo;
2157 			pntfy.portnfy_user = (void *)(uintptr_t)
2158 			    sigev32->sigev_value.sival_ptr;
2159 			aio_use_port = 1;
2160 		}
2161 	}
2162 #endif  /* _SYSCALL32_IMPL */
2163 
2164 	/*
2165 	 * check the permission of the partition
2166 	 */
2167 
2168 	if ((fp->f_flag & mode) == 0) {
2169 		releasef(fd);
2170 		return (EBADF);
2171 	}
2172 
2173 	vp = fp->f_vnode;
2174 	aio_func = check_vp(vp, mode);
2175 	if (aio_func == NULL) {
2176 		releasef(fd);
2177 		return (EBADFD);
2178 	}
2179 	if (run_mode == AIO_LARGEFILE)
2180 		error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp);
2181 	else
2182 		error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp);
2183 
2184 	if (error) {
2185 		releasef(fd);
2186 		return (error);
2187 	}
2188 	/*
2189 	 * enable polling on this request if the opcode has
2190 	 * the AIO poll bit set
2191 	 */
2192 	if (opcode & AIO_POLL_BIT)
2193 		reqp->aio_req_flags |= AIO_POLL;
2194 
2195 	if (model == DATAMODEL_NATIVE)
2196 		reqp->aio_req_iocb.iocb = aiocb_arg;
2197 #ifdef  _SYSCALL32_IMPL
2198 	else
2199 		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
2200 #endif
2201 
2202 	if (aio_use_port) {
2203 		int event = (run_mode == AIO_LARGEFILE)?
2204 		    ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) :
2205 		    ((mode == FREAD)? AIOAREAD : AIOAWRITE);
2206 		error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
2207 	}
2208 
2209 	/*
2210 	 * send the request to driver.
2211 	 */
2212 	if (error == 0) {
2213 		if (bufsize == 0) {
2214 			clear_active_fd(fd);
2215 			aio_zerolen(reqp);
2216 			return (0);
2217 		}
2218 		error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
2219 	}
2220 
2221 	/*
2222 	 * the fd is stored in the aio_req_t by aio_req_setup(), and
2223 	 * is released by the aio_cleanup_thread() when the IO has
2224 	 * completed.
2225 	 */
2226 	if (error) {
2227 		releasef(fd);
2228 		mutex_enter(&aiop->aio_mutex);
2229 		aio_deq(&aiop->aio_portpending, reqp);
2230 		aio_req_free(aiop, reqp);
2231 		aiop->aio_pending--;
2232 		if (aiop->aio_flags & AIO_REQ_BLOCK)
2233 			cv_signal(&aiop->aio_cleanupcv);
2234 		mutex_exit(&aiop->aio_mutex);
2235 		return (error);
2236 	}
2237 	clear_active_fd(fd);
2238 	return (0);
2239 }
2240 
2241 
2242 /*
2243  * set error for a list IO entry that failed.
2244  */
2245 static void
2246 lio_set_error(aio_req_t *reqp)
2247 {
2248 	aio_t *aiop = curproc->p_aio;
2249 
2250 	if (aiop == NULL)
2251 		return;
2252 
2253 	mutex_enter(&aiop->aio_mutex);
2254 	aio_deq(&aiop->aio_portpending, reqp);
2255 	aiop->aio_pending--;
2256 	/* request failed, AIO_PHYSIODONE set to aviod physio cleanup. */
2257 	reqp->aio_req_flags |= AIO_PHYSIODONE;
2258 	/*
2259 	 * Need to free the request now as its never
2260 	 * going to get on the done queue
2261 	 *
2262 	 * Note: aio_outstanding is decremented in
2263 	 *	 aio_req_free()
2264 	 */
2265 	aio_req_free(aiop, reqp);
2266 	if (aiop->aio_flags & AIO_REQ_BLOCK)
2267 		cv_signal(&aiop->aio_cleanupcv);
2268 	mutex_exit(&aiop->aio_mutex);
2269 }
2270 
2271 /*
2272  * check if a specified request is done, and remove it from
2273  * the done queue. otherwise remove anybody from the done queue
2274  * if NULL is specified.
2275  */
2276 static aio_req_t *
2277 aio_req_done(void *resultp)
2278 {
2279 	aio_req_t **bucket;
2280 	aio_req_t *ent;
2281 	aio_t *aiop = curproc->p_aio;
2282 	long index;
2283 
2284 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
2285 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2286 
2287 	if (resultp) {
2288 		index = AIO_HASH(resultp);
2289 		bucket = &aiop->aio_hash[index];
2290 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
2291 			if (ent->aio_req_resultp == (aio_result_t *)resultp) {
2292 				if (ent->aio_req_flags & AIO_DONEQ) {
2293 					return (aio_req_remove(ent));
2294 				}
2295 				return (NULL);
2296 			}
2297 		}
2298 		/* no match, resultp is invalid */
2299 		return (NULL);
2300 	}
2301 	return (aio_req_remove(NULL));
2302 }
2303 
2304 /*
2305  * determine if a user-level resultp pointer is associated with an
2306  * active IO request. Zero is returned when the request is done,
2307  * and the request is removed from the done queue. Only when the
2308  * return value is zero, is the "reqp" pointer valid. One is returned
2309  * when the request is inprogress. Two is returned when the request
2310  * is invalid.
2311  */
2312 static int
2313 aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
2314 {
2315 	aio_req_t **bucket;
2316 	aio_req_t *ent;
2317 	aio_t *aiop = curproc->p_aio;
2318 	long index;
2319 
2320 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
2321 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2322 
2323 	index = AIO_HASH(resultp);
2324 	bucket = &aiop->aio_hash[index];
2325 	for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
2326 		if (ent->aio_req_resultp == resultp) {
2327 			if (ent->aio_req_flags & AIO_DONEQ) {
2328 				*reqp = aio_req_remove(ent);
2329 				return (0);
2330 			}
2331 			return (1);
2332 		}
2333 	}
2334 	/* no match, resultp is invalid */
2335 	return (2);
2336 }
2337 
2338 /*
2339  * remove a request from the done queue.
2340  */
2341 static aio_req_t *
2342 aio_req_remove(aio_req_t *reqp)
2343 {
2344 	aio_t *aiop = curproc->p_aio;
2345 
2346 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2347 
2348 	if (reqp != NULL) {
2349 		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
2350 		if (reqp->aio_req_next == reqp) {
2351 			/* only one request on queue */
2352 			if (reqp ==  aiop->aio_doneq) {
2353 				aiop->aio_doneq = NULL;
2354 			} else {
2355 				ASSERT(reqp == aiop->aio_cleanupq);
2356 				aiop->aio_cleanupq = NULL;
2357 			}
2358 		} else {
2359 			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
2360 			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
2361 			/*
2362 			 * The request can be either on the aio_doneq or the
2363 			 * aio_cleanupq
2364 			 */
2365 			if (reqp == aiop->aio_doneq)
2366 				aiop->aio_doneq = reqp->aio_req_next;
2367 
2368 			if (reqp == aiop->aio_cleanupq)
2369 				aiop->aio_cleanupq = reqp->aio_req_next;
2370 		}
2371 		reqp->aio_req_flags &= ~AIO_DONEQ;
2372 		reqp->aio_req_next = NULL;
2373 		reqp->aio_req_prev = NULL;
2374 	} else if ((reqp = aiop->aio_doneq) != NULL) {
2375 		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
2376 		if (reqp == reqp->aio_req_next) {
2377 			/* only one request on queue */
2378 			aiop->aio_doneq = NULL;
2379 		} else {
2380 			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
2381 			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
2382 			aiop->aio_doneq = reqp->aio_req_next;
2383 		}
2384 		reqp->aio_req_flags &= ~AIO_DONEQ;
2385 		reqp->aio_req_next = NULL;
2386 		reqp->aio_req_prev = NULL;
2387 	}
2388 	if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN))
2389 		cv_broadcast(&aiop->aio_waitcv);
2390 	return (reqp);
2391 }
2392 
2393 static int
2394 aio_req_setup(
2395 	aio_req_t	**reqpp,
2396 	aio_t 		*aiop,
2397 	aiocb_t 	*arg,
2398 	aio_result_t 	*resultp,
2399 	vnode_t		*vp)
2400 {
2401 	sigqueue_t	*sqp = NULL;
2402 	aio_req_t 	*reqp;
2403 	struct uio 	*uio;
2404 	struct sigevent *sigev;
2405 	int		error;
2406 
2407 	sigev = &arg->aio_sigevent;
2408 	if (sigev->sigev_notify == SIGEV_SIGNAL &&
2409 	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
2410 		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
2411 		if (sqp == NULL)
2412 			return (EAGAIN);
2413 		sqp->sq_func = NULL;
2414 		sqp->sq_next = NULL;
2415 		sqp->sq_info.si_code = SI_ASYNCIO;
2416 		sqp->sq_info.si_pid = curproc->p_pid;
2417 		sqp->sq_info.si_ctid = PRCTID(curproc);
2418 		sqp->sq_info.si_zoneid = getzoneid();
2419 		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
2420 		sqp->sq_info.si_signo = sigev->sigev_signo;
2421 		sqp->sq_info.si_value = sigev->sigev_value;
2422 	}
2423 
2424 	mutex_enter(&aiop->aio_mutex);
2425 
2426 	if (aiop->aio_flags & AIO_REQ_BLOCK) {
2427 		mutex_exit(&aiop->aio_mutex);
2428 		if (sqp)
2429 			kmem_free(sqp, sizeof (sigqueue_t));
2430 		return (EIO);
2431 	}
2432 	/*
2433 	 * get an aio_reqp from the free list or allocate one
2434 	 * from dynamic memory.
2435 	 */
2436 	if (error = aio_req_alloc(&reqp, resultp)) {
2437 		mutex_exit(&aiop->aio_mutex);
2438 		if (sqp)
2439 			kmem_free(sqp, sizeof (sigqueue_t));
2440 		return (error);
2441 	}
2442 	aiop->aio_pending++;
2443 	aiop->aio_outstanding++;
2444 	reqp->aio_req_flags = AIO_PENDING;
2445 	if (sigev->sigev_notify == SIGEV_THREAD ||
2446 	    sigev->sigev_notify == SIGEV_PORT)
2447 		aio_enq(&aiop->aio_portpending, reqp, 0);
2448 	mutex_exit(&aiop->aio_mutex);
2449 	/*
2450 	 * initialize aio request.
2451 	 */
2452 	reqp->aio_req_fd = arg->aio_fildes;
2453 	reqp->aio_req_sigqp = sqp;
2454 	reqp->aio_req_iocb.iocb = NULL;
2455 	reqp->aio_req_lio = NULL;
2456 	reqp->aio_req_buf.b_file = vp;
2457 	uio = reqp->aio_req.aio_uio;
2458 	uio->uio_iovcnt = 1;
2459 	uio->uio_iov->iov_base = (caddr_t)arg->aio_buf;
2460 	uio->uio_iov->iov_len = arg->aio_nbytes;
2461 	uio->uio_loffset = arg->aio_offset;
2462 	*reqpp = reqp;
2463 	return (0);
2464 }
2465 
2466 /*
2467  * Allocate p_aio struct.
2468  */
2469 static aio_t *
2470 aio_aiop_alloc(void)
2471 {
2472 	aio_t	*aiop;
2473 
2474 	ASSERT(MUTEX_HELD(&curproc->p_lock));
2475 
2476 	aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);
2477 	if (aiop) {
2478 		mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
2479 		mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
2480 									NULL);
2481 		mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
2482 	}
2483 	return (aiop);
2484 }
2485 
2486 /*
2487  * Allocate an aio_req struct.
2488  */
2489 static int
2490 aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
2491 {
2492 	aio_req_t *reqp;
2493 	aio_t *aiop = curproc->p_aio;
2494 
2495 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2496 
2497 	if ((reqp = aiop->aio_free) != NULL) {
2498 		aiop->aio_free = reqp->aio_req_next;
2499 		bzero(reqp, sizeof (*reqp));
2500 	} else {
2501 		/*
2502 		 * Check whether memory is getting tight.
2503 		 * This is a temporary mechanism to avoid memory
2504 		 * exhaustion by a single process until we come up
2505 		 * with a per process solution such as setrlimit().
2506 		 */
2507 		if (freemem < desfree)
2508 			return (EAGAIN);
2509 		reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);
2510 		if (reqp == NULL)
2511 			return (EAGAIN);
2512 	}
2513 	reqp->aio_req.aio_uio = &reqp->aio_req_uio;
2514 	reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov;
2515 	reqp->aio_req.aio_private = reqp;
2516 	reqp->aio_req_buf.b_offset = -1;
2517 	reqp->aio_req_resultp = resultp;
2518 	if (aio_hash_insert(reqp, aiop)) {
2519 		reqp->aio_req_next = aiop->aio_free;
2520 		aiop->aio_free = reqp;
2521 		return (EINVAL);
2522 	}
2523 	*nreqp = reqp;
2524 	return (0);
2525 }
2526 
2527 /*
2528  * Allocate an aio_lio_t struct.
2529  */
2530 static int
2531 aio_lio_alloc(aio_lio_t **head)
2532 {
2533 	aio_lio_t *liop;
2534 	aio_t *aiop = curproc->p_aio;
2535 
2536 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2537 
2538 	if ((liop = aiop->aio_lio_free) != NULL) {
2539 		aiop->aio_lio_free = liop->lio_next;
2540 	} else {
2541 		/*
2542 		 * Check whether memory is getting tight.
2543 		 * This is a temporary mechanism to avoid memory
2544 		 * exhaustion by a single process until we come up
2545 		 * with a per process solution such as setrlimit().
2546 		 */
2547 		if (freemem < desfree)
2548 			return (EAGAIN);
2549 
2550 		liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
2551 		if (liop == NULL)
2552 			return (EAGAIN);
2553 	}
2554 	*head = liop;
2555 	return (0);
2556 }
2557 
2558 /*
2559  * this is a special per-process thread that is only activated if
2560  * the process is unmapping a segment with outstanding aio. normally,
2561  * the process will have completed the aio before unmapping the
2562  * segment. If the process does unmap a segment with outstanding aio,
2563  * this special thread will guarentee that the locked pages due to
2564  * aphysio() are released, thereby permitting the segment to be
2565  * unmapped. In addition to this, the cleanup thread is woken up
2566  * during DR operations to release the locked pages.
2567  */
2568 
2569 static int
2570 aio_cleanup_thread(aio_t *aiop)
2571 {
2572 	proc_t *p = curproc;
2573 	struct as *as = p->p_as;
2574 	int poked = 0;
2575 	kcondvar_t *cvp;
2576 	int exit_flag = 0;
2577 	int rqclnup = 0;
2578 
2579 	sigfillset(&curthread->t_hold);
2580 	sigdiffset(&curthread->t_hold, &cantmask);
2581 	for (;;) {
2582 		/*
2583 		 * if a segment is being unmapped, and the current
2584 		 * process's done queue is not empty, then every request
2585 		 * on the doneq with locked resources should be forced
2586 		 * to release their locks. By moving the doneq request
2587 		 * to the cleanupq, aio_cleanup() will process the cleanupq,
2588 		 * and place requests back onto the doneq. All requests
2589 		 * processed by aio_cleanup() will have their physical
2590 		 * resources unlocked.
2591 		 */
2592 		mutex_enter(&aiop->aio_mutex);
2593 		if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
2594 			aiop->aio_flags |= AIO_CLEANUP;
2595 			mutex_enter(&as->a_contents);
2596 			if (aiop->aio_rqclnup) {
2597 				aiop->aio_rqclnup = 0;
2598 				rqclnup = 1;
2599 			}
2600 
2601 			if ((rqclnup || AS_ISUNMAPWAIT(as)) &&
2602 			    aiop->aio_doneq) {
2603 				aio_req_t *doneqhead = aiop->aio_doneq;
2604 				mutex_exit(&as->a_contents);
2605 				aiop->aio_doneq = NULL;
2606 				aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
2607 			} else {
2608 				mutex_exit(&as->a_contents);
2609 			}
2610 		}
2611 		mutex_exit(&aiop->aio_mutex);
2612 		aio_cleanup(AIO_CLEANUP_THREAD);
2613 		/*
2614 		 * thread should block on the cleanupcv while
2615 		 * AIO_CLEANUP is set.
2616 		 */
2617 		cvp = &aiop->aio_cleanupcv;
2618 		mutex_enter(&aiop->aio_mutex);
2619 
2620 		if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
2621 		    aiop->aio_notifyq != NULL ||
2622 		    aiop->aio_portcleanupq != NULL) {
2623 			mutex_exit(&aiop->aio_mutex);
2624 			continue;
2625 		}
2626 		mutex_enter(&as->a_contents);
2627 
2628 		/*
2629 		 * AIO_CLEANUP determines when the cleanup thread
2630 		 * should be active. This flag is set when
2631 		 * the cleanup thread is awakened by as_unmap() or
2632 		 * due to DR operations.
2633 		 * The flag is cleared when the blocking as_unmap()
2634 		 * that originally awakened us is allowed to
2635 		 * complete. as_unmap() blocks when trying to
2636 		 * unmap a segment that has SOFTLOCKed pages. when
2637 		 * the segment's pages are all SOFTUNLOCKed,
2638 		 * as->a_flags & AS_UNMAPWAIT should be zero.
2639 		 *
2640 		 * In case of cleanup request by DR, the flag is cleared
2641 		 * once all the pending aio requests have been processed.
2642 		 *
2643 		 * The flag shouldn't be cleared right away if the
2644 		 * cleanup thread was interrupted because the process
2645 		 * is doing forkall(). This happens when cv_wait_sig()
2646 		 * returns zero, because it was awakened by a pokelwps().
2647 		 * If the process is not exiting, it must be doing forkall().
2648 		 */
2649 		if ((poked == 0) &&
2650 			((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
2651 					(aiop->aio_pending == 0))) {
2652 			aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
2653 			cvp = &as->a_cv;
2654 			rqclnup = 0;
2655 		}
2656 		mutex_exit(&aiop->aio_mutex);
2657 		if (poked) {
2658 			/*
2659 			 * If the process is exiting/killed, don't return
2660 			 * immediately without waiting for pending I/O's
2661 			 * and releasing the page locks.
2662 			 */
2663 			if (p->p_flag & (SEXITLWPS|SKILLED)) {
2664 				/*
2665 				 * If exit_flag is set, then it is
2666 				 * safe to exit because we have released
2667 				 * page locks of completed I/O's.
2668 				 */
2669 				if (exit_flag)
2670 					break;
2671 
2672 				mutex_exit(&as->a_contents);
2673 
2674 				/*
2675 				 * Wait for all the pending aio to complete.
2676 				 */
2677 				mutex_enter(&aiop->aio_mutex);
2678 				aiop->aio_flags |= AIO_REQ_BLOCK;
2679 				while (aiop->aio_pending != 0)
2680 					cv_wait(&aiop->aio_cleanupcv,
2681 						&aiop->aio_mutex);
2682 				mutex_exit(&aiop->aio_mutex);
2683 				exit_flag = 1;
2684 				continue;
2685 			} else if (p->p_flag &
2686 			    (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
2687 				/*
2688 				 * hold LWP until it
2689 				 * is continued.
2690 				 */
2691 				mutex_exit(&as->a_contents);
2692 				mutex_enter(&p->p_lock);
2693 				stop(PR_SUSPENDED, SUSPEND_NORMAL);
2694 				mutex_exit(&p->p_lock);
2695 				poked = 0;
2696 				continue;
2697 			}
2698 		} else {
2699 			/*
2700 			 * When started this thread will sleep on as->a_cv.
2701 			 * as_unmap will awake this thread if the
2702 			 * segment has SOFTLOCKed pages (poked = 0).
2703 			 * 1. pokelwps() awakes this thread =>
2704 			 *    break the loop to check SEXITLWPS, SHOLDFORK, etc
2705 			 * 2. as_unmap awakes this thread =>
2706 			 *    to break the loop it is necessary that
2707 			 *    - AS_UNMAPWAIT is set (as_unmap is waiting for
2708 			 *	memory to be unlocked)
2709 			 *    - AIO_CLEANUP is not set
2710 			 *	(if AIO_CLEANUP is set we have to wait for
2711 			 *	pending requests. aio_done will send a signal
2712 			 *	for every request which completes to continue
2713 			 *	unmapping the corresponding address range)
2714 			 * 3. A cleanup request will wake this thread up, ex.
2715 			 *    by the DR operations. The aio_rqclnup flag will
2716 			 *    be set.
2717 			 */
2718 			while (poked == 0) {
2719 				/*
2720 				 * we need to handle cleanup requests
2721 				 * that come in after we had just cleaned up,
2722 				 * so that we do cleanup of any new aio
2723 				 * requests that got completed and have
2724 				 * locked resources.
2725 				 */
2726 				if ((aiop->aio_rqclnup ||
2727 					(AS_ISUNMAPWAIT(as) != 0)) &&
2728 					(aiop->aio_flags & AIO_CLEANUP) == 0)
2729 					break;
2730 				poked = !cv_wait_sig(cvp, &as->a_contents);
2731 				if (AS_ISUNMAPWAIT(as) == 0)
2732 					cv_signal(cvp);
2733 				if (aiop->aio_outstanding != 0)
2734 					break;
2735 			}
2736 		}
2737 		mutex_exit(&as->a_contents);
2738 	}
2739 exit:
2740 	mutex_exit(&as->a_contents);
2741 	ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
2742 	aston(curthread);	/* make thread do post_syscall */
2743 	return (0);
2744 }
2745 
2746 /*
2747  * save a reference to a user's outstanding aio in a hash list.
2748  */
2749 static int
2750 aio_hash_insert(
2751 	aio_req_t *aio_reqp,
2752 	aio_t *aiop)
2753 {
2754 	long index;
2755 	aio_result_t *resultp = aio_reqp->aio_req_resultp;
2756 	aio_req_t *current;
2757 	aio_req_t **nextp;
2758 
2759 	index = AIO_HASH(resultp);
2760 	nextp = &aiop->aio_hash[index];
2761 	while ((current = *nextp) != NULL) {
2762 		if (current->aio_req_resultp == resultp)
2763 			return (DUPLICATE);
2764 		nextp = &current->aio_hash_next;
2765 	}
2766 	*nextp = aio_reqp;
2767 	aio_reqp->aio_hash_next = NULL;
2768 	return (0);
2769 }
2770 
2771 static int
2772 (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *,
2773     cred_t *)
2774 {
2775 	struct snode *sp;
2776 	dev_t		dev;
2777 	struct cb_ops  	*cb;
2778 	major_t		major;
2779 	int		(*aio_func)();
2780 
2781 	dev = vp->v_rdev;
2782 	major = getmajor(dev);
2783 
2784 	/*
2785 	 * return NULL for requests to files and STREAMs so
2786 	 * that libaio takes care of them.
2787 	 */
2788 	if (vp->v_type == VCHR) {
2789 		/* no stream device for kaio */
2790 		if (STREAMSTAB(major)) {
2791 			return (NULL);
2792 		}
2793 	} else {
2794 		return (NULL);
2795 	}
2796 
2797 	/*
2798 	 * Check old drivers which do not have async I/O entry points.
2799 	 */
2800 	if (devopsp[major]->devo_rev < 3)
2801 		return (NULL);
2802 
2803 	cb = devopsp[major]->devo_cb_ops;
2804 
2805 	if (cb->cb_rev < 1)
2806 		return (NULL);
2807 
2808 	/*
2809 	 * Check whether this device is a block device.
2810 	 * Kaio is not supported for devices like tty.
2811 	 */
2812 	if (cb->cb_strategy == nodev || cb->cb_strategy == NULL)
2813 		return (NULL);
2814 
2815 	/*
2816 	 * Clustering: If vnode is a PXFS vnode, then the device may be remote.
2817 	 * We cannot call the driver directly. Instead return the
2818 	 * PXFS functions.
2819 	 */
2820 
2821 	if (IS_PXFSVP(vp)) {
2822 		if (mode & FREAD)
2823 			return (clpxfs_aio_read);
2824 		else
2825 			return (clpxfs_aio_write);
2826 	}
2827 	if (mode & FREAD)
2828 		aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read;
2829 	else
2830 		aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write;
2831 
2832 	/*
2833 	 * Do we need this ?
2834 	 * nodev returns ENXIO anyway.
2835 	 */
2836 	if (aio_func == nodev)
2837 		return (NULL);
2838 
2839 	sp = VTOS(vp);
2840 	smark(sp, SACC);
2841 	return (aio_func);
2842 }
2843 
2844 /*
2845  * Clustering: We want check_vp to return a function prototyped
2846  * correctly that will be common to both PXFS and regular case.
2847  * We define this intermediate function that will do the right
2848  * thing for driver cases.
2849  */
2850 
2851 static int
2852 driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
2853 {
2854 	dev_t dev;
2855 	struct cb_ops  	*cb;
2856 
2857 	ASSERT(vp->v_type == VCHR);
2858 	ASSERT(!IS_PXFSVP(vp));
2859 	dev = VTOS(vp)->s_dev;
2860 	ASSERT(STREAMSTAB(getmajor(dev)) == NULL);
2861 
2862 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
2863 
2864 	ASSERT(cb->cb_awrite != nodev);
2865 	return ((*cb->cb_awrite)(dev, aio, cred_p));
2866 }
2867 
2868 /*
2869  * Clustering: We want check_vp to return a function prototyped
2870  * correctly that will be common to both PXFS and regular case.
2871  * We define this intermediate function that will do the right
2872  * thing for driver cases.
2873  */
2874 
2875 static int
2876 driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
2877 {
2878 	dev_t dev;
2879 	struct cb_ops  	*cb;
2880 
2881 	ASSERT(vp->v_type == VCHR);
2882 	ASSERT(!IS_PXFSVP(vp));
2883 	dev = VTOS(vp)->s_dev;
2884 	ASSERT(!STREAMSTAB(getmajor(dev)));
2885 
2886 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
2887 
2888 	ASSERT(cb->cb_aread != nodev);
2889 	return ((*cb->cb_aread)(dev, aio, cred_p));
2890 }
2891 
2892 /*
2893  * This routine is called when a largefile call is made by a 32bit
2894  * process on a ILP32 or LP64 kernel. All 64bit processes are large
2895  * file by definition and will call alio() instead.
2896  */
2897 static int
2898 alioLF(
2899 	int		mode_arg,
2900 	void		*aiocb_arg,
2901 	int		nent,
2902 	void		*sigev)
2903 {
2904 	file_t		*fp;
2905 	file_t		*prev_fp = NULL;
2906 	int		prev_mode = -1;
2907 	struct vnode	*vp;
2908 	aio_lio_t	*head;
2909 	aio_req_t	*reqp;
2910 	aio_t		*aiop;
2911 	caddr_t		cbplist;
2912 	aiocb64_32_t	cb64;
2913 	aiocb64_32_t	*aiocb = &cb64;
2914 	aiocb64_32_t	*cbp;
2915 	caddr32_t	*ucbp;
2916 #ifdef _LP64
2917 	aiocb_t		aiocb_n;
2918 #endif
2919 	struct sigevent32	sigevk;
2920 	sigqueue_t	*sqp;
2921 	int		(*aio_func)();
2922 	int		mode;
2923 	int		error = 0;
2924 	int		aio_errors = 0;
2925 	int		i;
2926 	size_t		ssize;
2927 	int		deadhead = 0;
2928 	int		aio_notsupported = 0;
2929 	int		lio_head_port;
2930 	int		aio_port;
2931 	int		aio_thread;
2932 	port_kevent_t	*pkevtp = NULL;
2933 	port_notify32_t	pnotify;
2934 	int		event;
2935 
2936 	aiop = curproc->p_aio;
2937 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
2938 		return (EINVAL);
2939 
2940 	ASSERT(get_udatamodel() == DATAMODEL_ILP32);
2941 
2942 	ssize = (sizeof (caddr32_t) * nent);
2943 	cbplist = kmem_alloc(ssize, KM_SLEEP);
2944 	ucbp = (caddr32_t *)cbplist;
2945 
2946 	if (copyin(aiocb_arg, cbplist, ssize) ||
2947 	    (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
2948 		kmem_free(cbplist, ssize);
2949 		return (EFAULT);
2950 	}
2951 
2952 	/* Event Ports  */
2953 	if (sigev &&
2954 	    (sigevk.sigev_notify == SIGEV_THREAD ||
2955 	    sigevk.sigev_notify == SIGEV_PORT)) {
2956 		if (sigevk.sigev_notify == SIGEV_THREAD) {
2957 			pnotify.portnfy_port = sigevk.sigev_signo;
2958 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
2959 		} else if (copyin(
2960 		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
2961 		    &pnotify, sizeof (pnotify))) {
2962 			kmem_free(cbplist, ssize);
2963 			return (EFAULT);
2964 		}
2965 		error = port_alloc_event(pnotify.portnfy_port,
2966 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
2967 		if (error) {
2968 			if (error == ENOMEM || error == EAGAIN)
2969 				error = EAGAIN;
2970 			else
2971 				error = EINVAL;
2972 			kmem_free(cbplist, ssize);
2973 			return (error);
2974 		}
2975 		lio_head_port = pnotify.portnfy_port;
2976 	}
2977 
2978 	/*
2979 	 * a list head should be allocated if notification is
2980 	 * enabled for this list.
2981 	 */
2982 	head = NULL;
2983 
2984 	if (mode_arg == LIO_WAIT || sigev) {
2985 		mutex_enter(&aiop->aio_mutex);
2986 		error = aio_lio_alloc(&head);
2987 		mutex_exit(&aiop->aio_mutex);
2988 		if (error)
2989 			goto done;
2990 		deadhead = 1;
2991 		head->lio_nent = nent;
2992 		head->lio_refcnt = nent;
2993 		head->lio_port = -1;
2994 		head->lio_portkev = NULL;
2995 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
2996 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
2997 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
2998 			if (sqp == NULL) {
2999 				error = EAGAIN;
3000 				goto done;
3001 			}
3002 			sqp->sq_func = NULL;
3003 			sqp->sq_next = NULL;
3004 			sqp->sq_info.si_code = SI_ASYNCIO;
3005 			sqp->sq_info.si_pid = curproc->p_pid;
3006 			sqp->sq_info.si_ctid = PRCTID(curproc);
3007 			sqp->sq_info.si_zoneid = getzoneid();
3008 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3009 			sqp->sq_info.si_signo = sigevk.sigev_signo;
3010 			sqp->sq_info.si_value.sival_int =
3011 			    sigevk.sigev_value.sival_int;
3012 			head->lio_sigqp = sqp;
3013 		} else {
3014 			head->lio_sigqp = NULL;
3015 		}
3016 		if (pkevtp) {
3017 			/*
3018 			 * Prepare data to send when list of aiocb's
3019 			 * has completed.
3020 			 */
3021 			port_init_event(pkevtp, (uintptr_t)sigev,
3022 			    (void *)(uintptr_t)pnotify.portnfy_user,
3023 			    NULL, head);
3024 			pkevtp->portkev_events = AIOLIO64;
3025 			head->lio_portkev = pkevtp;
3026 			head->lio_port = pnotify.portnfy_port;
3027 		}
3028 	}
3029 
3030 	for (i = 0; i < nent; i++, ucbp++) {
3031 
3032 		cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
3033 		/* skip entry if it can't be copied. */
3034 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
3035 			if (head) {
3036 				mutex_enter(&aiop->aio_mutex);
3037 				head->lio_nent--;
3038 				head->lio_refcnt--;
3039 				mutex_exit(&aiop->aio_mutex);
3040 			}
3041 			continue;
3042 		}
3043 
3044 		/* skip if opcode for aiocb is LIO_NOP */
3045 		mode = aiocb->aio_lio_opcode;
3046 		if (mode == LIO_NOP) {
3047 			cbp = NULL;
3048 			if (head) {
3049 				mutex_enter(&aiop->aio_mutex);
3050 				head->lio_nent--;
3051 				head->lio_refcnt--;
3052 				mutex_exit(&aiop->aio_mutex);
3053 			}
3054 			continue;
3055 		}
3056 
3057 		/* increment file descriptor's ref count. */
3058 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
3059 			lio_set_uerror(&cbp->aio_resultp, EBADF);
3060 			if (head) {
3061 				mutex_enter(&aiop->aio_mutex);
3062 				head->lio_nent--;
3063 				head->lio_refcnt--;
3064 				mutex_exit(&aiop->aio_mutex);
3065 			}
3066 			aio_errors++;
3067 			continue;
3068 		}
3069 
3070 		/*
3071 		 * check the permission of the partition
3072 		 */
3073 		if ((fp->f_flag & mode) == 0) {
3074 			releasef(aiocb->aio_fildes);
3075 			lio_set_uerror(&cbp->aio_resultp, EBADF);
3076 			if (head) {
3077 				mutex_enter(&aiop->aio_mutex);
3078 				head->lio_nent--;
3079 				head->lio_refcnt--;
3080 				mutex_exit(&aiop->aio_mutex);
3081 			}
3082 			aio_errors++;
3083 			continue;
3084 		}
3085 
3086 		/*
3087 		 * common case where requests are to the same fd
3088 		 * for the same r/w operation
3089 		 * for UFS, need to set EBADFD
3090 		 */
3091 		vp = fp->f_vnode;
3092 		if (fp != prev_fp || mode != prev_mode) {
3093 			aio_func = check_vp(vp, mode);
3094 			if (aio_func == NULL) {
3095 				prev_fp = NULL;
3096 				releasef(aiocb->aio_fildes);
3097 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
3098 				aio_notsupported++;
3099 				if (head) {
3100 					mutex_enter(&aiop->aio_mutex);
3101 					head->lio_nent--;
3102 					head->lio_refcnt--;
3103 					mutex_exit(&aiop->aio_mutex);
3104 				}
3105 				continue;
3106 			} else {
3107 				prev_fp = fp;
3108 				prev_mode = mode;
3109 			}
3110 		}
3111 
3112 #ifdef	_LP64
3113 		aiocb_LFton(aiocb, &aiocb_n);
3114 		error = aio_req_setup(&reqp, aiop, &aiocb_n,
3115 		    (aio_result_t *)&cbp->aio_resultp, vp);
3116 #else
3117 		error = aio_req_setupLF(&reqp, aiop, aiocb,
3118 		    (aio_result_t *)&cbp->aio_resultp, vp);
3119 #endif  /* _LP64 */
3120 		if (error) {
3121 			releasef(aiocb->aio_fildes);
3122 			lio_set_uerror(&cbp->aio_resultp, error);
3123 			if (head) {
3124 				mutex_enter(&aiop->aio_mutex);
3125 				head->lio_nent--;
3126 				head->lio_refcnt--;
3127 				mutex_exit(&aiop->aio_mutex);
3128 			}
3129 			aio_errors++;
3130 			continue;
3131 		}
3132 
3133 		reqp->aio_req_lio = head;
3134 		deadhead = 0;
3135 
3136 		/*
3137 		 * Set the errno field now before sending the request to
3138 		 * the driver to avoid a race condition
3139 		 */
3140 		(void) suword32(&cbp->aio_resultp.aio_errno,
3141 		    EINPROGRESS);
3142 
3143 		reqp->aio_req_iocb.iocb32 = *ucbp;
3144 
3145 		event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
3146 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
3147 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
3148 		if (aio_port | aio_thread) {
3149 			port_kevent_t *lpkevp;
3150 			/*
3151 			 * Prepare data to send with each aiocb completed.
3152 			 */
3153 			if (aio_port) {
3154 				void *paddr = (void *)(uintptr_t)
3155 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
3156 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
3157 					error = EFAULT;
3158 			} else {	/* aio_thread */
3159 				pnotify.portnfy_port =
3160 				    aiocb->aio_sigevent.sigev_signo;
3161 				pnotify.portnfy_user =
3162 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
3163 			}
3164 			if (error)
3165 				/* EMPTY */;
3166 			else if (pkevtp != NULL &&
3167 			    pnotify.portnfy_port == lio_head_port)
3168 				error = port_dup_event(pkevtp, &lpkevp,
3169 				    PORT_ALLOC_DEFAULT);
3170 			else
3171 				error = port_alloc_event(pnotify.portnfy_port,
3172 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
3173 				    &lpkevp);
3174 			if (error == 0) {
3175 				port_init_event(lpkevp, (uintptr_t)*ucbp,
3176 				    (void *)(uintptr_t)pnotify.portnfy_user,
3177 				    aio_port_callback, reqp);
3178 				lpkevp->portkev_events = event;
3179 				reqp->aio_req_portkev = lpkevp;
3180 				reqp->aio_req_port = pnotify.portnfy_port;
3181 			}
3182 		}
3183 
3184 		/*
3185 		 * send the request to driver.
3186 		 */
3187 		if (error == 0) {
3188 			if (aiocb->aio_nbytes == 0) {
3189 				clear_active_fd(aiocb->aio_fildes);
3190 				aio_zerolen(reqp);
3191 				continue;
3192 			}
3193 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
3194 			    CRED());
3195 		}
3196 
3197 		/*
3198 		 * the fd's ref count is not decremented until the IO has
3199 		 * completed unless there was an error.
3200 		 */
3201 		if (error) {
3202 			releasef(aiocb->aio_fildes);
3203 			lio_set_uerror(&cbp->aio_resultp, error);
3204 			if (head) {
3205 				mutex_enter(&aiop->aio_mutex);
3206 				head->lio_nent--;
3207 				head->lio_refcnt--;
3208 				mutex_exit(&aiop->aio_mutex);
3209 			}
3210 			if (error == ENOTSUP)
3211 				aio_notsupported++;
3212 			else
3213 				aio_errors++;
3214 			lio_set_error(reqp);
3215 		} else {
3216 			clear_active_fd(aiocb->aio_fildes);
3217 		}
3218 	}
3219 
3220 	if (aio_notsupported) {
3221 		error = ENOTSUP;
3222 	} else if (aio_errors) {
3223 		/*
3224 		 * return EIO if any request failed
3225 		 */
3226 		error = EIO;
3227 	}
3228 
3229 	if (mode_arg == LIO_WAIT) {
3230 		mutex_enter(&aiop->aio_mutex);
3231 		while (head->lio_refcnt > 0) {
3232 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
3233 				mutex_exit(&aiop->aio_mutex);
3234 				error = EINTR;
3235 				goto done;
3236 			}
3237 		}
3238 		mutex_exit(&aiop->aio_mutex);
3239 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
3240 	}
3241 
3242 done:
3243 	kmem_free(cbplist, ssize);
3244 	if (deadhead) {
3245 		if (head->lio_sigqp)
3246 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
3247 		if (head->lio_portkev)
3248 			port_free_event(head->lio_portkev);
3249 		kmem_free(head, sizeof (aio_lio_t));
3250 	}
3251 	return (error);
3252 }
3253 
3254 #ifdef  _SYSCALL32_IMPL
3255 static void
3256 aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
3257 {
3258 	dest->aio_fildes = src->aio_fildes;
3259 	dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
3260 	dest->aio_nbytes = (size_t)src->aio_nbytes;
3261 	dest->aio_offset = (off_t)src->aio_offset;
3262 	dest->aio_reqprio = src->aio_reqprio;
3263 	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
3264 	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
3265 
3266 	/*
3267 	 * See comment in sigqueue32() on handling of 32-bit
3268 	 * sigvals in a 64-bit kernel.
3269 	 */
3270 	dest->aio_sigevent.sigev_value.sival_int =
3271 	    (int)src->aio_sigevent.sigev_value.sival_int;
3272 	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
3273 	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
3274 	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
3275 	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
3276 	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
3277 	dest->aio_lio_opcode = src->aio_lio_opcode;
3278 	dest->aio_state = src->aio_state;
3279 	dest->aio__pad[0] = src->aio__pad[0];
3280 }
3281 #endif
3282 
3283 /*
3284  * This function is used only for largefile calls made by
3285  * 32 bit applications.
3286  */
3287 static int
3288 aio_req_setupLF(
3289 	aio_req_t	**reqpp,
3290 	aio_t		*aiop,
3291 	aiocb64_32_t	*arg,
3292 	aio_result_t	*resultp,
3293 	vnode_t		*vp)
3294 {
3295 	sigqueue_t	*sqp = NULL;
3296 	aio_req_t	*reqp;
3297 	struct uio	*uio;
3298 	struct sigevent32 *sigev;
3299 	int 		error;
3300 
3301 	sigev = &arg->aio_sigevent;
3302 	if (sigev->sigev_notify == SIGEV_SIGNAL &&
3303 	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
3304 		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
3305 		if (sqp == NULL)
3306 			return (EAGAIN);
3307 		sqp->sq_func = NULL;
3308 		sqp->sq_next = NULL;
3309 		sqp->sq_info.si_code = SI_ASYNCIO;
3310 		sqp->sq_info.si_pid = curproc->p_pid;
3311 		sqp->sq_info.si_ctid = PRCTID(curproc);
3312 		sqp->sq_info.si_zoneid = getzoneid();
3313 		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3314 		sqp->sq_info.si_signo = sigev->sigev_signo;
3315 		sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
3316 	}
3317 
3318 	mutex_enter(&aiop->aio_mutex);
3319 
3320 	if (aiop->aio_flags & AIO_REQ_BLOCK) {
3321 		mutex_exit(&aiop->aio_mutex);
3322 		if (sqp)
3323 			kmem_free(sqp, sizeof (sigqueue_t));
3324 		return (EIO);
3325 	}
3326 	/*
3327 	 * get an aio_reqp from the free list or allocate one
3328 	 * from dynamic memory.
3329 	 */
3330 	if (error = aio_req_alloc(&reqp, resultp)) {
3331 		mutex_exit(&aiop->aio_mutex);
3332 		if (sqp)
3333 			kmem_free(sqp, sizeof (sigqueue_t));
3334 		return (error);
3335 	}
3336 	aiop->aio_pending++;
3337 	aiop->aio_outstanding++;
3338 	reqp->aio_req_flags = AIO_PENDING;
3339 	if (sigev->sigev_notify == SIGEV_THREAD ||
3340 	    sigev->sigev_notify == SIGEV_PORT)
3341 		aio_enq(&aiop->aio_portpending, reqp, 0);
3342 	mutex_exit(&aiop->aio_mutex);
3343 	/*
3344 	 * initialize aio request.
3345 	 */
3346 	reqp->aio_req_fd = arg->aio_fildes;
3347 	reqp->aio_req_sigqp = sqp;
3348 	reqp->aio_req_iocb.iocb = NULL;
3349 	reqp->aio_req_lio = NULL;
3350 	reqp->aio_req_buf.b_file = vp;
3351 	uio = reqp->aio_req.aio_uio;
3352 	uio->uio_iovcnt = 1;
3353 	uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
3354 	uio->uio_iov->iov_len = arg->aio_nbytes;
3355 	uio->uio_loffset = arg->aio_offset;
3356 	*reqpp = reqp;
3357 	return (0);
3358 }
3359 
3360 /*
3361  * This routine is called when a non largefile call is made by a 32bit
3362  * process on a ILP32 or LP64 kernel.
3363  */
3364 static int
3365 alio32(
3366 	int		mode_arg,
3367 	void		*aiocb_arg,
3368 	int		nent,
3369 	void		*sigev)
3370 {
3371 	file_t		*fp;
3372 	file_t		*prev_fp = NULL;
3373 	int		prev_mode = -1;
3374 	struct vnode	*vp;
3375 	aio_lio_t	*head;
3376 	aio_req_t	*reqp;
3377 	aio_t		*aiop;
3378 	caddr_t		cbplist;
3379 	aiocb_t		cb;
3380 	aiocb_t		*aiocb = &cb;
3381 #ifdef	_LP64
3382 	aiocb32_t	*cbp;
3383 	caddr32_t	*ucbp;
3384 	aiocb32_t	cb32;
3385 	aiocb32_t	*aiocb32 = &cb32;
3386 	struct sigevent32	sigevk;
3387 #else
3388 	aiocb_t		*cbp, **ucbp;
3389 	struct sigevent	sigevk;
3390 #endif
3391 	sigqueue_t	*sqp;
3392 	int		(*aio_func)();
3393 	int		mode;
3394 	int		error = 0;
3395 	int		aio_errors = 0;
3396 	int		i;
3397 	size_t		ssize;
3398 	int		deadhead = 0;
3399 	int		aio_notsupported = 0;
3400 	int		lio_head_port;
3401 	int		aio_port;
3402 	int		aio_thread;
3403 	port_kevent_t	*pkevtp = NULL;
3404 #ifdef	_LP64
3405 	port_notify32_t	pnotify;
3406 #else
3407 	port_notify_t	pnotify;
3408 #endif
3409 	int		event;
3410 
3411 	aiop = curproc->p_aio;
3412 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
3413 		return (EINVAL);
3414 
3415 #ifdef	_LP64
3416 	ssize = (sizeof (caddr32_t) * nent);
3417 #else
3418 	ssize = (sizeof (aiocb_t *) * nent);
3419 #endif
3420 	cbplist = kmem_alloc(ssize, KM_SLEEP);
3421 	ucbp = (void *)cbplist;
3422 
3423 	if (copyin(aiocb_arg, cbplist, ssize) ||
3424 	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) {
3425 		kmem_free(cbplist, ssize);
3426 		return (EFAULT);
3427 	}
3428 
3429 	/* Event Ports  */
3430 	if (sigev &&
3431 	    (sigevk.sigev_notify == SIGEV_THREAD ||
3432 	    sigevk.sigev_notify == SIGEV_PORT)) {
3433 		if (sigevk.sigev_notify == SIGEV_THREAD) {
3434 			pnotify.portnfy_port = sigevk.sigev_signo;
3435 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
3436 		} else if (copyin(
3437 		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
3438 		    &pnotify, sizeof (pnotify))) {
3439 			kmem_free(cbplist, ssize);
3440 			return (EFAULT);
3441 		}
3442 		error = port_alloc_event(pnotify.portnfy_port,
3443 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
3444 		if (error) {
3445 			if (error == ENOMEM || error == EAGAIN)
3446 				error = EAGAIN;
3447 			else
3448 				error = EINVAL;
3449 			kmem_free(cbplist, ssize);
3450 			return (error);
3451 		}
3452 		lio_head_port = pnotify.portnfy_port;
3453 	}
3454 
3455 	/*
3456 	 * a list head should be allocated if notification is
3457 	 * enabled for this list.
3458 	 */
3459 	head = NULL;
3460 
3461 	if (mode_arg == LIO_WAIT || sigev) {
3462 		mutex_enter(&aiop->aio_mutex);
3463 		error = aio_lio_alloc(&head);
3464 		mutex_exit(&aiop->aio_mutex);
3465 		if (error)
3466 			goto done;
3467 		deadhead = 1;
3468 		head->lio_nent = nent;
3469 		head->lio_refcnt = nent;
3470 		head->lio_port = -1;
3471 		head->lio_portkev = NULL;
3472 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
3473 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
3474 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
3475 			if (sqp == NULL) {
3476 				error = EAGAIN;
3477 				goto done;
3478 			}
3479 			sqp->sq_func = NULL;
3480 			sqp->sq_next = NULL;
3481 			sqp->sq_info.si_code = SI_ASYNCIO;
3482 			sqp->sq_info.si_pid = curproc->p_pid;
3483 			sqp->sq_info.si_ctid = PRCTID(curproc);
3484 			sqp->sq_info.si_zoneid = getzoneid();
3485 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3486 			sqp->sq_info.si_signo = sigevk.sigev_signo;
3487 			sqp->sq_info.si_value.sival_int =
3488 			    sigevk.sigev_value.sival_int;
3489 			head->lio_sigqp = sqp;
3490 		} else {
3491 			head->lio_sigqp = NULL;
3492 		}
3493 		if (pkevtp) {
3494 			/*
3495 			 * Prepare data to send when list of aiocb's has
3496 			 * completed.
3497 			 */
3498 			port_init_event(pkevtp, (uintptr_t)sigev,
3499 			    (void *)(uintptr_t)pnotify.portnfy_user,
3500 			    NULL, head);
3501 			pkevtp->portkev_events = AIOLIO;
3502 			head->lio_portkev = pkevtp;
3503 			head->lio_port = pnotify.portnfy_port;
3504 		}
3505 	}
3506 
3507 	for (i = 0; i < nent; i++, ucbp++) {
3508 
3509 		/* skip entry if it can't be copied. */
3510 #ifdef	_LP64
3511 		cbp = (aiocb32_t *)(uintptr_t)*ucbp;
3512 		if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32)))
3513 #else
3514 		cbp = (aiocb_t *)*ucbp;
3515 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb)))
3516 #endif
3517 		{
3518 			if (head) {
3519 				mutex_enter(&aiop->aio_mutex);
3520 				head->lio_nent--;
3521 				head->lio_refcnt--;
3522 				mutex_exit(&aiop->aio_mutex);
3523 			}
3524 			continue;
3525 		}
3526 #ifdef	_LP64
3527 		/*
3528 		 * copy 32 bit structure into 64 bit structure
3529 		 */
3530 		aiocb_32ton(aiocb32, aiocb);
3531 #endif /* _LP64 */
3532 
3533 		/* skip if opcode for aiocb is LIO_NOP */
3534 		mode = aiocb->aio_lio_opcode;
3535 		if (mode == LIO_NOP) {
3536 			cbp = NULL;
3537 			if (head) {
3538 				mutex_enter(&aiop->aio_mutex);
3539 				head->lio_nent--;
3540 				head->lio_refcnt--;
3541 				mutex_exit(&aiop->aio_mutex);
3542 			}
3543 			continue;
3544 		}
3545 
3546 		/* increment file descriptor's ref count. */
3547 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
3548 			lio_set_uerror(&cbp->aio_resultp, EBADF);
3549 			if (head) {
3550 				mutex_enter(&aiop->aio_mutex);
3551 				head->lio_nent--;
3552 				head->lio_refcnt--;
3553 				mutex_exit(&aiop->aio_mutex);
3554 			}
3555 			aio_errors++;
3556 			continue;
3557 		}
3558 
3559 		/*
3560 		 * check the permission of the partition
3561 		 */
3562 		if ((fp->f_flag & mode) == 0) {
3563 			releasef(aiocb->aio_fildes);
3564 			lio_set_uerror(&cbp->aio_resultp, EBADF);
3565 			if (head) {
3566 				mutex_enter(&aiop->aio_mutex);
3567 				head->lio_nent--;
3568 				head->lio_refcnt--;
3569 				mutex_exit(&aiop->aio_mutex);
3570 			}
3571 			aio_errors++;
3572 			continue;
3573 		}
3574 
3575 		/*
3576 		 * common case where requests are to the same fd
3577 		 * for the same r/w operation
3578 		 * for UFS, need to set EBADFD
3579 		 */
3580 		vp = fp->f_vnode;
3581 		if (fp != prev_fp || mode != prev_mode) {
3582 			aio_func = check_vp(vp, mode);
3583 			if (aio_func == NULL) {
3584 				prev_fp = NULL;
3585 				releasef(aiocb->aio_fildes);
3586 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
3587 				aio_notsupported++;
3588 				if (head) {
3589 					mutex_enter(&aiop->aio_mutex);
3590 					head->lio_nent--;
3591 					head->lio_refcnt--;
3592 					mutex_exit(&aiop->aio_mutex);
3593 				}
3594 				continue;
3595 			} else {
3596 				prev_fp = fp;
3597 				prev_mode = mode;
3598 			}
3599 		}
3600 
3601 		error = aio_req_setup(&reqp, aiop, aiocb,
3602 		    (aio_result_t *)&cbp->aio_resultp, vp);
3603 		if (error) {
3604 			releasef(aiocb->aio_fildes);
3605 			lio_set_uerror(&cbp->aio_resultp, error);
3606 			if (head) {
3607 				mutex_enter(&aiop->aio_mutex);
3608 				head->lio_nent--;
3609 				head->lio_refcnt--;
3610 				mutex_exit(&aiop->aio_mutex);
3611 			}
3612 			aio_errors++;
3613 			continue;
3614 		}
3615 
3616 		reqp->aio_req_lio = head;
3617 		deadhead = 0;
3618 
3619 		/*
3620 		 * Set the errno field now before sending the request to
3621 		 * the driver to avoid a race condition
3622 		 */
3623 		(void) suword32(&cbp->aio_resultp.aio_errno,
3624 		    EINPROGRESS);
3625 
3626 		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp;
3627 
3628 		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
3629 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
3630 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
3631 		if (aio_port | aio_thread) {
3632 			port_kevent_t *lpkevp;
3633 			/*
3634 			 * Prepare data to send with each aiocb completed.
3635 			 */
3636 #ifdef _LP64
3637 			if (aio_port) {
3638 				void *paddr = (void  *)(uintptr_t)
3639 				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
3640 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
3641 					error = EFAULT;
3642 			} else {	/* aio_thread */
3643 				pnotify.portnfy_port =
3644 				    aiocb32->aio_sigevent.sigev_signo;
3645 				pnotify.portnfy_user =
3646 				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
3647 			}
3648 #else
3649 			if (aio_port) {
3650 				void *paddr =
3651 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
3652 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
3653 					error = EFAULT;
3654 			} else {	/* aio_thread */
3655 				pnotify.portnfy_port =
3656 				    aiocb->aio_sigevent.sigev_signo;
3657 				pnotify.portnfy_user =
3658 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
3659 			}
3660 #endif
3661 			if (error)
3662 				/* EMPTY */;
3663 			else if (pkevtp != NULL &&
3664 			    pnotify.portnfy_port == lio_head_port)
3665 				error = port_dup_event(pkevtp, &lpkevp,
3666 				    PORT_ALLOC_DEFAULT);
3667 			else
3668 				error = port_alloc_event(pnotify.portnfy_port,
3669 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
3670 				    &lpkevp);
3671 			if (error == 0) {
3672 				port_init_event(lpkevp, (uintptr_t)cbp,
3673 				    (void *)(uintptr_t)pnotify.portnfy_user,
3674 				    aio_port_callback, reqp);
3675 				lpkevp->portkev_events = event;
3676 				reqp->aio_req_portkev = lpkevp;
3677 				reqp->aio_req_port = pnotify.portnfy_port;
3678 			}
3679 		}
3680 
3681 		/*
3682 		 * send the request to driver.
3683 		 */
3684 		if (error == 0) {
3685 			if (aiocb->aio_nbytes == 0) {
3686 				clear_active_fd(aiocb->aio_fildes);
3687 				aio_zerolen(reqp);
3688 				continue;
3689 			}
3690 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
3691 			    CRED());
3692 		}
3693 
3694 		/*
3695 		 * the fd's ref count is not decremented until the IO has
3696 		 * completed unless there was an error.
3697 		 */
3698 		if (error) {
3699 			releasef(aiocb->aio_fildes);
3700 			lio_set_uerror(&cbp->aio_resultp, error);
3701 			if (head) {
3702 				mutex_enter(&aiop->aio_mutex);
3703 				head->lio_nent--;
3704 				head->lio_refcnt--;
3705 				mutex_exit(&aiop->aio_mutex);
3706 			}
3707 			if (error == ENOTSUP)
3708 				aio_notsupported++;
3709 			else
3710 				aio_errors++;
3711 			lio_set_error(reqp);
3712 		} else {
3713 			clear_active_fd(aiocb->aio_fildes);
3714 		}
3715 	}
3716 
3717 	if (aio_notsupported) {
3718 		error = ENOTSUP;
3719 	} else if (aio_errors) {
3720 		/*
3721 		 * return EIO if any request failed
3722 		 */
3723 		error = EIO;
3724 	}
3725 
3726 	if (mode_arg == LIO_WAIT) {
3727 		mutex_enter(&aiop->aio_mutex);
3728 		while (head->lio_refcnt > 0) {
3729 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
3730 				mutex_exit(&aiop->aio_mutex);
3731 				error = EINTR;
3732 				goto done;
3733 			}
3734 		}
3735 		mutex_exit(&aiop->aio_mutex);
3736 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
3737 	}
3738 
3739 done:
3740 	kmem_free(cbplist, ssize);
3741 	if (deadhead) {
3742 		if (head->lio_sigqp)
3743 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
3744 		if (head->lio_portkev)
3745 			port_free_event(head->lio_portkev);
3746 		kmem_free(head, sizeof (aio_lio_t));
3747 	}
3748 	return (error);
3749 }
3750 
3751 
3752 #ifdef  _SYSCALL32_IMPL
3753 void
3754 aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
3755 {
3756 	dest->aio_fildes = src->aio_fildes;
3757 	dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
3758 	dest->aio_nbytes = (size_t)src->aio_nbytes;
3759 	dest->aio_offset = (off_t)src->aio_offset;
3760 	dest->aio_reqprio = src->aio_reqprio;
3761 	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
3762 	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
3763 
3764 	/*
3765 	 * See comment in sigqueue32() on handling of 32-bit
3766 	 * sigvals in a 64-bit kernel.
3767 	 */
3768 	dest->aio_sigevent.sigev_value.sival_int =
3769 	    (int)src->aio_sigevent.sigev_value.sival_int;
3770 	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
3771 	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
3772 	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
3773 	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
3774 	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
3775 	dest->aio_lio_opcode = src->aio_lio_opcode;
3776 	dest->aio_state = src->aio_state;
3777 	dest->aio__pad[0] = src->aio__pad[0];
3778 }
3779 #endif /* _SYSCALL32_IMPL */
3780 
3781 /*
3782  * aio_port_callback() is called just before the event is retrieved from the
3783  * port. The task of this callback function is to finish the work of the
3784  * transaction for the application, it means :
3785  * - copyout transaction data to the application
3786  *	(this thread is running in the right process context)
3787  * - keep trace of the transaction (update of counters).
3788  * - free allocated buffers
3789  * The aiocb pointer is the object element of the port_kevent_t structure.
3790  *
3791  * flag :
3792  *	PORT_CALLBACK_DEFAULT : do copyout and free resources
3793  *	PORT_CALLBACK_CLOSE   : don't do copyout, free resources
3794  */
3795 
3796 /*ARGSUSED*/
3797 int
3798 aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
3799 {
3800 	aio_t		*aiop = curproc->p_aio;
3801 	aio_req_t	*reqp = arg;
3802 	struct	iovec	*iov;
3803 	struct	buf	*bp;
3804 	void		*resultp;
3805 
3806 	if (pid != curproc->p_pid) {
3807 		/* wrong proc !!, can not deliver data here ... */
3808 		return (EACCES);
3809 	}
3810 
3811 	mutex_enter(&aiop->aio_portq_mutex);
3812 	reqp->aio_req_portkev = NULL;
3813 	aio_req_remove_portq(aiop, reqp); /* remove request from portq */
3814 	mutex_exit(&aiop->aio_portq_mutex);
3815 	aphysio_unlock(reqp);		/* unlock used pages */
3816 	mutex_enter(&aiop->aio_mutex);
3817 	if (reqp->aio_req_flags & AIO_COPYOUTDONE) {
3818 		aio_req_free_port(aiop, reqp);	/* back to free list */
3819 		mutex_exit(&aiop->aio_mutex);
3820 		return (0);
3821 	}
3822 
3823 	iov = reqp->aio_req_uio.uio_iov;
3824 	bp = &reqp->aio_req_buf;
3825 	resultp = (void *)reqp->aio_req_resultp;
3826 	aio_req_free_port(aiop, reqp);	/* request struct back to free list */
3827 	mutex_exit(&aiop->aio_mutex);
3828 	if (flag == PORT_CALLBACK_DEFAULT)
3829 		aio_copyout_result_port(iov, bp, resultp);
3830 	return (0);
3831 }
3832