xref: /titanic_53/usr/src/uts/common/os/aio.c (revision fa7f62f0bce8a70957bb131f796a8f5823b41d69)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
534709573Sraf  * Common Development and Distribution License (the "License").
634709573Sraf  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
2134709573Sraf 
227c478bd9Sstevel@tonic-gate /*
233e3bf233Sraf  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate /*
307c478bd9Sstevel@tonic-gate  * Kernel asynchronous I/O.
317c478bd9Sstevel@tonic-gate  * This is only for raw devices now (as of Nov. 1993).
327c478bd9Sstevel@tonic-gate  */
337c478bd9Sstevel@tonic-gate 
347c478bd9Sstevel@tonic-gate #include <sys/types.h>
357c478bd9Sstevel@tonic-gate #include <sys/errno.h>
367c478bd9Sstevel@tonic-gate #include <sys/conf.h>
377c478bd9Sstevel@tonic-gate #include <sys/file.h>
387c478bd9Sstevel@tonic-gate #include <sys/fs/snode.h>
397c478bd9Sstevel@tonic-gate #include <sys/unistd.h>
407c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
417c478bd9Sstevel@tonic-gate #include <vm/as.h>
427c478bd9Sstevel@tonic-gate #include <vm/faultcode.h>
437c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
447c478bd9Sstevel@tonic-gate #include <sys/procfs.h>
457c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
467c478bd9Sstevel@tonic-gate #include <sys/autoconf.h>
477c478bd9Sstevel@tonic-gate #include <sys/ddi_impldefs.h>
487c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
497c478bd9Sstevel@tonic-gate #include <sys/aio_impl.h>
507c478bd9Sstevel@tonic-gate #include <sys/debug.h>
517c478bd9Sstevel@tonic-gate #include <sys/param.h>
527c478bd9Sstevel@tonic-gate #include <sys/systm.h>
537c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h>
547c478bd9Sstevel@tonic-gate #include <sys/fs/pxfs_ki.h>
557c478bd9Sstevel@tonic-gate #include <sys/contract/process_impl.h>
567c478bd9Sstevel@tonic-gate 
577c478bd9Sstevel@tonic-gate /*
587c478bd9Sstevel@tonic-gate  * external entry point.
597c478bd9Sstevel@tonic-gate  */
607c478bd9Sstevel@tonic-gate #ifdef _LP64
617c478bd9Sstevel@tonic-gate static int64_t kaioc(long, long, long, long, long, long);
627c478bd9Sstevel@tonic-gate #endif
637c478bd9Sstevel@tonic-gate static int kaio(ulong_t *, rval_t *);
647c478bd9Sstevel@tonic-gate 
657c478bd9Sstevel@tonic-gate 
667c478bd9Sstevel@tonic-gate #define	AIO_64	0
677c478bd9Sstevel@tonic-gate #define	AIO_32	1
687c478bd9Sstevel@tonic-gate #define	AIO_LARGEFILE	2
697c478bd9Sstevel@tonic-gate 
707c478bd9Sstevel@tonic-gate /*
717c478bd9Sstevel@tonic-gate  * implementation specific functions (private)
727c478bd9Sstevel@tonic-gate  */
737c478bd9Sstevel@tonic-gate #ifdef _LP64
7434709573Sraf static int alio(int, aiocb_t **, int, struct sigevent *);
757c478bd9Sstevel@tonic-gate #endif
767c478bd9Sstevel@tonic-gate static int aionotify(void);
777c478bd9Sstevel@tonic-gate static int aioinit(void);
787c478bd9Sstevel@tonic-gate static int aiostart(void);
797c478bd9Sstevel@tonic-gate static void alio_cleanup(aio_t *, aiocb_t **, int, int);
807c478bd9Sstevel@tonic-gate static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
817c478bd9Sstevel@tonic-gate     cred_t *);
8234b3058fSpraks static void lio_set_error(aio_req_t *, int portused);
837c478bd9Sstevel@tonic-gate static aio_t *aio_aiop_alloc();
847c478bd9Sstevel@tonic-gate static int aio_req_alloc(aio_req_t **, aio_result_t *);
857c478bd9Sstevel@tonic-gate static int aio_lio_alloc(aio_lio_t **);
867c478bd9Sstevel@tonic-gate static aio_req_t *aio_req_done(void *);
877c478bd9Sstevel@tonic-gate static aio_req_t *aio_req_remove(aio_req_t *);
887c478bd9Sstevel@tonic-gate static int aio_req_find(aio_result_t *, aio_req_t **);
897c478bd9Sstevel@tonic-gate static int aio_hash_insert(struct aio_req_t *, aio_t *);
907c478bd9Sstevel@tonic-gate static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
9134709573Sraf     aio_result_t *, vnode_t *);
927c478bd9Sstevel@tonic-gate static int aio_cleanup_thread(aio_t *);
937c478bd9Sstevel@tonic-gate static aio_lio_t *aio_list_get(aio_result_t *);
947c478bd9Sstevel@tonic-gate static void lio_set_uerror(void *, int);
957c478bd9Sstevel@tonic-gate extern void aio_zerolen(aio_req_t *);
967c478bd9Sstevel@tonic-gate static int aiowait(struct timeval *, int, long	*);
977c478bd9Sstevel@tonic-gate static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
987c478bd9Sstevel@tonic-gate static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
997c478bd9Sstevel@tonic-gate     aio_req_t *reqlist, aio_t *aiop, model_t model);
1007c478bd9Sstevel@tonic-gate static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
1017c478bd9Sstevel@tonic-gate static int aiosuspend(void *, int, struct  timespec *, int,
1027c478bd9Sstevel@tonic-gate     long	*, int);
1037c478bd9Sstevel@tonic-gate static int aliowait(int, void *, int, void *, int);
1047c478bd9Sstevel@tonic-gate static int aioerror(void *, int);
1057c478bd9Sstevel@tonic-gate static int aio_cancel(int, void *, long	*, int);
1067c478bd9Sstevel@tonic-gate static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
1077c478bd9Sstevel@tonic-gate static int aiorw(int, void *, int, int);
1087c478bd9Sstevel@tonic-gate 
1097c478bd9Sstevel@tonic-gate static int alioLF(int, void *, int, void *);
11034709573Sraf static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
11134709573Sraf     aio_result_t *, vnode_t *);
1127c478bd9Sstevel@tonic-gate static int alio32(int, void *, int, void *);
1137c478bd9Sstevel@tonic-gate static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
1147c478bd9Sstevel@tonic-gate static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
1157c478bd9Sstevel@tonic-gate 
1167c478bd9Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
1177c478bd9Sstevel@tonic-gate static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
1187c478bd9Sstevel@tonic-gate void	aiocb_32ton(aiocb32_t *, aiocb_t *);
1197c478bd9Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
1207c478bd9Sstevel@tonic-gate 
1217c478bd9Sstevel@tonic-gate /*
1227c478bd9Sstevel@tonic-gate  * implementation specific functions (external)
1237c478bd9Sstevel@tonic-gate  */
1247c478bd9Sstevel@tonic-gate void aio_req_free(aio_t *, aio_req_t *);
1257c478bd9Sstevel@tonic-gate 
1267c478bd9Sstevel@tonic-gate /*
1277c478bd9Sstevel@tonic-gate  * Event Port framework
1287c478bd9Sstevel@tonic-gate  */
1297c478bd9Sstevel@tonic-gate 
1307c478bd9Sstevel@tonic-gate void aio_req_free_port(aio_t *, aio_req_t *);
1317c478bd9Sstevel@tonic-gate static int aio_port_callback(void *, int *, pid_t, int, void *);
1327c478bd9Sstevel@tonic-gate 
1337c478bd9Sstevel@tonic-gate /*
1347c478bd9Sstevel@tonic-gate  * This is the loadable module wrapper.
1357c478bd9Sstevel@tonic-gate  */
1367c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
1377c478bd9Sstevel@tonic-gate #include <sys/syscall.h>
1387c478bd9Sstevel@tonic-gate 
#ifdef _LP64

/*
 * Native 64-bit syscall entry: 6 arguments, 64-bit return value,
 * explicit argument count (SE_ARGC); dispatched through kaioc().
 */
static struct sysent kaio_sysent = {
	6,
	SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
	(int (*)())kaioc
};

#ifdef _SYSCALL32_IMPL
/* 32-bit compatibility entry on a 64-bit kernel: 7 args via kaio(). */
static struct sysent kaio_sysent32 = {
	7,
	SE_NOUNLOAD | SE_64RVAL,
	kaio
};
#endif  /* _SYSCALL32_IMPL */

#else   /* _LP64 */

/* 32-bit kernel: single entry point, 7 args, 32-bit return value. */
static struct sysent kaio_sysent = {
	7,
	SE_NOUNLOAD | SE_32RVAL1,
	kaio
};

#endif  /* _LP64 */

/*
 * Module linkage information for the kernel.
 */

static struct modlsys modlsys = {
	&mod_syscallops,
	"kernel Async I/O",
	&kaio_sysent
};

#ifdef  _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"kernel Async I/O for 32 bit compatibility",
	&kaio_sysent32
};
#endif  /* _SYSCALL32_IMPL */


static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef  _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
1927c478bd9Sstevel@tonic-gate 
1937c478bd9Sstevel@tonic-gate int
1947c478bd9Sstevel@tonic-gate _init(void)
1957c478bd9Sstevel@tonic-gate {
1967c478bd9Sstevel@tonic-gate 	int retval;
1977c478bd9Sstevel@tonic-gate 
1987c478bd9Sstevel@tonic-gate 	if ((retval = mod_install(&modlinkage)) != 0)
1997c478bd9Sstevel@tonic-gate 		return (retval);
2007c478bd9Sstevel@tonic-gate 
2017c478bd9Sstevel@tonic-gate 	return (0);
2027c478bd9Sstevel@tonic-gate }
2037c478bd9Sstevel@tonic-gate 
2047c478bd9Sstevel@tonic-gate int
2057c478bd9Sstevel@tonic-gate _fini(void)
2067c478bd9Sstevel@tonic-gate {
2077c478bd9Sstevel@tonic-gate 	int retval;
2087c478bd9Sstevel@tonic-gate 
2097c478bd9Sstevel@tonic-gate 	retval = mod_remove(&modlinkage);
2107c478bd9Sstevel@tonic-gate 
2117c478bd9Sstevel@tonic-gate 	return (retval);
2127c478bd9Sstevel@tonic-gate }
2137c478bd9Sstevel@tonic-gate 
/*
 * Report module information through the standard modlinkage.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
2197c478bd9Sstevel@tonic-gate 
#ifdef	_LP64
/*
 * kaioc(): native 64-bit syscall entry point for kernel async I/O.
 *
 *	a0	AIO command code (AIO_POLL_BIT may be set); also passed
 *		through to arw()/aiorw() so they can see the poll bit
 *	a1-a5	per-command arguments, cast as each handler requires
 *
 * On error, sets errno via set_errno() and returns its result; on
 * success returns the handler's rval (e.g. the aio_result_t pointer
 * from aiowait()).  Large-file (AIO*64) commands are not accepted
 * here; a 64-bit process issues plain 64-bit calls instead.
 */
static int64_t
kaioc(
	long	a0,
	long	a1,
	long	a2,
	long	a3,
	long	a4,
	long	a5)
{
	int	error;
	long	rval = 0;

	/* dispatch on the command with the poll bit masked off */
	switch ((int)a0 & ~AIO_POLL_BIT) {
	case AIOREAD:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FREAD);
		break;
	case AIOWRITE:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FWRITE);
		break;
	case AIOWAIT:
		error = aiowait((struct timeval *)a1, (int)a2, &rval);
		break;
	case AIOWAITN:
		error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
		    (timespec_t *)a4);
		break;
	case AIONOTIFY:
		error = aionotify();
		break;
	case AIOINIT:
		error = aioinit();
		break;
	case AIOSTART:
		error = aiostart();
		break;
	case AIOLIO:
		error = alio((int)a1, (aiocb_t **)a2, (int)a3,
		    (struct sigevent *)a4);
		break;
	case AIOLIOWAIT:
		error = aliowait((int)a1, (void *)a2, (int)a3,
		    (struct sigevent *)a4, AIO_64);
		break;
	case AIOSUSPEND:
		error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
		    (int)a4, &rval, AIO_64);
		break;
	case AIOERROR:
		error = aioerror((void *)a1, AIO_64);
		break;
	case AIOAREAD:
		error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
		break;
	case AIOAWRITE:
		error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
		break;
	case AIOCANCEL:
		error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
		break;

	/*
	 * The large file related stuff is valid only for
	 * 32 bit kernel and not for 64 bit kernel
	 * On 64 bit kernel we convert large file calls
	 * to regular 64bit calls.
	 */

	default:
		error = EINVAL;
	}
	if (error)
		return ((int64_t)set_errno(error));
	return (rval);
}
#endif
2987c478bd9Sstevel@tonic-gate 
2997c478bd9Sstevel@tonic-gate static int
3007c478bd9Sstevel@tonic-gate kaio(
3017c478bd9Sstevel@tonic-gate 	ulong_t *uap,
3027c478bd9Sstevel@tonic-gate 	rval_t *rvp)
3037c478bd9Sstevel@tonic-gate {
3047c478bd9Sstevel@tonic-gate 	long rval = 0;
3057c478bd9Sstevel@tonic-gate 	int	error = 0;
3067c478bd9Sstevel@tonic-gate 	offset_t	off;
3077c478bd9Sstevel@tonic-gate 
3087c478bd9Sstevel@tonic-gate 
3097c478bd9Sstevel@tonic-gate 		rvp->r_vals = 0;
3107c478bd9Sstevel@tonic-gate #if defined(_LITTLE_ENDIAN)
3117c478bd9Sstevel@tonic-gate 	off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
3127c478bd9Sstevel@tonic-gate #else
3137c478bd9Sstevel@tonic-gate 	off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
3147c478bd9Sstevel@tonic-gate #endif
3157c478bd9Sstevel@tonic-gate 
3167c478bd9Sstevel@tonic-gate 	switch (uap[0] & ~AIO_POLL_BIT) {
3177c478bd9Sstevel@tonic-gate 	/*
3187c478bd9Sstevel@tonic-gate 	 * It must be the 32 bit system call on 64 bit kernel
3197c478bd9Sstevel@tonic-gate 	 */
3207c478bd9Sstevel@tonic-gate 	case AIOREAD:
3217c478bd9Sstevel@tonic-gate 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
3227c478bd9Sstevel@tonic-gate 		    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
3237c478bd9Sstevel@tonic-gate 	case AIOWRITE:
3247c478bd9Sstevel@tonic-gate 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
3257c478bd9Sstevel@tonic-gate 		    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
3267c478bd9Sstevel@tonic-gate 	case AIOWAIT:
3277c478bd9Sstevel@tonic-gate 		error = aiowait((struct	timeval *)uap[1], (int)uap[2],
3287c478bd9Sstevel@tonic-gate 		    &rval);
3297c478bd9Sstevel@tonic-gate 		break;
3307c478bd9Sstevel@tonic-gate 	case AIOWAITN:
3317c478bd9Sstevel@tonic-gate 		error = aiowaitn((void *)uap[1], (uint_t)uap[2],
3327c478bd9Sstevel@tonic-gate 		    (uint_t *)uap[3], (timespec_t *)uap[4]);
3337c478bd9Sstevel@tonic-gate 		break;
3347c478bd9Sstevel@tonic-gate 	case AIONOTIFY:
3357c478bd9Sstevel@tonic-gate 		return (aionotify());
3367c478bd9Sstevel@tonic-gate 	case AIOINIT:
3377c478bd9Sstevel@tonic-gate 		return (aioinit());
3387c478bd9Sstevel@tonic-gate 	case AIOSTART:
3397c478bd9Sstevel@tonic-gate 		return (aiostart());
3407c478bd9Sstevel@tonic-gate 	case AIOLIO:
3417c478bd9Sstevel@tonic-gate 		return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
3427c478bd9Sstevel@tonic-gate 		    (void *)uap[4]));
3437c478bd9Sstevel@tonic-gate 	case AIOLIOWAIT:
3447c478bd9Sstevel@tonic-gate 		return (aliowait((int)uap[1], (void *)uap[2],
3457c478bd9Sstevel@tonic-gate 		    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
3467c478bd9Sstevel@tonic-gate 	case AIOSUSPEND:
3477c478bd9Sstevel@tonic-gate 		error = aiosuspend((void *)uap[1], (int)uap[2],
3487c478bd9Sstevel@tonic-gate 		    (timespec_t *)uap[3], (int)uap[4],
3497c478bd9Sstevel@tonic-gate 		    &rval, AIO_32);
3507c478bd9Sstevel@tonic-gate 		break;
3517c478bd9Sstevel@tonic-gate 	case AIOERROR:
3527c478bd9Sstevel@tonic-gate 		return (aioerror((void *)uap[1], AIO_32));
3537c478bd9Sstevel@tonic-gate 	case AIOAREAD:
3547c478bd9Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1],
3557c478bd9Sstevel@tonic-gate 		    FREAD, AIO_32));
3567c478bd9Sstevel@tonic-gate 	case AIOAWRITE:
3577c478bd9Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1],
3587c478bd9Sstevel@tonic-gate 		    FWRITE, AIO_32));
3597c478bd9Sstevel@tonic-gate 	case AIOCANCEL:
3607c478bd9Sstevel@tonic-gate 		error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
3617c478bd9Sstevel@tonic-gate 		    AIO_32));
3627c478bd9Sstevel@tonic-gate 		break;
3637c478bd9Sstevel@tonic-gate 	case AIOLIO64:
3647c478bd9Sstevel@tonic-gate 		return (alioLF((int)uap[1], (void *)uap[2],
3657c478bd9Sstevel@tonic-gate 		    (int)uap[3], (void *)uap[4]));
3667c478bd9Sstevel@tonic-gate 	case AIOLIOWAIT64:
3677c478bd9Sstevel@tonic-gate 		return (aliowait(uap[1], (void *)uap[2],
3687c478bd9Sstevel@tonic-gate 		    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
3697c478bd9Sstevel@tonic-gate 	case AIOSUSPEND64:
3707c478bd9Sstevel@tonic-gate 		error = aiosuspend((void *)uap[1], (int)uap[2],
3717c478bd9Sstevel@tonic-gate 		    (timespec_t *)uap[3], (int)uap[4], &rval,
3727c478bd9Sstevel@tonic-gate 		    AIO_LARGEFILE);
3737c478bd9Sstevel@tonic-gate 		break;
3747c478bd9Sstevel@tonic-gate 	case AIOERROR64:
3757c478bd9Sstevel@tonic-gate 		return (aioerror((void *)uap[1], AIO_LARGEFILE));
3767c478bd9Sstevel@tonic-gate 	case AIOAREAD64:
3777c478bd9Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1], FREAD,
3787c478bd9Sstevel@tonic-gate 		    AIO_LARGEFILE));
3797c478bd9Sstevel@tonic-gate 	case AIOAWRITE64:
3807c478bd9Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
3817c478bd9Sstevel@tonic-gate 		    AIO_LARGEFILE));
3827c478bd9Sstevel@tonic-gate 	case AIOCANCEL64:
3837c478bd9Sstevel@tonic-gate 		error = (aio_cancel((int)uap[1], (void *)uap[2],
3847c478bd9Sstevel@tonic-gate 		    &rval, AIO_LARGEFILE));
3857c478bd9Sstevel@tonic-gate 		break;
3867c478bd9Sstevel@tonic-gate 	default:
3877c478bd9Sstevel@tonic-gate 		return (EINVAL);
3887c478bd9Sstevel@tonic-gate 	}
3897c478bd9Sstevel@tonic-gate 
3907c478bd9Sstevel@tonic-gate 	rvp->r_val1 = rval;
3917c478bd9Sstevel@tonic-gate 	return (error);
3927c478bd9Sstevel@tonic-gate }
3937c478bd9Sstevel@tonic-gate 
3947c478bd9Sstevel@tonic-gate /*
3957c478bd9Sstevel@tonic-gate  * wake up LWPs in this process that are sleeping in
3967c478bd9Sstevel@tonic-gate  * aiowait().
3977c478bd9Sstevel@tonic-gate  */
3987c478bd9Sstevel@tonic-gate static int
3997c478bd9Sstevel@tonic-gate aionotify(void)
4007c478bd9Sstevel@tonic-gate {
4017c478bd9Sstevel@tonic-gate 	aio_t	*aiop;
4027c478bd9Sstevel@tonic-gate 
4037c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
4047c478bd9Sstevel@tonic-gate 	if (aiop == NULL)
4057c478bd9Sstevel@tonic-gate 		return (0);
4067c478bd9Sstevel@tonic-gate 
4077c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
4087c478bd9Sstevel@tonic-gate 	aiop->aio_notifycnt++;
4097c478bd9Sstevel@tonic-gate 	cv_broadcast(&aiop->aio_waitcv);
4107c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
4117c478bd9Sstevel@tonic-gate 
4127c478bd9Sstevel@tonic-gate 	return (0);
4137c478bd9Sstevel@tonic-gate }
4147c478bd9Sstevel@tonic-gate 
/*
 * Convert a user-supplied struct timeval (as used by aiowait()) into
 * a kernel relative timestruc_t, handling both user data models.
 *
 *	timout	 user address of the timeout; NULL means block forever,
 *		 (intptr_t)-1 means do not block at all
 *	rqtime	 caller-provided storage for the converted time
 *	rqtp	 set to rqtime when a finite timeout applies, else NULL
 *	blocking set to 1 if the caller should block, 0 otherwise
 *
 * Returns 0 on success, EFAULT if the copyin fails, or EINVAL for an
 * out-of-range timeval.
 */
static int
timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	struct timeval32 wait_time_32;
#endif
	struct timeval wait_time;
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {		/* wait indefinitely */
		*blocking = 1;
		return (0);
	}

	/*
	 * Need to correctly compare with the -1 passed in for a user
	 * address pointer, with both 32 bit and 64 bit apps.
	 */
	if (model == DATAMODEL_NATIVE) {
		if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time, sizeof (wait_time)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		/*
		 * -1 from a 32bit app. It will not get sign extended.
		 * don't wait if -1.
		 */
		if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
	}
#endif  /* _SYSCALL32_IMPL */

	if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */
		*blocking = 0;
		return (0);
	}

	/* reject negative fields and out-of-range microseconds */
	if (wait_time.tv_sec < 0 ||
	    wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
		return (EINVAL);

	rqtime->tv_sec = wait_time.tv_sec;
	rqtime->tv_nsec = wait_time.tv_usec * 1000;
	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}
4777c478bd9Sstevel@tonic-gate 
/*
 * Convert a user-supplied timespec (as used by aiowaitn()/aiosuspend())
 * into a kernel relative timestruc_t, handling both user data models.
 *
 *	timout	 user address of the timeout; NULL means block forever
 *	rqtime	 caller-provided storage for the converted time
 *	rqtp	 set to rqtime when a finite timeout applies, else NULL
 *	blocking set to 1 if the caller should block, 0 otherwise
 *
 * Returns 0 on success, EFAULT if the copyin fails, or EINVAL for an
 * out-of-range timespec.
 */
static int
timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	timespec32_t wait_time_32;
#endif
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {		/* wait indefinitely */
		*blocking = 1;
		return (0);
	}

	if (model == DATAMODEL_NATIVE) {
		if (copyin(timout, rqtime, sizeof (*rqtime)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
	}
#endif  /* _SYSCALL32_IMPL */

	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {	/* don't wait */
		*blocking = 0;
		return (0);
	}

	/* reject negative fields and out-of-range nanoseconds */
	if (rqtime->tv_sec < 0 ||
	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
		return (EINVAL);

	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}
5197c478bd9Sstevel@tonic-gate 
/*
 * aiowait(): wait for any pending asynchronous I/O request of this
 * process to complete.
 *
 *	timout		user timeval; NULL blocks forever, -1 or {0,0}
 *			means do not block (see timeval2reltime())
 *	dontblockflg	if set, return EINVAL rather than block when no
 *			requests are outstanding
 *	rval		receives the completed request's aio_result_t
 *			user pointer, or 1 when consuming an aionotify()
 *
 * Returns 0 on success; EINVAL, EINTR or ETIME on failure.
 */
/*ARGSUSED*/
static int
aiowait(
	struct timeval	*timout,
	int	dontblockflg,
	long	*rval)
{
	int 		error;
	aio_t		*aiop;
	aio_req_t	*reqp;
	clock_t		status;
	int		blocking;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		/* snapshot timechanged before reading the clock */
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* process requests on poll queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}
		if ((reqp = aio_req_remove(NULL)) != NULL) {
			*rval = (long)reqp->aio_req_resultp;
			break;
		}
		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			*rval = 1;
			break;
		}
		/* don't block if no outstanding aio */
		if (aiop->aio_outstanding == 0 && dontblockflg) {
			error = EINVAL;
			break;
		}
		if (blocking) {
			status = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);

			if (status > 0)		/* check done queue again */
				continue;
			if (status == 0) {	/* interrupted by a signal */
				error = EINTR;
				*rval = -1;
			} else {		/* timer expired */
				error = ETIME;
			}
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	if (reqp) {
		/* unlock the aphysio pages, copy the result to userland */
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
	return (error);
}
6017c478bd9Sstevel@tonic-gate 
6027c478bd9Sstevel@tonic-gate /*
6037c478bd9Sstevel@tonic-gate  * aiowaitn can be used to reap completed asynchronous requests submitted with
6047c478bd9Sstevel@tonic-gate  * lio_listio, aio_read or aio_write.
6057c478bd9Sstevel@tonic-gate  * This function only reaps asynchronous raw I/Os.
6067c478bd9Sstevel@tonic-gate  */
6077c478bd9Sstevel@tonic-gate 
/*ARGSUSED*/
static int
aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
{
	int 		error = 0;
	aio_t		*aiop;
	aio_req_t	*reqlist = NULL;	/* completed requests reaped so far */
	caddr_t		iocblist = NULL;	/* array of iocb ptr's */
	uint_t		waitcnt, cnt = 0;	/* iocb cnt */
	size_t		iocbsz;			/* users iocb size */
	size_t		riocbsz;		/* returned iocb size */
	int		iocb_index = 0;
	model_t		model = get_udatamodel();
	int		blocking = 1;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;

	/* nothing to reap if aio is unused or no I/O is in flight */
	if (aiop == NULL || aiop->aio_outstanding == 0)
		return (EAGAIN);

	if (copyin(nwait, &waitcnt, sizeof (uint_t)))
		return (EFAULT);

	/* set *nwait to zero, if we must return prematurely */
	if (copyout(&cnt, nwait, sizeof (uint_t)))
		return (EFAULT);

	if (waitcnt == 0) {
		/* *nwait == 0 means poll: collect what is done, never sleep */
		blocking = 0;
		rqtp = NULL;
		waitcnt = nent;
	} else {
		/* translate the user timeout; may clear "blocking" */
		error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
		if (error)
			return (error);
	}

	/* size of the iocb pointer array to return, per data model */
	if (model == DATAMODEL_NATIVE)
		iocbsz = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		iocbsz = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	/*
	 * Only one aio_waitn call is allowed at a time.
	 * The active aio_waitn will collect all requests
	 * out of the "done" list and if necessary it will wait
	 * for some/all pending requests to fulfill the nwait
	 * parameter.
	 * A second or further aio_waitn calls will sleep here
	 * until the active aio_waitn finishes and leaves the kernel
	 * If the second call does not block (poll), then return
	 * immediately with the error code : EAGAIN.
	 * If the second call should block, then sleep here, but
	 * do not touch the timeout. The timeout starts when this
	 * aio_waitn-call becomes active.
	 */

	mutex_enter(&aiop->aio_mutex);

	while (aiop->aio_flags & AIO_WAITN) {
		if (blocking == 0) {
			mutex_exit(&aiop->aio_mutex);
			return (EAGAIN);
		}

		/* block, no timeout */
		aiop->aio_flags |= AIO_WAITN_PENDING;
		if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
			mutex_exit(&aiop->aio_mutex);
			return (EINTR);
		}
	}

	/*
	 * Establish the absolute future time for the timeout.
	 */
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	/*
	 * The iocb pointer buffer is cached per process in aiop;
	 * discard a cached buffer that is too small for this call.
	 */
	if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
		aiop->aio_iocb = NULL;
	}

	if (aiop->aio_iocb == NULL) {
		iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
		if (iocblist == NULL) {
			mutex_exit(&aiop->aio_mutex);
			return (ENOMEM);
		}
		aiop->aio_iocb = (aiocb_t **)iocblist;
		aiop->aio_iocbsz = iocbsz;
	} else {
		iocblist = (char *)aiop->aio_iocb;
	}

	aiop->aio_waitncnt = waitcnt;
	aiop->aio_flags |= AIO_WAITN;	/* we are now the active aio_waitn */

	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}

		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
			aiop->aio_waitncnt = waitcnt - cnt;
		}

		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			error = 0;
			break;
		}

		/*
		 * if we are here second time as a result of timer
		 * expiration, we reset error if there are enough
		 * aiocb's to satisfy request.
		 * We return also if all requests are already done
		 * and we picked up the whole done queue.
		 */

		if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
		    aiop->aio_doneq == NULL)) {
			error = 0;
			break;
		}

		if ((cnt < waitcnt) && blocking) {
			int rval = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);
			if (rval > 0)		/* woken: scan queues again */
				continue;
			if (rval < 0) {		/* timer expired */
				error = ETIME;
				blocking = 0;
				/* loop once more to reap last completions */
				continue;
			}
			error = EINTR;		/* interrupted by a signal */
		}
		break;
	}

	mutex_exit(&aiop->aio_mutex);

	if (cnt > 0) {
		/* unlock/free the reaped requests, filling iocblist */
		iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
		    aiop, model);

		if (model == DATAMODEL_NATIVE)
			riocbsz = (sizeof (aiocb_t *) * cnt);
#ifdef	_SYSCALL32_IMPL
		else
			riocbsz = (sizeof (caddr32_t) * cnt);
#endif  /* _SYSCALL32_IMPL */

		if (copyout(iocblist, uiocb, riocbsz) ||
		    copyout(&cnt, nwait, sizeof (uint_t)))
			error = EFAULT;
	}

	/*
	 * Only cache iocb buffers up to AIO_IOCB_MAX bytes; free larger
	 * ones now.  NOTE(review): aio_iocbsz is read without aio_mutex
	 * held here; presumably safe because AIO_WAITN is still set and
	 * serializes aio_waitn callers -- confirm.
	 */
	if (aiop->aio_iocbsz > AIO_IOCB_MAX) {
		kmem_free(iocblist, aiop->aio_iocbsz);
		aiop->aio_iocb = NULL;
	}

	/* check if there is another thread waiting for execution */
	mutex_enter(&aiop->aio_mutex);
	aiop->aio_flags &= ~AIO_WAITN;
	if (aiop->aio_flags & AIO_WAITN_PENDING) {
		aiop->aio_flags &= ~AIO_WAITN_PENDING;
		cv_signal(&aiop->aio_waitncv);
	}
	mutex_exit(&aiop->aio_mutex);

	return (error);
}
8017c478bd9Sstevel@tonic-gate 
8027c478bd9Sstevel@tonic-gate /*
8037c478bd9Sstevel@tonic-gate  * aio_unlock_requests
8047c478bd9Sstevel@tonic-gate  * copyouts the result of the request as well as the return value.
8057c478bd9Sstevel@tonic-gate  * It builds the list of completed asynchronous requests,
8067c478bd9Sstevel@tonic-gate  * unlocks the allocated memory ranges and
8077c478bd9Sstevel@tonic-gate  * put the aio request structure back into the free list.
8087c478bd9Sstevel@tonic-gate  */
8097c478bd9Sstevel@tonic-gate 
8107c478bd9Sstevel@tonic-gate static int
8117c478bd9Sstevel@tonic-gate aio_unlock_requests(
8127c478bd9Sstevel@tonic-gate 	caddr_t	iocblist,
8137c478bd9Sstevel@tonic-gate 	int	iocb_index,
8147c478bd9Sstevel@tonic-gate 	aio_req_t *reqlist,
8157c478bd9Sstevel@tonic-gate 	aio_t	*aiop,
8167c478bd9Sstevel@tonic-gate 	model_t	model)
8177c478bd9Sstevel@tonic-gate {
8187c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp, *nreqp;
8197c478bd9Sstevel@tonic-gate 
8207c478bd9Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
8217c478bd9Sstevel@tonic-gate 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
8227c478bd9Sstevel@tonic-gate 			(((caddr_t *)iocblist)[iocb_index++]) =
8237c478bd9Sstevel@tonic-gate 			    reqp->aio_req_iocb.iocb;
8247c478bd9Sstevel@tonic-gate 			nreqp = reqp->aio_req_next;
8257c478bd9Sstevel@tonic-gate 			aphysio_unlock(reqp);
8267c478bd9Sstevel@tonic-gate 			aio_copyout_result(reqp);
8277c478bd9Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
8287c478bd9Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
8297c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
8307c478bd9Sstevel@tonic-gate 		}
8317c478bd9Sstevel@tonic-gate 	}
8327c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
8337c478bd9Sstevel@tonic-gate 	else {
8347c478bd9Sstevel@tonic-gate 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
8357c478bd9Sstevel@tonic-gate 			((caddr32_t *)iocblist)[iocb_index++] =
8367c478bd9Sstevel@tonic-gate 			    reqp->aio_req_iocb.iocb32;
8377c478bd9Sstevel@tonic-gate 			nreqp = reqp->aio_req_next;
8387c478bd9Sstevel@tonic-gate 			aphysio_unlock(reqp);
8397c478bd9Sstevel@tonic-gate 			aio_copyout_result(reqp);
8407c478bd9Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
8417c478bd9Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
8427c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
8437c478bd9Sstevel@tonic-gate 		}
8447c478bd9Sstevel@tonic-gate 	}
8457c478bd9Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
8467c478bd9Sstevel@tonic-gate 	return (iocb_index);
8477c478bd9Sstevel@tonic-gate }
8487c478bd9Sstevel@tonic-gate 
8497c478bd9Sstevel@tonic-gate /*
8507c478bd9Sstevel@tonic-gate  * aio_reqlist_concat
8517c478bd9Sstevel@tonic-gate  * moves "max" elements from the done queue to the reqlist queue and removes
8527c478bd9Sstevel@tonic-gate  * the AIO_DONEQ flag.
8537c478bd9Sstevel@tonic-gate  * - reqlist queue is a simple linked list
8547c478bd9Sstevel@tonic-gate  * - done queue is a double linked list
8557c478bd9Sstevel@tonic-gate  */
8567c478bd9Sstevel@tonic-gate 
static int
aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
{
	aio_req_t *q2, *q2work, *list;
	int count = 0;

	list = *reqlist;
	q2 = aiop->aio_doneq;	/* head of the circular done queue */
	q2work = q2;
	/*
	 * Walk up to "max" entries of the done queue, clearing the
	 * AIO_DONEQ flag on each.  Afterwards q2work points at the
	 * first element NOT taken, or back at q2 if the walk wrapped
	 * around (i.e. the whole queue was consumed).
	 */
	while (max-- > 0) {
		q2work->aio_req_flags &= ~AIO_DONEQ;
		q2work = q2work->aio_req_next;
		count++;
		if (q2work == q2)
			break;
	}

	if (q2work == q2) {
		/* all elements revised */
		/*
		 * The whole done queue was taken: chain the previous
		 * reqlist after the queue's tail and empty the doneq.
		 * (Consumers follow only aio_req_next, so the stale
		 * aio_req_prev links in the result are harmless.)
		 */
		q2->aio_req_prev->aio_req_next = list;
		list = q2;
		aiop->aio_doneq = NULL;
	} else {
		/*
		 * max < elements in the doneq
		 * detach only the required amount of elements
		 * out of the doneq
		 */
		/* splice [q2 .. q2work) out, prepending it to list */
		q2work->aio_req_prev->aio_req_next = list;
		list = q2;

		/* q2work becomes the new head; restore circularity */
		aiop->aio_doneq = q2work;
		q2work->aio_req_prev = q2->aio_req_prev;
		q2->aio_req_prev->aio_req_next = q2work;
	}
	*reqlist = list;
	return (count);
}
8957c478bd9Sstevel@tonic-gate 
/*
 * Common backend for the aio_suspend variants: wait until at least one
 * of the asynchronous requests named by the "aiocb" pointer array has
 * completed, the timeout expires, or a signal arrives.  run_mode
 * selects the aiocb flavor (native, AIO_32 or AIO_LARGEFILE).
 * Requests found completed are unlocked, their results copied out to
 * the user and their aio_req_t structures freed before returning.
 */
/*ARGSUSED*/
static int
aiosuspend(
	void	*aiocb,
	int	nent,
	struct	timespec	*timout,
	int	flag,
	long	*rval,
	int	run_mode)
{
	int 		error;
	aio_t		*aiop;
	aio_req_t	*reqp, *found, *next;
	caddr_t		cbplist = NULL;
	aiocb_t		*cbp, **ucbp;
#ifdef	_SYSCALL32_IMPL
	aiocb32_t	*cbp32;
	caddr32_t	*ucbp32;
#endif  /* _SYSCALL32_IMPL */
	aiocb64_32_t	*cbp64;
	int		rv;
	int		i;
	size_t		ssize;
	model_t		model = get_udatamodel();
	int		blocking;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	/*
	 * If we are not blocking and there's no IO complete
	 * skip aiocb copyin.
	 */
	if (!blocking && (aiop->aio_pollq == NULL) &&
	    (aiop->aio_doneq == NULL)) {
		return (EAGAIN);
	}

	/* size of the user's array of aiocb pointers, per data model */
	if (model == DATAMODEL_NATIVE)
		ssize = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		ssize = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	cbplist = kmem_alloc(ssize, KM_NOSLEEP);
	if (cbplist == NULL)
		return (ENOMEM);

	if (copyin(aiocb, cbplist, ssize)) {
		error = EFAULT;
		goto done;
	}

	found = NULL;	/* completed requests claimed by this call */
	/*
	 * we need to get the aio_cleanupq_mutex since we call
	 * aio_req_done().
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_cleanupq_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
		}
		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			if (model == DATAMODEL_NATIVE)
				ucbp = (aiocb_t **)cbplist;
#ifdef	_SYSCALL32_IMPL
			else
				ucbp32 = (caddr32_t *)cbplist;
#endif  /* _SYSCALL32_IMPL */
			/*
			 * Look up each user aiocb by its result area;
			 * aio_req_done() returns (and dequeues) the
			 * request only if it has completed.  NULL
			 * entries in the user array are skipped.
			 * NOTE(review): reqp keeps its previous value
			 * when an entry matches no branch below (e.g.
			 * an unexpected run_mode) -- relies on callers
			 * passing a valid run_mode.
			 */
			for (i = 0; i < nent; i++) {
				if (model == DATAMODEL_NATIVE) {
					if ((cbp = *ucbp++) == NULL)
						continue;
					if (run_mode != AIO_LARGEFILE)
						reqp = aio_req_done(
						    &cbp->aio_resultp);
					else {
						cbp64 = (aiocb64_32_t *)cbp;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}
				}
#ifdef	_SYSCALL32_IMPL
				else {
					if (run_mode == AIO_32) {
						if ((cbp32 =
						    (aiocb32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp32->aio_resultp);
					} else if (run_mode == AIO_LARGEFILE) {
						if ((cbp64 =
						    (aiocb64_32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}

				}
#endif  /* _SYSCALL32_IMPL */
				if (reqp) {
					/* prepend to the claimed list */
					reqp->aio_req_next = found;
					found = reqp;
				}
				if (aiop->aio_doneq == NULL)
					break;
			}
			if (found)
				break;
		}
		if (aiop->aio_notifycnt > 0) {
			/*
			 * nothing on the kernel's queue. the user
			 * has notified the kernel that it has items
			 * on a user-level queue.
			 */
			aiop->aio_notifycnt--;
			*rval = 1;
			error = 0;
			break;
		}
		/* don't block if nothing is outstanding */
		if (aiop->aio_outstanding == 0) {
			error = EAGAIN;
			break;
		}
		if (blocking) {
			/*
			 * drop the aio_cleanupq_mutex as we are
			 * going to block.
			 */
			mutex_exit(&aiop->aio_cleanupq_mutex);
			rv = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);
			/*
			 * we have to drop aio_mutex and
			 * grab it in the right order.
			 */
			mutex_exit(&aiop->aio_mutex);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
			if (rv > 0)	/* check done queue again */
				continue;
			if (rv == 0)	/* interrupted by a signal */
				error = EINTR;
			else		/* timer expired */
				error = ETIME;
		} else {
			error = EAGAIN;
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	/* unlock, copy out results and free each claimed request */
	for (reqp = found; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
done:
	kmem_free(cbplist, ssize);
	return (error);
}
10907c478bd9Sstevel@tonic-gate 
10917c478bd9Sstevel@tonic-gate /*
10927c478bd9Sstevel@tonic-gate  * initialize aio by allocating an aio_t struct for this
10937c478bd9Sstevel@tonic-gate  * process.
10947c478bd9Sstevel@tonic-gate  */
10957c478bd9Sstevel@tonic-gate static int
10967c478bd9Sstevel@tonic-gate aioinit(void)
10977c478bd9Sstevel@tonic-gate {
10987c478bd9Sstevel@tonic-gate 	proc_t *p = curproc;
10997c478bd9Sstevel@tonic-gate 	aio_t *aiop;
11007c478bd9Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
11017c478bd9Sstevel@tonic-gate 	if ((aiop = p->p_aio) == NULL) {
11027c478bd9Sstevel@tonic-gate 		aiop = aio_aiop_alloc();
11037c478bd9Sstevel@tonic-gate 		p->p_aio = aiop;
11047c478bd9Sstevel@tonic-gate 	}
11057c478bd9Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
11067c478bd9Sstevel@tonic-gate 	if (aiop == NULL)
11077c478bd9Sstevel@tonic-gate 		return (ENOMEM);
11087c478bd9Sstevel@tonic-gate 	return (0);
11097c478bd9Sstevel@tonic-gate }
11107c478bd9Sstevel@tonic-gate 
11117c478bd9Sstevel@tonic-gate /*
11127c478bd9Sstevel@tonic-gate  * start a special thread that will cleanup after aio requests
11137c478bd9Sstevel@tonic-gate  * that are preventing a segment from being unmapped. as_unmap()
11147c478bd9Sstevel@tonic-gate  * blocks until all phsyio to this segment is completed. this
11157c478bd9Sstevel@tonic-gate  * doesn't happen until all the pages in this segment are not
11167c478bd9Sstevel@tonic-gate  * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
11177c478bd9Sstevel@tonic-gate  * requests still outstanding. this special thread will make sure
11187c478bd9Sstevel@tonic-gate  * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
11197c478bd9Sstevel@tonic-gate  *
11207c478bd9Sstevel@tonic-gate  * this function will return an error if the process has only
11217c478bd9Sstevel@tonic-gate  * one LWP. the assumption is that the caller is a separate LWP
11227c478bd9Sstevel@tonic-gate  * that remains blocked in the kernel for the life of this process.
11237c478bd9Sstevel@tonic-gate  */
11247c478bd9Sstevel@tonic-gate static int
11257c478bd9Sstevel@tonic-gate aiostart(void)
11267c478bd9Sstevel@tonic-gate {
11277c478bd9Sstevel@tonic-gate 	proc_t *p = curproc;
11287c478bd9Sstevel@tonic-gate 	aio_t *aiop;
11297c478bd9Sstevel@tonic-gate 	int first, error = 0;
11307c478bd9Sstevel@tonic-gate 
11317c478bd9Sstevel@tonic-gate 	if (p->p_lwpcnt == 1)
11327c478bd9Sstevel@tonic-gate 		return (EDEADLK);
11337c478bd9Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
11347c478bd9Sstevel@tonic-gate 	if ((aiop = p->p_aio) == NULL)
11357c478bd9Sstevel@tonic-gate 		error = EINVAL;
11367c478bd9Sstevel@tonic-gate 	else {
11377c478bd9Sstevel@tonic-gate 		first = aiop->aio_ok;
11387c478bd9Sstevel@tonic-gate 		if (aiop->aio_ok == 0)
11397c478bd9Sstevel@tonic-gate 			aiop->aio_ok = 1;
11407c478bd9Sstevel@tonic-gate 	}
11417c478bd9Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
11427c478bd9Sstevel@tonic-gate 	if (error == 0 && first == 0) {
11437c478bd9Sstevel@tonic-gate 		return (aio_cleanup_thread(aiop));
11447c478bd9Sstevel@tonic-gate 		/* should return only to exit */
11457c478bd9Sstevel@tonic-gate 	}
11467c478bd9Sstevel@tonic-gate 	return (error);
11477c478bd9Sstevel@tonic-gate }
11487c478bd9Sstevel@tonic-gate 
11497c478bd9Sstevel@tonic-gate /*
11507c478bd9Sstevel@tonic-gate  * Associate an aiocb with a port.
11517c478bd9Sstevel@tonic-gate  * This function is used by aiorw() to associate a transaction with a port.
11527c478bd9Sstevel@tonic-gate  * Allocate an event port structure (port_alloc_event()) and store the
11537c478bd9Sstevel@tonic-gate  * delivered user pointer (portnfy_user) in the portkev_user field of the
11547c478bd9Sstevel@tonic-gate  * port_kevent_t structure..
11557c478bd9Sstevel@tonic-gate  * The aio_req_portkev pointer in the aio_req_t structure was added to identify
11567c478bd9Sstevel@tonic-gate  * the port association.
11577c478bd9Sstevel@tonic-gate  */
11587c478bd9Sstevel@tonic-gate 
11597c478bd9Sstevel@tonic-gate static int
116034709573Sraf aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
116134709573Sraf 	aio_req_t *reqp, int event)
11627c478bd9Sstevel@tonic-gate {
11637c478bd9Sstevel@tonic-gate 	port_kevent_t	*pkevp = NULL;
11647c478bd9Sstevel@tonic-gate 	int		error;
11657c478bd9Sstevel@tonic-gate 
11667c478bd9Sstevel@tonic-gate 	error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
11677c478bd9Sstevel@tonic-gate 	    PORT_SOURCE_AIO, &pkevp);
11687c478bd9Sstevel@tonic-gate 	if (error) {
11697c478bd9Sstevel@tonic-gate 		if ((error == ENOMEM) || (error == EAGAIN))
11707c478bd9Sstevel@tonic-gate 			error = EAGAIN;
11717c478bd9Sstevel@tonic-gate 		else
11727c478bd9Sstevel@tonic-gate 			error = EINVAL;
11737c478bd9Sstevel@tonic-gate 	} else {
11747c478bd9Sstevel@tonic-gate 		port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
11757c478bd9Sstevel@tonic-gate 		    aio_port_callback, reqp);
117634709573Sraf 		pkevp->portkev_events = event;
11777c478bd9Sstevel@tonic-gate 		reqp->aio_req_portkev = pkevp;
11787c478bd9Sstevel@tonic-gate 		reqp->aio_req_port = pntfy->portnfy_port;
11797c478bd9Sstevel@tonic-gate 	}
11807c478bd9Sstevel@tonic-gate 	return (error);
11817c478bd9Sstevel@tonic-gate }
11827c478bd9Sstevel@tonic-gate 
11837c478bd9Sstevel@tonic-gate #ifdef _LP64
11847c478bd9Sstevel@tonic-gate 
11857c478bd9Sstevel@tonic-gate /*
11867c478bd9Sstevel@tonic-gate  * Asynchronous list IO. A chain of aiocb's are copied in
11877c478bd9Sstevel@tonic-gate  * one at a time. If the aiocb is invalid, it is skipped.
11887c478bd9Sstevel@tonic-gate  * For each aiocb, the appropriate driver entry point is
11897c478bd9Sstevel@tonic-gate  * called. Optimize for the common case where the list
11907c478bd9Sstevel@tonic-gate  * of requests is to the same file descriptor.
11917c478bd9Sstevel@tonic-gate  *
11927c478bd9Sstevel@tonic-gate  * One possible optimization is to define a new driver entry
11937c478bd9Sstevel@tonic-gate  * point that supports a list of IO requests. Whether this
11947c478bd9Sstevel@tonic-gate  * improves performance depends somewhat on the driver's
11957c478bd9Sstevel@tonic-gate  * locking strategy. Processing a list could adversely impact
11967c478bd9Sstevel@tonic-gate  * the driver's interrupt latency.
11977c478bd9Sstevel@tonic-gate  */
11987c478bd9Sstevel@tonic-gate static int
11997c478bd9Sstevel@tonic-gate alio(
12007c478bd9Sstevel@tonic-gate 	int		mode_arg,
12017c478bd9Sstevel@tonic-gate 	aiocb_t		**aiocb_arg,
12027c478bd9Sstevel@tonic-gate 	int		nent,
12037c478bd9Sstevel@tonic-gate 	struct sigevent	*sigev)
12047c478bd9Sstevel@tonic-gate {
12057c478bd9Sstevel@tonic-gate 	file_t		*fp;
12067c478bd9Sstevel@tonic-gate 	file_t		*prev_fp = NULL;
12077c478bd9Sstevel@tonic-gate 	int		prev_mode = -1;
12087c478bd9Sstevel@tonic-gate 	struct vnode	*vp;
12097c478bd9Sstevel@tonic-gate 	aio_lio_t	*head;
12107c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp;
12117c478bd9Sstevel@tonic-gate 	aio_t		*aiop;
12127c478bd9Sstevel@tonic-gate 	caddr_t		cbplist;
12137c478bd9Sstevel@tonic-gate 	aiocb_t		cb;
12147c478bd9Sstevel@tonic-gate 	aiocb_t		*aiocb = &cb;
121534709573Sraf 	aiocb_t		*cbp;
121634709573Sraf 	aiocb_t		**ucbp;
12177c478bd9Sstevel@tonic-gate 	struct sigevent sigevk;
12187c478bd9Sstevel@tonic-gate 	sigqueue_t	*sqp;
12197c478bd9Sstevel@tonic-gate 	int		(*aio_func)();
12207c478bd9Sstevel@tonic-gate 	int		mode;
12217c478bd9Sstevel@tonic-gate 	int		error = 0;
12227c478bd9Sstevel@tonic-gate 	int		aio_errors = 0;
12237c478bd9Sstevel@tonic-gate 	int		i;
12247c478bd9Sstevel@tonic-gate 	size_t		ssize;
12257c478bd9Sstevel@tonic-gate 	int		deadhead = 0;
12267c478bd9Sstevel@tonic-gate 	int		aio_notsupported = 0;
122734709573Sraf 	int		lio_head_port;
122834709573Sraf 	int		aio_port;
122934709573Sraf 	int		aio_thread;
12307c478bd9Sstevel@tonic-gate 	port_kevent_t	*pkevtp = NULL;
123134b3058fSpraks 	int		portused = 0;
12327c478bd9Sstevel@tonic-gate 	port_notify_t	pnotify;
123334709573Sraf 	int		event;
12347c478bd9Sstevel@tonic-gate 
12357c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
12367c478bd9Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
12377c478bd9Sstevel@tonic-gate 		return (EINVAL);
12387c478bd9Sstevel@tonic-gate 
12397c478bd9Sstevel@tonic-gate 	ssize = (sizeof (aiocb_t *) * nent);
12407c478bd9Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
12417c478bd9Sstevel@tonic-gate 	ucbp = (aiocb_t **)cbplist;
12427c478bd9Sstevel@tonic-gate 
124334709573Sraf 	if (copyin(aiocb_arg, cbplist, ssize) ||
124434709573Sraf 	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
12457c478bd9Sstevel@tonic-gate 		kmem_free(cbplist, ssize);
12467c478bd9Sstevel@tonic-gate 		return (EFAULT);
12477c478bd9Sstevel@tonic-gate 	}
12487c478bd9Sstevel@tonic-gate 
124934709573Sraf 	/* Event Ports  */
125034709573Sraf 	if (sigev &&
125134709573Sraf 	    (sigevk.sigev_notify == SIGEV_THREAD ||
125234709573Sraf 	    sigevk.sigev_notify == SIGEV_PORT)) {
125334709573Sraf 		if (sigevk.sigev_notify == SIGEV_THREAD) {
125434709573Sraf 			pnotify.portnfy_port = sigevk.sigev_signo;
125534709573Sraf 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
125634709573Sraf 		} else if (copyin(sigevk.sigev_value.sival_ptr,
125734709573Sraf 		    &pnotify, sizeof (pnotify))) {
12587c478bd9Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
12597c478bd9Sstevel@tonic-gate 			return (EFAULT);
12607c478bd9Sstevel@tonic-gate 		}
126134709573Sraf 		error = port_alloc_event(pnotify.portnfy_port,
126234709573Sraf 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
126334709573Sraf 		if (error) {
126434709573Sraf 			if (error == ENOMEM || error == EAGAIN)
126534709573Sraf 				error = EAGAIN;
126634709573Sraf 			else
126734709573Sraf 				error = EINVAL;
126834709573Sraf 			kmem_free(cbplist, ssize);
126934709573Sraf 			return (error);
127034709573Sraf 		}
127134709573Sraf 		lio_head_port = pnotify.portnfy_port;
127234b3058fSpraks 		portused = 1;
12737c478bd9Sstevel@tonic-gate 	}
12747c478bd9Sstevel@tonic-gate 
12757c478bd9Sstevel@tonic-gate 	/*
12767c478bd9Sstevel@tonic-gate 	 * a list head should be allocated if notification is
12777c478bd9Sstevel@tonic-gate 	 * enabled for this list.
12787c478bd9Sstevel@tonic-gate 	 */
12797c478bd9Sstevel@tonic-gate 	head = NULL;
12807c478bd9Sstevel@tonic-gate 
128134709573Sraf 	if (mode_arg == LIO_WAIT || sigev) {
12827c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
12837c478bd9Sstevel@tonic-gate 		error = aio_lio_alloc(&head);
12847c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
12857c478bd9Sstevel@tonic-gate 		if (error)
12867c478bd9Sstevel@tonic-gate 			goto done;
12877c478bd9Sstevel@tonic-gate 		deadhead = 1;
12887c478bd9Sstevel@tonic-gate 		head->lio_nent = nent;
12897c478bd9Sstevel@tonic-gate 		head->lio_refcnt = nent;
129034709573Sraf 		head->lio_port = -1;
129134709573Sraf 		head->lio_portkev = NULL;
129234709573Sraf 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
129334709573Sraf 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
12947c478bd9Sstevel@tonic-gate 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
12957c478bd9Sstevel@tonic-gate 			if (sqp == NULL) {
12967c478bd9Sstevel@tonic-gate 				error = EAGAIN;
12977c478bd9Sstevel@tonic-gate 				goto done;
12987c478bd9Sstevel@tonic-gate 			}
12997c478bd9Sstevel@tonic-gate 			sqp->sq_func = NULL;
13007c478bd9Sstevel@tonic-gate 			sqp->sq_next = NULL;
13017c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_code = SI_ASYNCIO;
13027c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_pid = curproc->p_pid;
13037c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_ctid = PRCTID(curproc);
13047c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_zoneid = getzoneid();
13057c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
13067c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_signo = sigevk.sigev_signo;
13077c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_value = sigevk.sigev_value;
13087c478bd9Sstevel@tonic-gate 			head->lio_sigqp = sqp;
13097c478bd9Sstevel@tonic-gate 		} else {
13107c478bd9Sstevel@tonic-gate 			head->lio_sigqp = NULL;
13117c478bd9Sstevel@tonic-gate 		}
131234709573Sraf 		if (pkevtp) {
131334709573Sraf 			/*
131434709573Sraf 			 * Prepare data to send when list of aiocb's
131534709573Sraf 			 * has completed.
131634709573Sraf 			 */
131734709573Sraf 			port_init_event(pkevtp, (uintptr_t)sigev,
131834709573Sraf 			    (void *)(uintptr_t)pnotify.portnfy_user,
131934709573Sraf 			    NULL, head);
132034709573Sraf 			pkevtp->portkev_events = AIOLIO;
132134709573Sraf 			head->lio_portkev = pkevtp;
132234709573Sraf 			head->lio_port = pnotify.portnfy_port;
132334709573Sraf 		}
13247c478bd9Sstevel@tonic-gate 	}
13257c478bd9Sstevel@tonic-gate 
13267c478bd9Sstevel@tonic-gate 	for (i = 0; i < nent; i++, ucbp++) {
13277c478bd9Sstevel@tonic-gate 
13287c478bd9Sstevel@tonic-gate 		cbp = *ucbp;
13297c478bd9Sstevel@tonic-gate 		/* skip entry if it can't be copied. */
133034709573Sraf 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
13317c478bd9Sstevel@tonic-gate 			if (head) {
13327c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13337c478bd9Sstevel@tonic-gate 				head->lio_nent--;
13347c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
13357c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13367c478bd9Sstevel@tonic-gate 			}
13377c478bd9Sstevel@tonic-gate 			continue;
13387c478bd9Sstevel@tonic-gate 		}
13397c478bd9Sstevel@tonic-gate 
13407c478bd9Sstevel@tonic-gate 		/* skip if opcode for aiocb is LIO_NOP */
13417c478bd9Sstevel@tonic-gate 		mode = aiocb->aio_lio_opcode;
13427c478bd9Sstevel@tonic-gate 		if (mode == LIO_NOP) {
13437c478bd9Sstevel@tonic-gate 			cbp = NULL;
13447c478bd9Sstevel@tonic-gate 			if (head) {
13457c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13467c478bd9Sstevel@tonic-gate 				head->lio_nent--;
13477c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
13487c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13497c478bd9Sstevel@tonic-gate 			}
13507c478bd9Sstevel@tonic-gate 			continue;
13517c478bd9Sstevel@tonic-gate 		}
13527c478bd9Sstevel@tonic-gate 
13537c478bd9Sstevel@tonic-gate 		/* increment file descriptor's ref count. */
13547c478bd9Sstevel@tonic-gate 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
13557c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
13567c478bd9Sstevel@tonic-gate 			if (head) {
13577c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13587c478bd9Sstevel@tonic-gate 				head->lio_nent--;
13597c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
13607c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13617c478bd9Sstevel@tonic-gate 			}
13627c478bd9Sstevel@tonic-gate 			aio_errors++;
13637c478bd9Sstevel@tonic-gate 			continue;
13647c478bd9Sstevel@tonic-gate 		}
13657c478bd9Sstevel@tonic-gate 
13667c478bd9Sstevel@tonic-gate 		/*
13677c478bd9Sstevel@tonic-gate 		 * check the permission of the partition
13687c478bd9Sstevel@tonic-gate 		 */
13697c478bd9Sstevel@tonic-gate 		if ((fp->f_flag & mode) == 0) {
13707c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
13717c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
13727c478bd9Sstevel@tonic-gate 			if (head) {
13737c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13747c478bd9Sstevel@tonic-gate 				head->lio_nent--;
13757c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
13767c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13777c478bd9Sstevel@tonic-gate 			}
13787c478bd9Sstevel@tonic-gate 			aio_errors++;
13797c478bd9Sstevel@tonic-gate 			continue;
13807c478bd9Sstevel@tonic-gate 		}
13817c478bd9Sstevel@tonic-gate 
13827c478bd9Sstevel@tonic-gate 		/*
138334709573Sraf 		 * common case where requests are to the same fd
138434709573Sraf 		 * for the same r/w operation.
13857c478bd9Sstevel@tonic-gate 		 * for UFS, need to set EBADFD
13867c478bd9Sstevel@tonic-gate 		 */
138734709573Sraf 		vp = fp->f_vnode;
138834709573Sraf 		if (fp != prev_fp || mode != prev_mode) {
13897c478bd9Sstevel@tonic-gate 			aio_func = check_vp(vp, mode);
13907c478bd9Sstevel@tonic-gate 			if (aio_func == NULL) {
13917c478bd9Sstevel@tonic-gate 				prev_fp = NULL;
13927c478bd9Sstevel@tonic-gate 				releasef(aiocb->aio_fildes);
13937c478bd9Sstevel@tonic-gate 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
13947c478bd9Sstevel@tonic-gate 				aio_notsupported++;
13957c478bd9Sstevel@tonic-gate 				if (head) {
13967c478bd9Sstevel@tonic-gate 					mutex_enter(&aiop->aio_mutex);
13977c478bd9Sstevel@tonic-gate 					head->lio_nent--;
13987c478bd9Sstevel@tonic-gate 					head->lio_refcnt--;
13997c478bd9Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
14007c478bd9Sstevel@tonic-gate 				}
14017c478bd9Sstevel@tonic-gate 				continue;
14027c478bd9Sstevel@tonic-gate 			} else {
14037c478bd9Sstevel@tonic-gate 				prev_fp = fp;
14047c478bd9Sstevel@tonic-gate 				prev_mode = mode;
14057c478bd9Sstevel@tonic-gate 			}
14067c478bd9Sstevel@tonic-gate 		}
14077c478bd9Sstevel@tonic-gate 
140834709573Sraf 		error = aio_req_setup(&reqp, aiop, aiocb,
140934709573Sraf 		    &cbp->aio_resultp, vp);
141034709573Sraf 		if (error) {
14117c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
14127c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
14137c478bd9Sstevel@tonic-gate 			if (head) {
14147c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
14157c478bd9Sstevel@tonic-gate 				head->lio_nent--;
14167c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
14177c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
14187c478bd9Sstevel@tonic-gate 			}
14197c478bd9Sstevel@tonic-gate 			aio_errors++;
14207c478bd9Sstevel@tonic-gate 			continue;
14217c478bd9Sstevel@tonic-gate 		}
14227c478bd9Sstevel@tonic-gate 
14237c478bd9Sstevel@tonic-gate 		reqp->aio_req_lio = head;
14247c478bd9Sstevel@tonic-gate 		deadhead = 0;
14257c478bd9Sstevel@tonic-gate 
14267c478bd9Sstevel@tonic-gate 		/*
14277c478bd9Sstevel@tonic-gate 		 * Set the errno field now before sending the request to
14287c478bd9Sstevel@tonic-gate 		 * the driver to avoid a race condition
14297c478bd9Sstevel@tonic-gate 		 */
14307c478bd9Sstevel@tonic-gate 		(void) suword32(&cbp->aio_resultp.aio_errno,
14317c478bd9Sstevel@tonic-gate 		    EINPROGRESS);
14327c478bd9Sstevel@tonic-gate 
14337c478bd9Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb = (caddr_t)cbp;
14347c478bd9Sstevel@tonic-gate 
143534709573Sraf 		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
143634709573Sraf 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
143734709573Sraf 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
143834709573Sraf 		if (aio_port | aio_thread) {
143934709573Sraf 			port_kevent_t *lpkevp;
144034709573Sraf 			/*
144134709573Sraf 			 * Prepare data to send with each aiocb completed.
144234709573Sraf 			 */
144334709573Sraf 			if (aio_port) {
144434709573Sraf 				void *paddr =
144534709573Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
144634709573Sraf 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
144734709573Sraf 					error = EFAULT;
144834709573Sraf 			} else {	/* aio_thread */
144934709573Sraf 				pnotify.portnfy_port =
145034709573Sraf 				    aiocb->aio_sigevent.sigev_signo;
145134709573Sraf 				pnotify.portnfy_user =
145234709573Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
145334709573Sraf 			}
145434709573Sraf 			if (error)
145534709573Sraf 				/* EMPTY */;
145634709573Sraf 			else if (pkevtp != NULL &&
145734709573Sraf 			    pnotify.portnfy_port == lio_head_port)
145834709573Sraf 				error = port_dup_event(pkevtp, &lpkevp,
145934709573Sraf 				    PORT_ALLOC_DEFAULT);
146034709573Sraf 			else
146134709573Sraf 				error = port_alloc_event(pnotify.portnfy_port,
146234709573Sraf 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
146334709573Sraf 				    &lpkevp);
146434709573Sraf 			if (error == 0) {
146534709573Sraf 				port_init_event(lpkevp, (uintptr_t)cbp,
146634709573Sraf 				    (void *)(uintptr_t)pnotify.portnfy_user,
146734709573Sraf 				    aio_port_callback, reqp);
146834709573Sraf 				lpkevp->portkev_events = event;
146934709573Sraf 				reqp->aio_req_portkev = lpkevp;
14707c478bd9Sstevel@tonic-gate 				reqp->aio_req_port = pnotify.portnfy_port;
147134709573Sraf 			}
14727c478bd9Sstevel@tonic-gate 		}
14737c478bd9Sstevel@tonic-gate 
14747c478bd9Sstevel@tonic-gate 		/*
14757c478bd9Sstevel@tonic-gate 		 * send the request to driver.
14767c478bd9Sstevel@tonic-gate 		 */
14777c478bd9Sstevel@tonic-gate 		if (error == 0) {
14787c478bd9Sstevel@tonic-gate 			if (aiocb->aio_nbytes == 0) {
14797c478bd9Sstevel@tonic-gate 				clear_active_fd(aiocb->aio_fildes);
14807c478bd9Sstevel@tonic-gate 				aio_zerolen(reqp);
14817c478bd9Sstevel@tonic-gate 				continue;
14827c478bd9Sstevel@tonic-gate 			}
14837c478bd9Sstevel@tonic-gate 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
14847c478bd9Sstevel@tonic-gate 			    CRED());
14857c478bd9Sstevel@tonic-gate 		}
148634709573Sraf 
14877c478bd9Sstevel@tonic-gate 		/*
14887c478bd9Sstevel@tonic-gate 		 * the fd's ref count is not decremented until the IO has
14897c478bd9Sstevel@tonic-gate 		 * completed unless there was an error.
14907c478bd9Sstevel@tonic-gate 		 */
14917c478bd9Sstevel@tonic-gate 		if (error) {
14927c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
14937c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
14947c478bd9Sstevel@tonic-gate 			if (head) {
14957c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
14967c478bd9Sstevel@tonic-gate 				head->lio_nent--;
14977c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
14987c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
14997c478bd9Sstevel@tonic-gate 			}
15007c478bd9Sstevel@tonic-gate 			if (error == ENOTSUP)
15017c478bd9Sstevel@tonic-gate 				aio_notsupported++;
15027c478bd9Sstevel@tonic-gate 			else
15037c478bd9Sstevel@tonic-gate 				aio_errors++;
150434b3058fSpraks 			lio_set_error(reqp, portused);
15057c478bd9Sstevel@tonic-gate 		} else {
15067c478bd9Sstevel@tonic-gate 			clear_active_fd(aiocb->aio_fildes);
15077c478bd9Sstevel@tonic-gate 		}
15087c478bd9Sstevel@tonic-gate 	}
15097c478bd9Sstevel@tonic-gate 
15107c478bd9Sstevel@tonic-gate 	if (aio_notsupported) {
15117c478bd9Sstevel@tonic-gate 		error = ENOTSUP;
15127c478bd9Sstevel@tonic-gate 	} else if (aio_errors) {
15137c478bd9Sstevel@tonic-gate 		/*
15147c478bd9Sstevel@tonic-gate 		 * return EIO if any request failed
15157c478bd9Sstevel@tonic-gate 		 */
15167c478bd9Sstevel@tonic-gate 		error = EIO;
15177c478bd9Sstevel@tonic-gate 	}
15187c478bd9Sstevel@tonic-gate 
15197c478bd9Sstevel@tonic-gate 	if (mode_arg == LIO_WAIT) {
15207c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
15217c478bd9Sstevel@tonic-gate 		while (head->lio_refcnt > 0) {
15227c478bd9Sstevel@tonic-gate 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
15237c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
15247c478bd9Sstevel@tonic-gate 				error = EINTR;
15257c478bd9Sstevel@tonic-gate 				goto done;
15267c478bd9Sstevel@tonic-gate 			}
15277c478bd9Sstevel@tonic-gate 		}
15287c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
15297c478bd9Sstevel@tonic-gate 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
15307c478bd9Sstevel@tonic-gate 	}
15317c478bd9Sstevel@tonic-gate 
15327c478bd9Sstevel@tonic-gate done:
15337c478bd9Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
15347c478bd9Sstevel@tonic-gate 	if (deadhead) {
15357c478bd9Sstevel@tonic-gate 		if (head->lio_sigqp)
15367c478bd9Sstevel@tonic-gate 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
153734709573Sraf 		if (head->lio_portkev)
153834709573Sraf 			port_free_event(head->lio_portkev);
15397c478bd9Sstevel@tonic-gate 		kmem_free(head, sizeof (aio_lio_t));
15407c478bd9Sstevel@tonic-gate 	}
15417c478bd9Sstevel@tonic-gate 	return (error);
15427c478bd9Sstevel@tonic-gate }
15437c478bd9Sstevel@tonic-gate 
15447c478bd9Sstevel@tonic-gate #endif /* _LP64 */
15457c478bd9Sstevel@tonic-gate 
15467c478bd9Sstevel@tonic-gate /*
15477c478bd9Sstevel@tonic-gate  * Asynchronous list IO.
15487c478bd9Sstevel@tonic-gate  * If list I/O is called with LIO_WAIT it can still return
15497c478bd9Sstevel@tonic-gate  * before all the I/O's are completed if a signal is caught
15507c478bd9Sstevel@tonic-gate  * or if the list includes UFS I/O requests. If this happens,
15517c478bd9Sstevel@tonic-gate  * libaio will call aliowait() to wait for the I/O's to
15527c478bd9Sstevel@tonic-gate  * complete
15537c478bd9Sstevel@tonic-gate  */
15547c478bd9Sstevel@tonic-gate /*ARGSUSED*/
15557c478bd9Sstevel@tonic-gate static int
15567c478bd9Sstevel@tonic-gate aliowait(
15577c478bd9Sstevel@tonic-gate 	int	mode,
15587c478bd9Sstevel@tonic-gate 	void	*aiocb,
15597c478bd9Sstevel@tonic-gate 	int	nent,
15607c478bd9Sstevel@tonic-gate 	void	*sigev,
15617c478bd9Sstevel@tonic-gate 	int	run_mode)
15627c478bd9Sstevel@tonic-gate {
15637c478bd9Sstevel@tonic-gate 	aio_lio_t	*head;
15647c478bd9Sstevel@tonic-gate 	aio_t		*aiop;
15657c478bd9Sstevel@tonic-gate 	caddr_t		cbplist;
15667c478bd9Sstevel@tonic-gate 	aiocb_t		*cbp, **ucbp;
15677c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
15687c478bd9Sstevel@tonic-gate 	aiocb32_t	*cbp32;
15697c478bd9Sstevel@tonic-gate 	caddr32_t	*ucbp32;
15707c478bd9Sstevel@tonic-gate 	aiocb64_32_t	*cbp64;
15717c478bd9Sstevel@tonic-gate #endif
15727c478bd9Sstevel@tonic-gate 	int		error = 0;
15737c478bd9Sstevel@tonic-gate 	int		i;
15747c478bd9Sstevel@tonic-gate 	size_t		ssize = 0;
15757c478bd9Sstevel@tonic-gate 	model_t		model = get_udatamodel();
15767c478bd9Sstevel@tonic-gate 
15777c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
15787c478bd9Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
15797c478bd9Sstevel@tonic-gate 		return (EINVAL);
15807c478bd9Sstevel@tonic-gate 
15817c478bd9Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
15827c478bd9Sstevel@tonic-gate 		ssize = (sizeof (aiocb_t *) * nent);
15837c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
15847c478bd9Sstevel@tonic-gate 	else
15857c478bd9Sstevel@tonic-gate 		ssize = (sizeof (caddr32_t) * nent);
15867c478bd9Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
15877c478bd9Sstevel@tonic-gate 
15887c478bd9Sstevel@tonic-gate 	if (ssize == 0)
15897c478bd9Sstevel@tonic-gate 		return (EINVAL);
15907c478bd9Sstevel@tonic-gate 
15917c478bd9Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
15927c478bd9Sstevel@tonic-gate 
15937c478bd9Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
15947c478bd9Sstevel@tonic-gate 		ucbp = (aiocb_t **)cbplist;
15957c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
15967c478bd9Sstevel@tonic-gate 	else
15977c478bd9Sstevel@tonic-gate 		ucbp32 = (caddr32_t *)cbplist;
15987c478bd9Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
15997c478bd9Sstevel@tonic-gate 
16007c478bd9Sstevel@tonic-gate 	if (copyin(aiocb, cbplist, ssize)) {
16017c478bd9Sstevel@tonic-gate 		error = EFAULT;
16027c478bd9Sstevel@tonic-gate 		goto done;
16037c478bd9Sstevel@tonic-gate 	}
16047c478bd9Sstevel@tonic-gate 
16057c478bd9Sstevel@tonic-gate 	/*
16067c478bd9Sstevel@tonic-gate 	 * To find the list head, we go through the
16077c478bd9Sstevel@tonic-gate 	 * list of aiocb structs, find the request
16087c478bd9Sstevel@tonic-gate 	 * its for, then get the list head that reqp
16097c478bd9Sstevel@tonic-gate 	 * points to
16107c478bd9Sstevel@tonic-gate 	 */
16117c478bd9Sstevel@tonic-gate 	head = NULL;
16127c478bd9Sstevel@tonic-gate 
16137c478bd9Sstevel@tonic-gate 	for (i = 0; i < nent; i++) {
16147c478bd9Sstevel@tonic-gate 		if (model == DATAMODEL_NATIVE) {
16157c478bd9Sstevel@tonic-gate 			/*
16167c478bd9Sstevel@tonic-gate 			 * Since we are only checking for a NULL pointer
16177c478bd9Sstevel@tonic-gate 			 * Following should work on both native data sizes
16187c478bd9Sstevel@tonic-gate 			 * as well as for largefile aiocb.
16197c478bd9Sstevel@tonic-gate 			 */
16207c478bd9Sstevel@tonic-gate 			if ((cbp = *ucbp++) == NULL)
16217c478bd9Sstevel@tonic-gate 				continue;
16227c478bd9Sstevel@tonic-gate 			if (run_mode != AIO_LARGEFILE)
16237c478bd9Sstevel@tonic-gate 				if (head = aio_list_get(&cbp->aio_resultp))
16247c478bd9Sstevel@tonic-gate 					break;
16257c478bd9Sstevel@tonic-gate 			else {
16267c478bd9Sstevel@tonic-gate 				/*
16277c478bd9Sstevel@tonic-gate 				 * This is a case when largefile call is
16287c478bd9Sstevel@tonic-gate 				 * made on 32 bit kernel.
16297c478bd9Sstevel@tonic-gate 				 * Treat each pointer as pointer to
16307c478bd9Sstevel@tonic-gate 				 * aiocb64_32
16317c478bd9Sstevel@tonic-gate 				 */
16327c478bd9Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16337c478bd9Sstevel@tonic-gate 				    &(((aiocb64_32_t *)cbp)->aio_resultp)))
16347c478bd9Sstevel@tonic-gate 					break;
16357c478bd9Sstevel@tonic-gate 			}
16367c478bd9Sstevel@tonic-gate 		}
16377c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
16387c478bd9Sstevel@tonic-gate 		else {
16397c478bd9Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE) {
16407c478bd9Sstevel@tonic-gate 				if ((cbp64 = (aiocb64_32_t *)
16417c478bd9Sstevel@tonic-gate 				    (uintptr_t)*ucbp32++) == NULL)
16427c478bd9Sstevel@tonic-gate 					continue;
16437c478bd9Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16447c478bd9Sstevel@tonic-gate 				    &cbp64->aio_resultp))
16457c478bd9Sstevel@tonic-gate 					break;
16467c478bd9Sstevel@tonic-gate 			} else if (run_mode == AIO_32) {
16477c478bd9Sstevel@tonic-gate 				if ((cbp32 = (aiocb32_t *)
16487c478bd9Sstevel@tonic-gate 				    (uintptr_t)*ucbp32++) == NULL)
16497c478bd9Sstevel@tonic-gate 					continue;
16507c478bd9Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16517c478bd9Sstevel@tonic-gate 				    &cbp32->aio_resultp))
16527c478bd9Sstevel@tonic-gate 					break;
16537c478bd9Sstevel@tonic-gate 			}
16547c478bd9Sstevel@tonic-gate 		}
16557c478bd9Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
16567c478bd9Sstevel@tonic-gate 	}
16577c478bd9Sstevel@tonic-gate 
16587c478bd9Sstevel@tonic-gate 	if (head == NULL) {
16597c478bd9Sstevel@tonic-gate 		error = EINVAL;
16607c478bd9Sstevel@tonic-gate 		goto done;
16617c478bd9Sstevel@tonic-gate 	}
16627c478bd9Sstevel@tonic-gate 
16637c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
16647c478bd9Sstevel@tonic-gate 	while (head->lio_refcnt > 0) {
16657c478bd9Sstevel@tonic-gate 		if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
16667c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
16677c478bd9Sstevel@tonic-gate 			error = EINTR;
16687c478bd9Sstevel@tonic-gate 			goto done;
16697c478bd9Sstevel@tonic-gate 		}
16707c478bd9Sstevel@tonic-gate 	}
16717c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
16727c478bd9Sstevel@tonic-gate 	alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);
16737c478bd9Sstevel@tonic-gate done:
16747c478bd9Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
16757c478bd9Sstevel@tonic-gate 	return (error);
16767c478bd9Sstevel@tonic-gate }
16777c478bd9Sstevel@tonic-gate 
16787c478bd9Sstevel@tonic-gate aio_lio_t *
16797c478bd9Sstevel@tonic-gate aio_list_get(aio_result_t *resultp)
16807c478bd9Sstevel@tonic-gate {
16817c478bd9Sstevel@tonic-gate 	aio_lio_t	*head = NULL;
16827c478bd9Sstevel@tonic-gate 	aio_t		*aiop;
16837c478bd9Sstevel@tonic-gate 	aio_req_t 	**bucket;
16847c478bd9Sstevel@tonic-gate 	aio_req_t 	*reqp;
16857c478bd9Sstevel@tonic-gate 	long		index;
16867c478bd9Sstevel@tonic-gate 
16877c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
16887c478bd9Sstevel@tonic-gate 	if (aiop == NULL)
16897c478bd9Sstevel@tonic-gate 		return (NULL);
16907c478bd9Sstevel@tonic-gate 
16917c478bd9Sstevel@tonic-gate 	if (resultp) {
16927c478bd9Sstevel@tonic-gate 		index = AIO_HASH(resultp);
16937c478bd9Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
16947c478bd9Sstevel@tonic-gate 		for (reqp = *bucket; reqp != NULL;
16957c478bd9Sstevel@tonic-gate 		    reqp = reqp->aio_hash_next) {
16967c478bd9Sstevel@tonic-gate 			if (reqp->aio_req_resultp == resultp) {
16977c478bd9Sstevel@tonic-gate 				head = reqp->aio_req_lio;
16987c478bd9Sstevel@tonic-gate 				return (head);
16997c478bd9Sstevel@tonic-gate 			}
17007c478bd9Sstevel@tonic-gate 		}
17017c478bd9Sstevel@tonic-gate 	}
17027c478bd9Sstevel@tonic-gate 	return (NULL);
17037c478bd9Sstevel@tonic-gate }
17047c478bd9Sstevel@tonic-gate 
17057c478bd9Sstevel@tonic-gate 
17067c478bd9Sstevel@tonic-gate static void
17077c478bd9Sstevel@tonic-gate lio_set_uerror(void *resultp, int error)
17087c478bd9Sstevel@tonic-gate {
17097c478bd9Sstevel@tonic-gate 	/*
17107c478bd9Sstevel@tonic-gate 	 * the resultp field is a pointer to where the
17117c478bd9Sstevel@tonic-gate 	 * error should be written out to the user's
17127c478bd9Sstevel@tonic-gate 	 * aiocb.
17137c478bd9Sstevel@tonic-gate 	 *
17147c478bd9Sstevel@tonic-gate 	 */
17157c478bd9Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_NATIVE) {
17167c478bd9Sstevel@tonic-gate 		(void) sulword(&((aio_result_t *)resultp)->aio_return,
17177c478bd9Sstevel@tonic-gate 		    (ssize_t)-1);
17187c478bd9Sstevel@tonic-gate 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
17197c478bd9Sstevel@tonic-gate 	}
17207c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
17217c478bd9Sstevel@tonic-gate 	else {
17227c478bd9Sstevel@tonic-gate 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
17237c478bd9Sstevel@tonic-gate 		    (uint_t)-1);
17247c478bd9Sstevel@tonic-gate 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
17257c478bd9Sstevel@tonic-gate 	}
17267c478bd9Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
17277c478bd9Sstevel@tonic-gate }
17287c478bd9Sstevel@tonic-gate 
17297c478bd9Sstevel@tonic-gate /*
17307c478bd9Sstevel@tonic-gate  * do cleanup completion for all requests in list. memory for
17317c478bd9Sstevel@tonic-gate  * each request is also freed.
17327c478bd9Sstevel@tonic-gate  */
17337c478bd9Sstevel@tonic-gate static void
17347c478bd9Sstevel@tonic-gate alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
17357c478bd9Sstevel@tonic-gate {
17367c478bd9Sstevel@tonic-gate 	int i;
17377c478bd9Sstevel@tonic-gate 	aio_req_t *reqp;
17387c478bd9Sstevel@tonic-gate 	aio_result_t *resultp;
17397c478bd9Sstevel@tonic-gate 	aiocb64_32_t *aiocb_64;
17407c478bd9Sstevel@tonic-gate 
17417c478bd9Sstevel@tonic-gate 	for (i = 0; i < nent; i++) {
17427c478bd9Sstevel@tonic-gate 		if (get_udatamodel() == DATAMODEL_NATIVE) {
17437c478bd9Sstevel@tonic-gate 			if (cbp[i] == NULL)
17447c478bd9Sstevel@tonic-gate 				continue;
17457c478bd9Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE) {
17467c478bd9Sstevel@tonic-gate 				aiocb_64 = (aiocb64_32_t *)cbp[i];
174734709573Sraf 				resultp = (aio_result_t *)
174834709573Sraf 				    &aiocb_64->aio_resultp;
17497c478bd9Sstevel@tonic-gate 			} else
17507c478bd9Sstevel@tonic-gate 				resultp = &cbp[i]->aio_resultp;
17517c478bd9Sstevel@tonic-gate 		}
17527c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
17537c478bd9Sstevel@tonic-gate 		else {
17547c478bd9Sstevel@tonic-gate 			aiocb32_t *aiocb_32;
17557c478bd9Sstevel@tonic-gate 			caddr32_t *cbp32;
17567c478bd9Sstevel@tonic-gate 
17577c478bd9Sstevel@tonic-gate 			cbp32 = (caddr32_t *)cbp;
17587c478bd9Sstevel@tonic-gate 			if (cbp32[i] == NULL)
17597c478bd9Sstevel@tonic-gate 				continue;
17607c478bd9Sstevel@tonic-gate 			if (run_mode == AIO_32) {
17617c478bd9Sstevel@tonic-gate 				aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i];
17627c478bd9Sstevel@tonic-gate 				resultp = (aio_result_t *)&aiocb_32->
17637c478bd9Sstevel@tonic-gate 				    aio_resultp;
17647c478bd9Sstevel@tonic-gate 			} else if (run_mode == AIO_LARGEFILE) {
17657c478bd9Sstevel@tonic-gate 				aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i];
17667c478bd9Sstevel@tonic-gate 				resultp = (aio_result_t *)&aiocb_64->
17677c478bd9Sstevel@tonic-gate 				    aio_resultp;
17687c478bd9Sstevel@tonic-gate 			}
17697c478bd9Sstevel@tonic-gate 		}
17707c478bd9Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
17717c478bd9Sstevel@tonic-gate 		/*
17727c478bd9Sstevel@tonic-gate 		 * we need to get the aio_cleanupq_mutex since we call
17737c478bd9Sstevel@tonic-gate 		 * aio_req_done().
17747c478bd9Sstevel@tonic-gate 		 */
17757c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_cleanupq_mutex);
17767c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
17777c478bd9Sstevel@tonic-gate 		reqp = aio_req_done(resultp);
17787c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
17797c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_cleanupq_mutex);
17807c478bd9Sstevel@tonic-gate 		if (reqp != NULL) {
17817c478bd9Sstevel@tonic-gate 			aphysio_unlock(reqp);
17827c478bd9Sstevel@tonic-gate 			aio_copyout_result(reqp);
17837c478bd9Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
17847c478bd9Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
17857c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
17867c478bd9Sstevel@tonic-gate 		}
17877c478bd9Sstevel@tonic-gate 	}
17887c478bd9Sstevel@tonic-gate }
17897c478bd9Sstevel@tonic-gate 
17907c478bd9Sstevel@tonic-gate /*
179134709573Sraf  * Write out the results for an aio request that is done.
17927c478bd9Sstevel@tonic-gate  */
17937c478bd9Sstevel@tonic-gate static int
17947c478bd9Sstevel@tonic-gate aioerror(void *cb, int run_mode)
17957c478bd9Sstevel@tonic-gate {
17967c478bd9Sstevel@tonic-gate 	aio_result_t *resultp;
17977c478bd9Sstevel@tonic-gate 	aio_t *aiop;
17987c478bd9Sstevel@tonic-gate 	aio_req_t *reqp;
17997c478bd9Sstevel@tonic-gate 	int retval;
18007c478bd9Sstevel@tonic-gate 
18017c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
18027c478bd9Sstevel@tonic-gate 	if (aiop == NULL || cb == NULL)
18037c478bd9Sstevel@tonic-gate 		return (EINVAL);
18047c478bd9Sstevel@tonic-gate 
18057c478bd9Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_NATIVE) {
18067c478bd9Sstevel@tonic-gate 		if (run_mode == AIO_LARGEFILE)
18077c478bd9Sstevel@tonic-gate 			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
18087c478bd9Sstevel@tonic-gate 			    aio_resultp;
18097c478bd9Sstevel@tonic-gate 		else
18107c478bd9Sstevel@tonic-gate 			resultp = &((aiocb_t *)cb)->aio_resultp;
18117c478bd9Sstevel@tonic-gate 	}
18127c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
18137c478bd9Sstevel@tonic-gate 	else {
18147c478bd9Sstevel@tonic-gate 		if (run_mode == AIO_LARGEFILE)
18157c478bd9Sstevel@tonic-gate 			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
18167c478bd9Sstevel@tonic-gate 			    aio_resultp;
18177c478bd9Sstevel@tonic-gate 		else if (run_mode == AIO_32)
18187c478bd9Sstevel@tonic-gate 			resultp = (aio_result_t *)&((aiocb32_t *)cb)->
18197c478bd9Sstevel@tonic-gate 			    aio_resultp;
18207c478bd9Sstevel@tonic-gate 	}
18217c478bd9Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
18227c478bd9Sstevel@tonic-gate 	/*
18237c478bd9Sstevel@tonic-gate 	 * we need to get the aio_cleanupq_mutex since we call
18247c478bd9Sstevel@tonic-gate 	 * aio_req_find().
18257c478bd9Sstevel@tonic-gate 	 */
18267c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_cleanupq_mutex);
18277c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
18287c478bd9Sstevel@tonic-gate 	retval = aio_req_find(resultp, &reqp);
18297c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
18307c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_cleanupq_mutex);
18317c478bd9Sstevel@tonic-gate 	if (retval == 0) {
18327c478bd9Sstevel@tonic-gate 		aphysio_unlock(reqp);
18337c478bd9Sstevel@tonic-gate 		aio_copyout_result(reqp);
18347c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
18357c478bd9Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
18367c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
18377c478bd9Sstevel@tonic-gate 		return (0);
18387c478bd9Sstevel@tonic-gate 	} else if (retval == 1)
18397c478bd9Sstevel@tonic-gate 		return (EINPROGRESS);
18407c478bd9Sstevel@tonic-gate 	else if (retval == 2)
18417c478bd9Sstevel@tonic-gate 		return (EINVAL);
18427c478bd9Sstevel@tonic-gate 	return (0);
18437c478bd9Sstevel@tonic-gate }
18447c478bd9Sstevel@tonic-gate 
18457c478bd9Sstevel@tonic-gate /*
18467c478bd9Sstevel@tonic-gate  * 	aio_cancel - if no requests outstanding,
18477c478bd9Sstevel@tonic-gate  *			return AIO_ALLDONE
18487c478bd9Sstevel@tonic-gate  *			else
18497c478bd9Sstevel@tonic-gate  *			return AIO_NOTCANCELED
18507c478bd9Sstevel@tonic-gate  */
18517c478bd9Sstevel@tonic-gate static int
18527c478bd9Sstevel@tonic-gate aio_cancel(
18537c478bd9Sstevel@tonic-gate 	int	fildes,
18547c478bd9Sstevel@tonic-gate 	void 	*cb,
18557c478bd9Sstevel@tonic-gate 	long	*rval,
18567c478bd9Sstevel@tonic-gate 	int	run_mode)
18577c478bd9Sstevel@tonic-gate {
18587c478bd9Sstevel@tonic-gate 	aio_t *aiop;
18597c478bd9Sstevel@tonic-gate 	void *resultp;
18607c478bd9Sstevel@tonic-gate 	int index;
18617c478bd9Sstevel@tonic-gate 	aio_req_t **bucket;
18627c478bd9Sstevel@tonic-gate 	aio_req_t *ent;
18637c478bd9Sstevel@tonic-gate 
18647c478bd9Sstevel@tonic-gate 
18657c478bd9Sstevel@tonic-gate 	/*
18667c478bd9Sstevel@tonic-gate 	 * Verify valid file descriptor
18677c478bd9Sstevel@tonic-gate 	 */
18687c478bd9Sstevel@tonic-gate 	if ((getf(fildes)) == NULL) {
18697c478bd9Sstevel@tonic-gate 		return (EBADF);
18707c478bd9Sstevel@tonic-gate 	}
18717c478bd9Sstevel@tonic-gate 	releasef(fildes);
18727c478bd9Sstevel@tonic-gate 
18737c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
18747c478bd9Sstevel@tonic-gate 	if (aiop == NULL)
18757c478bd9Sstevel@tonic-gate 		return (EINVAL);
18767c478bd9Sstevel@tonic-gate 
18777c478bd9Sstevel@tonic-gate 	if (aiop->aio_outstanding == 0) {
18787c478bd9Sstevel@tonic-gate 		*rval = AIO_ALLDONE;
18797c478bd9Sstevel@tonic-gate 		return (0);
18807c478bd9Sstevel@tonic-gate 	}
18817c478bd9Sstevel@tonic-gate 
18827c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
18837c478bd9Sstevel@tonic-gate 	if (cb != NULL) {
18847c478bd9Sstevel@tonic-gate 		if (get_udatamodel() == DATAMODEL_NATIVE) {
18857c478bd9Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE)
18867c478bd9Sstevel@tonic-gate 				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
18877c478bd9Sstevel@tonic-gate 				    ->aio_resultp;
18887c478bd9Sstevel@tonic-gate 			else
18897c478bd9Sstevel@tonic-gate 				resultp = &((aiocb_t *)cb)->aio_resultp;
18907c478bd9Sstevel@tonic-gate 		}
18917c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
18927c478bd9Sstevel@tonic-gate 		else {
18937c478bd9Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE)
18947c478bd9Sstevel@tonic-gate 				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
18957c478bd9Sstevel@tonic-gate 				    ->aio_resultp;
18967c478bd9Sstevel@tonic-gate 			else if (run_mode == AIO_32)
18977c478bd9Sstevel@tonic-gate 				resultp = (aio_result_t *)&((aiocb32_t *)cb)
18987c478bd9Sstevel@tonic-gate 				    ->aio_resultp;
18997c478bd9Sstevel@tonic-gate 		}
19007c478bd9Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
19017c478bd9Sstevel@tonic-gate 		index = AIO_HASH(resultp);
19027c478bd9Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
19037c478bd9Sstevel@tonic-gate 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
19047c478bd9Sstevel@tonic-gate 			if (ent->aio_req_resultp == resultp) {
19057c478bd9Sstevel@tonic-gate 				if ((ent->aio_req_flags & AIO_PENDING) == 0) {
19067c478bd9Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
19077c478bd9Sstevel@tonic-gate 					*rval = AIO_ALLDONE;
19087c478bd9Sstevel@tonic-gate 					return (0);
19097c478bd9Sstevel@tonic-gate 				}
19107c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
19117c478bd9Sstevel@tonic-gate 				*rval = AIO_NOTCANCELED;
19127c478bd9Sstevel@tonic-gate 				return (0);
19137c478bd9Sstevel@tonic-gate 			}
19147c478bd9Sstevel@tonic-gate 		}
19157c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
19167c478bd9Sstevel@tonic-gate 		*rval = AIO_ALLDONE;
19177c478bd9Sstevel@tonic-gate 		return (0);
19187c478bd9Sstevel@tonic-gate 	}
19197c478bd9Sstevel@tonic-gate 
19207c478bd9Sstevel@tonic-gate 	for (index = 0; index < AIO_HASHSZ; index++) {
19217c478bd9Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
19227c478bd9Sstevel@tonic-gate 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
19237c478bd9Sstevel@tonic-gate 			if (ent->aio_req_fd == fildes) {
19247c478bd9Sstevel@tonic-gate 				if ((ent->aio_req_flags & AIO_PENDING) != 0) {
19257c478bd9Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
19267c478bd9Sstevel@tonic-gate 					*rval = AIO_NOTCANCELED;
19277c478bd9Sstevel@tonic-gate 					return (0);
19287c478bd9Sstevel@tonic-gate 				}
19297c478bd9Sstevel@tonic-gate 			}
19307c478bd9Sstevel@tonic-gate 		}
19317c478bd9Sstevel@tonic-gate 	}
19327c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
19337c478bd9Sstevel@tonic-gate 	*rval = AIO_ALLDONE;
19347c478bd9Sstevel@tonic-gate 	return (0);
19357c478bd9Sstevel@tonic-gate }
19367c478bd9Sstevel@tonic-gate 
/*
 * solaris version of asynchronous read and write
 *
 * Builds a kernel-local control block for the (opcode, fdes, bufp,
 * bufsize, offset) request, queues it on the process's aio state and
 * hands it to the vnode's async strategy routine.  "mode" is FREAD or
 * FWRITE and is checked against the open mode of the descriptor.
 * Returns 0 on successful submission; the actual I/O result is
 * reported later through "resultp".
 */
static int
arw(
	int	opcode,
	int	fdes,
	char	*bufp,
	int	bufsize,
	offset_t	offset,
	aio_result_t	*resultp,
	int		mode)
{
	file_t		*fp;
	int		error;
	struct vnode	*vp;
	aio_req_t	*reqp;
	aio_t		*aiop;
	int		(*aio_func)();
#ifdef _LP64
	aiocb_t		aiocb;
#else
	/* 32-bit kernel: use the largefile form so offset_t fits */
	aiocb64_32_t	aiocb64;
#endif

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	if ((fp = getf(fdes)) == NULL) {
		return (EBADF);
	}

	/*
	 * check the permission of the partition
	 */
	if ((fp->f_flag & mode) == 0) {
		releasef(fdes);
		return (EBADF);
	}

	/* only vnodes that support async I/O are accepted */
	vp = fp->f_vnode;
	aio_func = check_vp(vp, mode);
	if (aio_func == NULL) {
		releasef(fdes);
		return (EBADFD);
	}
#ifdef _LP64
	aiocb.aio_fildes = fdes;
	aiocb.aio_buf = bufp;
	aiocb.aio_nbytes = bufsize;
	aiocb.aio_offset = offset;
	aiocb.aio_sigevent.sigev_notify = 0;
	error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp);
#else
	aiocb64.aio_fildes = fdes;
	aiocb64.aio_buf = (caddr32_t)bufp;
	aiocb64.aio_nbytes = bufsize;
	aiocb64.aio_offset = offset;
	aiocb64.aio_sigevent.sigev_notify = 0;
	error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp);
#endif
	if (error) {
		releasef(fdes);
		return (error);
	}

	/*
	 * enable polling on this request if the opcode has
	 * the AIO poll bit set
	 */
	if (opcode & AIO_POLL_BIT)
		reqp->aio_req_flags |= AIO_POLL;

	/* zero-length transfers complete immediately without any I/O */
	if (bufsize == 0) {
		clear_active_fd(fdes);
		aio_zerolen(reqp);
		return (0);
	}
	/*
	 * send the request to driver.
	 */
	error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
	/*
	 * the fd is stored in the aio_req_t by aio_req_setup(), and
	 * is released by the aio_cleanup_thread() when the IO has
	 * completed.
	 */
	if (error) {
		releasef(fdes);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		aiop->aio_pending--;
		if (aiop->aio_flags & AIO_REQ_BLOCK)
			cv_signal(&aiop->aio_cleanupcv);
		mutex_exit(&aiop->aio_mutex);
		return (error);
	}
	clear_active_fd(fdes);
	return (0);
}
20387c478bd9Sstevel@tonic-gate 
20397c478bd9Sstevel@tonic-gate /*
20407c478bd9Sstevel@tonic-gate  * posix version of asynchronous read and write
20417c478bd9Sstevel@tonic-gate  */
20427c478bd9Sstevel@tonic-gate static int
20437c478bd9Sstevel@tonic-gate aiorw(
20447c478bd9Sstevel@tonic-gate 	int		opcode,
20457c478bd9Sstevel@tonic-gate 	void		*aiocb_arg,
20467c478bd9Sstevel@tonic-gate 	int		mode,
20477c478bd9Sstevel@tonic-gate 	int		run_mode)
20487c478bd9Sstevel@tonic-gate {
20497c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
20507c478bd9Sstevel@tonic-gate 	aiocb32_t	aiocb32;
20517c478bd9Sstevel@tonic-gate 	struct	sigevent32 *sigev32;
20527c478bd9Sstevel@tonic-gate 	port_notify32_t	pntfy32;
20537c478bd9Sstevel@tonic-gate #endif
20547c478bd9Sstevel@tonic-gate 	aiocb64_32_t	aiocb64;
20557c478bd9Sstevel@tonic-gate 	aiocb_t		aiocb;
20567c478bd9Sstevel@tonic-gate 	file_t		*fp;
20577c478bd9Sstevel@tonic-gate 	int		error, fd;
20587c478bd9Sstevel@tonic-gate 	size_t		bufsize;
20597c478bd9Sstevel@tonic-gate 	struct vnode	*vp;
20607c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp;
20617c478bd9Sstevel@tonic-gate 	aio_t		*aiop;
20627c478bd9Sstevel@tonic-gate 	int		(*aio_func)();
20637c478bd9Sstevel@tonic-gate 	aio_result_t	*resultp;
20647c478bd9Sstevel@tonic-gate 	struct	sigevent *sigev;
20657c478bd9Sstevel@tonic-gate 	model_t		model;
20667c478bd9Sstevel@tonic-gate 	int		aio_use_port = 0;
20677c478bd9Sstevel@tonic-gate 	port_notify_t	pntfy;
20687c478bd9Sstevel@tonic-gate 
20697c478bd9Sstevel@tonic-gate 	model = get_udatamodel();
20707c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
20717c478bd9Sstevel@tonic-gate 	if (aiop == NULL)
20727c478bd9Sstevel@tonic-gate 		return (EINVAL);
20737c478bd9Sstevel@tonic-gate 
20747c478bd9Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
20757c478bd9Sstevel@tonic-gate 		if (run_mode != AIO_LARGEFILE) {
20767c478bd9Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
20777c478bd9Sstevel@tonic-gate 				return (EFAULT);
20787c478bd9Sstevel@tonic-gate 			bufsize = aiocb.aio_nbytes;
20797c478bd9Sstevel@tonic-gate 			resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
20807c478bd9Sstevel@tonic-gate 			if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
20817c478bd9Sstevel@tonic-gate 				return (EBADF);
20827c478bd9Sstevel@tonic-gate 			}
20837c478bd9Sstevel@tonic-gate 			sigev = &aiocb.aio_sigevent;
20847c478bd9Sstevel@tonic-gate 		} else {
20857c478bd9Sstevel@tonic-gate 			/*
20867c478bd9Sstevel@tonic-gate 			 * We come here only when we make largefile
20877c478bd9Sstevel@tonic-gate 			 * call on 32 bit kernel using 32 bit library.
20887c478bd9Sstevel@tonic-gate 			 */
20897c478bd9Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
20907c478bd9Sstevel@tonic-gate 				return (EFAULT);
20917c478bd9Sstevel@tonic-gate 			bufsize = aiocb64.aio_nbytes;
20927c478bd9Sstevel@tonic-gate 			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
20937c478bd9Sstevel@tonic-gate 			    ->aio_resultp);
209434709573Sraf 			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
20957c478bd9Sstevel@tonic-gate 				return (EBADF);
20967c478bd9Sstevel@tonic-gate 			sigev = (struct sigevent *)&aiocb64.aio_sigevent;
20977c478bd9Sstevel@tonic-gate 		}
20987c478bd9Sstevel@tonic-gate 
20997c478bd9Sstevel@tonic-gate 		if (sigev->sigev_notify == SIGEV_PORT) {
21007c478bd9Sstevel@tonic-gate 			if (copyin((void *)sigev->sigev_value.sival_ptr,
21017c478bd9Sstevel@tonic-gate 			    &pntfy, sizeof (port_notify_t))) {
21027c478bd9Sstevel@tonic-gate 				releasef(fd);
21037c478bd9Sstevel@tonic-gate 				return (EFAULT);
21047c478bd9Sstevel@tonic-gate 			}
21057c478bd9Sstevel@tonic-gate 			aio_use_port = 1;
210634709573Sraf 		} else if (sigev->sigev_notify == SIGEV_THREAD) {
210734709573Sraf 			pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo;
210834709573Sraf 			pntfy.portnfy_user =
210934709573Sraf 			    aiocb.aio_sigevent.sigev_value.sival_ptr;
211034709573Sraf 			aio_use_port = 1;
21117c478bd9Sstevel@tonic-gate 		}
21127c478bd9Sstevel@tonic-gate 	}
21137c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
21147c478bd9Sstevel@tonic-gate 	else {
21157c478bd9Sstevel@tonic-gate 		if (run_mode == AIO_32) {
21167c478bd9Sstevel@tonic-gate 			/* 32 bit system call is being made on 64 bit kernel */
21177c478bd9Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
21187c478bd9Sstevel@tonic-gate 				return (EFAULT);
21197c478bd9Sstevel@tonic-gate 
21207c478bd9Sstevel@tonic-gate 			bufsize = aiocb32.aio_nbytes;
21217c478bd9Sstevel@tonic-gate 			aiocb_32ton(&aiocb32, &aiocb);
21227c478bd9Sstevel@tonic-gate 			resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
21237c478bd9Sstevel@tonic-gate 			    aio_resultp);
21247c478bd9Sstevel@tonic-gate 			if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
21257c478bd9Sstevel@tonic-gate 				return (EBADF);
21267c478bd9Sstevel@tonic-gate 			}
21277c478bd9Sstevel@tonic-gate 			sigev32 = &aiocb32.aio_sigevent;
21287c478bd9Sstevel@tonic-gate 		} else if (run_mode == AIO_LARGEFILE) {
21297c478bd9Sstevel@tonic-gate 			/*
21307c478bd9Sstevel@tonic-gate 			 * We come here only when we make largefile
21317c478bd9Sstevel@tonic-gate 			 * call on 64 bit kernel using 32 bit library.
21327c478bd9Sstevel@tonic-gate 			 */
21337c478bd9Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
21347c478bd9Sstevel@tonic-gate 				return (EFAULT);
21357c478bd9Sstevel@tonic-gate 			bufsize = aiocb64.aio_nbytes;
21367c478bd9Sstevel@tonic-gate 			aiocb_LFton(&aiocb64, &aiocb);
21377c478bd9Sstevel@tonic-gate 			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
21387c478bd9Sstevel@tonic-gate 			    ->aio_resultp);
21397c478bd9Sstevel@tonic-gate 			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
21407c478bd9Sstevel@tonic-gate 				return (EBADF);
21417c478bd9Sstevel@tonic-gate 			sigev32 = &aiocb64.aio_sigevent;
21427c478bd9Sstevel@tonic-gate 		}
21437c478bd9Sstevel@tonic-gate 
21447c478bd9Sstevel@tonic-gate 		if (sigev32->sigev_notify == SIGEV_PORT) {
21457c478bd9Sstevel@tonic-gate 			if (copyin(
21467c478bd9Sstevel@tonic-gate 			    (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
21477c478bd9Sstevel@tonic-gate 			    &pntfy32, sizeof (port_notify32_t))) {
21487c478bd9Sstevel@tonic-gate 				releasef(fd);
21497c478bd9Sstevel@tonic-gate 				return (EFAULT);
21507c478bd9Sstevel@tonic-gate 			}
21517c478bd9Sstevel@tonic-gate 			pntfy.portnfy_port = pntfy32.portnfy_port;
215234709573Sraf 			pntfy.portnfy_user = (void *)(uintptr_t)
215334709573Sraf 			    pntfy32.portnfy_user;
215434709573Sraf 			aio_use_port = 1;
215534709573Sraf 		} else if (sigev32->sigev_notify == SIGEV_THREAD) {
215634709573Sraf 			pntfy.portnfy_port = sigev32->sigev_signo;
215734709573Sraf 			pntfy.portnfy_user = (void *)(uintptr_t)
215834709573Sraf 			    sigev32->sigev_value.sival_ptr;
21597c478bd9Sstevel@tonic-gate 			aio_use_port = 1;
21607c478bd9Sstevel@tonic-gate 		}
21617c478bd9Sstevel@tonic-gate 	}
21627c478bd9Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
21637c478bd9Sstevel@tonic-gate 
21647c478bd9Sstevel@tonic-gate 	/*
21657c478bd9Sstevel@tonic-gate 	 * check the permission of the partition
21667c478bd9Sstevel@tonic-gate 	 */
21677c478bd9Sstevel@tonic-gate 
21687c478bd9Sstevel@tonic-gate 	if ((fp->f_flag & mode) == 0) {
21697c478bd9Sstevel@tonic-gate 		releasef(fd);
21707c478bd9Sstevel@tonic-gate 		return (EBADF);
21717c478bd9Sstevel@tonic-gate 	}
21727c478bd9Sstevel@tonic-gate 
21737c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
21747c478bd9Sstevel@tonic-gate 	aio_func = check_vp(vp, mode);
21757c478bd9Sstevel@tonic-gate 	if (aio_func == NULL) {
21767c478bd9Sstevel@tonic-gate 		releasef(fd);
21777c478bd9Sstevel@tonic-gate 		return (EBADFD);
21787c478bd9Sstevel@tonic-gate 	}
217934709573Sraf 	if (run_mode == AIO_LARGEFILE)
218034709573Sraf 		error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp);
21817c478bd9Sstevel@tonic-gate 	else
218234709573Sraf 		error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp);
21837c478bd9Sstevel@tonic-gate 
21847c478bd9Sstevel@tonic-gate 	if (error) {
21857c478bd9Sstevel@tonic-gate 		releasef(fd);
21867c478bd9Sstevel@tonic-gate 		return (error);
21877c478bd9Sstevel@tonic-gate 	}
21887c478bd9Sstevel@tonic-gate 	/*
21897c478bd9Sstevel@tonic-gate 	 * enable polling on this request if the opcode has
21907c478bd9Sstevel@tonic-gate 	 * the AIO poll bit set
21917c478bd9Sstevel@tonic-gate 	 */
21927c478bd9Sstevel@tonic-gate 	if (opcode & AIO_POLL_BIT)
21937c478bd9Sstevel@tonic-gate 		reqp->aio_req_flags |= AIO_POLL;
21947c478bd9Sstevel@tonic-gate 
21957c478bd9Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
21967c478bd9Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb = aiocb_arg;
21977c478bd9Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
21987c478bd9Sstevel@tonic-gate 	else
21997c478bd9Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
22007c478bd9Sstevel@tonic-gate #endif
22017c478bd9Sstevel@tonic-gate 
220234709573Sraf 	if (aio_use_port) {
220334709573Sraf 		int event = (run_mode == AIO_LARGEFILE)?
220434709573Sraf 		    ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) :
220534709573Sraf 		    ((mode == FREAD)? AIOAREAD : AIOAWRITE);
220634709573Sraf 		error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
220734709573Sraf 	}
22087c478bd9Sstevel@tonic-gate 
22097c478bd9Sstevel@tonic-gate 	/*
22107c478bd9Sstevel@tonic-gate 	 * send the request to driver.
22117c478bd9Sstevel@tonic-gate 	 */
22127c478bd9Sstevel@tonic-gate 	if (error == 0) {
22137c478bd9Sstevel@tonic-gate 		if (bufsize == 0) {
22147c478bd9Sstevel@tonic-gate 			clear_active_fd(fd);
22157c478bd9Sstevel@tonic-gate 			aio_zerolen(reqp);
22167c478bd9Sstevel@tonic-gate 			return (0);
22177c478bd9Sstevel@tonic-gate 		}
22187c478bd9Sstevel@tonic-gate 		error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
22197c478bd9Sstevel@tonic-gate 	}
22207c478bd9Sstevel@tonic-gate 
22217c478bd9Sstevel@tonic-gate 	/*
22227c478bd9Sstevel@tonic-gate 	 * the fd is stored in the aio_req_t by aio_req_setup(), and
22237c478bd9Sstevel@tonic-gate 	 * is released by the aio_cleanup_thread() when the IO has
22247c478bd9Sstevel@tonic-gate 	 * completed.
22257c478bd9Sstevel@tonic-gate 	 */
22267c478bd9Sstevel@tonic-gate 	if (error) {
22277c478bd9Sstevel@tonic-gate 		releasef(fd);
22287c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
222934b3058fSpraks 		if (aio_use_port)
223034709573Sraf 			aio_deq(&aiop->aio_portpending, reqp);
22317c478bd9Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
22327c478bd9Sstevel@tonic-gate 		aiop->aio_pending--;
22337c478bd9Sstevel@tonic-gate 		if (aiop->aio_flags & AIO_REQ_BLOCK)
22347c478bd9Sstevel@tonic-gate 			cv_signal(&aiop->aio_cleanupcv);
22357c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
22367c478bd9Sstevel@tonic-gate 		return (error);
22377c478bd9Sstevel@tonic-gate 	}
22387c478bd9Sstevel@tonic-gate 	clear_active_fd(fd);
22397c478bd9Sstevel@tonic-gate 	return (0);
22407c478bd9Sstevel@tonic-gate }
22417c478bd9Sstevel@tonic-gate 
22427c478bd9Sstevel@tonic-gate 
/*
 * set error for a list IO entry that failed.
 *
 * "portused" is non-zero when the request was queued for event-port
 * notification, in which case it must first be removed from the
 * aio_portpending queue.  The request is freed here because it will
 * never reach the done queue.
 */
static void
lio_set_error(aio_req_t *reqp, int portused)
{
	aio_t *aiop = curproc->p_aio;

	if (aiop == NULL)
		return;

	mutex_enter(&aiop->aio_mutex);
	if (portused)
		aio_deq(&aiop->aio_portpending, reqp);
	aiop->aio_pending--;
	/* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
	reqp->aio_req_flags |= AIO_PHYSIODONE;
	/*
	 * Need to free the request now as its never
	 * going to get on the done queue
	 *
	 * Note: aio_outstanding is decremented in
	 *	 aio_req_free()
	 */
	aio_req_free(aiop, reqp);
	/* wake anyone blocked waiting for pending requests to drain */
	if (aiop->aio_flags & AIO_REQ_BLOCK)
		cv_signal(&aiop->aio_cleanupcv);
	mutex_exit(&aiop->aio_mutex);
}
22727c478bd9Sstevel@tonic-gate 
22737c478bd9Sstevel@tonic-gate /*
22747c478bd9Sstevel@tonic-gate  * check if a specified request is done, and remove it from
22757c478bd9Sstevel@tonic-gate  * the done queue. otherwise remove anybody from the done queue
22767c478bd9Sstevel@tonic-gate  * if NULL is specified.
22777c478bd9Sstevel@tonic-gate  */
22787c478bd9Sstevel@tonic-gate static aio_req_t *
22797c478bd9Sstevel@tonic-gate aio_req_done(void *resultp)
22807c478bd9Sstevel@tonic-gate {
22817c478bd9Sstevel@tonic-gate 	aio_req_t **bucket;
22827c478bd9Sstevel@tonic-gate 	aio_req_t *ent;
22837c478bd9Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
22847c478bd9Sstevel@tonic-gate 	long index;
22857c478bd9Sstevel@tonic-gate 
22867c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
22877c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
22887c478bd9Sstevel@tonic-gate 
22897c478bd9Sstevel@tonic-gate 	if (resultp) {
22907c478bd9Sstevel@tonic-gate 		index = AIO_HASH(resultp);
22917c478bd9Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
22927c478bd9Sstevel@tonic-gate 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
22937c478bd9Sstevel@tonic-gate 			if (ent->aio_req_resultp == (aio_result_t *)resultp) {
22947c478bd9Sstevel@tonic-gate 				if (ent->aio_req_flags & AIO_DONEQ) {
22957c478bd9Sstevel@tonic-gate 					return (aio_req_remove(ent));
22967c478bd9Sstevel@tonic-gate 				}
22977c478bd9Sstevel@tonic-gate 				return (NULL);
22987c478bd9Sstevel@tonic-gate 			}
22997c478bd9Sstevel@tonic-gate 		}
23007c478bd9Sstevel@tonic-gate 		/* no match, resultp is invalid */
23017c478bd9Sstevel@tonic-gate 		return (NULL);
23027c478bd9Sstevel@tonic-gate 	}
23037c478bd9Sstevel@tonic-gate 	return (aio_req_remove(NULL));
23047c478bd9Sstevel@tonic-gate }
23057c478bd9Sstevel@tonic-gate 
23067c478bd9Sstevel@tonic-gate /*
23077c478bd9Sstevel@tonic-gate  * determine if a user-level resultp pointer is associated with an
23087c478bd9Sstevel@tonic-gate  * active IO request. Zero is returned when the request is done,
23097c478bd9Sstevel@tonic-gate  * and the request is removed from the done queue. Only when the
23107c478bd9Sstevel@tonic-gate  * return value is zero, is the "reqp" pointer valid. One is returned
23117c478bd9Sstevel@tonic-gate  * when the request is inprogress. Two is returned when the request
23127c478bd9Sstevel@tonic-gate  * is invalid.
23137c478bd9Sstevel@tonic-gate  */
23147c478bd9Sstevel@tonic-gate static int
23157c478bd9Sstevel@tonic-gate aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
23167c478bd9Sstevel@tonic-gate {
23177c478bd9Sstevel@tonic-gate 	aio_req_t **bucket;
23187c478bd9Sstevel@tonic-gate 	aio_req_t *ent;
23197c478bd9Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
23207c478bd9Sstevel@tonic-gate 	long index;
23217c478bd9Sstevel@tonic-gate 
23227c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
23237c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
23247c478bd9Sstevel@tonic-gate 
23257c478bd9Sstevel@tonic-gate 	index = AIO_HASH(resultp);
23267c478bd9Sstevel@tonic-gate 	bucket = &aiop->aio_hash[index];
23277c478bd9Sstevel@tonic-gate 	for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
23287c478bd9Sstevel@tonic-gate 		if (ent->aio_req_resultp == resultp) {
23297c478bd9Sstevel@tonic-gate 			if (ent->aio_req_flags & AIO_DONEQ) {
23307c478bd9Sstevel@tonic-gate 				*reqp = aio_req_remove(ent);
23317c478bd9Sstevel@tonic-gate 				return (0);
23327c478bd9Sstevel@tonic-gate 			}
23337c478bd9Sstevel@tonic-gate 			return (1);
23347c478bd9Sstevel@tonic-gate 		}
23357c478bd9Sstevel@tonic-gate 	}
23367c478bd9Sstevel@tonic-gate 	/* no match, resultp is invalid */
23377c478bd9Sstevel@tonic-gate 	return (2);
23387c478bd9Sstevel@tonic-gate }
23397c478bd9Sstevel@tonic-gate 
/*
 * remove a request from the done queue.
 *
 * The done queue and cleanup queue are circular doubly-linked lists
 * threaded through aio_req_next/aio_req_prev.  When "reqp" is NULL
 * the request at the head of the done queue (if any) is removed
 * instead.  Returns the removed request, or NULL when the done queue
 * was empty.  Caller must hold aio_mutex.
 */
static aio_req_t *
aio_req_remove(aio_req_t *reqp)
{
	aio_t *aiop = curproc->p_aio;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (reqp != NULL) {
		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
		if (reqp->aio_req_next == reqp) {
			/* only one request on queue */
			if (reqp ==  aiop->aio_doneq) {
				aiop->aio_doneq = NULL;
			} else {
				ASSERT(reqp == aiop->aio_cleanupq);
				aiop->aio_cleanupq = NULL;
			}
		} else {
			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
			/*
			 * The request can be either on the aio_doneq or the
			 * aio_cleanupq
			 */
			if (reqp == aiop->aio_doneq)
				aiop->aio_doneq = reqp->aio_req_next;

			if (reqp == aiop->aio_cleanupq)
				aiop->aio_cleanupq = reqp->aio_req_next;
		}
		reqp->aio_req_flags &= ~AIO_DONEQ;
		reqp->aio_req_next = NULL;
		reqp->aio_req_prev = NULL;
	} else if ((reqp = aiop->aio_doneq) != NULL) {
		/* no specific request: pop the head of the done queue */
		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
		if (reqp == reqp->aio_req_next) {
			/* only one request on queue */
			aiop->aio_doneq = NULL;
		} else {
			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
			aiop->aio_doneq = reqp->aio_req_next;
		}
		reqp->aio_req_flags &= ~AIO_DONEQ;
		reqp->aio_req_next = NULL;
		reqp->aio_req_prev = NULL;
	}
	/* an empty done queue may unblock an aio_waitn() caller */
	if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN))
		cv_broadcast(&aiop->aio_waitcv);
	return (reqp);
}
23947c478bd9Sstevel@tonic-gate 
/*
 * Set up an asynchronous I/O request against the given vnode:
 * optionally pre-allocate a sigqueue_t for SIGEV_SIGNAL completion
 * notification, allocate (or recycle) an aio_req_t, and initialize
 * the request's embedded uio from the user's aiocb.
 *
 * On success returns 0 with *reqpp pointing at the initialized
 * request (left in AIO_PENDING state).  Otherwise returns:
 *	EAGAIN	- sigqueue_t or aio_req_t allocation failed
 *	EIO	- process is blocking new aio (AIO_REQ_BLOCK set
 *		  while the cleanup thread drains pending I/O at exit)
 *	other	- whatever aio_req_alloc() reports (e.g. EINVAL for
 *		  a duplicate result pointer)
 */
static int
aio_req_setup(
	aio_req_t	**reqpp,
	aio_t 		*aiop,
	aiocb_t 	*arg,
	aio_result_t 	*resultp,
	vnode_t		*vp)
{
	sigqueue_t	*sqp = NULL;
	aio_req_t 	*reqp;
	struct uio 	*uio;
	struct sigevent *sigev;
	int		error;

	/*
	 * Pre-allocate and fill in the completion signal before taking
	 * aio_mutex, so the notification cannot fail after the request
	 * has been queued.  Only a valid signal number qualifies.
	 */
	sigev = &arg->aio_sigevent;
	if (sigev->sigev_notify == SIGEV_SIGNAL &&
	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
		if (sqp == NULL)
			return (EAGAIN);
		sqp->sq_func = NULL;
		sqp->sq_next = NULL;
		sqp->sq_info.si_code = SI_ASYNCIO;
		sqp->sq_info.si_pid = curproc->p_pid;
		sqp->sq_info.si_ctid = PRCTID(curproc);
		sqp->sq_info.si_zoneid = getzoneid();
		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
		sqp->sq_info.si_signo = sigev->sigev_signo;
		sqp->sq_info.si_value = sigev->sigev_value;
	}

	mutex_enter(&aiop->aio_mutex);

	/* new requests are refused while the process winds down aio */
	if (aiop->aio_flags & AIO_REQ_BLOCK) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (EIO);
	}
	/*
	 * get an aio_reqp from the free list or allocate one
	 * from dynamic memory.
	 */
	if (error = aio_req_alloc(&reqp, resultp)) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (error);
	}
	/* account for the request while aio_mutex is still held */
	aiop->aio_pending++;
	aiop->aio_outstanding++;
	reqp->aio_req_flags = AIO_PENDING;
	/* event-port/thread notifications are tracked on aio_portpending */
	if (sigev->sigev_notify == SIGEV_THREAD ||
	    sigev->sigev_notify == SIGEV_PORT)
		aio_enq(&aiop->aio_portpending, reqp, 0);
	mutex_exit(&aiop->aio_mutex);
	/*
	 * initialize aio request.
	 */
	reqp->aio_req_fd = arg->aio_fildes;
	reqp->aio_req_sigqp = sqp;
	reqp->aio_req_iocb.iocb = NULL;
	reqp->aio_req_lio = NULL;
	reqp->aio_req_buf.b_file = vp;
	uio = reqp->aio_req.aio_uio;
	uio->uio_iovcnt = 1;
	uio->uio_iov->iov_base = (caddr_t)arg->aio_buf;
	uio->uio_iov->iov_len = arg->aio_nbytes;
	uio->uio_loffset = arg->aio_offset;
	*reqpp = reqp;
	return (0);
}
24677c478bd9Sstevel@tonic-gate 
24687c478bd9Sstevel@tonic-gate /*
24697c478bd9Sstevel@tonic-gate  * Allocate p_aio struct.
24707c478bd9Sstevel@tonic-gate  */
24717c478bd9Sstevel@tonic-gate static aio_t *
24727c478bd9Sstevel@tonic-gate aio_aiop_alloc(void)
24737c478bd9Sstevel@tonic-gate {
24747c478bd9Sstevel@tonic-gate 	aio_t	*aiop;
24757c478bd9Sstevel@tonic-gate 
24767c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&curproc->p_lock));
24777c478bd9Sstevel@tonic-gate 
24787c478bd9Sstevel@tonic-gate 	aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);
24797c478bd9Sstevel@tonic-gate 	if (aiop) {
24807c478bd9Sstevel@tonic-gate 		mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
24817c478bd9Sstevel@tonic-gate 		mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
24827c478bd9Sstevel@tonic-gate 		    NULL);
24837c478bd9Sstevel@tonic-gate 		mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
24847c478bd9Sstevel@tonic-gate 	}
24857c478bd9Sstevel@tonic-gate 	return (aiop);
24867c478bd9Sstevel@tonic-gate }
24877c478bd9Sstevel@tonic-gate 
24887c478bd9Sstevel@tonic-gate /*
24897c478bd9Sstevel@tonic-gate  * Allocate an aio_req struct.
24907c478bd9Sstevel@tonic-gate  */
24917c478bd9Sstevel@tonic-gate static int
24927c478bd9Sstevel@tonic-gate aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
24937c478bd9Sstevel@tonic-gate {
24947c478bd9Sstevel@tonic-gate 	aio_req_t *reqp;
24957c478bd9Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
24967c478bd9Sstevel@tonic-gate 
24977c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
24987c478bd9Sstevel@tonic-gate 
24997c478bd9Sstevel@tonic-gate 	if ((reqp = aiop->aio_free) != NULL) {
25007c478bd9Sstevel@tonic-gate 		aiop->aio_free = reqp->aio_req_next;
250134709573Sraf 		bzero(reqp, sizeof (*reqp));
25027c478bd9Sstevel@tonic-gate 	} else {
25037c478bd9Sstevel@tonic-gate 		/*
25047c478bd9Sstevel@tonic-gate 		 * Check whether memory is getting tight.
25057c478bd9Sstevel@tonic-gate 		 * This is a temporary mechanism to avoid memory
25067c478bd9Sstevel@tonic-gate 		 * exhaustion by a single process until we come up
25077c478bd9Sstevel@tonic-gate 		 * with a per process solution such as setrlimit().
25087c478bd9Sstevel@tonic-gate 		 */
25097c478bd9Sstevel@tonic-gate 		if (freemem < desfree)
25107c478bd9Sstevel@tonic-gate 			return (EAGAIN);
25117c478bd9Sstevel@tonic-gate 		reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);
25127c478bd9Sstevel@tonic-gate 		if (reqp == NULL)
25137c478bd9Sstevel@tonic-gate 			return (EAGAIN);
25147c478bd9Sstevel@tonic-gate 	}
251534709573Sraf 	reqp->aio_req.aio_uio = &reqp->aio_req_uio;
251634709573Sraf 	reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov;
251734709573Sraf 	reqp->aio_req.aio_private = reqp;
25187c478bd9Sstevel@tonic-gate 	reqp->aio_req_buf.b_offset = -1;
25197c478bd9Sstevel@tonic-gate 	reqp->aio_req_resultp = resultp;
25207c478bd9Sstevel@tonic-gate 	if (aio_hash_insert(reqp, aiop)) {
25217c478bd9Sstevel@tonic-gate 		reqp->aio_req_next = aiop->aio_free;
25227c478bd9Sstevel@tonic-gate 		aiop->aio_free = reqp;
25237c478bd9Sstevel@tonic-gate 		return (EINVAL);
25247c478bd9Sstevel@tonic-gate 	}
25257c478bd9Sstevel@tonic-gate 	*nreqp = reqp;
25267c478bd9Sstevel@tonic-gate 	return (0);
25277c478bd9Sstevel@tonic-gate }
25287c478bd9Sstevel@tonic-gate 
25297c478bd9Sstevel@tonic-gate /*
25307c478bd9Sstevel@tonic-gate  * Allocate an aio_lio_t struct.
25317c478bd9Sstevel@tonic-gate  */
25327c478bd9Sstevel@tonic-gate static int
25337c478bd9Sstevel@tonic-gate aio_lio_alloc(aio_lio_t **head)
25347c478bd9Sstevel@tonic-gate {
25357c478bd9Sstevel@tonic-gate 	aio_lio_t *liop;
25367c478bd9Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
25377c478bd9Sstevel@tonic-gate 
25387c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
25397c478bd9Sstevel@tonic-gate 
25407c478bd9Sstevel@tonic-gate 	if ((liop = aiop->aio_lio_free) != NULL) {
25417c478bd9Sstevel@tonic-gate 		aiop->aio_lio_free = liop->lio_next;
25427c478bd9Sstevel@tonic-gate 	} else {
25437c478bd9Sstevel@tonic-gate 		/*
25447c478bd9Sstevel@tonic-gate 		 * Check whether memory is getting tight.
25457c478bd9Sstevel@tonic-gate 		 * This is a temporary mechanism to avoid memory
25467c478bd9Sstevel@tonic-gate 		 * exhaustion by a single process until we come up
25477c478bd9Sstevel@tonic-gate 		 * with a per process solution such as setrlimit().
25487c478bd9Sstevel@tonic-gate 		 */
25497c478bd9Sstevel@tonic-gate 		if (freemem < desfree)
25507c478bd9Sstevel@tonic-gate 			return (EAGAIN);
25517c478bd9Sstevel@tonic-gate 
25527c478bd9Sstevel@tonic-gate 		liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
25537c478bd9Sstevel@tonic-gate 		if (liop == NULL)
25547c478bd9Sstevel@tonic-gate 			return (EAGAIN);
25557c478bd9Sstevel@tonic-gate 	}
25567c478bd9Sstevel@tonic-gate 	*head = liop;
25577c478bd9Sstevel@tonic-gate 	return (0);
25587c478bd9Sstevel@tonic-gate }
25597c478bd9Sstevel@tonic-gate 
/*
 * This is a special per-process thread that is only activated if
 * the process is unmapping a segment with outstanding aio.  Normally,
 * the process will have completed the aio before unmapping the
 * segment.  If the process does unmap a segment with outstanding aio,
 * this special thread will guarantee that the locked pages due to
 * aphysio() are released, thereby permitting the segment to be
 * unmapped.  In addition to this, the cleanup thread is woken up
 * during DR operations to release the locked pages.
 *
 * Returns (0) only when the process is exiting (SEXITLWPS|SKILLED)
 * and all completed I/O page locks have been released.
 */

static int
aio_cleanup_thread(aio_t *aiop)
{
	proc_t *p = curproc;
	struct as *as = p->p_as;
	int poked = 0;		/* nonzero: woken by pokelwps(), not aio */
	kcondvar_t *cvp;	/* cv to sleep on at the bottom of the loop */
	int exit_flag = 0;	/* completed-I/O page locks are released */
	int rqclnup = 0;	/* latched DR cleanup request */

	/* block every blockable signal; pokelwps() can still wake us */
	sigfillset(&curthread->t_hold);
	sigdiffset(&curthread->t_hold, &cantmask);
	for (;;) {
		/*
		 * if a segment is being unmapped, and the current
		 * process's done queue is not empty, then every request
		 * on the doneq with locked resources should be forced
		 * to release their locks. By moving the doneq request
		 * to the cleanupq, aio_cleanup() will process the cleanupq,
		 * and place requests back onto the doneq. All requests
		 * processed by aio_cleanup() will have their physical
		 * resources unlocked.
		 */
		mutex_enter(&aiop->aio_mutex);
		if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
			aiop->aio_flags |= AIO_CLEANUP;
			mutex_enter(&as->a_contents);
			/* latch and consume any DR cleanup request */
			if (aiop->aio_rqclnup) {
				aiop->aio_rqclnup = 0;
				rqclnup = 1;
			}

			if ((rqclnup || AS_ISUNMAPWAIT(as)) &&
			    aiop->aio_doneq) {
				aio_req_t *doneqhead = aiop->aio_doneq;
				mutex_exit(&as->a_contents);
				aiop->aio_doneq = NULL;
				aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
			} else {
				mutex_exit(&as->a_contents);
			}
		}
		mutex_exit(&aiop->aio_mutex);
		aio_cleanup(AIO_CLEANUP_THREAD);
		/*
		 * thread should block on the cleanupcv while
		 * AIO_CLEANUP is set.
		 */
		cvp = &aiop->aio_cleanupcv;
		mutex_enter(&aiop->aio_mutex);

		/* any queue still non-empty: run another cleanup pass */
		if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
		    aiop->aio_notifyq != NULL ||
		    aiop->aio_portcleanupq != NULL) {
			mutex_exit(&aiop->aio_mutex);
			continue;
		}
		mutex_enter(&as->a_contents);

		/*
		 * AIO_CLEANUP determines when the cleanup thread
		 * should be active. This flag is set when
		 * the cleanup thread is awakened by as_unmap() or
		 * due to DR operations.
		 * The flag is cleared when the blocking as_unmap()
		 * that originally awakened us is allowed to
		 * complete. as_unmap() blocks when trying to
		 * unmap a segment that has SOFTLOCKed pages. when
		 * the segment's pages are all SOFTUNLOCKed,
		 * as->a_flags & AS_UNMAPWAIT should be zero.
		 *
		 * In case of cleanup request by DR, the flag is cleared
		 * once all the pending aio requests have been processed.
		 *
		 * The flag shouldn't be cleared right away if the
		 * cleanup thread was interrupted because the process
		 * is doing forkall(). This happens when cv_wait_sig()
		 * returns zero, because it was awakened by a pokelwps().
		 * If the process is not exiting, it must be doing forkall().
		 */
		if ((poked == 0) &&
		    ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
		    (aiop->aio_pending == 0))) {
			aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
			cvp = &as->a_cv;	/* sleep on as_unmap's cv */
			rqclnup = 0;
		}
		mutex_exit(&aiop->aio_mutex);
		if (poked) {
			/*
			 * If the process is exiting/killed, don't return
			 * immediately without waiting for pending I/O's
			 * and releasing the page locks.
			 */
			if (p->p_flag & (SEXITLWPS|SKILLED)) {
				/*
				 * If exit_flag is set, then it is
				 * safe to exit because we have released
				 * page locks of completed I/O's.
				 * (This break leaves the for(;;) loop
				 * and falls through to "exit" below,
				 * still holding as->a_contents.)
				 */
				if (exit_flag)
					break;

				mutex_exit(&as->a_contents);

				/*
				 * Wait for all the pending aio to complete.
				 * AIO_REQ_BLOCK makes aio_req_setup()
				 * refuse any new requests meanwhile.
				 */
				mutex_enter(&aiop->aio_mutex);
				aiop->aio_flags |= AIO_REQ_BLOCK;
				while (aiop->aio_pending != 0)
					cv_wait(&aiop->aio_cleanupcv,
					    &aiop->aio_mutex);
				mutex_exit(&aiop->aio_mutex);
				exit_flag = 1;
				continue;
			} else if (p->p_flag &
			    (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
				/*
				 * hold LWP until it
				 * is continued.
				 */
				mutex_exit(&as->a_contents);
				mutex_enter(&p->p_lock);
				stop(PR_SUSPENDED, SUSPEND_NORMAL);
				mutex_exit(&p->p_lock);
				poked = 0;
				continue;
			}
		} else {
			/*
			 * When started this thread will sleep on as->a_cv.
			 * as_unmap will awake this thread if the
			 * segment has SOFTLOCKed pages (poked = 0).
			 * 1. pokelwps() awakes this thread =>
			 *    break the loop to check SEXITLWPS, SHOLDFORK, etc
			 * 2. as_unmap awakes this thread =>
			 *    to break the loop it is necessary that
			 *    - AS_UNMAPWAIT is set (as_unmap is waiting for
			 *	memory to be unlocked)
			 *    - AIO_CLEANUP is not set
			 *	(if AIO_CLEANUP is set we have to wait for
			 *	pending requests. aio_done will send a signal
			 *	for every request which completes to continue
			 *	unmapping the corresponding address range)
			 * 3. A cleanup request will wake this thread up, ex.
			 *    by the DR operations. The aio_rqclnup flag will
			 *    be set.
			 */
			while (poked == 0) {
				/*
				 * The clean up requests that came in
				 * after we had just cleaned up, couldn't
				 * be causing the unmap thread to block - as
				 * unmap event happened first.
				 * Let aio_done() wake us up if it sees a need.
				 */
				if (aiop->aio_rqclnup &&
				    (aiop->aio_flags & AIO_CLEANUP) == 0)
					break;
				/* zero return means awakened by pokelwps() */
				poked = !cv_wait_sig(cvp, &as->a_contents);
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_signal(cvp);
				if (aiop->aio_outstanding != 0)
					break;
			}
		}
		mutex_exit(&as->a_contents);
	}
exit:
	/* reached only via the exit_flag break above, a_contents held */
	mutex_exit(&as->a_contents);
	ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
	aston(curthread);	/* make thread do post_syscall */
	return (0);
}
27467c478bd9Sstevel@tonic-gate 
27477c478bd9Sstevel@tonic-gate /*
27487c478bd9Sstevel@tonic-gate  * save a reference to a user's outstanding aio in a hash list.
27497c478bd9Sstevel@tonic-gate  */
27507c478bd9Sstevel@tonic-gate static int
27517c478bd9Sstevel@tonic-gate aio_hash_insert(
27527c478bd9Sstevel@tonic-gate 	aio_req_t *aio_reqp,
27537c478bd9Sstevel@tonic-gate 	aio_t *aiop)
27547c478bd9Sstevel@tonic-gate {
27557c478bd9Sstevel@tonic-gate 	long index;
27567c478bd9Sstevel@tonic-gate 	aio_result_t *resultp = aio_reqp->aio_req_resultp;
27577c478bd9Sstevel@tonic-gate 	aio_req_t *current;
27587c478bd9Sstevel@tonic-gate 	aio_req_t **nextp;
27597c478bd9Sstevel@tonic-gate 
27607c478bd9Sstevel@tonic-gate 	index = AIO_HASH(resultp);
27617c478bd9Sstevel@tonic-gate 	nextp = &aiop->aio_hash[index];
27627c478bd9Sstevel@tonic-gate 	while ((current = *nextp) != NULL) {
27637c478bd9Sstevel@tonic-gate 		if (current->aio_req_resultp == resultp)
27647c478bd9Sstevel@tonic-gate 			return (DUPLICATE);
27657c478bd9Sstevel@tonic-gate 		nextp = &current->aio_hash_next;
27667c478bd9Sstevel@tonic-gate 	}
27677c478bd9Sstevel@tonic-gate 	*nextp = aio_reqp;
27687c478bd9Sstevel@tonic-gate 	aio_reqp->aio_hash_next = NULL;
27697c478bd9Sstevel@tonic-gate 	return (0);
27707c478bd9Sstevel@tonic-gate }
27717c478bd9Sstevel@tonic-gate 
27727c478bd9Sstevel@tonic-gate static int
27737c478bd9Sstevel@tonic-gate (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *,
27747c478bd9Sstevel@tonic-gate     cred_t *)
27757c478bd9Sstevel@tonic-gate {
27767c478bd9Sstevel@tonic-gate 	struct snode *sp;
27777c478bd9Sstevel@tonic-gate 	dev_t		dev;
27787c478bd9Sstevel@tonic-gate 	struct cb_ops  	*cb;
27797c478bd9Sstevel@tonic-gate 	major_t		major;
27807c478bd9Sstevel@tonic-gate 	int		(*aio_func)();
27817c478bd9Sstevel@tonic-gate 
27827c478bd9Sstevel@tonic-gate 	dev = vp->v_rdev;
27837c478bd9Sstevel@tonic-gate 	major = getmajor(dev);
27847c478bd9Sstevel@tonic-gate 
27857c478bd9Sstevel@tonic-gate 	/*
27867c478bd9Sstevel@tonic-gate 	 * return NULL for requests to files and STREAMs so
27877c478bd9Sstevel@tonic-gate 	 * that libaio takes care of them.
27887c478bd9Sstevel@tonic-gate 	 */
27897c478bd9Sstevel@tonic-gate 	if (vp->v_type == VCHR) {
27907c478bd9Sstevel@tonic-gate 		/* no stream device for kaio */
27917c478bd9Sstevel@tonic-gate 		if (STREAMSTAB(major)) {
27927c478bd9Sstevel@tonic-gate 			return (NULL);
27937c478bd9Sstevel@tonic-gate 		}
27947c478bd9Sstevel@tonic-gate 	} else {
27957c478bd9Sstevel@tonic-gate 		return (NULL);
27967c478bd9Sstevel@tonic-gate 	}
27977c478bd9Sstevel@tonic-gate 
27987c478bd9Sstevel@tonic-gate 	/*
27997c478bd9Sstevel@tonic-gate 	 * Check old drivers which do not have async I/O entry points.
28007c478bd9Sstevel@tonic-gate 	 */
28017c478bd9Sstevel@tonic-gate 	if (devopsp[major]->devo_rev < 3)
28027c478bd9Sstevel@tonic-gate 		return (NULL);
28037c478bd9Sstevel@tonic-gate 
28047c478bd9Sstevel@tonic-gate 	cb = devopsp[major]->devo_cb_ops;
28057c478bd9Sstevel@tonic-gate 
28067c478bd9Sstevel@tonic-gate 	if (cb->cb_rev < 1)
28077c478bd9Sstevel@tonic-gate 		return (NULL);
28087c478bd9Sstevel@tonic-gate 
28097c478bd9Sstevel@tonic-gate 	/*
28107c478bd9Sstevel@tonic-gate 	 * Check whether this device is a block device.
28117c478bd9Sstevel@tonic-gate 	 * Kaio is not supported for devices like tty.
28127c478bd9Sstevel@tonic-gate 	 */
28137c478bd9Sstevel@tonic-gate 	if (cb->cb_strategy == nodev || cb->cb_strategy == NULL)
28147c478bd9Sstevel@tonic-gate 		return (NULL);
28157c478bd9Sstevel@tonic-gate 
28167c478bd9Sstevel@tonic-gate 	/*
28177c478bd9Sstevel@tonic-gate 	 * Clustering: If vnode is a PXFS vnode, then the device may be remote.
28187c478bd9Sstevel@tonic-gate 	 * We cannot call the driver directly. Instead return the
28197c478bd9Sstevel@tonic-gate 	 * PXFS functions.
28207c478bd9Sstevel@tonic-gate 	 */
28217c478bd9Sstevel@tonic-gate 
28227c478bd9Sstevel@tonic-gate 	if (IS_PXFSVP(vp)) {
28237c478bd9Sstevel@tonic-gate 		if (mode & FREAD)
28247c478bd9Sstevel@tonic-gate 			return (clpxfs_aio_read);
28257c478bd9Sstevel@tonic-gate 		else
28267c478bd9Sstevel@tonic-gate 			return (clpxfs_aio_write);
28277c478bd9Sstevel@tonic-gate 	}
28287c478bd9Sstevel@tonic-gate 	if (mode & FREAD)
28297c478bd9Sstevel@tonic-gate 		aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read;
28307c478bd9Sstevel@tonic-gate 	else
28317c478bd9Sstevel@tonic-gate 		aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write;
28327c478bd9Sstevel@tonic-gate 
28337c478bd9Sstevel@tonic-gate 	/*
28347c478bd9Sstevel@tonic-gate 	 * Do we need this ?
28357c478bd9Sstevel@tonic-gate 	 * nodev returns ENXIO anyway.
28367c478bd9Sstevel@tonic-gate 	 */
28377c478bd9Sstevel@tonic-gate 	if (aio_func == nodev)
28387c478bd9Sstevel@tonic-gate 		return (NULL);
28397c478bd9Sstevel@tonic-gate 
28407c478bd9Sstevel@tonic-gate 	sp = VTOS(vp);
28417c478bd9Sstevel@tonic-gate 	smark(sp, SACC);
28427c478bd9Sstevel@tonic-gate 	return (aio_func);
28437c478bd9Sstevel@tonic-gate }
28447c478bd9Sstevel@tonic-gate 
28457c478bd9Sstevel@tonic-gate /*
28467c478bd9Sstevel@tonic-gate  * Clustering: We want check_vp to return a function prototyped
28477c478bd9Sstevel@tonic-gate  * correctly that will be common to both PXFS and regular case.
28487c478bd9Sstevel@tonic-gate  * We define this intermediate function that will do the right
28497c478bd9Sstevel@tonic-gate  * thing for driver cases.
28507c478bd9Sstevel@tonic-gate  */
28517c478bd9Sstevel@tonic-gate 
28527c478bd9Sstevel@tonic-gate static int
28537c478bd9Sstevel@tonic-gate driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
28547c478bd9Sstevel@tonic-gate {
28557c478bd9Sstevel@tonic-gate 	dev_t dev;
28567c478bd9Sstevel@tonic-gate 	struct cb_ops  	*cb;
28577c478bd9Sstevel@tonic-gate 
28587c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_type == VCHR);
28597c478bd9Sstevel@tonic-gate 	ASSERT(!IS_PXFSVP(vp));
28607c478bd9Sstevel@tonic-gate 	dev = VTOS(vp)->s_dev;
28617c478bd9Sstevel@tonic-gate 	ASSERT(STREAMSTAB(getmajor(dev)) == NULL);
28627c478bd9Sstevel@tonic-gate 
28637c478bd9Sstevel@tonic-gate 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
28647c478bd9Sstevel@tonic-gate 
28657c478bd9Sstevel@tonic-gate 	ASSERT(cb->cb_awrite != nodev);
28667c478bd9Sstevel@tonic-gate 	return ((*cb->cb_awrite)(dev, aio, cred_p));
28677c478bd9Sstevel@tonic-gate }
28687c478bd9Sstevel@tonic-gate 
28697c478bd9Sstevel@tonic-gate /*
28707c478bd9Sstevel@tonic-gate  * Clustering: We want check_vp to return a function prototyped
28717c478bd9Sstevel@tonic-gate  * correctly that will be common to both PXFS and regular case.
28727c478bd9Sstevel@tonic-gate  * We define this intermediate function that will do the right
28737c478bd9Sstevel@tonic-gate  * thing for driver cases.
28747c478bd9Sstevel@tonic-gate  */
28757c478bd9Sstevel@tonic-gate 
28767c478bd9Sstevel@tonic-gate static int
28777c478bd9Sstevel@tonic-gate driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
28787c478bd9Sstevel@tonic-gate {
28797c478bd9Sstevel@tonic-gate 	dev_t dev;
28807c478bd9Sstevel@tonic-gate 	struct cb_ops  	*cb;
28817c478bd9Sstevel@tonic-gate 
28827c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_type == VCHR);
28837c478bd9Sstevel@tonic-gate 	ASSERT(!IS_PXFSVP(vp));
28847c478bd9Sstevel@tonic-gate 	dev = VTOS(vp)->s_dev;
28857c478bd9Sstevel@tonic-gate 	ASSERT(!STREAMSTAB(getmajor(dev)));
28867c478bd9Sstevel@tonic-gate 
28877c478bd9Sstevel@tonic-gate 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
28887c478bd9Sstevel@tonic-gate 
28897c478bd9Sstevel@tonic-gate 	ASSERT(cb->cb_aread != nodev);
28907c478bd9Sstevel@tonic-gate 	return ((*cb->cb_aread)(dev, aio, cred_p));
28917c478bd9Sstevel@tonic-gate }
28927c478bd9Sstevel@tonic-gate 
28937c478bd9Sstevel@tonic-gate /*
28947c478bd9Sstevel@tonic-gate  * This routine is called when a largefile call is made by a 32bit
28957c478bd9Sstevel@tonic-gate  * process on a ILP32 or LP64 kernel. All 64bit processes are large
28967c478bd9Sstevel@tonic-gate  * file by definition and will call alio() instead.
28977c478bd9Sstevel@tonic-gate  */
28987c478bd9Sstevel@tonic-gate static int
28997c478bd9Sstevel@tonic-gate alioLF(
29007c478bd9Sstevel@tonic-gate 	int		mode_arg,
29017c478bd9Sstevel@tonic-gate 	void		*aiocb_arg,
29027c478bd9Sstevel@tonic-gate 	int		nent,
29037c478bd9Sstevel@tonic-gate 	void		*sigev)
29047c478bd9Sstevel@tonic-gate {
29057c478bd9Sstevel@tonic-gate 	file_t		*fp;
29067c478bd9Sstevel@tonic-gate 	file_t		*prev_fp = NULL;
29077c478bd9Sstevel@tonic-gate 	int		prev_mode = -1;
29087c478bd9Sstevel@tonic-gate 	struct vnode	*vp;
29097c478bd9Sstevel@tonic-gate 	aio_lio_t	*head;
29107c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp;
29117c478bd9Sstevel@tonic-gate 	aio_t		*aiop;
29127c478bd9Sstevel@tonic-gate 	caddr_t		cbplist;
29137c478bd9Sstevel@tonic-gate 	aiocb64_32_t	cb64;
29147c478bd9Sstevel@tonic-gate 	aiocb64_32_t	*aiocb = &cb64;
291534709573Sraf 	aiocb64_32_t	*cbp;
291634709573Sraf 	caddr32_t	*ucbp;
29177c478bd9Sstevel@tonic-gate #ifdef _LP64
29187c478bd9Sstevel@tonic-gate 	aiocb_t		aiocb_n;
29197c478bd9Sstevel@tonic-gate #endif
29207c478bd9Sstevel@tonic-gate 	struct sigevent32	sigevk;
29217c478bd9Sstevel@tonic-gate 	sigqueue_t	*sqp;
29227c478bd9Sstevel@tonic-gate 	int		(*aio_func)();
29237c478bd9Sstevel@tonic-gate 	int		mode;
292434709573Sraf 	int		error = 0;
292534709573Sraf 	int		aio_errors = 0;
29267c478bd9Sstevel@tonic-gate 	int		i;
29277c478bd9Sstevel@tonic-gate 	size_t		ssize;
29287c478bd9Sstevel@tonic-gate 	int		deadhead = 0;
29297c478bd9Sstevel@tonic-gate 	int		aio_notsupported = 0;
293034709573Sraf 	int		lio_head_port;
293134709573Sraf 	int		aio_port;
293234709573Sraf 	int		aio_thread;
29337c478bd9Sstevel@tonic-gate 	port_kevent_t	*pkevtp = NULL;
293434b3058fSpraks 	int		portused = 0;
29357c478bd9Sstevel@tonic-gate 	port_notify32_t	pnotify;
293634709573Sraf 	int		event;
29377c478bd9Sstevel@tonic-gate 
29387c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
29397c478bd9Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
29407c478bd9Sstevel@tonic-gate 		return (EINVAL);
29417c478bd9Sstevel@tonic-gate 
29427c478bd9Sstevel@tonic-gate 	ASSERT(get_udatamodel() == DATAMODEL_ILP32);
29437c478bd9Sstevel@tonic-gate 
29447c478bd9Sstevel@tonic-gate 	ssize = (sizeof (caddr32_t) * nent);
29457c478bd9Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
29467c478bd9Sstevel@tonic-gate 	ucbp = (caddr32_t *)cbplist;
29477c478bd9Sstevel@tonic-gate 
294834709573Sraf 	if (copyin(aiocb_arg, cbplist, ssize) ||
294934709573Sraf 	    (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
29507c478bd9Sstevel@tonic-gate 		kmem_free(cbplist, ssize);
29517c478bd9Sstevel@tonic-gate 		return (EFAULT);
29527c478bd9Sstevel@tonic-gate 	}
29537c478bd9Sstevel@tonic-gate 
295434709573Sraf 	/* Event Ports  */
295534709573Sraf 	if (sigev &&
295634709573Sraf 	    (sigevk.sigev_notify == SIGEV_THREAD ||
295734709573Sraf 	    sigevk.sigev_notify == SIGEV_PORT)) {
295834709573Sraf 		if (sigevk.sigev_notify == SIGEV_THREAD) {
295934709573Sraf 			pnotify.portnfy_port = sigevk.sigev_signo;
296034709573Sraf 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
296134709573Sraf 		} else if (copyin(
296234709573Sraf 		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
296334709573Sraf 		    &pnotify, sizeof (pnotify))) {
29647c478bd9Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
29657c478bd9Sstevel@tonic-gate 			return (EFAULT);
29667c478bd9Sstevel@tonic-gate 		}
296734709573Sraf 		error = port_alloc_event(pnotify.portnfy_port,
296834709573Sraf 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
296934709573Sraf 		if (error) {
297034709573Sraf 			if (error == ENOMEM || error == EAGAIN)
297134709573Sraf 				error = EAGAIN;
297234709573Sraf 			else
297334709573Sraf 				error = EINVAL;
297434709573Sraf 			kmem_free(cbplist, ssize);
297534709573Sraf 			return (error);
297634709573Sraf 		}
297734709573Sraf 		lio_head_port = pnotify.portnfy_port;
297834b3058fSpraks 		portused = 1;
29797c478bd9Sstevel@tonic-gate 	}
29807c478bd9Sstevel@tonic-gate 
29817c478bd9Sstevel@tonic-gate 	/*
29827c478bd9Sstevel@tonic-gate 	 * a list head should be allocated if notification is
29837c478bd9Sstevel@tonic-gate 	 * enabled for this list.
29847c478bd9Sstevel@tonic-gate 	 */
29857c478bd9Sstevel@tonic-gate 	head = NULL;
29867c478bd9Sstevel@tonic-gate 
298734709573Sraf 	if (mode_arg == LIO_WAIT || sigev) {
29887c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
29897c478bd9Sstevel@tonic-gate 		error = aio_lio_alloc(&head);
29907c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
29917c478bd9Sstevel@tonic-gate 		if (error)
29927c478bd9Sstevel@tonic-gate 			goto done;
29937c478bd9Sstevel@tonic-gate 		deadhead = 1;
29947c478bd9Sstevel@tonic-gate 		head->lio_nent = nent;
29957c478bd9Sstevel@tonic-gate 		head->lio_refcnt = nent;
299634709573Sraf 		head->lio_port = -1;
299734709573Sraf 		head->lio_portkev = NULL;
299834709573Sraf 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
299934709573Sraf 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
30007c478bd9Sstevel@tonic-gate 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
30017c478bd9Sstevel@tonic-gate 			if (sqp == NULL) {
30027c478bd9Sstevel@tonic-gate 				error = EAGAIN;
30037c478bd9Sstevel@tonic-gate 				goto done;
30047c478bd9Sstevel@tonic-gate 			}
30057c478bd9Sstevel@tonic-gate 			sqp->sq_func = NULL;
30067c478bd9Sstevel@tonic-gate 			sqp->sq_next = NULL;
30077c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_code = SI_ASYNCIO;
30087c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_pid = curproc->p_pid;
30097c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_ctid = PRCTID(curproc);
30107c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_zoneid = getzoneid();
30117c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
30127c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_signo = sigevk.sigev_signo;
30137c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_value.sival_int =
30147c478bd9Sstevel@tonic-gate 			    sigevk.sigev_value.sival_int;
30157c478bd9Sstevel@tonic-gate 			head->lio_sigqp = sqp;
30167c478bd9Sstevel@tonic-gate 		} else {
30177c478bd9Sstevel@tonic-gate 			head->lio_sigqp = NULL;
30187c478bd9Sstevel@tonic-gate 		}
301934709573Sraf 		if (pkevtp) {
302034709573Sraf 			/*
302134709573Sraf 			 * Prepare data to send when list of aiocb's
302234709573Sraf 			 * has completed.
302334709573Sraf 			 */
302434709573Sraf 			port_init_event(pkevtp, (uintptr_t)sigev,
302534709573Sraf 			    (void *)(uintptr_t)pnotify.portnfy_user,
302634709573Sraf 			    NULL, head);
302734709573Sraf 			pkevtp->portkev_events = AIOLIO64;
302834709573Sraf 			head->lio_portkev = pkevtp;
302934709573Sraf 			head->lio_port = pnotify.portnfy_port;
303034709573Sraf 		}
30317c478bd9Sstevel@tonic-gate 	}
30327c478bd9Sstevel@tonic-gate 
30337c478bd9Sstevel@tonic-gate 	for (i = 0; i < nent; i++, ucbp++) {
30347c478bd9Sstevel@tonic-gate 
30357c478bd9Sstevel@tonic-gate 		cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
30367c478bd9Sstevel@tonic-gate 		/* skip entry if it can't be copied. */
303734709573Sraf 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
30387c478bd9Sstevel@tonic-gate 			if (head) {
30397c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
30407c478bd9Sstevel@tonic-gate 				head->lio_nent--;
30417c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
30427c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
30437c478bd9Sstevel@tonic-gate 			}
30447c478bd9Sstevel@tonic-gate 			continue;
30457c478bd9Sstevel@tonic-gate 		}
30467c478bd9Sstevel@tonic-gate 
30477c478bd9Sstevel@tonic-gate 		/* skip if opcode for aiocb is LIO_NOP */
30487c478bd9Sstevel@tonic-gate 		mode = aiocb->aio_lio_opcode;
30497c478bd9Sstevel@tonic-gate 		if (mode == LIO_NOP) {
30507c478bd9Sstevel@tonic-gate 			cbp = NULL;
30517c478bd9Sstevel@tonic-gate 			if (head) {
30527c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
30537c478bd9Sstevel@tonic-gate 				head->lio_nent--;
30547c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
30557c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
30567c478bd9Sstevel@tonic-gate 			}
30577c478bd9Sstevel@tonic-gate 			continue;
30587c478bd9Sstevel@tonic-gate 		}
30597c478bd9Sstevel@tonic-gate 
30607c478bd9Sstevel@tonic-gate 		/* increment file descriptor's ref count. */
30617c478bd9Sstevel@tonic-gate 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
30627c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
30637c478bd9Sstevel@tonic-gate 			if (head) {
30647c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
30657c478bd9Sstevel@tonic-gate 				head->lio_nent--;
30667c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
30677c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
30687c478bd9Sstevel@tonic-gate 			}
30697c478bd9Sstevel@tonic-gate 			aio_errors++;
30707c478bd9Sstevel@tonic-gate 			continue;
30717c478bd9Sstevel@tonic-gate 		}
30727c478bd9Sstevel@tonic-gate 
30737c478bd9Sstevel@tonic-gate 		/*
30747c478bd9Sstevel@tonic-gate 		 * check the permission of the partition
30757c478bd9Sstevel@tonic-gate 		 */
30767c478bd9Sstevel@tonic-gate 		if ((fp->f_flag & mode) == 0) {
30777c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
30787c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
30797c478bd9Sstevel@tonic-gate 			if (head) {
30807c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
30817c478bd9Sstevel@tonic-gate 				head->lio_nent--;
30827c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
30837c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
30847c478bd9Sstevel@tonic-gate 			}
30857c478bd9Sstevel@tonic-gate 			aio_errors++;
30867c478bd9Sstevel@tonic-gate 			continue;
30877c478bd9Sstevel@tonic-gate 		}
30887c478bd9Sstevel@tonic-gate 
30897c478bd9Sstevel@tonic-gate 		/*
30907c478bd9Sstevel@tonic-gate 		 * common case where requests are to the same fd
30917c478bd9Sstevel@tonic-gate 		 * for the same r/w operation
30927c478bd9Sstevel@tonic-gate 		 * for UFS, need to set EBADFD
30937c478bd9Sstevel@tonic-gate 		 */
309434709573Sraf 		vp = fp->f_vnode;
309534709573Sraf 		if (fp != prev_fp || mode != prev_mode) {
30967c478bd9Sstevel@tonic-gate 			aio_func = check_vp(vp, mode);
30977c478bd9Sstevel@tonic-gate 			if (aio_func == NULL) {
30987c478bd9Sstevel@tonic-gate 				prev_fp = NULL;
30997c478bd9Sstevel@tonic-gate 				releasef(aiocb->aio_fildes);
31007c478bd9Sstevel@tonic-gate 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
31017c478bd9Sstevel@tonic-gate 				aio_notsupported++;
31027c478bd9Sstevel@tonic-gate 				if (head) {
31037c478bd9Sstevel@tonic-gate 					mutex_enter(&aiop->aio_mutex);
31047c478bd9Sstevel@tonic-gate 					head->lio_nent--;
31057c478bd9Sstevel@tonic-gate 					head->lio_refcnt--;
31067c478bd9Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
31077c478bd9Sstevel@tonic-gate 				}
31087c478bd9Sstevel@tonic-gate 				continue;
31097c478bd9Sstevel@tonic-gate 			} else {
31107c478bd9Sstevel@tonic-gate 				prev_fp = fp;
31117c478bd9Sstevel@tonic-gate 				prev_mode = mode;
31127c478bd9Sstevel@tonic-gate 			}
31137c478bd9Sstevel@tonic-gate 		}
311434709573Sraf 
31157c478bd9Sstevel@tonic-gate #ifdef	_LP64
31167c478bd9Sstevel@tonic-gate 		aiocb_LFton(aiocb, &aiocb_n);
31177c478bd9Sstevel@tonic-gate 		error = aio_req_setup(&reqp, aiop, &aiocb_n,
311834709573Sraf 		    (aio_result_t *)&cbp->aio_resultp, vp);
31197c478bd9Sstevel@tonic-gate #else
31207c478bd9Sstevel@tonic-gate 		error = aio_req_setupLF(&reqp, aiop, aiocb,
312134709573Sraf 		    (aio_result_t *)&cbp->aio_resultp, vp);
31227c478bd9Sstevel@tonic-gate #endif  /* _LP64 */
31237c478bd9Sstevel@tonic-gate 		if (error) {
31247c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
312534709573Sraf 			lio_set_uerror(&cbp->aio_resultp, error);
31267c478bd9Sstevel@tonic-gate 			if (head) {
31277c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
31287c478bd9Sstevel@tonic-gate 				head->lio_nent--;
31297c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
31307c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
31317c478bd9Sstevel@tonic-gate 			}
31327c478bd9Sstevel@tonic-gate 			aio_errors++;
31337c478bd9Sstevel@tonic-gate 			continue;
31347c478bd9Sstevel@tonic-gate 		}
31357c478bd9Sstevel@tonic-gate 
31367c478bd9Sstevel@tonic-gate 		reqp->aio_req_lio = head;
31377c478bd9Sstevel@tonic-gate 		deadhead = 0;
31387c478bd9Sstevel@tonic-gate 
31397c478bd9Sstevel@tonic-gate 		/*
31407c478bd9Sstevel@tonic-gate 		 * Set the errno field now before sending the request to
31417c478bd9Sstevel@tonic-gate 		 * the driver to avoid a race condition
31427c478bd9Sstevel@tonic-gate 		 */
31437c478bd9Sstevel@tonic-gate 		(void) suword32(&cbp->aio_resultp.aio_errno,
31447c478bd9Sstevel@tonic-gate 		    EINPROGRESS);
31457c478bd9Sstevel@tonic-gate 
31467c478bd9Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb32 = *ucbp;
31477c478bd9Sstevel@tonic-gate 
314834709573Sraf 		event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
314934709573Sraf 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
315034709573Sraf 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
315134709573Sraf 		if (aio_port | aio_thread) {
315234709573Sraf 			port_kevent_t *lpkevp;
315334709573Sraf 			/*
315434709573Sraf 			 * Prepare data to send with each aiocb completed.
315534709573Sraf 			 */
315634709573Sraf 			if (aio_port) {
315734709573Sraf 				void *paddr = (void *)(uintptr_t)
315834709573Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
315934709573Sraf 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
316034709573Sraf 					error = EFAULT;
316134709573Sraf 			} else {	/* aio_thread */
316234709573Sraf 				pnotify.portnfy_port =
316334709573Sraf 				    aiocb->aio_sigevent.sigev_signo;
316434709573Sraf 				pnotify.portnfy_user =
316534709573Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
316634709573Sraf 			}
316734709573Sraf 			if (error)
316834709573Sraf 				/* EMPTY */;
316934709573Sraf 			else if (pkevtp != NULL &&
317034709573Sraf 			    pnotify.portnfy_port == lio_head_port)
317134709573Sraf 				error = port_dup_event(pkevtp, &lpkevp,
317234709573Sraf 				    PORT_ALLOC_DEFAULT);
317334709573Sraf 			else
317434709573Sraf 				error = port_alloc_event(pnotify.portnfy_port,
317534709573Sraf 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
317634709573Sraf 				    &lpkevp);
317734709573Sraf 			if (error == 0) {
317834709573Sraf 				port_init_event(lpkevp, (uintptr_t)*ucbp,
31797c478bd9Sstevel@tonic-gate 				    (void *)(uintptr_t)pnotify.portnfy_user,
318034709573Sraf 				    aio_port_callback, reqp);
318134709573Sraf 				lpkevp->portkev_events = event;
318234709573Sraf 				reqp->aio_req_portkev = lpkevp;
318334709573Sraf 				reqp->aio_req_port = pnotify.portnfy_port;
318434709573Sraf 			}
31857c478bd9Sstevel@tonic-gate 		}
31867c478bd9Sstevel@tonic-gate 
31877c478bd9Sstevel@tonic-gate 		/*
31887c478bd9Sstevel@tonic-gate 		 * send the request to driver.
31897c478bd9Sstevel@tonic-gate 		 */
31907c478bd9Sstevel@tonic-gate 		if (error == 0) {
31917c478bd9Sstevel@tonic-gate 			if (aiocb->aio_nbytes == 0) {
31927c478bd9Sstevel@tonic-gate 				clear_active_fd(aiocb->aio_fildes);
31937c478bd9Sstevel@tonic-gate 				aio_zerolen(reqp);
31947c478bd9Sstevel@tonic-gate 				continue;
31957c478bd9Sstevel@tonic-gate 			}
31967c478bd9Sstevel@tonic-gate 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
31977c478bd9Sstevel@tonic-gate 			    CRED());
31987c478bd9Sstevel@tonic-gate 		}
31997c478bd9Sstevel@tonic-gate 
32007c478bd9Sstevel@tonic-gate 		/*
32017c478bd9Sstevel@tonic-gate 		 * the fd's ref count is not decremented until the IO has
32027c478bd9Sstevel@tonic-gate 		 * completed unless there was an error.
32037c478bd9Sstevel@tonic-gate 		 */
32047c478bd9Sstevel@tonic-gate 		if (error) {
32057c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
32067c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
32077c478bd9Sstevel@tonic-gate 			if (head) {
32087c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
32097c478bd9Sstevel@tonic-gate 				head->lio_nent--;
32107c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
32117c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
32127c478bd9Sstevel@tonic-gate 			}
32137c478bd9Sstevel@tonic-gate 			if (error == ENOTSUP)
32147c478bd9Sstevel@tonic-gate 				aio_notsupported++;
32157c478bd9Sstevel@tonic-gate 			else
32167c478bd9Sstevel@tonic-gate 				aio_errors++;
321734b3058fSpraks 			lio_set_error(reqp, portused);
32187c478bd9Sstevel@tonic-gate 		} else {
32197c478bd9Sstevel@tonic-gate 			clear_active_fd(aiocb->aio_fildes);
32207c478bd9Sstevel@tonic-gate 		}
32217c478bd9Sstevel@tonic-gate 	}
32227c478bd9Sstevel@tonic-gate 
32237c478bd9Sstevel@tonic-gate 	if (aio_notsupported) {
32247c478bd9Sstevel@tonic-gate 		error = ENOTSUP;
32257c478bd9Sstevel@tonic-gate 	} else if (aio_errors) {
32267c478bd9Sstevel@tonic-gate 		/*
32277c478bd9Sstevel@tonic-gate 		 * return EIO if any request failed
32287c478bd9Sstevel@tonic-gate 		 */
32297c478bd9Sstevel@tonic-gate 		error = EIO;
32307c478bd9Sstevel@tonic-gate 	}
32317c478bd9Sstevel@tonic-gate 
32327c478bd9Sstevel@tonic-gate 	if (mode_arg == LIO_WAIT) {
32337c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
32347c478bd9Sstevel@tonic-gate 		while (head->lio_refcnt > 0) {
32357c478bd9Sstevel@tonic-gate 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
32367c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
32377c478bd9Sstevel@tonic-gate 				error = EINTR;
32387c478bd9Sstevel@tonic-gate 				goto done;
32397c478bd9Sstevel@tonic-gate 			}
32407c478bd9Sstevel@tonic-gate 		}
32417c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
32427c478bd9Sstevel@tonic-gate 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
32437c478bd9Sstevel@tonic-gate 	}
32447c478bd9Sstevel@tonic-gate 
32457c478bd9Sstevel@tonic-gate done:
32467c478bd9Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
32477c478bd9Sstevel@tonic-gate 	if (deadhead) {
32487c478bd9Sstevel@tonic-gate 		if (head->lio_sigqp)
32497c478bd9Sstevel@tonic-gate 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
325034709573Sraf 		if (head->lio_portkev)
325134709573Sraf 			port_free_event(head->lio_portkev);
32527c478bd9Sstevel@tonic-gate 		kmem_free(head, sizeof (aio_lio_t));
32537c478bd9Sstevel@tonic-gate 	}
32547c478bd9Sstevel@tonic-gate 	return (error);
32557c478bd9Sstevel@tonic-gate }
32567c478bd9Sstevel@tonic-gate 
32577c478bd9Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
32587c478bd9Sstevel@tonic-gate static void
32597c478bd9Sstevel@tonic-gate aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
32607c478bd9Sstevel@tonic-gate {
32617c478bd9Sstevel@tonic-gate 	dest->aio_fildes = src->aio_fildes;
32627c478bd9Sstevel@tonic-gate 	dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
32637c478bd9Sstevel@tonic-gate 	dest->aio_nbytes = (size_t)src->aio_nbytes;
32647c478bd9Sstevel@tonic-gate 	dest->aio_offset = (off_t)src->aio_offset;
32657c478bd9Sstevel@tonic-gate 	dest->aio_reqprio = src->aio_reqprio;
32667c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
32677c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
32687c478bd9Sstevel@tonic-gate 
32697c478bd9Sstevel@tonic-gate 	/*
32707c478bd9Sstevel@tonic-gate 	 * See comment in sigqueue32() on handling of 32-bit
32717c478bd9Sstevel@tonic-gate 	 * sigvals in a 64-bit kernel.
32727c478bd9Sstevel@tonic-gate 	 */
32737c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_value.sival_int =
32747c478bd9Sstevel@tonic-gate 	    (int)src->aio_sigevent.sigev_value.sival_int;
32757c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
32767c478bd9Sstevel@tonic-gate 	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
32777c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
32787c478bd9Sstevel@tonic-gate 	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
32797c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
32807c478bd9Sstevel@tonic-gate 	dest->aio_lio_opcode = src->aio_lio_opcode;
32817c478bd9Sstevel@tonic-gate 	dest->aio_state = src->aio_state;
32827c478bd9Sstevel@tonic-gate 	dest->aio__pad[0] = src->aio__pad[0];
32837c478bd9Sstevel@tonic-gate }
32847c478bd9Sstevel@tonic-gate #endif
32857c478bd9Sstevel@tonic-gate 
32867c478bd9Sstevel@tonic-gate /*
32877c478bd9Sstevel@tonic-gate  * This function is used only for largefile calls made by
328834709573Sraf  * 32 bit applications.
32897c478bd9Sstevel@tonic-gate  */
32907c478bd9Sstevel@tonic-gate static int
32917c478bd9Sstevel@tonic-gate aio_req_setupLF(
32927c478bd9Sstevel@tonic-gate 	aio_req_t	**reqpp,
32937c478bd9Sstevel@tonic-gate 	aio_t		*aiop,
32947c478bd9Sstevel@tonic-gate 	aiocb64_32_t	*arg,
32957c478bd9Sstevel@tonic-gate 	aio_result_t	*resultp,
32967c478bd9Sstevel@tonic-gate 	vnode_t		*vp)
32977c478bd9Sstevel@tonic-gate {
329834709573Sraf 	sigqueue_t	*sqp = NULL;
32997c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp;
33007c478bd9Sstevel@tonic-gate 	struct uio	*uio;
330134709573Sraf 	struct sigevent32 *sigev;
33027c478bd9Sstevel@tonic-gate 	int 		error;
33037c478bd9Sstevel@tonic-gate 
330434709573Sraf 	sigev = &arg->aio_sigevent;
330534709573Sraf 	if (sigev->sigev_notify == SIGEV_SIGNAL &&
330634709573Sraf 	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
33077c478bd9Sstevel@tonic-gate 		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
33087c478bd9Sstevel@tonic-gate 		if (sqp == NULL)
33097c478bd9Sstevel@tonic-gate 			return (EAGAIN);
33107c478bd9Sstevel@tonic-gate 		sqp->sq_func = NULL;
33117c478bd9Sstevel@tonic-gate 		sqp->sq_next = NULL;
33127c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_code = SI_ASYNCIO;
33137c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_pid = curproc->p_pid;
33147c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_ctid = PRCTID(curproc);
33157c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_zoneid = getzoneid();
33167c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
33177c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_signo = sigev->sigev_signo;
331834709573Sraf 		sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
331934709573Sraf 	}
33207c478bd9Sstevel@tonic-gate 
33217c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
33227c478bd9Sstevel@tonic-gate 
33237c478bd9Sstevel@tonic-gate 	if (aiop->aio_flags & AIO_REQ_BLOCK) {
33247c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
33257c478bd9Sstevel@tonic-gate 		if (sqp)
33267c478bd9Sstevel@tonic-gate 			kmem_free(sqp, sizeof (sigqueue_t));
33277c478bd9Sstevel@tonic-gate 		return (EIO);
33287c478bd9Sstevel@tonic-gate 	}
33297c478bd9Sstevel@tonic-gate 	/*
33307c478bd9Sstevel@tonic-gate 	 * get an aio_reqp from the free list or allocate one
33317c478bd9Sstevel@tonic-gate 	 * from dynamic memory.
33327c478bd9Sstevel@tonic-gate 	 */
33337c478bd9Sstevel@tonic-gate 	if (error = aio_req_alloc(&reqp, resultp)) {
33347c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
33357c478bd9Sstevel@tonic-gate 		if (sqp)
33367c478bd9Sstevel@tonic-gate 			kmem_free(sqp, sizeof (sigqueue_t));
33377c478bd9Sstevel@tonic-gate 		return (error);
33387c478bd9Sstevel@tonic-gate 	}
33397c478bd9Sstevel@tonic-gate 	aiop->aio_pending++;
33407c478bd9Sstevel@tonic-gate 	aiop->aio_outstanding++;
33417c478bd9Sstevel@tonic-gate 	reqp->aio_req_flags = AIO_PENDING;
334234709573Sraf 	if (sigev->sigev_notify == SIGEV_THREAD ||
334334709573Sraf 	    sigev->sigev_notify == SIGEV_PORT)
334434709573Sraf 		aio_enq(&aiop->aio_portpending, reqp, 0);
33457c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
33467c478bd9Sstevel@tonic-gate 	/*
33477c478bd9Sstevel@tonic-gate 	 * initialize aio request.
33487c478bd9Sstevel@tonic-gate 	 */
33497c478bd9Sstevel@tonic-gate 	reqp->aio_req_fd = arg->aio_fildes;
33507c478bd9Sstevel@tonic-gate 	reqp->aio_req_sigqp = sqp;
33517c478bd9Sstevel@tonic-gate 	reqp->aio_req_iocb.iocb = NULL;
335234709573Sraf 	reqp->aio_req_lio = NULL;
33537c478bd9Sstevel@tonic-gate 	reqp->aio_req_buf.b_file = vp;
33547c478bd9Sstevel@tonic-gate 	uio = reqp->aio_req.aio_uio;
33557c478bd9Sstevel@tonic-gate 	uio->uio_iovcnt = 1;
33567c478bd9Sstevel@tonic-gate 	uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
33577c478bd9Sstevel@tonic-gate 	uio->uio_iov->iov_len = arg->aio_nbytes;
33587c478bd9Sstevel@tonic-gate 	uio->uio_loffset = arg->aio_offset;
33597c478bd9Sstevel@tonic-gate 	*reqpp = reqp;
33607c478bd9Sstevel@tonic-gate 	return (0);
33617c478bd9Sstevel@tonic-gate }
33627c478bd9Sstevel@tonic-gate 
33637c478bd9Sstevel@tonic-gate /*
33647c478bd9Sstevel@tonic-gate  * This routine is called when a non largefile call is made by a 32bit
33657c478bd9Sstevel@tonic-gate  * process on a ILP32 or LP64 kernel.
33667c478bd9Sstevel@tonic-gate  */
33677c478bd9Sstevel@tonic-gate static int
33687c478bd9Sstevel@tonic-gate alio32(
33697c478bd9Sstevel@tonic-gate 	int		mode_arg,
33707c478bd9Sstevel@tonic-gate 	void		*aiocb_arg,
33717c478bd9Sstevel@tonic-gate 	int		nent,
337234709573Sraf 	void		*sigev)
33737c478bd9Sstevel@tonic-gate {
33747c478bd9Sstevel@tonic-gate 	file_t		*fp;
33757c478bd9Sstevel@tonic-gate 	file_t		*prev_fp = NULL;
33767c478bd9Sstevel@tonic-gate 	int		prev_mode = -1;
33777c478bd9Sstevel@tonic-gate 	struct vnode	*vp;
33787c478bd9Sstevel@tonic-gate 	aio_lio_t	*head;
33797c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp;
33807c478bd9Sstevel@tonic-gate 	aio_t		*aiop;
338134709573Sraf 	caddr_t		cbplist;
33827c478bd9Sstevel@tonic-gate 	aiocb_t		cb;
33837c478bd9Sstevel@tonic-gate 	aiocb_t		*aiocb = &cb;
33847c478bd9Sstevel@tonic-gate #ifdef	_LP64
33857c478bd9Sstevel@tonic-gate 	aiocb32_t	*cbp;
33867c478bd9Sstevel@tonic-gate 	caddr32_t	*ucbp;
33877c478bd9Sstevel@tonic-gate 	aiocb32_t	cb32;
33887c478bd9Sstevel@tonic-gate 	aiocb32_t	*aiocb32 = &cb32;
338934709573Sraf 	struct sigevent32	sigevk;
33907c478bd9Sstevel@tonic-gate #else
33917c478bd9Sstevel@tonic-gate 	aiocb_t		*cbp, **ucbp;
339234709573Sraf 	struct sigevent	sigevk;
33937c478bd9Sstevel@tonic-gate #endif
33947c478bd9Sstevel@tonic-gate 	sigqueue_t	*sqp;
33957c478bd9Sstevel@tonic-gate 	int		(*aio_func)();
33967c478bd9Sstevel@tonic-gate 	int		mode;
339734709573Sraf 	int		error = 0;
339834709573Sraf 	int		aio_errors = 0;
33997c478bd9Sstevel@tonic-gate 	int		i;
34007c478bd9Sstevel@tonic-gate 	size_t		ssize;
34017c478bd9Sstevel@tonic-gate 	int		deadhead = 0;
34027c478bd9Sstevel@tonic-gate 	int		aio_notsupported = 0;
340334709573Sraf 	int		lio_head_port;
340434709573Sraf 	int		aio_port;
340534709573Sraf 	int		aio_thread;
34067c478bd9Sstevel@tonic-gate 	port_kevent_t	*pkevtp = NULL;
340734b3058fSpraks 	int		portused = 0;
34087c478bd9Sstevel@tonic-gate #ifdef	_LP64
34097c478bd9Sstevel@tonic-gate 	port_notify32_t	pnotify;
34107c478bd9Sstevel@tonic-gate #else
34117c478bd9Sstevel@tonic-gate 	port_notify_t	pnotify;
34127c478bd9Sstevel@tonic-gate #endif
341334709573Sraf 	int		event;
341434709573Sraf 
34157c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
34167c478bd9Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
34177c478bd9Sstevel@tonic-gate 		return (EINVAL);
34187c478bd9Sstevel@tonic-gate 
34197c478bd9Sstevel@tonic-gate #ifdef	_LP64
34207c478bd9Sstevel@tonic-gate 	ssize = (sizeof (caddr32_t) * nent);
34217c478bd9Sstevel@tonic-gate #else
34227c478bd9Sstevel@tonic-gate 	ssize = (sizeof (aiocb_t *) * nent);
34237c478bd9Sstevel@tonic-gate #endif
34247c478bd9Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
34257c478bd9Sstevel@tonic-gate 	ucbp = (void *)cbplist;
34267c478bd9Sstevel@tonic-gate 
342734709573Sraf 	if (copyin(aiocb_arg, cbplist, ssize) ||
342834709573Sraf 	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) {
34297c478bd9Sstevel@tonic-gate 		kmem_free(cbplist, ssize);
34307c478bd9Sstevel@tonic-gate 		return (EFAULT);
34317c478bd9Sstevel@tonic-gate 	}
34327c478bd9Sstevel@tonic-gate 
343334709573Sraf 	/* Event Ports  */
343434709573Sraf 	if (sigev &&
343534709573Sraf 	    (sigevk.sigev_notify == SIGEV_THREAD ||
343634709573Sraf 	    sigevk.sigev_notify == SIGEV_PORT)) {
343734709573Sraf 		if (sigevk.sigev_notify == SIGEV_THREAD) {
343834709573Sraf 			pnotify.portnfy_port = sigevk.sigev_signo;
343934709573Sraf 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
344034709573Sraf 		} else if (copyin(
344134709573Sraf 		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
344234709573Sraf 		    &pnotify, sizeof (pnotify))) {
34437c478bd9Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
34447c478bd9Sstevel@tonic-gate 			return (EFAULT);
34457c478bd9Sstevel@tonic-gate 		}
344634709573Sraf 		error = port_alloc_event(pnotify.portnfy_port,
344734709573Sraf 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
344834709573Sraf 		if (error) {
344934709573Sraf 			if (error == ENOMEM || error == EAGAIN)
345034709573Sraf 				error = EAGAIN;
345134709573Sraf 			else
345234709573Sraf 				error = EINVAL;
345334709573Sraf 			kmem_free(cbplist, ssize);
345434709573Sraf 			return (error);
345534709573Sraf 		}
345634709573Sraf 		lio_head_port = pnotify.portnfy_port;
345734b3058fSpraks 		portused = 1;
34587c478bd9Sstevel@tonic-gate 	}
34597c478bd9Sstevel@tonic-gate 
34607c478bd9Sstevel@tonic-gate 	/*
34617c478bd9Sstevel@tonic-gate 	 * a list head should be allocated if notification is
34627c478bd9Sstevel@tonic-gate 	 * enabled for this list.
34637c478bd9Sstevel@tonic-gate 	 */
34647c478bd9Sstevel@tonic-gate 	head = NULL;
34657c478bd9Sstevel@tonic-gate 
346634709573Sraf 	if (mode_arg == LIO_WAIT || sigev) {
34677c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
34687c478bd9Sstevel@tonic-gate 		error = aio_lio_alloc(&head);
34697c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
34707c478bd9Sstevel@tonic-gate 		if (error)
34717c478bd9Sstevel@tonic-gate 			goto done;
34727c478bd9Sstevel@tonic-gate 		deadhead = 1;
34737c478bd9Sstevel@tonic-gate 		head->lio_nent = nent;
34747c478bd9Sstevel@tonic-gate 		head->lio_refcnt = nent;
347534709573Sraf 		head->lio_port = -1;
347634709573Sraf 		head->lio_portkev = NULL;
347734709573Sraf 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
347834709573Sraf 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
34797c478bd9Sstevel@tonic-gate 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
34807c478bd9Sstevel@tonic-gate 			if (sqp == NULL) {
34817c478bd9Sstevel@tonic-gate 				error = EAGAIN;
34827c478bd9Sstevel@tonic-gate 				goto done;
34837c478bd9Sstevel@tonic-gate 			}
34847c478bd9Sstevel@tonic-gate 			sqp->sq_func = NULL;
34857c478bd9Sstevel@tonic-gate 			sqp->sq_next = NULL;
34867c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_code = SI_ASYNCIO;
34877c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_pid = curproc->p_pid;
34887c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_ctid = PRCTID(curproc);
34897c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_zoneid = getzoneid();
34907c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
349134709573Sraf 			sqp->sq_info.si_signo = sigevk.sigev_signo;
34927c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_value.sival_int =
349334709573Sraf 			    sigevk.sigev_value.sival_int;
34947c478bd9Sstevel@tonic-gate 			head->lio_sigqp = sqp;
34957c478bd9Sstevel@tonic-gate 		} else {
34967c478bd9Sstevel@tonic-gate 			head->lio_sigqp = NULL;
34977c478bd9Sstevel@tonic-gate 		}
349834709573Sraf 		if (pkevtp) {
349934709573Sraf 			/*
350034709573Sraf 			 * Prepare data to send when list of aiocb's has
350134709573Sraf 			 * completed.
350234709573Sraf 			 */
350334709573Sraf 			port_init_event(pkevtp, (uintptr_t)sigev,
350434709573Sraf 			    (void *)(uintptr_t)pnotify.portnfy_user,
350534709573Sraf 			    NULL, head);
350634709573Sraf 			pkevtp->portkev_events = AIOLIO;
350734709573Sraf 			head->lio_portkev = pkevtp;
350834709573Sraf 			head->lio_port = pnotify.portnfy_port;
350934709573Sraf 		}
35107c478bd9Sstevel@tonic-gate 	}
35117c478bd9Sstevel@tonic-gate 
35127c478bd9Sstevel@tonic-gate 	for (i = 0; i < nent; i++, ucbp++) {
35137c478bd9Sstevel@tonic-gate 
35147c478bd9Sstevel@tonic-gate 		/* skip entry if it can't be copied. */
35157c478bd9Sstevel@tonic-gate #ifdef	_LP64
35167c478bd9Sstevel@tonic-gate 		cbp = (aiocb32_t *)(uintptr_t)*ucbp;
351734709573Sraf 		if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32)))
35187c478bd9Sstevel@tonic-gate #else
35197c478bd9Sstevel@tonic-gate 		cbp = (aiocb_t *)*ucbp;
352034709573Sraf 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb)))
35217c478bd9Sstevel@tonic-gate #endif
352234709573Sraf 		{
35237c478bd9Sstevel@tonic-gate 			if (head) {
35247c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35257c478bd9Sstevel@tonic-gate 				head->lio_nent--;
35267c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
35277c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35287c478bd9Sstevel@tonic-gate 			}
35297c478bd9Sstevel@tonic-gate 			continue;
35307c478bd9Sstevel@tonic-gate 		}
35317c478bd9Sstevel@tonic-gate #ifdef	_LP64
35327c478bd9Sstevel@tonic-gate 		/*
35337c478bd9Sstevel@tonic-gate 		 * copy 32 bit structure into 64 bit structure
35347c478bd9Sstevel@tonic-gate 		 */
35357c478bd9Sstevel@tonic-gate 		aiocb_32ton(aiocb32, aiocb);
35367c478bd9Sstevel@tonic-gate #endif /* _LP64 */
35377c478bd9Sstevel@tonic-gate 
35387c478bd9Sstevel@tonic-gate 		/* skip if opcode for aiocb is LIO_NOP */
35397c478bd9Sstevel@tonic-gate 		mode = aiocb->aio_lio_opcode;
35407c478bd9Sstevel@tonic-gate 		if (mode == LIO_NOP) {
35417c478bd9Sstevel@tonic-gate 			cbp = NULL;
35427c478bd9Sstevel@tonic-gate 			if (head) {
35437c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35447c478bd9Sstevel@tonic-gate 				head->lio_nent--;
35457c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
35467c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35477c478bd9Sstevel@tonic-gate 			}
35487c478bd9Sstevel@tonic-gate 			continue;
35497c478bd9Sstevel@tonic-gate 		}
35507c478bd9Sstevel@tonic-gate 
35517c478bd9Sstevel@tonic-gate 		/* increment file descriptor's ref count. */
35527c478bd9Sstevel@tonic-gate 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
35537c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
35547c478bd9Sstevel@tonic-gate 			if (head) {
35557c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35567c478bd9Sstevel@tonic-gate 				head->lio_nent--;
35577c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
35587c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35597c478bd9Sstevel@tonic-gate 			}
35607c478bd9Sstevel@tonic-gate 			aio_errors++;
35617c478bd9Sstevel@tonic-gate 			continue;
35627c478bd9Sstevel@tonic-gate 		}
35637c478bd9Sstevel@tonic-gate 
35647c478bd9Sstevel@tonic-gate 		/*
35657c478bd9Sstevel@tonic-gate 		 * check the permission of the partition
35667c478bd9Sstevel@tonic-gate 		 */
35677c478bd9Sstevel@tonic-gate 		if ((fp->f_flag & mode) == 0) {
35687c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
35697c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
35707c478bd9Sstevel@tonic-gate 			if (head) {
35717c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35727c478bd9Sstevel@tonic-gate 				head->lio_nent--;
35737c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
35747c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35757c478bd9Sstevel@tonic-gate 			}
35767c478bd9Sstevel@tonic-gate 			aio_errors++;
35777c478bd9Sstevel@tonic-gate 			continue;
35787c478bd9Sstevel@tonic-gate 		}
35797c478bd9Sstevel@tonic-gate 
35807c478bd9Sstevel@tonic-gate 		/*
35817c478bd9Sstevel@tonic-gate 		 * common case where requests are to the same fd
35827c478bd9Sstevel@tonic-gate 		 * for the same r/w operation
35837c478bd9Sstevel@tonic-gate 		 * for UFS, need to set EBADFD
35847c478bd9Sstevel@tonic-gate 		 */
358534709573Sraf 		vp = fp->f_vnode;
358634709573Sraf 		if (fp != prev_fp || mode != prev_mode) {
35877c478bd9Sstevel@tonic-gate 			aio_func = check_vp(vp, mode);
35887c478bd9Sstevel@tonic-gate 			if (aio_func == NULL) {
35897c478bd9Sstevel@tonic-gate 				prev_fp = NULL;
35907c478bd9Sstevel@tonic-gate 				releasef(aiocb->aio_fildes);
359134709573Sraf 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
35927c478bd9Sstevel@tonic-gate 				aio_notsupported++;
35937c478bd9Sstevel@tonic-gate 				if (head) {
35947c478bd9Sstevel@tonic-gate 					mutex_enter(&aiop->aio_mutex);
35957c478bd9Sstevel@tonic-gate 					head->lio_nent--;
35967c478bd9Sstevel@tonic-gate 					head->lio_refcnt--;
35977c478bd9Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
35987c478bd9Sstevel@tonic-gate 				}
35997c478bd9Sstevel@tonic-gate 				continue;
36007c478bd9Sstevel@tonic-gate 			} else {
36017c478bd9Sstevel@tonic-gate 				prev_fp = fp;
36027c478bd9Sstevel@tonic-gate 				prev_mode = mode;
36037c478bd9Sstevel@tonic-gate 			}
36047c478bd9Sstevel@tonic-gate 		}
360534709573Sraf 
360634709573Sraf 		error = aio_req_setup(&reqp, aiop, aiocb,
360734709573Sraf 		    (aio_result_t *)&cbp->aio_resultp, vp);
360834709573Sraf 		if (error) {
36097c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
36107c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
36117c478bd9Sstevel@tonic-gate 			if (head) {
36127c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
36137c478bd9Sstevel@tonic-gate 				head->lio_nent--;
36147c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
36157c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
36167c478bd9Sstevel@tonic-gate 			}
36177c478bd9Sstevel@tonic-gate 			aio_errors++;
36187c478bd9Sstevel@tonic-gate 			continue;
36197c478bd9Sstevel@tonic-gate 		}
36207c478bd9Sstevel@tonic-gate 
36217c478bd9Sstevel@tonic-gate 		reqp->aio_req_lio = head;
36227c478bd9Sstevel@tonic-gate 		deadhead = 0;
36237c478bd9Sstevel@tonic-gate 
36247c478bd9Sstevel@tonic-gate 		/*
36257c478bd9Sstevel@tonic-gate 		 * Set the errno field now before sending the request to
36267c478bd9Sstevel@tonic-gate 		 * the driver to avoid a race condition
36277c478bd9Sstevel@tonic-gate 		 */
36287c478bd9Sstevel@tonic-gate 		(void) suword32(&cbp->aio_resultp.aio_errno,
36297c478bd9Sstevel@tonic-gate 		    EINPROGRESS);
36307c478bd9Sstevel@tonic-gate 
363134709573Sraf 		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp;
36327c478bd9Sstevel@tonic-gate 
363334709573Sraf 		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
363434709573Sraf 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
363534709573Sraf 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
363634709573Sraf 		if (aio_port | aio_thread) {
363734709573Sraf 			port_kevent_t *lpkevp;
363834709573Sraf 			/*
363934709573Sraf 			 * Prepare data to send with each aiocb completed.
364034709573Sraf 			 */
36417c478bd9Sstevel@tonic-gate #ifdef _LP64
364234709573Sraf 			if (aio_port) {
364334709573Sraf 				void *paddr = (void  *)(uintptr_t)
364434709573Sraf 				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
364534709573Sraf 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
364634709573Sraf 					error = EFAULT;
364734709573Sraf 			} else {	/* aio_thread */
364834709573Sraf 				pnotify.portnfy_port =
364934709573Sraf 				    aiocb32->aio_sigevent.sigev_signo;
365034709573Sraf 				pnotify.portnfy_user =
365134709573Sraf 				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
365234709573Sraf 			}
36537c478bd9Sstevel@tonic-gate #else
365434709573Sraf 			if (aio_port) {
365534709573Sraf 				void *paddr =
365634709573Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
365734709573Sraf 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
365834709573Sraf 					error = EFAULT;
365934709573Sraf 			} else {	/* aio_thread */
366034709573Sraf 				pnotify.portnfy_port =
366134709573Sraf 				    aiocb->aio_sigevent.sigev_signo;
366234709573Sraf 				pnotify.portnfy_user =
366334709573Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
366434709573Sraf 			}
36657c478bd9Sstevel@tonic-gate #endif
366634709573Sraf 			if (error)
366734709573Sraf 				/* EMPTY */;
366834709573Sraf 			else if (pkevtp != NULL &&
366934709573Sraf 			    pnotify.portnfy_port == lio_head_port)
367034709573Sraf 				error = port_dup_event(pkevtp, &lpkevp,
367134709573Sraf 				    PORT_ALLOC_DEFAULT);
367234709573Sraf 			else
367334709573Sraf 				error = port_alloc_event(pnotify.portnfy_port,
367434709573Sraf 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
367534709573Sraf 				    &lpkevp);
367634709573Sraf 			if (error == 0) {
367734709573Sraf 				port_init_event(lpkevp, (uintptr_t)cbp,
367834709573Sraf 				    (void *)(uintptr_t)pnotify.portnfy_user,
367934709573Sraf 				    aio_port_callback, reqp);
368034709573Sraf 				lpkevp->portkev_events = event;
368134709573Sraf 				reqp->aio_req_portkev = lpkevp;
368234709573Sraf 				reqp->aio_req_port = pnotify.portnfy_port;
368334709573Sraf 			}
36847c478bd9Sstevel@tonic-gate 		}
36857c478bd9Sstevel@tonic-gate 
36867c478bd9Sstevel@tonic-gate 		/*
36877c478bd9Sstevel@tonic-gate 		 * send the request to driver.
36887c478bd9Sstevel@tonic-gate 		 */
36897c478bd9Sstevel@tonic-gate 		if (error == 0) {
36907c478bd9Sstevel@tonic-gate 			if (aiocb->aio_nbytes == 0) {
36917c478bd9Sstevel@tonic-gate 				clear_active_fd(aiocb->aio_fildes);
36927c478bd9Sstevel@tonic-gate 				aio_zerolen(reqp);
36937c478bd9Sstevel@tonic-gate 				continue;
36947c478bd9Sstevel@tonic-gate 			}
36957c478bd9Sstevel@tonic-gate 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
36967c478bd9Sstevel@tonic-gate 			    CRED());
36977c478bd9Sstevel@tonic-gate 		}
36987c478bd9Sstevel@tonic-gate 
36997c478bd9Sstevel@tonic-gate 		/*
37007c478bd9Sstevel@tonic-gate 		 * the fd's ref count is not decremented until the IO has
37017c478bd9Sstevel@tonic-gate 		 * completed unless there was an error.
37027c478bd9Sstevel@tonic-gate 		 */
37037c478bd9Sstevel@tonic-gate 		if (error) {
37047c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
37057c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
37067c478bd9Sstevel@tonic-gate 			if (head) {
37077c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
37087c478bd9Sstevel@tonic-gate 				head->lio_nent--;
37097c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
37107c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
37117c478bd9Sstevel@tonic-gate 			}
37127c478bd9Sstevel@tonic-gate 			if (error == ENOTSUP)
37137c478bd9Sstevel@tonic-gate 				aio_notsupported++;
37147c478bd9Sstevel@tonic-gate 			else
37157c478bd9Sstevel@tonic-gate 				aio_errors++;
371634b3058fSpraks 			lio_set_error(reqp, portused);
37177c478bd9Sstevel@tonic-gate 		} else {
37187c478bd9Sstevel@tonic-gate 			clear_active_fd(aiocb->aio_fildes);
37197c478bd9Sstevel@tonic-gate 		}
37207c478bd9Sstevel@tonic-gate 	}
37217c478bd9Sstevel@tonic-gate 
37227c478bd9Sstevel@tonic-gate 	if (aio_notsupported) {
37237c478bd9Sstevel@tonic-gate 		error = ENOTSUP;
37247c478bd9Sstevel@tonic-gate 	} else if (aio_errors) {
37257c478bd9Sstevel@tonic-gate 		/*
37267c478bd9Sstevel@tonic-gate 		 * return EIO if any request failed
37277c478bd9Sstevel@tonic-gate 		 */
37287c478bd9Sstevel@tonic-gate 		error = EIO;
37297c478bd9Sstevel@tonic-gate 	}
37307c478bd9Sstevel@tonic-gate 
37317c478bd9Sstevel@tonic-gate 	if (mode_arg == LIO_WAIT) {
37327c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
37337c478bd9Sstevel@tonic-gate 		while (head->lio_refcnt > 0) {
37347c478bd9Sstevel@tonic-gate 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
37357c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
37367c478bd9Sstevel@tonic-gate 				error = EINTR;
37377c478bd9Sstevel@tonic-gate 				goto done;
37387c478bd9Sstevel@tonic-gate 			}
37397c478bd9Sstevel@tonic-gate 		}
37407c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
37417c478bd9Sstevel@tonic-gate 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
37427c478bd9Sstevel@tonic-gate 	}
37437c478bd9Sstevel@tonic-gate 
37447c478bd9Sstevel@tonic-gate done:
37457c478bd9Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
37467c478bd9Sstevel@tonic-gate 	if (deadhead) {
37477c478bd9Sstevel@tonic-gate 		if (head->lio_sigqp)
37487c478bd9Sstevel@tonic-gate 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
374934709573Sraf 		if (head->lio_portkev)
375034709573Sraf 			port_free_event(head->lio_portkev);
37517c478bd9Sstevel@tonic-gate 		kmem_free(head, sizeof (aio_lio_t));
37527c478bd9Sstevel@tonic-gate 	}
37537c478bd9Sstevel@tonic-gate 	return (error);
37547c478bd9Sstevel@tonic-gate }
37557c478bd9Sstevel@tonic-gate 
37567c478bd9Sstevel@tonic-gate 
37577c478bd9Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
37587c478bd9Sstevel@tonic-gate void
37597c478bd9Sstevel@tonic-gate aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
37607c478bd9Sstevel@tonic-gate {
37617c478bd9Sstevel@tonic-gate 	dest->aio_fildes = src->aio_fildes;
37627c478bd9Sstevel@tonic-gate 	dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
37637c478bd9Sstevel@tonic-gate 	dest->aio_nbytes = (size_t)src->aio_nbytes;
37647c478bd9Sstevel@tonic-gate 	dest->aio_offset = (off_t)src->aio_offset;
37657c478bd9Sstevel@tonic-gate 	dest->aio_reqprio = src->aio_reqprio;
37667c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
37677c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
37687c478bd9Sstevel@tonic-gate 
37697c478bd9Sstevel@tonic-gate 	/*
37707c478bd9Sstevel@tonic-gate 	 * See comment in sigqueue32() on handling of 32-bit
37717c478bd9Sstevel@tonic-gate 	 * sigvals in a 64-bit kernel.
37727c478bd9Sstevel@tonic-gate 	 */
37737c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_value.sival_int =
37747c478bd9Sstevel@tonic-gate 	    (int)src->aio_sigevent.sigev_value.sival_int;
37757c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
37767c478bd9Sstevel@tonic-gate 	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
37777c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
37787c478bd9Sstevel@tonic-gate 	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
37797c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
37807c478bd9Sstevel@tonic-gate 	dest->aio_lio_opcode = src->aio_lio_opcode;
37817c478bd9Sstevel@tonic-gate 	dest->aio_state = src->aio_state;
37827c478bd9Sstevel@tonic-gate 	dest->aio__pad[0] = src->aio__pad[0];
37837c478bd9Sstevel@tonic-gate }
37847c478bd9Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
37857c478bd9Sstevel@tonic-gate 
37867c478bd9Sstevel@tonic-gate /*
37877c478bd9Sstevel@tonic-gate  * aio_port_callback() is called just before the event is retrieved from the
37887c478bd9Sstevel@tonic-gate  * port. The task of this callback function is to finish the work of the
37897c478bd9Sstevel@tonic-gate  * transaction for the application, it means :
37907c478bd9Sstevel@tonic-gate  * - copyout transaction data to the application
37917c478bd9Sstevel@tonic-gate  *	(this thread is running in the right process context)
37927c478bd9Sstevel@tonic-gate  * - keep trace of the transaction (update of counters).
37937c478bd9Sstevel@tonic-gate  * - free allocated buffers
37947c478bd9Sstevel@tonic-gate  * The aiocb pointer is the object element of the port_kevent_t structure.
37957c478bd9Sstevel@tonic-gate  *
37967c478bd9Sstevel@tonic-gate  * flag :
37977c478bd9Sstevel@tonic-gate  *	PORT_CALLBACK_DEFAULT : do copyout and free resources
37987c478bd9Sstevel@tonic-gate  *	PORT_CALLBACK_CLOSE   : don't do copyout, free resources
37997c478bd9Sstevel@tonic-gate  */
38007c478bd9Sstevel@tonic-gate 
38017c478bd9Sstevel@tonic-gate /*ARGSUSED*/
38027c478bd9Sstevel@tonic-gate int
38037c478bd9Sstevel@tonic-gate aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
38047c478bd9Sstevel@tonic-gate {
38057c478bd9Sstevel@tonic-gate 	aio_t		*aiop = curproc->p_aio;
38067c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp = arg;
38077c478bd9Sstevel@tonic-gate 	struct	iovec	*iov;
38087c478bd9Sstevel@tonic-gate 	struct	buf	*bp;
38097c478bd9Sstevel@tonic-gate 	void		*resultp;
38107c478bd9Sstevel@tonic-gate 
38117c478bd9Sstevel@tonic-gate 	if (pid != curproc->p_pid) {
38127c478bd9Sstevel@tonic-gate 		/* wrong proc !!, can not deliver data here ... */
38137c478bd9Sstevel@tonic-gate 		return (EACCES);
38147c478bd9Sstevel@tonic-gate 	}
38157c478bd9Sstevel@tonic-gate 
38167c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_portq_mutex);
38177c478bd9Sstevel@tonic-gate 	reqp->aio_req_portkev = NULL;
38187c478bd9Sstevel@tonic-gate 	aio_req_remove_portq(aiop, reqp); /* remove request from portq */
38197c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_portq_mutex);
38207c478bd9Sstevel@tonic-gate 	aphysio_unlock(reqp);		/* unlock used pages */
38217c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
38227c478bd9Sstevel@tonic-gate 	if (reqp->aio_req_flags & AIO_COPYOUTDONE) {
38237c478bd9Sstevel@tonic-gate 		aio_req_free_port(aiop, reqp);	/* back to free list */
38247c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
38257c478bd9Sstevel@tonic-gate 		return (0);
38267c478bd9Sstevel@tonic-gate 	}
38277c478bd9Sstevel@tonic-gate 
38287c478bd9Sstevel@tonic-gate 	iov = reqp->aio_req_uio.uio_iov;
38297c478bd9Sstevel@tonic-gate 	bp = &reqp->aio_req_buf;
38307c478bd9Sstevel@tonic-gate 	resultp = (void *)reqp->aio_req_resultp;
38317c478bd9Sstevel@tonic-gate 	aio_req_free_port(aiop, reqp);	/* request struct back to free list */
38327c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
38337c478bd9Sstevel@tonic-gate 	if (flag == PORT_CALLBACK_DEFAULT)
38347c478bd9Sstevel@tonic-gate 		aio_copyout_result_port(iov, bp, resultp);
38357c478bd9Sstevel@tonic-gate 	return (0);
38367c478bd9Sstevel@tonic-gate }
3837