xref: /titanic_51/usr/src/uts/common/os/aio.c (revision b0b27ce6df72b0212689361744a0be6872c2b07b)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
57c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
67c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
77c478bd9Sstevel@tonic-gate  * with the License.
87c478bd9Sstevel@tonic-gate  *
97c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
107c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
117c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
127c478bd9Sstevel@tonic-gate  * and limitations under the License.
137c478bd9Sstevel@tonic-gate  *
147c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
157c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
167c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
177c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
187c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
197c478bd9Sstevel@tonic-gate  *
207c478bd9Sstevel@tonic-gate  * CDDL HEADER END
217c478bd9Sstevel@tonic-gate  */
227c478bd9Sstevel@tonic-gate /*
237c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate /*
307c478bd9Sstevel@tonic-gate  * Kernel asynchronous I/O.
317c478bd9Sstevel@tonic-gate  * This is only for raw devices now (as of Nov. 1993).
327c478bd9Sstevel@tonic-gate  */
337c478bd9Sstevel@tonic-gate 
347c478bd9Sstevel@tonic-gate #include <sys/types.h>
357c478bd9Sstevel@tonic-gate #include <sys/errno.h>
367c478bd9Sstevel@tonic-gate #include <sys/conf.h>
377c478bd9Sstevel@tonic-gate #include <sys/file.h>
387c478bd9Sstevel@tonic-gate #include <sys/fs/snode.h>
397c478bd9Sstevel@tonic-gate #include <sys/unistd.h>
407c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
417c478bd9Sstevel@tonic-gate #include <vm/as.h>
427c478bd9Sstevel@tonic-gate #include <vm/faultcode.h>
437c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
447c478bd9Sstevel@tonic-gate #include <sys/procfs.h>
457c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
467c478bd9Sstevel@tonic-gate #include <sys/autoconf.h>
477c478bd9Sstevel@tonic-gate #include <sys/ddi_impldefs.h>
487c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
497c478bd9Sstevel@tonic-gate #include <sys/aio_impl.h>
507c478bd9Sstevel@tonic-gate #include <sys/debug.h>
517c478bd9Sstevel@tonic-gate #include <sys/param.h>
527c478bd9Sstevel@tonic-gate #include <sys/systm.h>
537c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h>
547c478bd9Sstevel@tonic-gate #include <sys/fs/pxfs_ki.h>
557c478bd9Sstevel@tonic-gate #include <sys/contract/process_impl.h>
567c478bd9Sstevel@tonic-gate 
577c478bd9Sstevel@tonic-gate /*
587c478bd9Sstevel@tonic-gate  * external entry point.
597c478bd9Sstevel@tonic-gate  */
607c478bd9Sstevel@tonic-gate #ifdef _LP64
617c478bd9Sstevel@tonic-gate static int64_t kaioc(long, long, long, long, long, long);
627c478bd9Sstevel@tonic-gate #endif
637c478bd9Sstevel@tonic-gate static int kaio(ulong_t *, rval_t *);
647c478bd9Sstevel@tonic-gate 
657c478bd9Sstevel@tonic-gate 
667c478bd9Sstevel@tonic-gate #define	AIO_64	0
677c478bd9Sstevel@tonic-gate #define	AIO_32	1
687c478bd9Sstevel@tonic-gate #define	AIO_LARGEFILE	2
697c478bd9Sstevel@tonic-gate 
707c478bd9Sstevel@tonic-gate /*
717c478bd9Sstevel@tonic-gate  * implementation specific functions (private)
727c478bd9Sstevel@tonic-gate  */
737c478bd9Sstevel@tonic-gate #ifdef _LP64
747c478bd9Sstevel@tonic-gate static int alio(int, int, aiocb_t **, int, struct sigevent *);
757c478bd9Sstevel@tonic-gate #endif
767c478bd9Sstevel@tonic-gate static int aionotify(void);
777c478bd9Sstevel@tonic-gate static int aioinit(void);
787c478bd9Sstevel@tonic-gate static int aiostart(void);
797c478bd9Sstevel@tonic-gate static void alio_cleanup(aio_t *, aiocb_t **, int, int);
807c478bd9Sstevel@tonic-gate static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
817c478bd9Sstevel@tonic-gate     cred_t *);
827c478bd9Sstevel@tonic-gate static void lio_set_error(aio_req_t *);
837c478bd9Sstevel@tonic-gate static aio_t *aio_aiop_alloc();
847c478bd9Sstevel@tonic-gate static int aio_req_alloc(aio_req_t **, aio_result_t *);
857c478bd9Sstevel@tonic-gate static int aio_lio_alloc(aio_lio_t **);
867c478bd9Sstevel@tonic-gate static aio_req_t *aio_req_done(void *);
877c478bd9Sstevel@tonic-gate static aio_req_t *aio_req_remove(aio_req_t *);
887c478bd9Sstevel@tonic-gate static int aio_req_find(aio_result_t *, aio_req_t **);
897c478bd9Sstevel@tonic-gate static int aio_hash_insert(struct aio_req_t *, aio_t *);
907c478bd9Sstevel@tonic-gate static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
917c478bd9Sstevel@tonic-gate     aio_result_t *, int, vnode_t *);
927c478bd9Sstevel@tonic-gate static int aio_cleanup_thread(aio_t *);
937c478bd9Sstevel@tonic-gate static aio_lio_t *aio_list_get(aio_result_t *);
947c478bd9Sstevel@tonic-gate static void lio_set_uerror(void *, int);
957c478bd9Sstevel@tonic-gate extern void aio_zerolen(aio_req_t *);
967c478bd9Sstevel@tonic-gate static int aiowait(struct timeval *, int, long	*);
977c478bd9Sstevel@tonic-gate static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
987c478bd9Sstevel@tonic-gate static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
997c478bd9Sstevel@tonic-gate     aio_req_t *reqlist, aio_t *aiop, model_t model);
1007c478bd9Sstevel@tonic-gate static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
1017c478bd9Sstevel@tonic-gate static int aiosuspend(void *, int, struct  timespec *, int,
1027c478bd9Sstevel@tonic-gate     long	*, int);
1037c478bd9Sstevel@tonic-gate static int aliowait(int, void *, int, void *, int);
1047c478bd9Sstevel@tonic-gate static int aioerror(void *, int);
1057c478bd9Sstevel@tonic-gate static int aio_cancel(int, void *, long	*, int);
1067c478bd9Sstevel@tonic-gate static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
1077c478bd9Sstevel@tonic-gate static int aiorw(int, void *, int, int);
1087c478bd9Sstevel@tonic-gate 
1097c478bd9Sstevel@tonic-gate static int alioLF(int, void *, int, void *);
1107c478bd9Sstevel@tonic-gate static int aio_req_setupLF(aio_req_t **, aio_t *,
1117c478bd9Sstevel@tonic-gate     aiocb64_32_t *, aio_result_t *, int, vnode_t *);
1127c478bd9Sstevel@tonic-gate static int alio32(int, void *, int, void *);
1137c478bd9Sstevel@tonic-gate static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
1147c478bd9Sstevel@tonic-gate static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
1157c478bd9Sstevel@tonic-gate 
1167c478bd9Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
1177c478bd9Sstevel@tonic-gate static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
1187c478bd9Sstevel@tonic-gate void	aiocb_32ton(aiocb32_t *, aiocb_t *);
1197c478bd9Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
1207c478bd9Sstevel@tonic-gate 
1217c478bd9Sstevel@tonic-gate /*
1227c478bd9Sstevel@tonic-gate  * implementation specific functions (external)
1237c478bd9Sstevel@tonic-gate  */
1247c478bd9Sstevel@tonic-gate void aio_req_free(aio_t *, aio_req_t *);
1257c478bd9Sstevel@tonic-gate 
1267c478bd9Sstevel@tonic-gate /*
1277c478bd9Sstevel@tonic-gate  * Event Port framework
1287c478bd9Sstevel@tonic-gate  */
1297c478bd9Sstevel@tonic-gate 
1307c478bd9Sstevel@tonic-gate void aio_req_free_port(aio_t *, aio_req_t *);
1317c478bd9Sstevel@tonic-gate static int aio_port_callback(void *, int *, pid_t, int, void *);
1327c478bd9Sstevel@tonic-gate 
1337c478bd9Sstevel@tonic-gate /*
1347c478bd9Sstevel@tonic-gate  * This is the loadable module wrapper.
1357c478bd9Sstevel@tonic-gate  */
1367c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
1377c478bd9Sstevel@tonic-gate #include <sys/syscall.h>
1387c478bd9Sstevel@tonic-gate 
1397c478bd9Sstevel@tonic-gate #ifdef _LP64
1407c478bd9Sstevel@tonic-gate 
/*
 * System call table entry for the native (64-bit) kaio entry point:
 * six long arguments, 64-bit return value, SE_ARGC says the argument
 * count is taken from this entry rather than the trap frame.
 */
static struct sysent kaio_sysent = {
	6,
	SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
	(int (*)())kaioc
};

#ifdef _SYSCALL32_IMPL
/*
 * 32-bit compatibility entry on a 64-bit kernel: seven arguments
 * (the 64-bit file offset arrives as two 32-bit slots).
 */
static struct sysent kaio_sysent32 = {
	7,
	SE_NOUNLOAD | SE_64RVAL,
	kaio
};
#endif  /* _SYSCALL32_IMPL */

#else   /* _LP64 */

/* On a 32-bit kernel there is only the seven-argument kaio entry. */
static struct sysent kaio_sysent = {
	7,
	SE_NOUNLOAD | SE_32RVAL1,
	kaio
};

#endif  /* _LP64 */

/*
 * Module linkage information for the kernel.
 */

static struct modlsys modlsys = {
	&mod_syscallops,
	"kernel Async I/O",
	&kaio_sysent
};

#ifdef  _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"kernel Async I/O for 32 bit compatibility",
	&kaio_sysent32
};
#endif  /* _SYSCALL32_IMPL */


static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef  _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
1927c478bd9Sstevel@tonic-gate 
1937c478bd9Sstevel@tonic-gate int
1947c478bd9Sstevel@tonic-gate _init(void)
1957c478bd9Sstevel@tonic-gate {
1967c478bd9Sstevel@tonic-gate 	int retval;
1977c478bd9Sstevel@tonic-gate 
1987c478bd9Sstevel@tonic-gate 	if ((retval = mod_install(&modlinkage)) != 0)
1997c478bd9Sstevel@tonic-gate 		return (retval);
2007c478bd9Sstevel@tonic-gate 
2017c478bd9Sstevel@tonic-gate 	return (0);
2027c478bd9Sstevel@tonic-gate }
2037c478bd9Sstevel@tonic-gate 
2047c478bd9Sstevel@tonic-gate int
2057c478bd9Sstevel@tonic-gate _fini(void)
2067c478bd9Sstevel@tonic-gate {
2077c478bd9Sstevel@tonic-gate 	int retval;
2087c478bd9Sstevel@tonic-gate 
2097c478bd9Sstevel@tonic-gate 	retval = mod_remove(&modlinkage);
2107c478bd9Sstevel@tonic-gate 
2117c478bd9Sstevel@tonic-gate 	return (retval);
2127c478bd9Sstevel@tonic-gate }
2137c478bd9Sstevel@tonic-gate 
/*
 * Report module information (for modinfo(1M)) via the standard
 * modctl helper.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
2197c478bd9Sstevel@tonic-gate 
2207c478bd9Sstevel@tonic-gate #ifdef	_LP64
2217c478bd9Sstevel@tonic-gate static int64_t
2227c478bd9Sstevel@tonic-gate kaioc(
2237c478bd9Sstevel@tonic-gate 	long	a0,
2247c478bd9Sstevel@tonic-gate 	long	a1,
2257c478bd9Sstevel@tonic-gate 	long	a2,
2267c478bd9Sstevel@tonic-gate 	long	a3,
2277c478bd9Sstevel@tonic-gate 	long	a4,
2287c478bd9Sstevel@tonic-gate 	long	a5)
2297c478bd9Sstevel@tonic-gate {
2307c478bd9Sstevel@tonic-gate 	int	error;
2317c478bd9Sstevel@tonic-gate 	long	rval = 0;
2327c478bd9Sstevel@tonic-gate 
2337c478bd9Sstevel@tonic-gate 	switch ((int)a0 & ~AIO_POLL_BIT) {
2347c478bd9Sstevel@tonic-gate 	case AIOREAD:
2357c478bd9Sstevel@tonic-gate 		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
2367c478bd9Sstevel@tonic-gate 		    (offset_t)a4, (aio_result_t *)a5, FREAD);
2377c478bd9Sstevel@tonic-gate 		break;
2387c478bd9Sstevel@tonic-gate 	case AIOWRITE:
2397c478bd9Sstevel@tonic-gate 		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
2407c478bd9Sstevel@tonic-gate 		    (offset_t)a4, (aio_result_t *)a5, FWRITE);
2417c478bd9Sstevel@tonic-gate 		break;
2427c478bd9Sstevel@tonic-gate 	case AIOWAIT:
2437c478bd9Sstevel@tonic-gate 		error = aiowait((struct timeval *)a1, (int)a2, &rval);
2447c478bd9Sstevel@tonic-gate 		break;
2457c478bd9Sstevel@tonic-gate 	case AIOWAITN:
2467c478bd9Sstevel@tonic-gate 		error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
2477c478bd9Sstevel@tonic-gate 		    (timespec_t *)a4);
2487c478bd9Sstevel@tonic-gate 		break;
2497c478bd9Sstevel@tonic-gate 	case AIONOTIFY:
2507c478bd9Sstevel@tonic-gate 		error = aionotify();
2517c478bd9Sstevel@tonic-gate 		break;
2527c478bd9Sstevel@tonic-gate 	case AIOINIT:
2537c478bd9Sstevel@tonic-gate 		error = aioinit();
2547c478bd9Sstevel@tonic-gate 		break;
2557c478bd9Sstevel@tonic-gate 	case AIOSTART:
2567c478bd9Sstevel@tonic-gate 		error = aiostart();
2577c478bd9Sstevel@tonic-gate 		break;
2587c478bd9Sstevel@tonic-gate 	case AIOLIO:
2597c478bd9Sstevel@tonic-gate 		error = alio((int)a0, (int)a1, (aiocb_t **)a2, (int)a3,
2607c478bd9Sstevel@tonic-gate 		    (struct sigevent *)a4);
2617c478bd9Sstevel@tonic-gate 		break;
2627c478bd9Sstevel@tonic-gate 	case AIOLIOWAIT:
2637c478bd9Sstevel@tonic-gate 		error = aliowait((int)a1, (void *)a2, (int)a3,
2647c478bd9Sstevel@tonic-gate 		    (struct sigevent *)a4, AIO_64);
2657c478bd9Sstevel@tonic-gate 		break;
2667c478bd9Sstevel@tonic-gate 	case AIOSUSPEND:
2677c478bd9Sstevel@tonic-gate 		error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
2687c478bd9Sstevel@tonic-gate 		    (int)a4, &rval, AIO_64);
2697c478bd9Sstevel@tonic-gate 		break;
2707c478bd9Sstevel@tonic-gate 	case AIOERROR:
2717c478bd9Sstevel@tonic-gate 		error = aioerror((void *)a1, AIO_64);
2727c478bd9Sstevel@tonic-gate 		break;
2737c478bd9Sstevel@tonic-gate 	case AIOAREAD:
2747c478bd9Sstevel@tonic-gate 		error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
2757c478bd9Sstevel@tonic-gate 		break;
2767c478bd9Sstevel@tonic-gate 	case AIOAWRITE:
2777c478bd9Sstevel@tonic-gate 		error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
2787c478bd9Sstevel@tonic-gate 		break;
2797c478bd9Sstevel@tonic-gate 	case AIOCANCEL:
2807c478bd9Sstevel@tonic-gate 		error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
2817c478bd9Sstevel@tonic-gate 		break;
2827c478bd9Sstevel@tonic-gate 
2837c478bd9Sstevel@tonic-gate 	/*
2847c478bd9Sstevel@tonic-gate 	 * The large file related stuff is valid only for
2857c478bd9Sstevel@tonic-gate 	 * 32 bit kernel and not for 64 bit kernel
2867c478bd9Sstevel@tonic-gate 	 * On 64 bit kernel we convert large file calls
2877c478bd9Sstevel@tonic-gate 	 * to regular 64bit calls.
2887c478bd9Sstevel@tonic-gate 	 */
2897c478bd9Sstevel@tonic-gate 
2907c478bd9Sstevel@tonic-gate 	default:
2917c478bd9Sstevel@tonic-gate 		error = EINVAL;
2927c478bd9Sstevel@tonic-gate 	}
2937c478bd9Sstevel@tonic-gate 	if (error)
2947c478bd9Sstevel@tonic-gate 		return ((int64_t)set_errno(error));
2957c478bd9Sstevel@tonic-gate 	return (rval);
2967c478bd9Sstevel@tonic-gate }
2977c478bd9Sstevel@tonic-gate #endif
2987c478bd9Sstevel@tonic-gate 
2997c478bd9Sstevel@tonic-gate static int
3007c478bd9Sstevel@tonic-gate kaio(
3017c478bd9Sstevel@tonic-gate 	ulong_t *uap,
3027c478bd9Sstevel@tonic-gate 	rval_t *rvp)
3037c478bd9Sstevel@tonic-gate {
3047c478bd9Sstevel@tonic-gate 	long rval = 0;
3057c478bd9Sstevel@tonic-gate 	int	error = 0;
3067c478bd9Sstevel@tonic-gate 	offset_t	off;
3077c478bd9Sstevel@tonic-gate 
3087c478bd9Sstevel@tonic-gate 
3097c478bd9Sstevel@tonic-gate 		rvp->r_vals = 0;
3107c478bd9Sstevel@tonic-gate #if defined(_LITTLE_ENDIAN)
3117c478bd9Sstevel@tonic-gate 	off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
3127c478bd9Sstevel@tonic-gate #else
3137c478bd9Sstevel@tonic-gate 	off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
3147c478bd9Sstevel@tonic-gate #endif
3157c478bd9Sstevel@tonic-gate 
3167c478bd9Sstevel@tonic-gate 	switch (uap[0] & ~AIO_POLL_BIT) {
3177c478bd9Sstevel@tonic-gate 	/*
3187c478bd9Sstevel@tonic-gate 	 * It must be the 32 bit system call on 64 bit kernel
3197c478bd9Sstevel@tonic-gate 	 */
3207c478bd9Sstevel@tonic-gate 	case AIOREAD:
3217c478bd9Sstevel@tonic-gate 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
3227c478bd9Sstevel@tonic-gate 		    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
3237c478bd9Sstevel@tonic-gate 	case AIOWRITE:
3247c478bd9Sstevel@tonic-gate 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
3257c478bd9Sstevel@tonic-gate 		    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
3267c478bd9Sstevel@tonic-gate 	case AIOWAIT:
3277c478bd9Sstevel@tonic-gate 		error = aiowait((struct	timeval *)uap[1], (int)uap[2],
3287c478bd9Sstevel@tonic-gate 		    &rval);
3297c478bd9Sstevel@tonic-gate 		break;
3307c478bd9Sstevel@tonic-gate 	case AIOWAITN:
3317c478bd9Sstevel@tonic-gate 		error = aiowaitn((void *)uap[1], (uint_t)uap[2],
3327c478bd9Sstevel@tonic-gate 		    (uint_t *)uap[3], (timespec_t *)uap[4]);
3337c478bd9Sstevel@tonic-gate 		break;
3347c478bd9Sstevel@tonic-gate 	case AIONOTIFY:
3357c478bd9Sstevel@tonic-gate 		return (aionotify());
3367c478bd9Sstevel@tonic-gate 	case AIOINIT:
3377c478bd9Sstevel@tonic-gate 		return (aioinit());
3387c478bd9Sstevel@tonic-gate 	case AIOSTART:
3397c478bd9Sstevel@tonic-gate 		return (aiostart());
3407c478bd9Sstevel@tonic-gate 	case AIOLIO:
3417c478bd9Sstevel@tonic-gate 		return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
3427c478bd9Sstevel@tonic-gate 		    (void *)uap[4]));
3437c478bd9Sstevel@tonic-gate 	case AIOLIOWAIT:
3447c478bd9Sstevel@tonic-gate 		return (aliowait((int)uap[1], (void *)uap[2],
3457c478bd9Sstevel@tonic-gate 		    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
3467c478bd9Sstevel@tonic-gate 	case AIOSUSPEND:
3477c478bd9Sstevel@tonic-gate 		error = aiosuspend((void *)uap[1], (int)uap[2],
3487c478bd9Sstevel@tonic-gate 		    (timespec_t *)uap[3], (int)uap[4],
3497c478bd9Sstevel@tonic-gate 		    &rval, AIO_32);
3507c478bd9Sstevel@tonic-gate 		break;
3517c478bd9Sstevel@tonic-gate 	case AIOERROR:
3527c478bd9Sstevel@tonic-gate 		return (aioerror((void *)uap[1], AIO_32));
3537c478bd9Sstevel@tonic-gate 	case AIOAREAD:
3547c478bd9Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1],
3557c478bd9Sstevel@tonic-gate 		    FREAD, AIO_32));
3567c478bd9Sstevel@tonic-gate 	case AIOAWRITE:
3577c478bd9Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1],
3587c478bd9Sstevel@tonic-gate 		    FWRITE, AIO_32));
3597c478bd9Sstevel@tonic-gate 	case AIOCANCEL:
3607c478bd9Sstevel@tonic-gate 		error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
3617c478bd9Sstevel@tonic-gate 		    AIO_32));
3627c478bd9Sstevel@tonic-gate 		break;
3637c478bd9Sstevel@tonic-gate 	case AIOLIO64:
3647c478bd9Sstevel@tonic-gate 		return (alioLF((int)uap[1], (void *)uap[2],
3657c478bd9Sstevel@tonic-gate 		    (int)uap[3], (void *)uap[4]));
3667c478bd9Sstevel@tonic-gate 	case AIOLIOWAIT64:
3677c478bd9Sstevel@tonic-gate 		return (aliowait(uap[1], (void *)uap[2],
3687c478bd9Sstevel@tonic-gate 		    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
3697c478bd9Sstevel@tonic-gate 	case AIOSUSPEND64:
3707c478bd9Sstevel@tonic-gate 		error = aiosuspend((void *)uap[1], (int)uap[2],
3717c478bd9Sstevel@tonic-gate 		    (timespec_t *)uap[3], (int)uap[4], &rval,
3727c478bd9Sstevel@tonic-gate 		    AIO_LARGEFILE);
3737c478bd9Sstevel@tonic-gate 		break;
3747c478bd9Sstevel@tonic-gate 	case AIOERROR64:
3757c478bd9Sstevel@tonic-gate 		return (aioerror((void *)uap[1], AIO_LARGEFILE));
3767c478bd9Sstevel@tonic-gate 	case AIOAREAD64:
3777c478bd9Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1], FREAD,
3787c478bd9Sstevel@tonic-gate 		    AIO_LARGEFILE));
3797c478bd9Sstevel@tonic-gate 	case AIOAWRITE64:
3807c478bd9Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
3817c478bd9Sstevel@tonic-gate 		    AIO_LARGEFILE));
3827c478bd9Sstevel@tonic-gate 	case AIOCANCEL64:
3837c478bd9Sstevel@tonic-gate 		error = (aio_cancel((int)uap[1], (void *)uap[2],
3847c478bd9Sstevel@tonic-gate 		    &rval, AIO_LARGEFILE));
3857c478bd9Sstevel@tonic-gate 		break;
3867c478bd9Sstevel@tonic-gate 	default:
3877c478bd9Sstevel@tonic-gate 		return (EINVAL);
3887c478bd9Sstevel@tonic-gate 	}
3897c478bd9Sstevel@tonic-gate 
3907c478bd9Sstevel@tonic-gate 	rvp->r_val1 = rval;
3917c478bd9Sstevel@tonic-gate 	return (error);
3927c478bd9Sstevel@tonic-gate }
3937c478bd9Sstevel@tonic-gate 
3947c478bd9Sstevel@tonic-gate /*
3957c478bd9Sstevel@tonic-gate  * wake up LWPs in this process that are sleeping in
3967c478bd9Sstevel@tonic-gate  * aiowait().
3977c478bd9Sstevel@tonic-gate  */
3987c478bd9Sstevel@tonic-gate static int
3997c478bd9Sstevel@tonic-gate aionotify(void)
4007c478bd9Sstevel@tonic-gate {
4017c478bd9Sstevel@tonic-gate 	aio_t	*aiop;
4027c478bd9Sstevel@tonic-gate 
4037c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
4047c478bd9Sstevel@tonic-gate 	if (aiop == NULL)
4057c478bd9Sstevel@tonic-gate 		return (0);
4067c478bd9Sstevel@tonic-gate 
4077c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
4087c478bd9Sstevel@tonic-gate 	aiop->aio_notifycnt++;
4097c478bd9Sstevel@tonic-gate 	cv_broadcast(&aiop->aio_waitcv);
4107c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
4117c478bd9Sstevel@tonic-gate 
4127c478bd9Sstevel@tonic-gate 	return (0);
4137c478bd9Sstevel@tonic-gate }
4147c478bd9Sstevel@tonic-gate 
4157c478bd9Sstevel@tonic-gate static int
4167c478bd9Sstevel@tonic-gate timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
4177c478bd9Sstevel@tonic-gate 	timestruc_t **rqtp, int *blocking)
4187c478bd9Sstevel@tonic-gate {
4197c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
4207c478bd9Sstevel@tonic-gate 	struct timeval32 wait_time_32;
4217c478bd9Sstevel@tonic-gate #endif
4227c478bd9Sstevel@tonic-gate 	struct timeval wait_time;
4237c478bd9Sstevel@tonic-gate 	model_t	model = get_udatamodel();
4247c478bd9Sstevel@tonic-gate 
4257c478bd9Sstevel@tonic-gate 	*rqtp = NULL;
4267c478bd9Sstevel@tonic-gate 	if (timout == NULL) {		/* wait indefinitely */
4277c478bd9Sstevel@tonic-gate 		*blocking = 1;
4287c478bd9Sstevel@tonic-gate 		return (0);
4297c478bd9Sstevel@tonic-gate 	}
4307c478bd9Sstevel@tonic-gate 
4317c478bd9Sstevel@tonic-gate 	/*
4327c478bd9Sstevel@tonic-gate 	 * Need to correctly compare with the -1 passed in for a user
4337c478bd9Sstevel@tonic-gate 	 * address pointer, with both 32 bit and 64 bit apps.
4347c478bd9Sstevel@tonic-gate 	 */
4357c478bd9Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
4367c478bd9Sstevel@tonic-gate 		if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */
4377c478bd9Sstevel@tonic-gate 			*blocking = 0;
4387c478bd9Sstevel@tonic-gate 			return (0);
4397c478bd9Sstevel@tonic-gate 		}
4407c478bd9Sstevel@tonic-gate 
4417c478bd9Sstevel@tonic-gate 		if (copyin(timout, &wait_time, sizeof (wait_time)))
4427c478bd9Sstevel@tonic-gate 			return (EFAULT);
4437c478bd9Sstevel@tonic-gate 	}
4447c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
4457c478bd9Sstevel@tonic-gate 	else {
4467c478bd9Sstevel@tonic-gate 		/*
4477c478bd9Sstevel@tonic-gate 		 * -1 from a 32bit app. It will not get sign extended.
4487c478bd9Sstevel@tonic-gate 		 * don't wait if -1.
4497c478bd9Sstevel@tonic-gate 		 */
4507c478bd9Sstevel@tonic-gate 		if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
4517c478bd9Sstevel@tonic-gate 			*blocking = 0;
4527c478bd9Sstevel@tonic-gate 			return (0);
4537c478bd9Sstevel@tonic-gate 		}
4547c478bd9Sstevel@tonic-gate 
4557c478bd9Sstevel@tonic-gate 		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
4567c478bd9Sstevel@tonic-gate 			return (EFAULT);
4577c478bd9Sstevel@tonic-gate 		TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
4587c478bd9Sstevel@tonic-gate 	}
4597c478bd9Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
4607c478bd9Sstevel@tonic-gate 
4617c478bd9Sstevel@tonic-gate 	if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */
4627c478bd9Sstevel@tonic-gate 		*blocking = 0;
4637c478bd9Sstevel@tonic-gate 		return (0);
4647c478bd9Sstevel@tonic-gate 	}
4657c478bd9Sstevel@tonic-gate 
4667c478bd9Sstevel@tonic-gate 	if (wait_time.tv_sec < 0 ||
4677c478bd9Sstevel@tonic-gate 	    wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
4687c478bd9Sstevel@tonic-gate 		return (EINVAL);
4697c478bd9Sstevel@tonic-gate 
4707c478bd9Sstevel@tonic-gate 	rqtime->tv_sec = wait_time.tv_sec;
4717c478bd9Sstevel@tonic-gate 	rqtime->tv_nsec = wait_time.tv_usec * 1000;
4727c478bd9Sstevel@tonic-gate 	*rqtp = rqtime;
4737c478bd9Sstevel@tonic-gate 	*blocking = 1;
4747c478bd9Sstevel@tonic-gate 
4757c478bd9Sstevel@tonic-gate 	return (0);
4767c478bd9Sstevel@tonic-gate }
4777c478bd9Sstevel@tonic-gate 
4787c478bd9Sstevel@tonic-gate static int
4797c478bd9Sstevel@tonic-gate timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
4807c478bd9Sstevel@tonic-gate 	timestruc_t **rqtp, int *blocking)
4817c478bd9Sstevel@tonic-gate {
4827c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
4837c478bd9Sstevel@tonic-gate 	timespec32_t wait_time_32;
4847c478bd9Sstevel@tonic-gate #endif
4857c478bd9Sstevel@tonic-gate 	model_t	model = get_udatamodel();
4867c478bd9Sstevel@tonic-gate 
4877c478bd9Sstevel@tonic-gate 	*rqtp = NULL;
4887c478bd9Sstevel@tonic-gate 	if (timout == NULL) {
4897c478bd9Sstevel@tonic-gate 		*blocking = 1;
4907c478bd9Sstevel@tonic-gate 		return (0);
4917c478bd9Sstevel@tonic-gate 	}
4927c478bd9Sstevel@tonic-gate 
4937c478bd9Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
4947c478bd9Sstevel@tonic-gate 		if (copyin(timout, rqtime, sizeof (*rqtime)))
4957c478bd9Sstevel@tonic-gate 			return (EFAULT);
4967c478bd9Sstevel@tonic-gate 	}
4977c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
4987c478bd9Sstevel@tonic-gate 	else {
4997c478bd9Sstevel@tonic-gate 		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
5007c478bd9Sstevel@tonic-gate 			return (EFAULT);
5017c478bd9Sstevel@tonic-gate 		TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
5027c478bd9Sstevel@tonic-gate 	}
5037c478bd9Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
5047c478bd9Sstevel@tonic-gate 
5057c478bd9Sstevel@tonic-gate 	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
5067c478bd9Sstevel@tonic-gate 		*blocking = 0;
5077c478bd9Sstevel@tonic-gate 		return (0);
5087c478bd9Sstevel@tonic-gate 	}
5097c478bd9Sstevel@tonic-gate 
5107c478bd9Sstevel@tonic-gate 	if (rqtime->tv_sec < 0 ||
5117c478bd9Sstevel@tonic-gate 	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
5127c478bd9Sstevel@tonic-gate 		return (EINVAL);
5137c478bd9Sstevel@tonic-gate 
5147c478bd9Sstevel@tonic-gate 	*rqtp = rqtime;
5157c478bd9Sstevel@tonic-gate 	*blocking = 1;
5167c478bd9Sstevel@tonic-gate 
5177c478bd9Sstevel@tonic-gate 	return (0);
5187c478bd9Sstevel@tonic-gate }
5197c478bd9Sstevel@tonic-gate 
5207c478bd9Sstevel@tonic-gate /*ARGSUSED*/
5217c478bd9Sstevel@tonic-gate static int
5227c478bd9Sstevel@tonic-gate aiowait(
5237c478bd9Sstevel@tonic-gate 	struct timeval	*timout,
5247c478bd9Sstevel@tonic-gate 	int	dontblockflg,
5257c478bd9Sstevel@tonic-gate 	long	*rval)
5267c478bd9Sstevel@tonic-gate {
5277c478bd9Sstevel@tonic-gate 	int 		error;
5287c478bd9Sstevel@tonic-gate 	aio_t		*aiop;
5297c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp;
5307c478bd9Sstevel@tonic-gate 	clock_t		status;
5317c478bd9Sstevel@tonic-gate 	int		blocking;
5327c478bd9Sstevel@tonic-gate 	int		timecheck;
5337c478bd9Sstevel@tonic-gate 	timestruc_t	rqtime;
5347c478bd9Sstevel@tonic-gate 	timestruc_t	*rqtp;
5357c478bd9Sstevel@tonic-gate 
5367c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
5377c478bd9Sstevel@tonic-gate 	if (aiop == NULL)
5387c478bd9Sstevel@tonic-gate 		return (EINVAL);
5397c478bd9Sstevel@tonic-gate 
5407c478bd9Sstevel@tonic-gate 	/*
5417c478bd9Sstevel@tonic-gate 	 * Establish the absolute future time for the timeout.
5427c478bd9Sstevel@tonic-gate 	 */
5437c478bd9Sstevel@tonic-gate 	error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
5447c478bd9Sstevel@tonic-gate 	if (error)
5457c478bd9Sstevel@tonic-gate 		return (error);
5467c478bd9Sstevel@tonic-gate 	if (rqtp) {
5477c478bd9Sstevel@tonic-gate 		timestruc_t now;
5487c478bd9Sstevel@tonic-gate 		timecheck = timechanged;
5497c478bd9Sstevel@tonic-gate 		gethrestime(&now);
5507c478bd9Sstevel@tonic-gate 		timespecadd(rqtp, &now);
5517c478bd9Sstevel@tonic-gate 	}
5527c478bd9Sstevel@tonic-gate 
5537c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
5547c478bd9Sstevel@tonic-gate 	for (;;) {
5557c478bd9Sstevel@tonic-gate 		/* process requests on poll queue */
5567c478bd9Sstevel@tonic-gate 		if (aiop->aio_pollq) {
5577c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
5587c478bd9Sstevel@tonic-gate 			aio_cleanup(0);
5597c478bd9Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
5607c478bd9Sstevel@tonic-gate 		}
5617c478bd9Sstevel@tonic-gate 		if ((reqp = aio_req_remove(NULL)) != NULL) {
5627c478bd9Sstevel@tonic-gate 			*rval = (long)reqp->aio_req_resultp;
5637c478bd9Sstevel@tonic-gate 			break;
5647c478bd9Sstevel@tonic-gate 		}
5657c478bd9Sstevel@tonic-gate 		/* user-level done queue might not be empty */
5667c478bd9Sstevel@tonic-gate 		if (aiop->aio_notifycnt > 0) {
5677c478bd9Sstevel@tonic-gate 			aiop->aio_notifycnt--;
5687c478bd9Sstevel@tonic-gate 			*rval = 1;
5697c478bd9Sstevel@tonic-gate 			break;
5707c478bd9Sstevel@tonic-gate 		}
5717c478bd9Sstevel@tonic-gate 		/* don't block if no outstanding aio */
5727c478bd9Sstevel@tonic-gate 		if (aiop->aio_outstanding == 0 && dontblockflg) {
5737c478bd9Sstevel@tonic-gate 			error = EINVAL;
5747c478bd9Sstevel@tonic-gate 			break;
5757c478bd9Sstevel@tonic-gate 		}
5767c478bd9Sstevel@tonic-gate 		if (blocking) {
5777c478bd9Sstevel@tonic-gate 			status = cv_waituntil_sig(&aiop->aio_waitcv,
5787c478bd9Sstevel@tonic-gate 			    &aiop->aio_mutex, rqtp, timecheck);
5797c478bd9Sstevel@tonic-gate 
5807c478bd9Sstevel@tonic-gate 			if (status > 0)		/* check done queue again */
5817c478bd9Sstevel@tonic-gate 				continue;
5827c478bd9Sstevel@tonic-gate 			if (status == 0) {	/* interrupted by a signal */
5837c478bd9Sstevel@tonic-gate 				error = EINTR;
5847c478bd9Sstevel@tonic-gate 				*rval = -1;
5857c478bd9Sstevel@tonic-gate 			} else {		/* timer expired */
5867c478bd9Sstevel@tonic-gate 				error = ETIME;
5877c478bd9Sstevel@tonic-gate 			}
5887c478bd9Sstevel@tonic-gate 		}
5897c478bd9Sstevel@tonic-gate 		break;
5907c478bd9Sstevel@tonic-gate 	}
5917c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
5927c478bd9Sstevel@tonic-gate 	if (reqp) {
5937c478bd9Sstevel@tonic-gate 		aphysio_unlock(reqp);
5947c478bd9Sstevel@tonic-gate 		aio_copyout_result(reqp);
5957c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
5967c478bd9Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
5977c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
5987c478bd9Sstevel@tonic-gate 	}
5997c478bd9Sstevel@tonic-gate 	return (error);
6007c478bd9Sstevel@tonic-gate }
6017c478bd9Sstevel@tonic-gate 
6027c478bd9Sstevel@tonic-gate /*
6037c478bd9Sstevel@tonic-gate  * aiowaitn can be used to reap completed asynchronous requests submitted with
6047c478bd9Sstevel@tonic-gate  * lio_listio, aio_read or aio_write.
6057c478bd9Sstevel@tonic-gate  * This function only reaps asynchronous raw I/Os.
6067c478bd9Sstevel@tonic-gate  */
6077c478bd9Sstevel@tonic-gate 
6087c478bd9Sstevel@tonic-gate /*ARGSUSED*/
static int
aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
{
	int 		error = 0;
	aio_t		*aiop;
	aio_req_t	*reqlist = NULL;	/* completed requests reaped so far */
	caddr_t		iocblist = NULL;	/* array of iocb ptr's */
	uint_t		waitcnt, cnt = 0;	/* iocb cnt */
	size_t		iocbsz;			/* users iocb size */
	size_t		riocbsz;		/* returned iocb size */
	int		iocb_index = 0;
	model_t		model = get_udatamodel();
	int		blocking = 1;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	/* nothing was ever submitted, so nothing can be reaped */
	if (aiop->aio_outstanding == 0)
		return (EAGAIN);

	if (copyin(nwait, &waitcnt, sizeof (uint_t)))
		return (EFAULT);

	/* set *nwait to zero, if we must return prematurely */
	if (copyout(&cnt, nwait, sizeof (uint_t)))
		return (EFAULT);

	if (waitcnt == 0) {
		/* *nwait == 0 means poll: reap up to nent without blocking */
		blocking = 0;
		rqtp = NULL;
		waitcnt = nent;
	} else {
		error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
		if (error)
			return (error);
	}

	/* size of the user's iocb pointer array, per data model */
	if (model == DATAMODEL_NATIVE)
		iocbsz = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		iocbsz = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	/*
	 * Only one aio_waitn call is allowed at a time.
	 * The active aio_waitn will collect all requests
	 * out of the "done" list and if necessary it will wait
	 * for some/all pending requests to fulfill the nwait
	 * parameter.
	 * A second or further aio_waitn calls will sleep here
	 * until the active aio_waitn finishes and leaves the kernel.
	 * If the second call does not block (poll), then return
	 * immediately with the error code : EAGAIN.
	 * If the second call should block, then sleep here, but
	 * do not touch the timeout. The timeout starts when this
	 * aio_waitn-call becomes active.
	 */

	mutex_enter(&aiop->aio_mutex);

	while (aiop->aio_flags & AIO_WAITN) {
		if (blocking == 0) {
			mutex_exit(&aiop->aio_mutex);
			return (EAGAIN);
		}

		/* block, no timeout */
		aiop->aio_flags |= AIO_WAITN_PENDING;
		if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
			mutex_exit(&aiop->aio_mutex);
			return (EINTR);
		}
	}

	/*
	 * Establish the absolute future time for the timeout.
	 */
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	/* the cached per-process iocb array is too small; replace it */
	if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
		aiop->aio_iocb = NULL;
	}

	if (aiop->aio_iocb == NULL) {
		iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
		if (iocblist == NULL) {
			mutex_exit(&aiop->aio_mutex);
			return (ENOMEM);
		}
		aiop->aio_iocb = (aiocb_t **)iocblist;
		aiop->aio_iocbsz = iocbsz;
	} else {
		/* reuse the cached array (it is at least iocbsz bytes) */
		iocblist = (char *)aiop->aio_iocb;
	}

	aiop->aio_waitncnt = waitcnt;
	aiop->aio_flags |= AIO_WAITN;

	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}

		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
			aiop->aio_waitncnt = waitcnt - cnt;
		}

		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			error = 0;
			break;
		}

		/*
		 * if we are here second time as a result of timer
		 * expiration, we reset error if there are enough
		 * aiocb's to satisfy request.
		 * We return also if all requests are already done
		 * and we picked up the whole done queue.
		 */

		if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
		    aiop->aio_doneq == NULL)) {
			error = 0;
			break;
		}

		if ((cnt < waitcnt) && blocking) {
			int rval = cv_waituntil_sig(&aiop->aio_waitcv,
				&aiop->aio_mutex, rqtp, timecheck);
			if (rval > 0)
				continue;
			if (rval < 0) {
				/* timer expired: one last non-blocking pass */
				error = ETIME;
				blocking = 0;
				continue;
			}
			error = EINTR;
		}
		break;
	}

	mutex_exit(&aiop->aio_mutex);

	if (cnt > 0) {

		/* unlock/free the reaped requests, filling iocblist */
		iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
		    aiop, model);

		if (model == DATAMODEL_NATIVE)
			riocbsz = (sizeof (aiocb_t *) * cnt);
#ifdef	_SYSCALL32_IMPL
		else
			riocbsz = (sizeof (caddr32_t) * cnt);
#endif  /* _SYSCALL32_IMPL */

		/* return the reaped iocb pointers and the final count */
		if (copyout(iocblist, uiocb, riocbsz) ||
		    copyout(&cnt, nwait, sizeof (uint_t)))
			error = EFAULT;
	}

	/* don't keep an oversized iocb array cached across calls */
	if (aiop->aio_iocbsz > AIO_IOCB_MAX) {
		kmem_free(iocblist, aiop->aio_iocbsz);
		aiop->aio_iocb = NULL;
	}

	/* check if there is another thread waiting for execution */
	mutex_enter(&aiop->aio_mutex);
	aiop->aio_flags &= ~AIO_WAITN;
	if (aiop->aio_flags & AIO_WAITN_PENDING) {
		aiop->aio_flags &= ~AIO_WAITN_PENDING;
		cv_signal(&aiop->aio_waitncv);
	}
	mutex_exit(&aiop->aio_mutex);

	return (error);
}
8037c478bd9Sstevel@tonic-gate 
8047c478bd9Sstevel@tonic-gate /*
8057c478bd9Sstevel@tonic-gate  * aio_unlock_requests
 * copies out the result of the request as well as the return value.
 * It builds the list of completed asynchronous requests,
 * unlocks the allocated memory ranges and
 * puts the aio request structure back into the free list.
8107c478bd9Sstevel@tonic-gate  */
8117c478bd9Sstevel@tonic-gate 
8127c478bd9Sstevel@tonic-gate static int
8137c478bd9Sstevel@tonic-gate aio_unlock_requests(
8147c478bd9Sstevel@tonic-gate 	caddr_t	iocblist,
8157c478bd9Sstevel@tonic-gate 	int	iocb_index,
8167c478bd9Sstevel@tonic-gate 	aio_req_t *reqlist,
8177c478bd9Sstevel@tonic-gate 	aio_t	*aiop,
8187c478bd9Sstevel@tonic-gate 	model_t	model)
8197c478bd9Sstevel@tonic-gate {
8207c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp, *nreqp;
8217c478bd9Sstevel@tonic-gate 
8227c478bd9Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
8237c478bd9Sstevel@tonic-gate 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
8247c478bd9Sstevel@tonic-gate 			(((caddr_t *)iocblist)[iocb_index++]) =
8257c478bd9Sstevel@tonic-gate 			    reqp->aio_req_iocb.iocb;
8267c478bd9Sstevel@tonic-gate 			nreqp = reqp->aio_req_next;
8277c478bd9Sstevel@tonic-gate 			aphysio_unlock(reqp);
8287c478bd9Sstevel@tonic-gate 			aio_copyout_result(reqp);
8297c478bd9Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
8307c478bd9Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
8317c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
8327c478bd9Sstevel@tonic-gate 		}
8337c478bd9Sstevel@tonic-gate 	}
8347c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
8357c478bd9Sstevel@tonic-gate 	else {
8367c478bd9Sstevel@tonic-gate 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
8377c478bd9Sstevel@tonic-gate 			((caddr32_t *)iocblist)[iocb_index++] =
8387c478bd9Sstevel@tonic-gate 			    reqp->aio_req_iocb.iocb32;
8397c478bd9Sstevel@tonic-gate 			nreqp = reqp->aio_req_next;
8407c478bd9Sstevel@tonic-gate 			aphysio_unlock(reqp);
8417c478bd9Sstevel@tonic-gate 			aio_copyout_result(reqp);
8427c478bd9Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
8437c478bd9Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
8447c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
8457c478bd9Sstevel@tonic-gate 		}
8467c478bd9Sstevel@tonic-gate 	}
8477c478bd9Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
8487c478bd9Sstevel@tonic-gate 	return (iocb_index);
8497c478bd9Sstevel@tonic-gate }
8507c478bd9Sstevel@tonic-gate 
8517c478bd9Sstevel@tonic-gate /*
8527c478bd9Sstevel@tonic-gate  * aio_reqlist_concat
8537c478bd9Sstevel@tonic-gate  * moves "max" elements from the done queue to the reqlist queue and removes
8547c478bd9Sstevel@tonic-gate  * the AIO_DONEQ flag.
8557c478bd9Sstevel@tonic-gate  * - reqlist queue is a simple linked list
8567c478bd9Sstevel@tonic-gate  * - done queue is a double linked list
8577c478bd9Sstevel@tonic-gate  */
8587c478bd9Sstevel@tonic-gate 
static int
aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
{
	aio_req_t *q2, *q2work, *list;
	int count = 0;

	/*
	 * NOTE(review): q2work is dereferenced unconditionally below, so
	 * this assumes aiop->aio_doneq is non-NULL — callers check the
	 * done queue before calling; confirm for any new caller.
	 */
	list = *reqlist;
	q2 = aiop->aio_doneq;
	q2work = q2;
	/* clear AIO_DONEQ on up to "max" entries, counting as we go */
	while (max-- > 0) {
		q2work->aio_req_flags &= ~AIO_DONEQ;
		q2work = q2work->aio_req_next;
		count++;
		/* wrapped around the circular done queue: took everything */
		if (q2work == q2)
			break;
	}

	if (q2work == q2) {
		/* all elements revised */
		q2->aio_req_prev->aio_req_next = list;
		list = q2;
		aiop->aio_doneq = NULL;
	} else {
		/*
		 * max < elements in the doneq
		 * detach only the required amount of elements
		 * out of the doneq
		 */
		q2work->aio_req_prev->aio_req_next = list;
		list = q2;

		/* q2work becomes the new head of the (still circular) doneq */
		aiop->aio_doneq = q2work;
		q2work->aio_req_prev = q2->aio_req_prev;
		q2->aio_req_prev->aio_req_next = q2work;
	}
	*reqlist = list;
	return (count);
}
8977c478bd9Sstevel@tonic-gate 
8987c478bd9Sstevel@tonic-gate /*ARGSUSED*/
static int
aiosuspend(
	void	*aiocb,
	int	nent,
	struct	timespec	*timout,
	int	flag,
	long	*rval,
	int	run_mode)
{
	int 		error;
	aio_t		*aiop;
	aio_req_t	*reqp, *found, *next;
	caddr_t		cbplist = NULL;
	aiocb_t		*cbp, **ucbp;
#ifdef	_SYSCALL32_IMPL
	aiocb32_t	*cbp32;
	caddr32_t	*ucbp32;
#endif  /* _SYSCALL32_IMPL */
	aiocb64_32_t	*cbp64;
	int		rv;
	int		i;
	size_t		ssize;
	model_t		model = get_udatamodel();
	int		blocking;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	/*
	 * If we are not blocking and there's no IO complete
	 * skip aiocb copyin.
	 */
	if (!blocking && (aiop->aio_pollq == NULL) &&
	    (aiop->aio_doneq == NULL)) {
		return (EAGAIN);
	}

	/* size of the user's array of aiocb pointers, per data model */
	if (model == DATAMODEL_NATIVE)
		ssize = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		ssize = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	cbplist = kmem_alloc(ssize, KM_NOSLEEP);
	if (cbplist == NULL)
		return (ENOMEM);

	if (copyin(aiocb, cbplist, ssize)) {
		error = EFAULT;
		goto done;
	}

	found = NULL;
	/*
	 * we need to get the aio_cleanupq_mutex since we call
	 * aio_req_done().
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_cleanupq_mutex);
			aio_cleanup(0);
			/* reacquire both locks in the correct order */
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
		}
		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			if (model == DATAMODEL_NATIVE)
				ucbp = (aiocb_t **)cbplist;
#ifdef	_SYSCALL32_IMPL
			else
				ucbp32 = (caddr32_t *)cbplist;
#endif  /* _SYSCALL32_IMPL */
			/*
			 * Scan the user's aiocb list for entries whose
			 * request has completed, collecting them on "found".
			 * NOTE(review): reqp is only assigned inside the
			 * model/run_mode branches below; if none matches
			 * (e.g. an unexpected run_mode in the 32-bit case)
			 * a stale value from a prior iteration is tested —
			 * confirm all callers pass a valid run_mode.
			 */
			for (i = 0; i < nent; i++) {
				if (model == DATAMODEL_NATIVE) {
					/* NULL entries in the list are skipped */
					if ((cbp = *ucbp++) == NULL)
						continue;
					if (run_mode != AIO_LARGEFILE)
						reqp = aio_req_done(
						    &cbp->aio_resultp);
					else {
						cbp64 = (aiocb64_32_t *)cbp;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}
				}
#ifdef	_SYSCALL32_IMPL
				else {
					if (run_mode == AIO_32) {
						if ((cbp32 =
						    (aiocb32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp32->aio_resultp);
					} else if (run_mode == AIO_LARGEFILE) {
						if ((cbp64 =
						    (aiocb64_32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						    reqp = aio_req_done(
							&cbp64->aio_resultp);
					}

				}
#endif  /* _SYSCALL32_IMPL */
				if (reqp) {
					/* prepend to the list of matches */
					reqp->aio_req_next = found;
					found = reqp;
				}
				/* done queue drained; no point scanning on */
				if (aiop->aio_doneq == NULL)
					break;
			}
			if (found)
				break;
		}
		if (aiop->aio_notifycnt > 0) {
			/*
			 * nothing on the kernel's queue. the user
			 * has notified the kernel that it has items
			 * on a user-level queue.
			 */
			aiop->aio_notifycnt--;
			*rval = 1;
			error = 0;
			break;
		}
		/* don't block if nothing is outstanding */
		if (aiop->aio_outstanding == 0) {
			error = EAGAIN;
			break;
		}
		if (blocking) {
			/*
			 * drop the aio_cleanupq_mutex as we are
			 * going to block.
			 */
			mutex_exit(&aiop->aio_cleanupq_mutex);
			rv = cv_waituntil_sig(&aiop->aio_waitcv,
				&aiop->aio_mutex, rqtp, timecheck);
			/*
			 * we have to drop aio_mutex and
			 * grab it in the right order.
			 */
			mutex_exit(&aiop->aio_mutex);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
			if (rv > 0)	/* check done queue again */
				continue;
			if (rv == 0)	/* interrupted by a signal */
				error = EINTR;
			else		/* timer expired */
				error = ETIME;
		} else {
			error = EAGAIN;
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	/* unlock, copy results out, and free every matched request */
	for (reqp = found; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
done:
	kmem_free(cbplist, ssize);
	return (error);
}
10927c478bd9Sstevel@tonic-gate 
10937c478bd9Sstevel@tonic-gate /*
10947c478bd9Sstevel@tonic-gate  * initialize aio by allocating an aio_t struct for this
10957c478bd9Sstevel@tonic-gate  * process.
10967c478bd9Sstevel@tonic-gate  */
10977c478bd9Sstevel@tonic-gate static int
10987c478bd9Sstevel@tonic-gate aioinit(void)
10997c478bd9Sstevel@tonic-gate {
11007c478bd9Sstevel@tonic-gate 	proc_t *p = curproc;
11017c478bd9Sstevel@tonic-gate 	aio_t *aiop;
11027c478bd9Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
11037c478bd9Sstevel@tonic-gate 	if ((aiop = p->p_aio) == NULL) {
11047c478bd9Sstevel@tonic-gate 		aiop = aio_aiop_alloc();
11057c478bd9Sstevel@tonic-gate 		p->p_aio = aiop;
11067c478bd9Sstevel@tonic-gate 	}
11077c478bd9Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
11087c478bd9Sstevel@tonic-gate 	if (aiop == NULL)
11097c478bd9Sstevel@tonic-gate 		return (ENOMEM);
11107c478bd9Sstevel@tonic-gate 	return (0);
11117c478bd9Sstevel@tonic-gate }
11127c478bd9Sstevel@tonic-gate 
11137c478bd9Sstevel@tonic-gate /*
11147c478bd9Sstevel@tonic-gate  * start a special thread that will cleanup after aio requests
11157c478bd9Sstevel@tonic-gate  * that are preventing a segment from being unmapped. as_unmap()
 * blocks until all physio to this segment is completed. this
11177c478bd9Sstevel@tonic-gate  * doesn't happen until all the pages in this segment are not
11187c478bd9Sstevel@tonic-gate  * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
11197c478bd9Sstevel@tonic-gate  * requests still outstanding. this special thread will make sure
11207c478bd9Sstevel@tonic-gate  * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
11217c478bd9Sstevel@tonic-gate  *
11227c478bd9Sstevel@tonic-gate  * this function will return an error if the process has only
11237c478bd9Sstevel@tonic-gate  * one LWP. the assumption is that the caller is a separate LWP
11247c478bd9Sstevel@tonic-gate  * that remains blocked in the kernel for the life of this process.
11257c478bd9Sstevel@tonic-gate  */
11267c478bd9Sstevel@tonic-gate static int
11277c478bd9Sstevel@tonic-gate aiostart(void)
11287c478bd9Sstevel@tonic-gate {
11297c478bd9Sstevel@tonic-gate 	proc_t *p = curproc;
11307c478bd9Sstevel@tonic-gate 	aio_t *aiop;
11317c478bd9Sstevel@tonic-gate 	int first, error = 0;
11327c478bd9Sstevel@tonic-gate 
11337c478bd9Sstevel@tonic-gate 	if (p->p_lwpcnt == 1)
11347c478bd9Sstevel@tonic-gate 		return (EDEADLK);
11357c478bd9Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
11367c478bd9Sstevel@tonic-gate 	if ((aiop = p->p_aio) == NULL)
11377c478bd9Sstevel@tonic-gate 		error = EINVAL;
11387c478bd9Sstevel@tonic-gate 	else {
11397c478bd9Sstevel@tonic-gate 		first = aiop->aio_ok;
11407c478bd9Sstevel@tonic-gate 		if (aiop->aio_ok == 0)
11417c478bd9Sstevel@tonic-gate 			aiop->aio_ok = 1;
11427c478bd9Sstevel@tonic-gate 	}
11437c478bd9Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
11447c478bd9Sstevel@tonic-gate 	if (error == 0 && first == 0) {
11457c478bd9Sstevel@tonic-gate 		return (aio_cleanup_thread(aiop));
11467c478bd9Sstevel@tonic-gate 		/* should return only to exit */
11477c478bd9Sstevel@tonic-gate 	}
11487c478bd9Sstevel@tonic-gate 	return (error);
11497c478bd9Sstevel@tonic-gate }
11507c478bd9Sstevel@tonic-gate 
11517c478bd9Sstevel@tonic-gate /*
11527c478bd9Sstevel@tonic-gate  * Associate an aiocb with a port.
11537c478bd9Sstevel@tonic-gate  * This function is used by aiorw() to associate a transaction with a port.
11547c478bd9Sstevel@tonic-gate  * Allocate an event port structure (port_alloc_event()) and store the
11557c478bd9Sstevel@tonic-gate  * delivered user pointer (portnfy_user) in the portkev_user field of the
11567c478bd9Sstevel@tonic-gate  * port_kevent_t structure..
11577c478bd9Sstevel@tonic-gate  * The aio_req_portkev pointer in the aio_req_t structure was added to identify
11587c478bd9Sstevel@tonic-gate  * the port association.
11597c478bd9Sstevel@tonic-gate  */
11607c478bd9Sstevel@tonic-gate 
11617c478bd9Sstevel@tonic-gate static int
11627c478bd9Sstevel@tonic-gate aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp, aio_req_t *reqp)
11637c478bd9Sstevel@tonic-gate {
11647c478bd9Sstevel@tonic-gate 	port_kevent_t	*pkevp = NULL;
11657c478bd9Sstevel@tonic-gate 	int		error;
11667c478bd9Sstevel@tonic-gate 
11677c478bd9Sstevel@tonic-gate 	error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
11687c478bd9Sstevel@tonic-gate 	    PORT_SOURCE_AIO, &pkevp);
11697c478bd9Sstevel@tonic-gate 	if (error) {
11707c478bd9Sstevel@tonic-gate 		if ((error == ENOMEM) || (error == EAGAIN))
11717c478bd9Sstevel@tonic-gate 			error = EAGAIN;
11727c478bd9Sstevel@tonic-gate 		else
11737c478bd9Sstevel@tonic-gate 			error = EINVAL;
11747c478bd9Sstevel@tonic-gate 	} else {
11757c478bd9Sstevel@tonic-gate 		port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
11767c478bd9Sstevel@tonic-gate 		    aio_port_callback, reqp);
11777c478bd9Sstevel@tonic-gate 		reqp->aio_req_portkev = pkevp;
11787c478bd9Sstevel@tonic-gate 		reqp->aio_req_port = pntfy->portnfy_port;
11797c478bd9Sstevel@tonic-gate 	}
11807c478bd9Sstevel@tonic-gate 	return (error);
11817c478bd9Sstevel@tonic-gate }
11827c478bd9Sstevel@tonic-gate 
11837c478bd9Sstevel@tonic-gate /*
11847c478bd9Sstevel@tonic-gate  * Associate an aiocb with a port.
11857c478bd9Sstevel@tonic-gate  * This function is used by lio_listio() to associate a transaction with a port.
 * Allocate an event port structure (port_alloc_event()) and store the
 * delivered user pointer (portnfy_user) in the portkev_user field of the
 * port_kevent_t structure.
 * The aio_req_portkev pointer in the aio_req_t structure was added to identify
11897c478bd9Sstevel@tonic-gate  * the port association.
11907c478bd9Sstevel@tonic-gate  * The event port notification can be requested attaching the port_notify_t
11917c478bd9Sstevel@tonic-gate  * structure to the sigevent argument of lio_listio() or attaching the
11927c478bd9Sstevel@tonic-gate  * port_notify_t structure to the sigevent structure which is embedded in the
11937c478bd9Sstevel@tonic-gate  * aiocb.
 * The attachment to the global sigevent structure is valid for all aiocbs
11957c478bd9Sstevel@tonic-gate  * in the list.
11967c478bd9Sstevel@tonic-gate  */
11977c478bd9Sstevel@tonic-gate 
11987c478bd9Sstevel@tonic-gate static int
11997c478bd9Sstevel@tonic-gate aio_req_assoc_port(struct sigevent *sigev, void	*user, aiocb_t *cbp,
12007c478bd9Sstevel@tonic-gate     aio_req_t *reqp, port_kevent_t *pkevtp)
12017c478bd9Sstevel@tonic-gate {
12027c478bd9Sstevel@tonic-gate 	port_kevent_t	*pkevp = NULL;
12037c478bd9Sstevel@tonic-gate 	port_notify_t	pntfy;
12047c478bd9Sstevel@tonic-gate 	int		error;
12057c478bd9Sstevel@tonic-gate 
12067c478bd9Sstevel@tonic-gate 	if (sigev->sigev_notify == SIGEV_PORT) {
12077c478bd9Sstevel@tonic-gate 		/* aiocb has an own port notification embedded */
12087c478bd9Sstevel@tonic-gate 		if (copyin((void *)sigev->sigev_value.sival_ptr, &pntfy,
12097c478bd9Sstevel@tonic-gate 		    sizeof (port_notify_t)))
12107c478bd9Sstevel@tonic-gate 			return (EFAULT);
12117c478bd9Sstevel@tonic-gate 
12127c478bd9Sstevel@tonic-gate 		error = port_alloc_event(pntfy.portnfy_port, PORT_ALLOC_DEFAULT,
12137c478bd9Sstevel@tonic-gate 		    PORT_SOURCE_AIO, &pkevp);
12147c478bd9Sstevel@tonic-gate 		if (error) {
12157c478bd9Sstevel@tonic-gate 			if ((error == ENOMEM) || (error == EAGAIN))
12167c478bd9Sstevel@tonic-gate 				return (EAGAIN);
12177c478bd9Sstevel@tonic-gate 			else
12187c478bd9Sstevel@tonic-gate 				return (EINVAL);
12197c478bd9Sstevel@tonic-gate 		}
12207c478bd9Sstevel@tonic-gate 		/* use this values instead of the global values in port */
12217c478bd9Sstevel@tonic-gate 
12227c478bd9Sstevel@tonic-gate 		port_init_event(pkevp, (uintptr_t)cbp, pntfy.portnfy_user,
12237c478bd9Sstevel@tonic-gate 		    aio_port_callback, reqp);
12247c478bd9Sstevel@tonic-gate 		reqp->aio_req_port = pntfy.portnfy_port;
12257c478bd9Sstevel@tonic-gate 	} else {
12267c478bd9Sstevel@tonic-gate 		/* use global port notification */
12277c478bd9Sstevel@tonic-gate 		error = port_dup_event(pkevtp, &pkevp, PORT_ALLOC_DEFAULT);
12287c478bd9Sstevel@tonic-gate 		if (error)
12297c478bd9Sstevel@tonic-gate 			return (EAGAIN);
12307c478bd9Sstevel@tonic-gate 		port_init_event(pkevp, (uintptr_t)cbp, user, aio_port_callback,
12317c478bd9Sstevel@tonic-gate 		    reqp);
12327c478bd9Sstevel@tonic-gate 	}
12337c478bd9Sstevel@tonic-gate 	reqp->aio_req_portkev = pkevp;
12347c478bd9Sstevel@tonic-gate 	return (0);
12357c478bd9Sstevel@tonic-gate }
12367c478bd9Sstevel@tonic-gate 
12377c478bd9Sstevel@tonic-gate /*
12387c478bd9Sstevel@tonic-gate  * Same comments as in aio_req_assoc_port(), see above.
12397c478bd9Sstevel@tonic-gate  */
12407c478bd9Sstevel@tonic-gate 
12417c478bd9Sstevel@tonic-gate static int
12427c478bd9Sstevel@tonic-gate aio_req_assoc_port32(struct sigevent32 *sigev, void *user, aiocb_t *cbp,
12437c478bd9Sstevel@tonic-gate     aio_req_t *reqp, port_kevent_t *pkevtp)
12447c478bd9Sstevel@tonic-gate {
12457c478bd9Sstevel@tonic-gate 	port_kevent_t	*pkevp = NULL;
12467c478bd9Sstevel@tonic-gate 	port_notify32_t	pntfy;
12477c478bd9Sstevel@tonic-gate 	int		error;
12487c478bd9Sstevel@tonic-gate 
12497c478bd9Sstevel@tonic-gate 	if (sigev->sigev_notify == SIGEV_PORT) {
12507c478bd9Sstevel@tonic-gate 		if (copyin((void *)(uintptr_t)sigev->sigev_value.sival_int,
12517c478bd9Sstevel@tonic-gate 		    &pntfy, sizeof (port_notify32_t)))
12527c478bd9Sstevel@tonic-gate 			return (EFAULT);
12537c478bd9Sstevel@tonic-gate 
12547c478bd9Sstevel@tonic-gate 		error = port_alloc_event(pntfy.portnfy_port,
12557c478bd9Sstevel@tonic-gate 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevp);
12567c478bd9Sstevel@tonic-gate 		if (error) {
12577c478bd9Sstevel@tonic-gate 			if ((error == ENOMEM) || (error == EAGAIN))
12587c478bd9Sstevel@tonic-gate 				return (EAGAIN);
12597c478bd9Sstevel@tonic-gate 			else
12607c478bd9Sstevel@tonic-gate 				return (EINVAL);
12617c478bd9Sstevel@tonic-gate 		}
12627c478bd9Sstevel@tonic-gate 		/* use this values instead of the global values in port */
12637c478bd9Sstevel@tonic-gate 
12647c478bd9Sstevel@tonic-gate 		port_init_event(pkevp, (uintptr_t)cbp,
12657c478bd9Sstevel@tonic-gate 		    (void *)(uintptr_t)pntfy.portnfy_user,
12667c478bd9Sstevel@tonic-gate 		    aio_port_callback, reqp);
12677c478bd9Sstevel@tonic-gate 		reqp->aio_req_port = pntfy.portnfy_port;
12687c478bd9Sstevel@tonic-gate 	} else {
12697c478bd9Sstevel@tonic-gate 		error = port_dup_event(pkevtp, &pkevp, PORT_ALLOC_DEFAULT);
12707c478bd9Sstevel@tonic-gate 		if (error)
12717c478bd9Sstevel@tonic-gate 			return (EAGAIN);
12727c478bd9Sstevel@tonic-gate 		port_init_event(pkevp, (uintptr_t)cbp, user, aio_port_callback,
12737c478bd9Sstevel@tonic-gate 		    reqp);
12747c478bd9Sstevel@tonic-gate 	}
12757c478bd9Sstevel@tonic-gate 	reqp->aio_req_portkev = pkevp;
12767c478bd9Sstevel@tonic-gate 	return (0);
12777c478bd9Sstevel@tonic-gate }
12787c478bd9Sstevel@tonic-gate 
12797c478bd9Sstevel@tonic-gate 
12807c478bd9Sstevel@tonic-gate #ifdef _LP64
12817c478bd9Sstevel@tonic-gate 
12827c478bd9Sstevel@tonic-gate /*
12837c478bd9Sstevel@tonic-gate  * Asynchronous list IO. A chain of aiocb's are copied in
12847c478bd9Sstevel@tonic-gate  * one at a time. If the aiocb is invalid, it is skipped.
12857c478bd9Sstevel@tonic-gate  * For each aiocb, the appropriate driver entry point is
12867c478bd9Sstevel@tonic-gate  * called. Optimize for the common case where the list
12877c478bd9Sstevel@tonic-gate  * of requests is to the same file descriptor.
12887c478bd9Sstevel@tonic-gate  *
12897c478bd9Sstevel@tonic-gate  * One possible optimization is to define a new driver entry
12907c478bd9Sstevel@tonic-gate  * point that supports a list of IO requests. Whether this
12917c478bd9Sstevel@tonic-gate  * improves performance depends somewhat on the driver's
12927c478bd9Sstevel@tonic-gate  * locking strategy. Processing a list could adversely impact
12937c478bd9Sstevel@tonic-gate  * the driver's interrupt latency.
12947c478bd9Sstevel@tonic-gate  */
12957c478bd9Sstevel@tonic-gate /*ARGSUSED*/
12967c478bd9Sstevel@tonic-gate static int
12977c478bd9Sstevel@tonic-gate alio(
12987c478bd9Sstevel@tonic-gate 	int	opcode,
12997c478bd9Sstevel@tonic-gate 	int	mode_arg,
13007c478bd9Sstevel@tonic-gate 	aiocb_t	**aiocb_arg,
13017c478bd9Sstevel@tonic-gate 	int	nent,
13027c478bd9Sstevel@tonic-gate 	struct	sigevent *sigev)
13037c478bd9Sstevel@tonic-gate 
13047c478bd9Sstevel@tonic-gate {
13057c478bd9Sstevel@tonic-gate 	file_t		*fp;
13067c478bd9Sstevel@tonic-gate 	file_t		*prev_fp = NULL;
13077c478bd9Sstevel@tonic-gate 	int		prev_mode = -1;
13087c478bd9Sstevel@tonic-gate 	struct vnode	*vp;
13097c478bd9Sstevel@tonic-gate 	aio_lio_t	*head;
13107c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp;
13117c478bd9Sstevel@tonic-gate 	aio_t		*aiop;
13127c478bd9Sstevel@tonic-gate 	caddr_t		cbplist;
13137c478bd9Sstevel@tonic-gate 	aiocb_t		*cbp, **ucbp;
13147c478bd9Sstevel@tonic-gate 	aiocb_t		cb;
13157c478bd9Sstevel@tonic-gate 	aiocb_t		*aiocb = &cb;
13167c478bd9Sstevel@tonic-gate 	struct sigevent sigevk;
13177c478bd9Sstevel@tonic-gate 	sigqueue_t	*sqp;
13187c478bd9Sstevel@tonic-gate 	int		(*aio_func)();
13197c478bd9Sstevel@tonic-gate 	int		mode;
13207c478bd9Sstevel@tonic-gate 	int		error = 0;
13217c478bd9Sstevel@tonic-gate 	int		aio_errors = 0;
13227c478bd9Sstevel@tonic-gate 	int		i;
13237c478bd9Sstevel@tonic-gate 	size_t		ssize;
13247c478bd9Sstevel@tonic-gate 	int		deadhead = 0;
13257c478bd9Sstevel@tonic-gate 	int		aio_notsupported = 0;
13267c478bd9Sstevel@tonic-gate 	int		aio_use_port = 0;
13277c478bd9Sstevel@tonic-gate 	port_kevent_t	*pkevtp = NULL;
13287c478bd9Sstevel@tonic-gate 	port_notify_t	pnotify;
13297c478bd9Sstevel@tonic-gate 
13307c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
13317c478bd9Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
13327c478bd9Sstevel@tonic-gate 		return (EINVAL);
13337c478bd9Sstevel@tonic-gate 
13347c478bd9Sstevel@tonic-gate 	ssize = (sizeof (aiocb_t *) * nent);
13357c478bd9Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
13367c478bd9Sstevel@tonic-gate 	ucbp = (aiocb_t **)cbplist;
13377c478bd9Sstevel@tonic-gate 
13387c478bd9Sstevel@tonic-gate 	if (copyin(aiocb_arg, cbplist, sizeof (aiocb_t *) * nent)) {
13397c478bd9Sstevel@tonic-gate 		kmem_free(cbplist, ssize);
13407c478bd9Sstevel@tonic-gate 		return (EFAULT);
13417c478bd9Sstevel@tonic-gate 	}
13427c478bd9Sstevel@tonic-gate 
13437c478bd9Sstevel@tonic-gate 	if (sigev) {
13447c478bd9Sstevel@tonic-gate 		if (copyin(sigev, &sigevk, sizeof (struct sigevent))) {
13457c478bd9Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
13467c478bd9Sstevel@tonic-gate 			return (EFAULT);
13477c478bd9Sstevel@tonic-gate 		}
13487c478bd9Sstevel@tonic-gate 	}
13497c478bd9Sstevel@tonic-gate 
13507c478bd9Sstevel@tonic-gate 	/*
13517c478bd9Sstevel@tonic-gate 	 * a list head should be allocated if notification is
13527c478bd9Sstevel@tonic-gate 	 * enabled for this list.
13537c478bd9Sstevel@tonic-gate 	 */
13547c478bd9Sstevel@tonic-gate 	head = NULL;
13557c478bd9Sstevel@tonic-gate 
13567c478bd9Sstevel@tonic-gate 	/* Event Ports  */
13577c478bd9Sstevel@tonic-gate 
13587c478bd9Sstevel@tonic-gate 	if (sigev && sigevk.sigev_notify == SIGEV_PORT) {
13597c478bd9Sstevel@tonic-gate 		/* Use port for completion notification */
13607c478bd9Sstevel@tonic-gate 		if (copyin(sigevk.sigev_value.sival_ptr, &pnotify,
13617c478bd9Sstevel@tonic-gate 		    sizeof (port_notify_t))) {
13627c478bd9Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
13637c478bd9Sstevel@tonic-gate 			return (EFAULT);
13647c478bd9Sstevel@tonic-gate 		}
13657c478bd9Sstevel@tonic-gate 		/* use event ports for the list of aiocbs */
13667c478bd9Sstevel@tonic-gate 		aio_use_port = 1;
13677c478bd9Sstevel@tonic-gate 		error = port_alloc_event(pnotify.portnfy_port,
13687c478bd9Sstevel@tonic-gate 		    PORT_ALLOC_PRIVATE, PORT_SOURCE_AIO, &pkevtp);
13697c478bd9Sstevel@tonic-gate 		if (error) {
13707c478bd9Sstevel@tonic-gate 			if ((error == ENOMEM) || (error == EAGAIN))
13717c478bd9Sstevel@tonic-gate 				error = EAGAIN;
13727c478bd9Sstevel@tonic-gate 			else
13737c478bd9Sstevel@tonic-gate 				error = EINVAL;
13747c478bd9Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
13757c478bd9Sstevel@tonic-gate 			return (error);
13767c478bd9Sstevel@tonic-gate 		}
13777c478bd9Sstevel@tonic-gate 	} else if ((mode_arg == LIO_WAIT) || sigev) {
13787c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
13797c478bd9Sstevel@tonic-gate 		error = aio_lio_alloc(&head);
13807c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
13817c478bd9Sstevel@tonic-gate 		if (error)
13827c478bd9Sstevel@tonic-gate 			goto done;
13837c478bd9Sstevel@tonic-gate 		deadhead = 1;
13847c478bd9Sstevel@tonic-gate 		head->lio_nent = nent;
13857c478bd9Sstevel@tonic-gate 		head->lio_refcnt = nent;
13867c478bd9Sstevel@tonic-gate 		if (sigev && (sigevk.sigev_notify == SIGEV_SIGNAL) &&
13877c478bd9Sstevel@tonic-gate 		    (sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG)) {
13887c478bd9Sstevel@tonic-gate 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
13897c478bd9Sstevel@tonic-gate 			if (sqp == NULL) {
13907c478bd9Sstevel@tonic-gate 				error = EAGAIN;
13917c478bd9Sstevel@tonic-gate 				goto done;
13927c478bd9Sstevel@tonic-gate 			}
13937c478bd9Sstevel@tonic-gate 			sqp->sq_func = NULL;
13947c478bd9Sstevel@tonic-gate 			sqp->sq_next = NULL;
13957c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_code = SI_ASYNCIO;
13967c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_pid = curproc->p_pid;
13977c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_ctid = PRCTID(curproc);
13987c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_zoneid = getzoneid();
13997c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
14007c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_signo = sigevk.sigev_signo;
14017c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_value = sigevk.sigev_value;
14027c478bd9Sstevel@tonic-gate 			head->lio_sigqp = sqp;
14037c478bd9Sstevel@tonic-gate 		} else {
14047c478bd9Sstevel@tonic-gate 			head->lio_sigqp = NULL;
14057c478bd9Sstevel@tonic-gate 		}
14067c478bd9Sstevel@tonic-gate 	}
14077c478bd9Sstevel@tonic-gate 
14087c478bd9Sstevel@tonic-gate 	for (i = 0; i < nent; i++, ucbp++) {
14097c478bd9Sstevel@tonic-gate 
14107c478bd9Sstevel@tonic-gate 		cbp = *ucbp;
14117c478bd9Sstevel@tonic-gate 		/* skip entry if it can't be copied. */
14127c478bd9Sstevel@tonic-gate 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (aiocb_t))) {
14137c478bd9Sstevel@tonic-gate 			if (head) {
14147c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
14157c478bd9Sstevel@tonic-gate 				head->lio_nent--;
14167c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
14177c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
14187c478bd9Sstevel@tonic-gate 			}
14197c478bd9Sstevel@tonic-gate 			continue;
14207c478bd9Sstevel@tonic-gate 		}
14217c478bd9Sstevel@tonic-gate 
14227c478bd9Sstevel@tonic-gate 		/* skip if opcode for aiocb is LIO_NOP */
14237c478bd9Sstevel@tonic-gate 
14247c478bd9Sstevel@tonic-gate 		mode = aiocb->aio_lio_opcode;
14257c478bd9Sstevel@tonic-gate 		if (mode == LIO_NOP) {
14267c478bd9Sstevel@tonic-gate 			cbp = NULL;
14277c478bd9Sstevel@tonic-gate 			if (head) {
14287c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
14297c478bd9Sstevel@tonic-gate 				head->lio_nent--;
14307c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
14317c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
14327c478bd9Sstevel@tonic-gate 			}
14337c478bd9Sstevel@tonic-gate 			continue;
14347c478bd9Sstevel@tonic-gate 		}
14357c478bd9Sstevel@tonic-gate 
14367c478bd9Sstevel@tonic-gate 		/* increment file descriptor's ref count. */
14377c478bd9Sstevel@tonic-gate 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
14387c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
14397c478bd9Sstevel@tonic-gate 			if (head) {
14407c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
14417c478bd9Sstevel@tonic-gate 				head->lio_nent--;
14427c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
14437c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
14447c478bd9Sstevel@tonic-gate 			}
14457c478bd9Sstevel@tonic-gate 			aio_errors++;
14467c478bd9Sstevel@tonic-gate 			continue;
14477c478bd9Sstevel@tonic-gate 		}
14487c478bd9Sstevel@tonic-gate 
14497c478bd9Sstevel@tonic-gate 		vp = fp->f_vnode;
14507c478bd9Sstevel@tonic-gate 
14517c478bd9Sstevel@tonic-gate 		/*
14527c478bd9Sstevel@tonic-gate 		 * check the permission of the partition
14537c478bd9Sstevel@tonic-gate 		 */
14547c478bd9Sstevel@tonic-gate 		mode = aiocb->aio_lio_opcode;
14557c478bd9Sstevel@tonic-gate 		if ((fp->f_flag & mode) == 0) {
14567c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
14577c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
14587c478bd9Sstevel@tonic-gate 			if (head) {
14597c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
14607c478bd9Sstevel@tonic-gate 				head->lio_nent--;
14617c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
14627c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
14637c478bd9Sstevel@tonic-gate 			}
14647c478bd9Sstevel@tonic-gate 			aio_errors++;
14657c478bd9Sstevel@tonic-gate 			continue;
14667c478bd9Sstevel@tonic-gate 		}
14677c478bd9Sstevel@tonic-gate 
14687c478bd9Sstevel@tonic-gate 		/*
14697c478bd9Sstevel@tonic-gate 		 * common case where requests are to the same fd for the
14707c478bd9Sstevel@tonic-gate 		 * same r/w operation.
14717c478bd9Sstevel@tonic-gate 		 * for UFS, need to set EBADFD
14727c478bd9Sstevel@tonic-gate 		 */
14737c478bd9Sstevel@tonic-gate 		if ((fp != prev_fp) || (mode != prev_mode)) {
14747c478bd9Sstevel@tonic-gate 			aio_func = check_vp(vp, mode);
14757c478bd9Sstevel@tonic-gate 			if (aio_func == NULL) {
14767c478bd9Sstevel@tonic-gate 				prev_fp = NULL;
14777c478bd9Sstevel@tonic-gate 				releasef(aiocb->aio_fildes);
14787c478bd9Sstevel@tonic-gate 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
14797c478bd9Sstevel@tonic-gate 				aio_notsupported++;
14807c478bd9Sstevel@tonic-gate 				if (head) {
14817c478bd9Sstevel@tonic-gate 					mutex_enter(&aiop->aio_mutex);
14827c478bd9Sstevel@tonic-gate 					head->lio_nent--;
14837c478bd9Sstevel@tonic-gate 					head->lio_refcnt--;
14847c478bd9Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
14857c478bd9Sstevel@tonic-gate 				}
14867c478bd9Sstevel@tonic-gate 				continue;
14877c478bd9Sstevel@tonic-gate 			} else {
14887c478bd9Sstevel@tonic-gate 				prev_fp = fp;
14897c478bd9Sstevel@tonic-gate 				prev_mode = mode;
14907c478bd9Sstevel@tonic-gate 			}
14917c478bd9Sstevel@tonic-gate 		}
14927c478bd9Sstevel@tonic-gate 
14937c478bd9Sstevel@tonic-gate 		if (error = aio_req_setup(&reqp, aiop, aiocb,
14947c478bd9Sstevel@tonic-gate 		    &cbp->aio_resultp, aio_use_port, vp)) {
14957c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
14967c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
14977c478bd9Sstevel@tonic-gate 			if (head) {
14987c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
14997c478bd9Sstevel@tonic-gate 				head->lio_nent--;
15007c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
15017c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
15027c478bd9Sstevel@tonic-gate 			}
15037c478bd9Sstevel@tonic-gate 			aio_errors++;
15047c478bd9Sstevel@tonic-gate 			continue;
15057c478bd9Sstevel@tonic-gate 		}
15067c478bd9Sstevel@tonic-gate 
15077c478bd9Sstevel@tonic-gate 		reqp->aio_req_lio = head;
15087c478bd9Sstevel@tonic-gate 		deadhead = 0;
15097c478bd9Sstevel@tonic-gate 
15107c478bd9Sstevel@tonic-gate 		/*
15117c478bd9Sstevel@tonic-gate 		 * Set the errno field now before sending the request to
15127c478bd9Sstevel@tonic-gate 		 * the driver to avoid a race condition
15137c478bd9Sstevel@tonic-gate 		 */
15147c478bd9Sstevel@tonic-gate 		(void) suword32(&cbp->aio_resultp.aio_errno,
15157c478bd9Sstevel@tonic-gate 		    EINPROGRESS);
15167c478bd9Sstevel@tonic-gate 
15177c478bd9Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb = (caddr_t)cbp;
15187c478bd9Sstevel@tonic-gate 
15197c478bd9Sstevel@tonic-gate 		if (aio_use_port) {
15207c478bd9Sstevel@tonic-gate 			reqp->aio_req_port = pnotify.portnfy_port;
15217c478bd9Sstevel@tonic-gate 			error = aio_req_assoc_port(&aiocb->aio_sigevent,
15227c478bd9Sstevel@tonic-gate 			    pnotify.portnfy_user, cbp, reqp, pkevtp);
15237c478bd9Sstevel@tonic-gate 		}
15247c478bd9Sstevel@tonic-gate 
15257c478bd9Sstevel@tonic-gate 		/*
15267c478bd9Sstevel@tonic-gate 		 * send the request to driver.
15277c478bd9Sstevel@tonic-gate 		 * Clustering: If PXFS vnode, call PXFS function.
15287c478bd9Sstevel@tonic-gate 		 */
15297c478bd9Sstevel@tonic-gate 		if (error == 0) {
15307c478bd9Sstevel@tonic-gate 			if (aiocb->aio_nbytes == 0) {
15317c478bd9Sstevel@tonic-gate 				clear_active_fd(aiocb->aio_fildes);
15327c478bd9Sstevel@tonic-gate 				aio_zerolen(reqp);
15337c478bd9Sstevel@tonic-gate 				continue;
15347c478bd9Sstevel@tonic-gate 			}
15357c478bd9Sstevel@tonic-gate 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
15367c478bd9Sstevel@tonic-gate 			    CRED());
15377c478bd9Sstevel@tonic-gate 		}
15387c478bd9Sstevel@tonic-gate 		/*
15397c478bd9Sstevel@tonic-gate 		 * the fd's ref count is not decremented until the IO has
15407c478bd9Sstevel@tonic-gate 		 * completed unless there was an error.
15417c478bd9Sstevel@tonic-gate 		 */
15427c478bd9Sstevel@tonic-gate 		if (error) {
15437c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
15447c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
15457c478bd9Sstevel@tonic-gate 			if (head) {
15467c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
15477c478bd9Sstevel@tonic-gate 				head->lio_nent--;
15487c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
15497c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
15507c478bd9Sstevel@tonic-gate 			}
15517c478bd9Sstevel@tonic-gate 			if (error == ENOTSUP)
15527c478bd9Sstevel@tonic-gate 				aio_notsupported++;
15537c478bd9Sstevel@tonic-gate 			else
15547c478bd9Sstevel@tonic-gate 				aio_errors++;
15557c478bd9Sstevel@tonic-gate 			lio_set_error(reqp);
15567c478bd9Sstevel@tonic-gate 		} else {
15577c478bd9Sstevel@tonic-gate 			clear_active_fd(aiocb->aio_fildes);
15587c478bd9Sstevel@tonic-gate 		}
15597c478bd9Sstevel@tonic-gate 	}
15607c478bd9Sstevel@tonic-gate 
15617c478bd9Sstevel@tonic-gate 	if (pkevtp)
15627c478bd9Sstevel@tonic-gate 		port_free_event(pkevtp);
15637c478bd9Sstevel@tonic-gate 
15647c478bd9Sstevel@tonic-gate 	if (aio_notsupported) {
15657c478bd9Sstevel@tonic-gate 		error = ENOTSUP;
15667c478bd9Sstevel@tonic-gate 	} else if (aio_errors) {
15677c478bd9Sstevel@tonic-gate 		/*
15687c478bd9Sstevel@tonic-gate 		 * return EIO if any request failed
15697c478bd9Sstevel@tonic-gate 		 */
15707c478bd9Sstevel@tonic-gate 		error = EIO;
15717c478bd9Sstevel@tonic-gate 	}
15727c478bd9Sstevel@tonic-gate 
15737c478bd9Sstevel@tonic-gate 	if (mode_arg == LIO_WAIT) {
15747c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
15757c478bd9Sstevel@tonic-gate 		while (head->lio_refcnt > 0) {
15767c478bd9Sstevel@tonic-gate 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
15777c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
15787c478bd9Sstevel@tonic-gate 				error = EINTR;
15797c478bd9Sstevel@tonic-gate 				goto done;
15807c478bd9Sstevel@tonic-gate 			}
15817c478bd9Sstevel@tonic-gate 		}
15827c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
15837c478bd9Sstevel@tonic-gate 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
15847c478bd9Sstevel@tonic-gate 	}
15857c478bd9Sstevel@tonic-gate 
15867c478bd9Sstevel@tonic-gate done:
15877c478bd9Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
15887c478bd9Sstevel@tonic-gate 	if (deadhead) {
15897c478bd9Sstevel@tonic-gate 		if (head->lio_sigqp)
15907c478bd9Sstevel@tonic-gate 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
15917c478bd9Sstevel@tonic-gate 		kmem_free(head, sizeof (aio_lio_t));
15927c478bd9Sstevel@tonic-gate 	}
15937c478bd9Sstevel@tonic-gate 	return (error);
15947c478bd9Sstevel@tonic-gate }
15957c478bd9Sstevel@tonic-gate 
15967c478bd9Sstevel@tonic-gate #endif /* _LP64 */
15977c478bd9Sstevel@tonic-gate 
15987c478bd9Sstevel@tonic-gate /*
15997c478bd9Sstevel@tonic-gate  * Asynchronous list IO.
16007c478bd9Sstevel@tonic-gate  * If list I/O is called with LIO_WAIT it can still return
16017c478bd9Sstevel@tonic-gate  * before all the I/O's are completed if a signal is caught
16027c478bd9Sstevel@tonic-gate  * or if the list include UFS I/O requests. If this happens,
16037c478bd9Sstevel@tonic-gate  * libaio will call aliowait() to wait for the I/O's to
16047c478bd9Sstevel@tonic-gate  * complete
16057c478bd9Sstevel@tonic-gate  */
16067c478bd9Sstevel@tonic-gate /*ARGSUSED*/
16077c478bd9Sstevel@tonic-gate static int
16087c478bd9Sstevel@tonic-gate aliowait(
16097c478bd9Sstevel@tonic-gate 	int	mode,
16107c478bd9Sstevel@tonic-gate 	void	*aiocb,
16117c478bd9Sstevel@tonic-gate 	int	nent,
16127c478bd9Sstevel@tonic-gate 	void	*sigev,
16137c478bd9Sstevel@tonic-gate 	int	run_mode)
16147c478bd9Sstevel@tonic-gate {
16157c478bd9Sstevel@tonic-gate 	aio_lio_t	*head;
16167c478bd9Sstevel@tonic-gate 	aio_t		*aiop;
16177c478bd9Sstevel@tonic-gate 	caddr_t		cbplist;
16187c478bd9Sstevel@tonic-gate 	aiocb_t		*cbp, **ucbp;
16197c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
16207c478bd9Sstevel@tonic-gate 	aiocb32_t	*cbp32;
16217c478bd9Sstevel@tonic-gate 	caddr32_t	*ucbp32;
16227c478bd9Sstevel@tonic-gate 	aiocb64_32_t	*cbp64;
16237c478bd9Sstevel@tonic-gate #endif
16247c478bd9Sstevel@tonic-gate 	int		error = 0;
16257c478bd9Sstevel@tonic-gate 	int		i;
16267c478bd9Sstevel@tonic-gate 	size_t		ssize = 0;
16277c478bd9Sstevel@tonic-gate 	model_t		model = get_udatamodel();
16287c478bd9Sstevel@tonic-gate 
16297c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
16307c478bd9Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
16317c478bd9Sstevel@tonic-gate 		return (EINVAL);
16327c478bd9Sstevel@tonic-gate 
16337c478bd9Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
16347c478bd9Sstevel@tonic-gate 		ssize = (sizeof (aiocb_t *) * nent);
16357c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
16367c478bd9Sstevel@tonic-gate 	else
16377c478bd9Sstevel@tonic-gate 		ssize = (sizeof (caddr32_t) * nent);
16387c478bd9Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
16397c478bd9Sstevel@tonic-gate 
16407c478bd9Sstevel@tonic-gate 	if (ssize == 0)
16417c478bd9Sstevel@tonic-gate 		return (EINVAL);
16427c478bd9Sstevel@tonic-gate 
16437c478bd9Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
16447c478bd9Sstevel@tonic-gate 
16457c478bd9Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
16467c478bd9Sstevel@tonic-gate 		ucbp = (aiocb_t **)cbplist;
16477c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
16487c478bd9Sstevel@tonic-gate 	else
16497c478bd9Sstevel@tonic-gate 		ucbp32 = (caddr32_t *)cbplist;
16507c478bd9Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
16517c478bd9Sstevel@tonic-gate 
16527c478bd9Sstevel@tonic-gate 	if (copyin(aiocb, cbplist, ssize)) {
16537c478bd9Sstevel@tonic-gate 		error = EFAULT;
16547c478bd9Sstevel@tonic-gate 		goto done;
16557c478bd9Sstevel@tonic-gate 	}
16567c478bd9Sstevel@tonic-gate 
16577c478bd9Sstevel@tonic-gate 	/*
16587c478bd9Sstevel@tonic-gate 	 * To find the list head, we go through the
16597c478bd9Sstevel@tonic-gate 	 * list of aiocb structs, find the request
16607c478bd9Sstevel@tonic-gate 	 * its for, then get the list head that reqp
16617c478bd9Sstevel@tonic-gate 	 * points to
16627c478bd9Sstevel@tonic-gate 	 */
16637c478bd9Sstevel@tonic-gate 	head = NULL;
16647c478bd9Sstevel@tonic-gate 
16657c478bd9Sstevel@tonic-gate 	for (i = 0; i < nent; i++) {
16667c478bd9Sstevel@tonic-gate 		if (model == DATAMODEL_NATIVE) {
16677c478bd9Sstevel@tonic-gate 			/*
16687c478bd9Sstevel@tonic-gate 			 * Since we are only checking for a NULL pointer
16697c478bd9Sstevel@tonic-gate 			 * Following should work on both native data sizes
16707c478bd9Sstevel@tonic-gate 			 * as well as for largefile aiocb.
16717c478bd9Sstevel@tonic-gate 			 */
16727c478bd9Sstevel@tonic-gate 			if ((cbp = *ucbp++) == NULL)
16737c478bd9Sstevel@tonic-gate 				continue;
16747c478bd9Sstevel@tonic-gate 			if (run_mode != AIO_LARGEFILE)
16757c478bd9Sstevel@tonic-gate 				if (head = aio_list_get(&cbp->aio_resultp))
16767c478bd9Sstevel@tonic-gate 					break;
16777c478bd9Sstevel@tonic-gate 			else {
16787c478bd9Sstevel@tonic-gate 				/*
16797c478bd9Sstevel@tonic-gate 				 * This is a case when largefile call is
16807c478bd9Sstevel@tonic-gate 				 * made on 32 bit kernel.
16817c478bd9Sstevel@tonic-gate 				 * Treat each pointer as pointer to
16827c478bd9Sstevel@tonic-gate 				 * aiocb64_32
16837c478bd9Sstevel@tonic-gate 				 */
16847c478bd9Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16857c478bd9Sstevel@tonic-gate 				    &(((aiocb64_32_t *)cbp)->aio_resultp)))
16867c478bd9Sstevel@tonic-gate 					break;
16877c478bd9Sstevel@tonic-gate 			}
16887c478bd9Sstevel@tonic-gate 		}
16897c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
16907c478bd9Sstevel@tonic-gate 		else {
16917c478bd9Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE) {
16927c478bd9Sstevel@tonic-gate 				if ((cbp64 = (aiocb64_32_t *)
16937c478bd9Sstevel@tonic-gate 				    (uintptr_t)*ucbp32++) == NULL)
16947c478bd9Sstevel@tonic-gate 					continue;
16957c478bd9Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16967c478bd9Sstevel@tonic-gate 				    &cbp64->aio_resultp))
16977c478bd9Sstevel@tonic-gate 					break;
16987c478bd9Sstevel@tonic-gate 			} else if (run_mode == AIO_32) {
16997c478bd9Sstevel@tonic-gate 				if ((cbp32 = (aiocb32_t *)
17007c478bd9Sstevel@tonic-gate 				    (uintptr_t)*ucbp32++) == NULL)
17017c478bd9Sstevel@tonic-gate 					continue;
17027c478bd9Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
17037c478bd9Sstevel@tonic-gate 				    &cbp32->aio_resultp))
17047c478bd9Sstevel@tonic-gate 					break;
17057c478bd9Sstevel@tonic-gate 			}
17067c478bd9Sstevel@tonic-gate 		}
17077c478bd9Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
17087c478bd9Sstevel@tonic-gate 	}
17097c478bd9Sstevel@tonic-gate 
17107c478bd9Sstevel@tonic-gate 	if (head == NULL) {
17117c478bd9Sstevel@tonic-gate 		error = EINVAL;
17127c478bd9Sstevel@tonic-gate 		goto done;
17137c478bd9Sstevel@tonic-gate 	}
17147c478bd9Sstevel@tonic-gate 
17157c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
17167c478bd9Sstevel@tonic-gate 	while (head->lio_refcnt > 0) {
17177c478bd9Sstevel@tonic-gate 		if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
17187c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
17197c478bd9Sstevel@tonic-gate 			error = EINTR;
17207c478bd9Sstevel@tonic-gate 			goto done;
17217c478bd9Sstevel@tonic-gate 		}
17227c478bd9Sstevel@tonic-gate 	}
17237c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
17247c478bd9Sstevel@tonic-gate 	alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);
17257c478bd9Sstevel@tonic-gate done:
17267c478bd9Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
17277c478bd9Sstevel@tonic-gate 	return (error);
17287c478bd9Sstevel@tonic-gate }
17297c478bd9Sstevel@tonic-gate 
17307c478bd9Sstevel@tonic-gate aio_lio_t *
17317c478bd9Sstevel@tonic-gate aio_list_get(aio_result_t *resultp)
17327c478bd9Sstevel@tonic-gate {
17337c478bd9Sstevel@tonic-gate 	aio_lio_t	*head = NULL;
17347c478bd9Sstevel@tonic-gate 	aio_t		*aiop;
17357c478bd9Sstevel@tonic-gate 	aio_req_t 	**bucket;
17367c478bd9Sstevel@tonic-gate 	aio_req_t 	*reqp;
17377c478bd9Sstevel@tonic-gate 	long		index;
17387c478bd9Sstevel@tonic-gate 
17397c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
17407c478bd9Sstevel@tonic-gate 	if (aiop == NULL)
17417c478bd9Sstevel@tonic-gate 		return (NULL);
17427c478bd9Sstevel@tonic-gate 
17437c478bd9Sstevel@tonic-gate 	if (resultp) {
17447c478bd9Sstevel@tonic-gate 		index = AIO_HASH(resultp);
17457c478bd9Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
17467c478bd9Sstevel@tonic-gate 		for (reqp = *bucket; reqp != NULL;
17477c478bd9Sstevel@tonic-gate 		    reqp = reqp->aio_hash_next) {
17487c478bd9Sstevel@tonic-gate 			if (reqp->aio_req_resultp == resultp) {
17497c478bd9Sstevel@tonic-gate 				head = reqp->aio_req_lio;
17507c478bd9Sstevel@tonic-gate 				return (head);
17517c478bd9Sstevel@tonic-gate 			}
17527c478bd9Sstevel@tonic-gate 		}
17537c478bd9Sstevel@tonic-gate 	}
17547c478bd9Sstevel@tonic-gate 	return (NULL);
17557c478bd9Sstevel@tonic-gate }
17567c478bd9Sstevel@tonic-gate 
17577c478bd9Sstevel@tonic-gate 
17587c478bd9Sstevel@tonic-gate static void
17597c478bd9Sstevel@tonic-gate lio_set_uerror(void *resultp, int error)
17607c478bd9Sstevel@tonic-gate {
17617c478bd9Sstevel@tonic-gate 	/*
17627c478bd9Sstevel@tonic-gate 	 * the resultp field is a pointer to where the
17637c478bd9Sstevel@tonic-gate 	 * error should be written out to the user's
17647c478bd9Sstevel@tonic-gate 	 * aiocb.
17657c478bd9Sstevel@tonic-gate 	 *
17667c478bd9Sstevel@tonic-gate 	 */
17677c478bd9Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_NATIVE) {
17687c478bd9Sstevel@tonic-gate 		(void) sulword(&((aio_result_t *)resultp)->aio_return,
17697c478bd9Sstevel@tonic-gate 		    (ssize_t)-1);
17707c478bd9Sstevel@tonic-gate 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
17717c478bd9Sstevel@tonic-gate 	}
17727c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
17737c478bd9Sstevel@tonic-gate 	else {
17747c478bd9Sstevel@tonic-gate 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
17757c478bd9Sstevel@tonic-gate 		    (uint_t)-1);
17767c478bd9Sstevel@tonic-gate 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
17777c478bd9Sstevel@tonic-gate 	}
17787c478bd9Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
17797c478bd9Sstevel@tonic-gate }
17807c478bd9Sstevel@tonic-gate 
17817c478bd9Sstevel@tonic-gate /*
17827c478bd9Sstevel@tonic-gate  * do cleanup completion for all requests in list. memory for
17837c478bd9Sstevel@tonic-gate  * each request is also freed.
17847c478bd9Sstevel@tonic-gate  */
17857c478bd9Sstevel@tonic-gate static void
17867c478bd9Sstevel@tonic-gate alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
17877c478bd9Sstevel@tonic-gate {
17887c478bd9Sstevel@tonic-gate 	int i;
17897c478bd9Sstevel@tonic-gate 	aio_req_t *reqp;
17907c478bd9Sstevel@tonic-gate 	aio_result_t *resultp;
17917c478bd9Sstevel@tonic-gate 	aiocb64_32_t	*aiocb_64;
17927c478bd9Sstevel@tonic-gate 
17937c478bd9Sstevel@tonic-gate 	for (i = 0; i < nent; i++) {
17947c478bd9Sstevel@tonic-gate 		if (get_udatamodel() == DATAMODEL_NATIVE) {
17957c478bd9Sstevel@tonic-gate 			if (cbp[i] == NULL)
17967c478bd9Sstevel@tonic-gate 				continue;
17977c478bd9Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE) {
17987c478bd9Sstevel@tonic-gate 				aiocb_64 = (aiocb64_32_t *)cbp[i];
17997c478bd9Sstevel@tonic-gate 				resultp = (aio_result_t *)&aiocb_64->
18007c478bd9Sstevel@tonic-gate 				    aio_resultp;
18017c478bd9Sstevel@tonic-gate 			} else
18027c478bd9Sstevel@tonic-gate 				resultp = &cbp[i]->aio_resultp;
18037c478bd9Sstevel@tonic-gate 		}
18047c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
18057c478bd9Sstevel@tonic-gate 		else {
18067c478bd9Sstevel@tonic-gate 			aiocb32_t	*aiocb_32;
18077c478bd9Sstevel@tonic-gate 			caddr32_t	*cbp32;
18087c478bd9Sstevel@tonic-gate 
18097c478bd9Sstevel@tonic-gate 			cbp32 = (caddr32_t *)cbp;
18107c478bd9Sstevel@tonic-gate 			if (cbp32[i] == NULL)
18117c478bd9Sstevel@tonic-gate 				continue;
18127c478bd9Sstevel@tonic-gate 			if (run_mode == AIO_32) {
18137c478bd9Sstevel@tonic-gate 				aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i];
18147c478bd9Sstevel@tonic-gate 				resultp = (aio_result_t *)&aiocb_32->
18157c478bd9Sstevel@tonic-gate 				    aio_resultp;
18167c478bd9Sstevel@tonic-gate 			} else if (run_mode == AIO_LARGEFILE) {
18177c478bd9Sstevel@tonic-gate 				aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i];
18187c478bd9Sstevel@tonic-gate 				resultp = (aio_result_t *)&aiocb_64->
18197c478bd9Sstevel@tonic-gate 				    aio_resultp;
18207c478bd9Sstevel@tonic-gate 			}
18217c478bd9Sstevel@tonic-gate 		}
18227c478bd9Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
18237c478bd9Sstevel@tonic-gate 		/*
18247c478bd9Sstevel@tonic-gate 		 * we need to get the aio_cleanupq_mutex since we call
18257c478bd9Sstevel@tonic-gate 		 * aio_req_done().
18267c478bd9Sstevel@tonic-gate 		 */
18277c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_cleanupq_mutex);
18287c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
18297c478bd9Sstevel@tonic-gate 		reqp = aio_req_done(resultp);
18307c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
18317c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_cleanupq_mutex);
18327c478bd9Sstevel@tonic-gate 		if (reqp != NULL) {
18337c478bd9Sstevel@tonic-gate 			aphysio_unlock(reqp);
18347c478bd9Sstevel@tonic-gate 			aio_copyout_result(reqp);
18357c478bd9Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
18367c478bd9Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
18377c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
18387c478bd9Sstevel@tonic-gate 		}
18397c478bd9Sstevel@tonic-gate 	}
18407c478bd9Sstevel@tonic-gate }
18417c478bd9Sstevel@tonic-gate 
18427c478bd9Sstevel@tonic-gate /*
18437c478bd9Sstevel@tonic-gate  * write out the results for an aio request that is
18447c478bd9Sstevel@tonic-gate  * done.
18457c478bd9Sstevel@tonic-gate  */
18467c478bd9Sstevel@tonic-gate static int
18477c478bd9Sstevel@tonic-gate aioerror(void *cb, int run_mode)
18487c478bd9Sstevel@tonic-gate {
18497c478bd9Sstevel@tonic-gate 	aio_result_t *resultp;
18507c478bd9Sstevel@tonic-gate 	aio_t *aiop;
18517c478bd9Sstevel@tonic-gate 	aio_req_t *reqp;
18527c478bd9Sstevel@tonic-gate 	int retval;
18537c478bd9Sstevel@tonic-gate 
18547c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
18557c478bd9Sstevel@tonic-gate 	if (aiop == NULL || cb == NULL)
18567c478bd9Sstevel@tonic-gate 		return (EINVAL);
18577c478bd9Sstevel@tonic-gate 
18587c478bd9Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_NATIVE) {
18597c478bd9Sstevel@tonic-gate 		if (run_mode == AIO_LARGEFILE)
18607c478bd9Sstevel@tonic-gate 			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
18617c478bd9Sstevel@tonic-gate 			    aio_resultp;
18627c478bd9Sstevel@tonic-gate 		else
18637c478bd9Sstevel@tonic-gate 			resultp = &((aiocb_t *)cb)->aio_resultp;
18647c478bd9Sstevel@tonic-gate 	}
18657c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
18667c478bd9Sstevel@tonic-gate 	else {
18677c478bd9Sstevel@tonic-gate 		if (run_mode == AIO_LARGEFILE)
18687c478bd9Sstevel@tonic-gate 			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
18697c478bd9Sstevel@tonic-gate 			    aio_resultp;
18707c478bd9Sstevel@tonic-gate 		else if (run_mode == AIO_32)
18717c478bd9Sstevel@tonic-gate 			resultp = (aio_result_t *)&((aiocb32_t *)cb)->
18727c478bd9Sstevel@tonic-gate 			    aio_resultp;
18737c478bd9Sstevel@tonic-gate 	}
18747c478bd9Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
18757c478bd9Sstevel@tonic-gate 	/*
18767c478bd9Sstevel@tonic-gate 	 * we need to get the aio_cleanupq_mutex since we call
18777c478bd9Sstevel@tonic-gate 	 * aio_req_find().
18787c478bd9Sstevel@tonic-gate 	 */
18797c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_cleanupq_mutex);
18807c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
18817c478bd9Sstevel@tonic-gate 	retval = aio_req_find(resultp, &reqp);
18827c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
18837c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_cleanupq_mutex);
18847c478bd9Sstevel@tonic-gate 	if (retval == 0) {
18857c478bd9Sstevel@tonic-gate 		aphysio_unlock(reqp);
18867c478bd9Sstevel@tonic-gate 		aio_copyout_result(reqp);
18877c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
18887c478bd9Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
18897c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
18907c478bd9Sstevel@tonic-gate 		return (0);
18917c478bd9Sstevel@tonic-gate 	} else if (retval == 1)
18927c478bd9Sstevel@tonic-gate 		return (EINPROGRESS);
18937c478bd9Sstevel@tonic-gate 	else if (retval == 2)
18947c478bd9Sstevel@tonic-gate 		return (EINVAL);
18957c478bd9Sstevel@tonic-gate 	return (0);
18967c478bd9Sstevel@tonic-gate }
18977c478bd9Sstevel@tonic-gate 
18987c478bd9Sstevel@tonic-gate /*
18997c478bd9Sstevel@tonic-gate  * 	aio_cancel - if no requests outstanding,
19007c478bd9Sstevel@tonic-gate  *			return AIO_ALLDONE
19017c478bd9Sstevel@tonic-gate  *			else
19027c478bd9Sstevel@tonic-gate  *			return AIO_NOTCANCELED
19037c478bd9Sstevel@tonic-gate  */
19047c478bd9Sstevel@tonic-gate static int
19057c478bd9Sstevel@tonic-gate aio_cancel(
19067c478bd9Sstevel@tonic-gate 	int	fildes,
19077c478bd9Sstevel@tonic-gate 	void 	*cb,
19087c478bd9Sstevel@tonic-gate 	long	*rval,
19097c478bd9Sstevel@tonic-gate 	int	run_mode)
19107c478bd9Sstevel@tonic-gate {
19117c478bd9Sstevel@tonic-gate 	aio_t *aiop;
19127c478bd9Sstevel@tonic-gate 	void *resultp;
19137c478bd9Sstevel@tonic-gate 	int index;
19147c478bd9Sstevel@tonic-gate 	aio_req_t **bucket;
19157c478bd9Sstevel@tonic-gate 	aio_req_t *ent;
19167c478bd9Sstevel@tonic-gate 
19177c478bd9Sstevel@tonic-gate 
19187c478bd9Sstevel@tonic-gate 	/*
19197c478bd9Sstevel@tonic-gate 	 * Verify valid file descriptor
19207c478bd9Sstevel@tonic-gate 	 */
19217c478bd9Sstevel@tonic-gate 	if ((getf(fildes)) == NULL) {
19227c478bd9Sstevel@tonic-gate 		return (EBADF);
19237c478bd9Sstevel@tonic-gate 	}
19247c478bd9Sstevel@tonic-gate 	releasef(fildes);
19257c478bd9Sstevel@tonic-gate 
19267c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
19277c478bd9Sstevel@tonic-gate 	if (aiop == NULL)
19287c478bd9Sstevel@tonic-gate 		return (EINVAL);
19297c478bd9Sstevel@tonic-gate 
19307c478bd9Sstevel@tonic-gate 	if (aiop->aio_outstanding == 0) {
19317c478bd9Sstevel@tonic-gate 		*rval = AIO_ALLDONE;
19327c478bd9Sstevel@tonic-gate 		return (0);
19337c478bd9Sstevel@tonic-gate 	}
19347c478bd9Sstevel@tonic-gate 
19357c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
19367c478bd9Sstevel@tonic-gate 	if (cb != NULL) {
19377c478bd9Sstevel@tonic-gate 		if (get_udatamodel() == DATAMODEL_NATIVE) {
19387c478bd9Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE)
19397c478bd9Sstevel@tonic-gate 				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
19407c478bd9Sstevel@tonic-gate 				    ->aio_resultp;
19417c478bd9Sstevel@tonic-gate 			else
19427c478bd9Sstevel@tonic-gate 				resultp = &((aiocb_t *)cb)->aio_resultp;
19437c478bd9Sstevel@tonic-gate 		}
19447c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
19457c478bd9Sstevel@tonic-gate 		else {
19467c478bd9Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE)
19477c478bd9Sstevel@tonic-gate 				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
19487c478bd9Sstevel@tonic-gate 				    ->aio_resultp;
19497c478bd9Sstevel@tonic-gate 			else if (run_mode == AIO_32)
19507c478bd9Sstevel@tonic-gate 				resultp = (aio_result_t *)&((aiocb32_t *)cb)
19517c478bd9Sstevel@tonic-gate 				    ->aio_resultp;
19527c478bd9Sstevel@tonic-gate 		}
19537c478bd9Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
19547c478bd9Sstevel@tonic-gate 		index = AIO_HASH(resultp);
19557c478bd9Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
19567c478bd9Sstevel@tonic-gate 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
19577c478bd9Sstevel@tonic-gate 			if (ent->aio_req_resultp == resultp) {
19587c478bd9Sstevel@tonic-gate 				if ((ent->aio_req_flags & AIO_PENDING) == 0) {
19597c478bd9Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
19607c478bd9Sstevel@tonic-gate 					*rval = AIO_ALLDONE;
19617c478bd9Sstevel@tonic-gate 					return (0);
19627c478bd9Sstevel@tonic-gate 				}
19637c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
19647c478bd9Sstevel@tonic-gate 				*rval = AIO_NOTCANCELED;
19657c478bd9Sstevel@tonic-gate 				return (0);
19667c478bd9Sstevel@tonic-gate 			}
19677c478bd9Sstevel@tonic-gate 		}
19687c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
19697c478bd9Sstevel@tonic-gate 		*rval = AIO_ALLDONE;
19707c478bd9Sstevel@tonic-gate 		return (0);
19717c478bd9Sstevel@tonic-gate 	}
19727c478bd9Sstevel@tonic-gate 
19737c478bd9Sstevel@tonic-gate 	for (index = 0; index < AIO_HASHSZ; index++) {
19747c478bd9Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
19757c478bd9Sstevel@tonic-gate 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
19767c478bd9Sstevel@tonic-gate 			if (ent->aio_req_fd == fildes) {
19777c478bd9Sstevel@tonic-gate 				if ((ent->aio_req_flags & AIO_PENDING) != 0) {
19787c478bd9Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
19797c478bd9Sstevel@tonic-gate 					*rval = AIO_NOTCANCELED;
19807c478bd9Sstevel@tonic-gate 					return (0);
19817c478bd9Sstevel@tonic-gate 				}
19827c478bd9Sstevel@tonic-gate 			}
19837c478bd9Sstevel@tonic-gate 		}
19847c478bd9Sstevel@tonic-gate 	}
19857c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
19867c478bd9Sstevel@tonic-gate 	*rval = AIO_ALLDONE;
19877c478bd9Sstevel@tonic-gate 	return (0);
19887c478bd9Sstevel@tonic-gate }
19897c478bd9Sstevel@tonic-gate 
19907c478bd9Sstevel@tonic-gate /*
19917c478bd9Sstevel@tonic-gate  * solaris version of asynchronous read and write
19927c478bd9Sstevel@tonic-gate  */
19937c478bd9Sstevel@tonic-gate static int
19947c478bd9Sstevel@tonic-gate arw(
19957c478bd9Sstevel@tonic-gate 	int	opcode,
19967c478bd9Sstevel@tonic-gate 	int	fdes,
19977c478bd9Sstevel@tonic-gate 	char	*bufp,
19987c478bd9Sstevel@tonic-gate 	int	bufsize,
19997c478bd9Sstevel@tonic-gate 	offset_t	offset,
20007c478bd9Sstevel@tonic-gate 	aio_result_t	*resultp,
20017c478bd9Sstevel@tonic-gate 	int		mode)
20027c478bd9Sstevel@tonic-gate {
20037c478bd9Sstevel@tonic-gate 	file_t		*fp;
20047c478bd9Sstevel@tonic-gate 	int		error;
20057c478bd9Sstevel@tonic-gate 	struct vnode	*vp;
20067c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp;
20077c478bd9Sstevel@tonic-gate 	aio_t		*aiop;
20087c478bd9Sstevel@tonic-gate 	int		(*aio_func)();
20097c478bd9Sstevel@tonic-gate #ifdef _LP64
20107c478bd9Sstevel@tonic-gate 	aiocb_t		aiocb;
20117c478bd9Sstevel@tonic-gate #else
20127c478bd9Sstevel@tonic-gate 	aiocb64_32_t	aiocb64;
20137c478bd9Sstevel@tonic-gate #endif
20147c478bd9Sstevel@tonic-gate 
20157c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
20167c478bd9Sstevel@tonic-gate 	if (aiop == NULL)
20177c478bd9Sstevel@tonic-gate 		return (EINVAL);
20187c478bd9Sstevel@tonic-gate 
20197c478bd9Sstevel@tonic-gate 	if ((fp = getf(fdes)) == NULL) {
20207c478bd9Sstevel@tonic-gate 		return (EBADF);
20217c478bd9Sstevel@tonic-gate 	}
20227c478bd9Sstevel@tonic-gate 
20237c478bd9Sstevel@tonic-gate 	/*
20247c478bd9Sstevel@tonic-gate 	 * check the permission of the partition
20257c478bd9Sstevel@tonic-gate 	 */
20267c478bd9Sstevel@tonic-gate 	if ((fp->f_flag & mode) == 0) {
20277c478bd9Sstevel@tonic-gate 		releasef(fdes);
20287c478bd9Sstevel@tonic-gate 		return (EBADF);
20297c478bd9Sstevel@tonic-gate 	}
20307c478bd9Sstevel@tonic-gate 
20317c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
20327c478bd9Sstevel@tonic-gate 	aio_func = check_vp(vp, mode);
20337c478bd9Sstevel@tonic-gate 	if (aio_func == NULL) {
20347c478bd9Sstevel@tonic-gate 		releasef(fdes);
20357c478bd9Sstevel@tonic-gate 		return (EBADFD);
20367c478bd9Sstevel@tonic-gate 	}
20377c478bd9Sstevel@tonic-gate #ifdef _LP64
20387c478bd9Sstevel@tonic-gate 	aiocb.aio_fildes = fdes;
20397c478bd9Sstevel@tonic-gate 	aiocb.aio_buf = bufp;
20407c478bd9Sstevel@tonic-gate 	aiocb.aio_nbytes = bufsize;
20417c478bd9Sstevel@tonic-gate 	aiocb.aio_offset = offset;
20427c478bd9Sstevel@tonic-gate 	aiocb.aio_sigevent.sigev_notify = 0;
20437c478bd9Sstevel@tonic-gate 	error = aio_req_setup(&reqp, aiop, &aiocb, resultp, 0, vp);
20447c478bd9Sstevel@tonic-gate #else
20457c478bd9Sstevel@tonic-gate 	aiocb64.aio_fildes = fdes;
20467c478bd9Sstevel@tonic-gate 	aiocb64.aio_buf = (caddr32_t)bufp;
20477c478bd9Sstevel@tonic-gate 	aiocb64.aio_nbytes = bufsize;
20487c478bd9Sstevel@tonic-gate 	aiocb64.aio_offset = offset;
20497c478bd9Sstevel@tonic-gate 	aiocb64.aio_sigevent.sigev_notify = 0;
20507c478bd9Sstevel@tonic-gate 	error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, 0, vp);
20517c478bd9Sstevel@tonic-gate #endif
20527c478bd9Sstevel@tonic-gate 	if (error) {
20537c478bd9Sstevel@tonic-gate 		releasef(fdes);
20547c478bd9Sstevel@tonic-gate 		return (error);
20557c478bd9Sstevel@tonic-gate 	}
20567c478bd9Sstevel@tonic-gate 
20577c478bd9Sstevel@tonic-gate 	/*
20587c478bd9Sstevel@tonic-gate 	 * enable polling on this request if the opcode has
20597c478bd9Sstevel@tonic-gate 	 * the AIO poll bit set
20607c478bd9Sstevel@tonic-gate 	 */
20617c478bd9Sstevel@tonic-gate 	if (opcode & AIO_POLL_BIT)
20627c478bd9Sstevel@tonic-gate 		reqp->aio_req_flags |= AIO_POLL;
20637c478bd9Sstevel@tonic-gate 
20647c478bd9Sstevel@tonic-gate 	if (bufsize == 0) {
20657c478bd9Sstevel@tonic-gate 		clear_active_fd(fdes);
20667c478bd9Sstevel@tonic-gate 		aio_zerolen(reqp);
20677c478bd9Sstevel@tonic-gate 		return (0);
20687c478bd9Sstevel@tonic-gate 	}
20697c478bd9Sstevel@tonic-gate 	/*
20707c478bd9Sstevel@tonic-gate 	 * send the request to driver.
20717c478bd9Sstevel@tonic-gate 	 * Clustering: If PXFS vnode, call PXFS function.
20727c478bd9Sstevel@tonic-gate 	 */
20737c478bd9Sstevel@tonic-gate 	error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
20747c478bd9Sstevel@tonic-gate 	/*
20757c478bd9Sstevel@tonic-gate 	 * the fd is stored in the aio_req_t by aio_req_setup(), and
20767c478bd9Sstevel@tonic-gate 	 * is released by the aio_cleanup_thread() when the IO has
20777c478bd9Sstevel@tonic-gate 	 * completed.
20787c478bd9Sstevel@tonic-gate 	 */
20797c478bd9Sstevel@tonic-gate 	if (error) {
20807c478bd9Sstevel@tonic-gate 		releasef(fdes);
20817c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
20827c478bd9Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
20837c478bd9Sstevel@tonic-gate 		aiop->aio_pending--;
20847c478bd9Sstevel@tonic-gate 		if (aiop->aio_flags & AIO_REQ_BLOCK)
20857c478bd9Sstevel@tonic-gate 			cv_signal(&aiop->aio_cleanupcv);
20867c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
20877c478bd9Sstevel@tonic-gate 		return (error);
20887c478bd9Sstevel@tonic-gate 	}
20897c478bd9Sstevel@tonic-gate 	clear_active_fd(fdes);
20907c478bd9Sstevel@tonic-gate 	return (0);
20917c478bd9Sstevel@tonic-gate }
20927c478bd9Sstevel@tonic-gate 
20937c478bd9Sstevel@tonic-gate /*
20947c478bd9Sstevel@tonic-gate  * Take request out of the port pending queue ...
20957c478bd9Sstevel@tonic-gate  */
20967c478bd9Sstevel@tonic-gate 
20977c478bd9Sstevel@tonic-gate void
20987c478bd9Sstevel@tonic-gate aio_deq_port_pending(aio_t *aiop, aio_req_t *reqp)
20997c478bd9Sstevel@tonic-gate {
21007c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
21017c478bd9Sstevel@tonic-gate 	if (reqp->aio_req_prev == NULL)
21027c478bd9Sstevel@tonic-gate 		/* first request */
21037c478bd9Sstevel@tonic-gate 		aiop->aio_portpending = reqp->aio_req_next;
21047c478bd9Sstevel@tonic-gate 	else
21057c478bd9Sstevel@tonic-gate 		reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
21067c478bd9Sstevel@tonic-gate 	if (reqp->aio_req_next != NULL)
21077c478bd9Sstevel@tonic-gate 		reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
21087c478bd9Sstevel@tonic-gate }
21097c478bd9Sstevel@tonic-gate 
21107c478bd9Sstevel@tonic-gate /*
21117c478bd9Sstevel@tonic-gate  * posix version of asynchronous read and write
21127c478bd9Sstevel@tonic-gate  */
21137c478bd9Sstevel@tonic-gate static	int
21147c478bd9Sstevel@tonic-gate aiorw(
21157c478bd9Sstevel@tonic-gate 	int		opcode,
21167c478bd9Sstevel@tonic-gate 	void		*aiocb_arg,
21177c478bd9Sstevel@tonic-gate 	int		mode,
21187c478bd9Sstevel@tonic-gate 	int		run_mode)
21197c478bd9Sstevel@tonic-gate {
21207c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
21217c478bd9Sstevel@tonic-gate 	aiocb32_t	aiocb32;
21227c478bd9Sstevel@tonic-gate 	struct	sigevent32 *sigev32;
21237c478bd9Sstevel@tonic-gate 	port_notify32_t	pntfy32;
21247c478bd9Sstevel@tonic-gate #endif
21257c478bd9Sstevel@tonic-gate 	aiocb64_32_t	aiocb64;
21267c478bd9Sstevel@tonic-gate 	aiocb_t		aiocb;
21277c478bd9Sstevel@tonic-gate 	file_t		*fp;
21287c478bd9Sstevel@tonic-gate 	int		error, fd;
21297c478bd9Sstevel@tonic-gate 	size_t		bufsize;
21307c478bd9Sstevel@tonic-gate 	struct vnode	*vp;
21317c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp;
21327c478bd9Sstevel@tonic-gate 	aio_t		*aiop;
21337c478bd9Sstevel@tonic-gate 	int		(*aio_func)();
21347c478bd9Sstevel@tonic-gate 	aio_result_t	*resultp;
21357c478bd9Sstevel@tonic-gate 	struct	sigevent *sigev;
21367c478bd9Sstevel@tonic-gate 	model_t		model;
21377c478bd9Sstevel@tonic-gate 	int		aio_use_port = 0;
21387c478bd9Sstevel@tonic-gate 	port_notify_t	pntfy;
21397c478bd9Sstevel@tonic-gate 
21407c478bd9Sstevel@tonic-gate 	model = get_udatamodel();
21417c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
21427c478bd9Sstevel@tonic-gate 	if (aiop == NULL)
21437c478bd9Sstevel@tonic-gate 		return (EINVAL);
21447c478bd9Sstevel@tonic-gate 
21457c478bd9Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
21467c478bd9Sstevel@tonic-gate 		if (run_mode != AIO_LARGEFILE) {
21477c478bd9Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
21487c478bd9Sstevel@tonic-gate 				return (EFAULT);
21497c478bd9Sstevel@tonic-gate 			bufsize = aiocb.aio_nbytes;
21507c478bd9Sstevel@tonic-gate 			resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
21517c478bd9Sstevel@tonic-gate 			if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
21527c478bd9Sstevel@tonic-gate 				return (EBADF);
21537c478bd9Sstevel@tonic-gate 			}
21547c478bd9Sstevel@tonic-gate 			sigev = &aiocb.aio_sigevent;
21557c478bd9Sstevel@tonic-gate 		} else {
21567c478bd9Sstevel@tonic-gate 			/*
21577c478bd9Sstevel@tonic-gate 			 * We come here only when we make largefile
21587c478bd9Sstevel@tonic-gate 			 * call on 32 bit kernel using 32 bit library.
21597c478bd9Sstevel@tonic-gate 			 */
21607c478bd9Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
21617c478bd9Sstevel@tonic-gate 				return (EFAULT);
21627c478bd9Sstevel@tonic-gate 			bufsize = aiocb64.aio_nbytes;
21637c478bd9Sstevel@tonic-gate 			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
21647c478bd9Sstevel@tonic-gate 			    ->aio_resultp);
21657c478bd9Sstevel@tonic-gate 			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) {
21667c478bd9Sstevel@tonic-gate 				return (EBADF);
21677c478bd9Sstevel@tonic-gate 			}
21687c478bd9Sstevel@tonic-gate 			sigev = (struct sigevent *)&aiocb64.aio_sigevent;
21697c478bd9Sstevel@tonic-gate 		}
21707c478bd9Sstevel@tonic-gate 
21717c478bd9Sstevel@tonic-gate 		if (sigev->sigev_notify == SIGEV_PORT) {
21727c478bd9Sstevel@tonic-gate 			if (copyin((void *)sigev->sigev_value.sival_ptr,
21737c478bd9Sstevel@tonic-gate 			    &pntfy, sizeof (port_notify_t))) {
21747c478bd9Sstevel@tonic-gate 				releasef(fd);
21757c478bd9Sstevel@tonic-gate 				return (EFAULT);
21767c478bd9Sstevel@tonic-gate 			}
21777c478bd9Sstevel@tonic-gate 			aio_use_port = 1;
21787c478bd9Sstevel@tonic-gate 		}
21797c478bd9Sstevel@tonic-gate 	}
21807c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
21817c478bd9Sstevel@tonic-gate 	else {
21827c478bd9Sstevel@tonic-gate 		if (run_mode == AIO_32) {
21837c478bd9Sstevel@tonic-gate 			/* 32 bit system call is being made on 64 bit kernel */
21847c478bd9Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
21857c478bd9Sstevel@tonic-gate 				return (EFAULT);
21867c478bd9Sstevel@tonic-gate 
21877c478bd9Sstevel@tonic-gate 			bufsize = aiocb32.aio_nbytes;
21887c478bd9Sstevel@tonic-gate 			aiocb_32ton(&aiocb32, &aiocb);
21897c478bd9Sstevel@tonic-gate 			resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
21907c478bd9Sstevel@tonic-gate 			    aio_resultp);
21917c478bd9Sstevel@tonic-gate 			if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
21927c478bd9Sstevel@tonic-gate 				return (EBADF);
21937c478bd9Sstevel@tonic-gate 			}
21947c478bd9Sstevel@tonic-gate 			sigev32 = &aiocb32.aio_sigevent;
21957c478bd9Sstevel@tonic-gate 		} else if (run_mode == AIO_LARGEFILE) {
21967c478bd9Sstevel@tonic-gate 			/*
21977c478bd9Sstevel@tonic-gate 			 * We come here only when we make largefile
21987c478bd9Sstevel@tonic-gate 			 * call on 64 bit kernel using 32 bit library.
21997c478bd9Sstevel@tonic-gate 			 */
22007c478bd9Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
22017c478bd9Sstevel@tonic-gate 				return (EFAULT);
22027c478bd9Sstevel@tonic-gate 			bufsize = aiocb64.aio_nbytes;
22037c478bd9Sstevel@tonic-gate 			aiocb_LFton(&aiocb64, &aiocb);
22047c478bd9Sstevel@tonic-gate 			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
22057c478bd9Sstevel@tonic-gate 			    ->aio_resultp);
22067c478bd9Sstevel@tonic-gate 			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
22077c478bd9Sstevel@tonic-gate 				return (EBADF);
22087c478bd9Sstevel@tonic-gate 			sigev32 = &aiocb64.aio_sigevent;
22097c478bd9Sstevel@tonic-gate 		}
22107c478bd9Sstevel@tonic-gate 
22117c478bd9Sstevel@tonic-gate 		if (sigev32->sigev_notify == SIGEV_PORT) {
22127c478bd9Sstevel@tonic-gate 			if (copyin(
22137c478bd9Sstevel@tonic-gate 			    (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
22147c478bd9Sstevel@tonic-gate 			    &pntfy32, sizeof (port_notify32_t))) {
22157c478bd9Sstevel@tonic-gate 				releasef(fd);
22167c478bd9Sstevel@tonic-gate 				return (EFAULT);
22177c478bd9Sstevel@tonic-gate 			}
22187c478bd9Sstevel@tonic-gate 			pntfy.portnfy_port = pntfy32.portnfy_port;
22197c478bd9Sstevel@tonic-gate 			pntfy.portnfy_user =
22207c478bd9Sstevel@tonic-gate 			    (void *)(uintptr_t)pntfy32.portnfy_user;
22217c478bd9Sstevel@tonic-gate 			aio_use_port = 1;
22227c478bd9Sstevel@tonic-gate 		}
22237c478bd9Sstevel@tonic-gate 	}
22247c478bd9Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
22257c478bd9Sstevel@tonic-gate 
22267c478bd9Sstevel@tonic-gate 	/*
22277c478bd9Sstevel@tonic-gate 	 * check the permission of the partition
22287c478bd9Sstevel@tonic-gate 	 */
22297c478bd9Sstevel@tonic-gate 
22307c478bd9Sstevel@tonic-gate 	if ((fp->f_flag & mode) == 0) {
22317c478bd9Sstevel@tonic-gate 		releasef(fd);
22327c478bd9Sstevel@tonic-gate 		return (EBADF);
22337c478bd9Sstevel@tonic-gate 	}
22347c478bd9Sstevel@tonic-gate 
22357c478bd9Sstevel@tonic-gate 	vp = fp->f_vnode;
22367c478bd9Sstevel@tonic-gate 	aio_func = check_vp(vp, mode);
22377c478bd9Sstevel@tonic-gate 	if (aio_func == NULL) {
22387c478bd9Sstevel@tonic-gate 		releasef(fd);
22397c478bd9Sstevel@tonic-gate 		return (EBADFD);
22407c478bd9Sstevel@tonic-gate 	}
22417c478bd9Sstevel@tonic-gate 	if ((model == DATAMODEL_NATIVE) && (run_mode == AIO_LARGEFILE))
22427c478bd9Sstevel@tonic-gate 		error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp,
22437c478bd9Sstevel@tonic-gate 		    aio_use_port, vp);
22447c478bd9Sstevel@tonic-gate 	else
22457c478bd9Sstevel@tonic-gate 		error = aio_req_setup(&reqp, aiop, &aiocb, resultp,
22467c478bd9Sstevel@tonic-gate 		    aio_use_port, vp);
22477c478bd9Sstevel@tonic-gate 
22487c478bd9Sstevel@tonic-gate 	if (error) {
22497c478bd9Sstevel@tonic-gate 		releasef(fd);
22507c478bd9Sstevel@tonic-gate 		return (error);
22517c478bd9Sstevel@tonic-gate 	}
22527c478bd9Sstevel@tonic-gate 	/*
22537c478bd9Sstevel@tonic-gate 	 * enable polling on this request if the opcode has
22547c478bd9Sstevel@tonic-gate 	 * the AIO poll bit set
22557c478bd9Sstevel@tonic-gate 	 */
22567c478bd9Sstevel@tonic-gate 	if (opcode & AIO_POLL_BIT)
22577c478bd9Sstevel@tonic-gate 		reqp->aio_req_flags |= AIO_POLL;
22587c478bd9Sstevel@tonic-gate 
22597c478bd9Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
22607c478bd9Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb = aiocb_arg;
22617c478bd9Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
22627c478bd9Sstevel@tonic-gate 	else
22637c478bd9Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
22647c478bd9Sstevel@tonic-gate #endif
22657c478bd9Sstevel@tonic-gate 
22667c478bd9Sstevel@tonic-gate 	if (aio_use_port)
22677c478bd9Sstevel@tonic-gate 		error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp);
22687c478bd9Sstevel@tonic-gate 
22697c478bd9Sstevel@tonic-gate 	/*
22707c478bd9Sstevel@tonic-gate 	 * send the request to driver.
22717c478bd9Sstevel@tonic-gate 	 * Clustering: If PXFS vnode, call PXFS function.
22727c478bd9Sstevel@tonic-gate 	 */
22737c478bd9Sstevel@tonic-gate 	if (error == 0) {
22747c478bd9Sstevel@tonic-gate 		if (bufsize == 0) {
22757c478bd9Sstevel@tonic-gate 			clear_active_fd(fd);
22767c478bd9Sstevel@tonic-gate 			aio_zerolen(reqp);
22777c478bd9Sstevel@tonic-gate 			return (0);
22787c478bd9Sstevel@tonic-gate 		}
22797c478bd9Sstevel@tonic-gate 		error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
22807c478bd9Sstevel@tonic-gate 	}
22817c478bd9Sstevel@tonic-gate 
22827c478bd9Sstevel@tonic-gate 	/*
22837c478bd9Sstevel@tonic-gate 	 * the fd is stored in the aio_req_t by aio_req_setup(), and
22847c478bd9Sstevel@tonic-gate 	 * is released by the aio_cleanup_thread() when the IO has
22857c478bd9Sstevel@tonic-gate 	 * completed.
22867c478bd9Sstevel@tonic-gate 	 */
22877c478bd9Sstevel@tonic-gate 	if (error) {
22887c478bd9Sstevel@tonic-gate 		releasef(fd);
22897c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
22907c478bd9Sstevel@tonic-gate 		aio_deq_port_pending(aiop, reqp);
22917c478bd9Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
22927c478bd9Sstevel@tonic-gate 		aiop->aio_pending--;
22937c478bd9Sstevel@tonic-gate 		if (aiop->aio_flags & AIO_REQ_BLOCK)
22947c478bd9Sstevel@tonic-gate 			cv_signal(&aiop->aio_cleanupcv);
22957c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
22967c478bd9Sstevel@tonic-gate 		return (error);
22977c478bd9Sstevel@tonic-gate 	}
22987c478bd9Sstevel@tonic-gate 	clear_active_fd(fd);
22997c478bd9Sstevel@tonic-gate 	return (0);
23007c478bd9Sstevel@tonic-gate }
23017c478bd9Sstevel@tonic-gate 
23027c478bd9Sstevel@tonic-gate 
23037c478bd9Sstevel@tonic-gate /*
23047c478bd9Sstevel@tonic-gate  * set error for a list IO entry that failed.
23057c478bd9Sstevel@tonic-gate  */
23067c478bd9Sstevel@tonic-gate static void
23077c478bd9Sstevel@tonic-gate lio_set_error(aio_req_t *reqp)
23087c478bd9Sstevel@tonic-gate {
23097c478bd9Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
23107c478bd9Sstevel@tonic-gate 
23117c478bd9Sstevel@tonic-gate 	if (aiop == NULL)
23127c478bd9Sstevel@tonic-gate 		return;
23137c478bd9Sstevel@tonic-gate 
23147c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
23157c478bd9Sstevel@tonic-gate 	aio_deq_port_pending(aiop, reqp);
23167c478bd9Sstevel@tonic-gate 	aiop->aio_pending--;
23177c478bd9Sstevel@tonic-gate 	/* request failed, AIO_PHYSIODONE set to aviod physio cleanup. */
23187c478bd9Sstevel@tonic-gate 	reqp->aio_req_flags |= AIO_PHYSIODONE;
23197c478bd9Sstevel@tonic-gate 	/*
23207c478bd9Sstevel@tonic-gate 	 * Need to free the request now as its never
23217c478bd9Sstevel@tonic-gate 	 * going to get on the done queue
23227c478bd9Sstevel@tonic-gate 	 *
23237c478bd9Sstevel@tonic-gate 	 * Note: aio_outstanding is decremented in
23247c478bd9Sstevel@tonic-gate 	 *	 aio_req_free()
23257c478bd9Sstevel@tonic-gate 	 */
23267c478bd9Sstevel@tonic-gate 	aio_req_free(aiop, reqp);
23277c478bd9Sstevel@tonic-gate 	if (aiop->aio_flags & AIO_REQ_BLOCK)
23287c478bd9Sstevel@tonic-gate 		cv_signal(&aiop->aio_cleanupcv);
23297c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
23307c478bd9Sstevel@tonic-gate }
23317c478bd9Sstevel@tonic-gate 
23327c478bd9Sstevel@tonic-gate /*
23337c478bd9Sstevel@tonic-gate  * check if a specified request is done, and remove it from
23347c478bd9Sstevel@tonic-gate  * the done queue. otherwise remove anybody from the done queue
23357c478bd9Sstevel@tonic-gate  * if NULL is specified.
23367c478bd9Sstevel@tonic-gate  */
23377c478bd9Sstevel@tonic-gate static aio_req_t *
23387c478bd9Sstevel@tonic-gate aio_req_done(void *resultp)
23397c478bd9Sstevel@tonic-gate {
23407c478bd9Sstevel@tonic-gate 	aio_req_t **bucket;
23417c478bd9Sstevel@tonic-gate 	aio_req_t *ent;
23427c478bd9Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
23437c478bd9Sstevel@tonic-gate 	long index;
23447c478bd9Sstevel@tonic-gate 
23457c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
23467c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
23477c478bd9Sstevel@tonic-gate 
23487c478bd9Sstevel@tonic-gate 	if (resultp) {
23497c478bd9Sstevel@tonic-gate 		index = AIO_HASH(resultp);
23507c478bd9Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
23517c478bd9Sstevel@tonic-gate 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
23527c478bd9Sstevel@tonic-gate 			if (ent->aio_req_resultp == (aio_result_t *)resultp) {
23537c478bd9Sstevel@tonic-gate 				if (ent->aio_req_flags & AIO_DONEQ) {
23547c478bd9Sstevel@tonic-gate 					return (aio_req_remove(ent));
23557c478bd9Sstevel@tonic-gate 				}
23567c478bd9Sstevel@tonic-gate 				return (NULL);
23577c478bd9Sstevel@tonic-gate 			}
23587c478bd9Sstevel@tonic-gate 		}
23597c478bd9Sstevel@tonic-gate 		/* no match, resultp is invalid */
23607c478bd9Sstevel@tonic-gate 		return (NULL);
23617c478bd9Sstevel@tonic-gate 	}
23627c478bd9Sstevel@tonic-gate 	return (aio_req_remove(NULL));
23637c478bd9Sstevel@tonic-gate }
23647c478bd9Sstevel@tonic-gate 
23657c478bd9Sstevel@tonic-gate /*
23667c478bd9Sstevel@tonic-gate  * determine if a user-level resultp pointer is associated with an
23677c478bd9Sstevel@tonic-gate  * active IO request. Zero is returned when the request is done,
23687c478bd9Sstevel@tonic-gate  * and the request is removed from the done queue. Only when the
23697c478bd9Sstevel@tonic-gate  * return value is zero, is the "reqp" pointer valid. One is returned
23707c478bd9Sstevel@tonic-gate  * when the request is inprogress. Two is returned when the request
23717c478bd9Sstevel@tonic-gate  * is invalid.
23727c478bd9Sstevel@tonic-gate  */
23737c478bd9Sstevel@tonic-gate static int
23747c478bd9Sstevel@tonic-gate aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
23757c478bd9Sstevel@tonic-gate {
23767c478bd9Sstevel@tonic-gate 	aio_req_t **bucket;
23777c478bd9Sstevel@tonic-gate 	aio_req_t *ent;
23787c478bd9Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
23797c478bd9Sstevel@tonic-gate 	long index;
23807c478bd9Sstevel@tonic-gate 
23817c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
23827c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
23837c478bd9Sstevel@tonic-gate 
23847c478bd9Sstevel@tonic-gate 	index = AIO_HASH(resultp);
23857c478bd9Sstevel@tonic-gate 	bucket = &aiop->aio_hash[index];
23867c478bd9Sstevel@tonic-gate 	for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
23877c478bd9Sstevel@tonic-gate 		if (ent->aio_req_resultp == resultp) {
23887c478bd9Sstevel@tonic-gate 			if (ent->aio_req_flags & AIO_DONEQ) {
23897c478bd9Sstevel@tonic-gate 				*reqp = aio_req_remove(ent);
23907c478bd9Sstevel@tonic-gate 				return (0);
23917c478bd9Sstevel@tonic-gate 			}
23927c478bd9Sstevel@tonic-gate 			return (1);
23937c478bd9Sstevel@tonic-gate 		}
23947c478bd9Sstevel@tonic-gate 	}
23957c478bd9Sstevel@tonic-gate 	/* no match, resultp is invalid */
23967c478bd9Sstevel@tonic-gate 	return (2);
23977c478bd9Sstevel@tonic-gate }
23987c478bd9Sstevel@tonic-gate 
23997c478bd9Sstevel@tonic-gate /*
24007c478bd9Sstevel@tonic-gate  * remove a request from the done queue.
24017c478bd9Sstevel@tonic-gate  */
24027c478bd9Sstevel@tonic-gate static aio_req_t *
24037c478bd9Sstevel@tonic-gate aio_req_remove(aio_req_t *reqp)
24047c478bd9Sstevel@tonic-gate {
24057c478bd9Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
24067c478bd9Sstevel@tonic-gate 	aio_req_t *head;
24077c478bd9Sstevel@tonic-gate 
24087c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
24097c478bd9Sstevel@tonic-gate 
24107c478bd9Sstevel@tonic-gate 	if (reqp) {
24117c478bd9Sstevel@tonic-gate 		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
24127c478bd9Sstevel@tonic-gate 		if (reqp->aio_req_next == reqp) {
24137c478bd9Sstevel@tonic-gate 			/* only one request on queue */
24147c478bd9Sstevel@tonic-gate 			if (reqp ==  aiop->aio_doneq) {
24157c478bd9Sstevel@tonic-gate 				aiop->aio_doneq = NULL;
24167c478bd9Sstevel@tonic-gate 			} else {
24177c478bd9Sstevel@tonic-gate 				ASSERT(reqp == aiop->aio_cleanupq);
24187c478bd9Sstevel@tonic-gate 				aiop->aio_cleanupq = NULL;
24197c478bd9Sstevel@tonic-gate 			}
24207c478bd9Sstevel@tonic-gate 		} else {
24217c478bd9Sstevel@tonic-gate 			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
24227c478bd9Sstevel@tonic-gate 			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
24237c478bd9Sstevel@tonic-gate 			/*
24247c478bd9Sstevel@tonic-gate 			 * The request can be either on the aio_doneq or the
24257c478bd9Sstevel@tonic-gate 			 * aio_cleanupq
24267c478bd9Sstevel@tonic-gate 			 */
24277c478bd9Sstevel@tonic-gate 			if (reqp == aiop->aio_doneq)
24287c478bd9Sstevel@tonic-gate 				aiop->aio_doneq = reqp->aio_req_next;
24297c478bd9Sstevel@tonic-gate 
24307c478bd9Sstevel@tonic-gate 			if (reqp == aiop->aio_cleanupq)
24317c478bd9Sstevel@tonic-gate 				aiop->aio_cleanupq = reqp->aio_req_next;
24327c478bd9Sstevel@tonic-gate 		}
24337c478bd9Sstevel@tonic-gate 		reqp->aio_req_flags &= ~AIO_DONEQ;
24347c478bd9Sstevel@tonic-gate 		return (reqp);
24357c478bd9Sstevel@tonic-gate 	}
24367c478bd9Sstevel@tonic-gate 
24377c478bd9Sstevel@tonic-gate 	if (aiop->aio_doneq) {
24387c478bd9Sstevel@tonic-gate 		head = aiop->aio_doneq;
24397c478bd9Sstevel@tonic-gate 		ASSERT(head->aio_req_flags & AIO_DONEQ);
24407c478bd9Sstevel@tonic-gate 		if (head == head->aio_req_next) {
24417c478bd9Sstevel@tonic-gate 			/* only one request on queue */
24427c478bd9Sstevel@tonic-gate 			aiop->aio_doneq = NULL;
24437c478bd9Sstevel@tonic-gate 		} else {
24447c478bd9Sstevel@tonic-gate 			head->aio_req_prev->aio_req_next = head->aio_req_next;
24457c478bd9Sstevel@tonic-gate 			head->aio_req_next->aio_req_prev = head->aio_req_prev;
24467c478bd9Sstevel@tonic-gate 			aiop->aio_doneq = head->aio_req_next;
24477c478bd9Sstevel@tonic-gate 		}
24487c478bd9Sstevel@tonic-gate 		head->aio_req_flags &= ~AIO_DONEQ;
24497c478bd9Sstevel@tonic-gate 		return (head);
24507c478bd9Sstevel@tonic-gate 	}
24517c478bd9Sstevel@tonic-gate 	return (NULL);
24527c478bd9Sstevel@tonic-gate }
24537c478bd9Sstevel@tonic-gate 
24547c478bd9Sstevel@tonic-gate static int
24557c478bd9Sstevel@tonic-gate aio_req_setup(
24567c478bd9Sstevel@tonic-gate 	aio_req_t	**reqpp,
24577c478bd9Sstevel@tonic-gate 	aio_t 		*aiop,
24587c478bd9Sstevel@tonic-gate 	aiocb_t 	*arg,
24597c478bd9Sstevel@tonic-gate 	aio_result_t 	*resultp,
24607c478bd9Sstevel@tonic-gate 	int		port,
24617c478bd9Sstevel@tonic-gate 	vnode_t		*vp)
24627c478bd9Sstevel@tonic-gate {
24637c478bd9Sstevel@tonic-gate 	aio_req_t 	*reqp;
24647c478bd9Sstevel@tonic-gate 	sigqueue_t	*sqp;
24657c478bd9Sstevel@tonic-gate 	struct uio 	*uio;
24667c478bd9Sstevel@tonic-gate 
24677c478bd9Sstevel@tonic-gate 	struct sigevent *sigev;
24687c478bd9Sstevel@tonic-gate 	int		error;
24697c478bd9Sstevel@tonic-gate 
24707c478bd9Sstevel@tonic-gate 	sigev = &arg->aio_sigevent;
24717c478bd9Sstevel@tonic-gate 	if ((sigev->sigev_notify == SIGEV_SIGNAL) &&
24727c478bd9Sstevel@tonic-gate 	    (sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG)) {
24737c478bd9Sstevel@tonic-gate 		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
24747c478bd9Sstevel@tonic-gate 		if (sqp == NULL)
24757c478bd9Sstevel@tonic-gate 			return (EAGAIN);
24767c478bd9Sstevel@tonic-gate 		sqp->sq_func = NULL;
24777c478bd9Sstevel@tonic-gate 		sqp->sq_next = NULL;
24787c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_code = SI_ASYNCIO;
24797c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_pid = curproc->p_pid;
24807c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_ctid = PRCTID(curproc);
24817c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_zoneid = getzoneid();
24827c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
24837c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_signo = sigev->sigev_signo;
24847c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_value = sigev->sigev_value;
24857c478bd9Sstevel@tonic-gate 	} else
24867c478bd9Sstevel@tonic-gate 		sqp = NULL;
24877c478bd9Sstevel@tonic-gate 
24887c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
24897c478bd9Sstevel@tonic-gate 
24907c478bd9Sstevel@tonic-gate 	if (aiop->aio_flags & AIO_REQ_BLOCK) {
24917c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
24927c478bd9Sstevel@tonic-gate 		if (sqp)
24937c478bd9Sstevel@tonic-gate 			kmem_free(sqp, sizeof (sigqueue_t));
24947c478bd9Sstevel@tonic-gate 		return (EIO);
24957c478bd9Sstevel@tonic-gate 	}
24967c478bd9Sstevel@tonic-gate 	/*
24977c478bd9Sstevel@tonic-gate 	 * get an aio_reqp from the free list or allocate one
24987c478bd9Sstevel@tonic-gate 	 * from dynamic memory.
24997c478bd9Sstevel@tonic-gate 	 */
25007c478bd9Sstevel@tonic-gate 	if (error = aio_req_alloc(&reqp, resultp)) {
25017c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
25027c478bd9Sstevel@tonic-gate 		if (sqp)
25037c478bd9Sstevel@tonic-gate 			kmem_free(sqp, sizeof (sigqueue_t));
25047c478bd9Sstevel@tonic-gate 		return (error);
25057c478bd9Sstevel@tonic-gate 	}
25067c478bd9Sstevel@tonic-gate 	aiop->aio_pending++;
25077c478bd9Sstevel@tonic-gate 	aiop->aio_outstanding++;
25087c478bd9Sstevel@tonic-gate 	reqp->aio_req_flags = AIO_PENDING;
25097c478bd9Sstevel@tonic-gate 	if (port)
25107c478bd9Sstevel@tonic-gate 		aio_enq_port_pending(aiop, reqp);
25117c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
25127c478bd9Sstevel@tonic-gate 	/*
25137c478bd9Sstevel@tonic-gate 	 * initialize aio request.
25147c478bd9Sstevel@tonic-gate 	 */
25157c478bd9Sstevel@tonic-gate 	reqp->aio_req_fd = arg->aio_fildes;
25167c478bd9Sstevel@tonic-gate 	reqp->aio_req_sigqp = sqp;
25177c478bd9Sstevel@tonic-gate 	reqp->aio_req_iocb.iocb = NULL;
25187c478bd9Sstevel@tonic-gate 	reqp->aio_req_buf.b_file = vp;
25197c478bd9Sstevel@tonic-gate 	uio = reqp->aio_req.aio_uio;
25207c478bd9Sstevel@tonic-gate 	uio->uio_iovcnt = 1;
25217c478bd9Sstevel@tonic-gate 	uio->uio_iov->iov_base = (caddr_t)arg->aio_buf;
25227c478bd9Sstevel@tonic-gate 	uio->uio_iov->iov_len = arg->aio_nbytes;
25237c478bd9Sstevel@tonic-gate 	uio->uio_loffset = arg->aio_offset;
25247c478bd9Sstevel@tonic-gate 	*reqpp = reqp;
25257c478bd9Sstevel@tonic-gate 	return (0);
25267c478bd9Sstevel@tonic-gate }
25277c478bd9Sstevel@tonic-gate 
25287c478bd9Sstevel@tonic-gate /*
25297c478bd9Sstevel@tonic-gate  * Allocate p_aio struct.
25307c478bd9Sstevel@tonic-gate  */
25317c478bd9Sstevel@tonic-gate static aio_t *
25327c478bd9Sstevel@tonic-gate aio_aiop_alloc(void)
25337c478bd9Sstevel@tonic-gate {
25347c478bd9Sstevel@tonic-gate 	aio_t	*aiop;
25357c478bd9Sstevel@tonic-gate 
25367c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&curproc->p_lock));
25377c478bd9Sstevel@tonic-gate 
25387c478bd9Sstevel@tonic-gate 	aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);
25397c478bd9Sstevel@tonic-gate 	if (aiop) {
25407c478bd9Sstevel@tonic-gate 		mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
25417c478bd9Sstevel@tonic-gate 		mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
25427c478bd9Sstevel@tonic-gate 									NULL);
25437c478bd9Sstevel@tonic-gate 		mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
25447c478bd9Sstevel@tonic-gate 	}
25457c478bd9Sstevel@tonic-gate 	return (aiop);
25467c478bd9Sstevel@tonic-gate }
25477c478bd9Sstevel@tonic-gate 
25487c478bd9Sstevel@tonic-gate /*
25497c478bd9Sstevel@tonic-gate  * Allocate an aio_req struct.
25507c478bd9Sstevel@tonic-gate  */
25517c478bd9Sstevel@tonic-gate static int
25527c478bd9Sstevel@tonic-gate aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
25537c478bd9Sstevel@tonic-gate {
25547c478bd9Sstevel@tonic-gate 	aio_req_t *reqp;
25557c478bd9Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
25567c478bd9Sstevel@tonic-gate 
25577c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
25587c478bd9Sstevel@tonic-gate 
25597c478bd9Sstevel@tonic-gate 	if ((reqp = aiop->aio_free) != NULL) {
25607c478bd9Sstevel@tonic-gate 		reqp->aio_req_flags = 0;
25617c478bd9Sstevel@tonic-gate 		aiop->aio_free = reqp->aio_req_next;
25627c478bd9Sstevel@tonic-gate 		/*
25637c478bd9Sstevel@tonic-gate 		 * Clustering:This field has to be specifically
25647c478bd9Sstevel@tonic-gate 		 * set to null so that the right thing can be
25657c478bd9Sstevel@tonic-gate 		 * done in aphysio()
25667c478bd9Sstevel@tonic-gate 		 */
25677c478bd9Sstevel@tonic-gate 		reqp->aio_req_buf.b_iodone = NULL;
25687c478bd9Sstevel@tonic-gate 	} else {
25697c478bd9Sstevel@tonic-gate 		/*
25707c478bd9Sstevel@tonic-gate 		 * Check whether memory is getting tight.
25717c478bd9Sstevel@tonic-gate 		 * This is a temporary mechanism to avoid memory
25727c478bd9Sstevel@tonic-gate 		 * exhaustion by a single process until we come up
25737c478bd9Sstevel@tonic-gate 		 * with a per process solution such as setrlimit().
25747c478bd9Sstevel@tonic-gate 		 */
25757c478bd9Sstevel@tonic-gate 		if (freemem < desfree)
25767c478bd9Sstevel@tonic-gate 			return (EAGAIN);
25777c478bd9Sstevel@tonic-gate 
25787c478bd9Sstevel@tonic-gate 		reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);
25797c478bd9Sstevel@tonic-gate 		if (reqp == NULL)
25807c478bd9Sstevel@tonic-gate 			return (EAGAIN);
25817c478bd9Sstevel@tonic-gate 		reqp->aio_req.aio_uio = &(reqp->aio_req_uio);
25827c478bd9Sstevel@tonic-gate 		reqp->aio_req.aio_uio->uio_iov = &(reqp->aio_req_iov);
25837c478bd9Sstevel@tonic-gate 		reqp->aio_req.aio_private = reqp;
25847c478bd9Sstevel@tonic-gate 	}
25857c478bd9Sstevel@tonic-gate 
25867c478bd9Sstevel@tonic-gate 	reqp->aio_req_buf.b_offset = -1;
25877c478bd9Sstevel@tonic-gate 	reqp->aio_req_resultp = resultp;
25887c478bd9Sstevel@tonic-gate 	if (aio_hash_insert(reqp, aiop)) {
25897c478bd9Sstevel@tonic-gate 		reqp->aio_req_next = aiop->aio_free;
25907c478bd9Sstevel@tonic-gate 		aiop->aio_free = reqp;
25917c478bd9Sstevel@tonic-gate 		return (EINVAL);
25927c478bd9Sstevel@tonic-gate 	}
25937c478bd9Sstevel@tonic-gate 	*nreqp = reqp;
25947c478bd9Sstevel@tonic-gate 	return (0);
25957c478bd9Sstevel@tonic-gate }
25967c478bd9Sstevel@tonic-gate 
25977c478bd9Sstevel@tonic-gate /*
25987c478bd9Sstevel@tonic-gate  * Allocate an aio_lio_t struct.
25997c478bd9Sstevel@tonic-gate  */
26007c478bd9Sstevel@tonic-gate static int
26017c478bd9Sstevel@tonic-gate aio_lio_alloc(aio_lio_t **head)
26027c478bd9Sstevel@tonic-gate {
26037c478bd9Sstevel@tonic-gate 	aio_lio_t *liop;
26047c478bd9Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
26057c478bd9Sstevel@tonic-gate 
26067c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
26077c478bd9Sstevel@tonic-gate 
26087c478bd9Sstevel@tonic-gate 	if ((liop = aiop->aio_lio_free) != NULL) {
26097c478bd9Sstevel@tonic-gate 		aiop->aio_lio_free = liop->lio_next;
26107c478bd9Sstevel@tonic-gate 	} else {
26117c478bd9Sstevel@tonic-gate 		/*
26127c478bd9Sstevel@tonic-gate 		 * Check whether memory is getting tight.
26137c478bd9Sstevel@tonic-gate 		 * This is a temporary mechanism to avoid memory
26147c478bd9Sstevel@tonic-gate 		 * exhaustion by a single process until we come up
26157c478bd9Sstevel@tonic-gate 		 * with a per process solution such as setrlimit().
26167c478bd9Sstevel@tonic-gate 		 */
26177c478bd9Sstevel@tonic-gate 		if (freemem < desfree)
26187c478bd9Sstevel@tonic-gate 			return (EAGAIN);
26197c478bd9Sstevel@tonic-gate 
26207c478bd9Sstevel@tonic-gate 		liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
26217c478bd9Sstevel@tonic-gate 		if (liop == NULL)
26227c478bd9Sstevel@tonic-gate 			return (EAGAIN);
26237c478bd9Sstevel@tonic-gate 	}
26247c478bd9Sstevel@tonic-gate 	*head = liop;
26257c478bd9Sstevel@tonic-gate 	return (0);
26267c478bd9Sstevel@tonic-gate }
26277c478bd9Sstevel@tonic-gate 
26287c478bd9Sstevel@tonic-gate /*
26297c478bd9Sstevel@tonic-gate  * this is a special per-process thread that is only activated if
26307c478bd9Sstevel@tonic-gate  * the process is unmapping a segment with outstanding aio. normally,
26317c478bd9Sstevel@tonic-gate  * the process will have completed the aio before unmapping the
26327c478bd9Sstevel@tonic-gate  * segment. If the process does unmap a segment with outstanding aio,
26337c478bd9Sstevel@tonic-gate  * this special thread will guarentee that the locked pages due to
26347c478bd9Sstevel@tonic-gate  * aphysio() are released, thereby permitting the segment to be
2635*b0b27ce6Spraks  * unmapped. In addition to this, the cleanup thread is woken up
2636*b0b27ce6Spraks  * during DR operations to release the locked pages.
26377c478bd9Sstevel@tonic-gate  */
26387c478bd9Sstevel@tonic-gate 
26397c478bd9Sstevel@tonic-gate static int
26407c478bd9Sstevel@tonic-gate aio_cleanup_thread(aio_t *aiop)
26417c478bd9Sstevel@tonic-gate {
26427c478bd9Sstevel@tonic-gate 	proc_t *p = curproc;
26437c478bd9Sstevel@tonic-gate 	struct as *as = p->p_as;
26447c478bd9Sstevel@tonic-gate 	int poked = 0;
26457c478bd9Sstevel@tonic-gate 	kcondvar_t *cvp;
26467c478bd9Sstevel@tonic-gate 	int exit_flag = 0;
2647*b0b27ce6Spraks 	int rqclnup = 0;
26487c478bd9Sstevel@tonic-gate 
26497c478bd9Sstevel@tonic-gate 	sigfillset(&curthread->t_hold);
26507c478bd9Sstevel@tonic-gate 	sigdiffset(&curthread->t_hold, &cantmask);
26517c478bd9Sstevel@tonic-gate 	for (;;) {
26527c478bd9Sstevel@tonic-gate 		/*
26537c478bd9Sstevel@tonic-gate 		 * if a segment is being unmapped, and the current
26547c478bd9Sstevel@tonic-gate 		 * process's done queue is not empty, then every request
26557c478bd9Sstevel@tonic-gate 		 * on the doneq with locked resources should be forced
26567c478bd9Sstevel@tonic-gate 		 * to release their locks. By moving the doneq request
26577c478bd9Sstevel@tonic-gate 		 * to the cleanupq, aio_cleanup() will process the cleanupq,
26587c478bd9Sstevel@tonic-gate 		 * and place requests back onto the doneq. All requests
26597c478bd9Sstevel@tonic-gate 		 * processed by aio_cleanup() will have their physical
26607c478bd9Sstevel@tonic-gate 		 * resources unlocked.
26617c478bd9Sstevel@tonic-gate 		 */
26627c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
26637c478bd9Sstevel@tonic-gate 		if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
26647c478bd9Sstevel@tonic-gate 			aiop->aio_flags |= AIO_CLEANUP;
26657c478bd9Sstevel@tonic-gate 			mutex_enter(&as->a_contents);
2666*b0b27ce6Spraks 			if (aiop->aio_rqclnup) {
2667*b0b27ce6Spraks 				aiop->aio_rqclnup = 0;
2668*b0b27ce6Spraks 				rqclnup = 1;
2669*b0b27ce6Spraks 			}
2670*b0b27ce6Spraks 
2671*b0b27ce6Spraks 			if ((rqclnup || AS_ISUNMAPWAIT(as)) &&
2672*b0b27ce6Spraks 					aiop->aio_doneq) {
26737c478bd9Sstevel@tonic-gate 				aio_req_t *doneqhead = aiop->aio_doneq;
26747c478bd9Sstevel@tonic-gate 				mutex_exit(&as->a_contents);
26757c478bd9Sstevel@tonic-gate 				aiop->aio_doneq = NULL;
26767c478bd9Sstevel@tonic-gate 				aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
26777c478bd9Sstevel@tonic-gate 			} else {
26787c478bd9Sstevel@tonic-gate 				mutex_exit(&as->a_contents);
26797c478bd9Sstevel@tonic-gate 			}
26807c478bd9Sstevel@tonic-gate 		}
26817c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
26827c478bd9Sstevel@tonic-gate 		aio_cleanup(AIO_CLEANUP_THREAD);
26837c478bd9Sstevel@tonic-gate 		/*
26847c478bd9Sstevel@tonic-gate 		 * thread should block on the cleanupcv while
26857c478bd9Sstevel@tonic-gate 		 * AIO_CLEANUP is set.
26867c478bd9Sstevel@tonic-gate 		 */
26877c478bd9Sstevel@tonic-gate 		cvp = &aiop->aio_cleanupcv;
26887c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
26897c478bd9Sstevel@tonic-gate 
26907c478bd9Sstevel@tonic-gate 		if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
26917c478bd9Sstevel@tonic-gate 		    aiop->aio_notifyq != NULL ||
26927c478bd9Sstevel@tonic-gate 		    aiop->aio_portcleanupq != NULL) {
26937c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
26947c478bd9Sstevel@tonic-gate 			continue;
26957c478bd9Sstevel@tonic-gate 		}
26967c478bd9Sstevel@tonic-gate 		mutex_enter(&as->a_contents);
26977c478bd9Sstevel@tonic-gate 
26987c478bd9Sstevel@tonic-gate 		/*
26997c478bd9Sstevel@tonic-gate 		 * AIO_CLEANUP determines when the cleanup thread
2700*b0b27ce6Spraks 		 * should be active. This flag is set when
2701*b0b27ce6Spraks 		 * the cleanup thread is awakened by as_unmap() or
2702*b0b27ce6Spraks 		 * due to DR operations.
27037c478bd9Sstevel@tonic-gate 		 * The flag is cleared when the blocking as_unmap()
27047c478bd9Sstevel@tonic-gate 		 * that originally awakened us is allowed to
27057c478bd9Sstevel@tonic-gate 		 * complete. as_unmap() blocks when trying to
27067c478bd9Sstevel@tonic-gate 		 * unmap a segment that has SOFTLOCKed pages. when
27077c478bd9Sstevel@tonic-gate 		 * the segment's pages are all SOFTUNLOCKed,
2708*b0b27ce6Spraks 		 * as->a_flags & AS_UNMAPWAIT should be zero.
2709*b0b27ce6Spraks 		 *
2710*b0b27ce6Spraks 		 * In case of cleanup request by DR, the flag is cleared
2711*b0b27ce6Spraks 		 * once all the pending aio requests have been processed.
2712*b0b27ce6Spraks 		 *
2713*b0b27ce6Spraks 		 * The flag shouldn't be cleared right away if the
2714*b0b27ce6Spraks 		 * cleanup thread was interrupted because the process
2715*b0b27ce6Spraks 		 * is doing forkall(). This happens when cv_wait_sig()
2716*b0b27ce6Spraks 		 * returns zero, because it was awakened by a pokelwps().
2717*b0b27ce6Spraks 		 * If the process is not exiting, it must be doing forkall().
27187c478bd9Sstevel@tonic-gate 		 */
27197c478bd9Sstevel@tonic-gate 		if ((poked == 0) &&
2720*b0b27ce6Spraks 			((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
2721*b0b27ce6Spraks 					(aiop->aio_pending == 0))) {
27227c478bd9Sstevel@tonic-gate 			aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
27237c478bd9Sstevel@tonic-gate 			cvp = &as->a_cv;
2724*b0b27ce6Spraks 			rqclnup = 0;
27257c478bd9Sstevel@tonic-gate 		}
27267c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
27277c478bd9Sstevel@tonic-gate 		if (poked) {
27287c478bd9Sstevel@tonic-gate 			/*
27297c478bd9Sstevel@tonic-gate 			 * If the process is exiting/killed, don't return
27307c478bd9Sstevel@tonic-gate 			 * immediately without waiting for pending I/O's
27317c478bd9Sstevel@tonic-gate 			 * and releasing the page locks.
27327c478bd9Sstevel@tonic-gate 			 */
27337c478bd9Sstevel@tonic-gate 			if (p->p_flag & (SEXITLWPS|SKILLED)) {
27347c478bd9Sstevel@tonic-gate 				/*
27357c478bd9Sstevel@tonic-gate 				 * If exit_flag is set, then it is
27367c478bd9Sstevel@tonic-gate 				 * safe to exit because we have released
27377c478bd9Sstevel@tonic-gate 				 * page locks of completed I/O's.
27387c478bd9Sstevel@tonic-gate 				 */
27397c478bd9Sstevel@tonic-gate 				if (exit_flag)
27407c478bd9Sstevel@tonic-gate 					break;
27417c478bd9Sstevel@tonic-gate 
27427c478bd9Sstevel@tonic-gate 				mutex_exit(&as->a_contents);
27437c478bd9Sstevel@tonic-gate 
27447c478bd9Sstevel@tonic-gate 				/*
27457c478bd9Sstevel@tonic-gate 				 * Wait for all the pending aio to complete.
27467c478bd9Sstevel@tonic-gate 				 */
27477c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
27487c478bd9Sstevel@tonic-gate 				aiop->aio_flags |= AIO_REQ_BLOCK;
27497c478bd9Sstevel@tonic-gate 				while (aiop->aio_pending != 0)
27507c478bd9Sstevel@tonic-gate 					cv_wait(&aiop->aio_cleanupcv,
27517c478bd9Sstevel@tonic-gate 						&aiop->aio_mutex);
27527c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
27537c478bd9Sstevel@tonic-gate 				exit_flag = 1;
27547c478bd9Sstevel@tonic-gate 				continue;
27557c478bd9Sstevel@tonic-gate 			} else if (p->p_flag &
27567c478bd9Sstevel@tonic-gate 			    (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
27577c478bd9Sstevel@tonic-gate 				/*
27587c478bd9Sstevel@tonic-gate 				 * hold LWP until it
27597c478bd9Sstevel@tonic-gate 				 * is continued.
27607c478bd9Sstevel@tonic-gate 				 */
27617c478bd9Sstevel@tonic-gate 				mutex_exit(&as->a_contents);
27627c478bd9Sstevel@tonic-gate 				mutex_enter(&p->p_lock);
27637c478bd9Sstevel@tonic-gate 				stop(PR_SUSPENDED, SUSPEND_NORMAL);
27647c478bd9Sstevel@tonic-gate 				mutex_exit(&p->p_lock);
27657c478bd9Sstevel@tonic-gate 				poked = 0;
27667c478bd9Sstevel@tonic-gate 				continue;
27677c478bd9Sstevel@tonic-gate 			}
27687c478bd9Sstevel@tonic-gate 		} else {
27697c478bd9Sstevel@tonic-gate 			/*
27707c478bd9Sstevel@tonic-gate 			 * When started this thread will sleep on as->a_cv.
27717c478bd9Sstevel@tonic-gate 			 * as_unmap will awake this thread if the
27727c478bd9Sstevel@tonic-gate 			 * segment has SOFTLOCKed pages (poked = 0).
27737c478bd9Sstevel@tonic-gate 			 * 1. pokelwps() awakes this thread =>
27747c478bd9Sstevel@tonic-gate 			 *    break the loop to check SEXITLWPS, SHOLDFORK, etc
27757c478bd9Sstevel@tonic-gate 			 * 2. as_unmap awakes this thread =>
27767c478bd9Sstevel@tonic-gate 			 *    to break the loop it is necessary that
27777c478bd9Sstevel@tonic-gate 			 *    - AS_UNMAPWAIT is set (as_unmap is waiting for
27787c478bd9Sstevel@tonic-gate 			 *	memory to be unlocked)
27797c478bd9Sstevel@tonic-gate 			 *    - AIO_CLEANUP is not set
27807c478bd9Sstevel@tonic-gate 			 *	(if AIO_CLEANUP is set we have to wait for
27817c478bd9Sstevel@tonic-gate 			 *	pending requests. aio_done will send a signal
27827c478bd9Sstevel@tonic-gate 			 *	for every request which completes to continue
27837c478bd9Sstevel@tonic-gate 			 *	unmapping the corresponding address range)
2784*b0b27ce6Spraks 			 * 3. A cleanup request will wake this thread up, ex.
2785*b0b27ce6Spraks 			 *    by the DR operations. The aio_rqclnup flag will
2786*b0b27ce6Spraks 			 *    be set.
27877c478bd9Sstevel@tonic-gate 			 */
27887c478bd9Sstevel@tonic-gate 			while (poked == 0) {
2789*b0b27ce6Spraks 				/*
2790*b0b27ce6Spraks 				 * we need to handle cleanup requests
2791*b0b27ce6Spraks 				 * that come in after we had just cleaned up,
2792*b0b27ce6Spraks 				 * so that we do cleanup of any new aio
2793*b0b27ce6Spraks 				 * requests that got completed and have
2794*b0b27ce6Spraks 				 * locked resources.
2795*b0b27ce6Spraks 				 */
2796*b0b27ce6Spraks 				if ((aiop->aio_rqclnup ||
2797*b0b27ce6Spraks 					(AS_ISUNMAPWAIT(as) != 0)) &&
2798*b0b27ce6Spraks 					(aiop->aio_flags & AIO_CLEANUP) == 0)
27997c478bd9Sstevel@tonic-gate 					break;
28007c478bd9Sstevel@tonic-gate 				poked = !cv_wait_sig(cvp, &as->a_contents);
28017c478bd9Sstevel@tonic-gate 				if (AS_ISUNMAPWAIT(as) == 0)
28027c478bd9Sstevel@tonic-gate 					cv_signal(cvp);
28037c478bd9Sstevel@tonic-gate 				if (aiop->aio_outstanding != 0)
28047c478bd9Sstevel@tonic-gate 					break;
28057c478bd9Sstevel@tonic-gate 			}
28067c478bd9Sstevel@tonic-gate 		}
28077c478bd9Sstevel@tonic-gate 		mutex_exit(&as->a_contents);
28087c478bd9Sstevel@tonic-gate 	}
28097c478bd9Sstevel@tonic-gate exit:
28107c478bd9Sstevel@tonic-gate 	mutex_exit(&as->a_contents);
28117c478bd9Sstevel@tonic-gate 	ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
28127c478bd9Sstevel@tonic-gate 	aston(curthread);	/* make thread do post_syscall */
28137c478bd9Sstevel@tonic-gate 	return (0);
28147c478bd9Sstevel@tonic-gate }
28157c478bd9Sstevel@tonic-gate 
28167c478bd9Sstevel@tonic-gate /*
28177c478bd9Sstevel@tonic-gate  * save a reference to a user's outstanding aio in a hash list.
28187c478bd9Sstevel@tonic-gate  */
28197c478bd9Sstevel@tonic-gate static int
28207c478bd9Sstevel@tonic-gate aio_hash_insert(
28217c478bd9Sstevel@tonic-gate 	aio_req_t *aio_reqp,
28227c478bd9Sstevel@tonic-gate 	aio_t *aiop)
28237c478bd9Sstevel@tonic-gate {
28247c478bd9Sstevel@tonic-gate 	long index;
28257c478bd9Sstevel@tonic-gate 	aio_result_t *resultp = aio_reqp->aio_req_resultp;
28267c478bd9Sstevel@tonic-gate 	aio_req_t *current;
28277c478bd9Sstevel@tonic-gate 	aio_req_t **nextp;
28287c478bd9Sstevel@tonic-gate 
28297c478bd9Sstevel@tonic-gate 	index = AIO_HASH(resultp);
28307c478bd9Sstevel@tonic-gate 	nextp = &aiop->aio_hash[index];
28317c478bd9Sstevel@tonic-gate 	while ((current = *nextp) != NULL) {
28327c478bd9Sstevel@tonic-gate 		if (current->aio_req_resultp == resultp)
28337c478bd9Sstevel@tonic-gate 			return (DUPLICATE);
28347c478bd9Sstevel@tonic-gate 		nextp = &current->aio_hash_next;
28357c478bd9Sstevel@tonic-gate 	}
28367c478bd9Sstevel@tonic-gate 	*nextp = aio_reqp;
28377c478bd9Sstevel@tonic-gate 	aio_reqp->aio_hash_next = NULL;
28387c478bd9Sstevel@tonic-gate 	return (0);
28397c478bd9Sstevel@tonic-gate }
28407c478bd9Sstevel@tonic-gate 
28417c478bd9Sstevel@tonic-gate static int
28427c478bd9Sstevel@tonic-gate (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *,
28437c478bd9Sstevel@tonic-gate     cred_t *)
28447c478bd9Sstevel@tonic-gate {
28457c478bd9Sstevel@tonic-gate 	struct snode *sp;
28467c478bd9Sstevel@tonic-gate 	dev_t		dev;
28477c478bd9Sstevel@tonic-gate 	struct cb_ops  	*cb;
28487c478bd9Sstevel@tonic-gate 	major_t		major;
28497c478bd9Sstevel@tonic-gate 	int		(*aio_func)();
28507c478bd9Sstevel@tonic-gate 
28517c478bd9Sstevel@tonic-gate 	dev = vp->v_rdev;
28527c478bd9Sstevel@tonic-gate 	major = getmajor(dev);
28537c478bd9Sstevel@tonic-gate 
28547c478bd9Sstevel@tonic-gate 	/*
28557c478bd9Sstevel@tonic-gate 	 * return NULL for requests to files and STREAMs so
28567c478bd9Sstevel@tonic-gate 	 * that libaio takes care of them.
28577c478bd9Sstevel@tonic-gate 	 */
28587c478bd9Sstevel@tonic-gate 	if (vp->v_type == VCHR) {
28597c478bd9Sstevel@tonic-gate 		/* no stream device for kaio */
28607c478bd9Sstevel@tonic-gate 		if (STREAMSTAB(major)) {
28617c478bd9Sstevel@tonic-gate 			return (NULL);
28627c478bd9Sstevel@tonic-gate 		}
28637c478bd9Sstevel@tonic-gate 	} else {
28647c478bd9Sstevel@tonic-gate 		return (NULL);
28657c478bd9Sstevel@tonic-gate 	}
28667c478bd9Sstevel@tonic-gate 
28677c478bd9Sstevel@tonic-gate 	/*
28687c478bd9Sstevel@tonic-gate 	 * Check old drivers which do not have async I/O entry points.
28697c478bd9Sstevel@tonic-gate 	 */
28707c478bd9Sstevel@tonic-gate 	if (devopsp[major]->devo_rev < 3)
28717c478bd9Sstevel@tonic-gate 		return (NULL);
28727c478bd9Sstevel@tonic-gate 
28737c478bd9Sstevel@tonic-gate 	cb = devopsp[major]->devo_cb_ops;
28747c478bd9Sstevel@tonic-gate 
28757c478bd9Sstevel@tonic-gate 	if (cb->cb_rev < 1)
28767c478bd9Sstevel@tonic-gate 		return (NULL);
28777c478bd9Sstevel@tonic-gate 
28787c478bd9Sstevel@tonic-gate 	/*
28797c478bd9Sstevel@tonic-gate 	 * Check whether this device is a block device.
28807c478bd9Sstevel@tonic-gate 	 * Kaio is not supported for devices like tty.
28817c478bd9Sstevel@tonic-gate 	 */
28827c478bd9Sstevel@tonic-gate 	if (cb->cb_strategy == nodev || cb->cb_strategy == NULL)
28837c478bd9Sstevel@tonic-gate 		return (NULL);
28847c478bd9Sstevel@tonic-gate 
28857c478bd9Sstevel@tonic-gate 	/*
28867c478bd9Sstevel@tonic-gate 	 * Clustering: If vnode is a PXFS vnode, then the device may be remote.
28877c478bd9Sstevel@tonic-gate 	 * We cannot call the driver directly. Instead return the
28887c478bd9Sstevel@tonic-gate 	 * PXFS functions.
28897c478bd9Sstevel@tonic-gate 	 */
28907c478bd9Sstevel@tonic-gate 
28917c478bd9Sstevel@tonic-gate 	if (IS_PXFSVP(vp)) {
28927c478bd9Sstevel@tonic-gate 		if (mode & FREAD)
28937c478bd9Sstevel@tonic-gate 			return (clpxfs_aio_read);
28947c478bd9Sstevel@tonic-gate 		else
28957c478bd9Sstevel@tonic-gate 			return (clpxfs_aio_write);
28967c478bd9Sstevel@tonic-gate 	}
28977c478bd9Sstevel@tonic-gate 	if (mode & FREAD)
28987c478bd9Sstevel@tonic-gate 		aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read;
28997c478bd9Sstevel@tonic-gate 	else
29007c478bd9Sstevel@tonic-gate 		aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write;
29017c478bd9Sstevel@tonic-gate 
29027c478bd9Sstevel@tonic-gate 	/*
29037c478bd9Sstevel@tonic-gate 	 * Do we need this ?
29047c478bd9Sstevel@tonic-gate 	 * nodev returns ENXIO anyway.
29057c478bd9Sstevel@tonic-gate 	 */
29067c478bd9Sstevel@tonic-gate 	if (aio_func == nodev)
29077c478bd9Sstevel@tonic-gate 		return (NULL);
29087c478bd9Sstevel@tonic-gate 
29097c478bd9Sstevel@tonic-gate 	sp = VTOS(vp);
29107c478bd9Sstevel@tonic-gate 	smark(sp, SACC);
29117c478bd9Sstevel@tonic-gate 	return (aio_func);
29127c478bd9Sstevel@tonic-gate }
29137c478bd9Sstevel@tonic-gate 
29147c478bd9Sstevel@tonic-gate /*
29157c478bd9Sstevel@tonic-gate  * Clustering: We want check_vp to return a function prototyped
29167c478bd9Sstevel@tonic-gate  * correctly that will be common to both PXFS and regular case.
29177c478bd9Sstevel@tonic-gate  * We define this intermediate function that will do the right
29187c478bd9Sstevel@tonic-gate  * thing for driver cases.
29197c478bd9Sstevel@tonic-gate  */
29207c478bd9Sstevel@tonic-gate 
29217c478bd9Sstevel@tonic-gate static int
29227c478bd9Sstevel@tonic-gate driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
29237c478bd9Sstevel@tonic-gate {
29247c478bd9Sstevel@tonic-gate 	dev_t dev;
29257c478bd9Sstevel@tonic-gate 	struct cb_ops  	*cb;
29267c478bd9Sstevel@tonic-gate 
29277c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_type == VCHR);
29287c478bd9Sstevel@tonic-gate 	ASSERT(!IS_PXFSVP(vp));
29297c478bd9Sstevel@tonic-gate 	dev = VTOS(vp)->s_dev;
29307c478bd9Sstevel@tonic-gate 	ASSERT(STREAMSTAB(getmajor(dev)) == NULL);
29317c478bd9Sstevel@tonic-gate 
29327c478bd9Sstevel@tonic-gate 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
29337c478bd9Sstevel@tonic-gate 
29347c478bd9Sstevel@tonic-gate 	ASSERT(cb->cb_awrite != nodev);
29357c478bd9Sstevel@tonic-gate 	return ((*cb->cb_awrite)(dev, aio, cred_p));
29367c478bd9Sstevel@tonic-gate }
29377c478bd9Sstevel@tonic-gate 
29387c478bd9Sstevel@tonic-gate /*
29397c478bd9Sstevel@tonic-gate  * Clustering: We want check_vp to return a function prototyped
29407c478bd9Sstevel@tonic-gate  * correctly that will be common to both PXFS and regular case.
29417c478bd9Sstevel@tonic-gate  * We define this intermediate function that will do the right
29427c478bd9Sstevel@tonic-gate  * thing for driver cases.
29437c478bd9Sstevel@tonic-gate  */
29447c478bd9Sstevel@tonic-gate 
29457c478bd9Sstevel@tonic-gate static int
29467c478bd9Sstevel@tonic-gate driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
29477c478bd9Sstevel@tonic-gate {
29487c478bd9Sstevel@tonic-gate 	dev_t dev;
29497c478bd9Sstevel@tonic-gate 	struct cb_ops  	*cb;
29507c478bd9Sstevel@tonic-gate 
29517c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_type == VCHR);
29527c478bd9Sstevel@tonic-gate 	ASSERT(!IS_PXFSVP(vp));
29537c478bd9Sstevel@tonic-gate 	dev = VTOS(vp)->s_dev;
29547c478bd9Sstevel@tonic-gate 	ASSERT(!STREAMSTAB(getmajor(dev)));
29557c478bd9Sstevel@tonic-gate 
29567c478bd9Sstevel@tonic-gate 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
29577c478bd9Sstevel@tonic-gate 
29587c478bd9Sstevel@tonic-gate 	ASSERT(cb->cb_aread != nodev);
29597c478bd9Sstevel@tonic-gate 	return ((*cb->cb_aread)(dev, aio, cred_p));
29607c478bd9Sstevel@tonic-gate }
29617c478bd9Sstevel@tonic-gate 
29627c478bd9Sstevel@tonic-gate /*
29637c478bd9Sstevel@tonic-gate  * This routine is called when a largefile call is made by a 32bit
29647c478bd9Sstevel@tonic-gate  * process on a ILP32 or LP64 kernel. All 64bit processes are large
29657c478bd9Sstevel@tonic-gate  * file by definition and will call alio() instead.
29667c478bd9Sstevel@tonic-gate  */
29677c478bd9Sstevel@tonic-gate static int
29687c478bd9Sstevel@tonic-gate alioLF(
29697c478bd9Sstevel@tonic-gate 	int		mode_arg,
29707c478bd9Sstevel@tonic-gate 	void		*aiocb_arg,
29717c478bd9Sstevel@tonic-gate 	int		nent,
29727c478bd9Sstevel@tonic-gate 	void		*sigev)
29737c478bd9Sstevel@tonic-gate {
29747c478bd9Sstevel@tonic-gate 	file_t		*fp;
29757c478bd9Sstevel@tonic-gate 	file_t		*prev_fp = NULL;
29767c478bd9Sstevel@tonic-gate 	int		prev_mode = -1;
29777c478bd9Sstevel@tonic-gate 	struct vnode	*vp;
29787c478bd9Sstevel@tonic-gate 	aio_lio_t	*head;
29797c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp;
29807c478bd9Sstevel@tonic-gate 	aio_t		*aiop;
29817c478bd9Sstevel@tonic-gate 	caddr_t		cbplist;
29827c478bd9Sstevel@tonic-gate 	aiocb64_32_t	*cbp;
29837c478bd9Sstevel@tonic-gate 	caddr32_t	*ucbp;
29847c478bd9Sstevel@tonic-gate 	aiocb64_32_t	cb64;
29857c478bd9Sstevel@tonic-gate 	aiocb64_32_t	*aiocb = &cb64;
29867c478bd9Sstevel@tonic-gate #ifdef _LP64
29877c478bd9Sstevel@tonic-gate 	aiocb_t		aiocb_n;
29887c478bd9Sstevel@tonic-gate #endif
29897c478bd9Sstevel@tonic-gate 	struct sigevent32	sigevk;
29907c478bd9Sstevel@tonic-gate 	sigqueue_t	*sqp;
29917c478bd9Sstevel@tonic-gate 	int		(*aio_func)();
29927c478bd9Sstevel@tonic-gate 	int		mode;
29937c478bd9Sstevel@tonic-gate 	int		error = 0, aio_errors = 0;
29947c478bd9Sstevel@tonic-gate 	int		i;
29957c478bd9Sstevel@tonic-gate 	size_t		ssize;
29967c478bd9Sstevel@tonic-gate 	int		deadhead = 0;
29977c478bd9Sstevel@tonic-gate 	int		aio_notsupported = 0;
29987c478bd9Sstevel@tonic-gate 	int		aio_use_port = 0;
29997c478bd9Sstevel@tonic-gate 	port_kevent_t	*pkevtp = NULL;
30007c478bd9Sstevel@tonic-gate 	port_notify32_t	pnotify;
30017c478bd9Sstevel@tonic-gate 
30027c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
30037c478bd9Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
30047c478bd9Sstevel@tonic-gate 		return (EINVAL);
30057c478bd9Sstevel@tonic-gate 
30067c478bd9Sstevel@tonic-gate 	ASSERT(get_udatamodel() == DATAMODEL_ILP32);
30077c478bd9Sstevel@tonic-gate 
30087c478bd9Sstevel@tonic-gate 	ssize = (sizeof (caddr32_t) * nent);
30097c478bd9Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
30107c478bd9Sstevel@tonic-gate 	ucbp = (caddr32_t *)cbplist;
30117c478bd9Sstevel@tonic-gate 
30127c478bd9Sstevel@tonic-gate 	if (copyin(aiocb_arg, cbplist, ssize)) {
30137c478bd9Sstevel@tonic-gate 		kmem_free(cbplist, ssize);
30147c478bd9Sstevel@tonic-gate 		return (EFAULT);
30157c478bd9Sstevel@tonic-gate 	}
30167c478bd9Sstevel@tonic-gate 
30177c478bd9Sstevel@tonic-gate 	if (sigev) {
30187c478bd9Sstevel@tonic-gate 		if (copyin(sigev, &sigevk, sizeof (sigevk))) {
30197c478bd9Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
30207c478bd9Sstevel@tonic-gate 			return (EFAULT);
30217c478bd9Sstevel@tonic-gate 		}
30227c478bd9Sstevel@tonic-gate 	}
30237c478bd9Sstevel@tonic-gate 
30247c478bd9Sstevel@tonic-gate 	/*
30257c478bd9Sstevel@tonic-gate 	 * a list head should be allocated if notification is
30267c478bd9Sstevel@tonic-gate 	 * enabled for this list.
30277c478bd9Sstevel@tonic-gate 	 */
30287c478bd9Sstevel@tonic-gate 	head = NULL;
30297c478bd9Sstevel@tonic-gate 
30307c478bd9Sstevel@tonic-gate 	/* Event Ports  */
30317c478bd9Sstevel@tonic-gate 
30327c478bd9Sstevel@tonic-gate 	if (sigev && sigevk.sigev_notify == SIGEV_PORT) {
30337c478bd9Sstevel@tonic-gate 		/* Use PORT for completion notification */
30347c478bd9Sstevel@tonic-gate 		if (copyin((void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
30357c478bd9Sstevel@tonic-gate 		    &pnotify, sizeof (port_notify32_t))) {
30367c478bd9Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
30377c478bd9Sstevel@tonic-gate 			return (EFAULT);
30387c478bd9Sstevel@tonic-gate 		}
30397c478bd9Sstevel@tonic-gate 		/* use event ports for the list of aiocbs */
30407c478bd9Sstevel@tonic-gate 		aio_use_port = 1;
30417c478bd9Sstevel@tonic-gate 		error = port_alloc_event(pnotify.portnfy_port,
30427c478bd9Sstevel@tonic-gate 		    PORT_ALLOC_PRIVATE, PORT_SOURCE_AIO, &pkevtp);
30437c478bd9Sstevel@tonic-gate 		if (error) {
30447c478bd9Sstevel@tonic-gate 			if (error == ENOMEM)
30457c478bd9Sstevel@tonic-gate 				error = EAGAIN;
30467c478bd9Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
30477c478bd9Sstevel@tonic-gate 			return (error);
30487c478bd9Sstevel@tonic-gate 		}
30497c478bd9Sstevel@tonic-gate 	} else if ((mode_arg == LIO_WAIT) || sigev) {
30507c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
30517c478bd9Sstevel@tonic-gate 		error = aio_lio_alloc(&head);
30527c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
30537c478bd9Sstevel@tonic-gate 		if (error)
30547c478bd9Sstevel@tonic-gate 			goto done;
30557c478bd9Sstevel@tonic-gate 		deadhead = 1;
30567c478bd9Sstevel@tonic-gate 		head->lio_nent = nent;
30577c478bd9Sstevel@tonic-gate 		head->lio_refcnt = nent;
30587c478bd9Sstevel@tonic-gate 		if (sigev && (sigevk.sigev_notify == SIGEV_SIGNAL) &&
30597c478bd9Sstevel@tonic-gate 		    (sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG)) {
30607c478bd9Sstevel@tonic-gate 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
30617c478bd9Sstevel@tonic-gate 			if (sqp == NULL) {
30627c478bd9Sstevel@tonic-gate 				error = EAGAIN;
30637c478bd9Sstevel@tonic-gate 				goto done;
30647c478bd9Sstevel@tonic-gate 			}
30657c478bd9Sstevel@tonic-gate 			sqp->sq_func = NULL;
30667c478bd9Sstevel@tonic-gate 			sqp->sq_next = NULL;
30677c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_code = SI_ASYNCIO;
30687c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_pid = curproc->p_pid;
30697c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_ctid = PRCTID(curproc);
30707c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_zoneid = getzoneid();
30717c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
30727c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_signo = sigevk.sigev_signo;
30737c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_value.sival_int =
30747c478bd9Sstevel@tonic-gate 			    sigevk.sigev_value.sival_int;
30757c478bd9Sstevel@tonic-gate 			head->lio_sigqp = sqp;
30767c478bd9Sstevel@tonic-gate 		} else {
30777c478bd9Sstevel@tonic-gate 			head->lio_sigqp = NULL;
30787c478bd9Sstevel@tonic-gate 		}
30797c478bd9Sstevel@tonic-gate 	}
30807c478bd9Sstevel@tonic-gate 
30817c478bd9Sstevel@tonic-gate 	for (i = 0; i < nent; i++, ucbp++) {
30827c478bd9Sstevel@tonic-gate 
30837c478bd9Sstevel@tonic-gate 		cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
30847c478bd9Sstevel@tonic-gate 		/* skip entry if it can't be copied. */
30857c478bd9Sstevel@tonic-gate 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (aiocb64_32_t))) {
30867c478bd9Sstevel@tonic-gate 			if (head) {
30877c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
30887c478bd9Sstevel@tonic-gate 				head->lio_nent--;
30897c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
30907c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
30917c478bd9Sstevel@tonic-gate 			}
30927c478bd9Sstevel@tonic-gate 			continue;
30937c478bd9Sstevel@tonic-gate 		}
30947c478bd9Sstevel@tonic-gate 
30957c478bd9Sstevel@tonic-gate 		/* skip if opcode for aiocb is LIO_NOP */
30967c478bd9Sstevel@tonic-gate 
30977c478bd9Sstevel@tonic-gate 		mode = aiocb->aio_lio_opcode;
30987c478bd9Sstevel@tonic-gate 		if (mode == LIO_NOP) {
30997c478bd9Sstevel@tonic-gate 			cbp = NULL;
31007c478bd9Sstevel@tonic-gate 			if (head) {
31017c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
31027c478bd9Sstevel@tonic-gate 				head->lio_nent--;
31037c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
31047c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
31057c478bd9Sstevel@tonic-gate 			}
31067c478bd9Sstevel@tonic-gate 			continue;
31077c478bd9Sstevel@tonic-gate 		}
31087c478bd9Sstevel@tonic-gate 
31097c478bd9Sstevel@tonic-gate 		/* increment file descriptor's ref count. */
31107c478bd9Sstevel@tonic-gate 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
31117c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
31127c478bd9Sstevel@tonic-gate 			if (head) {
31137c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
31147c478bd9Sstevel@tonic-gate 				head->lio_nent--;
31157c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
31167c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
31177c478bd9Sstevel@tonic-gate 			}
31187c478bd9Sstevel@tonic-gate 			aio_errors++;
31197c478bd9Sstevel@tonic-gate 			continue;
31207c478bd9Sstevel@tonic-gate 		}
31217c478bd9Sstevel@tonic-gate 
31227c478bd9Sstevel@tonic-gate 		vp = fp->f_vnode;
31237c478bd9Sstevel@tonic-gate 
31247c478bd9Sstevel@tonic-gate 		/*
31257c478bd9Sstevel@tonic-gate 		 * check the permission of the partition
31267c478bd9Sstevel@tonic-gate 		 */
31277c478bd9Sstevel@tonic-gate 		mode = aiocb->aio_lio_opcode;
31287c478bd9Sstevel@tonic-gate 		if ((fp->f_flag & mode) == 0) {
31297c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
31307c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
31317c478bd9Sstevel@tonic-gate 			if (head) {
31327c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
31337c478bd9Sstevel@tonic-gate 				head->lio_nent--;
31347c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
31357c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
31367c478bd9Sstevel@tonic-gate 			}
31377c478bd9Sstevel@tonic-gate 			aio_errors++;
31387c478bd9Sstevel@tonic-gate 			continue;
31397c478bd9Sstevel@tonic-gate 		}
31407c478bd9Sstevel@tonic-gate 
31417c478bd9Sstevel@tonic-gate 		/*
31427c478bd9Sstevel@tonic-gate 		 * common case where requests are to the same fd
31437c478bd9Sstevel@tonic-gate 		 * for the same r/w operation
31447c478bd9Sstevel@tonic-gate 		 * for UFS, need to set EBADFD
31457c478bd9Sstevel@tonic-gate 		 */
31467c478bd9Sstevel@tonic-gate 		if ((fp != prev_fp) || (mode != prev_mode)) {
31477c478bd9Sstevel@tonic-gate 			aio_func = check_vp(vp, mode);
31487c478bd9Sstevel@tonic-gate 			if (aio_func == NULL) {
31497c478bd9Sstevel@tonic-gate 				prev_fp = NULL;
31507c478bd9Sstevel@tonic-gate 				releasef(aiocb->aio_fildes);
31517c478bd9Sstevel@tonic-gate 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
31527c478bd9Sstevel@tonic-gate 				aio_notsupported++;
31537c478bd9Sstevel@tonic-gate 				if (head) {
31547c478bd9Sstevel@tonic-gate 					mutex_enter(&aiop->aio_mutex);
31557c478bd9Sstevel@tonic-gate 					head->lio_nent--;
31567c478bd9Sstevel@tonic-gate 					head->lio_refcnt--;
31577c478bd9Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
31587c478bd9Sstevel@tonic-gate 				}
31597c478bd9Sstevel@tonic-gate 				continue;
31607c478bd9Sstevel@tonic-gate 			} else {
31617c478bd9Sstevel@tonic-gate 				prev_fp = fp;
31627c478bd9Sstevel@tonic-gate 				prev_mode = mode;
31637c478bd9Sstevel@tonic-gate 			}
31647c478bd9Sstevel@tonic-gate 		}
31657c478bd9Sstevel@tonic-gate #ifdef	_LP64
31667c478bd9Sstevel@tonic-gate 		aiocb_LFton(aiocb, &aiocb_n);
31677c478bd9Sstevel@tonic-gate 		error = aio_req_setup(&reqp, aiop, &aiocb_n,
31687c478bd9Sstevel@tonic-gate 		    (aio_result_t *)&cbp->aio_resultp, aio_use_port, vp);
31697c478bd9Sstevel@tonic-gate #else
31707c478bd9Sstevel@tonic-gate 		error = aio_req_setupLF(&reqp, aiop, aiocb,
31717c478bd9Sstevel@tonic-gate 		    (aio_result_t *)&cbp->aio_resultp, aio_use_port, vp);
31727c478bd9Sstevel@tonic-gate #endif  /* _LP64 */
31737c478bd9Sstevel@tonic-gate 		if (error) {
31747c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
31757c478bd9Sstevel@tonic-gate 			if (head) {
31767c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
31777c478bd9Sstevel@tonic-gate 				head->lio_nent--;
31787c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
31797c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
31807c478bd9Sstevel@tonic-gate 			}
31817c478bd9Sstevel@tonic-gate 			aio_errors++;
31827c478bd9Sstevel@tonic-gate 			continue;
31837c478bd9Sstevel@tonic-gate 		}
31847c478bd9Sstevel@tonic-gate 
31857c478bd9Sstevel@tonic-gate 		reqp->aio_req_lio = head;
31867c478bd9Sstevel@tonic-gate 		deadhead = 0;
31877c478bd9Sstevel@tonic-gate 
31887c478bd9Sstevel@tonic-gate 		/*
31897c478bd9Sstevel@tonic-gate 		 * Set the errno field now before sending the request to
31907c478bd9Sstevel@tonic-gate 		 * the driver to avoid a race condition
31917c478bd9Sstevel@tonic-gate 		 */
31927c478bd9Sstevel@tonic-gate 		(void) suword32(&cbp->aio_resultp.aio_errno,
31937c478bd9Sstevel@tonic-gate 		    EINPROGRESS);
31947c478bd9Sstevel@tonic-gate 
31957c478bd9Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb32 = *ucbp;
31967c478bd9Sstevel@tonic-gate 
31977c478bd9Sstevel@tonic-gate 		if (aio_use_port) {
31987c478bd9Sstevel@tonic-gate 			reqp->aio_req_port = pnotify.portnfy_port;
31997c478bd9Sstevel@tonic-gate 			error = aio_req_assoc_port32(&aiocb->aio_sigevent,
32007c478bd9Sstevel@tonic-gate 			    (void *)(uintptr_t)pnotify.portnfy_user,
32017c478bd9Sstevel@tonic-gate 			    (aiocb_t *)(uintptr_t)*ucbp, reqp, pkevtp);
32027c478bd9Sstevel@tonic-gate 		}
32037c478bd9Sstevel@tonic-gate 
32047c478bd9Sstevel@tonic-gate 		/*
32057c478bd9Sstevel@tonic-gate 		 * send the request to driver.
32067c478bd9Sstevel@tonic-gate 		 * Clustering: If PXFS vnode, call PXFS function.
32077c478bd9Sstevel@tonic-gate 		 */
32087c478bd9Sstevel@tonic-gate 		if (error == 0) {
32097c478bd9Sstevel@tonic-gate 			if (aiocb->aio_nbytes == 0) {
32107c478bd9Sstevel@tonic-gate 				clear_active_fd(aiocb->aio_fildes);
32117c478bd9Sstevel@tonic-gate 				aio_zerolen(reqp);
32127c478bd9Sstevel@tonic-gate 				continue;
32137c478bd9Sstevel@tonic-gate 			}
32147c478bd9Sstevel@tonic-gate 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
32157c478bd9Sstevel@tonic-gate 			    CRED());
32167c478bd9Sstevel@tonic-gate 		}
32177c478bd9Sstevel@tonic-gate 
32187c478bd9Sstevel@tonic-gate 		/*
32197c478bd9Sstevel@tonic-gate 		 * the fd's ref count is not decremented until the IO has
32207c478bd9Sstevel@tonic-gate 		 * completed unless there was an error.
32217c478bd9Sstevel@tonic-gate 		 */
32227c478bd9Sstevel@tonic-gate 		if (error) {
32237c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
32247c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
32257c478bd9Sstevel@tonic-gate 			if (head) {
32267c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
32277c478bd9Sstevel@tonic-gate 				head->lio_nent--;
32287c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
32297c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
32307c478bd9Sstevel@tonic-gate 			}
32317c478bd9Sstevel@tonic-gate 			if (error == ENOTSUP)
32327c478bd9Sstevel@tonic-gate 				aio_notsupported++;
32337c478bd9Sstevel@tonic-gate 			else
32347c478bd9Sstevel@tonic-gate 				aio_errors++;
32357c478bd9Sstevel@tonic-gate 			lio_set_error(reqp);
32367c478bd9Sstevel@tonic-gate 		} else {
32377c478bd9Sstevel@tonic-gate 			clear_active_fd(aiocb->aio_fildes);
32387c478bd9Sstevel@tonic-gate 		}
32397c478bd9Sstevel@tonic-gate 	}
32407c478bd9Sstevel@tonic-gate 
32417c478bd9Sstevel@tonic-gate 	if (pkevtp)
32427c478bd9Sstevel@tonic-gate 		port_free_event(pkevtp);
32437c478bd9Sstevel@tonic-gate 
32447c478bd9Sstevel@tonic-gate 	if (aio_notsupported) {
32457c478bd9Sstevel@tonic-gate 		error = ENOTSUP;
32467c478bd9Sstevel@tonic-gate 	} else if (aio_errors) {
32477c478bd9Sstevel@tonic-gate 		/*
32487c478bd9Sstevel@tonic-gate 		 * return EIO if any request failed
32497c478bd9Sstevel@tonic-gate 		 */
32507c478bd9Sstevel@tonic-gate 		error = EIO;
32517c478bd9Sstevel@tonic-gate 	}
32527c478bd9Sstevel@tonic-gate 
32537c478bd9Sstevel@tonic-gate 	if (mode_arg == LIO_WAIT) {
32547c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
32557c478bd9Sstevel@tonic-gate 		while (head->lio_refcnt > 0) {
32567c478bd9Sstevel@tonic-gate 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
32577c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
32587c478bd9Sstevel@tonic-gate 				error = EINTR;
32597c478bd9Sstevel@tonic-gate 				goto done;
32607c478bd9Sstevel@tonic-gate 			}
32617c478bd9Sstevel@tonic-gate 		}
32627c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
32637c478bd9Sstevel@tonic-gate 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
32647c478bd9Sstevel@tonic-gate 	}
32657c478bd9Sstevel@tonic-gate 
32667c478bd9Sstevel@tonic-gate done:
32677c478bd9Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
32687c478bd9Sstevel@tonic-gate 	if (deadhead) {
32697c478bd9Sstevel@tonic-gate 		if (head->lio_sigqp)
32707c478bd9Sstevel@tonic-gate 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
32717c478bd9Sstevel@tonic-gate 		kmem_free(head, sizeof (aio_lio_t));
32727c478bd9Sstevel@tonic-gate 	}
32737c478bd9Sstevel@tonic-gate 	return (error);
32747c478bd9Sstevel@tonic-gate }
32757c478bd9Sstevel@tonic-gate 
32767c478bd9Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
32777c478bd9Sstevel@tonic-gate static void
32787c478bd9Sstevel@tonic-gate aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
32797c478bd9Sstevel@tonic-gate {
32807c478bd9Sstevel@tonic-gate 	dest->aio_fildes = src->aio_fildes;
32817c478bd9Sstevel@tonic-gate 	dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
32827c478bd9Sstevel@tonic-gate 	dest->aio_nbytes = (size_t)src->aio_nbytes;
32837c478bd9Sstevel@tonic-gate 	dest->aio_offset = (off_t)src->aio_offset;
32847c478bd9Sstevel@tonic-gate 	dest->aio_reqprio = src->aio_reqprio;
32857c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
32867c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
32877c478bd9Sstevel@tonic-gate 
32887c478bd9Sstevel@tonic-gate 	/*
32897c478bd9Sstevel@tonic-gate 	 * See comment in sigqueue32() on handling of 32-bit
32907c478bd9Sstevel@tonic-gate 	 * sigvals in a 64-bit kernel.
32917c478bd9Sstevel@tonic-gate 	 */
32927c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_value.sival_int =
32937c478bd9Sstevel@tonic-gate 	    (int)src->aio_sigevent.sigev_value.sival_int;
32947c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
32957c478bd9Sstevel@tonic-gate 	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
32967c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
32977c478bd9Sstevel@tonic-gate 	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
32987c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
32997c478bd9Sstevel@tonic-gate 	dest->aio_lio_opcode = src->aio_lio_opcode;
33007c478bd9Sstevel@tonic-gate 	dest->aio_state = src->aio_state;
33017c478bd9Sstevel@tonic-gate 	dest->aio__pad[0] = src->aio__pad[0];
33027c478bd9Sstevel@tonic-gate }
33037c478bd9Sstevel@tonic-gate #endif
33047c478bd9Sstevel@tonic-gate 
33057c478bd9Sstevel@tonic-gate /*
33067c478bd9Sstevel@tonic-gate  * This function is used only for largefile calls made by
33077c478bd9Sstevel@tonic-gate  * 32 bit applications on 32 bit kernel.
33087c478bd9Sstevel@tonic-gate  */
33097c478bd9Sstevel@tonic-gate static int
33107c478bd9Sstevel@tonic-gate aio_req_setupLF(
33117c478bd9Sstevel@tonic-gate 	aio_req_t	**reqpp,
33127c478bd9Sstevel@tonic-gate 	aio_t		*aiop,
33137c478bd9Sstevel@tonic-gate 	aiocb64_32_t	*arg,
33147c478bd9Sstevel@tonic-gate 	aio_result_t	*resultp,
33157c478bd9Sstevel@tonic-gate 	int		port,
33167c478bd9Sstevel@tonic-gate 	vnode_t		*vp)
33177c478bd9Sstevel@tonic-gate {
33187c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp;
33197c478bd9Sstevel@tonic-gate 	sigqueue_t	*sqp;
33207c478bd9Sstevel@tonic-gate 	struct	uio	*uio;
33217c478bd9Sstevel@tonic-gate 
33227c478bd9Sstevel@tonic-gate 	struct	sigevent *sigev;
33237c478bd9Sstevel@tonic-gate 	int 		error;
33247c478bd9Sstevel@tonic-gate 
33257c478bd9Sstevel@tonic-gate 	sigev = (struct	sigevent *)&arg->aio_sigevent;
33267c478bd9Sstevel@tonic-gate 	if ((sigev->sigev_notify == SIGEV_SIGNAL) &&
33277c478bd9Sstevel@tonic-gate 	    (sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG)) {
33287c478bd9Sstevel@tonic-gate 		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
33297c478bd9Sstevel@tonic-gate 		if (sqp == NULL)
33307c478bd9Sstevel@tonic-gate 			return (EAGAIN);
33317c478bd9Sstevel@tonic-gate 		sqp->sq_func = NULL;
33327c478bd9Sstevel@tonic-gate 		sqp->sq_next = NULL;
33337c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_code = SI_ASYNCIO;
33347c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_pid = curproc->p_pid;
33357c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_ctid = PRCTID(curproc);
33367c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_zoneid = getzoneid();
33377c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
33387c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_signo = sigev->sigev_signo;
33397c478bd9Sstevel@tonic-gate 		sqp->sq_info.si_value = sigev->sigev_value;
33407c478bd9Sstevel@tonic-gate 	} else
33417c478bd9Sstevel@tonic-gate 		sqp = NULL;
33427c478bd9Sstevel@tonic-gate 
33437c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
33447c478bd9Sstevel@tonic-gate 
33457c478bd9Sstevel@tonic-gate 	if (aiop->aio_flags & AIO_REQ_BLOCK) {
33467c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
33477c478bd9Sstevel@tonic-gate 		if (sqp)
33487c478bd9Sstevel@tonic-gate 			kmem_free(sqp, sizeof (sigqueue_t));
33497c478bd9Sstevel@tonic-gate 		return (EIO);
33507c478bd9Sstevel@tonic-gate 	}
33517c478bd9Sstevel@tonic-gate 	/*
33527c478bd9Sstevel@tonic-gate 	 * get an aio_reqp from the free list or allocate one
33537c478bd9Sstevel@tonic-gate 	 * from dynamic memory.
33547c478bd9Sstevel@tonic-gate 	 */
33557c478bd9Sstevel@tonic-gate 	if (error = aio_req_alloc(&reqp, resultp)) {
33567c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
33577c478bd9Sstevel@tonic-gate 		if (sqp)
33587c478bd9Sstevel@tonic-gate 			kmem_free(sqp, sizeof (sigqueue_t));
33597c478bd9Sstevel@tonic-gate 		return (error);
33607c478bd9Sstevel@tonic-gate 	}
33617c478bd9Sstevel@tonic-gate 	aiop->aio_pending++;
33627c478bd9Sstevel@tonic-gate 	aiop->aio_outstanding++;
33637c478bd9Sstevel@tonic-gate 	reqp->aio_req_flags = AIO_PENDING;
33647c478bd9Sstevel@tonic-gate 	if (port)
33657c478bd9Sstevel@tonic-gate 		aio_enq_port_pending(aiop, reqp);
33667c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
33677c478bd9Sstevel@tonic-gate 	/*
33687c478bd9Sstevel@tonic-gate 	 * initialize aio request.
33697c478bd9Sstevel@tonic-gate 	 */
33707c478bd9Sstevel@tonic-gate 	reqp->aio_req_fd = arg->aio_fildes;
33717c478bd9Sstevel@tonic-gate 	reqp->aio_req_sigqp = sqp;
33727c478bd9Sstevel@tonic-gate 	reqp->aio_req_iocb.iocb = NULL;
33737c478bd9Sstevel@tonic-gate 	reqp->aio_req_buf.b_file = vp;
33747c478bd9Sstevel@tonic-gate 	uio = reqp->aio_req.aio_uio;
33757c478bd9Sstevel@tonic-gate 	uio->uio_iovcnt = 1;
33767c478bd9Sstevel@tonic-gate 	uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
33777c478bd9Sstevel@tonic-gate 	uio->uio_iov->iov_len = arg->aio_nbytes;
33787c478bd9Sstevel@tonic-gate 	uio->uio_loffset = arg->aio_offset;
33797c478bd9Sstevel@tonic-gate 	*reqpp = reqp;
33807c478bd9Sstevel@tonic-gate 	return (0);
33817c478bd9Sstevel@tonic-gate }
33827c478bd9Sstevel@tonic-gate 
33837c478bd9Sstevel@tonic-gate /*
33847c478bd9Sstevel@tonic-gate  * This routine is called when a non largefile call is made by a 32bit
33857c478bd9Sstevel@tonic-gate  * process on a ILP32 or LP64 kernel.
33867c478bd9Sstevel@tonic-gate  */
33877c478bd9Sstevel@tonic-gate static int
33887c478bd9Sstevel@tonic-gate alio32(
33897c478bd9Sstevel@tonic-gate 	int		mode_arg,
33907c478bd9Sstevel@tonic-gate 	void		*aiocb_arg,
33917c478bd9Sstevel@tonic-gate 	int		nent,
33927c478bd9Sstevel@tonic-gate 	void		*sigev_arg)
33937c478bd9Sstevel@tonic-gate {
33947c478bd9Sstevel@tonic-gate 	file_t		*fp;
33957c478bd9Sstevel@tonic-gate 	file_t		*prev_fp = NULL;
33967c478bd9Sstevel@tonic-gate 	int		prev_mode = -1;
33977c478bd9Sstevel@tonic-gate 	struct vnode	*vp;
33987c478bd9Sstevel@tonic-gate 	aio_lio_t	*head;
33997c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp;
34007c478bd9Sstevel@tonic-gate 	aio_t		*aiop;
34017c478bd9Sstevel@tonic-gate 	aiocb_t		cb;
34027c478bd9Sstevel@tonic-gate 	aiocb_t		*aiocb = &cb;
34037c478bd9Sstevel@tonic-gate 	caddr_t		cbplist;
34047c478bd9Sstevel@tonic-gate #ifdef	_LP64
34057c478bd9Sstevel@tonic-gate 	aiocb32_t	*cbp;
34067c478bd9Sstevel@tonic-gate 	caddr32_t	*ucbp;
34077c478bd9Sstevel@tonic-gate 	aiocb32_t	cb32;
34087c478bd9Sstevel@tonic-gate 	aiocb32_t	*aiocb32 = &cb32;
34097c478bd9Sstevel@tonic-gate 	struct sigevent32	sigev;
34107c478bd9Sstevel@tonic-gate #else
34117c478bd9Sstevel@tonic-gate 	aiocb_t		*cbp, **ucbp;
34127c478bd9Sstevel@tonic-gate 	struct sigevent	sigev;
34137c478bd9Sstevel@tonic-gate #endif
34147c478bd9Sstevel@tonic-gate 	sigqueue_t	*sqp;
34157c478bd9Sstevel@tonic-gate 	int		(*aio_func)();
34167c478bd9Sstevel@tonic-gate 	int		mode;
34177c478bd9Sstevel@tonic-gate 	int		error = 0, aio_errors = 0;
34187c478bd9Sstevel@tonic-gate 	int		i;
34197c478bd9Sstevel@tonic-gate 	size_t		ssize;
34207c478bd9Sstevel@tonic-gate 	int		deadhead = 0;
34217c478bd9Sstevel@tonic-gate 	int		aio_notsupported = 0;
34227c478bd9Sstevel@tonic-gate 	int		aio_use_port = 0;
34237c478bd9Sstevel@tonic-gate 	port_kevent_t	*pkevtp = NULL;
34247c478bd9Sstevel@tonic-gate #ifdef	_LP64
34257c478bd9Sstevel@tonic-gate 	port_notify32_t	pnotify;
34267c478bd9Sstevel@tonic-gate #else
34277c478bd9Sstevel@tonic-gate 	port_notify_t	pnotify;
34287c478bd9Sstevel@tonic-gate #endif
34297c478bd9Sstevel@tonic-gate 	aiop = curproc->p_aio;
34307c478bd9Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
34317c478bd9Sstevel@tonic-gate 		return (EINVAL);
34327c478bd9Sstevel@tonic-gate 
34337c478bd9Sstevel@tonic-gate #ifdef	_LP64
34347c478bd9Sstevel@tonic-gate 	ssize = (sizeof (caddr32_t) * nent);
34357c478bd9Sstevel@tonic-gate #else
34367c478bd9Sstevel@tonic-gate 	ssize = (sizeof (aiocb_t *) * nent);
34377c478bd9Sstevel@tonic-gate #endif
34387c478bd9Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
34397c478bd9Sstevel@tonic-gate 	ucbp = (void *)cbplist;
34407c478bd9Sstevel@tonic-gate 
34417c478bd9Sstevel@tonic-gate 	if (copyin(aiocb_arg, cbplist, ssize)) {
34427c478bd9Sstevel@tonic-gate 		kmem_free(cbplist, ssize);
34437c478bd9Sstevel@tonic-gate 		return (EFAULT);
34447c478bd9Sstevel@tonic-gate 	}
34457c478bd9Sstevel@tonic-gate 
34467c478bd9Sstevel@tonic-gate 	if (sigev_arg) {
34477c478bd9Sstevel@tonic-gate 		if (copyin(sigev_arg, &sigev, sizeof (struct sigevent32))) {
34487c478bd9Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
34497c478bd9Sstevel@tonic-gate 			return (EFAULT);
34507c478bd9Sstevel@tonic-gate 		}
34517c478bd9Sstevel@tonic-gate 	}
34527c478bd9Sstevel@tonic-gate 
34537c478bd9Sstevel@tonic-gate 	/*
34547c478bd9Sstevel@tonic-gate 	 * a list head should be allocated if notification is
34557c478bd9Sstevel@tonic-gate 	 * enabled for this list.
34567c478bd9Sstevel@tonic-gate 	 */
34577c478bd9Sstevel@tonic-gate 	head = NULL;
34587c478bd9Sstevel@tonic-gate 
34597c478bd9Sstevel@tonic-gate 	/* Event Ports  */
34607c478bd9Sstevel@tonic-gate 
34617c478bd9Sstevel@tonic-gate 	if (sigev_arg && sigev.sigev_notify == SIGEV_PORT) {
34627c478bd9Sstevel@tonic-gate 		/* Use PORT for completion notification */
34637c478bd9Sstevel@tonic-gate 		if (copyin((void *)(uintptr_t)sigev.sigev_value.sival_ptr,
34647c478bd9Sstevel@tonic-gate 		    &pnotify, sizeof (port_notify32_t))) {
34657c478bd9Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
34667c478bd9Sstevel@tonic-gate 			return (EFAULT);
34677c478bd9Sstevel@tonic-gate 		}
34687c478bd9Sstevel@tonic-gate 		/* use event ports for the list of aiocbs */
34697c478bd9Sstevel@tonic-gate 		aio_use_port = 1;
34707c478bd9Sstevel@tonic-gate 		error = port_alloc_event(pnotify.portnfy_port,
34717c478bd9Sstevel@tonic-gate 		    PORT_ALLOC_PRIVATE, PORT_SOURCE_AIO, &pkevtp);
34727c478bd9Sstevel@tonic-gate 		if (error) {
34737c478bd9Sstevel@tonic-gate 			if ((error == ENOMEM) || (error == EAGAIN))
34747c478bd9Sstevel@tonic-gate 				error = EAGAIN;
34757c478bd9Sstevel@tonic-gate 			else
34767c478bd9Sstevel@tonic-gate 				error = EINVAL;
34777c478bd9Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
34787c478bd9Sstevel@tonic-gate 			return (error);
34797c478bd9Sstevel@tonic-gate 		}
34807c478bd9Sstevel@tonic-gate 	} else if ((mode_arg == LIO_WAIT) || sigev_arg) {
34817c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
34827c478bd9Sstevel@tonic-gate 		error = aio_lio_alloc(&head);
34837c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
34847c478bd9Sstevel@tonic-gate 		if (error)
34857c478bd9Sstevel@tonic-gate 			goto done;
34867c478bd9Sstevel@tonic-gate 		deadhead = 1;
34877c478bd9Sstevel@tonic-gate 		head->lio_nent = nent;
34887c478bd9Sstevel@tonic-gate 		head->lio_refcnt = nent;
34897c478bd9Sstevel@tonic-gate 		if (sigev_arg && (sigev.sigev_notify == SIGEV_SIGNAL) &&
34907c478bd9Sstevel@tonic-gate 		    (sigev.sigev_signo > 0 && sigev.sigev_signo < NSIG)) {
34917c478bd9Sstevel@tonic-gate 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
34927c478bd9Sstevel@tonic-gate 			if (sqp == NULL) {
34937c478bd9Sstevel@tonic-gate 				error = EAGAIN;
34947c478bd9Sstevel@tonic-gate 				goto done;
34957c478bd9Sstevel@tonic-gate 			}
34967c478bd9Sstevel@tonic-gate 			sqp->sq_func = NULL;
34977c478bd9Sstevel@tonic-gate 			sqp->sq_next = NULL;
34987c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_code = SI_ASYNCIO;
34997c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_pid = curproc->p_pid;
35007c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_ctid = PRCTID(curproc);
35017c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_zoneid = getzoneid();
35027c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
35037c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_signo = sigev.sigev_signo;
35047c478bd9Sstevel@tonic-gate 			sqp->sq_info.si_value.sival_int =
35057c478bd9Sstevel@tonic-gate 			    sigev.sigev_value.sival_int;
35067c478bd9Sstevel@tonic-gate 			head->lio_sigqp = sqp;
35077c478bd9Sstevel@tonic-gate 		} else {
35087c478bd9Sstevel@tonic-gate 			head->lio_sigqp = NULL;
35097c478bd9Sstevel@tonic-gate 		}
35107c478bd9Sstevel@tonic-gate 	}
35117c478bd9Sstevel@tonic-gate 
35127c478bd9Sstevel@tonic-gate 	for (i = 0; i < nent; i++, ucbp++) {
35137c478bd9Sstevel@tonic-gate 
35147c478bd9Sstevel@tonic-gate 		/* skip entry if it can't be copied. */
35157c478bd9Sstevel@tonic-gate #ifdef	_LP64
35167c478bd9Sstevel@tonic-gate 		cbp = (aiocb32_t *)(uintptr_t)*ucbp;
35177c478bd9Sstevel@tonic-gate 		if (cbp == NULL || copyin(cbp, aiocb32, sizeof (aiocb32_t))) {
35187c478bd9Sstevel@tonic-gate #else
35197c478bd9Sstevel@tonic-gate 		cbp = (aiocb_t *)*ucbp;
35207c478bd9Sstevel@tonic-gate 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (aiocb_t))) {
35217c478bd9Sstevel@tonic-gate #endif
35227c478bd9Sstevel@tonic-gate 			if (head) {
35237c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35247c478bd9Sstevel@tonic-gate 				head->lio_nent--;
35257c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
35267c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35277c478bd9Sstevel@tonic-gate 			}
35287c478bd9Sstevel@tonic-gate 			continue;
35297c478bd9Sstevel@tonic-gate 		}
35307c478bd9Sstevel@tonic-gate #ifdef	_LP64
35317c478bd9Sstevel@tonic-gate 		/*
35327c478bd9Sstevel@tonic-gate 		 * copy 32 bit structure into 64 bit structure
35337c478bd9Sstevel@tonic-gate 		 */
35347c478bd9Sstevel@tonic-gate 		aiocb_32ton(aiocb32, aiocb);
35357c478bd9Sstevel@tonic-gate #endif /* _LP64 */
35367c478bd9Sstevel@tonic-gate 
35377c478bd9Sstevel@tonic-gate 		/* skip if opcode for aiocb is LIO_NOP */
35387c478bd9Sstevel@tonic-gate 
35397c478bd9Sstevel@tonic-gate 		mode = aiocb->aio_lio_opcode;
35407c478bd9Sstevel@tonic-gate 		if (mode == LIO_NOP) {
35417c478bd9Sstevel@tonic-gate 			cbp = NULL;
35427c478bd9Sstevel@tonic-gate 			if (head) {
35437c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35447c478bd9Sstevel@tonic-gate 				head->lio_nent--;
35457c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
35467c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35477c478bd9Sstevel@tonic-gate 			}
35487c478bd9Sstevel@tonic-gate 			continue;
35497c478bd9Sstevel@tonic-gate 		}
35507c478bd9Sstevel@tonic-gate 
35517c478bd9Sstevel@tonic-gate 		/* increment file descriptor's ref count. */
35527c478bd9Sstevel@tonic-gate 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
35537c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
35547c478bd9Sstevel@tonic-gate 			if (head) {
35557c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35567c478bd9Sstevel@tonic-gate 				head->lio_nent--;
35577c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
35587c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35597c478bd9Sstevel@tonic-gate 			}
35607c478bd9Sstevel@tonic-gate 			aio_errors++;
35617c478bd9Sstevel@tonic-gate 			continue;
35627c478bd9Sstevel@tonic-gate 		}
35637c478bd9Sstevel@tonic-gate 
35647c478bd9Sstevel@tonic-gate 		vp = fp->f_vnode;
35657c478bd9Sstevel@tonic-gate 
35667c478bd9Sstevel@tonic-gate 		/*
35677c478bd9Sstevel@tonic-gate 		 * check the permission of the partition
35687c478bd9Sstevel@tonic-gate 		 */
35697c478bd9Sstevel@tonic-gate 		mode = aiocb->aio_lio_opcode;
35707c478bd9Sstevel@tonic-gate 		if ((fp->f_flag & mode) == 0) {
35717c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
35727c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
35737c478bd9Sstevel@tonic-gate 			if (head) {
35747c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35757c478bd9Sstevel@tonic-gate 				head->lio_nent--;
35767c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
35777c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35787c478bd9Sstevel@tonic-gate 			}
35797c478bd9Sstevel@tonic-gate 			aio_errors++;
35807c478bd9Sstevel@tonic-gate 			continue;
35817c478bd9Sstevel@tonic-gate 		}
35827c478bd9Sstevel@tonic-gate 
35837c478bd9Sstevel@tonic-gate 		/*
35847c478bd9Sstevel@tonic-gate 		 * common case where requests are to the same fd
35857c478bd9Sstevel@tonic-gate 		 * for the same r/w operation
35867c478bd9Sstevel@tonic-gate 		 * for UFS, need to set EBADFD
35877c478bd9Sstevel@tonic-gate 		 */
35887c478bd9Sstevel@tonic-gate 		if ((fp != prev_fp) || (mode != prev_mode)) {
35897c478bd9Sstevel@tonic-gate 			aio_func = check_vp(vp, mode);
35907c478bd9Sstevel@tonic-gate 			if (aio_func == NULL) {
35917c478bd9Sstevel@tonic-gate 				prev_fp = NULL;
35927c478bd9Sstevel@tonic-gate 				releasef(aiocb->aio_fildes);
35937c478bd9Sstevel@tonic-gate 				lio_set_uerror(&cbp->aio_resultp,
35947c478bd9Sstevel@tonic-gate 				    EBADFD);
35957c478bd9Sstevel@tonic-gate 				aio_notsupported++;
35967c478bd9Sstevel@tonic-gate 				if (head) {
35977c478bd9Sstevel@tonic-gate 					mutex_enter(&aiop->aio_mutex);
35987c478bd9Sstevel@tonic-gate 					head->lio_nent--;
35997c478bd9Sstevel@tonic-gate 					head->lio_refcnt--;
36007c478bd9Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
36017c478bd9Sstevel@tonic-gate 				}
36027c478bd9Sstevel@tonic-gate 				continue;
36037c478bd9Sstevel@tonic-gate 			} else {
36047c478bd9Sstevel@tonic-gate 				prev_fp = fp;
36057c478bd9Sstevel@tonic-gate 				prev_mode = mode;
36067c478bd9Sstevel@tonic-gate 			}
36077c478bd9Sstevel@tonic-gate 		}
36087c478bd9Sstevel@tonic-gate 		if (error = aio_req_setup(&reqp, aiop, aiocb,
36097c478bd9Sstevel@tonic-gate 		    (aio_result_t *)&cbp->aio_resultp, aio_use_port, vp)) {
36107c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
36117c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
36127c478bd9Sstevel@tonic-gate 			if (head) {
36137c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
36147c478bd9Sstevel@tonic-gate 				head->lio_nent--;
36157c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
36167c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
36177c478bd9Sstevel@tonic-gate 			}
36187c478bd9Sstevel@tonic-gate 			aio_errors++;
36197c478bd9Sstevel@tonic-gate 			continue;
36207c478bd9Sstevel@tonic-gate 		}
36217c478bd9Sstevel@tonic-gate 
36227c478bd9Sstevel@tonic-gate 		reqp->aio_req_lio = head;
36237c478bd9Sstevel@tonic-gate 		deadhead = 0;
36247c478bd9Sstevel@tonic-gate 
36257c478bd9Sstevel@tonic-gate 		/*
36267c478bd9Sstevel@tonic-gate 		 * Set the errno field now before sending the request to
36277c478bd9Sstevel@tonic-gate 		 * the driver to avoid a race condition
36287c478bd9Sstevel@tonic-gate 		 */
36297c478bd9Sstevel@tonic-gate 		(void) suword32(&cbp->aio_resultp.aio_errno,
36307c478bd9Sstevel@tonic-gate 		    EINPROGRESS);
36317c478bd9Sstevel@tonic-gate 
36327c478bd9Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb32 = ((caddr32_t *)cbplist)[i];
36337c478bd9Sstevel@tonic-gate 
36347c478bd9Sstevel@tonic-gate 		if (aio_use_port) {
36357c478bd9Sstevel@tonic-gate 			reqp->aio_req_port = pnotify.portnfy_port;
36367c478bd9Sstevel@tonic-gate #ifdef _LP64
36377c478bd9Sstevel@tonic-gate 			error = aio_req_assoc_port32(&aiocb32->aio_sigevent,
36387c478bd9Sstevel@tonic-gate 			    (void *)(uintptr_t)pnotify.portnfy_user,
36397c478bd9Sstevel@tonic-gate 			    (aiocb_t *)(uintptr_t)(((caddr32_t *)cbplist)[i]),
36407c478bd9Sstevel@tonic-gate 			    reqp, pkevtp);
36417c478bd9Sstevel@tonic-gate #else
36427c478bd9Sstevel@tonic-gate 			error = aio_req_assoc_port(&aiocb->aio_sigevent,
36437c478bd9Sstevel@tonic-gate 			    pnotify.portnfy_user,
36447c478bd9Sstevel@tonic-gate 			    (aiocb_t *)(((caddr32_t *)cbplist)[i]),
36457c478bd9Sstevel@tonic-gate 			    reqp, pkevtp);
36467c478bd9Sstevel@tonic-gate #endif
36477c478bd9Sstevel@tonic-gate 		}
36487c478bd9Sstevel@tonic-gate 
36497c478bd9Sstevel@tonic-gate 		/*
36507c478bd9Sstevel@tonic-gate 		 * send the request to driver.
36517c478bd9Sstevel@tonic-gate 		 * Clustering: If PXFS vnode, call PXFS function.
36527c478bd9Sstevel@tonic-gate 		 */
36537c478bd9Sstevel@tonic-gate 		if (error == 0) {
36547c478bd9Sstevel@tonic-gate 			if (aiocb->aio_nbytes == 0) {
36557c478bd9Sstevel@tonic-gate 				clear_active_fd(aiocb->aio_fildes);
36567c478bd9Sstevel@tonic-gate 				aio_zerolen(reqp);
36577c478bd9Sstevel@tonic-gate 				continue;
36587c478bd9Sstevel@tonic-gate 			}
36597c478bd9Sstevel@tonic-gate 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
36607c478bd9Sstevel@tonic-gate 			    CRED());
36617c478bd9Sstevel@tonic-gate 		}
36627c478bd9Sstevel@tonic-gate 
36637c478bd9Sstevel@tonic-gate 		/*
36647c478bd9Sstevel@tonic-gate 		 * the fd's ref count is not decremented until the IO has
36657c478bd9Sstevel@tonic-gate 		 * completed unless there was an error.
36667c478bd9Sstevel@tonic-gate 		 */
36677c478bd9Sstevel@tonic-gate 		if (error) {
36687c478bd9Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
36697c478bd9Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
36707c478bd9Sstevel@tonic-gate 			if (head) {
36717c478bd9Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
36727c478bd9Sstevel@tonic-gate 				head->lio_nent--;
36737c478bd9Sstevel@tonic-gate 				head->lio_refcnt--;
36747c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
36757c478bd9Sstevel@tonic-gate 			}
36767c478bd9Sstevel@tonic-gate 			if (error == ENOTSUP)
36777c478bd9Sstevel@tonic-gate 				aio_notsupported++;
36787c478bd9Sstevel@tonic-gate 			else
36797c478bd9Sstevel@tonic-gate 				aio_errors++;
36807c478bd9Sstevel@tonic-gate 			lio_set_error(reqp);
36817c478bd9Sstevel@tonic-gate 		} else {
36827c478bd9Sstevel@tonic-gate 			clear_active_fd(aiocb->aio_fildes);
36837c478bd9Sstevel@tonic-gate 		}
36847c478bd9Sstevel@tonic-gate 	}
36857c478bd9Sstevel@tonic-gate 
36867c478bd9Sstevel@tonic-gate 	if (pkevtp)
36877c478bd9Sstevel@tonic-gate 		port_free_event(pkevtp);
36887c478bd9Sstevel@tonic-gate 
36897c478bd9Sstevel@tonic-gate 	if (aio_notsupported) {
36907c478bd9Sstevel@tonic-gate 		error = ENOTSUP;
36917c478bd9Sstevel@tonic-gate 	} else if (aio_errors) {
36927c478bd9Sstevel@tonic-gate 		/*
36937c478bd9Sstevel@tonic-gate 		 * return EIO if any request failed
36947c478bd9Sstevel@tonic-gate 		 */
36957c478bd9Sstevel@tonic-gate 		error = EIO;
36967c478bd9Sstevel@tonic-gate 	}
36977c478bd9Sstevel@tonic-gate 
36987c478bd9Sstevel@tonic-gate 	if (mode_arg == LIO_WAIT) {
36997c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
37007c478bd9Sstevel@tonic-gate 		while (head->lio_refcnt > 0) {
37017c478bd9Sstevel@tonic-gate 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
37027c478bd9Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
37037c478bd9Sstevel@tonic-gate 				error = EINTR;
37047c478bd9Sstevel@tonic-gate 				goto done;
37057c478bd9Sstevel@tonic-gate 			}
37067c478bd9Sstevel@tonic-gate 		}
37077c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
37087c478bd9Sstevel@tonic-gate 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
37097c478bd9Sstevel@tonic-gate 	}
37107c478bd9Sstevel@tonic-gate 
37117c478bd9Sstevel@tonic-gate done:
37127c478bd9Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
37137c478bd9Sstevel@tonic-gate 	if (deadhead) {
37147c478bd9Sstevel@tonic-gate 		if (head->lio_sigqp)
37157c478bd9Sstevel@tonic-gate 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
37167c478bd9Sstevel@tonic-gate 		kmem_free(head, sizeof (aio_lio_t));
37177c478bd9Sstevel@tonic-gate 	}
37187c478bd9Sstevel@tonic-gate 	return (error);
37197c478bd9Sstevel@tonic-gate }
37207c478bd9Sstevel@tonic-gate 
37217c478bd9Sstevel@tonic-gate 
37227c478bd9Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
37237c478bd9Sstevel@tonic-gate void
37247c478bd9Sstevel@tonic-gate aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
37257c478bd9Sstevel@tonic-gate {
37267c478bd9Sstevel@tonic-gate 	dest->aio_fildes = src->aio_fildes;
37277c478bd9Sstevel@tonic-gate 	dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
37287c478bd9Sstevel@tonic-gate 	dest->aio_nbytes = (size_t)src->aio_nbytes;
37297c478bd9Sstevel@tonic-gate 	dest->aio_offset = (off_t)src->aio_offset;
37307c478bd9Sstevel@tonic-gate 	dest->aio_reqprio = src->aio_reqprio;
37317c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
37327c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
37337c478bd9Sstevel@tonic-gate 
37347c478bd9Sstevel@tonic-gate 	/*
37357c478bd9Sstevel@tonic-gate 	 * See comment in sigqueue32() on handling of 32-bit
37367c478bd9Sstevel@tonic-gate 	 * sigvals in a 64-bit kernel.
37377c478bd9Sstevel@tonic-gate 	 */
37387c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_value.sival_int =
37397c478bd9Sstevel@tonic-gate 	    (int)src->aio_sigevent.sigev_value.sival_int;
37407c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
37417c478bd9Sstevel@tonic-gate 	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
37427c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
37437c478bd9Sstevel@tonic-gate 	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
37447c478bd9Sstevel@tonic-gate 	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
37457c478bd9Sstevel@tonic-gate 	dest->aio_lio_opcode = src->aio_lio_opcode;
37467c478bd9Sstevel@tonic-gate 	dest->aio_state = src->aio_state;
37477c478bd9Sstevel@tonic-gate 	dest->aio__pad[0] = src->aio__pad[0];
37487c478bd9Sstevel@tonic-gate }
37497c478bd9Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
37507c478bd9Sstevel@tonic-gate 
37517c478bd9Sstevel@tonic-gate /*
37527c478bd9Sstevel@tonic-gate  * aio_port_callback() is called just before the event is retrieved from the
37537c478bd9Sstevel@tonic-gate  * port. The task of this callback function is to finish the work of the
37547c478bd9Sstevel@tonic-gate  * transaction for the application, it means :
37557c478bd9Sstevel@tonic-gate  * - copyout transaction data to the application
37567c478bd9Sstevel@tonic-gate  *	(this thread is running in the right process context)
37577c478bd9Sstevel@tonic-gate  * - keep trace of the transaction (update of counters).
37587c478bd9Sstevel@tonic-gate  * - free allocated buffers
37597c478bd9Sstevel@tonic-gate  * The aiocb pointer is the object element of the port_kevent_t structure.
37607c478bd9Sstevel@tonic-gate  *
37617c478bd9Sstevel@tonic-gate  * flag :
37627c478bd9Sstevel@tonic-gate  *	PORT_CALLBACK_DEFAULT : do copyout and free resources
37637c478bd9Sstevel@tonic-gate  *	PORT_CALLBACK_CLOSE   : don't do copyout, free resources
37647c478bd9Sstevel@tonic-gate  */
37657c478bd9Sstevel@tonic-gate 
37667c478bd9Sstevel@tonic-gate /*ARGSUSED*/
37677c478bd9Sstevel@tonic-gate int
37687c478bd9Sstevel@tonic-gate aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
37697c478bd9Sstevel@tonic-gate {
37707c478bd9Sstevel@tonic-gate 	aio_t		*aiop = curproc->p_aio;
37717c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp = arg;
37727c478bd9Sstevel@tonic-gate 	struct	iovec	*iov;
37737c478bd9Sstevel@tonic-gate 	struct	buf	*bp;
37747c478bd9Sstevel@tonic-gate 	void		*resultp;
37757c478bd9Sstevel@tonic-gate 
37767c478bd9Sstevel@tonic-gate 	if (pid != curproc->p_pid) {
37777c478bd9Sstevel@tonic-gate 		/* wrong proc !!, can not deliver data here ... */
37787c478bd9Sstevel@tonic-gate 		return (EACCES);
37797c478bd9Sstevel@tonic-gate 	}
37807c478bd9Sstevel@tonic-gate 
37817c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_portq_mutex);
37827c478bd9Sstevel@tonic-gate 	reqp->aio_req_portkev = NULL;
37837c478bd9Sstevel@tonic-gate 	aio_req_remove_portq(aiop, reqp); /* remove request from portq */
37847c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_portq_mutex);
37857c478bd9Sstevel@tonic-gate 	aphysio_unlock(reqp);		/* unlock used pages */
37867c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
37877c478bd9Sstevel@tonic-gate 	if (reqp->aio_req_flags & AIO_COPYOUTDONE) {
37887c478bd9Sstevel@tonic-gate 		aio_req_free_port(aiop, reqp);	/* back to free list */
37897c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
37907c478bd9Sstevel@tonic-gate 		return (0);
37917c478bd9Sstevel@tonic-gate 	}
37927c478bd9Sstevel@tonic-gate 
37937c478bd9Sstevel@tonic-gate 	iov = reqp->aio_req_uio.uio_iov;
37947c478bd9Sstevel@tonic-gate 	bp = &reqp->aio_req_buf;
37957c478bd9Sstevel@tonic-gate 	resultp = (void *)reqp->aio_req_resultp;
37967c478bd9Sstevel@tonic-gate 	aio_req_free_port(aiop, reqp);	/* request struct back to free list */
37977c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
37987c478bd9Sstevel@tonic-gate 	if (flag == PORT_CALLBACK_DEFAULT)
37997c478bd9Sstevel@tonic-gate 		aio_copyout_result_port(iov, bp, resultp);
38007c478bd9Sstevel@tonic-gate 	return (0);
38017c478bd9Sstevel@tonic-gate }
3802