xref: /titanic_41/usr/src/uts/common/os/aio.c (revision cb6207858a9fcc2feaee22e626912fba281ac969)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Kernel asynchronous I/O.
31  * This is only for raw devices now (as of Nov. 1993).
32  */
33 
34 #include <sys/types.h>
35 #include <sys/errno.h>
36 #include <sys/conf.h>
37 #include <sys/file.h>
38 #include <sys/fs/snode.h>
39 #include <sys/unistd.h>
40 #include <sys/cmn_err.h>
41 #include <vm/as.h>
42 #include <vm/faultcode.h>
43 #include <sys/sysmacros.h>
44 #include <sys/procfs.h>
45 #include <sys/kmem.h>
46 #include <sys/autoconf.h>
47 #include <sys/ddi_impldefs.h>
48 #include <sys/sunddi.h>
49 #include <sys/aio_impl.h>
50 #include <sys/debug.h>
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/vmsystm.h>
54 #include <sys/fs/pxfs_ki.h>
55 #include <sys/contract/process_impl.h>
56 
57 /*
58  * external entry point.
59  */
60 #ifdef _LP64
61 static int64_t kaioc(long, long, long, long, long, long);
62 #endif
63 static int kaio(ulong_t *, rval_t *);
64 
65 
/*
 * Run modes handed as the trailing argument to aiosuspend(), aliowait(),
 * aioerror(), aiorw() and aio_cancel().  The entry points in this file
 * choose the mode from the opcode they were called with.
 */
#define	AIO_64	0		/* passed by kaioc() */
#define	AIO_32	1		/* passed by kaio() for the plain opcodes */
#define	AIO_LARGEFILE	2	/* passed by kaio() for the *64 opcodes */
69 
70 /*
71  * implementation specific functions (private)
72  */
73 #ifdef _LP64
74 static int alio(int, aiocb_t **, int, struct sigevent *);
75 #endif
76 static int aionotify(void);
77 static int aioinit(void);
78 static int aiostart(void);
79 static void alio_cleanup(aio_t *, aiocb_t **, int, int);
80 static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
81     cred_t *);
82 static void lio_set_error(aio_req_t *);
83 static aio_t *aio_aiop_alloc();
84 static int aio_req_alloc(aio_req_t **, aio_result_t *);
85 static int aio_lio_alloc(aio_lio_t **);
86 static aio_req_t *aio_req_done(void *);
87 static aio_req_t *aio_req_remove(aio_req_t *);
88 static int aio_req_find(aio_result_t *, aio_req_t **);
89 static int aio_hash_insert(struct aio_req_t *, aio_t *);
90 static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
91     aio_result_t *, vnode_t *);
92 static int aio_cleanup_thread(aio_t *);
93 static aio_lio_t *aio_list_get(aio_result_t *);
94 static void lio_set_uerror(void *, int);
95 extern void aio_zerolen(aio_req_t *);
96 static int aiowait(struct timeval *, int, long	*);
97 static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
98 static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
99     aio_req_t *reqlist, aio_t *aiop, model_t model);
100 static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
101 static int aiosuspend(void *, int, struct  timespec *, int,
102     long	*, int);
103 static int aliowait(int, void *, int, void *, int);
104 static int aioerror(void *, int);
105 static int aio_cancel(int, void *, long	*, int);
106 static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
107 static int aiorw(int, void *, int, int);
108 
109 static int alioLF(int, void *, int, void *);
110 static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
111     aio_result_t *, vnode_t *);
112 static int alio32(int, void *, int, void *);
113 static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
114 static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
115 
116 #ifdef  _SYSCALL32_IMPL
117 static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
118 void	aiocb_32ton(aiocb32_t *, aiocb_t *);
119 #endif /* _SYSCALL32_IMPL */
120 
121 /*
122  * implementation specific functions (external)
123  */
124 void aio_req_free(aio_t *, aio_req_t *);
125 
126 /*
127  * Event Port framework
128  */
129 
130 void aio_req_free_port(aio_t *, aio_req_t *);
131 static int aio_port_callback(void *, int *, pid_t, int, void *);
132 
133 /*
134  * This is the loadable module wrapper.
135  */
136 #include <sys/modctl.h>
137 #include <sys/syscall.h>
138 
#ifdef _LP64

/*
 * System call table entry for an LP64 kernel.  The handler is kaioc(),
 * which takes its six arguments directly (the leading 6 is presumably
 * the argument count -- confirm against struct sysent).
 */
static struct sysent kaio_sysent = {
	6,
	SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
	(int (*)())kaioc
};

#ifdef _SYSCALL32_IMPL
/*
 * Entry used for 32-bit callers on an LP64 kernel.  The handler is kaio(),
 * which reads up to seven argument words (uap[0] .. uap[6]).
 */
static struct sysent kaio_sysent32 = {
	7,
	SE_NOUNLOAD | SE_64RVAL,
	kaio
};
#endif  /* _SYSCALL32_IMPL */

#else   /* _LP64 */

/*
 * System call table entry for a non-LP64 kernel; kaio() is the only
 * entry point in that configuration.
 */
static struct sysent kaio_sysent = {
	7,
	SE_NOUNLOAD | SE_32RVAL1,
	kaio
};

#endif  /* _LP64 */
164 
/*
 * Module linkage information for the kernel.
 */

/* System call module description wrapping kaio_sysent. */
static struct modlsys modlsys = {
	&mod_syscallops,
	"kernel Async I/O",
	&kaio_sysent
};

#ifdef  _SYSCALL32_IMPL
/* Companion description wrapping kaio_sysent32 for 32-bit callers. */
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"kernel Async I/O for 32 bit compatibility",
	&kaio_sysent32
};
#endif  /* _SYSCALL32_IMPL */


/*
 * NULL-terminated list of the descriptions above; this is the object
 * handed to mod_install(), mod_remove() and mod_info() by _init(),
 * _fini() and _info().
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef  _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
192 
193 int
194 _init(void)
195 {
196 	int retval;
197 
198 	if ((retval = mod_install(&modlinkage)) != 0)
199 		return (retval);
200 
201 	return (0);
202 }
203 
204 int
205 _fini(void)
206 {
207 	int retval;
208 
209 	retval = mod_remove(&modlinkage);
210 
211 	return (retval);
212 }
213 
214 int
215 _info(struct modinfo *modinfop)
216 {
217 	return (mod_info(&modlinkage, modinfop));
218 }
219 
220 #ifdef	_LP64
221 static int64_t
222 kaioc(
223 	long	a0,
224 	long	a1,
225 	long	a2,
226 	long	a3,
227 	long	a4,
228 	long	a5)
229 {
230 	int	error;
231 	long	rval = 0;
232 
233 	switch ((int)a0 & ~AIO_POLL_BIT) {
234 	case AIOREAD:
235 		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
236 		    (offset_t)a4, (aio_result_t *)a5, FREAD);
237 		break;
238 	case AIOWRITE:
239 		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
240 		    (offset_t)a4, (aio_result_t *)a5, FWRITE);
241 		break;
242 	case AIOWAIT:
243 		error = aiowait((struct timeval *)a1, (int)a2, &rval);
244 		break;
245 	case AIOWAITN:
246 		error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
247 		    (timespec_t *)a4);
248 		break;
249 	case AIONOTIFY:
250 		error = aionotify();
251 		break;
252 	case AIOINIT:
253 		error = aioinit();
254 		break;
255 	case AIOSTART:
256 		error = aiostart();
257 		break;
258 	case AIOLIO:
259 		error = alio((int)a1, (aiocb_t **)a2, (int)a3,
260 		    (struct sigevent *)a4);
261 		break;
262 	case AIOLIOWAIT:
263 		error = aliowait((int)a1, (void *)a2, (int)a3,
264 		    (struct sigevent *)a4, AIO_64);
265 		break;
266 	case AIOSUSPEND:
267 		error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
268 		    (int)a4, &rval, AIO_64);
269 		break;
270 	case AIOERROR:
271 		error = aioerror((void *)a1, AIO_64);
272 		break;
273 	case AIOAREAD:
274 		error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
275 		break;
276 	case AIOAWRITE:
277 		error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
278 		break;
279 	case AIOCANCEL:
280 		error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
281 		break;
282 
283 	/*
284 	 * The large file related stuff is valid only for
285 	 * 32 bit kernel and not for 64 bit kernel
286 	 * On 64 bit kernel we convert large file calls
287 	 * to regular 64bit calls.
288 	 */
289 
290 	default:
291 		error = EINVAL;
292 	}
293 	if (error)
294 		return ((int64_t)set_errno(error));
295 	return (rval);
296 }
297 #endif
298 
299 static int
300 kaio(
301 	ulong_t *uap,
302 	rval_t *rvp)
303 {
304 	long rval = 0;
305 	int	error = 0;
306 	offset_t	off;
307 
308 
309 		rvp->r_vals = 0;
310 #if defined(_LITTLE_ENDIAN)
311 	off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
312 #else
313 	off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
314 #endif
315 
316 	switch (uap[0] & ~AIO_POLL_BIT) {
317 	/*
318 	 * It must be the 32 bit system call on 64 bit kernel
319 	 */
320 	case AIOREAD:
321 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
322 		    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
323 	case AIOWRITE:
324 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
325 		    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
326 	case AIOWAIT:
327 		error = aiowait((struct	timeval *)uap[1], (int)uap[2],
328 		    &rval);
329 		break;
330 	case AIOWAITN:
331 		error = aiowaitn((void *)uap[1], (uint_t)uap[2],
332 		    (uint_t *)uap[3], (timespec_t *)uap[4]);
333 		break;
334 	case AIONOTIFY:
335 		return (aionotify());
336 	case AIOINIT:
337 		return (aioinit());
338 	case AIOSTART:
339 		return (aiostart());
340 	case AIOLIO:
341 		return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
342 		    (void *)uap[4]));
343 	case AIOLIOWAIT:
344 		return (aliowait((int)uap[1], (void *)uap[2],
345 		    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
346 	case AIOSUSPEND:
347 		error = aiosuspend((void *)uap[1], (int)uap[2],
348 		    (timespec_t *)uap[3], (int)uap[4],
349 		    &rval, AIO_32);
350 		break;
351 	case AIOERROR:
352 		return (aioerror((void *)uap[1], AIO_32));
353 	case AIOAREAD:
354 		return (aiorw((int)uap[0], (void *)uap[1],
355 		    FREAD, AIO_32));
356 	case AIOAWRITE:
357 		return (aiorw((int)uap[0], (void *)uap[1],
358 		    FWRITE, AIO_32));
359 	case AIOCANCEL:
360 		error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
361 		    AIO_32));
362 		break;
363 	case AIOLIO64:
364 		return (alioLF((int)uap[1], (void *)uap[2],
365 		    (int)uap[3], (void *)uap[4]));
366 	case AIOLIOWAIT64:
367 		return (aliowait(uap[1], (void *)uap[2],
368 		    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
369 	case AIOSUSPEND64:
370 		error = aiosuspend((void *)uap[1], (int)uap[2],
371 		    (timespec_t *)uap[3], (int)uap[4], &rval,
372 		    AIO_LARGEFILE);
373 		break;
374 	case AIOERROR64:
375 		return (aioerror((void *)uap[1], AIO_LARGEFILE));
376 	case AIOAREAD64:
377 		return (aiorw((int)uap[0], (void *)uap[1], FREAD,
378 		    AIO_LARGEFILE));
379 	case AIOAWRITE64:
380 		return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
381 		    AIO_LARGEFILE));
382 	case AIOCANCEL64:
383 		error = (aio_cancel((int)uap[1], (void *)uap[2],
384 		    &rval, AIO_LARGEFILE));
385 		break;
386 	default:
387 		return (EINVAL);
388 	}
389 
390 	rvp->r_val1 = rval;
391 	return (error);
392 }
393 
394 /*
395  * wake up LWPs in this process that are sleeping in
396  * aiowait().
397  */
398 static int
399 aionotify(void)
400 {
401 	aio_t	*aiop;
402 
403 	aiop = curproc->p_aio;
404 	if (aiop == NULL)
405 		return (0);
406 
407 	mutex_enter(&aiop->aio_mutex);
408 	aiop->aio_notifycnt++;
409 	cv_broadcast(&aiop->aio_waitcv);
410 	mutex_exit(&aiop->aio_mutex);
411 
412 	return (0);
413 }
414 
415 static int
416 timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
417 	timestruc_t **rqtp, int *blocking)
418 {
419 #ifdef	_SYSCALL32_IMPL
420 	struct timeval32 wait_time_32;
421 #endif
422 	struct timeval wait_time;
423 	model_t	model = get_udatamodel();
424 
425 	*rqtp = NULL;
426 	if (timout == NULL) {		/* wait indefinitely */
427 		*blocking = 1;
428 		return (0);
429 	}
430 
431 	/*
432 	 * Need to correctly compare with the -1 passed in for a user
433 	 * address pointer, with both 32 bit and 64 bit apps.
434 	 */
435 	if (model == DATAMODEL_NATIVE) {
436 		if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */
437 			*blocking = 0;
438 			return (0);
439 		}
440 
441 		if (copyin(timout, &wait_time, sizeof (wait_time)))
442 			return (EFAULT);
443 	}
444 #ifdef	_SYSCALL32_IMPL
445 	else {
446 		/*
447 		 * -1 from a 32bit app. It will not get sign extended.
448 		 * don't wait if -1.
449 		 */
450 		if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
451 			*blocking = 0;
452 			return (0);
453 		}
454 
455 		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
456 			return (EFAULT);
457 		TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
458 	}
459 #endif  /* _SYSCALL32_IMPL */
460 
461 	if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */
462 		*blocking = 0;
463 		return (0);
464 	}
465 
466 	if (wait_time.tv_sec < 0 ||
467 	    wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
468 		return (EINVAL);
469 
470 	rqtime->tv_sec = wait_time.tv_sec;
471 	rqtime->tv_nsec = wait_time.tv_usec * 1000;
472 	*rqtp = rqtime;
473 	*blocking = 1;
474 
475 	return (0);
476 }
477 
478 static int
479 timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
480 	timestruc_t **rqtp, int *blocking)
481 {
482 #ifdef	_SYSCALL32_IMPL
483 	timespec32_t wait_time_32;
484 #endif
485 	model_t	model = get_udatamodel();
486 
487 	*rqtp = NULL;
488 	if (timout == NULL) {
489 		*blocking = 1;
490 		return (0);
491 	}
492 
493 	if (model == DATAMODEL_NATIVE) {
494 		if (copyin(timout, rqtime, sizeof (*rqtime)))
495 			return (EFAULT);
496 	}
497 #ifdef	_SYSCALL32_IMPL
498 	else {
499 		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
500 			return (EFAULT);
501 		TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
502 	}
503 #endif  /* _SYSCALL32_IMPL */
504 
505 	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
506 		*blocking = 0;
507 		return (0);
508 	}
509 
510 	if (rqtime->tv_sec < 0 ||
511 	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
512 		return (EINVAL);
513 
514 	*rqtp = rqtime;
515 	*blocking = 1;
516 
517 	return (0);
518 }
519 
/*
 * Wait for one asynchronous request of this process to complete.
 *
 * timout	user timeval handed to timeval2reltime(): NULL waits
 *		indefinitely, -1 or an all-zero timeval does not wait
 * dontblockflg	if set, fail with EINVAL instead of waiting when the
 *		process has no outstanding aio
 * rval		receives the result pointer of the reaped request, 1 if a
 *		pending aionotify() was consumed instead, or -1 when the
 *		wait was interrupted by a signal
 *
 * Returns 0, an error from timeval2reltime(), EINVAL (no aio state, or
 * nothing outstanding with dontblockflg set), EINTR or ETIME.  In the
 * non-blocking case with nothing ready the function returns 0 and leaves
 * *rval untouched.
 */
/*ARGSUSED*/
static int
aiowait(
	struct timeval	*timout,
	int	dontblockflg,
	long	*rval)
{
	int 		error;
	aio_t		*aiop;
	aio_req_t	*reqp;
	clock_t		status;
	int		blocking;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* process requests on poll queue */
		if (aiop->aio_pollq) {
			/* aio_cleanup() is called with aio_mutex dropped */
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}
		/*
		 * reqp is assigned on every pass, so it is NULL after the
		 * loop unless a request was actually taken here.
		 */
		if ((reqp = aio_req_remove(NULL)) != NULL) {
			*rval = (long)reqp->aio_req_resultp;
			break;
		}
		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			*rval = 1;
			break;
		}
		/* don't block if no outstanding aio */
		if (aiop->aio_outstanding == 0 && dontblockflg) {
			error = EINVAL;
			break;
		}
		if (blocking) {
			status = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp);

			if (status > 0)		/* check done queue again */
				continue;
			if (status == 0) {	/* interrupted by a signal */
				error = EINTR;
				*rval = -1;
			} else {		/* timer expired */
				error = ETIME;
			}
		}
		/* not blocking, or the wait ended without a wakeup */
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	if (reqp) {
		/*
		 * Unlock and copy out without aio_mutex held; the mutex is
		 * retaken only around freeing the request.
		 */
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
	return (error);
}
599 
600 /*
601  * aiowaitn can be used to reap completed asynchronous requests submitted with
602  * lio_listio, aio_read or aio_write.
603  * This function only reaps asynchronous raw I/Os.
604  */
605 
606 /*ARGSUSED*/
607 static int
608 aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
609 {
610 	int 		error = 0;
611 	aio_t		*aiop;
612 	aio_req_t	*reqlist = NULL;
613 	caddr_t		iocblist = NULL;	/* array of iocb ptr's */
614 	uint_t		waitcnt, cnt = 0;	/* iocb cnt */
615 	size_t		iocbsz;			/* users iocb size */
616 	size_t		riocbsz;		/* returned iocb size */
617 	int		iocb_index = 0;
618 	model_t		model = get_udatamodel();
619 	int		blocking = 1;
620 	timestruc_t	rqtime;
621 	timestruc_t	*rqtp;
622 
623 	aiop = curproc->p_aio;
624 	if (aiop == NULL)
625 		return (EINVAL);
626 
627 	if (aiop->aio_outstanding == 0)
628 		return (EAGAIN);
629 
630 	if (copyin(nwait, &waitcnt, sizeof (uint_t)))
631 		return (EFAULT);
632 
633 	/* set *nwait to zero, if we must return prematurely */
634 	if (copyout(&cnt, nwait, sizeof (uint_t)))
635 		return (EFAULT);
636 
637 	if (waitcnt == 0) {
638 		blocking = 0;
639 		rqtp = NULL;
640 		waitcnt = nent;
641 	} else {
642 		error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
643 		if (error)
644 			return (error);
645 	}
646 
647 	if (model == DATAMODEL_NATIVE)
648 		iocbsz = (sizeof (aiocb_t *) * nent);
649 #ifdef	_SYSCALL32_IMPL
650 	else
651 		iocbsz = (sizeof (caddr32_t) * nent);
652 #endif  /* _SYSCALL32_IMPL */
653 
654 	/*
655 	 * Only one aio_waitn call is allowed at a time.
656 	 * The active aio_waitn will collect all requests
657 	 * out of the "done" list and if necessary it will wait
658 	 * for some/all pending requests to fulfill the nwait
659 	 * parameter.
660 	 * A second or further aio_waitn calls will sleep here
661 	 * until the active aio_waitn finishes and leaves the kernel
662 	 * If the second call does not block (poll), then return
663 	 * immediately with the error code : EAGAIN.
664 	 * If the second call should block, then sleep here, but
665 	 * do not touch the timeout. The timeout starts when this
666 	 * aio_waitn-call becomes active.
667 	 */
668 
669 	mutex_enter(&aiop->aio_mutex);
670 
671 	while (aiop->aio_flags & AIO_WAITN) {
672 		if (blocking == 0) {
673 			mutex_exit(&aiop->aio_mutex);
674 			return (EAGAIN);
675 		}
676 
677 		/* block, no timeout */
678 		aiop->aio_flags |= AIO_WAITN_PENDING;
679 		if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
680 			mutex_exit(&aiop->aio_mutex);
681 			return (EINTR);
682 		}
683 	}
684 
685 	/*
686 	 * Establish the absolute future time for the timeout.
687 	 */
688 	if (rqtp) {
689 		timestruc_t now;
690 		gethrestime(&now);
691 		timespecadd(rqtp, &now);
692 	}
693 
694 	if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
695 		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
696 		aiop->aio_iocb = NULL;
697 	}
698 
699 	if (aiop->aio_iocb == NULL) {
700 		iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
701 		if (iocblist == NULL) {
702 			mutex_exit(&aiop->aio_mutex);
703 			return (ENOMEM);
704 		}
705 		aiop->aio_iocb = (aiocb_t **)iocblist;
706 		aiop->aio_iocbsz = iocbsz;
707 	} else {
708 		iocblist = (char *)aiop->aio_iocb;
709 	}
710 
711 	aiop->aio_waitncnt = waitcnt;
712 	aiop->aio_flags |= AIO_WAITN;
713 
714 	for (;;) {
715 		/* push requests on poll queue to done queue */
716 		if (aiop->aio_pollq) {
717 			mutex_exit(&aiop->aio_mutex);
718 			aio_cleanup(0);
719 			mutex_enter(&aiop->aio_mutex);
720 		}
721 
722 		/* check for requests on done queue */
723 		if (aiop->aio_doneq) {
724 			cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
725 			aiop->aio_waitncnt = waitcnt - cnt;
726 		}
727 
728 		/* user-level done queue might not be empty */
729 		if (aiop->aio_notifycnt > 0) {
730 			aiop->aio_notifycnt--;
731 			error = 0;
732 			break;
733 		}
734 
735 		/*
736 		 * if we are here second time as a result of timer
737 		 * expiration, we reset error if there are enough
738 		 * aiocb's to satisfy request.
739 		 * We return also if all requests are already done
740 		 * and we picked up the whole done queue.
741 		 */
742 
743 		if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
744 		    aiop->aio_doneq == NULL)) {
745 			error = 0;
746 			break;
747 		}
748 
749 		if ((cnt < waitcnt) && blocking) {
750 			int rval = cv_waituntil_sig(&aiop->aio_waitcv,
751 				&aiop->aio_mutex, rqtp);
752 			if (rval > 0)
753 				continue;
754 			if (rval < 0) {
755 				error = ETIME;
756 				blocking = 0;
757 				continue;
758 			}
759 			error = EINTR;
760 		}
761 		break;
762 	}
763 
764 	mutex_exit(&aiop->aio_mutex);
765 
766 	if (cnt > 0) {
767 
768 		iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
769 		    aiop, model);
770 
771 		if (model == DATAMODEL_NATIVE)
772 			riocbsz = (sizeof (aiocb_t *) * cnt);
773 #ifdef	_SYSCALL32_IMPL
774 		else
775 			riocbsz = (sizeof (caddr32_t) * cnt);
776 #endif  /* _SYSCALL32_IMPL */
777 
778 		if (copyout(iocblist, uiocb, riocbsz) ||
779 		    copyout(&cnt, nwait, sizeof (uint_t)))
780 			error = EFAULT;
781 	}
782 
783 	if (aiop->aio_iocbsz > AIO_IOCB_MAX) {
784 		kmem_free(iocblist, aiop->aio_iocbsz);
785 		aiop->aio_iocb = NULL;
786 	}
787 
788 	/* check if there is another thread waiting for execution */
789 	mutex_enter(&aiop->aio_mutex);
790 	aiop->aio_flags &= ~AIO_WAITN;
791 	if (aiop->aio_flags & AIO_WAITN_PENDING) {
792 		aiop->aio_flags &= ~AIO_WAITN_PENDING;
793 		cv_signal(&aiop->aio_waitncv);
794 	}
795 	mutex_exit(&aiop->aio_mutex);
796 
797 	return (error);
798 }
799 
800 /*
801  * aio_unlock_requests
802  * copyouts the result of the request as well as the return value.
803  * It builds the list of completed asynchronous requests,
804  * unlocks the allocated memory ranges and
805  * put the aio request structure back into the free list.
806  */
807 
808 static int
809 aio_unlock_requests(
810 	caddr_t	iocblist,
811 	int	iocb_index,
812 	aio_req_t *reqlist,
813 	aio_t	*aiop,
814 	model_t	model)
815 {
816 	aio_req_t	*reqp, *nreqp;
817 
818 	if (model == DATAMODEL_NATIVE) {
819 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
820 			(((caddr_t *)iocblist)[iocb_index++]) =
821 			    reqp->aio_req_iocb.iocb;
822 			nreqp = reqp->aio_req_next;
823 			aphysio_unlock(reqp);
824 			aio_copyout_result(reqp);
825 			mutex_enter(&aiop->aio_mutex);
826 			aio_req_free(aiop, reqp);
827 			mutex_exit(&aiop->aio_mutex);
828 		}
829 	}
830 #ifdef	_SYSCALL32_IMPL
831 	else {
832 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
833 			((caddr32_t *)iocblist)[iocb_index++] =
834 			    reqp->aio_req_iocb.iocb32;
835 			nreqp = reqp->aio_req_next;
836 			aphysio_unlock(reqp);
837 			aio_copyout_result(reqp);
838 			mutex_enter(&aiop->aio_mutex);
839 			aio_req_free(aiop, reqp);
840 			mutex_exit(&aiop->aio_mutex);
841 		}
842 	}
843 #endif	/* _SYSCALL32_IMPL */
844 	return (iocb_index);
845 }
846 
847 /*
848  * aio_reqlist_concat
849  * moves "max" elements from the done queue to the reqlist queue and removes
850  * the AIO_DONEQ flag.
851  * - reqlist queue is a simple linked list
852  * - done queue is a double linked list
853  */
854 
855 static int
856 aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
857 {
858 	aio_req_t *q2, *q2work, *list;
859 	int count = 0;
860 
861 	list = *reqlist;
862 	q2 = aiop->aio_doneq;
863 	q2work = q2;
864 	while (max-- > 0) {
865 		q2work->aio_req_flags &= ~AIO_DONEQ;
866 		q2work = q2work->aio_req_next;
867 		count++;
868 		if (q2work == q2)
869 			break;
870 	}
871 
872 	if (q2work == q2) {
873 		/* all elements revised */
874 		q2->aio_req_prev->aio_req_next = list;
875 		list = q2;
876 		aiop->aio_doneq = NULL;
877 	} else {
878 		/*
879 		 * max < elements in the doneq
880 		 * detach only the required amount of elements
881 		 * out of the doneq
882 		 */
883 		q2work->aio_req_prev->aio_req_next = list;
884 		list = q2;
885 
886 		aiop->aio_doneq = q2work;
887 		q2work->aio_req_prev = q2->aio_req_prev;
888 		q2->aio_req_prev->aio_req_next = q2work;
889 	}
890 	*reqlist = list;
891 	return (count);
892 }
893 
/*
 * Wait until at least one of the requests named in a user supplied list
 * of aiocb pointers has completed, then reap every listed request that
 * is found on the done queue.
 *
 * aiocb	user array of aiocb pointers (native or 32 bit sized,
 *		depending on the caller's data model); NULL slots are skipped
 * nent		number of entries in that array, must be positive
 * timout	optional user timespec handed to timespec2reltime()
 * flag		not used by this function
 * rval		set to 1 when a pending aionotify() was consumed
 * run_mode	AIO_64, AIO_32 or AIO_LARGEFILE; selects the aiocb layout
 *		used to locate each aio_resultp
 *
 * Returns 0, EINVAL, EAGAIN, EFAULT, ENOMEM, EINTR, ETIME or an error
 * from timespec2reltime().
 *
 * NOTE(review): nent has no upper bound here, so the size of the
 * KM_NOSLEEP allocation below is controlled by the caller -- confirm
 * whether a limit such as _AIO_LISTIO_MAX is wanted.
 */
/*ARGSUSED*/
static int
aiosuspend(
	void	*aiocb,
	int	nent,
	struct	timespec	*timout,
	int	flag,
	long	*rval,
	int	run_mode)
{
	int 		error;
	aio_t		*aiop;
	aio_req_t	*reqp, *found, *next;
	caddr_t		cbplist = NULL;
	aiocb_t		*cbp, **ucbp;
#ifdef	_SYSCALL32_IMPL
	aiocb32_t	*cbp32;
	caddr32_t	*ucbp32;
#endif  /* _SYSCALL32_IMPL */
	aiocb64_32_t	*cbp64;
	int		rv;
	int		i;
	size_t		ssize;
	model_t		model = get_udatamodel();
	int		blocking;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	/*
	 * If we are not blocking and there's no IO complete
	 * skip aiocb copyin.
	 */
	if (!blocking && (aiop->aio_pollq == NULL) &&
	    (aiop->aio_doneq == NULL)) {
		return (EAGAIN);
	}

	if (model == DATAMODEL_NATIVE)
		ssize = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		ssize = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	cbplist = kmem_alloc(ssize, KM_NOSLEEP);
	if (cbplist == NULL)
		return (ENOMEM);

	if (copyin(aiocb, cbplist, ssize)) {
		error = EFAULT;
		goto done;
	}

	/* found collects the reaped requests as a singly linked list */
	found = NULL;
	/*
	 * we need to get the aio_cleanupq_mutex since we call
	 * aio_req_done().
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			/* aio_cleanup() is called with both mutexes dropped */
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_cleanupq_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
		}
		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			/* restart the scan of the user's list from slot 0 */
			if (model == DATAMODEL_NATIVE)
				ucbp = (aiocb_t **)cbplist;
#ifdef	_SYSCALL32_IMPL
			else
				ucbp32 = (caddr32_t *)cbplist;
#endif  /* _SYSCALL32_IMPL */
			for (i = 0; i < nent; i++) {
				if (model == DATAMODEL_NATIVE) {
					if ((cbp = *ucbp++) == NULL)
						continue;
					if (run_mode != AIO_LARGEFILE)
						reqp = aio_req_done(
						    &cbp->aio_resultp);
					else {
						cbp64 = (aiocb64_32_t *)cbp;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}
				}
#ifdef	_SYSCALL32_IMPL
				else {
					/*
					 * NOTE(review): reqp is not assigned
					 * here when run_mode is neither AIO_32
					 * nor AIO_LARGEFILE; presumably that
					 * combination cannot occur for a
					 * 32 bit caller -- confirm.
					 */
					if (run_mode == AIO_32) {
						if ((cbp32 =
						    (aiocb32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp32->aio_resultp);
					} else if (run_mode == AIO_LARGEFILE) {
						if ((cbp64 =
						    (aiocb64_32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						    reqp = aio_req_done(
							&cbp64->aio_resultp);
					}

				}
#endif  /* _SYSCALL32_IMPL */
				if (reqp) {
					/* push onto the front of found */
					reqp->aio_req_next = found;
					found = reqp;
				}
				/* nothing left to match against */
				if (aiop->aio_doneq == NULL)
					break;
			}
			if (found)
				break;
		}
		if (aiop->aio_notifycnt > 0) {
			/*
			 * nothing on the kernel's queue. the user
			 * has notified the kernel that it has items
			 * on a user-level queue.
			 */
			aiop->aio_notifycnt--;
			*rval = 1;
			error = 0;
			break;
		}
		/* don't block if nothing is outstanding */
		if (aiop->aio_outstanding == 0) {
			error = EAGAIN;
			break;
		}
		if (blocking) {
			/*
			 * drop the aio_cleanupq_mutex as we are
			 * going to block.
			 */
			mutex_exit(&aiop->aio_cleanupq_mutex);
			rv = cv_waituntil_sig(&aiop->aio_waitcv,
				&aiop->aio_mutex, rqtp);
			/*
			 * we have to drop aio_mutex and
			 * grab it in the right order.
			 */
			mutex_exit(&aiop->aio_mutex);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
			if (rv > 0)	/* check done queue again */
				continue;
			if (rv == 0)	/* interrupted by a signal */
				error = EINTR;
			else		/* timer expired */
				error = ETIME;
		} else {
			error = EAGAIN;
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	/*
	 * Finish every reaped request without the mutexes held; aio_mutex
	 * is retaken only around freeing the request.
	 */
	for (reqp = found; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
done:
	kmem_free(cbplist, ssize);
	return (error);
}
1086 
1087 /*
1088  * initialize aio by allocating an aio_t struct for this
1089  * process.
1090  */
1091 static int
1092 aioinit(void)
1093 {
1094 	proc_t *p = curproc;
1095 	aio_t *aiop;
1096 	mutex_enter(&p->p_lock);
1097 	if ((aiop = p->p_aio) == NULL) {
1098 		aiop = aio_aiop_alloc();
1099 		p->p_aio = aiop;
1100 	}
1101 	mutex_exit(&p->p_lock);
1102 	if (aiop == NULL)
1103 		return (ENOMEM);
1104 	return (0);
1105 }
1106 
1107 /*
1108  * start a special thread that will cleanup after aio requests
1109  * that are preventing a segment from being unmapped. as_unmap()
1110  * blocks until all phsyio to this segment is completed. this
1111  * doesn't happen until all the pages in this segment are not
1112  * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
1113  * requests still outstanding. this special thread will make sure
1114  * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
1115  *
1116  * this function will return an error if the process has only
1117  * one LWP. the assumption is that the caller is a separate LWP
1118  * that remains blocked in the kernel for the life of this process.
1119  */
1120 static int
1121 aiostart(void)
1122 {
1123 	proc_t *p = curproc;
1124 	aio_t *aiop;
1125 	int first, error = 0;
1126 
1127 	if (p->p_lwpcnt == 1)
1128 		return (EDEADLK);
1129 	mutex_enter(&p->p_lock);
1130 	if ((aiop = p->p_aio) == NULL)
1131 		error = EINVAL;
1132 	else {
1133 		first = aiop->aio_ok;
1134 		if (aiop->aio_ok == 0)
1135 			aiop->aio_ok = 1;
1136 	}
1137 	mutex_exit(&p->p_lock);
1138 	if (error == 0 && first == 0) {
1139 		return (aio_cleanup_thread(aiop));
1140 		/* should return only to exit */
1141 	}
1142 	return (error);
1143 }
1144 
1145 /*
1146  * Associate an aiocb with a port.
1147  * This function is used by aiorw() to associate a transaction with a port.
1148  * Allocate an event port structure (port_alloc_event()) and store the
1149  * delivered user pointer (portnfy_user) in the portkev_user field of the
1150  * port_kevent_t structure..
1151  * The aio_req_portkev pointer in the aio_req_t structure was added to identify
1152  * the port association.
1153  */
1154 
1155 static int
1156 aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
1157 	aio_req_t *reqp, int event)
1158 {
1159 	port_kevent_t	*pkevp = NULL;
1160 	int		error;
1161 
1162 	error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
1163 	    PORT_SOURCE_AIO, &pkevp);
1164 	if (error) {
1165 		if ((error == ENOMEM) || (error == EAGAIN))
1166 			error = EAGAIN;
1167 		else
1168 			error = EINVAL;
1169 	} else {
1170 		port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
1171 		    aio_port_callback, reqp);
1172 		pkevp->portkev_events = event;
1173 		reqp->aio_req_portkev = pkevp;
1174 		reqp->aio_req_port = pntfy->portnfy_port;
1175 	}
1176 	return (error);
1177 }
1178 
1179 #ifdef _LP64
1180 
1181 /*
1182  * Asynchronous list IO. A chain of aiocb's are copied in
1183  * one at a time. If the aiocb is invalid, it is skipped.
1184  * For each aiocb, the appropriate driver entry point is
1185  * called. Optimize for the common case where the list
1186  * of requests is to the same file descriptor.
1187  *
1188  * One possible optimization is to define a new driver entry
1189  * point that supports a list of IO requests. Whether this
1190  * improves performance depends somewhat on the driver's
1191  * locking strategy. Processing a list could adversely impact
1192  * the driver's interrupt latency.
1193  */
1194 static int
1195 alio(
1196 	int		mode_arg,
1197 	aiocb_t		**aiocb_arg,
1198 	int		nent,
1199 	struct sigevent	*sigev)
1200 {
1201 	file_t		*fp;
1202 	file_t		*prev_fp = NULL;
1203 	int		prev_mode = -1;
1204 	struct vnode	*vp;
1205 	aio_lio_t	*head;
1206 	aio_req_t	*reqp;
1207 	aio_t		*aiop;
1208 	caddr_t		cbplist;
1209 	aiocb_t		cb;
1210 	aiocb_t		*aiocb = &cb;
1211 	aiocb_t		*cbp;
1212 	aiocb_t		**ucbp;
1213 	struct sigevent sigevk;
1214 	sigqueue_t	*sqp;
1215 	int		(*aio_func)();
1216 	int		mode;
1217 	int		error = 0;
1218 	int		aio_errors = 0;
1219 	int		i;
1220 	size_t		ssize;
1221 	int		deadhead = 0;
1222 	int		aio_notsupported = 0;
1223 	int		lio_head_port;
1224 	int		aio_port;
1225 	int		aio_thread;
1226 	port_kevent_t	*pkevtp = NULL;
1227 	port_notify_t	pnotify;
1228 	int		event;
1229 
1230 	aiop = curproc->p_aio;
1231 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
1232 		return (EINVAL);
1233 
1234 	ssize = (sizeof (aiocb_t *) * nent);
1235 	cbplist = kmem_alloc(ssize, KM_SLEEP);
1236 	ucbp = (aiocb_t **)cbplist;
1237 
1238 	if (copyin(aiocb_arg, cbplist, ssize) ||
1239 	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
1240 		kmem_free(cbplist, ssize);
1241 		return (EFAULT);
1242 	}
1243 
1244 	/* Event Ports  */
1245 	if (sigev &&
1246 	    (sigevk.sigev_notify == SIGEV_THREAD ||
1247 	    sigevk.sigev_notify == SIGEV_PORT)) {
1248 		if (sigevk.sigev_notify == SIGEV_THREAD) {
1249 			pnotify.portnfy_port = sigevk.sigev_signo;
1250 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
1251 		} else if (copyin(sigevk.sigev_value.sival_ptr,
1252 		    &pnotify, sizeof (pnotify))) {
1253 			kmem_free(cbplist, ssize);
1254 			return (EFAULT);
1255 		}
1256 		error = port_alloc_event(pnotify.portnfy_port,
1257 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
1258 		if (error) {
1259 			if (error == ENOMEM || error == EAGAIN)
1260 				error = EAGAIN;
1261 			else
1262 				error = EINVAL;
1263 			kmem_free(cbplist, ssize);
1264 			return (error);
1265 		}
1266 		lio_head_port = pnotify.portnfy_port;
1267 	}
1268 
1269 	/*
1270 	 * a list head should be allocated if notification is
1271 	 * enabled for this list.
1272 	 */
1273 	head = NULL;
1274 
1275 	if (mode_arg == LIO_WAIT || sigev) {
1276 		mutex_enter(&aiop->aio_mutex);
1277 		error = aio_lio_alloc(&head);
1278 		mutex_exit(&aiop->aio_mutex);
1279 		if (error)
1280 			goto done;
1281 		deadhead = 1;
1282 		head->lio_nent = nent;
1283 		head->lio_refcnt = nent;
1284 		head->lio_port = -1;
1285 		head->lio_portkev = NULL;
1286 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
1287 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
1288 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
1289 			if (sqp == NULL) {
1290 				error = EAGAIN;
1291 				goto done;
1292 			}
1293 			sqp->sq_func = NULL;
1294 			sqp->sq_next = NULL;
1295 			sqp->sq_info.si_code = SI_ASYNCIO;
1296 			sqp->sq_info.si_pid = curproc->p_pid;
1297 			sqp->sq_info.si_ctid = PRCTID(curproc);
1298 			sqp->sq_info.si_zoneid = getzoneid();
1299 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
1300 			sqp->sq_info.si_signo = sigevk.sigev_signo;
1301 			sqp->sq_info.si_value = sigevk.sigev_value;
1302 			head->lio_sigqp = sqp;
1303 		} else {
1304 			head->lio_sigqp = NULL;
1305 		}
1306 		if (pkevtp) {
1307 			/*
1308 			 * Prepare data to send when list of aiocb's
1309 			 * has completed.
1310 			 */
1311 			port_init_event(pkevtp, (uintptr_t)sigev,
1312 			    (void *)(uintptr_t)pnotify.portnfy_user,
1313 			    NULL, head);
1314 			pkevtp->portkev_events = AIOLIO;
1315 			head->lio_portkev = pkevtp;
1316 			head->lio_port = pnotify.portnfy_port;
1317 		}
1318 	}
1319 
1320 	for (i = 0; i < nent; i++, ucbp++) {
1321 
1322 		cbp = *ucbp;
1323 		/* skip entry if it can't be copied. */
1324 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
1325 			if (head) {
1326 				mutex_enter(&aiop->aio_mutex);
1327 				head->lio_nent--;
1328 				head->lio_refcnt--;
1329 				mutex_exit(&aiop->aio_mutex);
1330 			}
1331 			continue;
1332 		}
1333 
1334 		/* skip if opcode for aiocb is LIO_NOP */
1335 		mode = aiocb->aio_lio_opcode;
1336 		if (mode == LIO_NOP) {
1337 			cbp = NULL;
1338 			if (head) {
1339 				mutex_enter(&aiop->aio_mutex);
1340 				head->lio_nent--;
1341 				head->lio_refcnt--;
1342 				mutex_exit(&aiop->aio_mutex);
1343 			}
1344 			continue;
1345 		}
1346 
1347 		/* increment file descriptor's ref count. */
1348 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
1349 			lio_set_uerror(&cbp->aio_resultp, EBADF);
1350 			if (head) {
1351 				mutex_enter(&aiop->aio_mutex);
1352 				head->lio_nent--;
1353 				head->lio_refcnt--;
1354 				mutex_exit(&aiop->aio_mutex);
1355 			}
1356 			aio_errors++;
1357 			continue;
1358 		}
1359 
1360 		/*
1361 		 * check the permission of the partition
1362 		 */
1363 		if ((fp->f_flag & mode) == 0) {
1364 			releasef(aiocb->aio_fildes);
1365 			lio_set_uerror(&cbp->aio_resultp, EBADF);
1366 			if (head) {
1367 				mutex_enter(&aiop->aio_mutex);
1368 				head->lio_nent--;
1369 				head->lio_refcnt--;
1370 				mutex_exit(&aiop->aio_mutex);
1371 			}
1372 			aio_errors++;
1373 			continue;
1374 		}
1375 
1376 		/*
1377 		 * common case where requests are to the same fd
1378 		 * for the same r/w operation.
1379 		 * for UFS, need to set EBADFD
1380 		 */
1381 		vp = fp->f_vnode;
1382 		if (fp != prev_fp || mode != prev_mode) {
1383 			aio_func = check_vp(vp, mode);
1384 			if (aio_func == NULL) {
1385 				prev_fp = NULL;
1386 				releasef(aiocb->aio_fildes);
1387 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
1388 				aio_notsupported++;
1389 				if (head) {
1390 					mutex_enter(&aiop->aio_mutex);
1391 					head->lio_nent--;
1392 					head->lio_refcnt--;
1393 					mutex_exit(&aiop->aio_mutex);
1394 				}
1395 				continue;
1396 			} else {
1397 				prev_fp = fp;
1398 				prev_mode = mode;
1399 			}
1400 		}
1401 
1402 		error = aio_req_setup(&reqp, aiop, aiocb,
1403 		    &cbp->aio_resultp, vp);
1404 		if (error) {
1405 			releasef(aiocb->aio_fildes);
1406 			lio_set_uerror(&cbp->aio_resultp, error);
1407 			if (head) {
1408 				mutex_enter(&aiop->aio_mutex);
1409 				head->lio_nent--;
1410 				head->lio_refcnt--;
1411 				mutex_exit(&aiop->aio_mutex);
1412 			}
1413 			aio_errors++;
1414 			continue;
1415 		}
1416 
1417 		reqp->aio_req_lio = head;
1418 		deadhead = 0;
1419 
1420 		/*
1421 		 * Set the errno field now before sending the request to
1422 		 * the driver to avoid a race condition
1423 		 */
1424 		(void) suword32(&cbp->aio_resultp.aio_errno,
1425 		    EINPROGRESS);
1426 
1427 		reqp->aio_req_iocb.iocb = (caddr_t)cbp;
1428 
1429 		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
1430 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
1431 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
1432 		if (aio_port | aio_thread) {
1433 			port_kevent_t *lpkevp;
1434 			/*
1435 			 * Prepare data to send with each aiocb completed.
1436 			 */
1437 			if (aio_port) {
1438 				void *paddr =
1439 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
1440 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
1441 					error = EFAULT;
1442 			} else {	/* aio_thread */
1443 				pnotify.portnfy_port =
1444 				    aiocb->aio_sigevent.sigev_signo;
1445 				pnotify.portnfy_user =
1446 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
1447 			}
1448 			if (error)
1449 				/* EMPTY */;
1450 			else if (pkevtp != NULL &&
1451 			    pnotify.portnfy_port == lio_head_port)
1452 				error = port_dup_event(pkevtp, &lpkevp,
1453 				    PORT_ALLOC_DEFAULT);
1454 			else
1455 				error = port_alloc_event(pnotify.portnfy_port,
1456 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
1457 				    &lpkevp);
1458 			if (error == 0) {
1459 				port_init_event(lpkevp, (uintptr_t)cbp,
1460 				    (void *)(uintptr_t)pnotify.portnfy_user,
1461 				    aio_port_callback, reqp);
1462 				lpkevp->portkev_events = event;
1463 				reqp->aio_req_portkev = lpkevp;
1464 				reqp->aio_req_port = pnotify.portnfy_port;
1465 			}
1466 		}
1467 
1468 		/*
1469 		 * send the request to driver.
1470 		 */
1471 		if (error == 0) {
1472 			if (aiocb->aio_nbytes == 0) {
1473 				clear_active_fd(aiocb->aio_fildes);
1474 				aio_zerolen(reqp);
1475 				continue;
1476 			}
1477 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
1478 			    CRED());
1479 		}
1480 
1481 		/*
1482 		 * the fd's ref count is not decremented until the IO has
1483 		 * completed unless there was an error.
1484 		 */
1485 		if (error) {
1486 			releasef(aiocb->aio_fildes);
1487 			lio_set_uerror(&cbp->aio_resultp, error);
1488 			if (head) {
1489 				mutex_enter(&aiop->aio_mutex);
1490 				head->lio_nent--;
1491 				head->lio_refcnt--;
1492 				mutex_exit(&aiop->aio_mutex);
1493 			}
1494 			if (error == ENOTSUP)
1495 				aio_notsupported++;
1496 			else
1497 				aio_errors++;
1498 			lio_set_error(reqp);
1499 		} else {
1500 			clear_active_fd(aiocb->aio_fildes);
1501 		}
1502 	}
1503 
1504 	if (aio_notsupported) {
1505 		error = ENOTSUP;
1506 	} else if (aio_errors) {
1507 		/*
1508 		 * return EIO if any request failed
1509 		 */
1510 		error = EIO;
1511 	}
1512 
1513 	if (mode_arg == LIO_WAIT) {
1514 		mutex_enter(&aiop->aio_mutex);
1515 		while (head->lio_refcnt > 0) {
1516 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
1517 				mutex_exit(&aiop->aio_mutex);
1518 				error = EINTR;
1519 				goto done;
1520 			}
1521 		}
1522 		mutex_exit(&aiop->aio_mutex);
1523 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
1524 	}
1525 
1526 done:
1527 	kmem_free(cbplist, ssize);
1528 	if (deadhead) {
1529 		if (head->lio_sigqp)
1530 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
1531 		if (head->lio_portkev)
1532 			port_free_event(head->lio_portkev);
1533 		kmem_free(head, sizeof (aio_lio_t));
1534 	}
1535 	return (error);
1536 }
1537 
1538 #endif /* _LP64 */
1539 
1540 /*
1541  * Asynchronous list IO.
1542  * If list I/O is called with LIO_WAIT it can still return
1543  * before all the I/O's are completed if a signal is caught
1544  * or if the list include UFS I/O requests. If this happens,
1545  * libaio will call aliowait() to wait for the I/O's to
1546  * complete
1547  */
1548 /*ARGSUSED*/
1549 static int
1550 aliowait(
1551 	int	mode,
1552 	void	*aiocb,
1553 	int	nent,
1554 	void	*sigev,
1555 	int	run_mode)
1556 {
1557 	aio_lio_t	*head;
1558 	aio_t		*aiop;
1559 	caddr_t		cbplist;
1560 	aiocb_t		*cbp, **ucbp;
1561 #ifdef	_SYSCALL32_IMPL
1562 	aiocb32_t	*cbp32;
1563 	caddr32_t	*ucbp32;
1564 	aiocb64_32_t	*cbp64;
1565 #endif
1566 	int		error = 0;
1567 	int		i;
1568 	size_t		ssize = 0;
1569 	model_t		model = get_udatamodel();
1570 
1571 	aiop = curproc->p_aio;
1572 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
1573 		return (EINVAL);
1574 
1575 	if (model == DATAMODEL_NATIVE)
1576 		ssize = (sizeof (aiocb_t *) * nent);
1577 #ifdef	_SYSCALL32_IMPL
1578 	else
1579 		ssize = (sizeof (caddr32_t) * nent);
1580 #endif  /* _SYSCALL32_IMPL */
1581 
1582 	if (ssize == 0)
1583 		return (EINVAL);
1584 
1585 	cbplist = kmem_alloc(ssize, KM_SLEEP);
1586 
1587 	if (model == DATAMODEL_NATIVE)
1588 		ucbp = (aiocb_t **)cbplist;
1589 #ifdef	_SYSCALL32_IMPL
1590 	else
1591 		ucbp32 = (caddr32_t *)cbplist;
1592 #endif  /* _SYSCALL32_IMPL */
1593 
1594 	if (copyin(aiocb, cbplist, ssize)) {
1595 		error = EFAULT;
1596 		goto done;
1597 	}
1598 
1599 	/*
1600 	 * To find the list head, we go through the
1601 	 * list of aiocb structs, find the request
1602 	 * its for, then get the list head that reqp
1603 	 * points to
1604 	 */
1605 	head = NULL;
1606 
1607 	for (i = 0; i < nent; i++) {
1608 		if (model == DATAMODEL_NATIVE) {
1609 			/*
1610 			 * Since we are only checking for a NULL pointer
1611 			 * Following should work on both native data sizes
1612 			 * as well as for largefile aiocb.
1613 			 */
1614 			if ((cbp = *ucbp++) == NULL)
1615 				continue;
1616 			if (run_mode != AIO_LARGEFILE)
1617 				if (head = aio_list_get(&cbp->aio_resultp))
1618 					break;
1619 			else {
1620 				/*
1621 				 * This is a case when largefile call is
1622 				 * made on 32 bit kernel.
1623 				 * Treat each pointer as pointer to
1624 				 * aiocb64_32
1625 				 */
1626 				if (head = aio_list_get((aio_result_t *)
1627 				    &(((aiocb64_32_t *)cbp)->aio_resultp)))
1628 					break;
1629 			}
1630 		}
1631 #ifdef	_SYSCALL32_IMPL
1632 		else {
1633 			if (run_mode == AIO_LARGEFILE) {
1634 				if ((cbp64 = (aiocb64_32_t *)
1635 				    (uintptr_t)*ucbp32++) == NULL)
1636 					continue;
1637 				if (head = aio_list_get((aio_result_t *)
1638 				    &cbp64->aio_resultp))
1639 					break;
1640 			} else if (run_mode == AIO_32) {
1641 				if ((cbp32 = (aiocb32_t *)
1642 				    (uintptr_t)*ucbp32++) == NULL)
1643 					continue;
1644 				if (head = aio_list_get((aio_result_t *)
1645 				    &cbp32->aio_resultp))
1646 					break;
1647 			}
1648 		}
1649 #endif	/* _SYSCALL32_IMPL */
1650 	}
1651 
1652 	if (head == NULL) {
1653 		error = EINVAL;
1654 		goto done;
1655 	}
1656 
1657 	mutex_enter(&aiop->aio_mutex);
1658 	while (head->lio_refcnt > 0) {
1659 		if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
1660 			mutex_exit(&aiop->aio_mutex);
1661 			error = EINTR;
1662 			goto done;
1663 		}
1664 	}
1665 	mutex_exit(&aiop->aio_mutex);
1666 	alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);
1667 done:
1668 	kmem_free(cbplist, ssize);
1669 	return (error);
1670 }
1671 
1672 aio_lio_t *
1673 aio_list_get(aio_result_t *resultp)
1674 {
1675 	aio_lio_t	*head = NULL;
1676 	aio_t		*aiop;
1677 	aio_req_t 	**bucket;
1678 	aio_req_t 	*reqp;
1679 	long		index;
1680 
1681 	aiop = curproc->p_aio;
1682 	if (aiop == NULL)
1683 		return (NULL);
1684 
1685 	if (resultp) {
1686 		index = AIO_HASH(resultp);
1687 		bucket = &aiop->aio_hash[index];
1688 		for (reqp = *bucket; reqp != NULL;
1689 		    reqp = reqp->aio_hash_next) {
1690 			if (reqp->aio_req_resultp == resultp) {
1691 				head = reqp->aio_req_lio;
1692 				return (head);
1693 			}
1694 		}
1695 	}
1696 	return (NULL);
1697 }
1698 
1699 
1700 static void
1701 lio_set_uerror(void *resultp, int error)
1702 {
1703 	/*
1704 	 * the resultp field is a pointer to where the
1705 	 * error should be written out to the user's
1706 	 * aiocb.
1707 	 *
1708 	 */
1709 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1710 		(void) sulword(&((aio_result_t *)resultp)->aio_return,
1711 		    (ssize_t)-1);
1712 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
1713 	}
1714 #ifdef	_SYSCALL32_IMPL
1715 	else {
1716 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
1717 		    (uint_t)-1);
1718 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
1719 	}
1720 #endif  /* _SYSCALL32_IMPL */
1721 }
1722 
1723 /*
1724  * do cleanup completion for all requests in list. memory for
1725  * each request is also freed.
1726  */
1727 static void
1728 alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
1729 {
1730 	int i;
1731 	aio_req_t *reqp;
1732 	aio_result_t *resultp;
1733 	aiocb64_32_t *aiocb_64;
1734 
1735 	for (i = 0; i < nent; i++) {
1736 		if (get_udatamodel() == DATAMODEL_NATIVE) {
1737 			if (cbp[i] == NULL)
1738 				continue;
1739 			if (run_mode == AIO_LARGEFILE) {
1740 				aiocb_64 = (aiocb64_32_t *)cbp[i];
1741 				resultp = (aio_result_t *)
1742 				    &aiocb_64->aio_resultp;
1743 			} else
1744 				resultp = &cbp[i]->aio_resultp;
1745 		}
1746 #ifdef	_SYSCALL32_IMPL
1747 		else {
1748 			aiocb32_t *aiocb_32;
1749 			caddr32_t *cbp32;
1750 
1751 			cbp32 = (caddr32_t *)cbp;
1752 			if (cbp32[i] == NULL)
1753 				continue;
1754 			if (run_mode == AIO_32) {
1755 				aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i];
1756 				resultp = (aio_result_t *)&aiocb_32->
1757 				    aio_resultp;
1758 			} else if (run_mode == AIO_LARGEFILE) {
1759 				aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i];
1760 				resultp = (aio_result_t *)&aiocb_64->
1761 				    aio_resultp;
1762 			}
1763 		}
1764 #endif  /* _SYSCALL32_IMPL */
1765 		/*
1766 		 * we need to get the aio_cleanupq_mutex since we call
1767 		 * aio_req_done().
1768 		 */
1769 		mutex_enter(&aiop->aio_cleanupq_mutex);
1770 		mutex_enter(&aiop->aio_mutex);
1771 		reqp = aio_req_done(resultp);
1772 		mutex_exit(&aiop->aio_mutex);
1773 		mutex_exit(&aiop->aio_cleanupq_mutex);
1774 		if (reqp != NULL) {
1775 			aphysio_unlock(reqp);
1776 			aio_copyout_result(reqp);
1777 			mutex_enter(&aiop->aio_mutex);
1778 			aio_req_free(aiop, reqp);
1779 			mutex_exit(&aiop->aio_mutex);
1780 		}
1781 	}
1782 }
1783 
1784 /*
1785  * Write out the results for an aio request that is done.
1786  */
1787 static int
1788 aioerror(void *cb, int run_mode)
1789 {
1790 	aio_result_t *resultp;
1791 	aio_t *aiop;
1792 	aio_req_t *reqp;
1793 	int retval;
1794 
1795 	aiop = curproc->p_aio;
1796 	if (aiop == NULL || cb == NULL)
1797 		return (EINVAL);
1798 
1799 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1800 		if (run_mode == AIO_LARGEFILE)
1801 			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
1802 			    aio_resultp;
1803 		else
1804 			resultp = &((aiocb_t *)cb)->aio_resultp;
1805 	}
1806 #ifdef	_SYSCALL32_IMPL
1807 	else {
1808 		if (run_mode == AIO_LARGEFILE)
1809 			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
1810 			    aio_resultp;
1811 		else if (run_mode == AIO_32)
1812 			resultp = (aio_result_t *)&((aiocb32_t *)cb)->
1813 			    aio_resultp;
1814 	}
1815 #endif  /* _SYSCALL32_IMPL */
1816 	/*
1817 	 * we need to get the aio_cleanupq_mutex since we call
1818 	 * aio_req_find().
1819 	 */
1820 	mutex_enter(&aiop->aio_cleanupq_mutex);
1821 	mutex_enter(&aiop->aio_mutex);
1822 	retval = aio_req_find(resultp, &reqp);
1823 	mutex_exit(&aiop->aio_mutex);
1824 	mutex_exit(&aiop->aio_cleanupq_mutex);
1825 	if (retval == 0) {
1826 		aphysio_unlock(reqp);
1827 		aio_copyout_result(reqp);
1828 		mutex_enter(&aiop->aio_mutex);
1829 		aio_req_free(aiop, reqp);
1830 		mutex_exit(&aiop->aio_mutex);
1831 		return (0);
1832 	} else if (retval == 1)
1833 		return (EINPROGRESS);
1834 	else if (retval == 2)
1835 		return (EINVAL);
1836 	return (0);
1837 }
1838 
1839 /*
1840  * 	aio_cancel - if no requests outstanding,
1841  *			return AIO_ALLDONE
1842  *			else
1843  *			return AIO_NOTCANCELED
1844  */
1845 static int
1846 aio_cancel(
1847 	int	fildes,
1848 	void 	*cb,
1849 	long	*rval,
1850 	int	run_mode)
1851 {
1852 	aio_t *aiop;
1853 	void *resultp;
1854 	int index;
1855 	aio_req_t **bucket;
1856 	aio_req_t *ent;
1857 
1858 
1859 	/*
1860 	 * Verify valid file descriptor
1861 	 */
1862 	if ((getf(fildes)) == NULL) {
1863 		return (EBADF);
1864 	}
1865 	releasef(fildes);
1866 
1867 	aiop = curproc->p_aio;
1868 	if (aiop == NULL)
1869 		return (EINVAL);
1870 
1871 	if (aiop->aio_outstanding == 0) {
1872 		*rval = AIO_ALLDONE;
1873 		return (0);
1874 	}
1875 
1876 	mutex_enter(&aiop->aio_mutex);
1877 	if (cb != NULL) {
1878 		if (get_udatamodel() == DATAMODEL_NATIVE) {
1879 			if (run_mode == AIO_LARGEFILE)
1880 				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
1881 				    ->aio_resultp;
1882 			else
1883 				resultp = &((aiocb_t *)cb)->aio_resultp;
1884 		}
1885 #ifdef	_SYSCALL32_IMPL
1886 		else {
1887 			if (run_mode == AIO_LARGEFILE)
1888 				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
1889 				    ->aio_resultp;
1890 			else if (run_mode == AIO_32)
1891 				resultp = (aio_result_t *)&((aiocb32_t *)cb)
1892 				    ->aio_resultp;
1893 		}
1894 #endif  /* _SYSCALL32_IMPL */
1895 		index = AIO_HASH(resultp);
1896 		bucket = &aiop->aio_hash[index];
1897 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
1898 			if (ent->aio_req_resultp == resultp) {
1899 				if ((ent->aio_req_flags & AIO_PENDING) == 0) {
1900 					mutex_exit(&aiop->aio_mutex);
1901 					*rval = AIO_ALLDONE;
1902 					return (0);
1903 				}
1904 				mutex_exit(&aiop->aio_mutex);
1905 				*rval = AIO_NOTCANCELED;
1906 				return (0);
1907 			}
1908 		}
1909 		mutex_exit(&aiop->aio_mutex);
1910 		*rval = AIO_ALLDONE;
1911 		return (0);
1912 	}
1913 
1914 	for (index = 0; index < AIO_HASHSZ; index++) {
1915 		bucket = &aiop->aio_hash[index];
1916 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
1917 			if (ent->aio_req_fd == fildes) {
1918 				if ((ent->aio_req_flags & AIO_PENDING) != 0) {
1919 					mutex_exit(&aiop->aio_mutex);
1920 					*rval = AIO_NOTCANCELED;
1921 					return (0);
1922 				}
1923 			}
1924 		}
1925 	}
1926 	mutex_exit(&aiop->aio_mutex);
1927 	*rval = AIO_ALLDONE;
1928 	return (0);
1929 }
1930 
1931 /*
1932  * solaris version of asynchronous read and write
1933  */
1934 static int
1935 arw(
1936 	int	opcode,
1937 	int	fdes,
1938 	char	*bufp,
1939 	int	bufsize,
1940 	offset_t	offset,
1941 	aio_result_t	*resultp,
1942 	int		mode)
1943 {
1944 	file_t		*fp;
1945 	int		error;
1946 	struct vnode	*vp;
1947 	aio_req_t	*reqp;
1948 	aio_t		*aiop;
1949 	int		(*aio_func)();
1950 #ifdef _LP64
1951 	aiocb_t		aiocb;
1952 #else
1953 	aiocb64_32_t	aiocb64;
1954 #endif
1955 
1956 	aiop = curproc->p_aio;
1957 	if (aiop == NULL)
1958 		return (EINVAL);
1959 
1960 	if ((fp = getf(fdes)) == NULL) {
1961 		return (EBADF);
1962 	}
1963 
1964 	/*
1965 	 * check the permission of the partition
1966 	 */
1967 	if ((fp->f_flag & mode) == 0) {
1968 		releasef(fdes);
1969 		return (EBADF);
1970 	}
1971 
1972 	vp = fp->f_vnode;
1973 	aio_func = check_vp(vp, mode);
1974 	if (aio_func == NULL) {
1975 		releasef(fdes);
1976 		return (EBADFD);
1977 	}
1978 #ifdef _LP64
1979 	aiocb.aio_fildes = fdes;
1980 	aiocb.aio_buf = bufp;
1981 	aiocb.aio_nbytes = bufsize;
1982 	aiocb.aio_offset = offset;
1983 	aiocb.aio_sigevent.sigev_notify = 0;
1984 	error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp);
1985 #else
1986 	aiocb64.aio_fildes = fdes;
1987 	aiocb64.aio_buf = (caddr32_t)bufp;
1988 	aiocb64.aio_nbytes = bufsize;
1989 	aiocb64.aio_offset = offset;
1990 	aiocb64.aio_sigevent.sigev_notify = 0;
1991 	error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp);
1992 #endif
1993 	if (error) {
1994 		releasef(fdes);
1995 		return (error);
1996 	}
1997 
1998 	/*
1999 	 * enable polling on this request if the opcode has
2000 	 * the AIO poll bit set
2001 	 */
2002 	if (opcode & AIO_POLL_BIT)
2003 		reqp->aio_req_flags |= AIO_POLL;
2004 
2005 	if (bufsize == 0) {
2006 		clear_active_fd(fdes);
2007 		aio_zerolen(reqp);
2008 		return (0);
2009 	}
2010 	/*
2011 	 * send the request to driver.
2012 	 */
2013 	error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
2014 	/*
2015 	 * the fd is stored in the aio_req_t by aio_req_setup(), and
2016 	 * is released by the aio_cleanup_thread() when the IO has
2017 	 * completed.
2018 	 */
2019 	if (error) {
2020 		releasef(fdes);
2021 		mutex_enter(&aiop->aio_mutex);
2022 		aio_req_free(aiop, reqp);
2023 		aiop->aio_pending--;
2024 		if (aiop->aio_flags & AIO_REQ_BLOCK)
2025 			cv_signal(&aiop->aio_cleanupcv);
2026 		mutex_exit(&aiop->aio_mutex);
2027 		return (error);
2028 	}
2029 	clear_active_fd(fdes);
2030 	return (0);
2031 }
2032 
2033 /*
2034  * posix version of asynchronous read and write
2035  */
2036 static int
2037 aiorw(
2038 	int		opcode,
2039 	void		*aiocb_arg,
2040 	int		mode,
2041 	int		run_mode)
2042 {
2043 #ifdef _SYSCALL32_IMPL
2044 	aiocb32_t	aiocb32;
2045 	struct	sigevent32 *sigev32;
2046 	port_notify32_t	pntfy32;
2047 #endif
2048 	aiocb64_32_t	aiocb64;
2049 	aiocb_t		aiocb;
2050 	file_t		*fp;
2051 	int		error, fd;
2052 	size_t		bufsize;
2053 	struct vnode	*vp;
2054 	aio_req_t	*reqp;
2055 	aio_t		*aiop;
2056 	int		(*aio_func)();
2057 	aio_result_t	*resultp;
2058 	struct	sigevent *sigev;
2059 	model_t		model;
2060 	int		aio_use_port = 0;
2061 	port_notify_t	pntfy;
2062 
2063 	model = get_udatamodel();
2064 	aiop = curproc->p_aio;
2065 	if (aiop == NULL)
2066 		return (EINVAL);
2067 
2068 	if (model == DATAMODEL_NATIVE) {
2069 		if (run_mode != AIO_LARGEFILE) {
2070 			if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
2071 				return (EFAULT);
2072 			bufsize = aiocb.aio_nbytes;
2073 			resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
2074 			if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
2075 				return (EBADF);
2076 			}
2077 			sigev = &aiocb.aio_sigevent;
2078 		} else {
2079 			/*
2080 			 * We come here only when we make largefile
2081 			 * call on 32 bit kernel using 32 bit library.
2082 			 */
2083 			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
2084 				return (EFAULT);
2085 			bufsize = aiocb64.aio_nbytes;
2086 			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
2087 			    ->aio_resultp);
2088 			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
2089 				return (EBADF);
2090 			sigev = (struct sigevent *)&aiocb64.aio_sigevent;
2091 		}
2092 
2093 		if (sigev->sigev_notify == SIGEV_PORT) {
2094 			if (copyin((void *)sigev->sigev_value.sival_ptr,
2095 			    &pntfy, sizeof (port_notify_t))) {
2096 				releasef(fd);
2097 				return (EFAULT);
2098 			}
2099 			aio_use_port = 1;
2100 		} else if (sigev->sigev_notify == SIGEV_THREAD) {
2101 			pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo;
2102 			pntfy.portnfy_user =
2103 			    aiocb.aio_sigevent.sigev_value.sival_ptr;
2104 			aio_use_port = 1;
2105 		}
2106 	}
2107 #ifdef	_SYSCALL32_IMPL
2108 	else {
2109 		if (run_mode == AIO_32) {
2110 			/* 32 bit system call is being made on 64 bit kernel */
2111 			if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
2112 				return (EFAULT);
2113 
2114 			bufsize = aiocb32.aio_nbytes;
2115 			aiocb_32ton(&aiocb32, &aiocb);
2116 			resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
2117 			    aio_resultp);
2118 			if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
2119 				return (EBADF);
2120 			}
2121 			sigev32 = &aiocb32.aio_sigevent;
2122 		} else if (run_mode == AIO_LARGEFILE) {
2123 			/*
2124 			 * We come here only when we make largefile
2125 			 * call on 64 bit kernel using 32 bit library.
2126 			 */
2127 			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
2128 				return (EFAULT);
2129 			bufsize = aiocb64.aio_nbytes;
2130 			aiocb_LFton(&aiocb64, &aiocb);
2131 			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
2132 			    ->aio_resultp);
2133 			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
2134 				return (EBADF);
2135 			sigev32 = &aiocb64.aio_sigevent;
2136 		}
2137 
2138 		if (sigev32->sigev_notify == SIGEV_PORT) {
2139 			if (copyin(
2140 			    (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
2141 			    &pntfy32, sizeof (port_notify32_t))) {
2142 				releasef(fd);
2143 				return (EFAULT);
2144 			}
2145 			pntfy.portnfy_port = pntfy32.portnfy_port;
2146 			pntfy.portnfy_user = (void *)(uintptr_t)
2147 			    pntfy32.portnfy_user;
2148 			aio_use_port = 1;
2149 		} else if (sigev32->sigev_notify == SIGEV_THREAD) {
2150 			pntfy.portnfy_port = sigev32->sigev_signo;
2151 			pntfy.portnfy_user = (void *)(uintptr_t)
2152 			    sigev32->sigev_value.sival_ptr;
2153 			aio_use_port = 1;
2154 		}
2155 	}
2156 #endif  /* _SYSCALL32_IMPL */
2157 
2158 	/*
2159 	 * check the permission of the partition
2160 	 */
2161 
2162 	if ((fp->f_flag & mode) == 0) {
2163 		releasef(fd);
2164 		return (EBADF);
2165 	}
2166 
2167 	vp = fp->f_vnode;
2168 	aio_func = check_vp(vp, mode);
2169 	if (aio_func == NULL) {
2170 		releasef(fd);
2171 		return (EBADFD);
2172 	}
2173 	if (run_mode == AIO_LARGEFILE)
2174 		error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp);
2175 	else
2176 		error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp);
2177 
2178 	if (error) {
2179 		releasef(fd);
2180 		return (error);
2181 	}
2182 	/*
2183 	 * enable polling on this request if the opcode has
2184 	 * the AIO poll bit set
2185 	 */
2186 	if (opcode & AIO_POLL_BIT)
2187 		reqp->aio_req_flags |= AIO_POLL;
2188 
2189 	if (model == DATAMODEL_NATIVE)
2190 		reqp->aio_req_iocb.iocb = aiocb_arg;
2191 #ifdef  _SYSCALL32_IMPL
2192 	else
2193 		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
2194 #endif
2195 
2196 	if (aio_use_port) {
2197 		int event = (run_mode == AIO_LARGEFILE)?
2198 		    ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) :
2199 		    ((mode == FREAD)? AIOAREAD : AIOAWRITE);
2200 		error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
2201 	}
2202 
2203 	/*
2204 	 * send the request to driver.
2205 	 */
2206 	if (error == 0) {
2207 		if (bufsize == 0) {
2208 			clear_active_fd(fd);
2209 			aio_zerolen(reqp);
2210 			return (0);
2211 		}
2212 		error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
2213 	}
2214 
2215 	/*
2216 	 * the fd is stored in the aio_req_t by aio_req_setup(), and
2217 	 * is released by the aio_cleanup_thread() when the IO has
2218 	 * completed.
2219 	 */
2220 	if (error) {
2221 		releasef(fd);
2222 		mutex_enter(&aiop->aio_mutex);
2223 		aio_deq(&aiop->aio_portpending, reqp);
2224 		aio_req_free(aiop, reqp);
2225 		aiop->aio_pending--;
2226 		if (aiop->aio_flags & AIO_REQ_BLOCK)
2227 			cv_signal(&aiop->aio_cleanupcv);
2228 		mutex_exit(&aiop->aio_mutex);
2229 		return (error);
2230 	}
2231 	clear_active_fd(fd);
2232 	return (0);
2233 }
2234 
2235 
2236 /*
2237  * set error for a list IO entry that failed.
2238  */
2239 static void
2240 lio_set_error(aio_req_t *reqp)
2241 {
2242 	aio_t *aiop = curproc->p_aio;
2243 
2244 	if (aiop == NULL)
2245 		return;
2246 
2247 	mutex_enter(&aiop->aio_mutex);
2248 	aio_deq(&aiop->aio_portpending, reqp);
2249 	aiop->aio_pending--;
2250 	/* request failed, AIO_PHYSIODONE set to aviod physio cleanup. */
2251 	reqp->aio_req_flags |= AIO_PHYSIODONE;
2252 	/*
2253 	 * Need to free the request now as its never
2254 	 * going to get on the done queue
2255 	 *
2256 	 * Note: aio_outstanding is decremented in
2257 	 *	 aio_req_free()
2258 	 */
2259 	aio_req_free(aiop, reqp);
2260 	if (aiop->aio_flags & AIO_REQ_BLOCK)
2261 		cv_signal(&aiop->aio_cleanupcv);
2262 	mutex_exit(&aiop->aio_mutex);
2263 }
2264 
2265 /*
2266  * check if a specified request is done, and remove it from
2267  * the done queue. otherwise remove anybody from the done queue
2268  * if NULL is specified.
2269  */
2270 static aio_req_t *
2271 aio_req_done(void *resultp)
2272 {
2273 	aio_req_t **bucket;
2274 	aio_req_t *ent;
2275 	aio_t *aiop = curproc->p_aio;
2276 	long index;
2277 
2278 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
2279 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2280 
2281 	if (resultp) {
2282 		index = AIO_HASH(resultp);
2283 		bucket = &aiop->aio_hash[index];
2284 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
2285 			if (ent->aio_req_resultp == (aio_result_t *)resultp) {
2286 				if (ent->aio_req_flags & AIO_DONEQ) {
2287 					return (aio_req_remove(ent));
2288 				}
2289 				return (NULL);
2290 			}
2291 		}
2292 		/* no match, resultp is invalid */
2293 		return (NULL);
2294 	}
2295 	return (aio_req_remove(NULL));
2296 }
2297 
2298 /*
2299  * determine if a user-level resultp pointer is associated with an
2300  * active IO request. Zero is returned when the request is done,
2301  * and the request is removed from the done queue. Only when the
2302  * return value is zero, is the "reqp" pointer valid. One is returned
2303  * when the request is inprogress. Two is returned when the request
2304  * is invalid.
2305  */
2306 static int
2307 aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
2308 {
2309 	aio_req_t **bucket;
2310 	aio_req_t *ent;
2311 	aio_t *aiop = curproc->p_aio;
2312 	long index;
2313 
2314 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
2315 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2316 
2317 	index = AIO_HASH(resultp);
2318 	bucket = &aiop->aio_hash[index];
2319 	for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
2320 		if (ent->aio_req_resultp == resultp) {
2321 			if (ent->aio_req_flags & AIO_DONEQ) {
2322 				*reqp = aio_req_remove(ent);
2323 				return (0);
2324 			}
2325 			return (1);
2326 		}
2327 	}
2328 	/* no match, resultp is invalid */
2329 	return (2);
2330 }
2331 
2332 /*
2333  * remove a request from the done queue.
2334  */
2335 static aio_req_t *
2336 aio_req_remove(aio_req_t *reqp)
2337 {
2338 	aio_t *aiop = curproc->p_aio;
2339 
2340 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2341 
2342 	if (reqp != NULL) {
2343 		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
2344 		if (reqp->aio_req_next == reqp) {
2345 			/* only one request on queue */
2346 			if (reqp ==  aiop->aio_doneq) {
2347 				aiop->aio_doneq = NULL;
2348 			} else {
2349 				ASSERT(reqp == aiop->aio_cleanupq);
2350 				aiop->aio_cleanupq = NULL;
2351 			}
2352 		} else {
2353 			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
2354 			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
2355 			/*
2356 			 * The request can be either on the aio_doneq or the
2357 			 * aio_cleanupq
2358 			 */
2359 			if (reqp == aiop->aio_doneq)
2360 				aiop->aio_doneq = reqp->aio_req_next;
2361 
2362 			if (reqp == aiop->aio_cleanupq)
2363 				aiop->aio_cleanupq = reqp->aio_req_next;
2364 		}
2365 		reqp->aio_req_flags &= ~AIO_DONEQ;
2366 		reqp->aio_req_next = NULL;
2367 		reqp->aio_req_prev = NULL;
2368 	} else if ((reqp = aiop->aio_doneq) != NULL) {
2369 		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
2370 		if (reqp == reqp->aio_req_next) {
2371 			/* only one request on queue */
2372 			aiop->aio_doneq = NULL;
2373 		} else {
2374 			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
2375 			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
2376 			aiop->aio_doneq = reqp->aio_req_next;
2377 		}
2378 		reqp->aio_req_flags &= ~AIO_DONEQ;
2379 		reqp->aio_req_next = NULL;
2380 		reqp->aio_req_prev = NULL;
2381 	}
2382 	if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN))
2383 		cv_broadcast(&aiop->aio_waitcv);
2384 	return (reqp);
2385 }
2386 
2387 static int
2388 aio_req_setup(
2389 	aio_req_t	**reqpp,
2390 	aio_t 		*aiop,
2391 	aiocb_t 	*arg,
2392 	aio_result_t 	*resultp,
2393 	vnode_t		*vp)
2394 {
2395 	sigqueue_t	*sqp = NULL;
2396 	aio_req_t 	*reqp;
2397 	struct uio 	*uio;
2398 	struct sigevent *sigev;
2399 	int		error;
2400 
2401 	sigev = &arg->aio_sigevent;
2402 	if (sigev->sigev_notify == SIGEV_SIGNAL &&
2403 	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
2404 		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
2405 		if (sqp == NULL)
2406 			return (EAGAIN);
2407 		sqp->sq_func = NULL;
2408 		sqp->sq_next = NULL;
2409 		sqp->sq_info.si_code = SI_ASYNCIO;
2410 		sqp->sq_info.si_pid = curproc->p_pid;
2411 		sqp->sq_info.si_ctid = PRCTID(curproc);
2412 		sqp->sq_info.si_zoneid = getzoneid();
2413 		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
2414 		sqp->sq_info.si_signo = sigev->sigev_signo;
2415 		sqp->sq_info.si_value = sigev->sigev_value;
2416 	}
2417 
2418 	mutex_enter(&aiop->aio_mutex);
2419 
2420 	if (aiop->aio_flags & AIO_REQ_BLOCK) {
2421 		mutex_exit(&aiop->aio_mutex);
2422 		if (sqp)
2423 			kmem_free(sqp, sizeof (sigqueue_t));
2424 		return (EIO);
2425 	}
2426 	/*
2427 	 * get an aio_reqp from the free list or allocate one
2428 	 * from dynamic memory.
2429 	 */
2430 	if (error = aio_req_alloc(&reqp, resultp)) {
2431 		mutex_exit(&aiop->aio_mutex);
2432 		if (sqp)
2433 			kmem_free(sqp, sizeof (sigqueue_t));
2434 		return (error);
2435 	}
2436 	aiop->aio_pending++;
2437 	aiop->aio_outstanding++;
2438 	reqp->aio_req_flags = AIO_PENDING;
2439 	if (sigev->sigev_notify == SIGEV_THREAD ||
2440 	    sigev->sigev_notify == SIGEV_PORT)
2441 		aio_enq(&aiop->aio_portpending, reqp, 0);
2442 	mutex_exit(&aiop->aio_mutex);
2443 	/*
2444 	 * initialize aio request.
2445 	 */
2446 	reqp->aio_req_fd = arg->aio_fildes;
2447 	reqp->aio_req_sigqp = sqp;
2448 	reqp->aio_req_iocb.iocb = NULL;
2449 	reqp->aio_req_lio = NULL;
2450 	reqp->aio_req_buf.b_file = vp;
2451 	uio = reqp->aio_req.aio_uio;
2452 	uio->uio_iovcnt = 1;
2453 	uio->uio_iov->iov_base = (caddr_t)arg->aio_buf;
2454 	uio->uio_iov->iov_len = arg->aio_nbytes;
2455 	uio->uio_loffset = arg->aio_offset;
2456 	*reqpp = reqp;
2457 	return (0);
2458 }
2459 
2460 /*
2461  * Allocate p_aio struct.
2462  */
2463 static aio_t *
2464 aio_aiop_alloc(void)
2465 {
2466 	aio_t	*aiop;
2467 
2468 	ASSERT(MUTEX_HELD(&curproc->p_lock));
2469 
2470 	aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);
2471 	if (aiop) {
2472 		mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
2473 		mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
2474 									NULL);
2475 		mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
2476 	}
2477 	return (aiop);
2478 }
2479 
2480 /*
2481  * Allocate an aio_req struct.
2482  */
2483 static int
2484 aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
2485 {
2486 	aio_req_t *reqp;
2487 	aio_t *aiop = curproc->p_aio;
2488 
2489 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2490 
2491 	if ((reqp = aiop->aio_free) != NULL) {
2492 		aiop->aio_free = reqp->aio_req_next;
2493 		bzero(reqp, sizeof (*reqp));
2494 	} else {
2495 		/*
2496 		 * Check whether memory is getting tight.
2497 		 * This is a temporary mechanism to avoid memory
2498 		 * exhaustion by a single process until we come up
2499 		 * with a per process solution such as setrlimit().
2500 		 */
2501 		if (freemem < desfree)
2502 			return (EAGAIN);
2503 		reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);
2504 		if (reqp == NULL)
2505 			return (EAGAIN);
2506 	}
2507 	reqp->aio_req.aio_uio = &reqp->aio_req_uio;
2508 	reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov;
2509 	reqp->aio_req.aio_private = reqp;
2510 	reqp->aio_req_buf.b_offset = -1;
2511 	reqp->aio_req_resultp = resultp;
2512 	if (aio_hash_insert(reqp, aiop)) {
2513 		reqp->aio_req_next = aiop->aio_free;
2514 		aiop->aio_free = reqp;
2515 		return (EINVAL);
2516 	}
2517 	*nreqp = reqp;
2518 	return (0);
2519 }
2520 
2521 /*
2522  * Allocate an aio_lio_t struct.
2523  */
2524 static int
2525 aio_lio_alloc(aio_lio_t **head)
2526 {
2527 	aio_lio_t *liop;
2528 	aio_t *aiop = curproc->p_aio;
2529 
2530 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2531 
2532 	if ((liop = aiop->aio_lio_free) != NULL) {
2533 		aiop->aio_lio_free = liop->lio_next;
2534 	} else {
2535 		/*
2536 		 * Check whether memory is getting tight.
2537 		 * This is a temporary mechanism to avoid memory
2538 		 * exhaustion by a single process until we come up
2539 		 * with a per process solution such as setrlimit().
2540 		 */
2541 		if (freemem < desfree)
2542 			return (EAGAIN);
2543 
2544 		liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
2545 		if (liop == NULL)
2546 			return (EAGAIN);
2547 	}
2548 	*head = liop;
2549 	return (0);
2550 }
2551 
2552 /*
2553  * this is a special per-process thread that is only activated if
2554  * the process is unmapping a segment with outstanding aio. normally,
2555  * the process will have completed the aio before unmapping the
2556  * segment. If the process does unmap a segment with outstanding aio,
2557  * this special thread will guarentee that the locked pages due to
2558  * aphysio() are released, thereby permitting the segment to be
2559  * unmapped. In addition to this, the cleanup thread is woken up
2560  * during DR operations to release the locked pages.
2561  */
2562 
2563 static int
2564 aio_cleanup_thread(aio_t *aiop)
2565 {
2566 	proc_t *p = curproc;
2567 	struct as *as = p->p_as;
2568 	int poked = 0;
2569 	kcondvar_t *cvp;
2570 	int exit_flag = 0;
2571 	int rqclnup = 0;
2572 
2573 	sigfillset(&curthread->t_hold);
2574 	sigdiffset(&curthread->t_hold, &cantmask);
2575 	for (;;) {
2576 		/*
2577 		 * if a segment is being unmapped, and the current
2578 		 * process's done queue is not empty, then every request
2579 		 * on the doneq with locked resources should be forced
2580 		 * to release their locks. By moving the doneq request
2581 		 * to the cleanupq, aio_cleanup() will process the cleanupq,
2582 		 * and place requests back onto the doneq. All requests
2583 		 * processed by aio_cleanup() will have their physical
2584 		 * resources unlocked.
2585 		 */
2586 		mutex_enter(&aiop->aio_mutex);
2587 		if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
2588 			aiop->aio_flags |= AIO_CLEANUP;
2589 			mutex_enter(&as->a_contents);
2590 			if (aiop->aio_rqclnup) {
2591 				aiop->aio_rqclnup = 0;
2592 				rqclnup = 1;
2593 			}
2594 
2595 			if ((rqclnup || AS_ISUNMAPWAIT(as)) &&
2596 			    aiop->aio_doneq) {
2597 				aio_req_t *doneqhead = aiop->aio_doneq;
2598 				mutex_exit(&as->a_contents);
2599 				aiop->aio_doneq = NULL;
2600 				aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
2601 			} else {
2602 				mutex_exit(&as->a_contents);
2603 			}
2604 		}
2605 		mutex_exit(&aiop->aio_mutex);
2606 		aio_cleanup(AIO_CLEANUP_THREAD);
2607 		/*
2608 		 * thread should block on the cleanupcv while
2609 		 * AIO_CLEANUP is set.
2610 		 */
2611 		cvp = &aiop->aio_cleanupcv;
2612 		mutex_enter(&aiop->aio_mutex);
2613 
2614 		if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
2615 		    aiop->aio_notifyq != NULL ||
2616 		    aiop->aio_portcleanupq != NULL) {
2617 			mutex_exit(&aiop->aio_mutex);
2618 			continue;
2619 		}
2620 		mutex_enter(&as->a_contents);
2621 
2622 		/*
2623 		 * AIO_CLEANUP determines when the cleanup thread
2624 		 * should be active. This flag is set when
2625 		 * the cleanup thread is awakened by as_unmap() or
2626 		 * due to DR operations.
2627 		 * The flag is cleared when the blocking as_unmap()
2628 		 * that originally awakened us is allowed to
2629 		 * complete. as_unmap() blocks when trying to
2630 		 * unmap a segment that has SOFTLOCKed pages. when
2631 		 * the segment's pages are all SOFTUNLOCKed,
2632 		 * as->a_flags & AS_UNMAPWAIT should be zero.
2633 		 *
2634 		 * In case of cleanup request by DR, the flag is cleared
2635 		 * once all the pending aio requests have been processed.
2636 		 *
2637 		 * The flag shouldn't be cleared right away if the
2638 		 * cleanup thread was interrupted because the process
2639 		 * is doing forkall(). This happens when cv_wait_sig()
2640 		 * returns zero, because it was awakened by a pokelwps().
2641 		 * If the process is not exiting, it must be doing forkall().
2642 		 */
2643 		if ((poked == 0) &&
2644 			((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
2645 					(aiop->aio_pending == 0))) {
2646 			aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
2647 			cvp = &as->a_cv;
2648 			rqclnup = 0;
2649 		}
2650 		mutex_exit(&aiop->aio_mutex);
2651 		if (poked) {
2652 			/*
2653 			 * If the process is exiting/killed, don't return
2654 			 * immediately without waiting for pending I/O's
2655 			 * and releasing the page locks.
2656 			 */
2657 			if (p->p_flag & (SEXITLWPS|SKILLED)) {
2658 				/*
2659 				 * If exit_flag is set, then it is
2660 				 * safe to exit because we have released
2661 				 * page locks of completed I/O's.
2662 				 */
2663 				if (exit_flag)
2664 					break;
2665 
2666 				mutex_exit(&as->a_contents);
2667 
2668 				/*
2669 				 * Wait for all the pending aio to complete.
2670 				 */
2671 				mutex_enter(&aiop->aio_mutex);
2672 				aiop->aio_flags |= AIO_REQ_BLOCK;
2673 				while (aiop->aio_pending != 0)
2674 					cv_wait(&aiop->aio_cleanupcv,
2675 						&aiop->aio_mutex);
2676 				mutex_exit(&aiop->aio_mutex);
2677 				exit_flag = 1;
2678 				continue;
2679 			} else if (p->p_flag &
2680 			    (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
2681 				/*
2682 				 * hold LWP until it
2683 				 * is continued.
2684 				 */
2685 				mutex_exit(&as->a_contents);
2686 				mutex_enter(&p->p_lock);
2687 				stop(PR_SUSPENDED, SUSPEND_NORMAL);
2688 				mutex_exit(&p->p_lock);
2689 				poked = 0;
2690 				continue;
2691 			}
2692 		} else {
2693 			/*
2694 			 * When started this thread will sleep on as->a_cv.
2695 			 * as_unmap will awake this thread if the
2696 			 * segment has SOFTLOCKed pages (poked = 0).
2697 			 * 1. pokelwps() awakes this thread =>
2698 			 *    break the loop to check SEXITLWPS, SHOLDFORK, etc
2699 			 * 2. as_unmap awakes this thread =>
2700 			 *    to break the loop it is necessary that
2701 			 *    - AS_UNMAPWAIT is set (as_unmap is waiting for
2702 			 *	memory to be unlocked)
2703 			 *    - AIO_CLEANUP is not set
2704 			 *	(if AIO_CLEANUP is set we have to wait for
2705 			 *	pending requests. aio_done will send a signal
2706 			 *	for every request which completes to continue
2707 			 *	unmapping the corresponding address range)
2708 			 * 3. A cleanup request will wake this thread up, ex.
2709 			 *    by the DR operations. The aio_rqclnup flag will
2710 			 *    be set.
2711 			 */
2712 			while (poked == 0) {
2713 				/*
2714 				 * we need to handle cleanup requests
2715 				 * that come in after we had just cleaned up,
2716 				 * so that we do cleanup of any new aio
2717 				 * requests that got completed and have
2718 				 * locked resources.
2719 				 */
2720 				if ((aiop->aio_rqclnup ||
2721 					(AS_ISUNMAPWAIT(as) != 0)) &&
2722 					(aiop->aio_flags & AIO_CLEANUP) == 0)
2723 					break;
2724 				poked = !cv_wait_sig(cvp, &as->a_contents);
2725 				if (AS_ISUNMAPWAIT(as) == 0)
2726 					cv_signal(cvp);
2727 				if (aiop->aio_outstanding != 0)
2728 					break;
2729 			}
2730 		}
2731 		mutex_exit(&as->a_contents);
2732 	}
2733 exit:
2734 	mutex_exit(&as->a_contents);
2735 	ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
2736 	aston(curthread);	/* make thread do post_syscall */
2737 	return (0);
2738 }
2739 
2740 /*
2741  * save a reference to a user's outstanding aio in a hash list.
2742  */
2743 static int
2744 aio_hash_insert(
2745 	aio_req_t *aio_reqp,
2746 	aio_t *aiop)
2747 {
2748 	long index;
2749 	aio_result_t *resultp = aio_reqp->aio_req_resultp;
2750 	aio_req_t *current;
2751 	aio_req_t **nextp;
2752 
2753 	index = AIO_HASH(resultp);
2754 	nextp = &aiop->aio_hash[index];
2755 	while ((current = *nextp) != NULL) {
2756 		if (current->aio_req_resultp == resultp)
2757 			return (DUPLICATE);
2758 		nextp = &current->aio_hash_next;
2759 	}
2760 	*nextp = aio_reqp;
2761 	aio_reqp->aio_hash_next = NULL;
2762 	return (0);
2763 }
2764 
2765 static int
2766 (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *,
2767     cred_t *)
2768 {
2769 	struct snode *sp;
2770 	dev_t		dev;
2771 	struct cb_ops  	*cb;
2772 	major_t		major;
2773 	int		(*aio_func)();
2774 
2775 	dev = vp->v_rdev;
2776 	major = getmajor(dev);
2777 
2778 	/*
2779 	 * return NULL for requests to files and STREAMs so
2780 	 * that libaio takes care of them.
2781 	 */
2782 	if (vp->v_type == VCHR) {
2783 		/* no stream device for kaio */
2784 		if (STREAMSTAB(major)) {
2785 			return (NULL);
2786 		}
2787 	} else {
2788 		return (NULL);
2789 	}
2790 
2791 	/*
2792 	 * Check old drivers which do not have async I/O entry points.
2793 	 */
2794 	if (devopsp[major]->devo_rev < 3)
2795 		return (NULL);
2796 
2797 	cb = devopsp[major]->devo_cb_ops;
2798 
2799 	if (cb->cb_rev < 1)
2800 		return (NULL);
2801 
2802 	/*
2803 	 * Check whether this device is a block device.
2804 	 * Kaio is not supported for devices like tty.
2805 	 */
2806 	if (cb->cb_strategy == nodev || cb->cb_strategy == NULL)
2807 		return (NULL);
2808 
2809 	/*
2810 	 * Clustering: If vnode is a PXFS vnode, then the device may be remote.
2811 	 * We cannot call the driver directly. Instead return the
2812 	 * PXFS functions.
2813 	 */
2814 
2815 	if (IS_PXFSVP(vp)) {
2816 		if (mode & FREAD)
2817 			return (clpxfs_aio_read);
2818 		else
2819 			return (clpxfs_aio_write);
2820 	}
2821 	if (mode & FREAD)
2822 		aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read;
2823 	else
2824 		aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write;
2825 
2826 	/*
2827 	 * Do we need this ?
2828 	 * nodev returns ENXIO anyway.
2829 	 */
2830 	if (aio_func == nodev)
2831 		return (NULL);
2832 
2833 	sp = VTOS(vp);
2834 	smark(sp, SACC);
2835 	return (aio_func);
2836 }
2837 
2838 /*
2839  * Clustering: We want check_vp to return a function prototyped
2840  * correctly that will be common to both PXFS and regular case.
2841  * We define this intermediate function that will do the right
2842  * thing for driver cases.
2843  */
2844 
2845 static int
2846 driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
2847 {
2848 	dev_t dev;
2849 	struct cb_ops  	*cb;
2850 
2851 	ASSERT(vp->v_type == VCHR);
2852 	ASSERT(!IS_PXFSVP(vp));
2853 	dev = VTOS(vp)->s_dev;
2854 	ASSERT(STREAMSTAB(getmajor(dev)) == NULL);
2855 
2856 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
2857 
2858 	ASSERT(cb->cb_awrite != nodev);
2859 	return ((*cb->cb_awrite)(dev, aio, cred_p));
2860 }
2861 
2862 /*
2863  * Clustering: We want check_vp to return a function prototyped
2864  * correctly that will be common to both PXFS and regular case.
2865  * We define this intermediate function that will do the right
2866  * thing for driver cases.
2867  */
2868 
2869 static int
2870 driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
2871 {
2872 	dev_t dev;
2873 	struct cb_ops  	*cb;
2874 
2875 	ASSERT(vp->v_type == VCHR);
2876 	ASSERT(!IS_PXFSVP(vp));
2877 	dev = VTOS(vp)->s_dev;
2878 	ASSERT(!STREAMSTAB(getmajor(dev)));
2879 
2880 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
2881 
2882 	ASSERT(cb->cb_aread != nodev);
2883 	return ((*cb->cb_aread)(dev, aio, cred_p));
2884 }
2885 
2886 /*
2887  * This routine is called when a largefile call is made by a 32bit
2888  * process on a ILP32 or LP64 kernel. All 64bit processes are large
2889  * file by definition and will call alio() instead.
2890  */
2891 static int
2892 alioLF(
2893 	int		mode_arg,
2894 	void		*aiocb_arg,
2895 	int		nent,
2896 	void		*sigev)
2897 {
2898 	file_t		*fp;
2899 	file_t		*prev_fp = NULL;
2900 	int		prev_mode = -1;
2901 	struct vnode	*vp;
2902 	aio_lio_t	*head;
2903 	aio_req_t	*reqp;
2904 	aio_t		*aiop;
2905 	caddr_t		cbplist;
2906 	aiocb64_32_t	cb64;
2907 	aiocb64_32_t	*aiocb = &cb64;
2908 	aiocb64_32_t	*cbp;
2909 	caddr32_t	*ucbp;
2910 #ifdef _LP64
2911 	aiocb_t		aiocb_n;
2912 #endif
2913 	struct sigevent32	sigevk;
2914 	sigqueue_t	*sqp;
2915 	int		(*aio_func)();
2916 	int		mode;
2917 	int		error = 0;
2918 	int		aio_errors = 0;
2919 	int		i;
2920 	size_t		ssize;
2921 	int		deadhead = 0;
2922 	int		aio_notsupported = 0;
2923 	int		lio_head_port;
2924 	int		aio_port;
2925 	int		aio_thread;
2926 	port_kevent_t	*pkevtp = NULL;
2927 	port_notify32_t	pnotify;
2928 	int		event;
2929 
2930 	aiop = curproc->p_aio;
2931 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
2932 		return (EINVAL);
2933 
2934 	ASSERT(get_udatamodel() == DATAMODEL_ILP32);
2935 
2936 	ssize = (sizeof (caddr32_t) * nent);
2937 	cbplist = kmem_alloc(ssize, KM_SLEEP);
2938 	ucbp = (caddr32_t *)cbplist;
2939 
2940 	if (copyin(aiocb_arg, cbplist, ssize) ||
2941 	    (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
2942 		kmem_free(cbplist, ssize);
2943 		return (EFAULT);
2944 	}
2945 
2946 	/* Event Ports  */
2947 	if (sigev &&
2948 	    (sigevk.sigev_notify == SIGEV_THREAD ||
2949 	    sigevk.sigev_notify == SIGEV_PORT)) {
2950 		if (sigevk.sigev_notify == SIGEV_THREAD) {
2951 			pnotify.portnfy_port = sigevk.sigev_signo;
2952 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
2953 		} else if (copyin(
2954 		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
2955 		    &pnotify, sizeof (pnotify))) {
2956 			kmem_free(cbplist, ssize);
2957 			return (EFAULT);
2958 		}
2959 		error = port_alloc_event(pnotify.portnfy_port,
2960 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
2961 		if (error) {
2962 			if (error == ENOMEM || error == EAGAIN)
2963 				error = EAGAIN;
2964 			else
2965 				error = EINVAL;
2966 			kmem_free(cbplist, ssize);
2967 			return (error);
2968 		}
2969 		lio_head_port = pnotify.portnfy_port;
2970 	}
2971 
2972 	/*
2973 	 * a list head should be allocated if notification is
2974 	 * enabled for this list.
2975 	 */
2976 	head = NULL;
2977 
2978 	if (mode_arg == LIO_WAIT || sigev) {
2979 		mutex_enter(&aiop->aio_mutex);
2980 		error = aio_lio_alloc(&head);
2981 		mutex_exit(&aiop->aio_mutex);
2982 		if (error)
2983 			goto done;
2984 		deadhead = 1;
2985 		head->lio_nent = nent;
2986 		head->lio_refcnt = nent;
2987 		head->lio_port = -1;
2988 		head->lio_portkev = NULL;
2989 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
2990 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
2991 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
2992 			if (sqp == NULL) {
2993 				error = EAGAIN;
2994 				goto done;
2995 			}
2996 			sqp->sq_func = NULL;
2997 			sqp->sq_next = NULL;
2998 			sqp->sq_info.si_code = SI_ASYNCIO;
2999 			sqp->sq_info.si_pid = curproc->p_pid;
3000 			sqp->sq_info.si_ctid = PRCTID(curproc);
3001 			sqp->sq_info.si_zoneid = getzoneid();
3002 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3003 			sqp->sq_info.si_signo = sigevk.sigev_signo;
3004 			sqp->sq_info.si_value.sival_int =
3005 			    sigevk.sigev_value.sival_int;
3006 			head->lio_sigqp = sqp;
3007 		} else {
3008 			head->lio_sigqp = NULL;
3009 		}
3010 		if (pkevtp) {
3011 			/*
3012 			 * Prepare data to send when list of aiocb's
3013 			 * has completed.
3014 			 */
3015 			port_init_event(pkevtp, (uintptr_t)sigev,
3016 			    (void *)(uintptr_t)pnotify.portnfy_user,
3017 			    NULL, head);
3018 			pkevtp->portkev_events = AIOLIO64;
3019 			head->lio_portkev = pkevtp;
3020 			head->lio_port = pnotify.portnfy_port;
3021 		}
3022 	}
3023 
3024 	for (i = 0; i < nent; i++, ucbp++) {
3025 
3026 		cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
3027 		/* skip entry if it can't be copied. */
3028 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
3029 			if (head) {
3030 				mutex_enter(&aiop->aio_mutex);
3031 				head->lio_nent--;
3032 				head->lio_refcnt--;
3033 				mutex_exit(&aiop->aio_mutex);
3034 			}
3035 			continue;
3036 		}
3037 
3038 		/* skip if opcode for aiocb is LIO_NOP */
3039 		mode = aiocb->aio_lio_opcode;
3040 		if (mode == LIO_NOP) {
3041 			cbp = NULL;
3042 			if (head) {
3043 				mutex_enter(&aiop->aio_mutex);
3044 				head->lio_nent--;
3045 				head->lio_refcnt--;
3046 				mutex_exit(&aiop->aio_mutex);
3047 			}
3048 			continue;
3049 		}
3050 
3051 		/* increment file descriptor's ref count. */
3052 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
3053 			lio_set_uerror(&cbp->aio_resultp, EBADF);
3054 			if (head) {
3055 				mutex_enter(&aiop->aio_mutex);
3056 				head->lio_nent--;
3057 				head->lio_refcnt--;
3058 				mutex_exit(&aiop->aio_mutex);
3059 			}
3060 			aio_errors++;
3061 			continue;
3062 		}
3063 
3064 		/*
3065 		 * check the permission of the partition
3066 		 */
3067 		if ((fp->f_flag & mode) == 0) {
3068 			releasef(aiocb->aio_fildes);
3069 			lio_set_uerror(&cbp->aio_resultp, EBADF);
3070 			if (head) {
3071 				mutex_enter(&aiop->aio_mutex);
3072 				head->lio_nent--;
3073 				head->lio_refcnt--;
3074 				mutex_exit(&aiop->aio_mutex);
3075 			}
3076 			aio_errors++;
3077 			continue;
3078 		}
3079 
3080 		/*
3081 		 * common case where requests are to the same fd
3082 		 * for the same r/w operation
3083 		 * for UFS, need to set EBADFD
3084 		 */
3085 		vp = fp->f_vnode;
3086 		if (fp != prev_fp || mode != prev_mode) {
3087 			aio_func = check_vp(vp, mode);
3088 			if (aio_func == NULL) {
3089 				prev_fp = NULL;
3090 				releasef(aiocb->aio_fildes);
3091 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
3092 				aio_notsupported++;
3093 				if (head) {
3094 					mutex_enter(&aiop->aio_mutex);
3095 					head->lio_nent--;
3096 					head->lio_refcnt--;
3097 					mutex_exit(&aiop->aio_mutex);
3098 				}
3099 				continue;
3100 			} else {
3101 				prev_fp = fp;
3102 				prev_mode = mode;
3103 			}
3104 		}
3105 
3106 #ifdef	_LP64
3107 		aiocb_LFton(aiocb, &aiocb_n);
3108 		error = aio_req_setup(&reqp, aiop, &aiocb_n,
3109 		    (aio_result_t *)&cbp->aio_resultp, vp);
3110 #else
3111 		error = aio_req_setupLF(&reqp, aiop, aiocb,
3112 		    (aio_result_t *)&cbp->aio_resultp, vp);
3113 #endif  /* _LP64 */
3114 		if (error) {
3115 			releasef(aiocb->aio_fildes);
3116 			lio_set_uerror(&cbp->aio_resultp, error);
3117 			if (head) {
3118 				mutex_enter(&aiop->aio_mutex);
3119 				head->lio_nent--;
3120 				head->lio_refcnt--;
3121 				mutex_exit(&aiop->aio_mutex);
3122 			}
3123 			aio_errors++;
3124 			continue;
3125 		}
3126 
3127 		reqp->aio_req_lio = head;
3128 		deadhead = 0;
3129 
3130 		/*
3131 		 * Set the errno field now before sending the request to
3132 		 * the driver to avoid a race condition
3133 		 */
3134 		(void) suword32(&cbp->aio_resultp.aio_errno,
3135 		    EINPROGRESS);
3136 
3137 		reqp->aio_req_iocb.iocb32 = *ucbp;
3138 
3139 		event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
3140 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
3141 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
3142 		if (aio_port | aio_thread) {
3143 			port_kevent_t *lpkevp;
3144 			/*
3145 			 * Prepare data to send with each aiocb completed.
3146 			 */
3147 			if (aio_port) {
3148 				void *paddr = (void *)(uintptr_t)
3149 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
3150 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
3151 					error = EFAULT;
3152 			} else {	/* aio_thread */
3153 				pnotify.portnfy_port =
3154 				    aiocb->aio_sigevent.sigev_signo;
3155 				pnotify.portnfy_user =
3156 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
3157 			}
3158 			if (error)
3159 				/* EMPTY */;
3160 			else if (pkevtp != NULL &&
3161 			    pnotify.portnfy_port == lio_head_port)
3162 				error = port_dup_event(pkevtp, &lpkevp,
3163 				    PORT_ALLOC_DEFAULT);
3164 			else
3165 				error = port_alloc_event(pnotify.portnfy_port,
3166 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
3167 				    &lpkevp);
3168 			if (error == 0) {
3169 				port_init_event(lpkevp, (uintptr_t)*ucbp,
3170 				    (void *)(uintptr_t)pnotify.portnfy_user,
3171 				    aio_port_callback, reqp);
3172 				lpkevp->portkev_events = event;
3173 				reqp->aio_req_portkev = lpkevp;
3174 				reqp->aio_req_port = pnotify.portnfy_port;
3175 			}
3176 		}
3177 
3178 		/*
3179 		 * send the request to driver.
3180 		 */
3181 		if (error == 0) {
3182 			if (aiocb->aio_nbytes == 0) {
3183 				clear_active_fd(aiocb->aio_fildes);
3184 				aio_zerolen(reqp);
3185 				continue;
3186 			}
3187 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
3188 			    CRED());
3189 		}
3190 
3191 		/*
3192 		 * the fd's ref count is not decremented until the IO has
3193 		 * completed unless there was an error.
3194 		 */
3195 		if (error) {
3196 			releasef(aiocb->aio_fildes);
3197 			lio_set_uerror(&cbp->aio_resultp, error);
3198 			if (head) {
3199 				mutex_enter(&aiop->aio_mutex);
3200 				head->lio_nent--;
3201 				head->lio_refcnt--;
3202 				mutex_exit(&aiop->aio_mutex);
3203 			}
3204 			if (error == ENOTSUP)
3205 				aio_notsupported++;
3206 			else
3207 				aio_errors++;
3208 			lio_set_error(reqp);
3209 		} else {
3210 			clear_active_fd(aiocb->aio_fildes);
3211 		}
3212 	}
3213 
3214 	if (aio_notsupported) {
3215 		error = ENOTSUP;
3216 	} else if (aio_errors) {
3217 		/*
3218 		 * return EIO if any request failed
3219 		 */
3220 		error = EIO;
3221 	}
3222 
3223 	if (mode_arg == LIO_WAIT) {
3224 		mutex_enter(&aiop->aio_mutex);
3225 		while (head->lio_refcnt > 0) {
3226 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
3227 				mutex_exit(&aiop->aio_mutex);
3228 				error = EINTR;
3229 				goto done;
3230 			}
3231 		}
3232 		mutex_exit(&aiop->aio_mutex);
3233 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
3234 	}
3235 
3236 done:
3237 	kmem_free(cbplist, ssize);
3238 	if (deadhead) {
3239 		if (head->lio_sigqp)
3240 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
3241 		if (head->lio_portkev)
3242 			port_free_event(head->lio_portkev);
3243 		kmem_free(head, sizeof (aio_lio_t));
3244 	}
3245 	return (error);
3246 }
3247 
#ifdef  _SYSCALL32_IMPL
/*
 * Convert a 32-bit largefile aiocb (aiocb64_32_t) into a native
 * aiocb_t, field by field.  32-bit user addresses are widened through
 * uintptr_t; the sigval is carried over as its integer member.
 */
static void
aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
{
	struct sigevent32	*sev32 = &src->aio_sigevent;
	struct sigevent		*sev = &dest->aio_sigevent;

	/* description of the transfer itself */
	dest->aio_fildes = src->aio_fildes;
	dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
	dest->aio_nbytes = (size_t)src->aio_nbytes;
	dest->aio_offset = (off_t)src->aio_offset;
	dest->aio_reqprio = src->aio_reqprio;

	/* completion notification */
	sev->sigev_notify = sev32->sigev_notify;
	sev->sigev_signo = sev32->sigev_signo;

	/*
	 * See comment in sigqueue32() on handling of 32-bit
	 * sigvals in a 64-bit kernel.
	 */
	sev->sigev_value.sival_int = (int)sev32->sigev_value.sival_int;
	sev->sigev_notify_function = (void (*)(union sigval))
	    (uintptr_t)sev32->sigev_notify_function;
	sev->sigev_notify_attributes = (pthread_attr_t *)
	    (uintptr_t)sev32->sigev_notify_attributes;
	sev->__sigev_pad2 = sev32->__sigev_pad2;

	/* remaining bookkeeping fields */
	dest->aio_lio_opcode = src->aio_lio_opcode;
	dest->aio_state = src->aio_state;
	dest->aio__pad[0] = src->aio__pad[0];
}
#endif
3276 
3277 /*
3278  * This function is used only for largefile calls made by
3279  * 32 bit applications.
3280  */
3281 static int
3282 aio_req_setupLF(
3283 	aio_req_t	**reqpp,
3284 	aio_t		*aiop,
3285 	aiocb64_32_t	*arg,
3286 	aio_result_t	*resultp,
3287 	vnode_t		*vp)
3288 {
3289 	sigqueue_t	*sqp = NULL;
3290 	aio_req_t	*reqp;
3291 	struct uio	*uio;
3292 	struct sigevent32 *sigev;
3293 	int 		error;
3294 
3295 	sigev = &arg->aio_sigevent;
3296 	if (sigev->sigev_notify == SIGEV_SIGNAL &&
3297 	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
3298 		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
3299 		if (sqp == NULL)
3300 			return (EAGAIN);
3301 		sqp->sq_func = NULL;
3302 		sqp->sq_next = NULL;
3303 		sqp->sq_info.si_code = SI_ASYNCIO;
3304 		sqp->sq_info.si_pid = curproc->p_pid;
3305 		sqp->sq_info.si_ctid = PRCTID(curproc);
3306 		sqp->sq_info.si_zoneid = getzoneid();
3307 		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3308 		sqp->sq_info.si_signo = sigev->sigev_signo;
3309 		sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
3310 	}
3311 
3312 	mutex_enter(&aiop->aio_mutex);
3313 
3314 	if (aiop->aio_flags & AIO_REQ_BLOCK) {
3315 		mutex_exit(&aiop->aio_mutex);
3316 		if (sqp)
3317 			kmem_free(sqp, sizeof (sigqueue_t));
3318 		return (EIO);
3319 	}
3320 	/*
3321 	 * get an aio_reqp from the free list or allocate one
3322 	 * from dynamic memory.
3323 	 */
3324 	if (error = aio_req_alloc(&reqp, resultp)) {
3325 		mutex_exit(&aiop->aio_mutex);
3326 		if (sqp)
3327 			kmem_free(sqp, sizeof (sigqueue_t));
3328 		return (error);
3329 	}
3330 	aiop->aio_pending++;
3331 	aiop->aio_outstanding++;
3332 	reqp->aio_req_flags = AIO_PENDING;
3333 	if (sigev->sigev_notify == SIGEV_THREAD ||
3334 	    sigev->sigev_notify == SIGEV_PORT)
3335 		aio_enq(&aiop->aio_portpending, reqp, 0);
3336 	mutex_exit(&aiop->aio_mutex);
3337 	/*
3338 	 * initialize aio request.
3339 	 */
3340 	reqp->aio_req_fd = arg->aio_fildes;
3341 	reqp->aio_req_sigqp = sqp;
3342 	reqp->aio_req_iocb.iocb = NULL;
3343 	reqp->aio_req_lio = NULL;
3344 	reqp->aio_req_buf.b_file = vp;
3345 	uio = reqp->aio_req.aio_uio;
3346 	uio->uio_iovcnt = 1;
3347 	uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
3348 	uio->uio_iov->iov_len = arg->aio_nbytes;
3349 	uio->uio_loffset = arg->aio_offset;
3350 	*reqpp = reqp;
3351 	return (0);
3352 }
3353 
3354 /*
3355  * This routine is called when a non largefile call is made by a 32bit
3356  * process on a ILP32 or LP64 kernel.
3357  */
3358 static int
3359 alio32(
3360 	int		mode_arg,
3361 	void		*aiocb_arg,
3362 	int		nent,
3363 	void		*sigev)
3364 {
3365 	file_t		*fp;
3366 	file_t		*prev_fp = NULL;
3367 	int		prev_mode = -1;
3368 	struct vnode	*vp;
3369 	aio_lio_t	*head;
3370 	aio_req_t	*reqp;
3371 	aio_t		*aiop;
3372 	caddr_t		cbplist;
3373 	aiocb_t		cb;
3374 	aiocb_t		*aiocb = &cb;
3375 #ifdef	_LP64
3376 	aiocb32_t	*cbp;
3377 	caddr32_t	*ucbp;
3378 	aiocb32_t	cb32;
3379 	aiocb32_t	*aiocb32 = &cb32;
3380 	struct sigevent32	sigevk;
3381 #else
3382 	aiocb_t		*cbp, **ucbp;
3383 	struct sigevent	sigevk;
3384 #endif
3385 	sigqueue_t	*sqp;
3386 	int		(*aio_func)();
3387 	int		mode;
3388 	int		error = 0;
3389 	int		aio_errors = 0;
3390 	int		i;
3391 	size_t		ssize;
3392 	int		deadhead = 0;
3393 	int		aio_notsupported = 0;
3394 	int		lio_head_port;
3395 	int		aio_port;
3396 	int		aio_thread;
3397 	port_kevent_t	*pkevtp = NULL;
3398 #ifdef	_LP64
3399 	port_notify32_t	pnotify;
3400 #else
3401 	port_notify_t	pnotify;
3402 #endif
3403 	int		event;
3404 
3405 	aiop = curproc->p_aio;
3406 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
3407 		return (EINVAL);
3408 
3409 #ifdef	_LP64
3410 	ssize = (sizeof (caddr32_t) * nent);
3411 #else
3412 	ssize = (sizeof (aiocb_t *) * nent);
3413 #endif
3414 	cbplist = kmem_alloc(ssize, KM_SLEEP);
3415 	ucbp = (void *)cbplist;
3416 
3417 	if (copyin(aiocb_arg, cbplist, ssize) ||
3418 	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) {
3419 		kmem_free(cbplist, ssize);
3420 		return (EFAULT);
3421 	}
3422 
3423 	/* Event Ports  */
3424 	if (sigev &&
3425 	    (sigevk.sigev_notify == SIGEV_THREAD ||
3426 	    sigevk.sigev_notify == SIGEV_PORT)) {
3427 		if (sigevk.sigev_notify == SIGEV_THREAD) {
3428 			pnotify.portnfy_port = sigevk.sigev_signo;
3429 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
3430 		} else if (copyin(
3431 		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
3432 		    &pnotify, sizeof (pnotify))) {
3433 			kmem_free(cbplist, ssize);
3434 			return (EFAULT);
3435 		}
3436 		error = port_alloc_event(pnotify.portnfy_port,
3437 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
3438 		if (error) {
3439 			if (error == ENOMEM || error == EAGAIN)
3440 				error = EAGAIN;
3441 			else
3442 				error = EINVAL;
3443 			kmem_free(cbplist, ssize);
3444 			return (error);
3445 		}
3446 		lio_head_port = pnotify.portnfy_port;
3447 	}
3448 
3449 	/*
3450 	 * a list head should be allocated if notification is
3451 	 * enabled for this list.
3452 	 */
3453 	head = NULL;
3454 
3455 	if (mode_arg == LIO_WAIT || sigev) {
3456 		mutex_enter(&aiop->aio_mutex);
3457 		error = aio_lio_alloc(&head);
3458 		mutex_exit(&aiop->aio_mutex);
3459 		if (error)
3460 			goto done;
3461 		deadhead = 1;
3462 		head->lio_nent = nent;
3463 		head->lio_refcnt = nent;
3464 		head->lio_port = -1;
3465 		head->lio_portkev = NULL;
3466 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
3467 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
3468 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
3469 			if (sqp == NULL) {
3470 				error = EAGAIN;
3471 				goto done;
3472 			}
3473 			sqp->sq_func = NULL;
3474 			sqp->sq_next = NULL;
3475 			sqp->sq_info.si_code = SI_ASYNCIO;
3476 			sqp->sq_info.si_pid = curproc->p_pid;
3477 			sqp->sq_info.si_ctid = PRCTID(curproc);
3478 			sqp->sq_info.si_zoneid = getzoneid();
3479 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3480 			sqp->sq_info.si_signo = sigevk.sigev_signo;
3481 			sqp->sq_info.si_value.sival_int =
3482 			    sigevk.sigev_value.sival_int;
3483 			head->lio_sigqp = sqp;
3484 		} else {
3485 			head->lio_sigqp = NULL;
3486 		}
3487 		if (pkevtp) {
3488 			/*
3489 			 * Prepare data to send when list of aiocb's has
3490 			 * completed.
3491 			 */
3492 			port_init_event(pkevtp, (uintptr_t)sigev,
3493 			    (void *)(uintptr_t)pnotify.portnfy_user,
3494 			    NULL, head);
3495 			pkevtp->portkev_events = AIOLIO;
3496 			head->lio_portkev = pkevtp;
3497 			head->lio_port = pnotify.portnfy_port;
3498 		}
3499 	}
3500 
3501 	for (i = 0; i < nent; i++, ucbp++) {
3502 
3503 		/* skip entry if it can't be copied. */
3504 #ifdef	_LP64
3505 		cbp = (aiocb32_t *)(uintptr_t)*ucbp;
3506 		if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32)))
3507 #else
3508 		cbp = (aiocb_t *)*ucbp;
3509 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb)))
3510 #endif
3511 		{
3512 			if (head) {
3513 				mutex_enter(&aiop->aio_mutex);
3514 				head->lio_nent--;
3515 				head->lio_refcnt--;
3516 				mutex_exit(&aiop->aio_mutex);
3517 			}
3518 			continue;
3519 		}
3520 #ifdef	_LP64
3521 		/*
3522 		 * copy 32 bit structure into 64 bit structure
3523 		 */
3524 		aiocb_32ton(aiocb32, aiocb);
3525 #endif /* _LP64 */
3526 
3527 		/* skip if opcode for aiocb is LIO_NOP */
3528 		mode = aiocb->aio_lio_opcode;
3529 		if (mode == LIO_NOP) {
3530 			cbp = NULL;
3531 			if (head) {
3532 				mutex_enter(&aiop->aio_mutex);
3533 				head->lio_nent--;
3534 				head->lio_refcnt--;
3535 				mutex_exit(&aiop->aio_mutex);
3536 			}
3537 			continue;
3538 		}
3539 
3540 		/* increment file descriptor's ref count. */
3541 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
3542 			lio_set_uerror(&cbp->aio_resultp, EBADF);
3543 			if (head) {
3544 				mutex_enter(&aiop->aio_mutex);
3545 				head->lio_nent--;
3546 				head->lio_refcnt--;
3547 				mutex_exit(&aiop->aio_mutex);
3548 			}
3549 			aio_errors++;
3550 			continue;
3551 		}
3552 
3553 		/*
3554 		 * check the permission of the partition
3555 		 */
3556 		if ((fp->f_flag & mode) == 0) {
3557 			releasef(aiocb->aio_fildes);
3558 			lio_set_uerror(&cbp->aio_resultp, EBADF);
3559 			if (head) {
3560 				mutex_enter(&aiop->aio_mutex);
3561 				head->lio_nent--;
3562 				head->lio_refcnt--;
3563 				mutex_exit(&aiop->aio_mutex);
3564 			}
3565 			aio_errors++;
3566 			continue;
3567 		}
3568 
3569 		/*
3570 		 * common case where requests are to the same fd
3571 		 * for the same r/w operation
3572 		 * for UFS, need to set EBADFD
3573 		 */
3574 		vp = fp->f_vnode;
3575 		if (fp != prev_fp || mode != prev_mode) {
3576 			aio_func = check_vp(vp, mode);
3577 			if (aio_func == NULL) {
3578 				prev_fp = NULL;
3579 				releasef(aiocb->aio_fildes);
3580 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
3581 				aio_notsupported++;
3582 				if (head) {
3583 					mutex_enter(&aiop->aio_mutex);
3584 					head->lio_nent--;
3585 					head->lio_refcnt--;
3586 					mutex_exit(&aiop->aio_mutex);
3587 				}
3588 				continue;
3589 			} else {
3590 				prev_fp = fp;
3591 				prev_mode = mode;
3592 			}
3593 		}
3594 
3595 		error = aio_req_setup(&reqp, aiop, aiocb,
3596 		    (aio_result_t *)&cbp->aio_resultp, vp);
3597 		if (error) {
3598 			releasef(aiocb->aio_fildes);
3599 			lio_set_uerror(&cbp->aio_resultp, error);
3600 			if (head) {
3601 				mutex_enter(&aiop->aio_mutex);
3602 				head->lio_nent--;
3603 				head->lio_refcnt--;
3604 				mutex_exit(&aiop->aio_mutex);
3605 			}
3606 			aio_errors++;
3607 			continue;
3608 		}
3609 
3610 		reqp->aio_req_lio = head;
3611 		deadhead = 0;
3612 
3613 		/*
3614 		 * Set the errno field now before sending the request to
3615 		 * the driver to avoid a race condition
3616 		 */
3617 		(void) suword32(&cbp->aio_resultp.aio_errno,
3618 		    EINPROGRESS);
3619 
3620 		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp;
3621 
3622 		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
3623 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
3624 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
3625 		if (aio_port | aio_thread) {
3626 			port_kevent_t *lpkevp;
3627 			/*
3628 			 * Prepare data to send with each aiocb completed.
3629 			 */
3630 #ifdef _LP64
3631 			if (aio_port) {
3632 				void *paddr = (void  *)(uintptr_t)
3633 				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
3634 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
3635 					error = EFAULT;
3636 			} else {	/* aio_thread */
3637 				pnotify.portnfy_port =
3638 				    aiocb32->aio_sigevent.sigev_signo;
3639 				pnotify.portnfy_user =
3640 				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
3641 			}
3642 #else
3643 			if (aio_port) {
3644 				void *paddr =
3645 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
3646 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
3647 					error = EFAULT;
3648 			} else {	/* aio_thread */
3649 				pnotify.portnfy_port =
3650 				    aiocb->aio_sigevent.sigev_signo;
3651 				pnotify.portnfy_user =
3652 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
3653 			}
3654 #endif
3655 			if (error)
3656 				/* EMPTY */;
3657 			else if (pkevtp != NULL &&
3658 			    pnotify.portnfy_port == lio_head_port)
3659 				error = port_dup_event(pkevtp, &lpkevp,
3660 				    PORT_ALLOC_DEFAULT);
3661 			else
3662 				error = port_alloc_event(pnotify.portnfy_port,
3663 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
3664 				    &lpkevp);
3665 			if (error == 0) {
3666 				port_init_event(lpkevp, (uintptr_t)cbp,
3667 				    (void *)(uintptr_t)pnotify.portnfy_user,
3668 				    aio_port_callback, reqp);
3669 				lpkevp->portkev_events = event;
3670 				reqp->aio_req_portkev = lpkevp;
3671 				reqp->aio_req_port = pnotify.portnfy_port;
3672 			}
3673 		}
3674 
3675 		/*
3676 		 * send the request to driver.
3677 		 */
3678 		if (error == 0) {
3679 			if (aiocb->aio_nbytes == 0) {
3680 				clear_active_fd(aiocb->aio_fildes);
3681 				aio_zerolen(reqp);
3682 				continue;
3683 			}
3684 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
3685 			    CRED());
3686 		}
3687 
3688 		/*
3689 		 * the fd's ref count is not decremented until the IO has
3690 		 * completed unless there was an error.
3691 		 */
3692 		if (error) {
3693 			releasef(aiocb->aio_fildes);
3694 			lio_set_uerror(&cbp->aio_resultp, error);
3695 			if (head) {
3696 				mutex_enter(&aiop->aio_mutex);
3697 				head->lio_nent--;
3698 				head->lio_refcnt--;
3699 				mutex_exit(&aiop->aio_mutex);
3700 			}
3701 			if (error == ENOTSUP)
3702 				aio_notsupported++;
3703 			else
3704 				aio_errors++;
3705 			lio_set_error(reqp);
3706 		} else {
3707 			clear_active_fd(aiocb->aio_fildes);
3708 		}
3709 	}
3710 
3711 	if (aio_notsupported) {
3712 		error = ENOTSUP;
3713 	} else if (aio_errors) {
3714 		/*
3715 		 * return EIO if any request failed
3716 		 */
3717 		error = EIO;
3718 	}
3719 
3720 	if (mode_arg == LIO_WAIT) {
3721 		mutex_enter(&aiop->aio_mutex);
3722 		while (head->lio_refcnt > 0) {
3723 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
3724 				mutex_exit(&aiop->aio_mutex);
3725 				error = EINTR;
3726 				goto done;
3727 			}
3728 		}
3729 		mutex_exit(&aiop->aio_mutex);
3730 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
3731 	}
3732 
3733 done:
3734 	kmem_free(cbplist, ssize);
3735 	if (deadhead) {
3736 		if (head->lio_sigqp)
3737 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
3738 		if (head->lio_portkev)
3739 			port_free_event(head->lio_portkev);
3740 		kmem_free(head, sizeof (aio_lio_t));
3741 	}
3742 	return (error);
3743 }
3744 
3745 
#ifdef  _SYSCALL32_IMPL
/*
 * Widen a 32-bit aiocb (as copied in from a 32-bit process) into the
 * kernel's native aiocb.  Each field listed below is converted
 * individually; 32-bit user addresses are widened through uintptr_t.
 * Fields of *dest that are not assigned here are left untouched.
 */
void
aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
{
	struct sigevent32	*sev32 = &src->aio_sigevent;
	struct sigevent		*sevn = &dest->aio_sigevent;

	/* I/O description */
	dest->aio_fildes = src->aio_fildes;
	dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
	dest->aio_nbytes = (size_t)src->aio_nbytes;
	dest->aio_offset = (off_t)src->aio_offset;
	dest->aio_reqprio = src->aio_reqprio;

	/* notification description */
	sevn->sigev_notify = sev32->sigev_notify;
	sevn->sigev_signo = sev32->sigev_signo;

	/*
	 * See comment in sigqueue32() on handling of 32-bit
	 * sigvals in a 64-bit kernel.
	 */
	sevn->sigev_value.sival_int = (int)sev32->sigev_value.sival_int;

	sevn->sigev_notify_function = (void (*)(union sigval))
	    (uintptr_t)sev32->sigev_notify_function;
	sevn->sigev_notify_attributes = (pthread_attr_t *)
	    (uintptr_t)sev32->sigev_notify_attributes;
	sevn->__sigev_pad2 = sev32->__sigev_pad2;

	/* list-I/O opcode, state and padding */
	dest->aio_lio_opcode = src->aio_lio_opcode;
	dest->aio_state = src->aio_state;
	dest->aio__pad[0] = src->aio__pad[0];
}
#endif /* _SYSCALL32_IMPL */
3774 
3775 /*
3776  * aio_port_callback() is called just before the event is retrieved from the
3777  * port. The task of this callback function is to finish the work of the
3778  * transaction for the application, it means :
3779  * - copyout transaction data to the application
3780  *	(this thread is running in the right process context)
3781  * - keep trace of the transaction (update of counters).
3782  * - free allocated buffers
3783  * The aiocb pointer is the object element of the port_kevent_t structure.
3784  *
3785  * flag :
3786  *	PORT_CALLBACK_DEFAULT : do copyout and free resources
3787  *	PORT_CALLBACK_CLOSE   : don't do copyout, free resources
3788  */
3789 
3790 /*ARGSUSED*/
3791 int
3792 aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
3793 {
3794 	aio_t		*aiop = curproc->p_aio;
3795 	aio_req_t	*reqp = arg;
3796 	struct	iovec	*iov;
3797 	struct	buf	*bp;
3798 	void		*resultp;
3799 
3800 	if (pid != curproc->p_pid) {
3801 		/* wrong proc !!, can not deliver data here ... */
3802 		return (EACCES);
3803 	}
3804 
3805 	mutex_enter(&aiop->aio_portq_mutex);
3806 	reqp->aio_req_portkev = NULL;
3807 	aio_req_remove_portq(aiop, reqp); /* remove request from portq */
3808 	mutex_exit(&aiop->aio_portq_mutex);
3809 	aphysio_unlock(reqp);		/* unlock used pages */
3810 	mutex_enter(&aiop->aio_mutex);
3811 	if (reqp->aio_req_flags & AIO_COPYOUTDONE) {
3812 		aio_req_free_port(aiop, reqp);	/* back to free list */
3813 		mutex_exit(&aiop->aio_mutex);
3814 		return (0);
3815 	}
3816 
3817 	iov = reqp->aio_req_uio.uio_iov;
3818 	bp = &reqp->aio_req_buf;
3819 	resultp = (void *)reqp->aio_req_resultp;
3820 	aio_req_free_port(aiop, reqp);	/* request struct back to free list */
3821 	mutex_exit(&aiop->aio_mutex);
3822 	if (flag == PORT_CALLBACK_DEFAULT)
3823 		aio_copyout_result_port(iov, bp, resultp);
3824 	return (0);
3825 }
3826