xref: /titanic_51/usr/src/uts/common/os/aio.c (revision b2e8ece49d4c2e04af8e8e83caa60e23caa58061)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Kernel asynchronous I/O.
31  * This is only for raw devices now (as of Nov. 1993).
32  */
33 
34 #include <sys/types.h>
35 #include <sys/errno.h>
36 #include <sys/conf.h>
37 #include <sys/file.h>
38 #include <sys/fs/snode.h>
39 #include <sys/unistd.h>
40 #include <sys/cmn_err.h>
41 #include <vm/as.h>
42 #include <vm/faultcode.h>
43 #include <sys/sysmacros.h>
44 #include <sys/procfs.h>
45 #include <sys/kmem.h>
46 #include <sys/autoconf.h>
47 #include <sys/ddi_impldefs.h>
48 #include <sys/sunddi.h>
49 #include <sys/aio_impl.h>
50 #include <sys/debug.h>
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/vmsystm.h>
54 #include <sys/fs/pxfs_ki.h>
55 #include <sys/contract/process_impl.h>
56 
57 /*
58  * external entry point.
59  */
60 #ifdef _LP64
61 static int64_t kaioc(long, long, long, long, long, long);
62 #endif
63 static int kaio(ulong_t *, rval_t *);
64 
65 
66 #define	AIO_64	0
67 #define	AIO_32	1
68 #define	AIO_LARGEFILE	2
69 
70 /*
71  * implementation specific functions (private)
72  */
73 #ifdef _LP64
74 static int alio(int, aiocb_t **, int, struct sigevent *);
75 #endif
76 static int aionotify(void);
77 static int aioinit(void);
78 static int aiostart(void);
79 static void alio_cleanup(aio_t *, aiocb_t **, int, int);
80 static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
81     cred_t *);
82 static void lio_set_error(aio_req_t *);
83 static aio_t *aio_aiop_alloc();
84 static int aio_req_alloc(aio_req_t **, aio_result_t *);
85 static int aio_lio_alloc(aio_lio_t **);
86 static aio_req_t *aio_req_done(void *);
87 static aio_req_t *aio_req_remove(aio_req_t *);
88 static int aio_req_find(aio_result_t *, aio_req_t **);
89 static int aio_hash_insert(struct aio_req_t *, aio_t *);
90 static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
91     aio_result_t *, vnode_t *);
92 static int aio_cleanup_thread(aio_t *);
93 static aio_lio_t *aio_list_get(aio_result_t *);
94 static void lio_set_uerror(void *, int);
95 extern void aio_zerolen(aio_req_t *);
96 static int aiowait(struct timeval *, int, long *);
97 static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
98 static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
99     aio_req_t *reqlist, aio_t *aiop, model_t model);
100 static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
101 static int aiosuspend(void *, int, struct timespec *, int,
102     long *, int);
103 static int aliowait(int, void *, int, void *, int);
104 static int aioerror(void *, int);
105 static int aio_cancel(int, void *, long *, int);
106 static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
107 static int aiorw(int, void *, int, int);
108 
109 static int alioLF(int, void *, int, void *);
110 static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
111     aio_result_t *, vnode_t *);
112 static int alio32(int, void *, int, void *);
113 static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
114 static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
115 
116 #ifdef  _SYSCALL32_IMPL
117 static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
118 void	aiocb_32ton(aiocb32_t *, aiocb_t *);
119 #endif /* _SYSCALL32_IMPL */
120 
121 /*
122  * implementation specific functions (external)
123  */
124 void aio_req_free(aio_t *, aio_req_t *);
125 
126 /*
127  * Event Port framework
128  */
129 
130 void aio_req_free_port(aio_t *, aio_req_t *);
131 static int aio_port_callback(void *, int *, pid_t, int, void *);
132 
133 /*
134  * This is the loadable module wrapper.
135  */
136 #include <sys/modctl.h>
137 #include <sys/syscall.h>
138 
139 #ifdef _LP64
140 
141 static struct sysent kaio_sysent = {
142 	6,
143 	SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
144 	(int (*)())kaioc
145 };
146 
147 #ifdef _SYSCALL32_IMPL
148 static struct sysent kaio_sysent32 = {
149 	7,
150 	SE_NOUNLOAD | SE_64RVAL,
151 	kaio
152 };
153 #endif  /* _SYSCALL32_IMPL */
154 
155 #else   /* _LP64 */
156 
157 static struct sysent kaio_sysent = {
158 	7,
159 	SE_NOUNLOAD | SE_32RVAL1,
160 	kaio
161 };
162 
163 #endif  /* _LP64 */
164 
165 /*
166  * Module linkage information for the kernel.
167  */
168 
169 static struct modlsys modlsys = {
170 	&mod_syscallops,
171 	"kernel Async I/O",
172 	&kaio_sysent
173 };
174 
175 #ifdef  _SYSCALL32_IMPL
176 static struct modlsys modlsys32 = {
177 	&mod_syscallops32,
178 	"kernel Async I/O for 32 bit compatibility",
179 	&kaio_sysent32
180 };
181 #endif  /* _SYSCALL32_IMPL */
182 
183 
184 static struct modlinkage modlinkage = {
185 	MODREV_1,
186 	&modlsys,
187 #ifdef  _SYSCALL32_IMPL
188 	&modlsys32,
189 #endif
190 	NULL
191 };
192 
193 int
194 _init(void)
195 {
196 	int retval;
197 
198 	if ((retval = mod_install(&modlinkage)) != 0)
199 		return (retval);
200 
201 	return (0);
202 }
203 
204 int
205 _fini(void)
206 {
207 	int retval;
208 
209 	retval = mod_remove(&modlinkage);
210 
211 	return (retval);
212 }
213 
214 int
215 _info(struct modinfo *modinfop)
216 {
217 	return (mod_info(&modlinkage, modinfop));
218 }
219 
220 #ifdef	_LP64
221 static int64_t
222 kaioc(
223 	long	a0,
224 	long	a1,
225 	long	a2,
226 	long	a3,
227 	long	a4,
228 	long	a5)
229 {
230 	int	error;
231 	long	rval = 0;
232 
233 	switch ((int)a0 & ~AIO_POLL_BIT) {
234 	case AIOREAD:
235 		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
236 		    (offset_t)a4, (aio_result_t *)a5, FREAD);
237 		break;
238 	case AIOWRITE:
239 		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
240 		    (offset_t)a4, (aio_result_t *)a5, FWRITE);
241 		break;
242 	case AIOWAIT:
243 		error = aiowait((struct timeval *)a1, (int)a2, &rval);
244 		break;
245 	case AIOWAITN:
246 		error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
247 		    (timespec_t *)a4);
248 		break;
249 	case AIONOTIFY:
250 		error = aionotify();
251 		break;
252 	case AIOINIT:
253 		error = aioinit();
254 		break;
255 	case AIOSTART:
256 		error = aiostart();
257 		break;
258 	case AIOLIO:
259 		error = alio((int)a1, (aiocb_t **)a2, (int)a3,
260 		    (struct sigevent *)a4);
261 		break;
262 	case AIOLIOWAIT:
263 		error = aliowait((int)a1, (void *)a2, (int)a3,
264 		    (struct sigevent *)a4, AIO_64);
265 		break;
266 	case AIOSUSPEND:
267 		error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
268 		    (int)a4, &rval, AIO_64);
269 		break;
270 	case AIOERROR:
271 		error = aioerror((void *)a1, AIO_64);
272 		break;
273 	case AIOAREAD:
274 		error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
275 		break;
276 	case AIOAWRITE:
277 		error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
278 		break;
279 	case AIOCANCEL:
280 		error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
281 		break;
282 
283 	/*
284 	 * The large file related calls are valid only for the
285 	 * 32 bit kernel and not for the 64 bit kernel.
286 	 * On the 64 bit kernel we convert large file calls
287 	 * to regular 64 bit calls.
288 	 */
289 
290 	default:
291 		error = EINVAL;
292 	}
293 	if (error)
294 		return ((int64_t)set_errno(error));
295 	return (rval);
296 }
297 #endif
298 
299 static int
300 kaio(
301 	ulong_t *uap,
302 	rval_t *rvp)
303 {
304 	long rval = 0;
305 	int	error = 0;
306 	offset_t	off;
307 
308 
309 	rvp->r_vals = 0;
310 #if defined(_LITTLE_ENDIAN)
311 	off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
312 #else
313 	off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
314 #endif
315 
316 	switch (uap[0] & ~AIO_POLL_BIT) {
317 	/*
318 	 * It must be the 32 bit system call on 64 bit kernel
319 	 */
320 	case AIOREAD:
321 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
322 		    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
323 	case AIOWRITE:
324 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
325 		    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
326 	case AIOWAIT:
327 		error = aiowait((struct	timeval *)uap[1], (int)uap[2],
328 		    &rval);
329 		break;
330 	case AIOWAITN:
331 		error = aiowaitn((void *)uap[1], (uint_t)uap[2],
332 		    (uint_t *)uap[3], (timespec_t *)uap[4]);
333 		break;
334 	case AIONOTIFY:
335 		return (aionotify());
336 	case AIOINIT:
337 		return (aioinit());
338 	case AIOSTART:
339 		return (aiostart());
340 	case AIOLIO:
341 		return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
342 		    (void *)uap[4]));
343 	case AIOLIOWAIT:
344 		return (aliowait((int)uap[1], (void *)uap[2],
345 		    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
346 	case AIOSUSPEND:
347 		error = aiosuspend((void *)uap[1], (int)uap[2],
348 		    (timespec_t *)uap[3], (int)uap[4],
349 		    &rval, AIO_32);
350 		break;
351 	case AIOERROR:
352 		return (aioerror((void *)uap[1], AIO_32));
353 	case AIOAREAD:
354 		return (aiorw((int)uap[0], (void *)uap[1],
355 		    FREAD, AIO_32));
356 	case AIOAWRITE:
357 		return (aiorw((int)uap[0], (void *)uap[1],
358 		    FWRITE, AIO_32));
359 	case AIOCANCEL:
360 		error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
361 		    AIO_32));
362 		break;
363 	case AIOLIO64:
364 		return (alioLF((int)uap[1], (void *)uap[2],
365 		    (int)uap[3], (void *)uap[4]));
366 	case AIOLIOWAIT64:
367 		return (aliowait(uap[1], (void *)uap[2],
368 		    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
369 	case AIOSUSPEND64:
370 		error = aiosuspend((void *)uap[1], (int)uap[2],
371 		    (timespec_t *)uap[3], (int)uap[4], &rval,
372 		    AIO_LARGEFILE);
373 		break;
374 	case AIOERROR64:
375 		return (aioerror((void *)uap[1], AIO_LARGEFILE));
376 	case AIOAREAD64:
377 		return (aiorw((int)uap[0], (void *)uap[1], FREAD,
378 		    AIO_LARGEFILE));
379 	case AIOAWRITE64:
380 		return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
381 		    AIO_LARGEFILE));
382 	case AIOCANCEL64:
383 		error = (aio_cancel((int)uap[1], (void *)uap[2],
384 		    &rval, AIO_LARGEFILE));
385 		break;
386 	default:
387 		return (EINVAL);
388 	}
389 
390 	rvp->r_val1 = rval;
391 	return (error);
392 }
393 
394 /*
395  * wake up LWPs in this process that are sleeping in
396  * aiowait().
397  */
398 static int
399 aionotify(void)
400 {
401 	aio_t	*aiop;
402 
403 	aiop = curproc->p_aio;
404 	if (aiop == NULL)
405 		return (0);
406 
407 	mutex_enter(&aiop->aio_mutex);
408 	aiop->aio_notifycnt++;
409 	cv_broadcast(&aiop->aio_waitcv);
410 	mutex_exit(&aiop->aio_mutex);
411 
412 	return (0);
413 }
414 
415 static int
416 timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
417 	timestruc_t **rqtp, int *blocking)
418 {
419 #ifdef	_SYSCALL32_IMPL
420 	struct timeval32 wait_time_32;
421 #endif
422 	struct timeval wait_time;
423 	model_t	model = get_udatamodel();
424 
425 	*rqtp = NULL;
426 	if (timout == NULL) {		/* wait indefinitely */
427 		*blocking = 1;
428 		return (0);
429 	}
430 
431 	/*
432 	 * Need to correctly compare with the -1 passed in for a user
433 	 * address pointer, with both 32 bit and 64 bit apps.
434 	 */
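	/*
	 * Illustrative example: (void *)-1 from a 32 bit app arrives in a
	 * 64 bit kernel as 0x00000000ffffffff, while (void *)-1 from a
	 * 64 bit app arrives as 0xffffffffffffffff; hence the two
	 * separate checks below.
	 */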
435 	if (model == DATAMODEL_NATIVE) {
436 		if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */
437 			*blocking = 0;
438 			return (0);
439 		}
440 
441 		if (copyin(timout, &wait_time, sizeof (wait_time)))
442 			return (EFAULT);
443 	}
444 #ifdef	_SYSCALL32_IMPL
445 	else {
446 		/*
447 		 * A -1 from a 32 bit app will not get sign extended,
448 		 * so don't wait if it is -1.
449 		 */
450 		if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
451 			*blocking = 0;
452 			return (0);
453 		}
454 
455 		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
456 			return (EFAULT);
457 		TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
458 	}
459 #endif  /* _SYSCALL32_IMPL */
460 
461 	if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */
462 		*blocking = 0;
463 		return (0);
464 	}
465 
466 	if (wait_time.tv_sec < 0 ||
467 	    wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
468 		return (EINVAL);
469 
470 	rqtime->tv_sec = wait_time.tv_sec;
471 	rqtime->tv_nsec = wait_time.tv_usec * 1000;
472 	*rqtp = rqtime;
473 	*blocking = 1;
474 
475 	return (0);
476 }
477 
478 static int
479 timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
480 	timestruc_t **rqtp, int *blocking)
481 {
482 #ifdef	_SYSCALL32_IMPL
483 	timespec32_t wait_time_32;
484 #endif
485 	model_t	model = get_udatamodel();
486 
487 	*rqtp = NULL;
488 	if (timout == NULL) {
489 		*blocking = 1;
490 		return (0);
491 	}
492 
493 	if (model == DATAMODEL_NATIVE) {
494 		if (copyin(timout, rqtime, sizeof (*rqtime)))
495 			return (EFAULT);
496 	}
497 #ifdef	_SYSCALL32_IMPL
498 	else {
499 		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
500 			return (EFAULT);
501 		TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
502 	}
503 #endif  /* _SYSCALL32_IMPL */
504 
505 	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
506 		*blocking = 0;
507 		return (0);
508 	}
509 
510 	if (rqtime->tv_sec < 0 ||
511 	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
512 		return (EINVAL);
513 
514 	*rqtp = rqtime;
515 	*blocking = 1;
516 
517 	return (0);
518 }
519 
520 /*ARGSUSED*/
521 static int
522 aiowait(
523 	struct timeval	*timout,
524 	int	dontblockflg,
525 	long	*rval)
526 {
527 	int 		error;
528 	aio_t		*aiop;
529 	aio_req_t	*reqp;
530 	clock_t		status;
531 	int		blocking;
532 	int		timecheck;
533 	timestruc_t	rqtime;
534 	timestruc_t	*rqtp;
535 
536 	aiop = curproc->p_aio;
537 	if (aiop == NULL)
538 		return (EINVAL);
539 
540 	/*
541 	 * Establish the absolute future time for the timeout.
542 	 */
543 	error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
544 	if (error)
545 		return (error);
546 	if (rqtp) {
547 		timestruc_t now;
548 		timecheck = timechanged;
549 		gethrestime(&now);
550 		timespecadd(rqtp, &now);
551 	}
552 
553 	mutex_enter(&aiop->aio_mutex);
554 	for (;;) {
555 		/* process requests on poll queue */
556 		if (aiop->aio_pollq) {
557 			mutex_exit(&aiop->aio_mutex);
558 			aio_cleanup(0);
559 			mutex_enter(&aiop->aio_mutex);
560 		}
561 		if ((reqp = aio_req_remove(NULL)) != NULL) {
562 			*rval = (long)reqp->aio_req_resultp;
563 			break;
564 		}
565 		/* user-level done queue might not be empty */
566 		if (aiop->aio_notifycnt > 0) {
567 			aiop->aio_notifycnt--;
568 			*rval = 1;
569 			break;
570 		}
571 		/* don't block if no outstanding aio */
572 		if (aiop->aio_outstanding == 0 && dontblockflg) {
573 			error = EINVAL;
574 			break;
575 		}
576 		if (blocking) {
577 			status = cv_waituntil_sig(&aiop->aio_waitcv,
578 			    &aiop->aio_mutex, rqtp, timecheck);
579 
580 			if (status > 0)		/* check done queue again */
581 				continue;
582 			if (status == 0) {	/* interrupted by a signal */
583 				error = EINTR;
584 				*rval = -1;
585 			} else {		/* timer expired */
586 				error = ETIME;
587 			}
588 		}
589 		break;
590 	}
591 	mutex_exit(&aiop->aio_mutex);
592 	if (reqp) {
593 		aphysio_unlock(reqp);
594 		aio_copyout_result(reqp);
595 		mutex_enter(&aiop->aio_mutex);
596 		aio_req_free(aiop, reqp);
597 		mutex_exit(&aiop->aio_mutex);
598 	}
599 	return (error);
600 }
601 
602 /*
603  * aiowaitn can be used to reap completed asynchronous requests submitted with
604  * lio_listio, aio_read or aio_write.
605  * This function only reaps asynchronous raw I/Os.
606  */
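
/*
 * A minimal userland sketch of how this path is typically driven, assuming
 * the aio_waitn(3C) interface; the device path and transfer sizes below are
 * illustrative only:
 *
 *	#include <aio.h>
 *	#include <fcntl.h>
 *	#include <stdlib.h>
 *
 *	struct aiocb cb[2] = { 0 };
 *	struct aiocb *done[2];
 *	uint_t nwait = 2;
 *	int i, fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);
 *
 *	for (i = 0; i < 2; i++) {
 *		cb[i].aio_fildes = fd;
 *		cb[i].aio_buf = malloc(8192);
 *		cb[i].aio_nbytes = 8192;
 *		cb[i].aio_offset = (off_t)i * 8192;
 *		(void) aio_read(&cb[i]);
 *	}
 *	if (aio_waitn(done, 2, &nwait, NULL) == 0) {
 *		for (i = 0; i < nwait; i++)
 *			(void) aio_return(done[i]);
 *	}
 *
 * On return, nwait holds the number of completed aiocb pointers placed in
 * done[]; this corresponds to the uiocb and nwait copyouts performed below.
 */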
607 
608 /*ARGSUSED*/
609 static int
610 aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
611 {
612 	int 		error = 0;
613 	aio_t		*aiop;
614 	aio_req_t	*reqlist = NULL;
615 	caddr_t		iocblist = NULL;	/* array of iocb ptr's */
616 	uint_t		waitcnt, cnt = 0;	/* iocb cnt */
617 	size_t		iocbsz;			/* users iocb size */
618 	size_t		riocbsz;		/* returned iocb size */
619 	int		iocb_index = 0;
620 	model_t		model = get_udatamodel();
621 	int		blocking = 1;
622 	int		timecheck;
623 	timestruc_t	rqtime;
624 	timestruc_t	*rqtp;
625 
626 	aiop = curproc->p_aio;
627 
628 	if (aiop == NULL || aiop->aio_outstanding == 0)
629 		return (EAGAIN);
630 
631 	if (copyin(nwait, &waitcnt, sizeof (uint_t)))
632 		return (EFAULT);
633 
634 	/* set *nwait to zero, if we must return prematurely */
635 	if (copyout(&cnt, nwait, sizeof (uint_t)))
636 		return (EFAULT);
637 
638 	if (waitcnt == 0) {
639 		blocking = 0;
640 		rqtp = NULL;
641 		waitcnt = nent;
642 	} else {
643 		error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
644 		if (error)
645 			return (error);
646 	}
647 
648 	if (model == DATAMODEL_NATIVE)
649 		iocbsz = (sizeof (aiocb_t *) * nent);
650 #ifdef	_SYSCALL32_IMPL
651 	else
652 		iocbsz = (sizeof (caddr32_t) * nent);
653 #endif  /* _SYSCALL32_IMPL */
654 
655 	/*
656 	 * Only one aio_waitn call is allowed at a time.
657 	 * The active aio_waitn will collect all requests
658 	 * out of the "done" list and if necessary it will wait
659 	 * for some/all pending requests to fulfill the nwait
660 	 * parameter.
661 	 * Second and further aio_waitn calls will sleep here
662 	 * until the active aio_waitn finishes and leaves the kernel.
663 	 * If the second call does not block (poll), then return
664 	 * immediately with the error code EAGAIN.
665 	 * If the second call should block, then sleep here, but
666 	 * do not touch the timeout. The timeout starts when this
667 	 * aio_waitn-call becomes active.
668 	 */
669 
670 	mutex_enter(&aiop->aio_mutex);
671 
672 	while (aiop->aio_flags & AIO_WAITN) {
673 		if (blocking == 0) {
674 			mutex_exit(&aiop->aio_mutex);
675 			return (EAGAIN);
676 		}
677 
678 		/* block, no timeout */
679 		aiop->aio_flags |= AIO_WAITN_PENDING;
680 		if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
681 			mutex_exit(&aiop->aio_mutex);
682 			return (EINTR);
683 		}
684 	}
685 
686 	/*
687 	 * Establish the absolute future time for the timeout.
688 	 */
689 	if (rqtp) {
690 		timestruc_t now;
691 		timecheck = timechanged;
692 		gethrestime(&now);
693 		timespecadd(rqtp, &now);
694 	}
695 
696 	if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
697 		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
698 		aiop->aio_iocb = NULL;
699 	}
700 
701 	if (aiop->aio_iocb == NULL) {
702 		iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
703 		if (iocblist == NULL) {
704 			mutex_exit(&aiop->aio_mutex);
705 			return (ENOMEM);
706 		}
707 		aiop->aio_iocb = (aiocb_t **)iocblist;
708 		aiop->aio_iocbsz = iocbsz;
709 	} else {
710 		iocblist = (char *)aiop->aio_iocb;
711 	}
712 
713 	aiop->aio_waitncnt = waitcnt;
714 	aiop->aio_flags |= AIO_WAITN;
715 
716 	for (;;) {
717 		/* push requests on poll queue to done queue */
718 		if (aiop->aio_pollq) {
719 			mutex_exit(&aiop->aio_mutex);
720 			aio_cleanup(0);
721 			mutex_enter(&aiop->aio_mutex);
722 		}
723 
724 		/* check for requests on done queue */
725 		if (aiop->aio_doneq) {
726 			cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
727 			aiop->aio_waitncnt = waitcnt - cnt;
728 		}
729 
730 		/* user-level done queue might not be empty */
731 		if (aiop->aio_notifycnt > 0) {
732 			aiop->aio_notifycnt--;
733 			error = 0;
734 			break;
735 		}
736 
737 		/*
738 		 * If we are here a second time as a result of timer
739 		 * expiration, we reset error if there are enough
740 		 * aiocb's to satisfy the request.
741 		 * We also return if all requests are already done
742 		 * and we picked up the whole done queue.
743 		 */
744 
745 		if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
746 		    aiop->aio_doneq == NULL)) {
747 			error = 0;
748 			break;
749 		}
750 
751 		if ((cnt < waitcnt) && blocking) {
752 			int rval = cv_waituntil_sig(&aiop->aio_waitcv,
753 			    &aiop->aio_mutex, rqtp, timecheck);
754 			if (rval > 0)
755 				continue;
756 			if (rval < 0) {
757 				error = ETIME;
758 				blocking = 0;
759 				continue;
760 			}
761 			error = EINTR;
762 		}
763 		break;
764 	}
765 
766 	mutex_exit(&aiop->aio_mutex);
767 
768 	if (cnt > 0) {
769 
770 		iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
771 		    aiop, model);
772 
773 		if (model == DATAMODEL_NATIVE)
774 			riocbsz = (sizeof (aiocb_t *) * cnt);
775 #ifdef	_SYSCALL32_IMPL
776 		else
777 			riocbsz = (sizeof (caddr32_t) * cnt);
778 #endif  /* _SYSCALL32_IMPL */
779 
780 		if (copyout(iocblist, uiocb, riocbsz) ||
781 		    copyout(&cnt, nwait, sizeof (uint_t)))
782 			error = EFAULT;
783 	}
784 
785 	if (aiop->aio_iocbsz > AIO_IOCB_MAX) {
786 		kmem_free(iocblist, aiop->aio_iocbsz);
787 		aiop->aio_iocb = NULL;
788 	}
789 
790 	/* check if there is another thread waiting for execution */
791 	mutex_enter(&aiop->aio_mutex);
792 	aiop->aio_flags &= ~AIO_WAITN;
793 	if (aiop->aio_flags & AIO_WAITN_PENDING) {
794 		aiop->aio_flags &= ~AIO_WAITN_PENDING;
795 		cv_signal(&aiop->aio_waitncv);
796 	}
797 	mutex_exit(&aiop->aio_mutex);
798 
799 	return (error);
800 }
801 
802 /*
803  * aio_unlock_requests
804  * copies out the result of each request as well as the return value.
805  * It builds the list of completed asynchronous requests,
806  * unlocks the allocated memory ranges and
807  * puts the aio request structures back into the free list.
808  */
809 
810 static int
811 aio_unlock_requests(
812 	caddr_t	iocblist,
813 	int	iocb_index,
814 	aio_req_t *reqlist,
815 	aio_t	*aiop,
816 	model_t	model)
817 {
818 	aio_req_t	*reqp, *nreqp;
819 
820 	if (model == DATAMODEL_NATIVE) {
821 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
822 			(((caddr_t *)iocblist)[iocb_index++]) =
823 			    reqp->aio_req_iocb.iocb;
824 			nreqp = reqp->aio_req_next;
825 			aphysio_unlock(reqp);
826 			aio_copyout_result(reqp);
827 			mutex_enter(&aiop->aio_mutex);
828 			aio_req_free(aiop, reqp);
829 			mutex_exit(&aiop->aio_mutex);
830 		}
831 	}
832 #ifdef	_SYSCALL32_IMPL
833 	else {
834 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
835 			((caddr32_t *)iocblist)[iocb_index++] =
836 			    reqp->aio_req_iocb.iocb32;
837 			nreqp = reqp->aio_req_next;
838 			aphysio_unlock(reqp);
839 			aio_copyout_result(reqp);
840 			mutex_enter(&aiop->aio_mutex);
841 			aio_req_free(aiop, reqp);
842 			mutex_exit(&aiop->aio_mutex);
843 		}
844 	}
845 #endif	/* _SYSCALL32_IMPL */
846 	return (iocb_index);
847 }
848 
849 /*
850  * aio_reqlist_concat
851  * moves "max" elements from the done queue to the reqlist queue and removes
852  * the AIO_DONEQ flag.
853  * - reqlist queue is a simple linked list
854  * - done queue is a double linked list
855  */
856 
857 static int
858 aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
859 {
860 	aio_req_t *q2, *q2work, *list;
861 	int count = 0;
862 
863 	list = *reqlist;
864 	q2 = aiop->aio_doneq;
865 	q2work = q2;
866 	while (max-- > 0) {
867 		q2work->aio_req_flags &= ~AIO_DONEQ;
868 		q2work = q2work->aio_req_next;
869 		count++;
870 		if (q2work == q2)
871 			break;
872 	}
873 
874 	if (q2work == q2) {
875 		/* all elements were moved; the done queue is now empty */
876 		q2->aio_req_prev->aio_req_next = list;
877 		list = q2;
878 		aiop->aio_doneq = NULL;
879 	} else {
880 		/*
881 		 * max < elements in the doneq
882 		 * detach only the required amount of elements
883 		 * out of the doneq
884 		 */
885 		q2work->aio_req_prev->aio_req_next = list;
886 		list = q2;
887 
888 		aiop->aio_doneq = q2work;
889 		q2work->aio_req_prev = q2->aio_req_prev;
890 		q2->aio_req_prev->aio_req_next = q2work;
891 	}
892 	*reqlist = list;
893 	return (count);
894 }
895 
896 /*ARGSUSED*/
897 static int
898 aiosuspend(
899 	void	*aiocb,
900 	int	nent,
901 	struct	timespec	*timout,
902 	int	flag,
903 	long	*rval,
904 	int	run_mode)
905 {
906 	int 		error;
907 	aio_t		*aiop;
908 	aio_req_t	*reqp, *found, *next;
909 	caddr_t		cbplist = NULL;
910 	aiocb_t		*cbp, **ucbp;
911 #ifdef	_SYSCALL32_IMPL
912 	aiocb32_t	*cbp32;
913 	caddr32_t	*ucbp32;
914 #endif  /* _SYSCALL32_IMPL */
915 	aiocb64_32_t	*cbp64;
916 	int		rv;
917 	int		i;
918 	size_t		ssize;
919 	model_t		model = get_udatamodel();
920 	int		blocking;
921 	int		timecheck;
922 	timestruc_t	rqtime;
923 	timestruc_t	*rqtp;
924 
925 	aiop = curproc->p_aio;
926 	if (aiop == NULL || nent <= 0)
927 		return (EINVAL);
928 
929 	/*
930 	 * Establish the absolute future time for the timeout.
931 	 */
932 	error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
933 	if (error)
934 		return (error);
935 	if (rqtp) {
936 		timestruc_t now;
937 		timecheck = timechanged;
938 		gethrestime(&now);
939 		timespecadd(rqtp, &now);
940 	}
941 
942 	/*
943 	 * If we are not blocking and there's no IO complete
944 	 * skip aiocb copyin.
945 	 */
946 	if (!blocking && (aiop->aio_pollq == NULL) &&
947 	    (aiop->aio_doneq == NULL)) {
948 		return (EAGAIN);
949 	}
950 
951 	if (model == DATAMODEL_NATIVE)
952 		ssize = (sizeof (aiocb_t *) * nent);
953 #ifdef	_SYSCALL32_IMPL
954 	else
955 		ssize = (sizeof (caddr32_t) * nent);
956 #endif  /* _SYSCALL32_IMPL */
957 
958 	cbplist = kmem_alloc(ssize, KM_NOSLEEP);
959 	if (cbplist == NULL)
960 		return (ENOMEM);
961 
962 	if (copyin(aiocb, cbplist, ssize)) {
963 		error = EFAULT;
964 		goto done;
965 	}
966 
967 	found = NULL;
968 	/*
969 	 * we need to get the aio_cleanupq_mutex since we call
970 	 * aio_req_done().
971 	 */
972 	mutex_enter(&aiop->aio_cleanupq_mutex);
973 	mutex_enter(&aiop->aio_mutex);
974 	for (;;) {
975 		/* push requests on poll queue to done queue */
976 		if (aiop->aio_pollq) {
977 			mutex_exit(&aiop->aio_mutex);
978 			mutex_exit(&aiop->aio_cleanupq_mutex);
979 			aio_cleanup(0);
980 			mutex_enter(&aiop->aio_cleanupq_mutex);
981 			mutex_enter(&aiop->aio_mutex);
982 		}
983 		/* check for requests on done queue */
984 		if (aiop->aio_doneq) {
985 			if (model == DATAMODEL_NATIVE)
986 				ucbp = (aiocb_t **)cbplist;
987 #ifdef	_SYSCALL32_IMPL
988 			else
989 				ucbp32 = (caddr32_t *)cbplist;
990 #endif  /* _SYSCALL32_IMPL */
991 			for (i = 0; i < nent; i++) {
992 				if (model == DATAMODEL_NATIVE) {
993 					if ((cbp = *ucbp++) == NULL)
994 						continue;
995 					if (run_mode != AIO_LARGEFILE)
996 						reqp = aio_req_done(
997 						    &cbp->aio_resultp);
998 					else {
999 						cbp64 = (aiocb64_32_t *)cbp;
1000 						reqp = aio_req_done(
1001 						    &cbp64->aio_resultp);
1002 					}
1003 				}
1004 #ifdef	_SYSCALL32_IMPL
1005 				else {
1006 					if (run_mode == AIO_32) {
1007 						if ((cbp32 =
1008 						    (aiocb32_t *)(uintptr_t)
1009 						    *ucbp32++) == NULL)
1010 							continue;
1011 						reqp = aio_req_done(
1012 						    &cbp32->aio_resultp);
1013 					} else if (run_mode == AIO_LARGEFILE) {
1014 						if ((cbp64 =
1015 						    (aiocb64_32_t *)(uintptr_t)
1016 						    *ucbp32++) == NULL)
1017 							continue;
1018 						reqp = aio_req_done(
1019 						    &cbp64->aio_resultp);
1020 					}
1021 
1022 				}
1023 #endif  /* _SYSCALL32_IMPL */
1024 				if (reqp) {
1025 					reqp->aio_req_next = found;
1026 					found = reqp;
1027 				}
1028 				if (aiop->aio_doneq == NULL)
1029 					break;
1030 			}
1031 			if (found)
1032 				break;
1033 		}
1034 		if (aiop->aio_notifycnt > 0) {
1035 			/*
1036 			 * nothing on the kernel's queue. the user
1037 			 * has notified the kernel that it has items
1038 			 * on a user-level queue.
1039 			 */
1040 			aiop->aio_notifycnt--;
1041 			*rval = 1;
1042 			error = 0;
1043 			break;
1044 		}
1045 		/* don't block if nothing is outstanding */
1046 		if (aiop->aio_outstanding == 0) {
1047 			error = EAGAIN;
1048 			break;
1049 		}
1050 		if (blocking) {
1051 			/*
1052 			 * drop the aio_cleanupq_mutex as we are
1053 			 * going to block.
1054 			 */
1055 			mutex_exit(&aiop->aio_cleanupq_mutex);
1056 			rv = cv_waituntil_sig(&aiop->aio_waitcv,
1057 			    &aiop->aio_mutex, rqtp, timecheck);
1058 			/*
1059 			 * we have to drop aio_mutex and
1060 			 * grab it in the right order.
1061 			 */
1062 			mutex_exit(&aiop->aio_mutex);
1063 			mutex_enter(&aiop->aio_cleanupq_mutex);
1064 			mutex_enter(&aiop->aio_mutex);
1065 			if (rv > 0)	/* check done queue again */
1066 				continue;
1067 			if (rv == 0)	/* interrupted by a signal */
1068 				error = EINTR;
1069 			else		/* timer expired */
1070 				error = ETIME;
1071 		} else {
1072 			error = EAGAIN;
1073 		}
1074 		break;
1075 	}
1076 	mutex_exit(&aiop->aio_mutex);
1077 	mutex_exit(&aiop->aio_cleanupq_mutex);
1078 	for (reqp = found; reqp != NULL; reqp = next) {
1079 		next = reqp->aio_req_next;
1080 		aphysio_unlock(reqp);
1081 		aio_copyout_result(reqp);
1082 		mutex_enter(&aiop->aio_mutex);
1083 		aio_req_free(aiop, reqp);
1084 		mutex_exit(&aiop->aio_mutex);
1085 	}
1086 done:
1087 	kmem_free(cbplist, ssize);
1088 	return (error);
1089 }
1090 
1091 /*
1092  * initialize aio by allocating an aio_t struct for this
1093  * process.
1094  */
1095 static int
1096 aioinit(void)
1097 {
1098 	proc_t *p = curproc;
1099 	aio_t *aiop;
1100 	mutex_enter(&p->p_lock);
1101 	if ((aiop = p->p_aio) == NULL) {
1102 		aiop = aio_aiop_alloc();
1103 		p->p_aio = aiop;
1104 	}
1105 	mutex_exit(&p->p_lock);
1106 	if (aiop == NULL)
1107 		return (ENOMEM);
1108 	return (0);
1109 }
1110 
1111 /*
1112  * start a special thread that will cleanup after aio requests
1113  * that are preventing a segment from being unmapped. as_unmap()
1114  * blocks until all physio to this segment is completed. this
1115  * doesn't happen until none of the pages in this segment are
1116  * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
1117  * requests still outstanding. this special thread will make sure
1118  * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
1119  *
1120  * this function will return an error if the process has only
1121  * one LWP. the assumption is that the caller is a separate LWP
1122  * that remains blocked in the kernel for the life of this process.
1123  */
1124 static int
1125 aiostart(void)
1126 {
1127 	proc_t *p = curproc;
1128 	aio_t *aiop;
1129 	int first, error = 0;
1130 
1131 	if (p->p_lwpcnt == 1)
1132 		return (EDEADLK);
1133 	mutex_enter(&p->p_lock);
1134 	if ((aiop = p->p_aio) == NULL)
1135 		error = EINVAL;
1136 	else {
1137 		first = aiop->aio_ok;
1138 		if (aiop->aio_ok == 0)
1139 			aiop->aio_ok = 1;
1140 	}
1141 	mutex_exit(&p->p_lock);
1142 	if (error == 0 && first == 0) {
1143 		return (aio_cleanup_thread(aiop));
1144 		/* should return only to exit */
1145 	}
1146 	return (error);
1147 }
1148 
1149 /*
1150  * Associate an aiocb with a port.
1151  * This function is used by aiorw() to associate a transaction with a port.
1152  * Allocate an event port structure (port_alloc_event()) and store the
1153  * delivered user pointer (portnfy_user) in the portkev_user field of the
1154  * port_kevent_t structure.
1155  * The aio_req_portkev pointer in the aio_req_t structure was added to identify
1156  * the port association.
1157  */
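
/*
 * A minimal userland sketch of the port notification set up here, assuming
 * the event ports interfaces port_create(3C) and port_get(3C); the
 * descriptor fd is assumed to be an already open raw device:
 *
 *	#include <port.h>
 *	#include <aio.h>
 *
 *	struct aiocb cb = { 0 };
 *	port_notify_t pn;
 *	port_event_t pe;
 *	char buf[512];
 *	int port = port_create();
 *
 *	pn.portnfy_port = port;
 *	pn.portnfy_user = &cb;
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_sigevent.sigev_notify = SIGEV_PORT;
 *	cb.aio_sigevent.sigev_value.sival_ptr = &pn;
 *	(void) aio_read(&cb);
 *	(void) port_get(port, &pe, NULL);
 *
 * When the request completes, port_get() returns an event whose portev_user
 * field is the portnfy_user cookie stored above.
 */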
1158 
1159 static int
1160 aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
1161 	aio_req_t *reqp, int event)
1162 {
1163 	port_kevent_t	*pkevp = NULL;
1164 	int		error;
1165 
1166 	error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
1167 	    PORT_SOURCE_AIO, &pkevp);
1168 	if (error) {
1169 		if ((error == ENOMEM) || (error == EAGAIN))
1170 			error = EAGAIN;
1171 		else
1172 			error = EINVAL;
1173 	} else {
1174 		port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
1175 		    aio_port_callback, reqp);
1176 		pkevp->portkev_events = event;
1177 		reqp->aio_req_portkev = pkevp;
1178 		reqp->aio_req_port = pntfy->portnfy_port;
1179 	}
1180 	return (error);
1181 }
1182 
1183 #ifdef _LP64
1184 
1185 /*
1186  * Asynchronous list IO. A chain of aiocb's are copied in
1187  * one at a time. If the aiocb is invalid, it is skipped.
1188  * For each aiocb, the appropriate driver entry point is
1189  * called. Optimize for the common case where the list
1190  * of requests is to the same file descriptor.
1191  *
1192  * One possible optimization is to define a new driver entry
1193  * point that supports a list of IO requests. Whether this
1194  * improves performance depends somewhat on the driver's
1195  * locking strategy. Processing a list could adversely impact
1196  * the driver's interrupt latency.
1197  */
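
/*
 * A minimal userland sketch of the list I/O this implements, using the
 * lio_listio(3C) interface; the device path and sizes are illustrative only:
 *
 *	#include <aio.h>
 *	#include <fcntl.h>
 *	#include <stdlib.h>
 *
 *	struct aiocb cb[2] = { 0 };
 *	struct aiocb *list[2] = { &cb[0], &cb[1] };
 *	int i, fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);
 *
 *	for (i = 0; i < 2; i++) {
 *		cb[i].aio_fildes = fd;
 *		cb[i].aio_buf = malloc(8192);
 *		cb[i].aio_nbytes = 8192;
 *		cb[i].aio_offset = (off_t)i * 8192;
 *		cb[i].aio_lio_opcode = LIO_READ;
 *	}
 *	if (lio_listio(LIO_WAIT, list, 2, NULL) == 0) {
 *		for (i = 0; i < 2; i++)
 *			(void) aio_return(&cb[i]);
 *	}
 *
 * With LIO_NOWAIT and a sigevent, completion of the whole list can instead
 * be delivered as a signal, a port event or a thread callback, which is what
 * the sigev handling below arranges.
 */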
1198 static int
1199 alio(
1200 	int		mode_arg,
1201 	aiocb_t		**aiocb_arg,
1202 	int		nent,
1203 	struct sigevent	*sigev)
1204 {
1205 	file_t		*fp;
1206 	file_t		*prev_fp = NULL;
1207 	int		prev_mode = -1;
1208 	struct vnode	*vp;
1209 	aio_lio_t	*head;
1210 	aio_req_t	*reqp;
1211 	aio_t		*aiop;
1212 	caddr_t		cbplist;
1213 	aiocb_t		cb;
1214 	aiocb_t		*aiocb = &cb;
1215 	aiocb_t		*cbp;
1216 	aiocb_t		**ucbp;
1217 	struct sigevent sigevk;
1218 	sigqueue_t	*sqp;
1219 	int		(*aio_func)();
1220 	int		mode;
1221 	int		error = 0;
1222 	int		aio_errors = 0;
1223 	int		i;
1224 	size_t		ssize;
1225 	int		deadhead = 0;
1226 	int		aio_notsupported = 0;
1227 	int		lio_head_port;
1228 	int		aio_port;
1229 	int		aio_thread;
1230 	port_kevent_t	*pkevtp = NULL;
1231 	port_notify_t	pnotify;
1232 	int		event;
1233 
1234 	aiop = curproc->p_aio;
1235 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
1236 		return (EINVAL);
1237 
1238 	ssize = (sizeof (aiocb_t *) * nent);
1239 	cbplist = kmem_alloc(ssize, KM_SLEEP);
1240 	ucbp = (aiocb_t **)cbplist;
1241 
1242 	if (copyin(aiocb_arg, cbplist, ssize) ||
1243 	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
1244 		kmem_free(cbplist, ssize);
1245 		return (EFAULT);
1246 	}
1247 
1248 	/* Event Ports  */
1249 	if (sigev &&
1250 	    (sigevk.sigev_notify == SIGEV_THREAD ||
1251 	    sigevk.sigev_notify == SIGEV_PORT)) {
1252 		if (sigevk.sigev_notify == SIGEV_THREAD) {
1253 			pnotify.portnfy_port = sigevk.sigev_signo;
1254 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
1255 		} else if (copyin(sigevk.sigev_value.sival_ptr,
1256 		    &pnotify, sizeof (pnotify))) {
1257 			kmem_free(cbplist, ssize);
1258 			return (EFAULT);
1259 		}
1260 		error = port_alloc_event(pnotify.portnfy_port,
1261 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
1262 		if (error) {
1263 			if (error == ENOMEM || error == EAGAIN)
1264 				error = EAGAIN;
1265 			else
1266 				error = EINVAL;
1267 			kmem_free(cbplist, ssize);
1268 			return (error);
1269 		}
1270 		lio_head_port = pnotify.portnfy_port;
1271 	}
1272 
1273 	/*
1274 	 * a list head should be allocated if notification is
1275 	 * enabled for this list.
1276 	 */
1277 	head = NULL;
1278 
1279 	if (mode_arg == LIO_WAIT || sigev) {
1280 		mutex_enter(&aiop->aio_mutex);
1281 		error = aio_lio_alloc(&head);
1282 		mutex_exit(&aiop->aio_mutex);
1283 		if (error)
1284 			goto done;
1285 		deadhead = 1;
1286 		head->lio_nent = nent;
1287 		head->lio_refcnt = nent;
1288 		head->lio_port = -1;
1289 		head->lio_portkev = NULL;
1290 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
1291 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
1292 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
1293 			if (sqp == NULL) {
1294 				error = EAGAIN;
1295 				goto done;
1296 			}
1297 			sqp->sq_func = NULL;
1298 			sqp->sq_next = NULL;
1299 			sqp->sq_info.si_code = SI_ASYNCIO;
1300 			sqp->sq_info.si_pid = curproc->p_pid;
1301 			sqp->sq_info.si_ctid = PRCTID(curproc);
1302 			sqp->sq_info.si_zoneid = getzoneid();
1303 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
1304 			sqp->sq_info.si_signo = sigevk.sigev_signo;
1305 			sqp->sq_info.si_value = sigevk.sigev_value;
1306 			head->lio_sigqp = sqp;
1307 		} else {
1308 			head->lio_sigqp = NULL;
1309 		}
1310 		if (pkevtp) {
1311 			/*
1312 			 * Prepare data to send when list of aiocb's
1313 			 * has completed.
1314 			 */
1315 			port_init_event(pkevtp, (uintptr_t)sigev,
1316 			    (void *)(uintptr_t)pnotify.portnfy_user,
1317 			    NULL, head);
1318 			pkevtp->portkev_events = AIOLIO;
1319 			head->lio_portkev = pkevtp;
1320 			head->lio_port = pnotify.portnfy_port;
1321 		}
1322 	}
1323 
1324 	for (i = 0; i < nent; i++, ucbp++) {
1325 
1326 		cbp = *ucbp;
1327 		/* skip entry if it can't be copied. */
1328 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
1329 			if (head) {
1330 				mutex_enter(&aiop->aio_mutex);
1331 				head->lio_nent--;
1332 				head->lio_refcnt--;
1333 				mutex_exit(&aiop->aio_mutex);
1334 			}
1335 			continue;
1336 		}
1337 
1338 		/* skip if opcode for aiocb is LIO_NOP */
1339 		mode = aiocb->aio_lio_opcode;
1340 		if (mode == LIO_NOP) {
1341 			cbp = NULL;
1342 			if (head) {
1343 				mutex_enter(&aiop->aio_mutex);
1344 				head->lio_nent--;
1345 				head->lio_refcnt--;
1346 				mutex_exit(&aiop->aio_mutex);
1347 			}
1348 			continue;
1349 		}
1350 
1351 		/* increment file descriptor's ref count. */
1352 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
1353 			lio_set_uerror(&cbp->aio_resultp, EBADF);
1354 			if (head) {
1355 				mutex_enter(&aiop->aio_mutex);
1356 				head->lio_nent--;
1357 				head->lio_refcnt--;
1358 				mutex_exit(&aiop->aio_mutex);
1359 			}
1360 			aio_errors++;
1361 			continue;
1362 		}
1363 
1364 		/*
1365 		 * check the permission of the partition
1366 		 */
1367 		if ((fp->f_flag & mode) == 0) {
1368 			releasef(aiocb->aio_fildes);
1369 			lio_set_uerror(&cbp->aio_resultp, EBADF);
1370 			if (head) {
1371 				mutex_enter(&aiop->aio_mutex);
1372 				head->lio_nent--;
1373 				head->lio_refcnt--;
1374 				mutex_exit(&aiop->aio_mutex);
1375 			}
1376 			aio_errors++;
1377 			continue;
1378 		}
1379 
1380 		/*
1381 		 * common case where requests are to the same fd
1382 		 * for the same r/w operation.
1383 		 * for UFS, need to set EBADFD
1384 		 */
1385 		vp = fp->f_vnode;
1386 		if (fp != prev_fp || mode != prev_mode) {
1387 			aio_func = check_vp(vp, mode);
1388 			if (aio_func == NULL) {
1389 				prev_fp = NULL;
1390 				releasef(aiocb->aio_fildes);
1391 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
1392 				aio_notsupported++;
1393 				if (head) {
1394 					mutex_enter(&aiop->aio_mutex);
1395 					head->lio_nent--;
1396 					head->lio_refcnt--;
1397 					mutex_exit(&aiop->aio_mutex);
1398 				}
1399 				continue;
1400 			} else {
1401 				prev_fp = fp;
1402 				prev_mode = mode;
1403 			}
1404 		}
1405 
1406 		error = aio_req_setup(&reqp, aiop, aiocb,
1407 		    &cbp->aio_resultp, vp);
1408 		if (error) {
1409 			releasef(aiocb->aio_fildes);
1410 			lio_set_uerror(&cbp->aio_resultp, error);
1411 			if (head) {
1412 				mutex_enter(&aiop->aio_mutex);
1413 				head->lio_nent--;
1414 				head->lio_refcnt--;
1415 				mutex_exit(&aiop->aio_mutex);
1416 			}
1417 			aio_errors++;
1418 			continue;
1419 		}
1420 
1421 		reqp->aio_req_lio = head;
1422 		deadhead = 0;
1423 
1424 		/*
1425 		 * Set the errno field now before sending the request to
1426 		 * the driver to avoid a race condition
1427 		 */
1428 		(void) suword32(&cbp->aio_resultp.aio_errno,
1429 		    EINPROGRESS);
1430 
1431 		reqp->aio_req_iocb.iocb = (caddr_t)cbp;
1432 
1433 		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
1434 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
1435 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
1436 		if (aio_port | aio_thread) {
1437 			port_kevent_t *lpkevp;
1438 			/*
1439 			 * Prepare data to send with each aiocb completed.
1440 			 */
1441 			if (aio_port) {
1442 				void *paddr =
1443 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
1444 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
1445 					error = EFAULT;
1446 			} else {	/* aio_thread */
1447 				pnotify.portnfy_port =
1448 				    aiocb->aio_sigevent.sigev_signo;
1449 				pnotify.portnfy_user =
1450 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
1451 			}
1452 			if (error)
1453 				/* EMPTY */;
1454 			else if (pkevtp != NULL &&
1455 			    pnotify.portnfy_port == lio_head_port)
1456 				error = port_dup_event(pkevtp, &lpkevp,
1457 				    PORT_ALLOC_DEFAULT);
1458 			else
1459 				error = port_alloc_event(pnotify.portnfy_port,
1460 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
1461 				    &lpkevp);
1462 			if (error == 0) {
1463 				port_init_event(lpkevp, (uintptr_t)cbp,
1464 				    (void *)(uintptr_t)pnotify.portnfy_user,
1465 				    aio_port_callback, reqp);
1466 				lpkevp->portkev_events = event;
1467 				reqp->aio_req_portkev = lpkevp;
1468 				reqp->aio_req_port = pnotify.portnfy_port;
1469 			}
1470 		}
1471 
1472 		/*
1473 		 * send the request to driver.
1474 		 */
1475 		if (error == 0) {
1476 			if (aiocb->aio_nbytes == 0) {
1477 				clear_active_fd(aiocb->aio_fildes);
1478 				aio_zerolen(reqp);
1479 				continue;
1480 			}
1481 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
1482 			    CRED());
1483 		}
1484 
1485 		/*
1486 		 * the fd's ref count is not decremented until the IO has
1487 		 * completed unless there was an error.
1488 		 */
1489 		if (error) {
1490 			releasef(aiocb->aio_fildes);
1491 			lio_set_uerror(&cbp->aio_resultp, error);
1492 			if (head) {
1493 				mutex_enter(&aiop->aio_mutex);
1494 				head->lio_nent--;
1495 				head->lio_refcnt--;
1496 				mutex_exit(&aiop->aio_mutex);
1497 			}
1498 			if (error == ENOTSUP)
1499 				aio_notsupported++;
1500 			else
1501 				aio_errors++;
1502 			lio_set_error(reqp);
1503 		} else {
1504 			clear_active_fd(aiocb->aio_fildes);
1505 		}
1506 	}
1507 
1508 	if (aio_notsupported) {
1509 		error = ENOTSUP;
1510 	} else if (aio_errors) {
1511 		/*
1512 		 * return EIO if any request failed
1513 		 */
1514 		error = EIO;
1515 	}
1516 
1517 	if (mode_arg == LIO_WAIT) {
1518 		mutex_enter(&aiop->aio_mutex);
1519 		while (head->lio_refcnt > 0) {
1520 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
1521 				mutex_exit(&aiop->aio_mutex);
1522 				error = EINTR;
1523 				goto done;
1524 			}
1525 		}
1526 		mutex_exit(&aiop->aio_mutex);
1527 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
1528 	}
1529 
1530 done:
1531 	kmem_free(cbplist, ssize);
1532 	if (deadhead) {
1533 		if (head->lio_sigqp)
1534 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
1535 		if (head->lio_portkev)
1536 			port_free_event(head->lio_portkev);
1537 		kmem_free(head, sizeof (aio_lio_t));
1538 	}
1539 	return (error);
1540 }
1541 
1542 #endif /* _LP64 */
1543 
1544 /*
1545  * Asynchronous list IO.
1546  * If list I/O is called with LIO_WAIT it can still return
1547  * before all the I/O's are completed if a signal is caught
1548  * or if the list includes UFS I/O requests. If this happens,
1549  * libaio will call aliowait() to wait for the I/O's to
1550  * complete.
1551  */
1552 /*ARGSUSED*/
1553 static int
1554 aliowait(
1555 	int	mode,
1556 	void	*aiocb,
1557 	int	nent,
1558 	void	*sigev,
1559 	int	run_mode)
1560 {
1561 	aio_lio_t	*head;
1562 	aio_t		*aiop;
1563 	caddr_t		cbplist;
1564 	aiocb_t		*cbp, **ucbp;
1565 #ifdef	_SYSCALL32_IMPL
1566 	aiocb32_t	*cbp32;
1567 	caddr32_t	*ucbp32;
1568 	aiocb64_32_t	*cbp64;
1569 #endif
1570 	int		error = 0;
1571 	int		i;
1572 	size_t		ssize = 0;
1573 	model_t		model = get_udatamodel();
1574 
1575 	aiop = curproc->p_aio;
1576 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
1577 		return (EINVAL);
1578 
1579 	if (model == DATAMODEL_NATIVE)
1580 		ssize = (sizeof (aiocb_t *) * nent);
1581 #ifdef	_SYSCALL32_IMPL
1582 	else
1583 		ssize = (sizeof (caddr32_t) * nent);
1584 #endif  /* _SYSCALL32_IMPL */
1585 
1586 	if (ssize == 0)
1587 		return (EINVAL);
1588 
1589 	cbplist = kmem_alloc(ssize, KM_SLEEP);
1590 
1591 	if (model == DATAMODEL_NATIVE)
1592 		ucbp = (aiocb_t **)cbplist;
1593 #ifdef	_SYSCALL32_IMPL
1594 	else
1595 		ucbp32 = (caddr32_t *)cbplist;
1596 #endif  /* _SYSCALL32_IMPL */
1597 
1598 	if (copyin(aiocb, cbplist, ssize)) {
1599 		error = EFAULT;
1600 		goto done;
1601 	}
1602 
1603 	/*
1604 	 * To find the list head, we go through the
1605 	 * list of aiocb structs, find the request
1606 	 * it is for, then get the list head that reqp
1607 	 * points to.
1608 	 */
1609 	head = NULL;
1610 
1611 	for (i = 0; i < nent; i++) {
1612 		if (model == DATAMODEL_NATIVE) {
1613 			/*
1614 			 * Since we are only checking for a NULL pointer,
1615 			 * the following should work on both native data sizes
1616 			 * as well as for a largefile aiocb.
1617 			 */
1618 			if ((cbp = *ucbp++) == NULL)
1619 				continue;
1620 			if (run_mode != AIO_LARGEFILE) {
1621 				if (head = aio_list_get(&cbp->aio_resultp))
1622 					break;
1623 			} else {
1624 				/*
1625 				 * This is the case when a largefile call is
1626 				 * made on a 32 bit kernel.
1627 				 * Treat each pointer as pointer to
1628 				 * aiocb64_32
1629 				 */
1630 				if (head = aio_list_get((aio_result_t *)
1631 				    &(((aiocb64_32_t *)cbp)->aio_resultp)))
1632 					break;
1633 			}
1634 		}
1635 #ifdef	_SYSCALL32_IMPL
1636 		else {
1637 			if (run_mode == AIO_LARGEFILE) {
1638 				if ((cbp64 = (aiocb64_32_t *)
1639 				    (uintptr_t)*ucbp32++) == NULL)
1640 					continue;
1641 				if (head = aio_list_get((aio_result_t *)
1642 				    &cbp64->aio_resultp))
1643 					break;
1644 			} else if (run_mode == AIO_32) {
1645 				if ((cbp32 = (aiocb32_t *)
1646 				    (uintptr_t)*ucbp32++) == NULL)
1647 					continue;
1648 				if (head = aio_list_get((aio_result_t *)
1649 				    &cbp32->aio_resultp))
1650 					break;
1651 			}
1652 		}
1653 #endif	/* _SYSCALL32_IMPL */
1654 	}
1655 
1656 	if (head == NULL) {
1657 		error = EINVAL;
1658 		goto done;
1659 	}
1660 
1661 	mutex_enter(&aiop->aio_mutex);
1662 	while (head->lio_refcnt > 0) {
1663 		if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
1664 			mutex_exit(&aiop->aio_mutex);
1665 			error = EINTR;
1666 			goto done;
1667 		}
1668 	}
1669 	mutex_exit(&aiop->aio_mutex);
1670 	alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);
1671 done:
1672 	kmem_free(cbplist, ssize);
1673 	return (error);
1674 }
1675 
1676 aio_lio_t *
1677 aio_list_get(aio_result_t *resultp)
1678 {
1679 	aio_lio_t	*head = NULL;
1680 	aio_t		*aiop;
1681 	aio_req_t 	**bucket;
1682 	aio_req_t 	*reqp;
1683 	long		index;
1684 
1685 	aiop = curproc->p_aio;
1686 	if (aiop == NULL)
1687 		return (NULL);
1688 
1689 	if (resultp) {
1690 		index = AIO_HASH(resultp);
1691 		bucket = &aiop->aio_hash[index];
1692 		for (reqp = *bucket; reqp != NULL;
1693 		    reqp = reqp->aio_hash_next) {
1694 			if (reqp->aio_req_resultp == resultp) {
1695 				head = reqp->aio_req_lio;
1696 				return (head);
1697 			}
1698 		}
1699 	}
1700 	return (NULL);
1701 }
1702 
1703 
1704 static void
1705 lio_set_uerror(void *resultp, int error)
1706 {
1707 	/*
1708 	 * the resultp field is a pointer to where the
1709 	 * error should be written out to the user's
1710 	 * aiocb.
1711 	 *
1712 	 */
1713 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1714 		(void) sulword(&((aio_result_t *)resultp)->aio_return,
1715 		    (ssize_t)-1);
1716 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
1717 	}
1718 #ifdef	_SYSCALL32_IMPL
1719 	else {
1720 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
1721 		    (uint_t)-1);
1722 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
1723 	}
1724 #endif  /* _SYSCALL32_IMPL */
1725 }
1726 
1727 /*
1728  * do cleanup completion for all requests in list. memory for
1729  * each request is also freed.
1730  */
1731 static void
1732 alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
1733 {
1734 	int i;
1735 	aio_req_t *reqp;
1736 	aio_result_t *resultp;
1737 	aiocb64_32_t *aiocb_64;
1738 
1739 	for (i = 0; i < nent; i++) {
1740 		if (get_udatamodel() == DATAMODEL_NATIVE) {
1741 			if (cbp[i] == NULL)
1742 				continue;
1743 			if (run_mode == AIO_LARGEFILE) {
1744 				aiocb_64 = (aiocb64_32_t *)cbp[i];
1745 				resultp = (aio_result_t *)
1746 				    &aiocb_64->aio_resultp;
1747 			} else
1748 				resultp = &cbp[i]->aio_resultp;
1749 		}
1750 #ifdef	_SYSCALL32_IMPL
1751 		else {
1752 			aiocb32_t *aiocb_32;
1753 			caddr32_t *cbp32;
1754 
1755 			cbp32 = (caddr32_t *)cbp;
1756 			if (cbp32[i] == NULL)
1757 				continue;
1758 			if (run_mode == AIO_32) {
1759 				aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i];
1760 				resultp = (aio_result_t *)&aiocb_32->
1761 				    aio_resultp;
1762 			} else if (run_mode == AIO_LARGEFILE) {
1763 				aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i];
1764 				resultp = (aio_result_t *)&aiocb_64->
1765 				    aio_resultp;
1766 			}
1767 		}
1768 #endif  /* _SYSCALL32_IMPL */
1769 		/*
1770 		 * we need to get the aio_cleanupq_mutex since we call
1771 		 * aio_req_done().
1772 		 */
1773 		mutex_enter(&aiop->aio_cleanupq_mutex);
1774 		mutex_enter(&aiop->aio_mutex);
1775 		reqp = aio_req_done(resultp);
1776 		mutex_exit(&aiop->aio_mutex);
1777 		mutex_exit(&aiop->aio_cleanupq_mutex);
1778 		if (reqp != NULL) {
1779 			aphysio_unlock(reqp);
1780 			aio_copyout_result(reqp);
1781 			mutex_enter(&aiop->aio_mutex);
1782 			aio_req_free(aiop, reqp);
1783 			mutex_exit(&aiop->aio_mutex);
1784 		}
1785 	}
1786 }
1787 
1788 /*
1789  * Write out the results for an aio request that is done.
1790  */
1791 static int
1792 aioerror(void *cb, int run_mode)
1793 {
1794 	aio_result_t *resultp;
1795 	aio_t *aiop;
1796 	aio_req_t *reqp;
1797 	int retval;
1798 
1799 	aiop = curproc->p_aio;
1800 	if (aiop == NULL || cb == NULL)
1801 		return (EINVAL);
1802 
1803 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1804 		if (run_mode == AIO_LARGEFILE)
1805 			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
1806 			    aio_resultp;
1807 		else
1808 			resultp = &((aiocb_t *)cb)->aio_resultp;
1809 	}
1810 #ifdef	_SYSCALL32_IMPL
1811 	else {
1812 		if (run_mode == AIO_LARGEFILE)
1813 			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
1814 			    aio_resultp;
1815 		else if (run_mode == AIO_32)
1816 			resultp = (aio_result_t *)&((aiocb32_t *)cb)->
1817 			    aio_resultp;
1818 	}
1819 #endif  /* _SYSCALL32_IMPL */
1820 	/*
1821 	 * we need to get the aio_cleanupq_mutex since we call
1822 	 * aio_req_find().
1823 	 */
1824 	mutex_enter(&aiop->aio_cleanupq_mutex);
1825 	mutex_enter(&aiop->aio_mutex);
1826 	retval = aio_req_find(resultp, &reqp);
1827 	mutex_exit(&aiop->aio_mutex);
1828 	mutex_exit(&aiop->aio_cleanupq_mutex);
1829 	if (retval == 0) {
1830 		aphysio_unlock(reqp);
1831 		aio_copyout_result(reqp);
1832 		mutex_enter(&aiop->aio_mutex);
1833 		aio_req_free(aiop, reqp);
1834 		mutex_exit(&aiop->aio_mutex);
1835 		return (0);
1836 	} else if (retval == 1)
1837 		return (EINPROGRESS);
1838 	else if (retval == 2)
1839 		return (EINVAL);
1840 	return (0);
1841 }
1842 
1843 /*
1844  * 	aio_cancel - if no requests outstanding,
1845  *			return AIO_ALLDONE
1846  *			else
1847  *			return AIO_NOTCANCELED
1848  */
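
/*
 * A minimal userland sketch, assuming the aio_cancel(3C) interface and an
 * outstanding request cb on descriptor fd; since an issued raw I/O is never
 * actually cancelled, callers must be ready to see AIO_NOTCANCELED and to
 * reap the request normally:
 *
 *	#include <aio.h>
 *
 *	const struct aiocb *wl[1] = { &cb };
 *	int rv = aio_cancel(fd, &cb);
 *
 *	if (rv == AIO_NOTCANCELED)
 *		(void) aio_suspend(wl, 1, NULL);
 *	if (rv != AIO_CANCELED)
 *		(void) aio_return(&cb);
 */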
1849 static int
1850 aio_cancel(
1851 	int	fildes,
1852 	void 	*cb,
1853 	long	*rval,
1854 	int	run_mode)
1855 {
1856 	aio_t *aiop;
1857 	void *resultp;
1858 	int index;
1859 	aio_req_t **bucket;
1860 	aio_req_t *ent;
1861 
1862 
1863 	/*
1864 	 * Verify valid file descriptor
1865 	 */
1866 	if ((getf(fildes)) == NULL) {
1867 		return (EBADF);
1868 	}
1869 	releasef(fildes);
1870 
1871 	aiop = curproc->p_aio;
1872 	if (aiop == NULL)
1873 		return (EINVAL);
1874 
1875 	if (aiop->aio_outstanding == 0) {
1876 		*rval = AIO_ALLDONE;
1877 		return (0);
1878 	}
1879 
1880 	mutex_enter(&aiop->aio_mutex);
1881 	if (cb != NULL) {
1882 		if (get_udatamodel() == DATAMODEL_NATIVE) {
1883 			if (run_mode == AIO_LARGEFILE)
1884 				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
1885 				    ->aio_resultp;
1886 			else
1887 				resultp = &((aiocb_t *)cb)->aio_resultp;
1888 		}
1889 #ifdef	_SYSCALL32_IMPL
1890 		else {
1891 			if (run_mode == AIO_LARGEFILE)
1892 				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
1893 				    ->aio_resultp;
1894 			else if (run_mode == AIO_32)
1895 				resultp = (aio_result_t *)&((aiocb32_t *)cb)
1896 				    ->aio_resultp;
1897 		}
1898 #endif  /* _SYSCALL32_IMPL */
1899 		index = AIO_HASH(resultp);
1900 		bucket = &aiop->aio_hash[index];
1901 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
1902 			if (ent->aio_req_resultp == resultp) {
1903 				if ((ent->aio_req_flags & AIO_PENDING) == 0) {
1904 					mutex_exit(&aiop->aio_mutex);
1905 					*rval = AIO_ALLDONE;
1906 					return (0);
1907 				}
1908 				mutex_exit(&aiop->aio_mutex);
1909 				*rval = AIO_NOTCANCELED;
1910 				return (0);
1911 			}
1912 		}
1913 		mutex_exit(&aiop->aio_mutex);
1914 		*rval = AIO_ALLDONE;
1915 		return (0);
1916 	}
1917 
1918 	for (index = 0; index < AIO_HASHSZ; index++) {
1919 		bucket = &aiop->aio_hash[index];
1920 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
1921 			if (ent->aio_req_fd == fildes) {
1922 				if ((ent->aio_req_flags & AIO_PENDING) != 0) {
1923 					mutex_exit(&aiop->aio_mutex);
1924 					*rval = AIO_NOTCANCELED;
1925 					return (0);
1926 				}
1927 			}
1928 		}
1929 	}
1930 	mutex_exit(&aiop->aio_mutex);
1931 	*rval = AIO_ALLDONE;
1932 	return (0);
1933 }
1934 
1935 /*
1936  * solaris version of asynchronous read and write
1937  */
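
/*
 * A minimal userland sketch of the legacy Solaris interface served by this
 * path, assuming the aioread(3AIO) and aiowait(3AIO) signatures; the device
 * path is illustrative only:
 *
 *	#include <sys/asynch.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	char buf[8192];
 *	aio_result_t res;
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);
 *
 *	(void) aioread(fd, buf, sizeof (buf), 0, SEEK_SET, &res);
 *	if (aiowait(NULL) == &res && res.aio_return != -1)
 *		(void) write(STDOUT_FILENO, buf, res.aio_return);
 *
 * aiowait() returns a pointer to the aio_result_t of a completed request
 * (the AIOWAIT call above); the kernel fills in aio_return and aio_errno
 * via aio_copyout_result().
 */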
1938 static int
1939 arw(
1940 	int	opcode,
1941 	int	fdes,
1942 	char	*bufp,
1943 	int	bufsize,
1944 	offset_t	offset,
1945 	aio_result_t	*resultp,
1946 	int		mode)
1947 {
1948 	file_t		*fp;
1949 	int		error;
1950 	struct vnode	*vp;
1951 	aio_req_t	*reqp;
1952 	aio_t		*aiop;
1953 	int		(*aio_func)();
1954 #ifdef _LP64
1955 	aiocb_t		aiocb;
1956 #else
1957 	aiocb64_32_t	aiocb64;
1958 #endif
1959 
1960 	aiop = curproc->p_aio;
1961 	if (aiop == NULL)
1962 		return (EINVAL);
1963 
1964 	if ((fp = getf(fdes)) == NULL) {
1965 		return (EBADF);
1966 	}
1967 
1968 	/*
1969 	 * check the permission of the partition
1970 	 */
1971 	if ((fp->f_flag & mode) == 0) {
1972 		releasef(fdes);
1973 		return (EBADF);
1974 	}
1975 
1976 	vp = fp->f_vnode;
1977 	aio_func = check_vp(vp, mode);
1978 	if (aio_func == NULL) {
1979 		releasef(fdes);
1980 		return (EBADFD);
1981 	}
1982 #ifdef _LP64
1983 	aiocb.aio_fildes = fdes;
1984 	aiocb.aio_buf = bufp;
1985 	aiocb.aio_nbytes = bufsize;
1986 	aiocb.aio_offset = offset;
1987 	aiocb.aio_sigevent.sigev_notify = 0;
1988 	error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp);
1989 #else
1990 	aiocb64.aio_fildes = fdes;
1991 	aiocb64.aio_buf = (caddr32_t)bufp;
1992 	aiocb64.aio_nbytes = bufsize;
1993 	aiocb64.aio_offset = offset;
1994 	aiocb64.aio_sigevent.sigev_notify = 0;
1995 	error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp);
1996 #endif
1997 	if (error) {
1998 		releasef(fdes);
1999 		return (error);
2000 	}
2001 
2002 	/*
2003 	 * enable polling on this request if the opcode has
2004 	 * the AIO poll bit set
2005 	 */
2006 	if (opcode & AIO_POLL_BIT)
2007 		reqp->aio_req_flags |= AIO_POLL;
2008 
2009 	if (bufsize == 0) {
2010 		clear_active_fd(fdes);
2011 		aio_zerolen(reqp);
2012 		return (0);
2013 	}
2014 	/*
2015 	 * send the request to driver.
2016 	 */
2017 	error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
2018 	/*
2019 	 * the fd is stored in the aio_req_t by aio_req_setup(), and
2020 	 * is released by the aio_cleanup_thread() when the IO has
2021 	 * completed.
2022 	 */
2023 	if (error) {
2024 		releasef(fdes);
2025 		mutex_enter(&aiop->aio_mutex);
2026 		aio_req_free(aiop, reqp);
2027 		aiop->aio_pending--;
2028 		if (aiop->aio_flags & AIO_REQ_BLOCK)
2029 			cv_signal(&aiop->aio_cleanupcv);
2030 		mutex_exit(&aiop->aio_mutex);
2031 		return (error);
2032 	}
2033 	clear_active_fd(fdes);
2034 	return (0);
2035 }
2036 
2037 /*
2038  * posix version of asynchronous read and write
2039  */
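
/*
 * A minimal userland sketch of the POSIX interface served by this path,
 * using aio_read(3C), aio_suspend(3C), aio_error(3C) and aio_return(3C);
 * the device path is illustrative only:
 *
 *	#include <aio.h>
 *	#include <fcntl.h>
 *
 *	struct aiocb cb = { 0 };
 *	const struct aiocb *wl[1] = { &cb };
 *	char buf[8192];
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);
 *
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_offset = 0;
 *	cb.aio_sigevent.sigev_notify = SIGEV_NONE;
 *
 *	if (aio_read(&cb) == 0) {
 *		(void) aio_suspend(wl, 1, NULL);
 *		if (aio_error(&cb) == 0)
 *			(void) aio_return(&cb);
 *	}
 */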
2040 static int
2041 aiorw(
2042 	int		opcode,
2043 	void		*aiocb_arg,
2044 	int		mode,
2045 	int		run_mode)
2046 {
2047 #ifdef _SYSCALL32_IMPL
2048 	aiocb32_t	aiocb32;
2049 	struct	sigevent32 *sigev32;
2050 	port_notify32_t	pntfy32;
2051 #endif
2052 	aiocb64_32_t	aiocb64;
2053 	aiocb_t		aiocb;
2054 	file_t		*fp;
2055 	int		error, fd;
2056 	size_t		bufsize;
2057 	struct vnode	*vp;
2058 	aio_req_t	*reqp;
2059 	aio_t		*aiop;
2060 	int		(*aio_func)();
2061 	aio_result_t	*resultp;
2062 	struct	sigevent *sigev;
2063 	model_t		model;
2064 	int		aio_use_port = 0;
2065 	port_notify_t	pntfy;
2066 
2067 	model = get_udatamodel();
2068 	aiop = curproc->p_aio;
2069 	if (aiop == NULL)
2070 		return (EINVAL);
2071 
2072 	if (model == DATAMODEL_NATIVE) {
2073 		if (run_mode != AIO_LARGEFILE) {
2074 			if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
2075 				return (EFAULT);
2076 			bufsize = aiocb.aio_nbytes;
2077 			resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
2078 			if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
2079 				return (EBADF);
2080 			}
2081 			sigev = &aiocb.aio_sigevent;
2082 		} else {
2083 			/*
2084 			 * We get here only when a largefile call is
2085 			 * made on a 32-bit kernel using the 32-bit library.
2086 			 */
2087 			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
2088 				return (EFAULT);
2089 			bufsize = aiocb64.aio_nbytes;
2090 			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
2091 			    ->aio_resultp);
2092 			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
2093 				return (EBADF);
2094 			sigev = (struct sigevent *)&aiocb64.aio_sigevent;
2095 		}
2096 
2097 		if (sigev->sigev_notify == SIGEV_PORT) {
2098 			if (copyin((void *)sigev->sigev_value.sival_ptr,
2099 			    &pntfy, sizeof (port_notify_t))) {
2100 				releasef(fd);
2101 				return (EFAULT);
2102 			}
2103 			aio_use_port = 1;
2104 		} else if (sigev->sigev_notify == SIGEV_THREAD) {
2105 			pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo;
2106 			pntfy.portnfy_user =
2107 			    aiocb.aio_sigevent.sigev_value.sival_ptr;
2108 			aio_use_port = 1;
2109 		}
2110 	}
2111 #ifdef	_SYSCALL32_IMPL
2112 	else {
2113 		if (run_mode == AIO_32) {
2114 			/* 32 bit system call is being made on 64 bit kernel */
2115 			if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
2116 				return (EFAULT);
2117 
2118 			bufsize = aiocb32.aio_nbytes;
2119 			aiocb_32ton(&aiocb32, &aiocb);
2120 			resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
2121 			    aio_resultp);
2122 			if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
2123 				return (EBADF);
2124 			}
2125 			sigev32 = &aiocb32.aio_sigevent;
2126 		} else if (run_mode == AIO_LARGEFILE) {
2127 			/*
2128 			 * We get here only when a largefile call is
2129 			 * made on a 64-bit kernel using the 32-bit library.
2130 			 */
2131 			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
2132 				return (EFAULT);
2133 			bufsize = aiocb64.aio_nbytes;
2134 			aiocb_LFton(&aiocb64, &aiocb);
2135 			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
2136 			    ->aio_resultp);
2137 			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
2138 				return (EBADF);
2139 			sigev32 = &aiocb64.aio_sigevent;
2140 		}
2141 
2142 		if (sigev32->sigev_notify == SIGEV_PORT) {
2143 			if (copyin(
2144 			    (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
2145 			    &pntfy32, sizeof (port_notify32_t))) {
2146 				releasef(fd);
2147 				return (EFAULT);
2148 			}
2149 			pntfy.portnfy_port = pntfy32.portnfy_port;
2150 			pntfy.portnfy_user = (void *)(uintptr_t)
2151 			    pntfy32.portnfy_user;
2152 			aio_use_port = 1;
2153 		} else if (sigev32->sigev_notify == SIGEV_THREAD) {
2154 			pntfy.portnfy_port = sigev32->sigev_signo;
2155 			pntfy.portnfy_user = (void *)(uintptr_t)
2156 			    sigev32->sigev_value.sival_ptr;
2157 			aio_use_port = 1;
2158 		}
2159 	}
2160 #endif  /* _SYSCALL32_IMPL */
2161 
2162 	/*
2163 	 * verify that the file was opened with the requested access mode
2164 	 */
2165 
2166 	if ((fp->f_flag & mode) == 0) {
2167 		releasef(fd);
2168 		return (EBADF);
2169 	}
2170 
2171 	vp = fp->f_vnode;
2172 	aio_func = check_vp(vp, mode);
2173 	if (aio_func == NULL) {
2174 		releasef(fd);
2175 		return (EBADFD);
2176 	}
2177 	if (run_mode == AIO_LARGEFILE)
2178 		error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp);
2179 	else
2180 		error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp);
2181 
2182 	if (error) {
2183 		releasef(fd);
2184 		return (error);
2185 	}
2186 	/*
2187 	 * enable polling on this request if the opcode has
2188 	 * the AIO poll bit set
2189 	 */
2190 	if (opcode & AIO_POLL_BIT)
2191 		reqp->aio_req_flags |= AIO_POLL;
2192 
2193 	if (model == DATAMODEL_NATIVE)
2194 		reqp->aio_req_iocb.iocb = aiocb_arg;
2195 #ifdef  _SYSCALL32_IMPL
2196 	else
2197 		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
2198 #endif
2199 
2200 	if (aio_use_port) {
2201 		int event = (run_mode == AIO_LARGEFILE)?
2202 		    ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) :
2203 		    ((mode == FREAD)? AIOAREAD : AIOAWRITE);
2204 		error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
2205 	}
2206 
2207 	/*
2208 	 * send the request to the driver.
2209 	 */
2210 	if (error == 0) {
2211 		if (bufsize == 0) {
2212 			clear_active_fd(fd);
2213 			aio_zerolen(reqp);
2214 			return (0);
2215 		}
2216 		error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
2217 	}
2218 
2219 	/*
2220 	 * the fd is stored in the aio_req_t by aio_req_setup(), and
2221 	 * is released by the aio_cleanup_thread() when the IO has
2222 	 * completed.
2223 	 */
2224 	if (error) {
2225 		releasef(fd);
2226 		mutex_enter(&aiop->aio_mutex);
2227 		aio_deq(&aiop->aio_portpending, reqp);
2228 		aio_req_free(aiop, reqp);
2229 		aiop->aio_pending--;
2230 		if (aiop->aio_flags & AIO_REQ_BLOCK)
2231 			cv_signal(&aiop->aio_cleanupcv);
2232 		mutex_exit(&aiop->aio_mutex);
2233 		return (error);
2234 	}
2235 	clear_active_fd(fd);
2236 	return (0);
2237 }
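/*
 * Illustrative userland sketch (an assumption, not part of this file):
 * aiorw() is reached from the POSIX aio_read(3RT)/aio_write(3RT)
 * library calls, whose struct aiocb supplies exactly the fields copied
 * in above (aio_fildes, aio_buf, aio_nbytes, aio_offset, aio_sigevent).
 *
 *	#include <aio.h>
 *	#include <errno.h>
 *	#include <string.h>
 *
 *	struct aiocb cb;
 *	char buf[4096];
 *
 *	(void) memset(&cb, 0, sizeof (cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_offset = 0;
 *	cb.aio_sigevent.sigev_notify = SIGEV_NONE;
 *
 *	if (aio_read(&cb) == 0) {
 *		while (aio_error(&cb) == EINPROGRESS)
 *			;
 *		ssize_t n = aio_return(&cb);
 *	}
 *
 * fd is assumed to be an open descriptor; the busy-wait is only for
 * brevity, real callers use aio_suspend(), a signal or an event port.
 */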
2238 
2239 
2240 /*
2241  * set error for a list IO entry that failed.
2242  */
2243 static void
2244 lio_set_error(aio_req_t *reqp)
2245 {
2246 	aio_t *aiop = curproc->p_aio;
2247 
2248 	if (aiop == NULL)
2249 		return;
2250 
2251 	mutex_enter(&aiop->aio_mutex);
2252 	aio_deq(&aiop->aio_portpending, reqp);
2253 	aiop->aio_pending--;
2254 	/* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
2255 	reqp->aio_req_flags |= AIO_PHYSIODONE;
2256 	/*
2257 	 * Need to free the request now as it's never
2258 	 * going to get on the done queue
2259 	 *
2260 	 * Note: aio_outstanding is decremented in
2261 	 *	 aio_req_free()
2262 	 */
2263 	aio_req_free(aiop, reqp);
2264 	if (aiop->aio_flags & AIO_REQ_BLOCK)
2265 		cv_signal(&aiop->aio_cleanupcv);
2266 	mutex_exit(&aiop->aio_mutex);
2267 }
2268 
2269 /*
2270  * check if a specified request is done, and remove it from
2271  * the done queue. otherwise, when NULL is specified, remove the
2272  * request at the head of the done queue.
2273  */
2274 static aio_req_t *
2275 aio_req_done(void *resultp)
2276 {
2277 	aio_req_t **bucket;
2278 	aio_req_t *ent;
2279 	aio_t *aiop = curproc->p_aio;
2280 	long index;
2281 
2282 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
2283 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2284 
2285 	if (resultp) {
2286 		index = AIO_HASH(resultp);
2287 		bucket = &aiop->aio_hash[index];
2288 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
2289 			if (ent->aio_req_resultp == (aio_result_t *)resultp) {
2290 				if (ent->aio_req_flags & AIO_DONEQ) {
2291 					return (aio_req_remove(ent));
2292 				}
2293 				return (NULL);
2294 			}
2295 		}
2296 		/* no match, resultp is invalid */
2297 		return (NULL);
2298 	}
2299 	return (aio_req_remove(NULL));
2300 }
2301 
2302 /*
2303  * determine if a user-level resultp pointer is associated with an
2304  * active IO request. Zero is returned when the request is done,
2305  * and the request is removed from the done queue; only when the
2306  * return value is zero is the "reqp" pointer valid. One is returned
2307  * when the request is in progress. Two is returned when the request
2308  * is invalid.
2309  */
2310 static int
2311 aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
2312 {
2313 	aio_req_t **bucket;
2314 	aio_req_t *ent;
2315 	aio_t *aiop = curproc->p_aio;
2316 	long index;
2317 
2318 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
2319 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2320 
2321 	index = AIO_HASH(resultp);
2322 	bucket = &aiop->aio_hash[index];
2323 	for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
2324 		if (ent->aio_req_resultp == resultp) {
2325 			if (ent->aio_req_flags & AIO_DONEQ) {
2326 				*reqp = aio_req_remove(ent);
2327 				return (0);
2328 			}
2329 			return (1);
2330 		}
2331 	}
2332 	/* no match, resultp is invalid */
2333 	return (2);
2334 }
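/*
 * Hedged sketch of how a caller consumes the tri-state protocol above
 * (a hypothetical caller, shown only for illustration; the real
 * consumers are the wait/suspend paths elsewhere in this file).
 * Return 0 means reqp is valid and already off the done queue, 1 means
 * the request is still in flight, 2 means resultp matches nothing.
 * Both aio_cleanupq_mutex and aio_mutex must be held:
 *
 *	aio_req_t *reqp;
 *
 *	switch (aio_req_find(resultp, &reqp)) {
 *	case 0:
 *		return (handle_done(reqp));
 *	case 1:
 *		return (wait_for_completion(resultp));
 *	default:
 *		return (EINVAL);
 *	}
 *
 * handle_done() and wait_for_completion() are hypothetical helpers.
 */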
2335 
2336 /*
2337  * remove a request from the done queue.
2338  */
2339 static aio_req_t *
2340 aio_req_remove(aio_req_t *reqp)
2341 {
2342 	aio_t *aiop = curproc->p_aio;
2343 
2344 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2345 
2346 	if (reqp != NULL) {
2347 		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
2348 		if (reqp->aio_req_next == reqp) {
2349 			/* only one request on queue */
2350 			if (reqp ==  aiop->aio_doneq) {
2351 				aiop->aio_doneq = NULL;
2352 			} else {
2353 				ASSERT(reqp == aiop->aio_cleanupq);
2354 				aiop->aio_cleanupq = NULL;
2355 			}
2356 		} else {
2357 			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
2358 			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
2359 			/*
2360 			 * The request can be either on the aio_doneq or the
2361 			 * aio_cleanupq
2362 			 */
2363 			if (reqp == aiop->aio_doneq)
2364 				aiop->aio_doneq = reqp->aio_req_next;
2365 
2366 			if (reqp == aiop->aio_cleanupq)
2367 				aiop->aio_cleanupq = reqp->aio_req_next;
2368 		}
2369 		reqp->aio_req_flags &= ~AIO_DONEQ;
2370 		reqp->aio_req_next = NULL;
2371 		reqp->aio_req_prev = NULL;
2372 	} else if ((reqp = aiop->aio_doneq) != NULL) {
2373 		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
2374 		if (reqp == reqp->aio_req_next) {
2375 			/* only one request on queue */
2376 			aiop->aio_doneq = NULL;
2377 		} else {
2378 			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
2379 			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
2380 			aiop->aio_doneq = reqp->aio_req_next;
2381 		}
2382 		reqp->aio_req_flags &= ~AIO_DONEQ;
2383 		reqp->aio_req_next = NULL;
2384 		reqp->aio_req_prev = NULL;
2385 	}
2386 	if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN))
2387 		cv_broadcast(&aiop->aio_waitcv);
2388 	return (reqp);
2389 }
2390 
2391 static int
2392 aio_req_setup(
2393 	aio_req_t	**reqpp,
2394 	aio_t 		*aiop,
2395 	aiocb_t 	*arg,
2396 	aio_result_t 	*resultp,
2397 	vnode_t		*vp)
2398 {
2399 	sigqueue_t	*sqp = NULL;
2400 	aio_req_t 	*reqp;
2401 	struct uio 	*uio;
2402 	struct sigevent *sigev;
2403 	int		error;
2404 
2405 	sigev = &arg->aio_sigevent;
2406 	if (sigev->sigev_notify == SIGEV_SIGNAL &&
2407 	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
2408 		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
2409 		if (sqp == NULL)
2410 			return (EAGAIN);
2411 		sqp->sq_func = NULL;
2412 		sqp->sq_next = NULL;
2413 		sqp->sq_info.si_code = SI_ASYNCIO;
2414 		sqp->sq_info.si_pid = curproc->p_pid;
2415 		sqp->sq_info.si_ctid = PRCTID(curproc);
2416 		sqp->sq_info.si_zoneid = getzoneid();
2417 		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
2418 		sqp->sq_info.si_signo = sigev->sigev_signo;
2419 		sqp->sq_info.si_value = sigev->sigev_value;
2420 	}
2421 
2422 	mutex_enter(&aiop->aio_mutex);
2423 
2424 	if (aiop->aio_flags & AIO_REQ_BLOCK) {
2425 		mutex_exit(&aiop->aio_mutex);
2426 		if (sqp)
2427 			kmem_free(sqp, sizeof (sigqueue_t));
2428 		return (EIO);
2429 	}
2430 	/*
2431 	 * get an aio_reqp from the free list or allocate one
2432 	 * from dynamic memory.
2433 	 */
2434 	if (error = aio_req_alloc(&reqp, resultp)) {
2435 		mutex_exit(&aiop->aio_mutex);
2436 		if (sqp)
2437 			kmem_free(sqp, sizeof (sigqueue_t));
2438 		return (error);
2439 	}
2440 	aiop->aio_pending++;
2441 	aiop->aio_outstanding++;
2442 	reqp->aio_req_flags = AIO_PENDING;
2443 	if (sigev->sigev_notify == SIGEV_THREAD ||
2444 	    sigev->sigev_notify == SIGEV_PORT)
2445 		aio_enq(&aiop->aio_portpending, reqp, 0);
2446 	mutex_exit(&aiop->aio_mutex);
2447 	/*
2448 	 * initialize aio request.
2449 	 */
2450 	reqp->aio_req_fd = arg->aio_fildes;
2451 	reqp->aio_req_sigqp = sqp;
2452 	reqp->aio_req_iocb.iocb = NULL;
2453 	reqp->aio_req_lio = NULL;
2454 	reqp->aio_req_buf.b_file = vp;
2455 	uio = reqp->aio_req.aio_uio;
2456 	uio->uio_iovcnt = 1;
2457 	uio->uio_iov->iov_base = (caddr_t)arg->aio_buf;
2458 	uio->uio_iov->iov_len = arg->aio_nbytes;
2459 	uio->uio_loffset = arg->aio_offset;
2460 	*reqpp = reqp;
2461 	return (0);
2462 }
2463 
2464 /*
2465  * Allocate p_aio struct.
2466  */
2467 static aio_t *
2468 aio_aiop_alloc(void)
2469 {
2470 	aio_t	*aiop;
2471 
2472 	ASSERT(MUTEX_HELD(&curproc->p_lock));
2473 
2474 	aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);
2475 	if (aiop) {
2476 		mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
2477 		mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
2478 		    NULL);
2479 		mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
2480 	}
2481 	return (aiop);
2482 }
2483 
2484 /*
2485  * Allocate an aio_req struct.
2486  */
2487 static int
2488 aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
2489 {
2490 	aio_req_t *reqp;
2491 	aio_t *aiop = curproc->p_aio;
2492 
2493 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2494 
2495 	if ((reqp = aiop->aio_free) != NULL) {
2496 		aiop->aio_free = reqp->aio_req_next;
2497 		bzero(reqp, sizeof (*reqp));
2498 	} else {
2499 		/*
2500 		 * Check whether memory is getting tight.
2501 		 * This is a temporary mechanism to avoid memory
2502 		 * exhaustion by a single process until we come up
2503 		 * with a per process solution such as setrlimit().
2504 		 */
2505 		if (freemem < desfree)
2506 			return (EAGAIN);
2507 		reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);
2508 		if (reqp == NULL)
2509 			return (EAGAIN);
2510 	}
2511 	reqp->aio_req.aio_uio = &reqp->aio_req_uio;
2512 	reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov;
2513 	reqp->aio_req.aio_private = reqp;
2514 	reqp->aio_req_buf.b_offset = -1;
2515 	reqp->aio_req_resultp = resultp;
2516 	if (aio_hash_insert(reqp, aiop)) {
2517 		reqp->aio_req_next = aiop->aio_free;
2518 		aiop->aio_free = reqp;
2519 		return (EINVAL);
2520 	}
2521 	*nreqp = reqp;
2522 	return (0);
2523 }
2524 
2525 /*
2526  * Allocate an aio_lio_t struct.
2527  */
2528 static int
2529 aio_lio_alloc(aio_lio_t **head)
2530 {
2531 	aio_lio_t *liop;
2532 	aio_t *aiop = curproc->p_aio;
2533 
2534 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2535 
2536 	if ((liop = aiop->aio_lio_free) != NULL) {
2537 		aiop->aio_lio_free = liop->lio_next;
2538 	} else {
2539 		/*
2540 		 * Check whether memory is getting tight.
2541 		 * This is a temporary mechanism to avoid memory
2542 		 * exhaustion by a single process until we come up
2543 		 * with a per process solution such as setrlimit().
2544 		 */
2545 		if (freemem < desfree)
2546 			return (EAGAIN);
2547 
2548 		liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
2549 		if (liop == NULL)
2550 			return (EAGAIN);
2551 	}
2552 	*head = liop;
2553 	return (0);
2554 }
2555 
2556 /*
2557  * this is a special per-process thread that is only activated if
2558  * the process is unmapping a segment with outstanding aio. normally,
2559  * the process will have completed the aio before unmapping the
2560  * segment. If the process does unmap a segment with outstanding aio,
2561  * this special thread will guarantee that the locked pages due to
2562  * aphysio() are released, thereby permitting the segment to be
2563  * unmapped. In addition to this, the cleanup thread is woken up
2564  * during DR operations to release the locked pages.
2565  */
2566 
2567 static int
2568 aio_cleanup_thread(aio_t *aiop)
2569 {
2570 	proc_t *p = curproc;
2571 	struct as *as = p->p_as;
2572 	int poked = 0;
2573 	kcondvar_t *cvp;
2574 	int exit_flag = 0;
2575 	int rqclnup = 0;
2576 
2577 	sigfillset(&curthread->t_hold);
2578 	sigdiffset(&curthread->t_hold, &cantmask);
2579 	for (;;) {
2580 		/*
2581 		 * if a segment is being unmapped, and the current
2582 		 * process's done queue is not empty, then every request
2583 		 * on the doneq with locked resources should be forced
2584 		 * to release their locks. By moving the doneq request
2585 		 * to the cleanupq, aio_cleanup() will process the cleanupq,
2586 		 * and place requests back onto the doneq. All requests
2587 		 * processed by aio_cleanup() will have their physical
2588 		 * resources unlocked.
2589 		 */
2590 		mutex_enter(&aiop->aio_mutex);
2591 		if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
2592 			aiop->aio_flags |= AIO_CLEANUP;
2593 			mutex_enter(&as->a_contents);
2594 			if (aiop->aio_rqclnup) {
2595 				aiop->aio_rqclnup = 0;
2596 				rqclnup = 1;
2597 			}
2598 
2599 			if ((rqclnup || AS_ISUNMAPWAIT(as)) &&
2600 			    aiop->aio_doneq) {
2601 				aio_req_t *doneqhead = aiop->aio_doneq;
2602 				mutex_exit(&as->a_contents);
2603 				aiop->aio_doneq = NULL;
2604 				aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
2605 			} else {
2606 				mutex_exit(&as->a_contents);
2607 			}
2608 		}
2609 		mutex_exit(&aiop->aio_mutex);
2610 		aio_cleanup(AIO_CLEANUP_THREAD);
2611 		/*
2612 		 * thread should block on the cleanupcv while
2613 		 * AIO_CLEANUP is set.
2614 		 */
2615 		cvp = &aiop->aio_cleanupcv;
2616 		mutex_enter(&aiop->aio_mutex);
2617 
2618 		if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
2619 		    aiop->aio_notifyq != NULL ||
2620 		    aiop->aio_portcleanupq != NULL) {
2621 			mutex_exit(&aiop->aio_mutex);
2622 			continue;
2623 		}
2624 		mutex_enter(&as->a_contents);
2625 
2626 		/*
2627 		 * AIO_CLEANUP determines when the cleanup thread
2628 		 * should be active. This flag is set when
2629 		 * the cleanup thread is awakened by as_unmap() or
2630 		 * due to DR operations.
2631 		 * The flag is cleared when the blocking as_unmap()
2632 		 * that originally awakened us is allowed to
2633 		 * complete. as_unmap() blocks when trying to
2634 		 * unmap a segment that has SOFTLOCKed pages. when
2635 		 * the segment's pages are all SOFTUNLOCKed,
2636 		 * as->a_flags & AS_UNMAPWAIT should be zero.
2637 		 *
2638 		 * In case of cleanup request by DR, the flag is cleared
2639 		 * once all the pending aio requests have been processed.
2640 		 *
2641 		 * The flag shouldn't be cleared right away if the
2642 		 * cleanup thread was interrupted because the process
2643 		 * is doing forkall(). This happens when cv_wait_sig()
2644 		 * returns zero, because it was awakened by a pokelwps().
2645 		 * If the process is not exiting, it must be doing forkall().
2646 		 */
2647 		if ((poked == 0) &&
2648 		    ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
2649 		    (aiop->aio_pending == 0))) {
2650 			aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
2651 			cvp = &as->a_cv;
2652 			rqclnup = 0;
2653 		}
2654 		mutex_exit(&aiop->aio_mutex);
2655 		if (poked) {
2656 			/*
2657 			 * If the process is exiting/killed, don't return
2658 			 * immediately without waiting for pending I/O's
2659 			 * and releasing the page locks.
2660 			 */
2661 			if (p->p_flag & (SEXITLWPS|SKILLED)) {
2662 				/*
2663 				 * If exit_flag is set, then it is
2664 				 * safe to exit because we have released
2665 				 * page locks of completed I/O's.
2666 				 */
2667 				if (exit_flag)
2668 					break;
2669 
2670 				mutex_exit(&as->a_contents);
2671 
2672 				/*
2673 				 * Wait for all the pending aio to complete.
2674 				 */
2675 				mutex_enter(&aiop->aio_mutex);
2676 				aiop->aio_flags |= AIO_REQ_BLOCK;
2677 				while (aiop->aio_pending != 0)
2678 					cv_wait(&aiop->aio_cleanupcv,
2679 					    &aiop->aio_mutex);
2680 				mutex_exit(&aiop->aio_mutex);
2681 				exit_flag = 1;
2682 				continue;
2683 			} else if (p->p_flag &
2684 			    (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
2685 				/*
2686 				 * hold LWP until it
2687 				 * is continued.
2688 				 */
2689 				mutex_exit(&as->a_contents);
2690 				mutex_enter(&p->p_lock);
2691 				stop(PR_SUSPENDED, SUSPEND_NORMAL);
2692 				mutex_exit(&p->p_lock);
2693 				poked = 0;
2694 				continue;
2695 			}
2696 		} else {
2697 			/*
2698 			 * When started, this thread will sleep on as->a_cv.
2699 			 * as_unmap will awaken this thread if the
2700 			 * segment has SOFTLOCKed pages (poked = 0).
2701 			 * 1. pokelwps() awakes this thread =>
2702 			 *    break the loop to check SEXITLWPS, SHOLDFORK, etc
2703 			 * 2. as_unmap awakes this thread =>
2704 			 *    to break the loop it is necessary that
2705 			 *    - AS_UNMAPWAIT is set (as_unmap is waiting for
2706 			 *	memory to be unlocked)
2707 			 *    - AIO_CLEANUP is not set
2708 			 *	(if AIO_CLEANUP is set we have to wait for
2709 			 *	pending requests. aio_done will send a signal
2710 			 *	for every request which completes to continue
2711 			 *	unmapping the corresponding address range)
2712 			 * 3. A cleanup request will wake this thread up, e.g.
2713 			 *    by the DR operations. The aio_rqclnup flag will
2714 			 *    be set.
2715 			 */
2716 			while (poked == 0) {
2717 				/*
2718 				 * we need to handle cleanup requests
2719 				 * that come in after we have just cleaned up,
2720 				 * so that we clean up any new aio
2721 				 * requests that have completed and still
2722 				 * hold locked resources.
2723 				 */
2724 				if ((aiop->aio_rqclnup ||
2725 				    (AS_ISUNMAPWAIT(as) != 0)) &&
2726 				    (aiop->aio_flags & AIO_CLEANUP) == 0)
2727 					break;
2728 				poked = !cv_wait_sig(cvp, &as->a_contents);
2729 				if (AS_ISUNMAPWAIT(as) == 0)
2730 					cv_signal(cvp);
2731 				if (aiop->aio_outstanding != 0)
2732 					break;
2733 			}
2734 		}
2735 		mutex_exit(&as->a_contents);
2736 	}
2737 exit:
2738 	mutex_exit(&as->a_contents);
2739 	ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
2740 	aston(curthread);	/* make thread do post_syscall */
2741 	return (0);
2742 }
2743 
2744 /*
2745  * save a reference to a user's outstanding aio in a hash list.
2746  */
2747 static int
2748 aio_hash_insert(
2749 	aio_req_t *aio_reqp,
2750 	aio_t *aiop)
2751 {
2752 	long index;
2753 	aio_result_t *resultp = aio_reqp->aio_req_resultp;
2754 	aio_req_t *current;
2755 	aio_req_t **nextp;
2756 
2757 	index = AIO_HASH(resultp);
2758 	nextp = &aiop->aio_hash[index];
2759 	while ((current = *nextp) != NULL) {
2760 		if (current->aio_req_resultp == resultp)
2761 			return (DUPLICATE);
2762 		nextp = &current->aio_hash_next;
2763 	}
2764 	*nextp = aio_reqp;
2765 	aio_reqp->aio_hash_next = NULL;
2766 	return (0);
2767 }
2768 
2769 static int
2770 (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *,
2771     cred_t *)
2772 {
2773 	struct snode *sp;
2774 	dev_t		dev;
2775 	struct cb_ops  	*cb;
2776 	major_t		major;
2777 	int		(*aio_func)();
2778 
2779 	dev = vp->v_rdev;
2780 	major = getmajor(dev);
2781 
2782 	/*
2783 	 * return NULL for requests to files and STREAMs so
2784 	 * that libaio takes care of them.
2785 	 */
2786 	if (vp->v_type == VCHR) {
2787 		/* no stream device for kaio */
2788 		if (STREAMSTAB(major)) {
2789 			return (NULL);
2790 		}
2791 	} else {
2792 		return (NULL);
2793 	}
2794 
2795 	/*
2796 	 * Check old drivers which do not have async I/O entry points.
2797 	 */
2798 	if (devopsp[major]->devo_rev < 3)
2799 		return (NULL);
2800 
2801 	cb = devopsp[major]->devo_cb_ops;
2802 
2803 	if (cb->cb_rev < 1)
2804 		return (NULL);
2805 
2806 	/*
2807 	 * Check whether this device is a block device.
2808 	 * Kaio is not supported for devices like tty.
2809 	 */
2810 	if (cb->cb_strategy == nodev || cb->cb_strategy == NULL)
2811 		return (NULL);
2812 
2813 	/*
2814 	 * Clustering: If vnode is a PXFS vnode, then the device may be remote.
2815 	 * We cannot call the driver directly. Instead return the
2816 	 * PXFS functions.
2817 	 */
2818 
2819 	if (IS_PXFSVP(vp)) {
2820 		if (mode & FREAD)
2821 			return (clpxfs_aio_read);
2822 		else
2823 			return (clpxfs_aio_write);
2824 	}
2825 	if (mode & FREAD)
2826 		aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read;
2827 	else
2828 		aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write;
2829 
2830 	/*
2831 	 * Do we need this ?
2832 	 * Do we need this?
2833 	 */
2834 	if (aio_func == nodev)
2835 		return (NULL);
2836 
2837 	sp = VTOS(vp);
2838 	smark(sp, SACC);
2839 	return (aio_func);
2840 }
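/*
 * Hedged driver-side sketch (an assumption, not part of this file):
 * for check_vp() to hand back driver_aio_read/driver_aio_write, the
 * character driver must export aread(9E)/awrite(9E) through its
 * cb_ops.  A typical implementation just wraps aphysio(9F) around the
 * driver's strategy(9E) routine:
 *
 *	static int
 *	xx_aread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
 *	{
 *		return (aphysio(xx_strategy, anocancel, dev, B_READ,
 *		    minphys, aio));
 *	}
 *
 * xx_aread and xx_strategy are hypothetical names; anocancel and
 * minphys are the stock DDI helpers.  Drivers that leave cb_aread or
 * cb_awrite as nodev are rejected by the checks above.
 */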
2841 
2842 /*
2843  * Clustering: We want check_vp to return a function prototyped
2844  * correctly that will be common to both the PXFS and regular cases.
2845  * We define this intermediate function that will do the right
2846  * thing for driver cases.
2847  */
2848 
2849 static int
2850 driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
2851 {
2852 	dev_t dev;
2853 	struct cb_ops  	*cb;
2854 
2855 	ASSERT(vp->v_type == VCHR);
2856 	ASSERT(!IS_PXFSVP(vp));
2857 	dev = VTOS(vp)->s_dev;
2858 	ASSERT(STREAMSTAB(getmajor(dev)) == NULL);
2859 
2860 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
2861 
2862 	ASSERT(cb->cb_awrite != nodev);
2863 	return ((*cb->cb_awrite)(dev, aio, cred_p));
2864 }
2865 
2866 /*
2867  * Clustering: We want check_vp to return a function prototyped
2868  * correctly that will be common to both the PXFS and regular cases.
2869  * We define this intermediate function that will do the right
2870  * thing for driver cases.
2871  */
2872 
2873 static int
2874 driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
2875 {
2876 	dev_t dev;
2877 	struct cb_ops  	*cb;
2878 
2879 	ASSERT(vp->v_type == VCHR);
2880 	ASSERT(!IS_PXFSVP(vp));
2881 	dev = VTOS(vp)->s_dev;
2882 	ASSERT(!STREAMSTAB(getmajor(dev)));
2883 
2884 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
2885 
2886 	ASSERT(cb->cb_aread != nodev);
2887 	return ((*cb->cb_aread)(dev, aio, cred_p));
2888 }
2889 
2890 /*
2891  * This routine is called when a largefile call is made by a 32-bit
2892  * process on an ILP32 or LP64 kernel. All 64-bit processes are large
2893  * file by definition and will call alio() instead.
2894  */
2895 static int
2896 alioLF(
2897 	int		mode_arg,
2898 	void		*aiocb_arg,
2899 	int		nent,
2900 	void		*sigev)
2901 {
2902 	file_t		*fp;
2903 	file_t		*prev_fp = NULL;
2904 	int		prev_mode = -1;
2905 	struct vnode	*vp;
2906 	aio_lio_t	*head;
2907 	aio_req_t	*reqp;
2908 	aio_t		*aiop;
2909 	caddr_t		cbplist;
2910 	aiocb64_32_t	cb64;
2911 	aiocb64_32_t	*aiocb = &cb64;
2912 	aiocb64_32_t	*cbp;
2913 	caddr32_t	*ucbp;
2914 #ifdef _LP64
2915 	aiocb_t		aiocb_n;
2916 #endif
2917 	struct sigevent32	sigevk;
2918 	sigqueue_t	*sqp;
2919 	int		(*aio_func)();
2920 	int		mode;
2921 	int		error = 0;
2922 	int		aio_errors = 0;
2923 	int		i;
2924 	size_t		ssize;
2925 	int		deadhead = 0;
2926 	int		aio_notsupported = 0;
2927 	int		lio_head_port;
2928 	int		aio_port;
2929 	int		aio_thread;
2930 	port_kevent_t	*pkevtp = NULL;
2931 	port_notify32_t	pnotify;
2932 	int		event;
2933 
2934 	aiop = curproc->p_aio;
2935 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
2936 		return (EINVAL);
2937 
2938 	ASSERT(get_udatamodel() == DATAMODEL_ILP32);
2939 
2940 	ssize = (sizeof (caddr32_t) * nent);
2941 	cbplist = kmem_alloc(ssize, KM_SLEEP);
2942 	ucbp = (caddr32_t *)cbplist;
2943 
2944 	if (copyin(aiocb_arg, cbplist, ssize) ||
2945 	    (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
2946 		kmem_free(cbplist, ssize);
2947 		return (EFAULT);
2948 	}
2949 
2950 	/* Event Ports  */
2951 	if (sigev &&
2952 	    (sigevk.sigev_notify == SIGEV_THREAD ||
2953 	    sigevk.sigev_notify == SIGEV_PORT)) {
2954 		if (sigevk.sigev_notify == SIGEV_THREAD) {
2955 			pnotify.portnfy_port = sigevk.sigev_signo;
2956 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
2957 		} else if (copyin(
2958 		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
2959 		    &pnotify, sizeof (pnotify))) {
2960 			kmem_free(cbplist, ssize);
2961 			return (EFAULT);
2962 		}
2963 		error = port_alloc_event(pnotify.portnfy_port,
2964 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
2965 		if (error) {
2966 			if (error == ENOMEM || error == EAGAIN)
2967 				error = EAGAIN;
2968 			else
2969 				error = EINVAL;
2970 			kmem_free(cbplist, ssize);
2971 			return (error);
2972 		}
2973 		lio_head_port = pnotify.portnfy_port;
2974 	}
2975 
2976 	/*
2977 	 * a list head should be allocated if notification is
2978 	 * enabled for this list.
2979 	 */
2980 	head = NULL;
2981 
2982 	if (mode_arg == LIO_WAIT || sigev) {
2983 		mutex_enter(&aiop->aio_mutex);
2984 		error = aio_lio_alloc(&head);
2985 		mutex_exit(&aiop->aio_mutex);
2986 		if (error)
2987 			goto done;
2988 		deadhead = 1;
2989 		head->lio_nent = nent;
2990 		head->lio_refcnt = nent;
2991 		head->lio_port = -1;
2992 		head->lio_portkev = NULL;
2993 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
2994 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
2995 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
2996 			if (sqp == NULL) {
2997 				error = EAGAIN;
2998 				goto done;
2999 			}
3000 			sqp->sq_func = NULL;
3001 			sqp->sq_next = NULL;
3002 			sqp->sq_info.si_code = SI_ASYNCIO;
3003 			sqp->sq_info.si_pid = curproc->p_pid;
3004 			sqp->sq_info.si_ctid = PRCTID(curproc);
3005 			sqp->sq_info.si_zoneid = getzoneid();
3006 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3007 			sqp->sq_info.si_signo = sigevk.sigev_signo;
3008 			sqp->sq_info.si_value.sival_int =
3009 			    sigevk.sigev_value.sival_int;
3010 			head->lio_sigqp = sqp;
3011 		} else {
3012 			head->lio_sigqp = NULL;
3013 		}
3014 		if (pkevtp) {
3015 			/*
3016 			 * Prepare data to send when list of aiocb's
3017 			 * has completed.
3018 			 */
3019 			port_init_event(pkevtp, (uintptr_t)sigev,
3020 			    (void *)(uintptr_t)pnotify.portnfy_user,
3021 			    NULL, head);
3022 			pkevtp->portkev_events = AIOLIO64;
3023 			head->lio_portkev = pkevtp;
3024 			head->lio_port = pnotify.portnfy_port;
3025 		}
3026 	}
3027 
3028 	for (i = 0; i < nent; i++, ucbp++) {
3029 
3030 		cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
3031 		/* skip entry if it can't be copied. */
3032 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
3033 			if (head) {
3034 				mutex_enter(&aiop->aio_mutex);
3035 				head->lio_nent--;
3036 				head->lio_refcnt--;
3037 				mutex_exit(&aiop->aio_mutex);
3038 			}
3039 			continue;
3040 		}
3041 
3042 		/* skip if opcode for aiocb is LIO_NOP */
3043 		mode = aiocb->aio_lio_opcode;
3044 		if (mode == LIO_NOP) {
3045 			cbp = NULL;
3046 			if (head) {
3047 				mutex_enter(&aiop->aio_mutex);
3048 				head->lio_nent--;
3049 				head->lio_refcnt--;
3050 				mutex_exit(&aiop->aio_mutex);
3051 			}
3052 			continue;
3053 		}
3054 
3055 		/* increment file descriptor's ref count. */
3056 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
3057 			lio_set_uerror(&cbp->aio_resultp, EBADF);
3058 			if (head) {
3059 				mutex_enter(&aiop->aio_mutex);
3060 				head->lio_nent--;
3061 				head->lio_refcnt--;
3062 				mutex_exit(&aiop->aio_mutex);
3063 			}
3064 			aio_errors++;
3065 			continue;
3066 		}
3067 
3068 		/*
3069 		 * verify that the file was opened with the requested access mode
3070 		 */
3071 		if ((fp->f_flag & mode) == 0) {
3072 			releasef(aiocb->aio_fildes);
3073 			lio_set_uerror(&cbp->aio_resultp, EBADF);
3074 			if (head) {
3075 				mutex_enter(&aiop->aio_mutex);
3076 				head->lio_nent--;
3077 				head->lio_refcnt--;
3078 				mutex_exit(&aiop->aio_mutex);
3079 			}
3080 			aio_errors++;
3081 			continue;
3082 		}
3083 
3084 		/*
3085 		 * common case where requests are to the same fd
3086 		 * for the same r/w operation.
3087 		 * vnodes without kaio support (e.g. UFS files) get EBADFD.
3088 		 */
3089 		vp = fp->f_vnode;
3090 		if (fp != prev_fp || mode != prev_mode) {
3091 			aio_func = check_vp(vp, mode);
3092 			if (aio_func == NULL) {
3093 				prev_fp = NULL;
3094 				releasef(aiocb->aio_fildes);
3095 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
3096 				aio_notsupported++;
3097 				if (head) {
3098 					mutex_enter(&aiop->aio_mutex);
3099 					head->lio_nent--;
3100 					head->lio_refcnt--;
3101 					mutex_exit(&aiop->aio_mutex);
3102 				}
3103 				continue;
3104 			} else {
3105 				prev_fp = fp;
3106 				prev_mode = mode;
3107 			}
3108 		}
3109 
3110 #ifdef	_LP64
3111 		aiocb_LFton(aiocb, &aiocb_n);
3112 		error = aio_req_setup(&reqp, aiop, &aiocb_n,
3113 		    (aio_result_t *)&cbp->aio_resultp, vp);
3114 #else
3115 		error = aio_req_setupLF(&reqp, aiop, aiocb,
3116 		    (aio_result_t *)&cbp->aio_resultp, vp);
3117 #endif  /* _LP64 */
3118 		if (error) {
3119 			releasef(aiocb->aio_fildes);
3120 			lio_set_uerror(&cbp->aio_resultp, error);
3121 			if (head) {
3122 				mutex_enter(&aiop->aio_mutex);
3123 				head->lio_nent--;
3124 				head->lio_refcnt--;
3125 				mutex_exit(&aiop->aio_mutex);
3126 			}
3127 			aio_errors++;
3128 			continue;
3129 		}
3130 
3131 		reqp->aio_req_lio = head;
3132 		deadhead = 0;
3133 
3134 		/*
3135 		 * Set the errno field now before sending the request to
3136 		 * the driver to avoid a race condition
3137 		 */
3138 		(void) suword32(&cbp->aio_resultp.aio_errno,
3139 		    EINPROGRESS);
3140 
3141 		reqp->aio_req_iocb.iocb32 = *ucbp;
3142 
3143 		event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
3144 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
3145 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
3146 		if (aio_port | aio_thread) {
3147 			port_kevent_t *lpkevp;
3148 			/*
3149 			 * Prepare data to send with each aiocb completed.
3150 			 */
3151 			if (aio_port) {
3152 				void *paddr = (void *)(uintptr_t)
3153 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
3154 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
3155 					error = EFAULT;
3156 			} else {	/* aio_thread */
3157 				pnotify.portnfy_port =
3158 				    aiocb->aio_sigevent.sigev_signo;
3159 				pnotify.portnfy_user =
3160 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
3161 			}
3162 			if (error)
3163 				/* EMPTY */;
3164 			else if (pkevtp != NULL &&
3165 			    pnotify.portnfy_port == lio_head_port)
3166 				error = port_dup_event(pkevtp, &lpkevp,
3167 				    PORT_ALLOC_DEFAULT);
3168 			else
3169 				error = port_alloc_event(pnotify.portnfy_port,
3170 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
3171 				    &lpkevp);
3172 			if (error == 0) {
3173 				port_init_event(lpkevp, (uintptr_t)*ucbp,
3174 				    (void *)(uintptr_t)pnotify.portnfy_user,
3175 				    aio_port_callback, reqp);
3176 				lpkevp->portkev_events = event;
3177 				reqp->aio_req_portkev = lpkevp;
3178 				reqp->aio_req_port = pnotify.portnfy_port;
3179 			}
3180 		}
3181 
3182 		/*
3183 		 * send the request to the driver.
3184 		 */
3185 		if (error == 0) {
3186 			if (aiocb->aio_nbytes == 0) {
3187 				clear_active_fd(aiocb->aio_fildes);
3188 				aio_zerolen(reqp);
3189 				continue;
3190 			}
3191 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
3192 			    CRED());
3193 		}
3194 
3195 		/*
3196 		 * the fd's ref count is not decremented until the IO has
3197 		 * completed unless there was an error.
3198 		 */
3199 		if (error) {
3200 			releasef(aiocb->aio_fildes);
3201 			lio_set_uerror(&cbp->aio_resultp, error);
3202 			if (head) {
3203 				mutex_enter(&aiop->aio_mutex);
3204 				head->lio_nent--;
3205 				head->lio_refcnt--;
3206 				mutex_exit(&aiop->aio_mutex);
3207 			}
3208 			if (error == ENOTSUP)
3209 				aio_notsupported++;
3210 			else
3211 				aio_errors++;
3212 			lio_set_error(reqp);
3213 		} else {
3214 			clear_active_fd(aiocb->aio_fildes);
3215 		}
3216 	}
3217 
3218 	if (aio_notsupported) {
3219 		error = ENOTSUP;
3220 	} else if (aio_errors) {
3221 		/*
3222 		 * return EIO if any request failed
3223 		 */
3224 		error = EIO;
3225 	}
3226 
3227 	if (mode_arg == LIO_WAIT) {
3228 		mutex_enter(&aiop->aio_mutex);
3229 		while (head->lio_refcnt > 0) {
3230 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
3231 				mutex_exit(&aiop->aio_mutex);
3232 				error = EINTR;
3233 				goto done;
3234 			}
3235 		}
3236 		mutex_exit(&aiop->aio_mutex);
3237 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
3238 	}
3239 
3240 done:
3241 	kmem_free(cbplist, ssize);
3242 	if (deadhead) {
3243 		if (head->lio_sigqp)
3244 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
3245 		if (head->lio_portkev)
3246 			port_free_event(head->lio_portkev);
3247 		kmem_free(head, sizeof (aio_lio_t));
3248 	}
3249 	return (error);
3250 }
3251 
3252 #ifdef  _SYSCALL32_IMPL
3253 static void
3254 aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
3255 {
3256 	dest->aio_fildes = src->aio_fildes;
3257 	dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
3258 	dest->aio_nbytes = (size_t)src->aio_nbytes;
3259 	dest->aio_offset = (off_t)src->aio_offset;
3260 	dest->aio_reqprio = src->aio_reqprio;
3261 	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
3262 	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
3263 
3264 	/*
3265 	 * See comment in sigqueue32() on handling of 32-bit
3266 	 * sigvals in a 64-bit kernel.
3267 	 */
3268 	dest->aio_sigevent.sigev_value.sival_int =
3269 	    (int)src->aio_sigevent.sigev_value.sival_int;
3270 	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
3271 	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
3272 	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
3273 	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
3274 	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
3275 	dest->aio_lio_opcode = src->aio_lio_opcode;
3276 	dest->aio_state = src->aio_state;
3277 	dest->aio__pad[0] = src->aio__pad[0];
3278 }
3279 #endif
3280 
3281 /*
3282  * This function is used only for largefile calls made by
3283  * 32 bit applications.
3284  */
3285 static int
3286 aio_req_setupLF(
3287 	aio_req_t	**reqpp,
3288 	aio_t		*aiop,
3289 	aiocb64_32_t	*arg,
3290 	aio_result_t	*resultp,
3291 	vnode_t		*vp)
3292 {
3293 	sigqueue_t	*sqp = NULL;
3294 	aio_req_t	*reqp;
3295 	struct uio	*uio;
3296 	struct sigevent32 *sigev;
3297 	int 		error;
3298 
3299 	sigev = &arg->aio_sigevent;
3300 	if (sigev->sigev_notify == SIGEV_SIGNAL &&
3301 	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
3302 		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
3303 		if (sqp == NULL)
3304 			return (EAGAIN);
3305 		sqp->sq_func = NULL;
3306 		sqp->sq_next = NULL;
3307 		sqp->sq_info.si_code = SI_ASYNCIO;
3308 		sqp->sq_info.si_pid = curproc->p_pid;
3309 		sqp->sq_info.si_ctid = PRCTID(curproc);
3310 		sqp->sq_info.si_zoneid = getzoneid();
3311 		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3312 		sqp->sq_info.si_signo = sigev->sigev_signo;
3313 		sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
3314 	}
3315 
3316 	mutex_enter(&aiop->aio_mutex);
3317 
3318 	if (aiop->aio_flags & AIO_REQ_BLOCK) {
3319 		mutex_exit(&aiop->aio_mutex);
3320 		if (sqp)
3321 			kmem_free(sqp, sizeof (sigqueue_t));
3322 		return (EIO);
3323 	}
3324 	/*
3325 	 * get an aio_reqp from the free list or allocate one
3326 	 * from dynamic memory.
3327 	 */
3328 	if (error = aio_req_alloc(&reqp, resultp)) {
3329 		mutex_exit(&aiop->aio_mutex);
3330 		if (sqp)
3331 			kmem_free(sqp, sizeof (sigqueue_t));
3332 		return (error);
3333 	}
3334 	aiop->aio_pending++;
3335 	aiop->aio_outstanding++;
3336 	reqp->aio_req_flags = AIO_PENDING;
3337 	if (sigev->sigev_notify == SIGEV_THREAD ||
3338 	    sigev->sigev_notify == SIGEV_PORT)
3339 		aio_enq(&aiop->aio_portpending, reqp, 0);
3340 	mutex_exit(&aiop->aio_mutex);
3341 	/*
3342 	 * initialize aio request.
3343 	 */
3344 	reqp->aio_req_fd = arg->aio_fildes;
3345 	reqp->aio_req_sigqp = sqp;
3346 	reqp->aio_req_iocb.iocb = NULL;
3347 	reqp->aio_req_lio = NULL;
3348 	reqp->aio_req_buf.b_file = vp;
3349 	uio = reqp->aio_req.aio_uio;
3350 	uio->uio_iovcnt = 1;
3351 	uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
3352 	uio->uio_iov->iov_len = arg->aio_nbytes;
3353 	uio->uio_loffset = arg->aio_offset;
3354 	*reqpp = reqp;
3355 	return (0);
3356 }
3357 
3358 /*
3359  * This routine is called when a non-largefile call is made by a 32-bit
3360  * process on an ILP32 or LP64 kernel.
3361  */
3362 static int
3363 alio32(
3364 	int		mode_arg,
3365 	void		*aiocb_arg,
3366 	int		nent,
3367 	void		*sigev)
3368 {
3369 	file_t		*fp;
3370 	file_t		*prev_fp = NULL;
3371 	int		prev_mode = -1;
3372 	struct vnode	*vp;
3373 	aio_lio_t	*head;
3374 	aio_req_t	*reqp;
3375 	aio_t		*aiop;
3376 	caddr_t		cbplist;
3377 	aiocb_t		cb;
3378 	aiocb_t		*aiocb = &cb;
3379 #ifdef	_LP64
3380 	aiocb32_t	*cbp;
3381 	caddr32_t	*ucbp;
3382 	aiocb32_t	cb32;
3383 	aiocb32_t	*aiocb32 = &cb32;
3384 	struct sigevent32	sigevk;
3385 #else
3386 	aiocb_t		*cbp, **ucbp;
3387 	struct sigevent	sigevk;
3388 #endif
3389 	sigqueue_t	*sqp;
3390 	int		(*aio_func)();
3391 	int		mode;
3392 	int		error = 0;
3393 	int		aio_errors = 0;
3394 	int		i;
3395 	size_t		ssize;
3396 	int		deadhead = 0;
3397 	int		aio_notsupported = 0;
3398 	int		lio_head_port;
3399 	int		aio_port;
3400 	int		aio_thread;
3401 	port_kevent_t	*pkevtp = NULL;
3402 #ifdef	_LP64
3403 	port_notify32_t	pnotify;
3404 #else
3405 	port_notify_t	pnotify;
3406 #endif
3407 	int		event;
3408 
3409 	aiop = curproc->p_aio;
3410 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
3411 		return (EINVAL);
3412 
3413 #ifdef	_LP64
3414 	ssize = (sizeof (caddr32_t) * nent);
3415 #else
3416 	ssize = (sizeof (aiocb_t *) * nent);
3417 #endif
3418 	cbplist = kmem_alloc(ssize, KM_SLEEP);
3419 	ucbp = (void *)cbplist;
3420 
3421 	if (copyin(aiocb_arg, cbplist, ssize) ||
3422 	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) {
3423 		kmem_free(cbplist, ssize);
3424 		return (EFAULT);
3425 	}
3426 
3427 	/* Event Ports  */
3428 	if (sigev &&
3429 	    (sigevk.sigev_notify == SIGEV_THREAD ||
3430 	    sigevk.sigev_notify == SIGEV_PORT)) {
3431 		if (sigevk.sigev_notify == SIGEV_THREAD) {
3432 			pnotify.portnfy_port = sigevk.sigev_signo;
3433 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
3434 		} else if (copyin(
3435 		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
3436 		    &pnotify, sizeof (pnotify))) {
3437 			kmem_free(cbplist, ssize);
3438 			return (EFAULT);
3439 		}
3440 		error = port_alloc_event(pnotify.portnfy_port,
3441 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
3442 		if (error) {
3443 			if (error == ENOMEM || error == EAGAIN)
3444 				error = EAGAIN;
3445 			else
3446 				error = EINVAL;
3447 			kmem_free(cbplist, ssize);
3448 			return (error);
3449 		}
3450 		lio_head_port = pnotify.portnfy_port;
3451 	}
3452 
3453 	/*
3454 	 * a list head should be allocated if notification is
3455 	 * enabled for this list.
3456 	 */
3457 	head = NULL;
3458 
3459 	if (mode_arg == LIO_WAIT || sigev) {
3460 		mutex_enter(&aiop->aio_mutex);
3461 		error = aio_lio_alloc(&head);
3462 		mutex_exit(&aiop->aio_mutex);
3463 		if (error)
3464 			goto done;
3465 		deadhead = 1;
3466 		head->lio_nent = nent;
3467 		head->lio_refcnt = nent;
3468 		head->lio_port = -1;
3469 		head->lio_portkev = NULL;
3470 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
3471 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
3472 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
3473 			if (sqp == NULL) {
3474 				error = EAGAIN;
3475 				goto done;
3476 			}
3477 			sqp->sq_func = NULL;
3478 			sqp->sq_next = NULL;
3479 			sqp->sq_info.si_code = SI_ASYNCIO;
3480 			sqp->sq_info.si_pid = curproc->p_pid;
3481 			sqp->sq_info.si_ctid = PRCTID(curproc);
3482 			sqp->sq_info.si_zoneid = getzoneid();
3483 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3484 			sqp->sq_info.si_signo = sigevk.sigev_signo;
3485 			sqp->sq_info.si_value.sival_int =
3486 			    sigevk.sigev_value.sival_int;
3487 			head->lio_sigqp = sqp;
3488 		} else {
3489 			head->lio_sigqp = NULL;
3490 		}
3491 		if (pkevtp) {
3492 			/*
3493 			 * Prepare data to send when list of aiocb's has
3494 			 * completed.
3495 			 */
3496 			port_init_event(pkevtp, (uintptr_t)sigev,
3497 			    (void *)(uintptr_t)pnotify.portnfy_user,
3498 			    NULL, head);
3499 			pkevtp->portkev_events = AIOLIO;
3500 			head->lio_portkev = pkevtp;
3501 			head->lio_port = pnotify.portnfy_port;
3502 		}
3503 	}
3504 
3505 	for (i = 0; i < nent; i++, ucbp++) {
3506 
3507 		/* skip entry if it can't be copied. */
3508 #ifdef	_LP64
3509 		cbp = (aiocb32_t *)(uintptr_t)*ucbp;
3510 		if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32)))
3511 #else
3512 		cbp = (aiocb_t *)*ucbp;
3513 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb)))
3514 #endif
3515 		{
3516 			if (head) {
3517 				mutex_enter(&aiop->aio_mutex);
3518 				head->lio_nent--;
3519 				head->lio_refcnt--;
3520 				mutex_exit(&aiop->aio_mutex);
3521 			}
3522 			continue;
3523 		}
3524 #ifdef	_LP64
3525 		/*
3526 		 * copy 32 bit structure into 64 bit structure
3527 		 */
3528 		aiocb_32ton(aiocb32, aiocb);
3529 #endif /* _LP64 */
3530 
3531 		/* skip if opcode for aiocb is LIO_NOP */
3532 		mode = aiocb->aio_lio_opcode;
3533 		if (mode == LIO_NOP) {
3534 			cbp = NULL;
3535 			if (head) {
3536 				mutex_enter(&aiop->aio_mutex);
3537 				head->lio_nent--;
3538 				head->lio_refcnt--;
3539 				mutex_exit(&aiop->aio_mutex);
3540 			}
3541 			continue;
3542 		}
3543 
3544 		/* increment file descriptor's ref count. */
3545 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
3546 			lio_set_uerror(&cbp->aio_resultp, EBADF);
3547 			if (head) {
3548 				mutex_enter(&aiop->aio_mutex);
3549 				head->lio_nent--;
3550 				head->lio_refcnt--;
3551 				mutex_exit(&aiop->aio_mutex);
3552 			}
3553 			aio_errors++;
3554 			continue;
3555 		}
3556 
3557 		/*
3558 		 * verify that the file was opened with the requested access mode
3559 		 */
3560 		if ((fp->f_flag & mode) == 0) {
3561 			releasef(aiocb->aio_fildes);
3562 			lio_set_uerror(&cbp->aio_resultp, EBADF);
3563 			if (head) {
3564 				mutex_enter(&aiop->aio_mutex);
3565 				head->lio_nent--;
3566 				head->lio_refcnt--;
3567 				mutex_exit(&aiop->aio_mutex);
3568 			}
3569 			aio_errors++;
3570 			continue;
3571 		}
3572 
3573 		/*
3574 		 * common case where requests are to the same fd
3575 		 * for the same r/w operation.
3576 		 * vnodes without kaio support (e.g. UFS files) get EBADFD.
3577 		 */
3578 		vp = fp->f_vnode;
3579 		if (fp != prev_fp || mode != prev_mode) {
3580 			aio_func = check_vp(vp, mode);
3581 			if (aio_func == NULL) {
3582 				prev_fp = NULL;
3583 				releasef(aiocb->aio_fildes);
3584 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
3585 				aio_notsupported++;
3586 				if (head) {
3587 					mutex_enter(&aiop->aio_mutex);
3588 					head->lio_nent--;
3589 					head->lio_refcnt--;
3590 					mutex_exit(&aiop->aio_mutex);
3591 				}
3592 				continue;
3593 			} else {
3594 				prev_fp = fp;
3595 				prev_mode = mode;
3596 			}
3597 		}
3598 
3599 		error = aio_req_setup(&reqp, aiop, aiocb,
3600 		    (aio_result_t *)&cbp->aio_resultp, vp);
3601 		if (error) {
3602 			releasef(aiocb->aio_fildes);
3603 			lio_set_uerror(&cbp->aio_resultp, error);
3604 			if (head) {
3605 				mutex_enter(&aiop->aio_mutex);
3606 				head->lio_nent--;
3607 				head->lio_refcnt--;
3608 				mutex_exit(&aiop->aio_mutex);
3609 			}
3610 			aio_errors++;
3611 			continue;
3612 		}
3613 
3614 		reqp->aio_req_lio = head;
3615 		deadhead = 0;
3616 
3617 		/*
3618 		 * Set the errno field now before sending the request to
3619 		 * the driver to avoid a race condition
3620 		 */
3621 		(void) suword32(&cbp->aio_resultp.aio_errno,
3622 		    EINPROGRESS);
3623 
3624 		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp;
3625 
3626 		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
3627 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
3628 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
3629 		if (aio_port | aio_thread) {
3630 			port_kevent_t *lpkevp;
3631 			/*
3632 			 * Prepare data to send with each aiocb completed.
3633 			 */
3634 #ifdef _LP64
3635 			if (aio_port) {
3636 				void *paddr = (void  *)(uintptr_t)
3637 				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
3638 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
3639 					error = EFAULT;
3640 			} else {	/* aio_thread */
3641 				pnotify.portnfy_port =
3642 				    aiocb32->aio_sigevent.sigev_signo;
3643 				pnotify.portnfy_user =
3644 				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
3645 			}
3646 #else
3647 			if (aio_port) {
3648 				void *paddr =
3649 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
3650 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
3651 					error = EFAULT;
3652 			} else {	/* aio_thread */
3653 				pnotify.portnfy_port =
3654 				    aiocb->aio_sigevent.sigev_signo;
3655 				pnotify.portnfy_user =
3656 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
3657 			}
3658 #endif
3659 			if (error)
3660 				/* EMPTY */;
3661 			else if (pkevtp != NULL &&
3662 			    pnotify.portnfy_port == lio_head_port)
3663 				error = port_dup_event(pkevtp, &lpkevp,
3664 				    PORT_ALLOC_DEFAULT);
3665 			else
3666 				error = port_alloc_event(pnotify.portnfy_port,
3667 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
3668 				    &lpkevp);
3669 			if (error == 0) {
3670 				port_init_event(lpkevp, (uintptr_t)cbp,
3671 				    (void *)(uintptr_t)pnotify.portnfy_user,
3672 				    aio_port_callback, reqp);
3673 				lpkevp->portkev_events = event;
3674 				reqp->aio_req_portkev = lpkevp;
3675 				reqp->aio_req_port = pnotify.portnfy_port;
3676 			}
3677 		}
3678 
3679 		/*
3680 		 * send the request to the driver.
3681 		 */
3682 		if (error == 0) {
3683 			if (aiocb->aio_nbytes == 0) {
3684 				clear_active_fd(aiocb->aio_fildes);
3685 				aio_zerolen(reqp);
3686 				continue;
3687 			}
3688 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
3689 			    CRED());
3690 		}
3691 
3692 		/*
3693 		 * the fd's ref count is not decremented until the IO has
3694 		 * completed unless there was an error.
3695 		 */
3696 		if (error) {
3697 			releasef(aiocb->aio_fildes);
3698 			lio_set_uerror(&cbp->aio_resultp, error);
3699 			if (head) {
3700 				mutex_enter(&aiop->aio_mutex);
3701 				head->lio_nent--;
3702 				head->lio_refcnt--;
3703 				mutex_exit(&aiop->aio_mutex);
3704 			}
3705 			if (error == ENOTSUP)
3706 				aio_notsupported++;
3707 			else
3708 				aio_errors++;
3709 			lio_set_error(reqp);
3710 		} else {
3711 			clear_active_fd(aiocb->aio_fildes);
3712 		}
3713 	}
3714 
3715 	if (aio_notsupported) {
3716 		error = ENOTSUP;
3717 	} else if (aio_errors) {
3718 		/*
3719 		 * return EIO if any request failed
3720 		 */
3721 		error = EIO;
3722 	}
3723 
3724 	if (mode_arg == LIO_WAIT) {
3725 		mutex_enter(&aiop->aio_mutex);
3726 		while (head->lio_refcnt > 0) {
3727 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
3728 				mutex_exit(&aiop->aio_mutex);
3729 				error = EINTR;
3730 				goto done;
3731 			}
3732 		}
3733 		mutex_exit(&aiop->aio_mutex);
3734 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
3735 	}
3736 
3737 done:
3738 	kmem_free(cbplist, ssize);
3739 	if (deadhead) {
3740 		if (head->lio_sigqp)
3741 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
3742 		if (head->lio_portkev)
3743 			port_free_event(head->lio_portkev);
3744 		kmem_free(head, sizeof (aio_lio_t));
3745 	}
3746 	return (error);
3747 }
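/*
 * Illustrative userland sketch (an assumption, not part of this file):
 * alio32() and alioLF() above are reached from lio_listio(3RT).  Every
 * aiocb in the list carries its own descriptor, buffer and
 * LIO_READ/LIO_WRITE/LIO_NOP opcode, which is what the per-entry loop
 * validates:
 *
 *	#include <aio.h>
 *	#include <stdio.h>
 *
 *	struct aiocb cb0 = { 0 }, cb1 = { 0 };
 *	struct aiocb *list[2] = { &cb0, &cb1 };
 *
 *	cb0.aio_fildes = fd_in;
 *	cb0.aio_buf = inbuf;
 *	cb0.aio_nbytes = sizeof (inbuf);
 *	cb0.aio_lio_opcode = LIO_READ;
 *
 *	cb1.aio_fildes = fd_out;
 *	cb1.aio_buf = outbuf;
 *	cb1.aio_nbytes = sizeof (outbuf);
 *	cb1.aio_lio_opcode = LIO_WRITE;
 *
 *	if (lio_listio(LIO_WAIT, list, 2, NULL) != 0)
 *		perror("lio_listio");
 *
 * fd_in, fd_out, inbuf and outbuf are assumed to exist.  With LIO_WAIT
 * the call blocks until every entry completes, matching the
 * lio_refcnt/lio_notify wait at the end of alio32() above.
 */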
3748 
3749 
3750 #ifdef  _SYSCALL32_IMPL
3751 void
3752 aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
3753 {
3754 	dest->aio_fildes = src->aio_fildes;
3755 	dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
3756 	dest->aio_nbytes = (size_t)src->aio_nbytes;
3757 	dest->aio_offset = (off_t)src->aio_offset;
3758 	dest->aio_reqprio = src->aio_reqprio;
3759 	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
3760 	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
3761 
3762 	/*
3763 	 * See comment in sigqueue32() on handling of 32-bit
3764 	 * sigvals in a 64-bit kernel.
3765 	 */
3766 	dest->aio_sigevent.sigev_value.sival_int =
3767 	    (int)src->aio_sigevent.sigev_value.sival_int;
3768 	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
3769 	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
3770 	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
3771 	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
3772 	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
3773 	dest->aio_lio_opcode = src->aio_lio_opcode;
3774 	dest->aio_state = src->aio_state;
3775 	dest->aio__pad[0] = src->aio__pad[0];
3776 }
3777 #endif /* _SYSCALL32_IMPL */
3778 
3779 /*
3780  * aio_port_callback() is called just before the event is retrieved from the
3781  * port. The task of this callback function is to finish the work of the
3782  * transaction for the application, which means:
3783  * - copyout transaction data to the application
3784  *	(this thread is running in the right process context)
3785  * - keep track of the transaction (update of counters).
3786  * - free allocated buffers
3787  * The aiocb pointer is the object element of the port_kevent_t structure.
3788  *
3789  * flag:
3790  *	PORT_CALLBACK_DEFAULT : do copyout and free resources
3791  *	PORT_CALLBACK_CLOSE   : don't do copyout, free resources
3792  */
3793 
3794 /*ARGSUSED*/
3795 int
3796 aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
3797 {
3798 	aio_t		*aiop = curproc->p_aio;
3799 	aio_req_t	*reqp = arg;
3800 	struct	iovec	*iov;
3801 	struct	buf	*bp;
3802 	void		*resultp;
3803 
3804 	if (pid != curproc->p_pid) {
3805 		/* wrong process, cannot deliver data here */
3806 		return (EACCES);
3807 	}
3808 
3809 	mutex_enter(&aiop->aio_portq_mutex);
3810 	reqp->aio_req_portkev = NULL;
3811 	aio_req_remove_portq(aiop, reqp); /* remove request from portq */
3812 	mutex_exit(&aiop->aio_portq_mutex);
3813 	aphysio_unlock(reqp);		/* unlock used pages */
3814 	mutex_enter(&aiop->aio_mutex);
3815 	if (reqp->aio_req_flags & AIO_COPYOUTDONE) {
3816 		aio_req_free_port(aiop, reqp);	/* back to free list */
3817 		mutex_exit(&aiop->aio_mutex);
3818 		return (0);
3819 	}
3820 
3821 	iov = reqp->aio_req_uio.uio_iov;
3822 	bp = &reqp->aio_req_buf;
3823 	resultp = (void *)reqp->aio_req_resultp;
3824 	aio_req_free_port(aiop, reqp);	/* request struct back to free list */
3825 	mutex_exit(&aiop->aio_mutex);
3826 	if (flag == PORT_CALLBACK_DEFAULT)
3827 		aio_copyout_result_port(iov, bp, resultp);
3828 	return (0);
3829 }
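/*
 * Illustrative userland sketch (an assumption, not part of this file):
 * aio_port_callback() runs while the application sits in port_get(3C).
 * A SIGEV_PORT submission and its retrieval look roughly like this:
 *
 *	#include <port.h>
 *	#include <aio.h>
 *
 *	port_notify_t pn;
 *	port_event_t pe;
 *	struct aiocb cb;
 *	int port = port_create();
 *
 *	pn.portnfy_port = port;
 *	pn.portnfy_user = &cb;
 *	cb.aio_sigevent.sigev_notify = SIGEV_PORT;
 *	cb.aio_sigevent.sigev_value.sival_ptr = &pn;
 *	(void) aio_read(&cb);
 *
 *	if (port_get(port, &pe, NULL) == 0 &&
 *	    pe.portev_source == PORT_SOURCE_AIO)
 *		handle((struct aiocb *)pe.portev_object, pe.portev_user);
 *
 * handle() is a hypothetical consumer; the remaining aiocb fields are
 * assumed to be filled in as in the earlier examples.  By the time
 * port_get() returns, this callback has already copied the result out
 * to the application's aiocb.
 */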
3830