xref: /titanic_51/usr/src/uts/common/os/aio_subr.c (revision 2dae3fb5f236a83380b9deea54417c4e1f535121)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/proc.h>
31 #include <sys/file.h>
32 #include <sys/errno.h>
33 #include <sys/param.h>
34 #include <sys/sysmacros.h>
35 #include <sys/cmn_err.h>
36 #include <sys/systm.h>
37 #include <vm/as.h>
38 #include <vm/page.h>
39 #include <sys/uio.h>
40 #include <sys/kmem.h>
41 #include <sys/debug.h>
42 #include <sys/aio_impl.h>
43 #include <sys/epm.h>
44 #include <sys/fs/snode.h>
45 #include <sys/siginfo.h>
46 #include <sys/cpuvar.h>
47 #include <sys/tnf_probe.h>
48 #include <sys/conf.h>
49 #include <sys/sdt.h>
50 
/*
 * Forward declarations for non-static functions defined in this file.
 */
int aphysio(int (*)(), int (*)(), dev_t, int, void (*)(), struct aio_req *);
void aio_done(struct buf *);
void aphysio_unlock(aio_req_t *);
void aio_cleanup(int);
void aio_cleanup_exit(void);

/*
 * private functions
 *
 * Forward declarations for the file-local helpers defined further down:
 * signal delivery, hash/free-list maintenance, the per-queue enqueue
 * routines and the per-queue cleanup routines.
 */
static void aio_sigev_send(proc_t *, sigqueue_t *);
static void aio_hash_delete(aio_t *, aio_req_t *);
static void aio_lio_free(aio_t *, aio_lio_t *);
static void aio_enq(aio_req_t **, aio_req_t *, int);
static void aio_cleanup_cleanupq(aio_t *, aio_req_t *, int);
static int aio_cleanup_notifyq(aio_t *, aio_req_t *, int);
static void aio_cleanup_pollq(aio_t *, aio_req_t *, int);
static void aio_enq_doneq(aio_t *aiop, aio_req_t *reqp);
static void aio_enq_portq(aio_t *, aio_req_t *, int);
static void aio_enq_port_cleanupq(aio_t *, aio_req_t *);
static void aio_cleanup_portq(aio_t *, aio_req_t *, int);
71 
/*
 * async version of physio() that doesn't wait synchronously
 * for the driver's strategy routine to complete.
 *
 * The buf embedded in the aio_req_t (reached through aio->aio_private)
 * is initialized from the first iovec of aio->aio_uio, the user pages
 * backing that iovec are locked with as_pagelock(), and the buf is then
 * handed to the driver's strategy routine.
 *
 * Arguments:
 *	strategy	driver strategy routine the buf is passed to
 *	cancel		cancel routine; must be anocancel, anything else
 *			panics
 *	dev		device the I/O is directed at
 *	rw		B_READ for a read, otherwise treated as a write
 *	mincnt		driver routine that may adjust bp->b_bcount
 *	aio		the asynchronous request
 *
 * Returns:
 *	EINVAL		the uio offset is negative (or, on _ILP32 kernels,
 *			larger than SPEC_MAXOFFSET_T)
 *	ENOTSUP		mincnt() changed the transfer size, i.e. the request
 *			cannot be done as a single transfer
 *	other non-zero	error returned by as_pagelock()
 *	otherwise	whatever the strategy routine returns
 */

int
aphysio(
	int (*strategy)(struct buf *),
	int (*cancel)(struct buf *),
	dev_t dev,
	int rw,
	void (*mincnt)(struct buf *),
	struct aio_req *aio)
{
	struct uio *uio = aio->aio_uio;
	aio_req_t *reqp = (aio_req_t *)aio->aio_private;
	struct buf *bp = &reqp->aio_req_buf;
	struct iovec *iov;
	struct as *as;
	char *a;
	int	error;
	size_t	c;
	struct page **pplist;
	struct dev_ops *ops = devopsp[getmajor(dev)];

	if (uio->uio_loffset < 0)
		return (EINVAL);
#ifdef	_ILP32
	/*
	 * For 32-bit kernels, check against SPEC_MAXOFFSET_T which represents
	 * the maximum size that can be supported by the IO subsystem.
	 * XXX this code assumes a D_64BIT driver.
	 */
	if (uio->uio_loffset > SPEC_MAXOFFSET_T)
		return (EINVAL);
#endif	/* _ILP32 */

	TNF_PROBE_5(aphysio_start, "kaio", /* CSTYLED */,
		tnf_opaque, bp, bp,
		tnf_device, device, dev,
		tnf_offset, blkno, btodt(uio->uio_loffset),
		tnf_size, size, uio->uio_iov->iov_len,
		tnf_bioflags, rw, rw);

	/* account the request as a physical read or write */
	if (rw == B_READ) {
		CPU_STATS_ADD_K(sys, phread, 1);
	} else {
		CPU_STATS_ADD_K(sys, phwrite, 1);
	}

	/* only the first iovec of the uio is used for the transfer */
	iov = uio->uio_iov;
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);

	bp->b_error = 0;
	bp->b_flags = B_BUSY | B_PHYS | B_ASYNC | rw;
	bp->b_edev = dev;
	bp->b_dev = cmpdev(dev);
	bp->b_lblkno = btodt(uio->uio_loffset);
	bp->b_offset = uio->uio_loffset;
	/* the return value of devo_getinfo() is deliberately ignored */
	(void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
	    (void *)bp->b_edev, (void **)&bp->b_dip);

	/*
	 * Clustering: Clustering can set the b_iodone, b_forw and
	 * b_proc fields to cluster-specific values.  Only fill them in
	 * when b_iodone has not been set already.
	 */
	if (bp->b_iodone == NULL) {
		bp->b_iodone = (int (*)()) aio_done;
		/* b_forw points at an aio_req_t structure */
		bp->b_forw = (struct buf *)reqp;
		bp->b_proc = curproc;
	}

	a = bp->b_un.b_addr = iov->iov_base;
	c = bp->b_bcount = iov->iov_len;

	/*
	 * Let the driver clamp the transfer size.  A partial transfer is
	 * not supported here, so any change to b_bcount fails the request.
	 */
	(*mincnt)(bp);
	if (bp->b_bcount != iov->iov_len)
		return (ENOTSUP);

	as = bp->b_proc->p_as;

	/*
	 * Lock down the user buffer.  A read from the device writes into
	 * the buffer, hence S_WRITE for B_READ and S_READ otherwise.
	 */
	error = as_pagelock(as, &pplist, a,
	    c, rw == B_READ? S_WRITE : S_READ);
	if (error != 0) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
		bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
		return (error);
	}
	/* remember that aphysio_unlock() has pages to unlock */
	reqp->aio_req_flags |= AIO_PAGELOCKDONE;
	bp->b_shadow = pplist;
	if (pplist != NULL) {
		bp->b_flags |= B_SHADOW;
	}

	if (cancel != anocancel)
		cmn_err(CE_PANIC,
		    "aphysio: cancellation not supported, use anocancel");

	reqp->aio_req_cancel = cancel;

	DTRACE_IO1(start, struct buf *, bp);

	/* hand the buf to the driver without waiting for completion */
	return ((*strategy)(bp));
}
179 
/*
 * The only cancel routine aphysio() accepts.  Cancellation is not
 * supported, so the buf is ignored and ENXIO is always returned.
 */
/*ARGSUSED*/
int
anocancel(struct buf *bp)
{
	return (ENXIO);
}
186 
/*
 * Called from biodone().
 * Notify process that a pending AIO has finished.
 *
 * The request is recovered from bp->b_forw and the owning process from
 * bp->b_proc.  The file reference taken for the request is released, the
 * process' pending count is decremented, and the request is moved to the
 * queue that matches its notification style:
 *
 *	- event port requests (aio_req_portkev set) go to the port queue,
 *	  or to the port cleanup queue while AIO_CLEANUP is set;
 *	- while AIO_CLEANUP is set, or for AIO_POLL requests, the request
 *	  goes to the pollq, notifyq or cleanupq;
 *	- everything else goes straight to the done queue, waiters are
 *	  woken and any sigevent / list-I/O sigevent / SIGIO is sent.
 */

/*
 * Clustering: This function is made non-static as it is used
 * by clustering s/w as contract private interface.
 */

void
aio_done(struct buf *bp)
{
	proc_t *p;
	struct as *as;
	aio_req_t *reqp;
	aio_lio_t *head;
	aio_t *aiop;
	sigqueue_t *sigev;
	sigqueue_t *lio_sigev = NULL;
	int fd;
	int cleanupqflag;
	int pollqflag;
	int portevpend;
	void (*func)();

	p = bp->b_proc;
	reqp = (aio_req_t *)bp->b_forw;
	fd = reqp->aio_req_fd;

	TNF_PROBE_5(aphysio_end, "kaio", /* CSTYLED */,
		tnf_opaque, bp, bp,
		tnf_device, device, bp->b_edev,
		tnf_offset, blkno, btodt(reqp->aio_req_uio.uio_loffset),
		tnf_size, size, reqp->aio_req_uio.uio_iov->iov_len,
		tnf_bioflags, rw, (bp->b_flags & (B_READ|B_WRITE)));

	/*
	 * mapout earlier so that more kmem is available when aio is
	 * heavily used. bug #1262082
	 */
	if (bp->b_flags & B_REMAPPED)
		bp_mapout(bp);

	/* decrement fd's ref count by one, now that aio request is done. */
	areleasef(fd, P_FINFO(p));

	aiop = p->p_aio;
	ASSERT(aiop != NULL);

	if (reqp->aio_req_portkev) {
		/* lock order: aio_portq_mutex first, then aio_mutex */
		mutex_enter(&aiop->aio_portq_mutex);
		mutex_enter(&aiop->aio_mutex);
		aiop->aio_pending--;
		reqp->aio_req_flags &= ~AIO_PENDING;
		/* Event port notification is desired for this transaction */
		if (reqp->aio_req_flags & AIO_CLOSE_PORT) {
			/*
			 * The port is being closed and it is waiting for
			 * pending asynchronous I/O transactions to complete.
			 */
			portevpend = --aiop->aio_portpendcnt;
			aio_enq_portq(aiop, reqp, 1);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			(void) port_send_event(reqp->aio_req_portkev);
			/* last outstanding port request: wake the closer */
			if (portevpend == 0)
				cv_broadcast(&aiop->aio_portcv);
			return;
		}

		if (aiop->aio_flags & AIO_CLEANUP) {
			/*
			 * aio_cleanup_thread() is waiting for completion of
			 * transactions.
			 */
			as = p->p_as;
			mutex_enter(&as->a_contents);
			aio_enq_port_cleanupq(aiop, reqp);
			cv_signal(&aiop->aio_cleanupcv);
			mutex_exit(&as->a_contents);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			return;
		}

		/* normal case: move from the pending queue to the port queue */
		aio_enq_portq(aiop, reqp, 1);
		mutex_exit(&aiop->aio_mutex);
		mutex_exit(&aiop->aio_portq_mutex);
		(void) port_send_event(reqp->aio_req_portkev);
		return;
	}

	mutex_enter(&aiop->aio_mutex);
	ASSERT(aiop->aio_pending > 0);
	ASSERT(reqp->aio_req_flags & AIO_PENDING);
	aiop->aio_pending--;
	reqp->aio_req_flags &= ~AIO_PENDING;

	reqp->aio_req_next = NULL;
	/*
	 * when the AIO_CLEANUP flag is enabled for this
	 * process, or when the AIO_POLL bit is set for
	 * this request, special handling is required.
	 * otherwise the request is put onto the doneq.
	 */
	cleanupqflag = (aiop->aio_flags & AIO_CLEANUP);
	pollqflag = (reqp->aio_req_flags & AIO_POLL);
	if (cleanupqflag | pollqflag) {

		if (cleanupqflag) {
			as = p->p_as;
			mutex_enter(&as->a_contents);
		}

		/*
		 * requests with their AIO_POLL bit set are put
		 * on the pollq, requests with sigevent structures
		 * or with listio heads are put on the notifyq, and
		 * the remaining requests don't require any special
		 * cleanup handling, so they're put onto the default
		 * cleanupq.
		 */
		if (pollqflag)
			aio_enq(&aiop->aio_pollq, reqp, AIO_POLLQ);
		else if (reqp->aio_req_sigqp || reqp->aio_req_lio)
			aio_enq(&aiop->aio_notifyq, reqp, AIO_NOTIFYQ);
		else
			aio_enq(&aiop->aio_cleanupq, reqp, AIO_CLEANUPQ);

		if (cleanupqflag) {
			/* wake whoever is waiting on aio_cleanupcv */
			cv_signal(&aiop->aio_cleanupcv);
			mutex_exit(&as->a_contents);
			mutex_exit(&aiop->aio_mutex);
		} else {
			ASSERT(pollqflag);
			/* block aio_cleanup_exit until we're done */
			aiop->aio_flags |= AIO_DONE_ACTIVE;
			mutex_exit(&aiop->aio_mutex);
			/*
			 * let the cleanup processing happen from an
			 * AST. set an AST on all threads in this process
			 */
			mutex_enter(&p->p_lock);
			set_proc_ast(p);
			mutex_exit(&p->p_lock);
			mutex_enter(&aiop->aio_mutex);
			/* wakeup anybody waiting in aiowait() */
			cv_broadcast(&aiop->aio_waitcv);

			/* wakeup aio_cleanup_exit if needed */
			if (aiop->aio_flags & AIO_CLEANUP)
				cv_signal(&aiop->aio_cleanupcv);
			aiop->aio_flags &= ~AIO_DONE_ACTIVE;
			mutex_exit(&aiop->aio_mutex);
		}
		return;
	}

	/* put request on done queue. */
	aio_enq_doneq(aiop, reqp);

	/*
	 * save req's sigevent pointer, and check its
	 * value after releasing aio_mutex lock.
	 */
	sigev = reqp->aio_req_sigqp;
	reqp->aio_req_sigqp = NULL;

	/*
	 * when list IO notification is enabled, a signal
	 * is sent only when all entries in the list are
	 * done.
	 */
	if ((head = reqp->aio_req_lio) != NULL) {
		ASSERT(head->lio_refcnt > 0);
		if (--head->lio_refcnt == 0) {
			cv_signal(&head->lio_notify);
			/*
			 * save lio's sigevent pointer, and check
			 * its value after releasing aio_mutex
			 * lock.
			 */
			lio_sigev = head->lio_sigqp;
			head->lio_sigqp = NULL;
		}
		mutex_exit(&aiop->aio_mutex);
		/* signals are queued only after aio_mutex has been dropped */
		if (sigev)
			aio_sigev_send(p, sigev);
		if (lio_sigev)
			aio_sigev_send(p, lio_sigev);
		return;
	}

	/*
	 * if AIO_WAITN set then
	 * send signal only when we reached the
	 * required amount of IO's finished
	 * or when all IO's are done
	 */
	if (aiop->aio_flags & AIO_WAITN) {
		if (aiop->aio_waitncnt > 0)
			aiop->aio_waitncnt--;
		if (aiop->aio_pending == 0 ||
		    aiop->aio_waitncnt == 0)
			cv_broadcast(&aiop->aio_waitcv);
	} else {
		cv_broadcast(&aiop->aio_waitcv);
	}

	mutex_exit(&aiop->aio_mutex);
	if (sigev)
		aio_sigev_send(p, sigev);
	else {
		/*
		 * send a SIGIO signal when the process
		 * has a handler enabled.
		 */
		if ((func = p->p_user.u_signal[SIGIO - 1]) !=
		    SIG_DFL && (func != SIG_IGN))
			psignal(p, SIGIO);
	}
}
410 
/*
 * Queue the given signal on the specified process.  The caller must
 * pass a non-NULL sigqueue_t (this is asserted); the function does not
 * return a value.  p->p_lock is taken around the call to sigaddqa().
 */
static void
aio_sigev_send(proc_t *p, sigqueue_t *sigev)
{
	ASSERT(sigev != NULL);

	mutex_enter(&p->p_lock);
	sigaddqa(p, NULL, sigev);
	mutex_exit(&p->p_lock);
}
426 
427 /*
428  * special case handling for zero length requests. the aio request
429  * short circuits the normal completion path since all that's required
430  * to complete this request is to copyout a zero to the aio request's
431  * return value.
432  */
433 void
434 aio_zerolen(aio_req_t *reqp)
435 {
436 
437 	struct buf *bp = &reqp->aio_req_buf;
438 
439 	reqp->aio_req_flags |= AIO_ZEROLEN;
440 
441 	bp->b_forw = (struct buf *)reqp;
442 	bp->b_proc = curproc;
443 
444 	bp->b_resid = 0;
445 	bp->b_flags = 0;
446 
447 	aio_done(bp);
448 }
449 
450 /*
451  * unlock pages previously locked by as_pagelock
452  */
453 void
454 aphysio_unlock(aio_req_t *reqp)
455 {
456 	struct buf *bp;
457 	struct iovec *iov;
458 	int flags;
459 
460 	if (reqp->aio_req_flags & AIO_PHYSIODONE)
461 		return;
462 
463 	reqp->aio_req_flags |= AIO_PHYSIODONE;
464 
465 	if (reqp->aio_req_flags & AIO_ZEROLEN)
466 		return;
467 
468 	bp = &reqp->aio_req_buf;
469 	iov = reqp->aio_req_uio.uio_iov;
470 	flags = (((bp->b_flags & B_READ) == B_READ) ? S_WRITE : S_READ);
471 	if (reqp->aio_req_flags & AIO_PAGELOCKDONE) {
472 		as_pageunlock(bp->b_proc->p_as,
473 			bp->b_flags & B_SHADOW ? bp->b_shadow : NULL,
474 			iov->iov_base, iov->iov_len, flags);
475 		reqp->aio_req_flags &= ~AIO_PAGELOCKDONE;
476 	}
477 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
478 	bp->b_flags |= B_DONE;
479 }
480 
481 /*
482  * deletes a requests id from the hash table of outstanding
483  * io.
484  */
485 static void
486 aio_hash_delete(
487 	aio_t *aiop,
488 	struct aio_req_t *reqp)
489 {
490 	long index;
491 	aio_result_t *resultp = reqp->aio_req_resultp;
492 	aio_req_t *current;
493 	aio_req_t **nextp;
494 
495 	index = AIO_HASH(resultp);
496 	nextp = (aiop->aio_hash + index);
497 	while ((current = *nextp) != NULL) {
498 		if (current->aio_req_resultp == resultp) {
499 			*nextp = current->aio_hash_next;
500 			return;
501 		}
502 		nextp = &current->aio_hash_next;
503 	}
504 }
505 
506 /*
507  * Put a list head struct onto its free list.
508  */
509 static void
510 aio_lio_free(aio_t *aiop, aio_lio_t *head)
511 {
512 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
513 
514 	if (head->lio_sigqp != NULL)
515 		kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
516 	head->lio_next = aiop->aio_lio_free;
517 	aiop->aio_lio_free = head;
518 }
519 
520 /*
521  * Put a reqp onto the freelist.
522  */
523 void
524 aio_req_free(aio_t *aiop, aio_req_t *reqp)
525 {
526 	aio_lio_t *liop;
527 
528 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
529 
530 	if (reqp->aio_req_portkev) {
531 		port_free_event(reqp->aio_req_portkev);
532 		reqp->aio_req_portkev = NULL;
533 	}
534 
535 	if ((liop = reqp->aio_req_lio) != NULL) {
536 		if (--liop->lio_nent == 0)
537 			aio_lio_free(aiop, liop);
538 		reqp->aio_req_lio = NULL;
539 	}
540 	if (reqp->aio_req_sigqp != NULL)
541 		kmem_free(reqp->aio_req_sigqp, sizeof (sigqueue_t));
542 	reqp->aio_req_next = aiop->aio_free;
543 	aiop->aio_free = reqp;
544 	aiop->aio_outstanding--;
545 	if (aiop->aio_outstanding == 0)
546 		cv_broadcast(&aiop->aio_waitcv);
547 	aio_hash_delete(aiop, reqp);
548 }
549 
550 /*
551  * Put a reqp onto the freelist.
552  */
553 void
554 aio_req_free_port(aio_t *aiop, aio_req_t *reqp)
555 {
556 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
557 
558 	reqp->aio_req_next = aiop->aio_free;
559 	aiop->aio_free = reqp;
560 	aiop->aio_outstanding--;
561 	aio_hash_delete(aiop, reqp);
562 }
563 
564 
565 /*
566  * Put a completed request onto its appropiate done queue.
567  */
568 /*ARGSUSED*/
569 static void
570 aio_enq(aio_req_t **qhead, aio_req_t *reqp, int qflg_new)
571 {
572 	if (*qhead == NULL) {
573 		*qhead = reqp;
574 		reqp->aio_req_next = reqp;
575 		reqp->aio_req_prev = reqp;
576 	} else {
577 		reqp->aio_req_next = *qhead;
578 		reqp->aio_req_prev = (*qhead)->aio_req_prev;
579 		reqp->aio_req_prev->aio_req_next = reqp;
580 		(*qhead)->aio_req_prev = reqp;
581 	}
582 
583 	reqp->aio_req_flags |= qflg_new;
584 }
585 
586 /*
587  * Put a completed request onto its appropiate done queue.
588  */
589 static void
590 aio_enq_doneq(aio_t *aiop, aio_req_t *reqp)
591 {
592 
593 	if (aiop->aio_doneq == NULL) {
594 		aiop->aio_doneq = reqp;
595 		reqp->aio_req_next = reqp;
596 		reqp->aio_req_prev = reqp;
597 	} else {
598 		reqp->aio_req_next = aiop->aio_doneq;
599 		reqp->aio_req_prev = aiop->aio_doneq->aio_req_prev;
600 		reqp->aio_req_prev->aio_req_next = reqp;
601 		aiop->aio_doneq->aio_req_prev = reqp;
602 	}
603 
604 	reqp->aio_req_flags |= AIO_DONEQ;
605 }
606 
607 #ifdef DEBUG
608 /* ARGSUSED */
609 void
610 aio_check_flag(aio_req_t *reqp, int check, int val, int flag)
611 {
612 	int	lval;
613 	if (reqp == NULL)
614 		return;
615 	lval = reqp->aio_req_flags & check;
616 	ASSERT(lval == val);
617 }
618 
619 void
620 aio_checkset_flag(aio_req_t *reqp, int checkdel, int set)
621 {
622 	aio_check_flag(reqp, checkdel, checkdel, 0);
623 	reqp->aio_req_flags &= ~checkdel;
624 	reqp->aio_req_flags |= set;
625 
626 	aio_check_flag(reqp->aio_req_next, set, set, 1);
627 	aio_check_flag(reqp->aio_req_prev, set, set, 2);
628 }
629 #endif	/* DEBUG */
630 
631 /*
632  * Put a pending request onto the pending port queue.
633  */
634 void
635 aio_enq_port_pending(aio_t *aiop, aio_req_t *reqp)
636 {
637 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
638 
639 	if (aiop->aio_portpending != NULL) {
640 		reqp->aio_req_next = aiop->aio_portpending;
641 		aiop->aio_portpending->aio_req_prev = reqp;
642 	} else {
643 		reqp->aio_req_next = NULL;
644 	}
645 	reqp->aio_req_prev = NULL;
646 	aiop->aio_portpending = reqp;
647 #ifdef DEBUG
648 	reqp->aio_req_flags |= AIO_REQ_PEND;
649 #endif
650 }
651 
652 /*
653  * Put a completed request onto the port queue.
654  */
655 static void
656 aio_enq_portq(aio_t *aiop, aio_req_t *reqp, int pending)
657 {
658 
659 	ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex));
660 	if (pending) {
661 #ifdef DEBUG
662 		aio_checkset_flag(reqp, AIO_REQ_PEND, AIO_REQ_PEND);
663 #endif
664 		/* first take request out of the pending queue ... */
665 		if (reqp->aio_req_prev == NULL)
666 			/* first request */
667 			aiop->aio_portpending = reqp->aio_req_next;
668 		else
669 			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
670 		if (reqp->aio_req_next != NULL)
671 			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
672 	}
673 
674 	/* ... and insert request into done queue */
675 	if (aiop->aio_portq != NULL) {
676 		reqp->aio_req_next = aiop->aio_portq;
677 		aiop->aio_portq->aio_req_prev = reqp;
678 	} else {
679 		reqp->aio_req_next = NULL;
680 	}
681 	reqp->aio_req_prev = NULL;
682 	aiop->aio_portq = reqp;
683 #ifdef DEBUG
684 	if (pending)
685 		aio_checkset_flag(reqp, AIO_REQ_PEND, AIO_REQ_PORTQ);
686 	else
687 		aio_checkset_flag(reqp, AIO_REQ_CLEAN, AIO_REQ_PORTQ);
688 #endif
689 }
690 
691 /*
692  * Put a completed request onto the port cleanup queue.
693  */
694 static void
695 aio_enq_port_cleanupq(aio_t *aiop, aio_req_t *reqp)
696 {
697 
698 #ifdef DEBUG
699 	aio_checkset_flag(reqp, AIO_REQ_PEND, AIO_REQ_PEND);
700 #endif
701 	/* first take request out of the pending queue ... */
702 	if (reqp->aio_req_prev == NULL)
703 		/* first request */
704 		aiop->aio_portpending = reqp->aio_req_next;
705 	else
706 		reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
707 
708 	if (reqp->aio_req_next != NULL)
709 		reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
710 
711 	/* ... and insert request into the cleanup queue */
712 	reqp->aio_req_next = aiop->aio_portcleanupq;
713 	aiop->aio_portcleanupq = reqp;
714 #ifdef DEBUG
715 	reqp->aio_req_prev = NULL;
716 	aio_checkset_flag(reqp, AIO_REQ_PEND, AIO_REQ_CLEAN);
717 #endif
718 }
719 
720 /*
721  * concatenate a specified queue with the cleanupq. the specified
722  * queue is put onto the tail of the cleanupq. all elements on the
723  * specified queue should have their aio_req_flags field cleared.
724  */
725 /*ARGSUSED*/
726 void
727 aio_cleanupq_concat(aio_t *aiop, aio_req_t *q2, int qflg)
728 {
729 	aio_req_t *cleanupqhead, *q2tail;
730 
731 #ifdef DEBUG
732 	aio_req_t *reqp = q2;
733 
734 	do {
735 		ASSERT(reqp->aio_req_flags & qflg);
736 		reqp->aio_req_flags &= ~qflg;
737 		reqp->aio_req_flags |= AIO_CLEANUPQ;
738 	} while ((reqp = reqp->aio_req_next) != q2);
739 #endif
740 
741 	cleanupqhead = aiop->aio_cleanupq;
742 	if (cleanupqhead == NULL)
743 		aiop->aio_cleanupq = q2;
744 	else {
745 		cleanupqhead->aio_req_prev->aio_req_next = q2;
746 		q2tail = q2->aio_req_prev;
747 		q2tail->aio_req_next = cleanupqhead;
748 		q2->aio_req_prev = cleanupqhead->aio_req_prev;
749 		cleanupqhead->aio_req_prev = q2tail;
750 	}
751 }
752 
/*
 * cleanup aio requests that are on the per-process cleanup, notify and
 * poll queues and, when "flag" is non-zero, on the port queues as well.
 *
 * The queues are detached from the aio_t while holding aio_mutex and
 * are then processed by the per-queue cleanup routines.  When flag is
 * AIO_CLEANUP_EXIT the requests are freed instead of being moved to the
 * done queue, and the function returns without waking waiters or
 * sending SIGIO.
 */
void
aio_cleanup(int flag)
{
	aio_t *aiop = curproc->p_aio;
	aio_req_t *pollqhead, *cleanupqhead, *notifyqhead;
	aio_req_t *cleanupport;		/* only set and read when flag != 0 */
	aio_req_t *portq = NULL;
	void (*func)();
	int signalled = 0;
	int qflag = 0;			/* number of non-empty queues found */
	int exitflg;

	ASSERT(aiop != NULL);

	if (flag == AIO_CLEANUP_EXIT)
		exitflg = AIO_CLEANUP_EXIT;
	else
		exitflg = 0;

	/*
	 * We need to get the aio_cleanupq_mutex because we are calling
	 * aio_cleanup_cleanupq()
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	/*
	 * take all the requests off the cleanupq, the notifyq,
	 * and the pollq.
	 */
	mutex_enter(&aiop->aio_mutex);
	if ((cleanupqhead = aiop->aio_cleanupq) != NULL) {
		aiop->aio_cleanupq = NULL;
		qflag++;
	}
	if ((notifyqhead = aiop->aio_notifyq) != NULL) {
		aiop->aio_notifyq = NULL;
		qflag++;
	}
	if ((pollqhead = aiop->aio_pollq) != NULL) {
		aiop->aio_pollq = NULL;
		qflag++;
	}
	if (flag) {
		/*
		 * the port queue is only sampled here; it is detached
		 * later by aio_cleanup_portq() itself.
		 */
		if ((portq = aiop->aio_portq) != NULL)
			qflag++;

		if ((cleanupport = aiop->aio_portcleanupq) != NULL) {
			aiop->aio_portcleanupq = NULL;
			qflag++;
		}
	}
	mutex_exit(&aiop->aio_mutex);

	/*
	 * return immediately if all of the queues examined above
	 * are empty. someone else must have emptied them.
	 */
	if (!qflag) {
		mutex_exit(&aiop->aio_cleanupq_mutex);
		return;
	}

	/*
	 * do cleanup for the various queues.
	 */
	if (cleanupqhead)
		aio_cleanup_cleanupq(aiop, cleanupqhead, exitflg);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	if (notifyqhead)
		signalled = aio_cleanup_notifyq(aiop, notifyqhead, exitflg);
	if (pollqhead)
		aio_cleanup_pollq(aiop, pollqhead, exitflg);
	if (flag && (cleanupport || portq))
		aio_cleanup_portq(aiop, cleanupport, exitflg);

	if (exitflg)
		return;

	/*
	 * If we have an active aio_cleanup_thread it's possible for
	 * this routine to push something on to the done queue after
	 * an aiowait/aiosuspend thread has already decided to block.
	 * This being the case, we need a cv_broadcast here to wake
	 * these threads up. It is simpler and cleaner to do this
	 * broadcast here than in the individual cleanup routines.
	 */

	mutex_enter(&aiop->aio_mutex);
	cv_broadcast(&aiop->aio_waitcv);
	mutex_exit(&aiop->aio_mutex);

	/*
	 * Only if the process wasn't already signalled,
	 * determine if a SIGIO signal should be delivered.
	 */
	if (!signalled &&
	    (func = curproc->p_user.u_signal[SIGIO - 1]) != SIG_DFL &&
	    func != SIG_IGN)
		psignal(curproc, SIGIO);
}
856 
857 
/*
 * Do cleanup for every element of the port queue and of the port
 * cleanup queue.
 *
 * "cleanupq" is the already detached port cleanup queue, linked through
 * aio_req_next and NULL terminated.  With a non-zero exitflag every
 * request is freed; otherwise the pages of each request are unlocked
 * and the request is put (back) on the port queue.
 */
static void
aio_cleanup_portq(aio_t *aiop, aio_req_t *cleanupq, int exitflag)
{
	aio_req_t	*reqp;
	aio_req_t	*next;
	aio_req_t	*headp;
	aio_req_t	*tailp;	/* last request visited; valid iff headp != NULL */

	/* first check the portq */
	if (exitflag || ((aiop->aio_flags & AIO_CLEANUP_PORT) == 0)) {
		mutex_enter(&aiop->aio_mutex);
		if (aiop->aio_flags & AIO_CLEANUP)
			aiop->aio_flags |= AIO_CLEANUP_PORT;
		mutex_exit(&aiop->aio_mutex);

		/* detach the whole port queue so it can be walked unlocked */
		mutex_enter(&aiop->aio_portq_mutex);
		headp = aiop->aio_portq;
		aiop->aio_portq = NULL;
		mutex_exit(&aiop->aio_portq_mutex);

		for (reqp = headp; reqp != NULL; reqp = next) {
			tailp = reqp;
			next = reqp->aio_req_next;
			/*
			 * It is not allowed to hold locks during
			 * aphysio_unlock(). The aio_done() interrupt function
			 * will try to acquire aio_mutex and aio_portq_mutex.
			 */
			aphysio_unlock(reqp);
			if (exitflag) {
				mutex_enter(&aiop->aio_mutex);
				aio_req_free(aiop, reqp);
				mutex_exit(&aiop->aio_mutex);
			}
		}

		if (headp != NULL && exitflag == 0) {
			/* move unlocked requests back to the done queue */
			mutex_enter(&aiop->aio_portq_mutex);
			/*
			 * requests that were queued in the meantime are
			 * chained behind the ones just processed.
			 */
			if (aiop->aio_portq != NULL) {
				tailp->aio_req_next = aiop->aio_portq;
				aiop->aio_portq->aio_req_prev = tailp;
			}
			aiop->aio_portq = headp;
			cv_broadcast(&aiop->aio_portcv);
			mutex_exit(&aiop->aio_portq_mutex);
		}
	}

	/* now check the port cleanup queue */
	for (reqp = cleanupq; reqp != NULL; reqp = next) {
#ifdef DEBUG
		aio_checkset_flag(reqp, AIO_REQ_CLEAN, AIO_REQ_CLEAN);
#endif
		/* fetch the successor first: reqp is freed or re-queued below */
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflag) {
#ifdef DEBUG
			aio_checkset_flag(reqp, AIO_REQ_CLEAN, AIO_REQ_FREE);
#endif
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			mutex_enter(&aiop->aio_portq_mutex);
			aio_enq_portq(aiop, reqp, 0);
			mutex_exit(&aiop->aio_portq_mutex);
			(void) port_send_event(reqp->aio_req_portkev);
		}
	}
}
932 
933 /*
934  * Do cleanup for every element of the cleanupq.
935  */
936 static void
937 aio_cleanup_cleanupq(aio_t *aiop, aio_req_t *qhead, int exitflg)
938 {
939 	aio_req_t *reqp, *next;
940 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
941 
942 	/*
943 	 * Since aio_req_done() or aio_req_find() use the HASH list to find
944 	 * the required requests, they could potentially take away elements
945 	 * if they are already done (AIO_DONEQ is set).
946 	 * The aio_cleanupq_mutex protects the queue for the duration of the
947 	 * loop from aio_req_done() and aio_req_find().
948 	 */
949 
950 	qhead->aio_req_prev->aio_req_next = NULL;
951 	for (reqp = qhead; reqp != NULL; reqp = next) {
952 		ASSERT(reqp->aio_req_flags & AIO_CLEANUPQ);
953 		next = reqp->aio_req_next;
954 		aphysio_unlock(reqp);
955 		mutex_enter(&aiop->aio_mutex);
956 		if (exitflg) {
957 			/*
958 			 * reqp can't be referenced after its freed
959 			 */
960 			aio_req_free(aiop, reqp);
961 		} else {
962 			if (reqp->aio_req_portkev &&
963 			    ((reqp->aio_req_flags & AIO_DONEQ) == 0)) {
964 				aio_enq_doneq(aiop, reqp);
965 				(void) port_send_event(reqp->aio_req_portkev);
966 			} else {
967 				aio_enq_doneq(aiop, reqp);
968 			}
969 		}
970 		mutex_exit(&aiop->aio_mutex);
971 	}
972 }
973 
974 /*
975  * do cleanup for every element of the notify queue.
976  */
977 static int
978 aio_cleanup_notifyq(aio_t *aiop, aio_req_t *qhead, int exitflg)
979 {
980 	aio_req_t *reqp, *next;
981 	aio_lio_t *liohead;
982 	sigqueue_t *sigev, *lio_sigev = NULL;
983 	int signalled = 0;
984 
985 	qhead->aio_req_prev->aio_req_next = NULL;
986 	for (reqp = qhead; reqp != NULL; reqp = next) {
987 		ASSERT(reqp->aio_req_flags & AIO_NOTIFYQ);
988 		next = reqp->aio_req_next;
989 		aphysio_unlock(reqp);
990 		if (exitflg) {
991 			/* reqp cann't be referenced after its freed */
992 			mutex_enter(&aiop->aio_mutex);
993 			aio_req_free(aiop, reqp);
994 			mutex_exit(&aiop->aio_mutex);
995 			continue;
996 		}
997 		mutex_enter(&aiop->aio_mutex);
998 		aio_enq_doneq(aiop, reqp);
999 		sigev = reqp->aio_req_sigqp;
1000 		reqp->aio_req_sigqp = NULL;
1001 		/* check if list IO completion notification is required */
1002 		if ((liohead = reqp->aio_req_lio) != NULL) {
1003 			ASSERT(liohead->lio_refcnt > 0);
1004 			if (--liohead->lio_refcnt == 0) {
1005 				cv_signal(&liohead->lio_notify);
1006 				lio_sigev = liohead->lio_sigqp;
1007 				liohead->lio_sigqp = NULL;
1008 			}
1009 		}
1010 		mutex_exit(&aiop->aio_mutex);
1011 		if (sigev) {
1012 			signalled++;
1013 			aio_sigev_send(reqp->aio_req_buf.b_proc, sigev);
1014 		}
1015 		if (lio_sigev) {
1016 			signalled++;
1017 			aio_sigev_send(reqp->aio_req_buf.b_proc, lio_sigev);
1018 		}
1019 	}
1020 	return (signalled);
1021 }
1022 
1023 /*
1024  * Do cleanup for every element of the poll queue.
1025  */
1026 static void
1027 aio_cleanup_pollq(aio_t *aiop, aio_req_t *qhead, int exitflg)
1028 {
1029 	aio_req_t *reqp, *next;
1030 
1031 	/*
1032 	 * As no other threads should be accessing the queue at this point,
1033 	 * it isn't necessary to hold aio_mutex while we traverse its elements.
1034 	 */
1035 
1036 	qhead->aio_req_prev->aio_req_next = NULL;
1037 	for (reqp = qhead; reqp != NULL; reqp = next) {
1038 		ASSERT(reqp->aio_req_flags & AIO_POLLQ);
1039 		next = reqp->aio_req_next;
1040 		aphysio_unlock(reqp);
1041 		if (exitflg) {
1042 			/* reqp cann't be referenced after its freed */
1043 			mutex_enter(&aiop->aio_mutex);
1044 			aio_req_free(aiop, reqp);
1045 			mutex_exit(&aiop->aio_mutex);
1046 			continue;
1047 		}
1048 		/* copy out request's result_t. */
1049 		aio_copyout_result(reqp);
1050 		mutex_enter(&aiop->aio_mutex);
1051 		aio_enq_doneq(aiop, reqp);
1052 		mutex_exit(&aiop->aio_mutex);
1053 	}
1054 }
1055 
/*
 * called by exit(). waits for all outstanding kaio to finish
 * before the kaio resources are freed.
 *
 * After the wait, the cleanup queues are drained through
 * aio_cleanup(AIO_CLEANUP_EXIT), and then the done queue, the request
 * freelist, the list head freelist, the iocb buffer, the mutexes and
 * finally the aio_t itself are released.  p->p_aio is cleared under
 * p->p_lock.
 */
void
aio_cleanup_exit(void)
{
	proc_t *p = curproc;
	aio_t *aiop = p->p_aio;
	aio_req_t *reqp, *next, *head;
	aio_lio_t *nxtlio, *liop;

	/*
	 * wait for all outstanding kaio to complete. process
	 * is now single-threaded; no other kaio requests can
	 * happen once aio_pending is zero.
	 */
	mutex_enter(&aiop->aio_mutex);
	/* AIO_CLEANUP makes aio_done() signal aio_cleanupcv on completion */
	aiop->aio_flags |= AIO_CLEANUP;
	while ((aiop->aio_pending != 0) || (aiop->aio_flags & AIO_DONE_ACTIVE))
		cv_wait(&aiop->aio_cleanupcv, &aiop->aio_mutex);
	mutex_exit(&aiop->aio_mutex);

	/* cleanup the cleanup-thread queues. */
	aio_cleanup(AIO_CLEANUP_EXIT);

	/*
	 * Although this process is now single-threaded, we
	 * still need to protect ourselves against a race with
	 * aio_cleanup_dr_delete_memory().
	 */
	mutex_enter(&p->p_lock);

	/*
	 * free up the done queue's resources.
	 */
	if ((head = aiop->aio_doneq) != NULL) {
		/* break the ring so that the walk terminates at the old tail */
		head->aio_req_prev->aio_req_next = NULL;
		for (reqp = head; reqp != NULL; reqp = next) {
			next = reqp->aio_req_next;
			aphysio_unlock(reqp);
			kmem_free(reqp, sizeof (struct aio_req_t));
		}
	}
	/*
	 * release aio request freelist.
	 */
	for (reqp = aiop->aio_free; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		kmem_free(reqp, sizeof (struct aio_req_t));
	}

	/*
	 * release io list head freelist.
	 */
	for (liop = aiop->aio_lio_free; liop != NULL; liop = nxtlio) {
		nxtlio = liop->lio_next;
		kmem_free(liop, sizeof (aio_lio_t));
	}

	if (aiop->aio_iocb)
		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);

	mutex_destroy(&aiop->aio_mutex);
	mutex_destroy(&aiop->aio_portq_mutex);
	mutex_destroy(&aiop->aio_cleanupq_mutex);
	/* detach the aio_t from the process before it is freed */
	p->p_aio = NULL;
	mutex_exit(&p->p_lock);
	kmem_free(aiop, sizeof (struct aio));
}
1126 
1127 /*
1128  * copy out aio request's result to a user-level result_t buffer.
1129  */
1130 void
1131 aio_copyout_result(aio_req_t *reqp)
1132 {
1133 	struct buf	*bp;
1134 	struct iovec	*iov;
1135 	void		*resultp;
1136 	int		error;
1137 	size_t		retval;
1138 
1139 	if (reqp->aio_req_flags & AIO_COPYOUTDONE)
1140 		return;
1141 
1142 	reqp->aio_req_flags |= AIO_COPYOUTDONE;
1143 
1144 	iov = reqp->aio_req_uio.uio_iov;
1145 	bp = &reqp->aio_req_buf;
1146 	/* "resultp" points to user-level result_t buffer */
1147 	resultp = (void *)reqp->aio_req_resultp;
1148 	if (bp->b_flags & B_ERROR) {
1149 		if (bp->b_error)
1150 			error = bp->b_error;
1151 		else
1152 			error = EIO;
1153 		retval = (size_t)-1;
1154 	} else {
1155 		error = 0;
1156 		retval = iov->iov_len - bp->b_resid;
1157 	}
1158 #ifdef	_SYSCALL32_IMPL
1159 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1160 		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
1161 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
1162 	} else {
1163 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
1164 		    (int)retval);
1165 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
1166 	}
1167 #else
1168 	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
1169 	(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
1170 #endif
1171 }
1172 
1173 
1174 void
1175 aio_copyout_result_port(struct iovec *iov, struct buf *bp, void *resultp)
1176 {
1177 	int errno;
1178 	size_t retval;
1179 
1180 	if (bp->b_flags & B_ERROR) {
1181 		if (bp->b_error)
1182 			errno = bp->b_error;
1183 		else
1184 			errno = EIO;
1185 		retval = (size_t)-1;
1186 	} else {
1187 		errno = 0;
1188 		retval = iov->iov_len - bp->b_resid;
1189 	}
1190 #ifdef	_SYSCALL32_IMPL
1191 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1192 		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
1193 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
1194 	} else {
1195 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
1196 		    (int)retval);
1197 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, errno);
1198 	}
1199 #else
1200 	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
1201 	(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
1202 #endif
1203 }
1204 
1205 /*
1206  * This function is used to remove a request from the done queue.
1207  */
1208 
1209 void
1210 aio_req_remove_portq(aio_t *aiop, aio_req_t *reqp)
1211 {
1212 	ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex));
1213 	while (aiop->aio_portq == NULL) {
1214 		/*
1215 		 * aio_portq is set to NULL when aio_cleanup_portq()
1216 		 * is working with the event queue.
1217 		 * The aio_cleanup_thread() uses aio_cleanup_portq()
1218 		 * to unlock all AIO buffers with completed transactions.
1219 		 * Wait here until aio_cleanup_portq() restores the
1220 		 * list of completed transactions in aio_portq.
1221 		 */
1222 		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
1223 	}
1224 	if (reqp == aiop->aio_portq) {
1225 		/* first request in the queue */
1226 		aiop->aio_portq = reqp->aio_req_next;
1227 	} else {
1228 		reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
1229 		if (reqp->aio_req_next)
1230 			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
1231 	}
1232 }
1233 
1234 /* ARGSUSED */
1235 void
1236 aio_close_port(void *arg, int port, pid_t pid, int lastclose)
1237 {
1238 	aio_t		*aiop;
1239 	aio_req_t 	*reqp;
1240 	aio_req_t 	*next;
1241 	aio_req_t	*headp;
1242 	int		counter;
1243 
1244 	if (arg == NULL)
1245 		aiop = curproc->p_aio;
1246 	else
1247 		aiop = (aio_t *)arg;
1248 
1249 	/*
1250 	 * The PORT_SOURCE_AIO source is always associated with every new
1251 	 * created port by default.
1252 	 * If no asynchronous I/O transactions were associated with the port
1253 	 * then the aiop pointer will still be set to NULL.
1254 	 */
1255 	if (aiop == NULL)
1256 		return;
1257 
1258 	/*
1259 	 * Within a process event ports can be used to collect events other
1260 	 * than PORT_SOURCE_AIO events. At the same time the process can submit
1261 	 * asynchronous I/Os transactions which are not associated with the
1262 	 * current port.
1263 	 * The current process oriented model of AIO uses a sigle queue for
1264 	 * pending events. On close the pending queue (queue of asynchronous
1265 	 * I/O transactions using event port notification) must be scanned
1266 	 * to detect and handle pending I/Os using the current port.
1267 	 */
1268 	mutex_enter(&aiop->aio_portq_mutex);
1269 	mutex_enter(&aiop->aio_mutex);
1270 	reqp = aiop->aio_portpending;
1271 	for (counter = 0; reqp != NULL; reqp = reqp->aio_req_next) {
1272 		if (reqp->aio_req_portkev && (reqp->aio_req_port == port)) {
1273 			reqp->aio_req_flags |= AIO_CLOSE_PORT;
1274 			counter++;
1275 		}
1276 	}
1277 	if (counter == 0) {
1278 		/* no AIOs pending */
1279 		mutex_exit(&aiop->aio_mutex);
1280 		mutex_exit(&aiop->aio_portq_mutex);
1281 		return;
1282 	}
1283 	aiop->aio_portpendcnt += counter;
1284 	mutex_exit(&aiop->aio_mutex);
1285 	while (aiop->aio_portpendcnt)
1286 		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
1287 
1288 	/*
1289 	 * all pending AIOs are completed.
1290 	 * check port doneq
1291 	 */
1292 
1293 	reqp = aiop->aio_portq;
1294 	headp = NULL;
1295 	for (; reqp != NULL; reqp = next) {
1296 		next = reqp->aio_req_next;
1297 		if (reqp->aio_req_port == port) {
1298 			/* discard event */
1299 			aio_req_remove_portq(aiop, reqp);
1300 			port_free_event(reqp->aio_req_portkev);
1301 			/* put request in temporary queue */
1302 			reqp->aio_req_next = headp;
1303 			headp = reqp;
1304 		}
1305 	}
1306 	mutex_exit(&aiop->aio_portq_mutex);
1307 
1308 	/* headp points to the list of requests to be discarded */
1309 	for (reqp = headp; reqp != NULL; reqp = next) {
1310 		next = reqp->aio_req_next;
1311 		aphysio_unlock(reqp);
1312 		mutex_enter(&aiop->aio_mutex);
1313 		aio_req_free_port(aiop, reqp);
1314 		mutex_exit(&aiop->aio_mutex);
1315 	}
1316 
1317 	if (aiop->aio_flags & AIO_CLEANUP)
1318 		cv_broadcast(&aiop->aio_waitcv);
1319 }
1320 
1321 /*
1322  * aio_cleanup_dr_delete_memory is used by dr's delete_memory_thread
1323  * to force aio cleanup for a given process.  This is needed so that
1324  * delete_memory_thread can obtain writer locks on pages that need to
1325  * be relocated during a dr memory delete operation, otherwise a
1326  * deadly embrace may occur.
1327  * This implementation uses code from aio_cleanup_thread to move
1328  * entries from the doneq to the cleanupq; it also uses code from
1329  * aio_cleanup to cleanup the various queues and to signal the process's
1330  * aio_cleanup_thread.
1331  * Returns: non-zero if aio cleanup occurred, otherwise 0 is returned.
1332  */
1333 int
1334 aio_cleanup_dr_delete_memory(proc_t *procp)
1335 {
1336 	aio_req_t *cleanupqhead, *notifyqhead;
1337 	aio_req_t *cleanupport;
1338 	aio_req_t *portq;
1339 	int qflag;
1340 	void (*func)();
1341 	int signalled = 0;
1342 	struct aio *aiop = procp->p_aio;
1343 
1344 	ASSERT(MUTEX_HELD(&procp->p_lock));
1345 	ASSERT(aiop != NULL);
1346 	qflag = 0;
1347 	/*
1348 	 * we need to get aio_cleanupq_mutex.
1349 	 */
1350 	mutex_enter(&aiop->aio_cleanupq_mutex);
1351 	mutex_enter(&aiop->aio_mutex);
1352 	/*
1353 	 * do aio cleanup for this process, this code was shamelessly
1354 	 * stolen from aio_cleanup_thread and aio_cleanup
1355 	 */
1356 	if (aiop->aio_doneq) {
1357 		/* move doneq's aio_req_t's to cleanupq */
1358 		aio_req_t *doneqhead = aiop->aio_doneq;
1359 		aiop->aio_doneq = NULL;
1360 		aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
1361 	}
1362 	/*
1363 	 * take all the requests off the cleanupq, the notifyq,
1364 	 * and the event port queues (aio_portq and
1365 	 * aio_portcleanupq).  we cannot process the pollq from
1366 	 * a kernel thread that has an invalid secondary context,
1367 	 * as aio_copyout_result requires the secondary context
1368 	 * to be a valid user context.
1369 	 */
1370 	if ((cleanupqhead = aiop->aio_cleanupq) != NULL) {
1371 		aiop->aio_cleanupq = NULL;
1372 		qflag++;
1373 	}
1374 	if ((notifyqhead = aiop->aio_notifyq) != NULL) {
1375 		aiop->aio_notifyq = NULL;
1376 		qflag++;
1377 	}
1378 	if ((portq = aiop->aio_portq) != NULL)
1379 		qflag++;
1380 	if ((cleanupport = aiop->aio_portcleanupq) != NULL) {
1381 		aiop->aio_portcleanupq = NULL;
1382 		qflag++;
1383 	}
1384 	mutex_exit(&aiop->aio_mutex);
1385 	/*
1386 	 * return immediately if cleanupq and
1387 	 * notifyq are all empty. someone else must have
1388 	 * emptied them.
1389 	 */
1390 	if (!qflag) {
1391 		mutex_exit(&aiop->aio_cleanupq_mutex);
1392 		return (0);
1393 	}
1394 
1395 	/*
1396 	 * do cleanup for the various queues.
1397 	 */
1398 	if (cleanupqhead)
1399 		aio_cleanup_cleanupq(aiop, cleanupqhead, 0);
1400 	mutex_exit(&aiop->aio_cleanupq_mutex);
1401 	if (notifyqhead)
1402 		signalled = aio_cleanup_notifyq(aiop, notifyqhead, 0);
1403 	if (cleanupport || portq)
1404 		aio_cleanup_portq(aiop, cleanupport, 0);
1405 	/*
1406 	 * If we have an active aio_cleanup_thread it's possible for
1407 	 * this routine to push something on to the done queue after
1408 	 * an aiowait/aiosuspend thread has already decided to block.
1409 	 * This being the case, we need a cv_broadcast here to wake
1410 	 * these threads up. It is simpler and cleaner to do this
1411 	 * broadcast here than in the individual cleanup routines.
1412 	 */
1413 	mutex_enter(&aiop->aio_mutex);
1414 	/* also re-enable aio requests */
1415 	cv_broadcast(&aiop->aio_waitcv);
1416 	mutex_exit(&aiop->aio_mutex);
1417 	/*
1418 	 * Only if the process wasn't already signalled,
1419 	 * determine if a SIGIO signal should be delievered.
1420 	 */
1421 	if (!signalled &&
1422 	    (func = procp->p_user.u_signal[SIGIO - 1]) != SIG_DFL &&
1423 	    func != SIG_IGN)
1424 		sigtoproc(procp, NULL, SIGIO);
1425 	return (qflag);
1426 }
1427