xref: /titanic_51/usr/src/uts/common/os/aio_subr.c (revision 705dd6c22fd6bceb21d995f1eb7c2774ff0e5317)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/proc.h>
29 #include <sys/file.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/sysmacros.h>
33 #include <sys/cmn_err.h>
34 #include <sys/systm.h>
35 #include <vm/as.h>
36 #include <vm/page.h>
37 #include <sys/uio.h>
38 #include <sys/kmem.h>
39 #include <sys/debug.h>
40 #include <sys/aio_impl.h>
41 #include <sys/epm.h>
42 #include <sys/fs/snode.h>
43 #include <sys/siginfo.h>
44 #include <sys/cpuvar.h>
45 #include <sys/tnf_probe.h>
46 #include <sys/conf.h>
47 #include <sys/sdt.h>
48 
49 int aphysio(int (*)(), int (*)(), dev_t, int, void (*)(), struct aio_req *);
50 void aio_done(struct buf *);
51 void aphysio_unlock(aio_req_t *);
52 void aio_cleanup(int);
53 void aio_cleanup_exit(void);
54 
55 /*
56  * private functions
57  */
58 static void aio_sigev_send(proc_t *, sigqueue_t *);
59 static void aio_hash_delete(aio_t *, aio_req_t *);
60 static void aio_lio_free(aio_t *, aio_lio_t *);
61 static int aio_cleanup_cleanupq(aio_t *, aio_req_t *, int);
62 static int aio_cleanup_notifyq(aio_t *, aio_req_t *, int);
63 static void aio_cleanup_pollq(aio_t *, aio_req_t *, int);
64 static void aio_cleanup_portq(aio_t *, aio_req_t *, int);
65 
66 /*
67  * async version of physio() that doesn't wait synchronously
68  * for the driver's strategy routine to complete.
69  */
70 
71 int
72 aphysio(
73 	int (*strategy)(struct buf *),
74 	int (*cancel)(struct buf *),
75 	dev_t dev,
76 	int rw,
77 	void (*mincnt)(struct buf *),
78 	struct aio_req *aio)
79 {
80 	struct uio *uio = aio->aio_uio;
81 	aio_req_t *reqp = (aio_req_t *)aio->aio_private;
82 	struct buf *bp = &reqp->aio_req_buf;
83 	struct iovec *iov;
84 	struct as *as;
85 	char *a;
86 	int	error;
87 	size_t	c;
88 	struct page **pplist;
89 	struct dev_ops *ops = devopsp[getmajor(dev)];
90 
91 	if (uio->uio_loffset < 0)
92 		return (EINVAL);
93 #ifdef	_ILP32
94 	/*
95 	 * For 32-bit kernels, check against SPEC_MAXOFFSET_T which represents
96 	 * the maximum size that can be supported by the IO subsystem.
97 	 * XXX this code assumes a D_64BIT driver.
98 	 */
99 	if (uio->uio_loffset > SPEC_MAXOFFSET_T)
100 		return (EINVAL);
101 #endif	/* _ILP32 */
102 
103 	TNF_PROBE_5(aphysio_start, "kaio", /* CSTYLED */,
104 	    tnf_opaque, bp, bp,
105 	    tnf_device, device, dev,
106 	    tnf_offset, blkno, btodt(uio->uio_loffset),
107 	    tnf_size, size, uio->uio_iov->iov_len,
108 	    tnf_bioflags, rw, rw);
109 
110 	if (rw == B_READ) {
111 		CPU_STATS_ADD_K(sys, phread, 1);
112 	} else {
113 		CPU_STATS_ADD_K(sys, phwrite, 1);
114 	}
115 
116 	iov = uio->uio_iov;
117 	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
118 	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
119 
120 	bp->b_error = 0;
121 	bp->b_flags = B_BUSY | B_PHYS | B_ASYNC | rw;
122 	bp->b_edev = dev;
123 	bp->b_dev = cmpdev(dev);
124 	bp->b_lblkno = btodt(uio->uio_loffset);
125 	bp->b_offset = uio->uio_loffset;
126 	(void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
127 	    (void *)bp->b_edev, (void **)&bp->b_dip);
128 
129 	/*
130 	 * Clustering: Clustering can set the b_iodone, b_forw and
131 	 * b_proc fields to cluster-specifc values.
132 	 */
133 	if (bp->b_iodone == NULL) {
134 		bp->b_iodone = (int (*)()) aio_done;
135 		/* b_forw points at an aio_req_t structure */
136 		bp->b_forw = (struct buf *)reqp;
137 		bp->b_proc = curproc;
138 	}
139 
140 	a = bp->b_un.b_addr = iov->iov_base;
141 	c = bp->b_bcount = iov->iov_len;
142 
143 	(*mincnt)(bp);
144 	if (bp->b_bcount != iov->iov_len)
145 		return (ENOTSUP);
146 
147 	as = bp->b_proc->p_as;
148 
149 	error = as_pagelock(as, &pplist, a,
150 	    c, rw == B_READ? S_WRITE : S_READ);
151 	if (error != 0) {
152 		bp->b_flags |= B_ERROR;
153 		bp->b_error = error;
154 		bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
155 		return (error);
156 	}
157 	reqp->aio_req_flags |= AIO_PAGELOCKDONE;
158 	bp->b_shadow = pplist;
159 	if (pplist != NULL) {
160 		bp->b_flags |= B_SHADOW;
161 	}
162 
163 	if (cancel != anocancel)
164 		cmn_err(CE_PANIC,
165 		    "aphysio: cancellation not supported, use anocancel");
166 
167 	reqp->aio_req_cancel = cancel;
168 
169 	DTRACE_IO1(start, struct buf *, bp);
170 
171 	return ((*strategy)(bp));
172 }
173 
/*
 * Cancel routine that aphysio() requires its callers to pass.
 * The buf argument is ignored; the result is always ENXIO.
 */
/*ARGSUSED*/
int
anocancel(struct buf *bp)
{
	return (ENXIO);
}
180 
181 /*
182  * Called from biodone().
183  * Notify process that a pending AIO has finished.
184  */
185 
186 /*
187  * Clustering: This function is made non-static as it is used
188  * by clustering s/w as contract private interface.
189  */
190 
191 void
192 aio_done(struct buf *bp)
193 {
194 	proc_t *p;
195 	struct as *as;
196 	aio_req_t *reqp;
197 	aio_lio_t *head = NULL;
198 	aio_t *aiop;
199 	sigqueue_t *sigev = NULL;
200 	sigqueue_t *lio_sigev = NULL;
201 	port_kevent_t *pkevp = NULL;
202 	port_kevent_t *lio_pkevp = NULL;
203 	int fd;
204 	int cleanupqflag;
205 	int pollqflag;
206 	int portevpend;
207 	void (*func)();
208 	int use_port = 0;
209 	int reqp_flags = 0;
210 	int send_signal = 0;
211 
212 	p = bp->b_proc;
213 	as = p->p_as;
214 	reqp = (aio_req_t *)bp->b_forw;
215 	fd = reqp->aio_req_fd;
216 
217 	TNF_PROBE_5(aphysio_end, "kaio", /* CSTYLED */,
218 	    tnf_opaque, bp, bp,
219 	    tnf_device, device, bp->b_edev,
220 	    tnf_offset, blkno, btodt(reqp->aio_req_uio.uio_loffset),
221 	    tnf_size, size, reqp->aio_req_uio.uio_iov->iov_len,
222 	    tnf_bioflags, rw, (bp->b_flags & (B_READ|B_WRITE)));
223 
224 	/*
225 	 * mapout earlier so that more kmem is available when aio is
226 	 * heavily used. bug #1262082
227 	 */
228 	if (bp->b_flags & B_REMAPPED)
229 		bp_mapout(bp);
230 
231 	/* decrement fd's ref count by one, now that aio request is done. */
232 	areleasef(fd, P_FINFO(p));
233 
234 	aiop = p->p_aio;
235 	ASSERT(aiop != NULL);
236 
237 	mutex_enter(&aiop->aio_portq_mutex);
238 	mutex_enter(&aiop->aio_mutex);
239 	ASSERT(aiop->aio_pending > 0);
240 	ASSERT(reqp->aio_req_flags & AIO_PENDING);
241 	aiop->aio_pending--;
242 	reqp->aio_req_flags &= ~AIO_PENDING;
243 	reqp_flags = reqp->aio_req_flags;
244 	if ((pkevp = reqp->aio_req_portkev) != NULL) {
245 		/* Event port notification is desired for this transaction */
246 		if (reqp->aio_req_flags & AIO_CLOSE_PORT) {
247 			/*
248 			 * The port is being closed and it is waiting for
249 			 * pending asynchronous I/O transactions to complete.
250 			 */
251 			portevpend = --aiop->aio_portpendcnt;
252 			aio_deq(&aiop->aio_portpending, reqp);
253 			aio_enq(&aiop->aio_portq, reqp, 0);
254 			mutex_exit(&aiop->aio_mutex);
255 			mutex_exit(&aiop->aio_portq_mutex);
256 			port_send_event(pkevp);
257 			if (portevpend == 0)
258 				cv_broadcast(&aiop->aio_portcv);
259 			return;
260 		}
261 
262 		if (aiop->aio_flags & AIO_CLEANUP) {
263 			/*
264 			 * aio_cleanup_thread() is waiting for completion of
265 			 * transactions.
266 			 */
267 			mutex_enter(&as->a_contents);
268 			aio_deq(&aiop->aio_portpending, reqp);
269 			aio_enq(&aiop->aio_portcleanupq, reqp, 0);
270 			cv_signal(&aiop->aio_cleanupcv);
271 			mutex_exit(&as->a_contents);
272 			mutex_exit(&aiop->aio_mutex);
273 			mutex_exit(&aiop->aio_portq_mutex);
274 			return;
275 		}
276 
277 		aio_deq(&aiop->aio_portpending, reqp);
278 		aio_enq(&aiop->aio_portq, reqp, 0);
279 
280 		use_port = 1;
281 	} else {
282 		/*
283 		 * when the AIO_CLEANUP flag is enabled for this
284 		 * process, or when the AIO_POLL bit is set for
285 		 * this request, special handling is required.
286 		 * otherwise the request is put onto the doneq.
287 		 */
288 		cleanupqflag = (aiop->aio_flags & AIO_CLEANUP);
289 		pollqflag = (reqp->aio_req_flags & AIO_POLL);
290 		if (cleanupqflag | pollqflag) {
291 
292 			if (cleanupqflag)
293 				mutex_enter(&as->a_contents);
294 
295 			/*
296 			 * requests with their AIO_POLL bit set are put
297 			 * on the pollq, requests with sigevent structures
298 			 * or with listio heads are put on the notifyq, and
299 			 * the remaining requests don't require any special
300 			 * cleanup handling, so they're put onto the default
301 			 * cleanupq.
302 			 */
303 			if (pollqflag)
304 				aio_enq(&aiop->aio_pollq, reqp, AIO_POLLQ);
305 			else if (reqp->aio_req_sigqp || reqp->aio_req_lio)
306 				aio_enq(&aiop->aio_notifyq, reqp, AIO_NOTIFYQ);
307 			else
308 				aio_enq(&aiop->aio_cleanupq, reqp,
309 				    AIO_CLEANUPQ);
310 
311 			if (cleanupqflag) {
312 				cv_signal(&aiop->aio_cleanupcv);
313 				mutex_exit(&as->a_contents);
314 				mutex_exit(&aiop->aio_mutex);
315 				mutex_exit(&aiop->aio_portq_mutex);
316 			} else {
317 				ASSERT(pollqflag);
318 				/* block aio_cleanup_exit until we're done */
319 				aiop->aio_flags |= AIO_DONE_ACTIVE;
320 				mutex_exit(&aiop->aio_mutex);
321 				mutex_exit(&aiop->aio_portq_mutex);
322 				/*
323 				 * let the cleanup processing happen from an AST
324 				 * set an AST on all threads in this process
325 				 */
326 				mutex_enter(&p->p_lock);
327 				set_proc_ast(p);
328 				mutex_exit(&p->p_lock);
329 				mutex_enter(&aiop->aio_mutex);
330 				/* wakeup anybody waiting in aiowait() */
331 				cv_broadcast(&aiop->aio_waitcv);
332 
333 				/* wakeup aio_cleanup_exit if needed */
334 				if (aiop->aio_flags & AIO_CLEANUP)
335 					cv_signal(&aiop->aio_cleanupcv);
336 				aiop->aio_flags &= ~AIO_DONE_ACTIVE;
337 				mutex_exit(&aiop->aio_mutex);
338 			}
339 			return;
340 		}
341 
342 		/*
343 		 * save req's sigevent pointer, and check its
344 		 * value after releasing aio_mutex lock.
345 		 */
346 		sigev = reqp->aio_req_sigqp;
347 		reqp->aio_req_sigqp = NULL;
348 
349 		/* put request on done queue. */
350 		aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
351 	} /* portkevent */
352 
353 	/*
354 	 * when list IO notification is enabled, a notification or
355 	 * signal is sent only when all entries in the list are done.
356 	 */
357 	if ((head = reqp->aio_req_lio) != NULL) {
358 		ASSERT(head->lio_refcnt > 0);
359 		if (--head->lio_refcnt == 0) {
360 			/*
361 			 * save lio's sigevent pointer, and check
362 			 * its value after releasing aio_mutex lock.
363 			 */
364 			lio_sigev = head->lio_sigqp;
365 			head->lio_sigqp = NULL;
366 			cv_signal(&head->lio_notify);
367 			if (head->lio_port >= 0 &&
368 			    (lio_pkevp = head->lio_portkev) != NULL)
369 				head->lio_port = -1;
370 		}
371 	}
372 
373 	/*
374 	 * if AIO_WAITN set then
375 	 * send signal only when we reached the
376 	 * required amount of IO's finished
377 	 * or when all IO's are done
378 	 */
379 	if (aiop->aio_flags & AIO_WAITN) {
380 		if (aiop->aio_waitncnt > 0)
381 			aiop->aio_waitncnt--;
382 		if (aiop->aio_pending == 0 ||
383 		    aiop->aio_waitncnt == 0)
384 			cv_broadcast(&aiop->aio_waitcv);
385 	} else {
386 		cv_broadcast(&aiop->aio_waitcv);
387 	}
388 
389 	/*
390 	 * No need to set this flag for pollq, portq, lio requests.
391 	 * If this is an old Solaris aio request, and the process has
392 	 * a SIGIO signal handler enabled, then send a SIGIO signal.
393 	 */
394 	if (!sigev && !use_port && head == NULL &&
395 	    (reqp->aio_req_flags & AIO_SOLARIS) &&
396 	    (func = PTOU(p)->u_signal[SIGIO - 1]) != SIG_DFL &&
397 	    (func != SIG_IGN)) {
398 		send_signal = 1;
399 		reqp->aio_req_flags |= AIO_SIGNALLED;
400 	}
401 
402 	mutex_exit(&aiop->aio_mutex);
403 	mutex_exit(&aiop->aio_portq_mutex);
404 
405 	/*
406 	 * Could the cleanup thread be waiting for AIO with locked
407 	 * resources to finish?
408 	 * Ideally in that case cleanup thread should block on cleanupcv,
409 	 * but there is a window, where it could miss to see a new aio
410 	 * request that sneaked in.
411 	 */
412 	mutex_enter(&as->a_contents);
413 	if ((reqp_flags & AIO_PAGELOCKDONE) && AS_ISUNMAPWAIT(as))
414 		cv_broadcast(&as->a_cv);
415 	mutex_exit(&as->a_contents);
416 
417 	if (sigev)
418 		aio_sigev_send(p, sigev);
419 	else if (send_signal)
420 		psignal(p, SIGIO);
421 
422 	if (pkevp)
423 		port_send_event(pkevp);
424 	if (lio_sigev)
425 		aio_sigev_send(p, lio_sigev);
426 	if (lio_pkevp)
427 		port_send_event(lio_pkevp);
428 }
429 
430 /*
431  * send a queued signal to the specified process when
432  * the event signal is non-NULL. A return value of 1
433  * will indicate that a signal is queued, and 0 means that
434  * no signal was specified, nor sent.
435  */
436 static void
437 aio_sigev_send(proc_t *p, sigqueue_t *sigev)
438 {
439 	ASSERT(sigev != NULL);
440 
441 	mutex_enter(&p->p_lock);
442 	sigaddqa(p, NULL, sigev);
443 	mutex_exit(&p->p_lock);
444 }
445 
446 /*
447  * special case handling for zero length requests. the aio request
448  * short circuits the normal completion path since all that's required
449  * to complete this request is to copyout a zero to the aio request's
450  * return value.
451  */
452 void
453 aio_zerolen(aio_req_t *reqp)
454 {
455 
456 	struct buf *bp = &reqp->aio_req_buf;
457 
458 	reqp->aio_req_flags |= AIO_ZEROLEN;
459 
460 	bp->b_forw = (struct buf *)reqp;
461 	bp->b_proc = curproc;
462 
463 	bp->b_resid = 0;
464 	bp->b_flags = 0;
465 
466 	aio_done(bp);
467 }
468 
469 /*
470  * unlock pages previously locked by as_pagelock
471  */
472 void
473 aphysio_unlock(aio_req_t *reqp)
474 {
475 	struct buf *bp;
476 	struct iovec *iov;
477 	int flags;
478 
479 	if (reqp->aio_req_flags & AIO_PHYSIODONE)
480 		return;
481 
482 	reqp->aio_req_flags |= AIO_PHYSIODONE;
483 
484 	if (reqp->aio_req_flags & AIO_ZEROLEN)
485 		return;
486 
487 	bp = &reqp->aio_req_buf;
488 	iov = reqp->aio_req_uio.uio_iov;
489 	flags = (((bp->b_flags & B_READ) == B_READ) ? S_WRITE : S_READ);
490 	if (reqp->aio_req_flags & AIO_PAGELOCKDONE) {
491 		as_pageunlock(bp->b_proc->p_as,
492 		    bp->b_flags & B_SHADOW ? bp->b_shadow : NULL,
493 		    iov->iov_base, iov->iov_len, flags);
494 		reqp->aio_req_flags &= ~AIO_PAGELOCKDONE;
495 	}
496 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
497 	bp->b_flags |= B_DONE;
498 }
499 
500 /*
501  * deletes a requests id from the hash table of outstanding io.
502  */
503 static void
504 aio_hash_delete(aio_t *aiop, struct aio_req_t *reqp)
505 {
506 	long index;
507 	aio_result_t *resultp = reqp->aio_req_resultp;
508 	aio_req_t *current;
509 	aio_req_t **nextp;
510 
511 	index = AIO_HASH(resultp);
512 	nextp = (aiop->aio_hash + index);
513 	while ((current = *nextp) != NULL) {
514 		if (current->aio_req_resultp == resultp) {
515 			*nextp = current->aio_hash_next;
516 			return;
517 		}
518 		nextp = &current->aio_hash_next;
519 	}
520 }
521 
522 /*
523  * Put a list head struct onto its free list.
524  */
525 static void
526 aio_lio_free(aio_t *aiop, aio_lio_t *head)
527 {
528 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
529 
530 	if (head->lio_sigqp != NULL)
531 		kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
532 	head->lio_next = aiop->aio_lio_free;
533 	aiop->aio_lio_free = head;
534 }
535 
536 /*
537  * Put a reqp onto the freelist.
538  */
539 void
540 aio_req_free(aio_t *aiop, aio_req_t *reqp)
541 {
542 	aio_lio_t *liop;
543 
544 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
545 
546 	if (reqp->aio_req_portkev) {
547 		port_free_event(reqp->aio_req_portkev);
548 		reqp->aio_req_portkev = NULL;
549 	}
550 
551 	if ((liop = reqp->aio_req_lio) != NULL) {
552 		if (--liop->lio_nent == 0)
553 			aio_lio_free(aiop, liop);
554 		reqp->aio_req_lio = NULL;
555 	}
556 	if (reqp->aio_req_sigqp != NULL) {
557 		kmem_free(reqp->aio_req_sigqp, sizeof (sigqueue_t));
558 		reqp->aio_req_sigqp = NULL;
559 	}
560 	reqp->aio_req_next = aiop->aio_free;
561 	reqp->aio_req_prev = NULL;
562 	aiop->aio_free = reqp;
563 	aiop->aio_outstanding--;
564 	if (aiop->aio_outstanding == 0)
565 		cv_broadcast(&aiop->aio_waitcv);
566 	aio_hash_delete(aiop, reqp);
567 }
568 
569 /*
570  * Put a reqp onto the freelist.
571  */
572 void
573 aio_req_free_port(aio_t *aiop, aio_req_t *reqp)
574 {
575 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
576 
577 	reqp->aio_req_next = aiop->aio_free;
578 	reqp->aio_req_prev = NULL;
579 	aiop->aio_free = reqp;
580 	aiop->aio_outstanding--;
581 	aio_hash_delete(aiop, reqp);
582 }
583 
584 
/*
 * Verify the integrity of a queue (DEBUG kernels only).
 *
 * Walks the circular list once, asserting that the forward and
 * backward links of every element agree.  When entry_present is
 * non-NULL it must be found exactly once; when entry_missing is
 * non-NULL it must not be found at all.  In non-DEBUG builds the
 * check expands to nothing.
 */
#if defined(DEBUG)
static void
aio_verify_queue(aio_req_t *head,
	aio_req_t *entry_present, aio_req_t *entry_missing)
{
	aio_req_t *elem = head;
	int seen_present = 0;
	int seen_missing = 0;

	if (elem != NULL) {
		do {
			ASSERT(elem->aio_req_prev->aio_req_next == elem);
			ASSERT(elem->aio_req_next->aio_req_prev == elem);
			if (elem == entry_present)
				seen_present++;
			if (elem == entry_missing)
				seen_missing++;
			elem = elem->aio_req_next;
		} while (elem != head);
	}
	ASSERT(entry_present == NULL || seen_present == 1);
	ASSERT(entry_missing == NULL || seen_missing == 0);
}
#else
#define	aio_verify_queue(x, y, z)
#endif
613 
614 /*
615  * Put a request onto the tail of a queue.
616  */
617 void
618 aio_enq(aio_req_t **qhead, aio_req_t *reqp, int qflg_new)
619 {
620 	aio_req_t *head;
621 	aio_req_t *prev;
622 
623 	aio_verify_queue(*qhead, NULL, reqp);
624 
625 	if ((head = *qhead) == NULL) {
626 		reqp->aio_req_next = reqp;
627 		reqp->aio_req_prev = reqp;
628 		*qhead = reqp;
629 	} else {
630 		reqp->aio_req_next = head;
631 		reqp->aio_req_prev = prev = head->aio_req_prev;
632 		prev->aio_req_next = reqp;
633 		head->aio_req_prev = reqp;
634 	}
635 	reqp->aio_req_flags |= qflg_new;
636 }
637 
638 /*
639  * Remove a request from its queue.
640  */
641 void
642 aio_deq(aio_req_t **qhead, aio_req_t *reqp)
643 {
644 	aio_verify_queue(*qhead, reqp, NULL);
645 
646 	if (reqp->aio_req_next == reqp) {
647 		*qhead = NULL;
648 	} else {
649 		reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
650 		reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
651 		if (*qhead == reqp)
652 			*qhead = reqp->aio_req_next;
653 	}
654 	reqp->aio_req_next = NULL;
655 	reqp->aio_req_prev = NULL;
656 }
657 
658 /*
659  * concatenate a specified queue with the cleanupq. the specified
660  * queue is put onto the tail of the cleanupq. all elements on the
661  * specified queue should have their aio_req_flags field cleared.
662  */
663 /*ARGSUSED*/
664 void
665 aio_cleanupq_concat(aio_t *aiop, aio_req_t *q2, int qflg)
666 {
667 	aio_req_t *cleanupqhead, *q2tail;
668 	aio_req_t *reqp = q2;
669 
670 	do {
671 		ASSERT(reqp->aio_req_flags & qflg);
672 		reqp->aio_req_flags &= ~qflg;
673 		reqp->aio_req_flags |= AIO_CLEANUPQ;
674 	} while ((reqp = reqp->aio_req_next) != q2);
675 
676 	cleanupqhead = aiop->aio_cleanupq;
677 	if (cleanupqhead == NULL)
678 		aiop->aio_cleanupq = q2;
679 	else {
680 		cleanupqhead->aio_req_prev->aio_req_next = q2;
681 		q2tail = q2->aio_req_prev;
682 		q2tail->aio_req_next = cleanupqhead;
683 		q2->aio_req_prev = cleanupqhead->aio_req_prev;
684 		cleanupqhead->aio_req_prev = q2tail;
685 	}
686 }
687 
688 /*
689  * cleanup aio requests that are on the per-process poll queue.
690  */
691 void
692 aio_cleanup(int flag)
693 {
694 	aio_t *aiop = curproc->p_aio;
695 	aio_req_t *pollqhead, *cleanupqhead, *notifyqhead;
696 	aio_req_t *cleanupport;
697 	aio_req_t *portq = NULL;
698 	void (*func)();
699 	int signalled = 0;
700 	int qflag = 0;
701 	int exitflg;
702 
703 	ASSERT(aiop != NULL);
704 
705 	if (flag == AIO_CLEANUP_EXIT)
706 		exitflg = AIO_CLEANUP_EXIT;
707 	else
708 		exitflg = 0;
709 
710 	/*
711 	 * We need to get the aio_cleanupq_mutex because we are calling
712 	 * aio_cleanup_cleanupq()
713 	 */
714 	mutex_enter(&aiop->aio_cleanupq_mutex);
715 	/*
716 	 * take all the requests off the cleanupq, the notifyq,
717 	 * and the pollq.
718 	 */
719 	mutex_enter(&aiop->aio_mutex);
720 	if ((cleanupqhead = aiop->aio_cleanupq) != NULL) {
721 		aiop->aio_cleanupq = NULL;
722 		qflag++;
723 	}
724 	if ((notifyqhead = aiop->aio_notifyq) != NULL) {
725 		aiop->aio_notifyq = NULL;
726 		qflag++;
727 	}
728 	if ((pollqhead = aiop->aio_pollq) != NULL) {
729 		aiop->aio_pollq = NULL;
730 		qflag++;
731 	}
732 	if (flag) {
733 		if ((portq = aiop->aio_portq) != NULL)
734 			qflag++;
735 
736 		if ((cleanupport = aiop->aio_portcleanupq) != NULL) {
737 			aiop->aio_portcleanupq = NULL;
738 			qflag++;
739 		}
740 	}
741 	mutex_exit(&aiop->aio_mutex);
742 
743 	/*
744 	 * return immediately if cleanupq, pollq, and
745 	 * notifyq are all empty. someone else must have
746 	 * emptied them.
747 	 */
748 	if (!qflag) {
749 		mutex_exit(&aiop->aio_cleanupq_mutex);
750 		return;
751 	}
752 
753 	/*
754 	 * do cleanup for the various queues.
755 	 */
756 	if (cleanupqhead)
757 		signalled = aio_cleanup_cleanupq(aiop, cleanupqhead, exitflg);
758 	mutex_exit(&aiop->aio_cleanupq_mutex);
759 	if (notifyqhead)
760 		signalled = aio_cleanup_notifyq(aiop, notifyqhead, exitflg);
761 	if (pollqhead)
762 		aio_cleanup_pollq(aiop, pollqhead, exitflg);
763 	if (flag && (cleanupport || portq))
764 		aio_cleanup_portq(aiop, cleanupport, exitflg);
765 
766 	if (exitflg)
767 		return;
768 
769 	/*
770 	 * If we have an active aio_cleanup_thread it's possible for
771 	 * this routine to push something on to the done queue after
772 	 * an aiowait/aiosuspend thread has already decided to block.
773 	 * This being the case, we need a cv_broadcast here to wake
774 	 * these threads up. It is simpler and cleaner to do this
775 	 * broadcast here than in the individual cleanup routines.
776 	 */
777 
778 	mutex_enter(&aiop->aio_mutex);
779 	/*
780 	 * If there has never been an old solaris aio request
781 	 * issued by this process, then do not send a SIGIO signal.
782 	 */
783 	if (!(aiop->aio_flags & AIO_SOLARIS_REQ))
784 		signalled = 1;
785 	cv_broadcast(&aiop->aio_waitcv);
786 	mutex_exit(&aiop->aio_mutex);
787 
788 	/*
789 	 * Only if the process wasn't already signalled,
790 	 * determine if a SIGIO signal should be delievered.
791 	 */
792 	if (!signalled &&
793 	    (func = PTOU(curproc)->u_signal[SIGIO - 1]) != SIG_DFL &&
794 	    func != SIG_IGN)
795 		psignal(curproc, SIGIO);
796 }
797 
798 
799 /*
800  * Do cleanup for every element of the port cleanup queue.
801  */
802 static void
803 aio_cleanup_portq(aio_t *aiop, aio_req_t *cleanupq, int exitflag)
804 {
805 	aio_req_t	*reqp;
806 	aio_req_t	*next;
807 	aio_req_t	*headp;
808 	aio_lio_t	*liop;
809 
810 	/* first check the portq */
811 	if (exitflag || ((aiop->aio_flags & AIO_CLEANUP_PORT) == 0)) {
812 		mutex_enter(&aiop->aio_mutex);
813 		if (aiop->aio_flags & AIO_CLEANUP)
814 			aiop->aio_flags |= AIO_CLEANUP_PORT;
815 		mutex_exit(&aiop->aio_mutex);
816 
817 		/*
818 		 * It is not allowed to hold locks during aphysio_unlock().
819 		 * The aio_done() interrupt function will try to acquire
820 		 * aio_mutex and aio_portq_mutex.  Therefore we disconnect
821 		 * the portq list from the aiop for the duration of the
822 		 * aphysio_unlock() loop below.
823 		 */
824 		mutex_enter(&aiop->aio_portq_mutex);
825 		headp = aiop->aio_portq;
826 		aiop->aio_portq = NULL;
827 		mutex_exit(&aiop->aio_portq_mutex);
828 		if ((reqp = headp) != NULL) {
829 			do {
830 				next = reqp->aio_req_next;
831 				aphysio_unlock(reqp);
832 				if (exitflag) {
833 					mutex_enter(&aiop->aio_mutex);
834 					aio_req_free(aiop, reqp);
835 					mutex_exit(&aiop->aio_mutex);
836 				}
837 			} while ((reqp = next) != headp);
838 		}
839 
840 		if (headp != NULL && exitflag == 0) {
841 			/* move unlocked requests back to the port queue */
842 			aio_req_t *newq;
843 
844 			mutex_enter(&aiop->aio_portq_mutex);
845 			if ((newq = aiop->aio_portq) != NULL) {
846 				aio_req_t *headprev = headp->aio_req_prev;
847 				aio_req_t *newqprev = newq->aio_req_prev;
848 
849 				headp->aio_req_prev = newqprev;
850 				newq->aio_req_prev = headprev;
851 				headprev->aio_req_next = newq;
852 				newqprev->aio_req_next = headp;
853 			}
854 			aiop->aio_portq = headp;
855 			cv_broadcast(&aiop->aio_portcv);
856 			mutex_exit(&aiop->aio_portq_mutex);
857 		}
858 	}
859 
860 	/* now check the port cleanup queue */
861 	if ((reqp = cleanupq) == NULL)
862 		return;
863 	do {
864 		next = reqp->aio_req_next;
865 		aphysio_unlock(reqp);
866 		if (exitflag) {
867 			mutex_enter(&aiop->aio_mutex);
868 			aio_req_free(aiop, reqp);
869 			mutex_exit(&aiop->aio_mutex);
870 		} else {
871 			mutex_enter(&aiop->aio_portq_mutex);
872 			aio_enq(&aiop->aio_portq, reqp, 0);
873 			mutex_exit(&aiop->aio_portq_mutex);
874 			port_send_event(reqp->aio_req_portkev);
875 			if ((liop = reqp->aio_req_lio) != NULL) {
876 				int send_event = 0;
877 
878 				mutex_enter(&aiop->aio_mutex);
879 				ASSERT(liop->lio_refcnt > 0);
880 				if (--liop->lio_refcnt == 0) {
881 					if (liop->lio_port >= 0 &&
882 					    liop->lio_portkev) {
883 						liop->lio_port = -1;
884 						send_event = 1;
885 					}
886 				}
887 				mutex_exit(&aiop->aio_mutex);
888 				if (send_event)
889 					port_send_event(liop->lio_portkev);
890 			}
891 		}
892 	} while ((reqp = next) != cleanupq);
893 }
894 
895 /*
896  * Do cleanup for every element of the cleanupq.
897  */
898 static int
899 aio_cleanup_cleanupq(aio_t *aiop, aio_req_t *qhead, int exitflg)
900 {
901 	aio_req_t *reqp, *next;
902 	int signalled = 0;
903 
904 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
905 
906 	/*
907 	 * Since aio_req_done() or aio_req_find() use the HASH list to find
908 	 * the required requests, they could potentially take away elements
909 	 * if they are already done (AIO_DONEQ is set).
910 	 * The aio_cleanupq_mutex protects the queue for the duration of the
911 	 * loop from aio_req_done() and aio_req_find().
912 	 */
913 	if ((reqp = qhead) == NULL)
914 		return (0);
915 	do {
916 		ASSERT(reqp->aio_req_flags & AIO_CLEANUPQ);
917 		ASSERT(reqp->aio_req_portkev == NULL);
918 		next = reqp->aio_req_next;
919 		aphysio_unlock(reqp);
920 		mutex_enter(&aiop->aio_mutex);
921 		if (exitflg)
922 			aio_req_free(aiop, reqp);
923 		else
924 			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
925 		if (!exitflg) {
926 			if (reqp->aio_req_flags & AIO_SIGNALLED)
927 				signalled++;
928 			else
929 				reqp->aio_req_flags |= AIO_SIGNALLED;
930 		}
931 		mutex_exit(&aiop->aio_mutex);
932 	} while ((reqp = next) != qhead);
933 	return (signalled);
934 }
935 
936 /*
937  * do cleanup for every element of the notify queue.
938  */
939 static int
940 aio_cleanup_notifyq(aio_t *aiop, aio_req_t *qhead, int exitflg)
941 {
942 	aio_req_t *reqp, *next;
943 	aio_lio_t *liohead;
944 	sigqueue_t *sigev, *lio_sigev = NULL;
945 	int signalled = 0;
946 
947 	if ((reqp = qhead) == NULL)
948 		return (0);
949 	do {
950 		ASSERT(reqp->aio_req_flags & AIO_NOTIFYQ);
951 		next = reqp->aio_req_next;
952 		aphysio_unlock(reqp);
953 		if (exitflg) {
954 			mutex_enter(&aiop->aio_mutex);
955 			aio_req_free(aiop, reqp);
956 			mutex_exit(&aiop->aio_mutex);
957 		} else {
958 			mutex_enter(&aiop->aio_mutex);
959 			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
960 			sigev = reqp->aio_req_sigqp;
961 			reqp->aio_req_sigqp = NULL;
962 			if ((liohead = reqp->aio_req_lio) != NULL) {
963 				ASSERT(liohead->lio_refcnt > 0);
964 				if (--liohead->lio_refcnt == 0) {
965 					cv_signal(&liohead->lio_notify);
966 					lio_sigev = liohead->lio_sigqp;
967 					liohead->lio_sigqp = NULL;
968 				}
969 			}
970 			mutex_exit(&aiop->aio_mutex);
971 			if (sigev) {
972 				signalled++;
973 				aio_sigev_send(reqp->aio_req_buf.b_proc,
974 				    sigev);
975 			}
976 			if (lio_sigev) {
977 				signalled++;
978 				aio_sigev_send(reqp->aio_req_buf.b_proc,
979 				    lio_sigev);
980 			}
981 		}
982 	} while ((reqp = next) != qhead);
983 
984 	return (signalled);
985 }
986 
987 /*
988  * Do cleanup for every element of the poll queue.
989  */
990 static void
991 aio_cleanup_pollq(aio_t *aiop, aio_req_t *qhead, int exitflg)
992 {
993 	aio_req_t *reqp, *next;
994 
995 	/*
996 	 * As no other threads should be accessing the queue at this point,
997 	 * it isn't necessary to hold aio_mutex while we traverse its elements.
998 	 */
999 	if ((reqp = qhead) == NULL)
1000 		return;
1001 	do {
1002 		ASSERT(reqp->aio_req_flags & AIO_POLLQ);
1003 		next = reqp->aio_req_next;
1004 		aphysio_unlock(reqp);
1005 		if (exitflg) {
1006 			mutex_enter(&aiop->aio_mutex);
1007 			aio_req_free(aiop, reqp);
1008 			mutex_exit(&aiop->aio_mutex);
1009 		} else {
1010 			aio_copyout_result(reqp);
1011 			mutex_enter(&aiop->aio_mutex);
1012 			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
1013 			mutex_exit(&aiop->aio_mutex);
1014 		}
1015 	} while ((reqp = next) != qhead);
1016 }
1017 
1018 /*
1019  * called by exit(). waits for all outstanding kaio to finish
1020  * before the kaio resources are freed.
1021  */
1022 void
1023 aio_cleanup_exit(void)
1024 {
1025 	proc_t *p = curproc;
1026 	aio_t *aiop = p->p_aio;
1027 	aio_req_t *reqp, *next, *head;
1028 	aio_lio_t *nxtlio, *liop;
1029 
1030 	/*
1031 	 * wait for all outstanding kaio to complete. process
1032 	 * is now single-threaded; no other kaio requests can
1033 	 * happen once aio_pending is zero.
1034 	 */
1035 	mutex_enter(&aiop->aio_mutex);
1036 	aiop->aio_flags |= AIO_CLEANUP;
1037 	while ((aiop->aio_pending != 0) || (aiop->aio_flags & AIO_DONE_ACTIVE))
1038 		cv_wait(&aiop->aio_cleanupcv, &aiop->aio_mutex);
1039 	mutex_exit(&aiop->aio_mutex);
1040 
1041 	/* cleanup the cleanup-thread queues. */
1042 	aio_cleanup(AIO_CLEANUP_EXIT);
1043 
1044 	/*
1045 	 * Although this process is now single-threaded, we
1046 	 * still need to protect ourselves against a race with
1047 	 * aio_cleanup_dr_delete_memory().
1048 	 */
1049 	mutex_enter(&p->p_lock);
1050 
1051 	/*
1052 	 * free up the done queue's resources.
1053 	 */
1054 	if ((head = aiop->aio_doneq) != NULL) {
1055 		aiop->aio_doneq = NULL;
1056 		reqp = head;
1057 		do {
1058 			next = reqp->aio_req_next;
1059 			aphysio_unlock(reqp);
1060 			kmem_free(reqp, sizeof (struct aio_req_t));
1061 		} while ((reqp = next) != head);
1062 	}
1063 	/*
1064 	 * release aio request freelist.
1065 	 */
1066 	for (reqp = aiop->aio_free; reqp != NULL; reqp = next) {
1067 		next = reqp->aio_req_next;
1068 		kmem_free(reqp, sizeof (struct aio_req_t));
1069 	}
1070 
1071 	/*
1072 	 * release io list head freelist.
1073 	 */
1074 	for (liop = aiop->aio_lio_free; liop != NULL; liop = nxtlio) {
1075 		nxtlio = liop->lio_next;
1076 		kmem_free(liop, sizeof (aio_lio_t));
1077 	}
1078 
1079 	if (aiop->aio_iocb)
1080 		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
1081 
1082 	mutex_destroy(&aiop->aio_mutex);
1083 	mutex_destroy(&aiop->aio_portq_mutex);
1084 	mutex_destroy(&aiop->aio_cleanupq_mutex);
1085 	p->p_aio = NULL;
1086 	mutex_exit(&p->p_lock);
1087 	kmem_free(aiop, sizeof (struct aio));
1088 }
1089 
1090 /*
1091  * copy out aio request's result to a user-level result_t buffer.
1092  */
1093 void
1094 aio_copyout_result(aio_req_t *reqp)
1095 {
1096 	struct buf	*bp;
1097 	struct iovec	*iov;
1098 	void		*resultp;
1099 	int		error;
1100 	size_t		retval;
1101 
1102 	if (reqp->aio_req_flags & AIO_COPYOUTDONE)
1103 		return;
1104 
1105 	reqp->aio_req_flags |= AIO_COPYOUTDONE;
1106 
1107 	iov = reqp->aio_req_uio.uio_iov;
1108 	bp = &reqp->aio_req_buf;
1109 	/* "resultp" points to user-level result_t buffer */
1110 	resultp = (void *)reqp->aio_req_resultp;
1111 	if (bp->b_flags & B_ERROR) {
1112 		if (bp->b_error)
1113 			error = bp->b_error;
1114 		else
1115 			error = EIO;
1116 		retval = (size_t)-1;
1117 	} else {
1118 		error = 0;
1119 		retval = iov->iov_len - bp->b_resid;
1120 	}
1121 #ifdef	_SYSCALL32_IMPL
1122 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1123 		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
1124 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
1125 	} else {
1126 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
1127 		    (int)retval);
1128 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
1129 	}
1130 #else
1131 	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
1132 	(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
1133 #endif
1134 }
1135 
1136 
1137 void
1138 aio_copyout_result_port(struct iovec *iov, struct buf *bp, void *resultp)
1139 {
1140 	int errno;
1141 	size_t retval;
1142 
1143 	if (bp->b_flags & B_ERROR) {
1144 		if (bp->b_error)
1145 			errno = bp->b_error;
1146 		else
1147 			errno = EIO;
1148 		retval = (size_t)-1;
1149 	} else {
1150 		errno = 0;
1151 		retval = iov->iov_len - bp->b_resid;
1152 	}
1153 #ifdef	_SYSCALL32_IMPL
1154 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1155 		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
1156 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
1157 	} else {
1158 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
1159 		    (int)retval);
1160 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, errno);
1161 	}
1162 #else
1163 	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
1164 	(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
1165 #endif
1166 }
1167 
1168 /*
1169  * This function is used to remove a request from the done queue.
1170  */
1171 
1172 void
1173 aio_req_remove_portq(aio_t *aiop, aio_req_t *reqp)
1174 {
1175 	ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex));
1176 	while (aiop->aio_portq == NULL) {
1177 		/*
1178 		 * aio_portq is set to NULL when aio_cleanup_portq()
1179 		 * is working with the event queue.
1180 		 * The aio_cleanup_thread() uses aio_cleanup_portq()
1181 		 * to unlock all AIO buffers with completed transactions.
1182 		 * Wait here until aio_cleanup_portq() restores the
1183 		 * list of completed transactions in aio_portq.
1184 		 */
1185 		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
1186 	}
1187 	aio_deq(&aiop->aio_portq, reqp);
1188 }
1189 
1190 /* ARGSUSED */
1191 void
1192 aio_close_port(void *arg, int port, pid_t pid, int lastclose)
1193 {
1194 	aio_t		*aiop;
1195 	aio_req_t 	*reqp;
1196 	aio_req_t 	*next;
1197 	aio_req_t	*headp;
1198 	int		counter;
1199 
1200 	if (arg == NULL)
1201 		aiop = curproc->p_aio;
1202 	else
1203 		aiop = (aio_t *)arg;
1204 
1205 	/*
1206 	 * The PORT_SOURCE_AIO source is always associated with every new
1207 	 * created port by default.
1208 	 * If no asynchronous I/O transactions were associated with the port
1209 	 * then the aiop pointer will still be set to NULL.
1210 	 */
1211 	if (aiop == NULL)
1212 		return;
1213 
1214 	/*
1215 	 * Within a process event ports can be used to collect events other
1216 	 * than PORT_SOURCE_AIO events. At the same time the process can submit
1217 	 * asynchronous I/Os transactions which are not associated with the
1218 	 * current port.
1219 	 * The current process oriented model of AIO uses a sigle queue for
1220 	 * pending events. On close the pending queue (queue of asynchronous
1221 	 * I/O transactions using event port notification) must be scanned
1222 	 * to detect and handle pending I/Os using the current port.
1223 	 */
1224 	mutex_enter(&aiop->aio_portq_mutex);
1225 	mutex_enter(&aiop->aio_mutex);
1226 	counter = 0;
1227 	if ((headp = aiop->aio_portpending) != NULL) {
1228 		reqp = headp;
1229 		do {
1230 			if (reqp->aio_req_portkev &&
1231 			    reqp->aio_req_port == port) {
1232 				reqp->aio_req_flags |= AIO_CLOSE_PORT;
1233 				counter++;
1234 			}
1235 		} while ((reqp = reqp->aio_req_next) != headp);
1236 	}
1237 	if (counter == 0) {
1238 		/* no AIOs pending */
1239 		mutex_exit(&aiop->aio_mutex);
1240 		mutex_exit(&aiop->aio_portq_mutex);
1241 		return;
1242 	}
1243 	aiop->aio_portpendcnt += counter;
1244 	mutex_exit(&aiop->aio_mutex);
1245 	while (aiop->aio_portpendcnt)
1246 		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
1247 
1248 	/*
1249 	 * all pending AIOs are completed.
1250 	 * check port doneq
1251 	 */
1252 	headp = NULL;
1253 	if ((reqp = aiop->aio_portq) != NULL) {
1254 		do {
1255 			next = reqp->aio_req_next;
1256 			if (reqp->aio_req_port == port) {
1257 				/* dequeue request and discard event */
1258 				aio_req_remove_portq(aiop, reqp);
1259 				port_free_event(reqp->aio_req_portkev);
1260 				/* put request in temporary queue */
1261 				reqp->aio_req_next = headp;
1262 				headp = reqp;
1263 			}
1264 		} while ((reqp = next) != aiop->aio_portq);
1265 	}
1266 	mutex_exit(&aiop->aio_portq_mutex);
1267 
1268 	/* headp points to the list of requests to be discarded */
1269 	for (reqp = headp; reqp != NULL; reqp = next) {
1270 		next = reqp->aio_req_next;
1271 		aphysio_unlock(reqp);
1272 		mutex_enter(&aiop->aio_mutex);
1273 		aio_req_free_port(aiop, reqp);
1274 		mutex_exit(&aiop->aio_mutex);
1275 	}
1276 
1277 	if (aiop->aio_flags & AIO_CLEANUP)
1278 		cv_broadcast(&aiop->aio_waitcv);
1279 }
1280 
1281 /*
1282  * aio_cleanup_dr_delete_memory is used by dr's delete_memory_thread
1283  * to kick start the aio_cleanup_thread for the give process to do the
1284  * necessary cleanup.
1285  * This is needed so that delete_memory_thread can obtain writer locks
1286  * on pages that need to be relocated during a dr memory delete operation,
1287  * otherwise a deadly embrace may occur.
1288  */
1289 int
1290 aio_cleanup_dr_delete_memory(proc_t *procp)
1291 {
1292 	struct aio *aiop = procp->p_aio;
1293 	struct as *as = procp->p_as;
1294 	int ret = 0;
1295 
1296 	ASSERT(MUTEX_HELD(&procp->p_lock));
1297 
1298 	mutex_enter(&as->a_contents);
1299 
1300 	if (aiop != NULL) {
1301 		aiop->aio_rqclnup = 1;
1302 		cv_broadcast(&as->a_cv);
1303 		ret = 1;
1304 	}
1305 	mutex_exit(&as->a_contents);
1306 	return (ret);
1307 }
1308