1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/types.h>
28 #include <sys/proc.h>
29 #include <sys/file.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/sysmacros.h>
33 #include <sys/cmn_err.h>
34 #include <sys/systm.h>
35 #include <vm/as.h>
36 #include <vm/page.h>
37 #include <sys/uio.h>
38 #include <sys/kmem.h>
39 #include <sys/debug.h>
40 #include <sys/aio_impl.h>
41 #include <sys/epm.h>
42 #include <sys/fs/snode.h>
43 #include <sys/siginfo.h>
44 #include <sys/cpuvar.h>
45 #include <sys/conf.h>
46 #include <sys/sdt.h>
47
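/*
 * exported functions
 */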
48 int aphysio(int (*)(), int (*)(), dev_t, int, void (*)(), struct aio_req *);
49 int aio_done(struct buf *);
50 void aphysio_unlock(aio_req_t *);
51 void aio_cleanup(int);
52 void aio_cleanup_exit(void);
53
54 /*
55 * private functions
56 */
57 static void aio_sigev_send(proc_t *, sigqueue_t *);
58 static void aio_hash_delete(aio_t *, aio_req_t *);
59 static void aio_lio_free(aio_t *, aio_lio_t *);
60 static int aio_cleanup_cleanupq(aio_t *, aio_req_t *, int);
61 static int aio_cleanup_notifyq(aio_t *, aio_req_t *, int);
62 static void aio_cleanup_pollq(aio_t *, aio_req_t *, int);
63 static void aio_cleanup_portq(aio_t *, aio_req_t *, int);
64
65 /*
66 * async version of physio() that doesn't wait synchronously
67 * for the driver's strategy routine to complete.
68 */
69
70 int
71 aphysio(
72 int (*strategy)(struct buf *),
73 int (*cancel)(struct buf *),
74 dev_t dev,
75 int rw,
76 void (*mincnt)(struct buf *),
77 struct aio_req *aio)
78 {
79 struct uio *uio = aio->aio_uio;
80 aio_req_t *reqp = (aio_req_t *)aio->aio_private;
81 struct buf *bp = &reqp->aio_req_buf;
82 struct iovec *iov;
83 struct as *as;
84 char *a;
85 int error;
86 size_t c;
87 struct page **pplist;
88 struct dev_ops *ops = devopsp[getmajor(dev)];
89
90 if (uio->uio_loffset < 0)
91 return (EINVAL);
92 #ifdef _ILP32
93 /*
94 * For 32-bit kernels, check against SPEC_MAXOFFSET_T which represents
95 * the maximum size that can be supported by the IO subsystem.
96 * XXX this code assumes a D_64BIT driver.
97 */
98 if (uio->uio_loffset > SPEC_MAXOFFSET_T)
99 return (EINVAL);
100 #endif /* _ILP32 */
101
102 if (rw == B_READ) {
103 CPU_STATS_ADD_K(sys, phread, 1);
104 } else {
105 CPU_STATS_ADD_K(sys, phwrite, 1);
106 }
107
108 iov = uio->uio_iov;
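	/*
	 * the buf is embedded in the aio_req_t rather than taken from
	 * getrbuf(), so its semaphores must be initialized by hand here.
	 */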
109 sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
110 sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
111
112 bp->b_error = 0;
113 bp->b_flags = B_BUSY | B_PHYS | B_ASYNC | rw;
114 bp->b_edev = dev;
115 bp->b_dev = cmpdev(dev);
116 bp->b_lblkno = btodt(uio->uio_loffset);
117 bp->b_offset = uio->uio_loffset;
118 (void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
119 (void *)bp->b_edev, (void **)&bp->b_dip);
120
121 /*
122 * Clustering: Clustering can set the b_iodone, b_forw and
123 * b_proc fields to cluster-specific values.
124 */
125 if (bp->b_iodone == NULL) {
126 bp->b_iodone = aio_done;
127 /* b_forw points at an aio_req_t structure */
128 bp->b_forw = (struct buf *)reqp;
129 bp->b_proc = curproc;
130 }
131
132 a = bp->b_un.b_addr = iov->iov_base;
133 c = bp->b_bcount = iov->iov_len;
134
135 (*mincnt)(bp);
136 if (bp->b_bcount != iov->iov_len)
137 return (ENOTSUP);
138
139 as = bp->b_proc->p_as;
140
141 error = as_pagelock(as, &pplist, a,
142 c, rw == B_READ? S_WRITE : S_READ);
143 if (error != 0) {
144 bp->b_flags |= B_ERROR;
145 bp->b_error = error;
146 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
147 return (error);
148 }
149 reqp->aio_req_flags |= AIO_PAGELOCKDONE;
150 bp->b_shadow = pplist;
151 if (pplist != NULL) {
152 bp->b_flags |= B_SHADOW;
153 }
154
155 if (cancel != anocancel)
156 cmn_err(CE_PANIC,
157 "aphysio: cancellation not supported, use anocancel");
158
159 reqp->aio_req_cancel = cancel;
160
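	/* fire the io:::start DTrace probe before passing the buf to the driver */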
161 DTRACE_IO1(start, struct buf *, bp);
162
163 return ((*strategy)(bp));
164 }
165
166 /*ARGSUSED*/
167 int
168 anocancel(struct buf *bp)
169 {
170 return (ENXIO);
171 }
172
173 /*
174 * Called from biodone().
175 * Notify process that a pending AIO has finished.
176 */
177
178 /*
179 * Clustering: This function is made non-static as it is used
180 * by clustering s/w as a contract private interface.
181 */
182
183 int
184 aio_done(struct buf *bp)
185 {
186 proc_t *p;
187 struct as *as;
188 aio_req_t *reqp;
189 aio_lio_t *head = NULL;
190 aio_t *aiop;
191 sigqueue_t *sigev = NULL;
192 sigqueue_t *lio_sigev = NULL;
193 port_kevent_t *pkevp = NULL;
194 port_kevent_t *lio_pkevp = NULL;
195 int fd;
196 int cleanupqflag;
197 int pollqflag;
198 int portevpend;
199 void (*func)();
200 int use_port = 0;
201 int reqp_flags = 0;
202 int send_signal = 0;
203
204 p = bp->b_proc;
205 as = p->p_as;
206 reqp = (aio_req_t *)bp->b_forw;
207 fd = reqp->aio_req_fd;
208
209 /*
210 * mapout earlier so that more kmem is available when aio is
211 * heavily used. bug #1262082
212 */
213 if (bp->b_flags & B_REMAPPED)
214 bp_mapout(bp);
215
216 /* decrement fd's ref count by one, now that aio request is done. */
217 areleasef(fd, P_FINFO(p));
218
219 aiop = p->p_aio;
220 ASSERT(aiop != NULL);
221
222 mutex_enter(&aiop->aio_portq_mutex);
223 mutex_enter(&aiop->aio_mutex);
224 ASSERT(aiop->aio_pending > 0);
225 ASSERT(reqp->aio_req_flags & AIO_PENDING);
226 aiop->aio_pending--;
227 reqp->aio_req_flags &= ~AIO_PENDING;
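	/*
	 * snapshot the request flags now; reqp may be requeued and
	 * processed by another thread once the locks are dropped.
	 */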
228 reqp_flags = reqp->aio_req_flags;
229 if ((pkevp = reqp->aio_req_portkev) != NULL) {
230 /* Event port notification is desired for this transaction */
231 if (reqp->aio_req_flags & AIO_CLOSE_PORT) {
232 /*
233 * The port is being closed and it is waiting for
234 * pending asynchronous I/O transactions to complete.
235 */
236 portevpend = --aiop->aio_portpendcnt;
237 aio_deq(&aiop->aio_portpending, reqp);
238 aio_enq(&aiop->aio_portq, reqp, 0);
239 mutex_exit(&aiop->aio_mutex);
240 mutex_exit(&aiop->aio_portq_mutex);
241 port_send_event(pkevp);
242 if (portevpend == 0)
243 cv_broadcast(&aiop->aio_portcv);
244 return (0);
245 }
246
247 if (aiop->aio_flags & AIO_CLEANUP) {
248 /*
249 * aio_cleanup_thread() is waiting for completion of
250 * transactions.
251 */
252 mutex_enter(&as->a_contents);
253 aio_deq(&aiop->aio_portpending, reqp);
254 aio_enq(&aiop->aio_portcleanupq, reqp, 0);
255 cv_signal(&aiop->aio_cleanupcv);
256 mutex_exit(&as->a_contents);
257 mutex_exit(&aiop->aio_mutex);
258 mutex_exit(&aiop->aio_portq_mutex);
259 return (0);
260 }
261
262 aio_deq(&aiop->aio_portpending, reqp);
263 aio_enq(&aiop->aio_portq, reqp, 0);
264
265 use_port = 1;
266 } else {
267 /*
268 * when the AIO_CLEANUP flag is enabled for this
269 * process, or when the AIO_POLL bit is set for
270 * this request, special handling is required.
271 * otherwise the request is put onto the doneq.
272 */
273 cleanupqflag = (aiop->aio_flags & AIO_CLEANUP);
274 pollqflag = (reqp->aio_req_flags & AIO_POLL);
275 if (cleanupqflag | pollqflag) {
276
277 if (cleanupqflag)
278 mutex_enter(&as->a_contents);
279
280 /*
281 * requests with their AIO_POLL bit set are put
282 * on the pollq, requests with sigevent structures
283 * or with listio heads are put on the notifyq, and
284 * the remaining requests don't require any special
285 * cleanup handling, so they're put onto the default
286 * cleanupq.
287 */
288 if (pollqflag)
289 aio_enq(&aiop->aio_pollq, reqp, AIO_POLLQ);
290 else if (reqp->aio_req_sigqp || reqp->aio_req_lio)
291 aio_enq(&aiop->aio_notifyq, reqp, AIO_NOTIFYQ);
292 else
293 aio_enq(&aiop->aio_cleanupq, reqp,
294 AIO_CLEANUPQ);
295
296 if (cleanupqflag) {
297 cv_signal(&aiop->aio_cleanupcv);
298 mutex_exit(&as->a_contents);
299 mutex_exit(&aiop->aio_mutex);
300 mutex_exit(&aiop->aio_portq_mutex);
301 } else {
302 ASSERT(pollqflag);
303 /* block aio_cleanup_exit until we're done */
304 aiop->aio_flags |= AIO_DONE_ACTIVE;
305 mutex_exit(&aiop->aio_mutex);
306 mutex_exit(&aiop->aio_portq_mutex);
307 /*
308 * let the cleanup processing happen from an AST by
309 * setting an AST on all threads in this process
310 */
311 mutex_enter(&p->p_lock);
312 set_proc_ast(p);
313 mutex_exit(&p->p_lock);
314 mutex_enter(&aiop->aio_mutex);
315 /* wakeup anybody waiting in aiowait() */
316 cv_broadcast(&aiop->aio_waitcv);
317
318 /* wakeup aio_cleanup_exit if needed */
319 if (aiop->aio_flags & AIO_CLEANUP)
320 cv_signal(&aiop->aio_cleanupcv);
321 aiop->aio_flags &= ~AIO_DONE_ACTIVE;
322 mutex_exit(&aiop->aio_mutex);
323 }
324 return (0);
325 }
326
327 /*
328 * save req's sigevent pointer, and check its
329 * value after releasing aio_mutex lock.
330 */
331 sigev = reqp->aio_req_sigqp;
332 reqp->aio_req_sigqp = NULL;
333
334 /* put request on done queue. */
335 aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
336 } /* portkevent */
337
338 /*
339 * when list IO notification is enabled, a notification or
340 * signal is sent only when all entries in the list are done.
341 */
342 if ((head = reqp->aio_req_lio) != NULL) {
343 ASSERT(head->lio_refcnt > 0);
344 if (--head->lio_refcnt == 0) {
345 /*
346 * save lio's sigevent pointer, and check
347 * its value after releasing aio_mutex lock.
348 */
349 lio_sigev = head->lio_sigqp;
350 head->lio_sigqp = NULL;
351 cv_signal(&head->lio_notify);
352 if (head->lio_port >= 0 &&
353 (lio_pkevp = head->lio_portkev) != NULL)
354 head->lio_port = -1;
355 }
356 }
357
358 /*
359 * if AIO_WAITN is set, wake up the waiters
360 * only when the requested number of I/Os
361 * has completed, or when all outstanding
362 * I/Os are done
363 */
364 if (aiop->aio_flags & AIO_WAITN) {
365 if (aiop->aio_waitncnt > 0)
366 aiop->aio_waitncnt--;
367 if (aiop->aio_pending == 0 ||
368 aiop->aio_waitncnt == 0)
369 cv_broadcast(&aiop->aio_waitcv);
370 } else {
371 cv_broadcast(&aiop->aio_waitcv);
372 }
373
374 /*
375 * No need to set this flag for pollq, portq, lio requests.
376 * If this is an old Solaris aio request, and the process has
377 * a SIGIO signal handler enabled, then send a SIGIO signal.
378 */
379 if (!sigev && !use_port && head == NULL &&
380 (reqp->aio_req_flags & AIO_SOLARIS) &&
381 (func = PTOU(p)->u_signal[SIGIO - 1]) != SIG_DFL &&
382 (func != SIG_IGN)) {
383 send_signal = 1;
384 reqp->aio_req_flags |= AIO_SIGNALLED;
385 }
386
387 mutex_exit(&aiop->aio_mutex);
388 mutex_exit(&aiop->aio_portq_mutex);
389
390 /*
391 * Could the cleanup thread be waiting for AIO with locked
392 * resources to finish?
393 * Ideally in that case cleanup thread should block on cleanupcv,
394 * but there is a window where it could miss a new aio
395 * request that sneaked in.
396 */
397 mutex_enter(&as->a_contents);
398 if ((reqp_flags & AIO_PAGELOCKDONE) && AS_ISUNMAPWAIT(as))
399 cv_broadcast(&as->a_cv);
400 mutex_exit(&as->a_contents);
401
402 if (sigev)
403 aio_sigev_send(p, sigev);
404 else if (send_signal)
405 psignal(p, SIGIO);
406
407 if (pkevp)
408 port_send_event(pkevp);
409 if (lio_sigev)
410 aio_sigev_send(p, lio_sigev);
411 if (lio_pkevp)
412 port_send_event(lio_pkevp);
413
414 return (0);
415 }
416
417 /*
418 * send a queued signal to the specified process.
419 * the sigevent must be non-NULL; callers that have no
420 * sigevent to deliver simply skip this call, and the
421 * ASSERT below enforces that.
422 */
423 static void
424 aio_sigev_send(proc_t *p, sigqueue_t *sigev)
425 {
426 ASSERT(sigev != NULL);
427
428 mutex_enter(&p->p_lock);
429 sigaddqa(p, NULL, sigev);
430 mutex_exit(&p->p_lock);
431 }
432
433 /*
434 * special case handling for zero length requests. the aio request
435 * short circuits the normal completion path since all that's required
436 * to complete this request is to copyout a zero to the aio request's
437 * return value.
438 */
439 void
440 aio_zerolen(aio_req_t *reqp)
441 {
442
443 struct buf *bp = &reqp->aio_req_buf;
444
445 reqp->aio_req_flags |= AIO_ZEROLEN;
446
447 bp->b_forw = (struct buf *)reqp;
448 bp->b_proc = curproc;
449
450 bp->b_resid = 0;
451 bp->b_flags = 0;
452
453 aio_done(bp);
454 }
455
456 /*
457 * unlock pages previously locked by as_pagelock
458 */
459 void
460 aphysio_unlock(aio_req_t *reqp)
461 {
462 struct buf *bp;
463 struct iovec *iov;
464 int flags;
465
466 if (reqp->aio_req_flags & AIO_PHYSIODONE)
467 return;
468
469 reqp->aio_req_flags |= AIO_PHYSIODONE;
470
471 if (reqp->aio_req_flags & AIO_ZEROLEN)
472 return;
473
474 bp = &reqp->aio_req_buf;
475 iov = reqp->aio_req_uio.uio_iov;
476 flags = (((bp->b_flags & B_READ) == B_READ) ? S_WRITE : S_READ);
477 if (reqp->aio_req_flags & AIO_PAGELOCKDONE) {
478 as_pageunlock(bp->b_proc->p_as,
479 bp->b_flags & B_SHADOW ? bp->b_shadow : NULL,
480 iov->iov_base, iov->iov_len, flags);
481 reqp->aio_req_flags &= ~AIO_PAGELOCKDONE;
482 }
483 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
484 bp->b_flags |= B_DONE;
485 }
486
487 /*
488 * deletes a request's id from the hash table of outstanding io.
489 */
490 static void
491 aio_hash_delete(aio_t *aiop, struct aio_req_t *reqp)
492 {
493 long index;
494 aio_result_t *resultp = reqp->aio_req_resultp;
495 aio_req_t *current;
496 aio_req_t **nextp;
497
498 index = AIO_HASH(resultp);
499 nextp = (aiop->aio_hash + index);
500 while ((current = *nextp) != NULL) {
501 if (current->aio_req_resultp == resultp) {
502 *nextp = current->aio_hash_next;
503 return;
504 }
505 nextp = &current->aio_hash_next;
506 }
507 }
508
509 /*
510 * Put a list head struct onto its free list.
511 */
512 static void
513 aio_lio_free(aio_t *aiop, aio_lio_t *head)
514 {
515 ASSERT(MUTEX_HELD(&aiop->aio_mutex));
516
517 if (head->lio_sigqp != NULL)
518 kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
519 head->lio_next = aiop->aio_lio_free;
520 aiop->aio_lio_free = head;
521 }
522
523 /*
524 * Put a reqp onto the freelist.
525 */
526 void
527 aio_req_free(aio_t *aiop, aio_req_t *reqp)
528 {
529 aio_lio_t *liop;
530
531 ASSERT(MUTEX_HELD(&aiop->aio_mutex));
532
533 if (reqp->aio_req_portkev) {
534 port_free_event(reqp->aio_req_portkev);
535 reqp->aio_req_portkev = NULL;
536 }
537
538 if ((liop = reqp->aio_req_lio) != NULL) {
539 if (--liop->lio_nent == 0)
540 aio_lio_free(aiop, liop);
541 reqp->aio_req_lio = NULL;
542 }
543 if (reqp->aio_req_sigqp != NULL) {
544 kmem_free(reqp->aio_req_sigqp, sizeof (sigqueue_t));
545 reqp->aio_req_sigqp = NULL;
546 }
547 reqp->aio_req_next = aiop->aio_free;
548 reqp->aio_req_prev = NULL;
549 aiop->aio_free = reqp;
550 aiop->aio_outstanding--;
551 if (aiop->aio_outstanding == 0)
552 cv_broadcast(&aiop->aio_waitcv);
553 aio_hash_delete(aiop, reqp);
554 }
555
556 /*
557 * Put a reqp onto the freelist (event port variant of aio_req_free()).
558 */
559 void
560 aio_req_free_port(aio_t *aiop, aio_req_t *reqp)
561 {
562 ASSERT(MUTEX_HELD(&aiop->aio_mutex));
563
564 reqp->aio_req_next = aiop->aio_free;
565 reqp->aio_req_prev = NULL;
566 aiop->aio_free = reqp;
567 aiop->aio_outstanding--;
568 aio_hash_delete(aiop, reqp);
569 }
570
571
572 /*
573 * Verify the integrity of a queue.
574 */
575 #if defined(DEBUG)
576 static void
577 aio_verify_queue(aio_req_t *head,
578 aio_req_t *entry_present, aio_req_t *entry_missing)
579 {
580 aio_req_t *reqp;
581 int found = 0;
582 int present = 0;
583
584 if ((reqp = head) != NULL) {
585 do {
586 ASSERT(reqp->aio_req_prev->aio_req_next == reqp);
587 ASSERT(reqp->aio_req_next->aio_req_prev == reqp);
588 if (entry_present == reqp)
589 found++;
590 if (entry_missing == reqp)
591 present++;
592 } while ((reqp = reqp->aio_req_next) != head);
593 }
594 ASSERT(entry_present == NULL || found == 1);
595 ASSERT(entry_missing == NULL || present == 0);
596 }
597 #else
598 #define aio_verify_queue(x, y, z)
599 #endif
600
601 /*
602 * Put a request onto the tail of a queue.
603 */
604 void
605 aio_enq(aio_req_t **qhead, aio_req_t *reqp, int qflg_new)
606 {
607 aio_req_t *head;
608 aio_req_t *prev;
609
610 aio_verify_queue(*qhead, NULL, reqp);
611
612 if ((head = *qhead) == NULL) {
613 reqp->aio_req_next = reqp;
614 reqp->aio_req_prev = reqp;
615 *qhead = reqp;
616 } else {
617 reqp->aio_req_next = head;
618 reqp->aio_req_prev = prev = head->aio_req_prev;
619 prev->aio_req_next = reqp;
620 head->aio_req_prev = reqp;
621 }
622 reqp->aio_req_flags |= qflg_new;
623 }
624
625 /*
626 * Remove a request from its queue.
627 */
628 void
629 aio_deq(aio_req_t **qhead, aio_req_t *reqp)
630 {
631 aio_verify_queue(*qhead, reqp, NULL);
632
633 if (reqp->aio_req_next == reqp) {
634 *qhead = NULL;
635 } else {
636 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
637 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
638 if (*qhead == reqp)
639 *qhead = reqp->aio_req_next;
640 }
641 reqp->aio_req_next = NULL;
642 reqp->aio_req_prev = NULL;
643 }
644
645 /*
646 * concatenate a specified queue with the cleanupq. the specified
647 * queue is put onto the tail of the cleanupq. each element has the
648 * passed-in queue flag cleared and AIO_CLEANUPQ set in aio_req_flags.
649 */
650 /*ARGSUSED*/
651 void
652 aio_cleanupq_concat(aio_t *aiop, aio_req_t *q2, int qflg)
653 {
654 aio_req_t *cleanupqhead, *q2tail;
655 aio_req_t *reqp = q2;
656
657 do {
658 ASSERT(reqp->aio_req_flags & qflg);
659 reqp->aio_req_flags &= ~qflg;
660 reqp->aio_req_flags |= AIO_CLEANUPQ;
661 } while ((reqp = reqp->aio_req_next) != q2);
662
663 cleanupqhead = aiop->aio_cleanupq;
664 if (cleanupqhead == NULL)
665 aiop->aio_cleanupq = q2;
666 else {
667 cleanupqhead->aio_req_prev->aio_req_next = q2;
668 q2tail = q2->aio_req_prev;
669 q2tail->aio_req_next = cleanupqhead;
670 q2->aio_req_prev = cleanupqhead->aio_req_prev;
671 cleanupqhead->aio_req_prev = q2tail;
672 }
673 }
674
675 /*
676 * cleanup aio requests on the per-process cleanup, notify, poll and port queues.
677 */
678 void
679 aio_cleanup(int flag)
680 {
681 aio_t *aiop = curproc->p_aio;
682 aio_req_t *pollqhead, *cleanupqhead, *notifyqhead;
683 aio_req_t *cleanupport;
684 aio_req_t *portq = NULL;
685 void (*func)();
686 int signalled = 0;
687 int qflag = 0;
688 int exitflg;
689
690 ASSERT(aiop != NULL);
691
692 if (flag == AIO_CLEANUP_EXIT)
693 exitflg = AIO_CLEANUP_EXIT;
694 else
695 exitflg = 0;
696
697 /*
698 * We need to get the aio_cleanupq_mutex because we are calling
699 * aio_cleanup_cleanupq()
700 */
701 mutex_enter(&aiop->aio_cleanupq_mutex);
702 /*
703 * take all the requests off the cleanupq, the notifyq,
704 * and the pollq.
705 */
706 mutex_enter(&aiop->aio_mutex);
707 if ((cleanupqhead = aiop->aio_cleanupq) != NULL) {
708 aiop->aio_cleanupq = NULL;
709 qflag++;
710 }
711 if ((notifyqhead = aiop->aio_notifyq) != NULL) {
712 aiop->aio_notifyq = NULL;
713 qflag++;
714 }
715 if ((pollqhead = aiop->aio_pollq) != NULL) {
716 aiop->aio_pollq = NULL;
717 qflag++;
718 }
719 if (flag) {
720 if ((portq = aiop->aio_portq) != NULL)
721 qflag++;
722
723 if ((cleanupport = aiop->aio_portcleanupq) != NULL) {
724 aiop->aio_portcleanupq = NULL;
725 qflag++;
726 }
727 }
728 mutex_exit(&aiop->aio_mutex);
729
730 /*
731 * return immediately if cleanupq, pollq, and
732 * notifyq are all empty. someone else must have
733 * emptied them.
734 */
735 if (!qflag) {
736 mutex_exit(&aiop->aio_cleanupq_mutex);
737 return;
738 }
739
740 /*
741 * do cleanup for the various queues.
742 */
743 if (cleanupqhead)
744 signalled = aio_cleanup_cleanupq(aiop, cleanupqhead, exitflg);
745 mutex_exit(&aiop->aio_cleanupq_mutex);
746 if (notifyqhead)
747 signalled = aio_cleanup_notifyq(aiop, notifyqhead, exitflg);
748 if (pollqhead)
749 aio_cleanup_pollq(aiop, pollqhead, exitflg);
750 if (flag && (cleanupport || portq))
751 aio_cleanup_portq(aiop, cleanupport, exitflg);
752
753 if (exitflg)
754 return;
755
756 /*
757 * If we have an active aio_cleanup_thread it's possible for
758 * this routine to push something on to the done queue after
759 * an aiowait/aiosuspend thread has already decided to block.
760 * This being the case, we need a cv_broadcast here to wake
761 * these threads up. It is simpler and cleaner to do this
762 * broadcast here than in the individual cleanup routines.
763 */
764
765 mutex_enter(&aiop->aio_mutex);
766 /*
767 * If there has never been an old solaris aio request
768 * issued by this process, then do not send a SIGIO signal.
769 */
770 if (!(aiop->aio_flags & AIO_SOLARIS_REQ))
771 signalled = 1;
772 cv_broadcast(&aiop->aio_waitcv);
773 mutex_exit(&aiop->aio_mutex);
774
775 /*
776 * Only if the process wasn't already signalled,
777 * determine if a SIGIO signal should be delivered.
778 */
779 if (!signalled &&
780 (func = PTOU(curproc)->u_signal[SIGIO - 1]) != SIG_DFL &&
781 func != SIG_IGN)
782 psignal(curproc, SIGIO);
783 }
784
785
786 /*
787 * Do cleanup for every element of the port cleanup queue.
788 */
789 static void
790 aio_cleanup_portq(aio_t *aiop, aio_req_t *cleanupq, int exitflag)
791 {
792 aio_req_t *reqp;
793 aio_req_t *next;
794 aio_req_t *headp;
795 aio_lio_t *liop;
796
797 /* first check the portq */
798 if (exitflag || ((aiop->aio_flags & AIO_CLEANUP_PORT) == 0)) {
799 mutex_enter(&aiop->aio_mutex);
800 if (aiop->aio_flags & AIO_CLEANUP)
801 aiop->aio_flags |= AIO_CLEANUP_PORT;
802 mutex_exit(&aiop->aio_mutex);
803
804 /*
805 * It is not allowed to hold locks during aphysio_unlock().
806 * The aio_done() interrupt function will try to acquire
807 * aio_mutex and aio_portq_mutex. Therefore we disconnect
808 * the portq list from the aiop for the duration of the
809 * aphysio_unlock() loop below.
810 */
811 mutex_enter(&aiop->aio_portq_mutex);
812 headp = aiop->aio_portq;
813 aiop->aio_portq = NULL;
814 mutex_exit(&aiop->aio_portq_mutex);
815 if ((reqp = headp) != NULL) {
816 do {
817 next = reqp->aio_req_next;
818 aphysio_unlock(reqp);
819 if (exitflag) {
820 mutex_enter(&aiop->aio_mutex);
821 aio_req_free(aiop, reqp);
822 mutex_exit(&aiop->aio_mutex);
823 }
824 } while ((reqp = next) != headp);
825 }
826
827 if (headp != NULL && exitflag == 0) {
828 /* move unlocked requests back to the port queue */
829 aio_req_t *newq;
830
831 mutex_enter(&aiop->aio_portq_mutex);
832 if ((newq = aiop->aio_portq) != NULL) {
833 aio_req_t *headprev = headp->aio_req_prev;
834 aio_req_t *newqprev = newq->aio_req_prev;
835
836 headp->aio_req_prev = newqprev;
837 newq->aio_req_prev = headprev;
838 headprev->aio_req_next = newq;
839 newqprev->aio_req_next = headp;
840 }
841 aiop->aio_portq = headp;
842 cv_broadcast(&aiop->aio_portcv);
843 mutex_exit(&aiop->aio_portq_mutex);
844 }
845 }
846
847 /* now check the port cleanup queue */
848 if ((reqp = cleanupq) == NULL)
849 return;
850 do {
851 next = reqp->aio_req_next;
852 aphysio_unlock(reqp);
853 if (exitflag) {
854 mutex_enter(&aiop->aio_mutex);
855 aio_req_free(aiop, reqp);
856 mutex_exit(&aiop->aio_mutex);
857 } else {
858 mutex_enter(&aiop->aio_portq_mutex);
859 aio_enq(&aiop->aio_portq, reqp, 0);
860 mutex_exit(&aiop->aio_portq_mutex);
861 port_send_event(reqp->aio_req_portkev);
862 if ((liop = reqp->aio_req_lio) != NULL) {
863 int send_event = 0;
864
865 mutex_enter(&aiop->aio_mutex);
866 ASSERT(liop->lio_refcnt > 0);
867 if (--liop->lio_refcnt == 0) {
868 if (liop->lio_port >= 0 &&
869 liop->lio_portkev) {
870 liop->lio_port = -1;
871 send_event = 1;
872 }
873 }
874 mutex_exit(&aiop->aio_mutex);
875 if (send_event)
876 port_send_event(liop->lio_portkev);
877 }
878 }
879 } while ((reqp = next) != cleanupq);
880 }
881
882 /*
883 * Do cleanup for every element of the cleanupq.
884 */
885 static int
886 aio_cleanup_cleanupq(aio_t *aiop, aio_req_t *qhead, int exitflg)
887 {
888 aio_req_t *reqp, *next;
889 int signalled = 0;
890
891 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
892
893 /*
894 * Since aio_req_done() or aio_req_find() use the HASH list to find
895 * the required requests, they could potentially take away elements
896 * if they are already done (AIO_DONEQ is set).
897 * The aio_cleanupq_mutex protects the queue for the duration of the
898 * loop from aio_req_done() and aio_req_find().
899 */
900 if ((reqp = qhead) == NULL)
901 return (0);
902 do {
903 ASSERT(reqp->aio_req_flags & AIO_CLEANUPQ);
904 ASSERT(reqp->aio_req_portkev == NULL);
905 next = reqp->aio_req_next;
906 aphysio_unlock(reqp);
907 mutex_enter(&aiop->aio_mutex);
908 if (exitflg)
909 aio_req_free(aiop, reqp);
910 else
911 aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
912 if (!exitflg) {
913 if (reqp->aio_req_flags & AIO_SIGNALLED)
914 signalled++;
915 else
916 reqp->aio_req_flags |= AIO_SIGNALLED;
917 }
918 mutex_exit(&aiop->aio_mutex);
919 } while ((reqp = next) != qhead);
920 return (signalled);
921 }
922
923 /*
924 * do cleanup for every element of the notify queue.
925 */
926 static int
927 aio_cleanup_notifyq(aio_t *aiop, aio_req_t *qhead, int exitflg)
928 {
929 aio_req_t *reqp, *next;
930 aio_lio_t *liohead;
931 sigqueue_t *sigev, *lio_sigev = NULL;
932 int signalled = 0;
933
934 if ((reqp = qhead) == NULL)
935 return (0);
936 do {
937 ASSERT(reqp->aio_req_flags & AIO_NOTIFYQ);
938 next = reqp->aio_req_next;
939 aphysio_unlock(reqp);
940 if (exitflg) {
941 mutex_enter(&aiop->aio_mutex);
942 aio_req_free(aiop, reqp);
943 mutex_exit(&aiop->aio_mutex);
944 } else {
945 mutex_enter(&aiop->aio_mutex);
946 aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
947 sigev = reqp->aio_req_sigqp;
948 reqp->aio_req_sigqp = NULL;
949 if ((liohead = reqp->aio_req_lio) != NULL) {
950 ASSERT(liohead->lio_refcnt > 0);
951 if (--liohead->lio_refcnt == 0) {
952 cv_signal(&liohead->lio_notify);
953 lio_sigev = liohead->lio_sigqp;
954 liohead->lio_sigqp = NULL;
955 }
956 }
957 mutex_exit(&aiop->aio_mutex);
958 if (sigev) {
959 signalled++;
960 aio_sigev_send(reqp->aio_req_buf.b_proc,
961 sigev);
962 }
963 if (lio_sigev) {
964 signalled++;
965 aio_sigev_send(reqp->aio_req_buf.b_proc,
966 lio_sigev);
967 }
968 }
969 } while ((reqp = next) != qhead);
970
971 return (signalled);
972 }
973
974 /*
975 * Do cleanup for every element of the poll queue.
976 */
977 static void
978 aio_cleanup_pollq(aio_t *aiop, aio_req_t *qhead, int exitflg)
979 {
980 aio_req_t *reqp, *next;
981
982 /*
983 * As no other threads should be accessing the queue at this point,
984 * it isn't necessary to hold aio_mutex while we traverse its elements.
985 */
986 if ((reqp = qhead) == NULL)
987 return;
988 do {
989 ASSERT(reqp->aio_req_flags & AIO_POLLQ);
990 next = reqp->aio_req_next;
991 aphysio_unlock(reqp);
992 if (exitflg) {
993 mutex_enter(&aiop->aio_mutex);
994 aio_req_free(aiop, reqp);
995 mutex_exit(&aiop->aio_mutex);
996 } else {
997 aio_copyout_result(reqp);
998 mutex_enter(&aiop->aio_mutex);
999 aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
1000 mutex_exit(&aiop->aio_mutex);
1001 }
1002 } while ((reqp = next) != qhead);
1003 }
1004
1005 /*
1006 * called by exit(). waits for all outstanding kaio to finish
1007 * before the kaio resources are freed.
1008 */
1009 void
1010 aio_cleanup_exit(void)
1011 {
1012 proc_t *p = curproc;
1013 aio_t *aiop = p->p_aio;
1014 aio_req_t *reqp, *next, *head;
1015 aio_lio_t *nxtlio, *liop;
1016
1017 /*
1018 * wait for all outstanding kaio to complete. process
1019 * is now single-threaded; no other kaio requests can
1020 * happen once aio_pending is zero.
1021 */
1022 mutex_enter(&aiop->aio_mutex);
1023 aiop->aio_flags |= AIO_CLEANUP;
1024 while ((aiop->aio_pending != 0) || (aiop->aio_flags & AIO_DONE_ACTIVE))
1025 cv_wait(&aiop->aio_cleanupcv, &aiop->aio_mutex);
1026 mutex_exit(&aiop->aio_mutex);
1027
1028 /* cleanup the cleanup-thread queues. */
1029 aio_cleanup(AIO_CLEANUP_EXIT);
1030
1031 /*
1032 * Although this process is now single-threaded, we
1033 * still need to protect ourselves against a race with
1034 * aio_cleanup_dr_delete_memory().
1035 */
1036 mutex_enter(&p->p_lock);
1037
1038 /*
1039 * free up the done queue's resources.
1040 */
1041 if ((head = aiop->aio_doneq) != NULL) {
1042 aiop->aio_doneq = NULL;
1043 reqp = head;
1044 do {
1045 next = reqp->aio_req_next;
1046 aphysio_unlock(reqp);
1047 kmem_free(reqp, sizeof (struct aio_req_t));
1048 } while ((reqp = next) != head);
1049 }
1050 /*
1051 * release aio request freelist.
1052 */
1053 for (reqp = aiop->aio_free; reqp != NULL; reqp = next) {
1054 next = reqp->aio_req_next;
1055 kmem_free(reqp, sizeof (struct aio_req_t));
1056 }
1057
1058 /*
1059 * release io list head freelist.
1060 */
1061 for (liop = aiop->aio_lio_free; liop != NULL; liop = nxtlio) {
1062 nxtlio = liop->lio_next;
1063 kmem_free(liop, sizeof (aio_lio_t));
1064 }
1065
1066 if (aiop->aio_iocb)
1067 kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
1068
1069 mutex_destroy(&aiop->aio_mutex);
1070 mutex_destroy(&aiop->aio_portq_mutex);
1071 mutex_destroy(&aiop->aio_cleanupq_mutex);
1072 p->p_aio = NULL;
1073 mutex_exit(&p->p_lock);
1074 kmem_free(aiop, sizeof (struct aio));
1075 }
1076
1077 /*
1078 * copy out aio request's result to a user-level result_t buffer.
1079 */
1080 void
1081 aio_copyout_result(aio_req_t *reqp)
1082 {
1083 struct buf *bp;
1084 struct iovec *iov;
1085 void *resultp;
1086 int error;
1087 size_t retval;
1088
1089 if (reqp->aio_req_flags & AIO_COPYOUTDONE)
1090 return;
1091
1092 reqp->aio_req_flags |= AIO_COPYOUTDONE;
1093
1094 iov = reqp->aio_req_uio.uio_iov;
1095 bp = &reqp->aio_req_buf;
1096 /* "resultp" points to user-level result_t buffer */
1097 resultp = (void *)reqp->aio_req_resultp;
1098 if (bp->b_flags & B_ERROR) {
1099 if (bp->b_error)
1100 error = bp->b_error;
1101 else
1102 error = EIO;
1103 retval = (size_t)-1;
1104 } else {
1105 error = 0;
1106 retval = iov->iov_len - bp->b_resid;
1107 }
1108 #ifdef _SYSCALL32_IMPL
1109 if (get_udatamodel() == DATAMODEL_NATIVE) {
1110 (void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
1111 (void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
1112 } else {
1113 (void) suword32(&((aio_result32_t *)resultp)->aio_return,
1114 (int)retval);
1115 (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
1116 }
1117 #else
1118 (void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
1119 (void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
1120 #endif
1121 }
1122
1123
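/*
 * copy out an aio request's result to a user-level result_t buffer;
 * event port variant of aio_copyout_result(): the iovec, buf and user
 * result pointer are supplied directly by the caller.
 */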
1124 void
1125 aio_copyout_result_port(struct iovec *iov, struct buf *bp, void *resultp)
1126 {
1127 int errno;
1128 size_t retval;
1129
1130 if (bp->b_flags & B_ERROR) {
1131 if (bp->b_error)
1132 errno = bp->b_error;
1133 else
1134 errno = EIO;
1135 retval = (size_t)-1;
1136 } else {
1137 errno = 0;
1138 retval = iov->iov_len - bp->b_resid;
1139 }
1140 #ifdef _SYSCALL32_IMPL
1141 if (get_udatamodel() == DATAMODEL_NATIVE) {
1142 (void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
1143 (void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
1144 } else {
1145 (void) suword32(&((aio_result32_t *)resultp)->aio_return,
1146 (int)retval);
1147 (void) suword32(&((aio_result32_t *)resultp)->aio_errno, errno);
1148 }
1149 #else
1150 (void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
1151 (void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
1152 #endif
1153 }
1154
1155 /*
1156 * This function is used to remove a request from the port's done queue (aio_portq).
1157 */
1158
1159 void
1160 aio_req_remove_portq(aio_t *aiop, aio_req_t *reqp)
1161 {
1162 ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex));
1163 while (aiop->aio_portq == NULL) {
1164 /*
1165 * aio_portq is set to NULL when aio_cleanup_portq()
1166 * is working with the event queue.
1167 * The aio_cleanup_thread() uses aio_cleanup_portq()
1168 * to unlock all AIO buffers with completed transactions.
1169 * Wait here until aio_cleanup_portq() restores the
1170 * list of completed transactions in aio_portq.
1171 */
1172 cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
1173 }
1174 aio_deq(&aiop->aio_portq, reqp);
1175 }
1176
1177 /* ARGSUSED */
1178 void
1179 aio_close_port(void *arg, int port, pid_t pid, int lastclose)
1180 {
1181 aio_t *aiop;
1182 aio_req_t *reqp;
1183 aio_req_t *next;
1184 aio_req_t *headp;
1185 int counter;
1186
1187 if (arg == NULL)
1188 aiop = curproc->p_aio;
1189 else
1190 aiop = (aio_t *)arg;
1191
1192 /*
1193 * The PORT_SOURCE_AIO source is associated with every newly
1194 * created port by default.
1195 * If no asynchronous I/O transactions were associated with the port
1196 * then the aiop pointer will still be set to NULL.
1197 */
1198 if (aiop == NULL)
1199 return;
1200
1201 /*
1202 * Within a process event ports can be used to collect events other
1203 * than PORT_SOURCE_AIO events. At the same time the process can submit
1204 * asynchronous I/O transactions which are not associated with the
1205 * current port.
1206 * The current process-oriented model of AIO uses a single queue for
1207 * pending events. On close the pending queue (queue of asynchronous
1208 * I/O transactions using event port notification) must be scanned
1209 * to detect and handle pending I/Os using the current port.
1210 */
1211 mutex_enter(&aiop->aio_portq_mutex);
1212 mutex_enter(&aiop->aio_mutex);
1213 counter = 0;
1214 if ((headp = aiop->aio_portpending) != NULL) {
1215 reqp = headp;
1216 do {
1217 if (reqp->aio_req_portkev &&
1218 reqp->aio_req_port == port) {
1219 reqp->aio_req_flags |= AIO_CLOSE_PORT;
1220 counter++;
1221 }
1222 } while ((reqp = reqp->aio_req_next) != headp);
1223 }
1224 if (counter == 0) {
1225 /* no AIOs pending */
1226 mutex_exit(&aiop->aio_mutex);
1227 mutex_exit(&aiop->aio_portq_mutex);
1228 return;
1229 }
1230 aiop->aio_portpendcnt += counter;
1231 mutex_exit(&aiop->aio_mutex);
1232 while (aiop->aio_portpendcnt)
1233 cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
1234
1235 /*
1236 * all pending AIOs are completed.
1237 * check port doneq
1238 */
1239 headp = NULL;
1240 if ((reqp = aiop->aio_portq) != NULL) {
1241 do {
1242 next = reqp->aio_req_next;
1243 if (reqp->aio_req_port == port) {
1244 /* dequeue request and discard event */
1245 aio_req_remove_portq(aiop, reqp);
1246 port_free_event(reqp->aio_req_portkev);
1247 /* put request in temporary queue */
1248 reqp->aio_req_next = headp;
1249 headp = reqp;
1250 }
1251 } while ((reqp = next) != aiop->aio_portq);
1252 }
1253 mutex_exit(&aiop->aio_portq_mutex);
1254
1255 /* headp points to the list of requests to be discarded */
1256 for (reqp = headp; reqp != NULL; reqp = next) {
1257 next = reqp->aio_req_next;
1258 aphysio_unlock(reqp);
1259 mutex_enter(&aiop->aio_mutex);
1260 aio_req_free_port(aiop, reqp);
1261 mutex_exit(&aiop->aio_mutex);
1262 }
1263
1264 if (aiop->aio_flags & AIO_CLEANUP)
1265 cv_broadcast(&aiop->aio_waitcv);
1266 }
1267
1268 /*
1269 * aio_cleanup_dr_delete_memory is used by dr's delete_memory_thread
1270 * to kick-start the aio_cleanup_thread for the given process to do the
1271 * necessary cleanup.
1272 * This is needed so that delete_memory_thread can obtain writer locks
1273 * on pages that need to be relocated during a dr memory delete operation,
1274 * otherwise a deadly embrace may occur.
1275 */
1276 int
1277 aio_cleanup_dr_delete_memory(proc_t *procp)
1278 {
1279 struct aio *aiop = procp->p_aio;
1280 struct as *as = procp->p_as;
1281 int ret = 0;
1282
1283 ASSERT(MUTEX_HELD(&procp->p_lock));
1284
1285 mutex_enter(&as->a_contents);
1286
1287 if (aiop != NULL) {
1288 aiop->aio_rqclnup = 1;
1289 cv_broadcast(&as->a_cv);
1290 ret = 1;
1291 }
1292 mutex_exit(&as->a_contents);
1293 return (ret);
1294 }
1295