1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Kernel asynchronous I/O. 31 * This is only for raw devices now (as of Nov. 1993). 32 */ 33 34 #include <sys/types.h> 35 #include <sys/errno.h> 36 #include <sys/conf.h> 37 #include <sys/file.h> 38 #include <sys/fs/snode.h> 39 #include <sys/unistd.h> 40 #include <sys/cmn_err.h> 41 #include <vm/as.h> 42 #include <vm/faultcode.h> 43 #include <sys/sysmacros.h> 44 #include <sys/procfs.h> 45 #include <sys/kmem.h> 46 #include <sys/autoconf.h> 47 #include <sys/ddi_impldefs.h> 48 #include <sys/sunddi.h> 49 #include <sys/aio_impl.h> 50 #include <sys/debug.h> 51 #include <sys/param.h> 52 #include <sys/systm.h> 53 #include <sys/vmsystm.h> 54 #include <sys/fs/pxfs_ki.h> 55 #include <sys/contract/process_impl.h> 56 57 /* 58 * external entry point. 59 */ 60 #ifdef _LP64 61 static int64_t kaioc(long, long, long, long, long, long); 62 #endif 63 static int kaio(ulong_t *, rval_t *); 64 65 66 #define AIO_64 0 67 #define AIO_32 1 68 #define AIO_LARGEFILE 2 69 70 /* 71 * implementation specific functions (private) 72 */ 73 #ifdef _LP64 74 static int alio(int, aiocb_t **, int, struct sigevent *); 75 #endif 76 static int aionotify(void); 77 static int aioinit(void); 78 static int aiostart(void); 79 static void alio_cleanup(aio_t *, aiocb_t **, int, int); 80 static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *, 81 cred_t *); 82 static void lio_set_error(aio_req_t *, int portused); 83 static aio_t *aio_aiop_alloc(); 84 static int aio_req_alloc(aio_req_t **, aio_result_t *); 85 static int aio_lio_alloc(aio_lio_t **); 86 static aio_req_t *aio_req_done(void *); 87 static aio_req_t *aio_req_remove(aio_req_t *); 88 static int aio_req_find(aio_result_t *, aio_req_t **); 89 static int aio_hash_insert(struct aio_req_t *, aio_t *); 90 static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *, 91 aio_result_t *, vnode_t *); 92 static int aio_cleanup_thread(aio_t *); 93 static aio_lio_t *aio_list_get(aio_result_t *); 94 static void lio_set_uerror(void *, int); 95 extern void aio_zerolen(aio_req_t *); 96 static int aiowait(struct timeval *, int, long *); 97 static int aiowaitn(void *, uint_t, uint_t *, timespec_t *); 98 static int aio_unlock_requests(caddr_t iocblist, int iocb_index, 99 aio_req_t *reqlist, aio_t *aiop, model_t model); 100 static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max); 101 static int aiosuspend(void *, int, struct timespec *, int, 102 long *, int); 103 static int aliowait(int, void *, int, void *, int); 104 static int aioerror(void *, int); 105 static int 
aio_cancel(int, void *, long *, int); 106 static int arw(int, int, char *, int, offset_t, aio_result_t *, int); 107 static int aiorw(int, void *, int, int); 108 109 static int alioLF(int, void *, int, void *); 110 static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *, 111 aio_result_t *, vnode_t *); 112 static int alio32(int, void *, int, void *); 113 static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p); 114 static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p); 115 116 #ifdef _SYSCALL32_IMPL 117 static void aiocb_LFton(aiocb64_32_t *, aiocb_t *); 118 void aiocb_32ton(aiocb32_t *, aiocb_t *); 119 #endif /* _SYSCALL32_IMPL */ 120 121 /* 122 * implementation specific functions (external) 123 */ 124 void aio_req_free(aio_t *, aio_req_t *); 125 126 /* 127 * Event Port framework 128 */ 129 130 void aio_req_free_port(aio_t *, aio_req_t *); 131 static int aio_port_callback(void *, int *, pid_t, int, void *); 132 133 /* 134 * This is the loadable module wrapper. 135 */ 136 #include <sys/modctl.h> 137 #include <sys/syscall.h> 138 139 #ifdef _LP64 140 141 static struct sysent kaio_sysent = { 142 6, 143 SE_NOUNLOAD | SE_64RVAL | SE_ARGC, 144 (int (*)())kaioc 145 }; 146 147 #ifdef _SYSCALL32_IMPL 148 static struct sysent kaio_sysent32 = { 149 7, 150 SE_NOUNLOAD | SE_64RVAL, 151 kaio 152 }; 153 #endif /* _SYSCALL32_IMPL */ 154 155 #else /* _LP64 */ 156 157 static struct sysent kaio_sysent = { 158 7, 159 SE_NOUNLOAD | SE_32RVAL1, 160 kaio 161 }; 162 163 #endif /* _LP64 */ 164 165 /* 166 * Module linkage information for the kernel. 167 */ 168 169 static struct modlsys modlsys = { 170 &mod_syscallops, 171 "kernel Async I/O", 172 &kaio_sysent 173 }; 174 175 #ifdef _SYSCALL32_IMPL 176 static struct modlsys modlsys32 = { 177 &mod_syscallops32, 178 "kernel Async I/O for 32 bit compatibility", 179 &kaio_sysent32 180 }; 181 #endif /* _SYSCALL32_IMPL */ 182 183 184 static struct modlinkage modlinkage = { 185 MODREV_1, 186 &modlsys, 187 #ifdef _SYSCALL32_IMPL 188 &modlsys32, 189 #endif 190 NULL 191 }; 192 193 int 194 _init(void) 195 { 196 int retval; 197 198 if ((retval = mod_install(&modlinkage)) != 0) 199 return (retval); 200 201 return (0); 202 } 203 204 int 205 _fini(void) 206 { 207 int retval; 208 209 retval = mod_remove(&modlinkage); 210 211 return (retval); 212 } 213 214 int 215 _info(struct modinfo *modinfop) 216 { 217 return (mod_info(&modlinkage, modinfop)); 218 } 219 220 #ifdef _LP64 221 static int64_t 222 kaioc( 223 long a0, 224 long a1, 225 long a2, 226 long a3, 227 long a4, 228 long a5) 229 { 230 int error; 231 long rval = 0; 232 233 switch ((int)a0 & ~AIO_POLL_BIT) { 234 case AIOREAD: 235 error = arw((int)a0, (int)a1, (char *)a2, (int)a3, 236 (offset_t)a4, (aio_result_t *)a5, FREAD); 237 break; 238 case AIOWRITE: 239 error = arw((int)a0, (int)a1, (char *)a2, (int)a3, 240 (offset_t)a4, (aio_result_t *)a5, FWRITE); 241 break; 242 case AIOWAIT: 243 error = aiowait((struct timeval *)a1, (int)a2, &rval); 244 break; 245 case AIOWAITN: 246 error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3, 247 (timespec_t *)a4); 248 break; 249 case AIONOTIFY: 250 error = aionotify(); 251 break; 252 case AIOINIT: 253 error = aioinit(); 254 break; 255 case AIOSTART: 256 error = aiostart(); 257 break; 258 case AIOLIO: 259 error = alio((int)a1, (aiocb_t **)a2, (int)a3, 260 (struct sigevent *)a4); 261 break; 262 case AIOLIOWAIT: 263 error = aliowait((int)a1, (void *)a2, (int)a3, 264 (struct sigevent *)a4, AIO_64); 265 break; 266 case AIOSUSPEND: 267 error = 
aiosuspend((void *)a1, (int)a2, (timespec_t *)a3, 268 (int)a4, &rval, AIO_64); 269 break; 270 case AIOERROR: 271 error = aioerror((void *)a1, AIO_64); 272 break; 273 case AIOAREAD: 274 error = aiorw((int)a0, (void *)a1, FREAD, AIO_64); 275 break; 276 case AIOAWRITE: 277 error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64); 278 break; 279 case AIOCANCEL: 280 error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64); 281 break; 282 283 /* 284 * The large file related stuff is valid only for 285 * 32 bit kernel and not for 64 bit kernel 286 * On 64 bit kernel we convert large file calls 287 * to regular 64bit calls. 288 */ 289 290 default: 291 error = EINVAL; 292 } 293 if (error) 294 return ((int64_t)set_errno(error)); 295 return (rval); 296 } 297 #endif 298 299 static int 300 kaio( 301 ulong_t *uap, 302 rval_t *rvp) 303 { 304 long rval = 0; 305 int error = 0; 306 offset_t off; 307 308 309 rvp->r_vals = 0; 310 #if defined(_LITTLE_ENDIAN) 311 off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4]; 312 #else 313 off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5]; 314 #endif 315 316 switch (uap[0] & ~AIO_POLL_BIT) { 317 /* 318 * It must be the 32 bit system call on 64 bit kernel 319 */ 320 case AIOREAD: 321 return (arw((int)uap[0], (int)uap[1], (char *)uap[2], 322 (int)uap[3], off, (aio_result_t *)uap[6], FREAD)); 323 case AIOWRITE: 324 return (arw((int)uap[0], (int)uap[1], (char *)uap[2], 325 (int)uap[3], off, (aio_result_t *)uap[6], FWRITE)); 326 case AIOWAIT: 327 error = aiowait((struct timeval *)uap[1], (int)uap[2], 328 &rval); 329 break; 330 case AIOWAITN: 331 error = aiowaitn((void *)uap[1], (uint_t)uap[2], 332 (uint_t *)uap[3], (timespec_t *)uap[4]); 333 break; 334 case AIONOTIFY: 335 return (aionotify()); 336 case AIOINIT: 337 return (aioinit()); 338 case AIOSTART: 339 return (aiostart()); 340 case AIOLIO: 341 return (alio32((int)uap[1], (void *)uap[2], (int)uap[3], 342 (void *)uap[4])); 343 case AIOLIOWAIT: 344 return (aliowait((int)uap[1], (void *)uap[2], 345 (int)uap[3], (struct sigevent *)uap[4], AIO_32)); 346 case AIOSUSPEND: 347 error = aiosuspend((void *)uap[1], (int)uap[2], 348 (timespec_t *)uap[3], (int)uap[4], 349 &rval, AIO_32); 350 break; 351 case AIOERROR: 352 return (aioerror((void *)uap[1], AIO_32)); 353 case AIOAREAD: 354 return (aiorw((int)uap[0], (void *)uap[1], 355 FREAD, AIO_32)); 356 case AIOAWRITE: 357 return (aiorw((int)uap[0], (void *)uap[1], 358 FWRITE, AIO_32)); 359 case AIOCANCEL: 360 error = (aio_cancel((int)uap[1], (void *)uap[2], &rval, 361 AIO_32)); 362 break; 363 case AIOLIO64: 364 return (alioLF((int)uap[1], (void *)uap[2], 365 (int)uap[3], (void *)uap[4])); 366 case AIOLIOWAIT64: 367 return (aliowait(uap[1], (void *)uap[2], 368 (int)uap[3], (void *)uap[4], AIO_LARGEFILE)); 369 case AIOSUSPEND64: 370 error = aiosuspend((void *)uap[1], (int)uap[2], 371 (timespec_t *)uap[3], (int)uap[4], &rval, 372 AIO_LARGEFILE); 373 break; 374 case AIOERROR64: 375 return (aioerror((void *)uap[1], AIO_LARGEFILE)); 376 case AIOAREAD64: 377 return (aiorw((int)uap[0], (void *)uap[1], FREAD, 378 AIO_LARGEFILE)); 379 case AIOAWRITE64: 380 return (aiorw((int)uap[0], (void *)uap[1], FWRITE, 381 AIO_LARGEFILE)); 382 case AIOCANCEL64: 383 error = (aio_cancel((int)uap[1], (void *)uap[2], 384 &rval, AIO_LARGEFILE)); 385 break; 386 default: 387 return (EINVAL); 388 } 389 390 rvp->r_val1 = rval; 391 return (error); 392 } 393 394 /* 395 * wake up LWPs in this process that are sleeping in 396 * aiowait(). 
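 *
 * a rough illustrative sketch of how this is used, assuming a
 * hypothetical userland kaio() syscall wrapper: a library that keeps
 * its own done queue bumps aio_notifycnt so that a sibling LWP
 * blocked in aiowait() returns 1 and rechecks the user-level queue:
 *
 *	(void) kaio(AIONOTIFY);		... aio_notifycnt++, broadcast
 *	rv = kaio(AIOWAIT, NULL, 0);	... rv == 1: consult user queue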
397 */ 398 static int 399 aionotify(void) 400 { 401 aio_t *aiop; 402 403 aiop = curproc->p_aio; 404 if (aiop == NULL) 405 return (0); 406 407 mutex_enter(&aiop->aio_mutex); 408 aiop->aio_notifycnt++; 409 cv_broadcast(&aiop->aio_waitcv); 410 mutex_exit(&aiop->aio_mutex); 411 412 return (0); 413 } 414 415 static int 416 timeval2reltime(struct timeval *timout, timestruc_t *rqtime, 417 timestruc_t **rqtp, int *blocking) 418 { 419 #ifdef _SYSCALL32_IMPL 420 struct timeval32 wait_time_32; 421 #endif 422 struct timeval wait_time; 423 model_t model = get_udatamodel(); 424 425 *rqtp = NULL; 426 if (timout == NULL) { /* wait indefinitely */ 427 *blocking = 1; 428 return (0); 429 } 430 431 /* 432 * Need to correctly compare with the -1 passed in for a user 433 * address pointer, with both 32 bit and 64 bit apps. 434 */ 435 if (model == DATAMODEL_NATIVE) { 436 if ((intptr_t)timout == (intptr_t)-1) { /* don't wait */ 437 *blocking = 0; 438 return (0); 439 } 440 441 if (copyin(timout, &wait_time, sizeof (wait_time))) 442 return (EFAULT); 443 } 444 #ifdef _SYSCALL32_IMPL 445 else { 446 /* 447 * -1 from a 32bit app. It will not get sign extended. 448 * don't wait if -1. 449 */ 450 if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) { 451 *blocking = 0; 452 return (0); 453 } 454 455 if (copyin(timout, &wait_time_32, sizeof (wait_time_32))) 456 return (EFAULT); 457 TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32); 458 } 459 #endif /* _SYSCALL32_IMPL */ 460 461 if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) { /* don't wait */ 462 *blocking = 0; 463 return (0); 464 } 465 466 if (wait_time.tv_sec < 0 || 467 wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC) 468 return (EINVAL); 469 470 rqtime->tv_sec = wait_time.tv_sec; 471 rqtime->tv_nsec = wait_time.tv_usec * 1000; 472 *rqtp = rqtime; 473 *blocking = 1; 474 475 return (0); 476 } 477 478 static int 479 timespec2reltime(timespec_t *timout, timestruc_t *rqtime, 480 timestruc_t **rqtp, int *blocking) 481 { 482 #ifdef _SYSCALL32_IMPL 483 timespec32_t wait_time_32; 484 #endif 485 model_t model = get_udatamodel(); 486 487 *rqtp = NULL; 488 if (timout == NULL) { 489 *blocking = 1; 490 return (0); 491 } 492 493 if (model == DATAMODEL_NATIVE) { 494 if (copyin(timout, rqtime, sizeof (*rqtime))) 495 return (EFAULT); 496 } 497 #ifdef _SYSCALL32_IMPL 498 else { 499 if (copyin(timout, &wait_time_32, sizeof (wait_time_32))) 500 return (EFAULT); 501 TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32); 502 } 503 #endif /* _SYSCALL32_IMPL */ 504 505 if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) { 506 *blocking = 0; 507 return (0); 508 } 509 510 if (rqtime->tv_sec < 0 || 511 rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC) 512 return (EINVAL); 513 514 *rqtp = rqtime; 515 *blocking = 1; 516 517 return (0); 518 } 519 520 /*ARGSUSED*/ 521 static int 522 aiowait( 523 struct timeval *timout, 524 int dontblockflg, 525 long *rval) 526 { 527 int error; 528 aio_t *aiop; 529 aio_req_t *reqp; 530 clock_t status; 531 int blocking; 532 int timecheck; 533 timestruc_t rqtime; 534 timestruc_t *rqtp; 535 536 aiop = curproc->p_aio; 537 if (aiop == NULL) 538 return (EINVAL); 539 540 /* 541 * Establish the absolute future time for the timeout. 
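 *
 * a minimal illustrative sketch (values are made up): the relative
 * timeout from userland is turned into an absolute wakeup time so
 * cv_waituntil_sig() can also notice clock changes via timecheck:
 *
 *	rqtime = { 1, 500000000 }	... 1.5s relative timeout
 *	gethrestime(&now);		... e.g. now = 1000.250000000
 *	timespecadd(rqtp, &now);	... rqtime is now 1001.750000000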
542 */ 543 error = timeval2reltime(timout, &rqtime, &rqtp, &blocking); 544 if (error) 545 return (error); 546 if (rqtp) { 547 timestruc_t now; 548 timecheck = timechanged; 549 gethrestime(&now); 550 timespecadd(rqtp, &now); 551 } 552 553 mutex_enter(&aiop->aio_mutex); 554 for (;;) { 555 /* process requests on poll queue */ 556 if (aiop->aio_pollq) { 557 mutex_exit(&aiop->aio_mutex); 558 aio_cleanup(0); 559 mutex_enter(&aiop->aio_mutex); 560 } 561 if ((reqp = aio_req_remove(NULL)) != NULL) { 562 *rval = (long)reqp->aio_req_resultp; 563 break; 564 } 565 /* user-level done queue might not be empty */ 566 if (aiop->aio_notifycnt > 0) { 567 aiop->aio_notifycnt--; 568 *rval = 1; 569 break; 570 } 571 /* don't block if no outstanding aio */ 572 if (aiop->aio_outstanding == 0 && dontblockflg) { 573 error = EINVAL; 574 break; 575 } 576 if (blocking) { 577 status = cv_waituntil_sig(&aiop->aio_waitcv, 578 &aiop->aio_mutex, rqtp, timecheck); 579 580 if (status > 0) /* check done queue again */ 581 continue; 582 if (status == 0) { /* interrupted by a signal */ 583 error = EINTR; 584 *rval = -1; 585 } else { /* timer expired */ 586 error = ETIME; 587 } 588 } 589 break; 590 } 591 mutex_exit(&aiop->aio_mutex); 592 if (reqp) { 593 aphysio_unlock(reqp); 594 aio_copyout_result(reqp); 595 mutex_enter(&aiop->aio_mutex); 596 aio_req_free(aiop, reqp); 597 mutex_exit(&aiop->aio_mutex); 598 } 599 return (error); 600 } 601 602 /* 603 * aiowaitn can be used to reap completed asynchronous requests submitted with 604 * lio_listio, aio_read or aio_write. 605 * This function only reaps asynchronous raw I/Os. 606 */ 607 608 /*ARGSUSED*/ 609 static int 610 aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout) 611 { 612 int error = 0; 613 aio_t *aiop; 614 aio_req_t *reqlist = NULL; 615 caddr_t iocblist = NULL; /* array of iocb ptr's */ 616 uint_t waitcnt, cnt = 0; /* iocb cnt */ 617 size_t iocbsz; /* users iocb size */ 618 size_t riocbsz; /* returned iocb size */ 619 int iocb_index = 0; 620 model_t model = get_udatamodel(); 621 int blocking = 1; 622 int timecheck; 623 timestruc_t rqtime; 624 timestruc_t *rqtp; 625 626 aiop = curproc->p_aio; 627 628 if (aiop == NULL || aiop->aio_outstanding == 0) 629 return (EAGAIN); 630 631 if (copyin(nwait, &waitcnt, sizeof (uint_t))) 632 return (EFAULT); 633 634 /* set *nwait to zero, if we must return prematurely */ 635 if (copyout(&cnt, nwait, sizeof (uint_t))) 636 return (EFAULT); 637 638 if (waitcnt == 0) { 639 blocking = 0; 640 rqtp = NULL; 641 waitcnt = nent; 642 } else { 643 error = timespec2reltime(timout, &rqtime, &rqtp, &blocking); 644 if (error) 645 return (error); 646 } 647 648 if (model == DATAMODEL_NATIVE) 649 iocbsz = (sizeof (aiocb_t *) * nent); 650 #ifdef _SYSCALL32_IMPL 651 else 652 iocbsz = (sizeof (caddr32_t) * nent); 653 #endif /* _SYSCALL32_IMPL */ 654 655 /* 656 * Only one aio_waitn call is allowed at a time. 657 * The active aio_waitn will collect all requests 658 * out of the "done" list and if necessary it will wait 659 * for some/all pending requests to fulfill the nwait 660 * parameter. 661 * A second or further aio_waitn calls will sleep here 662 * until the active aio_waitn finishes and leaves the kernel 663 * If the second call does not block (poll), then return 664 * immediately with the error code : EAGAIN. 665 * If the second call should block, then sleep here, but 666 * do not touch the timeout. The timeout starts when this 667 * aio_waitn-call becomes active. 
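 *
 * a rough sketch of the handshake used below, assuming two LWPs
 * A and B calling aio_waitn() concurrently (illustrative only):
 *
 *	A: aio_flags |= AIO_WAITN		... A is the active caller
 *	B: sees AIO_WAITN set
 *	   blocking == 0 -> return EAGAIN
 *	   blocking != 0 -> set AIO_WAITN_PENDING, cv_wait_sig()
 *	A: finishes, clears AIO_WAITN, cv_signal(&aio_waitncv)
 *	B: wakes, becomes the active caller; B's timeout starts here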
668 */ 669 670 mutex_enter(&aiop->aio_mutex); 671 672 while (aiop->aio_flags & AIO_WAITN) { 673 if (blocking == 0) { 674 mutex_exit(&aiop->aio_mutex); 675 return (EAGAIN); 676 } 677 678 /* block, no timeout */ 679 aiop->aio_flags |= AIO_WAITN_PENDING; 680 if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) { 681 mutex_exit(&aiop->aio_mutex); 682 return (EINTR); 683 } 684 } 685 686 /* 687 * Establish the absolute future time for the timeout. 688 */ 689 if (rqtp) { 690 timestruc_t now; 691 timecheck = timechanged; 692 gethrestime(&now); 693 timespecadd(rqtp, &now); 694 } 695 696 if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) { 697 kmem_free(aiop->aio_iocb, aiop->aio_iocbsz); 698 aiop->aio_iocb = NULL; 699 } 700 701 if (aiop->aio_iocb == NULL) { 702 iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP); 703 if (iocblist == NULL) { 704 mutex_exit(&aiop->aio_mutex); 705 return (ENOMEM); 706 } 707 aiop->aio_iocb = (aiocb_t **)iocblist; 708 aiop->aio_iocbsz = iocbsz; 709 } else { 710 iocblist = (char *)aiop->aio_iocb; 711 } 712 713 aiop->aio_waitncnt = waitcnt; 714 aiop->aio_flags |= AIO_WAITN; 715 716 for (;;) { 717 /* push requests on poll queue to done queue */ 718 if (aiop->aio_pollq) { 719 mutex_exit(&aiop->aio_mutex); 720 aio_cleanup(0); 721 mutex_enter(&aiop->aio_mutex); 722 } 723 724 /* check for requests on done queue */ 725 if (aiop->aio_doneq) { 726 cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt); 727 aiop->aio_waitncnt = waitcnt - cnt; 728 } 729 730 /* user-level done queue might not be empty */ 731 if (aiop->aio_notifycnt > 0) { 732 aiop->aio_notifycnt--; 733 error = 0; 734 break; 735 } 736 737 /* 738 * if we are here second time as a result of timer 739 * expiration, we reset error if there are enough 740 * aiocb's to satisfy request. 741 * We return also if all requests are already done 742 * and we picked up the whole done queue. 743 */ 744 745 if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 && 746 aiop->aio_doneq == NULL)) { 747 error = 0; 748 break; 749 } 750 751 if ((cnt < waitcnt) && blocking) { 752 int rval = cv_waituntil_sig(&aiop->aio_waitcv, 753 &aiop->aio_mutex, rqtp, timecheck); 754 if (rval > 0) 755 continue; 756 if (rval < 0) { 757 error = ETIME; 758 blocking = 0; 759 continue; 760 } 761 error = EINTR; 762 } 763 break; 764 } 765 766 mutex_exit(&aiop->aio_mutex); 767 768 if (cnt > 0) { 769 770 iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist, 771 aiop, model); 772 773 if (model == DATAMODEL_NATIVE) 774 riocbsz = (sizeof (aiocb_t *) * cnt); 775 #ifdef _SYSCALL32_IMPL 776 else 777 riocbsz = (sizeof (caddr32_t) * cnt); 778 #endif /* _SYSCALL32_IMPL */ 779 780 if (copyout(iocblist, uiocb, riocbsz) || 781 copyout(&cnt, nwait, sizeof (uint_t))) 782 error = EFAULT; 783 } 784 785 if (aiop->aio_iocbsz > AIO_IOCB_MAX) { 786 kmem_free(iocblist, aiop->aio_iocbsz); 787 aiop->aio_iocb = NULL; 788 } 789 790 /* check if there is another thread waiting for execution */ 791 mutex_enter(&aiop->aio_mutex); 792 aiop->aio_flags &= ~AIO_WAITN; 793 if (aiop->aio_flags & AIO_WAITN_PENDING) { 794 aiop->aio_flags &= ~AIO_WAITN_PENDING; 795 cv_signal(&aiop->aio_waitncv); 796 } 797 mutex_exit(&aiop->aio_mutex); 798 799 return (error); 800 } 801 802 /* 803 * aio_unlock_requests 804 * copyouts the result of the request as well as the return value. 805 * It builds the list of completed asynchronous requests, 806 * unlocks the allocated memory ranges and 807 * put the aio request structure back into the free list. 
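 *
 * a small illustrative sketch for the native data model (entry
 * names are made up): given three completed requests, the function
 * leaves behind an array of user aiocb pointers that aio_waitn()
 * then copies out, and recycles each request:
 *
 *	iocblist -> { cbp_a, cbp_b, cbp_c }	... aio_req_iocb.iocb
 *	per request: aphysio_unlock(), aio_copyout_result(),
 *	    aio_req_free()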
808 */ 809 810 static int 811 aio_unlock_requests( 812 caddr_t iocblist, 813 int iocb_index, 814 aio_req_t *reqlist, 815 aio_t *aiop, 816 model_t model) 817 { 818 aio_req_t *reqp, *nreqp; 819 820 if (model == DATAMODEL_NATIVE) { 821 for (reqp = reqlist; reqp != NULL; reqp = nreqp) { 822 (((caddr_t *)iocblist)[iocb_index++]) = 823 reqp->aio_req_iocb.iocb; 824 nreqp = reqp->aio_req_next; 825 aphysio_unlock(reqp); 826 aio_copyout_result(reqp); 827 mutex_enter(&aiop->aio_mutex); 828 aio_req_free(aiop, reqp); 829 mutex_exit(&aiop->aio_mutex); 830 } 831 } 832 #ifdef _SYSCALL32_IMPL 833 else { 834 for (reqp = reqlist; reqp != NULL; reqp = nreqp) { 835 ((caddr32_t *)iocblist)[iocb_index++] = 836 reqp->aio_req_iocb.iocb32; 837 nreqp = reqp->aio_req_next; 838 aphysio_unlock(reqp); 839 aio_copyout_result(reqp); 840 mutex_enter(&aiop->aio_mutex); 841 aio_req_free(aiop, reqp); 842 mutex_exit(&aiop->aio_mutex); 843 } 844 } 845 #endif /* _SYSCALL32_IMPL */ 846 return (iocb_index); 847 } 848 849 /* 850 * aio_reqlist_concat 851 * moves "max" elements from the done queue to the reqlist queue and removes 852 * the AIO_DONEQ flag. 853 * - reqlist queue is a simple linked list 854 * - done queue is a double linked list 855 */ 856 857 static int 858 aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max) 859 { 860 aio_req_t *q2, *q2work, *list; 861 int count = 0; 862 863 list = *reqlist; 864 q2 = aiop->aio_doneq; 865 q2work = q2; 866 while (max-- > 0) { 867 q2work->aio_req_flags &= ~AIO_DONEQ; 868 q2work = q2work->aio_req_next; 869 count++; 870 if (q2work == q2) 871 break; 872 } 873 874 if (q2work == q2) { 875 /* all elements revised */ 876 q2->aio_req_prev->aio_req_next = list; 877 list = q2; 878 aiop->aio_doneq = NULL; 879 } else { 880 /* 881 * max < elements in the doneq 882 * detach only the required amount of elements 883 * out of the doneq 884 */ 885 q2work->aio_req_prev->aio_req_next = list; 886 list = q2; 887 888 aiop->aio_doneq = q2work; 889 q2work->aio_req_prev = q2->aio_req_prev; 890 q2->aio_req_prev->aio_req_next = q2work; 891 } 892 *reqlist = list; 893 return (count); 894 } 895 896 /*ARGSUSED*/ 897 static int 898 aiosuspend( 899 void *aiocb, 900 int nent, 901 struct timespec *timout, 902 int flag, 903 long *rval, 904 int run_mode) 905 { 906 int error; 907 aio_t *aiop; 908 aio_req_t *reqp, *found, *next; 909 caddr_t cbplist = NULL; 910 aiocb_t *cbp, **ucbp; 911 #ifdef _SYSCALL32_IMPL 912 aiocb32_t *cbp32; 913 caddr32_t *ucbp32; 914 #endif /* _SYSCALL32_IMPL */ 915 aiocb64_32_t *cbp64; 916 int rv; 917 int i; 918 size_t ssize; 919 model_t model = get_udatamodel(); 920 int blocking; 921 int timecheck; 922 timestruc_t rqtime; 923 timestruc_t *rqtp; 924 925 aiop = curproc->p_aio; 926 if (aiop == NULL || nent <= 0) 927 return (EINVAL); 928 929 /* 930 * Establish the absolute future time for the timeout. 931 */ 932 error = timespec2reltime(timout, &rqtime, &rqtp, &blocking); 933 if (error) 934 return (error); 935 if (rqtp) { 936 timestruc_t now; 937 timecheck = timechanged; 938 gethrestime(&now); 939 timespecadd(rqtp, &now); 940 } 941 942 /* 943 * If we are not blocking and there's no IO complete 944 * skip aiocb copyin. 
945 */ 946 if (!blocking && (aiop->aio_pollq == NULL) && 947 (aiop->aio_doneq == NULL)) { 948 return (EAGAIN); 949 } 950 951 if (model == DATAMODEL_NATIVE) 952 ssize = (sizeof (aiocb_t *) * nent); 953 #ifdef _SYSCALL32_IMPL 954 else 955 ssize = (sizeof (caddr32_t) * nent); 956 #endif /* _SYSCALL32_IMPL */ 957 958 cbplist = kmem_alloc(ssize, KM_NOSLEEP); 959 if (cbplist == NULL) 960 return (ENOMEM); 961 962 if (copyin(aiocb, cbplist, ssize)) { 963 error = EFAULT; 964 goto done; 965 } 966 967 found = NULL; 968 /* 969 * we need to get the aio_cleanupq_mutex since we call 970 * aio_req_done(). 971 */ 972 mutex_enter(&aiop->aio_cleanupq_mutex); 973 mutex_enter(&aiop->aio_mutex); 974 for (;;) { 975 /* push requests on poll queue to done queue */ 976 if (aiop->aio_pollq) { 977 mutex_exit(&aiop->aio_mutex); 978 mutex_exit(&aiop->aio_cleanupq_mutex); 979 aio_cleanup(0); 980 mutex_enter(&aiop->aio_cleanupq_mutex); 981 mutex_enter(&aiop->aio_mutex); 982 } 983 /* check for requests on done queue */ 984 if (aiop->aio_doneq) { 985 if (model == DATAMODEL_NATIVE) 986 ucbp = (aiocb_t **)cbplist; 987 #ifdef _SYSCALL32_IMPL 988 else 989 ucbp32 = (caddr32_t *)cbplist; 990 #endif /* _SYSCALL32_IMPL */ 991 for (i = 0; i < nent; i++) { 992 if (model == DATAMODEL_NATIVE) { 993 if ((cbp = *ucbp++) == NULL) 994 continue; 995 if (run_mode != AIO_LARGEFILE) 996 reqp = aio_req_done( 997 &cbp->aio_resultp); 998 else { 999 cbp64 = (aiocb64_32_t *)cbp; 1000 reqp = aio_req_done( 1001 &cbp64->aio_resultp); 1002 } 1003 } 1004 #ifdef _SYSCALL32_IMPL 1005 else { 1006 if (run_mode == AIO_32) { 1007 if ((cbp32 = 1008 (aiocb32_t *)(uintptr_t) 1009 *ucbp32++) == NULL) 1010 continue; 1011 reqp = aio_req_done( 1012 &cbp32->aio_resultp); 1013 } else if (run_mode == AIO_LARGEFILE) { 1014 if ((cbp64 = 1015 (aiocb64_32_t *)(uintptr_t) 1016 *ucbp32++) == NULL) 1017 continue; 1018 reqp = aio_req_done( 1019 &cbp64->aio_resultp); 1020 } 1021 1022 } 1023 #endif /* _SYSCALL32_IMPL */ 1024 if (reqp) { 1025 reqp->aio_req_next = found; 1026 found = reqp; 1027 } 1028 if (aiop->aio_doneq == NULL) 1029 break; 1030 } 1031 if (found) 1032 break; 1033 } 1034 if (aiop->aio_notifycnt > 0) { 1035 /* 1036 * nothing on the kernel's queue. the user 1037 * has notified the kernel that it has items 1038 * on a user-level queue. 1039 */ 1040 aiop->aio_notifycnt--; 1041 *rval = 1; 1042 error = 0; 1043 break; 1044 } 1045 /* don't block if nothing is outstanding */ 1046 if (aiop->aio_outstanding == 0) { 1047 error = EAGAIN; 1048 break; 1049 } 1050 if (blocking) { 1051 /* 1052 * drop the aio_cleanupq_mutex as we are 1053 * going to block. 1054 */ 1055 mutex_exit(&aiop->aio_cleanupq_mutex); 1056 rv = cv_waituntil_sig(&aiop->aio_waitcv, 1057 &aiop->aio_mutex, rqtp, timecheck); 1058 /* 1059 * we have to drop aio_mutex and 1060 * grab it in the right order. 
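 *
 * illustrative note: elsewhere in this file the two locks are taken
 * as aio_cleanupq_mutex first, then aio_mutex, so after the wait
 * (which left us holding only aio_mutex) the sequence is:
 *
 *	mutex_exit(&aiop->aio_mutex);
 *	mutex_enter(&aiop->aio_cleanupq_mutex);
 *	mutex_enter(&aiop->aio_mutex);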
			 */
			mutex_exit(&aiop->aio_mutex);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
			if (rv > 0)	/* check done queue again */
				continue;
			if (rv == 0)	/* interrupted by a signal */
				error = EINTR;
			else		/* timer expired */
				error = ETIME;
		} else {
			error = EAGAIN;
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	for (reqp = found; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
done:
	kmem_free(cbplist, ssize);
	return (error);
}

/*
 * initialize aio by allocating an aio_t struct for this
 * process.
 */
static int
aioinit(void)
{
	proc_t *p = curproc;
	aio_t *aiop;
	mutex_enter(&p->p_lock);
	if ((aiop = p->p_aio) == NULL) {
		aiop = aio_aiop_alloc();
		p->p_aio = aiop;
	}
	mutex_exit(&p->p_lock);
	if (aiop == NULL)
		return (ENOMEM);
	return (0);
}

/*
 * start a special thread that will clean up after aio requests
 * that are preventing a segment from being unmapped. as_unmap()
 * blocks until all physio to this segment is completed. this
 * doesn't happen until none of the pages in this segment are
 * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
 * requests still outstanding. this special thread will make sure
 * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
 *
 * this function will return an error if the process has only
 * one LWP. the assumption is that the caller is a separate LWP
 * that remains blocked in the kernel for the life of this process.
 */
static int
aiostart(void)
{
	proc_t *p = curproc;
	aio_t *aiop;
	int first, error = 0;

	if (p->p_lwpcnt == 1)
		return (EDEADLK);
	mutex_enter(&p->p_lock);
	if ((aiop = p->p_aio) == NULL)
		error = EINVAL;
	else {
		first = aiop->aio_ok;
		if (aiop->aio_ok == 0)
			aiop->aio_ok = 1;
	}
	mutex_exit(&p->p_lock);
	if (error == 0 && first == 0) {
		return (aio_cleanup_thread(aiop));
		/* should return only to exit */
	}
	return (error);
}

/*
 * Associate an aiocb with a port.
 * This function is used by aiorw() to associate a transaction with a port.
 * Allocate an event port structure (port_alloc_event()) and store the
 * delivered user pointer (portnfy_user) in the portkev_user field of the
 * port_kevent_t structure.
 * The aio_req_portkev pointer in the aio_req_t structure was added to identify
 * the port association.
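 *
 * a minimal sketch of the userland side, assuming hypothetical
 * variables port_fd and my_cookie (illustrative only):
 *
 *	port_notify_t pn = { port_fd, my_cookie };
 *	cb.aio_sigevent.sigev_notify = SIGEV_PORT;
 *	cb.aio_sigevent.sigev_value.sival_ptr = &pn;
 *
 * aiorw() copies pn in and this function binds the request so the
 * completion event carries the aiocb address and my_cookie.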
1157 */ 1158 1159 static int 1160 aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp, 1161 aio_req_t *reqp, int event) 1162 { 1163 port_kevent_t *pkevp = NULL; 1164 int error; 1165 1166 error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT, 1167 PORT_SOURCE_AIO, &pkevp); 1168 if (error) { 1169 if ((error == ENOMEM) || (error == EAGAIN)) 1170 error = EAGAIN; 1171 else 1172 error = EINVAL; 1173 } else { 1174 port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user, 1175 aio_port_callback, reqp); 1176 pkevp->portkev_events = event; 1177 reqp->aio_req_portkev = pkevp; 1178 reqp->aio_req_port = pntfy->portnfy_port; 1179 } 1180 return (error); 1181 } 1182 1183 #ifdef _LP64 1184 1185 /* 1186 * Asynchronous list IO. A chain of aiocb's are copied in 1187 * one at a time. If the aiocb is invalid, it is skipped. 1188 * For each aiocb, the appropriate driver entry point is 1189 * called. Optimize for the common case where the list 1190 * of requests is to the same file descriptor. 1191 * 1192 * One possible optimization is to define a new driver entry 1193 * point that supports a list of IO requests. Whether this 1194 * improves performance depends somewhat on the driver's 1195 * locking strategy. Processing a list could adversely impact 1196 * the driver's interrupt latency. 1197 */ 1198 static int 1199 alio( 1200 int mode_arg, 1201 aiocb_t **aiocb_arg, 1202 int nent, 1203 struct sigevent *sigev) 1204 { 1205 file_t *fp; 1206 file_t *prev_fp = NULL; 1207 int prev_mode = -1; 1208 struct vnode *vp; 1209 aio_lio_t *head; 1210 aio_req_t *reqp; 1211 aio_t *aiop; 1212 caddr_t cbplist; 1213 aiocb_t cb; 1214 aiocb_t *aiocb = &cb; 1215 aiocb_t *cbp; 1216 aiocb_t **ucbp; 1217 struct sigevent sigevk; 1218 sigqueue_t *sqp; 1219 int (*aio_func)(); 1220 int mode; 1221 int error = 0; 1222 int aio_errors = 0; 1223 int i; 1224 size_t ssize; 1225 int deadhead = 0; 1226 int aio_notsupported = 0; 1227 int lio_head_port; 1228 int aio_port; 1229 int aio_thread; 1230 port_kevent_t *pkevtp = NULL; 1231 int portused = 0; 1232 port_notify_t pnotify; 1233 int event; 1234 1235 aiop = curproc->p_aio; 1236 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 1237 return (EINVAL); 1238 1239 ssize = (sizeof (aiocb_t *) * nent); 1240 cbplist = kmem_alloc(ssize, KM_SLEEP); 1241 ucbp = (aiocb_t **)cbplist; 1242 1243 if (copyin(aiocb_arg, cbplist, ssize) || 1244 (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) { 1245 kmem_free(cbplist, ssize); 1246 return (EFAULT); 1247 } 1248 1249 /* Event Ports */ 1250 if (sigev && 1251 (sigevk.sigev_notify == SIGEV_THREAD || 1252 sigevk.sigev_notify == SIGEV_PORT)) { 1253 if (sigevk.sigev_notify == SIGEV_THREAD) { 1254 pnotify.portnfy_port = sigevk.sigev_signo; 1255 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 1256 } else if (copyin(sigevk.sigev_value.sival_ptr, 1257 &pnotify, sizeof (pnotify))) { 1258 kmem_free(cbplist, ssize); 1259 return (EFAULT); 1260 } 1261 error = port_alloc_event(pnotify.portnfy_port, 1262 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 1263 if (error) { 1264 if (error == ENOMEM || error == EAGAIN) 1265 error = EAGAIN; 1266 else 1267 error = EINVAL; 1268 kmem_free(cbplist, ssize); 1269 return (error); 1270 } 1271 lio_head_port = pnotify.portnfy_port; 1272 portused = 1; 1273 } 1274 1275 /* 1276 * a list head should be allocated if notification is 1277 * enabled for this list. 
1278 */ 1279 head = NULL; 1280 1281 if (mode_arg == LIO_WAIT || sigev) { 1282 mutex_enter(&aiop->aio_mutex); 1283 error = aio_lio_alloc(&head); 1284 mutex_exit(&aiop->aio_mutex); 1285 if (error) 1286 goto done; 1287 deadhead = 1; 1288 head->lio_nent = nent; 1289 head->lio_refcnt = nent; 1290 head->lio_port = -1; 1291 head->lio_portkev = NULL; 1292 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 1293 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 1294 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 1295 if (sqp == NULL) { 1296 error = EAGAIN; 1297 goto done; 1298 } 1299 sqp->sq_func = NULL; 1300 sqp->sq_next = NULL; 1301 sqp->sq_info.si_code = SI_ASYNCIO; 1302 sqp->sq_info.si_pid = curproc->p_pid; 1303 sqp->sq_info.si_ctid = PRCTID(curproc); 1304 sqp->sq_info.si_zoneid = getzoneid(); 1305 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 1306 sqp->sq_info.si_signo = sigevk.sigev_signo; 1307 sqp->sq_info.si_value = sigevk.sigev_value; 1308 head->lio_sigqp = sqp; 1309 } else { 1310 head->lio_sigqp = NULL; 1311 } 1312 if (pkevtp) { 1313 /* 1314 * Prepare data to send when list of aiocb's 1315 * has completed. 1316 */ 1317 port_init_event(pkevtp, (uintptr_t)sigev, 1318 (void *)(uintptr_t)pnotify.portnfy_user, 1319 NULL, head); 1320 pkevtp->portkev_events = AIOLIO; 1321 head->lio_portkev = pkevtp; 1322 head->lio_port = pnotify.portnfy_port; 1323 } 1324 } 1325 1326 for (i = 0; i < nent; i++, ucbp++) { 1327 1328 cbp = *ucbp; 1329 /* skip entry if it can't be copied. */ 1330 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) { 1331 if (head) { 1332 mutex_enter(&aiop->aio_mutex); 1333 head->lio_nent--; 1334 head->lio_refcnt--; 1335 mutex_exit(&aiop->aio_mutex); 1336 } 1337 continue; 1338 } 1339 1340 /* skip if opcode for aiocb is LIO_NOP */ 1341 mode = aiocb->aio_lio_opcode; 1342 if (mode == LIO_NOP) { 1343 cbp = NULL; 1344 if (head) { 1345 mutex_enter(&aiop->aio_mutex); 1346 head->lio_nent--; 1347 head->lio_refcnt--; 1348 mutex_exit(&aiop->aio_mutex); 1349 } 1350 continue; 1351 } 1352 1353 /* increment file descriptor's ref count. */ 1354 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 1355 lio_set_uerror(&cbp->aio_resultp, EBADF); 1356 if (head) { 1357 mutex_enter(&aiop->aio_mutex); 1358 head->lio_nent--; 1359 head->lio_refcnt--; 1360 mutex_exit(&aiop->aio_mutex); 1361 } 1362 aio_errors++; 1363 continue; 1364 } 1365 1366 /* 1367 * check the permission of the partition 1368 */ 1369 if ((fp->f_flag & mode) == 0) { 1370 releasef(aiocb->aio_fildes); 1371 lio_set_uerror(&cbp->aio_resultp, EBADF); 1372 if (head) { 1373 mutex_enter(&aiop->aio_mutex); 1374 head->lio_nent--; 1375 head->lio_refcnt--; 1376 mutex_exit(&aiop->aio_mutex); 1377 } 1378 aio_errors++; 1379 continue; 1380 } 1381 1382 /* 1383 * common case where requests are to the same fd 1384 * for the same r/w operation. 
1385 * for UFS, need to set EBADFD 1386 */ 1387 vp = fp->f_vnode; 1388 if (fp != prev_fp || mode != prev_mode) { 1389 aio_func = check_vp(vp, mode); 1390 if (aio_func == NULL) { 1391 prev_fp = NULL; 1392 releasef(aiocb->aio_fildes); 1393 lio_set_uerror(&cbp->aio_resultp, EBADFD); 1394 aio_notsupported++; 1395 if (head) { 1396 mutex_enter(&aiop->aio_mutex); 1397 head->lio_nent--; 1398 head->lio_refcnt--; 1399 mutex_exit(&aiop->aio_mutex); 1400 } 1401 continue; 1402 } else { 1403 prev_fp = fp; 1404 prev_mode = mode; 1405 } 1406 } 1407 1408 error = aio_req_setup(&reqp, aiop, aiocb, 1409 &cbp->aio_resultp, vp); 1410 if (error) { 1411 releasef(aiocb->aio_fildes); 1412 lio_set_uerror(&cbp->aio_resultp, error); 1413 if (head) { 1414 mutex_enter(&aiop->aio_mutex); 1415 head->lio_nent--; 1416 head->lio_refcnt--; 1417 mutex_exit(&aiop->aio_mutex); 1418 } 1419 aio_errors++; 1420 continue; 1421 } 1422 1423 reqp->aio_req_lio = head; 1424 deadhead = 0; 1425 1426 /* 1427 * Set the errno field now before sending the request to 1428 * the driver to avoid a race condition 1429 */ 1430 (void) suword32(&cbp->aio_resultp.aio_errno, 1431 EINPROGRESS); 1432 1433 reqp->aio_req_iocb.iocb = (caddr_t)cbp; 1434 1435 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE; 1436 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 1437 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 1438 if (aio_port | aio_thread) { 1439 port_kevent_t *lpkevp; 1440 /* 1441 * Prepare data to send with each aiocb completed. 1442 */ 1443 if (aio_port) { 1444 void *paddr = 1445 aiocb->aio_sigevent.sigev_value.sival_ptr; 1446 if (copyin(paddr, &pnotify, sizeof (pnotify))) 1447 error = EFAULT; 1448 } else { /* aio_thread */ 1449 pnotify.portnfy_port = 1450 aiocb->aio_sigevent.sigev_signo; 1451 pnotify.portnfy_user = 1452 aiocb->aio_sigevent.sigev_value.sival_ptr; 1453 } 1454 if (error) 1455 /* EMPTY */; 1456 else if (pkevtp != NULL && 1457 pnotify.portnfy_port == lio_head_port) 1458 error = port_dup_event(pkevtp, &lpkevp, 1459 PORT_ALLOC_DEFAULT); 1460 else 1461 error = port_alloc_event(pnotify.portnfy_port, 1462 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 1463 &lpkevp); 1464 if (error == 0) { 1465 port_init_event(lpkevp, (uintptr_t)cbp, 1466 (void *)(uintptr_t)pnotify.portnfy_user, 1467 aio_port_callback, reqp); 1468 lpkevp->portkev_events = event; 1469 reqp->aio_req_portkev = lpkevp; 1470 reqp->aio_req_port = pnotify.portnfy_port; 1471 } 1472 } 1473 1474 /* 1475 * send the request to driver. 1476 */ 1477 if (error == 0) { 1478 if (aiocb->aio_nbytes == 0) { 1479 clear_active_fd(aiocb->aio_fildes); 1480 aio_zerolen(reqp); 1481 continue; 1482 } 1483 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 1484 CRED()); 1485 } 1486 1487 /* 1488 * the fd's ref count is not decremented until the IO has 1489 * completed unless there was an error. 
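 *
 * a short illustrative summary of the fd reference handling in this
 * loop:
 *
 *	getf(aio_fildes)	... reference taken once per iocb above
 *	error path		... releasef(aio_fildes) right here
 *	success path		... clear_active_fd(aio_fildes); the
 *				    reference is dropped later, once
 *				    the I/O completes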
1490 */ 1491 if (error) { 1492 releasef(aiocb->aio_fildes); 1493 lio_set_uerror(&cbp->aio_resultp, error); 1494 if (head) { 1495 mutex_enter(&aiop->aio_mutex); 1496 head->lio_nent--; 1497 head->lio_refcnt--; 1498 mutex_exit(&aiop->aio_mutex); 1499 } 1500 if (error == ENOTSUP) 1501 aio_notsupported++; 1502 else 1503 aio_errors++; 1504 lio_set_error(reqp, portused); 1505 } else { 1506 clear_active_fd(aiocb->aio_fildes); 1507 } 1508 } 1509 1510 if (aio_notsupported) { 1511 error = ENOTSUP; 1512 } else if (aio_errors) { 1513 /* 1514 * return EIO if any request failed 1515 */ 1516 error = EIO; 1517 } 1518 1519 if (mode_arg == LIO_WAIT) { 1520 mutex_enter(&aiop->aio_mutex); 1521 while (head->lio_refcnt > 0) { 1522 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 1523 mutex_exit(&aiop->aio_mutex); 1524 error = EINTR; 1525 goto done; 1526 } 1527 } 1528 mutex_exit(&aiop->aio_mutex); 1529 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64); 1530 } 1531 1532 done: 1533 kmem_free(cbplist, ssize); 1534 if (deadhead) { 1535 if (head->lio_sigqp) 1536 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 1537 if (head->lio_portkev) 1538 port_free_event(head->lio_portkev); 1539 kmem_free(head, sizeof (aio_lio_t)); 1540 } 1541 return (error); 1542 } 1543 1544 #endif /* _LP64 */ 1545 1546 /* 1547 * Asynchronous list IO. 1548 * If list I/O is called with LIO_WAIT it can still return 1549 * before all the I/O's are completed if a signal is caught 1550 * or if the list include UFS I/O requests. If this happens, 1551 * libaio will call aliowait() to wait for the I/O's to 1552 * complete 1553 */ 1554 /*ARGSUSED*/ 1555 static int 1556 aliowait( 1557 int mode, 1558 void *aiocb, 1559 int nent, 1560 void *sigev, 1561 int run_mode) 1562 { 1563 aio_lio_t *head; 1564 aio_t *aiop; 1565 caddr_t cbplist; 1566 aiocb_t *cbp, **ucbp; 1567 #ifdef _SYSCALL32_IMPL 1568 aiocb32_t *cbp32; 1569 caddr32_t *ucbp32; 1570 aiocb64_32_t *cbp64; 1571 #endif 1572 int error = 0; 1573 int i; 1574 size_t ssize = 0; 1575 model_t model = get_udatamodel(); 1576 1577 aiop = curproc->p_aio; 1578 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 1579 return (EINVAL); 1580 1581 if (model == DATAMODEL_NATIVE) 1582 ssize = (sizeof (aiocb_t *) * nent); 1583 #ifdef _SYSCALL32_IMPL 1584 else 1585 ssize = (sizeof (caddr32_t) * nent); 1586 #endif /* _SYSCALL32_IMPL */ 1587 1588 if (ssize == 0) 1589 return (EINVAL); 1590 1591 cbplist = kmem_alloc(ssize, KM_SLEEP); 1592 1593 if (model == DATAMODEL_NATIVE) 1594 ucbp = (aiocb_t **)cbplist; 1595 #ifdef _SYSCALL32_IMPL 1596 else 1597 ucbp32 = (caddr32_t *)cbplist; 1598 #endif /* _SYSCALL32_IMPL */ 1599 1600 if (copyin(aiocb, cbplist, ssize)) { 1601 error = EFAULT; 1602 goto done; 1603 } 1604 1605 /* 1606 * To find the list head, we go through the 1607 * list of aiocb structs, find the request 1608 * its for, then get the list head that reqp 1609 * points to 1610 */ 1611 head = NULL; 1612 1613 for (i = 0; i < nent; i++) { 1614 if (model == DATAMODEL_NATIVE) { 1615 /* 1616 * Since we are only checking for a NULL pointer 1617 * Following should work on both native data sizes 1618 * as well as for largefile aiocb. 1619 */ 1620 if ((cbp = *ucbp++) == NULL) 1621 continue; 1622 if (run_mode != AIO_LARGEFILE) 1623 if (head = aio_list_get(&cbp->aio_resultp)) 1624 break; 1625 else { 1626 /* 1627 * This is a case when largefile call is 1628 * made on 32 bit kernel. 
1629 * Treat each pointer as pointer to 1630 * aiocb64_32 1631 */ 1632 if (head = aio_list_get((aio_result_t *) 1633 &(((aiocb64_32_t *)cbp)->aio_resultp))) 1634 break; 1635 } 1636 } 1637 #ifdef _SYSCALL32_IMPL 1638 else { 1639 if (run_mode == AIO_LARGEFILE) { 1640 if ((cbp64 = (aiocb64_32_t *) 1641 (uintptr_t)*ucbp32++) == NULL) 1642 continue; 1643 if (head = aio_list_get((aio_result_t *) 1644 &cbp64->aio_resultp)) 1645 break; 1646 } else if (run_mode == AIO_32) { 1647 if ((cbp32 = (aiocb32_t *) 1648 (uintptr_t)*ucbp32++) == NULL) 1649 continue; 1650 if (head = aio_list_get((aio_result_t *) 1651 &cbp32->aio_resultp)) 1652 break; 1653 } 1654 } 1655 #endif /* _SYSCALL32_IMPL */ 1656 } 1657 1658 if (head == NULL) { 1659 error = EINVAL; 1660 goto done; 1661 } 1662 1663 mutex_enter(&aiop->aio_mutex); 1664 while (head->lio_refcnt > 0) { 1665 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 1666 mutex_exit(&aiop->aio_mutex); 1667 error = EINTR; 1668 goto done; 1669 } 1670 } 1671 mutex_exit(&aiop->aio_mutex); 1672 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode); 1673 done: 1674 kmem_free(cbplist, ssize); 1675 return (error); 1676 } 1677 1678 aio_lio_t * 1679 aio_list_get(aio_result_t *resultp) 1680 { 1681 aio_lio_t *head = NULL; 1682 aio_t *aiop; 1683 aio_req_t **bucket; 1684 aio_req_t *reqp; 1685 long index; 1686 1687 aiop = curproc->p_aio; 1688 if (aiop == NULL) 1689 return (NULL); 1690 1691 if (resultp) { 1692 index = AIO_HASH(resultp); 1693 bucket = &aiop->aio_hash[index]; 1694 for (reqp = *bucket; reqp != NULL; 1695 reqp = reqp->aio_hash_next) { 1696 if (reqp->aio_req_resultp == resultp) { 1697 head = reqp->aio_req_lio; 1698 return (head); 1699 } 1700 } 1701 } 1702 return (NULL); 1703 } 1704 1705 1706 static void 1707 lio_set_uerror(void *resultp, int error) 1708 { 1709 /* 1710 * the resultp field is a pointer to where the 1711 * error should be written out to the user's 1712 * aiocb. 1713 * 1714 */ 1715 if (get_udatamodel() == DATAMODEL_NATIVE) { 1716 (void) sulword(&((aio_result_t *)resultp)->aio_return, 1717 (ssize_t)-1); 1718 (void) suword32(&((aio_result_t *)resultp)->aio_errno, error); 1719 } 1720 #ifdef _SYSCALL32_IMPL 1721 else { 1722 (void) suword32(&((aio_result32_t *)resultp)->aio_return, 1723 (uint_t)-1); 1724 (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error); 1725 } 1726 #endif /* _SYSCALL32_IMPL */ 1727 } 1728 1729 /* 1730 * do cleanup completion for all requests in list. memory for 1731 * each request is also freed. 
1732 */ 1733 static void 1734 alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode) 1735 { 1736 int i; 1737 aio_req_t *reqp; 1738 aio_result_t *resultp; 1739 aiocb64_32_t *aiocb_64; 1740 1741 for (i = 0; i < nent; i++) { 1742 if (get_udatamodel() == DATAMODEL_NATIVE) { 1743 if (cbp[i] == NULL) 1744 continue; 1745 if (run_mode == AIO_LARGEFILE) { 1746 aiocb_64 = (aiocb64_32_t *)cbp[i]; 1747 resultp = (aio_result_t *) 1748 &aiocb_64->aio_resultp; 1749 } else 1750 resultp = &cbp[i]->aio_resultp; 1751 } 1752 #ifdef _SYSCALL32_IMPL 1753 else { 1754 aiocb32_t *aiocb_32; 1755 caddr32_t *cbp32; 1756 1757 cbp32 = (caddr32_t *)cbp; 1758 if (cbp32[i] == NULL) 1759 continue; 1760 if (run_mode == AIO_32) { 1761 aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i]; 1762 resultp = (aio_result_t *)&aiocb_32-> 1763 aio_resultp; 1764 } else if (run_mode == AIO_LARGEFILE) { 1765 aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i]; 1766 resultp = (aio_result_t *)&aiocb_64-> 1767 aio_resultp; 1768 } 1769 } 1770 #endif /* _SYSCALL32_IMPL */ 1771 /* 1772 * we need to get the aio_cleanupq_mutex since we call 1773 * aio_req_done(). 1774 */ 1775 mutex_enter(&aiop->aio_cleanupq_mutex); 1776 mutex_enter(&aiop->aio_mutex); 1777 reqp = aio_req_done(resultp); 1778 mutex_exit(&aiop->aio_mutex); 1779 mutex_exit(&aiop->aio_cleanupq_mutex); 1780 if (reqp != NULL) { 1781 aphysio_unlock(reqp); 1782 aio_copyout_result(reqp); 1783 mutex_enter(&aiop->aio_mutex); 1784 aio_req_free(aiop, reqp); 1785 mutex_exit(&aiop->aio_mutex); 1786 } 1787 } 1788 } 1789 1790 /* 1791 * Write out the results for an aio request that is done. 1792 */ 1793 static int 1794 aioerror(void *cb, int run_mode) 1795 { 1796 aio_result_t *resultp; 1797 aio_t *aiop; 1798 aio_req_t *reqp; 1799 int retval; 1800 1801 aiop = curproc->p_aio; 1802 if (aiop == NULL || cb == NULL) 1803 return (EINVAL); 1804 1805 if (get_udatamodel() == DATAMODEL_NATIVE) { 1806 if (run_mode == AIO_LARGEFILE) 1807 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 1808 aio_resultp; 1809 else 1810 resultp = &((aiocb_t *)cb)->aio_resultp; 1811 } 1812 #ifdef _SYSCALL32_IMPL 1813 else { 1814 if (run_mode == AIO_LARGEFILE) 1815 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 1816 aio_resultp; 1817 else if (run_mode == AIO_32) 1818 resultp = (aio_result_t *)&((aiocb32_t *)cb)-> 1819 aio_resultp; 1820 } 1821 #endif /* _SYSCALL32_IMPL */ 1822 /* 1823 * we need to get the aio_cleanupq_mutex since we call 1824 * aio_req_find(). 
1825 */ 1826 mutex_enter(&aiop->aio_cleanupq_mutex); 1827 mutex_enter(&aiop->aio_mutex); 1828 retval = aio_req_find(resultp, &reqp); 1829 mutex_exit(&aiop->aio_mutex); 1830 mutex_exit(&aiop->aio_cleanupq_mutex); 1831 if (retval == 0) { 1832 aphysio_unlock(reqp); 1833 aio_copyout_result(reqp); 1834 mutex_enter(&aiop->aio_mutex); 1835 aio_req_free(aiop, reqp); 1836 mutex_exit(&aiop->aio_mutex); 1837 return (0); 1838 } else if (retval == 1) 1839 return (EINPROGRESS); 1840 else if (retval == 2) 1841 return (EINVAL); 1842 return (0); 1843 } 1844 1845 /* 1846 * aio_cancel - if no requests outstanding, 1847 * return AIO_ALLDONE 1848 * else 1849 * return AIO_NOTCANCELED 1850 */ 1851 static int 1852 aio_cancel( 1853 int fildes, 1854 void *cb, 1855 long *rval, 1856 int run_mode) 1857 { 1858 aio_t *aiop; 1859 void *resultp; 1860 int index; 1861 aio_req_t **bucket; 1862 aio_req_t *ent; 1863 1864 1865 /* 1866 * Verify valid file descriptor 1867 */ 1868 if ((getf(fildes)) == NULL) { 1869 return (EBADF); 1870 } 1871 releasef(fildes); 1872 1873 aiop = curproc->p_aio; 1874 if (aiop == NULL) 1875 return (EINVAL); 1876 1877 if (aiop->aio_outstanding == 0) { 1878 *rval = AIO_ALLDONE; 1879 return (0); 1880 } 1881 1882 mutex_enter(&aiop->aio_mutex); 1883 if (cb != NULL) { 1884 if (get_udatamodel() == DATAMODEL_NATIVE) { 1885 if (run_mode == AIO_LARGEFILE) 1886 resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 1887 ->aio_resultp; 1888 else 1889 resultp = &((aiocb_t *)cb)->aio_resultp; 1890 } 1891 #ifdef _SYSCALL32_IMPL 1892 else { 1893 if (run_mode == AIO_LARGEFILE) 1894 resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 1895 ->aio_resultp; 1896 else if (run_mode == AIO_32) 1897 resultp = (aio_result_t *)&((aiocb32_t *)cb) 1898 ->aio_resultp; 1899 } 1900 #endif /* _SYSCALL32_IMPL */ 1901 index = AIO_HASH(resultp); 1902 bucket = &aiop->aio_hash[index]; 1903 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 1904 if (ent->aio_req_resultp == resultp) { 1905 if ((ent->aio_req_flags & AIO_PENDING) == 0) { 1906 mutex_exit(&aiop->aio_mutex); 1907 *rval = AIO_ALLDONE; 1908 return (0); 1909 } 1910 mutex_exit(&aiop->aio_mutex); 1911 *rval = AIO_NOTCANCELED; 1912 return (0); 1913 } 1914 } 1915 mutex_exit(&aiop->aio_mutex); 1916 *rval = AIO_ALLDONE; 1917 return (0); 1918 } 1919 1920 for (index = 0; index < AIO_HASHSZ; index++) { 1921 bucket = &aiop->aio_hash[index]; 1922 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 1923 if (ent->aio_req_fd == fildes) { 1924 if ((ent->aio_req_flags & AIO_PENDING) != 0) { 1925 mutex_exit(&aiop->aio_mutex); 1926 *rval = AIO_NOTCANCELED; 1927 return (0); 1928 } 1929 } 1930 } 1931 } 1932 mutex_exit(&aiop->aio_mutex); 1933 *rval = AIO_ALLDONE; 1934 return (0); 1935 } 1936 1937 /* 1938 * solaris version of asynchronous read and write 1939 */ 1940 static int 1941 arw( 1942 int opcode, 1943 int fdes, 1944 char *bufp, 1945 int bufsize, 1946 offset_t offset, 1947 aio_result_t *resultp, 1948 int mode) 1949 { 1950 file_t *fp; 1951 int error; 1952 struct vnode *vp; 1953 aio_req_t *reqp; 1954 aio_t *aiop; 1955 int (*aio_func)(); 1956 #ifdef _LP64 1957 aiocb_t aiocb; 1958 #else 1959 aiocb64_32_t aiocb64; 1960 #endif 1961 1962 aiop = curproc->p_aio; 1963 if (aiop == NULL) 1964 return (EINVAL); 1965 1966 if ((fp = getf(fdes)) == NULL) { 1967 return (EBADF); 1968 } 1969 1970 /* 1971 * check the permission of the partition 1972 */ 1973 if ((fp->f_flag & mode) == 0) { 1974 releasef(fdes); 1975 return (EBADF); 1976 } 1977 1978 vp = fp->f_vnode; 1979 aio_func = check_vp(vp, mode); 1980 if 
(aio_func == NULL) { 1981 releasef(fdes); 1982 return (EBADFD); 1983 } 1984 #ifdef _LP64 1985 aiocb.aio_fildes = fdes; 1986 aiocb.aio_buf = bufp; 1987 aiocb.aio_nbytes = bufsize; 1988 aiocb.aio_offset = offset; 1989 aiocb.aio_sigevent.sigev_notify = 0; 1990 error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp); 1991 #else 1992 aiocb64.aio_fildes = fdes; 1993 aiocb64.aio_buf = (caddr32_t)bufp; 1994 aiocb64.aio_nbytes = bufsize; 1995 aiocb64.aio_offset = offset; 1996 aiocb64.aio_sigevent.sigev_notify = 0; 1997 error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp); 1998 #endif 1999 if (error) { 2000 releasef(fdes); 2001 return (error); 2002 } 2003 2004 /* 2005 * enable polling on this request if the opcode has 2006 * the AIO poll bit set 2007 */ 2008 if (opcode & AIO_POLL_BIT) 2009 reqp->aio_req_flags |= AIO_POLL; 2010 2011 if (bufsize == 0) { 2012 clear_active_fd(fdes); 2013 aio_zerolen(reqp); 2014 return (0); 2015 } 2016 /* 2017 * send the request to driver. 2018 */ 2019 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED()); 2020 /* 2021 * the fd is stored in the aio_req_t by aio_req_setup(), and 2022 * is released by the aio_cleanup_thread() when the IO has 2023 * completed. 2024 */ 2025 if (error) { 2026 releasef(fdes); 2027 mutex_enter(&aiop->aio_mutex); 2028 aio_req_free(aiop, reqp); 2029 aiop->aio_pending--; 2030 if (aiop->aio_flags & AIO_REQ_BLOCK) 2031 cv_signal(&aiop->aio_cleanupcv); 2032 mutex_exit(&aiop->aio_mutex); 2033 return (error); 2034 } 2035 clear_active_fd(fdes); 2036 return (0); 2037 } 2038 2039 /* 2040 * posix version of asynchronous read and write 2041 */ 2042 static int 2043 aiorw( 2044 int opcode, 2045 void *aiocb_arg, 2046 int mode, 2047 int run_mode) 2048 { 2049 #ifdef _SYSCALL32_IMPL 2050 aiocb32_t aiocb32; 2051 struct sigevent32 *sigev32; 2052 port_notify32_t pntfy32; 2053 #endif 2054 aiocb64_32_t aiocb64; 2055 aiocb_t aiocb; 2056 file_t *fp; 2057 int error, fd; 2058 size_t bufsize; 2059 struct vnode *vp; 2060 aio_req_t *reqp; 2061 aio_t *aiop; 2062 int (*aio_func)(); 2063 aio_result_t *resultp; 2064 struct sigevent *sigev; 2065 model_t model; 2066 int aio_use_port = 0; 2067 port_notify_t pntfy; 2068 2069 model = get_udatamodel(); 2070 aiop = curproc->p_aio; 2071 if (aiop == NULL) 2072 return (EINVAL); 2073 2074 if (model == DATAMODEL_NATIVE) { 2075 if (run_mode != AIO_LARGEFILE) { 2076 if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t))) 2077 return (EFAULT); 2078 bufsize = aiocb.aio_nbytes; 2079 resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp); 2080 if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) { 2081 return (EBADF); 2082 } 2083 sigev = &aiocb.aio_sigevent; 2084 } else { 2085 /* 2086 * We come here only when we make largefile 2087 * call on 32 bit kernel using 32 bit library. 
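 *
 * illustrative note (widths as used for the largefile control block
 * in arw() above): the caller handed us an aiocb64_32_t, so the
 * buffer pointer is still a 32-bit user address while the offset is
 * a full 64-bit quantity, roughly:
 *
 *	cb64.aio_buf	... caddr32_t, 32-bit user address
 *	cb64.aio_offset	... 64-bit file offset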
2088 */ 2089 if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t))) 2090 return (EFAULT); 2091 bufsize = aiocb64.aio_nbytes; 2092 resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg) 2093 ->aio_resultp); 2094 if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) 2095 return (EBADF); 2096 sigev = (struct sigevent *)&aiocb64.aio_sigevent; 2097 } 2098 2099 if (sigev->sigev_notify == SIGEV_PORT) { 2100 if (copyin((void *)sigev->sigev_value.sival_ptr, 2101 &pntfy, sizeof (port_notify_t))) { 2102 releasef(fd); 2103 return (EFAULT); 2104 } 2105 aio_use_port = 1; 2106 } else if (sigev->sigev_notify == SIGEV_THREAD) { 2107 pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo; 2108 pntfy.portnfy_user = 2109 aiocb.aio_sigevent.sigev_value.sival_ptr; 2110 aio_use_port = 1; 2111 } 2112 } 2113 #ifdef _SYSCALL32_IMPL 2114 else { 2115 if (run_mode == AIO_32) { 2116 /* 32 bit system call is being made on 64 bit kernel */ 2117 if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t))) 2118 return (EFAULT); 2119 2120 bufsize = aiocb32.aio_nbytes; 2121 aiocb_32ton(&aiocb32, &aiocb); 2122 resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)-> 2123 aio_resultp); 2124 if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) { 2125 return (EBADF); 2126 } 2127 sigev32 = &aiocb32.aio_sigevent; 2128 } else if (run_mode == AIO_LARGEFILE) { 2129 /* 2130 * We come here only when we make largefile 2131 * call on 64 bit kernel using 32 bit library. 2132 */ 2133 if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t))) 2134 return (EFAULT); 2135 bufsize = aiocb64.aio_nbytes; 2136 aiocb_LFton(&aiocb64, &aiocb); 2137 resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg) 2138 ->aio_resultp); 2139 if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) 2140 return (EBADF); 2141 sigev32 = &aiocb64.aio_sigevent; 2142 } 2143 2144 if (sigev32->sigev_notify == SIGEV_PORT) { 2145 if (copyin( 2146 (void *)(uintptr_t)sigev32->sigev_value.sival_ptr, 2147 &pntfy32, sizeof (port_notify32_t))) { 2148 releasef(fd); 2149 return (EFAULT); 2150 } 2151 pntfy.portnfy_port = pntfy32.portnfy_port; 2152 pntfy.portnfy_user = (void *)(uintptr_t) 2153 pntfy32.portnfy_user; 2154 aio_use_port = 1; 2155 } else if (sigev32->sigev_notify == SIGEV_THREAD) { 2156 pntfy.portnfy_port = sigev32->sigev_signo; 2157 pntfy.portnfy_user = (void *)(uintptr_t) 2158 sigev32->sigev_value.sival_ptr; 2159 aio_use_port = 1; 2160 } 2161 } 2162 #endif /* _SYSCALL32_IMPL */ 2163 2164 /* 2165 * check the permission of the partition 2166 */ 2167 2168 if ((fp->f_flag & mode) == 0) { 2169 releasef(fd); 2170 return (EBADF); 2171 } 2172 2173 vp = fp->f_vnode; 2174 aio_func = check_vp(vp, mode); 2175 if (aio_func == NULL) { 2176 releasef(fd); 2177 return (EBADFD); 2178 } 2179 if (run_mode == AIO_LARGEFILE) 2180 error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp); 2181 else 2182 error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp); 2183 2184 if (error) { 2185 releasef(fd); 2186 return (error); 2187 } 2188 /* 2189 * enable polling on this request if the opcode has 2190 * the AIO poll bit set 2191 */ 2192 if (opcode & AIO_POLL_BIT) 2193 reqp->aio_req_flags |= AIO_POLL; 2194 2195 if (model == DATAMODEL_NATIVE) 2196 reqp->aio_req_iocb.iocb = aiocb_arg; 2197 #ifdef _SYSCALL32_IMPL 2198 else 2199 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg; 2200 #endif 2201 2202 if (aio_use_port) { 2203 int event = (run_mode == AIO_LARGEFILE)? 2204 ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) : 2205 ((mode == FREAD)? 
AIOAREAD : AIOAWRITE);
		error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
	}

	/*
	 * send the request to driver.
	 */
	if (error == 0) {
		if (bufsize == 0) {
			clear_active_fd(fd);
			aio_zerolen(reqp);
			return (0);
		}
		error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
	}

	/*
	 * the fd is stored in the aio_req_t by aio_req_setup(), and
	 * is released by the aio_cleanup_thread() when the IO has
	 * completed.
	 */
	if (error) {
		releasef(fd);
		mutex_enter(&aiop->aio_mutex);
		if (aio_use_port)
			aio_deq(&aiop->aio_portpending, reqp);
		aio_req_free(aiop, reqp);
		aiop->aio_pending--;
		if (aiop->aio_flags & AIO_REQ_BLOCK)
			cv_signal(&aiop->aio_cleanupcv);
		mutex_exit(&aiop->aio_mutex);
		return (error);
	}
	clear_active_fd(fd);
	return (0);
}


/*
 * set error for a list IO entry that failed.
 */
static void
lio_set_error(aio_req_t *reqp, int portused)
{
	aio_t *aiop = curproc->p_aio;

	if (aiop == NULL)
		return;

	mutex_enter(&aiop->aio_mutex);
	if (portused)
		aio_deq(&aiop->aio_portpending, reqp);
	aiop->aio_pending--;
	/* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
	reqp->aio_req_flags |= AIO_PHYSIODONE;
	/*
	 * Need to free the request now as it's never
	 * going to get on the done queue
	 *
	 * Note: aio_outstanding is decremented in
	 * aio_req_free()
	 */
	aio_req_free(aiop, reqp);
	if (aiop->aio_flags & AIO_REQ_BLOCK)
		cv_signal(&aiop->aio_cleanupcv);
	mutex_exit(&aiop->aio_mutex);
}

/*
 * check if a specified request is done, and remove it from
 * the done queue. otherwise remove any request from the done queue
 * if NULL is specified.
 */
static aio_req_t *
aio_req_done(void *resultp)
{
	aio_req_t **bucket;
	aio_req_t *ent;
	aio_t *aiop = curproc->p_aio;
	long index;

	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (resultp) {
		index = AIO_HASH(resultp);
		bucket = &aiop->aio_hash[index];
		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
			if (ent->aio_req_resultp == (aio_result_t *)resultp) {
				if (ent->aio_req_flags & AIO_DONEQ) {
					return (aio_req_remove(ent));
				}
				return (NULL);
			}
		}
		/* no match, resultp is invalid */
		return (NULL);
	}
	return (aio_req_remove(NULL));
}

/*
 * determine if a user-level resultp pointer is associated with an
 * active IO request. Zero is returned when the request is done,
 * and the request is removed from the done queue. Only when the
 * return value is zero, is the "reqp" pointer valid. One is returned
 * when the request is in progress. Two is returned when the request
 * is invalid.
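 *
 * a short sketch of the caller pattern, as used by aioerror() above:
 *
 *	retval = aio_req_find(resultp, &reqp);
 *	retval == 0	... done; reqp is valid, copy out and free it
 *	retval == 1	... still in progress; report EINPROGRESS
 *	retval == 2	... unknown resultp; report EINVAL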
2313 */ 2314 static int 2315 aio_req_find(aio_result_t *resultp, aio_req_t **reqp) 2316 { 2317 aio_req_t **bucket; 2318 aio_req_t *ent; 2319 aio_t *aiop = curproc->p_aio; 2320 long index; 2321 2322 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex)); 2323 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2324 2325 index = AIO_HASH(resultp); 2326 bucket = &aiop->aio_hash[index]; 2327 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 2328 if (ent->aio_req_resultp == resultp) { 2329 if (ent->aio_req_flags & AIO_DONEQ) { 2330 *reqp = aio_req_remove(ent); 2331 return (0); 2332 } 2333 return (1); 2334 } 2335 } 2336 /* no match, resultp is invalid */ 2337 return (2); 2338 } 2339 2340 /* 2341 * remove a request from the done queue. 2342 */ 2343 static aio_req_t * 2344 aio_req_remove(aio_req_t *reqp) 2345 { 2346 aio_t *aiop = curproc->p_aio; 2347 2348 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2349 2350 if (reqp != NULL) { 2351 ASSERT(reqp->aio_req_flags & AIO_DONEQ); 2352 if (reqp->aio_req_next == reqp) { 2353 /* only one request on queue */ 2354 if (reqp == aiop->aio_doneq) { 2355 aiop->aio_doneq = NULL; 2356 } else { 2357 ASSERT(reqp == aiop->aio_cleanupq); 2358 aiop->aio_cleanupq = NULL; 2359 } 2360 } else { 2361 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 2362 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 2363 /* 2364 * The request can be either on the aio_doneq or the 2365 * aio_cleanupq 2366 */ 2367 if (reqp == aiop->aio_doneq) 2368 aiop->aio_doneq = reqp->aio_req_next; 2369 2370 if (reqp == aiop->aio_cleanupq) 2371 aiop->aio_cleanupq = reqp->aio_req_next; 2372 } 2373 reqp->aio_req_flags &= ~AIO_DONEQ; 2374 reqp->aio_req_next = NULL; 2375 reqp->aio_req_prev = NULL; 2376 } else if ((reqp = aiop->aio_doneq) != NULL) { 2377 ASSERT(reqp->aio_req_flags & AIO_DONEQ); 2378 if (reqp == reqp->aio_req_next) { 2379 /* only one request on queue */ 2380 aiop->aio_doneq = NULL; 2381 } else { 2382 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 2383 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 2384 aiop->aio_doneq = reqp->aio_req_next; 2385 } 2386 reqp->aio_req_flags &= ~AIO_DONEQ; 2387 reqp->aio_req_next = NULL; 2388 reqp->aio_req_prev = NULL; 2389 } 2390 if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN)) 2391 cv_broadcast(&aiop->aio_waitcv); 2392 return (reqp); 2393 } 2394 2395 static int 2396 aio_req_setup( 2397 aio_req_t **reqpp, 2398 aio_t *aiop, 2399 aiocb_t *arg, 2400 aio_result_t *resultp, 2401 vnode_t *vp) 2402 { 2403 sigqueue_t *sqp = NULL; 2404 aio_req_t *reqp; 2405 struct uio *uio; 2406 struct sigevent *sigev; 2407 int error; 2408 2409 sigev = &arg->aio_sigevent; 2410 if (sigev->sigev_notify == SIGEV_SIGNAL && 2411 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) { 2412 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 2413 if (sqp == NULL) 2414 return (EAGAIN); 2415 sqp->sq_func = NULL; 2416 sqp->sq_next = NULL; 2417 sqp->sq_info.si_code = SI_ASYNCIO; 2418 sqp->sq_info.si_pid = curproc->p_pid; 2419 sqp->sq_info.si_ctid = PRCTID(curproc); 2420 sqp->sq_info.si_zoneid = getzoneid(); 2421 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 2422 sqp->sq_info.si_signo = sigev->sigev_signo; 2423 sqp->sq_info.si_value = sigev->sigev_value; 2424 } 2425 2426 mutex_enter(&aiop->aio_mutex); 2427 2428 if (aiop->aio_flags & AIO_REQ_BLOCK) { 2429 mutex_exit(&aiop->aio_mutex); 2430 if (sqp) 2431 kmem_free(sqp, sizeof (sigqueue_t)); 2432 return (EIO); 2433 } 2434 /* 2435 * get an aio_reqp from the free list or allocate one 2436 * from dynamic memory. 
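 *
 * For reference, a hedged sketch of the free-list-first allocation
 * pattern that aio_req_alloc() implements below (illustrative only,
 * not a change to this file):
 *
 *	if ((reqp = aiop->aio_free) != NULL) {
 *		aiop->aio_free = reqp->aio_req_next;	... reuse entry ...
 *		bzero(reqp, sizeof (*reqp));
 *	} else if (freemem < desfree) {
 *		return (EAGAIN);	... memory is getting tight ...
 *	} else {
 *		reqp = kmem_zalloc(sizeof (*reqp), KM_NOSLEEP);
 *	}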
2437 */ 2438 if (error = aio_req_alloc(&reqp, resultp)) { 2439 mutex_exit(&aiop->aio_mutex); 2440 if (sqp) 2441 kmem_free(sqp, sizeof (sigqueue_t)); 2442 return (error); 2443 } 2444 aiop->aio_pending++; 2445 aiop->aio_outstanding++; 2446 reqp->aio_req_flags = AIO_PENDING; 2447 if (sigev->sigev_notify == SIGEV_THREAD || 2448 sigev->sigev_notify == SIGEV_PORT) 2449 aio_enq(&aiop->aio_portpending, reqp, 0); 2450 mutex_exit(&aiop->aio_mutex); 2451 /* 2452 * initialize aio request. 2453 */ 2454 reqp->aio_req_fd = arg->aio_fildes; 2455 reqp->aio_req_sigqp = sqp; 2456 reqp->aio_req_iocb.iocb = NULL; 2457 reqp->aio_req_lio = NULL; 2458 reqp->aio_req_buf.b_file = vp; 2459 uio = reqp->aio_req.aio_uio; 2460 uio->uio_iovcnt = 1; 2461 uio->uio_iov->iov_base = (caddr_t)arg->aio_buf; 2462 uio->uio_iov->iov_len = arg->aio_nbytes; 2463 uio->uio_loffset = arg->aio_offset; 2464 *reqpp = reqp; 2465 return (0); 2466 } 2467 2468 /* 2469 * Allocate p_aio struct. 2470 */ 2471 static aio_t * 2472 aio_aiop_alloc(void) 2473 { 2474 aio_t *aiop; 2475 2476 ASSERT(MUTEX_HELD(&curproc->p_lock)); 2477 2478 aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP); 2479 if (aiop) { 2480 mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL); 2481 mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT, 2482 NULL); 2483 mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL); 2484 } 2485 return (aiop); 2486 } 2487 2488 /* 2489 * Allocate an aio_req struct. 2490 */ 2491 static int 2492 aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp) 2493 { 2494 aio_req_t *reqp; 2495 aio_t *aiop = curproc->p_aio; 2496 2497 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2498 2499 if ((reqp = aiop->aio_free) != NULL) { 2500 aiop->aio_free = reqp->aio_req_next; 2501 bzero(reqp, sizeof (*reqp)); 2502 } else { 2503 /* 2504 * Check whether memory is getting tight. 2505 * This is a temporary mechanism to avoid memory 2506 * exhaustion by a single process until we come up 2507 * with a per process solution such as setrlimit(). 2508 */ 2509 if (freemem < desfree) 2510 return (EAGAIN); 2511 reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP); 2512 if (reqp == NULL) 2513 return (EAGAIN); 2514 } 2515 reqp->aio_req.aio_uio = &reqp->aio_req_uio; 2516 reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov; 2517 reqp->aio_req.aio_private = reqp; 2518 reqp->aio_req_buf.b_offset = -1; 2519 reqp->aio_req_resultp = resultp; 2520 if (aio_hash_insert(reqp, aiop)) { 2521 reqp->aio_req_next = aiop->aio_free; 2522 aiop->aio_free = reqp; 2523 return (EINVAL); 2524 } 2525 *nreqp = reqp; 2526 return (0); 2527 } 2528 2529 /* 2530 * Allocate an aio_lio_t struct. 2531 */ 2532 static int 2533 aio_lio_alloc(aio_lio_t **head) 2534 { 2535 aio_lio_t *liop; 2536 aio_t *aiop = curproc->p_aio; 2537 2538 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2539 2540 if ((liop = aiop->aio_lio_free) != NULL) { 2541 aiop->aio_lio_free = liop->lio_next; 2542 } else { 2543 /* 2544 * Check whether memory is getting tight. 2545 * This is a temporary mechanism to avoid memory 2546 * exhaustion by a single process until we come up 2547 * with a per process solution such as setrlimit(). 2548 */ 2549 if (freemem < desfree) 2550 return (EAGAIN); 2551 2552 liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP); 2553 if (liop == NULL) 2554 return (EAGAIN); 2555 } 2556 *head = liop; 2557 return (0); 2558 } 2559 2560 /* 2561 * this is a special per-process thread that is only activated if 2562 * the process is unmapping a segment with outstanding aio. 
normally, 2563 * the process will have completed the aio before unmapping the 2564 * segment. If the process does unmap a segment with outstanding aio, 2565 * this special thread will guarantee that the locked pages due to 2566 * aphysio() are released, thereby permitting the segment to be 2567 * unmapped. In addition to this, the cleanup thread is woken up 2568 * during DR operations to release the locked pages. 2569 */ 2570 2571 static int 2572 aio_cleanup_thread(aio_t *aiop) 2573 { 2574 proc_t *p = curproc; 2575 struct as *as = p->p_as; 2576 int poked = 0; 2577 kcondvar_t *cvp; 2578 int exit_flag = 0; 2579 int rqclnup = 0; 2580 2581 sigfillset(&curthread->t_hold); 2582 sigdiffset(&curthread->t_hold, &cantmask); 2583 for (;;) { 2584 /* 2585 * if a segment is being unmapped, and the current 2586 * process's done queue is not empty, then every request 2587 * on the doneq with locked resources should be forced 2588 * to release their locks. By moving the doneq request 2589 * to the cleanupq, aio_cleanup() will process the cleanupq, 2590 * and place requests back onto the doneq. All requests 2591 * processed by aio_cleanup() will have their physical 2592 * resources unlocked. 2593 */ 2594 mutex_enter(&aiop->aio_mutex); 2595 if ((aiop->aio_flags & AIO_CLEANUP) == 0) { 2596 aiop->aio_flags |= AIO_CLEANUP; 2597 mutex_enter(&as->a_contents); 2598 if (aiop->aio_rqclnup) { 2599 aiop->aio_rqclnup = 0; 2600 rqclnup = 1; 2601 } 2602 2603 if ((rqclnup || AS_ISUNMAPWAIT(as)) && 2604 aiop->aio_doneq) { 2605 aio_req_t *doneqhead = aiop->aio_doneq; 2606 mutex_exit(&as->a_contents); 2607 aiop->aio_doneq = NULL; 2608 aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ); 2609 } else { 2610 mutex_exit(&as->a_contents); 2611 } 2612 } 2613 mutex_exit(&aiop->aio_mutex); 2614 aio_cleanup(AIO_CLEANUP_THREAD); 2615 /* 2616 * thread should block on the cleanupcv while 2617 * AIO_CLEANUP is set. 2618 */ 2619 cvp = &aiop->aio_cleanupcv; 2620 mutex_enter(&aiop->aio_mutex); 2621 2622 if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL || 2623 aiop->aio_notifyq != NULL || 2624 aiop->aio_portcleanupq != NULL) { 2625 mutex_exit(&aiop->aio_mutex); 2626 continue; 2627 } 2628 mutex_enter(&as->a_contents); 2629 2630 /* 2631 * AIO_CLEANUP determines when the cleanup thread 2632 * should be active. This flag is set when 2633 * the cleanup thread is awakened by as_unmap() or 2634 * due to DR operations. 2635 * The flag is cleared when the blocking as_unmap() 2636 * that originally awakened us is allowed to 2637 * complete. as_unmap() blocks when trying to 2638 * unmap a segment that has SOFTLOCKed pages. When 2639 * the segment's pages are all SOFTUNLOCKed, 2640 * as->a_flags & AS_UNMAPWAIT should be zero. 2641 * 2642 * In case of a cleanup request by DR, the flag is cleared 2643 * once all the pending aio requests have been processed. 2644 * 2645 * The flag shouldn't be cleared right away if the 2646 * cleanup thread was interrupted because the process 2647 * is doing forkall(). This happens when cv_wait_sig() 2648 * returns zero, because it was awakened by a pokelwps(). 2649 * If the process is not exiting, it must be doing forkall().
2650 */ 2651 if ((poked == 0) && 2652 ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) || 2653 (aiop->aio_pending == 0))) { 2654 aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT); 2655 cvp = &as->a_cv; 2656 rqclnup = 0; 2657 } 2658 mutex_exit(&aiop->aio_mutex); 2659 if (poked) { 2660 /* 2661 * If the process is exiting/killed, don't return 2662 * immediately without waiting for pending I/O's 2663 * and releasing the page locks. 2664 */ 2665 if (p->p_flag & (SEXITLWPS|SKILLED)) { 2666 /* 2667 * If exit_flag is set, then it is 2668 * safe to exit because we have released 2669 * page locks of completed I/O's. 2670 */ 2671 if (exit_flag) 2672 break; 2673 2674 mutex_exit(&as->a_contents); 2675 2676 /* 2677 * Wait for all the pending aio to complete. 2678 */ 2679 mutex_enter(&aiop->aio_mutex); 2680 aiop->aio_flags |= AIO_REQ_BLOCK; 2681 while (aiop->aio_pending != 0) 2682 cv_wait(&aiop->aio_cleanupcv, 2683 &aiop->aio_mutex); 2684 mutex_exit(&aiop->aio_mutex); 2685 exit_flag = 1; 2686 continue; 2687 } else if (p->p_flag & 2688 (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) { 2689 /* 2690 * hold LWP until it 2691 * is continued. 2692 */ 2693 mutex_exit(&as->a_contents); 2694 mutex_enter(&p->p_lock); 2695 stop(PR_SUSPENDED, SUSPEND_NORMAL); 2696 mutex_exit(&p->p_lock); 2697 poked = 0; 2698 continue; 2699 } 2700 } else { 2701 /* 2702 * When started this thread will sleep on as->a_cv. 2703 * as_unmap will awake this thread if the 2704 * segment has SOFTLOCKed pages (poked = 0). 2705 * 1. pokelwps() awakes this thread => 2706 * break the loop to check SEXITLWPS, SHOLDFORK, etc 2707 * 2. as_unmap awakes this thread => 2708 * to break the loop it is necessary that 2709 * - AS_UNMAPWAIT is set (as_unmap is waiting for 2710 * memory to be unlocked) 2711 * - AIO_CLEANUP is not set 2712 * (if AIO_CLEANUP is set we have to wait for 2713 * pending requests. aio_done will send a signal 2714 * for every request which completes to continue 2715 * unmapping the corresponding address range) 2716 * 3. A cleanup request will wake this thread up, ex. 2717 * by the DR operations. The aio_rqclnup flag will 2718 * be set. 2719 */ 2720 while (poked == 0) { 2721 /* 2722 * The clean up requests that came in 2723 * after we had just cleaned up, couldn't 2724 * be causing the unmap thread to block - as 2725 * unmap event happened first. 2726 * Let aio_done() wake us up if it sees a need. 2727 */ 2728 if (aiop->aio_rqclnup && 2729 (aiop->aio_flags & AIO_CLEANUP) == 0) 2730 break; 2731 poked = !cv_wait_sig(cvp, &as->a_contents); 2732 if (AS_ISUNMAPWAIT(as) == 0) 2733 cv_signal(cvp); 2734 if (aiop->aio_outstanding != 0) 2735 break; 2736 } 2737 } 2738 mutex_exit(&as->a_contents); 2739 } 2740 exit: 2741 mutex_exit(&as->a_contents); 2742 ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED))); 2743 aston(curthread); /* make thread do post_syscall */ 2744 return (0); 2745 } 2746 2747 /* 2748 * save a reference to a user's outstanding aio in a hash list. 
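 *
 * The matching lookup walks the same chain; a hedged sketch of that
 * pattern, as used by aio_req_done() and aio_req_find() above
 * (illustrative only):
 *
 *	bucket = &aiop->aio_hash[AIO_HASH(resultp)];
 *	for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next)
 *		if (ent->aio_req_resultp == resultp)
 *			... found the outstanding request ...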
2749 */ 2750 static int 2751 aio_hash_insert( 2752 aio_req_t *aio_reqp, 2753 aio_t *aiop) 2754 { 2755 long index; 2756 aio_result_t *resultp = aio_reqp->aio_req_resultp; 2757 aio_req_t *current; 2758 aio_req_t **nextp; 2759 2760 index = AIO_HASH(resultp); 2761 nextp = &aiop->aio_hash[index]; 2762 while ((current = *nextp) != NULL) { 2763 if (current->aio_req_resultp == resultp) 2764 return (DUPLICATE); 2765 nextp = ¤t->aio_hash_next; 2766 } 2767 *nextp = aio_reqp; 2768 aio_reqp->aio_hash_next = NULL; 2769 return (0); 2770 } 2771 2772 static int 2773 (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *, 2774 cred_t *) 2775 { 2776 struct snode *sp; 2777 dev_t dev; 2778 struct cb_ops *cb; 2779 major_t major; 2780 int (*aio_func)(); 2781 2782 dev = vp->v_rdev; 2783 major = getmajor(dev); 2784 2785 /* 2786 * return NULL for requests to files and STREAMs so 2787 * that libaio takes care of them. 2788 */ 2789 if (vp->v_type == VCHR) { 2790 /* no stream device for kaio */ 2791 if (STREAMSTAB(major)) { 2792 return (NULL); 2793 } 2794 } else { 2795 return (NULL); 2796 } 2797 2798 /* 2799 * Check old drivers which do not have async I/O entry points. 2800 */ 2801 if (devopsp[major]->devo_rev < 3) 2802 return (NULL); 2803 2804 cb = devopsp[major]->devo_cb_ops; 2805 2806 if (cb->cb_rev < 1) 2807 return (NULL); 2808 2809 /* 2810 * Check whether this device is a block device. 2811 * Kaio is not supported for devices like tty. 2812 */ 2813 if (cb->cb_strategy == nodev || cb->cb_strategy == NULL) 2814 return (NULL); 2815 2816 /* 2817 * Clustering: If vnode is a PXFS vnode, then the device may be remote. 2818 * We cannot call the driver directly. Instead return the 2819 * PXFS functions. 2820 */ 2821 2822 if (IS_PXFSVP(vp)) { 2823 if (mode & FREAD) 2824 return (clpxfs_aio_read); 2825 else 2826 return (clpxfs_aio_write); 2827 } 2828 if (mode & FREAD) 2829 aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read; 2830 else 2831 aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write; 2832 2833 /* 2834 * Do we need this ? 2835 * nodev returns ENXIO anyway. 2836 */ 2837 if (aio_func == nodev) 2838 return (NULL); 2839 2840 sp = VTOS(vp); 2841 smark(sp, SACC); 2842 return (aio_func); 2843 } 2844 2845 /* 2846 * Clustering: We want check_vp to return a function prototyped 2847 * correctly that will be common to both PXFS and regular case. 2848 * We define this intermediate function that will do the right 2849 * thing for driver cases. 2850 */ 2851 2852 static int 2853 driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 2854 { 2855 dev_t dev; 2856 struct cb_ops *cb; 2857 2858 ASSERT(vp->v_type == VCHR); 2859 ASSERT(!IS_PXFSVP(vp)); 2860 dev = VTOS(vp)->s_dev; 2861 ASSERT(STREAMSTAB(getmajor(dev)) == NULL); 2862 2863 cb = devopsp[getmajor(dev)]->devo_cb_ops; 2864 2865 ASSERT(cb->cb_awrite != nodev); 2866 return ((*cb->cb_awrite)(dev, aio, cred_p)); 2867 } 2868 2869 /* 2870 * Clustering: We want check_vp to return a function prototyped 2871 * correctly that will be common to both PXFS and regular case. 2872 * We define this intermediate function that will do the right 2873 * thing for driver cases. 
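 *
 * A hedged sketch of how the call sites in this file use the pointer
 * returned by check_vp(), which is the common prototype referred to
 * above (illustrative only):
 *
 *	int (*aio_func)(vnode_t *, struct aio_req *, cred_t *);
 *
 *	if ((aio_func = check_vp(vp, mode)) == NULL)
 *		return (EBADFD);	... kaio not supported here ...
 *	error = (*aio_func)(vp, &reqp->aio_req, CRED());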
2874 */ 2875 2876 static int 2877 driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 2878 { 2879 dev_t dev; 2880 struct cb_ops *cb; 2881 2882 ASSERT(vp->v_type == VCHR); 2883 ASSERT(!IS_PXFSVP(vp)); 2884 dev = VTOS(vp)->s_dev; 2885 ASSERT(!STREAMSTAB(getmajor(dev))); 2886 2887 cb = devopsp[getmajor(dev)]->devo_cb_ops; 2888 2889 ASSERT(cb->cb_aread != nodev); 2890 return ((*cb->cb_aread)(dev, aio, cred_p)); 2891 } 2892 2893 /* 2894 * This routine is called when a largefile call is made by a 32bit 2895 * process on a ILP32 or LP64 kernel. All 64bit processes are large 2896 * file by definition and will call alio() instead. 2897 */ 2898 static int 2899 alioLF( 2900 int mode_arg, 2901 void *aiocb_arg, 2902 int nent, 2903 void *sigev) 2904 { 2905 file_t *fp; 2906 file_t *prev_fp = NULL; 2907 int prev_mode = -1; 2908 struct vnode *vp; 2909 aio_lio_t *head; 2910 aio_req_t *reqp; 2911 aio_t *aiop; 2912 caddr_t cbplist; 2913 aiocb64_32_t cb64; 2914 aiocb64_32_t *aiocb = &cb64; 2915 aiocb64_32_t *cbp; 2916 caddr32_t *ucbp; 2917 #ifdef _LP64 2918 aiocb_t aiocb_n; 2919 #endif 2920 struct sigevent32 sigevk; 2921 sigqueue_t *sqp; 2922 int (*aio_func)(); 2923 int mode; 2924 int error = 0; 2925 int aio_errors = 0; 2926 int i; 2927 size_t ssize; 2928 int deadhead = 0; 2929 int aio_notsupported = 0; 2930 int lio_head_port; 2931 int aio_port; 2932 int aio_thread; 2933 port_kevent_t *pkevtp = NULL; 2934 int portused = 0; 2935 port_notify32_t pnotify; 2936 int event; 2937 2938 aiop = curproc->p_aio; 2939 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 2940 return (EINVAL); 2941 2942 ASSERT(get_udatamodel() == DATAMODEL_ILP32); 2943 2944 ssize = (sizeof (caddr32_t) * nent); 2945 cbplist = kmem_alloc(ssize, KM_SLEEP); 2946 ucbp = (caddr32_t *)cbplist; 2947 2948 if (copyin(aiocb_arg, cbplist, ssize) || 2949 (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) { 2950 kmem_free(cbplist, ssize); 2951 return (EFAULT); 2952 } 2953 2954 /* Event Ports */ 2955 if (sigev && 2956 (sigevk.sigev_notify == SIGEV_THREAD || 2957 sigevk.sigev_notify == SIGEV_PORT)) { 2958 if (sigevk.sigev_notify == SIGEV_THREAD) { 2959 pnotify.portnfy_port = sigevk.sigev_signo; 2960 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 2961 } else if (copyin( 2962 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr, 2963 &pnotify, sizeof (pnotify))) { 2964 kmem_free(cbplist, ssize); 2965 return (EFAULT); 2966 } 2967 error = port_alloc_event(pnotify.portnfy_port, 2968 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 2969 if (error) { 2970 if (error == ENOMEM || error == EAGAIN) 2971 error = EAGAIN; 2972 else 2973 error = EINVAL; 2974 kmem_free(cbplist, ssize); 2975 return (error); 2976 } 2977 lio_head_port = pnotify.portnfy_port; 2978 portused = 1; 2979 } 2980 2981 /* 2982 * a list head should be allocated if notification is 2983 * enabled for this list. 
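 *
 * A hedged sketch of the list-head accounting used below (illustrative
 * only): every entry that is skipped or fails drops both counters, and
 * LIO_WAIT blocks until the remaining requests signal lio_notify:
 *
 *	mutex_enter(&aiop->aio_mutex);
 *	head->lio_nent--;
 *	head->lio_refcnt--;
 *	mutex_exit(&aiop->aio_mutex);
 *	...
 *	while (head->lio_refcnt > 0)
 *		if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex))
 *			... interrupted; the caller returns EINTR ...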
2984 */ 2985 head = NULL; 2986 2987 if (mode_arg == LIO_WAIT || sigev) { 2988 mutex_enter(&aiop->aio_mutex); 2989 error = aio_lio_alloc(&head); 2990 mutex_exit(&aiop->aio_mutex); 2991 if (error) 2992 goto done; 2993 deadhead = 1; 2994 head->lio_nent = nent; 2995 head->lio_refcnt = nent; 2996 head->lio_port = -1; 2997 head->lio_portkev = NULL; 2998 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 2999 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 3000 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3001 if (sqp == NULL) { 3002 error = EAGAIN; 3003 goto done; 3004 } 3005 sqp->sq_func = NULL; 3006 sqp->sq_next = NULL; 3007 sqp->sq_info.si_code = SI_ASYNCIO; 3008 sqp->sq_info.si_pid = curproc->p_pid; 3009 sqp->sq_info.si_ctid = PRCTID(curproc); 3010 sqp->sq_info.si_zoneid = getzoneid(); 3011 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3012 sqp->sq_info.si_signo = sigevk.sigev_signo; 3013 sqp->sq_info.si_value.sival_int = 3014 sigevk.sigev_value.sival_int; 3015 head->lio_sigqp = sqp; 3016 } else { 3017 head->lio_sigqp = NULL; 3018 } 3019 if (pkevtp) { 3020 /* 3021 * Prepare data to send when list of aiocb's 3022 * has completed. 3023 */ 3024 port_init_event(pkevtp, (uintptr_t)sigev, 3025 (void *)(uintptr_t)pnotify.portnfy_user, 3026 NULL, head); 3027 pkevtp->portkev_events = AIOLIO64; 3028 head->lio_portkev = pkevtp; 3029 head->lio_port = pnotify.portnfy_port; 3030 } 3031 } 3032 3033 for (i = 0; i < nent; i++, ucbp++) { 3034 3035 cbp = (aiocb64_32_t *)(uintptr_t)*ucbp; 3036 /* skip entry if it can't be copied. */ 3037 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) { 3038 if (head) { 3039 mutex_enter(&aiop->aio_mutex); 3040 head->lio_nent--; 3041 head->lio_refcnt--; 3042 mutex_exit(&aiop->aio_mutex); 3043 } 3044 continue; 3045 } 3046 3047 /* skip if opcode for aiocb is LIO_NOP */ 3048 mode = aiocb->aio_lio_opcode; 3049 if (mode == LIO_NOP) { 3050 cbp = NULL; 3051 if (head) { 3052 mutex_enter(&aiop->aio_mutex); 3053 head->lio_nent--; 3054 head->lio_refcnt--; 3055 mutex_exit(&aiop->aio_mutex); 3056 } 3057 continue; 3058 } 3059 3060 /* increment file descriptor's ref count. 
*/ 3061 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 3062 lio_set_uerror(&cbp->aio_resultp, EBADF); 3063 if (head) { 3064 mutex_enter(&aiop->aio_mutex); 3065 head->lio_nent--; 3066 head->lio_refcnt--; 3067 mutex_exit(&aiop->aio_mutex); 3068 } 3069 aio_errors++; 3070 continue; 3071 } 3072 3073 /* 3074 * check the permission of the partition 3075 */ 3076 if ((fp->f_flag & mode) == 0) { 3077 releasef(aiocb->aio_fildes); 3078 lio_set_uerror(&cbp->aio_resultp, EBADF); 3079 if (head) { 3080 mutex_enter(&aiop->aio_mutex); 3081 head->lio_nent--; 3082 head->lio_refcnt--; 3083 mutex_exit(&aiop->aio_mutex); 3084 } 3085 aio_errors++; 3086 continue; 3087 } 3088 3089 /* 3090 * common case where requests are to the same fd 3091 * for the same r/w operation 3092 * for UFS, need to set EBADFD 3093 */ 3094 vp = fp->f_vnode; 3095 if (fp != prev_fp || mode != prev_mode) { 3096 aio_func = check_vp(vp, mode); 3097 if (aio_func == NULL) { 3098 prev_fp = NULL; 3099 releasef(aiocb->aio_fildes); 3100 lio_set_uerror(&cbp->aio_resultp, EBADFD); 3101 aio_notsupported++; 3102 if (head) { 3103 mutex_enter(&aiop->aio_mutex); 3104 head->lio_nent--; 3105 head->lio_refcnt--; 3106 mutex_exit(&aiop->aio_mutex); 3107 } 3108 continue; 3109 } else { 3110 prev_fp = fp; 3111 prev_mode = mode; 3112 } 3113 } 3114 3115 #ifdef _LP64 3116 aiocb_LFton(aiocb, &aiocb_n); 3117 error = aio_req_setup(&reqp, aiop, &aiocb_n, 3118 (aio_result_t *)&cbp->aio_resultp, vp); 3119 #else 3120 error = aio_req_setupLF(&reqp, aiop, aiocb, 3121 (aio_result_t *)&cbp->aio_resultp, vp); 3122 #endif /* _LP64 */ 3123 if (error) { 3124 releasef(aiocb->aio_fildes); 3125 lio_set_uerror(&cbp->aio_resultp, error); 3126 if (head) { 3127 mutex_enter(&aiop->aio_mutex); 3128 head->lio_nent--; 3129 head->lio_refcnt--; 3130 mutex_exit(&aiop->aio_mutex); 3131 } 3132 aio_errors++; 3133 continue; 3134 } 3135 3136 reqp->aio_req_lio = head; 3137 deadhead = 0; 3138 3139 /* 3140 * Set the errno field now before sending the request to 3141 * the driver to avoid a race condition 3142 */ 3143 (void) suword32(&cbp->aio_resultp.aio_errno, 3144 EINPROGRESS); 3145 3146 reqp->aio_req_iocb.iocb32 = *ucbp; 3147 3148 event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64; 3149 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 3150 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 3151 if (aio_port | aio_thread) { 3152 port_kevent_t *lpkevp; 3153 /* 3154 * Prepare data to send with each aiocb completed. 3155 */ 3156 if (aio_port) { 3157 void *paddr = (void *)(uintptr_t) 3158 aiocb->aio_sigevent.sigev_value.sival_ptr; 3159 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3160 error = EFAULT; 3161 } else { /* aio_thread */ 3162 pnotify.portnfy_port = 3163 aiocb->aio_sigevent.sigev_signo; 3164 pnotify.portnfy_user = 3165 aiocb->aio_sigevent.sigev_value.sival_ptr; 3166 } 3167 if (error) 3168 /* EMPTY */; 3169 else if (pkevtp != NULL && 3170 pnotify.portnfy_port == lio_head_port) 3171 error = port_dup_event(pkevtp, &lpkevp, 3172 PORT_ALLOC_DEFAULT); 3173 else 3174 error = port_alloc_event(pnotify.portnfy_port, 3175 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 3176 &lpkevp); 3177 if (error == 0) { 3178 port_init_event(lpkevp, (uintptr_t)*ucbp, 3179 (void *)(uintptr_t)pnotify.portnfy_user, 3180 aio_port_callback, reqp); 3181 lpkevp->portkev_events = event; 3182 reqp->aio_req_portkev = lpkevp; 3183 reqp->aio_req_port = pnotify.portnfy_port; 3184 } 3185 } 3186 3187 /* 3188 * send the request to driver. 
3189 */ 3190 if (error == 0) { 3191 if (aiocb->aio_nbytes == 0) { 3192 clear_active_fd(aiocb->aio_fildes); 3193 aio_zerolen(reqp); 3194 continue; 3195 } 3196 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 3197 CRED()); 3198 } 3199 3200 /* 3201 * the fd's ref count is not decremented until the IO has 3202 * completed unless there was an error. 3203 */ 3204 if (error) { 3205 releasef(aiocb->aio_fildes); 3206 lio_set_uerror(&cbp->aio_resultp, error); 3207 if (head) { 3208 mutex_enter(&aiop->aio_mutex); 3209 head->lio_nent--; 3210 head->lio_refcnt--; 3211 mutex_exit(&aiop->aio_mutex); 3212 } 3213 if (error == ENOTSUP) 3214 aio_notsupported++; 3215 else 3216 aio_errors++; 3217 lio_set_error(reqp, portused); 3218 } else { 3219 clear_active_fd(aiocb->aio_fildes); 3220 } 3221 } 3222 3223 if (aio_notsupported) { 3224 error = ENOTSUP; 3225 } else if (aio_errors) { 3226 /* 3227 * return EIO if any request failed 3228 */ 3229 error = EIO; 3230 } 3231 3232 if (mode_arg == LIO_WAIT) { 3233 mutex_enter(&aiop->aio_mutex); 3234 while (head->lio_refcnt > 0) { 3235 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 3236 mutex_exit(&aiop->aio_mutex); 3237 error = EINTR; 3238 goto done; 3239 } 3240 } 3241 mutex_exit(&aiop->aio_mutex); 3242 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE); 3243 } 3244 3245 done: 3246 kmem_free(cbplist, ssize); 3247 if (deadhead) { 3248 if (head->lio_sigqp) 3249 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 3250 if (head->lio_portkev) 3251 port_free_event(head->lio_portkev); 3252 kmem_free(head, sizeof (aio_lio_t)); 3253 } 3254 return (error); 3255 } 3256 3257 #ifdef _SYSCALL32_IMPL 3258 static void 3259 aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest) 3260 { 3261 dest->aio_fildes = src->aio_fildes; 3262 dest->aio_buf = (void *)(uintptr_t)src->aio_buf; 3263 dest->aio_nbytes = (size_t)src->aio_nbytes; 3264 dest->aio_offset = (off_t)src->aio_offset; 3265 dest->aio_reqprio = src->aio_reqprio; 3266 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 3267 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 3268 3269 /* 3270 * See comment in sigqueue32() on handling of 32-bit 3271 * sigvals in a 64-bit kernel. 3272 */ 3273 dest->aio_sigevent.sigev_value.sival_int = 3274 (int)src->aio_sigevent.sigev_value.sival_int; 3275 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 3276 (uintptr_t)src->aio_sigevent.sigev_notify_function; 3277 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 3278 (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 3279 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 3280 dest->aio_lio_opcode = src->aio_lio_opcode; 3281 dest->aio_state = src->aio_state; 3282 dest->aio__pad[0] = src->aio__pad[0]; 3283 } 3284 #endif 3285 3286 /* 3287 * This function is used only for largefile calls made by 3288 * 32 bit applications. 
3289 */ 3290 static int 3291 aio_req_setupLF( 3292 aio_req_t **reqpp, 3293 aio_t *aiop, 3294 aiocb64_32_t *arg, 3295 aio_result_t *resultp, 3296 vnode_t *vp) 3297 { 3298 sigqueue_t *sqp = NULL; 3299 aio_req_t *reqp; 3300 struct uio *uio; 3301 struct sigevent32 *sigev; 3302 int error; 3303 3304 sigev = &arg->aio_sigevent; 3305 if (sigev->sigev_notify == SIGEV_SIGNAL && 3306 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) { 3307 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3308 if (sqp == NULL) 3309 return (EAGAIN); 3310 sqp->sq_func = NULL; 3311 sqp->sq_next = NULL; 3312 sqp->sq_info.si_code = SI_ASYNCIO; 3313 sqp->sq_info.si_pid = curproc->p_pid; 3314 sqp->sq_info.si_ctid = PRCTID(curproc); 3315 sqp->sq_info.si_zoneid = getzoneid(); 3316 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3317 sqp->sq_info.si_signo = sigev->sigev_signo; 3318 sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int; 3319 } 3320 3321 mutex_enter(&aiop->aio_mutex); 3322 3323 if (aiop->aio_flags & AIO_REQ_BLOCK) { 3324 mutex_exit(&aiop->aio_mutex); 3325 if (sqp) 3326 kmem_free(sqp, sizeof (sigqueue_t)); 3327 return (EIO); 3328 } 3329 /* 3330 * get an aio_reqp from the free list or allocate one 3331 * from dynamic memory. 3332 */ 3333 if (error = aio_req_alloc(&reqp, resultp)) { 3334 mutex_exit(&aiop->aio_mutex); 3335 if (sqp) 3336 kmem_free(sqp, sizeof (sigqueue_t)); 3337 return (error); 3338 } 3339 aiop->aio_pending++; 3340 aiop->aio_outstanding++; 3341 reqp->aio_req_flags = AIO_PENDING; 3342 if (sigev->sigev_notify == SIGEV_THREAD || 3343 sigev->sigev_notify == SIGEV_PORT) 3344 aio_enq(&aiop->aio_portpending, reqp, 0); 3345 mutex_exit(&aiop->aio_mutex); 3346 /* 3347 * initialize aio request. 3348 */ 3349 reqp->aio_req_fd = arg->aio_fildes; 3350 reqp->aio_req_sigqp = sqp; 3351 reqp->aio_req_iocb.iocb = NULL; 3352 reqp->aio_req_lio = NULL; 3353 reqp->aio_req_buf.b_file = vp; 3354 uio = reqp->aio_req.aio_uio; 3355 uio->uio_iovcnt = 1; 3356 uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf; 3357 uio->uio_iov->iov_len = arg->aio_nbytes; 3358 uio->uio_loffset = arg->aio_offset; 3359 *reqpp = reqp; 3360 return (0); 3361 } 3362 3363 /* 3364 * This routine is called when a non largefile call is made by a 32bit 3365 * process on a ILP32 or LP64 kernel. 
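 *
 * For orientation, the list-I/O entry points in this file are selected
 * by the caller's data model (a summary of the code above, not new
 * behavior):
 *
 *	64-bit process			-> alio()	native aiocb_t
 *	32-bit process, largefile call	-> alioLF()	aiocb64_32_t
 *	32-bit process, regular call	-> alio32()	aiocb32_t, converted
 *					   with aiocb_32ton() on an LP64
 *					   kernel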
3366 */ 3367 static int 3368 alio32( 3369 int mode_arg, 3370 void *aiocb_arg, 3371 int nent, 3372 void *sigev) 3373 { 3374 file_t *fp; 3375 file_t *prev_fp = NULL; 3376 int prev_mode = -1; 3377 struct vnode *vp; 3378 aio_lio_t *head; 3379 aio_req_t *reqp; 3380 aio_t *aiop; 3381 caddr_t cbplist; 3382 aiocb_t cb; 3383 aiocb_t *aiocb = &cb; 3384 #ifdef _LP64 3385 aiocb32_t *cbp; 3386 caddr32_t *ucbp; 3387 aiocb32_t cb32; 3388 aiocb32_t *aiocb32 = &cb32; 3389 struct sigevent32 sigevk; 3390 #else 3391 aiocb_t *cbp, **ucbp; 3392 struct sigevent sigevk; 3393 #endif 3394 sigqueue_t *sqp; 3395 int (*aio_func)(); 3396 int mode; 3397 int error = 0; 3398 int aio_errors = 0; 3399 int i; 3400 size_t ssize; 3401 int deadhead = 0; 3402 int aio_notsupported = 0; 3403 int lio_head_port; 3404 int aio_port; 3405 int aio_thread; 3406 port_kevent_t *pkevtp = NULL; 3407 int portused = 0; 3408 #ifdef _LP64 3409 port_notify32_t pnotify; 3410 #else 3411 port_notify_t pnotify; 3412 #endif 3413 int event; 3414 3415 aiop = curproc->p_aio; 3416 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 3417 return (EINVAL); 3418 3419 #ifdef _LP64 3420 ssize = (sizeof (caddr32_t) * nent); 3421 #else 3422 ssize = (sizeof (aiocb_t *) * nent); 3423 #endif 3424 cbplist = kmem_alloc(ssize, KM_SLEEP); 3425 ucbp = (void *)cbplist; 3426 3427 if (copyin(aiocb_arg, cbplist, ssize) || 3428 (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) { 3429 kmem_free(cbplist, ssize); 3430 return (EFAULT); 3431 } 3432 3433 /* Event Ports */ 3434 if (sigev && 3435 (sigevk.sigev_notify == SIGEV_THREAD || 3436 sigevk.sigev_notify == SIGEV_PORT)) { 3437 if (sigevk.sigev_notify == SIGEV_THREAD) { 3438 pnotify.portnfy_port = sigevk.sigev_signo; 3439 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 3440 } else if (copyin( 3441 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr, 3442 &pnotify, sizeof (pnotify))) { 3443 kmem_free(cbplist, ssize); 3444 return (EFAULT); 3445 } 3446 error = port_alloc_event(pnotify.portnfy_port, 3447 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 3448 if (error) { 3449 if (error == ENOMEM || error == EAGAIN) 3450 error = EAGAIN; 3451 else 3452 error = EINVAL; 3453 kmem_free(cbplist, ssize); 3454 return (error); 3455 } 3456 lio_head_port = pnotify.portnfy_port; 3457 portused = 1; 3458 } 3459 3460 /* 3461 * a list head should be allocated if notification is 3462 * enabled for this list. 
3463 */ 3464 head = NULL; 3465 3466 if (mode_arg == LIO_WAIT || sigev) { 3467 mutex_enter(&aiop->aio_mutex); 3468 error = aio_lio_alloc(&head); 3469 mutex_exit(&aiop->aio_mutex); 3470 if (error) 3471 goto done; 3472 deadhead = 1; 3473 head->lio_nent = nent; 3474 head->lio_refcnt = nent; 3475 head->lio_port = -1; 3476 head->lio_portkev = NULL; 3477 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 3478 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 3479 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3480 if (sqp == NULL) { 3481 error = EAGAIN; 3482 goto done; 3483 } 3484 sqp->sq_func = NULL; 3485 sqp->sq_next = NULL; 3486 sqp->sq_info.si_code = SI_ASYNCIO; 3487 sqp->sq_info.si_pid = curproc->p_pid; 3488 sqp->sq_info.si_ctid = PRCTID(curproc); 3489 sqp->sq_info.si_zoneid = getzoneid(); 3490 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3491 sqp->sq_info.si_signo = sigevk.sigev_signo; 3492 sqp->sq_info.si_value.sival_int = 3493 sigevk.sigev_value.sival_int; 3494 head->lio_sigqp = sqp; 3495 } else { 3496 head->lio_sigqp = NULL; 3497 } 3498 if (pkevtp) { 3499 /* 3500 * Prepare data to send when list of aiocb's has 3501 * completed. 3502 */ 3503 port_init_event(pkevtp, (uintptr_t)sigev, 3504 (void *)(uintptr_t)pnotify.portnfy_user, 3505 NULL, head); 3506 pkevtp->portkev_events = AIOLIO; 3507 head->lio_portkev = pkevtp; 3508 head->lio_port = pnotify.portnfy_port; 3509 } 3510 } 3511 3512 for (i = 0; i < nent; i++, ucbp++) { 3513 3514 /* skip entry if it can't be copied. */ 3515 #ifdef _LP64 3516 cbp = (aiocb32_t *)(uintptr_t)*ucbp; 3517 if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32))) 3518 #else 3519 cbp = (aiocb_t *)*ucbp; 3520 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) 3521 #endif 3522 { 3523 if (head) { 3524 mutex_enter(&aiop->aio_mutex); 3525 head->lio_nent--; 3526 head->lio_refcnt--; 3527 mutex_exit(&aiop->aio_mutex); 3528 } 3529 continue; 3530 } 3531 #ifdef _LP64 3532 /* 3533 * copy 32 bit structure into 64 bit structure 3534 */ 3535 aiocb_32ton(aiocb32, aiocb); 3536 #endif /* _LP64 */ 3537 3538 /* skip if opcode for aiocb is LIO_NOP */ 3539 mode = aiocb->aio_lio_opcode; 3540 if (mode == LIO_NOP) { 3541 cbp = NULL; 3542 if (head) { 3543 mutex_enter(&aiop->aio_mutex); 3544 head->lio_nent--; 3545 head->lio_refcnt--; 3546 mutex_exit(&aiop->aio_mutex); 3547 } 3548 continue; 3549 } 3550 3551 /* increment file descriptor's ref count. 
*/ 3552 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 3553 lio_set_uerror(&cbp->aio_resultp, EBADF); 3554 if (head) { 3555 mutex_enter(&aiop->aio_mutex); 3556 head->lio_nent--; 3557 head->lio_refcnt--; 3558 mutex_exit(&aiop->aio_mutex); 3559 } 3560 aio_errors++; 3561 continue; 3562 } 3563 3564 /* 3565 * check the permission of the partition 3566 */ 3567 if ((fp->f_flag & mode) == 0) { 3568 releasef(aiocb->aio_fildes); 3569 lio_set_uerror(&cbp->aio_resultp, EBADF); 3570 if (head) { 3571 mutex_enter(&aiop->aio_mutex); 3572 head->lio_nent--; 3573 head->lio_refcnt--; 3574 mutex_exit(&aiop->aio_mutex); 3575 } 3576 aio_errors++; 3577 continue; 3578 } 3579 3580 /* 3581 * common case where requests are to the same fd 3582 * for the same r/w operation 3583 * for UFS, need to set EBADFD 3584 */ 3585 vp = fp->f_vnode; 3586 if (fp != prev_fp || mode != prev_mode) { 3587 aio_func = check_vp(vp, mode); 3588 if (aio_func == NULL) { 3589 prev_fp = NULL; 3590 releasef(aiocb->aio_fildes); 3591 lio_set_uerror(&cbp->aio_resultp, EBADFD); 3592 aio_notsupported++; 3593 if (head) { 3594 mutex_enter(&aiop->aio_mutex); 3595 head->lio_nent--; 3596 head->lio_refcnt--; 3597 mutex_exit(&aiop->aio_mutex); 3598 } 3599 continue; 3600 } else { 3601 prev_fp = fp; 3602 prev_mode = mode; 3603 } 3604 } 3605 3606 error = aio_req_setup(&reqp, aiop, aiocb, 3607 (aio_result_t *)&cbp->aio_resultp, vp); 3608 if (error) { 3609 releasef(aiocb->aio_fildes); 3610 lio_set_uerror(&cbp->aio_resultp, error); 3611 if (head) { 3612 mutex_enter(&aiop->aio_mutex); 3613 head->lio_nent--; 3614 head->lio_refcnt--; 3615 mutex_exit(&aiop->aio_mutex); 3616 } 3617 aio_errors++; 3618 continue; 3619 } 3620 3621 reqp->aio_req_lio = head; 3622 deadhead = 0; 3623 3624 /* 3625 * Set the errno field now before sending the request to 3626 * the driver to avoid a race condition 3627 */ 3628 (void) suword32(&cbp->aio_resultp.aio_errno, 3629 EINPROGRESS); 3630 3631 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp; 3632 3633 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE; 3634 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 3635 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 3636 if (aio_port | aio_thread) { 3637 port_kevent_t *lpkevp; 3638 /* 3639 * Prepare data to send with each aiocb completed. 
3640 */ 3641 #ifdef _LP64 3642 if (aio_port) { 3643 void *paddr = (void *)(uintptr_t) 3644 aiocb32->aio_sigevent.sigev_value.sival_ptr; 3645 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3646 error = EFAULT; 3647 } else { /* aio_thread */ 3648 pnotify.portnfy_port = 3649 aiocb32->aio_sigevent.sigev_signo; 3650 pnotify.portnfy_user = 3651 aiocb32->aio_sigevent.sigev_value.sival_ptr; 3652 } 3653 #else 3654 if (aio_port) { 3655 void *paddr = 3656 aiocb->aio_sigevent.sigev_value.sival_ptr; 3657 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3658 error = EFAULT; 3659 } else { /* aio_thread */ 3660 pnotify.portnfy_port = 3661 aiocb->aio_sigevent.sigev_signo; 3662 pnotify.portnfy_user = 3663 aiocb->aio_sigevent.sigev_value.sival_ptr; 3664 } 3665 #endif 3666 if (error) 3667 /* EMPTY */; 3668 else if (pkevtp != NULL && 3669 pnotify.portnfy_port == lio_head_port) 3670 error = port_dup_event(pkevtp, &lpkevp, 3671 PORT_ALLOC_DEFAULT); 3672 else 3673 error = port_alloc_event(pnotify.portnfy_port, 3674 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 3675 &lpkevp); 3676 if (error == 0) { 3677 port_init_event(lpkevp, (uintptr_t)cbp, 3678 (void *)(uintptr_t)pnotify.portnfy_user, 3679 aio_port_callback, reqp); 3680 lpkevp->portkev_events = event; 3681 reqp->aio_req_portkev = lpkevp; 3682 reqp->aio_req_port = pnotify.portnfy_port; 3683 } 3684 } 3685 3686 /* 3687 * send the request to driver. 3688 */ 3689 if (error == 0) { 3690 if (aiocb->aio_nbytes == 0) { 3691 clear_active_fd(aiocb->aio_fildes); 3692 aio_zerolen(reqp); 3693 continue; 3694 } 3695 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 3696 CRED()); 3697 } 3698 3699 /* 3700 * the fd's ref count is not decremented until the IO has 3701 * completed unless there was an error. 3702 */ 3703 if (error) { 3704 releasef(aiocb->aio_fildes); 3705 lio_set_uerror(&cbp->aio_resultp, error); 3706 if (head) { 3707 mutex_enter(&aiop->aio_mutex); 3708 head->lio_nent--; 3709 head->lio_refcnt--; 3710 mutex_exit(&aiop->aio_mutex); 3711 } 3712 if (error == ENOTSUP) 3713 aio_notsupported++; 3714 else 3715 aio_errors++; 3716 lio_set_error(reqp, portused); 3717 } else { 3718 clear_active_fd(aiocb->aio_fildes); 3719 } 3720 } 3721 3722 if (aio_notsupported) { 3723 error = ENOTSUP; 3724 } else if (aio_errors) { 3725 /* 3726 * return EIO if any request failed 3727 */ 3728 error = EIO; 3729 } 3730 3731 if (mode_arg == LIO_WAIT) { 3732 mutex_enter(&aiop->aio_mutex); 3733 while (head->lio_refcnt > 0) { 3734 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 3735 mutex_exit(&aiop->aio_mutex); 3736 error = EINTR; 3737 goto done; 3738 } 3739 } 3740 mutex_exit(&aiop->aio_mutex); 3741 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32); 3742 } 3743 3744 done: 3745 kmem_free(cbplist, ssize); 3746 if (deadhead) { 3747 if (head->lio_sigqp) 3748 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 3749 if (head->lio_portkev) 3750 port_free_event(head->lio_portkev); 3751 kmem_free(head, sizeof (aio_lio_t)); 3752 } 3753 return (error); 3754 } 3755 3756 3757 #ifdef _SYSCALL32_IMPL 3758 void 3759 aiocb_32ton(aiocb32_t *src, aiocb_t *dest) 3760 { 3761 dest->aio_fildes = src->aio_fildes; 3762 dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf; 3763 dest->aio_nbytes = (size_t)src->aio_nbytes; 3764 dest->aio_offset = (off_t)src->aio_offset; 3765 dest->aio_reqprio = src->aio_reqprio; 3766 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 3767 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 3768 3769 /* 3770 * See comment in sigqueue32() on handling of 32-bit 3771 * 
sigvals in a 64-bit kernel. 3772 */ 3773 dest->aio_sigevent.sigev_value.sival_int = 3774 (int)src->aio_sigevent.sigev_value.sival_int; 3775 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 3776 (uintptr_t)src->aio_sigevent.sigev_notify_function; 3777 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 3778 (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 3779 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 3780 dest->aio_lio_opcode = src->aio_lio_opcode; 3781 dest->aio_state = src->aio_state; 3782 dest->aio__pad[0] = src->aio__pad[0]; 3783 } 3784 #endif /* _SYSCALL32_IMPL */ 3785 3786 /* 3787 * aio_port_callback() is called just before the event is retrieved from the 3788 * port. The task of this callback function is to finish the work of the 3789 * transaction for the application, which means: 3790 * - copy out transaction data to the application 3791 * (this thread is running in the right process context) 3792 * - keep track of the transaction (update of counters). 3793 * - free allocated buffers 3794 * The aiocb pointer is the object element of the port_kevent_t structure. 3795 * 3796 * flag: 3797 * PORT_CALLBACK_DEFAULT: do copyout and free resources 3798 * PORT_CALLBACK_CLOSE: don't do copyout, free resources 3799 */ 3800 3801 /*ARGSUSED*/ 3802 int 3803 aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp) 3804 { 3805 aio_t *aiop = curproc->p_aio; 3806 aio_req_t *reqp = arg; 3807 struct iovec *iov; 3808 struct buf *bp; 3809 void *resultp; 3810 3811 if (pid != curproc->p_pid) { 3812 /* wrong process; cannot deliver data here */ 3813 return (EACCES); 3814 } 3815 3816 mutex_enter(&aiop->aio_portq_mutex); 3817 reqp->aio_req_portkev = NULL; 3818 aio_req_remove_portq(aiop, reqp); /* remove request from portq */ 3819 mutex_exit(&aiop->aio_portq_mutex); 3820 aphysio_unlock(reqp); /* unlock used pages */ 3821 mutex_enter(&aiop->aio_mutex); 3822 if (reqp->aio_req_flags & AIO_COPYOUTDONE) { 3823 aio_req_free_port(aiop, reqp); /* back to free list */ 3824 mutex_exit(&aiop->aio_mutex); 3825 return (0); 3826 } 3827 3828 iov = reqp->aio_req_uio.uio_iov; 3829 bp = &reqp->aio_req_buf; 3830 resultp = (void *)reqp->aio_req_resultp; 3831 aio_req_free_port(aiop, reqp); /* request struct back to free list */ 3832 mutex_exit(&aiop->aio_mutex); 3833 if (flag == PORT_CALLBACK_DEFAULT) 3834 aio_copyout_result_port(iov, bp, resultp); 3835 return (0); 3836 } 3837
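
/*
 * Illustrative sketch (not part of the original source): how the
 * submission paths above wire a request to aio_port_callback() so that
 * this callback runs when the event is later retrieved from the port.
 * The names match the code in alio32() and alioLF():
 *
 *	error = port_alloc_event(pnotify.portnfy_port, PORT_ALLOC_DEFAULT,
 *	    PORT_SOURCE_AIO, &lpkevp);
 *	if (error == 0) {
 *		port_init_event(lpkevp, (uintptr_t)cbp,
 *		    (void *)(uintptr_t)pnotify.portnfy_user,
 *		    aio_port_callback, reqp);
 *		lpkevp->portkev_events = event;
 *		reqp->aio_req_portkev = lpkevp;
 *		reqp->aio_req_port = pnotify.portnfy_port;
 *	}
 */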