/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

/*
 * Kernel asynchronous I/O.
 * This is only for raw devices now (as of Nov. 1993).
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/fs/snode.h>
#include <sys/unistd.h>
#include <sys/cmn_err.h>
#include <vm/as.h>
#include <vm/faultcode.h>
#include <sys/sysmacros.h>
#include <sys/procfs.h>
#include <sys/kmem.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/sunddi.h>
#include <sys/aio_impl.h>
#include <sys/debug.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vmsystm.h>
#include <sys/fs/pxfs_ki.h>
#include <sys/contract/process_impl.h>

/*
 * external entry point.
 */
#ifdef _LP64
static int64_t	kaioc(long, long, long, long, long, long);
#endif
static int	kaio(ulong_t *, rval_t *);


#define	AIO_64		0
#define	AIO_32		1
#define	AIO_LARGEFILE	2

/*
 * implementation specific functions (private)
 */
#ifdef _LP64
static int	alio(int, aiocb_t **, int, struct sigevent *);
#endif
static int	aionotify(void);
static int	aioinit(void);
static int	aiostart(void);
static void	alio_cleanup(aio_t *, aiocb_t **, int, int);
static int	(*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
		    cred_t *);
static void	lio_set_error(aio_req_t *, int portused);
static aio_t	*aio_aiop_alloc();
static int	aio_req_alloc(aio_req_t **, aio_result_t *);
static int	aio_lio_alloc(aio_lio_t **);
static aio_req_t *aio_req_done(void *);
static aio_req_t *aio_req_remove(aio_req_t *);
static int	aio_req_find(aio_result_t *, aio_req_t **);
static int	aio_hash_insert(struct aio_req_t *, aio_t *);
static int	aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
		    aio_result_t *, vnode_t *);
static int	aio_cleanup_thread(aio_t *);
static aio_lio_t *aio_list_get(aio_result_t *);
static void	lio_set_uerror(void *, int);
extern void	aio_zerolen(aio_req_t *);
static int	aiowait(struct timeval *, int, long *);
static int	aiowaitn(void *, uint_t, uint_t *, timespec_t *);
static int	aio_unlock_requests(caddr_t iocblist, int iocb_index,
		    aio_req_t *reqlist, aio_t *aiop, model_t model);
static int	aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
static int	aiosuspend(void *, int, struct timespec *, int,
		    long *, int);
static int	aliowait(int, void *, int, void *, int);
static int	aioerror(void *, int);
static int
aio_cancel(int, void *, long *, int); 106 static int arw(int, int, char *, int, offset_t, aio_result_t *, int); 107 static int aiorw(int, void *, int, int); 108 109 static int alioLF(int, void *, int, void *); 110 static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *, 111 aio_result_t *, vnode_t *); 112 static int alio32(int, void *, int, void *); 113 static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p); 114 static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p); 115 116 #ifdef _SYSCALL32_IMPL 117 static void aiocb_LFton(aiocb64_32_t *, aiocb_t *); 118 void aiocb_32ton(aiocb32_t *, aiocb_t *); 119 #endif /* _SYSCALL32_IMPL */ 120 121 /* 122 * implementation specific functions (external) 123 */ 124 void aio_req_free(aio_t *, aio_req_t *); 125 126 /* 127 * Event Port framework 128 */ 129 130 void aio_req_free_port(aio_t *, aio_req_t *); 131 static int aio_port_callback(void *, int *, pid_t, int, void *); 132 133 /* 134 * This is the loadable module wrapper. 135 */ 136 #include <sys/modctl.h> 137 #include <sys/syscall.h> 138 139 #ifdef _LP64 140 141 static struct sysent kaio_sysent = { 142 6, 143 SE_NOUNLOAD | SE_64RVAL | SE_ARGC, 144 (int (*)())kaioc 145 }; 146 147 #ifdef _SYSCALL32_IMPL 148 static struct sysent kaio_sysent32 = { 149 7, 150 SE_NOUNLOAD | SE_64RVAL, 151 kaio 152 }; 153 #endif /* _SYSCALL32_IMPL */ 154 155 #else /* _LP64 */ 156 157 static struct sysent kaio_sysent = { 158 7, 159 SE_NOUNLOAD | SE_32RVAL1, 160 kaio 161 }; 162 163 #endif /* _LP64 */ 164 165 /* 166 * Module linkage information for the kernel. 167 */ 168 169 static struct modlsys modlsys = { 170 &mod_syscallops, 171 "kernel Async I/O", 172 &kaio_sysent 173 }; 174 175 #ifdef _SYSCALL32_IMPL 176 static struct modlsys modlsys32 = { 177 &mod_syscallops32, 178 "kernel Async I/O for 32 bit compatibility", 179 &kaio_sysent32 180 }; 181 #endif /* _SYSCALL32_IMPL */ 182 183 184 static struct modlinkage modlinkage = { 185 MODREV_1, 186 &modlsys, 187 #ifdef _SYSCALL32_IMPL 188 &modlsys32, 189 #endif 190 NULL 191 }; 192 193 int 194 _init(void) 195 { 196 int retval; 197 198 if ((retval = mod_install(&modlinkage)) != 0) 199 return (retval); 200 201 return (0); 202 } 203 204 int 205 _fini(void) 206 { 207 int retval; 208 209 retval = mod_remove(&modlinkage); 210 211 return (retval); 212 } 213 214 int 215 _info(struct modinfo *modinfop) 216 { 217 return (mod_info(&modlinkage, modinfop)); 218 } 219 220 #ifdef _LP64 221 static int64_t 222 kaioc( 223 long a0, 224 long a1, 225 long a2, 226 long a3, 227 long a4, 228 long a5) 229 { 230 int error; 231 long rval = 0; 232 233 switch ((int)a0 & ~AIO_POLL_BIT) { 234 case AIOREAD: 235 error = arw((int)a0, (int)a1, (char *)a2, (int)a3, 236 (offset_t)a4, (aio_result_t *)a5, FREAD); 237 break; 238 case AIOWRITE: 239 error = arw((int)a0, (int)a1, (char *)a2, (int)a3, 240 (offset_t)a4, (aio_result_t *)a5, FWRITE); 241 break; 242 case AIOWAIT: 243 error = aiowait((struct timeval *)a1, (int)a2, &rval); 244 break; 245 case AIOWAITN: 246 error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3, 247 (timespec_t *)a4); 248 break; 249 case AIONOTIFY: 250 error = aionotify(); 251 break; 252 case AIOINIT: 253 error = aioinit(); 254 break; 255 case AIOSTART: 256 error = aiostart(); 257 break; 258 case AIOLIO: 259 error = alio((int)a1, (aiocb_t **)a2, (int)a3, 260 (struct sigevent *)a4); 261 break; 262 case AIOLIOWAIT: 263 error = aliowait((int)a1, (void *)a2, (int)a3, 264 (struct sigevent *)a4, AIO_64); 265 break; 266 case AIOSUSPEND: 267 error = 
aiosuspend((void *)a1, (int)a2, (timespec_t *)a3, 268 (int)a4, &rval, AIO_64); 269 break; 270 case AIOERROR: 271 error = aioerror((void *)a1, AIO_64); 272 break; 273 case AIOAREAD: 274 error = aiorw((int)a0, (void *)a1, FREAD, AIO_64); 275 break; 276 case AIOAWRITE: 277 error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64); 278 break; 279 case AIOCANCEL: 280 error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64); 281 break; 282 283 /* 284 * The large file related stuff is valid only for 285 * 32 bit kernel and not for 64 bit kernel 286 * On 64 bit kernel we convert large file calls 287 * to regular 64bit calls. 288 */ 289 290 default: 291 error = EINVAL; 292 } 293 if (error) 294 return ((int64_t)set_errno(error)); 295 return (rval); 296 } 297 #endif 298 299 static int 300 kaio( 301 ulong_t *uap, 302 rval_t *rvp) 303 { 304 long rval = 0; 305 int error = 0; 306 offset_t off; 307 308 309 rvp->r_vals = 0; 310 #if defined(_LITTLE_ENDIAN) 311 off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4]; 312 #else 313 off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5]; 314 #endif 315 316 switch (uap[0] & ~AIO_POLL_BIT) { 317 /* 318 * It must be the 32 bit system call on 64 bit kernel 319 */ 320 case AIOREAD: 321 return (arw((int)uap[0], (int)uap[1], (char *)uap[2], 322 (int)uap[3], off, (aio_result_t *)uap[6], FREAD)); 323 case AIOWRITE: 324 return (arw((int)uap[0], (int)uap[1], (char *)uap[2], 325 (int)uap[3], off, (aio_result_t *)uap[6], FWRITE)); 326 case AIOWAIT: 327 error = aiowait((struct timeval *)uap[1], (int)uap[2], 328 &rval); 329 break; 330 case AIOWAITN: 331 error = aiowaitn((void *)uap[1], (uint_t)uap[2], 332 (uint_t *)uap[3], (timespec_t *)uap[4]); 333 break; 334 case AIONOTIFY: 335 return (aionotify()); 336 case AIOINIT: 337 return (aioinit()); 338 case AIOSTART: 339 return (aiostart()); 340 case AIOLIO: 341 return (alio32((int)uap[1], (void *)uap[2], (int)uap[3], 342 (void *)uap[4])); 343 case AIOLIOWAIT: 344 return (aliowait((int)uap[1], (void *)uap[2], 345 (int)uap[3], (struct sigevent *)uap[4], AIO_32)); 346 case AIOSUSPEND: 347 error = aiosuspend((void *)uap[1], (int)uap[2], 348 (timespec_t *)uap[3], (int)uap[4], 349 &rval, AIO_32); 350 break; 351 case AIOERROR: 352 return (aioerror((void *)uap[1], AIO_32)); 353 case AIOAREAD: 354 return (aiorw((int)uap[0], (void *)uap[1], 355 FREAD, AIO_32)); 356 case AIOAWRITE: 357 return (aiorw((int)uap[0], (void *)uap[1], 358 FWRITE, AIO_32)); 359 case AIOCANCEL: 360 error = (aio_cancel((int)uap[1], (void *)uap[2], &rval, 361 AIO_32)); 362 break; 363 case AIOLIO64: 364 return (alioLF((int)uap[1], (void *)uap[2], 365 (int)uap[3], (void *)uap[4])); 366 case AIOLIOWAIT64: 367 return (aliowait(uap[1], (void *)uap[2], 368 (int)uap[3], (void *)uap[4], AIO_LARGEFILE)); 369 case AIOSUSPEND64: 370 error = aiosuspend((void *)uap[1], (int)uap[2], 371 (timespec_t *)uap[3], (int)uap[4], &rval, 372 AIO_LARGEFILE); 373 break; 374 case AIOERROR64: 375 return (aioerror((void *)uap[1], AIO_LARGEFILE)); 376 case AIOAREAD64: 377 return (aiorw((int)uap[0], (void *)uap[1], FREAD, 378 AIO_LARGEFILE)); 379 case AIOAWRITE64: 380 return (aiorw((int)uap[0], (void *)uap[1], FWRITE, 381 AIO_LARGEFILE)); 382 case AIOCANCEL64: 383 error = (aio_cancel((int)uap[1], (void *)uap[2], 384 &rval, AIO_LARGEFILE)); 385 break; 386 default: 387 return (EINVAL); 388 } 389 390 rvp->r_val1 = rval; 391 return (error); 392 } 393 394 /* 395 * wake up LWPs in this process that are sleeping in 396 * aiowait(). 
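 *
 * For context, a minimal user-level sketch of the legacy aioread(3)/
 * aiowait(3) pattern this path serves (illustrative only, not part of
 * this file; assumes the libaio interfaces in <sys/asynch.h>, and the
 * device path is hypothetical):
 *
 *	#include <sys/asynch.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	aio_result_t res;
 *	char buf[8192];
 *	int fd = open("/dev/rdsk/...", O_RDONLY);
 *
 *	if (aioread(fd, buf, sizeof (buf), 0, SEEK_SET, &res) == 0) {
 *		aio_result_t *done = aiowait(NULL);	(LWP sleeps in kaio)
 *		if (done == &res && res.aio_return != -1)
 *			;	(buf now holds res.aio_return bytes)
 *	}
 *
 * aionotify() bumps aio_notifycnt and broadcasts aio_waitcv so that a
 * sleeping aiowait() can return and let the library reap completions
 * queued on its own user-level done list.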
397 */ 398 static int 399 aionotify(void) 400 { 401 aio_t *aiop; 402 403 aiop = curproc->p_aio; 404 if (aiop == NULL) 405 return (0); 406 407 mutex_enter(&aiop->aio_mutex); 408 aiop->aio_notifycnt++; 409 cv_broadcast(&aiop->aio_waitcv); 410 mutex_exit(&aiop->aio_mutex); 411 412 return (0); 413 } 414 415 static int 416 timeval2reltime(struct timeval *timout, timestruc_t *rqtime, 417 timestruc_t **rqtp, int *blocking) 418 { 419 #ifdef _SYSCALL32_IMPL 420 struct timeval32 wait_time_32; 421 #endif 422 struct timeval wait_time; 423 model_t model = get_udatamodel(); 424 425 *rqtp = NULL; 426 if (timout == NULL) { /* wait indefinitely */ 427 *blocking = 1; 428 return (0); 429 } 430 431 /* 432 * Need to correctly compare with the -1 passed in for a user 433 * address pointer, with both 32 bit and 64 bit apps. 434 */ 435 if (model == DATAMODEL_NATIVE) { 436 if ((intptr_t)timout == (intptr_t)-1) { /* don't wait */ 437 *blocking = 0; 438 return (0); 439 } 440 441 if (copyin(timout, &wait_time, sizeof (wait_time))) 442 return (EFAULT); 443 } 444 #ifdef _SYSCALL32_IMPL 445 else { 446 /* 447 * -1 from a 32bit app. It will not get sign extended. 448 * don't wait if -1. 449 */ 450 if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) { 451 *blocking = 0; 452 return (0); 453 } 454 455 if (copyin(timout, &wait_time_32, sizeof (wait_time_32))) 456 return (EFAULT); 457 TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32); 458 } 459 #endif /* _SYSCALL32_IMPL */ 460 461 if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) { /* don't wait */ 462 *blocking = 0; 463 return (0); 464 } 465 466 if (wait_time.tv_sec < 0 || 467 wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC) 468 return (EINVAL); 469 470 rqtime->tv_sec = wait_time.tv_sec; 471 rqtime->tv_nsec = wait_time.tv_usec * 1000; 472 *rqtp = rqtime; 473 *blocking = 1; 474 475 return (0); 476 } 477 478 static int 479 timespec2reltime(timespec_t *timout, timestruc_t *rqtime, 480 timestruc_t **rqtp, int *blocking) 481 { 482 #ifdef _SYSCALL32_IMPL 483 timespec32_t wait_time_32; 484 #endif 485 model_t model = get_udatamodel(); 486 487 *rqtp = NULL; 488 if (timout == NULL) { 489 *blocking = 1; 490 return (0); 491 } 492 493 if (model == DATAMODEL_NATIVE) { 494 if (copyin(timout, rqtime, sizeof (*rqtime))) 495 return (EFAULT); 496 } 497 #ifdef _SYSCALL32_IMPL 498 else { 499 if (copyin(timout, &wait_time_32, sizeof (wait_time_32))) 500 return (EFAULT); 501 TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32); 502 } 503 #endif /* _SYSCALL32_IMPL */ 504 505 if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) { 506 *blocking = 0; 507 return (0); 508 } 509 510 if (rqtime->tv_sec < 0 || 511 rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC) 512 return (EINVAL); 513 514 *rqtp = rqtime; 515 *blocking = 1; 516 517 return (0); 518 } 519 520 /*ARGSUSED*/ 521 static int 522 aiowait( 523 struct timeval *timout, 524 int dontblockflg, 525 long *rval) 526 { 527 int error; 528 aio_t *aiop; 529 aio_req_t *reqp; 530 clock_t status; 531 int blocking; 532 int timecheck; 533 timestruc_t rqtime; 534 timestruc_t *rqtp; 535 536 aiop = curproc->p_aio; 537 if (aiop == NULL) 538 return (EINVAL); 539 540 /* 541 * Establish the absolute future time for the timeout. 
542 */ 543 error = timeval2reltime(timout, &rqtime, &rqtp, &blocking); 544 if (error) 545 return (error); 546 if (rqtp) { 547 timestruc_t now; 548 timecheck = timechanged; 549 gethrestime(&now); 550 timespecadd(rqtp, &now); 551 } 552 553 mutex_enter(&aiop->aio_mutex); 554 for (;;) { 555 /* process requests on poll queue */ 556 if (aiop->aio_pollq) { 557 mutex_exit(&aiop->aio_mutex); 558 aio_cleanup(0); 559 mutex_enter(&aiop->aio_mutex); 560 } 561 if ((reqp = aio_req_remove(NULL)) != NULL) { 562 *rval = (long)reqp->aio_req_resultp; 563 break; 564 } 565 /* user-level done queue might not be empty */ 566 if (aiop->aio_notifycnt > 0) { 567 aiop->aio_notifycnt--; 568 *rval = 1; 569 break; 570 } 571 /* don't block if no outstanding aio */ 572 if (aiop->aio_outstanding == 0 && dontblockflg) { 573 error = EINVAL; 574 break; 575 } 576 if (blocking) { 577 status = cv_waituntil_sig(&aiop->aio_waitcv, 578 &aiop->aio_mutex, rqtp, timecheck); 579 580 if (status > 0) /* check done queue again */ 581 continue; 582 if (status == 0) { /* interrupted by a signal */ 583 error = EINTR; 584 *rval = -1; 585 } else { /* timer expired */ 586 error = ETIME; 587 } 588 } 589 break; 590 } 591 mutex_exit(&aiop->aio_mutex); 592 if (reqp) { 593 aphysio_unlock(reqp); 594 aio_copyout_result(reqp); 595 mutex_enter(&aiop->aio_mutex); 596 aio_req_free(aiop, reqp); 597 mutex_exit(&aiop->aio_mutex); 598 } 599 return (error); 600 } 601 602 /* 603 * aiowaitn can be used to reap completed asynchronous requests submitted with 604 * lio_listio, aio_read or aio_write. 605 * This function only reaps asynchronous raw I/Os. 606 */ 607 608 /*ARGSUSED*/ 609 static int 610 aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout) 611 { 612 int error = 0; 613 aio_t *aiop; 614 aio_req_t *reqlist = NULL; 615 caddr_t iocblist = NULL; /* array of iocb ptr's */ 616 uint_t waitcnt, cnt = 0; /* iocb cnt */ 617 size_t iocbsz; /* users iocb size */ 618 size_t riocbsz; /* returned iocb size */ 619 int iocb_index = 0; 620 model_t model = get_udatamodel(); 621 int blocking = 1; 622 int timecheck; 623 timestruc_t rqtime; 624 timestruc_t *rqtp; 625 626 aiop = curproc->p_aio; 627 628 if (aiop == NULL || aiop->aio_outstanding == 0) 629 return (EAGAIN); 630 631 if (copyin(nwait, &waitcnt, sizeof (uint_t))) 632 return (EFAULT); 633 634 /* set *nwait to zero, if we must return prematurely */ 635 if (copyout(&cnt, nwait, sizeof (uint_t))) 636 return (EFAULT); 637 638 if (waitcnt == 0) { 639 blocking = 0; 640 rqtp = NULL; 641 waitcnt = nent; 642 } else { 643 error = timespec2reltime(timout, &rqtime, &rqtp, &blocking); 644 if (error) 645 return (error); 646 } 647 648 if (model == DATAMODEL_NATIVE) 649 iocbsz = (sizeof (aiocb_t *) * nent); 650 #ifdef _SYSCALL32_IMPL 651 else 652 iocbsz = (sizeof (caddr32_t) * nent); 653 #endif /* _SYSCALL32_IMPL */ 654 655 /* 656 * Only one aio_waitn call is allowed at a time. 657 * The active aio_waitn will collect all requests 658 * out of the "done" list and if necessary it will wait 659 * for some/all pending requests to fulfill the nwait 660 * parameter. 661 * A second or further aio_waitn calls will sleep here 662 * until the active aio_waitn finishes and leaves the kernel 663 * If the second call does not block (poll), then return 664 * immediately with the error code : EAGAIN. 665 * If the second call should block, then sleep here, but 666 * do not touch the timeout. The timeout starts when this 667 * aio_waitn-call becomes active. 
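 *
 * Illustrative user-level view of this contract (a sketch only, not
 * part of this file; assumes the aio_waitn(3C) wrapper over this
 * syscall):
 *
 *	#include <aio.h>
 *
 *	aiocb_t *done[16];
 *	uint_t nwait = 4;			(want at least 4 completions)
 *	struct timespec ts = { 5, 0 };
 *
 *	if (aio_waitn(done, 16, &nwait, &ts) == 0)
 *		;	(nwait now holds the number of pointers returned)
 *
 * A second thread entering aio_waitn() while the call above is still
 * active sleeps on aio_waitncv (or fails with EAGAIN when polling);
 * its own timeout is measured only from the moment it becomes the
 * active waiter.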
668 */ 669 670 mutex_enter(&aiop->aio_mutex); 671 672 while (aiop->aio_flags & AIO_WAITN) { 673 if (blocking == 0) { 674 mutex_exit(&aiop->aio_mutex); 675 return (EAGAIN); 676 } 677 678 /* block, no timeout */ 679 aiop->aio_flags |= AIO_WAITN_PENDING; 680 if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) { 681 mutex_exit(&aiop->aio_mutex); 682 return (EINTR); 683 } 684 } 685 686 /* 687 * Establish the absolute future time for the timeout. 688 */ 689 if (rqtp) { 690 timestruc_t now; 691 timecheck = timechanged; 692 gethrestime(&now); 693 timespecadd(rqtp, &now); 694 } 695 696 if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) { 697 kmem_free(aiop->aio_iocb, aiop->aio_iocbsz); 698 aiop->aio_iocb = NULL; 699 } 700 701 if (aiop->aio_iocb == NULL) { 702 iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP); 703 if (iocblist == NULL) { 704 mutex_exit(&aiop->aio_mutex); 705 return (ENOMEM); 706 } 707 aiop->aio_iocb = (aiocb_t **)iocblist; 708 aiop->aio_iocbsz = iocbsz; 709 } else { 710 iocblist = (char *)aiop->aio_iocb; 711 } 712 713 aiop->aio_waitncnt = waitcnt; 714 aiop->aio_flags |= AIO_WAITN; 715 716 for (;;) { 717 /* push requests on poll queue to done queue */ 718 if (aiop->aio_pollq) { 719 mutex_exit(&aiop->aio_mutex); 720 aio_cleanup(0); 721 mutex_enter(&aiop->aio_mutex); 722 } 723 724 /* check for requests on done queue */ 725 if (aiop->aio_doneq) { 726 cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt); 727 aiop->aio_waitncnt = waitcnt - cnt; 728 } 729 730 /* user-level done queue might not be empty */ 731 if (aiop->aio_notifycnt > 0) { 732 aiop->aio_notifycnt--; 733 error = 0; 734 break; 735 } 736 737 /* 738 * if we are here second time as a result of timer 739 * expiration, we reset error if there are enough 740 * aiocb's to satisfy request. 741 * We return also if all requests are already done 742 * and we picked up the whole done queue. 743 */ 744 745 if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 && 746 aiop->aio_doneq == NULL)) { 747 error = 0; 748 break; 749 } 750 751 if ((cnt < waitcnt) && blocking) { 752 int rval = cv_waituntil_sig(&aiop->aio_waitcv, 753 &aiop->aio_mutex, rqtp, timecheck); 754 if (rval > 0) 755 continue; 756 if (rval < 0) { 757 error = ETIME; 758 blocking = 0; 759 continue; 760 } 761 error = EINTR; 762 } 763 break; 764 } 765 766 mutex_exit(&aiop->aio_mutex); 767 768 if (cnt > 0) { 769 770 iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist, 771 aiop, model); 772 773 if (model == DATAMODEL_NATIVE) 774 riocbsz = (sizeof (aiocb_t *) * cnt); 775 #ifdef _SYSCALL32_IMPL 776 else 777 riocbsz = (sizeof (caddr32_t) * cnt); 778 #endif /* _SYSCALL32_IMPL */ 779 780 if (copyout(iocblist, uiocb, riocbsz) || 781 copyout(&cnt, nwait, sizeof (uint_t))) 782 error = EFAULT; 783 } 784 785 if (aiop->aio_iocbsz > AIO_IOCB_MAX) { 786 kmem_free(iocblist, aiop->aio_iocbsz); 787 aiop->aio_iocb = NULL; 788 } 789 790 /* check if there is another thread waiting for execution */ 791 mutex_enter(&aiop->aio_mutex); 792 aiop->aio_flags &= ~AIO_WAITN; 793 if (aiop->aio_flags & AIO_WAITN_PENDING) { 794 aiop->aio_flags &= ~AIO_WAITN_PENDING; 795 cv_signal(&aiop->aio_waitncv); 796 } 797 mutex_exit(&aiop->aio_mutex); 798 799 return (error); 800 } 801 802 /* 803 * aio_unlock_requests 804 * copyouts the result of the request as well as the return value. 805 * It builds the list of completed asynchronous requests, 806 * unlocks the allocated memory ranges and 807 * put the aio request structure back into the free list. 
808 */ 809 810 static int 811 aio_unlock_requests( 812 caddr_t iocblist, 813 int iocb_index, 814 aio_req_t *reqlist, 815 aio_t *aiop, 816 model_t model) 817 { 818 aio_req_t *reqp, *nreqp; 819 820 if (model == DATAMODEL_NATIVE) { 821 for (reqp = reqlist; reqp != NULL; reqp = nreqp) { 822 (((caddr_t *)iocblist)[iocb_index++]) = 823 reqp->aio_req_iocb.iocb; 824 nreqp = reqp->aio_req_next; 825 aphysio_unlock(reqp); 826 aio_copyout_result(reqp); 827 mutex_enter(&aiop->aio_mutex); 828 aio_req_free(aiop, reqp); 829 mutex_exit(&aiop->aio_mutex); 830 } 831 } 832 #ifdef _SYSCALL32_IMPL 833 else { 834 for (reqp = reqlist; reqp != NULL; reqp = nreqp) { 835 ((caddr32_t *)iocblist)[iocb_index++] = 836 reqp->aio_req_iocb.iocb32; 837 nreqp = reqp->aio_req_next; 838 aphysio_unlock(reqp); 839 aio_copyout_result(reqp); 840 mutex_enter(&aiop->aio_mutex); 841 aio_req_free(aiop, reqp); 842 mutex_exit(&aiop->aio_mutex); 843 } 844 } 845 #endif /* _SYSCALL32_IMPL */ 846 return (iocb_index); 847 } 848 849 /* 850 * aio_reqlist_concat 851 * moves "max" elements from the done queue to the reqlist queue and removes 852 * the AIO_DONEQ flag. 853 * - reqlist queue is a simple linked list 854 * - done queue is a double linked list 855 */ 856 857 static int 858 aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max) 859 { 860 aio_req_t *q2, *q2work, *list; 861 int count = 0; 862 863 list = *reqlist; 864 q2 = aiop->aio_doneq; 865 q2work = q2; 866 while (max-- > 0) { 867 q2work->aio_req_flags &= ~AIO_DONEQ; 868 q2work = q2work->aio_req_next; 869 count++; 870 if (q2work == q2) 871 break; 872 } 873 874 if (q2work == q2) { 875 /* all elements revised */ 876 q2->aio_req_prev->aio_req_next = list; 877 list = q2; 878 aiop->aio_doneq = NULL; 879 } else { 880 /* 881 * max < elements in the doneq 882 * detach only the required amount of elements 883 * out of the doneq 884 */ 885 q2work->aio_req_prev->aio_req_next = list; 886 list = q2; 887 888 aiop->aio_doneq = q2work; 889 q2work->aio_req_prev = q2->aio_req_prev; 890 q2->aio_req_prev->aio_req_next = q2work; 891 } 892 *reqlist = list; 893 return (count); 894 } 895 896 /*ARGSUSED*/ 897 static int 898 aiosuspend( 899 void *aiocb, 900 int nent, 901 struct timespec *timout, 902 int flag, 903 long *rval, 904 int run_mode) 905 { 906 int error; 907 aio_t *aiop; 908 aio_req_t *reqp, *found, *next; 909 caddr_t cbplist = NULL; 910 aiocb_t *cbp, **ucbp; 911 #ifdef _SYSCALL32_IMPL 912 aiocb32_t *cbp32; 913 caddr32_t *ucbp32; 914 #endif /* _SYSCALL32_IMPL */ 915 aiocb64_32_t *cbp64; 916 int rv; 917 int i; 918 size_t ssize; 919 model_t model = get_udatamodel(); 920 int blocking; 921 int timecheck; 922 timestruc_t rqtime; 923 timestruc_t *rqtp; 924 925 aiop = curproc->p_aio; 926 if (aiop == NULL || nent <= 0) 927 return (EINVAL); 928 929 /* 930 * Establish the absolute future time for the timeout. 931 */ 932 error = timespec2reltime(timout, &rqtime, &rqtp, &blocking); 933 if (error) 934 return (error); 935 if (rqtp) { 936 timestruc_t now; 937 timecheck = timechanged; 938 gethrestime(&now); 939 timespecadd(rqtp, &now); 940 } 941 942 /* 943 * If we are not blocking and there's no IO complete 944 * skip aiocb copyin. 
945 */ 946 if (!blocking && (aiop->aio_pollq == NULL) && 947 (aiop->aio_doneq == NULL)) { 948 return (EAGAIN); 949 } 950 951 if (model == DATAMODEL_NATIVE) 952 ssize = (sizeof (aiocb_t *) * nent); 953 #ifdef _SYSCALL32_IMPL 954 else 955 ssize = (sizeof (caddr32_t) * nent); 956 #endif /* _SYSCALL32_IMPL */ 957 958 cbplist = kmem_alloc(ssize, KM_NOSLEEP); 959 if (cbplist == NULL) 960 return (ENOMEM); 961 962 if (copyin(aiocb, cbplist, ssize)) { 963 error = EFAULT; 964 goto done; 965 } 966 967 found = NULL; 968 /* 969 * we need to get the aio_cleanupq_mutex since we call 970 * aio_req_done(). 971 */ 972 mutex_enter(&aiop->aio_cleanupq_mutex); 973 mutex_enter(&aiop->aio_mutex); 974 for (;;) { 975 /* push requests on poll queue to done queue */ 976 if (aiop->aio_pollq) { 977 mutex_exit(&aiop->aio_mutex); 978 mutex_exit(&aiop->aio_cleanupq_mutex); 979 aio_cleanup(0); 980 mutex_enter(&aiop->aio_cleanupq_mutex); 981 mutex_enter(&aiop->aio_mutex); 982 } 983 /* check for requests on done queue */ 984 if (aiop->aio_doneq) { 985 if (model == DATAMODEL_NATIVE) 986 ucbp = (aiocb_t **)cbplist; 987 #ifdef _SYSCALL32_IMPL 988 else 989 ucbp32 = (caddr32_t *)cbplist; 990 #endif /* _SYSCALL32_IMPL */ 991 for (i = 0; i < nent; i++) { 992 if (model == DATAMODEL_NATIVE) { 993 if ((cbp = *ucbp++) == NULL) 994 continue; 995 if (run_mode != AIO_LARGEFILE) 996 reqp = aio_req_done( 997 &cbp->aio_resultp); 998 else { 999 cbp64 = (aiocb64_32_t *)cbp; 1000 reqp = aio_req_done( 1001 &cbp64->aio_resultp); 1002 } 1003 } 1004 #ifdef _SYSCALL32_IMPL 1005 else { 1006 if (run_mode == AIO_32) { 1007 if ((cbp32 = 1008 (aiocb32_t *)(uintptr_t) 1009 *ucbp32++) == NULL) 1010 continue; 1011 reqp = aio_req_done( 1012 &cbp32->aio_resultp); 1013 } else if (run_mode == AIO_LARGEFILE) { 1014 if ((cbp64 = 1015 (aiocb64_32_t *)(uintptr_t) 1016 *ucbp32++) == NULL) 1017 continue; 1018 reqp = aio_req_done( 1019 &cbp64->aio_resultp); 1020 } 1021 1022 } 1023 #endif /* _SYSCALL32_IMPL */ 1024 if (reqp) { 1025 reqp->aio_req_next = found; 1026 found = reqp; 1027 } 1028 if (aiop->aio_doneq == NULL) 1029 break; 1030 } 1031 if (found) 1032 break; 1033 } 1034 if (aiop->aio_notifycnt > 0) { 1035 /* 1036 * nothing on the kernel's queue. the user 1037 * has notified the kernel that it has items 1038 * on a user-level queue. 1039 */ 1040 aiop->aio_notifycnt--; 1041 *rval = 1; 1042 error = 0; 1043 break; 1044 } 1045 /* don't block if nothing is outstanding */ 1046 if (aiop->aio_outstanding == 0) { 1047 error = EAGAIN; 1048 break; 1049 } 1050 if (blocking) { 1051 /* 1052 * drop the aio_cleanupq_mutex as we are 1053 * going to block. 1054 */ 1055 mutex_exit(&aiop->aio_cleanupq_mutex); 1056 rv = cv_waituntil_sig(&aiop->aio_waitcv, 1057 &aiop->aio_mutex, rqtp, timecheck); 1058 /* 1059 * we have to drop aio_mutex and 1060 * grab it in the right order. 
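 *
 * The acquisition order used throughout this file (and expected by
 * aio_req_done(), which asserts both locks are held) is:
 *
 *	mutex_enter(&aiop->aio_cleanupq_mutex);
 *	mutex_enter(&aiop->aio_mutex);
 *
 * cv_waituntil_sig() returns holding only aio_mutex, so it is dropped
 * below and both locks are then retaken in that order.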
1061 */ 1062 mutex_exit(&aiop->aio_mutex); 1063 mutex_enter(&aiop->aio_cleanupq_mutex); 1064 mutex_enter(&aiop->aio_mutex); 1065 if (rv > 0) /* check done queue again */ 1066 continue; 1067 if (rv == 0) /* interrupted by a signal */ 1068 error = EINTR; 1069 else /* timer expired */ 1070 error = ETIME; 1071 } else { 1072 error = EAGAIN; 1073 } 1074 break; 1075 } 1076 mutex_exit(&aiop->aio_mutex); 1077 mutex_exit(&aiop->aio_cleanupq_mutex); 1078 for (reqp = found; reqp != NULL; reqp = next) { 1079 next = reqp->aio_req_next; 1080 aphysio_unlock(reqp); 1081 aio_copyout_result(reqp); 1082 mutex_enter(&aiop->aio_mutex); 1083 aio_req_free(aiop, reqp); 1084 mutex_exit(&aiop->aio_mutex); 1085 } 1086 done: 1087 kmem_free(cbplist, ssize); 1088 return (error); 1089 } 1090 1091 /* 1092 * initialize aio by allocating an aio_t struct for this 1093 * process. 1094 */ 1095 static int 1096 aioinit(void) 1097 { 1098 proc_t *p = curproc; 1099 aio_t *aiop; 1100 mutex_enter(&p->p_lock); 1101 if ((aiop = p->p_aio) == NULL) { 1102 aiop = aio_aiop_alloc(); 1103 p->p_aio = aiop; 1104 } 1105 mutex_exit(&p->p_lock); 1106 if (aiop == NULL) 1107 return (ENOMEM); 1108 return (0); 1109 } 1110 1111 /* 1112 * start a special thread that will cleanup after aio requests 1113 * that are preventing a segment from being unmapped. as_unmap() 1114 * blocks until all phsyio to this segment is completed. this 1115 * doesn't happen until all the pages in this segment are not 1116 * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio 1117 * requests still outstanding. this special thread will make sure 1118 * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed. 1119 * 1120 * this function will return an error if the process has only 1121 * one LWP. the assumption is that the caller is a separate LWP 1122 * that remains blocked in the kernel for the life of this process. 1123 */ 1124 static int 1125 aiostart(void) 1126 { 1127 proc_t *p = curproc; 1128 aio_t *aiop; 1129 int first, error = 0; 1130 1131 if (p->p_lwpcnt == 1) 1132 return (EDEADLK); 1133 mutex_enter(&p->p_lock); 1134 if ((aiop = p->p_aio) == NULL) 1135 error = EINVAL; 1136 else { 1137 first = aiop->aio_ok; 1138 if (aiop->aio_ok == 0) 1139 aiop->aio_ok = 1; 1140 } 1141 mutex_exit(&p->p_lock); 1142 if (error == 0 && first == 0) { 1143 return (aio_cleanup_thread(aiop)); 1144 /* should return only to exit */ 1145 } 1146 return (error); 1147 } 1148 1149 /* 1150 * Associate an aiocb with a port. 1151 * This function is used by aiorw() to associate a transaction with a port. 1152 * Allocate an event port structure (port_alloc_event()) and store the 1153 * delivered user pointer (portnfy_user) in the portkev_user field of the 1154 * port_kevent_t structure.. 1155 * The aio_req_portkev pointer in the aio_req_t structure was added to identify 1156 * the port association. 
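 *
 * Illustrative user-level sketch of the association set up here (not
 * part of this file; assumes the event ports API from <port.h>, and
 * "cookie" is a hypothetical per-request pointer):
 *
 *	#include <port.h>
 *	#include <aio.h>
 *
 *	void *cookie = ...;
 *	int port = port_create();
 *	port_notify_t pn = { port, cookie };
 *	aiocb_t cb;
 *
 *	(fill in cb.aio_fildes, cb.aio_buf, cb.aio_nbytes, cb.aio_offset)
 *	cb.aio_sigevent.sigev_notify = SIGEV_PORT;
 *	cb.aio_sigevent.sigev_value.sival_ptr = &pn;
 *
 *	if (aio_read(&cb) == 0) {
 *		port_event_t pe;
 *		(void) port_get(port, &pe, NULL);
 *		(pe.portev_object is (uintptr_t)&cb, pe.portev_user is cookie)
 *	}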
1157 */ 1158 1159 static int 1160 aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp, 1161 aio_req_t *reqp, int event) 1162 { 1163 port_kevent_t *pkevp = NULL; 1164 int error; 1165 1166 error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT, 1167 PORT_SOURCE_AIO, &pkevp); 1168 if (error) { 1169 if ((error == ENOMEM) || (error == EAGAIN)) 1170 error = EAGAIN; 1171 else 1172 error = EINVAL; 1173 } else { 1174 port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user, 1175 aio_port_callback, reqp); 1176 pkevp->portkev_events = event; 1177 reqp->aio_req_portkev = pkevp; 1178 reqp->aio_req_port = pntfy->portnfy_port; 1179 } 1180 return (error); 1181 } 1182 1183 #ifdef _LP64 1184 1185 /* 1186 * Asynchronous list IO. A chain of aiocb's are copied in 1187 * one at a time. If the aiocb is invalid, it is skipped. 1188 * For each aiocb, the appropriate driver entry point is 1189 * called. Optimize for the common case where the list 1190 * of requests is to the same file descriptor. 1191 * 1192 * One possible optimization is to define a new driver entry 1193 * point that supports a list of IO requests. Whether this 1194 * improves performance depends somewhat on the driver's 1195 * locking strategy. Processing a list could adversely impact 1196 * the driver's interrupt latency. 1197 */ 1198 static int 1199 alio( 1200 int mode_arg, 1201 aiocb_t **aiocb_arg, 1202 int nent, 1203 struct sigevent *sigev) 1204 { 1205 file_t *fp; 1206 file_t *prev_fp = NULL; 1207 int prev_mode = -1; 1208 struct vnode *vp; 1209 aio_lio_t *head; 1210 aio_req_t *reqp; 1211 aio_t *aiop; 1212 caddr_t cbplist; 1213 aiocb_t cb; 1214 aiocb_t *aiocb = &cb; 1215 aiocb_t *cbp; 1216 aiocb_t **ucbp; 1217 struct sigevent sigevk; 1218 sigqueue_t *sqp; 1219 int (*aio_func)(); 1220 int mode; 1221 int error = 0; 1222 int aio_errors = 0; 1223 int i; 1224 size_t ssize; 1225 int deadhead = 0; 1226 int aio_notsupported = 0; 1227 int lio_head_port; 1228 int aio_port; 1229 int aio_thread; 1230 port_kevent_t *pkevtp = NULL; 1231 int portused = 0; 1232 port_notify_t pnotify; 1233 int event; 1234 1235 aiop = curproc->p_aio; 1236 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 1237 return (EINVAL); 1238 1239 ssize = (sizeof (aiocb_t *) * nent); 1240 cbplist = kmem_alloc(ssize, KM_SLEEP); 1241 ucbp = (aiocb_t **)cbplist; 1242 1243 if (copyin(aiocb_arg, cbplist, ssize) || 1244 (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) { 1245 kmem_free(cbplist, ssize); 1246 return (EFAULT); 1247 } 1248 1249 /* Event Ports */ 1250 if (sigev && 1251 (sigevk.sigev_notify == SIGEV_THREAD || 1252 sigevk.sigev_notify == SIGEV_PORT)) { 1253 if (sigevk.sigev_notify == SIGEV_THREAD) { 1254 pnotify.portnfy_port = sigevk.sigev_signo; 1255 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 1256 } else if (copyin(sigevk.sigev_value.sival_ptr, 1257 &pnotify, sizeof (pnotify))) { 1258 kmem_free(cbplist, ssize); 1259 return (EFAULT); 1260 } 1261 error = port_alloc_event(pnotify.portnfy_port, 1262 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 1263 if (error) { 1264 if (error == ENOMEM || error == EAGAIN) 1265 error = EAGAIN; 1266 else 1267 error = EINVAL; 1268 kmem_free(cbplist, ssize); 1269 return (error); 1270 } 1271 lio_head_port = pnotify.portnfy_port; 1272 portused = 1; 1273 } 1274 1275 /* 1276 * a list head should be allocated if notification is 1277 * enabled for this list. 
1278 */ 1279 head = NULL; 1280 1281 if (mode_arg == LIO_WAIT || sigev) { 1282 mutex_enter(&aiop->aio_mutex); 1283 error = aio_lio_alloc(&head); 1284 mutex_exit(&aiop->aio_mutex); 1285 if (error) 1286 goto done; 1287 deadhead = 1; 1288 head->lio_nent = nent; 1289 head->lio_refcnt = nent; 1290 head->lio_port = -1; 1291 head->lio_portkev = NULL; 1292 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 1293 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 1294 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 1295 if (sqp == NULL) { 1296 error = EAGAIN; 1297 goto done; 1298 } 1299 sqp->sq_func = NULL; 1300 sqp->sq_next = NULL; 1301 sqp->sq_info.si_code = SI_ASYNCIO; 1302 sqp->sq_info.si_pid = curproc->p_pid; 1303 sqp->sq_info.si_ctid = PRCTID(curproc); 1304 sqp->sq_info.si_zoneid = getzoneid(); 1305 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 1306 sqp->sq_info.si_signo = sigevk.sigev_signo; 1307 sqp->sq_info.si_value = sigevk.sigev_value; 1308 head->lio_sigqp = sqp; 1309 } else { 1310 head->lio_sigqp = NULL; 1311 } 1312 if (pkevtp) { 1313 /* 1314 * Prepare data to send when list of aiocb's 1315 * has completed. 1316 */ 1317 port_init_event(pkevtp, (uintptr_t)sigev, 1318 (void *)(uintptr_t)pnotify.portnfy_user, 1319 NULL, head); 1320 pkevtp->portkev_events = AIOLIO; 1321 head->lio_portkev = pkevtp; 1322 head->lio_port = pnotify.portnfy_port; 1323 } 1324 } 1325 1326 for (i = 0; i < nent; i++, ucbp++) { 1327 1328 cbp = *ucbp; 1329 /* skip entry if it can't be copied. */ 1330 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) { 1331 if (head) { 1332 mutex_enter(&aiop->aio_mutex); 1333 head->lio_nent--; 1334 head->lio_refcnt--; 1335 mutex_exit(&aiop->aio_mutex); 1336 } 1337 continue; 1338 } 1339 1340 /* skip if opcode for aiocb is LIO_NOP */ 1341 mode = aiocb->aio_lio_opcode; 1342 if (mode == LIO_NOP) { 1343 cbp = NULL; 1344 if (head) { 1345 mutex_enter(&aiop->aio_mutex); 1346 head->lio_nent--; 1347 head->lio_refcnt--; 1348 mutex_exit(&aiop->aio_mutex); 1349 } 1350 continue; 1351 } 1352 1353 /* increment file descriptor's ref count. */ 1354 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 1355 lio_set_uerror(&cbp->aio_resultp, EBADF); 1356 if (head) { 1357 mutex_enter(&aiop->aio_mutex); 1358 head->lio_nent--; 1359 head->lio_refcnt--; 1360 mutex_exit(&aiop->aio_mutex); 1361 } 1362 aio_errors++; 1363 continue; 1364 } 1365 1366 /* 1367 * check the permission of the partition 1368 */ 1369 if ((fp->f_flag & mode) == 0) { 1370 releasef(aiocb->aio_fildes); 1371 lio_set_uerror(&cbp->aio_resultp, EBADF); 1372 if (head) { 1373 mutex_enter(&aiop->aio_mutex); 1374 head->lio_nent--; 1375 head->lio_refcnt--; 1376 mutex_exit(&aiop->aio_mutex); 1377 } 1378 aio_errors++; 1379 continue; 1380 } 1381 1382 /* 1383 * common case where requests are to the same fd 1384 * for the same r/w operation. 
1385 * for UFS, need to set EBADFD 1386 */ 1387 vp = fp->f_vnode; 1388 if (fp != prev_fp || mode != prev_mode) { 1389 aio_func = check_vp(vp, mode); 1390 if (aio_func == NULL) { 1391 prev_fp = NULL; 1392 releasef(aiocb->aio_fildes); 1393 lio_set_uerror(&cbp->aio_resultp, EBADFD); 1394 aio_notsupported++; 1395 if (head) { 1396 mutex_enter(&aiop->aio_mutex); 1397 head->lio_nent--; 1398 head->lio_refcnt--; 1399 mutex_exit(&aiop->aio_mutex); 1400 } 1401 continue; 1402 } else { 1403 prev_fp = fp; 1404 prev_mode = mode; 1405 } 1406 } 1407 1408 error = aio_req_setup(&reqp, aiop, aiocb, 1409 &cbp->aio_resultp, vp); 1410 if (error) { 1411 releasef(aiocb->aio_fildes); 1412 lio_set_uerror(&cbp->aio_resultp, error); 1413 if (head) { 1414 mutex_enter(&aiop->aio_mutex); 1415 head->lio_nent--; 1416 head->lio_refcnt--; 1417 mutex_exit(&aiop->aio_mutex); 1418 } 1419 aio_errors++; 1420 continue; 1421 } 1422 1423 reqp->aio_req_lio = head; 1424 deadhead = 0; 1425 1426 /* 1427 * Set the errno field now before sending the request to 1428 * the driver to avoid a race condition 1429 */ 1430 (void) suword32(&cbp->aio_resultp.aio_errno, 1431 EINPROGRESS); 1432 1433 reqp->aio_req_iocb.iocb = (caddr_t)cbp; 1434 1435 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE; 1436 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 1437 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 1438 if (aio_port | aio_thread) { 1439 port_kevent_t *lpkevp; 1440 /* 1441 * Prepare data to send with each aiocb completed. 1442 */ 1443 if (aio_port) { 1444 void *paddr = 1445 aiocb->aio_sigevent.sigev_value.sival_ptr; 1446 if (copyin(paddr, &pnotify, sizeof (pnotify))) 1447 error = EFAULT; 1448 } else { /* aio_thread */ 1449 pnotify.portnfy_port = 1450 aiocb->aio_sigevent.sigev_signo; 1451 pnotify.portnfy_user = 1452 aiocb->aio_sigevent.sigev_value.sival_ptr; 1453 } 1454 if (error) 1455 /* EMPTY */; 1456 else if (pkevtp != NULL && 1457 pnotify.portnfy_port == lio_head_port) 1458 error = port_dup_event(pkevtp, &lpkevp, 1459 PORT_ALLOC_DEFAULT); 1460 else 1461 error = port_alloc_event(pnotify.portnfy_port, 1462 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 1463 &lpkevp); 1464 if (error == 0) { 1465 port_init_event(lpkevp, (uintptr_t)cbp, 1466 (void *)(uintptr_t)pnotify.portnfy_user, 1467 aio_port_callback, reqp); 1468 lpkevp->portkev_events = event; 1469 reqp->aio_req_portkev = lpkevp; 1470 reqp->aio_req_port = pnotify.portnfy_port; 1471 } 1472 } 1473 1474 /* 1475 * send the request to driver. 1476 */ 1477 if (error == 0) { 1478 if (aiocb->aio_nbytes == 0) { 1479 clear_active_fd(aiocb->aio_fildes); 1480 aio_zerolen(reqp); 1481 continue; 1482 } 1483 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 1484 CRED()); 1485 } 1486 1487 /* 1488 * the fd's ref count is not decremented until the IO has 1489 * completed unless there was an error. 
1490 */ 1491 if (error) { 1492 releasef(aiocb->aio_fildes); 1493 lio_set_uerror(&cbp->aio_resultp, error); 1494 if (head) { 1495 mutex_enter(&aiop->aio_mutex); 1496 head->lio_nent--; 1497 head->lio_refcnt--; 1498 mutex_exit(&aiop->aio_mutex); 1499 } 1500 if (error == ENOTSUP) 1501 aio_notsupported++; 1502 else 1503 aio_errors++; 1504 lio_set_error(reqp, portused); 1505 } else { 1506 clear_active_fd(aiocb->aio_fildes); 1507 } 1508 } 1509 1510 if (aio_notsupported) { 1511 error = ENOTSUP; 1512 } else if (aio_errors) { 1513 /* 1514 * return EIO if any request failed 1515 */ 1516 error = EIO; 1517 } 1518 1519 if (mode_arg == LIO_WAIT) { 1520 mutex_enter(&aiop->aio_mutex); 1521 while (head->lio_refcnt > 0) { 1522 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 1523 mutex_exit(&aiop->aio_mutex); 1524 error = EINTR; 1525 goto done; 1526 } 1527 } 1528 mutex_exit(&aiop->aio_mutex); 1529 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64); 1530 } 1531 1532 done: 1533 kmem_free(cbplist, ssize); 1534 if (deadhead) { 1535 if (head->lio_sigqp) 1536 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 1537 if (head->lio_portkev) 1538 port_free_event(head->lio_portkev); 1539 kmem_free(head, sizeof (aio_lio_t)); 1540 } 1541 return (error); 1542 } 1543 1544 #endif /* _LP64 */ 1545 1546 /* 1547 * Asynchronous list IO. 1548 * If list I/O is called with LIO_WAIT it can still return 1549 * before all the I/O's are completed if a signal is caught 1550 * or if the list include UFS I/O requests. If this happens, 1551 * libaio will call aliowait() to wait for the I/O's to 1552 * complete 1553 */ 1554 /*ARGSUSED*/ 1555 static int 1556 aliowait( 1557 int mode, 1558 void *aiocb, 1559 int nent, 1560 void *sigev, 1561 int run_mode) 1562 { 1563 aio_lio_t *head; 1564 aio_t *aiop; 1565 caddr_t cbplist; 1566 aiocb_t *cbp, **ucbp; 1567 #ifdef _SYSCALL32_IMPL 1568 aiocb32_t *cbp32; 1569 caddr32_t *ucbp32; 1570 aiocb64_32_t *cbp64; 1571 #endif 1572 int error = 0; 1573 int i; 1574 size_t ssize = 0; 1575 model_t model = get_udatamodel(); 1576 1577 aiop = curproc->p_aio; 1578 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 1579 return (EINVAL); 1580 1581 if (model == DATAMODEL_NATIVE) 1582 ssize = (sizeof (aiocb_t *) * nent); 1583 #ifdef _SYSCALL32_IMPL 1584 else 1585 ssize = (sizeof (caddr32_t) * nent); 1586 #endif /* _SYSCALL32_IMPL */ 1587 1588 if (ssize == 0) 1589 return (EINVAL); 1590 1591 cbplist = kmem_alloc(ssize, KM_SLEEP); 1592 1593 if (model == DATAMODEL_NATIVE) 1594 ucbp = (aiocb_t **)cbplist; 1595 #ifdef _SYSCALL32_IMPL 1596 else 1597 ucbp32 = (caddr32_t *)cbplist; 1598 #endif /* _SYSCALL32_IMPL */ 1599 1600 if (copyin(aiocb, cbplist, ssize)) { 1601 error = EFAULT; 1602 goto done; 1603 } 1604 1605 /* 1606 * To find the list head, we go through the 1607 * list of aiocb structs, find the request 1608 * its for, then get the list head that reqp 1609 * points to 1610 */ 1611 head = NULL; 1612 1613 for (i = 0; i < nent; i++) { 1614 if (model == DATAMODEL_NATIVE) { 1615 /* 1616 * Since we are only checking for a NULL pointer 1617 * Following should work on both native data sizes 1618 * as well as for largefile aiocb. 1619 */ 1620 if ((cbp = *ucbp++) == NULL) 1621 continue; 1622 if (run_mode != AIO_LARGEFILE) 1623 if (head = aio_list_get(&cbp->aio_resultp)) 1624 break; 1625 else { 1626 /* 1627 * This is a case when largefile call is 1628 * made on 32 bit kernel. 
1629 * Treat each pointer as pointer to 1630 * aiocb64_32 1631 */ 1632 if (head = aio_list_get((aio_result_t *) 1633 &(((aiocb64_32_t *)cbp)->aio_resultp))) 1634 break; 1635 } 1636 } 1637 #ifdef _SYSCALL32_IMPL 1638 else { 1639 if (run_mode == AIO_LARGEFILE) { 1640 if ((cbp64 = (aiocb64_32_t *) 1641 (uintptr_t)*ucbp32++) == NULL) 1642 continue; 1643 if (head = aio_list_get((aio_result_t *) 1644 &cbp64->aio_resultp)) 1645 break; 1646 } else if (run_mode == AIO_32) { 1647 if ((cbp32 = (aiocb32_t *) 1648 (uintptr_t)*ucbp32++) == NULL) 1649 continue; 1650 if (head = aio_list_get((aio_result_t *) 1651 &cbp32->aio_resultp)) 1652 break; 1653 } 1654 } 1655 #endif /* _SYSCALL32_IMPL */ 1656 } 1657 1658 if (head == NULL) { 1659 error = EINVAL; 1660 goto done; 1661 } 1662 1663 mutex_enter(&aiop->aio_mutex); 1664 while (head->lio_refcnt > 0) { 1665 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 1666 mutex_exit(&aiop->aio_mutex); 1667 error = EINTR; 1668 goto done; 1669 } 1670 } 1671 mutex_exit(&aiop->aio_mutex); 1672 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode); 1673 done: 1674 kmem_free(cbplist, ssize); 1675 return (error); 1676 } 1677 1678 aio_lio_t * 1679 aio_list_get(aio_result_t *resultp) 1680 { 1681 aio_lio_t *head = NULL; 1682 aio_t *aiop; 1683 aio_req_t **bucket; 1684 aio_req_t *reqp; 1685 long index; 1686 1687 aiop = curproc->p_aio; 1688 if (aiop == NULL) 1689 return (NULL); 1690 1691 if (resultp) { 1692 index = AIO_HASH(resultp); 1693 bucket = &aiop->aio_hash[index]; 1694 for (reqp = *bucket; reqp != NULL; 1695 reqp = reqp->aio_hash_next) { 1696 if (reqp->aio_req_resultp == resultp) { 1697 head = reqp->aio_req_lio; 1698 return (head); 1699 } 1700 } 1701 } 1702 return (NULL); 1703 } 1704 1705 1706 static void 1707 lio_set_uerror(void *resultp, int error) 1708 { 1709 /* 1710 * the resultp field is a pointer to where the 1711 * error should be written out to the user's 1712 * aiocb. 1713 * 1714 */ 1715 if (get_udatamodel() == DATAMODEL_NATIVE) { 1716 (void) sulword(&((aio_result_t *)resultp)->aio_return, 1717 (ssize_t)-1); 1718 (void) suword32(&((aio_result_t *)resultp)->aio_errno, error); 1719 } 1720 #ifdef _SYSCALL32_IMPL 1721 else { 1722 (void) suword32(&((aio_result32_t *)resultp)->aio_return, 1723 (uint_t)-1); 1724 (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error); 1725 } 1726 #endif /* _SYSCALL32_IMPL */ 1727 } 1728 1729 /* 1730 * do cleanup completion for all requests in list. memory for 1731 * each request is also freed. 
1732 */ 1733 static void 1734 alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode) 1735 { 1736 int i; 1737 aio_req_t *reqp; 1738 aio_result_t *resultp; 1739 aiocb64_32_t *aiocb_64; 1740 1741 for (i = 0; i < nent; i++) { 1742 if (get_udatamodel() == DATAMODEL_NATIVE) { 1743 if (cbp[i] == NULL) 1744 continue; 1745 if (run_mode == AIO_LARGEFILE) { 1746 aiocb_64 = (aiocb64_32_t *)cbp[i]; 1747 resultp = (aio_result_t *) 1748 &aiocb_64->aio_resultp; 1749 } else 1750 resultp = &cbp[i]->aio_resultp; 1751 } 1752 #ifdef _SYSCALL32_IMPL 1753 else { 1754 aiocb32_t *aiocb_32; 1755 caddr32_t *cbp32; 1756 1757 cbp32 = (caddr32_t *)cbp; 1758 if (cbp32[i] == NULL) 1759 continue; 1760 if (run_mode == AIO_32) { 1761 aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i]; 1762 resultp = (aio_result_t *)&aiocb_32-> 1763 aio_resultp; 1764 } else if (run_mode == AIO_LARGEFILE) { 1765 aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i]; 1766 resultp = (aio_result_t *)&aiocb_64-> 1767 aio_resultp; 1768 } 1769 } 1770 #endif /* _SYSCALL32_IMPL */ 1771 /* 1772 * we need to get the aio_cleanupq_mutex since we call 1773 * aio_req_done(). 1774 */ 1775 mutex_enter(&aiop->aio_cleanupq_mutex); 1776 mutex_enter(&aiop->aio_mutex); 1777 reqp = aio_req_done(resultp); 1778 mutex_exit(&aiop->aio_mutex); 1779 mutex_exit(&aiop->aio_cleanupq_mutex); 1780 if (reqp != NULL) { 1781 aphysio_unlock(reqp); 1782 aio_copyout_result(reqp); 1783 mutex_enter(&aiop->aio_mutex); 1784 aio_req_free(aiop, reqp); 1785 mutex_exit(&aiop->aio_mutex); 1786 } 1787 } 1788 } 1789 1790 /* 1791 * Write out the results for an aio request that is done. 1792 */ 1793 static int 1794 aioerror(void *cb, int run_mode) 1795 { 1796 aio_result_t *resultp; 1797 aio_t *aiop; 1798 aio_req_t *reqp; 1799 int retval; 1800 1801 aiop = curproc->p_aio; 1802 if (aiop == NULL || cb == NULL) 1803 return (EINVAL); 1804 1805 if (get_udatamodel() == DATAMODEL_NATIVE) { 1806 if (run_mode == AIO_LARGEFILE) 1807 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 1808 aio_resultp; 1809 else 1810 resultp = &((aiocb_t *)cb)->aio_resultp; 1811 } 1812 #ifdef _SYSCALL32_IMPL 1813 else { 1814 if (run_mode == AIO_LARGEFILE) 1815 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 1816 aio_resultp; 1817 else if (run_mode == AIO_32) 1818 resultp = (aio_result_t *)&((aiocb32_t *)cb)-> 1819 aio_resultp; 1820 } 1821 #endif /* _SYSCALL32_IMPL */ 1822 /* 1823 * we need to get the aio_cleanupq_mutex since we call 1824 * aio_req_find(). 
1825 */ 1826 mutex_enter(&aiop->aio_cleanupq_mutex); 1827 mutex_enter(&aiop->aio_mutex); 1828 retval = aio_req_find(resultp, &reqp); 1829 mutex_exit(&aiop->aio_mutex); 1830 mutex_exit(&aiop->aio_cleanupq_mutex); 1831 if (retval == 0) { 1832 aphysio_unlock(reqp); 1833 aio_copyout_result(reqp); 1834 mutex_enter(&aiop->aio_mutex); 1835 aio_req_free(aiop, reqp); 1836 mutex_exit(&aiop->aio_mutex); 1837 return (0); 1838 } else if (retval == 1) 1839 return (EINPROGRESS); 1840 else if (retval == 2) 1841 return (EINVAL); 1842 return (0); 1843 } 1844 1845 /* 1846 * aio_cancel - if no requests outstanding, 1847 * return AIO_ALLDONE 1848 * else 1849 * return AIO_NOTCANCELED 1850 */ 1851 static int 1852 aio_cancel( 1853 int fildes, 1854 void *cb, 1855 long *rval, 1856 int run_mode) 1857 { 1858 aio_t *aiop; 1859 void *resultp; 1860 int index; 1861 aio_req_t **bucket; 1862 aio_req_t *ent; 1863 1864 1865 /* 1866 * Verify valid file descriptor 1867 */ 1868 if ((getf(fildes)) == NULL) { 1869 return (EBADF); 1870 } 1871 releasef(fildes); 1872 1873 aiop = curproc->p_aio; 1874 if (aiop == NULL) 1875 return (EINVAL); 1876 1877 if (aiop->aio_outstanding == 0) { 1878 *rval = AIO_ALLDONE; 1879 return (0); 1880 } 1881 1882 mutex_enter(&aiop->aio_mutex); 1883 if (cb != NULL) { 1884 if (get_udatamodel() == DATAMODEL_NATIVE) { 1885 if (run_mode == AIO_LARGEFILE) 1886 resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 1887 ->aio_resultp; 1888 else 1889 resultp = &((aiocb_t *)cb)->aio_resultp; 1890 } 1891 #ifdef _SYSCALL32_IMPL 1892 else { 1893 if (run_mode == AIO_LARGEFILE) 1894 resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 1895 ->aio_resultp; 1896 else if (run_mode == AIO_32) 1897 resultp = (aio_result_t *)&((aiocb32_t *)cb) 1898 ->aio_resultp; 1899 } 1900 #endif /* _SYSCALL32_IMPL */ 1901 index = AIO_HASH(resultp); 1902 bucket = &aiop->aio_hash[index]; 1903 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 1904 if (ent->aio_req_resultp == resultp) { 1905 if ((ent->aio_req_flags & AIO_PENDING) == 0) { 1906 mutex_exit(&aiop->aio_mutex); 1907 *rval = AIO_ALLDONE; 1908 return (0); 1909 } 1910 mutex_exit(&aiop->aio_mutex); 1911 *rval = AIO_NOTCANCELED; 1912 return (0); 1913 } 1914 } 1915 mutex_exit(&aiop->aio_mutex); 1916 *rval = AIO_ALLDONE; 1917 return (0); 1918 } 1919 1920 for (index = 0; index < AIO_HASHSZ; index++) { 1921 bucket = &aiop->aio_hash[index]; 1922 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 1923 if (ent->aio_req_fd == fildes) { 1924 if ((ent->aio_req_flags & AIO_PENDING) != 0) { 1925 mutex_exit(&aiop->aio_mutex); 1926 *rval = AIO_NOTCANCELED; 1927 return (0); 1928 } 1929 } 1930 } 1931 } 1932 mutex_exit(&aiop->aio_mutex); 1933 *rval = AIO_ALLDONE; 1934 return (0); 1935 } 1936 1937 /* 1938 * solaris version of asynchronous read and write 1939 */ 1940 static int 1941 arw( 1942 int opcode, 1943 int fdes, 1944 char *bufp, 1945 int bufsize, 1946 offset_t offset, 1947 aio_result_t *resultp, 1948 int mode) 1949 { 1950 file_t *fp; 1951 int error; 1952 struct vnode *vp; 1953 aio_req_t *reqp; 1954 aio_t *aiop; 1955 int (*aio_func)(); 1956 #ifdef _LP64 1957 aiocb_t aiocb; 1958 #else 1959 aiocb64_32_t aiocb64; 1960 #endif 1961 1962 aiop = curproc->p_aio; 1963 if (aiop == NULL) 1964 return (EINVAL); 1965 1966 if ((fp = getf(fdes)) == NULL) { 1967 return (EBADF); 1968 } 1969 1970 /* 1971 * check the permission of the partition 1972 */ 1973 if ((fp->f_flag & mode) == 0) { 1974 releasef(fdes); 1975 return (EBADF); 1976 } 1977 1978 vp = fp->f_vnode; 1979 aio_func = check_vp(vp, mode); 1980 if 
(aio_func == NULL) { 1981 releasef(fdes); 1982 return (EBADFD); 1983 } 1984 #ifdef _LP64 1985 aiocb.aio_fildes = fdes; 1986 aiocb.aio_buf = bufp; 1987 aiocb.aio_nbytes = bufsize; 1988 aiocb.aio_offset = offset; 1989 aiocb.aio_sigevent.sigev_notify = 0; 1990 error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp); 1991 #else 1992 aiocb64.aio_fildes = fdes; 1993 aiocb64.aio_buf = (caddr32_t)bufp; 1994 aiocb64.aio_nbytes = bufsize; 1995 aiocb64.aio_offset = offset; 1996 aiocb64.aio_sigevent.sigev_notify = 0; 1997 error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp); 1998 #endif 1999 if (error) { 2000 releasef(fdes); 2001 return (error); 2002 } 2003 2004 /* 2005 * enable polling on this request if the opcode has 2006 * the AIO poll bit set 2007 */ 2008 if (opcode & AIO_POLL_BIT) 2009 reqp->aio_req_flags |= AIO_POLL; 2010 2011 if (bufsize == 0) { 2012 clear_active_fd(fdes); 2013 aio_zerolen(reqp); 2014 return (0); 2015 } 2016 /* 2017 * send the request to driver. 2018 */ 2019 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED()); 2020 /* 2021 * the fd is stored in the aio_req_t by aio_req_setup(), and 2022 * is released by the aio_cleanup_thread() when the IO has 2023 * completed. 2024 */ 2025 if (error) { 2026 releasef(fdes); 2027 mutex_enter(&aiop->aio_mutex); 2028 aio_req_free(aiop, reqp); 2029 aiop->aio_pending--; 2030 if (aiop->aio_flags & AIO_REQ_BLOCK) 2031 cv_signal(&aiop->aio_cleanupcv); 2032 mutex_exit(&aiop->aio_mutex); 2033 return (error); 2034 } 2035 clear_active_fd(fdes); 2036 return (0); 2037 } 2038 2039 /* 2040 * posix version of asynchronous read and write 2041 */ 2042 static int 2043 aiorw( 2044 int opcode, 2045 void *aiocb_arg, 2046 int mode, 2047 int run_mode) 2048 { 2049 #ifdef _SYSCALL32_IMPL 2050 aiocb32_t aiocb32; 2051 struct sigevent32 *sigev32; 2052 port_notify32_t pntfy32; 2053 #endif 2054 aiocb64_32_t aiocb64; 2055 aiocb_t aiocb; 2056 file_t *fp; 2057 int error, fd; 2058 size_t bufsize; 2059 struct vnode *vp; 2060 aio_req_t *reqp; 2061 aio_t *aiop; 2062 int (*aio_func)(); 2063 aio_result_t *resultp; 2064 struct sigevent *sigev; 2065 model_t model; 2066 int aio_use_port = 0; 2067 port_notify_t pntfy; 2068 2069 model = get_udatamodel(); 2070 aiop = curproc->p_aio; 2071 if (aiop == NULL) 2072 return (EINVAL); 2073 2074 if (model == DATAMODEL_NATIVE) { 2075 if (run_mode != AIO_LARGEFILE) { 2076 if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t))) 2077 return (EFAULT); 2078 bufsize = aiocb.aio_nbytes; 2079 resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp); 2080 if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) { 2081 return (EBADF); 2082 } 2083 sigev = &aiocb.aio_sigevent; 2084 } else { 2085 /* 2086 * We come here only when we make largefile 2087 * call on 32 bit kernel using 32 bit library. 
2088 */ 2089 if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t))) 2090 return (EFAULT); 2091 bufsize = aiocb64.aio_nbytes; 2092 resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg) 2093 ->aio_resultp); 2094 if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) 2095 return (EBADF); 2096 sigev = (struct sigevent *)&aiocb64.aio_sigevent; 2097 } 2098 2099 if (sigev->sigev_notify == SIGEV_PORT) { 2100 if (copyin((void *)sigev->sigev_value.sival_ptr, 2101 &pntfy, sizeof (port_notify_t))) { 2102 releasef(fd); 2103 return (EFAULT); 2104 } 2105 aio_use_port = 1; 2106 } else if (sigev->sigev_notify == SIGEV_THREAD) { 2107 pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo; 2108 pntfy.portnfy_user = 2109 aiocb.aio_sigevent.sigev_value.sival_ptr; 2110 aio_use_port = 1; 2111 } 2112 } 2113 #ifdef _SYSCALL32_IMPL 2114 else { 2115 if (run_mode == AIO_32) { 2116 /* 32 bit system call is being made on 64 bit kernel */ 2117 if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t))) 2118 return (EFAULT); 2119 2120 bufsize = aiocb32.aio_nbytes; 2121 aiocb_32ton(&aiocb32, &aiocb); 2122 resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)-> 2123 aio_resultp); 2124 if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) { 2125 return (EBADF); 2126 } 2127 sigev32 = &aiocb32.aio_sigevent; 2128 } else if (run_mode == AIO_LARGEFILE) { 2129 /* 2130 * We come here only when we make largefile 2131 * call on 64 bit kernel using 32 bit library. 2132 */ 2133 if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t))) 2134 return (EFAULT); 2135 bufsize = aiocb64.aio_nbytes; 2136 aiocb_LFton(&aiocb64, &aiocb); 2137 resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg) 2138 ->aio_resultp); 2139 if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) 2140 return (EBADF); 2141 sigev32 = &aiocb64.aio_sigevent; 2142 } 2143 2144 if (sigev32->sigev_notify == SIGEV_PORT) { 2145 if (copyin( 2146 (void *)(uintptr_t)sigev32->sigev_value.sival_ptr, 2147 &pntfy32, sizeof (port_notify32_t))) { 2148 releasef(fd); 2149 return (EFAULT); 2150 } 2151 pntfy.portnfy_port = pntfy32.portnfy_port; 2152 pntfy.portnfy_user = (void *)(uintptr_t) 2153 pntfy32.portnfy_user; 2154 aio_use_port = 1; 2155 } else if (sigev32->sigev_notify == SIGEV_THREAD) { 2156 pntfy.portnfy_port = sigev32->sigev_signo; 2157 pntfy.portnfy_user = (void *)(uintptr_t) 2158 sigev32->sigev_value.sival_ptr; 2159 aio_use_port = 1; 2160 } 2161 } 2162 #endif /* _SYSCALL32_IMPL */ 2163 2164 /* 2165 * check the permission of the partition 2166 */ 2167 2168 if ((fp->f_flag & mode) == 0) { 2169 releasef(fd); 2170 return (EBADF); 2171 } 2172 2173 vp = fp->f_vnode; 2174 aio_func = check_vp(vp, mode); 2175 if (aio_func == NULL) { 2176 releasef(fd); 2177 return (EBADFD); 2178 } 2179 if (run_mode == AIO_LARGEFILE) 2180 error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp); 2181 else 2182 error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp); 2183 2184 if (error) { 2185 releasef(fd); 2186 return (error); 2187 } 2188 /* 2189 * enable polling on this request if the opcode has 2190 * the AIO poll bit set 2191 */ 2192 if (opcode & AIO_POLL_BIT) 2193 reqp->aio_req_flags |= AIO_POLL; 2194 2195 if (model == DATAMODEL_NATIVE) 2196 reqp->aio_req_iocb.iocb = aiocb_arg; 2197 #ifdef _SYSCALL32_IMPL 2198 else 2199 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg; 2200 #endif 2201 2202 if (aio_use_port) { 2203 int event = (run_mode == AIO_LARGEFILE)? 2204 ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) : 2205 ((mode == FREAD)? 
AIOAREAD : AIOAWRITE); 2206 error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event); 2207 } 2208 2209 /* 2210 * send the request to driver. 2211 */ 2212 if (error == 0) { 2213 if (bufsize == 0) { 2214 clear_active_fd(fd); 2215 aio_zerolen(reqp); 2216 return (0); 2217 } 2218 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED()); 2219 } 2220 2221 /* 2222 * the fd is stored in the aio_req_t by aio_req_setup(), and 2223 * is released by the aio_cleanup_thread() when the IO has 2224 * completed. 2225 */ 2226 if (error) { 2227 releasef(fd); 2228 mutex_enter(&aiop->aio_mutex); 2229 if (aio_use_port) 2230 aio_deq(&aiop->aio_portpending, reqp); 2231 aio_req_free(aiop, reqp); 2232 aiop->aio_pending--; 2233 if (aiop->aio_flags & AIO_REQ_BLOCK) 2234 cv_signal(&aiop->aio_cleanupcv); 2235 mutex_exit(&aiop->aio_mutex); 2236 return (error); 2237 } 2238 clear_active_fd(fd); 2239 return (0); 2240 } 2241 2242 2243 /* 2244 * set error for a list IO entry that failed. 2245 */ 2246 static void 2247 lio_set_error(aio_req_t *reqp, int portused) 2248 { 2249 aio_t *aiop = curproc->p_aio; 2250 2251 if (aiop == NULL) 2252 return; 2253 2254 mutex_enter(&aiop->aio_mutex); 2255 if (portused) 2256 aio_deq(&aiop->aio_portpending, reqp); 2257 aiop->aio_pending--; 2258 /* request failed, AIO_PHYSIODONE set to aviod physio cleanup. */ 2259 reqp->aio_req_flags |= AIO_PHYSIODONE; 2260 /* 2261 * Need to free the request now as its never 2262 * going to get on the done queue 2263 * 2264 * Note: aio_outstanding is decremented in 2265 * aio_req_free() 2266 */ 2267 aio_req_free(aiop, reqp); 2268 if (aiop->aio_flags & AIO_REQ_BLOCK) 2269 cv_signal(&aiop->aio_cleanupcv); 2270 mutex_exit(&aiop->aio_mutex); 2271 } 2272 2273 /* 2274 * check if a specified request is done, and remove it from 2275 * the done queue. otherwise remove anybody from the done queue 2276 * if NULL is specified. 2277 */ 2278 static aio_req_t * 2279 aio_req_done(void *resultp) 2280 { 2281 aio_req_t **bucket; 2282 aio_req_t *ent; 2283 aio_t *aiop = curproc->p_aio; 2284 long index; 2285 2286 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex)); 2287 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2288 2289 if (resultp) { 2290 index = AIO_HASH(resultp); 2291 bucket = &aiop->aio_hash[index]; 2292 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 2293 if (ent->aio_req_resultp == (aio_result_t *)resultp) { 2294 if (ent->aio_req_flags & AIO_DONEQ) { 2295 return (aio_req_remove(ent)); 2296 } 2297 return (NULL); 2298 } 2299 } 2300 /* no match, resultp is invalid */ 2301 return (NULL); 2302 } 2303 return (aio_req_remove(NULL)); 2304 } 2305 2306 /* 2307 * determine if a user-level resultp pointer is associated with an 2308 * active IO request. Zero is returned when the request is done, 2309 * and the request is removed from the done queue. Only when the 2310 * return value is zero, is the "reqp" pointer valid. One is returned 2311 * when the request is inprogress. Two is returned when the request 2312 * is invalid. 
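 *
 * Callers translate these values into errno-style results; for
 * example, aioerror() above maps them as:
 *
 *	0 -> copy out the result and return 0
 *	1 -> return (EINPROGRESS)
 *	2 -> return (EINVAL)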
2313 */ 2314 static int 2315 aio_req_find(aio_result_t *resultp, aio_req_t **reqp) 2316 { 2317 aio_req_t **bucket; 2318 aio_req_t *ent; 2319 aio_t *aiop = curproc->p_aio; 2320 long index; 2321 2322 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex)); 2323 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2324 2325 index = AIO_HASH(resultp); 2326 bucket = &aiop->aio_hash[index]; 2327 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 2328 if (ent->aio_req_resultp == resultp) { 2329 if (ent->aio_req_flags & AIO_DONEQ) { 2330 *reqp = aio_req_remove(ent); 2331 return (0); 2332 } 2333 return (1); 2334 } 2335 } 2336 /* no match, resultp is invalid */ 2337 return (2); 2338 } 2339 2340 /* 2341 * remove a request from the done queue. 2342 */ 2343 static aio_req_t * 2344 aio_req_remove(aio_req_t *reqp) 2345 { 2346 aio_t *aiop = curproc->p_aio; 2347 2348 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2349 2350 if (reqp != NULL) { 2351 ASSERT(reqp->aio_req_flags & AIO_DONEQ); 2352 if (reqp->aio_req_next == reqp) { 2353 /* only one request on queue */ 2354 if (reqp == aiop->aio_doneq) { 2355 aiop->aio_doneq = NULL; 2356 } else { 2357 ASSERT(reqp == aiop->aio_cleanupq); 2358 aiop->aio_cleanupq = NULL; 2359 } 2360 } else { 2361 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 2362 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 2363 /* 2364 * The request can be either on the aio_doneq or the 2365 * aio_cleanupq 2366 */ 2367 if (reqp == aiop->aio_doneq) 2368 aiop->aio_doneq = reqp->aio_req_next; 2369 2370 if (reqp == aiop->aio_cleanupq) 2371 aiop->aio_cleanupq = reqp->aio_req_next; 2372 } 2373 reqp->aio_req_flags &= ~AIO_DONEQ; 2374 reqp->aio_req_next = NULL; 2375 reqp->aio_req_prev = NULL; 2376 } else if ((reqp = aiop->aio_doneq) != NULL) { 2377 ASSERT(reqp->aio_req_flags & AIO_DONEQ); 2378 if (reqp == reqp->aio_req_next) { 2379 /* only one request on queue */ 2380 aiop->aio_doneq = NULL; 2381 } else { 2382 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 2383 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 2384 aiop->aio_doneq = reqp->aio_req_next; 2385 } 2386 reqp->aio_req_flags &= ~AIO_DONEQ; 2387 reqp->aio_req_next = NULL; 2388 reqp->aio_req_prev = NULL; 2389 } 2390 if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN)) 2391 cv_broadcast(&aiop->aio_waitcv); 2392 return (reqp); 2393 } 2394 2395 static int 2396 aio_req_setup( 2397 aio_req_t **reqpp, 2398 aio_t *aiop, 2399 aiocb_t *arg, 2400 aio_result_t *resultp, 2401 vnode_t *vp) 2402 { 2403 sigqueue_t *sqp = NULL; 2404 aio_req_t *reqp; 2405 struct uio *uio; 2406 struct sigevent *sigev; 2407 int error; 2408 2409 sigev = &arg->aio_sigevent; 2410 if (sigev->sigev_notify == SIGEV_SIGNAL && 2411 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) { 2412 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 2413 if (sqp == NULL) 2414 return (EAGAIN); 2415 sqp->sq_func = NULL; 2416 sqp->sq_next = NULL; 2417 sqp->sq_info.si_code = SI_ASYNCIO; 2418 sqp->sq_info.si_pid = curproc->p_pid; 2419 sqp->sq_info.si_ctid = PRCTID(curproc); 2420 sqp->sq_info.si_zoneid = getzoneid(); 2421 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 2422 sqp->sq_info.si_signo = sigev->sigev_signo; 2423 sqp->sq_info.si_value = sigev->sigev_value; 2424 } 2425 2426 mutex_enter(&aiop->aio_mutex); 2427 2428 if (aiop->aio_flags & AIO_REQ_BLOCK) { 2429 mutex_exit(&aiop->aio_mutex); 2430 if (sqp) 2431 kmem_free(sqp, sizeof (sigqueue_t)); 2432 return (EIO); 2433 } 2434 /* 2435 * get an aio_reqp from the free list or allocate one 2436 * from dynamic memory. 
2437 */ 2438 if (error = aio_req_alloc(&reqp, resultp)) { 2439 mutex_exit(&aiop->aio_mutex); 2440 if (sqp) 2441 kmem_free(sqp, sizeof (sigqueue_t)); 2442 return (error); 2443 } 2444 aiop->aio_pending++; 2445 aiop->aio_outstanding++; 2446 reqp->aio_req_flags = AIO_PENDING; 2447 if (sigev->sigev_notify == SIGEV_THREAD || 2448 sigev->sigev_notify == SIGEV_PORT) 2449 aio_enq(&aiop->aio_portpending, reqp, 0); 2450 mutex_exit(&aiop->aio_mutex); 2451 /* 2452 * initialize aio request. 2453 */ 2454 reqp->aio_req_fd = arg->aio_fildes; 2455 reqp->aio_req_sigqp = sqp; 2456 reqp->aio_req_iocb.iocb = NULL; 2457 reqp->aio_req_lio = NULL; 2458 reqp->aio_req_buf.b_file = vp; 2459 uio = reqp->aio_req.aio_uio; 2460 uio->uio_iovcnt = 1; 2461 uio->uio_iov->iov_base = (caddr_t)arg->aio_buf; 2462 uio->uio_iov->iov_len = arg->aio_nbytes; 2463 uio->uio_loffset = arg->aio_offset; 2464 *reqpp = reqp; 2465 return (0); 2466 } 2467 2468 /* 2469 * Allocate p_aio struct. 2470 */ 2471 static aio_t * 2472 aio_aiop_alloc(void) 2473 { 2474 aio_t *aiop; 2475 2476 ASSERT(MUTEX_HELD(&curproc->p_lock)); 2477 2478 aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP); 2479 if (aiop) { 2480 mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL); 2481 mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT, 2482 NULL); 2483 mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL); 2484 } 2485 return (aiop); 2486 } 2487 2488 /* 2489 * Allocate an aio_req struct. 2490 */ 2491 static int 2492 aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp) 2493 { 2494 aio_req_t *reqp; 2495 aio_t *aiop = curproc->p_aio; 2496 2497 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2498 2499 if ((reqp = aiop->aio_free) != NULL) { 2500 aiop->aio_free = reqp->aio_req_next; 2501 bzero(reqp, sizeof (*reqp)); 2502 } else { 2503 /* 2504 * Check whether memory is getting tight. 2505 * This is a temporary mechanism to avoid memory 2506 * exhaustion by a single process until we come up 2507 * with a per process solution such as setrlimit(). 2508 */ 2509 if (freemem < desfree) 2510 return (EAGAIN); 2511 reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP); 2512 if (reqp == NULL) 2513 return (EAGAIN); 2514 } 2515 reqp->aio_req.aio_uio = &reqp->aio_req_uio; 2516 reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov; 2517 reqp->aio_req.aio_private = reqp; 2518 reqp->aio_req_buf.b_offset = -1; 2519 reqp->aio_req_resultp = resultp; 2520 if (aio_hash_insert(reqp, aiop)) { 2521 reqp->aio_req_next = aiop->aio_free; 2522 aiop->aio_free = reqp; 2523 return (EINVAL); 2524 } 2525 *nreqp = reqp; 2526 return (0); 2527 } 2528 2529 /* 2530 * Allocate an aio_lio_t struct. 2531 */ 2532 static int 2533 aio_lio_alloc(aio_lio_t **head) 2534 { 2535 aio_lio_t *liop; 2536 aio_t *aiop = curproc->p_aio; 2537 2538 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2539 2540 if ((liop = aiop->aio_lio_free) != NULL) { 2541 aiop->aio_lio_free = liop->lio_next; 2542 } else { 2543 /* 2544 * Check whether memory is getting tight. 2545 * This is a temporary mechanism to avoid memory 2546 * exhaustion by a single process until we come up 2547 * with a per process solution such as setrlimit(). 2548 */ 2549 if (freemem < desfree) 2550 return (EAGAIN); 2551 2552 liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP); 2553 if (liop == NULL) 2554 return (EAGAIN); 2555 } 2556 *head = liop; 2557 return (0); 2558 } 2559 2560 /* 2561 * this is a special per-process thread that is only activated if 2562 * the process is unmapping a segment with outstanding aio. 
Normally, 2563 * the process will have completed the aio before unmapping the 2564 * segment. If the process does unmap a segment with outstanding aio, 2565 * this special thread will guarantee that the locked pages due to 2566 * aphysio() are released, thereby permitting the segment to be 2567 * unmapped. In addition to this, the cleanup thread is woken up 2568 * during DR operations to release the locked pages. 2569 */ 2570 2571 static int 2572 aio_cleanup_thread(aio_t *aiop) 2573 { 2574 proc_t *p = curproc; 2575 struct as *as = p->p_as; 2576 int poked = 0; 2577 kcondvar_t *cvp; 2578 int exit_flag = 0; 2579 int rqclnup = 0; 2580 2581 sigfillset(&curthread->t_hold); 2582 sigdiffset(&curthread->t_hold, &cantmask); 2583 for (;;) { 2584 /* 2585 * if a segment is being unmapped, and the current 2586 * process's done queue is not empty, then every request 2587 * on the doneq with locked resources should be forced 2588 * to release their locks. By moving the doneq request 2589 * to the cleanupq, aio_cleanup() will process the cleanupq, 2590 * and place requests back onto the doneq. All requests 2591 * processed by aio_cleanup() will have their physical 2592 * resources unlocked. 2593 */ 2594 mutex_enter(&aiop->aio_mutex); 2595 if ((aiop->aio_flags & AIO_CLEANUP) == 0) { 2596 aiop->aio_flags |= AIO_CLEANUP; 2597 mutex_enter(&as->a_contents); 2598 if (aiop->aio_rqclnup) { 2599 aiop->aio_rqclnup = 0; 2600 rqclnup = 1; 2601 } 2602 2603 if ((rqclnup || AS_ISUNMAPWAIT(as)) && 2604 aiop->aio_doneq) { 2605 aio_req_t *doneqhead = aiop->aio_doneq; 2606 mutex_exit(&as->a_contents); 2607 aiop->aio_doneq = NULL; 2608 aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ); 2609 } else { 2610 mutex_exit(&as->a_contents); 2611 } 2612 } 2613 mutex_exit(&aiop->aio_mutex); 2614 aio_cleanup(AIO_CLEANUP_THREAD); 2615 /* 2616 * thread should block on the cleanupcv while 2617 * AIO_CLEANUP is set. 2618 */ 2619 cvp = &aiop->aio_cleanupcv; 2620 mutex_enter(&aiop->aio_mutex); 2621 2622 if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL || 2623 aiop->aio_notifyq != NULL || 2624 aiop->aio_portcleanupq != NULL) { 2625 mutex_exit(&aiop->aio_mutex); 2626 continue; 2627 } 2628 mutex_enter(&as->a_contents); 2629 2630 /* 2631 * AIO_CLEANUP determines when the cleanup thread 2632 * should be active. This flag is set when 2633 * the cleanup thread is awakened by as_unmap() or 2634 * due to DR operations. 2635 * The flag is cleared when the blocking as_unmap() 2636 * that originally awakened us is allowed to 2637 * complete. as_unmap() blocks when trying to 2638 * unmap a segment that has SOFTLOCKed pages. When 2639 * the segment's pages are all SOFTUNLOCKed, 2640 * as->a_flags & AS_UNMAPWAIT should be zero. 2641 * 2642 * In case of cleanup request by DR, the flag is cleared 2643 * once all the pending aio requests have been processed. 2644 * 2645 * The flag shouldn't be cleared right away if the 2646 * cleanup thread was interrupted because the process 2647 * is doing forkall(). This happens when cv_wait_sig() 2648 * returns zero, because it was awakened by a pokelwps(). 2649 * If the process is not exiting, it must be doing forkall().
2650 */ 2651 if ((poked == 0) && 2652 ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) || 2653 (aiop->aio_pending == 0))) { 2654 aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT); 2655 cvp = &as->a_cv; 2656 rqclnup = 0; 2657 } 2658 mutex_exit(&aiop->aio_mutex); 2659 if (poked) { 2660 /* 2661 * If the process is exiting/killed, don't return 2662 * immediately without waiting for pending I/O's 2663 * and releasing the page locks. 2664 */ 2665 if (p->p_flag & (SEXITLWPS|SKILLED)) { 2666 /* 2667 * If exit_flag is set, then it is 2668 * safe to exit because we have released 2669 * page locks of completed I/O's. 2670 */ 2671 if (exit_flag) 2672 break; 2673 2674 mutex_exit(&as->a_contents); 2675 2676 /* 2677 * Wait for all the pending aio to complete. 2678 */ 2679 mutex_enter(&aiop->aio_mutex); 2680 aiop->aio_flags |= AIO_REQ_BLOCK; 2681 while (aiop->aio_pending != 0) 2682 cv_wait(&aiop->aio_cleanupcv, 2683 &aiop->aio_mutex); 2684 mutex_exit(&aiop->aio_mutex); 2685 exit_flag = 1; 2686 continue; 2687 } else if (p->p_flag & 2688 (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) { 2689 /* 2690 * hold LWP until it 2691 * is continued. 2692 */ 2693 mutex_exit(&as->a_contents); 2694 mutex_enter(&p->p_lock); 2695 stop(PR_SUSPENDED, SUSPEND_NORMAL); 2696 mutex_exit(&p->p_lock); 2697 poked = 0; 2698 continue; 2699 } 2700 } else { 2701 /* 2702 * When started this thread will sleep on as->a_cv. 2703 * as_unmap will awake this thread if the 2704 * segment has SOFTLOCKed pages (poked = 0). 2705 * 1. pokelwps() awakes this thread => 2706 * break the loop to check SEXITLWPS, SHOLDFORK, etc 2707 * 2. as_unmap awakes this thread => 2708 * to break the loop it is necessary that 2709 * - AS_UNMAPWAIT is set (as_unmap is waiting for 2710 * memory to be unlocked) 2711 * - AIO_CLEANUP is not set 2712 * (if AIO_CLEANUP is set we have to wait for 2713 * pending requests. aio_done will send a signal 2714 * for every request which completes to continue 2715 * unmapping the corresponding address range) 2716 * 3. A cleanup request will wake this thread up, ex. 2717 * by the DR operations. The aio_rqclnup flag will 2718 * be set. 2719 */ 2720 while (poked == 0) { 2721 /* 2722 * we need to handle cleanup requests 2723 * that come in after we had just cleaned up, 2724 * so that we do cleanup of any new aio 2725 * requests that got completed and have 2726 * locked resources. 2727 */ 2728 if ((aiop->aio_rqclnup || 2729 (AS_ISUNMAPWAIT(as) != 0)) && 2730 (aiop->aio_flags & AIO_CLEANUP) == 0) 2731 break; 2732 poked = !cv_wait_sig(cvp, &as->a_contents); 2733 if (AS_ISUNMAPWAIT(as) == 0) 2734 cv_signal(cvp); 2735 if (aiop->aio_outstanding != 0) 2736 break; 2737 } 2738 } 2739 mutex_exit(&as->a_contents); 2740 } 2741 exit: 2742 mutex_exit(&as->a_contents); 2743 ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED))); 2744 aston(curthread); /* make thread do post_syscall */ 2745 return (0); 2746 } 2747 2748 /* 2749 * save a reference to a user's outstanding aio in a hash list. 
2750 */ 2751 static int 2752 aio_hash_insert( 2753 aio_req_t *aio_reqp, 2754 aio_t *aiop) 2755 { 2756 long index; 2757 aio_result_t *resultp = aio_reqp->aio_req_resultp; 2758 aio_req_t *current; 2759 aio_req_t **nextp; 2760 2761 index = AIO_HASH(resultp); 2762 nextp = &aiop->aio_hash[index]; 2763 while ((current = *nextp) != NULL) { 2764 if (current->aio_req_resultp == resultp) 2765 return (DUPLICATE); 2766 nextp = &current->aio_hash_next; 2767 } 2768 *nextp = aio_reqp; 2769 aio_reqp->aio_hash_next = NULL; 2770 return (0); 2771 } 2772 2773 static int 2774 (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *, 2775 cred_t *) 2776 { 2777 struct snode *sp; 2778 dev_t dev; 2779 struct cb_ops *cb; 2780 major_t major; 2781 int (*aio_func)(); 2782 2783 dev = vp->v_rdev; 2784 major = getmajor(dev); 2785 2786 /* 2787 * return NULL for requests to files and STREAMs so 2788 * that libaio takes care of them. 2789 */ 2790 if (vp->v_type == VCHR) { 2791 /* no stream device for kaio */ 2792 if (STREAMSTAB(major)) { 2793 return (NULL); 2794 } 2795 } else { 2796 return (NULL); 2797 } 2798 2799 /* 2800 * Check old drivers which do not have async I/O entry points. 2801 */ 2802 if (devopsp[major]->devo_rev < 3) 2803 return (NULL); 2804 2805 cb = devopsp[major]->devo_cb_ops; 2806 2807 if (cb->cb_rev < 1) 2808 return (NULL); 2809 2810 /* 2811 * Check whether this device is a block device. 2812 * Kaio is not supported for devices like tty. 2813 */ 2814 if (cb->cb_strategy == nodev || cb->cb_strategy == NULL) 2815 return (NULL); 2816 2817 /* 2818 * Clustering: If vnode is a PXFS vnode, then the device may be remote. 2819 * We cannot call the driver directly. Instead return the 2820 * PXFS functions. 2821 */ 2822 2823 if (IS_PXFSVP(vp)) { 2824 if (mode & FREAD) 2825 return (clpxfs_aio_read); 2826 else 2827 return (clpxfs_aio_write); 2828 } 2829 if (mode & FREAD) 2830 aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read; 2831 else 2832 aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write; 2833 2834 /* 2835 * Do we need this ? 2836 * nodev returns ENXIO anyway. 2837 */ 2838 if (aio_func == nodev) 2839 return (NULL); 2840 2841 sp = VTOS(vp); 2842 smark(sp, SACC); 2843 return (aio_func); 2844 } 2845 2846 /* 2847 * Clustering: We want check_vp to return a function prototyped 2848 * correctly that will be common to both PXFS and regular case. 2849 * We define this intermediate function that will do the right 2850 * thing for driver cases. 2851 */ 2852 2853 static int 2854 driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 2855 { 2856 dev_t dev; 2857 struct cb_ops *cb; 2858 2859 ASSERT(vp->v_type == VCHR); 2860 ASSERT(!IS_PXFSVP(vp)); 2861 dev = VTOS(vp)->s_dev; 2862 ASSERT(STREAMSTAB(getmajor(dev)) == NULL); 2863 2864 cb = devopsp[getmajor(dev)]->devo_cb_ops; 2865 2866 ASSERT(cb->cb_awrite != nodev); 2867 return ((*cb->cb_awrite)(dev, aio, cred_p)); 2868 } 2869 2870 /* 2871 * Clustering: We want check_vp to return a function prototyped 2872 * correctly that will be common to both PXFS and regular case. 2873 * We define this intermediate function that will do the right 2874 * thing for driver cases.
2875 */ 2876 2877 static int 2878 driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 2879 { 2880 dev_t dev; 2881 struct cb_ops *cb; 2882 2883 ASSERT(vp->v_type == VCHR); 2884 ASSERT(!IS_PXFSVP(vp)); 2885 dev = VTOS(vp)->s_dev; 2886 ASSERT(!STREAMSTAB(getmajor(dev))); 2887 2888 cb = devopsp[getmajor(dev)]->devo_cb_ops; 2889 2890 ASSERT(cb->cb_aread != nodev); 2891 return ((*cb->cb_aread)(dev, aio, cred_p)); 2892 } 2893 2894 /* 2895 * This routine is called when a largefile call is made by a 32bit 2896 * process on a ILP32 or LP64 kernel. All 64bit processes are large 2897 * file by definition and will call alio() instead. 2898 */ 2899 static int 2900 alioLF( 2901 int mode_arg, 2902 void *aiocb_arg, 2903 int nent, 2904 void *sigev) 2905 { 2906 file_t *fp; 2907 file_t *prev_fp = NULL; 2908 int prev_mode = -1; 2909 struct vnode *vp; 2910 aio_lio_t *head; 2911 aio_req_t *reqp; 2912 aio_t *aiop; 2913 caddr_t cbplist; 2914 aiocb64_32_t cb64; 2915 aiocb64_32_t *aiocb = &cb64; 2916 aiocb64_32_t *cbp; 2917 caddr32_t *ucbp; 2918 #ifdef _LP64 2919 aiocb_t aiocb_n; 2920 #endif 2921 struct sigevent32 sigevk; 2922 sigqueue_t *sqp; 2923 int (*aio_func)(); 2924 int mode; 2925 int error = 0; 2926 int aio_errors = 0; 2927 int i; 2928 size_t ssize; 2929 int deadhead = 0; 2930 int aio_notsupported = 0; 2931 int lio_head_port; 2932 int aio_port; 2933 int aio_thread; 2934 port_kevent_t *pkevtp = NULL; 2935 int portused = 0; 2936 port_notify32_t pnotify; 2937 int event; 2938 2939 aiop = curproc->p_aio; 2940 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 2941 return (EINVAL); 2942 2943 ASSERT(get_udatamodel() == DATAMODEL_ILP32); 2944 2945 ssize = (sizeof (caddr32_t) * nent); 2946 cbplist = kmem_alloc(ssize, KM_SLEEP); 2947 ucbp = (caddr32_t *)cbplist; 2948 2949 if (copyin(aiocb_arg, cbplist, ssize) || 2950 (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) { 2951 kmem_free(cbplist, ssize); 2952 return (EFAULT); 2953 } 2954 2955 /* Event Ports */ 2956 if (sigev && 2957 (sigevk.sigev_notify == SIGEV_THREAD || 2958 sigevk.sigev_notify == SIGEV_PORT)) { 2959 if (sigevk.sigev_notify == SIGEV_THREAD) { 2960 pnotify.portnfy_port = sigevk.sigev_signo; 2961 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 2962 } else if (copyin( 2963 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr, 2964 &pnotify, sizeof (pnotify))) { 2965 kmem_free(cbplist, ssize); 2966 return (EFAULT); 2967 } 2968 error = port_alloc_event(pnotify.portnfy_port, 2969 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 2970 if (error) { 2971 if (error == ENOMEM || error == EAGAIN) 2972 error = EAGAIN; 2973 else 2974 error = EINVAL; 2975 kmem_free(cbplist, ssize); 2976 return (error); 2977 } 2978 lio_head_port = pnotify.portnfy_port; 2979 portused = 1; 2980 } 2981 2982 /* 2983 * a list head should be allocated if notification is 2984 * enabled for this list. 
2985 */ 2986 head = NULL; 2987 2988 if (mode_arg == LIO_WAIT || sigev) { 2989 mutex_enter(&aiop->aio_mutex); 2990 error = aio_lio_alloc(&head); 2991 mutex_exit(&aiop->aio_mutex); 2992 if (error) 2993 goto done; 2994 deadhead = 1; 2995 head->lio_nent = nent; 2996 head->lio_refcnt = nent; 2997 head->lio_port = -1; 2998 head->lio_portkev = NULL; 2999 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 3000 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 3001 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3002 if (sqp == NULL) { 3003 error = EAGAIN; 3004 goto done; 3005 } 3006 sqp->sq_func = NULL; 3007 sqp->sq_next = NULL; 3008 sqp->sq_info.si_code = SI_ASYNCIO; 3009 sqp->sq_info.si_pid = curproc->p_pid; 3010 sqp->sq_info.si_ctid = PRCTID(curproc); 3011 sqp->sq_info.si_zoneid = getzoneid(); 3012 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3013 sqp->sq_info.si_signo = sigevk.sigev_signo; 3014 sqp->sq_info.si_value.sival_int = 3015 sigevk.sigev_value.sival_int; 3016 head->lio_sigqp = sqp; 3017 } else { 3018 head->lio_sigqp = NULL; 3019 } 3020 if (pkevtp) { 3021 /* 3022 * Prepare data to send when list of aiocb's 3023 * has completed. 3024 */ 3025 port_init_event(pkevtp, (uintptr_t)sigev, 3026 (void *)(uintptr_t)pnotify.portnfy_user, 3027 NULL, head); 3028 pkevtp->portkev_events = AIOLIO64; 3029 head->lio_portkev = pkevtp; 3030 head->lio_port = pnotify.portnfy_port; 3031 } 3032 } 3033 3034 for (i = 0; i < nent; i++, ucbp++) { 3035 3036 cbp = (aiocb64_32_t *)(uintptr_t)*ucbp; 3037 /* skip entry if it can't be copied. */ 3038 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) { 3039 if (head) { 3040 mutex_enter(&aiop->aio_mutex); 3041 head->lio_nent--; 3042 head->lio_refcnt--; 3043 mutex_exit(&aiop->aio_mutex); 3044 } 3045 continue; 3046 } 3047 3048 /* skip if opcode for aiocb is LIO_NOP */ 3049 mode = aiocb->aio_lio_opcode; 3050 if (mode == LIO_NOP) { 3051 cbp = NULL; 3052 if (head) { 3053 mutex_enter(&aiop->aio_mutex); 3054 head->lio_nent--; 3055 head->lio_refcnt--; 3056 mutex_exit(&aiop->aio_mutex); 3057 } 3058 continue; 3059 } 3060 3061 /* increment file descriptor's ref count. 
*/ 3062 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 3063 lio_set_uerror(&cbp->aio_resultp, EBADF); 3064 if (head) { 3065 mutex_enter(&aiop->aio_mutex); 3066 head->lio_nent--; 3067 head->lio_refcnt--; 3068 mutex_exit(&aiop->aio_mutex); 3069 } 3070 aio_errors++; 3071 continue; 3072 } 3073 3074 /* 3075 * check the permission of the partition 3076 */ 3077 if ((fp->f_flag & mode) == 0) { 3078 releasef(aiocb->aio_fildes); 3079 lio_set_uerror(&cbp->aio_resultp, EBADF); 3080 if (head) { 3081 mutex_enter(&aiop->aio_mutex); 3082 head->lio_nent--; 3083 head->lio_refcnt--; 3084 mutex_exit(&aiop->aio_mutex); 3085 } 3086 aio_errors++; 3087 continue; 3088 } 3089 3090 /* 3091 * common case where requests are to the same fd 3092 * for the same r/w operation 3093 * for UFS, need to set EBADFD 3094 */ 3095 vp = fp->f_vnode; 3096 if (fp != prev_fp || mode != prev_mode) { 3097 aio_func = check_vp(vp, mode); 3098 if (aio_func == NULL) { 3099 prev_fp = NULL; 3100 releasef(aiocb->aio_fildes); 3101 lio_set_uerror(&cbp->aio_resultp, EBADFD); 3102 aio_notsupported++; 3103 if (head) { 3104 mutex_enter(&aiop->aio_mutex); 3105 head->lio_nent--; 3106 head->lio_refcnt--; 3107 mutex_exit(&aiop->aio_mutex); 3108 } 3109 continue; 3110 } else { 3111 prev_fp = fp; 3112 prev_mode = mode; 3113 } 3114 } 3115 3116 #ifdef _LP64 3117 aiocb_LFton(aiocb, &aiocb_n); 3118 error = aio_req_setup(&reqp, aiop, &aiocb_n, 3119 (aio_result_t *)&cbp->aio_resultp, vp); 3120 #else 3121 error = aio_req_setupLF(&reqp, aiop, aiocb, 3122 (aio_result_t *)&cbp->aio_resultp, vp); 3123 #endif /* _LP64 */ 3124 if (error) { 3125 releasef(aiocb->aio_fildes); 3126 lio_set_uerror(&cbp->aio_resultp, error); 3127 if (head) { 3128 mutex_enter(&aiop->aio_mutex); 3129 head->lio_nent--; 3130 head->lio_refcnt--; 3131 mutex_exit(&aiop->aio_mutex); 3132 } 3133 aio_errors++; 3134 continue; 3135 } 3136 3137 reqp->aio_req_lio = head; 3138 deadhead = 0; 3139 3140 /* 3141 * Set the errno field now before sending the request to 3142 * the driver to avoid a race condition 3143 */ 3144 (void) suword32(&cbp->aio_resultp.aio_errno, 3145 EINPROGRESS); 3146 3147 reqp->aio_req_iocb.iocb32 = *ucbp; 3148 3149 event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64; 3150 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 3151 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 3152 if (aio_port | aio_thread) { 3153 port_kevent_t *lpkevp; 3154 /* 3155 * Prepare data to send with each aiocb completed. 3156 */ 3157 if (aio_port) { 3158 void *paddr = (void *)(uintptr_t) 3159 aiocb->aio_sigevent.sigev_value.sival_ptr; 3160 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3161 error = EFAULT; 3162 } else { /* aio_thread */ 3163 pnotify.portnfy_port = 3164 aiocb->aio_sigevent.sigev_signo; 3165 pnotify.portnfy_user = 3166 aiocb->aio_sigevent.sigev_value.sival_ptr; 3167 } 3168 if (error) 3169 /* EMPTY */; 3170 else if (pkevtp != NULL && 3171 pnotify.portnfy_port == lio_head_port) 3172 error = port_dup_event(pkevtp, &lpkevp, 3173 PORT_ALLOC_DEFAULT); 3174 else 3175 error = port_alloc_event(pnotify.portnfy_port, 3176 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 3177 &lpkevp); 3178 if (error == 0) { 3179 port_init_event(lpkevp, (uintptr_t)*ucbp, 3180 (void *)(uintptr_t)pnotify.portnfy_user, 3181 aio_port_callback, reqp); 3182 lpkevp->portkev_events = event; 3183 reqp->aio_req_portkev = lpkevp; 3184 reqp->aio_req_port = pnotify.portnfy_port; 3185 } 3186 } 3187 3188 /* 3189 * send the request to driver. 
3190 */ 3191 if (error == 0) { 3192 if (aiocb->aio_nbytes == 0) { 3193 clear_active_fd(aiocb->aio_fildes); 3194 aio_zerolen(reqp); 3195 continue; 3196 } 3197 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 3198 CRED()); 3199 } 3200 3201 /* 3202 * the fd's ref count is not decremented until the IO has 3203 * completed unless there was an error. 3204 */ 3205 if (error) { 3206 releasef(aiocb->aio_fildes); 3207 lio_set_uerror(&cbp->aio_resultp, error); 3208 if (head) { 3209 mutex_enter(&aiop->aio_mutex); 3210 head->lio_nent--; 3211 head->lio_refcnt--; 3212 mutex_exit(&aiop->aio_mutex); 3213 } 3214 if (error == ENOTSUP) 3215 aio_notsupported++; 3216 else 3217 aio_errors++; 3218 lio_set_error(reqp, portused); 3219 } else { 3220 clear_active_fd(aiocb->aio_fildes); 3221 } 3222 } 3223 3224 if (aio_notsupported) { 3225 error = ENOTSUP; 3226 } else if (aio_errors) { 3227 /* 3228 * return EIO if any request failed 3229 */ 3230 error = EIO; 3231 } 3232 3233 if (mode_arg == LIO_WAIT) { 3234 mutex_enter(&aiop->aio_mutex); 3235 while (head->lio_refcnt > 0) { 3236 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 3237 mutex_exit(&aiop->aio_mutex); 3238 error = EINTR; 3239 goto done; 3240 } 3241 } 3242 mutex_exit(&aiop->aio_mutex); 3243 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE); 3244 } 3245 3246 done: 3247 kmem_free(cbplist, ssize); 3248 if (deadhead) { 3249 if (head->lio_sigqp) 3250 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 3251 if (head->lio_portkev) 3252 port_free_event(head->lio_portkev); 3253 kmem_free(head, sizeof (aio_lio_t)); 3254 } 3255 return (error); 3256 } 3257 3258 #ifdef _SYSCALL32_IMPL 3259 static void 3260 aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest) 3261 { 3262 dest->aio_fildes = src->aio_fildes; 3263 dest->aio_buf = (void *)(uintptr_t)src->aio_buf; 3264 dest->aio_nbytes = (size_t)src->aio_nbytes; 3265 dest->aio_offset = (off_t)src->aio_offset; 3266 dest->aio_reqprio = src->aio_reqprio; 3267 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 3268 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 3269 3270 /* 3271 * See comment in sigqueue32() on handling of 32-bit 3272 * sigvals in a 64-bit kernel. 3273 */ 3274 dest->aio_sigevent.sigev_value.sival_int = 3275 (int)src->aio_sigevent.sigev_value.sival_int; 3276 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 3277 (uintptr_t)src->aio_sigevent.sigev_notify_function; 3278 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 3279 (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 3280 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 3281 dest->aio_lio_opcode = src->aio_lio_opcode; 3282 dest->aio_state = src->aio_state; 3283 dest->aio__pad[0] = src->aio__pad[0]; 3284 } 3285 #endif 3286 3287 /* 3288 * This function is used only for largefile calls made by 3289 * 32 bit applications. 
3290 */ 3291 static int 3292 aio_req_setupLF( 3293 aio_req_t **reqpp, 3294 aio_t *aiop, 3295 aiocb64_32_t *arg, 3296 aio_result_t *resultp, 3297 vnode_t *vp) 3298 { 3299 sigqueue_t *sqp = NULL; 3300 aio_req_t *reqp; 3301 struct uio *uio; 3302 struct sigevent32 *sigev; 3303 int error; 3304 3305 sigev = &arg->aio_sigevent; 3306 if (sigev->sigev_notify == SIGEV_SIGNAL && 3307 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) { 3308 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3309 if (sqp == NULL) 3310 return (EAGAIN); 3311 sqp->sq_func = NULL; 3312 sqp->sq_next = NULL; 3313 sqp->sq_info.si_code = SI_ASYNCIO; 3314 sqp->sq_info.si_pid = curproc->p_pid; 3315 sqp->sq_info.si_ctid = PRCTID(curproc); 3316 sqp->sq_info.si_zoneid = getzoneid(); 3317 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3318 sqp->sq_info.si_signo = sigev->sigev_signo; 3319 sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int; 3320 } 3321 3322 mutex_enter(&aiop->aio_mutex); 3323 3324 if (aiop->aio_flags & AIO_REQ_BLOCK) { 3325 mutex_exit(&aiop->aio_mutex); 3326 if (sqp) 3327 kmem_free(sqp, sizeof (sigqueue_t)); 3328 return (EIO); 3329 } 3330 /* 3331 * get an aio_reqp from the free list or allocate one 3332 * from dynamic memory. 3333 */ 3334 if (error = aio_req_alloc(&reqp, resultp)) { 3335 mutex_exit(&aiop->aio_mutex); 3336 if (sqp) 3337 kmem_free(sqp, sizeof (sigqueue_t)); 3338 return (error); 3339 } 3340 aiop->aio_pending++; 3341 aiop->aio_outstanding++; 3342 reqp->aio_req_flags = AIO_PENDING; 3343 if (sigev->sigev_notify == SIGEV_THREAD || 3344 sigev->sigev_notify == SIGEV_PORT) 3345 aio_enq(&aiop->aio_portpending, reqp, 0); 3346 mutex_exit(&aiop->aio_mutex); 3347 /* 3348 * initialize aio request. 3349 */ 3350 reqp->aio_req_fd = arg->aio_fildes; 3351 reqp->aio_req_sigqp = sqp; 3352 reqp->aio_req_iocb.iocb = NULL; 3353 reqp->aio_req_lio = NULL; 3354 reqp->aio_req_buf.b_file = vp; 3355 uio = reqp->aio_req.aio_uio; 3356 uio->uio_iovcnt = 1; 3357 uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf; 3358 uio->uio_iov->iov_len = arg->aio_nbytes; 3359 uio->uio_loffset = arg->aio_offset; 3360 *reqpp = reqp; 3361 return (0); 3362 } 3363 3364 /* 3365 * This routine is called when a non largefile call is made by a 32bit 3366 * process on a ILP32 or LP64 kernel. 
3367 */ 3368 static int 3369 alio32( 3370 int mode_arg, 3371 void *aiocb_arg, 3372 int nent, 3373 void *sigev) 3374 { 3375 file_t *fp; 3376 file_t *prev_fp = NULL; 3377 int prev_mode = -1; 3378 struct vnode *vp; 3379 aio_lio_t *head; 3380 aio_req_t *reqp; 3381 aio_t *aiop; 3382 caddr_t cbplist; 3383 aiocb_t cb; 3384 aiocb_t *aiocb = &cb; 3385 #ifdef _LP64 3386 aiocb32_t *cbp; 3387 caddr32_t *ucbp; 3388 aiocb32_t cb32; 3389 aiocb32_t *aiocb32 = &cb32; 3390 struct sigevent32 sigevk; 3391 #else 3392 aiocb_t *cbp, **ucbp; 3393 struct sigevent sigevk; 3394 #endif 3395 sigqueue_t *sqp; 3396 int (*aio_func)(); 3397 int mode; 3398 int error = 0; 3399 int aio_errors = 0; 3400 int i; 3401 size_t ssize; 3402 int deadhead = 0; 3403 int aio_notsupported = 0; 3404 int lio_head_port; 3405 int aio_port; 3406 int aio_thread; 3407 port_kevent_t *pkevtp = NULL; 3408 int portused = 0; 3409 #ifdef _LP64 3410 port_notify32_t pnotify; 3411 #else 3412 port_notify_t pnotify; 3413 #endif 3414 int event; 3415 3416 aiop = curproc->p_aio; 3417 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 3418 return (EINVAL); 3419 3420 #ifdef _LP64 3421 ssize = (sizeof (caddr32_t) * nent); 3422 #else 3423 ssize = (sizeof (aiocb_t *) * nent); 3424 #endif 3425 cbplist = kmem_alloc(ssize, KM_SLEEP); 3426 ucbp = (void *)cbplist; 3427 3428 if (copyin(aiocb_arg, cbplist, ssize) || 3429 (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) { 3430 kmem_free(cbplist, ssize); 3431 return (EFAULT); 3432 } 3433 3434 /* Event Ports */ 3435 if (sigev && 3436 (sigevk.sigev_notify == SIGEV_THREAD || 3437 sigevk.sigev_notify == SIGEV_PORT)) { 3438 if (sigevk.sigev_notify == SIGEV_THREAD) { 3439 pnotify.portnfy_port = sigevk.sigev_signo; 3440 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 3441 } else if (copyin( 3442 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr, 3443 &pnotify, sizeof (pnotify))) { 3444 kmem_free(cbplist, ssize); 3445 return (EFAULT); 3446 } 3447 error = port_alloc_event(pnotify.portnfy_port, 3448 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 3449 if (error) { 3450 if (error == ENOMEM || error == EAGAIN) 3451 error = EAGAIN; 3452 else 3453 error = EINVAL; 3454 kmem_free(cbplist, ssize); 3455 return (error); 3456 } 3457 lio_head_port = pnotify.portnfy_port; 3458 portused = 1; 3459 } 3460 3461 /* 3462 * a list head should be allocated if notification is 3463 * enabled for this list. 
3464 */ 3465 head = NULL; 3466 3467 if (mode_arg == LIO_WAIT || sigev) { 3468 mutex_enter(&aiop->aio_mutex); 3469 error = aio_lio_alloc(&head); 3470 mutex_exit(&aiop->aio_mutex); 3471 if (error) 3472 goto done; 3473 deadhead = 1; 3474 head->lio_nent = nent; 3475 head->lio_refcnt = nent; 3476 head->lio_port = -1; 3477 head->lio_portkev = NULL; 3478 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 3479 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 3480 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3481 if (sqp == NULL) { 3482 error = EAGAIN; 3483 goto done; 3484 } 3485 sqp->sq_func = NULL; 3486 sqp->sq_next = NULL; 3487 sqp->sq_info.si_code = SI_ASYNCIO; 3488 sqp->sq_info.si_pid = curproc->p_pid; 3489 sqp->sq_info.si_ctid = PRCTID(curproc); 3490 sqp->sq_info.si_zoneid = getzoneid(); 3491 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3492 sqp->sq_info.si_signo = sigevk.sigev_signo; 3493 sqp->sq_info.si_value.sival_int = 3494 sigevk.sigev_value.sival_int; 3495 head->lio_sigqp = sqp; 3496 } else { 3497 head->lio_sigqp = NULL; 3498 } 3499 if (pkevtp) { 3500 /* 3501 * Prepare data to send when list of aiocb's has 3502 * completed. 3503 */ 3504 port_init_event(pkevtp, (uintptr_t)sigev, 3505 (void *)(uintptr_t)pnotify.portnfy_user, 3506 NULL, head); 3507 pkevtp->portkev_events = AIOLIO; 3508 head->lio_portkev = pkevtp; 3509 head->lio_port = pnotify.portnfy_port; 3510 } 3511 } 3512 3513 for (i = 0; i < nent; i++, ucbp++) { 3514 3515 /* skip entry if it can't be copied. */ 3516 #ifdef _LP64 3517 cbp = (aiocb32_t *)(uintptr_t)*ucbp; 3518 if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32))) 3519 #else 3520 cbp = (aiocb_t *)*ucbp; 3521 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) 3522 #endif 3523 { 3524 if (head) { 3525 mutex_enter(&aiop->aio_mutex); 3526 head->lio_nent--; 3527 head->lio_refcnt--; 3528 mutex_exit(&aiop->aio_mutex); 3529 } 3530 continue; 3531 } 3532 #ifdef _LP64 3533 /* 3534 * copy 32 bit structure into 64 bit structure 3535 */ 3536 aiocb_32ton(aiocb32, aiocb); 3537 #endif /* _LP64 */ 3538 3539 /* skip if opcode for aiocb is LIO_NOP */ 3540 mode = aiocb->aio_lio_opcode; 3541 if (mode == LIO_NOP) { 3542 cbp = NULL; 3543 if (head) { 3544 mutex_enter(&aiop->aio_mutex); 3545 head->lio_nent--; 3546 head->lio_refcnt--; 3547 mutex_exit(&aiop->aio_mutex); 3548 } 3549 continue; 3550 } 3551 3552 /* increment file descriptor's ref count. 
*/ 3553 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 3554 lio_set_uerror(&cbp->aio_resultp, EBADF); 3555 if (head) { 3556 mutex_enter(&aiop->aio_mutex); 3557 head->lio_nent--; 3558 head->lio_refcnt--; 3559 mutex_exit(&aiop->aio_mutex); 3560 } 3561 aio_errors++; 3562 continue; 3563 } 3564 3565 /* 3566 * check the permission of the partition 3567 */ 3568 if ((fp->f_flag & mode) == 0) { 3569 releasef(aiocb->aio_fildes); 3570 lio_set_uerror(&cbp->aio_resultp, EBADF); 3571 if (head) { 3572 mutex_enter(&aiop->aio_mutex); 3573 head->lio_nent--; 3574 head->lio_refcnt--; 3575 mutex_exit(&aiop->aio_mutex); 3576 } 3577 aio_errors++; 3578 continue; 3579 } 3580 3581 /* 3582 * common case where requests are to the same fd 3583 * for the same r/w operation 3584 * for UFS, need to set EBADFD 3585 */ 3586 vp = fp->f_vnode; 3587 if (fp != prev_fp || mode != prev_mode) { 3588 aio_func = check_vp(vp, mode); 3589 if (aio_func == NULL) { 3590 prev_fp = NULL; 3591 releasef(aiocb->aio_fildes); 3592 lio_set_uerror(&cbp->aio_resultp, EBADFD); 3593 aio_notsupported++; 3594 if (head) { 3595 mutex_enter(&aiop->aio_mutex); 3596 head->lio_nent--; 3597 head->lio_refcnt--; 3598 mutex_exit(&aiop->aio_mutex); 3599 } 3600 continue; 3601 } else { 3602 prev_fp = fp; 3603 prev_mode = mode; 3604 } 3605 } 3606 3607 error = aio_req_setup(&reqp, aiop, aiocb, 3608 (aio_result_t *)&cbp->aio_resultp, vp); 3609 if (error) { 3610 releasef(aiocb->aio_fildes); 3611 lio_set_uerror(&cbp->aio_resultp, error); 3612 if (head) { 3613 mutex_enter(&aiop->aio_mutex); 3614 head->lio_nent--; 3615 head->lio_refcnt--; 3616 mutex_exit(&aiop->aio_mutex); 3617 } 3618 aio_errors++; 3619 continue; 3620 } 3621 3622 reqp->aio_req_lio = head; 3623 deadhead = 0; 3624 3625 /* 3626 * Set the errno field now before sending the request to 3627 * the driver to avoid a race condition 3628 */ 3629 (void) suword32(&cbp->aio_resultp.aio_errno, 3630 EINPROGRESS); 3631 3632 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp; 3633 3634 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE; 3635 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 3636 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 3637 if (aio_port | aio_thread) { 3638 port_kevent_t *lpkevp; 3639 /* 3640 * Prepare data to send with each aiocb completed. 
3641 */ 3642 #ifdef _LP64 3643 if (aio_port) { 3644 void *paddr = (void *)(uintptr_t) 3645 aiocb32->aio_sigevent.sigev_value.sival_ptr; 3646 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3647 error = EFAULT; 3648 } else { /* aio_thread */ 3649 pnotify.portnfy_port = 3650 aiocb32->aio_sigevent.sigev_signo; 3651 pnotify.portnfy_user = 3652 aiocb32->aio_sigevent.sigev_value.sival_ptr; 3653 } 3654 #else 3655 if (aio_port) { 3656 void *paddr = 3657 aiocb->aio_sigevent.sigev_value.sival_ptr; 3658 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3659 error = EFAULT; 3660 } else { /* aio_thread */ 3661 pnotify.portnfy_port = 3662 aiocb->aio_sigevent.sigev_signo; 3663 pnotify.portnfy_user = 3664 aiocb->aio_sigevent.sigev_value.sival_ptr; 3665 } 3666 #endif 3667 if (error) 3668 /* EMPTY */; 3669 else if (pkevtp != NULL && 3670 pnotify.portnfy_port == lio_head_port) 3671 error = port_dup_event(pkevtp, &lpkevp, 3672 PORT_ALLOC_DEFAULT); 3673 else 3674 error = port_alloc_event(pnotify.portnfy_port, 3675 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 3676 &lpkevp); 3677 if (error == 0) { 3678 port_init_event(lpkevp, (uintptr_t)cbp, 3679 (void *)(uintptr_t)pnotify.portnfy_user, 3680 aio_port_callback, reqp); 3681 lpkevp->portkev_events = event; 3682 reqp->aio_req_portkev = lpkevp; 3683 reqp->aio_req_port = pnotify.portnfy_port; 3684 } 3685 } 3686 3687 /* 3688 * send the request to driver. 3689 */ 3690 if (error == 0) { 3691 if (aiocb->aio_nbytes == 0) { 3692 clear_active_fd(aiocb->aio_fildes); 3693 aio_zerolen(reqp); 3694 continue; 3695 } 3696 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 3697 CRED()); 3698 } 3699 3700 /* 3701 * the fd's ref count is not decremented until the IO has 3702 * completed unless there was an error. 3703 */ 3704 if (error) { 3705 releasef(aiocb->aio_fildes); 3706 lio_set_uerror(&cbp->aio_resultp, error); 3707 if (head) { 3708 mutex_enter(&aiop->aio_mutex); 3709 head->lio_nent--; 3710 head->lio_refcnt--; 3711 mutex_exit(&aiop->aio_mutex); 3712 } 3713 if (error == ENOTSUP) 3714 aio_notsupported++; 3715 else 3716 aio_errors++; 3717 lio_set_error(reqp, portused); 3718 } else { 3719 clear_active_fd(aiocb->aio_fildes); 3720 } 3721 } 3722 3723 if (aio_notsupported) { 3724 error = ENOTSUP; 3725 } else if (aio_errors) { 3726 /* 3727 * return EIO if any request failed 3728 */ 3729 error = EIO; 3730 } 3731 3732 if (mode_arg == LIO_WAIT) { 3733 mutex_enter(&aiop->aio_mutex); 3734 while (head->lio_refcnt > 0) { 3735 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 3736 mutex_exit(&aiop->aio_mutex); 3737 error = EINTR; 3738 goto done; 3739 } 3740 } 3741 mutex_exit(&aiop->aio_mutex); 3742 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32); 3743 } 3744 3745 done: 3746 kmem_free(cbplist, ssize); 3747 if (deadhead) { 3748 if (head->lio_sigqp) 3749 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 3750 if (head->lio_portkev) 3751 port_free_event(head->lio_portkev); 3752 kmem_free(head, sizeof (aio_lio_t)); 3753 } 3754 return (error); 3755 } 3756 3757 3758 #ifdef _SYSCALL32_IMPL 3759 void 3760 aiocb_32ton(aiocb32_t *src, aiocb_t *dest) 3761 { 3762 dest->aio_fildes = src->aio_fildes; 3763 dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf; 3764 dest->aio_nbytes = (size_t)src->aio_nbytes; 3765 dest->aio_offset = (off_t)src->aio_offset; 3766 dest->aio_reqprio = src->aio_reqprio; 3767 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 3768 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 3769 3770 /* 3771 * See comment in sigqueue32() on handling of 32-bit 3772 * 
sigvals in a 64-bit kernel. 3773 */ 3774 dest->aio_sigevent.sigev_value.sival_int = 3775 (int)src->aio_sigevent.sigev_value.sival_int; 3776 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 3777 (uintptr_t)src->aio_sigevent.sigev_notify_function; 3778 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 3779 (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 3780 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 3781 dest->aio_lio_opcode = src->aio_lio_opcode; 3782 dest->aio_state = src->aio_state; 3783 dest->aio__pad[0] = src->aio__pad[0]; 3784 } 3785 #endif /* _SYSCALL32_IMPL */ 3786 3787 /* 3788 * aio_port_callback() is called just before the event is retrieved from the 3789 * port. The task of this callback function is to finish the work of the 3790 * transaction for the application; that is: 3791 * - copyout transaction data to the application 3792 * (this thread is running in the right process context) 3793 * - keep track of the transaction (update counters). 3794 * - free allocated buffers 3795 * The aiocb pointer is the object element of the port_kevent_t structure. 3796 * 3797 * flag: 3798 * PORT_CALLBACK_DEFAULT: do copyout and free resources 3799 * PORT_CALLBACK_CLOSE: don't do copyout, just free resources 3800 */ 3801 3802 /*ARGSUSED*/ 3803 int 3804 aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp) 3805 { 3806 aio_t *aiop = curproc->p_aio; 3807 aio_req_t *reqp = arg; 3808 struct iovec *iov; 3809 struct buf *bp; 3810 void *resultp; 3811 3812 if (pid != curproc->p_pid) { 3813 /* wrong process, cannot deliver data here */ 3814 return (EACCES); 3815 } 3816 3817 mutex_enter(&aiop->aio_portq_mutex); 3818 reqp->aio_req_portkev = NULL; 3819 aio_req_remove_portq(aiop, reqp); /* remove request from portq */ 3820 mutex_exit(&aiop->aio_portq_mutex); 3821 aphysio_unlock(reqp); /* unlock used pages */ 3822 mutex_enter(&aiop->aio_mutex); 3823 if (reqp->aio_req_flags & AIO_COPYOUTDONE) { 3824 aio_req_free_port(aiop, reqp); /* back to free list */ 3825 mutex_exit(&aiop->aio_mutex); 3826 return (0); 3827 } 3828 3829 iov = reqp->aio_req_uio.uio_iov; 3830 bp = &reqp->aio_req_buf; 3831 resultp = (void *)reqp->aio_req_resultp; 3832 aio_req_free_port(aiop, reqp); /* request struct back to free list */ 3833 mutex_exit(&aiop->aio_mutex); 3834 if (flag == PORT_CALLBACK_DEFAULT) 3835 aio_copyout_result_port(iov, bp, resultp); 3836 return (0); 3837 } 3838
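/*
 * For reference, the list I/O paths above (alio, alioLF and alio32) form the
 * kernel side of lio_listio(3C) when the target file descriptors refer to
 * raw devices that check_vp() accepts.  The sketch below is a minimal,
 * illustrative user-level caller and is not part of this module; the device
 * path (/dev/rdsk/c0t0d0s0) and the 8 Kbyte transfer size are assumptions
 * made only for the example.  LIO_WAIT blocks until every request in the
 * list has completed, which corresponds to the head->lio_refcnt wait loop
 * in alio32()/alioLF() above; aio_error() and aio_return() then read back
 * the per-request results that the kernel stored through aio_resultp.
 *
 *	#include <sys/types.h>
 *	#include <aio.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	#define	NREQ	2
 *	#define	BUFSZ	8192
 *
 *	int
 *	main(void)
 *	{
 *		static char bufs[NREQ][BUFSZ];
 *		struct aiocb cb[NREQ];
 *		struct aiocb *list[NREQ];
 *		int fd, i;
 *
 *		if ((fd = open("/dev/rdsk/c0t0d0s0", O_RDONLY)) == -1) {
 *			perror("open");
 *			return (1);
 *		}
 *		for (i = 0; i < NREQ; i++) {
 *			(void) memset(&cb[i], 0, sizeof (cb[i]));
 *			cb[i].aio_fildes = fd;
 *			cb[i].aio_buf = bufs[i];
 *			cb[i].aio_nbytes = BUFSZ;
 *			cb[i].aio_offset = (off_t)i * BUFSZ;
 *			cb[i].aio_lio_opcode = LIO_READ;
 *			list[i] = &cb[i];
 *		}
 *		if (lio_listio(LIO_WAIT, list, NREQ, NULL) == -1) {
 *			perror("lio_listio");
 *			return (1);
 *		}
 *		for (i = 0; i < NREQ; i++)
 *			(void) printf("request %d: error %d, return %ld\n",
 *			    i, aio_error(&cb[i]), (long)aio_return(&cb[i]));
 *		return (0);
 *	}
 */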