1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Kernel asynchronous I/O. 29 * This is only for raw devices now (as of Nov. 1993). 30 */ 31 32 #include <sys/types.h> 33 #include <sys/errno.h> 34 #include <sys/conf.h> 35 #include <sys/file.h> 36 #include <sys/fs/snode.h> 37 #include <sys/unistd.h> 38 #include <sys/cmn_err.h> 39 #include <vm/as.h> 40 #include <vm/faultcode.h> 41 #include <sys/sysmacros.h> 42 #include <sys/procfs.h> 43 #include <sys/kmem.h> 44 #include <sys/autoconf.h> 45 #include <sys/ddi_impldefs.h> 46 #include <sys/sunddi.h> 47 #include <sys/aio_impl.h> 48 #include <sys/debug.h> 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/vmsystm.h> 52 #include <sys/fs/pxfs_ki.h> 53 #include <sys/contract/process_impl.h> 54 55 /* 56 * external entry point. 
57 */ 58 #ifdef _LP64 59 static int64_t kaioc(long, long, long, long, long, long); 60 #endif 61 static int kaio(ulong_t *, rval_t *); 62 63 64 #define AIO_64 0 65 #define AIO_32 1 66 #define AIO_LARGEFILE 2 67 68 /* 69 * implementation specific functions (private) 70 */ 71 #ifdef _LP64 72 static int alio(int, aiocb_t **, int, struct sigevent *); 73 #endif 74 static int aionotify(void); 75 static int aioinit(void); 76 static int aiostart(void); 77 static void alio_cleanup(aio_t *, aiocb_t **, int, int); 78 static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *, 79 cred_t *); 80 static void lio_set_error(aio_req_t *, int portused); 81 static aio_t *aio_aiop_alloc(); 82 static int aio_req_alloc(aio_req_t **, aio_result_t *); 83 static int aio_lio_alloc(aio_lio_t **); 84 static aio_req_t *aio_req_done(void *); 85 static aio_req_t *aio_req_remove(aio_req_t *); 86 static int aio_req_find(aio_result_t *, aio_req_t **); 87 static int aio_hash_insert(struct aio_req_t *, aio_t *); 88 static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *, 89 aio_result_t *, vnode_t *); 90 static int aio_cleanup_thread(aio_t *); 91 static aio_lio_t *aio_list_get(aio_result_t *); 92 static void lio_set_uerror(void *, int); 93 extern void aio_zerolen(aio_req_t *); 94 static int aiowait(struct timeval *, int, long *); 95 static int aiowaitn(void *, uint_t, uint_t *, timespec_t *); 96 static int aio_unlock_requests(caddr_t iocblist, int iocb_index, 97 aio_req_t *reqlist, aio_t *aiop, model_t model); 98 static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max); 99 static int aiosuspend(void *, int, struct timespec *, int, 100 long *, int); 101 static int aliowait(int, void *, int, void *, int); 102 static int aioerror(void *, int); 103 static int aio_cancel(int, void *, long *, int); 104 static int arw(int, int, char *, int, offset_t, aio_result_t *, int); 105 static int aiorw(int, void *, int, int); 106 107 static int alioLF(int, void *, int, void *); 108 static 
int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *, 109 aio_result_t *, vnode_t *); 110 static int alio32(int, void *, int, void *); 111 static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p); 112 static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p); 113 114 #ifdef _SYSCALL32_IMPL 115 static void aiocb_LFton(aiocb64_32_t *, aiocb_t *); 116 void aiocb_32ton(aiocb32_t *, aiocb_t *); 117 #endif /* _SYSCALL32_IMPL */ 118 119 /* 120 * implementation specific functions (external) 121 */ 122 void aio_req_free(aio_t *, aio_req_t *); 123 124 /* 125 * Event Port framework 126 */ 127 128 void aio_req_free_port(aio_t *, aio_req_t *); 129 static int aio_port_callback(void *, int *, pid_t, int, void *); 130 131 /* 132 * This is the loadable module wrapper. 133 */ 134 #include <sys/modctl.h> 135 #include <sys/syscall.h> 136 137 #ifdef _LP64 138 139 static struct sysent kaio_sysent = { 140 6, 141 SE_NOUNLOAD | SE_64RVAL | SE_ARGC, 142 (int (*)())kaioc 143 }; 144 145 #ifdef _SYSCALL32_IMPL 146 static struct sysent kaio_sysent32 = { 147 7, 148 SE_NOUNLOAD | SE_64RVAL, 149 kaio 150 }; 151 #endif /* _SYSCALL32_IMPL */ 152 153 #else /* _LP64 */ 154 155 static struct sysent kaio_sysent = { 156 7, 157 SE_NOUNLOAD | SE_32RVAL1, 158 kaio 159 }; 160 161 #endif /* _LP64 */ 162 163 /* 164 * Module linkage information for the kernel. 
165 */ 166 167 static struct modlsys modlsys = { 168 &mod_syscallops, 169 "kernel Async I/O", 170 &kaio_sysent 171 }; 172 173 #ifdef _SYSCALL32_IMPL 174 static struct modlsys modlsys32 = { 175 &mod_syscallops32, 176 "kernel Async I/O for 32 bit compatibility", 177 &kaio_sysent32 178 }; 179 #endif /* _SYSCALL32_IMPL */ 180 181 182 static struct modlinkage modlinkage = { 183 MODREV_1, 184 &modlsys, 185 #ifdef _SYSCALL32_IMPL 186 &modlsys32, 187 #endif 188 NULL 189 }; 190 191 int 192 _init(void) 193 { 194 int retval; 195 196 if ((retval = mod_install(&modlinkage)) != 0) 197 return (retval); 198 199 return (0); 200 } 201 202 int 203 _fini(void) 204 { 205 int retval; 206 207 retval = mod_remove(&modlinkage); 208 209 return (retval); 210 } 211 212 int 213 _info(struct modinfo *modinfop) 214 { 215 return (mod_info(&modlinkage, modinfop)); 216 } 217 218 #ifdef _LP64 219 static int64_t 220 kaioc( 221 long a0, 222 long a1, 223 long a2, 224 long a3, 225 long a4, 226 long a5) 227 { 228 int error; 229 long rval = 0; 230 231 switch ((int)a0 & ~AIO_POLL_BIT) { 232 case AIOREAD: 233 error = arw((int)a0, (int)a1, (char *)a2, (int)a3, 234 (offset_t)a4, (aio_result_t *)a5, FREAD); 235 break; 236 case AIOWRITE: 237 error = arw((int)a0, (int)a1, (char *)a2, (int)a3, 238 (offset_t)a4, (aio_result_t *)a5, FWRITE); 239 break; 240 case AIOWAIT: 241 error = aiowait((struct timeval *)a1, (int)a2, &rval); 242 break; 243 case AIOWAITN: 244 error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3, 245 (timespec_t *)a4); 246 break; 247 case AIONOTIFY: 248 error = aionotify(); 249 break; 250 case AIOINIT: 251 error = aioinit(); 252 break; 253 case AIOSTART: 254 error = aiostart(); 255 break; 256 case AIOLIO: 257 error = alio((int)a1, (aiocb_t **)a2, (int)a3, 258 (struct sigevent *)a4); 259 break; 260 case AIOLIOWAIT: 261 error = aliowait((int)a1, (void *)a2, (int)a3, 262 (struct sigevent *)a4, AIO_64); 263 break; 264 case AIOSUSPEND: 265 error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3, 266 
(int)a4, &rval, AIO_64); 267 break; 268 case AIOERROR: 269 error = aioerror((void *)a1, AIO_64); 270 break; 271 case AIOAREAD: 272 error = aiorw((int)a0, (void *)a1, FREAD, AIO_64); 273 break; 274 case AIOAWRITE: 275 error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64); 276 break; 277 case AIOCANCEL: 278 error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64); 279 break; 280 281 /* 282 * The large file related stuff is valid only for 283 * 32 bit kernel and not for 64 bit kernel 284 * On 64 bit kernel we convert large file calls 285 * to regular 64bit calls. 286 */ 287 288 default: 289 error = EINVAL; 290 } 291 if (error) 292 return ((int64_t)set_errno(error)); 293 return (rval); 294 } 295 #endif 296 297 static int 298 kaio( 299 ulong_t *uap, 300 rval_t *rvp) 301 { 302 long rval = 0; 303 int error = 0; 304 offset_t off; 305 306 307 rvp->r_vals = 0; 308 #if defined(_LITTLE_ENDIAN) 309 off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4]; 310 #else 311 off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5]; 312 #endif 313 314 switch (uap[0] & ~AIO_POLL_BIT) { 315 /* 316 * It must be the 32 bit system call on 64 bit kernel 317 */ 318 case AIOREAD: 319 return (arw((int)uap[0], (int)uap[1], (char *)uap[2], 320 (int)uap[3], off, (aio_result_t *)uap[6], FREAD)); 321 case AIOWRITE: 322 return (arw((int)uap[0], (int)uap[1], (char *)uap[2], 323 (int)uap[3], off, (aio_result_t *)uap[6], FWRITE)); 324 case AIOWAIT: 325 error = aiowait((struct timeval *)uap[1], (int)uap[2], 326 &rval); 327 break; 328 case AIOWAITN: 329 error = aiowaitn((void *)uap[1], (uint_t)uap[2], 330 (uint_t *)uap[3], (timespec_t *)uap[4]); 331 break; 332 case AIONOTIFY: 333 return (aionotify()); 334 case AIOINIT: 335 return (aioinit()); 336 case AIOSTART: 337 return (aiostart()); 338 case AIOLIO: 339 return (alio32((int)uap[1], (void *)uap[2], (int)uap[3], 340 (void *)uap[4])); 341 case AIOLIOWAIT: 342 return (aliowait((int)uap[1], (void *)uap[2], 343 (int)uap[3], (struct sigevent *)uap[4], AIO_32)); 344 case 
AIOSUSPEND: 345 error = aiosuspend((void *)uap[1], (int)uap[2], 346 (timespec_t *)uap[3], (int)uap[4], 347 &rval, AIO_32); 348 break; 349 case AIOERROR: 350 return (aioerror((void *)uap[1], AIO_32)); 351 case AIOAREAD: 352 return (aiorw((int)uap[0], (void *)uap[1], 353 FREAD, AIO_32)); 354 case AIOAWRITE: 355 return (aiorw((int)uap[0], (void *)uap[1], 356 FWRITE, AIO_32)); 357 case AIOCANCEL: 358 error = (aio_cancel((int)uap[1], (void *)uap[2], &rval, 359 AIO_32)); 360 break; 361 case AIOLIO64: 362 return (alioLF((int)uap[1], (void *)uap[2], 363 (int)uap[3], (void *)uap[4])); 364 case AIOLIOWAIT64: 365 return (aliowait(uap[1], (void *)uap[2], 366 (int)uap[3], (void *)uap[4], AIO_LARGEFILE)); 367 case AIOSUSPEND64: 368 error = aiosuspend((void *)uap[1], (int)uap[2], 369 (timespec_t *)uap[3], (int)uap[4], &rval, 370 AIO_LARGEFILE); 371 break; 372 case AIOERROR64: 373 return (aioerror((void *)uap[1], AIO_LARGEFILE)); 374 case AIOAREAD64: 375 return (aiorw((int)uap[0], (void *)uap[1], FREAD, 376 AIO_LARGEFILE)); 377 case AIOAWRITE64: 378 return (aiorw((int)uap[0], (void *)uap[1], FWRITE, 379 AIO_LARGEFILE)); 380 case AIOCANCEL64: 381 error = (aio_cancel((int)uap[1], (void *)uap[2], 382 &rval, AIO_LARGEFILE)); 383 break; 384 default: 385 return (EINVAL); 386 } 387 388 rvp->r_val1 = rval; 389 return (error); 390 } 391 392 /* 393 * wake up LWPs in this process that are sleeping in 394 * aiowait(). 
395 */ 396 static int 397 aionotify(void) 398 { 399 aio_t *aiop; 400 401 aiop = curproc->p_aio; 402 if (aiop == NULL) 403 return (0); 404 405 mutex_enter(&aiop->aio_mutex); 406 aiop->aio_notifycnt++; 407 cv_broadcast(&aiop->aio_waitcv); 408 mutex_exit(&aiop->aio_mutex); 409 410 return (0); 411 } 412 413 static int 414 timeval2reltime(struct timeval *timout, timestruc_t *rqtime, 415 timestruc_t **rqtp, int *blocking) 416 { 417 #ifdef _SYSCALL32_IMPL 418 struct timeval32 wait_time_32; 419 #endif 420 struct timeval wait_time; 421 model_t model = get_udatamodel(); 422 423 *rqtp = NULL; 424 if (timout == NULL) { /* wait indefinitely */ 425 *blocking = 1; 426 return (0); 427 } 428 429 /* 430 * Need to correctly compare with the -1 passed in for a user 431 * address pointer, with both 32 bit and 64 bit apps. 432 */ 433 if (model == DATAMODEL_NATIVE) { 434 if ((intptr_t)timout == (intptr_t)-1) { /* don't wait */ 435 *blocking = 0; 436 return (0); 437 } 438 439 if (copyin(timout, &wait_time, sizeof (wait_time))) 440 return (EFAULT); 441 } 442 #ifdef _SYSCALL32_IMPL 443 else { 444 /* 445 * -1 from a 32bit app. It will not get sign extended. 446 * don't wait if -1. 
447 */ 448 if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) { 449 *blocking = 0; 450 return (0); 451 } 452 453 if (copyin(timout, &wait_time_32, sizeof (wait_time_32))) 454 return (EFAULT); 455 TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32); 456 } 457 #endif /* _SYSCALL32_IMPL */ 458 459 if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) { /* don't wait */ 460 *blocking = 0; 461 return (0); 462 } 463 464 if (wait_time.tv_sec < 0 || 465 wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC) 466 return (EINVAL); 467 468 rqtime->tv_sec = wait_time.tv_sec; 469 rqtime->tv_nsec = wait_time.tv_usec * 1000; 470 *rqtp = rqtime; 471 *blocking = 1; 472 473 return (0); 474 } 475 476 static int 477 timespec2reltime(timespec_t *timout, timestruc_t *rqtime, 478 timestruc_t **rqtp, int *blocking) 479 { 480 #ifdef _SYSCALL32_IMPL 481 timespec32_t wait_time_32; 482 #endif 483 model_t model = get_udatamodel(); 484 485 *rqtp = NULL; 486 if (timout == NULL) { 487 *blocking = 1; 488 return (0); 489 } 490 491 if (model == DATAMODEL_NATIVE) { 492 if (copyin(timout, rqtime, sizeof (*rqtime))) 493 return (EFAULT); 494 } 495 #ifdef _SYSCALL32_IMPL 496 else { 497 if (copyin(timout, &wait_time_32, sizeof (wait_time_32))) 498 return (EFAULT); 499 TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32); 500 } 501 #endif /* _SYSCALL32_IMPL */ 502 503 if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) { 504 *blocking = 0; 505 return (0); 506 } 507 508 if (rqtime->tv_sec < 0 || 509 rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC) 510 return (EINVAL); 511 512 *rqtp = rqtime; 513 *blocking = 1; 514 515 return (0); 516 } 517 518 /*ARGSUSED*/ 519 static int 520 aiowait( 521 struct timeval *timout, 522 int dontblockflg, 523 long *rval) 524 { 525 int error; 526 aio_t *aiop; 527 aio_req_t *reqp; 528 clock_t status; 529 int blocking; 530 int timecheck; 531 timestruc_t rqtime; 532 timestruc_t *rqtp; 533 534 aiop = curproc->p_aio; 535 if (aiop == NULL) 536 return (EINVAL); 537 538 /* 539 * Establish the absolute 
future time for the timeout. 540 */ 541 error = timeval2reltime(timout, &rqtime, &rqtp, &blocking); 542 if (error) 543 return (error); 544 if (rqtp) { 545 timestruc_t now; 546 timecheck = timechanged; 547 gethrestime(&now); 548 timespecadd(rqtp, &now); 549 } 550 551 mutex_enter(&aiop->aio_mutex); 552 for (;;) { 553 /* process requests on poll queue */ 554 if (aiop->aio_pollq) { 555 mutex_exit(&aiop->aio_mutex); 556 aio_cleanup(0); 557 mutex_enter(&aiop->aio_mutex); 558 } 559 if ((reqp = aio_req_remove(NULL)) != NULL) { 560 *rval = (long)reqp->aio_req_resultp; 561 break; 562 } 563 /* user-level done queue might not be empty */ 564 if (aiop->aio_notifycnt > 0) { 565 aiop->aio_notifycnt--; 566 *rval = 1; 567 break; 568 } 569 /* don't block if no outstanding aio */ 570 if (aiop->aio_outstanding == 0 && dontblockflg) { 571 error = EINVAL; 572 break; 573 } 574 if (blocking) { 575 status = cv_waituntil_sig(&aiop->aio_waitcv, 576 &aiop->aio_mutex, rqtp, timecheck); 577 578 if (status > 0) /* check done queue again */ 579 continue; 580 if (status == 0) { /* interrupted by a signal */ 581 error = EINTR; 582 *rval = -1; 583 } else { /* timer expired */ 584 error = ETIME; 585 } 586 } 587 break; 588 } 589 mutex_exit(&aiop->aio_mutex); 590 if (reqp) { 591 aphysio_unlock(reqp); 592 aio_copyout_result(reqp); 593 mutex_enter(&aiop->aio_mutex); 594 aio_req_free(aiop, reqp); 595 mutex_exit(&aiop->aio_mutex); 596 } 597 return (error); 598 } 599 600 /* 601 * aiowaitn can be used to reap completed asynchronous requests submitted with 602 * lio_listio, aio_read or aio_write. 603 * This function only reaps asynchronous raw I/Os. 
604 */ 605 606 /*ARGSUSED*/ 607 static int 608 aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout) 609 { 610 int error = 0; 611 aio_t *aiop; 612 aio_req_t *reqlist = NULL; 613 caddr_t iocblist = NULL; /* array of iocb ptr's */ 614 uint_t waitcnt, cnt = 0; /* iocb cnt */ 615 size_t iocbsz; /* users iocb size */ 616 size_t riocbsz; /* returned iocb size */ 617 int iocb_index = 0; 618 model_t model = get_udatamodel(); 619 int blocking = 1; 620 int timecheck; 621 timestruc_t rqtime; 622 timestruc_t *rqtp; 623 624 aiop = curproc->p_aio; 625 if (aiop == NULL || nent == 0 || nent > _AIO_LISTIO_MAX) 626 return (EINVAL); 627 628 if (aiop->aio_outstanding == 0) 629 return (EAGAIN); 630 631 if (copyin(nwait, &waitcnt, sizeof (uint_t))) 632 return (EFAULT); 633 634 /* set *nwait to zero, if we must return prematurely */ 635 if (copyout(&cnt, nwait, sizeof (uint_t))) 636 return (EFAULT); 637 638 if (waitcnt == 0) { 639 blocking = 0; 640 rqtp = NULL; 641 waitcnt = nent; 642 } else { 643 error = timespec2reltime(timout, &rqtime, &rqtp, &blocking); 644 if (error) 645 return (error); 646 } 647 648 if (model == DATAMODEL_NATIVE) 649 iocbsz = (sizeof (aiocb_t *) * nent); 650 #ifdef _SYSCALL32_IMPL 651 else 652 iocbsz = (sizeof (caddr32_t) * nent); 653 #endif /* _SYSCALL32_IMPL */ 654 655 /* 656 * Only one aio_waitn call is allowed at a time. 657 * The active aio_waitn will collect all requests 658 * out of the "done" list and if necessary it will wait 659 * for some/all pending requests to fulfill the nwait 660 * parameter. 661 * A second or further aio_waitn calls will sleep here 662 * until the active aio_waitn finishes and leaves the kernel 663 * If the second call does not block (poll), then return 664 * immediately with the error code : EAGAIN. 665 * If the second call should block, then sleep here, but 666 * do not touch the timeout. The timeout starts when this 667 * aio_waitn-call becomes active. 
668 */ 669 670 mutex_enter(&aiop->aio_mutex); 671 672 while (aiop->aio_flags & AIO_WAITN) { 673 if (blocking == 0) { 674 mutex_exit(&aiop->aio_mutex); 675 return (EAGAIN); 676 } 677 678 /* block, no timeout */ 679 aiop->aio_flags |= AIO_WAITN_PENDING; 680 if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) { 681 mutex_exit(&aiop->aio_mutex); 682 return (EINTR); 683 } 684 } 685 686 /* 687 * Establish the absolute future time for the timeout. 688 */ 689 if (rqtp) { 690 timestruc_t now; 691 timecheck = timechanged; 692 gethrestime(&now); 693 timespecadd(rqtp, &now); 694 } 695 696 if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) { 697 kmem_free(aiop->aio_iocb, aiop->aio_iocbsz); 698 aiop->aio_iocb = NULL; 699 } 700 701 if (aiop->aio_iocb == NULL) { 702 iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP); 703 if (iocblist == NULL) { 704 mutex_exit(&aiop->aio_mutex); 705 return (ENOMEM); 706 } 707 aiop->aio_iocb = (aiocb_t **)iocblist; 708 aiop->aio_iocbsz = iocbsz; 709 } else { 710 iocblist = (char *)aiop->aio_iocb; 711 } 712 713 aiop->aio_waitncnt = waitcnt; 714 aiop->aio_flags |= AIO_WAITN; 715 716 for (;;) { 717 /* push requests on poll queue to done queue */ 718 if (aiop->aio_pollq) { 719 mutex_exit(&aiop->aio_mutex); 720 aio_cleanup(0); 721 mutex_enter(&aiop->aio_mutex); 722 } 723 724 /* check for requests on done queue */ 725 if (aiop->aio_doneq) { 726 cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt); 727 aiop->aio_waitncnt = waitcnt - cnt; 728 } 729 730 /* user-level done queue might not be empty */ 731 if (aiop->aio_notifycnt > 0) { 732 aiop->aio_notifycnt--; 733 error = 0; 734 break; 735 } 736 737 /* 738 * if we are here second time as a result of timer 739 * expiration, we reset error if there are enough 740 * aiocb's to satisfy request. 741 * We return also if all requests are already done 742 * and we picked up the whole done queue. 
743 */ 744 745 if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 && 746 aiop->aio_doneq == NULL)) { 747 error = 0; 748 break; 749 } 750 751 if ((cnt < waitcnt) && blocking) { 752 int rval = cv_waituntil_sig(&aiop->aio_waitcv, 753 &aiop->aio_mutex, rqtp, timecheck); 754 if (rval > 0) 755 continue; 756 if (rval < 0) { 757 error = ETIME; 758 blocking = 0; 759 continue; 760 } 761 error = EINTR; 762 } 763 break; 764 } 765 766 mutex_exit(&aiop->aio_mutex); 767 768 if (cnt > 0) { 769 770 iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist, 771 aiop, model); 772 773 if (model == DATAMODEL_NATIVE) 774 riocbsz = (sizeof (aiocb_t *) * cnt); 775 #ifdef _SYSCALL32_IMPL 776 else 777 riocbsz = (sizeof (caddr32_t) * cnt); 778 #endif /* _SYSCALL32_IMPL */ 779 780 if (copyout(iocblist, uiocb, riocbsz) || 781 copyout(&cnt, nwait, sizeof (uint_t))) 782 error = EFAULT; 783 } 784 785 /* check if there is another thread waiting for execution */ 786 mutex_enter(&aiop->aio_mutex); 787 aiop->aio_flags &= ~AIO_WAITN; 788 if (aiop->aio_flags & AIO_WAITN_PENDING) { 789 aiop->aio_flags &= ~AIO_WAITN_PENDING; 790 cv_signal(&aiop->aio_waitncv); 791 } 792 mutex_exit(&aiop->aio_mutex); 793 794 return (error); 795 } 796 797 /* 798 * aio_unlock_requests 799 * copyouts the result of the request as well as the return value. 800 * It builds the list of completed asynchronous requests, 801 * unlocks the allocated memory ranges and 802 * put the aio request structure back into the free list. 
803 */ 804 805 static int 806 aio_unlock_requests( 807 caddr_t iocblist, 808 int iocb_index, 809 aio_req_t *reqlist, 810 aio_t *aiop, 811 model_t model) 812 { 813 aio_req_t *reqp, *nreqp; 814 815 if (model == DATAMODEL_NATIVE) { 816 for (reqp = reqlist; reqp != NULL; reqp = nreqp) { 817 (((caddr_t *)iocblist)[iocb_index++]) = 818 reqp->aio_req_iocb.iocb; 819 nreqp = reqp->aio_req_next; 820 aphysio_unlock(reqp); 821 aio_copyout_result(reqp); 822 mutex_enter(&aiop->aio_mutex); 823 aio_req_free(aiop, reqp); 824 mutex_exit(&aiop->aio_mutex); 825 } 826 } 827 #ifdef _SYSCALL32_IMPL 828 else { 829 for (reqp = reqlist; reqp != NULL; reqp = nreqp) { 830 ((caddr32_t *)iocblist)[iocb_index++] = 831 reqp->aio_req_iocb.iocb32; 832 nreqp = reqp->aio_req_next; 833 aphysio_unlock(reqp); 834 aio_copyout_result(reqp); 835 mutex_enter(&aiop->aio_mutex); 836 aio_req_free(aiop, reqp); 837 mutex_exit(&aiop->aio_mutex); 838 } 839 } 840 #endif /* _SYSCALL32_IMPL */ 841 return (iocb_index); 842 } 843 844 /* 845 * aio_reqlist_concat 846 * moves "max" elements from the done queue to the reqlist queue and removes 847 * the AIO_DONEQ flag. 
848 * - reqlist queue is a simple linked list 849 * - done queue is a double linked list 850 */ 851 852 static int 853 aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max) 854 { 855 aio_req_t *q2, *q2work, *list; 856 int count = 0; 857 858 list = *reqlist; 859 q2 = aiop->aio_doneq; 860 q2work = q2; 861 while (max-- > 0) { 862 q2work->aio_req_flags &= ~AIO_DONEQ; 863 q2work = q2work->aio_req_next; 864 count++; 865 if (q2work == q2) 866 break; 867 } 868 869 if (q2work == q2) { 870 /* all elements revised */ 871 q2->aio_req_prev->aio_req_next = list; 872 list = q2; 873 aiop->aio_doneq = NULL; 874 } else { 875 /* 876 * max < elements in the doneq 877 * detach only the required amount of elements 878 * out of the doneq 879 */ 880 q2work->aio_req_prev->aio_req_next = list; 881 list = q2; 882 883 aiop->aio_doneq = q2work; 884 q2work->aio_req_prev = q2->aio_req_prev; 885 q2->aio_req_prev->aio_req_next = q2work; 886 } 887 *reqlist = list; 888 return (count); 889 } 890 891 /*ARGSUSED*/ 892 static int 893 aiosuspend( 894 void *aiocb, 895 int nent, 896 struct timespec *timout, 897 int flag, 898 long *rval, 899 int run_mode) 900 { 901 int error; 902 aio_t *aiop; 903 aio_req_t *reqp, *found, *next; 904 caddr_t cbplist = NULL; 905 aiocb_t *cbp, **ucbp; 906 #ifdef _SYSCALL32_IMPL 907 aiocb32_t *cbp32; 908 caddr32_t *ucbp32; 909 #endif /* _SYSCALL32_IMPL */ 910 aiocb64_32_t *cbp64; 911 int rv; 912 int i; 913 size_t ssize; 914 model_t model = get_udatamodel(); 915 int blocking; 916 int timecheck; 917 timestruc_t rqtime; 918 timestruc_t *rqtp; 919 920 aiop = curproc->p_aio; 921 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 922 return (EINVAL); 923 924 /* 925 * Establish the absolute future time for the timeout. 
926 */ 927 error = timespec2reltime(timout, &rqtime, &rqtp, &blocking); 928 if (error) 929 return (error); 930 if (rqtp) { 931 timestruc_t now; 932 timecheck = timechanged; 933 gethrestime(&now); 934 timespecadd(rqtp, &now); 935 } 936 937 /* 938 * If we are not blocking and there's no IO complete 939 * skip aiocb copyin. 940 */ 941 if (!blocking && (aiop->aio_pollq == NULL) && 942 (aiop->aio_doneq == NULL)) { 943 return (EAGAIN); 944 } 945 946 if (model == DATAMODEL_NATIVE) 947 ssize = (sizeof (aiocb_t *) * nent); 948 #ifdef _SYSCALL32_IMPL 949 else 950 ssize = (sizeof (caddr32_t) * nent); 951 #endif /* _SYSCALL32_IMPL */ 952 953 cbplist = kmem_alloc(ssize, KM_NOSLEEP); 954 if (cbplist == NULL) 955 return (ENOMEM); 956 957 if (copyin(aiocb, cbplist, ssize)) { 958 error = EFAULT; 959 goto done; 960 } 961 962 found = NULL; 963 /* 964 * we need to get the aio_cleanupq_mutex since we call 965 * aio_req_done(). 966 */ 967 mutex_enter(&aiop->aio_cleanupq_mutex); 968 mutex_enter(&aiop->aio_mutex); 969 for (;;) { 970 /* push requests on poll queue to done queue */ 971 if (aiop->aio_pollq) { 972 mutex_exit(&aiop->aio_mutex); 973 mutex_exit(&aiop->aio_cleanupq_mutex); 974 aio_cleanup(0); 975 mutex_enter(&aiop->aio_cleanupq_mutex); 976 mutex_enter(&aiop->aio_mutex); 977 } 978 /* check for requests on done queue */ 979 if (aiop->aio_doneq) { 980 if (model == DATAMODEL_NATIVE) 981 ucbp = (aiocb_t **)cbplist; 982 #ifdef _SYSCALL32_IMPL 983 else 984 ucbp32 = (caddr32_t *)cbplist; 985 #endif /* _SYSCALL32_IMPL */ 986 for (i = 0; i < nent; i++) { 987 if (model == DATAMODEL_NATIVE) { 988 if ((cbp = *ucbp++) == NULL) 989 continue; 990 if (run_mode != AIO_LARGEFILE) 991 reqp = aio_req_done( 992 &cbp->aio_resultp); 993 else { 994 cbp64 = (aiocb64_32_t *)cbp; 995 reqp = aio_req_done( 996 &cbp64->aio_resultp); 997 } 998 } 999 #ifdef _SYSCALL32_IMPL 1000 else { 1001 if (run_mode == AIO_32) { 1002 if ((cbp32 = 1003 (aiocb32_t *)(uintptr_t) 1004 *ucbp32++) == NULL) 1005 continue; 1006 reqp 
= aio_req_done( 1007 &cbp32->aio_resultp); 1008 } else if (run_mode == AIO_LARGEFILE) { 1009 if ((cbp64 = 1010 (aiocb64_32_t *)(uintptr_t) 1011 *ucbp32++) == NULL) 1012 continue; 1013 reqp = aio_req_done( 1014 &cbp64->aio_resultp); 1015 } 1016 1017 } 1018 #endif /* _SYSCALL32_IMPL */ 1019 if (reqp) { 1020 reqp->aio_req_next = found; 1021 found = reqp; 1022 } 1023 if (aiop->aio_doneq == NULL) 1024 break; 1025 } 1026 if (found) 1027 break; 1028 } 1029 if (aiop->aio_notifycnt > 0) { 1030 /* 1031 * nothing on the kernel's queue. the user 1032 * has notified the kernel that it has items 1033 * on a user-level queue. 1034 */ 1035 aiop->aio_notifycnt--; 1036 *rval = 1; 1037 error = 0; 1038 break; 1039 } 1040 /* don't block if nothing is outstanding */ 1041 if (aiop->aio_outstanding == 0) { 1042 error = EAGAIN; 1043 break; 1044 } 1045 if (blocking) { 1046 /* 1047 * drop the aio_cleanupq_mutex as we are 1048 * going to block. 1049 */ 1050 mutex_exit(&aiop->aio_cleanupq_mutex); 1051 rv = cv_waituntil_sig(&aiop->aio_waitcv, 1052 &aiop->aio_mutex, rqtp, timecheck); 1053 /* 1054 * we have to drop aio_mutex and 1055 * grab it in the right order. 
1056 */ 1057 mutex_exit(&aiop->aio_mutex); 1058 mutex_enter(&aiop->aio_cleanupq_mutex); 1059 mutex_enter(&aiop->aio_mutex); 1060 if (rv > 0) /* check done queue again */ 1061 continue; 1062 if (rv == 0) /* interrupted by a signal */ 1063 error = EINTR; 1064 else /* timer expired */ 1065 error = ETIME; 1066 } else { 1067 error = EAGAIN; 1068 } 1069 break; 1070 } 1071 mutex_exit(&aiop->aio_mutex); 1072 mutex_exit(&aiop->aio_cleanupq_mutex); 1073 for (reqp = found; reqp != NULL; reqp = next) { 1074 next = reqp->aio_req_next; 1075 aphysio_unlock(reqp); 1076 aio_copyout_result(reqp); 1077 mutex_enter(&aiop->aio_mutex); 1078 aio_req_free(aiop, reqp); 1079 mutex_exit(&aiop->aio_mutex); 1080 } 1081 done: 1082 kmem_free(cbplist, ssize); 1083 return (error); 1084 } 1085 1086 /* 1087 * initialize aio by allocating an aio_t struct for this 1088 * process. 1089 */ 1090 static int 1091 aioinit(void) 1092 { 1093 proc_t *p = curproc; 1094 aio_t *aiop; 1095 mutex_enter(&p->p_lock); 1096 if ((aiop = p->p_aio) == NULL) { 1097 aiop = aio_aiop_alloc(); 1098 p->p_aio = aiop; 1099 } 1100 mutex_exit(&p->p_lock); 1101 if (aiop == NULL) 1102 return (ENOMEM); 1103 return (0); 1104 } 1105 1106 /* 1107 * start a special thread that will cleanup after aio requests 1108 * that are preventing a segment from being unmapped. as_unmap() 1109 * blocks until all phsyio to this segment is completed. this 1110 * doesn't happen until all the pages in this segment are not 1111 * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio 1112 * requests still outstanding. this special thread will make sure 1113 * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed. 1114 * 1115 * this function will return an error if the process has only 1116 * one LWP. the assumption is that the caller is a separate LWP 1117 * that remains blocked in the kernel for the life of this process. 
1118 */ 1119 static int 1120 aiostart(void) 1121 { 1122 proc_t *p = curproc; 1123 aio_t *aiop; 1124 int first, error = 0; 1125 1126 if (p->p_lwpcnt == 1) 1127 return (EDEADLK); 1128 mutex_enter(&p->p_lock); 1129 if ((aiop = p->p_aio) == NULL) 1130 error = EINVAL; 1131 else { 1132 first = aiop->aio_ok; 1133 if (aiop->aio_ok == 0) 1134 aiop->aio_ok = 1; 1135 } 1136 mutex_exit(&p->p_lock); 1137 if (error == 0 && first == 0) { 1138 return (aio_cleanup_thread(aiop)); 1139 /* should return only to exit */ 1140 } 1141 return (error); 1142 } 1143 1144 /* 1145 * Associate an aiocb with a port. 1146 * This function is used by aiorw() to associate a transaction with a port. 1147 * Allocate an event port structure (port_alloc_event()) and store the 1148 * delivered user pointer (portnfy_user) in the portkev_user field of the 1149 * port_kevent_t structure.. 1150 * The aio_req_portkev pointer in the aio_req_t structure was added to identify 1151 * the port association. 1152 */ 1153 1154 static int 1155 aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp, 1156 aio_req_t *reqp, int event) 1157 { 1158 port_kevent_t *pkevp = NULL; 1159 int error; 1160 1161 error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT, 1162 PORT_SOURCE_AIO, &pkevp); 1163 if (error) { 1164 if ((error == ENOMEM) || (error == EAGAIN)) 1165 error = EAGAIN; 1166 else 1167 error = EINVAL; 1168 } else { 1169 port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user, 1170 aio_port_callback, reqp); 1171 pkevp->portkev_events = event; 1172 reqp->aio_req_portkev = pkevp; 1173 reqp->aio_req_port = pntfy->portnfy_port; 1174 } 1175 return (error); 1176 } 1177 1178 #ifdef _LP64 1179 1180 /* 1181 * Asynchronous list IO. A chain of aiocb's are copied in 1182 * one at a time. If the aiocb is invalid, it is skipped. 1183 * For each aiocb, the appropriate driver entry point is 1184 * called. Optimize for the common case where the list 1185 * of requests is to the same file descriptor. 
1186 * 1187 * One possible optimization is to define a new driver entry 1188 * point that supports a list of IO requests. Whether this 1189 * improves performance depends somewhat on the driver's 1190 * locking strategy. Processing a list could adversely impact 1191 * the driver's interrupt latency. 1192 */ 1193 static int 1194 alio( 1195 int mode_arg, 1196 aiocb_t **aiocb_arg, 1197 int nent, 1198 struct sigevent *sigev) 1199 { 1200 file_t *fp; 1201 file_t *prev_fp = NULL; 1202 int prev_mode = -1; 1203 struct vnode *vp; 1204 aio_lio_t *head; 1205 aio_req_t *reqp; 1206 aio_t *aiop; 1207 caddr_t cbplist; 1208 aiocb_t cb; 1209 aiocb_t *aiocb = &cb; 1210 aiocb_t *cbp; 1211 aiocb_t **ucbp; 1212 struct sigevent sigevk; 1213 sigqueue_t *sqp; 1214 int (*aio_func)(); 1215 int mode; 1216 int error = 0; 1217 int aio_errors = 0; 1218 int i; 1219 size_t ssize; 1220 int deadhead = 0; 1221 int aio_notsupported = 0; 1222 int lio_head_port; 1223 int aio_port; 1224 int aio_thread; 1225 port_kevent_t *pkevtp = NULL; 1226 int portused = 0; 1227 port_notify_t pnotify; 1228 int event; 1229 1230 aiop = curproc->p_aio; 1231 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 1232 return (EINVAL); 1233 1234 ssize = (sizeof (aiocb_t *) * nent); 1235 cbplist = kmem_alloc(ssize, KM_SLEEP); 1236 ucbp = (aiocb_t **)cbplist; 1237 1238 if (copyin(aiocb_arg, cbplist, ssize) || 1239 (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) { 1240 kmem_free(cbplist, ssize); 1241 return (EFAULT); 1242 } 1243 1244 /* Event Ports */ 1245 if (sigev && 1246 (sigevk.sigev_notify == SIGEV_THREAD || 1247 sigevk.sigev_notify == SIGEV_PORT)) { 1248 if (sigevk.sigev_notify == SIGEV_THREAD) { 1249 pnotify.portnfy_port = sigevk.sigev_signo; 1250 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 1251 } else if (copyin(sigevk.sigev_value.sival_ptr, 1252 &pnotify, sizeof (pnotify))) { 1253 kmem_free(cbplist, ssize); 1254 return (EFAULT); 1255 } 1256 error = port_alloc_event(pnotify.portnfy_port, 1257 
PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 1258 if (error) { 1259 if (error == ENOMEM || error == EAGAIN) 1260 error = EAGAIN; 1261 else 1262 error = EINVAL; 1263 kmem_free(cbplist, ssize); 1264 return (error); 1265 } 1266 lio_head_port = pnotify.portnfy_port; 1267 portused = 1; 1268 } 1269 1270 /* 1271 * a list head should be allocated if notification is 1272 * enabled for this list. 1273 */ 1274 head = NULL; 1275 1276 if (mode_arg == LIO_WAIT || sigev) { 1277 mutex_enter(&aiop->aio_mutex); 1278 error = aio_lio_alloc(&head); 1279 mutex_exit(&aiop->aio_mutex); 1280 if (error) 1281 goto done; 1282 deadhead = 1; 1283 head->lio_nent = nent; 1284 head->lio_refcnt = nent; 1285 head->lio_port = -1; 1286 head->lio_portkev = NULL; 1287 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 1288 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 1289 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 1290 if (sqp == NULL) { 1291 error = EAGAIN; 1292 goto done; 1293 } 1294 sqp->sq_func = NULL; 1295 sqp->sq_next = NULL; 1296 sqp->sq_info.si_code = SI_ASYNCIO; 1297 sqp->sq_info.si_pid = curproc->p_pid; 1298 sqp->sq_info.si_ctid = PRCTID(curproc); 1299 sqp->sq_info.si_zoneid = getzoneid(); 1300 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 1301 sqp->sq_info.si_signo = sigevk.sigev_signo; 1302 sqp->sq_info.si_value = sigevk.sigev_value; 1303 head->lio_sigqp = sqp; 1304 } else { 1305 head->lio_sigqp = NULL; 1306 } 1307 if (pkevtp) { 1308 /* 1309 * Prepare data to send when list of aiocb's 1310 * has completed. 1311 */ 1312 port_init_event(pkevtp, (uintptr_t)sigev, 1313 (void *)(uintptr_t)pnotify.portnfy_user, 1314 NULL, head); 1315 pkevtp->portkev_events = AIOLIO; 1316 head->lio_portkev = pkevtp; 1317 head->lio_port = pnotify.portnfy_port; 1318 } 1319 } 1320 1321 for (i = 0; i < nent; i++, ucbp++) { 1322 1323 cbp = *ucbp; 1324 /* skip entry if it can't be copied. 
*/ 1325 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) { 1326 if (head) { 1327 mutex_enter(&aiop->aio_mutex); 1328 head->lio_nent--; 1329 head->lio_refcnt--; 1330 mutex_exit(&aiop->aio_mutex); 1331 } 1332 continue; 1333 } 1334 1335 /* skip if opcode for aiocb is LIO_NOP */ 1336 mode = aiocb->aio_lio_opcode; 1337 if (mode == LIO_NOP) { 1338 cbp = NULL; 1339 if (head) { 1340 mutex_enter(&aiop->aio_mutex); 1341 head->lio_nent--; 1342 head->lio_refcnt--; 1343 mutex_exit(&aiop->aio_mutex); 1344 } 1345 continue; 1346 } 1347 1348 /* increment file descriptor's ref count. */ 1349 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 1350 lio_set_uerror(&cbp->aio_resultp, EBADF); 1351 if (head) { 1352 mutex_enter(&aiop->aio_mutex); 1353 head->lio_nent--; 1354 head->lio_refcnt--; 1355 mutex_exit(&aiop->aio_mutex); 1356 } 1357 aio_errors++; 1358 continue; 1359 } 1360 1361 /* 1362 * check the permission of the partition 1363 */ 1364 if ((fp->f_flag & mode) == 0) { 1365 releasef(aiocb->aio_fildes); 1366 lio_set_uerror(&cbp->aio_resultp, EBADF); 1367 if (head) { 1368 mutex_enter(&aiop->aio_mutex); 1369 head->lio_nent--; 1370 head->lio_refcnt--; 1371 mutex_exit(&aiop->aio_mutex); 1372 } 1373 aio_errors++; 1374 continue; 1375 } 1376 1377 /* 1378 * common case where requests are to the same fd 1379 * for the same r/w operation. 
1380 * for UFS, need to set EBADFD 1381 */ 1382 vp = fp->f_vnode; 1383 if (fp != prev_fp || mode != prev_mode) { 1384 aio_func = check_vp(vp, mode); 1385 if (aio_func == NULL) { 1386 prev_fp = NULL; 1387 releasef(aiocb->aio_fildes); 1388 lio_set_uerror(&cbp->aio_resultp, EBADFD); 1389 aio_notsupported++; 1390 if (head) { 1391 mutex_enter(&aiop->aio_mutex); 1392 head->lio_nent--; 1393 head->lio_refcnt--; 1394 mutex_exit(&aiop->aio_mutex); 1395 } 1396 continue; 1397 } else { 1398 prev_fp = fp; 1399 prev_mode = mode; 1400 } 1401 } 1402 1403 error = aio_req_setup(&reqp, aiop, aiocb, 1404 &cbp->aio_resultp, vp); 1405 if (error) { 1406 releasef(aiocb->aio_fildes); 1407 lio_set_uerror(&cbp->aio_resultp, error); 1408 if (head) { 1409 mutex_enter(&aiop->aio_mutex); 1410 head->lio_nent--; 1411 head->lio_refcnt--; 1412 mutex_exit(&aiop->aio_mutex); 1413 } 1414 aio_errors++; 1415 continue; 1416 } 1417 1418 reqp->aio_req_lio = head; 1419 deadhead = 0; 1420 1421 /* 1422 * Set the errno field now before sending the request to 1423 * the driver to avoid a race condition 1424 */ 1425 (void) suword32(&cbp->aio_resultp.aio_errno, 1426 EINPROGRESS); 1427 1428 reqp->aio_req_iocb.iocb = (caddr_t)cbp; 1429 1430 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE; 1431 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 1432 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 1433 if (aio_port | aio_thread) { 1434 port_kevent_t *lpkevp; 1435 /* 1436 * Prepare data to send with each aiocb completed. 
1437 */ 1438 if (aio_port) { 1439 void *paddr = 1440 aiocb->aio_sigevent.sigev_value.sival_ptr; 1441 if (copyin(paddr, &pnotify, sizeof (pnotify))) 1442 error = EFAULT; 1443 } else { /* aio_thread */ 1444 pnotify.portnfy_port = 1445 aiocb->aio_sigevent.sigev_signo; 1446 pnotify.portnfy_user = 1447 aiocb->aio_sigevent.sigev_value.sival_ptr; 1448 } 1449 if (error) 1450 /* EMPTY */; 1451 else if (pkevtp != NULL && 1452 pnotify.portnfy_port == lio_head_port) 1453 error = port_dup_event(pkevtp, &lpkevp, 1454 PORT_ALLOC_DEFAULT); 1455 else 1456 error = port_alloc_event(pnotify.portnfy_port, 1457 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 1458 &lpkevp); 1459 if (error == 0) { 1460 port_init_event(lpkevp, (uintptr_t)cbp, 1461 (void *)(uintptr_t)pnotify.portnfy_user, 1462 aio_port_callback, reqp); 1463 lpkevp->portkev_events = event; 1464 reqp->aio_req_portkev = lpkevp; 1465 reqp->aio_req_port = pnotify.portnfy_port; 1466 } 1467 } 1468 1469 /* 1470 * send the request to driver. 1471 */ 1472 if (error == 0) { 1473 if (aiocb->aio_nbytes == 0) { 1474 clear_active_fd(aiocb->aio_fildes); 1475 aio_zerolen(reqp); 1476 continue; 1477 } 1478 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 1479 CRED()); 1480 } 1481 1482 /* 1483 * the fd's ref count is not decremented until the IO has 1484 * completed unless there was an error. 
1485 */ 1486 if (error) { 1487 releasef(aiocb->aio_fildes); 1488 lio_set_uerror(&cbp->aio_resultp, error); 1489 if (head) { 1490 mutex_enter(&aiop->aio_mutex); 1491 head->lio_nent--; 1492 head->lio_refcnt--; 1493 mutex_exit(&aiop->aio_mutex); 1494 } 1495 if (error == ENOTSUP) 1496 aio_notsupported++; 1497 else 1498 aio_errors++; 1499 lio_set_error(reqp, portused); 1500 } else { 1501 clear_active_fd(aiocb->aio_fildes); 1502 } 1503 } 1504 1505 if (aio_notsupported) { 1506 error = ENOTSUP; 1507 } else if (aio_errors) { 1508 /* 1509 * return EIO if any request failed 1510 */ 1511 error = EIO; 1512 } 1513 1514 if (mode_arg == LIO_WAIT) { 1515 mutex_enter(&aiop->aio_mutex); 1516 while (head->lio_refcnt > 0) { 1517 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 1518 mutex_exit(&aiop->aio_mutex); 1519 error = EINTR; 1520 goto done; 1521 } 1522 } 1523 mutex_exit(&aiop->aio_mutex); 1524 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64); 1525 } 1526 1527 done: 1528 kmem_free(cbplist, ssize); 1529 if (deadhead) { 1530 if (head->lio_sigqp) 1531 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 1532 if (head->lio_portkev) 1533 port_free_event(head->lio_portkev); 1534 kmem_free(head, sizeof (aio_lio_t)); 1535 } 1536 return (error); 1537 } 1538 1539 #endif /* _LP64 */ 1540 1541 /* 1542 * Asynchronous list IO. 1543 * If list I/O is called with LIO_WAIT it can still return 1544 * before all the I/O's are completed if a signal is caught 1545 * or if the list include UFS I/O requests. 
If this happens, 1546 * libaio will call aliowait() to wait for the I/O's to 1547 * complete 1548 */ 1549 /*ARGSUSED*/ 1550 static int 1551 aliowait( 1552 int mode, 1553 void *aiocb, 1554 int nent, 1555 void *sigev, 1556 int run_mode) 1557 { 1558 aio_lio_t *head; 1559 aio_t *aiop; 1560 caddr_t cbplist; 1561 aiocb_t *cbp, **ucbp; 1562 #ifdef _SYSCALL32_IMPL 1563 aiocb32_t *cbp32; 1564 caddr32_t *ucbp32; 1565 aiocb64_32_t *cbp64; 1566 #endif 1567 int error = 0; 1568 int i; 1569 size_t ssize = 0; 1570 model_t model = get_udatamodel(); 1571 1572 aiop = curproc->p_aio; 1573 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 1574 return (EINVAL); 1575 1576 if (model == DATAMODEL_NATIVE) 1577 ssize = (sizeof (aiocb_t *) * nent); 1578 #ifdef _SYSCALL32_IMPL 1579 else 1580 ssize = (sizeof (caddr32_t) * nent); 1581 #endif /* _SYSCALL32_IMPL */ 1582 1583 if (ssize == 0) 1584 return (EINVAL); 1585 1586 cbplist = kmem_alloc(ssize, KM_SLEEP); 1587 1588 if (model == DATAMODEL_NATIVE) 1589 ucbp = (aiocb_t **)cbplist; 1590 #ifdef _SYSCALL32_IMPL 1591 else 1592 ucbp32 = (caddr32_t *)cbplist; 1593 #endif /* _SYSCALL32_IMPL */ 1594 1595 if (copyin(aiocb, cbplist, ssize)) { 1596 error = EFAULT; 1597 goto done; 1598 } 1599 1600 /* 1601 * To find the list head, we go through the 1602 * list of aiocb structs, find the request 1603 * its for, then get the list head that reqp 1604 * points to 1605 */ 1606 head = NULL; 1607 1608 for (i = 0; i < nent; i++) { 1609 if (model == DATAMODEL_NATIVE) { 1610 /* 1611 * Since we are only checking for a NULL pointer 1612 * Following should work on both native data sizes 1613 * as well as for largefile aiocb. 1614 */ 1615 if ((cbp = *ucbp++) == NULL) 1616 continue; 1617 if (run_mode != AIO_LARGEFILE) 1618 if (head = aio_list_get(&cbp->aio_resultp)) 1619 break; 1620 else { 1621 /* 1622 * This is a case when largefile call is 1623 * made on 32 bit kernel. 
1624 * Treat each pointer as pointer to 1625 * aiocb64_32 1626 */ 1627 if (head = aio_list_get((aio_result_t *) 1628 &(((aiocb64_32_t *)cbp)->aio_resultp))) 1629 break; 1630 } 1631 } 1632 #ifdef _SYSCALL32_IMPL 1633 else { 1634 if (run_mode == AIO_LARGEFILE) { 1635 if ((cbp64 = (aiocb64_32_t *) 1636 (uintptr_t)*ucbp32++) == NULL) 1637 continue; 1638 if (head = aio_list_get((aio_result_t *) 1639 &cbp64->aio_resultp)) 1640 break; 1641 } else if (run_mode == AIO_32) { 1642 if ((cbp32 = (aiocb32_t *) 1643 (uintptr_t)*ucbp32++) == NULL) 1644 continue; 1645 if (head = aio_list_get((aio_result_t *) 1646 &cbp32->aio_resultp)) 1647 break; 1648 } 1649 } 1650 #endif /* _SYSCALL32_IMPL */ 1651 } 1652 1653 if (head == NULL) { 1654 error = EINVAL; 1655 goto done; 1656 } 1657 1658 mutex_enter(&aiop->aio_mutex); 1659 while (head->lio_refcnt > 0) { 1660 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 1661 mutex_exit(&aiop->aio_mutex); 1662 error = EINTR; 1663 goto done; 1664 } 1665 } 1666 mutex_exit(&aiop->aio_mutex); 1667 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode); 1668 done: 1669 kmem_free(cbplist, ssize); 1670 return (error); 1671 } 1672 1673 aio_lio_t * 1674 aio_list_get(aio_result_t *resultp) 1675 { 1676 aio_lio_t *head = NULL; 1677 aio_t *aiop; 1678 aio_req_t **bucket; 1679 aio_req_t *reqp; 1680 long index; 1681 1682 aiop = curproc->p_aio; 1683 if (aiop == NULL) 1684 return (NULL); 1685 1686 if (resultp) { 1687 index = AIO_HASH(resultp); 1688 bucket = &aiop->aio_hash[index]; 1689 for (reqp = *bucket; reqp != NULL; 1690 reqp = reqp->aio_hash_next) { 1691 if (reqp->aio_req_resultp == resultp) { 1692 head = reqp->aio_req_lio; 1693 return (head); 1694 } 1695 } 1696 } 1697 return (NULL); 1698 } 1699 1700 1701 static void 1702 lio_set_uerror(void *resultp, int error) 1703 { 1704 /* 1705 * the resultp field is a pointer to where the 1706 * error should be written out to the user's 1707 * aiocb. 
1708 * 1709 */ 1710 if (get_udatamodel() == DATAMODEL_NATIVE) { 1711 (void) sulword(&((aio_result_t *)resultp)->aio_return, 1712 (ssize_t)-1); 1713 (void) suword32(&((aio_result_t *)resultp)->aio_errno, error); 1714 } 1715 #ifdef _SYSCALL32_IMPL 1716 else { 1717 (void) suword32(&((aio_result32_t *)resultp)->aio_return, 1718 (uint_t)-1); 1719 (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error); 1720 } 1721 #endif /* _SYSCALL32_IMPL */ 1722 } 1723 1724 /* 1725 * do cleanup completion for all requests in list. memory for 1726 * each request is also freed. 1727 */ 1728 static void 1729 alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode) 1730 { 1731 int i; 1732 aio_req_t *reqp; 1733 aio_result_t *resultp; 1734 aiocb64_32_t *aiocb_64; 1735 1736 for (i = 0; i < nent; i++) { 1737 if (get_udatamodel() == DATAMODEL_NATIVE) { 1738 if (cbp[i] == NULL) 1739 continue; 1740 if (run_mode == AIO_LARGEFILE) { 1741 aiocb_64 = (aiocb64_32_t *)cbp[i]; 1742 resultp = (aio_result_t *) 1743 &aiocb_64->aio_resultp; 1744 } else 1745 resultp = &cbp[i]->aio_resultp; 1746 } 1747 #ifdef _SYSCALL32_IMPL 1748 else { 1749 aiocb32_t *aiocb_32; 1750 caddr32_t *cbp32; 1751 1752 cbp32 = (caddr32_t *)cbp; 1753 if (cbp32[i] == NULL) 1754 continue; 1755 if (run_mode == AIO_32) { 1756 aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i]; 1757 resultp = (aio_result_t *)&aiocb_32-> 1758 aio_resultp; 1759 } else if (run_mode == AIO_LARGEFILE) { 1760 aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i]; 1761 resultp = (aio_result_t *)&aiocb_64-> 1762 aio_resultp; 1763 } 1764 } 1765 #endif /* _SYSCALL32_IMPL */ 1766 /* 1767 * we need to get the aio_cleanupq_mutex since we call 1768 * aio_req_done(). 
1769 */ 1770 mutex_enter(&aiop->aio_cleanupq_mutex); 1771 mutex_enter(&aiop->aio_mutex); 1772 reqp = aio_req_done(resultp); 1773 mutex_exit(&aiop->aio_mutex); 1774 mutex_exit(&aiop->aio_cleanupq_mutex); 1775 if (reqp != NULL) { 1776 aphysio_unlock(reqp); 1777 aio_copyout_result(reqp); 1778 mutex_enter(&aiop->aio_mutex); 1779 aio_req_free(aiop, reqp); 1780 mutex_exit(&aiop->aio_mutex); 1781 } 1782 } 1783 } 1784 1785 /* 1786 * Write out the results for an aio request that is done. 1787 */ 1788 static int 1789 aioerror(void *cb, int run_mode) 1790 { 1791 aio_result_t *resultp; 1792 aio_t *aiop; 1793 aio_req_t *reqp; 1794 int retval; 1795 1796 aiop = curproc->p_aio; 1797 if (aiop == NULL || cb == NULL) 1798 return (EINVAL); 1799 1800 if (get_udatamodel() == DATAMODEL_NATIVE) { 1801 if (run_mode == AIO_LARGEFILE) 1802 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 1803 aio_resultp; 1804 else 1805 resultp = &((aiocb_t *)cb)->aio_resultp; 1806 } 1807 #ifdef _SYSCALL32_IMPL 1808 else { 1809 if (run_mode == AIO_LARGEFILE) 1810 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 1811 aio_resultp; 1812 else if (run_mode == AIO_32) 1813 resultp = (aio_result_t *)&((aiocb32_t *)cb)-> 1814 aio_resultp; 1815 } 1816 #endif /* _SYSCALL32_IMPL */ 1817 /* 1818 * we need to get the aio_cleanupq_mutex since we call 1819 * aio_req_find(). 
1820 */ 1821 mutex_enter(&aiop->aio_cleanupq_mutex); 1822 mutex_enter(&aiop->aio_mutex); 1823 retval = aio_req_find(resultp, &reqp); 1824 mutex_exit(&aiop->aio_mutex); 1825 mutex_exit(&aiop->aio_cleanupq_mutex); 1826 if (retval == 0) { 1827 aphysio_unlock(reqp); 1828 aio_copyout_result(reqp); 1829 mutex_enter(&aiop->aio_mutex); 1830 aio_req_free(aiop, reqp); 1831 mutex_exit(&aiop->aio_mutex); 1832 return (0); 1833 } else if (retval == 1) 1834 return (EINPROGRESS); 1835 else if (retval == 2) 1836 return (EINVAL); 1837 return (0); 1838 } 1839 1840 /* 1841 * aio_cancel - if no requests outstanding, 1842 * return AIO_ALLDONE 1843 * else 1844 * return AIO_NOTCANCELED 1845 */ 1846 static int 1847 aio_cancel( 1848 int fildes, 1849 void *cb, 1850 long *rval, 1851 int run_mode) 1852 { 1853 aio_t *aiop; 1854 void *resultp; 1855 int index; 1856 aio_req_t **bucket; 1857 aio_req_t *ent; 1858 1859 1860 /* 1861 * Verify valid file descriptor 1862 */ 1863 if ((getf(fildes)) == NULL) { 1864 return (EBADF); 1865 } 1866 releasef(fildes); 1867 1868 aiop = curproc->p_aio; 1869 if (aiop == NULL) 1870 return (EINVAL); 1871 1872 if (aiop->aio_outstanding == 0) { 1873 *rval = AIO_ALLDONE; 1874 return (0); 1875 } 1876 1877 mutex_enter(&aiop->aio_mutex); 1878 if (cb != NULL) { 1879 if (get_udatamodel() == DATAMODEL_NATIVE) { 1880 if (run_mode == AIO_LARGEFILE) 1881 resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 1882 ->aio_resultp; 1883 else 1884 resultp = &((aiocb_t *)cb)->aio_resultp; 1885 } 1886 #ifdef _SYSCALL32_IMPL 1887 else { 1888 if (run_mode == AIO_LARGEFILE) 1889 resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 1890 ->aio_resultp; 1891 else if (run_mode == AIO_32) 1892 resultp = (aio_result_t *)&((aiocb32_t *)cb) 1893 ->aio_resultp; 1894 } 1895 #endif /* _SYSCALL32_IMPL */ 1896 index = AIO_HASH(resultp); 1897 bucket = &aiop->aio_hash[index]; 1898 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 1899 if (ent->aio_req_resultp == resultp) { 1900 if ((ent->aio_req_flags & 
AIO_PENDING) == 0) { 1901 mutex_exit(&aiop->aio_mutex); 1902 *rval = AIO_ALLDONE; 1903 return (0); 1904 } 1905 mutex_exit(&aiop->aio_mutex); 1906 *rval = AIO_NOTCANCELED; 1907 return (0); 1908 } 1909 } 1910 mutex_exit(&aiop->aio_mutex); 1911 *rval = AIO_ALLDONE; 1912 return (0); 1913 } 1914 1915 for (index = 0; index < AIO_HASHSZ; index++) { 1916 bucket = &aiop->aio_hash[index]; 1917 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 1918 if (ent->aio_req_fd == fildes) { 1919 if ((ent->aio_req_flags & AIO_PENDING) != 0) { 1920 mutex_exit(&aiop->aio_mutex); 1921 *rval = AIO_NOTCANCELED; 1922 return (0); 1923 } 1924 } 1925 } 1926 } 1927 mutex_exit(&aiop->aio_mutex); 1928 *rval = AIO_ALLDONE; 1929 return (0); 1930 } 1931 1932 /* 1933 * solaris version of asynchronous read and write 1934 */ 1935 static int 1936 arw( 1937 int opcode, 1938 int fdes, 1939 char *bufp, 1940 int bufsize, 1941 offset_t offset, 1942 aio_result_t *resultp, 1943 int mode) 1944 { 1945 file_t *fp; 1946 int error; 1947 struct vnode *vp; 1948 aio_req_t *reqp; 1949 aio_t *aiop; 1950 int (*aio_func)(); 1951 #ifdef _LP64 1952 aiocb_t aiocb; 1953 #else 1954 aiocb64_32_t aiocb64; 1955 #endif 1956 1957 aiop = curproc->p_aio; 1958 if (aiop == NULL) 1959 return (EINVAL); 1960 1961 if ((fp = getf(fdes)) == NULL) { 1962 return (EBADF); 1963 } 1964 1965 /* 1966 * check the permission of the partition 1967 */ 1968 if ((fp->f_flag & mode) == 0) { 1969 releasef(fdes); 1970 return (EBADF); 1971 } 1972 1973 vp = fp->f_vnode; 1974 aio_func = check_vp(vp, mode); 1975 if (aio_func == NULL) { 1976 releasef(fdes); 1977 return (EBADFD); 1978 } 1979 #ifdef _LP64 1980 aiocb.aio_fildes = fdes; 1981 aiocb.aio_buf = bufp; 1982 aiocb.aio_nbytes = bufsize; 1983 aiocb.aio_offset = offset; 1984 aiocb.aio_sigevent.sigev_notify = 0; 1985 error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp); 1986 #else 1987 aiocb64.aio_fildes = fdes; 1988 aiocb64.aio_buf = (caddr32_t)bufp; 1989 aiocb64.aio_nbytes = bufsize; 1990 
aiocb64.aio_offset = offset; 1991 aiocb64.aio_sigevent.sigev_notify = 0; 1992 error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp); 1993 #endif 1994 if (error) { 1995 releasef(fdes); 1996 return (error); 1997 } 1998 1999 /* 2000 * enable polling on this request if the opcode has 2001 * the AIO poll bit set 2002 */ 2003 if (opcode & AIO_POLL_BIT) 2004 reqp->aio_req_flags |= AIO_POLL; 2005 2006 if (bufsize == 0) { 2007 clear_active_fd(fdes); 2008 aio_zerolen(reqp); 2009 return (0); 2010 } 2011 /* 2012 * send the request to driver. 2013 */ 2014 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED()); 2015 /* 2016 * the fd is stored in the aio_req_t by aio_req_setup(), and 2017 * is released by the aio_cleanup_thread() when the IO has 2018 * completed. 2019 */ 2020 if (error) { 2021 releasef(fdes); 2022 mutex_enter(&aiop->aio_mutex); 2023 aio_req_free(aiop, reqp); 2024 aiop->aio_pending--; 2025 if (aiop->aio_flags & AIO_REQ_BLOCK) 2026 cv_signal(&aiop->aio_cleanupcv); 2027 mutex_exit(&aiop->aio_mutex); 2028 return (error); 2029 } 2030 clear_active_fd(fdes); 2031 return (0); 2032 } 2033 2034 /* 2035 * posix version of asynchronous read and write 2036 */ 2037 static int 2038 aiorw( 2039 int opcode, 2040 void *aiocb_arg, 2041 int mode, 2042 int run_mode) 2043 { 2044 #ifdef _SYSCALL32_IMPL 2045 aiocb32_t aiocb32; 2046 struct sigevent32 *sigev32; 2047 port_notify32_t pntfy32; 2048 #endif 2049 aiocb64_32_t aiocb64; 2050 aiocb_t aiocb; 2051 file_t *fp; 2052 int error, fd; 2053 size_t bufsize; 2054 struct vnode *vp; 2055 aio_req_t *reqp; 2056 aio_t *aiop; 2057 int (*aio_func)(); 2058 aio_result_t *resultp; 2059 struct sigevent *sigev; 2060 model_t model; 2061 int aio_use_port = 0; 2062 port_notify_t pntfy; 2063 2064 model = get_udatamodel(); 2065 aiop = curproc->p_aio; 2066 if (aiop == NULL) 2067 return (EINVAL); 2068 2069 if (model == DATAMODEL_NATIVE) { 2070 if (run_mode != AIO_LARGEFILE) { 2071 if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t))) 2072 return 
(EFAULT); 2073 bufsize = aiocb.aio_nbytes; 2074 resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp); 2075 if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) { 2076 return (EBADF); 2077 } 2078 sigev = &aiocb.aio_sigevent; 2079 } else { 2080 /* 2081 * We come here only when we make largefile 2082 * call on 32 bit kernel using 32 bit library. 2083 */ 2084 if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t))) 2085 return (EFAULT); 2086 bufsize = aiocb64.aio_nbytes; 2087 resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg) 2088 ->aio_resultp); 2089 if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) 2090 return (EBADF); 2091 sigev = (struct sigevent *)&aiocb64.aio_sigevent; 2092 } 2093 2094 if (sigev->sigev_notify == SIGEV_PORT) { 2095 if (copyin((void *)sigev->sigev_value.sival_ptr, 2096 &pntfy, sizeof (port_notify_t))) { 2097 releasef(fd); 2098 return (EFAULT); 2099 } 2100 aio_use_port = 1; 2101 } else if (sigev->sigev_notify == SIGEV_THREAD) { 2102 pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo; 2103 pntfy.portnfy_user = 2104 aiocb.aio_sigevent.sigev_value.sival_ptr; 2105 aio_use_port = 1; 2106 } 2107 } 2108 #ifdef _SYSCALL32_IMPL 2109 else { 2110 if (run_mode == AIO_32) { 2111 /* 32 bit system call is being made on 64 bit kernel */ 2112 if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t))) 2113 return (EFAULT); 2114 2115 bufsize = aiocb32.aio_nbytes; 2116 aiocb_32ton(&aiocb32, &aiocb); 2117 resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)-> 2118 aio_resultp); 2119 if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) { 2120 return (EBADF); 2121 } 2122 sigev32 = &aiocb32.aio_sigevent; 2123 } else if (run_mode == AIO_LARGEFILE) { 2124 /* 2125 * We come here only when we make largefile 2126 * call on 64 bit kernel using 32 bit library. 
2127 */ 2128 if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t))) 2129 return (EFAULT); 2130 bufsize = aiocb64.aio_nbytes; 2131 aiocb_LFton(&aiocb64, &aiocb); 2132 resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg) 2133 ->aio_resultp); 2134 if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) 2135 return (EBADF); 2136 sigev32 = &aiocb64.aio_sigevent; 2137 } 2138 2139 if (sigev32->sigev_notify == SIGEV_PORT) { 2140 if (copyin( 2141 (void *)(uintptr_t)sigev32->sigev_value.sival_ptr, 2142 &pntfy32, sizeof (port_notify32_t))) { 2143 releasef(fd); 2144 return (EFAULT); 2145 } 2146 pntfy.portnfy_port = pntfy32.portnfy_port; 2147 pntfy.portnfy_user = (void *)(uintptr_t) 2148 pntfy32.portnfy_user; 2149 aio_use_port = 1; 2150 } else if (sigev32->sigev_notify == SIGEV_THREAD) { 2151 pntfy.portnfy_port = sigev32->sigev_signo; 2152 pntfy.portnfy_user = (void *)(uintptr_t) 2153 sigev32->sigev_value.sival_ptr; 2154 aio_use_port = 1; 2155 } 2156 } 2157 #endif /* _SYSCALL32_IMPL */ 2158 2159 /* 2160 * check the permission of the partition 2161 */ 2162 2163 if ((fp->f_flag & mode) == 0) { 2164 releasef(fd); 2165 return (EBADF); 2166 } 2167 2168 vp = fp->f_vnode; 2169 aio_func = check_vp(vp, mode); 2170 if (aio_func == NULL) { 2171 releasef(fd); 2172 return (EBADFD); 2173 } 2174 if (run_mode == AIO_LARGEFILE) 2175 error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp); 2176 else 2177 error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp); 2178 2179 if (error) { 2180 releasef(fd); 2181 return (error); 2182 } 2183 /* 2184 * enable polling on this request if the opcode has 2185 * the AIO poll bit set 2186 */ 2187 if (opcode & AIO_POLL_BIT) 2188 reqp->aio_req_flags |= AIO_POLL; 2189 2190 if (model == DATAMODEL_NATIVE) 2191 reqp->aio_req_iocb.iocb = aiocb_arg; 2192 #ifdef _SYSCALL32_IMPL 2193 else 2194 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg; 2195 #endif 2196 2197 if (aio_use_port) { 2198 int event = (run_mode == AIO_LARGEFILE)? 2199 ((mode == FREAD)? 
AIOAREAD64 : AIOAWRITE64) : 2200 ((mode == FREAD)? AIOAREAD : AIOAWRITE); 2201 error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event); 2202 } 2203 2204 /* 2205 * send the request to driver. 2206 */ 2207 if (error == 0) { 2208 if (bufsize == 0) { 2209 clear_active_fd(fd); 2210 aio_zerolen(reqp); 2211 return (0); 2212 } 2213 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED()); 2214 } 2215 2216 /* 2217 * the fd is stored in the aio_req_t by aio_req_setup(), and 2218 * is released by the aio_cleanup_thread() when the IO has 2219 * completed. 2220 */ 2221 if (error) { 2222 releasef(fd); 2223 mutex_enter(&aiop->aio_mutex); 2224 if (aio_use_port) 2225 aio_deq(&aiop->aio_portpending, reqp); 2226 aio_req_free(aiop, reqp); 2227 aiop->aio_pending--; 2228 if (aiop->aio_flags & AIO_REQ_BLOCK) 2229 cv_signal(&aiop->aio_cleanupcv); 2230 mutex_exit(&aiop->aio_mutex); 2231 return (error); 2232 } 2233 clear_active_fd(fd); 2234 return (0); 2235 } 2236 2237 2238 /* 2239 * set error for a list IO entry that failed. 2240 */ 2241 static void 2242 lio_set_error(aio_req_t *reqp, int portused) 2243 { 2244 aio_t *aiop = curproc->p_aio; 2245 2246 if (aiop == NULL) 2247 return; 2248 2249 mutex_enter(&aiop->aio_mutex); 2250 if (portused) 2251 aio_deq(&aiop->aio_portpending, reqp); 2252 aiop->aio_pending--; 2253 /* request failed, AIO_PHYSIODONE set to aviod physio cleanup. */ 2254 reqp->aio_req_flags |= AIO_PHYSIODONE; 2255 /* 2256 * Need to free the request now as its never 2257 * going to get on the done queue 2258 * 2259 * Note: aio_outstanding is decremented in 2260 * aio_req_free() 2261 */ 2262 aio_req_free(aiop, reqp); 2263 if (aiop->aio_flags & AIO_REQ_BLOCK) 2264 cv_signal(&aiop->aio_cleanupcv); 2265 mutex_exit(&aiop->aio_mutex); 2266 } 2267 2268 /* 2269 * check if a specified request is done, and remove it from 2270 * the done queue. otherwise remove anybody from the done queue 2271 * if NULL is specified. 
2272 */ 2273 static aio_req_t * 2274 aio_req_done(void *resultp) 2275 { 2276 aio_req_t **bucket; 2277 aio_req_t *ent; 2278 aio_t *aiop = curproc->p_aio; 2279 long index; 2280 2281 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex)); 2282 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2283 2284 if (resultp) { 2285 index = AIO_HASH(resultp); 2286 bucket = &aiop->aio_hash[index]; 2287 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 2288 if (ent->aio_req_resultp == (aio_result_t *)resultp) { 2289 if (ent->aio_req_flags & AIO_DONEQ) { 2290 return (aio_req_remove(ent)); 2291 } 2292 return (NULL); 2293 } 2294 } 2295 /* no match, resultp is invalid */ 2296 return (NULL); 2297 } 2298 return (aio_req_remove(NULL)); 2299 } 2300 2301 /* 2302 * determine if a user-level resultp pointer is associated with an 2303 * active IO request. Zero is returned when the request is done, 2304 * and the request is removed from the done queue. Only when the 2305 * return value is zero, is the "reqp" pointer valid. One is returned 2306 * when the request is inprogress. Two is returned when the request 2307 * is invalid. 2308 */ 2309 static int 2310 aio_req_find(aio_result_t *resultp, aio_req_t **reqp) 2311 { 2312 aio_req_t **bucket; 2313 aio_req_t *ent; 2314 aio_t *aiop = curproc->p_aio; 2315 long index; 2316 2317 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex)); 2318 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2319 2320 index = AIO_HASH(resultp); 2321 bucket = &aiop->aio_hash[index]; 2322 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 2323 if (ent->aio_req_resultp == resultp) { 2324 if (ent->aio_req_flags & AIO_DONEQ) { 2325 *reqp = aio_req_remove(ent); 2326 return (0); 2327 } 2328 return (1); 2329 } 2330 } 2331 /* no match, resultp is invalid */ 2332 return (2); 2333 } 2334 2335 /* 2336 * remove a request from the done queue. 
2337 */ 2338 static aio_req_t * 2339 aio_req_remove(aio_req_t *reqp) 2340 { 2341 aio_t *aiop = curproc->p_aio; 2342 2343 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2344 2345 if (reqp != NULL) { 2346 ASSERT(reqp->aio_req_flags & AIO_DONEQ); 2347 if (reqp->aio_req_next == reqp) { 2348 /* only one request on queue */ 2349 if (reqp == aiop->aio_doneq) { 2350 aiop->aio_doneq = NULL; 2351 } else { 2352 ASSERT(reqp == aiop->aio_cleanupq); 2353 aiop->aio_cleanupq = NULL; 2354 } 2355 } else { 2356 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 2357 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 2358 /* 2359 * The request can be either on the aio_doneq or the 2360 * aio_cleanupq 2361 */ 2362 if (reqp == aiop->aio_doneq) 2363 aiop->aio_doneq = reqp->aio_req_next; 2364 2365 if (reqp == aiop->aio_cleanupq) 2366 aiop->aio_cleanupq = reqp->aio_req_next; 2367 } 2368 reqp->aio_req_flags &= ~AIO_DONEQ; 2369 reqp->aio_req_next = NULL; 2370 reqp->aio_req_prev = NULL; 2371 } else if ((reqp = aiop->aio_doneq) != NULL) { 2372 ASSERT(reqp->aio_req_flags & AIO_DONEQ); 2373 if (reqp == reqp->aio_req_next) { 2374 /* only one request on queue */ 2375 aiop->aio_doneq = NULL; 2376 } else { 2377 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 2378 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 2379 aiop->aio_doneq = reqp->aio_req_next; 2380 } 2381 reqp->aio_req_flags &= ~AIO_DONEQ; 2382 reqp->aio_req_next = NULL; 2383 reqp->aio_req_prev = NULL; 2384 } 2385 if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN)) 2386 cv_broadcast(&aiop->aio_waitcv); 2387 return (reqp); 2388 } 2389 2390 static int 2391 aio_req_setup( 2392 aio_req_t **reqpp, 2393 aio_t *aiop, 2394 aiocb_t *arg, 2395 aio_result_t *resultp, 2396 vnode_t *vp) 2397 { 2398 sigqueue_t *sqp = NULL; 2399 aio_req_t *reqp; 2400 struct uio *uio; 2401 struct sigevent *sigev; 2402 int error; 2403 2404 sigev = &arg->aio_sigevent; 2405 if (sigev->sigev_notify == SIGEV_SIGNAL && 2406 sigev->sigev_signo > 0 && 
sigev->sigev_signo < NSIG) { 2407 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 2408 if (sqp == NULL) 2409 return (EAGAIN); 2410 sqp->sq_func = NULL; 2411 sqp->sq_next = NULL; 2412 sqp->sq_info.si_code = SI_ASYNCIO; 2413 sqp->sq_info.si_pid = curproc->p_pid; 2414 sqp->sq_info.si_ctid = PRCTID(curproc); 2415 sqp->sq_info.si_zoneid = getzoneid(); 2416 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 2417 sqp->sq_info.si_signo = sigev->sigev_signo; 2418 sqp->sq_info.si_value = sigev->sigev_value; 2419 } 2420 2421 mutex_enter(&aiop->aio_mutex); 2422 2423 if (aiop->aio_flags & AIO_REQ_BLOCK) { 2424 mutex_exit(&aiop->aio_mutex); 2425 if (sqp) 2426 kmem_free(sqp, sizeof (sigqueue_t)); 2427 return (EIO); 2428 } 2429 /* 2430 * get an aio_reqp from the free list or allocate one 2431 * from dynamic memory. 2432 */ 2433 if (error = aio_req_alloc(&reqp, resultp)) { 2434 mutex_exit(&aiop->aio_mutex); 2435 if (sqp) 2436 kmem_free(sqp, sizeof (sigqueue_t)); 2437 return (error); 2438 } 2439 aiop->aio_pending++; 2440 aiop->aio_outstanding++; 2441 reqp->aio_req_flags = AIO_PENDING; 2442 if (sigev->sigev_notify == SIGEV_THREAD || 2443 sigev->sigev_notify == SIGEV_PORT) 2444 aio_enq(&aiop->aio_portpending, reqp, 0); 2445 mutex_exit(&aiop->aio_mutex); 2446 /* 2447 * initialize aio request. 2448 */ 2449 reqp->aio_req_fd = arg->aio_fildes; 2450 reqp->aio_req_sigqp = sqp; 2451 reqp->aio_req_iocb.iocb = NULL; 2452 reqp->aio_req_lio = NULL; 2453 reqp->aio_req_buf.b_file = vp; 2454 uio = reqp->aio_req.aio_uio; 2455 uio->uio_iovcnt = 1; 2456 uio->uio_iov->iov_base = (caddr_t)arg->aio_buf; 2457 uio->uio_iov->iov_len = arg->aio_nbytes; 2458 uio->uio_loffset = arg->aio_offset; 2459 *reqpp = reqp; 2460 return (0); 2461 } 2462 2463 /* 2464 * Allocate p_aio struct. 
2465 */ 2466 static aio_t * 2467 aio_aiop_alloc(void) 2468 { 2469 aio_t *aiop; 2470 2471 ASSERT(MUTEX_HELD(&curproc->p_lock)); 2472 2473 aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP); 2474 if (aiop) { 2475 mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL); 2476 mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT, 2477 NULL); 2478 mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL); 2479 } 2480 return (aiop); 2481 } 2482 2483 /* 2484 * Allocate an aio_req struct. 2485 */ 2486 static int 2487 aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp) 2488 { 2489 aio_req_t *reqp; 2490 aio_t *aiop = curproc->p_aio; 2491 2492 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2493 2494 if ((reqp = aiop->aio_free) != NULL) { 2495 aiop->aio_free = reqp->aio_req_next; 2496 bzero(reqp, sizeof (*reqp)); 2497 } else { 2498 /* 2499 * Check whether memory is getting tight. 2500 * This is a temporary mechanism to avoid memory 2501 * exhaustion by a single process until we come up 2502 * with a per process solution such as setrlimit(). 2503 */ 2504 if (freemem < desfree) 2505 return (EAGAIN); 2506 reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP); 2507 if (reqp == NULL) 2508 return (EAGAIN); 2509 } 2510 reqp->aio_req.aio_uio = &reqp->aio_req_uio; 2511 reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov; 2512 reqp->aio_req.aio_private = reqp; 2513 reqp->aio_req_buf.b_offset = -1; 2514 reqp->aio_req_resultp = resultp; 2515 if (aio_hash_insert(reqp, aiop)) { 2516 reqp->aio_req_next = aiop->aio_free; 2517 aiop->aio_free = reqp; 2518 return (EINVAL); 2519 } 2520 *nreqp = reqp; 2521 return (0); 2522 } 2523 2524 /* 2525 * Allocate an aio_lio_t struct. 
*/
/*
 * The entry comes from aiop->aio_lio_free when that list is not empty;
 * note that an entry taken from the free list is handed out as is (it is
 * not zeroed here).  Otherwise a zeroed aio_lio_t is allocated with
 * KM_NOSLEEP, unless freemem is below desfree.
 *
 * Must be called with aiop->aio_mutex held.
 *
 * Returns 0 with *head set, or EAGAIN when no entry could be obtained.
 */
static int
aio_lio_alloc(aio_lio_t **head)
{
	aio_lio_t *liop;
	aio_t *aiop = curproc->p_aio;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if ((liop = aiop->aio_lio_free) != NULL) {
		aiop->aio_lio_free = liop->lio_next;
	} else {
		/*
		 * Check whether memory is getting tight.
		 * This is a temporary mechanism to avoid memory
		 * exhaustion by a single process until we come up
		 * with a per process solution such as setrlimit().
		 */
		if (freemem < desfree)
			return (EAGAIN);

		liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
		if (liop == NULL)
			return (EAGAIN);
	}
	*head = liop;
	return (0);
}

/*
 * this is a special per-process thread that is only activated if
 * the process is unmapping a segment with outstanding aio. normally,
 * the process will have completed the aio before unmapping the
 * segment. If the process does unmap a segment with outstanding aio,
 * this special thread will guarantee that the locked pages due to
 * aphysio() are released, thereby permitting the segment to be
 * unmapped. In addition to this, the cleanup thread is woken up
 * during DR operations to release the locked pages.
 *
 * The function returns 0.  The only way out of the loop in the code
 * below is the `break' taken when the thread was poked, the process has
 * SEXITLWPS or SKILLED set, and exit_flag was set by an earlier pass.
 */

static int
aio_cleanup_thread(aio_t *aiop)
{
	proc_t *p = curproc;
	struct as *as = p->p_as;
	int poked = 0;		/* set when cv_wait_sig() returned zero */
	kcondvar_t *cvp;	/* cv to sleep on: aio_cleanupcv or a_cv */
	int exit_flag = 0;	/* set once pending aio has been waited for */
	int rqclnup = 0;	/* latched copy of aiop->aio_rqclnup */

	/* hold every signal except those in cantmask */
	sigfillset(&curthread->t_hold);
	sigdiffset(&curthread->t_hold, &cantmask);
	for (;;) {
		/*
		 * if a segment is being unmapped, and the current
		 * process's done queue is not empty, then every request
		 * on the doneq with locked resources should be forced
		 * to release their locks. By moving the doneq request
		 * to the cleanupq, aio_cleanup() will process the cleanupq,
		 * and place requests back onto the doneq. All requests
		 * processed by aio_cleanup() will have their physical
		 * resources unlocked.
		 */
		mutex_enter(&aiop->aio_mutex);
		if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
			aiop->aio_flags |= AIO_CLEANUP;
			mutex_enter(&as->a_contents);
			if (aiop->aio_rqclnup) {
				aiop->aio_rqclnup = 0;
				rqclnup = 1;
			}

			if ((rqclnup || AS_ISUNMAPWAIT(as)) &&
			    aiop->aio_doneq) {
				aio_req_t *doneqhead = aiop->aio_doneq;
				mutex_exit(&as->a_contents);
				aiop->aio_doneq = NULL;
				aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
			} else {
				mutex_exit(&as->a_contents);
			}
		}
		mutex_exit(&aiop->aio_mutex);
		aio_cleanup(AIO_CLEANUP_THREAD);
		/*
		 * thread should block on the cleanupcv while
		 * AIO_CLEANUP is set.
		 */
		cvp = &aiop->aio_cleanupcv;
		mutex_enter(&aiop->aio_mutex);

		/* more work has been queued in the meantime: go round again */
		if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
		    aiop->aio_notifyq != NULL ||
		    aiop->aio_portcleanupq != NULL) {
			mutex_exit(&aiop->aio_mutex);
			continue;
		}
		mutex_enter(&as->a_contents);

		/*
		 * AIO_CLEANUP determines when the cleanup thread
		 * should be active. This flag is set when
		 * the cleanup thread is awakened by as_unmap() or
		 * due to DR operations.
		 * The flag is cleared when the blocking as_unmap()
		 * that originally awakened us is allowed to
		 * complete. as_unmap() blocks when trying to
		 * unmap a segment that has SOFTLOCKed pages. when
		 * the segment's pages are all SOFTUNLOCKed,
		 * as->a_flags & AS_UNMAPWAIT should be zero.
		 *
		 * In case of cleanup request by DR, the flag is cleared
		 * once all the pending aio requests have been processed.
		 *
		 * The flag shouldn't be cleared right away if the
		 * cleanup thread was interrupted because the process
		 * is doing forkall(). This happens when cv_wait_sig()
		 * returns zero, because it was awakened by a pokelwps().
		 * If the process is not exiting, it must be doing forkall().
		 */
		if ((poked == 0) &&
		    ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
		    (aiop->aio_pending == 0))) {
			aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
			cvp = &as->a_cv;
			rqclnup = 0;
		}
		mutex_exit(&aiop->aio_mutex);
		if (poked) {
			/*
			 * If the process is exiting/killed, don't return
			 * immediately without waiting for pending I/O's
			 * and releasing the page locks.
			 */
			if (p->p_flag & (SEXITLWPS|SKILLED)) {
				/*
				 * If exit_flag is set, then it is
				 * safe to exit because we have released
				 * page locks of completed I/O's.
				 *
				 * as->a_contents is still held at this
				 * break; it is dropped after the loop.
				 */
				if (exit_flag)
					break;

				mutex_exit(&as->a_contents);

				/*
				 * Wait for all the pending aio to complete.
				 */
				mutex_enter(&aiop->aio_mutex);
				aiop->aio_flags |= AIO_REQ_BLOCK;
				while (aiop->aio_pending != 0)
					cv_wait(&aiop->aio_cleanupcv,
					    &aiop->aio_mutex);
				mutex_exit(&aiop->aio_mutex);
				exit_flag = 1;
				continue;
			} else if (p->p_flag &
			    (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
				/*
				 * hold LWP until it
				 * is continued.
				 */
				mutex_exit(&as->a_contents);
				mutex_enter(&p->p_lock);
				stop(PR_SUSPENDED, SUSPEND_NORMAL);
				mutex_exit(&p->p_lock);
				poked = 0;
				continue;
			}
			/*
			 * Neither set of flags matched: fall through to the
			 * mutex_exit() at the bottom of the loop with poked
			 * still set.
			 */
		} else {
			/*
			 * When started this thread will sleep on as->a_cv.
			 * as_unmap will awake this thread if the
			 * segment has SOFTLOCKed pages (poked = 0).
			 * 1. pokelwps() awakes this thread =>
			 *    break the loop to check SEXITLWPS, SHOLDFORK, etc
			 * 2. as_unmap awakes this thread =>
			 *    to break the loop it is necessary that
			 *    - AS_UNMAPWAIT is set (as_unmap is waiting for
			 *	memory to be unlocked)
			 *    - AIO_CLEANUP is not set
			 *	(if AIO_CLEANUP is set we have to wait for
			 *	pending requests. aio_done will send a signal
			 *	for every request which completes to continue
			 *	unmapping the corresponding address range)
			 * 3. A cleanup request will wake this thread up, ex.
			 *    by the DR operations. The aio_rqclnup flag will
			 *    be set.
			 */
			while (poked == 0) {
				/*
				 * The clean up requests that came in
				 * after we had just cleaned up, couldn't
				 * be causing the unmap thread to block - as
				 * unmap event happened first.
				 * Let aio_done() wake us up if it sees a need.
				 */
				if (aiop->aio_rqclnup &&
				    (aiop->aio_flags & AIO_CLEANUP) == 0)
					break;
				poked = !cv_wait_sig(cvp, &as->a_contents);
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_signal(cvp);
				if (aiop->aio_outstanding != 0)
					break;
			}
		}
		mutex_exit(&as->a_contents);
	}
	/* NOTE(review): no goto in this function targets the label below. */
exit:
	mutex_exit(&as->a_contents);
	ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
	aston(curthread);	/* make thread do post_syscall */
	return (0);
}

/*
 * save a reference to a user's outstanding aio in a hash list.
2744 */ 2745 static int 2746 aio_hash_insert( 2747 aio_req_t *aio_reqp, 2748 aio_t *aiop) 2749 { 2750 long index; 2751 aio_result_t *resultp = aio_reqp->aio_req_resultp; 2752 aio_req_t *current; 2753 aio_req_t **nextp; 2754 2755 index = AIO_HASH(resultp); 2756 nextp = &aiop->aio_hash[index]; 2757 while ((current = *nextp) != NULL) { 2758 if (current->aio_req_resultp == resultp) 2759 return (DUPLICATE); 2760 nextp = ¤t->aio_hash_next; 2761 } 2762 *nextp = aio_reqp; 2763 aio_reqp->aio_hash_next = NULL; 2764 return (0); 2765 } 2766 2767 static int 2768 (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *, 2769 cred_t *) 2770 { 2771 struct snode *sp; 2772 dev_t dev; 2773 struct cb_ops *cb; 2774 major_t major; 2775 int (*aio_func)(); 2776 2777 dev = vp->v_rdev; 2778 major = getmajor(dev); 2779 2780 /* 2781 * return NULL for requests to files and STREAMs so 2782 * that libaio takes care of them. 2783 */ 2784 if (vp->v_type == VCHR) { 2785 /* no stream device for kaio */ 2786 if (STREAMSTAB(major)) { 2787 return (NULL); 2788 } 2789 } else { 2790 return (NULL); 2791 } 2792 2793 /* 2794 * Check old drivers which do not have async I/O entry points. 2795 */ 2796 if (devopsp[major]->devo_rev < 3) 2797 return (NULL); 2798 2799 cb = devopsp[major]->devo_cb_ops; 2800 2801 if (cb->cb_rev < 1) 2802 return (NULL); 2803 2804 /* 2805 * Check whether this device is a block device. 2806 * Kaio is not supported for devices like tty. 2807 */ 2808 if (cb->cb_strategy == nodev || cb->cb_strategy == NULL) 2809 return (NULL); 2810 2811 /* 2812 * Clustering: If vnode is a PXFS vnode, then the device may be remote. 2813 * We cannot call the driver directly. Instead return the 2814 * PXFS functions. 2815 */ 2816 2817 if (IS_PXFSVP(vp)) { 2818 if (mode & FREAD) 2819 return (clpxfs_aio_read); 2820 else 2821 return (clpxfs_aio_write); 2822 } 2823 if (mode & FREAD) 2824 aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read; 2825 else 2826 aio_func = (cb->cb_awrite == nodev) ? 
NULL : driver_aio_write; 2827 2828 /* 2829 * Do we need this ? 2830 * nodev returns ENXIO anyway. 2831 */ 2832 if (aio_func == nodev) 2833 return (NULL); 2834 2835 sp = VTOS(vp); 2836 smark(sp, SACC); 2837 return (aio_func); 2838 } 2839 2840 /* 2841 * Clustering: We want check_vp to return a function prototyped 2842 * correctly that will be common to both PXFS and regular case. 2843 * We define this intermediate function that will do the right 2844 * thing for driver cases. 2845 */ 2846 2847 static int 2848 driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 2849 { 2850 dev_t dev; 2851 struct cb_ops *cb; 2852 2853 ASSERT(vp->v_type == VCHR); 2854 ASSERT(!IS_PXFSVP(vp)); 2855 dev = VTOS(vp)->s_dev; 2856 ASSERT(STREAMSTAB(getmajor(dev)) == NULL); 2857 2858 cb = devopsp[getmajor(dev)]->devo_cb_ops; 2859 2860 ASSERT(cb->cb_awrite != nodev); 2861 return ((*cb->cb_awrite)(dev, aio, cred_p)); 2862 } 2863 2864 /* 2865 * Clustering: We want check_vp to return a function prototyped 2866 * correctly that will be common to both PXFS and regular case. 2867 * We define this intermediate function that will do the right 2868 * thing for driver cases. 2869 */ 2870 2871 static int 2872 driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 2873 { 2874 dev_t dev; 2875 struct cb_ops *cb; 2876 2877 ASSERT(vp->v_type == VCHR); 2878 ASSERT(!IS_PXFSVP(vp)); 2879 dev = VTOS(vp)->s_dev; 2880 ASSERT(!STREAMSTAB(getmajor(dev))); 2881 2882 cb = devopsp[getmajor(dev)]->devo_cb_ops; 2883 2884 ASSERT(cb->cb_aread != nodev); 2885 return ((*cb->cb_aread)(dev, aio, cred_p)); 2886 } 2887 2888 /* 2889 * This routine is called when a largefile call is made by a 32bit 2890 * process on a ILP32 or LP64 kernel. All 64bit processes are large 2891 * file by definition and will call alio() instead. 
2892 */ 2893 static int 2894 alioLF( 2895 int mode_arg, 2896 void *aiocb_arg, 2897 int nent, 2898 void *sigev) 2899 { 2900 file_t *fp; 2901 file_t *prev_fp = NULL; 2902 int prev_mode = -1; 2903 struct vnode *vp; 2904 aio_lio_t *head; 2905 aio_req_t *reqp; 2906 aio_t *aiop; 2907 caddr_t cbplist; 2908 aiocb64_32_t cb64; 2909 aiocb64_32_t *aiocb = &cb64; 2910 aiocb64_32_t *cbp; 2911 caddr32_t *ucbp; 2912 #ifdef _LP64 2913 aiocb_t aiocb_n; 2914 #endif 2915 struct sigevent32 sigevk; 2916 sigqueue_t *sqp; 2917 int (*aio_func)(); 2918 int mode; 2919 int error = 0; 2920 int aio_errors = 0; 2921 int i; 2922 size_t ssize; 2923 int deadhead = 0; 2924 int aio_notsupported = 0; 2925 int lio_head_port; 2926 int aio_port; 2927 int aio_thread; 2928 port_kevent_t *pkevtp = NULL; 2929 int portused = 0; 2930 port_notify32_t pnotify; 2931 int event; 2932 2933 aiop = curproc->p_aio; 2934 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 2935 return (EINVAL); 2936 2937 ASSERT(get_udatamodel() == DATAMODEL_ILP32); 2938 2939 ssize = (sizeof (caddr32_t) * nent); 2940 cbplist = kmem_alloc(ssize, KM_SLEEP); 2941 ucbp = (caddr32_t *)cbplist; 2942 2943 if (copyin(aiocb_arg, cbplist, ssize) || 2944 (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) { 2945 kmem_free(cbplist, ssize); 2946 return (EFAULT); 2947 } 2948 2949 /* Event Ports */ 2950 if (sigev && 2951 (sigevk.sigev_notify == SIGEV_THREAD || 2952 sigevk.sigev_notify == SIGEV_PORT)) { 2953 if (sigevk.sigev_notify == SIGEV_THREAD) { 2954 pnotify.portnfy_port = sigevk.sigev_signo; 2955 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 2956 } else if (copyin( 2957 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr, 2958 &pnotify, sizeof (pnotify))) { 2959 kmem_free(cbplist, ssize); 2960 return (EFAULT); 2961 } 2962 error = port_alloc_event(pnotify.portnfy_port, 2963 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 2964 if (error) { 2965 if (error == ENOMEM || error == EAGAIN) 2966 error = EAGAIN; 2967 else 2968 error = EINVAL; 2969 
kmem_free(cbplist, ssize); 2970 return (error); 2971 } 2972 lio_head_port = pnotify.portnfy_port; 2973 portused = 1; 2974 } 2975 2976 /* 2977 * a list head should be allocated if notification is 2978 * enabled for this list. 2979 */ 2980 head = NULL; 2981 2982 if (mode_arg == LIO_WAIT || sigev) { 2983 mutex_enter(&aiop->aio_mutex); 2984 error = aio_lio_alloc(&head); 2985 mutex_exit(&aiop->aio_mutex); 2986 if (error) 2987 goto done; 2988 deadhead = 1; 2989 head->lio_nent = nent; 2990 head->lio_refcnt = nent; 2991 head->lio_port = -1; 2992 head->lio_portkev = NULL; 2993 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 2994 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 2995 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 2996 if (sqp == NULL) { 2997 error = EAGAIN; 2998 goto done; 2999 } 3000 sqp->sq_func = NULL; 3001 sqp->sq_next = NULL; 3002 sqp->sq_info.si_code = SI_ASYNCIO; 3003 sqp->sq_info.si_pid = curproc->p_pid; 3004 sqp->sq_info.si_ctid = PRCTID(curproc); 3005 sqp->sq_info.si_zoneid = getzoneid(); 3006 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3007 sqp->sq_info.si_signo = sigevk.sigev_signo; 3008 sqp->sq_info.si_value.sival_int = 3009 sigevk.sigev_value.sival_int; 3010 head->lio_sigqp = sqp; 3011 } else { 3012 head->lio_sigqp = NULL; 3013 } 3014 if (pkevtp) { 3015 /* 3016 * Prepare data to send when list of aiocb's 3017 * has completed. 3018 */ 3019 port_init_event(pkevtp, (uintptr_t)sigev, 3020 (void *)(uintptr_t)pnotify.portnfy_user, 3021 NULL, head); 3022 pkevtp->portkev_events = AIOLIO64; 3023 head->lio_portkev = pkevtp; 3024 head->lio_port = pnotify.portnfy_port; 3025 } 3026 } 3027 3028 for (i = 0; i < nent; i++, ucbp++) { 3029 3030 cbp = (aiocb64_32_t *)(uintptr_t)*ucbp; 3031 /* skip entry if it can't be copied. 
*/ 3032 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) { 3033 if (head) { 3034 mutex_enter(&aiop->aio_mutex); 3035 head->lio_nent--; 3036 head->lio_refcnt--; 3037 mutex_exit(&aiop->aio_mutex); 3038 } 3039 continue; 3040 } 3041 3042 /* skip if opcode for aiocb is LIO_NOP */ 3043 mode = aiocb->aio_lio_opcode; 3044 if (mode == LIO_NOP) { 3045 cbp = NULL; 3046 if (head) { 3047 mutex_enter(&aiop->aio_mutex); 3048 head->lio_nent--; 3049 head->lio_refcnt--; 3050 mutex_exit(&aiop->aio_mutex); 3051 } 3052 continue; 3053 } 3054 3055 /* increment file descriptor's ref count. */ 3056 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 3057 lio_set_uerror(&cbp->aio_resultp, EBADF); 3058 if (head) { 3059 mutex_enter(&aiop->aio_mutex); 3060 head->lio_nent--; 3061 head->lio_refcnt--; 3062 mutex_exit(&aiop->aio_mutex); 3063 } 3064 aio_errors++; 3065 continue; 3066 } 3067 3068 /* 3069 * check the permission of the partition 3070 */ 3071 if ((fp->f_flag & mode) == 0) { 3072 releasef(aiocb->aio_fildes); 3073 lio_set_uerror(&cbp->aio_resultp, EBADF); 3074 if (head) { 3075 mutex_enter(&aiop->aio_mutex); 3076 head->lio_nent--; 3077 head->lio_refcnt--; 3078 mutex_exit(&aiop->aio_mutex); 3079 } 3080 aio_errors++; 3081 continue; 3082 } 3083 3084 /* 3085 * common case where requests are to the same fd 3086 * for the same r/w operation 3087 * for UFS, need to set EBADFD 3088 */ 3089 vp = fp->f_vnode; 3090 if (fp != prev_fp || mode != prev_mode) { 3091 aio_func = check_vp(vp, mode); 3092 if (aio_func == NULL) { 3093 prev_fp = NULL; 3094 releasef(aiocb->aio_fildes); 3095 lio_set_uerror(&cbp->aio_resultp, EBADFD); 3096 aio_notsupported++; 3097 if (head) { 3098 mutex_enter(&aiop->aio_mutex); 3099 head->lio_nent--; 3100 head->lio_refcnt--; 3101 mutex_exit(&aiop->aio_mutex); 3102 } 3103 continue; 3104 } else { 3105 prev_fp = fp; 3106 prev_mode = mode; 3107 } 3108 } 3109 3110 #ifdef _LP64 3111 aiocb_LFton(aiocb, &aiocb_n); 3112 error = aio_req_setup(&reqp, aiop, &aiocb_n, 3113 (aio_result_t 
*)&cbp->aio_resultp, vp); 3114 #else 3115 error = aio_req_setupLF(&reqp, aiop, aiocb, 3116 (aio_result_t *)&cbp->aio_resultp, vp); 3117 #endif /* _LP64 */ 3118 if (error) { 3119 releasef(aiocb->aio_fildes); 3120 lio_set_uerror(&cbp->aio_resultp, error); 3121 if (head) { 3122 mutex_enter(&aiop->aio_mutex); 3123 head->lio_nent--; 3124 head->lio_refcnt--; 3125 mutex_exit(&aiop->aio_mutex); 3126 } 3127 aio_errors++; 3128 continue; 3129 } 3130 3131 reqp->aio_req_lio = head; 3132 deadhead = 0; 3133 3134 /* 3135 * Set the errno field now before sending the request to 3136 * the driver to avoid a race condition 3137 */ 3138 (void) suword32(&cbp->aio_resultp.aio_errno, 3139 EINPROGRESS); 3140 3141 reqp->aio_req_iocb.iocb32 = *ucbp; 3142 3143 event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64; 3144 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 3145 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 3146 if (aio_port | aio_thread) { 3147 port_kevent_t *lpkevp; 3148 /* 3149 * Prepare data to send with each aiocb completed. 
3150 */ 3151 if (aio_port) { 3152 void *paddr = (void *)(uintptr_t) 3153 aiocb->aio_sigevent.sigev_value.sival_ptr; 3154 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3155 error = EFAULT; 3156 } else { /* aio_thread */ 3157 pnotify.portnfy_port = 3158 aiocb->aio_sigevent.sigev_signo; 3159 pnotify.portnfy_user = 3160 aiocb->aio_sigevent.sigev_value.sival_ptr; 3161 } 3162 if (error) 3163 /* EMPTY */; 3164 else if (pkevtp != NULL && 3165 pnotify.portnfy_port == lio_head_port) 3166 error = port_dup_event(pkevtp, &lpkevp, 3167 PORT_ALLOC_DEFAULT); 3168 else 3169 error = port_alloc_event(pnotify.portnfy_port, 3170 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 3171 &lpkevp); 3172 if (error == 0) { 3173 port_init_event(lpkevp, (uintptr_t)*ucbp, 3174 (void *)(uintptr_t)pnotify.portnfy_user, 3175 aio_port_callback, reqp); 3176 lpkevp->portkev_events = event; 3177 reqp->aio_req_portkev = lpkevp; 3178 reqp->aio_req_port = pnotify.portnfy_port; 3179 } 3180 } 3181 3182 /* 3183 * send the request to driver. 3184 */ 3185 if (error == 0) { 3186 if (aiocb->aio_nbytes == 0) { 3187 clear_active_fd(aiocb->aio_fildes); 3188 aio_zerolen(reqp); 3189 continue; 3190 } 3191 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 3192 CRED()); 3193 } 3194 3195 /* 3196 * the fd's ref count is not decremented until the IO has 3197 * completed unless there was an error. 
3198 */ 3199 if (error) { 3200 releasef(aiocb->aio_fildes); 3201 lio_set_uerror(&cbp->aio_resultp, error); 3202 if (head) { 3203 mutex_enter(&aiop->aio_mutex); 3204 head->lio_nent--; 3205 head->lio_refcnt--; 3206 mutex_exit(&aiop->aio_mutex); 3207 } 3208 if (error == ENOTSUP) 3209 aio_notsupported++; 3210 else 3211 aio_errors++; 3212 lio_set_error(reqp, portused); 3213 } else { 3214 clear_active_fd(aiocb->aio_fildes); 3215 } 3216 } 3217 3218 if (aio_notsupported) { 3219 error = ENOTSUP; 3220 } else if (aio_errors) { 3221 /* 3222 * return EIO if any request failed 3223 */ 3224 error = EIO; 3225 } 3226 3227 if (mode_arg == LIO_WAIT) { 3228 mutex_enter(&aiop->aio_mutex); 3229 while (head->lio_refcnt > 0) { 3230 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 3231 mutex_exit(&aiop->aio_mutex); 3232 error = EINTR; 3233 goto done; 3234 } 3235 } 3236 mutex_exit(&aiop->aio_mutex); 3237 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE); 3238 } 3239 3240 done: 3241 kmem_free(cbplist, ssize); 3242 if (deadhead) { 3243 if (head->lio_sigqp) 3244 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 3245 if (head->lio_portkev) 3246 port_free_event(head->lio_portkev); 3247 kmem_free(head, sizeof (aio_lio_t)); 3248 } 3249 return (error); 3250 } 3251 3252 #ifdef _SYSCALL32_IMPL 3253 static void 3254 aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest) 3255 { 3256 dest->aio_fildes = src->aio_fildes; 3257 dest->aio_buf = (void *)(uintptr_t)src->aio_buf; 3258 dest->aio_nbytes = (size_t)src->aio_nbytes; 3259 dest->aio_offset = (off_t)src->aio_offset; 3260 dest->aio_reqprio = src->aio_reqprio; 3261 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 3262 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 3263 3264 /* 3265 * See comment in sigqueue32() on handling of 32-bit 3266 * sigvals in a 64-bit kernel. 
3267 */ 3268 dest->aio_sigevent.sigev_value.sival_int = 3269 (int)src->aio_sigevent.sigev_value.sival_int; 3270 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 3271 (uintptr_t)src->aio_sigevent.sigev_notify_function; 3272 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 3273 (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 3274 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 3275 dest->aio_lio_opcode = src->aio_lio_opcode; 3276 dest->aio_state = src->aio_state; 3277 dest->aio__pad[0] = src->aio__pad[0]; 3278 } 3279 #endif 3280 3281 /* 3282 * This function is used only for largefile calls made by 3283 * 32 bit applications. 3284 */ 3285 static int 3286 aio_req_setupLF( 3287 aio_req_t **reqpp, 3288 aio_t *aiop, 3289 aiocb64_32_t *arg, 3290 aio_result_t *resultp, 3291 vnode_t *vp) 3292 { 3293 sigqueue_t *sqp = NULL; 3294 aio_req_t *reqp; 3295 struct uio *uio; 3296 struct sigevent32 *sigev; 3297 int error; 3298 3299 sigev = &arg->aio_sigevent; 3300 if (sigev->sigev_notify == SIGEV_SIGNAL && 3301 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) { 3302 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3303 if (sqp == NULL) 3304 return (EAGAIN); 3305 sqp->sq_func = NULL; 3306 sqp->sq_next = NULL; 3307 sqp->sq_info.si_code = SI_ASYNCIO; 3308 sqp->sq_info.si_pid = curproc->p_pid; 3309 sqp->sq_info.si_ctid = PRCTID(curproc); 3310 sqp->sq_info.si_zoneid = getzoneid(); 3311 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3312 sqp->sq_info.si_signo = sigev->sigev_signo; 3313 sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int; 3314 } 3315 3316 mutex_enter(&aiop->aio_mutex); 3317 3318 if (aiop->aio_flags & AIO_REQ_BLOCK) { 3319 mutex_exit(&aiop->aio_mutex); 3320 if (sqp) 3321 kmem_free(sqp, sizeof (sigqueue_t)); 3322 return (EIO); 3323 } 3324 /* 3325 * get an aio_reqp from the free list or allocate one 3326 * from dynamic memory. 
3327 */ 3328 if (error = aio_req_alloc(&reqp, resultp)) { 3329 mutex_exit(&aiop->aio_mutex); 3330 if (sqp) 3331 kmem_free(sqp, sizeof (sigqueue_t)); 3332 return (error); 3333 } 3334 aiop->aio_pending++; 3335 aiop->aio_outstanding++; 3336 reqp->aio_req_flags = AIO_PENDING; 3337 if (sigev->sigev_notify == SIGEV_THREAD || 3338 sigev->sigev_notify == SIGEV_PORT) 3339 aio_enq(&aiop->aio_portpending, reqp, 0); 3340 mutex_exit(&aiop->aio_mutex); 3341 /* 3342 * initialize aio request. 3343 */ 3344 reqp->aio_req_fd = arg->aio_fildes; 3345 reqp->aio_req_sigqp = sqp; 3346 reqp->aio_req_iocb.iocb = NULL; 3347 reqp->aio_req_lio = NULL; 3348 reqp->aio_req_buf.b_file = vp; 3349 uio = reqp->aio_req.aio_uio; 3350 uio->uio_iovcnt = 1; 3351 uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf; 3352 uio->uio_iov->iov_len = arg->aio_nbytes; 3353 uio->uio_loffset = arg->aio_offset; 3354 *reqpp = reqp; 3355 return (0); 3356 } 3357 3358 /* 3359 * This routine is called when a non largefile call is made by a 32bit 3360 * process on a ILP32 or LP64 kernel. 
3361 */ 3362 static int 3363 alio32( 3364 int mode_arg, 3365 void *aiocb_arg, 3366 int nent, 3367 void *sigev) 3368 { 3369 file_t *fp; 3370 file_t *prev_fp = NULL; 3371 int prev_mode = -1; 3372 struct vnode *vp; 3373 aio_lio_t *head; 3374 aio_req_t *reqp; 3375 aio_t *aiop; 3376 caddr_t cbplist; 3377 aiocb_t cb; 3378 aiocb_t *aiocb = &cb; 3379 #ifdef _LP64 3380 aiocb32_t *cbp; 3381 caddr32_t *ucbp; 3382 aiocb32_t cb32; 3383 aiocb32_t *aiocb32 = &cb32; 3384 struct sigevent32 sigevk; 3385 #else 3386 aiocb_t *cbp, **ucbp; 3387 struct sigevent sigevk; 3388 #endif 3389 sigqueue_t *sqp; 3390 int (*aio_func)(); 3391 int mode; 3392 int error = 0; 3393 int aio_errors = 0; 3394 int i; 3395 size_t ssize; 3396 int deadhead = 0; 3397 int aio_notsupported = 0; 3398 int lio_head_port; 3399 int aio_port; 3400 int aio_thread; 3401 port_kevent_t *pkevtp = NULL; 3402 int portused = 0; 3403 #ifdef _LP64 3404 port_notify32_t pnotify; 3405 #else 3406 port_notify_t pnotify; 3407 #endif 3408 int event; 3409 3410 aiop = curproc->p_aio; 3411 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 3412 return (EINVAL); 3413 3414 #ifdef _LP64 3415 ssize = (sizeof (caddr32_t) * nent); 3416 #else 3417 ssize = (sizeof (aiocb_t *) * nent); 3418 #endif 3419 cbplist = kmem_alloc(ssize, KM_SLEEP); 3420 ucbp = (void *)cbplist; 3421 3422 if (copyin(aiocb_arg, cbplist, ssize) || 3423 (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) { 3424 kmem_free(cbplist, ssize); 3425 return (EFAULT); 3426 } 3427 3428 /* Event Ports */ 3429 if (sigev && 3430 (sigevk.sigev_notify == SIGEV_THREAD || 3431 sigevk.sigev_notify == SIGEV_PORT)) { 3432 if (sigevk.sigev_notify == SIGEV_THREAD) { 3433 pnotify.portnfy_port = sigevk.sigev_signo; 3434 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 3435 } else if (copyin( 3436 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr, 3437 &pnotify, sizeof (pnotify))) { 3438 kmem_free(cbplist, ssize); 3439 return (EFAULT); 3440 } 3441 error = 
port_alloc_event(pnotify.portnfy_port, 3442 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 3443 if (error) { 3444 if (error == ENOMEM || error == EAGAIN) 3445 error = EAGAIN; 3446 else 3447 error = EINVAL; 3448 kmem_free(cbplist, ssize); 3449 return (error); 3450 } 3451 lio_head_port = pnotify.portnfy_port; 3452 portused = 1; 3453 } 3454 3455 /* 3456 * a list head should be allocated if notification is 3457 * enabled for this list. 3458 */ 3459 head = NULL; 3460 3461 if (mode_arg == LIO_WAIT || sigev) { 3462 mutex_enter(&aiop->aio_mutex); 3463 error = aio_lio_alloc(&head); 3464 mutex_exit(&aiop->aio_mutex); 3465 if (error) 3466 goto done; 3467 deadhead = 1; 3468 head->lio_nent = nent; 3469 head->lio_refcnt = nent; 3470 head->lio_port = -1; 3471 head->lio_portkev = NULL; 3472 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 3473 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 3474 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3475 if (sqp == NULL) { 3476 error = EAGAIN; 3477 goto done; 3478 } 3479 sqp->sq_func = NULL; 3480 sqp->sq_next = NULL; 3481 sqp->sq_info.si_code = SI_ASYNCIO; 3482 sqp->sq_info.si_pid = curproc->p_pid; 3483 sqp->sq_info.si_ctid = PRCTID(curproc); 3484 sqp->sq_info.si_zoneid = getzoneid(); 3485 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3486 sqp->sq_info.si_signo = sigevk.sigev_signo; 3487 sqp->sq_info.si_value.sival_int = 3488 sigevk.sigev_value.sival_int; 3489 head->lio_sigqp = sqp; 3490 } else { 3491 head->lio_sigqp = NULL; 3492 } 3493 if (pkevtp) { 3494 /* 3495 * Prepare data to send when list of aiocb's has 3496 * completed. 3497 */ 3498 port_init_event(pkevtp, (uintptr_t)sigev, 3499 (void *)(uintptr_t)pnotify.portnfy_user, 3500 NULL, head); 3501 pkevtp->portkev_events = AIOLIO; 3502 head->lio_portkev = pkevtp; 3503 head->lio_port = pnotify.portnfy_port; 3504 } 3505 } 3506 3507 for (i = 0; i < nent; i++, ucbp++) { 3508 3509 /* skip entry if it can't be copied. 
*/ 3510 #ifdef _LP64 3511 cbp = (aiocb32_t *)(uintptr_t)*ucbp; 3512 if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32))) 3513 #else 3514 cbp = (aiocb_t *)*ucbp; 3515 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) 3516 #endif 3517 { 3518 if (head) { 3519 mutex_enter(&aiop->aio_mutex); 3520 head->lio_nent--; 3521 head->lio_refcnt--; 3522 mutex_exit(&aiop->aio_mutex); 3523 } 3524 continue; 3525 } 3526 #ifdef _LP64 3527 /* 3528 * copy 32 bit structure into 64 bit structure 3529 */ 3530 aiocb_32ton(aiocb32, aiocb); 3531 #endif /* _LP64 */ 3532 3533 /* skip if opcode for aiocb is LIO_NOP */ 3534 mode = aiocb->aio_lio_opcode; 3535 if (mode == LIO_NOP) { 3536 cbp = NULL; 3537 if (head) { 3538 mutex_enter(&aiop->aio_mutex); 3539 head->lio_nent--; 3540 head->lio_refcnt--; 3541 mutex_exit(&aiop->aio_mutex); 3542 } 3543 continue; 3544 } 3545 3546 /* increment file descriptor's ref count. */ 3547 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 3548 lio_set_uerror(&cbp->aio_resultp, EBADF); 3549 if (head) { 3550 mutex_enter(&aiop->aio_mutex); 3551 head->lio_nent--; 3552 head->lio_refcnt--; 3553 mutex_exit(&aiop->aio_mutex); 3554 } 3555 aio_errors++; 3556 continue; 3557 } 3558 3559 /* 3560 * check the permission of the partition 3561 */ 3562 if ((fp->f_flag & mode) == 0) { 3563 releasef(aiocb->aio_fildes); 3564 lio_set_uerror(&cbp->aio_resultp, EBADF); 3565 if (head) { 3566 mutex_enter(&aiop->aio_mutex); 3567 head->lio_nent--; 3568 head->lio_refcnt--; 3569 mutex_exit(&aiop->aio_mutex); 3570 } 3571 aio_errors++; 3572 continue; 3573 } 3574 3575 /* 3576 * common case where requests are to the same fd 3577 * for the same r/w operation 3578 * for UFS, need to set EBADFD 3579 */ 3580 vp = fp->f_vnode; 3581 if (fp != prev_fp || mode != prev_mode) { 3582 aio_func = check_vp(vp, mode); 3583 if (aio_func == NULL) { 3584 prev_fp = NULL; 3585 releasef(aiocb->aio_fildes); 3586 lio_set_uerror(&cbp->aio_resultp, EBADFD); 3587 aio_notsupported++; 3588 if (head) { 3589 
mutex_enter(&aiop->aio_mutex); 3590 head->lio_nent--; 3591 head->lio_refcnt--; 3592 mutex_exit(&aiop->aio_mutex); 3593 } 3594 continue; 3595 } else { 3596 prev_fp = fp; 3597 prev_mode = mode; 3598 } 3599 } 3600 3601 error = aio_req_setup(&reqp, aiop, aiocb, 3602 (aio_result_t *)&cbp->aio_resultp, vp); 3603 if (error) { 3604 releasef(aiocb->aio_fildes); 3605 lio_set_uerror(&cbp->aio_resultp, error); 3606 if (head) { 3607 mutex_enter(&aiop->aio_mutex); 3608 head->lio_nent--; 3609 head->lio_refcnt--; 3610 mutex_exit(&aiop->aio_mutex); 3611 } 3612 aio_errors++; 3613 continue; 3614 } 3615 3616 reqp->aio_req_lio = head; 3617 deadhead = 0; 3618 3619 /* 3620 * Set the errno field now before sending the request to 3621 * the driver to avoid a race condition 3622 */ 3623 (void) suword32(&cbp->aio_resultp.aio_errno, 3624 EINPROGRESS); 3625 3626 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp; 3627 3628 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE; 3629 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 3630 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 3631 if (aio_port | aio_thread) { 3632 port_kevent_t *lpkevp; 3633 /* 3634 * Prepare data to send with each aiocb completed. 
3635 */ 3636 #ifdef _LP64 3637 if (aio_port) { 3638 void *paddr = (void *)(uintptr_t) 3639 aiocb32->aio_sigevent.sigev_value.sival_ptr; 3640 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3641 error = EFAULT; 3642 } else { /* aio_thread */ 3643 pnotify.portnfy_port = 3644 aiocb32->aio_sigevent.sigev_signo; 3645 pnotify.portnfy_user = 3646 aiocb32->aio_sigevent.sigev_value.sival_ptr; 3647 } 3648 #else 3649 if (aio_port) { 3650 void *paddr = 3651 aiocb->aio_sigevent.sigev_value.sival_ptr; 3652 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3653 error = EFAULT; 3654 } else { /* aio_thread */ 3655 pnotify.portnfy_port = 3656 aiocb->aio_sigevent.sigev_signo; 3657 pnotify.portnfy_user = 3658 aiocb->aio_sigevent.sigev_value.sival_ptr; 3659 } 3660 #endif 3661 if (error) 3662 /* EMPTY */; 3663 else if (pkevtp != NULL && 3664 pnotify.portnfy_port == lio_head_port) 3665 error = port_dup_event(pkevtp, &lpkevp, 3666 PORT_ALLOC_DEFAULT); 3667 else 3668 error = port_alloc_event(pnotify.portnfy_port, 3669 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 3670 &lpkevp); 3671 if (error == 0) { 3672 port_init_event(lpkevp, (uintptr_t)cbp, 3673 (void *)(uintptr_t)pnotify.portnfy_user, 3674 aio_port_callback, reqp); 3675 lpkevp->portkev_events = event; 3676 reqp->aio_req_portkev = lpkevp; 3677 reqp->aio_req_port = pnotify.portnfy_port; 3678 } 3679 } 3680 3681 /* 3682 * send the request to driver. 3683 */ 3684 if (error == 0) { 3685 if (aiocb->aio_nbytes == 0) { 3686 clear_active_fd(aiocb->aio_fildes); 3687 aio_zerolen(reqp); 3688 continue; 3689 } 3690 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 3691 CRED()); 3692 } 3693 3694 /* 3695 * the fd's ref count is not decremented until the IO has 3696 * completed unless there was an error. 
3697 */ 3698 if (error) { 3699 releasef(aiocb->aio_fildes); 3700 lio_set_uerror(&cbp->aio_resultp, error); 3701 if (head) { 3702 mutex_enter(&aiop->aio_mutex); 3703 head->lio_nent--; 3704 head->lio_refcnt--; 3705 mutex_exit(&aiop->aio_mutex); 3706 } 3707 if (error == ENOTSUP) 3708 aio_notsupported++; 3709 else 3710 aio_errors++; 3711 lio_set_error(reqp, portused); 3712 } else { 3713 clear_active_fd(aiocb->aio_fildes); 3714 } 3715 } 3716 3717 if (aio_notsupported) { 3718 error = ENOTSUP; 3719 } else if (aio_errors) { 3720 /* 3721 * return EIO if any request failed 3722 */ 3723 error = EIO; 3724 } 3725 3726 if (mode_arg == LIO_WAIT) { 3727 mutex_enter(&aiop->aio_mutex); 3728 while (head->lio_refcnt > 0) { 3729 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 3730 mutex_exit(&aiop->aio_mutex); 3731 error = EINTR; 3732 goto done; 3733 } 3734 } 3735 mutex_exit(&aiop->aio_mutex); 3736 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32); 3737 } 3738 3739 done: 3740 kmem_free(cbplist, ssize); 3741 if (deadhead) { 3742 if (head->lio_sigqp) 3743 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 3744 if (head->lio_portkev) 3745 port_free_event(head->lio_portkev); 3746 kmem_free(head, sizeof (aio_lio_t)); 3747 } 3748 return (error); 3749 } 3750 3751 3752 #ifdef _SYSCALL32_IMPL 3753 void 3754 aiocb_32ton(aiocb32_t *src, aiocb_t *dest) 3755 { 3756 dest->aio_fildes = src->aio_fildes; 3757 dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf; 3758 dest->aio_nbytes = (size_t)src->aio_nbytes; 3759 dest->aio_offset = (off_t)src->aio_offset; 3760 dest->aio_reqprio = src->aio_reqprio; 3761 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 3762 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 3763 3764 /* 3765 * See comment in sigqueue32() on handling of 32-bit 3766 * sigvals in a 64-bit kernel. 
3767 */ 3768 dest->aio_sigevent.sigev_value.sival_int = 3769 (int)src->aio_sigevent.sigev_value.sival_int; 3770 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 3771 (uintptr_t)src->aio_sigevent.sigev_notify_function; 3772 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 3773 (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 3774 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 3775 dest->aio_lio_opcode = src->aio_lio_opcode; 3776 dest->aio_state = src->aio_state; 3777 dest->aio__pad[0] = src->aio__pad[0]; 3778 } 3779 #endif /* _SYSCALL32_IMPL */ 3780 3781 /* 3782 * aio_port_callback() is called just before the event is retrieved from the 3783 * port. The task of this callback function is to finish the work of the 3784 * transaction for the application, it means : 3785 * - copyout transaction data to the application 3786 * (this thread is running in the right process context) 3787 * - keep trace of the transaction (update of counters). 3788 * - free allocated buffers 3789 * The aiocb pointer is the object element of the port_kevent_t structure. 3790 * 3791 * flag : 3792 * PORT_CALLBACK_DEFAULT : do copyout and free resources 3793 * PORT_CALLBACK_CLOSE : don't do copyout, free resources 3794 */ 3795 3796 /*ARGSUSED*/ 3797 int 3798 aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp) 3799 { 3800 aio_t *aiop = curproc->p_aio; 3801 aio_req_t *reqp = arg; 3802 struct iovec *iov; 3803 struct buf *bp; 3804 void *resultp; 3805 3806 if (pid != curproc->p_pid) { 3807 /* wrong proc !!, can not deliver data here ... 
*/ 3808 return (EACCES); 3809 } 3810 3811 mutex_enter(&aiop->aio_portq_mutex); 3812 reqp->aio_req_portkev = NULL; 3813 aio_req_remove_portq(aiop, reqp); /* remove request from portq */ 3814 mutex_exit(&aiop->aio_portq_mutex); 3815 aphysio_unlock(reqp); /* unlock used pages */ 3816 mutex_enter(&aiop->aio_mutex); 3817 if (reqp->aio_req_flags & AIO_COPYOUTDONE) { 3818 aio_req_free_port(aiop, reqp); /* back to free list */ 3819 mutex_exit(&aiop->aio_mutex); 3820 return (0); 3821 } 3822 3823 iov = reqp->aio_req_uio.uio_iov; 3824 bp = &reqp->aio_req_buf; 3825 resultp = (void *)reqp->aio_req_resultp; 3826 aio_req_free_port(aiop, reqp); /* request struct back to free list */ 3827 mutex_exit(&aiop->aio_mutex); 3828 if (flag == PORT_CALLBACK_DEFAULT) 3829 aio_copyout_result_port(iov, bp, resultp); 3830 return (0); 3831 } 3832