1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Kernel asynchronous I/O. 29 * This is only for raw devices now (as of Nov. 1993). 30 */ 31 32 #include <sys/types.h> 33 #include <sys/errno.h> 34 #include <sys/conf.h> 35 #include <sys/file.h> 36 #include <sys/fs/snode.h> 37 #include <sys/unistd.h> 38 #include <sys/cmn_err.h> 39 #include <vm/as.h> 40 #include <vm/faultcode.h> 41 #include <sys/sysmacros.h> 42 #include <sys/procfs.h> 43 #include <sys/kmem.h> 44 #include <sys/autoconf.h> 45 #include <sys/ddi_impldefs.h> 46 #include <sys/sunddi.h> 47 #include <sys/aio_impl.h> 48 #include <sys/debug.h> 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/vmsystm.h> 52 #include <sys/fs/pxfs_ki.h> 53 #include <sys/contract/process_impl.h> 54 55 /* 56 * external entry point. 57 */ 58 #ifdef _LP64 59 static int64_t kaioc(long, long, long, long, long, long); 60 #endif 61 static int kaio(ulong_t *, rval_t *); 62 63 64 #define AIO_64 0 65 #define AIO_32 1 66 #define AIO_LARGEFILE 2 67 68 /* 69 * implementation specific functions (private) 70 */ 71 #ifdef _LP64 72 static int alio(int, aiocb_t **, int, struct sigevent *); 73 #endif 74 static int aionotify(void); 75 static int aioinit(void); 76 static int aiostart(void); 77 static void alio_cleanup(aio_t *, aiocb_t **, int, int); 78 static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *, 79 cred_t *); 80 static void lio_set_error(aio_req_t *, int portused); 81 static aio_t *aio_aiop_alloc(); 82 static int aio_req_alloc(aio_req_t **, aio_result_t *); 83 static int aio_lio_alloc(aio_lio_t **); 84 static aio_req_t *aio_req_done(void *); 85 static aio_req_t *aio_req_remove(aio_req_t *); 86 static int aio_req_find(aio_result_t *, aio_req_t **); 87 static int aio_hash_insert(struct aio_req_t *, aio_t *); 88 static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *, 89 aio_result_t *, vnode_t *); 90 static int aio_cleanup_thread(aio_t *); 91 static aio_lio_t *aio_list_get(aio_result_t *); 92 static void lio_set_uerror(void *, int); 93 extern void aio_zerolen(aio_req_t *); 94 static int aiowait(struct timeval *, int, long *); 95 static int aiowaitn(void *, uint_t, uint_t *, timespec_t *); 96 static int aio_unlock_requests(caddr_t iocblist, int iocb_index, 97 aio_req_t *reqlist, aio_t *aiop, model_t model); 98 static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max); 99 static int aiosuspend(void *, int, struct timespec *, int, 100 long *, int); 101 static int aliowait(int, void *, int, void *, int); 102 static int aioerror(void *, int); 103 static int aio_cancel(int, void *, long *, int); 104 static int arw(int, int, char *, int, offset_t, aio_result_t *, int); 105 static int aiorw(int, void *, int, int); 106 107 static int alioLF(int, void *, int, void *); 108 static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *, 109 aio_result_t *, vnode_t *); 110 static int alio32(int, void *, int, void *); 111 static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p); 112 static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p); 113 114 #ifdef _SYSCALL32_IMPL 115 static void aiocb_LFton(aiocb64_32_t *, aiocb_t *); 116 void aiocb_32ton(aiocb32_t *, aiocb_t *); 117 #endif /* _SYSCALL32_IMPL */ 118 119 /* 120 * implementation specific functions (external) 121 */ 122 void aio_req_free(aio_t *, aio_req_t *); 123 124 /* 125 * Event Port framework 126 */ 127 128 void aio_req_free_port(aio_t *, aio_req_t *); 129 static int aio_port_callback(void *, int *, pid_t, int, void *); 130 131 /* 132 * This is the loadable module wrapper. 133 */ 134 #include <sys/modctl.h> 135 #include <sys/syscall.h> 136 137 #ifdef _LP64 138 139 static struct sysent kaio_sysent = { 140 6, 141 SE_NOUNLOAD | SE_64RVAL | SE_ARGC, 142 (int (*)())kaioc 143 }; 144 145 #ifdef _SYSCALL32_IMPL 146 static struct sysent kaio_sysent32 = { 147 7, 148 SE_NOUNLOAD | SE_64RVAL, 149 kaio 150 }; 151 #endif /* _SYSCALL32_IMPL */ 152 153 #else /* _LP64 */ 154 155 static struct sysent kaio_sysent = { 156 7, 157 SE_NOUNLOAD | SE_32RVAL1, 158 kaio 159 }; 160 161 #endif /* _LP64 */ 162 163 /* 164 * Module linkage information for the kernel. 165 */ 166 167 static struct modlsys modlsys = { 168 &mod_syscallops, 169 "kernel Async I/O", 170 &kaio_sysent 171 }; 172 173 #ifdef _SYSCALL32_IMPL 174 static struct modlsys modlsys32 = { 175 &mod_syscallops32, 176 "kernel Async I/O for 32 bit compatibility", 177 &kaio_sysent32 178 }; 179 #endif /* _SYSCALL32_IMPL */ 180 181 182 static struct modlinkage modlinkage = { 183 MODREV_1, 184 &modlsys, 185 #ifdef _SYSCALL32_IMPL 186 &modlsys32, 187 #endif 188 NULL 189 }; 190 191 int 192 _init(void) 193 { 194 int retval; 195 196 if ((retval = mod_install(&modlinkage)) != 0) 197 return (retval); 198 199 return (0); 200 } 201 202 int 203 _fini(void) 204 { 205 int retval; 206 207 retval = mod_remove(&modlinkage); 208 209 return (retval); 210 } 211 212 int 213 _info(struct modinfo *modinfop) 214 { 215 return (mod_info(&modlinkage, modinfop)); 216 } 217 218 #ifdef _LP64 219 static int64_t 220 kaioc( 221 long a0, 222 long a1, 223 long a2, 224 long a3, 225 long a4, 226 long a5) 227 { 228 int error; 229 long rval = 0; 230 231 switch ((int)a0 & ~AIO_POLL_BIT) { 232 case AIOREAD: 233 error = arw((int)a0, (int)a1, (char *)a2, (int)a3, 234 (offset_t)a4, (aio_result_t *)a5, FREAD); 235 break; 236 case AIOWRITE: 237 error = arw((int)a0, (int)a1, (char *)a2, (int)a3, 238 (offset_t)a4, (aio_result_t *)a5, FWRITE); 239 break; 240 case AIOWAIT: 241 error = aiowait((struct timeval *)a1, (int)a2, &rval); 242 break; 243 case AIOWAITN: 244 error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3, 245 (timespec_t *)a4); 246 break; 247 case AIONOTIFY: 248 error = aionotify(); 249 break; 250 case AIOINIT: 251 error = aioinit(); 252 break; 253 case AIOSTART: 254 error = aiostart(); 255 break; 256 case AIOLIO: 257 error = alio((int)a1, (aiocb_t **)a2, (int)a3, 258 (struct sigevent *)a4); 259 break; 260 case AIOLIOWAIT: 261 error = aliowait((int)a1, (void *)a2, (int)a3, 262 (struct sigevent *)a4, AIO_64); 263 break; 264 case AIOSUSPEND: 265 error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3, 266 (int)a4, &rval, AIO_64); 267 break; 268 case AIOERROR: 269 error = aioerror((void *)a1, AIO_64); 270 break; 271 case AIOAREAD: 272 error = aiorw((int)a0, (void *)a1, FREAD, AIO_64); 273 break; 274 case AIOAWRITE: 275 error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64); 276 break; 277 case AIOCANCEL: 278 error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64); 279 break; 280 281 /* 282 * The large file related stuff is valid only for 283 * 32 bit kernel and not for 64 bit kernel 284 * On 64 bit kernel we convert large file calls 285 * to regular 64bit calls. 286 */ 287 288 default: 289 error = EINVAL; 290 } 291 if (error) 292 return ((int64_t)set_errno(error)); 293 return (rval); 294 } 295 #endif 296 297 static int 298 kaio( 299 ulong_t *uap, 300 rval_t *rvp) 301 { 302 long rval = 0; 303 int error = 0; 304 offset_t off; 305 306 307 rvp->r_vals = 0; 308 #if defined(_LITTLE_ENDIAN) 309 off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4]; 310 #else 311 off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5]; 312 #endif 313 314 switch (uap[0] & ~AIO_POLL_BIT) { 315 /* 316 * It must be the 32 bit system call on 64 bit kernel 317 */ 318 case AIOREAD: 319 return (arw((int)uap[0], (int)uap[1], (char *)uap[2], 320 (int)uap[3], off, (aio_result_t *)uap[6], FREAD)); 321 case AIOWRITE: 322 return (arw((int)uap[0], (int)uap[1], (char *)uap[2], 323 (int)uap[3], off, (aio_result_t *)uap[6], FWRITE)); 324 case AIOWAIT: 325 error = aiowait((struct timeval *)uap[1], (int)uap[2], 326 &rval); 327 break; 328 case AIOWAITN: 329 error = aiowaitn((void *)uap[1], (uint_t)uap[2], 330 (uint_t *)uap[3], (timespec_t *)uap[4]); 331 break; 332 case AIONOTIFY: 333 return (aionotify()); 334 case AIOINIT: 335 return (aioinit()); 336 case AIOSTART: 337 return (aiostart()); 338 case AIOLIO: 339 return (alio32((int)uap[1], (void *)uap[2], (int)uap[3], 340 (void *)uap[4])); 341 case AIOLIOWAIT: 342 return (aliowait((int)uap[1], (void *)uap[2], 343 (int)uap[3], (struct sigevent *)uap[4], AIO_32)); 344 case AIOSUSPEND: 345 error = aiosuspend((void *)uap[1], (int)uap[2], 346 (timespec_t *)uap[3], (int)uap[4], 347 &rval, AIO_32); 348 break; 349 case AIOERROR: 350 return (aioerror((void *)uap[1], AIO_32)); 351 case AIOAREAD: 352 return (aiorw((int)uap[0], (void *)uap[1], 353 FREAD, AIO_32)); 354 case AIOAWRITE: 355 return (aiorw((int)uap[0], (void *)uap[1], 356 FWRITE, AIO_32)); 357 case AIOCANCEL: 358 error = (aio_cancel((int)uap[1], (void *)uap[2], &rval, 359 AIO_32)); 360 break; 361 case AIOLIO64: 362 return (alioLF((int)uap[1], (void *)uap[2], 363 (int)uap[3], (void *)uap[4])); 364 case AIOLIOWAIT64: 365 return (aliowait(uap[1], (void *)uap[2], 366 (int)uap[3], (void *)uap[4], AIO_LARGEFILE)); 367 case AIOSUSPEND64: 368 error = aiosuspend((void *)uap[1], (int)uap[2], 369 (timespec_t *)uap[3], (int)uap[4], &rval, 370 AIO_LARGEFILE); 371 break; 372 case AIOERROR64: 373 return (aioerror((void *)uap[1], AIO_LARGEFILE)); 374 case AIOAREAD64: 375 return (aiorw((int)uap[0], (void *)uap[1], FREAD, 376 AIO_LARGEFILE)); 377 case AIOAWRITE64: 378 return (aiorw((int)uap[0], (void *)uap[1], FWRITE, 379 AIO_LARGEFILE)); 380 case AIOCANCEL64: 381 error = (aio_cancel((int)uap[1], (void *)uap[2], 382 &rval, AIO_LARGEFILE)); 383 break; 384 default: 385 return (EINVAL); 386 } 387 388 rvp->r_val1 = rval; 389 return (error); 390 } 391 392 /* 393 * wake up LWPs in this process that are sleeping in 394 * aiowait(). 395 */ 396 static int 397 aionotify(void) 398 { 399 aio_t *aiop; 400 401 aiop = curproc->p_aio; 402 if (aiop == NULL) 403 return (0); 404 405 mutex_enter(&aiop->aio_mutex); 406 aiop->aio_notifycnt++; 407 cv_broadcast(&aiop->aio_waitcv); 408 mutex_exit(&aiop->aio_mutex); 409 410 return (0); 411 } 412 413 static int 414 timeval2reltime(struct timeval *timout, timestruc_t *rqtime, 415 timestruc_t **rqtp, int *blocking) 416 { 417 #ifdef _SYSCALL32_IMPL 418 struct timeval32 wait_time_32; 419 #endif 420 struct timeval wait_time; 421 model_t model = get_udatamodel(); 422 423 *rqtp = NULL; 424 if (timout == NULL) { /* wait indefinitely */ 425 *blocking = 1; 426 return (0); 427 } 428 429 /* 430 * Need to correctly compare with the -1 passed in for a user 431 * address pointer, with both 32 bit and 64 bit apps. 432 */ 433 if (model == DATAMODEL_NATIVE) { 434 if ((intptr_t)timout == (intptr_t)-1) { /* don't wait */ 435 *blocking = 0; 436 return (0); 437 } 438 439 if (copyin(timout, &wait_time, sizeof (wait_time))) 440 return (EFAULT); 441 } 442 #ifdef _SYSCALL32_IMPL 443 else { 444 /* 445 * -1 from a 32bit app. It will not get sign extended. 446 * don't wait if -1. 447 */ 448 if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) { 449 *blocking = 0; 450 return (0); 451 } 452 453 if (copyin(timout, &wait_time_32, sizeof (wait_time_32))) 454 return (EFAULT); 455 TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32); 456 } 457 #endif /* _SYSCALL32_IMPL */ 458 459 if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) { /* don't wait */ 460 *blocking = 0; 461 return (0); 462 } 463 464 if (wait_time.tv_sec < 0 || 465 wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC) 466 return (EINVAL); 467 468 rqtime->tv_sec = wait_time.tv_sec; 469 rqtime->tv_nsec = wait_time.tv_usec * 1000; 470 *rqtp = rqtime; 471 *blocking = 1; 472 473 return (0); 474 } 475 476 static int 477 timespec2reltime(timespec_t *timout, timestruc_t *rqtime, 478 timestruc_t **rqtp, int *blocking) 479 { 480 #ifdef _SYSCALL32_IMPL 481 timespec32_t wait_time_32; 482 #endif 483 model_t model = get_udatamodel(); 484 485 *rqtp = NULL; 486 if (timout == NULL) { 487 *blocking = 1; 488 return (0); 489 } 490 491 if (model == DATAMODEL_NATIVE) { 492 if (copyin(timout, rqtime, sizeof (*rqtime))) 493 return (EFAULT); 494 } 495 #ifdef _SYSCALL32_IMPL 496 else { 497 if (copyin(timout, &wait_time_32, sizeof (wait_time_32))) 498 return (EFAULT); 499 TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32); 500 } 501 #endif /* _SYSCALL32_IMPL */ 502 503 if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) { 504 *blocking = 0; 505 return (0); 506 } 507 508 if (rqtime->tv_sec < 0 || 509 rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC) 510 return (EINVAL); 511 512 *rqtp = rqtime; 513 *blocking = 1; 514 515 return (0); 516 } 517 518 /*ARGSUSED*/ 519 static int 520 aiowait( 521 struct timeval *timout, 522 int dontblockflg, 523 long *rval) 524 { 525 int error; 526 aio_t *aiop; 527 aio_req_t *reqp; 528 clock_t status; 529 int blocking; 530 int timecheck; 531 timestruc_t rqtime; 532 timestruc_t *rqtp; 533 534 aiop = curproc->p_aio; 535 if (aiop == NULL) 536 return (EINVAL); 537 538 /* 539 * Establish the absolute future time for the timeout. 540 */ 541 error = timeval2reltime(timout, &rqtime, &rqtp, &blocking); 542 if (error) 543 return (error); 544 if (rqtp) { 545 timestruc_t now; 546 timecheck = timechanged; 547 gethrestime(&now); 548 timespecadd(rqtp, &now); 549 } 550 551 mutex_enter(&aiop->aio_mutex); 552 for (;;) { 553 /* process requests on poll queue */ 554 if (aiop->aio_pollq) { 555 mutex_exit(&aiop->aio_mutex); 556 aio_cleanup(0); 557 mutex_enter(&aiop->aio_mutex); 558 } 559 if ((reqp = aio_req_remove(NULL)) != NULL) { 560 *rval = (long)reqp->aio_req_resultp; 561 break; 562 } 563 /* user-level done queue might not be empty */ 564 if (aiop->aio_notifycnt > 0) { 565 aiop->aio_notifycnt--; 566 *rval = 1; 567 break; 568 } 569 /* don't block if no outstanding aio */ 570 if (aiop->aio_outstanding == 0 && dontblockflg) { 571 error = EINVAL; 572 break; 573 } 574 if (blocking) { 575 status = cv_waituntil_sig(&aiop->aio_waitcv, 576 &aiop->aio_mutex, rqtp, timecheck); 577 578 if (status > 0) /* check done queue again */ 579 continue; 580 if (status == 0) { /* interrupted by a signal */ 581 error = EINTR; 582 *rval = -1; 583 } else { /* timer expired */ 584 error = ETIME; 585 } 586 } 587 break; 588 } 589 mutex_exit(&aiop->aio_mutex); 590 if (reqp) { 591 aphysio_unlock(reqp); 592 aio_copyout_result(reqp); 593 mutex_enter(&aiop->aio_mutex); 594 aio_req_free(aiop, reqp); 595 mutex_exit(&aiop->aio_mutex); 596 } 597 return (error); 598 } 599 600 /* 601 * aiowaitn can be used to reap completed asynchronous requests submitted with 602 * lio_listio, aio_read or aio_write. 603 * This function only reaps asynchronous raw I/Os. 604 */ 605 606 /*ARGSUSED*/ 607 static int 608 aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout) 609 { 610 int error = 0; 611 aio_t *aiop; 612 aio_req_t *reqlist = NULL; 613 caddr_t iocblist = NULL; /* array of iocb ptr's */ 614 uint_t waitcnt, cnt = 0; /* iocb cnt */ 615 size_t iocbsz; /* users iocb size */ 616 size_t riocbsz; /* returned iocb size */ 617 int iocb_index = 0; 618 model_t model = get_udatamodel(); 619 int blocking = 1; 620 int timecheck; 621 timestruc_t rqtime; 622 timestruc_t *rqtp; 623 624 aiop = curproc->p_aio; 625 if (aiop == NULL || nent == 0 || nent > _AIO_LISTIO_MAX) 626 return (EINVAL); 627 628 if (aiop->aio_outstanding == 0) 629 return (EAGAIN); 630 631 if (copyin(nwait, &waitcnt, sizeof (uint_t))) 632 return (EFAULT); 633 634 /* set *nwait to zero, if we must return prematurely */ 635 if (copyout(&cnt, nwait, sizeof (uint_t))) 636 return (EFAULT); 637 638 if (waitcnt == 0) { 639 blocking = 0; 640 rqtp = NULL; 641 waitcnt = nent; 642 } else { 643 error = timespec2reltime(timout, &rqtime, &rqtp, &blocking); 644 if (error) 645 return (error); 646 } 647 648 if (model == DATAMODEL_NATIVE) 649 iocbsz = (sizeof (aiocb_t *) * nent); 650 #ifdef _SYSCALL32_IMPL 651 else 652 iocbsz = (sizeof (caddr32_t) * nent); 653 #endif /* _SYSCALL32_IMPL */ 654 655 /* 656 * Only one aio_waitn call is allowed at a time. 657 * The active aio_waitn will collect all requests 658 * out of the "done" list and if necessary it will wait 659 * for some/all pending requests to fulfill the nwait 660 * parameter. 661 * A second or further aio_waitn calls will sleep here 662 * until the active aio_waitn finishes and leaves the kernel 663 * If the second call does not block (poll), then return 664 * immediately with the error code : EAGAIN. 665 * If the second call should block, then sleep here, but 666 * do not touch the timeout. The timeout starts when this 667 * aio_waitn-call becomes active. 668 */ 669 670 mutex_enter(&aiop->aio_mutex); 671 672 while (aiop->aio_flags & AIO_WAITN) { 673 if (blocking == 0) { 674 mutex_exit(&aiop->aio_mutex); 675 return (EAGAIN); 676 } 677 678 /* block, no timeout */ 679 aiop->aio_flags |= AIO_WAITN_PENDING; 680 if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) { 681 mutex_exit(&aiop->aio_mutex); 682 return (EINTR); 683 } 684 } 685 686 /* 687 * Establish the absolute future time for the timeout. 688 */ 689 if (rqtp) { 690 timestruc_t now; 691 timecheck = timechanged; 692 gethrestime(&now); 693 timespecadd(rqtp, &now); 694 } 695 696 if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) { 697 kmem_free(aiop->aio_iocb, aiop->aio_iocbsz); 698 aiop->aio_iocb = NULL; 699 } 700 701 if (aiop->aio_iocb == NULL) { 702 iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP); 703 if (iocblist == NULL) { 704 mutex_exit(&aiop->aio_mutex); 705 return (ENOMEM); 706 } 707 aiop->aio_iocb = (aiocb_t **)iocblist; 708 aiop->aio_iocbsz = iocbsz; 709 } else { 710 iocblist = (char *)aiop->aio_iocb; 711 } 712 713 aiop->aio_waitncnt = waitcnt; 714 aiop->aio_flags |= AIO_WAITN; 715 716 for (;;) { 717 /* push requests on poll queue to done queue */ 718 if (aiop->aio_pollq) { 719 mutex_exit(&aiop->aio_mutex); 720 aio_cleanup(0); 721 mutex_enter(&aiop->aio_mutex); 722 } 723 724 /* check for requests on done queue */ 725 if (aiop->aio_doneq) { 726 cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt); 727 aiop->aio_waitncnt = waitcnt - cnt; 728 } 729 730 /* user-level done queue might not be empty */ 731 if (aiop->aio_notifycnt > 0) { 732 aiop->aio_notifycnt--; 733 error = 0; 734 break; 735 } 736 737 /* 738 * if we are here second time as a result of timer 739 * expiration, we reset error if there are enough 740 * aiocb's to satisfy request. 741 * We return also if all requests are already done 742 * and we picked up the whole done queue. 743 */ 744 745 if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 && 746 aiop->aio_doneq == NULL)) { 747 error = 0; 748 break; 749 } 750 751 if ((cnt < waitcnt) && blocking) { 752 int rval = cv_waituntil_sig(&aiop->aio_waitcv, 753 &aiop->aio_mutex, rqtp, timecheck); 754 if (rval > 0) 755 continue; 756 if (rval < 0) { 757 error = ETIME; 758 blocking = 0; 759 continue; 760 } 761 error = EINTR; 762 } 763 break; 764 } 765 766 mutex_exit(&aiop->aio_mutex); 767 768 if (cnt > 0) { 769 770 iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist, 771 aiop, model); 772 773 if (model == DATAMODEL_NATIVE) 774 riocbsz = (sizeof (aiocb_t *) * cnt); 775 #ifdef _SYSCALL32_IMPL 776 else 777 riocbsz = (sizeof (caddr32_t) * cnt); 778 #endif /* _SYSCALL32_IMPL */ 779 780 if (copyout(iocblist, uiocb, riocbsz) || 781 copyout(&cnt, nwait, sizeof (uint_t))) 782 error = EFAULT; 783 } 784 785 /* check if there is another thread waiting for execution */ 786 mutex_enter(&aiop->aio_mutex); 787 aiop->aio_flags &= ~AIO_WAITN; 788 if (aiop->aio_flags & AIO_WAITN_PENDING) { 789 aiop->aio_flags &= ~AIO_WAITN_PENDING; 790 cv_signal(&aiop->aio_waitncv); 791 } 792 mutex_exit(&aiop->aio_mutex); 793 794 return (error); 795 } 796 797 /* 798 * aio_unlock_requests 799 * copyouts the result of the request as well as the return value. 800 * It builds the list of completed asynchronous requests, 801 * unlocks the allocated memory ranges and 802 * put the aio request structure back into the free list. 803 */ 804 805 static int 806 aio_unlock_requests( 807 caddr_t iocblist, 808 int iocb_index, 809 aio_req_t *reqlist, 810 aio_t *aiop, 811 model_t model) 812 { 813 aio_req_t *reqp, *nreqp; 814 815 if (model == DATAMODEL_NATIVE) { 816 for (reqp = reqlist; reqp != NULL; reqp = nreqp) { 817 (((caddr_t *)iocblist)[iocb_index++]) = 818 reqp->aio_req_iocb.iocb; 819 nreqp = reqp->aio_req_next; 820 aphysio_unlock(reqp); 821 aio_copyout_result(reqp); 822 mutex_enter(&aiop->aio_mutex); 823 aio_req_free(aiop, reqp); 824 mutex_exit(&aiop->aio_mutex); 825 } 826 } 827 #ifdef _SYSCALL32_IMPL 828 else { 829 for (reqp = reqlist; reqp != NULL; reqp = nreqp) { 830 ((caddr32_t *)iocblist)[iocb_index++] = 831 reqp->aio_req_iocb.iocb32; 832 nreqp = reqp->aio_req_next; 833 aphysio_unlock(reqp); 834 aio_copyout_result(reqp); 835 mutex_enter(&aiop->aio_mutex); 836 aio_req_free(aiop, reqp); 837 mutex_exit(&aiop->aio_mutex); 838 } 839 } 840 #endif /* _SYSCALL32_IMPL */ 841 return (iocb_index); 842 } 843 844 /* 845 * aio_reqlist_concat 846 * moves "max" elements from the done queue to the reqlist queue and removes 847 * the AIO_DONEQ flag. 848 * - reqlist queue is a simple linked list 849 * - done queue is a double linked list 850 */ 851 852 static int 853 aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max) 854 { 855 aio_req_t *q2, *q2work, *list; 856 int count = 0; 857 858 list = *reqlist; 859 q2 = aiop->aio_doneq; 860 q2work = q2; 861 while (max-- > 0) { 862 q2work->aio_req_flags &= ~AIO_DONEQ; 863 q2work = q2work->aio_req_next; 864 count++; 865 if (q2work == q2) 866 break; 867 } 868 869 if (q2work == q2) { 870 /* all elements revised */ 871 q2->aio_req_prev->aio_req_next = list; 872 list = q2; 873 aiop->aio_doneq = NULL; 874 } else { 875 /* 876 * max < elements in the doneq 877 * detach only the required amount of elements 878 * out of the doneq 879 */ 880 q2work->aio_req_prev->aio_req_next = list; 881 list = q2; 882 883 aiop->aio_doneq = q2work; 884 q2work->aio_req_prev = q2->aio_req_prev; 885 q2->aio_req_prev->aio_req_next = q2work; 886 } 887 *reqlist = list; 888 return (count); 889 } 890 891 /*ARGSUSED*/ 892 static int 893 aiosuspend( 894 void *aiocb, 895 int nent, 896 struct timespec *timout, 897 int flag, 898 long *rval, 899 int run_mode) 900 { 901 int error; 902 aio_t *aiop; 903 aio_req_t *reqp, *found, *next; 904 caddr_t cbplist = NULL; 905 aiocb_t *cbp, **ucbp; 906 #ifdef _SYSCALL32_IMPL 907 aiocb32_t *cbp32; 908 caddr32_t *ucbp32; 909 #endif /* _SYSCALL32_IMPL */ 910 aiocb64_32_t *cbp64; 911 int rv; 912 int i; 913 size_t ssize; 914 model_t model = get_udatamodel(); 915 int blocking; 916 int timecheck; 917 timestruc_t rqtime; 918 timestruc_t *rqtp; 919 920 aiop = curproc->p_aio; 921 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 922 return (EINVAL); 923 924 /* 925 * Establish the absolute future time for the timeout. 926 */ 927 error = timespec2reltime(timout, &rqtime, &rqtp, &blocking); 928 if (error) 929 return (error); 930 if (rqtp) { 931 timestruc_t now; 932 timecheck = timechanged; 933 gethrestime(&now); 934 timespecadd(rqtp, &now); 935 } 936 937 /* 938 * If we are not blocking and there's no IO complete 939 * skip aiocb copyin. 940 */ 941 if (!blocking && (aiop->aio_pollq == NULL) && 942 (aiop->aio_doneq == NULL)) { 943 return (EAGAIN); 944 } 945 946 if (model == DATAMODEL_NATIVE) 947 ssize = (sizeof (aiocb_t *) * nent); 948 #ifdef _SYSCALL32_IMPL 949 else 950 ssize = (sizeof (caddr32_t) * nent); 951 #endif /* _SYSCALL32_IMPL */ 952 953 cbplist = kmem_alloc(ssize, KM_NOSLEEP); 954 if (cbplist == NULL) 955 return (ENOMEM); 956 957 if (copyin(aiocb, cbplist, ssize)) { 958 error = EFAULT; 959 goto done; 960 } 961 962 found = NULL; 963 /* 964 * we need to get the aio_cleanupq_mutex since we call 965 * aio_req_done(). 966 */ 967 mutex_enter(&aiop->aio_cleanupq_mutex); 968 mutex_enter(&aiop->aio_mutex); 969 for (;;) { 970 /* push requests on poll queue to done queue */ 971 if (aiop->aio_pollq) { 972 mutex_exit(&aiop->aio_mutex); 973 mutex_exit(&aiop->aio_cleanupq_mutex); 974 aio_cleanup(0); 975 mutex_enter(&aiop->aio_cleanupq_mutex); 976 mutex_enter(&aiop->aio_mutex); 977 } 978 /* check for requests on done queue */ 979 if (aiop->aio_doneq) { 980 if (model == DATAMODEL_NATIVE) 981 ucbp = (aiocb_t **)cbplist; 982 #ifdef _SYSCALL32_IMPL 983 else 984 ucbp32 = (caddr32_t *)cbplist; 985 #endif /* _SYSCALL32_IMPL */ 986 for (i = 0; i < nent; i++) { 987 if (model == DATAMODEL_NATIVE) { 988 if ((cbp = *ucbp++) == NULL) 989 continue; 990 if (run_mode != AIO_LARGEFILE) 991 reqp = aio_req_done( 992 &cbp->aio_resultp); 993 else { 994 cbp64 = (aiocb64_32_t *)cbp; 995 reqp = aio_req_done( 996 &cbp64->aio_resultp); 997 } 998 } 999 #ifdef _SYSCALL32_IMPL 1000 else { 1001 if (run_mode == AIO_32) { 1002 if ((cbp32 = 1003 (aiocb32_t *)(uintptr_t) 1004 *ucbp32++) == NULL) 1005 continue; 1006 reqp = aio_req_done( 1007 &cbp32->aio_resultp); 1008 } else if (run_mode == AIO_LARGEFILE) { 1009 if ((cbp64 = 1010 (aiocb64_32_t *)(uintptr_t) 1011 *ucbp32++) == NULL) 1012 continue; 1013 reqp = aio_req_done( 1014 &cbp64->aio_resultp); 1015 } 1016 1017 } 1018 #endif /* _SYSCALL32_IMPL */ 1019 if (reqp) { 1020 reqp->aio_req_next = found; 1021 found = reqp; 1022 } 1023 if (aiop->aio_doneq == NULL) 1024 break; 1025 } 1026 if (found) 1027 break; 1028 } 1029 if (aiop->aio_notifycnt > 0) { 1030 /* 1031 * nothing on the kernel's queue. the user 1032 * has notified the kernel that it has items 1033 * on a user-level queue. 1034 */ 1035 aiop->aio_notifycnt--; 1036 *rval = 1; 1037 error = 0; 1038 break; 1039 } 1040 /* don't block if nothing is outstanding */ 1041 if (aiop->aio_outstanding == 0) { 1042 error = EAGAIN; 1043 break; 1044 } 1045 if (blocking) { 1046 /* 1047 * drop the aio_cleanupq_mutex as we are 1048 * going to block. 1049 */ 1050 mutex_exit(&aiop->aio_cleanupq_mutex); 1051 rv = cv_waituntil_sig(&aiop->aio_waitcv, 1052 &aiop->aio_mutex, rqtp, timecheck); 1053 /* 1054 * we have to drop aio_mutex and 1055 * grab it in the right order. 1056 */ 1057 mutex_exit(&aiop->aio_mutex); 1058 mutex_enter(&aiop->aio_cleanupq_mutex); 1059 mutex_enter(&aiop->aio_mutex); 1060 if (rv > 0) /* check done queue again */ 1061 continue; 1062 if (rv == 0) /* interrupted by a signal */ 1063 error = EINTR; 1064 else /* timer expired */ 1065 error = ETIME; 1066 } else { 1067 error = EAGAIN; 1068 } 1069 break; 1070 } 1071 mutex_exit(&aiop->aio_mutex); 1072 mutex_exit(&aiop->aio_cleanupq_mutex); 1073 for (reqp = found; reqp != NULL; reqp = next) { 1074 next = reqp->aio_req_next; 1075 aphysio_unlock(reqp); 1076 aio_copyout_result(reqp); 1077 mutex_enter(&aiop->aio_mutex); 1078 aio_req_free(aiop, reqp); 1079 mutex_exit(&aiop->aio_mutex); 1080 } 1081 done: 1082 kmem_free(cbplist, ssize); 1083 return (error); 1084 } 1085 1086 /* 1087 * initialize aio by allocating an aio_t struct for this 1088 * process. 1089 */ 1090 static int 1091 aioinit(void) 1092 { 1093 proc_t *p = curproc; 1094 aio_t *aiop; 1095 mutex_enter(&p->p_lock); 1096 if ((aiop = p->p_aio) == NULL) { 1097 aiop = aio_aiop_alloc(); 1098 p->p_aio = aiop; 1099 } 1100 mutex_exit(&p->p_lock); 1101 if (aiop == NULL) 1102 return (ENOMEM); 1103 return (0); 1104 } 1105 1106 /* 1107 * start a special thread that will cleanup after aio requests 1108 * that are preventing a segment from being unmapped. as_unmap() 1109 * blocks until all phsyio to this segment is completed. this 1110 * doesn't happen until all the pages in this segment are not 1111 * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio 1112 * requests still outstanding. this special thread will make sure 1113 * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed. 1114 * 1115 * this function will return an error if the process has only 1116 * one LWP. the assumption is that the caller is a separate LWP 1117 * that remains blocked in the kernel for the life of this process. 1118 */ 1119 static int 1120 aiostart(void) 1121 { 1122 proc_t *p = curproc; 1123 aio_t *aiop; 1124 int first, error = 0; 1125 1126 if (p->p_lwpcnt == 1) 1127 return (EDEADLK); 1128 mutex_enter(&p->p_lock); 1129 if ((aiop = p->p_aio) == NULL) 1130 error = EINVAL; 1131 else { 1132 first = aiop->aio_ok; 1133 if (aiop->aio_ok == 0) 1134 aiop->aio_ok = 1; 1135 } 1136 mutex_exit(&p->p_lock); 1137 if (error == 0 && first == 0) { 1138 return (aio_cleanup_thread(aiop)); 1139 /* should return only to exit */ 1140 } 1141 return (error); 1142 } 1143 1144 /* 1145 * Associate an aiocb with a port. 1146 * This function is used by aiorw() to associate a transaction with a port. 1147 * Allocate an event port structure (port_alloc_event()) and store the 1148 * delivered user pointer (portnfy_user) in the portkev_user field of the 1149 * port_kevent_t structure.. 1150 * The aio_req_portkev pointer in the aio_req_t structure was added to identify 1151 * the port association. 1152 */ 1153 1154 static int 1155 aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp, 1156 aio_req_t *reqp, int event) 1157 { 1158 port_kevent_t *pkevp = NULL; 1159 int error; 1160 1161 error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT, 1162 PORT_SOURCE_AIO, &pkevp); 1163 if (error) { 1164 if ((error == ENOMEM) || (error == EAGAIN)) 1165 error = EAGAIN; 1166 else 1167 error = EINVAL; 1168 } else { 1169 port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user, 1170 aio_port_callback, reqp); 1171 pkevp->portkev_events = event; 1172 reqp->aio_req_portkev = pkevp; 1173 reqp->aio_req_port = pntfy->portnfy_port; 1174 } 1175 return (error); 1176 } 1177 1178 #ifdef _LP64 1179 1180 /* 1181 * Asynchronous list IO. A chain of aiocb's are copied in 1182 * one at a time. If the aiocb is invalid, it is skipped. 1183 * For each aiocb, the appropriate driver entry point is 1184 * called. Optimize for the common case where the list 1185 * of requests is to the same file descriptor. 1186 * 1187 * One possible optimization is to define a new driver entry 1188 * point that supports a list of IO requests. Whether this 1189 * improves performance depends somewhat on the driver's 1190 * locking strategy. Processing a list could adversely impact 1191 * the driver's interrupt latency. 1192 */ 1193 static int 1194 alio( 1195 int mode_arg, 1196 aiocb_t **aiocb_arg, 1197 int nent, 1198 struct sigevent *sigev) 1199 { 1200 file_t *fp; 1201 file_t *prev_fp = NULL; 1202 int prev_mode = -1; 1203 struct vnode *vp; 1204 aio_lio_t *head; 1205 aio_req_t *reqp; 1206 aio_t *aiop; 1207 caddr_t cbplist; 1208 aiocb_t cb; 1209 aiocb_t *aiocb = &cb; 1210 aiocb_t *cbp; 1211 aiocb_t **ucbp; 1212 struct sigevent sigevk; 1213 sigqueue_t *sqp; 1214 int (*aio_func)(); 1215 int mode; 1216 int error = 0; 1217 int aio_errors = 0; 1218 int i; 1219 size_t ssize; 1220 int deadhead = 0; 1221 int aio_notsupported = 0; 1222 int lio_head_port; 1223 int aio_port; 1224 int aio_thread; 1225 port_kevent_t *pkevtp = NULL; 1226 int portused = 0; 1227 port_notify_t pnotify; 1228 int event; 1229 1230 aiop = curproc->p_aio; 1231 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 1232 return (EINVAL); 1233 1234 ssize = (sizeof (aiocb_t *) * nent); 1235 cbplist = kmem_alloc(ssize, KM_SLEEP); 1236 ucbp = (aiocb_t **)cbplist; 1237 1238 if (copyin(aiocb_arg, cbplist, ssize) || 1239 (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) { 1240 kmem_free(cbplist, ssize); 1241 return (EFAULT); 1242 } 1243 1244 /* Event Ports */ 1245 if (sigev && 1246 (sigevk.sigev_notify == SIGEV_THREAD || 1247 sigevk.sigev_notify == SIGEV_PORT)) { 1248 if (sigevk.sigev_notify == SIGEV_THREAD) { 1249 pnotify.portnfy_port = sigevk.sigev_signo; 1250 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 1251 } else if (copyin(sigevk.sigev_value.sival_ptr, 1252 &pnotify, sizeof (pnotify))) { 1253 kmem_free(cbplist, ssize); 1254 return (EFAULT); 1255 } 1256 error = port_alloc_event(pnotify.portnfy_port, 1257 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 1258 if (error) { 1259 if (error == ENOMEM || error == EAGAIN) 1260 error = EAGAIN; 1261 else 1262 error = EINVAL; 1263 kmem_free(cbplist, ssize); 1264 return (error); 1265 } 1266 lio_head_port = pnotify.portnfy_port; 1267 portused = 1; 1268 } 1269 1270 /* 1271 * a list head should be allocated if notification is 1272 * enabled for this list. 1273 */ 1274 head = NULL; 1275 1276 if (mode_arg == LIO_WAIT || sigev) { 1277 mutex_enter(&aiop->aio_mutex); 1278 error = aio_lio_alloc(&head); 1279 mutex_exit(&aiop->aio_mutex); 1280 if (error) 1281 goto done; 1282 deadhead = 1; 1283 head->lio_nent = nent; 1284 head->lio_refcnt = nent; 1285 head->lio_port = -1; 1286 head->lio_portkev = NULL; 1287 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 1288 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 1289 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 1290 if (sqp == NULL) { 1291 error = EAGAIN; 1292 goto done; 1293 } 1294 sqp->sq_func = NULL; 1295 sqp->sq_next = NULL; 1296 sqp->sq_info.si_code = SI_ASYNCIO; 1297 sqp->sq_info.si_pid = curproc->p_pid; 1298 sqp->sq_info.si_ctid = PRCTID(curproc); 1299 sqp->sq_info.si_zoneid = getzoneid(); 1300 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 1301 sqp->sq_info.si_signo = sigevk.sigev_signo; 1302 sqp->sq_info.si_value = sigevk.sigev_value; 1303 head->lio_sigqp = sqp; 1304 } else { 1305 head->lio_sigqp = NULL; 1306 } 1307 if (pkevtp) { 1308 /* 1309 * Prepare data to send when list of aiocb's 1310 * has completed. 1311 */ 1312 port_init_event(pkevtp, (uintptr_t)sigev, 1313 (void *)(uintptr_t)pnotify.portnfy_user, 1314 NULL, head); 1315 pkevtp->portkev_events = AIOLIO; 1316 head->lio_portkev = pkevtp; 1317 head->lio_port = pnotify.portnfy_port; 1318 } 1319 } 1320 1321 for (i = 0; i < nent; i++, ucbp++) { 1322 1323 cbp = *ucbp; 1324 /* skip entry if it can't be copied. */ 1325 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) { 1326 if (head) { 1327 mutex_enter(&aiop->aio_mutex); 1328 head->lio_nent--; 1329 head->lio_refcnt--; 1330 mutex_exit(&aiop->aio_mutex); 1331 } 1332 continue; 1333 } 1334 1335 /* skip if opcode for aiocb is LIO_NOP */ 1336 mode = aiocb->aio_lio_opcode; 1337 if (mode == LIO_NOP) { 1338 cbp = NULL; 1339 if (head) { 1340 mutex_enter(&aiop->aio_mutex); 1341 head->lio_nent--; 1342 head->lio_refcnt--; 1343 mutex_exit(&aiop->aio_mutex); 1344 } 1345 continue; 1346 } 1347 1348 /* increment file descriptor's ref count. */ 1349 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 1350 lio_set_uerror(&cbp->aio_resultp, EBADF); 1351 if (head) { 1352 mutex_enter(&aiop->aio_mutex); 1353 head->lio_nent--; 1354 head->lio_refcnt--; 1355 mutex_exit(&aiop->aio_mutex); 1356 } 1357 aio_errors++; 1358 continue; 1359 } 1360 1361 /* 1362 * check the permission of the partition 1363 */ 1364 if ((fp->f_flag & mode) == 0) { 1365 releasef(aiocb->aio_fildes); 1366 lio_set_uerror(&cbp->aio_resultp, EBADF); 1367 if (head) { 1368 mutex_enter(&aiop->aio_mutex); 1369 head->lio_nent--; 1370 head->lio_refcnt--; 1371 mutex_exit(&aiop->aio_mutex); 1372 } 1373 aio_errors++; 1374 continue; 1375 } 1376 1377 /* 1378 * common case where requests are to the same fd 1379 * for the same r/w operation. 1380 * for UFS, need to set EBADFD 1381 */ 1382 vp = fp->f_vnode; 1383 if (fp != prev_fp || mode != prev_mode) { 1384 aio_func = check_vp(vp, mode); 1385 if (aio_func == NULL) { 1386 prev_fp = NULL; 1387 releasef(aiocb->aio_fildes); 1388 lio_set_uerror(&cbp->aio_resultp, EBADFD); 1389 aio_notsupported++; 1390 if (head) { 1391 mutex_enter(&aiop->aio_mutex); 1392 head->lio_nent--; 1393 head->lio_refcnt--; 1394 mutex_exit(&aiop->aio_mutex); 1395 } 1396 continue; 1397 } else { 1398 prev_fp = fp; 1399 prev_mode = mode; 1400 } 1401 } 1402 1403 error = aio_req_setup(&reqp, aiop, aiocb, 1404 &cbp->aio_resultp, vp); 1405 if (error) { 1406 releasef(aiocb->aio_fildes); 1407 lio_set_uerror(&cbp->aio_resultp, error); 1408 if (head) { 1409 mutex_enter(&aiop->aio_mutex); 1410 head->lio_nent--; 1411 head->lio_refcnt--; 1412 mutex_exit(&aiop->aio_mutex); 1413 } 1414 aio_errors++; 1415 continue; 1416 } 1417 1418 reqp->aio_req_lio = head; 1419 deadhead = 0; 1420 1421 /* 1422 * Set the errno field now before sending the request to 1423 * the driver to avoid a race condition 1424 */ 1425 (void) suword32(&cbp->aio_resultp.aio_errno, 1426 EINPROGRESS); 1427 1428 reqp->aio_req_iocb.iocb = (caddr_t)cbp; 1429 1430 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE; 1431 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 1432 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 1433 if (aio_port | aio_thread) { 1434 port_kevent_t *lpkevp; 1435 /* 1436 * Prepare data to send with each aiocb completed. 1437 */ 1438 if (aio_port) { 1439 void *paddr = 1440 aiocb->aio_sigevent.sigev_value.sival_ptr; 1441 if (copyin(paddr, &pnotify, sizeof (pnotify))) 1442 error = EFAULT; 1443 } else { /* aio_thread */ 1444 pnotify.portnfy_port = 1445 aiocb->aio_sigevent.sigev_signo; 1446 pnotify.portnfy_user = 1447 aiocb->aio_sigevent.sigev_value.sival_ptr; 1448 } 1449 if (error) 1450 /* EMPTY */; 1451 else if (pkevtp != NULL && 1452 pnotify.portnfy_port == lio_head_port) 1453 error = port_dup_event(pkevtp, &lpkevp, 1454 PORT_ALLOC_DEFAULT); 1455 else 1456 error = port_alloc_event(pnotify.portnfy_port, 1457 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 1458 &lpkevp); 1459 if (error == 0) { 1460 port_init_event(lpkevp, (uintptr_t)cbp, 1461 (void *)(uintptr_t)pnotify.portnfy_user, 1462 aio_port_callback, reqp); 1463 lpkevp->portkev_events = event; 1464 reqp->aio_req_portkev = lpkevp; 1465 reqp->aio_req_port = pnotify.portnfy_port; 1466 } 1467 } 1468 1469 /* 1470 * send the request to driver. 1471 */ 1472 if (error == 0) { 1473 if (aiocb->aio_nbytes == 0) { 1474 clear_active_fd(aiocb->aio_fildes); 1475 aio_zerolen(reqp); 1476 continue; 1477 } 1478 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 1479 CRED()); 1480 } 1481 1482 /* 1483 * the fd's ref count is not decremented until the IO has 1484 * completed unless there was an error. 1485 */ 1486 if (error) { 1487 releasef(aiocb->aio_fildes); 1488 lio_set_uerror(&cbp->aio_resultp, error); 1489 if (head) { 1490 mutex_enter(&aiop->aio_mutex); 1491 head->lio_nent--; 1492 head->lio_refcnt--; 1493 mutex_exit(&aiop->aio_mutex); 1494 } 1495 if (error == ENOTSUP) 1496 aio_notsupported++; 1497 else 1498 aio_errors++; 1499 lio_set_error(reqp, portused); 1500 } else { 1501 clear_active_fd(aiocb->aio_fildes); 1502 } 1503 } 1504 1505 if (aio_notsupported) { 1506 error = ENOTSUP; 1507 } else if (aio_errors) { 1508 /* 1509 * return EIO if any request failed 1510 */ 1511 error = EIO; 1512 } 1513 1514 if (mode_arg == LIO_WAIT) { 1515 mutex_enter(&aiop->aio_mutex); 1516 while (head->lio_refcnt > 0) { 1517 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 1518 mutex_exit(&aiop->aio_mutex); 1519 error = EINTR; 1520 goto done; 1521 } 1522 } 1523 mutex_exit(&aiop->aio_mutex); 1524 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64); 1525 } 1526 1527 done: 1528 kmem_free(cbplist, ssize); 1529 if (deadhead) { 1530 if (head->lio_sigqp) 1531 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 1532 if (head->lio_portkev) 1533 port_free_event(head->lio_portkev); 1534 kmem_free(head, sizeof (aio_lio_t)); 1535 } 1536 return (error); 1537 } 1538 1539 #endif /* _LP64 */ 1540 1541 /* 1542 * Asynchronous list IO. 1543 * If list I/O is called with LIO_WAIT it can still return 1544 * before all the I/O's are completed if a signal is caught 1545 * or if the list include UFS I/O requests. If this happens, 1546 * libaio will call aliowait() to wait for the I/O's to 1547 * complete 1548 */ 1549 /*ARGSUSED*/ 1550 static int 1551 aliowait( 1552 int mode, 1553 void *aiocb, 1554 int nent, 1555 void *sigev, 1556 int run_mode) 1557 { 1558 aio_lio_t *head; 1559 aio_t *aiop; 1560 caddr_t cbplist; 1561 aiocb_t *cbp, **ucbp; 1562 #ifdef _SYSCALL32_IMPL 1563 aiocb32_t *cbp32; 1564 caddr32_t *ucbp32; 1565 aiocb64_32_t *cbp64; 1566 #endif 1567 int error = 0; 1568 int i; 1569 size_t ssize = 0; 1570 model_t model = get_udatamodel(); 1571 1572 aiop = curproc->p_aio; 1573 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 1574 return (EINVAL); 1575 1576 if (model == DATAMODEL_NATIVE) 1577 ssize = (sizeof (aiocb_t *) * nent); 1578 #ifdef _SYSCALL32_IMPL 1579 else 1580 ssize = (sizeof (caddr32_t) * nent); 1581 #endif /* _SYSCALL32_IMPL */ 1582 1583 if (ssize == 0) 1584 return (EINVAL); 1585 1586 cbplist = kmem_alloc(ssize, KM_SLEEP); 1587 1588 if (model == DATAMODEL_NATIVE) 1589 ucbp = (aiocb_t **)cbplist; 1590 #ifdef _SYSCALL32_IMPL 1591 else 1592 ucbp32 = (caddr32_t *)cbplist; 1593 #endif /* _SYSCALL32_IMPL */ 1594 1595 if (copyin(aiocb, cbplist, ssize)) { 1596 error = EFAULT; 1597 goto done; 1598 } 1599 1600 /* 1601 * To find the list head, we go through the 1602 * list of aiocb structs, find the request 1603 * its for, then get the list head that reqp 1604 * points to 1605 */ 1606 head = NULL; 1607 1608 for (i = 0; i < nent; i++) { 1609 if (model == DATAMODEL_NATIVE) { 1610 /* 1611 * Since we are only checking for a NULL pointer 1612 * Following should work on both native data sizes 1613 * as well as for largefile aiocb. 1614 */ 1615 if ((cbp = *ucbp++) == NULL) 1616 continue; 1617 if (run_mode != AIO_LARGEFILE) 1618 if (head = aio_list_get(&cbp->aio_resultp)) 1619 break; 1620 else { 1621 /* 1622 * This is a case when largefile call is 1623 * made on 32 bit kernel. 1624 * Treat each pointer as pointer to 1625 * aiocb64_32 1626 */ 1627 if (head = aio_list_get((aio_result_t *) 1628 &(((aiocb64_32_t *)cbp)->aio_resultp))) 1629 break; 1630 } 1631 } 1632 #ifdef _SYSCALL32_IMPL 1633 else { 1634 if (run_mode == AIO_LARGEFILE) { 1635 if ((cbp64 = (aiocb64_32_t *) 1636 (uintptr_t)*ucbp32++) == NULL) 1637 continue; 1638 if (head = aio_list_get((aio_result_t *) 1639 &cbp64->aio_resultp)) 1640 break; 1641 } else if (run_mode == AIO_32) { 1642 if ((cbp32 = (aiocb32_t *) 1643 (uintptr_t)*ucbp32++) == NULL) 1644 continue; 1645 if (head = aio_list_get((aio_result_t *) 1646 &cbp32->aio_resultp)) 1647 break; 1648 } 1649 } 1650 #endif /* _SYSCALL32_IMPL */ 1651 } 1652 1653 if (head == NULL) { 1654 error = EINVAL; 1655 goto done; 1656 } 1657 1658 mutex_enter(&aiop->aio_mutex); 1659 while (head->lio_refcnt > 0) { 1660 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 1661 mutex_exit(&aiop->aio_mutex); 1662 error = EINTR; 1663 goto done; 1664 } 1665 } 1666 mutex_exit(&aiop->aio_mutex); 1667 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode); 1668 done: 1669 kmem_free(cbplist, ssize); 1670 return (error); 1671 } 1672 1673 aio_lio_t * 1674 aio_list_get(aio_result_t *resultp) 1675 { 1676 aio_lio_t *head = NULL; 1677 aio_t *aiop; 1678 aio_req_t **bucket; 1679 aio_req_t *reqp; 1680 long index; 1681 1682 aiop = curproc->p_aio; 1683 if (aiop == NULL) 1684 return (NULL); 1685 1686 if (resultp) { 1687 index = AIO_HASH(resultp); 1688 bucket = &aiop->aio_hash[index]; 1689 for (reqp = *bucket; reqp != NULL; 1690 reqp = reqp->aio_hash_next) { 1691 if (reqp->aio_req_resultp == resultp) { 1692 head = reqp->aio_req_lio; 1693 return (head); 1694 } 1695 } 1696 } 1697 return (NULL); 1698 } 1699 1700 1701 static void 1702 lio_set_uerror(void *resultp, int error) 1703 { 1704 /* 1705 * the resultp field is a pointer to where the 1706 * error should be written out to the user's 1707 * aiocb. 1708 * 1709 */ 1710 if (get_udatamodel() == DATAMODEL_NATIVE) { 1711 (void) sulword(&((aio_result_t *)resultp)->aio_return, 1712 (ssize_t)-1); 1713 (void) suword32(&((aio_result_t *)resultp)->aio_errno, error); 1714 } 1715 #ifdef _SYSCALL32_IMPL 1716 else { 1717 (void) suword32(&((aio_result32_t *)resultp)->aio_return, 1718 (uint_t)-1); 1719 (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error); 1720 } 1721 #endif /* _SYSCALL32_IMPL */ 1722 } 1723 1724 /* 1725 * do cleanup completion for all requests in list. memory for 1726 * each request is also freed. 1727 */ 1728 static void 1729 alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode) 1730 { 1731 int i; 1732 aio_req_t *reqp; 1733 aio_result_t *resultp; 1734 aiocb64_32_t *aiocb_64; 1735 1736 for (i = 0; i < nent; i++) { 1737 if (get_udatamodel() == DATAMODEL_NATIVE) { 1738 if (cbp[i] == NULL) 1739 continue; 1740 if (run_mode == AIO_LARGEFILE) { 1741 aiocb_64 = (aiocb64_32_t *)cbp[i]; 1742 resultp = (aio_result_t *) 1743 &aiocb_64->aio_resultp; 1744 } else 1745 resultp = &cbp[i]->aio_resultp; 1746 } 1747 #ifdef _SYSCALL32_IMPL 1748 else { 1749 aiocb32_t *aiocb_32; 1750 caddr32_t *cbp32; 1751 1752 cbp32 = (caddr32_t *)cbp; 1753 if (cbp32[i] == NULL) 1754 continue; 1755 if (run_mode == AIO_32) { 1756 aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i]; 1757 resultp = (aio_result_t *)&aiocb_32-> 1758 aio_resultp; 1759 } else if (run_mode == AIO_LARGEFILE) { 1760 aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i]; 1761 resultp = (aio_result_t *)&aiocb_64-> 1762 aio_resultp; 1763 } 1764 } 1765 #endif /* _SYSCALL32_IMPL */ 1766 /* 1767 * we need to get the aio_cleanupq_mutex since we call 1768 * aio_req_done(). 1769 */ 1770 mutex_enter(&aiop->aio_cleanupq_mutex); 1771 mutex_enter(&aiop->aio_mutex); 1772 reqp = aio_req_done(resultp); 1773 mutex_exit(&aiop->aio_mutex); 1774 mutex_exit(&aiop->aio_cleanupq_mutex); 1775 if (reqp != NULL) { 1776 aphysio_unlock(reqp); 1777 aio_copyout_result(reqp); 1778 mutex_enter(&aiop->aio_mutex); 1779 aio_req_free(aiop, reqp); 1780 mutex_exit(&aiop->aio_mutex); 1781 } 1782 } 1783 } 1784 1785 /* 1786 * Write out the results for an aio request that is done. 1787 */ 1788 static int 1789 aioerror(void *cb, int run_mode) 1790 { 1791 aio_result_t *resultp; 1792 aio_t *aiop; 1793 aio_req_t *reqp; 1794 int retval; 1795 1796 aiop = curproc->p_aio; 1797 if (aiop == NULL || cb == NULL) 1798 return (EINVAL); 1799 1800 if (get_udatamodel() == DATAMODEL_NATIVE) { 1801 if (run_mode == AIO_LARGEFILE) 1802 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 1803 aio_resultp; 1804 else 1805 resultp = &((aiocb_t *)cb)->aio_resultp; 1806 } 1807 #ifdef _SYSCALL32_IMPL 1808 else { 1809 if (run_mode == AIO_LARGEFILE) 1810 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 1811 aio_resultp; 1812 else if (run_mode == AIO_32) 1813 resultp = (aio_result_t *)&((aiocb32_t *)cb)-> 1814 aio_resultp; 1815 } 1816 #endif /* _SYSCALL32_IMPL */ 1817 /* 1818 * we need to get the aio_cleanupq_mutex since we call 1819 * aio_req_find(). 1820 */ 1821 mutex_enter(&aiop->aio_cleanupq_mutex); 1822 mutex_enter(&aiop->aio_mutex); 1823 retval = aio_req_find(resultp, &reqp); 1824 mutex_exit(&aiop->aio_mutex); 1825 mutex_exit(&aiop->aio_cleanupq_mutex); 1826 if (retval == 0) { 1827 aphysio_unlock(reqp); 1828 aio_copyout_result(reqp); 1829 mutex_enter(&aiop->aio_mutex); 1830 aio_req_free(aiop, reqp); 1831 mutex_exit(&aiop->aio_mutex); 1832 return (0); 1833 } else if (retval == 1) 1834 return (EINPROGRESS); 1835 else if (retval == 2) 1836 return (EINVAL); 1837 return (0); 1838 } 1839 1840 /* 1841 * aio_cancel - if no requests outstanding, 1842 * return AIO_ALLDONE 1843 * else 1844 * return AIO_NOTCANCELED 1845 */ 1846 static int 1847 aio_cancel( 1848 int fildes, 1849 void *cb, 1850 long *rval, 1851 int run_mode) 1852 { 1853 aio_t *aiop; 1854 void *resultp; 1855 int index; 1856 aio_req_t **bucket; 1857 aio_req_t *ent; 1858 1859 1860 /* 1861 * Verify valid file descriptor 1862 */ 1863 if ((getf(fildes)) == NULL) { 1864 return (EBADF); 1865 } 1866 releasef(fildes); 1867 1868 aiop = curproc->p_aio; 1869 if (aiop == NULL) 1870 return (EINVAL); 1871 1872 if (aiop->aio_outstanding == 0) { 1873 *rval = AIO_ALLDONE; 1874 return (0); 1875 } 1876 1877 mutex_enter(&aiop->aio_mutex); 1878 if (cb != NULL) { 1879 if (get_udatamodel() == DATAMODEL_NATIVE) { 1880 if (run_mode == AIO_LARGEFILE) 1881 resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 1882 ->aio_resultp; 1883 else 1884 resultp = &((aiocb_t *)cb)->aio_resultp; 1885 } 1886 #ifdef _SYSCALL32_IMPL 1887 else { 1888 if (run_mode == AIO_LARGEFILE) 1889 resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 1890 ->aio_resultp; 1891 else if (run_mode == AIO_32) 1892 resultp = (aio_result_t *)&((aiocb32_t *)cb) 1893 ->aio_resultp; 1894 } 1895 #endif /* _SYSCALL32_IMPL */ 1896 index = AIO_HASH(resultp); 1897 bucket = &aiop->aio_hash[index]; 1898 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 1899 if (ent->aio_req_resultp == resultp) { 1900 if ((ent->aio_req_flags & AIO_PENDING) == 0) { 1901 mutex_exit(&aiop->aio_mutex); 1902 *rval = AIO_ALLDONE; 1903 return (0); 1904 } 1905 mutex_exit(&aiop->aio_mutex); 1906 *rval = AIO_NOTCANCELED; 1907 return (0); 1908 } 1909 } 1910 mutex_exit(&aiop->aio_mutex); 1911 *rval = AIO_ALLDONE; 1912 return (0); 1913 } 1914 1915 for (index = 0; index < AIO_HASHSZ; index++) { 1916 bucket = &aiop->aio_hash[index]; 1917 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 1918 if (ent->aio_req_fd == fildes) { 1919 if ((ent->aio_req_flags & AIO_PENDING) != 0) { 1920 mutex_exit(&aiop->aio_mutex); 1921 *rval = AIO_NOTCANCELED; 1922 return (0); 1923 } 1924 } 1925 } 1926 } 1927 mutex_exit(&aiop->aio_mutex); 1928 *rval = AIO_ALLDONE; 1929 return (0); 1930 } 1931 1932 /* 1933 * solaris version of asynchronous read and write 1934 */ 1935 static int 1936 arw( 1937 int opcode, 1938 int fdes, 1939 char *bufp, 1940 int bufsize, 1941 offset_t offset, 1942 aio_result_t *resultp, 1943 int mode) 1944 { 1945 file_t *fp; 1946 int error; 1947 struct vnode *vp; 1948 aio_req_t *reqp; 1949 aio_t *aiop; 1950 int (*aio_func)(); 1951 #ifdef _LP64 1952 aiocb_t aiocb; 1953 #else 1954 aiocb64_32_t aiocb64; 1955 #endif 1956 1957 aiop = curproc->p_aio; 1958 if (aiop == NULL) 1959 return (EINVAL); 1960 1961 if ((fp = getf(fdes)) == NULL) { 1962 return (EBADF); 1963 } 1964 1965 /* 1966 * check the permission of the partition 1967 */ 1968 if ((fp->f_flag & mode) == 0) { 1969 releasef(fdes); 1970 return (EBADF); 1971 } 1972 1973 vp = fp->f_vnode; 1974 aio_func = check_vp(vp, mode); 1975 if (aio_func == NULL) { 1976 releasef(fdes); 1977 return (EBADFD); 1978 } 1979 #ifdef _LP64 1980 aiocb.aio_fildes = fdes; 1981 aiocb.aio_buf = bufp; 1982 aiocb.aio_nbytes = bufsize; 1983 aiocb.aio_offset = offset; 1984 aiocb.aio_sigevent.sigev_notify = 0; 1985 error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp); 1986 #else 1987 aiocb64.aio_fildes = fdes; 1988 aiocb64.aio_buf = (caddr32_t)bufp; 1989 aiocb64.aio_nbytes = bufsize; 1990 aiocb64.aio_offset = offset; 1991 aiocb64.aio_sigevent.sigev_notify = 0; 1992 error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp); 1993 #endif 1994 if (error) { 1995 releasef(fdes); 1996 return (error); 1997 } 1998 1999 /* 2000 * enable polling on this request if the opcode has 2001 * the AIO poll bit set 2002 */ 2003 if (opcode & AIO_POLL_BIT) 2004 reqp->aio_req_flags |= AIO_POLL; 2005 2006 if (bufsize == 0) { 2007 clear_active_fd(fdes); 2008 aio_zerolen(reqp); 2009 return (0); 2010 } 2011 /* 2012 * send the request to driver. 2013 */ 2014 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED()); 2015 /* 2016 * the fd is stored in the aio_req_t by aio_req_setup(), and 2017 * is released by the aio_cleanup_thread() when the IO has 2018 * completed. 2019 */ 2020 if (error) { 2021 releasef(fdes); 2022 mutex_enter(&aiop->aio_mutex); 2023 aio_req_free(aiop, reqp); 2024 aiop->aio_pending--; 2025 if (aiop->aio_flags & AIO_REQ_BLOCK) 2026 cv_signal(&aiop->aio_cleanupcv); 2027 mutex_exit(&aiop->aio_mutex); 2028 return (error); 2029 } 2030 clear_active_fd(fdes); 2031 return (0); 2032 } 2033 2034 /* 2035 * posix version of asynchronous read and write 2036 */ 2037 static int 2038 aiorw( 2039 int opcode, 2040 void *aiocb_arg, 2041 int mode, 2042 int run_mode) 2043 { 2044 #ifdef _SYSCALL32_IMPL 2045 aiocb32_t aiocb32; 2046 struct sigevent32 *sigev32; 2047 port_notify32_t pntfy32; 2048 #endif 2049 aiocb64_32_t aiocb64; 2050 aiocb_t aiocb; 2051 file_t *fp; 2052 int error, fd; 2053 size_t bufsize; 2054 struct vnode *vp; 2055 aio_req_t *reqp; 2056 aio_t *aiop; 2057 int (*aio_func)(); 2058 aio_result_t *resultp; 2059 struct sigevent *sigev; 2060 model_t model; 2061 int aio_use_port = 0; 2062 port_notify_t pntfy; 2063 2064 model = get_udatamodel(); 2065 aiop = curproc->p_aio; 2066 if (aiop == NULL) 2067 return (EINVAL); 2068 2069 if (model == DATAMODEL_NATIVE) { 2070 if (run_mode != AIO_LARGEFILE) { 2071 if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t))) 2072 return (EFAULT); 2073 bufsize = aiocb.aio_nbytes; 2074 resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp); 2075 if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) { 2076 return (EBADF); 2077 } 2078 sigev = &aiocb.aio_sigevent; 2079 } else { 2080 /* 2081 * We come here only when we make largefile 2082 * call on 32 bit kernel using 32 bit library. 2083 */ 2084 if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t))) 2085 return (EFAULT); 2086 bufsize = aiocb64.aio_nbytes; 2087 resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg) 2088 ->aio_resultp); 2089 if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) 2090 return (EBADF); 2091 sigev = (struct sigevent *)&aiocb64.aio_sigevent; 2092 } 2093 2094 if (sigev->sigev_notify == SIGEV_PORT) { 2095 if (copyin((void *)sigev->sigev_value.sival_ptr, 2096 &pntfy, sizeof (port_notify_t))) { 2097 releasef(fd); 2098 return (EFAULT); 2099 } 2100 aio_use_port = 1; 2101 } else if (sigev->sigev_notify == SIGEV_THREAD) { 2102 pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo; 2103 pntfy.portnfy_user = 2104 aiocb.aio_sigevent.sigev_value.sival_ptr; 2105 aio_use_port = 1; 2106 } 2107 } 2108 #ifdef _SYSCALL32_IMPL 2109 else { 2110 if (run_mode == AIO_32) { 2111 /* 32 bit system call is being made on 64 bit kernel */ 2112 if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t))) 2113 return (EFAULT); 2114 2115 bufsize = aiocb32.aio_nbytes; 2116 aiocb_32ton(&aiocb32, &aiocb); 2117 resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)-> 2118 aio_resultp); 2119 if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) { 2120 return (EBADF); 2121 } 2122 sigev32 = &aiocb32.aio_sigevent; 2123 } else if (run_mode == AIO_LARGEFILE) { 2124 /* 2125 * We come here only when we make largefile 2126 * call on 64 bit kernel using 32 bit library. 2127 */ 2128 if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t))) 2129 return (EFAULT); 2130 bufsize = aiocb64.aio_nbytes; 2131 aiocb_LFton(&aiocb64, &aiocb); 2132 resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg) 2133 ->aio_resultp); 2134 if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) 2135 return (EBADF); 2136 sigev32 = &aiocb64.aio_sigevent; 2137 } 2138 2139 if (sigev32->sigev_notify == SIGEV_PORT) { 2140 if (copyin( 2141 (void *)(uintptr_t)sigev32->sigev_value.sival_ptr, 2142 &pntfy32, sizeof (port_notify32_t))) { 2143 releasef(fd); 2144 return (EFAULT); 2145 } 2146 pntfy.portnfy_port = pntfy32.portnfy_port; 2147 pntfy.portnfy_user = (void *)(uintptr_t) 2148 pntfy32.portnfy_user; 2149 aio_use_port = 1; 2150 } else if (sigev32->sigev_notify == SIGEV_THREAD) { 2151 pntfy.portnfy_port = sigev32->sigev_signo; 2152 pntfy.portnfy_user = (void *)(uintptr_t) 2153 sigev32->sigev_value.sival_ptr; 2154 aio_use_port = 1; 2155 } 2156 } 2157 #endif /* _SYSCALL32_IMPL */ 2158 2159 /* 2160 * check the permission of the partition 2161 */ 2162 2163 if ((fp->f_flag & mode) == 0) { 2164 releasef(fd); 2165 return (EBADF); 2166 } 2167 2168 vp = fp->f_vnode; 2169 aio_func = check_vp(vp, mode); 2170 if (aio_func == NULL) { 2171 releasef(fd); 2172 return (EBADFD); 2173 } 2174 if (run_mode == AIO_LARGEFILE) 2175 error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp); 2176 else 2177 error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp); 2178 2179 if (error) { 2180 releasef(fd); 2181 return (error); 2182 } 2183 /* 2184 * enable polling on this request if the opcode has 2185 * the AIO poll bit set 2186 */ 2187 if (opcode & AIO_POLL_BIT) 2188 reqp->aio_req_flags |= AIO_POLL; 2189 2190 if (model == DATAMODEL_NATIVE) 2191 reqp->aio_req_iocb.iocb = aiocb_arg; 2192 #ifdef _SYSCALL32_IMPL 2193 else 2194 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg; 2195 #endif 2196 2197 if (aio_use_port) { 2198 int event = (run_mode == AIO_LARGEFILE)? 2199 ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) : 2200 ((mode == FREAD)? AIOAREAD : AIOAWRITE); 2201 error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event); 2202 } 2203 2204 /* 2205 * send the request to driver. 2206 */ 2207 if (error == 0) { 2208 if (bufsize == 0) { 2209 clear_active_fd(fd); 2210 aio_zerolen(reqp); 2211 return (0); 2212 } 2213 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED()); 2214 } 2215 2216 /* 2217 * the fd is stored in the aio_req_t by aio_req_setup(), and 2218 * is released by the aio_cleanup_thread() when the IO has 2219 * completed. 2220 */ 2221 if (error) { 2222 releasef(fd); 2223 mutex_enter(&aiop->aio_mutex); 2224 if (aio_use_port) 2225 aio_deq(&aiop->aio_portpending, reqp); 2226 aio_req_free(aiop, reqp); 2227 aiop->aio_pending--; 2228 if (aiop->aio_flags & AIO_REQ_BLOCK) 2229 cv_signal(&aiop->aio_cleanupcv); 2230 mutex_exit(&aiop->aio_mutex); 2231 return (error); 2232 } 2233 clear_active_fd(fd); 2234 return (0); 2235 } 2236 2237 2238 /* 2239 * set error for a list IO entry that failed. 2240 */ 2241 static void 2242 lio_set_error(aio_req_t *reqp, int portused) 2243 { 2244 aio_t *aiop = curproc->p_aio; 2245 2246 if (aiop == NULL) 2247 return; 2248 2249 mutex_enter(&aiop->aio_mutex); 2250 if (portused) 2251 aio_deq(&aiop->aio_portpending, reqp); 2252 aiop->aio_pending--; 2253 /* request failed, AIO_PHYSIODONE set to aviod physio cleanup. */ 2254 reqp->aio_req_flags |= AIO_PHYSIODONE; 2255 /* 2256 * Need to free the request now as its never 2257 * going to get on the done queue 2258 * 2259 * Note: aio_outstanding is decremented in 2260 * aio_req_free() 2261 */ 2262 aio_req_free(aiop, reqp); 2263 if (aiop->aio_flags & AIO_REQ_BLOCK) 2264 cv_signal(&aiop->aio_cleanupcv); 2265 mutex_exit(&aiop->aio_mutex); 2266 } 2267 2268 /* 2269 * check if a specified request is done, and remove it from 2270 * the done queue. otherwise remove anybody from the done queue 2271 * if NULL is specified. 2272 */ 2273 static aio_req_t * 2274 aio_req_done(void *resultp) 2275 { 2276 aio_req_t **bucket; 2277 aio_req_t *ent; 2278 aio_t *aiop = curproc->p_aio; 2279 long index; 2280 2281 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex)); 2282 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2283 2284 if (resultp) { 2285 index = AIO_HASH(resultp); 2286 bucket = &aiop->aio_hash[index]; 2287 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 2288 if (ent->aio_req_resultp == (aio_result_t *)resultp) { 2289 if (ent->aio_req_flags & AIO_DONEQ) { 2290 return (aio_req_remove(ent)); 2291 } 2292 return (NULL); 2293 } 2294 } 2295 /* no match, resultp is invalid */ 2296 return (NULL); 2297 } 2298 return (aio_req_remove(NULL)); 2299 } 2300 2301 /* 2302 * determine if a user-level resultp pointer is associated with an 2303 * active IO request. Zero is returned when the request is done, 2304 * and the request is removed from the done queue. Only when the 2305 * return value is zero, is the "reqp" pointer valid. One is returned 2306 * when the request is inprogress. Two is returned when the request 2307 * is invalid. 2308 */ 2309 static int 2310 aio_req_find(aio_result_t *resultp, aio_req_t **reqp) 2311 { 2312 aio_req_t **bucket; 2313 aio_req_t *ent; 2314 aio_t *aiop = curproc->p_aio; 2315 long index; 2316 2317 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex)); 2318 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2319 2320 index = AIO_HASH(resultp); 2321 bucket = &aiop->aio_hash[index]; 2322 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 2323 if (ent->aio_req_resultp == resultp) { 2324 if (ent->aio_req_flags & AIO_DONEQ) { 2325 *reqp = aio_req_remove(ent); 2326 return (0); 2327 } 2328 return (1); 2329 } 2330 } 2331 /* no match, resultp is invalid */ 2332 return (2); 2333 } 2334 2335 /* 2336 * remove a request from the done queue. 2337 */ 2338 static aio_req_t * 2339 aio_req_remove(aio_req_t *reqp) 2340 { 2341 aio_t *aiop = curproc->p_aio; 2342 2343 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2344 2345 if (reqp != NULL) { 2346 ASSERT(reqp->aio_req_flags & AIO_DONEQ); 2347 if (reqp->aio_req_next == reqp) { 2348 /* only one request on queue */ 2349 if (reqp == aiop->aio_doneq) { 2350 aiop->aio_doneq = NULL; 2351 } else { 2352 ASSERT(reqp == aiop->aio_cleanupq); 2353 aiop->aio_cleanupq = NULL; 2354 } 2355 } else { 2356 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 2357 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 2358 /* 2359 * The request can be either on the aio_doneq or the 2360 * aio_cleanupq 2361 */ 2362 if (reqp == aiop->aio_doneq) 2363 aiop->aio_doneq = reqp->aio_req_next; 2364 2365 if (reqp == aiop->aio_cleanupq) 2366 aiop->aio_cleanupq = reqp->aio_req_next; 2367 } 2368 reqp->aio_req_flags &= ~AIO_DONEQ; 2369 reqp->aio_req_next = NULL; 2370 reqp->aio_req_prev = NULL; 2371 } else if ((reqp = aiop->aio_doneq) != NULL) { 2372 ASSERT(reqp->aio_req_flags & AIO_DONEQ); 2373 if (reqp == reqp->aio_req_next) { 2374 /* only one request on queue */ 2375 aiop->aio_doneq = NULL; 2376 } else { 2377 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 2378 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 2379 aiop->aio_doneq = reqp->aio_req_next; 2380 } 2381 reqp->aio_req_flags &= ~AIO_DONEQ; 2382 reqp->aio_req_next = NULL; 2383 reqp->aio_req_prev = NULL; 2384 } 2385 if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN)) 2386 cv_broadcast(&aiop->aio_waitcv); 2387 return (reqp); 2388 } 2389 2390 static int 2391 aio_req_setup( 2392 aio_req_t **reqpp, 2393 aio_t *aiop, 2394 aiocb_t *arg, 2395 aio_result_t *resultp, 2396 vnode_t *vp) 2397 { 2398 sigqueue_t *sqp = NULL; 2399 aio_req_t *reqp; 2400 struct uio *uio; 2401 struct sigevent *sigev; 2402 int error; 2403 2404 sigev = &arg->aio_sigevent; 2405 if (sigev->sigev_notify == SIGEV_SIGNAL && 2406 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) { 2407 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 2408 if (sqp == NULL) 2409 return (EAGAIN); 2410 sqp->sq_func = NULL; 2411 sqp->sq_next = NULL; 2412 sqp->sq_info.si_code = SI_ASYNCIO; 2413 sqp->sq_info.si_pid = curproc->p_pid; 2414 sqp->sq_info.si_ctid = PRCTID(curproc); 2415 sqp->sq_info.si_zoneid = getzoneid(); 2416 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 2417 sqp->sq_info.si_signo = sigev->sigev_signo; 2418 sqp->sq_info.si_value = sigev->sigev_value; 2419 } 2420 2421 mutex_enter(&aiop->aio_mutex); 2422 2423 if (aiop->aio_flags & AIO_REQ_BLOCK) { 2424 mutex_exit(&aiop->aio_mutex); 2425 if (sqp) 2426 kmem_free(sqp, sizeof (sigqueue_t)); 2427 return (EIO); 2428 } 2429 /* 2430 * get an aio_reqp from the free list or allocate one 2431 * from dynamic memory. 2432 */ 2433 if (error = aio_req_alloc(&reqp, resultp)) { 2434 mutex_exit(&aiop->aio_mutex); 2435 if (sqp) 2436 kmem_free(sqp, sizeof (sigqueue_t)); 2437 return (error); 2438 } 2439 aiop->aio_pending++; 2440 aiop->aio_outstanding++; 2441 reqp->aio_req_flags = AIO_PENDING; 2442 if (sigev->sigev_notify == SIGEV_THREAD || 2443 sigev->sigev_notify == SIGEV_PORT) 2444 aio_enq(&aiop->aio_portpending, reqp, 0); 2445 mutex_exit(&aiop->aio_mutex); 2446 /* 2447 * initialize aio request. 2448 */ 2449 reqp->aio_req_fd = arg->aio_fildes; 2450 reqp->aio_req_sigqp = sqp; 2451 reqp->aio_req_iocb.iocb = NULL; 2452 reqp->aio_req_lio = NULL; 2453 reqp->aio_req_buf.b_file = vp; 2454 uio = reqp->aio_req.aio_uio; 2455 uio->uio_iovcnt = 1; 2456 uio->uio_iov->iov_base = (caddr_t)arg->aio_buf; 2457 uio->uio_iov->iov_len = arg->aio_nbytes; 2458 uio->uio_loffset = arg->aio_offset; 2459 *reqpp = reqp; 2460 return (0); 2461 } 2462 2463 /* 2464 * Allocate p_aio struct. 2465 */ 2466 static aio_t * 2467 aio_aiop_alloc(void) 2468 { 2469 aio_t *aiop; 2470 2471 ASSERT(MUTEX_HELD(&curproc->p_lock)); 2472 2473 aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP); 2474 if (aiop) { 2475 mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL); 2476 mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT, 2477 NULL); 2478 mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL); 2479 } 2480 return (aiop); 2481 } 2482 2483 /* 2484 * Allocate an aio_req struct. 2485 */ 2486 static int 2487 aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp) 2488 { 2489 aio_req_t *reqp; 2490 aio_t *aiop = curproc->p_aio; 2491 2492 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2493 2494 if ((reqp = aiop->aio_free) != NULL) { 2495 aiop->aio_free = reqp->aio_req_next; 2496 bzero(reqp, sizeof (*reqp)); 2497 } else { 2498 /* 2499 * Check whether memory is getting tight. 2500 * This is a temporary mechanism to avoid memory 2501 * exhaustion by a single process until we come up 2502 * with a per process solution such as setrlimit(). 2503 */ 2504 if (freemem < desfree) 2505 return (EAGAIN); 2506 reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP); 2507 if (reqp == NULL) 2508 return (EAGAIN); 2509 } 2510 reqp->aio_req.aio_uio = &reqp->aio_req_uio; 2511 reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov; 2512 reqp->aio_req.aio_private = reqp; 2513 reqp->aio_req_buf.b_offset = -1; 2514 reqp->aio_req_resultp = resultp; 2515 if (aio_hash_insert(reqp, aiop)) { 2516 reqp->aio_req_next = aiop->aio_free; 2517 aiop->aio_free = reqp; 2518 return (EBUSY); 2519 } 2520 *nreqp = reqp; 2521 return (0); 2522 } 2523 2524 /* 2525 * Allocate an aio_lio_t struct. 2526 */ 2527 static int 2528 aio_lio_alloc(aio_lio_t **head) 2529 { 2530 aio_lio_t *liop; 2531 aio_t *aiop = curproc->p_aio; 2532 2533 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2534 2535 if ((liop = aiop->aio_lio_free) != NULL) { 2536 aiop->aio_lio_free = liop->lio_next; 2537 } else { 2538 /* 2539 * Check whether memory is getting tight. 2540 * This is a temporary mechanism to avoid memory 2541 * exhaustion by a single process until we come up 2542 * with a per process solution such as setrlimit(). 2543 */ 2544 if (freemem < desfree) 2545 return (EAGAIN); 2546 2547 liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP); 2548 if (liop == NULL) 2549 return (EAGAIN); 2550 } 2551 *head = liop; 2552 return (0); 2553 } 2554 2555 /* 2556 * this is a special per-process thread that is only activated if 2557 * the process is unmapping a segment with outstanding aio. normally, 2558 * the process will have completed the aio before unmapping the 2559 * segment. If the process does unmap a segment with outstanding aio, 2560 * this special thread will guarentee that the locked pages due to 2561 * aphysio() are released, thereby permitting the segment to be 2562 * unmapped. In addition to this, the cleanup thread is woken up 2563 * during DR operations to release the locked pages. 2564 */ 2565 2566 static int 2567 aio_cleanup_thread(aio_t *aiop) 2568 { 2569 proc_t *p = curproc; 2570 struct as *as = p->p_as; 2571 int poked = 0; 2572 kcondvar_t *cvp; 2573 int exit_flag = 0; 2574 int rqclnup = 0; 2575 2576 sigfillset(&curthread->t_hold); 2577 sigdiffset(&curthread->t_hold, &cantmask); 2578 for (;;) { 2579 /* 2580 * if a segment is being unmapped, and the current 2581 * process's done queue is not empty, then every request 2582 * on the doneq with locked resources should be forced 2583 * to release their locks. By moving the doneq request 2584 * to the cleanupq, aio_cleanup() will process the cleanupq, 2585 * and place requests back onto the doneq. All requests 2586 * processed by aio_cleanup() will have their physical 2587 * resources unlocked. 2588 */ 2589 mutex_enter(&aiop->aio_mutex); 2590 if ((aiop->aio_flags & AIO_CLEANUP) == 0) { 2591 aiop->aio_flags |= AIO_CLEANUP; 2592 mutex_enter(&as->a_contents); 2593 if (aiop->aio_rqclnup) { 2594 aiop->aio_rqclnup = 0; 2595 rqclnup = 1; 2596 } 2597 mutex_exit(&as->a_contents); 2598 if (aiop->aio_doneq) { 2599 aio_req_t *doneqhead = aiop->aio_doneq; 2600 aiop->aio_doneq = NULL; 2601 aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ); 2602 } 2603 } 2604 mutex_exit(&aiop->aio_mutex); 2605 aio_cleanup(AIO_CLEANUP_THREAD); 2606 /* 2607 * thread should block on the cleanupcv while 2608 * AIO_CLEANUP is set. 2609 */ 2610 cvp = &aiop->aio_cleanupcv; 2611 mutex_enter(&aiop->aio_mutex); 2612 2613 if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL || 2614 aiop->aio_notifyq != NULL || 2615 aiop->aio_portcleanupq != NULL) { 2616 mutex_exit(&aiop->aio_mutex); 2617 continue; 2618 } 2619 mutex_enter(&as->a_contents); 2620 2621 /* 2622 * AIO_CLEANUP determines when the cleanup thread 2623 * should be active. This flag is set when 2624 * the cleanup thread is awakened by as_unmap() or 2625 * due to DR operations. 2626 * The flag is cleared when the blocking as_unmap() 2627 * that originally awakened us is allowed to 2628 * complete. as_unmap() blocks when trying to 2629 * unmap a segment that has SOFTLOCKed pages. when 2630 * the segment's pages are all SOFTUNLOCKed, 2631 * as->a_flags & AS_UNMAPWAIT should be zero. 2632 * 2633 * In case of cleanup request by DR, the flag is cleared 2634 * once all the pending aio requests have been processed. 2635 * 2636 * The flag shouldn't be cleared right away if the 2637 * cleanup thread was interrupted because the process 2638 * is doing forkall(). This happens when cv_wait_sig() 2639 * returns zero, because it was awakened by a pokelwps(). 2640 * If the process is not exiting, it must be doing forkall(). 2641 */ 2642 if ((poked == 0) && 2643 ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) || 2644 (aiop->aio_pending == 0))) { 2645 aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT); 2646 cvp = &as->a_cv; 2647 rqclnup = 0; 2648 } 2649 mutex_exit(&aiop->aio_mutex); 2650 if (poked) { 2651 /* 2652 * If the process is exiting/killed, don't return 2653 * immediately without waiting for pending I/O's 2654 * and releasing the page locks. 2655 */ 2656 if (p->p_flag & (SEXITLWPS|SKILLED)) { 2657 /* 2658 * If exit_flag is set, then it is 2659 * safe to exit because we have released 2660 * page locks of completed I/O's. 2661 */ 2662 if (exit_flag) 2663 break; 2664 2665 mutex_exit(&as->a_contents); 2666 2667 /* 2668 * Wait for all the pending aio to complete. 2669 */ 2670 mutex_enter(&aiop->aio_mutex); 2671 aiop->aio_flags |= AIO_REQ_BLOCK; 2672 while (aiop->aio_pending != 0) 2673 cv_wait(&aiop->aio_cleanupcv, 2674 &aiop->aio_mutex); 2675 mutex_exit(&aiop->aio_mutex); 2676 exit_flag = 1; 2677 continue; 2678 } else if (p->p_flag & 2679 (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) { 2680 /* 2681 * hold LWP until it 2682 * is continued. 2683 */ 2684 mutex_exit(&as->a_contents); 2685 mutex_enter(&p->p_lock); 2686 stop(PR_SUSPENDED, SUSPEND_NORMAL); 2687 mutex_exit(&p->p_lock); 2688 poked = 0; 2689 continue; 2690 } 2691 } else { 2692 /* 2693 * When started this thread will sleep on as->a_cv. 2694 * as_unmap will awake this thread if the 2695 * segment has SOFTLOCKed pages (poked = 0). 2696 * 1. pokelwps() awakes this thread => 2697 * break the loop to check SEXITLWPS, SHOLDFORK, etc 2698 * 2. as_unmap awakes this thread => 2699 * to break the loop it is necessary that 2700 * - AS_UNMAPWAIT is set (as_unmap is waiting for 2701 * memory to be unlocked) 2702 * - AIO_CLEANUP is not set 2703 * (if AIO_CLEANUP is set we have to wait for 2704 * pending requests. aio_done will send a signal 2705 * for every request which completes to continue 2706 * unmapping the corresponding address range) 2707 * 3. A cleanup request will wake this thread up, ex. 2708 * by the DR operations. The aio_rqclnup flag will 2709 * be set. 2710 */ 2711 while (poked == 0) { 2712 /* 2713 * The clean up requests that came in 2714 * after we had just cleaned up, couldn't 2715 * be causing the unmap thread to block - as 2716 * unmap event happened first. 2717 * Let aio_done() wake us up if it sees a need. 2718 */ 2719 if (aiop->aio_rqclnup && 2720 (aiop->aio_flags & AIO_CLEANUP) == 0) 2721 break; 2722 poked = !cv_wait_sig(cvp, &as->a_contents); 2723 if (AS_ISUNMAPWAIT(as) == 0) 2724 cv_signal(cvp); 2725 if (aiop->aio_outstanding != 0) 2726 break; 2727 } 2728 } 2729 mutex_exit(&as->a_contents); 2730 } 2731 exit: 2732 mutex_exit(&as->a_contents); 2733 ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED))); 2734 aston(curthread); /* make thread do post_syscall */ 2735 return (0); 2736 } 2737 2738 /* 2739 * save a reference to a user's outstanding aio in a hash list. 2740 */ 2741 static int 2742 aio_hash_insert( 2743 aio_req_t *aio_reqp, 2744 aio_t *aiop) 2745 { 2746 long index; 2747 aio_result_t *resultp = aio_reqp->aio_req_resultp; 2748 aio_req_t *current; 2749 aio_req_t **nextp; 2750 2751 index = AIO_HASH(resultp); 2752 nextp = &aiop->aio_hash[index]; 2753 while ((current = *nextp) != NULL) { 2754 if (current->aio_req_resultp == resultp) 2755 return (DUPLICATE); 2756 nextp = ¤t->aio_hash_next; 2757 } 2758 *nextp = aio_reqp; 2759 aio_reqp->aio_hash_next = NULL; 2760 return (0); 2761 } 2762 2763 static int 2764 (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *, 2765 cred_t *) 2766 { 2767 struct snode *sp; 2768 dev_t dev; 2769 struct cb_ops *cb; 2770 major_t major; 2771 int (*aio_func)(); 2772 2773 dev = vp->v_rdev; 2774 major = getmajor(dev); 2775 2776 /* 2777 * return NULL for requests to files and STREAMs so 2778 * that libaio takes care of them. 2779 */ 2780 if (vp->v_type == VCHR) { 2781 /* no stream device for kaio */ 2782 if (STREAMSTAB(major)) { 2783 return (NULL); 2784 } 2785 } else { 2786 return (NULL); 2787 } 2788 2789 /* 2790 * Check old drivers which do not have async I/O entry points. 2791 */ 2792 if (devopsp[major]->devo_rev < 3) 2793 return (NULL); 2794 2795 cb = devopsp[major]->devo_cb_ops; 2796 2797 if (cb->cb_rev < 1) 2798 return (NULL); 2799 2800 /* 2801 * Check whether this device is a block device. 2802 * Kaio is not supported for devices like tty. 2803 */ 2804 if (cb->cb_strategy == nodev || cb->cb_strategy == NULL) 2805 return (NULL); 2806 2807 /* 2808 * Clustering: If vnode is a PXFS vnode, then the device may be remote. 2809 * We cannot call the driver directly. Instead return the 2810 * PXFS functions. 2811 */ 2812 2813 if (IS_PXFSVP(vp)) { 2814 if (mode & FREAD) 2815 return (clpxfs_aio_read); 2816 else 2817 return (clpxfs_aio_write); 2818 } 2819 if (mode & FREAD) 2820 aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read; 2821 else 2822 aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write; 2823 2824 /* 2825 * Do we need this ? 2826 * nodev returns ENXIO anyway. 2827 */ 2828 if (aio_func == nodev) 2829 return (NULL); 2830 2831 sp = VTOS(vp); 2832 smark(sp, SACC); 2833 return (aio_func); 2834 } 2835 2836 /* 2837 * Clustering: We want check_vp to return a function prototyped 2838 * correctly that will be common to both PXFS and regular case. 2839 * We define this intermediate function that will do the right 2840 * thing for driver cases. 2841 */ 2842 2843 static int 2844 driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 2845 { 2846 dev_t dev; 2847 struct cb_ops *cb; 2848 2849 ASSERT(vp->v_type == VCHR); 2850 ASSERT(!IS_PXFSVP(vp)); 2851 dev = VTOS(vp)->s_dev; 2852 ASSERT(STREAMSTAB(getmajor(dev)) == NULL); 2853 2854 cb = devopsp[getmajor(dev)]->devo_cb_ops; 2855 2856 ASSERT(cb->cb_awrite != nodev); 2857 return ((*cb->cb_awrite)(dev, aio, cred_p)); 2858 } 2859 2860 /* 2861 * Clustering: We want check_vp to return a function prototyped 2862 * correctly that will be common to both PXFS and regular case. 2863 * We define this intermediate function that will do the right 2864 * thing for driver cases. 2865 */ 2866 2867 static int 2868 driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 2869 { 2870 dev_t dev; 2871 struct cb_ops *cb; 2872 2873 ASSERT(vp->v_type == VCHR); 2874 ASSERT(!IS_PXFSVP(vp)); 2875 dev = VTOS(vp)->s_dev; 2876 ASSERT(!STREAMSTAB(getmajor(dev))); 2877 2878 cb = devopsp[getmajor(dev)]->devo_cb_ops; 2879 2880 ASSERT(cb->cb_aread != nodev); 2881 return ((*cb->cb_aread)(dev, aio, cred_p)); 2882 } 2883 2884 /* 2885 * This routine is called when a largefile call is made by a 32bit 2886 * process on a ILP32 or LP64 kernel. All 64bit processes are large 2887 * file by definition and will call alio() instead. 2888 */ 2889 static int 2890 alioLF( 2891 int mode_arg, 2892 void *aiocb_arg, 2893 int nent, 2894 void *sigev) 2895 { 2896 file_t *fp; 2897 file_t *prev_fp = NULL; 2898 int prev_mode = -1; 2899 struct vnode *vp; 2900 aio_lio_t *head; 2901 aio_req_t *reqp; 2902 aio_t *aiop; 2903 caddr_t cbplist; 2904 aiocb64_32_t cb64; 2905 aiocb64_32_t *aiocb = &cb64; 2906 aiocb64_32_t *cbp; 2907 caddr32_t *ucbp; 2908 #ifdef _LP64 2909 aiocb_t aiocb_n; 2910 #endif 2911 struct sigevent32 sigevk; 2912 sigqueue_t *sqp; 2913 int (*aio_func)(); 2914 int mode; 2915 int error = 0; 2916 int aio_errors = 0; 2917 int i; 2918 size_t ssize; 2919 int deadhead = 0; 2920 int aio_notsupported = 0; 2921 int lio_head_port; 2922 int aio_port; 2923 int aio_thread; 2924 port_kevent_t *pkevtp = NULL; 2925 int portused = 0; 2926 port_notify32_t pnotify; 2927 int event; 2928 2929 aiop = curproc->p_aio; 2930 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 2931 return (EINVAL); 2932 2933 ASSERT(get_udatamodel() == DATAMODEL_ILP32); 2934 2935 ssize = (sizeof (caddr32_t) * nent); 2936 cbplist = kmem_alloc(ssize, KM_SLEEP); 2937 ucbp = (caddr32_t *)cbplist; 2938 2939 if (copyin(aiocb_arg, cbplist, ssize) || 2940 (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) { 2941 kmem_free(cbplist, ssize); 2942 return (EFAULT); 2943 } 2944 2945 /* Event Ports */ 2946 if (sigev && 2947 (sigevk.sigev_notify == SIGEV_THREAD || 2948 sigevk.sigev_notify == SIGEV_PORT)) { 2949 if (sigevk.sigev_notify == SIGEV_THREAD) { 2950 pnotify.portnfy_port = sigevk.sigev_signo; 2951 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 2952 } else if (copyin( 2953 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr, 2954 &pnotify, sizeof (pnotify))) { 2955 kmem_free(cbplist, ssize); 2956 return (EFAULT); 2957 } 2958 error = port_alloc_event(pnotify.portnfy_port, 2959 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 2960 if (error) { 2961 if (error == ENOMEM || error == EAGAIN) 2962 error = EAGAIN; 2963 else 2964 error = EINVAL; 2965 kmem_free(cbplist, ssize); 2966 return (error); 2967 } 2968 lio_head_port = pnotify.portnfy_port; 2969 portused = 1; 2970 } 2971 2972 /* 2973 * a list head should be allocated if notification is 2974 * enabled for this list. 2975 */ 2976 head = NULL; 2977 2978 if (mode_arg == LIO_WAIT || sigev) { 2979 mutex_enter(&aiop->aio_mutex); 2980 error = aio_lio_alloc(&head); 2981 mutex_exit(&aiop->aio_mutex); 2982 if (error) 2983 goto done; 2984 deadhead = 1; 2985 head->lio_nent = nent; 2986 head->lio_refcnt = nent; 2987 head->lio_port = -1; 2988 head->lio_portkev = NULL; 2989 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 2990 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 2991 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 2992 if (sqp == NULL) { 2993 error = EAGAIN; 2994 goto done; 2995 } 2996 sqp->sq_func = NULL; 2997 sqp->sq_next = NULL; 2998 sqp->sq_info.si_code = SI_ASYNCIO; 2999 sqp->sq_info.si_pid = curproc->p_pid; 3000 sqp->sq_info.si_ctid = PRCTID(curproc); 3001 sqp->sq_info.si_zoneid = getzoneid(); 3002 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3003 sqp->sq_info.si_signo = sigevk.sigev_signo; 3004 sqp->sq_info.si_value.sival_int = 3005 sigevk.sigev_value.sival_int; 3006 head->lio_sigqp = sqp; 3007 } else { 3008 head->lio_sigqp = NULL; 3009 } 3010 if (pkevtp) { 3011 /* 3012 * Prepare data to send when list of aiocb's 3013 * has completed. 3014 */ 3015 port_init_event(pkevtp, (uintptr_t)sigev, 3016 (void *)(uintptr_t)pnotify.portnfy_user, 3017 NULL, head); 3018 pkevtp->portkev_events = AIOLIO64; 3019 head->lio_portkev = pkevtp; 3020 head->lio_port = pnotify.portnfy_port; 3021 } 3022 } 3023 3024 for (i = 0; i < nent; i++, ucbp++) { 3025 3026 cbp = (aiocb64_32_t *)(uintptr_t)*ucbp; 3027 /* skip entry if it can't be copied. */ 3028 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) { 3029 if (head) { 3030 mutex_enter(&aiop->aio_mutex); 3031 head->lio_nent--; 3032 head->lio_refcnt--; 3033 mutex_exit(&aiop->aio_mutex); 3034 } 3035 continue; 3036 } 3037 3038 /* skip if opcode for aiocb is LIO_NOP */ 3039 mode = aiocb->aio_lio_opcode; 3040 if (mode == LIO_NOP) { 3041 cbp = NULL; 3042 if (head) { 3043 mutex_enter(&aiop->aio_mutex); 3044 head->lio_nent--; 3045 head->lio_refcnt--; 3046 mutex_exit(&aiop->aio_mutex); 3047 } 3048 continue; 3049 } 3050 3051 /* increment file descriptor's ref count. */ 3052 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 3053 lio_set_uerror(&cbp->aio_resultp, EBADF); 3054 if (head) { 3055 mutex_enter(&aiop->aio_mutex); 3056 head->lio_nent--; 3057 head->lio_refcnt--; 3058 mutex_exit(&aiop->aio_mutex); 3059 } 3060 aio_errors++; 3061 continue; 3062 } 3063 3064 /* 3065 * check the permission of the partition 3066 */ 3067 if ((fp->f_flag & mode) == 0) { 3068 releasef(aiocb->aio_fildes); 3069 lio_set_uerror(&cbp->aio_resultp, EBADF); 3070 if (head) { 3071 mutex_enter(&aiop->aio_mutex); 3072 head->lio_nent--; 3073 head->lio_refcnt--; 3074 mutex_exit(&aiop->aio_mutex); 3075 } 3076 aio_errors++; 3077 continue; 3078 } 3079 3080 /* 3081 * common case where requests are to the same fd 3082 * for the same r/w operation 3083 * for UFS, need to set EBADFD 3084 */ 3085 vp = fp->f_vnode; 3086 if (fp != prev_fp || mode != prev_mode) { 3087 aio_func = check_vp(vp, mode); 3088 if (aio_func == NULL) { 3089 prev_fp = NULL; 3090 releasef(aiocb->aio_fildes); 3091 lio_set_uerror(&cbp->aio_resultp, EBADFD); 3092 aio_notsupported++; 3093 if (head) { 3094 mutex_enter(&aiop->aio_mutex); 3095 head->lio_nent--; 3096 head->lio_refcnt--; 3097 mutex_exit(&aiop->aio_mutex); 3098 } 3099 continue; 3100 } else { 3101 prev_fp = fp; 3102 prev_mode = mode; 3103 } 3104 } 3105 3106 #ifdef _LP64 3107 aiocb_LFton(aiocb, &aiocb_n); 3108 error = aio_req_setup(&reqp, aiop, &aiocb_n, 3109 (aio_result_t *)&cbp->aio_resultp, vp); 3110 #else 3111 error = aio_req_setupLF(&reqp, aiop, aiocb, 3112 (aio_result_t *)&cbp->aio_resultp, vp); 3113 #endif /* _LP64 */ 3114 if (error) { 3115 releasef(aiocb->aio_fildes); 3116 lio_set_uerror(&cbp->aio_resultp, error); 3117 if (head) { 3118 mutex_enter(&aiop->aio_mutex); 3119 head->lio_nent--; 3120 head->lio_refcnt--; 3121 mutex_exit(&aiop->aio_mutex); 3122 } 3123 aio_errors++; 3124 continue; 3125 } 3126 3127 reqp->aio_req_lio = head; 3128 deadhead = 0; 3129 3130 /* 3131 * Set the errno field now before sending the request to 3132 * the driver to avoid a race condition 3133 */ 3134 (void) suword32(&cbp->aio_resultp.aio_errno, 3135 EINPROGRESS); 3136 3137 reqp->aio_req_iocb.iocb32 = *ucbp; 3138 3139 event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64; 3140 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 3141 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 3142 if (aio_port | aio_thread) { 3143 port_kevent_t *lpkevp; 3144 /* 3145 * Prepare data to send with each aiocb completed. 3146 */ 3147 if (aio_port) { 3148 void *paddr = (void *)(uintptr_t) 3149 aiocb->aio_sigevent.sigev_value.sival_ptr; 3150 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3151 error = EFAULT; 3152 } else { /* aio_thread */ 3153 pnotify.portnfy_port = 3154 aiocb->aio_sigevent.sigev_signo; 3155 pnotify.portnfy_user = 3156 aiocb->aio_sigevent.sigev_value.sival_ptr; 3157 } 3158 if (error) 3159 /* EMPTY */; 3160 else if (pkevtp != NULL && 3161 pnotify.portnfy_port == lio_head_port) 3162 error = port_dup_event(pkevtp, &lpkevp, 3163 PORT_ALLOC_DEFAULT); 3164 else 3165 error = port_alloc_event(pnotify.portnfy_port, 3166 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 3167 &lpkevp); 3168 if (error == 0) { 3169 port_init_event(lpkevp, (uintptr_t)*ucbp, 3170 (void *)(uintptr_t)pnotify.portnfy_user, 3171 aio_port_callback, reqp); 3172 lpkevp->portkev_events = event; 3173 reqp->aio_req_portkev = lpkevp; 3174 reqp->aio_req_port = pnotify.portnfy_port; 3175 } 3176 } 3177 3178 /* 3179 * send the request to driver. 3180 */ 3181 if (error == 0) { 3182 if (aiocb->aio_nbytes == 0) { 3183 clear_active_fd(aiocb->aio_fildes); 3184 aio_zerolen(reqp); 3185 continue; 3186 } 3187 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 3188 CRED()); 3189 } 3190 3191 /* 3192 * the fd's ref count is not decremented until the IO has 3193 * completed unless there was an error. 3194 */ 3195 if (error) { 3196 releasef(aiocb->aio_fildes); 3197 lio_set_uerror(&cbp->aio_resultp, error); 3198 if (head) { 3199 mutex_enter(&aiop->aio_mutex); 3200 head->lio_nent--; 3201 head->lio_refcnt--; 3202 mutex_exit(&aiop->aio_mutex); 3203 } 3204 if (error == ENOTSUP) 3205 aio_notsupported++; 3206 else 3207 aio_errors++; 3208 lio_set_error(reqp, portused); 3209 } else { 3210 clear_active_fd(aiocb->aio_fildes); 3211 } 3212 } 3213 3214 if (aio_notsupported) { 3215 error = ENOTSUP; 3216 } else if (aio_errors) { 3217 /* 3218 * return EIO if any request failed 3219 */ 3220 error = EIO; 3221 } 3222 3223 if (mode_arg == LIO_WAIT) { 3224 mutex_enter(&aiop->aio_mutex); 3225 while (head->lio_refcnt > 0) { 3226 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 3227 mutex_exit(&aiop->aio_mutex); 3228 error = EINTR; 3229 goto done; 3230 } 3231 } 3232 mutex_exit(&aiop->aio_mutex); 3233 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE); 3234 } 3235 3236 done: 3237 kmem_free(cbplist, ssize); 3238 if (deadhead) { 3239 if (head->lio_sigqp) 3240 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 3241 if (head->lio_portkev) 3242 port_free_event(head->lio_portkev); 3243 kmem_free(head, sizeof (aio_lio_t)); 3244 } 3245 return (error); 3246 } 3247 3248 #ifdef _SYSCALL32_IMPL 3249 static void 3250 aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest) 3251 { 3252 dest->aio_fildes = src->aio_fildes; 3253 dest->aio_buf = (void *)(uintptr_t)src->aio_buf; 3254 dest->aio_nbytes = (size_t)src->aio_nbytes; 3255 dest->aio_offset = (off_t)src->aio_offset; 3256 dest->aio_reqprio = src->aio_reqprio; 3257 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 3258 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 3259 3260 /* 3261 * See comment in sigqueue32() on handling of 32-bit 3262 * sigvals in a 64-bit kernel. 3263 */ 3264 dest->aio_sigevent.sigev_value.sival_int = 3265 (int)src->aio_sigevent.sigev_value.sival_int; 3266 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 3267 (uintptr_t)src->aio_sigevent.sigev_notify_function; 3268 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 3269 (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 3270 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 3271 dest->aio_lio_opcode = src->aio_lio_opcode; 3272 dest->aio_state = src->aio_state; 3273 dest->aio__pad[0] = src->aio__pad[0]; 3274 } 3275 #endif 3276 3277 /* 3278 * This function is used only for largefile calls made by 3279 * 32 bit applications. 3280 */ 3281 static int 3282 aio_req_setupLF( 3283 aio_req_t **reqpp, 3284 aio_t *aiop, 3285 aiocb64_32_t *arg, 3286 aio_result_t *resultp, 3287 vnode_t *vp) 3288 { 3289 sigqueue_t *sqp = NULL; 3290 aio_req_t *reqp; 3291 struct uio *uio; 3292 struct sigevent32 *sigev; 3293 int error; 3294 3295 sigev = &arg->aio_sigevent; 3296 if (sigev->sigev_notify == SIGEV_SIGNAL && 3297 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) { 3298 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3299 if (sqp == NULL) 3300 return (EAGAIN); 3301 sqp->sq_func = NULL; 3302 sqp->sq_next = NULL; 3303 sqp->sq_info.si_code = SI_ASYNCIO; 3304 sqp->sq_info.si_pid = curproc->p_pid; 3305 sqp->sq_info.si_ctid = PRCTID(curproc); 3306 sqp->sq_info.si_zoneid = getzoneid(); 3307 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3308 sqp->sq_info.si_signo = sigev->sigev_signo; 3309 sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int; 3310 } 3311 3312 mutex_enter(&aiop->aio_mutex); 3313 3314 if (aiop->aio_flags & AIO_REQ_BLOCK) { 3315 mutex_exit(&aiop->aio_mutex); 3316 if (sqp) 3317 kmem_free(sqp, sizeof (sigqueue_t)); 3318 return (EIO); 3319 } 3320 /* 3321 * get an aio_reqp from the free list or allocate one 3322 * from dynamic memory. 3323 */ 3324 if (error = aio_req_alloc(&reqp, resultp)) { 3325 mutex_exit(&aiop->aio_mutex); 3326 if (sqp) 3327 kmem_free(sqp, sizeof (sigqueue_t)); 3328 return (error); 3329 } 3330 aiop->aio_pending++; 3331 aiop->aio_outstanding++; 3332 reqp->aio_req_flags = AIO_PENDING; 3333 if (sigev->sigev_notify == SIGEV_THREAD || 3334 sigev->sigev_notify == SIGEV_PORT) 3335 aio_enq(&aiop->aio_portpending, reqp, 0); 3336 mutex_exit(&aiop->aio_mutex); 3337 /* 3338 * initialize aio request. 3339 */ 3340 reqp->aio_req_fd = arg->aio_fildes; 3341 reqp->aio_req_sigqp = sqp; 3342 reqp->aio_req_iocb.iocb = NULL; 3343 reqp->aio_req_lio = NULL; 3344 reqp->aio_req_buf.b_file = vp; 3345 uio = reqp->aio_req.aio_uio; 3346 uio->uio_iovcnt = 1; 3347 uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf; 3348 uio->uio_iov->iov_len = arg->aio_nbytes; 3349 uio->uio_loffset = arg->aio_offset; 3350 *reqpp = reqp; 3351 return (0); 3352 } 3353 3354 /* 3355 * This routine is called when a non largefile call is made by a 32bit 3356 * process on a ILP32 or LP64 kernel. 3357 */ 3358 static int 3359 alio32( 3360 int mode_arg, 3361 void *aiocb_arg, 3362 int nent, 3363 void *sigev) 3364 { 3365 file_t *fp; 3366 file_t *prev_fp = NULL; 3367 int prev_mode = -1; 3368 struct vnode *vp; 3369 aio_lio_t *head; 3370 aio_req_t *reqp; 3371 aio_t *aiop; 3372 caddr_t cbplist; 3373 aiocb_t cb; 3374 aiocb_t *aiocb = &cb; 3375 #ifdef _LP64 3376 aiocb32_t *cbp; 3377 caddr32_t *ucbp; 3378 aiocb32_t cb32; 3379 aiocb32_t *aiocb32 = &cb32; 3380 struct sigevent32 sigevk; 3381 #else 3382 aiocb_t *cbp, **ucbp; 3383 struct sigevent sigevk; 3384 #endif 3385 sigqueue_t *sqp; 3386 int (*aio_func)(); 3387 int mode; 3388 int error = 0; 3389 int aio_errors = 0; 3390 int i; 3391 size_t ssize; 3392 int deadhead = 0; 3393 int aio_notsupported = 0; 3394 int lio_head_port; 3395 int aio_port; 3396 int aio_thread; 3397 port_kevent_t *pkevtp = NULL; 3398 int portused = 0; 3399 #ifdef _LP64 3400 port_notify32_t pnotify; 3401 #else 3402 port_notify_t pnotify; 3403 #endif 3404 int event; 3405 3406 aiop = curproc->p_aio; 3407 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 3408 return (EINVAL); 3409 3410 #ifdef _LP64 3411 ssize = (sizeof (caddr32_t) * nent); 3412 #else 3413 ssize = (sizeof (aiocb_t *) * nent); 3414 #endif 3415 cbplist = kmem_alloc(ssize, KM_SLEEP); 3416 ucbp = (void *)cbplist; 3417 3418 if (copyin(aiocb_arg, cbplist, ssize) || 3419 (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) { 3420 kmem_free(cbplist, ssize); 3421 return (EFAULT); 3422 } 3423 3424 /* Event Ports */ 3425 if (sigev && 3426 (sigevk.sigev_notify == SIGEV_THREAD || 3427 sigevk.sigev_notify == SIGEV_PORT)) { 3428 if (sigevk.sigev_notify == SIGEV_THREAD) { 3429 pnotify.portnfy_port = sigevk.sigev_signo; 3430 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 3431 } else if (copyin( 3432 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr, 3433 &pnotify, sizeof (pnotify))) { 3434 kmem_free(cbplist, ssize); 3435 return (EFAULT); 3436 } 3437 error = port_alloc_event(pnotify.portnfy_port, 3438 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 3439 if (error) { 3440 if (error == ENOMEM || error == EAGAIN) 3441 error = EAGAIN; 3442 else 3443 error = EINVAL; 3444 kmem_free(cbplist, ssize); 3445 return (error); 3446 } 3447 lio_head_port = pnotify.portnfy_port; 3448 portused = 1; 3449 } 3450 3451 /* 3452 * a list head should be allocated if notification is 3453 * enabled for this list. 3454 */ 3455 head = NULL; 3456 3457 if (mode_arg == LIO_WAIT || sigev) { 3458 mutex_enter(&aiop->aio_mutex); 3459 error = aio_lio_alloc(&head); 3460 mutex_exit(&aiop->aio_mutex); 3461 if (error) 3462 goto done; 3463 deadhead = 1; 3464 head->lio_nent = nent; 3465 head->lio_refcnt = nent; 3466 head->lio_port = -1; 3467 head->lio_portkev = NULL; 3468 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 3469 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 3470 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3471 if (sqp == NULL) { 3472 error = EAGAIN; 3473 goto done; 3474 } 3475 sqp->sq_func = NULL; 3476 sqp->sq_next = NULL; 3477 sqp->sq_info.si_code = SI_ASYNCIO; 3478 sqp->sq_info.si_pid = curproc->p_pid; 3479 sqp->sq_info.si_ctid = PRCTID(curproc); 3480 sqp->sq_info.si_zoneid = getzoneid(); 3481 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3482 sqp->sq_info.si_signo = sigevk.sigev_signo; 3483 sqp->sq_info.si_value.sival_int = 3484 sigevk.sigev_value.sival_int; 3485 head->lio_sigqp = sqp; 3486 } else { 3487 head->lio_sigqp = NULL; 3488 } 3489 if (pkevtp) { 3490 /* 3491 * Prepare data to send when list of aiocb's has 3492 * completed. 3493 */ 3494 port_init_event(pkevtp, (uintptr_t)sigev, 3495 (void *)(uintptr_t)pnotify.portnfy_user, 3496 NULL, head); 3497 pkevtp->portkev_events = AIOLIO; 3498 head->lio_portkev = pkevtp; 3499 head->lio_port = pnotify.portnfy_port; 3500 } 3501 } 3502 3503 for (i = 0; i < nent; i++, ucbp++) { 3504 3505 /* skip entry if it can't be copied. */ 3506 #ifdef _LP64 3507 cbp = (aiocb32_t *)(uintptr_t)*ucbp; 3508 if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32))) 3509 #else 3510 cbp = (aiocb_t *)*ucbp; 3511 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) 3512 #endif 3513 { 3514 if (head) { 3515 mutex_enter(&aiop->aio_mutex); 3516 head->lio_nent--; 3517 head->lio_refcnt--; 3518 mutex_exit(&aiop->aio_mutex); 3519 } 3520 continue; 3521 } 3522 #ifdef _LP64 3523 /* 3524 * copy 32 bit structure into 64 bit structure 3525 */ 3526 aiocb_32ton(aiocb32, aiocb); 3527 #endif /* _LP64 */ 3528 3529 /* skip if opcode for aiocb is LIO_NOP */ 3530 mode = aiocb->aio_lio_opcode; 3531 if (mode == LIO_NOP) { 3532 cbp = NULL; 3533 if (head) { 3534 mutex_enter(&aiop->aio_mutex); 3535 head->lio_nent--; 3536 head->lio_refcnt--; 3537 mutex_exit(&aiop->aio_mutex); 3538 } 3539 continue; 3540 } 3541 3542 /* increment file descriptor's ref count. */ 3543 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 3544 lio_set_uerror(&cbp->aio_resultp, EBADF); 3545 if (head) { 3546 mutex_enter(&aiop->aio_mutex); 3547 head->lio_nent--; 3548 head->lio_refcnt--; 3549 mutex_exit(&aiop->aio_mutex); 3550 } 3551 aio_errors++; 3552 continue; 3553 } 3554 3555 /* 3556 * check the permission of the partition 3557 */ 3558 if ((fp->f_flag & mode) == 0) { 3559 releasef(aiocb->aio_fildes); 3560 lio_set_uerror(&cbp->aio_resultp, EBADF); 3561 if (head) { 3562 mutex_enter(&aiop->aio_mutex); 3563 head->lio_nent--; 3564 head->lio_refcnt--; 3565 mutex_exit(&aiop->aio_mutex); 3566 } 3567 aio_errors++; 3568 continue; 3569 } 3570 3571 /* 3572 * common case where requests are to the same fd 3573 * for the same r/w operation 3574 * for UFS, need to set EBADFD 3575 */ 3576 vp = fp->f_vnode; 3577 if (fp != prev_fp || mode != prev_mode) { 3578 aio_func = check_vp(vp, mode); 3579 if (aio_func == NULL) { 3580 prev_fp = NULL; 3581 releasef(aiocb->aio_fildes); 3582 lio_set_uerror(&cbp->aio_resultp, EBADFD); 3583 aio_notsupported++; 3584 if (head) { 3585 mutex_enter(&aiop->aio_mutex); 3586 head->lio_nent--; 3587 head->lio_refcnt--; 3588 mutex_exit(&aiop->aio_mutex); 3589 } 3590 continue; 3591 } else { 3592 prev_fp = fp; 3593 prev_mode = mode; 3594 } 3595 } 3596 3597 error = aio_req_setup(&reqp, aiop, aiocb, 3598 (aio_result_t *)&cbp->aio_resultp, vp); 3599 if (error) { 3600 releasef(aiocb->aio_fildes); 3601 lio_set_uerror(&cbp->aio_resultp, error); 3602 if (head) { 3603 mutex_enter(&aiop->aio_mutex); 3604 head->lio_nent--; 3605 head->lio_refcnt--; 3606 mutex_exit(&aiop->aio_mutex); 3607 } 3608 aio_errors++; 3609 continue; 3610 } 3611 3612 reqp->aio_req_lio = head; 3613 deadhead = 0; 3614 3615 /* 3616 * Set the errno field now before sending the request to 3617 * the driver to avoid a race condition 3618 */ 3619 (void) suword32(&cbp->aio_resultp.aio_errno, 3620 EINPROGRESS); 3621 3622 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp; 3623 3624 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE; 3625 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 3626 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 3627 if (aio_port | aio_thread) { 3628 port_kevent_t *lpkevp; 3629 /* 3630 * Prepare data to send with each aiocb completed. 3631 */ 3632 #ifdef _LP64 3633 if (aio_port) { 3634 void *paddr = (void *)(uintptr_t) 3635 aiocb32->aio_sigevent.sigev_value.sival_ptr; 3636 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3637 error = EFAULT; 3638 } else { /* aio_thread */ 3639 pnotify.portnfy_port = 3640 aiocb32->aio_sigevent.sigev_signo; 3641 pnotify.portnfy_user = 3642 aiocb32->aio_sigevent.sigev_value.sival_ptr; 3643 } 3644 #else 3645 if (aio_port) { 3646 void *paddr = 3647 aiocb->aio_sigevent.sigev_value.sival_ptr; 3648 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3649 error = EFAULT; 3650 } else { /* aio_thread */ 3651 pnotify.portnfy_port = 3652 aiocb->aio_sigevent.sigev_signo; 3653 pnotify.portnfy_user = 3654 aiocb->aio_sigevent.sigev_value.sival_ptr; 3655 } 3656 #endif 3657 if (error) 3658 /* EMPTY */; 3659 else if (pkevtp != NULL && 3660 pnotify.portnfy_port == lio_head_port) 3661 error = port_dup_event(pkevtp, &lpkevp, 3662 PORT_ALLOC_DEFAULT); 3663 else 3664 error = port_alloc_event(pnotify.portnfy_port, 3665 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 3666 &lpkevp); 3667 if (error == 0) { 3668 port_init_event(lpkevp, (uintptr_t)cbp, 3669 (void *)(uintptr_t)pnotify.portnfy_user, 3670 aio_port_callback, reqp); 3671 lpkevp->portkev_events = event; 3672 reqp->aio_req_portkev = lpkevp; 3673 reqp->aio_req_port = pnotify.portnfy_port; 3674 } 3675 } 3676 3677 /* 3678 * send the request to driver. 3679 */ 3680 if (error == 0) { 3681 if (aiocb->aio_nbytes == 0) { 3682 clear_active_fd(aiocb->aio_fildes); 3683 aio_zerolen(reqp); 3684 continue; 3685 } 3686 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 3687 CRED()); 3688 } 3689 3690 /* 3691 * the fd's ref count is not decremented until the IO has 3692 * completed unless there was an error. 3693 */ 3694 if (error) { 3695 releasef(aiocb->aio_fildes); 3696 lio_set_uerror(&cbp->aio_resultp, error); 3697 if (head) { 3698 mutex_enter(&aiop->aio_mutex); 3699 head->lio_nent--; 3700 head->lio_refcnt--; 3701 mutex_exit(&aiop->aio_mutex); 3702 } 3703 if (error == ENOTSUP) 3704 aio_notsupported++; 3705 else 3706 aio_errors++; 3707 lio_set_error(reqp, portused); 3708 } else { 3709 clear_active_fd(aiocb->aio_fildes); 3710 } 3711 } 3712 3713 if (aio_notsupported) { 3714 error = ENOTSUP; 3715 } else if (aio_errors) { 3716 /* 3717 * return EIO if any request failed 3718 */ 3719 error = EIO; 3720 } 3721 3722 if (mode_arg == LIO_WAIT) { 3723 mutex_enter(&aiop->aio_mutex); 3724 while (head->lio_refcnt > 0) { 3725 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 3726 mutex_exit(&aiop->aio_mutex); 3727 error = EINTR; 3728 goto done; 3729 } 3730 } 3731 mutex_exit(&aiop->aio_mutex); 3732 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32); 3733 } 3734 3735 done: 3736 kmem_free(cbplist, ssize); 3737 if (deadhead) { 3738 if (head->lio_sigqp) 3739 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 3740 if (head->lio_portkev) 3741 port_free_event(head->lio_portkev); 3742 kmem_free(head, sizeof (aio_lio_t)); 3743 } 3744 return (error); 3745 } 3746 3747 3748 #ifdef _SYSCALL32_IMPL 3749 void 3750 aiocb_32ton(aiocb32_t *src, aiocb_t *dest) 3751 { 3752 dest->aio_fildes = src->aio_fildes; 3753 dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf; 3754 dest->aio_nbytes = (size_t)src->aio_nbytes; 3755 dest->aio_offset = (off_t)src->aio_offset; 3756 dest->aio_reqprio = src->aio_reqprio; 3757 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 3758 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 3759 3760 /* 3761 * See comment in sigqueue32() on handling of 32-bit 3762 * sigvals in a 64-bit kernel. 3763 */ 3764 dest->aio_sigevent.sigev_value.sival_int = 3765 (int)src->aio_sigevent.sigev_value.sival_int; 3766 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 3767 (uintptr_t)src->aio_sigevent.sigev_notify_function; 3768 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 3769 (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 3770 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 3771 dest->aio_lio_opcode = src->aio_lio_opcode; 3772 dest->aio_state = src->aio_state; 3773 dest->aio__pad[0] = src->aio__pad[0]; 3774 } 3775 #endif /* _SYSCALL32_IMPL */ 3776 3777 /* 3778 * aio_port_callback() is called just before the event is retrieved from the 3779 * port. The task of this callback function is to finish the work of the 3780 * transaction for the application, it means : 3781 * - copyout transaction data to the application 3782 * (this thread is running in the right process context) 3783 * - keep trace of the transaction (update of counters). 3784 * - free allocated buffers 3785 * The aiocb pointer is the object element of the port_kevent_t structure. 3786 * 3787 * flag : 3788 * PORT_CALLBACK_DEFAULT : do copyout and free resources 3789 * PORT_CALLBACK_CLOSE : don't do copyout, free resources 3790 */ 3791 3792 /*ARGSUSED*/ 3793 int 3794 aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp) 3795 { 3796 aio_t *aiop = curproc->p_aio; 3797 aio_req_t *reqp = arg; 3798 struct iovec *iov; 3799 struct buf *bp; 3800 void *resultp; 3801 3802 if (pid != curproc->p_pid) { 3803 /* wrong proc !!, can not deliver data here ... */ 3804 return (EACCES); 3805 } 3806 3807 mutex_enter(&aiop->aio_portq_mutex); 3808 reqp->aio_req_portkev = NULL; 3809 aio_req_remove_portq(aiop, reqp); /* remove request from portq */ 3810 mutex_exit(&aiop->aio_portq_mutex); 3811 aphysio_unlock(reqp); /* unlock used pages */ 3812 mutex_enter(&aiop->aio_mutex); 3813 if (reqp->aio_req_flags & AIO_COPYOUTDONE) { 3814 aio_req_free_port(aiop, reqp); /* back to free list */ 3815 mutex_exit(&aiop->aio_mutex); 3816 return (0); 3817 } 3818 3819 iov = reqp->aio_req_uio.uio_iov; 3820 bp = &reqp->aio_req_buf; 3821 resultp = (void *)reqp->aio_req_resultp; 3822 aio_req_free_port(aiop, reqp); /* request struct back to free list */ 3823 mutex_exit(&aiop->aio_mutex); 3824 if (flag == PORT_CALLBACK_DEFAULT) 3825 aio_copyout_result_port(iov, bp, resultp); 3826 return (0); 3827 } 3828