/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Kernel asynchronous I/O.
 * This is only for raw devices now (as of Nov. 1993).
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/fs/snode.h>
#include <sys/unistd.h>
#include <sys/cmn_err.h>
#include <vm/as.h>
#include <vm/faultcode.h>
#include <sys/sysmacros.h>
#include <sys/procfs.h>
#include <sys/kmem.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/sunddi.h>
#include <sys/aio_impl.h>
#include <sys/debug.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vmsystm.h>
#include <sys/fs/pxfs_ki.h>
#include <sys/contract/process_impl.h>

/*
 * external entry point.
 */
#ifdef _LP64
static int64_t kaioc(long, long, long, long, long, long);
#endif
static int kaio(ulong_t *, rval_t *);


#define	AIO_64		0
#define	AIO_32		1
#define	AIO_LARGEFILE	2

/*
 * implementation specific functions (private)
 */
#ifdef _LP64
static int alio(int, aiocb_t **, int, struct sigevent *);
#endif
static int aionotify(void);
static int aioinit(void);
static int aiostart(void);
static void alio_cleanup(aio_t *, aiocb_t **, int, int);
static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
    cred_t *);
static void lio_set_error(aio_req_t *);
static aio_t *aio_aiop_alloc();
static int aio_req_alloc(aio_req_t **, aio_result_t *);
static int aio_lio_alloc(aio_lio_t **);
static aio_req_t *aio_req_done(void *);
static aio_req_t *aio_req_remove(aio_req_t *);
static int aio_req_find(aio_result_t *, aio_req_t **);
static int aio_hash_insert(struct aio_req_t *, aio_t *);
static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
    aio_result_t *, vnode_t *);
static int aio_cleanup_thread(aio_t *);
static aio_lio_t *aio_list_get(aio_result_t *);
static void lio_set_uerror(void *, int);
extern void aio_zerolen(aio_req_t *);
static int aiowait(struct timeval *, int, long *);
static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
    aio_req_t *reqlist, aio_t *aiop, model_t model);
static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
static int aiosuspend(void *, int, struct timespec *, int,
    long *, int);
static int aliowait(int, void *, int, void *, int);
static int aioerror(void *, int);
static int aio_cancel(int, void
*, long *, int); 106 static int arw(int, int, char *, int, offset_t, aio_result_t *, int); 107 static int aiorw(int, void *, int, int); 108 109 static int alioLF(int, void *, int, void *); 110 static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *, 111 aio_result_t *, vnode_t *); 112 static int alio32(int, void *, int, void *); 113 static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p); 114 static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p); 115 116 #ifdef _SYSCALL32_IMPL 117 static void aiocb_LFton(aiocb64_32_t *, aiocb_t *); 118 void aiocb_32ton(aiocb32_t *, aiocb_t *); 119 #endif /* _SYSCALL32_IMPL */ 120 121 /* 122 * implementation specific functions (external) 123 */ 124 void aio_req_free(aio_t *, aio_req_t *); 125 126 /* 127 * Event Port framework 128 */ 129 130 void aio_req_free_port(aio_t *, aio_req_t *); 131 static int aio_port_callback(void *, int *, pid_t, int, void *); 132 133 /* 134 * This is the loadable module wrapper. 135 */ 136 #include <sys/modctl.h> 137 #include <sys/syscall.h> 138 139 #ifdef _LP64 140 141 static struct sysent kaio_sysent = { 142 6, 143 SE_NOUNLOAD | SE_64RVAL | SE_ARGC, 144 (int (*)())kaioc 145 }; 146 147 #ifdef _SYSCALL32_IMPL 148 static struct sysent kaio_sysent32 = { 149 7, 150 SE_NOUNLOAD | SE_64RVAL, 151 kaio 152 }; 153 #endif /* _SYSCALL32_IMPL */ 154 155 #else /* _LP64 */ 156 157 static struct sysent kaio_sysent = { 158 7, 159 SE_NOUNLOAD | SE_32RVAL1, 160 kaio 161 }; 162 163 #endif /* _LP64 */ 164 165 /* 166 * Module linkage information for the kernel. 167 */ 168 169 static struct modlsys modlsys = { 170 &mod_syscallops, 171 "kernel Async I/O", 172 &kaio_sysent 173 }; 174 175 #ifdef _SYSCALL32_IMPL 176 static struct modlsys modlsys32 = { 177 &mod_syscallops32, 178 "kernel Async I/O for 32 bit compatibility", 179 &kaio_sysent32 180 }; 181 #endif /* _SYSCALL32_IMPL */ 182 183 184 static struct modlinkage modlinkage = { 185 MODREV_1, 186 &modlsys, 187 #ifdef _SYSCALL32_IMPL 188 &modlsys32, 189 #endif 190 NULL 191 }; 192 193 int 194 _init(void) 195 { 196 int retval; 197 198 if ((retval = mod_install(&modlinkage)) != 0) 199 return (retval); 200 201 return (0); 202 } 203 204 int 205 _fini(void) 206 { 207 int retval; 208 209 retval = mod_remove(&modlinkage); 210 211 return (retval); 212 } 213 214 int 215 _info(struct modinfo *modinfop) 216 { 217 return (mod_info(&modlinkage, modinfop)); 218 } 219 220 #ifdef _LP64 221 static int64_t 222 kaioc( 223 long a0, 224 long a1, 225 long a2, 226 long a3, 227 long a4, 228 long a5) 229 { 230 int error; 231 long rval = 0; 232 233 switch ((int)a0 & ~AIO_POLL_BIT) { 234 case AIOREAD: 235 error = arw((int)a0, (int)a1, (char *)a2, (int)a3, 236 (offset_t)a4, (aio_result_t *)a5, FREAD); 237 break; 238 case AIOWRITE: 239 error = arw((int)a0, (int)a1, (char *)a2, (int)a3, 240 (offset_t)a4, (aio_result_t *)a5, FWRITE); 241 break; 242 case AIOWAIT: 243 error = aiowait((struct timeval *)a1, (int)a2, &rval); 244 break; 245 case AIOWAITN: 246 error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3, 247 (timespec_t *)a4); 248 break; 249 case AIONOTIFY: 250 error = aionotify(); 251 break; 252 case AIOINIT: 253 error = aioinit(); 254 break; 255 case AIOSTART: 256 error = aiostart(); 257 break; 258 case AIOLIO: 259 error = alio((int)a1, (aiocb_t **)a2, (int)a3, 260 (struct sigevent *)a4); 261 break; 262 case AIOLIOWAIT: 263 error = aliowait((int)a1, (void *)a2, (int)a3, 264 (struct sigevent *)a4, AIO_64); 265 break; 266 case AIOSUSPEND: 267 error = aiosuspend((void *)a1, 
(int)a2, (timespec_t *)a3, 268 (int)a4, &rval, AIO_64); 269 break; 270 case AIOERROR: 271 error = aioerror((void *)a1, AIO_64); 272 break; 273 case AIOAREAD: 274 error = aiorw((int)a0, (void *)a1, FREAD, AIO_64); 275 break; 276 case AIOAWRITE: 277 error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64); 278 break; 279 case AIOCANCEL: 280 error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64); 281 break; 282 283 /* 284 * The large file related stuff is valid only for 285 * 32 bit kernel and not for 64 bit kernel 286 * On 64 bit kernel we convert large file calls 287 * to regular 64bit calls. 288 */ 289 290 default: 291 error = EINVAL; 292 } 293 if (error) 294 return ((int64_t)set_errno(error)); 295 return (rval); 296 } 297 #endif 298 299 static int 300 kaio( 301 ulong_t *uap, 302 rval_t *rvp) 303 { 304 long rval = 0; 305 int error = 0; 306 offset_t off; 307 308 309 rvp->r_vals = 0; 310 #if defined(_LITTLE_ENDIAN) 311 off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4]; 312 #else 313 off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5]; 314 #endif 315 316 switch (uap[0] & ~AIO_POLL_BIT) { 317 /* 318 * It must be the 32 bit system call on 64 bit kernel 319 */ 320 case AIOREAD: 321 return (arw((int)uap[0], (int)uap[1], (char *)uap[2], 322 (int)uap[3], off, (aio_result_t *)uap[6], FREAD)); 323 case AIOWRITE: 324 return (arw((int)uap[0], (int)uap[1], (char *)uap[2], 325 (int)uap[3], off, (aio_result_t *)uap[6], FWRITE)); 326 case AIOWAIT: 327 error = aiowait((struct timeval *)uap[1], (int)uap[2], 328 &rval); 329 break; 330 case AIOWAITN: 331 error = aiowaitn((void *)uap[1], (uint_t)uap[2], 332 (uint_t *)uap[3], (timespec_t *)uap[4]); 333 break; 334 case AIONOTIFY: 335 return (aionotify()); 336 case AIOINIT: 337 return (aioinit()); 338 case AIOSTART: 339 return (aiostart()); 340 case AIOLIO: 341 return (alio32((int)uap[1], (void *)uap[2], (int)uap[3], 342 (void *)uap[4])); 343 case AIOLIOWAIT: 344 return (aliowait((int)uap[1], (void *)uap[2], 345 (int)uap[3], (struct sigevent *)uap[4], AIO_32)); 346 case AIOSUSPEND: 347 error = aiosuspend((void *)uap[1], (int)uap[2], 348 (timespec_t *)uap[3], (int)uap[4], 349 &rval, AIO_32); 350 break; 351 case AIOERROR: 352 return (aioerror((void *)uap[1], AIO_32)); 353 case AIOAREAD: 354 return (aiorw((int)uap[0], (void *)uap[1], 355 FREAD, AIO_32)); 356 case AIOAWRITE: 357 return (aiorw((int)uap[0], (void *)uap[1], 358 FWRITE, AIO_32)); 359 case AIOCANCEL: 360 error = (aio_cancel((int)uap[1], (void *)uap[2], &rval, 361 AIO_32)); 362 break; 363 case AIOLIO64: 364 return (alioLF((int)uap[1], (void *)uap[2], 365 (int)uap[3], (void *)uap[4])); 366 case AIOLIOWAIT64: 367 return (aliowait(uap[1], (void *)uap[2], 368 (int)uap[3], (void *)uap[4], AIO_LARGEFILE)); 369 case AIOSUSPEND64: 370 error = aiosuspend((void *)uap[1], (int)uap[2], 371 (timespec_t *)uap[3], (int)uap[4], &rval, 372 AIO_LARGEFILE); 373 break; 374 case AIOERROR64: 375 return (aioerror((void *)uap[1], AIO_LARGEFILE)); 376 case AIOAREAD64: 377 return (aiorw((int)uap[0], (void *)uap[1], FREAD, 378 AIO_LARGEFILE)); 379 case AIOAWRITE64: 380 return (aiorw((int)uap[0], (void *)uap[1], FWRITE, 381 AIO_LARGEFILE)); 382 case AIOCANCEL64: 383 error = (aio_cancel((int)uap[1], (void *)uap[2], 384 &rval, AIO_LARGEFILE)); 385 break; 386 default: 387 return (EINVAL); 388 } 389 390 rvp->r_val1 = rval; 391 return (error); 392 } 393 394 /* 395 * wake up LWPs in this process that are sleeping in 396 * aiowait(). 
 */
static int
aionotify(void)
{
	aio_t *aiop;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (0);

	mutex_enter(&aiop->aio_mutex);
	aiop->aio_notifycnt++;
	cv_broadcast(&aiop->aio_waitcv);
	mutex_exit(&aiop->aio_mutex);

	return (0);
}

static int
timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	struct timeval32 wait_time_32;
#endif
	struct timeval wait_time;
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {		/* wait indefinitely */
		*blocking = 1;
		return (0);
	}

	/*
	 * Need to correctly compare with the -1 passed in for a user
	 * address pointer, with both 32 bit and 64 bit apps.
	 */
	if (model == DATAMODEL_NATIVE) {
		if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time, sizeof (wait_time)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		/*
		 * -1 from a 32bit app. It will not get sign extended.
		 * don't wait if -1.
		 */
		if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
	}
#endif	/* _SYSCALL32_IMPL */

	if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */
		*blocking = 0;
		return (0);
	}

	if (wait_time.tv_sec < 0 ||
	    wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
		return (EINVAL);

	rqtime->tv_sec = wait_time.tv_sec;
	rqtime->tv_nsec = wait_time.tv_usec * 1000;
	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}

static int
timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	timespec32_t wait_time_32;
#endif
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {
		*blocking = 1;
		return (0);
	}

	if (model == DATAMODEL_NATIVE) {
		if (copyin(timout, rqtime, sizeof (*rqtime)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
	}
#endif	/* _SYSCALL32_IMPL */

	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
		*blocking = 0;
		return (0);
	}

	if (rqtime->tv_sec < 0 ||
	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
		return (EINVAL);

	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}

/*ARGSUSED*/
static int
aiowait(
	struct timeval *timout,
	int dontblockflg,
	long *rval)
{
	int error;
	aio_t *aiop;
	aio_req_t *reqp;
	clock_t status;
	int blocking;
	int timecheck;
	timestruc_t rqtime;
	timestruc_t *rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
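	 * timeval2reltime() validates the user-supplied timeout and returns
	 * it as a relative timestruc_t; adding gethrestime() to it below
	 * yields the absolute wakeup time later passed to cv_waituntil_sig().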
	 */
	error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* process requests on poll queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}
		if ((reqp = aio_req_remove(NULL)) != NULL) {
			*rval = (long)reqp->aio_req_resultp;
			break;
		}
		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			*rval = 1;
			break;
		}
		/* don't block if no outstanding aio */
		if (aiop->aio_outstanding == 0 && dontblockflg) {
			error = EINVAL;
			break;
		}
		if (blocking) {
			status = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);

			if (status > 0)		/* check done queue again */
				continue;
			if (status == 0) {	/* interrupted by a signal */
				error = EINTR;
				*rval = -1;
			} else {		/* timer expired */
				error = ETIME;
			}
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	if (reqp) {
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
	return (error);
}

/*
 * aiowaitn can be used to reap completed asynchronous requests submitted with
 * lio_listio, aio_read or aio_write.
 * This function only reaps asynchronous raw I/Os.
 */

/*ARGSUSED*/
static int
aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
{
	int error = 0;
	aio_t *aiop;
	aio_req_t *reqlist = NULL;
	caddr_t iocblist = NULL;	/* array of iocb ptr's */
	uint_t waitcnt, cnt = 0;	/* iocb cnt */
	size_t iocbsz;			/* users iocb size */
	size_t riocbsz;			/* returned iocb size */
	int iocb_index = 0;
	model_t model = get_udatamodel();
	int blocking = 1;
	int timecheck;
	timestruc_t rqtime;
	timestruc_t *rqtp;

	aiop = curproc->p_aio;

	if (aiop == NULL || aiop->aio_outstanding == 0)
		return (EAGAIN);

	if (copyin(nwait, &waitcnt, sizeof (uint_t)))
		return (EFAULT);

	/* set *nwait to zero, if we must return prematurely */
	if (copyout(&cnt, nwait, sizeof (uint_t)))
		return (EFAULT);

	if (waitcnt == 0) {
		blocking = 0;
		rqtp = NULL;
		waitcnt = nent;
	} else {
		error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
		if (error)
			return (error);
	}

	if (model == DATAMODEL_NATIVE)
		iocbsz = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		iocbsz = (sizeof (caddr32_t) * nent);
#endif	/* _SYSCALL32_IMPL */

	/*
	 * Only one aio_waitn call is allowed at a time.
	 * The active aio_waitn will collect all requests
	 * out of the "done" list and if necessary it will wait
	 * for some/all pending requests to fulfill the nwait
	 * parameter.
	 * A second or further aio_waitn call will sleep here
	 * until the active aio_waitn finishes and leaves the kernel.
	 * If the second call does not block (poll), then return
	 * immediately with the error code EAGAIN.
	 * If the second call should block, then sleep here, but
	 * do not touch the timeout. The timeout starts when this
	 * aio_waitn call becomes active.
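	 * The serialization is implemented just below with the AIO_WAITN
	 * flag: a blocked caller sets AIO_WAITN_PENDING and sleeps on
	 * aio_waitncv until the active call clears AIO_WAITN and signals it.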
668 */ 669 670 mutex_enter(&aiop->aio_mutex); 671 672 while (aiop->aio_flags & AIO_WAITN) { 673 if (blocking == 0) { 674 mutex_exit(&aiop->aio_mutex); 675 return (EAGAIN); 676 } 677 678 /* block, no timeout */ 679 aiop->aio_flags |= AIO_WAITN_PENDING; 680 if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) { 681 mutex_exit(&aiop->aio_mutex); 682 return (EINTR); 683 } 684 } 685 686 /* 687 * Establish the absolute future time for the timeout. 688 */ 689 if (rqtp) { 690 timestruc_t now; 691 timecheck = timechanged; 692 gethrestime(&now); 693 timespecadd(rqtp, &now); 694 } 695 696 if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) { 697 kmem_free(aiop->aio_iocb, aiop->aio_iocbsz); 698 aiop->aio_iocb = NULL; 699 } 700 701 if (aiop->aio_iocb == NULL) { 702 iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP); 703 if (iocblist == NULL) { 704 mutex_exit(&aiop->aio_mutex); 705 return (ENOMEM); 706 } 707 aiop->aio_iocb = (aiocb_t **)iocblist; 708 aiop->aio_iocbsz = iocbsz; 709 } else { 710 iocblist = (char *)aiop->aio_iocb; 711 } 712 713 aiop->aio_waitncnt = waitcnt; 714 aiop->aio_flags |= AIO_WAITN; 715 716 for (;;) { 717 /* push requests on poll queue to done queue */ 718 if (aiop->aio_pollq) { 719 mutex_exit(&aiop->aio_mutex); 720 aio_cleanup(0); 721 mutex_enter(&aiop->aio_mutex); 722 } 723 724 /* check for requests on done queue */ 725 if (aiop->aio_doneq) { 726 cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt); 727 aiop->aio_waitncnt = waitcnt - cnt; 728 } 729 730 /* user-level done queue might not be empty */ 731 if (aiop->aio_notifycnt > 0) { 732 aiop->aio_notifycnt--; 733 error = 0; 734 break; 735 } 736 737 /* 738 * if we are here second time as a result of timer 739 * expiration, we reset error if there are enough 740 * aiocb's to satisfy request. 741 * We return also if all requests are already done 742 * and we picked up the whole done queue. 743 */ 744 745 if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 && 746 aiop->aio_doneq == NULL)) { 747 error = 0; 748 break; 749 } 750 751 if ((cnt < waitcnt) && blocking) { 752 int rval = cv_waituntil_sig(&aiop->aio_waitcv, 753 &aiop->aio_mutex, rqtp, timecheck); 754 if (rval > 0) 755 continue; 756 if (rval < 0) { 757 error = ETIME; 758 blocking = 0; 759 continue; 760 } 761 error = EINTR; 762 } 763 break; 764 } 765 766 mutex_exit(&aiop->aio_mutex); 767 768 if (cnt > 0) { 769 770 iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist, 771 aiop, model); 772 773 if (model == DATAMODEL_NATIVE) 774 riocbsz = (sizeof (aiocb_t *) * cnt); 775 #ifdef _SYSCALL32_IMPL 776 else 777 riocbsz = (sizeof (caddr32_t) * cnt); 778 #endif /* _SYSCALL32_IMPL */ 779 780 if (copyout(iocblist, uiocb, riocbsz) || 781 copyout(&cnt, nwait, sizeof (uint_t))) 782 error = EFAULT; 783 } 784 785 if (aiop->aio_iocbsz > AIO_IOCB_MAX) { 786 kmem_free(iocblist, aiop->aio_iocbsz); 787 aiop->aio_iocb = NULL; 788 } 789 790 /* check if there is another thread waiting for execution */ 791 mutex_enter(&aiop->aio_mutex); 792 aiop->aio_flags &= ~AIO_WAITN; 793 if (aiop->aio_flags & AIO_WAITN_PENDING) { 794 aiop->aio_flags &= ~AIO_WAITN_PENDING; 795 cv_signal(&aiop->aio_waitncv); 796 } 797 mutex_exit(&aiop->aio_mutex); 798 799 return (error); 800 } 801 802 /* 803 * aio_unlock_requests 804 * copyouts the result of the request as well as the return value. 805 * It builds the list of completed asynchronous requests, 806 * unlocks the allocated memory ranges and 807 * put the aio request structure back into the free list. 
808 */ 809 810 static int 811 aio_unlock_requests( 812 caddr_t iocblist, 813 int iocb_index, 814 aio_req_t *reqlist, 815 aio_t *aiop, 816 model_t model) 817 { 818 aio_req_t *reqp, *nreqp; 819 820 if (model == DATAMODEL_NATIVE) { 821 for (reqp = reqlist; reqp != NULL; reqp = nreqp) { 822 (((caddr_t *)iocblist)[iocb_index++]) = 823 reqp->aio_req_iocb.iocb; 824 nreqp = reqp->aio_req_next; 825 aphysio_unlock(reqp); 826 aio_copyout_result(reqp); 827 mutex_enter(&aiop->aio_mutex); 828 aio_req_free(aiop, reqp); 829 mutex_exit(&aiop->aio_mutex); 830 } 831 } 832 #ifdef _SYSCALL32_IMPL 833 else { 834 for (reqp = reqlist; reqp != NULL; reqp = nreqp) { 835 ((caddr32_t *)iocblist)[iocb_index++] = 836 reqp->aio_req_iocb.iocb32; 837 nreqp = reqp->aio_req_next; 838 aphysio_unlock(reqp); 839 aio_copyout_result(reqp); 840 mutex_enter(&aiop->aio_mutex); 841 aio_req_free(aiop, reqp); 842 mutex_exit(&aiop->aio_mutex); 843 } 844 } 845 #endif /* _SYSCALL32_IMPL */ 846 return (iocb_index); 847 } 848 849 /* 850 * aio_reqlist_concat 851 * moves "max" elements from the done queue to the reqlist queue and removes 852 * the AIO_DONEQ flag. 853 * - reqlist queue is a simple linked list 854 * - done queue is a double linked list 855 */ 856 857 static int 858 aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max) 859 { 860 aio_req_t *q2, *q2work, *list; 861 int count = 0; 862 863 list = *reqlist; 864 q2 = aiop->aio_doneq; 865 q2work = q2; 866 while (max-- > 0) { 867 q2work->aio_req_flags &= ~AIO_DONEQ; 868 q2work = q2work->aio_req_next; 869 count++; 870 if (q2work == q2) 871 break; 872 } 873 874 if (q2work == q2) { 875 /* all elements revised */ 876 q2->aio_req_prev->aio_req_next = list; 877 list = q2; 878 aiop->aio_doneq = NULL; 879 } else { 880 /* 881 * max < elements in the doneq 882 * detach only the required amount of elements 883 * out of the doneq 884 */ 885 q2work->aio_req_prev->aio_req_next = list; 886 list = q2; 887 888 aiop->aio_doneq = q2work; 889 q2work->aio_req_prev = q2->aio_req_prev; 890 q2->aio_req_prev->aio_req_next = q2work; 891 } 892 *reqlist = list; 893 return (count); 894 } 895 896 /*ARGSUSED*/ 897 static int 898 aiosuspend( 899 void *aiocb, 900 int nent, 901 struct timespec *timout, 902 int flag, 903 long *rval, 904 int run_mode) 905 { 906 int error; 907 aio_t *aiop; 908 aio_req_t *reqp, *found, *next; 909 caddr_t cbplist = NULL; 910 aiocb_t *cbp, **ucbp; 911 #ifdef _SYSCALL32_IMPL 912 aiocb32_t *cbp32; 913 caddr32_t *ucbp32; 914 #endif /* _SYSCALL32_IMPL */ 915 aiocb64_32_t *cbp64; 916 int rv; 917 int i; 918 size_t ssize; 919 model_t model = get_udatamodel(); 920 int blocking; 921 int timecheck; 922 timestruc_t rqtime; 923 timestruc_t *rqtp; 924 925 aiop = curproc->p_aio; 926 if (aiop == NULL || nent <= 0) 927 return (EINVAL); 928 929 /* 930 * Establish the absolute future time for the timeout. 931 */ 932 error = timespec2reltime(timout, &rqtime, &rqtp, &blocking); 933 if (error) 934 return (error); 935 if (rqtp) { 936 timestruc_t now; 937 timecheck = timechanged; 938 gethrestime(&now); 939 timespecadd(rqtp, &now); 940 } 941 942 /* 943 * If we are not blocking and there's no IO complete 944 * skip aiocb copyin. 
945 */ 946 if (!blocking && (aiop->aio_pollq == NULL) && 947 (aiop->aio_doneq == NULL)) { 948 return (EAGAIN); 949 } 950 951 if (model == DATAMODEL_NATIVE) 952 ssize = (sizeof (aiocb_t *) * nent); 953 #ifdef _SYSCALL32_IMPL 954 else 955 ssize = (sizeof (caddr32_t) * nent); 956 #endif /* _SYSCALL32_IMPL */ 957 958 cbplist = kmem_alloc(ssize, KM_NOSLEEP); 959 if (cbplist == NULL) 960 return (ENOMEM); 961 962 if (copyin(aiocb, cbplist, ssize)) { 963 error = EFAULT; 964 goto done; 965 } 966 967 found = NULL; 968 /* 969 * we need to get the aio_cleanupq_mutex since we call 970 * aio_req_done(). 971 */ 972 mutex_enter(&aiop->aio_cleanupq_mutex); 973 mutex_enter(&aiop->aio_mutex); 974 for (;;) { 975 /* push requests on poll queue to done queue */ 976 if (aiop->aio_pollq) { 977 mutex_exit(&aiop->aio_mutex); 978 mutex_exit(&aiop->aio_cleanupq_mutex); 979 aio_cleanup(0); 980 mutex_enter(&aiop->aio_cleanupq_mutex); 981 mutex_enter(&aiop->aio_mutex); 982 } 983 /* check for requests on done queue */ 984 if (aiop->aio_doneq) { 985 if (model == DATAMODEL_NATIVE) 986 ucbp = (aiocb_t **)cbplist; 987 #ifdef _SYSCALL32_IMPL 988 else 989 ucbp32 = (caddr32_t *)cbplist; 990 #endif /* _SYSCALL32_IMPL */ 991 for (i = 0; i < nent; i++) { 992 if (model == DATAMODEL_NATIVE) { 993 if ((cbp = *ucbp++) == NULL) 994 continue; 995 if (run_mode != AIO_LARGEFILE) 996 reqp = aio_req_done( 997 &cbp->aio_resultp); 998 else { 999 cbp64 = (aiocb64_32_t *)cbp; 1000 reqp = aio_req_done( 1001 &cbp64->aio_resultp); 1002 } 1003 } 1004 #ifdef _SYSCALL32_IMPL 1005 else { 1006 if (run_mode == AIO_32) { 1007 if ((cbp32 = 1008 (aiocb32_t *)(uintptr_t) 1009 *ucbp32++) == NULL) 1010 continue; 1011 reqp = aio_req_done( 1012 &cbp32->aio_resultp); 1013 } else if (run_mode == AIO_LARGEFILE) { 1014 if ((cbp64 = 1015 (aiocb64_32_t *)(uintptr_t) 1016 *ucbp32++) == NULL) 1017 continue; 1018 reqp = aio_req_done( 1019 &cbp64->aio_resultp); 1020 } 1021 1022 } 1023 #endif /* _SYSCALL32_IMPL */ 1024 if (reqp) { 1025 reqp->aio_req_next = found; 1026 found = reqp; 1027 } 1028 if (aiop->aio_doneq == NULL) 1029 break; 1030 } 1031 if (found) 1032 break; 1033 } 1034 if (aiop->aio_notifycnt > 0) { 1035 /* 1036 * nothing on the kernel's queue. the user 1037 * has notified the kernel that it has items 1038 * on a user-level queue. 1039 */ 1040 aiop->aio_notifycnt--; 1041 *rval = 1; 1042 error = 0; 1043 break; 1044 } 1045 /* don't block if nothing is outstanding */ 1046 if (aiop->aio_outstanding == 0) { 1047 error = EAGAIN; 1048 break; 1049 } 1050 if (blocking) { 1051 /* 1052 * drop the aio_cleanupq_mutex as we are 1053 * going to block. 1054 */ 1055 mutex_exit(&aiop->aio_cleanupq_mutex); 1056 rv = cv_waituntil_sig(&aiop->aio_waitcv, 1057 &aiop->aio_mutex, rqtp, timecheck); 1058 /* 1059 * we have to drop aio_mutex and 1060 * grab it in the right order. 
			 */
			mutex_exit(&aiop->aio_mutex);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
			if (rv > 0)	/* check done queue again */
				continue;
			if (rv == 0)	/* interrupted by a signal */
				error = EINTR;
			else		/* timer expired */
				error = ETIME;
		} else {
			error = EAGAIN;
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	for (reqp = found; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
done:
	kmem_free(cbplist, ssize);
	return (error);
}

/*
 * initialize aio by allocating an aio_t struct for this
 * process.
 */
static int
aioinit(void)
{
	proc_t *p = curproc;
	aio_t *aiop;
	mutex_enter(&p->p_lock);
	if ((aiop = p->p_aio) == NULL) {
		aiop = aio_aiop_alloc();
		p->p_aio = aiop;
	}
	mutex_exit(&p->p_lock);
	if (aiop == NULL)
		return (ENOMEM);
	return (0);
}

/*
 * start a special thread that will cleanup after aio requests
 * that are preventing a segment from being unmapped. as_unmap()
 * blocks until all physio to this segment is completed. this
 * doesn't happen until none of the pages in this segment are
 * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
 * requests still outstanding. this special thread will make sure
 * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
 *
 * this function will return an error if the process has only
 * one LWP. the assumption is that the caller is a separate LWP
 * that remains blocked in the kernel for the life of this process.
 */
static int
aiostart(void)
{
	proc_t *p = curproc;
	aio_t *aiop;
	int first, error = 0;

	if (p->p_lwpcnt == 1)
		return (EDEADLK);
	mutex_enter(&p->p_lock);
	if ((aiop = p->p_aio) == NULL)
		error = EINVAL;
	else {
		first = aiop->aio_ok;
		if (aiop->aio_ok == 0)
			aiop->aio_ok = 1;
	}
	mutex_exit(&p->p_lock);
	if (error == 0 && first == 0) {
		return (aio_cleanup_thread(aiop));
		/* should return only to exit */
	}
	return (error);
}

/*
 * Associate an aiocb with a port.
 * This function is used by aiorw() to associate a transaction with a port.
 * Allocate an event port structure (port_alloc_event()) and store the
 * delivered user pointer (portnfy_user) in the portkev_user field of the
 * port_kevent_t structure.
 * The aio_req_portkev pointer in the aio_req_t structure was added to identify
 * the port association.
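 *
 * Illustrative sketch only (user-level code, not part of this file): a
 * caller typically requests port notification by setting
 *	aiocb.aio_sigevent.sigev_notify = SIGEV_PORT;
 *	aiocb.aio_sigevent.sigev_value.sival_ptr = &pn;
 * where pn is a port_notify_t naming the event port; aiorw() then copies
 * that port_notify_t in and hands it to this routine.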
1157 */ 1158 1159 static int 1160 aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp, 1161 aio_req_t *reqp, int event) 1162 { 1163 port_kevent_t *pkevp = NULL; 1164 int error; 1165 1166 error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT, 1167 PORT_SOURCE_AIO, &pkevp); 1168 if (error) { 1169 if ((error == ENOMEM) || (error == EAGAIN)) 1170 error = EAGAIN; 1171 else 1172 error = EINVAL; 1173 } else { 1174 port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user, 1175 aio_port_callback, reqp); 1176 pkevp->portkev_events = event; 1177 reqp->aio_req_portkev = pkevp; 1178 reqp->aio_req_port = pntfy->portnfy_port; 1179 } 1180 return (error); 1181 } 1182 1183 #ifdef _LP64 1184 1185 /* 1186 * Asynchronous list IO. A chain of aiocb's are copied in 1187 * one at a time. If the aiocb is invalid, it is skipped. 1188 * For each aiocb, the appropriate driver entry point is 1189 * called. Optimize for the common case where the list 1190 * of requests is to the same file descriptor. 1191 * 1192 * One possible optimization is to define a new driver entry 1193 * point that supports a list of IO requests. Whether this 1194 * improves performance depends somewhat on the driver's 1195 * locking strategy. Processing a list could adversely impact 1196 * the driver's interrupt latency. 1197 */ 1198 static int 1199 alio( 1200 int mode_arg, 1201 aiocb_t **aiocb_arg, 1202 int nent, 1203 struct sigevent *sigev) 1204 { 1205 file_t *fp; 1206 file_t *prev_fp = NULL; 1207 int prev_mode = -1; 1208 struct vnode *vp; 1209 aio_lio_t *head; 1210 aio_req_t *reqp; 1211 aio_t *aiop; 1212 caddr_t cbplist; 1213 aiocb_t cb; 1214 aiocb_t *aiocb = &cb; 1215 aiocb_t *cbp; 1216 aiocb_t **ucbp; 1217 struct sigevent sigevk; 1218 sigqueue_t *sqp; 1219 int (*aio_func)(); 1220 int mode; 1221 int error = 0; 1222 int aio_errors = 0; 1223 int i; 1224 size_t ssize; 1225 int deadhead = 0; 1226 int aio_notsupported = 0; 1227 int lio_head_port; 1228 int aio_port; 1229 int aio_thread; 1230 port_kevent_t *pkevtp = NULL; 1231 port_notify_t pnotify; 1232 int event; 1233 1234 aiop = curproc->p_aio; 1235 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 1236 return (EINVAL); 1237 1238 ssize = (sizeof (aiocb_t *) * nent); 1239 cbplist = kmem_alloc(ssize, KM_SLEEP); 1240 ucbp = (aiocb_t **)cbplist; 1241 1242 if (copyin(aiocb_arg, cbplist, ssize) || 1243 (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) { 1244 kmem_free(cbplist, ssize); 1245 return (EFAULT); 1246 } 1247 1248 /* Event Ports */ 1249 if (sigev && 1250 (sigevk.sigev_notify == SIGEV_THREAD || 1251 sigevk.sigev_notify == SIGEV_PORT)) { 1252 if (sigevk.sigev_notify == SIGEV_THREAD) { 1253 pnotify.portnfy_port = sigevk.sigev_signo; 1254 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 1255 } else if (copyin(sigevk.sigev_value.sival_ptr, 1256 &pnotify, sizeof (pnotify))) { 1257 kmem_free(cbplist, ssize); 1258 return (EFAULT); 1259 } 1260 error = port_alloc_event(pnotify.portnfy_port, 1261 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 1262 if (error) { 1263 if (error == ENOMEM || error == EAGAIN) 1264 error = EAGAIN; 1265 else 1266 error = EINVAL; 1267 kmem_free(cbplist, ssize); 1268 return (error); 1269 } 1270 lio_head_port = pnotify.portnfy_port; 1271 } 1272 1273 /* 1274 * a list head should be allocated if notification is 1275 * enabled for this list. 
1276 */ 1277 head = NULL; 1278 1279 if (mode_arg == LIO_WAIT || sigev) { 1280 mutex_enter(&aiop->aio_mutex); 1281 error = aio_lio_alloc(&head); 1282 mutex_exit(&aiop->aio_mutex); 1283 if (error) 1284 goto done; 1285 deadhead = 1; 1286 head->lio_nent = nent; 1287 head->lio_refcnt = nent; 1288 head->lio_port = -1; 1289 head->lio_portkev = NULL; 1290 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 1291 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 1292 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 1293 if (sqp == NULL) { 1294 error = EAGAIN; 1295 goto done; 1296 } 1297 sqp->sq_func = NULL; 1298 sqp->sq_next = NULL; 1299 sqp->sq_info.si_code = SI_ASYNCIO; 1300 sqp->sq_info.si_pid = curproc->p_pid; 1301 sqp->sq_info.si_ctid = PRCTID(curproc); 1302 sqp->sq_info.si_zoneid = getzoneid(); 1303 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 1304 sqp->sq_info.si_signo = sigevk.sigev_signo; 1305 sqp->sq_info.si_value = sigevk.sigev_value; 1306 head->lio_sigqp = sqp; 1307 } else { 1308 head->lio_sigqp = NULL; 1309 } 1310 if (pkevtp) { 1311 /* 1312 * Prepare data to send when list of aiocb's 1313 * has completed. 1314 */ 1315 port_init_event(pkevtp, (uintptr_t)sigev, 1316 (void *)(uintptr_t)pnotify.portnfy_user, 1317 NULL, head); 1318 pkevtp->portkev_events = AIOLIO; 1319 head->lio_portkev = pkevtp; 1320 head->lio_port = pnotify.portnfy_port; 1321 } 1322 } 1323 1324 for (i = 0; i < nent; i++, ucbp++) { 1325 1326 cbp = *ucbp; 1327 /* skip entry if it can't be copied. */ 1328 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) { 1329 if (head) { 1330 mutex_enter(&aiop->aio_mutex); 1331 head->lio_nent--; 1332 head->lio_refcnt--; 1333 mutex_exit(&aiop->aio_mutex); 1334 } 1335 continue; 1336 } 1337 1338 /* skip if opcode for aiocb is LIO_NOP */ 1339 mode = aiocb->aio_lio_opcode; 1340 if (mode == LIO_NOP) { 1341 cbp = NULL; 1342 if (head) { 1343 mutex_enter(&aiop->aio_mutex); 1344 head->lio_nent--; 1345 head->lio_refcnt--; 1346 mutex_exit(&aiop->aio_mutex); 1347 } 1348 continue; 1349 } 1350 1351 /* increment file descriptor's ref count. */ 1352 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 1353 lio_set_uerror(&cbp->aio_resultp, EBADF); 1354 if (head) { 1355 mutex_enter(&aiop->aio_mutex); 1356 head->lio_nent--; 1357 head->lio_refcnt--; 1358 mutex_exit(&aiop->aio_mutex); 1359 } 1360 aio_errors++; 1361 continue; 1362 } 1363 1364 /* 1365 * check the permission of the partition 1366 */ 1367 if ((fp->f_flag & mode) == 0) { 1368 releasef(aiocb->aio_fildes); 1369 lio_set_uerror(&cbp->aio_resultp, EBADF); 1370 if (head) { 1371 mutex_enter(&aiop->aio_mutex); 1372 head->lio_nent--; 1373 head->lio_refcnt--; 1374 mutex_exit(&aiop->aio_mutex); 1375 } 1376 aio_errors++; 1377 continue; 1378 } 1379 1380 /* 1381 * common case where requests are to the same fd 1382 * for the same r/w operation. 
1383 * for UFS, need to set EBADFD 1384 */ 1385 vp = fp->f_vnode; 1386 if (fp != prev_fp || mode != prev_mode) { 1387 aio_func = check_vp(vp, mode); 1388 if (aio_func == NULL) { 1389 prev_fp = NULL; 1390 releasef(aiocb->aio_fildes); 1391 lio_set_uerror(&cbp->aio_resultp, EBADFD); 1392 aio_notsupported++; 1393 if (head) { 1394 mutex_enter(&aiop->aio_mutex); 1395 head->lio_nent--; 1396 head->lio_refcnt--; 1397 mutex_exit(&aiop->aio_mutex); 1398 } 1399 continue; 1400 } else { 1401 prev_fp = fp; 1402 prev_mode = mode; 1403 } 1404 } 1405 1406 error = aio_req_setup(&reqp, aiop, aiocb, 1407 &cbp->aio_resultp, vp); 1408 if (error) { 1409 releasef(aiocb->aio_fildes); 1410 lio_set_uerror(&cbp->aio_resultp, error); 1411 if (head) { 1412 mutex_enter(&aiop->aio_mutex); 1413 head->lio_nent--; 1414 head->lio_refcnt--; 1415 mutex_exit(&aiop->aio_mutex); 1416 } 1417 aio_errors++; 1418 continue; 1419 } 1420 1421 reqp->aio_req_lio = head; 1422 deadhead = 0; 1423 1424 /* 1425 * Set the errno field now before sending the request to 1426 * the driver to avoid a race condition 1427 */ 1428 (void) suword32(&cbp->aio_resultp.aio_errno, 1429 EINPROGRESS); 1430 1431 reqp->aio_req_iocb.iocb = (caddr_t)cbp; 1432 1433 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE; 1434 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 1435 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 1436 if (aio_port | aio_thread) { 1437 port_kevent_t *lpkevp; 1438 /* 1439 * Prepare data to send with each aiocb completed. 1440 */ 1441 if (aio_port) { 1442 void *paddr = 1443 aiocb->aio_sigevent.sigev_value.sival_ptr; 1444 if (copyin(paddr, &pnotify, sizeof (pnotify))) 1445 error = EFAULT; 1446 } else { /* aio_thread */ 1447 pnotify.portnfy_port = 1448 aiocb->aio_sigevent.sigev_signo; 1449 pnotify.portnfy_user = 1450 aiocb->aio_sigevent.sigev_value.sival_ptr; 1451 } 1452 if (error) 1453 /* EMPTY */; 1454 else if (pkevtp != NULL && 1455 pnotify.portnfy_port == lio_head_port) 1456 error = port_dup_event(pkevtp, &lpkevp, 1457 PORT_ALLOC_DEFAULT); 1458 else 1459 error = port_alloc_event(pnotify.portnfy_port, 1460 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 1461 &lpkevp); 1462 if (error == 0) { 1463 port_init_event(lpkevp, (uintptr_t)cbp, 1464 (void *)(uintptr_t)pnotify.portnfy_user, 1465 aio_port_callback, reqp); 1466 lpkevp->portkev_events = event; 1467 reqp->aio_req_portkev = lpkevp; 1468 reqp->aio_req_port = pnotify.portnfy_port; 1469 } 1470 } 1471 1472 /* 1473 * send the request to driver. 1474 */ 1475 if (error == 0) { 1476 if (aiocb->aio_nbytes == 0) { 1477 clear_active_fd(aiocb->aio_fildes); 1478 aio_zerolen(reqp); 1479 continue; 1480 } 1481 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 1482 CRED()); 1483 } 1484 1485 /* 1486 * the fd's ref count is not decremented until the IO has 1487 * completed unless there was an error. 
1488 */ 1489 if (error) { 1490 releasef(aiocb->aio_fildes); 1491 lio_set_uerror(&cbp->aio_resultp, error); 1492 if (head) { 1493 mutex_enter(&aiop->aio_mutex); 1494 head->lio_nent--; 1495 head->lio_refcnt--; 1496 mutex_exit(&aiop->aio_mutex); 1497 } 1498 if (error == ENOTSUP) 1499 aio_notsupported++; 1500 else 1501 aio_errors++; 1502 lio_set_error(reqp); 1503 } else { 1504 clear_active_fd(aiocb->aio_fildes); 1505 } 1506 } 1507 1508 if (aio_notsupported) { 1509 error = ENOTSUP; 1510 } else if (aio_errors) { 1511 /* 1512 * return EIO if any request failed 1513 */ 1514 error = EIO; 1515 } 1516 1517 if (mode_arg == LIO_WAIT) { 1518 mutex_enter(&aiop->aio_mutex); 1519 while (head->lio_refcnt > 0) { 1520 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 1521 mutex_exit(&aiop->aio_mutex); 1522 error = EINTR; 1523 goto done; 1524 } 1525 } 1526 mutex_exit(&aiop->aio_mutex); 1527 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64); 1528 } 1529 1530 done: 1531 kmem_free(cbplist, ssize); 1532 if (deadhead) { 1533 if (head->lio_sigqp) 1534 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 1535 if (head->lio_portkev) 1536 port_free_event(head->lio_portkev); 1537 kmem_free(head, sizeof (aio_lio_t)); 1538 } 1539 return (error); 1540 } 1541 1542 #endif /* _LP64 */ 1543 1544 /* 1545 * Asynchronous list IO. 1546 * If list I/O is called with LIO_WAIT it can still return 1547 * before all the I/O's are completed if a signal is caught 1548 * or if the list include UFS I/O requests. If this happens, 1549 * libaio will call aliowait() to wait for the I/O's to 1550 * complete 1551 */ 1552 /*ARGSUSED*/ 1553 static int 1554 aliowait( 1555 int mode, 1556 void *aiocb, 1557 int nent, 1558 void *sigev, 1559 int run_mode) 1560 { 1561 aio_lio_t *head; 1562 aio_t *aiop; 1563 caddr_t cbplist; 1564 aiocb_t *cbp, **ucbp; 1565 #ifdef _SYSCALL32_IMPL 1566 aiocb32_t *cbp32; 1567 caddr32_t *ucbp32; 1568 aiocb64_32_t *cbp64; 1569 #endif 1570 int error = 0; 1571 int i; 1572 size_t ssize = 0; 1573 model_t model = get_udatamodel(); 1574 1575 aiop = curproc->p_aio; 1576 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 1577 return (EINVAL); 1578 1579 if (model == DATAMODEL_NATIVE) 1580 ssize = (sizeof (aiocb_t *) * nent); 1581 #ifdef _SYSCALL32_IMPL 1582 else 1583 ssize = (sizeof (caddr32_t) * nent); 1584 #endif /* _SYSCALL32_IMPL */ 1585 1586 if (ssize == 0) 1587 return (EINVAL); 1588 1589 cbplist = kmem_alloc(ssize, KM_SLEEP); 1590 1591 if (model == DATAMODEL_NATIVE) 1592 ucbp = (aiocb_t **)cbplist; 1593 #ifdef _SYSCALL32_IMPL 1594 else 1595 ucbp32 = (caddr32_t *)cbplist; 1596 #endif /* _SYSCALL32_IMPL */ 1597 1598 if (copyin(aiocb, cbplist, ssize)) { 1599 error = EFAULT; 1600 goto done; 1601 } 1602 1603 /* 1604 * To find the list head, we go through the 1605 * list of aiocb structs, find the request 1606 * its for, then get the list head that reqp 1607 * points to 1608 */ 1609 head = NULL; 1610 1611 for (i = 0; i < nent; i++) { 1612 if (model == DATAMODEL_NATIVE) { 1613 /* 1614 * Since we are only checking for a NULL pointer 1615 * Following should work on both native data sizes 1616 * as well as for largefile aiocb. 1617 */ 1618 if ((cbp = *ucbp++) == NULL) 1619 continue; 1620 if (run_mode != AIO_LARGEFILE) 1621 if (head = aio_list_get(&cbp->aio_resultp)) 1622 break; 1623 else { 1624 /* 1625 * This is a case when largefile call is 1626 * made on 32 bit kernel. 
1627 * Treat each pointer as pointer to 1628 * aiocb64_32 1629 */ 1630 if (head = aio_list_get((aio_result_t *) 1631 &(((aiocb64_32_t *)cbp)->aio_resultp))) 1632 break; 1633 } 1634 } 1635 #ifdef _SYSCALL32_IMPL 1636 else { 1637 if (run_mode == AIO_LARGEFILE) { 1638 if ((cbp64 = (aiocb64_32_t *) 1639 (uintptr_t)*ucbp32++) == NULL) 1640 continue; 1641 if (head = aio_list_get((aio_result_t *) 1642 &cbp64->aio_resultp)) 1643 break; 1644 } else if (run_mode == AIO_32) { 1645 if ((cbp32 = (aiocb32_t *) 1646 (uintptr_t)*ucbp32++) == NULL) 1647 continue; 1648 if (head = aio_list_get((aio_result_t *) 1649 &cbp32->aio_resultp)) 1650 break; 1651 } 1652 } 1653 #endif /* _SYSCALL32_IMPL */ 1654 } 1655 1656 if (head == NULL) { 1657 error = EINVAL; 1658 goto done; 1659 } 1660 1661 mutex_enter(&aiop->aio_mutex); 1662 while (head->lio_refcnt > 0) { 1663 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 1664 mutex_exit(&aiop->aio_mutex); 1665 error = EINTR; 1666 goto done; 1667 } 1668 } 1669 mutex_exit(&aiop->aio_mutex); 1670 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode); 1671 done: 1672 kmem_free(cbplist, ssize); 1673 return (error); 1674 } 1675 1676 aio_lio_t * 1677 aio_list_get(aio_result_t *resultp) 1678 { 1679 aio_lio_t *head = NULL; 1680 aio_t *aiop; 1681 aio_req_t **bucket; 1682 aio_req_t *reqp; 1683 long index; 1684 1685 aiop = curproc->p_aio; 1686 if (aiop == NULL) 1687 return (NULL); 1688 1689 if (resultp) { 1690 index = AIO_HASH(resultp); 1691 bucket = &aiop->aio_hash[index]; 1692 for (reqp = *bucket; reqp != NULL; 1693 reqp = reqp->aio_hash_next) { 1694 if (reqp->aio_req_resultp == resultp) { 1695 head = reqp->aio_req_lio; 1696 return (head); 1697 } 1698 } 1699 } 1700 return (NULL); 1701 } 1702 1703 1704 static void 1705 lio_set_uerror(void *resultp, int error) 1706 { 1707 /* 1708 * the resultp field is a pointer to where the 1709 * error should be written out to the user's 1710 * aiocb. 1711 * 1712 */ 1713 if (get_udatamodel() == DATAMODEL_NATIVE) { 1714 (void) sulword(&((aio_result_t *)resultp)->aio_return, 1715 (ssize_t)-1); 1716 (void) suword32(&((aio_result_t *)resultp)->aio_errno, error); 1717 } 1718 #ifdef _SYSCALL32_IMPL 1719 else { 1720 (void) suword32(&((aio_result32_t *)resultp)->aio_return, 1721 (uint_t)-1); 1722 (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error); 1723 } 1724 #endif /* _SYSCALL32_IMPL */ 1725 } 1726 1727 /* 1728 * do cleanup completion for all requests in list. memory for 1729 * each request is also freed. 
1730 */ 1731 static void 1732 alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode) 1733 { 1734 int i; 1735 aio_req_t *reqp; 1736 aio_result_t *resultp; 1737 aiocb64_32_t *aiocb_64; 1738 1739 for (i = 0; i < nent; i++) { 1740 if (get_udatamodel() == DATAMODEL_NATIVE) { 1741 if (cbp[i] == NULL) 1742 continue; 1743 if (run_mode == AIO_LARGEFILE) { 1744 aiocb_64 = (aiocb64_32_t *)cbp[i]; 1745 resultp = (aio_result_t *) 1746 &aiocb_64->aio_resultp; 1747 } else 1748 resultp = &cbp[i]->aio_resultp; 1749 } 1750 #ifdef _SYSCALL32_IMPL 1751 else { 1752 aiocb32_t *aiocb_32; 1753 caddr32_t *cbp32; 1754 1755 cbp32 = (caddr32_t *)cbp; 1756 if (cbp32[i] == NULL) 1757 continue; 1758 if (run_mode == AIO_32) { 1759 aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i]; 1760 resultp = (aio_result_t *)&aiocb_32-> 1761 aio_resultp; 1762 } else if (run_mode == AIO_LARGEFILE) { 1763 aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i]; 1764 resultp = (aio_result_t *)&aiocb_64-> 1765 aio_resultp; 1766 } 1767 } 1768 #endif /* _SYSCALL32_IMPL */ 1769 /* 1770 * we need to get the aio_cleanupq_mutex since we call 1771 * aio_req_done(). 1772 */ 1773 mutex_enter(&aiop->aio_cleanupq_mutex); 1774 mutex_enter(&aiop->aio_mutex); 1775 reqp = aio_req_done(resultp); 1776 mutex_exit(&aiop->aio_mutex); 1777 mutex_exit(&aiop->aio_cleanupq_mutex); 1778 if (reqp != NULL) { 1779 aphysio_unlock(reqp); 1780 aio_copyout_result(reqp); 1781 mutex_enter(&aiop->aio_mutex); 1782 aio_req_free(aiop, reqp); 1783 mutex_exit(&aiop->aio_mutex); 1784 } 1785 } 1786 } 1787 1788 /* 1789 * Write out the results for an aio request that is done. 1790 */ 1791 static int 1792 aioerror(void *cb, int run_mode) 1793 { 1794 aio_result_t *resultp; 1795 aio_t *aiop; 1796 aio_req_t *reqp; 1797 int retval; 1798 1799 aiop = curproc->p_aio; 1800 if (aiop == NULL || cb == NULL) 1801 return (EINVAL); 1802 1803 if (get_udatamodel() == DATAMODEL_NATIVE) { 1804 if (run_mode == AIO_LARGEFILE) 1805 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 1806 aio_resultp; 1807 else 1808 resultp = &((aiocb_t *)cb)->aio_resultp; 1809 } 1810 #ifdef _SYSCALL32_IMPL 1811 else { 1812 if (run_mode == AIO_LARGEFILE) 1813 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 1814 aio_resultp; 1815 else if (run_mode == AIO_32) 1816 resultp = (aio_result_t *)&((aiocb32_t *)cb)-> 1817 aio_resultp; 1818 } 1819 #endif /* _SYSCALL32_IMPL */ 1820 /* 1821 * we need to get the aio_cleanupq_mutex since we call 1822 * aio_req_find(). 
1823 */ 1824 mutex_enter(&aiop->aio_cleanupq_mutex); 1825 mutex_enter(&aiop->aio_mutex); 1826 retval = aio_req_find(resultp, &reqp); 1827 mutex_exit(&aiop->aio_mutex); 1828 mutex_exit(&aiop->aio_cleanupq_mutex); 1829 if (retval == 0) { 1830 aphysio_unlock(reqp); 1831 aio_copyout_result(reqp); 1832 mutex_enter(&aiop->aio_mutex); 1833 aio_req_free(aiop, reqp); 1834 mutex_exit(&aiop->aio_mutex); 1835 return (0); 1836 } else if (retval == 1) 1837 return (EINPROGRESS); 1838 else if (retval == 2) 1839 return (EINVAL); 1840 return (0); 1841 } 1842 1843 /* 1844 * aio_cancel - if no requests outstanding, 1845 * return AIO_ALLDONE 1846 * else 1847 * return AIO_NOTCANCELED 1848 */ 1849 static int 1850 aio_cancel( 1851 int fildes, 1852 void *cb, 1853 long *rval, 1854 int run_mode) 1855 { 1856 aio_t *aiop; 1857 void *resultp; 1858 int index; 1859 aio_req_t **bucket; 1860 aio_req_t *ent; 1861 1862 1863 /* 1864 * Verify valid file descriptor 1865 */ 1866 if ((getf(fildes)) == NULL) { 1867 return (EBADF); 1868 } 1869 releasef(fildes); 1870 1871 aiop = curproc->p_aio; 1872 if (aiop == NULL) 1873 return (EINVAL); 1874 1875 if (aiop->aio_outstanding == 0) { 1876 *rval = AIO_ALLDONE; 1877 return (0); 1878 } 1879 1880 mutex_enter(&aiop->aio_mutex); 1881 if (cb != NULL) { 1882 if (get_udatamodel() == DATAMODEL_NATIVE) { 1883 if (run_mode == AIO_LARGEFILE) 1884 resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 1885 ->aio_resultp; 1886 else 1887 resultp = &((aiocb_t *)cb)->aio_resultp; 1888 } 1889 #ifdef _SYSCALL32_IMPL 1890 else { 1891 if (run_mode == AIO_LARGEFILE) 1892 resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 1893 ->aio_resultp; 1894 else if (run_mode == AIO_32) 1895 resultp = (aio_result_t *)&((aiocb32_t *)cb) 1896 ->aio_resultp; 1897 } 1898 #endif /* _SYSCALL32_IMPL */ 1899 index = AIO_HASH(resultp); 1900 bucket = &aiop->aio_hash[index]; 1901 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 1902 if (ent->aio_req_resultp == resultp) { 1903 if ((ent->aio_req_flags & AIO_PENDING) == 0) { 1904 mutex_exit(&aiop->aio_mutex); 1905 *rval = AIO_ALLDONE; 1906 return (0); 1907 } 1908 mutex_exit(&aiop->aio_mutex); 1909 *rval = AIO_NOTCANCELED; 1910 return (0); 1911 } 1912 } 1913 mutex_exit(&aiop->aio_mutex); 1914 *rval = AIO_ALLDONE; 1915 return (0); 1916 } 1917 1918 for (index = 0; index < AIO_HASHSZ; index++) { 1919 bucket = &aiop->aio_hash[index]; 1920 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 1921 if (ent->aio_req_fd == fildes) { 1922 if ((ent->aio_req_flags & AIO_PENDING) != 0) { 1923 mutex_exit(&aiop->aio_mutex); 1924 *rval = AIO_NOTCANCELED; 1925 return (0); 1926 } 1927 } 1928 } 1929 } 1930 mutex_exit(&aiop->aio_mutex); 1931 *rval = AIO_ALLDONE; 1932 return (0); 1933 } 1934 1935 /* 1936 * solaris version of asynchronous read and write 1937 */ 1938 static int 1939 arw( 1940 int opcode, 1941 int fdes, 1942 char *bufp, 1943 int bufsize, 1944 offset_t offset, 1945 aio_result_t *resultp, 1946 int mode) 1947 { 1948 file_t *fp; 1949 int error; 1950 struct vnode *vp; 1951 aio_req_t *reqp; 1952 aio_t *aiop; 1953 int (*aio_func)(); 1954 #ifdef _LP64 1955 aiocb_t aiocb; 1956 #else 1957 aiocb64_32_t aiocb64; 1958 #endif 1959 1960 aiop = curproc->p_aio; 1961 if (aiop == NULL) 1962 return (EINVAL); 1963 1964 if ((fp = getf(fdes)) == NULL) { 1965 return (EBADF); 1966 } 1967 1968 /* 1969 * check the permission of the partition 1970 */ 1971 if ((fp->f_flag & mode) == 0) { 1972 releasef(fdes); 1973 return (EBADF); 1974 } 1975 1976 vp = fp->f_vnode; 1977 aio_func = check_vp(vp, mode); 1978 if 
(aio_func == NULL) { 1979 releasef(fdes); 1980 return (EBADFD); 1981 } 1982 #ifdef _LP64 1983 aiocb.aio_fildes = fdes; 1984 aiocb.aio_buf = bufp; 1985 aiocb.aio_nbytes = bufsize; 1986 aiocb.aio_offset = offset; 1987 aiocb.aio_sigevent.sigev_notify = 0; 1988 error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp); 1989 #else 1990 aiocb64.aio_fildes = fdes; 1991 aiocb64.aio_buf = (caddr32_t)bufp; 1992 aiocb64.aio_nbytes = bufsize; 1993 aiocb64.aio_offset = offset; 1994 aiocb64.aio_sigevent.sigev_notify = 0; 1995 error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp); 1996 #endif 1997 if (error) { 1998 releasef(fdes); 1999 return (error); 2000 } 2001 2002 /* 2003 * enable polling on this request if the opcode has 2004 * the AIO poll bit set 2005 */ 2006 if (opcode & AIO_POLL_BIT) 2007 reqp->aio_req_flags |= AIO_POLL; 2008 2009 if (bufsize == 0) { 2010 clear_active_fd(fdes); 2011 aio_zerolen(reqp); 2012 return (0); 2013 } 2014 /* 2015 * send the request to driver. 2016 */ 2017 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED()); 2018 /* 2019 * the fd is stored in the aio_req_t by aio_req_setup(), and 2020 * is released by the aio_cleanup_thread() when the IO has 2021 * completed. 2022 */ 2023 if (error) { 2024 releasef(fdes); 2025 mutex_enter(&aiop->aio_mutex); 2026 aio_req_free(aiop, reqp); 2027 aiop->aio_pending--; 2028 if (aiop->aio_flags & AIO_REQ_BLOCK) 2029 cv_signal(&aiop->aio_cleanupcv); 2030 mutex_exit(&aiop->aio_mutex); 2031 return (error); 2032 } 2033 clear_active_fd(fdes); 2034 return (0); 2035 } 2036 2037 /* 2038 * posix version of asynchronous read and write 2039 */ 2040 static int 2041 aiorw( 2042 int opcode, 2043 void *aiocb_arg, 2044 int mode, 2045 int run_mode) 2046 { 2047 #ifdef _SYSCALL32_IMPL 2048 aiocb32_t aiocb32; 2049 struct sigevent32 *sigev32; 2050 port_notify32_t pntfy32; 2051 #endif 2052 aiocb64_32_t aiocb64; 2053 aiocb_t aiocb; 2054 file_t *fp; 2055 int error, fd; 2056 size_t bufsize; 2057 struct vnode *vp; 2058 aio_req_t *reqp; 2059 aio_t *aiop; 2060 int (*aio_func)(); 2061 aio_result_t *resultp; 2062 struct sigevent *sigev; 2063 model_t model; 2064 int aio_use_port = 0; 2065 port_notify_t pntfy; 2066 2067 model = get_udatamodel(); 2068 aiop = curproc->p_aio; 2069 if (aiop == NULL) 2070 return (EINVAL); 2071 2072 if (model == DATAMODEL_NATIVE) { 2073 if (run_mode != AIO_LARGEFILE) { 2074 if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t))) 2075 return (EFAULT); 2076 bufsize = aiocb.aio_nbytes; 2077 resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp); 2078 if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) { 2079 return (EBADF); 2080 } 2081 sigev = &aiocb.aio_sigevent; 2082 } else { 2083 /* 2084 * We come here only when we make largefile 2085 * call on 32 bit kernel using 32 bit library. 
2086 */ 2087 if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t))) 2088 return (EFAULT); 2089 bufsize = aiocb64.aio_nbytes; 2090 resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg) 2091 ->aio_resultp); 2092 if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) 2093 return (EBADF); 2094 sigev = (struct sigevent *)&aiocb64.aio_sigevent; 2095 } 2096 2097 if (sigev->sigev_notify == SIGEV_PORT) { 2098 if (copyin((void *)sigev->sigev_value.sival_ptr, 2099 &pntfy, sizeof (port_notify_t))) { 2100 releasef(fd); 2101 return (EFAULT); 2102 } 2103 aio_use_port = 1; 2104 } else if (sigev->sigev_notify == SIGEV_THREAD) { 2105 pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo; 2106 pntfy.portnfy_user = 2107 aiocb.aio_sigevent.sigev_value.sival_ptr; 2108 aio_use_port = 1; 2109 } 2110 } 2111 #ifdef _SYSCALL32_IMPL 2112 else { 2113 if (run_mode == AIO_32) { 2114 /* 32 bit system call is being made on 64 bit kernel */ 2115 if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t))) 2116 return (EFAULT); 2117 2118 bufsize = aiocb32.aio_nbytes; 2119 aiocb_32ton(&aiocb32, &aiocb); 2120 resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)-> 2121 aio_resultp); 2122 if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) { 2123 return (EBADF); 2124 } 2125 sigev32 = &aiocb32.aio_sigevent; 2126 } else if (run_mode == AIO_LARGEFILE) { 2127 /* 2128 * We come here only when we make largefile 2129 * call on 64 bit kernel using 32 bit library. 2130 */ 2131 if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t))) 2132 return (EFAULT); 2133 bufsize = aiocb64.aio_nbytes; 2134 aiocb_LFton(&aiocb64, &aiocb); 2135 resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg) 2136 ->aio_resultp); 2137 if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) 2138 return (EBADF); 2139 sigev32 = &aiocb64.aio_sigevent; 2140 } 2141 2142 if (sigev32->sigev_notify == SIGEV_PORT) { 2143 if (copyin( 2144 (void *)(uintptr_t)sigev32->sigev_value.sival_ptr, 2145 &pntfy32, sizeof (port_notify32_t))) { 2146 releasef(fd); 2147 return (EFAULT); 2148 } 2149 pntfy.portnfy_port = pntfy32.portnfy_port; 2150 pntfy.portnfy_user = (void *)(uintptr_t) 2151 pntfy32.portnfy_user; 2152 aio_use_port = 1; 2153 } else if (sigev32->sigev_notify == SIGEV_THREAD) { 2154 pntfy.portnfy_port = sigev32->sigev_signo; 2155 pntfy.portnfy_user = (void *)(uintptr_t) 2156 sigev32->sigev_value.sival_ptr; 2157 aio_use_port = 1; 2158 } 2159 } 2160 #endif /* _SYSCALL32_IMPL */ 2161 2162 /* 2163 * check the permission of the partition 2164 */ 2165 2166 if ((fp->f_flag & mode) == 0) { 2167 releasef(fd); 2168 return (EBADF); 2169 } 2170 2171 vp = fp->f_vnode; 2172 aio_func = check_vp(vp, mode); 2173 if (aio_func == NULL) { 2174 releasef(fd); 2175 return (EBADFD); 2176 } 2177 if (run_mode == AIO_LARGEFILE) 2178 error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp); 2179 else 2180 error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp); 2181 2182 if (error) { 2183 releasef(fd); 2184 return (error); 2185 } 2186 /* 2187 * enable polling on this request if the opcode has 2188 * the AIO poll bit set 2189 */ 2190 if (opcode & AIO_POLL_BIT) 2191 reqp->aio_req_flags |= AIO_POLL; 2192 2193 if (model == DATAMODEL_NATIVE) 2194 reqp->aio_req_iocb.iocb = aiocb_arg; 2195 #ifdef _SYSCALL32_IMPL 2196 else 2197 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg; 2198 #endif 2199 2200 if (aio_use_port) { 2201 int event = (run_mode == AIO_LARGEFILE)? 2202 ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) : 2203 ((mode == FREAD)? 
AIOAREAD : AIOAWRITE);
		error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
	}

	/*
	 * send the request to driver.
	 */
	if (error == 0) {
		if (bufsize == 0) {
			clear_active_fd(fd);
			aio_zerolen(reqp);
			return (0);
		}
		error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
	}

	/*
	 * the fd is stored in the aio_req_t by aio_req_setup(), and
	 * is released by the aio_cleanup_thread() when the IO has
	 * completed.
	 */
	if (error) {
		releasef(fd);
		mutex_enter(&aiop->aio_mutex);
		aio_deq(&aiop->aio_portpending, reqp);
		aio_req_free(aiop, reqp);
		aiop->aio_pending--;
		if (aiop->aio_flags & AIO_REQ_BLOCK)
			cv_signal(&aiop->aio_cleanupcv);
		mutex_exit(&aiop->aio_mutex);
		return (error);
	}
	clear_active_fd(fd);
	return (0);
}


/*
 * set error for a list IO entry that failed.
 */
static void
lio_set_error(aio_req_t *reqp)
{
	aio_t *aiop = curproc->p_aio;

	if (aiop == NULL)
		return;

	mutex_enter(&aiop->aio_mutex);
	aio_deq(&aiop->aio_portpending, reqp);
	aiop->aio_pending--;
	/* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
	reqp->aio_req_flags |= AIO_PHYSIODONE;
	/*
	 * Need to free the request now as it's never
	 * going to get on the done queue
	 *
	 * Note: aio_outstanding is decremented in
	 * aio_req_free()
	 */
	aio_req_free(aiop, reqp);
	if (aiop->aio_flags & AIO_REQ_BLOCK)
		cv_signal(&aiop->aio_cleanupcv);
	mutex_exit(&aiop->aio_mutex);
}

/*
 * check if a specified request is done, and remove it from
 * the done queue. otherwise remove any request from the done
 * queue if NULL is specified.
 */
static aio_req_t *
aio_req_done(void *resultp)
{
	aio_req_t **bucket;
	aio_req_t *ent;
	aio_t *aiop = curproc->p_aio;
	long index;

	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (resultp) {
		index = AIO_HASH(resultp);
		bucket = &aiop->aio_hash[index];
		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
			if (ent->aio_req_resultp == (aio_result_t *)resultp) {
				if (ent->aio_req_flags & AIO_DONEQ) {
					return (aio_req_remove(ent));
				}
				return (NULL);
			}
		}
		/* no match, resultp is invalid */
		return (NULL);
	}
	return (aio_req_remove(NULL));
}

/*
 * determine if a user-level resultp pointer is associated with an
 * active IO request. Zero is returned when the request is done,
 * and the request is removed from the done queue. Only when the
 * return value is zero, is the "reqp" pointer valid. One is returned
 * when the request is in progress. Two is returned when the request
 * is invalid.
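 *
 * aioerror() above maps these three return values to 0, EINPROGRESS and
 * EINVAL respectively before returning to the caller.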
2309 */ 2310 static int 2311 aio_req_find(aio_result_t *resultp, aio_req_t **reqp) 2312 { 2313 aio_req_t **bucket; 2314 aio_req_t *ent; 2315 aio_t *aiop = curproc->p_aio; 2316 long index; 2317 2318 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex)); 2319 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2320 2321 index = AIO_HASH(resultp); 2322 bucket = &aiop->aio_hash[index]; 2323 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 2324 if (ent->aio_req_resultp == resultp) { 2325 if (ent->aio_req_flags & AIO_DONEQ) { 2326 *reqp = aio_req_remove(ent); 2327 return (0); 2328 } 2329 return (1); 2330 } 2331 } 2332 /* no match, resultp is invalid */ 2333 return (2); 2334 } 2335 2336 /* 2337 * remove a request from the done queue. 2338 */ 2339 static aio_req_t * 2340 aio_req_remove(aio_req_t *reqp) 2341 { 2342 aio_t *aiop = curproc->p_aio; 2343 2344 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2345 2346 if (reqp != NULL) { 2347 ASSERT(reqp->aio_req_flags & AIO_DONEQ); 2348 if (reqp->aio_req_next == reqp) { 2349 /* only one request on queue */ 2350 if (reqp == aiop->aio_doneq) { 2351 aiop->aio_doneq = NULL; 2352 } else { 2353 ASSERT(reqp == aiop->aio_cleanupq); 2354 aiop->aio_cleanupq = NULL; 2355 } 2356 } else { 2357 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 2358 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 2359 /* 2360 * The request can be either on the aio_doneq or the 2361 * aio_cleanupq 2362 */ 2363 if (reqp == aiop->aio_doneq) 2364 aiop->aio_doneq = reqp->aio_req_next; 2365 2366 if (reqp == aiop->aio_cleanupq) 2367 aiop->aio_cleanupq = reqp->aio_req_next; 2368 } 2369 reqp->aio_req_flags &= ~AIO_DONEQ; 2370 reqp->aio_req_next = NULL; 2371 reqp->aio_req_prev = NULL; 2372 } else if ((reqp = aiop->aio_doneq) != NULL) { 2373 ASSERT(reqp->aio_req_flags & AIO_DONEQ); 2374 if (reqp == reqp->aio_req_next) { 2375 /* only one request on queue */ 2376 aiop->aio_doneq = NULL; 2377 } else { 2378 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 2379 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 2380 aiop->aio_doneq = reqp->aio_req_next; 2381 } 2382 reqp->aio_req_flags &= ~AIO_DONEQ; 2383 reqp->aio_req_next = NULL; 2384 reqp->aio_req_prev = NULL; 2385 } 2386 if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN)) 2387 cv_broadcast(&aiop->aio_waitcv); 2388 return (reqp); 2389 } 2390 2391 static int 2392 aio_req_setup( 2393 aio_req_t **reqpp, 2394 aio_t *aiop, 2395 aiocb_t *arg, 2396 aio_result_t *resultp, 2397 vnode_t *vp) 2398 { 2399 sigqueue_t *sqp = NULL; 2400 aio_req_t *reqp; 2401 struct uio *uio; 2402 struct sigevent *sigev; 2403 int error; 2404 2405 sigev = &arg->aio_sigevent; 2406 if (sigev->sigev_notify == SIGEV_SIGNAL && 2407 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) { 2408 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 2409 if (sqp == NULL) 2410 return (EAGAIN); 2411 sqp->sq_func = NULL; 2412 sqp->sq_next = NULL; 2413 sqp->sq_info.si_code = SI_ASYNCIO; 2414 sqp->sq_info.si_pid = curproc->p_pid; 2415 sqp->sq_info.si_ctid = PRCTID(curproc); 2416 sqp->sq_info.si_zoneid = getzoneid(); 2417 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 2418 sqp->sq_info.si_signo = sigev->sigev_signo; 2419 sqp->sq_info.si_value = sigev->sigev_value; 2420 } 2421 2422 mutex_enter(&aiop->aio_mutex); 2423 2424 if (aiop->aio_flags & AIO_REQ_BLOCK) { 2425 mutex_exit(&aiop->aio_mutex); 2426 if (sqp) 2427 kmem_free(sqp, sizeof (sigqueue_t)); 2428 return (EIO); 2429 } 2430 /* 2431 * get an aio_reqp from the free list or allocate one 2432 * from dynamic memory. 
2433 */ 2434 if (error = aio_req_alloc(&reqp, resultp)) { 2435 mutex_exit(&aiop->aio_mutex); 2436 if (sqp) 2437 kmem_free(sqp, sizeof (sigqueue_t)); 2438 return (error); 2439 } 2440 aiop->aio_pending++; 2441 aiop->aio_outstanding++; 2442 reqp->aio_req_flags = AIO_PENDING; 2443 if (sigev->sigev_notify == SIGEV_THREAD || 2444 sigev->sigev_notify == SIGEV_PORT) 2445 aio_enq(&aiop->aio_portpending, reqp, 0); 2446 mutex_exit(&aiop->aio_mutex); 2447 /* 2448 * initialize aio request. 2449 */ 2450 reqp->aio_req_fd = arg->aio_fildes; 2451 reqp->aio_req_sigqp = sqp; 2452 reqp->aio_req_iocb.iocb = NULL; 2453 reqp->aio_req_lio = NULL; 2454 reqp->aio_req_buf.b_file = vp; 2455 uio = reqp->aio_req.aio_uio; 2456 uio->uio_iovcnt = 1; 2457 uio->uio_iov->iov_base = (caddr_t)arg->aio_buf; 2458 uio->uio_iov->iov_len = arg->aio_nbytes; 2459 uio->uio_loffset = arg->aio_offset; 2460 *reqpp = reqp; 2461 return (0); 2462 } 2463 2464 /* 2465 * Allocate p_aio struct. 2466 */ 2467 static aio_t * 2468 aio_aiop_alloc(void) 2469 { 2470 aio_t *aiop; 2471 2472 ASSERT(MUTEX_HELD(&curproc->p_lock)); 2473 2474 aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP); 2475 if (aiop) { 2476 mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL); 2477 mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT, 2478 NULL); 2479 mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL); 2480 } 2481 return (aiop); 2482 } 2483 2484 /* 2485 * Allocate an aio_req struct. 2486 */ 2487 static int 2488 aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp) 2489 { 2490 aio_req_t *reqp; 2491 aio_t *aiop = curproc->p_aio; 2492 2493 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2494 2495 if ((reqp = aiop->aio_free) != NULL) { 2496 aiop->aio_free = reqp->aio_req_next; 2497 bzero(reqp, sizeof (*reqp)); 2498 } else { 2499 /* 2500 * Check whether memory is getting tight. 2501 * This is a temporary mechanism to avoid memory 2502 * exhaustion by a single process until we come up 2503 * with a per process solution such as setrlimit(). 2504 */ 2505 if (freemem < desfree) 2506 return (EAGAIN); 2507 reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP); 2508 if (reqp == NULL) 2509 return (EAGAIN); 2510 } 2511 reqp->aio_req.aio_uio = &reqp->aio_req_uio; 2512 reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov; 2513 reqp->aio_req.aio_private = reqp; 2514 reqp->aio_req_buf.b_offset = -1; 2515 reqp->aio_req_resultp = resultp; 2516 if (aio_hash_insert(reqp, aiop)) { 2517 reqp->aio_req_next = aiop->aio_free; 2518 aiop->aio_free = reqp; 2519 return (EINVAL); 2520 } 2521 *nreqp = reqp; 2522 return (0); 2523 } 2524 2525 /* 2526 * Allocate an aio_lio_t struct. 2527 */ 2528 static int 2529 aio_lio_alloc(aio_lio_t **head) 2530 { 2531 aio_lio_t *liop; 2532 aio_t *aiop = curproc->p_aio; 2533 2534 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2535 2536 if ((liop = aiop->aio_lio_free) != NULL) { 2537 aiop->aio_lio_free = liop->lio_next; 2538 } else { 2539 /* 2540 * Check whether memory is getting tight. 2541 * This is a temporary mechanism to avoid memory 2542 * exhaustion by a single process until we come up 2543 * with a per process solution such as setrlimit(). 2544 */ 2545 if (freemem < desfree) 2546 return (EAGAIN); 2547 2548 liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP); 2549 if (liop == NULL) 2550 return (EAGAIN); 2551 } 2552 *head = liop; 2553 return (0); 2554 } 2555 2556 /* 2557 * this is a special per-process thread that is only activated if 2558 * the process is unmapping a segment with outstanding aio. 
normally, 2559 * the process will have completed the aio before unmapping the 2560 * segment. If the process does unmap a segment with outstanding aio, 2561 * this special thread will guarantee that the locked pages due to 2562 * aphysio() are released, thereby permitting the segment to be 2563 * unmapped. In addition to this, the cleanup thread is woken up 2564 * during DR operations to release the locked pages. 2565 */ 2566 2567 static int 2568 aio_cleanup_thread(aio_t *aiop) 2569 { 2570 proc_t *p = curproc; 2571 struct as *as = p->p_as; 2572 int poked = 0; 2573 kcondvar_t *cvp; 2574 int exit_flag = 0; 2575 int rqclnup = 0; 2576 2577 sigfillset(&curthread->t_hold); 2578 sigdiffset(&curthread->t_hold, &cantmask); 2579 for (;;) { 2580 /* 2581 * if a segment is being unmapped, and the current 2582 * process's done queue is not empty, then every request 2583 * on the doneq with locked resources should be forced 2584 * to release their locks. By moving the doneq request 2585 * to the cleanupq, aio_cleanup() will process the cleanupq, 2586 * and place requests back onto the doneq. All requests 2587 * processed by aio_cleanup() will have their physical 2588 * resources unlocked. 2589 */ 2590 mutex_enter(&aiop->aio_mutex); 2591 if ((aiop->aio_flags & AIO_CLEANUP) == 0) { 2592 aiop->aio_flags |= AIO_CLEANUP; 2593 mutex_enter(&as->a_contents); 2594 if (aiop->aio_rqclnup) { 2595 aiop->aio_rqclnup = 0; 2596 rqclnup = 1; 2597 } 2598 2599 if ((rqclnup || AS_ISUNMAPWAIT(as)) && 2600 aiop->aio_doneq) { 2601 aio_req_t *doneqhead = aiop->aio_doneq; 2602 mutex_exit(&as->a_contents); 2603 aiop->aio_doneq = NULL; 2604 aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ); 2605 } else { 2606 mutex_exit(&as->a_contents); 2607 } 2608 } 2609 mutex_exit(&aiop->aio_mutex); 2610 aio_cleanup(AIO_CLEANUP_THREAD); 2611 /* 2612 * thread should block on the cleanupcv while 2613 * AIO_CLEANUP is set. 2614 */ 2615 cvp = &aiop->aio_cleanupcv; 2616 mutex_enter(&aiop->aio_mutex); 2617 2618 if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL || 2619 aiop->aio_notifyq != NULL || 2620 aiop->aio_portcleanupq != NULL) { 2621 mutex_exit(&aiop->aio_mutex); 2622 continue; 2623 } 2624 mutex_enter(&as->a_contents); 2625 2626 /* 2627 * AIO_CLEANUP determines when the cleanup thread 2628 * should be active. This flag is set when 2629 * the cleanup thread is awakened by as_unmap() or 2630 * due to DR operations. 2631 * The flag is cleared when the blocking as_unmap() 2632 * that originally awakened us is allowed to 2633 * complete. as_unmap() blocks when trying to 2634 * unmap a segment that has SOFTLOCKed pages. When 2635 * the segment's pages are all SOFTUNLOCKed, 2636 * as->a_flags & AS_UNMAPWAIT should be zero. 2637 * 2638 * In case of cleanup request by DR, the flag is cleared 2639 * once all the pending aio requests have been processed. 2640 * 2641 * The flag shouldn't be cleared right away if the 2642 * cleanup thread was interrupted because the process 2643 * is doing forkall(). This happens when cv_wait_sig() 2644 * returns zero, because it was awakened by a pokelwps(). 2645 * If the process is not exiting, it must be doing forkall(). 
2646 */ 2647 if ((poked == 0) && 2648 ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) || 2649 (aiop->aio_pending == 0))) { 2650 aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT); 2651 cvp = &as->a_cv; 2652 rqclnup = 0; 2653 } 2654 mutex_exit(&aiop->aio_mutex); 2655 if (poked) { 2656 /* 2657 * If the process is exiting/killed, don't return 2658 * immediately without waiting for pending I/O's 2659 * and releasing the page locks. 2660 */ 2661 if (p->p_flag & (SEXITLWPS|SKILLED)) { 2662 /* 2663 * If exit_flag is set, then it is 2664 * safe to exit because we have released 2665 * page locks of completed I/O's. 2666 */ 2667 if (exit_flag) 2668 break; 2669 2670 mutex_exit(&as->a_contents); 2671 2672 /* 2673 * Wait for all the pending aio to complete. 2674 */ 2675 mutex_enter(&aiop->aio_mutex); 2676 aiop->aio_flags |= AIO_REQ_BLOCK; 2677 while (aiop->aio_pending != 0) 2678 cv_wait(&aiop->aio_cleanupcv, 2679 &aiop->aio_mutex); 2680 mutex_exit(&aiop->aio_mutex); 2681 exit_flag = 1; 2682 continue; 2683 } else if (p->p_flag & 2684 (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) { 2685 /* 2686 * hold LWP until it 2687 * is continued. 2688 */ 2689 mutex_exit(&as->a_contents); 2690 mutex_enter(&p->p_lock); 2691 stop(PR_SUSPENDED, SUSPEND_NORMAL); 2692 mutex_exit(&p->p_lock); 2693 poked = 0; 2694 continue; 2695 } 2696 } else { 2697 /* 2698 * When started this thread will sleep on as->a_cv. 2699 * as_unmap will awake this thread if the 2700 * segment has SOFTLOCKed pages (poked = 0). 2701 * 1. pokelwps() awakes this thread => 2702 * break the loop to check SEXITLWPS, SHOLDFORK, etc 2703 * 2. as_unmap awakes this thread => 2704 * to break the loop it is necessary that 2705 * - AS_UNMAPWAIT is set (as_unmap is waiting for 2706 * memory to be unlocked) 2707 * - AIO_CLEANUP is not set 2708 * (if AIO_CLEANUP is set we have to wait for 2709 * pending requests. aio_done will send a signal 2710 * for every request which completes to continue 2711 * unmapping the corresponding address range) 2712 * 3. A cleanup request will wake this thread up, ex. 2713 * by the DR operations. The aio_rqclnup flag will 2714 * be set. 2715 */ 2716 while (poked == 0) { 2717 /* 2718 * we need to handle cleanup requests 2719 * that come in after we had just cleaned up, 2720 * so that we do cleanup of any new aio 2721 * requests that got completed and have 2722 * locked resources. 2723 */ 2724 if ((aiop->aio_rqclnup || 2725 (AS_ISUNMAPWAIT(as) != 0)) && 2726 (aiop->aio_flags & AIO_CLEANUP) == 0) 2727 break; 2728 poked = !cv_wait_sig(cvp, &as->a_contents); 2729 if (AS_ISUNMAPWAIT(as) == 0) 2730 cv_signal(cvp); 2731 if (aiop->aio_outstanding != 0) 2732 break; 2733 } 2734 } 2735 mutex_exit(&as->a_contents); 2736 } 2737 exit: 2738 mutex_exit(&as->a_contents); 2739 ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED))); 2740 aston(curthread); /* make thread do post_syscall */ 2741 return (0); 2742 } 2743 2744 /* 2745 * save a reference to a user's outstanding aio in a hash list. 
2746 */ 2747 static int 2748 aio_hash_insert( 2749 aio_req_t *aio_reqp, 2750 aio_t *aiop) 2751 { 2752 long index; 2753 aio_result_t *resultp = aio_reqp->aio_req_resultp; 2754 aio_req_t *current; 2755 aio_req_t **nextp; 2756 2757 index = AIO_HASH(resultp); 2758 nextp = &aiop->aio_hash[index]; 2759 while ((current = *nextp) != NULL) { 2760 if (current->aio_req_resultp == resultp) 2761 return (DUPLICATE); 2762 nextp = &current->aio_hash_next; 2763 } 2764 *nextp = aio_reqp; 2765 aio_reqp->aio_hash_next = NULL; 2766 return (0); 2767 } 2768 2769 static int 2770 (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *, 2771 cred_t *) 2772 { 2773 struct snode *sp; 2774 dev_t dev; 2775 struct cb_ops *cb; 2776 major_t major; 2777 int (*aio_func)(); 2778 2779 dev = vp->v_rdev; 2780 major = getmajor(dev); 2781 2782 /* 2783 * return NULL for requests to files and STREAMs so 2784 * that libaio takes care of them. 2785 */ 2786 if (vp->v_type == VCHR) { 2787 /* no stream device for kaio */ 2788 if (STREAMSTAB(major)) { 2789 return (NULL); 2790 } 2791 } else { 2792 return (NULL); 2793 } 2794 2795 /* 2796 * Check old drivers which do not have async I/O entry points. 2797 */ 2798 if (devopsp[major]->devo_rev < 3) 2799 return (NULL); 2800 2801 cb = devopsp[major]->devo_cb_ops; 2802 2803 if (cb->cb_rev < 1) 2804 return (NULL); 2805 2806 /* 2807 * Check whether this device is a block device. 2808 * Kaio is not supported for devices like tty. 2809 */ 2810 if (cb->cb_strategy == nodev || cb->cb_strategy == NULL) 2811 return (NULL); 2812 2813 /* 2814 * Clustering: If vnode is a PXFS vnode, then the device may be remote. 2815 * We cannot call the driver directly. Instead return the 2816 * PXFS functions. 2817 */ 2818 2819 if (IS_PXFSVP(vp)) { 2820 if (mode & FREAD) 2821 return (clpxfs_aio_read); 2822 else 2823 return (clpxfs_aio_write); 2824 } 2825 if (mode & FREAD) 2826 aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read; 2827 else 2828 aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write; 2829 2830 /* 2831 * Do we need this? 2832 * nodev returns ENXIO anyway. 2833 */ 2834 if (aio_func == nodev) 2835 return (NULL); 2836 2837 sp = VTOS(vp); 2838 smark(sp, SACC); 2839 return (aio_func); 2840 } 2841 2842 /* 2843 * Clustering: We want check_vp to return a function prototyped 2844 * correctly that will be common to both PXFS and regular case. 2845 * We define this intermediate function that will do the right 2846 * thing for driver cases. 2847 */ 2848 2849 static int 2850 driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 2851 { 2852 dev_t dev; 2853 struct cb_ops *cb; 2854 2855 ASSERT(vp->v_type == VCHR); 2856 ASSERT(!IS_PXFSVP(vp)); 2857 dev = VTOS(vp)->s_dev; 2858 ASSERT(STREAMSTAB(getmajor(dev)) == NULL); 2859 2860 cb = devopsp[getmajor(dev)]->devo_cb_ops; 2861 2862 ASSERT(cb->cb_awrite != nodev); 2863 return ((*cb->cb_awrite)(dev, aio, cred_p)); 2864 } 2865 2866 /* 2867 * Clustering: We want check_vp to return a function prototyped 2868 * correctly that will be common to both PXFS and regular case. 2869 * We define this intermediate function that will do the right 2870 * thing for driver cases. 
2871 */ 2872 2873 static int 2874 driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 2875 { 2876 dev_t dev; 2877 struct cb_ops *cb; 2878 2879 ASSERT(vp->v_type == VCHR); 2880 ASSERT(!IS_PXFSVP(vp)); 2881 dev = VTOS(vp)->s_dev; 2882 ASSERT(!STREAMSTAB(getmajor(dev))); 2883 2884 cb = devopsp[getmajor(dev)]->devo_cb_ops; 2885 2886 ASSERT(cb->cb_aread != nodev); 2887 return ((*cb->cb_aread)(dev, aio, cred_p)); 2888 } 2889 2890 /* 2891 * This routine is called when a largefile call is made by a 32bit 2892 * process on a ILP32 or LP64 kernel. All 64bit processes are large 2893 * file by definition and will call alio() instead. 2894 */ 2895 static int 2896 alioLF( 2897 int mode_arg, 2898 void *aiocb_arg, 2899 int nent, 2900 void *sigev) 2901 { 2902 file_t *fp; 2903 file_t *prev_fp = NULL; 2904 int prev_mode = -1; 2905 struct vnode *vp; 2906 aio_lio_t *head; 2907 aio_req_t *reqp; 2908 aio_t *aiop; 2909 caddr_t cbplist; 2910 aiocb64_32_t cb64; 2911 aiocb64_32_t *aiocb = &cb64; 2912 aiocb64_32_t *cbp; 2913 caddr32_t *ucbp; 2914 #ifdef _LP64 2915 aiocb_t aiocb_n; 2916 #endif 2917 struct sigevent32 sigevk; 2918 sigqueue_t *sqp; 2919 int (*aio_func)(); 2920 int mode; 2921 int error = 0; 2922 int aio_errors = 0; 2923 int i; 2924 size_t ssize; 2925 int deadhead = 0; 2926 int aio_notsupported = 0; 2927 int lio_head_port; 2928 int aio_port; 2929 int aio_thread; 2930 port_kevent_t *pkevtp = NULL; 2931 port_notify32_t pnotify; 2932 int event; 2933 2934 aiop = curproc->p_aio; 2935 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 2936 return (EINVAL); 2937 2938 ASSERT(get_udatamodel() == DATAMODEL_ILP32); 2939 2940 ssize = (sizeof (caddr32_t) * nent); 2941 cbplist = kmem_alloc(ssize, KM_SLEEP); 2942 ucbp = (caddr32_t *)cbplist; 2943 2944 if (copyin(aiocb_arg, cbplist, ssize) || 2945 (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) { 2946 kmem_free(cbplist, ssize); 2947 return (EFAULT); 2948 } 2949 2950 /* Event Ports */ 2951 if (sigev && 2952 (sigevk.sigev_notify == SIGEV_THREAD || 2953 sigevk.sigev_notify == SIGEV_PORT)) { 2954 if (sigevk.sigev_notify == SIGEV_THREAD) { 2955 pnotify.portnfy_port = sigevk.sigev_signo; 2956 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 2957 } else if (copyin( 2958 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr, 2959 &pnotify, sizeof (pnotify))) { 2960 kmem_free(cbplist, ssize); 2961 return (EFAULT); 2962 } 2963 error = port_alloc_event(pnotify.portnfy_port, 2964 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 2965 if (error) { 2966 if (error == ENOMEM || error == EAGAIN) 2967 error = EAGAIN; 2968 else 2969 error = EINVAL; 2970 kmem_free(cbplist, ssize); 2971 return (error); 2972 } 2973 lio_head_port = pnotify.portnfy_port; 2974 } 2975 2976 /* 2977 * a list head should be allocated if notification is 2978 * enabled for this list. 
2979 */ 2980 head = NULL; 2981 2982 if (mode_arg == LIO_WAIT || sigev) { 2983 mutex_enter(&aiop->aio_mutex); 2984 error = aio_lio_alloc(&head); 2985 mutex_exit(&aiop->aio_mutex); 2986 if (error) 2987 goto done; 2988 deadhead = 1; 2989 head->lio_nent = nent; 2990 head->lio_refcnt = nent; 2991 head->lio_port = -1; 2992 head->lio_portkev = NULL; 2993 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 2994 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 2995 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 2996 if (sqp == NULL) { 2997 error = EAGAIN; 2998 goto done; 2999 } 3000 sqp->sq_func = NULL; 3001 sqp->sq_next = NULL; 3002 sqp->sq_info.si_code = SI_ASYNCIO; 3003 sqp->sq_info.si_pid = curproc->p_pid; 3004 sqp->sq_info.si_ctid = PRCTID(curproc); 3005 sqp->sq_info.si_zoneid = getzoneid(); 3006 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3007 sqp->sq_info.si_signo = sigevk.sigev_signo; 3008 sqp->sq_info.si_value.sival_int = 3009 sigevk.sigev_value.sival_int; 3010 head->lio_sigqp = sqp; 3011 } else { 3012 head->lio_sigqp = NULL; 3013 } 3014 if (pkevtp) { 3015 /* 3016 * Prepare data to send when list of aiocb's 3017 * has completed. 3018 */ 3019 port_init_event(pkevtp, (uintptr_t)sigev, 3020 (void *)(uintptr_t)pnotify.portnfy_user, 3021 NULL, head); 3022 pkevtp->portkev_events = AIOLIO64; 3023 head->lio_portkev = pkevtp; 3024 head->lio_port = pnotify.portnfy_port; 3025 } 3026 } 3027 3028 for (i = 0; i < nent; i++, ucbp++) { 3029 3030 cbp = (aiocb64_32_t *)(uintptr_t)*ucbp; 3031 /* skip entry if it can't be copied. */ 3032 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) { 3033 if (head) { 3034 mutex_enter(&aiop->aio_mutex); 3035 head->lio_nent--; 3036 head->lio_refcnt--; 3037 mutex_exit(&aiop->aio_mutex); 3038 } 3039 continue; 3040 } 3041 3042 /* skip if opcode for aiocb is LIO_NOP */ 3043 mode = aiocb->aio_lio_opcode; 3044 if (mode == LIO_NOP) { 3045 cbp = NULL; 3046 if (head) { 3047 mutex_enter(&aiop->aio_mutex); 3048 head->lio_nent--; 3049 head->lio_refcnt--; 3050 mutex_exit(&aiop->aio_mutex); 3051 } 3052 continue; 3053 } 3054 3055 /* increment file descriptor's ref count. 
*/ 3056 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 3057 lio_set_uerror(&cbp->aio_resultp, EBADF); 3058 if (head) { 3059 mutex_enter(&aiop->aio_mutex); 3060 head->lio_nent--; 3061 head->lio_refcnt--; 3062 mutex_exit(&aiop->aio_mutex); 3063 } 3064 aio_errors++; 3065 continue; 3066 } 3067 3068 /* 3069 * check the permission of the partition 3070 */ 3071 if ((fp->f_flag & mode) == 0) { 3072 releasef(aiocb->aio_fildes); 3073 lio_set_uerror(&cbp->aio_resultp, EBADF); 3074 if (head) { 3075 mutex_enter(&aiop->aio_mutex); 3076 head->lio_nent--; 3077 head->lio_refcnt--; 3078 mutex_exit(&aiop->aio_mutex); 3079 } 3080 aio_errors++; 3081 continue; 3082 } 3083 3084 /* 3085 * common case where requests are to the same fd 3086 * for the same r/w operation 3087 * for UFS, need to set EBADFD 3088 */ 3089 vp = fp->f_vnode; 3090 if (fp != prev_fp || mode != prev_mode) { 3091 aio_func = check_vp(vp, mode); 3092 if (aio_func == NULL) { 3093 prev_fp = NULL; 3094 releasef(aiocb->aio_fildes); 3095 lio_set_uerror(&cbp->aio_resultp, EBADFD); 3096 aio_notsupported++; 3097 if (head) { 3098 mutex_enter(&aiop->aio_mutex); 3099 head->lio_nent--; 3100 head->lio_refcnt--; 3101 mutex_exit(&aiop->aio_mutex); 3102 } 3103 continue; 3104 } else { 3105 prev_fp = fp; 3106 prev_mode = mode; 3107 } 3108 } 3109 3110 #ifdef _LP64 3111 aiocb_LFton(aiocb, &aiocb_n); 3112 error = aio_req_setup(&reqp, aiop, &aiocb_n, 3113 (aio_result_t *)&cbp->aio_resultp, vp); 3114 #else 3115 error = aio_req_setupLF(&reqp, aiop, aiocb, 3116 (aio_result_t *)&cbp->aio_resultp, vp); 3117 #endif /* _LP64 */ 3118 if (error) { 3119 releasef(aiocb->aio_fildes); 3120 lio_set_uerror(&cbp->aio_resultp, error); 3121 if (head) { 3122 mutex_enter(&aiop->aio_mutex); 3123 head->lio_nent--; 3124 head->lio_refcnt--; 3125 mutex_exit(&aiop->aio_mutex); 3126 } 3127 aio_errors++; 3128 continue; 3129 } 3130 3131 reqp->aio_req_lio = head; 3132 deadhead = 0; 3133 3134 /* 3135 * Set the errno field now before sending the request to 3136 * the driver to avoid a race condition 3137 */ 3138 (void) suword32(&cbp->aio_resultp.aio_errno, 3139 EINPROGRESS); 3140 3141 reqp->aio_req_iocb.iocb32 = *ucbp; 3142 3143 event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64; 3144 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 3145 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 3146 if (aio_port | aio_thread) { 3147 port_kevent_t *lpkevp; 3148 /* 3149 * Prepare data to send with each aiocb completed. 3150 */ 3151 if (aio_port) { 3152 void *paddr = (void *)(uintptr_t) 3153 aiocb->aio_sigevent.sigev_value.sival_ptr; 3154 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3155 error = EFAULT; 3156 } else { /* aio_thread */ 3157 pnotify.portnfy_port = 3158 aiocb->aio_sigevent.sigev_signo; 3159 pnotify.portnfy_user = 3160 aiocb->aio_sigevent.sigev_value.sival_ptr; 3161 } 3162 if (error) 3163 /* EMPTY */; 3164 else if (pkevtp != NULL && 3165 pnotify.portnfy_port == lio_head_port) 3166 error = port_dup_event(pkevtp, &lpkevp, 3167 PORT_ALLOC_DEFAULT); 3168 else 3169 error = port_alloc_event(pnotify.portnfy_port, 3170 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 3171 &lpkevp); 3172 if (error == 0) { 3173 port_init_event(lpkevp, (uintptr_t)*ucbp, 3174 (void *)(uintptr_t)pnotify.portnfy_user, 3175 aio_port_callback, reqp); 3176 lpkevp->portkev_events = event; 3177 reqp->aio_req_portkev = lpkevp; 3178 reqp->aio_req_port = pnotify.portnfy_port; 3179 } 3180 } 3181 3182 /* 3183 * send the request to driver. 
3184 */ 3185 if (error == 0) { 3186 if (aiocb->aio_nbytes == 0) { 3187 clear_active_fd(aiocb->aio_fildes); 3188 aio_zerolen(reqp); 3189 continue; 3190 } 3191 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 3192 CRED()); 3193 } 3194 3195 /* 3196 * the fd's ref count is not decremented until the IO has 3197 * completed unless there was an error. 3198 */ 3199 if (error) { 3200 releasef(aiocb->aio_fildes); 3201 lio_set_uerror(&cbp->aio_resultp, error); 3202 if (head) { 3203 mutex_enter(&aiop->aio_mutex); 3204 head->lio_nent--; 3205 head->lio_refcnt--; 3206 mutex_exit(&aiop->aio_mutex); 3207 } 3208 if (error == ENOTSUP) 3209 aio_notsupported++; 3210 else 3211 aio_errors++; 3212 lio_set_error(reqp); 3213 } else { 3214 clear_active_fd(aiocb->aio_fildes); 3215 } 3216 } 3217 3218 if (aio_notsupported) { 3219 error = ENOTSUP; 3220 } else if (aio_errors) { 3221 /* 3222 * return EIO if any request failed 3223 */ 3224 error = EIO; 3225 } 3226 3227 if (mode_arg == LIO_WAIT) { 3228 mutex_enter(&aiop->aio_mutex); 3229 while (head->lio_refcnt > 0) { 3230 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 3231 mutex_exit(&aiop->aio_mutex); 3232 error = EINTR; 3233 goto done; 3234 } 3235 } 3236 mutex_exit(&aiop->aio_mutex); 3237 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE); 3238 } 3239 3240 done: 3241 kmem_free(cbplist, ssize); 3242 if (deadhead) { 3243 if (head->lio_sigqp) 3244 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 3245 if (head->lio_portkev) 3246 port_free_event(head->lio_portkev); 3247 kmem_free(head, sizeof (aio_lio_t)); 3248 } 3249 return (error); 3250 } 3251 3252 #ifdef _SYSCALL32_IMPL 3253 static void 3254 aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest) 3255 { 3256 dest->aio_fildes = src->aio_fildes; 3257 dest->aio_buf = (void *)(uintptr_t)src->aio_buf; 3258 dest->aio_nbytes = (size_t)src->aio_nbytes; 3259 dest->aio_offset = (off_t)src->aio_offset; 3260 dest->aio_reqprio = src->aio_reqprio; 3261 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 3262 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 3263 3264 /* 3265 * See comment in sigqueue32() on handling of 32-bit 3266 * sigvals in a 64-bit kernel. 3267 */ 3268 dest->aio_sigevent.sigev_value.sival_int = 3269 (int)src->aio_sigevent.sigev_value.sival_int; 3270 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 3271 (uintptr_t)src->aio_sigevent.sigev_notify_function; 3272 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 3273 (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 3274 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 3275 dest->aio_lio_opcode = src->aio_lio_opcode; 3276 dest->aio_state = src->aio_state; 3277 dest->aio__pad[0] = src->aio__pad[0]; 3278 } 3279 #endif 3280 3281 /* 3282 * This function is used only for largefile calls made by 3283 * 32 bit applications. 
3284 */ 3285 static int 3286 aio_req_setupLF( 3287 aio_req_t **reqpp, 3288 aio_t *aiop, 3289 aiocb64_32_t *arg, 3290 aio_result_t *resultp, 3291 vnode_t *vp) 3292 { 3293 sigqueue_t *sqp = NULL; 3294 aio_req_t *reqp; 3295 struct uio *uio; 3296 struct sigevent32 *sigev; 3297 int error; 3298 3299 sigev = &arg->aio_sigevent; 3300 if (sigev->sigev_notify == SIGEV_SIGNAL && 3301 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) { 3302 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3303 if (sqp == NULL) 3304 return (EAGAIN); 3305 sqp->sq_func = NULL; 3306 sqp->sq_next = NULL; 3307 sqp->sq_info.si_code = SI_ASYNCIO; 3308 sqp->sq_info.si_pid = curproc->p_pid; 3309 sqp->sq_info.si_ctid = PRCTID(curproc); 3310 sqp->sq_info.si_zoneid = getzoneid(); 3311 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3312 sqp->sq_info.si_signo = sigev->sigev_signo; 3313 sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int; 3314 } 3315 3316 mutex_enter(&aiop->aio_mutex); 3317 3318 if (aiop->aio_flags & AIO_REQ_BLOCK) { 3319 mutex_exit(&aiop->aio_mutex); 3320 if (sqp) 3321 kmem_free(sqp, sizeof (sigqueue_t)); 3322 return (EIO); 3323 } 3324 /* 3325 * get an aio_reqp from the free list or allocate one 3326 * from dynamic memory. 3327 */ 3328 if (error = aio_req_alloc(&reqp, resultp)) { 3329 mutex_exit(&aiop->aio_mutex); 3330 if (sqp) 3331 kmem_free(sqp, sizeof (sigqueue_t)); 3332 return (error); 3333 } 3334 aiop->aio_pending++; 3335 aiop->aio_outstanding++; 3336 reqp->aio_req_flags = AIO_PENDING; 3337 if (sigev->sigev_notify == SIGEV_THREAD || 3338 sigev->sigev_notify == SIGEV_PORT) 3339 aio_enq(&aiop->aio_portpending, reqp, 0); 3340 mutex_exit(&aiop->aio_mutex); 3341 /* 3342 * initialize aio request. 3343 */ 3344 reqp->aio_req_fd = arg->aio_fildes; 3345 reqp->aio_req_sigqp = sqp; 3346 reqp->aio_req_iocb.iocb = NULL; 3347 reqp->aio_req_lio = NULL; 3348 reqp->aio_req_buf.b_file = vp; 3349 uio = reqp->aio_req.aio_uio; 3350 uio->uio_iovcnt = 1; 3351 uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf; 3352 uio->uio_iov->iov_len = arg->aio_nbytes; 3353 uio->uio_loffset = arg->aio_offset; 3354 *reqpp = reqp; 3355 return (0); 3356 } 3357 3358 /* 3359 * This routine is called when a non largefile call is made by a 32bit 3360 * process on a ILP32 or LP64 kernel. 
3361 */ 3362 static int 3363 alio32( 3364 int mode_arg, 3365 void *aiocb_arg, 3366 int nent, 3367 void *sigev) 3368 { 3369 file_t *fp; 3370 file_t *prev_fp = NULL; 3371 int prev_mode = -1; 3372 struct vnode *vp; 3373 aio_lio_t *head; 3374 aio_req_t *reqp; 3375 aio_t *aiop; 3376 caddr_t cbplist; 3377 aiocb_t cb; 3378 aiocb_t *aiocb = &cb; 3379 #ifdef _LP64 3380 aiocb32_t *cbp; 3381 caddr32_t *ucbp; 3382 aiocb32_t cb32; 3383 aiocb32_t *aiocb32 = &cb32; 3384 struct sigevent32 sigevk; 3385 #else 3386 aiocb_t *cbp, **ucbp; 3387 struct sigevent sigevk; 3388 #endif 3389 sigqueue_t *sqp; 3390 int (*aio_func)(); 3391 int mode; 3392 int error = 0; 3393 int aio_errors = 0; 3394 int i; 3395 size_t ssize; 3396 int deadhead = 0; 3397 int aio_notsupported = 0; 3398 int lio_head_port; 3399 int aio_port; 3400 int aio_thread; 3401 port_kevent_t *pkevtp = NULL; 3402 #ifdef _LP64 3403 port_notify32_t pnotify; 3404 #else 3405 port_notify_t pnotify; 3406 #endif 3407 int event; 3408 3409 aiop = curproc->p_aio; 3410 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 3411 return (EINVAL); 3412 3413 #ifdef _LP64 3414 ssize = (sizeof (caddr32_t) * nent); 3415 #else 3416 ssize = (sizeof (aiocb_t *) * nent); 3417 #endif 3418 cbplist = kmem_alloc(ssize, KM_SLEEP); 3419 ucbp = (void *)cbplist; 3420 3421 if (copyin(aiocb_arg, cbplist, ssize) || 3422 (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) { 3423 kmem_free(cbplist, ssize); 3424 return (EFAULT); 3425 } 3426 3427 /* Event Ports */ 3428 if (sigev && 3429 (sigevk.sigev_notify == SIGEV_THREAD || 3430 sigevk.sigev_notify == SIGEV_PORT)) { 3431 if (sigevk.sigev_notify == SIGEV_THREAD) { 3432 pnotify.portnfy_port = sigevk.sigev_signo; 3433 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 3434 } else if (copyin( 3435 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr, 3436 &pnotify, sizeof (pnotify))) { 3437 kmem_free(cbplist, ssize); 3438 return (EFAULT); 3439 } 3440 error = port_alloc_event(pnotify.portnfy_port, 3441 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 3442 if (error) { 3443 if (error == ENOMEM || error == EAGAIN) 3444 error = EAGAIN; 3445 else 3446 error = EINVAL; 3447 kmem_free(cbplist, ssize); 3448 return (error); 3449 } 3450 lio_head_port = pnotify.portnfy_port; 3451 } 3452 3453 /* 3454 * a list head should be allocated if notification is 3455 * enabled for this list. 3456 */ 3457 head = NULL; 3458 3459 if (mode_arg == LIO_WAIT || sigev) { 3460 mutex_enter(&aiop->aio_mutex); 3461 error = aio_lio_alloc(&head); 3462 mutex_exit(&aiop->aio_mutex); 3463 if (error) 3464 goto done; 3465 deadhead = 1; 3466 head->lio_nent = nent; 3467 head->lio_refcnt = nent; 3468 head->lio_port = -1; 3469 head->lio_portkev = NULL; 3470 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 3471 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 3472 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3473 if (sqp == NULL) { 3474 error = EAGAIN; 3475 goto done; 3476 } 3477 sqp->sq_func = NULL; 3478 sqp->sq_next = NULL; 3479 sqp->sq_info.si_code = SI_ASYNCIO; 3480 sqp->sq_info.si_pid = curproc->p_pid; 3481 sqp->sq_info.si_ctid = PRCTID(curproc); 3482 sqp->sq_info.si_zoneid = getzoneid(); 3483 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3484 sqp->sq_info.si_signo = sigevk.sigev_signo; 3485 sqp->sq_info.si_value.sival_int = 3486 sigevk.sigev_value.sival_int; 3487 head->lio_sigqp = sqp; 3488 } else { 3489 head->lio_sigqp = NULL; 3490 } 3491 if (pkevtp) { 3492 /* 3493 * Prepare data to send when list of aiocb's has 3494 * completed. 
3495 */ 3496 port_init_event(pkevtp, (uintptr_t)sigev, 3497 (void *)(uintptr_t)pnotify.portnfy_user, 3498 NULL, head); 3499 pkevtp->portkev_events = AIOLIO; 3500 head->lio_portkev = pkevtp; 3501 head->lio_port = pnotify.portnfy_port; 3502 } 3503 } 3504 3505 for (i = 0; i < nent; i++, ucbp++) { 3506 3507 /* skip entry if it can't be copied. */ 3508 #ifdef _LP64 3509 cbp = (aiocb32_t *)(uintptr_t)*ucbp; 3510 if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32))) 3511 #else 3512 cbp = (aiocb_t *)*ucbp; 3513 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) 3514 #endif 3515 { 3516 if (head) { 3517 mutex_enter(&aiop->aio_mutex); 3518 head->lio_nent--; 3519 head->lio_refcnt--; 3520 mutex_exit(&aiop->aio_mutex); 3521 } 3522 continue; 3523 } 3524 #ifdef _LP64 3525 /* 3526 * copy 32 bit structure into 64 bit structure 3527 */ 3528 aiocb_32ton(aiocb32, aiocb); 3529 #endif /* _LP64 */ 3530 3531 /* skip if opcode for aiocb is LIO_NOP */ 3532 mode = aiocb->aio_lio_opcode; 3533 if (mode == LIO_NOP) { 3534 cbp = NULL; 3535 if (head) { 3536 mutex_enter(&aiop->aio_mutex); 3537 head->lio_nent--; 3538 head->lio_refcnt--; 3539 mutex_exit(&aiop->aio_mutex); 3540 } 3541 continue; 3542 } 3543 3544 /* increment file descriptor's ref count. */ 3545 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 3546 lio_set_uerror(&cbp->aio_resultp, EBADF); 3547 if (head) { 3548 mutex_enter(&aiop->aio_mutex); 3549 head->lio_nent--; 3550 head->lio_refcnt--; 3551 mutex_exit(&aiop->aio_mutex); 3552 } 3553 aio_errors++; 3554 continue; 3555 } 3556 3557 /* 3558 * check the permission of the partition 3559 */ 3560 if ((fp->f_flag & mode) == 0) { 3561 releasef(aiocb->aio_fildes); 3562 lio_set_uerror(&cbp->aio_resultp, EBADF); 3563 if (head) { 3564 mutex_enter(&aiop->aio_mutex); 3565 head->lio_nent--; 3566 head->lio_refcnt--; 3567 mutex_exit(&aiop->aio_mutex); 3568 } 3569 aio_errors++; 3570 continue; 3571 } 3572 3573 /* 3574 * common case where requests are to the same fd 3575 * for the same r/w operation 3576 * for UFS, need to set EBADFD 3577 */ 3578 vp = fp->f_vnode; 3579 if (fp != prev_fp || mode != prev_mode) { 3580 aio_func = check_vp(vp, mode); 3581 if (aio_func == NULL) { 3582 prev_fp = NULL; 3583 releasef(aiocb->aio_fildes); 3584 lio_set_uerror(&cbp->aio_resultp, EBADFD); 3585 aio_notsupported++; 3586 if (head) { 3587 mutex_enter(&aiop->aio_mutex); 3588 head->lio_nent--; 3589 head->lio_refcnt--; 3590 mutex_exit(&aiop->aio_mutex); 3591 } 3592 continue; 3593 } else { 3594 prev_fp = fp; 3595 prev_mode = mode; 3596 } 3597 } 3598 3599 error = aio_req_setup(&reqp, aiop, aiocb, 3600 (aio_result_t *)&cbp->aio_resultp, vp); 3601 if (error) { 3602 releasef(aiocb->aio_fildes); 3603 lio_set_uerror(&cbp->aio_resultp, error); 3604 if (head) { 3605 mutex_enter(&aiop->aio_mutex); 3606 head->lio_nent--; 3607 head->lio_refcnt--; 3608 mutex_exit(&aiop->aio_mutex); 3609 } 3610 aio_errors++; 3611 continue; 3612 } 3613 3614 reqp->aio_req_lio = head; 3615 deadhead = 0; 3616 3617 /* 3618 * Set the errno field now before sending the request to 3619 * the driver to avoid a race condition 3620 */ 3621 (void) suword32(&cbp->aio_resultp.aio_errno, 3622 EINPROGRESS); 3623 3624 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp; 3625 3626 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE; 3627 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 3628 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 3629 if (aio_port | aio_thread) { 3630 port_kevent_t *lpkevp; 3631 /* 3632 * Prepare data to send with each aiocb completed. 
3633 */ 3634 #ifdef _LP64 3635 if (aio_port) { 3636 void *paddr = (void *)(uintptr_t) 3637 aiocb32->aio_sigevent.sigev_value.sival_ptr; 3638 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3639 error = EFAULT; 3640 } else { /* aio_thread */ 3641 pnotify.portnfy_port = 3642 aiocb32->aio_sigevent.sigev_signo; 3643 pnotify.portnfy_user = 3644 aiocb32->aio_sigevent.sigev_value.sival_ptr; 3645 } 3646 #else 3647 if (aio_port) { 3648 void *paddr = 3649 aiocb->aio_sigevent.sigev_value.sival_ptr; 3650 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3651 error = EFAULT; 3652 } else { /* aio_thread */ 3653 pnotify.portnfy_port = 3654 aiocb->aio_sigevent.sigev_signo; 3655 pnotify.portnfy_user = 3656 aiocb->aio_sigevent.sigev_value.sival_ptr; 3657 } 3658 #endif 3659 if (error) 3660 /* EMPTY */; 3661 else if (pkevtp != NULL && 3662 pnotify.portnfy_port == lio_head_port) 3663 error = port_dup_event(pkevtp, &lpkevp, 3664 PORT_ALLOC_DEFAULT); 3665 else 3666 error = port_alloc_event(pnotify.portnfy_port, 3667 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 3668 &lpkevp); 3669 if (error == 0) { 3670 port_init_event(lpkevp, (uintptr_t)cbp, 3671 (void *)(uintptr_t)pnotify.portnfy_user, 3672 aio_port_callback, reqp); 3673 lpkevp->portkev_events = event; 3674 reqp->aio_req_portkev = lpkevp; 3675 reqp->aio_req_port = pnotify.portnfy_port; 3676 } 3677 } 3678 3679 /* 3680 * send the request to driver. 3681 */ 3682 if (error == 0) { 3683 if (aiocb->aio_nbytes == 0) { 3684 clear_active_fd(aiocb->aio_fildes); 3685 aio_zerolen(reqp); 3686 continue; 3687 } 3688 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 3689 CRED()); 3690 } 3691 3692 /* 3693 * the fd's ref count is not decremented until the IO has 3694 * completed unless there was an error. 3695 */ 3696 if (error) { 3697 releasef(aiocb->aio_fildes); 3698 lio_set_uerror(&cbp->aio_resultp, error); 3699 if (head) { 3700 mutex_enter(&aiop->aio_mutex); 3701 head->lio_nent--; 3702 head->lio_refcnt--; 3703 mutex_exit(&aiop->aio_mutex); 3704 } 3705 if (error == ENOTSUP) 3706 aio_notsupported++; 3707 else 3708 aio_errors++; 3709 lio_set_error(reqp); 3710 } else { 3711 clear_active_fd(aiocb->aio_fildes); 3712 } 3713 } 3714 3715 if (aio_notsupported) { 3716 error = ENOTSUP; 3717 } else if (aio_errors) { 3718 /* 3719 * return EIO if any request failed 3720 */ 3721 error = EIO; 3722 } 3723 3724 if (mode_arg == LIO_WAIT) { 3725 mutex_enter(&aiop->aio_mutex); 3726 while (head->lio_refcnt > 0) { 3727 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 3728 mutex_exit(&aiop->aio_mutex); 3729 error = EINTR; 3730 goto done; 3731 } 3732 } 3733 mutex_exit(&aiop->aio_mutex); 3734 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32); 3735 } 3736 3737 done: 3738 kmem_free(cbplist, ssize); 3739 if (deadhead) { 3740 if (head->lio_sigqp) 3741 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 3742 if (head->lio_portkev) 3743 port_free_event(head->lio_portkev); 3744 kmem_free(head, sizeof (aio_lio_t)); 3745 } 3746 return (error); 3747 } 3748 3749 3750 #ifdef _SYSCALL32_IMPL 3751 void 3752 aiocb_32ton(aiocb32_t *src, aiocb_t *dest) 3753 { 3754 dest->aio_fildes = src->aio_fildes; 3755 dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf; 3756 dest->aio_nbytes = (size_t)src->aio_nbytes; 3757 dest->aio_offset = (off_t)src->aio_offset; 3758 dest->aio_reqprio = src->aio_reqprio; 3759 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 3760 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 3761 3762 /* 3763 * See comment in sigqueue32() on handling of 32-bit 3764 * sigvals in a 
64-bit kernel. 3765 */ 3766 dest->aio_sigevent.sigev_value.sival_int = 3767 (int)src->aio_sigevent.sigev_value.sival_int; 3768 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 3769 (uintptr_t)src->aio_sigevent.sigev_notify_function; 3770 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 3771 (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 3772 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 3773 dest->aio_lio_opcode = src->aio_lio_opcode; 3774 dest->aio_state = src->aio_state; 3775 dest->aio__pad[0] = src->aio__pad[0]; 3776 } 3777 #endif /* _SYSCALL32_IMPL */ 3778 3779 /* 3780 * aio_port_callback() is called just before the event is retrieved from the 3781 * port. The task of this callback function is to finish the work of the 3782 * transaction for the application; that is: 3783 * - copy out transaction data to the application 3784 * (this thread is running in the right process context) 3785 * - keep track of the transaction (update counters) 3786 * - free allocated buffers 3787 * The aiocb pointer is the object element of the port_kevent_t structure. 3788 * 3789 * flag: 3790 * PORT_CALLBACK_DEFAULT: do copyout and free resources 3791 * PORT_CALLBACK_CLOSE: don't do copyout, free resources 3792 */ 3793 3794 /*ARGSUSED*/ 3795 int 3796 aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp) 3797 { 3798 aio_t *aiop = curproc->p_aio; 3799 aio_req_t *reqp = arg; 3800 struct iovec *iov; 3801 struct buf *bp; 3802 void *resultp; 3803 3804 if (pid != curproc->p_pid) { 3805 /* wrong proc, cannot deliver data here ... */ 3806 return (EACCES); 3807 } 3808 3809 mutex_enter(&aiop->aio_portq_mutex); 3810 reqp->aio_req_portkev = NULL; 3811 aio_req_remove_portq(aiop, reqp); /* remove request from portq */ 3812 mutex_exit(&aiop->aio_portq_mutex); 3813 aphysio_unlock(reqp); /* unlock used pages */ 3814 mutex_enter(&aiop->aio_mutex); 3815 if (reqp->aio_req_flags & AIO_COPYOUTDONE) { 3816 aio_req_free_port(aiop, reqp); /* back to free list */ 3817 mutex_exit(&aiop->aio_mutex); 3818 return (0); 3819 } 3820 3821 iov = reqp->aio_req_uio.uio_iov; 3822 bp = &reqp->aio_req_buf; 3823 resultp = (void *)reqp->aio_req_resultp; 3824 aio_req_free_port(aiop, reqp); /* request struct back to free list */ 3825 mutex_exit(&aiop->aio_mutex); 3826 if (flag == PORT_CALLBACK_DEFAULT) 3827 aio_copyout_result_port(iov, bp, resultp); 3828 return (0); 3829 } 3830
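/*
 * Illustrative sketch of the driver side that check_vp() accepts: a
 * character driver must export aread(9E)/awrite(9E) entry points through
 * its cb_ops, with devo_rev >= 3, cb_rev >= 1, a real cb_strategy, and
 * cb_aread/cb_awrite not set to nodev.  The xx_* names and the stub
 * strategy body below are assumptions for illustration only, following
 * the usual aread(9E)/awrite(9E)-over-aphysio(9F) pattern, so the whole
 * block is guarded by "#if 0" and is never compiled.
 */
#if 0
#include <sys/types.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/aio_req.h>
#include <sys/cred.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

static void
xx_minphys(struct buf *bp)
{
	minphys(bp);		/* clamp the transfer size to the system limit */
}

static int
xx_strategy(struct buf *bp)
{
	/* a real driver queues bp to hardware; this stub simply fails it */
	bioerror(bp, ENXIO);
	biodone(bp);
	return (0);
}

static int
xx_aread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
{
	/* aphysio() locks the user pages and issues the I/O asynchronously */
	return (aphysio(xx_strategy, anocancel, dev, B_READ, xx_minphys, aio));
}

static int
xx_awrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
{
	return (aphysio(xx_strategy, anocancel, dev, B_WRITE, xx_minphys, aio));
}

/*
 * cb_ops layout a kaio-capable driver would advertise; a real driver
 * also supplies open/close and read/write (usually via physio(9F)).
 */
static struct cb_ops xx_cb_ops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	xx_strategy,		/* cb_strategy: must not be nodev for kaio */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_str: STREAMS drivers are declined by kaio */
	D_NEW | D_MP,		/* cb_flag */
	CB_REV,			/* cb_rev: >= 1 so cb_aread/cb_awrite exist */
	xx_aread,		/* cb_aread */
	xx_awrite		/* cb_awrite */
};
#endif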