/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Kernel asynchronous I/O.
 * This is only for raw devices now (as of Nov. 1993).
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/fs/snode.h>
#include <sys/unistd.h>
#include <sys/cmn_err.h>
#include <vm/as.h>
#include <vm/faultcode.h>
#include <sys/sysmacros.h>
#include <sys/procfs.h>
#include <sys/kmem.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/sunddi.h>
#include <sys/aio_impl.h>
#include <sys/debug.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vmsystm.h>
#include <sys/fs/pxfs_ki.h>
#include <sys/contract/process_impl.h>

/*
 * external entry point.
 */
#ifdef _LP64
static int64_t kaioc(long, long, long, long, long, long);
#endif
static int kaio(ulong_t *, rval_t *);


#define	AIO_64	0
#define	AIO_32	1
#define	AIO_LARGEFILE	2

/*
 * implementation specific functions (private)
 */
#ifdef _LP64
static int alio(int, aiocb_t **, int, struct sigevent *);
#endif
static int aionotify(void);
static int aioinit(void);
static int aiostart(void);
static void alio_cleanup(aio_t *, aiocb_t **, int, int);
static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
    cred_t *);
static void lio_set_error(aio_req_t *);
static aio_t *aio_aiop_alloc();
static int aio_req_alloc(aio_req_t **, aio_result_t *);
static int aio_lio_alloc(aio_lio_t **);
static aio_req_t *aio_req_done(void *);
static aio_req_t *aio_req_remove(aio_req_t *);
static int aio_req_find(aio_result_t *, aio_req_t **);
static int aio_hash_insert(struct aio_req_t *, aio_t *);
static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
    aio_result_t *, vnode_t *);
static int aio_cleanup_thread(aio_t *);
static aio_lio_t *aio_list_get(aio_result_t *);
static void lio_set_uerror(void *, int);
extern void aio_zerolen(aio_req_t *);
static int aiowait(struct timeval *, int, long *);
static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
    aio_req_t *reqlist, aio_t *aiop, model_t model);
static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
static int aiosuspend(void *, int, struct timespec *, int,
    long *, int);
static int aliowait(int, void *, int, void *, int);
static int aioerror(void *, int);
static int aio_cancel(int, void *, long *, int);
static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
static int aiorw(int, void *, int, int);

static int alioLF(int, void *, int, void *);
static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
    aio_result_t *, vnode_t *);
static int alio32(int, void *, int, void *);
static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);

#ifdef _SYSCALL32_IMPL
static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
void aiocb_32ton(aiocb32_t *, aiocb_t *);
#endif /* _SYSCALL32_IMPL */

/*
 * implementation specific functions (external)
 */
void aio_req_free(aio_t *, aio_req_t *);

/*
 * Event Port framework
 */

void aio_req_free_port(aio_t *, aio_req_t *);
static int aio_port_callback(void *, int *, pid_t, int, void *);

/*
 * This is the loadable module wrapper.
 */
#include <sys/modctl.h>
#include <sys/syscall.h>

#ifdef _LP64

static struct sysent kaio_sysent = {
	6,
	SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
	(int (*)())kaioc
};

#ifdef _SYSCALL32_IMPL
static struct sysent kaio_sysent32 = {
	7,
	SE_NOUNLOAD | SE_64RVAL,
	kaio
};
#endif /* _SYSCALL32_IMPL */

#else /* _LP64 */

static struct sysent kaio_sysent = {
	7,
	SE_NOUNLOAD | SE_32RVAL1,
	kaio
};

#endif /* _LP64 */

/*
 * Module linkage information for the kernel.
 */

static struct modlsys modlsys = {
	&mod_syscallops,
	"kernel Async I/O",
	&kaio_sysent
};

#ifdef _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"kernel Async I/O for 32 bit compatibility",
	&kaio_sysent32
};
#endif /* _SYSCALL32_IMPL */


static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};

int
_init(void)
{
	int retval;

	if ((retval = mod_install(&modlinkage)) != 0)
		return (retval);

	return (0);
}

int
_fini(void)
{
	int retval;

	retval = mod_remove(&modlinkage);

	return (retval);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

#ifdef _LP64
static int64_t
kaioc(
	long	a0,
	long	a1,
	long	a2,
	long	a3,
	long	a4,
	long	a5)
{
	int	error;
	long	rval = 0;

	switch ((int)a0 & ~AIO_POLL_BIT) {
	case AIOREAD:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FREAD);
		break;
	case AIOWRITE:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FWRITE);
		break;
	case AIOWAIT:
		error = aiowait((struct timeval *)a1, (int)a2, &rval);
		break;
	case AIOWAITN:
		error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
		    (timespec_t *)a4);
		break;
	case AIONOTIFY:
		error = aionotify();
		break;
	case AIOINIT:
		error = aioinit();
		break;
	case AIOSTART:
		error = aiostart();
		break;
	case AIOLIO:
		error = alio((int)a1, (aiocb_t **)a2, (int)a3,
		    (struct sigevent *)a4);
		break;
	case AIOLIOWAIT:
		error = aliowait((int)a1, (void *)a2, (int)a3,
		    (struct sigevent *)a4, AIO_64);
		break;
	case AIOSUSPEND:
		error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
		    (int)a4, &rval, AIO_64);
		break;
	case AIOERROR:
		error = aioerror((void *)a1, AIO_64);
		break;
	case AIOAREAD:
		error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
		break;
	case AIOAWRITE:
		error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
		break;
	case AIOCANCEL:
		error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
		break;

	/*
	 * The large file related stuff is valid only for
	 * 32 bit kernel and not for 64 bit kernel
	 * On 64 bit kernel we convert large file calls
	 * to regular 64bit calls.
	 */

	default:
		error = EINVAL;
	}
	if (error)
		return ((int64_t)set_errno(error));
	return (rval);
}
#endif

static int
kaio(
	ulong_t *uap,
	rval_t *rvp)
{
	long rval = 0;
	int error = 0;
	offset_t	off;


	rvp->r_vals = 0;
#if defined(_LITTLE_ENDIAN)
	off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
#else
	off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
#endif

	switch (uap[0] & ~AIO_POLL_BIT) {
	/*
	 * It must be the 32 bit system call on 64 bit kernel
	 */
	case AIOREAD:
		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
		    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
	case AIOWRITE:
		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
		    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
	case AIOWAIT:
		error = aiowait((struct timeval *)uap[1], (int)uap[2],
		    &rval);
		break;
	case AIOWAITN:
		error = aiowaitn((void *)uap[1], (uint_t)uap[2],
		    (uint_t *)uap[3], (timespec_t *)uap[4]);
		break;
	case AIONOTIFY:
		return (aionotify());
	case AIOINIT:
		return (aioinit());
	case AIOSTART:
		return (aiostart());
	case AIOLIO:
		return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
		    (void *)uap[4]));
	case AIOLIOWAIT:
		return (aliowait((int)uap[1], (void *)uap[2],
		    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
	case AIOSUSPEND:
		error = aiosuspend((void *)uap[1], (int)uap[2],
		    (timespec_t *)uap[3], (int)uap[4],
		    &rval, AIO_32);
		break;
	case AIOERROR:
		return (aioerror((void *)uap[1], AIO_32));
	case AIOAREAD:
		return (aiorw((int)uap[0], (void *)uap[1],
		    FREAD, AIO_32));
	case AIOAWRITE:
		return (aiorw((int)uap[0], (void *)uap[1],
		    FWRITE, AIO_32));
	case AIOCANCEL:
		error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
		    AIO_32));
		break;
	case AIOLIO64:
		return (alioLF((int)uap[1], (void *)uap[2],
		    (int)uap[3], (void *)uap[4]));
	case AIOLIOWAIT64:
		return (aliowait(uap[1], (void *)uap[2],
		    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
	case AIOSUSPEND64:
		error = aiosuspend((void *)uap[1], (int)uap[2],
		    (timespec_t *)uap[3], (int)uap[4], &rval,
		    AIO_LARGEFILE);
		break;
	case AIOERROR64:
		return (aioerror((void *)uap[1], AIO_LARGEFILE));
	case AIOAREAD64:
		return (aiorw((int)uap[0], (void *)uap[1], FREAD,
		    AIO_LARGEFILE));
	case AIOAWRITE64:
		return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
		    AIO_LARGEFILE));
	case AIOCANCEL64:
		error = (aio_cancel((int)uap[1], (void *)uap[2],
		    &rval, AIO_LARGEFILE));
		break;
	default:
		return (EINVAL);
	}

	rvp->r_val1 = rval;
	return (error);
}
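
/*
 * Illustrative sketch (not part of the original source): kaio() above
 * rebuilds a 64-bit file offset from two 32-bit syscall arguments,
 * uap[4] and uap[5], with the word order depending on endianness.
 * The fragment below shows the same split/reassemble round trip in
 * plain C; the names are made up for the example.
 */
#if 0	/* example only, never compiled into the module */
static uint64_t
example_offset_roundtrip(uint64_t off, int little_endian)
{
	uint32_t w[2];		/* stands in for uap[4] and uap[5] */
	uint64_t rebuilt;

	if (little_endian) {
		w[0] = (uint32_t)(off & 0xffffffff);	/* uap[4]: low word */
		w[1] = (uint32_t)(off >> 32);		/* uap[5]: high word */
		rebuilt = ((uint64_t)w[1] << 32) | (uint64_t)w[0];
	} else {
		w[0] = (uint32_t)(off >> 32);		/* uap[4]: high word */
		w[1] = (uint32_t)(off & 0xffffffff);	/* uap[5]: low word */
		rebuilt = ((uint64_t)w[0] << 32) | (uint64_t)w[1];
	}
	return (rebuilt);	/* always equal to off */
}
#endif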

/*
 * wake up LWPs in this process that are sleeping in
 * aiowait().
 */
static int
aionotify(void)
{
	aio_t	*aiop;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (0);

	mutex_enter(&aiop->aio_mutex);
	aiop->aio_notifycnt++;
	cv_broadcast(&aiop->aio_waitcv);
	mutex_exit(&aiop->aio_mutex);

	return (0);
}

static int
timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	struct timeval32 wait_time_32;
#endif
	struct timeval wait_time;
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {		/* wait indefinitely */
		*blocking = 1;
		return (0);
	}

	/*
	 * Need to correctly compare with the -1 passed in for a user
	 * address pointer, with both 32 bit and 64 bit apps.
	 */
	if (model == DATAMODEL_NATIVE) {
		if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time, sizeof (wait_time)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		/*
		 * -1 from a 32bit app. It will not get sign extended.
		 * don't wait if -1.
		 */
		if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
	}
#endif /* _SYSCALL32_IMPL */

	if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */
		*blocking = 0;
		return (0);
	}

	if (wait_time.tv_sec < 0 ||
	    wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
		return (EINVAL);

	rqtime->tv_sec = wait_time.tv_sec;
	rqtime->tv_nsec = wait_time.tv_usec * 1000;
	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}
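
/*
 * Illustrative sketch (not part of the original source): why
 * timeval2reltime() compares the "don't wait" sentinel differently for
 * 32-bit callers.  A 32-bit application passes (struct timeval *)-1,
 * which reaches a 64-bit kernel as the zero-extended value 0xffffffff
 * rather than the sign-extended -1.  The check below mirrors that
 * distinction with made-up local names.
 */
#if 0	/* example only, never compiled into the module */
static int
example_is_dont_wait_sentinel(uintptr_t user_ptr, int caller_is_32bit)
{
	if (caller_is_32bit)
		return (user_ptr == (uintptr_t)(uint32_t)-1);	/* 0xffffffff */
	return ((intptr_t)user_ptr == (intptr_t)-1);		/* all bits set */
}
#endif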

static int
timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	timespec32_t wait_time_32;
#endif
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {
		*blocking = 1;
		return (0);
	}

	if (model == DATAMODEL_NATIVE) {
		if (copyin(timout, rqtime, sizeof (*rqtime)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
	}
#endif /* _SYSCALL32_IMPL */

	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
		*blocking = 0;
		return (0);
	}

	if (rqtime->tv_sec < 0 ||
	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
		return (EINVAL);

	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}
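
/*
 * Illustrative sketch (not part of the original source): both helpers
 * above hand back a *relative* timestruc_t (or NULL for "block
 * forever", or *blocking == 0 for "don't block at all").  Callers such
 * as aiowait() and aiosuspend() then turn it into an absolute deadline
 * by adding the current time (gethrestime()/timespecadd()) before
 * passing it to cv_waituntil_sig().  The stand-alone fragment below
 * shows the same relative-to-absolute step; the names are made up.
 */
#if 0	/* example only, never compiled into the module */
static void
example_relative_to_absolute(timestruc_t *deadline, const timestruc_t *rel,
    const timestruc_t *now)
{
	/* both inputs are assumed validated, i.e. tv_nsec < NANOSEC */
	deadline->tv_sec = now->tv_sec + rel->tv_sec;
	deadline->tv_nsec = now->tv_nsec + rel->tv_nsec;
	if (deadline->tv_nsec >= NANOSEC) {	/* carry into seconds */
		deadline->tv_nsec -= NANOSEC;
		deadline->tv_sec++;
	}
}
#endif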

/*ARGSUSED*/
static int
aiowait(
	struct timeval	*timout,
	int	dontblockflg,
	long	*rval)
{
	int		error;
	aio_t		*aiop;
	aio_req_t	*reqp;
	clock_t		status;
	int		blocking;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* process requests on poll queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}
		if ((reqp = aio_req_remove(NULL)) != NULL) {
			*rval = (long)reqp->aio_req_resultp;
			break;
		}
		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			*rval = 1;
			break;
		}
		/* don't block if no outstanding aio */
		if (aiop->aio_outstanding == 0 && dontblockflg) {
			error = EINVAL;
			break;
		}
		if (blocking) {
			status = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp);

			if (status > 0)		/* check done queue again */
				continue;
			if (status == 0) {	/* interrupted by a signal */
				error = EINTR;
				*rval = -1;
			} else {		/* timer expired */
				error = ETIME;
			}
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	if (reqp) {
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
	return (error);
}
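
/*
 * Illustrative sketch (not part of the original source): the AIOREAD
 * and AIOWAIT cases above back the traditional Solaris aioread() and
 * aiowait() library interfaces.  A minimal user-level caller might
 * look like the fragment below (assuming the interfaces as documented
 * in the aioread(3AIO) and aiowait(3AIO) manual pages; error handling
 * and the exact return conventions of aiowait() are abbreviated).
 */
#if 0	/* example only, never compiled into the module */
#include <sys/asynch.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>

static int
example_raw_read(const char *path, char *buf, int len, off_t off)
{
	aio_result_t res;
	struct timeval tmo = { 5, 0 };	/* wait up to five seconds */
	int fd;

	if ((fd = open(path, O_RDONLY)) == -1)
		return (-1);
	if (aioread(fd, buf, len, off, SEEK_SET, &res) == -1) {
		(void) close(fd);
		return (-1);
	}
	/* reaps any completed request for this process, ours included */
	(void) aiowait(&tmo);
	(void) close(fd);
	return (res.aio_return);	/* bytes read, or -1 with aio_errno set */
}
#endif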

/*
 * aiowaitn can be used to reap completed asynchronous requests submitted with
 * lio_listio, aio_read or aio_write.
 * This function only reaps asynchronous raw I/Os.
 */

/*ARGSUSED*/
static int
aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
{
	int		error = 0;
	aio_t		*aiop;
	aio_req_t	*reqlist = NULL;
	caddr_t		iocblist = NULL;	/* array of iocb ptr's */
	uint_t		waitcnt, cnt = 0;	/* iocb cnt */
	size_t		iocbsz;			/* users iocb size */
	size_t		riocbsz;		/* returned iocb size */
	int		iocb_index = 0;
	model_t		model = get_udatamodel();
	int		blocking = 1;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	if (aiop->aio_outstanding == 0)
		return (EAGAIN);

	if (copyin(nwait, &waitcnt, sizeof (uint_t)))
		return (EFAULT);

	/* set *nwait to zero, if we must return prematurely */
	if (copyout(&cnt, nwait, sizeof (uint_t)))
		return (EFAULT);

	if (waitcnt == 0) {
		blocking = 0;
		rqtp = NULL;
		waitcnt = nent;
	} else {
		error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
		if (error)
			return (error);
	}

	if (model == DATAMODEL_NATIVE)
		iocbsz = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		iocbsz = (sizeof (caddr32_t) * nent);
#endif /* _SYSCALL32_IMPL */

	/*
	 * Only one aio_waitn call is allowed at a time.
	 * The active aio_waitn will collect all requests
	 * out of the "done" list and if necessary it will wait
	 * for some/all pending requests to fulfill the nwait
	 * parameter.
	 * A second or further aio_waitn calls will sleep here
	 * until the active aio_waitn finishes and leaves the kernel.
	 * If the second call does not block (poll), then return
	 * immediately with the error code : EAGAIN.
	 * If the second call should block, then sleep here, but
	 * do not touch the timeout. The timeout starts when this
	 * aio_waitn-call becomes active.
	 */

	mutex_enter(&aiop->aio_mutex);

	while (aiop->aio_flags & AIO_WAITN) {
		if (blocking == 0) {
			mutex_exit(&aiop->aio_mutex);
			return (EAGAIN);
		}

		/* block, no timeout */
		aiop->aio_flags |= AIO_WAITN_PENDING;
		if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
			mutex_exit(&aiop->aio_mutex);
			return (EINTR);
		}
	}

	/*
	 * Establish the absolute future time for the timeout.
	 */
	if (rqtp) {
		timestruc_t now;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
		aiop->aio_iocb = NULL;
	}

	if (aiop->aio_iocb == NULL) {
		iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
		if (iocblist == NULL) {
			mutex_exit(&aiop->aio_mutex);
			return (ENOMEM);
		}
		aiop->aio_iocb = (aiocb_t **)iocblist;
		aiop->aio_iocbsz = iocbsz;
	} else {
		iocblist = (char *)aiop->aio_iocb;
	}

	aiop->aio_waitncnt = waitcnt;
	aiop->aio_flags |= AIO_WAITN;

	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}

		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
			aiop->aio_waitncnt = waitcnt - cnt;
		}

		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			error = 0;
			break;
		}

		/*
		 * if we are here second time as a result of timer
		 * expiration, we reset error if there are enough
		 * aiocb's to satisfy request.
		 * We return also if all requests are already done
		 * and we picked up the whole done queue.
		 */

		if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
		    aiop->aio_doneq == NULL)) {
			error = 0;
			break;
		}

		if ((cnt < waitcnt) && blocking) {
			int rval = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp);
			if (rval > 0)
				continue;
			if (rval < 0) {
				error = ETIME;
				blocking = 0;
				continue;
			}
			error = EINTR;
		}
		break;
	}

	mutex_exit(&aiop->aio_mutex);

	if (cnt > 0) {

		iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
		    aiop, model);

		if (model == DATAMODEL_NATIVE)
			riocbsz = (sizeof (aiocb_t *) * cnt);
#ifdef	_SYSCALL32_IMPL
		else
			riocbsz = (sizeof (caddr32_t) * cnt);
#endif /* _SYSCALL32_IMPL */

		if (copyout(iocblist, uiocb, riocbsz) ||
		    copyout(&cnt, nwait, sizeof (uint_t)))
			error = EFAULT;
	}

	if (aiop->aio_iocbsz > AIO_IOCB_MAX) {
		kmem_free(iocblist, aiop->aio_iocbsz);
		aiop->aio_iocb = NULL;
	}

	/* check if there is another thread waiting for execution */
	mutex_enter(&aiop->aio_mutex);
	aiop->aio_flags &= ~AIO_WAITN;
	if (aiop->aio_flags & AIO_WAITN_PENDING) {
		aiop->aio_flags &= ~AIO_WAITN_PENDING;
		cv_signal(&aiop->aio_waitncv);
	}
	mutex_exit(&aiop->aio_mutex);

	return (error);
}
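
/*
 * Illustrative sketch (not part of the original source): the kernel
 * routine above implements the bulk-reap semantics behind the Solaris
 * aio_waitn() library call.  A user-level caller that collects up to
 * a fixed number of completed control blocks might look like this
 * fragment (assuming the interface as documented in aio_waitn(3RT);
 * submission of the requests and error handling are omitted).
 */
#if 0	/* example only, never compiled into the module */
#include <sys/types.h>
#include <aio.h>
#include <time.h>

#define	EXAMPLE_SUBMITTED	8	/* requests already queued elsewhere */

static int
example_reap(struct aiocb *done[EXAMPLE_SUBMITTED])
{
	uint_t nwait = 2;		/* wake up once two have completed */
	struct timespec tmo = { 1, 0 };	/* but give up after one second */

	if (aio_waitn(done, EXAMPLE_SUBMITTED, &nwait, &tmo) != 0)
		return (-1);
	return ((int)nwait);	/* number of aiocb pointers filled in */
}
#endif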

/*
 * aio_unlock_requests
 * copies out the result of the request as well as the return value.
 * It builds the list of completed asynchronous requests,
 * unlocks the allocated memory ranges and
 * puts the aio request structure back into the free list.
806 */ 807 808 static int 809 aio_unlock_requests( 810 caddr_t iocblist, 811 int iocb_index, 812 aio_req_t *reqlist, 813 aio_t *aiop, 814 model_t model) 815 { 816 aio_req_t *reqp, *nreqp; 817 818 if (model == DATAMODEL_NATIVE) { 819 for (reqp = reqlist; reqp != NULL; reqp = nreqp) { 820 (((caddr_t *)iocblist)[iocb_index++]) = 821 reqp->aio_req_iocb.iocb; 822 nreqp = reqp->aio_req_next; 823 aphysio_unlock(reqp); 824 aio_copyout_result(reqp); 825 mutex_enter(&aiop->aio_mutex); 826 aio_req_free(aiop, reqp); 827 mutex_exit(&aiop->aio_mutex); 828 } 829 } 830 #ifdef _SYSCALL32_IMPL 831 else { 832 for (reqp = reqlist; reqp != NULL; reqp = nreqp) { 833 ((caddr32_t *)iocblist)[iocb_index++] = 834 reqp->aio_req_iocb.iocb32; 835 nreqp = reqp->aio_req_next; 836 aphysio_unlock(reqp); 837 aio_copyout_result(reqp); 838 mutex_enter(&aiop->aio_mutex); 839 aio_req_free(aiop, reqp); 840 mutex_exit(&aiop->aio_mutex); 841 } 842 } 843 #endif /* _SYSCALL32_IMPL */ 844 return (iocb_index); 845 } 846 847 /* 848 * aio_reqlist_concat 849 * moves "max" elements from the done queue to the reqlist queue and removes 850 * the AIO_DONEQ flag. 851 * - reqlist queue is a simple linked list 852 * - done queue is a double linked list 853 */ 854 855 static int 856 aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max) 857 { 858 aio_req_t *q2, *q2work, *list; 859 int count = 0; 860 861 list = *reqlist; 862 q2 = aiop->aio_doneq; 863 q2work = q2; 864 while (max-- > 0) { 865 q2work->aio_req_flags &= ~AIO_DONEQ; 866 q2work = q2work->aio_req_next; 867 count++; 868 if (q2work == q2) 869 break; 870 } 871 872 if (q2work == q2) { 873 /* all elements revised */ 874 q2->aio_req_prev->aio_req_next = list; 875 list = q2; 876 aiop->aio_doneq = NULL; 877 } else { 878 /* 879 * max < elements in the doneq 880 * detach only the required amount of elements 881 * out of the doneq 882 */ 883 q2work->aio_req_prev->aio_req_next = list; 884 list = q2; 885 886 aiop->aio_doneq = q2work; 887 q2work->aio_req_prev = q2->aio_req_prev; 888 q2->aio_req_prev->aio_req_next = q2work; 889 } 890 *reqlist = list; 891 return (count); 892 } 893 894 /*ARGSUSED*/ 895 static int 896 aiosuspend( 897 void *aiocb, 898 int nent, 899 struct timespec *timout, 900 int flag, 901 long *rval, 902 int run_mode) 903 { 904 int error; 905 aio_t *aiop; 906 aio_req_t *reqp, *found, *next; 907 caddr_t cbplist = NULL; 908 aiocb_t *cbp, **ucbp; 909 #ifdef _SYSCALL32_IMPL 910 aiocb32_t *cbp32; 911 caddr32_t *ucbp32; 912 #endif /* _SYSCALL32_IMPL */ 913 aiocb64_32_t *cbp64; 914 int rv; 915 int i; 916 size_t ssize; 917 model_t model = get_udatamodel(); 918 int blocking; 919 timestruc_t rqtime; 920 timestruc_t *rqtp; 921 922 aiop = curproc->p_aio; 923 if (aiop == NULL || nent <= 0) 924 return (EINVAL); 925 926 /* 927 * Establish the absolute future time for the timeout. 928 */ 929 error = timespec2reltime(timout, &rqtime, &rqtp, &blocking); 930 if (error) 931 return (error); 932 if (rqtp) { 933 timestruc_t now; 934 gethrestime(&now); 935 timespecadd(rqtp, &now); 936 } 937 938 /* 939 * If we are not blocking and there's no IO complete 940 * skip aiocb copyin. 
941 */ 942 if (!blocking && (aiop->aio_pollq == NULL) && 943 (aiop->aio_doneq == NULL)) { 944 return (EAGAIN); 945 } 946 947 if (model == DATAMODEL_NATIVE) 948 ssize = (sizeof (aiocb_t *) * nent); 949 #ifdef _SYSCALL32_IMPL 950 else 951 ssize = (sizeof (caddr32_t) * nent); 952 #endif /* _SYSCALL32_IMPL */ 953 954 cbplist = kmem_alloc(ssize, KM_NOSLEEP); 955 if (cbplist == NULL) 956 return (ENOMEM); 957 958 if (copyin(aiocb, cbplist, ssize)) { 959 error = EFAULT; 960 goto done; 961 } 962 963 found = NULL; 964 /* 965 * we need to get the aio_cleanupq_mutex since we call 966 * aio_req_done(). 967 */ 968 mutex_enter(&aiop->aio_cleanupq_mutex); 969 mutex_enter(&aiop->aio_mutex); 970 for (;;) { 971 /* push requests on poll queue to done queue */ 972 if (aiop->aio_pollq) { 973 mutex_exit(&aiop->aio_mutex); 974 mutex_exit(&aiop->aio_cleanupq_mutex); 975 aio_cleanup(0); 976 mutex_enter(&aiop->aio_cleanupq_mutex); 977 mutex_enter(&aiop->aio_mutex); 978 } 979 /* check for requests on done queue */ 980 if (aiop->aio_doneq) { 981 if (model == DATAMODEL_NATIVE) 982 ucbp = (aiocb_t **)cbplist; 983 #ifdef _SYSCALL32_IMPL 984 else 985 ucbp32 = (caddr32_t *)cbplist; 986 #endif /* _SYSCALL32_IMPL */ 987 for (i = 0; i < nent; i++) { 988 if (model == DATAMODEL_NATIVE) { 989 if ((cbp = *ucbp++) == NULL) 990 continue; 991 if (run_mode != AIO_LARGEFILE) 992 reqp = aio_req_done( 993 &cbp->aio_resultp); 994 else { 995 cbp64 = (aiocb64_32_t *)cbp; 996 reqp = aio_req_done( 997 &cbp64->aio_resultp); 998 } 999 } 1000 #ifdef _SYSCALL32_IMPL 1001 else { 1002 if (run_mode == AIO_32) { 1003 if ((cbp32 = 1004 (aiocb32_t *)(uintptr_t) 1005 *ucbp32++) == NULL) 1006 continue; 1007 reqp = aio_req_done( 1008 &cbp32->aio_resultp); 1009 } else if (run_mode == AIO_LARGEFILE) { 1010 if ((cbp64 = 1011 (aiocb64_32_t *)(uintptr_t) 1012 *ucbp32++) == NULL) 1013 continue; 1014 reqp = aio_req_done( 1015 &cbp64->aio_resultp); 1016 } 1017 1018 } 1019 #endif /* _SYSCALL32_IMPL */ 1020 if (reqp) { 1021 reqp->aio_req_next = found; 1022 found = reqp; 1023 } 1024 if (aiop->aio_doneq == NULL) 1025 break; 1026 } 1027 if (found) 1028 break; 1029 } 1030 if (aiop->aio_notifycnt > 0) { 1031 /* 1032 * nothing on the kernel's queue. the user 1033 * has notified the kernel that it has items 1034 * on a user-level queue. 1035 */ 1036 aiop->aio_notifycnt--; 1037 *rval = 1; 1038 error = 0; 1039 break; 1040 } 1041 /* don't block if nothing is outstanding */ 1042 if (aiop->aio_outstanding == 0) { 1043 error = EAGAIN; 1044 break; 1045 } 1046 if (blocking) { 1047 /* 1048 * drop the aio_cleanupq_mutex as we are 1049 * going to block. 1050 */ 1051 mutex_exit(&aiop->aio_cleanupq_mutex); 1052 rv = cv_waituntil_sig(&aiop->aio_waitcv, 1053 &aiop->aio_mutex, rqtp); 1054 /* 1055 * we have to drop aio_mutex and 1056 * grab it in the right order. 
			 */
			mutex_exit(&aiop->aio_mutex);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
			if (rv > 0)	/* check done queue again */
				continue;
			if (rv == 0)	/* interrupted by a signal */
				error = EINTR;
			else		/* timer expired */
				error = ETIME;
		} else {
			error = EAGAIN;
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	for (reqp = found; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
done:
	kmem_free(cbplist, ssize);
	return (error);
}

/*
 * initialize aio by allocating an aio_t struct for this
 * process.
 */
static int
aioinit(void)
{
	proc_t *p = curproc;
	aio_t *aiop;
	mutex_enter(&p->p_lock);
	if ((aiop = p->p_aio) == NULL) {
		aiop = aio_aiop_alloc();
		p->p_aio = aiop;
	}
	mutex_exit(&p->p_lock);
	if (aiop == NULL)
		return (ENOMEM);
	return (0);
}

/*
 * start a special thread that will clean up after aio requests
 * that are preventing a segment from being unmapped. as_unmap()
 * blocks until all physio to this segment is completed. this
 * doesn't happen until all the pages in this segment are not
 * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
 * requests still outstanding. this special thread will make sure
 * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
 *
 * this function will return an error if the process has only
 * one LWP. the assumption is that the caller is a separate LWP
 * that remains blocked in the kernel for the life of this process.
 */
static int
aiostart(void)
{
	proc_t *p = curproc;
	aio_t *aiop;
	int first, error = 0;

	if (p->p_lwpcnt == 1)
		return (EDEADLK);
	mutex_enter(&p->p_lock);
	if ((aiop = p->p_aio) == NULL)
		error = EINVAL;
	else {
		first = aiop->aio_ok;
		if (aiop->aio_ok == 0)
			aiop->aio_ok = 1;
	}
	mutex_exit(&p->p_lock);
	if (error == 0 && first == 0) {
		return (aio_cleanup_thread(aiop));
		/* should return only to exit */
	}
	return (error);
}

/*
 * Associate an aiocb with a port.
 * This function is used by aiorw() to associate a transaction with a port.
 * Allocate an event port structure (port_alloc_event()) and store the
 * delivered user pointer (portnfy_user) in the portkev_user field of the
 * port_kevent_t structure.
 * The aio_req_portkev pointer in the aio_req_t structure was added to identify
 * the port association.
1153 */ 1154 1155 static int 1156 aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp, 1157 aio_req_t *reqp, int event) 1158 { 1159 port_kevent_t *pkevp = NULL; 1160 int error; 1161 1162 error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT, 1163 PORT_SOURCE_AIO, &pkevp); 1164 if (error) { 1165 if ((error == ENOMEM) || (error == EAGAIN)) 1166 error = EAGAIN; 1167 else 1168 error = EINVAL; 1169 } else { 1170 port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user, 1171 aio_port_callback, reqp); 1172 pkevp->portkev_events = event; 1173 reqp->aio_req_portkev = pkevp; 1174 reqp->aio_req_port = pntfy->portnfy_port; 1175 } 1176 return (error); 1177 } 1178 1179 #ifdef _LP64 1180 1181 /* 1182 * Asynchronous list IO. A chain of aiocb's are copied in 1183 * one at a time. If the aiocb is invalid, it is skipped. 1184 * For each aiocb, the appropriate driver entry point is 1185 * called. Optimize for the common case where the list 1186 * of requests is to the same file descriptor. 1187 * 1188 * One possible optimization is to define a new driver entry 1189 * point that supports a list of IO requests. Whether this 1190 * improves performance depends somewhat on the driver's 1191 * locking strategy. Processing a list could adversely impact 1192 * the driver's interrupt latency. 1193 */ 1194 static int 1195 alio( 1196 int mode_arg, 1197 aiocb_t **aiocb_arg, 1198 int nent, 1199 struct sigevent *sigev) 1200 { 1201 file_t *fp; 1202 file_t *prev_fp = NULL; 1203 int prev_mode = -1; 1204 struct vnode *vp; 1205 aio_lio_t *head; 1206 aio_req_t *reqp; 1207 aio_t *aiop; 1208 caddr_t cbplist; 1209 aiocb_t cb; 1210 aiocb_t *aiocb = &cb; 1211 aiocb_t *cbp; 1212 aiocb_t **ucbp; 1213 struct sigevent sigevk; 1214 sigqueue_t *sqp; 1215 int (*aio_func)(); 1216 int mode; 1217 int error = 0; 1218 int aio_errors = 0; 1219 int i; 1220 size_t ssize; 1221 int deadhead = 0; 1222 int aio_notsupported = 0; 1223 int lio_head_port; 1224 int aio_port; 1225 int aio_thread; 1226 port_kevent_t *pkevtp = NULL; 1227 port_notify_t pnotify; 1228 int event; 1229 1230 aiop = curproc->p_aio; 1231 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 1232 return (EINVAL); 1233 1234 ssize = (sizeof (aiocb_t *) * nent); 1235 cbplist = kmem_alloc(ssize, KM_SLEEP); 1236 ucbp = (aiocb_t **)cbplist; 1237 1238 if (copyin(aiocb_arg, cbplist, ssize) || 1239 (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) { 1240 kmem_free(cbplist, ssize); 1241 return (EFAULT); 1242 } 1243 1244 /* Event Ports */ 1245 if (sigev && 1246 (sigevk.sigev_notify == SIGEV_THREAD || 1247 sigevk.sigev_notify == SIGEV_PORT)) { 1248 if (sigevk.sigev_notify == SIGEV_THREAD) { 1249 pnotify.portnfy_port = sigevk.sigev_signo; 1250 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 1251 } else if (copyin(sigevk.sigev_value.sival_ptr, 1252 &pnotify, sizeof (pnotify))) { 1253 kmem_free(cbplist, ssize); 1254 return (EFAULT); 1255 } 1256 error = port_alloc_event(pnotify.portnfy_port, 1257 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 1258 if (error) { 1259 if (error == ENOMEM || error == EAGAIN) 1260 error = EAGAIN; 1261 else 1262 error = EINVAL; 1263 kmem_free(cbplist, ssize); 1264 return (error); 1265 } 1266 lio_head_port = pnotify.portnfy_port; 1267 } 1268 1269 /* 1270 * a list head should be allocated if notification is 1271 * enabled for this list. 
1272 */ 1273 head = NULL; 1274 1275 if (mode_arg == LIO_WAIT || sigev) { 1276 mutex_enter(&aiop->aio_mutex); 1277 error = aio_lio_alloc(&head); 1278 mutex_exit(&aiop->aio_mutex); 1279 if (error) 1280 goto done; 1281 deadhead = 1; 1282 head->lio_nent = nent; 1283 head->lio_refcnt = nent; 1284 head->lio_port = -1; 1285 head->lio_portkev = NULL; 1286 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 1287 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 1288 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 1289 if (sqp == NULL) { 1290 error = EAGAIN; 1291 goto done; 1292 } 1293 sqp->sq_func = NULL; 1294 sqp->sq_next = NULL; 1295 sqp->sq_info.si_code = SI_ASYNCIO; 1296 sqp->sq_info.si_pid = curproc->p_pid; 1297 sqp->sq_info.si_ctid = PRCTID(curproc); 1298 sqp->sq_info.si_zoneid = getzoneid(); 1299 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 1300 sqp->sq_info.si_signo = sigevk.sigev_signo; 1301 sqp->sq_info.si_value = sigevk.sigev_value; 1302 head->lio_sigqp = sqp; 1303 } else { 1304 head->lio_sigqp = NULL; 1305 } 1306 if (pkevtp) { 1307 /* 1308 * Prepare data to send when list of aiocb's 1309 * has completed. 1310 */ 1311 port_init_event(pkevtp, (uintptr_t)sigev, 1312 (void *)(uintptr_t)pnotify.portnfy_user, 1313 NULL, head); 1314 pkevtp->portkev_events = AIOLIO; 1315 head->lio_portkev = pkevtp; 1316 head->lio_port = pnotify.portnfy_port; 1317 } 1318 } 1319 1320 for (i = 0; i < nent; i++, ucbp++) { 1321 1322 cbp = *ucbp; 1323 /* skip entry if it can't be copied. */ 1324 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) { 1325 if (head) { 1326 mutex_enter(&aiop->aio_mutex); 1327 head->lio_nent--; 1328 head->lio_refcnt--; 1329 mutex_exit(&aiop->aio_mutex); 1330 } 1331 continue; 1332 } 1333 1334 /* skip if opcode for aiocb is LIO_NOP */ 1335 mode = aiocb->aio_lio_opcode; 1336 if (mode == LIO_NOP) { 1337 cbp = NULL; 1338 if (head) { 1339 mutex_enter(&aiop->aio_mutex); 1340 head->lio_nent--; 1341 head->lio_refcnt--; 1342 mutex_exit(&aiop->aio_mutex); 1343 } 1344 continue; 1345 } 1346 1347 /* increment file descriptor's ref count. */ 1348 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 1349 lio_set_uerror(&cbp->aio_resultp, EBADF); 1350 if (head) { 1351 mutex_enter(&aiop->aio_mutex); 1352 head->lio_nent--; 1353 head->lio_refcnt--; 1354 mutex_exit(&aiop->aio_mutex); 1355 } 1356 aio_errors++; 1357 continue; 1358 } 1359 1360 /* 1361 * check the permission of the partition 1362 */ 1363 if ((fp->f_flag & mode) == 0) { 1364 releasef(aiocb->aio_fildes); 1365 lio_set_uerror(&cbp->aio_resultp, EBADF); 1366 if (head) { 1367 mutex_enter(&aiop->aio_mutex); 1368 head->lio_nent--; 1369 head->lio_refcnt--; 1370 mutex_exit(&aiop->aio_mutex); 1371 } 1372 aio_errors++; 1373 continue; 1374 } 1375 1376 /* 1377 * common case where requests are to the same fd 1378 * for the same r/w operation. 
1379 * for UFS, need to set EBADFD 1380 */ 1381 vp = fp->f_vnode; 1382 if (fp != prev_fp || mode != prev_mode) { 1383 aio_func = check_vp(vp, mode); 1384 if (aio_func == NULL) { 1385 prev_fp = NULL; 1386 releasef(aiocb->aio_fildes); 1387 lio_set_uerror(&cbp->aio_resultp, EBADFD); 1388 aio_notsupported++; 1389 if (head) { 1390 mutex_enter(&aiop->aio_mutex); 1391 head->lio_nent--; 1392 head->lio_refcnt--; 1393 mutex_exit(&aiop->aio_mutex); 1394 } 1395 continue; 1396 } else { 1397 prev_fp = fp; 1398 prev_mode = mode; 1399 } 1400 } 1401 1402 error = aio_req_setup(&reqp, aiop, aiocb, 1403 &cbp->aio_resultp, vp); 1404 if (error) { 1405 releasef(aiocb->aio_fildes); 1406 lio_set_uerror(&cbp->aio_resultp, error); 1407 if (head) { 1408 mutex_enter(&aiop->aio_mutex); 1409 head->lio_nent--; 1410 head->lio_refcnt--; 1411 mutex_exit(&aiop->aio_mutex); 1412 } 1413 aio_errors++; 1414 continue; 1415 } 1416 1417 reqp->aio_req_lio = head; 1418 deadhead = 0; 1419 1420 /* 1421 * Set the errno field now before sending the request to 1422 * the driver to avoid a race condition 1423 */ 1424 (void) suword32(&cbp->aio_resultp.aio_errno, 1425 EINPROGRESS); 1426 1427 reqp->aio_req_iocb.iocb = (caddr_t)cbp; 1428 1429 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE; 1430 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 1431 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 1432 if (aio_port | aio_thread) { 1433 port_kevent_t *lpkevp; 1434 /* 1435 * Prepare data to send with each aiocb completed. 1436 */ 1437 if (aio_port) { 1438 void *paddr = 1439 aiocb->aio_sigevent.sigev_value.sival_ptr; 1440 if (copyin(paddr, &pnotify, sizeof (pnotify))) 1441 error = EFAULT; 1442 } else { /* aio_thread */ 1443 pnotify.portnfy_port = 1444 aiocb->aio_sigevent.sigev_signo; 1445 pnotify.portnfy_user = 1446 aiocb->aio_sigevent.sigev_value.sival_ptr; 1447 } 1448 if (error) 1449 /* EMPTY */; 1450 else if (pkevtp != NULL && 1451 pnotify.portnfy_port == lio_head_port) 1452 error = port_dup_event(pkevtp, &lpkevp, 1453 PORT_ALLOC_DEFAULT); 1454 else 1455 error = port_alloc_event(pnotify.portnfy_port, 1456 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 1457 &lpkevp); 1458 if (error == 0) { 1459 port_init_event(lpkevp, (uintptr_t)cbp, 1460 (void *)(uintptr_t)pnotify.portnfy_user, 1461 aio_port_callback, reqp); 1462 lpkevp->portkev_events = event; 1463 reqp->aio_req_portkev = lpkevp; 1464 reqp->aio_req_port = pnotify.portnfy_port; 1465 } 1466 } 1467 1468 /* 1469 * send the request to driver. 1470 */ 1471 if (error == 0) { 1472 if (aiocb->aio_nbytes == 0) { 1473 clear_active_fd(aiocb->aio_fildes); 1474 aio_zerolen(reqp); 1475 continue; 1476 } 1477 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 1478 CRED()); 1479 } 1480 1481 /* 1482 * the fd's ref count is not decremented until the IO has 1483 * completed unless there was an error. 
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}

#endif /* _LP64 */

/*
 * Asynchronous list IO.
 * If list I/O is called with LIO_WAIT it can still return
 * before all the I/O's are completed if a signal is caught
 * or if the list includes UFS I/O requests. If this happens,
 * libaio will call aliowait() to wait for the I/O's to
 * complete.
 */
/*ARGSUSED*/
static int
aliowait(
	int	mode,
	void	*aiocb,
	int	nent,
	void	*sigev,
	int	run_mode)
{
	aio_lio_t	*head;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb_t		*cbp, **ucbp;
#ifdef	_SYSCALL32_IMPL
	aiocb32_t	*cbp32;
	caddr32_t	*ucbp32;
	aiocb64_32_t	*cbp64;
#endif
	int		error = 0;
	int		i;
	size_t		ssize = 0;
	model_t		model = get_udatamodel();

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	if (model == DATAMODEL_NATIVE)
		ssize = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		ssize = (sizeof (caddr32_t) * nent);
#endif /* _SYSCALL32_IMPL */

	if (ssize == 0)
		return (EINVAL);

	cbplist = kmem_alloc(ssize, KM_SLEEP);

	if (model == DATAMODEL_NATIVE)
		ucbp = (aiocb_t **)cbplist;
#ifdef	_SYSCALL32_IMPL
	else
		ucbp32 = (caddr32_t *)cbplist;
#endif /* _SYSCALL32_IMPL */

	if (copyin(aiocb, cbplist, ssize)) {
		error = EFAULT;
		goto done;
	}

	/*
	 * To find the list head, we go through the
	 * list of aiocb structs, find the request
	 * it's for, then get the list head that reqp
	 * points to
	 */
	head = NULL;

	for (i = 0; i < nent; i++) {
		if (model == DATAMODEL_NATIVE) {
			/*
			 * Since we are only checking for a NULL pointer,
			 * Following should work on both native data sizes
			 * as well as for largefile aiocb.
			 */
			if ((cbp = *ucbp++) == NULL)
				continue;
			if (run_mode != AIO_LARGEFILE)
				if (head = aio_list_get(&cbp->aio_resultp))
					break;
			else {
				/*
				 * This is a case when largefile call is
				 * made on 32 bit kernel.
1623 * Treat each pointer as pointer to 1624 * aiocb64_32 1625 */ 1626 if (head = aio_list_get((aio_result_t *) 1627 &(((aiocb64_32_t *)cbp)->aio_resultp))) 1628 break; 1629 } 1630 } 1631 #ifdef _SYSCALL32_IMPL 1632 else { 1633 if (run_mode == AIO_LARGEFILE) { 1634 if ((cbp64 = (aiocb64_32_t *) 1635 (uintptr_t)*ucbp32++) == NULL) 1636 continue; 1637 if (head = aio_list_get((aio_result_t *) 1638 &cbp64->aio_resultp)) 1639 break; 1640 } else if (run_mode == AIO_32) { 1641 if ((cbp32 = (aiocb32_t *) 1642 (uintptr_t)*ucbp32++) == NULL) 1643 continue; 1644 if (head = aio_list_get((aio_result_t *) 1645 &cbp32->aio_resultp)) 1646 break; 1647 } 1648 } 1649 #endif /* _SYSCALL32_IMPL */ 1650 } 1651 1652 if (head == NULL) { 1653 error = EINVAL; 1654 goto done; 1655 } 1656 1657 mutex_enter(&aiop->aio_mutex); 1658 while (head->lio_refcnt > 0) { 1659 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 1660 mutex_exit(&aiop->aio_mutex); 1661 error = EINTR; 1662 goto done; 1663 } 1664 } 1665 mutex_exit(&aiop->aio_mutex); 1666 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode); 1667 done: 1668 kmem_free(cbplist, ssize); 1669 return (error); 1670 } 1671 1672 aio_lio_t * 1673 aio_list_get(aio_result_t *resultp) 1674 { 1675 aio_lio_t *head = NULL; 1676 aio_t *aiop; 1677 aio_req_t **bucket; 1678 aio_req_t *reqp; 1679 long index; 1680 1681 aiop = curproc->p_aio; 1682 if (aiop == NULL) 1683 return (NULL); 1684 1685 if (resultp) { 1686 index = AIO_HASH(resultp); 1687 bucket = &aiop->aio_hash[index]; 1688 for (reqp = *bucket; reqp != NULL; 1689 reqp = reqp->aio_hash_next) { 1690 if (reqp->aio_req_resultp == resultp) { 1691 head = reqp->aio_req_lio; 1692 return (head); 1693 } 1694 } 1695 } 1696 return (NULL); 1697 } 1698 1699 1700 static void 1701 lio_set_uerror(void *resultp, int error) 1702 { 1703 /* 1704 * the resultp field is a pointer to where the 1705 * error should be written out to the user's 1706 * aiocb. 1707 * 1708 */ 1709 if (get_udatamodel() == DATAMODEL_NATIVE) { 1710 (void) sulword(&((aio_result_t *)resultp)->aio_return, 1711 (ssize_t)-1); 1712 (void) suword32(&((aio_result_t *)resultp)->aio_errno, error); 1713 } 1714 #ifdef _SYSCALL32_IMPL 1715 else { 1716 (void) suword32(&((aio_result32_t *)resultp)->aio_return, 1717 (uint_t)-1); 1718 (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error); 1719 } 1720 #endif /* _SYSCALL32_IMPL */ 1721 } 1722 1723 /* 1724 * do cleanup completion for all requests in list. memory for 1725 * each request is also freed. 
1726 */ 1727 static void 1728 alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode) 1729 { 1730 int i; 1731 aio_req_t *reqp; 1732 aio_result_t *resultp; 1733 aiocb64_32_t *aiocb_64; 1734 1735 for (i = 0; i < nent; i++) { 1736 if (get_udatamodel() == DATAMODEL_NATIVE) { 1737 if (cbp[i] == NULL) 1738 continue; 1739 if (run_mode == AIO_LARGEFILE) { 1740 aiocb_64 = (aiocb64_32_t *)cbp[i]; 1741 resultp = (aio_result_t *) 1742 &aiocb_64->aio_resultp; 1743 } else 1744 resultp = &cbp[i]->aio_resultp; 1745 } 1746 #ifdef _SYSCALL32_IMPL 1747 else { 1748 aiocb32_t *aiocb_32; 1749 caddr32_t *cbp32; 1750 1751 cbp32 = (caddr32_t *)cbp; 1752 if (cbp32[i] == NULL) 1753 continue; 1754 if (run_mode == AIO_32) { 1755 aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i]; 1756 resultp = (aio_result_t *)&aiocb_32-> 1757 aio_resultp; 1758 } else if (run_mode == AIO_LARGEFILE) { 1759 aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i]; 1760 resultp = (aio_result_t *)&aiocb_64-> 1761 aio_resultp; 1762 } 1763 } 1764 #endif /* _SYSCALL32_IMPL */ 1765 /* 1766 * we need to get the aio_cleanupq_mutex since we call 1767 * aio_req_done(). 1768 */ 1769 mutex_enter(&aiop->aio_cleanupq_mutex); 1770 mutex_enter(&aiop->aio_mutex); 1771 reqp = aio_req_done(resultp); 1772 mutex_exit(&aiop->aio_mutex); 1773 mutex_exit(&aiop->aio_cleanupq_mutex); 1774 if (reqp != NULL) { 1775 aphysio_unlock(reqp); 1776 aio_copyout_result(reqp); 1777 mutex_enter(&aiop->aio_mutex); 1778 aio_req_free(aiop, reqp); 1779 mutex_exit(&aiop->aio_mutex); 1780 } 1781 } 1782 } 1783 1784 /* 1785 * Write out the results for an aio request that is done. 1786 */ 1787 static int 1788 aioerror(void *cb, int run_mode) 1789 { 1790 aio_result_t *resultp; 1791 aio_t *aiop; 1792 aio_req_t *reqp; 1793 int retval; 1794 1795 aiop = curproc->p_aio; 1796 if (aiop == NULL || cb == NULL) 1797 return (EINVAL); 1798 1799 if (get_udatamodel() == DATAMODEL_NATIVE) { 1800 if (run_mode == AIO_LARGEFILE) 1801 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 1802 aio_resultp; 1803 else 1804 resultp = &((aiocb_t *)cb)->aio_resultp; 1805 } 1806 #ifdef _SYSCALL32_IMPL 1807 else { 1808 if (run_mode == AIO_LARGEFILE) 1809 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 1810 aio_resultp; 1811 else if (run_mode == AIO_32) 1812 resultp = (aio_result_t *)&((aiocb32_t *)cb)-> 1813 aio_resultp; 1814 } 1815 #endif /* _SYSCALL32_IMPL */ 1816 /* 1817 * we need to get the aio_cleanupq_mutex since we call 1818 * aio_req_find(). 
1819 */ 1820 mutex_enter(&aiop->aio_cleanupq_mutex); 1821 mutex_enter(&aiop->aio_mutex); 1822 retval = aio_req_find(resultp, &reqp); 1823 mutex_exit(&aiop->aio_mutex); 1824 mutex_exit(&aiop->aio_cleanupq_mutex); 1825 if (retval == 0) { 1826 aphysio_unlock(reqp); 1827 aio_copyout_result(reqp); 1828 mutex_enter(&aiop->aio_mutex); 1829 aio_req_free(aiop, reqp); 1830 mutex_exit(&aiop->aio_mutex); 1831 return (0); 1832 } else if (retval == 1) 1833 return (EINPROGRESS); 1834 else if (retval == 2) 1835 return (EINVAL); 1836 return (0); 1837 } 1838 1839 /* 1840 * aio_cancel - if no requests outstanding, 1841 * return AIO_ALLDONE 1842 * else 1843 * return AIO_NOTCANCELED 1844 */ 1845 static int 1846 aio_cancel( 1847 int fildes, 1848 void *cb, 1849 long *rval, 1850 int run_mode) 1851 { 1852 aio_t *aiop; 1853 void *resultp; 1854 int index; 1855 aio_req_t **bucket; 1856 aio_req_t *ent; 1857 1858 1859 /* 1860 * Verify valid file descriptor 1861 */ 1862 if ((getf(fildes)) == NULL) { 1863 return (EBADF); 1864 } 1865 releasef(fildes); 1866 1867 aiop = curproc->p_aio; 1868 if (aiop == NULL) 1869 return (EINVAL); 1870 1871 if (aiop->aio_outstanding == 0) { 1872 *rval = AIO_ALLDONE; 1873 return (0); 1874 } 1875 1876 mutex_enter(&aiop->aio_mutex); 1877 if (cb != NULL) { 1878 if (get_udatamodel() == DATAMODEL_NATIVE) { 1879 if (run_mode == AIO_LARGEFILE) 1880 resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 1881 ->aio_resultp; 1882 else 1883 resultp = &((aiocb_t *)cb)->aio_resultp; 1884 } 1885 #ifdef _SYSCALL32_IMPL 1886 else { 1887 if (run_mode == AIO_LARGEFILE) 1888 resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 1889 ->aio_resultp; 1890 else if (run_mode == AIO_32) 1891 resultp = (aio_result_t *)&((aiocb32_t *)cb) 1892 ->aio_resultp; 1893 } 1894 #endif /* _SYSCALL32_IMPL */ 1895 index = AIO_HASH(resultp); 1896 bucket = &aiop->aio_hash[index]; 1897 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 1898 if (ent->aio_req_resultp == resultp) { 1899 if ((ent->aio_req_flags & AIO_PENDING) == 0) { 1900 mutex_exit(&aiop->aio_mutex); 1901 *rval = AIO_ALLDONE; 1902 return (0); 1903 } 1904 mutex_exit(&aiop->aio_mutex); 1905 *rval = AIO_NOTCANCELED; 1906 return (0); 1907 } 1908 } 1909 mutex_exit(&aiop->aio_mutex); 1910 *rval = AIO_ALLDONE; 1911 return (0); 1912 } 1913 1914 for (index = 0; index < AIO_HASHSZ; index++) { 1915 bucket = &aiop->aio_hash[index]; 1916 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 1917 if (ent->aio_req_fd == fildes) { 1918 if ((ent->aio_req_flags & AIO_PENDING) != 0) { 1919 mutex_exit(&aiop->aio_mutex); 1920 *rval = AIO_NOTCANCELED; 1921 return (0); 1922 } 1923 } 1924 } 1925 } 1926 mutex_exit(&aiop->aio_mutex); 1927 *rval = AIO_ALLDONE; 1928 return (0); 1929 } 1930 1931 /* 1932 * solaris version of asynchronous read and write 1933 */ 1934 static int 1935 arw( 1936 int opcode, 1937 int fdes, 1938 char *bufp, 1939 int bufsize, 1940 offset_t offset, 1941 aio_result_t *resultp, 1942 int mode) 1943 { 1944 file_t *fp; 1945 int error; 1946 struct vnode *vp; 1947 aio_req_t *reqp; 1948 aio_t *aiop; 1949 int (*aio_func)(); 1950 #ifdef _LP64 1951 aiocb_t aiocb; 1952 #else 1953 aiocb64_32_t aiocb64; 1954 #endif 1955 1956 aiop = curproc->p_aio; 1957 if (aiop == NULL) 1958 return (EINVAL); 1959 1960 if ((fp = getf(fdes)) == NULL) { 1961 return (EBADF); 1962 } 1963 1964 /* 1965 * check the permission of the partition 1966 */ 1967 if ((fp->f_flag & mode) == 0) { 1968 releasef(fdes); 1969 return (EBADF); 1970 } 1971 1972 vp = fp->f_vnode; 1973 aio_func = check_vp(vp, mode); 1974 if 
(aio_func == NULL) { 1975 releasef(fdes); 1976 return (EBADFD); 1977 } 1978 #ifdef _LP64 1979 aiocb.aio_fildes = fdes; 1980 aiocb.aio_buf = bufp; 1981 aiocb.aio_nbytes = bufsize; 1982 aiocb.aio_offset = offset; 1983 aiocb.aio_sigevent.sigev_notify = 0; 1984 error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp); 1985 #else 1986 aiocb64.aio_fildes = fdes; 1987 aiocb64.aio_buf = (caddr32_t)bufp; 1988 aiocb64.aio_nbytes = bufsize; 1989 aiocb64.aio_offset = offset; 1990 aiocb64.aio_sigevent.sigev_notify = 0; 1991 error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp); 1992 #endif 1993 if (error) { 1994 releasef(fdes); 1995 return (error); 1996 } 1997 1998 /* 1999 * enable polling on this request if the opcode has 2000 * the AIO poll bit set 2001 */ 2002 if (opcode & AIO_POLL_BIT) 2003 reqp->aio_req_flags |= AIO_POLL; 2004 2005 if (bufsize == 0) { 2006 clear_active_fd(fdes); 2007 aio_zerolen(reqp); 2008 return (0); 2009 } 2010 /* 2011 * send the request to driver. 2012 */ 2013 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED()); 2014 /* 2015 * the fd is stored in the aio_req_t by aio_req_setup(), and 2016 * is released by the aio_cleanup_thread() when the IO has 2017 * completed. 2018 */ 2019 if (error) { 2020 releasef(fdes); 2021 mutex_enter(&aiop->aio_mutex); 2022 aio_req_free(aiop, reqp); 2023 aiop->aio_pending--; 2024 if (aiop->aio_flags & AIO_REQ_BLOCK) 2025 cv_signal(&aiop->aio_cleanupcv); 2026 mutex_exit(&aiop->aio_mutex); 2027 return (error); 2028 } 2029 clear_active_fd(fdes); 2030 return (0); 2031 } 2032 2033 /* 2034 * posix version of asynchronous read and write 2035 */ 2036 static int 2037 aiorw( 2038 int opcode, 2039 void *aiocb_arg, 2040 int mode, 2041 int run_mode) 2042 { 2043 #ifdef _SYSCALL32_IMPL 2044 aiocb32_t aiocb32; 2045 struct sigevent32 *sigev32; 2046 port_notify32_t pntfy32; 2047 #endif 2048 aiocb64_32_t aiocb64; 2049 aiocb_t aiocb; 2050 file_t *fp; 2051 int error, fd; 2052 size_t bufsize; 2053 struct vnode *vp; 2054 aio_req_t *reqp; 2055 aio_t *aiop; 2056 int (*aio_func)(); 2057 aio_result_t *resultp; 2058 struct sigevent *sigev; 2059 model_t model; 2060 int aio_use_port = 0; 2061 port_notify_t pntfy; 2062 2063 model = get_udatamodel(); 2064 aiop = curproc->p_aio; 2065 if (aiop == NULL) 2066 return (EINVAL); 2067 2068 if (model == DATAMODEL_NATIVE) { 2069 if (run_mode != AIO_LARGEFILE) { 2070 if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t))) 2071 return (EFAULT); 2072 bufsize = aiocb.aio_nbytes; 2073 resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp); 2074 if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) { 2075 return (EBADF); 2076 } 2077 sigev = &aiocb.aio_sigevent; 2078 } else { 2079 /* 2080 * We come here only when we make largefile 2081 * call on 32 bit kernel using 32 bit library. 
2082 */ 2083 if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t))) 2084 return (EFAULT); 2085 bufsize = aiocb64.aio_nbytes; 2086 resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg) 2087 ->aio_resultp); 2088 if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) 2089 return (EBADF); 2090 sigev = (struct sigevent *)&aiocb64.aio_sigevent; 2091 } 2092 2093 if (sigev->sigev_notify == SIGEV_PORT) { 2094 if (copyin((void *)sigev->sigev_value.sival_ptr, 2095 &pntfy, sizeof (port_notify_t))) { 2096 releasef(fd); 2097 return (EFAULT); 2098 } 2099 aio_use_port = 1; 2100 } else if (sigev->sigev_notify == SIGEV_THREAD) { 2101 pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo; 2102 pntfy.portnfy_user = 2103 aiocb.aio_sigevent.sigev_value.sival_ptr; 2104 aio_use_port = 1; 2105 } 2106 } 2107 #ifdef _SYSCALL32_IMPL 2108 else { 2109 if (run_mode == AIO_32) { 2110 /* 32 bit system call is being made on 64 bit kernel */ 2111 if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t))) 2112 return (EFAULT); 2113 2114 bufsize = aiocb32.aio_nbytes; 2115 aiocb_32ton(&aiocb32, &aiocb); 2116 resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)-> 2117 aio_resultp); 2118 if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) { 2119 return (EBADF); 2120 } 2121 sigev32 = &aiocb32.aio_sigevent; 2122 } else if (run_mode == AIO_LARGEFILE) { 2123 /* 2124 * We come here only when we make largefile 2125 * call on 64 bit kernel using 32 bit library. 2126 */ 2127 if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t))) 2128 return (EFAULT); 2129 bufsize = aiocb64.aio_nbytes; 2130 aiocb_LFton(&aiocb64, &aiocb); 2131 resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg) 2132 ->aio_resultp); 2133 if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) 2134 return (EBADF); 2135 sigev32 = &aiocb64.aio_sigevent; 2136 } 2137 2138 if (sigev32->sigev_notify == SIGEV_PORT) { 2139 if (copyin( 2140 (void *)(uintptr_t)sigev32->sigev_value.sival_ptr, 2141 &pntfy32, sizeof (port_notify32_t))) { 2142 releasef(fd); 2143 return (EFAULT); 2144 } 2145 pntfy.portnfy_port = pntfy32.portnfy_port; 2146 pntfy.portnfy_user = (void *)(uintptr_t) 2147 pntfy32.portnfy_user; 2148 aio_use_port = 1; 2149 } else if (sigev32->sigev_notify == SIGEV_THREAD) { 2150 pntfy.portnfy_port = sigev32->sigev_signo; 2151 pntfy.portnfy_user = (void *)(uintptr_t) 2152 sigev32->sigev_value.sival_ptr; 2153 aio_use_port = 1; 2154 } 2155 } 2156 #endif /* _SYSCALL32_IMPL */ 2157 2158 /* 2159 * check the permission of the partition 2160 */ 2161 2162 if ((fp->f_flag & mode) == 0) { 2163 releasef(fd); 2164 return (EBADF); 2165 } 2166 2167 vp = fp->f_vnode; 2168 aio_func = check_vp(vp, mode); 2169 if (aio_func == NULL) { 2170 releasef(fd); 2171 return (EBADFD); 2172 } 2173 if (run_mode == AIO_LARGEFILE) 2174 error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp); 2175 else 2176 error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp); 2177 2178 if (error) { 2179 releasef(fd); 2180 return (error); 2181 } 2182 /* 2183 * enable polling on this request if the opcode has 2184 * the AIO poll bit set 2185 */ 2186 if (opcode & AIO_POLL_BIT) 2187 reqp->aio_req_flags |= AIO_POLL; 2188 2189 if (model == DATAMODEL_NATIVE) 2190 reqp->aio_req_iocb.iocb = aiocb_arg; 2191 #ifdef _SYSCALL32_IMPL 2192 else 2193 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg; 2194 #endif 2195 2196 if (aio_use_port) { 2197 int event = (run_mode == AIO_LARGEFILE)? 2198 ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) : 2199 ((mode == FREAD)? 
		    AIOAREAD : AIOAWRITE);
		error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
	}

	/*
	 * send the request to driver.
	 */
	if (error == 0) {
		if (bufsize == 0) {
			clear_active_fd(fd);
			aio_zerolen(reqp);
			return (0);
		}
		error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
	}

	/*
	 * the fd is stored in the aio_req_t by aio_req_setup(), and
	 * is released by the aio_cleanup_thread() when the IO has
	 * completed.
	 */
	if (error) {
		releasef(fd);
		mutex_enter(&aiop->aio_mutex);
		aio_deq(&aiop->aio_portpending, reqp);
		aio_req_free(aiop, reqp);
		aiop->aio_pending--;
		if (aiop->aio_flags & AIO_REQ_BLOCK)
			cv_signal(&aiop->aio_cleanupcv);
		mutex_exit(&aiop->aio_mutex);
		return (error);
	}
	clear_active_fd(fd);
	return (0);
}


/*
 * set error for a list IO entry that failed.
 */
static void
lio_set_error(aio_req_t *reqp)
{
	aio_t *aiop = curproc->p_aio;

	if (aiop == NULL)
		return;

	mutex_enter(&aiop->aio_mutex);
	aio_deq(&aiop->aio_portpending, reqp);
	aiop->aio_pending--;
	/* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
	reqp->aio_req_flags |= AIO_PHYSIODONE;
	/*
	 * Need to free the request now as it's never
	 * going to get on the done queue
	 *
	 * Note: aio_outstanding is decremented in
	 * aio_req_free()
	 */
	aio_req_free(aiop, reqp);
	if (aiop->aio_flags & AIO_REQ_BLOCK)
		cv_signal(&aiop->aio_cleanupcv);
	mutex_exit(&aiop->aio_mutex);
}

/*
 * check if a specified request is done, and remove it from
 * the done queue. otherwise remove anybody from the done queue
 * if NULL is specified.
 */
static aio_req_t *
aio_req_done(void *resultp)
{
	aio_req_t **bucket;
	aio_req_t *ent;
	aio_t *aiop = curproc->p_aio;
	long index;

	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (resultp) {
		index = AIO_HASH(resultp);
		bucket = &aiop->aio_hash[index];
		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
			if (ent->aio_req_resultp == (aio_result_t *)resultp) {
				if (ent->aio_req_flags & AIO_DONEQ) {
					return (aio_req_remove(ent));
				}
				return (NULL);
			}
		}
		/* no match, resultp is invalid */
		return (NULL);
	}
	return (aio_req_remove(NULL));
}

/*
 * determine if a user-level resultp pointer is associated with an
 * active IO request. Zero is returned when the request is done,
 * and the request is removed from the done queue. Only when the
 * return value is zero, is the "reqp" pointer valid. One is returned
 * when the request is in progress. Two is returned when the request
 * is invalid.
2305 */ 2306 static int 2307 aio_req_find(aio_result_t *resultp, aio_req_t **reqp) 2308 { 2309 aio_req_t **bucket; 2310 aio_req_t *ent; 2311 aio_t *aiop = curproc->p_aio; 2312 long index; 2313 2314 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex)); 2315 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2316 2317 index = AIO_HASH(resultp); 2318 bucket = &aiop->aio_hash[index]; 2319 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 2320 if (ent->aio_req_resultp == resultp) { 2321 if (ent->aio_req_flags & AIO_DONEQ) { 2322 *reqp = aio_req_remove(ent); 2323 return (0); 2324 } 2325 return (1); 2326 } 2327 } 2328 /* no match, resultp is invalid */ 2329 return (2); 2330 } 2331 2332 /* 2333 * remove a request from the done queue. 2334 */ 2335 static aio_req_t * 2336 aio_req_remove(aio_req_t *reqp) 2337 { 2338 aio_t *aiop = curproc->p_aio; 2339 2340 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2341 2342 if (reqp != NULL) { 2343 ASSERT(reqp->aio_req_flags & AIO_DONEQ); 2344 if (reqp->aio_req_next == reqp) { 2345 /* only one request on queue */ 2346 if (reqp == aiop->aio_doneq) { 2347 aiop->aio_doneq = NULL; 2348 } else { 2349 ASSERT(reqp == aiop->aio_cleanupq); 2350 aiop->aio_cleanupq = NULL; 2351 } 2352 } else { 2353 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 2354 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 2355 /* 2356 * The request can be either on the aio_doneq or the 2357 * aio_cleanupq 2358 */ 2359 if (reqp == aiop->aio_doneq) 2360 aiop->aio_doneq = reqp->aio_req_next; 2361 2362 if (reqp == aiop->aio_cleanupq) 2363 aiop->aio_cleanupq = reqp->aio_req_next; 2364 } 2365 reqp->aio_req_flags &= ~AIO_DONEQ; 2366 reqp->aio_req_next = NULL; 2367 reqp->aio_req_prev = NULL; 2368 } else if ((reqp = aiop->aio_doneq) != NULL) { 2369 ASSERT(reqp->aio_req_flags & AIO_DONEQ); 2370 if (reqp == reqp->aio_req_next) { 2371 /* only one request on queue */ 2372 aiop->aio_doneq = NULL; 2373 } else { 2374 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 2375 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 2376 aiop->aio_doneq = reqp->aio_req_next; 2377 } 2378 reqp->aio_req_flags &= ~AIO_DONEQ; 2379 reqp->aio_req_next = NULL; 2380 reqp->aio_req_prev = NULL; 2381 } 2382 if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN)) 2383 cv_broadcast(&aiop->aio_waitcv); 2384 return (reqp); 2385 } 2386 2387 static int 2388 aio_req_setup( 2389 aio_req_t **reqpp, 2390 aio_t *aiop, 2391 aiocb_t *arg, 2392 aio_result_t *resultp, 2393 vnode_t *vp) 2394 { 2395 sigqueue_t *sqp = NULL; 2396 aio_req_t *reqp; 2397 struct uio *uio; 2398 struct sigevent *sigev; 2399 int error; 2400 2401 sigev = &arg->aio_sigevent; 2402 if (sigev->sigev_notify == SIGEV_SIGNAL && 2403 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) { 2404 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 2405 if (sqp == NULL) 2406 return (EAGAIN); 2407 sqp->sq_func = NULL; 2408 sqp->sq_next = NULL; 2409 sqp->sq_info.si_code = SI_ASYNCIO; 2410 sqp->sq_info.si_pid = curproc->p_pid; 2411 sqp->sq_info.si_ctid = PRCTID(curproc); 2412 sqp->sq_info.si_zoneid = getzoneid(); 2413 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 2414 sqp->sq_info.si_signo = sigev->sigev_signo; 2415 sqp->sq_info.si_value = sigev->sigev_value; 2416 } 2417 2418 mutex_enter(&aiop->aio_mutex); 2419 2420 if (aiop->aio_flags & AIO_REQ_BLOCK) { 2421 mutex_exit(&aiop->aio_mutex); 2422 if (sqp) 2423 kmem_free(sqp, sizeof (sigqueue_t)); 2424 return (EIO); 2425 } 2426 /* 2427 * get an aio_reqp from the free list or allocate one 2428 * from dynamic memory. 
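 * aio_req_alloc() fails with EAGAIN when memory is getting tight
 * (freemem < desfree) or its own allocation fails, and with EINVAL
 * when resultp is already hashed (a duplicate request); in either
 * case the sigqueue_t preallocated above is freed before returning.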
2429 */ 2430 if (error = aio_req_alloc(&reqp, resultp)) { 2431 mutex_exit(&aiop->aio_mutex); 2432 if (sqp) 2433 kmem_free(sqp, sizeof (sigqueue_t)); 2434 return (error); 2435 } 2436 aiop->aio_pending++; 2437 aiop->aio_outstanding++; 2438 reqp->aio_req_flags = AIO_PENDING; 2439 if (sigev->sigev_notify == SIGEV_THREAD || 2440 sigev->sigev_notify == SIGEV_PORT) 2441 aio_enq(&aiop->aio_portpending, reqp, 0); 2442 mutex_exit(&aiop->aio_mutex); 2443 /* 2444 * initialize aio request. 2445 */ 2446 reqp->aio_req_fd = arg->aio_fildes; 2447 reqp->aio_req_sigqp = sqp; 2448 reqp->aio_req_iocb.iocb = NULL; 2449 reqp->aio_req_lio = NULL; 2450 reqp->aio_req_buf.b_file = vp; 2451 uio = reqp->aio_req.aio_uio; 2452 uio->uio_iovcnt = 1; 2453 uio->uio_iov->iov_base = (caddr_t)arg->aio_buf; 2454 uio->uio_iov->iov_len = arg->aio_nbytes; 2455 uio->uio_loffset = arg->aio_offset; 2456 *reqpp = reqp; 2457 return (0); 2458 } 2459 2460 /* 2461 * Allocate p_aio struct. 2462 */ 2463 static aio_t * 2464 aio_aiop_alloc(void) 2465 { 2466 aio_t *aiop; 2467 2468 ASSERT(MUTEX_HELD(&curproc->p_lock)); 2469 2470 aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP); 2471 if (aiop) { 2472 mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL); 2473 mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT, 2474 NULL); 2475 mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL); 2476 } 2477 return (aiop); 2478 } 2479 2480 /* 2481 * Allocate an aio_req struct. 2482 */ 2483 static int 2484 aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp) 2485 { 2486 aio_req_t *reqp; 2487 aio_t *aiop = curproc->p_aio; 2488 2489 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2490 2491 if ((reqp = aiop->aio_free) != NULL) { 2492 aiop->aio_free = reqp->aio_req_next; 2493 bzero(reqp, sizeof (*reqp)); 2494 } else { 2495 /* 2496 * Check whether memory is getting tight. 2497 * This is a temporary mechanism to avoid memory 2498 * exhaustion by a single process until we come up 2499 * with a per process solution such as setrlimit(). 2500 */ 2501 if (freemem < desfree) 2502 return (EAGAIN); 2503 reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP); 2504 if (reqp == NULL) 2505 return (EAGAIN); 2506 } 2507 reqp->aio_req.aio_uio = &reqp->aio_req_uio; 2508 reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov; 2509 reqp->aio_req.aio_private = reqp; 2510 reqp->aio_req_buf.b_offset = -1; 2511 reqp->aio_req_resultp = resultp; 2512 if (aio_hash_insert(reqp, aiop)) { 2513 reqp->aio_req_next = aiop->aio_free; 2514 aiop->aio_free = reqp; 2515 return (EINVAL); 2516 } 2517 *nreqp = reqp; 2518 return (0); 2519 } 2520 2521 /* 2522 * Allocate an aio_lio_t struct. 2523 */ 2524 static int 2525 aio_lio_alloc(aio_lio_t **head) 2526 { 2527 aio_lio_t *liop; 2528 aio_t *aiop = curproc->p_aio; 2529 2530 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2531 2532 if ((liop = aiop->aio_lio_free) != NULL) { 2533 aiop->aio_lio_free = liop->lio_next; 2534 } else { 2535 /* 2536 * Check whether memory is getting tight. 2537 * This is a temporary mechanism to avoid memory 2538 * exhaustion by a single process until we come up 2539 * with a per process solution such as setrlimit(). 2540 */ 2541 if (freemem < desfree) 2542 return (EAGAIN); 2543 2544 liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP); 2545 if (liop == NULL) 2546 return (EAGAIN); 2547 } 2548 *head = liop; 2549 return (0); 2550 } 2551 2552 /* 2553 * this is a special per-process thread that is only activated if 2554 * the process is unmapping a segment with outstanding aio. 
Normally, 2555 * the process will have completed the aio before unmapping the 2556 * segment. If the process does unmap a segment with outstanding aio, 2557 * this special thread will guarantee that the locked pages due to 2558 * aphysio() are released, thereby permitting the segment to be 2559 * unmapped. In addition to this, the cleanup thread is woken up 2560 * during DR operations to release the locked pages. 2561 */ 2562 2563 static int 2564 aio_cleanup_thread(aio_t *aiop) 2565 { 2566 proc_t *p = curproc; 2567 struct as *as = p->p_as; 2568 int poked = 0; 2569 kcondvar_t *cvp; 2570 int exit_flag = 0; 2571 int rqclnup = 0; 2572 2573 sigfillset(&curthread->t_hold); 2574 sigdiffset(&curthread->t_hold, &cantmask); 2575 for (;;) { 2576 /* 2577 * If a segment is being unmapped, and the current 2578 * process's done queue is not empty, then every request 2579 * on the doneq with locked resources should be forced 2580 * to release its locks. By moving the doneq requests 2581 * to the cleanupq, aio_cleanup() will process the cleanupq, 2582 * and place requests back onto the doneq. All requests 2583 * processed by aio_cleanup() will have their physical 2584 * resources unlocked. 2585 */ 2586 mutex_enter(&aiop->aio_mutex); 2587 if ((aiop->aio_flags & AIO_CLEANUP) == 0) { 2588 aiop->aio_flags |= AIO_CLEANUP; 2589 mutex_enter(&as->a_contents); 2590 if (aiop->aio_rqclnup) { 2591 aiop->aio_rqclnup = 0; 2592 rqclnup = 1; 2593 } 2594 2595 if ((rqclnup || AS_ISUNMAPWAIT(as)) && 2596 aiop->aio_doneq) { 2597 aio_req_t *doneqhead = aiop->aio_doneq; 2598 mutex_exit(&as->a_contents); 2599 aiop->aio_doneq = NULL; 2600 aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ); 2601 } else { 2602 mutex_exit(&as->a_contents); 2603 } 2604 } 2605 mutex_exit(&aiop->aio_mutex); 2606 aio_cleanup(AIO_CLEANUP_THREAD); 2607 /* 2608 * The thread should block on the cleanupcv while 2609 * AIO_CLEANUP is set. 2610 */ 2611 cvp = &aiop->aio_cleanupcv; 2612 mutex_enter(&aiop->aio_mutex); 2613 2614 if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL || 2615 aiop->aio_notifyq != NULL || 2616 aiop->aio_portcleanupq != NULL) { 2617 mutex_exit(&aiop->aio_mutex); 2618 continue; 2619 } 2620 mutex_enter(&as->a_contents); 2621 2622 /* 2623 * AIO_CLEANUP determines when the cleanup thread 2624 * should be active. This flag is set when 2625 * the cleanup thread is awakened by as_unmap() or 2626 * due to DR operations. 2627 * The flag is cleared when the blocking as_unmap() 2628 * that originally awakened us is allowed to 2629 * complete. as_unmap() blocks when trying to 2630 * unmap a segment that has SOFTLOCKed pages. When 2631 * the segment's pages are all SOFTUNLOCKed, 2632 * as->a_flags & AS_UNMAPWAIT should be zero. 2633 * 2634 * In case of cleanup request by DR, the flag is cleared 2635 * once all the pending aio requests have been processed. 2636 * 2637 * The flag shouldn't be cleared right away if the 2638 * cleanup thread was interrupted because the process 2639 * is doing forkall(). This happens when cv_wait_sig() 2640 * returns zero, because it was awakened by a pokelwps(). 2641 * If the process is not exiting, it must be doing forkall().
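 *
 * In short, each pass of the loop drains work and then sleeps on
 * either aio_cleanupcv (while AIO_CLEANUP is set) or as->a_cv.
 * The sleep is broken by one of three sources visible below:
 * as_unmap() waiting on AS_UNMAPWAIT, a DR cleanup request that
 * sets aio_rqclnup, or a pokelwps() signal (exit, forkall, etc.).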
2642 */ 2643 if ((poked == 0) && 2644 ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) || 2645 (aiop->aio_pending == 0))) { 2646 aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT); 2647 cvp = &as->a_cv; 2648 rqclnup = 0; 2649 } 2650 mutex_exit(&aiop->aio_mutex); 2651 if (poked) { 2652 /* 2653 * If the process is exiting/killed, don't return 2654 * immediately without waiting for pending I/O's 2655 * and releasing the page locks. 2656 */ 2657 if (p->p_flag & (SEXITLWPS|SKILLED)) { 2658 /* 2659 * If exit_flag is set, then it is 2660 * safe to exit because we have released 2661 * page locks of completed I/O's. 2662 */ 2663 if (exit_flag) 2664 break; 2665 2666 mutex_exit(&as->a_contents); 2667 2668 /* 2669 * Wait for all the pending aio to complete. 2670 */ 2671 mutex_enter(&aiop->aio_mutex); 2672 aiop->aio_flags |= AIO_REQ_BLOCK; 2673 while (aiop->aio_pending != 0) 2674 cv_wait(&aiop->aio_cleanupcv, 2675 &aiop->aio_mutex); 2676 mutex_exit(&aiop->aio_mutex); 2677 exit_flag = 1; 2678 continue; 2679 } else if (p->p_flag & 2680 (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) { 2681 /* 2682 * hold LWP until it 2683 * is continued. 2684 */ 2685 mutex_exit(&as->a_contents); 2686 mutex_enter(&p->p_lock); 2687 stop(PR_SUSPENDED, SUSPEND_NORMAL); 2688 mutex_exit(&p->p_lock); 2689 poked = 0; 2690 continue; 2691 } 2692 } else { 2693 /* 2694 * When started this thread will sleep on as->a_cv. 2695 * as_unmap will awake this thread if the 2696 * segment has SOFTLOCKed pages (poked = 0). 2697 * 1. pokelwps() awakes this thread => 2698 * break the loop to check SEXITLWPS, SHOLDFORK, etc 2699 * 2. as_unmap awakes this thread => 2700 * to break the loop it is necessary that 2701 * - AS_UNMAPWAIT is set (as_unmap is waiting for 2702 * memory to be unlocked) 2703 * - AIO_CLEANUP is not set 2704 * (if AIO_CLEANUP is set we have to wait for 2705 * pending requests. aio_done will send a signal 2706 * for every request which completes to continue 2707 * unmapping the corresponding address range) 2708 * 3. A cleanup request will wake this thread up, ex. 2709 * by the DR operations. The aio_rqclnup flag will 2710 * be set. 2711 */ 2712 while (poked == 0) { 2713 /* 2714 * we need to handle cleanup requests 2715 * that come in after we had just cleaned up, 2716 * so that we do cleanup of any new aio 2717 * requests that got completed and have 2718 * locked resources. 2719 */ 2720 if ((aiop->aio_rqclnup || 2721 (AS_ISUNMAPWAIT(as) != 0)) && 2722 (aiop->aio_flags & AIO_CLEANUP) == 0) 2723 break; 2724 poked = !cv_wait_sig(cvp, &as->a_contents); 2725 if (AS_ISUNMAPWAIT(as) == 0) 2726 cv_signal(cvp); 2727 if (aiop->aio_outstanding != 0) 2728 break; 2729 } 2730 } 2731 mutex_exit(&as->a_contents); 2732 } 2733 exit: 2734 mutex_exit(&as->a_contents); 2735 ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED))); 2736 aston(curthread); /* make thread do post_syscall */ 2737 return (0); 2738 } 2739 2740 /* 2741 * save a reference to a user's outstanding aio in a hash list. 
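 *
 * The hash is keyed by the user-level aio_result_t pointer: AIO_HASH()
 * selects a bucket in aiop->aio_hash[] and collisions are chained
 * through aio_hash_next. A resultp that is already present makes this
 * function return DUPLICATE, which aio_req_alloc() maps to EINVAL.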
2742 */ 2743 static int 2744 aio_hash_insert( 2745 aio_req_t *aio_reqp, 2746 aio_t *aiop) 2747 { 2748 long index; 2749 aio_result_t *resultp = aio_reqp->aio_req_resultp; 2750 aio_req_t *current; 2751 aio_req_t **nextp; 2752 2753 index = AIO_HASH(resultp); 2754 nextp = &aiop->aio_hash[index]; 2755 while ((current = *nextp) != NULL) { 2756 if (current->aio_req_resultp == resultp) 2757 return (DUPLICATE); 2758 nextp = &current->aio_hash_next; 2759 } 2760 *nextp = aio_reqp; 2761 aio_reqp->aio_hash_next = NULL; 2762 return (0); 2763 } 2764 2765 static int 2766 (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *, 2767 cred_t *) 2768 { 2769 struct snode *sp; 2770 dev_t dev; 2771 struct cb_ops *cb; 2772 major_t major; 2773 int (*aio_func)(); 2774 2775 dev = vp->v_rdev; 2776 major = getmajor(dev); 2777 2778 /* 2779 * Return NULL for requests to files and STREAMs so 2780 * that libaio takes care of them. 2781 */ 2782 if (vp->v_type == VCHR) { 2783 /* no stream device for kaio */ 2784 if (STREAMSTAB(major)) { 2785 return (NULL); 2786 } 2787 } else { 2788 return (NULL); 2789 } 2790 2791 /* 2792 * Check old drivers which do not have async I/O entry points. 2793 */ 2794 if (devopsp[major]->devo_rev < 3) 2795 return (NULL); 2796 2797 cb = devopsp[major]->devo_cb_ops; 2798 2799 if (cb->cb_rev < 1) 2800 return (NULL); 2801 2802 /* 2803 * Check whether this device is a block device. 2804 * Kaio is not supported for devices like tty. 2805 */ 2806 if (cb->cb_strategy == nodev || cb->cb_strategy == NULL) 2807 return (NULL); 2808 2809 /* 2810 * Clustering: If vnode is a PXFS vnode, then the device may be remote. 2811 * We cannot call the driver directly. Instead return the 2812 * PXFS functions. 2813 */ 2814 2815 if (IS_PXFSVP(vp)) { 2816 if (mode & FREAD) 2817 return (clpxfs_aio_read); 2818 else 2819 return (clpxfs_aio_write); 2820 } 2821 if (mode & FREAD) 2822 aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read; 2823 else 2824 aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write; 2825 2826 /* 2827 * Do we need this? 2828 * nodev returns ENXIO anyway. 2829 */ 2830 if (aio_func == nodev) 2831 return (NULL); 2832 2833 sp = VTOS(vp); 2834 smark(sp, SACC); 2835 return (aio_func); 2836 } 2837 2838 /* 2839 * Clustering: We want check_vp to return a function prototyped 2840 * correctly that will be common to both the PXFS and regular cases. 2841 * We define this intermediate function that will do the right 2842 * thing for driver cases. 2843 */ 2844 2845 static int 2846 driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 2847 { 2848 dev_t dev; 2849 struct cb_ops *cb; 2850 2851 ASSERT(vp->v_type == VCHR); 2852 ASSERT(!IS_PXFSVP(vp)); 2853 dev = VTOS(vp)->s_dev; 2854 ASSERT(STREAMSTAB(getmajor(dev)) == NULL); 2855 2856 cb = devopsp[getmajor(dev)]->devo_cb_ops; 2857 2858 ASSERT(cb->cb_awrite != nodev); 2859 return ((*cb->cb_awrite)(dev, aio, cred_p)); 2860 } 2861 2862 /* 2863 * Clustering: We want check_vp to return a function prototyped 2864 * correctly that will be common to both the PXFS and regular cases. 2865 * We define this intermediate function that will do the right 2866 * thing for driver cases.
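 *
 * Like driver_aio_write() above, this adapts the vnode-based signature
 * that check_vp() hands back to the driver's own cb_aread() entry
 * point, which takes a dev_t; PXFS vnodes never reach these wrappers
 * (see the IS_PXFSVP() test in check_vp()).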
2867 */ 2868 2869 static int 2870 driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 2871 { 2872 dev_t dev; 2873 struct cb_ops *cb; 2874 2875 ASSERT(vp->v_type == VCHR); 2876 ASSERT(!IS_PXFSVP(vp)); 2877 dev = VTOS(vp)->s_dev; 2878 ASSERT(!STREAMSTAB(getmajor(dev))); 2879 2880 cb = devopsp[getmajor(dev)]->devo_cb_ops; 2881 2882 ASSERT(cb->cb_aread != nodev); 2883 return ((*cb->cb_aread)(dev, aio, cred_p)); 2884 } 2885 2886 /* 2887 * This routine is called when a largefile call is made by a 32bit 2888 * process on a ILP32 or LP64 kernel. All 64bit processes are large 2889 * file by definition and will call alio() instead. 2890 */ 2891 static int 2892 alioLF( 2893 int mode_arg, 2894 void *aiocb_arg, 2895 int nent, 2896 void *sigev) 2897 { 2898 file_t *fp; 2899 file_t *prev_fp = NULL; 2900 int prev_mode = -1; 2901 struct vnode *vp; 2902 aio_lio_t *head; 2903 aio_req_t *reqp; 2904 aio_t *aiop; 2905 caddr_t cbplist; 2906 aiocb64_32_t cb64; 2907 aiocb64_32_t *aiocb = &cb64; 2908 aiocb64_32_t *cbp; 2909 caddr32_t *ucbp; 2910 #ifdef _LP64 2911 aiocb_t aiocb_n; 2912 #endif 2913 struct sigevent32 sigevk; 2914 sigqueue_t *sqp; 2915 int (*aio_func)(); 2916 int mode; 2917 int error = 0; 2918 int aio_errors = 0; 2919 int i; 2920 size_t ssize; 2921 int deadhead = 0; 2922 int aio_notsupported = 0; 2923 int lio_head_port; 2924 int aio_port; 2925 int aio_thread; 2926 port_kevent_t *pkevtp = NULL; 2927 port_notify32_t pnotify; 2928 int event; 2929 2930 aiop = curproc->p_aio; 2931 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 2932 return (EINVAL); 2933 2934 ASSERT(get_udatamodel() == DATAMODEL_ILP32); 2935 2936 ssize = (sizeof (caddr32_t) * nent); 2937 cbplist = kmem_alloc(ssize, KM_SLEEP); 2938 ucbp = (caddr32_t *)cbplist; 2939 2940 if (copyin(aiocb_arg, cbplist, ssize) || 2941 (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) { 2942 kmem_free(cbplist, ssize); 2943 return (EFAULT); 2944 } 2945 2946 /* Event Ports */ 2947 if (sigev && 2948 (sigevk.sigev_notify == SIGEV_THREAD || 2949 sigevk.sigev_notify == SIGEV_PORT)) { 2950 if (sigevk.sigev_notify == SIGEV_THREAD) { 2951 pnotify.portnfy_port = sigevk.sigev_signo; 2952 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 2953 } else if (copyin( 2954 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr, 2955 &pnotify, sizeof (pnotify))) { 2956 kmem_free(cbplist, ssize); 2957 return (EFAULT); 2958 } 2959 error = port_alloc_event(pnotify.portnfy_port, 2960 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 2961 if (error) { 2962 if (error == ENOMEM || error == EAGAIN) 2963 error = EAGAIN; 2964 else 2965 error = EINVAL; 2966 kmem_free(cbplist, ssize); 2967 return (error); 2968 } 2969 lio_head_port = pnotify.portnfy_port; 2970 } 2971 2972 /* 2973 * a list head should be allocated if notification is 2974 * enabled for this list. 
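 * The head's lio_nent and lio_refcnt both start at nent and are
 * decremented for every entry that is skipped or fails; lio_refcnt is
 * what an LIO_WAIT caller waits on (see the cv_wait_sig() loop on
 * lio_notify below). The head also carries the list-wide notification,
 * lio_sigqp for a signal or lio_portkev for a port event.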
2975 */ 2976 head = NULL; 2977 2978 if (mode_arg == LIO_WAIT || sigev) { 2979 mutex_enter(&aiop->aio_mutex); 2980 error = aio_lio_alloc(&head); 2981 mutex_exit(&aiop->aio_mutex); 2982 if (error) 2983 goto done; 2984 deadhead = 1; 2985 head->lio_nent = nent; 2986 head->lio_refcnt = nent; 2987 head->lio_port = -1; 2988 head->lio_portkev = NULL; 2989 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 2990 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 2991 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 2992 if (sqp == NULL) { 2993 error = EAGAIN; 2994 goto done; 2995 } 2996 sqp->sq_func = NULL; 2997 sqp->sq_next = NULL; 2998 sqp->sq_info.si_code = SI_ASYNCIO; 2999 sqp->sq_info.si_pid = curproc->p_pid; 3000 sqp->sq_info.si_ctid = PRCTID(curproc); 3001 sqp->sq_info.si_zoneid = getzoneid(); 3002 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3003 sqp->sq_info.si_signo = sigevk.sigev_signo; 3004 sqp->sq_info.si_value.sival_int = 3005 sigevk.sigev_value.sival_int; 3006 head->lio_sigqp = sqp; 3007 } else { 3008 head->lio_sigqp = NULL; 3009 } 3010 if (pkevtp) { 3011 /* 3012 * Prepare data to send when list of aiocb's 3013 * has completed. 3014 */ 3015 port_init_event(pkevtp, (uintptr_t)sigev, 3016 (void *)(uintptr_t)pnotify.portnfy_user, 3017 NULL, head); 3018 pkevtp->portkev_events = AIOLIO64; 3019 head->lio_portkev = pkevtp; 3020 head->lio_port = pnotify.portnfy_port; 3021 } 3022 } 3023 3024 for (i = 0; i < nent; i++, ucbp++) { 3025 3026 cbp = (aiocb64_32_t *)(uintptr_t)*ucbp; 3027 /* skip entry if it can't be copied. */ 3028 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) { 3029 if (head) { 3030 mutex_enter(&aiop->aio_mutex); 3031 head->lio_nent--; 3032 head->lio_refcnt--; 3033 mutex_exit(&aiop->aio_mutex); 3034 } 3035 continue; 3036 } 3037 3038 /* skip if opcode for aiocb is LIO_NOP */ 3039 mode = aiocb->aio_lio_opcode; 3040 if (mode == LIO_NOP) { 3041 cbp = NULL; 3042 if (head) { 3043 mutex_enter(&aiop->aio_mutex); 3044 head->lio_nent--; 3045 head->lio_refcnt--; 3046 mutex_exit(&aiop->aio_mutex); 3047 } 3048 continue; 3049 } 3050 3051 /* increment file descriptor's ref count. 
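 * getf() places a hold on the file. Every error path below drops it
 * again with releasef(); on success clear_active_fd() is called and
 * the hold is kept until the I/O completes (see the comment ahead of
 * the error handling further below).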
*/ 3052 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 3053 lio_set_uerror(&cbp->aio_resultp, EBADF); 3054 if (head) { 3055 mutex_enter(&aiop->aio_mutex); 3056 head->lio_nent--; 3057 head->lio_refcnt--; 3058 mutex_exit(&aiop->aio_mutex); 3059 } 3060 aio_errors++; 3061 continue; 3062 } 3063 3064 /* 3065 * check the permission of the partition 3066 */ 3067 if ((fp->f_flag & mode) == 0) { 3068 releasef(aiocb->aio_fildes); 3069 lio_set_uerror(&cbp->aio_resultp, EBADF); 3070 if (head) { 3071 mutex_enter(&aiop->aio_mutex); 3072 head->lio_nent--; 3073 head->lio_refcnt--; 3074 mutex_exit(&aiop->aio_mutex); 3075 } 3076 aio_errors++; 3077 continue; 3078 } 3079 3080 /* 3081 * common case where requests are to the same fd 3082 * for the same r/w operation 3083 * for UFS, need to set EBADFD 3084 */ 3085 vp = fp->f_vnode; 3086 if (fp != prev_fp || mode != prev_mode) { 3087 aio_func = check_vp(vp, mode); 3088 if (aio_func == NULL) { 3089 prev_fp = NULL; 3090 releasef(aiocb->aio_fildes); 3091 lio_set_uerror(&cbp->aio_resultp, EBADFD); 3092 aio_notsupported++; 3093 if (head) { 3094 mutex_enter(&aiop->aio_mutex); 3095 head->lio_nent--; 3096 head->lio_refcnt--; 3097 mutex_exit(&aiop->aio_mutex); 3098 } 3099 continue; 3100 } else { 3101 prev_fp = fp; 3102 prev_mode = mode; 3103 } 3104 } 3105 3106 #ifdef _LP64 3107 aiocb_LFton(aiocb, &aiocb_n); 3108 error = aio_req_setup(&reqp, aiop, &aiocb_n, 3109 (aio_result_t *)&cbp->aio_resultp, vp); 3110 #else 3111 error = aio_req_setupLF(&reqp, aiop, aiocb, 3112 (aio_result_t *)&cbp->aio_resultp, vp); 3113 #endif /* _LP64 */ 3114 if (error) { 3115 releasef(aiocb->aio_fildes); 3116 lio_set_uerror(&cbp->aio_resultp, error); 3117 if (head) { 3118 mutex_enter(&aiop->aio_mutex); 3119 head->lio_nent--; 3120 head->lio_refcnt--; 3121 mutex_exit(&aiop->aio_mutex); 3122 } 3123 aio_errors++; 3124 continue; 3125 } 3126 3127 reqp->aio_req_lio = head; 3128 deadhead = 0; 3129 3130 /* 3131 * Set the errno field now before sending the request to 3132 * the driver to avoid a race condition 3133 */ 3134 (void) suword32(&cbp->aio_resultp.aio_errno, 3135 EINPROGRESS); 3136 3137 reqp->aio_req_iocb.iocb32 = *ucbp; 3138 3139 event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64; 3140 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 3141 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 3142 if (aio_port | aio_thread) { 3143 port_kevent_t *lpkevp; 3144 /* 3145 * Prepare data to send with each aiocb completed. 3146 */ 3147 if (aio_port) { 3148 void *paddr = (void *)(uintptr_t) 3149 aiocb->aio_sigevent.sigev_value.sival_ptr; 3150 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3151 error = EFAULT; 3152 } else { /* aio_thread */ 3153 pnotify.portnfy_port = 3154 aiocb->aio_sigevent.sigev_signo; 3155 pnotify.portnfy_user = 3156 aiocb->aio_sigevent.sigev_value.sival_ptr; 3157 } 3158 if (error) 3159 /* EMPTY */; 3160 else if (pkevtp != NULL && 3161 pnotify.portnfy_port == lio_head_port) 3162 error = port_dup_event(pkevtp, &lpkevp, 3163 PORT_ALLOC_DEFAULT); 3164 else 3165 error = port_alloc_event(pnotify.portnfy_port, 3166 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 3167 &lpkevp); 3168 if (error == 0) { 3169 port_init_event(lpkevp, (uintptr_t)*ucbp, 3170 (void *)(uintptr_t)pnotify.portnfy_user, 3171 aio_port_callback, reqp); 3172 lpkevp->portkev_events = event; 3173 reqp->aio_req_portkev = lpkevp; 3174 reqp->aio_req_port = pnotify.portnfy_port; 3175 } 3176 } 3177 3178 /* 3179 * send the request to driver. 
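 * Zero-length requests are completed immediately through
 * aio_zerolen() and never reach the driver; everything else goes to
 * the async entry point that check_vp() selected above.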
3180 */ 3181 if (error == 0) { 3182 if (aiocb->aio_nbytes == 0) { 3183 clear_active_fd(aiocb->aio_fildes); 3184 aio_zerolen(reqp); 3185 continue; 3186 } 3187 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 3188 CRED()); 3189 } 3190 3191 /* 3192 * the fd's ref count is not decremented until the IO has 3193 * completed unless there was an error. 3194 */ 3195 if (error) { 3196 releasef(aiocb->aio_fildes); 3197 lio_set_uerror(&cbp->aio_resultp, error); 3198 if (head) { 3199 mutex_enter(&aiop->aio_mutex); 3200 head->lio_nent--; 3201 head->lio_refcnt--; 3202 mutex_exit(&aiop->aio_mutex); 3203 } 3204 if (error == ENOTSUP) 3205 aio_notsupported++; 3206 else 3207 aio_errors++; 3208 lio_set_error(reqp); 3209 } else { 3210 clear_active_fd(aiocb->aio_fildes); 3211 } 3212 } 3213 3214 if (aio_notsupported) { 3215 error = ENOTSUP; 3216 } else if (aio_errors) { 3217 /* 3218 * return EIO if any request failed 3219 */ 3220 error = EIO; 3221 } 3222 3223 if (mode_arg == LIO_WAIT) { 3224 mutex_enter(&aiop->aio_mutex); 3225 while (head->lio_refcnt > 0) { 3226 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 3227 mutex_exit(&aiop->aio_mutex); 3228 error = EINTR; 3229 goto done; 3230 } 3231 } 3232 mutex_exit(&aiop->aio_mutex); 3233 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE); 3234 } 3235 3236 done: 3237 kmem_free(cbplist, ssize); 3238 if (deadhead) { 3239 if (head->lio_sigqp) 3240 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 3241 if (head->lio_portkev) 3242 port_free_event(head->lio_portkev); 3243 kmem_free(head, sizeof (aio_lio_t)); 3244 } 3245 return (error); 3246 } 3247 3248 #ifdef _SYSCALL32_IMPL 3249 static void 3250 aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest) 3251 { 3252 dest->aio_fildes = src->aio_fildes; 3253 dest->aio_buf = (void *)(uintptr_t)src->aio_buf; 3254 dest->aio_nbytes = (size_t)src->aio_nbytes; 3255 dest->aio_offset = (off_t)src->aio_offset; 3256 dest->aio_reqprio = src->aio_reqprio; 3257 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 3258 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 3259 3260 /* 3261 * See comment in sigqueue32() on handling of 32-bit 3262 * sigvals in a 64-bit kernel. 3263 */ 3264 dest->aio_sigevent.sigev_value.sival_int = 3265 (int)src->aio_sigevent.sigev_value.sival_int; 3266 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 3267 (uintptr_t)src->aio_sigevent.sigev_notify_function; 3268 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 3269 (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 3270 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 3271 dest->aio_lio_opcode = src->aio_lio_opcode; 3272 dest->aio_state = src->aio_state; 3273 dest->aio__pad[0] = src->aio__pad[0]; 3274 } 3275 #endif 3276 3277 /* 3278 * This function is used only for largefile calls made by 3279 * 32 bit applications. 
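 * It mirrors aio_req_setup() but reads the aiocb64_32_t layout, so
 * aio_buf, aio_nbytes and aio_offset are taken from the 32-bit
 * largefile structure and the sigevent is a struct sigevent32.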
3280 */ 3281 static int 3282 aio_req_setupLF( 3283 aio_req_t **reqpp, 3284 aio_t *aiop, 3285 aiocb64_32_t *arg, 3286 aio_result_t *resultp, 3287 vnode_t *vp) 3288 { 3289 sigqueue_t *sqp = NULL; 3290 aio_req_t *reqp; 3291 struct uio *uio; 3292 struct sigevent32 *sigev; 3293 int error; 3294 3295 sigev = &arg->aio_sigevent; 3296 if (sigev->sigev_notify == SIGEV_SIGNAL && 3297 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) { 3298 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3299 if (sqp == NULL) 3300 return (EAGAIN); 3301 sqp->sq_func = NULL; 3302 sqp->sq_next = NULL; 3303 sqp->sq_info.si_code = SI_ASYNCIO; 3304 sqp->sq_info.si_pid = curproc->p_pid; 3305 sqp->sq_info.si_ctid = PRCTID(curproc); 3306 sqp->sq_info.si_zoneid = getzoneid(); 3307 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3308 sqp->sq_info.si_signo = sigev->sigev_signo; 3309 sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int; 3310 } 3311 3312 mutex_enter(&aiop->aio_mutex); 3313 3314 if (aiop->aio_flags & AIO_REQ_BLOCK) { 3315 mutex_exit(&aiop->aio_mutex); 3316 if (sqp) 3317 kmem_free(sqp, sizeof (sigqueue_t)); 3318 return (EIO); 3319 } 3320 /* 3321 * get an aio_reqp from the free list or allocate one 3322 * from dynamic memory. 3323 */ 3324 if (error = aio_req_alloc(&reqp, resultp)) { 3325 mutex_exit(&aiop->aio_mutex); 3326 if (sqp) 3327 kmem_free(sqp, sizeof (sigqueue_t)); 3328 return (error); 3329 } 3330 aiop->aio_pending++; 3331 aiop->aio_outstanding++; 3332 reqp->aio_req_flags = AIO_PENDING; 3333 if (sigev->sigev_notify == SIGEV_THREAD || 3334 sigev->sigev_notify == SIGEV_PORT) 3335 aio_enq(&aiop->aio_portpending, reqp, 0); 3336 mutex_exit(&aiop->aio_mutex); 3337 /* 3338 * initialize aio request. 3339 */ 3340 reqp->aio_req_fd = arg->aio_fildes; 3341 reqp->aio_req_sigqp = sqp; 3342 reqp->aio_req_iocb.iocb = NULL; 3343 reqp->aio_req_lio = NULL; 3344 reqp->aio_req_buf.b_file = vp; 3345 uio = reqp->aio_req.aio_uio; 3346 uio->uio_iovcnt = 1; 3347 uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf; 3348 uio->uio_iov->iov_len = arg->aio_nbytes; 3349 uio->uio_loffset = arg->aio_offset; 3350 *reqpp = reqp; 3351 return (0); 3352 } 3353 3354 /* 3355 * This routine is called when a non largefile call is made by a 32bit 3356 * process on a ILP32 or LP64 kernel. 
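 * It mirrors alioLF(); on an LP64 kernel each 32-bit aiocb is first
 * converted with aiocb_32ton() before aio_req_setup() is called,
 * while on an ILP32 kernel the caller's aiocb is used directly.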
3357 */ 3358 static int 3359 alio32( 3360 int mode_arg, 3361 void *aiocb_arg, 3362 int nent, 3363 void *sigev) 3364 { 3365 file_t *fp; 3366 file_t *prev_fp = NULL; 3367 int prev_mode = -1; 3368 struct vnode *vp; 3369 aio_lio_t *head; 3370 aio_req_t *reqp; 3371 aio_t *aiop; 3372 caddr_t cbplist; 3373 aiocb_t cb; 3374 aiocb_t *aiocb = &cb; 3375 #ifdef _LP64 3376 aiocb32_t *cbp; 3377 caddr32_t *ucbp; 3378 aiocb32_t cb32; 3379 aiocb32_t *aiocb32 = &cb32; 3380 struct sigevent32 sigevk; 3381 #else 3382 aiocb_t *cbp, **ucbp; 3383 struct sigevent sigevk; 3384 #endif 3385 sigqueue_t *sqp; 3386 int (*aio_func)(); 3387 int mode; 3388 int error = 0; 3389 int aio_errors = 0; 3390 int i; 3391 size_t ssize; 3392 int deadhead = 0; 3393 int aio_notsupported = 0; 3394 int lio_head_port; 3395 int aio_port; 3396 int aio_thread; 3397 port_kevent_t *pkevtp = NULL; 3398 #ifdef _LP64 3399 port_notify32_t pnotify; 3400 #else 3401 port_notify_t pnotify; 3402 #endif 3403 int event; 3404 3405 aiop = curproc->p_aio; 3406 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 3407 return (EINVAL); 3408 3409 #ifdef _LP64 3410 ssize = (sizeof (caddr32_t) * nent); 3411 #else 3412 ssize = (sizeof (aiocb_t *) * nent); 3413 #endif 3414 cbplist = kmem_alloc(ssize, KM_SLEEP); 3415 ucbp = (void *)cbplist; 3416 3417 if (copyin(aiocb_arg, cbplist, ssize) || 3418 (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) { 3419 kmem_free(cbplist, ssize); 3420 return (EFAULT); 3421 } 3422 3423 /* Event Ports */ 3424 if (sigev && 3425 (sigevk.sigev_notify == SIGEV_THREAD || 3426 sigevk.sigev_notify == SIGEV_PORT)) { 3427 if (sigevk.sigev_notify == SIGEV_THREAD) { 3428 pnotify.portnfy_port = sigevk.sigev_signo; 3429 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 3430 } else if (copyin( 3431 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr, 3432 &pnotify, sizeof (pnotify))) { 3433 kmem_free(cbplist, ssize); 3434 return (EFAULT); 3435 } 3436 error = port_alloc_event(pnotify.portnfy_port, 3437 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 3438 if (error) { 3439 if (error == ENOMEM || error == EAGAIN) 3440 error = EAGAIN; 3441 else 3442 error = EINVAL; 3443 kmem_free(cbplist, ssize); 3444 return (error); 3445 } 3446 lio_head_port = pnotify.portnfy_port; 3447 } 3448 3449 /* 3450 * a list head should be allocated if notification is 3451 * enabled for this list. 3452 */ 3453 head = NULL; 3454 3455 if (mode_arg == LIO_WAIT || sigev) { 3456 mutex_enter(&aiop->aio_mutex); 3457 error = aio_lio_alloc(&head); 3458 mutex_exit(&aiop->aio_mutex); 3459 if (error) 3460 goto done; 3461 deadhead = 1; 3462 head->lio_nent = nent; 3463 head->lio_refcnt = nent; 3464 head->lio_port = -1; 3465 head->lio_portkev = NULL; 3466 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 3467 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 3468 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3469 if (sqp == NULL) { 3470 error = EAGAIN; 3471 goto done; 3472 } 3473 sqp->sq_func = NULL; 3474 sqp->sq_next = NULL; 3475 sqp->sq_info.si_code = SI_ASYNCIO; 3476 sqp->sq_info.si_pid = curproc->p_pid; 3477 sqp->sq_info.si_ctid = PRCTID(curproc); 3478 sqp->sq_info.si_zoneid = getzoneid(); 3479 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3480 sqp->sq_info.si_signo = sigevk.sigev_signo; 3481 sqp->sq_info.si_value.sival_int = 3482 sigevk.sigev_value.sival_int; 3483 head->lio_sigqp = sqp; 3484 } else { 3485 head->lio_sigqp = NULL; 3486 } 3487 if (pkevtp) { 3488 /* 3489 * Prepare data to send when list of aiocb's has 3490 * completed. 
3491 */ 3492 port_init_event(pkevtp, (uintptr_t)sigev, 3493 (void *)(uintptr_t)pnotify.portnfy_user, 3494 NULL, head); 3495 pkevtp->portkev_events = AIOLIO; 3496 head->lio_portkev = pkevtp; 3497 head->lio_port = pnotify.portnfy_port; 3498 } 3499 } 3500 3501 for (i = 0; i < nent; i++, ucbp++) { 3502 3503 /* skip entry if it can't be copied. */ 3504 #ifdef _LP64 3505 cbp = (aiocb32_t *)(uintptr_t)*ucbp; 3506 if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32))) 3507 #else 3508 cbp = (aiocb_t *)*ucbp; 3509 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) 3510 #endif 3511 { 3512 if (head) { 3513 mutex_enter(&aiop->aio_mutex); 3514 head->lio_nent--; 3515 head->lio_refcnt--; 3516 mutex_exit(&aiop->aio_mutex); 3517 } 3518 continue; 3519 } 3520 #ifdef _LP64 3521 /* 3522 * copy 32 bit structure into 64 bit structure 3523 */ 3524 aiocb_32ton(aiocb32, aiocb); 3525 #endif /* _LP64 */ 3526 3527 /* skip if opcode for aiocb is LIO_NOP */ 3528 mode = aiocb->aio_lio_opcode; 3529 if (mode == LIO_NOP) { 3530 cbp = NULL; 3531 if (head) { 3532 mutex_enter(&aiop->aio_mutex); 3533 head->lio_nent--; 3534 head->lio_refcnt--; 3535 mutex_exit(&aiop->aio_mutex); 3536 } 3537 continue; 3538 } 3539 3540 /* increment file descriptor's ref count. */ 3541 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 3542 lio_set_uerror(&cbp->aio_resultp, EBADF); 3543 if (head) { 3544 mutex_enter(&aiop->aio_mutex); 3545 head->lio_nent--; 3546 head->lio_refcnt--; 3547 mutex_exit(&aiop->aio_mutex); 3548 } 3549 aio_errors++; 3550 continue; 3551 } 3552 3553 /* 3554 * check the permission of the partition 3555 */ 3556 if ((fp->f_flag & mode) == 0) { 3557 releasef(aiocb->aio_fildes); 3558 lio_set_uerror(&cbp->aio_resultp, EBADF); 3559 if (head) { 3560 mutex_enter(&aiop->aio_mutex); 3561 head->lio_nent--; 3562 head->lio_refcnt--; 3563 mutex_exit(&aiop->aio_mutex); 3564 } 3565 aio_errors++; 3566 continue; 3567 } 3568 3569 /* 3570 * common case where requests are to the same fd 3571 * for the same r/w operation 3572 * for UFS, need to set EBADFD 3573 */ 3574 vp = fp->f_vnode; 3575 if (fp != prev_fp || mode != prev_mode) { 3576 aio_func = check_vp(vp, mode); 3577 if (aio_func == NULL) { 3578 prev_fp = NULL; 3579 releasef(aiocb->aio_fildes); 3580 lio_set_uerror(&cbp->aio_resultp, EBADFD); 3581 aio_notsupported++; 3582 if (head) { 3583 mutex_enter(&aiop->aio_mutex); 3584 head->lio_nent--; 3585 head->lio_refcnt--; 3586 mutex_exit(&aiop->aio_mutex); 3587 } 3588 continue; 3589 } else { 3590 prev_fp = fp; 3591 prev_mode = mode; 3592 } 3593 } 3594 3595 error = aio_req_setup(&reqp, aiop, aiocb, 3596 (aio_result_t *)&cbp->aio_resultp, vp); 3597 if (error) { 3598 releasef(aiocb->aio_fildes); 3599 lio_set_uerror(&cbp->aio_resultp, error); 3600 if (head) { 3601 mutex_enter(&aiop->aio_mutex); 3602 head->lio_nent--; 3603 head->lio_refcnt--; 3604 mutex_exit(&aiop->aio_mutex); 3605 } 3606 aio_errors++; 3607 continue; 3608 } 3609 3610 reqp->aio_req_lio = head; 3611 deadhead = 0; 3612 3613 /* 3614 * Set the errno field now before sending the request to 3615 * the driver to avoid a race condition 3616 */ 3617 (void) suword32(&cbp->aio_resultp.aio_errno, 3618 EINPROGRESS); 3619 3620 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp; 3621 3622 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE; 3623 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 3624 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 3625 if (aio_port | aio_thread) { 3626 port_kevent_t *lpkevp; 3627 /* 3628 * Prepare data to send with each aiocb completed. 
3629 */ 3630 #ifdef _LP64 3631 if (aio_port) { 3632 void *paddr = (void *)(uintptr_t) 3633 aiocb32->aio_sigevent.sigev_value.sival_ptr; 3634 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3635 error = EFAULT; 3636 } else { /* aio_thread */ 3637 pnotify.portnfy_port = 3638 aiocb32->aio_sigevent.sigev_signo; 3639 pnotify.portnfy_user = 3640 aiocb32->aio_sigevent.sigev_value.sival_ptr; 3641 } 3642 #else 3643 if (aio_port) { 3644 void *paddr = 3645 aiocb->aio_sigevent.sigev_value.sival_ptr; 3646 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3647 error = EFAULT; 3648 } else { /* aio_thread */ 3649 pnotify.portnfy_port = 3650 aiocb->aio_sigevent.sigev_signo; 3651 pnotify.portnfy_user = 3652 aiocb->aio_sigevent.sigev_value.sival_ptr; 3653 } 3654 #endif 3655 if (error) 3656 /* EMPTY */; 3657 else if (pkevtp != NULL && 3658 pnotify.portnfy_port == lio_head_port) 3659 error = port_dup_event(pkevtp, &lpkevp, 3660 PORT_ALLOC_DEFAULT); 3661 else 3662 error = port_alloc_event(pnotify.portnfy_port, 3663 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 3664 &lpkevp); 3665 if (error == 0) { 3666 port_init_event(lpkevp, (uintptr_t)cbp, 3667 (void *)(uintptr_t)pnotify.portnfy_user, 3668 aio_port_callback, reqp); 3669 lpkevp->portkev_events = event; 3670 reqp->aio_req_portkev = lpkevp; 3671 reqp->aio_req_port = pnotify.portnfy_port; 3672 } 3673 } 3674 3675 /* 3676 * send the request to driver. 3677 */ 3678 if (error == 0) { 3679 if (aiocb->aio_nbytes == 0) { 3680 clear_active_fd(aiocb->aio_fildes); 3681 aio_zerolen(reqp); 3682 continue; 3683 } 3684 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 3685 CRED()); 3686 } 3687 3688 /* 3689 * the fd's ref count is not decremented until the IO has 3690 * completed unless there was an error. 3691 */ 3692 if (error) { 3693 releasef(aiocb->aio_fildes); 3694 lio_set_uerror(&cbp->aio_resultp, error); 3695 if (head) { 3696 mutex_enter(&aiop->aio_mutex); 3697 head->lio_nent--; 3698 head->lio_refcnt--; 3699 mutex_exit(&aiop->aio_mutex); 3700 } 3701 if (error == ENOTSUP) 3702 aio_notsupported++; 3703 else 3704 aio_errors++; 3705 lio_set_error(reqp); 3706 } else { 3707 clear_active_fd(aiocb->aio_fildes); 3708 } 3709 } 3710 3711 if (aio_notsupported) { 3712 error = ENOTSUP; 3713 } else if (aio_errors) { 3714 /* 3715 * return EIO if any request failed 3716 */ 3717 error = EIO; 3718 } 3719 3720 if (mode_arg == LIO_WAIT) { 3721 mutex_enter(&aiop->aio_mutex); 3722 while (head->lio_refcnt > 0) { 3723 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 3724 mutex_exit(&aiop->aio_mutex); 3725 error = EINTR; 3726 goto done; 3727 } 3728 } 3729 mutex_exit(&aiop->aio_mutex); 3730 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32); 3731 } 3732 3733 done: 3734 kmem_free(cbplist, ssize); 3735 if (deadhead) { 3736 if (head->lio_sigqp) 3737 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 3738 if (head->lio_portkev) 3739 port_free_event(head->lio_portkev); 3740 kmem_free(head, sizeof (aio_lio_t)); 3741 } 3742 return (error); 3743 } 3744 3745 3746 #ifdef _SYSCALL32_IMPL 3747 void 3748 aiocb_32ton(aiocb32_t *src, aiocb_t *dest) 3749 { 3750 dest->aio_fildes = src->aio_fildes; 3751 dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf; 3752 dest->aio_nbytes = (size_t)src->aio_nbytes; 3753 dest->aio_offset = (off_t)src->aio_offset; 3754 dest->aio_reqprio = src->aio_reqprio; 3755 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 3756 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 3757 3758 /* 3759 * See comment in sigqueue32() on handling of 32-bit 3760 * sigvals in a 
64-bit kernel. 3761 */ 3762 dest->aio_sigevent.sigev_value.sival_int = 3763 (int)src->aio_sigevent.sigev_value.sival_int; 3764 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 3765 (uintptr_t)src->aio_sigevent.sigev_notify_function; 3766 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 3767 (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 3768 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 3769 dest->aio_lio_opcode = src->aio_lio_opcode; 3770 dest->aio_state = src->aio_state; 3771 dest->aio__pad[0] = src->aio__pad[0]; 3772 } 3773 #endif /* _SYSCALL32_IMPL */ 3774 3775 /* 3776 * aio_port_callback() is called just before the event is retrieved from the 3777 * port. The task of this callback function is to finish the work of the 3778 * transaction for the application, which means: 3779 * - copyout transaction data to the application 3780 * (this thread is running in the right process context) 3781 * - keep track of the transaction (update counters). 3782 * - free allocated buffers 3783 * The aiocb pointer is the object element of the port_kevent_t structure. 3784 * 3785 * flag: 3786 * PORT_CALLBACK_DEFAULT: do the copyout and free resources 3787 * PORT_CALLBACK_CLOSE: skip the copyout, just free resources 3788 */ 3789 3790 /*ARGSUSED*/ 3791 int 3792 aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp) 3793 { 3794 aio_t *aiop = curproc->p_aio; 3795 aio_req_t *reqp = arg; 3796 struct iovec *iov; 3797 struct buf *bp; 3798 void *resultp; 3799 3800 if (pid != curproc->p_pid) { 3801 /* wrong process, cannot deliver data here */ 3802 return (EACCES); 3803 } 3804 3805 mutex_enter(&aiop->aio_portq_mutex); 3806 reqp->aio_req_portkev = NULL; 3807 aio_req_remove_portq(aiop, reqp); /* remove request from portq */ 3808 mutex_exit(&aiop->aio_portq_mutex); 3809 aphysio_unlock(reqp); /* unlock used pages */ 3810 mutex_enter(&aiop->aio_mutex); 3811 if (reqp->aio_req_flags & AIO_COPYOUTDONE) { 3812 aio_req_free_port(aiop, reqp); /* back to free list */ 3813 mutex_exit(&aiop->aio_mutex); 3814 return (0); 3815 } 3816 3817 iov = reqp->aio_req_uio.uio_iov; 3818 bp = &reqp->aio_req_buf; 3819 resultp = (void *)reqp->aio_req_resultp; 3820 aio_req_free_port(aiop, reqp); /* request struct back to free list */ 3821 mutex_exit(&aiop->aio_mutex); 3822 if (flag == PORT_CALLBACK_DEFAULT) 3823 aio_copyout_result_port(iov, bp, resultp); 3824 return (0); 3825 } 3826
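/*
 * Illustrative userland sketch of the SIGEV_PORT notification path that
 * aio_port_callback() serves. This is a hypothetical example for clarity
 * only; it is not part of this file and is not compiled here. The names
 * "port", "pn", "cookie", "fd" and "buf" are made up for the example.
 *
 *	int port = port_create();
 *	port_notify_t pn = { port, cookie };
 *	struct aiocb cb;
 *
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_offset = 0;
 *	cb.aio_sigevent.sigev_notify = SIGEV_PORT;
 *	cb.aio_sigevent.sigev_value.sival_ptr = &pn;
 *	(void) aio_read(&cb);
 *
 *	port_event_t pe;
 *	(void) port_get(port, &pe, NULL);
 *
 * When port_get() returns, aio_port_callback() has already run in this
 * process's context and copied the result out; pe.portev_user holds the
 * cookie from the port_notify_t, and pe.portev_object identifies the
 * aiocb that completed.
 */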