/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Kernel asynchronous I/O.
 * This is only for raw devices now (as of Nov. 1993).
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/fs/snode.h>
#include <sys/unistd.h>
#include <sys/cmn_err.h>
#include <vm/as.h>
#include <vm/faultcode.h>
#include <sys/sysmacros.h>
#include <sys/procfs.h>
#include <sys/kmem.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/sunddi.h>
#include <sys/aio_impl.h>
#include <sys/debug.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vmsystm.h>
#include <sys/fs/pxfs_ki.h>
#include <sys/contract/process_impl.h>

/*
 * external entry point.
 */
#ifdef _LP64
static int64_t kaioc(long, long, long, long, long, long);
#endif
static int kaio(ulong_t *, rval_t *);


#define	AIO_64	0
#define	AIO_32	1
#define	AIO_LARGEFILE	2

/*
 * implementation specific functions (private)
 */
#ifdef _LP64
static int alio(int, int, aiocb_t **, int, struct sigevent *);
#endif
static int aionotify(void);
static int aioinit(void);
static int aiostart(void);
static void alio_cleanup(aio_t *, aiocb_t **, int, int);
static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
    cred_t *);
static void lio_set_error(aio_req_t *);
static aio_t *aio_aiop_alloc();
static int aio_req_alloc(aio_req_t **, aio_result_t *);
static int aio_lio_alloc(aio_lio_t **);
static aio_req_t *aio_req_done(void *);
static aio_req_t *aio_req_remove(aio_req_t *);
static int aio_req_find(aio_result_t *, aio_req_t **);
static int aio_hash_insert(struct aio_req_t *, aio_t *);
static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
    aio_result_t *, int, vnode_t *);
static int aio_cleanup_thread(aio_t *);
static aio_lio_t *aio_list_get(aio_result_t *);
static void lio_set_uerror(void *, int);
extern void aio_zerolen(aio_req_t *);
static int aiowait(struct timeval *, int, long *);
static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
    aio_req_t *reqlist, aio_t *aiop, model_t model);
static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
static int aiosuspend(void *, int, struct timespec *, int,
    long *, int);
static int aliowait(int, void *, int, void *, int);
static int aioerror(void *, int);
static int aio_cancel(int, void *, long *, int);
static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
static int aiorw(int, void *, int, int);

static int alioLF(int, void *, int, void *);
static int aio_req_setupLF(aio_req_t **, aio_t *,
    aiocb64_32_t *, aio_result_t *, int, vnode_t *);
static int alio32(int, void *, int, void *);
static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);

#ifdef _SYSCALL32_IMPL
static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
void aiocb_32ton(aiocb32_t *, aiocb_t *);
#endif /* _SYSCALL32_IMPL */

/*
 * implementation specific functions (external)
 */
void aio_req_free(aio_t *, aio_req_t *);

/*
 * Event Port framework
 */

void aio_req_free_port(aio_t *, aio_req_t *);
static int aio_port_callback(void *, int *, pid_t, int, void *);

/*
 * This is the loadable module wrapper.
 */
#include <sys/modctl.h>
#include <sys/syscall.h>

#ifdef _LP64

static struct sysent kaio_sysent = {
	6,
	SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
	(int (*)())kaioc
};

#ifdef _SYSCALL32_IMPL
static struct sysent kaio_sysent32 = {
	7,
	SE_NOUNLOAD | SE_64RVAL,
	kaio
};
#endif  /* _SYSCALL32_IMPL */

#else   /* _LP64 */

static struct sysent kaio_sysent = {
	7,
	SE_NOUNLOAD | SE_32RVAL1,
	kaio
};

#endif  /* _LP64 */
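/*
 * Note (for orientation): the native LP64 entry point takes six
 * arguments because a 64-bit file offset fits in a single argument,
 * while the ILP32 and 32-bit-compat entry points take seven; there
 * the offset arrives as two 32-bit words and is reassembled in
 * kaio() below.
 */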
/*
 * Module linkage information for the kernel.
 */

static struct modlsys modlsys = {
	&mod_syscallops,
	"kernel Async I/O",
	&kaio_sysent
};

#ifdef _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"kernel Async I/O for 32 bit compatibility",
	&kaio_sysent32
};
#endif  /* _SYSCALL32_IMPL */


static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};

int
_init(void)
{
	int retval;

	if ((retval = mod_install(&modlinkage)) != 0)
		return (retval);

	return (0);
}

int
_fini(void)
{
	int retval;

	retval = mod_remove(&modlinkage);

	return (retval);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

#ifdef _LP64
static int64_t
kaioc(
	long	a0,
	long	a1,
	long	a2,
	long	a3,
	long	a4,
	long	a5)
{
	int	error;
	long	rval = 0;

	switch ((int)a0 & ~AIO_POLL_BIT) {
	case AIOREAD:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FREAD);
		break;
	case AIOWRITE:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FWRITE);
		break;
	case AIOWAIT:
		error = aiowait((struct timeval *)a1, (int)a2, &rval);
		break;
	case AIOWAITN:
		error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
		    (timespec_t *)a4);
		break;
	case AIONOTIFY:
		error = aionotify();
		break;
	case AIOINIT:
		error = aioinit();
		break;
	case AIOSTART:
		error = aiostart();
		break;
	case AIOLIO:
		error = alio((int)a0, (int)a1, (aiocb_t **)a2, (int)a3,
		    (struct sigevent *)a4);
		break;
	case AIOLIOWAIT:
		error = aliowait((int)a1, (void *)a2, (int)a3,
		    (struct sigevent *)a4, AIO_64);
		break;
	case AIOSUSPEND:
		error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
		    (int)a4, &rval, AIO_64);
		break;
	case AIOERROR:
		error = aioerror((void *)a1, AIO_64);
		break;
	case AIOAREAD:
		error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
		break;
	case AIOAWRITE:
		error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
		break;
	case AIOCANCEL:
		error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
		break;

	/*
	 * The large file related calls are valid only on the 32 bit
	 * kernel, not on the 64 bit kernel.  On the 64 bit kernel,
	 * large file calls are converted to regular 64 bit calls.
	 */

	default:
		error = EINVAL;
	}
	if (error)
		return ((int64_t)set_errno(error));
	return (rval);
}
#endif

static int
kaio(
	ulong_t *uap,
	rval_t *rvp)
{
	long	rval = 0;
	int	error = 0;
	offset_t	off;


	rvp->r_vals = 0;
#if defined(_LITTLE_ENDIAN)
	off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
#else
	off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
#endif

	switch (uap[0] & ~AIO_POLL_BIT) {
	/*
	 * It must be the 32 bit system call on 64 bit kernel
	 */
	case AIOREAD:
		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
		    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
	case AIOWRITE:
		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
		    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
	case AIOWAIT:
		error = aiowait((struct timeval *)uap[1], (int)uap[2],
		    &rval);
		break;
	case AIOWAITN:
		error = aiowaitn((void *)uap[1], (uint_t)uap[2],
		    (uint_t *)uap[3], (timespec_t *)uap[4]);
		break;
	case AIONOTIFY:
		return (aionotify());
	case AIOINIT:
		return (aioinit());
	case AIOSTART:
		return (aiostart());
	case AIOLIO:
		return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
		    (void *)uap[4]));
	case AIOLIOWAIT:
		return (aliowait((int)uap[1], (void *)uap[2],
		    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
	case AIOSUSPEND:
		error = aiosuspend((void *)uap[1], (int)uap[2],
		    (timespec_t *)uap[3], (int)uap[4],
		    &rval, AIO_32);
		break;
	case AIOERROR:
		return (aioerror((void *)uap[1], AIO_32));
	case AIOAREAD:
		return (aiorw((int)uap[0], (void *)uap[1],
		    FREAD, AIO_32));
	case AIOAWRITE:
		return (aiorw((int)uap[0], (void *)uap[1],
		    FWRITE, AIO_32));
	case AIOCANCEL:
		error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
		    AIO_32));
		break;
	case AIOLIO64:
		return (alioLF((int)uap[1], (void *)uap[2],
		    (int)uap[3], (void *)uap[4]));
	case AIOLIOWAIT64:
		return (aliowait(uap[1], (void *)uap[2],
		    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
	case AIOSUSPEND64:
		error = aiosuspend((void *)uap[1], (int)uap[2],
		    (timespec_t *)uap[3], (int)uap[4], &rval,
		    AIO_LARGEFILE);
		break;
	case AIOERROR64:
		return (aioerror((void *)uap[1], AIO_LARGEFILE));
	case AIOAREAD64:
		return (aiorw((int)uap[0], (void *)uap[1], FREAD,
		    AIO_LARGEFILE));
	case AIOAWRITE64:
		return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
		    AIO_LARGEFILE));
	case AIOCANCEL64:
		error = (aio_cancel((int)uap[1], (void *)uap[2],
		    &rval, AIO_LARGEFILE));
		break;
	default:
		return (EINVAL);
	}

	rvp->r_val1 = rval;
	return (error);
}
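/*
 * Argument layout assumed by the 32 bit AIOREAD/AIOWRITE path above
 * (a sketch derived from the dispatch code, for orientation):
 *
 *	uap[0]	opcode (may have AIO_POLL_BIT set)
 *	uap[1]	file descriptor
 *	uap[2]	buffer address
 *	uap[3]	transfer count
 *	uap[4]	one half of the 64-bit offset (endian dependent)
 *	uap[5]	the other half of the 64-bit offset
 *	uap[6]	aio_result_t pointer
 */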
/*
 * wake up LWPs in this process that are sleeping in
 * aiowait().
 */
static int
aionotify(void)
{
	aio_t	*aiop;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (0);

	mutex_enter(&aiop->aio_mutex);
	aiop->aio_notifycnt++;
	cv_broadcast(&aiop->aio_waitcv);
	mutex_exit(&aiop->aio_mutex);

	return (0);
}

static int
timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	struct timeval32 wait_time_32;
#endif
	struct timeval wait_time;
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {		/* wait indefinitely */
		*blocking = 1;
		return (0);
	}

	/*
	 * Need to correctly compare with the -1 passed in for a user
	 * address pointer, with both 32 bit and 64 bit apps.
	 */
	if (model == DATAMODEL_NATIVE) {
		if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time, sizeof (wait_time)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		/*
		 * -1 from a 32 bit app.  It will not get sign extended.
		 * Don't wait if -1.
		 */
		if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
	}
#endif  /* _SYSCALL32_IMPL */

	if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */
		*blocking = 0;
		return (0);
	}

	if (wait_time.tv_sec < 0 ||
	    wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
		return (EINVAL);

	rqtime->tv_sec = wait_time.tv_sec;
	rqtime->tv_nsec = wait_time.tv_usec * 1000;
	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}

static int
timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	timespec32_t wait_time_32;
#endif
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {
		*blocking = 1;
		return (0);
	}

	if (model == DATAMODEL_NATIVE) {
		if (copyin(timout, rqtime, sizeof (*rqtime)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
	}
#endif  /* _SYSCALL32_IMPL */

	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
		*blocking = 0;
		return (0);
	}

	if (rqtime->tv_sec < 0 ||
	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
		return (EINVAL);

	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}

/*ARGSUSED*/
static int
aiowait(
	struct timeval	*timout,
	int	dontblockflg,
	long	*rval)
{
	int	error;
	aio_t	*aiop;
	aio_req_t	*reqp;
	clock_t	status;
	int	blocking;
	int	timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* process requests on poll queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}
		if ((reqp = aio_req_remove(NULL)) != NULL) {
			*rval = (long)reqp->aio_req_resultp;
			break;
		}
		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			*rval = 1;
			break;
		}
		/* don't block if no outstanding aio */
		if (aiop->aio_outstanding == 0 && dontblockflg) {
			error = EINVAL;
			break;
		}
		if (blocking) {
			status = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);

			if (status > 0)		/* check done queue again */
				continue;
			if (status == 0) {	/* interrupted by a signal */
				error = EINTR;
				*rval = -1;
			} else {		/* timer expired */
				error = ETIME;
			}
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	if (reqp) {
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
	return (error);
}

/*
 * aiowaitn can be used to reap completed asynchronous requests
 * submitted with lio_listio, aio_read or aio_write.
 * This function only reaps asynchronous raw I/Os.
 */

/*ARGSUSED*/
static int
aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
{
	int	error = 0;
	aio_t	*aiop;
	aio_req_t	*reqlist = NULL;
	caddr_t	iocblist = NULL;	/* array of iocb ptr's */
	uint_t	waitcnt, cnt = 0;	/* iocb cnt */
	size_t	iocbsz;			/* users iocb size */
	size_t	riocbsz;		/* returned iocb size */
	int	iocb_index = 0;
	model_t	model = get_udatamodel();
	int	blocking = 1;
	int	timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	if (aiop->aio_outstanding == 0)
		return (EAGAIN);

	if (copyin(nwait, &waitcnt, sizeof (uint_t)))
		return (EFAULT);

	/* set *nwait to zero, if we must return prematurely */
	if (copyout(&cnt, nwait, sizeof (uint_t)))
		return (EFAULT);

	if (waitcnt == 0) {
		blocking = 0;
		rqtp = NULL;
		waitcnt = nent;
	} else {
		error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
		if (error)
			return (error);
	}

	if (model == DATAMODEL_NATIVE)
		iocbsz = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		iocbsz = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	/*
	 * Only one aio_waitn call is allowed at a time.
	 * The active aio_waitn will collect all requests
	 * out of the "done" list and if necessary it will wait
	 * for some/all pending requests to fulfill the nwait
	 * parameter.
	 * A second or further aio_waitn call will sleep here
	 * until the active aio_waitn finishes and leaves the kernel.
	 * If the second call does not block (poll), then return
	 * immediately with the error code EAGAIN.
	 * If the second call should block, then sleep here, but
	 * do not touch the timeout.  The timeout starts when this
	 * aio_waitn call becomes active.
	 */

	mutex_enter(&aiop->aio_mutex);

	while (aiop->aio_flags & AIO_WAITN) {
		if (blocking == 0) {
			mutex_exit(&aiop->aio_mutex);
			return (EAGAIN);
		}

		/* block, no timeout */
		aiop->aio_flags |= AIO_WAITN_PENDING;
		if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
			mutex_exit(&aiop->aio_mutex);
			return (EINTR);
		}
	}

	/*
	 * Establish the absolute future time for the timeout.
	 */
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
		aiop->aio_iocb = NULL;
	}

	if (aiop->aio_iocb == NULL) {
		iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
		if (iocblist == NULL) {
			mutex_exit(&aiop->aio_mutex);
			return (ENOMEM);
		}
		aiop->aio_iocb = (aiocb_t **)iocblist;
		aiop->aio_iocbsz = iocbsz;
	} else {
		iocblist = (char *)aiop->aio_iocb;
	}

	aiop->aio_waitncnt = waitcnt;
	aiop->aio_flags |= AIO_WAITN;

	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}

		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
			aiop->aio_waitncnt = waitcnt - cnt;
		}

		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			error = 0;
			break;
		}

		/*
		 * If we are here a second time as a result of timer
		 * expiration, we reset the error if there are enough
		 * aiocbs to satisfy the request.
		 * We also return if all requests are already done
		 * and we picked up the whole done queue.
		 */

		if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
		    aiop->aio_doneq == NULL)) {
			error = 0;
			break;
		}

		if ((cnt < waitcnt) && blocking) {
			int rval = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);
			if (rval > 0)
				continue;
			if (rval < 0) {
				error = ETIME;
				blocking = 0;
				continue;
			}
			error = EINTR;
		}
		break;
	}

	mutex_exit(&aiop->aio_mutex);

	if (cnt > 0) {

		iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
		    aiop, model);

		if (model == DATAMODEL_NATIVE)
			riocbsz = (sizeof (aiocb_t *) * cnt);
#ifdef	_SYSCALL32_IMPL
		else
			riocbsz = (sizeof (caddr32_t) * cnt);
#endif  /* _SYSCALL32_IMPL */

		if (copyout(iocblist, uiocb, riocbsz) ||
		    copyout(&cnt, nwait, sizeof (uint_t)))
			error = EFAULT;
	}

	if (aiop->aio_iocbsz > AIO_IOCB_MAX) {
		kmem_free(iocblist, aiop->aio_iocbsz);
		aiop->aio_iocb = NULL;
	}

	/* check if there is another thread waiting for execution */
	mutex_enter(&aiop->aio_mutex);
	aiop->aio_flags &= ~AIO_WAITN;
	if (aiop->aio_flags & AIO_WAITN_PENDING) {
		aiop->aio_flags &= ~AIO_WAITN_PENDING;
		cv_signal(&aiop->aio_waitncv);
	}
	mutex_exit(&aiop->aio_mutex);

	return (error);
}
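/*
 * Summary of the aio_waitn serialization above (as read from the code):
 * the active caller owns the AIO_WAITN flag for the duration of its
 * collection loop; any later caller either returns EAGAIN (polling) or
 * sets AIO_WAITN_PENDING and sleeps on aio_waitncv.  On the way out the
 * active caller clears AIO_WAITN and, if a waiter is pending, clears
 * AIO_WAITN_PENDING and signals aio_waitncv to hand over the role.
 */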
/*
 * aio_unlock_requests
 * copies out the result of the request as well as the return value.
 * It builds the list of completed asynchronous requests,
 * unlocks the allocated memory ranges and
 * puts the aio request structure back into the free list.
 */

static int
aio_unlock_requests(
	caddr_t	iocblist,
	int	iocb_index,
	aio_req_t *reqlist,
	aio_t	*aiop,
	model_t	model)
{
	aio_req_t	*reqp, *nreqp;

	if (model == DATAMODEL_NATIVE) {
		for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
			(((caddr_t *)iocblist)[iocb_index++]) =
			    reqp->aio_req_iocb.iocb;
			nreqp = reqp->aio_req_next;
			aphysio_unlock(reqp);
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		}
	}
#ifdef	_SYSCALL32_IMPL
	else {
		for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
			((caddr32_t *)iocblist)[iocb_index++] =
			    reqp->aio_req_iocb.iocb32;
			nreqp = reqp->aio_req_next;
			aphysio_unlock(reqp);
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		}
	}
#endif	/* _SYSCALL32_IMPL */
	return (iocb_index);
}

/*
 * aio_reqlist_concat
 * moves "max" elements from the done queue to the reqlist queue and removes
 * the AIO_DONEQ flag.
 * - reqlist queue is a singly linked list
 * - done queue is a doubly linked list
 */

static int
aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
{
	aio_req_t *q2, *q2work, *list;
	int count = 0;

	list = *reqlist;
	q2 = aiop->aio_doneq;
	q2work = q2;
	while (max-- > 0) {
		q2work->aio_req_flags &= ~AIO_DONEQ;
		q2work = q2work->aio_req_next;
		count++;
		if (q2work == q2)
			break;
	}

	if (q2work == q2) {
		/* all elements moved */
		q2->aio_req_prev->aio_req_next = list;
		list = q2;
		aiop->aio_doneq = NULL;
	} else {
		/*
		 * max < elements in the doneq
		 * detach only the required amount of elements
		 * out of the doneq
		 */
		q2work->aio_req_prev->aio_req_next = list;
		list = q2;

		aiop->aio_doneq = q2work;
		q2work->aio_req_prev = q2->aio_req_prev;
		q2->aio_req_prev->aio_req_next = q2work;
	}
	*reqlist = list;
	return (count);
}

/*ARGSUSED*/
static int
aiosuspend(
	void	*aiocb,
	int	nent,
	struct	timespec	*timout,
	int	flag,
	long	*rval,
	int	run_mode)
{
	int	error;
	aio_t	*aiop;
	aio_req_t	*reqp, *found, *next;
	caddr_t	cbplist = NULL;
	aiocb_t	*cbp, **ucbp;
#ifdef	_SYSCALL32_IMPL
	aiocb32_t	*cbp32;
	caddr32_t	*ucbp32;
#endif  /* _SYSCALL32_IMPL */
	aiocb64_32_t	*cbp64;
	int	rv;
	int	i;
	size_t	ssize;
	model_t	model = get_udatamodel();
	int	blocking;
	int	timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	/*
	 * If we are not blocking and there's no IO complete,
	 * skip the aiocb copyin.
	 */
	if (!blocking && (aiop->aio_pollq == NULL) &&
	    (aiop->aio_doneq == NULL)) {
		return (EAGAIN);
	}

	if (model == DATAMODEL_NATIVE)
		ssize = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		ssize = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	cbplist = kmem_alloc(ssize, KM_NOSLEEP);
	if (cbplist == NULL)
		return (ENOMEM);

	if (copyin(aiocb, cbplist, ssize)) {
		error = EFAULT;
		goto done;
	}

	found = NULL;
	/*
	 * we need to get the aio_cleanupq_mutex since we call
	 * aio_req_done().
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_cleanupq_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
		}
		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			if (model == DATAMODEL_NATIVE)
				ucbp = (aiocb_t **)cbplist;
#ifdef	_SYSCALL32_IMPL
			else
				ucbp32 = (caddr32_t *)cbplist;
#endif  /* _SYSCALL32_IMPL */
			for (i = 0; i < nent; i++) {
				if (model == DATAMODEL_NATIVE) {
					if ((cbp = *ucbp++) == NULL)
						continue;
					if (run_mode != AIO_LARGEFILE)
						reqp = aio_req_done(
						    &cbp->aio_resultp);
					else {
						cbp64 = (aiocb64_32_t *)cbp;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}
				}
#ifdef	_SYSCALL32_IMPL
				else {
					if (run_mode == AIO_32) {
						if ((cbp32 =
						    (aiocb32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp32->aio_resultp);
					} else if (run_mode == AIO_LARGEFILE) {
						if ((cbp64 =
						    (aiocb64_32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}

				}
#endif  /* _SYSCALL32_IMPL */
				if (reqp) {
					reqp->aio_req_next = found;
					found = reqp;
				}
				if (aiop->aio_doneq == NULL)
					break;
			}
			if (found)
				break;
		}
		if (aiop->aio_notifycnt > 0) {
			/*
			 * nothing on the kernel's queue. the user
			 * has notified the kernel that it has items
			 * on a user-level queue.
			 */
			aiop->aio_notifycnt--;
			*rval = 1;
			error = 0;
			break;
		}
		/* don't block if nothing is outstanding */
		if (aiop->aio_outstanding == 0) {
			error = EAGAIN;
			break;
		}
		if (blocking) {
			/*
			 * drop the aio_cleanupq_mutex as we are
			 * going to block.
			 */
			mutex_exit(&aiop->aio_cleanupq_mutex);
			rv = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);
			/*
			 * we have to drop aio_mutex and
			 * grab it in the right order.
			 */
			mutex_exit(&aiop->aio_mutex);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
			if (rv > 0)	/* check done queue again */
				continue;
			if (rv == 0)	/* interrupted by a signal */
				error = EINTR;
			else		/* timer expired */
				error = ETIME;
		} else {
			error = EAGAIN;
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	for (reqp = found; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
done:
	kmem_free(cbplist, ssize);
	return (error);
}

/*
 * initialize aio by allocating an aio_t struct for this
 * process.
 */
static int
aioinit(void)
{
	proc_t *p = curproc;
	aio_t *aiop;
	mutex_enter(&p->p_lock);
	if ((aiop = p->p_aio) == NULL) {
		aiop = aio_aiop_alloc();
		p->p_aio = aiop;
	}
	mutex_exit(&p->p_lock);
	if (aiop == NULL)
		return (ENOMEM);
	return (0);
}

/*
 * start a special thread that will cleanup after aio requests
 * that are preventing a segment from being unmapped.  as_unmap()
 * blocks until all physio to this segment is completed.  this
 * doesn't happen until all the pages in this segment are not
 * SOFTLOCKed.  Some pages will be SOFTLOCKed when there are aio
 * requests still outstanding.  this special thread will make sure
 * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
 *
 * this function will return an error if the process has only
 * one LWP.  the assumption is that the caller is a separate LWP
 * that remains blocked in the kernel for the life of this process.
 */
static int
aiostart(void)
{
	proc_t *p = curproc;
	aio_t *aiop;
	int first, error = 0;

	if (p->p_lwpcnt == 1)
		return (EDEADLK);
	mutex_enter(&p->p_lock);
	if ((aiop = p->p_aio) == NULL)
		error = EINVAL;
	else {
		first = aiop->aio_ok;
		if (aiop->aio_ok == 0)
			aiop->aio_ok = 1;
	}
	mutex_exit(&p->p_lock);
	if (error == 0 && first == 0) {
		return (aio_cleanup_thread(aiop));
		/* should return only to exit */
	}
	return (error);
}

/*
 * Associate an aiocb with a port.
 * This function is used by aiorw() to associate a transaction with a port.
 * Allocate an event port structure (port_alloc_event()) and store the
 * delivered user pointer (portnfy_user) in the portkev_user field of the
 * port_kevent_t structure.
 * The aio_req_portkev pointer in the aio_req_t structure was added to identify
 * the port association.
 */

static int
aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp, aio_req_t *reqp)
{
	port_kevent_t	*pkevp = NULL;
	int		error;

	error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
	    PORT_SOURCE_AIO, &pkevp);
	if (error) {
		if ((error == ENOMEM) || (error == EAGAIN))
			error = EAGAIN;
		else
			error = EINVAL;
	} else {
		port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
		    aio_port_callback, reqp);
		reqp->aio_req_portkev = pkevp;
		reqp->aio_req_port = pntfy->portnfy_port;
	}
	return (error);
}
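/*
 * Note on the association above: port_alloc_event() reserves a
 * port_kevent_t on the named event port, port_init_event() binds the
 * user's aiocb address and cookie together with aio_port_callback as
 * the delivery callback, and aio_req_portkev records the binding so
 * that completion of the request can post the event.  Allocation
 * failures are collapsed to EAGAIN, anything else to EINVAL.
 */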
/*
 * Associate an aiocb with a port.
 * This function is used by lio_listio() to associate a transaction with a port.
 * Allocate an event port structure (port_alloc_event()) and store the
 * delivered user pointer (portnfy_user) in the portkev_user field of the
 * port_kevent_t structure.
 * The aio_req_portkev pointer in the aio_req_t structure was added to identify
 * the port association.
 * The event port notification can be requested by attaching the port_notify_t
 * structure to the sigevent argument of lio_listio() or by attaching the
 * port_notify_t structure to the sigevent structure which is embedded in the
 * aiocb.
 * The attachment to the global sigevent structure is valid for all aiocbs
 * in the list.
 */

static int
aio_req_assoc_port(struct sigevent *sigev, void *user, aiocb_t *cbp,
	aio_req_t *reqp, port_kevent_t *pkevtp)
{
	port_kevent_t	*pkevp = NULL;
	port_notify_t	pntfy;
	int		error;

	if (sigev->sigev_notify == SIGEV_PORT) {
		/* the aiocb has its own port notification embedded */
		if (copyin((void *)sigev->sigev_value.sival_ptr, &pntfy,
		    sizeof (port_notify_t)))
			return (EFAULT);

		error = port_alloc_event(pntfy.portnfy_port, PORT_ALLOC_DEFAULT,
		    PORT_SOURCE_AIO, &pkevp);
		if (error) {
			if ((error == ENOMEM) || (error == EAGAIN))
				return (EAGAIN);
			else
				return (EINVAL);
		}
		/* use these values instead of the global values in the port */

		port_init_event(pkevp, (uintptr_t)cbp, pntfy.portnfy_user,
		    aio_port_callback, reqp);
		reqp->aio_req_port = pntfy.portnfy_port;
	} else {
		/* use global port notification */
		error = port_dup_event(pkevtp, &pkevp, PORT_ALLOC_DEFAULT);
		if (error)
			return (EAGAIN);
		port_init_event(pkevp, (uintptr_t)cbp, user, aio_port_callback,
		    reqp);
	}
	reqp->aio_req_portkev = pkevp;
	return (0);
}

/*
 * Same comments as in aio_req_assoc_port(), see above.
 */

static int
aio_req_assoc_port32(struct sigevent32 *sigev, void *user, aiocb_t *cbp,
	aio_req_t *reqp, port_kevent_t *pkevtp)
{
	port_kevent_t	*pkevp = NULL;
	port_notify32_t	pntfy;
	int		error;

	if (sigev->sigev_notify == SIGEV_PORT) {
		if (copyin((void *)(uintptr_t)sigev->sigev_value.sival_int,
		    &pntfy, sizeof (port_notify32_t)))
			return (EFAULT);

		error = port_alloc_event(pntfy.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevp);
		if (error) {
			if ((error == ENOMEM) || (error == EAGAIN))
				return (EAGAIN);
			else
				return (EINVAL);
		}
		/* use these values instead of the global values in the port */

		port_init_event(pkevp, (uintptr_t)cbp,
		    (void *)(uintptr_t)pntfy.portnfy_user,
		    aio_port_callback, reqp);
		reqp->aio_req_port = pntfy.portnfy_port;
	} else {
		error = port_dup_event(pkevtp, &pkevp, PORT_ALLOC_DEFAULT);
		if (error)
			return (EAGAIN);
		port_init_event(pkevp, (uintptr_t)cbp, user, aio_port_callback,
		    reqp);
	}
	reqp->aio_req_portkev = pkevp;
	return (0);
}
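/*
 * In both variants above, an aiocb whose own sigevent requests
 * SIGEV_PORT carries its private port_notify_t and gets a freshly
 * allocated port event, overriding the list-wide sigevent; otherwise
 * the list-wide event (pkevtp) is duplicated for the request via
 * port_dup_event().
 */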

#ifdef _LP64

/*
 * Asynchronous list IO.  A chain of aiocb's are copied in
 * one at a time.  If the aiocb is invalid, it is skipped.
 * For each aiocb, the appropriate driver entry point is
 * called.  Optimize for the common case where the list
 * of requests is to the same file descriptor.
 *
 * One possible optimization is to define a new driver entry
 * point that supports a list of IO requests.  Whether this
 * improves performance depends somewhat on the driver's
 * locking strategy.  Processing a list could adversely impact
 * the driver's interrupt latency.
 */
/*ARGSUSED*/
static int
alio(
	int	opcode,
	int	mode_arg,
	aiocb_t	**aiocb_arg,
	int	nent,
	struct	sigevent *sigev)

{
	file_t		*fp;
	file_t		*prev_fp = NULL;
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb_t		*cbp, **ucbp;
	aiocb_t		cb;
	aiocb_t		*aiocb = &cb;
	struct sigevent	sigevk;
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	int		deadhead = 0;
	int		aio_notsupported = 0;
	int		aio_use_port = 0;
	port_kevent_t	*pkevtp = NULL;
	port_notify_t	pnotify;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	ssize = (sizeof (aiocb_t *) * nent);
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (aiocb_t **)cbplist;

	if (copyin(aiocb_arg, cbplist, sizeof (aiocb_t *) * nent)) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	if (sigev) {
		if (copyin(sigev, &sigevk, sizeof (struct sigevent))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	head = NULL;

	/* Event Ports */

	if (sigev && sigevk.sigev_notify == SIGEV_PORT) {
		/* Use port for completion notification */
		if (copyin(sigevk.sigev_value.sival_ptr, &pnotify,
		    sizeof (port_notify_t))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		/* use event ports for the list of aiocbs */
		aio_use_port = 1;
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_PRIVATE, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if ((error == ENOMEM) || (error == EAGAIN))
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
	} else if ((mode_arg == LIO_WAIT) || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		if (sigev && (sigevk.sigev_notify == SIGEV_SIGNAL) &&
		    (sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG)) {
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value = sigevk.sigev_value;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
	}

	for (i = 0; i < nent; i++, ucbp++) {

		cbp = *ucbp;
		/* skip entry if it can't be copied. */
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (aiocb_t))) {
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* skip if opcode for aiocb is LIO_NOP */

		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		vp = fp->f_vnode;

		/*
		 * check the permission of the partition
		 */
		mode = aiocb->aio_lio_opcode;
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd for the
		 * same r/w operation.
		 * for UFS, need to set EBADFD
		 */
		if ((fp != prev_fp) || (mode != prev_mode)) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				prev_fp = fp;
				prev_mode = mode;
			}
		}

		if (error = aio_req_setup(&reqp, aiop, aiocb,
		    &cbp->aio_resultp, aio_use_port, vp)) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		reqp->aio_req_lio = head;
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		reqp->aio_req_iocb.iocb = (caddr_t)cbp;

		if (aio_use_port) {
			reqp->aio_req_port = pnotify.portnfy_port;
			error = aio_req_assoc_port(&aiocb->aio_sigevent,
			    pnotify.portnfy_user, cbp, reqp, pkevtp);
		}

		/*
		 * send the request to driver.
		 * Clustering: If PXFS vnode, call PXFS function.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}
		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
1541 */ 1542 if (error) { 1543 releasef(aiocb->aio_fildes); 1544 lio_set_uerror(&cbp->aio_resultp, error); 1545 if (head) { 1546 mutex_enter(&aiop->aio_mutex); 1547 head->lio_nent--; 1548 head->lio_refcnt--; 1549 mutex_exit(&aiop->aio_mutex); 1550 } 1551 if (error == ENOTSUP) 1552 aio_notsupported++; 1553 else 1554 aio_errors++; 1555 lio_set_error(reqp); 1556 } else { 1557 clear_active_fd(aiocb->aio_fildes); 1558 } 1559 } 1560 1561 if (pkevtp) 1562 port_free_event(pkevtp); 1563 1564 if (aio_notsupported) { 1565 error = ENOTSUP; 1566 } else if (aio_errors) { 1567 /* 1568 * return EIO if any request failed 1569 */ 1570 error = EIO; 1571 } 1572 1573 if (mode_arg == LIO_WAIT) { 1574 mutex_enter(&aiop->aio_mutex); 1575 while (head->lio_refcnt > 0) { 1576 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 1577 mutex_exit(&aiop->aio_mutex); 1578 error = EINTR; 1579 goto done; 1580 } 1581 } 1582 mutex_exit(&aiop->aio_mutex); 1583 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64); 1584 } 1585 1586 done: 1587 kmem_free(cbplist, ssize); 1588 if (deadhead) { 1589 if (head->lio_sigqp) 1590 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 1591 kmem_free(head, sizeof (aio_lio_t)); 1592 } 1593 return (error); 1594 } 1595 1596 #endif /* _LP64 */ 1597 1598 /* 1599 * Asynchronous list IO. 1600 * If list I/O is called with LIO_WAIT it can still return 1601 * before all the I/O's are completed if a signal is caught 1602 * or if the list include UFS I/O requests. If this happens, 1603 * libaio will call aliowait() to wait for the I/O's to 1604 * complete 1605 */ 1606 /*ARGSUSED*/ 1607 static int 1608 aliowait( 1609 int mode, 1610 void *aiocb, 1611 int nent, 1612 void *sigev, 1613 int run_mode) 1614 { 1615 aio_lio_t *head; 1616 aio_t *aiop; 1617 caddr_t cbplist; 1618 aiocb_t *cbp, **ucbp; 1619 #ifdef _SYSCALL32_IMPL 1620 aiocb32_t *cbp32; 1621 caddr32_t *ucbp32; 1622 aiocb64_32_t *cbp64; 1623 #endif 1624 int error = 0; 1625 int i; 1626 size_t ssize = 0; 1627 model_t model = get_udatamodel(); 1628 1629 aiop = curproc->p_aio; 1630 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 1631 return (EINVAL); 1632 1633 if (model == DATAMODEL_NATIVE) 1634 ssize = (sizeof (aiocb_t *) * nent); 1635 #ifdef _SYSCALL32_IMPL 1636 else 1637 ssize = (sizeof (caddr32_t) * nent); 1638 #endif /* _SYSCALL32_IMPL */ 1639 1640 if (ssize == 0) 1641 return (EINVAL); 1642 1643 cbplist = kmem_alloc(ssize, KM_SLEEP); 1644 1645 if (model == DATAMODEL_NATIVE) 1646 ucbp = (aiocb_t **)cbplist; 1647 #ifdef _SYSCALL32_IMPL 1648 else 1649 ucbp32 = (caddr32_t *)cbplist; 1650 #endif /* _SYSCALL32_IMPL */ 1651 1652 if (copyin(aiocb, cbplist, ssize)) { 1653 error = EFAULT; 1654 goto done; 1655 } 1656 1657 /* 1658 * To find the list head, we go through the 1659 * list of aiocb structs, find the request 1660 * its for, then get the list head that reqp 1661 * points to 1662 */ 1663 head = NULL; 1664 1665 for (i = 0; i < nent; i++) { 1666 if (model == DATAMODEL_NATIVE) { 1667 /* 1668 * Since we are only checking for a NULL pointer 1669 * Following should work on both native data sizes 1670 * as well as for largefile aiocb. 1671 */ 1672 if ((cbp = *ucbp++) == NULL) 1673 continue; 1674 if (run_mode != AIO_LARGEFILE) 1675 if (head = aio_list_get(&cbp->aio_resultp)) 1676 break; 1677 else { 1678 /* 1679 * This is a case when largefile call is 1680 * made on 32 bit kernel. 
				 * Treat each pointer as a pointer to
				 * aiocb64_32.
				 */
				if (head = aio_list_get((aio_result_t *)
				    &(((aiocb64_32_t *)cbp)->aio_resultp)))
					break;
			}
		}
#ifdef	_SYSCALL32_IMPL
		else {
			if (run_mode == AIO_LARGEFILE) {
				if ((cbp64 = (aiocb64_32_t *)
				    (uintptr_t)*ucbp32++) == NULL)
					continue;
				if (head = aio_list_get((aio_result_t *)
				    &cbp64->aio_resultp))
					break;
			} else if (run_mode == AIO_32) {
				if ((cbp32 = (aiocb32_t *)
				    (uintptr_t)*ucbp32++) == NULL)
					continue;
				if (head = aio_list_get((aio_result_t *)
				    &cbp32->aio_resultp))
					break;
			}
		}
#endif  /* _SYSCALL32_IMPL */
	}

	if (head == NULL) {
		error = EINVAL;
		goto done;
	}

	mutex_enter(&aiop->aio_mutex);
	while (head->lio_refcnt > 0) {
		if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
			mutex_exit(&aiop->aio_mutex);
			error = EINTR;
			goto done;
		}
	}
	mutex_exit(&aiop->aio_mutex);
	alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);
done:
	kmem_free(cbplist, ssize);
	return (error);
}

aio_lio_t *
aio_list_get(aio_result_t *resultp)
{
	aio_lio_t	*head = NULL;
	aio_t		*aiop;
	aio_req_t	**bucket;
	aio_req_t	*reqp;
	long		index;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (NULL);

	if (resultp) {
		index = AIO_HASH(resultp);
		bucket = &aiop->aio_hash[index];
		for (reqp = *bucket; reqp != NULL;
		    reqp = reqp->aio_hash_next) {
			if (reqp->aio_req_resultp == resultp) {
				head = reqp->aio_req_lio;
				return (head);
			}
		}
	}
	return (NULL);
}


static void
lio_set_uerror(void *resultp, int error)
{
	/*
	 * the resultp field is a pointer to where the
	 * error should be written out to the user's
	 * aiocb.
	 *
	 */
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return,
		    (ssize_t)-1);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (uint_t)-1);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
	}
#endif  /* _SYSCALL32_IMPL */
}

/*
 * do cleanup completion for all requests in list.  memory for
 * each request is also freed.
1784 */ 1785 static void 1786 alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode) 1787 { 1788 int i; 1789 aio_req_t *reqp; 1790 aio_result_t *resultp; 1791 aiocb64_32_t *aiocb_64; 1792 1793 for (i = 0; i < nent; i++) { 1794 if (get_udatamodel() == DATAMODEL_NATIVE) { 1795 if (cbp[i] == NULL) 1796 continue; 1797 if (run_mode == AIO_LARGEFILE) { 1798 aiocb_64 = (aiocb64_32_t *)cbp[i]; 1799 resultp = (aio_result_t *)&aiocb_64-> 1800 aio_resultp; 1801 } else 1802 resultp = &cbp[i]->aio_resultp; 1803 } 1804 #ifdef _SYSCALL32_IMPL 1805 else { 1806 aiocb32_t *aiocb_32; 1807 caddr32_t *cbp32; 1808 1809 cbp32 = (caddr32_t *)cbp; 1810 if (cbp32[i] == NULL) 1811 continue; 1812 if (run_mode == AIO_32) { 1813 aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i]; 1814 resultp = (aio_result_t *)&aiocb_32-> 1815 aio_resultp; 1816 } else if (run_mode == AIO_LARGEFILE) { 1817 aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i]; 1818 resultp = (aio_result_t *)&aiocb_64-> 1819 aio_resultp; 1820 } 1821 } 1822 #endif /* _SYSCALL32_IMPL */ 1823 /* 1824 * we need to get the aio_cleanupq_mutex since we call 1825 * aio_req_done(). 1826 */ 1827 mutex_enter(&aiop->aio_cleanupq_mutex); 1828 mutex_enter(&aiop->aio_mutex); 1829 reqp = aio_req_done(resultp); 1830 mutex_exit(&aiop->aio_mutex); 1831 mutex_exit(&aiop->aio_cleanupq_mutex); 1832 if (reqp != NULL) { 1833 aphysio_unlock(reqp); 1834 aio_copyout_result(reqp); 1835 mutex_enter(&aiop->aio_mutex); 1836 aio_req_free(aiop, reqp); 1837 mutex_exit(&aiop->aio_mutex); 1838 } 1839 } 1840 } 1841 1842 /* 1843 * write out the results for an aio request that is 1844 * done. 1845 */ 1846 static int 1847 aioerror(void *cb, int run_mode) 1848 { 1849 aio_result_t *resultp; 1850 aio_t *aiop; 1851 aio_req_t *reqp; 1852 int retval; 1853 1854 aiop = curproc->p_aio; 1855 if (aiop == NULL || cb == NULL) 1856 return (EINVAL); 1857 1858 if (get_udatamodel() == DATAMODEL_NATIVE) { 1859 if (run_mode == AIO_LARGEFILE) 1860 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 1861 aio_resultp; 1862 else 1863 resultp = &((aiocb_t *)cb)->aio_resultp; 1864 } 1865 #ifdef _SYSCALL32_IMPL 1866 else { 1867 if (run_mode == AIO_LARGEFILE) 1868 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 1869 aio_resultp; 1870 else if (run_mode == AIO_32) 1871 resultp = (aio_result_t *)&((aiocb32_t *)cb)-> 1872 aio_resultp; 1873 } 1874 #endif /* _SYSCALL32_IMPL */ 1875 /* 1876 * we need to get the aio_cleanupq_mutex since we call 1877 * aio_req_find(). 
1878 */ 1879 mutex_enter(&aiop->aio_cleanupq_mutex); 1880 mutex_enter(&aiop->aio_mutex); 1881 retval = aio_req_find(resultp, &reqp); 1882 mutex_exit(&aiop->aio_mutex); 1883 mutex_exit(&aiop->aio_cleanupq_mutex); 1884 if (retval == 0) { 1885 aphysio_unlock(reqp); 1886 aio_copyout_result(reqp); 1887 mutex_enter(&aiop->aio_mutex); 1888 aio_req_free(aiop, reqp); 1889 mutex_exit(&aiop->aio_mutex); 1890 return (0); 1891 } else if (retval == 1) 1892 return (EINPROGRESS); 1893 else if (retval == 2) 1894 return (EINVAL); 1895 return (0); 1896 } 1897 1898 /* 1899 * aio_cancel - if no requests outstanding, 1900 * return AIO_ALLDONE 1901 * else 1902 * return AIO_NOTCANCELED 1903 */ 1904 static int 1905 aio_cancel( 1906 int fildes, 1907 void *cb, 1908 long *rval, 1909 int run_mode) 1910 { 1911 aio_t *aiop; 1912 void *resultp; 1913 int index; 1914 aio_req_t **bucket; 1915 aio_req_t *ent; 1916 1917 1918 /* 1919 * Verify valid file descriptor 1920 */ 1921 if ((getf(fildes)) == NULL) { 1922 return (EBADF); 1923 } 1924 releasef(fildes); 1925 1926 aiop = curproc->p_aio; 1927 if (aiop == NULL) 1928 return (EINVAL); 1929 1930 if (aiop->aio_outstanding == 0) { 1931 *rval = AIO_ALLDONE; 1932 return (0); 1933 } 1934 1935 mutex_enter(&aiop->aio_mutex); 1936 if (cb != NULL) { 1937 if (get_udatamodel() == DATAMODEL_NATIVE) { 1938 if (run_mode == AIO_LARGEFILE) 1939 resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 1940 ->aio_resultp; 1941 else 1942 resultp = &((aiocb_t *)cb)->aio_resultp; 1943 } 1944 #ifdef _SYSCALL32_IMPL 1945 else { 1946 if (run_mode == AIO_LARGEFILE) 1947 resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 1948 ->aio_resultp; 1949 else if (run_mode == AIO_32) 1950 resultp = (aio_result_t *)&((aiocb32_t *)cb) 1951 ->aio_resultp; 1952 } 1953 #endif /* _SYSCALL32_IMPL */ 1954 index = AIO_HASH(resultp); 1955 bucket = &aiop->aio_hash[index]; 1956 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 1957 if (ent->aio_req_resultp == resultp) { 1958 if ((ent->aio_req_flags & AIO_PENDING) == 0) { 1959 mutex_exit(&aiop->aio_mutex); 1960 *rval = AIO_ALLDONE; 1961 return (0); 1962 } 1963 mutex_exit(&aiop->aio_mutex); 1964 *rval = AIO_NOTCANCELED; 1965 return (0); 1966 } 1967 } 1968 mutex_exit(&aiop->aio_mutex); 1969 *rval = AIO_ALLDONE; 1970 return (0); 1971 } 1972 1973 for (index = 0; index < AIO_HASHSZ; index++) { 1974 bucket = &aiop->aio_hash[index]; 1975 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 1976 if (ent->aio_req_fd == fildes) { 1977 if ((ent->aio_req_flags & AIO_PENDING) != 0) { 1978 mutex_exit(&aiop->aio_mutex); 1979 *rval = AIO_NOTCANCELED; 1980 return (0); 1981 } 1982 } 1983 } 1984 } 1985 mutex_exit(&aiop->aio_mutex); 1986 *rval = AIO_ALLDONE; 1987 return (0); 1988 } 1989 1990 /* 1991 * solaris version of asynchronous read and write 1992 */ 1993 static int 1994 arw( 1995 int opcode, 1996 int fdes, 1997 char *bufp, 1998 int bufsize, 1999 offset_t offset, 2000 aio_result_t *resultp, 2001 int mode) 2002 { 2003 file_t *fp; 2004 int error; 2005 struct vnode *vp; 2006 aio_req_t *reqp; 2007 aio_t *aiop; 2008 int (*aio_func)(); 2009 #ifdef _LP64 2010 aiocb_t aiocb; 2011 #else 2012 aiocb64_32_t aiocb64; 2013 #endif 2014 2015 aiop = curproc->p_aio; 2016 if (aiop == NULL) 2017 return (EINVAL); 2018 2019 if ((fp = getf(fdes)) == NULL) { 2020 return (EBADF); 2021 } 2022 2023 /* 2024 * check the permission of the partition 2025 */ 2026 if ((fp->f_flag & mode) == 0) { 2027 releasef(fdes); 2028 return (EBADF); 2029 } 2030 2031 vp = fp->f_vnode; 2032 aio_func = check_vp(vp, mode); 2033 if 
	if (aio_func == NULL) {
		releasef(fdes);
		return (EBADFD);
	}
#ifdef _LP64
	aiocb.aio_fildes = fdes;
	aiocb.aio_buf = bufp;
	aiocb.aio_nbytes = bufsize;
	aiocb.aio_offset = offset;
	aiocb.aio_sigevent.sigev_notify = 0;
	error = aio_req_setup(&reqp, aiop, &aiocb, resultp, 0, vp);
#else
	aiocb64.aio_fildes = fdes;
	aiocb64.aio_buf = (caddr32_t)bufp;
	aiocb64.aio_nbytes = bufsize;
	aiocb64.aio_offset = offset;
	aiocb64.aio_sigevent.sigev_notify = 0;
	error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, 0, vp);
#endif
	if (error) {
		releasef(fdes);
		return (error);
	}

	/*
	 * enable polling on this request if the opcode has
	 * the AIO poll bit set
	 */
	if (opcode & AIO_POLL_BIT)
		reqp->aio_req_flags |= AIO_POLL;

	if (bufsize == 0) {
		clear_active_fd(fdes);
		aio_zerolen(reqp);
		return (0);
	}
	/*
	 * send the request to driver.
	 * Clustering: If PXFS vnode, call PXFS function.
	 */
	error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
	/*
	 * the fd is stored in the aio_req_t by aio_req_setup(), and
	 * is released by the aio_cleanup_thread() when the IO has
	 * completed.
	 */
	if (error) {
		releasef(fdes);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		aiop->aio_pending--;
		if (aiop->aio_flags & AIO_REQ_BLOCK)
			cv_signal(&aiop->aio_cleanupcv);
		mutex_exit(&aiop->aio_mutex);
		return (error);
	}
	clear_active_fd(fdes);
	return (0);
}

/*
 * Take request out of the port pending queue ...
 */

void
aio_deq_port_pending(aio_t *aiop, aio_req_t *reqp)
{
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
	if (reqp->aio_req_prev == NULL)
		/* first request */
		aiop->aio_portpending = reqp->aio_req_next;
	else
		reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
	if (reqp->aio_req_next != NULL)
		reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
}

/*
 * posix version of asynchronous read and write
 */
static int
aiorw(
	int	opcode,
	void	*aiocb_arg,
	int	mode,
	int	run_mode)
{
#ifdef	_SYSCALL32_IMPL
	aiocb32_t	aiocb32;
	struct	sigevent32 *sigev32;
	port_notify32_t	pntfy32;
#endif
	aiocb64_32_t	aiocb64;
	aiocb_t		aiocb;
	file_t		*fp;
	int		error, fd;
	size_t		bufsize;
	struct vnode	*vp;
	aio_req_t	*reqp;
	aio_t		*aiop;
	int		(*aio_func)();
	aio_result_t	*resultp;
	struct	sigevent *sigev;
	model_t		model;
	int		aio_use_port = 0;
	port_notify_t	pntfy;

	model = get_udatamodel();
	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	if (model == DATAMODEL_NATIVE) {
		if (run_mode != AIO_LARGEFILE) {
			if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
				return (EFAULT);
			bufsize = aiocb.aio_nbytes;
			resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
			if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
				return (EBADF);
			}
			sigev = &aiocb.aio_sigevent;
		} else {
			/*
			 * We come here only when we make a largefile
			 * call on the 32 bit kernel using the 32 bit
			 * library.
			 */
			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
				return (EFAULT);
			bufsize = aiocb64.aio_nbytes;
			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
			    ->aio_resultp);
			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) {
				return (EBADF);
			}
			sigev = (struct sigevent *)&aiocb64.aio_sigevent;
		}

		if (sigev->sigev_notify == SIGEV_PORT) {
			if (copyin((void *)sigev->sigev_value.sival_ptr,
			    &pntfy, sizeof (port_notify_t))) {
				releasef(fd);
				return (EFAULT);
			}
			aio_use_port = 1;
		}
	}
#ifdef	_SYSCALL32_IMPL
	else {
		if (run_mode == AIO_32) {
			/* 32 bit system call is being made on 64 bit kernel */
			if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
				return (EFAULT);

			bufsize = aiocb32.aio_nbytes;
			aiocb_32ton(&aiocb32, &aiocb);
			resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
			    aio_resultp);
			if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
				return (EBADF);
			}
			sigev32 = &aiocb32.aio_sigevent;
		} else if (run_mode == AIO_LARGEFILE) {
			/*
			 * We come here only when we make a largefile
			 * call on the 64 bit kernel using the 32 bit
			 * library.
			 */
			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
				return (EFAULT);
			bufsize = aiocb64.aio_nbytes;
			aiocb_LFton(&aiocb64, &aiocb);
			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
			    ->aio_resultp);
			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
				return (EBADF);
			sigev32 = &aiocb64.aio_sigevent;
		}

		if (sigev32->sigev_notify == SIGEV_PORT) {
			if (copyin(
			    (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
			    &pntfy32, sizeof (port_notify32_t))) {
				releasef(fd);
				return (EFAULT);
			}
			pntfy.portnfy_port = pntfy32.portnfy_port;
			pntfy.portnfy_user =
			    (void *)(uintptr_t)pntfy32.portnfy_user;
			aio_use_port = 1;
		}
	}
#endif  /* _SYSCALL32_IMPL */

	/*
	 * check the permission of the partition
	 */

	if ((fp->f_flag & mode) == 0) {
		releasef(fd);
		return (EBADF);
	}

	vp = fp->f_vnode;
	aio_func = check_vp(vp, mode);
	if (aio_func == NULL) {
		releasef(fd);
		return (EBADFD);
	}
	if ((model == DATAMODEL_NATIVE) && (run_mode == AIO_LARGEFILE))
		error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp,
		    aio_use_port, vp);
	else
		error = aio_req_setup(&reqp, aiop, &aiocb, resultp,
		    aio_use_port, vp);

	if (error) {
		releasef(fd);
		return (error);
	}
	/*
	 * enable polling on this request if the opcode has
	 * the AIO poll bit set
	 */
	if (opcode & AIO_POLL_BIT)
		reqp->aio_req_flags |= AIO_POLL;

	if (model == DATAMODEL_NATIVE)
		reqp->aio_req_iocb.iocb = aiocb_arg;
#ifdef	_SYSCALL32_IMPL
	else
		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
#endif

	if (aio_use_port)
		error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp);

	/*
	 * send the request to driver.
	 * Clustering: If PXFS vnode, call PXFS function.
	 */
	if (error == 0) {
		if (bufsize == 0) {
			clear_active_fd(fd);
			aio_zerolen(reqp);
			return (0);
		}
		error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
	}

	/*
	 * the fd is stored in the aio_req_t by aio_req_setup(), and
	 * is released by the aio_cleanup_thread() when the IO has
	 * completed.
2286  */
2287 	if (error) {
2288 		releasef(fd);
2289 		mutex_enter(&aiop->aio_mutex);
2290 		aio_deq_port_pending(aiop, reqp);
2291 		aio_req_free(aiop, reqp);
2292 		aiop->aio_pending--;
2293 		if (aiop->aio_flags & AIO_REQ_BLOCK)
2294 			cv_signal(&aiop->aio_cleanupcv);
2295 		mutex_exit(&aiop->aio_mutex);
2296 		return (error);
2297 	}
2298 	clear_active_fd(fd);
2299 	return (0);
2300 }
2301 
2302 
2303 /*
2304  * set error for a list IO entry that failed.
2305  */
2306 static void
2307 lio_set_error(aio_req_t *reqp)
2308 {
2309 	aio_t *aiop = curproc->p_aio;
2310 
2311 	if (aiop == NULL)
2312 		return;
2313 
2314 	mutex_enter(&aiop->aio_mutex);
2315 	aio_deq_port_pending(aiop, reqp);
2316 	aiop->aio_pending--;
2317 	/* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
2318 	reqp->aio_req_flags |= AIO_PHYSIODONE;
2319 	/*
2320 	 * Need to free the request now as it's never
2321 	 * going to get on the done queue
2322 	 *
2323 	 * Note: aio_outstanding is decremented in
2324 	 *	 aio_req_free()
2325 	 */
2326 	aio_req_free(aiop, reqp);
2327 	if (aiop->aio_flags & AIO_REQ_BLOCK)
2328 		cv_signal(&aiop->aio_cleanupcv);
2329 	mutex_exit(&aiop->aio_mutex);
2330 }
2331 
2332 /*
2333  * check if a specified request is done, and remove it from
2334  * the done queue. otherwise remove the request at the head of the
2335  * done queue when NULL is specified.
2336  */
2337 static aio_req_t *
2338 aio_req_done(void *resultp)
2339 {
2340 	aio_req_t **bucket;
2341 	aio_req_t *ent;
2342 	aio_t *aiop = curproc->p_aio;
2343 	long index;
2344 
2345 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
2346 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2347 
2348 	if (resultp) {
2349 		index = AIO_HASH(resultp);
2350 		bucket = &aiop->aio_hash[index];
2351 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
2352 			if (ent->aio_req_resultp == (aio_result_t *)resultp) {
2353 				if (ent->aio_req_flags & AIO_DONEQ) {
2354 					return (aio_req_remove(ent));
2355 				}
2356 				return (NULL);
2357 			}
2358 		}
2359 		/* no match, resultp is invalid */
2360 		return (NULL);
2361 	}
2362 	return (aio_req_remove(NULL));
2363 }
2364 
2365 /*
2366  * determine if a user-level resultp pointer is associated with an
2367  * active IO request. Zero is returned when the request is done,
2368  * and the request is removed from the done queue. The "reqp" pointer
2369  * is valid only when the return value is zero. One is returned
2370  * when the request is in progress. Two is returned when the request
2371  * is invalid.
2372  */
2373 static int
2374 aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
2375 {
2376 	aio_req_t **bucket;
2377 	aio_req_t *ent;
2378 	aio_t *aiop = curproc->p_aio;
2379 	long index;
2380 
2381 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
2382 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2383 
2384 	index = AIO_HASH(resultp);
2385 	bucket = &aiop->aio_hash[index];
2386 	for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
2387 		if (ent->aio_req_resultp == resultp) {
2388 			if (ent->aio_req_flags & AIO_DONEQ) {
2389 				*reqp = aio_req_remove(ent);
2390 				return (0);
2391 			}
2392 			return (1);
2393 		}
2394 	}
2395 	/* no match, resultp is invalid */
2396 	return (2);
2397 }
2398 
2399 /*
2400  * remove a request from the done queue.
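 * Both aio_doneq and aio_cleanupq are circular, doubly linked lists.
 * When a specific request is passed in, that entry is unlinked from
 * whichever of the two queues it is on; when NULL is passed in, the
 * request at the head of aio_doneq is removed instead, and NULL is
 * returned if the done queue is empty. The AIO_DONEQ flag is cleared
 * on any request that is returned.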
2401 */ 2402 static aio_req_t * 2403 aio_req_remove(aio_req_t *reqp) 2404 { 2405 aio_t *aiop = curproc->p_aio; 2406 aio_req_t *head; 2407 2408 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2409 2410 if (reqp) { 2411 ASSERT(reqp->aio_req_flags & AIO_DONEQ); 2412 if (reqp->aio_req_next == reqp) { 2413 /* only one request on queue */ 2414 if (reqp == aiop->aio_doneq) { 2415 aiop->aio_doneq = NULL; 2416 } else { 2417 ASSERT(reqp == aiop->aio_cleanupq); 2418 aiop->aio_cleanupq = NULL; 2419 } 2420 } else { 2421 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 2422 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 2423 /* 2424 * The request can be either on the aio_doneq or the 2425 * aio_cleanupq 2426 */ 2427 if (reqp == aiop->aio_doneq) 2428 aiop->aio_doneq = reqp->aio_req_next; 2429 2430 if (reqp == aiop->aio_cleanupq) 2431 aiop->aio_cleanupq = reqp->aio_req_next; 2432 } 2433 reqp->aio_req_flags &= ~AIO_DONEQ; 2434 return (reqp); 2435 } 2436 2437 if (aiop->aio_doneq) { 2438 head = aiop->aio_doneq; 2439 ASSERT(head->aio_req_flags & AIO_DONEQ); 2440 if (head == head->aio_req_next) { 2441 /* only one request on queue */ 2442 aiop->aio_doneq = NULL; 2443 } else { 2444 head->aio_req_prev->aio_req_next = head->aio_req_next; 2445 head->aio_req_next->aio_req_prev = head->aio_req_prev; 2446 aiop->aio_doneq = head->aio_req_next; 2447 } 2448 head->aio_req_flags &= ~AIO_DONEQ; 2449 return (head); 2450 } 2451 return (NULL); 2452 } 2453 2454 static int 2455 aio_req_setup( 2456 aio_req_t **reqpp, 2457 aio_t *aiop, 2458 aiocb_t *arg, 2459 aio_result_t *resultp, 2460 int port, 2461 vnode_t *vp) 2462 { 2463 aio_req_t *reqp; 2464 sigqueue_t *sqp; 2465 struct uio *uio; 2466 2467 struct sigevent *sigev; 2468 int error; 2469 2470 sigev = &arg->aio_sigevent; 2471 if ((sigev->sigev_notify == SIGEV_SIGNAL) && 2472 (sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG)) { 2473 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 2474 if (sqp == NULL) 2475 return (EAGAIN); 2476 sqp->sq_func = NULL; 2477 sqp->sq_next = NULL; 2478 sqp->sq_info.si_code = SI_ASYNCIO; 2479 sqp->sq_info.si_pid = curproc->p_pid; 2480 sqp->sq_info.si_ctid = PRCTID(curproc); 2481 sqp->sq_info.si_zoneid = getzoneid(); 2482 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 2483 sqp->sq_info.si_signo = sigev->sigev_signo; 2484 sqp->sq_info.si_value = sigev->sigev_value; 2485 } else 2486 sqp = NULL; 2487 2488 mutex_enter(&aiop->aio_mutex); 2489 2490 if (aiop->aio_flags & AIO_REQ_BLOCK) { 2491 mutex_exit(&aiop->aio_mutex); 2492 if (sqp) 2493 kmem_free(sqp, sizeof (sigqueue_t)); 2494 return (EIO); 2495 } 2496 /* 2497 * get an aio_reqp from the free list or allocate one 2498 * from dynamic memory. 2499 */ 2500 if (error = aio_req_alloc(&reqp, resultp)) { 2501 mutex_exit(&aiop->aio_mutex); 2502 if (sqp) 2503 kmem_free(sqp, sizeof (sigqueue_t)); 2504 return (error); 2505 } 2506 aiop->aio_pending++; 2507 aiop->aio_outstanding++; 2508 reqp->aio_req_flags = AIO_PENDING; 2509 if (port) 2510 aio_enq_port_pending(aiop, reqp); 2511 mutex_exit(&aiop->aio_mutex); 2512 /* 2513 * initialize aio request. 2514 */ 2515 reqp->aio_req_fd = arg->aio_fildes; 2516 reqp->aio_req_sigqp = sqp; 2517 reqp->aio_req_iocb.iocb = NULL; 2518 reqp->aio_req_buf.b_file = vp; 2519 uio = reqp->aio_req.aio_uio; 2520 uio->uio_iovcnt = 1; 2521 uio->uio_iov->iov_base = (caddr_t)arg->aio_buf; 2522 uio->uio_iov->iov_len = arg->aio_nbytes; 2523 uio->uio_loffset = arg->aio_offset; 2524 *reqpp = reqp; 2525 return (0); 2526 } 2527 2528 /* 2529 * Allocate p_aio struct. 
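 * The caller must hold p_lock (see the ASSERT below). The allocation
 * is done KM_NOSLEEP, so a NULL return is possible and must be handled
 * by the caller; on success the per-process aio mutexes are
 * initialized here.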
2530  */
2531 static aio_t *
2532 aio_aiop_alloc(void)
2533 {
2534 	aio_t *aiop;
2535 
2536 	ASSERT(MUTEX_HELD(&curproc->p_lock));
2537 
2538 	aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);
2539 	if (aiop) {
2540 		mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
2541 		mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
2542 		    NULL);
2543 		mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
2544 	}
2545 	return (aiop);
2546 }
2547 
2548 /*
2549  * Allocate an aio_req struct.
2550  */
2551 static int
2552 aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
2553 {
2554 	aio_req_t *reqp;
2555 	aio_t *aiop = curproc->p_aio;
2556 
2557 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2558 
2559 	if ((reqp = aiop->aio_free) != NULL) {
2560 		reqp->aio_req_flags = 0;
2561 		aiop->aio_free = reqp->aio_req_next;
2562 		/*
2563 		 * Clustering: This field has to be specifically
2564 		 * set to null so that the right thing can be
2565 		 * done in aphysio()
2566 		 */
2567 		reqp->aio_req_buf.b_iodone = NULL;
2568 	} else {
2569 		/*
2570 		 * Check whether memory is getting tight.
2571 		 * This is a temporary mechanism to avoid memory
2572 		 * exhaustion by a single process until we come up
2573 		 * with a per-process solution such as setrlimit().
2574 		 */
2575 		if (freemem < desfree)
2576 			return (EAGAIN);
2577 
2578 		reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);
2579 		if (reqp == NULL)
2580 			return (EAGAIN);
2581 		reqp->aio_req.aio_uio = &(reqp->aio_req_uio);
2582 		reqp->aio_req.aio_uio->uio_iov = &(reqp->aio_req_iov);
2583 		reqp->aio_req.aio_private = reqp;
2584 	}
2585 
2586 	reqp->aio_req_buf.b_offset = -1;
2587 	reqp->aio_req_resultp = resultp;
2588 	if (aio_hash_insert(reqp, aiop)) {
2589 		reqp->aio_req_next = aiop->aio_free;
2590 		aiop->aio_free = reqp;
2591 		return (EINVAL);
2592 	}
2593 	*nreqp = reqp;
2594 	return (0);
2595 }
2596 
2597 /*
2598  * Allocate an aio_lio_t struct.
2599  */
2600 static int
2601 aio_lio_alloc(aio_lio_t **head)
2602 {
2603 	aio_lio_t *liop;
2604 	aio_t *aiop = curproc->p_aio;
2605 
2606 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2607 
2608 	if ((liop = aiop->aio_lio_free) != NULL) {
2609 		aiop->aio_lio_free = liop->lio_next;
2610 	} else {
2611 		/*
2612 		 * Check whether memory is getting tight.
2613 		 * This is a temporary mechanism to avoid memory
2614 		 * exhaustion by a single process until we come up
2615 		 * with a per-process solution such as setrlimit().
2616 		 */
2617 		if (freemem < desfree)
2618 			return (EAGAIN);
2619 
2620 		liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
2621 		if (liop == NULL)
2622 			return (EAGAIN);
2623 	}
2624 	*head = liop;
2625 	return (0);
2626 }
2627 
2628 /*
2629  * This is a special per-process thread that is only activated if
2630  * the process is unmapping a segment with outstanding aio. Normally,
2631  * the process will have completed the aio before unmapping the
2632  * segment. If the process does unmap a segment with outstanding aio,
2633  * this special thread will guarantee that the locked pages due to
2634  * aphysio() are released, thereby permitting the segment to be
2635  * unmapped.
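 * In outline: when as_unmap() blocks on SOFTLOCKed pages it sets
 * AS_UNMAPWAIT and wakes this thread; the thread moves the done queue
 * onto the cleanup queue, runs aio_cleanup() to unlock the pages, and
 * then goes back to sleep on either aio_cleanupcv or as->a_cv. The
 * loop is only left when the process is exiting or being killed.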
2636 */ 2637 2638 static int 2639 aio_cleanup_thread(aio_t *aiop) 2640 { 2641 proc_t *p = curproc; 2642 struct as *as = p->p_as; 2643 int poked = 0; 2644 kcondvar_t *cvp; 2645 int exit_flag = 0; 2646 2647 sigfillset(&curthread->t_hold); 2648 sigdiffset(&curthread->t_hold, &cantmask); 2649 for (;;) { 2650 /* 2651 * if a segment is being unmapped, and the current 2652 * process's done queue is not empty, then every request 2653 * on the doneq with locked resources should be forced 2654 * to release their locks. By moving the doneq request 2655 * to the cleanupq, aio_cleanup() will process the cleanupq, 2656 * and place requests back onto the doneq. All requests 2657 * processed by aio_cleanup() will have their physical 2658 * resources unlocked. 2659 */ 2660 mutex_enter(&aiop->aio_mutex); 2661 if ((aiop->aio_flags & AIO_CLEANUP) == 0) { 2662 aiop->aio_flags |= AIO_CLEANUP; 2663 mutex_enter(&as->a_contents); 2664 if (AS_ISUNMAPWAIT(as) && aiop->aio_doneq) { 2665 aio_req_t *doneqhead = aiop->aio_doneq; 2666 mutex_exit(&as->a_contents); 2667 aiop->aio_doneq = NULL; 2668 aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ); 2669 } else { 2670 mutex_exit(&as->a_contents); 2671 } 2672 } 2673 mutex_exit(&aiop->aio_mutex); 2674 aio_cleanup(AIO_CLEANUP_THREAD); 2675 /* 2676 * thread should block on the cleanupcv while 2677 * AIO_CLEANUP is set. 2678 */ 2679 cvp = &aiop->aio_cleanupcv; 2680 mutex_enter(&aiop->aio_mutex); 2681 2682 if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL || 2683 aiop->aio_notifyq != NULL || 2684 aiop->aio_portcleanupq != NULL) { 2685 mutex_exit(&aiop->aio_mutex); 2686 continue; 2687 } 2688 mutex_enter(&as->a_contents); 2689 2690 /* 2691 * AIO_CLEANUP determines when the cleanup thread 2692 * should be active. This flag is only set when 2693 * the cleanup thread is awakened by as_unmap(). 2694 * The flag is cleared when the blocking as_unmap() 2695 * that originally awakened us is allowed to 2696 * complete. as_unmap() blocks when trying to 2697 * unmap a segment that has SOFTLOCKed pages. when 2698 * the segment's pages are all SOFTUNLOCKed, 2699 * as->a_flags & AS_UNMAPWAIT should be zero. The flag 2700 * shouldn't be cleared right away if the cleanup thread 2701 * was interrupted because the process is doing forkall(). 2702 * This happens when cv_wait_sig() returns zero, 2703 * because it was awakened by a pokelwps(). If the 2704 * process is not exiting, it must be doing forkall(). 2705 */ 2706 if ((poked == 0) && 2707 ((AS_ISUNMAPWAIT(as) == 0) || (aiop->aio_pending == 0))) { 2708 aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT); 2709 cvp = &as->a_cv; 2710 } 2711 mutex_exit(&aiop->aio_mutex); 2712 if (poked) { 2713 /* 2714 * If the process is exiting/killed, don't return 2715 * immediately without waiting for pending I/O's 2716 * and releasing the page locks. 2717 */ 2718 if (p->p_flag & (SEXITLWPS|SKILLED)) { 2719 /* 2720 * If exit_flag is set, then it is 2721 * safe to exit because we have released 2722 * page locks of completed I/O's. 2723 */ 2724 if (exit_flag) 2725 break; 2726 2727 mutex_exit(&as->a_contents); 2728 2729 /* 2730 * Wait for all the pending aio to complete. 2731 */ 2732 mutex_enter(&aiop->aio_mutex); 2733 aiop->aio_flags |= AIO_REQ_BLOCK; 2734 while (aiop->aio_pending != 0) 2735 cv_wait(&aiop->aio_cleanupcv, 2736 &aiop->aio_mutex); 2737 mutex_exit(&aiop->aio_mutex); 2738 exit_flag = 1; 2739 continue; 2740 } else if (p->p_flag & 2741 (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) { 2742 /* 2743 * hold LWP until it 2744 * is continued. 
2745 */ 2746 mutex_exit(&as->a_contents); 2747 mutex_enter(&p->p_lock); 2748 stop(PR_SUSPENDED, SUSPEND_NORMAL); 2749 mutex_exit(&p->p_lock); 2750 poked = 0; 2751 continue; 2752 } 2753 } else { 2754 /* 2755 * When started this thread will sleep on as->a_cv. 2756 * as_unmap will awake this thread if the 2757 * segment has SOFTLOCKed pages (poked = 0). 2758 * 1. pokelwps() awakes this thread => 2759 * break the loop to check SEXITLWPS, SHOLDFORK, etc 2760 * 2. as_unmap awakes this thread => 2761 * to break the loop it is necessary that 2762 * - AS_UNMAPWAIT is set (as_unmap is waiting for 2763 * memory to be unlocked) 2764 * - some transactions are still pending 2765 * - AIO_CLEANUP is not set 2766 * (if AIO_CLEANUP is set we have to wait for 2767 * pending requests. aio_done will send a signal 2768 * for every request which completes to continue 2769 * unmapping the corresponding address range) 2770 */ 2771 while (poked == 0) { 2772 if ((AS_ISUNMAPWAIT(as) != 0) && 2773 (aiop->aio_pending != 0) && 2774 ((aiop->aio_flags & AIO_CLEANUP) == 0)) 2775 break; 2776 poked = !cv_wait_sig(cvp, &as->a_contents); 2777 if (AS_ISUNMAPWAIT(as) == 0) 2778 cv_signal(cvp); 2779 if (aiop->aio_outstanding != 0) 2780 break; 2781 } 2782 } 2783 mutex_exit(&as->a_contents); 2784 } 2785 exit: 2786 mutex_exit(&as->a_contents); 2787 ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED))); 2788 aston(curthread); /* make thread do post_syscall */ 2789 return (0); 2790 } 2791 2792 /* 2793 * save a reference to a user's outstanding aio in a hash list. 2794 */ 2795 static int 2796 aio_hash_insert( 2797 aio_req_t *aio_reqp, 2798 aio_t *aiop) 2799 { 2800 long index; 2801 aio_result_t *resultp = aio_reqp->aio_req_resultp; 2802 aio_req_t *current; 2803 aio_req_t **nextp; 2804 2805 index = AIO_HASH(resultp); 2806 nextp = &aiop->aio_hash[index]; 2807 while ((current = *nextp) != NULL) { 2808 if (current->aio_req_resultp == resultp) 2809 return (DUPLICATE); 2810 nextp = ¤t->aio_hash_next; 2811 } 2812 *nextp = aio_reqp; 2813 aio_reqp->aio_hash_next = NULL; 2814 return (0); 2815 } 2816 2817 static int 2818 (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *, 2819 cred_t *) 2820 { 2821 struct snode *sp; 2822 dev_t dev; 2823 struct cb_ops *cb; 2824 major_t major; 2825 int (*aio_func)(); 2826 2827 dev = vp->v_rdev; 2828 major = getmajor(dev); 2829 2830 /* 2831 * return NULL for requests to files and STREAMs so 2832 * that libaio takes care of them. 2833 */ 2834 if (vp->v_type == VCHR) { 2835 /* no stream device for kaio */ 2836 if (STREAMSTAB(major)) { 2837 return (NULL); 2838 } 2839 } else { 2840 return (NULL); 2841 } 2842 2843 /* 2844 * Check old drivers which do not have async I/O entry points. 2845 */ 2846 if (devopsp[major]->devo_rev < 3) 2847 return (NULL); 2848 2849 cb = devopsp[major]->devo_cb_ops; 2850 2851 if (cb->cb_rev < 1) 2852 return (NULL); 2853 2854 /* 2855 * Check whether this device is a block device. 2856 * Kaio is not supported for devices like tty. 2857 */ 2858 if (cb->cb_strategy == nodev || cb->cb_strategy == NULL) 2859 return (NULL); 2860 2861 /* 2862 * Clustering: If vnode is a PXFS vnode, then the device may be remote. 2863 * We cannot call the driver directly. Instead return the 2864 * PXFS functions. 2865 */ 2866 2867 if (IS_PXFSVP(vp)) { 2868 if (mode & FREAD) 2869 return (clpxfs_aio_read); 2870 else 2871 return (clpxfs_aio_write); 2872 } 2873 if (mode & FREAD) 2874 aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read; 2875 else 2876 aio_func = (cb->cb_awrite == nodev) ? 
NULL : driver_aio_write; 2877 2878 /* 2879 * Do we need this ? 2880 * nodev returns ENXIO anyway. 2881 */ 2882 if (aio_func == nodev) 2883 return (NULL); 2884 2885 sp = VTOS(vp); 2886 smark(sp, SACC); 2887 return (aio_func); 2888 } 2889 2890 /* 2891 * Clustering: We want check_vp to return a function prototyped 2892 * correctly that will be common to both PXFS and regular case. 2893 * We define this intermediate function that will do the right 2894 * thing for driver cases. 2895 */ 2896 2897 static int 2898 driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 2899 { 2900 dev_t dev; 2901 struct cb_ops *cb; 2902 2903 ASSERT(vp->v_type == VCHR); 2904 ASSERT(!IS_PXFSVP(vp)); 2905 dev = VTOS(vp)->s_dev; 2906 ASSERT(STREAMSTAB(getmajor(dev)) == NULL); 2907 2908 cb = devopsp[getmajor(dev)]->devo_cb_ops; 2909 2910 ASSERT(cb->cb_awrite != nodev); 2911 return ((*cb->cb_awrite)(dev, aio, cred_p)); 2912 } 2913 2914 /* 2915 * Clustering: We want check_vp to return a function prototyped 2916 * correctly that will be common to both PXFS and regular case. 2917 * We define this intermediate function that will do the right 2918 * thing for driver cases. 2919 */ 2920 2921 static int 2922 driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 2923 { 2924 dev_t dev; 2925 struct cb_ops *cb; 2926 2927 ASSERT(vp->v_type == VCHR); 2928 ASSERT(!IS_PXFSVP(vp)); 2929 dev = VTOS(vp)->s_dev; 2930 ASSERT(!STREAMSTAB(getmajor(dev))); 2931 2932 cb = devopsp[getmajor(dev)]->devo_cb_ops; 2933 2934 ASSERT(cb->cb_aread != nodev); 2935 return ((*cb->cb_aread)(dev, aio, cred_p)); 2936 } 2937 2938 /* 2939 * This routine is called when a largefile call is made by a 32bit 2940 * process on a ILP32 or LP64 kernel. All 64bit processes are large 2941 * file by definition and will call alio() instead. 2942 */ 2943 static int 2944 alioLF( 2945 int mode_arg, 2946 void *aiocb_arg, 2947 int nent, 2948 void *sigev) 2949 { 2950 file_t *fp; 2951 file_t *prev_fp = NULL; 2952 int prev_mode = -1; 2953 struct vnode *vp; 2954 aio_lio_t *head; 2955 aio_req_t *reqp; 2956 aio_t *aiop; 2957 caddr_t cbplist; 2958 aiocb64_32_t *cbp; 2959 caddr32_t *ucbp; 2960 aiocb64_32_t cb64; 2961 aiocb64_32_t *aiocb = &cb64; 2962 #ifdef _LP64 2963 aiocb_t aiocb_n; 2964 #endif 2965 struct sigevent32 sigevk; 2966 sigqueue_t *sqp; 2967 int (*aio_func)(); 2968 int mode; 2969 int error = 0, aio_errors = 0; 2970 int i; 2971 size_t ssize; 2972 int deadhead = 0; 2973 int aio_notsupported = 0; 2974 int aio_use_port = 0; 2975 port_kevent_t *pkevtp = NULL; 2976 port_notify32_t pnotify; 2977 2978 aiop = curproc->p_aio; 2979 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 2980 return (EINVAL); 2981 2982 ASSERT(get_udatamodel() == DATAMODEL_ILP32); 2983 2984 ssize = (sizeof (caddr32_t) * nent); 2985 cbplist = kmem_alloc(ssize, KM_SLEEP); 2986 ucbp = (caddr32_t *)cbplist; 2987 2988 if (copyin(aiocb_arg, cbplist, ssize)) { 2989 kmem_free(cbplist, ssize); 2990 return (EFAULT); 2991 } 2992 2993 if (sigev) { 2994 if (copyin(sigev, &sigevk, sizeof (sigevk))) { 2995 kmem_free(cbplist, ssize); 2996 return (EFAULT); 2997 } 2998 } 2999 3000 /* 3001 * a list head should be allocated if notification is 3002 * enabled for this list. 
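 * The aio_lio_t head carries lio_nent/lio_refcnt so that LIO_WAIT can
 * block on lio_notify until every entry in the list has completed, and
 * it also holds the sigqueue_t used for SIGEV_SIGNAL notification.
 * When SIGEV_PORT is requested, completion is reported through the
 * event port instead and no list head is allocated.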
3003 */ 3004 head = NULL; 3005 3006 /* Event Ports */ 3007 3008 if (sigev && sigevk.sigev_notify == SIGEV_PORT) { 3009 /* Use PORT for completion notification */ 3010 if (copyin((void *)(uintptr_t)sigevk.sigev_value.sival_ptr, 3011 &pnotify, sizeof (port_notify32_t))) { 3012 kmem_free(cbplist, ssize); 3013 return (EFAULT); 3014 } 3015 /* use event ports for the list of aiocbs */ 3016 aio_use_port = 1; 3017 error = port_alloc_event(pnotify.portnfy_port, 3018 PORT_ALLOC_PRIVATE, PORT_SOURCE_AIO, &pkevtp); 3019 if (error) { 3020 if (error == ENOMEM) 3021 error = EAGAIN; 3022 kmem_free(cbplist, ssize); 3023 return (error); 3024 } 3025 } else if ((mode_arg == LIO_WAIT) || sigev) { 3026 mutex_enter(&aiop->aio_mutex); 3027 error = aio_lio_alloc(&head); 3028 mutex_exit(&aiop->aio_mutex); 3029 if (error) 3030 goto done; 3031 deadhead = 1; 3032 head->lio_nent = nent; 3033 head->lio_refcnt = nent; 3034 if (sigev && (sigevk.sigev_notify == SIGEV_SIGNAL) && 3035 (sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG)) { 3036 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3037 if (sqp == NULL) { 3038 error = EAGAIN; 3039 goto done; 3040 } 3041 sqp->sq_func = NULL; 3042 sqp->sq_next = NULL; 3043 sqp->sq_info.si_code = SI_ASYNCIO; 3044 sqp->sq_info.si_pid = curproc->p_pid; 3045 sqp->sq_info.si_ctid = PRCTID(curproc); 3046 sqp->sq_info.si_zoneid = getzoneid(); 3047 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3048 sqp->sq_info.si_signo = sigevk.sigev_signo; 3049 sqp->sq_info.si_value.sival_int = 3050 sigevk.sigev_value.sival_int; 3051 head->lio_sigqp = sqp; 3052 } else { 3053 head->lio_sigqp = NULL; 3054 } 3055 } 3056 3057 for (i = 0; i < nent; i++, ucbp++) { 3058 3059 cbp = (aiocb64_32_t *)(uintptr_t)*ucbp; 3060 /* skip entry if it can't be copied. */ 3061 if (cbp == NULL || copyin(cbp, aiocb, sizeof (aiocb64_32_t))) { 3062 if (head) { 3063 mutex_enter(&aiop->aio_mutex); 3064 head->lio_nent--; 3065 head->lio_refcnt--; 3066 mutex_exit(&aiop->aio_mutex); 3067 } 3068 continue; 3069 } 3070 3071 /* skip if opcode for aiocb is LIO_NOP */ 3072 3073 mode = aiocb->aio_lio_opcode; 3074 if (mode == LIO_NOP) { 3075 cbp = NULL; 3076 if (head) { 3077 mutex_enter(&aiop->aio_mutex); 3078 head->lio_nent--; 3079 head->lio_refcnt--; 3080 mutex_exit(&aiop->aio_mutex); 3081 } 3082 continue; 3083 } 3084 3085 /* increment file descriptor's ref count. 
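 * getf() returns NULL for a bad descriptor, in which case this entry
 * fails with EBADF. On any later setup error the hold is dropped with
 * releasef(); otherwise it is kept until the I/O completes (see the
 * comment further below).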
*/ 3086 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 3087 lio_set_uerror(&cbp->aio_resultp, EBADF); 3088 if (head) { 3089 mutex_enter(&aiop->aio_mutex); 3090 head->lio_nent--; 3091 head->lio_refcnt--; 3092 mutex_exit(&aiop->aio_mutex); 3093 } 3094 aio_errors++; 3095 continue; 3096 } 3097 3098 vp = fp->f_vnode; 3099 3100 /* 3101 * check the permission of the partition 3102 */ 3103 mode = aiocb->aio_lio_opcode; 3104 if ((fp->f_flag & mode) == 0) { 3105 releasef(aiocb->aio_fildes); 3106 lio_set_uerror(&cbp->aio_resultp, EBADF); 3107 if (head) { 3108 mutex_enter(&aiop->aio_mutex); 3109 head->lio_nent--; 3110 head->lio_refcnt--; 3111 mutex_exit(&aiop->aio_mutex); 3112 } 3113 aio_errors++; 3114 continue; 3115 } 3116 3117 /* 3118 * common case where requests are to the same fd 3119 * for the same r/w operation 3120 * for UFS, need to set EBADFD 3121 */ 3122 if ((fp != prev_fp) || (mode != prev_mode)) { 3123 aio_func = check_vp(vp, mode); 3124 if (aio_func == NULL) { 3125 prev_fp = NULL; 3126 releasef(aiocb->aio_fildes); 3127 lio_set_uerror(&cbp->aio_resultp, EBADFD); 3128 aio_notsupported++; 3129 if (head) { 3130 mutex_enter(&aiop->aio_mutex); 3131 head->lio_nent--; 3132 head->lio_refcnt--; 3133 mutex_exit(&aiop->aio_mutex); 3134 } 3135 continue; 3136 } else { 3137 prev_fp = fp; 3138 prev_mode = mode; 3139 } 3140 } 3141 #ifdef _LP64 3142 aiocb_LFton(aiocb, &aiocb_n); 3143 error = aio_req_setup(&reqp, aiop, &aiocb_n, 3144 (aio_result_t *)&cbp->aio_resultp, aio_use_port, vp); 3145 #else 3146 error = aio_req_setupLF(&reqp, aiop, aiocb, 3147 (aio_result_t *)&cbp->aio_resultp, aio_use_port, vp); 3148 #endif /* _LP64 */ 3149 if (error) { 3150 releasef(aiocb->aio_fildes); 3151 if (head) { 3152 mutex_enter(&aiop->aio_mutex); 3153 head->lio_nent--; 3154 head->lio_refcnt--; 3155 mutex_exit(&aiop->aio_mutex); 3156 } 3157 aio_errors++; 3158 continue; 3159 } 3160 3161 reqp->aio_req_lio = head; 3162 deadhead = 0; 3163 3164 /* 3165 * Set the errno field now before sending the request to 3166 * the driver to avoid a race condition 3167 */ 3168 (void) suword32(&cbp->aio_resultp.aio_errno, 3169 EINPROGRESS); 3170 3171 reqp->aio_req_iocb.iocb32 = *ucbp; 3172 3173 if (aio_use_port) { 3174 reqp->aio_req_port = pnotify.portnfy_port; 3175 error = aio_req_assoc_port32(&aiocb->aio_sigevent, 3176 (void *)(uintptr_t)pnotify.portnfy_user, 3177 (aiocb_t *)(uintptr_t)*ucbp, reqp, pkevtp); 3178 } 3179 3180 /* 3181 * send the request to driver. 3182 * Clustering: If PXFS vnode, call PXFS function. 3183 */ 3184 if (error == 0) { 3185 if (aiocb->aio_nbytes == 0) { 3186 clear_active_fd(aiocb->aio_fildes); 3187 aio_zerolen(reqp); 3188 continue; 3189 } 3190 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 3191 CRED()); 3192 } 3193 3194 /* 3195 * the fd's ref count is not decremented until the IO has 3196 * completed unless there was an error. 
3197 */ 3198 if (error) { 3199 releasef(aiocb->aio_fildes); 3200 lio_set_uerror(&cbp->aio_resultp, error); 3201 if (head) { 3202 mutex_enter(&aiop->aio_mutex); 3203 head->lio_nent--; 3204 head->lio_refcnt--; 3205 mutex_exit(&aiop->aio_mutex); 3206 } 3207 if (error == ENOTSUP) 3208 aio_notsupported++; 3209 else 3210 aio_errors++; 3211 lio_set_error(reqp); 3212 } else { 3213 clear_active_fd(aiocb->aio_fildes); 3214 } 3215 } 3216 3217 if (pkevtp) 3218 port_free_event(pkevtp); 3219 3220 if (aio_notsupported) { 3221 error = ENOTSUP; 3222 } else if (aio_errors) { 3223 /* 3224 * return EIO if any request failed 3225 */ 3226 error = EIO; 3227 } 3228 3229 if (mode_arg == LIO_WAIT) { 3230 mutex_enter(&aiop->aio_mutex); 3231 while (head->lio_refcnt > 0) { 3232 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 3233 mutex_exit(&aiop->aio_mutex); 3234 error = EINTR; 3235 goto done; 3236 } 3237 } 3238 mutex_exit(&aiop->aio_mutex); 3239 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE); 3240 } 3241 3242 done: 3243 kmem_free(cbplist, ssize); 3244 if (deadhead) { 3245 if (head->lio_sigqp) 3246 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 3247 kmem_free(head, sizeof (aio_lio_t)); 3248 } 3249 return (error); 3250 } 3251 3252 #ifdef _SYSCALL32_IMPL 3253 static void 3254 aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest) 3255 { 3256 dest->aio_fildes = src->aio_fildes; 3257 dest->aio_buf = (void *)(uintptr_t)src->aio_buf; 3258 dest->aio_nbytes = (size_t)src->aio_nbytes; 3259 dest->aio_offset = (off_t)src->aio_offset; 3260 dest->aio_reqprio = src->aio_reqprio; 3261 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 3262 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 3263 3264 /* 3265 * See comment in sigqueue32() on handling of 32-bit 3266 * sigvals in a 64-bit kernel. 3267 */ 3268 dest->aio_sigevent.sigev_value.sival_int = 3269 (int)src->aio_sigevent.sigev_value.sival_int; 3270 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 3271 (uintptr_t)src->aio_sigevent.sigev_notify_function; 3272 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 3273 (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 3274 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 3275 dest->aio_lio_opcode = src->aio_lio_opcode; 3276 dest->aio_state = src->aio_state; 3277 dest->aio__pad[0] = src->aio__pad[0]; 3278 } 3279 #endif 3280 3281 /* 3282 * This function is used only for largefile calls made by 3283 * 32 bit applications on 32 bit kernel. 
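 * It mirrors aio_req_setup(), but takes an aiocb64_32_t so that the
 * 64-bit file offset of the largefile aiocb is carried directly into
 * uio_loffset.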
3284 */ 3285 static int 3286 aio_req_setupLF( 3287 aio_req_t **reqpp, 3288 aio_t *aiop, 3289 aiocb64_32_t *arg, 3290 aio_result_t *resultp, 3291 int port, 3292 vnode_t *vp) 3293 { 3294 aio_req_t *reqp; 3295 sigqueue_t *sqp; 3296 struct uio *uio; 3297 3298 struct sigevent *sigev; 3299 int error; 3300 3301 sigev = (struct sigevent *)&arg->aio_sigevent; 3302 if ((sigev->sigev_notify == SIGEV_SIGNAL) && 3303 (sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG)) { 3304 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3305 if (sqp == NULL) 3306 return (EAGAIN); 3307 sqp->sq_func = NULL; 3308 sqp->sq_next = NULL; 3309 sqp->sq_info.si_code = SI_ASYNCIO; 3310 sqp->sq_info.si_pid = curproc->p_pid; 3311 sqp->sq_info.si_ctid = PRCTID(curproc); 3312 sqp->sq_info.si_zoneid = getzoneid(); 3313 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3314 sqp->sq_info.si_signo = sigev->sigev_signo; 3315 sqp->sq_info.si_value = sigev->sigev_value; 3316 } else 3317 sqp = NULL; 3318 3319 mutex_enter(&aiop->aio_mutex); 3320 3321 if (aiop->aio_flags & AIO_REQ_BLOCK) { 3322 mutex_exit(&aiop->aio_mutex); 3323 if (sqp) 3324 kmem_free(sqp, sizeof (sigqueue_t)); 3325 return (EIO); 3326 } 3327 /* 3328 * get an aio_reqp from the free list or allocate one 3329 * from dynamic memory. 3330 */ 3331 if (error = aio_req_alloc(&reqp, resultp)) { 3332 mutex_exit(&aiop->aio_mutex); 3333 if (sqp) 3334 kmem_free(sqp, sizeof (sigqueue_t)); 3335 return (error); 3336 } 3337 aiop->aio_pending++; 3338 aiop->aio_outstanding++; 3339 reqp->aio_req_flags = AIO_PENDING; 3340 if (port) 3341 aio_enq_port_pending(aiop, reqp); 3342 mutex_exit(&aiop->aio_mutex); 3343 /* 3344 * initialize aio request. 3345 */ 3346 reqp->aio_req_fd = arg->aio_fildes; 3347 reqp->aio_req_sigqp = sqp; 3348 reqp->aio_req_iocb.iocb = NULL; 3349 reqp->aio_req_buf.b_file = vp; 3350 uio = reqp->aio_req.aio_uio; 3351 uio->uio_iovcnt = 1; 3352 uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf; 3353 uio->uio_iov->iov_len = arg->aio_nbytes; 3354 uio->uio_loffset = arg->aio_offset; 3355 *reqpp = reqp; 3356 return (0); 3357 } 3358 3359 /* 3360 * This routine is called when a non largefile call is made by a 32bit 3361 * process on a ILP32 or LP64 kernel. 
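 * For reference, the user-level call that typically reaches this code
 * is a 32-bit lio_listio() issued against a raw device. The sketch
 * below is illustrative only (it is not part of this file; "fd" and
 * "buf" are assumed to be an already-open raw-device descriptor and a
 * suitably sized buffer):
 *
 *	struct aiocb cb;
 *	struct aiocb *list[1] = { &cb };
 *
 *	(void) memset(&cb, 0, sizeof (cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_offset = 0;
 *	cb.aio_lio_opcode = LIO_READ;
 *	if (lio_listio(LIO_WAIT, list, 1, NULL) != 0)
 *		perror("lio_listio");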
3362 */ 3363 static int 3364 alio32( 3365 int mode_arg, 3366 void *aiocb_arg, 3367 int nent, 3368 void *sigev_arg) 3369 { 3370 file_t *fp; 3371 file_t *prev_fp = NULL; 3372 int prev_mode = -1; 3373 struct vnode *vp; 3374 aio_lio_t *head; 3375 aio_req_t *reqp; 3376 aio_t *aiop; 3377 aiocb_t cb; 3378 aiocb_t *aiocb = &cb; 3379 caddr_t cbplist; 3380 #ifdef _LP64 3381 aiocb32_t *cbp; 3382 caddr32_t *ucbp; 3383 aiocb32_t cb32; 3384 aiocb32_t *aiocb32 = &cb32; 3385 struct sigevent32 sigev; 3386 #else 3387 aiocb_t *cbp, **ucbp; 3388 struct sigevent sigev; 3389 #endif 3390 sigqueue_t *sqp; 3391 int (*aio_func)(); 3392 int mode; 3393 int error = 0, aio_errors = 0; 3394 int i; 3395 size_t ssize; 3396 int deadhead = 0; 3397 int aio_notsupported = 0; 3398 int aio_use_port = 0; 3399 port_kevent_t *pkevtp = NULL; 3400 #ifdef _LP64 3401 port_notify32_t pnotify; 3402 #else 3403 port_notify_t pnotify; 3404 #endif 3405 aiop = curproc->p_aio; 3406 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 3407 return (EINVAL); 3408 3409 #ifdef _LP64 3410 ssize = (sizeof (caddr32_t) * nent); 3411 #else 3412 ssize = (sizeof (aiocb_t *) * nent); 3413 #endif 3414 cbplist = kmem_alloc(ssize, KM_SLEEP); 3415 ucbp = (void *)cbplist; 3416 3417 if (copyin(aiocb_arg, cbplist, ssize)) { 3418 kmem_free(cbplist, ssize); 3419 return (EFAULT); 3420 } 3421 3422 if (sigev_arg) { 3423 if (copyin(sigev_arg, &sigev, sizeof (struct sigevent32))) { 3424 kmem_free(cbplist, ssize); 3425 return (EFAULT); 3426 } 3427 } 3428 3429 /* 3430 * a list head should be allocated if notification is 3431 * enabled for this list. 3432 */ 3433 head = NULL; 3434 3435 /* Event Ports */ 3436 3437 if (sigev_arg && sigev.sigev_notify == SIGEV_PORT) { 3438 /* Use PORT for completion notification */ 3439 if (copyin((void *)(uintptr_t)sigev.sigev_value.sival_ptr, 3440 &pnotify, sizeof (port_notify32_t))) { 3441 kmem_free(cbplist, ssize); 3442 return (EFAULT); 3443 } 3444 /* use event ports for the list of aiocbs */ 3445 aio_use_port = 1; 3446 error = port_alloc_event(pnotify.portnfy_port, 3447 PORT_ALLOC_PRIVATE, PORT_SOURCE_AIO, &pkevtp); 3448 if (error) { 3449 if ((error == ENOMEM) || (error == EAGAIN)) 3450 error = EAGAIN; 3451 else 3452 error = EINVAL; 3453 kmem_free(cbplist, ssize); 3454 return (error); 3455 } 3456 } else if ((mode_arg == LIO_WAIT) || sigev_arg) { 3457 mutex_enter(&aiop->aio_mutex); 3458 error = aio_lio_alloc(&head); 3459 mutex_exit(&aiop->aio_mutex); 3460 if (error) 3461 goto done; 3462 deadhead = 1; 3463 head->lio_nent = nent; 3464 head->lio_refcnt = nent; 3465 if (sigev_arg && (sigev.sigev_notify == SIGEV_SIGNAL) && 3466 (sigev.sigev_signo > 0 && sigev.sigev_signo < NSIG)) { 3467 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3468 if (sqp == NULL) { 3469 error = EAGAIN; 3470 goto done; 3471 } 3472 sqp->sq_func = NULL; 3473 sqp->sq_next = NULL; 3474 sqp->sq_info.si_code = SI_ASYNCIO; 3475 sqp->sq_info.si_pid = curproc->p_pid; 3476 sqp->sq_info.si_ctid = PRCTID(curproc); 3477 sqp->sq_info.si_zoneid = getzoneid(); 3478 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3479 sqp->sq_info.si_signo = sigev.sigev_signo; 3480 sqp->sq_info.si_value.sival_int = 3481 sigev.sigev_value.sival_int; 3482 head->lio_sigqp = sqp; 3483 } else { 3484 head->lio_sigqp = NULL; 3485 } 3486 } 3487 3488 for (i = 0; i < nent; i++, ucbp++) { 3489 3490 /* skip entry if it can't be copied. 
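 * A NULL pointer or a failed copyin() silently drops the entry; the
 * list-head counts (lio_nent/lio_refcnt) are adjusted below so that a
 * later LIO_WAIT does not wait for it.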
*/ 3491 #ifdef _LP64 3492 cbp = (aiocb32_t *)(uintptr_t)*ucbp; 3493 if (cbp == NULL || copyin(cbp, aiocb32, sizeof (aiocb32_t))) { 3494 #else 3495 cbp = (aiocb_t *)*ucbp; 3496 if (cbp == NULL || copyin(cbp, aiocb, sizeof (aiocb_t))) { 3497 #endif 3498 if (head) { 3499 mutex_enter(&aiop->aio_mutex); 3500 head->lio_nent--; 3501 head->lio_refcnt--; 3502 mutex_exit(&aiop->aio_mutex); 3503 } 3504 continue; 3505 } 3506 #ifdef _LP64 3507 /* 3508 * copy 32 bit structure into 64 bit structure 3509 */ 3510 aiocb_32ton(aiocb32, aiocb); 3511 #endif /* _LP64 */ 3512 3513 /* skip if opcode for aiocb is LIO_NOP */ 3514 3515 mode = aiocb->aio_lio_opcode; 3516 if (mode == LIO_NOP) { 3517 cbp = NULL; 3518 if (head) { 3519 mutex_enter(&aiop->aio_mutex); 3520 head->lio_nent--; 3521 head->lio_refcnt--; 3522 mutex_exit(&aiop->aio_mutex); 3523 } 3524 continue; 3525 } 3526 3527 /* increment file descriptor's ref count. */ 3528 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 3529 lio_set_uerror(&cbp->aio_resultp, EBADF); 3530 if (head) { 3531 mutex_enter(&aiop->aio_mutex); 3532 head->lio_nent--; 3533 head->lio_refcnt--; 3534 mutex_exit(&aiop->aio_mutex); 3535 } 3536 aio_errors++; 3537 continue; 3538 } 3539 3540 vp = fp->f_vnode; 3541 3542 /* 3543 * check the permission of the partition 3544 */ 3545 mode = aiocb->aio_lio_opcode; 3546 if ((fp->f_flag & mode) == 0) { 3547 releasef(aiocb->aio_fildes); 3548 lio_set_uerror(&cbp->aio_resultp, EBADF); 3549 if (head) { 3550 mutex_enter(&aiop->aio_mutex); 3551 head->lio_nent--; 3552 head->lio_refcnt--; 3553 mutex_exit(&aiop->aio_mutex); 3554 } 3555 aio_errors++; 3556 continue; 3557 } 3558 3559 /* 3560 * common case where requests are to the same fd 3561 * for the same r/w operation 3562 * for UFS, need to set EBADFD 3563 */ 3564 if ((fp != prev_fp) || (mode != prev_mode)) { 3565 aio_func = check_vp(vp, mode); 3566 if (aio_func == NULL) { 3567 prev_fp = NULL; 3568 releasef(aiocb->aio_fildes); 3569 lio_set_uerror(&cbp->aio_resultp, 3570 EBADFD); 3571 aio_notsupported++; 3572 if (head) { 3573 mutex_enter(&aiop->aio_mutex); 3574 head->lio_nent--; 3575 head->lio_refcnt--; 3576 mutex_exit(&aiop->aio_mutex); 3577 } 3578 continue; 3579 } else { 3580 prev_fp = fp; 3581 prev_mode = mode; 3582 } 3583 } 3584 if (error = aio_req_setup(&reqp, aiop, aiocb, 3585 (aio_result_t *)&cbp->aio_resultp, aio_use_port, vp)) { 3586 releasef(aiocb->aio_fildes); 3587 lio_set_uerror(&cbp->aio_resultp, error); 3588 if (head) { 3589 mutex_enter(&aiop->aio_mutex); 3590 head->lio_nent--; 3591 head->lio_refcnt--; 3592 mutex_exit(&aiop->aio_mutex); 3593 } 3594 aio_errors++; 3595 continue; 3596 } 3597 3598 reqp->aio_req_lio = head; 3599 deadhead = 0; 3600 3601 /* 3602 * Set the errno field now before sending the request to 3603 * the driver to avoid a race condition 3604 */ 3605 (void) suword32(&cbp->aio_resultp.aio_errno, 3606 EINPROGRESS); 3607 3608 reqp->aio_req_iocb.iocb32 = ((caddr32_t *)cbplist)[i]; 3609 3610 if (aio_use_port) { 3611 reqp->aio_req_port = pnotify.portnfy_port; 3612 #ifdef _LP64 3613 error = aio_req_assoc_port32(&aiocb32->aio_sigevent, 3614 (void *)(uintptr_t)pnotify.portnfy_user, 3615 (aiocb_t *)(uintptr_t)(((caddr32_t *)cbplist)[i]), 3616 reqp, pkevtp); 3617 #else 3618 error = aio_req_assoc_port(&aiocb->aio_sigevent, 3619 pnotify.portnfy_user, 3620 (aiocb_t *)(((caddr32_t *)cbplist)[i]), 3621 reqp, pkevtp); 3622 #endif 3623 } 3624 3625 /* 3626 * send the request to driver. 3627 * Clustering: If PXFS vnode, call PXFS function. 
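 * aio_func was selected by check_vp() above (driver_aio_read(),
 * driver_aio_write(), or one of the clpxfs_aio_* routines); a
 * zero-length request is completed immediately via aio_zerolen()
 * without ever reaching the driver.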
3628 */ 3629 if (error == 0) { 3630 if (aiocb->aio_nbytes == 0) { 3631 clear_active_fd(aiocb->aio_fildes); 3632 aio_zerolen(reqp); 3633 continue; 3634 } 3635 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 3636 CRED()); 3637 } 3638 3639 /* 3640 * the fd's ref count is not decremented until the IO has 3641 * completed unless there was an error. 3642 */ 3643 if (error) { 3644 releasef(aiocb->aio_fildes); 3645 lio_set_uerror(&cbp->aio_resultp, error); 3646 if (head) { 3647 mutex_enter(&aiop->aio_mutex); 3648 head->lio_nent--; 3649 head->lio_refcnt--; 3650 mutex_exit(&aiop->aio_mutex); 3651 } 3652 if (error == ENOTSUP) 3653 aio_notsupported++; 3654 else 3655 aio_errors++; 3656 lio_set_error(reqp); 3657 } else { 3658 clear_active_fd(aiocb->aio_fildes); 3659 } 3660 } 3661 3662 if (pkevtp) 3663 port_free_event(pkevtp); 3664 3665 if (aio_notsupported) { 3666 error = ENOTSUP; 3667 } else if (aio_errors) { 3668 /* 3669 * return EIO if any request failed 3670 */ 3671 error = EIO; 3672 } 3673 3674 if (mode_arg == LIO_WAIT) { 3675 mutex_enter(&aiop->aio_mutex); 3676 while (head->lio_refcnt > 0) { 3677 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 3678 mutex_exit(&aiop->aio_mutex); 3679 error = EINTR; 3680 goto done; 3681 } 3682 } 3683 mutex_exit(&aiop->aio_mutex); 3684 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32); 3685 } 3686 3687 done: 3688 kmem_free(cbplist, ssize); 3689 if (deadhead) { 3690 if (head->lio_sigqp) 3691 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 3692 kmem_free(head, sizeof (aio_lio_t)); 3693 } 3694 return (error); 3695 } 3696 3697 3698 #ifdef _SYSCALL32_IMPL 3699 void 3700 aiocb_32ton(aiocb32_t *src, aiocb_t *dest) 3701 { 3702 dest->aio_fildes = src->aio_fildes; 3703 dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf; 3704 dest->aio_nbytes = (size_t)src->aio_nbytes; 3705 dest->aio_offset = (off_t)src->aio_offset; 3706 dest->aio_reqprio = src->aio_reqprio; 3707 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 3708 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 3709 3710 /* 3711 * See comment in sigqueue32() on handling of 32-bit 3712 * sigvals in a 64-bit kernel. 3713 */ 3714 dest->aio_sigevent.sigev_value.sival_int = 3715 (int)src->aio_sigevent.sigev_value.sival_int; 3716 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 3717 (uintptr_t)src->aio_sigevent.sigev_notify_function; 3718 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 3719 (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 3720 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 3721 dest->aio_lio_opcode = src->aio_lio_opcode; 3722 dest->aio_state = src->aio_state; 3723 dest->aio__pad[0] = src->aio__pad[0]; 3724 } 3725 #endif /* _SYSCALL32_IMPL */ 3726 3727 /* 3728 * aio_port_callback() is called just before the event is retrieved from the 3729 * port. The task of this callback function is to finish the work of the 3730 * transaction for the application, it means : 3731 * - copyout transaction data to the application 3732 * (this thread is running in the right process context) 3733 * - keep trace of the transaction (update of counters). 3734 * - free allocated buffers 3735 * The aiocb pointer is the object element of the port_kevent_t structure. 
3736 * 3737 * flag : 3738 * PORT_CALLBACK_DEFAULT : do copyout and free resources 3739 * PORT_CALLBACK_CLOSE : don't do copyout, free resources 3740 */ 3741 3742 /*ARGSUSED*/ 3743 int 3744 aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp) 3745 { 3746 aio_t *aiop = curproc->p_aio; 3747 aio_req_t *reqp = arg; 3748 struct iovec *iov; 3749 struct buf *bp; 3750 void *resultp; 3751 3752 if (pid != curproc->p_pid) { 3753 /* wrong proc !!, can not deliver data here ... */ 3754 return (EACCES); 3755 } 3756 3757 mutex_enter(&aiop->aio_portq_mutex); 3758 reqp->aio_req_portkev = NULL; 3759 aio_req_remove_portq(aiop, reqp); /* remove request from portq */ 3760 mutex_exit(&aiop->aio_portq_mutex); 3761 aphysio_unlock(reqp); /* unlock used pages */ 3762 mutex_enter(&aiop->aio_mutex); 3763 if (reqp->aio_req_flags & AIO_COPYOUTDONE) { 3764 aio_req_free_port(aiop, reqp); /* back to free list */ 3765 mutex_exit(&aiop->aio_mutex); 3766 return (0); 3767 } 3768 3769 iov = reqp->aio_req_uio.uio_iov; 3770 bp = &reqp->aio_req_buf; 3771 resultp = (void *)reqp->aio_req_resultp; 3772 aio_req_free_port(aiop, reqp); /* request struct back to free list */ 3773 mutex_exit(&aiop->aio_mutex); 3774 if (flag == PORT_CALLBACK_DEFAULT) 3775 aio_copyout_result_port(iov, bp, resultp); 3776 return (0); 3777 } 3778