1 /* 2 * Copyright (c) 1997 John S. Dyson. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. John S. Dyson's name may not be used to endorse or promote products 10 * derived from this software without specific prior written permission. 11 * 12 * DISCLAIMER: This code isn't warranted to do anything useful. Anything 13 * bad that happens because of using this software isn't the responsibility 14 * of the author. This software is distributed AS-IS. 15 * 16 * $Id: vfs_aio.c,v 1.3 1997/07/17 04:49:31 dyson Exp $ 17 */ 18 19 /* 20 * This file contains support for the POSIX.4 AIO facility. 21 * 22 * The initial version provides only the (bogus) synchronous semantics 23 * but will support async in the future. Note that a bit 24 * in a private field allows the user mode subroutine to adapt 25 * the kernel operations to true POSIX.4 for future compatibility. 26 * 27 * This code is used to support true POSIX.4 AIO/LIO with the help 28 * of a user mode subroutine package. Note that eventually more support 29 * will be pushed into the kernel. 30 */ 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/sysproto.h> 35 #include <sys/filedesc.h> 36 #include <sys/kernel.h> 37 #include <sys/fcntl.h> 38 #include <sys/file.h> 39 #include <sys/unistd.h> 40 #include <sys/vnode.h> 41 #include <sys/proc.h> 42 #include <sys/uio.h> 43 #include <sys/malloc.h> 44 #include <sys/signalvar.h> 45 46 #include <vm/vm.h> 47 #include <vm/vm_param.h> 48 #include <vm/vm_extern.h> 49 #include <vm/pmap.h> 50 #include <vm/vm_map.h> 51 #include <sys/aio.h> 52 #include <sys/shm.h> 53 54 #include <machine/cpu.h> 55 56 #define AIOCBLIST_CANCELLED 0x1 57 #define AIOCBLIST_RUNDOWN 0x4 58 #define AIOCBLIST_ASYNCFREE 0x8 59 #define AIOCBLIST_SUSPEND 0x10 60 61 #if 0 62 #define DEBUGAIO 63 #define DIAGNOSTIC 64 #endif 65 66 static int jobrefid; 67 68 #define JOBST_NULL 0x0 69 #define JOBST_JOBQPROC 0x1 70 #define JOBST_JOBQGLOBAL 0x2 71 #define JOBST_JOBRUNNING 0x3 72 #define JOBST_JOBFINISHED 0x4 73 74 #define MAX_AIO_PER_PROC 32 75 #define MAX_AIO_QUEUE_PER_PROC 256 /* Bigger than AIO_LISTIO_MAX */ 76 #define MAX_AIO_PROCS 128 77 #define MAX_AIO_QUEUE 1024 /* Bigger than AIO_LISTIO_MAX */ 78 #define TARGET_AIO_PROCS 64 79 80 /* 81 * Job queue item 82 */ 83 struct aiocblist { 84 TAILQ_ENTRY (aiocblist) list; /* List of jobs */ 85 TAILQ_ENTRY (aiocblist) plist; /* List of jobs for proc */ 86 int jobflags; 87 int jobstate; 88 struct proc *userproc; /* User process */ 89 struct aioproclist *jobaioproc; /* AIO process descriptor */ 90 struct aiocb uaiocb; /* Kernel I/O control block */ 91 }; 92 93 #define AIOP_FREE 0x1 /* proc on free queue */ 94 /* 95 * AIO process info 96 */ 97 struct aioproclist { 98 int aioprocflags; /* AIO proc flags */ 99 TAILQ_ENTRY(aioproclist) list; /* List of processes */ 100 struct proc *aioproc; /* The AIO thread */ 101 TAILQ_HEAD (,aiocblist) jobtorun; /* suggested job to run */ 102 }; 103 104 struct kaioinfo { 105 int kaio_maxactive_count; /* maximum number of AIOs */ 106 int kaio_active_count; /* number of currently used AIOs */ 107 int kaio_qallowed_count; /* maxiumu size of AIO queue */ 108 int kaio_queue_count; /* size of AIO queue */ 109 TAILQ_HEAD (,aiocblist) kaio_jobqueue; /* job queue for process */ 110 TAILQ_HEAD (,aiocblist) kaio_jobdone; /* done queue for process */ 111 }; 112 113 TAILQ_HEAD (,aioproclist) aio_freeproc, aio_activeproc; 114 TAILQ_HEAD(,aiocblist) aio_jobs; /* Async job list */ 115 TAILQ_HEAD(,aiocblist) aio_freejobs; 116 117 int max_aio_procs = MAX_AIO_PROCS; 118 int num_aio_procs = 0; 119 int target_aio_procs = TARGET_AIO_PROCS; 120 121 int max_queue_count = MAX_AIO_QUEUE; 122 int num_queue_count = 0; 123 124 void aio_init_aioinfo(struct proc *p) ; 125 void aio_onceonly(void *) ; 126 int aio_free_entry(struct aiocblist *aiocbe); 127 void aio_cancel_internal(struct aiocblist *aiocbe); 128 void aio_process(struct aiocblist *aiocbe); 129 void pmap_newvmspace(struct vmspace *); 130 static int aio_newproc(void) ; 131 static int aio_aqueue(struct proc *p, struct aiocb *job, int type) ; 132 static void aio_marksuspend(struct proc *p, int njobs, int *joblist, int set) ; 133 134 SYSINIT(aio, SI_SUB_VFS, SI_ORDER_ANY, aio_onceonly, NULL); 135 136 /* 137 * Startup initialization 138 */ 139 void 140 aio_onceonly(void *na) { 141 TAILQ_INIT(&aio_freeproc); 142 TAILQ_INIT(&aio_activeproc); 143 TAILQ_INIT(&aio_jobs); 144 TAILQ_INIT(&aio_freejobs); 145 } 146 147 /* 148 * Init the per-process aioinfo structure. 149 */ 150 void 151 aio_init_aioinfo(struct proc *p) { 152 struct kaioinfo *ki; 153 if (p->p_aioinfo == NULL) { 154 ki = malloc(sizeof (struct kaioinfo), M_AIO, M_WAITOK); 155 p->p_aioinfo = ki; 156 ki->kaio_maxactive_count = MAX_AIO_PER_PROC; 157 ki->kaio_active_count = 0; 158 ki->kaio_qallowed_count = MAX_AIO_QUEUE_PER_PROC; 159 ki->kaio_queue_count = 0; 160 TAILQ_INIT(&ki->kaio_jobdone); 161 TAILQ_INIT(&ki->kaio_jobqueue); 162 } 163 } 164 165 /* 166 * Free a job entry. Wait for completion if it is currently 167 * active, but don't delay forever. If we delay, we return 168 * a flag that says that we have to restart the queue scan. 169 */ 170 int 171 aio_free_entry(struct aiocblist *aiocbe) { 172 struct kaioinfo *ki; 173 struct aioproclist *aiop; 174 struct proc *p; 175 176 if (aiocbe->jobstate == JOBST_NULL) 177 panic("aio_free_entry: freeing already free job"); 178 179 p = aiocbe->userproc; 180 ki = p->p_aioinfo; 181 if (ki == NULL) 182 panic("aio_free_entry: missing p->p_aioinfo"); 183 184 if (aiocbe->jobstate == JOBST_JOBRUNNING) { 185 if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) 186 return 0; 187 aiocbe->jobflags |= AIOCBLIST_RUNDOWN; 188 if (tsleep(aiocbe, PRIBIO|PCATCH, "jobwai", hz*5)) { 189 aiocbe->jobflags |= AIOCBLIST_ASYNCFREE; 190 aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN; 191 return 1; 192 } 193 aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN; 194 } 195 aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE; 196 197 if (ki->kaio_queue_count <= 0) 198 panic("aio_free_entry: process queue size <= 0"); 199 if (num_queue_count <= 0) 200 panic("aio_free_entry: system wide queue size <= 0"); 201 202 --ki->kaio_queue_count; 203 --num_queue_count; 204 205 if ( aiocbe->jobstate == JOBST_JOBQPROC) { 206 aiop = aiocbe->jobaioproc; 207 TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list); 208 } else if ( aiocbe->jobstate == JOBST_JOBQGLOBAL) { 209 TAILQ_REMOVE(&aio_jobs, aiocbe, list); 210 } else if ( aiocbe->jobstate == JOBST_JOBFINISHED) { 211 ki = p->p_aioinfo; 212 TAILQ_REMOVE(&ki->kaio_jobdone, aiocbe, plist); 213 } 214 TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 215 aiocbe->jobstate = JOBST_NULL; 216 return 0; 217 } 218 219 /* 220 * Rundown the jobs for a given process. 221 */ 222 void 223 aio_proc_rundown(struct proc *p) { 224 struct kaioinfo *ki; 225 struct aiocblist *aiocbe, *aiocbn; 226 227 ki = p->p_aioinfo; 228 if (ki == NULL) 229 return; 230 231 restart1: 232 for ( aiocbe = TAILQ_FIRST(&ki->kaio_jobdone); 233 aiocbe; 234 aiocbe = aiocbn) { 235 aiocbn = TAILQ_NEXT(aiocbe, plist); 236 if (aio_free_entry(aiocbe)) 237 goto restart1; 238 } 239 240 restart2: 241 for ( aiocbe = TAILQ_FIRST(&ki->kaio_jobqueue); 242 aiocbe; 243 aiocbe = aiocbn) { 244 aiocbn = TAILQ_NEXT(aiocbe, plist); 245 if (aio_free_entry(aiocbe)) 246 goto restart2; 247 } 248 free(ki, M_AIO); 249 } 250 251 /* 252 * Select a job to run (called by an AIO daemon) 253 */ 254 static struct aiocblist * 255 aio_selectjob(struct aioproclist *aiop) { 256 257 struct aiocblist *aiocbe; 258 259 aiocbe = TAILQ_FIRST(&aiop->jobtorun); 260 if (aiocbe) { 261 TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list); 262 return aiocbe; 263 } 264 265 for (aiocbe = TAILQ_FIRST(&aio_jobs); 266 aiocbe; 267 aiocbe = TAILQ_NEXT(aiocbe, list)) { 268 struct kaioinfo *ki; 269 struct proc *userp; 270 271 userp = aiocbe->userproc; 272 ki = userp->p_aioinfo; 273 274 if (ki->kaio_active_count < ki->kaio_maxactive_count) { 275 TAILQ_REMOVE(&aio_jobs, aiocbe, list); 276 return aiocbe; 277 } 278 } 279 280 return NULL; 281 } 282 283 /* 284 * The AIO activity proper. 285 */ 286 void 287 aio_process(struct aiocblist *aiocbe) { 288 struct filedesc *fdp; 289 struct proc *userp; 290 struct aiocb *cb; 291 struct file *fp; 292 struct uio auio; 293 struct iovec aiov; 294 unsigned int fd; 295 int cnt; 296 int error; 297 298 userp = aiocbe->userproc; 299 cb = &aiocbe->uaiocb; 300 301 #ifdef DEBUGAIO 302 printf("fd: %d, offset: 0x%x, address: 0x%x, size: %d\n", 303 cb->aio_fildes, (int) cb->aio_offset, 304 cb->aio_buf, cb->aio_nbytes); 305 tsleep(curproc, PVM, "aioprc", hz); 306 #endif 307 fdp = curproc->p_fd; 308 /* 309 * Range check file descriptor 310 */ 311 fd = cb->aio_fildes; 312 fp = fdp->fd_ofiles[fd]; 313 314 aiov.iov_base = cb->aio_buf; 315 aiov.iov_len = cb->aio_nbytes; 316 317 auio.uio_iov = &aiov; 318 auio.uio_iovcnt = 1; 319 auio.uio_offset = cb->aio_offset; 320 auio.uio_resid = cb->aio_nbytes; 321 cnt = cb->aio_nbytes; 322 auio.uio_segflg = UIO_USERSPACE; 323 auio.uio_procp = curproc; 324 325 if (cb->aio_lio_opcode == LIO_READ) { 326 auio.uio_rw = UIO_READ; 327 error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred); 328 } else { 329 auio.uio_rw = UIO_WRITE; 330 error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred); 331 } 332 333 if (error) { 334 if (auio.uio_resid != cnt) { 335 if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) 336 error = 0; 337 if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE)) 338 psignal(userp, SIGPIPE); 339 } 340 } 341 342 cnt -= auio.uio_resid; 343 cb->_aiocb_private.error = error; 344 cb->_aiocb_private.status = cnt; 345 346 return; 347 348 } 349 350 /* 351 * The AIO daemon. 352 */ 353 static void 354 aio_startproc(void *uproc) 355 { 356 struct aioproclist *aiop; 357 358 /* 359 * Allocate and ready the aio control info 360 */ 361 aiop = malloc(sizeof *aiop, M_AIO, M_WAITOK); 362 aiop->aioproc = curproc; 363 aiop->aioprocflags |= AIOP_FREE; 364 TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list); 365 TAILQ_INIT(&aiop->jobtorun); 366 367 /* 368 * Get rid of current address space 369 */ 370 if (curproc->p_vmspace->vm_refcnt == 1) { 371 if (curproc->p_vmspace->vm_shm) 372 shmexit(curproc); 373 pmap_remove_pages(&curproc->p_vmspace->vm_pmap, 0, USRSTACK); 374 vm_map_remove(&curproc->p_vmspace->vm_map, 0, USRSTACK); 375 } else { 376 vmspace_exec(curproc); 377 } 378 379 /* 380 * Make up a name for the daemon 381 */ 382 strcpy(curproc->p_comm, "aiodaemon"); 383 384 /* 385 * Get rid of our current filedescriptors 386 */ 387 fdfree(curproc); 388 curproc->p_fd = NULL; 389 curproc->p_ucred = crcopy(curproc->p_ucred); 390 curproc->p_ucred->cr_uid = 0; 391 curproc->p_ucred->cr_groups[0] = 1; 392 curproc->p_flag |= P_SYSTEM; 393 394 #ifdef DEBUGAIO 395 printf("Started new process: %d\n", curproc->p_pid); 396 #endif 397 wakeup(uproc); 398 399 while(1) { 400 struct vmspace *myvm, *tmpvm; 401 struct proc *cp = curproc; 402 struct proc *up = NULL; 403 struct aiocblist *aiocbe; 404 405 if ((aiop->aioprocflags & AIOP_FREE) == 0) { 406 TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list); 407 aiop->aioprocflags |= AIOP_FREE; 408 } 409 tsleep(curproc, PZERO, "aiordy", 0); 410 if (aiop->aioprocflags & AIOP_FREE) { 411 TAILQ_REMOVE(&aio_freeproc, aiop, list); 412 TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list); 413 aiop->aioprocflags &= ~AIOP_FREE; 414 } 415 416 myvm = curproc->p_vmspace; 417 418 while ( aiocbe = aio_selectjob(aiop)) { 419 struct aiocb *cb; 420 struct kaioinfo *ki; 421 struct proc *userp; 422 423 cb = &aiocbe->uaiocb; 424 userp = aiocbe->userproc; 425 ki = userp->p_aioinfo; 426 427 aiocbe->jobstate = JOBST_JOBRUNNING; 428 if (userp != cp) { 429 tmpvm = curproc->p_vmspace; 430 curproc->p_vmspace = userp->p_vmspace; 431 ++curproc->p_vmspace->vm_refcnt; 432 pmap_activate(curproc); 433 if (tmpvm != myvm) { 434 vmspace_free(tmpvm); 435 } 436 if (curproc->p_fd) 437 fdfree(curproc); 438 curproc->p_fd = fdshare(userp); 439 cp = userp; 440 } 441 442 ki->kaio_active_count++; 443 aiocbe->jobaioproc = aiop; 444 aio_process(aiocbe); 445 --ki->kaio_active_count; 446 447 aiocbe->jobstate = JOBST_JOBFINISHED; 448 449 if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) { 450 aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE; 451 TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 452 } else { 453 TAILQ_REMOVE(&ki->kaio_jobqueue, 454 aiocbe, plist); 455 TAILQ_INSERT_TAIL(&ki->kaio_jobdone, 456 aiocbe, plist); 457 } 458 459 if (aiocbe->jobflags & AIOCBLIST_RUNDOWN) { 460 wakeup(aiocbe); 461 aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN; 462 } 463 464 if (aiocbe->jobflags & AIOCBLIST_SUSPEND) { 465 wakeup(userp); 466 aiocbe->jobflags &= ~AIOCBLIST_SUSPEND; 467 } 468 469 if (cb->aio_sigevent.sigev_notify == SIGEV_SIGNAL) { 470 psignal(userp, cb->aio_sigevent.sigev_signo); 471 } 472 } 473 474 if (cp != curproc) { 475 tmpvm = curproc->p_vmspace; 476 curproc->p_vmspace = myvm; 477 pmap_activate(curproc); 478 vmspace_free(tmpvm); 479 if (curproc->p_fd) 480 fdfree(curproc); 481 curproc->p_fd = NULL; 482 cp = curproc; 483 } 484 } 485 } 486 487 /* 488 * Create a new AIO daemon. 489 */ 490 static int 491 aio_newproc() { 492 int error; 493 int rval[2]; 494 struct rfork_args rfa; 495 struct proc *p; 496 497 rfa.flags = RFMEM | RFPROC | RFCFDG; 498 499 if (error = rfork(curproc, &rfa, &rval[0])) 500 return error; 501 502 cpu_set_fork_handler(p = pfind(rval[0]), aio_startproc, curproc); 503 504 #ifdef DEBUGAIO 505 printf("Waiting for new process: %d, count: %d\n", 506 curproc->p_pid, num_aio_procs); 507 #endif 508 509 error = tsleep(curproc, PZERO, "aiosta", 5*hz); 510 ++num_aio_procs; 511 512 return error; 513 514 } 515 516 /* 517 * Queue a new AIO request. 518 */ 519 static int 520 _aio_aqueue(struct proc *p, struct aiocb *job, int type) { 521 struct filedesc *fdp; 522 struct file *fp; 523 unsigned int fd; 524 525 int error; 526 int opcode; 527 struct aiocblist *aiocbe; 528 struct aioproclist *aiop; 529 struct kaioinfo *ki; 530 531 if (aiocbe = TAILQ_FIRST(&aio_freejobs)) { 532 TAILQ_REMOVE(&aio_freejobs, aiocbe, list); 533 } else { 534 aiocbe = malloc (sizeof *aiocbe, M_AIO, M_WAITOK); 535 } 536 537 error = copyin((caddr_t)job, 538 (caddr_t) &aiocbe->uaiocb, sizeof aiocbe->uaiocb); 539 if (error) { 540 TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 541 return error; 542 } 543 544 545 /* 546 * Get the fd info for process 547 */ 548 fdp = p->p_fd; 549 550 /* 551 * Range check file descriptor 552 */ 553 fd = aiocbe->uaiocb.aio_fildes; 554 if (fd >= fdp->fd_nfiles) { 555 TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 556 if (type == 0) { 557 suword(&job->_aiocb_private.status, -1); 558 suword(&job->_aiocb_private.error, EBADF); 559 } 560 return EBADF; 561 } 562 563 fp = fdp->fd_ofiles[fd]; 564 if ((fp == NULL) || ((fp->f_flag & FWRITE) == 0)) { 565 TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 566 if (type == 0) { 567 suword(&job->_aiocb_private.status, -1); 568 suword(&job->_aiocb_private.error, EBADF); 569 } 570 return EBADF; 571 } 572 573 if (aiocbe->uaiocb.aio_offset == -1LL) { 574 TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 575 if (type == 0) { 576 suword(&job->_aiocb_private.status, -1); 577 suword(&job->_aiocb_private.error, EINVAL); 578 } 579 return EINVAL; 580 } 581 582 #ifdef DEBUGAIO 583 printf("job addr: 0x%x, 0x%x, %d\n", job, &job->_aiocb_private.kernelinfo, jobrefid); 584 #endif 585 586 error = suword(&job->_aiocb_private.kernelinfo, jobrefid); 587 if (error) { 588 TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 589 if (type == 0) { 590 suword(&job->_aiocb_private.status, -1); 591 suword(&job->_aiocb_private.error, EINVAL); 592 } 593 return error; 594 } 595 596 aiocbe->uaiocb._aiocb_private.kernelinfo = (void *)jobrefid; 597 #ifdef DEBUGAIO 598 printf("aio_aqueue: New job: %d... ", jobrefid); 599 #endif 600 ++jobrefid; 601 602 if (type != LIO_NOP) { 603 aiocbe->uaiocb.aio_lio_opcode = type; 604 } 605 606 opcode = aiocbe->uaiocb.aio_lio_opcode; 607 if (opcode == LIO_NOP) { 608 TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 609 if (type == 0) { 610 suword(&job->_aiocb_private.status, -1); 611 suword(&job->_aiocb_private.error, 0); 612 } 613 return 0; 614 } 615 616 if ((opcode != LIO_NOP) && 617 (opcode != LIO_READ) && (opcode != LIO_WRITE)) { 618 TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 619 if (type == 0) { 620 suword(&job->_aiocb_private.status, -1); 621 suword(&job->_aiocb_private.error, EINVAL); 622 } 623 return EINVAL; 624 } 625 626 suword(&job->_aiocb_private.error, 0); 627 suword(&job->_aiocb_private.status, 0); 628 aiocbe->userproc = p; 629 aiocbe->jobflags = 0; 630 ki = p->p_aioinfo; 631 ++num_queue_count; 632 ++ki->kaio_queue_count; 633 634 retryproc: 635 if (aiop = TAILQ_FIRST(&aio_freeproc)) { 636 #ifdef DEBUGAIO 637 printf("found a free AIO process\n"); 638 #endif 639 TAILQ_REMOVE(&aio_freeproc, aiop, list); 640 TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list); 641 aiop->aioprocflags &= ~AIOP_FREE; 642 TAILQ_INSERT_TAIL(&aiop->jobtorun, aiocbe, list); 643 TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist); 644 aiocbe->jobstate = JOBST_JOBQPROC; 645 aiocbe->jobaioproc = aiop; 646 wakeup(aiop->aioproc); 647 } else if ((num_aio_procs < max_aio_procs) && 648 (ki->kaio_active_count < ki->kaio_maxactive_count)) { 649 if (error = aio_newproc()) { 650 #ifdef DEBUGAIO 651 printf("aio_aqueue: problem sleeping for starting proc: %d\n", 652 error); 653 #endif 654 } 655 goto retryproc; 656 } else { 657 #ifdef DEBUGAIO 658 printf("queuing to global queue\n"); 659 #endif 660 TAILQ_INSERT_TAIL(&aio_jobs, aiocbe, list); 661 TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist); 662 aiocbe->jobstate = JOBST_JOBQGLOBAL; 663 } 664 665 return 0; 666 } 667 668 static int 669 aio_aqueue(struct proc *p, struct aiocb *job, int type) { 670 struct kaioinfo *ki; 671 672 if (p->p_aioinfo == NULL) { 673 aio_init_aioinfo(p); 674 } 675 676 if (num_queue_count >= max_queue_count) 677 return EAGAIN; 678 679 ki = p->p_aioinfo; 680 if (ki->kaio_queue_count >= ki->kaio_qallowed_count) 681 return EAGAIN; 682 683 return _aio_aqueue(p, job, type); 684 } 685 686 /* 687 * Support the aio_return system call 688 */ 689 int 690 aio_return(struct proc *p, struct aio_return_args *uap, int *retval) { 691 int jobref, status; 692 struct aiocblist *cb; 693 struct kaioinfo *ki; 694 struct proc *userp; 695 696 ki = p->p_aioinfo; 697 if (ki == NULL) { 698 return EINVAL; 699 } 700 701 jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo); 702 if (jobref == -1) 703 return EINVAL; 704 705 706 for (cb = TAILQ_FIRST(&ki->kaio_jobdone); 707 cb; 708 cb = TAILQ_NEXT(cb, plist)) { 709 if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 710 retval[0] = cb->uaiocb._aiocb_private.status; 711 aio_free_entry(cb); 712 return 0; 713 } 714 } 715 716 status = fuword(&uap->aiocbp->_aiocb_private.status); 717 if (status == -1) 718 return 0; 719 720 return (EINVAL); 721 } 722 723 /* 724 * Rundown the jobs for a given process. 725 */ 726 void 727 aio_marksuspend(struct proc *p, int njobs, int *joblist, int set) { 728 struct aiocblist *aiocbe; 729 struct kaioinfo *ki; 730 731 ki = p->p_aioinfo; 732 if (ki == NULL) 733 return; 734 735 for (aiocbe = TAILQ_FIRST(&ki->kaio_jobqueue); 736 aiocbe; 737 aiocbe = TAILQ_NEXT(aiocbe, plist)) { 738 739 if (njobs) { 740 741 int i; 742 743 for(i = 0; i < njobs; i++) { 744 if (((int) aiocbe->uaiocb._aiocb_private.kernelinfo) == joblist[i]) 745 break; 746 } 747 748 if (i == njobs) 749 continue; 750 } 751 752 if (set) 753 aiocbe->jobflags |= AIOCBLIST_SUSPEND; 754 else 755 aiocbe->jobflags &= ~AIOCBLIST_SUSPEND; 756 } 757 } 758 759 /* 760 * Allow a process to wakeup when any of the I/O requests are 761 * completed. 762 */ 763 int 764 aio_suspend(struct proc *p, struct aio_suspend_args *uap, int *retval) { 765 struct timeval atv, utv; 766 struct timespec ts; 767 struct aiocb *const *cbptr, *cbp; 768 struct kaioinfo *ki; 769 struct aiocblist *cb; 770 int i; 771 int error, s, timo; 772 int *joblist; 773 774 775 timo = 0; 776 if (uap->timeout) { 777 /* 778 * Get timespec struct 779 */ 780 if (error = copyin((caddr_t) uap->timeout, (caddr_t) &ts, sizeof ts)) { 781 return error; 782 } 783 784 if (ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000) 785 return (EINVAL); 786 787 TIMESPEC_TO_TIMEVAL(&atv, &ts) 788 if (itimerfix(&atv)) 789 return (EINVAL); 790 /* 791 * XXX this is not as careful as settimeofday() about minimising 792 * interrupt latency. The hzto() interface is inconvenient as usual. 793 */ 794 s = splclock(); 795 timevaladd(&atv, &time); 796 timo = hzto(&atv); 797 splx(s); 798 if (timo == 0) 799 timo = 1; 800 } 801 802 ki = p->p_aioinfo; 803 if (ki == NULL) 804 return EAGAIN; 805 806 joblist = malloc(uap->nent * sizeof(int), M_TEMP, M_WAITOK); 807 cbptr = uap->aiocbp; 808 809 for(i=0;i<uap->nent;i++) { 810 cbp = (struct aiocb *) fuword((caddr_t) &cbptr[i]); 811 #ifdef DEBUGAIO 812 printf("cbp: %x\n", cbp); 813 #endif 814 joblist[i] = fuword(&cbp->_aiocb_private.kernelinfo); 815 cbptr++; 816 } 817 818 #ifdef DEBUGAIO 819 printf("Suspend, timeout: %d clocks, jobs:", timo); 820 for(i=0;i<uap->nent;i++) 821 printf(" %d", joblist[i]); 822 printf("\n"); 823 #endif 824 825 while (1) { 826 for (cb = TAILQ_FIRST(&ki->kaio_jobdone); 827 cb; 828 cb = TAILQ_NEXT(cb, plist)) { 829 for(i=0;i<uap->nent;i++) { 830 if (((int) cb->uaiocb._aiocb_private.kernelinfo) == joblist[i]) { 831 free(joblist, M_TEMP); 832 return 0; 833 } 834 } 835 } 836 837 aio_marksuspend(p, uap->nent, joblist, 1); 838 #ifdef DEBUGAIO 839 printf("Suspending -- waiting for all I/O's to complete: "); 840 for(i=0;i<uap->nent;i++) 841 printf(" %d", joblist[i]); 842 printf("\n"); 843 #endif 844 error = tsleep(p, PRIBIO|PCATCH, "aiospn", timo); 845 aio_marksuspend(p, uap->nent, joblist, 0); 846 847 if (error == EINTR) { 848 #ifdef DEBUGAIO 849 printf(" signal\n"); 850 #endif 851 free(joblist, M_TEMP); 852 return EINTR; 853 } else if (error == EWOULDBLOCK) { 854 #ifdef DEBUGAIO 855 printf(" timeout\n"); 856 #endif 857 free(joblist, M_TEMP); 858 return EAGAIN; 859 } 860 #ifdef DEBUGAIO 861 printf("\n"); 862 #endif 863 } 864 865 /* NOTREACHED */ 866 return EINVAL; 867 } 868 869 /* 870 * aio_cancel at the kernel level is a NOOP right now. It 871 * might be possible to support it partially in user mode, or 872 * in kernel mode later on. 873 */ 874 int 875 aio_cancel(struct proc *p, struct aio_cancel_args *uap, int *retval) { 876 return AIO_NOTCANCELLED; 877 } 878 879 /* 880 * aio_error is implemented in the kernel level for compatibility 881 * purposes only. For a user mode async implementation, it would be 882 * best to do it in a userland subroutine. 883 */ 884 int 885 aio_error(struct proc *p, struct aio_error_args *uap, int *retval) { 886 int activeflag, errorcode; 887 struct aiocblist *cb; 888 struct kaioinfo *ki; 889 int jobref; 890 int error, status; 891 892 ki = p->p_aioinfo; 893 if (ki == NULL) 894 return EINVAL; 895 896 jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo); 897 if (jobref == -1) 898 return EFAULT; 899 900 for (cb = TAILQ_FIRST(&ki->kaio_jobdone); 901 cb; 902 cb = TAILQ_NEXT(cb, plist)) { 903 904 if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 905 retval[0] = cb->uaiocb._aiocb_private.error; 906 return 0; 907 } 908 } 909 910 for (cb = TAILQ_FIRST(&ki->kaio_jobqueue); 911 cb; 912 cb = TAILQ_NEXT(cb, plist)) { 913 914 if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 915 retval[0] = EINPROGRESS; 916 return 0; 917 } 918 } 919 920 /* 921 * Hack for lio 922 */ 923 status = fuword(&uap->aiocbp->_aiocb_private.status); 924 if (status == -1) { 925 return fuword(&uap->aiocbp->_aiocb_private.error); 926 } 927 return EINVAL; 928 } 929 930 int 931 aio_read(struct proc *p, struct aio_read_args *uap, int *retval) { 932 struct filedesc *fdp; 933 struct file *fp; 934 struct uio auio; 935 struct iovec aiov; 936 unsigned int fd; 937 int cnt; 938 struct aiocb iocb; 939 int error, pmodes; 940 941 pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes); 942 if ((pmodes & AIO_PMODE_SYNC) == 0) { 943 return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ); 944 } 945 946 /* 947 * Get control block 948 */ 949 if (error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb)) 950 return error; 951 952 /* 953 * Get the fd info for process 954 */ 955 fdp = p->p_fd; 956 957 /* 958 * Range check file descriptor 959 */ 960 fd = iocb.aio_fildes; 961 if (fd >= fdp->fd_nfiles) 962 return EBADF; 963 fp = fdp->fd_ofiles[fd]; 964 if ((fp == NULL) || ((fp->f_flag & FREAD) == 0)) 965 return EBADF; 966 if (iocb.aio_offset == -1LL) 967 return EINVAL; 968 969 auio.uio_resid = iocb.aio_nbytes; 970 if (auio.uio_resid < 0) 971 return (EINVAL); 972 973 /* 974 * Process sync simply -- queue async request. 975 */ 976 if ((iocb._aiocb_private.privatemodes & AIO_PMODE_SYNC) == 0) { 977 return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ); 978 } 979 980 aiov.iov_base = iocb.aio_buf; 981 aiov.iov_len = iocb.aio_nbytes; 982 983 auio.uio_iov = &aiov; 984 auio.uio_iovcnt = 1; 985 auio.uio_offset = iocb.aio_offset; 986 auio.uio_rw = UIO_READ; 987 auio.uio_segflg = UIO_USERSPACE; 988 auio.uio_procp = p; 989 990 cnt = iocb.aio_nbytes; 991 error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred); 992 if (error && 993 (auio.uio_resid != cnt) && 994 (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) 995 error = 0; 996 cnt -= auio.uio_resid; 997 *retval = cnt; 998 return error; 999 } 1000 1001 int 1002 aio_write(struct proc *p, struct aio_write_args *uap, int *retval) { 1003 struct filedesc *fdp; 1004 struct file *fp; 1005 struct uio auio; 1006 struct iovec aiov; 1007 unsigned int fd; 1008 int cnt; 1009 struct aiocb iocb; 1010 int error; 1011 int pmodes; 1012 1013 /* 1014 * Process sync simply -- queue async request. 1015 */ 1016 pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes); 1017 if ((pmodes & AIO_PMODE_SYNC) == 0) { 1018 return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_WRITE); 1019 } 1020 1021 if (error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb)) 1022 return error; 1023 1024 /* 1025 * Get the fd info for process 1026 */ 1027 fdp = p->p_fd; 1028 1029 /* 1030 * Range check file descriptor 1031 */ 1032 fd = iocb.aio_fildes; 1033 if (fd >= fdp->fd_nfiles) 1034 return EBADF; 1035 fp = fdp->fd_ofiles[fd]; 1036 if ((fp == NULL) || ((fp->f_flag & FWRITE) == 0)) 1037 return EBADF; 1038 if (iocb.aio_offset == -1LL) 1039 return EINVAL; 1040 1041 aiov.iov_base = iocb.aio_buf; 1042 aiov.iov_len = iocb.aio_nbytes; 1043 auio.uio_iov = &aiov; 1044 auio.uio_iovcnt = 1; 1045 auio.uio_offset = iocb.aio_offset; 1046 1047 auio.uio_resid = iocb.aio_nbytes; 1048 if (auio.uio_resid < 0) 1049 return (EINVAL); 1050 1051 auio.uio_rw = UIO_WRITE; 1052 auio.uio_segflg = UIO_USERSPACE; 1053 auio.uio_procp = p; 1054 1055 cnt = iocb.aio_nbytes; 1056 error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred); 1057 if (error) { 1058 if (auio.uio_resid != cnt) { 1059 if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) 1060 error = 0; 1061 if (error == EPIPE) 1062 psignal(p, SIGPIPE); 1063 } 1064 } 1065 cnt -= auio.uio_resid; 1066 *retval = cnt; 1067 return error; 1068 } 1069 1070 int 1071 lio_listio(struct proc *p, struct lio_listio_args *uap, int *retval) { 1072 int cnt, nent, nentqueued; 1073 struct aiocb *iocb, * const *cbptr; 1074 struct aiocblist *cb; 1075 struct kaioinfo *ki; 1076 int error, runningcode; 1077 int i; 1078 1079 if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) 1080 return EINVAL; 1081 1082 nent = uap->nent; 1083 if (nent > AIO_LISTIO_MAX) 1084 return EINVAL; 1085 1086 if (p->p_aioinfo == NULL) { 1087 aio_init_aioinfo(p); 1088 } 1089 1090 if ((nent + num_queue_count) > max_queue_count) 1091 return EAGAIN; 1092 1093 ki = p->p_aioinfo; 1094 if ((nent + ki->kaio_queue_count) > ki->kaio_qallowed_count) 1095 return EAGAIN; 1096 1097 /* 1098 * reserve resources, remember that we have to unwind part of them sometimes 1099 */ 1100 num_queue_count += nent; 1101 ki->kaio_queue_count += nent; 1102 nentqueued = 0; 1103 1104 /* 1105 * get pointers to the list of I/O requests 1106 iocbvec = malloc(uap->nent * sizeof(struct aiocb *), M_TEMP, M_WAITOK); 1107 */ 1108 1109 cbptr = uap->acb_list; 1110 for(i = 0; i < uap->nent; i++) { 1111 iocb = (struct aiocb *) fuword((caddr_t) &cbptr[i]); 1112 error = aio_aqueue(p, iocb, 0); 1113 if (error == 0) 1114 nentqueued++; 1115 } 1116 1117 if (nentqueued == 0) 1118 return EIO; 1119 1120 runningcode = 0; 1121 if (nentqueued != nent) 1122 runningcode = EIO; 1123 1124 if (uap->mode == LIO_WAIT) { 1125 while (1) { 1126 for(i = 0; i < uap->nent; i++) { 1127 int found; 1128 int jobref, command, status; 1129 1130 iocb = (struct aiocb *) fuword((caddr_t) &cbptr[i]); 1131 command = fuword(&iocb->aio_lio_opcode); 1132 if (command == LIO_NOP) 1133 continue; 1134 1135 status = fuword(&iocb->_aiocb_private.status); 1136 if (status == -1) 1137 continue; 1138 jobref = fuword(&iocb->_aiocb_private.kernelinfo); 1139 1140 found = 0; 1141 for (cb = TAILQ_FIRST(&ki->kaio_jobdone); 1142 cb; 1143 cb = TAILQ_NEXT(cb, plist)) { 1144 if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 1145 found++; 1146 break; 1147 } 1148 } 1149 if (found == 0) 1150 break; 1151 } 1152 1153 if (i == uap->nent) { 1154 return runningcode; 1155 } 1156 1157 aio_marksuspend(p, 0, 0, 1); 1158 error = tsleep(p, PRIBIO|PCATCH, "aiospn", 0); 1159 aio_marksuspend(p, 0, 0, 0); 1160 1161 if (error == EINTR) { 1162 return EINTR; 1163 } else if (error == EWOULDBLOCK) { 1164 return EAGAIN; 1165 } 1166 1167 } 1168 } 1169 1170 return runningcode; 1171 } 1172