/*
 * Copyright (c) 1997 John S. Dyson.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. John S. Dyson's name may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * DISCLAIMER:  This code isn't warranted to do anything useful.  Anything
 * bad that happens because of using this software isn't the responsibility
 * of the author.  This software is distributed AS-IS.
 *
 * $Id: vfs_aio.c,v 1.1 1997/06/16 00:27:26 dyson Exp $
 */

/*
 * This file contains support for the POSIX.4 AIO facility.
 *
 * The initial version provides only the (bogus) synchronous semantics
 * but will support async in the future.  Note that a bit
 * in a private field allows the user mode subroutine to adapt
 * the kernel operations to true POSIX.4 for future compatibility.
 *
 * This code is used to support true POSIX.4 AIO/LIO with the help
 * of a user mode subroutine package.  Note that eventually more support
 * will be pushed into the kernel.
 */
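
/*
 * A rough sketch of the userland-visible flow, for orientation only
 * (the variable names and the polling loop are illustrative, not part
 * of this file): a caller submits a request, polls aio_error() until
 * the job leaves EINPROGRESS, then reaps the result exactly once with
 * aio_return().  The AIO_PMODE_SYNC bit in the private modes field is
 * what lets the user mode package select the synchronous path below.
 */
#if 0
	struct aiocb cb;
	int nread;

	bzero(&cb, sizeof cb);
	cb.aio_fildes = fd;		/* illustrative descriptor */
	cb.aio_buf = buffer;
	cb.aio_nbytes = sizeof buffer;
	cb.aio_offset = 0;

	aio_read(&cb);			/* queue (or run synchronously) */
	while (aio_error(&cb) == EINPROGRESS)
		;			/* poll; a real caller would aio_suspend() */
	nread = aio_return(&cb);	/* reap status; frees the kernel entry */
#endif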

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/namei.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/dirent.h>
#include <sys/signalvar.h>
#include <sys/queue.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>
#include <sys/aio.h>

#define AIOCBLIST_CANCELLED	0x1
#define AIOCBLIST_RUNDOWN	0x4
#define AIOCBLIST_ASYNCFREE	0x8
#define AIOCBLIST_SUSPEND	0x10

#if 0
#define DEBUGAIO
#define DIAGNOSTIC
#endif

static int jobrefid;

/*
 * Job states: a job moves from JOBST_NULL (on the free list) to one of
 * the queued states, to JOBST_JOBRUNNING while an AIO daemon works on
 * it, and finally to JOBST_JOBFINISHED until it is reaped or freed.
 */
#define JOBST_NULL		0x0
#define JOBST_JOBQPROC		0x1
#define JOBST_JOBQGLOBAL	0x2
#define JOBST_JOBRUNNING	0x3
#define JOBST_JOBFINISHED	0x4

#define MAX_AIO_PER_PROC	32
#define MAX_AIO_QUEUE_PER_PROC	256	/* Bigger than AIO_LISTIO_MAX */
#define MAX_AIO_PROCS		128
#define MAX_AIO_QUEUE		1024	/* Bigger than AIO_LISTIO_MAX */
#define TARGET_AIO_PROCS	64

/*
 * Job queue item.  Each job is on two queues at once: "list" links it
 * onto the global queue, a daemon's jobtorun queue, or the free list,
 * while "plist" links it onto the owning process's queue/done lists.
 */
struct aiocblist {
	TAILQ_ENTRY(aiocblist) list;	/* List of jobs */
	TAILQ_ENTRY(aiocblist) plist;	/* List of jobs for proc */
	int	jobflags;
	int	jobstate;
	struct	proc *userproc;		/* User process */
	struct	aioproclist *jobaioproc; /* AIO process descriptor */
	struct	aiocb uaiocb;		/* Kernel I/O control block */
};

#define AIOP_FREE	0x1		/* proc on free queue */
/*
 * AIO process info
 */
struct aioproclist {
	int	aioprocflags;		/* AIO proc flags */
	TAILQ_ENTRY(aioproclist) list;	/* List of processes */
	struct	proc *aioproc;		/* The AIO thread */
	TAILQ_HEAD(,aiocblist) jobtorun; /* suggested job to run */
};

struct kaioinfo {
	int	kaio_maxactive_count;	/* maximum number of AIOs */
	int	kaio_active_count;	/* number of currently used AIOs */
	int	kaio_qallowed_count;	/* maximum size of AIO queue */
	int	kaio_queue_count;	/* size of AIO queue */
	TAILQ_HEAD(,aiocblist) kaio_jobqueue;	/* job queue for process */
	TAILQ_HEAD(,aiocblist) kaio_jobdone;	/* done queue for process */
};

TAILQ_HEAD(,aioproclist) aio_freeproc, aio_activeproc;
TAILQ_HEAD(,aiocblist) aio_jobs;	/* Async job list */
TAILQ_HEAD(,aiocblist) aio_freejobs;

int max_aio_procs = MAX_AIO_PROCS;
int num_aio_procs = 0;
int target_aio_procs = TARGET_AIO_PROCS;

int max_queue_count = MAX_AIO_QUEUE;
int num_queue_count = 0;

void aio_init_aioinfo(struct proc *p);
void aio_onceonly(void);
void aio_proc_rundown(struct proc *p);
int aio_free_entry(struct aiocblist *aiocbe);
void aio_cancel_internal(struct aiocblist *aiocbe);
void aio_process(struct aiocblist *aiocbe);
void pmap_newvmspace(struct vmspace *);
static int aio_newproc(void);
static int aio_aqueue(struct proc *p, struct aiocb *job, int type);
static void aio_marksuspend(struct proc *p, int njobs, int *joblist, int set);

SYSINIT(aio, SI_SUB_VFS, SI_ORDER_ANY, aio_onceonly, NULL);

/*
 * Startup initialization
 */
void
aio_onceonly(void) {
	TAILQ_INIT(&aio_freeproc);
	TAILQ_INIT(&aio_activeproc);
	TAILQ_INIT(&aio_jobs);
	TAILQ_INIT(&aio_freejobs);
}

/*
 * Init the per-process aioinfo structure.
 */
void
aio_init_aioinfo(struct proc *p) {
	struct kaioinfo *ki;

	if (p->p_aioinfo == NULL) {
		ki = malloc(sizeof(struct kaioinfo), M_AIO, M_WAITOK);
		p->p_aioinfo = ki;
		ki->kaio_maxactive_count = MAX_AIO_PER_PROC;
		ki->kaio_active_count = 0;
		ki->kaio_qallowed_count = MAX_AIO_QUEUE_PER_PROC;
		ki->kaio_queue_count = 0;
		TAILQ_INIT(&ki->kaio_jobdone);
		TAILQ_INIT(&ki->kaio_jobqueue);
	}
}
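
/*
 * A hypothetical DEBUGAIO aid, sketched here only to make the job
 * state constants above concrete; no such helper exists in this file.
 */
#if 0
static const char *
jobst_name(int jobstate) {
	switch (jobstate) {
	case JOBST_NULL:		return "null (free)";
	case JOBST_JOBQPROC:		return "queued to daemon";
	case JOBST_JOBQGLOBAL:		return "queued globally";
	case JOBST_JOBRUNNING:		return "running";
	case JOBST_JOBFINISHED:		return "finished";
	default:			return "unknown";
	}
}
#endif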

/*
 * Free a job entry.  Wait for completion if it is currently
 * active, but don't delay forever.  If we delay, we return
 * a flag that says that we have to restart the queue scan.
 */
int
aio_free_entry(struct aiocblist *aiocbe) {
	struct kaioinfo *ki;
	struct aioproclist *aiop;
	struct proc *p;

	if (aiocbe->jobstate == JOBST_NULL)
		panic("aio_free_entry: freeing already free job");

	p = aiocbe->userproc;
	ki = p->p_aioinfo;
	if (ki == NULL)
		panic("aio_free_entry: missing p->p_aioinfo");

	if (aiocbe->jobstate == JOBST_JOBRUNNING) {
		if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE)
			return 0;
		aiocbe->jobflags |= AIOCBLIST_RUNDOWN;
		if (tsleep(aiocbe, PRIBIO|PCATCH, "jobwai", hz*5)) {
			/*
			 * Timed out or caught a signal: let the daemon
			 * free the entry when the I/O finishes.
			 */
			aiocbe->jobflags |= AIOCBLIST_ASYNCFREE;
			aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN;
			return 1;
		}
		aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN;
	}
	aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE;

	if (ki->kaio_queue_count <= 0)
		panic("aio_free_entry: process queue size <= 0");
	if (num_queue_count <= 0)
		panic("aio_free_entry: system wide queue size <= 0");

	--ki->kaio_queue_count;
	--num_queue_count;

	if (aiocbe->jobstate == JOBST_JOBQPROC) {
		aiop = aiocbe->jobaioproc;
		TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list);
	} else if (aiocbe->jobstate == JOBST_JOBQGLOBAL) {
		TAILQ_REMOVE(&aio_jobs, aiocbe, list);
	} else if (aiocbe->jobstate == JOBST_JOBFINISHED) {
		TAILQ_REMOVE(&ki->kaio_jobdone, aiocbe, plist);
	}
	TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
	aiocbe->jobstate = JOBST_NULL;
	return 0;
}

/*
 * Rundown the jobs for a given process.
 */
void
aio_proc_rundown(struct proc *p) {
	struct kaioinfo *ki;
	struct aiocblist *aiocbe, *aiocbn;

	ki = p->p_aioinfo;
	if (ki == NULL)
		return;

restart1:
	for (aiocbe = TAILQ_FIRST(&ki->kaio_jobdone);
	    aiocbe;
	    aiocbe = aiocbn) {
		aiocbn = TAILQ_NEXT(aiocbe, plist);
		if (aio_free_entry(aiocbe))
			goto restart1;
	}

restart2:
	for (aiocbe = TAILQ_FIRST(&ki->kaio_jobqueue);
	    aiocbe;
	    aiocbe = aiocbn) {
		aiocbn = TAILQ_NEXT(aiocbe, plist);
		if (aio_free_entry(aiocbe))
			goto restart2;
	}
	free(ki, M_AIO);
	p->p_aioinfo = NULL;
}

/*
 * Select a job to run (called by an AIO daemon)
 */
static struct aiocblist *
aio_selectjob(struct aioproclist *aiop) {
	struct aiocblist *aiocbe;

	/*
	 * Jobs explicitly handed to this daemon take priority.
	 */
	aiocbe = TAILQ_FIRST(&aiop->jobtorun);
	if (aiocbe) {
		TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list);
		return aiocbe;
	}

	/*
	 * Otherwise take the first global job whose owner is still
	 * under its per-process active I/O limit.
	 */
	for (aiocbe = TAILQ_FIRST(&aio_jobs);
	    aiocbe;
	    aiocbe = TAILQ_NEXT(aiocbe, list)) {
		struct kaioinfo *ki;
		struct proc *userp;

		userp = aiocbe->userproc;
		ki = userp->p_aioinfo;

		if (ki->kaio_active_count < ki->kaio_maxactive_count) {
			TAILQ_REMOVE(&aio_jobs, aiocbe, list);
			return aiocbe;
		}
	}

	return NULL;
}

/*
 * The AIO activity proper.
 */
void
aio_process(struct aiocblist *aiocbe) {
	struct filedesc *fdp;
	struct proc *userp;
	struct aiocb *cb;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	unsigned int fd;
	int cnt;
	int error;

	userp = aiocbe->userproc;
	cb = &aiocbe->uaiocb;

#ifdef DEBUGAIO
	printf("fd: %d, offset: 0x%x, address: 0x%x, size: %d\n",
		cb->aio_fildes, (int) cb->aio_offset,
		cb->aio_buf, cb->aio_nbytes);
	tsleep(curproc, PVM, "aioprc", hz);
#endif
	fdp = curproc->p_fd;
	/*
	 * The file descriptor was validated in _aio_aqueue; at this
	 * point the daemon shares the user's descriptor table.
	 */
	fd = cb->aio_fildes;
	fp = fdp->fd_ofiles[fd];

	aiov.iov_base = cb->aio_buf;
	aiov.iov_len = cb->aio_nbytes;

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = cb->aio_offset;
	auio.uio_resid = cb->aio_nbytes;
	cnt = cb->aio_nbytes;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = curproc;

	if (cb->aio_lio_opcode == LIO_READ) {
		auio.uio_rw = UIO_READ;
		error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
	} else {
		auio.uio_rw = UIO_WRITE;
		error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred);
	}

	if (error) {
		/*
		 * If some data was transferred, an interrupted or
		 * restartable error is reported as success.
		 */
		if (auio.uio_resid != cnt) {
			if (error == ERESTART || error == EINTR ||
			    error == EWOULDBLOCK)
				error = 0;
			if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE))
				psignal(userp, SIGPIPE);
		}
	}

	cnt -= auio.uio_resid;
	cb->_aiocb_private.error = error;
	cb->_aiocb_private.status = cnt;
}
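
/*
 * Completion can also be signalled: the daemon loop below psignal()s
 * the submitter when sigev_notify is SIGEV_SIGNAL.  An illustrative
 * (hypothetical, userland) setup under that assumption:
 */
#if 0
	struct aiocb cb;

	bzero(&cb, sizeof cb);
	cb.aio_fildes = fd;			/* illustrative descriptor */
	cb.aio_buf = buffer;
	cb.aio_nbytes = sizeof buffer;
	cb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
	cb.aio_sigevent.sigev_signo = SIGUSR1;	/* handler installed elsewhere */
	aio_read(&cb);				/* SIGUSR1 arrives on completion */
#endif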

/*
 * The AIO daemon.
 */
static void
aio_startproc(void *uproc)
{
	struct aioproclist *aiop;

	/*
	 * Allocate and ready the aio control info
	 */
	aiop = malloc(sizeof *aiop, M_AIO, M_WAITOK);
	aiop->aioproc = curproc;
	aiop->aioprocflags = AIOP_FREE;	/* malloc'ed memory is not zeroed */
	TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list);
	TAILQ_INIT(&aiop->jobtorun);

	/*
	 * Get rid of current address space
	 */
	if (curproc->p_vmspace->vm_refcnt == 1) {
		if (curproc->p_vmspace->vm_shm)
			shmexit(curproc);
		pmap_remove_pages(&curproc->p_vmspace->vm_pmap, 0, USRSTACK);
		vm_map_remove(&curproc->p_vmspace->vm_map, 0, USRSTACK);
	} else {
		vmspace_exec(curproc);
	}

	/*
	 * Make up a name for the daemon
	 */
	strcpy(curproc->p_comm, "aiodaemon");

	/*
	 * Get rid of our current filedescriptors
	 */
	fdfree(curproc);
	curproc->p_fd = NULL;
	curproc->p_ucred = crcopy(curproc->p_ucred);
	curproc->p_ucred->cr_uid = 0;
	curproc->p_ucred->cr_groups[0] = 1;
	curproc->p_flag |= P_SYSTEM;

#ifdef DEBUGAIO
	printf("Started new process: %d\n", curproc->p_pid);
#endif
	wakeup(uproc);

	while (1) {
		struct vmspace *myvm, *tmpvm;
		struct proc *cp = curproc;
		struct aiocblist *aiocbe;

		if ((aiop->aioprocflags & AIOP_FREE) == 0) {
			TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list);
			aiop->aioprocflags |= AIOP_FREE;
		}
		tsleep(curproc, PZERO, "aiordy", 0);
		if (aiop->aioprocflags & AIOP_FREE) {
			TAILQ_REMOVE(&aio_freeproc, aiop, list);
			TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list);
			aiop->aioprocflags &= ~AIOP_FREE;
		}

		myvm = curproc->p_vmspace;

		while ((aiocbe = aio_selectjob(aiop)) != NULL) {
			struct aiocb *cb;
			struct kaioinfo *ki;
			struct proc *userp;

			cb = &aiocbe->uaiocb;
			userp = aiocbe->userproc;
			ki = userp->p_aioinfo;

			aiocbe->jobstate = JOBST_JOBRUNNING;
			if (userp != cp) {
				/*
				 * Borrow the user's address space and
				 * descriptor table for the duration of
				 * the I/O.
				 */
				tmpvm = curproc->p_vmspace;
				curproc->p_vmspace = userp->p_vmspace;
				++curproc->p_vmspace->vm_refcnt;
				pmap_activate(curproc);
				if (tmpvm != myvm) {
					vmspace_free(tmpvm);
				}
				if (curproc->p_fd)
					fdfree(curproc);
				curproc->p_fd = fdshare(userp);
				cp = userp;
			}

			ki->kaio_active_count++;
			aiocbe->jobaioproc = aiop;
			aio_process(aiocbe);
			--ki->kaio_active_count;

			aiocbe->jobstate = JOBST_JOBFINISHED;

			if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) {
				aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE;
				TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
			} else {
				TAILQ_REMOVE(&ki->kaio_jobqueue,
					aiocbe, plist);
				TAILQ_INSERT_TAIL(&ki->kaio_jobdone,
					aiocbe, plist);
			}

			if (aiocbe->jobflags & AIOCBLIST_RUNDOWN) {
				wakeup(aiocbe);
				aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN;
			}

			if (aiocbe->jobflags & AIOCBLIST_SUSPEND) {
				wakeup(userp);
				aiocbe->jobflags &= ~AIOCBLIST_SUSPEND;
			}

			if (cb->aio_sigevent.sigev_notify == SIGEV_SIGNAL) {
				psignal(userp, cb->aio_sigevent.sigev_signo);
			}
		}

		/*
		 * Restore our own address space before sleeping again.
		 */
		if (cp != curproc) {
			tmpvm = curproc->p_vmspace;
			curproc->p_vmspace = myvm;
			pmap_activate(curproc);
			vmspace_free(tmpvm);
			if (curproc->p_fd)
				fdfree(curproc);
			curproc->p_fd = NULL;
			cp = curproc;
		}
	}
}

/*
 * Create a new AIO daemon.
 */
static int
aio_newproc(void) {
	int error;
	int rval[2];
	struct rfork_args rfa;
	struct proc *p;

	rfa.flags = RFMEM | RFPROC | RFCFDG;

	if ((error = rfork(curproc, &rfa, &rval[0])) != 0)
		return error;

	cpu_set_fork_handler(p = pfind(rval[0]), aio_startproc, curproc);

#ifdef DEBUGAIO
	printf("Waiting for new process: %d, count: %d\n",
		curproc->p_pid, num_aio_procs);
#endif

	error = tsleep(curproc, PZERO, "aiosta", 5*hz);
	++num_aio_procs;

	return error;
}
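
/*
 * In outline, the daemon loop above borrows the submitting process's
 * address space for the duration of each job.  A condensed,
 * illustrative restatement (not additional code):
 */
#if 0
	myvm = curproc->p_vmspace;		/* the daemon's own vmspace */

	curproc->p_vmspace = userp->p_vmspace;	/* adopt the user's */
	++curproc->p_vmspace->vm_refcnt;
	pmap_activate(curproc);			/* switch hardware context */
	curproc->p_fd = fdshare(userp);		/* see the user's files */

	aio_process(aiocbe);			/* UIO_USERSPACE now resolves
						 * against the user's map */

	tmpvm = curproc->p_vmspace;		/* the user's vmspace */
	curproc->p_vmspace = myvm;		/* restore the daemon's */
	pmap_activate(curproc);
	vmspace_free(tmpvm);			/* drop the extra reference */
#endif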

/*
 * Queue a new AIO request.
 */
static int
_aio_aqueue(struct proc *p, struct aiocb *job, int type) {
	struct filedesc *fdp;
	struct file *fp;
	unsigned int fd;
	int error;
	int opcode;
	struct aiocblist *aiocbe;
	struct aioproclist *aiop;
	struct kaioinfo *ki;

	if ((aiocbe = TAILQ_FIRST(&aio_freejobs)) != NULL) {
		TAILQ_REMOVE(&aio_freejobs, aiocbe, list);
	} else {
		aiocbe = malloc(sizeof *aiocbe, M_AIO, M_WAITOK);
	}

	error = copyin((caddr_t)job,
		(caddr_t) &aiocbe->uaiocb, sizeof aiocbe->uaiocb);
	if (error) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		return error;
	}

	/*
	 * Determine the opcode now; a list I/O type overrides the one
	 * in the control block.
	 */
	if (type != LIO_NOP) {
		aiocbe->uaiocb.aio_lio_opcode = type;
	}
	opcode = aiocbe->uaiocb.aio_lio_opcode;

	/*
	 * Get the fd info for process
	 */
	fdp = p->p_fd;

	/*
	 * Range check file descriptor
	 */
	fd = aiocbe->uaiocb.aio_fildes;
	if (fd >= fdp->fd_nfiles) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.status, -1);
			suword(&job->_aiocb_private.error, EBADF);
		}
		return EBADF;
	}

	/*
	 * Make sure the file is open in the mode the request needs.
	 */
	fp = fdp->fd_ofiles[fd];
	if ((fp == NULL) ||
	    ((opcode == LIO_WRITE) && ((fp->f_flag & FWRITE) == 0)) ||
	    ((opcode == LIO_READ) && ((fp->f_flag & FREAD) == 0))) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.status, -1);
			suword(&job->_aiocb_private.error, EBADF);
		}
		return EBADF;
	}

	if (aiocbe->uaiocb.aio_offset == -1LL) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.status, -1);
			suword(&job->_aiocb_private.error, EINVAL);
		}
		return EINVAL;
	}

#ifdef DEBUGAIO
	printf("job addr: 0x%x, 0x%x, %d\n", job,
		&job->_aiocb_private.kernelinfo, jobrefid);
#endif

	error = suword(&job->_aiocb_private.kernelinfo, jobrefid);
	if (error) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.status, -1);
			suword(&job->_aiocb_private.error, EINVAL);
		}
		return error;
	}

	aiocbe->uaiocb._aiocb_private.kernelinfo = (void *)jobrefid;
#ifdef DEBUGAIO
	printf("aio_aqueue: New job: %d... ", jobrefid);
#endif
	++jobrefid;

	if (opcode == LIO_NOP) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.status, -1);
			suword(&job->_aiocb_private.error, 0);
		}
		return 0;
	}

	if ((opcode != LIO_READ) && (opcode != LIO_WRITE)) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.status, -1);
			suword(&job->_aiocb_private.error, EINVAL);
		}
		return EINVAL;
	}

	suword(&job->_aiocb_private.error, 0);
	suword(&job->_aiocb_private.status, 0);
	aiocbe->userproc = p;
	aiocbe->jobflags = 0;
	ki = p->p_aioinfo;
	++num_queue_count;
	++ki->kaio_queue_count;

retryproc:
	if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) {
#ifdef DEBUGAIO
		printf("found a free AIO process\n");
#endif
		TAILQ_REMOVE(&aio_freeproc, aiop, list);
		TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list);
		aiop->aioprocflags &= ~AIOP_FREE;
		TAILQ_INSERT_TAIL(&aiop->jobtorun, aiocbe, list);
		TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist);
		aiocbe->jobstate = JOBST_JOBQPROC;
		aiocbe->jobaioproc = aiop;
		wakeup(aiop->aioproc);
	} else if ((num_aio_procs < max_aio_procs) &&
		(ki->kaio_active_count < ki->kaio_maxactive_count)) {
		if ((error = aio_newproc()) != 0) {
#ifdef DEBUGAIO
			printf("aio_aqueue: problem sleeping for starting proc: %d\n",
				error);
#endif
		}
		goto retryproc;
	} else {
#ifdef DEBUGAIO
		printf("queuing to global queue\n");
#endif
		TAILQ_INSERT_TAIL(&aio_jobs, aiocbe, list);
		TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist);
		aiocbe->jobstate = JOBST_JOBQGLOBAL;
	}

	return 0;
}

static int
aio_aqueue(struct proc *p, struct aiocb *job, int type) {
	struct kaioinfo *ki;

	if (p->p_aioinfo == NULL) {
		aio_init_aioinfo(p);
	}

	if (num_queue_count >= max_queue_count)
		return EAGAIN;

	ki = p->p_aioinfo;
	if (ki->kaio_queue_count >= ki->kaio_qallowed_count)
		return EAGAIN;

	return _aio_aqueue(p, job, type);
}

/*
 * Support the aio_return system call
 */
int
aio_return(struct proc *p, struct aio_return_args *uap, int *retval) {
	int jobref, status;
	struct aiocblist *cb;
	struct kaioinfo *ki;

	ki = p->p_aioinfo;
	if (ki == NULL) {
		return EINVAL;
	}

	jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo);
	if (jobref == -1)
		return EINVAL;

	for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
	    cb;
	    cb = TAILQ_NEXT(cb, plist)) {
		if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
			retval[0] = cb->uaiocb._aiocb_private.status;
			aio_free_entry(cb);
			return 0;
		}
	}

	/*
	 * Hack for lio.
	 */
	status = fuword(&uap->aiocbp->_aiocb_private.status);
	if (status == -1)
		return 0;

	return (EINVAL);
}

/*
 * Mark (or unmark) queued jobs for a given process, so that the
 * process is woken up when they complete.
 */
static void
aio_marksuspend(struct proc *p, int njobs, int *joblist, int set) {
	struct aiocblist *aiocbe;
	struct kaioinfo *ki;

	ki = p->p_aioinfo;
	if (ki == NULL)
		return;

	for (aiocbe = TAILQ_FIRST(&ki->kaio_jobqueue);
	    aiocbe;
	    aiocbe = TAILQ_NEXT(aiocbe, plist)) {
		if (njobs) {
			int i;

			for (i = 0; i < njobs; i++) {
				if (((int) aiocbe->uaiocb._aiocb_private.kernelinfo) == joblist[i])
					break;
			}

			if (i == njobs)
				continue;
		}

		if (set)
			aiocbe->jobflags |= AIOCBLIST_SUSPEND;
		else
			aiocbe->jobflags &= ~AIOCBLIST_SUSPEND;
	}
}

/*
 * Allow a process to wakeup when any of the I/O requests are
 * completed.
 */
int
aio_suspend(struct proc *p, struct aio_suspend_args *uap, int *retval) {
	struct timeval atv;
	struct timespec ts;
	struct aiocb *const *cbptr, *cbp;
	struct kaioinfo *ki;
	struct aiocblist *cb;
	int i;
	int error, s, timo;
	int *joblist;

	timo = 0;
	if (uap->timeout) {
		/*
		 * Get timespec struct
		 */
		if ((error = copyin((caddr_t) uap->timeout,
		    (caddr_t) &ts, sizeof ts)) != 0) {
			return error;
		}

		if (ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000)
			return (EINVAL);

		TIMESPEC_TO_TIMEVAL(&atv, &ts);
		if (itimerfix(&atv))
			return (EINVAL);
		/*
		 * XXX this is not as careful as settimeofday() about minimising
		 * interrupt latency.  The hzto() interface is inconvenient as usual.
		 */
		s = splclock();
		timevaladd(&atv, &time);
		timo = hzto(&atv);
		splx(s);
		if (timo == 0)
			timo = 1;
	}

	ki = p->p_aioinfo;
	if (ki == NULL)
		return EAGAIN;

	/*
	 * Snapshot the kernel job ids for the caller's control blocks.
	 */
	joblist = malloc(uap->nent * sizeof(int), M_TEMP, M_WAITOK);
	cbptr = uap->aiocbp;

	for (i = 0; i < uap->nent; i++) {
		cbp = (struct aiocb *) fuword((caddr_t) &cbptr[i]);
#ifdef DEBUGAIO
		printf("cbp: %x\n", cbp);
#endif
		joblist[i] = fuword(&cbp->_aiocb_private.kernelinfo);
		cbptr++;
	}

#ifdef DEBUGAIO
	printf("Suspend, timeout: %d clocks, jobs:", timo);
	for (i = 0; i < uap->nent; i++)
		printf(" %d", joblist[i]);
	printf("\n");
#endif

	while (1) {
		/*
		 * If any of the requested jobs has already completed,
		 * return immediately.
		 */
		for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
		    cb;
		    cb = TAILQ_NEXT(cb, plist)) {
			for (i = 0; i < uap->nent; i++) {
				if (((int) cb->uaiocb._aiocb_private.kernelinfo) == joblist[i]) {
					free(joblist, M_TEMP);
					return 0;
				}
			}
		}

		aio_marksuspend(p, uap->nent, joblist, 1);
#ifdef DEBUGAIO
		printf("Suspending -- waiting for I/O's to complete: ");
		for (i = 0; i < uap->nent; i++)
			printf(" %d", joblist[i]);
		printf("\n");
#endif
		error = tsleep(p, PRIBIO|PCATCH, "aiospn", timo);
		aio_marksuspend(p, uap->nent, joblist, 0);

		if (error == EINTR) {
#ifdef DEBUGAIO
			printf(" signal\n");
#endif
			free(joblist, M_TEMP);
			return EINTR;
		} else if (error == EWOULDBLOCK) {
#ifdef DEBUGAIO
			printf(" timeout\n");
#endif
			free(joblist, M_TEMP);
			return EAGAIN;
		}
#ifdef DEBUGAIO
		printf("\n");
#endif
	}

	/* NOTREACHED */
	return EINVAL;
}
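
/*
 * Illustrative (hypothetical, userland) use of aio_suspend: wait up to
 * half a second for either of two previously queued requests to
 * finish.  A timeout surfaces as EAGAIN, a signal as EINTR.
 */
#if 0
	const struct aiocb *list[2];
	struct timespec ts;

	list[0] = &cb1;			/* previously queued with aio_read() */
	list[1] = &cb2;
	ts.tv_sec = 0;
	ts.tv_nsec = 500000000;		/* 0.5 sec */

	if (aio_suspend(list, 2, &ts) == 0)
		;			/* at least one request is done */
#endif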

/*
 * aio_cancel at the kernel level is a NOOP right now.  It
 * might be possible to support it partially in user mode, or
 * in kernel mode later on.
 */
int
aio_cancel(struct proc *p, struct aio_cancel_args *uap, int *retval) {
	return AIO_NOTCANCELLED;
}

/*
 * aio_error is implemented in the kernel level for compatibility
 * purposes only.  For a user mode async implementation, it would be
 * best to do it in a userland subroutine.
 */
int
aio_error(struct proc *p, struct aio_error_args *uap, int *retval) {
	struct aiocblist *cb;
	struct kaioinfo *ki;
	int jobref;
	int status;

	ki = p->p_aioinfo;
	if (ki == NULL)
		return EINVAL;

	jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo);
	if (jobref == -1)
		return EFAULT;

	for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
	    cb;
	    cb = TAILQ_NEXT(cb, plist)) {
		if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
			retval[0] = cb->uaiocb._aiocb_private.error;
			return 0;
		}
	}

	for (cb = TAILQ_FIRST(&ki->kaio_jobqueue);
	    cb;
	    cb = TAILQ_NEXT(cb, plist)) {
		if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
			retval[0] = EINPROGRESS;
			return 0;
		}
	}

	/*
	 * Hack for lio
	 */
	status = fuword(&uap->aiocbp->_aiocb_private.status);
	if (status == -1) {
		return fuword(&uap->aiocbp->_aiocb_private.error);
	}
	return EINVAL;
}
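
/*
 * As the comment above suggests, a user mode package could answer
 * aio_error() without entering the kernel when it ran the request
 * itself.  A hypothetical sketch of such a wrapper (the name is
 * assumed, not part of this file):
 */
#if 0
int
my_aio_error(struct aiocb *cb) {
	/* Requests run in user mode keep their result in the cb. */
	if (cb->_aiocb_private.privatemodes & AIO_PMODE_SYNC)
		return cb->_aiocb_private.error;
	return aio_error(cb);		/* otherwise ask the kernel */
}
#endif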

int
aio_read(struct proc *p, struct aio_read_args *uap, int *retval) {
	struct filedesc *fdp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	unsigned int fd;
	int cnt;
	struct aiocb iocb;
	int error, pmodes;

	/*
	 * Process sync simply -- queue async request.
	 */
	pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes);
	if ((pmodes & AIO_PMODE_SYNC) == 0) {
		return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ);
	}

	/*
	 * Get control block
	 */
	if ((error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb,
	    sizeof iocb)) != 0)
		return error;

	/*
	 * Get the fd info for process
	 */
	fdp = p->p_fd;

	/*
	 * Range check file descriptor
	 */
	fd = iocb.aio_fildes;
	if (fd >= fdp->fd_nfiles)
		return EBADF;
	fp = fdp->fd_ofiles[fd];
	if ((fp == NULL) || ((fp->f_flag & FREAD) == 0))
		return EBADF;
	if (iocb.aio_offset == -1LL)
		return EINVAL;

	auio.uio_resid = iocb.aio_nbytes;
	if (auio.uio_resid < 0)
		return (EINVAL);

	aiov.iov_base = iocb.aio_buf;
	aiov.iov_len = iocb.aio_nbytes;

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = iocb.aio_offset;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	cnt = iocb.aio_nbytes;
	error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
	if (error &&
	    (auio.uio_resid != cnt) &&
	    (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
		error = 0;
	cnt -= auio.uio_resid;
	*retval = cnt;
	return error;
}

int
aio_write(struct proc *p, struct aio_write_args *uap, int *retval) {
	struct filedesc *fdp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	unsigned int fd;
	int cnt;
	struct aiocb iocb;
	int error;
	int pmodes;

	/*
	 * Process sync simply -- queue async request.
	 */
	pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes);
	if ((pmodes & AIO_PMODE_SYNC) == 0) {
		return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_WRITE);
	}

	if ((error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb,
	    sizeof iocb)) != 0)
		return error;

	/*
	 * Get the fd info for process
	 */
	fdp = p->p_fd;

	/*
	 * Range check file descriptor
	 */
	fd = iocb.aio_fildes;
	if (fd >= fdp->fd_nfiles)
		return EBADF;
	fp = fdp->fd_ofiles[fd];
	if ((fp == NULL) || ((fp->f_flag & FWRITE) == 0))
		return EBADF;
	if (iocb.aio_offset == -1LL)
		return EINVAL;

	aiov.iov_base = iocb.aio_buf;
	aiov.iov_len = iocb.aio_nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = iocb.aio_offset;

	auio.uio_resid = iocb.aio_nbytes;
	if (auio.uio_resid < 0)
		return (EINVAL);

	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	cnt = iocb.aio_nbytes;
	error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred);
	if (error) {
		if (auio.uio_resid != cnt) {
			if (error == ERESTART || error == EINTR ||
			    error == EWOULDBLOCK)
				error = 0;
			if (error == EPIPE)
				psignal(p, SIGPIPE);
		}
	}
	cnt -= auio.uio_resid;
	*retval = cnt;
	return error;
}

int
lio_listio(struct proc *p, struct lio_listio_args *uap, int *retval) {
	int nent, nentqueued;
	struct aiocb *iocb, * const *cbptr;
	struct aiocblist *cb;
	struct kaioinfo *ki;
	int error, runningcode;
	int i;

	if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT))
		return EINVAL;

	nent = uap->nent;
	if (nent > AIO_LISTIO_MAX)
		return EINVAL;

	if (p->p_aioinfo == NULL) {
		aio_init_aioinfo(p);
	}

	if ((nent + num_queue_count) > max_queue_count)
		return EAGAIN;

	ki = p->p_aioinfo;
	if ((nent + ki->kaio_queue_count) > ki->kaio_qallowed_count)
		return EAGAIN;

	/*
	 * reserve resources, remember that we have to unwind part of them sometimes
	 */
	num_queue_count += nent;
	ki->kaio_queue_count += nent;
	nentqueued = 0;

	/*
	 * get pointers to the list of I/O requests
	 */
	cbptr = uap->acb_list;
	for (i = 0; i < uap->nent; i++) {
		iocb = (struct aiocb *) fuword((caddr_t) &cbptr[i]);
		error = aio_aqueue(p, iocb, 0);
		if (error == 0)
			nentqueued++;
	}

	if (nentqueued == 0)
		return EIO;

	runningcode = 0;
	if (nentqueued != nent)
		runningcode = EIO;

	if (uap->mode == LIO_WAIT) {
		while (1) {
			for (i = 0; i < uap->nent; i++) {
				int found;
				int jobref, command, status;

				iocb = (struct aiocb *) fuword((caddr_t) &cbptr[i]);
				command = fuword(&iocb->aio_lio_opcode);
				if (command == LIO_NOP)
					continue;

				/*
				 * Requests that failed to queue had
				 * their status set to -1 by
				 * _aio_aqueue; don't wait for those.
				 */
				status = fuword(&iocb->_aiocb_private.status);
				if (status == -1)
					continue;
				jobref = fuword(&iocb->_aiocb_private.kernelinfo);

				found = 0;
				for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
				    cb;
				    cb = TAILQ_NEXT(cb, plist)) {
					if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
						found++;
						break;
					}
				}
				if (found == 0)
					break;
			}

			if (i == uap->nent) {
				return runningcode;
			}

			aio_marksuspend(p, 0, 0, 1);
			error = tsleep(p, PRIBIO|PCATCH, "aiospn", 0);
			aio_marksuspend(p, 0, 0, 0);

			if (error == EINTR) {
				return EINTR;
			} else if (error == EWOULDBLOCK) {
				return EAGAIN;
			}
		}
	}

	return runningcode;
}
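
/*
 * Illustrative (hypothetical, userland) use of lio_listio: submit a
 * read and a write in one call and wait for both to complete.  The
 * descriptors and buffers are assumed to exist elsewhere.
 */
#if 0
	struct aiocb rd, wr;
	struct aiocb *list[2];

	bzero(&rd, sizeof rd);
	rd.aio_fildes = rfd;
	rd.aio_buf = rbuf;
	rd.aio_nbytes = sizeof rbuf;
	rd.aio_lio_opcode = LIO_READ;

	bzero(&wr, sizeof wr);
	wr.aio_fildes = wfd;
	wr.aio_buf = wbuf;
	wr.aio_nbytes = sizeof wbuf;
	wr.aio_lio_opcode = LIO_WRITE;

	list[0] = &rd;
	list[1] = &wr;
	if (lio_listio(LIO_WAIT, list, 2, NULL) == 0)
		;			/* both requests have completed */
#endif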