/*
 * Copyright (c) 1997 John S. Dyson.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. John S. Dyson's name may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * DISCLAIMER:  This code isn't warranted to do anything useful.  Anything
 * bad that happens because of using this software isn't the responsibility
 * of the author.  This software is distributed AS-IS.
 *
 * $Id: vfs_aio.c,v 1.10 1997/11/07 08:53:03 phk Exp $
 */

/*
 * This file contains support for the POSIX.4 AIO facility.
 *
 * The initial version provides only the (bogus) synchronous semantics
 * but will support async in the future.  Note that a bit
 * in a private field allows the user mode subroutine to adapt
 * the kernel operations to true POSIX.4 for future compatibility.
 *
 * This code is used to support true POSIX.4 AIO/LIO with the help
 * of a user mode subroutine package.  Note that eventually more support
 * will be pushed into the kernel.
30ee877a35SJohn Dyson */ 31ee877a35SJohn Dyson 32ee877a35SJohn Dyson #include <sys/param.h> 33ee877a35SJohn Dyson #include <sys/systm.h> 34ee877a35SJohn Dyson #include <sys/sysproto.h> 35ee877a35SJohn Dyson #include <sys/filedesc.h> 36ee877a35SJohn Dyson #include <sys/kernel.h> 37ee877a35SJohn Dyson #include <sys/fcntl.h> 38ee877a35SJohn Dyson #include <sys/file.h> 39fdebd4f0SBruce Evans #include <sys/lock.h> 40ee877a35SJohn Dyson #include <sys/unistd.h> 41ee877a35SJohn Dyson #include <sys/proc.h> 42ee877a35SJohn Dyson #include <sys/uio.h> 43ee877a35SJohn Dyson #include <sys/malloc.h> 44ee877a35SJohn Dyson #include <sys/signalvar.h> 45a624e84fSJohn Dyson #include <sys/sysctl.h> 46ee877a35SJohn Dyson 47ee877a35SJohn Dyson #include <vm/vm.h> 48ee877a35SJohn Dyson #include <vm/vm_param.h> 49ee877a35SJohn Dyson #include <vm/vm_extern.h> 502244ea07SJohn Dyson #include <vm/pmap.h> 512244ea07SJohn Dyson #include <vm/vm_map.h> 52ee877a35SJohn Dyson #include <sys/aio.h> 535aaef07cSJohn Dyson #include <sys/shm.h> 545aaef07cSJohn Dyson 555aaef07cSJohn Dyson #include <machine/cpu.h> 56ee877a35SJohn Dyson 57a1c995b6SPoul-Henning Kamp static MALLOC_DEFINE(M_AIO, "AIO", "AIO structure(s)"); 5855166637SPoul-Henning Kamp 592244ea07SJohn Dyson #define AIOCBLIST_CANCELLED 0x1 602244ea07SJohn Dyson #define AIOCBLIST_RUNDOWN 0x4 612244ea07SJohn Dyson #define AIOCBLIST_ASYNCFREE 0x8 622244ea07SJohn Dyson #define AIOCBLIST_SUSPEND 0x10 632244ea07SJohn Dyson 642244ea07SJohn Dyson #if 0 652244ea07SJohn Dyson #define DEBUGAIO 662244ea07SJohn Dyson #define DIAGNOSTIC 672244ea07SJohn Dyson #endif 682244ea07SJohn Dyson 69a624e84fSJohn Dyson #define DEBUGAIO 1 70a624e84fSJohn Dyson 712244ea07SJohn Dyson static int jobrefid; 722244ea07SJohn Dyson 732244ea07SJohn Dyson #define JOBST_NULL 0x0 742244ea07SJohn Dyson #define JOBST_JOBQPROC 0x1 752244ea07SJohn Dyson #define JOBST_JOBQGLOBAL 0x2 762244ea07SJohn Dyson #define JOBST_JOBRUNNING 0x3 772244ea07SJohn Dyson #define JOBST_JOBFINISHED 0x4 
782244ea07SJohn Dyson 792244ea07SJohn Dyson #define MAX_AIO_PER_PROC 32 802244ea07SJohn Dyson #define MAX_AIO_QUEUE_PER_PROC 256 /* Bigger than AIO_LISTIO_MAX */ 812244ea07SJohn Dyson #define MAX_AIO_PROCS 128 822244ea07SJohn Dyson #define MAX_AIO_QUEUE 1024 /* Bigger than AIO_LISTIO_MAX */ 832244ea07SJohn Dyson #define TARGET_AIO_PROCS 64 842244ea07SJohn Dyson 85a624e84fSJohn Dyson int max_aio_procs = MAX_AIO_PROCS; 86a624e84fSJohn Dyson int num_aio_procs = 0; 87a624e84fSJohn Dyson int target_aio_procs = TARGET_AIO_PROCS; 88a624e84fSJohn Dyson int max_queue_count = MAX_AIO_QUEUE; 89a624e84fSJohn Dyson int num_queue_count = 0; 90a624e84fSJohn Dyson 91a624e84fSJohn Dyson int max_aio_per_proc = MAX_AIO_PER_PROC, 92a624e84fSJohn Dyson max_aio_queue_per_proc=MAX_AIO_QUEUE_PER_PROC; 93a624e84fSJohn Dyson 94a624e84fSJohn Dyson 95a624e84fSJohn Dyson SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0, "AIO mgmt"); 96a624e84fSJohn Dyson 97a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc, 98a624e84fSJohn Dyson CTLFLAG_RW, &max_aio_per_proc, 0, ""); 99a624e84fSJohn Dyson 100a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc, 101a624e84fSJohn Dyson CTLFLAG_RW, &max_aio_queue_per_proc, 0, ""); 102a624e84fSJohn Dyson 103a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs, 104a624e84fSJohn Dyson CTLFLAG_RW, &max_aio_procs, 0, ""); 105a624e84fSJohn Dyson 106a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs, 107a624e84fSJohn Dyson CTLFLAG_RD, &num_aio_procs, 0, ""); 108a624e84fSJohn Dyson 109a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count, 110a624e84fSJohn Dyson CTLFLAG_RD, &num_queue_count, 0, ""); 111a624e84fSJohn Dyson 112a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue, 113a624e84fSJohn Dyson CTLFLAG_RW, &max_queue_count, 0, ""); 114a624e84fSJohn Dyson 115a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs, 116a624e84fSJohn Dyson CTLFLAG_RW, &target_aio_procs, 0, 
""); 117a624e84fSJohn Dyson 118a624e84fSJohn Dyson #if DEBUGAIO > 0 119a624e84fSJohn Dyson static int debugaio; 120a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, debugaio, CTLFLAG_RW, &debugaio, 0, ""); 121a624e84fSJohn Dyson #endif 122a624e84fSJohn Dyson 1232244ea07SJohn Dyson /* 1242244ea07SJohn Dyson * Job queue item 1252244ea07SJohn Dyson */ 1262244ea07SJohn Dyson struct aiocblist { 1272244ea07SJohn Dyson TAILQ_ENTRY (aiocblist) list; /* List of jobs */ 1282244ea07SJohn Dyson TAILQ_ENTRY (aiocblist) plist; /* List of jobs for proc */ 1292244ea07SJohn Dyson int jobflags; 1302244ea07SJohn Dyson int jobstate; 1312244ea07SJohn Dyson struct proc *userproc; /* User process */ 1322244ea07SJohn Dyson struct aioproclist *jobaioproc; /* AIO process descriptor */ 1332244ea07SJohn Dyson struct aiocb uaiocb; /* Kernel I/O control block */ 1342244ea07SJohn Dyson }; 1352244ea07SJohn Dyson 1362244ea07SJohn Dyson #define AIOP_FREE 0x1 /* proc on free queue */ 1372244ea07SJohn Dyson /* 1382244ea07SJohn Dyson * AIO process info 1392244ea07SJohn Dyson */ 1402244ea07SJohn Dyson struct aioproclist { 1412244ea07SJohn Dyson int aioprocflags; /* AIO proc flags */ 1422244ea07SJohn Dyson TAILQ_ENTRY(aioproclist) list; /* List of processes */ 1432244ea07SJohn Dyson struct proc *aioproc; /* The AIO thread */ 1442244ea07SJohn Dyson TAILQ_HEAD (,aiocblist) jobtorun; /* suggested job to run */ 1452244ea07SJohn Dyson }; 1462244ea07SJohn Dyson 1472244ea07SJohn Dyson struct kaioinfo { 1482244ea07SJohn Dyson int kaio_maxactive_count; /* maximum number of AIOs */ 1492244ea07SJohn Dyson int kaio_active_count; /* number of currently used AIOs */ 1502244ea07SJohn Dyson int kaio_qallowed_count; /* maxiumu size of AIO queue */ 1512244ea07SJohn Dyson int kaio_queue_count; /* size of AIO queue */ 1522244ea07SJohn Dyson TAILQ_HEAD (,aiocblist) kaio_jobqueue; /* job queue for process */ 1532244ea07SJohn Dyson TAILQ_HEAD (,aiocblist) kaio_jobdone; /* done queue for process */ 1542244ea07SJohn Dyson }; 
1552244ea07SJohn Dyson 1562244ea07SJohn Dyson TAILQ_HEAD (,aioproclist) aio_freeproc, aio_activeproc; 1572244ea07SJohn Dyson TAILQ_HEAD(,aiocblist) aio_jobs; /* Async job list */ 1582244ea07SJohn Dyson TAILQ_HEAD(,aiocblist) aio_freejobs; 1592244ea07SJohn Dyson 1602244ea07SJohn Dyson 1612244ea07SJohn Dyson void aio_init_aioinfo(struct proc *p) ; 1625aaef07cSJohn Dyson void aio_onceonly(void *) ; 1632244ea07SJohn Dyson int aio_free_entry(struct aiocblist *aiocbe); 1642244ea07SJohn Dyson void aio_cancel_internal(struct aiocblist *aiocbe); 1652244ea07SJohn Dyson void aio_process(struct aiocblist *aiocbe); 1662244ea07SJohn Dyson void pmap_newvmspace(struct vmspace *); 1672244ea07SJohn Dyson static int aio_newproc(void) ; 1682244ea07SJohn Dyson static int aio_aqueue(struct proc *p, struct aiocb *job, int type) ; 1692244ea07SJohn Dyson static void aio_marksuspend(struct proc *p, int njobs, int *joblist, int set) ; 1702244ea07SJohn Dyson 1712244ea07SJohn Dyson SYSINIT(aio, SI_SUB_VFS, SI_ORDER_ANY, aio_onceonly, NULL); 1722244ea07SJohn Dyson 173a624e84fSJohn Dyson 1742244ea07SJohn Dyson /* 1752244ea07SJohn Dyson * Startup initialization 1762244ea07SJohn Dyson */ 1772244ea07SJohn Dyson void 1785aaef07cSJohn Dyson aio_onceonly(void *na) { 1792244ea07SJohn Dyson TAILQ_INIT(&aio_freeproc); 1802244ea07SJohn Dyson TAILQ_INIT(&aio_activeproc); 1812244ea07SJohn Dyson TAILQ_INIT(&aio_jobs); 1822244ea07SJohn Dyson TAILQ_INIT(&aio_freejobs); 1832244ea07SJohn Dyson } 1842244ea07SJohn Dyson 1852244ea07SJohn Dyson /* 1862244ea07SJohn Dyson * Init the per-process aioinfo structure. 
1872244ea07SJohn Dyson */ 1882244ea07SJohn Dyson void 1892244ea07SJohn Dyson aio_init_aioinfo(struct proc *p) { 1902244ea07SJohn Dyson struct kaioinfo *ki; 1912244ea07SJohn Dyson if (p->p_aioinfo == NULL) { 1922244ea07SJohn Dyson ki = malloc(sizeof (struct kaioinfo), M_AIO, M_WAITOK); 1932244ea07SJohn Dyson p->p_aioinfo = ki; 194a624e84fSJohn Dyson ki->kaio_maxactive_count = max_aio_per_proc; 1952244ea07SJohn Dyson ki->kaio_active_count = 0; 196a624e84fSJohn Dyson ki->kaio_qallowed_count = max_aio_queue_per_proc; 1972244ea07SJohn Dyson ki->kaio_queue_count = 0; 1982244ea07SJohn Dyson TAILQ_INIT(&ki->kaio_jobdone); 1992244ea07SJohn Dyson TAILQ_INIT(&ki->kaio_jobqueue); 2002244ea07SJohn Dyson } 2012244ea07SJohn Dyson } 2022244ea07SJohn Dyson 2032244ea07SJohn Dyson /* 2042244ea07SJohn Dyson * Free a job entry. Wait for completion if it is currently 2052244ea07SJohn Dyson * active, but don't delay forever. If we delay, we return 2062244ea07SJohn Dyson * a flag that says that we have to restart the queue scan. 
2072244ea07SJohn Dyson */ 2082244ea07SJohn Dyson int 2092244ea07SJohn Dyson aio_free_entry(struct aiocblist *aiocbe) { 2102244ea07SJohn Dyson struct kaioinfo *ki; 2112244ea07SJohn Dyson struct aioproclist *aiop; 2122244ea07SJohn Dyson struct proc *p; 2132244ea07SJohn Dyson 2142244ea07SJohn Dyson if (aiocbe->jobstate == JOBST_NULL) 2152244ea07SJohn Dyson panic("aio_free_entry: freeing already free job"); 2162244ea07SJohn Dyson 2172244ea07SJohn Dyson p = aiocbe->userproc; 2182244ea07SJohn Dyson ki = p->p_aioinfo; 2192244ea07SJohn Dyson if (ki == NULL) 2202244ea07SJohn Dyson panic("aio_free_entry: missing p->p_aioinfo"); 2212244ea07SJohn Dyson 2222244ea07SJohn Dyson if (aiocbe->jobstate == JOBST_JOBRUNNING) { 2232244ea07SJohn Dyson if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) 2242244ea07SJohn Dyson return 0; 2252244ea07SJohn Dyson aiocbe->jobflags |= AIOCBLIST_RUNDOWN; 226a624e84fSJohn Dyson tsleep(aiocbe, PRIBIO|PCATCH, "jobwai", 0); 227a624e84fSJohn Dyson /* 2282244ea07SJohn Dyson if (tsleep(aiocbe, PRIBIO|PCATCH, "jobwai", hz*5)) { 2292244ea07SJohn Dyson aiocbe->jobflags |= AIOCBLIST_ASYNCFREE; 2302244ea07SJohn Dyson aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN; 2312244ea07SJohn Dyson return 1; 2322244ea07SJohn Dyson } 2332244ea07SJohn Dyson aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN; 234a624e84fSJohn Dyson */ 2352244ea07SJohn Dyson } 2362244ea07SJohn Dyson aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE; 2372244ea07SJohn Dyson 2382244ea07SJohn Dyson if (ki->kaio_queue_count <= 0) 2392244ea07SJohn Dyson panic("aio_free_entry: process queue size <= 0"); 2402244ea07SJohn Dyson if (num_queue_count <= 0) 2412244ea07SJohn Dyson panic("aio_free_entry: system wide queue size <= 0"); 2422244ea07SJohn Dyson 2432244ea07SJohn Dyson --ki->kaio_queue_count; 2442244ea07SJohn Dyson --num_queue_count; 245a624e84fSJohn Dyson #if DEBUGAIO > 0 246a624e84fSJohn Dyson if (debugaio > 0) 247a624e84fSJohn Dyson printf("freeing entry: %d, %d\n", 248a624e84fSJohn Dyson ki->kaio_queue_count, num_queue_count); 
249a624e84fSJohn Dyson #endif 2502244ea07SJohn Dyson 2512244ea07SJohn Dyson if ( aiocbe->jobstate == JOBST_JOBQPROC) { 2522244ea07SJohn Dyson aiop = aiocbe->jobaioproc; 2532244ea07SJohn Dyson TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list); 2542244ea07SJohn Dyson } else if ( aiocbe->jobstate == JOBST_JOBQGLOBAL) { 2552244ea07SJohn Dyson TAILQ_REMOVE(&aio_jobs, aiocbe, list); 2562244ea07SJohn Dyson } else if ( aiocbe->jobstate == JOBST_JOBFINISHED) { 2572244ea07SJohn Dyson ki = p->p_aioinfo; 2582244ea07SJohn Dyson TAILQ_REMOVE(&ki->kaio_jobdone, aiocbe, plist); 2592244ea07SJohn Dyson } 2602244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 2612244ea07SJohn Dyson aiocbe->jobstate = JOBST_NULL; 2622244ea07SJohn Dyson return 0; 2632244ea07SJohn Dyson } 2642244ea07SJohn Dyson 2652244ea07SJohn Dyson /* 2662244ea07SJohn Dyson * Rundown the jobs for a given process. 2672244ea07SJohn Dyson */ 2682244ea07SJohn Dyson void 2692244ea07SJohn Dyson aio_proc_rundown(struct proc *p) { 2702244ea07SJohn Dyson struct kaioinfo *ki; 2712244ea07SJohn Dyson struct aiocblist *aiocbe, *aiocbn; 2722244ea07SJohn Dyson 2732244ea07SJohn Dyson ki = p->p_aioinfo; 2742244ea07SJohn Dyson if (ki == NULL) 2752244ea07SJohn Dyson return; 2762244ea07SJohn Dyson 277a624e84fSJohn Dyson while (ki->kaio_active_count > 0) { 278a624e84fSJohn Dyson if (tsleep(ki, PRIBIO, "kaiowt", 60 * hz)) 279a624e84fSJohn Dyson break; 280a624e84fSJohn Dyson } 281a624e84fSJohn Dyson 282a624e84fSJohn Dyson #if DEBUGAIO > 0 283a624e84fSJohn Dyson if (debugaio > 0) 284a624e84fSJohn Dyson printf("Proc rundown: %d %d\n", 285a624e84fSJohn Dyson num_queue_count, ki->kaio_queue_count); 286a624e84fSJohn Dyson #endif 287a624e84fSJohn Dyson 2882244ea07SJohn Dyson restart1: 2892244ea07SJohn Dyson for ( aiocbe = TAILQ_FIRST(&ki->kaio_jobdone); 2902244ea07SJohn Dyson aiocbe; 2912244ea07SJohn Dyson aiocbe = aiocbn) { 2922244ea07SJohn Dyson aiocbn = TAILQ_NEXT(aiocbe, plist); 2932244ea07SJohn Dyson if (aio_free_entry(aiocbe)) 
2942244ea07SJohn Dyson goto restart1; 2952244ea07SJohn Dyson } 2962244ea07SJohn Dyson 2972244ea07SJohn Dyson restart2: 2982244ea07SJohn Dyson for ( aiocbe = TAILQ_FIRST(&ki->kaio_jobqueue); 2992244ea07SJohn Dyson aiocbe; 3002244ea07SJohn Dyson aiocbe = aiocbn) { 3012244ea07SJohn Dyson aiocbn = TAILQ_NEXT(aiocbe, plist); 3022244ea07SJohn Dyson if (aio_free_entry(aiocbe)) 3032244ea07SJohn Dyson goto restart2; 3042244ea07SJohn Dyson } 3052244ea07SJohn Dyson free(ki, M_AIO); 306a624e84fSJohn Dyson p->p_aioinfo = NULL; 3072244ea07SJohn Dyson } 3082244ea07SJohn Dyson 3092244ea07SJohn Dyson /* 3102244ea07SJohn Dyson * Select a job to run (called by an AIO daemon) 3112244ea07SJohn Dyson */ 3122244ea07SJohn Dyson static struct aiocblist * 3132244ea07SJohn Dyson aio_selectjob(struct aioproclist *aiop) { 3142244ea07SJohn Dyson 3152244ea07SJohn Dyson struct aiocblist *aiocbe; 3162244ea07SJohn Dyson 3172244ea07SJohn Dyson aiocbe = TAILQ_FIRST(&aiop->jobtorun); 3182244ea07SJohn Dyson if (aiocbe) { 3192244ea07SJohn Dyson TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list); 3202244ea07SJohn Dyson return aiocbe; 3212244ea07SJohn Dyson } 3222244ea07SJohn Dyson 3232244ea07SJohn Dyson for (aiocbe = TAILQ_FIRST(&aio_jobs); 3242244ea07SJohn Dyson aiocbe; 3252244ea07SJohn Dyson aiocbe = TAILQ_NEXT(aiocbe, list)) { 3262244ea07SJohn Dyson struct kaioinfo *ki; 3272244ea07SJohn Dyson struct proc *userp; 3282244ea07SJohn Dyson 3292244ea07SJohn Dyson userp = aiocbe->userproc; 3302244ea07SJohn Dyson ki = userp->p_aioinfo; 3312244ea07SJohn Dyson 3322244ea07SJohn Dyson if (ki->kaio_active_count < ki->kaio_maxactive_count) { 3332244ea07SJohn Dyson TAILQ_REMOVE(&aio_jobs, aiocbe, list); 3342244ea07SJohn Dyson return aiocbe; 3352244ea07SJohn Dyson } 3362244ea07SJohn Dyson } 3372244ea07SJohn Dyson 3382244ea07SJohn Dyson return NULL; 3392244ea07SJohn Dyson } 3402244ea07SJohn Dyson 3412244ea07SJohn Dyson /* 3422244ea07SJohn Dyson * The AIO activity proper. 
3432244ea07SJohn Dyson */ 3442244ea07SJohn Dyson void 3452244ea07SJohn Dyson aio_process(struct aiocblist *aiocbe) { 3462244ea07SJohn Dyson struct filedesc *fdp; 3472244ea07SJohn Dyson struct proc *userp; 3482244ea07SJohn Dyson struct aiocb *cb; 3492244ea07SJohn Dyson struct file *fp; 3502244ea07SJohn Dyson struct uio auio; 3512244ea07SJohn Dyson struct iovec aiov; 3522244ea07SJohn Dyson unsigned int fd; 3532244ea07SJohn Dyson int cnt; 3542244ea07SJohn Dyson int error; 355a624e84fSJohn Dyson off_t offset; 3562244ea07SJohn Dyson 3572244ea07SJohn Dyson userp = aiocbe->userproc; 3582244ea07SJohn Dyson cb = &aiocbe->uaiocb; 3592244ea07SJohn Dyson 360a624e84fSJohn Dyson #if DEBUGAIO > 0 361a624e84fSJohn Dyson if (debugaio > 1) 362a624e84fSJohn Dyson printf("AIO %s, fd: %d, offset: 0x%x, address: 0x%x, size: %d\n", 363a624e84fSJohn Dyson cb->aio_lio_opcode == LIO_READ?"Read":"Write", 3642244ea07SJohn Dyson cb->aio_fildes, (int) cb->aio_offset, 3652244ea07SJohn Dyson cb->aio_buf, cb->aio_nbytes); 366a624e84fSJohn Dyson #endif 367a624e84fSJohn Dyson #if SLOW 3682244ea07SJohn Dyson tsleep(curproc, PVM, "aioprc", hz); 3692244ea07SJohn Dyson #endif 3702244ea07SJohn Dyson fdp = curproc->p_fd; 3712244ea07SJohn Dyson /* 3722244ea07SJohn Dyson * Range check file descriptor 3732244ea07SJohn Dyson */ 3742244ea07SJohn Dyson fd = cb->aio_fildes; 3752244ea07SJohn Dyson fp = fdp->fd_ofiles[fd]; 3762244ea07SJohn Dyson 3772244ea07SJohn Dyson aiov.iov_base = cb->aio_buf; 3782244ea07SJohn Dyson aiov.iov_len = cb->aio_nbytes; 3792244ea07SJohn Dyson 3802244ea07SJohn Dyson auio.uio_iov = &aiov; 3812244ea07SJohn Dyson auio.uio_iovcnt = 1; 382a624e84fSJohn Dyson auio.uio_offset = offset = cb->aio_offset; 3832244ea07SJohn Dyson auio.uio_resid = cb->aio_nbytes; 3842244ea07SJohn Dyson cnt = cb->aio_nbytes; 3852244ea07SJohn Dyson auio.uio_segflg = UIO_USERSPACE; 3862244ea07SJohn Dyson auio.uio_procp = curproc; 3872244ea07SJohn Dyson 3882244ea07SJohn Dyson if (cb->aio_lio_opcode == LIO_READ) { 
3892244ea07SJohn Dyson auio.uio_rw = UIO_READ; 3902244ea07SJohn Dyson error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred); 3912244ea07SJohn Dyson } else { 3922244ea07SJohn Dyson auio.uio_rw = UIO_WRITE; 3932244ea07SJohn Dyson error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred); 3942244ea07SJohn Dyson } 3952244ea07SJohn Dyson 3962244ea07SJohn Dyson if (error) { 3972244ea07SJohn Dyson if (auio.uio_resid != cnt) { 3982244ea07SJohn Dyson if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) 3992244ea07SJohn Dyson error = 0; 4002244ea07SJohn Dyson if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE)) 4012244ea07SJohn Dyson psignal(userp, SIGPIPE); 4022244ea07SJohn Dyson } 4032244ea07SJohn Dyson } 404a624e84fSJohn Dyson #if DEBUGAIO > 0 405a624e84fSJohn Dyson if (debugaio > 1) 406a624e84fSJohn Dyson printf("%s complete: error: %d, status: %d, nio: %d, resid: %d, offset: %d\n", 407a624e84fSJohn Dyson cb->aio_lio_opcode == LIO_READ?"Read":"Write", 408a624e84fSJohn Dyson error, cnt, cnt - auio.uio_resid, auio.uio_resid, (int) offset & 0xffffffff); 409a624e84fSJohn Dyson #endif 4102244ea07SJohn Dyson 4112244ea07SJohn Dyson cnt -= auio.uio_resid; 4122244ea07SJohn Dyson cb->_aiocb_private.error = error; 4132244ea07SJohn Dyson cb->_aiocb_private.status = cnt; 4142244ea07SJohn Dyson 4152244ea07SJohn Dyson return; 4162244ea07SJohn Dyson 4172244ea07SJohn Dyson } 4182244ea07SJohn Dyson 4192244ea07SJohn Dyson /* 4202244ea07SJohn Dyson * The AIO daemon. 
4212244ea07SJohn Dyson */ 4222244ea07SJohn Dyson static void 4232244ea07SJohn Dyson aio_startproc(void *uproc) 4242244ea07SJohn Dyson { 4252244ea07SJohn Dyson struct aioproclist *aiop; 4262244ea07SJohn Dyson 4272244ea07SJohn Dyson /* 4282244ea07SJohn Dyson * Allocate and ready the aio control info 4292244ea07SJohn Dyson */ 4302244ea07SJohn Dyson aiop = malloc(sizeof *aiop, M_AIO, M_WAITOK); 4312244ea07SJohn Dyson aiop->aioproc = curproc; 4322244ea07SJohn Dyson aiop->aioprocflags |= AIOP_FREE; 4332244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list); 4342244ea07SJohn Dyson TAILQ_INIT(&aiop->jobtorun); 4352244ea07SJohn Dyson 4362244ea07SJohn Dyson /* 4372244ea07SJohn Dyson * Get rid of current address space 4382244ea07SJohn Dyson */ 4392244ea07SJohn Dyson if (curproc->p_vmspace->vm_refcnt == 1) { 4402244ea07SJohn Dyson if (curproc->p_vmspace->vm_shm) 4412244ea07SJohn Dyson shmexit(curproc); 4422244ea07SJohn Dyson pmap_remove_pages(&curproc->p_vmspace->vm_pmap, 0, USRSTACK); 4432244ea07SJohn Dyson vm_map_remove(&curproc->p_vmspace->vm_map, 0, USRSTACK); 4442244ea07SJohn Dyson } else { 4452244ea07SJohn Dyson vmspace_exec(curproc); 4462244ea07SJohn Dyson } 4472244ea07SJohn Dyson 4482244ea07SJohn Dyson /* 4492244ea07SJohn Dyson * Make up a name for the daemon 4502244ea07SJohn Dyson */ 4512244ea07SJohn Dyson strcpy(curproc->p_comm, "aiodaemon"); 4522244ea07SJohn Dyson 4532244ea07SJohn Dyson /* 4542244ea07SJohn Dyson * Get rid of our current filedescriptors 4552244ea07SJohn Dyson */ 4562244ea07SJohn Dyson fdfree(curproc); 4572244ea07SJohn Dyson curproc->p_fd = NULL; 4582244ea07SJohn Dyson curproc->p_ucred = crcopy(curproc->p_ucred); 4592244ea07SJohn Dyson curproc->p_ucred->cr_uid = 0; 4602244ea07SJohn Dyson curproc->p_ucred->cr_groups[0] = 1; 4612244ea07SJohn Dyson curproc->p_flag |= P_SYSTEM; 4622244ea07SJohn Dyson 463a624e84fSJohn Dyson #if DEBUGAIO > 0 464a624e84fSJohn Dyson if (debugaio > 2) 4652244ea07SJohn Dyson printf("Started new process: %d\n", 
curproc->p_pid); 4662244ea07SJohn Dyson #endif 467a624e84fSJohn Dyson wakeup(&aio_freeproc); 4682244ea07SJohn Dyson 4692244ea07SJohn Dyson while(1) { 4702244ea07SJohn Dyson struct vmspace *myvm, *tmpvm; 4712244ea07SJohn Dyson struct proc *cp = curproc; 4722244ea07SJohn Dyson struct aiocblist *aiocbe; 4732244ea07SJohn Dyson 4742244ea07SJohn Dyson if ((aiop->aioprocflags & AIOP_FREE) == 0) { 4752244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list); 4762244ea07SJohn Dyson aiop->aioprocflags |= AIOP_FREE; 4772244ea07SJohn Dyson } 478c4860686SJohn Dyson if (tsleep(cp, PRIBIO, "aiordy", hz*30)) { 479c4860686SJohn Dyson if ((num_aio_procs > target_aio_procs) && 480c4860686SJohn Dyson (TAILQ_FIRST(&aiop->jobtorun) == NULL)) 481c4860686SJohn Dyson exit1(curproc, 0); 482c4860686SJohn Dyson } 483c4860686SJohn Dyson 4842244ea07SJohn Dyson if (aiop->aioprocflags & AIOP_FREE) { 4852244ea07SJohn Dyson TAILQ_REMOVE(&aio_freeproc, aiop, list); 4862244ea07SJohn Dyson TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list); 4872244ea07SJohn Dyson aiop->aioprocflags &= ~AIOP_FREE; 4882244ea07SJohn Dyson } 4892244ea07SJohn Dyson 4902244ea07SJohn Dyson myvm = curproc->p_vmspace; 4912244ea07SJohn Dyson 4922244ea07SJohn Dyson while ( aiocbe = aio_selectjob(aiop)) { 4932244ea07SJohn Dyson struct aiocb *cb; 4942244ea07SJohn Dyson struct kaioinfo *ki; 4952244ea07SJohn Dyson struct proc *userp; 4962244ea07SJohn Dyson 4972244ea07SJohn Dyson cb = &aiocbe->uaiocb; 4982244ea07SJohn Dyson userp = aiocbe->userproc; 4992244ea07SJohn Dyson ki = userp->p_aioinfo; 5002244ea07SJohn Dyson 5012244ea07SJohn Dyson aiocbe->jobstate = JOBST_JOBRUNNING; 5022244ea07SJohn Dyson if (userp != cp) { 5032244ea07SJohn Dyson tmpvm = curproc->p_vmspace; 5042244ea07SJohn Dyson curproc->p_vmspace = userp->p_vmspace; 5052244ea07SJohn Dyson ++curproc->p_vmspace->vm_refcnt; 5062244ea07SJohn Dyson pmap_activate(curproc); 5072244ea07SJohn Dyson if (tmpvm != myvm) { 5082244ea07SJohn Dyson vmspace_free(tmpvm); 
5092244ea07SJohn Dyson } 5102244ea07SJohn Dyson if (curproc->p_fd) 5112244ea07SJohn Dyson fdfree(curproc); 5122244ea07SJohn Dyson curproc->p_fd = fdshare(userp); 5132244ea07SJohn Dyson cp = userp; 5142244ea07SJohn Dyson } 5152244ea07SJohn Dyson 5162244ea07SJohn Dyson ki->kaio_active_count++; 517a624e84fSJohn Dyson #if DEBUGAIO > 0 518a624e84fSJohn Dyson if (debugaio > 0) 519a624e84fSJohn Dyson printf("process: pid: %d(%d), active: %d, queue: %d\n", 520a624e84fSJohn Dyson cb->_aiocb_private.kernelinfo, 521a624e84fSJohn Dyson userp->p_pid, ki->kaio_active_count, ki->kaio_queue_count); 522a624e84fSJohn Dyson #endif 5232244ea07SJohn Dyson aiocbe->jobaioproc = aiop; 5242244ea07SJohn Dyson aio_process(aiocbe); 5252244ea07SJohn Dyson --ki->kaio_active_count; 526a624e84fSJohn Dyson if (ki->kaio_active_count == 0) 527a624e84fSJohn Dyson wakeup(ki); 528a624e84fSJohn Dyson #if DEBUGAIO > 0 529a624e84fSJohn Dyson if (debugaio > 0) 530a624e84fSJohn Dyson printf("DONE process: pid: %d(%d), active: %d, queue: %d\n", 531a624e84fSJohn Dyson cb->_aiocb_private.kernelinfo, 532a624e84fSJohn Dyson userp->p_pid, ki->kaio_active_count, ki->kaio_queue_count); 533a624e84fSJohn Dyson #endif 5342244ea07SJohn Dyson 5352244ea07SJohn Dyson aiocbe->jobstate = JOBST_JOBFINISHED; 5362244ea07SJohn Dyson 5372244ea07SJohn Dyson if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) { 5382244ea07SJohn Dyson aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE; 5392244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 5402244ea07SJohn Dyson } else { 5412244ea07SJohn Dyson TAILQ_REMOVE(&ki->kaio_jobqueue, 5422244ea07SJohn Dyson aiocbe, plist); 5432244ea07SJohn Dyson TAILQ_INSERT_TAIL(&ki->kaio_jobdone, 5442244ea07SJohn Dyson aiocbe, plist); 5452244ea07SJohn Dyson } 5462244ea07SJohn Dyson 5472244ea07SJohn Dyson if (aiocbe->jobflags & AIOCBLIST_RUNDOWN) { 5482244ea07SJohn Dyson wakeup(aiocbe); 5492244ea07SJohn Dyson aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN; 5502244ea07SJohn Dyson } 5512244ea07SJohn Dyson 
5522244ea07SJohn Dyson if (aiocbe->jobflags & AIOCBLIST_SUSPEND) { 5532244ea07SJohn Dyson wakeup(userp); 5542244ea07SJohn Dyson aiocbe->jobflags &= ~AIOCBLIST_SUSPEND; 5552244ea07SJohn Dyson } 5562244ea07SJohn Dyson 5572244ea07SJohn Dyson if (cb->aio_sigevent.sigev_notify == SIGEV_SIGNAL) { 5582244ea07SJohn Dyson psignal(userp, cb->aio_sigevent.sigev_signo); 5592244ea07SJohn Dyson } 5602244ea07SJohn Dyson } 5612244ea07SJohn Dyson 5622244ea07SJohn Dyson if (cp != curproc) { 5632244ea07SJohn Dyson tmpvm = curproc->p_vmspace; 5642244ea07SJohn Dyson curproc->p_vmspace = myvm; 5652244ea07SJohn Dyson pmap_activate(curproc); 5662244ea07SJohn Dyson vmspace_free(tmpvm); 5672244ea07SJohn Dyson if (curproc->p_fd) 5682244ea07SJohn Dyson fdfree(curproc); 5692244ea07SJohn Dyson curproc->p_fd = NULL; 5702244ea07SJohn Dyson cp = curproc; 5712244ea07SJohn Dyson } 5722244ea07SJohn Dyson } 5732244ea07SJohn Dyson } 5742244ea07SJohn Dyson 5752244ea07SJohn Dyson /* 5762244ea07SJohn Dyson * Create a new AIO daemon. 
5772244ea07SJohn Dyson */ 5782244ea07SJohn Dyson static int 5792244ea07SJohn Dyson aio_newproc() { 5802244ea07SJohn Dyson int error; 5812244ea07SJohn Dyson struct rfork_args rfa; 5822244ea07SJohn Dyson struct proc *p; 5832244ea07SJohn Dyson 5842244ea07SJohn Dyson rfa.flags = RFMEM | RFPROC | RFCFDG; 5852244ea07SJohn Dyson 586cb226aaaSPoul-Henning Kamp p = curproc; 587cb226aaaSPoul-Henning Kamp if (error = rfork(p, &rfa)) 5882244ea07SJohn Dyson return error; 589cb226aaaSPoul-Henning Kamp cpu_set_fork_handler(p = pfind(p->p_retval[0]), aio_startproc, curproc); 5902244ea07SJohn Dyson 591a624e84fSJohn Dyson #if DEBUGAIO > 0 592a624e84fSJohn Dyson if (debugaio > 2) 5932244ea07SJohn Dyson printf("Waiting for new process: %d, count: %d\n", 5942244ea07SJohn Dyson curproc->p_pid, num_aio_procs); 5952244ea07SJohn Dyson #endif 5962244ea07SJohn Dyson 597a624e84fSJohn Dyson error = tsleep(&aio_freeproc, PZERO, "aiosta", 5*hz); 5982244ea07SJohn Dyson ++num_aio_procs; 5992244ea07SJohn Dyson 6002244ea07SJohn Dyson return error; 6012244ea07SJohn Dyson 6022244ea07SJohn Dyson } 6032244ea07SJohn Dyson 6042244ea07SJohn Dyson /* 6052244ea07SJohn Dyson * Queue a new AIO request. 
6062244ea07SJohn Dyson */ 6072244ea07SJohn Dyson static int 6082244ea07SJohn Dyson _aio_aqueue(struct proc *p, struct aiocb *job, int type) { 6092244ea07SJohn Dyson struct filedesc *fdp; 6102244ea07SJohn Dyson struct file *fp; 6112244ea07SJohn Dyson unsigned int fd; 6122244ea07SJohn Dyson 6132244ea07SJohn Dyson int error; 6142244ea07SJohn Dyson int opcode; 6152244ea07SJohn Dyson struct aiocblist *aiocbe; 6162244ea07SJohn Dyson struct aioproclist *aiop; 6172244ea07SJohn Dyson struct kaioinfo *ki; 6182244ea07SJohn Dyson 6192244ea07SJohn Dyson if (aiocbe = TAILQ_FIRST(&aio_freejobs)) { 6202244ea07SJohn Dyson TAILQ_REMOVE(&aio_freejobs, aiocbe, list); 6212244ea07SJohn Dyson } else { 6222244ea07SJohn Dyson aiocbe = malloc (sizeof *aiocbe, M_AIO, M_WAITOK); 6232244ea07SJohn Dyson } 6242244ea07SJohn Dyson 6252244ea07SJohn Dyson error = copyin((caddr_t)job, 6262244ea07SJohn Dyson (caddr_t) &aiocbe->uaiocb, sizeof aiocbe->uaiocb); 6272244ea07SJohn Dyson if (error) { 628a624e84fSJohn Dyson #if DEBUGAIO > 0 629a624e84fSJohn Dyson if (debugaio > 0) 630a624e84fSJohn Dyson printf("aio_aqueue: Copyin error: %d\n", error); 631a624e84fSJohn Dyson #endif 6322244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 6332244ea07SJohn Dyson return error; 6342244ea07SJohn Dyson } 6352244ea07SJohn Dyson 636a624e84fSJohn Dyson /* 637a624e84fSJohn Dyson * Get the opcode 638a624e84fSJohn Dyson */ 639a624e84fSJohn Dyson if (type != LIO_NOP) { 640a624e84fSJohn Dyson aiocbe->uaiocb.aio_lio_opcode = type; 641a624e84fSJohn Dyson } 642a624e84fSJohn Dyson opcode = aiocbe->uaiocb.aio_lio_opcode; 6432244ea07SJohn Dyson 6442244ea07SJohn Dyson /* 6452244ea07SJohn Dyson * Get the fd info for process 6462244ea07SJohn Dyson */ 6472244ea07SJohn Dyson fdp = p->p_fd; 6482244ea07SJohn Dyson 6492244ea07SJohn Dyson /* 6502244ea07SJohn Dyson * Range check file descriptor 6512244ea07SJohn Dyson */ 6522244ea07SJohn Dyson fd = aiocbe->uaiocb.aio_fildes; 6532244ea07SJohn Dyson if (fd >= fdp->fd_nfiles) { 
6542244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 6552244ea07SJohn Dyson if (type == 0) { 656a624e84fSJohn Dyson #if DEBUGAIO > 0 657a624e84fSJohn Dyson if (debugaio > 0) 658a624e84fSJohn Dyson printf("aio_aqueue: Null type\n"); 659a624e84fSJohn Dyson #endif 6602244ea07SJohn Dyson suword(&job->_aiocb_private.status, -1); 6612244ea07SJohn Dyson suword(&job->_aiocb_private.error, EBADF); 6622244ea07SJohn Dyson } 6632244ea07SJohn Dyson return EBADF; 6642244ea07SJohn Dyson } 6652244ea07SJohn Dyson 666c4860686SJohn Dyson #if DEBUGAIO > 0 667c4860686SJohn Dyson if (debugaio > 3) 668c4860686SJohn Dyson printf("aio_aqueue: fd: %d, cmd: %d, buf: %d, cnt: %d, fileoffset: %d\n", 669c4860686SJohn Dyson aiocbe->uaiocb.aio_fildes, 670c4860686SJohn Dyson aiocbe->uaiocb.aio_lio_opcode, 671c4860686SJohn Dyson (int) aiocbe->uaiocb.aio_buf & 0xffffffff, 672c4860686SJohn Dyson aiocbe->uaiocb.aio_nbytes, 673c4860686SJohn Dyson (int) aiocbe->uaiocb.aio_offset & 0xffffffff); 674c4860686SJohn Dyson #endif 675c4860686SJohn Dyson 676c4860686SJohn Dyson 6772244ea07SJohn Dyson fp = fdp->fd_ofiles[fd]; 678a624e84fSJohn Dyson if ((fp == NULL) || 679a624e84fSJohn Dyson ((opcode == LIO_WRITE) && ((fp->f_flag & FWRITE) == 0))) { 6802244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 6812244ea07SJohn Dyson if (type == 0) { 6822244ea07SJohn Dyson suword(&job->_aiocb_private.status, -1); 6832244ea07SJohn Dyson suword(&job->_aiocb_private.error, EBADF); 6842244ea07SJohn Dyson } 685a624e84fSJohn Dyson #if DEBUGAIO > 0 686a624e84fSJohn Dyson if (debugaio > 0) 687a624e84fSJohn Dyson printf("aio_aqueue: Bad file descriptor\n"); 688a624e84fSJohn Dyson #endif 6892244ea07SJohn Dyson return EBADF; 6902244ea07SJohn Dyson } 6912244ea07SJohn Dyson 6922244ea07SJohn Dyson if (aiocbe->uaiocb.aio_offset == -1LL) { 6932244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 6942244ea07SJohn Dyson if (type == 0) { 6952244ea07SJohn Dyson 
suword(&job->_aiocb_private.status, -1); 6962244ea07SJohn Dyson suword(&job->_aiocb_private.error, EINVAL); 6972244ea07SJohn Dyson } 698a624e84fSJohn Dyson #if DEBUGAIO > 0 699a624e84fSJohn Dyson if (debugaio > 0) 700a624e84fSJohn Dyson printf("aio_aqueue: bad offset\n"); 701a624e84fSJohn Dyson #endif 7022244ea07SJohn Dyson return EINVAL; 7032244ea07SJohn Dyson } 7042244ea07SJohn Dyson 705a624e84fSJohn Dyson #if DEBUGAIO > 0 706a624e84fSJohn Dyson if (debugaio > 2) 7072244ea07SJohn Dyson printf("job addr: 0x%x, 0x%x, %d\n", job, &job->_aiocb_private.kernelinfo, jobrefid); 7082244ea07SJohn Dyson #endif 7092244ea07SJohn Dyson 7102244ea07SJohn Dyson error = suword(&job->_aiocb_private.kernelinfo, jobrefid); 7112244ea07SJohn Dyson if (error) { 7122244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 7132244ea07SJohn Dyson if (type == 0) { 7142244ea07SJohn Dyson suword(&job->_aiocb_private.status, -1); 7152244ea07SJohn Dyson suword(&job->_aiocb_private.error, EINVAL); 7162244ea07SJohn Dyson } 717a624e84fSJohn Dyson #if DEBUGAIO > 0 718a624e84fSJohn Dyson if (debugaio > 0) 719a624e84fSJohn Dyson printf("aio_aqueue: fetch of kernelinfo from user space\n"); 720a624e84fSJohn Dyson #endif 7212244ea07SJohn Dyson return error; 7222244ea07SJohn Dyson } 7232244ea07SJohn Dyson 7242244ea07SJohn Dyson aiocbe->uaiocb._aiocb_private.kernelinfo = (void *)jobrefid; 725a624e84fSJohn Dyson #if DEBUGAIO > 0 726a624e84fSJohn Dyson if (debugaio > 2) 7272244ea07SJohn Dyson printf("aio_aqueue: New job: %d... 
", jobrefid); 7282244ea07SJohn Dyson #endif 7292244ea07SJohn Dyson ++jobrefid; 7302244ea07SJohn Dyson 7312244ea07SJohn Dyson if (opcode == LIO_NOP) { 7322244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 7332244ea07SJohn Dyson if (type == 0) { 7342244ea07SJohn Dyson suword(&job->_aiocb_private.status, -1); 7352244ea07SJohn Dyson suword(&job->_aiocb_private.error, 0); 7362244ea07SJohn Dyson } 7372244ea07SJohn Dyson return 0; 7382244ea07SJohn Dyson } 7392244ea07SJohn Dyson 7402244ea07SJohn Dyson if ((opcode != LIO_NOP) && 7412244ea07SJohn Dyson (opcode != LIO_READ) && (opcode != LIO_WRITE)) { 7422244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 7432244ea07SJohn Dyson if (type == 0) { 7442244ea07SJohn Dyson suword(&job->_aiocb_private.status, -1); 7452244ea07SJohn Dyson suword(&job->_aiocb_private.error, EINVAL); 7462244ea07SJohn Dyson } 747a624e84fSJohn Dyson #if DEBUGAIO > 0 748a624e84fSJohn Dyson if (debugaio > 0) 749a624e84fSJohn Dyson printf("aio_aqueue: invalid LIO op: %d\n", opcode); 750a624e84fSJohn Dyson #endif 7512244ea07SJohn Dyson return EINVAL; 7522244ea07SJohn Dyson } 7532244ea07SJohn Dyson 7542244ea07SJohn Dyson suword(&job->_aiocb_private.error, 0); 7552244ea07SJohn Dyson suword(&job->_aiocb_private.status, 0); 7562244ea07SJohn Dyson aiocbe->userproc = p; 7572244ea07SJohn Dyson aiocbe->jobflags = 0; 7582244ea07SJohn Dyson ki = p->p_aioinfo; 7592244ea07SJohn Dyson ++num_queue_count; 7602244ea07SJohn Dyson ++ki->kaio_queue_count; 7612244ea07SJohn Dyson 7622244ea07SJohn Dyson retryproc: 7632244ea07SJohn Dyson if (aiop = TAILQ_FIRST(&aio_freeproc)) { 764a624e84fSJohn Dyson #if DEBUGAIO > 0 765a624e84fSJohn Dyson if (debugaio > 0) 7662244ea07SJohn Dyson printf("found a free AIO process\n"); 7672244ea07SJohn Dyson #endif 7682244ea07SJohn Dyson TAILQ_REMOVE(&aio_freeproc, aiop, list); 7692244ea07SJohn Dyson TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list); 7702244ea07SJohn Dyson aiop->aioprocflags &= ~AIOP_FREE; 
7712244ea07SJohn Dyson TAILQ_INSERT_TAIL(&aiop->jobtorun, aiocbe, list); 7722244ea07SJohn Dyson TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist); 7732244ea07SJohn Dyson aiocbe->jobstate = JOBST_JOBQPROC; 774a624e84fSJohn Dyson 7752244ea07SJohn Dyson aiocbe->jobaioproc = aiop; 7762244ea07SJohn Dyson wakeup(aiop->aioproc); 7772244ea07SJohn Dyson } else if ((num_aio_procs < max_aio_procs) && 7782244ea07SJohn Dyson (ki->kaio_active_count < ki->kaio_maxactive_count)) { 779a624e84fSJohn Dyson #if DEBUGAIO > 0 780a624e84fSJohn Dyson if (debugaio > 1) { 781a624e84fSJohn Dyson printf("aio_aqueue: starting new proc: num_aio_procs(%d), max_aio_procs(%d)\n", num_aio_procs, max_aio_procs); 782a624e84fSJohn Dyson printf(" ki->kaio_active_count(%d), ki->kaio_maxactive_count(%d)\n", ki->kaio_active_count, ki->kaio_maxactive_count); 783a624e84fSJohn Dyson } 784a624e84fSJohn Dyson #endif 7852244ea07SJohn Dyson if (error = aio_newproc()) { 786a624e84fSJohn Dyson #if DEBUGAIO > 0 787a624e84fSJohn Dyson if (debugaio > 0) 7882244ea07SJohn Dyson printf("aio_aqueue: problem sleeping for starting proc: %d\n", 7892244ea07SJohn Dyson error); 7902244ea07SJohn Dyson #endif 7912244ea07SJohn Dyson } 7922244ea07SJohn Dyson goto retryproc; 7932244ea07SJohn Dyson } else { 794a624e84fSJohn Dyson #if DEBUGAIO > 0 795a624e84fSJohn Dyson if (debugaio > 0) 7962244ea07SJohn Dyson printf("queuing to global queue\n"); 7972244ea07SJohn Dyson #endif 7982244ea07SJohn Dyson TAILQ_INSERT_TAIL(&aio_jobs, aiocbe, list); 7992244ea07SJohn Dyson TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist); 8002244ea07SJohn Dyson aiocbe->jobstate = JOBST_JOBQGLOBAL; 8012244ea07SJohn Dyson } 8022244ea07SJohn Dyson 8032244ea07SJohn Dyson return 0; 8042244ea07SJohn Dyson } 8052244ea07SJohn Dyson 8062244ea07SJohn Dyson static int 8072244ea07SJohn Dyson aio_aqueue(struct proc *p, struct aiocb *job, int type) { 8082244ea07SJohn Dyson struct kaioinfo *ki; 8092244ea07SJohn Dyson 8102244ea07SJohn Dyson if (p->p_aioinfo == NULL) { 
8112244ea07SJohn Dyson aio_init_aioinfo(p); 8122244ea07SJohn Dyson } 8132244ea07SJohn Dyson 8142244ea07SJohn Dyson if (num_queue_count >= max_queue_count) 8152244ea07SJohn Dyson return EAGAIN; 8162244ea07SJohn Dyson 8172244ea07SJohn Dyson ki = p->p_aioinfo; 8182244ea07SJohn Dyson if (ki->kaio_queue_count >= ki->kaio_qallowed_count) 8192244ea07SJohn Dyson return EAGAIN; 8202244ea07SJohn Dyson 8212244ea07SJohn Dyson return _aio_aqueue(p, job, type); 8222244ea07SJohn Dyson } 8232244ea07SJohn Dyson 8242244ea07SJohn Dyson /* 8252244ea07SJohn Dyson * Support the aio_return system call 8262244ea07SJohn Dyson */ 8272244ea07SJohn Dyson int 828cb226aaaSPoul-Henning Kamp aio_return(struct proc *p, struct aio_return_args *uap) { 8292244ea07SJohn Dyson int jobref, status; 8302244ea07SJohn Dyson struct aiocblist *cb; 8312244ea07SJohn Dyson struct kaioinfo *ki; 8322244ea07SJohn Dyson 8332244ea07SJohn Dyson ki = p->p_aioinfo; 8342244ea07SJohn Dyson if (ki == NULL) { 8352244ea07SJohn Dyson return EINVAL; 8362244ea07SJohn Dyson } 8372244ea07SJohn Dyson 8382244ea07SJohn Dyson jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo); 8392244ea07SJohn Dyson if (jobref == -1) 8402244ea07SJohn Dyson return EINVAL; 8412244ea07SJohn Dyson 842a624e84fSJohn Dyson #if DEBUGAIO > 0 843a624e84fSJohn Dyson if (debugaio > 0) 844a624e84fSJohn Dyson printf("aio_return: jobref: %d\n", jobref); 845a624e84fSJohn Dyson #endif 846a624e84fSJohn Dyson 8472244ea07SJohn Dyson 8482244ea07SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_jobdone); 8492244ea07SJohn Dyson cb; 8502244ea07SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 8512244ea07SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 852cb226aaaSPoul-Henning Kamp p->p_retval[0] = cb->uaiocb._aiocb_private.status; 8532244ea07SJohn Dyson aio_free_entry(cb); 8542244ea07SJohn Dyson return 0; 8552244ea07SJohn Dyson } 8562244ea07SJohn Dyson } 8572244ea07SJohn Dyson 8582244ea07SJohn Dyson status = fuword(&uap->aiocbp->_aiocb_private.status); 
8592244ea07SJohn Dyson if (status == -1) 8602244ea07SJohn Dyson return 0; 8612244ea07SJohn Dyson 8622244ea07SJohn Dyson return (EINVAL); 8632244ea07SJohn Dyson } 8642244ea07SJohn Dyson 8652244ea07SJohn Dyson /* 8662244ea07SJohn Dyson * Rundown the jobs for a given process. 8672244ea07SJohn Dyson */ 8682244ea07SJohn Dyson void 8692244ea07SJohn Dyson aio_marksuspend(struct proc *p, int njobs, int *joblist, int set) { 8702244ea07SJohn Dyson struct aiocblist *aiocbe; 8712244ea07SJohn Dyson struct kaioinfo *ki; 8722244ea07SJohn Dyson 8732244ea07SJohn Dyson ki = p->p_aioinfo; 8742244ea07SJohn Dyson if (ki == NULL) 8752244ea07SJohn Dyson return; 8762244ea07SJohn Dyson 8772244ea07SJohn Dyson for (aiocbe = TAILQ_FIRST(&ki->kaio_jobqueue); 8782244ea07SJohn Dyson aiocbe; 8792244ea07SJohn Dyson aiocbe = TAILQ_NEXT(aiocbe, plist)) { 8802244ea07SJohn Dyson 8812244ea07SJohn Dyson if (njobs) { 8822244ea07SJohn Dyson 8832244ea07SJohn Dyson int i; 8842244ea07SJohn Dyson 8852244ea07SJohn Dyson for(i = 0; i < njobs; i++) { 8862244ea07SJohn Dyson if (((int) aiocbe->uaiocb._aiocb_private.kernelinfo) == joblist[i]) 8872244ea07SJohn Dyson break; 8882244ea07SJohn Dyson } 8892244ea07SJohn Dyson 8902244ea07SJohn Dyson if (i == njobs) 8912244ea07SJohn Dyson continue; 8922244ea07SJohn Dyson } 8932244ea07SJohn Dyson 8942244ea07SJohn Dyson if (set) 8952244ea07SJohn Dyson aiocbe->jobflags |= AIOCBLIST_SUSPEND; 8962244ea07SJohn Dyson else 8972244ea07SJohn Dyson aiocbe->jobflags &= ~AIOCBLIST_SUSPEND; 8982244ea07SJohn Dyson } 8992244ea07SJohn Dyson } 9002244ea07SJohn Dyson 9012244ea07SJohn Dyson /* 9022244ea07SJohn Dyson * Allow a process to wakeup when any of the I/O requests are 9032244ea07SJohn Dyson * completed. 
9042244ea07SJohn Dyson */ 9052244ea07SJohn Dyson int 906cb226aaaSPoul-Henning Kamp aio_suspend(struct proc *p, struct aio_suspend_args *uap) { 9074a11ca4eSPoul-Henning Kamp struct timeval atv; 9082244ea07SJohn Dyson struct timespec ts; 9092244ea07SJohn Dyson struct aiocb *const *cbptr, *cbp; 9102244ea07SJohn Dyson struct kaioinfo *ki; 9112244ea07SJohn Dyson struct aiocblist *cb; 9122244ea07SJohn Dyson int i; 9132244ea07SJohn Dyson int error, s, timo; 9142244ea07SJohn Dyson int *joblist; 9152244ea07SJohn Dyson 9162244ea07SJohn Dyson 9172244ea07SJohn Dyson timo = 0; 9182244ea07SJohn Dyson if (uap->timeout) { 9192244ea07SJohn Dyson /* 9202244ea07SJohn Dyson * Get timespec struct 9212244ea07SJohn Dyson */ 9222244ea07SJohn Dyson if (error = copyin((caddr_t) uap->timeout, (caddr_t) &ts, sizeof ts)) { 9232244ea07SJohn Dyson return error; 9242244ea07SJohn Dyson } 9252244ea07SJohn Dyson 9262244ea07SJohn Dyson if (ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000) 9272244ea07SJohn Dyson return (EINVAL); 9282244ea07SJohn Dyson 9292244ea07SJohn Dyson TIMESPEC_TO_TIMEVAL(&atv, &ts) 9302244ea07SJohn Dyson if (itimerfix(&atv)) 9312244ea07SJohn Dyson return (EINVAL); 9322244ea07SJohn Dyson /* 9332244ea07SJohn Dyson * XXX this is not as careful as settimeofday() about minimising 9342244ea07SJohn Dyson * interrupt latency. The hzto() interface is inconvenient as usual. 
9352244ea07SJohn Dyson */ 9362244ea07SJohn Dyson s = splclock(); 9372244ea07SJohn Dyson timevaladd(&atv, &time); 9382244ea07SJohn Dyson timo = hzto(&atv); 9392244ea07SJohn Dyson splx(s); 9402244ea07SJohn Dyson if (timo == 0) 9412244ea07SJohn Dyson timo = 1; 9422244ea07SJohn Dyson } 9432244ea07SJohn Dyson 9442244ea07SJohn Dyson ki = p->p_aioinfo; 9452244ea07SJohn Dyson if (ki == NULL) 9462244ea07SJohn Dyson return EAGAIN; 9472244ea07SJohn Dyson 9482244ea07SJohn Dyson joblist = malloc(uap->nent * sizeof(int), M_TEMP, M_WAITOK); 9492244ea07SJohn Dyson cbptr = uap->aiocbp; 9502244ea07SJohn Dyson 9512244ea07SJohn Dyson for(i=0;i<uap->nent;i++) { 9522244ea07SJohn Dyson cbp = (struct aiocb *) fuword((caddr_t) &cbptr[i]); 953a624e84fSJohn Dyson #if DEBUGAIO > 1 954a624e84fSJohn Dyson if (debugaio > 2) 9552244ea07SJohn Dyson printf("cbp: %x\n", cbp); 9562244ea07SJohn Dyson #endif 9572244ea07SJohn Dyson joblist[i] = fuword(&cbp->_aiocb_private.kernelinfo); 9582244ea07SJohn Dyson cbptr++; 9592244ea07SJohn Dyson } 9602244ea07SJohn Dyson 9612244ea07SJohn Dyson 9622244ea07SJohn Dyson while (1) { 9632244ea07SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_jobdone); 9642244ea07SJohn Dyson cb; 9652244ea07SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 9662244ea07SJohn Dyson for(i=0;i<uap->nent;i++) { 9672244ea07SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == joblist[i]) { 9682244ea07SJohn Dyson free(joblist, M_TEMP); 9692244ea07SJohn Dyson return 0; 9702244ea07SJohn Dyson } 9712244ea07SJohn Dyson } 9722244ea07SJohn Dyson } 9732244ea07SJohn Dyson 974a624e84fSJohn Dyson #if DEBUGAIO > 0 975a624e84fSJohn Dyson if (debugaio > 0) { 976a624e84fSJohn Dyson printf("Suspend, timeout: %d clocks, jobs:", timo); 977a624e84fSJohn Dyson for(i=0;i<uap->nent;i++) 978a624e84fSJohn Dyson printf(" %d", joblist[i]); 979a624e84fSJohn Dyson printf("\n"); 980a624e84fSJohn Dyson } 981a624e84fSJohn Dyson #endif 982a624e84fSJohn Dyson 9832244ea07SJohn Dyson aio_marksuspend(p, uap->nent, joblist, 1); 
984a624e84fSJohn Dyson #if DEBUGAIO > 0 985a624e84fSJohn Dyson if (debugaio > 2) { 9862244ea07SJohn Dyson printf("Suspending -- waiting for all I/O's to complete: "); 9872244ea07SJohn Dyson for(i=0;i<uap->nent;i++) 9882244ea07SJohn Dyson printf(" %d", joblist[i]); 9892244ea07SJohn Dyson printf("\n"); 990a624e84fSJohn Dyson } 9912244ea07SJohn Dyson #endif 9922244ea07SJohn Dyson error = tsleep(p, PRIBIO|PCATCH, "aiospn", timo); 9932244ea07SJohn Dyson aio_marksuspend(p, uap->nent, joblist, 0); 9942244ea07SJohn Dyson 9952244ea07SJohn Dyson if (error == EINTR) { 996a624e84fSJohn Dyson #if DEBUGAIO > 0 997a624e84fSJohn Dyson if (debugaio > 2) 9982244ea07SJohn Dyson printf(" signal\n"); 9992244ea07SJohn Dyson #endif 10002244ea07SJohn Dyson free(joblist, M_TEMP); 10012244ea07SJohn Dyson return EINTR; 10022244ea07SJohn Dyson } else if (error == EWOULDBLOCK) { 1003a624e84fSJohn Dyson #if DEBUGAIO > 0 1004a624e84fSJohn Dyson if (debugaio > 2) 10052244ea07SJohn Dyson printf(" timeout\n"); 10062244ea07SJohn Dyson #endif 10072244ea07SJohn Dyson free(joblist, M_TEMP); 10082244ea07SJohn Dyson return EAGAIN; 10092244ea07SJohn Dyson } 1010a624e84fSJohn Dyson #if DEBUGAIO > 0 1011a624e84fSJohn Dyson if (debugaio > 2) 10122244ea07SJohn Dyson printf("\n"); 10132244ea07SJohn Dyson #endif 10142244ea07SJohn Dyson } 10152244ea07SJohn Dyson 10162244ea07SJohn Dyson /* NOTREACHED */ 10172244ea07SJohn Dyson return EINVAL; 10182244ea07SJohn Dyson } 1019ee877a35SJohn Dyson 1020ee877a35SJohn Dyson /* 1021ee877a35SJohn Dyson * aio_cancel at the kernel level is a NOOP right now. It 1022ee877a35SJohn Dyson * might be possible to support it partially in user mode, or 1023ee877a35SJohn Dyson * in kernel mode later on. 
1024ee877a35SJohn Dyson */ 1025ee877a35SJohn Dyson int 1026cb226aaaSPoul-Henning Kamp aio_cancel(struct proc *p, struct aio_cancel_args *uap) { 1027ee877a35SJohn Dyson return AIO_NOTCANCELLED; 1028ee877a35SJohn Dyson } 1029ee877a35SJohn Dyson 1030ee877a35SJohn Dyson /* 1031ee877a35SJohn Dyson * aio_error is implemented in the kernel level for compatibility 1032ee877a35SJohn Dyson * purposes only. For a user mode async implementation, it would be 1033ee877a35SJohn Dyson * best to do it in a userland subroutine. 1034ee877a35SJohn Dyson */ 1035ee877a35SJohn Dyson int 1036cb226aaaSPoul-Henning Kamp aio_error(struct proc *p, struct aio_error_args *uap) { 10372244ea07SJohn Dyson struct aiocblist *cb; 10382244ea07SJohn Dyson struct kaioinfo *ki; 10392244ea07SJohn Dyson int jobref; 10404a11ca4eSPoul-Henning Kamp int status; 1041ee877a35SJohn Dyson 10422244ea07SJohn Dyson ki = p->p_aioinfo; 10432244ea07SJohn Dyson if (ki == NULL) 10442244ea07SJohn Dyson return EINVAL; 10452244ea07SJohn Dyson 10462244ea07SJohn Dyson jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo); 10472244ea07SJohn Dyson if (jobref == -1) 1048ee877a35SJohn Dyson return EFAULT; 1049ee877a35SJohn Dyson 10502244ea07SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_jobdone); 10512244ea07SJohn Dyson cb; 10522244ea07SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 10532244ea07SJohn Dyson 10542244ea07SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 1055cb226aaaSPoul-Henning Kamp p->p_retval[0] = cb->uaiocb._aiocb_private.error; 10562244ea07SJohn Dyson return 0; 10572244ea07SJohn Dyson } 1058ee877a35SJohn Dyson } 1059ee877a35SJohn Dyson 10602244ea07SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_jobqueue); 10612244ea07SJohn Dyson cb; 10622244ea07SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 10632244ea07SJohn Dyson 10642244ea07SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 1065cb226aaaSPoul-Henning Kamp p->p_retval[0] = EINPROGRESS; 10662244ea07SJohn Dyson return 0; 
10672244ea07SJohn Dyson } 10682244ea07SJohn Dyson } 10692244ea07SJohn Dyson 10702244ea07SJohn Dyson /* 10712244ea07SJohn Dyson * Hack for lio 10722244ea07SJohn Dyson */ 10732244ea07SJohn Dyson status = fuword(&uap->aiocbp->_aiocb_private.status); 10742244ea07SJohn Dyson if (status == -1) { 10752244ea07SJohn Dyson return fuword(&uap->aiocbp->_aiocb_private.error); 10762244ea07SJohn Dyson } 10772244ea07SJohn Dyson return EINVAL; 1078ee877a35SJohn Dyson } 1079ee877a35SJohn Dyson 1080ee877a35SJohn Dyson int 1081cb226aaaSPoul-Henning Kamp aio_read(struct proc *p, struct aio_read_args *uap) { 1082ee877a35SJohn Dyson struct filedesc *fdp; 1083ee877a35SJohn Dyson struct file *fp; 1084ee877a35SJohn Dyson struct uio auio; 1085ee877a35SJohn Dyson struct iovec aiov; 1086ee877a35SJohn Dyson unsigned int fd; 1087ee877a35SJohn Dyson int cnt; 1088ee877a35SJohn Dyson struct aiocb iocb; 10892244ea07SJohn Dyson int error, pmodes; 1090ee877a35SJohn Dyson 10912244ea07SJohn Dyson pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes); 10922244ea07SJohn Dyson if ((pmodes & AIO_PMODE_SYNC) == 0) { 1093a624e84fSJohn Dyson #if DEBUGAIO > 1 1094a624e84fSJohn Dyson if (debugaio > 2) 1095a624e84fSJohn Dyson printf("queueing aio_read\n"); 1096a624e84fSJohn Dyson #endif 10972244ea07SJohn Dyson return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ); 10982244ea07SJohn Dyson } 1099ee877a35SJohn Dyson 1100ee877a35SJohn Dyson /* 1101ee877a35SJohn Dyson * Get control block 1102ee877a35SJohn Dyson */ 1103ee877a35SJohn Dyson if (error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb)) 1104ee877a35SJohn Dyson return error; 1105ee877a35SJohn Dyson 1106ee877a35SJohn Dyson /* 1107ee877a35SJohn Dyson * Get the fd info for process 1108ee877a35SJohn Dyson */ 1109ee877a35SJohn Dyson fdp = p->p_fd; 1110ee877a35SJohn Dyson 1111ee877a35SJohn Dyson /* 1112ee877a35SJohn Dyson * Range check file descriptor 1113ee877a35SJohn Dyson */ 1114ee877a35SJohn Dyson fd = iocb.aio_fildes; 1115ee877a35SJohn 
Dyson if (fd >= fdp->fd_nfiles) 1116ee877a35SJohn Dyson return EBADF; 1117ee877a35SJohn Dyson fp = fdp->fd_ofiles[fd]; 1118ee877a35SJohn Dyson if ((fp == NULL) || ((fp->f_flag & FREAD) == 0)) 1119ee877a35SJohn Dyson return EBADF; 11202244ea07SJohn Dyson if (iocb.aio_offset == -1LL) 1121ee877a35SJohn Dyson return EINVAL; 1122ee877a35SJohn Dyson 1123ee877a35SJohn Dyson auio.uio_resid = iocb.aio_nbytes; 1124ee877a35SJohn Dyson if (auio.uio_resid < 0) 1125ee877a35SJohn Dyson return (EINVAL); 1126ee877a35SJohn Dyson 11272244ea07SJohn Dyson /* 11282244ea07SJohn Dyson * Process sync simply -- queue async request. 11292244ea07SJohn Dyson */ 11302244ea07SJohn Dyson if ((iocb._aiocb_private.privatemodes & AIO_PMODE_SYNC) == 0) { 11312244ea07SJohn Dyson return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ); 11322244ea07SJohn Dyson } 11332244ea07SJohn Dyson 11342244ea07SJohn Dyson aiov.iov_base = iocb.aio_buf; 11352244ea07SJohn Dyson aiov.iov_len = iocb.aio_nbytes; 11362244ea07SJohn Dyson 11372244ea07SJohn Dyson auio.uio_iov = &aiov; 11382244ea07SJohn Dyson auio.uio_iovcnt = 1; 11392244ea07SJohn Dyson auio.uio_offset = iocb.aio_offset; 1140ee877a35SJohn Dyson auio.uio_rw = UIO_READ; 1141ee877a35SJohn Dyson auio.uio_segflg = UIO_USERSPACE; 1142ee877a35SJohn Dyson auio.uio_procp = p; 1143ee877a35SJohn Dyson 1144ee877a35SJohn Dyson cnt = iocb.aio_nbytes; 1145ee877a35SJohn Dyson error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred); 1146ee877a35SJohn Dyson if (error && 1147ee877a35SJohn Dyson (auio.uio_resid != cnt) && 1148ee877a35SJohn Dyson (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) 1149ee877a35SJohn Dyson error = 0; 1150ee877a35SJohn Dyson cnt -= auio.uio_resid; 1151cb226aaaSPoul-Henning Kamp p->p_retval[0] = cnt; 1152ee877a35SJohn Dyson return error; 1153ee877a35SJohn Dyson } 1154ee877a35SJohn Dyson 1155ee877a35SJohn Dyson int 1156cb226aaaSPoul-Henning Kamp aio_write(struct proc *p, struct aio_write_args *uap) { 1157ee877a35SJohn Dyson struct filedesc 
*fdp; 1158ee877a35SJohn Dyson struct file *fp; 1159ee877a35SJohn Dyson struct uio auio; 1160ee877a35SJohn Dyson struct iovec aiov; 1161ee877a35SJohn Dyson unsigned int fd; 1162ee877a35SJohn Dyson int cnt; 1163ee877a35SJohn Dyson struct aiocb iocb; 1164ee877a35SJohn Dyson int error; 11652244ea07SJohn Dyson int pmodes; 11662244ea07SJohn Dyson 11672244ea07SJohn Dyson /* 11682244ea07SJohn Dyson * Process sync simply -- queue async request. 11692244ea07SJohn Dyson */ 11702244ea07SJohn Dyson pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes); 11712244ea07SJohn Dyson if ((pmodes & AIO_PMODE_SYNC) == 0) { 1172a624e84fSJohn Dyson #if DEBUGAIO > 1 1173a624e84fSJohn Dyson if (debugaio > 2) 1174a624e84fSJohn Dyson printf("queing aio_write\n"); 1175a624e84fSJohn Dyson #endif 11762244ea07SJohn Dyson return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_WRITE); 11772244ea07SJohn Dyson } 1178ee877a35SJohn Dyson 1179ee877a35SJohn Dyson if (error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb)) 1180ee877a35SJohn Dyson return error; 1181ee877a35SJohn Dyson 1182ee877a35SJohn Dyson /* 1183ee877a35SJohn Dyson * Get the fd info for process 1184ee877a35SJohn Dyson */ 1185ee877a35SJohn Dyson fdp = p->p_fd; 1186ee877a35SJohn Dyson 1187ee877a35SJohn Dyson /* 1188ee877a35SJohn Dyson * Range check file descriptor 1189ee877a35SJohn Dyson */ 1190ee877a35SJohn Dyson fd = iocb.aio_fildes; 1191ee877a35SJohn Dyson if (fd >= fdp->fd_nfiles) 1192ee877a35SJohn Dyson return EBADF; 1193ee877a35SJohn Dyson fp = fdp->fd_ofiles[fd]; 1194ee877a35SJohn Dyson if ((fp == NULL) || ((fp->f_flag & FWRITE) == 0)) 1195ee877a35SJohn Dyson return EBADF; 11962244ea07SJohn Dyson if (iocb.aio_offset == -1LL) 1197ee877a35SJohn Dyson return EINVAL; 1198ee877a35SJohn Dyson 1199ee877a35SJohn Dyson aiov.iov_base = iocb.aio_buf; 1200ee877a35SJohn Dyson aiov.iov_len = iocb.aio_nbytes; 1201ee877a35SJohn Dyson auio.uio_iov = &aiov; 1202ee877a35SJohn Dyson auio.uio_iovcnt = 1; 1203ee877a35SJohn Dyson 
auio.uio_offset = iocb.aio_offset; 1204ee877a35SJohn Dyson 1205ee877a35SJohn Dyson auio.uio_resid = iocb.aio_nbytes; 1206ee877a35SJohn Dyson if (auio.uio_resid < 0) 1207ee877a35SJohn Dyson return (EINVAL); 1208ee877a35SJohn Dyson 1209ee877a35SJohn Dyson auio.uio_rw = UIO_WRITE; 1210ee877a35SJohn Dyson auio.uio_segflg = UIO_USERSPACE; 1211ee877a35SJohn Dyson auio.uio_procp = p; 1212ee877a35SJohn Dyson 1213ee877a35SJohn Dyson cnt = iocb.aio_nbytes; 1214ee877a35SJohn Dyson error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred); 1215ee877a35SJohn Dyson if (error) { 1216ee877a35SJohn Dyson if (auio.uio_resid != cnt) { 1217ee877a35SJohn Dyson if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) 1218ee877a35SJohn Dyson error = 0; 1219ee877a35SJohn Dyson if (error == EPIPE) 1220ee877a35SJohn Dyson psignal(p, SIGPIPE); 1221ee877a35SJohn Dyson } 1222ee877a35SJohn Dyson } 1223ee877a35SJohn Dyson cnt -= auio.uio_resid; 1224cb226aaaSPoul-Henning Kamp p->p_retval[0] = cnt; 1225ee877a35SJohn Dyson return error; 1226ee877a35SJohn Dyson } 1227ee877a35SJohn Dyson 1228ee877a35SJohn Dyson int 1229cb226aaaSPoul-Henning Kamp lio_listio(struct proc *p, struct lio_listio_args *uap) { 12304a11ca4eSPoul-Henning Kamp int nent, nentqueued; 12312244ea07SJohn Dyson struct aiocb *iocb, * const *cbptr; 12322244ea07SJohn Dyson struct aiocblist *cb; 12332244ea07SJohn Dyson struct kaioinfo *ki; 12342244ea07SJohn Dyson int error, runningcode; 1235ee877a35SJohn Dyson int i; 1236ee877a35SJohn Dyson 1237a624e84fSJohn Dyson if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) { 1238a624e84fSJohn Dyson #if DEBUGAIO > 0 1239a624e84fSJohn Dyson if (debugaio > 0) 1240a624e84fSJohn Dyson printf("lio_listio: bad mode: %d\n", uap->mode); 1241a624e84fSJohn Dyson #endif 1242ee877a35SJohn Dyson return EINVAL; 1243a624e84fSJohn Dyson } 12442244ea07SJohn Dyson 12452244ea07SJohn Dyson nent = uap->nent; 1246a624e84fSJohn Dyson if (nent > AIO_LISTIO_MAX) { 1247a624e84fSJohn Dyson #if DEBUGAIO > 0 
1248a624e84fSJohn Dyson if (debugaio > 0) 1249a624e84fSJohn Dyson printf("lio_listio: nent > AIO_LISTIO_MAX: %d > %d\n", nent, AIO_LISTIO_MAX); 1250a624e84fSJohn Dyson #endif 12512244ea07SJohn Dyson return EINVAL; 1252a624e84fSJohn Dyson } 12532244ea07SJohn Dyson 12542244ea07SJohn Dyson if (p->p_aioinfo == NULL) { 12552244ea07SJohn Dyson aio_init_aioinfo(p); 12562244ea07SJohn Dyson } 12572244ea07SJohn Dyson 1258a624e84fSJohn Dyson if ((nent + num_queue_count) > max_queue_count) { 1259a624e84fSJohn Dyson #if DEBUGAIO > 0 1260a624e84fSJohn Dyson if (debugaio > 0) 1261a624e84fSJohn Dyson printf("lio_listio: (nent(%d) + num_queue_count(%d)) > max_queue_count(%d)\n", nent, num_queue_count, max_queue_count); 1262a624e84fSJohn Dyson #endif 12632244ea07SJohn Dyson return EAGAIN; 1264a624e84fSJohn Dyson } 12652244ea07SJohn Dyson 12662244ea07SJohn Dyson ki = p->p_aioinfo; 1267a624e84fSJohn Dyson if ((nent + ki->kaio_queue_count) > ki->kaio_qallowed_count) { 1268a624e84fSJohn Dyson #if DEBUGAIO > 0 1269a624e84fSJohn Dyson if (debugaio > 0) 1270a624e84fSJohn Dyson printf("lio_listio: (nent(%d) + ki->kaio_queue_count(%d)) > ki->kaio_qallowed_count(%d)\n", nent, ki->kaio_queue_count, ki->kaio_qallowed_count); 1271a624e84fSJohn Dyson #endif 12722244ea07SJohn Dyson return EAGAIN; 1273a624e84fSJohn Dyson } 12742244ea07SJohn Dyson 12752244ea07SJohn Dyson /* 12762244ea07SJohn Dyson num_queue_count += nent; 12772244ea07SJohn Dyson ki->kaio_queue_count += nent; 1278a624e84fSJohn Dyson */ 12792244ea07SJohn Dyson nentqueued = 0; 12802244ea07SJohn Dyson 12812244ea07SJohn Dyson /* 12822244ea07SJohn Dyson * get pointers to the list of I/O requests 12832244ea07SJohn Dyson iocbvec = malloc(uap->nent * sizeof(struct aiocb *), M_TEMP, M_WAITOK); 12842244ea07SJohn Dyson */ 12852244ea07SJohn Dyson 12862244ea07SJohn Dyson cbptr = uap->acb_list; 12872244ea07SJohn Dyson for(i = 0; i < uap->nent; i++) { 12882244ea07SJohn Dyson iocb = (struct aiocb *) fuword((caddr_t) &cbptr[i]); 1289a624e84fSJohn 
Dyson error = _aio_aqueue(p, iocb, 0); 12902244ea07SJohn Dyson if (error == 0) 12912244ea07SJohn Dyson nentqueued++; 12922244ea07SJohn Dyson } 12932244ea07SJohn Dyson 1294a624e84fSJohn Dyson /* 1295a624e84fSJohn Dyson * If we haven't queued any, then just return error 1296a624e84fSJohn Dyson */ 1297a624e84fSJohn Dyson if (nentqueued == 0) { 1298a624e84fSJohn Dyson #if DEBUGAIO > 0 1299a624e84fSJohn Dyson if (debugaio > 0) 1300a624e84fSJohn Dyson printf("lio_listio: none queued\n"); 1301a624e84fSJohn Dyson #endif 13022244ea07SJohn Dyson return EIO; 1303a624e84fSJohn Dyson } 13042244ea07SJohn Dyson 1305a624e84fSJohn Dyson #if DEBUGAIO > 0 1306a624e84fSJohn Dyson if (debugaio > 0) 1307a624e84fSJohn Dyson printf("lio_listio: %d queued\n", nentqueued); 1308a624e84fSJohn Dyson #endif 1309a624e84fSJohn Dyson 1310a624e84fSJohn Dyson /* 1311a624e84fSJohn Dyson * Calculate the appropriate error return 1312a624e84fSJohn Dyson */ 13132244ea07SJohn Dyson runningcode = 0; 13142244ea07SJohn Dyson if (nentqueued != nent) 13152244ea07SJohn Dyson runningcode = EIO; 13162244ea07SJohn Dyson 13172244ea07SJohn Dyson if (uap->mode == LIO_WAIT) { 13182244ea07SJohn Dyson while (1) { 13192244ea07SJohn Dyson for(i = 0; i < uap->nent; i++) { 13202244ea07SJohn Dyson int found; 13212244ea07SJohn Dyson int jobref, command, status; 13222244ea07SJohn Dyson 1323a624e84fSJohn Dyson /* 1324a624e84fSJohn Dyson * Fetch address of the control buf pointer in user space 1325a624e84fSJohn Dyson */ 13262244ea07SJohn Dyson iocb = (struct aiocb *) fuword((caddr_t) &cbptr[i]); 1327a624e84fSJohn Dyson 1328a624e84fSJohn Dyson /* 1329a624e84fSJohn Dyson * Fetch the associated command from user space 1330a624e84fSJohn Dyson */ 13312244ea07SJohn Dyson command = fuword(&iocb->aio_lio_opcode); 13322244ea07SJohn Dyson if (command == LIO_NOP) 13332244ea07SJohn Dyson continue; 13342244ea07SJohn Dyson 1335a624e84fSJohn Dyson /* 1336a624e84fSJohn Dyson * If the status shows error or complete, then skip this entry. 
1337a624e84fSJohn Dyson */ 13382244ea07SJohn Dyson status = fuword(&iocb->_aiocb_private.status); 1339a624e84fSJohn Dyson if (status != 0) 13402244ea07SJohn Dyson continue; 1341a624e84fSJohn Dyson 13422244ea07SJohn Dyson jobref = fuword(&iocb->_aiocb_private.kernelinfo); 13432244ea07SJohn Dyson 13442244ea07SJohn Dyson found = 0; 13452244ea07SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_jobdone); 13462244ea07SJohn Dyson cb; 13472244ea07SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 13482244ea07SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 13492244ea07SJohn Dyson found++; 13502244ea07SJohn Dyson break; 13512244ea07SJohn Dyson } 13522244ea07SJohn Dyson } 13532244ea07SJohn Dyson if (found == 0) 13542244ea07SJohn Dyson break; 13552244ea07SJohn Dyson } 13562244ea07SJohn Dyson 1357a624e84fSJohn Dyson /* 1358a624e84fSJohn Dyson * If all I/Os have been disposed of, then we can return 1359a624e84fSJohn Dyson */ 13602244ea07SJohn Dyson if (i == uap->nent) { 13612244ea07SJohn Dyson return runningcode; 13622244ea07SJohn Dyson } 13632244ea07SJohn Dyson 13642244ea07SJohn Dyson aio_marksuspend(p, 0, 0, 1); 13652244ea07SJohn Dyson error = tsleep(p, PRIBIO|PCATCH, "aiospn", 0); 13662244ea07SJohn Dyson aio_marksuspend(p, 0, 0, 0); 13672244ea07SJohn Dyson 13682244ea07SJohn Dyson if (error == EINTR) { 13692244ea07SJohn Dyson return EINTR; 13702244ea07SJohn Dyson } else if (error == EWOULDBLOCK) { 13712244ea07SJohn Dyson return EAGAIN; 13722244ea07SJohn Dyson } 13732244ea07SJohn Dyson 13742244ea07SJohn Dyson } 13752244ea07SJohn Dyson } 13762244ea07SJohn Dyson 13772244ea07SJohn Dyson return runningcode; 1378ee877a35SJohn Dyson } 1379