1ee877a35SJohn Dyson /* 2ee877a35SJohn Dyson * Copyright (c) 1997 John S. Dyson. All rights reserved. 3ee877a35SJohn Dyson * 4ee877a35SJohn Dyson * Redistribution and use in source and binary forms, with or without 5ee877a35SJohn Dyson * modification, are permitted provided that the following conditions 6ee877a35SJohn Dyson * are met: 7ee877a35SJohn Dyson * 1. Redistributions of source code must retain the above copyright 8ee877a35SJohn Dyson * notice, this list of conditions and the following disclaimer. 9ee877a35SJohn Dyson * 2. John S. Dyson's name may not be used to endorse or promote products 10ee877a35SJohn Dyson * derived from this software without specific prior written permission. 11ee877a35SJohn Dyson * 12ee877a35SJohn Dyson * DISCLAIMER: This code isn't warranted to do anything useful. Anything 13ee877a35SJohn Dyson * bad that happens because of using this software isn't the responsibility 14ee877a35SJohn Dyson * of the author. This software is distributed AS-IS. 15ee877a35SJohn Dyson * 16fd3bf775SJohn Dyson * $Id: vfs_aio.c,v 1.11 1997/11/18 10:02:40 bde Exp $ 17ee877a35SJohn Dyson */ 18ee877a35SJohn Dyson 19ee877a35SJohn Dyson /* 20ee877a35SJohn Dyson * This file contains support for the POSIX.4 AIO facility. 21ee877a35SJohn Dyson * 22ee877a35SJohn Dyson * The initial version provides only the (bogus) synchronous semantics 23ee877a35SJohn Dyson * but will support async in the future. Note that a bit 24ee877a35SJohn Dyson * in a private field allows the user mode subroutine to adapt 25ee877a35SJohn Dyson * the kernel operations to true POSIX.4 for future compatibility. 26ee877a35SJohn Dyson * 27ee877a35SJohn Dyson * This code is used to support true POSIX.4 AIO/LIO with the help 28ee877a35SJohn Dyson * of a user mode subroutine package. Note that eventually more support 29ee877a35SJohn Dyson * will be pushed into the kernel. 30ee877a35SJohn Dyson */ 31ee877a35SJohn Dyson 32ee877a35SJohn Dyson #include <sys/param.h> 33ee877a35SJohn Dyson #include <sys/systm.h> 34ee877a35SJohn Dyson #include <sys/sysproto.h> 35ee877a35SJohn Dyson #include <sys/filedesc.h> 36ee877a35SJohn Dyson #include <sys/kernel.h> 37ee877a35SJohn Dyson #include <sys/fcntl.h> 38ee877a35SJohn Dyson #include <sys/file.h> 39fdebd4f0SBruce Evans #include <sys/lock.h> 40ee877a35SJohn Dyson #include <sys/unistd.h> 41ee877a35SJohn Dyson #include <sys/proc.h> 42ee877a35SJohn Dyson #include <sys/uio.h> 43ee877a35SJohn Dyson #include <sys/malloc.h> 44ee877a35SJohn Dyson #include <sys/signalvar.h> 45a624e84fSJohn Dyson #include <sys/sysctl.h> 46fd3bf775SJohn Dyson #include <sys/vnode.h> 47fd3bf775SJohn Dyson #include <sys/conf.h> 48fd3bf775SJohn Dyson #include <miscfs/specfs/specdev.h> 49ee877a35SJohn Dyson 50ee877a35SJohn Dyson #include <vm/vm.h> 51ee877a35SJohn Dyson #include <vm/vm_param.h> 52ee877a35SJohn Dyson #include <vm/vm_extern.h> 532244ea07SJohn Dyson #include <vm/pmap.h> 542244ea07SJohn Dyson #include <vm/vm_map.h> 55fd3bf775SJohn Dyson #include <vm/vm_zone.h> 56ee877a35SJohn Dyson #include <sys/aio.h> 575aaef07cSJohn Dyson #include <sys/shm.h> 58fd3bf775SJohn Dyson #include <sys/user.h> 595aaef07cSJohn Dyson 605aaef07cSJohn Dyson #include <machine/cpu.h> 61ee877a35SJohn Dyson 622244ea07SJohn Dyson #define AIOCBLIST_CANCELLED 0x1 632244ea07SJohn Dyson #define AIOCBLIST_RUNDOWN 0x4 642244ea07SJohn Dyson #define AIOCBLIST_ASYNCFREE 0x8 652244ea07SJohn Dyson 662244ea07SJohn Dyson #if 0 672244ea07SJohn Dyson #define DEBUGAIO 682244ea07SJohn Dyson #define DIAGNOSTIC 692244ea07SJohn Dyson #endif 702244ea07SJohn Dyson 71a624e84fSJohn Dyson #define DEBUGAIO 1 72a624e84fSJohn Dyson 732244ea07SJohn Dyson static int jobrefid; 742244ea07SJohn Dyson 752244ea07SJohn Dyson #define JOBST_NULL 0x0 762244ea07SJohn Dyson #define JOBST_JOBQPROC 0x1 772244ea07SJohn Dyson #define JOBST_JOBQGLOBAL 0x2 782244ea07SJohn Dyson #define JOBST_JOBRUNNING 0x3 792244ea07SJohn Dyson #define JOBST_JOBFINISHED 0x4 80fd3bf775SJohn Dyson #define JOBST_JOBQBUF 0x5 81fd3bf775SJohn Dyson #define JOBST_JOBBFINISHED 0x6 822244ea07SJohn Dyson 832244ea07SJohn Dyson #define MAX_AIO_PER_PROC 32 842244ea07SJohn Dyson #define MAX_AIO_QUEUE_PER_PROC 256 /* Bigger than AIO_LISTIO_MAX */ 85fd3bf775SJohn Dyson #define MAX_AIO_PROCS 32 862244ea07SJohn Dyson #define MAX_AIO_QUEUE 1024 /* Bigger than AIO_LISTIO_MAX */ 87fd3bf775SJohn Dyson #define TARGET_AIO_PROCS 16 88fd3bf775SJohn Dyson #define MAX_AIO_BALLOW_PER_PROC 16 892244ea07SJohn Dyson 90a624e84fSJohn Dyson int max_aio_procs = MAX_AIO_PROCS; 91a624e84fSJohn Dyson int num_aio_procs = 0; 92a624e84fSJohn Dyson int target_aio_procs = TARGET_AIO_PROCS; 93a624e84fSJohn Dyson int max_queue_count = MAX_AIO_QUEUE; 94a624e84fSJohn Dyson int num_queue_count = 0; 95fd3bf775SJohn Dyson int num_buf_aio = 0; 96fd3bf775SJohn Dyson int num_aio_resv_start = 0; 97a624e84fSJohn Dyson 98a624e84fSJohn Dyson int max_aio_per_proc = MAX_AIO_PER_PROC, 99a624e84fSJohn Dyson max_aio_queue_per_proc=MAX_AIO_QUEUE_PER_PROC; 100a624e84fSJohn Dyson 101fd3bf775SJohn Dyson int max_aio_ballow_per_proc = MAX_AIO_BALLOW_PER_PROC; 102a624e84fSJohn Dyson 103a624e84fSJohn Dyson SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0, "AIO mgmt"); 104a624e84fSJohn Dyson 105a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc, 106a624e84fSJohn Dyson CTLFLAG_RW, &max_aio_per_proc, 0, ""); 107a624e84fSJohn Dyson 108a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc, 109a624e84fSJohn Dyson CTLFLAG_RW, &max_aio_queue_per_proc, 0, ""); 110a624e84fSJohn Dyson 111a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs, 112a624e84fSJohn Dyson CTLFLAG_RW, &max_aio_procs, 0, ""); 113a624e84fSJohn Dyson 114a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs, 115a624e84fSJohn Dyson CTLFLAG_RD, &num_aio_procs, 0, ""); 116a624e84fSJohn Dyson 117a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count, 118a624e84fSJohn Dyson CTLFLAG_RD, &num_queue_count, 0, ""); 119a624e84fSJohn Dyson 120a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue, 121a624e84fSJohn Dyson CTLFLAG_RW, &max_queue_count, 0, ""); 122a624e84fSJohn Dyson 123a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs, 124a624e84fSJohn Dyson CTLFLAG_RW, &target_aio_procs, 0, ""); 125a624e84fSJohn Dyson 126fd3bf775SJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_ballow_per_proc, 127fd3bf775SJohn Dyson CTLFLAG_RW, &max_aio_ballow_per_proc, 0, ""); 128fd3bf775SJohn Dyson 129fd3bf775SJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, num_buf_aio, 130fd3bf775SJohn Dyson CTLFLAG_RD, &num_buf_aio, 0, ""); 131fd3bf775SJohn Dyson 132a624e84fSJohn Dyson #if DEBUGAIO > 0 133a624e84fSJohn Dyson static int debugaio; 134a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, debugaio, CTLFLAG_RW, &debugaio, 0, ""); 135a624e84fSJohn Dyson #endif 136a624e84fSJohn Dyson 137fd3bf775SJohn Dyson #define DEBUGFLOW (debugaio & 0xff) 138fd3bf775SJohn Dyson #define DEBUGREQ ((debugaio & 0xff00) >> 8) 139fd3bf775SJohn Dyson 1402244ea07SJohn Dyson /* 1412244ea07SJohn Dyson * Job queue item 1422244ea07SJohn Dyson */ 1432244ea07SJohn Dyson struct aiocblist { 1442244ea07SJohn Dyson TAILQ_ENTRY (aiocblist) list; /* List of jobs */ 1452244ea07SJohn Dyson TAILQ_ENTRY (aiocblist) plist; /* List of jobs for proc */ 1462244ea07SJohn Dyson int jobflags; 1472244ea07SJohn Dyson int jobstate; 148fd3bf775SJohn Dyson int inputcharge, outputcharge; 149fd3bf775SJohn Dyson struct buf *bp; /* buffer pointer */ 1502244ea07SJohn Dyson struct proc *userproc; /* User process */ 1512244ea07SJohn Dyson struct aioproclist *jobaioproc; /* AIO process descriptor */ 1522244ea07SJohn Dyson struct aiocb uaiocb; /* Kernel I/O control block */ 1532244ea07SJohn Dyson }; 1542244ea07SJohn Dyson 1552244ea07SJohn Dyson #define AIOP_FREE 0x1 /* proc on free queue */ 156fd3bf775SJohn Dyson #define AIOP_SCHED 0x2 /* proc explicitly scheduled */ 157fd3bf775SJohn Dyson 1582244ea07SJohn Dyson /* 1592244ea07SJohn Dyson * AIO process info 1602244ea07SJohn Dyson */ 1612244ea07SJohn Dyson struct aioproclist { 1622244ea07SJohn Dyson int aioprocflags; /* AIO proc flags */ 1632244ea07SJohn Dyson TAILQ_ENTRY(aioproclist) list; /* List of processes */ 1642244ea07SJohn Dyson struct proc *aioproc; /* The AIO thread */ 1652244ea07SJohn Dyson TAILQ_HEAD (,aiocblist) jobtorun; /* suggested job to run */ 1662244ea07SJohn Dyson }; 1672244ea07SJohn Dyson 1682244ea07SJohn Dyson struct kaioinfo { 169fd3bf775SJohn Dyson int kaio_flags; /* per process kaio flags */ 1702244ea07SJohn Dyson int kaio_maxactive_count; /* maximum number of AIOs */ 1712244ea07SJohn Dyson int kaio_active_count; /* number of currently used AIOs */ 1722244ea07SJohn Dyson int kaio_qallowed_count; /* maxiumu size of AIO queue */ 1732244ea07SJohn Dyson int kaio_queue_count; /* size of AIO queue */ 174fd3bf775SJohn Dyson int kaio_ballowed_count; /* maximum number of buffers */ 175fd3bf775SJohn Dyson int kaio_buffer_count; /* number of physio buffers */ 1762244ea07SJohn Dyson TAILQ_HEAD (,aiocblist) kaio_jobqueue; /* job queue for process */ 1772244ea07SJohn Dyson TAILQ_HEAD (,aiocblist) kaio_jobdone; /* done queue for process */ 178fd3bf775SJohn Dyson TAILQ_HEAD (,aiocblist) kaio_bufqueue; /* buffer job queue for process */ 179fd3bf775SJohn Dyson TAILQ_HEAD (,aiocblist) kaio_bufdone; /* buffer done queue for process */ 1802244ea07SJohn Dyson }; 1812244ea07SJohn Dyson 182fd3bf775SJohn Dyson #define KAIO_RUNDOWN 0x1 183fd3bf775SJohn Dyson #define KAIO_WAKEUP 0x2 184fd3bf775SJohn Dyson 1852244ea07SJohn Dyson TAILQ_HEAD (,aioproclist) aio_freeproc, aio_activeproc; 1862244ea07SJohn Dyson TAILQ_HEAD(,aiocblist) aio_jobs; /* Async job list */ 187fd3bf775SJohn Dyson TAILQ_HEAD(,aiocblist) aio_bufjobs; /* Phys I/O job list */ 1882244ea07SJohn Dyson TAILQ_HEAD(,aiocblist) aio_freejobs; 1892244ea07SJohn Dyson 190fd3bf775SJohn Dyson static void aio_init_aioinfo(struct proc *p) ; 191fd3bf775SJohn Dyson static void aio_onceonly(void *) ; 192fd3bf775SJohn Dyson static int aio_free_entry(struct aiocblist *aiocbe); 193fd3bf775SJohn Dyson static void aio_process(struct aiocblist *aiocbe); 1942244ea07SJohn Dyson static int aio_newproc(void) ; 1952244ea07SJohn Dyson static int aio_aqueue(struct proc *p, struct aiocb *job, int type) ; 196fd3bf775SJohn Dyson static void aio_physwakeup(struct buf *bp); 197fd3bf775SJohn Dyson static int aio_fphysio(struct proc *p, struct aiocblist *aiocbe, int type); 198fd3bf775SJohn Dyson static int aio_qphysio(struct proc *p, struct aiocblist *iocb); 199fd3bf775SJohn Dyson static void aio_daemon(void *uproc); 2002244ea07SJohn Dyson 2012244ea07SJohn Dyson SYSINIT(aio, SI_SUB_VFS, SI_ORDER_ANY, aio_onceonly, NULL); 2022244ea07SJohn Dyson 203fd3bf775SJohn Dyson static vm_zone_t kaio_zone=0, aiop_zone=0, aiocb_zone=0, aiol_zone=0; 204fd3bf775SJohn Dyson 205fd3bf775SJohn Dyson /* 206fd3bf775SJohn Dyson * Single AIOD vmspace shared amongst all of them 207fd3bf775SJohn Dyson */ 208fd3bf775SJohn Dyson static struct vmspace *aiovmspace = NULL; 209a624e84fSJohn Dyson 2102244ea07SJohn Dyson /* 2112244ea07SJohn Dyson * Startup initialization 2122244ea07SJohn Dyson */ 2132244ea07SJohn Dyson void 214fd3bf775SJohn Dyson aio_onceonly(void *na) 215fd3bf775SJohn Dyson { 2162244ea07SJohn Dyson TAILQ_INIT(&aio_freeproc); 2172244ea07SJohn Dyson TAILQ_INIT(&aio_activeproc); 2182244ea07SJohn Dyson TAILQ_INIT(&aio_jobs); 219fd3bf775SJohn Dyson TAILQ_INIT(&aio_bufjobs); 2202244ea07SJohn Dyson TAILQ_INIT(&aio_freejobs); 221fd3bf775SJohn Dyson kaio_zone = zinit("AIO", sizeof (struct kaioinfo), 0, 0, 1); 222fd3bf775SJohn Dyson aiop_zone = zinit("AIOP", sizeof (struct aioproclist), 0, 0, 1); 223fd3bf775SJohn Dyson aiocb_zone = zinit("AIOCB", sizeof (struct aiocblist), 0, 0, 1); 224fd3bf775SJohn Dyson aiol_zone = zinit("AIOL", AIO_LISTIO_MAX * sizeof (int), 0, 0, 1); 225fd3bf775SJohn Dyson jobrefid = 1; 2262244ea07SJohn Dyson } 2272244ea07SJohn Dyson 2282244ea07SJohn Dyson /* 2292244ea07SJohn Dyson * Init the per-process aioinfo structure. 2302244ea07SJohn Dyson */ 2312244ea07SJohn Dyson void 232fd3bf775SJohn Dyson aio_init_aioinfo(struct proc *p) 233fd3bf775SJohn Dyson { 2342244ea07SJohn Dyson struct kaioinfo *ki; 2352244ea07SJohn Dyson if (p->p_aioinfo == NULL) { 236fd3bf775SJohn Dyson ki = zalloc(kaio_zone); 2372244ea07SJohn Dyson p->p_aioinfo = ki; 238a624e84fSJohn Dyson ki->kaio_maxactive_count = max_aio_per_proc; 2392244ea07SJohn Dyson ki->kaio_active_count = 0; 240a624e84fSJohn Dyson ki->kaio_qallowed_count = max_aio_queue_per_proc; 2412244ea07SJohn Dyson ki->kaio_queue_count = 0; 242fd3bf775SJohn Dyson ki->kaio_ballowed_count = max_aio_ballow_per_proc; 243fd3bf775SJohn Dyson ki->kaio_buffer_count = 0; 2442244ea07SJohn Dyson TAILQ_INIT(&ki->kaio_jobdone); 2452244ea07SJohn Dyson TAILQ_INIT(&ki->kaio_jobqueue); 246fd3bf775SJohn Dyson TAILQ_INIT(&ki->kaio_bufdone); 247fd3bf775SJohn Dyson TAILQ_INIT(&ki->kaio_bufqueue); 2482244ea07SJohn Dyson } 2492244ea07SJohn Dyson } 2502244ea07SJohn Dyson 2512244ea07SJohn Dyson /* 2522244ea07SJohn Dyson * Free a job entry. Wait for completion if it is currently 2532244ea07SJohn Dyson * active, but don't delay forever. If we delay, we return 2542244ea07SJohn Dyson * a flag that says that we have to restart the queue scan. 2552244ea07SJohn Dyson */ 2562244ea07SJohn Dyson int 257fd3bf775SJohn Dyson aio_free_entry(struct aiocblist *aiocbe) 258fd3bf775SJohn Dyson { 2592244ea07SJohn Dyson struct kaioinfo *ki; 2602244ea07SJohn Dyson struct aioproclist *aiop; 2612244ea07SJohn Dyson struct proc *p; 262fd3bf775SJohn Dyson int error; 2632244ea07SJohn Dyson 2642244ea07SJohn Dyson if (aiocbe->jobstate == JOBST_NULL) 2652244ea07SJohn Dyson panic("aio_free_entry: freeing already free job"); 2662244ea07SJohn Dyson 2672244ea07SJohn Dyson p = aiocbe->userproc; 2682244ea07SJohn Dyson ki = p->p_aioinfo; 2692244ea07SJohn Dyson if (ki == NULL) 2702244ea07SJohn Dyson panic("aio_free_entry: missing p->p_aioinfo"); 2712244ea07SJohn Dyson 2722244ea07SJohn Dyson if (aiocbe->jobstate == JOBST_JOBRUNNING) { 2732244ea07SJohn Dyson if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) 2742244ea07SJohn Dyson return 0; 2752244ea07SJohn Dyson aiocbe->jobflags |= AIOCBLIST_RUNDOWN; 276a624e84fSJohn Dyson tsleep(aiocbe, PRIBIO|PCATCH, "jobwai", 0); 2772244ea07SJohn Dyson } 2782244ea07SJohn Dyson aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE; 2792244ea07SJohn Dyson 280fd3bf775SJohn Dyson if (aiocbe->bp == NULL) { 2812244ea07SJohn Dyson if (ki->kaio_queue_count <= 0) 2822244ea07SJohn Dyson panic("aio_free_entry: process queue size <= 0"); 2832244ea07SJohn Dyson if (num_queue_count <= 0) 2842244ea07SJohn Dyson panic("aio_free_entry: system wide queue size <= 0"); 2852244ea07SJohn Dyson 2862244ea07SJohn Dyson --ki->kaio_queue_count; 2872244ea07SJohn Dyson --num_queue_count; 288fd3bf775SJohn Dyson 289a624e84fSJohn Dyson #if DEBUGAIO > 0 290fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 291fd3bf775SJohn Dyson printf("freeing normal file I/O entry: Proc Q: %d, Global Q: %d\n", 292a624e84fSJohn Dyson ki->kaio_queue_count, num_queue_count); 293a624e84fSJohn Dyson #endif 294fd3bf775SJohn Dyson } else { 295fd3bf775SJohn Dyson --ki->kaio_buffer_count; 296fd3bf775SJohn Dyson --num_buf_aio; 2972244ea07SJohn Dyson 298fd3bf775SJohn Dyson #if DEBUGAIO > 0 299fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 300fd3bf775SJohn Dyson printf("freeing physical I/O entry: Proc BQ: %d, Global BQ: %d\n", 301fd3bf775SJohn Dyson ki->kaio_buffer_count, num_buf_aio); 302fd3bf775SJohn Dyson #endif 303fd3bf775SJohn Dyson } 304fd3bf775SJohn Dyson 305fd3bf775SJohn Dyson if ((ki->kaio_flags & KAIO_WAKEUP) || 306fd3bf775SJohn Dyson (ki->kaio_flags & KAIO_RUNDOWN) && 307fd3bf775SJohn Dyson ((ki->kaio_buffer_count == 0) && (ki->kaio_queue_count == 0))) { 308fd3bf775SJohn Dyson ki->kaio_flags &= ~KAIO_WAKEUP; 309fd3bf775SJohn Dyson wakeup(p); 310fd3bf775SJohn Dyson } 311fd3bf775SJohn Dyson 312fd3bf775SJohn Dyson if ( aiocbe->jobstate == JOBST_JOBQBUF) { 313fd3bf775SJohn Dyson if ((error = aio_fphysio(p, aiocbe, 1)) != 0) 314fd3bf775SJohn Dyson return error; 315fd3bf775SJohn Dyson if (aiocbe->jobstate != JOBST_JOBBFINISHED) 316fd3bf775SJohn Dyson panic("aio_free_entry: invalid physio finish-up state"); 317fd3bf775SJohn Dyson TAILQ_REMOVE(&ki->kaio_bufdone, aiocbe, plist); 318fd3bf775SJohn Dyson } else if ( aiocbe->jobstate == JOBST_JOBQPROC) { 3192244ea07SJohn Dyson aiop = aiocbe->jobaioproc; 3202244ea07SJohn Dyson TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list); 3212244ea07SJohn Dyson } else if ( aiocbe->jobstate == JOBST_JOBQGLOBAL) { 3222244ea07SJohn Dyson TAILQ_REMOVE(&aio_jobs, aiocbe, list); 3232244ea07SJohn Dyson } else if ( aiocbe->jobstate == JOBST_JOBFINISHED) { 3242244ea07SJohn Dyson TAILQ_REMOVE(&ki->kaio_jobdone, aiocbe, plist); 325fd3bf775SJohn Dyson } else if ( aiocbe->jobstate == JOBST_JOBBFINISHED) { 326fd3bf775SJohn Dyson TAILQ_REMOVE(&ki->kaio_bufdone, aiocbe, plist); 3272244ea07SJohn Dyson } 3282244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 3292244ea07SJohn Dyson aiocbe->jobstate = JOBST_NULL; 3302244ea07SJohn Dyson return 0; 3312244ea07SJohn Dyson } 3322244ea07SJohn Dyson 3332244ea07SJohn Dyson /* 3342244ea07SJohn Dyson * Rundown the jobs for a given process. 3352244ea07SJohn Dyson */ 3362244ea07SJohn Dyson void 337fd3bf775SJohn Dyson aio_proc_rundown(struct proc *p) 338fd3bf775SJohn Dyson { 3392244ea07SJohn Dyson struct kaioinfo *ki; 3402244ea07SJohn Dyson struct aiocblist *aiocbe, *aiocbn; 3412244ea07SJohn Dyson 3422244ea07SJohn Dyson ki = p->p_aioinfo; 3432244ea07SJohn Dyson if (ki == NULL) 3442244ea07SJohn Dyson return; 3452244ea07SJohn Dyson 346fd3bf775SJohn Dyson while ((ki->kaio_active_count > 0) || (ki->kaio_buffer_count > 0)) { 347fd3bf775SJohn Dyson ki->kaio_flags |= KAIO_RUNDOWN; 348fd3bf775SJohn Dyson if (tsleep(p, PRIBIO, "kaiowt", 20 * hz)) 349a624e84fSJohn Dyson break; 350a624e84fSJohn Dyson } 351a624e84fSJohn Dyson 352a624e84fSJohn Dyson #if DEBUGAIO > 0 353fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 354a624e84fSJohn Dyson printf("Proc rundown: %d %d\n", 355a624e84fSJohn Dyson num_queue_count, ki->kaio_queue_count); 356a624e84fSJohn Dyson #endif 357a624e84fSJohn Dyson 3582244ea07SJohn Dyson restart1: 3592244ea07SJohn Dyson for ( aiocbe = TAILQ_FIRST(&ki->kaio_jobdone); 3602244ea07SJohn Dyson aiocbe; 3612244ea07SJohn Dyson aiocbe = aiocbn) { 3622244ea07SJohn Dyson aiocbn = TAILQ_NEXT(aiocbe, plist); 3632244ea07SJohn Dyson if (aio_free_entry(aiocbe)) 3642244ea07SJohn Dyson goto restart1; 3652244ea07SJohn Dyson } 3662244ea07SJohn Dyson 3672244ea07SJohn Dyson restart2: 3682244ea07SJohn Dyson for ( aiocbe = TAILQ_FIRST(&ki->kaio_jobqueue); 3692244ea07SJohn Dyson aiocbe; 3702244ea07SJohn Dyson aiocbe = aiocbn) { 3712244ea07SJohn Dyson aiocbn = TAILQ_NEXT(aiocbe, plist); 3722244ea07SJohn Dyson if (aio_free_entry(aiocbe)) 3732244ea07SJohn Dyson goto restart2; 3742244ea07SJohn Dyson } 375fd3bf775SJohn Dyson zfree(kaio_zone, ki); 376a624e84fSJohn Dyson p->p_aioinfo = NULL; 3772244ea07SJohn Dyson } 3782244ea07SJohn Dyson 3792244ea07SJohn Dyson /* 3802244ea07SJohn Dyson * Select a job to run (called by an AIO daemon) 3812244ea07SJohn Dyson */ 3822244ea07SJohn Dyson static struct aiocblist * 383fd3bf775SJohn Dyson aio_selectjob(struct aioproclist *aiop) 384fd3bf775SJohn Dyson { 3852244ea07SJohn Dyson 3862244ea07SJohn Dyson struct aiocblist *aiocbe; 3872244ea07SJohn Dyson 3882244ea07SJohn Dyson aiocbe = TAILQ_FIRST(&aiop->jobtorun); 3892244ea07SJohn Dyson if (aiocbe) { 3902244ea07SJohn Dyson TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list); 3912244ea07SJohn Dyson return aiocbe; 3922244ea07SJohn Dyson } 3932244ea07SJohn Dyson 3942244ea07SJohn Dyson for (aiocbe = TAILQ_FIRST(&aio_jobs); 3952244ea07SJohn Dyson aiocbe; 3962244ea07SJohn Dyson aiocbe = TAILQ_NEXT(aiocbe, list)) { 3972244ea07SJohn Dyson struct kaioinfo *ki; 3982244ea07SJohn Dyson struct proc *userp; 3992244ea07SJohn Dyson 4002244ea07SJohn Dyson userp = aiocbe->userproc; 4012244ea07SJohn Dyson ki = userp->p_aioinfo; 4022244ea07SJohn Dyson 4032244ea07SJohn Dyson if (ki->kaio_active_count < ki->kaio_maxactive_count) { 4042244ea07SJohn Dyson TAILQ_REMOVE(&aio_jobs, aiocbe, list); 4052244ea07SJohn Dyson return aiocbe; 4062244ea07SJohn Dyson } 4072244ea07SJohn Dyson } 4082244ea07SJohn Dyson 4092244ea07SJohn Dyson return NULL; 4102244ea07SJohn Dyson } 4112244ea07SJohn Dyson 4122244ea07SJohn Dyson /* 413fd3bf775SJohn Dyson * The AIO processing activity. This is the code that does the 414fd3bf775SJohn Dyson * I/O request for the non-physio version of the operations. The 415fd3bf775SJohn Dyson * normal vn operations are used, and this code should work in 416fd3bf775SJohn Dyson * all instances for every type of file, including pipes, sockets, 417fd3bf775SJohn Dyson * fifos, and regular files. 4182244ea07SJohn Dyson */ 4192244ea07SJohn Dyson void 420fd3bf775SJohn Dyson aio_process(struct aiocblist *aiocbe) 421fd3bf775SJohn Dyson { 4222244ea07SJohn Dyson struct filedesc *fdp; 423fd3bf775SJohn Dyson struct proc *userp, *mycp; 4242244ea07SJohn Dyson struct aiocb *cb; 4252244ea07SJohn Dyson struct file *fp; 4262244ea07SJohn Dyson struct uio auio; 4272244ea07SJohn Dyson struct iovec aiov; 4282244ea07SJohn Dyson unsigned int fd; 4292244ea07SJohn Dyson int cnt; 430fd3bf775SJohn Dyson static nperline=0; 4312244ea07SJohn Dyson int error; 432a624e84fSJohn Dyson off_t offset; 433fd3bf775SJohn Dyson int oublock_st, oublock_end; 434fd3bf775SJohn Dyson int inblock_st, inblock_end; 4352244ea07SJohn Dyson 4362244ea07SJohn Dyson userp = aiocbe->userproc; 4372244ea07SJohn Dyson cb = &aiocbe->uaiocb; 4382244ea07SJohn Dyson 439fd3bf775SJohn Dyson mycp = curproc; 440fd3bf775SJohn Dyson 441a624e84fSJohn Dyson #if DEBUGAIO > 0 442fd3bf775SJohn Dyson if (DEBUGREQ) 443fd3bf775SJohn Dyson printf("AIOD %s, fd: %d, offset: 0x%x, address: 0x%x, size: %d\n", 444a624e84fSJohn Dyson cb->aio_lio_opcode == LIO_READ?"Read":"Write", 4452244ea07SJohn Dyson cb->aio_fildes, (int) cb->aio_offset, 4462244ea07SJohn Dyson cb->aio_buf, cb->aio_nbytes); 447a624e84fSJohn Dyson #endif 448fd3bf775SJohn Dyson #if 0 449fd3bf775SJohn Dyson if (cb->aio_lio_opcode == LIO_WRITE) { 450fd3bf775SJohn Dyson nperline++; 451fd3bf775SJohn Dyson printf("(0x%8.8x,0x%8.8x)", (unsigned) cb->aio_offset, cb->aio_buf); 452fd3bf775SJohn Dyson if (nperline >= 3) { 453fd3bf775SJohn Dyson nperline = 0; 454fd3bf775SJohn Dyson printf("\n"); 455fd3bf775SJohn Dyson } 456fd3bf775SJohn Dyson } 4572244ea07SJohn Dyson #endif 458fd3bf775SJohn Dyson #if SLOW 459fd3bf775SJohn Dyson tsleep(mycp, PVM, "aioprc", hz); 460fd3bf775SJohn Dyson #endif 461fd3bf775SJohn Dyson fdp = mycp->p_fd; 4622244ea07SJohn Dyson fd = cb->aio_fildes; 4632244ea07SJohn Dyson fp = fdp->fd_ofiles[fd]; 4642244ea07SJohn Dyson 4652244ea07SJohn Dyson aiov.iov_base = cb->aio_buf; 4662244ea07SJohn Dyson aiov.iov_len = cb->aio_nbytes; 4672244ea07SJohn Dyson 4682244ea07SJohn Dyson auio.uio_iov = &aiov; 4692244ea07SJohn Dyson auio.uio_iovcnt = 1; 470a624e84fSJohn Dyson auio.uio_offset = offset = cb->aio_offset; 4712244ea07SJohn Dyson auio.uio_resid = cb->aio_nbytes; 4722244ea07SJohn Dyson cnt = cb->aio_nbytes; 4732244ea07SJohn Dyson auio.uio_segflg = UIO_USERSPACE; 474fd3bf775SJohn Dyson auio.uio_procp = mycp; 4752244ea07SJohn Dyson 476fd3bf775SJohn Dyson inblock_st = mycp->p_stats->p_ru.ru_inblock; 477fd3bf775SJohn Dyson oublock_st = mycp->p_stats->p_ru.ru_oublock; 4782244ea07SJohn Dyson if (cb->aio_lio_opcode == LIO_READ) { 4792244ea07SJohn Dyson auio.uio_rw = UIO_READ; 4802244ea07SJohn Dyson error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred); 4812244ea07SJohn Dyson } else { 4822244ea07SJohn Dyson auio.uio_rw = UIO_WRITE; 4832244ea07SJohn Dyson error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred); 4842244ea07SJohn Dyson } 485fd3bf775SJohn Dyson inblock_end = mycp->p_stats->p_ru.ru_inblock; 486fd3bf775SJohn Dyson oublock_end = mycp->p_stats->p_ru.ru_oublock; 487fd3bf775SJohn Dyson 488fd3bf775SJohn Dyson aiocbe->inputcharge = inblock_end - inblock_st; 489fd3bf775SJohn Dyson aiocbe->outputcharge = oublock_end - oublock_st; 4902244ea07SJohn Dyson 4912244ea07SJohn Dyson if (error) { 4922244ea07SJohn Dyson if (auio.uio_resid != cnt) { 4932244ea07SJohn Dyson if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) 4942244ea07SJohn Dyson error = 0; 4952244ea07SJohn Dyson if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE)) 4962244ea07SJohn Dyson psignal(userp, SIGPIPE); 4972244ea07SJohn Dyson } 4982244ea07SJohn Dyson } 499a624e84fSJohn Dyson #if DEBUGAIO > 0 500fd3bf775SJohn Dyson if (DEBUGFLOW > 1) 501fd3bf775SJohn Dyson printf("%s complete: error: %d, status: %d," 502fd3bf775SJohn Dyson " nio: %d, resid: %d, offset: %d %s\n", 503a624e84fSJohn Dyson cb->aio_lio_opcode == LIO_READ?"Read":"Write", 504fd3bf775SJohn Dyson error, cnt, cnt - auio.uio_resid, auio.uio_resid, (int) offset & 0xffffffff, 505fd3bf775SJohn Dyson (cnt - auio.uio_resid) > 0 ? "" : "<EOF>"); 506a624e84fSJohn Dyson #endif 5072244ea07SJohn Dyson 5082244ea07SJohn Dyson cnt -= auio.uio_resid; 5092244ea07SJohn Dyson cb->_aiocb_private.error = error; 5102244ea07SJohn Dyson cb->_aiocb_private.status = cnt; 5112244ea07SJohn Dyson 5122244ea07SJohn Dyson return; 5132244ea07SJohn Dyson 5142244ea07SJohn Dyson } 5152244ea07SJohn Dyson 5162244ea07SJohn Dyson /* 5172244ea07SJohn Dyson * The AIO daemon. 5182244ea07SJohn Dyson */ 5192244ea07SJohn Dyson static void 520fd3bf775SJohn Dyson aio_daemon(void *uproc) 5212244ea07SJohn Dyson { 5222244ea07SJohn Dyson struct aioproclist *aiop; 523fd3bf775SJohn Dyson struct vmspace *myvm, *aiovm; 524fd3bf775SJohn Dyson struct proc *mycp; 5252244ea07SJohn Dyson 5262244ea07SJohn Dyson /* 527fd3bf775SJohn Dyson * Local copies of curproc (cp) and vmspace (myvm) 5282244ea07SJohn Dyson */ 529fd3bf775SJohn Dyson mycp = curproc; 530fd3bf775SJohn Dyson myvm = mycp->p_vmspace; 531fd3bf775SJohn Dyson 532fd3bf775SJohn Dyson /* 533fd3bf775SJohn Dyson * We manage to create only one VM space for all AIOD processes. 534fd3bf775SJohn Dyson * The VM space for the first AIOD created becomes the shared VM 535fd3bf775SJohn Dyson * space for all of them. We add an additional reference count, 536fd3bf775SJohn Dyson * even for the first AIOD, so the address space does not go away, 537fd3bf775SJohn Dyson * and we continue to use that original VM space even if the first 538fd3bf775SJohn Dyson * AIOD exits. 539fd3bf775SJohn Dyson */ 540fd3bf775SJohn Dyson if ((aiovm = aiovmspace) == NULL) { 541fd3bf775SJohn Dyson aiovmspace = myvm; 542fd3bf775SJohn Dyson ++myvm->vm_refcnt; 543fd3bf775SJohn Dyson /* 544fd3bf775SJohn Dyson * Remove userland cruft from address space. 545fd3bf775SJohn Dyson */ 546fd3bf775SJohn Dyson if (myvm->vm_shm) 547fd3bf775SJohn Dyson shmexit(mycp); 548fd3bf775SJohn Dyson pmap_remove_pages(&myvm->vm_pmap, 0, USRSTACK); 549fd3bf775SJohn Dyson vm_map_remove(&myvm->vm_map, 0, USRSTACK); 550fd3bf775SJohn Dyson myvm->vm_tsize = 0; 551fd3bf775SJohn Dyson myvm->vm_dsize = 0; 552fd3bf775SJohn Dyson myvm->vm_ssize = 0; 553fd3bf775SJohn Dyson } else { 554fd3bf775SJohn Dyson ++aiovm->vm_refcnt; 555fd3bf775SJohn Dyson mycp->p_vmspace = aiovm; 556fd3bf775SJohn Dyson pmap_activate(mycp); 557fd3bf775SJohn Dyson vmspace_free(myvm); 558fd3bf775SJohn Dyson myvm = aiovm; 559fd3bf775SJohn Dyson } 560fd3bf775SJohn Dyson 561fd3bf775SJohn Dyson if (mycp->p_textvp) { 562fd3bf775SJohn Dyson vrele(mycp->p_textvp); 563fd3bf775SJohn Dyson mycp->p_textvp = NULL; 564fd3bf775SJohn Dyson } 565fd3bf775SJohn Dyson 566fd3bf775SJohn Dyson /* 567fd3bf775SJohn Dyson * Allocate and ready the aio control info. There is one 568fd3bf775SJohn Dyson * aiop structure per daemon. 569fd3bf775SJohn Dyson */ 570fd3bf775SJohn Dyson aiop = zalloc(aiop_zone); 571fd3bf775SJohn Dyson aiop->aioproc = mycp; 5722244ea07SJohn Dyson aiop->aioprocflags |= AIOP_FREE; 5732244ea07SJohn Dyson TAILQ_INIT(&aiop->jobtorun); 5742244ea07SJohn Dyson 5752244ea07SJohn Dyson /* 576fd3bf775SJohn Dyson * Place thread (lightweight process) onto the AIO free thread list 5772244ea07SJohn Dyson */ 578fd3bf775SJohn Dyson if (TAILQ_EMPTY(&aio_freeproc)) 579fd3bf775SJohn Dyson wakeup(&aio_freeproc); 580fd3bf775SJohn Dyson TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list); 5812244ea07SJohn Dyson 5822244ea07SJohn Dyson /* 5832244ea07SJohn Dyson * Make up a name for the daemon 5842244ea07SJohn Dyson */ 585fd3bf775SJohn Dyson strcpy(mycp->p_comm, "aiod"); 5862244ea07SJohn Dyson 5872244ea07SJohn Dyson /* 588fd3bf775SJohn Dyson * Get rid of our current filedescriptors. AIOD's don't need any 589fd3bf775SJohn Dyson * filedescriptors, except as temporarily inherited from the client. 590fd3bf775SJohn Dyson * Credentials are also cloned, and made equivalent to "root." 5912244ea07SJohn Dyson */ 592fd3bf775SJohn Dyson fdfree(mycp); 593fd3bf775SJohn Dyson mycp->p_fd = NULL; 594fd3bf775SJohn Dyson mycp->p_ucred = crcopy(mycp->p_ucred); 595fd3bf775SJohn Dyson mycp->p_ucred->cr_uid = 0; 596fd3bf775SJohn Dyson mycp->p_ucred->cr_ngroups = 1; 597fd3bf775SJohn Dyson mycp->p_ucred->cr_groups[0] = 1; 598fd3bf775SJohn Dyson 599fd3bf775SJohn Dyson /* 600fd3bf775SJohn Dyson * The daemon resides in it's own pgrp. 601fd3bf775SJohn Dyson */ 602fd3bf775SJohn Dyson enterpgrp(mycp, mycp->p_pid, 1); 603fd3bf775SJohn Dyson 604fd3bf775SJohn Dyson /* 605fd3bf775SJohn Dyson * Mark special process type 606fd3bf775SJohn Dyson */ 607fd3bf775SJohn Dyson mycp->p_flag |= P_SYSTEM|P_KTHREADP; 6082244ea07SJohn Dyson 609a624e84fSJohn Dyson #if DEBUGAIO > 0 610fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 611fd3bf775SJohn Dyson printf("Started new process: %d\n", mycp->p_pid); 6122244ea07SJohn Dyson #endif 613fd3bf775SJohn Dyson 614fd3bf775SJohn Dyson /* 615fd3bf775SJohn Dyson * Wakeup parent process. (Parent sleeps to keep from blasting away 616fd3bf775SJohn Dyson * creating to many daemons.) 617fd3bf775SJohn Dyson */ 618fd3bf775SJohn Dyson wakeup(mycp); 6192244ea07SJohn Dyson 6202244ea07SJohn Dyson while(1) { 621fd3bf775SJohn Dyson struct proc *curcp; 6222244ea07SJohn Dyson struct aiocblist *aiocbe; 6232244ea07SJohn Dyson 624fd3bf775SJohn Dyson /* 625fd3bf775SJohn Dyson * curcp is the current daemon process context. 626fd3bf775SJohn Dyson * userp is the current user process context. 627fd3bf775SJohn Dyson */ 628fd3bf775SJohn Dyson curcp = mycp; 629c4860686SJohn Dyson 630fd3bf775SJohn Dyson /* 631fd3bf775SJohn Dyson * Take daemon off of free queue 632fd3bf775SJohn Dyson */ 6332244ea07SJohn Dyson if (aiop->aioprocflags & AIOP_FREE) { 6342244ea07SJohn Dyson TAILQ_REMOVE(&aio_freeproc, aiop, list); 6352244ea07SJohn Dyson TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list); 6362244ea07SJohn Dyson aiop->aioprocflags &= ~AIOP_FREE; 6372244ea07SJohn Dyson } 638fd3bf775SJohn Dyson aiop->aioprocflags &= ~AIOP_SCHED; 6392244ea07SJohn Dyson 640fd3bf775SJohn Dyson /* 641fd3bf775SJohn Dyson * Check for jobs 642fd3bf775SJohn Dyson */ 6432244ea07SJohn Dyson while ( aiocbe = aio_selectjob(aiop)) { 644fd3bf775SJohn Dyson struct proc *userp; 6452244ea07SJohn Dyson struct aiocb *cb; 6462244ea07SJohn Dyson struct kaioinfo *ki; 6472244ea07SJohn Dyson 6482244ea07SJohn Dyson cb = &aiocbe->uaiocb; 6492244ea07SJohn Dyson userp = aiocbe->userproc; 6502244ea07SJohn Dyson 6512244ea07SJohn Dyson aiocbe->jobstate = JOBST_JOBRUNNING; 652fd3bf775SJohn Dyson 653fd3bf775SJohn Dyson /* 654fd3bf775SJohn Dyson * Connect to process address space for user program 655fd3bf775SJohn Dyson */ 656fd3bf775SJohn Dyson if (userp != curcp) { 657fd3bf775SJohn Dyson struct vmspace *tmpvm; 658fd3bf775SJohn Dyson /* 659fd3bf775SJohn Dyson * Save the current address space that we are connected to. 660fd3bf775SJohn Dyson */ 661fd3bf775SJohn Dyson tmpvm = mycp->p_vmspace; 662fd3bf775SJohn Dyson /* 663fd3bf775SJohn Dyson * Point to the new user address space, and refer to it. 664fd3bf775SJohn Dyson */ 665fd3bf775SJohn Dyson mycp->p_vmspace = userp->p_vmspace; 666fd3bf775SJohn Dyson ++mycp->p_vmspace->vm_refcnt; 667fd3bf775SJohn Dyson /* 668fd3bf775SJohn Dyson * Activate the new mapping. 669fd3bf775SJohn Dyson */ 670fd3bf775SJohn Dyson pmap_activate(mycp); 671fd3bf775SJohn Dyson /* 672fd3bf775SJohn Dyson * If the old address space wasn't the daemons own address 673fd3bf775SJohn Dyson * space, then we need to remove the daemon's reference from 674fd3bf775SJohn Dyson * the other process that it was acting on behalf of. 675fd3bf775SJohn Dyson */ 6762244ea07SJohn Dyson if (tmpvm != myvm) { 6772244ea07SJohn Dyson vmspace_free(tmpvm); 6782244ea07SJohn Dyson } 679fd3bf775SJohn Dyson /* 680fd3bf775SJohn Dyson * Disassociate from previous clients file descriptors, and 681fd3bf775SJohn Dyson * associate to the new clients descriptors. Note that 682fd3bf775SJohn Dyson * the daemon doesn't need to worry about it's orginal 683fd3bf775SJohn Dyson * descriptors, because they were originally freed. 684fd3bf775SJohn Dyson */ 685fd3bf775SJohn Dyson if (mycp->p_fd) 686fd3bf775SJohn Dyson fdfree(mycp); 687fd3bf775SJohn Dyson mycp->p_fd = fdshare(userp); 688fd3bf775SJohn Dyson curcp = userp; 6892244ea07SJohn Dyson } 6902244ea07SJohn Dyson 691fd3bf775SJohn Dyson ki = userp->p_aioinfo; 6922244ea07SJohn Dyson ki->kaio_active_count++; 693a624e84fSJohn Dyson #if DEBUGAIO > 0 694fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 695a624e84fSJohn Dyson printf("process: pid: %d(%d), active: %d, queue: %d\n", 696a624e84fSJohn Dyson cb->_aiocb_private.kernelinfo, 697a624e84fSJohn Dyson userp->p_pid, ki->kaio_active_count, ki->kaio_queue_count); 698a624e84fSJohn Dyson #endif 6992244ea07SJohn Dyson aiocbe->jobaioproc = aiop; 7002244ea07SJohn Dyson aio_process(aiocbe); 7012244ea07SJohn Dyson --ki->kaio_active_count; 702fd3bf775SJohn Dyson if ((ki->kaio_flags & KAIO_WAKEUP) || 703fd3bf775SJohn Dyson (ki->kaio_flags & KAIO_RUNDOWN) && (ki->kaio_active_count == 0)) { 704fd3bf775SJohn Dyson ki->kaio_flags &= ~KAIO_WAKEUP; 705fd3bf775SJohn Dyson wakeup(userp); 706fd3bf775SJohn Dyson } 707a624e84fSJohn Dyson #if DEBUGAIO > 0 708fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 709a624e84fSJohn Dyson printf("DONE process: pid: %d(%d), active: %d, queue: %d\n", 710a624e84fSJohn Dyson cb->_aiocb_private.kernelinfo, 711a624e84fSJohn Dyson userp->p_pid, ki->kaio_active_count, ki->kaio_queue_count); 712a624e84fSJohn Dyson #endif 7132244ea07SJohn Dyson 7142244ea07SJohn Dyson aiocbe->jobstate = JOBST_JOBFINISHED; 7152244ea07SJohn Dyson 716fd3bf775SJohn Dyson /* 717fd3bf775SJohn Dyson * If the I/O request should be automatically rundown, do the 718fd3bf775SJohn Dyson * needed cleanup. Otherwise, place the queue entry for 719fd3bf775SJohn Dyson * the just finished I/O request into the done queue for the 720fd3bf775SJohn Dyson * associated client. 721fd3bf775SJohn Dyson */ 7222244ea07SJohn Dyson if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) { 7232244ea07SJohn Dyson aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE; 7242244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 7252244ea07SJohn Dyson } else { 7262244ea07SJohn Dyson TAILQ_REMOVE(&ki->kaio_jobqueue, 7272244ea07SJohn Dyson aiocbe, plist); 7282244ea07SJohn Dyson TAILQ_INSERT_TAIL(&ki->kaio_jobdone, 7292244ea07SJohn Dyson aiocbe, plist); 7302244ea07SJohn Dyson } 7312244ea07SJohn Dyson 7322244ea07SJohn Dyson if (aiocbe->jobflags & AIOCBLIST_RUNDOWN) { 7332244ea07SJohn Dyson wakeup(aiocbe); 7342244ea07SJohn Dyson aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN; 7352244ea07SJohn Dyson } 7362244ea07SJohn Dyson 7372244ea07SJohn Dyson if (cb->aio_sigevent.sigev_notify == SIGEV_SIGNAL) { 7382244ea07SJohn Dyson psignal(userp, cb->aio_sigevent.sigev_signo); 7392244ea07SJohn Dyson } 7402244ea07SJohn Dyson } 7412244ea07SJohn Dyson 742fd3bf775SJohn Dyson #if DEBUGAIO > 0 743fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 744fd3bf775SJohn Dyson printf("AIOD: daemon going idle: %d\n", mycp->p_pid); 745fd3bf775SJohn Dyson #endif 746fd3bf775SJohn Dyson 747fd3bf775SJohn Dyson /* 748fd3bf775SJohn Dyson * Disconnect from user address space 749fd3bf775SJohn Dyson */ 750fd3bf775SJohn Dyson if (curcp != mycp) { 751fd3bf775SJohn Dyson struct vmspace *tmpvm; 752fd3bf775SJohn Dyson /* 753fd3bf775SJohn Dyson * Get the user address space to disconnect from. 754fd3bf775SJohn Dyson */ 755fd3bf775SJohn Dyson tmpvm = mycp->p_vmspace; 756fd3bf775SJohn Dyson /* 757fd3bf775SJohn Dyson * Get original address space for daemon. 758fd3bf775SJohn Dyson */ 759fd3bf775SJohn Dyson mycp->p_vmspace = myvm; 760fd3bf775SJohn Dyson /* 761fd3bf775SJohn Dyson * Activate the daemon's address space. 762fd3bf775SJohn Dyson */ 763fd3bf775SJohn Dyson pmap_activate(mycp); 764fd3bf775SJohn Dyson if (tmpvm == myvm) 765fd3bf775SJohn Dyson printf("AIOD: vmspace problem -- %d\n", mycp->p_pid); 766fd3bf775SJohn Dyson /* 767fd3bf775SJohn Dyson * remove our vmspace reference. 768fd3bf775SJohn Dyson */ 7692244ea07SJohn Dyson vmspace_free(tmpvm); 770fd3bf775SJohn Dyson /* 771fd3bf775SJohn Dyson * disassociate from the user process's file descriptors. 772fd3bf775SJohn Dyson */ 773fd3bf775SJohn Dyson if (mycp->p_fd) 774fd3bf775SJohn Dyson fdfree(mycp); 775fd3bf775SJohn Dyson mycp->p_fd = NULL; 776fd3bf775SJohn Dyson curcp = mycp; 777fd3bf775SJohn Dyson } 778fd3bf775SJohn Dyson 779fd3bf775SJohn Dyson /* 780fd3bf775SJohn Dyson * If we are the first to be put onto the free queue, wakeup 781fd3bf775SJohn Dyson * anyone waiting for a daemon. 782fd3bf775SJohn Dyson */ 783fd3bf775SJohn Dyson TAILQ_REMOVE(&aio_activeproc, aiop, list); 784fd3bf775SJohn Dyson if (TAILQ_EMPTY(&aio_freeproc)) 785fd3bf775SJohn Dyson wakeup(&aio_freeproc); 786fd3bf775SJohn Dyson TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list); 787fd3bf775SJohn Dyson aiop->aioprocflags |= AIOP_FREE; 788fd3bf775SJohn Dyson 789fd3bf775SJohn Dyson #if DEBUGAIO > 0 790fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 791fd3bf775SJohn Dyson printf("AIOD: daemon sleeping -- %d\n", mycp->p_pid); 792fd3bf775SJohn Dyson #endif 793fd3bf775SJohn Dyson /* 794fd3bf775SJohn Dyson * If daemon is inactive for a long time, allow it to exit, thereby 795fd3bf775SJohn Dyson * freeing resources. 796fd3bf775SJohn Dyson */ 797fd3bf775SJohn Dyson if (((aiop->aioprocflags & AIOP_SCHED) == 0) && 798fd3bf775SJohn Dyson tsleep(mycp, PRIBIO, "aiordy", hz*10)) { 799fd3bf775SJohn Dyson if ((TAILQ_FIRST(&aio_jobs) == NULL) && 800fd3bf775SJohn Dyson (TAILQ_FIRST(&aiop->jobtorun) == NULL)) { 801fd3bf775SJohn Dyson if (aiop->aioprocflags & AIOP_FREE) { 802fd3bf775SJohn Dyson TAILQ_REMOVE(&aio_freeproc, aiop, list); 803fd3bf775SJohn Dyson zfree(aiop_zone, aiop); 804fd3bf775SJohn Dyson --num_aio_procs; 805fd3bf775SJohn Dyson #if DEBUGAIO > 0 806fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 807fd3bf775SJohn Dyson printf("AIOD: Daemon exiting -- %d\n", mycp->p_pid); 808fd3bf775SJohn Dyson #endif 809fd3bf775SJohn Dyson if (mycp->p_vmspace->vm_refcnt <= 1) 810fd3bf775SJohn Dyson printf("AIOD: bad vm refcnt for exiting daemon: %d\n", 811fd3bf775SJohn Dyson mycp->p_vmspace->vm_refcnt); 812fd3bf775SJohn Dyson exit1(mycp, 0); 813fd3bf775SJohn Dyson } 814fd3bf775SJohn Dyson } 8152244ea07SJohn Dyson } 8162244ea07SJohn Dyson } 8172244ea07SJohn Dyson } 8182244ea07SJohn Dyson 8192244ea07SJohn Dyson /* 8202244ea07SJohn Dyson * Create a new AIO daemon. 8212244ea07SJohn Dyson */ 8222244ea07SJohn Dyson static int 823fd3bf775SJohn Dyson aio_newproc() 824fd3bf775SJohn Dyson { 8252244ea07SJohn Dyson int error; 8262244ea07SJohn Dyson struct rfork_args rfa; 827fd3bf775SJohn Dyson struct proc *p, *np; 8282244ea07SJohn Dyson 829fd3bf775SJohn Dyson rfa.flags = RFPROC | RFCFDG; 8302244ea07SJohn Dyson 831cb226aaaSPoul-Henning Kamp p = curproc; 832cb226aaaSPoul-Henning Kamp if (error = rfork(p, &rfa)) 8332244ea07SJohn Dyson return error; 834fd3bf775SJohn Dyson 835fd3bf775SJohn Dyson np = pfind(p->p_retval[0]); 836fd3bf775SJohn Dyson cpu_set_fork_handler(np, aio_daemon, p); 8372244ea07SJohn Dyson 838a624e84fSJohn Dyson #if DEBUGAIO > 0 839fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 8402244ea07SJohn Dyson printf("Waiting for new process: %d, count: %d\n", 8412244ea07SJohn Dyson curproc->p_pid, num_aio_procs); 8422244ea07SJohn Dyson #endif 8432244ea07SJohn Dyson 844fd3bf775SJohn Dyson /* 845fd3bf775SJohn Dyson * Wait until daemon is started, but continue on just in case (to 846fd3bf775SJohn Dyson * handle error conditions. 847fd3bf775SJohn Dyson */ 848fd3bf775SJohn Dyson error = tsleep(np, PZERO, "aiosta", 5*hz); 8492244ea07SJohn Dyson ++num_aio_procs; 8502244ea07SJohn Dyson 8512244ea07SJohn Dyson return error; 8522244ea07SJohn Dyson 8532244ea07SJohn Dyson } 8542244ea07SJohn Dyson 8552244ea07SJohn Dyson /* 856fd3bf775SJohn Dyson * Try the high-performance physio method for eligible VCHR devices 857fd3bf775SJohn Dyson */ 858fd3bf775SJohn Dyson int 859fd3bf775SJohn Dyson aio_qphysio(p, iocb) 860fd3bf775SJohn Dyson struct proc *p; 861fd3bf775SJohn Dyson struct aiocblist *iocb; 862fd3bf775SJohn Dyson { 863fd3bf775SJohn Dyson int error; 864fd3bf775SJohn Dyson caddr_t sa; 865fd3bf775SJohn Dyson struct aiocb *cb; 866fd3bf775SJohn Dyson struct file *fp; 867fd3bf775SJohn Dyson struct buf *bp; 868fd3bf775SJohn Dyson int bflags; 869fd3bf775SJohn Dyson struct aiocblist *aiocbe; 870fd3bf775SJohn Dyson struct vnode *vp; 871fd3bf775SJohn Dyson struct kaioinfo *ki; 872fd3bf775SJohn Dyson struct filedesc *fdp; 873fd3bf775SJohn Dyson int fd; 874fd3bf775SJohn Dyson int majordev; 875fd3bf775SJohn Dyson int s; 876fd3bf775SJohn Dyson int cnt; 877fd3bf775SJohn Dyson dev_t dev; 878fd3bf775SJohn Dyson int rw; 879fd3bf775SJohn Dyson d_strategy_t *fstrategy; 880fd3bf775SJohn Dyson 881fd3bf775SJohn Dyson cb = &iocb->uaiocb; 882fd3bf775SJohn Dyson if (cb->aio_nbytes > MAXPHYS) 883fd3bf775SJohn Dyson return -1; 884fd3bf775SJohn Dyson 885fd3bf775SJohn Dyson fdp = p->p_fd; 886fd3bf775SJohn Dyson fd = cb->aio_fildes; 887fd3bf775SJohn Dyson fp = fdp->fd_ofiles[fd]; 888fd3bf775SJohn Dyson 889fd3bf775SJohn Dyson if (fp->f_type != DTYPE_VNODE) 890fd3bf775SJohn Dyson return -1; 891fd3bf775SJohn Dyson 892fd3bf775SJohn Dyson vp = (struct vnode *)fp->f_data; 893fd3bf775SJohn Dyson if (vp->v_type != VCHR || ((cb->aio_nbytes & (DEV_BSIZE - 1)) != 0)) 894fd3bf775SJohn Dyson return -1; 895fd3bf775SJohn Dyson 896fd3bf775SJohn Dyson if ((vp->v_specinfo == NULL) || (vp->v_flag & VISTTY)) 897fd3bf775SJohn Dyson return -1; 898fd3bf775SJohn Dyson 899fd3bf775SJohn Dyson majordev = major(vp->v_rdev); 900fd3bf775SJohn Dyson if (majordev == NODEV) 901fd3bf775SJohn Dyson return -1; 902fd3bf775SJohn Dyson 903fd3bf775SJohn Dyson if (chrtoblk(majordev) == NODEV) 904fd3bf775SJohn Dyson return -1; 905fd3bf775SJohn Dyson 906fd3bf775SJohn Dyson ki = p->p_aioinfo; 907fd3bf775SJohn Dyson if (ki->kaio_buffer_count >= ki->kaio_ballowed_count) 908fd3bf775SJohn Dyson return -1; 909fd3bf775SJohn Dyson 910fd3bf775SJohn Dyson cnt = cb->aio_nbytes; 911fd3bf775SJohn Dyson if (cnt > MAXPHYS) 912fd3bf775SJohn Dyson return -1; 913fd3bf775SJohn Dyson 914fd3bf775SJohn Dyson ki->kaio_buffer_count++; 915fd3bf775SJohn Dyson 916fd3bf775SJohn Dyson /* create and build a buffer header for a transfer */ 917fd3bf775SJohn Dyson bp = (struct buf *)getpbuf(); 918fd3bf775SJohn Dyson 919fd3bf775SJohn Dyson /* 920fd3bf775SJohn Dyson * get a copy of the kva from the physical buffer 921fd3bf775SJohn Dyson */ 922fd3bf775SJohn Dyson bp->b_proc = p; 923fd3bf775SJohn Dyson bp->b_dev = dev; 924fd3bf775SJohn Dyson error = bp->b_error = 0; 925fd3bf775SJohn Dyson 926fd3bf775SJohn Dyson if (cb->aio_lio_opcode == LIO_WRITE) { 927fd3bf775SJohn Dyson rw = 0; 928fd3bf775SJohn Dyson bflags = B_WRITE; 929fd3bf775SJohn Dyson } else { 930fd3bf775SJohn Dyson rw = 1; 931fd3bf775SJohn Dyson bflags = B_READ; 932fd3bf775SJohn Dyson } 933fd3bf775SJohn Dyson 934fd3bf775SJohn Dyson bp->b_bcount = cb->aio_nbytes; 935fd3bf775SJohn Dyson bp->b_bufsize = cb->aio_nbytes; 936fd3bf775SJohn Dyson bp->b_flags = B_BUSY | B_PHYS | B_CALL | bflags; 937fd3bf775SJohn Dyson bp->b_iodone = aio_physwakeup; 938fd3bf775SJohn Dyson bp->b_saveaddr = bp->b_data; 939fd3bf775SJohn Dyson bp->b_data = cb->aio_buf; 940fd3bf775SJohn Dyson 941fd3bf775SJohn Dyson bp->b_blkno = btodb(cb->aio_offset); 942fd3bf775SJohn Dyson 943fd3bf775SJohn Dyson if (rw && !useracc(bp->b_data, bp->b_bufsize, B_WRITE)) { 944fd3bf775SJohn Dyson error = EFAULT; 945fd3bf775SJohn Dyson goto doerror; 946fd3bf775SJohn Dyson } 947fd3bf775SJohn Dyson if (!rw && !useracc(bp->b_data, bp->b_bufsize, B_READ)) { 948fd3bf775SJohn Dyson error = EFAULT; 949fd3bf775SJohn Dyson goto doerror; 950fd3bf775SJohn Dyson } 951fd3bf775SJohn Dyson 952fd3bf775SJohn Dyson /* bring buffer into kernel space */ 953fd3bf775SJohn Dyson vmapbuf(bp); 954fd3bf775SJohn Dyson 955fd3bf775SJohn Dyson aiocbe->bp = bp; 956fd3bf775SJohn Dyson bp->b_spc = (void *)aiocbe; 957fd3bf775SJohn Dyson TAILQ_INSERT_TAIL(&aio_bufjobs, aiocbe, list); 958fd3bf775SJohn Dyson TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist); 959fd3bf775SJohn Dyson aiocbe->jobstate = JOBST_JOBQBUF; 960fd3bf775SJohn Dyson ++num_buf_aio; 961fd3bf775SJohn Dyson fstrategy = cdevsw[major(dev)]->d_strategy; 962fd3bf775SJohn Dyson bp->b_error = 0; 963fd3bf775SJohn Dyson 964fd3bf775SJohn Dyson /* perform transfer */ 965fd3bf775SJohn Dyson (*fstrategy)(bp); 966fd3bf775SJohn Dyson 967fd3bf775SJohn Dyson if (bp->b_error || (bp->b_flags & B_ERROR)) { 968fd3bf775SJohn Dyson error = bp->b_error; 969fd3bf775SJohn Dyson TAILQ_REMOVE(&aio_bufjobs, aiocbe, list); 970fd3bf775SJohn Dyson TAILQ_REMOVE(&ki->kaio_jobqueue, aiocbe, plist); 971fd3bf775SJohn Dyson aiocbe->bp = NULL; 972fd3bf775SJohn Dyson aiocbe->jobstate = JOBST_NULL; 973fd3bf775SJohn Dyson vunmapbuf(bp); 974fd3bf775SJohn Dyson relpbuf(bp); 975fd3bf775SJohn Dyson --num_buf_aio; 976fd3bf775SJohn Dyson return error; 977fd3bf775SJohn Dyson } 978fd3bf775SJohn Dyson return 0; 979fd3bf775SJohn Dyson 980fd3bf775SJohn Dyson doerror: 981fd3bf775SJohn Dyson ki->kaio_buffer_count--; 982fd3bf775SJohn Dyson relpbuf(bp); 983fd3bf775SJohn Dyson return error; 984fd3bf775SJohn Dyson } 985fd3bf775SJohn Dyson 986fd3bf775SJohn Dyson int 987fd3bf775SJohn Dyson aio_fphysio(p, iocb, flgwait) 988fd3bf775SJohn Dyson struct proc *p; 989fd3bf775SJohn Dyson struct aiocblist *iocb; 990fd3bf775SJohn Dyson int flgwait; 991fd3bf775SJohn Dyson { 992fd3bf775SJohn Dyson int s; 993fd3bf775SJohn Dyson struct buf *bp; 994fd3bf775SJohn Dyson int error; 995fd3bf775SJohn Dyson 996fd3bf775SJohn Dyson bp = iocb->bp; 997fd3bf775SJohn Dyson 998fd3bf775SJohn Dyson s = splbio(); 999fd3bf775SJohn Dyson if (flgwait == 0) { 1000fd3bf775SJohn Dyson if ((bp->b_flags & B_DONE) == 0) { 1001fd3bf775SJohn Dyson splx(s); 1002fd3bf775SJohn Dyson return EINPROGRESS; 1003fd3bf775SJohn Dyson } 1004fd3bf775SJohn Dyson } 1005fd3bf775SJohn Dyson 1006fd3bf775SJohn Dyson while ((bp->b_flags & B_DONE) == 0) { 1007fd3bf775SJohn Dyson if (tsleep((caddr_t)bp, PCATCH|PRIBIO, "physstr", 0)) { 1008fd3bf775SJohn Dyson if ((bp->b_flags & B_DONE) == 0) { 1009fd3bf775SJohn Dyson splx(s); 1010fd3bf775SJohn Dyson return EINPROGRESS; 1011fd3bf775SJohn Dyson } else { 1012fd3bf775SJohn Dyson break; 1013fd3bf775SJohn Dyson } 1014fd3bf775SJohn Dyson } 1015fd3bf775SJohn Dyson } 1016fd3bf775SJohn Dyson 1017fd3bf775SJohn Dyson /* release mapping into kernel space */ 1018fd3bf775SJohn Dyson vunmapbuf(bp); 1019fd3bf775SJohn Dyson iocb->bp = 0; 1020fd3bf775SJohn Dyson 1021fd3bf775SJohn Dyson error = 0; 1022fd3bf775SJohn Dyson /* 1023fd3bf775SJohn Dyson * check for an error 1024fd3bf775SJohn Dyson */ 1025fd3bf775SJohn Dyson if (bp->b_flags & B_ERROR) { 1026fd3bf775SJohn Dyson error = bp->b_error; 1027fd3bf775SJohn Dyson } 1028fd3bf775SJohn Dyson 1029fd3bf775SJohn Dyson relpbuf(bp); 1030fd3bf775SJohn Dyson return (error); 1031fd3bf775SJohn Dyson } 1032fd3bf775SJohn Dyson 1033fd3bf775SJohn Dyson /* 10342244ea07SJohn Dyson * Queue a new AIO request. 10352244ea07SJohn Dyson */ 10362244ea07SJohn Dyson static int 1037fd3bf775SJohn Dyson _aio_aqueue(struct proc *p, struct aiocb *job, int type) 1038fd3bf775SJohn Dyson { 10392244ea07SJohn Dyson struct filedesc *fdp; 10402244ea07SJohn Dyson struct file *fp; 10412244ea07SJohn Dyson unsigned int fd; 10422244ea07SJohn Dyson 10432244ea07SJohn Dyson int error; 10442244ea07SJohn Dyson int opcode; 10452244ea07SJohn Dyson struct aiocblist *aiocbe; 10462244ea07SJohn Dyson struct aioproclist *aiop; 10472244ea07SJohn Dyson struct kaioinfo *ki; 10482244ea07SJohn Dyson 10492244ea07SJohn Dyson if (aiocbe = TAILQ_FIRST(&aio_freejobs)) { 10502244ea07SJohn Dyson TAILQ_REMOVE(&aio_freejobs, aiocbe, list); 10512244ea07SJohn Dyson } else { 1052fd3bf775SJohn Dyson aiocbe = zalloc (aiocb_zone); 10532244ea07SJohn Dyson } 10542244ea07SJohn Dyson 1055fd3bf775SJohn Dyson aiocbe->inputcharge = 0; 1056fd3bf775SJohn Dyson aiocbe->outputcharge = 0; 1057fd3bf775SJohn Dyson 1058fd3bf775SJohn Dyson suword(&job->_aiocb_private.status, -1); 1059fd3bf775SJohn Dyson suword(&job->_aiocb_private.error, 0); 1060fd3bf775SJohn Dyson suword(&job->_aiocb_private.kernelinfo, -1); 1061fd3bf775SJohn Dyson 10622244ea07SJohn Dyson error = copyin((caddr_t)job, 10632244ea07SJohn Dyson (caddr_t) &aiocbe->uaiocb, sizeof aiocbe->uaiocb); 10642244ea07SJohn Dyson if (error) { 1065a624e84fSJohn Dyson #if DEBUGAIO > 0 1066fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1067a624e84fSJohn Dyson printf("aio_aqueue: Copyin error: %d\n", error); 1068a624e84fSJohn Dyson #endif 1069fd3bf775SJohn Dyson suword(&job->_aiocb_private.error, error); 1070fd3bf775SJohn Dyson 10712244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 10722244ea07SJohn Dyson return error; 10732244ea07SJohn Dyson } 10742244ea07SJohn Dyson 1075a624e84fSJohn Dyson /* 1076a624e84fSJohn Dyson * Get the opcode 1077a624e84fSJohn Dyson */ 1078a624e84fSJohn Dyson if (type != LIO_NOP) { 1079a624e84fSJohn Dyson aiocbe->uaiocb.aio_lio_opcode = type; 1080a624e84fSJohn Dyson } 1081a624e84fSJohn Dyson opcode = aiocbe->uaiocb.aio_lio_opcode; 10822244ea07SJohn Dyson 10832244ea07SJohn Dyson /* 10842244ea07SJohn Dyson * Get the fd info for process 10852244ea07SJohn Dyson */ 10862244ea07SJohn Dyson fdp = p->p_fd; 10872244ea07SJohn Dyson 10882244ea07SJohn Dyson /* 10892244ea07SJohn Dyson * Range check file descriptor 10902244ea07SJohn Dyson */ 10912244ea07SJohn Dyson fd = aiocbe->uaiocb.aio_fildes; 10922244ea07SJohn Dyson if (fd >= fdp->fd_nfiles) { 10932244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 10942244ea07SJohn Dyson if (type == 0) { 1095a624e84fSJohn Dyson #if DEBUGAIO > 0 1096fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1097a624e84fSJohn Dyson printf("aio_aqueue: Null type\n"); 1098a624e84fSJohn Dyson #endif 10992244ea07SJohn Dyson suword(&job->_aiocb_private.error, EBADF); 11002244ea07SJohn Dyson } 11012244ea07SJohn Dyson return EBADF; 11022244ea07SJohn Dyson } 11032244ea07SJohn Dyson 1104c4860686SJohn Dyson #if DEBUGAIO > 0 1105fd3bf775SJohn Dyson if (DEBUGFLOW > 3) 1106fd3bf775SJohn Dyson printf("aio_aqueue: fd: %d, cmd: %d," 1107fd3bf775SJohn Dyson " buf: %d, cnt: %d, fileoffset: %d\n", 1108c4860686SJohn Dyson aiocbe->uaiocb.aio_fildes, 1109c4860686SJohn Dyson aiocbe->uaiocb.aio_lio_opcode, 1110c4860686SJohn Dyson (int) aiocbe->uaiocb.aio_buf & 0xffffffff, 1111c4860686SJohn Dyson aiocbe->uaiocb.aio_nbytes, 1112c4860686SJohn Dyson (int) aiocbe->uaiocb.aio_offset & 0xffffffff); 1113c4860686SJohn Dyson #endif 1114c4860686SJohn Dyson 1115c4860686SJohn Dyson 11162244ea07SJohn Dyson fp = fdp->fd_ofiles[fd]; 1117a624e84fSJohn Dyson if ((fp == NULL) || 1118a624e84fSJohn Dyson ((opcode == LIO_WRITE) && ((fp->f_flag & FWRITE) == 0))) { 11192244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 11202244ea07SJohn Dyson if (type == 0) { 11212244ea07SJohn Dyson suword(&job->_aiocb_private.error, EBADF); 11222244ea07SJohn Dyson } 1123a624e84fSJohn Dyson #if DEBUGAIO > 0 1124fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1125a624e84fSJohn Dyson printf("aio_aqueue: Bad file descriptor\n"); 1126a624e84fSJohn Dyson #endif 11272244ea07SJohn Dyson return EBADF; 11282244ea07SJohn Dyson } 11292244ea07SJohn Dyson 11302244ea07SJohn Dyson if (aiocbe->uaiocb.aio_offset == -1LL) { 11312244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 11322244ea07SJohn Dyson if (type == 0) { 11332244ea07SJohn Dyson suword(&job->_aiocb_private.error, EINVAL); 11342244ea07SJohn Dyson } 1135a624e84fSJohn Dyson #if DEBUGAIO > 0 1136fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1137a624e84fSJohn Dyson printf("aio_aqueue: bad offset\n"); 1138a624e84fSJohn Dyson #endif 11392244ea07SJohn Dyson return EINVAL; 11402244ea07SJohn Dyson } 11412244ea07SJohn Dyson 1142a624e84fSJohn Dyson #if DEBUGAIO > 0 1143fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 1144fd3bf775SJohn Dyson printf("job addr: 0x%x, 0x%x, %d\n", 1145fd3bf775SJohn Dyson job, &job->_aiocb_private.kernelinfo, jobrefid); 11462244ea07SJohn Dyson #endif 11472244ea07SJohn Dyson 11482244ea07SJohn Dyson error = suword(&job->_aiocb_private.kernelinfo, jobrefid); 11492244ea07SJohn Dyson if (error) { 11502244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 11512244ea07SJohn Dyson if (type == 0) { 11522244ea07SJohn Dyson suword(&job->_aiocb_private.error, EINVAL); 11532244ea07SJohn Dyson } 1154a624e84fSJohn Dyson #if DEBUGAIO > 0 1155fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1156a624e84fSJohn Dyson printf("aio_aqueue: fetch of kernelinfo from user space\n"); 1157a624e84fSJohn Dyson #endif 11582244ea07SJohn Dyson return error; 11592244ea07SJohn Dyson } 11602244ea07SJohn Dyson 11612244ea07SJohn Dyson aiocbe->uaiocb._aiocb_private.kernelinfo = (void *)jobrefid; 1162a624e84fSJohn Dyson #if DEBUGAIO > 0 1163fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 11642244ea07SJohn Dyson printf("aio_aqueue: New job: %d... ", jobrefid); 11652244ea07SJohn Dyson #endif 11662244ea07SJohn Dyson ++jobrefid; 1167fd3bf775SJohn Dyson if (jobrefid > INT_MAX) 1168fd3bf775SJohn Dyson jobrefid = 1; 11692244ea07SJohn Dyson 11702244ea07SJohn Dyson if (opcode == LIO_NOP) { 11712244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 11722244ea07SJohn Dyson if (type == 0) { 11732244ea07SJohn Dyson suword(&job->_aiocb_private.error, 0); 1174fd3bf775SJohn Dyson suword(&job->_aiocb_private.status, 0); 1175fd3bf775SJohn Dyson suword(&job->_aiocb_private.kernelinfo, 0); 11762244ea07SJohn Dyson } 11772244ea07SJohn Dyson return 0; 11782244ea07SJohn Dyson } 11792244ea07SJohn Dyson 1180fd3bf775SJohn Dyson if ((opcode != LIO_READ) && (opcode != LIO_WRITE)) { 11812244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 11822244ea07SJohn Dyson if (type == 0) { 1183fd3bf775SJohn Dyson suword(&job->_aiocb_private.status, 0); 11842244ea07SJohn Dyson suword(&job->_aiocb_private.error, EINVAL); 11852244ea07SJohn Dyson } 1186a624e84fSJohn Dyson #if DEBUGAIO > 0 1187fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1188a624e84fSJohn Dyson printf("aio_aqueue: invalid LIO op: %d\n", opcode); 1189a624e84fSJohn Dyson #endif 11902244ea07SJohn Dyson return EINVAL; 11912244ea07SJohn Dyson } 11922244ea07SJohn Dyson 1193fd3bf775SJohn Dyson suword(&job->_aiocb_private.error, EINPROGRESS); 1194fd3bf775SJohn Dyson aiocbe->uaiocb._aiocb_private.error = EINPROGRESS; 11952244ea07SJohn Dyson aiocbe->userproc = p; 11962244ea07SJohn Dyson aiocbe->jobflags = 0; 11972244ea07SJohn Dyson 1198fd3bf775SJohn Dyson if ((error = aio_qphysio(p, aiocbe)) == 0) { 1199fd3bf775SJohn Dyson return 0; 1200fd3bf775SJohn Dyson } else if (error > 0) { 1201fd3bf775SJohn Dyson suword(&job->_aiocb_private.status, 0); 1202fd3bf775SJohn Dyson aiocbe->uaiocb._aiocb_private.error = error; 1203fd3bf775SJohn Dyson suword(&job->_aiocb_private.error, error); 1204fd3bf775SJohn Dyson return error; 1205fd3bf775SJohn Dyson } 1206fd3bf775SJohn Dyson 1207fd3bf775SJohn Dyson ki = p->p_aioinfo; 1208fd3bf775SJohn Dyson ++ki->kaio_queue_count; 1209fd3bf775SJohn Dyson TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist); 1210fd3bf775SJohn Dyson TAILQ_INSERT_TAIL(&aio_jobs, aiocbe, list); 1211fd3bf775SJohn Dyson aiocbe->jobstate = JOBST_JOBQGLOBAL; 1212fd3bf775SJohn Dyson 1213fd3bf775SJohn Dyson ++num_queue_count; 1214fd3bf775SJohn Dyson #if DEBUGAIO > 0 1215fd3bf775SJohn Dyson if (DEBUGREQ) { 1216fd3bf775SJohn Dyson printf("PROC %s, fd: %d, offset: 0x%x, address: 0x%x, size: %d\n", 1217fd3bf775SJohn Dyson job->aio_lio_opcode == LIO_READ?"Read":"Write", 1218fd3bf775SJohn Dyson job->aio_fildes, (int) job->aio_offset, 1219fd3bf775SJohn Dyson job->aio_buf, job->aio_nbytes); 1220fd3bf775SJohn Dyson } 1221fd3bf775SJohn Dyson #endif 1222fd3bf775SJohn Dyson error = 0; 1223fd3bf775SJohn Dyson 1224fd3bf775SJohn Dyson /* 1225fd3bf775SJohn Dyson * If we don't have a free AIO process, and we are below our 1226fd3bf775SJohn Dyson * quota, then start one. Otherwise, depend on the subsequent 1227fd3bf775SJohn Dyson * I/O completions to pick-up this job. If we don't sucessfully 1228fd3bf775SJohn Dyson * create the new process (thread) due to resource issues, we 1229fd3bf775SJohn Dyson * return an error for now (EAGAIN), which is likely not the 1230fd3bf775SJohn Dyson * correct thing to do. 1231fd3bf775SJohn Dyson */ 12322244ea07SJohn Dyson retryproc: 12332244ea07SJohn Dyson if (aiop = TAILQ_FIRST(&aio_freeproc)) { 12342244ea07SJohn Dyson TAILQ_REMOVE(&aio_freeproc, aiop, list); 12352244ea07SJohn Dyson TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list); 12362244ea07SJohn Dyson aiop->aioprocflags &= ~AIOP_FREE; 12372244ea07SJohn Dyson wakeup(aiop->aioproc); 1238fd3bf775SJohn Dyson } else if (((num_aio_resv_start + num_aio_procs) < max_aio_procs) && 1239fd3bf775SJohn Dyson ((ki->kaio_active_count + num_aio_resv_start) < 1240fd3bf775SJohn Dyson ki->kaio_maxactive_count)) { 1241fd3bf775SJohn Dyson num_aio_resv_start++; 1242fd3bf775SJohn Dyson if ((error = aio_newproc()) == 0) { 1243fd3bf775SJohn Dyson --num_aio_resv_start; 12442244ea07SJohn Dyson goto retryproc; 1245fd3bf775SJohn Dyson } 1246fd3bf775SJohn Dyson --num_aio_resv_start; 1247fd3bf775SJohn Dyson } 1248fd3bf775SJohn Dyson return error; 12492244ea07SJohn Dyson } 12502244ea07SJohn Dyson 1251fd3bf775SJohn Dyson /* 1252fd3bf775SJohn Dyson * This routine queues an AIO request, checking for quotas. 1253fd3bf775SJohn Dyson */ 12542244ea07SJohn Dyson static int 1255fd3bf775SJohn Dyson aio_aqueue(struct proc *p, struct aiocb *job, int type) 1256fd3bf775SJohn Dyson { 12572244ea07SJohn Dyson struct kaioinfo *ki; 12582244ea07SJohn Dyson 12592244ea07SJohn Dyson if (p->p_aioinfo == NULL) { 12602244ea07SJohn Dyson aio_init_aioinfo(p); 12612244ea07SJohn Dyson } 12622244ea07SJohn Dyson 12632244ea07SJohn Dyson if (num_queue_count >= max_queue_count) 12642244ea07SJohn Dyson return EAGAIN; 12652244ea07SJohn Dyson 12662244ea07SJohn Dyson ki = p->p_aioinfo; 12672244ea07SJohn Dyson if (ki->kaio_queue_count >= ki->kaio_qallowed_count) 12682244ea07SJohn Dyson return EAGAIN; 12692244ea07SJohn Dyson 12702244ea07SJohn Dyson return _aio_aqueue(p, job, type); 12712244ea07SJohn Dyson } 12722244ea07SJohn Dyson 12732244ea07SJohn Dyson /* 1274fd3bf775SJohn Dyson * Support the aio_return system call, as a side-effect, kernel 1275fd3bf775SJohn Dyson * resources are released. 12762244ea07SJohn Dyson */ 12772244ea07SJohn Dyson int 1278fd3bf775SJohn Dyson aio_return(struct proc *p, struct aio_return_args *uap) 1279fd3bf775SJohn Dyson { 12802244ea07SJohn Dyson int jobref, status; 12812244ea07SJohn Dyson struct aiocblist *cb; 12822244ea07SJohn Dyson struct kaioinfo *ki; 1283fd3bf775SJohn Dyson struct proc *userp; 12842244ea07SJohn Dyson 12852244ea07SJohn Dyson ki = p->p_aioinfo; 12862244ea07SJohn Dyson if (ki == NULL) { 12872244ea07SJohn Dyson return EINVAL; 12882244ea07SJohn Dyson } 12892244ea07SJohn Dyson 12902244ea07SJohn Dyson jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo); 1291fd3bf775SJohn Dyson if (jobref == -1 || jobref == 0) 12922244ea07SJohn Dyson return EINVAL; 12932244ea07SJohn Dyson 1294a624e84fSJohn Dyson #if DEBUGAIO > 0 1295fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1296fd3bf775SJohn Dyson printf("aio_return: jobref: %d, ", jobref); 1297a624e84fSJohn Dyson #endif 1298a624e84fSJohn Dyson 12992244ea07SJohn Dyson 13002244ea07SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_jobdone); 13012244ea07SJohn Dyson cb; 13022244ea07SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 13032244ea07SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 1304fd3bf775SJohn Dyson #if DEBUGAIO > 0 1305fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1306fd3bf775SJohn Dyson printf("status: %d, error: %d\n", 1307fd3bf775SJohn Dyson cb->uaiocb._aiocb_private.status, 1308fd3bf775SJohn Dyson cb->uaiocb._aiocb_private.error); 1309fd3bf775SJohn Dyson #endif 1310cb226aaaSPoul-Henning Kamp p->p_retval[0] = cb->uaiocb._aiocb_private.status; 1311fd3bf775SJohn Dyson if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) { 1312fd3bf775SJohn Dyson curproc->p_stats->p_ru.ru_oublock += cb->outputcharge; 1313fd3bf775SJohn Dyson cb->outputcharge = 0; 1314fd3bf775SJohn Dyson } else if (cb->uaiocb.aio_lio_opcode == LIO_READ) { 1315fd3bf775SJohn Dyson curproc->p_stats->p_ru.ru_inblock += cb->inputcharge; 1316fd3bf775SJohn Dyson cb->inputcharge = 0; 1317fd3bf775SJohn Dyson } 13182244ea07SJohn Dyson aio_free_entry(cb); 13192244ea07SJohn Dyson return 0; 13202244ea07SJohn Dyson } 13212244ea07SJohn Dyson } 13222244ea07SJohn Dyson 1323fd3bf775SJohn Dyson #if DEBUGAIO > 0 1324fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1325fd3bf775SJohn Dyson printf("(not found) status: %d, error: %d\n", 1326fd3bf775SJohn Dyson cb->uaiocb._aiocb_private.status, 1327fd3bf775SJohn Dyson cb->uaiocb._aiocb_private.error); 1328fd3bf775SJohn Dyson #endif 1329fd3bf775SJohn Dyson /* 13302244ea07SJohn Dyson status = fuword(&uap->aiocbp->_aiocb_private.status); 13312244ea07SJohn Dyson if (status == -1) 13322244ea07SJohn Dyson return 0; 1333fd3bf775SJohn Dyson */ 13342244ea07SJohn Dyson 13352244ea07SJohn Dyson return (EINVAL); 13362244ea07SJohn Dyson } 13372244ea07SJohn Dyson 13382244ea07SJohn Dyson /* 13392244ea07SJohn Dyson * Allow a process to wakeup when any of the I/O requests are 13402244ea07SJohn Dyson * completed. 13412244ea07SJohn Dyson */ 13422244ea07SJohn Dyson int 1343fd3bf775SJohn Dyson aio_suspend(struct proc *p, struct aio_suspend_args *uap) 1344fd3bf775SJohn Dyson { 13454a11ca4eSPoul-Henning Kamp struct timeval atv; 13462244ea07SJohn Dyson struct timespec ts; 13472244ea07SJohn Dyson struct aiocb *const *cbptr, *cbp; 13482244ea07SJohn Dyson struct kaioinfo *ki; 13492244ea07SJohn Dyson struct aiocblist *cb; 13502244ea07SJohn Dyson int i; 13512244ea07SJohn Dyson int error, s, timo; 13522244ea07SJohn Dyson int *joblist; 13532244ea07SJohn Dyson 1354fd3bf775SJohn Dyson if (uap->nent >= AIO_LISTIO_MAX) 1355fd3bf775SJohn Dyson return EINVAL; 13562244ea07SJohn Dyson 13572244ea07SJohn Dyson timo = 0; 13582244ea07SJohn Dyson if (uap->timeout) { 13592244ea07SJohn Dyson /* 13602244ea07SJohn Dyson * Get timespec struct 13612244ea07SJohn Dyson */ 13622244ea07SJohn Dyson if (error = copyin((caddr_t) uap->timeout, (caddr_t) &ts, sizeof ts)) { 13632244ea07SJohn Dyson return error; 13642244ea07SJohn Dyson } 13652244ea07SJohn Dyson 13662244ea07SJohn Dyson if (ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000) 13672244ea07SJohn Dyson return (EINVAL); 13682244ea07SJohn Dyson 13692244ea07SJohn Dyson TIMESPEC_TO_TIMEVAL(&atv, &ts) 13702244ea07SJohn Dyson if (itimerfix(&atv)) 13712244ea07SJohn Dyson return (EINVAL); 13722244ea07SJohn Dyson /* 13732244ea07SJohn Dyson * XXX this is not as careful as settimeofday() about minimising 13742244ea07SJohn Dyson * interrupt latency. The hzto() interface is inconvenient as usual. 13752244ea07SJohn Dyson */ 13762244ea07SJohn Dyson s = splclock(); 13772244ea07SJohn Dyson timevaladd(&atv, &time); 13782244ea07SJohn Dyson timo = hzto(&atv); 13792244ea07SJohn Dyson splx(s); 13802244ea07SJohn Dyson if (timo == 0) 13812244ea07SJohn Dyson timo = 1; 13822244ea07SJohn Dyson } 13832244ea07SJohn Dyson 13842244ea07SJohn Dyson ki = p->p_aioinfo; 13852244ea07SJohn Dyson if (ki == NULL) 13862244ea07SJohn Dyson return EAGAIN; 13872244ea07SJohn Dyson 1388fd3bf775SJohn Dyson joblist = zalloc(aiol_zone); 13892244ea07SJohn Dyson cbptr = uap->aiocbp; 13902244ea07SJohn Dyson 13912244ea07SJohn Dyson for(i=0;i<uap->nent;i++) { 13922244ea07SJohn Dyson cbp = (struct aiocb *) fuword((caddr_t) &cbptr[i]); 1393a624e84fSJohn Dyson #if DEBUGAIO > 1 1394fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 13952244ea07SJohn Dyson printf("cbp: %x\n", cbp); 13962244ea07SJohn Dyson #endif 13972244ea07SJohn Dyson joblist[i] = fuword(&cbp->_aiocb_private.kernelinfo); 13982244ea07SJohn Dyson } 13992244ea07SJohn Dyson 14002244ea07SJohn Dyson 14012244ea07SJohn Dyson while (1) { 14022244ea07SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_jobdone); 14032244ea07SJohn Dyson cb; 14042244ea07SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 14052244ea07SJohn Dyson for(i=0;i<uap->nent;i++) { 1406fd3bf775SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == 1407fd3bf775SJohn Dyson joblist[i]) { 1408fd3bf775SJohn Dyson /* 1409fd3bf775SJohn Dyson printf("suspend(awake): %d, offset: %d\n", joblist[i], (int) cb->uaiocb.aio_offset & 0xffffffff); 1410fd3bf775SJohn Dyson */ 1411fd3bf775SJohn Dyson zfree(aiol_zone, joblist); 14122244ea07SJohn Dyson return 0; 14132244ea07SJohn Dyson } 14142244ea07SJohn Dyson } 14152244ea07SJohn Dyson } 14162244ea07SJohn Dyson 1417a624e84fSJohn Dyson #if DEBUGAIO > 0 1418fd3bf775SJohn Dyson if (DEBUGFLOW > 0) { 1419a624e84fSJohn Dyson printf("Suspend, timeout: %d clocks, jobs:", timo); 1420a624e84fSJohn Dyson for(i=0;i<uap->nent;i++) 1421a624e84fSJohn Dyson printf(" %d", joblist[i]); 1422a624e84fSJohn Dyson printf("\n"); 1423a624e84fSJohn Dyson } 1424a624e84fSJohn Dyson 1425fd3bf775SJohn Dyson if (DEBUGFLOW > 2) { 14262244ea07SJohn Dyson printf("Suspending -- waiting for all I/O's to complete: "); 14272244ea07SJohn Dyson for(i=0;i<uap->nent;i++) 14282244ea07SJohn Dyson printf(" %d", joblist[i]); 14292244ea07SJohn Dyson printf("\n"); 1430a624e84fSJohn Dyson } 14312244ea07SJohn Dyson #endif 1432fd3bf775SJohn Dyson ki->kaio_flags |= KAIO_WAKEUP; 14332244ea07SJohn Dyson error = tsleep(p, PRIBIO|PCATCH, "aiospn", timo); 14342244ea07SJohn Dyson 14352244ea07SJohn Dyson if (error == EINTR) { 1436a624e84fSJohn Dyson #if DEBUGAIO > 0 1437fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 14382244ea07SJohn Dyson printf(" signal\n"); 14392244ea07SJohn Dyson #endif 1440fd3bf775SJohn Dyson zfree(aiol_zone, joblist); 14412244ea07SJohn Dyson return EINTR; 14422244ea07SJohn Dyson } else if (error == EWOULDBLOCK) { 1443a624e84fSJohn Dyson #if DEBUGAIO > 0 1444fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 14452244ea07SJohn Dyson printf(" timeout\n"); 14462244ea07SJohn Dyson #endif 1447fd3bf775SJohn Dyson zfree(aiol_zone, joblist); 14482244ea07SJohn Dyson return EAGAIN; 14492244ea07SJohn Dyson } 1450a624e84fSJohn Dyson #if DEBUGAIO > 0 1451fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 14522244ea07SJohn Dyson printf("\n"); 14532244ea07SJohn Dyson #endif 14542244ea07SJohn Dyson } 14552244ea07SJohn Dyson 14562244ea07SJohn Dyson /* NOTREACHED */ 14572244ea07SJohn Dyson return EINVAL; 14582244ea07SJohn Dyson } 1459ee877a35SJohn Dyson 1460ee877a35SJohn Dyson /* 1461ee877a35SJohn Dyson * aio_cancel at the kernel level is a NOOP right now. It 1462ee877a35SJohn Dyson * might be possible to support it partially in user mode, or 1463ee877a35SJohn Dyson * in kernel mode later on. 1464ee877a35SJohn Dyson */ 1465ee877a35SJohn Dyson int 1466fd3bf775SJohn Dyson aio_cancel(struct proc *p, struct aio_cancel_args *uap) 1467fd3bf775SJohn Dyson { 1468ee877a35SJohn Dyson return AIO_NOTCANCELLED; 1469ee877a35SJohn Dyson } 1470ee877a35SJohn Dyson 1471ee877a35SJohn Dyson /* 1472ee877a35SJohn Dyson * aio_error is implemented in the kernel level for compatibility 1473ee877a35SJohn Dyson * purposes only. For a user mode async implementation, it would be 1474ee877a35SJohn Dyson * best to do it in a userland subroutine. 1475ee877a35SJohn Dyson */ 1476ee877a35SJohn Dyson int 1477fd3bf775SJohn Dyson aio_error(struct proc *p, struct aio_error_args *uap) 1478fd3bf775SJohn Dyson { 14792244ea07SJohn Dyson struct aiocblist *cb; 14802244ea07SJohn Dyson struct kaioinfo *ki; 14812244ea07SJohn Dyson int jobref; 1482fd3bf775SJohn Dyson int error, status; 1483ee877a35SJohn Dyson 14842244ea07SJohn Dyson ki = p->p_aioinfo; 14852244ea07SJohn Dyson if (ki == NULL) 14862244ea07SJohn Dyson return EINVAL; 14872244ea07SJohn Dyson 14882244ea07SJohn Dyson jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo); 1489fd3bf775SJohn Dyson if ((jobref == -1) || (jobref == 0)) 1490fd3bf775SJohn Dyson return EINVAL; 1491ee877a35SJohn Dyson 14922244ea07SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_jobdone); 14932244ea07SJohn Dyson cb; 14942244ea07SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 14952244ea07SJohn Dyson 14962244ea07SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 1497cb226aaaSPoul-Henning Kamp p->p_retval[0] = cb->uaiocb._aiocb_private.error; 14982244ea07SJohn Dyson return 0; 14992244ea07SJohn Dyson } 1500ee877a35SJohn Dyson } 1501ee877a35SJohn Dyson 15022244ea07SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_jobqueue); 15032244ea07SJohn Dyson cb; 15042244ea07SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 15052244ea07SJohn Dyson 15062244ea07SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 1507cb226aaaSPoul-Henning Kamp p->p_retval[0] = EINPROGRESS; 15082244ea07SJohn Dyson return 0; 15092244ea07SJohn Dyson } 15102244ea07SJohn Dyson } 15112244ea07SJohn Dyson 15122244ea07SJohn Dyson /* 15132244ea07SJohn Dyson * Hack for lio 15142244ea07SJohn Dyson */ 1515fd3bf775SJohn Dyson /* 15162244ea07SJohn Dyson status = fuword(&uap->aiocbp->_aiocb_private.status); 15172244ea07SJohn Dyson if (status == -1) { 15182244ea07SJohn Dyson return fuword(&uap->aiocbp->_aiocb_private.error); 15192244ea07SJohn Dyson } 1520fd3bf775SJohn Dyson */ 15212244ea07SJohn Dyson return EINVAL; 1522ee877a35SJohn Dyson } 1523ee877a35SJohn Dyson 1524ee877a35SJohn Dyson int 1525fd3bf775SJohn Dyson aio_read(struct proc *p, struct aio_read_args *uap) 1526fd3bf775SJohn Dyson { 1527ee877a35SJohn Dyson struct filedesc *fdp; 1528ee877a35SJohn Dyson struct file *fp; 1529ee877a35SJohn Dyson struct uio auio; 1530ee877a35SJohn Dyson struct iovec aiov; 1531ee877a35SJohn Dyson unsigned int fd; 1532ee877a35SJohn Dyson int cnt; 1533ee877a35SJohn Dyson struct aiocb iocb; 15342244ea07SJohn Dyson int error, pmodes; 1535ee877a35SJohn Dyson 15362244ea07SJohn Dyson pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes); 15372244ea07SJohn Dyson if ((pmodes & AIO_PMODE_SYNC) == 0) { 1538a624e84fSJohn Dyson #if DEBUGAIO > 1 1539fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 1540a624e84fSJohn Dyson printf("queueing aio_read\n"); 1541a624e84fSJohn Dyson #endif 15422244ea07SJohn Dyson return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ); 15432244ea07SJohn Dyson } 1544ee877a35SJohn Dyson 1545ee877a35SJohn Dyson /* 1546ee877a35SJohn Dyson * Get control block 1547ee877a35SJohn Dyson */ 1548ee877a35SJohn Dyson if (error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb)) 1549ee877a35SJohn Dyson return error; 1550ee877a35SJohn Dyson 1551ee877a35SJohn Dyson /* 1552ee877a35SJohn Dyson * Get the fd info for process 1553ee877a35SJohn Dyson */ 1554ee877a35SJohn Dyson fdp = p->p_fd; 1555ee877a35SJohn Dyson 1556ee877a35SJohn Dyson /* 1557ee877a35SJohn Dyson * Range check file descriptor 1558ee877a35SJohn Dyson */ 1559ee877a35SJohn Dyson fd = iocb.aio_fildes; 1560ee877a35SJohn Dyson if (fd >= fdp->fd_nfiles) 1561ee877a35SJohn Dyson return EBADF; 1562ee877a35SJohn Dyson fp = fdp->fd_ofiles[fd]; 1563ee877a35SJohn Dyson if ((fp == NULL) || ((fp->f_flag & FREAD) == 0)) 1564ee877a35SJohn Dyson return EBADF; 15652244ea07SJohn Dyson if (iocb.aio_offset == -1LL) 1566ee877a35SJohn Dyson return EINVAL; 1567ee877a35SJohn Dyson 1568ee877a35SJohn Dyson auio.uio_resid = iocb.aio_nbytes; 1569ee877a35SJohn Dyson if (auio.uio_resid < 0) 1570ee877a35SJohn Dyson return (EINVAL); 1571ee877a35SJohn Dyson 15722244ea07SJohn Dyson /* 15732244ea07SJohn Dyson * Process sync simply -- queue async request. 15742244ea07SJohn Dyson */ 15752244ea07SJohn Dyson if ((iocb._aiocb_private.privatemodes & AIO_PMODE_SYNC) == 0) { 15762244ea07SJohn Dyson return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ); 15772244ea07SJohn Dyson } 15782244ea07SJohn Dyson 15792244ea07SJohn Dyson aiov.iov_base = iocb.aio_buf; 15802244ea07SJohn Dyson aiov.iov_len = iocb.aio_nbytes; 15812244ea07SJohn Dyson 15822244ea07SJohn Dyson auio.uio_iov = &aiov; 15832244ea07SJohn Dyson auio.uio_iovcnt = 1; 15842244ea07SJohn Dyson auio.uio_offset = iocb.aio_offset; 1585ee877a35SJohn Dyson auio.uio_rw = UIO_READ; 1586ee877a35SJohn Dyson auio.uio_segflg = UIO_USERSPACE; 1587ee877a35SJohn Dyson auio.uio_procp = p; 1588ee877a35SJohn Dyson 1589ee877a35SJohn Dyson cnt = iocb.aio_nbytes; 1590ee877a35SJohn Dyson error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred); 1591ee877a35SJohn Dyson if (error && 1592ee877a35SJohn Dyson (auio.uio_resid != cnt) && 1593ee877a35SJohn Dyson (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) 1594ee877a35SJohn Dyson error = 0; 1595ee877a35SJohn Dyson cnt -= auio.uio_resid; 1596cb226aaaSPoul-Henning Kamp p->p_retval[0] = cnt; 1597ee877a35SJohn Dyson return error; 1598ee877a35SJohn Dyson } 1599ee877a35SJohn Dyson 1600ee877a35SJohn Dyson int 1601fd3bf775SJohn Dyson aio_write(struct proc *p, struct aio_write_args *uap) 1602fd3bf775SJohn Dyson { 1603ee877a35SJohn Dyson struct filedesc *fdp; 1604ee877a35SJohn Dyson struct file *fp; 1605ee877a35SJohn Dyson struct uio auio; 1606ee877a35SJohn Dyson struct iovec aiov; 1607ee877a35SJohn Dyson unsigned int fd; 1608ee877a35SJohn Dyson int cnt; 1609ee877a35SJohn Dyson struct aiocb iocb; 1610ee877a35SJohn Dyson int error; 16112244ea07SJohn Dyson int pmodes; 16122244ea07SJohn Dyson 16132244ea07SJohn Dyson /* 16142244ea07SJohn Dyson * Process sync simply -- queue async request. 16152244ea07SJohn Dyson */ 16162244ea07SJohn Dyson pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes); 16172244ea07SJohn Dyson if ((pmodes & AIO_PMODE_SYNC) == 0) { 1618a624e84fSJohn Dyson #if DEBUGAIO > 1 1619fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 1620a624e84fSJohn Dyson printf("queing aio_write\n"); 1621a624e84fSJohn Dyson #endif 16222244ea07SJohn Dyson return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_WRITE); 16232244ea07SJohn Dyson } 1624ee877a35SJohn Dyson 1625ee877a35SJohn Dyson if (error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb)) 1626ee877a35SJohn Dyson return error; 1627ee877a35SJohn Dyson 1628ee877a35SJohn Dyson /* 1629ee877a35SJohn Dyson * Get the fd info for process 1630ee877a35SJohn Dyson */ 1631ee877a35SJohn Dyson fdp = p->p_fd; 1632ee877a35SJohn Dyson 1633ee877a35SJohn Dyson /* 1634ee877a35SJohn Dyson * Range check file descriptor 1635ee877a35SJohn Dyson */ 1636ee877a35SJohn Dyson fd = iocb.aio_fildes; 1637ee877a35SJohn Dyson if (fd >= fdp->fd_nfiles) 1638ee877a35SJohn Dyson return EBADF; 1639ee877a35SJohn Dyson fp = fdp->fd_ofiles[fd]; 1640ee877a35SJohn Dyson if ((fp == NULL) || ((fp->f_flag & FWRITE) == 0)) 1641ee877a35SJohn Dyson return EBADF; 16422244ea07SJohn Dyson if (iocb.aio_offset == -1LL) 1643ee877a35SJohn Dyson return EINVAL; 1644ee877a35SJohn Dyson 1645ee877a35SJohn Dyson aiov.iov_base = iocb.aio_buf; 1646ee877a35SJohn Dyson aiov.iov_len = iocb.aio_nbytes; 1647ee877a35SJohn Dyson auio.uio_iov = &aiov; 1648ee877a35SJohn Dyson auio.uio_iovcnt = 1; 1649ee877a35SJohn Dyson auio.uio_offset = iocb.aio_offset; 1650ee877a35SJohn Dyson 1651ee877a35SJohn Dyson auio.uio_resid = iocb.aio_nbytes; 1652ee877a35SJohn Dyson if (auio.uio_resid < 0) 1653ee877a35SJohn Dyson return (EINVAL); 1654ee877a35SJohn Dyson 1655ee877a35SJohn Dyson auio.uio_rw = UIO_WRITE; 1656ee877a35SJohn Dyson auio.uio_segflg = UIO_USERSPACE; 1657ee877a35SJohn Dyson auio.uio_procp = p; 1658ee877a35SJohn Dyson 1659ee877a35SJohn Dyson cnt = iocb.aio_nbytes; 1660ee877a35SJohn Dyson error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred); 1661ee877a35SJohn Dyson if (error) { 1662ee877a35SJohn Dyson if (auio.uio_resid != cnt) { 1663ee877a35SJohn Dyson if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) 1664ee877a35SJohn Dyson error = 0; 1665ee877a35SJohn Dyson if (error == EPIPE) 1666ee877a35SJohn Dyson psignal(p, SIGPIPE); 1667ee877a35SJohn Dyson } 1668ee877a35SJohn Dyson } 1669ee877a35SJohn Dyson cnt -= auio.uio_resid; 1670cb226aaaSPoul-Henning Kamp p->p_retval[0] = cnt; 1671ee877a35SJohn Dyson return error; 1672ee877a35SJohn Dyson } 1673ee877a35SJohn Dyson 1674ee877a35SJohn Dyson int 1675fd3bf775SJohn Dyson lio_listio(struct proc *p, struct lio_listio_args *uap) 1676fd3bf775SJohn Dyson { 16774a11ca4eSPoul-Henning Kamp int nent, nentqueued; 16782244ea07SJohn Dyson struct aiocb *iocb, * const *cbptr; 16792244ea07SJohn Dyson struct aiocblist *cb; 16802244ea07SJohn Dyson struct kaioinfo *ki; 16812244ea07SJohn Dyson int error, runningcode; 1682fd3bf775SJohn Dyson int nerror; 1683ee877a35SJohn Dyson int i; 1684ee877a35SJohn Dyson 1685a624e84fSJohn Dyson if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) { 1686a624e84fSJohn Dyson #if DEBUGAIO > 0 1687fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1688a624e84fSJohn Dyson printf("lio_listio: bad mode: %d\n", uap->mode); 1689a624e84fSJohn Dyson #endif 1690ee877a35SJohn Dyson return EINVAL; 1691a624e84fSJohn Dyson } 16922244ea07SJohn Dyson 16932244ea07SJohn Dyson nent = uap->nent; 1694a624e84fSJohn Dyson if (nent > AIO_LISTIO_MAX) { 1695a624e84fSJohn Dyson #if DEBUGAIO > 0 1696fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1697fd3bf775SJohn Dyson printf("lio_listio: nent > AIO_LISTIO_MAX: %d > %d\n", 1698fd3bf775SJohn Dyson nent, AIO_LISTIO_MAX); 1699a624e84fSJohn Dyson #endif 17002244ea07SJohn Dyson return EINVAL; 1701a624e84fSJohn Dyson } 17022244ea07SJohn Dyson 17032244ea07SJohn Dyson if (p->p_aioinfo == NULL) { 17042244ea07SJohn Dyson aio_init_aioinfo(p); 17052244ea07SJohn Dyson } 17062244ea07SJohn Dyson 1707a624e84fSJohn Dyson if ((nent + num_queue_count) > max_queue_count) { 1708a624e84fSJohn Dyson #if DEBUGAIO > 0 1709fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1710fd3bf775SJohn Dyson printf("lio_listio: (nent(%d) + num_queue_count(%d)) >" 1711fd3bf775SJohn Dyson " max_queue_count(%d)\n", 1712fd3bf775SJohn Dyson nent, num_queue_count, max_queue_count); 1713a624e84fSJohn Dyson #endif 17142244ea07SJohn Dyson return EAGAIN; 1715a624e84fSJohn Dyson } 17162244ea07SJohn Dyson 17172244ea07SJohn Dyson ki = p->p_aioinfo; 1718a624e84fSJohn Dyson if ((nent + ki->kaio_queue_count) > ki->kaio_qallowed_count) { 1719a624e84fSJohn Dyson #if DEBUGAIO > 0 1720fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1721fd3bf775SJohn Dyson printf("lio_listio: (nent(%d) + ki->kaio_queue_count(%d)) >" 1722fd3bf775SJohn Dyson " ki->kaio_qallowed_count(%d)\n", 1723fd3bf775SJohn Dyson nent, ki->kaio_queue_count, ki->kaio_qallowed_count); 1724a624e84fSJohn Dyson #endif 17252244ea07SJohn Dyson return EAGAIN; 1726a624e84fSJohn Dyson } 17272244ea07SJohn Dyson 17282244ea07SJohn Dyson /* 17292244ea07SJohn Dyson * get pointers to the list of I/O requests 17302244ea07SJohn Dyson */ 17312244ea07SJohn Dyson 1732fd3bf775SJohn Dyson nerror = 0; 1733fd3bf775SJohn Dyson nentqueued = 0; 17342244ea07SJohn Dyson cbptr = uap->acb_list; 17352244ea07SJohn Dyson for(i = 0; i < uap->nent; i++) { 17362244ea07SJohn Dyson iocb = (struct aiocb *) fuword((caddr_t) &cbptr[i]); 1737fd3bf775SJohn Dyson if (((int) iocb != -1) && ((int) iocb != NULL)) { 1738a624e84fSJohn Dyson error = _aio_aqueue(p, iocb, 0); 1739fd3bf775SJohn Dyson if (error == 0) { 17402244ea07SJohn Dyson nentqueued++; 1741fd3bf775SJohn Dyson } else { 1742fd3bf775SJohn Dyson nerror++; 1743fd3bf775SJohn Dyson printf("_aio_aqueue: error: %d\n", error); 1744fd3bf775SJohn Dyson } 1745fd3bf775SJohn Dyson } 17462244ea07SJohn Dyson } 17472244ea07SJohn Dyson 1748a624e84fSJohn Dyson /* 1749a624e84fSJohn Dyson * If we haven't queued any, then just return error 1750a624e84fSJohn Dyson */ 1751a624e84fSJohn Dyson if (nentqueued == 0) { 1752a624e84fSJohn Dyson #if DEBUGAIO > 0 1753fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1754a624e84fSJohn Dyson printf("lio_listio: none queued\n"); 1755a624e84fSJohn Dyson #endif 1756fd3bf775SJohn Dyson return 0; 1757a624e84fSJohn Dyson } 17582244ea07SJohn Dyson 1759a624e84fSJohn Dyson #if DEBUGAIO > 0 1760fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1761a624e84fSJohn Dyson printf("lio_listio: %d queued\n", nentqueued); 1762a624e84fSJohn Dyson #endif 1763a624e84fSJohn Dyson 1764a624e84fSJohn Dyson /* 1765a624e84fSJohn Dyson * Calculate the appropriate error return 1766a624e84fSJohn Dyson */ 17672244ea07SJohn Dyson runningcode = 0; 1768fd3bf775SJohn Dyson if (nerror) 17692244ea07SJohn Dyson runningcode = EIO; 17702244ea07SJohn Dyson 17712244ea07SJohn Dyson if (uap->mode == LIO_WAIT) { 17722244ea07SJohn Dyson while (1) { 17732244ea07SJohn Dyson int found; 1774fd3bf775SJohn Dyson found = 0; 1775fd3bf775SJohn Dyson for(i = 0; i < uap->nent; i++) { 1776fd3bf775SJohn Dyson int jobref, command; 17772244ea07SJohn Dyson 1778a624e84fSJohn Dyson /* 1779a624e84fSJohn Dyson * Fetch address of the control buf pointer in user space 1780a624e84fSJohn Dyson */ 17812244ea07SJohn Dyson iocb = (struct aiocb *) fuword((caddr_t) &cbptr[i]); 1782fd3bf775SJohn Dyson if (((int) iocb == -1) || ((int) iocb == 0)) 1783fd3bf775SJohn Dyson continue; 1784a624e84fSJohn Dyson 1785a624e84fSJohn Dyson /* 1786a624e84fSJohn Dyson * Fetch the associated command from user space 1787a624e84fSJohn Dyson */ 17882244ea07SJohn Dyson command = fuword(&iocb->aio_lio_opcode); 1789fd3bf775SJohn Dyson if (command == LIO_NOP) { 1790fd3bf775SJohn Dyson found++; 17912244ea07SJohn Dyson continue; 1792fd3bf775SJohn Dyson } 1793a624e84fSJohn Dyson 17942244ea07SJohn Dyson jobref = fuword(&iocb->_aiocb_private.kernelinfo); 17952244ea07SJohn Dyson 17962244ea07SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_jobdone); 17972244ea07SJohn Dyson cb; 17982244ea07SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 1799fd3bf775SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == 1800fd3bf775SJohn Dyson jobref) { 18012244ea07SJohn Dyson found++; 18022244ea07SJohn Dyson break; 18032244ea07SJohn Dyson } 18042244ea07SJohn Dyson } 1805fd3bf775SJohn Dyson 1806fd3bf775SJohn Dyson if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) { 1807fd3bf775SJohn Dyson curproc->p_stats->p_ru.ru_oublock += cb->outputcharge; 1808fd3bf775SJohn Dyson cb->outputcharge = 0; 1809fd3bf775SJohn Dyson } else if (cb->uaiocb.aio_lio_opcode == LIO_READ) { 1810fd3bf775SJohn Dyson curproc->p_stats->p_ru.ru_inblock += cb->inputcharge; 1811fd3bf775SJohn Dyson cb->inputcharge = 0; 1812fd3bf775SJohn Dyson } 18132244ea07SJohn Dyson } 18142244ea07SJohn Dyson 1815a624e84fSJohn Dyson /* 1816a624e84fSJohn Dyson * If all I/Os have been disposed of, then we can return 1817a624e84fSJohn Dyson */ 1818fd3bf775SJohn Dyson if (found == nentqueued) { 18192244ea07SJohn Dyson return runningcode; 18202244ea07SJohn Dyson } 18212244ea07SJohn Dyson 1822fd3bf775SJohn Dyson 1823fd3bf775SJohn Dyson ki->kaio_flags |= KAIO_WAKEUP; 18242244ea07SJohn Dyson error = tsleep(p, PRIBIO|PCATCH, "aiospn", 0); 18252244ea07SJohn Dyson 18262244ea07SJohn Dyson if (error == EINTR) { 18272244ea07SJohn Dyson return EINTR; 18282244ea07SJohn Dyson } else if (error == EWOULDBLOCK) { 18292244ea07SJohn Dyson return EAGAIN; 18302244ea07SJohn Dyson } 18312244ea07SJohn Dyson 18322244ea07SJohn Dyson } 18332244ea07SJohn Dyson } 18342244ea07SJohn Dyson 18352244ea07SJohn Dyson return runningcode; 1836ee877a35SJohn Dyson } 1837fd3bf775SJohn Dyson 1838fd3bf775SJohn Dyson static void 1839fd3bf775SJohn Dyson aio_physwakeup(bp) 1840fd3bf775SJohn Dyson struct buf *bp; 1841fd3bf775SJohn Dyson { 1842fd3bf775SJohn Dyson struct aiocbe *iocb; 1843fd3bf775SJohn Dyson struct proc *p; 1844fd3bf775SJohn Dyson struct kaioinfo *ki; 1845fd3bf775SJohn Dyson 1846fd3bf775SJohn Dyson wakeup((caddr_t) bp); 1847fd3bf775SJohn Dyson bp->b_flags &= ~B_CALL; 1848fd3bf775SJohn Dyson 1849fd3bf775SJohn Dyson iocb = (struct aiocbe *)bp->b_spc; 1850fd3bf775SJohn Dyson if (iocb) { 1851fd3bf775SJohn Dyson ki = p->p_aioinfo; 1852fd3bf775SJohn Dyson if (ki && (ki->kaio_flags & (KAIO_RUNDOWN|KAIO_WAKEUP))) { 1853fd3bf775SJohn Dyson ki->kaio_flags &= ~KAIO_WAKEUP; 1854fd3bf775SJohn Dyson wakeup(p); 1855fd3bf775SJohn Dyson } 1856fd3bf775SJohn Dyson } 1857fd3bf775SJohn Dyson } 1858