/*
 * Copyright (c) 1997 John S. Dyson.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. John S. Dyson's name may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * DISCLAIMER:  This code isn't warranted to do anything useful.  Anything
 * bad that happens because of using this software isn't the responsibility
 * of the author.  This software is distributed AS-IS.
 *
 * $Id: vfs_aio.c,v 1.13 1997/11/29 02:57:46 dyson Exp $
 */

/*
 * This file contains support for the POSIX.4 AIO/LIO facility.
21ee877a35SJohn Dyson */ 22ee877a35SJohn Dyson 23ee877a35SJohn Dyson #include <sys/param.h> 24ee877a35SJohn Dyson #include <sys/systm.h> 25ee877a35SJohn Dyson #include <sys/sysproto.h> 26ee877a35SJohn Dyson #include <sys/filedesc.h> 27ee877a35SJohn Dyson #include <sys/kernel.h> 28ee877a35SJohn Dyson #include <sys/fcntl.h> 29ee877a35SJohn Dyson #include <sys/file.h> 30fdebd4f0SBruce Evans #include <sys/lock.h> 31ee877a35SJohn Dyson #include <sys/unistd.h> 32ee877a35SJohn Dyson #include <sys/proc.h> 33ee877a35SJohn Dyson #include <sys/uio.h> 34ee877a35SJohn Dyson #include <sys/malloc.h> 35ee877a35SJohn Dyson #include <sys/signalvar.h> 36a624e84fSJohn Dyson #include <sys/sysctl.h> 37fd3bf775SJohn Dyson #include <sys/vnode.h> 38fd3bf775SJohn Dyson #include <sys/conf.h> 39fd3bf775SJohn Dyson #include <miscfs/specfs/specdev.h> 40ee877a35SJohn Dyson 41ee877a35SJohn Dyson #include <vm/vm.h> 42ee877a35SJohn Dyson #include <vm/vm_param.h> 43ee877a35SJohn Dyson #include <vm/vm_extern.h> 442244ea07SJohn Dyson #include <vm/pmap.h> 452244ea07SJohn Dyson #include <vm/vm_map.h> 46fd3bf775SJohn Dyson #include <vm/vm_zone.h> 47ee877a35SJohn Dyson #include <sys/aio.h> 485aaef07cSJohn Dyson #include <sys/shm.h> 49fd3bf775SJohn Dyson #include <sys/user.h> 505aaef07cSJohn Dyson 515aaef07cSJohn Dyson #include <machine/cpu.h> 52ee877a35SJohn Dyson 532244ea07SJohn Dyson #if 0 542244ea07SJohn Dyson #define DEBUGAIO 552244ea07SJohn Dyson #define DIAGNOSTIC 562244ea07SJohn Dyson #endif 572244ea07SJohn Dyson 5884af4da6SJohn Dyson #define DEBUGAIO 0 59a624e84fSJohn Dyson 602244ea07SJohn Dyson static int jobrefid; 612244ea07SJohn Dyson 622244ea07SJohn Dyson #define JOBST_NULL 0x0 632244ea07SJohn Dyson #define JOBST_JOBQPROC 0x1 642244ea07SJohn Dyson #define JOBST_JOBQGLOBAL 0x2 652244ea07SJohn Dyson #define JOBST_JOBRUNNING 0x3 662244ea07SJohn Dyson #define JOBST_JOBFINISHED 0x4 67fd3bf775SJohn Dyson #define JOBST_JOBQBUF 0x5 68fd3bf775SJohn Dyson #define JOBST_JOBBFINISHED 0x6 692244ea07SJohn 
Dyson 7084af4da6SJohn Dyson #ifndef MAX_AIO_PER_PROC 712244ea07SJohn Dyson #define MAX_AIO_PER_PROC 32 7284af4da6SJohn Dyson #endif 7384af4da6SJohn Dyson 7484af4da6SJohn Dyson #ifndef MAX_AIO_QUEUE_PER_PROC 752244ea07SJohn Dyson #define MAX_AIO_QUEUE_PER_PROC 256 /* Bigger than AIO_LISTIO_MAX */ 7684af4da6SJohn Dyson #endif 7784af4da6SJohn Dyson 7884af4da6SJohn Dyson #ifndef MAX_AIO_PROCS 79fd3bf775SJohn Dyson #define MAX_AIO_PROCS 32 8084af4da6SJohn Dyson #endif 8184af4da6SJohn Dyson 8284af4da6SJohn Dyson #ifndef MAX_AIO_QUEUE 832244ea07SJohn Dyson #define MAX_AIO_QUEUE 1024 /* Bigger than AIO_LISTIO_MAX */ 8484af4da6SJohn Dyson #endif 8584af4da6SJohn Dyson 8684af4da6SJohn Dyson #ifndef TARGET_AIO_PROCS 8784af4da6SJohn Dyson #define TARGET_AIO_PROCS 0 8884af4da6SJohn Dyson #endif 8984af4da6SJohn Dyson 9084af4da6SJohn Dyson #ifndef MAX_BUF_AIO 9184af4da6SJohn Dyson #define MAX_BUF_AIO 16 9284af4da6SJohn Dyson #endif 9384af4da6SJohn Dyson 9484af4da6SJohn Dyson #ifndef AIOD_TIMEOUT_DEFAULT 9584af4da6SJohn Dyson #define AIOD_TIMEOUT_DEFAULT (10 * hz) 9684af4da6SJohn Dyson #endif 9784af4da6SJohn Dyson 9884af4da6SJohn Dyson #ifndef AIOD_LIFETIME_DEFAULT 9984af4da6SJohn Dyson #define AIOD_LIFETIME_DEFAULT (30 * hz) 10084af4da6SJohn Dyson #endif 1012244ea07SJohn Dyson 102a624e84fSJohn Dyson int max_aio_procs = MAX_AIO_PROCS; 103a624e84fSJohn Dyson int num_aio_procs = 0; 104a624e84fSJohn Dyson int target_aio_procs = TARGET_AIO_PROCS; 105a624e84fSJohn Dyson int max_queue_count = MAX_AIO_QUEUE; 106a624e84fSJohn Dyson int num_queue_count = 0; 107fd3bf775SJohn Dyson int num_buf_aio = 0; 108fd3bf775SJohn Dyson int num_aio_resv_start = 0; 10984af4da6SJohn Dyson int aiod_timeout; 11084af4da6SJohn Dyson int aiod_lifetime; 111a624e84fSJohn Dyson 112a624e84fSJohn Dyson int max_aio_per_proc = MAX_AIO_PER_PROC, 113a624e84fSJohn Dyson max_aio_queue_per_proc=MAX_AIO_QUEUE_PER_PROC; 114a624e84fSJohn Dyson 11584af4da6SJohn Dyson int max_buf_aio = MAX_BUF_AIO; 116a624e84fSJohn Dyson 
117a624e84fSJohn Dyson SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0, "AIO mgmt"); 118a624e84fSJohn Dyson 119a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc, 120a624e84fSJohn Dyson CTLFLAG_RW, &max_aio_per_proc, 0, ""); 121a624e84fSJohn Dyson 122a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc, 123a624e84fSJohn Dyson CTLFLAG_RW, &max_aio_queue_per_proc, 0, ""); 124a624e84fSJohn Dyson 125a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs, 126a624e84fSJohn Dyson CTLFLAG_RW, &max_aio_procs, 0, ""); 127a624e84fSJohn Dyson 128a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs, 129a624e84fSJohn Dyson CTLFLAG_RD, &num_aio_procs, 0, ""); 130a624e84fSJohn Dyson 131a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count, 132a624e84fSJohn Dyson CTLFLAG_RD, &num_queue_count, 0, ""); 133a624e84fSJohn Dyson 134a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue, 135a624e84fSJohn Dyson CTLFLAG_RW, &max_queue_count, 0, ""); 136a624e84fSJohn Dyson 137a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs, 138a624e84fSJohn Dyson CTLFLAG_RW, &target_aio_procs, 0, ""); 139a624e84fSJohn Dyson 14084af4da6SJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, max_buf_aio, 14184af4da6SJohn Dyson CTLFLAG_RW, &max_buf_aio, 0, ""); 142fd3bf775SJohn Dyson 143fd3bf775SJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, num_buf_aio, 144fd3bf775SJohn Dyson CTLFLAG_RD, &num_buf_aio, 0, ""); 145fd3bf775SJohn Dyson 14684af4da6SJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_lifetime, 14784af4da6SJohn Dyson CTLFLAG_RW, &aiod_lifetime, 0, ""); 14884af4da6SJohn Dyson 14984af4da6SJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_timeout, 15084af4da6SJohn Dyson CTLFLAG_RW, &aiod_timeout, 0, ""); 15184af4da6SJohn Dyson 15284af4da6SJohn Dyson 153a624e84fSJohn Dyson #if DEBUGAIO > 0 154a624e84fSJohn Dyson static int debugaio; 155a624e84fSJohn Dyson SYSCTL_INT(_vfs_aio, OID_AUTO, debugaio, CTLFLAG_RW, &debugaio, 0, ""); 
156a624e84fSJohn Dyson #endif 157a624e84fSJohn Dyson 158fd3bf775SJohn Dyson #define DEBUGFLOW (debugaio & 0xff) 159fd3bf775SJohn Dyson #define DEBUGREQ ((debugaio & 0xff00) >> 8) 16084af4da6SJohn Dyson #define DEBUGCHR (debugaio & 0x10000) 161fd3bf775SJohn Dyson 1622244ea07SJohn Dyson /* 1632244ea07SJohn Dyson * Job queue item 1642244ea07SJohn Dyson */ 16584af4da6SJohn Dyson 16684af4da6SJohn Dyson #define AIOCBLIST_CANCELLED 0x1 16784af4da6SJohn Dyson #define AIOCBLIST_RUNDOWN 0x4 16884af4da6SJohn Dyson #define AIOCBLIST_ASYNCFREE 0x8 16984af4da6SJohn Dyson #define AIOCBLIST_DONE 0x10 17084af4da6SJohn Dyson 1712244ea07SJohn Dyson struct aiocblist { 1722244ea07SJohn Dyson TAILQ_ENTRY (aiocblist) list; /* List of jobs */ 1732244ea07SJohn Dyson TAILQ_ENTRY (aiocblist) plist; /* List of jobs for proc */ 1742244ea07SJohn Dyson int jobflags; 1752244ea07SJohn Dyson int jobstate; 176fd3bf775SJohn Dyson int inputcharge, outputcharge; 177fd3bf775SJohn Dyson struct buf *bp; /* buffer pointer */ 1782244ea07SJohn Dyson struct proc *userproc; /* User process */ 1792244ea07SJohn Dyson struct aioproclist *jobaioproc; /* AIO process descriptor */ 18084af4da6SJohn Dyson struct aio_liojob *lio; /* optional lio job */ 1812244ea07SJohn Dyson struct aiocb uaiocb; /* Kernel I/O control block */ 1822244ea07SJohn Dyson }; 1832244ea07SJohn Dyson 184fd3bf775SJohn Dyson 1852244ea07SJohn Dyson /* 1862244ea07SJohn Dyson * AIO process info 1872244ea07SJohn Dyson */ 18884af4da6SJohn Dyson #define AIOP_FREE 0x1 /* proc on free queue */ 18984af4da6SJohn Dyson #define AIOP_SCHED 0x2 /* proc explicitly scheduled */ 19084af4da6SJohn Dyson 1912244ea07SJohn Dyson struct aioproclist { 1922244ea07SJohn Dyson int aioprocflags; /* AIO proc flags */ 1932244ea07SJohn Dyson TAILQ_ENTRY(aioproclist) list; /* List of processes */ 1942244ea07SJohn Dyson struct proc *aioproc; /* The AIO thread */ 1952244ea07SJohn Dyson TAILQ_HEAD (,aiocblist) jobtorun; /* suggested job to run */ 1962244ea07SJohn Dyson }; 
1972244ea07SJohn Dyson 19884af4da6SJohn Dyson /* 19984af4da6SJohn Dyson * data-structure for lio signal management 20084af4da6SJohn Dyson */ 20184af4da6SJohn Dyson struct aio_liojob { 20284af4da6SJohn Dyson int lioj_flags; 20384af4da6SJohn Dyson int lioj_buffer_count; 20484af4da6SJohn Dyson int lioj_buffer_finished_count; 20584af4da6SJohn Dyson int lioj_queue_count; 20684af4da6SJohn Dyson int lioj_queue_finished_count; 20784af4da6SJohn Dyson struct sigevent lioj_signal; /* signal on all I/O done */ 20884af4da6SJohn Dyson TAILQ_ENTRY (aio_liojob) lioj_list; 20984af4da6SJohn Dyson struct kaioinfo *lioj_ki; 21084af4da6SJohn Dyson }; 21184af4da6SJohn Dyson #define LIOJ_SIGNAL 0x1 /* signal on all done (lio) */ 21284af4da6SJohn Dyson #define LIOJ_SIGNAL_POSTED 0x2 /* signal has been posted */ 21384af4da6SJohn Dyson 21484af4da6SJohn Dyson /* 21584af4da6SJohn Dyson * per process aio data structure 21684af4da6SJohn Dyson */ 2172244ea07SJohn Dyson struct kaioinfo { 218fd3bf775SJohn Dyson int kaio_flags; /* per process kaio flags */ 2192244ea07SJohn Dyson int kaio_maxactive_count; /* maximum number of AIOs */ 2202244ea07SJohn Dyson int kaio_active_count; /* number of currently used AIOs */ 2212244ea07SJohn Dyson int kaio_qallowed_count; /* maxiumu size of AIO queue */ 2222244ea07SJohn Dyson int kaio_queue_count; /* size of AIO queue */ 223fd3bf775SJohn Dyson int kaio_ballowed_count; /* maximum number of buffers */ 22484af4da6SJohn Dyson int kaio_queue_finished_count; /* number of daemon jobs finished */ 225fd3bf775SJohn Dyson int kaio_buffer_count; /* number of physio buffers */ 22684af4da6SJohn Dyson int kaio_buffer_finished_count; /* count of I/O done */ 22784af4da6SJohn Dyson struct proc *kaio_p; /* process that uses this kaio block */ 22884af4da6SJohn Dyson TAILQ_HEAD (,aio_liojob) kaio_liojoblist; /* list of lio jobs */ 2292244ea07SJohn Dyson TAILQ_HEAD (,aiocblist) kaio_jobqueue; /* job queue for process */ 2302244ea07SJohn Dyson TAILQ_HEAD (,aiocblist) kaio_jobdone; 
/* done queue for process */ 231fd3bf775SJohn Dyson TAILQ_HEAD (,aiocblist) kaio_bufqueue; /* buffer job queue for process */ 232fd3bf775SJohn Dyson TAILQ_HEAD (,aiocblist) kaio_bufdone; /* buffer done queue for process */ 2332244ea07SJohn Dyson }; 2342244ea07SJohn Dyson 23584af4da6SJohn Dyson #define KAIO_RUNDOWN 0x1 /* process is being run down */ 23684af4da6SJohn Dyson #define KAIO_WAKEUP 0x2 /* wakeup process when there is a significant 23784af4da6SJohn Dyson event */ 23884af4da6SJohn Dyson 239fd3bf775SJohn Dyson 2402244ea07SJohn Dyson TAILQ_HEAD (,aioproclist) aio_freeproc, aio_activeproc; 2412244ea07SJohn Dyson TAILQ_HEAD(,aiocblist) aio_jobs; /* Async job list */ 242fd3bf775SJohn Dyson TAILQ_HEAD(,aiocblist) aio_bufjobs; /* Phys I/O job list */ 24384af4da6SJohn Dyson TAILQ_HEAD(,aiocblist) aio_freejobs; /* Pool of free jobs */ 2442244ea07SJohn Dyson 245fd3bf775SJohn Dyson static void aio_init_aioinfo(struct proc *p) ; 246fd3bf775SJohn Dyson static void aio_onceonly(void *) ; 247fd3bf775SJohn Dyson static int aio_free_entry(struct aiocblist *aiocbe); 248fd3bf775SJohn Dyson static void aio_process(struct aiocblist *aiocbe); 2492244ea07SJohn Dyson static int aio_newproc(void) ; 2502244ea07SJohn Dyson static int aio_aqueue(struct proc *p, struct aiocb *job, int type) ; 251fd3bf775SJohn Dyson static void aio_physwakeup(struct buf *bp); 252fd3bf775SJohn Dyson static int aio_fphysio(struct proc *p, struct aiocblist *aiocbe, int type); 253fd3bf775SJohn Dyson static int aio_qphysio(struct proc *p, struct aiocblist *iocb); 254fd3bf775SJohn Dyson static void aio_daemon(void *uproc); 2552244ea07SJohn Dyson 2562244ea07SJohn Dyson SYSINIT(aio, SI_SUB_VFS, SI_ORDER_ANY, aio_onceonly, NULL); 2572244ea07SJohn Dyson 25884af4da6SJohn Dyson static vm_zone_t kaio_zone=0, aiop_zone=0, 25984af4da6SJohn Dyson aiocb_zone=0, aiol_zone=0, aiolio_zone=0; 260fd3bf775SJohn Dyson 261fd3bf775SJohn Dyson /* 262fd3bf775SJohn Dyson * Single AIOD vmspace shared amongst all of them 
263fd3bf775SJohn Dyson */ 264fd3bf775SJohn Dyson static struct vmspace *aiovmspace = NULL; 265a624e84fSJohn Dyson 2662244ea07SJohn Dyson /* 2672244ea07SJohn Dyson * Startup initialization 2682244ea07SJohn Dyson */ 2692244ea07SJohn Dyson void 270fd3bf775SJohn Dyson aio_onceonly(void *na) 271fd3bf775SJohn Dyson { 2722244ea07SJohn Dyson TAILQ_INIT(&aio_freeproc); 2732244ea07SJohn Dyson TAILQ_INIT(&aio_activeproc); 2742244ea07SJohn Dyson TAILQ_INIT(&aio_jobs); 275fd3bf775SJohn Dyson TAILQ_INIT(&aio_bufjobs); 2762244ea07SJohn Dyson TAILQ_INIT(&aio_freejobs); 277fd3bf775SJohn Dyson kaio_zone = zinit("AIO", sizeof (struct kaioinfo), 0, 0, 1); 278fd3bf775SJohn Dyson aiop_zone = zinit("AIOP", sizeof (struct aioproclist), 0, 0, 1); 279fd3bf775SJohn Dyson aiocb_zone = zinit("AIOCB", sizeof (struct aiocblist), 0, 0, 1); 280fd3bf775SJohn Dyson aiol_zone = zinit("AIOL", AIO_LISTIO_MAX * sizeof (int), 0, 0, 1); 28184af4da6SJohn Dyson aiolio_zone = zinit("AIOLIO", 28284af4da6SJohn Dyson AIO_LISTIO_MAX * sizeof (struct aio_liojob), 0, 0, 1); 28384af4da6SJohn Dyson aiod_timeout = AIOD_TIMEOUT_DEFAULT; 28484af4da6SJohn Dyson aiod_lifetime = AIOD_LIFETIME_DEFAULT; 285fd3bf775SJohn Dyson jobrefid = 1; 2862244ea07SJohn Dyson } 2872244ea07SJohn Dyson 2882244ea07SJohn Dyson /* 2892244ea07SJohn Dyson * Init the per-process aioinfo structure. 29084af4da6SJohn Dyson * The aioinfo limits are set per-process for user limit (resource) management. 
2912244ea07SJohn Dyson */ 2922244ea07SJohn Dyson void 293fd3bf775SJohn Dyson aio_init_aioinfo(struct proc *p) 294fd3bf775SJohn Dyson { 2952244ea07SJohn Dyson struct kaioinfo *ki; 2962244ea07SJohn Dyson if (p->p_aioinfo == NULL) { 297fd3bf775SJohn Dyson ki = zalloc(kaio_zone); 2982244ea07SJohn Dyson p->p_aioinfo = ki; 29984af4da6SJohn Dyson ki->kaio_flags = 0; 300a624e84fSJohn Dyson ki->kaio_maxactive_count = max_aio_per_proc; 3012244ea07SJohn Dyson ki->kaio_active_count = 0; 302a624e84fSJohn Dyson ki->kaio_qallowed_count = max_aio_queue_per_proc; 3032244ea07SJohn Dyson ki->kaio_queue_count = 0; 30484af4da6SJohn Dyson ki->kaio_ballowed_count = max_buf_aio; 305fd3bf775SJohn Dyson ki->kaio_buffer_count = 0; 30684af4da6SJohn Dyson ki->kaio_buffer_finished_count = 0; 30784af4da6SJohn Dyson ki->kaio_p = p; 3082244ea07SJohn Dyson TAILQ_INIT(&ki->kaio_jobdone); 3092244ea07SJohn Dyson TAILQ_INIT(&ki->kaio_jobqueue); 310fd3bf775SJohn Dyson TAILQ_INIT(&ki->kaio_bufdone); 311fd3bf775SJohn Dyson TAILQ_INIT(&ki->kaio_bufqueue); 31284af4da6SJohn Dyson TAILQ_INIT(&ki->kaio_liojoblist); 3132244ea07SJohn Dyson } 3142244ea07SJohn Dyson } 3152244ea07SJohn Dyson 3162244ea07SJohn Dyson /* 3172244ea07SJohn Dyson * Free a job entry. Wait for completion if it is currently 3182244ea07SJohn Dyson * active, but don't delay forever. If we delay, we return 3192244ea07SJohn Dyson * a flag that says that we have to restart the queue scan. 
3202244ea07SJohn Dyson */ 3212244ea07SJohn Dyson int 322fd3bf775SJohn Dyson aio_free_entry(struct aiocblist *aiocbe) 323fd3bf775SJohn Dyson { 3242244ea07SJohn Dyson struct kaioinfo *ki; 3252244ea07SJohn Dyson struct aioproclist *aiop; 32684af4da6SJohn Dyson struct aio_liojob *lj; 3272244ea07SJohn Dyson struct proc *p; 328fd3bf775SJohn Dyson int error; 3292244ea07SJohn Dyson 3302244ea07SJohn Dyson if (aiocbe->jobstate == JOBST_NULL) 3312244ea07SJohn Dyson panic("aio_free_entry: freeing already free job"); 3322244ea07SJohn Dyson 3332244ea07SJohn Dyson p = aiocbe->userproc; 3342244ea07SJohn Dyson ki = p->p_aioinfo; 33584af4da6SJohn Dyson lj = aiocbe->lio; 3362244ea07SJohn Dyson if (ki == NULL) 3372244ea07SJohn Dyson panic("aio_free_entry: missing p->p_aioinfo"); 3382244ea07SJohn Dyson 3392244ea07SJohn Dyson if (aiocbe->jobstate == JOBST_JOBRUNNING) { 3402244ea07SJohn Dyson if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) 3412244ea07SJohn Dyson return 0; 3422244ea07SJohn Dyson aiocbe->jobflags |= AIOCBLIST_RUNDOWN; 343a624e84fSJohn Dyson tsleep(aiocbe, PRIBIO|PCATCH, "jobwai", 0); 3442244ea07SJohn Dyson } 3452244ea07SJohn Dyson aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE; 3462244ea07SJohn Dyson 347fd3bf775SJohn Dyson if (aiocbe->bp == NULL) { 3482244ea07SJohn Dyson if (ki->kaio_queue_count <= 0) 3492244ea07SJohn Dyson panic("aio_free_entry: process queue size <= 0"); 3502244ea07SJohn Dyson if (num_queue_count <= 0) 3512244ea07SJohn Dyson panic("aio_free_entry: system wide queue size <= 0"); 3522244ea07SJohn Dyson 35384af4da6SJohn Dyson if(lj) { 35484af4da6SJohn Dyson lj->lioj_queue_count--; 35584af4da6SJohn Dyson if (aiocbe->jobflags & AIOCBLIST_DONE) 35684af4da6SJohn Dyson lj->lioj_queue_finished_count--; 35784af4da6SJohn Dyson } 35884af4da6SJohn Dyson ki->kaio_queue_count--; 35984af4da6SJohn Dyson if (aiocbe->jobflags & AIOCBLIST_DONE) 36084af4da6SJohn Dyson ki->kaio_queue_finished_count--; 36184af4da6SJohn Dyson num_queue_count--; 362fd3bf775SJohn Dyson 363a624e84fSJohn 
Dyson #if DEBUGAIO > 0 364fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 365fd3bf775SJohn Dyson printf("freeing normal file I/O entry: Proc Q: %d, Global Q: %d\n", 366a624e84fSJohn Dyson ki->kaio_queue_count, num_queue_count); 367a624e84fSJohn Dyson #endif 368fd3bf775SJohn Dyson } else { 36984af4da6SJohn Dyson if(lj) { 37084af4da6SJohn Dyson lj->lioj_buffer_count--; 37184af4da6SJohn Dyson if (aiocbe->jobflags & AIOCBLIST_DONE) 37284af4da6SJohn Dyson lj->lioj_buffer_finished_count--; 37384af4da6SJohn Dyson } 37484af4da6SJohn Dyson if (aiocbe->jobflags & AIOCBLIST_DONE) 37584af4da6SJohn Dyson ki->kaio_buffer_finished_count--; 37684af4da6SJohn Dyson ki->kaio_buffer_count--; 37784af4da6SJohn Dyson num_buf_aio--; 3782244ea07SJohn Dyson 379fd3bf775SJohn Dyson #if DEBUGAIO > 0 380fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 381fd3bf775SJohn Dyson printf("freeing physical I/O entry: Proc BQ: %d, Global BQ: %d\n", 382fd3bf775SJohn Dyson ki->kaio_buffer_count, num_buf_aio); 383fd3bf775SJohn Dyson #endif 384fd3bf775SJohn Dyson } 385fd3bf775SJohn Dyson 386fd3bf775SJohn Dyson if ((ki->kaio_flags & KAIO_WAKEUP) || 387fd3bf775SJohn Dyson (ki->kaio_flags & KAIO_RUNDOWN) && 388fd3bf775SJohn Dyson ((ki->kaio_buffer_count == 0) && (ki->kaio_queue_count == 0))) { 389fd3bf775SJohn Dyson ki->kaio_flags &= ~KAIO_WAKEUP; 390fd3bf775SJohn Dyson wakeup(p); 391fd3bf775SJohn Dyson } 392fd3bf775SJohn Dyson 393fd3bf775SJohn Dyson if ( aiocbe->jobstate == JOBST_JOBQBUF) { 394fd3bf775SJohn Dyson if ((error = aio_fphysio(p, aiocbe, 1)) != 0) 395fd3bf775SJohn Dyson return error; 396fd3bf775SJohn Dyson if (aiocbe->jobstate != JOBST_JOBBFINISHED) 397fd3bf775SJohn Dyson panic("aio_free_entry: invalid physio finish-up state"); 398fd3bf775SJohn Dyson TAILQ_REMOVE(&ki->kaio_bufdone, aiocbe, plist); 399fd3bf775SJohn Dyson } else if ( aiocbe->jobstate == JOBST_JOBQPROC) { 4002244ea07SJohn Dyson aiop = aiocbe->jobaioproc; 4012244ea07SJohn Dyson TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list); 4022244ea07SJohn Dyson } else if 
( aiocbe->jobstate == JOBST_JOBQGLOBAL) { 4032244ea07SJohn Dyson TAILQ_REMOVE(&aio_jobs, aiocbe, list); 4042244ea07SJohn Dyson } else if ( aiocbe->jobstate == JOBST_JOBFINISHED) { 4052244ea07SJohn Dyson TAILQ_REMOVE(&ki->kaio_jobdone, aiocbe, plist); 406fd3bf775SJohn Dyson } else if ( aiocbe->jobstate == JOBST_JOBBFINISHED) { 407fd3bf775SJohn Dyson TAILQ_REMOVE(&ki->kaio_bufdone, aiocbe, plist); 40884af4da6SJohn Dyson if (aiocbe->bp) { 40984af4da6SJohn Dyson vunmapbuf(aiocbe->bp); 41084af4da6SJohn Dyson relpbuf(aiocbe->bp); 41184af4da6SJohn Dyson aiocbe->bp = NULL; 41284af4da6SJohn Dyson } 41384af4da6SJohn Dyson } 41484af4da6SJohn Dyson if (lj && (lj->lioj_buffer_count == 0) && (lj->lioj_queue_count == 0)) { 41584af4da6SJohn Dyson TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list); 41684af4da6SJohn Dyson zfree(aiolio_zone, lj); 4172244ea07SJohn Dyson } 4182244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 4192244ea07SJohn Dyson aiocbe->jobstate = JOBST_NULL; 4202244ea07SJohn Dyson return 0; 4212244ea07SJohn Dyson } 4222244ea07SJohn Dyson 4232244ea07SJohn Dyson /* 4242244ea07SJohn Dyson * Rundown the jobs for a given process. 
4252244ea07SJohn Dyson */ 4262244ea07SJohn Dyson void 427fd3bf775SJohn Dyson aio_proc_rundown(struct proc *p) 428fd3bf775SJohn Dyson { 42984af4da6SJohn Dyson int s; 4302244ea07SJohn Dyson struct kaioinfo *ki; 43184af4da6SJohn Dyson struct aio_liojob *lj, *ljn; 4322244ea07SJohn Dyson struct aiocblist *aiocbe, *aiocbn; 4332244ea07SJohn Dyson 4342244ea07SJohn Dyson ki = p->p_aioinfo; 4352244ea07SJohn Dyson if (ki == NULL) 4362244ea07SJohn Dyson return; 4372244ea07SJohn Dyson 43884af4da6SJohn Dyson ki->kaio_flags |= LIOJ_SIGNAL_POSTED; 43984af4da6SJohn Dyson while ((ki->kaio_active_count > 0) || 44084af4da6SJohn Dyson (ki->kaio_buffer_count > ki->kaio_buffer_finished_count)) { 441fd3bf775SJohn Dyson ki->kaio_flags |= KAIO_RUNDOWN; 44284af4da6SJohn Dyson if (tsleep(p, PRIBIO, "kaiowt", aiod_timeout)) 443a624e84fSJohn Dyson break; 444a624e84fSJohn Dyson } 445a624e84fSJohn Dyson 446a624e84fSJohn Dyson #if DEBUGAIO > 0 447fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 448a624e84fSJohn Dyson printf("Proc rundown: %d %d\n", 449a624e84fSJohn Dyson num_queue_count, ki->kaio_queue_count); 450a624e84fSJohn Dyson #endif 451a624e84fSJohn Dyson 4522244ea07SJohn Dyson restart1: 4532244ea07SJohn Dyson for ( aiocbe = TAILQ_FIRST(&ki->kaio_jobdone); 4542244ea07SJohn Dyson aiocbe; 4552244ea07SJohn Dyson aiocbe = aiocbn) { 4562244ea07SJohn Dyson aiocbn = TAILQ_NEXT(aiocbe, plist); 4572244ea07SJohn Dyson if (aio_free_entry(aiocbe)) 4582244ea07SJohn Dyson goto restart1; 4592244ea07SJohn Dyson } 4602244ea07SJohn Dyson 4612244ea07SJohn Dyson restart2: 4622244ea07SJohn Dyson for ( aiocbe = TAILQ_FIRST(&ki->kaio_jobqueue); 4632244ea07SJohn Dyson aiocbe; 4642244ea07SJohn Dyson aiocbe = aiocbn) { 4652244ea07SJohn Dyson aiocbn = TAILQ_NEXT(aiocbe, plist); 4662244ea07SJohn Dyson if (aio_free_entry(aiocbe)) 4672244ea07SJohn Dyson goto restart2; 4682244ea07SJohn Dyson } 46984af4da6SJohn Dyson 47084af4da6SJohn Dyson restart3: 47184af4da6SJohn Dyson s = splbio(); 47284af4da6SJohn Dyson while 
(TAILQ_FIRST(&ki->kaio_bufqueue)) { 47384af4da6SJohn Dyson ki->kaio_flags |= KAIO_WAKEUP; 47484af4da6SJohn Dyson tsleep (p, PRIBIO, "aioprn", 0); 47584af4da6SJohn Dyson splx(s); 47684af4da6SJohn Dyson goto restart3; 47784af4da6SJohn Dyson } 47884af4da6SJohn Dyson 47984af4da6SJohn Dyson restart4: 48084af4da6SJohn Dyson s = splbio(); 48184af4da6SJohn Dyson for ( aiocbe = TAILQ_FIRST(&ki->kaio_bufdone); 48284af4da6SJohn Dyson aiocbe; 48384af4da6SJohn Dyson aiocbe = aiocbn) { 48484af4da6SJohn Dyson aiocbn = TAILQ_NEXT(aiocbe, plist); 48584af4da6SJohn Dyson if (aio_free_entry(aiocbe)) { 48684af4da6SJohn Dyson splx(s); 48784af4da6SJohn Dyson goto restart4; 48884af4da6SJohn Dyson } 48984af4da6SJohn Dyson } 49084af4da6SJohn Dyson splx(s); 49184af4da6SJohn Dyson 49284af4da6SJohn Dyson for ( lj = TAILQ_FIRST(&ki->kaio_liojoblist); 49384af4da6SJohn Dyson lj; 49484af4da6SJohn Dyson lj = ljn) { 49584af4da6SJohn Dyson ljn = TAILQ_NEXT(lj, lioj_list); 49684af4da6SJohn Dyson if ((lj->lioj_buffer_count == 0) && (lj->lioj_queue_count == 0)) { 49784af4da6SJohn Dyson TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list); 49884af4da6SJohn Dyson zfree(aiolio_zone, lj); 49984af4da6SJohn Dyson } 50084af4da6SJohn Dyson } 50184af4da6SJohn Dyson 50284af4da6SJohn Dyson for ( lj = TAILQ_FIRST(&ki->kaio_liojoblist); 50384af4da6SJohn Dyson lj; 50484af4da6SJohn Dyson lj = ljn) { 50584af4da6SJohn Dyson printf("LIO job not cleaned up: B:%d, BF:%d, Q:%d, QF:%d\n", 50684af4da6SJohn Dyson lj->lioj_buffer_count, lj->lioj_buffer_finished_count, 50784af4da6SJohn Dyson lj->lioj_queue_count, lj->lioj_queue_finished_count); 50884af4da6SJohn Dyson } 50984af4da6SJohn Dyson 510fd3bf775SJohn Dyson zfree(kaio_zone, ki); 511a624e84fSJohn Dyson p->p_aioinfo = NULL; 5122244ea07SJohn Dyson } 5132244ea07SJohn Dyson 5142244ea07SJohn Dyson /* 5152244ea07SJohn Dyson * Select a job to run (called by an AIO daemon) 5162244ea07SJohn Dyson */ 5172244ea07SJohn Dyson static struct aiocblist * 518fd3bf775SJohn Dyson 
aio_selectjob(struct aioproclist *aiop) 519fd3bf775SJohn Dyson { 5202244ea07SJohn Dyson 5212244ea07SJohn Dyson struct aiocblist *aiocbe; 5222244ea07SJohn Dyson 5232244ea07SJohn Dyson aiocbe = TAILQ_FIRST(&aiop->jobtorun); 5242244ea07SJohn Dyson if (aiocbe) { 5252244ea07SJohn Dyson TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list); 5262244ea07SJohn Dyson return aiocbe; 5272244ea07SJohn Dyson } 5282244ea07SJohn Dyson 5292244ea07SJohn Dyson for (aiocbe = TAILQ_FIRST(&aio_jobs); 5302244ea07SJohn Dyson aiocbe; 5312244ea07SJohn Dyson aiocbe = TAILQ_NEXT(aiocbe, list)) { 5322244ea07SJohn Dyson struct kaioinfo *ki; 5332244ea07SJohn Dyson struct proc *userp; 5342244ea07SJohn Dyson 5352244ea07SJohn Dyson userp = aiocbe->userproc; 5362244ea07SJohn Dyson ki = userp->p_aioinfo; 5372244ea07SJohn Dyson 5382244ea07SJohn Dyson if (ki->kaio_active_count < ki->kaio_maxactive_count) { 5392244ea07SJohn Dyson TAILQ_REMOVE(&aio_jobs, aiocbe, list); 5402244ea07SJohn Dyson return aiocbe; 5412244ea07SJohn Dyson } 5422244ea07SJohn Dyson } 5432244ea07SJohn Dyson 5442244ea07SJohn Dyson return NULL; 5452244ea07SJohn Dyson } 5462244ea07SJohn Dyson 5472244ea07SJohn Dyson /* 548fd3bf775SJohn Dyson * The AIO processing activity. This is the code that does the 549fd3bf775SJohn Dyson * I/O request for the non-physio version of the operations. The 550fd3bf775SJohn Dyson * normal vn operations are used, and this code should work in 551fd3bf775SJohn Dyson * all instances for every type of file, including pipes, sockets, 552fd3bf775SJohn Dyson * fifos, and regular files. 
5532244ea07SJohn Dyson */ 5542244ea07SJohn Dyson void 555fd3bf775SJohn Dyson aio_process(struct aiocblist *aiocbe) 556fd3bf775SJohn Dyson { 5572244ea07SJohn Dyson struct filedesc *fdp; 558fd3bf775SJohn Dyson struct proc *userp, *mycp; 5592244ea07SJohn Dyson struct aiocb *cb; 5602244ea07SJohn Dyson struct file *fp; 5612244ea07SJohn Dyson struct uio auio; 5622244ea07SJohn Dyson struct iovec aiov; 5632244ea07SJohn Dyson unsigned int fd; 5642244ea07SJohn Dyson int cnt; 565fd3bf775SJohn Dyson static nperline=0; 5662244ea07SJohn Dyson int error; 567a624e84fSJohn Dyson off_t offset; 568fd3bf775SJohn Dyson int oublock_st, oublock_end; 569fd3bf775SJohn Dyson int inblock_st, inblock_end; 5702244ea07SJohn Dyson 5712244ea07SJohn Dyson userp = aiocbe->userproc; 5722244ea07SJohn Dyson cb = &aiocbe->uaiocb; 5732244ea07SJohn Dyson 574fd3bf775SJohn Dyson mycp = curproc; 575fd3bf775SJohn Dyson 576a624e84fSJohn Dyson #if DEBUGAIO > 0 577fd3bf775SJohn Dyson if (DEBUGREQ) 578fd3bf775SJohn Dyson printf("AIOD %s, fd: %d, offset: 0x%x, address: 0x%x, size: %d\n", 579a624e84fSJohn Dyson cb->aio_lio_opcode == LIO_READ?"Read":"Write", 5802244ea07SJohn Dyson cb->aio_fildes, (int) cb->aio_offset, 5812244ea07SJohn Dyson cb->aio_buf, cb->aio_nbytes); 582a624e84fSJohn Dyson #endif 583fd3bf775SJohn Dyson #if 0 584fd3bf775SJohn Dyson if (cb->aio_lio_opcode == LIO_WRITE) { 585fd3bf775SJohn Dyson nperline++; 586fd3bf775SJohn Dyson printf("(0x%8.8x,0x%8.8x)", (unsigned) cb->aio_offset, cb->aio_buf); 587fd3bf775SJohn Dyson if (nperline >= 3) { 588fd3bf775SJohn Dyson nperline = 0; 589fd3bf775SJohn Dyson printf("\n"); 590fd3bf775SJohn Dyson } 591fd3bf775SJohn Dyson } 5922244ea07SJohn Dyson #endif 593fd3bf775SJohn Dyson #if SLOW 594fd3bf775SJohn Dyson tsleep(mycp, PVM, "aioprc", hz); 595fd3bf775SJohn Dyson #endif 596fd3bf775SJohn Dyson fdp = mycp->p_fd; 5972244ea07SJohn Dyson fd = cb->aio_fildes; 5982244ea07SJohn Dyson fp = fdp->fd_ofiles[fd]; 5992244ea07SJohn Dyson 6002244ea07SJohn Dyson aiov.iov_base = 
cb->aio_buf; 6012244ea07SJohn Dyson aiov.iov_len = cb->aio_nbytes; 6022244ea07SJohn Dyson 6032244ea07SJohn Dyson auio.uio_iov = &aiov; 6042244ea07SJohn Dyson auio.uio_iovcnt = 1; 605a624e84fSJohn Dyson auio.uio_offset = offset = cb->aio_offset; 6062244ea07SJohn Dyson auio.uio_resid = cb->aio_nbytes; 6072244ea07SJohn Dyson cnt = cb->aio_nbytes; 6082244ea07SJohn Dyson auio.uio_segflg = UIO_USERSPACE; 609fd3bf775SJohn Dyson auio.uio_procp = mycp; 6102244ea07SJohn Dyson 611fd3bf775SJohn Dyson inblock_st = mycp->p_stats->p_ru.ru_inblock; 612fd3bf775SJohn Dyson oublock_st = mycp->p_stats->p_ru.ru_oublock; 6132244ea07SJohn Dyson if (cb->aio_lio_opcode == LIO_READ) { 6142244ea07SJohn Dyson auio.uio_rw = UIO_READ; 6152244ea07SJohn Dyson error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred); 6162244ea07SJohn Dyson } else { 6172244ea07SJohn Dyson auio.uio_rw = UIO_WRITE; 6182244ea07SJohn Dyson error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred); 6192244ea07SJohn Dyson } 620fd3bf775SJohn Dyson inblock_end = mycp->p_stats->p_ru.ru_inblock; 621fd3bf775SJohn Dyson oublock_end = mycp->p_stats->p_ru.ru_oublock; 622fd3bf775SJohn Dyson 623fd3bf775SJohn Dyson aiocbe->inputcharge = inblock_end - inblock_st; 624fd3bf775SJohn Dyson aiocbe->outputcharge = oublock_end - oublock_st; 6252244ea07SJohn Dyson 6262244ea07SJohn Dyson if (error) { 6272244ea07SJohn Dyson if (auio.uio_resid != cnt) { 6282244ea07SJohn Dyson if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) 6292244ea07SJohn Dyson error = 0; 6302244ea07SJohn Dyson if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE)) 6312244ea07SJohn Dyson psignal(userp, SIGPIPE); 6322244ea07SJohn Dyson } 6332244ea07SJohn Dyson } 634a624e84fSJohn Dyson #if DEBUGAIO > 0 635fd3bf775SJohn Dyson if (DEBUGFLOW > 1) 636fd3bf775SJohn Dyson printf("%s complete: error: %d, status: %d," 637fd3bf775SJohn Dyson " nio: %d, resid: %d, offset: %d %s\n", 638a624e84fSJohn Dyson cb->aio_lio_opcode == LIO_READ?"Read":"Write", 639fd3bf775SJohn Dyson 
error, cnt, cnt - auio.uio_resid, auio.uio_resid, (int) offset & 0xffffffff, 640fd3bf775SJohn Dyson (cnt - auio.uio_resid) > 0 ? "" : "<EOF>"); 641a624e84fSJohn Dyson #endif 6422244ea07SJohn Dyson 6432244ea07SJohn Dyson cnt -= auio.uio_resid; 6442244ea07SJohn Dyson cb->_aiocb_private.error = error; 6452244ea07SJohn Dyson cb->_aiocb_private.status = cnt; 6462244ea07SJohn Dyson 6472244ea07SJohn Dyson return; 6482244ea07SJohn Dyson 6492244ea07SJohn Dyson } 6502244ea07SJohn Dyson 6512244ea07SJohn Dyson /* 65284af4da6SJohn Dyson * The AIO daemon, most of the actual work is done in aio_process, 65384af4da6SJohn Dyson * but the setup (and address space mgmt) is done in this routine. 6542244ea07SJohn Dyson */ 6552244ea07SJohn Dyson static void 656fd3bf775SJohn Dyson aio_daemon(void *uproc) 6572244ea07SJohn Dyson { 65884af4da6SJohn Dyson int s; 6592244ea07SJohn Dyson struct aioproclist *aiop; 660fd3bf775SJohn Dyson struct vmspace *myvm, *aiovm; 661fd3bf775SJohn Dyson struct proc *mycp; 6622244ea07SJohn Dyson 6632244ea07SJohn Dyson /* 664fd3bf775SJohn Dyson * Local copies of curproc (cp) and vmspace (myvm) 6652244ea07SJohn Dyson */ 666fd3bf775SJohn Dyson mycp = curproc; 667fd3bf775SJohn Dyson myvm = mycp->p_vmspace; 668fd3bf775SJohn Dyson 669fd3bf775SJohn Dyson /* 670fd3bf775SJohn Dyson * We manage to create only one VM space for all AIOD processes. 671fd3bf775SJohn Dyson * The VM space for the first AIOD created becomes the shared VM 672fd3bf775SJohn Dyson * space for all of them. We add an additional reference count, 673fd3bf775SJohn Dyson * even for the first AIOD, so the address space does not go away, 674fd3bf775SJohn Dyson * and we continue to use that original VM space even if the first 675fd3bf775SJohn Dyson * AIOD exits. 
676fd3bf775SJohn Dyson */ 677fd3bf775SJohn Dyson if ((aiovm = aiovmspace) == NULL) { 678fd3bf775SJohn Dyson aiovmspace = myvm; 67984af4da6SJohn Dyson myvm->vm_refcnt++; 680fd3bf775SJohn Dyson /* 681fd3bf775SJohn Dyson * Remove userland cruft from address space. 682fd3bf775SJohn Dyson */ 683fd3bf775SJohn Dyson if (myvm->vm_shm) 684fd3bf775SJohn Dyson shmexit(mycp); 685fd3bf775SJohn Dyson pmap_remove_pages(&myvm->vm_pmap, 0, USRSTACK); 686fd3bf775SJohn Dyson vm_map_remove(&myvm->vm_map, 0, USRSTACK); 687fd3bf775SJohn Dyson myvm->vm_tsize = 0; 688fd3bf775SJohn Dyson myvm->vm_dsize = 0; 689fd3bf775SJohn Dyson myvm->vm_ssize = 0; 690fd3bf775SJohn Dyson } else { 69184af4da6SJohn Dyson aiovm->vm_refcnt++; 692fd3bf775SJohn Dyson mycp->p_vmspace = aiovm; 693fd3bf775SJohn Dyson pmap_activate(mycp); 694fd3bf775SJohn Dyson vmspace_free(myvm); 695fd3bf775SJohn Dyson myvm = aiovm; 696fd3bf775SJohn Dyson } 697fd3bf775SJohn Dyson 698fd3bf775SJohn Dyson if (mycp->p_textvp) { 699fd3bf775SJohn Dyson vrele(mycp->p_textvp); 700fd3bf775SJohn Dyson mycp->p_textvp = NULL; 701fd3bf775SJohn Dyson } 702fd3bf775SJohn Dyson 703fd3bf775SJohn Dyson /* 704fd3bf775SJohn Dyson * Allocate and ready the aio control info. There is one 705fd3bf775SJohn Dyson * aiop structure per daemon. 
706fd3bf775SJohn Dyson */ 707fd3bf775SJohn Dyson aiop = zalloc(aiop_zone); 708fd3bf775SJohn Dyson aiop->aioproc = mycp; 7092244ea07SJohn Dyson aiop->aioprocflags |= AIOP_FREE; 7102244ea07SJohn Dyson TAILQ_INIT(&aiop->jobtorun); 7112244ea07SJohn Dyson 7122244ea07SJohn Dyson /* 713fd3bf775SJohn Dyson * Place thread (lightweight process) onto the AIO free thread list 7142244ea07SJohn Dyson */ 715fd3bf775SJohn Dyson if (TAILQ_EMPTY(&aio_freeproc)) 716fd3bf775SJohn Dyson wakeup(&aio_freeproc); 717fd3bf775SJohn Dyson TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list); 7182244ea07SJohn Dyson 7192244ea07SJohn Dyson /* 7202244ea07SJohn Dyson * Make up a name for the daemon 7212244ea07SJohn Dyson */ 722fd3bf775SJohn Dyson strcpy(mycp->p_comm, "aiod"); 7232244ea07SJohn Dyson 7242244ea07SJohn Dyson /* 725fd3bf775SJohn Dyson * Get rid of our current filedescriptors. AIOD's don't need any 726fd3bf775SJohn Dyson * filedescriptors, except as temporarily inherited from the client. 727fd3bf775SJohn Dyson * Credentials are also cloned, and made equivalent to "root." 7282244ea07SJohn Dyson */ 729fd3bf775SJohn Dyson fdfree(mycp); 730fd3bf775SJohn Dyson mycp->p_fd = NULL; 731fd3bf775SJohn Dyson mycp->p_ucred = crcopy(mycp->p_ucred); 732fd3bf775SJohn Dyson mycp->p_ucred->cr_uid = 0; 733fd3bf775SJohn Dyson mycp->p_ucred->cr_ngroups = 1; 734fd3bf775SJohn Dyson mycp->p_ucred->cr_groups[0] = 1; 735fd3bf775SJohn Dyson 736fd3bf775SJohn Dyson /* 737fd3bf775SJohn Dyson * The daemon resides in it's own pgrp. 
738fd3bf775SJohn Dyson */ 739fd3bf775SJohn Dyson enterpgrp(mycp, mycp->p_pid, 1); 740fd3bf775SJohn Dyson 741fd3bf775SJohn Dyson /* 742fd3bf775SJohn Dyson * Mark special process type 743fd3bf775SJohn Dyson */ 744fd3bf775SJohn Dyson mycp->p_flag |= P_SYSTEM|P_KTHREADP; 7452244ea07SJohn Dyson 746a624e84fSJohn Dyson #if DEBUGAIO > 0 747fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 748fd3bf775SJohn Dyson printf("Started new process: %d\n", mycp->p_pid); 7492244ea07SJohn Dyson #endif 750fd3bf775SJohn Dyson 751fd3bf775SJohn Dyson /* 752fd3bf775SJohn Dyson * Wakeup parent process. (Parent sleeps to keep from blasting away 753fd3bf775SJohn Dyson * creating to many daemons.) 754fd3bf775SJohn Dyson */ 755fd3bf775SJohn Dyson wakeup(mycp); 7562244ea07SJohn Dyson 7572244ea07SJohn Dyson while(1) { 758fd3bf775SJohn Dyson struct proc *curcp; 7592244ea07SJohn Dyson struct aiocblist *aiocbe; 7602244ea07SJohn Dyson 761fd3bf775SJohn Dyson /* 762fd3bf775SJohn Dyson * curcp is the current daemon process context. 763fd3bf775SJohn Dyson * userp is the current user process context. 
764fd3bf775SJohn Dyson */ 765fd3bf775SJohn Dyson curcp = mycp; 766c4860686SJohn Dyson 767fd3bf775SJohn Dyson /* 768fd3bf775SJohn Dyson * Take daemon off of free queue 769fd3bf775SJohn Dyson */ 7702244ea07SJohn Dyson if (aiop->aioprocflags & AIOP_FREE) { 7712244ea07SJohn Dyson TAILQ_REMOVE(&aio_freeproc, aiop, list); 7722244ea07SJohn Dyson TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list); 7732244ea07SJohn Dyson aiop->aioprocflags &= ~AIOP_FREE; 7742244ea07SJohn Dyson } 775fd3bf775SJohn Dyson aiop->aioprocflags &= ~AIOP_SCHED; 7762244ea07SJohn Dyson 777fd3bf775SJohn Dyson /* 778fd3bf775SJohn Dyson * Check for jobs 779fd3bf775SJohn Dyson */ 7802244ea07SJohn Dyson while ( aiocbe = aio_selectjob(aiop)) { 781fd3bf775SJohn Dyson struct proc *userp; 7822244ea07SJohn Dyson struct aiocb *cb; 7832244ea07SJohn Dyson struct kaioinfo *ki; 78484af4da6SJohn Dyson struct aio_liojob *lj; 7852244ea07SJohn Dyson 7862244ea07SJohn Dyson cb = &aiocbe->uaiocb; 7872244ea07SJohn Dyson userp = aiocbe->userproc; 7882244ea07SJohn Dyson 7892244ea07SJohn Dyson aiocbe->jobstate = JOBST_JOBRUNNING; 790fd3bf775SJohn Dyson 791fd3bf775SJohn Dyson /* 792fd3bf775SJohn Dyson * Connect to process address space for user program 793fd3bf775SJohn Dyson */ 794fd3bf775SJohn Dyson if (userp != curcp) { 795fd3bf775SJohn Dyson struct vmspace *tmpvm; 796fd3bf775SJohn Dyson /* 797fd3bf775SJohn Dyson * Save the current address space that we are connected to. 798fd3bf775SJohn Dyson */ 799fd3bf775SJohn Dyson tmpvm = mycp->p_vmspace; 800fd3bf775SJohn Dyson /* 801fd3bf775SJohn Dyson * Point to the new user address space, and refer to it. 802fd3bf775SJohn Dyson */ 803fd3bf775SJohn Dyson mycp->p_vmspace = userp->p_vmspace; 80484af4da6SJohn Dyson mycp->p_vmspace->vm_refcnt++; 805fd3bf775SJohn Dyson /* 806fd3bf775SJohn Dyson * Activate the new mapping. 
807fd3bf775SJohn Dyson */ 808fd3bf775SJohn Dyson pmap_activate(mycp); 809fd3bf775SJohn Dyson /* 810fd3bf775SJohn Dyson * If the old address space wasn't the daemons own address 811fd3bf775SJohn Dyson * space, then we need to remove the daemon's reference from 812fd3bf775SJohn Dyson * the other process that it was acting on behalf of. 813fd3bf775SJohn Dyson */ 8142244ea07SJohn Dyson if (tmpvm != myvm) { 8152244ea07SJohn Dyson vmspace_free(tmpvm); 8162244ea07SJohn Dyson } 817fd3bf775SJohn Dyson /* 818fd3bf775SJohn Dyson * Disassociate from previous clients file descriptors, and 819fd3bf775SJohn Dyson * associate to the new clients descriptors. Note that 820fd3bf775SJohn Dyson * the daemon doesn't need to worry about it's orginal 821fd3bf775SJohn Dyson * descriptors, because they were originally freed. 822fd3bf775SJohn Dyson */ 823fd3bf775SJohn Dyson if (mycp->p_fd) 824fd3bf775SJohn Dyson fdfree(mycp); 825fd3bf775SJohn Dyson mycp->p_fd = fdshare(userp); 826fd3bf775SJohn Dyson curcp = userp; 8272244ea07SJohn Dyson } 8282244ea07SJohn Dyson 829fd3bf775SJohn Dyson ki = userp->p_aioinfo; 83084af4da6SJohn Dyson lj = aiocbe->lio; 83184af4da6SJohn Dyson 83284af4da6SJohn Dyson /* 83384af4da6SJohn Dyson * Account for currently active jobs 83484af4da6SJohn Dyson */ 8352244ea07SJohn Dyson ki->kaio_active_count++; 836a624e84fSJohn Dyson #if DEBUGAIO > 0 837fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 838a624e84fSJohn Dyson printf("process: pid: %d(%d), active: %d, queue: %d\n", 839a624e84fSJohn Dyson cb->_aiocb_private.kernelinfo, 840a624e84fSJohn Dyson userp->p_pid, ki->kaio_active_count, ki->kaio_queue_count); 841a624e84fSJohn Dyson #endif 84284af4da6SJohn Dyson /* 84384af4da6SJohn Dyson * Do the I/O function 84484af4da6SJohn Dyson */ 8452244ea07SJohn Dyson aiocbe->jobaioproc = aiop; 8462244ea07SJohn Dyson aio_process(aiocbe); 84784af4da6SJohn Dyson 84884af4da6SJohn Dyson /* 84984af4da6SJohn Dyson * decrement the active job count 85084af4da6SJohn Dyson */ 85184af4da6SJohn Dyson 
ki->kaio_active_count--; 85284af4da6SJohn Dyson 85384af4da6SJohn Dyson /* 85484af4da6SJohn Dyson * increment the completion count for wakeup/signal comparisons 85584af4da6SJohn Dyson */ 85684af4da6SJohn Dyson aiocbe->jobflags |= AIOCBLIST_DONE; 85784af4da6SJohn Dyson ki->kaio_queue_finished_count++; 85884af4da6SJohn Dyson if (lj) { 85984af4da6SJohn Dyson lj->lioj_queue_finished_count++; 86084af4da6SJohn Dyson } 861fd3bf775SJohn Dyson if ((ki->kaio_flags & KAIO_WAKEUP) || 86284af4da6SJohn Dyson (ki->kaio_flags & KAIO_RUNDOWN) && 86384af4da6SJohn Dyson (ki->kaio_active_count == 0)) { 864fd3bf775SJohn Dyson ki->kaio_flags &= ~KAIO_WAKEUP; 865fd3bf775SJohn Dyson wakeup(userp); 866fd3bf775SJohn Dyson } 867a624e84fSJohn Dyson #if DEBUGAIO > 0 868fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 869a624e84fSJohn Dyson printf("DONE process: pid: %d(%d), active: %d, queue: %d\n", 870a624e84fSJohn Dyson cb->_aiocb_private.kernelinfo, 871a624e84fSJohn Dyson userp->p_pid, ki->kaio_active_count, ki->kaio_queue_count); 872a624e84fSJohn Dyson #endif 8732244ea07SJohn Dyson 87484af4da6SJohn Dyson s = splbio(); 87584af4da6SJohn Dyson if (lj && (lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) == 87684af4da6SJohn Dyson LIOJ_SIGNAL) { 87784af4da6SJohn Dyson if ((lj->lioj_queue_finished_count == lj->lioj_queue_count) && 87884af4da6SJohn Dyson (lj->lioj_buffer_finished_count == lj->lioj_buffer_count)) { 87984af4da6SJohn Dyson psignal(userp, lj->lioj_signal.sigev_signo); 88084af4da6SJohn Dyson lj->lioj_flags |= LIOJ_SIGNAL_POSTED; 88184af4da6SJohn Dyson } 88284af4da6SJohn Dyson } 88384af4da6SJohn Dyson splx(s); 88484af4da6SJohn Dyson 8852244ea07SJohn Dyson aiocbe->jobstate = JOBST_JOBFINISHED; 8862244ea07SJohn Dyson 887fd3bf775SJohn Dyson /* 888fd3bf775SJohn Dyson * If the I/O request should be automatically rundown, do the 889fd3bf775SJohn Dyson * needed cleanup. 
Otherwise, place the queue entry for 890fd3bf775SJohn Dyson * the just finished I/O request into the done queue for the 891fd3bf775SJohn Dyson * associated client. 892fd3bf775SJohn Dyson */ 8932244ea07SJohn Dyson if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) { 8942244ea07SJohn Dyson aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE; 8952244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 8962244ea07SJohn Dyson } else { 8972244ea07SJohn Dyson TAILQ_REMOVE(&ki->kaio_jobqueue, 8982244ea07SJohn Dyson aiocbe, plist); 8992244ea07SJohn Dyson TAILQ_INSERT_TAIL(&ki->kaio_jobdone, 9002244ea07SJohn Dyson aiocbe, plist); 9012244ea07SJohn Dyson } 9022244ea07SJohn Dyson 9032244ea07SJohn Dyson if (aiocbe->jobflags & AIOCBLIST_RUNDOWN) { 9042244ea07SJohn Dyson wakeup(aiocbe); 9052244ea07SJohn Dyson aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN; 9062244ea07SJohn Dyson } 9072244ea07SJohn Dyson 9082244ea07SJohn Dyson if (cb->aio_sigevent.sigev_notify == SIGEV_SIGNAL) { 9092244ea07SJohn Dyson psignal(userp, cb->aio_sigevent.sigev_signo); 9102244ea07SJohn Dyson } 9112244ea07SJohn Dyson } 9122244ea07SJohn Dyson 913fd3bf775SJohn Dyson #if DEBUGAIO > 0 914fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 915fd3bf775SJohn Dyson printf("AIOD: daemon going idle: %d\n", mycp->p_pid); 916fd3bf775SJohn Dyson #endif 917fd3bf775SJohn Dyson 918fd3bf775SJohn Dyson /* 919fd3bf775SJohn Dyson * Disconnect from user address space 920fd3bf775SJohn Dyson */ 921fd3bf775SJohn Dyson if (curcp != mycp) { 922fd3bf775SJohn Dyson struct vmspace *tmpvm; 923fd3bf775SJohn Dyson /* 924fd3bf775SJohn Dyson * Get the user address space to disconnect from. 925fd3bf775SJohn Dyson */ 926fd3bf775SJohn Dyson tmpvm = mycp->p_vmspace; 927fd3bf775SJohn Dyson /* 928fd3bf775SJohn Dyson * Get original address space for daemon. 929fd3bf775SJohn Dyson */ 930fd3bf775SJohn Dyson mycp->p_vmspace = myvm; 931fd3bf775SJohn Dyson /* 932fd3bf775SJohn Dyson * Activate the daemon's address space. 
933fd3bf775SJohn Dyson */ 934fd3bf775SJohn Dyson pmap_activate(mycp); 935fd3bf775SJohn Dyson if (tmpvm == myvm) 936fd3bf775SJohn Dyson printf("AIOD: vmspace problem -- %d\n", mycp->p_pid); 937fd3bf775SJohn Dyson /* 938fd3bf775SJohn Dyson * remove our vmspace reference. 939fd3bf775SJohn Dyson */ 9402244ea07SJohn Dyson vmspace_free(tmpvm); 941fd3bf775SJohn Dyson /* 942fd3bf775SJohn Dyson * disassociate from the user process's file descriptors. 943fd3bf775SJohn Dyson */ 944fd3bf775SJohn Dyson if (mycp->p_fd) 945fd3bf775SJohn Dyson fdfree(mycp); 946fd3bf775SJohn Dyson mycp->p_fd = NULL; 947fd3bf775SJohn Dyson curcp = mycp; 948fd3bf775SJohn Dyson } 949fd3bf775SJohn Dyson 950fd3bf775SJohn Dyson /* 951fd3bf775SJohn Dyson * If we are the first to be put onto the free queue, wakeup 952fd3bf775SJohn Dyson * anyone waiting for a daemon. 953fd3bf775SJohn Dyson */ 954fd3bf775SJohn Dyson TAILQ_REMOVE(&aio_activeproc, aiop, list); 955fd3bf775SJohn Dyson if (TAILQ_EMPTY(&aio_freeproc)) 956fd3bf775SJohn Dyson wakeup(&aio_freeproc); 957fd3bf775SJohn Dyson TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list); 958fd3bf775SJohn Dyson aiop->aioprocflags |= AIOP_FREE; 959fd3bf775SJohn Dyson 960fd3bf775SJohn Dyson #if DEBUGAIO > 0 961fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 962fd3bf775SJohn Dyson printf("AIOD: daemon sleeping -- %d\n", mycp->p_pid); 963fd3bf775SJohn Dyson #endif 964fd3bf775SJohn Dyson /* 965fd3bf775SJohn Dyson * If daemon is inactive for a long time, allow it to exit, thereby 966fd3bf775SJohn Dyson * freeing resources. 
967fd3bf775SJohn Dyson */ 968fd3bf775SJohn Dyson if (((aiop->aioprocflags & AIOP_SCHED) == 0) && 96984af4da6SJohn Dyson tsleep(mycp, PRIBIO, "aiordy", aiod_lifetime)) { 970fd3bf775SJohn Dyson if ((TAILQ_FIRST(&aio_jobs) == NULL) && 971fd3bf775SJohn Dyson (TAILQ_FIRST(&aiop->jobtorun) == NULL)) { 97284af4da6SJohn Dyson if ((aiop->aioprocflags & AIOP_FREE) && 97384af4da6SJohn Dyson (num_aio_procs > target_aio_procs)) { 974fd3bf775SJohn Dyson TAILQ_REMOVE(&aio_freeproc, aiop, list); 975fd3bf775SJohn Dyson zfree(aiop_zone, aiop); 97684af4da6SJohn Dyson num_aio_procs--; 977fd3bf775SJohn Dyson #if DEBUGAIO > 0 978fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 979fd3bf775SJohn Dyson printf("AIOD: Daemon exiting -- %d\n", mycp->p_pid); 980fd3bf775SJohn Dyson #endif 981fd3bf775SJohn Dyson if (mycp->p_vmspace->vm_refcnt <= 1) 982fd3bf775SJohn Dyson printf("AIOD: bad vm refcnt for exiting daemon: %d\n", 983fd3bf775SJohn Dyson mycp->p_vmspace->vm_refcnt); 984fd3bf775SJohn Dyson exit1(mycp, 0); 985fd3bf775SJohn Dyson } 986fd3bf775SJohn Dyson } 9872244ea07SJohn Dyson } 9882244ea07SJohn Dyson } 9892244ea07SJohn Dyson } 9902244ea07SJohn Dyson 9912244ea07SJohn Dyson /* 99284af4da6SJohn Dyson * Create a new AIO daemon. This is mostly a kernel-thread fork routine. 99384af4da6SJohn Dyson * The AIO daemon modifies it's environment itself. 
9942244ea07SJohn Dyson */ 9952244ea07SJohn Dyson static int 996fd3bf775SJohn Dyson aio_newproc() 997fd3bf775SJohn Dyson { 9982244ea07SJohn Dyson int error; 9992244ea07SJohn Dyson struct rfork_args rfa; 1000fd3bf775SJohn Dyson struct proc *p, *np; 10012244ea07SJohn Dyson 1002fd3bf775SJohn Dyson rfa.flags = RFPROC | RFCFDG; 10032244ea07SJohn Dyson 1004cb226aaaSPoul-Henning Kamp p = curproc; 1005cb226aaaSPoul-Henning Kamp if (error = rfork(p, &rfa)) 10062244ea07SJohn Dyson return error; 1007fd3bf775SJohn Dyson 1008fd3bf775SJohn Dyson np = pfind(p->p_retval[0]); 1009fd3bf775SJohn Dyson cpu_set_fork_handler(np, aio_daemon, p); 10102244ea07SJohn Dyson 1011a624e84fSJohn Dyson #if DEBUGAIO > 0 1012fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 10132244ea07SJohn Dyson printf("Waiting for new process: %d, count: %d\n", 10142244ea07SJohn Dyson curproc->p_pid, num_aio_procs); 10152244ea07SJohn Dyson #endif 10162244ea07SJohn Dyson 1017fd3bf775SJohn Dyson /* 1018fd3bf775SJohn Dyson * Wait until daemon is started, but continue on just in case (to 1019fd3bf775SJohn Dyson * handle error conditions. 1020fd3bf775SJohn Dyson */ 102184af4da6SJohn Dyson error = tsleep(np, PZERO, "aiosta", aiod_timeout); 102284af4da6SJohn Dyson num_aio_procs++; 10232244ea07SJohn Dyson 10242244ea07SJohn Dyson return error; 10252244ea07SJohn Dyson 10262244ea07SJohn Dyson } 10272244ea07SJohn Dyson 10282244ea07SJohn Dyson /* 102984af4da6SJohn Dyson * Try the high-performance physio method for eligible VCHR devices. This 103084af4da6SJohn Dyson * routine doesn't require the use of any additional threads, and have 103184af4da6SJohn Dyson * overhead. 
1032fd3bf775SJohn Dyson */ 1033fd3bf775SJohn Dyson int 103484af4da6SJohn Dyson aio_qphysio(p, aiocbe) 1035fd3bf775SJohn Dyson struct proc *p; 103684af4da6SJohn Dyson struct aiocblist *aiocbe; 1037fd3bf775SJohn Dyson { 1038fd3bf775SJohn Dyson int error; 1039fd3bf775SJohn Dyson caddr_t sa; 1040fd3bf775SJohn Dyson struct aiocb *cb; 1041fd3bf775SJohn Dyson struct file *fp; 1042fd3bf775SJohn Dyson struct buf *bp; 1043fd3bf775SJohn Dyson int bflags; 1044fd3bf775SJohn Dyson struct vnode *vp; 1045fd3bf775SJohn Dyson struct kaioinfo *ki; 1046fd3bf775SJohn Dyson struct filedesc *fdp; 104784af4da6SJohn Dyson struct aio_liojob *lj; 1048fd3bf775SJohn Dyson int fd; 1049fd3bf775SJohn Dyson int majordev; 1050fd3bf775SJohn Dyson int s; 1051fd3bf775SJohn Dyson int cnt; 1052fd3bf775SJohn Dyson dev_t dev; 1053fd3bf775SJohn Dyson int rw; 1054fd3bf775SJohn Dyson d_strategy_t *fstrategy; 105584af4da6SJohn Dyson struct cdevsw *cdev; 105684af4da6SJohn Dyson struct bdevsw *bdev; 1057fd3bf775SJohn Dyson 105884af4da6SJohn Dyson cb = &aiocbe->uaiocb; 105984af4da6SJohn Dyson if ((cb->aio_nbytes > MAXPHYS) && (num_buf_aio >= max_buf_aio)) { 106084af4da6SJohn Dyson #if DEBUGAIO > 0 106184af4da6SJohn Dyson if (DEBUGCHR) { 106284af4da6SJohn Dyson printf("AIOP: failed CHR criteria: aio_nbytes: %d, num_buf_aio: %d\n", 106384af4da6SJohn Dyson cb->aio_nbytes, num_buf_aio); 106484af4da6SJohn Dyson } 106584af4da6SJohn Dyson #endif 1066fd3bf775SJohn Dyson return -1; 106784af4da6SJohn Dyson } 1068fd3bf775SJohn Dyson 1069fd3bf775SJohn Dyson fdp = p->p_fd; 1070fd3bf775SJohn Dyson fd = cb->aio_fildes; 1071fd3bf775SJohn Dyson fp = fdp->fd_ofiles[fd]; 107284af4da6SJohn Dyson lj = aiocbe->lio; 1073fd3bf775SJohn Dyson 107484af4da6SJohn Dyson if (fp->f_type != DTYPE_VNODE) { 107584af4da6SJohn Dyson #if DEBUGAIO > 0 107684af4da6SJohn Dyson if (DEBUGCHR) { 107784af4da6SJohn Dyson printf("AIOP: failed CHR criteria: type != DTYPE_VNODE\n"); 107884af4da6SJohn Dyson } 107984af4da6SJohn Dyson #endif 1080fd3bf775SJohn 
Dyson return -1; 108184af4da6SJohn Dyson } 1082fd3bf775SJohn Dyson 1083fd3bf775SJohn Dyson vp = (struct vnode *)fp->f_data; 108484af4da6SJohn Dyson if (vp->v_type != VCHR || ((cb->aio_nbytes & (DEV_BSIZE - 1)) != 0)) { 108584af4da6SJohn Dyson #if DEBUGAIO > 0 108684af4da6SJohn Dyson if (DEBUGCHR) { 108784af4da6SJohn Dyson printf("AIOP: failed CHR criteria: v_type: %d, nbytes: 0x%x\n", 108884af4da6SJohn Dyson vp->v_type, cb->aio_nbytes); 108984af4da6SJohn Dyson } 109084af4da6SJohn Dyson #endif 1091fd3bf775SJohn Dyson return -1; 109284af4da6SJohn Dyson } 1093fd3bf775SJohn Dyson 109484af4da6SJohn Dyson if ((vp->v_specinfo == NULL) || (vp->v_flag & VISTTY)) { 109584af4da6SJohn Dyson #if DEBUGAIO > 0 109684af4da6SJohn Dyson if (DEBUGCHR) { 109784af4da6SJohn Dyson printf("AIOP: failed CHR criteria: v_specinfo: %x, istty: %x\n", 109884af4da6SJohn Dyson vp->v_specinfo, (vp->v_flag & VISTTY)); 109984af4da6SJohn Dyson } 110084af4da6SJohn Dyson #endif 1101fd3bf775SJohn Dyson return -1; 110284af4da6SJohn Dyson } 1103fd3bf775SJohn Dyson 1104fd3bf775SJohn Dyson majordev = major(vp->v_rdev); 110584af4da6SJohn Dyson if (majordev == NODEV) { 110684af4da6SJohn Dyson #if DEBUGAIO > 0 110784af4da6SJohn Dyson if (DEBUGCHR) { 110884af4da6SJohn Dyson printf("AIOP: failed CHR criteria, NODEV?: 0x%x\n", vp->v_rdev); 110984af4da6SJohn Dyson } 111084af4da6SJohn Dyson #endif 1111fd3bf775SJohn Dyson return -1; 111284af4da6SJohn Dyson } 1113fd3bf775SJohn Dyson 111484af4da6SJohn Dyson cdev = cdevsw[major(vp->v_rdev)]; 111584af4da6SJohn Dyson if (cdev == NULL) { 111684af4da6SJohn Dyson #if DEBUGAIO > 0 111784af4da6SJohn Dyson if (DEBUGCHR) { 111884af4da6SJohn Dyson printf("AIOP: failed CHR criteria: cdevsw entry missing\n"); 111984af4da6SJohn Dyson } 112084af4da6SJohn Dyson #endif 1121fd3bf775SJohn Dyson return -1; 112284af4da6SJohn Dyson } 112384af4da6SJohn Dyson bdev = cdev->d_bdev; 112484af4da6SJohn Dyson if (bdev == NULL) { 112584af4da6SJohn Dyson #if DEBUGIO > 0 112684af4da6SJohn Dyson if 
(DEBUGCHR) { 112784af4da6SJohn Dyson printf("AIOP: failed CHR criteria: bdevsw entry missing\n"); 112884af4da6SJohn Dyson } 112984af4da6SJohn Dyson #endif 113084af4da6SJohn Dyson return -1; 113184af4da6SJohn Dyson } 1132fd3bf775SJohn Dyson 1133fd3bf775SJohn Dyson ki = p->p_aioinfo; 113484af4da6SJohn Dyson if (ki->kaio_buffer_count >= ki->kaio_ballowed_count) { 113584af4da6SJohn Dyson #if DEBUGAIO > 0 113684af4da6SJohn Dyson if (DEBUGCHR) { 113784af4da6SJohn Dyson printf("AIOP: failed CHR criteria, buffer_count(%d) > ballowed_count(%d)\n", 113884af4da6SJohn Dyson ki->kaio_buffer_count, ki->kaio_ballowed_count); 113984af4da6SJohn Dyson } 114084af4da6SJohn Dyson #endif 1141fd3bf775SJohn Dyson return -1; 114284af4da6SJohn Dyson } 1143fd3bf775SJohn Dyson 1144fd3bf775SJohn Dyson cnt = cb->aio_nbytes; 114584af4da6SJohn Dyson if (cnt > MAXPHYS) { 114684af4da6SJohn Dyson #if DEBUGAIO > 0 114784af4da6SJohn Dyson if (DEBUGCHR) { 114884af4da6SJohn Dyson printf("AIOP: failed CHR criteria, cnt(%d) > MAXPHYS\n", cnt); 114984af4da6SJohn Dyson } 115084af4da6SJohn Dyson #endif 1151fd3bf775SJohn Dyson return -1; 115284af4da6SJohn Dyson } 115384af4da6SJohn Dyson 115484af4da6SJohn Dyson dev = makedev(bdev->d_maj, minor(vp->v_rdev)); 115584af4da6SJohn Dyson #if DEBUGAIO > 0 115684af4da6SJohn Dyson if (DEBUGCHR) { 115784af4da6SJohn Dyson printf("AIOP: device: %x\n", dev); 115884af4da6SJohn Dyson } 115984af4da6SJohn Dyson #endif 116084af4da6SJohn Dyson 116184af4da6SJohn Dyson /* 116284af4da6SJohn Dyson * Physical I/O is charged directly to the process, so we don't have 116384af4da6SJohn Dyson * to fake it. 
116484af4da6SJohn Dyson */ 116584af4da6SJohn Dyson aiocbe->inputcharge = 0; 116684af4da6SJohn Dyson aiocbe->outputcharge = 0; 1167fd3bf775SJohn Dyson 1168fd3bf775SJohn Dyson ki->kaio_buffer_count++; 116984af4da6SJohn Dyson if (lj) { 117084af4da6SJohn Dyson lj->lioj_buffer_count++; 117184af4da6SJohn Dyson } 1172fd3bf775SJohn Dyson 1173fd3bf775SJohn Dyson /* create and build a buffer header for a transfer */ 1174fd3bf775SJohn Dyson bp = (struct buf *)getpbuf(); 1175fd3bf775SJohn Dyson 1176fd3bf775SJohn Dyson /* 1177fd3bf775SJohn Dyson * get a copy of the kva from the physical buffer 1178fd3bf775SJohn Dyson */ 1179fd3bf775SJohn Dyson bp->b_proc = p; 1180fd3bf775SJohn Dyson bp->b_dev = dev; 1181fd3bf775SJohn Dyson error = bp->b_error = 0; 1182fd3bf775SJohn Dyson 1183fd3bf775SJohn Dyson if (cb->aio_lio_opcode == LIO_WRITE) { 1184fd3bf775SJohn Dyson rw = 0; 1185fd3bf775SJohn Dyson bflags = B_WRITE; 1186fd3bf775SJohn Dyson } else { 1187fd3bf775SJohn Dyson rw = 1; 1188fd3bf775SJohn Dyson bflags = B_READ; 1189fd3bf775SJohn Dyson } 1190fd3bf775SJohn Dyson 1191fd3bf775SJohn Dyson bp->b_bcount = cb->aio_nbytes; 1192fd3bf775SJohn Dyson bp->b_bufsize = cb->aio_nbytes; 1193fd3bf775SJohn Dyson bp->b_flags = B_BUSY | B_PHYS | B_CALL | bflags; 1194fd3bf775SJohn Dyson bp->b_iodone = aio_physwakeup; 1195fd3bf775SJohn Dyson bp->b_saveaddr = bp->b_data; 1196fd3bf775SJohn Dyson bp->b_data = cb->aio_buf; 1197fd3bf775SJohn Dyson bp->b_blkno = btodb(cb->aio_offset); 1198fd3bf775SJohn Dyson 1199fd3bf775SJohn Dyson if (rw && !useracc(bp->b_data, bp->b_bufsize, B_WRITE)) { 1200fd3bf775SJohn Dyson error = EFAULT; 1201fd3bf775SJohn Dyson goto doerror; 1202fd3bf775SJohn Dyson } 1203fd3bf775SJohn Dyson if (!rw && !useracc(bp->b_data, bp->b_bufsize, B_READ)) { 1204fd3bf775SJohn Dyson error = EFAULT; 1205fd3bf775SJohn Dyson goto doerror; 1206fd3bf775SJohn Dyson } 1207fd3bf775SJohn Dyson 1208fd3bf775SJohn Dyson /* bring buffer into kernel space */ 1209fd3bf775SJohn Dyson vmapbuf(bp); 
1210fd3bf775SJohn Dyson 121184af4da6SJohn Dyson s = splbio(); 1212fd3bf775SJohn Dyson aiocbe->bp = bp; 1213fd3bf775SJohn Dyson bp->b_spc = (void *)aiocbe; 1214fd3bf775SJohn Dyson TAILQ_INSERT_TAIL(&aio_bufjobs, aiocbe, list); 121584af4da6SJohn Dyson TAILQ_INSERT_TAIL(&ki->kaio_bufqueue, aiocbe, plist); 1216fd3bf775SJohn Dyson aiocbe->jobstate = JOBST_JOBQBUF; 121784af4da6SJohn Dyson cb->_aiocb_private.status = cb->aio_nbytes; 121884af4da6SJohn Dyson num_buf_aio++; 121984af4da6SJohn Dyson fstrategy = bdev->d_strategy; 1220fd3bf775SJohn Dyson bp->b_error = 0; 1221fd3bf775SJohn Dyson 122284af4da6SJohn Dyson splx(s); 1223fd3bf775SJohn Dyson /* perform transfer */ 1224fd3bf775SJohn Dyson (*fstrategy)(bp); 1225fd3bf775SJohn Dyson 1226fd3bf775SJohn Dyson if (bp->b_error || (bp->b_flags & B_ERROR)) { 122784af4da6SJohn Dyson s = splbio(); 1228fd3bf775SJohn Dyson error = bp->b_error; 1229fd3bf775SJohn Dyson TAILQ_REMOVE(&aio_bufjobs, aiocbe, list); 123084af4da6SJohn Dyson TAILQ_REMOVE(&ki->kaio_bufqueue, aiocbe, plist); 1231fd3bf775SJohn Dyson aiocbe->bp = NULL; 1232fd3bf775SJohn Dyson aiocbe->jobstate = JOBST_NULL; 123384af4da6SJohn Dyson splx(s); 1234fd3bf775SJohn Dyson vunmapbuf(bp); 1235fd3bf775SJohn Dyson relpbuf(bp); 123684af4da6SJohn Dyson num_buf_aio--; 123784af4da6SJohn Dyson #if DEBUGAIO > 0 123884af4da6SJohn Dyson if (DEBUGCHR) { 123984af4da6SJohn Dyson printf("AIOP: error: %d\n", error); 124084af4da6SJohn Dyson } 124184af4da6SJohn Dyson #endif 124284af4da6SJohn Dyson ki->kaio_buffer_count--; 124384af4da6SJohn Dyson if (lj) { 124484af4da6SJohn Dyson lj->lioj_buffer_count--; 124584af4da6SJohn Dyson } 1246fd3bf775SJohn Dyson return error; 1247fd3bf775SJohn Dyson } 1248fd3bf775SJohn Dyson return 0; 1249fd3bf775SJohn Dyson 1250fd3bf775SJohn Dyson doerror: 1251fd3bf775SJohn Dyson ki->kaio_buffer_count--; 125284af4da6SJohn Dyson if (lj) { 125384af4da6SJohn Dyson lj->lioj_buffer_count--; 125484af4da6SJohn Dyson } 125584af4da6SJohn Dyson aiocbe->bp = NULL; 
1256fd3bf775SJohn Dyson relpbuf(bp); 1257fd3bf775SJohn Dyson return error; 1258fd3bf775SJohn Dyson } 1259fd3bf775SJohn Dyson 126084af4da6SJohn Dyson /* 126184af4da6SJohn Dyson * This waits/tests physio completion. 126284af4da6SJohn Dyson */ 1263fd3bf775SJohn Dyson int 1264fd3bf775SJohn Dyson aio_fphysio(p, iocb, flgwait) 1265fd3bf775SJohn Dyson struct proc *p; 1266fd3bf775SJohn Dyson struct aiocblist *iocb; 1267fd3bf775SJohn Dyson int flgwait; 1268fd3bf775SJohn Dyson { 1269fd3bf775SJohn Dyson int s; 1270fd3bf775SJohn Dyson struct buf *bp; 1271fd3bf775SJohn Dyson int error; 1272fd3bf775SJohn Dyson 1273fd3bf775SJohn Dyson bp = iocb->bp; 1274fd3bf775SJohn Dyson 1275fd3bf775SJohn Dyson s = splbio(); 1276fd3bf775SJohn Dyson if (flgwait == 0) { 1277fd3bf775SJohn Dyson if ((bp->b_flags & B_DONE) == 0) { 1278fd3bf775SJohn Dyson splx(s); 1279fd3bf775SJohn Dyson return EINPROGRESS; 1280fd3bf775SJohn Dyson } 1281fd3bf775SJohn Dyson } 1282fd3bf775SJohn Dyson 1283fd3bf775SJohn Dyson while ((bp->b_flags & B_DONE) == 0) { 128484af4da6SJohn Dyson if (tsleep((caddr_t)bp, PRIBIO, "physstr", aiod_timeout)) { 1285fd3bf775SJohn Dyson if ((bp->b_flags & B_DONE) == 0) { 1286fd3bf775SJohn Dyson splx(s); 1287fd3bf775SJohn Dyson return EINPROGRESS; 1288fd3bf775SJohn Dyson } else { 1289fd3bf775SJohn Dyson break; 1290fd3bf775SJohn Dyson } 1291fd3bf775SJohn Dyson } 1292fd3bf775SJohn Dyson } 1293fd3bf775SJohn Dyson 1294fd3bf775SJohn Dyson /* release mapping into kernel space */ 1295fd3bf775SJohn Dyson vunmapbuf(bp); 1296fd3bf775SJohn Dyson iocb->bp = 0; 1297fd3bf775SJohn Dyson 1298fd3bf775SJohn Dyson error = 0; 1299fd3bf775SJohn Dyson /* 1300fd3bf775SJohn Dyson * check for an error 1301fd3bf775SJohn Dyson */ 1302fd3bf775SJohn Dyson if (bp->b_flags & B_ERROR) { 1303fd3bf775SJohn Dyson error = bp->b_error; 1304fd3bf775SJohn Dyson } 1305fd3bf775SJohn Dyson 1306fd3bf775SJohn Dyson relpbuf(bp); 1307fd3bf775SJohn Dyson return (error); 1308fd3bf775SJohn Dyson } 1309fd3bf775SJohn Dyson 
1310fd3bf775SJohn Dyson /* 131184af4da6SJohn Dyson * Queue a new AIO request. Choosing either the threaded or direct physio 131284af4da6SJohn Dyson * VCHR technique is done in this code. 13132244ea07SJohn Dyson */ 13142244ea07SJohn Dyson static int 131584af4da6SJohn Dyson _aio_aqueue(struct proc *p, struct aiocb *job, struct aio_liojob *lj, int type) 1316fd3bf775SJohn Dyson { 13172244ea07SJohn Dyson struct filedesc *fdp; 13182244ea07SJohn Dyson struct file *fp; 13192244ea07SJohn Dyson unsigned int fd; 13202244ea07SJohn Dyson 13212244ea07SJohn Dyson int error; 13222244ea07SJohn Dyson int opcode; 13232244ea07SJohn Dyson struct aiocblist *aiocbe; 13242244ea07SJohn Dyson struct aioproclist *aiop; 13252244ea07SJohn Dyson struct kaioinfo *ki; 13262244ea07SJohn Dyson 13272244ea07SJohn Dyson if (aiocbe = TAILQ_FIRST(&aio_freejobs)) { 13282244ea07SJohn Dyson TAILQ_REMOVE(&aio_freejobs, aiocbe, list); 13292244ea07SJohn Dyson } else { 1330fd3bf775SJohn Dyson aiocbe = zalloc (aiocb_zone); 13312244ea07SJohn Dyson } 13322244ea07SJohn Dyson 1333fd3bf775SJohn Dyson aiocbe->inputcharge = 0; 1334fd3bf775SJohn Dyson aiocbe->outputcharge = 0; 1335fd3bf775SJohn Dyson 1336fd3bf775SJohn Dyson suword(&job->_aiocb_private.status, -1); 1337fd3bf775SJohn Dyson suword(&job->_aiocb_private.error, 0); 1338fd3bf775SJohn Dyson suword(&job->_aiocb_private.kernelinfo, -1); 1339fd3bf775SJohn Dyson 13402244ea07SJohn Dyson error = copyin((caddr_t)job, 13412244ea07SJohn Dyson (caddr_t) &aiocbe->uaiocb, sizeof aiocbe->uaiocb); 13422244ea07SJohn Dyson if (error) { 1343a624e84fSJohn Dyson #if DEBUGAIO > 0 1344fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1345a624e84fSJohn Dyson printf("aio_aqueue: Copyin error: %d\n", error); 1346a624e84fSJohn Dyson #endif 1347fd3bf775SJohn Dyson suword(&job->_aiocb_private.error, error); 1348fd3bf775SJohn Dyson 13492244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 13502244ea07SJohn Dyson return error; 13512244ea07SJohn Dyson } 13522244ea07SJohn Dyson 
1353a624e84fSJohn Dyson /* 1354a624e84fSJohn Dyson * Get the opcode 1355a624e84fSJohn Dyson */ 1356a624e84fSJohn Dyson if (type != LIO_NOP) { 1357a624e84fSJohn Dyson aiocbe->uaiocb.aio_lio_opcode = type; 1358a624e84fSJohn Dyson } 1359a624e84fSJohn Dyson opcode = aiocbe->uaiocb.aio_lio_opcode; 13602244ea07SJohn Dyson 13612244ea07SJohn Dyson /* 13622244ea07SJohn Dyson * Get the fd info for process 13632244ea07SJohn Dyson */ 13642244ea07SJohn Dyson fdp = p->p_fd; 13652244ea07SJohn Dyson 13662244ea07SJohn Dyson /* 13672244ea07SJohn Dyson * Range check file descriptor 13682244ea07SJohn Dyson */ 13692244ea07SJohn Dyson fd = aiocbe->uaiocb.aio_fildes; 13702244ea07SJohn Dyson if (fd >= fdp->fd_nfiles) { 13712244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 13722244ea07SJohn Dyson if (type == 0) { 1373a624e84fSJohn Dyson #if DEBUGAIO > 0 1374fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1375a624e84fSJohn Dyson printf("aio_aqueue: Null type\n"); 1376a624e84fSJohn Dyson #endif 13772244ea07SJohn Dyson suword(&job->_aiocb_private.error, EBADF); 13782244ea07SJohn Dyson } 13792244ea07SJohn Dyson return EBADF; 13802244ea07SJohn Dyson } 13812244ea07SJohn Dyson 1382c4860686SJohn Dyson #if DEBUGAIO > 0 1383fd3bf775SJohn Dyson if (DEBUGFLOW > 3) 1384fd3bf775SJohn Dyson printf("aio_aqueue: fd: %d, cmd: %d," 1385fd3bf775SJohn Dyson " buf: %d, cnt: %d, fileoffset: %d\n", 1386c4860686SJohn Dyson aiocbe->uaiocb.aio_fildes, 1387c4860686SJohn Dyson aiocbe->uaiocb.aio_lio_opcode, 1388c4860686SJohn Dyson (int) aiocbe->uaiocb.aio_buf & 0xffffffff, 1389c4860686SJohn Dyson aiocbe->uaiocb.aio_nbytes, 1390c4860686SJohn Dyson (int) aiocbe->uaiocb.aio_offset & 0xffffffff); 1391c4860686SJohn Dyson #endif 1392c4860686SJohn Dyson 1393c4860686SJohn Dyson 13942244ea07SJohn Dyson fp = fdp->fd_ofiles[fd]; 1395a624e84fSJohn Dyson if ((fp == NULL) || 1396a624e84fSJohn Dyson ((opcode == LIO_WRITE) && ((fp->f_flag & FWRITE) == 0))) { 13972244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, 
list); 13982244ea07SJohn Dyson if (type == 0) { 13992244ea07SJohn Dyson suword(&job->_aiocb_private.error, EBADF); 14002244ea07SJohn Dyson } 1401a624e84fSJohn Dyson #if DEBUGAIO > 0 1402fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1403a624e84fSJohn Dyson printf("aio_aqueue: Bad file descriptor\n"); 1404a624e84fSJohn Dyson #endif 14052244ea07SJohn Dyson return EBADF; 14062244ea07SJohn Dyson } 14072244ea07SJohn Dyson 14082244ea07SJohn Dyson if (aiocbe->uaiocb.aio_offset == -1LL) { 14092244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 14102244ea07SJohn Dyson if (type == 0) { 14112244ea07SJohn Dyson suword(&job->_aiocb_private.error, EINVAL); 14122244ea07SJohn Dyson } 1413a624e84fSJohn Dyson #if DEBUGAIO > 0 1414fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1415a624e84fSJohn Dyson printf("aio_aqueue: bad offset\n"); 1416a624e84fSJohn Dyson #endif 14172244ea07SJohn Dyson return EINVAL; 14182244ea07SJohn Dyson } 14192244ea07SJohn Dyson 1420a624e84fSJohn Dyson #if DEBUGAIO > 0 1421fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 1422fd3bf775SJohn Dyson printf("job addr: 0x%x, 0x%x, %d\n", 1423fd3bf775SJohn Dyson job, &job->_aiocb_private.kernelinfo, jobrefid); 14242244ea07SJohn Dyson #endif 14252244ea07SJohn Dyson 14262244ea07SJohn Dyson error = suword(&job->_aiocb_private.kernelinfo, jobrefid); 14272244ea07SJohn Dyson if (error) { 14282244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 14292244ea07SJohn Dyson if (type == 0) { 14302244ea07SJohn Dyson suword(&job->_aiocb_private.error, EINVAL); 14312244ea07SJohn Dyson } 1432a624e84fSJohn Dyson #if DEBUGAIO > 0 1433fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1434a624e84fSJohn Dyson printf("aio_aqueue: fetch of kernelinfo from user space\n"); 1435a624e84fSJohn Dyson #endif 14362244ea07SJohn Dyson return error; 14372244ea07SJohn Dyson } 14382244ea07SJohn Dyson 14392244ea07SJohn Dyson aiocbe->uaiocb._aiocb_private.kernelinfo = (void *)jobrefid; 1440a624e84fSJohn Dyson #if DEBUGAIO > 0 1441fd3bf775SJohn Dyson if 
(DEBUGFLOW > 2) 14422244ea07SJohn Dyson printf("aio_aqueue: New job: %d... ", jobrefid); 14432244ea07SJohn Dyson #endif 144484af4da6SJohn Dyson jobrefid++; 1445fd3bf775SJohn Dyson if (jobrefid > INT_MAX) 1446fd3bf775SJohn Dyson jobrefid = 1; 14472244ea07SJohn Dyson 14482244ea07SJohn Dyson if (opcode == LIO_NOP) { 14492244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 14502244ea07SJohn Dyson if (type == 0) { 14512244ea07SJohn Dyson suword(&job->_aiocb_private.error, 0); 1452fd3bf775SJohn Dyson suword(&job->_aiocb_private.status, 0); 1453fd3bf775SJohn Dyson suword(&job->_aiocb_private.kernelinfo, 0); 14542244ea07SJohn Dyson } 14552244ea07SJohn Dyson return 0; 14562244ea07SJohn Dyson } 14572244ea07SJohn Dyson 1458fd3bf775SJohn Dyson if ((opcode != LIO_READ) && (opcode != LIO_WRITE)) { 14592244ea07SJohn Dyson TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); 14602244ea07SJohn Dyson if (type == 0) { 1461fd3bf775SJohn Dyson suword(&job->_aiocb_private.status, 0); 14622244ea07SJohn Dyson suword(&job->_aiocb_private.error, EINVAL); 14632244ea07SJohn Dyson } 1464a624e84fSJohn Dyson #if DEBUGAIO > 0 1465fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1466a624e84fSJohn Dyson printf("aio_aqueue: invalid LIO op: %d\n", opcode); 1467a624e84fSJohn Dyson #endif 14682244ea07SJohn Dyson return EINVAL; 14692244ea07SJohn Dyson } 14702244ea07SJohn Dyson 1471fd3bf775SJohn Dyson suword(&job->_aiocb_private.error, EINPROGRESS); 1472fd3bf775SJohn Dyson aiocbe->uaiocb._aiocb_private.error = EINPROGRESS; 14732244ea07SJohn Dyson aiocbe->userproc = p; 14742244ea07SJohn Dyson aiocbe->jobflags = 0; 147584af4da6SJohn Dyson aiocbe->lio = lj; 147684af4da6SJohn Dyson ki = p->p_aioinfo; 14772244ea07SJohn Dyson 1478fd3bf775SJohn Dyson if ((error = aio_qphysio(p, aiocbe)) == 0) { 1479fd3bf775SJohn Dyson return 0; 1480fd3bf775SJohn Dyson } else if (error > 0) { 1481fd3bf775SJohn Dyson suword(&job->_aiocb_private.status, 0); 1482fd3bf775SJohn Dyson aiocbe->uaiocb._aiocb_private.error = error; 
1483fd3bf775SJohn Dyson suword(&job->_aiocb_private.error, error); 1484fd3bf775SJohn Dyson return error; 1485fd3bf775SJohn Dyson } 1486fd3bf775SJohn Dyson 148784af4da6SJohn Dyson /* 148884af4da6SJohn Dyson * No buffer for daemon I/O 148984af4da6SJohn Dyson */ 149084af4da6SJohn Dyson aiocbe->bp = NULL; 149184af4da6SJohn Dyson 149284af4da6SJohn Dyson ki->kaio_queue_count++; 149384af4da6SJohn Dyson if (lj) { 149484af4da6SJohn Dyson lj->lioj_queue_count++; 149584af4da6SJohn Dyson } 1496fd3bf775SJohn Dyson TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist); 1497fd3bf775SJohn Dyson TAILQ_INSERT_TAIL(&aio_jobs, aiocbe, list); 1498fd3bf775SJohn Dyson aiocbe->jobstate = JOBST_JOBQGLOBAL; 1499fd3bf775SJohn Dyson 150084af4da6SJohn Dyson num_queue_count++; 1501fd3bf775SJohn Dyson #if DEBUGAIO > 0 1502fd3bf775SJohn Dyson if (DEBUGREQ) { 1503fd3bf775SJohn Dyson printf("PROC %s, fd: %d, offset: 0x%x, address: 0x%x, size: %d\n", 1504fd3bf775SJohn Dyson job->aio_lio_opcode == LIO_READ?"Read":"Write", 1505fd3bf775SJohn Dyson job->aio_fildes, (int) job->aio_offset, 1506fd3bf775SJohn Dyson job->aio_buf, job->aio_nbytes); 1507fd3bf775SJohn Dyson } 1508fd3bf775SJohn Dyson #endif 1509fd3bf775SJohn Dyson error = 0; 1510fd3bf775SJohn Dyson 1511fd3bf775SJohn Dyson /* 1512fd3bf775SJohn Dyson * If we don't have a free AIO process, and we are below our 1513fd3bf775SJohn Dyson * quota, then start one. Otherwise, depend on the subsequent 1514fd3bf775SJohn Dyson * I/O completions to pick-up this job. If we don't sucessfully 1515fd3bf775SJohn Dyson * create the new process (thread) due to resource issues, we 1516fd3bf775SJohn Dyson * return an error for now (EAGAIN), which is likely not the 1517fd3bf775SJohn Dyson * correct thing to do. 
1518fd3bf775SJohn Dyson */ 15192244ea07SJohn Dyson retryproc: 15202244ea07SJohn Dyson if (aiop = TAILQ_FIRST(&aio_freeproc)) { 15212244ea07SJohn Dyson TAILQ_REMOVE(&aio_freeproc, aiop, list); 15222244ea07SJohn Dyson TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list); 15232244ea07SJohn Dyson aiop->aioprocflags &= ~AIOP_FREE; 15242244ea07SJohn Dyson wakeup(aiop->aioproc); 1525fd3bf775SJohn Dyson } else if (((num_aio_resv_start + num_aio_procs) < max_aio_procs) && 1526fd3bf775SJohn Dyson ((ki->kaio_active_count + num_aio_resv_start) < 1527fd3bf775SJohn Dyson ki->kaio_maxactive_count)) { 1528fd3bf775SJohn Dyson num_aio_resv_start++; 1529fd3bf775SJohn Dyson if ((error = aio_newproc()) == 0) { 153084af4da6SJohn Dyson num_aio_resv_start--; 15312244ea07SJohn Dyson goto retryproc; 1532fd3bf775SJohn Dyson } 153384af4da6SJohn Dyson num_aio_resv_start--; 1534fd3bf775SJohn Dyson } 1535fd3bf775SJohn Dyson return error; 15362244ea07SJohn Dyson } 15372244ea07SJohn Dyson 1538fd3bf775SJohn Dyson /* 1539fd3bf775SJohn Dyson * This routine queues an AIO request, checking for quotas. 
1540fd3bf775SJohn Dyson */ 15412244ea07SJohn Dyson static int 1542fd3bf775SJohn Dyson aio_aqueue(struct proc *p, struct aiocb *job, int type) 1543fd3bf775SJohn Dyson { 15442244ea07SJohn Dyson struct kaioinfo *ki; 15452244ea07SJohn Dyson 15462244ea07SJohn Dyson if (p->p_aioinfo == NULL) { 15472244ea07SJohn Dyson aio_init_aioinfo(p); 15482244ea07SJohn Dyson } 15492244ea07SJohn Dyson 15502244ea07SJohn Dyson if (num_queue_count >= max_queue_count) 15512244ea07SJohn Dyson return EAGAIN; 15522244ea07SJohn Dyson 15532244ea07SJohn Dyson ki = p->p_aioinfo; 15542244ea07SJohn Dyson if (ki->kaio_queue_count >= ki->kaio_qallowed_count) 15552244ea07SJohn Dyson return EAGAIN; 15562244ea07SJohn Dyson 155784af4da6SJohn Dyson return _aio_aqueue(p, job, NULL, type); 15582244ea07SJohn Dyson } 15592244ea07SJohn Dyson 15602244ea07SJohn Dyson /* 1561fd3bf775SJohn Dyson * Support the aio_return system call, as a side-effect, kernel 1562fd3bf775SJohn Dyson * resources are released. 15632244ea07SJohn Dyson */ 15642244ea07SJohn Dyson int 1565fd3bf775SJohn Dyson aio_return(struct proc *p, struct aio_return_args *uap) 1566fd3bf775SJohn Dyson { 156784af4da6SJohn Dyson int s; 15682244ea07SJohn Dyson int jobref, status; 156984af4da6SJohn Dyson struct aiocblist *cb, *ncb; 15702244ea07SJohn Dyson struct kaioinfo *ki; 1571fd3bf775SJohn Dyson struct proc *userp; 15722244ea07SJohn Dyson 15732244ea07SJohn Dyson ki = p->p_aioinfo; 15742244ea07SJohn Dyson if (ki == NULL) { 15752244ea07SJohn Dyson return EINVAL; 15762244ea07SJohn Dyson } 15772244ea07SJohn Dyson 15782244ea07SJohn Dyson jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo); 1579fd3bf775SJohn Dyson if (jobref == -1 || jobref == 0) 15802244ea07SJohn Dyson return EINVAL; 15812244ea07SJohn Dyson 1582a624e84fSJohn Dyson #if DEBUGAIO > 0 1583fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1584fd3bf775SJohn Dyson printf("aio_return: jobref: %d, ", jobref); 1585a624e84fSJohn Dyson #endif 1586a624e84fSJohn Dyson 15872244ea07SJohn Dyson 15882244ea07SJohn 
Dyson for (cb = TAILQ_FIRST(&ki->kaio_jobdone); 15892244ea07SJohn Dyson cb; 15902244ea07SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 15912244ea07SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 1592fd3bf775SJohn Dyson #if DEBUGAIO > 0 1593fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1594fd3bf775SJohn Dyson printf("status: %d, error: %d\n", 1595fd3bf775SJohn Dyson cb->uaiocb._aiocb_private.status, 1596fd3bf775SJohn Dyson cb->uaiocb._aiocb_private.error); 1597fd3bf775SJohn Dyson #endif 1598cb226aaaSPoul-Henning Kamp p->p_retval[0] = cb->uaiocb._aiocb_private.status; 1599fd3bf775SJohn Dyson if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) { 1600fd3bf775SJohn Dyson curproc->p_stats->p_ru.ru_oublock += cb->outputcharge; 1601fd3bf775SJohn Dyson cb->outputcharge = 0; 1602fd3bf775SJohn Dyson } else if (cb->uaiocb.aio_lio_opcode == LIO_READ) { 1603fd3bf775SJohn Dyson curproc->p_stats->p_ru.ru_inblock += cb->inputcharge; 1604fd3bf775SJohn Dyson cb->inputcharge = 0; 1605fd3bf775SJohn Dyson } 16062244ea07SJohn Dyson aio_free_entry(cb); 16072244ea07SJohn Dyson return 0; 16082244ea07SJohn Dyson } 16092244ea07SJohn Dyson } 16102244ea07SJohn Dyson 161184af4da6SJohn Dyson s = splbio(); 161284af4da6SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_bufdone); 161384af4da6SJohn Dyson cb; 161484af4da6SJohn Dyson cb = ncb) { 161584af4da6SJohn Dyson ncb = TAILQ_NEXT(cb, plist); 161684af4da6SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 161784af4da6SJohn Dyson splx(s); 161884af4da6SJohn Dyson p->p_retval[0] = cb->uaiocb._aiocb_private.status; 161984af4da6SJohn Dyson aio_free_entry(cb); 162084af4da6SJohn Dyson return 0; 162184af4da6SJohn Dyson } 162284af4da6SJohn Dyson } 162384af4da6SJohn Dyson splx(s); 162484af4da6SJohn Dyson 1625fd3bf775SJohn Dyson #if DEBUGAIO > 0 1626fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 1627fd3bf775SJohn Dyson printf("(not found) status: %d, error: %d\n", 1628fd3bf775SJohn Dyson cb->uaiocb._aiocb_private.status, 1629fd3bf775SJohn Dyson 
cb->uaiocb._aiocb_private.error); 1630fd3bf775SJohn Dyson #endif 1631fd3bf775SJohn Dyson /* 16322244ea07SJohn Dyson status = fuword(&uap->aiocbp->_aiocb_private.status); 16332244ea07SJohn Dyson if (status == -1) 16342244ea07SJohn Dyson return 0; 1635fd3bf775SJohn Dyson */ 16362244ea07SJohn Dyson 16372244ea07SJohn Dyson return (EINVAL); 16382244ea07SJohn Dyson } 16392244ea07SJohn Dyson 16402244ea07SJohn Dyson /* 16412244ea07SJohn Dyson * Allow a process to wakeup when any of the I/O requests are 16422244ea07SJohn Dyson * completed. 16432244ea07SJohn Dyson */ 16442244ea07SJohn Dyson int 1645fd3bf775SJohn Dyson aio_suspend(struct proc *p, struct aio_suspend_args *uap) 1646fd3bf775SJohn Dyson { 16474a11ca4eSPoul-Henning Kamp struct timeval atv; 16482244ea07SJohn Dyson struct timespec ts; 16492244ea07SJohn Dyson struct aiocb *const *cbptr, *cbp; 16502244ea07SJohn Dyson struct kaioinfo *ki; 16512244ea07SJohn Dyson struct aiocblist *cb; 16522244ea07SJohn Dyson int i; 165384af4da6SJohn Dyson int njoblist; 16542244ea07SJohn Dyson int error, s, timo; 16552244ea07SJohn Dyson int *joblist; 16562244ea07SJohn Dyson 1657fd3bf775SJohn Dyson if (uap->nent >= AIO_LISTIO_MAX) 1658fd3bf775SJohn Dyson return EINVAL; 16592244ea07SJohn Dyson 16602244ea07SJohn Dyson timo = 0; 16612244ea07SJohn Dyson if (uap->timeout) { 16622244ea07SJohn Dyson /* 16632244ea07SJohn Dyson * Get timespec struct 16642244ea07SJohn Dyson */ 16652244ea07SJohn Dyson if (error = copyin((caddr_t) uap->timeout, (caddr_t) &ts, sizeof ts)) { 16662244ea07SJohn Dyson return error; 16672244ea07SJohn Dyson } 16682244ea07SJohn Dyson 16692244ea07SJohn Dyson if (ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000) 16702244ea07SJohn Dyson return (EINVAL); 16712244ea07SJohn Dyson 16722244ea07SJohn Dyson TIMESPEC_TO_TIMEVAL(&atv, &ts) 16732244ea07SJohn Dyson if (itimerfix(&atv)) 16742244ea07SJohn Dyson return (EINVAL); 16752244ea07SJohn Dyson /* 16762244ea07SJohn Dyson * XXX this is not as careful as settimeofday() about minimising 
16772244ea07SJohn Dyson * interrupt latency. The hzto() interface is inconvenient as usual. 16782244ea07SJohn Dyson */ 16792244ea07SJohn Dyson s = splclock(); 16802244ea07SJohn Dyson timevaladd(&atv, &time); 16812244ea07SJohn Dyson timo = hzto(&atv); 16822244ea07SJohn Dyson splx(s); 16832244ea07SJohn Dyson if (timo == 0) 16842244ea07SJohn Dyson timo = 1; 16852244ea07SJohn Dyson } 16862244ea07SJohn Dyson 16872244ea07SJohn Dyson ki = p->p_aioinfo; 16882244ea07SJohn Dyson if (ki == NULL) 16892244ea07SJohn Dyson return EAGAIN; 16902244ea07SJohn Dyson 169184af4da6SJohn Dyson njoblist = 0; 1692fd3bf775SJohn Dyson joblist = zalloc(aiol_zone); 16932244ea07SJohn Dyson cbptr = uap->aiocbp; 16942244ea07SJohn Dyson 16952244ea07SJohn Dyson for(i = 0; i < uap->nent; i++) { 16962244ea07SJohn Dyson cbp = (struct aiocb *) fuword((caddr_t) &cbptr[i]); 169784af4da6SJohn Dyson if (cbp == 0) 169884af4da6SJohn Dyson continue; 169984af4da6SJohn Dyson #if DEBUGAIO > 0 1700fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 17012244ea07SJohn Dyson printf("cbp: %x\n", cbp); 17022244ea07SJohn Dyson #endif 17032244ea07SJohn Dyson joblist[i] = fuword(&cbp->_aiocb_private.kernelinfo); 170484af4da6SJohn Dyson njoblist++; 17052244ea07SJohn Dyson } 170684af4da6SJohn Dyson if (njoblist == 0) 170784af4da6SJohn Dyson return 0; 17082244ea07SJohn Dyson 17092244ea07SJohn Dyson while (1) { 17102244ea07SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_jobdone); 171184af4da6SJohn Dyson cb; cb = TAILQ_NEXT(cb, plist)) { 171284af4da6SJohn Dyson for(i = 0; i < njoblist; i++) { 1713fd3bf775SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == 1714fd3bf775SJohn Dyson joblist[i]) { 171584af4da6SJohn Dyson zfree(aiol_zone, joblist); 171684af4da6SJohn Dyson return 0; 171784af4da6SJohn Dyson } 171884af4da6SJohn Dyson } 171984af4da6SJohn Dyson } 172084af4da6SJohn Dyson 172184af4da6SJohn Dyson s = splbio(); 172284af4da6SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_bufdone); 172384af4da6SJohn Dyson cb; cb = TAILQ_NEXT(cb, 
plist)) { 172484af4da6SJohn Dyson for(i = 0; i < njoblist; i++) { 172584af4da6SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == 172684af4da6SJohn Dyson joblist[i]) { 172784af4da6SJohn Dyson splx(s); 1728fd3bf775SJohn Dyson zfree(aiol_zone, joblist); 17292244ea07SJohn Dyson return 0; 17302244ea07SJohn Dyson } 17312244ea07SJohn Dyson } 17322244ea07SJohn Dyson } 17332244ea07SJohn Dyson 1734a624e84fSJohn Dyson #if DEBUGAIO > 0 1735fd3bf775SJohn Dyson if (DEBUGFLOW > 0) { 1736a624e84fSJohn Dyson printf("Suspend, timeout: %d clocks, jobs:", timo); 173784af4da6SJohn Dyson for(i=0;i<njoblist;i++) 1738a624e84fSJohn Dyson printf(" %d", joblist[i]); 1739a624e84fSJohn Dyson printf("\n"); 1740a624e84fSJohn Dyson } 1741a624e84fSJohn Dyson 1742fd3bf775SJohn Dyson if (DEBUGFLOW > 2) { 17432244ea07SJohn Dyson printf("Suspending -- waiting for all I/O's to complete: "); 174484af4da6SJohn Dyson for(i=0;i<njoblist;i++) 17452244ea07SJohn Dyson printf(" %d", joblist[i]); 17462244ea07SJohn Dyson printf("\n"); 1747a624e84fSJohn Dyson } 17482244ea07SJohn Dyson #endif 1749fd3bf775SJohn Dyson ki->kaio_flags |= KAIO_WAKEUP; 17502244ea07SJohn Dyson error = tsleep(p, PRIBIO|PCATCH, "aiospn", timo); 175184af4da6SJohn Dyson splx(s); 17522244ea07SJohn Dyson 17532244ea07SJohn Dyson if (error == EINTR) { 1754a624e84fSJohn Dyson #if DEBUGAIO > 0 1755fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 17562244ea07SJohn Dyson printf(" signal\n"); 17572244ea07SJohn Dyson #endif 1758fd3bf775SJohn Dyson zfree(aiol_zone, joblist); 17592244ea07SJohn Dyson return EINTR; 17602244ea07SJohn Dyson } else if (error == EWOULDBLOCK) { 1761a624e84fSJohn Dyson #if DEBUGAIO > 0 1762fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 17632244ea07SJohn Dyson printf(" timeout\n"); 17642244ea07SJohn Dyson #endif 1765fd3bf775SJohn Dyson zfree(aiol_zone, joblist); 17662244ea07SJohn Dyson return EAGAIN; 17672244ea07SJohn Dyson } 1768a624e84fSJohn Dyson #if DEBUGAIO > 0 1769fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 17702244ea07SJohn Dyson 
printf("\n"); 17712244ea07SJohn Dyson #endif 17722244ea07SJohn Dyson } 17732244ea07SJohn Dyson 17742244ea07SJohn Dyson /* NOTREACHED */ 17752244ea07SJohn Dyson return EINVAL; 17762244ea07SJohn Dyson } 1777ee877a35SJohn Dyson 1778ee877a35SJohn Dyson /* 1779ee877a35SJohn Dyson * aio_cancel at the kernel level is a NOOP right now. It 1780ee877a35SJohn Dyson * might be possible to support it partially in user mode, or 1781ee877a35SJohn Dyson * in kernel mode later on. 1782ee877a35SJohn Dyson */ 1783ee877a35SJohn Dyson int 1784fd3bf775SJohn Dyson aio_cancel(struct proc *p, struct aio_cancel_args *uap) 1785fd3bf775SJohn Dyson { 1786ee877a35SJohn Dyson return AIO_NOTCANCELLED; 1787ee877a35SJohn Dyson } 1788ee877a35SJohn Dyson 1789ee877a35SJohn Dyson /* 1790ee877a35SJohn Dyson * aio_error is implemented in the kernel level for compatibility 1791ee877a35SJohn Dyson * purposes only. For a user mode async implementation, it would be 1792ee877a35SJohn Dyson * best to do it in a userland subroutine. 
1793ee877a35SJohn Dyson */ 1794ee877a35SJohn Dyson int 1795fd3bf775SJohn Dyson aio_error(struct proc *p, struct aio_error_args *uap) 1796fd3bf775SJohn Dyson { 179784af4da6SJohn Dyson int s; 17982244ea07SJohn Dyson struct aiocblist *cb; 17992244ea07SJohn Dyson struct kaioinfo *ki; 18002244ea07SJohn Dyson int jobref; 1801fd3bf775SJohn Dyson int error, status; 1802ee877a35SJohn Dyson 18032244ea07SJohn Dyson ki = p->p_aioinfo; 18042244ea07SJohn Dyson if (ki == NULL) 18052244ea07SJohn Dyson return EINVAL; 18062244ea07SJohn Dyson 18072244ea07SJohn Dyson jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo); 1808fd3bf775SJohn Dyson if ((jobref == -1) || (jobref == 0)) 1809fd3bf775SJohn Dyson return EINVAL; 1810ee877a35SJohn Dyson 18112244ea07SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_jobdone); 18122244ea07SJohn Dyson cb; 18132244ea07SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 18142244ea07SJohn Dyson 18152244ea07SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 1816cb226aaaSPoul-Henning Kamp p->p_retval[0] = cb->uaiocb._aiocb_private.error; 18172244ea07SJohn Dyson return 0; 18182244ea07SJohn Dyson } 1819ee877a35SJohn Dyson } 1820ee877a35SJohn Dyson 18212244ea07SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_jobqueue); 18222244ea07SJohn Dyson cb; 18232244ea07SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 18242244ea07SJohn Dyson 18252244ea07SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 1826cb226aaaSPoul-Henning Kamp p->p_retval[0] = EINPROGRESS; 18272244ea07SJohn Dyson return 0; 18282244ea07SJohn Dyson } 18292244ea07SJohn Dyson } 18302244ea07SJohn Dyson 183184af4da6SJohn Dyson s = splbio(); 183284af4da6SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_bufdone); 183384af4da6SJohn Dyson cb; 183484af4da6SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 183584af4da6SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 183684af4da6SJohn Dyson p->p_retval[0] = cb->uaiocb._aiocb_private.error; 183784af4da6SJohn Dyson splx(s); 
183884af4da6SJohn Dyson return 0; 183984af4da6SJohn Dyson } 184084af4da6SJohn Dyson } 184184af4da6SJohn Dyson 184284af4da6SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_bufqueue); 184384af4da6SJohn Dyson cb; 184484af4da6SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 184584af4da6SJohn Dyson 184684af4da6SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { 184784af4da6SJohn Dyson p->p_retval[0] = EINPROGRESS; 184884af4da6SJohn Dyson splx(s); 184984af4da6SJohn Dyson return 0; 185084af4da6SJohn Dyson } 185184af4da6SJohn Dyson } 185284af4da6SJohn Dyson splx(s); 185384af4da6SJohn Dyson 185484af4da6SJohn Dyson 18552244ea07SJohn Dyson /* 18562244ea07SJohn Dyson * Hack for lio 18572244ea07SJohn Dyson */ 1858fd3bf775SJohn Dyson /* 18592244ea07SJohn Dyson status = fuword(&uap->aiocbp->_aiocb_private.status); 18602244ea07SJohn Dyson if (status == -1) { 18612244ea07SJohn Dyson return fuword(&uap->aiocbp->_aiocb_private.error); 18622244ea07SJohn Dyson } 1863fd3bf775SJohn Dyson */ 18642244ea07SJohn Dyson return EINVAL; 1865ee877a35SJohn Dyson } 1866ee877a35SJohn Dyson 1867ee877a35SJohn Dyson int 1868fd3bf775SJohn Dyson aio_read(struct proc *p, struct aio_read_args *uap) 1869fd3bf775SJohn Dyson { 1870ee877a35SJohn Dyson struct filedesc *fdp; 1871ee877a35SJohn Dyson struct file *fp; 1872ee877a35SJohn Dyson struct uio auio; 1873ee877a35SJohn Dyson struct iovec aiov; 1874ee877a35SJohn Dyson unsigned int fd; 1875ee877a35SJohn Dyson int cnt; 1876ee877a35SJohn Dyson struct aiocb iocb; 18772244ea07SJohn Dyson int error, pmodes; 1878ee877a35SJohn Dyson 18792244ea07SJohn Dyson pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes); 18802244ea07SJohn Dyson if ((pmodes & AIO_PMODE_SYNC) == 0) { 188184af4da6SJohn Dyson #if DEBUGAIO > 0 1882fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 1883a624e84fSJohn Dyson printf("queueing aio_read\n"); 1884a624e84fSJohn Dyson #endif 18852244ea07SJohn Dyson return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ); 18862244ea07SJohn Dyson } 
1887ee877a35SJohn Dyson 1888ee877a35SJohn Dyson /* 1889ee877a35SJohn Dyson * Get control block 1890ee877a35SJohn Dyson */ 1891ee877a35SJohn Dyson if (error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb)) 1892ee877a35SJohn Dyson return error; 1893ee877a35SJohn Dyson 1894ee877a35SJohn Dyson /* 1895ee877a35SJohn Dyson * Get the fd info for process 1896ee877a35SJohn Dyson */ 1897ee877a35SJohn Dyson fdp = p->p_fd; 1898ee877a35SJohn Dyson 1899ee877a35SJohn Dyson /* 1900ee877a35SJohn Dyson * Range check file descriptor 1901ee877a35SJohn Dyson */ 1902ee877a35SJohn Dyson fd = iocb.aio_fildes; 1903ee877a35SJohn Dyson if (fd >= fdp->fd_nfiles) 1904ee877a35SJohn Dyson return EBADF; 1905ee877a35SJohn Dyson fp = fdp->fd_ofiles[fd]; 1906ee877a35SJohn Dyson if ((fp == NULL) || ((fp->f_flag & FREAD) == 0)) 1907ee877a35SJohn Dyson return EBADF; 19082244ea07SJohn Dyson if (iocb.aio_offset == -1LL) 1909ee877a35SJohn Dyson return EINVAL; 1910ee877a35SJohn Dyson 1911ee877a35SJohn Dyson auio.uio_resid = iocb.aio_nbytes; 1912ee877a35SJohn Dyson if (auio.uio_resid < 0) 1913ee877a35SJohn Dyson return (EINVAL); 1914ee877a35SJohn Dyson 19152244ea07SJohn Dyson /* 19162244ea07SJohn Dyson * Process sync simply -- queue async request. 
19172244ea07SJohn Dyson */ 19182244ea07SJohn Dyson if ((iocb._aiocb_private.privatemodes & AIO_PMODE_SYNC) == 0) { 19192244ea07SJohn Dyson return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ); 19202244ea07SJohn Dyson } 19212244ea07SJohn Dyson 19222244ea07SJohn Dyson aiov.iov_base = iocb.aio_buf; 19232244ea07SJohn Dyson aiov.iov_len = iocb.aio_nbytes; 19242244ea07SJohn Dyson 19252244ea07SJohn Dyson auio.uio_iov = &aiov; 19262244ea07SJohn Dyson auio.uio_iovcnt = 1; 19272244ea07SJohn Dyson auio.uio_offset = iocb.aio_offset; 1928ee877a35SJohn Dyson auio.uio_rw = UIO_READ; 1929ee877a35SJohn Dyson auio.uio_segflg = UIO_USERSPACE; 1930ee877a35SJohn Dyson auio.uio_procp = p; 1931ee877a35SJohn Dyson 1932ee877a35SJohn Dyson cnt = iocb.aio_nbytes; 1933ee877a35SJohn Dyson error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred); 1934ee877a35SJohn Dyson if (error && 1935ee877a35SJohn Dyson (auio.uio_resid != cnt) && 1936ee877a35SJohn Dyson (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) 1937ee877a35SJohn Dyson error = 0; 1938ee877a35SJohn Dyson cnt -= auio.uio_resid; 1939cb226aaaSPoul-Henning Kamp p->p_retval[0] = cnt; 1940ee877a35SJohn Dyson return error; 1941ee877a35SJohn Dyson } 1942ee877a35SJohn Dyson 1943ee877a35SJohn Dyson int 1944fd3bf775SJohn Dyson aio_write(struct proc *p, struct aio_write_args *uap) 1945fd3bf775SJohn Dyson { 1946ee877a35SJohn Dyson struct filedesc *fdp; 1947ee877a35SJohn Dyson struct file *fp; 1948ee877a35SJohn Dyson struct uio auio; 1949ee877a35SJohn Dyson struct iovec aiov; 1950ee877a35SJohn Dyson unsigned int fd; 1951ee877a35SJohn Dyson int cnt; 1952ee877a35SJohn Dyson struct aiocb iocb; 1953ee877a35SJohn Dyson int error; 19542244ea07SJohn Dyson int pmodes; 19552244ea07SJohn Dyson 19562244ea07SJohn Dyson /* 19572244ea07SJohn Dyson * Process sync simply -- queue async request. 
19582244ea07SJohn Dyson */ 19592244ea07SJohn Dyson pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes); 19602244ea07SJohn Dyson if ((pmodes & AIO_PMODE_SYNC) == 0) { 196184af4da6SJohn Dyson #if DEBUGAIO > 0 1962fd3bf775SJohn Dyson if (DEBUGFLOW > 2) 1963a624e84fSJohn Dyson printf("queing aio_write\n"); 1964a624e84fSJohn Dyson #endif 19652244ea07SJohn Dyson return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_WRITE); 19662244ea07SJohn Dyson } 1967ee877a35SJohn Dyson 1968ee877a35SJohn Dyson if (error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb)) 1969ee877a35SJohn Dyson return error; 1970ee877a35SJohn Dyson 1971ee877a35SJohn Dyson /* 1972ee877a35SJohn Dyson * Get the fd info for process 1973ee877a35SJohn Dyson */ 1974ee877a35SJohn Dyson fdp = p->p_fd; 1975ee877a35SJohn Dyson 1976ee877a35SJohn Dyson /* 1977ee877a35SJohn Dyson * Range check file descriptor 1978ee877a35SJohn Dyson */ 1979ee877a35SJohn Dyson fd = iocb.aio_fildes; 1980ee877a35SJohn Dyson if (fd >= fdp->fd_nfiles) 1981ee877a35SJohn Dyson return EBADF; 1982ee877a35SJohn Dyson fp = fdp->fd_ofiles[fd]; 1983ee877a35SJohn Dyson if ((fp == NULL) || ((fp->f_flag & FWRITE) == 0)) 1984ee877a35SJohn Dyson return EBADF; 19852244ea07SJohn Dyson if (iocb.aio_offset == -1LL) 1986ee877a35SJohn Dyson return EINVAL; 1987ee877a35SJohn Dyson 1988ee877a35SJohn Dyson aiov.iov_base = iocb.aio_buf; 1989ee877a35SJohn Dyson aiov.iov_len = iocb.aio_nbytes; 1990ee877a35SJohn Dyson auio.uio_iov = &aiov; 1991ee877a35SJohn Dyson auio.uio_iovcnt = 1; 1992ee877a35SJohn Dyson auio.uio_offset = iocb.aio_offset; 1993ee877a35SJohn Dyson 1994ee877a35SJohn Dyson auio.uio_resid = iocb.aio_nbytes; 1995ee877a35SJohn Dyson if (auio.uio_resid < 0) 1996ee877a35SJohn Dyson return (EINVAL); 1997ee877a35SJohn Dyson 1998ee877a35SJohn Dyson auio.uio_rw = UIO_WRITE; 1999ee877a35SJohn Dyson auio.uio_segflg = UIO_USERSPACE; 2000ee877a35SJohn Dyson auio.uio_procp = p; 2001ee877a35SJohn Dyson 2002ee877a35SJohn Dyson cnt = 
iocb.aio_nbytes; 2003ee877a35SJohn Dyson error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred); 2004ee877a35SJohn Dyson if (error) { 2005ee877a35SJohn Dyson if (auio.uio_resid != cnt) { 2006ee877a35SJohn Dyson if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) 2007ee877a35SJohn Dyson error = 0; 2008ee877a35SJohn Dyson if (error == EPIPE) 2009ee877a35SJohn Dyson psignal(p, SIGPIPE); 2010ee877a35SJohn Dyson } 2011ee877a35SJohn Dyson } 2012ee877a35SJohn Dyson cnt -= auio.uio_resid; 2013cb226aaaSPoul-Henning Kamp p->p_retval[0] = cnt; 2014ee877a35SJohn Dyson return error; 2015ee877a35SJohn Dyson } 2016ee877a35SJohn Dyson 2017ee877a35SJohn Dyson int 2018fd3bf775SJohn Dyson lio_listio(struct proc *p, struct lio_listio_args *uap) 2019fd3bf775SJohn Dyson { 20204a11ca4eSPoul-Henning Kamp int nent, nentqueued; 20212244ea07SJohn Dyson struct aiocb *iocb, * const *cbptr; 20222244ea07SJohn Dyson struct aiocblist *cb; 20232244ea07SJohn Dyson struct kaioinfo *ki; 202484af4da6SJohn Dyson struct aio_liojob *lj; 20252244ea07SJohn Dyson int error, runningcode; 2026fd3bf775SJohn Dyson int nerror; 2027ee877a35SJohn Dyson int i; 202884af4da6SJohn Dyson int s; 2029ee877a35SJohn Dyson 2030a624e84fSJohn Dyson if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) { 2031a624e84fSJohn Dyson #if DEBUGAIO > 0 2032fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 2033a624e84fSJohn Dyson printf("lio_listio: bad mode: %d\n", uap->mode); 2034a624e84fSJohn Dyson #endif 2035ee877a35SJohn Dyson return EINVAL; 2036a624e84fSJohn Dyson } 20372244ea07SJohn Dyson 20382244ea07SJohn Dyson nent = uap->nent; 2039a624e84fSJohn Dyson if (nent > AIO_LISTIO_MAX) { 2040a624e84fSJohn Dyson #if DEBUGAIO > 0 2041fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 2042fd3bf775SJohn Dyson printf("lio_listio: nent > AIO_LISTIO_MAX: %d > %d\n", 2043fd3bf775SJohn Dyson nent, AIO_LISTIO_MAX); 2044a624e84fSJohn Dyson #endif 20452244ea07SJohn Dyson return EINVAL; 2046a624e84fSJohn Dyson } 20472244ea07SJohn Dyson 20482244ea07SJohn 
Dyson if (p->p_aioinfo == NULL) { 20492244ea07SJohn Dyson aio_init_aioinfo(p); 20502244ea07SJohn Dyson } 20512244ea07SJohn Dyson 2052a624e84fSJohn Dyson if ((nent + num_queue_count) > max_queue_count) { 2053a624e84fSJohn Dyson #if DEBUGAIO > 0 2054fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 2055fd3bf775SJohn Dyson printf("lio_listio: (nent(%d) + num_queue_count(%d)) >" 2056fd3bf775SJohn Dyson " max_queue_count(%d)\n", 2057fd3bf775SJohn Dyson nent, num_queue_count, max_queue_count); 2058a624e84fSJohn Dyson #endif 20592244ea07SJohn Dyson return EAGAIN; 2060a624e84fSJohn Dyson } 20612244ea07SJohn Dyson 20622244ea07SJohn Dyson ki = p->p_aioinfo; 2063a624e84fSJohn Dyson if ((nent + ki->kaio_queue_count) > ki->kaio_qallowed_count) { 2064a624e84fSJohn Dyson #if DEBUGAIO > 0 2065fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 2066fd3bf775SJohn Dyson printf("lio_listio: (nent(%d) + ki->kaio_queue_count(%d)) >" 2067fd3bf775SJohn Dyson " ki->kaio_qallowed_count(%d)\n", 2068fd3bf775SJohn Dyson nent, ki->kaio_queue_count, ki->kaio_qallowed_count); 2069a624e84fSJohn Dyson #endif 20702244ea07SJohn Dyson return EAGAIN; 2071a624e84fSJohn Dyson } 20722244ea07SJohn Dyson 207384af4da6SJohn Dyson lj = zalloc(aiolio_zone); 207484af4da6SJohn Dyson if (!lj) { 207584af4da6SJohn Dyson return EAGAIN; 207684af4da6SJohn Dyson } 207784af4da6SJohn Dyson 207884af4da6SJohn Dyson lj->lioj_flags = 0; 207984af4da6SJohn Dyson lj->lioj_buffer_count = 0; 208084af4da6SJohn Dyson lj->lioj_buffer_finished_count = 0; 208184af4da6SJohn Dyson lj->lioj_queue_count = 0; 208284af4da6SJohn Dyson lj->lioj_queue_finished_count = 0; 208384af4da6SJohn Dyson lj->lioj_ki = ki; 208484af4da6SJohn Dyson TAILQ_INSERT_TAIL(&ki->kaio_liojoblist, lj, lioj_list); 208584af4da6SJohn Dyson 208684af4da6SJohn Dyson /* 208784af4da6SJohn Dyson * Setup signal 208884af4da6SJohn Dyson */ 208984af4da6SJohn Dyson if (uap->sig && (uap->mode == LIO_NOWAIT)) { 209084af4da6SJohn Dyson error = copyin(uap->sig, &lj->lioj_signal, sizeof lj->lioj_signal); 
209184af4da6SJohn Dyson if (error) 209284af4da6SJohn Dyson return error; 209384af4da6SJohn Dyson lj->lioj_flags |= LIOJ_SIGNAL; 209484af4da6SJohn Dyson lj->lioj_flags &= ~LIOJ_SIGNAL_POSTED; 209584af4da6SJohn Dyson } else { 209684af4da6SJohn Dyson lj->lioj_flags &= ~LIOJ_SIGNAL; 209784af4da6SJohn Dyson } 209884af4da6SJohn Dyson 20992244ea07SJohn Dyson /* 21002244ea07SJohn Dyson * get pointers to the list of I/O requests 21012244ea07SJohn Dyson */ 21022244ea07SJohn Dyson 2103fd3bf775SJohn Dyson nerror = 0; 2104fd3bf775SJohn Dyson nentqueued = 0; 21052244ea07SJohn Dyson cbptr = uap->acb_list; 21062244ea07SJohn Dyson for(i = 0; i < uap->nent; i++) { 21072244ea07SJohn Dyson iocb = (struct aiocb *) fuword((caddr_t) &cbptr[i]); 2108fd3bf775SJohn Dyson if (((int) iocb != -1) && ((int) iocb != NULL)) { 210984af4da6SJohn Dyson error = _aio_aqueue(p, iocb, lj, 0); 2110fd3bf775SJohn Dyson if (error == 0) { 21112244ea07SJohn Dyson nentqueued++; 2112fd3bf775SJohn Dyson } else { 2113fd3bf775SJohn Dyson nerror++; 211484af4da6SJohn Dyson #if DEBUGAIO > 0 211584af4da6SJohn Dyson if (DEBUGFLOW > 0) 2116fd3bf775SJohn Dyson printf("_aio_aqueue: error: %d\n", error); 211784af4da6SJohn Dyson #endif 2118fd3bf775SJohn Dyson } 2119fd3bf775SJohn Dyson } 21202244ea07SJohn Dyson } 21212244ea07SJohn Dyson 2122a624e84fSJohn Dyson /* 2123a624e84fSJohn Dyson * If we haven't queued any, then just return error 2124a624e84fSJohn Dyson */ 2125a624e84fSJohn Dyson if (nentqueued == 0) { 2126a624e84fSJohn Dyson #if DEBUGAIO > 0 2127fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 2128a624e84fSJohn Dyson printf("lio_listio: none queued\n"); 2129a624e84fSJohn Dyson #endif 2130fd3bf775SJohn Dyson return 0; 2131a624e84fSJohn Dyson } 21322244ea07SJohn Dyson 2133a624e84fSJohn Dyson #if DEBUGAIO > 0 2134fd3bf775SJohn Dyson if (DEBUGFLOW > 0) 2135a624e84fSJohn Dyson printf("lio_listio: %d queued\n", nentqueued); 2136a624e84fSJohn Dyson #endif 2137a624e84fSJohn Dyson 2138a624e84fSJohn Dyson /* 2139a624e84fSJohn Dyson * 
Calculate the appropriate error return 2140a624e84fSJohn Dyson */ 21412244ea07SJohn Dyson runningcode = 0; 2142fd3bf775SJohn Dyson if (nerror) 21432244ea07SJohn Dyson runningcode = EIO; 21442244ea07SJohn Dyson 21452244ea07SJohn Dyson if (uap->mode == LIO_WAIT) { 21462244ea07SJohn Dyson while (1) { 21472244ea07SJohn Dyson int found; 2148fd3bf775SJohn Dyson found = 0; 2149fd3bf775SJohn Dyson for(i = 0; i < uap->nent; i++) { 2150fd3bf775SJohn Dyson int jobref, command; 21512244ea07SJohn Dyson 2152a624e84fSJohn Dyson /* 2153a624e84fSJohn Dyson * Fetch address of the control buf pointer in user space 2154a624e84fSJohn Dyson */ 21552244ea07SJohn Dyson iocb = (struct aiocb *) fuword((caddr_t) &cbptr[i]); 2156fd3bf775SJohn Dyson if (((int) iocb == -1) || ((int) iocb == 0)) 2157fd3bf775SJohn Dyson continue; 2158a624e84fSJohn Dyson 2159a624e84fSJohn Dyson /* 2160a624e84fSJohn Dyson * Fetch the associated command from user space 2161a624e84fSJohn Dyson */ 21622244ea07SJohn Dyson command = fuword(&iocb->aio_lio_opcode); 2163fd3bf775SJohn Dyson if (command == LIO_NOP) { 2164fd3bf775SJohn Dyson found++; 21652244ea07SJohn Dyson continue; 2166fd3bf775SJohn Dyson } 2167a624e84fSJohn Dyson 21682244ea07SJohn Dyson jobref = fuword(&iocb->_aiocb_private.kernelinfo); 21692244ea07SJohn Dyson 21702244ea07SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_jobdone); 21712244ea07SJohn Dyson cb; 21722244ea07SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 2173fd3bf775SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == 2174fd3bf775SJohn Dyson jobref) { 217584af4da6SJohn Dyson if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) { 217684af4da6SJohn Dyson curproc->p_stats->p_ru.ru_oublock += 217784af4da6SJohn Dyson cb->outputcharge; 217884af4da6SJohn Dyson cb->outputcharge = 0; 217984af4da6SJohn Dyson } else if (cb->uaiocb.aio_lio_opcode == LIO_READ) { 218084af4da6SJohn Dyson curproc->p_stats->p_ru.ru_inblock += 218184af4da6SJohn Dyson cb->inputcharge; 218284af4da6SJohn Dyson cb->inputcharge = 0; 
218384af4da6SJohn Dyson } 21842244ea07SJohn Dyson found++; 21852244ea07SJohn Dyson break; 21862244ea07SJohn Dyson } 21872244ea07SJohn Dyson } 2188fd3bf775SJohn Dyson 218984af4da6SJohn Dyson s = splbio(); 219084af4da6SJohn Dyson for (cb = TAILQ_FIRST(&ki->kaio_bufdone); 219184af4da6SJohn Dyson cb; 219284af4da6SJohn Dyson cb = TAILQ_NEXT(cb, plist)) { 219384af4da6SJohn Dyson if (((int) cb->uaiocb._aiocb_private.kernelinfo) == 219484af4da6SJohn Dyson jobref) { 219584af4da6SJohn Dyson found++; 219684af4da6SJohn Dyson break; 2197fd3bf775SJohn Dyson } 21982244ea07SJohn Dyson } 219984af4da6SJohn Dyson splx(s); 220084af4da6SJohn Dyson 220184af4da6SJohn Dyson } 22022244ea07SJohn Dyson 2203a624e84fSJohn Dyson /* 2204a624e84fSJohn Dyson * If all I/Os have been disposed of, then we can return 2205a624e84fSJohn Dyson */ 2206fd3bf775SJohn Dyson if (found == nentqueued) { 22072244ea07SJohn Dyson return runningcode; 22082244ea07SJohn Dyson } 22092244ea07SJohn Dyson 2210fd3bf775SJohn Dyson ki->kaio_flags |= KAIO_WAKEUP; 22112244ea07SJohn Dyson error = tsleep(p, PRIBIO|PCATCH, "aiospn", 0); 22122244ea07SJohn Dyson 22132244ea07SJohn Dyson if (error == EINTR) { 22142244ea07SJohn Dyson return EINTR; 22152244ea07SJohn Dyson } else if (error == EWOULDBLOCK) { 22162244ea07SJohn Dyson return EAGAIN; 22172244ea07SJohn Dyson } 22182244ea07SJohn Dyson 22192244ea07SJohn Dyson } 22202244ea07SJohn Dyson } 22212244ea07SJohn Dyson 22222244ea07SJohn Dyson return runningcode; 2223ee877a35SJohn Dyson } 2224fd3bf775SJohn Dyson 222584af4da6SJohn Dyson /* 222684af4da6SJohn Dyson * This is a wierd hack so that we can post a signal. It is safe 222784af4da6SJohn Dyson * to do so from a timeout routine, but *not* from an interrupt routine. 
222884af4da6SJohn Dyson */ 222984af4da6SJohn Dyson static void 223084af4da6SJohn Dyson process_signal(void *ljarg) 223184af4da6SJohn Dyson { 223284af4da6SJohn Dyson struct aio_liojob *lj = ljarg; 223384af4da6SJohn Dyson if (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL) { 223484af4da6SJohn Dyson if (lj->lioj_queue_count == lj->lioj_queue_finished_count) { 223584af4da6SJohn Dyson psignal(lj->lioj_ki->kaio_p, lj->lioj_signal.sigev_signo); 223684af4da6SJohn Dyson lj->lioj_flags |= LIOJ_SIGNAL_POSTED; 223784af4da6SJohn Dyson } 223884af4da6SJohn Dyson } 223984af4da6SJohn Dyson } 224084af4da6SJohn Dyson 224184af4da6SJohn Dyson /* 224284af4da6SJohn Dyson * Interrupt handler for physio, performs the necessary process wakeups, 224384af4da6SJohn Dyson * and signals. 224484af4da6SJohn Dyson */ 2245fd3bf775SJohn Dyson static void 2246fd3bf775SJohn Dyson aio_physwakeup(bp) 2247fd3bf775SJohn Dyson struct buf *bp; 2248fd3bf775SJohn Dyson { 224984af4da6SJohn Dyson struct aiocblist *aiocbe; 2250fd3bf775SJohn Dyson struct proc *p; 2251fd3bf775SJohn Dyson struct kaioinfo *ki; 225284af4da6SJohn Dyson struct aio_liojob *lj; 2253fd3bf775SJohn Dyson 2254fd3bf775SJohn Dyson wakeup((caddr_t) bp); 2255fd3bf775SJohn Dyson bp->b_flags &= ~B_CALL; 225684af4da6SJohn Dyson bp->b_flags |= B_DONE; 2257fd3bf775SJohn Dyson 225884af4da6SJohn Dyson aiocbe = (struct aiocblist *)bp->b_spc; 225984af4da6SJohn Dyson if (aiocbe) { 226084af4da6SJohn Dyson p = bp->b_proc; 226184af4da6SJohn Dyson 226284af4da6SJohn Dyson aiocbe->jobstate = JOBST_JOBBFINISHED; 226384af4da6SJohn Dyson aiocbe->uaiocb._aiocb_private.status -= bp->b_resid; 226484af4da6SJohn Dyson aiocbe->uaiocb._aiocb_private.error = 0; 226584af4da6SJohn Dyson aiocbe->jobflags |= AIOCBLIST_DONE; 226684af4da6SJohn Dyson 226784af4da6SJohn Dyson if (bp->b_flags & B_ERROR) { 226884af4da6SJohn Dyson aiocbe->uaiocb._aiocb_private.error = bp->b_error; 226984af4da6SJohn Dyson } 227084af4da6SJohn Dyson 227184af4da6SJohn Dyson lj = aiocbe->lio; 
227284af4da6SJohn Dyson if (lj) { 227384af4da6SJohn Dyson lj->lioj_buffer_finished_count++; 227484af4da6SJohn Dyson /* 227584af4da6SJohn Dyson * wakeup/signal if all of the interrupt jobs are done 227684af4da6SJohn Dyson */ 227784af4da6SJohn Dyson if (lj->lioj_buffer_finished_count == lj->lioj_buffer_count) { 227884af4da6SJohn Dyson /* 227984af4da6SJohn Dyson * post a signal if it is called for 228084af4da6SJohn Dyson */ 228184af4da6SJohn Dyson if ((lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) == 228284af4da6SJohn Dyson LIOJ_SIGNAL) { 228384af4da6SJohn Dyson lj->lioj_flags |= LIOJ_SIGNAL_POSTED; 228484af4da6SJohn Dyson timeout(process_signal, lj, 0); 228584af4da6SJohn Dyson } 228684af4da6SJohn Dyson } 228784af4da6SJohn Dyson } 228884af4da6SJohn Dyson 2289fd3bf775SJohn Dyson ki = p->p_aioinfo; 229084af4da6SJohn Dyson if (ki) { 229184af4da6SJohn Dyson ki->kaio_buffer_finished_count++; 229284af4da6SJohn Dyson TAILQ_REMOVE(&aio_bufjobs, aiocbe, list); 229384af4da6SJohn Dyson TAILQ_REMOVE(&ki->kaio_bufqueue, aiocbe, plist); 229484af4da6SJohn Dyson TAILQ_INSERT_TAIL(&ki->kaio_bufdone, aiocbe, plist); 229584af4da6SJohn Dyson /* 229684af4da6SJohn Dyson * and do the wakeup 229784af4da6SJohn Dyson */ 229884af4da6SJohn Dyson if (ki->kaio_flags & (KAIO_RUNDOWN|KAIO_WAKEUP)) { 2299fd3bf775SJohn Dyson ki->kaio_flags &= ~KAIO_WAKEUP; 2300fd3bf775SJohn Dyson wakeup(p); 2301fd3bf775SJohn Dyson } 2302fd3bf775SJohn Dyson } 2303fd3bf775SJohn Dyson } 230484af4da6SJohn Dyson } 2305