/*-
 * Copyright (c) 1997 John S. Dyson. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. John S. Dyson's name may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * DISCLAIMER: This code isn't warranted to do anything useful. Anything
 * bad that happens because of using this software isn't the responsibility
 * of the author. This software is distributed AS-IS.
 */

/*
 * This file contains support for the POSIX 1003.1B AIO/LIO facility.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/capsicum.h>
#include <sys/eventhandler.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/kthread.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/unistd.h>
#include <sys/posix4.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/protosw.h>
#include <sys/rwlock.h>
#include <sys/sema.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syscall.h>
#include <sys/sysent.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/mount.h>
#include <geom/geom.h>

#include <machine/atomic.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/uma.h>
#include <sys/aio.h>

/*
 * Counter for allocating reference ids to new jobs.  Wrapped to 1 on
 * overflow. (XXX will be removed soon.)
 */
static u_long jobrefid;

/*
 * Counter for aio_fsync.
 */
static uint64_t jobseqno;

#ifndef MAX_AIO_PER_PROC
#define	MAX_AIO_PER_PROC	32
#endif

#ifndef MAX_AIO_QUEUE_PER_PROC
#define	MAX_AIO_QUEUE_PER_PROC	256	/* Bigger than AIO_LISTIO_MAX */
#endif

#ifndef MAX_AIO_QUEUE
#define	MAX_AIO_QUEUE		1024	/* Bigger than AIO_LISTIO_MAX */
#endif

#ifndef MAX_BUF_AIO
#define	MAX_BUF_AIO		16
#endif

FEATURE(aio, "Asynchronous I/O");

static MALLOC_DEFINE(M_LIO, "lio", "listio aio control block list");

static SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0,
    "Async IO management");

static int enable_aio_unsafe = 0;
SYSCTL_INT(_vfs_aio, OID_AUTO, enable_unsafe, CTLFLAG_RW, &enable_aio_unsafe, 0,
    "Permit asynchronous IO on all file types, not just known-safe types");

static int max_aio_procs = MAX_AIO_PROCS;
SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs, CTLFLAG_RW, &max_aio_procs, 0,
    "Maximum number of kernel processes to use for handling async IO");

static int num_aio_procs = 0;
SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs, CTLFLAG_RD, &num_aio_procs, 0,
    "Number of presently active kernel processes for async IO");

/*
 * The code will adjust the actual number of AIO processes towards this
 * number when it gets a chance.
 */
static int target_aio_procs = TARGET_AIO_PROCS;
SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs, CTLFLAG_RW, &target_aio_procs,
    0,
    "Preferred number of ready kernel processes for async IO");

static int max_queue_count = MAX_AIO_QUEUE;
SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue, CTLFLAG_RW, &max_queue_count, 0,
    "Maximum number of aio requests to queue, globally");

static int num_queue_count = 0;
SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count, CTLFLAG_RD, &num_queue_count, 0,
    "Number of queued aio requests");

static int num_buf_aio = 0;
SYSCTL_INT(_vfs_aio, OID_AUTO, num_buf_aio, CTLFLAG_RD, &num_buf_aio, 0,
    "Number of aio requests presently handled by the buf subsystem");

/* Number of async I/O processes in the process of being started */
/* XXX This should be local to aio_aqueue() */
static int num_aio_resv_start = 0;

static int aiod_lifetime;
SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_lifetime, CTLFLAG_RW, &aiod_lifetime, 0,
    "Maximum lifetime for idle aiod");

static int max_aio_per_proc = MAX_AIO_PER_PROC;
SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc, CTLFLAG_RW, &max_aio_per_proc,
    0,
    "Maximum active aio requests per process (stored in the process)");

static int max_aio_queue_per_proc = MAX_AIO_QUEUE_PER_PROC;
SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc, CTLFLAG_RW,
    &max_aio_queue_per_proc, 0,
    "Maximum queued aio requests per process (stored in the process)");

static int max_buf_aio = MAX_BUF_AIO;
SYSCTL_INT(_vfs_aio, OID_AUTO, max_buf_aio, CTLFLAG_RW, &max_buf_aio, 0,
    "Maximum buf aio requests per process (stored in the process)");

#ifdef COMPAT_FREEBSD6
typedef struct oaiocb {
	int	aio_fildes;		/* File descriptor */
	off_t	aio_offset;		/* File offset for I/O */
	volatile void *aio_buf;		/* I/O buffer in process space */
	size_t	aio_nbytes;		/* Number of bytes for I/O */
	struct	osigevent aio_sigevent;	/* Signal to deliver */
	int	aio_lio_opcode;		/* LIO opcode */
	int	aio_reqprio;		/* Request priority -- ignored */
	struct	__aiocb_private	_aiocb_private;
} oaiocb_t;
#endif

/*
 * Below is a key of locks used to protect each member of struct kaiocb,
 * aioliojob and kaioinfo and any backends.
 *
 * * - need not be protected
 * a - locked by kaioinfo lock
 * b - locked by the backend lock; the backend lock can be NULL in some
 *     cases, for example for BIO, in which case the proc lock is reused.
 * c - locked by aio_job_mtx, the lock for the generic file I/O backend.
 */

/*
 * If the routine that services an AIO request blocks while running in an
 * AIO kernel process it can starve other I/O requests.  BIO requests
 * queued via aio_qphysio() complete in GEOM and do not use AIO kernel
 * processes at all.  Socket I/O requests use a separate pool of
 * kprocs and also force non-blocking I/O.  Other file I/O requests
 * use the generic fo_read/fo_write operations which can block.  The
 * fsync and mlock operations can also block while executing.  Ideally
 * none of these requests would block while executing.
 *
 * Note that the service routines cannot toggle O_NONBLOCK in the file
 * structure directly while handling a request due to races with
 * userland threads.
 */

/* jobflags */
#define	KAIOCB_QUEUEING		0x01
#define	KAIOCB_CANCELLED	0x02
#define	KAIOCB_CANCELLING	0x04
#define	KAIOCB_CHECKSYNC	0x08
#define	KAIOCB_CLEARED		0x10
#define	KAIOCB_FINISHED		0x20

/*
 * AIO process info
 */
#define	AIOP_FREE	0x1			/* proc on free queue */

struct aioproc {
	int	aioprocflags;			/* (c) AIO proc flags */
	TAILQ_ENTRY(aioproc) list;		/* (c) list of processes */
	struct	proc *aioproc;			/* (*) the AIO proc */
};

/*
 * data-structure for lio signal management
 */
struct aioliojob {
	int	lioj_flags;			/* (a) listio flags */
	int	lioj_count;			/* (a) count of aiocbs in this lio */
	int	lioj_finished_count;		/* (a) count of finished aiocbs */
	struct	sigevent lioj_signal;		/* (a) signal on all I/O done */
	TAILQ_ENTRY(aioliojob) lioj_list;	/* (a) lio list */
	struct	knlist klist;			/* (a) list of knotes */
	ksiginfo_t lioj_ksi;			/* (a) Realtime signal info */
};

#define	LIOJ_SIGNAL		0x1	/* signal on all done (lio) */
#define	LIOJ_SIGNAL_POSTED	0x2	/* signal has been posted */
#define	LIOJ_KEVENT_POSTED	0x4	/* kevent triggered */

/*
 * per process aio data structure
 */
struct kaioinfo {
	struct	mtx kaio_mtx;		/* the lock to protect this struct */
	int	kaio_flags;		/* (a) per process kaio flags */
	int	kaio_maxactive_count;	/* (*) maximum number of AIOs */
	int	kaio_active_count;	/* (c) number of currently used AIOs */
	int	kaio_qallowed_count;	/* (*) maximum size of AIO queue */
	int	kaio_count;		/* (a) size of AIO queue */
	int	kaio_ballowed_count;	/* (*) maximum number of buffers */
	int	kaio_buffer_count;	/* (a) number of physio buffers */
	TAILQ_HEAD(,kaiocb) kaio_all;	/* (a) all AIOs in a process */
	TAILQ_HEAD(,kaiocb) kaio_done;	/* (a) done queue for process */
	TAILQ_HEAD(,aioliojob) kaio_liojoblist; /* (a) list of lio jobs */
	TAILQ_HEAD(,kaiocb) kaio_jobqueue;	/* (a) job queue for process */
	TAILQ_HEAD(,kaiocb) kaio_syncqueue;	/* (a) queue for aio_fsync */
	TAILQ_HEAD(,kaiocb) kaio_syncready;	/* (a) second q for aio_fsync */
	struct	task kaio_task;		/* (*) task to kick aio processes */
	struct	task kaio_sync_task;	/* (*) task to schedule fsync jobs */
};

#define	AIO_LOCK(ki)		mtx_lock(&(ki)->kaio_mtx)
#define	AIO_UNLOCK(ki)		mtx_unlock(&(ki)->kaio_mtx)
#define	AIO_LOCK_ASSERT(ki, f)	mtx_assert(&(ki)->kaio_mtx, (f))
#define	AIO_MTX(ki)		(&(ki)->kaio_mtx)

#define	KAIO_RUNDOWN	0x1	/* process is being run down */
#define	KAIO_WAKEUP	0x2	/* wakeup process when AIO completes */

/*
 * Operations used to interact with userland aio control blocks.
 * Different ABIs provide their own operations.
 */
struct aiocb_ops {
	int	(*copyin)(struct aiocb *ujob, struct aiocb *kjob);
	long	(*fetch_status)(struct aiocb *ujob);
	long	(*fetch_error)(struct aiocb *ujob);
	int	(*store_status)(struct aiocb *ujob, long status);
	int	(*store_error)(struct aiocb *ujob, long error);
	int	(*store_kernelinfo)(struct aiocb *ujob, long jobref);
	int	(*store_aiocb)(struct aiocb **ujobp, struct aiocb *ujob);
};

static TAILQ_HEAD(,aioproc) aio_freeproc;	/* (c) Idle daemons */
static struct sema aio_newproc_sem;
static struct mtx aio_job_mtx;
static TAILQ_HEAD(,kaiocb) aio_jobs;		/* (c) Async job list */
static struct unrhdr *aiod_unr;

void		aio_init_aioinfo(struct proc *p);
static int	aio_onceonly(void);
static int	aio_free_entry(struct kaiocb *job);
static void	aio_process_rw(struct kaiocb *job);
static void	aio_process_sync(struct kaiocb *job);
static void	aio_process_mlock(struct kaiocb *job);
static void	aio_schedule_fsync(void *context, int pending);
static int	aio_newproc(int *);
int		aio_aqueue(struct thread *td, struct aiocb *ujob,
		    struct aioliojob *lio, int type, struct aiocb_ops *ops);
static int	aio_queue_file(struct file *fp, struct kaiocb *job);
static void	aio_physwakeup(struct bio *bp);
static void	aio_proc_rundown(void *arg, struct proc *p);
static void	aio_proc_rundown_exec(void *arg, struct proc *p,
		    struct image_params *imgp);
static int	aio_qphysio(struct proc *p, struct kaiocb *job);
static void	aio_daemon(void *param);
static void	aio_bio_done_notify(struct proc *userp, struct kaiocb *job);
static int	aio_kick(struct proc *userp);
static void	aio_kick_nowait(struct proc *userp);
static void	aio_kick_helper(void *context, int pending);
static int	filt_aioattach(struct knote *kn);
static void	filt_aiodetach(struct knote *kn);
static int	filt_aio(struct knote *kn, long hint);
static int	filt_lioattach(struct knote *kn);
static void	filt_liodetach(struct knote *kn);
static int	filt_lio(struct knote *kn, long hint);

/*
 * Zones for:
 *	kaio	Per process async io info
 *	aiop	async io process data
 *	aiocb	async io jobs
 *	aiol	list io job pointer - internal to aio_suspend XXX
 *	aiolio	list io jobs
 */
static uma_zone_t kaio_zone, aiop_zone, aiocb_zone, aiol_zone, aiolio_zone;

/* kqueue filters for aio */
static struct filterops aio_filtops = {
	.f_isfd = 0,
	.f_attach = filt_aioattach,
	.f_detach = filt_aiodetach,
	.f_event = filt_aio,
};
static struct filterops lio_filtops = {
	.f_isfd = 0,
	.f_attach = filt_lioattach,
	.f_detach = filt_liodetach,
	.f_event = filt_lio
};

static eventhandler_tag exit_tag, exec_tag;

TASKQUEUE_DEFINE_THREAD(aiod_kick);

/*
 * Main operations function for use as a kernel module.
 */
static int
aio_modload(struct module *module, int cmd, void *arg)
{
	int error = 0;

	switch (cmd) {
	case MOD_LOAD:
		aio_onceonly();
		break;
	case MOD_SHUTDOWN:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return (error);
}

static moduledata_t aio_mod = {
	"aio",
	&aio_modload,
	NULL
};

DECLARE_MODULE(aio, aio_mod, SI_SUB_VFS, SI_ORDER_ANY);
MODULE_VERSION(aio, 1);

/*
 * Startup initialization
 */
static int
aio_onceonly(void)
{

	exit_tag = EVENTHANDLER_REGISTER(process_exit, aio_proc_rundown, NULL,
	    EVENTHANDLER_PRI_ANY);
	exec_tag = EVENTHANDLER_REGISTER(process_exec, aio_proc_rundown_exec,
	    NULL, EVENTHANDLER_PRI_ANY);
	kqueue_add_filteropts(EVFILT_AIO, &aio_filtops);
	kqueue_add_filteropts(EVFILT_LIO, &lio_filtops);
	TAILQ_INIT(&aio_freeproc);
	sema_init(&aio_newproc_sem, 0, "aio_new_proc");
	mtx_init(&aio_job_mtx, "aio_job", NULL, MTX_DEF);
	TAILQ_INIT(&aio_jobs);
	aiod_unr = new_unrhdr(1, INT_MAX, NULL);
	kaio_zone = uma_zcreate("AIO", sizeof(struct kaioinfo), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	aiop_zone = uma_zcreate("AIOP", sizeof(struct aioproc), NULL,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	aiocb_zone = uma_zcreate("AIOCB", sizeof(struct kaiocb), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	aiol_zone = uma_zcreate("AIOL", AIO_LISTIO_MAX * sizeof(intptr_t), NULL,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	aiolio_zone = uma_zcreate("AIOLIO", sizeof(struct aioliojob), NULL,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	aiod_lifetime = AIOD_LIFETIME_DEFAULT;
	jobrefid = 1;
	p31b_setcfg(CTL_P1003_1B_ASYNCHRONOUS_IO, _POSIX_ASYNCHRONOUS_IO);
	p31b_setcfg(CTL_P1003_1B_AIO_LISTIO_MAX, AIO_LISTIO_MAX);
	p31b_setcfg(CTL_P1003_1B_AIO_MAX, MAX_AIO_QUEUE);
	p31b_setcfg(CTL_P1003_1B_AIO_PRIO_DELTA_MAX, 0);

	return (0);
}

/*
 * Init the per-process aioinfo structure.  The aioinfo limits are set
 * per-process for user limit (resource) management.
 */
void
aio_init_aioinfo(struct proc *p)
{
	struct kaioinfo *ki;

	ki = uma_zalloc(kaio_zone, M_WAITOK);
	mtx_init(&ki->kaio_mtx, "aiomtx", NULL, MTX_DEF | MTX_NEW);
	ki->kaio_flags = 0;
	ki->kaio_maxactive_count = max_aio_per_proc;
	ki->kaio_active_count = 0;
	ki->kaio_qallowed_count = max_aio_queue_per_proc;
	ki->kaio_count = 0;
	ki->kaio_ballowed_count = max_buf_aio;
	ki->kaio_buffer_count = 0;
	TAILQ_INIT(&ki->kaio_all);
	TAILQ_INIT(&ki->kaio_done);
	TAILQ_INIT(&ki->kaio_jobqueue);
	TAILQ_INIT(&ki->kaio_liojoblist);
	TAILQ_INIT(&ki->kaio_syncqueue);
	TAILQ_INIT(&ki->kaio_syncready);
	TASK_INIT(&ki->kaio_task, 0, aio_kick_helper, p);
	TASK_INIT(&ki->kaio_sync_task, 0, aio_schedule_fsync, ki);
	PROC_LOCK(p);
	if (p->p_aioinfo == NULL) {
		p->p_aioinfo = ki;
		PROC_UNLOCK(p);
	} else {
		PROC_UNLOCK(p);
		mtx_destroy(&ki->kaio_mtx);
		uma_zfree(kaio_zone, ki);
	}

	while (num_aio_procs < MIN(target_aio_procs, max_aio_procs))
		aio_newproc(NULL);
}

static int
aio_sendsig(struct proc *p, struct sigevent *sigev, ksiginfo_t *ksi)
{
	struct thread *td;
	int error;

	error = sigev_findtd(p, sigev, &td);
	if (error)
		return (error);
	if (!KSI_ONQ(ksi)) {
		ksiginfo_set_sigev(ksi, sigev);
		ksi->ksi_code = SI_ASYNCIO;
		ksi->ksi_flags |= KSI_EXT | KSI_INS;
		tdsendsignal(p, td, ksi->ksi_signo, ksi);
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Free a job entry.  Wait for completion if it is currently active, but don't
 * delay forever.  If we delay, we return a flag that says that we have to
 * restart the queue scan.
 */
static int
aio_free_entry(struct kaiocb *job)
{
	struct kaioinfo *ki;
	struct aioliojob *lj;
	struct proc *p;

	p = job->userproc;
	MPASS(curproc == p);
	ki = p->p_aioinfo;
	MPASS(ki != NULL);

	AIO_LOCK_ASSERT(ki, MA_OWNED);
	MPASS(job->jobflags & KAIOCB_FINISHED);

	atomic_subtract_int(&num_queue_count, 1);

	ki->kaio_count--;
	MPASS(ki->kaio_count >= 0);

	TAILQ_REMOVE(&ki->kaio_done, job, plist);
	TAILQ_REMOVE(&ki->kaio_all, job, allist);

	lj = job->lio;
	if (lj) {
		lj->lioj_count--;
		lj->lioj_finished_count--;

		if (lj->lioj_count == 0) {
			TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list);
			/* lio is going away, we need to destroy any knotes */
			knlist_delete(&lj->klist, curthread, 1);
			PROC_LOCK(p);
			sigqueue_take(&lj->lioj_ksi);
			PROC_UNLOCK(p);
			uma_zfree(aiolio_zone, lj);
		}
	}

	/* job is going away, we need to destroy any knotes */
	knlist_delete(&job->klist, curthread, 1);
	PROC_LOCK(p);
	sigqueue_take(&job->ksi);
	PROC_UNLOCK(p);

	AIO_UNLOCK(ki);

	/*
	 * The thread argument here is used to find the owning process
	 * and is also passed to fo_close() which may pass it to various
	 * places such as devsw close() routines.  Because of that, we
	 * need a thread pointer from the process owning the job that is
	 * persistent and won't disappear out from under us or move to
	 * another process.
	 *
	 * Currently, all the callers of this function call it to remove
	 * a kaiocb from the current process' job list either via a
	 * syscall or due to the current process calling exit() or
	 * execve().  Thus, we know that p == curproc.  We also know that
	 * curthread can't exit since we are curthread.
	 *
	 * Therefore, we use curthread as the thread to pass to
	 * knlist_delete().  This does mean that it is possible for the
	 * thread pointer at close time to differ from the thread pointer
	 * at open time, but this is already true of file descriptors in
	 * a multithreaded process.
	 */
	if (job->fd_file)
		fdrop(job->fd_file, curthread);
	crfree(job->cred);
	uma_zfree(aiocb_zone, job);
	AIO_LOCK(ki);

	return (0);
}

static void
aio_proc_rundown_exec(void *arg, struct proc *p,
    struct image_params *imgp __unused)
{
	aio_proc_rundown(arg, p);
}

static int
aio_cancel_job(struct proc *p, struct kaioinfo *ki, struct kaiocb *job)
{
	aio_cancel_fn_t *func;
	int cancelled;

	AIO_LOCK_ASSERT(ki, MA_OWNED);
	if (job->jobflags & (KAIOCB_CANCELLED | KAIOCB_FINISHED))
		return (0);
	MPASS((job->jobflags & KAIOCB_CANCELLING) == 0);
	job->jobflags |= KAIOCB_CANCELLED;

	func = job->cancel_fn;

	/*
	 * If there is no cancel routine, just leave the job marked as
	 * cancelled.  The job should be in active use by a caller who
	 * should complete it normally or when it fails to install a
	 * cancel routine.
	 */
	if (func == NULL)
		return (0);

	/*
	 * Set the CANCELLING flag so that aio_complete() will defer
	 * completions of this job.  This prevents the job from being
	 * freed out from under the cancel callback.  After the
	 * callback any deferred completion (whether from the callback
	 * or any other source) will be completed.
	 */
	job->jobflags |= KAIOCB_CANCELLING;
	AIO_UNLOCK(ki);
	func(job);
	AIO_LOCK(ki);
	job->jobflags &= ~KAIOCB_CANCELLING;
	if (job->jobflags & KAIOCB_FINISHED) {
		cancelled = job->uaiocb._aiocb_private.error == ECANCELED;
		TAILQ_REMOVE(&ki->kaio_jobqueue, job, plist);
		aio_bio_done_notify(p, job);
	} else {
		/*
		 * The cancel callback might have scheduled an
		 * operation to cancel this request, but it is
		 * only counted as cancelled if the request is
		 * cancelled when the callback returns.
		 */
		cancelled = 0;
	}
	return (cancelled);
}

/*
 * Rundown the jobs for a given process.
 */
static void
aio_proc_rundown(void *arg, struct proc *p)
{
	struct kaioinfo *ki;
	struct aioliojob *lj;
	struct kaiocb *job, *jobn;

	KASSERT(curthread->td_proc == p,
	    ("%s: called on non-curproc", __func__));
	ki = p->p_aioinfo;
	if (ki == NULL)
		return;

	AIO_LOCK(ki);
	ki->kaio_flags |= KAIO_RUNDOWN;

restart:

	/*
	 * Try to cancel all pending requests.  This code simulates
	 * aio_cancel on all pending I/O requests.
	 */
	TAILQ_FOREACH_SAFE(job, &ki->kaio_jobqueue, plist, jobn) {
		aio_cancel_job(p, ki, job);
	}

	/* Wait for all running I/O to be finished */
	if (TAILQ_FIRST(&ki->kaio_jobqueue) || ki->kaio_active_count != 0) {
		ki->kaio_flags |= KAIO_WAKEUP;
		msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO, "aioprn", hz);
		goto restart;
	}

	/* Free all completed I/O requests. */
	while ((job = TAILQ_FIRST(&ki->kaio_done)) != NULL)
		aio_free_entry(job);

	while ((lj = TAILQ_FIRST(&ki->kaio_liojoblist)) != NULL) {
		if (lj->lioj_count == 0) {
			TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list);
			knlist_delete(&lj->klist, curthread, 1);
			PROC_LOCK(p);
			sigqueue_take(&lj->lioj_ksi);
			PROC_UNLOCK(p);
			uma_zfree(aiolio_zone, lj);
		} else {
			panic("LIO job not cleaned up: C:%d, FC:%d\n",
			    lj->lioj_count, lj->lioj_finished_count);
		}
	}
	AIO_UNLOCK(ki);
	taskqueue_drain(taskqueue_aiod_kick, &ki->kaio_task);
	taskqueue_drain(taskqueue_aiod_kick, &ki->kaio_sync_task);
	mtx_destroy(&ki->kaio_mtx);
	uma_zfree(kaio_zone, ki);
	p->p_aioinfo = NULL;
}

/*
 * Select a job to run (called by an AIO daemon).
 */
static struct kaiocb *
aio_selectjob(struct aioproc *aiop)
{
	struct kaiocb *job;
	struct kaioinfo *ki;
	struct proc *userp;

	mtx_assert(&aio_job_mtx, MA_OWNED);
restart:
	TAILQ_FOREACH(job, &aio_jobs, list) {
		userp = job->userproc;
		ki = userp->p_aioinfo;

		if (ki->kaio_active_count < ki->kaio_maxactive_count) {
			TAILQ_REMOVE(&aio_jobs, job, list);
			if (!aio_clear_cancel_function(job))
				goto restart;

			/* Account for currently active jobs. */
			ki->kaio_active_count++;
			break;
		}
	}
	return (job);
}

/*
 * Move all data to a permanent storage device.  This code
 * simulates the fsync syscall.
 */
static int
aio_fsync_vnode(struct thread *td, struct vnode *vp)
{
	struct mount *mp;
	int error;

	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
		goto drop;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_object != NULL) {
		VM_OBJECT_WLOCK(vp->v_object);
		vm_object_page_clean(vp->v_object, 0, 0, 0);
		VM_OBJECT_WUNLOCK(vp->v_object);
	}
	error = VOP_FSYNC(vp, MNT_WAIT, td);

	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
drop:
	return (error);
}

/*
 * The AIO processing activity for LIO_READ/LIO_WRITE.  This is the code that
 * does the I/O request for the non-physio version of the operations.  The
 * normal vn operations are used, and this code should work in all instances
 * for every type of file, including pipes, sockets, fifos, and regular files.
 *
 * XXX I don't think it works well for sockets, pipes, and fifos.
 */
static void
aio_process_rw(struct kaiocb *job)
{
	struct ucred *td_savedcred;
	struct thread *td;
	struct aiocb *cb;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	ssize_t cnt;
	int error;
	int oublock_st, oublock_end;
	int inblock_st, inblock_end;

	KASSERT(job->uaiocb.aio_lio_opcode == LIO_READ ||
	    job->uaiocb.aio_lio_opcode == LIO_WRITE,
	    ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode));

	aio_switch_vmspace(job);
	td = curthread;
	td_savedcred = td->td_ucred;
	td->td_ucred = job->cred;
	cb = &job->uaiocb;
	fp = job->fd_file;

	aiov.iov_base = (void *)(uintptr_t)cb->aio_buf;
	aiov.iov_len = cb->aio_nbytes;

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = cb->aio_offset;
	auio.uio_resid = cb->aio_nbytes;
	cnt = cb->aio_nbytes;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	inblock_st = td->td_ru.ru_inblock;
	oublock_st = td->td_ru.ru_oublock;
	/*
	 * aio_aqueue() acquires a reference to the file that is
	 * released in aio_free_entry().
	 */
	if (cb->aio_lio_opcode == LIO_READ) {
		auio.uio_rw = UIO_READ;
		if (auio.uio_resid == 0)
			error = 0;
		else
			error = fo_read(fp, &auio, fp->f_cred, FOF_OFFSET, td);
	} else {
		if (fp->f_type == DTYPE_VNODE)
			bwillwrite();
		auio.uio_rw = UIO_WRITE;
		error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, td);
	}
	inblock_end = td->td_ru.ru_inblock;
	oublock_end = td->td_ru.ru_oublock;

	job->inputcharge = inblock_end - inblock_st;
	job->outputcharge = oublock_end - oublock_st;

	if ((error) && (auio.uio_resid != cnt)) {
		if (error == ERESTART || error == EINTR || error == EWOULDBLOCK)
			error = 0;
		if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE)) {
			PROC_LOCK(job->userproc);
			kern_psignal(job->userproc, SIGPIPE);
			PROC_UNLOCK(job->userproc);
		}
	}

	cnt -= auio.uio_resid;
	td->td_ucred = td_savedcred;
	if (error)
		aio_complete(job, -1, error);
	else
		aio_complete(job, cnt, 0);
}

static void
aio_process_sync(struct kaiocb *job)
{
	struct thread *td = curthread;
	struct ucred *td_savedcred = td->td_ucred;
	struct file *fp = job->fd_file;
	int error = 0;

	KASSERT(job->uaiocb.aio_lio_opcode == LIO_SYNC,
	    ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode));

	td->td_ucred = job->cred;
	if (fp->f_vnode != NULL)
		error = aio_fsync_vnode(td, fp->f_vnode);
	td->td_ucred = td_savedcred;
	if (error)
		aio_complete(job, -1, error);
	else
		aio_complete(job, 0, 0);
}

static void
aio_process_mlock(struct kaiocb *job)
{
	struct aiocb *cb = &job->uaiocb;
	int error;

	KASSERT(job->uaiocb.aio_lio_opcode == LIO_MLOCK,
	    ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode));

	aio_switch_vmspace(job);
	error = vm_mlock(job->userproc, job->cred,
	    __DEVOLATILE(void *, cb->aio_buf), cb->aio_nbytes);
	if (error)
		aio_complete(job, -1, error);
	else
		aio_complete(job, 0, 0);
}

static void
aio_bio_done_notify(struct proc *userp, struct kaiocb *job)
{
	struct aioliojob *lj;
	struct kaioinfo *ki;
	struct kaiocb *sjob, *sjobn;
	int lj_done;
	bool schedule_fsync;

	ki = userp->p_aioinfo;
	AIO_LOCK_ASSERT(ki, MA_OWNED);
	lj = job->lio;
	lj_done = 0;
	if (lj) {
		lj->lioj_finished_count++;
		if (lj->lioj_count == lj->lioj_finished_count)
			lj_done = 1;
	}
	TAILQ_INSERT_TAIL(&ki->kaio_done, job, plist);
	MPASS(job->jobflags & KAIOCB_FINISHED);

	if (ki->kaio_flags & KAIO_RUNDOWN)
		goto notification_done;

	if (job->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL ||
	    job->uaiocb.aio_sigevent.sigev_notify == SIGEV_THREAD_ID)
		aio_sendsig(userp, &job->uaiocb.aio_sigevent, &job->ksi);

	KNOTE_LOCKED(&job->klist, 1);

	if (lj_done) {
		if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) {
			lj->lioj_flags |= LIOJ_KEVENT_POSTED;
			KNOTE_LOCKED(&lj->klist, 1);
		}
		if ((lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED))
		    == LIOJ_SIGNAL
		    && (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL ||
		    lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID)) {
			aio_sendsig(userp, &lj->lioj_signal, &lj->lioj_ksi);
			lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
		}
	}

notification_done:
	if (job->jobflags & KAIOCB_CHECKSYNC) {
		schedule_fsync = false;
		TAILQ_FOREACH_SAFE(sjob, &ki->kaio_syncqueue, list, sjobn) {
			if (job->fd_file == sjob->fd_file &&
			    job->seqno < sjob->seqno) {
				if (--sjob->pending == 0) {
					TAILQ_REMOVE(&ki->kaio_syncqueue, sjob,
					    list);
					if (!aio_clear_cancel_function(sjob))
						continue;
					TAILQ_INSERT_TAIL(&ki->kaio_syncready,
					    sjob, list);
					schedule_fsync = true;
				}
			}
		}
		if (schedule_fsync)
			taskqueue_enqueue(taskqueue_aiod_kick,
			    &ki->kaio_sync_task);
	}
	if (ki->kaio_flags & KAIO_WAKEUP) {
		ki->kaio_flags &= ~KAIO_WAKEUP;
		wakeup(&userp->p_aioinfo);
	}
}

static void
aio_schedule_fsync(void *context, int pending)
{
	struct kaioinfo *ki;
	struct kaiocb *job;

	ki = context;
	AIO_LOCK(ki);
	while (!TAILQ_EMPTY(&ki->kaio_syncready)) {
		job = TAILQ_FIRST(&ki->kaio_syncready);
		TAILQ_REMOVE(&ki->kaio_syncready, job, list);
		AIO_UNLOCK(ki);
		aio_schedule(job, aio_process_sync);
		AIO_LOCK(ki);
	}
	AIO_UNLOCK(ki);
}

bool
aio_cancel_cleared(struct kaiocb *job)
{
	struct kaioinfo *ki;

	/*
	 * The caller should hold the same queue lock held when
	 * aio_clear_cancel_function() was called and set this flag
	 * ensuring this check sees an up-to-date value.  However,
	 * there is no way to assert that.
	 */
	ki = job->userproc->p_aioinfo;
	return ((job->jobflags & KAIOCB_CLEARED) != 0);
}

bool
aio_clear_cancel_function(struct kaiocb *job)
{
	struct kaioinfo *ki;

	ki = job->userproc->p_aioinfo;
	AIO_LOCK(ki);
	MPASS(job->cancel_fn != NULL);
	if (job->jobflags & KAIOCB_CANCELLING) {
		job->jobflags |= KAIOCB_CLEARED;
		AIO_UNLOCK(ki);
		return (false);
	}
	job->cancel_fn = NULL;
	AIO_UNLOCK(ki);
	return (true);
}

bool
aio_set_cancel_function(struct kaiocb *job, aio_cancel_fn_t *func)
{
	struct kaioinfo *ki;

	ki = job->userproc->p_aioinfo;
	AIO_LOCK(ki);
	if (job->jobflags & KAIOCB_CANCELLED) {
		AIO_UNLOCK(ki);
		return (false);
	}
	job->cancel_fn = func;
	AIO_UNLOCK(ki);
	return (true);
}

void
aio_complete(struct kaiocb *job, long status, int error)
{
	struct kaioinfo *ki;
	struct proc *userp;

	job->uaiocb._aiocb_private.error = error;
	job->uaiocb._aiocb_private.status = status;

	userp = job->userproc;
	ki = userp->p_aioinfo;

	AIO_LOCK(ki);
	KASSERT(!(job->jobflags & KAIOCB_FINISHED),
	    ("duplicate aio_complete"));
	job->jobflags |= KAIOCB_FINISHED;
	if ((job->jobflags & (KAIOCB_QUEUEING | KAIOCB_CANCELLING)) == 0) {
		TAILQ_REMOVE(&ki->kaio_jobqueue, job, plist);
		aio_bio_done_notify(userp, job);
	}
	AIO_UNLOCK(ki);
}

void
aio_cancel(struct kaiocb *job)
{

	aio_complete(job, -1, ECANCELED);
}

void
aio_switch_vmspace(struct kaiocb *job)
{

	vmspace_switch_aio(job->userproc->p_vmspace);
}

/*
 * The AIO daemon: most of the actual work is done in aio_process_*,
 * but the setup (and address space mgmt) is done in this routine.
 */
static void
aio_daemon(void *_id)
{
	struct kaiocb *job;
	struct aioproc *aiop;
	struct kaioinfo *ki;
	struct proc *p;
	struct vmspace *myvm;
	struct thread *td = curthread;
	int id = (intptr_t)_id;

	/*
	 * Grab an extra reference on the daemon's vmspace so that it
	 * doesn't get freed by jobs that switch to a different
	 * vmspace.
	 */
	p = td->td_proc;
	myvm = vmspace_acquire_ref(p);

	KASSERT(p->p_textvp == NULL, ("kthread has a textvp"));

	/*
	 * Allocate and ready the aio control info.  There is one aiop structure
	 * per daemon.
	 */
	aiop = uma_zalloc(aiop_zone, M_WAITOK);
	aiop->aioproc = p;
	aiop->aioprocflags = 0;

	/*
	 * Wakeup parent process.  (Parent sleeps to keep from blasting away
	 * and creating too many daemons.)
	 */
	sema_post(&aio_newproc_sem);

	mtx_lock(&aio_job_mtx);
	for (;;) {
		/*
		 * Take daemon off of free queue
		 */
		if (aiop->aioprocflags & AIOP_FREE) {
			TAILQ_REMOVE(&aio_freeproc, aiop, list);
			aiop->aioprocflags &= ~AIOP_FREE;
		}

		/*
		 * Check for jobs.
		 */
		while ((job = aio_selectjob(aiop)) != NULL) {
			mtx_unlock(&aio_job_mtx);

			ki = job->userproc->p_aioinfo;
			job->handle_fn(job);

			mtx_lock(&aio_job_mtx);
			/* Decrement the active job count. */
			ki->kaio_active_count--;
		}

		/*
		 * Disconnect from user address space.
		 */
		if (p->p_vmspace != myvm) {
			mtx_unlock(&aio_job_mtx);
			vmspace_switch_aio(myvm);
			mtx_lock(&aio_job_mtx);
			/*
			 * We have to restart to avoid races; we only sleep
			 * if no job can be selected.
			 */
			continue;
		}

		mtx_assert(&aio_job_mtx, MA_OWNED);

		TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list);
		aiop->aioprocflags |= AIOP_FREE;

		/*
		 * If daemon is inactive for a long time, allow it to exit,
		 * thereby freeing resources.
		 */
		if (msleep(p, &aio_job_mtx, PRIBIO, "aiordy",
		    aiod_lifetime) == EWOULDBLOCK && TAILQ_EMPTY(&aio_jobs) &&
		    (aiop->aioprocflags & AIOP_FREE) &&
		    num_aio_procs > target_aio_procs)
			break;
	}
	TAILQ_REMOVE(&aio_freeproc, aiop, list);
	num_aio_procs--;
	mtx_unlock(&aio_job_mtx);
	uma_zfree(aiop_zone, aiop);
	free_unr(aiod_unr, id);
	vmspace_free(myvm);

	KASSERT(p->p_vmspace == myvm,
	    ("AIOD: bad vmspace for exiting daemon"));
	KASSERT(myvm->vm_refcnt > 1,
	    ("AIOD: bad vm refcnt for exiting daemon: %d", myvm->vm_refcnt));
	kproc_exit(0);
}

/*
 * Create a new AIO daemon.  This is mostly a kernel-thread fork routine.  The
 * AIO daemon modifies its environment itself.
 */
static int
aio_newproc(int *start)
{
	int error;
	struct proc *p;
	int id;

	id = alloc_unr(aiod_unr);
	error = kproc_create(aio_daemon, (void *)(intptr_t)id, &p,
	    RFNOWAIT, 0, "aiod%d", id);
	if (error == 0) {
		/*
		 * Wait until daemon is started.
		 */
		sema_wait(&aio_newproc_sem);
		mtx_lock(&aio_job_mtx);
		num_aio_procs++;
		if (start != NULL)
			(*start)--;
		mtx_unlock(&aio_job_mtx);
	} else {
		free_unr(aiod_unr, id);
	}
	return (error);
}

/*
 * Try the high-performance, low-overhead physio method for eligible
 * VCHR devices.  This method doesn't use an aio helper thread, and
 * thus has very low overhead.
 *
 * Assumes that the caller, aio_aqueue(), has incremented the file
 * structure's reference count, preventing its deallocation for the
 * duration of this call.
 */
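/*
 * In summary, a request is eligible for this path only when the descriptor
 * is a VCHR vnode whose cdev is disk-like (D_DISK), the device reports a
 * nonzero block size, the transfer length is a multiple of that block size
 * and fits within both si_iosize_max and MAXPHYS, and, when a mapped pbuf is
 * required, the per-process buffer quota (kaio_ballowed_count) has not been
 * exhausted.  Anything else returns -1 so the caller falls back to the
 * generic file path.
 */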
static int
aio_qphysio(struct proc *p, struct kaiocb *job)
{
	struct aiocb *cb;
	struct file *fp;
	struct bio *bp;
	struct buf *pbuf;
	struct vnode *vp;
	struct cdevsw *csw;
	struct cdev *dev;
	struct kaioinfo *ki;
	int error, ref, poff;
	vm_prot_t prot;

	cb = &job->uaiocb;
	fp = job->fd_file;

	if (fp == NULL || fp->f_type != DTYPE_VNODE)
		return (-1);

	vp = fp->f_vnode;
	if (vp->v_type != VCHR)
		return (-1);
	if (vp->v_bufobj.bo_bsize == 0)
		return (-1);
	if (cb->aio_nbytes % vp->v_bufobj.bo_bsize)
		return (-1);

	ref = 0;
	csw = devvn_refthread(vp, &dev, &ref);
	if (csw == NULL)
		return (ENXIO);

	if ((csw->d_flags & D_DISK) == 0) {
		error = -1;
		goto unref;
	}
	if (cb->aio_nbytes > dev->si_iosize_max) {
		error = -1;
		goto unref;
	}

	ki = p->p_aioinfo;
	poff = (vm_offset_t)cb->aio_buf & PAGE_MASK;
	if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) {
		if (cb->aio_nbytes > MAXPHYS) {
			error = -1;
			goto unref;
		}

		pbuf = NULL;
	} else {
		if (cb->aio_nbytes > MAXPHYS - poff) {
			error = -1;
			goto unref;
		}
		if (ki->kaio_buffer_count >= ki->kaio_ballowed_count) {
			error = -1;
			goto unref;
		}

		job->pbuf = pbuf = (struct buf *)getpbuf(NULL);
		BUF_KERNPROC(pbuf);
		AIO_LOCK(ki);
		ki->kaio_buffer_count++;
		AIO_UNLOCK(ki);
	}
	job->bp = bp = g_alloc_bio();

	bp->bio_length = cb->aio_nbytes;
	bp->bio_bcount = cb->aio_nbytes;
	bp->bio_done = aio_physwakeup;
	bp->bio_data = (void *)(uintptr_t)cb->aio_buf;
	bp->bio_offset = cb->aio_offset;
	bp->bio_cmd = cb->aio_lio_opcode == LIO_WRITE ? BIO_WRITE : BIO_READ;
	bp->bio_dev = dev;
	bp->bio_caller1 = (void *)job;

	prot = VM_PROT_READ;
	if (cb->aio_lio_opcode == LIO_READ)
		prot |= VM_PROT_WRITE;	/* Less backwards than it looks */
	job->npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
	    (vm_offset_t)bp->bio_data, bp->bio_length, prot, job->pages,
	    nitems(job->pages));
	if (job->npages < 0) {
		error = EFAULT;
		goto doerror;
	}
	if (pbuf != NULL) {
		pmap_qenter((vm_offset_t)pbuf->b_data,
		    job->pages, job->npages);
		bp->bio_data = pbuf->b_data + poff;
		atomic_add_int(&num_buf_aio, 1);
	} else {
		bp->bio_ma = job->pages;
		bp->bio_ma_n = job->npages;
		bp->bio_ma_offset = poff;
		bp->bio_data = unmapped_buf;
		bp->bio_flags |= BIO_UNMAPPED;
	}

	/* Perform transfer. */
	csw->d_strategy(bp);
	dev_relthread(dev, ref);
	return (0);

doerror:
	if (pbuf != NULL) {
		AIO_LOCK(ki);
		ki->kaio_buffer_count--;
		AIO_UNLOCK(ki);
		relpbuf(pbuf, NULL);
		job->pbuf = NULL;
	}
	g_destroy_bio(bp);
	job->bp = NULL;
unref:
	dev_relthread(dev, ref);
	return (error);
}

#ifdef COMPAT_FREEBSD6
static int
convert_old_sigevent(struct osigevent *osig, struct sigevent *nsig)
{

	/*
	 * Only SIGEV_NONE, SIGEV_SIGNAL, and SIGEV_KEVENT are
	 * supported by AIO with the old sigevent structure.
	 */
	nsig->sigev_notify = osig->sigev_notify;
	switch (nsig->sigev_notify) {
	case SIGEV_NONE:
		break;
	case SIGEV_SIGNAL:
		nsig->sigev_signo = osig->__sigev_u.__sigev_signo;
		break;
	case SIGEV_KEVENT:
		nsig->sigev_notify_kqueue =
		    osig->__sigev_u.__sigev_notify_kqueue;
		nsig->sigev_value.sival_ptr = osig->sigev_value.sival_ptr;
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

static int
aiocb_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob)
{
	struct oaiocb *ojob;
	int error;

	bzero(kjob, sizeof(struct aiocb));
	error = copyin(ujob, kjob, sizeof(struct oaiocb));
	if (error)
		return (error);
	ojob = (struct oaiocb *)kjob;
	return (convert_old_sigevent(&ojob->aio_sigevent, &kjob->aio_sigevent));
}
#endif

static int
aiocb_copyin(struct aiocb *ujob, struct aiocb *kjob)
{

	return (copyin(ujob, kjob, sizeof(struct aiocb)));
}

static long
aiocb_fetch_status(struct aiocb *ujob)
{

	return (fuword(&ujob->_aiocb_private.status));
}

static long
aiocb_fetch_error(struct aiocb *ujob)
{

	return (fuword(&ujob->_aiocb_private.error));
}

static int
aiocb_store_status(struct aiocb *ujob, long status)
{

	return (suword(&ujob->_aiocb_private.status, status));
}

static int
aiocb_store_error(struct aiocb *ujob, long error)
{

	return (suword(&ujob->_aiocb_private.error, error));
}

static int
aiocb_store_kernelinfo(struct aiocb *ujob, long jobref)
{

	return (suword(&ujob->_aiocb_private.kernelinfo, jobref));
}

static int
aiocb_store_aiocb(struct aiocb **ujobp, struct aiocb *ujob)
{

	return (suword(ujobp, (long)ujob));
}

static struct aiocb_ops aiocb_ops = {
	.copyin = aiocb_copyin,
	.fetch_status = aiocb_fetch_status,
	.fetch_error = aiocb_fetch_error,
	.store_status = aiocb_store_status,
	.store_error = aiocb_store_error,
	.store_kernelinfo = aiocb_store_kernelinfo,
	.store_aiocb = aiocb_store_aiocb,
};

#ifdef COMPAT_FREEBSD6
static struct aiocb_ops aiocb_ops_osigevent = {
	.copyin = aiocb_copyin_old_sigevent,
	.fetch_status = aiocb_fetch_status,
	.fetch_error = aiocb_fetch_error,
	.store_status = aiocb_store_status,
	.store_error = aiocb_store_error,
	.store_kernelinfo = aiocb_store_kernelinfo,
	.store_aiocb = aiocb_store_aiocb,
};
#endif

/*
 * Queue a new AIO request.  Choosing either the threaded or direct physio VCHR
 * technique is done in this code.
 */
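/*
 * For reference, a minimal userland sketch of a submission that exercises
 * the SIGEV_KEVENT branch below (illustrative only; assumes <aio.h> and
 * <sys/event.h>, a valid descriptor fd and a char buffer buf, and omits
 * error handling):
 *
 *	struct aiocb cb = { 0 };
 *	struct kevent ev;
 *	int kq = kqueue();
 *
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof(buf);
 *	cb.aio_offset = 0;
 *	cb.aio_sigevent.sigev_notify = SIGEV_KEVENT;
 *	cb.aio_sigevent.sigev_notify_kqueue = kq;
 *	cb.aio_sigevent.sigev_value.sival_ptr = &cb;
 *	aio_read(&cb);
 *	kevent(kq, NULL, 0, &ev, 1, NULL);
 *	aio_return((struct aiocb *)ev.ident);
 *
 * The kernel fills in ev.ident with the userland aiocb pointer and ev.udata
 * with sigev_value, as set up by the kqfd_register() call below.
 */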
int
aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
    int type, struct aiocb_ops *ops)
{
	struct proc *p = td->td_proc;
	cap_rights_t rights;
	struct file *fp;
	struct kaiocb *job;
	struct kaioinfo *ki;
	struct kevent kev;
	int opcode;
	int error;
	int fd, kqfd;
	int jid;
	u_short evflags;

	if (p->p_aioinfo == NULL)
		aio_init_aioinfo(p);

	ki = p->p_aioinfo;

	ops->store_status(ujob, -1);
	ops->store_error(ujob, 0);
	ops->store_kernelinfo(ujob, -1);

	if (num_queue_count >= max_queue_count ||
	    ki->kaio_count >= ki->kaio_qallowed_count) {
		ops->store_error(ujob, EAGAIN);
		return (EAGAIN);
	}

	job = uma_zalloc(aiocb_zone, M_WAITOK | M_ZERO);
	knlist_init_mtx(&job->klist, AIO_MTX(ki));

	error = ops->copyin(ujob, &job->uaiocb);
	if (error) {
		ops->store_error(ujob, error);
		uma_zfree(aiocb_zone, job);
		return (error);
	}

	if (job->uaiocb.aio_nbytes > IOSIZE_MAX) {
		uma_zfree(aiocb_zone, job);
		return (EINVAL);
	}

	if (job->uaiocb.aio_sigevent.sigev_notify != SIGEV_KEVENT &&
	    job->uaiocb.aio_sigevent.sigev_notify != SIGEV_SIGNAL &&
	    job->uaiocb.aio_sigevent.sigev_notify != SIGEV_THREAD_ID &&
	    job->uaiocb.aio_sigevent.sigev_notify != SIGEV_NONE) {
		ops->store_error(ujob, EINVAL);
		uma_zfree(aiocb_zone, job);
		return (EINVAL);
	}

	if ((job->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL ||
	     job->uaiocb.aio_sigevent.sigev_notify == SIGEV_THREAD_ID) &&
	    !_SIG_VALID(job->uaiocb.aio_sigevent.sigev_signo)) {
		uma_zfree(aiocb_zone, job);
		return (EINVAL);
	}

	ksiginfo_init(&job->ksi);

	/* Save userspace address of the job info. */
	job->ujob = ujob;

	/* Get the opcode. */
	if (type != LIO_NOP)
		job->uaiocb.aio_lio_opcode = type;
	opcode = job->uaiocb.aio_lio_opcode;

	/*
	 * Validate the opcode and fetch the file object for the specified
	 * file descriptor.
	 *
	 * XXXRW: Moved the opcode validation up here so that we don't
	 * retrieve a file descriptor without knowing what the capability
	 * should be.
	 */
	fd = job->uaiocb.aio_fildes;
	switch (opcode) {
	case LIO_WRITE:
		error = fget_write(td, fd,
		    cap_rights_init(&rights, CAP_PWRITE), &fp);
		break;
	case LIO_READ:
		error = fget_read(td, fd,
		    cap_rights_init(&rights, CAP_PREAD), &fp);
		break;
	case LIO_SYNC:
		error = fget(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp);
		break;
	case LIO_MLOCK:
		fp = NULL;
		break;
	case LIO_NOP:
		error = fget(td, fd, cap_rights_init(&rights), &fp);
		break;
	default:
		error = EINVAL;
	}
	if (error) {
		uma_zfree(aiocb_zone, job);
		ops->store_error(ujob, error);
		return (error);
	}

	if (opcode == LIO_SYNC && fp->f_vnode == NULL) {
		error = EINVAL;
		goto aqueue_fail;
	}

	if (opcode != LIO_SYNC && job->uaiocb.aio_offset == -1LL) {
		error = EINVAL;
		goto aqueue_fail;
	}

	job->fd_file = fp;

	mtx_lock(&aio_job_mtx);
	jid = jobrefid++;
	job->seqno = jobseqno++;
	mtx_unlock(&aio_job_mtx);
	error = ops->store_kernelinfo(ujob, jid);
	if (error) {
		error = EINVAL;
		goto aqueue_fail;
	}
	job->uaiocb._aiocb_private.kernelinfo = (void *)(intptr_t)jid;

	if (opcode == LIO_NOP) {
		fdrop(fp, td);
		uma_zfree(aiocb_zone, job);
		return (0);
	}

	if (job->uaiocb.aio_sigevent.sigev_notify != SIGEV_KEVENT)
		goto no_kqueue;
	evflags = job->uaiocb.aio_sigevent.sigev_notify_kevent_flags;
	if ((evflags & ~(EV_CLEAR | EV_DISPATCH | EV_ONESHOT)) != 0) {
		error = EINVAL;
		goto aqueue_fail;
	}
	kqfd = job->uaiocb.aio_sigevent.sigev_notify_kqueue;
	kev.ident = (uintptr_t)job->ujob;
	kev.filter = EVFILT_AIO;
	kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1 | evflags;
	kev.data = (intptr_t)job;
	kev.udata = job->uaiocb.aio_sigevent.sigev_value.sival_ptr;
	error = kqfd_register(kqfd, &kev, td, 1);
	if (error)
		goto aqueue_fail;

no_kqueue:

	ops->store_error(ujob, EINPROGRESS);
	job->uaiocb._aiocb_private.error = EINPROGRESS;
	job->userproc = p;
	job->cred = crhold(td->td_ucred);
	job->jobflags = KAIOCB_QUEUEING;
	job->lio = lj;

	if (opcode == LIO_MLOCK) {
		aio_schedule(job, aio_process_mlock);
		error = 0;
	} else if (fp->f_ops->fo_aio_queue == NULL)
		error = aio_queue_file(fp, job);
	else
		error = fo_aio_queue(fp, job);
	if (error)
		goto aqueue_fail;

	AIO_LOCK(ki);
	job->jobflags &= ~KAIOCB_QUEUEING;
	TAILQ_INSERT_TAIL(&ki->kaio_all, job, allist);
	ki->kaio_count++;
	if (lj)
		lj->lioj_count++;
	atomic_add_int(&num_queue_count, 1);
	if (job->jobflags & KAIOCB_FINISHED) {
		/*
		 * The queue callback completed the request synchronously.
		 * The bulk of the completion is deferred in that case
		 * until this point.
		 */
		aio_bio_done_notify(p, job);
	} else
		TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, job, plist);
	AIO_UNLOCK(ki);
	return (0);

aqueue_fail:
	knlist_delete(&job->klist, curthread, 0);
	if (fp)
		fdrop(fp, td);
	uma_zfree(aiocb_zone, job);
	ops->store_error(ujob, error);
	return (error);
}

static void
aio_cancel_daemon_job(struct kaiocb *job)
{

	mtx_lock(&aio_job_mtx);
	if (!aio_cancel_cleared(job))
		TAILQ_REMOVE(&aio_jobs, job, list);
	mtx_unlock(&aio_job_mtx);
	aio_cancel(job);
}

void
aio_schedule(struct kaiocb *job, aio_handle_fn_t *func)
{

	mtx_lock(&aio_job_mtx);
	if (!aio_set_cancel_function(job, aio_cancel_daemon_job)) {
		mtx_unlock(&aio_job_mtx);
		aio_cancel(job);
		return;
	}
	job->handle_fn = func;
	TAILQ_INSERT_TAIL(&aio_jobs, job, list);
	aio_kick_nowait(job->userproc);
	mtx_unlock(&aio_job_mtx);
}

static void
aio_cancel_sync(struct kaiocb *job)
{
	struct kaioinfo *ki;

	ki = job->userproc->p_aioinfo;
	mtx_lock(&aio_job_mtx);
	if (!aio_cancel_cleared(job))
		TAILQ_REMOVE(&ki->kaio_syncqueue, job, list);
	mtx_unlock(&aio_job_mtx);
	aio_cancel(job);
}

int
aio_queue_file(struct file *fp, struct kaiocb *job)
{
	struct aioliojob *lj;
	struct kaioinfo *ki;
	struct kaiocb *job2;
	int error, opcode;

	lj = job->lio;
	ki = job->userproc->p_aioinfo;
	opcode = job->uaiocb.aio_lio_opcode;
	if (opcode == LIO_SYNC)
		goto queueit;

	if ((error = aio_qphysio(job->userproc, job)) == 0)
		goto done;
#if 0
	/*
	 * XXX: This means qphysio() failed with EFAULT.  The current
	 * behavior is to retry the operation via fo_read/fo_write.
	 * Wouldn't it be better to just complete the request with an
	 * error here?
	 */
	if (error > 0)
		goto done;
#endif
queueit:
	if (!enable_aio_unsafe)
		return (EOPNOTSUPP);

	if (opcode == LIO_SYNC) {
		AIO_LOCK(ki);
		TAILQ_FOREACH(job2, &ki->kaio_jobqueue, plist) {
			if (job2->fd_file == job->fd_file &&
			    job2->uaiocb.aio_lio_opcode != LIO_SYNC &&
			    job2->seqno < job->seqno) {
				job2->jobflags |= KAIOCB_CHECKSYNC;
				job->pending++;
			}
		}
		if (job->pending != 0) {
			if (!aio_set_cancel_function(job, aio_cancel_sync)) {
				AIO_UNLOCK(ki);
				aio_cancel(job);
				return (0);
			}
			TAILQ_INSERT_TAIL(&ki->kaio_syncqueue, job, list);
			AIO_UNLOCK(ki);
			return (0);
		}
		AIO_UNLOCK(ki);
	}

	switch (opcode) {
	case LIO_READ:
	case LIO_WRITE:
		aio_schedule(job, aio_process_rw);
		error = 0;
		break;
	case LIO_SYNC:
		aio_schedule(job, aio_process_sync);
		error = 0;
		break;
	default:
		error = EINVAL;
	}
done:
	return (error);
}

static void
aio_kick_nowait(struct proc *userp)
{
	struct kaioinfo *ki = userp->p_aioinfo;
	struct aioproc *aiop;

	mtx_assert(&aio_job_mtx, MA_OWNED);
	if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) {
		TAILQ_REMOVE(&aio_freeproc, aiop, list);
		aiop->aioprocflags &= ~AIOP_FREE;
		wakeup(aiop->aioproc);
	} else if (num_aio_resv_start + num_aio_procs < max_aio_procs &&
	    ki->kaio_active_count + num_aio_resv_start <
	    ki->kaio_maxactive_count) {
		taskqueue_enqueue(taskqueue_aiod_kick, &ki->kaio_task);
	}
}

static int
aio_kick(struct proc *userp)
{
	struct kaioinfo *ki = userp->p_aioinfo;
	struct aioproc *aiop;
	int error, ret = 0;

	mtx_assert(&aio_job_mtx, MA_OWNED);
retryproc:
	if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) {
		TAILQ_REMOVE(&aio_freeproc, aiop, list);
		aiop->aioprocflags &= ~AIOP_FREE;
		wakeup(aiop->aioproc);
	} else if (num_aio_resv_start + num_aio_procs < max_aio_procs &&
	    ki->kaio_active_count + num_aio_resv_start <
	    ki->kaio_maxactive_count) {
		num_aio_resv_start++;
		mtx_unlock(&aio_job_mtx);
		error = aio_newproc(&num_aio_resv_start);
		mtx_lock(&aio_job_mtx);
		if (error) {
			num_aio_resv_start--;
			goto retryproc;
		}
	} else {
		ret = -1;
	}
	return (ret);
}

static void
aio_kick_helper(void *context, int pending)
{
	struct proc *userp = context;

	mtx_lock(&aio_job_mtx);
	while (--pending >= 0) {
		if (aio_kick(userp))
			break;
	}
	mtx_unlock(&aio_job_mtx);
}

/*
 * Support the aio_return system call; as a side effect, kernel resources are
 * released.
 */
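/*
 * For reference, the userland sequence these handlers back, as a minimal
 * sketch (illustrative only; assumes <aio.h>, <errno.h> and <unistd.h>, a
 * valid descriptor fd, and omits error handling):
 *
 *	static char buf[4096];
 *	struct aiocb cb = { 0 };
 *	ssize_t n;
 *
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof(buf);
 *	cb.aio_offset = 0;
 *	aio_read(&cb);
 *	while (aio_error(&cb) == EINPROGRESS)
 *		usleep(1000);
 *	n = aio_return(&cb);
 *
 * aio_return() is what finally releases the kernel job via aio_free_entry(),
 * so it should be called exactly once per completed request.
 */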
static int
kern_aio_return(struct thread *td, struct aiocb *ujob, struct aiocb_ops *ops)
{
	struct proc *p = td->td_proc;
	struct kaiocb *job;
	struct kaioinfo *ki;
	long status, error;

	ki = p->p_aioinfo;
	if (ki == NULL)
		return (EINVAL);
	AIO_LOCK(ki);
	TAILQ_FOREACH(job, &ki->kaio_done, plist) {
		if (job->ujob == ujob)
			break;
	}
	if (job != NULL) {
		MPASS(job->jobflags & KAIOCB_FINISHED);
		status = job->uaiocb._aiocb_private.status;
		error = job->uaiocb._aiocb_private.error;
		td->td_retval[0] = status;
		if (job->uaiocb.aio_lio_opcode == LIO_WRITE) {
			td->td_ru.ru_oublock += job->outputcharge;
			job->outputcharge = 0;
		} else if (job->uaiocb.aio_lio_opcode == LIO_READ) {
			td->td_ru.ru_inblock += job->inputcharge;
			job->inputcharge = 0;
		}
		aio_free_entry(job);
		AIO_UNLOCK(ki);
		ops->store_error(ujob, error);
		ops->store_status(ujob, status);
	} else {
		error = EINVAL;
		AIO_UNLOCK(ki);
	}
	return (error);
}

int
sys_aio_return(struct thread *td, struct aio_return_args *uap)
{

	return (kern_aio_return(td, uap->aiocbp, &aiocb_ops));
}

/*
 * Allow a process to wake up when any of the I/O requests are completed.
 */
static int
kern_aio_suspend(struct thread *td, int njoblist, struct aiocb **ujoblist,
    struct timespec *ts)
{
	struct proc *p = td->td_proc;
	struct timeval atv;
	struct kaioinfo *ki;
	struct kaiocb *firstjob, *job;
	int error, i, timo;

	timo = 0;
	if (ts) {
		if (ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000)
			return (EINVAL);

		TIMESPEC_TO_TIMEVAL(&atv, ts);
		if (itimerfix(&atv))
			return (EINVAL);
		timo = tvtohz(&atv);
	}

	ki = p->p_aioinfo;
	if (ki == NULL)
		return (EAGAIN);

	if (njoblist == 0)
		return (0);

	AIO_LOCK(ki);
	for (;;) {
		firstjob = NULL;
		error = 0;
		TAILQ_FOREACH(job, &ki->kaio_all, allist) {
			for (i = 0; i < njoblist; i++) {
				if (job->ujob == ujoblist[i]) {
					if (firstjob == NULL)
						firstjob = job;
					if (job->jobflags & KAIOCB_FINISHED)
						goto RETURN;
				}
			}
		}
		/* All tasks were finished. */
		if (firstjob == NULL)
			break;

		ki->kaio_flags |= KAIO_WAKEUP;
		error = msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO | PCATCH,
		    "aiospn", timo);
		if (error == ERESTART)
			error = EINTR;
		if (error)
			break;
	}
RETURN:
	AIO_UNLOCK(ki);
	return (error);
}

int
sys_aio_suspend(struct thread *td, struct aio_suspend_args *uap)
{
	struct timespec ts, *tsp;
	struct aiocb **ujoblist;
	int error;

	if (uap->nent < 0 || uap->nent > AIO_LISTIO_MAX)
		return (EINVAL);

	if (uap->timeout) {
		/* Get timespec struct. */
		if ((error = copyin(uap->timeout, &ts, sizeof(ts))) != 0)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

	ujoblist = uma_zalloc(aiol_zone, M_WAITOK);
	error = copyin(uap->aiocbp, ujoblist, uap->nent * sizeof(ujoblist[0]));
	if (error == 0)
		error = kern_aio_suspend(td, uap->nent, ujoblist, tsp);
	uma_zfree(aiol_zone, ujoblist);
	return (error);
}

/*
 * aio_cancel cancels any non-physio aio operations not currently in
 * progress.
 */
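/*
 * A minimal userland sketch of driving this path (illustrative only;
 * "cb" is assumed to be an aiocb submitted earlier on descriptor fd):
 *
 *	switch (aio_cancel(fd, &cb)) {
 *	case AIO_CANCELED:
 *	case AIO_ALLDONE:
 *		(void)aio_return(&cb);
 *		break;
 *	case AIO_NOTCANCELED:
 *		while (aio_error(&cb) == EINPROGRESS)
 *			;
 *		(void)aio_return(&cb);
 *		break;
 *	}
 *
 * In every case aio_return() must still be called to reap the control block;
 * for a cancelled request it reports -1 with aio_error() == ECANCELED.
 */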
int
sys_aio_cancel(struct thread *td, struct aio_cancel_args *uap)
{
	struct proc *p = td->td_proc;
	struct kaioinfo *ki;
	struct kaiocb *job, *jobn;
	struct file *fp;
	cap_rights_t rights;
	int error;
	int cancelled = 0;
	int notcancelled = 0;
	struct vnode *vp;

	/* Lookup file object. */
	error = fget(td, uap->fd, cap_rights_init(&rights), &fp);
	if (error)
		return (error);

	ki = p->p_aioinfo;
	if (ki == NULL)
		goto done;

	if (fp->f_type == DTYPE_VNODE) {
		vp = fp->f_vnode;
		if (vn_isdisk(vp, &error)) {
			fdrop(fp, td);
			td->td_retval[0] = AIO_NOTCANCELED;
			return (0);
		}
	}

	AIO_LOCK(ki);
	TAILQ_FOREACH_SAFE(job, &ki->kaio_jobqueue, plist, jobn) {
		if ((uap->fd == job->uaiocb.aio_fildes) &&
		    ((uap->aiocbp == NULL) ||
		     (uap->aiocbp == job->ujob))) {
			if (aio_cancel_job(p, ki, job)) {
				cancelled++;
			} else {
				notcancelled++;
			}
			if (uap->aiocbp != NULL)
				break;
		}
	}
	AIO_UNLOCK(ki);

done:
	fdrop(fp, td);

	if (uap->aiocbp != NULL) {
		if (cancelled) {
			td->td_retval[0] = AIO_CANCELED;
			return (0);
		}
	}

	if (notcancelled) {
		td->td_retval[0] = AIO_NOTCANCELED;
		return (0);
	}

	if (cancelled) {
		td->td_retval[0] = AIO_CANCELED;
		return (0);
	}

	td->td_retval[0] = AIO_ALLDONE;

	return (0);
}

/*
 * aio_error is implemented at the kernel level for compatibility purposes
 * only.  For a user mode async implementation, it would be best to do it in
 * a userland subroutine.
 */
static int
kern_aio_error(struct thread *td, struct aiocb *ujob, struct aiocb_ops *ops)
{
	struct proc *p = td->td_proc;
	struct kaiocb *job;
	struct kaioinfo *ki;
	int status;

	ki = p->p_aioinfo;
	if (ki == NULL) {
		td->td_retval[0] = EINVAL;
		return (0);
	}

	AIO_LOCK(ki);
	TAILQ_FOREACH(job, &ki->kaio_all, allist) {
		if (job->ujob == ujob) {
			if (job->jobflags & KAIOCB_FINISHED)
				td->td_retval[0] =
				    job->uaiocb._aiocb_private.error;
			else
				td->td_retval[0] = EINPROGRESS;
			AIO_UNLOCK(ki);
			return (0);
		}
	}
	AIO_UNLOCK(ki);

	/*
	 * Hack for failure of aio_aqueue.
/*
 * aio_error is implemented at the kernel level for compatibility purposes
 * only.  For a user mode async implementation, it would be best to do it in
 * a userland subroutine.
 */
static int
kern_aio_error(struct thread *td, struct aiocb *ujob, struct aiocb_ops *ops)
{
	struct proc *p = td->td_proc;
	struct kaiocb *job;
	struct kaioinfo *ki;
	int status;

	ki = p->p_aioinfo;
	if (ki == NULL) {
		td->td_retval[0] = EINVAL;
		return (0);
	}

	AIO_LOCK(ki);
	TAILQ_FOREACH(job, &ki->kaio_all, allist) {
		if (job->ujob == ujob) {
			if (job->jobflags & KAIOCB_FINISHED)
				td->td_retval[0] =
				    job->uaiocb._aiocb_private.error;
			else
				td->td_retval[0] = EINPROGRESS;
			AIO_UNLOCK(ki);
			return (0);
		}
	}
	AIO_UNLOCK(ki);

	/*
	 * Hack for failure of aio_aqueue.
	 */
	status = ops->fetch_status(ujob);
	if (status == -1) {
		td->td_retval[0] = ops->fetch_error(ujob);
		return (0);
	}

	td->td_retval[0] = EINVAL;
	return (0);
}

int
sys_aio_error(struct thread *td, struct aio_error_args *uap)
{

	return (kern_aio_error(td, uap->aiocbp, &aiocb_ops));
}

/* syscall - asynchronous read from a file (REALTIME) */
#ifdef COMPAT_FREEBSD6
int
freebsd6_aio_read(struct thread *td, struct freebsd6_aio_read_args *uap)
{

	return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ,
	    &aiocb_ops_osigevent));
}
#endif

int
sys_aio_read(struct thread *td, struct aio_read_args *uap)
{

	return (aio_aqueue(td, uap->aiocbp, NULL, LIO_READ, &aiocb_ops));
}

/* syscall - asynchronous write to a file (REALTIME) */
#ifdef COMPAT_FREEBSD6
int
freebsd6_aio_write(struct thread *td, struct freebsd6_aio_write_args *uap)
{

	return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE,
	    &aiocb_ops_osigevent));
}
#endif

int
sys_aio_write(struct thread *td, struct aio_write_args *uap)
{

	return (aio_aqueue(td, uap->aiocbp, NULL, LIO_WRITE, &aiocb_ops));
}

int
sys_aio_mlock(struct thread *td, struct aio_mlock_args *uap)
{

	return (aio_aqueue(td, uap->aiocbp, NULL, LIO_MLOCK, &aiocb_ops));
}
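/*
 * Illustrative userland counterpart to the read/write/error entry points
 * above.  This is a hedged sketch, not part of this kernel file; it assumes
 * only the standard aio_read(2), aio_error(2), and aio_return(2)
 * interfaces.  A real program would do useful work (or sleep in
 * aio_suspend()) instead of busy-polling aio_error().
 *
 *	#include <aio.h>
 *	#include <errno.h>
 *	#include <string.h>
 *
 *	static ssize_t
 *	read_async(int fd, void *buf, size_t len, off_t off)
 *	{
 *		struct aiocb cb;
 *		int err;
 *
 *		memset(&cb, 0, sizeof(cb));
 *		cb.aio_fildes = fd;
 *		cb.aio_buf = buf;
 *		cb.aio_nbytes = len;
 *		cb.aio_offset = off;
 *		if (aio_read(&cb) != 0)
 *			return (-1);
 *		while ((err = aio_error(&cb)) == EINPROGRESS)
 *			;
 *		if (err != 0) {
 *			errno = err;
 *			return (-1);
 *		}
 *		return (aio_return(&cb));
 *	}
 */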
static int
kern_lio_listio(struct thread *td, int mode, struct aiocb * const *uacb_list,
    struct aiocb **acb_list, int nent, struct sigevent *sig,
    struct aiocb_ops *ops)
{
	struct proc *p = td->td_proc;
	struct aiocb *job;
	struct kaioinfo *ki;
	struct aioliojob *lj;
	struct kevent kev;
	int error;
	int nerror;
	int i;

	if ((mode != LIO_NOWAIT) && (mode != LIO_WAIT))
		return (EINVAL);

	if (nent < 0 || nent > AIO_LISTIO_MAX)
		return (EINVAL);

	if (p->p_aioinfo == NULL)
		aio_init_aioinfo(p);

	ki = p->p_aioinfo;

	lj = uma_zalloc(aiolio_zone, M_WAITOK);
	lj->lioj_flags = 0;
	lj->lioj_count = 0;
	lj->lioj_finished_count = 0;
	knlist_init_mtx(&lj->klist, AIO_MTX(ki));
	ksiginfo_init(&lj->lioj_ksi);

	/*
	 * Set up the signal.
	 */
	if (sig && (mode == LIO_NOWAIT)) {
		bcopy(sig, &lj->lioj_signal, sizeof(lj->lioj_signal));
		if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) {
			/* Assume only new style KEVENT */
			kev.filter = EVFILT_LIO;
			kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1;
			kev.ident = (uintptr_t)uacb_list; /* something unique */
			kev.data = (intptr_t)lj;
			/* pass user-defined sigval data */
			kev.udata = lj->lioj_signal.sigev_value.sival_ptr;
			error = kqfd_register(
			    lj->lioj_signal.sigev_notify_kqueue, &kev, td, 1);
			if (error) {
				uma_zfree(aiolio_zone, lj);
				return (error);
			}
		} else if (lj->lioj_signal.sigev_notify == SIGEV_NONE) {
			;
		} else if (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL ||
		    lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID) {
			if (!_SIG_VALID(lj->lioj_signal.sigev_signo)) {
				uma_zfree(aiolio_zone, lj);
				return (EINVAL);
			}
			lj->lioj_flags |= LIOJ_SIGNAL;
		} else {
			uma_zfree(aiolio_zone, lj);
			return (EINVAL);
		}
	}

	AIO_LOCK(ki);
	TAILQ_INSERT_TAIL(&ki->kaio_liojoblist, lj, lioj_list);
	/*
	 * Hold an extra reference so that the lio cannot be freed by other
	 * threads doing aio_waitcomplete() or aio_return(), and so that no
	 * completion event is sent until all tasks have been queued.
	 */
	lj->lioj_count = 1;
	AIO_UNLOCK(ki);

	/*
	 * Get pointers to the list of I/O requests.
	 */
	nerror = 0;
	for (i = 0; i < nent; i++) {
		job = acb_list[i];
		if (job != NULL) {
			error = aio_aqueue(td, job, lj, LIO_NOP, ops);
			if (error != 0)
				nerror++;
		}
	}

	error = 0;
	AIO_LOCK(ki);
	if (mode == LIO_WAIT) {
		while (lj->lioj_count - 1 != lj->lioj_finished_count) {
			ki->kaio_flags |= KAIO_WAKEUP;
			error = msleep(&p->p_aioinfo, AIO_MTX(ki),
			    PRIBIO | PCATCH, "aiospn", 0);
			if (error == ERESTART)
				error = EINTR;
			if (error)
				break;
		}
	} else {
		if (lj->lioj_count - 1 == lj->lioj_finished_count) {
			if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) {
				lj->lioj_flags |= LIOJ_KEVENT_POSTED;
				KNOTE_LOCKED(&lj->klist, 1);
			}
			if ((lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED))
			    == LIOJ_SIGNAL &&
			    (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL ||
			    lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID)) {
				aio_sendsig(p, &lj->lioj_signal,
				    &lj->lioj_ksi);
				lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
			}
		}
	}
	lj->lioj_count--;
	if (lj->lioj_count == 0) {
		TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list);
		knlist_delete(&lj->klist, curthread, 1);
		PROC_LOCK(p);
		sigqueue_take(&lj->lioj_ksi);
		PROC_UNLOCK(p);
		AIO_UNLOCK(ki);
		uma_zfree(aiolio_zone, lj);
	} else
		AIO_UNLOCK(ki);

	if (nerror)
		return (EIO);
	return (error);
}
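/*
 * Illustrative userland usage of the list I/O path above.  This is a hedged
 * sketch, not part of this kernel file; it assumes only the standard
 * lio_listio(2) interface.  With LIO_WAIT the call returns 0 once every
 * listed request has completed, or -1 with errno set (EIO if any request
 * could not be queued, matching the nerror handling above); the completion
 * status of each request is then collected with aio_error(2)/aio_return(2).
 *
 *	#include <aio.h>
 *
 *	static int
 *	write_pair(int fd, struct aiocb *a, struct aiocb *b)
 *	{
 *		struct aiocb *list[2] = { a, b };
 *
 *		a->aio_fildes = fd;
 *		a->aio_lio_opcode = LIO_WRITE;
 *		b->aio_fildes = fd;
 *		b->aio_lio_opcode = LIO_WRITE;
 *		return (lio_listio(LIO_WAIT, list, 2, NULL));
 *	}
 */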
/* syscall - list directed I/O (REALTIME) */
#ifdef COMPAT_FREEBSD6
int
freebsd6_lio_listio(struct thread *td, struct freebsd6_lio_listio_args *uap)
{
	struct aiocb **acb_list;
	struct sigevent *sigp, sig;
	struct osigevent osig;
	int error, nent;

	if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT))
		return (EINVAL);

	nent = uap->nent;
	if (nent < 0 || nent > AIO_LISTIO_MAX)
		return (EINVAL);

	if (uap->sig && (uap->mode == LIO_NOWAIT)) {
		error = copyin(uap->sig, &osig, sizeof(osig));
		if (error)
			return (error);
		error = convert_old_sigevent(&osig, &sig);
		if (error)
			return (error);
		sigp = &sig;
	} else
		sigp = NULL;

	acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK);
	error = copyin(uap->acb_list, acb_list, nent * sizeof(acb_list[0]));
	if (error == 0)
		error = kern_lio_listio(td, uap->mode,
		    (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp,
		    &aiocb_ops_osigevent);
	free(acb_list, M_LIO);
	return (error);
}
#endif

/* syscall - list directed I/O (REALTIME) */
int
sys_lio_listio(struct thread *td, struct lio_listio_args *uap)
{
	struct aiocb **acb_list;
	struct sigevent *sigp, sig;
	int error, nent;

	if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT))
		return (EINVAL);

	nent = uap->nent;
	if (nent < 0 || nent > AIO_LISTIO_MAX)
		return (EINVAL);

	if (uap->sig && (uap->mode == LIO_NOWAIT)) {
		error = copyin(uap->sig, &sig, sizeof(sig));
		if (error)
			return (error);
		sigp = &sig;
	} else
		sigp = NULL;

	acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK);
	error = copyin(uap->acb_list, acb_list, nent * sizeof(acb_list[0]));
	if (error == 0)
		error = kern_lio_listio(td, uap->mode, uap->acb_list, acb_list,
		    nent, sigp, &aiocb_ops);
	free(acb_list, M_LIO);
	return (error);
}

static void
aio_physwakeup(struct bio *bp)
{
	struct kaiocb *job = (struct kaiocb *)bp->bio_caller1;
	struct proc *userp;
	struct kaioinfo *ki;
	size_t nbytes;
	int error, nblks;

	/* Release mapping into kernel space. */
	userp = job->userproc;
	ki = userp->p_aioinfo;
	if (job->pbuf) {
		pmap_qremove((vm_offset_t)job->pbuf->b_data, job->npages);
		relpbuf(job->pbuf, NULL);
		job->pbuf = NULL;
		atomic_subtract_int(&num_buf_aio, 1);
		AIO_LOCK(ki);
		ki->kaio_buffer_count--;
		AIO_UNLOCK(ki);
	}
	vm_page_unhold_pages(job->pages, job->npages);

	bp = job->bp;
	job->bp = NULL;
	nbytes = job->uaiocb.aio_nbytes - bp->bio_resid;
	error = 0;
	if (bp->bio_flags & BIO_ERROR)
		error = bp->bio_error;
	nblks = btodb(nbytes);
	if (job->uaiocb.aio_lio_opcode == LIO_WRITE)
		job->outputcharge += nblks;
	else
		job->inputcharge += nblks;

	if (error)
		aio_complete(job, -1, error);
	else
		aio_complete(job, nbytes, 0);

	g_destroy_bio(bp);
}
/* syscall - wait for the next completion of an aio request */
static int
kern_aio_waitcomplete(struct thread *td, struct aiocb **ujobp,
    struct timespec *ts, struct aiocb_ops *ops)
{
	struct proc *p = td->td_proc;
	struct timeval atv;
	struct kaioinfo *ki;
	struct kaiocb *job;
	struct aiocb *ujob;
	long error, status;
	int timo;

	ops->store_aiocb(ujobp, NULL);

	if (ts == NULL) {
		timo = 0;
	} else if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
		timo = -1;
	} else {
		if ((ts->tv_nsec < 0) || (ts->tv_nsec >= 1000000000))
			return (EINVAL);

		TIMESPEC_TO_TIMEVAL(&atv, ts);
		if (itimerfix(&atv))
			return (EINVAL);
		timo = tvtohz(&atv);
	}

	if (p->p_aioinfo == NULL)
		aio_init_aioinfo(p);
	ki = p->p_aioinfo;

	error = 0;
	job = NULL;
	AIO_LOCK(ki);
	while ((job = TAILQ_FIRST(&ki->kaio_done)) == NULL) {
		if (timo == -1) {
			error = EWOULDBLOCK;
			break;
		}
		ki->kaio_flags |= KAIO_WAKEUP;
		error = msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO | PCATCH,
		    "aiowc", timo);
		if (timo && error == ERESTART)
			error = EINTR;
		if (error)
			break;
	}

	if (job != NULL) {
		MPASS(job->jobflags & KAIOCB_FINISHED);
		ujob = job->ujob;
		status = job->uaiocb._aiocb_private.status;
		error = job->uaiocb._aiocb_private.error;
		td->td_retval[0] = status;
		if (job->uaiocb.aio_lio_opcode == LIO_WRITE) {
			td->td_ru.ru_oublock += job->outputcharge;
			job->outputcharge = 0;
		} else if (job->uaiocb.aio_lio_opcode == LIO_READ) {
			td->td_ru.ru_inblock += job->inputcharge;
			job->inputcharge = 0;
		}
		aio_free_entry(job);
		AIO_UNLOCK(ki);
		ops->store_aiocb(ujobp, ujob);
		ops->store_error(ujob, error);
		ops->store_status(ujob, status);
	} else
		AIO_UNLOCK(ki);

	return (error);
}

int
sys_aio_waitcomplete(struct thread *td, struct aio_waitcomplete_args *uap)
{
	struct timespec ts, *tsp;
	int error;

	if (uap->timeout) {
		/* Get timespec struct. */
		error = copyin(uap->timeout, &ts, sizeof(ts));
		if (error)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

	return (kern_aio_waitcomplete(td, uap->aiocbp, tsp, &aiocb_ops));
}

static int
kern_aio_fsync(struct thread *td, int op, struct aiocb *ujob,
    struct aiocb_ops *ops)
{
	struct proc *p = td->td_proc;
	struct kaioinfo *ki;

	if (op != O_SYNC) /* XXX lack of O_DSYNC */
		return (EINVAL);
	ki = p->p_aioinfo;
	if (ki == NULL)
		aio_init_aioinfo(p);
	return (aio_aqueue(td, ujob, NULL, LIO_SYNC, ops));
}

int
sys_aio_fsync(struct thread *td, struct aio_fsync_args *uap)
{

	return (kern_aio_fsync(td, uap->op, uap->aiocbp, &aiocb_ops));
}
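/*
 * Illustrative userland usage of the waitcomplete path above.  This is a
 * hedged sketch, not part of this kernel file; it assumes only the
 * FreeBSD-specific aio_waitcomplete(2) interface.  The call dequeues
 * whichever of the caller's requests finishes next, so nothing needs to be
 * polled per control block: on success the completed aiocb is returned
 * through the pointer argument and the call's return value is the request's
 * result.  A NULL timeout blocks until something completes; a zero timeout
 * merely polls (the EWOULDBLOCK case above).
 *
 *	#include <aio.h>
 *
 *	static ssize_t
 *	reap_next(struct aiocb **cbp)
 *	{
 *		struct timespec ts = { 1, 0 };
 *
 *		return (aio_waitcomplete(cbp, &ts));
 *	}
 */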
/* kqueue attach function */
static int
filt_aioattach(struct knote *kn)
{
	struct kaiocb *job = (struct kaiocb *)kn->kn_sdata;

	/*
	 * The job pointer must be validated before using it, so
	 * registration is restricted to the kernel; the user cannot
	 * set EV_FLAG1.
	 */
	if ((kn->kn_flags & EV_FLAG1) == 0)
		return (EPERM);
	kn->kn_ptr.p_aio = job;
	kn->kn_flags &= ~EV_FLAG1;

	knlist_add(&job->klist, kn, 0);

	return (0);
}

/* kqueue detach function */
static void
filt_aiodetach(struct knote *kn)
{
	struct knlist *knl;

	knl = &kn->kn_ptr.p_aio->klist;
	knl->kl_lock(knl->kl_lockarg);
	if (!knlist_empty(knl))
		knlist_remove(knl, kn, 1);
	knl->kl_unlock(knl->kl_lockarg);
}

/* kqueue filter function */
/*ARGSUSED*/
static int
filt_aio(struct knote *kn, long hint)
{
	struct kaiocb *job = kn->kn_ptr.p_aio;

	kn->kn_data = job->uaiocb._aiocb_private.error;
	if (!(job->jobflags & KAIOCB_FINISHED))
		return (0);
	kn->kn_flags |= EV_EOF;
	return (1);
}

/* kqueue attach function */
static int
filt_lioattach(struct knote *kn)
{
	struct aioliojob *lj = (struct aioliojob *)kn->kn_sdata;

	/*
	 * The aioliojob pointer must be validated before using it, so
	 * registration is restricted to the kernel; the user cannot
	 * set EV_FLAG1.
	 */
	if ((kn->kn_flags & EV_FLAG1) == 0)
		return (EPERM);
	kn->kn_ptr.p_lio = lj;
	kn->kn_flags &= ~EV_FLAG1;

	knlist_add(&lj->klist, kn, 0);

	return (0);
}

/* kqueue detach function */
static void
filt_liodetach(struct knote *kn)
{
	struct knlist *knl;

	knl = &kn->kn_ptr.p_lio->klist;
	knl->kl_lock(knl->kl_lockarg);
	if (!knlist_empty(knl))
		knlist_remove(knl, kn, 1);
	knl->kl_unlock(knl->kl_lockarg);
}

/* kqueue filter function */
/*ARGSUSED*/
static int
filt_lio(struct knote *kn, long hint)
{
	struct aioliojob *lj = kn->kn_ptr.p_lio;

	return (lj->lioj_flags & LIOJ_KEVENT_POSTED);
}
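/*
 * Illustrative userland usage of the EVFILT_AIO filter serviced above.
 * This is a hedged sketch, not part of this kernel file; it assumes the
 * standard kevent(2) interface and SIGEV_KEVENT notification.  The request
 * asks for delivery to an existing kqueue, and the sigev_value set here is
 * handed back as the event's udata, so the completed control block can be
 * recovered without scanning.
 *
 *	#include <sys/event.h>
 *	#include <aio.h>
 *
 *	static int
 *	submit_kq(int kq, int fd, struct aiocb *cb)
 *	{
 *		cb->aio_fildes = fd;
 *		cb->aio_sigevent.sigev_notify = SIGEV_KEVENT;
 *		cb->aio_sigevent.sigev_notify_kqueue = kq;
 *		cb->aio_sigevent.sigev_value.sival_ptr = cb;
 *		return (aio_read(cb));
 *	}
 *
 *	static struct aiocb *
 *	reap_kq(int kq)
 *	{
 *		struct kevent ev;
 *
 *		if (kevent(kq, NULL, 0, &ev, 1, NULL) != 1)
 *			return (NULL);
 *		return ((struct aiocb *)ev.udata);
 *	}
 */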
#ifdef COMPAT_FREEBSD32
#include <sys/mount.h>
#include <sys/socket.h>
#include <compat/freebsd32/freebsd32.h>
#include <compat/freebsd32/freebsd32_proto.h>
#include <compat/freebsd32/freebsd32_signal.h>
#include <compat/freebsd32/freebsd32_syscall.h>
#include <compat/freebsd32/freebsd32_util.h>

struct __aiocb_private32 {
	int32_t	status;
	int32_t	error;
	uint32_t kernelinfo;
};

#ifdef COMPAT_FREEBSD6
typedef struct oaiocb32 {
	int	aio_fildes;		/* File descriptor */
	uint64_t aio_offset __packed;	/* File offset for I/O */
	uint32_t aio_buf;		/* I/O buffer in process space */
	uint32_t aio_nbytes;		/* Number of bytes for I/O */
	struct	osigevent32 aio_sigevent; /* Signal to deliver */
	int	aio_lio_opcode;		/* LIO opcode */
	int	aio_reqprio;		/* Request priority -- ignored */
	struct	__aiocb_private32 _aiocb_private;
} oaiocb32_t;
#endif

typedef struct aiocb32 {
	int32_t	aio_fildes;		/* File descriptor */
	uint64_t aio_offset __packed;	/* File offset for I/O */
	uint32_t aio_buf;		/* I/O buffer in process space */
	uint32_t aio_nbytes;		/* Number of bytes for I/O */
	int	__spare__[2];
	uint32_t __spare2__;
	int	aio_lio_opcode;		/* LIO opcode */
	int	aio_reqprio;		/* Request priority -- ignored */
	struct	__aiocb_private32 _aiocb_private;
	struct	sigevent32 aio_sigevent; /* Signal to deliver */
} aiocb32_t;

#ifdef COMPAT_FREEBSD6
static int
convert_old_sigevent32(struct osigevent32 *osig, struct sigevent *nsig)
{

	/*
	 * Only SIGEV_NONE, SIGEV_SIGNAL, and SIGEV_KEVENT are
	 * supported by AIO with the old sigevent structure.
	 */
	CP(*osig, *nsig, sigev_notify);
	switch (nsig->sigev_notify) {
	case SIGEV_NONE:
		break;
	case SIGEV_SIGNAL:
		nsig->sigev_signo = osig->__sigev_u.__sigev_signo;
		break;
	case SIGEV_KEVENT:
		nsig->sigev_notify_kqueue =
		    osig->__sigev_u.__sigev_notify_kqueue;
		PTRIN_CP(*osig, *nsig, sigev_value.sival_ptr);
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

static int
aiocb32_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob)
{
	struct oaiocb32 job32;
	int error;

	bzero(kjob, sizeof(struct aiocb));
	error = copyin(ujob, &job32, sizeof(job32));
	if (error)
		return (error);

	CP(job32, *kjob, aio_fildes);
	CP(job32, *kjob, aio_offset);
	PTRIN_CP(job32, *kjob, aio_buf);
	CP(job32, *kjob, aio_nbytes);
	CP(job32, *kjob, aio_lio_opcode);
	CP(job32, *kjob, aio_reqprio);
	CP(job32, *kjob, _aiocb_private.status);
	CP(job32, *kjob, _aiocb_private.error);
	PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo);
	return (convert_old_sigevent32(&job32.aio_sigevent,
	    &kjob->aio_sigevent));
}
#endif

static int
aiocb32_copyin(struct aiocb *ujob, struct aiocb *kjob)
{
	struct aiocb32 job32;
	int error;

	error = copyin(ujob, &job32, sizeof(job32));
	if (error)
		return (error);
	CP(job32, *kjob, aio_fildes);
	CP(job32, *kjob, aio_offset);
	PTRIN_CP(job32, *kjob, aio_buf);
	CP(job32, *kjob, aio_nbytes);
	CP(job32, *kjob, aio_lio_opcode);
	CP(job32, *kjob, aio_reqprio);
	CP(job32, *kjob, _aiocb_private.status);
	CP(job32, *kjob, _aiocb_private.error);
	PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo);
	return (convert_sigevent32(&job32.aio_sigevent, &kjob->aio_sigevent));
}

static long
aiocb32_fetch_status(struct aiocb *ujob)
{
	struct aiocb32 *ujob32;

	ujob32 = (struct aiocb32 *)ujob;
	return (fuword32(&ujob32->_aiocb_private.status));
}

static long
aiocb32_fetch_error(struct aiocb *ujob)
{
	struct aiocb32 *ujob32;

	ujob32 = (struct aiocb32 *)ujob;
	return (fuword32(&ujob32->_aiocb_private.error));
}

static int
aiocb32_store_status(struct aiocb *ujob, long status)
{
	struct aiocb32 *ujob32;

	ujob32 = (struct aiocb32 *)ujob;
	return (suword32(&ujob32->_aiocb_private.status, status));
}

static int
aiocb32_store_error(struct aiocb *ujob, long error)
{
	struct aiocb32 *ujob32;

	ujob32 = (struct aiocb32 *)ujob;
	return (suword32(&ujob32->_aiocb_private.error, error));
}

static int
aiocb32_store_kernelinfo(struct aiocb *ujob, long jobref)
{
	struct aiocb32 *ujob32;

	ujob32 = (struct aiocb32 *)ujob;
	return (suword32(&ujob32->_aiocb_private.kernelinfo, jobref));
}

static int
aiocb32_store_aiocb(struct aiocb **ujobp, struct aiocb *ujob)
{

	return (suword32(ujobp, (long)ujob));
}

static struct aiocb_ops aiocb32_ops = {
	.copyin = aiocb32_copyin,
	.fetch_status = aiocb32_fetch_status,
	.fetch_error = aiocb32_fetch_error,
	.store_status = aiocb32_store_status,
	.store_error = aiocb32_store_error,
	.store_kernelinfo = aiocb32_store_kernelinfo,
	.store_aiocb = aiocb32_store_aiocb,
};
#ifdef COMPAT_FREEBSD6
static struct aiocb_ops aiocb32_ops_osigevent = {
	.copyin = aiocb32_copyin_old_sigevent,
	.fetch_status = aiocb32_fetch_status,
	.fetch_error = aiocb32_fetch_error,
	.store_status = aiocb32_store_status,
	.store_error = aiocb32_store_error,
	.store_kernelinfo = aiocb32_store_kernelinfo,
	.store_aiocb = aiocb32_store_aiocb,
};
#endif

int
freebsd32_aio_return(struct thread *td, struct freebsd32_aio_return_args *uap)
{

	return (kern_aio_return(td, (struct aiocb *)uap->aiocbp, &aiocb32_ops));
}

int
freebsd32_aio_suspend(struct thread *td, struct freebsd32_aio_suspend_args *uap)
{
	struct timespec32 ts32;
	struct timespec ts, *tsp;
	struct aiocb **ujoblist;
	uint32_t *ujoblist32;
	int error, i;

	if (uap->nent < 0 || uap->nent > AIO_LISTIO_MAX)
		return (EINVAL);

	if (uap->timeout) {
		/* Get timespec struct. */
		if ((error = copyin(uap->timeout, &ts32, sizeof(ts32))) != 0)
			return (error);
		CP(ts32, ts, tv_sec);
		CP(ts32, ts, tv_nsec);
		tsp = &ts;
	} else
		tsp = NULL;

	ujoblist = uma_zalloc(aiol_zone, M_WAITOK);
	ujoblist32 = (uint32_t *)ujoblist;
	error = copyin(uap->aiocbp, ujoblist32, uap->nent *
	    sizeof(ujoblist32[0]));
	if (error == 0) {
		/*
		 * Expand the packed 32-bit pointers in place, from the
		 * highest index down so that no entry is overwritten
		 * before it has been read.
		 */
		for (i = uap->nent - 1; i >= 0; i--)
			ujoblist[i] = PTRIN(ujoblist32[i]);

		error = kern_aio_suspend(td, uap->nent, ujoblist, tsp);
	}
	uma_zfree(aiol_zone, ujoblist);
	return (error);
}

int
freebsd32_aio_error(struct thread *td, struct freebsd32_aio_error_args *uap)
{

	return (kern_aio_error(td, (struct aiocb *)uap->aiocbp, &aiocb32_ops));
}

#ifdef COMPAT_FREEBSD6
int
freebsd6_freebsd32_aio_read(struct thread *td,
    struct freebsd6_freebsd32_aio_read_args *uap)
{

	return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ,
	    &aiocb32_ops_osigevent));
}
#endif

int
freebsd32_aio_read(struct thread *td, struct freebsd32_aio_read_args *uap)
{

	return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ,
	    &aiocb32_ops));
}

#ifdef COMPAT_FREEBSD6
int
freebsd6_freebsd32_aio_write(struct thread *td,
    struct freebsd6_freebsd32_aio_write_args *uap)
{

	return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE,
	    &aiocb32_ops_osigevent));
}
#endif

int
freebsd32_aio_write(struct thread *td, struct freebsd32_aio_write_args *uap)
{

	return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE,
	    &aiocb32_ops));
}

int
freebsd32_aio_mlock(struct thread *td, struct freebsd32_aio_mlock_args *uap)
{

	return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_MLOCK,
	    &aiocb32_ops));
}
int
freebsd32_aio_waitcomplete(struct thread *td,
    struct freebsd32_aio_waitcomplete_args *uap)
{
	struct timespec32 ts32;
	struct timespec ts, *tsp;
	int error;

	if (uap->timeout) {
		/* Get timespec struct. */
		error = copyin(uap->timeout, &ts32, sizeof(ts32));
		if (error)
			return (error);
		CP(ts32, ts, tv_sec);
		CP(ts32, ts, tv_nsec);
		tsp = &ts;
	} else
		tsp = NULL;

	return (kern_aio_waitcomplete(td, (struct aiocb **)uap->aiocbp, tsp,
	    &aiocb32_ops));
}

int
freebsd32_aio_fsync(struct thread *td, struct freebsd32_aio_fsync_args *uap)
{

	return (kern_aio_fsync(td, uap->op, (struct aiocb *)uap->aiocbp,
	    &aiocb32_ops));
}

#ifdef COMPAT_FREEBSD6
int
freebsd6_freebsd32_lio_listio(struct thread *td,
    struct freebsd6_freebsd32_lio_listio_args *uap)
{
	struct aiocb **acb_list;
	struct sigevent *sigp, sig;
	struct osigevent32 osig;
	uint32_t *acb_list32;
	int error, i, nent;

	if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT))
		return (EINVAL);

	nent = uap->nent;
	if (nent < 0 || nent > AIO_LISTIO_MAX)
		return (EINVAL);

	if (uap->sig && (uap->mode == LIO_NOWAIT)) {
		error = copyin(uap->sig, &osig, sizeof(osig));
		if (error)
			return (error);
		error = convert_old_sigevent32(&osig, &sig);
		if (error)
			return (error);
		sigp = &sig;
	} else
		sigp = NULL;

	acb_list32 = malloc(sizeof(uint32_t) * nent, M_LIO, M_WAITOK);
	error = copyin(uap->acb_list, acb_list32, nent * sizeof(uint32_t));
	if (error) {
		free(acb_list32, M_LIO);
		return (error);
	}
	acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK);
	for (i = 0; i < nent; i++)
		acb_list[i] = PTRIN(acb_list32[i]);
	free(acb_list32, M_LIO);

	error = kern_lio_listio(td, uap->mode,
	    (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp,
	    &aiocb32_ops_osigevent);
	free(acb_list, M_LIO);
	return (error);
}
#endif

int
freebsd32_lio_listio(struct thread *td, struct freebsd32_lio_listio_args *uap)
{
	struct aiocb **acb_list;
	struct sigevent *sigp, sig;
	struct sigevent32 sig32;
	uint32_t *acb_list32;
	int error, i, nent;

	if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT))
		return (EINVAL);

	nent = uap->nent;
	if (nent < 0 || nent > AIO_LISTIO_MAX)
		return (EINVAL);

	if (uap->sig && (uap->mode == LIO_NOWAIT)) {
		error = copyin(uap->sig, &sig32, sizeof(sig32));
		if (error)
			return (error);
		error = convert_sigevent32(&sig32, &sig);
		if (error)
			return (error);
		sigp = &sig;
	} else
		sigp = NULL;

	acb_list32 = malloc(sizeof(uint32_t) * nent, M_LIO, M_WAITOK);
	error = copyin(uap->acb_list, acb_list32, nent * sizeof(uint32_t));
	if (error) {
		free(acb_list32, M_LIO);
		return (error);
	}
	acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK);
	for (i = 0; i < nent; i++)
		acb_list[i] = PTRIN(acb_list32[i]);
	free(acb_list32, M_LIO);

	error = kern_lio_listio(td, uap->mode,
	    (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp,
	    &aiocb32_ops);
	free(acb_list, M_LIO);
	return (error);
}

#endif