1 /*- 2 * Copyright (c) 1997 John S. Dyson. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. John S. Dyson's name may not be used to endorse or promote products 10 * derived from this software without specific prior written permission. 11 * 12 * DISCLAIMER: This code isn't warranted to do anything useful. Anything 13 * bad that happens because of using this software isn't the responsibility 14 * of the author. This software is distributed AS-IS. 15 */ 16 17 /* 18 * This file contains support for the POSIX 1003.1B AIO/LIO facility. 19 */ 20 21 #include <sys/cdefs.h> 22 __FBSDID("$FreeBSD$"); 23 24 #include "opt_compat.h" 25 26 #include <sys/param.h> 27 #include <sys/systm.h> 28 #include <sys/malloc.h> 29 #include <sys/bio.h> 30 #include <sys/buf.h> 31 #include <sys/capsicum.h> 32 #include <sys/eventhandler.h> 33 #include <sys/sysproto.h> 34 #include <sys/filedesc.h> 35 #include <sys/kernel.h> 36 #include <sys/module.h> 37 #include <sys/kthread.h> 38 #include <sys/fcntl.h> 39 #include <sys/file.h> 40 #include <sys/limits.h> 41 #include <sys/lock.h> 42 #include <sys/mutex.h> 43 #include <sys/unistd.h> 44 #include <sys/posix4.h> 45 #include <sys/proc.h> 46 #include <sys/resourcevar.h> 47 #include <sys/signalvar.h> 48 #include <sys/protosw.h> 49 #include <sys/rwlock.h> 50 #include <sys/sema.h> 51 #include <sys/socket.h> 52 #include <sys/socketvar.h> 53 #include <sys/syscall.h> 54 #include <sys/sysent.h> 55 #include <sys/sysctl.h> 56 #include <sys/syslog.h> 57 #include <sys/sx.h> 58 #include <sys/taskqueue.h> 59 #include <sys/vnode.h> 60 #include <sys/conf.h> 61 #include <sys/event.h> 62 #include <sys/mount.h> 63 #include <geom/geom.h> 64 65 #include <machine/atomic.h> 66 67 #include <vm/vm.h> 68 #include <vm/vm_page.h> 69 #include <vm/vm_extern.h> 70 #include <vm/pmap.h> 71 #include <vm/vm_map.h> 72 #include <vm/vm_object.h> 73 #include <vm/uma.h> 74 #include <sys/aio.h> 75 76 /* 77 * Counter for allocating reference ids to new jobs. Wrapped to 1 on 78 * overflow. (XXX will be removed soon.) 79 */ 80 static u_long jobrefid; 81 82 /* 83 * Counter for aio_fsync. 
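 * Every job queued by aio_aqueue() is stamped with the next value of
 * this counter (job->seqno).  aio_queue_file() uses the sequence number
 * to make an LIO_SYNC request wait for all older requests on the same
 * file, and aio_bio_done_notify() releases the waiting sync job once the
 * last of those older requests completes.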
84 */ 85 static uint64_t jobseqno; 86 87 #ifndef MAX_AIO_PER_PROC 88 #define MAX_AIO_PER_PROC 32 89 #endif 90 91 #ifndef MAX_AIO_QUEUE_PER_PROC 92 #define MAX_AIO_QUEUE_PER_PROC 256 /* Bigger than AIO_LISTIO_MAX */ 93 #endif 94 95 #ifndef MAX_AIO_QUEUE 96 #define MAX_AIO_QUEUE 1024 /* Bigger than AIO_LISTIO_MAX */ 97 #endif 98 99 #ifndef MAX_BUF_AIO 100 #define MAX_BUF_AIO 16 101 #endif 102 103 FEATURE(aio, "Asynchronous I/O"); 104 105 static MALLOC_DEFINE(M_LIO, "lio", "listio aio control block list"); 106 107 static SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0, 108 "Async IO management"); 109 110 static int enable_aio_unsafe = 0; 111 SYSCTL_INT(_vfs_aio, OID_AUTO, enable_unsafe, CTLFLAG_RW, &enable_aio_unsafe, 0, 112 "Permit asynchronous IO on all file types, not just known-safe types"); 113 114 static unsigned int unsafe_warningcnt = 1; 115 SYSCTL_UINT(_vfs_aio, OID_AUTO, unsafe_warningcnt, CTLFLAG_RW, 116 &unsafe_warningcnt, 0, 117 "Warnings that will be triggered upon failed IO requests on unsafe files"); 118 119 static int max_aio_procs = MAX_AIO_PROCS; 120 SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs, CTLFLAG_RW, &max_aio_procs, 0, 121 "Maximum number of kernel processes to use for handling async IO "); 122 123 static int num_aio_procs = 0; 124 SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs, CTLFLAG_RD, &num_aio_procs, 0, 125 "Number of presently active kernel processes for async IO"); 126 127 /* 128 * The code will adjust the actual number of AIO processes towards this 129 * number when it gets a chance. 130 */ 131 static int target_aio_procs = TARGET_AIO_PROCS; 132 SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs, CTLFLAG_RW, &target_aio_procs, 133 0, 134 "Preferred number of ready kernel processes for async IO"); 135 136 static int max_queue_count = MAX_AIO_QUEUE; 137 SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue, CTLFLAG_RW, &max_queue_count, 0, 138 "Maximum number of aio requests to queue, globally"); 139 140 static int num_queue_count = 0; 141 SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count, CTLFLAG_RD, &num_queue_count, 0, 142 "Number of queued aio requests"); 143 144 static int num_buf_aio = 0; 145 SYSCTL_INT(_vfs_aio, OID_AUTO, num_buf_aio, CTLFLAG_RD, &num_buf_aio, 0, 146 "Number of aio requests presently handled by the buf subsystem"); 147 148 /* Number of async I/O processes in the process of being started */ 149 /* XXX This should be local to aio_aqueue() */ 150 static int num_aio_resv_start = 0; 151 152 static int aiod_lifetime; 153 SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_lifetime, CTLFLAG_RW, &aiod_lifetime, 0, 154 "Maximum lifetime for idle aiod"); 155 156 static int max_aio_per_proc = MAX_AIO_PER_PROC; 157 SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc, CTLFLAG_RW, &max_aio_per_proc, 158 0, 159 "Maximum active aio requests per process (stored in the process)"); 160 161 static int max_aio_queue_per_proc = MAX_AIO_QUEUE_PER_PROC; 162 SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc, CTLFLAG_RW, 163 &max_aio_queue_per_proc, 0, 164 "Maximum queued aio requests per process (stored in the process)"); 165 166 static int max_buf_aio = MAX_BUF_AIO; 167 SYSCTL_INT(_vfs_aio, OID_AUTO, max_buf_aio, CTLFLAG_RW, &max_buf_aio, 0, 168 "Maximum buf aio requests per process (stored in the process)"); 169 170 #ifdef COMPAT_FREEBSD6 171 typedef struct oaiocb { 172 int aio_fildes; /* File descriptor */ 173 off_t aio_offset; /* File offset for I/O */ 174 volatile void *aio_buf; /* I/O buffer in process space */ 175 size_t aio_nbytes; /* Number of bytes for I/O */ 176 struct osigevent 
	    aio_sigevent;		/* Signal to deliver */
	int	aio_lio_opcode;		/* LIO opcode */
	int	aio_reqprio;		/* Request priority -- ignored */
	struct	__aiocb_private	_aiocb_private;
} oaiocb_t;
#endif

/*
 * Below is a key of locks used to protect each member of struct kaiocb,
 * aioliojob and kaioinfo, and any backends.
 *
 * * - need not be protected
 * a - locked by kaioinfo lock
 * b - locked by the backend lock; the backend lock may be NULL in some
 *     cases (a BIO request is one example), in which case the proc lock
 *     is reused.
 * c - locked by aio_job_mtx, the lock for the generic file I/O backend.
 */

/*
 * If the routine that services an AIO request blocks while running in an
 * AIO kernel process it can starve other I/O requests.  BIO requests
 * queued via aio_qphysio() complete in GEOM and do not use AIO kernel
 * processes at all.  Socket I/O requests use a separate pool of
 * kprocs and also force non-blocking I/O.  Other file I/O requests
 * use the generic fo_read/fo_write operations which can block.  The
 * fsync and mlock operations can also block while executing.  Ideally
 * none of these requests would block while executing.
 *
 * Note that the service routines cannot toggle O_NONBLOCK in the file
 * structure directly while handling a request due to races with
 * userland threads.
 */

/* jobflags */
#define	KAIOCB_QUEUEING		0x01
#define	KAIOCB_CANCELLED	0x02
#define	KAIOCB_CANCELLING	0x04
#define	KAIOCB_CHECKSYNC	0x08
#define	KAIOCB_CLEARED		0x10
#define	KAIOCB_FINISHED		0x20

/*
 * AIO process info
 */
#define	AIOP_FREE	0x1		/* proc on free queue */

struct aioproc {
	int	aioprocflags;		/* (c) AIO proc flags */
	TAILQ_ENTRY(aioproc) list;	/* (c) list of processes */
	struct	proc *aioproc;		/* (*) the AIO proc */
};

/*
 * Data structure for lio signal management.
 */
struct aioliojob {
	int	lioj_flags;		/* (a) listio flags */
	int	lioj_count;		/* (a) count of outstanding jobs */
	int	lioj_finished_count;	/* (a) count of finished jobs */
	struct	sigevent lioj_signal;	/* (a) signal on all I/O done */
	TAILQ_ENTRY(aioliojob) lioj_list;	/* (a) lio list */
	struct	knlist klist;		/* (a) list of knotes */
	ksiginfo_t	lioj_ksi;	/* (a) Realtime signal info */
};

#define	LIOJ_SIGNAL		0x1	/* signal on all done (lio) */
#define	LIOJ_SIGNAL_POSTED	0x2	/* signal has been posted */
#define	LIOJ_KEVENT_POSTED	0x4	/* kevent triggered */

/*
 * Per-process AIO data structure.
 */
struct kaioinfo {
	struct	mtx kaio_mtx;		/* the lock to protect this struct */
	int	kaio_flags;		/* (a) per process kaio flags */
	int	kaio_maxactive_count;	/* (*) maximum number of AIOs */
	int	kaio_active_count;	/* (c) number of currently used AIOs */
	int	kaio_qallowed_count;	/* (*) maximum size of AIO queue */
	int	kaio_count;		/* (a) size of AIO queue */
	int	kaio_ballowed_count;	/* (*) maximum number of buffers */
	int	kaio_buffer_count;	/* (a) number of physio buffers */
	TAILQ_HEAD(,kaiocb) kaio_all;	/* (a) all AIOs in a process */
	TAILQ_HEAD(,kaiocb) kaio_done;	/* (a) done queue for process */
	TAILQ_HEAD(,aioliojob) kaio_liojoblist; /* (a) list of lio jobs */
	TAILQ_HEAD(,kaiocb) kaio_jobqueue;	/* (a) job queue for process */
	TAILQ_HEAD(,kaiocb) kaio_syncqueue;	/* (a) queue for aio_fsync */
	TAILQ_HEAD(,kaiocb) kaio_syncready;	/* (a) second q for aio_fsync */
	struct	task
kaio_task; /* (*) task to kick aio processes */ 265 struct task kaio_sync_task; /* (*) task to schedule fsync jobs */ 266 }; 267 268 #define AIO_LOCK(ki) mtx_lock(&(ki)->kaio_mtx) 269 #define AIO_UNLOCK(ki) mtx_unlock(&(ki)->kaio_mtx) 270 #define AIO_LOCK_ASSERT(ki, f) mtx_assert(&(ki)->kaio_mtx, (f)) 271 #define AIO_MTX(ki) (&(ki)->kaio_mtx) 272 273 #define KAIO_RUNDOWN 0x1 /* process is being run down */ 274 #define KAIO_WAKEUP 0x2 /* wakeup process when AIO completes */ 275 276 /* 277 * Operations used to interact with userland aio control blocks. 278 * Different ABIs provide their own operations. 279 */ 280 struct aiocb_ops { 281 int (*copyin)(struct aiocb *ujob, struct aiocb *kjob); 282 long (*fetch_status)(struct aiocb *ujob); 283 long (*fetch_error)(struct aiocb *ujob); 284 int (*store_status)(struct aiocb *ujob, long status); 285 int (*store_error)(struct aiocb *ujob, long error); 286 int (*store_kernelinfo)(struct aiocb *ujob, long jobref); 287 int (*store_aiocb)(struct aiocb **ujobp, struct aiocb *ujob); 288 }; 289 290 static TAILQ_HEAD(,aioproc) aio_freeproc; /* (c) Idle daemons */ 291 static struct sema aio_newproc_sem; 292 static struct mtx aio_job_mtx; 293 static TAILQ_HEAD(,kaiocb) aio_jobs; /* (c) Async job list */ 294 static struct unrhdr *aiod_unr; 295 296 void aio_init_aioinfo(struct proc *p); 297 static int aio_onceonly(void); 298 static int aio_free_entry(struct kaiocb *job); 299 static void aio_process_rw(struct kaiocb *job); 300 static void aio_process_sync(struct kaiocb *job); 301 static void aio_process_mlock(struct kaiocb *job); 302 static void aio_schedule_fsync(void *context, int pending); 303 static int aio_newproc(int *); 304 int aio_aqueue(struct thread *td, struct aiocb *ujob, 305 struct aioliojob *lio, int type, struct aiocb_ops *ops); 306 static int aio_queue_file(struct file *fp, struct kaiocb *job); 307 static void aio_physwakeup(struct bio *bp); 308 static void aio_proc_rundown(void *arg, struct proc *p); 309 static void aio_proc_rundown_exec(void *arg, struct proc *p, 310 struct image_params *imgp); 311 static int aio_qphysio(struct proc *p, struct kaiocb *job); 312 static void aio_daemon(void *param); 313 static void aio_bio_done_notify(struct proc *userp, struct kaiocb *job); 314 static bool aio_clear_cancel_function_locked(struct kaiocb *job); 315 static int aio_kick(struct proc *userp); 316 static void aio_kick_nowait(struct proc *userp); 317 static void aio_kick_helper(void *context, int pending); 318 static int filt_aioattach(struct knote *kn); 319 static void filt_aiodetach(struct knote *kn); 320 static int filt_aio(struct knote *kn, long hint); 321 static int filt_lioattach(struct knote *kn); 322 static void filt_liodetach(struct knote *kn); 323 static int filt_lio(struct knote *kn, long hint); 324 325 /* 326 * Zones for: 327 * kaio Per process async io info 328 * aiop async io process data 329 * aiocb async io jobs 330 * aiol list io job pointer - internal to aio_suspend XXX 331 * aiolio list io jobs 332 */ 333 static uma_zone_t kaio_zone, aiop_zone, aiocb_zone, aiol_zone, aiolio_zone; 334 335 /* kqueue filters for aio */ 336 static struct filterops aio_filtops = { 337 .f_isfd = 0, 338 .f_attach = filt_aioattach, 339 .f_detach = filt_aiodetach, 340 .f_event = filt_aio, 341 }; 342 static struct filterops lio_filtops = { 343 .f_isfd = 0, 344 .f_attach = filt_lioattach, 345 .f_detach = filt_liodetach, 346 .f_event = filt_lio 347 }; 348 349 static eventhandler_tag exit_tag, exec_tag; 350 351 TASKQUEUE_DEFINE_THREAD(aiod_kick); 352 353 /* 354 * 
Main operations function for use as a kernel module. 355 */ 356 static int 357 aio_modload(struct module *module, int cmd, void *arg) 358 { 359 int error = 0; 360 361 switch (cmd) { 362 case MOD_LOAD: 363 aio_onceonly(); 364 break; 365 case MOD_SHUTDOWN: 366 break; 367 default: 368 error = EOPNOTSUPP; 369 break; 370 } 371 return (error); 372 } 373 374 static moduledata_t aio_mod = { 375 "aio", 376 &aio_modload, 377 NULL 378 }; 379 380 DECLARE_MODULE(aio, aio_mod, SI_SUB_VFS, SI_ORDER_ANY); 381 MODULE_VERSION(aio, 1); 382 383 /* 384 * Startup initialization 385 */ 386 static int 387 aio_onceonly(void) 388 { 389 390 exit_tag = EVENTHANDLER_REGISTER(process_exit, aio_proc_rundown, NULL, 391 EVENTHANDLER_PRI_ANY); 392 exec_tag = EVENTHANDLER_REGISTER(process_exec, aio_proc_rundown_exec, 393 NULL, EVENTHANDLER_PRI_ANY); 394 kqueue_add_filteropts(EVFILT_AIO, &aio_filtops); 395 kqueue_add_filteropts(EVFILT_LIO, &lio_filtops); 396 TAILQ_INIT(&aio_freeproc); 397 sema_init(&aio_newproc_sem, 0, "aio_new_proc"); 398 mtx_init(&aio_job_mtx, "aio_job", NULL, MTX_DEF); 399 TAILQ_INIT(&aio_jobs); 400 aiod_unr = new_unrhdr(1, INT_MAX, NULL); 401 kaio_zone = uma_zcreate("AIO", sizeof(struct kaioinfo), NULL, NULL, 402 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 403 aiop_zone = uma_zcreate("AIOP", sizeof(struct aioproc), NULL, 404 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 405 aiocb_zone = uma_zcreate("AIOCB", sizeof(struct kaiocb), NULL, NULL, 406 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 407 aiol_zone = uma_zcreate("AIOL", AIO_LISTIO_MAX*sizeof(intptr_t) , NULL, 408 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 409 aiolio_zone = uma_zcreate("AIOLIO", sizeof(struct aioliojob), NULL, 410 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 411 aiod_lifetime = AIOD_LIFETIME_DEFAULT; 412 jobrefid = 1; 413 p31b_setcfg(CTL_P1003_1B_ASYNCHRONOUS_IO, _POSIX_ASYNCHRONOUS_IO); 414 p31b_setcfg(CTL_P1003_1B_AIO_LISTIO_MAX, AIO_LISTIO_MAX); 415 p31b_setcfg(CTL_P1003_1B_AIO_MAX, MAX_AIO_QUEUE); 416 p31b_setcfg(CTL_P1003_1B_AIO_PRIO_DELTA_MAX, 0); 417 418 return (0); 419 } 420 421 /* 422 * Init the per-process aioinfo structure. The aioinfo limits are set 423 * per-process for user limit (resource) management. 
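 *
 * Callers initialize this lazily; the usual pattern before queueing a
 * request (see aio_aqueue() and kern_lio_listio()) is simply:
 *
 *	if (p->p_aioinfo == NULL)
 *		aio_init_aioinfo(p);
 *
 * If two threads race to do this, the loser detects the existing
 * p_aioinfo under the proc lock below and frees its extra allocation.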
424 */ 425 void 426 aio_init_aioinfo(struct proc *p) 427 { 428 struct kaioinfo *ki; 429 430 ki = uma_zalloc(kaio_zone, M_WAITOK); 431 mtx_init(&ki->kaio_mtx, "aiomtx", NULL, MTX_DEF | MTX_NEW); 432 ki->kaio_flags = 0; 433 ki->kaio_maxactive_count = max_aio_per_proc; 434 ki->kaio_active_count = 0; 435 ki->kaio_qallowed_count = max_aio_queue_per_proc; 436 ki->kaio_count = 0; 437 ki->kaio_ballowed_count = max_buf_aio; 438 ki->kaio_buffer_count = 0; 439 TAILQ_INIT(&ki->kaio_all); 440 TAILQ_INIT(&ki->kaio_done); 441 TAILQ_INIT(&ki->kaio_jobqueue); 442 TAILQ_INIT(&ki->kaio_liojoblist); 443 TAILQ_INIT(&ki->kaio_syncqueue); 444 TAILQ_INIT(&ki->kaio_syncready); 445 TASK_INIT(&ki->kaio_task, 0, aio_kick_helper, p); 446 TASK_INIT(&ki->kaio_sync_task, 0, aio_schedule_fsync, ki); 447 PROC_LOCK(p); 448 if (p->p_aioinfo == NULL) { 449 p->p_aioinfo = ki; 450 PROC_UNLOCK(p); 451 } else { 452 PROC_UNLOCK(p); 453 mtx_destroy(&ki->kaio_mtx); 454 uma_zfree(kaio_zone, ki); 455 } 456 457 while (num_aio_procs < MIN(target_aio_procs, max_aio_procs)) 458 aio_newproc(NULL); 459 } 460 461 static int 462 aio_sendsig(struct proc *p, struct sigevent *sigev, ksiginfo_t *ksi) 463 { 464 struct thread *td; 465 int error; 466 467 error = sigev_findtd(p, sigev, &td); 468 if (error) 469 return (error); 470 if (!KSI_ONQ(ksi)) { 471 ksiginfo_set_sigev(ksi, sigev); 472 ksi->ksi_code = SI_ASYNCIO; 473 ksi->ksi_flags |= KSI_EXT | KSI_INS; 474 tdsendsignal(p, td, ksi->ksi_signo, ksi); 475 } 476 PROC_UNLOCK(p); 477 return (error); 478 } 479 480 /* 481 * Free a job entry. Wait for completion if it is currently active, but don't 482 * delay forever. If we delay, we return a flag that says that we have to 483 * restart the queue scan. 484 */ 485 static int 486 aio_free_entry(struct kaiocb *job) 487 { 488 struct kaioinfo *ki; 489 struct aioliojob *lj; 490 struct proc *p; 491 492 p = job->userproc; 493 MPASS(curproc == p); 494 ki = p->p_aioinfo; 495 MPASS(ki != NULL); 496 497 AIO_LOCK_ASSERT(ki, MA_OWNED); 498 MPASS(job->jobflags & KAIOCB_FINISHED); 499 500 atomic_subtract_int(&num_queue_count, 1); 501 502 ki->kaio_count--; 503 MPASS(ki->kaio_count >= 0); 504 505 TAILQ_REMOVE(&ki->kaio_done, job, plist); 506 TAILQ_REMOVE(&ki->kaio_all, job, allist); 507 508 lj = job->lio; 509 if (lj) { 510 lj->lioj_count--; 511 lj->lioj_finished_count--; 512 513 if (lj->lioj_count == 0) { 514 TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list); 515 /* lio is going away, we need to destroy any knotes */ 516 knlist_delete(&lj->klist, curthread, 1); 517 PROC_LOCK(p); 518 sigqueue_take(&lj->lioj_ksi); 519 PROC_UNLOCK(p); 520 uma_zfree(aiolio_zone, lj); 521 } 522 } 523 524 /* job is going away, we need to destroy any knotes */ 525 knlist_delete(&job->klist, curthread, 1); 526 PROC_LOCK(p); 527 sigqueue_take(&job->ksi); 528 PROC_UNLOCK(p); 529 530 AIO_UNLOCK(ki); 531 532 /* 533 * The thread argument here is used to find the owning process 534 * and is also passed to fo_close() which may pass it to various 535 * places such as devsw close() routines. Because of that, we 536 * need a thread pointer from the process owning the job that is 537 * persistent and won't disappear out from under us or move to 538 * another process. 539 * 540 * Currently, all the callers of this function call it to remove 541 * a kaiocb from the current process' job list either via a 542 * syscall or due to the current process calling exit() or 543 * execve(). Thus, we know that p == curproc. We also know that 544 * curthread can't exit since we are curthread. 
545 * 546 * Therefore, we use curthread as the thread to pass to 547 * knlist_delete(). This does mean that it is possible for the 548 * thread pointer at close time to differ from the thread pointer 549 * at open time, but this is already true of file descriptors in 550 * a multithreaded process. 551 */ 552 if (job->fd_file) 553 fdrop(job->fd_file, curthread); 554 crfree(job->cred); 555 uma_zfree(aiocb_zone, job); 556 AIO_LOCK(ki); 557 558 return (0); 559 } 560 561 static void 562 aio_proc_rundown_exec(void *arg, struct proc *p, 563 struct image_params *imgp __unused) 564 { 565 aio_proc_rundown(arg, p); 566 } 567 568 static int 569 aio_cancel_job(struct proc *p, struct kaioinfo *ki, struct kaiocb *job) 570 { 571 aio_cancel_fn_t *func; 572 int cancelled; 573 574 AIO_LOCK_ASSERT(ki, MA_OWNED); 575 if (job->jobflags & (KAIOCB_CANCELLED | KAIOCB_FINISHED)) 576 return (0); 577 MPASS((job->jobflags & KAIOCB_CANCELLING) == 0); 578 job->jobflags |= KAIOCB_CANCELLED; 579 580 func = job->cancel_fn; 581 582 /* 583 * If there is no cancel routine, just leave the job marked as 584 * cancelled. The job should be in active use by a caller who 585 * should complete it normally or when it fails to install a 586 * cancel routine. 587 */ 588 if (func == NULL) 589 return (0); 590 591 /* 592 * Set the CANCELLING flag so that aio_complete() will defer 593 * completions of this job. This prevents the job from being 594 * freed out from under the cancel callback. After the 595 * callback any deferred completion (whether from the callback 596 * or any other source) will be completed. 597 */ 598 job->jobflags |= KAIOCB_CANCELLING; 599 AIO_UNLOCK(ki); 600 func(job); 601 AIO_LOCK(ki); 602 job->jobflags &= ~KAIOCB_CANCELLING; 603 if (job->jobflags & KAIOCB_FINISHED) { 604 cancelled = job->uaiocb._aiocb_private.error == ECANCELED; 605 TAILQ_REMOVE(&ki->kaio_jobqueue, job, plist); 606 aio_bio_done_notify(p, job); 607 } else { 608 /* 609 * The cancel callback might have scheduled an 610 * operation to cancel this request, but it is 611 * only counted as cancelled if the request is 612 * cancelled when the callback returns. 613 */ 614 cancelled = 0; 615 } 616 return (cancelled); 617 } 618 619 /* 620 * Rundown the jobs for a given process. 621 */ 622 static void 623 aio_proc_rundown(void *arg, struct proc *p) 624 { 625 struct kaioinfo *ki; 626 struct aioliojob *lj; 627 struct kaiocb *job, *jobn; 628 629 KASSERT(curthread->td_proc == p, 630 ("%s: called on non-curproc", __func__)); 631 ki = p->p_aioinfo; 632 if (ki == NULL) 633 return; 634 635 AIO_LOCK(ki); 636 ki->kaio_flags |= KAIO_RUNDOWN; 637 638 restart: 639 640 /* 641 * Try to cancel all pending requests. This code simulates 642 * aio_cancel on all pending I/O requests. 643 */ 644 TAILQ_FOREACH_SAFE(job, &ki->kaio_jobqueue, plist, jobn) { 645 aio_cancel_job(p, ki, job); 646 } 647 648 /* Wait for all running I/O to be finished */ 649 if (TAILQ_FIRST(&ki->kaio_jobqueue) || ki->kaio_active_count != 0) { 650 ki->kaio_flags |= KAIO_WAKEUP; 651 msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO, "aioprn", hz); 652 goto restart; 653 } 654 655 /* Free all completed I/O requests. 
*/ 656 while ((job = TAILQ_FIRST(&ki->kaio_done)) != NULL) 657 aio_free_entry(job); 658 659 while ((lj = TAILQ_FIRST(&ki->kaio_liojoblist)) != NULL) { 660 if (lj->lioj_count == 0) { 661 TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list); 662 knlist_delete(&lj->klist, curthread, 1); 663 PROC_LOCK(p); 664 sigqueue_take(&lj->lioj_ksi); 665 PROC_UNLOCK(p); 666 uma_zfree(aiolio_zone, lj); 667 } else { 668 panic("LIO job not cleaned up: C:%d, FC:%d\n", 669 lj->lioj_count, lj->lioj_finished_count); 670 } 671 } 672 AIO_UNLOCK(ki); 673 taskqueue_drain(taskqueue_aiod_kick, &ki->kaio_task); 674 taskqueue_drain(taskqueue_aiod_kick, &ki->kaio_sync_task); 675 mtx_destroy(&ki->kaio_mtx); 676 uma_zfree(kaio_zone, ki); 677 p->p_aioinfo = NULL; 678 } 679 680 /* 681 * Select a job to run (called by an AIO daemon). 682 */ 683 static struct kaiocb * 684 aio_selectjob(struct aioproc *aiop) 685 { 686 struct kaiocb *job; 687 struct kaioinfo *ki; 688 struct proc *userp; 689 690 mtx_assert(&aio_job_mtx, MA_OWNED); 691 restart: 692 TAILQ_FOREACH(job, &aio_jobs, list) { 693 userp = job->userproc; 694 ki = userp->p_aioinfo; 695 696 if (ki->kaio_active_count < ki->kaio_maxactive_count) { 697 TAILQ_REMOVE(&aio_jobs, job, list); 698 if (!aio_clear_cancel_function(job)) 699 goto restart; 700 701 /* Account for currently active jobs. */ 702 ki->kaio_active_count++; 703 break; 704 } 705 } 706 return (job); 707 } 708 709 /* 710 * Move all data to a permanent storage device. This code 711 * simulates the fsync syscall. 712 */ 713 static int 714 aio_fsync_vnode(struct thread *td, struct vnode *vp) 715 { 716 struct mount *mp; 717 int error; 718 719 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 720 goto drop; 721 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 722 if (vp->v_object != NULL) { 723 VM_OBJECT_WLOCK(vp->v_object); 724 vm_object_page_clean(vp->v_object, 0, 0, 0); 725 VM_OBJECT_WUNLOCK(vp->v_object); 726 } 727 error = VOP_FSYNC(vp, MNT_WAIT, td); 728 729 VOP_UNLOCK(vp, 0); 730 vn_finished_write(mp); 731 drop: 732 return (error); 733 } 734 735 /* 736 * The AIO processing activity for LIO_READ/LIO_WRITE. This is the code that 737 * does the I/O request for the non-physio version of the operations. The 738 * normal vn operations are used, and this code should work in all instances 739 * for every type of file, including pipes, sockets, fifos, and regular files. 740 * 741 * XXX I don't think it works well for socket, pipe, and fifo. 
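 *
 * The control block is translated into a single-segment uio, roughly:
 *
 *	aiov.iov_base = cb->aio_buf;	auio.uio_offset = cb->aio_offset;
 *	aiov.iov_len = cb->aio_nbytes;	auio.uio_resid = cb->aio_nbytes;
 *
 * and handed to fo_read()/fo_write() with FOF_OFFSET, so the
 * descriptor's implicit seek offset is never touched.  The transfer runs
 * with the submitter's credentials (job->cred) and, since we execute in
 * an AIO daemon, with the submitter's vmspace (aio_switch_vmspace()).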
742 */ 743 static void 744 aio_process_rw(struct kaiocb *job) 745 { 746 struct ucred *td_savedcred; 747 struct thread *td; 748 struct aiocb *cb; 749 struct file *fp; 750 struct uio auio; 751 struct iovec aiov; 752 ssize_t cnt; 753 long msgsnd_st, msgsnd_end; 754 long msgrcv_st, msgrcv_end; 755 long oublock_st, oublock_end; 756 long inblock_st, inblock_end; 757 int error; 758 759 KASSERT(job->uaiocb.aio_lio_opcode == LIO_READ || 760 job->uaiocb.aio_lio_opcode == LIO_WRITE, 761 ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode)); 762 763 aio_switch_vmspace(job); 764 td = curthread; 765 td_savedcred = td->td_ucred; 766 td->td_ucred = job->cred; 767 cb = &job->uaiocb; 768 fp = job->fd_file; 769 770 aiov.iov_base = (void *)(uintptr_t)cb->aio_buf; 771 aiov.iov_len = cb->aio_nbytes; 772 773 auio.uio_iov = &aiov; 774 auio.uio_iovcnt = 1; 775 auio.uio_offset = cb->aio_offset; 776 auio.uio_resid = cb->aio_nbytes; 777 cnt = cb->aio_nbytes; 778 auio.uio_segflg = UIO_USERSPACE; 779 auio.uio_td = td; 780 781 msgrcv_st = td->td_ru.ru_msgrcv; 782 msgsnd_st = td->td_ru.ru_msgsnd; 783 inblock_st = td->td_ru.ru_inblock; 784 oublock_st = td->td_ru.ru_oublock; 785 786 /* 787 * aio_aqueue() acquires a reference to the file that is 788 * released in aio_free_entry(). 789 */ 790 if (cb->aio_lio_opcode == LIO_READ) { 791 auio.uio_rw = UIO_READ; 792 if (auio.uio_resid == 0) 793 error = 0; 794 else 795 error = fo_read(fp, &auio, fp->f_cred, FOF_OFFSET, td); 796 } else { 797 if (fp->f_type == DTYPE_VNODE) 798 bwillwrite(); 799 auio.uio_rw = UIO_WRITE; 800 error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, td); 801 } 802 msgrcv_end = td->td_ru.ru_msgrcv; 803 msgsnd_end = td->td_ru.ru_msgsnd; 804 inblock_end = td->td_ru.ru_inblock; 805 oublock_end = td->td_ru.ru_oublock; 806 807 job->msgrcv = msgrcv_end - msgrcv_st; 808 job->msgsnd = msgsnd_end - msgsnd_st; 809 job->inblock = inblock_end - inblock_st; 810 job->outblock = oublock_end - oublock_st; 811 812 if ((error) && (auio.uio_resid != cnt)) { 813 if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) 814 error = 0; 815 if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE)) { 816 PROC_LOCK(job->userproc); 817 kern_psignal(job->userproc, SIGPIPE); 818 PROC_UNLOCK(job->userproc); 819 } 820 } 821 822 cnt -= auio.uio_resid; 823 td->td_ucred = td_savedcred; 824 if (error) 825 aio_complete(job, -1, error); 826 else 827 aio_complete(job, cnt, 0); 828 } 829 830 static void 831 aio_process_sync(struct kaiocb *job) 832 { 833 struct thread *td = curthread; 834 struct ucred *td_savedcred = td->td_ucred; 835 struct file *fp = job->fd_file; 836 int error = 0; 837 838 KASSERT(job->uaiocb.aio_lio_opcode == LIO_SYNC, 839 ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode)); 840 841 td->td_ucred = job->cred; 842 if (fp->f_vnode != NULL) 843 error = aio_fsync_vnode(td, fp->f_vnode); 844 td->td_ucred = td_savedcred; 845 if (error) 846 aio_complete(job, -1, error); 847 else 848 aio_complete(job, 0, 0); 849 } 850 851 static void 852 aio_process_mlock(struct kaiocb *job) 853 { 854 struct aiocb *cb = &job->uaiocb; 855 int error; 856 857 KASSERT(job->uaiocb.aio_lio_opcode == LIO_MLOCK, 858 ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode)); 859 860 aio_switch_vmspace(job); 861 error = vm_mlock(job->userproc, job->cred, 862 __DEVOLATILE(void *, cb->aio_buf), cb->aio_nbytes); 863 if (error) 864 aio_complete(job, -1, error); 865 else 866 aio_complete(job, 0, 0); 867 } 868 869 static void 870 aio_bio_done_notify(struct proc *userp, struct kaiocb *job) 871 { 872 
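	/*
	 * Completion bookkeeping shared by every backend: move the
	 * finished job onto the per-process done queue, deliver its
	 * per-job signal or kevent (unless the process is in rundown),
	 * post the "all done" notification for a parent lio if this was
	 * its last job, and release any aio_fsync() jobs that were
	 * waiting on this request via KAIOCB_CHECKSYNC.  Called with the
	 * kaioinfo lock held.
	 */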
struct aioliojob *lj; 873 struct kaioinfo *ki; 874 struct kaiocb *sjob, *sjobn; 875 int lj_done; 876 bool schedule_fsync; 877 878 ki = userp->p_aioinfo; 879 AIO_LOCK_ASSERT(ki, MA_OWNED); 880 lj = job->lio; 881 lj_done = 0; 882 if (lj) { 883 lj->lioj_finished_count++; 884 if (lj->lioj_count == lj->lioj_finished_count) 885 lj_done = 1; 886 } 887 TAILQ_INSERT_TAIL(&ki->kaio_done, job, plist); 888 MPASS(job->jobflags & KAIOCB_FINISHED); 889 890 if (ki->kaio_flags & KAIO_RUNDOWN) 891 goto notification_done; 892 893 if (job->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL || 894 job->uaiocb.aio_sigevent.sigev_notify == SIGEV_THREAD_ID) 895 aio_sendsig(userp, &job->uaiocb.aio_sigevent, &job->ksi); 896 897 KNOTE_LOCKED(&job->klist, 1); 898 899 if (lj_done) { 900 if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) { 901 lj->lioj_flags |= LIOJ_KEVENT_POSTED; 902 KNOTE_LOCKED(&lj->klist, 1); 903 } 904 if ((lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) 905 == LIOJ_SIGNAL 906 && (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL || 907 lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID)) { 908 aio_sendsig(userp, &lj->lioj_signal, &lj->lioj_ksi); 909 lj->lioj_flags |= LIOJ_SIGNAL_POSTED; 910 } 911 } 912 913 notification_done: 914 if (job->jobflags & KAIOCB_CHECKSYNC) { 915 schedule_fsync = false; 916 TAILQ_FOREACH_SAFE(sjob, &ki->kaio_syncqueue, list, sjobn) { 917 if (job->fd_file != sjob->fd_file || 918 job->seqno >= sjob->seqno) 919 continue; 920 if (--sjob->pending > 0) 921 continue; 922 TAILQ_REMOVE(&ki->kaio_syncqueue, sjob, list); 923 if (!aio_clear_cancel_function_locked(sjob)) 924 continue; 925 TAILQ_INSERT_TAIL(&ki->kaio_syncready, sjob, list); 926 schedule_fsync = true; 927 } 928 if (schedule_fsync) 929 taskqueue_enqueue(taskqueue_aiod_kick, 930 &ki->kaio_sync_task); 931 } 932 if (ki->kaio_flags & KAIO_WAKEUP) { 933 ki->kaio_flags &= ~KAIO_WAKEUP; 934 wakeup(&userp->p_aioinfo); 935 } 936 } 937 938 static void 939 aio_schedule_fsync(void *context, int pending) 940 { 941 struct kaioinfo *ki; 942 struct kaiocb *job; 943 944 ki = context; 945 AIO_LOCK(ki); 946 while (!TAILQ_EMPTY(&ki->kaio_syncready)) { 947 job = TAILQ_FIRST(&ki->kaio_syncready); 948 TAILQ_REMOVE(&ki->kaio_syncready, job, list); 949 AIO_UNLOCK(ki); 950 aio_schedule(job, aio_process_sync); 951 AIO_LOCK(ki); 952 } 953 AIO_UNLOCK(ki); 954 } 955 956 bool 957 aio_cancel_cleared(struct kaiocb *job) 958 { 959 struct kaioinfo *ki; 960 961 /* 962 * The caller should hold the same queue lock held when 963 * aio_clear_cancel_function() was called and set this flag 964 * ensuring this check sees an up-to-date value. However, 965 * there is no way to assert that. 
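 *
 * A typical cancel routine, mirroring aio_cancel_daemon_job() and
 * aio_cancel_sync() below, looks roughly like this (backend_queue and
 * backend_queue_mtx stand in for whatever queue and lock the backend
 * held when it called aio_set_cancel_function()):
 *
 *	static void
 *	backend_cancel(struct kaiocb *job)
 *	{
 *
 *		mtx_lock(&backend_queue_mtx);
 *		if (!aio_cancel_cleared(job))
 *			TAILQ_REMOVE(&backend_queue, job, list);
 *		mtx_unlock(&backend_queue_mtx);
 *		aio_cancel(job);
 *	}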
966 */ 967 ki = job->userproc->p_aioinfo; 968 return ((job->jobflags & KAIOCB_CLEARED) != 0); 969 } 970 971 static bool 972 aio_clear_cancel_function_locked(struct kaiocb *job) 973 { 974 975 AIO_LOCK_ASSERT(job->userproc->p_aioinfo, MA_OWNED); 976 MPASS(job->cancel_fn != NULL); 977 if (job->jobflags & KAIOCB_CANCELLING) { 978 job->jobflags |= KAIOCB_CLEARED; 979 return (false); 980 } 981 job->cancel_fn = NULL; 982 return (true); 983 } 984 985 bool 986 aio_clear_cancel_function(struct kaiocb *job) 987 { 988 struct kaioinfo *ki; 989 bool ret; 990 991 ki = job->userproc->p_aioinfo; 992 AIO_LOCK(ki); 993 ret = aio_clear_cancel_function_locked(job); 994 AIO_UNLOCK(ki); 995 return (ret); 996 } 997 998 static bool 999 aio_set_cancel_function_locked(struct kaiocb *job, aio_cancel_fn_t *func) 1000 { 1001 1002 AIO_LOCK_ASSERT(job->userproc->p_aioinfo, MA_OWNED); 1003 if (job->jobflags & KAIOCB_CANCELLED) 1004 return (false); 1005 job->cancel_fn = func; 1006 return (true); 1007 } 1008 1009 bool 1010 aio_set_cancel_function(struct kaiocb *job, aio_cancel_fn_t *func) 1011 { 1012 struct kaioinfo *ki; 1013 bool ret; 1014 1015 ki = job->userproc->p_aioinfo; 1016 AIO_LOCK(ki); 1017 ret = aio_set_cancel_function_locked(job, func); 1018 AIO_UNLOCK(ki); 1019 return (ret); 1020 } 1021 1022 void 1023 aio_complete(struct kaiocb *job, long status, int error) 1024 { 1025 struct kaioinfo *ki; 1026 struct proc *userp; 1027 1028 job->uaiocb._aiocb_private.error = error; 1029 job->uaiocb._aiocb_private.status = status; 1030 1031 userp = job->userproc; 1032 ki = userp->p_aioinfo; 1033 1034 AIO_LOCK(ki); 1035 KASSERT(!(job->jobflags & KAIOCB_FINISHED), 1036 ("duplicate aio_complete")); 1037 job->jobflags |= KAIOCB_FINISHED; 1038 if ((job->jobflags & (KAIOCB_QUEUEING | KAIOCB_CANCELLING)) == 0) { 1039 TAILQ_REMOVE(&ki->kaio_jobqueue, job, plist); 1040 aio_bio_done_notify(userp, job); 1041 } 1042 AIO_UNLOCK(ki); 1043 } 1044 1045 void 1046 aio_cancel(struct kaiocb *job) 1047 { 1048 1049 aio_complete(job, -1, ECANCELED); 1050 } 1051 1052 void 1053 aio_switch_vmspace(struct kaiocb *job) 1054 { 1055 1056 vmspace_switch_aio(job->userproc->p_vmspace); 1057 } 1058 1059 /* 1060 * The AIO daemon, most of the actual work is done in aio_process_*, 1061 * but the setup (and address space mgmt) is done in this routine. 1062 */ 1063 static void 1064 aio_daemon(void *_id) 1065 { 1066 struct kaiocb *job; 1067 struct aioproc *aiop; 1068 struct kaioinfo *ki; 1069 struct proc *p; 1070 struct vmspace *myvm; 1071 struct thread *td = curthread; 1072 int id = (intptr_t)_id; 1073 1074 /* 1075 * Grab an extra reference on the daemon's vmspace so that it 1076 * doesn't get freed by jobs that switch to a different 1077 * vmspace. 1078 */ 1079 p = td->td_proc; 1080 myvm = vmspace_acquire_ref(p); 1081 1082 KASSERT(p->p_textvp == NULL, ("kthread has a textvp")); 1083 1084 /* 1085 * Allocate and ready the aio control info. There is one aiop structure 1086 * per daemon. 1087 */ 1088 aiop = uma_zalloc(aiop_zone, M_WAITOK); 1089 aiop->aioproc = p; 1090 aiop->aioprocflags = 0; 1091 1092 /* 1093 * Wakeup parent process. (Parent sleeps to keep from blasting away 1094 * and creating too many daemons.) 1095 */ 1096 sema_post(&aio_newproc_sem); 1097 1098 mtx_lock(&aio_job_mtx); 1099 for (;;) { 1100 /* 1101 * Take daemon off of free queue 1102 */ 1103 if (aiop->aioprocflags & AIOP_FREE) { 1104 TAILQ_REMOVE(&aio_freeproc, aiop, list); 1105 aiop->aioprocflags &= ~AIOP_FREE; 1106 } 1107 1108 /* 1109 * Check for jobs. 
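 * Run every job aio_selectjob() hands us; the handler switches to
 * the submitting process' vmspace and credentials as needed.  Once
 * the queue is drained we switch back to the daemon's own vmspace,
 * park on the free list, and exit if we stay idle longer than
 * aiod_lifetime while more than target_aio_procs daemons exist.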
1110 */ 1111 while ((job = aio_selectjob(aiop)) != NULL) { 1112 mtx_unlock(&aio_job_mtx); 1113 1114 ki = job->userproc->p_aioinfo; 1115 job->handle_fn(job); 1116 1117 mtx_lock(&aio_job_mtx); 1118 /* Decrement the active job count. */ 1119 ki->kaio_active_count--; 1120 } 1121 1122 /* 1123 * Disconnect from user address space. 1124 */ 1125 if (p->p_vmspace != myvm) { 1126 mtx_unlock(&aio_job_mtx); 1127 vmspace_switch_aio(myvm); 1128 mtx_lock(&aio_job_mtx); 1129 /* 1130 * We have to restart to avoid race, we only sleep if 1131 * no job can be selected. 1132 */ 1133 continue; 1134 } 1135 1136 mtx_assert(&aio_job_mtx, MA_OWNED); 1137 1138 TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list); 1139 aiop->aioprocflags |= AIOP_FREE; 1140 1141 /* 1142 * If daemon is inactive for a long time, allow it to exit, 1143 * thereby freeing resources. 1144 */ 1145 if (msleep(p, &aio_job_mtx, PRIBIO, "aiordy", 1146 aiod_lifetime) == EWOULDBLOCK && TAILQ_EMPTY(&aio_jobs) && 1147 (aiop->aioprocflags & AIOP_FREE) && 1148 num_aio_procs > target_aio_procs) 1149 break; 1150 } 1151 TAILQ_REMOVE(&aio_freeproc, aiop, list); 1152 num_aio_procs--; 1153 mtx_unlock(&aio_job_mtx); 1154 uma_zfree(aiop_zone, aiop); 1155 free_unr(aiod_unr, id); 1156 vmspace_free(myvm); 1157 1158 KASSERT(p->p_vmspace == myvm, 1159 ("AIOD: bad vmspace for exiting daemon")); 1160 KASSERT(myvm->vm_refcnt > 1, 1161 ("AIOD: bad vm refcnt for exiting daemon: %d", myvm->vm_refcnt)); 1162 kproc_exit(0); 1163 } 1164 1165 /* 1166 * Create a new AIO daemon. This is mostly a kernel-thread fork routine. The 1167 * AIO daemon modifies its environment itself. 1168 */ 1169 static int 1170 aio_newproc(int *start) 1171 { 1172 int error; 1173 struct proc *p; 1174 int id; 1175 1176 id = alloc_unr(aiod_unr); 1177 error = kproc_create(aio_daemon, (void *)(intptr_t)id, &p, 1178 RFNOWAIT, 0, "aiod%d", id); 1179 if (error == 0) { 1180 /* 1181 * Wait until daemon is started. 1182 */ 1183 sema_wait(&aio_newproc_sem); 1184 mtx_lock(&aio_job_mtx); 1185 num_aio_procs++; 1186 if (start != NULL) 1187 (*start)--; 1188 mtx_unlock(&aio_job_mtx); 1189 } else { 1190 free_unr(aiod_unr, id); 1191 } 1192 return (error); 1193 } 1194 1195 /* 1196 * Try the high-performance, low-overhead physio method for eligible 1197 * VCHR devices. This method doesn't use an aio helper thread, and 1198 * thus has very low overhead. 1199 * 1200 * Assumes that the caller, aio_aqueue(), has incremented the file 1201 * structure's reference count, preventing its deallocation for the 1202 * duration of this call. 
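 *
 * A request is eligible only when it targets a character device whose
 * driver advertises D_DISK, the transfer length is a multiple of the
 * device's block size and within si_iosize_max and MAXPHYS, and, for
 * mapped transfers, the process is still under its pbuf quota.
 * Ineligible requests return -1 so that aio_queue_file() falls back to
 * the generic file path.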
1203 */ 1204 static int 1205 aio_qphysio(struct proc *p, struct kaiocb *job) 1206 { 1207 struct aiocb *cb; 1208 struct file *fp; 1209 struct bio *bp; 1210 struct buf *pbuf; 1211 struct vnode *vp; 1212 struct cdevsw *csw; 1213 struct cdev *dev; 1214 struct kaioinfo *ki; 1215 int error, ref, poff; 1216 vm_prot_t prot; 1217 1218 cb = &job->uaiocb; 1219 fp = job->fd_file; 1220 1221 if (fp == NULL || fp->f_type != DTYPE_VNODE) 1222 return (-1); 1223 1224 vp = fp->f_vnode; 1225 if (vp->v_type != VCHR) 1226 return (-1); 1227 if (vp->v_bufobj.bo_bsize == 0) 1228 return (-1); 1229 if (cb->aio_nbytes % vp->v_bufobj.bo_bsize) 1230 return (-1); 1231 1232 ref = 0; 1233 csw = devvn_refthread(vp, &dev, &ref); 1234 if (csw == NULL) 1235 return (ENXIO); 1236 1237 if ((csw->d_flags & D_DISK) == 0) { 1238 error = -1; 1239 goto unref; 1240 } 1241 if (cb->aio_nbytes > dev->si_iosize_max) { 1242 error = -1; 1243 goto unref; 1244 } 1245 1246 ki = p->p_aioinfo; 1247 poff = (vm_offset_t)cb->aio_buf & PAGE_MASK; 1248 if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) { 1249 if (cb->aio_nbytes > MAXPHYS) { 1250 error = -1; 1251 goto unref; 1252 } 1253 1254 pbuf = NULL; 1255 } else { 1256 if (cb->aio_nbytes > MAXPHYS - poff) { 1257 error = -1; 1258 goto unref; 1259 } 1260 if (ki->kaio_buffer_count >= ki->kaio_ballowed_count) { 1261 error = -1; 1262 goto unref; 1263 } 1264 1265 job->pbuf = pbuf = (struct buf *)getpbuf(NULL); 1266 BUF_KERNPROC(pbuf); 1267 AIO_LOCK(ki); 1268 ki->kaio_buffer_count++; 1269 AIO_UNLOCK(ki); 1270 } 1271 job->bp = bp = g_alloc_bio(); 1272 1273 bp->bio_length = cb->aio_nbytes; 1274 bp->bio_bcount = cb->aio_nbytes; 1275 bp->bio_done = aio_physwakeup; 1276 bp->bio_data = (void *)(uintptr_t)cb->aio_buf; 1277 bp->bio_offset = cb->aio_offset; 1278 bp->bio_cmd = cb->aio_lio_opcode == LIO_WRITE ? BIO_WRITE : BIO_READ; 1279 bp->bio_dev = dev; 1280 bp->bio_caller1 = (void *)job; 1281 1282 prot = VM_PROT_READ; 1283 if (cb->aio_lio_opcode == LIO_READ) 1284 prot |= VM_PROT_WRITE; /* Less backwards than it looks */ 1285 job->npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, 1286 (vm_offset_t)bp->bio_data, bp->bio_length, prot, job->pages, 1287 nitems(job->pages)); 1288 if (job->npages < 0) { 1289 error = EFAULT; 1290 goto doerror; 1291 } 1292 if (pbuf != NULL) { 1293 pmap_qenter((vm_offset_t)pbuf->b_data, 1294 job->pages, job->npages); 1295 bp->bio_data = pbuf->b_data + poff; 1296 atomic_add_int(&num_buf_aio, 1); 1297 } else { 1298 bp->bio_ma = job->pages; 1299 bp->bio_ma_n = job->npages; 1300 bp->bio_ma_offset = poff; 1301 bp->bio_data = unmapped_buf; 1302 bp->bio_flags |= BIO_UNMAPPED; 1303 } 1304 1305 /* Perform transfer. */ 1306 csw->d_strategy(bp); 1307 dev_relthread(dev, ref); 1308 return (0); 1309 1310 doerror: 1311 if (pbuf != NULL) { 1312 AIO_LOCK(ki); 1313 ki->kaio_buffer_count--; 1314 AIO_UNLOCK(ki); 1315 relpbuf(pbuf, NULL); 1316 job->pbuf = NULL; 1317 } 1318 g_destroy_bio(bp); 1319 job->bp = NULL; 1320 unref: 1321 dev_relthread(dev, ref); 1322 return (error); 1323 } 1324 1325 #ifdef COMPAT_FREEBSD6 1326 static int 1327 convert_old_sigevent(struct osigevent *osig, struct sigevent *nsig) 1328 { 1329 1330 /* 1331 * Only SIGEV_NONE, SIGEV_SIGNAL, and SIGEV_KEVENT are 1332 * supported by AIO with the old sigevent structure. 
1333 */ 1334 nsig->sigev_notify = osig->sigev_notify; 1335 switch (nsig->sigev_notify) { 1336 case SIGEV_NONE: 1337 break; 1338 case SIGEV_SIGNAL: 1339 nsig->sigev_signo = osig->__sigev_u.__sigev_signo; 1340 break; 1341 case SIGEV_KEVENT: 1342 nsig->sigev_notify_kqueue = 1343 osig->__sigev_u.__sigev_notify_kqueue; 1344 nsig->sigev_value.sival_ptr = osig->sigev_value.sival_ptr; 1345 break; 1346 default: 1347 return (EINVAL); 1348 } 1349 return (0); 1350 } 1351 1352 static int 1353 aiocb_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob) 1354 { 1355 struct oaiocb *ojob; 1356 int error; 1357 1358 bzero(kjob, sizeof(struct aiocb)); 1359 error = copyin(ujob, kjob, sizeof(struct oaiocb)); 1360 if (error) 1361 return (error); 1362 ojob = (struct oaiocb *)kjob; 1363 return (convert_old_sigevent(&ojob->aio_sigevent, &kjob->aio_sigevent)); 1364 } 1365 #endif 1366 1367 static int 1368 aiocb_copyin(struct aiocb *ujob, struct aiocb *kjob) 1369 { 1370 1371 return (copyin(ujob, kjob, sizeof(struct aiocb))); 1372 } 1373 1374 static long 1375 aiocb_fetch_status(struct aiocb *ujob) 1376 { 1377 1378 return (fuword(&ujob->_aiocb_private.status)); 1379 } 1380 1381 static long 1382 aiocb_fetch_error(struct aiocb *ujob) 1383 { 1384 1385 return (fuword(&ujob->_aiocb_private.error)); 1386 } 1387 1388 static int 1389 aiocb_store_status(struct aiocb *ujob, long status) 1390 { 1391 1392 return (suword(&ujob->_aiocb_private.status, status)); 1393 } 1394 1395 static int 1396 aiocb_store_error(struct aiocb *ujob, long error) 1397 { 1398 1399 return (suword(&ujob->_aiocb_private.error, error)); 1400 } 1401 1402 static int 1403 aiocb_store_kernelinfo(struct aiocb *ujob, long jobref) 1404 { 1405 1406 return (suword(&ujob->_aiocb_private.kernelinfo, jobref)); 1407 } 1408 1409 static int 1410 aiocb_store_aiocb(struct aiocb **ujobp, struct aiocb *ujob) 1411 { 1412 1413 return (suword(ujobp, (long)ujob)); 1414 } 1415 1416 static struct aiocb_ops aiocb_ops = { 1417 .copyin = aiocb_copyin, 1418 .fetch_status = aiocb_fetch_status, 1419 .fetch_error = aiocb_fetch_error, 1420 .store_status = aiocb_store_status, 1421 .store_error = aiocb_store_error, 1422 .store_kernelinfo = aiocb_store_kernelinfo, 1423 .store_aiocb = aiocb_store_aiocb, 1424 }; 1425 1426 #ifdef COMPAT_FREEBSD6 1427 static struct aiocb_ops aiocb_ops_osigevent = { 1428 .copyin = aiocb_copyin_old_sigevent, 1429 .fetch_status = aiocb_fetch_status, 1430 .fetch_error = aiocb_fetch_error, 1431 .store_status = aiocb_store_status, 1432 .store_error = aiocb_store_error, 1433 .store_kernelinfo = aiocb_store_kernelinfo, 1434 .store_aiocb = aiocb_store_aiocb, 1435 }; 1436 #endif 1437 1438 /* 1439 * Queue a new AIO request. Choosing either the threaded or direct physio VCHR 1440 * technique is done in this code. 
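 *
 * In outline: copy in and validate the control block through the
 * ABI-specific ops, look up the descriptor with the capability right
 * matching the opcode (CAP_PREAD, CAP_PWRITE or CAP_FSYNC; LIO_MLOCK
 * needs no descriptor), assign the job a kernel id and sequence number,
 * optionally register a kevent for SIGEV_KEVENT completion, and finally
 * hand the job to the file's fo_aio_queue method or to aio_queue_file()
 * for the generic path.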
 */
int
aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
	int type, struct aiocb_ops *ops)
{
	struct proc *p = td->td_proc;
	cap_rights_t rights;
	struct file *fp;
	struct kaiocb *job;
	struct kaioinfo *ki;
	struct kevent kev;
	int opcode;
	int error;
	int fd, kqfd;
	int jid;
	u_short evflags;

	if (p->p_aioinfo == NULL)
		aio_init_aioinfo(p);

	ki = p->p_aioinfo;

	ops->store_status(ujob, -1);
	ops->store_error(ujob, 0);
	ops->store_kernelinfo(ujob, -1);

	if (num_queue_count >= max_queue_count ||
	    ki->kaio_count >= ki->kaio_qallowed_count) {
		ops->store_error(ujob, EAGAIN);
		return (EAGAIN);
	}

	job = uma_zalloc(aiocb_zone, M_WAITOK | M_ZERO);
	knlist_init_mtx(&job->klist, AIO_MTX(ki));

	error = ops->copyin(ujob, &job->uaiocb);
	if (error) {
		ops->store_error(ujob, error);
		uma_zfree(aiocb_zone, job);
		return (error);
	}

	if (job->uaiocb.aio_nbytes > IOSIZE_MAX) {
		uma_zfree(aiocb_zone, job);
		return (EINVAL);
	}

	if (job->uaiocb.aio_sigevent.sigev_notify != SIGEV_KEVENT &&
	    job->uaiocb.aio_sigevent.sigev_notify != SIGEV_SIGNAL &&
	    job->uaiocb.aio_sigevent.sigev_notify != SIGEV_THREAD_ID &&
	    job->uaiocb.aio_sigevent.sigev_notify != SIGEV_NONE) {
		ops->store_error(ujob, EINVAL);
		uma_zfree(aiocb_zone, job);
		return (EINVAL);
	}

	if ((job->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL ||
	    job->uaiocb.aio_sigevent.sigev_notify == SIGEV_THREAD_ID) &&
	    !_SIG_VALID(job->uaiocb.aio_sigevent.sigev_signo)) {
		uma_zfree(aiocb_zone, job);
		return (EINVAL);
	}

	ksiginfo_init(&job->ksi);

	/* Save userspace address of the job info. */
	job->ujob = ujob;

	/* Get the opcode. */
	if (type != LIO_NOP)
		job->uaiocb.aio_lio_opcode = type;
	opcode = job->uaiocb.aio_lio_opcode;

	/*
	 * Validate the opcode and fetch the file object for the specified
	 * file descriptor.
	 *
	 * XXXRW: Moved the opcode validation up here so that we don't
	 * retrieve a file descriptor without knowing what the capability
	 * should be.
1521 */ 1522 fd = job->uaiocb.aio_fildes; 1523 switch (opcode) { 1524 case LIO_WRITE: 1525 error = fget_write(td, fd, 1526 cap_rights_init(&rights, CAP_PWRITE), &fp); 1527 break; 1528 case LIO_READ: 1529 error = fget_read(td, fd, 1530 cap_rights_init(&rights, CAP_PREAD), &fp); 1531 break; 1532 case LIO_SYNC: 1533 error = fget(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 1534 break; 1535 case LIO_MLOCK: 1536 fp = NULL; 1537 break; 1538 case LIO_NOP: 1539 error = fget(td, fd, cap_rights_init(&rights), &fp); 1540 break; 1541 default: 1542 error = EINVAL; 1543 } 1544 if (error) { 1545 uma_zfree(aiocb_zone, job); 1546 ops->store_error(ujob, error); 1547 return (error); 1548 } 1549 1550 if (opcode == LIO_SYNC && fp->f_vnode == NULL) { 1551 error = EINVAL; 1552 goto aqueue_fail; 1553 } 1554 1555 if (opcode != LIO_SYNC && job->uaiocb.aio_offset == -1LL) { 1556 error = EINVAL; 1557 goto aqueue_fail; 1558 } 1559 1560 job->fd_file = fp; 1561 1562 mtx_lock(&aio_job_mtx); 1563 jid = jobrefid++; 1564 job->seqno = jobseqno++; 1565 mtx_unlock(&aio_job_mtx); 1566 error = ops->store_kernelinfo(ujob, jid); 1567 if (error) { 1568 error = EINVAL; 1569 goto aqueue_fail; 1570 } 1571 job->uaiocb._aiocb_private.kernelinfo = (void *)(intptr_t)jid; 1572 1573 if (opcode == LIO_NOP) { 1574 fdrop(fp, td); 1575 uma_zfree(aiocb_zone, job); 1576 return (0); 1577 } 1578 1579 if (job->uaiocb.aio_sigevent.sigev_notify != SIGEV_KEVENT) 1580 goto no_kqueue; 1581 evflags = job->uaiocb.aio_sigevent.sigev_notify_kevent_flags; 1582 if ((evflags & ~(EV_CLEAR | EV_DISPATCH | EV_ONESHOT)) != 0) { 1583 error = EINVAL; 1584 goto aqueue_fail; 1585 } 1586 kqfd = job->uaiocb.aio_sigevent.sigev_notify_kqueue; 1587 kev.ident = (uintptr_t)job->ujob; 1588 kev.filter = EVFILT_AIO; 1589 kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1 | evflags; 1590 kev.data = (intptr_t)job; 1591 kev.udata = job->uaiocb.aio_sigevent.sigev_value.sival_ptr; 1592 error = kqfd_register(kqfd, &kev, td, 1); 1593 if (error) 1594 goto aqueue_fail; 1595 1596 no_kqueue: 1597 1598 ops->store_error(ujob, EINPROGRESS); 1599 job->uaiocb._aiocb_private.error = EINPROGRESS; 1600 job->userproc = p; 1601 job->cred = crhold(td->td_ucred); 1602 job->jobflags = KAIOCB_QUEUEING; 1603 job->lio = lj; 1604 1605 if (opcode == LIO_MLOCK) { 1606 aio_schedule(job, aio_process_mlock); 1607 error = 0; 1608 } else if (fp->f_ops->fo_aio_queue == NULL) 1609 error = aio_queue_file(fp, job); 1610 else 1611 error = fo_aio_queue(fp, job); 1612 if (error) 1613 goto aqueue_fail; 1614 1615 AIO_LOCK(ki); 1616 job->jobflags &= ~KAIOCB_QUEUEING; 1617 TAILQ_INSERT_TAIL(&ki->kaio_all, job, allist); 1618 ki->kaio_count++; 1619 if (lj) 1620 lj->lioj_count++; 1621 atomic_add_int(&num_queue_count, 1); 1622 if (job->jobflags & KAIOCB_FINISHED) { 1623 /* 1624 * The queue callback completed the request synchronously. 1625 * The bulk of the completion is deferred in that case 1626 * until this point. 
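		 * (aio_complete() saw KAIOCB_QUEUEING still set and left
		 * the job alone, so the notification could not fire before
		 * the job was linked into kaio_all and the counts above
		 * were updated.)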
1627 */ 1628 aio_bio_done_notify(p, job); 1629 } else 1630 TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, job, plist); 1631 AIO_UNLOCK(ki); 1632 return (0); 1633 1634 aqueue_fail: 1635 knlist_delete(&job->klist, curthread, 0); 1636 if (fp) 1637 fdrop(fp, td); 1638 uma_zfree(aiocb_zone, job); 1639 ops->store_error(ujob, error); 1640 return (error); 1641 } 1642 1643 static void 1644 aio_cancel_daemon_job(struct kaiocb *job) 1645 { 1646 1647 mtx_lock(&aio_job_mtx); 1648 if (!aio_cancel_cleared(job)) 1649 TAILQ_REMOVE(&aio_jobs, job, list); 1650 mtx_unlock(&aio_job_mtx); 1651 aio_cancel(job); 1652 } 1653 1654 void 1655 aio_schedule(struct kaiocb *job, aio_handle_fn_t *func) 1656 { 1657 1658 mtx_lock(&aio_job_mtx); 1659 if (!aio_set_cancel_function(job, aio_cancel_daemon_job)) { 1660 mtx_unlock(&aio_job_mtx); 1661 aio_cancel(job); 1662 return; 1663 } 1664 job->handle_fn = func; 1665 TAILQ_INSERT_TAIL(&aio_jobs, job, list); 1666 aio_kick_nowait(job->userproc); 1667 mtx_unlock(&aio_job_mtx); 1668 } 1669 1670 static void 1671 aio_cancel_sync(struct kaiocb *job) 1672 { 1673 struct kaioinfo *ki; 1674 1675 ki = job->userproc->p_aioinfo; 1676 AIO_LOCK(ki); 1677 if (!aio_cancel_cleared(job)) 1678 TAILQ_REMOVE(&ki->kaio_syncqueue, job, list); 1679 AIO_UNLOCK(ki); 1680 aio_cancel(job); 1681 } 1682 1683 int 1684 aio_queue_file(struct file *fp, struct kaiocb *job) 1685 { 1686 struct aioliojob *lj; 1687 struct kaioinfo *ki; 1688 struct kaiocb *job2; 1689 struct vnode *vp; 1690 struct mount *mp; 1691 int error, opcode; 1692 bool safe; 1693 1694 lj = job->lio; 1695 ki = job->userproc->p_aioinfo; 1696 opcode = job->uaiocb.aio_lio_opcode; 1697 if (opcode == LIO_SYNC) 1698 goto queueit; 1699 1700 if ((error = aio_qphysio(job->userproc, job)) == 0) 1701 goto done; 1702 #if 0 1703 /* 1704 * XXX: This means qphysio() failed with EFAULT. The current 1705 * behavior is to retry the operation via fo_read/fo_write. 1706 * Wouldn't it be better to just complete the request with an 1707 * error here? 
1708 */ 1709 if (error > 0) 1710 goto done; 1711 #endif 1712 queueit: 1713 safe = false; 1714 if (fp->f_type == DTYPE_VNODE) { 1715 vp = fp->f_vnode; 1716 if (vp->v_type == VREG || vp->v_type == VDIR) { 1717 mp = fp->f_vnode->v_mount; 1718 if (mp == NULL || (mp->mnt_flag & MNT_LOCAL) != 0) 1719 safe = true; 1720 } 1721 } 1722 if (!(safe || enable_aio_unsafe)) { 1723 counted_warning(&unsafe_warningcnt, 1724 "is attempting to use unsafe AIO requests"); 1725 return (EOPNOTSUPP); 1726 } 1727 1728 if (opcode == LIO_SYNC) { 1729 AIO_LOCK(ki); 1730 TAILQ_FOREACH(job2, &ki->kaio_jobqueue, plist) { 1731 if (job2->fd_file == job->fd_file && 1732 job2->uaiocb.aio_lio_opcode != LIO_SYNC && 1733 job2->seqno < job->seqno) { 1734 job2->jobflags |= KAIOCB_CHECKSYNC; 1735 job->pending++; 1736 } 1737 } 1738 if (job->pending != 0) { 1739 if (!aio_set_cancel_function_locked(job, 1740 aio_cancel_sync)) { 1741 AIO_UNLOCK(ki); 1742 aio_cancel(job); 1743 return (0); 1744 } 1745 TAILQ_INSERT_TAIL(&ki->kaio_syncqueue, job, list); 1746 AIO_UNLOCK(ki); 1747 return (0); 1748 } 1749 AIO_UNLOCK(ki); 1750 } 1751 1752 switch (opcode) { 1753 case LIO_READ: 1754 case LIO_WRITE: 1755 aio_schedule(job, aio_process_rw); 1756 error = 0; 1757 break; 1758 case LIO_SYNC: 1759 aio_schedule(job, aio_process_sync); 1760 error = 0; 1761 break; 1762 default: 1763 error = EINVAL; 1764 } 1765 done: 1766 return (error); 1767 } 1768 1769 static void 1770 aio_kick_nowait(struct proc *userp) 1771 { 1772 struct kaioinfo *ki = userp->p_aioinfo; 1773 struct aioproc *aiop; 1774 1775 mtx_assert(&aio_job_mtx, MA_OWNED); 1776 if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) { 1777 TAILQ_REMOVE(&aio_freeproc, aiop, list); 1778 aiop->aioprocflags &= ~AIOP_FREE; 1779 wakeup(aiop->aioproc); 1780 } else if (num_aio_resv_start + num_aio_procs < max_aio_procs && 1781 ki->kaio_active_count + num_aio_resv_start < 1782 ki->kaio_maxactive_count) { 1783 taskqueue_enqueue(taskqueue_aiod_kick, &ki->kaio_task); 1784 } 1785 } 1786 1787 static int 1788 aio_kick(struct proc *userp) 1789 { 1790 struct kaioinfo *ki = userp->p_aioinfo; 1791 struct aioproc *aiop; 1792 int error, ret = 0; 1793 1794 mtx_assert(&aio_job_mtx, MA_OWNED); 1795 retryproc: 1796 if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) { 1797 TAILQ_REMOVE(&aio_freeproc, aiop, list); 1798 aiop->aioprocflags &= ~AIOP_FREE; 1799 wakeup(aiop->aioproc); 1800 } else if (num_aio_resv_start + num_aio_procs < max_aio_procs && 1801 ki->kaio_active_count + num_aio_resv_start < 1802 ki->kaio_maxactive_count) { 1803 num_aio_resv_start++; 1804 mtx_unlock(&aio_job_mtx); 1805 error = aio_newproc(&num_aio_resv_start); 1806 mtx_lock(&aio_job_mtx); 1807 if (error) { 1808 num_aio_resv_start--; 1809 goto retryproc; 1810 } 1811 } else { 1812 ret = -1; 1813 } 1814 return (ret); 1815 } 1816 1817 static void 1818 aio_kick_helper(void *context, int pending) 1819 { 1820 struct proc *userp = context; 1821 1822 mtx_lock(&aio_job_mtx); 1823 while (--pending >= 0) { 1824 if (aio_kick(userp)) 1825 break; 1826 } 1827 mtx_unlock(&aio_job_mtx); 1828 } 1829 1830 /* 1831 * Support the aio_return system call, as a side-effect, kernel resources are 1832 * released. 
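 *
 * A minimal userland sketch of the intended use; this is plain POSIX
 * AIO and nothing specific to this implementation (fd and buf are the
 * caller's descriptor and buffer):
 *
 *	struct aiocb iocb = { 0 };
 *
 *	iocb.aio_fildes = fd;
 *	iocb.aio_buf = buf;
 *	iocb.aio_nbytes = sizeof(buf);
 *	iocb.aio_offset = 0;
 *	if (aio_read(&iocb) == -1)
 *		err(1, "aio_read");
 *	while (aio_error(&iocb) == EINPROGRESS)
 *		;
 *	ssize_t n = aio_return(&iocb);
 *
 * (Real code would block in aio_suspend() or on a kevent rather than
 * spin.)  aio_return() may be called exactly once per completed
 * request; it is what finally releases the kernel job via
 * aio_free_entry().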
1833 */ 1834 static int 1835 kern_aio_return(struct thread *td, struct aiocb *ujob, struct aiocb_ops *ops) 1836 { 1837 struct proc *p = td->td_proc; 1838 struct kaiocb *job; 1839 struct kaioinfo *ki; 1840 long status, error; 1841 1842 ki = p->p_aioinfo; 1843 if (ki == NULL) 1844 return (EINVAL); 1845 AIO_LOCK(ki); 1846 TAILQ_FOREACH(job, &ki->kaio_done, plist) { 1847 if (job->ujob == ujob) 1848 break; 1849 } 1850 if (job != NULL) { 1851 MPASS(job->jobflags & KAIOCB_FINISHED); 1852 status = job->uaiocb._aiocb_private.status; 1853 error = job->uaiocb._aiocb_private.error; 1854 td->td_retval[0] = status; 1855 td->td_ru.ru_oublock += job->outblock; 1856 td->td_ru.ru_inblock += job->inblock; 1857 td->td_ru.ru_msgsnd += job->msgsnd; 1858 td->td_ru.ru_msgrcv += job->msgrcv; 1859 aio_free_entry(job); 1860 AIO_UNLOCK(ki); 1861 ops->store_error(ujob, error); 1862 ops->store_status(ujob, status); 1863 } else { 1864 error = EINVAL; 1865 AIO_UNLOCK(ki); 1866 } 1867 return (error); 1868 } 1869 1870 int 1871 sys_aio_return(struct thread *td, struct aio_return_args *uap) 1872 { 1873 1874 return (kern_aio_return(td, uap->aiocbp, &aiocb_ops)); 1875 } 1876 1877 /* 1878 * Allow a process to wakeup when any of the I/O requests are completed. 1879 */ 1880 static int 1881 kern_aio_suspend(struct thread *td, int njoblist, struct aiocb **ujoblist, 1882 struct timespec *ts) 1883 { 1884 struct proc *p = td->td_proc; 1885 struct timeval atv; 1886 struct kaioinfo *ki; 1887 struct kaiocb *firstjob, *job; 1888 int error, i, timo; 1889 1890 timo = 0; 1891 if (ts) { 1892 if (ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000) 1893 return (EINVAL); 1894 1895 TIMESPEC_TO_TIMEVAL(&atv, ts); 1896 if (itimerfix(&atv)) 1897 return (EINVAL); 1898 timo = tvtohz(&atv); 1899 } 1900 1901 ki = p->p_aioinfo; 1902 if (ki == NULL) 1903 return (EAGAIN); 1904 1905 if (njoblist == 0) 1906 return (0); 1907 1908 AIO_LOCK(ki); 1909 for (;;) { 1910 firstjob = NULL; 1911 error = 0; 1912 TAILQ_FOREACH(job, &ki->kaio_all, allist) { 1913 for (i = 0; i < njoblist; i++) { 1914 if (job->ujob == ujoblist[i]) { 1915 if (firstjob == NULL) 1916 firstjob = job; 1917 if (job->jobflags & KAIOCB_FINISHED) 1918 goto RETURN; 1919 } 1920 } 1921 } 1922 /* All tasks were finished. */ 1923 if (firstjob == NULL) 1924 break; 1925 1926 ki->kaio_flags |= KAIO_WAKEUP; 1927 error = msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO | PCATCH, 1928 "aiospn", timo); 1929 if (error == ERESTART) 1930 error = EINTR; 1931 if (error) 1932 break; 1933 } 1934 RETURN: 1935 AIO_UNLOCK(ki); 1936 return (error); 1937 } 1938 1939 int 1940 sys_aio_suspend(struct thread *td, struct aio_suspend_args *uap) 1941 { 1942 struct timespec ts, *tsp; 1943 struct aiocb **ujoblist; 1944 int error; 1945 1946 if (uap->nent < 0 || uap->nent > AIO_LISTIO_MAX) 1947 return (EINVAL); 1948 1949 if (uap->timeout) { 1950 /* Get timespec struct. */ 1951 if ((error = copyin(uap->timeout, &ts, sizeof(ts))) != 0) 1952 return (error); 1953 tsp = &ts; 1954 } else 1955 tsp = NULL; 1956 1957 ujoblist = uma_zalloc(aiol_zone, M_WAITOK); 1958 error = copyin(uap->aiocbp, ujoblist, uap->nent * sizeof(ujoblist[0])); 1959 if (error == 0) 1960 error = kern_aio_suspend(td, uap->nent, ujoblist, tsp); 1961 uma_zfree(aiol_zone, ujoblist); 1962 return (error); 1963 } 1964 1965 /* 1966 * aio_cancel cancels any non-physio aio operations not currently in 1967 * progress. 
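 *
 * For a specific aiocbp the syscall reports AIO_CANCELED or
 * AIO_NOTCANCELED for that one request (or AIO_ALLDONE if it had
 * already completed); with aiocbp == NULL it reports AIO_NOTCANCELED
 * if any request on the descriptor survived, AIO_CANCELED if at least
 * one was cancelled, and AIO_ALLDONE otherwise.  Requests backed by a
 * disk device (vn_isdisk()) are never cancelled here and always report
 * AIO_NOTCANCELED.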
1968 */ 1969 int 1970 sys_aio_cancel(struct thread *td, struct aio_cancel_args *uap) 1971 { 1972 struct proc *p = td->td_proc; 1973 struct kaioinfo *ki; 1974 struct kaiocb *job, *jobn; 1975 struct file *fp; 1976 cap_rights_t rights; 1977 int error; 1978 int cancelled = 0; 1979 int notcancelled = 0; 1980 struct vnode *vp; 1981 1982 /* Lookup file object. */ 1983 error = fget(td, uap->fd, cap_rights_init(&rights), &fp); 1984 if (error) 1985 return (error); 1986 1987 ki = p->p_aioinfo; 1988 if (ki == NULL) 1989 goto done; 1990 1991 if (fp->f_type == DTYPE_VNODE) { 1992 vp = fp->f_vnode; 1993 if (vn_isdisk(vp, &error)) { 1994 fdrop(fp, td); 1995 td->td_retval[0] = AIO_NOTCANCELED; 1996 return (0); 1997 } 1998 } 1999 2000 AIO_LOCK(ki); 2001 TAILQ_FOREACH_SAFE(job, &ki->kaio_jobqueue, plist, jobn) { 2002 if ((uap->fd == job->uaiocb.aio_fildes) && 2003 ((uap->aiocbp == NULL) || 2004 (uap->aiocbp == job->ujob))) { 2005 if (aio_cancel_job(p, ki, job)) { 2006 cancelled++; 2007 } else { 2008 notcancelled++; 2009 } 2010 if (uap->aiocbp != NULL) 2011 break; 2012 } 2013 } 2014 AIO_UNLOCK(ki); 2015 2016 done: 2017 fdrop(fp, td); 2018 2019 if (uap->aiocbp != NULL) { 2020 if (cancelled) { 2021 td->td_retval[0] = AIO_CANCELED; 2022 return (0); 2023 } 2024 } 2025 2026 if (notcancelled) { 2027 td->td_retval[0] = AIO_NOTCANCELED; 2028 return (0); 2029 } 2030 2031 if (cancelled) { 2032 td->td_retval[0] = AIO_CANCELED; 2033 return (0); 2034 } 2035 2036 td->td_retval[0] = AIO_ALLDONE; 2037 2038 return (0); 2039 } 2040 2041 /* 2042 * aio_error is implemented in the kernel level for compatibility purposes 2043 * only. For a user mode async implementation, it would be best to do it in 2044 * a userland subroutine. 2045 */ 2046 static int 2047 kern_aio_error(struct thread *td, struct aiocb *ujob, struct aiocb_ops *ops) 2048 { 2049 struct proc *p = td->td_proc; 2050 struct kaiocb *job; 2051 struct kaioinfo *ki; 2052 int status; 2053 2054 ki = p->p_aioinfo; 2055 if (ki == NULL) { 2056 td->td_retval[0] = EINVAL; 2057 return (0); 2058 } 2059 2060 AIO_LOCK(ki); 2061 TAILQ_FOREACH(job, &ki->kaio_all, allist) { 2062 if (job->ujob == ujob) { 2063 if (job->jobflags & KAIOCB_FINISHED) 2064 td->td_retval[0] = 2065 job->uaiocb._aiocb_private.error; 2066 else 2067 td->td_retval[0] = EINPROGRESS; 2068 AIO_UNLOCK(ki); 2069 return (0); 2070 } 2071 } 2072 AIO_UNLOCK(ki); 2073 2074 /* 2075 * Hack for failure of aio_aqueue. 
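	 * aio_aqueue() stores a status of -1 and the failing errno into
	 * the user's aiocb before bailing out, so a request that never
	 * made it onto kaio_all can still report that error here rather
	 * than EINVAL.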
2076 */ 2077 status = ops->fetch_status(ujob); 2078 if (status == -1) { 2079 td->td_retval[0] = ops->fetch_error(ujob); 2080 return (0); 2081 } 2082 2083 td->td_retval[0] = EINVAL; 2084 return (0); 2085 } 2086 2087 int 2088 sys_aio_error(struct thread *td, struct aio_error_args *uap) 2089 { 2090 2091 return (kern_aio_error(td, uap->aiocbp, &aiocb_ops)); 2092 } 2093 2094 /* syscall - asynchronous read from a file (REALTIME) */ 2095 #ifdef COMPAT_FREEBSD6 2096 int 2097 freebsd6_aio_read(struct thread *td, struct freebsd6_aio_read_args *uap) 2098 { 2099 2100 return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ, 2101 &aiocb_ops_osigevent)); 2102 } 2103 #endif 2104 2105 int 2106 sys_aio_read(struct thread *td, struct aio_read_args *uap) 2107 { 2108 2109 return (aio_aqueue(td, uap->aiocbp, NULL, LIO_READ, &aiocb_ops)); 2110 } 2111 2112 /* syscall - asynchronous write to a file (REALTIME) */ 2113 #ifdef COMPAT_FREEBSD6 2114 int 2115 freebsd6_aio_write(struct thread *td, struct freebsd6_aio_write_args *uap) 2116 { 2117 2118 return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE, 2119 &aiocb_ops_osigevent)); 2120 } 2121 #endif 2122 2123 int 2124 sys_aio_write(struct thread *td, struct aio_write_args *uap) 2125 { 2126 2127 return (aio_aqueue(td, uap->aiocbp, NULL, LIO_WRITE, &aiocb_ops)); 2128 } 2129 2130 int 2131 sys_aio_mlock(struct thread *td, struct aio_mlock_args *uap) 2132 { 2133 2134 return (aio_aqueue(td, uap->aiocbp, NULL, LIO_MLOCK, &aiocb_ops)); 2135 } 2136 2137 static int 2138 kern_lio_listio(struct thread *td, int mode, struct aiocb * const *uacb_list, 2139 struct aiocb **acb_list, int nent, struct sigevent *sig, 2140 struct aiocb_ops *ops) 2141 { 2142 struct proc *p = td->td_proc; 2143 struct aiocb *job; 2144 struct kaioinfo *ki; 2145 struct aioliojob *lj; 2146 struct kevent kev; 2147 int error; 2148 int nerror; 2149 int i; 2150 2151 if ((mode != LIO_NOWAIT) && (mode != LIO_WAIT)) 2152 return (EINVAL); 2153 2154 if (nent < 0 || nent > AIO_LISTIO_MAX) 2155 return (EINVAL); 2156 2157 if (p->p_aioinfo == NULL) 2158 aio_init_aioinfo(p); 2159 2160 ki = p->p_aioinfo; 2161 2162 lj = uma_zalloc(aiolio_zone, M_WAITOK); 2163 lj->lioj_flags = 0; 2164 lj->lioj_count = 0; 2165 lj->lioj_finished_count = 0; 2166 knlist_init_mtx(&lj->klist, AIO_MTX(ki)); 2167 ksiginfo_init(&lj->lioj_ksi); 2168 2169 /* 2170 * Setup signal. 
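 * The caller may request completion notification in one of three ways:
 * SIGEV_KEVENT posts an EVFILT_LIO event on the caller's kqueue,
 * SIGEV_SIGNAL and SIGEV_THREAD_ID queue a signal, and SIGEV_NONE asks
 * for no notification at all.  A userland sketch of the kqueue flavor
 * (illustrative only; "kq" and "iocbs" are caller-supplied):
 *
 *	struct sigevent sev;
 *
 *	memset(&sev, 0, sizeof(sev));
 *	sev.sigev_notify = SIGEV_KEVENT;
 *	sev.sigev_notify_kqueue = kq;		(from kqueue())
 *	sev.sigev_value.sival_ptr = iocbs;	(returned in the kevent udata)
 *	if (lio_listio(LIO_NOWAIT, iocbs, n, &sev) == -1)
 *		err(1, "lio_listio");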
2171 */ 2172 if (sig && (mode == LIO_NOWAIT)) { 2173 bcopy(sig, &lj->lioj_signal, sizeof(lj->lioj_signal)); 2174 if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) { 2175 /* Assume only new style KEVENT */ 2176 kev.filter = EVFILT_LIO; 2177 kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1; 2178 kev.ident = (uintptr_t)uacb_list; /* something unique */ 2179 kev.data = (intptr_t)lj; 2180 /* pass user defined sigval data */ 2181 kev.udata = lj->lioj_signal.sigev_value.sival_ptr; 2182 error = kqfd_register( 2183 lj->lioj_signal.sigev_notify_kqueue, &kev, td, 1); 2184 if (error) { 2185 uma_zfree(aiolio_zone, lj); 2186 return (error); 2187 } 2188 } else if (lj->lioj_signal.sigev_notify == SIGEV_NONE) { 2189 ; 2190 } else if (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL || 2191 lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID) { 2192 if (!_SIG_VALID(lj->lioj_signal.sigev_signo)) { 2193 uma_zfree(aiolio_zone, lj); 2194 return EINVAL; 2195 } 2196 lj->lioj_flags |= LIOJ_SIGNAL; 2197 } else { 2198 uma_zfree(aiolio_zone, lj); 2199 return EINVAL; 2200 } 2201 } 2202 2203 AIO_LOCK(ki); 2204 TAILQ_INSERT_TAIL(&ki->kaio_liojoblist, lj, lioj_list); 2205 /* 2206 * Add extra aiocb count to avoid the lio to be freed 2207 * by other threads doing aio_waitcomplete or aio_return, 2208 * and prevent event from being sent until we have queued 2209 * all tasks. 2210 */ 2211 lj->lioj_count = 1; 2212 AIO_UNLOCK(ki); 2213 2214 /* 2215 * Get pointers to the list of I/O requests. 2216 */ 2217 nerror = 0; 2218 for (i = 0; i < nent; i++) { 2219 job = acb_list[i]; 2220 if (job != NULL) { 2221 error = aio_aqueue(td, job, lj, LIO_NOP, ops); 2222 if (error != 0) 2223 nerror++; 2224 } 2225 } 2226 2227 error = 0; 2228 AIO_LOCK(ki); 2229 if (mode == LIO_WAIT) { 2230 while (lj->lioj_count - 1 != lj->lioj_finished_count) { 2231 ki->kaio_flags |= KAIO_WAKEUP; 2232 error = msleep(&p->p_aioinfo, AIO_MTX(ki), 2233 PRIBIO | PCATCH, "aiospn", 0); 2234 if (error == ERESTART) 2235 error = EINTR; 2236 if (error) 2237 break; 2238 } 2239 } else { 2240 if (lj->lioj_count - 1 == lj->lioj_finished_count) { 2241 if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) { 2242 lj->lioj_flags |= LIOJ_KEVENT_POSTED; 2243 KNOTE_LOCKED(&lj->klist, 1); 2244 } 2245 if ((lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) 2246 == LIOJ_SIGNAL 2247 && (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL || 2248 lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID)) { 2249 aio_sendsig(p, &lj->lioj_signal, 2250 &lj->lioj_ksi); 2251 lj->lioj_flags |= LIOJ_SIGNAL_POSTED; 2252 } 2253 } 2254 } 2255 lj->lioj_count--; 2256 if (lj->lioj_count == 0) { 2257 TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list); 2258 knlist_delete(&lj->klist, curthread, 1); 2259 PROC_LOCK(p); 2260 sigqueue_take(&lj->lioj_ksi); 2261 PROC_UNLOCK(p); 2262 AIO_UNLOCK(ki); 2263 uma_zfree(aiolio_zone, lj); 2264 } else 2265 AIO_UNLOCK(ki); 2266 2267 if (nerror) 2268 return (EIO); 2269 return (error); 2270 } 2271 2272 /* syscall - list directed I/O (REALTIME) */ 2273 #ifdef COMPAT_FREEBSD6 2274 int 2275 freebsd6_lio_listio(struct thread *td, struct freebsd6_lio_listio_args *uap) 2276 { 2277 struct aiocb **acb_list; 2278 struct sigevent *sigp, sig; 2279 struct osigevent osig; 2280 int error, nent; 2281 2282 if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) 2283 return (EINVAL); 2284 2285 nent = uap->nent; 2286 if (nent < 0 || nent > AIO_LISTIO_MAX) 2287 return (EINVAL); 2288 2289 if (uap->sig && (uap->mode == LIO_NOWAIT)) { 2290 error = copyin(uap->sig, &osig, sizeof(osig)); 2291 if (error) 2292 return (error); 2293 
error = convert_old_sigevent(&osig, &sig); 2294 if (error) 2295 return (error); 2296 sigp = &sig; 2297 } else 2298 sigp = NULL; 2299 2300 acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK); 2301 error = copyin(uap->acb_list, acb_list, nent * sizeof(acb_list[0])); 2302 if (error == 0) 2303 error = kern_lio_listio(td, uap->mode, 2304 (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp, 2305 &aiocb_ops_osigevent); 2306 free(acb_list, M_LIO); 2307 return (error); 2308 } 2309 #endif 2310 2311 /* syscall - list directed I/O (REALTIME) */ 2312 int 2313 sys_lio_listio(struct thread *td, struct lio_listio_args *uap) 2314 { 2315 struct aiocb **acb_list; 2316 struct sigevent *sigp, sig; 2317 int error, nent; 2318 2319 if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) 2320 return (EINVAL); 2321 2322 nent = uap->nent; 2323 if (nent < 0 || nent > AIO_LISTIO_MAX) 2324 return (EINVAL); 2325 2326 if (uap->sig && (uap->mode == LIO_NOWAIT)) { 2327 error = copyin(uap->sig, &sig, sizeof(sig)); 2328 if (error) 2329 return (error); 2330 sigp = &sig; 2331 } else 2332 sigp = NULL; 2333 2334 acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK); 2335 error = copyin(uap->acb_list, acb_list, nent * sizeof(acb_list[0])); 2336 if (error == 0) 2337 error = kern_lio_listio(td, uap->mode, uap->acb_list, acb_list, 2338 nent, sigp, &aiocb_ops); 2339 free(acb_list, M_LIO); 2340 return (error); 2341 } 2342 2343 static void 2344 aio_physwakeup(struct bio *bp) 2345 { 2346 struct kaiocb *job = (struct kaiocb *)bp->bio_caller1; 2347 struct proc *userp; 2348 struct kaioinfo *ki; 2349 size_t nbytes; 2350 int error, nblks; 2351 2352 /* Release mapping into kernel space. */ 2353 userp = job->userproc; 2354 ki = userp->p_aioinfo; 2355 if (job->pbuf) { 2356 pmap_qremove((vm_offset_t)job->pbuf->b_data, job->npages); 2357 relpbuf(job->pbuf, NULL); 2358 job->pbuf = NULL; 2359 atomic_subtract_int(&num_buf_aio, 1); 2360 AIO_LOCK(ki); 2361 ki->kaio_buffer_count--; 2362 AIO_UNLOCK(ki); 2363 } 2364 vm_page_unhold_pages(job->pages, job->npages); 2365 2366 bp = job->bp; 2367 job->bp = NULL; 2368 nbytes = job->uaiocb.aio_nbytes - bp->bio_resid; 2369 error = 0; 2370 if (bp->bio_flags & BIO_ERROR) 2371 error = bp->bio_error; 2372 nblks = btodb(nbytes); 2373 if (job->uaiocb.aio_lio_opcode == LIO_WRITE) 2374 job->outblock += nblks; 2375 else 2376 job->inblock += nblks; 2377 2378 if (error) 2379 aio_complete(job, -1, error); 2380 else 2381 aio_complete(job, nbytes, 0); 2382 2383 g_destroy_bio(bp); 2384 } 2385 2386 /* syscall - wait for the next completion of an aio request */ 2387 static int 2388 kern_aio_waitcomplete(struct thread *td, struct aiocb **ujobp, 2389 struct timespec *ts, struct aiocb_ops *ops) 2390 { 2391 struct proc *p = td->td_proc; 2392 struct timeval atv; 2393 struct kaioinfo *ki; 2394 struct kaiocb *job; 2395 struct aiocb *ujob; 2396 long error, status; 2397 int timo; 2398 2399 ops->store_aiocb(ujobp, NULL); 2400 2401 if (ts == NULL) { 2402 timo = 0; 2403 } else if (ts->tv_sec == 0 && ts->tv_nsec == 0) { 2404 timo = -1; 2405 } else { 2406 if ((ts->tv_nsec < 0) || (ts->tv_nsec >= 1000000000)) 2407 return (EINVAL); 2408 2409 TIMESPEC_TO_TIMEVAL(&atv, ts); 2410 if (itimerfix(&atv)) 2411 return (EINVAL); 2412 timo = tvtohz(&atv); 2413 } 2414 2415 if (p->p_aioinfo == NULL) 2416 aio_init_aioinfo(p); 2417 ki = p->p_aioinfo; 2418 2419 error = 0; 2420 job = NULL; 2421 AIO_LOCK(ki); 2422 while ((job = TAILQ_FIRST(&ki->kaio_done)) == NULL) { 2423 if (timo == -1) { 2424 error = EWOULDBLOCK; 2425 break; 
2426 } 2427 ki->kaio_flags |= KAIO_WAKEUP; 2428 error = msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO | PCATCH, 2429 "aiowc", timo); 2430 if (timo && error == ERESTART) 2431 error = EINTR; 2432 if (error) 2433 break; 2434 } 2435 2436 if (job != NULL) { 2437 MPASS(job->jobflags & KAIOCB_FINISHED); 2438 ujob = job->ujob; 2439 status = job->uaiocb._aiocb_private.status; 2440 error = job->uaiocb._aiocb_private.error; 2441 td->td_retval[0] = status; 2442 td->td_ru.ru_oublock += job->outblock; 2443 td->td_ru.ru_inblock += job->inblock; 2444 td->td_ru.ru_msgsnd += job->msgsnd; 2445 td->td_ru.ru_msgrcv += job->msgrcv; 2446 aio_free_entry(job); 2447 AIO_UNLOCK(ki); 2448 ops->store_aiocb(ujobp, ujob); 2449 ops->store_error(ujob, error); 2450 ops->store_status(ujob, status); 2451 } else 2452 AIO_UNLOCK(ki); 2453 2454 return (error); 2455 } 2456 2457 int 2458 sys_aio_waitcomplete(struct thread *td, struct aio_waitcomplete_args *uap) 2459 { 2460 struct timespec ts, *tsp; 2461 int error; 2462 2463 if (uap->timeout) { 2464 /* Get timespec struct. */ 2465 error = copyin(uap->timeout, &ts, sizeof(ts)); 2466 if (error) 2467 return (error); 2468 tsp = &ts; 2469 } else 2470 tsp = NULL; 2471 2472 return (kern_aio_waitcomplete(td, uap->aiocbp, tsp, &aiocb_ops)); 2473 } 2474 2475 static int 2476 kern_aio_fsync(struct thread *td, int op, struct aiocb *ujob, 2477 struct aiocb_ops *ops) 2478 { 2479 2480 if (op != O_SYNC) /* XXX lack of O_DSYNC */ 2481 return (EINVAL); 2482 return (aio_aqueue(td, ujob, NULL, LIO_SYNC, ops)); 2483 } 2484 2485 int 2486 sys_aio_fsync(struct thread *td, struct aio_fsync_args *uap) 2487 { 2488 2489 return (kern_aio_fsync(td, uap->op, uap->aiocbp, &aiocb_ops)); 2490 } 2491 2492 /* kqueue attach function */ 2493 static int 2494 filt_aioattach(struct knote *kn) 2495 { 2496 struct kaiocb *job = (struct kaiocb *)kn->kn_sdata; 2497 2498 /* 2499 * The job pointer must be validated before using it, so 2500 * registration is restricted to the kernel; the user cannot 2501 * set EV_FLAG1. 2502 */ 2503 if ((kn->kn_flags & EV_FLAG1) == 0) 2504 return (EPERM); 2505 kn->kn_ptr.p_aio = job; 2506 kn->kn_flags &= ~EV_FLAG1; 2507 2508 knlist_add(&job->klist, kn, 0); 2509 2510 return (0); 2511 } 2512 2513 /* kqueue detach function */ 2514 static void 2515 filt_aiodetach(struct knote *kn) 2516 { 2517 struct knlist *knl; 2518 2519 knl = &kn->kn_ptr.p_aio->klist; 2520 knl->kl_lock(knl->kl_lockarg); 2521 if (!knlist_empty(knl)) 2522 knlist_remove(knl, kn, 1); 2523 knl->kl_unlock(knl->kl_lockarg); 2524 } 2525 2526 /* kqueue filter function */ 2527 /*ARGSUSED*/ 2528 static int 2529 filt_aio(struct knote *kn, long hint) 2530 { 2531 struct kaiocb *job = kn->kn_ptr.p_aio; 2532 2533 kn->kn_data = job->uaiocb._aiocb_private.error; 2534 if (!(job->jobflags & KAIOCB_FINISHED)) 2535 return (0); 2536 kn->kn_flags |= EV_EOF; 2537 return (1); 2538 } 2539 2540 /* kqueue attach function */ 2541 static int 2542 filt_lioattach(struct knote *kn) 2543 { 2544 struct aioliojob * lj = (struct aioliojob *)kn->kn_sdata; 2545 2546 /* 2547 * The aioliojob pointer must be validated before using it, so 2548 * registration is restricted to the kernel; the user cannot 2549 * set EV_FLAG1. 
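 * kern_lio_listio() registers this knote on the kernel's behalf: it
 * builds the kevent with EV_FLAG1 set and submits it through
 * kqfd_register(), passing the aioliojob pointer in the kevent data.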
2550 */ 2551 if ((kn->kn_flags & EV_FLAG1) == 0) 2552 return (EPERM); 2553 kn->kn_ptr.p_lio = lj; 2554 kn->kn_flags &= ~EV_FLAG1; 2555 2556 knlist_add(&lj->klist, kn, 0); 2557 2558 return (0); 2559 } 2560 2561 /* kqueue detach function */ 2562 static void 2563 filt_liodetach(struct knote *kn) 2564 { 2565 struct knlist *knl; 2566 2567 knl = &kn->kn_ptr.p_lio->klist; 2568 knl->kl_lock(knl->kl_lockarg); 2569 if (!knlist_empty(knl)) 2570 knlist_remove(knl, kn, 1); 2571 knl->kl_unlock(knl->kl_lockarg); 2572 } 2573 2574 /* kqueue filter function */ 2575 /*ARGSUSED*/ 2576 static int 2577 filt_lio(struct knote *kn, long hint) 2578 { 2579 struct aioliojob * lj = kn->kn_ptr.p_lio; 2580 2581 return (lj->lioj_flags & LIOJ_KEVENT_POSTED); 2582 } 2583 2584 #ifdef COMPAT_FREEBSD32 2585 #include <sys/mount.h> 2586 #include <sys/socket.h> 2587 #include <compat/freebsd32/freebsd32.h> 2588 #include <compat/freebsd32/freebsd32_proto.h> 2589 #include <compat/freebsd32/freebsd32_signal.h> 2590 #include <compat/freebsd32/freebsd32_syscall.h> 2591 #include <compat/freebsd32/freebsd32_util.h> 2592 2593 struct __aiocb_private32 { 2594 int32_t status; 2595 int32_t error; 2596 uint32_t kernelinfo; 2597 }; 2598 2599 #ifdef COMPAT_FREEBSD6 2600 typedef struct oaiocb32 { 2601 int aio_fildes; /* File descriptor */ 2602 uint64_t aio_offset __packed; /* File offset for I/O */ 2603 uint32_t aio_buf; /* I/O buffer in process space */ 2604 uint32_t aio_nbytes; /* Number of bytes for I/O */ 2605 struct osigevent32 aio_sigevent; /* Signal to deliver */ 2606 int aio_lio_opcode; /* LIO opcode */ 2607 int aio_reqprio; /* Request priority -- ignored */ 2608 struct __aiocb_private32 _aiocb_private; 2609 } oaiocb32_t; 2610 #endif 2611 2612 typedef struct aiocb32 { 2613 int32_t aio_fildes; /* File descriptor */ 2614 uint64_t aio_offset __packed; /* File offset for I/O */ 2615 uint32_t aio_buf; /* I/O buffer in process space */ 2616 uint32_t aio_nbytes; /* Number of bytes for I/O */ 2617 int __spare__[2]; 2618 uint32_t __spare2__; 2619 int aio_lio_opcode; /* LIO opcode */ 2620 int aio_reqprio; /* Request priority -- ignored */ 2621 struct __aiocb_private32 _aiocb_private; 2622 struct sigevent32 aio_sigevent; /* Signal to deliver */ 2623 } aiocb32_t; 2624 2625 #ifdef COMPAT_FREEBSD6 2626 static int 2627 convert_old_sigevent32(struct osigevent32 *osig, struct sigevent *nsig) 2628 { 2629 2630 /* 2631 * Only SIGEV_NONE, SIGEV_SIGNAL, and SIGEV_KEVENT are 2632 * supported by AIO with the old sigevent structure. 
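 * Anything else (e.g. SIGEV_THREAD_ID) has no representation in the old
 * osigevent32 layout and is rejected with EINVAL below.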
2633 */ 2634 CP(*osig, *nsig, sigev_notify); 2635 switch (nsig->sigev_notify) { 2636 case SIGEV_NONE: 2637 break; 2638 case SIGEV_SIGNAL: 2639 nsig->sigev_signo = osig->__sigev_u.__sigev_signo; 2640 break; 2641 case SIGEV_KEVENT: 2642 nsig->sigev_notify_kqueue = 2643 osig->__sigev_u.__sigev_notify_kqueue; 2644 PTRIN_CP(*osig, *nsig, sigev_value.sival_ptr); 2645 break; 2646 default: 2647 return (EINVAL); 2648 } 2649 return (0); 2650 } 2651 2652 static int 2653 aiocb32_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob) 2654 { 2655 struct oaiocb32 job32; 2656 int error; 2657 2658 bzero(kjob, sizeof(struct aiocb)); 2659 error = copyin(ujob, &job32, sizeof(job32)); 2660 if (error) 2661 return (error); 2662 2663 CP(job32, *kjob, aio_fildes); 2664 CP(job32, *kjob, aio_offset); 2665 PTRIN_CP(job32, *kjob, aio_buf); 2666 CP(job32, *kjob, aio_nbytes); 2667 CP(job32, *kjob, aio_lio_opcode); 2668 CP(job32, *kjob, aio_reqprio); 2669 CP(job32, *kjob, _aiocb_private.status); 2670 CP(job32, *kjob, _aiocb_private.error); 2671 PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo); 2672 return (convert_old_sigevent32(&job32.aio_sigevent, 2673 &kjob->aio_sigevent)); 2674 } 2675 #endif 2676 2677 static int 2678 aiocb32_copyin(struct aiocb *ujob, struct aiocb *kjob) 2679 { 2680 struct aiocb32 job32; 2681 int error; 2682 2683 error = copyin(ujob, &job32, sizeof(job32)); 2684 if (error) 2685 return (error); 2686 CP(job32, *kjob, aio_fildes); 2687 CP(job32, *kjob, aio_offset); 2688 PTRIN_CP(job32, *kjob, aio_buf); 2689 CP(job32, *kjob, aio_nbytes); 2690 CP(job32, *kjob, aio_lio_opcode); 2691 CP(job32, *kjob, aio_reqprio); 2692 CP(job32, *kjob, _aiocb_private.status); 2693 CP(job32, *kjob, _aiocb_private.error); 2694 PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo); 2695 return (convert_sigevent32(&job32.aio_sigevent, &kjob->aio_sigevent)); 2696 } 2697 2698 static long 2699 aiocb32_fetch_status(struct aiocb *ujob) 2700 { 2701 struct aiocb32 *ujob32; 2702 2703 ujob32 = (struct aiocb32 *)ujob; 2704 return (fuword32(&ujob32->_aiocb_private.status)); 2705 } 2706 2707 static long 2708 aiocb32_fetch_error(struct aiocb *ujob) 2709 { 2710 struct aiocb32 *ujob32; 2711 2712 ujob32 = (struct aiocb32 *)ujob; 2713 return (fuword32(&ujob32->_aiocb_private.error)); 2714 } 2715 2716 static int 2717 aiocb32_store_status(struct aiocb *ujob, long status) 2718 { 2719 struct aiocb32 *ujob32; 2720 2721 ujob32 = (struct aiocb32 *)ujob; 2722 return (suword32(&ujob32->_aiocb_private.status, status)); 2723 } 2724 2725 static int 2726 aiocb32_store_error(struct aiocb *ujob, long error) 2727 { 2728 struct aiocb32 *ujob32; 2729 2730 ujob32 = (struct aiocb32 *)ujob; 2731 return (suword32(&ujob32->_aiocb_private.error, error)); 2732 } 2733 2734 static int 2735 aiocb32_store_kernelinfo(struct aiocb *ujob, long jobref) 2736 { 2737 struct aiocb32 *ujob32; 2738 2739 ujob32 = (struct aiocb32 *)ujob; 2740 return (suword32(&ujob32->_aiocb_private.kernelinfo, jobref)); 2741 } 2742 2743 static int 2744 aiocb32_store_aiocb(struct aiocb **ujobp, struct aiocb *ujob) 2745 { 2746 2747 return (suword32(ujobp, (long)ujob)); 2748 } 2749 2750 static struct aiocb_ops aiocb32_ops = { 2751 .copyin = aiocb32_copyin, 2752 .fetch_status = aiocb32_fetch_status, 2753 .fetch_error = aiocb32_fetch_error, 2754 .store_status = aiocb32_store_status, 2755 .store_error = aiocb32_store_error, 2756 .store_kernelinfo = aiocb32_store_kernelinfo, 2757 .store_aiocb = aiocb32_store_aiocb, 2758 }; 2759 2760 #ifdef COMPAT_FREEBSD6 2761 static struct aiocb_ops aiocb32_ops_osigevent 
= { 2762 .copyin = aiocb32_copyin_old_sigevent, 2763 .fetch_status = aiocb32_fetch_status, 2764 .fetch_error = aiocb32_fetch_error, 2765 .store_status = aiocb32_store_status, 2766 .store_error = aiocb32_store_error, 2767 .store_kernelinfo = aiocb32_store_kernelinfo, 2768 .store_aiocb = aiocb32_store_aiocb, 2769 }; 2770 #endif 2771 2772 int 2773 freebsd32_aio_return(struct thread *td, struct freebsd32_aio_return_args *uap) 2774 { 2775 2776 return (kern_aio_return(td, (struct aiocb *)uap->aiocbp, &aiocb32_ops)); 2777 } 2778 2779 int 2780 freebsd32_aio_suspend(struct thread *td, struct freebsd32_aio_suspend_args *uap) 2781 { 2782 struct timespec32 ts32; 2783 struct timespec ts, *tsp; 2784 struct aiocb **ujoblist; 2785 uint32_t *ujoblist32; 2786 int error, i; 2787 2788 if (uap->nent < 0 || uap->nent > AIO_LISTIO_MAX) 2789 return (EINVAL); 2790 2791 if (uap->timeout) { 2792 /* Get timespec struct. */ 2793 if ((error = copyin(uap->timeout, &ts32, sizeof(ts32))) != 0) 2794 return (error); 2795 CP(ts32, ts, tv_sec); 2796 CP(ts32, ts, tv_nsec); 2797 tsp = &ts; 2798 } else 2799 tsp = NULL; 2800 2801 ujoblist = uma_zalloc(aiol_zone, M_WAITOK); 2802 ujoblist32 = (uint32_t *)ujoblist; 2803 error = copyin(uap->aiocbp, ujoblist32, uap->nent * 2804 sizeof(ujoblist32[0])); 2805 if (error == 0) { 2806 for (i = uap->nent - 1; i >= 0; i--) 2807 ujoblist[i] = PTRIN(ujoblist32[i]); 2808 2809 error = kern_aio_suspend(td, uap->nent, ujoblist, tsp); 2810 } 2811 uma_zfree(aiol_zone, ujoblist); 2812 return (error); 2813 } 2814 2815 int 2816 freebsd32_aio_error(struct thread *td, struct freebsd32_aio_error_args *uap) 2817 { 2818 2819 return (kern_aio_error(td, (struct aiocb *)uap->aiocbp, &aiocb32_ops)); 2820 } 2821 2822 #ifdef COMPAT_FREEBSD6 2823 int 2824 freebsd6_freebsd32_aio_read(struct thread *td, 2825 struct freebsd6_freebsd32_aio_read_args *uap) 2826 { 2827 2828 return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ, 2829 &aiocb32_ops_osigevent)); 2830 } 2831 #endif 2832 2833 int 2834 freebsd32_aio_read(struct thread *td, struct freebsd32_aio_read_args *uap) 2835 { 2836 2837 return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ, 2838 &aiocb32_ops)); 2839 } 2840 2841 #ifdef COMPAT_FREEBSD6 2842 int 2843 freebsd6_freebsd32_aio_write(struct thread *td, 2844 struct freebsd6_freebsd32_aio_write_args *uap) 2845 { 2846 2847 return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE, 2848 &aiocb32_ops_osigevent)); 2849 } 2850 #endif 2851 2852 int 2853 freebsd32_aio_write(struct thread *td, struct freebsd32_aio_write_args *uap) 2854 { 2855 2856 return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE, 2857 &aiocb32_ops)); 2858 } 2859 2860 int 2861 freebsd32_aio_mlock(struct thread *td, struct freebsd32_aio_mlock_args *uap) 2862 { 2863 2864 return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_MLOCK, 2865 &aiocb32_ops)); 2866 } 2867 2868 int 2869 freebsd32_aio_waitcomplete(struct thread *td, 2870 struct freebsd32_aio_waitcomplete_args *uap) 2871 { 2872 struct timespec32 ts32; 2873 struct timespec ts, *tsp; 2874 int error; 2875 2876 if (uap->timeout) { 2877 /* Get timespec struct.
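 * The 32-bit timespec is widened field by field before being handed to
 * the native kern_aio_waitcomplete() along with the 32-bit aiocb ops.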
*/ 2878 error = copyin(uap->timeout, &ts32, sizeof(ts32)); 2879 if (error) 2880 return (error); 2881 CP(ts32, ts, tv_sec); 2882 CP(ts32, ts, tv_nsec); 2883 tsp = &ts; 2884 } else 2885 tsp = NULL; 2886 2887 return (kern_aio_waitcomplete(td, (struct aiocb **)uap->aiocbp, tsp, 2888 &aiocb32_ops)); 2889 } 2890 2891 int 2892 freebsd32_aio_fsync(struct thread *td, struct freebsd32_aio_fsync_args *uap) 2893 { 2894 2895 return (kern_aio_fsync(td, uap->op, (struct aiocb *)uap->aiocbp, 2896 &aiocb32_ops)); 2897 } 2898 2899 #ifdef COMPAT_FREEBSD6 2900 int 2901 freebsd6_freebsd32_lio_listio(struct thread *td, 2902 struct freebsd6_freebsd32_lio_listio_args *uap) 2903 { 2904 struct aiocb **acb_list; 2905 struct sigevent *sigp, sig; 2906 struct osigevent32 osig; 2907 uint32_t *acb_list32; 2908 int error, i, nent; 2909 2910 if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) 2911 return (EINVAL); 2912 2913 nent = uap->nent; 2914 if (nent < 0 || nent > AIO_LISTIO_MAX) 2915 return (EINVAL); 2916 2917 if (uap->sig && (uap->mode == LIO_NOWAIT)) { 2918 error = copyin(uap->sig, &osig, sizeof(osig)); 2919 if (error) 2920 return (error); 2921 error = convert_old_sigevent32(&osig, &sig); 2922 if (error) 2923 return (error); 2924 sigp = &sig; 2925 } else 2926 sigp = NULL; 2927 2928 acb_list32 = malloc(sizeof(uint32_t) * nent, M_LIO, M_WAITOK); 2929 error = copyin(uap->acb_list, acb_list32, nent * sizeof(uint32_t)); 2930 if (error) { 2931 free(acb_list32, M_LIO); 2932 return (error); 2933 } 2934 acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK); 2935 for (i = 0; i < nent; i++) 2936 acb_list[i] = PTRIN(acb_list32[i]); 2937 free(acb_list32, M_LIO); 2938 2939 error = kern_lio_listio(td, uap->mode, 2940 (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp, 2941 &aiocb32_ops_osigevent); 2942 free(acb_list, M_LIO); 2943 return (error); 2944 } 2945 #endif 2946 2947 int 2948 freebsd32_lio_listio(struct thread *td, struct freebsd32_lio_listio_args *uap) 2949 { 2950 struct aiocb **acb_list; 2951 struct sigevent *sigp, sig; 2952 struct sigevent32 sig32; 2953 uint32_t *acb_list32; 2954 int error, i, nent; 2955 2956 if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) 2957 return (EINVAL); 2958 2959 nent = uap->nent; 2960 if (nent < 0 || nent > AIO_LISTIO_MAX) 2961 return (EINVAL); 2962 2963 if (uap->sig && (uap->mode == LIO_NOWAIT)) { 2964 error = copyin(uap->sig, &sig32, sizeof(sig32)); 2965 if (error) 2966 return (error); 2967 error = convert_sigevent32(&sig32, &sig); 2968 if (error) 2969 return (error); 2970 sigp = &sig; 2971 } else 2972 sigp = NULL; 2973 2974 acb_list32 = malloc(sizeof(uint32_t) * nent, M_LIO, M_WAITOK); 2975 error = copyin(uap->acb_list, acb_list32, nent * sizeof(uint32_t)); 2976 if (error) { 2977 free(acb_list32, M_LIO); 2978 return (error); 2979 } 2980 acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK); 2981 for (i = 0; i < nent; i++) 2982 acb_list[i] = PTRIN(acb_list32[i]); 2983 free(acb_list32, M_LIO); 2984 2985 error = kern_lio_listio(td, uap->mode, 2986 (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp, 2987 &aiocb32_ops); 2988 free(acb_list, M_LIO); 2989 return (error); 2990 } 2991 2992 #endif 2993
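/*
 * For reference, a userland consumer of the EVFILT_AIO filter defined above
 * might look roughly like this (illustrative sketch only, not part of this
 * file; "fd" and "buf" are hypothetical and error handling is omitted):
 *
 *	#include <sys/types.h>
 *	#include <sys/event.h>
 *	#include <aio.h>
 *	#include <string.h>
 *
 *	struct aiocb iocb;
 *	struct kevent ev;
 *	int kq = kqueue();
 *
 *	memset(&iocb, 0, sizeof(iocb));
 *	iocb.aio_fildes = fd;
 *	iocb.aio_buf = buf;
 *	iocb.aio_nbytes = sizeof(buf);
 *	iocb.aio_sigevent.sigev_notify = SIGEV_KEVENT;
 *	iocb.aio_sigevent.sigev_notify_kqueue = kq;
 *	iocb.aio_sigevent.sigev_value.sival_ptr = &iocb;
 *	aio_read(&iocb);
 *
 *	kevent(kq, NULL, 0, &ev, 1, NULL);
 *	aio_return((struct aiocb *)ev.udata);	(udata == sival_ptr == &iocb)
 */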