/*
 * Copyright (c) 1997 John S. Dyson.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. John S. Dyson's name may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * DISCLAIMER:  This code isn't warranted to do anything useful.  Anything
 * bad that happens because of using this software isn't the responsibility
 * of the author.  This software is distributed AS-IS.
 *
 * $Id: vfs_aio.c,v 1.32 1998/07/15 06:51:14 bde Exp $
 */

/*
 * This file contains support for the POSIX 1003.1B AIO/LIO facility.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/lock.h>
#include <sys/unistd.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_zone.h>
#include <sys/aio.h>
#include <sys/shm.h>

#include <machine/cpu.h>
#include <machine/limits.h>

static long jobrefid;

#define JOBST_NULL		0x0
#define JOBST_JOBQPROC		0x1
#define JOBST_JOBQGLOBAL	0x2
#define JOBST_JOBRUNNING	0x3
#define JOBST_JOBFINISHED	0x4
#define JOBST_JOBQBUF		0x5
#define JOBST_JOBBFINISHED	0x6

#ifndef MAX_AIO_PER_PROC
#define MAX_AIO_PER_PROC	32
#endif

#ifndef MAX_AIO_QUEUE_PER_PROC
#define MAX_AIO_QUEUE_PER_PROC	256	/* Bigger than AIO_LISTIO_MAX */
#endif

#ifndef MAX_AIO_PROCS
#define MAX_AIO_PROCS		32
#endif

#ifndef MAX_AIO_QUEUE
#define MAX_AIO_QUEUE		1024	/* Bigger than AIO_LISTIO_MAX */
#endif

#ifndef TARGET_AIO_PROCS
#define TARGET_AIO_PROCS	0
#endif

#ifndef MAX_BUF_AIO
#define MAX_BUF_AIO		16
#endif

#ifndef AIOD_TIMEOUT_DEFAULT
#define AIOD_TIMEOUT_DEFAULT	(10 * hz)
#endif

#ifndef AIOD_LIFETIME_DEFAULT
#define AIOD_LIFETIME_DEFAULT	(30 * hz)
#endif

static int max_aio_procs = MAX_AIO_PROCS;
static int num_aio_procs = 0;
static int target_aio_procs = TARGET_AIO_PROCS;
static int max_queue_count = MAX_AIO_QUEUE;
static int num_queue_count = 0;
static int num_buf_aio = 0;
static int num_aio_resv_start = 0;
static int aiod_timeout;
static int aiod_lifetime;

static int max_aio_per_proc = MAX_AIO_PER_PROC,
	max_aio_queue_per_proc = MAX_AIO_QUEUE_PER_PROC;

static int max_buf_aio = MAX_BUF_AIO;

SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0, "AIO mgmt");

SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc,
	CTLFLAG_RW, &max_aio_per_proc, 0, "");

SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc,
	CTLFLAG_RW, &max_aio_queue_per_proc, 0, "");

SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs,
	CTLFLAG_RW, &max_aio_procs, 0, "");

SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs,
	CTLFLAG_RD, &num_aio_procs, 0, "");

SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count,
	CTLFLAG_RD, &num_queue_count, 0, "");

SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue,
	CTLFLAG_RW, &max_queue_count, 0, "");

SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs,
	CTLFLAG_RW, &target_aio_procs, 0, "");

SYSCTL_INT(_vfs_aio, OID_AUTO, max_buf_aio,
	CTLFLAG_RW, &max_buf_aio, 0, "");

SYSCTL_INT(_vfs_aio, OID_AUTO, num_buf_aio,
	CTLFLAG_RD, &num_buf_aio, 0, "");

SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_lifetime,
	CTLFLAG_RW, &aiod_lifetime, 0, "");

SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_timeout,
	CTLFLAG_RW, &aiod_timeout, 0, "");
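
/*
 * Illustrative sketch (not part of the original file): the tunables above
 * are exported under the vfs.aio sysctl node, so a userland program could,
 * for example, read one of the read-only counters with sysctlbyname(3),
 * roughly as follows.  The identifiers below are hypothetical example code.
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int queued;
 *		size_t len = sizeof(queued);
 *
 *		if (sysctlbyname("vfs.aio.num_queue_count", &queued, &len,
 *		    NULL, 0) == 0)
 *			printf("queued AIO requests: %d\n", queued);
 *		return 0;
 *	}
 */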

/*
 * Job queue item
 */

#define AIOCBLIST_CANCELLED	0x1
#define AIOCBLIST_RUNDOWN	0x4
#define AIOCBLIST_ASYNCFREE	0x8
#define AIOCBLIST_DONE		0x10

struct aiocblist {
	TAILQ_ENTRY (aiocblist) list;		/* List of jobs */
	TAILQ_ENTRY (aiocblist) plist;		/* List of jobs for proc */
	int	jobflags;
	int	jobstate;
	int	inputcharge, outputcharge;
	struct	buf *bp;			/* buffer pointer */
	struct	proc *userproc;			/* User process */
	struct	aioproclist *jobaioproc;	/* AIO process descriptor */
	struct	aio_liojob *lio;		/* optional lio job */
	struct	aiocb *uuaiocb;			/* pointer in userspace of aiocb */
	struct	aiocb uaiocb;			/* Kernel I/O control block */
};


/*
 * AIO process info
 */
#define AIOP_FREE	0x1		/* proc on free queue */
#define AIOP_SCHED	0x2		/* proc explicitly scheduled */

struct aioproclist {
	int aioprocflags;			/* AIO proc flags */
	TAILQ_ENTRY(aioproclist) list;		/* List of processes */
	struct proc *aioproc;			/* The AIO thread */
	TAILQ_HEAD (,aiocblist) jobtorun;	/* suggested job to run */
};

/*
 * data-structure for lio signal management
 */
struct aio_liojob {
	int lioj_flags;
	int lioj_buffer_count;
	int lioj_buffer_finished_count;
	int lioj_queue_count;
	int lioj_queue_finished_count;
	struct sigevent lioj_signal;	/* signal on all I/O done */
	TAILQ_ENTRY (aio_liojob) lioj_list;
	struct kaioinfo *lioj_ki;
};
#define LIOJ_SIGNAL		0x1	/* signal on all done (lio) */
#define LIOJ_SIGNAL_POSTED	0x2	/* signal has been posted */

/*
 * per process aio data structure
 */
struct kaioinfo {
	int	kaio_flags;			/* per process kaio flags */
	int	kaio_maxactive_count;		/* maximum number of AIOs */
	int	kaio_active_count;		/* number of currently used AIOs */
	int	kaio_qallowed_count;		/* maximum size of AIO queue */
	int	kaio_queue_count;		/* size of AIO queue */
	int	kaio_ballowed_count;		/* maximum number of buffers */
	int	kaio_queue_finished_count;	/* number of daemon jobs finished */
	int	kaio_buffer_count;		/* number of physio buffers */
	int	kaio_buffer_finished_count;	/* count of I/O done */
	struct	proc *kaio_p;			/* process that uses this kaio block */
	TAILQ_HEAD (,aio_liojob) kaio_liojoblist; /* list of lio jobs */
	TAILQ_HEAD (,aiocblist) kaio_jobqueue;	/* job queue for process */
	TAILQ_HEAD (,aiocblist) kaio_jobdone;	/* done queue for process */
	TAILQ_HEAD (,aiocblist) kaio_bufqueue;	/* buffer job queue for process */
	TAILQ_HEAD (,aiocblist) kaio_bufdone;	/* buffer done queue for process */
};

#define KAIO_RUNDOWN	0x1	/* process is being run down */
#define KAIO_WAKEUP	0x2	/* wakeup process when there is a significant
				   event */


static TAILQ_HEAD (,aioproclist) aio_freeproc, aio_activeproc;
static TAILQ_HEAD(,aiocblist) aio_jobs;		/* Async job list */
static TAILQ_HEAD(,aiocblist) aio_bufjobs;	/* Phys I/O job list */
static TAILQ_HEAD(,aiocblist) aio_freejobs;	/* Pool of free jobs */

static void aio_init_aioinfo(struct proc *p);
static void aio_onceonly(void *);
static int aio_free_entry(struct aiocblist *aiocbe);
static void aio_process(struct aiocblist *aiocbe);
static int aio_newproc(void);
static int aio_aqueue(struct proc *p, struct aiocb *job, int type);
static void aio_physwakeup(struct buf *bp);
static int aio_fphysio(struct proc *p, struct aiocblist *aiocbe, int type);
static int aio_qphysio(struct proc *p, struct aiocblist *iocb);
static void aio_daemon(void *uproc);

SYSINIT(aio, SI_SUB_VFS, SI_ORDER_ANY, aio_onceonly, NULL);

static vm_zone_t kaio_zone = 0, aiop_zone = 0,
	aiocb_zone = 0, aiol_zone = 0, aiolio_zone = 0;

/*
 * Single AIOD vmspace shared amongst all of them
 */
static struct vmspace *aiovmspace = NULL;

/*
 * Startup initialization
 */
void
aio_onceonly(void *na)
{
	TAILQ_INIT(&aio_freeproc);
	TAILQ_INIT(&aio_activeproc);
	TAILQ_INIT(&aio_jobs);
	TAILQ_INIT(&aio_bufjobs);
	TAILQ_INIT(&aio_freejobs);
	kaio_zone = zinit("AIO", sizeof (struct kaioinfo), 0, 0, 1);
	aiop_zone = zinit("AIOP", sizeof (struct aioproclist), 0, 0, 1);
	aiocb_zone = zinit("AIOCB", sizeof (struct aiocblist), 0, 0, 1);
	aiol_zone = zinit("AIOL", AIO_LISTIO_MAX * sizeof (int), 0, 0, 1);
	aiolio_zone = zinit("AIOLIO",
		AIO_LISTIO_MAX * sizeof (struct aio_liojob), 0, 0, 1);
	aiod_timeout = AIOD_TIMEOUT_DEFAULT;
	aiod_lifetime = AIOD_LIFETIME_DEFAULT;
	jobrefid = 1;
}

/*
 * Init the per-process aioinfo structure.
 * The aioinfo limits are set per-process for user limit (resource) management.
 */
void
aio_init_aioinfo(struct proc *p)
{
	struct kaioinfo *ki;
	if (p->p_aioinfo == NULL) {
		ki = zalloc(kaio_zone);
		p->p_aioinfo = ki;
		ki->kaio_flags = 0;
		ki->kaio_maxactive_count = max_aio_per_proc;
		ki->kaio_active_count = 0;
		ki->kaio_qallowed_count = max_aio_queue_per_proc;
		ki->kaio_queue_count = 0;
		ki->kaio_ballowed_count = max_buf_aio;
		ki->kaio_buffer_count = 0;
		ki->kaio_buffer_finished_count = 0;
		ki->kaio_p = p;
		TAILQ_INIT(&ki->kaio_jobdone);
		TAILQ_INIT(&ki->kaio_jobqueue);
		TAILQ_INIT(&ki->kaio_bufdone);
		TAILQ_INIT(&ki->kaio_bufqueue);
		TAILQ_INIT(&ki->kaio_liojoblist);
	}
}

/*
 * Free a job entry.  Wait for completion if it is currently
 * active, but don't delay forever.  If we delay, we return
 * a flag that says that we have to restart the queue scan.
 */
int
aio_free_entry(struct aiocblist *aiocbe)
{
	struct kaioinfo *ki;
	struct aioproclist *aiop;
	struct aio_liojob *lj;
	struct proc *p;
	int error;
	int s;

	if (aiocbe->jobstate == JOBST_NULL)
		panic("aio_free_entry: freeing already free job");

	p = aiocbe->userproc;
	ki = p->p_aioinfo;
	lj = aiocbe->lio;
	if (ki == NULL)
		panic("aio_free_entry: missing p->p_aioinfo");

	if (aiocbe->jobstate == JOBST_JOBRUNNING) {
		if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE)
			return 0;
		aiocbe->jobflags |= AIOCBLIST_RUNDOWN;
		tsleep(aiocbe, PRIBIO|PCATCH, "jobwai", 0);
	}
	aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE;

	if (aiocbe->bp == NULL) {
		if (ki->kaio_queue_count <= 0)
			panic("aio_free_entry: process queue size <= 0");
		if (num_queue_count <= 0)
			panic("aio_free_entry: system wide queue size <= 0");

		if (lj) {
			lj->lioj_queue_count--;
			if (aiocbe->jobflags & AIOCBLIST_DONE)
				lj->lioj_queue_finished_count--;
		}
		ki->kaio_queue_count--;
		if (aiocbe->jobflags & AIOCBLIST_DONE)
			ki->kaio_queue_finished_count--;
		num_queue_count--;

	} else {
		if (lj) {
			lj->lioj_buffer_count--;
			if (aiocbe->jobflags & AIOCBLIST_DONE)
				lj->lioj_buffer_finished_count--;
		}
		if (aiocbe->jobflags & AIOCBLIST_DONE)
			ki->kaio_buffer_finished_count--;
		ki->kaio_buffer_count--;
		num_buf_aio--;

	}

	if ((ki->kaio_flags & KAIO_WAKEUP) ||
	    (ki->kaio_flags & KAIO_RUNDOWN) &&
	    ((ki->kaio_buffer_count == 0) && (ki->kaio_queue_count == 0))) {
		ki->kaio_flags &= ~KAIO_WAKEUP;
		wakeup(p);
	}

	if (aiocbe->jobstate == JOBST_JOBQBUF) {
		if ((error = aio_fphysio(p, aiocbe, 1)) != 0)
			return error;
		if (aiocbe->jobstate != JOBST_JOBBFINISHED)
			panic("aio_free_entry: invalid physio finish-up state");
		s = splbio();
		TAILQ_REMOVE(&ki->kaio_bufdone, aiocbe, plist);
		splx(s);
	} else if (aiocbe->jobstate == JOBST_JOBQPROC) {
		aiop = aiocbe->jobaioproc;
		TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list);
	} else if (aiocbe->jobstate == JOBST_JOBQGLOBAL) {
		TAILQ_REMOVE(&aio_jobs, aiocbe, list);
	} else if (aiocbe->jobstate == JOBST_JOBFINISHED) {
		TAILQ_REMOVE(&ki->kaio_jobdone, aiocbe, plist);
	} else if (aiocbe->jobstate == JOBST_JOBBFINISHED) {
		s = splbio();
		TAILQ_REMOVE(&ki->kaio_bufdone, aiocbe, plist);
		splx(s);
		if (aiocbe->bp) {
			vunmapbuf(aiocbe->bp);
			relpbuf(aiocbe->bp);
			aiocbe->bp = NULL;
		}
	}
	if (lj && (lj->lioj_buffer_count == 0) && (lj->lioj_queue_count == 0)) {
		TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list);
		zfree(aiolio_zone, lj);
	}
	TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
	aiocbe->jobstate = JOBST_NULL;
	return 0;
}

/*
 * Rundown the jobs for a given process.
 */
void
aio_proc_rundown(struct proc *p)
{
	int s;
	struct kaioinfo *ki;
	struct aio_liojob *lj, *ljn;
	struct aiocblist *aiocbe, *aiocbn;

	ki = p->p_aioinfo;
	if (ki == NULL)
		return;

	ki->kaio_flags |= LIOJ_SIGNAL_POSTED;
	while ((ki->kaio_active_count > 0) ||
	    (ki->kaio_buffer_count > ki->kaio_buffer_finished_count)) {
		ki->kaio_flags |= KAIO_RUNDOWN;
		if (tsleep(p, PRIBIO, "kaiowt", aiod_timeout))
			break;
	}

restart1:
	for (aiocbe = TAILQ_FIRST(&ki->kaio_jobdone);
	     aiocbe;
	     aiocbe = aiocbn) {
		aiocbn = TAILQ_NEXT(aiocbe, plist);
		if (aio_free_entry(aiocbe))
			goto restart1;
	}

restart2:
	for (aiocbe = TAILQ_FIRST(&ki->kaio_jobqueue);
	     aiocbe;
	     aiocbe = aiocbn) {
		aiocbn = TAILQ_NEXT(aiocbe, plist);
		if (aio_free_entry(aiocbe))
			goto restart2;
	}

/*
 * Note the use of lots of splbio here, trying to avoid
 * splbio for long chains of I/O.  Probably unnecessary.
 */

restart3:
	s = splbio();
	while (TAILQ_FIRST(&ki->kaio_bufqueue)) {
		ki->kaio_flags |= KAIO_WAKEUP;
		tsleep(p, PRIBIO, "aioprn", 0);
		splx(s);
		goto restart3;
	}
	splx(s);

restart4:
	s = splbio();
	for (aiocbe = TAILQ_FIRST(&ki->kaio_bufdone);
	     aiocbe;
	     aiocbe = aiocbn) {
		aiocbn = TAILQ_NEXT(aiocbe, plist);
		if (aio_free_entry(aiocbe)) {
			splx(s);
			goto restart4;
		}
	}
	splx(s);

	for (lj = TAILQ_FIRST(&ki->kaio_liojoblist);
	     lj;
	     lj = ljn) {
		ljn = TAILQ_NEXT(lj, lioj_list);
		if ((lj->lioj_buffer_count == 0) && (lj->lioj_queue_count == 0)) {
			TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list);
			zfree(aiolio_zone, lj);
		} else {
#if defined(DIAGNOSTIC)
			printf("LIO job not cleaned up: B:%d, BF:%d, Q:%d, QF:%d\n",
				lj->lioj_buffer_count, lj->lioj_buffer_finished_count,
				lj->lioj_queue_count, lj->lioj_queue_finished_count);
#endif
		}
	}

	zfree(kaio_zone, ki);
	p->p_aioinfo = NULL;
}

/*
 * Select a job to run (called by an AIO daemon)
 */
static struct aiocblist *
aio_selectjob(struct aioproclist *aiop)
{

	struct aiocblist *aiocbe;

	aiocbe = TAILQ_FIRST(&aiop->jobtorun);
	if (aiocbe) {
		TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list);
		return aiocbe;
	}

	for (aiocbe = TAILQ_FIRST(&aio_jobs);
	     aiocbe;
	     aiocbe = TAILQ_NEXT(aiocbe, list)) {
		struct kaioinfo *ki;
		struct proc *userp;

		userp = aiocbe->userproc;
		ki = userp->p_aioinfo;

		if (ki->kaio_active_count < ki->kaio_maxactive_count) {
			TAILQ_REMOVE(&aio_jobs, aiocbe, list);
			return aiocbe;
		}
	}

	return NULL;
}

/*
 * The AIO processing activity.  This is the code that does the
 * I/O request for the non-physio version of the operations.  The
 * normal vn operations are used, and this code should work in
 * all instances for every type of file, including pipes, sockets,
 * fifos, and regular files.
 */
void
aio_process(struct aiocblist *aiocbe)
{
	struct filedesc *fdp;
	struct proc *userp, *mycp;
	struct aiocb *cb;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	unsigned int fd;
	int cnt;
	int error;
	off_t offset;
	int oublock_st, oublock_end;
	int inblock_st, inblock_end;

	userp = aiocbe->userproc;
	cb = &aiocbe->uaiocb;

	mycp = curproc;

	fdp = mycp->p_fd;
	fd = cb->aio_fildes;
	fp = fdp->fd_ofiles[fd];

	aiov.iov_base = (void *) cb->aio_buf;
	aiov.iov_len = cb->aio_nbytes;

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = offset = cb->aio_offset;
	auio.uio_resid = cb->aio_nbytes;
	cnt = cb->aio_nbytes;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = mycp;

	inblock_st = mycp->p_stats->p_ru.ru_inblock;
	oublock_st = mycp->p_stats->p_ru.ru_oublock;
	if (cb->aio_lio_opcode == LIO_READ) {
		auio.uio_rw = UIO_READ;
		error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
	} else {
		auio.uio_rw = UIO_WRITE;
		error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred);
	}
	inblock_end = mycp->p_stats->p_ru.ru_inblock;
	oublock_end = mycp->p_stats->p_ru.ru_oublock;

	aiocbe->inputcharge = inblock_end - inblock_st;
	aiocbe->outputcharge = oublock_end - oublock_st;

	if (error) {
		if (auio.uio_resid != cnt) {
			if (error == ERESTART || error == EINTR || error == EWOULDBLOCK)
				error = 0;
			if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE))
				psignal(userp, SIGPIPE);
		}
	}

	cnt -= auio.uio_resid;
	cb->_aiocb_private.error = error;
	cb->_aiocb_private.status = cnt;

	return;

}

/*
 * The AIO daemon, most of the actual work is done in aio_process,
 * but the setup (and address space mgmt) is done in this routine.
 */
static void
aio_daemon(void *uproc)
{
	int s;
	struct aioproclist *aiop;
	struct vmspace *myvm, *aiovm;
	struct proc *mycp;

	/*
	 * Local copies of curproc (mycp) and vmspace (myvm)
	 */
	mycp = curproc;
	myvm = mycp->p_vmspace;

	/*
	 * We arrange to create only one VM space for all AIOD processes.
	 * The VM space for the first AIOD created becomes the shared VM
	 * space for all of them.  We add an additional reference count,
	 * even for the first AIOD, so the address space does not go away,
	 * and we continue to use that original VM space even if the first
	 * AIOD exits.
	 */
	if ((aiovm = aiovmspace) == NULL) {
		aiovmspace = myvm;
		myvm->vm_refcnt++;
		/*
		 * Remove userland cruft from address space.
		 */
		if (myvm->vm_shm)
			shmexit(mycp);
		pmap_remove_pages(&myvm->vm_pmap, 0, USRSTACK);
		vm_map_remove(&myvm->vm_map, 0, USRSTACK);
		myvm->vm_tsize = 0;
		myvm->vm_dsize = 0;
		myvm->vm_ssize = 0;
	} else {
		aiovm->vm_refcnt++;
		mycp->p_vmspace = aiovm;
		pmap_activate(mycp);
		vmspace_free(myvm);
		myvm = aiovm;
	}

	if (mycp->p_textvp) {
		vrele(mycp->p_textvp);
		mycp->p_textvp = NULL;
	}

	/*
	 * Allocate and ready the aio control info.  There is one
	 * aiop structure per daemon.
	 */
	aiop = zalloc(aiop_zone);
	aiop->aioproc = mycp;
	aiop->aioprocflags |= AIOP_FREE;
	TAILQ_INIT(&aiop->jobtorun);

	/*
	 * Place thread (lightweight process) onto the AIO free thread list
	 */
	if (TAILQ_EMPTY(&aio_freeproc))
		wakeup(&aio_freeproc);
	TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list);

	/*
	 * Make up a name for the daemon
	 */
	strcpy(mycp->p_comm, "aiod");

	/*
	 * Get rid of our current filedescriptors.  AIOD's don't need any
	 * filedescriptors, except as temporarily inherited from the client.
	 * Credentials are also cloned, and made equivalent to "root."
	 */
	fdfree(mycp);
	mycp->p_fd = NULL;
	mycp->p_ucred = crcopy(mycp->p_ucred);
	mycp->p_ucred->cr_uid = 0;
	mycp->p_ucred->cr_ngroups = 1;
	mycp->p_ucred->cr_groups[0] = 1;

	/*
	 * The daemon resides in its own pgrp.
	 */
	enterpgrp(mycp, mycp->p_pid, 1);

	/*
	 * Mark special process type
	 */
	mycp->p_flag |= P_SYSTEM|P_KTHREADP;

	/*
	 * Wakeup parent process.  (Parent sleeps to keep from blasting away
	 * and creating too many daemons.)
	 */
	wakeup(mycp);

	while (1) {
		struct proc *curcp;
		struct aiocblist *aiocbe;

		/*
		 * curcp is the current daemon process context.
		 * userp is the current user process context.
		 */
		curcp = mycp;

		/*
		 * Take daemon off of free queue
		 */
		if (aiop->aioprocflags & AIOP_FREE) {
			TAILQ_REMOVE(&aio_freeproc, aiop, list);
			TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list);
			aiop->aioprocflags &= ~AIOP_FREE;
		}
		aiop->aioprocflags &= ~AIOP_SCHED;

		/*
		 * Check for jobs
		 */
		while ((aiocbe = aio_selectjob(aiop)) != NULL) {
			struct proc *userp;
			struct aiocb *cb;
			struct kaioinfo *ki;
			struct aio_liojob *lj;

			cb = &aiocbe->uaiocb;
			userp = aiocbe->userproc;

			aiocbe->jobstate = JOBST_JOBRUNNING;

			/*
			 * Connect to process address space for user program
			 */
			if (userp != curcp) {
				struct vmspace *tmpvm;
				/*
				 * Save the current address space that we are connected to.
				 */
				tmpvm = mycp->p_vmspace;
				/*
				 * Point to the new user address space, and refer to it.
				 */
				mycp->p_vmspace = userp->p_vmspace;
				mycp->p_vmspace->vm_refcnt++;
				/*
				 * Activate the new mapping.
				 */
				pmap_activate(mycp);
				/*
				 * If the old address space wasn't the daemon's own address
				 * space, then we need to remove the daemon's reference from
				 * the other process that it was acting on behalf of.
				 */
				if (tmpvm != myvm) {
					vmspace_free(tmpvm);
				}
				/*
				 * Disassociate from the previous client's file descriptors,
				 * and associate to the new client's descriptors.  Note that
				 * the daemon doesn't need to worry about its original
				 * descriptors, because they were originally freed.
				 */
				if (mycp->p_fd)
					fdfree(mycp);
				mycp->p_fd = fdshare(userp);
				curcp = userp;
			}

			ki = userp->p_aioinfo;
			lj = aiocbe->lio;

			/*
			 * Account for currently active jobs
			 */
			ki->kaio_active_count++;

			/*
			 * Do the I/O function
			 */
			aiocbe->jobaioproc = aiop;
			aio_process(aiocbe);

			/*
			 * Decrement the active job count
			 */
			ki->kaio_active_count--;

			/*
			 * Increment the completion count for wakeup/signal comparisons
			 */
			aiocbe->jobflags |= AIOCBLIST_DONE;
			ki->kaio_queue_finished_count++;
			if (lj) {
				lj->lioj_queue_finished_count++;
			}
			if ((ki->kaio_flags & KAIO_WAKEUP) ||
			    (ki->kaio_flags & KAIO_RUNDOWN) &&
			    (ki->kaio_active_count == 0)) {
				ki->kaio_flags &= ~KAIO_WAKEUP;
				wakeup(userp);
			}

			s = splbio();
			if (lj && (lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) ==
			    LIOJ_SIGNAL) {
				if ((lj->lioj_queue_finished_count == lj->lioj_queue_count) &&
				    (lj->lioj_buffer_finished_count == lj->lioj_buffer_count)) {
					psignal(userp, lj->lioj_signal.sigev_signo);
					lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
				}
			}
			splx(s);

			aiocbe->jobstate = JOBST_JOBFINISHED;

			/*
			 * If the I/O request should be automatically rundown, do the
			 * needed cleanup.  Otherwise, place the queue entry for
			 * the just finished I/O request into the done queue for the
			 * associated client.
			 */
			if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) {
				aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE;
				TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
			} else {
				TAILQ_REMOVE(&ki->kaio_jobqueue,
					aiocbe, plist);
				TAILQ_INSERT_TAIL(&ki->kaio_jobdone,
					aiocbe, plist);
			}

			if (aiocbe->jobflags & AIOCBLIST_RUNDOWN) {
				wakeup(aiocbe);
				aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN;
			}

			if (cb->aio_sigevent.sigev_notify == SIGEV_SIGNAL) {
				psignal(userp, cb->aio_sigevent.sigev_signo);
			}
		}

		/*
		 * Disconnect from user address space
		 */
		if (curcp != mycp) {
			struct vmspace *tmpvm;
			/*
			 * Get the user address space to disconnect from.
			 */
			tmpvm = mycp->p_vmspace;
			/*
			 * Get original address space for daemon.
			 */
			mycp->p_vmspace = myvm;
			/*
			 * Activate the daemon's address space.
			 */
			pmap_activate(mycp);
#if defined(DIAGNOSTIC)
			if (tmpvm == myvm)
				printf("AIOD: vmspace problem -- %d\n", mycp->p_pid);
#endif
			/*
			 * Remove our vmspace reference.
			 */
			vmspace_free(tmpvm);
			/*
			 * Disassociate from the user process's file descriptors.
			 */
			if (mycp->p_fd)
				fdfree(mycp);
			mycp->p_fd = NULL;
			curcp = mycp;
		}

		/*
		 * If we are the first to be put onto the free queue, wakeup
		 * anyone waiting for a daemon.
		 */
		TAILQ_REMOVE(&aio_activeproc, aiop, list);
		if (TAILQ_EMPTY(&aio_freeproc))
			wakeup(&aio_freeproc);
		TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list);
		aiop->aioprocflags |= AIOP_FREE;

		/*
		 * If daemon is inactive for a long time, allow it to exit, thereby
		 * freeing resources.
		 */
		if (((aiop->aioprocflags & AIOP_SCHED) == 0) &&
		    tsleep(mycp, PRIBIO, "aiordy", aiod_lifetime)) {
			if ((TAILQ_FIRST(&aio_jobs) == NULL) &&
			    (TAILQ_FIRST(&aiop->jobtorun) == NULL)) {
				if ((aiop->aioprocflags & AIOP_FREE) &&
				    (num_aio_procs > target_aio_procs)) {
					TAILQ_REMOVE(&aio_freeproc, aiop, list);
					zfree(aiop_zone, aiop);
					num_aio_procs--;
#if defined(DIAGNOSTIC)
					if (mycp->p_vmspace->vm_refcnt <= 1)
						printf("AIOD: bad vm refcnt for exiting daemon: %d\n",
							mycp->p_vmspace->vm_refcnt);
#endif
					exit1(mycp, 0);
				}
			}
		}
	}
}

/*
 * Create a new AIO daemon.  This is mostly a kernel-thread fork routine.
 * The AIO daemon modifies its environment itself.
 */
static int
aio_newproc()
{
	int error;
	struct rfork_args rfa;
	struct proc *p, *np;

	rfa.flags = RFPROC | RFCFDG;

	p = curproc;
	if ((error = rfork(p, &rfa)) != 0)
		return error;

	np = pfind(p->p_retval[0]);
	cpu_set_fork_handler(np, aio_daemon, p);

	/*
	 * Wait until daemon is started, but continue on just in case (to
	 * handle error conditions).
	 */
	error = tsleep(np, PZERO, "aiosta", aiod_timeout);
	num_aio_procs++;

	return error;

}

/*
 * Try the high-performance physio method for eligible VCHR devices.  This
 * routine doesn't require the use of any additional threads, and has very
 * little overhead.
 */
int
aio_qphysio(p, aiocbe)
	struct proc *p;
	struct aiocblist *aiocbe;
{
	int error;
	caddr_t sa;
	struct aiocb *cb;
	struct file *fp;
	struct buf *bp;
	int bflags;
	struct vnode *vp;
	struct kaioinfo *ki;
	struct filedesc *fdp;
	struct aio_liojob *lj;
	int fd;
	int majordev;
	int s;
	int cnt;
	dev_t dev;
	int rw;
	d_strategy_t *fstrategy;
	struct cdevsw *cdev;
	struct cdevsw *bdev;

	cb = &aiocbe->uaiocb;
	fdp = p->p_fd;
	fd = cb->aio_fildes;
	fp = fdp->fd_ofiles[fd];

	if (fp->f_type != DTYPE_VNODE) {
		return -1;
	}

	vp = (struct vnode *)fp->f_data;
	if (vp->v_type != VCHR || ((cb->aio_nbytes & (DEV_BSIZE - 1)) != 0)) {
		return -1;
	}

	if ((cb->aio_nbytes > MAXPHYS) && (num_buf_aio >= max_buf_aio)) {
		return -1;
	}

	if ((vp->v_specinfo == NULL) || (vp->v_flag & VISTTY)) {
		return -1;
	}

	majordev = major(vp->v_rdev);
	if (majordev == NODEV) {
		return -1;
	}

	cdev = cdevsw[major(vp->v_rdev)];
	if (cdev == NULL) {
		return -1;
	}

	if (cdev->d_bmaj == -1) {
		return -1;
	}
	bdev = cdev;

	ki = p->p_aioinfo;
	if (ki->kaio_buffer_count >= ki->kaio_ballowed_count) {
		return -1;
	}

	cnt = cb->aio_nbytes;
	if (cnt > MAXPHYS) {
		return -1;
	}

	dev = makedev(bdev->d_bmaj, minor(vp->v_rdev));

	/*
	 * Physical I/O is charged directly to the process, so we don't have
	 * to fake it.
	 */
	aiocbe->inputcharge = 0;
	aiocbe->outputcharge = 0;

	ki->kaio_buffer_count++;

	lj = aiocbe->lio;
	if (lj) {
		lj->lioj_buffer_count++;
	}

	/* create and build a buffer header for a transfer */
	bp = (struct buf *)getpbuf();

	/*
	 * get a copy of the kva from the physical buffer
	 */
	bp->b_proc = p;
	bp->b_dev = dev;
	error = bp->b_error = 0;

	if (cb->aio_lio_opcode == LIO_WRITE) {
		rw = 0;
		bflags = B_WRITE;
	} else {
		rw = 1;
		bflags = B_READ;
	}

	bp->b_bcount = cb->aio_nbytes;
	bp->b_bufsize = cb->aio_nbytes;
	bp->b_flags = B_BUSY | B_PHYS | B_CALL | bflags;
	bp->b_iodone = aio_physwakeup;
	bp->b_saveaddr = bp->b_data;
	bp->b_data = (void *) cb->aio_buf;
	bp->b_blkno = btodb(cb->aio_offset);

	if (rw && !useracc(bp->b_data, bp->b_bufsize, B_WRITE)) {
		error = EFAULT;
		goto doerror;
	}
	if (!rw && !useracc(bp->b_data, bp->b_bufsize, B_READ)) {
		error = EFAULT;
		goto doerror;
	}

	/* bring buffer into kernel space */
	vmapbuf(bp);

	s = splbio();
	aiocbe->bp = bp;
	bp->b_spc = (void *)aiocbe;
	TAILQ_INSERT_TAIL(&aio_bufjobs, aiocbe, list);
	TAILQ_INSERT_TAIL(&ki->kaio_bufqueue, aiocbe, plist);
	aiocbe->jobstate = JOBST_JOBQBUF;
	cb->_aiocb_private.status = cb->aio_nbytes;
	num_buf_aio++;
	fstrategy = bdev->d_strategy;
	bp->b_error = 0;

	splx(s);
	/* perform transfer */
	(*fstrategy)(bp);

	s = splbio();
	/*
	 * If we had an error invoking the request, or an error in processing
	 * the request before we have returned, we process it as an error
	 * in transfer.  Note that such an I/O error is not indicated immediately,
	 * but is returned using the aio_error mechanism.  In this case, aio_suspend
	 * will return immediately.
	 */
	if (bp->b_error || (bp->b_flags & B_ERROR)) {
		struct aiocb *job = aiocbe->uuaiocb;

		aiocbe->uaiocb._aiocb_private.status = 0;
		suword(&job->_aiocb_private.status, 0);
		aiocbe->uaiocb._aiocb_private.error = bp->b_error;
		suword(&job->_aiocb_private.error, bp->b_error);

		ki->kaio_buffer_finished_count++;

		if (aiocbe->jobstate != JOBST_JOBBFINISHED) {
			aiocbe->jobstate = JOBST_JOBBFINISHED;
			aiocbe->jobflags |= AIOCBLIST_DONE;
			TAILQ_REMOVE(&aio_bufjobs, aiocbe, list);
			TAILQ_REMOVE(&ki->kaio_bufqueue, aiocbe, plist);
			TAILQ_INSERT_TAIL(&ki->kaio_bufdone, aiocbe, plist);
		}
	}
	splx(s);
	return 0;

doerror:
	ki->kaio_buffer_count--;
	if (lj) {
		lj->lioj_buffer_count--;
	}
	aiocbe->bp = NULL;
	relpbuf(bp);
	return error;
}

/*
 * This waits/tests physio completion.
 */
int
aio_fphysio(p, iocb, flgwait)
	struct proc *p;
	struct aiocblist *iocb;
	int flgwait;
{
	int s;
	struct buf *bp;
	int error;

	bp = iocb->bp;

	s = splbio();
	if (flgwait == 0) {
		if ((bp->b_flags & B_DONE) == 0) {
			splx(s);
			return EINPROGRESS;
		}
	}

	while ((bp->b_flags & B_DONE) == 0) {
		if (tsleep((caddr_t)bp, PRIBIO, "physstr", aiod_timeout)) {
			if ((bp->b_flags & B_DONE) == 0) {
				splx(s);
				return EINPROGRESS;
			} else {
				break;
			}
		}
	}

	/* release mapping into kernel space */
	vunmapbuf(bp);
	iocb->bp = 0;

	error = 0;
	/*
	 * check for an error
	 */
	if (bp->b_flags & B_ERROR) {
		error = bp->b_error;
	}

	relpbuf(bp);
	return (error);
}

/*
 * Queue a new AIO request.  Choosing either the threaded or direct physio
 * VCHR technique is done in this code.
 */
static int
_aio_aqueue(struct proc *p, struct aiocb *job, struct aio_liojob *lj, int type)
{
	struct filedesc *fdp;
	struct file *fp;
	unsigned int fd;

	int error;
	int opcode;
	struct aiocblist *aiocbe;
	struct aioproclist *aiop;
	struct kaioinfo *ki;

	if ((aiocbe = TAILQ_FIRST(&aio_freejobs)) != NULL) {
		TAILQ_REMOVE(&aio_freejobs, aiocbe, list);
	} else {
		aiocbe = zalloc(aiocb_zone);
	}

	aiocbe->inputcharge = 0;
	aiocbe->outputcharge = 0;

	suword(&job->_aiocb_private.status, -1);
	suword(&job->_aiocb_private.error, 0);
	suword(&job->_aiocb_private.kernelinfo, -1);

	error = copyin((caddr_t)job,
		(caddr_t) &aiocbe->uaiocb, sizeof aiocbe->uaiocb);
	if (error) {
		suword(&job->_aiocb_private.error, error);

		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		return error;
	}

	/*
	 * Save userspace address of the job info
	 */
	aiocbe->uuaiocb = job;

	/*
	 * Get the opcode
	 */
	if (type != LIO_NOP) {
		aiocbe->uaiocb.aio_lio_opcode = type;
	}
	opcode = aiocbe->uaiocb.aio_lio_opcode;

	/*
	 * Get the fd info for process
	 */
	fdp = p->p_fd;

	/*
	 * Range check file descriptor
	 */
	fd = aiocbe->uaiocb.aio_fildes;
	if (fd >= fdp->fd_nfiles) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.error, EBADF);
		}
		return EBADF;
	}

	fp = fdp->fd_ofiles[fd];
	if ((fp == NULL) ||
	    ((opcode == LIO_WRITE) && ((fp->f_flag & FWRITE) == 0))) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.error, EBADF);
		}
		return EBADF;
	}

	if (aiocbe->uaiocb.aio_offset == -1LL) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.error, EINVAL);
		}
		return EINVAL;
	}

	error = suword(&job->_aiocb_private.kernelinfo, jobrefid);
	if (error) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.error, EINVAL);
		}
		return error;
	}

	aiocbe->uaiocb._aiocb_private.kernelinfo = (void *)(intptr_t)jobrefid;
	if (jobrefid == LONG_MAX)
		jobrefid = 1;
	else
		jobrefid++;

	if (opcode == LIO_NOP) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.error, 0);
			suword(&job->_aiocb_private.status, 0);
			suword(&job->_aiocb_private.kernelinfo, 0);
		}
		return 0;
	}

	if ((opcode != LIO_READ) && (opcode != LIO_WRITE)) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.status, 0);
			suword(&job->_aiocb_private.error, EINVAL);
		}
		return EINVAL;
	}

	suword(&job->_aiocb_private.error, EINPROGRESS);
	aiocbe->uaiocb._aiocb_private.error = EINPROGRESS;
	aiocbe->userproc = p;
	aiocbe->jobflags = 0;
	aiocbe->lio = lj;
	ki = p->p_aioinfo;

	if ((error = aio_qphysio(p, aiocbe)) == 0) {
		return 0;
	} else if (error > 0) {
		suword(&job->_aiocb_private.status, 0);
		aiocbe->uaiocb._aiocb_private.error = error;
		suword(&job->_aiocb_private.error, error);
		return error;
	}

	/*
	 * No buffer for daemon I/O
	 */
	aiocbe->bp = NULL;

	ki->kaio_queue_count++;
	if (lj) {
		lj->lioj_queue_count++;
	}
	TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist);
	TAILQ_INSERT_TAIL(&aio_jobs, aiocbe, list);
	aiocbe->jobstate = JOBST_JOBQGLOBAL;

	num_queue_count++;
	error = 0;

	/*
	 * If we don't have a free AIO process, and we are below our
	 * quota, then start one.  Otherwise, depend on the subsequent
	 * I/O completions to pick up this job.  If we don't successfully
	 * create the new process (thread) due to resource issues, we
	 * return an error for now (EAGAIN), which is likely not the
	 * correct thing to do.
	 */
retryproc:
	if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) {
		TAILQ_REMOVE(&aio_freeproc, aiop, list);
		TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list);
		aiop->aioprocflags &= ~AIOP_FREE;
		wakeup(aiop->aioproc);
	} else if (((num_aio_resv_start + num_aio_procs) < max_aio_procs) &&
		   ((ki->kaio_active_count + num_aio_resv_start) <
		    ki->kaio_maxactive_count)) {
		num_aio_resv_start++;
		if ((error = aio_newproc()) == 0) {
			num_aio_resv_start--;
			p->p_retval[0] = 0;
			goto retryproc;
		}
		num_aio_resv_start--;
	}
	return error;
}

/*
 * This routine queues an AIO request, checking for quotas.
 */
static int
aio_aqueue(struct proc *p, struct aiocb *job, int type)
{
	struct kaioinfo *ki;

	if (p->p_aioinfo == NULL) {
		aio_init_aioinfo(p);
	}

	if (num_queue_count >= max_queue_count)
		return EAGAIN;

	ki = p->p_aioinfo;
	if (ki->kaio_queue_count >= ki->kaio_qallowed_count)
		return EAGAIN;

	return _aio_aqueue(p, job, NULL, type);
}

/*
 * Support the aio_return system call; as a side effect, kernel
 * resources are released.
 */
int
aio_return(struct proc *p, struct aio_return_args *uap)
{
	int s;
	int jobref, status;
	struct aiocblist *cb, *ncb;
	struct aiocb *ujob;
	struct kaioinfo *ki;
	struct proc *userp;

	ki = p->p_aioinfo;
	if (ki == NULL) {
		return EINVAL;
	}

	ujob = uap->aiocbp;

	jobref = fuword(&ujob->_aiocb_private.kernelinfo);
	if (jobref == -1 || jobref == 0)
		return EINVAL;

	for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
	     cb;
	     cb = TAILQ_NEXT(cb, plist)) {
		if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
			if (ujob == cb->uuaiocb) {
				p->p_retval[0] = cb->uaiocb._aiocb_private.status;
			} else {
				p->p_retval[0] = EFAULT;
			}
			if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) {
				curproc->p_stats->p_ru.ru_oublock += cb->outputcharge;
				cb->outputcharge = 0;
			} else if (cb->uaiocb.aio_lio_opcode == LIO_READ) {
				curproc->p_stats->p_ru.ru_inblock += cb->inputcharge;
				cb->inputcharge = 0;
			}
			aio_free_entry(cb);
			return 0;
		}
	}

	s = splbio();
	for (cb = TAILQ_FIRST(&ki->kaio_bufdone);
	     cb;
	     cb = ncb) {
		ncb = TAILQ_NEXT(cb, plist);
		if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
			splx(s);
			if (ujob == cb->uuaiocb) {
				p->p_retval[0] = cb->uaiocb._aiocb_private.status;
			} else {
				p->p_retval[0] = EFAULT;
			}
			aio_free_entry(cb);
			return 0;
		}
	}
	splx(s);

	return (EINVAL);
}

/*
 * Allow a process to wakeup when any of the I/O requests are
 * completed.
 */
int
aio_suspend(struct proc *p, struct aio_suspend_args *uap)
{
	struct timeval atv;
	struct timespec ts;
	struct aiocb *const *cbptr, *cbp;
	struct kaioinfo *ki;
	struct aiocblist *cb;
	int i;
	int njoblist;
	int error, s, timo;
	int *ijoblist;
	struct aiocb **ujoblist;

	if (uap->nent >= AIO_LISTIO_MAX)
		return EINVAL;

	timo = 0;
	if (uap->timeout) {
		/*
		 * Get timespec struct
		 */
		if ((error = copyin((caddr_t) uap->timeout, (caddr_t) &ts, sizeof ts)) != 0) {
			return error;
		}

		if (ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000)
			return (EINVAL);

		TIMESPEC_TO_TIMEVAL(&atv, &ts)
		if (itimerfix(&atv))
			return (EINVAL);
		timo = tvtohz(&atv);
	}

	ki = p->p_aioinfo;
	if (ki == NULL)
		return EAGAIN;

	njoblist = 0;
	ijoblist = zalloc(aiol_zone);
	ujoblist = zalloc(aiol_zone);
	cbptr = uap->aiocbp;

	for (i = 0; i < uap->nent; i++) {
		cbp = (struct aiocb *) (intptr_t) fuword((caddr_t) &cbptr[i]);
		if (cbp == 0)
			continue;
		ujoblist[njoblist] = cbp;
		ijoblist[njoblist] = fuword(&cbp->_aiocb_private.kernelinfo);
		njoblist++;
	}
	if (njoblist == 0) {
		zfree(aiol_zone, ijoblist);
		zfree(aiol_zone, ujoblist);
		return 0;
	}

	error = 0;
	while (1) {
		for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
		     cb; cb = TAILQ_NEXT(cb, plist)) {
			for (i = 0; i < njoblist; i++) {
				if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) ==
				    ijoblist[i]) {
					if (ujoblist[i] != cb->uuaiocb)
						error = EINVAL;
					zfree(aiol_zone, ijoblist);
					zfree(aiol_zone, ujoblist);
					return error;
				}
			}
		}

		s = splbio();
		for (cb = TAILQ_FIRST(&ki->kaio_bufdone);
		     cb; cb = TAILQ_NEXT(cb, plist)) {
			for (i = 0; i < njoblist; i++) {
				if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) ==
				    ijoblist[i]) {
					splx(s);
					if (ujoblist[i] != cb->uuaiocb)
						error = EINVAL;
					zfree(aiol_zone, ijoblist);
					zfree(aiol_zone, ujoblist);
					return error;
				}
			}
		}

		ki->kaio_flags |= KAIO_WAKEUP;
		error = tsleep(p, PRIBIO|PCATCH, "aiospn", timo);
		splx(s);

		if (error == EINTR) {
			zfree(aiol_zone, ijoblist);
			zfree(aiol_zone, ujoblist);
			return EINTR;
		} else if (error == EWOULDBLOCK) {
			zfree(aiol_zone, ijoblist);
			zfree(aiol_zone, ujoblist);
			return EAGAIN;
		}
	}

	/* NOTREACHED */
	return EINVAL;
}

/*
 * aio_cancel at the kernel level is a NOOP right now.  It
 * might be possible to support it partially in user mode, or
 * in kernel mode later on.
 */
int
aio_cancel(struct proc *p, struct aio_cancel_args *uap)
{
	return ENOSYS;
}

/*
 * aio_error is implemented in the kernel level for compatibility
 * purposes only.  For a user mode async implementation, it would be
 * best to do it in a userland subroutine.
 */
int
aio_error(struct proc *p, struct aio_error_args *uap)
{
	int s;
	struct aiocblist *cb;
	struct kaioinfo *ki;
	int jobref;
	int error, status;

	ki = p->p_aioinfo;
	if (ki == NULL)
		return EINVAL;

	jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo);
	if ((jobref == -1) || (jobref == 0))
		return EINVAL;

	for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
	     cb;
	     cb = TAILQ_NEXT(cb, plist)) {

		if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
			p->p_retval[0] = cb->uaiocb._aiocb_private.error;
			return 0;
		}
	}

	for (cb = TAILQ_FIRST(&ki->kaio_jobqueue);
	     cb;
	     cb = TAILQ_NEXT(cb, plist)) {

		if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
			p->p_retval[0] = EINPROGRESS;
			return 0;
		}
	}

	s = splbio();
	for (cb = TAILQ_FIRST(&ki->kaio_bufdone);
	     cb;
	     cb = TAILQ_NEXT(cb, plist)) {
		if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
			p->p_retval[0] = cb->uaiocb._aiocb_private.error;
			splx(s);
			return 0;
		}
	}

	for (cb = TAILQ_FIRST(&ki->kaio_bufqueue);
	     cb;
	     cb = TAILQ_NEXT(cb, plist)) {
		if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
			p->p_retval[0] = EINPROGRESS;
			splx(s);
			return 0;
		}
	}
	splx(s);


	/*
	 * Hack for lio
	 */
/*
	status = fuword(&uap->aiocbp->_aiocb_private.status);
	if (status == -1) {
		return fuword(&uap->aiocbp->_aiocb_private.error);
	}
*/
	return EINVAL;
}
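
/*
 * Illustrative sketch (not part of the original file): against the syscalls
 * above, the expected userland lifecycle of a single request is roughly
 * aio_read(), then aio_error() until it stops returning EINPROGRESS (a real
 * program would normally block in aio_suspend() instead of spinning), then
 * aio_return() to collect the byte count and release the kernel resources.
 * The identifiers below are hypothetical example code.
 *
 *	#include <aio.h>
 *	#include <errno.h>
 *	#include <string.h>
 *
 *	static char buf[512];
 *
 *	int
 *	read_async(int fd)
 *	{
 *		struct aiocb acb;
 *		int err;
 *
 *		memset(&acb, 0, sizeof(acb));
 *		acb.aio_fildes = fd;
 *		acb.aio_buf = buf;
 *		acb.aio_nbytes = sizeof(buf);
 *		acb.aio_offset = 0;
 *		if (aio_read(&acb) != 0)
 *			return -1;
 *		while ((err = aio_error(&acb)) == EINPROGRESS)
 *			;
 *		if (err != 0)
 *			return -1;
 *		return aio_return(&acb);
 *	}
 */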
int
aio_read(struct proc *p, struct aio_read_args *uap)
{
	struct filedesc *fdp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	unsigned int fd;
	int cnt;
	struct aiocb iocb;
	int error, pmodes;

	pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes);
	if ((pmodes & AIO_PMODE_SYNC) == 0) {
		return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ);
	}

	/*
	 * Get control block
	 */
	if ((error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb)) != 0)
		return error;

	/*
	 * Get the fd info for process
	 */
	fdp = p->p_fd;

	/*
	 * Range check file descriptor
	 */
	fd = iocb.aio_fildes;
	if (fd >= fdp->fd_nfiles)
		return EBADF;
	fp = fdp->fd_ofiles[fd];
	if ((fp == NULL) || ((fp->f_flag & FREAD) == 0))
		return EBADF;
	if (iocb.aio_offset == -1LL)
		return EINVAL;

	auio.uio_resid = iocb.aio_nbytes;
	if (auio.uio_resid < 0)
		return (EINVAL);

	/*
	 * Process sync simply -- queue async request.
	 */
	if ((iocb._aiocb_private.privatemodes & AIO_PMODE_SYNC) == 0) {
		return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ);
	}

	aiov.iov_base = (void *) iocb.aio_buf;
	aiov.iov_len = iocb.aio_nbytes;

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = iocb.aio_offset;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	cnt = iocb.aio_nbytes;
	error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
	if (error &&
	    (auio.uio_resid != cnt) &&
	    (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
		error = 0;
	cnt -= auio.uio_resid;
	p->p_retval[0] = cnt;
	return error;
}

int
aio_write(struct proc *p, struct aio_write_args *uap)
{
	struct filedesc *fdp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	unsigned int fd;
	int cnt;
	struct aiocb iocb;
	int error;
	int pmodes;

	/*
	 * Process sync simply -- queue async request.
	 */
	pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes);
	if ((pmodes & AIO_PMODE_SYNC) == 0) {
		return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_WRITE);
	}

	if ((error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb)) != 0)
		return error;

	/*
	 * Get the fd info for process
	 */
	fdp = p->p_fd;

	/*
	 * Range check file descriptor
	 */
	fd = iocb.aio_fildes;
	if (fd >= fdp->fd_nfiles)
		return EBADF;
	fp = fdp->fd_ofiles[fd];
	if ((fp == NULL) || ((fp->f_flag & FWRITE) == 0))
		return EBADF;
	if (iocb.aio_offset == -1LL)
		return EINVAL;

	aiov.iov_base = (void *) iocb.aio_buf;
	aiov.iov_len = iocb.aio_nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = iocb.aio_offset;

	auio.uio_resid = iocb.aio_nbytes;
	if (auio.uio_resid < 0)
		return (EINVAL);

	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	cnt = iocb.aio_nbytes;
	error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred);
	if (error) {
		if (auio.uio_resid != cnt) {
			if (error == ERESTART || error == EINTR || error == EWOULDBLOCK)
				error = 0;
			if (error == EPIPE)
				psignal(p, SIGPIPE);
		}
	}
	cnt -= auio.uio_resid;
	p->p_retval[0] = cnt;
	return error;
}

int
lio_listio(struct proc *p, struct lio_listio_args *uap)
{
	int nent, nentqueued;
	struct aiocb *iocb, * const *cbptr;
	struct aiocblist *cb;
	struct kaioinfo *ki;
	struct aio_liojob *lj;
	int error, runningcode;
	int nerror;
	int i;
	int s;

	if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) {
		return EINVAL;
	}

	nent = uap->nent;
	if (nent > AIO_LISTIO_MAX) {
		return EINVAL;
	}

	if (p->p_aioinfo == NULL) {
		aio_init_aioinfo(p);
	}

	if ((nent + num_queue_count) > max_queue_count) {
		return EAGAIN;
	}

	ki = p->p_aioinfo;
	if ((nent + ki->kaio_queue_count) > ki->kaio_qallowed_count) {
		return EAGAIN;
	}

	lj = zalloc(aiolio_zone);
	if (!lj) {
		return EAGAIN;
	}

	lj->lioj_flags = 0;
	lj->lioj_buffer_count = 0;
	lj->lioj_buffer_finished_count = 0;
	lj->lioj_queue_count = 0;
	lj->lioj_queue_finished_count = 0;
	lj->lioj_ki = ki;
	TAILQ_INSERT_TAIL(&ki->kaio_liojoblist, lj, lioj_list);

	/*
	 * Setup signal
	 */
	if (uap->sig && (uap->mode == LIO_NOWAIT)) {
		error = copyin(uap->sig, &lj->lioj_signal, sizeof lj->lioj_signal);
		if (error)
			return error;
		lj->lioj_flags |= LIOJ_SIGNAL;
		lj->lioj_flags &= ~LIOJ_SIGNAL_POSTED;
	} else {
		lj->lioj_flags &= ~LIOJ_SIGNAL;
	}

	/*
	 * Get pointers to the list of I/O requests
	 */

	nerror = 0;
	nentqueued = 0;
	cbptr = uap->acb_list;
	for (i = 0; i < uap->nent; i++) {
		iocb = (struct aiocb *) (intptr_t) fuword((caddr_t) &cbptr[i]);
		if (((intptr_t) iocb != -1) && ((intptr_t) iocb != NULL)) {
			error = _aio_aqueue(p, iocb, lj, 0);
			if (error == 0) {
				nentqueued++;
			} else {
				nerror++;
			}
		}
	}

	/*
	 * If we haven't queued any, then just return error
	 */
	if (nentqueued == 0) {
		return 0;
	}

	/*
	 * Calculate the appropriate error return
	 */
	runningcode = 0;
	if (nerror)
		runningcode = EIO;

	if (uap->mode == LIO_WAIT) {
		while (1) {
			int found;
			found = 0;
			for (i = 0; i < uap->nent; i++) {
				int jobref, command;

				/*
				 * Fetch address of the control buf pointer in user space
				 */
				iocb = (struct aiocb *) (intptr_t) fuword((caddr_t) &cbptr[i]);
				if (((intptr_t) iocb == -1) || ((intptr_t) iocb == 0))
					continue;

				/*
				 * Fetch the associated command from user space
				 */
				command = fuword(&iocb->aio_lio_opcode);
				if (command == LIO_NOP) {
					found++;
					continue;
				}

				jobref = fuword(&iocb->_aiocb_private.kernelinfo);

				for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
				     cb;
				     cb = TAILQ_NEXT(cb, plist)) {
					if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) ==
					    jobref) {
						if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) {
							curproc->p_stats->p_ru.ru_oublock +=
								cb->outputcharge;
							cb->outputcharge = 0;
						} else if (cb->uaiocb.aio_lio_opcode == LIO_READ) {
							curproc->p_stats->p_ru.ru_inblock +=
								cb->inputcharge;
							cb->inputcharge = 0;
						}
						found++;
						break;
					}
				}

				s = splbio();
				for (cb = TAILQ_FIRST(&ki->kaio_bufdone);
				     cb;
				     cb = TAILQ_NEXT(cb, plist)) {
					if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) ==
					    jobref) {
						found++;
						break;
					}
				}
				splx(s);

			}

			/*
			 * If all I/Os have been disposed of, then we can return
			 */
			if (found == nentqueued) {
				return runningcode;
			}

			ki->kaio_flags |= KAIO_WAKEUP;
			error = tsleep(p, PRIBIO|PCATCH, "aiospn", 0);

			if (error == EINTR) {
				return EINTR;
			} else if (error == EWOULDBLOCK) {
				return EAGAIN;
			}

		}
	}

	return runningcode;
}
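
/*
 * Illustrative sketch (not part of the original file): a userland caller
 * typically drives lio_listio() with an array of aiocb pointers, either
 * blocking with LIO_WAIT (as below) or requesting a completion signal with
 * LIO_NOWAIT and a struct sigevent.  The identifiers below are hypothetical
 * example code.
 *
 *	#include <aio.h>
 *	#include <string.h>
 *
 *	int
 *	write_two(int fd, char *a, char *b, size_t len)
 *	{
 *		struct aiocb acb[2], *list[2];
 *		int i;
 *
 *		memset(acb, 0, sizeof(acb));
 *		for (i = 0; i < 2; i++) {
 *			acb[i].aio_fildes = fd;
 *			acb[i].aio_nbytes = len;
 *			acb[i].aio_offset = i * len;
 *			acb[i].aio_lio_opcode = LIO_WRITE;
 *			list[i] = &acb[i];
 *		}
 *		acb[0].aio_buf = a;
 *		acb[1].aio_buf = b;
 *
 *		return lio_listio(LIO_WAIT, list, 2, NULL);
 *	}
 */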

/*
 * This is a weird hack so that we can post a signal.  It is safe
 * to do so from a timeout routine, but *not* from an interrupt routine.
 */
static void
process_signal(void *ljarg)
{
	struct aio_liojob *lj = ljarg;
	if (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL) {
		if (lj->lioj_queue_count == lj->lioj_queue_finished_count) {
			psignal(lj->lioj_ki->kaio_p, lj->lioj_signal.sigev_signo);
			lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
		}
	}
}

/*
 * Interrupt handler for physio, performs the necessary process wakeups,
 * and signals.
 */
static void
aio_physwakeup(bp)
	struct buf *bp;
{
	struct aiocblist *aiocbe;
	struct proc *p;
	struct kaioinfo *ki;
	struct aio_liojob *lj;
	int s;
	s = splbio();

	wakeup((caddr_t) bp);
	bp->b_flags &= ~B_CALL;
	bp->b_flags |= B_DONE;

	aiocbe = (struct aiocblist *)bp->b_spc;
	if (aiocbe) {
		p = bp->b_proc;

		aiocbe->jobstate = JOBST_JOBBFINISHED;
		aiocbe->uaiocb._aiocb_private.status -= bp->b_resid;
		aiocbe->uaiocb._aiocb_private.error = 0;
		aiocbe->jobflags |= AIOCBLIST_DONE;

		if (bp->b_flags & B_ERROR) {
			aiocbe->uaiocb._aiocb_private.error = bp->b_error;
		}

		lj = aiocbe->lio;
		if (lj) {
			lj->lioj_buffer_finished_count++;
			/*
			 * wakeup/signal if all of the interrupt jobs are done
			 */
			if (lj->lioj_buffer_finished_count == lj->lioj_buffer_count) {
				/*
				 * post a signal if it is called for
				 */
				if ((lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) ==
				    LIOJ_SIGNAL) {
					lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
					timeout(process_signal, lj, 0);
				}
			}
		}

		ki = p->p_aioinfo;
		if (ki) {
			ki->kaio_buffer_finished_count++;
			TAILQ_REMOVE(&aio_bufjobs, aiocbe, list);
			TAILQ_REMOVE(&ki->kaio_bufqueue, aiocbe, plist);
			TAILQ_INSERT_TAIL(&ki->kaio_bufdone, aiocbe, plist);
			/*
			 * and do the wakeup
			 */
			if (ki->kaio_flags & (KAIO_RUNDOWN|KAIO_WAKEUP)) {
				ki->kaio_flags &= ~KAIO_WAKEUP;
				wakeup(p);
			}
		}
	}
	splx(s);
}