/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1982, 1986, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and code derived from software contributed to
 * Berkeley by William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: Utah $Hdr: mem.c 1.13 89/10/08$
 *	from: @(#)mem.c	7.2 (Berkeley) 5/9/91
 *	$Id: mem.c,v 1.62 1999/05/30 16:52:04 phk Exp $
 */

/*
 * Memory special file
 */

#include "opt_devfs.h"
#include "opt_perfmon.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/buf.h>
#ifdef DEVFS
#include <sys/devfsext.h>
#endif /* DEVFS */
#include <sys/kernel.h>
#include <sys/uio.h>
#include <sys/ioccom.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/proc.h>
#include <sys/signalvar.h>

#include <machine/frame.h>
#include <machine/md_var.h>
#include <machine/random.h>
#include <machine/psl.h>
#include <machine/specialreg.h>
#ifdef PERFMON
#include <machine/perfmon.h>
#endif
#include <i386/isa/intr_machdep.h>

#include <vm/vm.h>
#include <vm/vm_prot.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>

static d_open_t		mmopen;
static d_close_t	mmclose;
static d_read_t		mmrw;
static d_ioctl_t	mmioctl;
static d_mmap_t		memmmap;
static d_poll_t		mmpoll;

#define CDEV_MAJOR 2
static struct cdevsw mem_cdevsw = {
	/* open */	mmopen,
	/* close */	mmclose,
	/* read */	mmrw,
	/* write */	mmrw,
	/* ioctl */	mmioctl,
	/* stop */	nostop,
	/* reset */	noreset,
	/* devtotty */	nodevtotty,
	/* poll */	mmpoll,
	/* mmap */	memmmap,
	/* strategy */	nostrategy,
	/* name */	"mem",
	/* parms */	noparms,
	/* maj */	CDEV_MAJOR,
	/* dump */	nodump,
	/* psize */	nopsize,
	/* flags */	0,
	/* maxio */	0,
	/* bmaj */	-1
};

static struct random_softc random_softc[16];
static caddr_t zbuf;

MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors");
static int mem_ioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
static int random_ioctl __P((dev_t, u_long, caddr_t, int, struct proc *));

struct mem_range_softc mem_range_softc;

#ifdef DEVFS
static void *mem_devfs_token;
static void *kmem_devfs_token;
static void *null_devfs_token;
static void *random_devfs_token;
static void *urandom_devfs_token;
static void *zero_devfs_token;
static void *io_devfs_token;
#ifdef PERFMON
static void *perfmon_devfs_token;
#endif

static void memdevfs_init __P((void));

static void
memdevfs_init()
{
	mem_devfs_token =
	    devfs_add_devswf(&mem_cdevsw, 0, DV_CHR,
			     UID_ROOT, GID_KMEM, 0640, "mem");
	kmem_devfs_token =
	    devfs_add_devswf(&mem_cdevsw, 1, DV_CHR,
			     UID_ROOT, GID_KMEM, 0640, "kmem");
	null_devfs_token =
	    devfs_add_devswf(&mem_cdevsw, 2, DV_CHR,
			     UID_ROOT, GID_WHEEL, 0666, "null");
	random_devfs_token =
	    devfs_add_devswf(&mem_cdevsw, 3, DV_CHR,
			     UID_ROOT, GID_WHEEL, 0644, "random");
	urandom_devfs_token =
	    devfs_add_devswf(&mem_cdevsw, 4, DV_CHR,
			     UID_ROOT, GID_WHEEL, 0644, "urandom");
	zero_devfs_token =
	    devfs_add_devswf(&mem_cdevsw, 12, DV_CHR,
			     UID_ROOT, GID_WHEEL, 0666, "zero");
	io_devfs_token =
	    devfs_add_devswf(&mem_cdevsw, 14, DV_CHR,
			     UID_ROOT, GID_WHEEL, 0600, "io");
#ifdef PERFMON
	perfmon_devfs_token =
	    devfs_add_devswf(&mem_cdevsw, 32, DV_CHR,
			     UID_ROOT, GID_KMEM, 0640, "perfmon");
#endif /* PERFMON */
}
#endif /* DEVFS */
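/*
 * Minor device map for this driver, collected here for reference (see
 * memdevfs_init() above and the switch statements below):
 *
 *	 0	/dev/mem	physical memory
 *	 1	/dev/kmem	kernel virtual memory
 *	 2	/dev/null	EOF on read, rathole on write
 *	 3	/dev/random	entropy pool; short reads when pool is empty
 *	 4	/dev/urandom	unlimited pseudo-random bytes
 *	12	/dev/zero	nulls on read, rathole on write
 *	14	/dev/io		I/O privilege (open grants PSL_IOPL)
 *	32	/dev/perfmon	performance counters (PERFMON only)
 */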
static int
mmclose(dev, flags, fmt, p)
	dev_t dev;
	int flags;
	int fmt;
	struct proc *p;
{
	switch (minor(dev)) {
#ifdef PERFMON
	case 32:
		return perfmon_close(dev, flags, fmt, p);
#endif
	case 14:
		curproc->p_md.md_regs->tf_eflags &= ~PSL_IOPL;
		break;
	default:
		break;
	}
	return (0);
}

static int
mmopen(dev, flags, fmt, p)
	dev_t dev;
	int flags;
	int fmt;
	struct proc *p;
{
	int error;

	switch (minor(dev)) {
	case 32:
#ifdef PERFMON
		return perfmon_open(dev, flags, fmt, p);
#else
		return ENODEV;
#endif
	case 14:
		error = suser(p);
		if (error != 0)
			return (error);
		if (securelevel > 0)
			return (EPERM);
		curproc->p_md.md_regs->tf_eflags |= PSL_IOPL;
		break;
	default:
		break;
	}
	return (0);
}
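/*
 * Opening /dev/io (minor 14) raises IOPL in the caller's saved eflags, so
 * the process can execute in[bwl]/out[bwl] directly until it closes the
 * device; mmclose() above revokes the privilege.  A minimal userland
 * sketch, given as an illustration rather than part of this file (it
 * assumes a root process at securelevel <= 0 and the inb() inline from
 * <machine/cpufunc.h>):
 *
 *	int fd = open("/dev/io", O_RDWR);	// grants PSL_IOPL
 *	if (fd >= 0) {
 *		u_char v = inb(0x70);		// direct port read
 *		close(fd);			// IOPL cleared on close
 *	}
 */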
static int
mmrw(dev, uio, flags)
	dev_t dev;
	struct uio *uio;
	int flags;
{
	register int o;
	register u_int c, v;
	u_int poolsize;
	register struct iovec *iov;
	int error = 0;
	caddr_t buf = NULL;

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor(dev)) {

		/* minor device 0 is physical memory */
		case 0:
			v = uio->uio_offset;
			pmap_enter(kernel_pmap, (vm_offset_t)ptvmmap, v,
			    uio->uio_rw == UIO_READ ? VM_PROT_READ : VM_PROT_WRITE,
			    TRUE);
			o = (int)uio->uio_offset & PAGE_MASK;
			c = (u_int)(PAGE_SIZE - ((int)iov->iov_base & PAGE_MASK));
			c = min(c, (u_int)(PAGE_SIZE - o));
			c = min(c, (u_int)iov->iov_len);
			error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio);
			pmap_remove(kernel_pmap, (vm_offset_t)ptvmmap,
			    (vm_offset_t)&ptvmmap[PAGE_SIZE]);
			continue;

		/* minor device 1 is kernel memory */
		case 1: {
			vm_offset_t addr, eaddr;
			c = iov->iov_len;

			/*
			 * Make sure that all of the pages are currently
			 * resident so that we don't create any zero-fill
			 * pages.
			 */
			addr = trunc_page(uio->uio_offset);
			eaddr = round_page(uio->uio_offset + c);

			if (addr < (vm_offset_t)VADDR(PTDPTDI, 0))
				return EFAULT;
			if (eaddr >= (vm_offset_t)VADDR(APTDPTDI, 0))
				return EFAULT;
			for (; addr < eaddr; addr += PAGE_SIZE)
				if (pmap_extract(kernel_pmap, addr) == 0)
					return EFAULT;

			if (!kernacc((caddr_t)(int)uio->uio_offset, c,
			    uio->uio_rw == UIO_READ ? B_READ : B_WRITE))
				return (EFAULT);
			error = uiomove((caddr_t)(int)uio->uio_offset, (int)c, uio);
			continue;
		}

		/* minor device 2 is EOF/RATHOLE */
		case 2:
			if (uio->uio_rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			break;

		/*
		 * minor device 3 (/dev/random) is a source of filth on read,
		 * a rathole on write
		 */
		case 3:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
			if (buf == NULL)
				buf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
			c = min(iov->iov_len, PAGE_SIZE);
			poolsize = read_random(buf, c);
			if (poolsize == 0) {
				if (buf)
					free(buf, M_TEMP);
				return (0);
			}
			c = min(c, poolsize);
			error = uiomove(buf, (int)c, uio);
			continue;

		/*
		 * minor device 4 (/dev/urandom) is a source of muck on read,
		 * a rathole on write
		 */
		case 4:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
			if (CURSIG(curproc) != 0) {
				/*
				 * Use tsleep() to get the error code right.
				 * It should return immediately.
				 */
				error = tsleep(&random_softc[0],
				    PZERO | PCATCH, "urand", 1);
				if (error != 0 && error != EWOULDBLOCK)
					continue;
			}
			if (buf == NULL)
				buf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
			c = min(iov->iov_len, PAGE_SIZE);
			poolsize = read_random_unlimited(buf, c);
			c = min(c, poolsize);
			error = uiomove(buf, (int)c, uio);
			continue;

		/*
		 * minor device 12 (/dev/zero) is a source of nulls on read,
		 * a rathole on write
		 */
		case 12:
			if (uio->uio_rw == UIO_WRITE) {
				c = iov->iov_len;
				break;
			}
			if (zbuf == NULL) {
				zbuf = (caddr_t)
				    malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
				bzero(zbuf, PAGE_SIZE);
			}
			c = min(iov->iov_len, PAGE_SIZE);
			error = uiomove(zbuf, (int)c, uio);
			continue;

#ifdef notyet
		/*
		 * The 386 I/O address space (/dev/ioport[bwl]) provides
		 * read/write access to the separate I/O device address bus,
		 * distinct from the memory bus.  Semantics here are very
		 * different from ordinary read/write: if iov_len spans more
		 * than one port width, an implied string move from a single
		 * port is done.  Note that lseek must be used to set the
		 * port number reliably.
		 */
		case 14:
			if (iov->iov_len == 1) {
				u_char tmp;
				tmp = inb(uio->uio_offset);
				error = uiomove(&tmp, iov->iov_len, uio);
			} else {
				if (!useracc((caddr_t)iov->iov_base,
				    iov->iov_len, uio->uio_rw))
					return (EFAULT);
				insb(uio->uio_offset, iov->iov_base,
				    iov->iov_len);
			}
			break;
		case 15:
			if (iov->iov_len == sizeof (short)) {
				u_short tmp;
				tmp = inw(uio->uio_offset);
				error = uiomove(&tmp, iov->iov_len, uio);
			} else {
				if (!useracc((caddr_t)iov->iov_base,
				    iov->iov_len, uio->uio_rw))
					return (EFAULT);
				insw(uio->uio_offset, iov->iov_base,
				    iov->iov_len / sizeof (short));
			}
			break;
		case 16:
			if (iov->iov_len == sizeof (long)) {
				u_long tmp;
				tmp = inl(uio->uio_offset);
				error = uiomove(&tmp, iov->iov_len, uio);
			} else {
				if (!useracc((caddr_t)iov->iov_base,
				    iov->iov_len, uio->uio_rw))
					return (EFAULT);
				insl(uio->uio_offset, iov->iov_base,
				    iov->iov_len / sizeof (long));
			}
			break;
#endif

		default:
			return (ENXIO);
		}
		if (error)
			break;
		iov->iov_base += c;
		iov->iov_len -= c;
		uio->uio_offset += c;
		uio->uio_resid -= c;
	}
	if (buf)
		free(buf, M_TEMP);
	return (error);
}

/*
 * Allow user processes to mmap() some memory sections
 * instead of going through read/write.
 */
static int
memmmap(dev_t dev, vm_offset_t offset, int nprot)
{
	switch (minor(dev)) {

	/* minor device 0 is physical memory */
	case 0:
		return i386_btop(offset);

	/* minor device 1 is kernel memory */
	case 1:
		return i386_btop(vtophys(offset));

	default:
		return -1;
	}
}
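/*
 * memmmap() returns a physical page index, so a process with the right
 * permissions on /dev/mem can map a physical range directly instead of
 * copying through read(2).  A minimal userland sketch, given under the
 * assumption of a page-aligned offset and read access to /dev/mem (the
 * 0xa0000 VGA window is only an example):
 *
 *	int fd = open("/dev/mem", O_RDONLY);
 *	char *p = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED,
 *	    fd, 0xa0000);
 *	// ... inspect p[0..PAGE_SIZE-1] ...
 *	munmap(p, PAGE_SIZE);
 *	close(fd);
 */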
static int
mmioctl(dev, cmd, data, flags, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flags;
	struct proc *p;
{

	switch (minor(dev)) {
	case 0:
		return mem_ioctl(dev, cmd, data, flags, p);
	case 3:
	case 4:
		return random_ioctl(dev, cmd, data, flags, p);
#ifdef PERFMON
	case 32:
		return perfmon_ioctl(dev, cmd, data, flags, p);
#endif
	}
	return (ENODEV);
}

/*
 * Operations for changing memory attributes.
 *
 * This is basically just an ioctl shim for mem_range_attr_get
 * and mem_range_attr_set.
 */
static int
mem_ioctl(dev, cmd, data, flags, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flags;
	struct proc *p;
{
	int nd, error = 0;
	struct mem_range_op *mo = (struct mem_range_op *)data;
	struct mem_range_desc *md;

	/* is this for us? */
	if ((cmd != MEMRANGE_GET) &&
	    (cmd != MEMRANGE_SET))
		return (ENODEV);

	/* any chance we can handle this? */
	if (mem_range_softc.mr_op == NULL)
		return (EOPNOTSUPP);

	/* do we have any descriptors? */
	if (mem_range_softc.mr_ndesc == 0)
		return (ENXIO);

	switch (cmd) {
	case MEMRANGE_GET:
		nd = imin(mo->mo_arg[0], mem_range_softc.mr_ndesc);
		if (nd > 0) {
			md = (struct mem_range_desc *)
			    malloc(nd * sizeof(struct mem_range_desc),
				   M_MEMDESC, M_WAITOK);
			mem_range_attr_get(md, &nd);
			error = copyout(md, mo->mo_desc,
					nd * sizeof(struct mem_range_desc));
			free(md, M_MEMDESC);
		} else {
			nd = mem_range_softc.mr_ndesc;
		}
		mo->mo_arg[0] = nd;
		break;

	case MEMRANGE_SET:
		md = (struct mem_range_desc *)malloc(sizeof(struct mem_range_desc),
						     M_MEMDESC, M_WAITOK);
		error = copyin(mo->mo_desc, md, sizeof(struct mem_range_desc));
		/* clamp description string */
		md->mr_owner[sizeof(md->mr_owner) - 1] = 0;
		if (error == 0)
			error = mem_range_attr_set(md, &mo->mo_arg[0]);
		free(md, M_MEMDESC);
		break;

	default:
		error = EOPNOTSUPP;
	}
	return (error);
}

/*
 * Implementation-neutral, kernel-callable functions for manipulating
 * memory range attributes.
 */
void
mem_range_attr_get(struct mem_range_desc *mrd, int *arg)
{
	if (*arg == 0) {
		*arg = mem_range_softc.mr_ndesc;
	} else {
		bcopy(mem_range_softc.mr_desc, mrd,
		    (*arg) * sizeof(struct mem_range_desc));
	}
}

int
mem_range_attr_set(struct mem_range_desc *mrd, int *arg)
{
	return (mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg));
}

#ifdef SMP
void
mem_range_AP_init(void)
{
	if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
		mem_range_softc.mr_op->initAP(&mem_range_softc);
}
#endif
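/*
 * Userland reaches the memory-range operations through ioctl() on
 * /dev/mem; memcontrol(8) is the usual consumer.  A sketch of the
 * two-step MEMRANGE_GET protocol implied by mem_ioctl() above (field
 * names come from <sys/memrange.h>; the variable names are illustrative):
 *
 *	struct mem_range_op mro;
 *	mro.mo_desc = NULL;
 *	mro.mo_arg[0] = 0;			// ask how many descriptors
 *	ioctl(fd, MEMRANGE_GET, &mro);
 *	mro.mo_desc = malloc(mro.mo_arg[0] * sizeof(struct mem_range_desc));
 *	ioctl(fd, MEMRANGE_GET, &mro);		// fetch the descriptors
 */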
static int
random_ioctl(dev, cmd, data, flags, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flags;
	struct proc *p;
{
	static intrmask_t interrupt_allowed;
	intrmask_t interrupt_mask;
	int error, intr;
	struct random_softc *sc;

	/*
	 * We're the random or urandom device.  The only ioctls are for
	 * selecting and inspecting which interrupts are used in the muck
	 * gathering business.
	 */
	if (cmd != MEM_SETIRQ && cmd != MEM_CLEARIRQ && cmd != MEM_RETURNIRQ)
		return (ENOTTY);

	/*
	 * Even inspecting the state is privileged, since it gives a hint
	 * about how easily the randomness might be guessed.
	 */
	error = suser(p);
	if (error != 0)
		return (error);

	/*
	 * XXX the data is 16-bit due to a historical botch, so we use
	 * magic 16's instead of ICU_LEN and can't support 24 interrupts
	 * under SMP.
	 */
	intr = *(int16_t *)data;
	if (cmd != MEM_RETURNIRQ && (intr < 0 || intr >= 16))
		return (EINVAL);

	interrupt_mask = 1 << intr;
	sc = &random_softc[intr];
	switch (cmd) {
	case MEM_SETIRQ:
		if (interrupt_allowed & interrupt_mask)
			break;
		interrupt_allowed |= interrupt_mask;
		sc->sc_intr = intr;
		disable_intr();
		sc->sc_handler = intr_handler[intr];
		intr_handler[intr] = add_interrupt_randomness;
		sc->sc_arg = intr_unit[intr];
		intr_unit[intr] = sc;
		enable_intr();
		break;
	case MEM_CLEARIRQ:
		if (!(interrupt_allowed & interrupt_mask))
			break;
		interrupt_allowed &= ~interrupt_mask;
		disable_intr();
		intr_handler[intr] = sc->sc_handler;
		intr_unit[intr] = sc->sc_arg;
		enable_intr();
		break;
	case MEM_RETURNIRQ:
		*(u_int16_t *)data = interrupt_allowed;
		break;
	default:
		return (ENOTTY);
	}
	return (0);
}

static int
mmpoll(dev, events, p)
	dev_t dev;
	int events;
	struct proc *p;
{
	switch (minor(dev)) {
	case 3:		/* /dev/random */
		return random_poll(dev, events, p);
	case 4:		/* /dev/urandom */
	default:
		return seltrue(dev, events, p);
	}
}

/*
 * Routine that identifies /dev/mem and /dev/kmem.
 *
 * A minimal stub routine can always return 0.
 */
int
iskmemdev(dev)
	dev_t dev;
{

	return ((major(dev) == mem_cdevsw.d_maj)
	    && (minor(dev) == 0 || minor(dev) == 1));
}

int
iszerodev(dev)
	dev_t dev;
{
	return ((major(dev) == mem_cdevsw.d_maj)
	    && minor(dev) == 12);
}

static int mem_devsw_installed;

static void
mem_drvinit(void *unused)
{

	/* Initialise memory range handling */
	if (mem_range_softc.mr_op != NULL)
		mem_range_softc.mr_op->init(&mem_range_softc);

	/* device registration */
	if (!mem_devsw_installed) {
		cdevsw_add(&mem_cdevsw);
		mem_devsw_installed = 1;
#ifdef DEVFS
		memdevfs_init();
#endif
	}
}

SYSINIT(memdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, mem_drvinit, NULL)
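/*
 * The MEM_SETIRQ/MEM_CLEARIRQ/MEM_RETURNIRQ ioctls handled by
 * random_ioctl() above are the interface behind rndcontrol(8), which
 * selects the interrupts sampled for entropy.  A hedged userland sketch
 * (fd and irq are illustrative; note the historical 16-bit argument):
 *
 *	int16_t irq = 1;			// e.g. the keyboard IRQ
 *	int fd = open("/dev/random", O_RDWR);
 *	ioctl(fd, MEM_SETIRQ, &irq);		// start sampling IRQ 1
 *	ioctl(fd, MEM_RETURNIRQ, &irq);		// irq now holds the bitmask
 */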