1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 27 /* 28 * Ramdisk device driver. 29 * 30 * There are two types of ramdisk: 'real' OBP-created ramdisks, and 'pseudo' 31 * ramdisks created at runtime with no corresponding OBP device node. The 32 * ramdisk(7D) driver is capable of dealing with both, and with the creation 33 * and deletion of 'pseudo' ramdisks. 34 * 35 * Every ramdisk has a single 'state' structure which maintains data for 36 * that ramdisk, and is assigned a single minor number. The bottom 10-bits 37 * of the minor number index the state structures; the top 8-bits give a 38 * 'real OBP disk' number, i.e. they are zero for 'pseudo' ramdisks. Thus 39 * it is possible to distinguish 'real' from 'pseudo' ramdisks using the 40 * top 8-bits of the minor number. 41 * 42 * Each OBP-created ramdisk has its own node in the device tree with an 43 * "existing" property which describes the one-or-more physical address ranges 44 * assigned to the ramdisk. All 'pseudo' ramdisks share a common devinfo 45 * structure. 
46 * 47 * A single character device node is used by ramdiskadm(1M) to communicate 48 * with the ramdisk driver, with minor number 0: 49 * 50 * /dev/ramdiskctl -> /devices/pseudo/ramdisk@0:ctl 51 * 52 * For consistent access, block and raw device nodes are created for *every* 53 * ramdisk. For 'pseudo' ramdisks: 54 * 55 * /dev/ramdisk/<diskname> -> /devices/pseudo/ramdisk@0:<diskname> 56 * /dev/rramdisk/<diskname> -> /devices/pseudo/ramdisk@0:<diskname>,raw 57 * 58 * For OBP-created ramdisks: 59 * 60 * /dev/ramdisk/<diskname> -> /devices/ramdisk-<diskname>:a 61 * /dev/rramdisk/<diskname> -> /devices/ramdisk-<diskname>:a,raw 62 * 63 * This allows the transition from the standalone to the kernel to proceed 64 * when booting from a ramdisk, and for the installation to correctly identify 65 * the root device. 66 */ 67 68 #include <sys/types.h> 69 #include <sys/param.h> 70 #include <sys/sysmacros.h> 71 #include <sys/errno.h> 72 #include <sys/uio.h> 73 #include <sys/buf.h> 74 #include <sys/modctl.h> 75 #include <sys/open.h> 76 #include <sys/kmem.h> 77 #include <sys/poll.h> 78 #include <sys/conf.h> 79 #include <sys/cmn_err.h> 80 #include <sys/stat.h> 81 #include <sys/file.h> 82 #include <sys/ddi.h> 83 #include <sys/sunddi.h> 84 #include <sys/ramdisk.h> 85 #include <vm/seg_kmem.h> 86 87 /* 88 * An opaque handle where information about our set of ramdisk devices lives. 89 */ 90 static void *rd_statep; 91 92 /* 93 * Pointer to devinfo for the 'pseudo' ramdisks. Real OBP-created ramdisks 94 * get their own individual devinfo. 95 */ 96 static dev_info_t *rd_dip = NULL; 97 98 /* 99 * Global state lock. 100 */ 101 static kmutex_t rd_lock; 102 103 /* 104 * Maximum number of ramdisks supported by this driver. 105 */ 106 static uint32_t rd_max_disks = RD_DFLT_DISKS; 107 108 /* 109 * Percentage of physical memory which can be assigned to pseudo ramdisks, 110 * what that equates to in pages, and how many pages are currently assigned. 
111 */ 112 static uint_t rd_percent_physmem = RD_DEFAULT_PERCENT_PHYSMEM; 113 static pgcnt_t rd_max_physmem; 114 static pgcnt_t rd_tot_physmem; 115 116 static uint_t rd_maxphys = RD_DEFAULT_MAXPHYS; 117 118 /* 119 * Is the driver busy, i.e. are there any pseudo ramdisk devices in existence? 120 */ 121 static int 122 rd_is_busy(void) 123 { 124 minor_t minor; 125 rd_devstate_t *rsp; 126 127 ASSERT(mutex_owned(&rd_lock)); 128 for (minor = 1; minor <= rd_max_disks; ++minor) { 129 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL && 130 rsp->rd_dip == rd_dip) { 131 return (EBUSY); 132 } 133 } 134 return (0); 135 } 136 137 /* 138 * Find the first free minor number; returns zero if there isn't one. 139 */ 140 static minor_t 141 rd_find_free_minor(void) 142 { 143 minor_t minor; 144 145 ASSERT(mutex_owned(&rd_lock)); 146 for (minor = 1; minor <= rd_max_disks; ++minor) { 147 if (ddi_get_soft_state(rd_statep, minor) == NULL) { 148 return (minor); 149 } 150 } 151 return (0); 152 } 153 154 /* 155 * Locate the rd_devstate for the named ramdisk; returns NULL if not found. 156 * Each ramdisk is identified uniquely by name, i.e. an OBP-created ramdisk 157 * cannot have the same name as a pseudo ramdisk. 158 */ 159 static rd_devstate_t * 160 rd_find_named_disk(char *name) 161 { 162 minor_t minor; 163 rd_devstate_t *rsp; 164 165 ASSERT(mutex_owned(&rd_lock)); 166 for (minor = 1; minor <= rd_max_disks; ++minor) { 167 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL && 168 strcmp(rsp->rd_name, name) == 0) { 169 return (rsp); 170 } 171 } 172 return (NULL); 173 } 174 175 /* 176 * Locate the rd_devstate for the real OBP-created ramdisk whose devinfo 177 * is referenced by 'dip'; returns NULL if not found (shouldn't happen). 
178 */ 179 static rd_devstate_t * 180 rd_find_dip_state(dev_info_t *dip) 181 { 182 minor_t minor; 183 rd_devstate_t *rsp; 184 185 ASSERT(mutex_owned(&rd_lock)); 186 for (minor = 1; minor <= rd_max_disks; ++minor) { 187 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL && 188 rsp->rd_dip == dip) { 189 return (rsp); 190 } 191 } 192 return (NULL); 193 } 194 195 /* 196 * Is the ramdisk open? 197 */ 198 static int 199 rd_is_open(rd_devstate_t *rsp) 200 { 201 ASSERT(mutex_owned(&rd_lock)); 202 return (rsp->rd_chr_open || rsp->rd_blk_open || rsp->rd_lyr_open_cnt); 203 } 204 205 /* 206 * Mark the ramdisk open. 207 */ 208 static int 209 rd_opened(rd_devstate_t *rsp, int otyp) 210 { 211 ASSERT(mutex_owned(&rd_lock)); 212 switch (otyp) { 213 case OTYP_CHR: 214 rsp->rd_chr_open = 1; 215 break; 216 case OTYP_BLK: 217 rsp->rd_blk_open = 1; 218 break; 219 case OTYP_LYR: 220 rsp->rd_lyr_open_cnt++; 221 break; 222 default: 223 return (-1); 224 } 225 return (0); 226 } 227 228 /* 229 * Mark the ramdisk closed. 230 */ 231 static void 232 rd_closed(rd_devstate_t *rsp, int otyp) 233 { 234 ASSERT(mutex_owned(&rd_lock)); 235 switch (otyp) { 236 case OTYP_CHR: 237 rsp->rd_chr_open = 0; 238 break; 239 case OTYP_BLK: 240 rsp->rd_blk_open = 0; 241 break; 242 case OTYP_LYR: 243 rsp->rd_lyr_open_cnt--; 244 break; 245 default: 246 break; 247 } 248 } 249 250 static void 251 rd_init_tuneables(void) 252 { 253 char *prop, *p; 254 255 /* 256 * Ensure sanity of 'rd_max_disks', which may be tuned in ramdisk.conf. 
257 */ 258 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, rd_dip, 0, 259 "max_disks", &prop) == DDI_PROP_SUCCESS) { 260 p = prop; 261 rd_max_disks = (uint32_t)stoi(&p); 262 ddi_prop_free(prop); 263 } 264 if (rd_max_disks >= RD_MAX_DISKS) { 265 cmn_err(CE_WARN, "ramdisk: rd_max_disks (%u) too big;" 266 " using default (%u).", rd_max_disks, RD_MAX_DISKS - 1); 267 268 rd_max_disks = RD_MAX_DISKS - 1; 269 } 270 271 /* 272 * Ensure sanity of 'rd_percent_physmem', which may be tuned 273 * in ramdisk.conf. 274 */ 275 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, rd_dip, 0, 276 "percent_physmem", &prop) == DDI_PROP_SUCCESS) { 277 p = prop; 278 rd_percent_physmem = (uint_t)stoi(&p); 279 ddi_prop_free(prop); 280 } 281 if (rd_percent_physmem >= 100) { 282 cmn_err(CE_WARN, "ramdisk: rd_percent_physmem (%u) >= 100;" 283 " using default (%u%%).", rd_percent_physmem, 284 RD_DEFAULT_PERCENT_PHYSMEM); 285 286 rd_percent_physmem = RD_DEFAULT_PERCENT_PHYSMEM; 287 } 288 289 /* 290 * Since availrmem_initial is a long, this won't overflow. 291 */ 292 rd_max_physmem = (availrmem_initial * rd_percent_physmem) / 100; 293 } 294 295 /* 296 * Allocate enough physical pages to hold "npages" pages. Returns an 297 * array of page_t * pointers that can later be mapped in or out via 298 * rd_{un}map_window() but is otherwise opaque, or NULL on failure. 
299 */ 300 page_t ** 301 rd_phys_alloc(pgcnt_t npages) 302 { 303 page_t *pp, **ppa; 304 spgcnt_t i; 305 size_t ppalen; 306 struct seg kseg; 307 caddr_t addr; /* For coloring */ 308 309 if (rd_tot_physmem + npages > rd_max_physmem) 310 return (NULL); 311 312 if (!page_resv(npages, KM_NOSLEEP)) 313 return (NULL); 314 315 if (!page_create_wait(npages, 0)) { 316 page_unresv(npages); 317 return (NULL); 318 } 319 320 ppalen = npages * sizeof (struct page_t *); 321 ppa = kmem_zalloc(ppalen, KM_NOSLEEP); 322 if (ppa == NULL) { 323 page_create_putback(npages); 324 page_unresv(npages); 325 return (NULL); 326 } 327 328 kseg.s_as = &kas; 329 for (i = 0, addr = NULL; i < npages; ++i, addr += PAGESIZE) { 330 pp = page_get_freelist(&kvp, 0, &kseg, addr, PAGESIZE, 0, NULL); 331 if (pp == NULL) { 332 pp = page_get_cachelist(&kvp, 0, &kseg, addr, 0, NULL); 333 if (pp == NULL) 334 goto out; 335 if (!PP_ISAGED(pp)) 336 page_hashout(pp, NULL); 337 } 338 339 PP_CLRFREE(pp); 340 PP_CLRAGED(pp); 341 ppa[i] = pp; 342 } 343 344 for (i = 0; i < npages; i++) 345 page_downgrade(ppa[i]); 346 rd_tot_physmem += npages; 347 348 return (ppa); 349 350 out: 351 ASSERT(i < npages); 352 page_create_putback(npages - i); 353 while (--i >= 0) 354 page_free(ppa[i], 0); 355 kmem_free(ppa, ppalen); 356 page_unresv(npages); 357 358 return (NULL); 359 } 360 361 /* 362 * Free physical pages previously allocated via rd_phys_alloc(); note that 363 * this function may block as it has to wait until it can exclusively lock 364 * all the pages first. 365 */ 366 static void 367 rd_phys_free(page_t **ppa, pgcnt_t npages) 368 { 369 pgcnt_t i; 370 size_t ppalen = npages * sizeof (struct page_t *); 371 372 for (i = 0; i < npages; ++i) { 373 if (! page_tryupgrade(ppa[i])) { 374 page_unlock(ppa[i]); 375 while (! 
page_lock(ppa[i], SE_EXCL, NULL, P_RECLAIM)) 376 ; 377 } 378 page_free(ppa[i], 0); 379 } 380 381 kmem_free(ppa, ppalen); 382 383 page_unresv(npages); 384 rd_tot_physmem -= npages; 385 } 386 387 /* 388 * Remove a window mapping (if present). 389 */ 390 static void 391 rd_unmap_window(rd_devstate_t *rsp) 392 { 393 ASSERT(rsp->rd_window_obp == 0); 394 if (rsp->rd_window_base != RD_WINDOW_NOT_MAPPED) { 395 hat_unload(kas.a_hat, rsp->rd_window_virt, rsp->rd_window_size, 396 HAT_UNLOAD_UNLOCK); 397 } 398 } 399 400 /* 401 * Map a portion of the ramdisk into the virtual window. 402 */ 403 static void 404 rd_map_window(rd_devstate_t *rsp, off_t offset) 405 { 406 pgcnt_t offpgs = btop(offset); 407 408 if (rsp->rd_window_base != RD_WINDOW_NOT_MAPPED) { 409 /* 410 * Already mapped; is offset within our window? 411 */ 412 if (offset >= rsp->rd_window_base && 413 offset < rsp->rd_window_base + rsp->rd_window_size) { 414 return; 415 } 416 417 /* 418 * No, we need to re-map; toss the old mapping. 419 */ 420 rd_unmap_window(rsp); 421 } 422 rsp->rd_window_base = ptob(offpgs); 423 424 /* 425 * Different algorithms depending on whether this is a real 426 * OBP-created ramdisk, or a pseudo ramdisk. 427 */ 428 if (rsp->rd_dip == rd_dip) { 429 pgcnt_t pi, lastpi; 430 caddr_t vaddr; 431 432 /* 433 * Find the range of pages which should be mapped. 434 */ 435 pi = offpgs; 436 lastpi = pi + btopr(rsp->rd_window_size); 437 if (lastpi > rsp->rd_npages) { 438 lastpi = rsp->rd_npages; 439 } 440 441 /* 442 * Load the mapping. 443 */ 444 vaddr = rsp->rd_window_virt; 445 for (; pi < lastpi; ++pi) { 446 hat_memload(kas.a_hat, vaddr, rsp->rd_ppa[pi], 447 (PROT_READ | PROT_WRITE) | HAT_NOSYNC, 448 HAT_LOAD_LOCK); 449 vaddr += ptob(1); 450 } 451 } else { 452 uint_t i; 453 pfn_t pfn; 454 455 /* 456 * Real OBP-created ramdisk: locate the physical range which 457 * contains this offset. 
458 */ 459 for (i = 0; i < rsp->rd_nexisting; ++i) { 460 if (offset < rsp->rd_existing[i].size) { 461 break; 462 } 463 offset -= rsp->rd_existing[i].size; 464 } 465 ASSERT(i < rsp->rd_nexisting); 466 467 /* 468 * Load the mapping. 469 */ 470 pfn = btop(rsp->rd_existing[i].phys + offset); 471 hat_devload(kas.a_hat, rsp->rd_window_virt, rsp->rd_window_size, 472 pfn, (PROT_READ | PROT_WRITE), 473 HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK); 474 } 475 } 476 477 /* 478 * Fakes up a disk geometry, and one big partition, based on the size 479 * of the file. This is needed because we allow newfs'ing the device, 480 * and newfs will do several disk ioctls to figure out the geometry and 481 * partition information. It uses that information to determine the parameters 482 * to pass to mkfs. Geometry is pretty much irrelevant these days, but we 483 * have to support it. 484 * 485 * Stolen from lofi.c - should maybe split out common code sometime. 486 */ 487 static void 488 rd_fake_disk_geometry(rd_devstate_t *rsp) 489 { 490 /* dk_geom - see dkio(7I) */ 491 /* 492 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs 493 * of sectors), but that breaks programs like fdisk which want to 494 * partition a disk by cylinder. With one cylinder, you can't create 495 * an fdisk partition and put pcfs on it for testing (hard to pick 496 * a number between one and one). 497 * 498 * The cheezy floppy test is an attempt to not have too few cylinders 499 * for a small file, or so many on a big file that you waste space 500 * for backup superblocks or cylinder group structures. 501 */ 502 if (rsp->rd_size < (2 * 1024 * 1024)) /* floppy? 
*/ 503 rsp->rd_dkg.dkg_ncyl = rsp->rd_size / (100 * 1024); 504 else 505 rsp->rd_dkg.dkg_ncyl = rsp->rd_size / (300 * 1024); 506 /* in case file file is < 100k */ 507 if (rsp->rd_dkg.dkg_ncyl == 0) 508 rsp->rd_dkg.dkg_ncyl = 1; 509 rsp->rd_dkg.dkg_acyl = 0; 510 rsp->rd_dkg.dkg_bcyl = 0; 511 rsp->rd_dkg.dkg_nhead = 1; 512 rsp->rd_dkg.dkg_obs1 = 0; 513 rsp->rd_dkg.dkg_intrlv = 0; 514 rsp->rd_dkg.dkg_obs2 = 0; 515 rsp->rd_dkg.dkg_obs3 = 0; 516 rsp->rd_dkg.dkg_apc = 0; 517 rsp->rd_dkg.dkg_rpm = 7200; 518 rsp->rd_dkg.dkg_pcyl = rsp->rd_dkg.dkg_ncyl + rsp->rd_dkg.dkg_acyl; 519 rsp->rd_dkg.dkg_nsect = rsp->rd_size / 520 (DEV_BSIZE * rsp->rd_dkg.dkg_ncyl); 521 rsp->rd_dkg.dkg_write_reinstruct = 0; 522 rsp->rd_dkg.dkg_read_reinstruct = 0; 523 524 /* vtoc - see dkio(7I) */ 525 bzero(&rsp->rd_vtoc, sizeof (struct vtoc)); 526 rsp->rd_vtoc.v_sanity = VTOC_SANE; 527 rsp->rd_vtoc.v_version = V_VERSION; 528 bcopy(RD_DRIVER_NAME, rsp->rd_vtoc.v_volume, 7); 529 rsp->rd_vtoc.v_sectorsz = DEV_BSIZE; 530 rsp->rd_vtoc.v_nparts = 1; 531 rsp->rd_vtoc.v_part[0].p_tag = V_UNASSIGNED; 532 rsp->rd_vtoc.v_part[0].p_flag = V_UNMNT; 533 rsp->rd_vtoc.v_part[0].p_start = (daddr_t)0; 534 /* 535 * The partition size cannot just be the number of sectors, because 536 * that might not end on a cylinder boundary. And if that's the case, 537 * newfs/mkfs will print a scary warning. So just figure the size 538 * based on the number of cylinders and sectors/cylinder. 
539 */ 540 rsp->rd_vtoc.v_part[0].p_size = rsp->rd_dkg.dkg_pcyl * 541 rsp->rd_dkg.dkg_nsect * rsp->rd_dkg.dkg_nhead; 542 543 /* dk_cinfo - see dkio(7I) */ 544 bzero(&rsp->rd_ci, sizeof (struct dk_cinfo)); 545 (void) strcpy(rsp->rd_ci.dki_cname, RD_DRIVER_NAME); 546 rsp->rd_ci.dki_ctype = DKC_MD; 547 rsp->rd_ci.dki_flags = 0; 548 rsp->rd_ci.dki_cnum = 0; 549 rsp->rd_ci.dki_addr = 0; 550 rsp->rd_ci.dki_space = 0; 551 rsp->rd_ci.dki_prio = 0; 552 rsp->rd_ci.dki_vec = 0; 553 (void) strcpy(rsp->rd_ci.dki_dname, RD_DRIVER_NAME); 554 rsp->rd_ci.dki_unit = 0; 555 rsp->rd_ci.dki_slave = 0; 556 rsp->rd_ci.dki_partition = 0; 557 /* 558 * newfs uses this to set maxcontig. Must not be < 16, or it 559 * will be 0 when newfs multiplies it by DEV_BSIZE and divides 560 * it by the block size. Then tunefs doesn't work because 561 * maxcontig is 0. 562 */ 563 rsp->rd_ci.dki_maxtransfer = 16; 564 } 565 566 /* 567 * Deallocate resources (virtual and physical, device nodes, structures) 568 * from a ramdisk. 569 */ 570 static void 571 rd_dealloc_resources(rd_devstate_t *rsp) 572 { 573 dev_info_t *dip = rsp->rd_dip; 574 char namebuf[RD_NAME_LEN + 5]; 575 dev_t fulldev; 576 577 if (rsp->rd_window_obp == 0 && rsp->rd_window_virt != NULL) { 578 if (rsp->rd_window_base != RD_WINDOW_NOT_MAPPED) { 579 rd_unmap_window(rsp); 580 } 581 vmem_free(heap_arena, rsp->rd_window_virt, rsp->rd_window_size); 582 } 583 mutex_destroy(&rsp->rd_device_lock); 584 585 if (rsp->rd_existing) { 586 ddi_prop_free(rsp->rd_existing); 587 } 588 if (rsp->rd_ppa != NULL) { 589 rd_phys_free(rsp->rd_ppa, rsp->rd_npages); 590 } 591 592 /* 593 * Remove the block and raw device nodes. 
594 */ 595 if (dip == rd_dip) { 596 (void) snprintf(namebuf, sizeof (namebuf), "%s", 597 rsp->rd_name); 598 ddi_remove_minor_node(dip, namebuf); 599 (void) snprintf(namebuf, sizeof (namebuf), "%s,raw", 600 rsp->rd_name); 601 ddi_remove_minor_node(dip, namebuf); 602 } else { 603 ddi_remove_minor_node(dip, "a"); 604 ddi_remove_minor_node(dip, "a,raw"); 605 } 606 607 /* 608 * Remove the "Size" and "Nblocks" properties. 609 */ 610 fulldev = makedevice(ddi_driver_major(dip), rsp->rd_minor); 611 (void) ddi_prop_remove(fulldev, dip, SIZE_PROP_NAME); 612 (void) ddi_prop_remove(fulldev, dip, NBLOCKS_PROP_NAME); 613 614 if (rsp->rd_kstat) { 615 kstat_delete(rsp->rd_kstat); 616 mutex_destroy(&rsp->rd_kstat_lock); 617 } 618 619 ddi_soft_state_free(rd_statep, rsp->rd_minor); 620 } 621 622 /* 623 * Allocate resources (virtual and physical, device nodes, structures) 624 * to a ramdisk. 625 */ 626 static rd_devstate_t * 627 rd_alloc_resources(char *name, uint_t addr, size_t size, dev_info_t *dip) 628 { 629 minor_t minor; 630 rd_devstate_t *rsp; 631 char namebuf[RD_NAME_LEN + 5]; 632 dev_t fulldev; 633 int64_t Nblocks_prop_val; 634 int64_t Size_prop_val; 635 636 minor = rd_find_free_minor(); 637 if (ddi_soft_state_zalloc(rd_statep, minor) == DDI_FAILURE) { 638 return (NULL); 639 } 640 rsp = ddi_get_soft_state(rd_statep, minor); 641 642 (void) strcpy(rsp->rd_name, name); 643 rsp->rd_dip = dip; 644 rsp->rd_minor = minor; 645 rsp->rd_size = size; 646 647 /* 648 * Allocate virtual window onto ramdisk. 
649 */ 650 mutex_init(&rsp->rd_device_lock, NULL, MUTEX_DRIVER, NULL); 651 if (addr == 0) { 652 rsp->rd_window_obp = 0; 653 rsp->rd_window_base = RD_WINDOW_NOT_MAPPED; 654 rsp->rd_window_size = PAGESIZE; 655 rsp->rd_window_virt = vmem_alloc(heap_arena, 656 rsp->rd_window_size, VM_SLEEP); 657 if (rsp->rd_window_virt == NULL) { 658 goto create_failed; 659 } 660 } else { 661 rsp->rd_window_obp = 1; 662 rsp->rd_window_base = 0; 663 rsp->rd_window_size = size; 664 rsp->rd_window_virt = (caddr_t)((ulong_t)addr); 665 } 666 667 /* 668 * Allocate physical memory for non-OBP ramdisks. 669 * Create pseudo block and raw device nodes. 670 */ 671 if (dip == rd_dip) { 672 rsp->rd_npages = btopr(size); 673 rsp->rd_ppa = rd_phys_alloc(rsp->rd_npages); 674 if (rsp->rd_ppa == NULL) { 675 goto create_failed; 676 } 677 678 /* 679 * For non-OBP ramdisks the device nodes are: 680 * 681 * /devices/pseudo/ramdisk@0:<diskname> 682 * /devices/pseudo/ramdisk@0:<diskname>,raw 683 */ 684 (void) snprintf(namebuf, sizeof (namebuf), "%s", 685 rsp->rd_name); 686 if (ddi_create_minor_node(dip, namebuf, S_IFBLK, minor, 687 DDI_PSEUDO, 0) == DDI_FAILURE) { 688 goto create_failed; 689 } 690 (void) snprintf(namebuf, sizeof (namebuf), "%s,raw", 691 rsp->rd_name); 692 if (ddi_create_minor_node(dip, namebuf, S_IFCHR, minor, 693 DDI_PSEUDO, 0) == DDI_FAILURE) { 694 goto create_failed; 695 } 696 } else { 697 /* 698 * For OBP-created ramdisks the device nodes are: 699 * 700 * /devices/ramdisk-<diskname>:a 701 * /devices/ramdisk-<diskname>:a,raw 702 */ 703 if (ddi_create_minor_node(dip, "a", S_IFBLK, minor, 704 DDI_PSEUDO, 0) == DDI_FAILURE) { 705 goto create_failed; 706 } 707 if (ddi_create_minor_node(dip, "a,raw", S_IFCHR, minor, 708 DDI_PSEUDO, 0) == DDI_FAILURE) { 709 goto create_failed; 710 } 711 } 712 713 /* 714 * Create the "Size" and "Nblocks" properties. 
715 */ 716 fulldev = makedevice(ddi_driver_major(dip), minor); 717 Size_prop_val = size; 718 if ((ddi_prop_update_int64(fulldev, dip, 719 SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) { 720 goto create_failed; 721 } 722 Nblocks_prop_val = size / DEV_BSIZE; 723 if ((ddi_prop_update_int64(fulldev, dip, 724 NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) { 725 goto create_failed; 726 } 727 728 /* 729 * Allocate kstat stuff. 730 */ 731 rsp->rd_kstat = kstat_create(RD_DRIVER_NAME, minor, NULL, 732 "disk", KSTAT_TYPE_IO, 1, 0); 733 if (rsp->rd_kstat) { 734 mutex_init(&rsp->rd_kstat_lock, NULL, 735 MUTEX_DRIVER, NULL); 736 rsp->rd_kstat->ks_lock = &rsp->rd_kstat_lock; 737 kstat_install(rsp->rd_kstat); 738 } 739 740 rd_fake_disk_geometry(rsp); 741 742 return (rsp); 743 744 create_failed: 745 /* 746 * Cleanup. 747 */ 748 rd_dealloc_resources(rsp); 749 750 return (NULL); 751 } 752 753 /* 754 * Undo what we did in rd_attach, freeing resources and removing things which 755 * we installed. The system framework guarantees we are not active with this 756 * devinfo node in any other entry points at this time. 757 */ 758 static int 759 rd_common_detach(dev_info_t *dip) 760 { 761 if (dip == rd_dip) { 762 /* 763 * Pseudo node: can't detach if any pseudo ramdisks exist. 764 */ 765 if (rd_is_busy()) { 766 return (DDI_FAILURE); 767 } 768 ddi_soft_state_free(rd_statep, RD_CTL_MINOR); 769 rd_dip = NULL; 770 } else { 771 /* 772 * A 'real' ramdisk; find the state and free resources. 
773 */ 774 rd_devstate_t *rsp; 775 776 if ((rsp = rd_find_dip_state(dip)) != NULL) { 777 rd_dealloc_resources(rsp); 778 } 779 } 780 ddi_remove_minor_node(dip, NULL); 781 782 return (DDI_SUCCESS); 783 } 784 785 static int 786 rd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 787 { 788 char *name; 789 rd_existing_t *ep = NULL; 790 uint_t obpaddr = 0, nep, i; 791 size_t size = 0; 792 rd_devstate_t *rsp; 793 794 switch (cmd) { 795 796 case DDI_ATTACH: 797 mutex_enter(&rd_lock); 798 799 /* 800 * For pseudo ramdisk devinfo set up state 0 and :ctl device; 801 * else it's an OBP-created ramdisk. 802 */ 803 if (is_pseudo_device(dip)) { 804 rd_dip = dip; 805 rd_init_tuneables(); 806 807 /* 808 * The zeroth minor is reserved for the ramdisk 809 * 'control' device. 810 */ 811 if (ddi_soft_state_zalloc(rd_statep, RD_CTL_MINOR) == 812 DDI_FAILURE) { 813 goto attach_failed; 814 } 815 rsp = ddi_get_soft_state(rd_statep, RD_CTL_MINOR); 816 rsp->rd_dip = dip; 817 818 if (ddi_create_minor_node(dip, RD_CTL_NODE, 819 S_IFCHR, 0, DDI_PSEUDO, NULL) == DDI_FAILURE) { 820 goto attach_failed; 821 } 822 } else { 823 RD_STRIP_PREFIX(name, ddi_node_name(dip)); 824 825 if (strlen(name) > RD_NAME_LEN) { 826 cmn_err(CE_CONT, 827 "%s: name too long - ignoring\n", name); 828 goto attach_failed; 829 } 830 831 /* 832 * An OBP-created ramdisk must have an 'existing' 833 * property; get and check it. 834 */ 835 if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, dip, 836 DDI_PROP_DONTPASS, OBP_EXISTING_PROP_NAME, 837 (uchar_t **)&ep, &nep) == DDI_SUCCESS) { 838 839 if (nep == 0 || (nep % sizeof (*ep)) != 0) { 840 cmn_err(CE_CONT, 841 "%s: " OBP_EXISTING_PROP_NAME 842 " illegal size\n", name); 843 goto attach_failed; 844 } 845 nep /= sizeof (*ep); 846 847 /* 848 * Calculate the size of the ramdisk. 
849 */ 850 for (i = 0; i < nep; ++i) { 851 size += ep[i].size; 852 } 853 } else if ((obpaddr = ddi_prop_get_int(DDI_DEV_T_ANY, 854 dip, DDI_PROP_DONTPASS, OBP_ADDRESS_PROP_NAME, 855 0)) != 0) { 856 857 size = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 858 DDI_PROP_DONTPASS, OBP_SIZE_PROP_NAME, 0); 859 } else { 860 cmn_err(CE_CONT, "%s: missing OBP properties\n", 861 name); 862 goto attach_failed; 863 } 864 865 /* 866 * Allocate driver resources for the ramdisk. 867 */ 868 if ((rsp = rd_alloc_resources(name, obpaddr, size, 869 dip)) == NULL) { 870 goto attach_failed; 871 } 872 873 rsp->rd_existing = ep; 874 rsp->rd_nexisting = nep; 875 } 876 877 mutex_exit(&rd_lock); 878 879 ddi_report_dev(dip); 880 881 return (DDI_SUCCESS); 882 883 case DDI_RESUME: 884 return (DDI_SUCCESS); 885 886 default: 887 return (DDI_FAILURE); 888 } 889 890 attach_failed: 891 /* 892 * Use our common detach routine to unallocate any stuff which 893 * was allocated above. 894 */ 895 (void) rd_common_detach(dip); 896 mutex_exit(&rd_lock); 897 898 if (ep != NULL) { 899 ddi_prop_free(ep); 900 } 901 return (DDI_FAILURE); 902 } 903 904 static int 905 rd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 906 { 907 int e; 908 909 switch (cmd) { 910 911 case DDI_DETACH: 912 mutex_enter(&rd_lock); 913 e = rd_common_detach(dip); 914 mutex_exit(&rd_lock); 915 916 return (e); 917 918 case DDI_SUSPEND: 919 return (DDI_SUCCESS); 920 921 default: 922 return (DDI_FAILURE); 923 } 924 } 925 926 /*ARGSUSED*/ 927 static int 928 rd_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 929 { 930 rd_devstate_t *rsp; 931 932 switch (infocmd) { 933 case DDI_INFO_DEVT2DEVINFO: 934 if ((rsp = ddi_get_soft_state(rd_statep, 935 getminor((dev_t)arg))) != NULL) { 936 *result = rsp->rd_dip; 937 return (DDI_SUCCESS); 938 } 939 *result = NULL; 940 return (DDI_FAILURE); 941 942 case DDI_INFO_DEVT2INSTANCE: 943 if ((rsp = ddi_get_soft_state(rd_statep, 944 getminor((dev_t)arg))) != NULL) { 945 *result = (void 
*)(uintptr_t) 946 ddi_get_instance(rsp->rd_dip); 947 return (DDI_SUCCESS); 948 } 949 *result = NULL; 950 return (DDI_FAILURE); 951 952 default: 953 return (DDI_FAILURE); 954 } 955 } 956 957 /*ARGSUSED3*/ 958 static int 959 rd_open(dev_t *devp, int flag, int otyp, cred_t *credp) 960 { 961 minor_t minor; 962 rd_devstate_t *rsp; 963 964 mutex_enter(&rd_lock); 965 966 minor = getminor(*devp); 967 if (minor == RD_CTL_MINOR) { 968 /* 969 * Master control device; must be opened exclusively. 970 */ 971 if ((flag & FEXCL) != FEXCL || otyp != OTYP_CHR) { 972 mutex_exit(&rd_lock); 973 return (EINVAL); 974 } 975 976 rsp = ddi_get_soft_state(rd_statep, RD_CTL_MINOR); 977 if (rsp == NULL) { 978 mutex_exit(&rd_lock); 979 return (ENXIO); 980 } 981 982 if (rd_is_open(rsp)) { 983 mutex_exit(&rd_lock); 984 return (EBUSY); 985 } 986 (void) rd_opened(rsp, OTYP_CHR); 987 988 mutex_exit(&rd_lock); 989 990 return (0); 991 } 992 993 rsp = ddi_get_soft_state(rd_statep, minor); 994 if (rsp == NULL) { 995 mutex_exit(&rd_lock); 996 return (ENXIO); 997 } 998 999 if (rd_opened(rsp, otyp) == -1) { 1000 mutex_exit(&rd_lock); 1001 return (EINVAL); 1002 } 1003 1004 mutex_exit(&rd_lock); 1005 return (0); 1006 } 1007 1008 /*ARGSUSED*/ 1009 static int 1010 rd_close(dev_t dev, int flag, int otyp, struct cred *credp) 1011 { 1012 minor_t minor; 1013 rd_devstate_t *rsp; 1014 1015 mutex_enter(&rd_lock); 1016 1017 minor = getminor(dev); 1018 1019 rsp = ddi_get_soft_state(rd_statep, minor); 1020 if (rsp == NULL) { 1021 mutex_exit(&rd_lock); 1022 return (EINVAL); 1023 } 1024 1025 rd_closed(rsp, otyp); 1026 1027 mutex_exit(&rd_lock); 1028 1029 return (0); 1030 } 1031 1032 static void 1033 rd_minphys(struct buf *bp) 1034 { 1035 if (bp->b_bcount > rd_maxphys) { 1036 bp->b_bcount = rd_maxphys; 1037 } 1038 } 1039 1040 static void 1041 rd_rw(rd_devstate_t *rsp, struct buf *bp, offset_t offset, size_t nbytes) 1042 { 1043 int reading = bp->b_flags & B_READ; 1044 caddr_t buf_addr; 1045 1046 bp_mapin(bp); 1047 buf_addr 
= bp->b_un.b_addr; 1048 1049 while (nbytes > 0) { 1050 offset_t off_in_window; 1051 size_t rem_in_window, copy_bytes; 1052 caddr_t raddr; 1053 1054 mutex_enter(&rsp->rd_device_lock); 1055 rd_map_window(rsp, offset); 1056 1057 off_in_window = offset - rsp->rd_window_base; 1058 rem_in_window = rsp->rd_window_size - off_in_window; 1059 1060 raddr = rsp->rd_window_virt + off_in_window; 1061 copy_bytes = MIN(nbytes, rem_in_window); 1062 1063 if (reading) { 1064 (void) bcopy(raddr, buf_addr, copy_bytes); 1065 } else { 1066 (void) bcopy(buf_addr, raddr, copy_bytes); 1067 } 1068 mutex_exit(&rsp->rd_device_lock); 1069 1070 offset += copy_bytes; 1071 buf_addr += copy_bytes; 1072 nbytes -= copy_bytes; 1073 } 1074 } 1075 1076 static int 1077 rd_strategy(struct buf *bp) 1078 { 1079 rd_devstate_t *rsp; 1080 offset_t offset; 1081 1082 rsp = ddi_get_soft_state(rd_statep, getminor(bp->b_edev)); 1083 offset = bp->b_blkno * DEV_BSIZE; 1084 1085 if (rsp == NULL) { 1086 bp->b_error = ENXIO; 1087 bp->b_flags |= B_ERROR; 1088 } else if (offset >= rsp->rd_size) { 1089 bp->b_error = EINVAL; 1090 bp->b_flags |= B_ERROR; 1091 } else { 1092 size_t nbytes; 1093 1094 if (rsp->rd_kstat) { 1095 mutex_enter(rsp->rd_kstat->ks_lock); 1096 kstat_runq_enter(KSTAT_IO_PTR(rsp->rd_kstat)); 1097 mutex_exit(rsp->rd_kstat->ks_lock); 1098 } 1099 1100 nbytes = min(bp->b_bcount, rsp->rd_size - offset); 1101 1102 rd_rw(rsp, bp, offset, nbytes); 1103 1104 bp->b_resid = bp->b_bcount - nbytes; 1105 1106 if (rsp->rd_kstat) { 1107 kstat_io_t *kioptr; 1108 1109 mutex_enter(rsp->rd_kstat->ks_lock); 1110 kioptr = KSTAT_IO_PTR(rsp->rd_kstat); 1111 if (bp->b_flags & B_READ) { 1112 kioptr->nread += nbytes; 1113 kioptr->reads++; 1114 } else { 1115 kioptr->nwritten += nbytes; 1116 kioptr->writes++; 1117 } 1118 kstat_runq_exit(kioptr); 1119 mutex_exit(rsp->rd_kstat->ks_lock); 1120 } 1121 } 1122 1123 biodone(bp); 1124 return (0); 1125 } 1126 1127 /*ARGSUSED*/ 1128 static int 1129 rd_read(dev_t dev, struct uio *uiop, cred_t 
*credp) 1130 { 1131 rd_devstate_t *rsp; 1132 1133 rsp = ddi_get_soft_state(rd_statep, getminor(dev)); 1134 1135 if (uiop->uio_offset >= rsp->rd_size) 1136 return (EINVAL); 1137 1138 return (physio(rd_strategy, NULL, dev, B_READ, rd_minphys, uiop)); 1139 } 1140 1141 /*ARGSUSED*/ 1142 static int 1143 rd_write(dev_t dev, register struct uio *uiop, cred_t *credp) 1144 { 1145 rd_devstate_t *rsp; 1146 1147 rsp = ddi_get_soft_state(rd_statep, getminor(dev)); 1148 1149 if (uiop->uio_offset >= rsp->rd_size) 1150 return (EINVAL); 1151 1152 return (physio(rd_strategy, NULL, dev, B_WRITE, rd_minphys, uiop)); 1153 } 1154 1155 /*ARGSUSED*/ 1156 static int 1157 rd_create_disk(dev_t dev, struct rd_ioctl *urip, int mode, int *rvalp) 1158 { 1159 struct rd_ioctl kri; 1160 size_t size; 1161 rd_devstate_t *rsp; 1162 1163 if (ddi_copyin(urip, &kri, sizeof (kri), mode) == -1) { 1164 return (EFAULT); 1165 } 1166 1167 kri.ri_name[RD_NAME_LEN] = '\0'; 1168 1169 size = kri.ri_size; 1170 if (size == 0) { 1171 return (EINVAL); 1172 } 1173 size = ptob(btopr(size)); 1174 1175 mutex_enter(&rd_lock); 1176 1177 if (rd_find_named_disk(kri.ri_name) != NULL) { 1178 mutex_exit(&rd_lock); 1179 return (EEXIST); 1180 } 1181 1182 rsp = rd_alloc_resources(kri.ri_name, 0, size, rd_dip); 1183 if (rsp == NULL) { 1184 mutex_exit(&rd_lock); 1185 return (EAGAIN); 1186 } 1187 1188 mutex_exit(&rd_lock); 1189 1190 return (ddi_copyout(&kri, urip, sizeof (kri), mode) == -1 ? 
EFAULT : 0); 1191 } 1192 1193 /*ARGSUSED*/ 1194 static int 1195 rd_delete_disk(dev_t dev, struct rd_ioctl *urip, int mode) 1196 { 1197 struct rd_ioctl kri; 1198 rd_devstate_t *rsp; 1199 1200 if (ddi_copyin(urip, &kri, sizeof (kri), mode) == -1) { 1201 return (EFAULT); 1202 } 1203 1204 kri.ri_name[RD_NAME_LEN] = '\0'; 1205 1206 mutex_enter(&rd_lock); 1207 1208 rsp = rd_find_named_disk(kri.ri_name); 1209 if (rsp == NULL || rsp->rd_dip != rd_dip) { 1210 mutex_exit(&rd_lock); 1211 return (EINVAL); 1212 } 1213 if (rd_is_open(rsp)) { 1214 mutex_exit(&rd_lock); 1215 return (EBUSY); 1216 } 1217 1218 rd_dealloc_resources(rsp); 1219 1220 mutex_exit(&rd_lock); 1221 1222 return (0); 1223 } 1224 1225 /*ARGSUSED*/ 1226 static int 1227 rd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 1228 { 1229 minor_t minor; 1230 int error; 1231 enum dkio_state dkstate; 1232 rd_devstate_t *rsp; 1233 1234 minor = getminor(dev); 1235 1236 /* 1237 * Ramdisk ioctls only apply to the master device. 1238 */ 1239 if (minor == RD_CTL_MINOR) { 1240 struct rd_ioctl *rip = (struct rd_ioctl *)arg; 1241 1242 /* 1243 * The query commands only need read-access - i.e., normal 1244 * users are allowed to do those on the controlling device 1245 * as long as they can open it read-only. 1246 */ 1247 switch (cmd) { 1248 case RD_CREATE_DISK: 1249 if ((mode & FWRITE) == 0) 1250 return (EPERM); 1251 return (rd_create_disk(dev, rip, mode, rvalp)); 1252 1253 case RD_DELETE_DISK: 1254 if ((mode & FWRITE) == 0) 1255 return (EPERM); 1256 return (rd_delete_disk(dev, rip, mode)); 1257 1258 default: 1259 return (EINVAL); 1260 } 1261 } 1262 1263 rsp = ddi_get_soft_state(rd_statep, minor); 1264 if (rsp == NULL) { 1265 return (ENXIO); 1266 } 1267 1268 /* 1269 * These are for faking out utilities like newfs. 
1270 */ 1271 switch (cmd) { 1272 case DKIOCGVTOC: 1273 switch (ddi_model_convert_from(mode & FMODELS)) { 1274 case DDI_MODEL_ILP32: { 1275 struct vtoc32 vtoc32; 1276 1277 vtoctovtoc32(rsp->rd_vtoc, vtoc32); 1278 if (ddi_copyout(&vtoc32, (void *)arg, 1279 sizeof (struct vtoc32), mode)) 1280 return (EFAULT); 1281 } 1282 break; 1283 1284 case DDI_MODEL_NONE: 1285 if (ddi_copyout(&rsp->rd_vtoc, (void *)arg, 1286 sizeof (struct vtoc), mode)) 1287 return (EFAULT); 1288 break; 1289 } 1290 return (0); 1291 case DKIOCINFO: 1292 error = ddi_copyout(&rsp->rd_ci, (void *)arg, 1293 sizeof (struct dk_cinfo), mode); 1294 if (error) 1295 return (EFAULT); 1296 return (0); 1297 case DKIOCG_VIRTGEOM: 1298 case DKIOCG_PHYGEOM: 1299 case DKIOCGGEOM: 1300 error = ddi_copyout(&rsp->rd_dkg, (void *)arg, 1301 sizeof (struct dk_geom), mode); 1302 if (error) 1303 return (EFAULT); 1304 return (0); 1305 case DKIOCSTATE: 1306 /* the file is always there */ 1307 dkstate = DKIO_INSERTED; 1308 error = ddi_copyout(&dkstate, (void *)arg, 1309 sizeof (enum dkio_state), mode); 1310 if (error) 1311 return (EFAULT); 1312 return (0); 1313 default: 1314 return (ENOTTY); 1315 } 1316 } 1317 1318 1319 static struct cb_ops rd_cb_ops = { 1320 rd_open, 1321 rd_close, 1322 rd_strategy, 1323 nodev, 1324 nodev, /* dump */ 1325 rd_read, 1326 rd_write, 1327 rd_ioctl, 1328 nodev, /* devmap */ 1329 nodev, /* mmap */ 1330 nodev, /* segmap */ 1331 nochpoll, /* poll */ 1332 ddi_prop_op, 1333 NULL, 1334 D_NEW | D_MP 1335 }; 1336 1337 static struct dev_ops rd_ops = { 1338 DEVO_REV, 1339 0, 1340 rd_getinfo, 1341 nulldev, /* identify */ 1342 nulldev, /* probe */ 1343 rd_attach, 1344 rd_detach, 1345 nodev, /* reset */ 1346 &rd_cb_ops, 1347 (struct bus_ops *)0, 1348 NULL, 1349 ddi_quiesce_not_needed, /* quiesce */ 1350 }; 1351 1352 1353 extern struct mod_ops mod_driverops; 1354 1355 static struct modldrv modldrv = { 1356 &mod_driverops, 1357 "ramdisk driver", 1358 &rd_ops 1359 }; 1360 1361 static struct modlinkage modlinkage = 
{ 1362 MODREV_1, 1363 &modldrv, 1364 0 1365 }; 1366 1367 int 1368 _init(void) 1369 { 1370 int e; 1371 1372 if ((e = ddi_soft_state_init(&rd_statep, 1373 sizeof (rd_devstate_t), 0)) != 0) { 1374 return (e); 1375 } 1376 1377 mutex_init(&rd_lock, NULL, MUTEX_DRIVER, NULL); 1378 1379 if ((e = mod_install(&modlinkage)) != 0) { 1380 mutex_destroy(&rd_lock); 1381 ddi_soft_state_fini(&rd_statep); 1382 } 1383 1384 return (e); 1385 } 1386 1387 int 1388 _fini(void) 1389 { 1390 int e; 1391 1392 if ((e = mod_remove(&modlinkage)) != 0) { 1393 return (e); 1394 } 1395 1396 ddi_soft_state_fini(&rd_statep); 1397 mutex_destroy(&rd_lock); 1398 1399 return (e); 1400 } 1401 1402 int 1403 _info(struct modinfo *modinfop) 1404 { 1405 return (mod_info(&modlinkage, modinfop)); 1406 } 1407