1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Ramdisk device driver. 30 * 31 * There are two types of ramdisk: 'real' OBP-created ramdisks, and 'pseudo' 32 * ramdisks created at runtime with no corresponding OBP device node. The 33 * ramdisk(7D) driver is capable of dealing with both, and with the creation 34 * and deletion of 'pseudo' ramdisks. 35 * 36 * Every ramdisk has a single 'state' structure which maintains data for 37 * that ramdisk, and is assigned a single minor number. The bottom 10-bits 38 * of the minor number index the state structures; the top 8-bits give a 39 * 'real OBP disk' number, i.e. they are zero for 'pseudo' ramdisks. Thus 40 * it is possible to distinguish 'real' from 'pseudo' ramdisks using the 41 * top 8-bits of the minor number. 42 * 43 * Each OBP-created ramdisk has its own node in the device tree with an 44 * "existing" property which describes the one-or-more physical address ranges 45 * assigned to the ramdisk. All 'pseudo' ramdisks share a common devinfo 46 * structure. 47 * 48 * A single character device node is used by ramdiskadm(1M) to communicate 49 * with the ramdisk driver, with minor number 0: 50 * 51 * /dev/ramdiskctl -> /devices/pseudo/ramdisk@0:ctl 52 * 53 * For consistent access, block and raw device nodes are created for *every* 54 * ramdisk. For 'pseudo' ramdisks: 55 * 56 * /dev/ramdisk/<diskname> -> /devices/pseudo/ramdisk@0:<diskname> 57 * /dev/rramdisk/<diskname> -> /devices/pseudo/ramdisk@0:<diskname>,raw 58 * 59 * For OBP-created ramdisks: 60 * 61 * /dev/ramdisk/<diskname> -> /devices/ramdisk-<diskname>:a 62 * /dev/ramdisk/<diskname> -> /devices/ramdisk-<diskname>:a,raw 63 * 64 * This allows the transition from the standalone to the kernel to proceed 65 * when booting from a ramdisk, and for the installation to correctly identify 66 * the root device. 67 */ 68 69 #include <sys/types.h> 70 #include <sys/param.h> 71 #include <sys/sysmacros.h> 72 #include <sys/errno.h> 73 #include <sys/uio.h> 74 #include <sys/buf.h> 75 #include <sys/modctl.h> 76 #include <sys/open.h> 77 #include <sys/kmem.h> 78 #include <sys/poll.h> 79 #include <sys/conf.h> 80 #include <sys/cmn_err.h> 81 #include <sys/stat.h> 82 #include <sys/file.h> 83 #include <sys/ddi.h> 84 #include <sys/sunddi.h> 85 #include <sys/ramdisk.h> 86 #include <vm/seg_kmem.h> 87 88 /* 89 * An opaque handle where information about our set of ramdisk devices lives. 90 */ 91 static void *rd_statep; 92 93 /* 94 * Pointer to devinfo for the 'pseudo' ramdisks. Real OBP-created ramdisks 95 * get their own individual devinfo. 96 */ 97 static dev_info_t *rd_dip = NULL; 98 99 /* 100 * Global state lock. 101 */ 102 static kmutex_t rd_lock; 103 104 /* 105 * Maximum number of ramdisks supported by this driver. 106 */ 107 static uint32_t rd_max_disks = RD_DFLT_DISKS; 108 109 /* 110 * Percentage of physical memory which can be assigned to pseudo ramdisks, 111 * what that equates to in pages, and how many pages are currently assigned. 112 */ 113 static uint_t rd_percent_physmem = RD_DEFAULT_PERCENT_PHYSMEM; 114 static pgcnt_t rd_max_physmem; 115 static pgcnt_t rd_tot_physmem; 116 117 static uint_t rd_maxphys = RD_DEFAULT_MAXPHYS; 118 119 /* 120 * Is the driver busy, i.e. are there any pseudo ramdisk devices in existence? 121 */ 122 static int 123 rd_is_busy(void) 124 { 125 minor_t minor; 126 rd_devstate_t *rsp; 127 128 ASSERT(mutex_owned(&rd_lock)); 129 for (minor = 1; minor <= rd_max_disks; ++minor) { 130 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL && 131 rsp->rd_dip == rd_dip) { 132 return (EBUSY); 133 } 134 } 135 return (0); 136 } 137 138 /* 139 * Find the first free minor number; returns zero if there isn't one. 140 */ 141 static minor_t 142 rd_find_free_minor(void) 143 { 144 minor_t minor; 145 146 ASSERT(mutex_owned(&rd_lock)); 147 for (minor = 1; minor <= rd_max_disks; ++minor) { 148 if (ddi_get_soft_state(rd_statep, minor) == NULL) { 149 return (minor); 150 } 151 } 152 return (0); 153 } 154 155 /* 156 * Locate the rd_devstate for the named ramdisk; returns NULL if not found. 157 * Each ramdisk is identified uniquely by name, i.e. an OBP-created ramdisk 158 * cannot have the same name as a pseudo ramdisk. 159 */ 160 static rd_devstate_t * 161 rd_find_named_disk(char *name) 162 { 163 minor_t minor; 164 rd_devstate_t *rsp; 165 166 ASSERT(mutex_owned(&rd_lock)); 167 for (minor = 1; minor <= rd_max_disks; ++minor) { 168 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL && 169 strcmp(rsp->rd_name, name) == 0) { 170 return (rsp); 171 } 172 } 173 return (NULL); 174 } 175 176 /* 177 * Locate the rd_devstate for the real OBP-created ramdisk whose devinfo 178 * is referenced by 'dip'; returns NULL if not found (shouldn't happen). 179 */ 180 static rd_devstate_t * 181 rd_find_dip_state(dev_info_t *dip) 182 { 183 minor_t minor; 184 rd_devstate_t *rsp; 185 186 ASSERT(mutex_owned(&rd_lock)); 187 for (minor = 1; minor <= rd_max_disks; ++minor) { 188 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL && 189 rsp->rd_dip == dip) { 190 return (rsp); 191 } 192 } 193 return (NULL); 194 } 195 196 /* 197 * Is the ramdisk open? 198 */ 199 static int 200 rd_is_open(rd_devstate_t *rsp) 201 { 202 ASSERT(mutex_owned(&rd_lock)); 203 return (rsp->rd_chr_open || rsp->rd_blk_open || rsp->rd_lyr_open_cnt); 204 } 205 206 /* 207 * Mark the ramdisk open. 208 */ 209 static int 210 rd_opened(rd_devstate_t *rsp, int otyp) 211 { 212 ASSERT(mutex_owned(&rd_lock)); 213 switch (otyp) { 214 case OTYP_CHR: 215 rsp->rd_chr_open = 1; 216 break; 217 case OTYP_BLK: 218 rsp->rd_blk_open = 1; 219 break; 220 case OTYP_LYR: 221 rsp->rd_lyr_open_cnt++; 222 break; 223 default: 224 return (-1); 225 } 226 return (0); 227 } 228 229 /* 230 * Mark the ramdisk closed. 231 */ 232 static void 233 rd_closed(rd_devstate_t *rsp, int otyp) 234 { 235 ASSERT(mutex_owned(&rd_lock)); 236 switch (otyp) { 237 case OTYP_CHR: 238 rsp->rd_chr_open = 0; 239 break; 240 case OTYP_BLK: 241 rsp->rd_blk_open = 0; 242 break; 243 case OTYP_LYR: 244 rsp->rd_lyr_open_cnt--; 245 break; 246 default: 247 break; 248 } 249 } 250 251 static void 252 rd_init_tuneables(void) 253 { 254 char *prop, *p; 255 256 /* 257 * Ensure sanity of 'rd_max_disks', which may be tuned in ramdisk.conf. 258 */ 259 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, rd_dip, 0, 260 "max_disks", &prop) == DDI_PROP_SUCCESS) { 261 p = prop; 262 rd_max_disks = (uint32_t)stoi(&p); 263 ddi_prop_free(prop); 264 } 265 if (rd_max_disks >= RD_MAX_DISKS) { 266 cmn_err(CE_WARN, "ramdisk: rd_max_disks (%u) too big;" 267 " using default (%u).", rd_max_disks, RD_MAX_DISKS - 1); 268 269 rd_max_disks = RD_MAX_DISKS - 1; 270 } 271 272 /* 273 * Ensure sanity of 'rd_percent_physmem', which may be tuned 274 * in ramdisk.conf. 275 */ 276 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, rd_dip, 0, 277 "percent_physmem", &prop) == DDI_PROP_SUCCESS) { 278 p = prop; 279 rd_percent_physmem = (uint_t)stoi(&p); 280 ddi_prop_free(prop); 281 } 282 if (rd_percent_physmem >= 100) { 283 cmn_err(CE_WARN, "ramdisk: rd_percent_physmem (%u) >= 100;" 284 " using default (%u%%).", rd_percent_physmem, 285 RD_DEFAULT_PERCENT_PHYSMEM); 286 287 rd_percent_physmem = RD_DEFAULT_PERCENT_PHYSMEM; 288 } 289 290 /* 291 * Since availrmem_initial is a long, this won't overflow. 292 */ 293 rd_max_physmem = (availrmem_initial * rd_percent_physmem) / 100; 294 } 295 296 /* 297 * Allocate enough physical pages to hold "npages" pages. Returns an 298 * array of page_t * pointers that can later be mapped in or out via 299 * rd_{un}map_window() but is otherwise opaque, or NULL on failure. 300 */ 301 page_t ** 302 rd_phys_alloc(pgcnt_t npages) 303 { 304 page_t *pp, **ppa; 305 spgcnt_t i; 306 size_t ppalen; 307 struct seg kseg; 308 caddr_t addr; /* For coloring */ 309 310 if (rd_tot_physmem + npages > rd_max_physmem) 311 return (NULL); 312 313 if (!page_resv(npages, KM_NOSLEEP)) 314 return (NULL); 315 316 if (!page_create_wait(npages, 0)) { 317 page_unresv(npages); 318 return (NULL); 319 } 320 321 ppalen = npages * sizeof (struct page_t *); 322 ppa = kmem_zalloc(ppalen, KM_NOSLEEP); 323 if (ppa == NULL) { 324 page_create_putback(npages); 325 page_unresv(npages); 326 return (NULL); 327 } 328 329 kseg.s_as = &kas; 330 for (i = 0, addr = NULL; i < npages; ++i, addr += PAGESIZE) { 331 pp = page_get_freelist(&kvp, 0, &kseg, addr, PAGESIZE, 0, NULL); 332 if (pp == NULL) { 333 pp = page_get_cachelist(&kvp, 0, &kseg, addr, 0, NULL); 334 if (pp == NULL) 335 goto out; 336 if (!PP_ISAGED(pp)) 337 page_hashout(pp, NULL); 338 } 339 340 PP_CLRFREE(pp); 341 PP_CLRAGED(pp); 342 ppa[i] = pp; 343 } 344 345 for (i = 0; i < npages; i++) 346 page_downgrade(ppa[i]); 347 rd_tot_physmem += npages; 348 349 return (ppa); 350 351 out: 352 ASSERT(i < npages); 353 page_create_putback(npages - i); 354 while (--i >= 0) 355 page_free(ppa[i], 0); 356 kmem_free(ppa, ppalen); 357 page_unresv(npages); 358 359 return (NULL); 360 } 361 362 /* 363 * Free physical pages previously allocated via rd_phys_alloc(); note that 364 * this function may block as it has to wait until it can exclusively lock 365 * all the pages first. 366 */ 367 static void 368 rd_phys_free(page_t **ppa, pgcnt_t npages) 369 { 370 pgcnt_t i; 371 size_t ppalen = npages * sizeof (struct page_t *); 372 373 for (i = 0; i < npages; ++i) { 374 if (! page_tryupgrade(ppa[i])) { 375 page_unlock(ppa[i]); 376 while (! page_lock(ppa[i], SE_EXCL, NULL, P_RECLAIM)) 377 ; 378 } 379 page_free(ppa[i], 0); 380 } 381 382 kmem_free(ppa, ppalen); 383 384 page_unresv(npages); 385 rd_tot_physmem -= npages; 386 } 387 388 /* 389 * Remove a window mapping (if present). 390 */ 391 static void 392 rd_unmap_window(rd_devstate_t *rsp) 393 { 394 ASSERT(rsp->rd_window_obp == 0); 395 if (rsp->rd_window_base != RD_WINDOW_NOT_MAPPED) { 396 hat_unload(kas.a_hat, rsp->rd_window_virt, rsp->rd_window_size, 397 HAT_UNLOAD_UNLOCK); 398 } 399 } 400 401 /* 402 * Map a portion of the ramdisk into the virtual window. 403 */ 404 static void 405 rd_map_window(rd_devstate_t *rsp, off_t offset) 406 { 407 pgcnt_t offpgs = btop(offset); 408 409 if (rsp->rd_window_base != RD_WINDOW_NOT_MAPPED) { 410 /* 411 * Already mapped; is offset within our window? 412 */ 413 if (offset >= rsp->rd_window_base && 414 offset < rsp->rd_window_base + rsp->rd_window_size) { 415 return; 416 } 417 418 /* 419 * No, we need to re-map; toss the old mapping. 420 */ 421 rd_unmap_window(rsp); 422 } 423 rsp->rd_window_base = ptob(offpgs); 424 425 /* 426 * Different algorithms depending on whether this is a real 427 * OBP-created ramdisk, or a pseudo ramdisk. 428 */ 429 if (rsp->rd_dip == rd_dip) { 430 pgcnt_t pi, lastpi; 431 caddr_t vaddr; 432 433 /* 434 * Find the range of pages which should be mapped. 435 */ 436 pi = offpgs; 437 lastpi = pi + btopr(rsp->rd_window_size); 438 if (lastpi > rsp->rd_npages) { 439 lastpi = rsp->rd_npages; 440 } 441 442 /* 443 * Load the mapping. 444 */ 445 vaddr = rsp->rd_window_virt; 446 for (; pi < lastpi; ++pi) { 447 hat_memload(kas.a_hat, vaddr, rsp->rd_ppa[pi], 448 (PROT_READ | PROT_WRITE) | HAT_NOSYNC, 449 HAT_LOAD_LOCK); 450 vaddr += ptob(1); 451 } 452 } else { 453 uint_t i; 454 pfn_t pfn; 455 456 /* 457 * Real OBP-created ramdisk: locate the physical range which 458 * contains this offset. 459 */ 460 for (i = 0; i < rsp->rd_nexisting; ++i) { 461 if (offset < rsp->rd_existing[i].size) { 462 break; 463 } 464 offset -= rsp->rd_existing[i].size; 465 } 466 ASSERT(i < rsp->rd_nexisting); 467 468 /* 469 * Load the mapping. 470 */ 471 pfn = btop(rsp->rd_existing[i].phys + offset); 472 hat_devload(kas.a_hat, rsp->rd_window_virt, rsp->rd_window_size, 473 pfn, (PROT_READ | PROT_WRITE), 474 HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK); 475 } 476 } 477 478 /* 479 * Fakes up a disk geometry, and one big partition, based on the size 480 * of the file. This is needed because we allow newfs'ing the device, 481 * and newfs will do several disk ioctls to figure out the geometry and 482 * partition information. It uses that information to determine the parameters 483 * to pass to mkfs. Geometry is pretty much irrelevant these days, but we 484 * have to support it. 485 * 486 * Stolen from lofi.c - should maybe split out common code sometime. 487 */ 488 static void 489 rd_fake_disk_geometry(rd_devstate_t *rsp) 490 { 491 /* dk_geom - see dkio(7I) */ 492 /* 493 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs 494 * of sectors), but that breaks programs like fdisk which want to 495 * partition a disk by cylinder. With one cylinder, you can't create 496 * an fdisk partition and put pcfs on it for testing (hard to pick 497 * a number between one and one). 498 * 499 * The cheezy floppy test is an attempt to not have too few cylinders 500 * for a small file, or so many on a big file that you waste space 501 * for backup superblocks or cylinder group structures. 502 */ 503 if (rsp->rd_size < (2 * 1024 * 1024)) /* floppy? */ 504 rsp->rd_dkg.dkg_ncyl = rsp->rd_size / (100 * 1024); 505 else 506 rsp->rd_dkg.dkg_ncyl = rsp->rd_size / (300 * 1024); 507 /* in case file file is < 100k */ 508 if (rsp->rd_dkg.dkg_ncyl == 0) 509 rsp->rd_dkg.dkg_ncyl = 1; 510 rsp->rd_dkg.dkg_acyl = 0; 511 rsp->rd_dkg.dkg_bcyl = 0; 512 rsp->rd_dkg.dkg_nhead = 1; 513 rsp->rd_dkg.dkg_obs1 = 0; 514 rsp->rd_dkg.dkg_intrlv = 0; 515 rsp->rd_dkg.dkg_obs2 = 0; 516 rsp->rd_dkg.dkg_obs3 = 0; 517 rsp->rd_dkg.dkg_apc = 0; 518 rsp->rd_dkg.dkg_rpm = 7200; 519 rsp->rd_dkg.dkg_pcyl = rsp->rd_dkg.dkg_ncyl + rsp->rd_dkg.dkg_acyl; 520 rsp->rd_dkg.dkg_nsect = rsp->rd_size / 521 (DEV_BSIZE * rsp->rd_dkg.dkg_ncyl); 522 rsp->rd_dkg.dkg_write_reinstruct = 0; 523 rsp->rd_dkg.dkg_read_reinstruct = 0; 524 525 /* vtoc - see dkio(7I) */ 526 bzero(&rsp->rd_vtoc, sizeof (struct vtoc)); 527 rsp->rd_vtoc.v_sanity = VTOC_SANE; 528 rsp->rd_vtoc.v_version = V_VERSION; 529 bcopy(RD_DRIVER_NAME, rsp->rd_vtoc.v_volume, 7); 530 rsp->rd_vtoc.v_sectorsz = DEV_BSIZE; 531 rsp->rd_vtoc.v_nparts = 1; 532 rsp->rd_vtoc.v_part[0].p_tag = V_UNASSIGNED; 533 rsp->rd_vtoc.v_part[0].p_flag = V_UNMNT; 534 rsp->rd_vtoc.v_part[0].p_start = (daddr_t)0; 535 /* 536 * The partition size cannot just be the number of sectors, because 537 * that might not end on a cylinder boundary. And if that's the case, 538 * newfs/mkfs will print a scary warning. So just figure the size 539 * based on the number of cylinders and sectors/cylinder. 540 */ 541 rsp->rd_vtoc.v_part[0].p_size = rsp->rd_dkg.dkg_pcyl * 542 rsp->rd_dkg.dkg_nsect * rsp->rd_dkg.dkg_nhead; 543 544 /* dk_cinfo - see dkio(7I) */ 545 bzero(&rsp->rd_ci, sizeof (struct dk_cinfo)); 546 (void) strcpy(rsp->rd_ci.dki_cname, RD_DRIVER_NAME); 547 rsp->rd_ci.dki_ctype = DKC_MD; 548 rsp->rd_ci.dki_flags = 0; 549 rsp->rd_ci.dki_cnum = 0; 550 rsp->rd_ci.dki_addr = 0; 551 rsp->rd_ci.dki_space = 0; 552 rsp->rd_ci.dki_prio = 0; 553 rsp->rd_ci.dki_vec = 0; 554 (void) strcpy(rsp->rd_ci.dki_dname, RD_DRIVER_NAME); 555 rsp->rd_ci.dki_unit = 0; 556 rsp->rd_ci.dki_slave = 0; 557 rsp->rd_ci.dki_partition = 0; 558 /* 559 * newfs uses this to set maxcontig. Must not be < 16, or it 560 * will be 0 when newfs multiplies it by DEV_BSIZE and divides 561 * it by the block size. Then tunefs doesn't work because 562 * maxcontig is 0. 563 */ 564 rsp->rd_ci.dki_maxtransfer = 16; 565 } 566 567 /* 568 * Deallocate resources (virtual and physical, device nodes, structures) 569 * from a ramdisk. 570 */ 571 static void 572 rd_dealloc_resources(rd_devstate_t *rsp) 573 { 574 dev_info_t *dip = rsp->rd_dip; 575 char namebuf[RD_NAME_LEN + 5]; 576 dev_t fulldev; 577 578 if (rsp->rd_window_obp == 0 && rsp->rd_window_virt != NULL) { 579 if (rsp->rd_window_base != RD_WINDOW_NOT_MAPPED) { 580 rd_unmap_window(rsp); 581 } 582 vmem_free(heap_arena, rsp->rd_window_virt, rsp->rd_window_size); 583 } 584 mutex_destroy(&rsp->rd_device_lock); 585 586 if (rsp->rd_existing) { 587 ddi_prop_free(rsp->rd_existing); 588 } 589 if (rsp->rd_ppa != NULL) { 590 rd_phys_free(rsp->rd_ppa, rsp->rd_npages); 591 } 592 593 /* 594 * Remove the block and raw device nodes. 595 */ 596 if (dip == rd_dip) { 597 (void) snprintf(namebuf, sizeof (namebuf), "%s", 598 rsp->rd_name); 599 ddi_remove_minor_node(dip, namebuf); 600 (void) snprintf(namebuf, sizeof (namebuf), "%s,raw", 601 rsp->rd_name); 602 ddi_remove_minor_node(dip, namebuf); 603 } else { 604 ddi_remove_minor_node(dip, "a"); 605 ddi_remove_minor_node(dip, "a,raw"); 606 } 607 608 /* 609 * Remove the "Size" and "Nblocks" properties. 610 */ 611 fulldev = makedevice(ddi_driver_major(dip), rsp->rd_minor); 612 (void) ddi_prop_remove(fulldev, dip, SIZE_PROP_NAME); 613 (void) ddi_prop_remove(fulldev, dip, NBLOCKS_PROP_NAME); 614 615 if (rsp->rd_kstat) { 616 kstat_delete(rsp->rd_kstat); 617 mutex_destroy(&rsp->rd_kstat_lock); 618 } 619 620 ddi_soft_state_free(rd_statep, rsp->rd_minor); 621 } 622 623 /* 624 * Allocate resources (virtual and physical, device nodes, structures) 625 * to a ramdisk. 626 */ 627 static rd_devstate_t * 628 rd_alloc_resources(char *name, uint_t addr, size_t size, dev_info_t *dip) 629 { 630 minor_t minor; 631 rd_devstate_t *rsp; 632 char namebuf[RD_NAME_LEN + 5]; 633 dev_t fulldev; 634 int64_t Nblocks_prop_val; 635 int64_t Size_prop_val; 636 637 minor = rd_find_free_minor(); 638 if (ddi_soft_state_zalloc(rd_statep, minor) == DDI_FAILURE) { 639 return (NULL); 640 } 641 rsp = ddi_get_soft_state(rd_statep, minor); 642 643 (void) strcpy(rsp->rd_name, name); 644 rsp->rd_dip = dip; 645 rsp->rd_minor = minor; 646 rsp->rd_size = size; 647 648 /* 649 * Allocate virtual window onto ramdisk. 650 */ 651 mutex_init(&rsp->rd_device_lock, NULL, MUTEX_DRIVER, NULL); 652 if (addr == 0) { 653 rsp->rd_window_obp = 0; 654 rsp->rd_window_base = RD_WINDOW_NOT_MAPPED; 655 rsp->rd_window_size = PAGESIZE; 656 rsp->rd_window_virt = vmem_alloc(heap_arena, 657 rsp->rd_window_size, VM_SLEEP); 658 if (rsp->rd_window_virt == NULL) { 659 goto create_failed; 660 } 661 } else { 662 rsp->rd_window_obp = 1; 663 rsp->rd_window_base = 0; 664 rsp->rd_window_size = size; 665 rsp->rd_window_virt = (caddr_t)((ulong_t)addr); 666 } 667 668 /* 669 * Allocate physical memory for non-OBP ramdisks. 670 * Create pseudo block and raw device nodes. 671 */ 672 if (dip == rd_dip) { 673 rsp->rd_npages = btopr(size); 674 rsp->rd_ppa = rd_phys_alloc(rsp->rd_npages); 675 if (rsp->rd_ppa == NULL) { 676 goto create_failed; 677 } 678 679 /* 680 * For non-OBP ramdisks the device nodes are: 681 * 682 * /devices/pseudo/ramdisk@0:<diskname> 683 * /devices/pseudo/ramdisk@0:<diskname>,raw 684 */ 685 (void) snprintf(namebuf, sizeof (namebuf), "%s", 686 rsp->rd_name); 687 if (ddi_create_minor_node(dip, namebuf, S_IFBLK, minor, 688 DDI_PSEUDO, 0) == DDI_FAILURE) { 689 goto create_failed; 690 } 691 (void) snprintf(namebuf, sizeof (namebuf), "%s,raw", 692 rsp->rd_name); 693 if (ddi_create_minor_node(dip, namebuf, S_IFCHR, minor, 694 DDI_PSEUDO, 0) == DDI_FAILURE) { 695 goto create_failed; 696 } 697 } else { 698 /* 699 * For OBP-created ramdisks the device nodes are: 700 * 701 * /devices/ramdisk-<diskname>:a 702 * /devices/ramdisk-<diskname>:a,raw 703 */ 704 if (ddi_create_minor_node(dip, "a", S_IFBLK, minor, 705 DDI_PSEUDO, 0) == DDI_FAILURE) { 706 goto create_failed; 707 } 708 if (ddi_create_minor_node(dip, "a,raw", S_IFCHR, minor, 709 DDI_PSEUDO, 0) == DDI_FAILURE) { 710 goto create_failed; 711 } 712 } 713 714 /* 715 * Create the "Size" and "Nblocks" properties. 716 */ 717 fulldev = makedevice(ddi_driver_major(dip), minor); 718 Size_prop_val = size; 719 if ((ddi_prop_update_int64(fulldev, dip, 720 SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) { 721 goto create_failed; 722 } 723 Nblocks_prop_val = size / DEV_BSIZE; 724 if ((ddi_prop_update_int64(fulldev, dip, 725 NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) { 726 goto create_failed; 727 } 728 729 /* 730 * Allocate kstat stuff. 731 */ 732 rsp->rd_kstat = kstat_create(RD_DRIVER_NAME, minor, NULL, 733 "disk", KSTAT_TYPE_IO, 1, 0); 734 if (rsp->rd_kstat) { 735 mutex_init(&rsp->rd_kstat_lock, NULL, 736 MUTEX_DRIVER, NULL); 737 rsp->rd_kstat->ks_lock = &rsp->rd_kstat_lock; 738 kstat_install(rsp->rd_kstat); 739 } 740 741 rd_fake_disk_geometry(rsp); 742 743 return (rsp); 744 745 create_failed: 746 /* 747 * Cleanup. 748 */ 749 rd_dealloc_resources(rsp); 750 751 return (NULL); 752 } 753 754 /* 755 * Undo what we did in rd_attach, freeing resources and removing things which 756 * we installed. The system framework guarantees we are not active with this 757 * devinfo node in any other entry points at this time. 758 */ 759 static int 760 rd_common_detach(dev_info_t *dip) 761 { 762 if (dip == rd_dip) { 763 /* 764 * Pseudo node: can't detach if any pseudo ramdisks exist. 765 */ 766 if (rd_is_busy()) { 767 return (DDI_FAILURE); 768 } 769 ddi_soft_state_free(rd_statep, RD_CTL_MINOR); 770 rd_dip = NULL; 771 } else { 772 /* 773 * A 'real' ramdisk; find the state and free resources. 774 */ 775 rd_devstate_t *rsp; 776 777 if ((rsp = rd_find_dip_state(dip)) != NULL) { 778 rd_dealloc_resources(rsp); 779 } 780 } 781 ddi_remove_minor_node(dip, NULL); 782 783 return (DDI_SUCCESS); 784 } 785 786 static int 787 rd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 788 { 789 char *name; 790 rd_existing_t *ep = NULL; 791 uint_t obpaddr = 0, nep, i; 792 size_t size = 0; 793 rd_devstate_t *rsp; 794 795 switch (cmd) { 796 797 case DDI_ATTACH: 798 mutex_enter(&rd_lock); 799 800 /* 801 * For pseudo ramdisk devinfo set up state 0 and :ctl device; 802 * else it's an OBP-created ramdisk. 803 */ 804 if (is_pseudo_device(dip)) { 805 rd_dip = dip; 806 rd_init_tuneables(); 807 808 /* 809 * The zeroth minor is reserved for the ramdisk 810 * 'control' device. 811 */ 812 if (ddi_soft_state_zalloc(rd_statep, RD_CTL_MINOR) == 813 DDI_FAILURE) { 814 goto attach_failed; 815 } 816 rsp = ddi_get_soft_state(rd_statep, RD_CTL_MINOR); 817 rsp->rd_dip = dip; 818 819 if (ddi_create_minor_node(dip, RD_CTL_NODE, 820 S_IFCHR, 0, DDI_PSEUDO, NULL) == DDI_FAILURE) { 821 goto attach_failed; 822 } 823 } else { 824 RD_STRIP_PREFIX(name, ddi_node_name(dip)); 825 826 if (strlen(name) > RD_NAME_LEN) { 827 cmn_err(CE_CONT, 828 "%s: name too long - ignoring\n", name); 829 goto attach_failed; 830 } 831 832 /* 833 * An OBP-created ramdisk must have an 'existing' 834 * property; get and check it. 835 */ 836 if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, dip, 837 DDI_PROP_DONTPASS, OBP_EXISTING_PROP_NAME, 838 (uchar_t **)&ep, &nep) == DDI_SUCCESS) { 839 840 if (nep == 0 || (nep % sizeof (*ep)) != 0) { 841 cmn_err(CE_CONT, 842 "%s: " OBP_EXISTING_PROP_NAME 843 " illegal size\n", name); 844 goto attach_failed; 845 } 846 nep /= sizeof (*ep); 847 848 /* 849 * Calculate the size of the ramdisk. 850 */ 851 for (i = 0; i < nep; ++i) { 852 size += ep[i].size; 853 } 854 } else if ((obpaddr = ddi_prop_get_int(DDI_DEV_T_ANY, 855 dip, DDI_PROP_DONTPASS, OBP_ADDRESS_PROP_NAME, 856 0)) != 0) { 857 858 size = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 859 DDI_PROP_DONTPASS, OBP_SIZE_PROP_NAME, 0); 860 } else { 861 cmn_err(CE_CONT, "%s: missing OBP properties\n", 862 name); 863 goto attach_failed; 864 } 865 866 /* 867 * Allocate driver resources for the ramdisk. 868 */ 869 if ((rsp = rd_alloc_resources(name, obpaddr, size, 870 dip)) == NULL) { 871 goto attach_failed; 872 } 873 874 rsp->rd_existing = ep; 875 rsp->rd_nexisting = nep; 876 } 877 878 mutex_exit(&rd_lock); 879 880 ddi_report_dev(dip); 881 882 return (DDI_SUCCESS); 883 884 case DDI_RESUME: 885 return (DDI_SUCCESS); 886 887 default: 888 return (DDI_FAILURE); 889 } 890 891 attach_failed: 892 /* 893 * Use our common detach routine to unallocate any stuff which 894 * was allocated above. 895 */ 896 (void) rd_common_detach(dip); 897 mutex_exit(&rd_lock); 898 899 if (ep != NULL) { 900 ddi_prop_free(ep); 901 } 902 return (DDI_FAILURE); 903 } 904 905 static int 906 rd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 907 { 908 int e; 909 910 switch (cmd) { 911 912 case DDI_DETACH: 913 mutex_enter(&rd_lock); 914 e = rd_common_detach(dip); 915 mutex_exit(&rd_lock); 916 917 return (e); 918 919 case DDI_SUSPEND: 920 return (DDI_SUCCESS); 921 922 default: 923 return (DDI_FAILURE); 924 } 925 } 926 927 /*ARGSUSED*/ 928 static int 929 rd_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 930 { 931 rd_devstate_t *rsp; 932 933 switch (infocmd) { 934 case DDI_INFO_DEVT2DEVINFO: 935 if ((rsp = ddi_get_soft_state(rd_statep, 936 getminor((dev_t)arg))) != NULL) { 937 *result = rsp->rd_dip; 938 return (DDI_SUCCESS); 939 } 940 *result = NULL; 941 return (DDI_FAILURE); 942 943 case DDI_INFO_DEVT2INSTANCE: 944 if ((rsp = ddi_get_soft_state(rd_statep, 945 getminor((dev_t)arg))) != NULL) { 946 *result = (void *)(uintptr_t) 947 ddi_get_instance(rsp->rd_dip); 948 return (DDI_SUCCESS); 949 } 950 *result = NULL; 951 return (DDI_FAILURE); 952 953 default: 954 return (DDI_FAILURE); 955 } 956 } 957 958 /*ARGSUSED3*/ 959 static int 960 rd_open(dev_t *devp, int flag, int otyp, cred_t *credp) 961 { 962 minor_t minor; 963 rd_devstate_t *rsp; 964 965 mutex_enter(&rd_lock); 966 967 minor = getminor(*devp); 968 if (minor == RD_CTL_MINOR) { 969 /* 970 * Master control device; must be opened exclusively. 971 */ 972 if ((flag & FEXCL) != FEXCL || otyp != OTYP_CHR) { 973 mutex_exit(&rd_lock); 974 return (EINVAL); 975 } 976 977 rsp = ddi_get_soft_state(rd_statep, RD_CTL_MINOR); 978 if (rsp == NULL) { 979 mutex_exit(&rd_lock); 980 return (ENXIO); 981 } 982 983 if (rd_is_open(rsp)) { 984 mutex_exit(&rd_lock); 985 return (EBUSY); 986 } 987 (void) rd_opened(rsp, OTYP_CHR); 988 989 mutex_exit(&rd_lock); 990 991 return (0); 992 } 993 994 rsp = ddi_get_soft_state(rd_statep, minor); 995 if (rsp == NULL) { 996 mutex_exit(&rd_lock); 997 return (ENXIO); 998 } 999 1000 if (rd_opened(rsp, otyp) == -1) { 1001 mutex_exit(&rd_lock); 1002 return (EINVAL); 1003 } 1004 1005 mutex_exit(&rd_lock); 1006 return (0); 1007 } 1008 1009 /*ARGSUSED*/ 1010 static int 1011 rd_close(dev_t dev, int flag, int otyp, struct cred *credp) 1012 { 1013 minor_t minor; 1014 rd_devstate_t *rsp; 1015 1016 mutex_enter(&rd_lock); 1017 1018 minor = getminor(dev); 1019 1020 rsp = ddi_get_soft_state(rd_statep, minor); 1021 if (rsp == NULL) { 1022 mutex_exit(&rd_lock); 1023 return (EINVAL); 1024 } 1025 1026 rd_closed(rsp, otyp); 1027 1028 mutex_exit(&rd_lock); 1029 1030 return (0); 1031 } 1032 1033 static void 1034 rd_minphys(struct buf *bp) 1035 { 1036 if (bp->b_bcount > rd_maxphys) { 1037 bp->b_bcount = rd_maxphys; 1038 } 1039 } 1040 1041 static void 1042 rd_rw(rd_devstate_t *rsp, struct buf *bp, offset_t offset, size_t nbytes) 1043 { 1044 int reading = bp->b_flags & B_READ; 1045 caddr_t buf_addr; 1046 1047 bp_mapin(bp); 1048 buf_addr = bp->b_un.b_addr; 1049 1050 while (nbytes > 0) { 1051 offset_t off_in_window; 1052 size_t rem_in_window, copy_bytes; 1053 caddr_t raddr; 1054 1055 mutex_enter(&rsp->rd_device_lock); 1056 rd_map_window(rsp, offset); 1057 1058 off_in_window = offset - rsp->rd_window_base; 1059 rem_in_window = rsp->rd_window_size - off_in_window; 1060 1061 raddr = rsp->rd_window_virt + off_in_window; 1062 copy_bytes = MIN(nbytes, rem_in_window); 1063 1064 if (reading) { 1065 (void) bcopy(raddr, buf_addr, copy_bytes); 1066 } else { 1067 (void) bcopy(buf_addr, raddr, copy_bytes); 1068 } 1069 mutex_exit(&rsp->rd_device_lock); 1070 1071 offset += copy_bytes; 1072 buf_addr += copy_bytes; 1073 nbytes -= copy_bytes; 1074 } 1075 } 1076 1077 static int 1078 rd_strategy(struct buf *bp) 1079 { 1080 rd_devstate_t *rsp; 1081 offset_t offset; 1082 1083 rsp = ddi_get_soft_state(rd_statep, getminor(bp->b_edev)); 1084 offset = bp->b_blkno * DEV_BSIZE; 1085 1086 if (rsp == NULL) { 1087 bp->b_error = ENXIO; 1088 bp->b_flags |= B_ERROR; 1089 } else if (offset >= rsp->rd_size) { 1090 bp->b_error = EINVAL; 1091 bp->b_flags |= B_ERROR; 1092 } else { 1093 size_t nbytes; 1094 1095 if (rsp->rd_kstat) { 1096 mutex_enter(rsp->rd_kstat->ks_lock); 1097 kstat_runq_enter(KSTAT_IO_PTR(rsp->rd_kstat)); 1098 mutex_exit(rsp->rd_kstat->ks_lock); 1099 } 1100 1101 nbytes = min(bp->b_bcount, rsp->rd_size - offset); 1102 1103 rd_rw(rsp, bp, offset, nbytes); 1104 1105 bp->b_resid = bp->b_bcount - nbytes; 1106 1107 if (rsp->rd_kstat) { 1108 kstat_io_t *kioptr; 1109 1110 mutex_enter(rsp->rd_kstat->ks_lock); 1111 kioptr = KSTAT_IO_PTR(rsp->rd_kstat); 1112 if (bp->b_flags & B_READ) { 1113 kioptr->nread += nbytes; 1114 kioptr->reads++; 1115 } else { 1116 kioptr->nwritten += nbytes; 1117 kioptr->writes++; 1118 } 1119 kstat_runq_exit(kioptr); 1120 mutex_exit(rsp->rd_kstat->ks_lock); 1121 } 1122 } 1123 1124 biodone(bp); 1125 return (0); 1126 } 1127 1128 /*ARGSUSED*/ 1129 static int 1130 rd_read(dev_t dev, struct uio *uiop, cred_t *credp) 1131 { 1132 rd_devstate_t *rsp; 1133 1134 rsp = ddi_get_soft_state(rd_statep, getminor(dev)); 1135 1136 if (uiop->uio_offset >= rsp->rd_size) 1137 return (EINVAL); 1138 1139 return (physio(rd_strategy, NULL, dev, B_READ, rd_minphys, uiop)); 1140 } 1141 1142 /*ARGSUSED*/ 1143 static int 1144 rd_write(dev_t dev, register struct uio *uiop, cred_t *credp) 1145 { 1146 rd_devstate_t *rsp; 1147 1148 rsp = ddi_get_soft_state(rd_statep, getminor(dev)); 1149 1150 if (uiop->uio_offset >= rsp->rd_size) 1151 return (EINVAL); 1152 1153 return (physio(rd_strategy, NULL, dev, B_WRITE, rd_minphys, uiop)); 1154 } 1155 1156 /*ARGSUSED*/ 1157 static int 1158 rd_create_disk(dev_t dev, struct rd_ioctl *urip, int mode, int *rvalp) 1159 { 1160 struct rd_ioctl kri; 1161 size_t size; 1162 rd_devstate_t *rsp; 1163 1164 if (ddi_copyin(urip, &kri, sizeof (kri), mode) == -1) { 1165 return (EFAULT); 1166 } 1167 1168 kri.ri_name[RD_NAME_LEN] = '\0'; 1169 1170 size = kri.ri_size; 1171 if (size == 0) { 1172 return (EINVAL); 1173 } 1174 size = ptob(btopr(size)); 1175 1176 mutex_enter(&rd_lock); 1177 1178 if (rd_find_named_disk(kri.ri_name) != NULL) { 1179 mutex_exit(&rd_lock); 1180 return (EEXIST); 1181 } 1182 1183 rsp = rd_alloc_resources(kri.ri_name, 0, size, rd_dip); 1184 if (rsp == NULL) { 1185 mutex_exit(&rd_lock); 1186 return (EAGAIN); 1187 } 1188 1189 mutex_exit(&rd_lock); 1190 1191 return (ddi_copyout(&kri, urip, sizeof (kri), mode) == -1 ? EFAULT : 0); 1192 } 1193 1194 /*ARGSUSED*/ 1195 static int 1196 rd_delete_disk(dev_t dev, struct rd_ioctl *urip, int mode) 1197 { 1198 struct rd_ioctl kri; 1199 rd_devstate_t *rsp; 1200 1201 if (ddi_copyin(urip, &kri, sizeof (kri), mode) == -1) { 1202 return (EFAULT); 1203 } 1204 1205 kri.ri_name[RD_NAME_LEN] = '\0'; 1206 1207 mutex_enter(&rd_lock); 1208 1209 rsp = rd_find_named_disk(kri.ri_name); 1210 if (rsp == NULL || rsp->rd_dip != rd_dip) { 1211 mutex_exit(&rd_lock); 1212 return (EINVAL); 1213 } 1214 if (rd_is_open(rsp)) { 1215 mutex_exit(&rd_lock); 1216 return (EBUSY); 1217 } 1218 1219 rd_dealloc_resources(rsp); 1220 1221 mutex_exit(&rd_lock); 1222 1223 return (0); 1224 } 1225 1226 /*ARGSUSED*/ 1227 static int 1228 rd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 1229 { 1230 minor_t minor; 1231 int error; 1232 enum dkio_state dkstate; 1233 rd_devstate_t *rsp; 1234 1235 minor = getminor(dev); 1236 1237 /* 1238 * Ramdisk ioctls only apply to the master device. 1239 */ 1240 if (minor == RD_CTL_MINOR) { 1241 struct rd_ioctl *rip = (struct rd_ioctl *)arg; 1242 1243 /* 1244 * The query commands only need read-access - i.e., normal 1245 * users are allowed to do those on the controlling device 1246 * as long as they can open it read-only. 1247 */ 1248 switch (cmd) { 1249 case RD_CREATE_DISK: 1250 if ((mode & FWRITE) == 0) 1251 return (EPERM); 1252 return (rd_create_disk(dev, rip, mode, rvalp)); 1253 1254 case RD_DELETE_DISK: 1255 if ((mode & FWRITE) == 0) 1256 return (EPERM); 1257 return (rd_delete_disk(dev, rip, mode)); 1258 1259 default: 1260 return (EINVAL); 1261 } 1262 } 1263 1264 rsp = ddi_get_soft_state(rd_statep, minor); 1265 if (rsp == NULL) { 1266 return (ENXIO); 1267 } 1268 1269 /* 1270 * These are for faking out utilities like newfs. 1271 */ 1272 switch (cmd) { 1273 case DKIOCGVTOC: 1274 switch (ddi_model_convert_from(mode & FMODELS)) { 1275 case DDI_MODEL_ILP32: { 1276 struct vtoc32 vtoc32; 1277 1278 vtoctovtoc32(rsp->rd_vtoc, vtoc32); 1279 if (ddi_copyout(&vtoc32, (void *)arg, 1280 sizeof (struct vtoc32), mode)) 1281 return (EFAULT); 1282 } 1283 break; 1284 1285 case DDI_MODEL_NONE: 1286 if (ddi_copyout(&rsp->rd_vtoc, (void *)arg, 1287 sizeof (struct vtoc), mode)) 1288 return (EFAULT); 1289 break; 1290 } 1291 return (0); 1292 case DKIOCINFO: 1293 error = ddi_copyout(&rsp->rd_ci, (void *)arg, 1294 sizeof (struct dk_cinfo), mode); 1295 if (error) 1296 return (EFAULT); 1297 return (0); 1298 case DKIOCG_VIRTGEOM: 1299 case DKIOCG_PHYGEOM: 1300 case DKIOCGGEOM: 1301 error = ddi_copyout(&rsp->rd_dkg, (void *)arg, 1302 sizeof (struct dk_geom), mode); 1303 if (error) 1304 return (EFAULT); 1305 return (0); 1306 case DKIOCSTATE: 1307 /* the file is always there */ 1308 dkstate = DKIO_INSERTED; 1309 error = ddi_copyout(&dkstate, (void *)arg, 1310 sizeof (enum dkio_state), mode); 1311 if (error) 1312 return (EFAULT); 1313 return (0); 1314 default: 1315 return (ENOTTY); 1316 } 1317 } 1318 1319 1320 static struct cb_ops rd_cb_ops = { 1321 rd_open, 1322 rd_close, 1323 rd_strategy, 1324 nodev, 1325 nodev, /* dump */ 1326 rd_read, 1327 rd_write, 1328 rd_ioctl, 1329 nodev, /* devmap */ 1330 nodev, /* mmap */ 1331 nodev, /* segmap */ 1332 nochpoll, /* poll */ 1333 ddi_prop_op, 1334 NULL, 1335 D_NEW | D_MP 1336 }; 1337 1338 static struct dev_ops rd_ops = { 1339 DEVO_REV, 1340 0, 1341 rd_getinfo, 1342 nulldev, /* identify */ 1343 nulldev, /* probe */ 1344 rd_attach, 1345 rd_detach, 1346 nodev, /* reset */ 1347 &rd_cb_ops, 1348 (struct bus_ops *)0 1349 }; 1350 1351 1352 extern struct mod_ops mod_driverops; 1353 1354 static struct modldrv modldrv = { 1355 &mod_driverops, 1356 "ramdisk driver", 1357 &rd_ops 1358 }; 1359 1360 static struct modlinkage modlinkage = { 1361 MODREV_1, 1362 &modldrv, 1363 0 1364 }; 1365 1366 int 1367 _init(void) 1368 { 1369 int e; 1370 1371 if ((e = ddi_soft_state_init(&rd_statep, 1372 sizeof (rd_devstate_t), 0)) != 0) { 1373 return (e); 1374 } 1375 1376 mutex_init(&rd_lock, NULL, MUTEX_DRIVER, NULL); 1377 1378 if ((e = mod_install(&modlinkage)) != 0) { 1379 mutex_destroy(&rd_lock); 1380 ddi_soft_state_fini(&rd_statep); 1381 } 1382 1383 return (e); 1384 } 1385 1386 int 1387 _fini(void) 1388 { 1389 int e; 1390 1391 if ((e = mod_remove(&modlinkage)) != 0) { 1392 return (e); 1393 } 1394 1395 ddi_soft_state_fini(&rd_statep); 1396 mutex_destroy(&rd_lock); 1397 1398 return (e); 1399 } 1400 1401 int 1402 _info(struct modinfo *modinfop) 1403 { 1404 return (mod_info(&modlinkage, modinfop)); 1405 } 1406