/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * Ramdisk device driver.
 *
 * There are two types of ramdisk: 'real' OBP-created ramdisks, and 'pseudo'
 * ramdisks created at runtime with no corresponding OBP device node. The
 * ramdisk(7D) driver is capable of dealing with both, and with the creation
 * and deletion of 'pseudo' ramdisks.
 *
 * Every ramdisk has a single 'state' structure which maintains data for
 * that ramdisk, and is assigned a single minor number. The bottom 10-bits
 * of the minor number index the state structures; the top 8-bits give a
 * 'real OBP disk' number, i.e. they are zero for 'pseudo' ramdisks. Thus
 * it is possible to distinguish 'real' from 'pseudo' ramdisks using the
 * top 8-bits of the minor number.
 *
 * Each OBP-created ramdisk has its own node in the device tree with an
 * "existing" property which describes the one-or-more physical address ranges
 * assigned to the ramdisk. All 'pseudo' ramdisks share a common devinfo
 * structure.
 *
 * A single character device node is used by ramdiskadm(1M) to communicate
 * with the ramdisk driver, with minor number 0:
 *
 *	/dev/ramdiskctl -> /devices/pseudo/ramdisk@0:ctl
 *
 * For consistent access, block and raw device nodes are created for *every*
 * ramdisk. For 'pseudo' ramdisks:
 *
 *	/dev/ramdisk/<diskname>  -> /devices/pseudo/ramdisk@0:<diskname>
 *	/dev/rramdisk/<diskname> -> /devices/pseudo/ramdisk@0:<diskname>,raw
 *
 * For OBP-created ramdisks:
 *
 *	/dev/ramdisk/<diskname>  -> /devices/ramdisk-<diskname>:a
 *	/dev/rramdisk/<diskname> -> /devices/ramdisk-<diskname>:a,raw
 *
 * This allows the transition from the standalone to the kernel to proceed
 * when booting from a ramdisk, and for the installation to correctly identify
 * the root device.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/errno.h>
#include <sys/uio.h>
#include <sys/buf.h>
#include <sys/modctl.h>
#include <sys/open.h>
#include <sys/kmem.h>
#include <sys/poll.h>
#include <sys/conf.h>
#include <sys/cmn_err.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ramdisk.h>
#include <vm/seg_kmem.h>

/*
 * An opaque handle where information about our set of ramdisk devices lives.
 * It is set up by ddi_soft_state_init() in _init() and indexed by minor
 * number through ddi_get_soft_state().
 */
static void *rd_statep;

/*
 * Pointer to devinfo for the 'pseudo' ramdisks. Real OBP-created ramdisks
 * get their own individual devinfo. Set when the pseudo node attaches and
 * cleared again when it detaches.
 */
static dev_info_t *rd_dip = NULL;

/*
 * Global state lock. Held across attach/detach, open/close and the
 * create/delete ioctls; the lookup helpers below assert that it is owned.
 */
static kmutex_t rd_lock;

/*
 * Maximum number of ramdisks supported by this driver. May be overridden by
 * the "max_disks" property; rd_init_tuneables() keeps it below RD_MAX_DISKS.
 */
static uint32_t rd_max_disks = RD_DFLT_DISKS;

/*
 * Percentage of physical memory which can be assigned to pseudo ramdisks,
 * what that equates to in pages, and how many pages are currently assigned.
112 */ 113 static uint_t rd_percent_physmem = RD_DEFAULT_PERCENT_PHYSMEM; 114 static pgcnt_t rd_max_physmem; 115 static pgcnt_t rd_tot_physmem; 116 117 static uint_t rd_maxphys = RD_DEFAULT_MAXPHYS; 118 119 /* 120 * Is the driver busy, i.e. are there any pseudo ramdisk devices in existence? 121 */ 122 static int 123 rd_is_busy(void) 124 { 125 minor_t minor; 126 rd_devstate_t *rsp; 127 128 ASSERT(mutex_owned(&rd_lock)); 129 for (minor = 1; minor <= rd_max_disks; ++minor) { 130 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL && 131 rsp->rd_dip == rd_dip) { 132 return (EBUSY); 133 } 134 } 135 return (0); 136 } 137 138 /* 139 * Find the first free minor number; returns zero if there isn't one. 140 */ 141 static minor_t 142 rd_find_free_minor(void) 143 { 144 minor_t minor; 145 146 ASSERT(mutex_owned(&rd_lock)); 147 for (minor = 1; minor <= rd_max_disks; ++minor) { 148 if (ddi_get_soft_state(rd_statep, minor) == NULL) { 149 return (minor); 150 } 151 } 152 return (0); 153 } 154 155 /* 156 * Locate the rd_devstate for the named ramdisk; returns NULL if not found. 157 * Each ramdisk is identified uniquely by name, i.e. an OBP-created ramdisk 158 * cannot have the same name as a pseudo ramdisk. 159 */ 160 static rd_devstate_t * 161 rd_find_named_disk(char *name) 162 { 163 minor_t minor; 164 rd_devstate_t *rsp; 165 166 ASSERT(mutex_owned(&rd_lock)); 167 for (minor = 1; minor <= rd_max_disks; ++minor) { 168 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL && 169 strcmp(rsp->rd_name, name) == 0) { 170 return (rsp); 171 } 172 } 173 return (NULL); 174 } 175 176 /* 177 * Locate the rd_devstate for the real OBP-created ramdisk whose devinfo 178 * is referenced by 'dip'; returns NULL if not found (shouldn't happen). 
179 */ 180 static rd_devstate_t * 181 rd_find_dip_state(dev_info_t *dip) 182 { 183 minor_t minor; 184 rd_devstate_t *rsp; 185 186 ASSERT(mutex_owned(&rd_lock)); 187 for (minor = 1; minor <= rd_max_disks; ++minor) { 188 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL && 189 rsp->rd_dip == dip) { 190 return (rsp); 191 } 192 } 193 return (NULL); 194 } 195 196 /* 197 * Is the ramdisk open? 198 */ 199 static int 200 rd_is_open(rd_devstate_t *rsp) 201 { 202 ASSERT(mutex_owned(&rd_lock)); 203 return (rsp->rd_chr_open || rsp->rd_blk_open || rsp->rd_lyr_open_cnt); 204 } 205 206 /* 207 * Mark the ramdisk open. 208 */ 209 static int 210 rd_opened(rd_devstate_t *rsp, int otyp) 211 { 212 ASSERT(mutex_owned(&rd_lock)); 213 switch (otyp) { 214 case OTYP_CHR: 215 rsp->rd_chr_open = 1; 216 break; 217 case OTYP_BLK: 218 rsp->rd_blk_open = 1; 219 break; 220 case OTYP_LYR: 221 rsp->rd_lyr_open_cnt++; 222 break; 223 default: 224 return (-1); 225 } 226 return (0); 227 } 228 229 /* 230 * Mark the ramdisk closed. 231 */ 232 static void 233 rd_closed(rd_devstate_t *rsp, int otyp) 234 { 235 ASSERT(mutex_owned(&rd_lock)); 236 switch (otyp) { 237 case OTYP_CHR: 238 rsp->rd_chr_open = 0; 239 break; 240 case OTYP_BLK: 241 rsp->rd_blk_open = 0; 242 break; 243 case OTYP_LYR: 244 rsp->rd_lyr_open_cnt--; 245 break; 246 default: 247 break; 248 } 249 } 250 251 static void 252 rd_init_tuneables(void) 253 { 254 char *prop, *p; 255 256 /* 257 * Ensure sanity of 'rd_max_disks', which may be tuned in ramdisk.conf. 
258 */ 259 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, rd_dip, 0, 260 "max_disks", &prop) == DDI_PROP_SUCCESS) { 261 p = prop; 262 rd_max_disks = (uint32_t)stoi(&p); 263 ddi_prop_free(prop); 264 } 265 if (rd_max_disks >= RD_MAX_DISKS) { 266 cmn_err(CE_WARN, "ramdisk: rd_max_disks (%u) too big;" 267 " using default (%u).", rd_max_disks, RD_MAX_DISKS - 1); 268 269 rd_max_disks = RD_MAX_DISKS - 1; 270 } 271 272 /* 273 * Ensure sanity of 'rd_percent_physmem', which may be tuned 274 * in ramdisk.conf. 275 */ 276 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, rd_dip, 0, 277 "percent_physmem", &prop) == DDI_PROP_SUCCESS) { 278 p = prop; 279 rd_percent_physmem = (uint_t)stoi(&p); 280 ddi_prop_free(prop); 281 } 282 if (rd_percent_physmem >= 100) { 283 cmn_err(CE_WARN, "ramdisk: rd_percent_physmem (%u) >= 100;" 284 " using default (%u%%).", rd_percent_physmem, 285 RD_DEFAULT_PERCENT_PHYSMEM); 286 287 rd_percent_physmem = RD_DEFAULT_PERCENT_PHYSMEM; 288 } 289 290 /* 291 * Since availrmem_initial is a long, this won't overflow. 292 */ 293 rd_max_physmem = (availrmem_initial * rd_percent_physmem) / 100; 294 } 295 296 /* 297 * Allocate enough physical pages to hold "npages" pages. Returns an 298 * array of page_t * pointers that can later be mapped in or out via 299 * rd_{un}map_window() but is otherwise opaque, or NULL on failure. 
300 */ 301 page_t ** 302 rd_phys_alloc(pgcnt_t npages) 303 { 304 page_t *pp, **ppa; 305 spgcnt_t i; 306 size_t ppalen; 307 struct seg kseg; 308 caddr_t addr; /* For coloring */ 309 310 if (rd_tot_physmem + npages > rd_max_physmem) 311 return (NULL); 312 313 if (!page_resv(npages, KM_NOSLEEP)) 314 return (NULL); 315 316 if (!page_create_wait(npages, 0)) { 317 page_unresv(npages); 318 return (NULL); 319 } 320 321 ppalen = npages * sizeof (struct page_t *); 322 ppa = kmem_zalloc(ppalen, KM_NOSLEEP); 323 if (ppa == NULL) { 324 page_create_putback(npages); 325 page_unresv(npages); 326 return (NULL); 327 } 328 329 kseg.s_as = &kas; 330 for (i = 0, addr = NULL; i < npages; ++i, addr += PAGESIZE) { 331 pp = page_get_freelist(&kvp, 0, &kseg, addr, PAGESIZE, 0, NULL); 332 if (pp == NULL) { 333 pp = page_get_cachelist(&kvp, 0, &kseg, addr, 0, NULL); 334 if (pp == NULL) 335 goto out; 336 if (!PP_ISAGED(pp)) 337 page_hashout(pp, NULL); 338 } 339 340 PP_CLRFREE(pp); 341 PP_CLRAGED(pp); 342 ppa[i] = pp; 343 } 344 345 for (i = 0; i < npages; i++) 346 page_downgrade(ppa[i]); 347 rd_tot_physmem += npages; 348 349 return (ppa); 350 351 out: 352 ASSERT(i < npages); 353 page_create_putback(npages - i); 354 while (--i >= 0) 355 page_free(ppa[i], 0); 356 kmem_free(ppa, ppalen); 357 page_unresv(npages); 358 359 return (NULL); 360 } 361 362 /* 363 * Free physical pages previously allocated via rd_phys_alloc(); note that 364 * this function may block as it has to wait until it can exclusively lock 365 * all the pages first. 366 */ 367 static void 368 rd_phys_free(page_t **ppa, pgcnt_t npages) 369 { 370 pgcnt_t i; 371 size_t ppalen = npages * sizeof (struct page_t *); 372 373 for (i = 0; i < npages; ++i) { 374 if (! page_tryupgrade(ppa[i])) { 375 page_unlock(ppa[i]); 376 while (! 
page_lock(ppa[i], SE_EXCL, NULL, P_RECLAIM)) 377 ; 378 } 379 page_free(ppa[i], 0); 380 } 381 382 kmem_free(ppa, ppalen); 383 384 page_unresv(npages); 385 rd_tot_physmem -= npages; 386 } 387 388 /* 389 * Remove a window mapping (if present). 390 */ 391 static void 392 rd_unmap_window(rd_devstate_t *rsp) 393 { 394 if (rsp->rd_window_base != RD_WINDOW_NOT_MAPPED) { 395 hat_unload(kas.a_hat, rsp->rd_window_virt, rsp->rd_window_size, 396 HAT_UNLOAD_UNLOCK); 397 } 398 } 399 400 /* 401 * Map a portion of the ramdisk into the virtual window. 402 */ 403 static void 404 rd_map_window(rd_devstate_t *rsp, off_t offset) 405 { 406 pgcnt_t offpgs = btop(offset); 407 408 if (rsp->rd_window_base != RD_WINDOW_NOT_MAPPED) { 409 /* 410 * Already mapped; is offset within our window? 411 */ 412 if (offset >= rsp->rd_window_base && 413 offset < rsp->rd_window_base + rsp->rd_window_size) { 414 return; 415 } 416 417 /* 418 * No, we need to re-map; toss the old mapping. 419 */ 420 rd_unmap_window(rsp); 421 } 422 rsp->rd_window_base = ptob(offpgs); 423 424 /* 425 * Different algorithms depending on whether this is a real 426 * OBP-created ramdisk, or a pseudo ramdisk. 427 */ 428 if (rsp->rd_dip == rd_dip) { 429 pgcnt_t pi, lastpi; 430 caddr_t vaddr; 431 432 /* 433 * Find the range of pages which should be mapped. 434 */ 435 pi = offpgs; 436 lastpi = pi + btopr(rsp->rd_window_size); 437 if (lastpi > rsp->rd_npages) { 438 lastpi = rsp->rd_npages; 439 } 440 441 /* 442 * Load the mapping. 443 */ 444 vaddr = rsp->rd_window_virt; 445 for (; pi < lastpi; ++pi) { 446 hat_memload(kas.a_hat, vaddr, rsp->rd_ppa[pi], 447 (PROT_READ | PROT_WRITE) | HAT_NOSYNC, 448 HAT_LOAD_LOCK); 449 vaddr += ptob(1); 450 } 451 } else { 452 uint_t i; 453 pfn_t pfn; 454 455 /* 456 * Real OBP-created ramdisk: locate the physical range which 457 * contains this offset. 
458 */ 459 for (i = 0; i < rsp->rd_nexisting; ++i) { 460 if (offset < rsp->rd_existing[i].size) { 461 break; 462 } 463 offset -= rsp->rd_existing[i].size; 464 } 465 ASSERT(i < rsp->rd_nexisting); 466 467 /* 468 * Load the mapping. 469 */ 470 pfn = btop(rsp->rd_existing[i].phys + offset); 471 hat_devload(kas.a_hat, rsp->rd_window_virt, rsp->rd_window_size, 472 pfn, (PROT_READ | PROT_WRITE), 473 HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK); 474 } 475 } 476 477 /* 478 * Fakes up a disk geometry, and one big partition, based on the size 479 * of the file. This is needed because we allow newfs'ing the device, 480 * and newfs will do several disk ioctls to figure out the geometry and 481 * partition information. It uses that information to determine the parameters 482 * to pass to mkfs. Geometry is pretty much irrelevant these days, but we 483 * have to support it. 484 * 485 * Stolen from lofi.c - should maybe split out common code sometime. 486 */ 487 static void 488 rd_fake_disk_geometry(rd_devstate_t *rsp) 489 { 490 /* dk_geom - see dkio(7I) */ 491 /* 492 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs 493 * of sectors), but that breaks programs like fdisk which want to 494 * partition a disk by cylinder. With one cylinder, you can't create 495 * an fdisk partition and put pcfs on it for testing (hard to pick 496 * a number between one and one). 497 * 498 * The cheezy floppy test is an attempt to not have too few cylinders 499 * for a small file, or so many on a big file that you waste space 500 * for backup superblocks or cylinder group structures. 501 */ 502 if (rsp->rd_size < (2 * 1024 * 1024)) /* floppy? 
*/ 503 rsp->rd_dkg.dkg_ncyl = rsp->rd_size / (100 * 1024); 504 else 505 rsp->rd_dkg.dkg_ncyl = rsp->rd_size / (300 * 1024); 506 /* in case file file is < 100k */ 507 if (rsp->rd_dkg.dkg_ncyl == 0) 508 rsp->rd_dkg.dkg_ncyl = 1; 509 rsp->rd_dkg.dkg_acyl = 0; 510 rsp->rd_dkg.dkg_bcyl = 0; 511 rsp->rd_dkg.dkg_nhead = 1; 512 rsp->rd_dkg.dkg_obs1 = 0; 513 rsp->rd_dkg.dkg_intrlv = 0; 514 rsp->rd_dkg.dkg_obs2 = 0; 515 rsp->rd_dkg.dkg_obs3 = 0; 516 rsp->rd_dkg.dkg_apc = 0; 517 rsp->rd_dkg.dkg_rpm = 7200; 518 rsp->rd_dkg.dkg_pcyl = rsp->rd_dkg.dkg_ncyl + rsp->rd_dkg.dkg_acyl; 519 rsp->rd_dkg.dkg_nsect = rsp->rd_size / 520 (DEV_BSIZE * rsp->rd_dkg.dkg_ncyl); 521 rsp->rd_dkg.dkg_write_reinstruct = 0; 522 rsp->rd_dkg.dkg_read_reinstruct = 0; 523 524 /* vtoc - see dkio(7I) */ 525 bzero(&rsp->rd_vtoc, sizeof (struct vtoc)); 526 rsp->rd_vtoc.v_sanity = VTOC_SANE; 527 rsp->rd_vtoc.v_version = V_VERSION; 528 bcopy(RD_DRIVER_NAME, rsp->rd_vtoc.v_volume, 7); 529 rsp->rd_vtoc.v_sectorsz = DEV_BSIZE; 530 rsp->rd_vtoc.v_nparts = 1; 531 rsp->rd_vtoc.v_part[0].p_tag = V_UNASSIGNED; 532 rsp->rd_vtoc.v_part[0].p_flag = V_UNMNT; 533 rsp->rd_vtoc.v_part[0].p_start = (daddr_t)0; 534 /* 535 * The partition size cannot just be the number of sectors, because 536 * that might not end on a cylinder boundary. And if that's the case, 537 * newfs/mkfs will print a scary warning. So just figure the size 538 * based on the number of cylinders and sectors/cylinder. 
539 */ 540 rsp->rd_vtoc.v_part[0].p_size = rsp->rd_dkg.dkg_pcyl * 541 rsp->rd_dkg.dkg_nsect * rsp->rd_dkg.dkg_nhead; 542 543 /* dk_cinfo - see dkio(7I) */ 544 bzero(&rsp->rd_ci, sizeof (struct dk_cinfo)); 545 (void) strcpy(rsp->rd_ci.dki_cname, RD_DRIVER_NAME); 546 rsp->rd_ci.dki_ctype = DKC_MD; 547 rsp->rd_ci.dki_flags = 0; 548 rsp->rd_ci.dki_cnum = 0; 549 rsp->rd_ci.dki_addr = 0; 550 rsp->rd_ci.dki_space = 0; 551 rsp->rd_ci.dki_prio = 0; 552 rsp->rd_ci.dki_vec = 0; 553 (void) strcpy(rsp->rd_ci.dki_dname, RD_DRIVER_NAME); 554 rsp->rd_ci.dki_unit = 0; 555 rsp->rd_ci.dki_slave = 0; 556 rsp->rd_ci.dki_partition = 0; 557 /* 558 * newfs uses this to set maxcontig. Must not be < 16, or it 559 * will be 0 when newfs multiplies it by DEV_BSIZE and divides 560 * it by the block size. Then tunefs doesn't work because 561 * maxcontig is 0. 562 */ 563 rsp->rd_ci.dki_maxtransfer = 16; 564 } 565 566 /* 567 * Deallocate resources (virtual and physical, device nodes, structures) 568 * from a ramdisk. 569 */ 570 static void 571 rd_dealloc_resources(rd_devstate_t *rsp) 572 { 573 dev_info_t *dip = rsp->rd_dip; 574 char namebuf[RD_NAME_LEN + 5]; 575 dev_t fulldev; 576 577 if (rsp->rd_window_virt != NULL) { 578 if (rsp->rd_window_base != RD_WINDOW_NOT_MAPPED) { 579 rd_unmap_window(rsp); 580 } 581 vmem_free(heap_arena, rsp->rd_window_virt, rsp->rd_window_size); 582 } 583 mutex_destroy(&rsp->rd_device_lock); 584 585 if (rsp->rd_existing) { 586 ddi_prop_free(rsp->rd_existing); 587 } 588 if (rsp->rd_ppa != NULL) { 589 rd_phys_free(rsp->rd_ppa, rsp->rd_npages); 590 } 591 592 /* 593 * Remove the block and raw device nodes. 
594 */ 595 if (dip == rd_dip) { 596 (void) snprintf(namebuf, sizeof (namebuf), "%s", 597 rsp->rd_name); 598 ddi_remove_minor_node(dip, namebuf); 599 (void) snprintf(namebuf, sizeof (namebuf), "%s,raw", 600 rsp->rd_name); 601 ddi_remove_minor_node(dip, namebuf); 602 } else { 603 ddi_remove_minor_node(dip, "a"); 604 ddi_remove_minor_node(dip, "a,raw"); 605 } 606 607 /* 608 * Remove the "Size" and "Nblocks" properties. 609 */ 610 fulldev = makedevice(ddi_driver_major(dip), rsp->rd_minor); 611 (void) ddi_prop_remove(fulldev, dip, SIZE_PROP_NAME); 612 (void) ddi_prop_remove(fulldev, dip, NBLOCKS_PROP_NAME); 613 614 if (rsp->rd_kstat) { 615 kstat_delete(rsp->rd_kstat); 616 mutex_destroy(&rsp->rd_kstat_lock); 617 } 618 619 ddi_soft_state_free(rd_statep, rsp->rd_minor); 620 } 621 622 /* 623 * Allocate resources (virtual and physical, device nodes, structures) 624 * to a ramdisk. 625 */ 626 static rd_devstate_t * 627 rd_alloc_resources(char *name, size_t size, dev_info_t *dip) 628 { 629 minor_t minor; 630 rd_devstate_t *rsp; 631 char namebuf[RD_NAME_LEN + 5]; 632 dev_t fulldev; 633 int64_t Nblocks_prop_val; 634 int64_t Size_prop_val; 635 636 minor = rd_find_free_minor(); 637 if (ddi_soft_state_zalloc(rd_statep, minor) == DDI_FAILURE) { 638 return (NULL); 639 } 640 rsp = ddi_get_soft_state(rd_statep, minor); 641 642 (void) strcpy(rsp->rd_name, name); 643 rsp->rd_dip = dip; 644 rsp->rd_minor = minor; 645 rsp->rd_size = size; 646 647 /* 648 * Allocate virtual window onto ramdisk. 649 */ 650 mutex_init(&rsp->rd_device_lock, NULL, MUTEX_DRIVER, NULL); 651 rsp->rd_window_base = RD_WINDOW_NOT_MAPPED; 652 rsp->rd_window_size = PAGESIZE; 653 rsp->rd_window_virt = vmem_alloc(heap_arena, 654 rsp->rd_window_size, VM_SLEEP); 655 if (rsp->rd_window_virt == NULL) { 656 goto create_failed; 657 } 658 659 /* 660 * Allocate physical memory for non-OBP ramdisks. 661 * Create pseudo block and raw device nodes. 
662 */ 663 if (dip == rd_dip) { 664 rsp->rd_npages = btopr(size); 665 rsp->rd_ppa = rd_phys_alloc(rsp->rd_npages); 666 if (rsp->rd_ppa == NULL) { 667 goto create_failed; 668 } 669 670 /* 671 * For non-OBP ramdisks the device nodes are: 672 * 673 * /devices/pseudo/ramdisk@0:<diskname> 674 * /devices/pseudo/ramdisk@0:<diskname>,raw 675 */ 676 (void) snprintf(namebuf, sizeof (namebuf), "%s", 677 rsp->rd_name); 678 if (ddi_create_minor_node(dip, namebuf, S_IFBLK, minor, 679 DDI_PSEUDO, 0) == DDI_FAILURE) { 680 goto create_failed; 681 } 682 (void) snprintf(namebuf, sizeof (namebuf), "%s,raw", 683 rsp->rd_name); 684 if (ddi_create_minor_node(dip, namebuf, S_IFCHR, minor, 685 DDI_PSEUDO, 0) == DDI_FAILURE) { 686 goto create_failed; 687 } 688 } else { 689 /* 690 * For OBP-created ramdisks the device nodes are: 691 * 692 * /devices/ramdisk-<diskname>:a 693 * /devices/ramdisk-<diskname>:a,raw 694 */ 695 if (ddi_create_minor_node(dip, "a", S_IFBLK, minor, 696 DDI_PSEUDO, 0) == DDI_FAILURE) { 697 goto create_failed; 698 } 699 if (ddi_create_minor_node(dip, "a,raw", S_IFCHR, minor, 700 DDI_PSEUDO, 0) == DDI_FAILURE) { 701 goto create_failed; 702 } 703 } 704 705 /* 706 * Create the "Size" and "Nblocks" properties. 707 */ 708 fulldev = makedevice(ddi_driver_major(dip), minor); 709 Size_prop_val = size; 710 if ((ddi_prop_update_int64(fulldev, dip, 711 SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) { 712 goto create_failed; 713 } 714 Nblocks_prop_val = size / DEV_BSIZE; 715 if ((ddi_prop_update_int64(fulldev, dip, 716 NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) { 717 goto create_failed; 718 } 719 720 /* 721 * Allocate kstat stuff. 
722 */ 723 rsp->rd_kstat = kstat_create(RD_DRIVER_NAME, minor, NULL, 724 "disk", KSTAT_TYPE_IO, 1, 0); 725 if (rsp->rd_kstat) { 726 mutex_init(&rsp->rd_kstat_lock, NULL, 727 MUTEX_DRIVER, NULL); 728 rsp->rd_kstat->ks_lock = &rsp->rd_kstat_lock; 729 kstat_install(rsp->rd_kstat); 730 } 731 732 rd_fake_disk_geometry(rsp); 733 734 return (rsp); 735 736 create_failed: 737 /* 738 * Cleanup. 739 */ 740 rd_dealloc_resources(rsp); 741 742 return (NULL); 743 } 744 745 /* 746 * Undo what we did in rd_attach, freeing resources and removing things which 747 * we installed. The system framework guarantees we are not active with this 748 * devinfo node in any other entry points at this time. 749 */ 750 static int 751 rd_common_detach(dev_info_t *dip) 752 { 753 if (dip == rd_dip) { 754 /* 755 * Pseudo node: can't detach if any pseudo ramdisks exist. 756 */ 757 if (rd_is_busy()) { 758 return (DDI_FAILURE); 759 } 760 ddi_soft_state_free(rd_statep, RD_CTL_MINOR); 761 rd_dip = NULL; 762 } else { 763 /* 764 * A 'real' ramdisk; find the state and free resources. 765 */ 766 rd_devstate_t *rsp; 767 768 if ((rsp = rd_find_dip_state(dip)) != NULL) { 769 rd_dealloc_resources(rsp); 770 } 771 } 772 ddi_remove_minor_node(dip, NULL); 773 774 return (DDI_SUCCESS); 775 } 776 777 static int 778 rd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 779 { 780 char *name; 781 rd_existing_t *ep = NULL; 782 uint_t nep, i; 783 size_t size = 0; 784 rd_devstate_t *rsp; 785 786 switch (cmd) { 787 788 case DDI_ATTACH: 789 mutex_enter(&rd_lock); 790 791 /* 792 * For pseudo ramdisk devinfo set up state 0 and :ctl device; 793 * else it's an OBP-created ramdisk. 794 */ 795 if (is_pseudo_device(dip)) { 796 rd_dip = dip; 797 rd_init_tuneables(); 798 799 /* 800 * The zeroth minor is reserved for the ramdisk 801 * 'control' device. 
802 */ 803 if (ddi_soft_state_zalloc(rd_statep, RD_CTL_MINOR) == 804 DDI_FAILURE) { 805 goto attach_failed; 806 } 807 rsp = ddi_get_soft_state(rd_statep, RD_CTL_MINOR); 808 rsp->rd_dip = dip; 809 810 if (ddi_create_minor_node(dip, RD_CTL_NODE, 811 S_IFCHR, 0, DDI_PSEUDO, NULL) == DDI_FAILURE) { 812 goto attach_failed; 813 } 814 } else { 815 RD_STRIP_PREFIX(name, ddi_node_name(dip)); 816 817 if (strlen(name) > RD_NAME_LEN) { 818 cmn_err(CE_CONT, 819 "%s: name too long - ignoring\n", name); 820 goto attach_failed; 821 } 822 823 /* 824 * An OBP-created ramdisk must have an 'existing' 825 * property; get and check it. 826 */ 827 if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, dip, 828 DDI_PROP_DONTPASS, RD_EXISTING_PROP_NAME, 829 (uchar_t **)&ep, &nep) != DDI_SUCCESS) { 830 cmn_err(CE_CONT, 831 "%s: " RD_EXISTING_PROP_NAME 832 " property missing\n", name); 833 goto attach_failed; 834 } 835 if (nep == 0 || (nep % sizeof (*ep)) != 0) { 836 cmn_err(CE_CONT, 837 "%s: " RD_EXISTING_PROP_NAME 838 " illegal size\n", name); 839 goto attach_failed; 840 } 841 nep /= sizeof (*ep); 842 843 /* 844 * Calculate the size of the ramdisk. 845 */ 846 for (i = 0; i < nep; ++i) { 847 size += ep[i].size; 848 } 849 850 /* 851 * Allocate driver resources for the ramdisk. 852 */ 853 if ((rsp = rd_alloc_resources(name, size, 854 dip)) == NULL) { 855 goto attach_failed; 856 } 857 858 rsp->rd_existing = ep; 859 rsp->rd_nexisting = nep; 860 } 861 862 mutex_exit(&rd_lock); 863 864 ddi_report_dev(dip); 865 866 return (DDI_SUCCESS); 867 868 case DDI_RESUME: 869 return (DDI_SUCCESS); 870 871 default: 872 return (DDI_FAILURE); 873 } 874 875 attach_failed: 876 /* 877 * Use our common detach routine to unallocate any stuff which 878 * was allocated above. 
879 */ 880 (void) rd_common_detach(dip); 881 mutex_exit(&rd_lock); 882 883 if (ep != NULL) { 884 ddi_prop_free(ep); 885 } 886 return (DDI_FAILURE); 887 } 888 889 static int 890 rd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 891 { 892 int e; 893 894 switch (cmd) { 895 896 case DDI_DETACH: 897 mutex_enter(&rd_lock); 898 e = rd_common_detach(dip); 899 mutex_exit(&rd_lock); 900 901 return (e); 902 903 case DDI_SUSPEND: 904 return (DDI_SUCCESS); 905 906 default: 907 return (DDI_FAILURE); 908 } 909 } 910 911 /*ARGSUSED*/ 912 static int 913 rd_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 914 { 915 rd_devstate_t *rsp; 916 917 switch (infocmd) { 918 case DDI_INFO_DEVT2DEVINFO: 919 if ((rsp = ddi_get_soft_state(rd_statep, 920 getminor((dev_t)arg))) != NULL) { 921 *result = rsp->rd_dip; 922 return (DDI_SUCCESS); 923 } 924 *result = NULL; 925 return (DDI_FAILURE); 926 927 case DDI_INFO_DEVT2INSTANCE: 928 if ((rsp = ddi_get_soft_state(rd_statep, 929 getminor((dev_t)arg))) != NULL) { 930 *result = (void *)(uintptr_t) 931 ddi_get_instance(rsp->rd_dip); 932 return (DDI_SUCCESS); 933 } 934 *result = NULL; 935 return (DDI_FAILURE); 936 937 default: 938 return (DDI_FAILURE); 939 } 940 } 941 942 /*ARGSUSED3*/ 943 static int 944 rd_open(dev_t *devp, int flag, int otyp, cred_t *credp) 945 { 946 minor_t minor; 947 rd_devstate_t *rsp; 948 949 mutex_enter(&rd_lock); 950 951 minor = getminor(*devp); 952 if (minor == RD_CTL_MINOR) { 953 /* 954 * Master control device; must be opened exclusively. 
955 */ 956 if ((flag & FEXCL) != FEXCL || otyp != OTYP_CHR) { 957 mutex_exit(&rd_lock); 958 return (EINVAL); 959 } 960 961 rsp = ddi_get_soft_state(rd_statep, RD_CTL_MINOR); 962 if (rsp == NULL) { 963 mutex_exit(&rd_lock); 964 return (ENXIO); 965 } 966 967 if (rd_is_open(rsp)) { 968 mutex_exit(&rd_lock); 969 return (EBUSY); 970 } 971 (void) rd_opened(rsp, OTYP_CHR); 972 973 mutex_exit(&rd_lock); 974 975 return (0); 976 } 977 978 rsp = ddi_get_soft_state(rd_statep, minor); 979 if (rsp == NULL) { 980 mutex_exit(&rd_lock); 981 return (ENXIO); 982 } 983 984 if (rd_opened(rsp, otyp) == -1) { 985 mutex_exit(&rd_lock); 986 return (EINVAL); 987 } 988 989 mutex_exit(&rd_lock); 990 return (0); 991 } 992 993 /*ARGSUSED*/ 994 static int 995 rd_close(dev_t dev, int flag, int otyp, struct cred *credp) 996 { 997 minor_t minor; 998 rd_devstate_t *rsp; 999 1000 mutex_enter(&rd_lock); 1001 1002 minor = getminor(dev); 1003 1004 rsp = ddi_get_soft_state(rd_statep, minor); 1005 if (rsp == NULL) { 1006 mutex_exit(&rd_lock); 1007 return (EINVAL); 1008 } 1009 1010 rd_closed(rsp, otyp); 1011 1012 mutex_exit(&rd_lock); 1013 1014 return (0); 1015 } 1016 1017 static void 1018 rd_minphys(struct buf *bp) 1019 { 1020 if (bp->b_bcount > rd_maxphys) { 1021 bp->b_bcount = rd_maxphys; 1022 } 1023 } 1024 1025 static void 1026 rd_rw(rd_devstate_t *rsp, struct buf *bp, offset_t offset, size_t nbytes) 1027 { 1028 int reading = bp->b_flags & B_READ; 1029 caddr_t buf_addr; 1030 1031 bp_mapin(bp); 1032 buf_addr = bp->b_un.b_addr; 1033 1034 while (nbytes > 0) { 1035 offset_t off_in_window; 1036 size_t rem_in_window, copy_bytes; 1037 caddr_t raddr; 1038 1039 mutex_enter(&rsp->rd_device_lock); 1040 rd_map_window(rsp, offset); 1041 1042 off_in_window = offset - rsp->rd_window_base; 1043 rem_in_window = rsp->rd_window_size - off_in_window; 1044 1045 raddr = rsp->rd_window_virt + off_in_window; 1046 copy_bytes = MIN(nbytes, rem_in_window); 1047 1048 if (reading) { 1049 (void) bcopy(raddr, buf_addr, copy_bytes); 
1050 } else { 1051 (void) bcopy(buf_addr, raddr, copy_bytes); 1052 } 1053 mutex_exit(&rsp->rd_device_lock); 1054 1055 offset += copy_bytes; 1056 buf_addr += copy_bytes; 1057 nbytes -= copy_bytes; 1058 } 1059 } 1060 1061 static int 1062 rd_strategy(struct buf *bp) 1063 { 1064 rd_devstate_t *rsp; 1065 offset_t offset; 1066 1067 rsp = ddi_get_soft_state(rd_statep, getminor(bp->b_edev)); 1068 offset = bp->b_blkno * DEV_BSIZE; 1069 1070 if (rsp == NULL) { 1071 bp->b_error = ENXIO; 1072 bp->b_flags |= B_ERROR; 1073 } else if (offset >= rsp->rd_size) { 1074 bp->b_error = EINVAL; 1075 bp->b_flags |= B_ERROR; 1076 } else { 1077 size_t nbytes; 1078 1079 if (rsp->rd_kstat) { 1080 mutex_enter(rsp->rd_kstat->ks_lock); 1081 kstat_runq_enter(KSTAT_IO_PTR(rsp->rd_kstat)); 1082 mutex_exit(rsp->rd_kstat->ks_lock); 1083 } 1084 1085 nbytes = min(bp->b_bcount, rsp->rd_size - offset); 1086 1087 rd_rw(rsp, bp, offset, nbytes); 1088 1089 bp->b_resid = bp->b_bcount - nbytes; 1090 1091 if (rsp->rd_kstat) { 1092 kstat_io_t *kioptr; 1093 1094 mutex_enter(rsp->rd_kstat->ks_lock); 1095 kioptr = KSTAT_IO_PTR(rsp->rd_kstat); 1096 if (bp->b_flags & B_READ) { 1097 kioptr->nread += nbytes; 1098 kioptr->reads++; 1099 } else { 1100 kioptr->nwritten += nbytes; 1101 kioptr->writes++; 1102 } 1103 kstat_runq_exit(kioptr); 1104 mutex_exit(rsp->rd_kstat->ks_lock); 1105 } 1106 } 1107 1108 biodone(bp); 1109 return (0); 1110 } 1111 1112 /*ARGSUSED*/ 1113 static int 1114 rd_read(dev_t dev, struct uio *uiop, cred_t *credp) 1115 { 1116 rd_devstate_t *rsp; 1117 1118 rsp = ddi_get_soft_state(rd_statep, getminor(dev)); 1119 1120 if (uiop->uio_offset >= rsp->rd_size) 1121 return (EINVAL); 1122 1123 return (physio(rd_strategy, NULL, dev, B_READ, rd_minphys, uiop)); 1124 } 1125 1126 /*ARGSUSED*/ 1127 static int 1128 rd_write(dev_t dev, register struct uio *uiop, cred_t *credp) 1129 { 1130 rd_devstate_t *rsp; 1131 1132 rsp = ddi_get_soft_state(rd_statep, getminor(dev)); 1133 1134 if (uiop->uio_offset >= rsp->rd_size) 
1135 return (EINVAL); 1136 1137 return (physio(rd_strategy, NULL, dev, B_WRITE, rd_minphys, uiop)); 1138 } 1139 1140 /*ARGSUSED*/ 1141 static int 1142 rd_create_disk(dev_t dev, struct rd_ioctl *urip, int mode, int *rvalp) 1143 { 1144 struct rd_ioctl kri; 1145 size_t size; 1146 rd_devstate_t *rsp; 1147 1148 if (ddi_copyin(urip, &kri, sizeof (kri), mode) == -1) { 1149 return (EFAULT); 1150 } 1151 1152 kri.ri_name[RD_NAME_LEN] = '\0'; 1153 1154 size = kri.ri_size; 1155 if (size == 0) { 1156 return (EINVAL); 1157 } 1158 size = ptob(btopr(size)); 1159 1160 mutex_enter(&rd_lock); 1161 1162 if (rd_find_named_disk(kri.ri_name) != NULL) { 1163 mutex_exit(&rd_lock); 1164 return (EEXIST); 1165 } 1166 1167 rsp = rd_alloc_resources(kri.ri_name, size, rd_dip); 1168 if (rsp == NULL) { 1169 mutex_exit(&rd_lock); 1170 return (EAGAIN); 1171 } 1172 1173 mutex_exit(&rd_lock); 1174 1175 return (ddi_copyout(&kri, urip, sizeof (kri), mode) == -1 ? EFAULT : 0); 1176 } 1177 1178 /*ARGSUSED*/ 1179 static int 1180 rd_delete_disk(dev_t dev, struct rd_ioctl *urip, int mode) 1181 { 1182 struct rd_ioctl kri; 1183 rd_devstate_t *rsp; 1184 1185 if (ddi_copyin(urip, &kri, sizeof (kri), mode) == -1) { 1186 return (EFAULT); 1187 } 1188 1189 kri.ri_name[RD_NAME_LEN] = '\0'; 1190 1191 mutex_enter(&rd_lock); 1192 1193 rsp = rd_find_named_disk(kri.ri_name); 1194 if (rsp == NULL || rsp->rd_dip != rd_dip) { 1195 mutex_exit(&rd_lock); 1196 return (EINVAL); 1197 } 1198 if (rd_is_open(rsp)) { 1199 mutex_exit(&rd_lock); 1200 return (EBUSY); 1201 } 1202 1203 rd_dealloc_resources(rsp); 1204 1205 mutex_exit(&rd_lock); 1206 1207 return (0); 1208 } 1209 1210 /*ARGSUSED*/ 1211 static int 1212 rd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 1213 { 1214 minor_t minor; 1215 int error; 1216 enum dkio_state dkstate; 1217 rd_devstate_t *rsp; 1218 1219 minor = getminor(dev); 1220 1221 /* 1222 * Ramdisk ioctls only apply to the master device. 
1223 */ 1224 if (minor == RD_CTL_MINOR) { 1225 struct rd_ioctl *rip = (struct rd_ioctl *)arg; 1226 1227 /* 1228 * The query commands only need read-access - i.e., normal 1229 * users are allowed to do those on the controlling device 1230 * as long as they can open it read-only. 1231 */ 1232 switch (cmd) { 1233 case RD_CREATE_DISK: 1234 if ((mode & FWRITE) == 0) 1235 return (EPERM); 1236 return (rd_create_disk(dev, rip, mode, rvalp)); 1237 1238 case RD_DELETE_DISK: 1239 if ((mode & FWRITE) == 0) 1240 return (EPERM); 1241 return (rd_delete_disk(dev, rip, mode)); 1242 1243 default: 1244 return (EINVAL); 1245 } 1246 } 1247 1248 rsp = ddi_get_soft_state(rd_statep, minor); 1249 if (rsp == NULL) { 1250 return (ENXIO); 1251 } 1252 1253 /* 1254 * These are for faking out utilities like newfs. 1255 */ 1256 switch (cmd) { 1257 case DKIOCGVTOC: 1258 switch (ddi_model_convert_from(mode & FMODELS)) { 1259 case DDI_MODEL_ILP32: { 1260 struct vtoc32 vtoc32; 1261 1262 vtoctovtoc32(rsp->rd_vtoc, vtoc32); 1263 if (ddi_copyout(&vtoc32, (void *)arg, 1264 sizeof (struct vtoc32), mode)) 1265 return (EFAULT); 1266 } 1267 break; 1268 1269 case DDI_MODEL_NONE: 1270 if (ddi_copyout(&rsp->rd_vtoc, (void *)arg, 1271 sizeof (struct vtoc), mode)) 1272 return (EFAULT); 1273 break; 1274 } 1275 return (0); 1276 case DKIOCINFO: 1277 error = ddi_copyout(&rsp->rd_ci, (void *)arg, 1278 sizeof (struct dk_cinfo), mode); 1279 if (error) 1280 return (EFAULT); 1281 return (0); 1282 case DKIOCG_VIRTGEOM: 1283 case DKIOCG_PHYGEOM: 1284 case DKIOCGGEOM: 1285 error = ddi_copyout(&rsp->rd_dkg, (void *)arg, 1286 sizeof (struct dk_geom), mode); 1287 if (error) 1288 return (EFAULT); 1289 return (0); 1290 case DKIOCSTATE: 1291 /* the file is always there */ 1292 dkstate = DKIO_INSERTED; 1293 error = ddi_copyout(&dkstate, (void *)arg, 1294 sizeof (enum dkio_state), mode); 1295 if (error) 1296 return (EFAULT); 1297 return (0); 1298 default: 1299 return (ENOTTY); 1300 } 1301 } 1302 1303 1304 static struct cb_ops 
rd_cb_ops = { 1305 rd_open, 1306 rd_close, 1307 rd_strategy, 1308 nodev, 1309 nodev, /* dump */ 1310 rd_read, 1311 rd_write, 1312 rd_ioctl, 1313 nodev, /* devmap */ 1314 nodev, /* mmap */ 1315 nodev, /* segmap */ 1316 nochpoll, /* poll */ 1317 ddi_prop_op, 1318 NULL, 1319 D_NEW | D_MP 1320 }; 1321 1322 static struct dev_ops rd_ops = { 1323 DEVO_REV, 1324 0, 1325 rd_getinfo, 1326 nulldev, /* identify */ 1327 nulldev, /* probe */ 1328 rd_attach, 1329 rd_detach, 1330 nodev, /* reset */ 1331 &rd_cb_ops, 1332 (struct bus_ops *)0 1333 }; 1334 1335 1336 extern struct mod_ops mod_driverops; 1337 1338 static struct modldrv modldrv = { 1339 &mod_driverops, 1340 "ramdisk driver v%I%", 1341 &rd_ops 1342 }; 1343 1344 static struct modlinkage modlinkage = { 1345 MODREV_1, 1346 &modldrv, 1347 0 1348 }; 1349 1350 int 1351 _init(void) 1352 { 1353 int e; 1354 1355 if ((e = ddi_soft_state_init(&rd_statep, 1356 sizeof (rd_devstate_t), 0)) != 0) { 1357 return (e); 1358 } 1359 1360 mutex_init(&rd_lock, NULL, MUTEX_DRIVER, NULL); 1361 1362 if ((e = mod_install(&modlinkage)) != 0) { 1363 mutex_destroy(&rd_lock); 1364 ddi_soft_state_fini(&rd_statep); 1365 } 1366 1367 return (e); 1368 } 1369 1370 int 1371 _fini(void) 1372 { 1373 int e; 1374 1375 if ((e = mod_remove(&modlinkage)) != 0) { 1376 return (e); 1377 } 1378 1379 ddi_soft_state_fini(&rd_statep); 1380 mutex_destroy(&rd_lock); 1381 1382 return (e); 1383 } 1384 1385 int 1386 _info(struct modinfo *modinfop) 1387 { 1388 return (mod_info(&modlinkage, modinfop)); 1389 } 1390