1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Ramdisk device driver. 31 * 32 * There are two types of ramdisk: 'real' OBP-created ramdisks, and 'pseudo' 33 * ramdisks created at runtime with no corresponding OBP device node. The 34 * ramdisk(7D) driver is capable of dealing with both, and with the creation 35 * and deletion of 'pseudo' ramdisks. 36 * 37 * Every ramdisk has a single 'state' structure which maintains data for 38 * that ramdisk, and is assigned a single minor number. The bottom 10-bits 39 * of the minor number index the state structures; the top 8-bits give a 40 * 'real OBP disk' number, i.e. they are zero for 'pseudo' ramdisks. Thus 41 * it is possible to distinguish 'real' from 'pseudo' ramdisks using the 42 * top 8-bits of the minor number. 43 * 44 * Each OBP-created ramdisk has its own node in the device tree with an 45 * "existing" property which describes the one-or-more physical address ranges 46 * assigned to the ramdisk. All 'pseudo' ramdisks share a common devinfo 47 * structure. 48 * 49 * A single character device node is used by ramdiskadm(1M) to communicate 50 * with the ramdisk driver, with minor number 0: 51 * 52 * /dev/ramdiskctl -> /devices/pseudo/ramdisk@0:ctl 53 * 54 * For consistent access, block and raw device nodes are created for *every* 55 * ramdisk. For 'pseudo' ramdisks: 56 * 57 * /dev/ramdisk/<diskname> -> /devices/pseudo/ramdisk@0:<diskname> 58 * /dev/rramdisk/<diskname> -> /devices/pseudo/ramdisk@0:<diskname>,raw 59 * 60 * For OBP-created ramdisks: 61 * 62 * /dev/ramdisk/<diskname> -> /devices/ramdisk-<diskname>:a 63 * /dev/ramdisk/<diskname> -> /devices/ramdisk-<diskname>:a,raw 64 * 65 * This allows the transition from the standalone to the kernel to proceed 66 * when booting from a ramdisk, and for the installation to correctly identify 67 * the root device. 68 */ 69 70 #include <sys/types.h> 71 #include <sys/param.h> 72 #include <sys/sysmacros.h> 73 #include <sys/errno.h> 74 #include <sys/uio.h> 75 #include <sys/buf.h> 76 #include <sys/modctl.h> 77 #include <sys/open.h> 78 #include <sys/kmem.h> 79 #include <sys/poll.h> 80 #include <sys/conf.h> 81 #include <sys/cmn_err.h> 82 #include <sys/stat.h> 83 #include <sys/file.h> 84 #include <sys/vol.h> 85 #include <sys/ddi.h> 86 #include <sys/sunddi.h> 87 #include <sys/ramdisk.h> 88 #include <vm/seg_kmem.h> 89 90 /* 91 * An opaque handle where information about our set of ramdisk devices lives. 92 */ 93 static void *rd_statep; 94 95 /* 96 * Pointer to devinfo for the 'pseudo' ramdisks. Real OBP-created ramdisks 97 * get their own individual devinfo. 98 */ 99 static dev_info_t *rd_dip = NULL; 100 101 /* 102 * Global state lock. 103 */ 104 static kmutex_t rd_lock; 105 106 /* 107 * Maximum number of ramdisks supported by this driver. 108 */ 109 static uint32_t rd_max_disks = RD_DFLT_DISKS; 110 111 /* 112 * Percentage of physical memory which can be assigned to pseudo ramdisks, 113 * what that equates to in pages, and how many pages are currently assigned. 114 */ 115 static uint_t rd_percent_physmem = RD_DEFAULT_PERCENT_PHYSMEM; 116 static pgcnt_t rd_max_physmem; 117 static pgcnt_t rd_tot_physmem; 118 119 static uint_t rd_maxphys = RD_DEFAULT_MAXPHYS; 120 121 /* 122 * Is the driver busy, i.e. are there any pseudo ramdisk devices in existence? 123 */ 124 static int 125 rd_is_busy(void) 126 { 127 minor_t minor; 128 rd_devstate_t *rsp; 129 130 ASSERT(mutex_owned(&rd_lock)); 131 for (minor = 1; minor <= rd_max_disks; ++minor) { 132 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL && 133 rsp->rd_dip == rd_dip) { 134 return (EBUSY); 135 } 136 } 137 return (0); 138 } 139 140 /* 141 * Find the first free minor number; returns zero if there isn't one. 142 */ 143 static minor_t 144 rd_find_free_minor(void) 145 { 146 minor_t minor; 147 148 ASSERT(mutex_owned(&rd_lock)); 149 for (minor = 1; minor <= rd_max_disks; ++minor) { 150 if (ddi_get_soft_state(rd_statep, minor) == NULL) { 151 return (minor); 152 } 153 } 154 return (0); 155 } 156 157 /* 158 * Locate the rd_devstate for the named ramdisk; returns NULL if not found. 159 * Each ramdisk is identified uniquely by name, i.e. an OBP-created ramdisk 160 * cannot have the same name as a pseudo ramdisk. 161 */ 162 static rd_devstate_t * 163 rd_find_named_disk(char *name) 164 { 165 minor_t minor; 166 rd_devstate_t *rsp; 167 168 ASSERT(mutex_owned(&rd_lock)); 169 for (minor = 1; minor <= rd_max_disks; ++minor) { 170 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL && 171 strcmp(rsp->rd_name, name) == 0) { 172 return (rsp); 173 } 174 } 175 return (NULL); 176 } 177 178 /* 179 * Locate the rd_devstate for the real OBP-created ramdisk whose devinfo 180 * is referenced by 'dip'; returns NULL if not found (shouldn't happen). 181 */ 182 static rd_devstate_t * 183 rd_find_dip_state(dev_info_t *dip) 184 { 185 minor_t minor; 186 rd_devstate_t *rsp; 187 188 ASSERT(mutex_owned(&rd_lock)); 189 for (minor = 1; minor <= rd_max_disks; ++minor) { 190 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL && 191 rsp->rd_dip == dip) { 192 return (rsp); 193 } 194 } 195 return (NULL); 196 } 197 198 /* 199 * Is the ramdisk open? 200 */ 201 static int 202 rd_is_open(rd_devstate_t *rsp) 203 { 204 ASSERT(mutex_owned(&rd_lock)); 205 return (rsp->rd_chr_open || rsp->rd_blk_open || rsp->rd_lyr_open_cnt); 206 } 207 208 /* 209 * Mark the ramdisk open. 210 */ 211 static int 212 rd_opened(rd_devstate_t *rsp, int otyp) 213 { 214 ASSERT(mutex_owned(&rd_lock)); 215 switch (otyp) { 216 case OTYP_CHR: 217 rsp->rd_chr_open = 1; 218 break; 219 case OTYP_BLK: 220 rsp->rd_blk_open = 1; 221 break; 222 case OTYP_LYR: 223 rsp->rd_lyr_open_cnt++; 224 break; 225 default: 226 return (-1); 227 } 228 return (0); 229 } 230 231 /* 232 * Mark the ramdisk closed. 233 */ 234 static void 235 rd_closed(rd_devstate_t *rsp, int otyp) 236 { 237 ASSERT(mutex_owned(&rd_lock)); 238 switch (otyp) { 239 case OTYP_CHR: 240 rsp->rd_chr_open = 0; 241 break; 242 case OTYP_BLK: 243 rsp->rd_blk_open = 0; 244 break; 245 case OTYP_LYR: 246 rsp->rd_lyr_open_cnt--; 247 break; 248 default: 249 break; 250 } 251 } 252 253 static void 254 rd_init_tuneables(void) 255 { 256 char *prop, *p; 257 258 /* 259 * Ensure sanity of 'rd_max_disks', which may be tuned in ramdisk.conf. 260 */ 261 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, rd_dip, 0, 262 "max_disks", &prop) == DDI_PROP_SUCCESS) { 263 p = prop; 264 rd_max_disks = (uint32_t)stoi(&p); 265 ddi_prop_free(prop); 266 } 267 if (rd_max_disks >= RD_MAX_DISKS) { 268 cmn_err(CE_WARN, "ramdisk: rd_max_disks (%u) too big;" 269 " using default (%u).", rd_max_disks, RD_MAX_DISKS - 1); 270 271 rd_max_disks = RD_MAX_DISKS - 1; 272 } 273 274 /* 275 * Ensure sanity of 'rd_percent_physmem', which may be tuned 276 * in ramdisk.conf. 277 */ 278 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, rd_dip, 0, 279 "percent_physmem", &prop) == DDI_PROP_SUCCESS) { 280 p = prop; 281 rd_percent_physmem = (uint_t)stoi(&p); 282 ddi_prop_free(prop); 283 } 284 if (rd_percent_physmem >= 100) { 285 cmn_err(CE_WARN, "ramdisk: rd_percent_physmem (%u) >= 100;" 286 " using default (%u%%).", rd_percent_physmem, 287 RD_DEFAULT_PERCENT_PHYSMEM); 288 289 rd_percent_physmem = RD_DEFAULT_PERCENT_PHYSMEM; 290 } 291 292 /* 293 * Since availrmem is in pages (and is a long), this won't overflow. 294 */ 295 rd_max_physmem = (availrmem * rd_percent_physmem) / 100; 296 } 297 298 /* 299 * Allocate enough physical pages to hold `size' bytes. Returns an 300 * array of page_t * pointers that can later be mapped in or out via 301 * rd_{un}map_window() but is otherwise opaque, or NULL on failure. 302 * 303 * This code stolen from the NCA driver. 304 */ 305 page_t ** 306 rd_phys_alloc(pgcnt_t npages) 307 { 308 page_t *pp, **ppa; 309 pgcnt_t i; 310 size_t ppalen = npages * sizeof (struct page_t *); 311 struct seg kseg; 312 char *addr; /* For the purposes of coloring */ 313 314 if (rd_tot_physmem + npages > rd_max_physmem) { 315 return (NULL); 316 } 317 ppa = kmem_zalloc(ppalen, KM_SLEEP); 318 (void) page_resv(npages, KM_SLEEP); 319 320 for (i = 0, addr = NULL; i < npages; ++i, addr += PAGESIZE) { 321 if (!page_create_wait(1, KM_SLEEP)) { 322 goto out; 323 } 324 325 kseg.s_as = &kas; 326 327 if ((pp = page_get_freelist(&kvp, 0, &kseg, addr, PAGESIZE, 328 KM_SLEEP, NULL)) == NULL) { 329 if ((pp = page_get_cachelist(&kvp, 0, &kseg, addr, 330 KM_SLEEP, NULL)) == NULL) { 331 goto out; 332 } 333 if (PP_ISAGED(pp) == 0) { 334 page_hashout(pp, NULL); 335 } 336 } 337 338 PP_CLRFREE(pp); 339 PP_CLRAGED(pp); 340 ppa[i] = pp; 341 page_downgrade(pp); 342 } 343 rd_tot_physmem += npages; 344 345 return (ppa); 346 out: 347 for (i = 0; ppa[i] != NULL && i < npages; ++i) { 348 page_free(ppa[i], 0); 349 } 350 351 page_create_putback(i); 352 kmem_free(ppa, ppalen); 353 354 page_unresv(npages); 355 356 return (NULL); 357 } 358 359 /* 360 * Free physical pages previously allocated via rd_phys_alloc(); note that 361 * this function may block as it has to wait until it can exclusively lock 362 * all the pages first. 363 */ 364 static void 365 rd_phys_free(page_t **ppa, pgcnt_t npages) 366 { 367 pgcnt_t i; 368 size_t ppalen = npages * sizeof (struct page_t *); 369 370 for (i = 0; i < npages; ++i) { 371 if (! page_tryupgrade(ppa[i])) { 372 page_unlock(ppa[i]); 373 while (! page_lock(ppa[i], SE_EXCL, NULL, P_RECLAIM)) 374 ; 375 } 376 page_free(ppa[i], 0); 377 } 378 379 kmem_free(ppa, ppalen); 380 381 page_unresv(npages); 382 rd_tot_physmem -= npages; 383 } 384 385 /* 386 * Remove a window mapping (if present). 387 */ 388 static void 389 rd_unmap_window(rd_devstate_t *rsp) 390 { 391 if (rsp->rd_window_base != RD_WINDOW_NOT_MAPPED) { 392 hat_unload(kas.a_hat, rsp->rd_window_virt, rsp->rd_window_size, 393 HAT_UNLOAD_UNLOCK); 394 } 395 } 396 397 /* 398 * Map a portion of the ramdisk into the virtual window. 399 */ 400 static void 401 rd_map_window(rd_devstate_t *rsp, off_t offset) 402 { 403 pgcnt_t offpgs = btop(offset); 404 405 if (rsp->rd_window_base != RD_WINDOW_NOT_MAPPED) { 406 /* 407 * Already mapped; is offset within our window? 408 */ 409 if (offset >= rsp->rd_window_base && 410 offset < rsp->rd_window_base + rsp->rd_window_size) { 411 return; 412 } 413 414 /* 415 * No, we need to re-map; toss the old mapping. 416 */ 417 rd_unmap_window(rsp); 418 } 419 rsp->rd_window_base = ptob(offpgs); 420 421 /* 422 * Different algorithms depending on whether this is a real 423 * OBP-created ramdisk, or a pseudo ramdisk. 424 */ 425 if (rsp->rd_dip == rd_dip) { 426 pgcnt_t pi, lastpi; 427 caddr_t vaddr; 428 429 /* 430 * Find the range of pages which should be mapped. 431 */ 432 pi = offpgs; 433 lastpi = pi + btopr(rsp->rd_window_size); 434 if (lastpi > rsp->rd_npages) { 435 lastpi = rsp->rd_npages; 436 } 437 438 /* 439 * Load the mapping. 440 */ 441 vaddr = rsp->rd_window_virt; 442 for (; pi < lastpi; ++pi) { 443 hat_memload(kas.a_hat, vaddr, rsp->rd_ppa[pi], 444 (PROT_READ | PROT_WRITE) | HAT_NOSYNC, 445 HAT_LOAD_LOCK); 446 vaddr += ptob(1); 447 } 448 } else { 449 uint_t i; 450 pfn_t pfn; 451 452 /* 453 * Real OBP-created ramdisk: locate the physical range which 454 * contains this offset. 455 */ 456 for (i = 0; i < rsp->rd_nexisting; ++i) { 457 if (offset < rsp->rd_existing[i].size) { 458 break; 459 } 460 offset -= rsp->rd_existing[i].size; 461 } 462 ASSERT(i < rsp->rd_nexisting); 463 464 /* 465 * Load the mapping. 466 */ 467 pfn = btop(rsp->rd_existing[i].phys + offset); 468 hat_devload(kas.a_hat, rsp->rd_window_virt, rsp->rd_window_size, 469 pfn, (PROT_READ | PROT_WRITE), 470 HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK); 471 } 472 } 473 474 /* 475 * Fakes up a disk geometry, and one big partition, based on the size 476 * of the file. This is needed because we allow newfs'ing the device, 477 * and newfs will do several disk ioctls to figure out the geometry and 478 * partition information. It uses that information to determine the parameters 479 * to pass to mkfs. Geometry is pretty much irrelevent these days, but we 480 * have to support it. 481 * 482 * Stolen from lofi.c - should maybe split out common code sometime. 483 */ 484 static void 485 rd_fake_disk_geometry(rd_devstate_t *rsp) 486 { 487 /* dk_geom - see dkio(7I) */ 488 /* 489 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs 490 * of sectors), but that breaks programs like fdisk which want to 491 * partition a disk by cylinder. With one cylinder, you can't create 492 * an fdisk partition and put pcfs on it for testing (hard to pick 493 * a number between one and one). 494 * 495 * The cheezy floppy test is an attempt to not have too few cylinders 496 * for a small file, or so many on a big file that you waste space 497 * for backup superblocks or cylinder group structures. 498 */ 499 if (rsp->rd_size < (2 * 1024 * 1024)) /* floppy? */ 500 rsp->rd_dkg.dkg_ncyl = rsp->rd_size / (100 * 1024); 501 else 502 rsp->rd_dkg.dkg_ncyl = rsp->rd_size / (300 * 1024); 503 /* in case file file is < 100k */ 504 if (rsp->rd_dkg.dkg_ncyl == 0) 505 rsp->rd_dkg.dkg_ncyl = 1; 506 rsp->rd_dkg.dkg_acyl = 0; 507 rsp->rd_dkg.dkg_bcyl = 0; 508 rsp->rd_dkg.dkg_nhead = 1; 509 rsp->rd_dkg.dkg_obs1 = 0; 510 rsp->rd_dkg.dkg_intrlv = 0; 511 rsp->rd_dkg.dkg_obs2 = 0; 512 rsp->rd_dkg.dkg_obs3 = 0; 513 rsp->rd_dkg.dkg_apc = 0; 514 rsp->rd_dkg.dkg_rpm = 7200; 515 rsp->rd_dkg.dkg_pcyl = rsp->rd_dkg.dkg_ncyl + rsp->rd_dkg.dkg_acyl; 516 rsp->rd_dkg.dkg_nsect = rsp->rd_size / 517 (DEV_BSIZE * rsp->rd_dkg.dkg_ncyl); 518 rsp->rd_dkg.dkg_write_reinstruct = 0; 519 rsp->rd_dkg.dkg_read_reinstruct = 0; 520 521 /* vtoc - see dkio(7I) */ 522 bzero(&rsp->rd_vtoc, sizeof (struct vtoc)); 523 rsp->rd_vtoc.v_sanity = VTOC_SANE; 524 rsp->rd_vtoc.v_version = V_VERSION; 525 bcopy(RD_DRIVER_NAME, rsp->rd_vtoc.v_volume, 7); 526 rsp->rd_vtoc.v_sectorsz = DEV_BSIZE; 527 rsp->rd_vtoc.v_nparts = 1; 528 rsp->rd_vtoc.v_part[0].p_tag = V_UNASSIGNED; 529 rsp->rd_vtoc.v_part[0].p_flag = V_UNMNT; 530 rsp->rd_vtoc.v_part[0].p_start = (daddr_t)0; 531 /* 532 * The partition size cannot just be the number of sectors, because 533 * that might not end on a cylinder boundary. And if that's the case, 534 * newfs/mkfs will print a scary warning. So just figure the size 535 * based on the number of cylinders and sectors/cylinder. 536 */ 537 rsp->rd_vtoc.v_part[0].p_size = rsp->rd_dkg.dkg_pcyl * 538 rsp->rd_dkg.dkg_nsect * rsp->rd_dkg.dkg_nhead; 539 540 /* dk_cinfo - see dkio(7I) */ 541 bzero(&rsp->rd_ci, sizeof (struct dk_cinfo)); 542 (void) strcpy(rsp->rd_ci.dki_cname, RD_DRIVER_NAME); 543 rsp->rd_ci.dki_ctype = DKC_MD; 544 rsp->rd_ci.dki_flags = 0; 545 rsp->rd_ci.dki_cnum = 0; 546 rsp->rd_ci.dki_addr = 0; 547 rsp->rd_ci.dki_space = 0; 548 rsp->rd_ci.dki_prio = 0; 549 rsp->rd_ci.dki_vec = 0; 550 (void) strcpy(rsp->rd_ci.dki_dname, RD_DRIVER_NAME); 551 rsp->rd_ci.dki_unit = 0; 552 rsp->rd_ci.dki_slave = 0; 553 rsp->rd_ci.dki_partition = 0; 554 /* 555 * newfs uses this to set maxcontig. Must not be < 16, or it 556 * will be 0 when newfs multiplies it by DEV_BSIZE and divides 557 * it by the block size. Then tunefs doesn't work because 558 * maxcontig is 0. 559 */ 560 rsp->rd_ci.dki_maxtransfer = 16; 561 } 562 563 /* 564 * Deallocate resources (virtual and physical, device nodes, structures) 565 * from a ramdisk. 566 */ 567 static void 568 rd_dealloc_resources(rd_devstate_t *rsp) 569 { 570 dev_info_t *dip = rsp->rd_dip; 571 char namebuf[RD_NAME_LEN + 5]; 572 dev_t fulldev; 573 574 if (rsp->rd_window_virt != NULL) { 575 if (rsp->rd_window_base != RD_WINDOW_NOT_MAPPED) { 576 rd_unmap_window(rsp); 577 } 578 vmem_free(heap_arena, rsp->rd_window_virt, rsp->rd_window_size); 579 } 580 mutex_destroy(&rsp->rd_device_lock); 581 582 if (rsp->rd_existing) { 583 ddi_prop_free(rsp->rd_existing); 584 } 585 if (rsp->rd_ppa != NULL) { 586 rd_phys_free(rsp->rd_ppa, rsp->rd_npages); 587 } 588 589 /* 590 * Remove the block and raw device nodes. 591 */ 592 if (dip == rd_dip) { 593 (void) snprintf(namebuf, sizeof (namebuf), "%s", 594 rsp->rd_name); 595 ddi_remove_minor_node(dip, namebuf); 596 (void) snprintf(namebuf, sizeof (namebuf), "%s,raw", 597 rsp->rd_name); 598 ddi_remove_minor_node(dip, namebuf); 599 } else { 600 ddi_remove_minor_node(dip, "a"); 601 ddi_remove_minor_node(dip, "a,raw"); 602 } 603 604 /* 605 * Remove the "Size" and "Nblocks" properties. 606 */ 607 fulldev = makedevice(ddi_driver_major(dip), rsp->rd_minor); 608 (void) ddi_prop_remove(fulldev, dip, SIZE_PROP_NAME); 609 (void) ddi_prop_remove(fulldev, dip, NBLOCKS_PROP_NAME); 610 611 if (rsp->rd_kstat) { 612 kstat_delete(rsp->rd_kstat); 613 mutex_destroy(&rsp->rd_kstat_lock); 614 } 615 616 ddi_soft_state_free(rd_statep, rsp->rd_minor); 617 } 618 619 /* 620 * Allocate resources (virtual and physical, device nodes, structures) 621 * to a ramdisk. 622 */ 623 static rd_devstate_t * 624 rd_alloc_resources(char *name, size_t size, dev_info_t *dip) 625 { 626 minor_t minor; 627 rd_devstate_t *rsp; 628 char namebuf[RD_NAME_LEN + 5]; 629 dev_t fulldev; 630 int64_t Nblocks_prop_val; 631 int64_t Size_prop_val; 632 633 minor = rd_find_free_minor(); 634 if (ddi_soft_state_zalloc(rd_statep, minor) == DDI_FAILURE) { 635 return (NULL); 636 } 637 rsp = ddi_get_soft_state(rd_statep, minor); 638 639 (void) strcpy(rsp->rd_name, name); 640 rsp->rd_dip = dip; 641 rsp->rd_minor = minor; 642 rsp->rd_size = size; 643 644 /* 645 * Allocate virtual window onto ramdisk. 646 */ 647 mutex_init(&rsp->rd_device_lock, NULL, MUTEX_DRIVER, NULL); 648 rsp->rd_window_base = RD_WINDOW_NOT_MAPPED; 649 rsp->rd_window_size = PAGESIZE; 650 rsp->rd_window_virt = vmem_alloc(heap_arena, 651 rsp->rd_window_size, VM_SLEEP); 652 if (rsp->rd_window_virt == NULL) { 653 goto create_failed; 654 } 655 656 /* 657 * Allocate physical memory for non-OBP ramdisks. 658 * Create pseudo block and raw device nodes. 659 */ 660 if (dip == rd_dip) { 661 rsp->rd_npages = btopr(size); 662 rsp->rd_ppa = rd_phys_alloc(rsp->rd_npages); 663 if (rsp->rd_ppa == NULL) { 664 goto create_failed; 665 } 666 667 /* 668 * For non-OBP ramdisks the device nodes are: 669 * 670 * /devices/pseudo/ramdisk@0:<diskname> 671 * /devices/pseudo/ramdisk@0:<diskname>,raw 672 */ 673 (void) snprintf(namebuf, sizeof (namebuf), "%s", 674 rsp->rd_name); 675 if (ddi_create_minor_node(dip, namebuf, S_IFBLK, minor, 676 DDI_PSEUDO, 0) == DDI_FAILURE) { 677 goto create_failed; 678 } 679 (void) snprintf(namebuf, sizeof (namebuf), "%s,raw", 680 rsp->rd_name); 681 if (ddi_create_minor_node(dip, namebuf, S_IFCHR, minor, 682 DDI_PSEUDO, 0) == DDI_FAILURE) { 683 goto create_failed; 684 } 685 } else { 686 /* 687 * For OBP-created ramdisks the device nodes are: 688 * 689 * /devices/ramdisk-<diskname>:a 690 * /devices/ramdisk-<diskname>:a,raw 691 */ 692 if (ddi_create_minor_node(dip, "a", S_IFBLK, minor, 693 DDI_PSEUDO, 0) == DDI_FAILURE) { 694 goto create_failed; 695 } 696 if (ddi_create_minor_node(dip, "a,raw", S_IFCHR, minor, 697 DDI_PSEUDO, 0) == DDI_FAILURE) { 698 goto create_failed; 699 } 700 } 701 702 /* 703 * Create the "Size" and "Nblocks" properties. 704 */ 705 fulldev = makedevice(ddi_driver_major(dip), minor); 706 Size_prop_val = size; 707 if ((ddi_prop_update_int64(fulldev, dip, 708 SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) { 709 goto create_failed; 710 } 711 Nblocks_prop_val = size / DEV_BSIZE; 712 if ((ddi_prop_update_int64(fulldev, dip, 713 NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) { 714 goto create_failed; 715 } 716 717 /* 718 * Allocate kstat stuff. 719 */ 720 rsp->rd_kstat = kstat_create(RD_DRIVER_NAME, minor, NULL, 721 "disk", KSTAT_TYPE_IO, 1, 0); 722 if (rsp->rd_kstat) { 723 mutex_init(&rsp->rd_kstat_lock, NULL, 724 MUTEX_DRIVER, NULL); 725 rsp->rd_kstat->ks_lock = &rsp->rd_kstat_lock; 726 kstat_install(rsp->rd_kstat); 727 } 728 729 rd_fake_disk_geometry(rsp); 730 731 return (rsp); 732 733 create_failed: 734 /* 735 * Cleanup. 736 */ 737 rd_dealloc_resources(rsp); 738 739 return (NULL); 740 } 741 742 /* 743 * Undo what we did in rd_attach, freeing resources and removing things which 744 * we installed. The system framework guarantees we are not active with this 745 * devinfo node in any other entry points at this time. 746 */ 747 static int 748 rd_common_detach(dev_info_t *dip) 749 { 750 if (dip == rd_dip) { 751 /* 752 * Pseudo node: can't detach if any pseudo ramdisks exist. 753 */ 754 if (rd_is_busy()) { 755 return (DDI_FAILURE); 756 } 757 ddi_soft_state_free(rd_statep, RD_CTL_MINOR); 758 rd_dip = NULL; 759 } else { 760 /* 761 * A 'real' ramdisk; find the state and free resources. 762 */ 763 rd_devstate_t *rsp; 764 765 if ((rsp = rd_find_dip_state(dip)) != NULL) { 766 rd_dealloc_resources(rsp); 767 } 768 } 769 ddi_remove_minor_node(dip, NULL); 770 771 return (DDI_SUCCESS); 772 } 773 774 static int 775 rd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 776 { 777 char *name; 778 rd_existing_t *ep = NULL; 779 uint_t nep, i; 780 size_t size = 0; 781 rd_devstate_t *rsp; 782 783 switch (cmd) { 784 785 case DDI_ATTACH: 786 mutex_enter(&rd_lock); 787 788 /* 789 * For pseudo ramdisk devinfo set up state 0 and :ctl device; 790 * else it's an OBP-created ramdisk. 791 */ 792 if (is_pseudo_device(dip)) { 793 rd_dip = dip; 794 rd_init_tuneables(); 795 796 /* 797 * The zeroth minor is reserved for the ramdisk 798 * 'control' device. 799 */ 800 if (ddi_soft_state_zalloc(rd_statep, RD_CTL_MINOR) == 801 DDI_FAILURE) { 802 goto attach_failed; 803 } 804 rsp = ddi_get_soft_state(rd_statep, RD_CTL_MINOR); 805 rsp->rd_dip = dip; 806 807 if (ddi_create_minor_node(dip, RD_CTL_NODE, 808 S_IFCHR, 0, DDI_PSEUDO, NULL) == DDI_FAILURE) { 809 goto attach_failed; 810 } 811 } else { 812 RD_STRIP_PREFIX(name, ddi_node_name(dip)); 813 814 if (strlen(name) > RD_NAME_LEN) { 815 cmn_err(CE_CONT, 816 "%s: name too long - ignoring\n", name); 817 goto attach_failed; 818 } 819 820 /* 821 * An OBP-created ramdisk must have an 'existing' 822 * property; get and check it. 823 */ 824 if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, dip, 825 DDI_PROP_DONTPASS, RD_EXISTING_PROP_NAME, 826 (uchar_t **)&ep, &nep) != DDI_SUCCESS) { 827 cmn_err(CE_CONT, 828 "%s: " RD_EXISTING_PROP_NAME 829 " property missing\n", name); 830 goto attach_failed; 831 } 832 if (nep == 0 || (nep % sizeof (*ep)) != 0) { 833 cmn_err(CE_CONT, 834 "%s: " RD_EXISTING_PROP_NAME 835 " illegal size\n", name); 836 goto attach_failed; 837 } 838 nep /= sizeof (*ep); 839 840 /* 841 * Calculate the size of the ramdisk. 842 */ 843 for (i = 0; i < nep; ++i) { 844 size += ep[i].size; 845 } 846 847 /* 848 * Allocate driver resources for the ramdisk. 849 */ 850 if ((rsp = rd_alloc_resources(name, size, 851 dip)) == NULL) { 852 goto attach_failed; 853 } 854 855 rsp->rd_existing = ep; 856 rsp->rd_nexisting = nep; 857 } 858 859 mutex_exit(&rd_lock); 860 861 ddi_report_dev(dip); 862 863 return (DDI_SUCCESS); 864 865 case DDI_RESUME: 866 return (DDI_SUCCESS); 867 868 default: 869 return (DDI_FAILURE); 870 } 871 872 attach_failed: 873 /* 874 * Use our common detach routine to unallocate any stuff which 875 * was allocated above. 876 */ 877 (void) rd_common_detach(dip); 878 mutex_exit(&rd_lock); 879 880 if (ep != NULL) { 881 ddi_prop_free(ep); 882 } 883 return (DDI_FAILURE); 884 } 885 886 static int 887 rd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 888 { 889 int e; 890 891 switch (cmd) { 892 893 case DDI_DETACH: 894 mutex_enter(&rd_lock); 895 e = rd_common_detach(dip); 896 mutex_exit(&rd_lock); 897 898 return (e); 899 900 case DDI_SUSPEND: 901 return (DDI_SUCCESS); 902 903 default: 904 return (DDI_FAILURE); 905 } 906 } 907 908 /*ARGSUSED*/ 909 static int 910 rd_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 911 { 912 rd_devstate_t *rsp; 913 914 switch (infocmd) { 915 case DDI_INFO_DEVT2DEVINFO: 916 if ((rsp = ddi_get_soft_state(rd_statep, 917 getminor((dev_t)arg))) != NULL) { 918 *result = rsp->rd_dip; 919 return (DDI_SUCCESS); 920 } 921 *result = NULL; 922 return (DDI_FAILURE); 923 924 case DDI_INFO_DEVT2INSTANCE: 925 if ((rsp = ddi_get_soft_state(rd_statep, 926 getminor((dev_t)arg))) != NULL) { 927 *result = (void *)(uintptr_t) 928 ddi_get_instance(rsp->rd_dip); 929 return (DDI_SUCCESS); 930 } 931 *result = NULL; 932 return (DDI_FAILURE); 933 934 default: 935 return (DDI_FAILURE); 936 } 937 } 938 939 /*ARGSUSED3*/ 940 static int 941 rd_open(dev_t *devp, int flag, int otyp, cred_t *credp) 942 { 943 minor_t minor; 944 rd_devstate_t *rsp; 945 946 mutex_enter(&rd_lock); 947 948 minor = getminor(*devp); 949 if (minor == RD_CTL_MINOR) { 950 /* 951 * Master control device; must be opened exclusively. 952 */ 953 if ((flag & FEXCL) != FEXCL || otyp != OTYP_CHR) { 954 mutex_exit(&rd_lock); 955 return (EINVAL); 956 } 957 958 rsp = ddi_get_soft_state(rd_statep, RD_CTL_MINOR); 959 if (rsp == NULL) { 960 mutex_exit(&rd_lock); 961 return (ENXIO); 962 } 963 964 if (rd_is_open(rsp)) { 965 mutex_exit(&rd_lock); 966 return (EBUSY); 967 } 968 (void) rd_opened(rsp, OTYP_CHR); 969 970 mutex_exit(&rd_lock); 971 972 return (0); 973 } 974 975 rsp = ddi_get_soft_state(rd_statep, minor); 976 if (rsp == NULL) { 977 mutex_exit(&rd_lock); 978 return (ENXIO); 979 } 980 981 if (rd_opened(rsp, otyp) == -1) { 982 mutex_exit(&rd_lock); 983 return (EINVAL); 984 } 985 986 mutex_exit(&rd_lock); 987 return (0); 988 } 989 990 /*ARGSUSED*/ 991 static int 992 rd_close(dev_t dev, int flag, int otyp, struct cred *credp) 993 { 994 minor_t minor; 995 rd_devstate_t *rsp; 996 997 mutex_enter(&rd_lock); 998 999 minor = getminor(dev); 1000 1001 rsp = ddi_get_soft_state(rd_statep, minor); 1002 if (rsp == NULL) { 1003 mutex_exit(&rd_lock); 1004 return (EINVAL); 1005 } 1006 1007 rd_closed(rsp, otyp); 1008 1009 mutex_exit(&rd_lock); 1010 1011 return (0); 1012 } 1013 1014 static void 1015 rd_minphys(struct buf *bp) 1016 { 1017 if (bp->b_bcount > rd_maxphys) { 1018 bp->b_bcount = rd_maxphys; 1019 } 1020 } 1021 1022 static void 1023 rd_rw(rd_devstate_t *rsp, struct buf *bp, offset_t offset, size_t nbytes) 1024 { 1025 int reading = bp->b_flags & B_READ; 1026 caddr_t buf_addr; 1027 1028 bp_mapin(bp); 1029 buf_addr = bp->b_un.b_addr; 1030 1031 while (nbytes > 0) { 1032 offset_t off_in_window; 1033 size_t rem_in_window, copy_bytes; 1034 caddr_t raddr; 1035 1036 mutex_enter(&rsp->rd_device_lock); 1037 rd_map_window(rsp, offset); 1038 1039 off_in_window = offset - rsp->rd_window_base; 1040 rem_in_window = rsp->rd_window_size - off_in_window; 1041 1042 raddr = rsp->rd_window_virt + off_in_window; 1043 copy_bytes = MIN(nbytes, rem_in_window); 1044 1045 if (reading) { 1046 (void) bcopy(raddr, buf_addr, copy_bytes); 1047 } else { 1048 (void) bcopy(buf_addr, raddr, copy_bytes); 1049 } 1050 mutex_exit(&rsp->rd_device_lock); 1051 1052 offset += copy_bytes; 1053 buf_addr += copy_bytes; 1054 nbytes -= copy_bytes; 1055 } 1056 } 1057 1058 static int 1059 rd_strategy(struct buf *bp) 1060 { 1061 rd_devstate_t *rsp; 1062 offset_t offset; 1063 1064 rsp = ddi_get_soft_state(rd_statep, getminor(bp->b_edev)); 1065 offset = bp->b_blkno * DEV_BSIZE; 1066 1067 if (rsp == NULL) { 1068 bp->b_error = ENXIO; 1069 bp->b_flags |= B_ERROR; 1070 } else if (offset >= rsp->rd_size) { 1071 bp->b_error = EINVAL; 1072 bp->b_flags |= B_ERROR; 1073 } else { 1074 size_t nbytes; 1075 1076 if (rsp->rd_kstat) { 1077 mutex_enter(rsp->rd_kstat->ks_lock); 1078 kstat_runq_enter(KSTAT_IO_PTR(rsp->rd_kstat)); 1079 mutex_exit(rsp->rd_kstat->ks_lock); 1080 } 1081 1082 nbytes = min(bp->b_bcount, rsp->rd_size - offset); 1083 1084 rd_rw(rsp, bp, offset, nbytes); 1085 1086 bp->b_resid = bp->b_bcount - nbytes; 1087 1088 if (rsp->rd_kstat) { 1089 kstat_io_t *kioptr; 1090 1091 mutex_enter(rsp->rd_kstat->ks_lock); 1092 kioptr = KSTAT_IO_PTR(rsp->rd_kstat); 1093 if (bp->b_flags & B_READ) { 1094 kioptr->nread += nbytes; 1095 kioptr->reads++; 1096 } else { 1097 kioptr->nwritten += nbytes; 1098 kioptr->writes++; 1099 } 1100 kstat_runq_exit(kioptr); 1101 mutex_exit(rsp->rd_kstat->ks_lock); 1102 } 1103 } 1104 1105 biodone(bp); 1106 return (0); 1107 } 1108 1109 /*ARGSUSED*/ 1110 static int 1111 rd_read(dev_t dev, struct uio *uiop, cred_t *credp) 1112 { 1113 rd_devstate_t *rsp; 1114 1115 rsp = ddi_get_soft_state(rd_statep, getminor(dev)); 1116 1117 if (uiop->uio_offset >= rsp->rd_size) 1118 return (EINVAL); 1119 1120 return (physio(rd_strategy, NULL, dev, B_READ, rd_minphys, uiop)); 1121 } 1122 1123 /*ARGSUSED*/ 1124 static int 1125 rd_write(dev_t dev, register struct uio *uiop, cred_t *credp) 1126 { 1127 rd_devstate_t *rsp; 1128 1129 rsp = ddi_get_soft_state(rd_statep, getminor(dev)); 1130 1131 if (uiop->uio_offset >= rsp->rd_size) 1132 return (EINVAL); 1133 1134 return (physio(rd_strategy, NULL, dev, B_WRITE, rd_minphys, uiop)); 1135 } 1136 1137 /*ARGSUSED*/ 1138 static int 1139 rd_create_disk(dev_t dev, struct rd_ioctl *urip, int mode, int *rvalp) 1140 { 1141 struct rd_ioctl kri; 1142 size_t size; 1143 rd_devstate_t *rsp; 1144 1145 if (ddi_copyin(urip, &kri, sizeof (kri), mode) == -1) { 1146 return (EFAULT); 1147 } 1148 1149 kri.ri_name[RD_NAME_LEN] = '\0'; 1150 1151 size = kri.ri_size; 1152 if (size == 0) { 1153 return (EINVAL); 1154 } 1155 size = ptob(btopr(size)); 1156 1157 mutex_enter(&rd_lock); 1158 1159 if (rd_find_named_disk(kri.ri_name) != NULL) { 1160 mutex_exit(&rd_lock); 1161 return (EEXIST); 1162 } 1163 1164 rsp = rd_alloc_resources(kri.ri_name, size, rd_dip); 1165 if (rsp == NULL) { 1166 mutex_exit(&rd_lock); 1167 return (EAGAIN); 1168 } 1169 1170 mutex_exit(&rd_lock); 1171 1172 return (ddi_copyout(&kri, urip, sizeof (kri), mode) == -1 ? EFAULT : 0); 1173 } 1174 1175 /*ARGSUSED*/ 1176 static int 1177 rd_delete_disk(dev_t dev, struct rd_ioctl *urip, int mode) 1178 { 1179 struct rd_ioctl kri; 1180 rd_devstate_t *rsp; 1181 1182 if (ddi_copyin(urip, &kri, sizeof (kri), mode) == -1) { 1183 return (EFAULT); 1184 } 1185 1186 kri.ri_name[RD_NAME_LEN] = '\0'; 1187 1188 mutex_enter(&rd_lock); 1189 1190 rsp = rd_find_named_disk(kri.ri_name); 1191 if (rsp == NULL || rsp->rd_dip != rd_dip) { 1192 mutex_exit(&rd_lock); 1193 return (EINVAL); 1194 } 1195 if (rd_is_open(rsp)) { 1196 mutex_exit(&rd_lock); 1197 return (EBUSY); 1198 } 1199 1200 rd_dealloc_resources(rsp); 1201 1202 mutex_exit(&rd_lock); 1203 1204 return (0); 1205 } 1206 1207 /*ARGSUSED*/ 1208 static int 1209 rd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 1210 { 1211 minor_t minor; 1212 int error; 1213 enum dkio_state dkstate; 1214 rd_devstate_t *rsp; 1215 1216 minor = getminor(dev); 1217 1218 /* 1219 * Ramdisk ioctls only apply to the master device. 1220 */ 1221 if (minor == RD_CTL_MINOR) { 1222 struct rd_ioctl *rip = (struct rd_ioctl *)arg; 1223 1224 /* 1225 * The query commands only need read-access - i.e., normal 1226 * users are allowed to do those on the controlling device 1227 * as long as they can open it read-only. 1228 */ 1229 switch (cmd) { 1230 case RD_CREATE_DISK: 1231 if ((mode & FWRITE) == 0) 1232 return (EPERM); 1233 return (rd_create_disk(dev, rip, mode, rvalp)); 1234 1235 case RD_DELETE_DISK: 1236 if ((mode & FWRITE) == 0) 1237 return (EPERM); 1238 return (rd_delete_disk(dev, rip, mode)); 1239 1240 default: 1241 return (EINVAL); 1242 } 1243 } 1244 1245 rsp = ddi_get_soft_state(rd_statep, minor); 1246 if (rsp == NULL) { 1247 return (ENXIO); 1248 } 1249 1250 /* 1251 * These are for faking out utilities like newfs. 1252 */ 1253 switch (cmd) { 1254 case VOLIOCINFO: 1255 /* pcfs does this to see if it needs to set PCFS_NOCHK */ 1256 /* 0 means it should set it */ 1257 return (0); 1258 case DKIOCGVTOC: 1259 switch (ddi_model_convert_from(mode & FMODELS)) { 1260 case DDI_MODEL_ILP32: { 1261 struct vtoc32 vtoc32; 1262 1263 vtoctovtoc32(rsp->rd_vtoc, vtoc32); 1264 if (ddi_copyout(&vtoc32, (void *)arg, 1265 sizeof (struct vtoc32), mode)) 1266 return (EFAULT); 1267 } 1268 break; 1269 1270 case DDI_MODEL_NONE: 1271 if (ddi_copyout(&rsp->rd_vtoc, (void *)arg, 1272 sizeof (struct vtoc), mode)) 1273 return (EFAULT); 1274 break; 1275 } 1276 return (0); 1277 case DKIOCINFO: 1278 error = ddi_copyout(&rsp->rd_ci, (void *)arg, 1279 sizeof (struct dk_cinfo), mode); 1280 if (error) 1281 return (EFAULT); 1282 return (0); 1283 case DKIOCG_VIRTGEOM: 1284 case DKIOCG_PHYGEOM: 1285 case DKIOCGGEOM: 1286 error = ddi_copyout(&rsp->rd_dkg, (void *)arg, 1287 sizeof (struct dk_geom), mode); 1288 if (error) 1289 return (EFAULT); 1290 return (0); 1291 case DKIOCSTATE: 1292 /* the file is always there */ 1293 dkstate = DKIO_INSERTED; 1294 error = ddi_copyout(&dkstate, (void *)arg, 1295 sizeof (enum dkio_state), mode); 1296 if (error) 1297 return (EFAULT); 1298 return (0); 1299 default: 1300 return (ENOTTY); 1301 } 1302 } 1303 1304 1305 static struct cb_ops rd_cb_ops = { 1306 rd_open, 1307 rd_close, 1308 rd_strategy, 1309 nodev, 1310 nodev, /* dump */ 1311 rd_read, 1312 rd_write, 1313 rd_ioctl, 1314 nodev, /* devmap */ 1315 nodev, /* mmap */ 1316 nodev, /* segmap */ 1317 nochpoll, /* poll */ 1318 ddi_prop_op, 1319 NULL, 1320 D_NEW | D_MP 1321 }; 1322 1323 static struct dev_ops rd_ops = { 1324 DEVO_REV, 1325 0, 1326 rd_getinfo, 1327 nulldev, /* identify */ 1328 nulldev, /* probe */ 1329 rd_attach, 1330 rd_detach, 1331 nodev, /* reset */ 1332 &rd_cb_ops, 1333 (struct bus_ops *)0 1334 }; 1335 1336 1337 extern struct mod_ops mod_driverops; 1338 1339 static struct modldrv modldrv = { 1340 &mod_driverops, 1341 "ramdisk driver v%I%", 1342 &rd_ops 1343 }; 1344 1345 static struct modlinkage modlinkage = { 1346 MODREV_1, 1347 &modldrv, 1348 0 1349 }; 1350 1351 int 1352 _init(void) 1353 { 1354 int e; 1355 1356 if ((e = ddi_soft_state_init(&rd_statep, 1357 sizeof (rd_devstate_t), 0)) != 0) { 1358 return (e); 1359 } 1360 1361 mutex_init(&rd_lock, NULL, MUTEX_DRIVER, NULL); 1362 1363 if ((e = mod_install(&modlinkage)) != 0) { 1364 mutex_destroy(&rd_lock); 1365 ddi_soft_state_fini(&rd_statep); 1366 } 1367 1368 return (e); 1369 } 1370 1371 int 1372 _fini(void) 1373 { 1374 int e; 1375 1376 if ((e = mod_remove(&modlinkage)) != 0) { 1377 return (e); 1378 } 1379 1380 ddi_soft_state_fini(&rd_statep); 1381 mutex_destroy(&rd_lock); 1382 1383 return (e); 1384 } 1385 1386 int 1387 _info(struct modinfo *modinfop) 1388 { 1389 return (mod_info(&modlinkage, modinfop)); 1390 } 1391