1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Ramdisk device driver. 30 * 31 * There are two types of ramdisk: 'real' OBP-created ramdisks, and 'pseudo' 32 * ramdisks created at runtime with no corresponding OBP device node. The 33 * ramdisk(7D) driver is capable of dealing with both, and with the creation 34 * and deletion of 'pseudo' ramdisks. 35 * 36 * Every ramdisk has a single 'state' structure which maintains data for 37 * that ramdisk, and is assigned a single minor number. The bottom 10-bits 38 * of the minor number index the state structures; the top 8-bits give a 39 * 'real OBP disk' number, i.e. they are zero for 'pseudo' ramdisks. Thus 40 * it is possible to distinguish 'real' from 'pseudo' ramdisks using the 41 * top 8-bits of the minor number. 42 * 43 * Each OBP-created ramdisk has its own node in the device tree with an 44 * "existing" property which describes the one-or-more physical address ranges 45 * assigned to the ramdisk. All 'pseudo' ramdisks share a common devinfo 46 * structure. 
47 * 48 * A single character device node is used by ramdiskadm(1M) to communicate 49 * with the ramdisk driver, with minor number 0: 50 * 51 * /dev/ramdiskctl -> /devices/pseudo/ramdisk@0:ctl 52 * 53 * For consistent access, block and raw device nodes are created for *every* 54 * ramdisk. For 'pseudo' ramdisks: 55 * 56 * /dev/ramdisk/<diskname> -> /devices/pseudo/ramdisk@0:<diskname> 57 * /dev/rramdisk/<diskname> -> /devices/pseudo/ramdisk@0:<diskname>,raw 58 * 59 * For OBP-created ramdisks: 60 * 61 * /dev/ramdisk/<diskname> -> /devices/ramdisk-<diskname>:a 62 * /dev/rramdisk/<diskname> -> /devices/ramdisk-<diskname>:a,raw 63 * 64 * This allows the transition from the standalone to the kernel to proceed 65 * when booting from a ramdisk, and for the installation to correctly identify 66 * the root device. 67 */ 68 69 #include <sys/types.h> 70 #include <sys/param.h> 71 #include <sys/sysmacros.h> 72 #include <sys/errno.h> 73 #include <sys/uio.h> 74 #include <sys/buf.h> 75 #include <sys/modctl.h> 76 #include <sys/open.h> 77 #include <sys/kmem.h> 78 #include <sys/poll.h> 79 #include <sys/conf.h> 80 #include <sys/cmn_err.h> 81 #include <sys/stat.h> 82 #include <sys/file.h> 83 #include <sys/ddi.h> 84 #include <sys/sunddi.h> 85 #include <sys/ramdisk.h> 86 #include <vm/seg_kmem.h> 87 88 /* 89 * An opaque handle where information about our set of ramdisk devices lives. 90 */ 91 static void *rd_statep; 92 93 /* 94 * Pointer to devinfo for the 'pseudo' ramdisks. Real OBP-created ramdisks 95 * get their own individual devinfo. 96 */ 97 static dev_info_t *rd_dip = NULL; 98 99 /* 100 * Global state lock. 101 */ 102 static kmutex_t rd_lock; 103 104 /* 105 * Maximum number of ramdisks supported by this driver. 106 */ 107 static uint32_t rd_max_disks = RD_DFLT_DISKS; 108 109 /* 110 * Percentage of physical memory which can be assigned to pseudo ramdisks, 111 * what that equates to in pages, and how many pages are currently assigned. 
112 */ 113 static uint_t rd_percent_physmem = RD_DEFAULT_PERCENT_PHYSMEM; 114 static pgcnt_t rd_max_physmem; 115 static pgcnt_t rd_tot_physmem; 116 117 static uint_t rd_maxphys = RD_DEFAULT_MAXPHYS; 118 119 /* 120 * Is the driver busy, i.e. are there any pseudo ramdisk devices in existence? 121 */ 122 static int 123 rd_is_busy(void) 124 { 125 minor_t minor; 126 rd_devstate_t *rsp; 127 128 ASSERT(mutex_owned(&rd_lock)); 129 for (minor = 1; minor <= rd_max_disks; ++minor) { 130 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL && 131 rsp->rd_dip == rd_dip) { 132 return (EBUSY); 133 } 134 } 135 return (0); 136 } 137 138 /* 139 * Find the first free minor number; returns zero if there isn't one. 140 */ 141 static minor_t 142 rd_find_free_minor(void) 143 { 144 minor_t minor; 145 146 ASSERT(mutex_owned(&rd_lock)); 147 for (minor = 1; minor <= rd_max_disks; ++minor) { 148 if (ddi_get_soft_state(rd_statep, minor) == NULL) { 149 return (minor); 150 } 151 } 152 return (0); 153 } 154 155 /* 156 * Locate the rd_devstate for the named ramdisk; returns NULL if not found. 157 * Each ramdisk is identified uniquely by name, i.e. an OBP-created ramdisk 158 * cannot have the same name as a pseudo ramdisk. 159 */ 160 static rd_devstate_t * 161 rd_find_named_disk(char *name) 162 { 163 minor_t minor; 164 rd_devstate_t *rsp; 165 166 ASSERT(mutex_owned(&rd_lock)); 167 for (minor = 1; minor <= rd_max_disks; ++minor) { 168 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL && 169 strcmp(rsp->rd_name, name) == 0) { 170 return (rsp); 171 } 172 } 173 return (NULL); 174 } 175 176 /* 177 * Locate the rd_devstate for the real OBP-created ramdisk whose devinfo 178 * is referenced by 'dip'; returns NULL if not found (shouldn't happen). 
179 */ 180 static rd_devstate_t * 181 rd_find_dip_state(dev_info_t *dip) 182 { 183 minor_t minor; 184 rd_devstate_t *rsp; 185 186 ASSERT(mutex_owned(&rd_lock)); 187 for (minor = 1; minor <= rd_max_disks; ++minor) { 188 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL && 189 rsp->rd_dip == dip) { 190 return (rsp); 191 } 192 } 193 return (NULL); 194 } 195 196 /* 197 * Is the ramdisk open? 198 */ 199 static int 200 rd_is_open(rd_devstate_t *rsp) 201 { 202 ASSERT(mutex_owned(&rd_lock)); 203 return (rsp->rd_chr_open || rsp->rd_blk_open || rsp->rd_lyr_open_cnt); 204 } 205 206 /* 207 * Mark the ramdisk open. 208 */ 209 static int 210 rd_opened(rd_devstate_t *rsp, int otyp) 211 { 212 ASSERT(mutex_owned(&rd_lock)); 213 switch (otyp) { 214 case OTYP_CHR: 215 rsp->rd_chr_open = 1; 216 break; 217 case OTYP_BLK: 218 rsp->rd_blk_open = 1; 219 break; 220 case OTYP_LYR: 221 rsp->rd_lyr_open_cnt++; 222 break; 223 default: 224 return (-1); 225 } 226 return (0); 227 } 228 229 /* 230 * Mark the ramdisk closed. 231 */ 232 static void 233 rd_closed(rd_devstate_t *rsp, int otyp) 234 { 235 ASSERT(mutex_owned(&rd_lock)); 236 switch (otyp) { 237 case OTYP_CHR: 238 rsp->rd_chr_open = 0; 239 break; 240 case OTYP_BLK: 241 rsp->rd_blk_open = 0; 242 break; 243 case OTYP_LYR: 244 rsp->rd_lyr_open_cnt--; 245 break; 246 default: 247 break; 248 } 249 } 250 251 static void 252 rd_init_tuneables(void) 253 { 254 char *prop, *p; 255 256 /* 257 * Ensure sanity of 'rd_max_disks', which may be tuned in ramdisk.conf. 
258 */ 259 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, rd_dip, 0, 260 "max_disks", &prop) == DDI_PROP_SUCCESS) { 261 p = prop; 262 rd_max_disks = (uint32_t)stoi(&p); 263 ddi_prop_free(prop); 264 } 265 if (rd_max_disks >= RD_MAX_DISKS) { 266 cmn_err(CE_WARN, "ramdisk: rd_max_disks (%u) too big;" 267 " using default (%u).", rd_max_disks, RD_MAX_DISKS - 1); 268 269 rd_max_disks = RD_MAX_DISKS - 1; 270 } 271 272 /* 273 * Ensure sanity of 'rd_percent_physmem', which may be tuned 274 * in ramdisk.conf. 275 */ 276 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, rd_dip, 0, 277 "percent_physmem", &prop) == DDI_PROP_SUCCESS) { 278 p = prop; 279 rd_percent_physmem = (uint_t)stoi(&p); 280 ddi_prop_free(prop); 281 } 282 if (rd_percent_physmem >= 100) { 283 cmn_err(CE_WARN, "ramdisk: rd_percent_physmem (%u) >= 100;" 284 " using default (%u%%).", rd_percent_physmem, 285 RD_DEFAULT_PERCENT_PHYSMEM); 286 287 rd_percent_physmem = RD_DEFAULT_PERCENT_PHYSMEM; 288 } 289 290 /* 291 * Since availrmem is in pages (and is a long), this won't overflow. 292 */ 293 rd_max_physmem = (availrmem * rd_percent_physmem) / 100; 294 } 295 296 /* 297 * Allocate enough physical pages to hold `size' bytes. Returns an 298 * array of page_t * pointers that can later be mapped in or out via 299 * rd_{un}map_window() but is otherwise opaque, or NULL on failure. 300 * 301 * This code stolen from the NCA driver. 
302 */ 303 page_t ** 304 rd_phys_alloc(pgcnt_t npages) 305 { 306 page_t *pp, **ppa; 307 pgcnt_t i; 308 size_t ppalen = npages * sizeof (struct page_t *); 309 struct seg kseg; 310 char *addr; /* For the purposes of coloring */ 311 312 if (rd_tot_physmem + npages > rd_max_physmem) { 313 return (NULL); 314 } 315 ppa = kmem_zalloc(ppalen, KM_SLEEP); 316 (void) page_resv(npages, KM_SLEEP); 317 318 for (i = 0, addr = NULL; i < npages; ++i, addr += PAGESIZE) { 319 if (!page_create_wait(1, KM_SLEEP)) { 320 goto out; 321 } 322 323 kseg.s_as = &kas; 324 325 if ((pp = page_get_freelist(&kvp, 0, &kseg, addr, PAGESIZE, 326 KM_SLEEP, NULL)) == NULL) { 327 if ((pp = page_get_cachelist(&kvp, 0, &kseg, addr, 328 KM_SLEEP, NULL)) == NULL) { 329 goto out; 330 } 331 if (PP_ISAGED(pp) == 0) { 332 page_hashout(pp, NULL); 333 } 334 } 335 336 PP_CLRFREE(pp); 337 PP_CLRAGED(pp); 338 ppa[i] = pp; 339 page_downgrade(pp); 340 } 341 rd_tot_physmem += npages; 342 343 return (ppa); 344 out: 345 for (i = 0; ppa[i] != NULL && i < npages; ++i) { 346 page_free(ppa[i], 0); 347 } 348 349 page_create_putback(i); 350 kmem_free(ppa, ppalen); 351 352 page_unresv(npages); 353 354 return (NULL); 355 } 356 357 /* 358 * Free physical pages previously allocated via rd_phys_alloc(); note that 359 * this function may block as it has to wait until it can exclusively lock 360 * all the pages first. 361 */ 362 static void 363 rd_phys_free(page_t **ppa, pgcnt_t npages) 364 { 365 pgcnt_t i; 366 size_t ppalen = npages * sizeof (struct page_t *); 367 368 for (i = 0; i < npages; ++i) { 369 if (! page_tryupgrade(ppa[i])) { 370 page_unlock(ppa[i]); 371 while (! page_lock(ppa[i], SE_EXCL, NULL, P_RECLAIM)) 372 ; 373 } 374 page_free(ppa[i], 0); 375 } 376 377 kmem_free(ppa, ppalen); 378 379 page_unresv(npages); 380 rd_tot_physmem -= npages; 381 } 382 383 /* 384 * Remove a window mapping (if present). 
385 */ 386 static void 387 rd_unmap_window(rd_devstate_t *rsp) 388 { 389 if (rsp->rd_window_base != RD_WINDOW_NOT_MAPPED) { 390 hat_unload(kas.a_hat, rsp->rd_window_virt, rsp->rd_window_size, 391 HAT_UNLOAD_UNLOCK); 392 } 393 } 394 395 /* 396 * Map a portion of the ramdisk into the virtual window. 397 */ 398 static void 399 rd_map_window(rd_devstate_t *rsp, off_t offset) 400 { 401 pgcnt_t offpgs = btop(offset); 402 403 if (rsp->rd_window_base != RD_WINDOW_NOT_MAPPED) { 404 /* 405 * Already mapped; is offset within our window? 406 */ 407 if (offset >= rsp->rd_window_base && 408 offset < rsp->rd_window_base + rsp->rd_window_size) { 409 return; 410 } 411 412 /* 413 * No, we need to re-map; toss the old mapping. 414 */ 415 rd_unmap_window(rsp); 416 } 417 rsp->rd_window_base = ptob(offpgs); 418 419 /* 420 * Different algorithms depending on whether this is a real 421 * OBP-created ramdisk, or a pseudo ramdisk. 422 */ 423 if (rsp->rd_dip == rd_dip) { 424 pgcnt_t pi, lastpi; 425 caddr_t vaddr; 426 427 /* 428 * Find the range of pages which should be mapped. 429 */ 430 pi = offpgs; 431 lastpi = pi + btopr(rsp->rd_window_size); 432 if (lastpi > rsp->rd_npages) { 433 lastpi = rsp->rd_npages; 434 } 435 436 /* 437 * Load the mapping. 438 */ 439 vaddr = rsp->rd_window_virt; 440 for (; pi < lastpi; ++pi) { 441 hat_memload(kas.a_hat, vaddr, rsp->rd_ppa[pi], 442 (PROT_READ | PROT_WRITE) | HAT_NOSYNC, 443 HAT_LOAD_LOCK); 444 vaddr += ptob(1); 445 } 446 } else { 447 uint_t i; 448 pfn_t pfn; 449 450 /* 451 * Real OBP-created ramdisk: locate the physical range which 452 * contains this offset. 453 */ 454 for (i = 0; i < rsp->rd_nexisting; ++i) { 455 if (offset < rsp->rd_existing[i].size) { 456 break; 457 } 458 offset -= rsp->rd_existing[i].size; 459 } 460 ASSERT(i < rsp->rd_nexisting); 461 462 /* 463 * Load the mapping. 
464 */ 465 pfn = btop(rsp->rd_existing[i].phys + offset); 466 hat_devload(kas.a_hat, rsp->rd_window_virt, rsp->rd_window_size, 467 pfn, (PROT_READ | PROT_WRITE), 468 HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK); 469 } 470 } 471 472 /* 473 * Fakes up a disk geometry, and one big partition, based on the size 474 * of the file. This is needed because we allow newfs'ing the device, 475 * and newfs will do several disk ioctls to figure out the geometry and 476 * partition information. It uses that information to determine the parameters 477 * to pass to mkfs. Geometry is pretty much irrelevant these days, but we 478 * have to support it. 479 * 480 * Stolen from lofi.c - should maybe split out common code sometime. 481 */ 482 static void 483 rd_fake_disk_geometry(rd_devstate_t *rsp) 484 { 485 /* dk_geom - see dkio(7I) */ 486 /* 487 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs 488 * of sectors), but that breaks programs like fdisk which want to 489 * partition a disk by cylinder. With one cylinder, you can't create 490 * an fdisk partition and put pcfs on it for testing (hard to pick 491 * a number between one and one). 492 * 493 * The cheezy floppy test is an attempt to not have too few cylinders 494 * for a small file, or so many on a big file that you waste space 495 * for backup superblocks or cylinder group structures. 496 */ 497 if (rsp->rd_size < (2 * 1024 * 1024)) /* floppy? 
*/ 498 rsp->rd_dkg.dkg_ncyl = rsp->rd_size / (100 * 1024); 499 else 500 rsp->rd_dkg.dkg_ncyl = rsp->rd_size / (300 * 1024); 501 /* in case file file is < 100k */ 502 if (rsp->rd_dkg.dkg_ncyl == 0) 503 rsp->rd_dkg.dkg_ncyl = 1; 504 rsp->rd_dkg.dkg_acyl = 0; 505 rsp->rd_dkg.dkg_bcyl = 0; 506 rsp->rd_dkg.dkg_nhead = 1; 507 rsp->rd_dkg.dkg_obs1 = 0; 508 rsp->rd_dkg.dkg_intrlv = 0; 509 rsp->rd_dkg.dkg_obs2 = 0; 510 rsp->rd_dkg.dkg_obs3 = 0; 511 rsp->rd_dkg.dkg_apc = 0; 512 rsp->rd_dkg.dkg_rpm = 7200; 513 rsp->rd_dkg.dkg_pcyl = rsp->rd_dkg.dkg_ncyl + rsp->rd_dkg.dkg_acyl; 514 rsp->rd_dkg.dkg_nsect = rsp->rd_size / 515 (DEV_BSIZE * rsp->rd_dkg.dkg_ncyl); 516 rsp->rd_dkg.dkg_write_reinstruct = 0; 517 rsp->rd_dkg.dkg_read_reinstruct = 0; 518 519 /* vtoc - see dkio(7I) */ 520 bzero(&rsp->rd_vtoc, sizeof (struct vtoc)); 521 rsp->rd_vtoc.v_sanity = VTOC_SANE; 522 rsp->rd_vtoc.v_version = V_VERSION; 523 bcopy(RD_DRIVER_NAME, rsp->rd_vtoc.v_volume, 7); 524 rsp->rd_vtoc.v_sectorsz = DEV_BSIZE; 525 rsp->rd_vtoc.v_nparts = 1; 526 rsp->rd_vtoc.v_part[0].p_tag = V_UNASSIGNED; 527 rsp->rd_vtoc.v_part[0].p_flag = V_UNMNT; 528 rsp->rd_vtoc.v_part[0].p_start = (daddr_t)0; 529 /* 530 * The partition size cannot just be the number of sectors, because 531 * that might not end on a cylinder boundary. And if that's the case, 532 * newfs/mkfs will print a scary warning. So just figure the size 533 * based on the number of cylinders and sectors/cylinder. 
534 */ 535 rsp->rd_vtoc.v_part[0].p_size = rsp->rd_dkg.dkg_pcyl * 536 rsp->rd_dkg.dkg_nsect * rsp->rd_dkg.dkg_nhead; 537 538 /* dk_cinfo - see dkio(7I) */ 539 bzero(&rsp->rd_ci, sizeof (struct dk_cinfo)); 540 (void) strcpy(rsp->rd_ci.dki_cname, RD_DRIVER_NAME); 541 rsp->rd_ci.dki_ctype = DKC_MD; 542 rsp->rd_ci.dki_flags = 0; 543 rsp->rd_ci.dki_cnum = 0; 544 rsp->rd_ci.dki_addr = 0; 545 rsp->rd_ci.dki_space = 0; 546 rsp->rd_ci.dki_prio = 0; 547 rsp->rd_ci.dki_vec = 0; 548 (void) strcpy(rsp->rd_ci.dki_dname, RD_DRIVER_NAME); 549 rsp->rd_ci.dki_unit = 0; 550 rsp->rd_ci.dki_slave = 0; 551 rsp->rd_ci.dki_partition = 0; 552 /* 553 * newfs uses this to set maxcontig. Must not be < 16, or it 554 * will be 0 when newfs multiplies it by DEV_BSIZE and divides 555 * it by the block size. Then tunefs doesn't work because 556 * maxcontig is 0. 557 */ 558 rsp->rd_ci.dki_maxtransfer = 16; 559 } 560 561 /* 562 * Deallocate resources (virtual and physical, device nodes, structures) 563 * from a ramdisk. 564 */ 565 static void 566 rd_dealloc_resources(rd_devstate_t *rsp) 567 { 568 dev_info_t *dip = rsp->rd_dip; 569 char namebuf[RD_NAME_LEN + 5]; 570 dev_t fulldev; 571 572 if (rsp->rd_window_virt != NULL) { 573 if (rsp->rd_window_base != RD_WINDOW_NOT_MAPPED) { 574 rd_unmap_window(rsp); 575 } 576 vmem_free(heap_arena, rsp->rd_window_virt, rsp->rd_window_size); 577 } 578 mutex_destroy(&rsp->rd_device_lock); 579 580 if (rsp->rd_existing) { 581 ddi_prop_free(rsp->rd_existing); 582 } 583 if (rsp->rd_ppa != NULL) { 584 rd_phys_free(rsp->rd_ppa, rsp->rd_npages); 585 } 586 587 /* 588 * Remove the block and raw device nodes. 
589 */ 590 if (dip == rd_dip) { 591 (void) snprintf(namebuf, sizeof (namebuf), "%s", 592 rsp->rd_name); 593 ddi_remove_minor_node(dip, namebuf); 594 (void) snprintf(namebuf, sizeof (namebuf), "%s,raw", 595 rsp->rd_name); 596 ddi_remove_minor_node(dip, namebuf); 597 } else { 598 ddi_remove_minor_node(dip, "a"); 599 ddi_remove_minor_node(dip, "a,raw"); 600 } 601 602 /* 603 * Remove the "Size" and "Nblocks" properties. 604 */ 605 fulldev = makedevice(ddi_driver_major(dip), rsp->rd_minor); 606 (void) ddi_prop_remove(fulldev, dip, SIZE_PROP_NAME); 607 (void) ddi_prop_remove(fulldev, dip, NBLOCKS_PROP_NAME); 608 609 if (rsp->rd_kstat) { 610 kstat_delete(rsp->rd_kstat); 611 mutex_destroy(&rsp->rd_kstat_lock); 612 } 613 614 ddi_soft_state_free(rd_statep, rsp->rd_minor); 615 } 616 617 /* 618 * Allocate resources (virtual and physical, device nodes, structures) 619 * to a ramdisk. 620 */ 621 static rd_devstate_t * 622 rd_alloc_resources(char *name, size_t size, dev_info_t *dip) 623 { 624 minor_t minor; 625 rd_devstate_t *rsp; 626 char namebuf[RD_NAME_LEN + 5]; 627 dev_t fulldev; 628 int64_t Nblocks_prop_val; 629 int64_t Size_prop_val; 630 631 minor = rd_find_free_minor(); 632 if (ddi_soft_state_zalloc(rd_statep, minor) == DDI_FAILURE) { 633 return (NULL); 634 } 635 rsp = ddi_get_soft_state(rd_statep, minor); 636 637 (void) strcpy(rsp->rd_name, name); 638 rsp->rd_dip = dip; 639 rsp->rd_minor = minor; 640 rsp->rd_size = size; 641 642 /* 643 * Allocate virtual window onto ramdisk. 644 */ 645 mutex_init(&rsp->rd_device_lock, NULL, MUTEX_DRIVER, NULL); 646 rsp->rd_window_base = RD_WINDOW_NOT_MAPPED; 647 rsp->rd_window_size = PAGESIZE; 648 rsp->rd_window_virt = vmem_alloc(heap_arena, 649 rsp->rd_window_size, VM_SLEEP); 650 if (rsp->rd_window_virt == NULL) { 651 goto create_failed; 652 } 653 654 /* 655 * Allocate physical memory for non-OBP ramdisks. 656 * Create pseudo block and raw device nodes. 
657 */ 658 if (dip == rd_dip) { 659 rsp->rd_npages = btopr(size); 660 rsp->rd_ppa = rd_phys_alloc(rsp->rd_npages); 661 if (rsp->rd_ppa == NULL) { 662 goto create_failed; 663 } 664 665 /* 666 * For non-OBP ramdisks the device nodes are: 667 * 668 * /devices/pseudo/ramdisk@0:<diskname> 669 * /devices/pseudo/ramdisk@0:<diskname>,raw 670 */ 671 (void) snprintf(namebuf, sizeof (namebuf), "%s", 672 rsp->rd_name); 673 if (ddi_create_minor_node(dip, namebuf, S_IFBLK, minor, 674 DDI_PSEUDO, 0) == DDI_FAILURE) { 675 goto create_failed; 676 } 677 (void) snprintf(namebuf, sizeof (namebuf), "%s,raw", 678 rsp->rd_name); 679 if (ddi_create_minor_node(dip, namebuf, S_IFCHR, minor, 680 DDI_PSEUDO, 0) == DDI_FAILURE) { 681 goto create_failed; 682 } 683 } else { 684 /* 685 * For OBP-created ramdisks the device nodes are: 686 * 687 * /devices/ramdisk-<diskname>:a 688 * /devices/ramdisk-<diskname>:a,raw 689 */ 690 if (ddi_create_minor_node(dip, "a", S_IFBLK, minor, 691 DDI_PSEUDO, 0) == DDI_FAILURE) { 692 goto create_failed; 693 } 694 if (ddi_create_minor_node(dip, "a,raw", S_IFCHR, minor, 695 DDI_PSEUDO, 0) == DDI_FAILURE) { 696 goto create_failed; 697 } 698 } 699 700 /* 701 * Create the "Size" and "Nblocks" properties. 702 */ 703 fulldev = makedevice(ddi_driver_major(dip), minor); 704 Size_prop_val = size; 705 if ((ddi_prop_update_int64(fulldev, dip, 706 SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) { 707 goto create_failed; 708 } 709 Nblocks_prop_val = size / DEV_BSIZE; 710 if ((ddi_prop_update_int64(fulldev, dip, 711 NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) { 712 goto create_failed; 713 } 714 715 /* 716 * Allocate kstat stuff. 
717 */ 718 rsp->rd_kstat = kstat_create(RD_DRIVER_NAME, minor, NULL, 719 "disk", KSTAT_TYPE_IO, 1, 0); 720 if (rsp->rd_kstat) { 721 mutex_init(&rsp->rd_kstat_lock, NULL, 722 MUTEX_DRIVER, NULL); 723 rsp->rd_kstat->ks_lock = &rsp->rd_kstat_lock; 724 kstat_install(rsp->rd_kstat); 725 } 726 727 rd_fake_disk_geometry(rsp); 728 729 return (rsp); 730 731 create_failed: 732 /* 733 * Cleanup. 734 */ 735 rd_dealloc_resources(rsp); 736 737 return (NULL); 738 } 739 740 /* 741 * Undo what we did in rd_attach, freeing resources and removing things which 742 * we installed. The system framework guarantees we are not active with this 743 * devinfo node in any other entry points at this time. 744 */ 745 static int 746 rd_common_detach(dev_info_t *dip) 747 { 748 if (dip == rd_dip) { 749 /* 750 * Pseudo node: can't detach if any pseudo ramdisks exist. 751 */ 752 if (rd_is_busy()) { 753 return (DDI_FAILURE); 754 } 755 ddi_soft_state_free(rd_statep, RD_CTL_MINOR); 756 rd_dip = NULL; 757 } else { 758 /* 759 * A 'real' ramdisk; find the state and free resources. 760 */ 761 rd_devstate_t *rsp; 762 763 if ((rsp = rd_find_dip_state(dip)) != NULL) { 764 rd_dealloc_resources(rsp); 765 } 766 } 767 ddi_remove_minor_node(dip, NULL); 768 769 return (DDI_SUCCESS); 770 } 771 772 static int 773 rd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 774 { 775 char *name; 776 rd_existing_t *ep = NULL; 777 uint_t nep, i; 778 size_t size = 0; 779 rd_devstate_t *rsp; 780 781 switch (cmd) { 782 783 case DDI_ATTACH: 784 mutex_enter(&rd_lock); 785 786 /* 787 * For pseudo ramdisk devinfo set up state 0 and :ctl device; 788 * else it's an OBP-created ramdisk. 789 */ 790 if (is_pseudo_device(dip)) { 791 rd_dip = dip; 792 rd_init_tuneables(); 793 794 /* 795 * The zeroth minor is reserved for the ramdisk 796 * 'control' device. 
797 */ 798 if (ddi_soft_state_zalloc(rd_statep, RD_CTL_MINOR) == 799 DDI_FAILURE) { 800 goto attach_failed; 801 } 802 rsp = ddi_get_soft_state(rd_statep, RD_CTL_MINOR); 803 rsp->rd_dip = dip; 804 805 if (ddi_create_minor_node(dip, RD_CTL_NODE, 806 S_IFCHR, 0, DDI_PSEUDO, NULL) == DDI_FAILURE) { 807 goto attach_failed; 808 } 809 } else { 810 RD_STRIP_PREFIX(name, ddi_node_name(dip)); 811 812 if (strlen(name) > RD_NAME_LEN) { 813 cmn_err(CE_CONT, 814 "%s: name too long - ignoring\n", name); 815 goto attach_failed; 816 } 817 818 /* 819 * An OBP-created ramdisk must have an 'existing' 820 * property; get and check it. 821 */ 822 if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, dip, 823 DDI_PROP_DONTPASS, RD_EXISTING_PROP_NAME, 824 (uchar_t **)&ep, &nep) != DDI_SUCCESS) { 825 cmn_err(CE_CONT, 826 "%s: " RD_EXISTING_PROP_NAME 827 " property missing\n", name); 828 goto attach_failed; 829 } 830 if (nep == 0 || (nep % sizeof (*ep)) != 0) { 831 cmn_err(CE_CONT, 832 "%s: " RD_EXISTING_PROP_NAME 833 " illegal size\n", name); 834 goto attach_failed; 835 } 836 nep /= sizeof (*ep); 837 838 /* 839 * Calculate the size of the ramdisk. 840 */ 841 for (i = 0; i < nep; ++i) { 842 size += ep[i].size; 843 } 844 845 /* 846 * Allocate driver resources for the ramdisk. 847 */ 848 if ((rsp = rd_alloc_resources(name, size, 849 dip)) == NULL) { 850 goto attach_failed; 851 } 852 853 rsp->rd_existing = ep; 854 rsp->rd_nexisting = nep; 855 } 856 857 mutex_exit(&rd_lock); 858 859 ddi_report_dev(dip); 860 861 return (DDI_SUCCESS); 862 863 case DDI_RESUME: 864 return (DDI_SUCCESS); 865 866 default: 867 return (DDI_FAILURE); 868 } 869 870 attach_failed: 871 /* 872 * Use our common detach routine to unallocate any stuff which 873 * was allocated above. 
874 */ 875 (void) rd_common_detach(dip); 876 mutex_exit(&rd_lock); 877 878 if (ep != NULL) { 879 ddi_prop_free(ep); 880 } 881 return (DDI_FAILURE); 882 } 883 884 static int 885 rd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 886 { 887 int e; 888 889 switch (cmd) { 890 891 case DDI_DETACH: 892 mutex_enter(&rd_lock); 893 e = rd_common_detach(dip); 894 mutex_exit(&rd_lock); 895 896 return (e); 897 898 case DDI_SUSPEND: 899 return (DDI_SUCCESS); 900 901 default: 902 return (DDI_FAILURE); 903 } 904 } 905 906 /*ARGSUSED*/ 907 static int 908 rd_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 909 { 910 rd_devstate_t *rsp; 911 912 switch (infocmd) { 913 case DDI_INFO_DEVT2DEVINFO: 914 if ((rsp = ddi_get_soft_state(rd_statep, 915 getminor((dev_t)arg))) != NULL) { 916 *result = rsp->rd_dip; 917 return (DDI_SUCCESS); 918 } 919 *result = NULL; 920 return (DDI_FAILURE); 921 922 case DDI_INFO_DEVT2INSTANCE: 923 if ((rsp = ddi_get_soft_state(rd_statep, 924 getminor((dev_t)arg))) != NULL) { 925 *result = (void *)(uintptr_t) 926 ddi_get_instance(rsp->rd_dip); 927 return (DDI_SUCCESS); 928 } 929 *result = NULL; 930 return (DDI_FAILURE); 931 932 default: 933 return (DDI_FAILURE); 934 } 935 } 936 937 /*ARGSUSED3*/ 938 static int 939 rd_open(dev_t *devp, int flag, int otyp, cred_t *credp) 940 { 941 minor_t minor; 942 rd_devstate_t *rsp; 943 944 mutex_enter(&rd_lock); 945 946 minor = getminor(*devp); 947 if (minor == RD_CTL_MINOR) { 948 /* 949 * Master control device; must be opened exclusively. 
950 */ 951 if ((flag & FEXCL) != FEXCL || otyp != OTYP_CHR) { 952 mutex_exit(&rd_lock); 953 return (EINVAL); 954 } 955 956 rsp = ddi_get_soft_state(rd_statep, RD_CTL_MINOR); 957 if (rsp == NULL) { 958 mutex_exit(&rd_lock); 959 return (ENXIO); 960 } 961 962 if (rd_is_open(rsp)) { 963 mutex_exit(&rd_lock); 964 return (EBUSY); 965 } 966 (void) rd_opened(rsp, OTYP_CHR); 967 968 mutex_exit(&rd_lock); 969 970 return (0); 971 } 972 973 rsp = ddi_get_soft_state(rd_statep, minor); 974 if (rsp == NULL) { 975 mutex_exit(&rd_lock); 976 return (ENXIO); 977 } 978 979 if (rd_opened(rsp, otyp) == -1) { 980 mutex_exit(&rd_lock); 981 return (EINVAL); 982 } 983 984 mutex_exit(&rd_lock); 985 return (0); 986 } 987 988 /*ARGSUSED*/ 989 static int 990 rd_close(dev_t dev, int flag, int otyp, struct cred *credp) 991 { 992 minor_t minor; 993 rd_devstate_t *rsp; 994 995 mutex_enter(&rd_lock); 996 997 minor = getminor(dev); 998 999 rsp = ddi_get_soft_state(rd_statep, minor); 1000 if (rsp == NULL) { 1001 mutex_exit(&rd_lock); 1002 return (EINVAL); 1003 } 1004 1005 rd_closed(rsp, otyp); 1006 1007 mutex_exit(&rd_lock); 1008 1009 return (0); 1010 } 1011 1012 static void 1013 rd_minphys(struct buf *bp) 1014 { 1015 if (bp->b_bcount > rd_maxphys) { 1016 bp->b_bcount = rd_maxphys; 1017 } 1018 } 1019 1020 static void 1021 rd_rw(rd_devstate_t *rsp, struct buf *bp, offset_t offset, size_t nbytes) 1022 { 1023 int reading = bp->b_flags & B_READ; 1024 caddr_t buf_addr; 1025 1026 bp_mapin(bp); 1027 buf_addr = bp->b_un.b_addr; 1028 1029 while (nbytes > 0) { 1030 offset_t off_in_window; 1031 size_t rem_in_window, copy_bytes; 1032 caddr_t raddr; 1033 1034 mutex_enter(&rsp->rd_device_lock); 1035 rd_map_window(rsp, offset); 1036 1037 off_in_window = offset - rsp->rd_window_base; 1038 rem_in_window = rsp->rd_window_size - off_in_window; 1039 1040 raddr = rsp->rd_window_virt + off_in_window; 1041 copy_bytes = MIN(nbytes, rem_in_window); 1042 1043 if (reading) { 1044 (void) bcopy(raddr, buf_addr, copy_bytes); 1045 
} else { 1046 (void) bcopy(buf_addr, raddr, copy_bytes); 1047 } 1048 mutex_exit(&rsp->rd_device_lock); 1049 1050 offset += copy_bytes; 1051 buf_addr += copy_bytes; 1052 nbytes -= copy_bytes; 1053 } 1054 } 1055 1056 static int 1057 rd_strategy(struct buf *bp) 1058 { 1059 rd_devstate_t *rsp; 1060 offset_t offset; 1061 1062 rsp = ddi_get_soft_state(rd_statep, getminor(bp->b_edev)); 1063 offset = bp->b_blkno * DEV_BSIZE; 1064 1065 if (rsp == NULL) { 1066 bp->b_error = ENXIO; 1067 bp->b_flags |= B_ERROR; 1068 } else if (offset >= rsp->rd_size) { 1069 bp->b_error = EINVAL; 1070 bp->b_flags |= B_ERROR; 1071 } else { 1072 size_t nbytes; 1073 1074 if (rsp->rd_kstat) { 1075 mutex_enter(rsp->rd_kstat->ks_lock); 1076 kstat_runq_enter(KSTAT_IO_PTR(rsp->rd_kstat)); 1077 mutex_exit(rsp->rd_kstat->ks_lock); 1078 } 1079 1080 nbytes = min(bp->b_bcount, rsp->rd_size - offset); 1081 1082 rd_rw(rsp, bp, offset, nbytes); 1083 1084 bp->b_resid = bp->b_bcount - nbytes; 1085 1086 if (rsp->rd_kstat) { 1087 kstat_io_t *kioptr; 1088 1089 mutex_enter(rsp->rd_kstat->ks_lock); 1090 kioptr = KSTAT_IO_PTR(rsp->rd_kstat); 1091 if (bp->b_flags & B_READ) { 1092 kioptr->nread += nbytes; 1093 kioptr->reads++; 1094 } else { 1095 kioptr->nwritten += nbytes; 1096 kioptr->writes++; 1097 } 1098 kstat_runq_exit(kioptr); 1099 mutex_exit(rsp->rd_kstat->ks_lock); 1100 } 1101 } 1102 1103 biodone(bp); 1104 return (0); 1105 } 1106 1107 /*ARGSUSED*/ 1108 static int 1109 rd_read(dev_t dev, struct uio *uiop, cred_t *credp) 1110 { 1111 rd_devstate_t *rsp; 1112 1113 rsp = ddi_get_soft_state(rd_statep, getminor(dev)); 1114 1115 if (uiop->uio_offset >= rsp->rd_size) 1116 return (EINVAL); 1117 1118 return (physio(rd_strategy, NULL, dev, B_READ, rd_minphys, uiop)); 1119 } 1120 1121 /*ARGSUSED*/ 1122 static int 1123 rd_write(dev_t dev, register struct uio *uiop, cred_t *credp) 1124 { 1125 rd_devstate_t *rsp; 1126 1127 rsp = ddi_get_soft_state(rd_statep, getminor(dev)); 1128 1129 if (uiop->uio_offset >= rsp->rd_size) 1130 
return (EINVAL); 1131 1132 return (physio(rd_strategy, NULL, dev, B_WRITE, rd_minphys, uiop)); 1133 } 1134 1135 /*ARGSUSED*/ 1136 static int 1137 rd_create_disk(dev_t dev, struct rd_ioctl *urip, int mode, int *rvalp) 1138 { 1139 struct rd_ioctl kri; 1140 size_t size; 1141 rd_devstate_t *rsp; 1142 1143 if (ddi_copyin(urip, &kri, sizeof (kri), mode) == -1) { 1144 return (EFAULT); 1145 } 1146 1147 kri.ri_name[RD_NAME_LEN] = '\0'; 1148 1149 size = kri.ri_size; 1150 if (size == 0) { 1151 return (EINVAL); 1152 } 1153 size = ptob(btopr(size)); 1154 1155 mutex_enter(&rd_lock); 1156 1157 if (rd_find_named_disk(kri.ri_name) != NULL) { 1158 mutex_exit(&rd_lock); 1159 return (EEXIST); 1160 } 1161 1162 rsp = rd_alloc_resources(kri.ri_name, size, rd_dip); 1163 if (rsp == NULL) { 1164 mutex_exit(&rd_lock); 1165 return (EAGAIN); 1166 } 1167 1168 mutex_exit(&rd_lock); 1169 1170 return (ddi_copyout(&kri, urip, sizeof (kri), mode) == -1 ? EFAULT : 0); 1171 } 1172 1173 /*ARGSUSED*/ 1174 static int 1175 rd_delete_disk(dev_t dev, struct rd_ioctl *urip, int mode) 1176 { 1177 struct rd_ioctl kri; 1178 rd_devstate_t *rsp; 1179 1180 if (ddi_copyin(urip, &kri, sizeof (kri), mode) == -1) { 1181 return (EFAULT); 1182 } 1183 1184 kri.ri_name[RD_NAME_LEN] = '\0'; 1185 1186 mutex_enter(&rd_lock); 1187 1188 rsp = rd_find_named_disk(kri.ri_name); 1189 if (rsp == NULL || rsp->rd_dip != rd_dip) { 1190 mutex_exit(&rd_lock); 1191 return (EINVAL); 1192 } 1193 if (rd_is_open(rsp)) { 1194 mutex_exit(&rd_lock); 1195 return (EBUSY); 1196 } 1197 1198 rd_dealloc_resources(rsp); 1199 1200 mutex_exit(&rd_lock); 1201 1202 return (0); 1203 } 1204 1205 /*ARGSUSED*/ 1206 static int 1207 rd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 1208 { 1209 minor_t minor; 1210 int error; 1211 enum dkio_state dkstate; 1212 rd_devstate_t *rsp; 1213 1214 minor = getminor(dev); 1215 1216 /* 1217 * Ramdisk ioctls only apply to the master device. 
1218 */ 1219 if (minor == RD_CTL_MINOR) { 1220 struct rd_ioctl *rip = (struct rd_ioctl *)arg; 1221 1222 /* 1223 * The query commands only need read-access - i.e., normal 1224 * users are allowed to do those on the controlling device 1225 * as long as they can open it read-only. 1226 */ 1227 switch (cmd) { 1228 case RD_CREATE_DISK: 1229 if ((mode & FWRITE) == 0) 1230 return (EPERM); 1231 return (rd_create_disk(dev, rip, mode, rvalp)); 1232 1233 case RD_DELETE_DISK: 1234 if ((mode & FWRITE) == 0) 1235 return (EPERM); 1236 return (rd_delete_disk(dev, rip, mode)); 1237 1238 default: 1239 return (EINVAL); 1240 } 1241 } 1242 1243 rsp = ddi_get_soft_state(rd_statep, minor); 1244 if (rsp == NULL) { 1245 return (ENXIO); 1246 } 1247 1248 /* 1249 * These are for faking out utilities like newfs. 1250 */ 1251 switch (cmd) { 1252 case DKIOCGVTOC: 1253 switch (ddi_model_convert_from(mode & FMODELS)) { 1254 case DDI_MODEL_ILP32: { 1255 struct vtoc32 vtoc32; 1256 1257 vtoctovtoc32(rsp->rd_vtoc, vtoc32); 1258 if (ddi_copyout(&vtoc32, (void *)arg, 1259 sizeof (struct vtoc32), mode)) 1260 return (EFAULT); 1261 } 1262 break; 1263 1264 case DDI_MODEL_NONE: 1265 if (ddi_copyout(&rsp->rd_vtoc, (void *)arg, 1266 sizeof (struct vtoc), mode)) 1267 return (EFAULT); 1268 break; 1269 } 1270 return (0); 1271 case DKIOCINFO: 1272 error = ddi_copyout(&rsp->rd_ci, (void *)arg, 1273 sizeof (struct dk_cinfo), mode); 1274 if (error) 1275 return (EFAULT); 1276 return (0); 1277 case DKIOCG_VIRTGEOM: 1278 case DKIOCG_PHYGEOM: 1279 case DKIOCGGEOM: 1280 error = ddi_copyout(&rsp->rd_dkg, (void *)arg, 1281 sizeof (struct dk_geom), mode); 1282 if (error) 1283 return (EFAULT); 1284 return (0); 1285 case DKIOCSTATE: 1286 /* the file is always there */ 1287 dkstate = DKIO_INSERTED; 1288 error = ddi_copyout(&dkstate, (void *)arg, 1289 sizeof (enum dkio_state), mode); 1290 if (error) 1291 return (EFAULT); 1292 return (0); 1293 default: 1294 return (ENOTTY); 1295 } 1296 } 1297 1298 1299 static struct cb_ops 
rd_cb_ops = { 1300 rd_open, 1301 rd_close, 1302 rd_strategy, 1303 nodev, 1304 nodev, /* dump */ 1305 rd_read, 1306 rd_write, 1307 rd_ioctl, 1308 nodev, /* devmap */ 1309 nodev, /* mmap */ 1310 nodev, /* segmap */ 1311 nochpoll, /* poll */ 1312 ddi_prop_op, 1313 NULL, 1314 D_NEW | D_MP 1315 }; 1316 1317 static struct dev_ops rd_ops = { 1318 DEVO_REV, 1319 0, 1320 rd_getinfo, 1321 nulldev, /* identify */ 1322 nulldev, /* probe */ 1323 rd_attach, 1324 rd_detach, 1325 nodev, /* reset */ 1326 &rd_cb_ops, 1327 (struct bus_ops *)0 1328 }; 1329 1330 1331 extern struct mod_ops mod_driverops; 1332 1333 static struct modldrv modldrv = { 1334 &mod_driverops, 1335 "ramdisk driver v%I%", 1336 &rd_ops 1337 }; 1338 1339 static struct modlinkage modlinkage = { 1340 MODREV_1, 1341 &modldrv, 1342 0 1343 }; 1344 1345 int 1346 _init(void) 1347 { 1348 int e; 1349 1350 if ((e = ddi_soft_state_init(&rd_statep, 1351 sizeof (rd_devstate_t), 0)) != 0) { 1352 return (e); 1353 } 1354 1355 mutex_init(&rd_lock, NULL, MUTEX_DRIVER, NULL); 1356 1357 if ((e = mod_install(&modlinkage)) != 0) { 1358 mutex_destroy(&rd_lock); 1359 ddi_soft_state_fini(&rd_statep); 1360 } 1361 1362 return (e); 1363 } 1364 1365 int 1366 _fini(void) 1367 { 1368 int e; 1369 1370 if ((e = mod_remove(&modlinkage)) != 0) { 1371 return (e); 1372 } 1373 1374 ddi_soft_state_fini(&rd_statep); 1375 mutex_destroy(&rd_lock); 1376 1377 return (e); 1378 } 1379 1380 int 1381 _info(struct modinfo *modinfop) 1382 { 1383 return (mod_info(&modlinkage, modinfop)); 1384 } 1385