1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/scsi/scsi_types.h> 28 #include <sys/modctl.h> 29 #include <sys/cmlb.h> 30 #include <sys/types.h> 31 #include <sys/xpv_support.h> 32 #include <sys/xendev.h> 33 #include <sys/gnttab.h> 34 #include <public/xen.h> 35 #include <public/grant_table.h> 36 #include <io/xdf.h> 37 #include <sys/vtoc.h> 38 #include <sys/dkio.h> 39 #include <sys/dktp/dadev.h> 40 #include <sys/dktp/dadkio.h> 41 #include <sys/dktp/tgdk.h> 42 #include <sys/dktp/bbh.h> 43 #include <sys/dktp/cmdk.h> 44 #include <sys/dktp/altsctr.h> 45 46 /* 47 * General Notes 48 * 49 * We don't support disks with bad block mappins. We have this 50 * limitation because the underlying xdf driver doesn't support 51 * bad block remapping. If there is a need to support this feature 52 * it should be added directly to the xdf driver and we should just 53 * pass requests strait on through and let it handle the remapping. 54 * Also, it's probably worth pointing out that most modern disks do bad 55 * block remapping internally in the hardware so there's actually less 56 * of a chance of us ever discovering bad blocks. Also, in most cases 57 * this driver (and the xdf driver) will only be used with virtualized 58 * devices, so one might wonder why a virtual device would ever actually 59 * experience bad blocks. To wrap this up, you might be wondering how 60 * these bad block mappings get created and how they are managed. Well, 61 * there are two tools for managing bad block mappings, format(1M) and 62 * addbadsec(1M). Format(1M) can be used to do a surface scan of a disk 63 * to attempt to find bad block and create mappings for them. Format(1M) 64 * and addbadsec(1M) can also be used to edit existing mappings that may 65 * be saved on the disk. 66 * 67 * The underlying PV driver that this driver passes on requests to is the 68 * xdf driver. Since in most cases the xdf driver doesn't deal with 69 * physical disks it has it's own algorithm for assigning a physical 70 * geometry to a virtual disk (ie, cylinder count, head count, etc.) 71 * The default values chosen by the xdf driver may not match those 72 * assigned to a disk by a hardware disk emulator in an HVM environment. 73 * This is a problem since these physical geometry attributes affect 74 * things like the partition table, backup label location, etc. So 75 * to emulate disk devices correctly we need to know the physical geometry 76 * that was assigned to a disk at the time of it's initalization. 77 * Normally in an HVM environment this information will passed to 78 * the BIOS and operating system from the hardware emulator that is 79 * emulating the disk devices. In the case of a solaris dom0+xvm 80 * this would be qemu. So to work around this issue, this driver will 81 * query the emulated hardware to get the assigned physical geometry 82 * and then pass this geometry onto the xdf driver so that it can use it. 83 * But really, this information is essentially metadata about the disk 84 * that should be kept with the disk image itself. (Assuming or course 85 * that a disk image is the actual backingstore for this emulated device.) 86 * This metadata should also be made available to PV drivers via a common 87 * mechamisn, probably the xenstore. The fact that this metadata isn't 88 * available outside of HVM domains means that it's difficult to move 89 * disks between HVM and PV domains, since a fully PV domain will have no 90 * way of knowing what the correct geometry of the target device is. 91 * (Short of reading the disk, looking for things like partition tables 92 * and labels, and taking a best guess at what the geometry was when 93 * the disk was initialized. Unsuprisingly, qemu actually does this.) 94 * 95 * This driver has to map cmdk device instances into their corresponding 96 * xdf device instances. We have to do this to ensure that when a user 97 * accesses a emulated cmdk device we map those accesses to the proper 98 * paravirtualized device. Basically what we need to know is how multiple 99 * 'disk' entries in a domU configuration file get mapped to emulated 100 * cmdk devices and to xdf devices. The 'disk' entry to xdf instance 101 * mappings we know because those are done within the Solaris xvdi code 102 * and the xpvd nexus driver. But the config to emulated devices mappings 103 * are handled entirely within the xen management tool chain and the 104 * hardware emulator. Since all the tools that establish these mappings 105 * live in dom0, dom0 should really supply us with this information, 106 * probably via the xenstore. Unfortunatly it doesn't so, since there's 107 * no good way to determine this mapping dynamically, this driver uses 108 * a hard coded set of static mappings. These mappings are hardware 109 * emulator specific because each different hardware emulator could have 110 * a different device tree with different cmdk device paths. This 111 * means that if we want to continue to use this static mapping approach 112 * to allow Solaris to run on different hardware emulators we'll have 113 * to analyze each of those emulators to determine what paths they 114 * use and hard code those paths into this driver. yech. This metadata 115 * really needs to be supplied to us by dom0. 116 * 117 * This driver access underlying xdf nodes. Unfortunatly, devices 118 * must create minor nodes during attach, and for disk devices to create 119 * minor nodes, they have to look at the label on the disk, so this means 120 * that disk drivers must be able to access a disk contents during 121 * attach. That means that this disk driver must be able to access 122 * underlying xdf nodes during attach. Unfortunatly, due to device tree 123 * locking restrictions, we cannot have an attach operation occuring on 124 * this device and then attempt to access another device which may 125 * cause another attach to occur in a different device tree branch 126 * since this could result in deadlock. Hence, this driver can only 127 * access xdf device nodes that we know are attached, and it can't use 128 * any ddi interfaces to access those nodes if those interfaces could 129 * trigger an attach of the xdf device. So this driver works around 130 * these restrictions by talking directly to xdf devices via 131 * xdf_hvm_hold(). This interface takes a pathname to an xdf device, 132 * and if that device is already attached then it returns the a held dip 133 * pointer for that device node. This prevents us from getting into 134 * deadlock situations, but now we need a mechanism to ensure that all 135 * the xdf device nodes this driver might access are attached before 136 * this driver tries to access them. This is accomplished via the 137 * hvmboot_rootconf() callback which is invoked just before root is 138 * mounted. hvmboot_rootconf() will attach xpvd and tell it to configure 139 * all xdf device visible to the system. All these xdf device nodes 140 * will also be marked with the "ddi-no-autodetach" property so that 141 * once they are configured, the will not be automatically unconfigured. 142 * The only way that they could be unconfigured is if the administrator 143 * explicitly attempts to unload required modules via rem_drv(1M) 144 * or modunload(1M). 145 */ 146 147 /* 148 * 16 paritions + fdisk (see xdf.h) 149 */ 150 #define XDF_DEV2UNIT(dev) XDF_INST((getminor((dev)))) 151 #define XDF_DEV2PART(dev) XDF_PART((getminor((dev)))) 152 153 #define OTYP_VALID(otyp) ((otyp == OTYP_BLK) || \ 154 (otyp == OTYP_CHR) || \ 155 (otyp == OTYP_LYR)) 156 157 #define PV_CMDK_NODES 4 158 159 typedef struct hvm_to_pv { 160 char *h2p_hvm_path; 161 char *h2p_pv_path; 162 } hvm_to_pv_t; 163 164 /* 165 */ 166 static hvm_to_pv_t pv_cmdk_h2p_xen_qemu[] = { 167 /* 168 * The paths mapping here are very specific to xen and qemu. When a 169 * domU is booted under xen in HVM mode, qemu is normally used to 170 * emulate up to four ide disks. These disks always have the four 171 * path listed below. To configure an emulated ide device, the 172 * xen domain configuration file normally has an entry that looks 173 * like this: 174 * disk = [ 'file:/foo.img,hda,w' ] 175 * 176 * The part we're interested in is the 'hda', which we'll call the 177 * xen disk device name here. The xen management tools (which parse 178 * the xen domain configuration file and launch qemu) makes the 179 * following assumptions about this value: 180 * hda == emulated ide disk 0 (ide bus 0, master) 181 * hdb == emulated ide disk 1 (ide bus 0, slave) 182 * hdc == emulated ide disk 2 (ide bus 1, master) 183 * hdd == emulated ide disk 3 (ide bus 1, slave) 184 * 185 * (Uncoincidentally, these xen disk device names actually map to 186 * the /dev filesystem names of ide disk devices in Linux. So in 187 * Linux /dev/hda is the first ide disk.) So for the first part of 188 * our mapping we've just hardcoded the cmdk paths that we know 189 * qemu will use. 190 * 191 * To understand the second half of the mapping (ie, the xdf device 192 * that each emulated cmdk device should be mapped two) we need to 193 * know the solaris device node address that will be assigned to 194 * each xdf device. (The device node address is the decimal 195 * number that comes after the "xdf@" in the device path.) 196 * 197 * So the question becomes, how do we know what the xenstore device 198 * id for emulated disk will be? Well, it turns out that since the 199 * xen management tools expect the disk device names to be Linux 200 * device names, those same management tools assign each disk a 201 * device id that matches the dev_t of the corresponding device 202 * under Linux. (Big shocker.) This xen device name-to-id mapping 203 * is currently all hard coded here: 204 * xen.hg/tools/python/xen/util/blkif.py`blkdev_name_to_number() 205 * 206 * So looking at the code above we can see the following xen disk 207 * device name to xenstore device id mappings: 208 * 'hda' == 0t768 == ((3 * 256) + (0 * 64)) 209 * 'hdb' == 0t832 == ((3 * 256) + (1 * 64)) 210 * 'hdc' == 0t5632 == ((22 * 256) + (0 * 64)) 211 * 'hdd' == 0t5696 == ((22 * 256) + (1 * 64)) 212 */ 213 { "/pci@0,0/pci-ide@1,1/ide@0/cmdk@0,0", "/xpvd/xdf@768" }, 214 { "/pci@0,0/pci-ide@1,1/ide@0/cmdk@1,0", "/xpvd/xdf@832" }, 215 { "/pci@0,0/pci-ide@1,1/ide@1/cmdk@0,0", "/xpvd/xdf@5632" }, 216 { "/pci@0,0/pci-ide@1,1/ide@1/cmdk@1,0", "/xpvd/xdf@5696" }, 217 { NULL, 0 } 218 }; 219 220 typedef struct pv_cmdk { 221 dev_info_t *dk_dip; 222 cmlb_handle_t dk_cmlbhandle; 223 ddi_devid_t dk_devid; 224 kmutex_t dk_mutex; 225 dev_info_t *dk_xdf_dip; 226 dev_t dk_xdf_dev; 227 int dk_xdf_otyp_count[OTYPCNT][XDF_PEXT]; 228 ldi_handle_t dk_xdf_lh[XDF_PEXT]; 229 } pv_cmdk_t; 230 231 /* 232 * Globals 233 */ 234 static void *pv_cmdk_state; 235 static major_t pv_cmdk_major; 236 static hvm_to_pv_t *pv_cmdk_h2p; 237 238 /* 239 * Function prototypes for xdf callback functions 240 */ 241 extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *); 242 extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, 243 void *); 244 245 static boolean_t 246 pv_cmdk_isopen_part(struct pv_cmdk *dkp, int part) 247 { 248 int otyp; 249 250 ASSERT(MUTEX_HELD(&dkp->dk_mutex)); 251 252 for (otyp = 0; (otyp < OTYPCNT); otyp++) { 253 if (dkp->dk_xdf_otyp_count[otyp][part] != 0) 254 return (B_TRUE); 255 } 256 return (B_FALSE); 257 } 258 259 /* 260 * Cmlb ops vectors, allows the cmlb module to directly access the entire 261 * pv_cmdk disk device without going through any partitioning layers. 262 */ 263 /*ARGSUSED*/ 264 static int 265 pv_cmdk_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, 266 diskaddr_t start, size_t count, void *tg_cookie) 267 { 268 int instance = ddi_get_instance(dip); 269 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 270 271 if (dkp == NULL) 272 return (ENXIO); 273 274 return (xdf_lb_rdwr(dkp->dk_xdf_dip, cmd, bufaddr, start, count, 275 tg_cookie)); 276 } 277 278 /*ARGSUSED*/ 279 static int 280 pv_cmdk_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie) 281 { 282 int instance = ddi_get_instance(dip); 283 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 284 int err; 285 286 if (dkp == NULL) 287 return (ENXIO); 288 289 if (cmd == TG_GETVIRTGEOM) { 290 cmlb_geom_t pgeom, *vgeomp; 291 diskaddr_t capacity; 292 293 /* 294 * The native xdf driver doesn't support this ioctl. 295 * Intead of passing it on, emulate it here so that the 296 * results look the same as what we get for a real cmdk 297 * device. 298 * 299 * Get the real size of the device 300 */ 301 if ((err = xdf_lb_getinfo(dkp->dk_xdf_dip, 302 TG_GETPHYGEOM, &pgeom, tg_cookie)) != 0) 303 return (err); 304 capacity = pgeom.g_capacity; 305 306 /* 307 * If the controller returned us something that doesn't 308 * really fit into an Int 13/function 8 geometry 309 * result, just fail the ioctl. See PSARC 1998/313. 310 */ 311 if (capacity >= (63 * 254 * 1024)) 312 return (EINVAL); 313 314 vgeomp = (cmlb_geom_t *)arg; 315 vgeomp->g_capacity = capacity; 316 vgeomp->g_nsect = 63; 317 vgeomp->g_nhead = 254; 318 vgeomp->g_ncyl = capacity / (63 * 254); 319 vgeomp->g_acyl = 0; 320 vgeomp->g_secsize = 512; 321 vgeomp->g_intrlv = 1; 322 vgeomp->g_rpm = 3600; 323 return (0); 324 } 325 326 return (xdf_lb_getinfo(dkp->dk_xdf_dip, cmd, arg, tg_cookie)); 327 } 328 329 static cmlb_tg_ops_t pv_cmdk_lb_ops = { 330 TG_DK_OPS_VERSION_1, 331 pv_cmdk_lb_rdwr, 332 pv_cmdk_lb_getinfo 333 }; 334 335 /* 336 * devid management functions 337 */ 338 339 /* 340 * pv_cmdk_get_modser() is basically a local copy of 341 * cmdk_get_modser() modified to work without the dadk layer. 342 * (which the non-pv version of the cmdk driver uses.) 343 */ 344 static int 345 pv_cmdk_get_modser(struct pv_cmdk *dkp, int ioccmd, char *buf, int len) 346 { 347 struct scsi_device *scsi_device; 348 opaque_t ctlobjp; 349 dadk_ioc_string_t strarg; 350 char *s; 351 char ch; 352 boolean_t ret; 353 int i; 354 int tb; 355 356 strarg.is_buf = buf; 357 strarg.is_size = len; 358 scsi_device = ddi_get_driver_private(dkp->dk_dip); 359 ctlobjp = scsi_device->sd_address.a_hba_tran; 360 if (CTL_IOCTL(ctlobjp, 361 ioccmd, (uintptr_t)&strarg, FNATIVE | FKIOCTL) != 0) 362 return (0); 363 364 /* 365 * valid model/serial string must contain a non-zero non-space 366 * trim trailing spaces/NULL 367 */ 368 ret = B_FALSE; 369 s = buf; 370 for (i = 0; i < strarg.is_size; i++) { 371 ch = *s++; 372 if (ch != ' ' && ch != '\0') 373 tb = i + 1; 374 if (ch != ' ' && ch != '\0' && ch != '0') 375 ret = B_TRUE; 376 } 377 378 if (ret == B_FALSE) 379 return (0); 380 381 return (tb); 382 } 383 384 /* 385 * pv_cmdk_devid_modser() is basically a copy of cmdk_devid_modser() 386 * that has been modified to use local pv cmdk driver functions. 387 * 388 * Build a devid from the model and serial number 389 * Return DDI_SUCCESS or DDI_FAILURE. 390 */ 391 static int 392 pv_cmdk_devid_modser(struct pv_cmdk *dkp) 393 { 394 int rc = DDI_FAILURE; 395 char *hwid; 396 int modlen; 397 int serlen; 398 399 /* 400 * device ID is a concatenation of model number, '=', serial number. 401 */ 402 hwid = kmem_alloc(CMDK_HWIDLEN, KM_SLEEP); 403 modlen = pv_cmdk_get_modser(dkp, DIOCTL_GETMODEL, hwid, CMDK_HWIDLEN); 404 if (modlen == 0) 405 goto err; 406 407 hwid[modlen++] = '='; 408 serlen = pv_cmdk_get_modser(dkp, DIOCTL_GETSERIAL, 409 hwid + modlen, CMDK_HWIDLEN - modlen); 410 if (serlen == 0) 411 goto err; 412 413 hwid[modlen + serlen] = 0; 414 415 /* Initialize the device ID, trailing NULL not included */ 416 rc = ddi_devid_init(dkp->dk_dip, DEVID_ATA_SERIAL, modlen + serlen, 417 hwid, (ddi_devid_t *)&dkp->dk_devid); 418 if (rc != DDI_SUCCESS) 419 goto err; 420 421 kmem_free(hwid, CMDK_HWIDLEN); 422 return (DDI_SUCCESS); 423 424 err: 425 kmem_free(hwid, CMDK_HWIDLEN); 426 return (DDI_FAILURE); 427 } 428 429 /* 430 * pv_cmdk_devid_read() is basically a local copy of 431 * cmdk_devid_read() modified to work without the dadk layer. 432 * (which the non-pv version of the cmdk driver uses.) 433 * 434 * Read a devid from on the first block of the last track of 435 * the last cylinder. Make sure what we read is a valid devid. 436 * Return DDI_SUCCESS or DDI_FAILURE. 437 */ 438 static int 439 pv_cmdk_devid_read(struct pv_cmdk *dkp) 440 { 441 diskaddr_t blk; 442 struct dk_devid *dkdevidp; 443 uint_t *ip, chksum; 444 int i; 445 446 if (cmlb_get_devid_block(dkp->dk_cmlbhandle, &blk, 0) != 0) 447 return (DDI_FAILURE); 448 449 dkdevidp = kmem_zalloc(NBPSCTR, KM_SLEEP); 450 if (pv_cmdk_lb_rdwr(dkp->dk_dip, 451 TG_READ, dkdevidp, blk, NBPSCTR, NULL) != 0) 452 goto err; 453 454 /* Validate the revision */ 455 if ((dkdevidp->dkd_rev_hi != DK_DEVID_REV_MSB) || 456 (dkdevidp->dkd_rev_lo != DK_DEVID_REV_LSB)) 457 goto err; 458 459 /* Calculate the checksum */ 460 chksum = 0; 461 ip = (uint_t *)dkdevidp; 462 for (i = 0; i < ((NBPSCTR - sizeof (int))/sizeof (int)); i++) 463 chksum ^= ip[i]; 464 if (DKD_GETCHKSUM(dkdevidp) != chksum) 465 goto err; 466 467 /* Validate the device id */ 468 if (ddi_devid_valid((ddi_devid_t)dkdevidp->dkd_devid) != DDI_SUCCESS) 469 goto err; 470 471 /* keep a copy of the device id */ 472 i = ddi_devid_sizeof((ddi_devid_t)dkdevidp->dkd_devid); 473 dkp->dk_devid = kmem_alloc(i, KM_SLEEP); 474 bcopy(dkdevidp->dkd_devid, dkp->dk_devid, i); 475 kmem_free(dkdevidp, NBPSCTR); 476 return (DDI_SUCCESS); 477 478 err: 479 kmem_free(dkdevidp, NBPSCTR); 480 return (DDI_FAILURE); 481 } 482 483 /* 484 * pv_cmdk_devid_fabricate() is basically a local copy of 485 * cmdk_devid_fabricate() modified to work without the dadk layer. 486 * (which the non-pv version of the cmdk driver uses.) 487 * 488 * Create a devid and write it on the first block of the last track of 489 * the last cylinder. 490 * Return DDI_SUCCESS or DDI_FAILURE. 491 */ 492 static int 493 pv_cmdk_devid_fabricate(struct pv_cmdk *dkp) 494 { 495 ddi_devid_t devid = NULL; /* devid made by ddi_devid_init */ 496 struct dk_devid *dkdevidp = NULL; /* devid struct stored on disk */ 497 diskaddr_t blk; 498 uint_t *ip, chksum; 499 int i; 500 501 if (cmlb_get_devid_block(dkp->dk_cmlbhandle, &blk, 0) != 0) 502 return (DDI_FAILURE); 503 504 if (ddi_devid_init(dkp->dk_dip, DEVID_FAB, 0, NULL, &devid) != 505 DDI_SUCCESS) 506 return (DDI_FAILURE); 507 508 /* allocate a buffer */ 509 dkdevidp = (struct dk_devid *)kmem_zalloc(NBPSCTR, KM_SLEEP); 510 511 /* Fill in the revision */ 512 dkdevidp->dkd_rev_hi = DK_DEVID_REV_MSB; 513 dkdevidp->dkd_rev_lo = DK_DEVID_REV_LSB; 514 515 /* Copy in the device id */ 516 i = ddi_devid_sizeof(devid); 517 if (i > DK_DEVID_SIZE) 518 goto err; 519 bcopy(devid, dkdevidp->dkd_devid, i); 520 521 /* Calculate the chksum */ 522 chksum = 0; 523 ip = (uint_t *)dkdevidp; 524 for (i = 0; i < ((NBPSCTR - sizeof (int))/sizeof (int)); i++) 525 chksum ^= ip[i]; 526 527 /* Fill in the checksum */ 528 DKD_FORMCHKSUM(chksum, dkdevidp); 529 530 if (pv_cmdk_lb_rdwr(dkp->dk_dip, 531 TG_WRITE, dkdevidp, blk, NBPSCTR, NULL) != 0) 532 goto err; 533 534 kmem_free(dkdevidp, NBPSCTR); 535 536 dkp->dk_devid = devid; 537 return (DDI_SUCCESS); 538 539 err: 540 if (dkdevidp != NULL) 541 kmem_free(dkdevidp, NBPSCTR); 542 if (devid != NULL) 543 ddi_devid_free(devid); 544 return (DDI_FAILURE); 545 } 546 547 /* 548 * pv_cmdk_devid_setup() is basically a local copy ofcmdk_devid_setup() 549 * that has been modified to use local pv cmdk driver functions. 550 * 551 * Create and register the devid. 552 * There are 4 different ways we can get a device id: 553 * 1. Already have one - nothing to do 554 * 2. Build one from the drive's model and serial numbers 555 * 3. Read one from the disk (first sector of last track) 556 * 4. Fabricate one and write it on the disk. 557 * If any of these succeeds, register the deviceid 558 */ 559 static void 560 pv_cmdk_devid_setup(struct pv_cmdk *dkp) 561 { 562 int rc; 563 564 /* Try options until one succeeds, or all have failed */ 565 566 /* 1. All done if already registered */ 567 568 if (dkp->dk_devid != NULL) 569 return; 570 571 /* 2. Build a devid from the model and serial number */ 572 rc = pv_cmdk_devid_modser(dkp); 573 if (rc != DDI_SUCCESS) { 574 /* 3. Read devid from the disk, if present */ 575 rc = pv_cmdk_devid_read(dkp); 576 577 /* 4. otherwise make one up and write it on the disk */ 578 if (rc != DDI_SUCCESS) 579 rc = pv_cmdk_devid_fabricate(dkp); 580 } 581 582 /* If we managed to get a devid any of the above ways, register it */ 583 if (rc == DDI_SUCCESS) 584 (void) ddi_devid_register(dkp->dk_dip, dkp->dk_devid); 585 } 586 587 /* 588 * Local Functions 589 */ 590 static int 591 pv_cmdk_iodone(struct buf *bp) 592 { 593 struct buf *bp_orig = bp->b_chain; 594 595 /* Propegate back the io results */ 596 bp_orig->b_resid = bp->b_resid; 597 bioerror(bp_orig, geterror(bp)); 598 biodone(bp_orig); 599 600 freerbuf(bp); 601 return (0); 602 } 603 604 static int 605 pv_cmdkstrategy(struct buf *bp) 606 { 607 dev_t dev = bp->b_edev; 608 int instance = XDF_DEV2UNIT(dev); 609 int part = XDF_DEV2PART(dev); 610 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 611 dev_t xdf_devt; 612 struct buf *bp_clone; 613 614 /* 615 * Sanity checks that the dev_t associated with the buf we were 616 * passed actually corresponds us and that the partition we're 617 * trying to access is actually open. On debug kernels we'll 618 * panic and on non-debug kernels we'll return failure. 619 */ 620 ASSERT(getmajor(dev) == pv_cmdk_major); 621 if (getmajor(dev) != pv_cmdk_major) 622 goto err; 623 624 mutex_enter(&dkp->dk_mutex); 625 ASSERT(pv_cmdk_isopen_part(dkp, part)); 626 if (!pv_cmdk_isopen_part(dkp, part)) { 627 mutex_exit(&dkp->dk_mutex); 628 goto err; 629 } 630 mutex_exit(&dkp->dk_mutex); 631 632 /* clone this buffer */ 633 xdf_devt = dkp->dk_xdf_dev | part; 634 bp_clone = bioclone(bp, 0, bp->b_bcount, xdf_devt, bp->b_blkno, 635 pv_cmdk_iodone, NULL, KM_SLEEP); 636 bp_clone->b_chain = bp; 637 638 /* 639 * If we're being invoked on behalf of the physio() call in 640 * pv_cmdk_dioctl_rwcmd() then b_private will be set to 641 * XB_SLICE_NONE and we need to propegate this flag into the 642 * cloned buffer so that the xdf driver will see it. 643 */ 644 if (bp->b_private == (void *)XB_SLICE_NONE) 645 bp_clone->b_private = (void *)XB_SLICE_NONE; 646 647 /* 648 * Pass on the cloned buffer. Note that we don't bother to check 649 * for failure because the xdf strategy routine will have to 650 * invoke biodone() if it wants to return an error, which means 651 * that the pv_cmdk_iodone() callback will get invoked and it 652 * will propegate the error back up the stack and free the cloned 653 * buffer. 654 */ 655 ASSERT(dkp->dk_xdf_lh[part] != NULL); 656 return (ldi_strategy(dkp->dk_xdf_lh[part], bp_clone)); 657 658 err: 659 bioerror(bp, ENXIO); 660 bp->b_resid = bp->b_bcount; 661 biodone(bp); 662 return (0); 663 } 664 665 /*ARGSUSED*/ 666 static int 667 pv_cmdkread(dev_t dev, struct uio *uio, cred_t *credp) 668 { 669 int instance = XDF_DEV2UNIT(dev); 670 int part = XDF_DEV2PART(dev); 671 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 672 673 return (ldi_read(dkp->dk_xdf_lh[part], uio, credp)); 674 } 675 676 /*ARGSUSED*/ 677 static int 678 pv_cmdkwrite(dev_t dev, struct uio *uio, cred_t *credp) 679 { 680 int instance = XDF_DEV2UNIT(dev); 681 int part = XDF_DEV2PART(dev); 682 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 683 684 return (ldi_write(dkp->dk_xdf_lh[part], uio, credp)); 685 } 686 687 /*ARGSUSED*/ 688 static int 689 pv_cmdkaread(dev_t dev, struct aio_req *aio, cred_t *credp) 690 { 691 int instance = XDF_DEV2UNIT(dev); 692 int part = XDF_DEV2PART(dev); 693 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 694 return (ldi_aread(dkp->dk_xdf_lh[part], aio, credp)); 695 } 696 697 /*ARGSUSED*/ 698 static int 699 pv_cmdkawrite(dev_t dev, struct aio_req *aio, cred_t *credp) 700 { 701 int instance = XDF_DEV2UNIT(dev); 702 int part = XDF_DEV2PART(dev); 703 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 704 return (ldi_awrite(dkp->dk_xdf_lh[part], aio, credp)); 705 } 706 707 static int 708 pv_cmdkdump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 709 { 710 int instance = XDF_DEV2UNIT(dev); 711 int part = XDF_DEV2PART(dev); 712 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 713 714 return (ldi_dump(dkp->dk_xdf_lh[part], addr, blkno, nblk)); 715 } 716 717 /* 718 * pv_rwcmd_copyin() is a duplicate of rwcmd_copyin(). 719 */ 720 static int 721 pv_rwcmd_copyin(struct dadkio_rwcmd *rwcmdp, caddr_t inaddr, int flag) 722 { 723 switch (ddi_model_convert_from(flag)) { 724 case DDI_MODEL_ILP32: { 725 struct dadkio_rwcmd32 cmd32; 726 727 if (ddi_copyin(inaddr, &cmd32, 728 sizeof (struct dadkio_rwcmd32), flag)) { 729 return (EFAULT); 730 } 731 732 rwcmdp->cmd = cmd32.cmd; 733 rwcmdp->flags = cmd32.flags; 734 rwcmdp->blkaddr = (blkaddr_t)cmd32.blkaddr; 735 rwcmdp->buflen = cmd32.buflen; 736 rwcmdp->bufaddr = (caddr_t)(intptr_t)cmd32.bufaddr; 737 /* 738 * Note: we do not convert the 'status' field, 739 * as it should not contain valid data at this 740 * point. 741 */ 742 bzero(&rwcmdp->status, sizeof (rwcmdp->status)); 743 break; 744 } 745 case DDI_MODEL_NONE: { 746 if (ddi_copyin(inaddr, rwcmdp, 747 sizeof (struct dadkio_rwcmd), flag)) { 748 return (EFAULT); 749 } 750 } 751 } 752 return (0); 753 } 754 755 /* 756 * pv_rwcmd_copyout() is a duplicate of rwcmd_copyout(). 757 */ 758 static int 759 pv_rwcmd_copyout(struct dadkio_rwcmd *rwcmdp, caddr_t outaddr, int flag) 760 { 761 switch (ddi_model_convert_from(flag)) { 762 case DDI_MODEL_ILP32: { 763 struct dadkio_rwcmd32 cmd32; 764 765 cmd32.cmd = rwcmdp->cmd; 766 cmd32.flags = rwcmdp->flags; 767 cmd32.blkaddr = rwcmdp->blkaddr; 768 cmd32.buflen = rwcmdp->buflen; 769 ASSERT64(((uintptr_t)rwcmdp->bufaddr >> 32) == 0); 770 cmd32.bufaddr = (caddr32_t)(uintptr_t)rwcmdp->bufaddr; 771 772 cmd32.status.status = rwcmdp->status.status; 773 cmd32.status.resid = rwcmdp->status.resid; 774 cmd32.status.failed_blk_is_valid = 775 rwcmdp->status.failed_blk_is_valid; 776 cmd32.status.failed_blk = rwcmdp->status.failed_blk; 777 cmd32.status.fru_code_is_valid = 778 rwcmdp->status.fru_code_is_valid; 779 cmd32.status.fru_code = rwcmdp->status.fru_code; 780 781 bcopy(rwcmdp->status.add_error_info, 782 cmd32.status.add_error_info, DADKIO_ERROR_INFO_LEN); 783 784 if (ddi_copyout(&cmd32, outaddr, 785 sizeof (struct dadkio_rwcmd32), flag)) 786 return (EFAULT); 787 break; 788 } 789 case DDI_MODEL_NONE: { 790 if (ddi_copyout(rwcmdp, outaddr, 791 sizeof (struct dadkio_rwcmd), flag)) 792 return (EFAULT); 793 } 794 } 795 return (0); 796 } 797 798 static void 799 pv_cmdkmin(struct buf *bp) 800 { 801 if (bp->b_bcount > DK_MAXRECSIZE) 802 bp->b_bcount = DK_MAXRECSIZE; 803 } 804 805 static int 806 pv_cmdk_dioctl_rwcmd(dev_t dev, intptr_t arg, int flag) 807 { 808 struct dadkio_rwcmd *rwcmdp; 809 struct iovec aiov; 810 struct uio auio; 811 struct buf *bp; 812 int rw, status; 813 814 rwcmdp = kmem_alloc(sizeof (struct dadkio_rwcmd), KM_SLEEP); 815 status = pv_rwcmd_copyin(rwcmdp, (caddr_t)arg, flag); 816 817 if (status != 0) 818 goto out; 819 820 switch (rwcmdp->cmd) { 821 case DADKIO_RWCMD_READ: 822 case DADKIO_RWCMD_WRITE: 823 break; 824 default: 825 status = EINVAL; 826 goto out; 827 } 828 829 bzero((caddr_t)&aiov, sizeof (struct iovec)); 830 aiov.iov_base = rwcmdp->bufaddr; 831 aiov.iov_len = rwcmdp->buflen; 832 833 bzero((caddr_t)&auio, sizeof (struct uio)); 834 auio.uio_iov = &aiov; 835 auio.uio_iovcnt = 1; 836 auio.uio_loffset = (offset_t)rwcmdp->blkaddr * (offset_t)XB_BSIZE; 837 auio.uio_resid = rwcmdp->buflen; 838 auio.uio_segflg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE; 839 840 /* 841 * Tell the xdf driver that this I/O request is using an absolute 842 * offset. 843 */ 844 bp = getrbuf(KM_SLEEP); 845 bp->b_private = (void *)XB_SLICE_NONE; 846 847 rw = ((rwcmdp->cmd == DADKIO_RWCMD_WRITE) ? B_WRITE : B_READ); 848 status = physio(pv_cmdkstrategy, bp, dev, rw, pv_cmdkmin, &auio); 849 850 biofini(bp); 851 kmem_free(bp, sizeof (buf_t)); 852 853 if (status == 0) 854 status = pv_rwcmd_copyout(rwcmdp, (caddr_t)arg, flag); 855 856 out: 857 kmem_free(rwcmdp, sizeof (struct dadkio_rwcmd)); 858 return (status); 859 } 860 861 static int 862 pv_cmdkioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, 863 int *rvalp) 864 { 865 int instance = XDF_DEV2UNIT(dev); 866 int part = XDF_DEV2PART(dev); 867 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 868 int err; 869 870 switch (cmd) { 871 default: 872 return (ldi_ioctl(dkp->dk_xdf_lh[part], 873 cmd, arg, flag, credp, rvalp)); 874 case DKIOCGETWCE: 875 case DKIOCSETWCE: 876 return (EIO); 877 case DKIOCADDBAD: { 878 /* 879 * This is for ata/ide bad block handling. It is supposed 880 * to cause the driver to re-read the bad block list and 881 * alternate map after it has been updated. Our driver 882 * will refuse to attach to any disk which has a bad blocks 883 * list defined, so there really isn't much to do here. 884 */ 885 return (0); 886 } 887 case DKIOCGETDEF: { 888 /* 889 * I can't actually find any code that utilizes this ioctl, 890 * hence we're leaving it explicitly unimplemented. 891 */ 892 ASSERT("ioctl cmd unsupported by pv_cmdk: DKIOCGETDEF"); 893 return (EIO); 894 } 895 case DIOCTL_RWCMD: { 896 /* 897 * This just seems to just be an alternate interface for 898 * reading and writing the disk. Great, another way to 899 * do the same thing... 900 */ 901 return (pv_cmdk_dioctl_rwcmd(dev, arg, flag)); 902 } 903 case DKIOCINFO: { 904 dev_info_t *dip = dkp->dk_dip; 905 struct dk_cinfo info; 906 907 /* Pass on the ioctl request, save the response */ 908 if ((err = ldi_ioctl(dkp->dk_xdf_lh[part], 909 cmd, (intptr_t)&info, FKIOCTL, credp, rvalp)) != 0) 910 return (err); 911 912 /* Update controller info */ 913 info.dki_cnum = ddi_get_instance(ddi_get_parent(dip)); 914 (void) strlcpy(info.dki_cname, 915 ddi_get_name(ddi_get_parent(dip)), sizeof (info.dki_cname)); 916 917 /* Update unit info. */ 918 if (info.dki_ctype == DKC_VBD) 919 info.dki_ctype = DKC_DIRECT; 920 info.dki_unit = instance; 921 (void) strlcpy(info.dki_dname, 922 ddi_driver_name(dip), sizeof (info.dki_dname)); 923 info.dki_addr = 1; 924 925 if (ddi_copyout(&info, (void *)arg, sizeof (info), flag)) 926 return (EFAULT); 927 return (0); 928 } 929 } /* switch (cmd) */ 930 /*NOTREACHED*/ 931 } 932 933 /*ARGSUSED*/ 934 static int 935 pv_cmdkopen(dev_t *dev_p, int flag, int otyp, cred_t *credp) 936 { 937 ldi_ident_t li; 938 dev_t dev = *dev_p; 939 int instance = XDF_DEV2UNIT(dev); 940 int part = XDF_DEV2PART(dev); 941 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 942 dev_t xdf_devt = dkp->dk_xdf_dev | part; 943 int err = 0; 944 945 if ((otyp < 0) || (otyp >= OTYPCNT)) 946 return (EINVAL); 947 948 /* allocate an ldi handle */ 949 VERIFY(ldi_ident_from_dev(*dev_p, &li) == 0); 950 951 mutex_enter(&dkp->dk_mutex); 952 953 /* 954 * We translate all device opens (chr, blk, and lyr) into 955 * block device opens. Why? Because for all the opens that 956 * come through this driver, we only keep around one LDI handle. 957 * So that handle can only be of one open type. The reason 958 * that we choose the block interface for this is that to use 959 * the block interfaces for a device the system needs to allocatex 960 * buf_ts, which are associated with system memory which can act 961 * as a cache for device data. So normally when a block device 962 * is closed the system will ensure that all these pages get 963 * flushed out of memory. But if we were to open the device 964 * as a character device, then when we went to close the underlying 965 * device (even if we had invoked the block interfaces) any data 966 * remaining in memory wouldn't necessairly be flushed out 967 * before the device was closed. 968 */ 969 if (dkp->dk_xdf_lh[part] == NULL) { 970 ASSERT(!pv_cmdk_isopen_part(dkp, part)); 971 972 err = ldi_open_by_dev(&xdf_devt, OTYP_BLK, flag, credp, 973 &dkp->dk_xdf_lh[part], li); 974 975 if (err != 0) { 976 mutex_exit(&dkp->dk_mutex); 977 ldi_ident_release(li); 978 return (err); 979 } 980 981 /* Disk devices really shouldn't clone */ 982 ASSERT(xdf_devt == (dkp->dk_xdf_dev | part)); 983 } else { 984 ldi_handle_t lh_tmp; 985 986 ASSERT(pv_cmdk_isopen_part(dkp, part)); 987 988 /* do ldi open/close to get flags and cred check */ 989 err = ldi_open_by_dev(&xdf_devt, OTYP_BLK, flag, credp, 990 &lh_tmp, li); 991 if (err != 0) { 992 mutex_exit(&dkp->dk_mutex); 993 ldi_ident_release(li); 994 return (err); 995 } 996 997 /* Disk devices really shouldn't clone */ 998 ASSERT(xdf_devt == (dkp->dk_xdf_dev | part)); 999 (void) ldi_close(lh_tmp, flag, credp); 1000 } 1001 ldi_ident_release(li); 1002 1003 dkp->dk_xdf_otyp_count[otyp][part]++; 1004 1005 mutex_exit(&dkp->dk_mutex); 1006 return (0); 1007 } 1008 1009 /*ARGSUSED*/ 1010 static int 1011 pv_cmdkclose(dev_t dev, int flag, int otyp, cred_t *credp) 1012 { 1013 int instance = XDF_DEV2UNIT(dev); 1014 int part = XDF_DEV2PART(dev); 1015 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 1016 int err = 0; 1017 1018 ASSERT((otyp >= 0) && otyp < OTYPCNT); 1019 1020 /* 1021 * Sanity check that that the dev_t specified corresponds to this 1022 * driver and that the device is actually open. On debug kernels we'll 1023 * panic and on non-debug kernels we'll return failure. 1024 */ 1025 ASSERT(getmajor(dev) == pv_cmdk_major); 1026 if (getmajor(dev) != pv_cmdk_major) 1027 return (ENXIO); 1028 1029 mutex_enter(&dkp->dk_mutex); 1030 ASSERT(pv_cmdk_isopen_part(dkp, part)); 1031 if (!pv_cmdk_isopen_part(dkp, part)) { 1032 mutex_exit(&dkp->dk_mutex); 1033 return (ENXIO); 1034 } 1035 1036 ASSERT(dkp->dk_xdf_lh[part] != NULL); 1037 ASSERT(dkp->dk_xdf_otyp_count[otyp][part] > 0); 1038 if (otyp == OTYP_LYR) { 1039 dkp->dk_xdf_otyp_count[otyp][part]--; 1040 } else { 1041 dkp->dk_xdf_otyp_count[otyp][part] = 0; 1042 } 1043 1044 if (!pv_cmdk_isopen_part(dkp, part)) { 1045 err = ldi_close(dkp->dk_xdf_lh[part], flag, credp); 1046 dkp->dk_xdf_lh[part] = NULL; 1047 } 1048 1049 mutex_exit(&dkp->dk_mutex); 1050 1051 return (err); 1052 } 1053 1054 static int 1055 pv_cmdk_getpgeom(dev_info_t *dip, cmlb_geom_t *pgeom) 1056 { 1057 struct scsi_device *scsi_device; 1058 struct tgdk_geom tgdk_geom; 1059 opaque_t ctlobjp; 1060 int err; 1061 1062 scsi_device = ddi_get_driver_private(dip); 1063 ctlobjp = scsi_device->sd_address.a_hba_tran; 1064 if ((err = CTL_IOCTL(ctlobjp, 1065 DIOCTL_GETPHYGEOM, (uintptr_t)&tgdk_geom, FKIOCTL)) != 0) 1066 return (err); 1067 1068 /* This driver won't work if this isn't true */ 1069 ASSERT(tgdk_geom.g_secsiz == XB_BSIZE); 1070 1071 pgeom->g_ncyl = tgdk_geom.g_cyl; 1072 pgeom->g_acyl = tgdk_geom.g_acyl; 1073 pgeom->g_nhead = tgdk_geom.g_head; 1074 pgeom->g_nsect = tgdk_geom.g_sec; 1075 pgeom->g_secsize = tgdk_geom.g_secsiz; 1076 pgeom->g_capacity = tgdk_geom.g_cap; 1077 pgeom->g_intrlv = 1; 1078 pgeom->g_rpm = 3600; 1079 return (0); 1080 } 1081 1082 /* 1083 * pv_cmdk_bb_check() checks for the existance of bad blocks mappings in 1084 * the alternate partition/slice. Returns B_FALSE is there are no bad 1085 * block mappins found, and B_TRUE is there are bad block mappins found. 1086 */ 1087 static boolean_t 1088 pv_cmdk_bb_check(struct pv_cmdk *dkp) 1089 { 1090 struct alts_parttbl *ap; 1091 diskaddr_t nblocks, blk; 1092 uint32_t altused, altbase, altlast; 1093 uint16_t vtoctag; 1094 int alts; 1095 1096 /* find slice with V_ALTSCTR tag */ 1097 for (alts = 0; alts < NDKMAP; alts++) { 1098 1099 if (cmlb_partinfo(dkp->dk_cmlbhandle, alts, 1100 &nblocks, &blk, NULL, &vtoctag, 0) != 0) { 1101 /* no partition table exists */ 1102 return (B_FALSE); 1103 } 1104 1105 if ((vtoctag == V_ALTSCTR) && (nblocks > 1)) 1106 break; 1107 } 1108 if (alts >= NDKMAP) 1109 return (B_FALSE); /* no V_ALTSCTR slice defined */ 1110 1111 /* read in ALTS label block */ 1112 ap = (struct alts_parttbl *)kmem_zalloc(NBPSCTR, KM_SLEEP); 1113 if (pv_cmdk_lb_rdwr(dkp->dk_dip, 1114 TG_READ, ap, blk, NBPSCTR, NULL) != 0) 1115 goto err; 1116 1117 altused = ap->alts_ent_used; /* number of BB entries */ 1118 altbase = ap->alts_ent_base; /* blk offset from begin slice */ 1119 altlast = ap->alts_ent_end; /* blk offset to last block */ 1120 1121 if ((altused == 0) || (altbase < 1) || 1122 (altbase > altlast) || (altlast >= nblocks)) 1123 goto err; 1124 1125 /* we found bad block mappins */ 1126 kmem_free(ap, NBPSCTR); 1127 return (B_TRUE); 1128 1129 err: 1130 kmem_free(ap, NBPSCTR); 1131 return (B_FALSE); 1132 } 1133 1134 /* 1135 * Autoconfiguration Routines 1136 */ 1137 static int 1138 pv_cmdkattach(dev_info_t *dip, ddi_attach_cmd_t cmd) 1139 { 1140 int instance = ddi_get_instance(dip); 1141 dev_info_t *xdf_dip = NULL; 1142 struct pv_cmdk *dkp; 1143 cmlb_geom_t pgeom; 1144 char *path; 1145 int i; 1146 1147 if (cmd != DDI_ATTACH) 1148 return (DDI_FAILURE); 1149 1150 /* 1151 * This cmdk device layers on top of an xdf device. So the first 1152 * thing we need to do is determine which xdf device instance this 1153 * cmdk instance should be layered on top of. 1154 */ 1155 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1156 (void) ddi_pathname(dip, path); 1157 for (i = 0; pv_cmdk_h2p[i].h2p_hvm_path != NULL; i++) { 1158 if (strcmp(pv_cmdk_h2p[i].h2p_hvm_path, path) == 0) 1159 break; 1160 } 1161 kmem_free(path, MAXPATHLEN); 1162 1163 if (pv_cmdk_h2p[i].h2p_hvm_path == NULL) { 1164 /* 1165 * UhOh. We don't know what xdf instance this cmdk device 1166 * should be mapped to. 1167 */ 1168 return (DDI_FAILURE); 1169 } 1170 1171 /* Check if this device exists */ 1172 xdf_dip = xdf_hvm_hold(pv_cmdk_h2p[i].h2p_pv_path); 1173 if (xdf_dip == NULL) 1174 return (DDI_FAILURE); 1175 1176 /* allocate and initialize our state structure */ 1177 (void) ddi_soft_state_zalloc(pv_cmdk_state, instance); 1178 dkp = ddi_get_soft_state(pv_cmdk_state, instance); 1179 mutex_init(&dkp->dk_mutex, NULL, MUTEX_DRIVER, NULL); 1180 dkp->dk_dip = dip; 1181 dkp->dk_xdf_dip = xdf_dip; 1182 dkp->dk_xdf_dev = makedevice(ddi_driver_major(xdf_dip), 1183 XDF_MINOR(ddi_get_instance(xdf_dip), 0)); 1184 1185 ASSERT((dkp->dk_xdf_dev & XDF_PMASK) == 0); 1186 1187 /* 1188 * GROSS HACK ALERT! GROSS HACK ALERT! 1189 * 1190 * Before we can initialize the cmlb layer, we have to tell the 1191 * underlying xdf device what it's physical geometry should be. 1192 * See the block comments at the top of this file for more info. 1193 */ 1194 if ((pv_cmdk_getpgeom(dip, &pgeom) != 0) || 1195 (xdf_hvm_setpgeom(dkp->dk_xdf_dip, &pgeom) != 0)) { 1196 ddi_release_devi(dkp->dk_xdf_dip); 1197 mutex_destroy(&dkp->dk_mutex); 1198 ddi_soft_state_free(pv_cmdk_state, instance); 1199 return (DDI_FAILURE); 1200 } 1201 1202 /* create kstat for iostat(1M) */ 1203 if (xdf_kstat_create(dkp->dk_xdf_dip, "cmdk", instance) != 0) { 1204 ddi_release_devi(dkp->dk_xdf_dip); 1205 mutex_destroy(&dkp->dk_mutex); 1206 ddi_soft_state_free(pv_cmdk_state, instance); 1207 return (DDI_FAILURE); 1208 } 1209 1210 /* 1211 * Force the xdf front end driver to connect to the backend. From 1212 * the solaris device tree perspective, the xdf driver devinfo node 1213 * is already in the ATTACHED state. (Otherwise xdf_hvm_hold() 1214 * would not have returned a dip.) But this doesn't mean that the 1215 * xdf device has actually established a connection to it's back 1216 * end driver. For us to be able to access the xdf device it needs 1217 * to be connected. There are two ways to force the xdf driver to 1218 * connect to the backend device. 1219 */ 1220 if (xdf_hvm_connect(dkp->dk_xdf_dip) != 0) { 1221 cmn_err(CE_WARN, 1222 "pv driver failed to connect: %s", 1223 pv_cmdk_h2p[i].h2p_pv_path); 1224 xdf_kstat_delete(dkp->dk_xdf_dip); 1225 ddi_release_devi(dkp->dk_xdf_dip); 1226 mutex_destroy(&dkp->dk_mutex); 1227 ddi_soft_state_free(pv_cmdk_state, instance); 1228 return (DDI_FAILURE); 1229 } 1230 1231 /* 1232 * Initalize cmlb. Note that for partition information cmlb 1233 * will access the underly xdf disk device directly via 1234 * pv_cmdk_lb_rdwr() and pv_cmdk_lb_getinfo(). There are no 1235 * layered driver handles associated with this access because 1236 * it is a direct disk access that doesn't go through 1237 * any of the device nodes exported by the xdf device (since 1238 * all exported device nodes only reflect the portion of 1239 * the device visible via the partition/slice that the node 1240 * is associated with.) So while not observable via the LDI, 1241 * this direct disk access is ok since we're actually holding 1242 * the target device. 1243 */ 1244 cmlb_alloc_handle((cmlb_handle_t *)&dkp->dk_cmlbhandle); 1245 if (cmlb_attach(dkp->dk_dip, &pv_cmdk_lb_ops, 1246 DTYPE_DIRECT, /* device_type */ 1247 0, /* not removable */ 1248 0, /* not hot pluggable */ 1249 DDI_NT_BLOCK, 1250 CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT, /* mimic cmdk */ 1251 dkp->dk_cmlbhandle, 0) != 0) { 1252 cmlb_free_handle(&dkp->dk_cmlbhandle); 1253 xdf_kstat_delete(dkp->dk_xdf_dip); 1254 ddi_release_devi(dkp->dk_xdf_dip); 1255 mutex_destroy(&dkp->dk_mutex); 1256 ddi_soft_state_free(pv_cmdk_state, instance); 1257 return (DDI_FAILURE); 1258 } 1259 1260 if (pv_cmdk_bb_check(dkp)) { 1261 cmn_err(CE_WARN, 1262 "pv cmdk disks with bad blocks are unsupported: %s", 1263 pv_cmdk_h2p[i].h2p_hvm_path); 1264 1265 cmlb_detach(dkp->dk_cmlbhandle, 0); 1266 cmlb_free_handle(&dkp->dk_cmlbhandle); 1267 xdf_kstat_delete(dkp->dk_xdf_dip); 1268 ddi_release_devi(dkp->dk_xdf_dip); 1269 mutex_destroy(&dkp->dk_mutex); 1270 ddi_soft_state_free(pv_cmdk_state, instance); 1271 return (DDI_FAILURE); 1272 } 1273 1274 /* setup devid string */ 1275 pv_cmdk_devid_setup(dkp); 1276 1277 /* Calling validate will create minor nodes according to disk label */ 1278 (void) cmlb_validate(dkp->dk_cmlbhandle, 0, 0); 1279 1280 /* 1281 * Add a zero-length attribute to tell the world we support 1282 * kernel ioctls (for layered drivers). 1283 */ 1284 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, 1285 DDI_KERNEL_IOCTL, NULL, 0); 1286 1287 /* Have the system report any newly created device nodes */ 1288 ddi_report_dev(dip); 1289 1290 return (DDI_SUCCESS); 1291 } 1292 1293 static int 1294 pv_cmdkdetach(dev_info_t *dip, ddi_detach_cmd_t cmd) 1295 { 1296 int instance = ddi_get_instance(dip); 1297 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 1298 1299 if (cmd != DDI_DETACH) 1300 return (DDI_FAILURE); 1301 1302 ASSERT(MUTEX_NOT_HELD(&dkp->dk_mutex)); 1303 1304 ddi_devid_unregister(dip); 1305 if (dkp->dk_devid) 1306 ddi_devid_free(dkp->dk_devid); 1307 cmlb_detach(dkp->dk_cmlbhandle, 0); 1308 cmlb_free_handle(&dkp->dk_cmlbhandle); 1309 mutex_destroy(&dkp->dk_mutex); 1310 xdf_kstat_delete(dkp->dk_xdf_dip); 1311 ddi_release_devi(dkp->dk_xdf_dip); 1312 ddi_soft_state_free(pv_cmdk_state, instance); 1313 ddi_prop_remove_all(dip); 1314 1315 return (DDI_SUCCESS); 1316 } 1317 1318 /*ARGSUSED*/ 1319 static int 1320 pv_cmdk_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, 1321 void **result) 1322 { 1323 dev_t dev = (dev_t)arg; 1324 int instance = XDF_DEV2UNIT(dev); 1325 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 1326 1327 switch (infocmd) { 1328 case DDI_INFO_DEVT2DEVINFO: 1329 if (dkp == NULL) 1330 return (DDI_FAILURE); 1331 *result = (void *)dkp->dk_dip; 1332 break; 1333 case DDI_INFO_DEVT2INSTANCE: 1334 *result = (void *)(intptr_t)instance; 1335 break; 1336 default: 1337 return (DDI_FAILURE); 1338 } 1339 return (DDI_SUCCESS); 1340 } 1341 1342 static int 1343 pv_cmdk_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, 1344 int flags, char *name, caddr_t valuep, int *lengthp) 1345 { 1346 int instance = ddi_get_instance(dip); 1347 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 1348 dev_info_t *xdf_dip; 1349 dev_t xdf_devt; 1350 int err; 1351 1352 /* 1353 * Sanity check that if a dev_t or dip were specified that they 1354 * correspond to this device driver. On debug kernels we'll 1355 * panic and on non-debug kernels we'll return failure. 1356 */ 1357 ASSERT(ddi_driver_major(dip) == pv_cmdk_major); 1358 ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == pv_cmdk_major)); 1359 if ((ddi_driver_major(dip) != pv_cmdk_major) || 1360 ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != pv_cmdk_major))) 1361 return (DDI_PROP_NOT_FOUND); 1362 1363 /* 1364 * This property lookup might be associated with a device node 1365 * that is not yet attached, if so pass it onto ddi_prop_op(). 1366 */ 1367 if (dkp == NULL) 1368 return (ddi_prop_op(dev, dip, prop_op, flags, 1369 name, valuep, lengthp)); 1370 1371 /* 1372 * Make sure we only lookup static properties. 1373 * 1374 * If there are static properties of the underlying xdf driver 1375 * that we want to mirror, then we'll have to explicity look them 1376 * up and define them during attach. There are a few reasons 1377 * for this. Most importantly, most static properties are typed 1378 * and all dynamic properties are untyped, ie, for dynamic 1379 * properties the caller must know the type of the property and 1380 * how to interpret the value of the property. the prop_op drivedr 1381 * entry point is only designed for returning dynamic/untyped 1382 * properties, so if we were to attempt to lookup and pass back 1383 * static properties of the underlying device here then we would 1384 * be losing the type information for those properties. Another 1385 * reason we don't want to pass on static property requests is that 1386 * static properties are enumerable in the device tree, where as 1387 * dynamic ones are not. 1388 */ 1389 flags |= DDI_PROP_DYNAMIC; 1390 1391 /* 1392 * We can't use the ldi here to access the underlying device because 1393 * the ldi actually opens the device, and that open might fail if the 1394 * device has already been opened with the FEXCL flag. If we used 1395 * the ldi here, it would also be possible for some other caller 1396 * to try open the device with the FEXCL flag and get a failure 1397 * back because we have it open to do a property query. 1398 * 1399 * Instad we'll grab a hold on the target dip and query the 1400 * property directly. 1401 */ 1402 mutex_enter(&dkp->dk_mutex); 1403 1404 if ((xdf_dip = dkp->dk_xdf_dip) == NULL) { 1405 mutex_exit(&dkp->dk_mutex); 1406 return (DDI_PROP_NOT_FOUND); 1407 } 1408 e_ddi_hold_devi(xdf_dip); 1409 1410 /* figure out the dev_t we're going to pass on down */ 1411 if (dev == DDI_DEV_T_ANY) { 1412 xdf_devt = DDI_DEV_T_ANY; 1413 } else { 1414 xdf_devt = dkp->dk_xdf_dev | XDF_DEV2PART(dev); 1415 } 1416 1417 mutex_exit(&dkp->dk_mutex); 1418 1419 /* 1420 * Cdev_prop_op() is not a public interface, and normally the caller 1421 * is required to make sure that the target driver actually implements 1422 * this interface before trying to invoke it. In this case we know 1423 * that we're always accessing the xdf driver and it does have this 1424 * interface defined, so we can skip the check. 1425 */ 1426 err = cdev_prop_op(xdf_devt, xdf_dip, 1427 prop_op, flags, name, valuep, lengthp); 1428 ddi_release_devi(xdf_dip); 1429 return (err); 1430 } 1431 1432 /* 1433 * Device driver ops vector 1434 */ 1435 static struct cb_ops pv_cmdk_cb_ops = { 1436 pv_cmdkopen, /* open */ 1437 pv_cmdkclose, /* close */ 1438 pv_cmdkstrategy, /* strategy */ 1439 nodev, /* print */ 1440 pv_cmdkdump, /* dump */ 1441 pv_cmdkread, /* read */ 1442 pv_cmdkwrite, /* write */ 1443 pv_cmdkioctl, /* ioctl */ 1444 nodev, /* devmap */ 1445 nodev, /* mmap */ 1446 nodev, /* segmap */ 1447 nochpoll, /* poll */ 1448 pv_cmdk_prop_op, /* cb_prop_op */ 1449 0, /* streamtab */ 1450 D_64BIT | D_MP | D_NEW, /* Driver comaptibility flag */ 1451 CB_REV, /* cb_rev */ 1452 pv_cmdkaread, /* async read */ 1453 pv_cmdkawrite /* async write */ 1454 }; 1455 1456 struct dev_ops pv_cmdk_ops = { 1457 DEVO_REV, /* devo_rev, */ 1458 0, /* refcnt */ 1459 pv_cmdk_getinfo, /* info */ 1460 nulldev, /* identify */ 1461 nulldev, /* probe */ 1462 pv_cmdkattach, /* attach */ 1463 pv_cmdkdetach, /* detach */ 1464 nodev, /* reset */ 1465 &pv_cmdk_cb_ops, /* driver operations */ 1466 (struct bus_ops *)0, /* bus operations */ 1467 NULL, /* power */ 1468 ddi_quiesce_not_supported, /* devo_quiesce */ 1469 }; 1470 1471 /* 1472 * Module linkage information for the kernel. 1473 */ 1474 static struct modldrv modldrv = { 1475 &mod_driverops, /* Type of module. This one is a driver */ 1476 "PV Common Direct Access Disk", 1477 &pv_cmdk_ops, /* driver ops */ 1478 }; 1479 1480 static struct modlinkage modlinkage = { 1481 MODREV_1, (void *)&modldrv, NULL 1482 }; 1483 1484 int 1485 _init(void) 1486 { 1487 int rval; 1488 1489 if ((pv_cmdk_major = ddi_name_to_major("cmdk")) == (major_t)-1) 1490 return (EINVAL); 1491 1492 /* 1493 * In general ide usually supports 4 disk devices, this same 1494 * limitation also applies to software emulating ide devices. 1495 * so by default we pre-allocate 4 cmdk soft state structures. 1496 */ 1497 if ((rval = ddi_soft_state_init(&pv_cmdk_state, 1498 sizeof (struct pv_cmdk), PV_CMDK_NODES)) != 0) 1499 return (rval); 1500 1501 /* 1502 * Currently we only support qemu as the backing hardware emulator 1503 * for cmdk devices. 1504 */ 1505 pv_cmdk_h2p = pv_cmdk_h2p_xen_qemu; 1506 1507 /* Install our module */ 1508 if ((rval = mod_install(&modlinkage)) != 0) { 1509 ddi_soft_state_fini(&pv_cmdk_state); 1510 return (rval); 1511 } 1512 1513 return (0); 1514 } 1515 1516 int 1517 _info(struct modinfo *modinfop) 1518 { 1519 return (mod_info(&modlinkage, modinfop)); 1520 } 1521 1522 int 1523 _fini(void) 1524 { 1525 int rval; 1526 if ((rval = mod_remove(&modlinkage)) != 0) 1527 return (rval); 1528 ddi_soft_state_fini(&pv_cmdk_state); 1529 return (0); 1530 } 1531