/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/scsi/scsi_types.h>
#include <sys/modctl.h>
#include <sys/cmlb.h>
#include <sys/types.h>
#include <sys/xpv_support.h>
#include <sys/xendev.h>
#include <sys/gnttab.h>
#include <public/xen.h>
#include <public/grant_table.h>
#include <io/xdf.h>
#include <sys/vtoc.h>
#include <sys/dkio.h>
#include <sys/dktp/dadev.h>
#include <sys/dktp/dadkio.h>
#include <sys/dktp/tgdk.h>
#include <sys/dktp/bbh.h>
#include <sys/dktp/cmdk.h>
#include <sys/dktp/altsctr.h>

/*
 * General Notes
 *
 * We don't support disks with bad block mappings.  We have this
 * limitation because the underlying xdf driver doesn't support
 * bad block remapping.  If there is a need to support this feature
 * it should be added directly to the xdf driver and we should just
 * pass requests straight on through and let it handle the remapping.
 * Also, it's probably worth pointing out that most modern disks do bad
 * block remapping internally in the hardware so there's actually less
 * of a chance of us ever discovering bad blocks.  Also, in most cases
 * this driver (and the xdf driver) will only be used with virtualized
 * devices, so one might wonder why a virtual device would ever actually
 * experience bad blocks.  To wrap this up, you might be wondering how
 * these bad block mappings get created and how they are managed.  Well,
 * there are two tools for managing bad block mappings, format(1M) and
 * addbadsec(1M).  Format(1M) can be used to do a surface scan of a disk
 * to attempt to find bad blocks and create mappings for them.  Format(1M)
 * and addbadsec(1M) can also be used to edit existing mappings that may
 * be saved on the disk.
 *
 * The underlying PV driver that this driver passes on requests to is the
 * xdf driver.  Since in most cases the xdf driver doesn't deal with
 * physical disks it has its own algorithm for assigning a physical
 * geometry to a virtual disk (ie, cylinder count, head count, etc.)
 * The default values chosen by the xdf driver may not match those
 * assigned to a disk by a hardware disk emulator in an HVM environment.
 * This is a problem since these physical geometry attributes affect
 * things like the partition table, backup label location, etc.  So
 * to emulate disk devices correctly we need to know the physical geometry
 * that was assigned to a disk at the time of its initialization.
 * Normally in an HVM environment this information will be passed to
 * the BIOS and operating system from the hardware emulator that is
 * emulating the disk devices.  In the case of a solaris dom0+xvm
 * this would be qemu.  So to work around this issue, this driver will
 * query the emulated hardware to get the assigned physical geometry
 * and then pass this geometry onto the xdf driver so that it can use it.
 * But really, this information is essentially metadata about the disk
 * that should be kept with the disk image itself.  (Assuming of course
 * that a disk image is the actual backing store for this emulated device.)
 * This metadata should also be made available to PV drivers via a common
 * mechanism, probably the xenstore.  The fact that this metadata isn't
 * available outside of HVM domains means that it's difficult to move
 * disks between HVM and PV domains, since a fully PV domain will have no
 * way of knowing what the correct geometry of the target device is.
 * (Short of reading the disk, looking for things like partition tables
 * and labels, and taking a best guess at what the geometry was when
 * the disk was initialized.  Unsurprisingly, qemu actually does this.)
 *
 * This driver has to map cmdk device instances into their corresponding
 * xdf device instances.  We have to do this to ensure that when a user
 * accesses an emulated cmdk device we map those accesses to the proper
 * paravirtualized device.  Basically what we need to know is how multiple
 * 'disk' entries in a domU configuration file get mapped to emulated
 * cmdk devices and to xdf devices.  The 'disk' entry to xdf instance
 * mappings we know because those are done within the Solaris xvdi code
 * and the xpvd nexus driver.  But the config to emulated devices mappings
 * are handled entirely within the xen management tool chain and the
 * hardware emulator.  Since all the tools that establish these mappings
 * live in dom0, dom0 should really supply us with this information,
 * probably via the xenstore.  Unfortunately it doesn't so, since there's
 * no good way to determine this mapping dynamically, this driver uses
 * a hard coded set of static mappings.  These mappings are hardware
 * emulator specific because each different hardware emulator could have
 * a different device tree with different cmdk device paths.
 * This
 * means that if we want to continue to use this static mapping approach
 * to allow Solaris to run on different hardware emulators we'll have
 * to analyze each of those emulators to determine what paths they
 * use and hard code those paths into this driver.  yech.  This metadata
 * really needs to be supplied to us by dom0.
 *
 * This driver accesses underlying xdf nodes.  Unfortunately, devices
 * must create minor nodes during attach, and for disk devices to create
 * minor nodes, they have to look at the label on the disk, so this means
 * that disk drivers must be able to access the disk's contents during
 * attach.  That means that this disk driver must be able to access
 * underlying xdf nodes during attach.  Unfortunately, due to device tree
 * locking restrictions, we cannot have an attach operation occurring on
 * this device and then attempt to access another device which may
 * cause another attach to occur in a different device tree branch
 * since this could result in deadlock.  Hence, this driver can only
 * access xdf device nodes that we know are attached, and it can't use
 * any ddi interfaces to access those nodes if those interfaces could
 * trigger an attach of the xdf device.  So this driver works around
 * these restrictions by talking directly to xdf devices via
 * xdf_hvm_hold().  This interface takes a pathname to an xdf device,
 * and if that device is already attached then it returns a held dip
 * pointer for that device node.  This prevents us from getting into
 * deadlock situations, but now we need a mechanism to ensure that all
 * the xdf device nodes this driver might access are attached before
 * this driver tries to access them.  This is accomplished via the
 * hvmboot_rootconf() callback which is invoked just before root is
 * mounted.  hvmboot_rootconf() will attach xpvd and tell it to configure
 * all xdf devices visible to the system.
 * All these xdf device nodes
 * will also be marked with the "ddi-no-autodetach" property so that
 * once they are configured, they will not be automatically unconfigured.
 * The only way that they could be unconfigured is if the administrator
 * explicitly attempts to unload required modules via rem_drv(1M)
 * or modunload(1M).
 */

/*
 * 16 partitions + fdisk (see xdf.h)
 */
#define	XDF_DEV2UNIT(dev)	XDF_INST((getminor((dev))))
#define	XDF_DEV2PART(dev)	XDF_PART((getminor((dev))))

/* The three open types (otyp) that this driver accepts and tracks. */
#define	OTYP_VALID(otyp)	((otyp == OTYP_BLK) || \
				(otyp == OTYP_CHR) || \
				(otyp == OTYP_LYR))

/* Number of minor nodes created per cmdk instance. */
#define	PV_CMDK_NODES		4

/*
 * One entry of the HVM-to-PV translation table: the device tree path of
 * an emulated (HVM) cmdk disk and the device tree path of the
 * paravirtualized xdf disk it corresponds to.
 */
typedef struct hvm_to_pv {
	char	*h2p_hvm_path;	/* path of the emulated cmdk device */
	char	*h2p_pv_path;	/* path of the corresponding xdf device */
} hvm_to_pv_t;

/*
 * Static cmdk-to-xdf path mappings for the xen/qemu hardware emulator.
 */
static hvm_to_pv_t pv_cmdk_h2p_xen_qemu[] = {
	/*
	 * The paths mapping here are very specific to xen and qemu.  When a
	 * domU is booted under xen in HVM mode, qemu is normally used to
	 * emulate up to four ide disks.  These disks always have the four
	 * path listed below.  To configure an emulated ide device, the
	 * xen domain configuration file normally has an entry that looks
	 * like this:
	 *	disk = [ 'file:/foo.img,hda,w' ]
	 *
	 * The part we're interested in is the 'hda', which we'll call the
	 * xen disk device name here.  The xen management tools (which parse
	 * the xen domain configuration file and launch qemu) makes the
	 * following assumptions about this value:
	 *	hda == emulated ide disk 0 (ide bus 0, master)
	 *	hdb == emulated ide disk 1 (ide bus 0, slave)
	 *	hdc == emulated ide disk 2 (ide bus 1, master)
	 *	hdd == emulated ide disk 3 (ide bus 1, slave)
	 *
	 * (Uncoincidentally, these xen disk device names actually map to
	 * the /dev filesystem names of ide disk devices in Linux.  So in
	 * Linux /dev/hda is the first ide disk.)  So for the first part of
	 * our mapping we've just hardcoded the cmdk paths that we know
	 * qemu will use.
	 *
	 * To understand the second half of the mapping (ie, the xdf device
	 * that each emulated cmdk device should be mapped to) we need to
	 * know the solaris device node address that will be assigned to
	 * each xdf device.  (The device node address is the decimal
	 * number that comes after the "xdf@" in the device path.)
	 *
	 * So the question becomes, how do we know what the xenstore device
	 * id for emulated disk will be?  Well, it turns out that since the
	 * xen management tools expect the disk device names to be Linux
	 * device names, those same management tools assign each disk a
	 * device id that matches the dev_t of the corresponding device
	 * under Linux.  (Big shocker.)  This xen device name-to-id mapping
	 * is currently all hard coded here:
	 *	xen.hg/tools/python/xen/util/blkif.py`blkdev_name_to_number()
	 *
	 * So looking at the code above we can see the following xen disk
	 * device name to xenstore device id mappings:
	 *	'hda' == 0t768  == ((3 * 256) + (0 * 64))
	 *	'hdb' == 0t832  == ((3 * 256) + (1 * 64))
	 *	'hdc' == 0t5632 == ((22 * 256) + (0 * 64))
	 *	'hdd' == 0t5696 == ((22 * 256) + (1 * 64))
	 */
	{ "/pci@0,0/pci-ide@1,1/ide@0/cmdk@0,0", "/xpvd/xdf@768" },
	{ "/pci@0,0/pci-ide@1,1/ide@0/cmdk@1,0", "/xpvd/xdf@832" },
	{ "/pci@0,0/pci-ide@1,1/ide@1/cmdk@0,0", "/xpvd/xdf@5632" },
	{ "/pci@0,0/pci-ide@1,1/ide@1/cmdk@1,0", "/xpvd/xdf@5696" },
	{ NULL, 0 }
};

/*
 * Per-instance soft state for a pv_cmdk device.
 */
typedef struct pv_cmdk {
	dev_info_t	*dk_dip;	/* our (cmdk) dip */
	cmlb_handle_t	dk_cmlbhandle;	/* disk label (cmlb) handle */
	ddi_devid_t	dk_devid;	/* device id, once obtained */
	kmutex_t	dk_mutex;	/* protects the open state below */
	dev_info_t	*dk_xdf_dip;	/* underlying xdf device's dip */
	dev_t		dk_xdf_dev;	/* base dev_t of the xdf device */

	/* per open-type, per-partition open counts */
	int		dk_xdf_otyp_count[OTYPCNT][XDF_PEXT];

	/* one LDI handle per currently-open xdf partition */
	ldi_handle_t	dk_xdf_lh[XDF_PEXT];
} pv_cmdk_t;

/*
 * Globals
 */
static void *pv_cmdk_state;		/* soft state anchor */
static major_t pv_cmdk_major;		/* our major number */
static hvm_to_pv_t *pv_cmdk_h2p;	/* active HVM-to-PV mapping table */

/*
 * Function prototypes for xdf callback functions
 */
extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);

/*
 * Returns B_TRUE if any open (of any otyp) is outstanding against the
 * given partition, B_FALSE otherwise.  Caller must hold dk_mutex.
 */
static boolean_t
pv_cmdk_isopen_part(struct pv_cmdk *dkp, int part)
{
	int otyp;

	ASSERT(MUTEX_HELD(&dkp->dk_mutex));

	for (otyp = 0; (otyp < OTYPCNT); otyp++) {
		if (dkp->dk_xdf_otyp_count[otyp][part] != 0)
			return (B_TRUE);
	}
	return (B_FALSE);
}

/*
 * Cmlb ops vectors, allows the cmlb module to directly access the entire
 * pv_cmdk disk device without going through any partitioning layers.
 */

/*
 * Whole-disk read/write callback for cmlb: passed straight through to
 * the underlying xdf device.  Returns ENXIO if the instance has no
 * soft state, otherwise whatever xdf_lb_rdwr() returns.
 */
/*ARGSUSED*/
static int
pv_cmdk_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr,
    diskaddr_t start, size_t count, void *tg_cookie)
{
	int		instance = ddi_get_instance(dip);
	struct pv_cmdk	*dkp = ddi_get_soft_state(pv_cmdk_state, instance);

	if (dkp == NULL)
		return (ENXIO);

	return (xdf_lb_rdwr(dkp->dk_xdf_dip, cmd, bufaddr, start, count,
	    tg_cookie));
}

/*
 * "Get info" callback for cmlb.  TG_GETVIRTGEOM is emulated locally
 * (the xdf driver doesn't implement it); all other commands are passed
 * through to the xdf driver.
 */
/*ARGSUSED*/
static int
pv_cmdk_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	int		instance = ddi_get_instance(dip);
	struct pv_cmdk	*dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	int		err;

	if (dkp == NULL)
		return (ENXIO);

	if (cmd == TG_GETVIRTGEOM) {
		cmlb_geom_t	pgeom, *vgeomp;
		diskaddr_t	capacity;

		/*
		 * The native xdf driver doesn't support this ioctl.
		 * Instead of passing it on, emulate it here so that the
		 * results look the same as what we get for a real cmdk
		 * device.
		 *
		 * Get the real size of the device
		 */
		if ((err = xdf_lb_getinfo(dkp->dk_xdf_dip,
		    TG_GETPHYGEOM, &pgeom, tg_cookie)) != 0)
			return (err);
		capacity = pgeom.g_capacity;

		/*
		 * If the controller returned us something that doesn't
		 * really fit into an Int 13/function 8 geometry
		 * result, just fail the ioctl.  See PSARC 1998/313.
		 */
		if (capacity >= (63 * 254 * 1024))
			return (EINVAL);

		/* Fabricate a 63 sector / 254 head virtual geometry. */
		vgeomp = (cmlb_geom_t *)arg;
		vgeomp->g_capacity = capacity;
		vgeomp->g_nsect = 63;
		vgeomp->g_nhead = 254;
		vgeomp->g_ncyl = capacity / (63 * 254);
		vgeomp->g_acyl = 0;
		vgeomp->g_secsize = 512;
		vgeomp->g_intrlv = 1;
		vgeomp->g_rpm = 3600;
		return (0);
	}

	return (xdf_lb_getinfo(dkp->dk_xdf_dip, cmd, arg, tg_cookie));
}

static cmlb_tg_ops_t pv_cmdk_lb_ops = {
	TG_DK_OPS_VERSION_1,
	pv_cmdk_lb_rdwr,
	pv_cmdk_lb_getinfo
};

/*
 * devid management functions
 */

/*
 * pv_cmdk_get_modser() is basically a local copy of
 * cmdk_get_modser() modified to work without the dadk layer.
 * (which the non-pv version of the cmdk driver uses.)
344 */ 345 static int 346 pv_cmdk_get_modser(struct pv_cmdk *dkp, int ioccmd, char *buf, int len) 347 { 348 struct scsi_device *scsi_device; 349 opaque_t ctlobjp; 350 dadk_ioc_string_t strarg; 351 char *s; 352 char ch; 353 boolean_t ret; 354 int i; 355 int tb; 356 357 strarg.is_buf = buf; 358 strarg.is_size = len; 359 scsi_device = ddi_get_driver_private(dkp->dk_dip); 360 ctlobjp = scsi_device->sd_address.a_hba_tran; 361 if (CTL_IOCTL(ctlobjp, 362 ioccmd, (uintptr_t)&strarg, FNATIVE | FKIOCTL) != 0) 363 return (0); 364 365 /* 366 * valid model/serial string must contain a non-zero non-space 367 * trim trailing spaces/NULL 368 */ 369 ret = B_FALSE; 370 s = buf; 371 for (i = 0; i < strarg.is_size; i++) { 372 ch = *s++; 373 if (ch != ' ' && ch != '\0') 374 tb = i + 1; 375 if (ch != ' ' && ch != '\0' && ch != '0') 376 ret = B_TRUE; 377 } 378 379 if (ret == B_FALSE) 380 return (0); 381 382 return (tb); 383 } 384 385 /* 386 * pv_cmdk_devid_modser() is basically a copy of cmdk_devid_modser() 387 * that has been modified to use local pv cmdk driver functions. 388 * 389 * Build a devid from the model and serial number 390 * Return DDI_SUCCESS or DDI_FAILURE. 391 */ 392 static int 393 pv_cmdk_devid_modser(struct pv_cmdk *dkp) 394 { 395 int rc = DDI_FAILURE; 396 char *hwid; 397 int modlen; 398 int serlen; 399 400 /* 401 * device ID is a concatenation of model number, '=', serial number. 
402 */ 403 hwid = kmem_alloc(CMDK_HWIDLEN, KM_SLEEP); 404 modlen = pv_cmdk_get_modser(dkp, DIOCTL_GETMODEL, hwid, CMDK_HWIDLEN); 405 if (modlen == 0) 406 goto err; 407 408 hwid[modlen++] = '='; 409 serlen = pv_cmdk_get_modser(dkp, DIOCTL_GETSERIAL, 410 hwid + modlen, CMDK_HWIDLEN - modlen); 411 if (serlen == 0) 412 goto err; 413 414 hwid[modlen + serlen] = 0; 415 416 /* Initialize the device ID, trailing NULL not included */ 417 rc = ddi_devid_init(dkp->dk_dip, DEVID_ATA_SERIAL, modlen + serlen, 418 hwid, (ddi_devid_t *)&dkp->dk_devid); 419 if (rc != DDI_SUCCESS) 420 goto err; 421 422 kmem_free(hwid, CMDK_HWIDLEN); 423 return (DDI_SUCCESS); 424 425 err: 426 kmem_free(hwid, CMDK_HWIDLEN); 427 return (DDI_FAILURE); 428 } 429 430 /* 431 * pv_cmdk_devid_read() is basically a local copy of 432 * cmdk_devid_read() modified to work without the dadk layer. 433 * (which the non-pv version of the cmdk driver uses.) 434 * 435 * Read a devid from on the first block of the last track of 436 * the last cylinder. Make sure what we read is a valid devid. 437 * Return DDI_SUCCESS or DDI_FAILURE. 
 */
static int
pv_cmdk_devid_read(struct pv_cmdk *dkp)
{
	diskaddr_t	blk;
	struct dk_devid	*dkdevidp;
	uint_t		*ip, chksum;
	int		i;

	/* Ask cmlb where the devid block lives on this disk. */
	if (cmlb_get_devid_block(dkp->dk_cmlbhandle, &blk, 0) != 0)
		return (DDI_FAILURE);

	/* Read one sector's worth of devid data from that block. */
	dkdevidp = kmem_zalloc(NBPSCTR, KM_SLEEP);
	if (pv_cmdk_lb_rdwr(dkp->dk_dip,
	    TG_READ, dkdevidp, blk, NBPSCTR, NULL) != 0)
		goto err;

	/* Validate the revision */
	if ((dkdevidp->dkd_rev_hi != DK_DEVID_REV_MSB) ||
	    (dkdevidp->dkd_rev_lo != DK_DEVID_REV_LSB))
		goto err;

	/* Calculate the checksum (XOR of all but the last word). */
	chksum = 0;
	ip = (uint_t *)dkdevidp;
	for (i = 0; i < ((NBPSCTR - sizeof (int))/sizeof (int)); i++)
		chksum ^= ip[i];
	if (DKD_GETCHKSUM(dkdevidp) != chksum)
		goto err;

	/* Validate the device id */
	if (ddi_devid_valid((ddi_devid_t)dkdevidp->dkd_devid) != DDI_SUCCESS)
		goto err;

	/* keep a copy of the device id */
	i = ddi_devid_sizeof((ddi_devid_t)dkdevidp->dkd_devid);
	dkp->dk_devid = kmem_alloc(i, KM_SLEEP);
	bcopy(dkdevidp->dkd_devid, dkp->dk_devid, i);
	kmem_free(dkdevidp, NBPSCTR);
	return (DDI_SUCCESS);

err:
	kmem_free(dkdevidp, NBPSCTR);
	return (DDI_FAILURE);
}

/*
 * pv_cmdk_devid_fabricate() is basically a local copy of
 * cmdk_devid_fabricate() modified to work without the dadk layer.
 * (which the non-pv version of the cmdk driver uses.)
 *
 * Create a devid and write it on the first block of the last track of
 * the last cylinder.
 * Return DDI_SUCCESS or DDI_FAILURE.
 */
static int
pv_cmdk_devid_fabricate(struct pv_cmdk *dkp)
{
	ddi_devid_t	devid = NULL;	/* devid made by ddi_devid_init  */
	struct dk_devid	*dkdevidp = NULL; /* devid struct stored on disk */
	diskaddr_t	blk;
	uint_t		*ip, chksum;
	int		i;

	/* Ask cmlb where the devid block lives on this disk. */
	if (cmlb_get_devid_block(dkp->dk_cmlbhandle, &blk, 0) != 0)
		return (DDI_FAILURE);

	/* Fabricate a new devid. */
	if (ddi_devid_init(dkp->dk_dip, DEVID_FAB, 0, NULL, &devid) !=
	    DDI_SUCCESS)
		return (DDI_FAILURE);

	/* allocate a buffer */
	dkdevidp = (struct dk_devid *)kmem_zalloc(NBPSCTR, KM_SLEEP);

	/* Fill in the revision */
	dkdevidp->dkd_rev_hi = DK_DEVID_REV_MSB;
	dkdevidp->dkd_rev_lo = DK_DEVID_REV_LSB;

	/* Copy in the device id */
	i = ddi_devid_sizeof(devid);
	if (i > DK_DEVID_SIZE)
		goto err;
	bcopy(devid, dkdevidp->dkd_devid, i);

	/* Calculate the chksum (XOR of all but the last word). */
	chksum = 0;
	ip = (uint_t *)dkdevidp;
	for (i = 0; i < ((NBPSCTR - sizeof (int))/sizeof (int)); i++)
		chksum ^= ip[i];

	/* Fill in the checksum */
	DKD_FORMCHKSUM(chksum, dkdevidp);

	/* Write the fabricated devid sector out to the disk. */
	if (pv_cmdk_lb_rdwr(dkp->dk_dip,
	    TG_WRITE, dkdevidp, blk, NBPSCTR, NULL) != 0)
		goto err;

	kmem_free(dkdevidp, NBPSCTR);

	dkp->dk_devid = devid;
	return (DDI_SUCCESS);

err:
	if (dkdevidp != NULL)
		kmem_free(dkdevidp, NBPSCTR);
	if (devid != NULL)
		ddi_devid_free(devid);
	return (DDI_FAILURE);
}

/*
 * pv_cmdk_devid_setup() is basically a local copy of cmdk_devid_setup()
 * that has been modified to use local pv cmdk driver functions.
 *
 * Create and register the devid.
 * There are 4 different ways we can get a device id:
 *    1. Already have one - nothing to do
 *    2. Build one from the drive's model and serial numbers
 *    3. Read one from the disk (first sector of last track)
 *    4. Fabricate one and write it on the disk.
558 * If any of these succeeds, register the deviceid 559 */ 560 static void 561 pv_cmdk_devid_setup(struct pv_cmdk *dkp) 562 { 563 int rc; 564 565 /* Try options until one succeeds, or all have failed */ 566 567 /* 1. All done if already registered */ 568 569 if (dkp->dk_devid != NULL) 570 return; 571 572 /* 2. Build a devid from the model and serial number */ 573 rc = pv_cmdk_devid_modser(dkp); 574 if (rc != DDI_SUCCESS) { 575 /* 3. Read devid from the disk, if present */ 576 rc = pv_cmdk_devid_read(dkp); 577 578 /* 4. otherwise make one up and write it on the disk */ 579 if (rc != DDI_SUCCESS) 580 rc = pv_cmdk_devid_fabricate(dkp); 581 } 582 583 /* If we managed to get a devid any of the above ways, register it */ 584 if (rc == DDI_SUCCESS) 585 (void) ddi_devid_register(dkp->dk_dip, dkp->dk_devid); 586 } 587 588 /* 589 * Local Functions 590 */ 591 static int 592 pv_cmdk_iodone(struct buf *bp) 593 { 594 struct buf *bp_orig = bp->b_chain; 595 596 /* Propegate back the io results */ 597 bp_orig->b_resid = bp->b_resid; 598 bioerror(bp_orig, geterror(bp)); 599 biodone(bp_orig); 600 601 freerbuf(bp); 602 return (0); 603 } 604 605 static int 606 pv_cmdkstrategy(struct buf *bp) 607 { 608 dev_t dev = bp->b_edev; 609 int instance = XDF_DEV2UNIT(dev); 610 int part = XDF_DEV2PART(dev); 611 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 612 dev_t xdf_devt; 613 struct buf *bp_clone; 614 615 /* 616 * Sanity checks that the dev_t associated with the buf we were 617 * passed actually corresponds us and that the partition we're 618 * trying to access is actually open. On debug kernels we'll 619 * panic and on non-debug kernels we'll return failure. 
620 */ 621 ASSERT(getmajor(dev) == pv_cmdk_major); 622 if (getmajor(dev) != pv_cmdk_major) 623 goto err; 624 625 mutex_enter(&dkp->dk_mutex); 626 ASSERT(pv_cmdk_isopen_part(dkp, part)); 627 if (!pv_cmdk_isopen_part(dkp, part)) { 628 mutex_exit(&dkp->dk_mutex); 629 goto err; 630 } 631 mutex_exit(&dkp->dk_mutex); 632 633 /* clone this buffer */ 634 xdf_devt = dkp->dk_xdf_dev | part; 635 bp_clone = bioclone(bp, 0, bp->b_bcount, xdf_devt, bp->b_blkno, 636 pv_cmdk_iodone, NULL, KM_SLEEP); 637 bp_clone->b_chain = bp; 638 639 /* 640 * If we're being invoked on behalf of the physio() call in 641 * pv_cmdk_dioctl_rwcmd() then b_private will be set to 642 * XB_SLICE_NONE and we need to propegate this flag into the 643 * cloned buffer so that the xdf driver will see it. 644 */ 645 if (bp->b_private == (void *)XB_SLICE_NONE) 646 bp_clone->b_private = (void *)XB_SLICE_NONE; 647 648 /* 649 * Pass on the cloned buffer. Note that we don't bother to check 650 * for failure because the xdf strategy routine will have to 651 * invoke biodone() if it wants to return an error, which means 652 * that the pv_cmdk_iodone() callback will get invoked and it 653 * will propegate the error back up the stack and free the cloned 654 * buffer. 
655 */ 656 ASSERT(dkp->dk_xdf_lh[part] != NULL); 657 return (ldi_strategy(dkp->dk_xdf_lh[part], bp_clone)); 658 659 err: 660 bioerror(bp, ENXIO); 661 bp->b_resid = bp->b_bcount; 662 biodone(bp); 663 return (0); 664 } 665 666 /*ARGSUSED*/ 667 static int 668 pv_cmdkread(dev_t dev, struct uio *uio, cred_t *credp) 669 { 670 int instance = XDF_DEV2UNIT(dev); 671 int part = XDF_DEV2PART(dev); 672 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 673 674 return (ldi_read(dkp->dk_xdf_lh[part], uio, credp)); 675 } 676 677 /*ARGSUSED*/ 678 static int 679 pv_cmdkwrite(dev_t dev, struct uio *uio, cred_t *credp) 680 { 681 int instance = XDF_DEV2UNIT(dev); 682 int part = XDF_DEV2PART(dev); 683 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 684 685 return (ldi_write(dkp->dk_xdf_lh[part], uio, credp)); 686 } 687 688 /*ARGSUSED*/ 689 static int 690 pv_cmdkaread(dev_t dev, struct aio_req *aio, cred_t *credp) 691 { 692 int instance = XDF_DEV2UNIT(dev); 693 int part = XDF_DEV2PART(dev); 694 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 695 return (ldi_aread(dkp->dk_xdf_lh[part], aio, credp)); 696 } 697 698 /*ARGSUSED*/ 699 static int 700 pv_cmdkawrite(dev_t dev, struct aio_req *aio, cred_t *credp) 701 { 702 int instance = XDF_DEV2UNIT(dev); 703 int part = XDF_DEV2PART(dev); 704 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 705 return (ldi_awrite(dkp->dk_xdf_lh[part], aio, credp)); 706 } 707 708 static int 709 pv_cmdkdump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 710 { 711 int instance = XDF_DEV2UNIT(dev); 712 int part = XDF_DEV2PART(dev); 713 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 714 715 return (ldi_dump(dkp->dk_xdf_lh[part], addr, blkno, nblk)); 716 } 717 718 /* 719 * pv_rwcmd_copyin() is a duplicate of rwcmd_copyin(). 
720 */ 721 static int 722 pv_rwcmd_copyin(struct dadkio_rwcmd *rwcmdp, caddr_t inaddr, int flag) 723 { 724 switch (ddi_model_convert_from(flag)) { 725 case DDI_MODEL_ILP32: { 726 struct dadkio_rwcmd32 cmd32; 727 728 if (ddi_copyin(inaddr, &cmd32, 729 sizeof (struct dadkio_rwcmd32), flag)) { 730 return (EFAULT); 731 } 732 733 rwcmdp->cmd = cmd32.cmd; 734 rwcmdp->flags = cmd32.flags; 735 rwcmdp->blkaddr = (daddr_t)cmd32.blkaddr; 736 rwcmdp->buflen = cmd32.buflen; 737 rwcmdp->bufaddr = (caddr_t)(intptr_t)cmd32.bufaddr; 738 /* 739 * Note: we do not convert the 'status' field, 740 * as it should not contain valid data at this 741 * point. 742 */ 743 bzero(&rwcmdp->status, sizeof (rwcmdp->status)); 744 break; 745 } 746 case DDI_MODEL_NONE: { 747 if (ddi_copyin(inaddr, rwcmdp, 748 sizeof (struct dadkio_rwcmd), flag)) { 749 return (EFAULT); 750 } 751 } 752 } 753 return (0); 754 } 755 756 /* 757 * pv_rwcmd_copyout() is a duplicate of rwcmd_copyout(). 758 */ 759 static int 760 pv_rwcmd_copyout(struct dadkio_rwcmd *rwcmdp, caddr_t outaddr, int flag) 761 { 762 switch (ddi_model_convert_from(flag)) { 763 case DDI_MODEL_ILP32: { 764 struct dadkio_rwcmd32 cmd32; 765 766 cmd32.cmd = rwcmdp->cmd; 767 cmd32.flags = rwcmdp->flags; 768 cmd32.blkaddr = rwcmdp->blkaddr; 769 cmd32.buflen = rwcmdp->buflen; 770 ASSERT64(((uintptr_t)rwcmdp->bufaddr >> 32) == 0); 771 cmd32.bufaddr = (caddr32_t)(uintptr_t)rwcmdp->bufaddr; 772 773 cmd32.status.status = rwcmdp->status.status; 774 cmd32.status.resid = rwcmdp->status.resid; 775 cmd32.status.failed_blk_is_valid = 776 rwcmdp->status.failed_blk_is_valid; 777 cmd32.status.failed_blk = rwcmdp->status.failed_blk; 778 cmd32.status.fru_code_is_valid = 779 rwcmdp->status.fru_code_is_valid; 780 cmd32.status.fru_code = rwcmdp->status.fru_code; 781 782 bcopy(rwcmdp->status.add_error_info, 783 cmd32.status.add_error_info, DADKIO_ERROR_INFO_LEN); 784 785 if (ddi_copyout(&cmd32, outaddr, 786 sizeof (struct dadkio_rwcmd32), flag)) 787 return (EFAULT); 788 break; 
789 } 790 case DDI_MODEL_NONE: { 791 if (ddi_copyout(rwcmdp, outaddr, 792 sizeof (struct dadkio_rwcmd), flag)) 793 return (EFAULT); 794 } 795 } 796 return (0); 797 } 798 799 static void 800 pv_cmdkmin(struct buf *bp) 801 { 802 if (bp->b_bcount > DK_MAXRECSIZE) 803 bp->b_bcount = DK_MAXRECSIZE; 804 } 805 806 static int 807 pv_cmdk_dioctl_rwcmd(dev_t dev, intptr_t arg, int flag) 808 { 809 struct dadkio_rwcmd *rwcmdp; 810 struct iovec aiov; 811 struct uio auio; 812 struct buf *bp; 813 int rw, status; 814 815 rwcmdp = kmem_alloc(sizeof (struct dadkio_rwcmd), KM_SLEEP); 816 status = pv_rwcmd_copyin(rwcmdp, (caddr_t)arg, flag); 817 818 if (status != 0) 819 goto out; 820 821 switch (rwcmdp->cmd) { 822 case DADKIO_RWCMD_READ: 823 case DADKIO_RWCMD_WRITE: 824 break; 825 default: 826 status = EINVAL; 827 goto out; 828 } 829 830 bzero((caddr_t)&aiov, sizeof (struct iovec)); 831 aiov.iov_base = rwcmdp->bufaddr; 832 aiov.iov_len = rwcmdp->buflen; 833 834 bzero((caddr_t)&auio, sizeof (struct uio)); 835 auio.uio_iov = &aiov; 836 auio.uio_iovcnt = 1; 837 auio.uio_loffset = (offset_t)rwcmdp->blkaddr * (offset_t)XB_BSIZE; 838 auio.uio_resid = rwcmdp->buflen; 839 auio.uio_segflg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE; 840 841 /* 842 * Tell the xdf driver that this I/O request is using an absolute 843 * offset. 844 */ 845 bp = getrbuf(KM_SLEEP); 846 bp->b_private = (void *)XB_SLICE_NONE; 847 848 rw = ((rwcmdp->cmd == DADKIO_RWCMD_WRITE) ? 
B_WRITE : B_READ); 849 status = physio(pv_cmdkstrategy, bp, dev, rw, pv_cmdkmin, &auio); 850 851 biofini(bp); 852 kmem_free(bp, sizeof (buf_t)); 853 854 if (status == 0) 855 status = pv_rwcmd_copyout(rwcmdp, (caddr_t)arg, flag); 856 857 out: 858 kmem_free(rwcmdp, sizeof (struct dadkio_rwcmd)); 859 return (status); 860 } 861 862 static int 863 pv_cmdkioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, 864 int *rvalp) 865 { 866 int instance = XDF_DEV2UNIT(dev); 867 int part = XDF_DEV2PART(dev); 868 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 869 int err; 870 871 switch (cmd) { 872 default: 873 return (ldi_ioctl(dkp->dk_xdf_lh[part], 874 cmd, arg, flag, credp, rvalp)); 875 case DKIOCGETWCE: 876 case DKIOCSETWCE: 877 return (EIO); 878 case DKIOCADDBAD: { 879 /* 880 * This is for ata/ide bad block handling. It is supposed 881 * to cause the driver to re-read the bad block list and 882 * alternate map after it has been updated. Our driver 883 * will refuse to attach to any disk which has a bad blocks 884 * list defined, so there really isn't much to do here. 885 */ 886 return (0); 887 } 888 case DKIOCGETDEF: { 889 /* 890 * I can't actually find any code that utilizes this ioctl, 891 * hence we're leaving it explicitly unimplemented. 892 */ 893 ASSERT("ioctl cmd unsupported by pv_cmdk: DKIOCGETDEF"); 894 return (EIO); 895 } 896 case DIOCTL_RWCMD: { 897 /* 898 * This just seems to just be an alternate interface for 899 * reading and writing the disk. Great, another way to 900 * do the same thing... 
901 */ 902 return (pv_cmdk_dioctl_rwcmd(dev, arg, flag)); 903 } 904 case DKIOCINFO: { 905 dev_info_t *dip = dkp->dk_dip; 906 struct dk_cinfo info; 907 908 /* Pass on the ioctl request, save the response */ 909 if ((err = ldi_ioctl(dkp->dk_xdf_lh[part], 910 cmd, (intptr_t)&info, FKIOCTL, credp, rvalp)) != 0) 911 return (err); 912 913 /* Update controller info */ 914 info.dki_cnum = ddi_get_instance(ddi_get_parent(dip)); 915 (void) strlcpy(info.dki_cname, 916 ddi_get_name(ddi_get_parent(dip)), sizeof (info.dki_cname)); 917 918 /* Update unit info. */ 919 if (info.dki_ctype == DKC_VBD) 920 info.dki_ctype = DKC_DIRECT; 921 info.dki_unit = instance; 922 (void) strlcpy(info.dki_dname, 923 ddi_driver_name(dip), sizeof (info.dki_dname)); 924 info.dki_addr = 1; 925 926 if (ddi_copyout(&info, (void *)arg, sizeof (info), flag)) 927 return (EFAULT); 928 return (0); 929 } 930 } /* switch (cmd) */ 931 /*NOTREACHED*/ 932 } 933 934 /*ARGSUSED*/ 935 static int 936 pv_cmdkopen(dev_t *dev_p, int flag, int otyp, cred_t *credp) 937 { 938 ldi_ident_t li; 939 dev_t dev = *dev_p; 940 int instance = XDF_DEV2UNIT(dev); 941 int part = XDF_DEV2PART(dev); 942 struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); 943 dev_t xdf_devt = dkp->dk_xdf_dev | part; 944 int err = 0; 945 946 if ((otyp < 0) || (otyp >= OTYPCNT)) 947 return (EINVAL); 948 949 /* allocate an ldi handle */ 950 VERIFY(ldi_ident_from_dev(*dev_p, &li) == 0); 951 952 mutex_enter(&dkp->dk_mutex); 953 954 /* 955 * We translate all device opens (chr, blk, and lyr) into 956 * block device opens. Why? Because for all the opens that 957 * come through this driver, we only keep around one LDI handle. 958 * So that handle can only be of one open type. The reason 959 * that we choose the block interface for this is that to use 960 * the block interfaces for a device the system needs to allocatex 961 * buf_ts, which are associated with system memory which can act 962 * as a cache for device data. 
	 * So normally when a block device
	 * is closed the system will ensure that all these pages get
	 * flushed out of memory.  But if we were to open the device
	 * as a character device, then when we went to close the underlying
	 * device (even if we had invoked the block interfaces) any data
	 * remaining in memory wouldn't necessarily be flushed out
	 * before the device was closed.
	 */
	if (dkp->dk_xdf_lh[part] == NULL) {
		/* first open of this partition: open xdf and keep the handle */
		ASSERT(!pv_cmdk_isopen_part(dkp, part));

		err = ldi_open_by_dev(&xdf_devt, OTYP_BLK, flag, credp,
		    &dkp->dk_xdf_lh[part], li);

		if (err != 0) {
			mutex_exit(&dkp->dk_mutex);
			ldi_ident_release(li);
			return (err);
		}

		/* Disk devices really shouldn't clone */
		ASSERT(xdf_devt == (dkp->dk_xdf_dev | part));
	} else {
		ldi_handle_t lh_tmp;

		ASSERT(pv_cmdk_isopen_part(dkp, part));

		/* do ldi open/close to get flags and cred check */
		err = ldi_open_by_dev(&xdf_devt, OTYP_BLK, flag, credp,
		    &lh_tmp, li);
		if (err != 0) {
			mutex_exit(&dkp->dk_mutex);
			ldi_ident_release(li);
			return (err);
		}

		/* Disk devices really shouldn't clone */
		ASSERT(xdf_devt == (dkp->dk_xdf_dev | part));
		(void) ldi_close(lh_tmp, flag, credp);
	}
	ldi_ident_release(li);

	/* record this open so close(9E) can balance it */
	dkp->dk_xdf_otyp_count[otyp][part]++;

	mutex_exit(&dkp->dk_mutex);
	return (0);
}

/*ARGSUSED*/
/*
 * close(9E) entry point.  Decrements the per-otyp open count for this
 * partition and, when no opens of any type remain, closes and releases
 * the underlying xdf LDI handle.
 */
static int
pv_cmdkclose(dev_t dev, int flag, int otyp, cred_t *credp)
{
	int instance = XDF_DEV2UNIT(dev);
	int part = XDF_DEV2PART(dev);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	int err = 0;

	ASSERT((otyp >= 0) && otyp < OTYPCNT);

	/*
	 * Sanity check that the dev_t specified corresponds to this
	 * driver and that the device is actually open.  On debug kernels we'll
	 * panic and on non-debug kernels we'll return failure.
	 */
	ASSERT(getmajor(dev) == pv_cmdk_major);
	if (getmajor(dev) != pv_cmdk_major)
		return (ENXIO);

	mutex_enter(&dkp->dk_mutex);
	ASSERT(pv_cmdk_isopen_part(dkp, part));
	if (!pv_cmdk_isopen_part(dkp, part)) {
		mutex_exit(&dkp->dk_mutex);
		return (ENXIO);
	}

	ASSERT(dkp->dk_xdf_lh[part] != NULL);
	ASSERT(dkp->dk_xdf_otyp_count[otyp][part] > 0);
	if (otyp == OTYP_LYR) {
		/* layered opens are reference counted */
		dkp->dk_xdf_otyp_count[otyp][part]--;
	} else {
		/* chr/blk opens: a single close clears all opens of that type */
		dkp->dk_xdf_otyp_count[otyp][part] = 0;
	}

	if (!pv_cmdk_isopen_part(dkp, part)) {
		/* last close of this partition: drop the xdf LDI handle */
		err = ldi_close(dkp->dk_xdf_lh[part], flag, credp);
		dkp->dk_xdf_lh[part] = NULL;
	}

	mutex_exit(&dkp->dk_mutex);

	return (err);
}

/*
 * Query the emulated hardware (via the cmdk controller object hanging
 * off our dip's driver-private scsi_device) for the physical geometry
 * assigned to this disk, and translate it into a cmlb_geom_t.
 * Returns 0 on success or the error from the controller ioctl.
 */
static int
pv_cmdk_getpgeom(dev_info_t *dip, cmlb_geom_t *pgeom)
{
	struct scsi_device *scsi_device;
	struct tgdk_geom tgdk_geom;
	opaque_t ctlobjp;
	int err;

	scsi_device = ddi_get_driver_private(dip);
	ctlobjp = scsi_device->sd_address.a_hba_tran;
	if ((err = CTL_IOCTL(ctlobjp,
	    DIOCTL_GETPHYGEOM, (uintptr_t)&tgdk_geom, FKIOCTL)) != 0)
		return (err);

	/* This driver won't work if this isn't true */
	ASSERT(tgdk_geom.g_secsiz == XB_BSIZE);

	pgeom->g_ncyl = tgdk_geom.g_cyl;
	pgeom->g_acyl = tgdk_geom.g_acyl;
	pgeom->g_nhead = tgdk_geom.g_head;
	pgeom->g_nsect = tgdk_geom.g_sec;
	pgeom->g_secsize = tgdk_geom.g_secsiz;
	pgeom->g_capacity = tgdk_geom.g_cap;
	pgeom->g_intrlv = 1;
	pgeom->g_rpm = 3600;	/* fixed nominal value */
	return (0);
}

/*
 * pv_cmdk_bb_check() checks for the existence of bad block mappings in
 * the alternate partition/slice.  Returns B_FALSE if there are no bad
 * block mappings found, and B_TRUE if there are bad block mappings found.
 */
static boolean_t
pv_cmdk_bb_check(struct pv_cmdk *dkp)
{
	struct alts_parttbl *ap;
	diskaddr_t nblocks, blk;
	uint32_t altused, altbase, altlast;
	uint16_t vtoctag;
	int alts;

	/* find slice with V_ALTSCTR tag */
	for (alts = 0; alts < NDKMAP; alts++) {

		if (cmlb_partinfo(dkp->dk_cmlbhandle, alts,
		    &nblocks, &blk, NULL, &vtoctag, 0) != 0) {
			/* no partition table exists */
			return (B_FALSE);
		}

		if ((vtoctag == V_ALTSCTR) && (nblocks > 1))
			break;
	}
	if (alts >= NDKMAP)
		return (B_FALSE); /* no V_ALTSCTR slice defined */

	/* read in ALTS label block */
	ap = (struct alts_parttbl *)kmem_zalloc(NBPSCTR, KM_SLEEP);
	if (pv_cmdk_lb_rdwr(dkp->dk_dip,
	    TG_READ, ap, blk, NBPSCTR, NULL) != 0)
		goto err;

	altused = ap->alts_ent_used;	/* number of BB entries */
	altbase = ap->alts_ent_base;	/* blk offset from begin slice */
	altlast = ap->alts_ent_end;	/* blk offset to last block */

	/* sanity-check the table; treat anything inconsistent as "no map" */
	if ((altused == 0) || (altbase < 1) ||
	    (altbase > altlast) || (altlast >= nblocks))
		goto err;

	/* we found bad block mappings */
	kmem_free(ap, NBPSCTR);
	return (B_TRUE);

err:
	kmem_free(ap, NBPSCTR);
	return (B_FALSE);
}

/*
 * Autoconfiguration Routines
 */
static int
pv_cmdkattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int instance = ddi_get_instance(dip);
	dev_info_t *xdf_dip = NULL;
	struct pv_cmdk *dkp;
	cmlb_geom_t pgeom;
	char *path;
	int i;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	/*
	 * This cmdk device layers on top of an xdf device.  So the first
	 * thing we need to do is determine which xdf device instance this
	 * cmdk instance should be layered on top of.
	 */
	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path);
	for (i = 0; pv_cmdk_h2p[i].h2p_hvm_path != NULL; i++) {
		if (strcmp(pv_cmdk_h2p[i].h2p_hvm_path, path) == 0)
			break;
	}
	kmem_free(path, MAXPATHLEN);

	if (pv_cmdk_h2p[i].h2p_hvm_path == NULL) {
		/*
		 * UhOh.  We don't know what xdf instance this cmdk device
		 * should be mapped to.
		 */
		return (DDI_FAILURE);
	}

	/* Check if this device exists */
	xdf_dip = xdf_hvm_hold(pv_cmdk_h2p[i].h2p_pv_path);
	if (xdf_dip == NULL)
		return (DDI_FAILURE);

	/*
	 * allocate and initialize our state structure
	 *
	 * NOTE(review): the return value of ddi_soft_state_zalloc() is
	 * ignored; if it ever failed, dkp below would be NULL and be
	 * dereferenced -- confirm whether failure is possible here.
	 */
	(void) ddi_soft_state_zalloc(pv_cmdk_state, instance);
	dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	mutex_init(&dkp->dk_mutex, NULL, MUTEX_DRIVER, NULL);
	dkp->dk_dip = dip;
	dkp->dk_xdf_dip = xdf_dip;
	dkp->dk_xdf_dev = makedevice(ddi_driver_major(xdf_dip),
	    XDF_MINOR(ddi_get_instance(xdf_dip), 0));

	ASSERT((dkp->dk_xdf_dev & XDF_PMASK) == 0);

	/*
	 * GROSS HACK ALERT!  GROSS HACK ALERT!
	 *
	 * Before we can initialize the cmlb layer, we have to tell the
	 * underlying xdf device what its physical geometry should be.
	 * See the block comments at the top of this file for more info.
	 */
	if ((pv_cmdk_getpgeom(dip, &pgeom) != 0) ||
	    (xdf_hvm_setpgeom(dkp->dk_xdf_dip, &pgeom) != 0)) {
		ddi_release_devi(dkp->dk_xdf_dip);
		mutex_destroy(&dkp->dk_mutex);
		ddi_soft_state_free(pv_cmdk_state, instance);
		return (DDI_FAILURE);
	}

	/* create kstat for iostat(1M) */
	if (xdf_kstat_create(dkp->dk_xdf_dip, "cmdk", instance) != 0) {
		ddi_release_devi(dkp->dk_xdf_dip);
		mutex_destroy(&dkp->dk_mutex);
		ddi_soft_state_free(pv_cmdk_state, instance);
		return (DDI_FAILURE);
	}

	/*
	 * Force the xdf front end driver to connect to the backend.  From
	 * the solaris device tree perspective, the xdf driver devinfo node
	 * is already in the ATTACHED state.  (Otherwise xdf_hvm_hold()
	 * would not have returned a dip.)  But this doesn't mean that the
	 * xdf device has actually established a connection to its back
	 * end driver.  For us to be able to access the xdf device it needs
	 * to be connected.  There are two ways to force the xdf driver to
	 * connect to the backend device.
	 */
	if (xdf_hvm_connect(dkp->dk_xdf_dip) != 0) {
		cmn_err(CE_WARN,
		    "pv driver failed to connect: %s",
		    pv_cmdk_h2p[i].h2p_pv_path);
		xdf_kstat_delete(dkp->dk_xdf_dip);
		ddi_release_devi(dkp->dk_xdf_dip);
		mutex_destroy(&dkp->dk_mutex);
		ddi_soft_state_free(pv_cmdk_state, instance);
		return (DDI_FAILURE);
	}

	/*
	 * Initialize cmlb.  Note that for partition information cmlb
	 * will access the underlying xdf disk device directly via
	 * pv_cmdk_lb_rdwr() and pv_cmdk_lb_getinfo().  There are no
	 * layered driver handles associated with this access because
	 * it is a direct disk access that doesn't go through
	 * any of the device nodes exported by the xdf device (since
	 * all exported device nodes only reflect the portion of
	 * the device visible via the partition/slice that the node
	 * is associated with.)  So while not observable via the LDI,
	 * this direct disk access is ok since we're actually holding
	 * the target device.
	 */
	cmlb_alloc_handle((cmlb_handle_t *)&dkp->dk_cmlbhandle);
	if (cmlb_attach(dkp->dk_dip, &pv_cmdk_lb_ops,
	    DTYPE_DIRECT,		/* device_type */
	    0,				/* not removable */
	    0,				/* not hot pluggable */
	    DDI_NT_BLOCK,
	    CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT,	/* mimic cmdk */
	    dkp->dk_cmlbhandle, 0) != 0) {
		cmlb_free_handle(&dkp->dk_cmlbhandle);
		xdf_kstat_delete(dkp->dk_xdf_dip);
		ddi_release_devi(dkp->dk_xdf_dip);
		mutex_destroy(&dkp->dk_mutex);
		ddi_soft_state_free(pv_cmdk_state, instance);
		return (DDI_FAILURE);
	}

	/* see the General Notes at the top of this file */
	if (pv_cmdk_bb_check(dkp)) {
		cmn_err(CE_WARN,
		    "pv cmdk disks with bad blocks are unsupported: %s",
		    pv_cmdk_h2p[i].h2p_hvm_path);

		cmlb_detach(dkp->dk_cmlbhandle, 0);
		cmlb_free_handle(&dkp->dk_cmlbhandle);
		xdf_kstat_delete(dkp->dk_xdf_dip);
		ddi_release_devi(dkp->dk_xdf_dip);
		mutex_destroy(&dkp->dk_mutex);
		ddi_soft_state_free(pv_cmdk_state, instance);
		return (DDI_FAILURE);
	}

	/* setup devid string */
	pv_cmdk_devid_setup(dkp);

	/* Calling validate will create minor nodes according to disk label */
	(void) cmlb_validate(dkp->dk_cmlbhandle, 0, 0);

	/*
	 * Add a zero-length attribute to tell the world we support
	 * kernel ioctls (for layered drivers).
	 */
	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0);

	/* Have the system report any newly created device nodes */
	ddi_report_dev(dip);

	return (DDI_SUCCESS);
}

/*
 * detach(9E) entry point.  Tears down everything attach(9E) set up, in
 * reverse order: devid, cmlb, mutex, kstat, the hold on the xdf dip,
 * soft state, and finally our properties.
 */
static int
pv_cmdkdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance = ddi_get_instance(dip);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ASSERT(MUTEX_NOT_HELD(&dkp->dk_mutex));

	ddi_devid_unregister(dip);
	if (dkp->dk_devid)
		ddi_devid_free(dkp->dk_devid);
	cmlb_detach(dkp->dk_cmlbhandle, 0);
	cmlb_free_handle(&dkp->dk_cmlbhandle);
	mutex_destroy(&dkp->dk_mutex);
	xdf_kstat_delete(dkp->dk_xdf_dip);
	ddi_release_devi(dkp->dk_xdf_dip);
	ddi_soft_state_free(pv_cmdk_state, instance);
	ddi_prop_remove_all(dip);

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
/*
 * getinfo(9E) entry point: map a dev_t to its devinfo node or instance
 * number.
 */
static int
pv_cmdk_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result)
{
	dev_t dev = (dev_t)arg;
	int instance = XDF_DEV2UNIT(dev);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if (dkp == NULL)
			return (DDI_FAILURE);
		*result = (void *)dkp->dk_dip;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(intptr_t)instance;
		break;
	default:
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

/*
 * prop_op(9E) entry point.  Dynamic property lookups are forwarded to
 * the underlying xdf device so that our minor nodes mirror its
 * properties (size, nblocks, etc.).
 */
static int
pv_cmdk_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int flags, char *name, caddr_t valuep, int *lengthp)
{
	int instance = ddi_get_instance(dip);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	dev_info_t *xdf_dip;
	dev_t xdf_devt;
	int err;

	/*
	 * Sanity check that if a dev_t or dip were specified that they
	 * correspond to this device driver.  On debug kernels we'll
	 * panic and on non-debug kernels we'll return failure.
	 */
	ASSERT(ddi_driver_major(dip) == pv_cmdk_major);
	ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == pv_cmdk_major));
	if ((ddi_driver_major(dip) != pv_cmdk_major) ||
	    ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != pv_cmdk_major)))
		return (DDI_PROP_NOT_FOUND);

	/*
	 * This property lookup might be associated with a device node
	 * that is not yet attached, if so pass it onto ddi_prop_op().
	 */
	if (dkp == NULL)
		return (ddi_prop_op(dev, dip, prop_op, flags,
		    name, valuep, lengthp));

	/*
	 * Make sure we only lookup dynamic properties.
	 *
	 * If there are static properties of the underlying xdf driver
	 * that we want to mirror, then we'll have to explicitly look them
	 * up and define them during attach.  There are a few reasons
	 * for this.  Most importantly, most static properties are typed
	 * and all dynamic properties are untyped, ie, for dynamic
	 * properties the caller must know the type of the property and
	 * how to interpret the value of the property.  The prop_op driver
	 * entry point is only designed for returning dynamic/untyped
	 * properties, so if we were to attempt to lookup and pass back
	 * static properties of the underlying device here then we would
	 * be losing the type information for those properties.  Another
	 * reason we don't want to pass on static property requests is that
	 * static properties are enumerable in the device tree, whereas
	 * dynamic ones are not.
	 */
	flags |= DDI_PROP_DYNAMIC;

	/*
	 * We can't use the ldi here to access the underlying device because
	 * the ldi actually opens the device, and that open might fail if the
	 * device has already been opened with the FEXCL flag.  If we used
	 * the ldi here, it would also be possible for some other caller
	 * to try to open the device with the FEXCL flag and get a failure
	 * back because we have it open to do a property query.
	 *
	 * Instead we'll grab a hold on the target dip and query the
	 * property directly.
	 */
	mutex_enter(&dkp->dk_mutex);

	if ((xdf_dip = dkp->dk_xdf_dip) == NULL) {
		mutex_exit(&dkp->dk_mutex);
		return (DDI_PROP_NOT_FOUND);
	}
	e_ddi_hold_devi(xdf_dip);

	/* figure out the dev_t we're going to pass on down */
	if (dev == DDI_DEV_T_ANY) {
		xdf_devt = DDI_DEV_T_ANY;
	} else {
		xdf_devt = dkp->dk_xdf_dev | XDF_DEV2PART(dev);
	}

	mutex_exit(&dkp->dk_mutex);

	/*
	 * Cdev_prop_op() is not a public interface, and normally the caller
	 * is required to make sure that the target driver actually implements
	 * this interface before trying to invoke it.  In this case we know
	 * that we're always accessing the xdf driver and it does have this
	 * interface defined, so we can skip the check.
	 */
	err = cdev_prop_op(xdf_devt, xdf_dip,
	    prop_op, flags, name, valuep, lengthp);
	ddi_release_devi(xdf_dip);
	return (err);
}

/*
 * Device driver ops vector
 */
static struct cb_ops pv_cmdk_cb_ops = {
	pv_cmdkopen,		/* open */
	pv_cmdkclose,		/* close */
	pv_cmdkstrategy,	/* strategy */
	nodev,			/* print */
	pv_cmdkdump,		/* dump */
	pv_cmdkread,		/* read */
	pv_cmdkwrite,		/* write */
	pv_cmdkioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	pv_cmdk_prop_op,	/* cb_prop_op */
	0,			/* streamtab */
	D_64BIT | D_MP | D_NEW,	/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	pv_cmdkaread,		/* async read */
	pv_cmdkawrite		/* async write */
};

struct dev_ops pv_cmdk_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	pv_cmdk_getinfo,	/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	pv_cmdkattach,		/* attach */
	pv_cmdkdetach,		/* detach */
	nodev,			/* reset */
	&pv_cmdk_cb_ops,	/* driver operations */
	(struct bus_ops *)0	/* bus operations */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"PV Common Direct Access Disk",
	&pv_cmdk_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};

/*
 * _init(9E): resolve the "cmdk" major number we are shadowing, set up
 * soft state, select the hardware-emulator mapping table, and install
 * the module.
 */
int
_init(void)
{
	int rval;

	if ((pv_cmdk_major = ddi_name_to_major("cmdk")) == (major_t)-1)
		return (EINVAL);

	/*
	 * In general IDE usually supports 4 disk devices; this same
	 * limitation also applies to software emulating IDE devices,
	 * so by default we pre-allocate 4 cmdk soft state structures.
	 */
	if ((rval = ddi_soft_state_init(&pv_cmdk_state,
	    sizeof (struct pv_cmdk), PV_CMDK_NODES)) != 0)
		return (rval);

	/*
	 * Currently we only support qemu as the backing hardware emulator
	 * for cmdk devices.
	 */
	pv_cmdk_h2p = pv_cmdk_h2p_xen_qemu;

	/* Install our module */
	if ((rval = mod_install(&modlinkage)) != 0) {
		ddi_soft_state_fini(&pv_cmdk_state);
		return (rval);
	}

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int rval;
	if ((rval = mod_remove(&modlinkage)) != 0)
		return (rval);
	ddi_soft_state_fini(&pv_cmdk_state);
	return (0);
}