/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/scsi/scsi_types.h>
#include <sys/modctl.h>
#include <sys/cmlb.h>
#include <sys/types.h>
#include <sys/xpv_support.h>
#include <sys/xendev.h>
#include <sys/gnttab.h>
#include <public/xen.h>
#include <public/grant_table.h>
#include <io/xdf.h>
#include <sys/vtoc.h>
#include <sys/dkio.h>
#include <sys/dktp/dadev.h>
#include <sys/dktp/dadkio.h>
#include <sys/dktp/tgdk.h>
#include <sys/dktp/bbh.h>
#include <sys/dktp/cmdk.h>
#include <sys/dktp/altsctr.h>

/*
 * General Notes
 *
 * We don't support disks with bad block mappings.  We have this
 * limitation because the underlying xdf driver doesn't support
 * bad block remapping.  If there is a need to support this feature
 * it should be added directly to the xdf driver and we should just
 * pass requests straight on through and let it handle the remapping.
 * Also, it's probably worth pointing out that most modern disks do bad
 * block remapping internally in the hardware so there's actually less
 * of a chance of us ever discovering bad blocks.  Also, in most cases
 * this driver (and the xdf driver) will only be used with virtualized
 * devices, so one might wonder why a virtual device would ever actually
 * experience bad blocks.  To wrap this up, you might be wondering how
 * these bad block mappings get created and how they are managed.  Well,
 * there are two tools for managing bad block mappings, format(1M) and
 * addbadsec(1M).  Format(1M) can be used to do a surface scan of a disk
 * to attempt to find bad blocks and create mappings for them.  Format(1M)
 * and addbadsec(1M) can also be used to edit existing mappings that may
 * be saved on the disk.
 *
 * The underlying PV driver that this driver passes on requests to is the
 * xdf driver.  Since in most cases the xdf driver doesn't deal with
 * physical disks it has its own algorithm for assigning a physical
 * geometry to a virtual disk (ie, cylinder count, head count, etc.)
 * The default values chosen by the xdf driver may not match those
 * assigned to a disk by a hardware disk emulator in an HVM environment.
 * This is a problem since these physical geometry attributes affect
 * things like the partition table, backup label location, etc.  So
 * to emulate disk devices correctly we need to know the physical geometry
 * that was assigned to a disk at the time of its initialization.
 * Normally in an HVM environment this information will be passed to
 * the BIOS and operating system from the hardware emulator that is
 * emulating the disk devices.  In the case of a solaris dom0+xvm
 * this would be qemu.  So to work around this issue, this driver will
 * query the emulated hardware to get the assigned physical geometry
 * and then pass this geometry onto the xdf driver so that it can use it.
 * But really, this information is essentially metadata about the disk
 * that should be kept with the disk image itself.  (Assuming of course
 * that a disk image is the actual backing store for this emulated device.)
 * This metadata should also be made available to PV drivers via a common
 * mechanism, probably the xenstore.  The fact that this metadata isn't
 * available outside of HVM domains means that it's difficult to move
 * disks between HVM and PV domains, since a fully PV domain will have no
 * way of knowing what the correct geometry of the target device is.
 * (Short of reading the disk, looking for things like partition tables
 * and labels, and taking a best guess at what the geometry was when
 * the disk was initialized.  Unsurprisingly, qemu actually does this.)
 *
 * This driver has to map cmdk device instances into their corresponding
 * xdf device instances.  We have to do this to ensure that when a user
 * accesses an emulated cmdk device we map those accesses to the proper
 * paravirtualized device.  Basically what we need to know is how multiple
 * 'disk' entries in a domU configuration file get mapped to emulated
 * cmdk devices and to xdf devices.  The 'disk' entry to xdf instance
 * mappings we know because those are done within the Solaris xvdi code
 * and the xpvd nexus driver.  But the config to emulated devices mappings
 * are handled entirely within the xen management tool chain and the
 * hardware emulator.  Since all the tools that establish these mappings
 * live in dom0, dom0 should really supply us with this information,
 * probably via the xenstore.  Unfortunately it doesn't, so, since there's
 * no good way to determine this mapping dynamically, this driver uses
 * a hard coded set of static mappings.  These mappings are hardware
 * emulator specific because each different hardware emulator could have
 * a different device tree with different cmdk device paths.  This
 * means that if we want to continue to use this static mapping approach
 * to allow Solaris to run on different hardware emulators we'll have
 * to analyze each of those emulators to determine what paths they
 * use and hard code those paths into this driver.  yech.  This metadata
 * really needs to be supplied to us by dom0.
 *
 * This driver accesses underlying xdf nodes.  Unfortunately, devices
 * must create minor nodes during attach, and for disk devices to create
 * minor nodes, they have to look at the label on the disk, so this means
 * that disk drivers must be able to access a disk's contents during
 * attach.  That means that this disk driver must be able to access
 * underlying xdf nodes during attach.  Unfortunately, due to device tree
 * locking restrictions, we cannot have an attach operation occurring on
 * this device and then attempt to access another device which may
 * cause another attach to occur in a different device tree branch
 * since this could result in deadlock.  Hence, this driver can only
 * access xdf device nodes that we know are attached, and it can't use
 * any ddi interfaces to access those nodes if those interfaces could
 * trigger an attach of the xdf device.  So this driver works around
 * these restrictions by talking directly to xdf devices via
 * xdf_hvm_hold().  This interface takes a pathname to an xdf device,
 * and if that device is already attached then it returns a held dip
 * pointer for that device node.  This prevents us from getting into
 * deadlock situations, but now we need a mechanism to ensure that all
 * the xdf device nodes this driver might access are attached before
 * this driver tries to access them.  This is accomplished via the
 * hvmboot_rootconf() callback which is invoked just before root is
 * mounted.  hvmboot_rootconf() will attach xpvd and tell it to configure
 * all xdf devices visible to the system.  All these xdf device nodes
 * will also be marked with the "ddi-no-autodetach" property so that
 * once they are configured, they will not be automatically unconfigured.
 * The only way that they could be unconfigured is if the administrator
 * explicitly attempts to unload required modules via rem_drv(1M)
 * or modunload(1M).
 */

/*
 * 16 partitions + fdisk (see xdf.h)
 */
#define	XDF_DEV2UNIT(dev)	XDF_INST((getminor((dev))))
#define	XDF_DEV2PART(dev)	XDF_PART((getminor((dev))))

#define	OTYP_VALID(otyp)	((otyp == OTYP_BLK) ||		\
				    (otyp == OTYP_CHR) ||	\
				    (otyp == OTYP_LYR))

#define	PV_CMDK_NODES		4

typedef struct hvm_to_pv {
	char	*h2p_hvm_path;
	char	*h2p_pv_path;
} hvm_to_pv_t;

/*
 */
static hvm_to_pv_t pv_cmdk_h2p_xen_qemu[] = {
	/*
	 * The paths mapping here are very specific to xen and qemu.  When a
	 * domU is booted under xen in HVM mode, qemu is normally used to
	 * emulate up to four ide disks.  These disks always have the four
	 * paths listed below.  To configure an emulated ide device, the
	 * xen domain configuration file normally has an entry that looks
	 * like this:
	 *	disk = [ 'file:/foo.img,hda,w' ]
	 *
	 * The part we're interested in is the 'hda', which we'll call the
	 * xen disk device name here.  The xen management tools (which parse
	 * the xen domain configuration file and launch qemu) make the
	 * following assumptions about this value:
	 *	hda == emulated ide disk 0 (ide bus 0, master)
	 *	hdb == emulated ide disk 1 (ide bus 0, slave)
	 *	hdc == emulated ide disk 2 (ide bus 1, master)
	 *	hdd == emulated ide disk 3 (ide bus 1, slave)
	 *
	 * (Uncoincidentally, these xen disk device names actually map to
	 * the /dev filesystem names of ide disk devices in Linux.  So in
	 * Linux /dev/hda is the first ide disk.)  So for the first part of
	 * our mapping we've just hardcoded the cmdk paths that we know
	 * qemu will use.
	 *
	 * To understand the second half of the mapping (ie, the xdf device
	 * that each emulated cmdk device should be mapped to) we need to
	 * know the solaris device node address that will be assigned to
	 * each xdf device.  (The device node address is the hex number that
	 * comes after the "xdf@" in the device path.)
	 *
	 * Normally when a domU is run in non-HVM mode, the xen disk device
	 * names in the xen domain configuration file are specified with
	 * integers instead of Linux device names.  (for example, '0' would
	 * be used instead of 'hda'.)  So in the non-HVM case we simply
	 * convert the xen disk device name (which is an integer) into a
	 * hex number and use it as the Solaris xdf device node address.
	 * But when we're running in HVM mode then we have a string for the
	 * xen disk device name, so we can't simply use that as a solaris
	 * device node address.  Instead we fall back to using the xenstore
	 * device id for the xen disk device as the xdf device node address.
	 * The xdf device node address assignment happens in xvdi_init_dev().
	 *
	 * So the question becomes, how do we know what the xenstore device
	 * id for emulated disk will be?  Well, it turns out that since the
	 * xen management tools expect the disk device names to be Linux
	 * device names, those same management tools assign each disk a
	 * device id that matches the dev_t of the corresponding device
	 * under Linux.  (Big shocker.)  This xen device name-to-id mapping
	 * is currently all hard coded here:
	 *	xen.hg/tools/python/xen/util/blkif.py`blkdev_name_to_number()
	 *
	 * So looking at the code above we can see the following xen disk
	 * device name to xenstore device id mappings:
	 *	'hda' --> 0x300 == 0t768 == ((3 * 256) + (0 * 64))
	 *	'hdb' --> 0x340 == 0t832 == ((3 * 256) + (1 * 64))
	 *	'hdc' --> 0x1600 == 0t5632 == ((22 * 256) + (0 * 64))
	 *	'hdd' --> 0x1640 == 0t5696 == ((22 * 256) + (1 * 64))
	 */
	{ "/pci@0,0/pci-ide@1,1/ide@0/cmdk@0,0", "/xpvd/xdf@300" },
	{ "/pci@0,0/pci-ide@1,1/ide@0/cmdk@1,0", "/xpvd/xdf@340" },
	{ "/pci@0,0/pci-ide@1,1/ide@1/cmdk@0,0", "/xpvd/xdf@1600" },
	{ "/pci@0,0/pci-ide@1,1/ide@1/cmdk@1,0", "/xpvd/xdf@1640" },
	{ NULL, 0 }
};

typedef struct pv_cmdk {
	dev_info_t	*dk_dip;
	cmlb_handle_t	dk_cmlbhandle;
	ddi_devid_t	dk_devid;
	kmutex_t	dk_mutex;
	dev_info_t	*dk_xdf_dip;
	dev_t		dk_xdf_dev;
	int		dk_xdf_otyp_count[OTYPCNT][XDF_PEXT];
	ldi_handle_t	dk_xdf_lh[XDF_PEXT];
} pv_cmdk_t;

/*
 * Globals
 */
static void *pv_cmdk_state;
static major_t pv_cmdk_major;
static hvm_to_pv_t *pv_cmdk_h2p;

/*
 * Function prototypes for xdf callback functions
 */
extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);

static boolean_t
pv_cmdk_isopen_part(struct pv_cmdk *dkp, int part)
{
	int otyp;

	ASSERT(MUTEX_HELD(&dkp->dk_mutex));

	for (otyp = 0; (otyp < OTYPCNT); otyp++) {
		if (dkp->dk_xdf_otyp_count[otyp][part] != 0)
			return (B_TRUE);
	}
	return (B_FALSE);
}

/*
 * Cmlb ops vectors, allows the cmlb module to directly access the entire
 * pv_cmdk disk device without going through any partitioning layers.
 */
/*ARGSUSED*/
static int
pv_cmdk_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr,
    diskaddr_t start, size_t count, void *tg_cookie)
{
	int instance = ddi_get_instance(dip);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);

	if (dkp == NULL)
		return (ENXIO);

	return (xdf_lb_rdwr(dkp->dk_xdf_dip, cmd, bufaddr, start, count,
	    tg_cookie));
}

/*ARGSUSED*/
static int
pv_cmdk_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	int instance = ddi_get_instance(dip);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	int err;

	if (dkp == NULL)
		return (ENXIO);

	if (cmd == TG_GETVIRTGEOM) {
		cmlb_geom_t pgeom, *vgeomp;
		diskaddr_t capacity;

		/*
		 * The native xdf driver doesn't support this ioctl.
		 * Instead of passing it on, emulate it here so that the
		 * results look the same as what we get for a real cmdk
		 * device.
		 *
		 * Get the real size of the device
		 */
		if ((err = xdf_lb_getinfo(dkp->dk_xdf_dip,
		    TG_GETPHYGEOM, &pgeom, tg_cookie)) != 0)
			return (err);
		capacity = pgeom.g_capacity;

		/*
		 * If the controller returned us something that doesn't
		 * really fit into an Int 13/function 8 geometry
		 * result, just fail the ioctl.  See PSARC 1998/313.
		 */
		if (capacity >= (63 * 254 * 1024))
			return (EINVAL);

		vgeomp = (cmlb_geom_t *)arg;
		vgeomp->g_capacity = capacity;
		vgeomp->g_nsect = 63;
		vgeomp->g_nhead = 254;
		vgeomp->g_ncyl = capacity / (63 * 254);
		vgeomp->g_acyl = 0;
		vgeomp->g_secsize = 512;
		vgeomp->g_intrlv = 1;
		vgeomp->g_rpm = 3600;
		return (0);
	}

	return (xdf_lb_getinfo(dkp->dk_xdf_dip, cmd, arg, tg_cookie));
}

static cmlb_tg_ops_t pv_cmdk_lb_ops = {
	TG_DK_OPS_VERSION_1,
	pv_cmdk_lb_rdwr,
	pv_cmdk_lb_getinfo
};

/*
 * devid management functions
 */

/*
 * pv_cmdk_get_modser() is basically a local copy of
 * cmdk_get_modser() modified to work without the dadk layer.
 * (which the non-pv version of the cmdk driver uses.)
 */
static int
pv_cmdk_get_modser(struct pv_cmdk *dkp, int ioccmd, char *buf, int len)
{
	struct scsi_device *scsi_device;
	opaque_t ctlobjp;
	dadk_ioc_string_t strarg;
	char *s;
	char ch;
	boolean_t ret;
	int i;
	int tb;

	strarg.is_buf = buf;
	strarg.is_size = len;
	scsi_device = ddi_get_driver_private(dkp->dk_dip);
	ctlobjp = scsi_device->sd_address.a_hba_tran;
	if (CTL_IOCTL(ctlobjp,
	    ioccmd, (uintptr_t)&strarg, FNATIVE | FKIOCTL) != 0)
		return (0);

	/*
	 * valid model/serial string must contain a non-zero non-space
	 * trim trailing spaces/NULL
	 */
	ret = B_FALSE;
	s = buf;
	for (i = 0; i < strarg.is_size; i++) {
		ch = *s++;
		if (ch != ' ' && ch != '\0')
			tb = i + 1;
		if (ch != ' ' && ch != '\0' && ch != '0')
			ret = B_TRUE;
	}

	if (ret == B_FALSE)
		return (0);

	return (tb);
}

/*
 * pv_cmdk_devid_modser() is basically a copy of cmdk_devid_modser()
 * that has been modified to use local pv cmdk driver functions.
 *
 * Build a devid from the model and serial number
 * Return DDI_SUCCESS or DDI_FAILURE.
 */
static int
pv_cmdk_devid_modser(struct pv_cmdk *dkp)
{
	int rc = DDI_FAILURE;
	char *hwid;
	int modlen;
	int serlen;

	/*
	 * device ID is a concatenation of model number, '=', serial number.
	 */
	hwid = kmem_alloc(CMDK_HWIDLEN, KM_SLEEP);
	modlen = pv_cmdk_get_modser(dkp, DIOCTL_GETMODEL, hwid, CMDK_HWIDLEN);
	if (modlen == 0)
		goto err;

	hwid[modlen++] = '=';
	serlen = pv_cmdk_get_modser(dkp, DIOCTL_GETSERIAL,
	    hwid + modlen, CMDK_HWIDLEN - modlen);
	if (serlen == 0)
		goto err;

	hwid[modlen + serlen] = 0;

	/* Initialize the device ID, trailing NULL not included */
	rc = ddi_devid_init(dkp->dk_dip, DEVID_ATA_SERIAL, modlen + serlen,
	    hwid, (ddi_devid_t *)&dkp->dk_devid);
	if (rc != DDI_SUCCESS)
		goto err;

	kmem_free(hwid, CMDK_HWIDLEN);
	return (DDI_SUCCESS);

err:
	kmem_free(hwid, CMDK_HWIDLEN);
	return (DDI_FAILURE);
}

/*
 * pv_cmdk_devid_read() is basically a local copy of
 * cmdk_devid_read() modified to work without the dadk layer.
 * (which the non-pv version of the cmdk driver uses.)
 *
 * Read a devid from the first block of the last track of
 * the last cylinder.  Make sure what we read is a valid devid.
 * Return DDI_SUCCESS or DDI_FAILURE.
 */
static int
pv_cmdk_devid_read(struct pv_cmdk *dkp)
{
	diskaddr_t blk;
	struct dk_devid *dkdevidp;
	uint_t *ip, chksum;
	int i;

	if (cmlb_get_devid_block(dkp->dk_cmlbhandle, &blk, 0) != 0)
		return (DDI_FAILURE);

	dkdevidp = kmem_zalloc(NBPSCTR, KM_SLEEP);
	if (pv_cmdk_lb_rdwr(dkp->dk_dip,
	    TG_READ, dkdevidp, blk, NBPSCTR, NULL) != 0)
		goto err;

	/* Validate the revision */
	if ((dkdevidp->dkd_rev_hi != DK_DEVID_REV_MSB) ||
	    (dkdevidp->dkd_rev_lo != DK_DEVID_REV_LSB))
		goto err;

	/* Calculate the checksum */
	chksum = 0;
	ip = (uint_t *)dkdevidp;
	for (i = 0; i < ((NBPSCTR - sizeof (int))/sizeof (int)); i++)
		chksum ^= ip[i];
	if (DKD_GETCHKSUM(dkdevidp) != chksum)
		goto err;

	/* Validate the device id */
	if (ddi_devid_valid((ddi_devid_t)dkdevidp->dkd_devid) != DDI_SUCCESS)
		goto err;

	/* keep a copy of the device id */
	i = ddi_devid_sizeof((ddi_devid_t)dkdevidp->dkd_devid);
	dkp->dk_devid = kmem_alloc(i, KM_SLEEP);
	bcopy(dkdevidp->dkd_devid, dkp->dk_devid, i);
	kmem_free(dkdevidp, NBPSCTR);
	return (DDI_SUCCESS);

err:
	kmem_free(dkdevidp, NBPSCTR);
	return (DDI_FAILURE);
}

/*
 * pv_cmdk_devid_fabricate() is basically a local copy of
 * cmdk_devid_fabricate() modified to work without the dadk layer.
 * (which the non-pv version of the cmdk driver uses.)
 *
 * Create a devid and write it on the first block of the last track of
 * the last cylinder.
 * Return DDI_SUCCESS or DDI_FAILURE.
 */
static int
pv_cmdk_devid_fabricate(struct pv_cmdk *dkp)
{
	ddi_devid_t devid = NULL;	/* devid made by ddi_devid_init */
	struct dk_devid *dkdevidp = NULL; /* devid struct stored on disk */
	diskaddr_t blk;
	uint_t *ip, chksum;
	int i;

	if (cmlb_get_devid_block(dkp->dk_cmlbhandle, &blk, 0) != 0)
		return (DDI_FAILURE);

	if (ddi_devid_init(dkp->dk_dip, DEVID_FAB, 0, NULL, &devid) !=
	    DDI_SUCCESS)
		return (DDI_FAILURE);

	/* allocate a buffer */
	dkdevidp = (struct dk_devid *)kmem_zalloc(NBPSCTR, KM_SLEEP);

	/* Fill in the revision */
	dkdevidp->dkd_rev_hi = DK_DEVID_REV_MSB;
	dkdevidp->dkd_rev_lo = DK_DEVID_REV_LSB;

	/* Copy in the device id */
	i = ddi_devid_sizeof(devid);
	if (i > DK_DEVID_SIZE)
		goto err;
	bcopy(devid, dkdevidp->dkd_devid, i);

	/* Calculate the chksum */
	chksum = 0;
	ip = (uint_t *)dkdevidp;
	for (i = 0; i < ((NBPSCTR - sizeof (int))/sizeof (int)); i++)
		chksum ^= ip[i];

	/* Fill in the checksum */
	DKD_FORMCHKSUM(chksum, dkdevidp);

	if (pv_cmdk_lb_rdwr(dkp->dk_dip,
	    TG_WRITE, dkdevidp, blk, NBPSCTR, NULL) != 0)
		goto err;

	kmem_free(dkdevidp, NBPSCTR);

	dkp->dk_devid = devid;
	return (DDI_SUCCESS);

err:
	if (dkdevidp != NULL)
		kmem_free(dkdevidp, NBPSCTR);
	if (devid != NULL)
		ddi_devid_free(devid);
	return (DDI_FAILURE);
}

/*
 * pv_cmdk_devid_setup() is basically a local copy of cmdk_devid_setup()
 * that has been modified to use local pv cmdk driver functions.
 *
 * Create and register the devid.
 * There are 4 different ways we can get a device id:
 *	1. Already have one - nothing to do
 *	2. Build one from the drive's model and serial numbers
 *	3. Read one from the disk (first sector of last track)
 *	4. Fabricate one and write it on the disk.
 * If any of these succeeds, register the device id
 */
static void
pv_cmdk_devid_setup(struct pv_cmdk *dkp)
{
	int rc;

	/* Try options until one succeeds, or all have failed */

	/* 1. All done if already registered */

	if (dkp->dk_devid != NULL)
		return;

	/* 2. Build a devid from the model and serial number */
	rc = pv_cmdk_devid_modser(dkp);
	if (rc != DDI_SUCCESS) {
		/* 3. Read devid from the disk, if present */
		rc = pv_cmdk_devid_read(dkp);

		/* 4. otherwise make one up and write it on the disk */
		if (rc != DDI_SUCCESS)
			rc = pv_cmdk_devid_fabricate(dkp);
	}

	/* If we managed to get a devid any of the above ways, register it */
	if (rc == DDI_SUCCESS)
		(void) ddi_devid_register(dkp->dk_dip, dkp->dk_devid);
}

/*
 * Local Functions
 */
static int
pv_cmdk_iodone(struct buf *bp)
{
	struct buf *bp_orig = bp->b_chain;

	/* Propagate back the io results */
	bp_orig->b_resid = bp->b_resid;
	bioerror(bp_orig, geterror(bp));
	biodone(bp_orig);

	freerbuf(bp);
	return (0);
}

static int
pv_cmdkstrategy(struct buf *bp)
{
	dev_t dev = bp->b_edev;
	int instance = XDF_DEV2UNIT(dev);
	int part = XDF_DEV2PART(dev);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	dev_t xdf_devt;
	struct buf *bp_clone;

	/*
	 * Sanity checks that the dev_t associated with the buf we were
	 * passed actually corresponds to us and that the partition we're
	 * trying to access is actually open.  On debug kernels we'll
	 * panic and on non-debug kernels we'll return failure.
	 */
	ASSERT(getmajor(dev) == pv_cmdk_major);
	if (getmajor(dev) != pv_cmdk_major)
		goto err;

	mutex_enter(&dkp->dk_mutex);
	ASSERT(pv_cmdk_isopen_part(dkp, part));
	if (!pv_cmdk_isopen_part(dkp, part)) {
		mutex_exit(&dkp->dk_mutex);
		goto err;
	}
	mutex_exit(&dkp->dk_mutex);

	/* clone this buffer */
	xdf_devt = dkp->dk_xdf_dev | part;
	bp_clone = bioclone(bp, 0, bp->b_bcount, xdf_devt, bp->b_blkno,
	    pv_cmdk_iodone, NULL, KM_SLEEP);
	bp_clone->b_chain = bp;

	/*
	 * If we're being invoked on behalf of the physio() call in
	 * pv_cmdk_dioctl_rwcmd() then b_private will be set to
	 * XB_SLICE_NONE and we need to propagate this flag into the
	 * cloned buffer so that the xdf driver will see it.
	 */
	if (bp->b_private == (void *)XB_SLICE_NONE)
		bp_clone->b_private = (void *)XB_SLICE_NONE;

	/*
	 * Pass on the cloned buffer.  Note that we don't bother to check
	 * for failure because the xdf strategy routine will have to
	 * invoke biodone() if it wants to return an error, which means
	 * that the pv_cmdk_iodone() callback will get invoked and it
	 * will propagate the error back up the stack and free the cloned
	 * buffer.
	 */
	ASSERT(dkp->dk_xdf_lh[part] != NULL);
	return (ldi_strategy(dkp->dk_xdf_lh[part], bp_clone));

err:
	bioerror(bp, ENXIO);
	bp->b_resid = bp->b_bcount;
	biodone(bp);
	return (0);
}

/*ARGSUSED*/
static int
pv_cmdkread(dev_t dev, struct uio *uio, cred_t *credp)
{
	int instance = XDF_DEV2UNIT(dev);
	int part = XDF_DEV2PART(dev);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);

	return (ldi_read(dkp->dk_xdf_lh[part], uio, credp));
}

/*ARGSUSED*/
static int
pv_cmdkwrite(dev_t dev, struct uio *uio, cred_t *credp)
{
	int instance = XDF_DEV2UNIT(dev);
	int part = XDF_DEV2PART(dev);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);

	return (ldi_write(dkp->dk_xdf_lh[part], uio, credp));
}

/*ARGSUSED*/
static int
pv_cmdkaread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	int instance = XDF_DEV2UNIT(dev);
	int part = XDF_DEV2PART(dev);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	return (ldi_aread(dkp->dk_xdf_lh[part], aio, credp));
}

/*ARGSUSED*/
static int
pv_cmdkawrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	int instance = XDF_DEV2UNIT(dev);
	int part = XDF_DEV2PART(dev);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	return (ldi_awrite(dkp->dk_xdf_lh[part], aio, credp));
}

static int
pv_cmdkdump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	int instance = XDF_DEV2UNIT(dev);
	int part = XDF_DEV2PART(dev);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);

	return (ldi_dump(dkp->dk_xdf_lh[part], addr, blkno, nblk));
}

/*
 * pv_rwcmd_copyin() is a duplicate of rwcmd_copyin().
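 * It copies a dadkio_rwcmd structure in from the caller, converting
 * from the ILP32 layout when the request comes from a 32-bit consumer.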
 */
static int
pv_rwcmd_copyin(struct dadkio_rwcmd *rwcmdp, caddr_t inaddr, int flag)
{
	switch (ddi_model_convert_from(flag)) {
	case DDI_MODEL_ILP32: {
		struct dadkio_rwcmd32 cmd32;

		if (ddi_copyin(inaddr, &cmd32,
		    sizeof (struct dadkio_rwcmd32), flag)) {
			return (EFAULT);
		}

		rwcmdp->cmd = cmd32.cmd;
		rwcmdp->flags = cmd32.flags;
		rwcmdp->blkaddr = (daddr_t)cmd32.blkaddr;
		rwcmdp->buflen = cmd32.buflen;
		rwcmdp->bufaddr = (caddr_t)(intptr_t)cmd32.bufaddr;
		/*
		 * Note: we do not convert the 'status' field,
		 * as it should not contain valid data at this
		 * point.
		 */
		bzero(&rwcmdp->status, sizeof (rwcmdp->status));
		break;
	}
	case DDI_MODEL_NONE: {
		if (ddi_copyin(inaddr, rwcmdp,
		    sizeof (struct dadkio_rwcmd), flag)) {
			return (EFAULT);
		}
	}
	}
	return (0);
}

/*
 * pv_rwcmd_copyout() is a duplicate of rwcmd_copyout().
 */
static int
pv_rwcmd_copyout(struct dadkio_rwcmd *rwcmdp, caddr_t outaddr, int flag)
{
	switch (ddi_model_convert_from(flag)) {
	case DDI_MODEL_ILP32: {
		struct dadkio_rwcmd32 cmd32;

		cmd32.cmd = rwcmdp->cmd;
		cmd32.flags = rwcmdp->flags;
		cmd32.blkaddr = rwcmdp->blkaddr;
		cmd32.buflen = rwcmdp->buflen;
		ASSERT64(((uintptr_t)rwcmdp->bufaddr >> 32) == 0);
		cmd32.bufaddr = (caddr32_t)(uintptr_t)rwcmdp->bufaddr;

		cmd32.status.status = rwcmdp->status.status;
		cmd32.status.resid = rwcmdp->status.resid;
		cmd32.status.failed_blk_is_valid =
		    rwcmdp->status.failed_blk_is_valid;
		cmd32.status.failed_blk = rwcmdp->status.failed_blk;
		cmd32.status.fru_code_is_valid =
		    rwcmdp->status.fru_code_is_valid;
		cmd32.status.fru_code = rwcmdp->status.fru_code;

		bcopy(rwcmdp->status.add_error_info,
		    cmd32.status.add_error_info, DADKIO_ERROR_INFO_LEN);

		if (ddi_copyout(&cmd32, outaddr,
		    sizeof (struct dadkio_rwcmd32), flag))
			return (EFAULT);
		break;
	}
	case DDI_MODEL_NONE: {
		if (ddi_copyout(rwcmdp, outaddr,
		    sizeof (struct dadkio_rwcmd), flag))
			return (EFAULT);
	}
	}
	return (0);
}

static void
pv_cmdkmin(struct buf *bp)
{
	if (bp->b_bcount > DK_MAXRECSIZE)
		bp->b_bcount = DK_MAXRECSIZE;
}

static int
pv_cmdk_dioctl_rwcmd(dev_t dev, intptr_t arg, int flag)
{
	struct dadkio_rwcmd *rwcmdp;
	struct iovec aiov;
	struct uio auio;
	struct buf *bp;
	int rw, status;

	rwcmdp = kmem_alloc(sizeof (struct dadkio_rwcmd), KM_SLEEP);
	status = pv_rwcmd_copyin(rwcmdp, (caddr_t)arg, flag);

	if (status != 0)
		goto out;

	switch (rwcmdp->cmd) {
	case DADKIO_RWCMD_READ:
	case DADKIO_RWCMD_WRITE:
		break;
	default:
		status = EINVAL;
		goto out;
	}

	bzero((caddr_t)&aiov, sizeof (struct iovec));
	aiov.iov_base = rwcmdp->bufaddr;
	aiov.iov_len = rwcmdp->buflen;

	bzero((caddr_t)&auio, sizeof (struct uio));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_loffset = (offset_t)rwcmdp->blkaddr * (offset_t)XB_BSIZE;
	auio.uio_resid = rwcmdp->buflen;
	auio.uio_segflg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE;

	/*
	 * Tell the xdf driver that this I/O request is using an absolute
	 * offset.
	 */
	bp = getrbuf(KM_SLEEP);
	bp->b_private = (void *)XB_SLICE_NONE;

	rw = ((rwcmdp->cmd == DADKIO_RWCMD_WRITE) ? B_WRITE : B_READ);
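	/*
	 * physio() locks down the caller's buffer and issues the request
	 * through pv_cmdkstrategy(), with pv_cmdkmin() limiting the size
	 * of each transfer.
	 */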
	status = physio(pv_cmdkstrategy, bp, dev, rw, pv_cmdkmin, &auio);

	biofini(bp);
	kmem_free(bp, sizeof (buf_t));

	if (status == 0)
		status = pv_rwcmd_copyout(rwcmdp, (caddr_t)arg, flag);

out:
	kmem_free(rwcmdp, sizeof (struct dadkio_rwcmd));
	return (status);
}

static int
pv_cmdkioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
    int *rvalp)
{
	int instance = XDF_DEV2UNIT(dev);
	int part = XDF_DEV2PART(dev);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	int err;

	switch (cmd) {
	default:
		return (ldi_ioctl(dkp->dk_xdf_lh[part],
		    cmd, arg, flag, credp, rvalp));
	case DKIOCGETWCE:
	case DKIOCSETWCE:
		return (EIO);
	case DKIOCADDBAD: {
		/*
		 * This is for ata/ide bad block handling.  It is supposed
		 * to cause the driver to re-read the bad block list and
		 * alternate map after it has been updated.  Our driver
		 * will refuse to attach to any disk which has a bad blocks
		 * list defined, so there really isn't much to do here.
		 */
		return (0);
	}
	case DKIOCGETDEF: {
		/*
		 * I can't actually find any code that utilizes this ioctl,
		 * hence we're leaving it explicitly unimplemented.
		 */
		ASSERT("ioctl cmd unsupported by pv_cmdk: DKIOCGETDEF");
		return (EIO);
	}
	case DIOCTL_RWCMD: {
		/*
		 * This just seems to be an alternate interface for
		 * reading and writing the disk.  Great, another way to
		 * do the same thing...
		 */
		return (pv_cmdk_dioctl_rwcmd(dev, arg, flag));
	}
	case DKIOCINFO: {
		dev_info_t *dip = dkp->dk_dip;
		struct dk_cinfo info;

		/* Pass on the ioctl request, save the response */
		if ((err = ldi_ioctl(dkp->dk_xdf_lh[part],
		    cmd, (intptr_t)&info, FKIOCTL, credp, rvalp)) != 0)
			return (err);

		/* Update controller info */
		info.dki_cnum = ddi_get_instance(ddi_get_parent(dip));
		(void) strlcpy(info.dki_cname,
		    ddi_get_name(ddi_get_parent(dip)), sizeof (info.dki_cname));

		/* Update unit info. */
		if (info.dki_ctype == DKC_VBD)
			info.dki_ctype = DKC_DIRECT;
		info.dki_unit = instance;
		(void) strlcpy(info.dki_dname,
		    ddi_driver_name(dip), sizeof (info.dki_dname));
		info.dki_addr = 1;

		if (ddi_copyout(&info, (void *)arg, sizeof (info), flag))
			return (EFAULT);
		return (0);
	}
	} /* switch (cmd) */
	/*NOTREACHED*/
}

/*ARGSUSED*/
static int
pv_cmdkopen(dev_t *dev_p, int flag, int otyp, cred_t *credp)
{
	ldi_ident_t li;
	dev_t dev = *dev_p;
	int instance = XDF_DEV2UNIT(dev);
	int part = XDF_DEV2PART(dev);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	dev_t xdf_devt = dkp->dk_xdf_dev | part;
	int err = 0;

	if ((otyp < 0) || (otyp >= OTYPCNT))
		return (EINVAL);

	/* allocate an ldi handle */
	VERIFY(ldi_ident_from_dev(*dev_p, &li) == 0);

	mutex_enter(&dkp->dk_mutex);

	/*
	 * We translate all device opens (chr, blk, and lyr) into
	 * block device opens.  Why?  Because for all the opens that
	 * come through this driver, we only keep around one LDI handle.
	 * So that handle can only be of one open type.  The reason
	 * that we choose the block interface for this is that to use
	 * the block interfaces for a device the system needs to allocate
	 * buf_ts, which are associated with system memory which can act
	 * as a cache for device data.  So normally when a block device
	 * is closed the system will ensure that all these pages get
	 * flushed out of memory.  But if we were to open the device
	 * as a character device, then when we went to close the underlying
	 * device (even if we had invoked the block interfaces) any data
	 * remaining in memory wouldn't necessarily be flushed out
	 * before the device was closed.
	 */
	if (dkp->dk_xdf_lh[part] == NULL) {
		ASSERT(!pv_cmdk_isopen_part(dkp, part));

		err = ldi_open_by_dev(&xdf_devt, OTYP_BLK, flag, credp,
		    &dkp->dk_xdf_lh[part], li);

		if (err != 0) {
			mutex_exit(&dkp->dk_mutex);
			ldi_ident_release(li);
			return (err);
		}

		/* Disk devices really shouldn't clone */
		ASSERT(xdf_devt == (dkp->dk_xdf_dev | part));
	} else {
		ldi_handle_t lh_tmp;

		ASSERT(pv_cmdk_isopen_part(dkp, part));

		/* do ldi open/close to get flags and cred check */
		err = ldi_open_by_dev(&xdf_devt, OTYP_BLK, flag, credp,
		    &lh_tmp, li);
		if (err != 0) {
			mutex_exit(&dkp->dk_mutex);
			ldi_ident_release(li);
			return (err);
		}

		/* Disk devices really shouldn't clone */
		ASSERT(xdf_devt == (dkp->dk_xdf_dev | part));
		(void) ldi_close(lh_tmp, flag, credp);
	}
	ldi_ident_release(li);

	dkp->dk_xdf_otyp_count[otyp][part]++;

	mutex_exit(&dkp->dk_mutex);
	return (0);
}

/*ARGSUSED*/
static int
pv_cmdkclose(dev_t dev, int flag, int otyp, cred_t *credp)
{
	int instance = XDF_DEV2UNIT(dev);
	int part = XDF_DEV2PART(dev);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	int err = 0;

	ASSERT((otyp >= 0) && otyp < OTYPCNT);

	/*
	 * Sanity check that the dev_t specified corresponds to this
	 * driver and that the device is actually open.  On debug kernels
	 * we'll panic and on non-debug kernels we'll return failure.
	 */
	ASSERT(getmajor(dev) == pv_cmdk_major);
	if (getmajor(dev) != pv_cmdk_major)
		return (ENXIO);

	mutex_enter(&dkp->dk_mutex);
	ASSERT(pv_cmdk_isopen_part(dkp, part));
	if (!pv_cmdk_isopen_part(dkp, part)) {
		mutex_exit(&dkp->dk_mutex);
		return (ENXIO);
	}

	ASSERT(dkp->dk_xdf_lh[part] != NULL);
	ASSERT(dkp->dk_xdf_otyp_count[otyp][part] > 0);
	if (otyp == OTYP_LYR) {
		dkp->dk_xdf_otyp_count[otyp][part]--;
	} else {
		dkp->dk_xdf_otyp_count[otyp][part] = 0;
	}

	if (!pv_cmdk_isopen_part(dkp, part)) {
		err = ldi_close(dkp->dk_xdf_lh[part], flag, credp);
		dkp->dk_xdf_lh[part] = NULL;
	}

	mutex_exit(&dkp->dk_mutex);

	return (err);
}

static int
pv_cmdk_getpgeom(dev_info_t *dip, cmlb_geom_t *pgeom)
{
	struct scsi_device *scsi_device;
	struct tgdk_geom tgdk_geom;
	opaque_t ctlobjp;
	int err;

	scsi_device = ddi_get_driver_private(dip);
	ctlobjp = scsi_device->sd_address.a_hba_tran;
	if ((err = CTL_IOCTL(ctlobjp,
	    DIOCTL_GETPHYGEOM, (uintptr_t)&tgdk_geom, FKIOCTL)) != 0)
		return (err);

	/* This driver won't work if this isn't true */
	ASSERT(tgdk_geom.g_secsiz == XB_BSIZE);

	pgeom->g_ncyl = tgdk_geom.g_cyl;
	pgeom->g_acyl = tgdk_geom.g_acyl;
	pgeom->g_nhead = tgdk_geom.g_head;
	pgeom->g_nsect = tgdk_geom.g_sec;
	pgeom->g_secsize = tgdk_geom.g_secsiz;
	pgeom->g_capacity = tgdk_geom.g_cap;
	pgeom->g_intrlv = 1;
	pgeom->g_rpm = 3600;
	return (0);
}

/*
 * pv_cmdk_bb_check() checks for the existence of bad block mappings in
 * the alternate partition/slice.  Returns B_FALSE if there are no bad
 * block mappings found, and B_TRUE if there are bad block mappings found.
 */
static boolean_t
pv_cmdk_bb_check(struct pv_cmdk *dkp)
{
	struct alts_parttbl *ap;
	diskaddr_t nblocks, blk;
	uint32_t altused, altbase, altlast;
	uint16_t vtoctag;
	int alts;

	/* find slice with V_ALTSCTR tag */
	for (alts = 0; alts < NDKMAP; alts++) {

		if (cmlb_partinfo(dkp->dk_cmlbhandle, alts,
		    &nblocks, &blk, NULL, &vtoctag, 0) != 0) {
			/* no partition table exists */
			return (B_FALSE);
		}

		if ((vtoctag == V_ALTSCTR) && (nblocks > 1))
			break;
	}
	if (alts >= NDKMAP)
		return (B_FALSE); /* no V_ALTSCTR slice defined */

	/* read in ALTS label block */
	ap = (struct alts_parttbl *)kmem_zalloc(NBPSCTR, KM_SLEEP);
	if (pv_cmdk_lb_rdwr(dkp->dk_dip,
	    TG_READ, ap, blk, NBPSCTR, NULL) != 0)
		goto err;

	altused = ap->alts_ent_used;	/* number of BB entries */
	altbase = ap->alts_ent_base;	/* blk offset from begin slice */
	altlast = ap->alts_ent_end;	/* blk offset to last block */

	if ((altused == 0) || (altbase < 1) ||
	    (altbase > altlast) || (altlast >= nblocks))
		goto err;

	/* we found bad block mappings */
	kmem_free(ap, NBPSCTR);
	return (B_TRUE);

err:
	kmem_free(ap, NBPSCTR);
	return (B_FALSE);
}

/*
 * Autoconfiguration Routines
 */
static int
pv_cmdkattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int instance = ddi_get_instance(dip);
	dev_info_t *xdf_dip = NULL;
	struct pv_cmdk *dkp;
	cmlb_geom_t pgeom;
	char *path;
	int i;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	/*
	 * This cmdk device layers on top of an xdf device.  So the first
	 * thing we need to do is determine which xdf device instance this
	 * cmdk instance should be layered on top of.
	 */
	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path);
	for (i = 0; pv_cmdk_h2p[i].h2p_hvm_path != NULL; i++) {
		if (strcmp(pv_cmdk_h2p[i].h2p_hvm_path, path) == 0)
			break;
	}
	kmem_free(path, MAXPATHLEN);

	if (pv_cmdk_h2p[i].h2p_hvm_path == NULL) {
		/*
		 * UhOh.  We don't know what xdf instance this cmdk device
		 * should be mapped to.
		 */
		return (DDI_FAILURE);
	}

	/* Check if this device exists */
	xdf_dip = xdf_hvm_hold(pv_cmdk_h2p[i].h2p_pv_path);
	if (xdf_dip == NULL)
		return (DDI_FAILURE);

	/* allocate and initialize our state structure */
	(void) ddi_soft_state_zalloc(pv_cmdk_state, instance);
	dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	mutex_init(&dkp->dk_mutex, NULL, MUTEX_DRIVER, NULL);
	dkp->dk_dip = dip;
	dkp->dk_xdf_dip = xdf_dip;
	dkp->dk_xdf_dev = makedevice(ddi_driver_major(xdf_dip),
	    XDF_MINOR(ddi_get_instance(xdf_dip), 0));

	ASSERT((dkp->dk_xdf_dev & XDF_PMASK) == 0);

	/*
	 * GROSS HACK ALERT!  GROSS HACK ALERT!
	 *
	 * Before we can initialize the cmlb layer, we have to tell the
	 * underlying xdf device what its physical geometry should be.
	 * See the block comments at the top of this file for more info.
	 */
	if ((pv_cmdk_getpgeom(dip, &pgeom) != 0) ||
	    (xdf_hvm_setpgeom(dkp->dk_xdf_dip, &pgeom) != 0)) {
		ddi_release_devi(dkp->dk_xdf_dip);
		mutex_destroy(&dkp->dk_mutex);
		ddi_soft_state_free(pv_cmdk_state, instance);
		return (DDI_FAILURE);
	}

	/* create kstat for iostat(1M) */
	if (xdf_kstat_create(dkp->dk_xdf_dip, "cmdk", instance) != 0) {
		ddi_release_devi(dkp->dk_xdf_dip);
		mutex_destroy(&dkp->dk_mutex);
		ddi_soft_state_free(pv_cmdk_state, instance);
		return (DDI_FAILURE);
	}

	/*
	 * Force the xdf front end driver to connect to the backend.  From
	 * the solaris device tree perspective, the xdf driver devinfo node
	 * is already in the ATTACHED state.  (Otherwise xdf_hvm_hold()
	 * would not have returned a dip.)  But this doesn't mean that the
	 * xdf device has actually established a connection to its back
	 * end driver.  For us to be able to access the xdf device it needs
	 * to be connected.
	 */
	if (xdf_hvm_connect(dkp->dk_xdf_dip) != 0) {
		cmn_err(CE_WARN,
		    "pv driver failed to connect: %s",
		    pv_cmdk_h2p[i].h2p_pv_path);
		xdf_kstat_delete(dkp->dk_xdf_dip);
		ddi_release_devi(dkp->dk_xdf_dip);
		mutex_destroy(&dkp->dk_mutex);
		ddi_soft_state_free(pv_cmdk_state, instance);
		return (DDI_FAILURE);
	}

	/*
	 * Initialize cmlb.  Note that for partition information cmlb
	 * will access the underlying xdf disk device directly via
	 * pv_cmdk_lb_rdwr() and pv_cmdk_lb_getinfo().  There are no
	 * layered driver handles associated with this access because
	 * it is a direct disk access that doesn't go through
	 * any of the device nodes exported by the xdf device (since
	 * all exported device nodes only reflect the portion of
	 * the device visible via the partition/slice that the node
	 * is associated with.)  So while not observable via the LDI,
	 * this direct disk access is ok since we're actually holding
	 * the target device.
	 */
	cmlb_alloc_handle((cmlb_handle_t *)&dkp->dk_cmlbhandle);
	if (cmlb_attach(dkp->dk_dip, &pv_cmdk_lb_ops,
	    DTYPE_DIRECT,		/* device_type */
	    0,				/* not removable */
	    0,				/* not hot pluggable */
	    DDI_NT_BLOCK,
	    CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT,	/* mimic cmdk */
	    dkp->dk_cmlbhandle, 0) != 0) {
		cmlb_free_handle(&dkp->dk_cmlbhandle);
		xdf_kstat_delete(dkp->dk_xdf_dip);
		ddi_release_devi(dkp->dk_xdf_dip);
		mutex_destroy(&dkp->dk_mutex);
		ddi_soft_state_free(pv_cmdk_state, instance);
		return (DDI_FAILURE);
	}

	if (pv_cmdk_bb_check(dkp)) {
		cmn_err(CE_WARN,
		    "pv cmdk disks with bad blocks are unsupported: %s",
		    pv_cmdk_h2p[i].h2p_hvm_path);

		cmlb_detach(dkp->dk_cmlbhandle, 0);
		cmlb_free_handle(&dkp->dk_cmlbhandle);
		xdf_kstat_delete(dkp->dk_xdf_dip);
		ddi_release_devi(dkp->dk_xdf_dip);
		mutex_destroy(&dkp->dk_mutex);
		ddi_soft_state_free(pv_cmdk_state, instance);
		return (DDI_FAILURE);
	}

	/* setup devid string */
	pv_cmdk_devid_setup(dkp);

	/* Calling validate will create minor nodes according to disk label */
	(void) cmlb_validate(dkp->dk_cmlbhandle, 0, 0);

	/*
	 * Add a zero-length attribute to tell the world we support
	 * kernel ioctls (for layered drivers).
	 */
	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0);

	/* Have the system report any newly created device nodes */
	ddi_report_dev(dip);

	return (DDI_SUCCESS);
}

static int
pv_cmdkdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance = ddi_get_instance(dip);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ASSERT(MUTEX_NOT_HELD(&dkp->dk_mutex));

	ddi_devid_unregister(dip);
	if (dkp->dk_devid)
		ddi_devid_free(dkp->dk_devid);
	cmlb_detach(dkp->dk_cmlbhandle, 0);
	cmlb_free_handle(&dkp->dk_cmlbhandle);
	mutex_destroy(&dkp->dk_mutex);
	xdf_kstat_delete(dkp->dk_xdf_dip);
	ddi_release_devi(dkp->dk_xdf_dip);
	ddi_soft_state_free(pv_cmdk_state, instance);
	ddi_prop_remove_all(dip);

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
pv_cmdk_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result)
{
	dev_t dev = (dev_t)arg;
	int instance = XDF_DEV2UNIT(dev);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if (dkp == NULL)
			return (DDI_FAILURE);
		*result = (void *)dkp->dk_dip;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(intptr_t)instance;
		break;
	default:
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

static int
pv_cmdk_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int flags, char *name, caddr_t valuep, int *lengthp)
{
	int instance = ddi_get_instance(dip);
	struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
	dev_info_t *xdf_dip;
	dev_t xdf_devt;
	int err;

	/*
	 * Sanity check that if a dev_t or dip were specified that they
	 * correspond to this device driver.  On debug kernels we'll
	 * panic and on non-debug kernels we'll return failure.
	 */
	ASSERT(ddi_driver_major(dip) == pv_cmdk_major);
	ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == pv_cmdk_major));
	if ((ddi_driver_major(dip) != pv_cmdk_major) ||
	    ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != pv_cmdk_major)))
		return (DDI_PROP_NOT_FOUND);

	/*
	 * This property lookup might be associated with a device node
	 * that is not yet attached, if so pass it onto ddi_prop_op().
	 */
	if (dkp == NULL)
		return (ddi_prop_op(dev, dip, prop_op, flags,
		    name, valuep, lengthp));

	/*
	 * Make sure we only lookup static properties.
	 *
	 * If there are static properties of the underlying xdf driver
	 * that we want to mirror, then we'll have to explicitly look them
	 * up and define them during attach.  There are a few reasons
	 * for this.  Most importantly, most static properties are typed
	 * and all dynamic properties are untyped, ie, for dynamic
	 * properties the caller must know the type of the property and
	 * how to interpret the value of the property.  The prop_op driver
	 * entry point is only designed for returning dynamic/untyped
	 * properties, so if we were to attempt to lookup and pass back
	 * static properties of the underlying device here then we would
	 * be losing the type information for those properties.  Another
	 * reason we don't want to pass on static property requests is that
	 * static properties are enumerable in the device tree, whereas
	 * dynamic ones are not.
	 */
	flags |= DDI_PROP_DYNAMIC;

	/*
	 * We can't use the ldi here to access the underlying device because
	 * the ldi actually opens the device, and that open might fail if the
	 * device has already been opened with the FEXCL flag.  If we used
	 * the ldi here, it would also be possible for some other caller
	 * to try to open the device with the FEXCL flag and get a failure
	 * back because we have it open to do a property query.
	 *
	 * Instead we'll grab a hold on the target dip and query the
	 * property directly.
	 */
	mutex_enter(&dkp->dk_mutex);

	if ((xdf_dip = dkp->dk_xdf_dip) == NULL) {
		mutex_exit(&dkp->dk_mutex);
		return (DDI_PROP_NOT_FOUND);
	}
	e_ddi_hold_devi(xdf_dip);

	/* figure out the dev_t we're going to pass on down */
	if (dev == DDI_DEV_T_ANY) {
		xdf_devt = DDI_DEV_T_ANY;
	} else {
		xdf_devt = dkp->dk_xdf_dev | XDF_DEV2PART(dev);
	}

	mutex_exit(&dkp->dk_mutex);

	/*
	 * cdev_prop_op() is not a public interface, and normally the caller
	 * is required to make sure that the target driver actually implements
	 * this interface before trying to invoke it.  In this case we know
	 * that we're always accessing the xdf driver and it does have this
	 * interface defined, so we can skip the check.
	 */
	err = cdev_prop_op(xdf_devt, xdf_dip,
	    prop_op, flags, name, valuep, lengthp);
	ddi_release_devi(xdf_dip);
	return (err);
}

/*
 * Device driver ops vector
 */
static struct cb_ops pv_cmdk_cb_ops = {
	pv_cmdkopen,		/* open */
	pv_cmdkclose,		/* close */
	pv_cmdkstrategy,	/* strategy */
	nodev,			/* print */
	pv_cmdkdump,		/* dump */
	pv_cmdkread,		/* read */
	pv_cmdkwrite,		/* write */
	pv_cmdkioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	pv_cmdk_prop_op,	/* cb_prop_op */
	0,			/* streamtab */
	D_64BIT | D_MP | D_NEW,	/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	pv_cmdkaread,		/* async read */
	pv_cmdkawrite		/* async write */
};

struct dev_ops pv_cmdk_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	pv_cmdk_getinfo,	/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	pv_cmdkattach,		/* attach */
	pv_cmdkdetach,		/* detach */
	nodev,			/* reset */
	&pv_cmdk_cb_ops,	/* driver operations */
	(struct bus_ops *)0	/* bus operations */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"PV Common Direct Access Disk",
	&pv_cmdk_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};

int
_init(void)
{
	int rval;

	if ((pv_cmdk_major = ddi_name_to_major("cmdk")) == (major_t)-1)
		return (EINVAL);

	/*
	 * In general ide usually supports 4 disk devices; this same
	 * limitation also applies to software emulating ide devices,
	 * so by default we pre-allocate 4 cmdk soft state structures.
	 */
	if ((rval = ddi_soft_state_init(&pv_cmdk_state,
	    sizeof (struct pv_cmdk), PV_CMDK_NODES)) != 0)
		return (rval);

	/*
	 * Currently we only support qemu as the backing hardware emulator
	 * for cmdk devices.
	 */
	pv_cmdk_h2p = pv_cmdk_h2p_xen_qemu;

	/* Install our module */
	if ((rval = mod_install(&modlinkage)) != 0) {
		ddi_soft_state_fini(&pv_cmdk_state);
		return (rval);
	}

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int rval;

	if ((rval = mod_remove(&modlinkage)) != 0)
		return (rval);
	ddi_soft_state_fini(&pv_cmdk_state);
	return (0);
}