11ae08745Sheppo /* 21ae08745Sheppo * CDDL HEADER START 31ae08745Sheppo * 41ae08745Sheppo * The contents of this file are subject to the terms of the 51ae08745Sheppo * Common Development and Distribution License (the "License"). 61ae08745Sheppo * You may not use this file except in compliance with the License. 71ae08745Sheppo * 81ae08745Sheppo * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 91ae08745Sheppo * or http://www.opensolaris.org/os/licensing. 101ae08745Sheppo * See the License for the specific language governing permissions 111ae08745Sheppo * and limitations under the License. 121ae08745Sheppo * 131ae08745Sheppo * When distributing Covered Code, include this CDDL HEADER in each 141ae08745Sheppo * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 151ae08745Sheppo * If applicable, add the following below this CDDL HEADER, with the 161ae08745Sheppo * fields enclosed by brackets "[]" replaced with your own identifying 171ae08745Sheppo * information: Portions Copyright [yyyy] [name of copyright owner] 181ae08745Sheppo * 191ae08745Sheppo * CDDL HEADER END 201ae08745Sheppo */ 211ae08745Sheppo 221ae08745Sheppo /* 23d84f0041SAlexandre Chartre * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 241ae08745Sheppo * Use is subject to license terms. 
251ae08745Sheppo */ 261ae08745Sheppo 271ae08745Sheppo /* 281ae08745Sheppo * Virtual disk server 291ae08745Sheppo */ 301ae08745Sheppo 311ae08745Sheppo 321ae08745Sheppo #include <sys/types.h> 331ae08745Sheppo #include <sys/conf.h> 344bac2208Snarayan #include <sys/crc32.h> 351ae08745Sheppo #include <sys/ddi.h> 361ae08745Sheppo #include <sys/dkio.h> 371ae08745Sheppo #include <sys/file.h> 3817cadca8Slm66018 #include <sys/fs/hsfs_isospec.h> 391ae08745Sheppo #include <sys/mdeg.h> 402f5224aeSachartre #include <sys/mhd.h> 411ae08745Sheppo #include <sys/modhash.h> 421ae08745Sheppo #include <sys/note.h> 431ae08745Sheppo #include <sys/pathname.h> 44205eeb1aSlm66018 #include <sys/sdt.h> 451ae08745Sheppo #include <sys/sunddi.h> 461ae08745Sheppo #include <sys/sunldi.h> 471ae08745Sheppo #include <sys/sysmacros.h> 481ae08745Sheppo #include <sys/vio_common.h> 4917cadca8Slm66018 #include <sys/vio_util.h> 501ae08745Sheppo #include <sys/vdsk_mailbox.h> 511ae08745Sheppo #include <sys/vdsk_common.h> 521ae08745Sheppo #include <sys/vtoc.h> 533c96341aSnarayan #include <sys/vfs.h> 543c96341aSnarayan #include <sys/stat.h> 5587a7269eSachartre #include <sys/scsi/impl/uscsi.h> 56bbfa0259Sha137994 #include <sys/ontrap.h> 57690555a1Sachartre #include <vm/seg_map.h> 581ae08745Sheppo 59342440ecSPrasad Singamsetty #define ONE_MEGABYTE (1ULL << 20) 60342440ecSPrasad Singamsetty #define ONE_GIGABYTE (1ULL << 30) 61bae9e67eSachartre #define ONE_TERABYTE (1ULL << 40) 62bae9e67eSachartre 631ae08745Sheppo /* Virtual disk server initialization flags */ 64d10e4ef2Snarayan #define VDS_LDI 0x01 65d10e4ef2Snarayan #define VDS_MDEG 0x02 661ae08745Sheppo 671ae08745Sheppo /* Virtual disk server tunable parameters */ 683c96341aSnarayan #define VDS_RETRIES 5 693c96341aSnarayan #define VDS_LDC_DELAY 1000 /* 1 msecs */ 703c96341aSnarayan #define VDS_DEV_DELAY 10000000 /* 10 secs */ 711ae08745Sheppo #define VDS_NCHAINS 32 721ae08745Sheppo 731ae08745Sheppo /* Identification parameters for MD, synthetic dkio(7i) 
structures, etc. */ 741ae08745Sheppo #define VDS_NAME "virtual-disk-server" 751ae08745Sheppo 761ae08745Sheppo #define VD_NAME "vd" 771ae08745Sheppo #define VD_VOLUME_NAME "vdisk" 781ae08745Sheppo #define VD_ASCIILABEL "Virtual Disk" 791ae08745Sheppo 801ae08745Sheppo #define VD_CHANNEL_ENDPOINT "channel-endpoint" 811ae08745Sheppo #define VD_ID_PROP "id" 821ae08745Sheppo #define VD_BLOCK_DEVICE_PROP "vds-block-device" 83047ba61eSachartre #define VD_BLOCK_DEVICE_OPTS "vds-block-device-opts" 84445b4c2eSsb155480 #define VD_REG_PROP "reg" 851ae08745Sheppo 861ae08745Sheppo /* Virtual disk initialization flags */ 873c96341aSnarayan #define VD_DISK_READY 0x01 883c96341aSnarayan #define VD_LOCKING 0x02 893c96341aSnarayan #define VD_LDC 0x04 903c96341aSnarayan #define VD_DRING 0x08 913c96341aSnarayan #define VD_SID 0x10 923c96341aSnarayan #define VD_SEQ_NUM 0x20 93047ba61eSachartre #define VD_SETUP_ERROR 0x40 941ae08745Sheppo 9587a7269eSachartre /* Number of backup labels */ 961aff8f07SAlexandre Chartre #define VD_DSKIMG_NUM_BACKUP 5 9787a7269eSachartre 9887a7269eSachartre /* Timeout for SCSI I/O */ 9987a7269eSachartre #define VD_SCSI_RDWR_TIMEOUT 30 /* 30 secs */ 10087a7269eSachartre 10183990c4aSAlexandre Chartre /* 10283990c4aSAlexandre Chartre * Default number of threads for the I/O queue. In many cases, we will not 10383990c4aSAlexandre Chartre * receive more than 8 I/O requests at the same time. However there are 10483990c4aSAlexandre Chartre * cases (for example during the OS installation) where we can have a lot 10583990c4aSAlexandre Chartre * more (up to the limit of the DRing size). 
10683990c4aSAlexandre Chartre */ 10783990c4aSAlexandre Chartre #define VD_IOQ_NTHREADS 8 10883990c4aSAlexandre Chartre 109edcc0754Sachartre /* Maximum number of logical partitions */ 110edcc0754Sachartre #define VD_MAXPART (NDKMAP + 1) 111edcc0754Sachartre 1121ae08745Sheppo /* 1131ae08745Sheppo * By Solaris convention, slice/partition 2 represents the entire disk; 1141ae08745Sheppo * unfortunately, this convention does not appear to be codified. 1151ae08745Sheppo */ 1161ae08745Sheppo #define VD_ENTIRE_DISK_SLICE 2 1171ae08745Sheppo 118bae9e67eSachartre /* Logical block address for EFI */ 119bae9e67eSachartre #define VD_EFI_LBA_GPT 1 /* LBA of the GPT */ 120bae9e67eSachartre #define VD_EFI_LBA_GPE 2 /* LBA of the GPE */ 121bae9e67eSachartre 122*65908c77Syu, larry liu - Sun Microsystems - Beijing China #define VD_EFI_DEV_SET(dev, vdsk, ioctl) \ 123*65908c77Syu, larry liu - Sun Microsystems - Beijing China VDSK_EFI_DEV_SET(dev, vdsk, ioctl, \ 124*65908c77Syu, larry liu - Sun Microsystems - Beijing China (vdsk)->vdisk_bsize, (vdsk)->vdisk_size) 125*65908c77Syu, larry liu - Sun Microsystems - Beijing China 12683990c4aSAlexandre Chartre /* 12783990c4aSAlexandre Chartre * Flags defining the behavior for flushing asynchronous writes used to 12883990c4aSAlexandre Chartre * performed some write I/O requests. 12983990c4aSAlexandre Chartre * 13083990c4aSAlexandre Chartre * The VD_AWFLUSH_IMMEDIATE enables immediate flushing of asynchronous 13183990c4aSAlexandre Chartre * writes. This ensures that data are committed to the backend when the I/O 13283990c4aSAlexandre Chartre * request reply is sent to the guest domain so this prevents any data to 13383990c4aSAlexandre Chartre * be lost in case a service domain unexpectedly crashes. 13483990c4aSAlexandre Chartre * 13583990c4aSAlexandre Chartre * The flag VD_AWFLUSH_DEFER indicates that flushing is deferred to another 13683990c4aSAlexandre Chartre * thread while the request is immediatly marked as completed. 
In that case, 13783990c4aSAlexandre Chartre * a guest domain can a receive a reply that its write request is completed 13883990c4aSAlexandre Chartre * while data haven't been flushed to disk yet. 13983990c4aSAlexandre Chartre * 14083990c4aSAlexandre Chartre * Flags VD_AWFLUSH_IMMEDIATE and VD_AWFLUSH_DEFER are mutually exclusive. 14183990c4aSAlexandre Chartre */ 14283990c4aSAlexandre Chartre #define VD_AWFLUSH_IMMEDIATE 0x01 /* immediate flushing */ 14383990c4aSAlexandre Chartre #define VD_AWFLUSH_DEFER 0x02 /* defer flushing */ 14483990c4aSAlexandre Chartre #define VD_AWFLUSH_GROUP 0x04 /* group requests before flushing */ 14583990c4aSAlexandre Chartre 1468fce2fd6Sachartre /* Driver types */ 1478fce2fd6Sachartre typedef enum vd_driver { 1488fce2fd6Sachartre VD_DRIVER_UNKNOWN = 0, /* driver type unknown */ 1498fce2fd6Sachartre VD_DRIVER_DISK, /* disk driver */ 1508fce2fd6Sachartre VD_DRIVER_VOLUME /* volume driver */ 1518fce2fd6Sachartre } vd_driver_t; 1528fce2fd6Sachartre 1538fce2fd6Sachartre #define VD_DRIVER_NAME_LEN 64 1548fce2fd6Sachartre 1558fce2fd6Sachartre #define VDS_NUM_DRIVERS (sizeof (vds_driver_types) / sizeof (vd_driver_type_t)) 1568fce2fd6Sachartre 1578fce2fd6Sachartre typedef struct vd_driver_type { 1588fce2fd6Sachartre char name[VD_DRIVER_NAME_LEN]; /* driver name */ 1598fce2fd6Sachartre vd_driver_t type; /* driver type (disk or volume) */ 1608fce2fd6Sachartre } vd_driver_type_t; 1618fce2fd6Sachartre 1628fce2fd6Sachartre /* 1638fce2fd6Sachartre * There is no reliable way to determine if a device is representing a disk 1648fce2fd6Sachartre * or a volume, especially with pseudo devices. So we maintain a list of well 1658fce2fd6Sachartre * known drivers and the type of device they represent (either a disk or a 1668fce2fd6Sachartre * volume). 
 *
 * The list can be extended by adding a "driver-type-list" entry in vds.conf
 * with the following syntax:
 *
 *	driver-type-list="<driver>:<type>", ... ,"<driver>:<type>";
 *
 * Where:
 *	<driver> is the name of a driver (limited to 64 characters)
 *	<type> is either the string "disk" or "volume"
 *
 * Invalid entries in "driver-type-list" will be ignored.
 *
 * For example, the following line in vds.conf:
 *
 *	driver-type-list="foo:disk","bar:volume";
 *
 * defines that "foo" is a disk driver, and driver "bar" is a volume driver.
 *
 * When a list is defined in vds.conf, it is checked before the built-in list
 * (vds_driver_types[]) so that any definition from this list can be
 * overridden using vds.conf.
1888fce2fd6Sachartre */ 1898fce2fd6Sachartre vd_driver_type_t vds_driver_types[] = { 1908fce2fd6Sachartre { "dad", VD_DRIVER_DISK }, /* Solaris */ 1918fce2fd6Sachartre { "did", VD_DRIVER_DISK }, /* Sun Cluster */ 19211f54b6eSAlexandre Chartre { "dlmfdrv", VD_DRIVER_DISK }, /* Hitachi HDLM */ 1935b98b509Sachartre { "emcp", VD_DRIVER_DISK }, /* EMC Powerpath */ 1948fce2fd6Sachartre { "lofi", VD_DRIVER_VOLUME }, /* Solaris */ 1958fce2fd6Sachartre { "md", VD_DRIVER_VOLUME }, /* Solaris - SVM */ 1968fce2fd6Sachartre { "sd", VD_DRIVER_DISK }, /* Solaris */ 1978fce2fd6Sachartre { "ssd", VD_DRIVER_DISK }, /* Solaris */ 1988fce2fd6Sachartre { "vdc", VD_DRIVER_DISK }, /* Solaris */ 1998fce2fd6Sachartre { "vxdmp", VD_DRIVER_DISK }, /* Veritas */ 2008fce2fd6Sachartre { "vxio", VD_DRIVER_VOLUME }, /* Veritas - VxVM */ 2018fce2fd6Sachartre { "zfs", VD_DRIVER_VOLUME } /* Solaris */ 2028fce2fd6Sachartre }; 2038fce2fd6Sachartre 2041ae08745Sheppo /* Return a cpp token as a string */ 2051ae08745Sheppo #define STRINGIZE(token) #token 2061ae08745Sheppo 2071ae08745Sheppo /* 2081ae08745Sheppo * Print a message prefixed with the current function name to the message log 2091ae08745Sheppo * (and optionally to the console for verbose boots); these macros use cpp's 2101ae08745Sheppo * concatenation of string literals and C99 variable-length-argument-list 2111ae08745Sheppo * macros 2121ae08745Sheppo */ 2131ae08745Sheppo #define PRN(...) _PRN("?%s(): "__VA_ARGS__, "") 2141ae08745Sheppo #define _PRN(format, ...) 
\ 2151ae08745Sheppo cmn_err(CE_CONT, format"%s", __func__, __VA_ARGS__) 2161ae08745Sheppo 2171ae08745Sheppo /* Return a pointer to the "i"th vdisk dring element */ 2181ae08745Sheppo #define VD_DRING_ELEM(i) ((vd_dring_entry_t *)(void *) \ 2191ae08745Sheppo (vd->dring + (i)*vd->descriptor_size)) 2201ae08745Sheppo 2211ae08745Sheppo /* Return the virtual disk client's type as a string (for use in messages) */ 2221ae08745Sheppo #define VD_CLIENT(vd) \ 2231ae08745Sheppo (((vd)->xfer_mode == VIO_DESC_MODE) ? "in-band client" : \ 224f0ca1d9aSsb155480 (((vd)->xfer_mode == VIO_DRING_MODE_V1_0) ? "dring client" : \ 2251ae08745Sheppo (((vd)->xfer_mode == 0) ? "null client" : \ 2261ae08745Sheppo "unsupported client"))) 2271ae08745Sheppo 2281aff8f07SAlexandre Chartre /* Read disk label from a disk image */ 2291aff8f07SAlexandre Chartre #define VD_DSKIMG_LABEL_READ(vd, labelp) \ 2301aff8f07SAlexandre Chartre vd_dskimg_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, (caddr_t)labelp, \ 231690555a1Sachartre 0, sizeof (struct dk_label)) 232690555a1Sachartre 2331aff8f07SAlexandre Chartre /* Write disk label to a disk image */ 2341aff8f07SAlexandre Chartre #define VD_DSKIMG_LABEL_WRITE(vd, labelp) \ 2351aff8f07SAlexandre Chartre vd_dskimg_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, (caddr_t)labelp, \ 236690555a1Sachartre 0, sizeof (struct dk_label)) 237690555a1Sachartre 2381aff8f07SAlexandre Chartre /* Identify if a backend is a disk image */ 2391aff8f07SAlexandre Chartre #define VD_DSKIMG(vd) ((vd)->vdisk_type == VD_DISK_TYPE_DISK && \ 2401aff8f07SAlexandre Chartre ((vd)->file || (vd)->volume)) 2411aff8f07SAlexandre Chartre 24283990c4aSAlexandre Chartre /* Next index in a write queue */ 24383990c4aSAlexandre Chartre #define VD_WRITE_INDEX_NEXT(vd, id) \ 24483990c4aSAlexandre Chartre ((((id) + 1) >= vd->dring_len)? 
0 : (id) + 1) 24583990c4aSAlexandre Chartre 2462f5224aeSachartre /* Message for disk access rights reset failure */ 2472f5224aeSachartre #define VD_RESET_ACCESS_FAILURE_MSG \ 2482f5224aeSachartre "Fail to reset disk access rights for disk %s" 2492f5224aeSachartre 250445b4c2eSsb155480 /* 251445b4c2eSsb155480 * Specification of an MD node passed to the MDEG to filter any 252445b4c2eSsb155480 * 'vport' nodes that do not belong to the specified node. This 253445b4c2eSsb155480 * template is copied for each vds instance and filled in with 254445b4c2eSsb155480 * the appropriate 'cfg-handle' value before being passed to the MDEG. 255445b4c2eSsb155480 */ 256445b4c2eSsb155480 static mdeg_prop_spec_t vds_prop_template[] = { 257445b4c2eSsb155480 { MDET_PROP_STR, "name", VDS_NAME }, 258445b4c2eSsb155480 { MDET_PROP_VAL, "cfg-handle", NULL }, 259445b4c2eSsb155480 { MDET_LIST_END, NULL, NULL } 260445b4c2eSsb155480 }; 261445b4c2eSsb155480 262445b4c2eSsb155480 #define VDS_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 263445b4c2eSsb155480 264445b4c2eSsb155480 /* 265445b4c2eSsb155480 * Matching criteria passed to the MDEG to register interest 266445b4c2eSsb155480 * in changes to 'virtual-device-port' nodes identified by their 267445b4c2eSsb155480 * 'id' property. 268445b4c2eSsb155480 */ 269445b4c2eSsb155480 static md_prop_match_t vd_prop_match[] = { 270445b4c2eSsb155480 { MDET_PROP_VAL, VD_ID_PROP }, 271445b4c2eSsb155480 { MDET_LIST_END, NULL } 272445b4c2eSsb155480 }; 273445b4c2eSsb155480 274445b4c2eSsb155480 static mdeg_node_match_t vd_match = {"virtual-device-port", 275445b4c2eSsb155480 vd_prop_match}; 276445b4c2eSsb155480 277047ba61eSachartre /* 278047ba61eSachartre * Options for the VD_BLOCK_DEVICE_OPTS property. 
279047ba61eSachartre */ 280047ba61eSachartre #define VD_OPT_RDONLY 0x1 /* read-only */ 281047ba61eSachartre #define VD_OPT_SLICE 0x2 /* single slice */ 282047ba61eSachartre #define VD_OPT_EXCLUSIVE 0x4 /* exclusive access */ 283047ba61eSachartre 284047ba61eSachartre #define VD_OPTION_NLEN 128 285047ba61eSachartre 286047ba61eSachartre typedef struct vd_option { 287047ba61eSachartre char vdo_name[VD_OPTION_NLEN]; 288047ba61eSachartre uint64_t vdo_value; 289047ba61eSachartre } vd_option_t; 290047ba61eSachartre 291047ba61eSachartre vd_option_t vd_bdev_options[] = { 292047ba61eSachartre { "ro", VD_OPT_RDONLY }, 293047ba61eSachartre { "slice", VD_OPT_SLICE }, 294047ba61eSachartre { "excl", VD_OPT_EXCLUSIVE } 295047ba61eSachartre }; 296047ba61eSachartre 2971ae08745Sheppo /* Debugging macros */ 2981ae08745Sheppo #ifdef DEBUG 2993af08d82Slm66018 3003af08d82Slm66018 static int vd_msglevel = 0; 3013af08d82Slm66018 3021ae08745Sheppo #define PR0 if (vd_msglevel > 0) PRN 3031ae08745Sheppo #define PR1 if (vd_msglevel > 1) PRN 3041ae08745Sheppo #define PR2 if (vd_msglevel > 2) PRN 3051ae08745Sheppo 3061ae08745Sheppo #define VD_DUMP_DRING_ELEM(elem) \ 3073c96341aSnarayan PR0("dst:%x op:%x st:%u nb:%lx addr:%lx ncook:%u\n", \ 3081ae08745Sheppo elem->hdr.dstate, \ 3091ae08745Sheppo elem->payload.operation, \ 3101ae08745Sheppo elem->payload.status, \ 3111ae08745Sheppo elem->payload.nbytes, \ 3121ae08745Sheppo elem->payload.addr, \ 3131ae08745Sheppo elem->payload.ncookies); 3141ae08745Sheppo 3153af08d82Slm66018 char * 3163af08d82Slm66018 vd_decode_state(int state) 3173af08d82Slm66018 { 3183af08d82Slm66018 char *str; 3193af08d82Slm66018 3203af08d82Slm66018 #define CASE_STATE(_s) case _s: str = #_s; break; 3213af08d82Slm66018 3223af08d82Slm66018 switch (state) { 3233af08d82Slm66018 CASE_STATE(VD_STATE_INIT) 3243af08d82Slm66018 CASE_STATE(VD_STATE_VER) 3253af08d82Slm66018 CASE_STATE(VD_STATE_ATTR) 3263af08d82Slm66018 CASE_STATE(VD_STATE_DRING) 3273af08d82Slm66018 CASE_STATE(VD_STATE_RDX) 
3283af08d82Slm66018 CASE_STATE(VD_STATE_DATA) 3293af08d82Slm66018 default: str = "unknown"; break; 3303af08d82Slm66018 } 3313af08d82Slm66018 3323af08d82Slm66018 #undef CASE_STATE 3333af08d82Slm66018 3343af08d82Slm66018 return (str); 3353af08d82Slm66018 } 3363af08d82Slm66018 3373af08d82Slm66018 void 3383af08d82Slm66018 vd_decode_tag(vio_msg_t *msg) 3393af08d82Slm66018 { 3403af08d82Slm66018 char *tstr, *sstr, *estr; 3413af08d82Slm66018 3423af08d82Slm66018 #define CASE_TYPE(_s) case _s: tstr = #_s; break; 3433af08d82Slm66018 3443af08d82Slm66018 switch (msg->tag.vio_msgtype) { 3453af08d82Slm66018 CASE_TYPE(VIO_TYPE_CTRL) 3463af08d82Slm66018 CASE_TYPE(VIO_TYPE_DATA) 3473af08d82Slm66018 CASE_TYPE(VIO_TYPE_ERR) 3483af08d82Slm66018 default: tstr = "unknown"; break; 3493af08d82Slm66018 } 3503af08d82Slm66018 3513af08d82Slm66018 #undef CASE_TYPE 3523af08d82Slm66018 3533af08d82Slm66018 #define CASE_SUBTYPE(_s) case _s: sstr = #_s; break; 3543af08d82Slm66018 3553af08d82Slm66018 switch (msg->tag.vio_subtype) { 3563af08d82Slm66018 CASE_SUBTYPE(VIO_SUBTYPE_INFO) 3573af08d82Slm66018 CASE_SUBTYPE(VIO_SUBTYPE_ACK) 3583af08d82Slm66018 CASE_SUBTYPE(VIO_SUBTYPE_NACK) 3593af08d82Slm66018 default: sstr = "unknown"; break; 3603af08d82Slm66018 } 3613af08d82Slm66018 3623af08d82Slm66018 #undef CASE_SUBTYPE 3633af08d82Slm66018 3643af08d82Slm66018 #define CASE_ENV(_s) case _s: estr = #_s; break; 3653af08d82Slm66018 3663af08d82Slm66018 switch (msg->tag.vio_subtype_env) { 3673af08d82Slm66018 CASE_ENV(VIO_VER_INFO) 3683af08d82Slm66018 CASE_ENV(VIO_ATTR_INFO) 3693af08d82Slm66018 CASE_ENV(VIO_DRING_REG) 3703af08d82Slm66018 CASE_ENV(VIO_DRING_UNREG) 3713af08d82Slm66018 CASE_ENV(VIO_RDX) 3723af08d82Slm66018 CASE_ENV(VIO_PKT_DATA) 3733af08d82Slm66018 CASE_ENV(VIO_DESC_DATA) 3743af08d82Slm66018 CASE_ENV(VIO_DRING_DATA) 3753af08d82Slm66018 default: estr = "unknown"; break; 3763af08d82Slm66018 } 3773af08d82Slm66018 3783af08d82Slm66018 #undef CASE_ENV 3793af08d82Slm66018 3803af08d82Slm66018 PR1("(%x/%x/%x) 
message : (%s/%s/%s)", 3813af08d82Slm66018 msg->tag.vio_msgtype, msg->tag.vio_subtype, 3823af08d82Slm66018 msg->tag.vio_subtype_env, tstr, sstr, estr); 3833af08d82Slm66018 } 3843af08d82Slm66018 3851ae08745Sheppo #else /* !DEBUG */ 3863af08d82Slm66018 3871ae08745Sheppo #define PR0(...) 3881ae08745Sheppo #define PR1(...) 3891ae08745Sheppo #define PR2(...) 3901ae08745Sheppo 3911ae08745Sheppo #define VD_DUMP_DRING_ELEM(elem) 3921ae08745Sheppo 3933af08d82Slm66018 #define vd_decode_state(_s) (NULL) 3943af08d82Slm66018 #define vd_decode_tag(_s) (NULL) 3953af08d82Slm66018 3961ae08745Sheppo #endif /* DEBUG */ 3971ae08745Sheppo 3981ae08745Sheppo 399d10e4ef2Snarayan /* 400d10e4ef2Snarayan * Soft state structure for a vds instance 401d10e4ef2Snarayan */ 4021ae08745Sheppo typedef struct vds { 4031ae08745Sheppo uint_t initialized; /* driver inst initialization flags */ 4041ae08745Sheppo dev_info_t *dip; /* driver inst devinfo pointer */ 4051ae08745Sheppo ldi_ident_t ldi_ident; /* driver's identifier for LDI */ 4061ae08745Sheppo mod_hash_t *vd_table; /* table of virtual disks served */ 407445b4c2eSsb155480 mdeg_node_spec_t *ispecp; /* mdeg node specification */ 4081ae08745Sheppo mdeg_handle_t mdeg; /* handle for MDEG operations */ 4098fce2fd6Sachartre vd_driver_type_t *driver_types; /* extra driver types (from vds.conf) */ 4108fce2fd6Sachartre int num_drivers; /* num of extra driver types */ 4111ae08745Sheppo } vds_t; 4121ae08745Sheppo 413d10e4ef2Snarayan /* 414d10e4ef2Snarayan * Types of descriptor-processing tasks 415d10e4ef2Snarayan */ 416d10e4ef2Snarayan typedef enum vd_task_type { 417d10e4ef2Snarayan VD_NONFINAL_RANGE_TASK, /* task for intermediate descriptor in range */ 418d10e4ef2Snarayan VD_FINAL_RANGE_TASK, /* task for last in a range of descriptors */ 419d10e4ef2Snarayan } vd_task_type_t; 420d10e4ef2Snarayan 421d10e4ef2Snarayan /* 422d10e4ef2Snarayan * Structure describing the task for processing a descriptor 423d10e4ef2Snarayan */ 424d10e4ef2Snarayan typedef struct 
vd_task { 425d10e4ef2Snarayan struct vd *vd; /* vd instance task is for */ 426d10e4ef2Snarayan vd_task_type_t type; /* type of descriptor task */ 427d10e4ef2Snarayan int index; /* dring elem index for task */ 428d10e4ef2Snarayan vio_msg_t *msg; /* VIO message task is for */ 429d10e4ef2Snarayan size_t msglen; /* length of message content */ 430d10e4ef2Snarayan vd_dring_payload_t *request; /* request task will perform */ 431d10e4ef2Snarayan struct buf buf; /* buf(9s) for I/O request */ 4324bac2208Snarayan ldc_mem_handle_t mhdl; /* task memory handle */ 433205eeb1aSlm66018 int status; /* status of processing task */ 434205eeb1aSlm66018 int (*completef)(struct vd_task *task); /* completion func ptr */ 43583990c4aSAlexandre Chartre uint32_t write_index; /* index in the write_queue */ 436d10e4ef2Snarayan } vd_task_t; 437d10e4ef2Snarayan 438d10e4ef2Snarayan /* 439d10e4ef2Snarayan * Soft state structure for a virtual disk instance 440d10e4ef2Snarayan */ 4411ae08745Sheppo typedef struct vd { 44283990c4aSAlexandre Chartre uint64_t id; /* vdisk id */ 4431ae08745Sheppo uint_t initialized; /* vdisk initialization flags */ 44417cadca8Slm66018 uint64_t operations; /* bitmask of VD_OPs exported */ 44517cadca8Slm66018 vio_ver_t version; /* ver negotiated with client */ 4461ae08745Sheppo vds_t *vds; /* server for this vdisk */ 447d10e4ef2Snarayan ddi_taskq_t *startq; /* queue for I/O start tasks */ 448d10e4ef2Snarayan ddi_taskq_t *completionq; /* queue for completion tasks */ 44983990c4aSAlexandre Chartre ddi_taskq_t *ioq; /* queue for I/O */ 45083990c4aSAlexandre Chartre uint32_t write_index; /* next write index */ 45183990c4aSAlexandre Chartre buf_t **write_queue; /* queue for async writes */ 4521ae08745Sheppo ldi_handle_t ldi_handle[V_NUMPAR]; /* LDI slice handles */ 4533c96341aSnarayan char device_path[MAXPATHLEN + 1]; /* vdisk device */ 4541ae08745Sheppo dev_t dev[V_NUMPAR]; /* dev numbers for slices */ 455047ba61eSachartre int open_flags; /* open flags */ 456bae9e67eSachartre 
uint_t nslices; /* number of slices we export */ 4571ae08745Sheppo size_t vdisk_size; /* number of blocks in vdisk */ 458*65908c77Syu, larry liu - Sun Microsystems - Beijing China size_t vdisk_bsize; /* blk size of the vdisk */ 4591ae08745Sheppo vd_disk_type_t vdisk_type; /* slice or entire disk */ 4604bac2208Snarayan vd_disk_label_t vdisk_label; /* EFI or VTOC label */ 46117cadca8Slm66018 vd_media_t vdisk_media; /* media type of backing dev. */ 46217cadca8Slm66018 boolean_t is_atapi_dev; /* Is this an IDE CD-ROM dev? */ 463e1ebb9ecSlm66018 ushort_t max_xfer_sz; /* max xfer size in DEV_BSIZE */ 464*65908c77Syu, larry liu - Sun Microsystems - Beijing China size_t backend_bsize; /* blk size of backend device */ 465*65908c77Syu, larry liu - Sun Microsystems - Beijing China int vio_bshift; /* shift for blk convertion */ 4668fce2fd6Sachartre boolean_t volume; /* is vDisk backed by volume */ 4671aff8f07SAlexandre Chartre boolean_t zvol; /* is vDisk backed by a zvol */ 46817cadca8Slm66018 boolean_t file; /* is vDisk backed by a file? */ 4692f5224aeSachartre boolean_t scsi; /* is vDisk backed by scsi? 
*/ 4703c96341aSnarayan vnode_t *file_vnode; /* file vnode */ 4711aff8f07SAlexandre Chartre size_t dskimg_size; /* size of disk image */ 4721aff8f07SAlexandre Chartre ddi_devid_t dskimg_devid; /* devid for disk image */ 473edcc0754Sachartre int efi_reserved; /* EFI reserved slice */ 474bae9e67eSachartre caddr_t flabel; /* fake label for slice type */ 475bae9e67eSachartre uint_t flabel_size; /* fake label size */ 476bae9e67eSachartre uint_t flabel_limit; /* limit of the fake label */ 4771ae08745Sheppo struct dk_geom dk_geom; /* synthetic for slice type */ 478342440ecSPrasad Singamsetty struct extvtoc vtoc; /* synthetic for slice type */ 479edcc0754Sachartre vd_slice_t slices[VD_MAXPART]; /* logical partitions */ 4802f5224aeSachartre boolean_t ownership; /* disk ownership status */ 4811ae08745Sheppo ldc_status_t ldc_state; /* LDC connection state */ 4821ae08745Sheppo ldc_handle_t ldc_handle; /* handle for LDC comm */ 4831ae08745Sheppo size_t max_msglen; /* largest LDC message len */ 4841ae08745Sheppo vd_state_t state; /* client handshake state */ 4851ae08745Sheppo uint8_t xfer_mode; /* transfer mode with client */ 4861ae08745Sheppo uint32_t sid; /* client's session ID */ 4871ae08745Sheppo uint64_t seq_num; /* message sequence number */ 4881ae08745Sheppo uint64_t dring_ident; /* identifier of dring */ 4891ae08745Sheppo ldc_dring_handle_t dring_handle; /* handle for dring ops */ 4901ae08745Sheppo uint32_t descriptor_size; /* num bytes in desc */ 4911ae08745Sheppo uint32_t dring_len; /* number of dring elements */ 492bbfa0259Sha137994 uint8_t dring_mtype; /* dring mem map type */ 4931ae08745Sheppo caddr_t dring; /* address of dring */ 4943af08d82Slm66018 caddr_t vio_msgp; /* vio msg staging buffer */ 495d10e4ef2Snarayan vd_task_t inband_task; /* task for inband descriptor */ 496d10e4ef2Snarayan vd_task_t *dring_task; /* tasks dring elements */ 497d10e4ef2Snarayan 498d10e4ef2Snarayan kmutex_t lock; /* protects variables below */ 499d10e4ef2Snarayan boolean_t enabled; /* 
is vdisk enabled? */ 500d10e4ef2Snarayan boolean_t reset_state; /* reset connection state? */ 501d10e4ef2Snarayan boolean_t reset_ldc; /* reset LDC channel? */ 5021ae08745Sheppo } vd_t; 5031ae08745Sheppo 504bae9e67eSachartre /* 505bae9e67eSachartre * Macros to manipulate the fake label (flabel) for single slice disks. 506bae9e67eSachartre * 507bae9e67eSachartre * If we fake a VTOC label then the fake label consists of only one block 508bae9e67eSachartre * containing the VTOC label (struct dk_label). 509bae9e67eSachartre * 510bae9e67eSachartre * If we fake an EFI label then the fake label consists of a blank block 511bae9e67eSachartre * followed by a GPT (efi_gpt_t) and a GPE (efi_gpe_t). 512bae9e67eSachartre * 513bae9e67eSachartre */ 514*65908c77Syu, larry liu - Sun Microsystems - Beijing China #define VD_LABEL_VTOC_SIZE(lba) \ 515*65908c77Syu, larry liu - Sun Microsystems - Beijing China P2ROUNDUP(sizeof (struct dk_label), (lba)) 516bae9e67eSachartre 517*65908c77Syu, larry liu - Sun Microsystems - Beijing China #define VD_LABEL_EFI_SIZE(lba) \ 518*65908c77Syu, larry liu - Sun Microsystems - Beijing China P2ROUNDUP(2 * (lba) + sizeof (efi_gpe_t) * VD_MAXPART, \ 519*65908c77Syu, larry liu - Sun Microsystems - Beijing China (lba)) 520bae9e67eSachartre 521bae9e67eSachartre #define VD_LABEL_VTOC(vd) \ 522342440ecSPrasad Singamsetty ((struct dk_label *)(void *)((vd)->flabel)) 523bae9e67eSachartre 524*65908c77Syu, larry liu - Sun Microsystems - Beijing China #define VD_LABEL_EFI_GPT(vd, lba) \ 525*65908c77Syu, larry liu - Sun Microsystems - Beijing China ((efi_gpt_t *)(void *)((vd)->flabel + (lba))) 526*65908c77Syu, larry liu - Sun Microsystems - Beijing China #define VD_LABEL_EFI_GPE(vd, lba) \ 527*65908c77Syu, larry liu - Sun Microsystems - Beijing China ((efi_gpe_t *)(void *)((vd)->flabel + 2 * (lba))) 528bae9e67eSachartre 529bae9e67eSachartre 5301ae08745Sheppo typedef struct vds_operation { 5313af08d82Slm66018 char *namep; 5321ae08745Sheppo uint8_t operation; 
533d10e4ef2Snarayan int (*start)(vd_task_t *task); 534205eeb1aSlm66018 int (*complete)(vd_task_t *task); 5351ae08745Sheppo } vds_operation_t; 5361ae08745Sheppo 5370a55fbb7Slm66018 typedef struct vd_ioctl { 5380a55fbb7Slm66018 uint8_t operation; /* vdisk operation */ 5390a55fbb7Slm66018 const char *operation_name; /* vdisk operation name */ 5400a55fbb7Slm66018 size_t nbytes; /* size of operation buffer */ 5410a55fbb7Slm66018 int cmd; /* corresponding ioctl cmd */ 5420a55fbb7Slm66018 const char *cmd_name; /* ioctl cmd name */ 5430a55fbb7Slm66018 void *arg; /* ioctl cmd argument */ 5440a55fbb7Slm66018 /* convert input vd_buf to output ioctl_arg */ 5452f5224aeSachartre int (*copyin)(void *vd_buf, size_t, void *ioctl_arg); 5460a55fbb7Slm66018 /* convert input ioctl_arg to output vd_buf */ 5470a55fbb7Slm66018 void (*copyout)(void *ioctl_arg, void *vd_buf); 548047ba61eSachartre /* write is true if the operation writes any data to the backend */ 549047ba61eSachartre boolean_t write; 5500a55fbb7Slm66018 } vd_ioctl_t; 5510a55fbb7Slm66018 5520a55fbb7Slm66018 /* Define trivial copyin/copyout conversion function flag */ 5532f5224aeSachartre #define VD_IDENTITY_IN ((int (*)(void *, size_t, void *))-1) 5542f5224aeSachartre #define VD_IDENTITY_OUT ((void (*)(void *, void *))-1) 5551ae08745Sheppo 5561ae08745Sheppo 5573c96341aSnarayan static int vds_ldc_retries = VDS_RETRIES; 5583af08d82Slm66018 static int vds_ldc_delay = VDS_LDC_DELAY; 5593c96341aSnarayan static int vds_dev_retries = VDS_RETRIES; 5603c96341aSnarayan static int vds_dev_delay = VDS_DEV_DELAY; 5611ae08745Sheppo static void *vds_state; 5621ae08745Sheppo 56387a7269eSachartre static short vd_scsi_rdwr_timeout = VD_SCSI_RDWR_TIMEOUT; 5642f5224aeSachartre static int vd_scsi_debug = USCSI_SILENT; 5652f5224aeSachartre 5662f5224aeSachartre /* 56783990c4aSAlexandre Chartre * Number of threads in the taskq handling vdisk I/O. 
This can be set up to 56883990c4aSAlexandre Chartre * the size of the DRing which is the maximum number of I/O we can receive 56983990c4aSAlexandre Chartre * in parallel. Note that using a high number of threads can improve performance 57083990c4aSAlexandre Chartre * but this is going to consume a lot of resources if there are many vdisks. 57183990c4aSAlexandre Chartre */ 57283990c4aSAlexandre Chartre static int vd_ioq_nthreads = VD_IOQ_NTHREADS; 57383990c4aSAlexandre Chartre 57483990c4aSAlexandre Chartre /* 57583990c4aSAlexandre Chartre * Tunable to define the behavior for flushing asynchronous writes used to 57683990c4aSAlexandre Chartre * performed some write I/O requests. The default behavior is to group as 57783990c4aSAlexandre Chartre * much asynchronous writes as possible and to flush them immediatly. 57883990c4aSAlexandre Chartre * 57983990c4aSAlexandre Chartre * If the tunable is set to 0 then explicit flushing is disabled. In that 58083990c4aSAlexandre Chartre * case, data will be flushed by traditional mechanism (like fsflush) but 58183990c4aSAlexandre Chartre * this might not happen immediatly. 58283990c4aSAlexandre Chartre * 58383990c4aSAlexandre Chartre */ 58483990c4aSAlexandre Chartre static int vd_awflush = VD_AWFLUSH_IMMEDIATE | VD_AWFLUSH_GROUP; 58583990c4aSAlexandre Chartre 58683990c4aSAlexandre Chartre /* 5872f5224aeSachartre * Tunable to define the behavior of the service domain if the vdisk server 5882f5224aeSachartre * fails to reset disk exclusive access when a LDC channel is reset. When a 5892f5224aeSachartre * LDC channel is reset the vdisk server will try to reset disk exclusive 5902f5224aeSachartre * access by releasing any SCSI-2 reservation or resetting the disk. If these 5912f5224aeSachartre * actions fail then the default behavior (vd_reset_access_failure = 0) is to 5922f5224aeSachartre * print a warning message. 
This default behavior can be changed by setting 5932f5224aeSachartre * the vd_reset_access_failure variable to A_REBOOT (= 0x1) and that will 5942f5224aeSachartre * cause the service domain to reboot, or A_DUMP (= 0x5) and that will cause 5952f5224aeSachartre * the service domain to panic. In both cases, the reset of the service domain 5962f5224aeSachartre * should trigger a reset SCSI buses and hopefully clear any SCSI-2 reservation. 5972f5224aeSachartre */ 5982f5224aeSachartre static int vd_reset_access_failure = 0; 5992f5224aeSachartre 6002f5224aeSachartre /* 6012f5224aeSachartre * Tunable for backward compatibility. When this variable is set to B_TRUE, 6022f5224aeSachartre * all disk volumes (ZFS, SVM, VxvM volumes) will be exported as single 6032f5224aeSachartre * slice disks whether or not they have the "slice" option set. This is 6042f5224aeSachartre * to provide a simple backward compatibility mechanism when upgrading 6052f5224aeSachartre * the vds driver and using a domain configuration created before the 6062f5224aeSachartre * "slice" option was available. 6072f5224aeSachartre */ 6082f5224aeSachartre static boolean_t vd_volume_force_slice = B_FALSE; 60987a7269eSachartre 6100a55fbb7Slm66018 /* 61166cfcfbeSachartre * The label of disk images created with some earlier versions of the virtual 61266cfcfbeSachartre * disk software is not entirely correct and have an incorrect v_sanity field 61366cfcfbeSachartre * (usually 0) instead of VTOC_SANE. This creates a compatibility problem with 61466cfcfbeSachartre * these images because we are now validating that the disk label (and the 61566cfcfbeSachartre * sanity) is correct when a disk image is opened. 61666cfcfbeSachartre * 61766cfcfbeSachartre * This tunable is set to false to not validate the sanity field and ensure 61866cfcfbeSachartre * compatibility. 
If the tunable is set to true, we will do a strict checking 61966cfcfbeSachartre * of the sanity but this can create compatibility problems with old disk 62066cfcfbeSachartre * images. 62166cfcfbeSachartre */ 6221aff8f07SAlexandre Chartre static boolean_t vd_dskimg_validate_sanity = B_FALSE; 62366cfcfbeSachartre 62466cfcfbeSachartre /* 625bbfa0259Sha137994 * Enables the use of LDC_DIRECT_MAP when mapping in imported descriptor rings. 626bbfa0259Sha137994 */ 627bbfa0259Sha137994 static boolean_t vd_direct_mapped_drings = B_TRUE; 628bbfa0259Sha137994 629bbfa0259Sha137994 /* 630bae9e67eSachartre * When a backend is exported as a single-slice disk then we entirely fake 631bae9e67eSachartre * its disk label. So it can be exported either with a VTOC label or with 632bae9e67eSachartre * an EFI label. If vd_slice_label is set to VD_DISK_LABEL_VTOC then all 633bae9e67eSachartre * single-slice disks will be exported with a VTOC label; and if it is set 634bae9e67eSachartre * to VD_DISK_LABEL_EFI then all single-slice disks will be exported with 635bae9e67eSachartre * an EFI label. 636bae9e67eSachartre * 637bae9e67eSachartre * If vd_slice_label is set to VD_DISK_LABEL_UNK and the backend is a disk 638bae9e67eSachartre * or volume device then it will be exported with the same type of label as 639bae9e67eSachartre * defined on the device. Otherwise if the backend is a file then it will 640bae9e67eSachartre * exported with the disk label type set in the vd_file_slice_label variable. 641bae9e67eSachartre * 642bae9e67eSachartre * Note that if the backend size is greater than 1TB then it will always be 643bae9e67eSachartre * exported with an EFI label no matter what the setting is. 
644bae9e67eSachartre */ 645bae9e67eSachartre static vd_disk_label_t vd_slice_label = VD_DISK_LABEL_UNK; 646bae9e67eSachartre 647bae9e67eSachartre static vd_disk_label_t vd_file_slice_label = VD_DISK_LABEL_VTOC; 648bae9e67eSachartre 649bae9e67eSachartre /* 650bae9e67eSachartre * Tunable for backward compatibility. If this variable is set to B_TRUE then 651bae9e67eSachartre * single-slice disks are exported as disks with only one slice instead of 652bae9e67eSachartre * faking a complete disk partitioning. 653bae9e67eSachartre */ 654bae9e67eSachartre static boolean_t vd_slice_single_slice = B_FALSE; 655bae9e67eSachartre 656bae9e67eSachartre /* 6570a55fbb7Slm66018 * Supported protocol version pairs, from highest (newest) to lowest (oldest) 6580a55fbb7Slm66018 * 6590a55fbb7Slm66018 * Each supported major version should appear only once, paired with (and only 6600a55fbb7Slm66018 * with) its highest supported minor version number (as the protocol requires 6610a55fbb7Slm66018 * supporting all lower minor version numbers as well) 6620a55fbb7Slm66018 */ 66317cadca8Slm66018 static const vio_ver_t vds_version[] = {{1, 1}}; 6640a55fbb7Slm66018 static const size_t vds_num_versions = 6650a55fbb7Slm66018 sizeof (vds_version)/sizeof (vds_version[0]); 6660a55fbb7Slm66018 6673af08d82Slm66018 static void vd_free_dring_task(vd_t *vdp); 6683c96341aSnarayan static int vd_setup_vd(vd_t *vd); 669047ba61eSachartre static int vd_setup_single_slice_disk(vd_t *vd); 6701aff8f07SAlexandre Chartre static int vd_setup_slice_image(vd_t *vd); 6711aff8f07SAlexandre Chartre static int vd_setup_disk_image(vd_t *vd); 672de3a5331SRamesh Chitrothu static int vd_backend_check_size(vd_t *vd); 6733c96341aSnarayan static boolean_t vd_enabled(vd_t *vd); 67478fcd0a1Sachartre static ushort_t vd_lbl2cksum(struct dk_label *label); 6751aff8f07SAlexandre Chartre static int vd_dskimg_validate_geometry(vd_t *vd); 6761aff8f07SAlexandre Chartre static boolean_t vd_dskimg_is_iso_image(vd_t *vd); 67717cadca8Slm66018 
static void vd_set_exported_operations(vd_t *vd); 6782f5224aeSachartre static void vd_reset_access(vd_t *vd); 679edcc0754Sachartre static int vd_backend_ioctl(vd_t *vd, int cmd, caddr_t arg); 680edcc0754Sachartre static int vds_efi_alloc_and_read(vd_t *, efi_gpt_t **, efi_gpe_t **); 681edcc0754Sachartre static void vds_efi_free(vd_t *, efi_gpt_t *, efi_gpe_t *); 6828fce2fd6Sachartre static void vds_driver_types_free(vds_t *vds); 683342440ecSPrasad Singamsetty static void vd_vtocgeom_to_label(struct extvtoc *vtoc, struct dk_geom *geom, 684bae9e67eSachartre struct dk_label *label); 685342440ecSPrasad Singamsetty static void vd_label_to_vtocgeom(struct dk_label *label, struct extvtoc *vtoc, 686bae9e67eSachartre struct dk_geom *geom); 687bae9e67eSachartre static boolean_t vd_slice_geom_isvalid(vd_t *vd, struct dk_geom *geom); 688342440ecSPrasad Singamsetty static boolean_t vd_slice_vtoc_isvalid(vd_t *vd, struct extvtoc *vtoc); 689bae9e67eSachartre 690bae9e67eSachartre extern int is_pseudo_device(dev_info_t *); 691bae9e67eSachartre 692bae9e67eSachartre /* 693bae9e67eSachartre * Function: 694bae9e67eSachartre * vd_get_readable_size 695bae9e67eSachartre * 696bae9e67eSachartre * Description: 697bae9e67eSachartre * Convert a given size in bytes to a human readable format in 698bae9e67eSachartre * kilobytes, megabytes, gigabytes or terabytes. 699bae9e67eSachartre * 700bae9e67eSachartre * Parameters: 701bae9e67eSachartre * full_size - the size to convert in bytes. 702bae9e67eSachartre * size - the converted size. 703bae9e67eSachartre * unit - the unit of the converted size: 'K' (kilobyte), 704bae9e67eSachartre * 'M' (Megabyte), 'G' (Gigabyte), 'T' (Terabyte). 
/*
 * Function:
 *	vd_get_readable_size
 *
 * Description:
 *	Scale a size given in bytes down to kilobytes, megabytes, gigabytes
 *	or terabytes so that it can be printed in a human readable form.
 *	The largest unit not exceeding the size is used, except that any
 *	size below one megabyte is reported in kilobytes. The scaled value
 *	is truncated, not rounded.
 *
 * Parameters:
 *	full_size	- the size to convert in bytes.
 *	size		- the converted size.
 *	unit		- the unit of the converted size: 'K' (kilobyte),
 *			  'M' (Megabyte), 'G' (Gigabyte), 'T' (Terabyte).
 *
 * Return Code:
 *	none
 */
static void
vd_get_readable_size(size_t full_size, size_t *size, char *unit)
{
	static const char suffix[] = { 'K', 'M', 'G', 'T' };
	unsigned int idx = 0;

	/*
	 * suffix[idx] is expressed in units of 2^(10 * (idx + 1)) bytes;
	 * move to the next unit for as long as the size reaches it.
	 */
	while (idx < 3 && full_size >= (1ULL << (10 * (idx + 2))))
		idx++;

	*size = full_size >> (10 * (idx + 1));
	*unit = suffix[idx];
}

7441aff8f07SAlexandre Chartre * lenp - pointer to the number of bytes requested; return 7451aff8f07SAlexandre Chartre * the number of bytes that can effectively be used. 746690555a1Sachartre * 747690555a1Sachartre * Return Code: 7481aff8f07SAlexandre Chartre * 0 - I/O parameters have been successfully converted; 7491aff8f07SAlexandre Chartre * blkp and lenp point to the converted values. 7501aff8f07SAlexandre Chartre * ENODATA - no data are available for the given I/O parameters; 7511aff8f07SAlexandre Chartre * This occurs if the starting block is past the limit 7521aff8f07SAlexandre Chartre * of the slice. 7531aff8f07SAlexandre Chartre * EINVAL - I/O parameters are invalid. 754690555a1Sachartre */ 7551aff8f07SAlexandre Chartre static int 7561aff8f07SAlexandre Chartre vd_dskimg_io_params(vd_t *vd, int slice, size_t *blkp, size_t *lenp) 757690555a1Sachartre { 7581aff8f07SAlexandre Chartre size_t blk = *blkp; 7591aff8f07SAlexandre Chartre size_t len = *lenp; 7601aff8f07SAlexandre Chartre size_t offset, maxlen; 761690555a1Sachartre 7621aff8f07SAlexandre Chartre ASSERT(vd->file || VD_DSKIMG(vd)); 763690555a1Sachartre ASSERT(len > 0); 764*65908c77Syu, larry liu - Sun Microsystems - Beijing China ASSERT(vd->vdisk_bsize == DEV_BSIZE); 765690555a1Sachartre 766047ba61eSachartre /* 767047ba61eSachartre * If a file is exported as a slice then we don't care about the vtoc. 768047ba61eSachartre * In that case, the vtoc is a fake mainly to make newfs happy and we 769047ba61eSachartre * handle any I/O as a raw disk access so that we can have access to the 770047ba61eSachartre * entire backend. 
771047ba61eSachartre */ 772047ba61eSachartre if (vd->vdisk_type == VD_DISK_TYPE_SLICE || slice == VD_SLICE_NONE) { 773690555a1Sachartre /* raw disk access */ 774690555a1Sachartre offset = blk * DEV_BSIZE; 7751aff8f07SAlexandre Chartre if (offset >= vd->dskimg_size) { 776bae9e67eSachartre /* offset past the end of the disk */ 7771aff8f07SAlexandre Chartre PR0("offset (0x%lx) >= size (0x%lx)", 7781aff8f07SAlexandre Chartre offset, vd->dskimg_size); 7791aff8f07SAlexandre Chartre return (ENODATA); 780bae9e67eSachartre } 7811aff8f07SAlexandre Chartre maxlen = vd->dskimg_size - offset; 782690555a1Sachartre } else { 783690555a1Sachartre ASSERT(slice >= 0 && slice < V_NUMPAR); 78478fcd0a1Sachartre 78517cadca8Slm66018 /* 78617cadca8Slm66018 * v1.0 vDisk clients depended on the server not verifying 78717cadca8Slm66018 * the label of a unformatted disk. This "feature" is 78817cadca8Slm66018 * maintained for backward compatibility but all versions 78917cadca8Slm66018 * from v1.1 onwards must do the right thing. 
79017cadca8Slm66018 */ 79178fcd0a1Sachartre if (vd->vdisk_label == VD_DISK_LABEL_UNK && 792edcc0754Sachartre vio_ver_is_supported(vd->version, 1, 1)) { 7931aff8f07SAlexandre Chartre (void) vd_dskimg_validate_geometry(vd); 794edcc0754Sachartre if (vd->vdisk_label == VD_DISK_LABEL_UNK) { 795edcc0754Sachartre PR0("Unknown disk label, can't do I/O " 796edcc0754Sachartre "from slice %d", slice); 7971aff8f07SAlexandre Chartre return (EINVAL); 79878fcd0a1Sachartre } 799edcc0754Sachartre } 80078fcd0a1Sachartre 801edcc0754Sachartre if (vd->vdisk_label == VD_DISK_LABEL_VTOC) { 802edcc0754Sachartre ASSERT(vd->vtoc.v_sectorsz == DEV_BSIZE); 803edcc0754Sachartre } else { 804edcc0754Sachartre ASSERT(vd->vdisk_label == VD_DISK_LABEL_EFI); 805edcc0754Sachartre } 806edcc0754Sachartre 807edcc0754Sachartre if (blk >= vd->slices[slice].nblocks) { 808690555a1Sachartre /* address past the end of the slice */ 809bae9e67eSachartre PR0("req_addr (0x%lx) >= psize (0x%lx)", 810edcc0754Sachartre blk, vd->slices[slice].nblocks); 8111aff8f07SAlexandre Chartre return (ENODATA); 812690555a1Sachartre } 813690555a1Sachartre 814edcc0754Sachartre offset = (vd->slices[slice].start + blk) * DEV_BSIZE; 815bae9e67eSachartre maxlen = (vd->slices[slice].nblocks - blk) * DEV_BSIZE; 816bae9e67eSachartre } 817690555a1Sachartre 818690555a1Sachartre /* 819690555a1Sachartre * If the requested size is greater than the size 820690555a1Sachartre * of the partition, truncate the read/write. 821690555a1Sachartre */ 822690555a1Sachartre if (len > maxlen) { 823690555a1Sachartre PR0("I/O size truncated to %lu bytes from %lu bytes", 824690555a1Sachartre maxlen, len); 825690555a1Sachartre len = maxlen; 826690555a1Sachartre } 827690555a1Sachartre 828690555a1Sachartre /* 829690555a1Sachartre * We have to ensure that we are reading/writing into the mmap 830690555a1Sachartre * range. If we have a partial disk image (e.g. 
an image of 831690555a1Sachartre * s0 instead s2) the system can try to access slices that 832690555a1Sachartre * are not included into the disk image. 833690555a1Sachartre */ 8341aff8f07SAlexandre Chartre if ((offset + len) > vd->dskimg_size) { 835edcc0754Sachartre PR0("offset + nbytes (0x%lx + 0x%lx) > " 8361aff8f07SAlexandre Chartre "dskimg_size (0x%lx)", offset, len, vd->dskimg_size); 8371aff8f07SAlexandre Chartre return (EINVAL); 8381aff8f07SAlexandre Chartre } 8391aff8f07SAlexandre Chartre 8401aff8f07SAlexandre Chartre *blkp = offset / DEV_BSIZE; 8411aff8f07SAlexandre Chartre *lenp = len; 8421aff8f07SAlexandre Chartre 8431aff8f07SAlexandre Chartre return (0); 8441aff8f07SAlexandre Chartre } 8451aff8f07SAlexandre Chartre 8461aff8f07SAlexandre Chartre /* 8471aff8f07SAlexandre Chartre * Function: 8481aff8f07SAlexandre Chartre * vd_dskimg_rw 8491aff8f07SAlexandre Chartre * 8501aff8f07SAlexandre Chartre * Description: 8511aff8f07SAlexandre Chartre * Read or write to a disk image. It handles the case where the disk 8521aff8f07SAlexandre Chartre * image is a file or a volume exported as a full disk or a file 8531aff8f07SAlexandre Chartre * exported as single-slice disk. Read or write to volumes exported as 8541aff8f07SAlexandre Chartre * single slice disks are done by directly using the ldi interface. 8551aff8f07SAlexandre Chartre * 8561aff8f07SAlexandre Chartre * Parameters: 8571aff8f07SAlexandre Chartre * vd - disk on which the operation is performed. 8581aff8f07SAlexandre Chartre * slice - slice on which the operation is performed, 8591aff8f07SAlexandre Chartre * VD_SLICE_NONE indicates that the operation 8601aff8f07SAlexandre Chartre * is done using an absolute disk offset. 8611aff8f07SAlexandre Chartre * operation - operation to execute: read (VD_OP_BREAD) or 8621aff8f07SAlexandre Chartre * write (VD_OP_BWRITE). 8631aff8f07SAlexandre Chartre * data - buffer where data are read to or written from. 
8641aff8f07SAlexandre Chartre * blk - starting block for the operation. 8651aff8f07SAlexandre Chartre * len - number of bytes to read or write. 8661aff8f07SAlexandre Chartre * 8671aff8f07SAlexandre Chartre * Return Code: 8681aff8f07SAlexandre Chartre * n >= 0 - success, n indicates the number of bytes read 8691aff8f07SAlexandre Chartre * or written. 8701aff8f07SAlexandre Chartre * -1 - error. 8711aff8f07SAlexandre Chartre */ 8721aff8f07SAlexandre Chartre static ssize_t 8731aff8f07SAlexandre Chartre vd_dskimg_rw(vd_t *vd, int slice, int operation, caddr_t data, size_t offset, 8741aff8f07SAlexandre Chartre size_t len) 8751aff8f07SAlexandre Chartre { 8761aff8f07SAlexandre Chartre ssize_t resid; 8771aff8f07SAlexandre Chartre struct buf buf; 8781aff8f07SAlexandre Chartre int status; 8791aff8f07SAlexandre Chartre 8801aff8f07SAlexandre Chartre ASSERT(vd->file || VD_DSKIMG(vd)); 8811aff8f07SAlexandre Chartre ASSERT(len > 0); 882*65908c77Syu, larry liu - Sun Microsystems - Beijing China ASSERT(vd->vdisk_bsize == DEV_BSIZE); 8831aff8f07SAlexandre Chartre 8841aff8f07SAlexandre Chartre if ((status = vd_dskimg_io_params(vd, slice, &offset, &len)) != 0) 8851aff8f07SAlexandre Chartre return ((status == ENODATA)? 0: -1); 8861aff8f07SAlexandre Chartre 8871aff8f07SAlexandre Chartre if (vd->volume) { 8881aff8f07SAlexandre Chartre 8891aff8f07SAlexandre Chartre bioinit(&buf); 8901aff8f07SAlexandre Chartre buf.b_flags = B_BUSY | 8911aff8f07SAlexandre Chartre ((operation == VD_OP_BREAD)? 
B_READ : B_WRITE); 8921aff8f07SAlexandre Chartre buf.b_bcount = len; 89383990c4aSAlexandre Chartre buf.b_lblkno = offset; 8941aff8f07SAlexandre Chartre buf.b_edev = vd->dev[0]; 8951aff8f07SAlexandre Chartre buf.b_un.b_addr = data; 8961aff8f07SAlexandre Chartre 8971aff8f07SAlexandre Chartre /* 8981aff8f07SAlexandre Chartre * We use ldi_strategy() and not ldi_read()/ldi_write() because 8991aff8f07SAlexandre Chartre * the read/write functions of the underlying driver may try to 9001aff8f07SAlexandre Chartre * lock pages of the data buffer, and this requires the data 9011aff8f07SAlexandre Chartre * buffer to be kmem_alloc'ed (and not allocated on the stack). 9021aff8f07SAlexandre Chartre * 9031aff8f07SAlexandre Chartre * Also using ldi_strategy() ensures that writes are immediatly 9041aff8f07SAlexandre Chartre * commited and not cached as this may be the case with 9051aff8f07SAlexandre Chartre * ldi_write() (for example with a ZFS volume). 9061aff8f07SAlexandre Chartre */ 9071aff8f07SAlexandre Chartre if (ldi_strategy(vd->ldi_handle[0], &buf) != 0) { 9081aff8f07SAlexandre Chartre biofini(&buf); 909690555a1Sachartre return (-1); 910690555a1Sachartre } 911690555a1Sachartre 9121aff8f07SAlexandre Chartre if (biowait(&buf) != 0) { 9131aff8f07SAlexandre Chartre biofini(&buf); 9141aff8f07SAlexandre Chartre return (-1); 9151aff8f07SAlexandre Chartre } 9161aff8f07SAlexandre Chartre 9171aff8f07SAlexandre Chartre resid = buf.b_resid; 9181aff8f07SAlexandre Chartre biofini(&buf); 9191aff8f07SAlexandre Chartre 9201aff8f07SAlexandre Chartre ASSERT(resid <= len); 9211aff8f07SAlexandre Chartre return (len - resid); 9221aff8f07SAlexandre Chartre } 9231aff8f07SAlexandre Chartre 9241aff8f07SAlexandre Chartre ASSERT(vd->file); 9251aff8f07SAlexandre Chartre 92683990c4aSAlexandre Chartre status = vn_rdwr((operation == VD_OP_BREAD)? 
UIO_READ : UIO_WRITE, 92783990c4aSAlexandre Chartre vd->file_vnode, data, len, offset * DEV_BSIZE, UIO_SYSSPACE, FSYNC, 92883990c4aSAlexandre Chartre RLIM64_INFINITY, kcred, &resid); 929690555a1Sachartre 93083990c4aSAlexandre Chartre if (status != 0) 931690555a1Sachartre return (-1); 932690555a1Sachartre 933690555a1Sachartre return (len); 934690555a1Sachartre } 935690555a1Sachartre 93687a7269eSachartre /* 93787a7269eSachartre * Function: 938bae9e67eSachartre * vd_build_default_label 93978fcd0a1Sachartre * 94078fcd0a1Sachartre * Description: 941bae9e67eSachartre * Return a default label for a given disk size. This is used when the disk 94278fcd0a1Sachartre * does not have a valid VTOC so that the user can get a valid default 94317cadca8Slm66018 * configuration. The default label has all slice sizes set to 0 (except 94478fcd0a1Sachartre * slice 2 which is the entire disk) to force the user to write a valid 94578fcd0a1Sachartre * label onto the disk image. 94678fcd0a1Sachartre * 94778fcd0a1Sachartre * Parameters: 948bae9e67eSachartre * disk_size - the disk size in bytes 949*65908c77Syu, larry liu - Sun Microsystems - Beijing China * bsize - the disk block size in bytes 95078fcd0a1Sachartre * label - the returned default label. 95178fcd0a1Sachartre * 95278fcd0a1Sachartre * Return Code: 95378fcd0a1Sachartre * none. 95478fcd0a1Sachartre */ 95578fcd0a1Sachartre static void 956*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd_build_default_label(size_t disk_size, size_t bsize, struct dk_label *label) 95778fcd0a1Sachartre { 95878fcd0a1Sachartre size_t size; 959bae9e67eSachartre char unit; 960edcc0754Sachartre 961edcc0754Sachartre bzero(label, sizeof (struct dk_label)); 96278fcd0a1Sachartre 96378fcd0a1Sachartre /* 964342440ecSPrasad Singamsetty * Ideally we would like the cylinder size (nsect * nhead) to be the 965342440ecSPrasad Singamsetty * same whatever the disk size is. 
That way the VTOC label could be 966342440ecSPrasad Singamsetty * easily updated in case the disk size is increased (keeping the 967342440ecSPrasad Singamsetty * same cylinder size allows to preserve the existing partitioning 968342440ecSPrasad Singamsetty * when updating the VTOC label). But it is not possible to have 969342440ecSPrasad Singamsetty * a fixed cylinder size and to cover all disk size. 97078fcd0a1Sachartre * 971342440ecSPrasad Singamsetty * So we define different cylinder sizes depending on the disk size. 972342440ecSPrasad Singamsetty * The cylinder size is chosen so that we don't have too few cylinders 973f745d6a3Sachartre * for a small disk image, or so many on a big disk image that you 974f745d6a3Sachartre * waste space for backup superblocks or cylinder group structures. 975342440ecSPrasad Singamsetty * Also we must have a resonable number of cylinders and sectors so 976342440ecSPrasad Singamsetty * that newfs can run using default values. 977342440ecSPrasad Singamsetty * 978342440ecSPrasad Singamsetty * +-----------+--------+---------+--------+ 979342440ecSPrasad Singamsetty * | disk_size | < 2MB | 2MB-4GB | >= 8GB | 980342440ecSPrasad Singamsetty * +-----------+--------+---------+--------+ 981342440ecSPrasad Singamsetty * | nhead | 1 | 1 | 96 | 982342440ecSPrasad Singamsetty * | nsect | 200 | 600 | 768 | 983342440ecSPrasad Singamsetty * +-----------+--------+---------+--------+ 984342440ecSPrasad Singamsetty * 985342440ecSPrasad Singamsetty * Other parameters are computed from these values: 986342440ecSPrasad Singamsetty * 987342440ecSPrasad Singamsetty * pcyl = disk_size / (nhead * nsect * 512) 988342440ecSPrasad Singamsetty * acyl = (pcyl > 2)? 
2 : 0 989342440ecSPrasad Singamsetty * ncyl = pcyl - acyl 990342440ecSPrasad Singamsetty * 991342440ecSPrasad Singamsetty * The maximum number of cylinder is 65535 so this allows to define a 992342440ecSPrasad Singamsetty * geometry for a disk size up to 65535 * 96 * 768 * 512 = 2.24 TB 993342440ecSPrasad Singamsetty * which is more than enough to cover the maximum size allowed by the 994342440ecSPrasad Singamsetty * extended VTOC format (2TB). 99578fcd0a1Sachartre */ 996342440ecSPrasad Singamsetty 997342440ecSPrasad Singamsetty if (disk_size >= 8 * ONE_GIGABYTE) { 998342440ecSPrasad Singamsetty 999342440ecSPrasad Singamsetty label->dkl_nhead = 96; 1000342440ecSPrasad Singamsetty label->dkl_nsect = 768; 1001342440ecSPrasad Singamsetty 1002342440ecSPrasad Singamsetty } else if (disk_size >= 2 * ONE_MEGABYTE) { 1003342440ecSPrasad Singamsetty 1004342440ecSPrasad Singamsetty label->dkl_nhead = 1; 1005342440ecSPrasad Singamsetty label->dkl_nsect = 600; 1006342440ecSPrasad Singamsetty 1007342440ecSPrasad Singamsetty } else { 1008342440ecSPrasad Singamsetty 1009342440ecSPrasad Singamsetty label->dkl_nhead = 1; 1010342440ecSPrasad Singamsetty label->dkl_nsect = 200; 1011342440ecSPrasad Singamsetty } 1012342440ecSPrasad Singamsetty 1013342440ecSPrasad Singamsetty label->dkl_pcyl = disk_size / 1014*65908c77Syu, larry liu - Sun Microsystems - Beijing China (label->dkl_nsect * label->dkl_nhead * bsize); 101578fcd0a1Sachartre 101678fcd0a1Sachartre if (label->dkl_pcyl == 0) 101778fcd0a1Sachartre label->dkl_pcyl = 1; 101878fcd0a1Sachartre 1019047ba61eSachartre label->dkl_acyl = 0; 1020047ba61eSachartre 102178fcd0a1Sachartre if (label->dkl_pcyl > 2) 102278fcd0a1Sachartre label->dkl_acyl = 2; 102378fcd0a1Sachartre 102478fcd0a1Sachartre label->dkl_ncyl = label->dkl_pcyl - label->dkl_acyl; 102578fcd0a1Sachartre label->dkl_write_reinstruct = 0; 102678fcd0a1Sachartre label->dkl_read_reinstruct = 0; 102778fcd0a1Sachartre label->dkl_rpm = 7200; 102878fcd0a1Sachartre label->dkl_apc = 0; 
102978fcd0a1Sachartre label->dkl_intrlv = 0; 103078fcd0a1Sachartre 1031bae9e67eSachartre PR0("requested disk size: %ld bytes\n", disk_size); 103278fcd0a1Sachartre PR0("setup: ncyl=%d nhead=%d nsec=%d\n", label->dkl_pcyl, 103378fcd0a1Sachartre label->dkl_nhead, label->dkl_nsect); 103478fcd0a1Sachartre PR0("provided disk size: %ld bytes\n", (uint64_t) 103578fcd0a1Sachartre (label->dkl_pcyl * label->dkl_nhead * 1036*65908c77Syu, larry liu - Sun Microsystems - Beijing China label->dkl_nsect * bsize)); 103778fcd0a1Sachartre 1038bae9e67eSachartre vd_get_readable_size(disk_size, &size, &unit); 103978fcd0a1Sachartre 104078fcd0a1Sachartre /* 104178fcd0a1Sachartre * We must have a correct label name otherwise format(1m) will 104278fcd0a1Sachartre * not recognized the disk as labeled. 104378fcd0a1Sachartre */ 104478fcd0a1Sachartre (void) snprintf(label->dkl_asciilabel, LEN_DKL_ASCII, 104578fcd0a1Sachartre "SUN-DiskImage-%ld%cB cyl %d alt %d hd %d sec %d", 1046bae9e67eSachartre size, unit, 104778fcd0a1Sachartre label->dkl_ncyl, label->dkl_acyl, label->dkl_nhead, 104878fcd0a1Sachartre label->dkl_nsect); 104978fcd0a1Sachartre 105078fcd0a1Sachartre /* default VTOC */ 1051342440ecSPrasad Singamsetty label->dkl_vtoc.v_version = V_EXTVERSION; 1052edcc0754Sachartre label->dkl_vtoc.v_nparts = V_NUMPAR; 105378fcd0a1Sachartre label->dkl_vtoc.v_sanity = VTOC_SANE; 1054edcc0754Sachartre label->dkl_vtoc.v_part[VD_ENTIRE_DISK_SLICE].p_tag = V_BACKUP; 1055edcc0754Sachartre label->dkl_map[VD_ENTIRE_DISK_SLICE].dkl_cylno = 0; 1056edcc0754Sachartre label->dkl_map[VD_ENTIRE_DISK_SLICE].dkl_nblk = label->dkl_ncyl * 105778fcd0a1Sachartre label->dkl_nhead * label->dkl_nsect; 1058edcc0754Sachartre label->dkl_magic = DKL_MAGIC; 105978fcd0a1Sachartre label->dkl_cksum = vd_lbl2cksum(label); 106078fcd0a1Sachartre } 106178fcd0a1Sachartre 106278fcd0a1Sachartre /* 106378fcd0a1Sachartre * Function: 10641aff8f07SAlexandre Chartre * vd_dskimg_set_vtoc 106587a7269eSachartre * 106687a7269eSachartre * 
Description: 106787a7269eSachartre * Set the vtoc of a disk image by writing the label and backup 106887a7269eSachartre * labels into the disk image backend. 106987a7269eSachartre * 107087a7269eSachartre * Parameters: 107187a7269eSachartre * vd - disk on which the operation is performed. 107287a7269eSachartre * label - the data to be written. 107387a7269eSachartre * 107487a7269eSachartre * Return Code: 107587a7269eSachartre * 0 - success. 107687a7269eSachartre * n > 0 - error, n indicates the errno code. 107787a7269eSachartre */ 107887a7269eSachartre static int 10791aff8f07SAlexandre Chartre vd_dskimg_set_vtoc(vd_t *vd, struct dk_label *label) 108087a7269eSachartre { 1081342440ecSPrasad Singamsetty size_t blk, sec, cyl, head, cnt; 108287a7269eSachartre 10831aff8f07SAlexandre Chartre ASSERT(VD_DSKIMG(vd)); 108487a7269eSachartre 10851aff8f07SAlexandre Chartre if (VD_DSKIMG_LABEL_WRITE(vd, label) < 0) { 108687a7269eSachartre PR0("fail to write disk label"); 108787a7269eSachartre return (EIO); 108887a7269eSachartre } 108987a7269eSachartre 109087a7269eSachartre /* 109187a7269eSachartre * Backup labels are on the last alternate cylinder's 109287a7269eSachartre * first five odd sectors. 109387a7269eSachartre */ 109487a7269eSachartre if (label->dkl_acyl == 0) { 109587a7269eSachartre PR0("no alternate cylinder, can not store backup labels"); 109687a7269eSachartre return (0); 109787a7269eSachartre } 109887a7269eSachartre 109987a7269eSachartre cyl = label->dkl_ncyl + label->dkl_acyl - 1; 110087a7269eSachartre head = label->dkl_nhead - 1; 110187a7269eSachartre 110287a7269eSachartre blk = (cyl * ((label->dkl_nhead * label->dkl_nsect) - label->dkl_apc)) + 110387a7269eSachartre (head * label->dkl_nsect); 110487a7269eSachartre 110587a7269eSachartre /* 110687a7269eSachartre * Write the backup labels. Make sure we don't try to write past 110787a7269eSachartre * the last cylinder. 
110887a7269eSachartre */ 110987a7269eSachartre sec = 1; 111087a7269eSachartre 11111aff8f07SAlexandre Chartre for (cnt = 0; cnt < VD_DSKIMG_NUM_BACKUP; cnt++) { 111287a7269eSachartre 111387a7269eSachartre if (sec >= label->dkl_nsect) { 111487a7269eSachartre PR0("not enough sector to store all backup labels"); 111587a7269eSachartre return (0); 111687a7269eSachartre } 111787a7269eSachartre 11181aff8f07SAlexandre Chartre if (vd_dskimg_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, 11191aff8f07SAlexandre Chartre (caddr_t)label, blk + sec, sizeof (struct dk_label)) < 0) { 1120342440ecSPrasad Singamsetty PR0("error writing backup label at block %lu\n", 112187a7269eSachartre blk + sec); 112287a7269eSachartre return (EIO); 112387a7269eSachartre } 112487a7269eSachartre 1125342440ecSPrasad Singamsetty PR1("wrote backup label at block %lu\n", blk + sec); 112687a7269eSachartre 112787a7269eSachartre sec += 2; 112887a7269eSachartre } 112987a7269eSachartre 113087a7269eSachartre return (0); 113187a7269eSachartre } 113287a7269eSachartre 113387a7269eSachartre /* 113487a7269eSachartre * Function: 11351aff8f07SAlexandre Chartre * vd_dskimg_get_devid_block 113687a7269eSachartre * 113787a7269eSachartre * Description: 113887a7269eSachartre * Return the block number where the device id is stored. 113987a7269eSachartre * 114087a7269eSachartre * Parameters: 114187a7269eSachartre * vd - disk on which the operation is performed. 
114287a7269eSachartre * blkp - pointer to the block number 114387a7269eSachartre * 114487a7269eSachartre * Return Code: 114587a7269eSachartre * 0 - success 114687a7269eSachartre * ENOSPC - disk has no space to store a device id 114787a7269eSachartre */ 114887a7269eSachartre static int 11491aff8f07SAlexandre Chartre vd_dskimg_get_devid_block(vd_t *vd, size_t *blkp) 115087a7269eSachartre { 115187a7269eSachartre diskaddr_t spc, head, cyl; 115287a7269eSachartre 11531aff8f07SAlexandre Chartre ASSERT(VD_DSKIMG(vd)); 1154edcc0754Sachartre 1155edcc0754Sachartre if (vd->vdisk_label == VD_DISK_LABEL_UNK) { 1156edcc0754Sachartre /* 1157edcc0754Sachartre * If no label is defined we don't know where to find 1158edcc0754Sachartre * a device id. 1159edcc0754Sachartre */ 1160edcc0754Sachartre return (ENOSPC); 1161edcc0754Sachartre } 1162edcc0754Sachartre 1163edcc0754Sachartre if (vd->vdisk_label == VD_DISK_LABEL_EFI) { 1164edcc0754Sachartre /* 1165edcc0754Sachartre * For an EFI disk, the devid is at the beginning of 1166edcc0754Sachartre * the reserved slice 1167edcc0754Sachartre */ 1168edcc0754Sachartre if (vd->efi_reserved == -1) { 1169edcc0754Sachartre PR0("EFI disk has no reserved slice"); 1170edcc0754Sachartre return (ENOSPC); 1171edcc0754Sachartre } 1172edcc0754Sachartre 1173edcc0754Sachartre *blkp = vd->slices[vd->efi_reserved].start; 1174edcc0754Sachartre return (0); 1175edcc0754Sachartre } 1176edcc0754Sachartre 117787a7269eSachartre ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC); 117887a7269eSachartre 117987a7269eSachartre /* this geometry doesn't allow us to have a devid */ 118087a7269eSachartre if (vd->dk_geom.dkg_acyl < 2) { 118187a7269eSachartre PR0("not enough alternate cylinder available for devid " 118287a7269eSachartre "(acyl=%u)", vd->dk_geom.dkg_acyl); 118387a7269eSachartre return (ENOSPC); 118487a7269eSachartre } 118587a7269eSachartre 118687a7269eSachartre /* the devid is in on the track next to the last cylinder */ 118787a7269eSachartre cyl = 
vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl - 2; 118887a7269eSachartre spc = vd->dk_geom.dkg_nhead * vd->dk_geom.dkg_nsect; 118987a7269eSachartre head = vd->dk_geom.dkg_nhead - 1; 119087a7269eSachartre 119187a7269eSachartre *blkp = (cyl * (spc - vd->dk_geom.dkg_apc)) + 119287a7269eSachartre (head * vd->dk_geom.dkg_nsect) + 1; 119387a7269eSachartre 119487a7269eSachartre return (0); 119587a7269eSachartre } 119687a7269eSachartre 119787a7269eSachartre /* 119887a7269eSachartre * Return the checksum of a disk block containing an on-disk devid. 119987a7269eSachartre */ 120087a7269eSachartre static uint_t 120187a7269eSachartre vd_dkdevid2cksum(struct dk_devid *dkdevid) 120287a7269eSachartre { 120387a7269eSachartre uint_t chksum, *ip; 120487a7269eSachartre int i; 120587a7269eSachartre 120687a7269eSachartre chksum = 0; 1207342440ecSPrasad Singamsetty ip = (void *)dkdevid; 120887a7269eSachartre for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int)); i++) 120987a7269eSachartre chksum ^= ip[i]; 121087a7269eSachartre 121187a7269eSachartre return (chksum); 121287a7269eSachartre } 121387a7269eSachartre 121487a7269eSachartre /* 121587a7269eSachartre * Function: 12161aff8f07SAlexandre Chartre * vd_dskimg_read_devid 121787a7269eSachartre * 121887a7269eSachartre * Description: 121987a7269eSachartre * Read the device id stored on a disk image. 122087a7269eSachartre * 122187a7269eSachartre * Parameters: 122287a7269eSachartre * vd - disk on which the operation is performed. 122387a7269eSachartre * devid - the return address of the device ID. 
122487a7269eSachartre * 122587a7269eSachartre * Return Code: 122687a7269eSachartre * 0 - success 122787a7269eSachartre * EIO - I/O error while trying to access the disk image 122887a7269eSachartre * EINVAL - no valid device id was found 122987a7269eSachartre * ENOSPC - disk has no space to store a device id 123087a7269eSachartre */ 123187a7269eSachartre static int 12321aff8f07SAlexandre Chartre vd_dskimg_read_devid(vd_t *vd, ddi_devid_t *devid) 123387a7269eSachartre { 123487a7269eSachartre struct dk_devid *dkdevid; 123587a7269eSachartre size_t blk; 123687a7269eSachartre uint_t chksum; 123787a7269eSachartre int status, sz; 123887a7269eSachartre 1239*65908c77Syu, larry liu - Sun Microsystems - Beijing China ASSERT(vd->vdisk_bsize == DEV_BSIZE); 1240*65908c77Syu, larry liu - Sun Microsystems - Beijing China 12411aff8f07SAlexandre Chartre if ((status = vd_dskimg_get_devid_block(vd, &blk)) != 0) 124287a7269eSachartre return (status); 124387a7269eSachartre 124487a7269eSachartre dkdevid = kmem_zalloc(DEV_BSIZE, KM_SLEEP); 124587a7269eSachartre 124687a7269eSachartre /* get the devid */ 12471aff8f07SAlexandre Chartre if ((vd_dskimg_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, (caddr_t)dkdevid, blk, 124887a7269eSachartre DEV_BSIZE)) < 0) { 124987a7269eSachartre PR0("error reading devid block at %lu", blk); 125087a7269eSachartre status = EIO; 125187a7269eSachartre goto done; 125287a7269eSachartre } 125387a7269eSachartre 125487a7269eSachartre /* validate the revision */ 125587a7269eSachartre if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) || 125687a7269eSachartre (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) { 125787a7269eSachartre PR0("invalid devid found at block %lu (bad revision)", blk); 125887a7269eSachartre status = EINVAL; 125987a7269eSachartre goto done; 126087a7269eSachartre } 126187a7269eSachartre 126287a7269eSachartre /* compute checksum */ 126387a7269eSachartre chksum = vd_dkdevid2cksum(dkdevid); 126487a7269eSachartre 126587a7269eSachartre /* compare the checksums */ 
126687a7269eSachartre if (DKD_GETCHKSUM(dkdevid) != chksum) { 126787a7269eSachartre PR0("invalid devid found at block %lu (bad checksum)", blk); 126887a7269eSachartre status = EINVAL; 126987a7269eSachartre goto done; 127087a7269eSachartre } 127187a7269eSachartre 127287a7269eSachartre /* validate the device id */ 127387a7269eSachartre if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) { 127487a7269eSachartre PR0("invalid devid found at block %lu", blk); 127587a7269eSachartre status = EINVAL; 127687a7269eSachartre goto done; 127787a7269eSachartre } 127887a7269eSachartre 127987a7269eSachartre PR1("devid read at block %lu", blk); 128087a7269eSachartre 128187a7269eSachartre sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid); 128287a7269eSachartre *devid = kmem_alloc(sz, KM_SLEEP); 128387a7269eSachartre bcopy(&dkdevid->dkd_devid, *devid, sz); 128487a7269eSachartre 128587a7269eSachartre done: 128687a7269eSachartre kmem_free(dkdevid, DEV_BSIZE); 128787a7269eSachartre return (status); 128887a7269eSachartre 128987a7269eSachartre } 129087a7269eSachartre 129187a7269eSachartre /* 129287a7269eSachartre * Function: 12931aff8f07SAlexandre Chartre * vd_dskimg_write_devid 129487a7269eSachartre * 129587a7269eSachartre * Description: 129687a7269eSachartre * Write a device id into disk image. 129787a7269eSachartre * 129887a7269eSachartre * Parameters: 129987a7269eSachartre * vd - disk on which the operation is performed. 130087a7269eSachartre * devid - the device ID to store. 
130187a7269eSachartre * 130287a7269eSachartre * Return Code: 130387a7269eSachartre * 0 - success 130487a7269eSachartre * EIO - I/O error while trying to access the disk image 130587a7269eSachartre * ENOSPC - disk has no space to store a device id 130687a7269eSachartre */ 130787a7269eSachartre static int 13081aff8f07SAlexandre Chartre vd_dskimg_write_devid(vd_t *vd, ddi_devid_t devid) 130987a7269eSachartre { 131087a7269eSachartre struct dk_devid *dkdevid; 131187a7269eSachartre uint_t chksum; 131287a7269eSachartre size_t blk; 131387a7269eSachartre int status; 131487a7269eSachartre 1315*65908c77Syu, larry liu - Sun Microsystems - Beijing China ASSERT(vd->vdisk_bsize == DEV_BSIZE); 1316*65908c77Syu, larry liu - Sun Microsystems - Beijing China 1317edcc0754Sachartre if (devid == NULL) { 1318edcc0754Sachartre /* nothing to write */ 1319edcc0754Sachartre return (0); 1320edcc0754Sachartre } 1321edcc0754Sachartre 13221aff8f07SAlexandre Chartre if ((status = vd_dskimg_get_devid_block(vd, &blk)) != 0) 132387a7269eSachartre return (status); 132487a7269eSachartre 132587a7269eSachartre dkdevid = kmem_zalloc(DEV_BSIZE, KM_SLEEP); 132687a7269eSachartre 132787a7269eSachartre /* set revision */ 132887a7269eSachartre dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB; 132987a7269eSachartre dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB; 133087a7269eSachartre 133187a7269eSachartre /* copy devid */ 133287a7269eSachartre bcopy(devid, &dkdevid->dkd_devid, ddi_devid_sizeof(devid)); 133387a7269eSachartre 133487a7269eSachartre /* compute checksum */ 133587a7269eSachartre chksum = vd_dkdevid2cksum(dkdevid); 133687a7269eSachartre 133787a7269eSachartre /* set checksum */ 133887a7269eSachartre DKD_FORMCHKSUM(chksum, dkdevid); 133987a7269eSachartre 134087a7269eSachartre /* store the devid */ 13411aff8f07SAlexandre Chartre if ((status = vd_dskimg_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, 134287a7269eSachartre (caddr_t)dkdevid, blk, DEV_BSIZE)) < 0) { 134387a7269eSachartre PR0("Error writing devid block at %lu", blk); 
134487a7269eSachartre status = EIO; 134587a7269eSachartre } else { 134687a7269eSachartre PR1("devid written at block %lu", blk); 134787a7269eSachartre status = 0; 134887a7269eSachartre } 134987a7269eSachartre 135087a7269eSachartre kmem_free(dkdevid, DEV_BSIZE); 135187a7269eSachartre return (status); 135287a7269eSachartre } 135387a7269eSachartre 135487a7269eSachartre /* 135587a7269eSachartre * Function: 135617cadca8Slm66018 * vd_do_scsi_rdwr 135787a7269eSachartre * 135887a7269eSachartre * Description: 135987a7269eSachartre * Read or write to a SCSI disk using an absolute disk offset. 136087a7269eSachartre * 136187a7269eSachartre * Parameters: 136287a7269eSachartre * vd - disk on which the operation is performed. 136387a7269eSachartre * operation - operation to execute: read (VD_OP_BREAD) or 136487a7269eSachartre * write (VD_OP_BWRITE). 136587a7269eSachartre * data - buffer where data are read to or written from. 136687a7269eSachartre * blk - starting block for the operation. 136787a7269eSachartre * len - number of bytes to read or write. 136887a7269eSachartre * 136987a7269eSachartre * Return Code: 137087a7269eSachartre * 0 - success 137187a7269eSachartre * n != 0 - error. 
137287a7269eSachartre */ 137387a7269eSachartre static int 137417cadca8Slm66018 vd_do_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t blk, size_t len) 137587a7269eSachartre { 137687a7269eSachartre struct uscsi_cmd ucmd; 137787a7269eSachartre union scsi_cdb cdb; 137887a7269eSachartre int nsectors, nblk; 137987a7269eSachartre int max_sectors; 138087a7269eSachartre int status, rval; 138187a7269eSachartre 138287a7269eSachartre ASSERT(!vd->file); 13831aff8f07SAlexandre Chartre ASSERT(!vd->volume); 1384*65908c77Syu, larry liu - Sun Microsystems - Beijing China ASSERT(vd->vdisk_bsize > 0); 138587a7269eSachartre 138687a7269eSachartre max_sectors = vd->max_xfer_sz; 1387*65908c77Syu, larry liu - Sun Microsystems - Beijing China nblk = (len / vd->vdisk_bsize); 138887a7269eSachartre 1389*65908c77Syu, larry liu - Sun Microsystems - Beijing China if (len % vd->vdisk_bsize != 0) 139087a7269eSachartre return (EINVAL); 139187a7269eSachartre 139287a7269eSachartre /* 139387a7269eSachartre * Build and execute the uscsi ioctl. We build a group0, group1 139487a7269eSachartre * or group4 command as necessary, since some targets 139587a7269eSachartre * do not support group1 commands. 139687a7269eSachartre */ 139787a7269eSachartre while (nblk) { 139887a7269eSachartre 139987a7269eSachartre bzero(&ucmd, sizeof (ucmd)); 140087a7269eSachartre bzero(&cdb, sizeof (cdb)); 140187a7269eSachartre 140287a7269eSachartre nsectors = (max_sectors < nblk) ? max_sectors : nblk; 140387a7269eSachartre 140417cadca8Slm66018 /* 140517cadca8Slm66018 * Some of the optical drives on sun4v machines are ATAPI 140617cadca8Slm66018 * devices which use Group 1 Read/Write commands so we need 140717cadca8Slm66018 * to explicitly check a flag which is set when a domain 140817cadca8Slm66018 * is bound. 
140917cadca8Slm66018 */ 141017cadca8Slm66018 if (blk < (2 << 20) && nsectors <= 0xff && !vd->is_atapi_dev) { 141187a7269eSachartre FORMG0ADDR(&cdb, blk); 1412342440ecSPrasad Singamsetty FORMG0COUNT(&cdb, (uchar_t)nsectors); 141387a7269eSachartre ucmd.uscsi_cdblen = CDB_GROUP0; 141487a7269eSachartre } else if (blk > 0xffffffff) { 141587a7269eSachartre FORMG4LONGADDR(&cdb, blk); 141687a7269eSachartre FORMG4COUNT(&cdb, nsectors); 141787a7269eSachartre ucmd.uscsi_cdblen = CDB_GROUP4; 141887a7269eSachartre cdb.scc_cmd |= SCMD_GROUP4; 141987a7269eSachartre } else { 142087a7269eSachartre FORMG1ADDR(&cdb, blk); 142187a7269eSachartre FORMG1COUNT(&cdb, nsectors); 142287a7269eSachartre ucmd.uscsi_cdblen = CDB_GROUP1; 142387a7269eSachartre cdb.scc_cmd |= SCMD_GROUP1; 142487a7269eSachartre } 142587a7269eSachartre ucmd.uscsi_cdb = (caddr_t)&cdb; 142687a7269eSachartre ucmd.uscsi_bufaddr = data; 1427*65908c77Syu, larry liu - Sun Microsystems - Beijing China ucmd.uscsi_buflen = nsectors * vd->backend_bsize; 142887a7269eSachartre ucmd.uscsi_timeout = vd_scsi_rdwr_timeout; 142987a7269eSachartre /* 143087a7269eSachartre * Set flags so that the command is isolated from normal 143187a7269eSachartre * commands and no error message is printed. 
143287a7269eSachartre */ 143387a7269eSachartre ucmd.uscsi_flags = USCSI_ISOLATE | USCSI_SILENT; 143487a7269eSachartre 143587a7269eSachartre if (operation == VD_OP_BREAD) { 143687a7269eSachartre cdb.scc_cmd |= SCMD_READ; 143787a7269eSachartre ucmd.uscsi_flags |= USCSI_READ; 143887a7269eSachartre } else { 143987a7269eSachartre cdb.scc_cmd |= SCMD_WRITE; 144087a7269eSachartre } 144187a7269eSachartre 144287a7269eSachartre status = ldi_ioctl(vd->ldi_handle[VD_ENTIRE_DISK_SLICE], 1443047ba61eSachartre USCSICMD, (intptr_t)&ucmd, (vd->open_flags | FKIOCTL), 144487a7269eSachartre kcred, &rval); 144587a7269eSachartre 144687a7269eSachartre if (status == 0) 144787a7269eSachartre status = ucmd.uscsi_status; 144887a7269eSachartre 144987a7269eSachartre if (status != 0) 145087a7269eSachartre break; 145187a7269eSachartre 145287a7269eSachartre /* 145387a7269eSachartre * Check if partial DMA breakup is required. If so, reduce 145487a7269eSachartre * the request size by half and retry the last request. 145587a7269eSachartre */ 145687a7269eSachartre if (ucmd.uscsi_resid == ucmd.uscsi_buflen) { 145787a7269eSachartre max_sectors >>= 1; 145887a7269eSachartre if (max_sectors <= 0) { 145987a7269eSachartre status = EIO; 146087a7269eSachartre break; 146187a7269eSachartre } 146287a7269eSachartre continue; 146387a7269eSachartre } 146487a7269eSachartre 146587a7269eSachartre if (ucmd.uscsi_resid != 0) { 146687a7269eSachartre status = EIO; 146787a7269eSachartre break; 146887a7269eSachartre } 146987a7269eSachartre 147087a7269eSachartre blk += nsectors; 147187a7269eSachartre nblk -= nsectors; 1472*65908c77Syu, larry liu - Sun Microsystems - Beijing China data += nsectors * vd->vdisk_bsize; 147387a7269eSachartre } 147487a7269eSachartre 147587a7269eSachartre return (status); 147687a7269eSachartre } 147787a7269eSachartre 1478205eeb1aSlm66018 /* 147917cadca8Slm66018 * Function: 148017cadca8Slm66018 * vd_scsi_rdwr 148117cadca8Slm66018 * 148217cadca8Slm66018 * Description: 148317cadca8Slm66018 * Wrapper 
function to read or write to a SCSI disk using an absolute 148417cadca8Slm66018 * disk offset. It checks the blocksize of the underlying device and, 148517cadca8Slm66018 * if necessary, adjusts the buffers accordingly before calling 148617cadca8Slm66018 * vd_do_scsi_rdwr() to do the actual read or write. 148717cadca8Slm66018 * 148817cadca8Slm66018 * Parameters: 148917cadca8Slm66018 * vd - disk on which the operation is performed. 149017cadca8Slm66018 * operation - operation to execute: read (VD_OP_BREAD) or 149117cadca8Slm66018 * write (VD_OP_BWRITE). 149217cadca8Slm66018 * data - buffer where data are read to or written from. 149317cadca8Slm66018 * blk - starting block for the operation. 149417cadca8Slm66018 * len - number of bytes to read or write. 149517cadca8Slm66018 * 149617cadca8Slm66018 * Return Code: 149717cadca8Slm66018 * 0 - success 149817cadca8Slm66018 * n != 0 - error. 149917cadca8Slm66018 */ 150017cadca8Slm66018 static int 150117cadca8Slm66018 vd_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t vblk, size_t vlen) 150217cadca8Slm66018 { 150317cadca8Slm66018 int rv; 150417cadca8Slm66018 150517cadca8Slm66018 size_t pblk; /* physical device block number of data on device */ 150617cadca8Slm66018 size_t delta; /* relative offset between pblk and vblk */ 150717cadca8Slm66018 size_t pnblk; /* number of physical blocks to be read from device */ 150817cadca8Slm66018 size_t plen; /* length of data to be read from physical device */ 150917cadca8Slm66018 char *buf; /* buffer area to fit physical device's block size */ 151017cadca8Slm66018 1511*65908c77Syu, larry liu - Sun Microsystems - Beijing China if (vd->backend_bsize == 0) { 15122f5224aeSachartre /* 15132f5224aeSachartre * The block size was not available during the attach, 15142f5224aeSachartre * try to update it now. 
15152f5224aeSachartre */ 1516de3a5331SRamesh Chitrothu if (vd_backend_check_size(vd) != 0) 15172f5224aeSachartre return (EIO); 15182f5224aeSachartre } 15192f5224aeSachartre 152017cadca8Slm66018 /* 152117cadca8Slm66018 * If the vdisk block size and the block size of the underlying device 152217cadca8Slm66018 * match we can skip straight to vd_do_scsi_rdwr(), otherwise we need 152317cadca8Slm66018 * to create a buffer large enough to handle the device's block size 152417cadca8Slm66018 * and adjust the block to be read from and the amount of data to 152517cadca8Slm66018 * read to correspond with the device's block size. 152617cadca8Slm66018 */ 1527*65908c77Syu, larry liu - Sun Microsystems - Beijing China if (vd->vdisk_bsize == vd->backend_bsize) 152817cadca8Slm66018 return (vd_do_scsi_rdwr(vd, operation, data, vblk, vlen)); 152917cadca8Slm66018 1530*65908c77Syu, larry liu - Sun Microsystems - Beijing China if (vd->vdisk_bsize > vd->backend_bsize) 153117cadca8Slm66018 return (EINVAL); 153217cadca8Slm66018 153317cadca8Slm66018 /* 153417cadca8Slm66018 * Writing of physical block sizes larger than the virtual block size 153517cadca8Slm66018 * is not supported. This would be added if/when support for guests 153617cadca8Slm66018 * writing to DVDs is implemented. 153717cadca8Slm66018 */ 153817cadca8Slm66018 if (operation == VD_OP_BWRITE) 153917cadca8Slm66018 return (ENOTSUP); 154017cadca8Slm66018 154117cadca8Slm66018 /* BEGIN CSTYLED */ 154217cadca8Slm66018 /* 154317cadca8Slm66018 * Below is a diagram showing the relationship between the physical 154417cadca8Slm66018 * and virtual blocks. If the virtual blocks marked by 'X' below are 154517cadca8Slm66018 * requested, then the physical blocks denoted by 'Y' are read. 
154617cadca8Slm66018 * 154717cadca8Slm66018 * vblk 154817cadca8Slm66018 * | vlen 154917cadca8Slm66018 * |<--------------->| 155017cadca8Slm66018 * v v 155117cadca8Slm66018 * --+--+--+--+--+--+--+--+--+--+--+--+--+--+--+- virtual disk: 155217cadca8Slm66018 * | | | |XX|XX|XX|XX|XX|XX| | | | | | } block size is 1553*65908c77Syu, larry liu - Sun Microsystems - Beijing China * --+--+--+--+--+--+--+--+--+--+--+--+--+--+--+- vd->vdisk_bsize 155417cadca8Slm66018 * : : : : 155517cadca8Slm66018 * >:==:< delta : : 155617cadca8Slm66018 * : : : : 155717cadca8Slm66018 * --+-----+-----+-----+-----+-----+-----+-----+-- physical disk: 155817cadca8Slm66018 * | |YY:YY|YYYYY|YYYYY|YY:YY| | | } block size is 1559*65908c77Syu, larry liu - Sun Microsystems - Beijing China * --+-----+-----+-----+-----+-----+-----+-----+-- vd->backend_bsize 156017cadca8Slm66018 * ^ ^ 156117cadca8Slm66018 * |<--------------------->| 156217cadca8Slm66018 * | plen 156317cadca8Slm66018 * pblk 156417cadca8Slm66018 */ 156517cadca8Slm66018 /* END CSTYLED */ 1566*65908c77Syu, larry liu - Sun Microsystems - Beijing China pblk = (vblk * vd->vdisk_bsize) / vd->backend_bsize; 1567*65908c77Syu, larry liu - Sun Microsystems - Beijing China delta = (vblk * vd->vdisk_bsize) - (pblk * vd->backend_bsize); 1568*65908c77Syu, larry liu - Sun Microsystems - Beijing China pnblk = ((delta + vlen - 1) / vd->backend_bsize) + 1; 1569*65908c77Syu, larry liu - Sun Microsystems - Beijing China plen = pnblk * vd->backend_bsize; 157017cadca8Slm66018 157117cadca8Slm66018 PR2("vblk %lx:pblk %lx: vlen %ld:plen %ld", vblk, pblk, vlen, plen); 157217cadca8Slm66018 157317cadca8Slm66018 buf = kmem_zalloc(sizeof (caddr_t) * plen, KM_SLEEP); 157417cadca8Slm66018 rv = vd_do_scsi_rdwr(vd, operation, (caddr_t)buf, pblk, plen); 157517cadca8Slm66018 bcopy(buf + delta, data, vlen); 157617cadca8Slm66018 157717cadca8Slm66018 kmem_free(buf, sizeof (caddr_t) * plen); 157817cadca8Slm66018 157917cadca8Slm66018 return (rv); 158017cadca8Slm66018 } 

/*
 * Function:
 *	vd_slice_flabel_read
 *
 * Description:
 *	This function simulates a read operation from the fake label of
 *	a single-slice disk.
 *
 *	Bytes at offsets below vd->flabel_size are copied from vd->flabel;
 *	bytes from flabel_size up to the fake label limit
 *	(vd->flabel_limit * vd->vdisk_bsize) are returned as zeros. A read
 *	extending past the limit is truncated at the limit.
 *
 * Parameters:
 *	vd		- single-slice disk to read from
 *	data		- buffer where data should be read to
 *	offset		- offset in bytes where the read should start
 *	length		- number of bytes to read
 *
 * Return Code:
 *	n >= 0		- success, n indicates the number of bytes read
 *			  (0 if offset is at or past the fake label limit)
 *	-1		- error (no path in this function returns it)
 */
static ssize_t
vd_slice_flabel_read(vd_t *vd, caddr_t data, size_t offset, size_t length)
{
	size_t n = 0;
	uint_t limit = vd->flabel_limit * vd->vdisk_bsize;

	ASSERT(vd->vdisk_type == VD_DISK_TYPE_SLICE);
	ASSERT(vd->flabel != NULL);

	/* if offset is past the fake label limit there's nothing to read */
	if (offset >= limit)
		return (0);

	/* data with offset 0 to flabel_size are read from flabel */
	if (offset < vd->flabel_size) {

		if (offset + length <= vd->flabel_size) {
			bcopy(vd->flabel + offset, data, length);
			return (length);
		}

		/* n is the part served from flabel; the rest is zero-filled */
		n = vd->flabel_size - offset;
		bcopy(vd->flabel + offset, data, n);
		data += n;
	}

	/* data with offset from flabel_size to flabel_limit are all zeros */
	if (offset + length <= limit) {
		bzero(data, length - n);
		return (length);
	}

	/* the request runs past the limit: stop at the limit */
	bzero(data, limit - offset - n);
	return (limit - offset);
}

/*
 * Function:
 *	vd_slice_flabel_write
 *
 * Description:
 *	This function simulates a write operation to the fake label of
 *	a single-slice disk. Write operations are actually faked and return
 *	success although the label is never changed. This is mostly to
 *	simulate a successful label update.
 *
 *	The only write that succeeds is, on a disk with a VTOC label, a write
 *	of exactly one vdisk block at offset 0 that carries a label with a
 *	valid magic number and checksum and whose geometry and VTOC are
 *	accepted by vd_slice_geom_isvalid() and vd_slice_vtoc_isvalid().
 *
 * Parameters:
 *	vd		- single-slice disk to write to
 *	data		- buffer where data should be written from
 *	offset		- offset in bytes where the write should start
 *	length		- number of bytes to write
 *
 * Return Code:
 *	n >= 0		- success, n indicates the number of bytes written
 *			  (0 if offset is at or past the fake label limit)
 *	-1		- error
 */
static ssize_t
vd_slice_flabel_write(vd_t *vd, caddr_t data, size_t offset, size_t length)
{
	uint_t limit = vd->flabel_limit * vd->vdisk_bsize;
	struct dk_label *label;
	struct dk_geom geom;
	struct extvtoc vtoc;

	ASSERT(vd->vdisk_type == VD_DISK_TYPE_SLICE);
	ASSERT(vd->flabel != NULL);

	/* the write starts beyond the fake label: nothing is handled here */
	if (offset >= limit)
		return (0);

	/*
	 * If this is a request to overwrite the VTOC disk label, check that
	 * the new label is similar to the previous one and return that the
	 * write was successful, but note that nothing is actually overwritten.
	 */
	if (vd->vdisk_label == VD_DISK_LABEL_VTOC &&
	    offset == 0 && length == vd->vdisk_bsize) {
		label = (void *)data;

		/* check that this is a valid label */
		if (label->dkl_magic != DKL_MAGIC ||
		    label->dkl_cksum != vd_lbl2cksum(label))
			return (-1);

		/* check the vtoc and geometry */
		vd_label_to_vtocgeom(label, &vtoc, &geom);
		if (vd_slice_geom_isvalid(vd, &geom) &&
		    vd_slice_vtoc_isvalid(vd, &vtoc))
			return (length);
	}

	/* fail any other write */
	return (-1);
}

/*
 * Function:
 *	vd_slice_fake_rdwr
 *
 * Description:
 *	This function simulates a raw read or write operation to a single-slice
 *	disk. It only handles the faked part of the operation i.e. I/Os to
 *	blocks which have no mapping with the vdisk backend (I/Os to the
 *	beginning and to the end of the vdisk).
 *
 *	The function returns 0 if the operation is completed and it has been
 *	entirely handled as a fake read or write. In that case, lengthp points
 *	to the number of bytes not read or written. Values returned by datap
 *	and blkp are undefined.
 *
 *	If the fake operation has succeeded but the read or write is not
 *	complete (i.e. the read/write operation extends beyond the blocks
 *	we fake) then the function returns EAGAIN and the datap, blkp and
 *	lengthp pointers point to the parameters for completing the operation.
 *
 *	In case of an error, for example if the slice is empty or parameters
 *	are invalid, then the function returns a non-zero value different
 *	from EAGAIN. In that case, the returned values of datap, blkp and
 *	lengthp are undefined.
 *
 * Parameters:
 *	vd		- single-slice disk on which the operation is performed
 *	slice		- slice on which the operation is performed,
 *			  VD_SLICE_NONE indicates that the operation
 *			  is done using an absolute disk offset.
 *	operation	- operation to execute: read (VD_OP_BREAD) or
 *			  write (VD_OP_BWRITE).
 *	datap		- pointer to the buffer where data are read to
 *			  or written from. Return the pointer where remaining
 *			  data have to be read to or written from.
 *	blkp		- pointer to the starting block for the operation.
 *			  Return the starting block relative to the vdisk
 *			  backend for the remaining operation.
 *	lengthp		- pointer to the number of bytes to read or write.
 *			  This should be a multiple of vdisk_bsize. Return the
 *			  remaining number of bytes to read or write.
 *
 * Return Code:
 *	0		- read/write operation is completed
 *	EAGAIN		- read/write operation is not completed
 *	other values	- error
 */
static int
vd_slice_fake_rdwr(vd_t *vd, int slice, int operation, caddr_t *datap,
    size_t *blkp, size_t *lengthp)
{
	struct dk_label *label;
	caddr_t data;
	size_t blk, length, csize;
	size_t ablk, asize, aoff, alen;
	ssize_t n;
	int sec, status;
	size_t bsize = vd->vdisk_bsize;

	ASSERT(vd->vdisk_type == VD_DISK_TYPE_SLICE);
	ASSERT(slice != 0);

	data = *datap;
	blk = *blkp;
	length = *lengthp;

	/*
	 * If this is not a raw I/O or an I/O from a full disk slice then
	 * this is an I/O to/from an empty slice.
	 */
	if (slice != VD_SLICE_NONE &&
	    (slice != VD_ENTIRE_DISK_SLICE ||
	    vd->vdisk_label != VD_DISK_LABEL_VTOC) &&
	    (slice != VD_EFI_WD_SLICE ||
	    vd->vdisk_label != VD_DISK_LABEL_EFI)) {
		return (EIO);
	}

	if (length % bsize != 0)
		return (EINVAL);

	/* handle any I/O with the fake label */
	if (operation == VD_OP_BWRITE)
		n = vd_slice_flabel_write(vd, data, blk * bsize, length);
	else
		n = vd_slice_flabel_read(vd, data, blk * bsize, length);

	if (n == -1)
		return (EINVAL);

	ASSERT(n % bsize == 0);

	/* adjust I/O arguments */
	data += n;
	blk += n / bsize;
	length -= n;

	/* check if there's something else to process */
	if (length == 0) {
		status = 0;
		goto done;
	}

	/* nothing is faked at the end of a VTOC full disk slice */
	if (vd->vdisk_label == VD_DISK_LABEL_VTOC &&
	    slice == VD_ENTIRE_DISK_SLICE) {
		status = EAGAIN;
		goto done;
	}

	/*
	 * Compute the faked area at the end of the vdisk: ablk is its first
	 * block and asize its size in blocks. csize (blocks per cylinder) is
	 * only set, and only used, for a VTOC label.
	 */
	if (vd->vdisk_label == VD_DISK_LABEL_EFI) {
		asize = EFI_MIN_RESV_SIZE + (EFI_MIN_ARRAY_SIZE / bsize) + 1;
		ablk = vd->vdisk_size - asize;
	} else {
		ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC);
		ASSERT(vd->dk_geom.dkg_apc == 0);

		csize = vd->dk_geom.dkg_nhead * vd->dk_geom.dkg_nsect;
		ablk = vd->dk_geom.dkg_ncyl * csize;
		asize = vd->dk_geom.dkg_acyl * csize;
	}

	/* aoff/alen: start block and block count of the remaining I/O */
	alen = length / bsize;
	aoff = blk;

	/* if we have reached the last block then the I/O is completed */
	if (aoff == ablk + asize) {
		status = 0;
		goto done;
	}

	/* if we are past the last block then return an error */
	if (aoff > ablk + asize)
		return (EIO);

	/*
	 * check if there is any I/O to end of the disk
	 *
	 * NOTE(review): when aoff + alen == ablk the request ends exactly
	 * where the faked area starts and touches none of it, yet it is not
	 * caught here, so a write of that shape is rejected with EIO just
	 * below. Confirm whether "<=" was intended.
	 */
	if (aoff + alen < ablk) {
		status = EAGAIN;
		goto done;
	}

	/* we don't allow any write to the end of the disk */
	if (operation == VD_OP_BWRITE)
		return (EIO);

	/* restrict [aoff, aoff + alen) to the part inside the faked area */
	if (aoff < ablk) {
		alen -= (ablk - aoff);
		aoff = ablk;
	}

	if (aoff + alen > ablk + asize) {
		alen = ablk + asize - aoff;
	}

	/* from here on alen is a number of bytes */
	alen *= bsize;

	if (operation == VD_OP_BREAD) {
		/* the faked area reads as zeros... */
		bzero(data + (aoff - blk) * bsize, alen);

		if (vd->vdisk_label == VD_DISK_LABEL_VTOC) {
			/* check if we read backup labels */
			label = VD_LABEL_VTOC(vd);
			ablk += (label->dkl_acyl - 1) * csize +
			    (label->dkl_nhead - 1) * label->dkl_nsect;

			/*
			 * ...except at sectors 1, 3, 5, 7 and 9 past the
			 * adjusted ablk, where the label is copied in if
			 * that block is part of the request.
			 */
			for (sec = 1; (sec < 5 * 2 + 1); sec += 2) {

				if (ablk + sec >= blk &&
				    ablk + sec < blk + (length / bsize)) {
					bcopy(label, data +
					    (ablk + sec - blk) * bsize,
					    sizeof (struct dk_label));
				}
			}
		}
	}

	/* what is left, if anything, has to be done on the backend */
	length -= alen;

	status = (length == 0)? 0: EAGAIN;

done:
	ASSERT(length == 0 || blk >= vd->flabel_limit);

	/*
	 * Return the parameters for the remaining I/O. The starting block is
	 * adjusted so that it is relative to the vdisk backend.
	 */
	*datap = data;
	*blkp = blk - vd->flabel_limit;
	*lengthp = length;

	return (status);
}

/*
 * Flush pending writes to the backend of a vdisk.
 *
 * If the vdisk is backed by a file (vd->file is set), the file vnode is
 * synced with VOP_FSYNC(); otherwise a DKIOCFLUSHWRITECACHE ioctl is issued
 * on the first LDI handle.
 *
 * Returns the status of the VOP_FSYNC() or ldi_ioctl() call.
 */
static int
vd_flush_write(vd_t *vd)
{
	int status, rval;

	if (vd->file) {
		status = VOP_FSYNC(vd->file_vnode, FSYNC, kcred, NULL);
	} else {
		status = ldi_ioctl(vd->ldi_handle[0], DKIOCFLUSHWRITECACHE,
		    NULL, vd->open_flags | FKIOCTL, kcred, &rval);
	}

	return (status);
}

/*
 * Perform the I/O described by a buf.
 *
 * arg is the struct buf to process; its b_private field points to the
 * vd_task_t from which the vdisk is obtained.
 *
 * For a zvol backend the buf is handed to ldi_strategy(). Otherwise the
 * backend must be a file and the I/O is done with vn_rdwr() at byte offset
 * b_lblkno * DEV_BSIZE; on success b_resid is set from the residual count
 * and biodone() is called. If either path fails, the error is recorded with
 * bioerror() and biodone() is called. When ldi_strategy() returns 0 nothing
 * else is done here (presumably the buf is completed by the driver).
 */
static void
vd_bio_task(void *arg)
{
	struct buf *buf = (struct buf *)arg;
	vd_task_t *task = (vd_task_t *)buf->b_private;
	vd_t *vd = task->vd;
	ssize_t resid;
	int status;

	ASSERT(vd->vdisk_bsize == DEV_BSIZE);

	if (vd->zvol) {

		status = ldi_strategy(vd->ldi_handle[0], buf);

	} else {

		ASSERT(vd->file);

		status = vn_rdwr((buf->b_flags & B_READ)? UIO_READ : UIO_WRITE,
		    vd->file_vnode, buf->b_un.b_addr, buf->b_bcount,
		    buf->b_lblkno * DEV_BSIZE, UIO_SYSSPACE, 0,
		    RLIM64_INFINITY, kcred, &resid);

		if (status == 0) {
			buf->b_resid = resid;
			biodone(buf);
			return;
		}
	}

	if (status != 0) {
		bioerror(buf, status);
		biodone(buf);
	}
}

/*
 * We define our own biodone function so that buffers used for
 * asynchronous writes are not released when biodone() is called.
 *
 * The buf is only marked B_DONE and its b_io semaphore is signalled.
 * Always returns 0.
 */
static int
vd_biodone(struct buf *bp)
{
	ASSERT((bp->b_flags & B_DONE) == 0);
	ASSERT(SEMA_HELD(&bp->b_sem));

	bp->b_flags |= B_DONE;
	sema_v(&bp->b_io);

	return (0);
}

/*
 * Return Values
 *	EINPROGRESS	- operation was successfully started
 *	EIO		- encountered LDC (aka.
task error) 1961205eeb1aSlm66018 * 0 - operation completed successfully 1962205eeb1aSlm66018 * 1963205eeb1aSlm66018 * Side Effect 1964205eeb1aSlm66018 * sets request->status = <disk operation status> 1965205eeb1aSlm66018 */ 19661ae08745Sheppo static int 1967d10e4ef2Snarayan vd_start_bio(vd_task_t *task) 19681ae08745Sheppo { 19694bac2208Snarayan int rv, status = 0; 1970d10e4ef2Snarayan vd_t *vd = task->vd; 1971d10e4ef2Snarayan vd_dring_payload_t *request = task->request; 1972d10e4ef2Snarayan struct buf *buf = &task->buf; 19734bac2208Snarayan uint8_t mtype; 19743c96341aSnarayan int slice; 1975047ba61eSachartre char *bufaddr = 0; 1976047ba61eSachartre size_t buflen; 1977bae9e67eSachartre size_t offset, length, nbytes; 1978d10e4ef2Snarayan 1979d10e4ef2Snarayan ASSERT(vd != NULL); 1980d10e4ef2Snarayan ASSERT(request != NULL); 19813c96341aSnarayan 19823c96341aSnarayan slice = request->slice; 19833c96341aSnarayan 198487a7269eSachartre ASSERT(slice == VD_SLICE_NONE || slice < vd->nslices); 1985d10e4ef2Snarayan ASSERT((request->operation == VD_OP_BREAD) || 1986d10e4ef2Snarayan (request->operation == VD_OP_BWRITE)); 1987d10e4ef2Snarayan 1988205eeb1aSlm66018 if (request->nbytes == 0) { 1989205eeb1aSlm66018 /* no service for trivial requests */ 1990205eeb1aSlm66018 request->status = EINVAL; 1991205eeb1aSlm66018 return (0); 1992205eeb1aSlm66018 } 19931ae08745Sheppo 1994d10e4ef2Snarayan PR1("%s %lu bytes at block %lu", 1995d10e4ef2Snarayan (request->operation == VD_OP_BREAD) ? "Read" : "Write", 1996d10e4ef2Snarayan request->nbytes, request->addr); 19971ae08745Sheppo 1998047ba61eSachartre /* 1999047ba61eSachartre * We have to check the open flags because the functions processing 2000047ba61eSachartre * the read/write request will not do it. 
2001047ba61eSachartre */ 2002047ba61eSachartre if (request->operation == VD_OP_BWRITE && !(vd->open_flags & FWRITE)) { 2003047ba61eSachartre PR0("write fails because backend is opened read-only"); 2004047ba61eSachartre request->nbytes = 0; 2005047ba61eSachartre request->status = EROFS; 2006047ba61eSachartre return (0); 2007047ba61eSachartre } 2008d10e4ef2Snarayan 20094bac2208Snarayan mtype = (&vd->inband_task == task) ? LDC_SHADOW_MAP : LDC_DIRECT_MAP; 20104bac2208Snarayan 20114bac2208Snarayan /* Map memory exported by client */ 20124bac2208Snarayan status = ldc_mem_map(task->mhdl, request->cookie, request->ncookies, 20134bac2208Snarayan mtype, (request->operation == VD_OP_BREAD) ? LDC_MEM_W : LDC_MEM_R, 2014047ba61eSachartre &bufaddr, NULL); 20154bac2208Snarayan if (status != 0) { 20163af08d82Slm66018 PR0("ldc_mem_map() returned err %d ", status); 2017205eeb1aSlm66018 return (EIO); 2018d10e4ef2Snarayan } 2019d10e4ef2Snarayan 2020bae9e67eSachartre /* 2021bae9e67eSachartre * The buffer size has to be 8-byte aligned, so the client should have 2022bae9e67eSachartre * sent a buffer which size is roundup to the next 8-byte aligned value. 
2023bae9e67eSachartre */ 2024bae9e67eSachartre buflen = P2ROUNDUP(request->nbytes, 8); 2025047ba61eSachartre 2026047ba61eSachartre status = ldc_mem_acquire(task->mhdl, 0, buflen); 20274bac2208Snarayan if (status != 0) { 20284bac2208Snarayan (void) ldc_mem_unmap(task->mhdl); 20293af08d82Slm66018 PR0("ldc_mem_acquire() returned err %d ", status); 2030205eeb1aSlm66018 return (EIO); 20314bac2208Snarayan } 20324bac2208Snarayan 2033bae9e67eSachartre offset = request->addr; 2034bae9e67eSachartre nbytes = request->nbytes; 2035bae9e67eSachartre length = nbytes; 2036bae9e67eSachartre 2037bae9e67eSachartre /* default number of byte returned by the I/O */ 2038bae9e67eSachartre request->nbytes = 0; 2039bae9e67eSachartre 2040bae9e67eSachartre if (vd->vdisk_type == VD_DISK_TYPE_SLICE) { 2041bae9e67eSachartre 2042bae9e67eSachartre if (slice != 0) { 2043bae9e67eSachartre /* handle any fake I/O */ 2044bae9e67eSachartre rv = vd_slice_fake_rdwr(vd, slice, request->operation, 2045bae9e67eSachartre &bufaddr, &offset, &length); 2046bae9e67eSachartre 2047bae9e67eSachartre /* record the number of bytes from the fake I/O */ 2048bae9e67eSachartre request->nbytes = nbytes - length; 2049bae9e67eSachartre 2050bae9e67eSachartre if (rv == 0) { 2051bae9e67eSachartre request->status = 0; 2052bae9e67eSachartre goto io_done; 2053bae9e67eSachartre } 2054bae9e67eSachartre 2055bae9e67eSachartre if (rv != EAGAIN) { 20563c96341aSnarayan request->nbytes = 0; 2057205eeb1aSlm66018 request->status = EIO; 2058bae9e67eSachartre goto io_done; 20593c96341aSnarayan } 2060bae9e67eSachartre 2061bae9e67eSachartre /* 2062bae9e67eSachartre * If we return with EAGAIN then this means that there 2063bae9e67eSachartre * are still data to read or write. 2064bae9e67eSachartre */ 2065bae9e67eSachartre ASSERT(length != 0); 2066bae9e67eSachartre 2067bae9e67eSachartre /* 2068bae9e67eSachartre * We need to continue the I/O from the slice backend to 2069bae9e67eSachartre * complete the request. 
The variables bufaddr, offset 2070bae9e67eSachartre * and length have been adjusted to have the right 2071bae9e67eSachartre * information to do the remaining I/O from the backend. 2072bae9e67eSachartre * The backend is entirely mapped to slice 0 so we just 2073bae9e67eSachartre * have to complete the I/O from that slice. 2074bae9e67eSachartre */ 2075bae9e67eSachartre slice = 0; 2076bae9e67eSachartre } 2077bae9e67eSachartre 207883990c4aSAlexandre Chartre } else if (vd->volume || vd->file) { 20791aff8f07SAlexandre Chartre 20801aff8f07SAlexandre Chartre rv = vd_dskimg_io_params(vd, slice, &offset, &length); 20811aff8f07SAlexandre Chartre if (rv != 0) { 20821aff8f07SAlexandre Chartre request->status = (rv == ENODATA)? 0: EIO; 20831aff8f07SAlexandre Chartre goto io_done; 20841aff8f07SAlexandre Chartre } 20851aff8f07SAlexandre Chartre slice = 0; 20861aff8f07SAlexandre Chartre 208783990c4aSAlexandre Chartre } else if (slice == VD_SLICE_NONE) { 2088bae9e67eSachartre 208987a7269eSachartre /* 209087a7269eSachartre * This is not a disk image so it is a real disk. We 209187a7269eSachartre * assume that the underlying device driver supports 209287a7269eSachartre * USCSICMD ioctls. This is the case of all SCSI devices 209387a7269eSachartre * (sd, ssd...). 209487a7269eSachartre * 209587a7269eSachartre * In the future if we have non-SCSI disks we would need 209687a7269eSachartre * to invoke the appropriate function to do I/O using an 209717cadca8Slm66018 * absolute disk offset (for example using DIOCTL_RWCMD 209887a7269eSachartre * for IDE disks). 
209987a7269eSachartre */ 2100bae9e67eSachartre rv = vd_scsi_rdwr(vd, request->operation, bufaddr, offset, 2101bae9e67eSachartre length); 210287a7269eSachartre if (rv != 0) { 2103bae9e67eSachartre request->status = EIO; 2104bae9e67eSachartre } else { 2105bae9e67eSachartre request->nbytes = length; 2106bae9e67eSachartre request->status = 0; 2107bae9e67eSachartre } 2108bae9e67eSachartre goto io_done; 2109bae9e67eSachartre } 2110bae9e67eSachartre 2111bae9e67eSachartre /* Start the block I/O */ 2112047ba61eSachartre bioinit(buf); 2113047ba61eSachartre buf->b_flags = B_BUSY; 2114bae9e67eSachartre buf->b_bcount = length; 2115bae9e67eSachartre buf->b_lblkno = offset; 2116bae9e67eSachartre buf->b_bufsize = buflen; 2117047ba61eSachartre buf->b_edev = vd->dev[slice]; 2118047ba61eSachartre buf->b_un.b_addr = bufaddr; 21191aff8f07SAlexandre Chartre buf->b_iodone = vd_biodone; 21201aff8f07SAlexandre Chartre 212183990c4aSAlexandre Chartre if (vd->file || vd->zvol) { 212283990c4aSAlexandre Chartre /* 212383990c4aSAlexandre Chartre * I/O to a file are dispatched to an I/O queue, so that several 212483990c4aSAlexandre Chartre * I/Os can be processed in parallel. We also do that for ZFS 212583990c4aSAlexandre Chartre * volumes because the ZFS volume strategy() function will only 212683990c4aSAlexandre Chartre * return after the I/O is completed (instead of just starting 212783990c4aSAlexandre Chartre * the I/O). 212883990c4aSAlexandre Chartre */ 212983990c4aSAlexandre Chartre 21301aff8f07SAlexandre Chartre if (request->operation == VD_OP_BREAD) { 21311aff8f07SAlexandre Chartre buf->b_flags |= B_READ; 21321aff8f07SAlexandre Chartre } else { 21331aff8f07SAlexandre Chartre /* 213483990c4aSAlexandre Chartre * For ZFS volumes and files, we do an asynchronous 213583990c4aSAlexandre Chartre * write and we will wait for the completion of the 213683990c4aSAlexandre Chartre * write in vd_complete_bio() by flushing the volume 213783990c4aSAlexandre Chartre * or file. 
213883990c4aSAlexandre Chartre * 213983990c4aSAlexandre Chartre * This done for performance reasons, so that we can 214083990c4aSAlexandre Chartre * group together several write requests into a single 214183990c4aSAlexandre Chartre * flush operation. 21421aff8f07SAlexandre Chartre */ 21431aff8f07SAlexandre Chartre buf->b_flags |= B_WRITE | B_ASYNC; 214483990c4aSAlexandre Chartre 214583990c4aSAlexandre Chartre /* 214683990c4aSAlexandre Chartre * We keep track of the write so that we can group 214783990c4aSAlexandre Chartre * requests when flushing. The write queue has the 214883990c4aSAlexandre Chartre * same number of slots as the dring so this prevents 214983990c4aSAlexandre Chartre * the write queue from wrapping and overwriting 215083990c4aSAlexandre Chartre * existing entries: if the write queue gets full 215183990c4aSAlexandre Chartre * then that means that the dring is full so we stop 215283990c4aSAlexandre Chartre * receiving new requests until an existing request 215383990c4aSAlexandre Chartre * is processed, removed from the write queue and 215483990c4aSAlexandre Chartre * then from the dring. 
215583990c4aSAlexandre Chartre */ 215683990c4aSAlexandre Chartre task->write_index = vd->write_index; 215783990c4aSAlexandre Chartre vd->write_queue[task->write_index] = buf; 215883990c4aSAlexandre Chartre vd->write_index = 215983990c4aSAlexandre Chartre VD_WRITE_INDEX_NEXT(vd, vd->write_index); 216083990c4aSAlexandre Chartre } 216183990c4aSAlexandre Chartre 216283990c4aSAlexandre Chartre buf->b_private = task; 216383990c4aSAlexandre Chartre 216483990c4aSAlexandre Chartre ASSERT(vd->ioq != NULL); 216583990c4aSAlexandre Chartre 216683990c4aSAlexandre Chartre request->status = 0; 216783990c4aSAlexandre Chartre (void) ddi_taskq_dispatch(task->vd->ioq, vd_bio_task, buf, 216883990c4aSAlexandre Chartre DDI_SLEEP); 216983990c4aSAlexandre Chartre 217083990c4aSAlexandre Chartre } else { 217183990c4aSAlexandre Chartre 217283990c4aSAlexandre Chartre if (request->operation == VD_OP_BREAD) { 217383990c4aSAlexandre Chartre buf->b_flags |= B_READ; 217483990c4aSAlexandre Chartre } else { 21751aff8f07SAlexandre Chartre buf->b_flags |= B_WRITE; 21761aff8f07SAlexandre Chartre } 2177047ba61eSachartre 2178*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* convert VIO block number to buf block number */ 2179*65908c77Syu, larry liu - Sun Microsystems - Beijing China buf->b_lblkno = offset << vd->vio_bshift; 2180*65908c77Syu, larry liu - Sun Microsystems - Beijing China 2181bae9e67eSachartre request->status = ldi_strategy(vd->ldi_handle[slice], buf); 218283990c4aSAlexandre Chartre } 2183205eeb1aSlm66018 2184205eeb1aSlm66018 /* 2185205eeb1aSlm66018 * This is to indicate to the caller that the request 2186205eeb1aSlm66018 * needs to be finished by vd_complete_bio() by calling 2187205eeb1aSlm66018 * biowait() there and waiting for that to return before 2188205eeb1aSlm66018 * triggering the notification of the vDisk client. 
2189205eeb1aSlm66018 * 2190205eeb1aSlm66018 * This is necessary when writing to real disks as 2191205eeb1aSlm66018 * otherwise calls to ldi_strategy() would be serialized 2192205eeb1aSlm66018 * behind the calls to biowait() and performance would 2193205eeb1aSlm66018 * suffer. 2194205eeb1aSlm66018 */ 2195205eeb1aSlm66018 if (request->status == 0) 219687a7269eSachartre return (EINPROGRESS); 2197047ba61eSachartre 2198047ba61eSachartre biofini(buf); 21993c96341aSnarayan 2200bae9e67eSachartre io_done: 2201bae9e67eSachartre /* Clean up after error or completion */ 2202047ba61eSachartre rv = ldc_mem_release(task->mhdl, 0, buflen); 22034bac2208Snarayan if (rv) { 22043af08d82Slm66018 PR0("ldc_mem_release() returned err %d ", rv); 2205205eeb1aSlm66018 status = EIO; 22064bac2208Snarayan } 22074bac2208Snarayan rv = ldc_mem_unmap(task->mhdl); 22084bac2208Snarayan if (rv) { 2209205eeb1aSlm66018 PR0("ldc_mem_unmap() returned err %d ", rv); 2210205eeb1aSlm66018 status = EIO; 22114bac2208Snarayan } 22124bac2208Snarayan 2213d10e4ef2Snarayan return (status); 2214d10e4ef2Snarayan } 2215d10e4ef2Snarayan 2216205eeb1aSlm66018 /* 2217205eeb1aSlm66018 * This function should only be called from vd_notify to ensure that requests 2218205eeb1aSlm66018 * are responded to in the order that they are received. 
2219205eeb1aSlm66018 */ 2220d10e4ef2Snarayan static int 2221d10e4ef2Snarayan send_msg(ldc_handle_t ldc_handle, void *msg, size_t msglen) 2222d10e4ef2Snarayan { 22233af08d82Slm66018 int status; 2224d10e4ef2Snarayan size_t nbytes; 2225d10e4ef2Snarayan 22263af08d82Slm66018 do { 2227d10e4ef2Snarayan nbytes = msglen; 2228d10e4ef2Snarayan status = ldc_write(ldc_handle, msg, &nbytes); 22293af08d82Slm66018 if (status != EWOULDBLOCK) 22303af08d82Slm66018 break; 22313af08d82Slm66018 drv_usecwait(vds_ldc_delay); 22323af08d82Slm66018 } while (status == EWOULDBLOCK); 2233d10e4ef2Snarayan 2234d10e4ef2Snarayan if (status != 0) { 22353af08d82Slm66018 if (status != ECONNRESET) 22363af08d82Slm66018 PR0("ldc_write() returned errno %d", status); 2237d10e4ef2Snarayan return (status); 2238d10e4ef2Snarayan } else if (nbytes != msglen) { 22393af08d82Slm66018 PR0("ldc_write() performed only partial write"); 2240d10e4ef2Snarayan return (EIO); 2241d10e4ef2Snarayan } 2242d10e4ef2Snarayan 2243d10e4ef2Snarayan PR1("SENT %lu bytes", msglen); 2244d10e4ef2Snarayan return (0); 2245d10e4ef2Snarayan } 2246d10e4ef2Snarayan 2247d10e4ef2Snarayan static void 2248d10e4ef2Snarayan vd_need_reset(vd_t *vd, boolean_t reset_ldc) 2249d10e4ef2Snarayan { 2250d10e4ef2Snarayan mutex_enter(&vd->lock); 2251d10e4ef2Snarayan vd->reset_state = B_TRUE; 2252d10e4ef2Snarayan vd->reset_ldc = reset_ldc; 2253d10e4ef2Snarayan mutex_exit(&vd->lock); 2254d10e4ef2Snarayan } 2255d10e4ef2Snarayan 2256d10e4ef2Snarayan /* 2257d10e4ef2Snarayan * Reset the state of the connection with a client, if needed; reset the LDC 2258d10e4ef2Snarayan * transport as well, if needed. This function should only be called from the 22593af08d82Slm66018 * "vd_recv_msg", as it waits for tasks - otherwise a deadlock can occur. 
2260d10e4ef2Snarayan */ 2261d10e4ef2Snarayan static void 2262d10e4ef2Snarayan vd_reset_if_needed(vd_t *vd) 2263d10e4ef2Snarayan { 2264d10e4ef2Snarayan int status = 0; 2265d10e4ef2Snarayan 2266d10e4ef2Snarayan mutex_enter(&vd->lock); 2267d10e4ef2Snarayan if (!vd->reset_state) { 2268d10e4ef2Snarayan ASSERT(!vd->reset_ldc); 2269d10e4ef2Snarayan mutex_exit(&vd->lock); 2270d10e4ef2Snarayan return; 2271d10e4ef2Snarayan } 2272d10e4ef2Snarayan mutex_exit(&vd->lock); 2273d10e4ef2Snarayan 2274d10e4ef2Snarayan PR0("Resetting connection state with %s", VD_CLIENT(vd)); 2275d10e4ef2Snarayan 2276d10e4ef2Snarayan /* 2277d10e4ef2Snarayan * Let any asynchronous I/O complete before possibly pulling the rug 2278d10e4ef2Snarayan * out from under it; defer checking vd->reset_ldc, as one of the 2279d10e4ef2Snarayan * asynchronous tasks might set it 2280d10e4ef2Snarayan */ 228183990c4aSAlexandre Chartre if (vd->ioq != NULL) 228283990c4aSAlexandre Chartre ddi_taskq_wait(vd->ioq); 2283d10e4ef2Snarayan ddi_taskq_wait(vd->completionq); 2284d10e4ef2Snarayan 228583990c4aSAlexandre Chartre status = vd_flush_write(vd); 22863c96341aSnarayan if (status) { 228783990c4aSAlexandre Chartre PR0("flushwrite returned error %d", status); 22883c96341aSnarayan } 22893c96341aSnarayan 2290d10e4ef2Snarayan if ((vd->initialized & VD_DRING) && 2291d10e4ef2Snarayan ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0)) 22923af08d82Slm66018 PR0("ldc_mem_dring_unmap() returned errno %d", status); 2293d10e4ef2Snarayan 22943af08d82Slm66018 vd_free_dring_task(vd); 22953af08d82Slm66018 22963af08d82Slm66018 /* Free the staging buffer for msgs */ 22973af08d82Slm66018 if (vd->vio_msgp != NULL) { 22983af08d82Slm66018 kmem_free(vd->vio_msgp, vd->max_msglen); 22993af08d82Slm66018 vd->vio_msgp = NULL; 2300d10e4ef2Snarayan } 2301d10e4ef2Snarayan 23023af08d82Slm66018 /* Free the inband message buffer */ 23033af08d82Slm66018 if (vd->inband_task.msg != NULL) { 23043af08d82Slm66018 kmem_free(vd->inband_task.msg, vd->max_msglen); 
23053af08d82Slm66018 vd->inband_task.msg = NULL; 23063af08d82Slm66018 } 2307d10e4ef2Snarayan 2308d10e4ef2Snarayan mutex_enter(&vd->lock); 23093af08d82Slm66018 23103af08d82Slm66018 if (vd->reset_ldc) 23113af08d82Slm66018 PR0("taking down LDC channel"); 2312e1ebb9ecSlm66018 if (vd->reset_ldc && ((status = ldc_down(vd->ldc_handle)) != 0)) 23133af08d82Slm66018 PR0("ldc_down() returned errno %d", status); 2314d10e4ef2Snarayan 23152f5224aeSachartre /* Reset exclusive access rights */ 23162f5224aeSachartre vd_reset_access(vd); 23172f5224aeSachartre 2318d10e4ef2Snarayan vd->initialized &= ~(VD_SID | VD_SEQ_NUM | VD_DRING); 2319d10e4ef2Snarayan vd->state = VD_STATE_INIT; 2320d10e4ef2Snarayan vd->max_msglen = sizeof (vio_msg_t); /* baseline vio message size */ 2321d10e4ef2Snarayan 23223af08d82Slm66018 /* Allocate the staging buffer */ 23233af08d82Slm66018 vd->vio_msgp = kmem_alloc(vd->max_msglen, KM_SLEEP); 23243af08d82Slm66018 23253af08d82Slm66018 PR0("calling ldc_up\n"); 23263af08d82Slm66018 (void) ldc_up(vd->ldc_handle); 23273af08d82Slm66018 2328d10e4ef2Snarayan vd->reset_state = B_FALSE; 2329d10e4ef2Snarayan vd->reset_ldc = B_FALSE; 23303af08d82Slm66018 2331d10e4ef2Snarayan mutex_exit(&vd->lock); 2332d10e4ef2Snarayan } 2333d10e4ef2Snarayan 23343af08d82Slm66018 static void vd_recv_msg(void *arg); 23353af08d82Slm66018 23363af08d82Slm66018 static void 23373af08d82Slm66018 vd_mark_in_reset(vd_t *vd) 23383af08d82Slm66018 { 23393af08d82Slm66018 int status; 23403af08d82Slm66018 23413af08d82Slm66018 PR0("vd_mark_in_reset: marking vd in reset\n"); 23423af08d82Slm66018 23433af08d82Slm66018 vd_need_reset(vd, B_FALSE); 23443af08d82Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, DDI_SLEEP); 23453af08d82Slm66018 if (status == DDI_FAILURE) { 23463af08d82Slm66018 PR0("cannot schedule task to recv msg\n"); 23473af08d82Slm66018 vd_need_reset(vd, B_TRUE); 23483af08d82Slm66018 return; 23493af08d82Slm66018 } 23503af08d82Slm66018 } 23513af08d82Slm66018 2352d10e4ef2Snarayan 
static int 23533c96341aSnarayan vd_mark_elem_done(vd_t *vd, int idx, int elem_status, int elem_nbytes) 2354d10e4ef2Snarayan { 2355d10e4ef2Snarayan boolean_t accepted; 2356d10e4ef2Snarayan int status; 2357bbfa0259Sha137994 on_trap_data_t otd; 2358d10e4ef2Snarayan vd_dring_entry_t *elem = VD_DRING_ELEM(idx); 2359d10e4ef2Snarayan 23603af08d82Slm66018 if (vd->reset_state) 23613af08d82Slm66018 return (0); 2362d10e4ef2Snarayan 2363d10e4ef2Snarayan /* Acquire the element */ 2364bbfa0259Sha137994 if ((status = VIO_DRING_ACQUIRE(&otd, vd->dring_mtype, 2365bbfa0259Sha137994 vd->dring_handle, idx, idx)) != 0) { 23663af08d82Slm66018 if (status == ECONNRESET) { 23673af08d82Slm66018 vd_mark_in_reset(vd); 23683af08d82Slm66018 return (0); 23693af08d82Slm66018 } else { 2370d10e4ef2Snarayan return (status); 2371d10e4ef2Snarayan } 23723af08d82Slm66018 } 2373d10e4ef2Snarayan 2374d10e4ef2Snarayan /* Set the element's status and mark it done */ 2375d10e4ef2Snarayan accepted = (elem->hdr.dstate == VIO_DESC_ACCEPTED); 2376d10e4ef2Snarayan if (accepted) { 23773c96341aSnarayan elem->payload.nbytes = elem_nbytes; 2378d10e4ef2Snarayan elem->payload.status = elem_status; 2379d10e4ef2Snarayan elem->hdr.dstate = VIO_DESC_DONE; 2380d10e4ef2Snarayan } else { 2381d10e4ef2Snarayan /* Perhaps client timed out waiting for I/O... 
*/ 23823af08d82Slm66018 PR0("element %u no longer \"accepted\"", idx); 2383d10e4ef2Snarayan VD_DUMP_DRING_ELEM(elem); 2384d10e4ef2Snarayan } 2385d10e4ef2Snarayan /* Release the element */ 2386bbfa0259Sha137994 if ((status = VIO_DRING_RELEASE(vd->dring_mtype, 2387bbfa0259Sha137994 vd->dring_handle, idx, idx)) != 0) { 23883af08d82Slm66018 if (status == ECONNRESET) { 23893af08d82Slm66018 vd_mark_in_reset(vd); 23903af08d82Slm66018 return (0); 23913af08d82Slm66018 } else { 2392bbfa0259Sha137994 PR0("VIO_DRING_RELEASE() returned errno %d", 23933af08d82Slm66018 status); 2394d10e4ef2Snarayan return (status); 2395d10e4ef2Snarayan } 23963af08d82Slm66018 } 2397d10e4ef2Snarayan 2398d10e4ef2Snarayan return (accepted ? 0 : EINVAL); 2399d10e4ef2Snarayan } 2400d10e4ef2Snarayan 2401205eeb1aSlm66018 /* 2402205eeb1aSlm66018 * Return Values 2403205eeb1aSlm66018 * 0 - operation completed successfully 2404205eeb1aSlm66018 * EIO - encountered LDC / task error 2405205eeb1aSlm66018 * 2406205eeb1aSlm66018 * Side Effect 2407205eeb1aSlm66018 * sets request->status = <disk operation status> 2408205eeb1aSlm66018 */ 2409205eeb1aSlm66018 static int 2410205eeb1aSlm66018 vd_complete_bio(vd_task_t *task) 2411d10e4ef2Snarayan { 2412d10e4ef2Snarayan int status = 0; 2413205eeb1aSlm66018 int rv = 0; 2414d10e4ef2Snarayan vd_t *vd = task->vd; 2415d10e4ef2Snarayan vd_dring_payload_t *request = task->request; 2416d10e4ef2Snarayan struct buf *buf = &task->buf; 241783990c4aSAlexandre Chartre int wid, nwrites; 2418d10e4ef2Snarayan 2419d10e4ef2Snarayan 2420d10e4ef2Snarayan ASSERT(vd != NULL); 2421d10e4ef2Snarayan ASSERT(request != NULL); 2422d10e4ef2Snarayan ASSERT(task->msg != NULL); 2423d10e4ef2Snarayan ASSERT(task->msglen >= sizeof (*task->msg)); 2424d10e4ef2Snarayan 242583990c4aSAlexandre Chartre if (buf->b_flags & B_DONE) { 24261aff8f07SAlexandre Chartre /* 242783990c4aSAlexandre Chartre * If the I/O is already done then we don't call biowait() 242883990c4aSAlexandre Chartre * because biowait() might 
already have been called when 242983990c4aSAlexandre Chartre * flushing a previous asynchronous write. So we just 243083990c4aSAlexandre Chartre * retrieve the status of the request. 24311aff8f07SAlexandre Chartre */ 243283990c4aSAlexandre Chartre request->status = geterror(buf); 24331aff8f07SAlexandre Chartre } else { 243483990c4aSAlexandre Chartre /* 243583990c4aSAlexandre Chartre * Wait for the I/O. For synchronous I/O, biowait() will return 243683990c4aSAlexandre Chartre * when the I/O has completed. For asynchronous write, it will 243783990c4aSAlexandre Chartre * return the write has been submitted to the backend, but it 243883990c4aSAlexandre Chartre * may not have been committed. 243983990c4aSAlexandre Chartre */ 2440d10e4ef2Snarayan request->status = biowait(buf); 24411aff8f07SAlexandre Chartre } 2442d10e4ef2Snarayan 244383990c4aSAlexandre Chartre if (buf->b_flags & B_ASYNC) { 244483990c4aSAlexandre Chartre /* 244583990c4aSAlexandre Chartre * Asynchronous writes are used when writing to a file or a 244683990c4aSAlexandre Chartre * ZFS volume. In that case the bio notification indicates 244783990c4aSAlexandre Chartre * that the write has started. We have to flush the backend 244883990c4aSAlexandre Chartre * to ensure that the write has been committed before marking 244983990c4aSAlexandre Chartre * the request as completed. 
245083990c4aSAlexandre Chartre */ 245183990c4aSAlexandre Chartre ASSERT(task->request->operation == VD_OP_BWRITE); 245283990c4aSAlexandre Chartre 245383990c4aSAlexandre Chartre wid = task->write_index; 245483990c4aSAlexandre Chartre 245583990c4aSAlexandre Chartre /* check if write has been already flushed */ 245683990c4aSAlexandre Chartre if (vd->write_queue[wid] != NULL) { 245783990c4aSAlexandre Chartre 245883990c4aSAlexandre Chartre vd->write_queue[wid] = NULL; 245983990c4aSAlexandre Chartre wid = VD_WRITE_INDEX_NEXT(vd, wid); 246083990c4aSAlexandre Chartre 246183990c4aSAlexandre Chartre /* 246283990c4aSAlexandre Chartre * Because flushing is time consuming, it is worth 246383990c4aSAlexandre Chartre * waiting for any other writes so that they can be 246483990c4aSAlexandre Chartre * included in this single flush request. 246583990c4aSAlexandre Chartre */ 246683990c4aSAlexandre Chartre if (vd_awflush & VD_AWFLUSH_GROUP) { 246783990c4aSAlexandre Chartre nwrites = 1; 246883990c4aSAlexandre Chartre while (vd->write_queue[wid] != NULL) { 246983990c4aSAlexandre Chartre (void) biowait(vd->write_queue[wid]); 247083990c4aSAlexandre Chartre vd->write_queue[wid] = NULL; 247183990c4aSAlexandre Chartre wid = VD_WRITE_INDEX_NEXT(vd, wid); 247283990c4aSAlexandre Chartre nwrites++; 247383990c4aSAlexandre Chartre } 247483990c4aSAlexandre Chartre DTRACE_PROBE2(flushgrp, vd_task_t *, task, 247583990c4aSAlexandre Chartre int, nwrites); 247683990c4aSAlexandre Chartre } 247783990c4aSAlexandre Chartre 247883990c4aSAlexandre Chartre if (vd_awflush & VD_AWFLUSH_IMMEDIATE) { 247983990c4aSAlexandre Chartre request->status = vd_flush_write(vd); 248083990c4aSAlexandre Chartre } else if (vd_awflush & VD_AWFLUSH_DEFER) { 248183990c4aSAlexandre Chartre (void) taskq_dispatch(system_taskq, 248283990c4aSAlexandre Chartre (void (*)(void *))vd_flush_write, vd, 248383990c4aSAlexandre Chartre DDI_SLEEP); 248483990c4aSAlexandre Chartre request->status = 0; 248583990c4aSAlexandre Chartre } 
248683990c4aSAlexandre Chartre } 248783990c4aSAlexandre Chartre } 248883990c4aSAlexandre Chartre 2489bae9e67eSachartre /* Update the number of bytes read/written */ 2490bae9e67eSachartre request->nbytes += buf->b_bcount - buf->b_resid; 24913c96341aSnarayan 24924bac2208Snarayan /* Release the buffer */ 24933af08d82Slm66018 if (!vd->reset_state) 2494bae9e67eSachartre status = ldc_mem_release(task->mhdl, 0, buf->b_bufsize); 24954bac2208Snarayan if (status) { 24963af08d82Slm66018 PR0("ldc_mem_release() returned errno %d copying to " 24973af08d82Slm66018 "client", status); 24983af08d82Slm66018 if (status == ECONNRESET) { 24993af08d82Slm66018 vd_mark_in_reset(vd); 25003af08d82Slm66018 } 2501205eeb1aSlm66018 rv = EIO; 25021ae08745Sheppo } 25031ae08745Sheppo 25043af08d82Slm66018 /* Unmap the memory, even if in reset */ 25054bac2208Snarayan status = ldc_mem_unmap(task->mhdl); 25064bac2208Snarayan if (status) { 25073af08d82Slm66018 PR0("ldc_mem_unmap() returned errno %d copying to client", 25084bac2208Snarayan status); 25093af08d82Slm66018 if (status == ECONNRESET) { 25103af08d82Slm66018 vd_mark_in_reset(vd); 25113af08d82Slm66018 } 2512205eeb1aSlm66018 rv = EIO; 25134bac2208Snarayan } 25144bac2208Snarayan 2515d10e4ef2Snarayan biofini(buf); 25161ae08745Sheppo 2517205eeb1aSlm66018 return (rv); 2518205eeb1aSlm66018 } 2519205eeb1aSlm66018 2520205eeb1aSlm66018 /* 2521205eeb1aSlm66018 * Description: 2522205eeb1aSlm66018 * This function is called by the two functions called by a taskq 2523205eeb1aSlm66018 * [ vd_complete_notify() and vd_serial_notify()) ] to send the 2524205eeb1aSlm66018 * message to the client. 
2525205eeb1aSlm66018 * 2526205eeb1aSlm66018 * Parameters: 2527205eeb1aSlm66018 * arg - opaque pointer to structure containing task to be completed 2528205eeb1aSlm66018 * 2529205eeb1aSlm66018 * Return Values 2530205eeb1aSlm66018 * None 2531205eeb1aSlm66018 */ 2532205eeb1aSlm66018 static void 2533205eeb1aSlm66018 vd_notify(vd_task_t *task) 2534205eeb1aSlm66018 { 2535205eeb1aSlm66018 int status; 2536205eeb1aSlm66018 2537205eeb1aSlm66018 ASSERT(task != NULL); 2538205eeb1aSlm66018 ASSERT(task->vd != NULL); 2539205eeb1aSlm66018 2540205eeb1aSlm66018 /* 2541205eeb1aSlm66018 * Send the "ack" or "nack" back to the client; if sending the message 2542205eeb1aSlm66018 * via LDC fails, arrange to reset both the connection state and LDC 2543205eeb1aSlm66018 * itself 2544205eeb1aSlm66018 */ 2545205eeb1aSlm66018 PR2("Sending %s", 2546205eeb1aSlm66018 (task->msg->tag.vio_subtype == VIO_SUBTYPE_ACK) ? "ACK" : "NACK"); 2547205eeb1aSlm66018 2548205eeb1aSlm66018 status = send_msg(task->vd->ldc_handle, task->msg, task->msglen); 2549205eeb1aSlm66018 switch (status) { 2550205eeb1aSlm66018 case 0: 2551205eeb1aSlm66018 break; 2552205eeb1aSlm66018 case ECONNRESET: 2553205eeb1aSlm66018 vd_mark_in_reset(task->vd); 2554205eeb1aSlm66018 break; 2555205eeb1aSlm66018 default: 2556205eeb1aSlm66018 PR0("initiating full reset"); 2557205eeb1aSlm66018 vd_need_reset(task->vd, B_TRUE); 2558205eeb1aSlm66018 break; 2559205eeb1aSlm66018 } 2560205eeb1aSlm66018 2561205eeb1aSlm66018 DTRACE_PROBE1(task__end, vd_task_t *, task); 2562205eeb1aSlm66018 } 2563205eeb1aSlm66018 2564205eeb1aSlm66018 /* 2565205eeb1aSlm66018 * Description: 2566205eeb1aSlm66018 * Mark the Dring entry as Done and (if necessary) send an ACK/NACK to 2567205eeb1aSlm66018 * the vDisk client 2568205eeb1aSlm66018 * 2569205eeb1aSlm66018 * Parameters: 2570205eeb1aSlm66018 * task - structure containing the request sent from client 2571205eeb1aSlm66018 * 2572205eeb1aSlm66018 * Return Values 2573205eeb1aSlm66018 * None 2574205eeb1aSlm66018 */ 
2575205eeb1aSlm66018 static void 2576205eeb1aSlm66018 vd_complete_notify(vd_task_t *task) 2577205eeb1aSlm66018 { 2578205eeb1aSlm66018 int status = 0; 2579205eeb1aSlm66018 vd_t *vd = task->vd; 2580205eeb1aSlm66018 vd_dring_payload_t *request = task->request; 2581205eeb1aSlm66018 2582d10e4ef2Snarayan /* Update the dring element for a dring client */ 2583f0ca1d9aSsb155480 if (!vd->reset_state && (vd->xfer_mode == VIO_DRING_MODE_V1_0)) { 25843c96341aSnarayan status = vd_mark_elem_done(vd, task->index, 25853c96341aSnarayan request->status, request->nbytes); 25863af08d82Slm66018 if (status == ECONNRESET) 25873af08d82Slm66018 vd_mark_in_reset(vd); 2588bbfa0259Sha137994 else if (status == EACCES) 2589bbfa0259Sha137994 vd_need_reset(vd, B_TRUE); 25903af08d82Slm66018 } 25911ae08745Sheppo 2592d10e4ef2Snarayan /* 2593205eeb1aSlm66018 * If a transport error occurred while marking the element done or 2594205eeb1aSlm66018 * previously while executing the task, arrange to "nack" the message 2595205eeb1aSlm66018 * when the final task in the descriptor element range completes 2596d10e4ef2Snarayan */ 2597205eeb1aSlm66018 if ((status != 0) || (task->status != 0)) 2598d10e4ef2Snarayan task->msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 25991ae08745Sheppo 2600d10e4ef2Snarayan /* 2601d10e4ef2Snarayan * Only the final task for a range of elements will respond to and 2602d10e4ef2Snarayan * free the message 2603d10e4ef2Snarayan */ 26043af08d82Slm66018 if (task->type == VD_NONFINAL_RANGE_TASK) { 2605d10e4ef2Snarayan return; 26063af08d82Slm66018 } 26071ae08745Sheppo 260827ac699dSzk194757 /* 260927ac699dSzk194757 * We should only send an ACK/NACK here if we are not currently in 261027ac699dSzk194757 * reset as, depending on how we reset, the dring may have been 261127ac699dSzk194757 * blown away and we don't want to ACK/NACK a message that isn't 261227ac699dSzk194757 * there. 
261327ac699dSzk194757 */ 261427ac699dSzk194757 if (!vd->reset_state) 2615205eeb1aSlm66018 vd_notify(task); 2616205eeb1aSlm66018 } 2617205eeb1aSlm66018 2618d10e4ef2Snarayan /* 2619205eeb1aSlm66018 * Description: 2620205eeb1aSlm66018 * This is the basic completion function called to handle inband data 2621205eeb1aSlm66018 * requests and handshake messages. All it needs to do is trigger a 2622205eeb1aSlm66018 * message to the client that the request is completed. 2623205eeb1aSlm66018 * 2624205eeb1aSlm66018 * Parameters: 2625205eeb1aSlm66018 * arg - opaque pointer to structure containing task to be completed 2626205eeb1aSlm66018 * 2627205eeb1aSlm66018 * Return Values 2628205eeb1aSlm66018 * None 2629d10e4ef2Snarayan */ 2630205eeb1aSlm66018 static void 2631205eeb1aSlm66018 vd_serial_notify(void *arg) 2632205eeb1aSlm66018 { 2633205eeb1aSlm66018 vd_task_t *task = (vd_task_t *)arg; 2634205eeb1aSlm66018 2635205eeb1aSlm66018 ASSERT(task != NULL); 2636205eeb1aSlm66018 vd_notify(task); 26371ae08745Sheppo } 26381ae08745Sheppo 26392f5224aeSachartre /* ARGSUSED */ 26402f5224aeSachartre static int 26412f5224aeSachartre vd_geom2dk_geom(void *vd_buf, size_t vd_buf_len, void *ioctl_arg) 26420a55fbb7Slm66018 { 26430a55fbb7Slm66018 VD_GEOM2DK_GEOM((vd_geom_t *)vd_buf, (struct dk_geom *)ioctl_arg); 26442f5224aeSachartre return (0); 26450a55fbb7Slm66018 } 26460a55fbb7Slm66018 26472f5224aeSachartre /* ARGSUSED */ 26482f5224aeSachartre static int 26492f5224aeSachartre vd_vtoc2vtoc(void *vd_buf, size_t vd_buf_len, void *ioctl_arg) 26500a55fbb7Slm66018 { 2651342440ecSPrasad Singamsetty VD_VTOC2VTOC((vd_vtoc_t *)vd_buf, (struct extvtoc *)ioctl_arg); 26522f5224aeSachartre return (0); 26530a55fbb7Slm66018 } 26540a55fbb7Slm66018 26550a55fbb7Slm66018 static void 26560a55fbb7Slm66018 dk_geom2vd_geom(void *ioctl_arg, void *vd_buf) 26570a55fbb7Slm66018 { 26580a55fbb7Slm66018 DK_GEOM2VD_GEOM((struct dk_geom *)ioctl_arg, (vd_geom_t *)vd_buf); 26590a55fbb7Slm66018 } 26600a55fbb7Slm66018 
26610a55fbb7Slm66018 static void 26620a55fbb7Slm66018 vtoc2vd_vtoc(void *ioctl_arg, void *vd_buf) 26630a55fbb7Slm66018 { 2664342440ecSPrasad Singamsetty VTOC2VD_VTOC((struct extvtoc *)ioctl_arg, (vd_vtoc_t *)vd_buf); 26650a55fbb7Slm66018 } 26660a55fbb7Slm66018 26672f5224aeSachartre static int 26682f5224aeSachartre vd_get_efi_in(void *vd_buf, size_t vd_buf_len, void *ioctl_arg) 26694bac2208Snarayan { 26704bac2208Snarayan vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; 26714bac2208Snarayan dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg; 26722f5224aeSachartre size_t data_len; 26732f5224aeSachartre 26742f5224aeSachartre data_len = vd_buf_len - (sizeof (vd_efi_t) - sizeof (uint64_t)); 26752f5224aeSachartre if (vd_efi->length > data_len) 26762f5224aeSachartre return (EINVAL); 26774bac2208Snarayan 26784bac2208Snarayan dk_efi->dki_lba = vd_efi->lba; 26794bac2208Snarayan dk_efi->dki_length = vd_efi->length; 26804bac2208Snarayan dk_efi->dki_data = kmem_zalloc(vd_efi->length, KM_SLEEP); 26812f5224aeSachartre return (0); 26824bac2208Snarayan } 26834bac2208Snarayan 26844bac2208Snarayan static void 26854bac2208Snarayan vd_get_efi_out(void *ioctl_arg, void *vd_buf) 26864bac2208Snarayan { 26874bac2208Snarayan int len; 26884bac2208Snarayan vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; 26894bac2208Snarayan dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg; 26904bac2208Snarayan 26914bac2208Snarayan len = vd_efi->length; 26924bac2208Snarayan DK_EFI2VD_EFI(dk_efi, vd_efi); 26934bac2208Snarayan kmem_free(dk_efi->dki_data, len); 26944bac2208Snarayan } 26954bac2208Snarayan 26962f5224aeSachartre static int 26972f5224aeSachartre vd_set_efi_in(void *vd_buf, size_t vd_buf_len, void *ioctl_arg) 26984bac2208Snarayan { 26994bac2208Snarayan vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; 27004bac2208Snarayan dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg; 27012f5224aeSachartre size_t data_len; 27022f5224aeSachartre 27032f5224aeSachartre data_len = vd_buf_len - (sizeof (vd_efi_t) - sizeof (uint64_t)); 27042f5224aeSachartre if (vd_efi->length 
> data_len) 27052f5224aeSachartre return (EINVAL); 27064bac2208Snarayan 27074bac2208Snarayan dk_efi->dki_data = kmem_alloc(vd_efi->length, KM_SLEEP); 27084bac2208Snarayan VD_EFI2DK_EFI(vd_efi, dk_efi); 27092f5224aeSachartre return (0); 27104bac2208Snarayan } 27114bac2208Snarayan 27124bac2208Snarayan static void 27134bac2208Snarayan vd_set_efi_out(void *ioctl_arg, void *vd_buf) 27144bac2208Snarayan { 27154bac2208Snarayan vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; 27164bac2208Snarayan dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg; 27174bac2208Snarayan 27184bac2208Snarayan kmem_free(dk_efi->dki_data, vd_efi->length); 27194bac2208Snarayan } 27204bac2208Snarayan 27212f5224aeSachartre static int 27222f5224aeSachartre vd_scsicmd_in(void *vd_buf, size_t vd_buf_len, void *ioctl_arg) 27232f5224aeSachartre { 27242f5224aeSachartre size_t vd_scsi_len; 27252f5224aeSachartre vd_scsi_t *vd_scsi = (vd_scsi_t *)vd_buf; 27262f5224aeSachartre struct uscsi_cmd *uscsi = (struct uscsi_cmd *)ioctl_arg; 27272f5224aeSachartre 27282f5224aeSachartre /* check buffer size */ 27292f5224aeSachartre vd_scsi_len = VD_SCSI_SIZE; 27302f5224aeSachartre vd_scsi_len += P2ROUNDUP(vd_scsi->cdb_len, sizeof (uint64_t)); 27312f5224aeSachartre vd_scsi_len += P2ROUNDUP(vd_scsi->sense_len, sizeof (uint64_t)); 27322f5224aeSachartre vd_scsi_len += P2ROUNDUP(vd_scsi->datain_len, sizeof (uint64_t)); 27332f5224aeSachartre vd_scsi_len += P2ROUNDUP(vd_scsi->dataout_len, sizeof (uint64_t)); 27342f5224aeSachartre 27352f5224aeSachartre ASSERT(vd_scsi_len % sizeof (uint64_t) == 0); 27362f5224aeSachartre 27372f5224aeSachartre if (vd_buf_len < vd_scsi_len) 27382f5224aeSachartre return (EINVAL); 27392f5224aeSachartre 27402f5224aeSachartre /* set flags */ 27412f5224aeSachartre uscsi->uscsi_flags = vd_scsi_debug; 27422f5224aeSachartre 27432f5224aeSachartre if (vd_scsi->options & VD_SCSI_OPT_NORETRY) { 27442f5224aeSachartre uscsi->uscsi_flags |= USCSI_ISOLATE; 27452f5224aeSachartre uscsi->uscsi_flags |= USCSI_DIAGNOSE; 
27462f5224aeSachartre } 27472f5224aeSachartre 27482f5224aeSachartre /* task attribute */ 27492f5224aeSachartre switch (vd_scsi->task_attribute) { 27502f5224aeSachartre case VD_SCSI_TASK_ACA: 27512f5224aeSachartre uscsi->uscsi_flags |= USCSI_HEAD; 27522f5224aeSachartre break; 27532f5224aeSachartre case VD_SCSI_TASK_HQUEUE: 27542f5224aeSachartre uscsi->uscsi_flags |= USCSI_HTAG; 27552f5224aeSachartre break; 27562f5224aeSachartre case VD_SCSI_TASK_ORDERED: 27572f5224aeSachartre uscsi->uscsi_flags |= USCSI_OTAG; 27582f5224aeSachartre break; 27592f5224aeSachartre default: 27602f5224aeSachartre uscsi->uscsi_flags |= USCSI_NOTAG; 27612f5224aeSachartre break; 27622f5224aeSachartre } 27632f5224aeSachartre 27642f5224aeSachartre /* timeout */ 27652f5224aeSachartre uscsi->uscsi_timeout = vd_scsi->timeout; 27662f5224aeSachartre 27672f5224aeSachartre /* cdb data */ 27682f5224aeSachartre uscsi->uscsi_cdb = (caddr_t)VD_SCSI_DATA_CDB(vd_scsi); 27692f5224aeSachartre uscsi->uscsi_cdblen = vd_scsi->cdb_len; 27702f5224aeSachartre 27712f5224aeSachartre /* sense buffer */ 27722f5224aeSachartre if (vd_scsi->sense_len != 0) { 27732f5224aeSachartre uscsi->uscsi_flags |= USCSI_RQENABLE; 27742f5224aeSachartre uscsi->uscsi_rqbuf = (caddr_t)VD_SCSI_DATA_SENSE(vd_scsi); 27752f5224aeSachartre uscsi->uscsi_rqlen = vd_scsi->sense_len; 27762f5224aeSachartre } 27772f5224aeSachartre 27782f5224aeSachartre if (vd_scsi->datain_len != 0 && vd_scsi->dataout_len != 0) { 27792f5224aeSachartre /* uscsi does not support read/write request */ 27802f5224aeSachartre return (EINVAL); 27812f5224aeSachartre } 27822f5224aeSachartre 27832f5224aeSachartre /* request data-in */ 27842f5224aeSachartre if (vd_scsi->datain_len != 0) { 27852f5224aeSachartre uscsi->uscsi_flags |= USCSI_READ; 27862f5224aeSachartre uscsi->uscsi_buflen = vd_scsi->datain_len; 27872f5224aeSachartre uscsi->uscsi_bufaddr = (char *)VD_SCSI_DATA_IN(vd_scsi); 27882f5224aeSachartre } 27892f5224aeSachartre 27902f5224aeSachartre /* request data-out */ 
27912f5224aeSachartre if (vd_scsi->dataout_len != 0) { 27922f5224aeSachartre uscsi->uscsi_buflen = vd_scsi->dataout_len; 27932f5224aeSachartre uscsi->uscsi_bufaddr = (char *)VD_SCSI_DATA_OUT(vd_scsi); 27942f5224aeSachartre } 27952f5224aeSachartre 27962f5224aeSachartre return (0); 27972f5224aeSachartre } 27982f5224aeSachartre 27992f5224aeSachartre static void 28002f5224aeSachartre vd_scsicmd_out(void *ioctl_arg, void *vd_buf) 28012f5224aeSachartre { 28022f5224aeSachartre vd_scsi_t *vd_scsi = (vd_scsi_t *)vd_buf; 28032f5224aeSachartre struct uscsi_cmd *uscsi = (struct uscsi_cmd *)ioctl_arg; 28042f5224aeSachartre 28052f5224aeSachartre /* output fields */ 28062f5224aeSachartre vd_scsi->cmd_status = uscsi->uscsi_status; 28072f5224aeSachartre 28082f5224aeSachartre /* sense data */ 28092f5224aeSachartre if ((uscsi->uscsi_flags & USCSI_RQENABLE) && 28102f5224aeSachartre (uscsi->uscsi_status == STATUS_CHECK || 28112f5224aeSachartre uscsi->uscsi_status == STATUS_TERMINATED)) { 28122f5224aeSachartre vd_scsi->sense_status = uscsi->uscsi_rqstatus; 28132f5224aeSachartre if (uscsi->uscsi_rqstatus == STATUS_GOOD) 281414466a20Szk194757 vd_scsi->sense_len -= uscsi->uscsi_rqresid; 28152f5224aeSachartre else 28162f5224aeSachartre vd_scsi->sense_len = 0; 28172f5224aeSachartre } else { 28182f5224aeSachartre vd_scsi->sense_len = 0; 28192f5224aeSachartre } 28202f5224aeSachartre 28212f5224aeSachartre if (uscsi->uscsi_status != STATUS_GOOD) { 28222f5224aeSachartre vd_scsi->dataout_len = 0; 28232f5224aeSachartre vd_scsi->datain_len = 0; 28242f5224aeSachartre return; 28252f5224aeSachartre } 28262f5224aeSachartre 28272f5224aeSachartre if (uscsi->uscsi_flags & USCSI_READ) { 28282f5224aeSachartre /* request data (read) */ 28292f5224aeSachartre vd_scsi->datain_len -= uscsi->uscsi_resid; 28302f5224aeSachartre vd_scsi->dataout_len = 0; 28312f5224aeSachartre } else { 28322f5224aeSachartre /* request data (write) */ 28332f5224aeSachartre vd_scsi->datain_len = 0; 28342f5224aeSachartre 
vd_scsi->dataout_len -= uscsi->uscsi_resid; 28352f5224aeSachartre } 28362f5224aeSachartre } 28372f5224aeSachartre 2838690555a1Sachartre static ushort_t 28393c96341aSnarayan vd_lbl2cksum(struct dk_label *label) 28403c96341aSnarayan { 28413c96341aSnarayan int count; 2842690555a1Sachartre ushort_t sum, *sp; 28433c96341aSnarayan 28443c96341aSnarayan count = (sizeof (struct dk_label)) / (sizeof (short)) - 1; 2845690555a1Sachartre sp = (ushort_t *)label; 28463c96341aSnarayan sum = 0; 28473c96341aSnarayan while (count--) { 28483c96341aSnarayan sum ^= *sp++; 28493c96341aSnarayan } 28503c96341aSnarayan 28513c96341aSnarayan return (sum); 28523c96341aSnarayan } 28533c96341aSnarayan 285487a7269eSachartre /* 2855bae9e67eSachartre * Copy information from a vtoc and dk_geom structures to a dk_label structure. 2856bae9e67eSachartre */ 2857bae9e67eSachartre static void 2858342440ecSPrasad Singamsetty vd_vtocgeom_to_label(struct extvtoc *vtoc, struct dk_geom *geom, 2859bae9e67eSachartre struct dk_label *label) 2860bae9e67eSachartre { 2861bae9e67eSachartre int i; 2862bae9e67eSachartre 2863bae9e67eSachartre ASSERT(vtoc->v_nparts == V_NUMPAR); 2864bae9e67eSachartre ASSERT(vtoc->v_sanity == VTOC_SANE); 2865bae9e67eSachartre 2866bae9e67eSachartre bzero(label, sizeof (struct dk_label)); 2867bae9e67eSachartre 2868bae9e67eSachartre label->dkl_ncyl = geom->dkg_ncyl; 2869bae9e67eSachartre label->dkl_acyl = geom->dkg_acyl; 2870bae9e67eSachartre label->dkl_pcyl = geom->dkg_pcyl; 2871bae9e67eSachartre label->dkl_nhead = geom->dkg_nhead; 2872bae9e67eSachartre label->dkl_nsect = geom->dkg_nsect; 2873bae9e67eSachartre label->dkl_intrlv = geom->dkg_intrlv; 2874bae9e67eSachartre label->dkl_apc = geom->dkg_apc; 2875bae9e67eSachartre label->dkl_rpm = geom->dkg_rpm; 2876bae9e67eSachartre label->dkl_write_reinstruct = geom->dkg_write_reinstruct; 2877bae9e67eSachartre label->dkl_read_reinstruct = geom->dkg_read_reinstruct; 2878bae9e67eSachartre 2879bae9e67eSachartre label->dkl_vtoc.v_nparts = V_NUMPAR; 
2880bae9e67eSachartre label->dkl_vtoc.v_sanity = VTOC_SANE; 2881bae9e67eSachartre label->dkl_vtoc.v_version = vtoc->v_version; 2882bae9e67eSachartre for (i = 0; i < V_NUMPAR; i++) { 2883bae9e67eSachartre label->dkl_vtoc.v_timestamp[i] = vtoc->timestamp[i]; 2884bae9e67eSachartre label->dkl_vtoc.v_part[i].p_tag = vtoc->v_part[i].p_tag; 2885bae9e67eSachartre label->dkl_vtoc.v_part[i].p_flag = vtoc->v_part[i].p_flag; 2886bae9e67eSachartre label->dkl_map[i].dkl_cylno = vtoc->v_part[i].p_start / 2887bae9e67eSachartre (label->dkl_nhead * label->dkl_nsect); 2888bae9e67eSachartre label->dkl_map[i].dkl_nblk = vtoc->v_part[i].p_size; 2889bae9e67eSachartre } 2890bae9e67eSachartre 2891bae9e67eSachartre /* 2892bae9e67eSachartre * The bootinfo array can not be copied with bcopy() because 2893bae9e67eSachartre * elements are of type long in vtoc (so 64-bit) and of type 2894bae9e67eSachartre * int in dk_vtoc (so 32-bit). 2895bae9e67eSachartre */ 2896bae9e67eSachartre label->dkl_vtoc.v_bootinfo[0] = vtoc->v_bootinfo[0]; 2897bae9e67eSachartre label->dkl_vtoc.v_bootinfo[1] = vtoc->v_bootinfo[1]; 2898bae9e67eSachartre label->dkl_vtoc.v_bootinfo[2] = vtoc->v_bootinfo[2]; 2899bae9e67eSachartre bcopy(vtoc->v_asciilabel, label->dkl_asciilabel, LEN_DKL_ASCII); 2900bae9e67eSachartre bcopy(vtoc->v_volume, label->dkl_vtoc.v_volume, LEN_DKL_VVOL); 2901bae9e67eSachartre 2902bae9e67eSachartre /* re-compute checksum */ 2903bae9e67eSachartre label->dkl_magic = DKL_MAGIC; 2904bae9e67eSachartre label->dkl_cksum = vd_lbl2cksum(label); 2905bae9e67eSachartre } 2906bae9e67eSachartre 2907bae9e67eSachartre /* 2908bae9e67eSachartre * Copy information from a dk_label structure to a vtoc and dk_geom structures. 
2909bae9e67eSachartre */ 2910bae9e67eSachartre static void 2911342440ecSPrasad Singamsetty vd_label_to_vtocgeom(struct dk_label *label, struct extvtoc *vtoc, 2912bae9e67eSachartre struct dk_geom *geom) 2913bae9e67eSachartre { 2914bae9e67eSachartre int i; 2915bae9e67eSachartre 2916bae9e67eSachartre bzero(vtoc, sizeof (struct vtoc)); 2917bae9e67eSachartre bzero(geom, sizeof (struct dk_geom)); 2918bae9e67eSachartre 2919bae9e67eSachartre geom->dkg_ncyl = label->dkl_ncyl; 2920bae9e67eSachartre geom->dkg_acyl = label->dkl_acyl; 2921bae9e67eSachartre geom->dkg_nhead = label->dkl_nhead; 2922bae9e67eSachartre geom->dkg_nsect = label->dkl_nsect; 2923bae9e67eSachartre geom->dkg_intrlv = label->dkl_intrlv; 2924bae9e67eSachartre geom->dkg_apc = label->dkl_apc; 2925bae9e67eSachartre geom->dkg_rpm = label->dkl_rpm; 2926bae9e67eSachartre geom->dkg_pcyl = label->dkl_pcyl; 2927bae9e67eSachartre geom->dkg_write_reinstruct = label->dkl_write_reinstruct; 2928bae9e67eSachartre geom->dkg_read_reinstruct = label->dkl_read_reinstruct; 2929bae9e67eSachartre 2930bae9e67eSachartre vtoc->v_sanity = label->dkl_vtoc.v_sanity; 2931bae9e67eSachartre vtoc->v_version = label->dkl_vtoc.v_version; 2932bae9e67eSachartre vtoc->v_sectorsz = DEV_BSIZE; 2933bae9e67eSachartre vtoc->v_nparts = label->dkl_vtoc.v_nparts; 2934bae9e67eSachartre 2935bae9e67eSachartre for (i = 0; i < vtoc->v_nparts; i++) { 2936bae9e67eSachartre vtoc->v_part[i].p_tag = label->dkl_vtoc.v_part[i].p_tag; 2937bae9e67eSachartre vtoc->v_part[i].p_flag = label->dkl_vtoc.v_part[i].p_flag; 2938bae9e67eSachartre vtoc->v_part[i].p_start = label->dkl_map[i].dkl_cylno * 2939bae9e67eSachartre (label->dkl_nhead * label->dkl_nsect); 2940bae9e67eSachartre vtoc->v_part[i].p_size = label->dkl_map[i].dkl_nblk; 2941bae9e67eSachartre vtoc->timestamp[i] = label->dkl_vtoc.v_timestamp[i]; 2942bae9e67eSachartre } 2943bae9e67eSachartre 2944bae9e67eSachartre /* 2945bae9e67eSachartre * The bootinfo array can not be copied with bcopy() because 
2946bae9e67eSachartre * elements are of type long in vtoc (so 64-bit) and of type 2947bae9e67eSachartre * int in dk_vtoc (so 32-bit). 2948bae9e67eSachartre */ 2949bae9e67eSachartre vtoc->v_bootinfo[0] = label->dkl_vtoc.v_bootinfo[0]; 2950bae9e67eSachartre vtoc->v_bootinfo[1] = label->dkl_vtoc.v_bootinfo[1]; 2951bae9e67eSachartre vtoc->v_bootinfo[2] = label->dkl_vtoc.v_bootinfo[2]; 2952bae9e67eSachartre bcopy(label->dkl_asciilabel, vtoc->v_asciilabel, LEN_DKL_ASCII); 2953bae9e67eSachartre bcopy(label->dkl_vtoc.v_volume, vtoc->v_volume, LEN_DKL_VVOL); 2954bae9e67eSachartre } 2955bae9e67eSachartre 2956bae9e67eSachartre /* 2957bae9e67eSachartre * Check if a geometry is valid for a single-slice disk. A geometry is 2958bae9e67eSachartre * considered valid if the main attributes of the geometry match with the 2959bae9e67eSachartre * attributes of the fake geometry we have created. 2960bae9e67eSachartre */ 2961bae9e67eSachartre static boolean_t 2962bae9e67eSachartre vd_slice_geom_isvalid(vd_t *vd, struct dk_geom *geom) 2963bae9e67eSachartre { 2964bae9e67eSachartre ASSERT(vd->vdisk_type == VD_DISK_TYPE_SLICE); 2965bae9e67eSachartre ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC); 2966bae9e67eSachartre 2967bae9e67eSachartre if (geom->dkg_ncyl != vd->dk_geom.dkg_ncyl || 2968bae9e67eSachartre geom->dkg_acyl != vd->dk_geom.dkg_acyl || 2969bae9e67eSachartre geom->dkg_nsect != vd->dk_geom.dkg_nsect || 2970bae9e67eSachartre geom->dkg_pcyl != vd->dk_geom.dkg_pcyl) 2971bae9e67eSachartre return (B_FALSE); 2972bae9e67eSachartre 2973bae9e67eSachartre return (B_TRUE); 2974bae9e67eSachartre } 2975bae9e67eSachartre 2976bae9e67eSachartre /* 2977bae9e67eSachartre * Check if a vtoc is valid for a single-slice disk. A vtoc is considered 2978bae9e67eSachartre * valid if the main attributes of the vtoc match with the attributes of the 2979bae9e67eSachartre * fake vtoc we have created. 
2980bae9e67eSachartre */ 2981bae9e67eSachartre static boolean_t 2982342440ecSPrasad Singamsetty vd_slice_vtoc_isvalid(vd_t *vd, struct extvtoc *vtoc) 2983bae9e67eSachartre { 2984bae9e67eSachartre size_t csize; 2985bae9e67eSachartre int i; 2986bae9e67eSachartre 2987bae9e67eSachartre ASSERT(vd->vdisk_type == VD_DISK_TYPE_SLICE); 2988bae9e67eSachartre ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC); 2989bae9e67eSachartre 2990bae9e67eSachartre if (vtoc->v_sanity != vd->vtoc.v_sanity || 2991bae9e67eSachartre vtoc->v_version != vd->vtoc.v_version || 2992bae9e67eSachartre vtoc->v_nparts != vd->vtoc.v_nparts || 2993bae9e67eSachartre strcmp(vtoc->v_volume, vd->vtoc.v_volume) != 0 || 2994bae9e67eSachartre strcmp(vtoc->v_asciilabel, vd->vtoc.v_asciilabel) != 0) 2995bae9e67eSachartre return (B_FALSE); 2996bae9e67eSachartre 2997bae9e67eSachartre /* slice 2 should be unchanged */ 2998bae9e67eSachartre if (vtoc->v_part[VD_ENTIRE_DISK_SLICE].p_start != 2999bae9e67eSachartre vd->vtoc.v_part[VD_ENTIRE_DISK_SLICE].p_start || 3000bae9e67eSachartre vtoc->v_part[VD_ENTIRE_DISK_SLICE].p_size != 3001bae9e67eSachartre vd->vtoc.v_part[VD_ENTIRE_DISK_SLICE].p_size) 3002bae9e67eSachartre return (B_FALSE); 3003bae9e67eSachartre 3004bae9e67eSachartre /* 3005bae9e67eSachartre * Slice 0 should be mostly unchanged and cover most of the disk. 3006bae9e67eSachartre * However we allow some flexibility wrt to the start and the size 3007bae9e67eSachartre * of this slice mainly because we can't exactly know how it will 3008bae9e67eSachartre * be defined by the OS installer. 3009bae9e67eSachartre * 3010bae9e67eSachartre * We allow slice 0 to be defined as starting on any of the first 3011bae9e67eSachartre * 4 cylinders. 
3012bae9e67eSachartre */ 3013bae9e67eSachartre csize = vd->dk_geom.dkg_nhead * vd->dk_geom.dkg_nsect; 3014bae9e67eSachartre 3015bae9e67eSachartre if (vtoc->v_part[0].p_start > 4 * csize || 3016bae9e67eSachartre vtoc->v_part[0].p_size > vtoc->v_part[VD_ENTIRE_DISK_SLICE].p_size) 3017bae9e67eSachartre return (B_FALSE); 3018bae9e67eSachartre 3019bae9e67eSachartre if (vd->vtoc.v_part[0].p_size >= 4 * csize && 3020bae9e67eSachartre vtoc->v_part[0].p_size < vd->vtoc.v_part[0].p_size - 4 *csize) 3021bae9e67eSachartre return (B_FALSE); 3022bae9e67eSachartre 3023bae9e67eSachartre /* any other slice should have a size of 0 */ 3024bae9e67eSachartre for (i = 1; i < vtoc->v_nparts; i++) { 3025bae9e67eSachartre if (i != VD_ENTIRE_DISK_SLICE && 3026bae9e67eSachartre vtoc->v_part[i].p_size != 0) 3027bae9e67eSachartre return (B_FALSE); 3028bae9e67eSachartre } 3029bae9e67eSachartre 3030bae9e67eSachartre return (B_TRUE); 3031bae9e67eSachartre } 3032bae9e67eSachartre 3033bae9e67eSachartre /* 303487a7269eSachartre * Handle ioctls to a disk slice. 
3035205eeb1aSlm66018 * 3036205eeb1aSlm66018 * Return Values 3037205eeb1aSlm66018 * 0 - Indicates that there are no errors in disk operations 3038205eeb1aSlm66018 * ENOTSUP - Unknown disk label type or unsupported DKIO ioctl 3039205eeb1aSlm66018 * EINVAL - Not enough room to copy the EFI label 3040205eeb1aSlm66018 * 304187a7269eSachartre */ 30421ae08745Sheppo static int 30430a55fbb7Slm66018 vd_do_slice_ioctl(vd_t *vd, int cmd, void *ioctl_arg) 30441ae08745Sheppo { 30454bac2208Snarayan dk_efi_t *dk_ioc; 3046342440ecSPrasad Singamsetty struct extvtoc *vtoc; 3047bae9e67eSachartre struct dk_geom *geom; 3048bae9e67eSachartre size_t len, lba; 3049edcc0754Sachartre 3050edcc0754Sachartre ASSERT(vd->vdisk_type == VD_DISK_TYPE_SLICE); 3051edcc0754Sachartre 305283990c4aSAlexandre Chartre if (cmd == DKIOCFLUSHWRITECACHE) 305383990c4aSAlexandre Chartre return (vd_flush_write(vd)); 30544bac2208Snarayan 30554bac2208Snarayan switch (vd->vdisk_label) { 30564bac2208Snarayan 3057edcc0754Sachartre /* ioctls for a single slice disk with a VTOC label */ 30584bac2208Snarayan case VD_DISK_LABEL_VTOC: 30594bac2208Snarayan 30601ae08745Sheppo switch (cmd) { 3061bae9e67eSachartre 30621ae08745Sheppo case DKIOCGGEOM: 30630a55fbb7Slm66018 ASSERT(ioctl_arg != NULL); 30640a55fbb7Slm66018 bcopy(&vd->dk_geom, ioctl_arg, sizeof (vd->dk_geom)); 30651ae08745Sheppo return (0); 3066bae9e67eSachartre 3067342440ecSPrasad Singamsetty case DKIOCGEXTVTOC: 30680a55fbb7Slm66018 ASSERT(ioctl_arg != NULL); 30690a55fbb7Slm66018 bcopy(&vd->vtoc, ioctl_arg, sizeof (vd->vtoc)); 30701ae08745Sheppo return (0); 3071bae9e67eSachartre 3072bae9e67eSachartre case DKIOCSGEOM: 3073bae9e67eSachartre ASSERT(ioctl_arg != NULL); 3074bae9e67eSachartre if (vd_slice_single_slice) 3075bae9e67eSachartre return (ENOTSUP); 3076bae9e67eSachartre 3077bae9e67eSachartre /* fake success only if new geometry is valid */ 3078bae9e67eSachartre geom = (struct dk_geom *)ioctl_arg; 3079bae9e67eSachartre if (!vd_slice_geom_isvalid(vd, geom)) 
3080bae9e67eSachartre return (EINVAL); 3081bae9e67eSachartre 3082bae9e67eSachartre return (0); 3083bae9e67eSachartre 3084342440ecSPrasad Singamsetty case DKIOCSEXTVTOC: 3085bae9e67eSachartre ASSERT(ioctl_arg != NULL); 3086bae9e67eSachartre if (vd_slice_single_slice) 3087bae9e67eSachartre return (ENOTSUP); 3088bae9e67eSachartre 3089bae9e67eSachartre /* fake sucess only if the new vtoc is valid */ 3090342440ecSPrasad Singamsetty vtoc = (struct extvtoc *)ioctl_arg; 3091bae9e67eSachartre if (!vd_slice_vtoc_isvalid(vd, vtoc)) 3092bae9e67eSachartre return (EINVAL); 3093bae9e67eSachartre 3094bae9e67eSachartre return (0); 3095bae9e67eSachartre 309687a7269eSachartre default: 30973c96341aSnarayan return (ENOTSUP); 309887a7269eSachartre } 309987a7269eSachartre 3100edcc0754Sachartre /* ioctls for a single slice disk with an EFI label */ 310187a7269eSachartre case VD_DISK_LABEL_EFI: 310287a7269eSachartre 3103bae9e67eSachartre if (cmd != DKIOCGETEFI && cmd != DKIOCSETEFI) 3104bae9e67eSachartre return (ENOTSUP); 3105bae9e67eSachartre 31063c96341aSnarayan ASSERT(ioctl_arg != NULL); 310787a7269eSachartre dk_ioc = (dk_efi_t *)ioctl_arg; 3108edcc0754Sachartre 3109bae9e67eSachartre len = dk_ioc->dki_length; 3110bae9e67eSachartre lba = dk_ioc->dki_lba; 3111edcc0754Sachartre 3112bae9e67eSachartre if ((lba != VD_EFI_LBA_GPT && lba != VD_EFI_LBA_GPE) || 3113bae9e67eSachartre (lba == VD_EFI_LBA_GPT && len < sizeof (efi_gpt_t)) || 3114bae9e67eSachartre (lba == VD_EFI_LBA_GPE && len < sizeof (efi_gpe_t))) 311587a7269eSachartre return (EINVAL); 3116edcc0754Sachartre 3117bae9e67eSachartre switch (cmd) { 3118bae9e67eSachartre case DKIOCGETEFI: 3119bae9e67eSachartre len = vd_slice_flabel_read(vd, 3120*65908c77Syu, larry liu - Sun Microsystems - Beijing China (caddr_t)dk_ioc->dki_data, 3121*65908c77Syu, larry liu - Sun Microsystems - Beijing China lba * vd->vdisk_bsize, len); 3122edcc0754Sachartre 3123bae9e67eSachartre ASSERT(len > 0); 3124edcc0754Sachartre 312587a7269eSachartre return (0); 
3126bae9e67eSachartre 3127bae9e67eSachartre case DKIOCSETEFI: 3128bae9e67eSachartre if (vd_slice_single_slice) 312987a7269eSachartre return (ENOTSUP); 3130bae9e67eSachartre 3131bae9e67eSachartre /* we currently don't support writing EFI */ 3132bae9e67eSachartre return (EIO); 313387a7269eSachartre } 313487a7269eSachartre 313587a7269eSachartre default: 3136205eeb1aSlm66018 /* Unknown disk label type */ 313787a7269eSachartre return (ENOTSUP); 313887a7269eSachartre } 313987a7269eSachartre } 314087a7269eSachartre 3141edcc0754Sachartre static int 3142edcc0754Sachartre vds_efi_alloc_and_read(vd_t *vd, efi_gpt_t **gpt, efi_gpe_t **gpe) 3143edcc0754Sachartre { 3144edcc0754Sachartre vd_efi_dev_t edev; 3145edcc0754Sachartre int status; 3146edcc0754Sachartre 3147edcc0754Sachartre VD_EFI_DEV_SET(edev, vd, (vd_efi_ioctl_func)vd_backend_ioctl); 3148edcc0754Sachartre 3149edcc0754Sachartre status = vd_efi_alloc_and_read(&edev, gpt, gpe); 3150edcc0754Sachartre 3151edcc0754Sachartre return (status); 3152edcc0754Sachartre } 3153edcc0754Sachartre 3154edcc0754Sachartre static void 3155edcc0754Sachartre vds_efi_free(vd_t *vd, efi_gpt_t *gpt, efi_gpe_t *gpe) 3156edcc0754Sachartre { 3157edcc0754Sachartre vd_efi_dev_t edev; 3158edcc0754Sachartre 3159edcc0754Sachartre VD_EFI_DEV_SET(edev, vd, (vd_efi_ioctl_func)vd_backend_ioctl); 3160edcc0754Sachartre 3161edcc0754Sachartre vd_efi_free(&edev, gpt, gpe); 3162edcc0754Sachartre } 3163edcc0754Sachartre 3164edcc0754Sachartre static int 31651aff8f07SAlexandre Chartre vd_dskimg_validate_efi(vd_t *vd) 3166edcc0754Sachartre { 3167edcc0754Sachartre efi_gpt_t *gpt; 3168edcc0754Sachartre efi_gpe_t *gpe; 3169edcc0754Sachartre int i, nparts, status; 3170edcc0754Sachartre struct uuid efi_reserved = EFI_RESERVED; 3171edcc0754Sachartre 3172edcc0754Sachartre if ((status = vds_efi_alloc_and_read(vd, &gpt, &gpe)) != 0) 3173edcc0754Sachartre return (status); 3174edcc0754Sachartre 3175342440ecSPrasad Singamsetty bzero(&vd->vtoc, sizeof (struct extvtoc)); 
3176edcc0754Sachartre bzero(&vd->dk_geom, sizeof (struct dk_geom)); 3177edcc0754Sachartre bzero(vd->slices, sizeof (vd_slice_t) * VD_MAXPART); 3178edcc0754Sachartre 3179edcc0754Sachartre vd->efi_reserved = -1; 3180edcc0754Sachartre 3181edcc0754Sachartre nparts = gpt->efi_gpt_NumberOfPartitionEntries; 3182edcc0754Sachartre 3183edcc0754Sachartre for (i = 0; i < nparts && i < VD_MAXPART; i++) { 3184edcc0754Sachartre 3185d84f0041SAlexandre Chartre if (gpe[i].efi_gpe_StartingLBA == 0 && 3186edcc0754Sachartre gpe[i].efi_gpe_EndingLBA == 0) { 3187edcc0754Sachartre continue; 3188edcc0754Sachartre } 3189edcc0754Sachartre 3190edcc0754Sachartre vd->slices[i].start = gpe[i].efi_gpe_StartingLBA; 3191edcc0754Sachartre vd->slices[i].nblocks = gpe[i].efi_gpe_EndingLBA - 3192edcc0754Sachartre gpe[i].efi_gpe_StartingLBA + 1; 3193edcc0754Sachartre 3194edcc0754Sachartre if (bcmp(&gpe[i].efi_gpe_PartitionTypeGUID, &efi_reserved, 3195edcc0754Sachartre sizeof (struct uuid)) == 0) 3196edcc0754Sachartre vd->efi_reserved = i; 3197edcc0754Sachartre 3198edcc0754Sachartre } 3199edcc0754Sachartre 3200edcc0754Sachartre ASSERT(vd->vdisk_size != 0); 3201edcc0754Sachartre vd->slices[VD_EFI_WD_SLICE].start = 0; 3202edcc0754Sachartre vd->slices[VD_EFI_WD_SLICE].nblocks = vd->vdisk_size; 3203edcc0754Sachartre 3204edcc0754Sachartre vds_efi_free(vd, gpt, gpe); 3205edcc0754Sachartre 3206edcc0754Sachartre return (status); 3207edcc0754Sachartre } 3208edcc0754Sachartre 320987a7269eSachartre /* 321078fcd0a1Sachartre * Function: 32111aff8f07SAlexandre Chartre * vd_dskimg_validate_geometry 3212205eeb1aSlm66018 * 321378fcd0a1Sachartre * Description: 321478fcd0a1Sachartre * Read the label and validate the geometry of a disk image. The driver 321578fcd0a1Sachartre * label, vtoc and geometry information are updated according to the 321678fcd0a1Sachartre * label read from the disk image. 
321778fcd0a1Sachartre * 321878fcd0a1Sachartre * If no valid label is found, the label is set to unknown and the 321978fcd0a1Sachartre * function returns EINVAL, but a default vtoc and geometry are provided 3220edcc0754Sachartre * to the driver. If an EFI label is found, ENOTSUP is returned. 322178fcd0a1Sachartre * 322278fcd0a1Sachartre * Parameters: 322378fcd0a1Sachartre * vd - disk on which the operation is performed. 322478fcd0a1Sachartre * 322578fcd0a1Sachartre * Return Code: 322678fcd0a1Sachartre * 0 - success. 322778fcd0a1Sachartre * EIO - error reading the label from the disk image. 322878fcd0a1Sachartre * EINVAL - unknown disk label. 3229edcc0754Sachartre * ENOTSUP - geometry not applicable (EFI label). 323087a7269eSachartre */ 323187a7269eSachartre static int 32321aff8f07SAlexandre Chartre vd_dskimg_validate_geometry(vd_t *vd) 323387a7269eSachartre { 323487a7269eSachartre struct dk_label label; 323578fcd0a1Sachartre struct dk_geom *geom = &vd->dk_geom; 3236342440ecSPrasad Singamsetty struct extvtoc *vtoc = &vd->vtoc; 323778fcd0a1Sachartre int i; 323878fcd0a1Sachartre int status = 0; 323987a7269eSachartre 32401aff8f07SAlexandre Chartre ASSERT(VD_DSKIMG(vd)); 324187a7269eSachartre 32421aff8f07SAlexandre Chartre if (VD_DSKIMG_LABEL_READ(vd, &label) < 0) 324387a7269eSachartre return (EIO); 324487a7269eSachartre 324587a7269eSachartre if (label.dkl_magic != DKL_MAGIC || 324678fcd0a1Sachartre label.dkl_cksum != vd_lbl2cksum(&label) || 32471aff8f07SAlexandre Chartre (vd_dskimg_validate_sanity && 32481aff8f07SAlexandre Chartre label.dkl_vtoc.v_sanity != VTOC_SANE) || 324978fcd0a1Sachartre label.dkl_vtoc.v_nparts != V_NUMPAR) { 3250edcc0754Sachartre 32511aff8f07SAlexandre Chartre if (vd_dskimg_validate_efi(vd) == 0) { 3252edcc0754Sachartre vd->vdisk_label = VD_DISK_LABEL_EFI; 3253edcc0754Sachartre return (ENOTSUP); 3254edcc0754Sachartre } 3255edcc0754Sachartre 325678fcd0a1Sachartre vd->vdisk_label = VD_DISK_LABEL_UNK; 3257*65908c77Syu, larry liu - Sun Microsystems - 
Beijing China vd_build_default_label(vd->dskimg_size, vd->vdisk_bsize, 3258*65908c77Syu, larry liu - Sun Microsystems - Beijing China &label); 325978fcd0a1Sachartre status = EINVAL; 326078fcd0a1Sachartre } else { 326178fcd0a1Sachartre vd->vdisk_label = VD_DISK_LABEL_VTOC; 326278fcd0a1Sachartre } 326387a7269eSachartre 3264bae9e67eSachartre /* Update the driver geometry and vtoc */ 3265bae9e67eSachartre vd_label_to_vtocgeom(&label, vtoc, geom); 326687a7269eSachartre 3267edcc0754Sachartre /* Update logical partitions */ 3268edcc0754Sachartre bzero(vd->slices, sizeof (vd_slice_t) * VD_MAXPART); 3269edcc0754Sachartre if (vd->vdisk_label != VD_DISK_LABEL_UNK) { 3270edcc0754Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 3271edcc0754Sachartre vd->slices[i].start = vtoc->v_part[i].p_start; 3272edcc0754Sachartre vd->slices[i].nblocks = vtoc->v_part[i].p_size; 3273edcc0754Sachartre } 3274edcc0754Sachartre } 3275edcc0754Sachartre 327678fcd0a1Sachartre return (status); 327778fcd0a1Sachartre } 327878fcd0a1Sachartre 327978fcd0a1Sachartre /* 32801aff8f07SAlexandre Chartre * Handle ioctls to a disk image. 
328178fcd0a1Sachartre * 328278fcd0a1Sachartre * Return Values 328378fcd0a1Sachartre * 0 - Indicates that there are no errors 328478fcd0a1Sachartre * != 0 - Disk operation returned an error 328578fcd0a1Sachartre */ 328678fcd0a1Sachartre static int 32871aff8f07SAlexandre Chartre vd_do_dskimg_ioctl(vd_t *vd, int cmd, void *ioctl_arg) 328878fcd0a1Sachartre { 328978fcd0a1Sachartre struct dk_label label; 329078fcd0a1Sachartre struct dk_geom *geom; 3291342440ecSPrasad Singamsetty struct extvtoc *vtoc; 3292edcc0754Sachartre dk_efi_t *efi; 329383990c4aSAlexandre Chartre int rc; 329478fcd0a1Sachartre 32951aff8f07SAlexandre Chartre ASSERT(VD_DSKIMG(vd)); 329678fcd0a1Sachartre 329778fcd0a1Sachartre switch (cmd) { 329878fcd0a1Sachartre 329978fcd0a1Sachartre case DKIOCGGEOM: 330078fcd0a1Sachartre ASSERT(ioctl_arg != NULL); 330178fcd0a1Sachartre geom = (struct dk_geom *)ioctl_arg; 330278fcd0a1Sachartre 33031aff8f07SAlexandre Chartre rc = vd_dskimg_validate_geometry(vd); 3304edcc0754Sachartre if (rc != 0 && rc != EINVAL) 330578fcd0a1Sachartre return (rc); 330678fcd0a1Sachartre bcopy(&vd->dk_geom, geom, sizeof (struct dk_geom)); 330778fcd0a1Sachartre return (0); 330878fcd0a1Sachartre 3309342440ecSPrasad Singamsetty case DKIOCGEXTVTOC: 331078fcd0a1Sachartre ASSERT(ioctl_arg != NULL); 3311342440ecSPrasad Singamsetty vtoc = (struct extvtoc *)ioctl_arg; 331278fcd0a1Sachartre 33131aff8f07SAlexandre Chartre rc = vd_dskimg_validate_geometry(vd); 3314edcc0754Sachartre if (rc != 0 && rc != EINVAL) 331578fcd0a1Sachartre return (rc); 3316342440ecSPrasad Singamsetty bcopy(&vd->vtoc, vtoc, sizeof (struct extvtoc)); 331787a7269eSachartre return (0); 331887a7269eSachartre 331987a7269eSachartre case DKIOCSGEOM: 332087a7269eSachartre ASSERT(ioctl_arg != NULL); 332187a7269eSachartre geom = (struct dk_geom *)ioctl_arg; 332287a7269eSachartre 332387a7269eSachartre if (geom->dkg_nhead == 0 || geom->dkg_nsect == 0) 332487a7269eSachartre return (EINVAL); 332587a7269eSachartre 332687a7269eSachartre /* 
332787a7269eSachartre * The current device geometry is not updated, just the driver 332887a7269eSachartre * "notion" of it. The device geometry will be effectively 332987a7269eSachartre * updated when a label is written to the device during a next 3330342440ecSPrasad Singamsetty * DKIOCSEXTVTOC. 333187a7269eSachartre */ 333287a7269eSachartre bcopy(ioctl_arg, &vd->dk_geom, sizeof (vd->dk_geom)); 333387a7269eSachartre return (0); 333487a7269eSachartre 3335342440ecSPrasad Singamsetty case DKIOCSEXTVTOC: 333687a7269eSachartre ASSERT(ioctl_arg != NULL); 333787a7269eSachartre ASSERT(vd->dk_geom.dkg_nhead != 0 && 333887a7269eSachartre vd->dk_geom.dkg_nsect != 0); 3339342440ecSPrasad Singamsetty vtoc = (struct extvtoc *)ioctl_arg; 3340690555a1Sachartre 3341690555a1Sachartre if (vtoc->v_sanity != VTOC_SANE || 3342690555a1Sachartre vtoc->v_sectorsz != DEV_BSIZE || 3343690555a1Sachartre vtoc->v_nparts != V_NUMPAR) 3344690555a1Sachartre return (EINVAL); 3345690555a1Sachartre 3346bae9e67eSachartre vd_vtocgeom_to_label(vtoc, &vd->dk_geom, &label); 3347690555a1Sachartre 334887a7269eSachartre /* write label to the disk image */ 33491aff8f07SAlexandre Chartre if ((rc = vd_dskimg_set_vtoc(vd, &label)) != 0) 335087a7269eSachartre return (rc); 3351690555a1Sachartre 3352edcc0754Sachartre break; 3353edcc0754Sachartre 3354edcc0754Sachartre case DKIOCFLUSHWRITECACHE: 335583990c4aSAlexandre Chartre return (vd_flush_write(vd)); 3356edcc0754Sachartre 3357edcc0754Sachartre case DKIOCGETEFI: 3358edcc0754Sachartre ASSERT(ioctl_arg != NULL); 3359edcc0754Sachartre efi = (dk_efi_t *)ioctl_arg; 3360edcc0754Sachartre 33611aff8f07SAlexandre Chartre if (vd_dskimg_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, 3362edcc0754Sachartre (caddr_t)efi->dki_data, efi->dki_lba, efi->dki_length) < 0) 3363edcc0754Sachartre return (EIO); 3364edcc0754Sachartre 3365edcc0754Sachartre return (0); 3366edcc0754Sachartre 3367edcc0754Sachartre case DKIOCSETEFI: 3368edcc0754Sachartre ASSERT(ioctl_arg != NULL); 3369edcc0754Sachartre 
efi = (dk_efi_t *)ioctl_arg; 3370edcc0754Sachartre 33711aff8f07SAlexandre Chartre if (vd_dskimg_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, 3372edcc0754Sachartre (caddr_t)efi->dki_data, efi->dki_lba, efi->dki_length) < 0) 3373edcc0754Sachartre return (EIO); 3374edcc0754Sachartre 3375edcc0754Sachartre break; 3376edcc0754Sachartre 3377edcc0754Sachartre 3378edcc0754Sachartre default: 3379edcc0754Sachartre return (ENOTSUP); 3380edcc0754Sachartre } 3381edcc0754Sachartre 3382342440ecSPrasad Singamsetty ASSERT(cmd == DKIOCSEXTVTOC || cmd == DKIOCSETEFI); 3383edcc0754Sachartre 3384edcc0754Sachartre /* label has changed, revalidate the geometry */ 33851aff8f07SAlexandre Chartre (void) vd_dskimg_validate_geometry(vd); 33863c96341aSnarayan 338787a7269eSachartre /* 338887a7269eSachartre * The disk geometry may have changed, so we need to write 338987a7269eSachartre * the devid (if there is one) so that it is stored at the 339087a7269eSachartre * right location. 339187a7269eSachartre */ 33921aff8f07SAlexandre Chartre if (vd_dskimg_write_devid(vd, vd->dskimg_devid) != 0) { 339387a7269eSachartre PR0("Fail to write devid"); 33941ae08745Sheppo } 33954bac2208Snarayan 33964bac2208Snarayan return (0); 33974bac2208Snarayan } 3398edcc0754Sachartre 3399edcc0754Sachartre static int 3400edcc0754Sachartre vd_backend_ioctl(vd_t *vd, int cmd, caddr_t arg) 3401edcc0754Sachartre { 3402edcc0754Sachartre int rval = 0, status; 3403342440ecSPrasad Singamsetty struct vtoc vtoc; 3404edcc0754Sachartre 3405edcc0754Sachartre /* 3406edcc0754Sachartre * Call the appropriate function to execute the ioctl depending 3407edcc0754Sachartre * on the type of vdisk. 
3408edcc0754Sachartre */ 3409edcc0754Sachartre if (vd->vdisk_type == VD_DISK_TYPE_SLICE) { 3410edcc0754Sachartre 3411edcc0754Sachartre /* slice, file or volume exported as a single slice disk */ 3412edcc0754Sachartre status = vd_do_slice_ioctl(vd, cmd, arg); 3413edcc0754Sachartre 34141aff8f07SAlexandre Chartre } else if (VD_DSKIMG(vd)) { 3415edcc0754Sachartre 3416edcc0754Sachartre /* file or volume exported as a full disk */ 34171aff8f07SAlexandre Chartre status = vd_do_dskimg_ioctl(vd, cmd, arg); 3418edcc0754Sachartre 3419edcc0754Sachartre } else { 3420edcc0754Sachartre 3421edcc0754Sachartre /* disk device exported as a full disk */ 3422edcc0754Sachartre status = ldi_ioctl(vd->ldi_handle[0], cmd, (intptr_t)arg, 3423edcc0754Sachartre vd->open_flags | FKIOCTL, kcred, &rval); 3424342440ecSPrasad Singamsetty 3425342440ecSPrasad Singamsetty /* 3426342440ecSPrasad Singamsetty * By default VTOC ioctls are done using ioctls for the 3427342440ecSPrasad Singamsetty * extended VTOC. Some drivers (in particular non-Sun drivers) 3428342440ecSPrasad Singamsetty * may not support these ioctls. In that case, we fallback to 3429342440ecSPrasad Singamsetty * the regular VTOC ioctls. 
3430342440ecSPrasad Singamsetty */ 3431342440ecSPrasad Singamsetty if (status == ENOTTY) { 3432342440ecSPrasad Singamsetty switch (cmd) { 3433342440ecSPrasad Singamsetty 3434342440ecSPrasad Singamsetty case DKIOCGEXTVTOC: 3435342440ecSPrasad Singamsetty cmd = DKIOCGVTOC; 3436342440ecSPrasad Singamsetty status = ldi_ioctl(vd->ldi_handle[0], cmd, 3437342440ecSPrasad Singamsetty (intptr_t)&vtoc, vd->open_flags | FKIOCTL, 3438342440ecSPrasad Singamsetty kcred, &rval); 3439342440ecSPrasad Singamsetty vtoctoextvtoc(vtoc, 3440342440ecSPrasad Singamsetty (*(struct extvtoc *)(void *)arg)); 3441342440ecSPrasad Singamsetty break; 3442342440ecSPrasad Singamsetty 3443342440ecSPrasad Singamsetty case DKIOCSEXTVTOC: 3444342440ecSPrasad Singamsetty cmd = DKIOCSVTOC; 3445342440ecSPrasad Singamsetty extvtoctovtoc((*(struct extvtoc *)(void *)arg), 3446342440ecSPrasad Singamsetty vtoc); 3447342440ecSPrasad Singamsetty status = ldi_ioctl(vd->ldi_handle[0], cmd, 3448342440ecSPrasad Singamsetty (intptr_t)&vtoc, vd->open_flags | FKIOCTL, 3449342440ecSPrasad Singamsetty kcred, &rval); 3450342440ecSPrasad Singamsetty break; 3451342440ecSPrasad Singamsetty } 3452342440ecSPrasad Singamsetty } 3453edcc0754Sachartre } 3454edcc0754Sachartre 3455edcc0754Sachartre #ifdef DEBUG 3456edcc0754Sachartre if (rval != 0) { 3457edcc0754Sachartre PR0("ioctl %x set rval = %d, which is not being returned" 3458edcc0754Sachartre " to caller", cmd, rval); 3459edcc0754Sachartre } 3460edcc0754Sachartre #endif /* DEBUG */ 3461edcc0754Sachartre 3462edcc0754Sachartre return (status); 34631ae08745Sheppo } 34641ae08745Sheppo 3465205eeb1aSlm66018 /* 3466205eeb1aSlm66018 * Description: 3467205eeb1aSlm66018 * This is the function that processes the ioctl requests (farming it 3468205eeb1aSlm66018 * out to functions that handle slices, files or whole disks) 3469205eeb1aSlm66018 * 3470205eeb1aSlm66018 * Return Values 3471205eeb1aSlm66018 * 0 - ioctl operation completed successfully 3472205eeb1aSlm66018 * != 0 - The LDC error 
value encountered 3473205eeb1aSlm66018 * (propagated back up the call stack as a task error) 3474205eeb1aSlm66018 * 3475205eeb1aSlm66018 * Side Effect 3476205eeb1aSlm66018 * sets request->status to the return value of the ioctl function. 3477205eeb1aSlm66018 */ 34781ae08745Sheppo static int 34790a55fbb7Slm66018 vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void* buf, vd_ioctl_t *ioctl) 34801ae08745Sheppo { 3481edcc0754Sachartre int status = 0; 34821ae08745Sheppo size_t nbytes = request->nbytes; /* modifiable copy */ 34831ae08745Sheppo 34841ae08745Sheppo 34851ae08745Sheppo ASSERT(request->slice < vd->nslices); 34861ae08745Sheppo PR0("Performing %s", ioctl->operation_name); 34871ae08745Sheppo 34880a55fbb7Slm66018 /* Get data from client and convert, if necessary */ 34890a55fbb7Slm66018 if (ioctl->copyin != NULL) { 34901ae08745Sheppo ASSERT(nbytes != 0 && buf != NULL); 34911ae08745Sheppo PR1("Getting \"arg\" data from client"); 34921ae08745Sheppo if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, 34931ae08745Sheppo request->cookie, request->ncookies, 34941ae08745Sheppo LDC_COPY_IN)) != 0) { 34953af08d82Slm66018 PR0("ldc_mem_copy() returned errno %d " 34961ae08745Sheppo "copying from client", status); 34971ae08745Sheppo return (status); 34981ae08745Sheppo } 34990a55fbb7Slm66018 35000a55fbb7Slm66018 /* Convert client's data, if necessary */ 35012f5224aeSachartre if (ioctl->copyin == VD_IDENTITY_IN) { 35022f5224aeSachartre /* use client buffer */ 35030a55fbb7Slm66018 ioctl->arg = buf; 35042f5224aeSachartre } else { 35052f5224aeSachartre /* convert client vdisk operation data to ioctl data */ 35062f5224aeSachartre status = (ioctl->copyin)(buf, nbytes, 35072f5224aeSachartre (void *)ioctl->arg); 35082f5224aeSachartre if (status != 0) { 35092f5224aeSachartre request->status = status; 35102f5224aeSachartre return (0); 35112f5224aeSachartre } 35122f5224aeSachartre } 35132f5224aeSachartre } 35142f5224aeSachartre 35152f5224aeSachartre if (ioctl->operation == 
VD_OP_SCSICMD) { 35162f5224aeSachartre struct uscsi_cmd *uscsi = (struct uscsi_cmd *)ioctl->arg; 35172f5224aeSachartre 35182f5224aeSachartre /* check write permission */ 35192f5224aeSachartre if (!(vd->open_flags & FWRITE) && 35202f5224aeSachartre !(uscsi->uscsi_flags & USCSI_READ)) { 35212f5224aeSachartre PR0("uscsi fails because backend is opened read-only"); 35222f5224aeSachartre request->status = EROFS; 35232f5224aeSachartre return (0); 35242f5224aeSachartre } 35251ae08745Sheppo } 35261ae08745Sheppo 35271ae08745Sheppo /* 3528edcc0754Sachartre * Send the ioctl to the disk backend. 35291ae08745Sheppo */ 3530edcc0754Sachartre request->status = vd_backend_ioctl(vd, ioctl->cmd, ioctl->arg); 3531205eeb1aSlm66018 3532205eeb1aSlm66018 if (request->status != 0) { 3533205eeb1aSlm66018 PR0("ioctl(%s) = errno %d", ioctl->cmd_name, request->status); 35342f5224aeSachartre if (ioctl->operation == VD_OP_SCSICMD && 35352f5224aeSachartre ((struct uscsi_cmd *)ioctl->arg)->uscsi_status != 0) 35362f5224aeSachartre /* 35372f5224aeSachartre * USCSICMD has reported an error and the uscsi_status 35382f5224aeSachartre * field is not zero. This means that the SCSI command 35392f5224aeSachartre * has completed but it has an error. So we should 35402f5224aeSachartre * mark the VD operation has succesfully completed 35412f5224aeSachartre * and clients can check the SCSI status field for 35422f5224aeSachartre * SCSI errors. 
35432f5224aeSachartre */ 35442f5224aeSachartre request->status = 0; 35452f5224aeSachartre else 3546205eeb1aSlm66018 return (0); 3547205eeb1aSlm66018 } 35481ae08745Sheppo 35490a55fbb7Slm66018 /* Convert data and send to client, if necessary */ 35500a55fbb7Slm66018 if (ioctl->copyout != NULL) { 35511ae08745Sheppo ASSERT(nbytes != 0 && buf != NULL); 35521ae08745Sheppo PR1("Sending \"arg\" data to client"); 35530a55fbb7Slm66018 35540a55fbb7Slm66018 /* Convert ioctl data to vdisk operation data, if necessary */ 35552f5224aeSachartre if (ioctl->copyout != VD_IDENTITY_OUT) 35560a55fbb7Slm66018 (ioctl->copyout)((void *)ioctl->arg, buf); 35570a55fbb7Slm66018 35581ae08745Sheppo if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, 35591ae08745Sheppo request->cookie, request->ncookies, 35601ae08745Sheppo LDC_COPY_OUT)) != 0) { 35613af08d82Slm66018 PR0("ldc_mem_copy() returned errno %d " 35621ae08745Sheppo "copying to client", status); 35631ae08745Sheppo return (status); 35641ae08745Sheppo } 35651ae08745Sheppo } 35661ae08745Sheppo 35671ae08745Sheppo return (status); 35681ae08745Sheppo } 35691ae08745Sheppo 35701ae08745Sheppo #define RNDSIZE(expr) P2ROUNDUP(sizeof (expr), sizeof (uint64_t)) 3571205eeb1aSlm66018 3572205eeb1aSlm66018 /* 3573205eeb1aSlm66018 * Description: 3574205eeb1aSlm66018 * This generic function is called by the task queue to complete 3575205eeb1aSlm66018 * the processing of the tasks. The specific completion function 3576205eeb1aSlm66018 * is passed in as a field in the task pointer. 
3577205eeb1aSlm66018 * 3578205eeb1aSlm66018 * Parameters: 3579205eeb1aSlm66018 * arg - opaque pointer to structure containing task to be completed 3580205eeb1aSlm66018 * 3581205eeb1aSlm66018 * Return Values 3582205eeb1aSlm66018 * None 3583205eeb1aSlm66018 */ 3584205eeb1aSlm66018 static void 3585205eeb1aSlm66018 vd_complete(void *arg) 3586205eeb1aSlm66018 { 3587205eeb1aSlm66018 vd_task_t *task = (vd_task_t *)arg; 3588205eeb1aSlm66018 3589205eeb1aSlm66018 ASSERT(task != NULL); 3590205eeb1aSlm66018 ASSERT(task->status == EINPROGRESS); 3591205eeb1aSlm66018 ASSERT(task->completef != NULL); 3592205eeb1aSlm66018 3593205eeb1aSlm66018 task->status = task->completef(task); 3594205eeb1aSlm66018 if (task->status) 3595205eeb1aSlm66018 PR0("%s: Error %d completing task", __func__, task->status); 3596205eeb1aSlm66018 3597205eeb1aSlm66018 /* Now notify the vDisk client */ 3598205eeb1aSlm66018 vd_complete_notify(task); 3599205eeb1aSlm66018 } 3600205eeb1aSlm66018 36011ae08745Sheppo static int 3602d10e4ef2Snarayan vd_ioctl(vd_task_t *task) 36031ae08745Sheppo { 360487a7269eSachartre int i, status; 36051ae08745Sheppo void *buf = NULL; 36060a55fbb7Slm66018 struct dk_geom dk_geom = {0}; 3607342440ecSPrasad Singamsetty struct extvtoc vtoc = {0}; 36084bac2208Snarayan struct dk_efi dk_efi = {0}; 36092f5224aeSachartre struct uscsi_cmd uscsi = {0}; 3610d10e4ef2Snarayan vd_t *vd = task->vd; 3611d10e4ef2Snarayan vd_dring_payload_t *request = task->request; 36120a55fbb7Slm66018 vd_ioctl_t ioctl[] = { 36130a55fbb7Slm66018 /* Command (no-copy) operations */ 36140a55fbb7Slm66018 {VD_OP_FLUSH, STRINGIZE(VD_OP_FLUSH), 0, 36150a55fbb7Slm66018 DKIOCFLUSHWRITECACHE, STRINGIZE(DKIOCFLUSHWRITECACHE), 3616047ba61eSachartre NULL, NULL, NULL, B_TRUE}, 36170a55fbb7Slm66018 36180a55fbb7Slm66018 /* "Get" (copy-out) operations */ 36190a55fbb7Slm66018 {VD_OP_GET_WCE, STRINGIZE(VD_OP_GET_WCE), RNDSIZE(int), 36200a55fbb7Slm66018 DKIOCGETWCE, STRINGIZE(DKIOCGETWCE), 36212f5224aeSachartre NULL, VD_IDENTITY_IN, 
VD_IDENTITY_OUT, B_FALSE}, 36220a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, STRINGIZE(VD_OP_GET_DISKGEOM), 36230a55fbb7Slm66018 RNDSIZE(vd_geom_t), 36240a55fbb7Slm66018 DKIOCGGEOM, STRINGIZE(DKIOCGGEOM), 3625047ba61eSachartre &dk_geom, NULL, dk_geom2vd_geom, B_FALSE}, 36260a55fbb7Slm66018 {VD_OP_GET_VTOC, STRINGIZE(VD_OP_GET_VTOC), RNDSIZE(vd_vtoc_t), 3627342440ecSPrasad Singamsetty DKIOCGEXTVTOC, STRINGIZE(DKIOCGEXTVTOC), 3628047ba61eSachartre &vtoc, NULL, vtoc2vd_vtoc, B_FALSE}, 36294bac2208Snarayan {VD_OP_GET_EFI, STRINGIZE(VD_OP_GET_EFI), RNDSIZE(vd_efi_t), 36304bac2208Snarayan DKIOCGETEFI, STRINGIZE(DKIOCGETEFI), 3631047ba61eSachartre &dk_efi, vd_get_efi_in, vd_get_efi_out, B_FALSE}, 36320a55fbb7Slm66018 36330a55fbb7Slm66018 /* "Set" (copy-in) operations */ 36340a55fbb7Slm66018 {VD_OP_SET_WCE, STRINGIZE(VD_OP_SET_WCE), RNDSIZE(int), 36350a55fbb7Slm66018 DKIOCSETWCE, STRINGIZE(DKIOCSETWCE), 36362f5224aeSachartre NULL, VD_IDENTITY_IN, VD_IDENTITY_OUT, B_TRUE}, 36370a55fbb7Slm66018 {VD_OP_SET_DISKGEOM, STRINGIZE(VD_OP_SET_DISKGEOM), 36380a55fbb7Slm66018 RNDSIZE(vd_geom_t), 36390a55fbb7Slm66018 DKIOCSGEOM, STRINGIZE(DKIOCSGEOM), 3640047ba61eSachartre &dk_geom, vd_geom2dk_geom, NULL, B_TRUE}, 36410a55fbb7Slm66018 {VD_OP_SET_VTOC, STRINGIZE(VD_OP_SET_VTOC), RNDSIZE(vd_vtoc_t), 3642342440ecSPrasad Singamsetty DKIOCSEXTVTOC, STRINGIZE(DKIOCSEXTVTOC), 3643047ba61eSachartre &vtoc, vd_vtoc2vtoc, NULL, B_TRUE}, 36444bac2208Snarayan {VD_OP_SET_EFI, STRINGIZE(VD_OP_SET_EFI), RNDSIZE(vd_efi_t), 36454bac2208Snarayan DKIOCSETEFI, STRINGIZE(DKIOCSETEFI), 3646047ba61eSachartre &dk_efi, vd_set_efi_in, vd_set_efi_out, B_TRUE}, 36472f5224aeSachartre 36482f5224aeSachartre {VD_OP_SCSICMD, STRINGIZE(VD_OP_SCSICMD), RNDSIZE(vd_scsi_t), 36492f5224aeSachartre USCSICMD, STRINGIZE(USCSICMD), 36502f5224aeSachartre &uscsi, vd_scsicmd_in, vd_scsicmd_out, B_FALSE}, 36510a55fbb7Slm66018 }; 36521ae08745Sheppo size_t nioctls = (sizeof (ioctl))/(sizeof (ioctl[0])); 36531ae08745Sheppo 36541ae08745Sheppo 
3655d10e4ef2Snarayan ASSERT(vd != NULL); 3656d10e4ef2Snarayan ASSERT(request != NULL); 36571ae08745Sheppo ASSERT(request->slice < vd->nslices); 36581ae08745Sheppo 36591ae08745Sheppo /* 36601ae08745Sheppo * Determine ioctl corresponding to caller's "operation" and 36611ae08745Sheppo * validate caller's "nbytes" 36621ae08745Sheppo */ 36631ae08745Sheppo for (i = 0; i < nioctls; i++) { 36641ae08745Sheppo if (request->operation == ioctl[i].operation) { 36650a55fbb7Slm66018 /* LDC memory operations require 8-byte multiples */ 36660a55fbb7Slm66018 ASSERT(ioctl[i].nbytes % sizeof (uint64_t) == 0); 36670a55fbb7Slm66018 36684bac2208Snarayan if (request->operation == VD_OP_GET_EFI || 36692f5224aeSachartre request->operation == VD_OP_SET_EFI || 36702f5224aeSachartre request->operation == VD_OP_SCSICMD) { 36714bac2208Snarayan if (request->nbytes >= ioctl[i].nbytes) 36724bac2208Snarayan break; 36733af08d82Slm66018 PR0("%s: Expected at least nbytes = %lu, " 36744bac2208Snarayan "got %lu", ioctl[i].operation_name, 36754bac2208Snarayan ioctl[i].nbytes, request->nbytes); 36764bac2208Snarayan return (EINVAL); 36774bac2208Snarayan } 36784bac2208Snarayan 36790a55fbb7Slm66018 if (request->nbytes != ioctl[i].nbytes) { 36803af08d82Slm66018 PR0("%s: Expected nbytes = %lu, got %lu", 36810a55fbb7Slm66018 ioctl[i].operation_name, ioctl[i].nbytes, 36820a55fbb7Slm66018 request->nbytes); 36831ae08745Sheppo return (EINVAL); 36841ae08745Sheppo } 36851ae08745Sheppo 36861ae08745Sheppo break; 36871ae08745Sheppo } 36881ae08745Sheppo } 36891ae08745Sheppo ASSERT(i < nioctls); /* because "operation" already validated */ 36901ae08745Sheppo 3691047ba61eSachartre if (!(vd->open_flags & FWRITE) && ioctl[i].write) { 3692047ba61eSachartre PR0("%s fails because backend is opened read-only", 3693047ba61eSachartre ioctl[i].operation_name); 3694047ba61eSachartre request->status = EROFS; 3695047ba61eSachartre return (0); 3696047ba61eSachartre } 3697047ba61eSachartre 36981ae08745Sheppo if (request->nbytes) 
36991ae08745Sheppo buf = kmem_zalloc(request->nbytes, KM_SLEEP); 37001ae08745Sheppo status = vd_do_ioctl(vd, request, buf, &ioctl[i]); 37011ae08745Sheppo if (request->nbytes) 37021ae08745Sheppo kmem_free(buf, request->nbytes); 370387a7269eSachartre 37041ae08745Sheppo return (status); 37051ae08745Sheppo } 37061ae08745Sheppo 37074bac2208Snarayan static int 37084bac2208Snarayan vd_get_devid(vd_task_t *task) 37094bac2208Snarayan { 37104bac2208Snarayan vd_t *vd = task->vd; 37114bac2208Snarayan vd_dring_payload_t *request = task->request; 37124bac2208Snarayan vd_devid_t *vd_devid; 37134bac2208Snarayan impl_devid_t *devid; 371487a7269eSachartre int status, bufid_len, devid_len, len, sz; 37153af08d82Slm66018 int bufbytes; 37164bac2208Snarayan 37173af08d82Slm66018 PR1("Get Device ID, nbytes=%ld", request->nbytes); 37184bac2208Snarayan 3719bae9e67eSachartre if (vd->vdisk_type == VD_DISK_TYPE_SLICE) { 3720bae9e67eSachartre /* 3721bae9e67eSachartre * We don't support devid for single-slice disks because we 3722bae9e67eSachartre * have no space to store a fabricated devid and for physical 3723bae9e67eSachartre * disk slices, we can't use the devid of the disk otherwise 3724bae9e67eSachartre * exporting multiple slices from the same disk will produce 3725bae9e67eSachartre * the same devids. 
3726bae9e67eSachartre */ 3727bae9e67eSachartre PR2("No Device ID for slices"); 3728bae9e67eSachartre request->status = ENOTSUP; 3729bae9e67eSachartre return (0); 3730bae9e67eSachartre } 3731bae9e67eSachartre 37321aff8f07SAlexandre Chartre if (VD_DSKIMG(vd)) { 37331aff8f07SAlexandre Chartre if (vd->dskimg_devid == NULL) { 37343af08d82Slm66018 PR2("No Device ID"); 3735205eeb1aSlm66018 request->status = ENOENT; 3736205eeb1aSlm66018 return (0); 373787a7269eSachartre } else { 37381aff8f07SAlexandre Chartre sz = ddi_devid_sizeof(vd->dskimg_devid); 373987a7269eSachartre devid = kmem_alloc(sz, KM_SLEEP); 37401aff8f07SAlexandre Chartre bcopy(vd->dskimg_devid, devid, sz); 374187a7269eSachartre } 374287a7269eSachartre } else { 374387a7269eSachartre if (ddi_lyr_get_devid(vd->dev[request->slice], 374487a7269eSachartre (ddi_devid_t *)&devid) != DDI_SUCCESS) { 374587a7269eSachartre PR2("No Device ID"); 3746205eeb1aSlm66018 request->status = ENOENT; 3747205eeb1aSlm66018 return (0); 374887a7269eSachartre } 37494bac2208Snarayan } 37504bac2208Snarayan 37514bac2208Snarayan bufid_len = request->nbytes - sizeof (vd_devid_t) + 1; 37524bac2208Snarayan devid_len = DEVID_GETLEN(devid); 37534bac2208Snarayan 37543af08d82Slm66018 /* 37553af08d82Slm66018 * Save the buffer size here for use in deallocation. 37563af08d82Slm66018 * The actual number of bytes copied is returned in 37573af08d82Slm66018 * the 'nbytes' field of the request structure. 37583af08d82Slm66018 */ 37593af08d82Slm66018 bufbytes = request->nbytes; 37603af08d82Slm66018 37613af08d82Slm66018 vd_devid = kmem_zalloc(bufbytes, KM_SLEEP); 37624bac2208Snarayan vd_devid->length = devid_len; 37634bac2208Snarayan vd_devid->type = DEVID_GETTYPE(devid); 37644bac2208Snarayan 37654bac2208Snarayan len = (devid_len > bufid_len)? 
bufid_len : devid_len; 37664bac2208Snarayan 37674bac2208Snarayan bcopy(devid->did_id, vd_devid->id, len); 37684bac2208Snarayan 376978fcd0a1Sachartre request->status = 0; 377078fcd0a1Sachartre 37714bac2208Snarayan /* LDC memory operations require 8-byte multiples */ 37724bac2208Snarayan ASSERT(request->nbytes % sizeof (uint64_t) == 0); 37734bac2208Snarayan 37744bac2208Snarayan if ((status = ldc_mem_copy(vd->ldc_handle, (caddr_t)vd_devid, 0, 37754bac2208Snarayan &request->nbytes, request->cookie, request->ncookies, 37764bac2208Snarayan LDC_COPY_OUT)) != 0) { 37773af08d82Slm66018 PR0("ldc_mem_copy() returned errno %d copying to client", 37784bac2208Snarayan status); 37794bac2208Snarayan } 37803af08d82Slm66018 PR1("post mem_copy: nbytes=%ld", request->nbytes); 37814bac2208Snarayan 37823af08d82Slm66018 kmem_free(vd_devid, bufbytes); 37834bac2208Snarayan ddi_devid_free((ddi_devid_t)devid); 37844bac2208Snarayan 37854bac2208Snarayan return (status); 37864bac2208Snarayan } 37874bac2208Snarayan 37882f5224aeSachartre static int 37892f5224aeSachartre vd_scsi_reset(vd_t *vd) 37902f5224aeSachartre { 37912f5224aeSachartre int rval, status; 37922f5224aeSachartre struct uscsi_cmd uscsi = { 0 }; 37932f5224aeSachartre 37942f5224aeSachartre uscsi.uscsi_flags = vd_scsi_debug | USCSI_RESET; 37952f5224aeSachartre uscsi.uscsi_timeout = vd_scsi_rdwr_timeout; 37962f5224aeSachartre 37972f5224aeSachartre status = ldi_ioctl(vd->ldi_handle[0], USCSICMD, (intptr_t)&uscsi, 37982f5224aeSachartre (vd->open_flags | FKIOCTL), kcred, &rval); 37992f5224aeSachartre 38002f5224aeSachartre return (status); 38012f5224aeSachartre } 38022f5224aeSachartre 38032f5224aeSachartre static int 38042f5224aeSachartre vd_reset(vd_task_t *task) 38052f5224aeSachartre { 38062f5224aeSachartre vd_t *vd = task->vd; 38072f5224aeSachartre vd_dring_payload_t *request = task->request; 38082f5224aeSachartre 38092f5224aeSachartre ASSERT(request->operation == VD_OP_RESET); 38102f5224aeSachartre ASSERT(vd->scsi); 
38112f5224aeSachartre 38122f5224aeSachartre PR0("Performing VD_OP_RESET"); 38132f5224aeSachartre 38142f5224aeSachartre if (request->nbytes != 0) { 38152f5224aeSachartre PR0("VD_OP_RESET: Expected nbytes = 0, got %lu", 38162f5224aeSachartre request->nbytes); 38172f5224aeSachartre return (EINVAL); 38182f5224aeSachartre } 38192f5224aeSachartre 38202f5224aeSachartre request->status = vd_scsi_reset(vd); 38212f5224aeSachartre 38222f5224aeSachartre return (0); 38232f5224aeSachartre } 38242f5224aeSachartre 38252f5224aeSachartre static int 38262f5224aeSachartre vd_get_capacity(vd_task_t *task) 38272f5224aeSachartre { 38282f5224aeSachartre int rv; 38292f5224aeSachartre size_t nbytes; 38302f5224aeSachartre vd_t *vd = task->vd; 38312f5224aeSachartre vd_dring_payload_t *request = task->request; 38322f5224aeSachartre vd_capacity_t vd_cap = { 0 }; 38332f5224aeSachartre 38342f5224aeSachartre ASSERT(request->operation == VD_OP_GET_CAPACITY); 38352f5224aeSachartre 38362f5224aeSachartre PR0("Performing VD_OP_GET_CAPACITY"); 38372f5224aeSachartre 38382f5224aeSachartre nbytes = request->nbytes; 38392f5224aeSachartre 38402f5224aeSachartre if (nbytes != RNDSIZE(vd_capacity_t)) { 38412f5224aeSachartre PR0("VD_OP_GET_CAPACITY: Expected nbytes = %lu, got %lu", 38422f5224aeSachartre RNDSIZE(vd_capacity_t), nbytes); 38432f5224aeSachartre return (EINVAL); 38442f5224aeSachartre } 38452f5224aeSachartre 3846de3a5331SRamesh Chitrothu /* 3847de3a5331SRamesh Chitrothu * Check the backend size in case it has changed. If the check fails 3848de3a5331SRamesh Chitrothu * then we will return the last known size. 
3849de3a5331SRamesh Chitrothu */ 38502f5224aeSachartre 3851de3a5331SRamesh Chitrothu (void) vd_backend_check_size(vd); 38522f5224aeSachartre ASSERT(vd->vdisk_size != 0); 38532f5224aeSachartre 38542f5224aeSachartre request->status = 0; 38552f5224aeSachartre 3856*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd_cap.vdisk_block_size = vd->vdisk_bsize; 38572f5224aeSachartre vd_cap.vdisk_size = vd->vdisk_size; 38582f5224aeSachartre 38592f5224aeSachartre if ((rv = ldc_mem_copy(vd->ldc_handle, (char *)&vd_cap, 0, &nbytes, 38602f5224aeSachartre request->cookie, request->ncookies, LDC_COPY_OUT)) != 0) { 38612f5224aeSachartre PR0("ldc_mem_copy() returned errno %d copying to client", rv); 38622f5224aeSachartre return (rv); 38632f5224aeSachartre } 38642f5224aeSachartre 38652f5224aeSachartre return (0); 38662f5224aeSachartre } 38672f5224aeSachartre 38682f5224aeSachartre static int 38692f5224aeSachartre vd_get_access(vd_task_t *task) 38702f5224aeSachartre { 38712f5224aeSachartre uint64_t access; 38722f5224aeSachartre int rv, rval = 0; 38732f5224aeSachartre size_t nbytes; 38742f5224aeSachartre vd_t *vd = task->vd; 38752f5224aeSachartre vd_dring_payload_t *request = task->request; 38762f5224aeSachartre 38772f5224aeSachartre ASSERT(request->operation == VD_OP_GET_ACCESS); 38782f5224aeSachartre ASSERT(vd->scsi); 38792f5224aeSachartre 38802f5224aeSachartre PR0("Performing VD_OP_GET_ACCESS"); 38812f5224aeSachartre 38822f5224aeSachartre nbytes = request->nbytes; 38832f5224aeSachartre 38842f5224aeSachartre if (nbytes != sizeof (uint64_t)) { 38852f5224aeSachartre PR0("VD_OP_GET_ACCESS: Expected nbytes = %lu, got %lu", 38862f5224aeSachartre sizeof (uint64_t), nbytes); 38872f5224aeSachartre return (EINVAL); 38882f5224aeSachartre } 38892f5224aeSachartre 38902f5224aeSachartre request->status = ldi_ioctl(vd->ldi_handle[request->slice], MHIOCSTATUS, 38912f5224aeSachartre NULL, (vd->open_flags | FKIOCTL), kcred, &rval); 38922f5224aeSachartre 38932f5224aeSachartre if (request->status 
!= 0) 38942f5224aeSachartre return (0); 38952f5224aeSachartre 38962f5224aeSachartre access = (rval == 0)? VD_ACCESS_ALLOWED : VD_ACCESS_DENIED; 38972f5224aeSachartre 38982f5224aeSachartre if ((rv = ldc_mem_copy(vd->ldc_handle, (char *)&access, 0, &nbytes, 38992f5224aeSachartre request->cookie, request->ncookies, LDC_COPY_OUT)) != 0) { 39002f5224aeSachartre PR0("ldc_mem_copy() returned errno %d copying to client", rv); 39012f5224aeSachartre return (rv); 39022f5224aeSachartre } 39032f5224aeSachartre 39042f5224aeSachartre return (0); 39052f5224aeSachartre } 39062f5224aeSachartre 39072f5224aeSachartre static int 39082f5224aeSachartre vd_set_access(vd_task_t *task) 39092f5224aeSachartre { 39102f5224aeSachartre uint64_t flags; 39112f5224aeSachartre int rv, rval; 39122f5224aeSachartre size_t nbytes; 39132f5224aeSachartre vd_t *vd = task->vd; 39142f5224aeSachartre vd_dring_payload_t *request = task->request; 39152f5224aeSachartre 39162f5224aeSachartre ASSERT(request->operation == VD_OP_SET_ACCESS); 39172f5224aeSachartre ASSERT(vd->scsi); 39182f5224aeSachartre 39192f5224aeSachartre nbytes = request->nbytes; 39202f5224aeSachartre 39212f5224aeSachartre if (nbytes != sizeof (uint64_t)) { 39222f5224aeSachartre PR0("VD_OP_SET_ACCESS: Expected nbytes = %lu, got %lu", 39232f5224aeSachartre sizeof (uint64_t), nbytes); 39242f5224aeSachartre return (EINVAL); 39252f5224aeSachartre } 39262f5224aeSachartre 39272f5224aeSachartre if ((rv = ldc_mem_copy(vd->ldc_handle, (char *)&flags, 0, &nbytes, 39282f5224aeSachartre request->cookie, request->ncookies, LDC_COPY_IN)) != 0) { 39292f5224aeSachartre PR0("ldc_mem_copy() returned errno %d copying from client", rv); 39302f5224aeSachartre return (rv); 39312f5224aeSachartre } 39322f5224aeSachartre 39332f5224aeSachartre if (flags == VD_ACCESS_SET_CLEAR) { 39342f5224aeSachartre PR0("Performing VD_OP_SET_ACCESS (CLEAR)"); 39352f5224aeSachartre request->status = ldi_ioctl(vd->ldi_handle[request->slice], 39362f5224aeSachartre MHIOCRELEASE, NULL, 
(vd->open_flags | FKIOCTL), kcred, 39372f5224aeSachartre &rval); 39382f5224aeSachartre if (request->status == 0) 39392f5224aeSachartre vd->ownership = B_FALSE; 39402f5224aeSachartre return (0); 39412f5224aeSachartre } 39422f5224aeSachartre 39432f5224aeSachartre /* 39442f5224aeSachartre * As per the VIO spec, the PREEMPT and PRESERVE flags are only valid 39452f5224aeSachartre * when the EXCLUSIVE flag is set. 39462f5224aeSachartre */ 39472f5224aeSachartre if (!(flags & VD_ACCESS_SET_EXCLUSIVE)) { 39482f5224aeSachartre PR0("Invalid VD_OP_SET_ACCESS flags: 0x%lx", flags); 39492f5224aeSachartre request->status = EINVAL; 39502f5224aeSachartre return (0); 39512f5224aeSachartre } 39522f5224aeSachartre 39532f5224aeSachartre switch (flags & (VD_ACCESS_SET_PREEMPT | VD_ACCESS_SET_PRESERVE)) { 39542f5224aeSachartre 39552f5224aeSachartre case VD_ACCESS_SET_PREEMPT | VD_ACCESS_SET_PRESERVE: 39562f5224aeSachartre /* 39572f5224aeSachartre * Flags EXCLUSIVE and PREEMPT and PRESERVE. We have to 39582f5224aeSachartre * acquire exclusive access rights, preserve them and we 39592f5224aeSachartre * can use preemption. So we can use the MHIOCTKNOWN ioctl. 39602f5224aeSachartre */ 39612f5224aeSachartre PR0("Performing VD_OP_SET_ACCESS (EXCLUSIVE|PREEMPT|PRESERVE)"); 39622f5224aeSachartre request->status = ldi_ioctl(vd->ldi_handle[request->slice], 39632f5224aeSachartre MHIOCTKOWN, NULL, (vd->open_flags | FKIOCTL), kcred, &rval); 39642f5224aeSachartre break; 39652f5224aeSachartre 39662f5224aeSachartre case VD_ACCESS_SET_PRESERVE: 39672f5224aeSachartre /* 39682f5224aeSachartre * Flags EXCLUSIVE and PRESERVE. We have to acquire exclusive 39692f5224aeSachartre * access rights and preserve them, but not preempt any other 39702f5224aeSachartre * host. So we need to use the MHIOCTKOWN ioctl to enable the 39712f5224aeSachartre * "preserve" feature but we can not called it directly 39722f5224aeSachartre * because it uses preemption. 
So before that, we use the 39732f5224aeSachartre * MHIOCQRESERVE ioctl to ensure we can get exclusive rights 39742f5224aeSachartre * without preempting anyone. 39752f5224aeSachartre */ 39762f5224aeSachartre PR0("Performing VD_OP_SET_ACCESS (EXCLUSIVE|PRESERVE)"); 39772f5224aeSachartre request->status = ldi_ioctl(vd->ldi_handle[request->slice], 39782f5224aeSachartre MHIOCQRESERVE, NULL, (vd->open_flags | FKIOCTL), kcred, 39792f5224aeSachartre &rval); 39802f5224aeSachartre if (request->status != 0) 39812f5224aeSachartre break; 39822f5224aeSachartre request->status = ldi_ioctl(vd->ldi_handle[request->slice], 39832f5224aeSachartre MHIOCTKOWN, NULL, (vd->open_flags | FKIOCTL), kcred, &rval); 39842f5224aeSachartre break; 39852f5224aeSachartre 39862f5224aeSachartre case VD_ACCESS_SET_PREEMPT: 39872f5224aeSachartre /* 39882f5224aeSachartre * Flags EXCLUSIVE and PREEMPT. We have to acquire exclusive 39892f5224aeSachartre * access rights and we can use preemption. So we try to do 39902f5224aeSachartre * a SCSI reservation, if it fails we reset the disk to clear 39912f5224aeSachartre * any reservation and we try to reserve again. 
39922f5224aeSachartre */ 39932f5224aeSachartre PR0("Performing VD_OP_SET_ACCESS (EXCLUSIVE|PREEMPT)"); 39942f5224aeSachartre request->status = ldi_ioctl(vd->ldi_handle[request->slice], 39952f5224aeSachartre MHIOCQRESERVE, NULL, (vd->open_flags | FKIOCTL), kcred, 39962f5224aeSachartre &rval); 39972f5224aeSachartre if (request->status == 0) 39982f5224aeSachartre break; 39992f5224aeSachartre 40002f5224aeSachartre /* reset the disk */ 40012f5224aeSachartre (void) vd_scsi_reset(vd); 40022f5224aeSachartre 40032f5224aeSachartre /* try again even if the reset has failed */ 40042f5224aeSachartre request->status = ldi_ioctl(vd->ldi_handle[request->slice], 40052f5224aeSachartre MHIOCQRESERVE, NULL, (vd->open_flags | FKIOCTL), kcred, 40062f5224aeSachartre &rval); 40072f5224aeSachartre break; 40082f5224aeSachartre 40092f5224aeSachartre case 0: 40102f5224aeSachartre /* Flag EXCLUSIVE only. Just issue a SCSI reservation */ 40112f5224aeSachartre PR0("Performing VD_OP_SET_ACCESS (EXCLUSIVE)"); 40122f5224aeSachartre request->status = ldi_ioctl(vd->ldi_handle[request->slice], 40132f5224aeSachartre MHIOCQRESERVE, NULL, (vd->open_flags | FKIOCTL), kcred, 40142f5224aeSachartre &rval); 40152f5224aeSachartre break; 40162f5224aeSachartre } 40172f5224aeSachartre 40182f5224aeSachartre if (request->status == 0) 40192f5224aeSachartre vd->ownership = B_TRUE; 40202f5224aeSachartre else 40212f5224aeSachartre PR0("VD_OP_SET_ACCESS: error %d", request->status); 40222f5224aeSachartre 40232f5224aeSachartre return (0); 40242f5224aeSachartre } 40252f5224aeSachartre 40262f5224aeSachartre static void 40272f5224aeSachartre vd_reset_access(vd_t *vd) 40282f5224aeSachartre { 40292f5224aeSachartre int status, rval; 40302f5224aeSachartre 40311aff8f07SAlexandre Chartre if (vd->file || vd->volume || !vd->ownership) 40322f5224aeSachartre return; 40332f5224aeSachartre 40342f5224aeSachartre PR0("Releasing disk ownership"); 40352f5224aeSachartre status = ldi_ioctl(vd->ldi_handle[0], MHIOCRELEASE, NULL, 
40362f5224aeSachartre (vd->open_flags | FKIOCTL), kcred, &rval); 40372f5224aeSachartre 40382f5224aeSachartre /* 40392f5224aeSachartre * An EACCES failure means that there is a reservation conflict, 40402f5224aeSachartre * so we are not the owner of the disk anymore. 40412f5224aeSachartre */ 40422f5224aeSachartre if (status == 0 || status == EACCES) { 40432f5224aeSachartre vd->ownership = B_FALSE; 40442f5224aeSachartre return; 40452f5224aeSachartre } 40462f5224aeSachartre 40472f5224aeSachartre PR0("Fail to release ownership, error %d", status); 40482f5224aeSachartre 40492f5224aeSachartre /* 40502f5224aeSachartre * We have failed to release the ownership, try to reset the disk 40512f5224aeSachartre * to release reservations. 40522f5224aeSachartre */ 40532f5224aeSachartre PR0("Resetting disk"); 40542f5224aeSachartre status = vd_scsi_reset(vd); 40552f5224aeSachartre 40562f5224aeSachartre if (status != 0) 40572f5224aeSachartre PR0("Fail to reset disk, error %d", status); 40582f5224aeSachartre 40592f5224aeSachartre /* whatever the result of the reset is, we try the release again */ 40602f5224aeSachartre status = ldi_ioctl(vd->ldi_handle[0], MHIOCRELEASE, NULL, 40612f5224aeSachartre (vd->open_flags | FKIOCTL), kcred, &rval); 40622f5224aeSachartre 40632f5224aeSachartre if (status == 0 || status == EACCES) { 40642f5224aeSachartre vd->ownership = B_FALSE; 40652f5224aeSachartre return; 40662f5224aeSachartre } 40672f5224aeSachartre 40682f5224aeSachartre PR0("Fail to release ownership, error %d", status); 40692f5224aeSachartre 40702f5224aeSachartre /* 40712f5224aeSachartre * At this point we have done our best to try to reset the 40722f5224aeSachartre * access rights to the disk and we don't know if we still 40732f5224aeSachartre * own a reservation and if any mechanism to preserve the 40742f5224aeSachartre * ownership is still in place. 
The ultimate solution would 40752f5224aeSachartre * be to reset the system but this is usually not what we 40762f5224aeSachartre * want to happen. 40772f5224aeSachartre */ 40782f5224aeSachartre 40792f5224aeSachartre if (vd_reset_access_failure == A_REBOOT) { 40802f5224aeSachartre cmn_err(CE_WARN, VD_RESET_ACCESS_FAILURE_MSG 40812f5224aeSachartre ", rebooting the system", vd->device_path); 40822f5224aeSachartre (void) uadmin(A_SHUTDOWN, AD_BOOT, NULL); 40832f5224aeSachartre } else if (vd_reset_access_failure == A_DUMP) { 40842f5224aeSachartre panic(VD_RESET_ACCESS_FAILURE_MSG, vd->device_path); 40852f5224aeSachartre } 40862f5224aeSachartre 40872f5224aeSachartre cmn_err(CE_WARN, VD_RESET_ACCESS_FAILURE_MSG, vd->device_path); 40882f5224aeSachartre } 40892f5224aeSachartre 40901ae08745Sheppo /* 40911ae08745Sheppo * Define the supported operations once the functions for performing them have 40921ae08745Sheppo * been defined 40931ae08745Sheppo */ 40941ae08745Sheppo static const vds_operation_t vds_operation[] = { 40953af08d82Slm66018 #define X(_s) #_s, _s 40963af08d82Slm66018 {X(VD_OP_BREAD), vd_start_bio, vd_complete_bio}, 40973af08d82Slm66018 {X(VD_OP_BWRITE), vd_start_bio, vd_complete_bio}, 40983af08d82Slm66018 {X(VD_OP_FLUSH), vd_ioctl, NULL}, 40993af08d82Slm66018 {X(VD_OP_GET_WCE), vd_ioctl, NULL}, 41003af08d82Slm66018 {X(VD_OP_SET_WCE), vd_ioctl, NULL}, 41013af08d82Slm66018 {X(VD_OP_GET_VTOC), vd_ioctl, NULL}, 41023af08d82Slm66018 {X(VD_OP_SET_VTOC), vd_ioctl, NULL}, 41033af08d82Slm66018 {X(VD_OP_GET_DISKGEOM), vd_ioctl, NULL}, 41043af08d82Slm66018 {X(VD_OP_SET_DISKGEOM), vd_ioctl, NULL}, 41053af08d82Slm66018 {X(VD_OP_GET_EFI), vd_ioctl, NULL}, 41063af08d82Slm66018 {X(VD_OP_SET_EFI), vd_ioctl, NULL}, 41073af08d82Slm66018 {X(VD_OP_GET_DEVID), vd_get_devid, NULL}, 41082f5224aeSachartre {X(VD_OP_SCSICMD), vd_ioctl, NULL}, 41092f5224aeSachartre {X(VD_OP_RESET), vd_reset, NULL}, 41102f5224aeSachartre {X(VD_OP_GET_CAPACITY), vd_get_capacity, NULL}, 41112f5224aeSachartre 
{X(VD_OP_SET_ACCESS), vd_set_access, NULL}, 41122f5224aeSachartre {X(VD_OP_GET_ACCESS), vd_get_access, NULL}, 41133af08d82Slm66018 #undef X 41141ae08745Sheppo }; 41151ae08745Sheppo 41161ae08745Sheppo static const size_t vds_noperations = 41171ae08745Sheppo (sizeof (vds_operation))/(sizeof (vds_operation[0])); 41181ae08745Sheppo 41191ae08745Sheppo /* 4120d10e4ef2Snarayan * Process a task specifying a client I/O request 4121205eeb1aSlm66018 * 4122205eeb1aSlm66018 * Parameters: 4123205eeb1aSlm66018 * task - structure containing the request sent from client 4124205eeb1aSlm66018 * 4125205eeb1aSlm66018 * Return Value 4126205eeb1aSlm66018 * 0 - success 4127205eeb1aSlm66018 * ENOTSUP - Unknown/Unsupported VD_OP_XXX operation 4128205eeb1aSlm66018 * EINVAL - Invalid disk slice 4129205eeb1aSlm66018 * != 0 - some other non-zero return value from start function 41301ae08745Sheppo */ 41311ae08745Sheppo static int 4132205eeb1aSlm66018 vd_do_process_task(vd_task_t *task) 41331ae08745Sheppo { 4134205eeb1aSlm66018 int i; 4135d10e4ef2Snarayan vd_t *vd = task->vd; 4136d10e4ef2Snarayan vd_dring_payload_t *request = task->request; 41371ae08745Sheppo 4138d10e4ef2Snarayan ASSERT(vd != NULL); 4139d10e4ef2Snarayan ASSERT(request != NULL); 41401ae08745Sheppo 4141d10e4ef2Snarayan /* Find the requested operation */ 4142205eeb1aSlm66018 for (i = 0; i < vds_noperations; i++) { 4143205eeb1aSlm66018 if (request->operation == vds_operation[i].operation) { 4144205eeb1aSlm66018 /* all operations should have a start func */ 4145205eeb1aSlm66018 ASSERT(vds_operation[i].start != NULL); 4146205eeb1aSlm66018 4147205eeb1aSlm66018 task->completef = vds_operation[i].complete; 4148d10e4ef2Snarayan break; 4149205eeb1aSlm66018 } 4150205eeb1aSlm66018 } 415117cadca8Slm66018 415217cadca8Slm66018 /* 415317cadca8Slm66018 * We need to check that the requested operation is permitted 415417cadca8Slm66018 * for the particular client that sent it or that the loop above 415517cadca8Slm66018 * did not complete without 
finding the operation type (indicating 415617cadca8Slm66018 * that the requested operation is unknown/unimplemented) 415717cadca8Slm66018 */ 415817cadca8Slm66018 if ((VD_OP_SUPPORTED(vd->operations, request->operation) == B_FALSE) || 415917cadca8Slm66018 (i == vds_noperations)) { 41603af08d82Slm66018 PR0("Unsupported operation %u", request->operation); 416117cadca8Slm66018 request->status = ENOTSUP; 416217cadca8Slm66018 return (0); 41631ae08745Sheppo } 41641ae08745Sheppo 41657636cb21Slm66018 /* Range-check slice */ 416687a7269eSachartre if (request->slice >= vd->nslices && 4167bae9e67eSachartre ((vd->vdisk_type != VD_DISK_TYPE_DISK && vd_slice_single_slice) || 416887a7269eSachartre request->slice != VD_SLICE_NONE)) { 41693af08d82Slm66018 PR0("Invalid \"slice\" %u (max %u) for virtual disk", 41707636cb21Slm66018 request->slice, (vd->nslices - 1)); 4171bae9e67eSachartre request->status = EINVAL; 4172bae9e67eSachartre return (0); 41737636cb21Slm66018 } 41747636cb21Slm66018 4175205eeb1aSlm66018 /* 4176205eeb1aSlm66018 * Call the function pointer that starts the operation. 4177205eeb1aSlm66018 */ 4178205eeb1aSlm66018 return (vds_operation[i].start(task)); 41791ae08745Sheppo } 41801ae08745Sheppo 4181205eeb1aSlm66018 /* 4182205eeb1aSlm66018 * Description: 4183205eeb1aSlm66018 * This function is called by both the in-band and descriptor ring 4184205eeb1aSlm66018 * message processing functions paths to actually execute the task 4185205eeb1aSlm66018 * requested by the vDisk client. It in turn calls its worker 4186205eeb1aSlm66018 * function, vd_do_process_task(), to carry our the request. 4187205eeb1aSlm66018 * 4188205eeb1aSlm66018 * Any transport errors (e.g. LDC errors, vDisk protocol errors) are 4189205eeb1aSlm66018 * saved in the 'status' field of the task and are propagated back 4190205eeb1aSlm66018 * up the call stack to trigger a NACK 4191205eeb1aSlm66018 * 4192205eeb1aSlm66018 * Any request errors (e.g. 
ENOTTY from an ioctl) are saved in 4193205eeb1aSlm66018 * the 'status' field of the request and result in an ACK being sent 4194205eeb1aSlm66018 * by the completion handler. 4195205eeb1aSlm66018 * 4196205eeb1aSlm66018 * Parameters: 4197205eeb1aSlm66018 * task - structure containing the request sent from client 4198205eeb1aSlm66018 * 4199205eeb1aSlm66018 * Return Value 4200205eeb1aSlm66018 * 0 - successful synchronous request. 4201205eeb1aSlm66018 * != 0 - transport error (e.g. LDC errors, vDisk protocol) 4202205eeb1aSlm66018 * EINPROGRESS - task will be finished in a completion handler 4203205eeb1aSlm66018 */ 4204205eeb1aSlm66018 static int 4205205eeb1aSlm66018 vd_process_task(vd_task_t *task) 4206205eeb1aSlm66018 { 4207205eeb1aSlm66018 vd_t *vd = task->vd; 4208205eeb1aSlm66018 int status; 42091ae08745Sheppo 4210205eeb1aSlm66018 DTRACE_PROBE1(task__start, vd_task_t *, task); 42113af08d82Slm66018 4212205eeb1aSlm66018 task->status = vd_do_process_task(task); 4213205eeb1aSlm66018 4214205eeb1aSlm66018 /* 4215205eeb1aSlm66018 * If the task processing function returned EINPROGRESS indicating 4216205eeb1aSlm66018 * that the task needs completing then schedule a taskq entry to 4217205eeb1aSlm66018 * finish it now. 4218205eeb1aSlm66018 * 4219205eeb1aSlm66018 * Otherwise the task processing function returned either zero 4220205eeb1aSlm66018 * indicating that the task was finished in the start function (and we 4221205eeb1aSlm66018 * don't need to wait in a completion function) or the start function 4222205eeb1aSlm66018 * returned an error - in both cases all that needs to happen is the 4223205eeb1aSlm66018 * notification to the vDisk client higher up the call stack. 4224205eeb1aSlm66018 * If the task was using a Descriptor Ring, we need to mark it as done 4225205eeb1aSlm66018 * at this stage. 
4226205eeb1aSlm66018 */ 4227205eeb1aSlm66018 if (task->status == EINPROGRESS) { 4228d10e4ef2Snarayan /* Queue a task to complete the operation */ 4229205eeb1aSlm66018 (void) ddi_taskq_dispatch(vd->completionq, vd_complete, 4230d10e4ef2Snarayan task, DDI_SLEEP); 423183990c4aSAlexandre Chartre return (EINPROGRESS); 423283990c4aSAlexandre Chartre } 4233d10e4ef2Snarayan 423483990c4aSAlexandre Chartre if (!vd->reset_state && (vd->xfer_mode == VIO_DRING_MODE_V1_0)) { 4235205eeb1aSlm66018 /* Update the dring element if it's a dring client */ 4236205eeb1aSlm66018 status = vd_mark_elem_done(vd, task->index, 4237205eeb1aSlm66018 task->request->status, task->request->nbytes); 4238205eeb1aSlm66018 if (status == ECONNRESET) 4239205eeb1aSlm66018 vd_mark_in_reset(vd); 4240bbfa0259Sha137994 else if (status == EACCES) 4241bbfa0259Sha137994 vd_need_reset(vd, B_TRUE); 4242205eeb1aSlm66018 } 4243205eeb1aSlm66018 4244205eeb1aSlm66018 return (task->status); 42451ae08745Sheppo } 42461ae08745Sheppo 42471ae08745Sheppo /* 42480a55fbb7Slm66018 * Return true if the "type", "subtype", and "env" fields of the "tag" first 42490a55fbb7Slm66018 * argument match the corresponding remaining arguments; otherwise, return false 42501ae08745Sheppo */ 42510a55fbb7Slm66018 boolean_t 42521ae08745Sheppo vd_msgtype(vio_msg_tag_t *tag, int type, int subtype, int env) 42531ae08745Sheppo { 42541ae08745Sheppo return ((tag->vio_msgtype == type) && 42551ae08745Sheppo (tag->vio_subtype == subtype) && 42560a55fbb7Slm66018 (tag->vio_subtype_env == env)) ? B_TRUE : B_FALSE; 42571ae08745Sheppo } 42581ae08745Sheppo 42590a55fbb7Slm66018 /* 42600a55fbb7Slm66018 * Check whether the major/minor version specified in "ver_msg" is supported 42610a55fbb7Slm66018 * by this server. 
42620a55fbb7Slm66018 */ 42630a55fbb7Slm66018 static boolean_t 42640a55fbb7Slm66018 vds_supported_version(vio_ver_msg_t *ver_msg) 42650a55fbb7Slm66018 { 42660a55fbb7Slm66018 for (int i = 0; i < vds_num_versions; i++) { 42670a55fbb7Slm66018 ASSERT(vds_version[i].major > 0); 42680a55fbb7Slm66018 ASSERT((i == 0) || 42690a55fbb7Slm66018 (vds_version[i].major < vds_version[i-1].major)); 42700a55fbb7Slm66018 42710a55fbb7Slm66018 /* 42720a55fbb7Slm66018 * If the major versions match, adjust the minor version, if 42730a55fbb7Slm66018 * necessary, down to the highest value supported by this 42740a55fbb7Slm66018 * server and return true so this message will get "ack"ed; 42750a55fbb7Slm66018 * the client should also support all minor versions lower 42760a55fbb7Slm66018 * than the value it sent 42770a55fbb7Slm66018 */ 42780a55fbb7Slm66018 if (ver_msg->ver_major == vds_version[i].major) { 42790a55fbb7Slm66018 if (ver_msg->ver_minor > vds_version[i].minor) { 42800a55fbb7Slm66018 PR0("Adjusting minor version from %u to %u", 42810a55fbb7Slm66018 ver_msg->ver_minor, vds_version[i].minor); 42820a55fbb7Slm66018 ver_msg->ver_minor = vds_version[i].minor; 42830a55fbb7Slm66018 } 42840a55fbb7Slm66018 return (B_TRUE); 42850a55fbb7Slm66018 } 42860a55fbb7Slm66018 42870a55fbb7Slm66018 /* 42880a55fbb7Slm66018 * If the message contains a higher major version number, set 42890a55fbb7Slm66018 * the message's major/minor versions to the current values 42900a55fbb7Slm66018 * and return false, so this message will get "nack"ed with 42910a55fbb7Slm66018 * these values, and the client will potentially try again 42920a55fbb7Slm66018 * with the same or a lower version 42930a55fbb7Slm66018 */ 42940a55fbb7Slm66018 if (ver_msg->ver_major > vds_version[i].major) { 42950a55fbb7Slm66018 ver_msg->ver_major = vds_version[i].major; 42960a55fbb7Slm66018 ver_msg->ver_minor = vds_version[i].minor; 42970a55fbb7Slm66018 return (B_FALSE); 42980a55fbb7Slm66018 } 42990a55fbb7Slm66018 43000a55fbb7Slm66018 /* 
43010a55fbb7Slm66018 * Otherwise, the message's major version is less than the 43020a55fbb7Slm66018 * current major version, so continue the loop to the next 43030a55fbb7Slm66018 * (lower) supported version 43040a55fbb7Slm66018 */ 43050a55fbb7Slm66018 } 43060a55fbb7Slm66018 43070a55fbb7Slm66018 /* 43080a55fbb7Slm66018 * No common version was found; "ground" the version pair in the 43090a55fbb7Slm66018 * message to terminate negotiation 43100a55fbb7Slm66018 */ 43110a55fbb7Slm66018 ver_msg->ver_major = 0; 43120a55fbb7Slm66018 ver_msg->ver_minor = 0; 43130a55fbb7Slm66018 return (B_FALSE); 43140a55fbb7Slm66018 } 43150a55fbb7Slm66018 43160a55fbb7Slm66018 /* 43170a55fbb7Slm66018 * Process a version message from a client. vds expects to receive version 43180a55fbb7Slm66018 * messages from clients seeking service, but never issues version messages 43190a55fbb7Slm66018 * itself; therefore, vds can ACK or NACK client version messages, but does 43200a55fbb7Slm66018 * not expect to receive version-message ACKs or NACKs (and will treat such 43210a55fbb7Slm66018 * messages as invalid). 
43220a55fbb7Slm66018 */ 43231ae08745Sheppo static int 43240a55fbb7Slm66018 vd_process_ver_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 43251ae08745Sheppo { 43261ae08745Sheppo vio_ver_msg_t *ver_msg = (vio_ver_msg_t *)msg; 43271ae08745Sheppo 43281ae08745Sheppo 43291ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 43301ae08745Sheppo 43311ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 43321ae08745Sheppo VIO_VER_INFO)) { 43331ae08745Sheppo return (ENOMSG); /* not a version message */ 43341ae08745Sheppo } 43351ae08745Sheppo 43361ae08745Sheppo if (msglen != sizeof (*ver_msg)) { 43373af08d82Slm66018 PR0("Expected %lu-byte version message; " 43381ae08745Sheppo "received %lu bytes", sizeof (*ver_msg), msglen); 43391ae08745Sheppo return (EBADMSG); 43401ae08745Sheppo } 43411ae08745Sheppo 43421ae08745Sheppo if (ver_msg->dev_class != VDEV_DISK) { 43433af08d82Slm66018 PR0("Expected device class %u (disk); received %u", 43441ae08745Sheppo VDEV_DISK, ver_msg->dev_class); 43451ae08745Sheppo return (EBADMSG); 43461ae08745Sheppo } 43471ae08745Sheppo 43480a55fbb7Slm66018 /* 43490a55fbb7Slm66018 * We're talking to the expected kind of client; set our device class 43500a55fbb7Slm66018 * for "ack/nack" back to the client 43510a55fbb7Slm66018 */ 43521ae08745Sheppo ver_msg->dev_class = VDEV_DISK_SERVER; 43530a55fbb7Slm66018 43540a55fbb7Slm66018 /* 43550a55fbb7Slm66018 * Check whether the (valid) version message specifies a version 43560a55fbb7Slm66018 * supported by this server. 
If the version is not supported, return 43570a55fbb7Slm66018 * EBADMSG so the message will get "nack"ed; vds_supported_version() 43580a55fbb7Slm66018 * will have updated the message with a supported version for the 43590a55fbb7Slm66018 * client to consider 43600a55fbb7Slm66018 */ 43610a55fbb7Slm66018 if (!vds_supported_version(ver_msg)) 43620a55fbb7Slm66018 return (EBADMSG); 43630a55fbb7Slm66018 43640a55fbb7Slm66018 43650a55fbb7Slm66018 /* 43660a55fbb7Slm66018 * A version has been agreed upon; use the client's SID for 43670a55fbb7Slm66018 * communication on this channel now 43680a55fbb7Slm66018 */ 43690a55fbb7Slm66018 ASSERT(!(vd->initialized & VD_SID)); 43700a55fbb7Slm66018 vd->sid = ver_msg->tag.vio_sid; 43710a55fbb7Slm66018 vd->initialized |= VD_SID; 43720a55fbb7Slm66018 43730a55fbb7Slm66018 /* 437417cadca8Slm66018 * Store the negotiated major and minor version values in the "vd" data 437517cadca8Slm66018 * structure so that we can check if certain operations are supported 437617cadca8Slm66018 * by the client. 43770a55fbb7Slm66018 */ 437817cadca8Slm66018 vd->version.major = ver_msg->ver_major; 437917cadca8Slm66018 vd->version.minor = ver_msg->ver_minor; 43800a55fbb7Slm66018 43810a55fbb7Slm66018 PR0("Using major version %u, minor version %u", 43820a55fbb7Slm66018 ver_msg->ver_major, ver_msg->ver_minor); 43831ae08745Sheppo return (0); 43841ae08745Sheppo } 43851ae08745Sheppo 438617cadca8Slm66018 static void 438717cadca8Slm66018 vd_set_exported_operations(vd_t *vd) 438817cadca8Slm66018 { 438917cadca8Slm66018 vd->operations = 0; /* clear field */ 439017cadca8Slm66018 439117cadca8Slm66018 /* 439217cadca8Slm66018 * We need to check from the highest version supported to the 439317cadca8Slm66018 * lowest because versions with a higher minor number implicitly 439417cadca8Slm66018 * support versions with a lower minor number. 
439517cadca8Slm66018 */ 439617cadca8Slm66018 if (vio_ver_is_supported(vd->version, 1, 1)) { 439717cadca8Slm66018 ASSERT(vd->open_flags & FREAD); 4398de3a5331SRamesh Chitrothu vd->operations |= VD_OP_MASK_READ | (1 << VD_OP_GET_CAPACITY); 439917cadca8Slm66018 440017cadca8Slm66018 if (vd->open_flags & FWRITE) 440117cadca8Slm66018 vd->operations |= VD_OP_MASK_WRITE; 440217cadca8Slm66018 44032f5224aeSachartre if (vd->scsi) 44042f5224aeSachartre vd->operations |= VD_OP_MASK_SCSI; 44052f5224aeSachartre 44061aff8f07SAlexandre Chartre if (VD_DSKIMG(vd) && vd_dskimg_is_iso_image(vd)) { 440717cadca8Slm66018 /* 440817cadca8Slm66018 * can't write to ISO images, make sure that write 440917cadca8Slm66018 * support is not set in case administrator did not 441017cadca8Slm66018 * use "options=ro" when doing an ldm add-vdsdev 441117cadca8Slm66018 */ 441217cadca8Slm66018 vd->operations &= ~VD_OP_MASK_WRITE; 441317cadca8Slm66018 } 441417cadca8Slm66018 } else if (vio_ver_is_supported(vd->version, 1, 0)) { 441517cadca8Slm66018 vd->operations = VD_OP_MASK_READ | VD_OP_MASK_WRITE; 441617cadca8Slm66018 } 441717cadca8Slm66018 441817cadca8Slm66018 /* we should have already agreed on a version */ 441917cadca8Slm66018 ASSERT(vd->operations != 0); 442017cadca8Slm66018 } 442117cadca8Slm66018 44221ae08745Sheppo static int 44231ae08745Sheppo vd_process_attr_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 44241ae08745Sheppo { 44251ae08745Sheppo vd_attr_msg_t *attr_msg = (vd_attr_msg_t *)msg; 44263c96341aSnarayan int status, retry = 0; 44271ae08745Sheppo 44281ae08745Sheppo 44291ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 44301ae08745Sheppo 44311ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 44321ae08745Sheppo VIO_ATTR_INFO)) { 4433d10e4ef2Snarayan PR0("Message is not an attribute message"); 4434d10e4ef2Snarayan return (ENOMSG); 44351ae08745Sheppo } 44361ae08745Sheppo 44371ae08745Sheppo if (msglen != sizeof (*attr_msg)) { 44383af08d82Slm66018 PR0("Expected %lu-byte 
attribute message; " 44391ae08745Sheppo "received %lu bytes", sizeof (*attr_msg), msglen); 44401ae08745Sheppo return (EBADMSG); 44411ae08745Sheppo } 44421ae08745Sheppo 44431ae08745Sheppo if (attr_msg->max_xfer_sz == 0) { 44443af08d82Slm66018 PR0("Received maximum transfer size of 0 from client"); 44451ae08745Sheppo return (EBADMSG); 44461ae08745Sheppo } 44471ae08745Sheppo 44481ae08745Sheppo if ((attr_msg->xfer_mode != VIO_DESC_MODE) && 4449f0ca1d9aSsb155480 (attr_msg->xfer_mode != VIO_DRING_MODE_V1_0)) { 44503af08d82Slm66018 PR0("Client requested unsupported transfer mode"); 44511ae08745Sheppo return (EBADMSG); 44521ae08745Sheppo } 44531ae08745Sheppo 44543c96341aSnarayan /* 44553c96341aSnarayan * check if the underlying disk is ready, if not try accessing 44563c96341aSnarayan * the device again. Open the vdisk device and extract info 44573c96341aSnarayan * about it, as this is needed to respond to the attr info msg 44583c96341aSnarayan */ 44593c96341aSnarayan if ((vd->initialized & VD_DISK_READY) == 0) { 44603c96341aSnarayan PR0("Retry setting up disk (%s)", vd->device_path); 44613c96341aSnarayan do { 44623c96341aSnarayan status = vd_setup_vd(vd); 44633c96341aSnarayan if (status != EAGAIN || ++retry > vds_dev_retries) 44643c96341aSnarayan break; 44653c96341aSnarayan 44663c96341aSnarayan /* incremental delay */ 44673c96341aSnarayan delay(drv_usectohz(vds_dev_delay)); 44683c96341aSnarayan 44693c96341aSnarayan /* if vdisk is no longer enabled - return error */ 44703c96341aSnarayan if (!vd_enabled(vd)) 44713c96341aSnarayan return (ENXIO); 44723c96341aSnarayan 44733c96341aSnarayan } while (status == EAGAIN); 44743c96341aSnarayan 44753c96341aSnarayan if (status) 44763c96341aSnarayan return (ENXIO); 44773c96341aSnarayan 44783c96341aSnarayan vd->initialized |= VD_DISK_READY; 44793c96341aSnarayan ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR); 44808fce2fd6Sachartre PR0("vdisk_type = %s, volume = %s, file = %s, nslices = %u", 44813c96341aSnarayan ((vd->vdisk_type == 
VD_DISK_TYPE_DISK) ? "disk" : "slice"), 44828fce2fd6Sachartre (vd->volume ? "yes" : "no"), 44833c96341aSnarayan (vd->file ? "yes" : "no"), 44843c96341aSnarayan vd->nslices); 44853c96341aSnarayan } 44863c96341aSnarayan 44871ae08745Sheppo /* Success: valid message and transfer mode */ 44881ae08745Sheppo vd->xfer_mode = attr_msg->xfer_mode; 44893af08d82Slm66018 44901ae08745Sheppo if (vd->xfer_mode == VIO_DESC_MODE) { 44913af08d82Slm66018 44921ae08745Sheppo /* 44931ae08745Sheppo * The vd_dring_inband_msg_t contains one cookie; need room 44941ae08745Sheppo * for up to n-1 more cookies, where "n" is the number of full 44951ae08745Sheppo * pages plus possibly one partial page required to cover 44961ae08745Sheppo * "max_xfer_sz". Add room for one more cookie if 44971ae08745Sheppo * "max_xfer_sz" isn't an integral multiple of the page size. 44981ae08745Sheppo * Must first get the maximum transfer size in bytes. 44991ae08745Sheppo */ 45001ae08745Sheppo size_t max_xfer_bytes = attr_msg->vdisk_block_size ? 45011ae08745Sheppo attr_msg->vdisk_block_size * attr_msg->max_xfer_sz : 45021ae08745Sheppo attr_msg->max_xfer_sz; 45031ae08745Sheppo size_t max_inband_msglen = 45041ae08745Sheppo sizeof (vd_dring_inband_msg_t) + 45051ae08745Sheppo ((max_xfer_bytes/PAGESIZE + 45061ae08745Sheppo ((max_xfer_bytes % PAGESIZE) ? 
1 : 0))* 45071ae08745Sheppo (sizeof (ldc_mem_cookie_t))); 45081ae08745Sheppo 45091ae08745Sheppo /* 45101ae08745Sheppo * Set the maximum expected message length to 45111ae08745Sheppo * accommodate in-band-descriptor messages with all 45121ae08745Sheppo * their cookies 45131ae08745Sheppo */ 45141ae08745Sheppo vd->max_msglen = MAX(vd->max_msglen, max_inband_msglen); 4515d10e4ef2Snarayan 4516d10e4ef2Snarayan /* 4517d10e4ef2Snarayan * Initialize the data structure for processing in-band I/O 4518d10e4ef2Snarayan * request descriptors 4519d10e4ef2Snarayan */ 4520d10e4ef2Snarayan vd->inband_task.vd = vd; 45213af08d82Slm66018 vd->inband_task.msg = kmem_alloc(vd->max_msglen, KM_SLEEP); 4522d10e4ef2Snarayan vd->inband_task.index = 0; 4523d10e4ef2Snarayan vd->inband_task.type = VD_FINAL_RANGE_TASK; /* range == 1 */ 45241ae08745Sheppo } 45251ae08745Sheppo 4526e1ebb9ecSlm66018 /* Return the device's block size and max transfer size to the client */ 4527*65908c77Syu, larry liu - Sun Microsystems - Beijing China attr_msg->vdisk_block_size = vd->vdisk_bsize; 4528e1ebb9ecSlm66018 attr_msg->max_xfer_sz = vd->max_xfer_sz; 4529e1ebb9ecSlm66018 45301ae08745Sheppo attr_msg->vdisk_size = vd->vdisk_size; 4531bae9e67eSachartre attr_msg->vdisk_type = (vd_slice_single_slice)? 
vd->vdisk_type : 4532bae9e67eSachartre VD_DISK_TYPE_DISK; 453317cadca8Slm66018 attr_msg->vdisk_media = vd->vdisk_media; 453417cadca8Slm66018 453517cadca8Slm66018 /* Discover and save the list of supported VD_OP_XXX operations */ 453617cadca8Slm66018 vd_set_exported_operations(vd); 453717cadca8Slm66018 attr_msg->operations = vd->operations; 453817cadca8Slm66018 45391ae08745Sheppo PR0("%s", VD_CLIENT(vd)); 45403af08d82Slm66018 45413af08d82Slm66018 ASSERT(vd->dring_task == NULL); 45423af08d82Slm66018 45431ae08745Sheppo return (0); 45441ae08745Sheppo } 45451ae08745Sheppo 45461ae08745Sheppo static int 45471ae08745Sheppo vd_process_dring_reg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 45481ae08745Sheppo { 45491ae08745Sheppo int status; 45501ae08745Sheppo size_t expected; 45511ae08745Sheppo ldc_mem_info_t dring_minfo; 4552bbfa0259Sha137994 uint8_t mtype; 45531ae08745Sheppo vio_dring_reg_msg_t *reg_msg = (vio_dring_reg_msg_t *)msg; 45541ae08745Sheppo 45551ae08745Sheppo 45561ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 45571ae08745Sheppo 45581ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 45591ae08745Sheppo VIO_DRING_REG)) { 4560d10e4ef2Snarayan PR0("Message is not a register-dring message"); 4561d10e4ef2Snarayan return (ENOMSG); 45621ae08745Sheppo } 45631ae08745Sheppo 45641ae08745Sheppo if (msglen < sizeof (*reg_msg)) { 45653af08d82Slm66018 PR0("Expected at least %lu-byte register-dring message; " 45661ae08745Sheppo "received %lu bytes", sizeof (*reg_msg), msglen); 45671ae08745Sheppo return (EBADMSG); 45681ae08745Sheppo } 45691ae08745Sheppo 45701ae08745Sheppo expected = sizeof (*reg_msg) + 45711ae08745Sheppo (reg_msg->ncookies - 1)*(sizeof (reg_msg->cookie[0])); 45721ae08745Sheppo if (msglen != expected) { 45733af08d82Slm66018 PR0("Expected %lu-byte register-dring message; " 45741ae08745Sheppo "received %lu bytes", expected, msglen); 45751ae08745Sheppo return (EBADMSG); 45761ae08745Sheppo } 45771ae08745Sheppo 45781ae08745Sheppo if 
(vd->initialized & VD_DRING) { 45793af08d82Slm66018 PR0("A dring was previously registered; only support one"); 45801ae08745Sheppo return (EBADMSG); 45811ae08745Sheppo } 45821ae08745Sheppo 4583d10e4ef2Snarayan if (reg_msg->num_descriptors > INT32_MAX) { 45843af08d82Slm66018 PR0("reg_msg->num_descriptors = %u; must be <= %u (%s)", 4585d10e4ef2Snarayan reg_msg->ncookies, INT32_MAX, STRINGIZE(INT32_MAX)); 4586d10e4ef2Snarayan return (EBADMSG); 4587d10e4ef2Snarayan } 4588d10e4ef2Snarayan 45891ae08745Sheppo if (reg_msg->ncookies != 1) { 45901ae08745Sheppo /* 45911ae08745Sheppo * In addition to fixing the assertion in the success case 45921ae08745Sheppo * below, supporting drings which require more than one 45931ae08745Sheppo * "cookie" requires increasing the value of vd->max_msglen 45941ae08745Sheppo * somewhere in the code path prior to receiving the message 45951ae08745Sheppo * which results in calling this function. Note that without 45961ae08745Sheppo * making this change, the larger message size required to 45971ae08745Sheppo * accommodate multiple cookies cannot be successfully 45981ae08745Sheppo * received, so this function will not even get called. 
45991ae08745Sheppo * Gracefully accommodating more dring cookies might 46001ae08745Sheppo * reasonably demand exchanging an additional attribute or 46011ae08745Sheppo * making a minor protocol adjustment 46021ae08745Sheppo */ 46033af08d82Slm66018 PR0("reg_msg->ncookies = %u != 1", reg_msg->ncookies); 46041ae08745Sheppo return (EBADMSG); 46051ae08745Sheppo } 46061ae08745Sheppo 4607bbfa0259Sha137994 if (vd_direct_mapped_drings) 4608bbfa0259Sha137994 mtype = LDC_DIRECT_MAP; 4609bbfa0259Sha137994 else 4610bbfa0259Sha137994 mtype = LDC_SHADOW_MAP; 4611bbfa0259Sha137994 46121ae08745Sheppo status = ldc_mem_dring_map(vd->ldc_handle, reg_msg->cookie, 46131ae08745Sheppo reg_msg->ncookies, reg_msg->num_descriptors, 4614bbfa0259Sha137994 reg_msg->descriptor_size, mtype, &vd->dring_handle); 46151ae08745Sheppo if (status != 0) { 46163af08d82Slm66018 PR0("ldc_mem_dring_map() returned errno %d", status); 46171ae08745Sheppo return (status); 46181ae08745Sheppo } 46191ae08745Sheppo 46201ae08745Sheppo /* 46211ae08745Sheppo * To remove the need for this assertion, must call 46221ae08745Sheppo * ldc_mem_dring_nextcookie() successfully ncookies-1 times after a 46231ae08745Sheppo * successful call to ldc_mem_dring_map() 46241ae08745Sheppo */ 46251ae08745Sheppo ASSERT(reg_msg->ncookies == 1); 46261ae08745Sheppo 46271ae08745Sheppo if ((status = 46281ae08745Sheppo ldc_mem_dring_info(vd->dring_handle, &dring_minfo)) != 0) { 46293af08d82Slm66018 PR0("ldc_mem_dring_info() returned errno %d", status); 46301ae08745Sheppo if ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0) 46313af08d82Slm66018 PR0("ldc_mem_dring_unmap() returned errno %d", status); 46321ae08745Sheppo return (status); 46331ae08745Sheppo } 46341ae08745Sheppo 46351ae08745Sheppo if (dring_minfo.vaddr == NULL) { 46363af08d82Slm66018 PR0("Descriptor ring virtual address is NULL"); 46370a55fbb7Slm66018 return (ENXIO); 46381ae08745Sheppo } 46391ae08745Sheppo 46401ae08745Sheppo 4641d10e4ef2Snarayan /* Initialize for valid message 
and mapped dring */ 46421ae08745Sheppo vd->initialized |= VD_DRING; 46431ae08745Sheppo vd->dring_ident = 1; /* "There Can Be Only One" */ 46441ae08745Sheppo vd->dring = dring_minfo.vaddr; 46451ae08745Sheppo vd->descriptor_size = reg_msg->descriptor_size; 46461ae08745Sheppo vd->dring_len = reg_msg->num_descriptors; 4647bbfa0259Sha137994 vd->dring_mtype = dring_minfo.mtype; 46481ae08745Sheppo reg_msg->dring_ident = vd->dring_ident; 46495b7cb889Sha137994 PR1("descriptor size = %u, dring length = %u", 46505b7cb889Sha137994 vd->descriptor_size, vd->dring_len); 4651d10e4ef2Snarayan 4652d10e4ef2Snarayan /* 4653d10e4ef2Snarayan * Allocate and initialize a "shadow" array of data structures for 4654d10e4ef2Snarayan * tasks to process I/O requests in dring elements 4655d10e4ef2Snarayan */ 4656d10e4ef2Snarayan vd->dring_task = 4657d10e4ef2Snarayan kmem_zalloc((sizeof (*vd->dring_task)) * vd->dring_len, KM_SLEEP); 4658d10e4ef2Snarayan for (int i = 0; i < vd->dring_len; i++) { 4659d10e4ef2Snarayan vd->dring_task[i].vd = vd; 4660d10e4ef2Snarayan vd->dring_task[i].index = i; 46614bac2208Snarayan 46624bac2208Snarayan status = ldc_mem_alloc_handle(vd->ldc_handle, 46634bac2208Snarayan &(vd->dring_task[i].mhdl)); 46644bac2208Snarayan if (status) { 46653af08d82Slm66018 PR0("ldc_mem_alloc_handle() returned err %d ", status); 46664bac2208Snarayan return (ENXIO); 46674bac2208Snarayan } 46683af08d82Slm66018 46695b7cb889Sha137994 /* 46705b7cb889Sha137994 * The descriptor payload varies in length. Calculate its 46715b7cb889Sha137994 * size by subtracting the header size from the total 46725b7cb889Sha137994 * descriptor size. 
46735b7cb889Sha137994 */ 46745b7cb889Sha137994 vd->dring_task[i].request = kmem_zalloc((vd->descriptor_size - 46755b7cb889Sha137994 sizeof (vio_dring_entry_hdr_t)), KM_SLEEP); 46763af08d82Slm66018 vd->dring_task[i].msg = kmem_alloc(vd->max_msglen, KM_SLEEP); 4677d10e4ef2Snarayan } 4678d10e4ef2Snarayan 467983990c4aSAlexandre Chartre if (vd->file || vd->zvol) { 468083990c4aSAlexandre Chartre vd->write_queue = 468183990c4aSAlexandre Chartre kmem_zalloc(sizeof (buf_t *) * vd->dring_len, KM_SLEEP); 468283990c4aSAlexandre Chartre } 468383990c4aSAlexandre Chartre 46841ae08745Sheppo return (0); 46851ae08745Sheppo } 46861ae08745Sheppo 46871ae08745Sheppo static int 46881ae08745Sheppo vd_process_dring_unreg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 46891ae08745Sheppo { 46901ae08745Sheppo vio_dring_unreg_msg_t *unreg_msg = (vio_dring_unreg_msg_t *)msg; 46911ae08745Sheppo 46921ae08745Sheppo 46931ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 46941ae08745Sheppo 46951ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 46961ae08745Sheppo VIO_DRING_UNREG)) { 4697d10e4ef2Snarayan PR0("Message is not an unregister-dring message"); 4698d10e4ef2Snarayan return (ENOMSG); 46991ae08745Sheppo } 47001ae08745Sheppo 47011ae08745Sheppo if (msglen != sizeof (*unreg_msg)) { 47023af08d82Slm66018 PR0("Expected %lu-byte unregister-dring message; " 47031ae08745Sheppo "received %lu bytes", sizeof (*unreg_msg), msglen); 47041ae08745Sheppo return (EBADMSG); 47051ae08745Sheppo } 47061ae08745Sheppo 47071ae08745Sheppo if (unreg_msg->dring_ident != vd->dring_ident) { 47083af08d82Slm66018 PR0("Expected dring ident %lu; received %lu", 47091ae08745Sheppo vd->dring_ident, unreg_msg->dring_ident); 47101ae08745Sheppo return (EBADMSG); 47111ae08745Sheppo } 47121ae08745Sheppo 47131ae08745Sheppo return (0); 47141ae08745Sheppo } 47151ae08745Sheppo 47161ae08745Sheppo static int 47171ae08745Sheppo process_rdx_msg(vio_msg_t *msg, size_t msglen) 47181ae08745Sheppo { 47191ae08745Sheppo 
ASSERT(msglen >= sizeof (msg->tag)); 47201ae08745Sheppo 4721d10e4ef2Snarayan if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX)) { 4722d10e4ef2Snarayan PR0("Message is not an RDX message"); 4723d10e4ef2Snarayan return (ENOMSG); 4724d10e4ef2Snarayan } 47251ae08745Sheppo 47261ae08745Sheppo if (msglen != sizeof (vio_rdx_msg_t)) { 47273af08d82Slm66018 PR0("Expected %lu-byte RDX message; received %lu bytes", 47281ae08745Sheppo sizeof (vio_rdx_msg_t), msglen); 47291ae08745Sheppo return (EBADMSG); 47301ae08745Sheppo } 47311ae08745Sheppo 4732d10e4ef2Snarayan PR0("Valid RDX message"); 47331ae08745Sheppo return (0); 47341ae08745Sheppo } 47351ae08745Sheppo 47361ae08745Sheppo static int 47371ae08745Sheppo vd_check_seq_num(vd_t *vd, uint64_t seq_num) 47381ae08745Sheppo { 47391ae08745Sheppo if ((vd->initialized & VD_SEQ_NUM) && (seq_num != vd->seq_num + 1)) { 47403af08d82Slm66018 PR0("Received seq_num %lu; expected %lu", 47411ae08745Sheppo seq_num, (vd->seq_num + 1)); 47423af08d82Slm66018 PR0("initiating soft reset"); 4743d10e4ef2Snarayan vd_need_reset(vd, B_FALSE); 47441ae08745Sheppo return (1); 47451ae08745Sheppo } 47461ae08745Sheppo 47471ae08745Sheppo vd->seq_num = seq_num; 47481ae08745Sheppo vd->initialized |= VD_SEQ_NUM; /* superfluous after first time... 
*/ 47491ae08745Sheppo return (0); 47501ae08745Sheppo } 47511ae08745Sheppo 47521ae08745Sheppo /* 47531ae08745Sheppo * Return the expected size of an inband-descriptor message with all the 47541ae08745Sheppo * cookies it claims to include 47551ae08745Sheppo */ 47561ae08745Sheppo static size_t 47571ae08745Sheppo expected_inband_size(vd_dring_inband_msg_t *msg) 47581ae08745Sheppo { 47591ae08745Sheppo return ((sizeof (*msg)) + 47601ae08745Sheppo (msg->payload.ncookies - 1)*(sizeof (msg->payload.cookie[0]))); 47611ae08745Sheppo } 47621ae08745Sheppo 47631ae08745Sheppo /* 47641ae08745Sheppo * Process an in-band descriptor message: used with clients like OBP, with 47651ae08745Sheppo * which vds exchanges descriptors within VIO message payloads, rather than 47661ae08745Sheppo * operating on them within a descriptor ring 47671ae08745Sheppo */ 47681ae08745Sheppo static int 47693af08d82Slm66018 vd_process_desc_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 47701ae08745Sheppo { 47711ae08745Sheppo size_t expected; 47721ae08745Sheppo vd_dring_inband_msg_t *desc_msg = (vd_dring_inband_msg_t *)msg; 47731ae08745Sheppo 47741ae08745Sheppo 47751ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 47761ae08745Sheppo 47771ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, 4778d10e4ef2Snarayan VIO_DESC_DATA)) { 4779d10e4ef2Snarayan PR1("Message is not an in-band-descriptor message"); 4780d10e4ef2Snarayan return (ENOMSG); 4781d10e4ef2Snarayan } 47821ae08745Sheppo 47831ae08745Sheppo if (msglen < sizeof (*desc_msg)) { 47843af08d82Slm66018 PR0("Expected at least %lu-byte descriptor message; " 47851ae08745Sheppo "received %lu bytes", sizeof (*desc_msg), msglen); 47861ae08745Sheppo return (EBADMSG); 47871ae08745Sheppo } 47881ae08745Sheppo 47891ae08745Sheppo if (msglen != (expected = expected_inband_size(desc_msg))) { 47903af08d82Slm66018 PR0("Expected %lu-byte descriptor message; " 47911ae08745Sheppo "received %lu bytes", expected, msglen); 47921ae08745Sheppo return (EBADMSG); 
47931ae08745Sheppo } 47941ae08745Sheppo 4795d10e4ef2Snarayan if (vd_check_seq_num(vd, desc_msg->hdr.seq_num) != 0) 47961ae08745Sheppo return (EBADMSG); 47971ae08745Sheppo 4798d10e4ef2Snarayan /* 4799d10e4ef2Snarayan * Valid message: Set up the in-band descriptor task and process the 4800d10e4ef2Snarayan * request. Arrange to acknowledge the client's message, unless an 4801d10e4ef2Snarayan * error processing the descriptor task results in setting 4802d10e4ef2Snarayan * VIO_SUBTYPE_NACK 4803d10e4ef2Snarayan */ 4804d10e4ef2Snarayan PR1("Valid in-band-descriptor message"); 4805d10e4ef2Snarayan msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 48063af08d82Slm66018 48073af08d82Slm66018 ASSERT(vd->inband_task.msg != NULL); 48083af08d82Slm66018 48093af08d82Slm66018 bcopy(msg, vd->inband_task.msg, msglen); 4810d10e4ef2Snarayan vd->inband_task.msglen = msglen; 48113af08d82Slm66018 48123af08d82Slm66018 /* 48133af08d82Slm66018 * The task request is now the payload of the message 48143af08d82Slm66018 * that was just copied into the body of the task. 
48153af08d82Slm66018 */ 48163af08d82Slm66018 desc_msg = (vd_dring_inband_msg_t *)vd->inband_task.msg; 4817d10e4ef2Snarayan vd->inband_task.request = &desc_msg->payload; 48183af08d82Slm66018 4819d10e4ef2Snarayan return (vd_process_task(&vd->inband_task)); 48201ae08745Sheppo } 48211ae08745Sheppo 48221ae08745Sheppo static int 4823d10e4ef2Snarayan vd_process_element(vd_t *vd, vd_task_type_t type, uint32_t idx, 48243af08d82Slm66018 vio_msg_t *msg, size_t msglen) 48251ae08745Sheppo { 48261ae08745Sheppo int status; 4827d10e4ef2Snarayan boolean_t ready; 4828bbfa0259Sha137994 on_trap_data_t otd; 4829d10e4ef2Snarayan vd_dring_entry_t *elem = VD_DRING_ELEM(idx); 48301ae08745Sheppo 4831d10e4ef2Snarayan /* Accept the updated dring element */ 4832bbfa0259Sha137994 if ((status = VIO_DRING_ACQUIRE(&otd, vd->dring_mtype, 4833bbfa0259Sha137994 vd->dring_handle, idx, idx)) != 0) { 48341ae08745Sheppo return (status); 48351ae08745Sheppo } 4836d10e4ef2Snarayan ready = (elem->hdr.dstate == VIO_DESC_READY); 4837d10e4ef2Snarayan if (ready) { 4838d10e4ef2Snarayan elem->hdr.dstate = VIO_DESC_ACCEPTED; 48395b7cb889Sha137994 bcopy(&elem->payload, vd->dring_task[idx].request, 48405b7cb889Sha137994 (vd->descriptor_size - sizeof (vio_dring_entry_hdr_t))); 4841d10e4ef2Snarayan } else { 48423af08d82Slm66018 PR0("descriptor %u not ready", idx); 4843d10e4ef2Snarayan VD_DUMP_DRING_ELEM(elem); 4844d10e4ef2Snarayan } 4845bbfa0259Sha137994 if ((status = VIO_DRING_RELEASE(vd->dring_mtype, 4846bbfa0259Sha137994 vd->dring_handle, idx, idx)) != 0) { 4847bbfa0259Sha137994 PR0("VIO_DRING_RELEASE() returned errno %d", status); 48481ae08745Sheppo return (status); 48491ae08745Sheppo } 4850d10e4ef2Snarayan if (!ready) 4851d10e4ef2Snarayan return (EBUSY); 48521ae08745Sheppo 48531ae08745Sheppo 4854d10e4ef2Snarayan /* Initialize a task and process the accepted element */ 4855d10e4ef2Snarayan PR1("Processing dring element %u", idx); 4856d10e4ef2Snarayan vd->dring_task[idx].type = type; 48573af08d82Slm66018 
48583af08d82Slm66018 /* duplicate msg buf for cookies etc. */ 48593af08d82Slm66018 bcopy(msg, vd->dring_task[idx].msg, msglen); 48603af08d82Slm66018 4861d10e4ef2Snarayan vd->dring_task[idx].msglen = msglen; 4862205eeb1aSlm66018 return (vd_process_task(&vd->dring_task[idx])); 48631ae08745Sheppo } 48641ae08745Sheppo 48651ae08745Sheppo static int 4866d10e4ef2Snarayan vd_process_element_range(vd_t *vd, int start, int end, 48673af08d82Slm66018 vio_msg_t *msg, size_t msglen) 4868d10e4ef2Snarayan { 4869d10e4ef2Snarayan int i, n, nelem, status = 0; 4870d10e4ef2Snarayan boolean_t inprogress = B_FALSE; 4871d10e4ef2Snarayan vd_task_type_t type; 4872d10e4ef2Snarayan 4873d10e4ef2Snarayan 4874d10e4ef2Snarayan ASSERT(start >= 0); 4875d10e4ef2Snarayan ASSERT(end >= 0); 4876d10e4ef2Snarayan 4877d10e4ef2Snarayan /* 4878d10e4ef2Snarayan * Arrange to acknowledge the client's message, unless an error 4879d10e4ef2Snarayan * processing one of the dring elements results in setting 4880d10e4ef2Snarayan * VIO_SUBTYPE_NACK 4881d10e4ef2Snarayan */ 4882d10e4ef2Snarayan msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 4883d10e4ef2Snarayan 4884d10e4ef2Snarayan /* 4885d10e4ef2Snarayan * Process the dring elements in the range 4886d10e4ef2Snarayan */ 4887d10e4ef2Snarayan nelem = ((end < start) ? end + vd->dring_len : end) - start + 1; 4888d10e4ef2Snarayan for (i = start, n = nelem; n > 0; i = (i + 1) % vd->dring_len, n--) { 4889d10e4ef2Snarayan ((vio_dring_msg_t *)msg)->end_idx = i; 4890d10e4ef2Snarayan type = (n == 1) ? 
VD_FINAL_RANGE_TASK : VD_NONFINAL_RANGE_TASK; 48913af08d82Slm66018 status = vd_process_element(vd, type, i, msg, msglen); 4892d10e4ef2Snarayan if (status == EINPROGRESS) 4893d10e4ef2Snarayan inprogress = B_TRUE; 4894d10e4ef2Snarayan else if (status != 0) 4895d10e4ef2Snarayan break; 4896d10e4ef2Snarayan } 4897d10e4ef2Snarayan 4898d10e4ef2Snarayan /* 4899d10e4ef2Snarayan * If some, but not all, operations of a multi-element range are in 4900d10e4ef2Snarayan * progress, wait for other operations to complete before returning 4901d10e4ef2Snarayan * (which will result in "ack" or "nack" of the message). Note that 4902d10e4ef2Snarayan * all outstanding operations will need to complete, not just the ones 4903d10e4ef2Snarayan * corresponding to the current range of dring elements; howevever, as 4904d10e4ef2Snarayan * this situation is an error case, performance is less critical. 4905d10e4ef2Snarayan */ 490683990c4aSAlexandre Chartre if ((nelem > 1) && (status != EINPROGRESS) && inprogress) { 490783990c4aSAlexandre Chartre if (vd->ioq != NULL) 490883990c4aSAlexandre Chartre ddi_taskq_wait(vd->ioq); 4909d10e4ef2Snarayan ddi_taskq_wait(vd->completionq); 491083990c4aSAlexandre Chartre } 4911d10e4ef2Snarayan 4912d10e4ef2Snarayan return (status); 4913d10e4ef2Snarayan } 4914d10e4ef2Snarayan 4915d10e4ef2Snarayan static int 49163af08d82Slm66018 vd_process_dring_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 49171ae08745Sheppo { 49181ae08745Sheppo vio_dring_msg_t *dring_msg = (vio_dring_msg_t *)msg; 49191ae08745Sheppo 49201ae08745Sheppo 49211ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 49221ae08745Sheppo 49231ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, 49241ae08745Sheppo VIO_DRING_DATA)) { 4925d10e4ef2Snarayan PR1("Message is not a dring-data message"); 4926d10e4ef2Snarayan return (ENOMSG); 49271ae08745Sheppo } 49281ae08745Sheppo 49291ae08745Sheppo if (msglen != sizeof (*dring_msg)) { 49303af08d82Slm66018 PR0("Expected %lu-byte dring message; 
received %lu bytes", 49311ae08745Sheppo sizeof (*dring_msg), msglen); 49321ae08745Sheppo return (EBADMSG); 49331ae08745Sheppo } 49341ae08745Sheppo 4935d10e4ef2Snarayan if (vd_check_seq_num(vd, dring_msg->seq_num) != 0) 49361ae08745Sheppo return (EBADMSG); 49371ae08745Sheppo 49381ae08745Sheppo if (dring_msg->dring_ident != vd->dring_ident) { 49393af08d82Slm66018 PR0("Expected dring ident %lu; received ident %lu", 49401ae08745Sheppo vd->dring_ident, dring_msg->dring_ident); 49411ae08745Sheppo return (EBADMSG); 49421ae08745Sheppo } 49431ae08745Sheppo 4944d10e4ef2Snarayan if (dring_msg->start_idx >= vd->dring_len) { 49453af08d82Slm66018 PR0("\"start_idx\" = %u; must be less than %u", 4946d10e4ef2Snarayan dring_msg->start_idx, vd->dring_len); 4947d10e4ef2Snarayan return (EBADMSG); 4948d10e4ef2Snarayan } 49491ae08745Sheppo 4950d10e4ef2Snarayan if ((dring_msg->end_idx < 0) || 4951d10e4ef2Snarayan (dring_msg->end_idx >= vd->dring_len)) { 49523af08d82Slm66018 PR0("\"end_idx\" = %u; must be >= 0 and less than %u", 4953d10e4ef2Snarayan dring_msg->end_idx, vd->dring_len); 4954d10e4ef2Snarayan return (EBADMSG); 4955d10e4ef2Snarayan } 4956d10e4ef2Snarayan 4957d10e4ef2Snarayan /* Valid message; process range of updated dring elements */ 4958d10e4ef2Snarayan PR1("Processing descriptor range, start = %u, end = %u", 4959d10e4ef2Snarayan dring_msg->start_idx, dring_msg->end_idx); 4960d10e4ef2Snarayan return (vd_process_element_range(vd, dring_msg->start_idx, 49613af08d82Slm66018 dring_msg->end_idx, msg, msglen)); 49621ae08745Sheppo } 49631ae08745Sheppo 49641ae08745Sheppo static int 49651ae08745Sheppo recv_msg(ldc_handle_t ldc_handle, void *msg, size_t *nbytes) 49661ae08745Sheppo { 49671ae08745Sheppo int retry, status; 49681ae08745Sheppo size_t size = *nbytes; 49691ae08745Sheppo 49701ae08745Sheppo 49711ae08745Sheppo for (retry = 0, status = ETIMEDOUT; 49721ae08745Sheppo retry < vds_ldc_retries && status == ETIMEDOUT; 49731ae08745Sheppo retry++) { 49741ae08745Sheppo PR1("ldc_read() 
attempt %d", (retry + 1)); 49751ae08745Sheppo *nbytes = size; 49761ae08745Sheppo status = ldc_read(ldc_handle, msg, nbytes); 49771ae08745Sheppo } 49781ae08745Sheppo 49793af08d82Slm66018 if (status) { 49803af08d82Slm66018 PR0("ldc_read() returned errno %d", status); 49813af08d82Slm66018 if (status != ECONNRESET) 49823af08d82Slm66018 return (ENOMSG); 49831ae08745Sheppo return (status); 49841ae08745Sheppo } else if (*nbytes == 0) { 49851ae08745Sheppo PR1("ldc_read() returned 0 and no message read"); 49861ae08745Sheppo return (ENOMSG); 49871ae08745Sheppo } 49881ae08745Sheppo 49891ae08745Sheppo PR1("RCVD %lu-byte message", *nbytes); 49901ae08745Sheppo return (0); 49911ae08745Sheppo } 49921ae08745Sheppo 49931ae08745Sheppo static int 49943af08d82Slm66018 vd_do_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 49951ae08745Sheppo { 49961ae08745Sheppo int status; 49971ae08745Sheppo 49981ae08745Sheppo 49991ae08745Sheppo PR1("Processing (%x/%x/%x) message", msg->tag.vio_msgtype, 50001ae08745Sheppo msg->tag.vio_subtype, msg->tag.vio_subtype_env); 50013af08d82Slm66018 #ifdef DEBUG 50023af08d82Slm66018 vd_decode_tag(msg); 50033af08d82Slm66018 #endif 50041ae08745Sheppo 50051ae08745Sheppo /* 50061ae08745Sheppo * Validate session ID up front, since it applies to all messages 50071ae08745Sheppo * once set 50081ae08745Sheppo */ 50091ae08745Sheppo if ((msg->tag.vio_sid != vd->sid) && (vd->initialized & VD_SID)) { 50103af08d82Slm66018 PR0("Expected SID %u, received %u", vd->sid, 50111ae08745Sheppo msg->tag.vio_sid); 50121ae08745Sheppo return (EBADMSG); 50131ae08745Sheppo } 50141ae08745Sheppo 50153af08d82Slm66018 PR1("\tWhile in state %d (%s)", vd->state, vd_decode_state(vd->state)); 50161ae08745Sheppo 50171ae08745Sheppo /* 50181ae08745Sheppo * Process the received message based on connection state 50191ae08745Sheppo */ 50201ae08745Sheppo switch (vd->state) { 50211ae08745Sheppo case VD_STATE_INIT: /* expect version message */ 50220a55fbb7Slm66018 if ((status = vd_process_ver_msg(vd, 
msg, msglen)) != 0) 50231ae08745Sheppo return (status); 50241ae08745Sheppo 50251ae08745Sheppo /* Version negotiated, move to that state */ 50261ae08745Sheppo vd->state = VD_STATE_VER; 50271ae08745Sheppo return (0); 50281ae08745Sheppo 50291ae08745Sheppo case VD_STATE_VER: /* expect attribute message */ 50301ae08745Sheppo if ((status = vd_process_attr_msg(vd, msg, msglen)) != 0) 50311ae08745Sheppo return (status); 50321ae08745Sheppo 50331ae08745Sheppo /* Attributes exchanged, move to that state */ 50341ae08745Sheppo vd->state = VD_STATE_ATTR; 50351ae08745Sheppo return (0); 50361ae08745Sheppo 50371ae08745Sheppo case VD_STATE_ATTR: 50381ae08745Sheppo switch (vd->xfer_mode) { 50391ae08745Sheppo case VIO_DESC_MODE: /* expect RDX message */ 50401ae08745Sheppo if ((status = process_rdx_msg(msg, msglen)) != 0) 50411ae08745Sheppo return (status); 50421ae08745Sheppo 50431ae08745Sheppo /* Ready to receive in-band descriptors */ 50441ae08745Sheppo vd->state = VD_STATE_DATA; 50451ae08745Sheppo return (0); 50461ae08745Sheppo 5047f0ca1d9aSsb155480 case VIO_DRING_MODE_V1_0: /* expect register-dring message */ 50481ae08745Sheppo if ((status = 50491ae08745Sheppo vd_process_dring_reg_msg(vd, msg, msglen)) != 0) 50501ae08745Sheppo return (status); 50511ae08745Sheppo 50521ae08745Sheppo /* One dring negotiated, move to that state */ 50531ae08745Sheppo vd->state = VD_STATE_DRING; 50541ae08745Sheppo return (0); 50551ae08745Sheppo 50561ae08745Sheppo default: 50571ae08745Sheppo ASSERT("Unsupported transfer mode"); 50583af08d82Slm66018 PR0("Unsupported transfer mode"); 50591ae08745Sheppo return (ENOTSUP); 50601ae08745Sheppo } 50611ae08745Sheppo 50621ae08745Sheppo case VD_STATE_DRING: /* expect RDX, register-dring, or unreg-dring */ 50631ae08745Sheppo if ((status = process_rdx_msg(msg, msglen)) == 0) { 50641ae08745Sheppo /* Ready to receive data */ 50651ae08745Sheppo vd->state = VD_STATE_DATA; 50661ae08745Sheppo return (0); 50671ae08745Sheppo } else if (status != ENOMSG) { 50681ae08745Sheppo 
return (status); 50691ae08745Sheppo } 50701ae08745Sheppo 50711ae08745Sheppo 50721ae08745Sheppo /* 50731ae08745Sheppo * If another register-dring message is received, stay in 50741ae08745Sheppo * dring state in case the client sends RDX; although the 50751ae08745Sheppo * protocol allows multiple drings, this server does not 50761ae08745Sheppo * support using more than one 50771ae08745Sheppo */ 50781ae08745Sheppo if ((status = 50791ae08745Sheppo vd_process_dring_reg_msg(vd, msg, msglen)) != ENOMSG) 50801ae08745Sheppo return (status); 50811ae08745Sheppo 50821ae08745Sheppo /* 50831ae08745Sheppo * Acknowledge an unregister-dring message, but reset the 50841ae08745Sheppo * connection anyway: Although the protocol allows 50851ae08745Sheppo * unregistering drings, this server cannot serve a vdisk 50861ae08745Sheppo * without its only dring 50871ae08745Sheppo */ 50881ae08745Sheppo status = vd_process_dring_unreg_msg(vd, msg, msglen); 50891ae08745Sheppo return ((status == 0) ? ENOTSUP : status); 50901ae08745Sheppo 50911ae08745Sheppo case VD_STATE_DATA: 50921ae08745Sheppo switch (vd->xfer_mode) { 50931ae08745Sheppo case VIO_DESC_MODE: /* expect in-band-descriptor message */ 50943af08d82Slm66018 return (vd_process_desc_msg(vd, msg, msglen)); 50951ae08745Sheppo 5096f0ca1d9aSsb155480 case VIO_DRING_MODE_V1_0: /* expect dring-data or unreg-dring */ 50971ae08745Sheppo /* 50981ae08745Sheppo * Typically expect dring-data messages, so handle 50991ae08745Sheppo * them first 51001ae08745Sheppo */ 51011ae08745Sheppo if ((status = vd_process_dring_msg(vd, msg, 51023af08d82Slm66018 msglen)) != ENOMSG) 51031ae08745Sheppo return (status); 51041ae08745Sheppo 51051ae08745Sheppo /* 51061ae08745Sheppo * Acknowledge an unregister-dring message, but reset 51071ae08745Sheppo * the connection anyway: Although the protocol 51081ae08745Sheppo * allows unregistering drings, this server cannot 51091ae08745Sheppo * serve a vdisk without its only dring 51101ae08745Sheppo */ 51111ae08745Sheppo status = 
vd_process_dring_unreg_msg(vd, msg, msglen); 51121ae08745Sheppo return ((status == 0) ? ENOTSUP : status); 51131ae08745Sheppo 51141ae08745Sheppo default: 51151ae08745Sheppo ASSERT("Unsupported transfer mode"); 51163af08d82Slm66018 PR0("Unsupported transfer mode"); 51171ae08745Sheppo return (ENOTSUP); 51181ae08745Sheppo } 51191ae08745Sheppo 51201ae08745Sheppo default: 51211ae08745Sheppo ASSERT("Invalid client connection state"); 51223af08d82Slm66018 PR0("Invalid client connection state"); 51231ae08745Sheppo return (ENOTSUP); 51241ae08745Sheppo } 51251ae08745Sheppo } 51261ae08745Sheppo 5127d10e4ef2Snarayan static int 51283af08d82Slm66018 vd_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 51291ae08745Sheppo { 51301ae08745Sheppo int status; 51311ae08745Sheppo boolean_t reset_ldc = B_FALSE; 5132205eeb1aSlm66018 vd_task_t task; 51331ae08745Sheppo 51341ae08745Sheppo /* 51351ae08745Sheppo * Check that the message is at least big enough for a "tag", so that 51361ae08745Sheppo * message processing can proceed based on tag-specified message type 51371ae08745Sheppo */ 51381ae08745Sheppo if (msglen < sizeof (vio_msg_tag_t)) { 51393af08d82Slm66018 PR0("Received short (%lu-byte) message", msglen); 51401ae08745Sheppo /* Can't "nack" short message, so drop the big hammer */ 51413af08d82Slm66018 PR0("initiating full reset"); 5142d10e4ef2Snarayan vd_need_reset(vd, B_TRUE); 5143d10e4ef2Snarayan return (EBADMSG); 51441ae08745Sheppo } 51451ae08745Sheppo 51461ae08745Sheppo /* 51471ae08745Sheppo * Process the message 51481ae08745Sheppo */ 51493af08d82Slm66018 switch (status = vd_do_process_msg(vd, msg, msglen)) { 51501ae08745Sheppo case 0: 51511ae08745Sheppo /* "ack" valid, successfully-processed messages */ 51521ae08745Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 51531ae08745Sheppo break; 51541ae08745Sheppo 5155d10e4ef2Snarayan case EINPROGRESS: 5156d10e4ef2Snarayan /* The completion handler will "ack" or "nack" the message */ 5157d10e4ef2Snarayan return (EINPROGRESS); 
51581ae08745Sheppo case ENOMSG: 51593af08d82Slm66018 PR0("Received unexpected message"); 51601ae08745Sheppo _NOTE(FALLTHROUGH); 51611ae08745Sheppo case EBADMSG: 51621ae08745Sheppo case ENOTSUP: 5163205eeb1aSlm66018 /* "transport" error will cause NACK of invalid messages */ 51641ae08745Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 51651ae08745Sheppo break; 51661ae08745Sheppo 51671ae08745Sheppo default: 5168205eeb1aSlm66018 /* "transport" error will cause NACK of invalid messages */ 51691ae08745Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 51701ae08745Sheppo /* An LDC error probably occurred, so try resetting it */ 51711ae08745Sheppo reset_ldc = B_TRUE; 51721ae08745Sheppo break; 51731ae08745Sheppo } 51741ae08745Sheppo 51753af08d82Slm66018 PR1("\tResulting in state %d (%s)", vd->state, 51763af08d82Slm66018 vd_decode_state(vd->state)); 51773af08d82Slm66018 5178205eeb1aSlm66018 /* populate the task so we can dispatch it on the taskq */ 5179205eeb1aSlm66018 task.vd = vd; 5180205eeb1aSlm66018 task.msg = msg; 5181205eeb1aSlm66018 task.msglen = msglen; 5182205eeb1aSlm66018 5183205eeb1aSlm66018 /* 5184205eeb1aSlm66018 * Queue a task to send the notification that the operation completed. 5185205eeb1aSlm66018 * We need to ensure that requests are responded to in the correct 5186205eeb1aSlm66018 * order and since the taskq is processed serially this ordering 5187205eeb1aSlm66018 * is maintained. 5188205eeb1aSlm66018 */ 5189205eeb1aSlm66018 (void) ddi_taskq_dispatch(vd->completionq, vd_serial_notify, 5190205eeb1aSlm66018 &task, DDI_SLEEP); 5191205eeb1aSlm66018 5192205eeb1aSlm66018 /* 5193205eeb1aSlm66018 * To ensure handshake negotiations do not happen out of order, such 5194205eeb1aSlm66018 * requests that come through this path should not be done in parallel 5195205eeb1aSlm66018 * so we need to wait here until the response is sent to the client. 
5196205eeb1aSlm66018 */ 5197205eeb1aSlm66018 ddi_taskq_wait(vd->completionq); 51981ae08745Sheppo 5199d10e4ef2Snarayan /* Arrange to reset the connection for nack'ed or failed messages */ 52003af08d82Slm66018 if ((status != 0) || reset_ldc) { 52013af08d82Slm66018 PR0("initiating %s reset", 52023af08d82Slm66018 (reset_ldc) ? "full" : "soft"); 5203d10e4ef2Snarayan vd_need_reset(vd, reset_ldc); 52043af08d82Slm66018 } 5205d10e4ef2Snarayan 5206d10e4ef2Snarayan return (status); 5207d10e4ef2Snarayan } 5208d10e4ef2Snarayan 5209d10e4ef2Snarayan static boolean_t 5210d10e4ef2Snarayan vd_enabled(vd_t *vd) 5211d10e4ef2Snarayan { 5212d10e4ef2Snarayan boolean_t enabled; 5213d10e4ef2Snarayan 5214d10e4ef2Snarayan mutex_enter(&vd->lock); 5215d10e4ef2Snarayan enabled = vd->enabled; 5216d10e4ef2Snarayan mutex_exit(&vd->lock); 5217d10e4ef2Snarayan return (enabled); 52181ae08745Sheppo } 52191ae08745Sheppo 52201ae08745Sheppo static void 52210a55fbb7Slm66018 vd_recv_msg(void *arg) 52221ae08745Sheppo { 52231ae08745Sheppo vd_t *vd = (vd_t *)arg; 52243af08d82Slm66018 int rv = 0, status = 0; 52251ae08745Sheppo 52261ae08745Sheppo ASSERT(vd != NULL); 52273af08d82Slm66018 5228d10e4ef2Snarayan PR2("New task to receive incoming message(s)"); 52293af08d82Slm66018 52303af08d82Slm66018 5231d10e4ef2Snarayan while (vd_enabled(vd) && status == 0) { 5232d10e4ef2Snarayan size_t msglen, msgsize; 52333af08d82Slm66018 ldc_status_t lstatus; 5234d10e4ef2Snarayan 52350a55fbb7Slm66018 /* 5236d10e4ef2Snarayan * Receive and process a message 52370a55fbb7Slm66018 */ 5238d10e4ef2Snarayan vd_reset_if_needed(vd); /* can change vd->max_msglen */ 52393af08d82Slm66018 52403af08d82Slm66018 /* 52413af08d82Slm66018 * check if channel is UP - else break out of loop 52423af08d82Slm66018 */ 52433af08d82Slm66018 status = ldc_status(vd->ldc_handle, &lstatus); 52443af08d82Slm66018 if (lstatus != LDC_UP) { 52453af08d82Slm66018 PR0("channel not up (status=%d), exiting recv loop\n", 52463af08d82Slm66018 lstatus); 52473af08d82Slm66018 
break; 52483af08d82Slm66018 } 52493af08d82Slm66018 52503af08d82Slm66018 ASSERT(vd->max_msglen != 0); 52513af08d82Slm66018 5252d10e4ef2Snarayan msgsize = vd->max_msglen; /* stable copy for alloc/free */ 52533af08d82Slm66018 msglen = msgsize; /* actual len after recv_msg() */ 52543af08d82Slm66018 52553af08d82Slm66018 status = recv_msg(vd->ldc_handle, vd->vio_msgp, &msglen); 52563af08d82Slm66018 switch (status) { 52573af08d82Slm66018 case 0: 5258342440ecSPrasad Singamsetty rv = vd_process_msg(vd, (void *)vd->vio_msgp, msglen); 52593af08d82Slm66018 /* check if max_msglen changed */ 52603af08d82Slm66018 if (msgsize != vd->max_msglen) { 52613af08d82Slm66018 PR0("max_msglen changed 0x%lx to 0x%lx bytes\n", 52623af08d82Slm66018 msgsize, vd->max_msglen); 52633af08d82Slm66018 kmem_free(vd->vio_msgp, msgsize); 52643af08d82Slm66018 vd->vio_msgp = 52653af08d82Slm66018 kmem_alloc(vd->max_msglen, KM_SLEEP); 52663af08d82Slm66018 } 52673af08d82Slm66018 if (rv == EINPROGRESS) 52683af08d82Slm66018 continue; 52693af08d82Slm66018 break; 52703af08d82Slm66018 52713af08d82Slm66018 case ENOMSG: 52723af08d82Slm66018 break; 52733af08d82Slm66018 52743af08d82Slm66018 case ECONNRESET: 52753af08d82Slm66018 PR0("initiating soft reset (ECONNRESET)\n"); 52763af08d82Slm66018 vd_need_reset(vd, B_FALSE); 52773af08d82Slm66018 status = 0; 52783af08d82Slm66018 break; 52793af08d82Slm66018 52803af08d82Slm66018 default: 5281d10e4ef2Snarayan /* Probably an LDC failure; arrange to reset it */ 52823af08d82Slm66018 PR0("initiating full reset (status=0x%x)", status); 5283d10e4ef2Snarayan vd_need_reset(vd, B_TRUE); 52843af08d82Slm66018 break; 52850a55fbb7Slm66018 } 52861ae08745Sheppo } 52873af08d82Slm66018 5288d10e4ef2Snarayan PR2("Task finished"); 52890a55fbb7Slm66018 } 52900a55fbb7Slm66018 52910a55fbb7Slm66018 static uint_t 52921ae08745Sheppo vd_handle_ldc_events(uint64_t event, caddr_t arg) 52931ae08745Sheppo { 52941ae08745Sheppo vd_t *vd = (vd_t *)(void *)arg; 52953af08d82Slm66018 int status; 
52961ae08745Sheppo 52971ae08745Sheppo ASSERT(vd != NULL); 5298d10e4ef2Snarayan 5299d10e4ef2Snarayan if (!vd_enabled(vd)) 5300d10e4ef2Snarayan return (LDC_SUCCESS); 5301d10e4ef2Snarayan 53023af08d82Slm66018 if (event & LDC_EVT_DOWN) { 530334683adeSsg70180 PR0("LDC_EVT_DOWN: LDC channel went down"); 53043af08d82Slm66018 53053af08d82Slm66018 vd_need_reset(vd, B_TRUE); 53063af08d82Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, 53073af08d82Slm66018 DDI_SLEEP); 53083af08d82Slm66018 if (status == DDI_FAILURE) { 53093af08d82Slm66018 PR0("cannot schedule task to recv msg\n"); 53103af08d82Slm66018 vd_need_reset(vd, B_TRUE); 53113af08d82Slm66018 } 53123af08d82Slm66018 } 53133af08d82Slm66018 5314d10e4ef2Snarayan if (event & LDC_EVT_RESET) { 53153af08d82Slm66018 PR0("LDC_EVT_RESET: LDC channel was reset"); 53163af08d82Slm66018 53173af08d82Slm66018 if (vd->state != VD_STATE_INIT) { 53183af08d82Slm66018 PR0("scheduling full reset"); 53193af08d82Slm66018 vd_need_reset(vd, B_FALSE); 53203af08d82Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, 53213af08d82Slm66018 vd, DDI_SLEEP); 53223af08d82Slm66018 if (status == DDI_FAILURE) { 53233af08d82Slm66018 PR0("cannot schedule task to recv msg\n"); 53243af08d82Slm66018 vd_need_reset(vd, B_TRUE); 53253af08d82Slm66018 } 53263af08d82Slm66018 53273af08d82Slm66018 } else { 53283af08d82Slm66018 PR0("channel already reset, ignoring...\n"); 53293af08d82Slm66018 PR0("doing ldc up...\n"); 53303af08d82Slm66018 (void) ldc_up(vd->ldc_handle); 53313af08d82Slm66018 } 53323af08d82Slm66018 5333d10e4ef2Snarayan return (LDC_SUCCESS); 5334d10e4ef2Snarayan } 5335d10e4ef2Snarayan 5336d10e4ef2Snarayan if (event & LDC_EVT_UP) { 53373af08d82Slm66018 PR0("EVT_UP: LDC is up\nResetting client connection state"); 53383af08d82Slm66018 PR0("initiating soft reset"); 5339d10e4ef2Snarayan vd_need_reset(vd, B_FALSE); 53403af08d82Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, 53413af08d82Slm66018 vd, DDI_SLEEP); 
53423af08d82Slm66018 if (status == DDI_FAILURE) { 53433af08d82Slm66018 PR0("cannot schedule task to recv msg\n"); 53443af08d82Slm66018 vd_need_reset(vd, B_TRUE); 53453af08d82Slm66018 return (LDC_SUCCESS); 53463af08d82Slm66018 } 5347d10e4ef2Snarayan } 5348d10e4ef2Snarayan 5349d10e4ef2Snarayan if (event & LDC_EVT_READ) { 5350d10e4ef2Snarayan int status; 5351d10e4ef2Snarayan 5352d10e4ef2Snarayan PR1("New data available"); 5353d10e4ef2Snarayan /* Queue a task to receive the new data */ 5354d10e4ef2Snarayan status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, 5355d10e4ef2Snarayan DDI_SLEEP); 53563af08d82Slm66018 53573af08d82Slm66018 if (status == DDI_FAILURE) { 53583af08d82Slm66018 PR0("cannot schedule task to recv msg\n"); 53593af08d82Slm66018 vd_need_reset(vd, B_TRUE); 53603af08d82Slm66018 } 5361d10e4ef2Snarayan } 5362d10e4ef2Snarayan 5363d10e4ef2Snarayan return (LDC_SUCCESS); 53641ae08745Sheppo } 53651ae08745Sheppo 53661ae08745Sheppo static uint_t 53671ae08745Sheppo vds_check_for_vd(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 53681ae08745Sheppo { 53691ae08745Sheppo _NOTE(ARGUNUSED(key, val)) 53701ae08745Sheppo (*((uint_t *)arg))++; 53711ae08745Sheppo return (MH_WALK_TERMINATE); 53721ae08745Sheppo } 53731ae08745Sheppo 53741ae08745Sheppo 53751ae08745Sheppo static int 53761ae08745Sheppo vds_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 53771ae08745Sheppo { 53781ae08745Sheppo uint_t vd_present = 0; 53791ae08745Sheppo minor_t instance; 53801ae08745Sheppo vds_t *vds; 53811ae08745Sheppo 53821ae08745Sheppo 53831ae08745Sheppo switch (cmd) { 53841ae08745Sheppo case DDI_DETACH: 53851ae08745Sheppo /* the real work happens below */ 53861ae08745Sheppo break; 53871ae08745Sheppo case DDI_SUSPEND: 5388d10e4ef2Snarayan PR0("No action required for DDI_SUSPEND"); 53891ae08745Sheppo return (DDI_SUCCESS); 53901ae08745Sheppo default: 53913af08d82Slm66018 PR0("Unrecognized \"cmd\""); 53921ae08745Sheppo return (DDI_FAILURE); 53931ae08745Sheppo } 53941ae08745Sheppo 
53951ae08745Sheppo ASSERT(cmd == DDI_DETACH); 53961ae08745Sheppo instance = ddi_get_instance(dip); 53971ae08745Sheppo if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) { 53983af08d82Slm66018 PR0("Could not get state for instance %u", instance); 53991ae08745Sheppo ddi_soft_state_free(vds_state, instance); 54001ae08745Sheppo return (DDI_FAILURE); 54011ae08745Sheppo } 54021ae08745Sheppo 54031ae08745Sheppo /* Do no detach when serving any vdisks */ 54041ae08745Sheppo mod_hash_walk(vds->vd_table, vds_check_for_vd, &vd_present); 54051ae08745Sheppo if (vd_present) { 54061ae08745Sheppo PR0("Not detaching because serving vdisks"); 54071ae08745Sheppo return (DDI_FAILURE); 54081ae08745Sheppo } 54091ae08745Sheppo 54101ae08745Sheppo PR0("Detaching"); 5411445b4c2eSsb155480 if (vds->initialized & VDS_MDEG) { 54121ae08745Sheppo (void) mdeg_unregister(vds->mdeg); 5413445b4c2eSsb155480 kmem_free(vds->ispecp->specp, sizeof (vds_prop_template)); 5414445b4c2eSsb155480 kmem_free(vds->ispecp, sizeof (mdeg_node_spec_t)); 5415445b4c2eSsb155480 vds->ispecp = NULL; 5416445b4c2eSsb155480 vds->mdeg = NULL; 5417445b4c2eSsb155480 } 5418445b4c2eSsb155480 54198fce2fd6Sachartre vds_driver_types_free(vds); 54208fce2fd6Sachartre 54211ae08745Sheppo if (vds->initialized & VDS_LDI) 54221ae08745Sheppo (void) ldi_ident_release(vds->ldi_ident); 54231ae08745Sheppo mod_hash_destroy_hash(vds->vd_table); 54241ae08745Sheppo ddi_soft_state_free(vds_state, instance); 54251ae08745Sheppo return (DDI_SUCCESS); 54261ae08745Sheppo } 54271ae08745Sheppo 542817cadca8Slm66018 /* 542917cadca8Slm66018 * Description: 54301aff8f07SAlexandre Chartre * This function checks to see if the disk image being used as a 54311aff8f07SAlexandre Chartre * virtual disk is an ISO image. An ISO image is a special case 54321aff8f07SAlexandre Chartre * which can be booted/installed from like a CD/DVD. 543317cadca8Slm66018 * 543417cadca8Slm66018 * Parameters: 543517cadca8Slm66018 * vd - disk on which the operation is performed. 
543617cadca8Slm66018 * 543717cadca8Slm66018 * Return Code: 54381aff8f07SAlexandre Chartre * B_TRUE - The disk image is an ISO 9660 compliant image 54391aff8f07SAlexandre Chartre * B_FALSE - just a regular disk image 544017cadca8Slm66018 */ 544117cadca8Slm66018 static boolean_t 54421aff8f07SAlexandre Chartre vd_dskimg_is_iso_image(vd_t *vd) 544317cadca8Slm66018 { 544417cadca8Slm66018 char iso_buf[ISO_SECTOR_SIZE]; 544517cadca8Slm66018 int i, rv; 544617cadca8Slm66018 uint_t sec; 544717cadca8Slm66018 54481aff8f07SAlexandre Chartre ASSERT(VD_DSKIMG(vd)); 544917cadca8Slm66018 545017cadca8Slm66018 /* 545117cadca8Slm66018 * If we have already discovered and saved this info we can 54521aff8f07SAlexandre Chartre * short-circuit the check and avoid reading the disk image. 545317cadca8Slm66018 */ 545417cadca8Slm66018 if (vd->vdisk_media == VD_MEDIA_DVD || vd->vdisk_media == VD_MEDIA_CD) 545517cadca8Slm66018 return (B_TRUE); 545617cadca8Slm66018 545717cadca8Slm66018 /* 545817cadca8Slm66018 * We wish to read the sector that should contain the 2nd ISO volume 545917cadca8Slm66018 * descriptor. The second field in this descriptor is called the 546017cadca8Slm66018 * Standard Identifier and is set to CD001 for a CD-ROM compliant 546117cadca8Slm66018 * to the ISO 9660 standard. 
546217cadca8Slm66018 */ 5463*65908c77Syu, larry liu - Sun Microsystems - Beijing China sec = (ISO_VOLDESC_SEC * ISO_SECTOR_SIZE) / vd->vdisk_bsize; 54641aff8f07SAlexandre Chartre rv = vd_dskimg_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, (caddr_t)iso_buf, 546517cadca8Slm66018 sec, ISO_SECTOR_SIZE); 546617cadca8Slm66018 546717cadca8Slm66018 if (rv < 0) 546817cadca8Slm66018 return (B_FALSE); 546917cadca8Slm66018 547017cadca8Slm66018 for (i = 0; i < ISO_ID_STRLEN; i++) { 547117cadca8Slm66018 if (ISO_STD_ID(iso_buf)[i] != ISO_ID_STRING[i]) 547217cadca8Slm66018 return (B_FALSE); 547317cadca8Slm66018 } 547417cadca8Slm66018 547517cadca8Slm66018 return (B_TRUE); 547617cadca8Slm66018 } 547717cadca8Slm66018 547817cadca8Slm66018 /* 547917cadca8Slm66018 * Description: 548017cadca8Slm66018 * This function checks to see if the virtual device is an ATAPI 548117cadca8Slm66018 * device. ATAPI devices use Group 1 Read/Write commands, so 548217cadca8Slm66018 * any USCSI calls vds makes need to take this into account. 548317cadca8Slm66018 * 548417cadca8Slm66018 * Parameters: 548517cadca8Slm66018 * vd - disk on which the operation is performed. 
548617cadca8Slm66018 * 548717cadca8Slm66018 * Return Code: 548817cadca8Slm66018 * B_TRUE - The virtual disk is backed by an ATAPI device 548917cadca8Slm66018 * B_FALSE - not an ATAPI device (presumably SCSI) 549017cadca8Slm66018 */ 549117cadca8Slm66018 static boolean_t 549217cadca8Slm66018 vd_is_atapi_device(vd_t *vd) 549317cadca8Slm66018 { 549417cadca8Slm66018 boolean_t is_atapi = B_FALSE; 549517cadca8Slm66018 char *variantp; 549617cadca8Slm66018 int rv; 549717cadca8Slm66018 549817cadca8Slm66018 ASSERT(vd->ldi_handle[0] != NULL); 549917cadca8Slm66018 ASSERT(!vd->file); 550017cadca8Slm66018 550117cadca8Slm66018 rv = ldi_prop_lookup_string(vd->ldi_handle[0], 550217cadca8Slm66018 (LDI_DEV_T_ANY | DDI_PROP_DONTPASS), "variant", &variantp); 550317cadca8Slm66018 if (rv == DDI_PROP_SUCCESS) { 550417cadca8Slm66018 PR0("'variant' property exists for %s", vd->device_path); 550517cadca8Slm66018 if (strcmp(variantp, "atapi") == 0) 550617cadca8Slm66018 is_atapi = B_TRUE; 550717cadca8Slm66018 ddi_prop_free(variantp); 550817cadca8Slm66018 } 550917cadca8Slm66018 551017cadca8Slm66018 rv = ldi_prop_exists(vd->ldi_handle[0], LDI_DEV_T_ANY, "atapi"); 551117cadca8Slm66018 if (rv) { 551217cadca8Slm66018 PR0("'atapi' property exists for %s", vd->device_path); 551317cadca8Slm66018 is_atapi = B_TRUE; 551417cadca8Slm66018 } 551517cadca8Slm66018 551617cadca8Slm66018 return (is_atapi); 551717cadca8Slm66018 } 551817cadca8Slm66018 55191ae08745Sheppo static int 55202f5224aeSachartre vd_setup_full_disk(vd_t *vd) 55212f5224aeSachartre { 55222f5224aeSachartre int status; 55232f5224aeSachartre major_t major = getmajor(vd->dev[0]); 55242f5224aeSachartre minor_t minor = getminor(vd->dev[0]) - VD_ENTIRE_DISK_SLICE; 55252f5224aeSachartre 5526047ba61eSachartre ASSERT(vd->vdisk_type == VD_DISK_TYPE_DISK); 5527047ba61eSachartre 5528de3a5331SRamesh Chitrothu /* set the disk size, block size and the media type of the disk */ 5529de3a5331SRamesh Chitrothu status = vd_backend_check_size(vd); 
55302f5224aeSachartre 55312f5224aeSachartre if (status != 0) { 55322f5224aeSachartre if (!vd->scsi) { 55332f5224aeSachartre /* unexpected failure */ 5534*65908c77Syu, larry liu - Sun Microsystems - Beijing China PRN("Failed to check backend size (errno %d)", status); 55350a55fbb7Slm66018 return (status); 55360a55fbb7Slm66018 } 55372f5224aeSachartre 55382f5224aeSachartre /* 55392f5224aeSachartre * The function can fail for SCSI disks which are present but 55402f5224aeSachartre * reserved by another system. In that case, we don't know the 55412f5224aeSachartre * size of the disk and the block size. 55422f5224aeSachartre */ 55432f5224aeSachartre vd->vdisk_size = VD_SIZE_UNKNOWN; 5544*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd->vdisk_bsize = 0; 5545*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd->backend_bsize = 0; 55462f5224aeSachartre vd->vdisk_media = VD_MEDIA_FIXED; 55472f5224aeSachartre } 55480a55fbb7Slm66018 55490a55fbb7Slm66018 /* Move dev number and LDI handle to entire-disk-slice array elements */ 55500a55fbb7Slm66018 vd->dev[VD_ENTIRE_DISK_SLICE] = vd->dev[0]; 55510a55fbb7Slm66018 vd->dev[0] = 0; 55520a55fbb7Slm66018 vd->ldi_handle[VD_ENTIRE_DISK_SLICE] = vd->ldi_handle[0]; 55530a55fbb7Slm66018 vd->ldi_handle[0] = NULL; 55540a55fbb7Slm66018 55550a55fbb7Slm66018 /* Initialize device numbers for remaining slices and open them */ 55560a55fbb7Slm66018 for (int slice = 0; slice < vd->nslices; slice++) { 55570a55fbb7Slm66018 /* 55580a55fbb7Slm66018 * Skip the entire-disk slice, as it's already open and its 55590a55fbb7Slm66018 * device known 55600a55fbb7Slm66018 */ 55610a55fbb7Slm66018 if (slice == VD_ENTIRE_DISK_SLICE) 55620a55fbb7Slm66018 continue; 55630a55fbb7Slm66018 ASSERT(vd->dev[slice] == 0); 55640a55fbb7Slm66018 ASSERT(vd->ldi_handle[slice] == NULL); 55650a55fbb7Slm66018 55660a55fbb7Slm66018 /* 55670a55fbb7Slm66018 * Construct the device number for the current slice 55680a55fbb7Slm66018 */ 55690a55fbb7Slm66018 vd->dev[slice] 
= makedevice(major, (minor + slice)); 55700a55fbb7Slm66018 55710a55fbb7Slm66018 /* 557234683adeSsg70180 * Open all slices of the disk to serve them to the client. 557334683adeSsg70180 * Slices are opened exclusively to prevent other threads or 557434683adeSsg70180 * processes in the service domain from performing I/O to 557534683adeSsg70180 * slices being accessed by a client. Failure to open a slice 557634683adeSsg70180 * results in vds not serving this disk, as the client could 557734683adeSsg70180 * attempt (and should be able) to access any slice immediately. 557834683adeSsg70180 * Any slices successfully opened before a failure will get 557934683adeSsg70180 * closed by vds_destroy_vd() as a result of the error returned 558034683adeSsg70180 * by this function. 558134683adeSsg70180 * 558234683adeSsg70180 * We need to do the open with FNDELAY so that opening an empty 558334683adeSsg70180 * slice does not fail. 55840a55fbb7Slm66018 */ 55850a55fbb7Slm66018 PR0("Opening device major %u, minor %u = slice %u", 55860a55fbb7Slm66018 major, minor, slice); 5587047ba61eSachartre 5588047ba61eSachartre /* 5589047ba61eSachartre * Try to open the device. This can fail for example if we are 5590047ba61eSachartre * opening an empty slice. So in case of a failure, we try the 5591047ba61eSachartre * open again but this time with the FNDELAY flag. 
5592047ba61eSachartre */ 5593047ba61eSachartre status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK, 5594047ba61eSachartre vd->open_flags, kcred, &vd->ldi_handle[slice], 5595047ba61eSachartre vd->vds->ldi_ident); 5596047ba61eSachartre 5597047ba61eSachartre if (status != 0) { 5598047ba61eSachartre status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK, 5599047ba61eSachartre vd->open_flags | FNDELAY, kcred, 5600047ba61eSachartre &vd->ldi_handle[slice], vd->vds->ldi_ident); 5601047ba61eSachartre } 5602047ba61eSachartre 5603047ba61eSachartre if (status != 0) { 5604690555a1Sachartre PRN("ldi_open_by_dev() returned errno %d " 56050a55fbb7Slm66018 "for slice %u", status, slice); 56060a55fbb7Slm66018 /* vds_destroy_vd() will close any open slices */ 5607690555a1Sachartre vd->ldi_handle[slice] = NULL; 56080a55fbb7Slm66018 return (status); 56090a55fbb7Slm66018 } 56100a55fbb7Slm66018 } 56110a55fbb7Slm66018 56120a55fbb7Slm66018 return (0); 56130a55fbb7Slm66018 } 56140a55fbb7Slm66018 5615edcc0754Sachartre /* 5616edcc0754Sachartre * When a slice or a volume is exported as a single-slice disk, we want 5617edcc0754Sachartre * the disk backend (i.e. the slice or volume) to be entirely mapped as 5618edcc0754Sachartre * a slice without the addition of any metadata. 
5619edcc0754Sachartre * 5620edcc0754Sachartre * So when exporting the disk as a VTOC disk, we fake a disk with the following 5621edcc0754Sachartre * layout: 5622bae9e67eSachartre * flabel +--- flabel_limit 5623bae9e67eSachartre * <-> V 5624bae9e67eSachartre * 0 1 C D E 5625bae9e67eSachartre * +-+---+--------------------------+--+ 5626bae9e67eSachartre * virtual disk: |L|XXX| slice 0 |AA| 5627bae9e67eSachartre * +-+---+--------------------------+--+ 5628edcc0754Sachartre * ^ : : 5629edcc0754Sachartre * | : : 5630edcc0754Sachartre * VTOC LABEL--+ : : 5631edcc0754Sachartre * +--------------------------+ 5632bae9e67eSachartre * disk backend: | slice/volume/file | 5633edcc0754Sachartre * +--------------------------+ 5634edcc0754Sachartre * 0 N 5635edcc0754Sachartre * 5636bae9e67eSachartre * N is the number of blocks in the slice/volume/file. 5637edcc0754Sachartre * 5638bae9e67eSachartre * We simulate a disk with N+M blocks, where M is the number of blocks 5639bae9e67eSachartre * simulated at the beginning and at the end of the disk (blocks 0-C 5640bae9e67eSachartre * and D-E). 5641edcc0754Sachartre * 5642bae9e67eSachartre * The first blocks (0 to C-1) are emulated and can not be changed. Blocks C 5643bae9e67eSachartre * to D define slice 0 and are mapped to the backend. Finally we emulate 2 5644bae9e67eSachartre * alternate cylinders at the end of the disk (blocks D-E).
In summary we have: 5645edcc0754Sachartre * 5646bae9e67eSachartre * - block 0 (L) returns a fake VTOC label 5647bae9e67eSachartre * - blocks 1 to C-1 (X) are unused and return 0 5648bae9e67eSachartre * - blocks C to D-1 are mapped to the exported slice or volume 5649bae9e67eSachartre * - blocks D and E (A) are blocks defining alternate cylinders (2 cylinders) 5650bae9e67eSachartre * 5651bae9e67eSachartre * Note: because we define a fake disk geometry, it is possible that the length 5652bae9e67eSachartre * of the backend is not a multiple of the size of cylinder, in that case the 5653bae9e67eSachartre * very end of the backend will not map to any block of the virtual disk. 5654edcc0754Sachartre */ 56550a55fbb7Slm66018 static int 565678fcd0a1Sachartre vd_setup_partition_vtoc(vd_t *vd) 565778fcd0a1Sachartre { 565878fcd0a1Sachartre char *device_path = vd->device_path; 5659bae9e67eSachartre char unit; 5660bae9e67eSachartre size_t size, csize; 566178fcd0a1Sachartre 566278fcd0a1Sachartre /* Initialize dk_geom structure for single-slice device */ 566378fcd0a1Sachartre if (vd->dk_geom.dkg_nsect == 0) { 566478fcd0a1Sachartre PRN("%s geometry claims 0 sectors per track", device_path); 566578fcd0a1Sachartre return (EIO); 566678fcd0a1Sachartre } 566778fcd0a1Sachartre if (vd->dk_geom.dkg_nhead == 0) { 566878fcd0a1Sachartre PRN("%s geometry claims 0 heads", device_path); 566978fcd0a1Sachartre return (EIO); 567078fcd0a1Sachartre } 5671bae9e67eSachartre 5672bae9e67eSachartre /* size of a cylinder in block */ 5673bae9e67eSachartre csize = vd->dk_geom.dkg_nhead * vd->dk_geom.dkg_nsect; 5674bae9e67eSachartre 5675bae9e67eSachartre /* 5676bae9e67eSachartre * Add extra cylinders: we emulate the first cylinder (which contains 5677bae9e67eSachartre * the disk label). 
5678bae9e67eSachartre */ 5679bae9e67eSachartre vd->dk_geom.dkg_ncyl = vd->vdisk_size / csize + 1; 5680bae9e67eSachartre 5681bae9e67eSachartre /* we emulate 2 alternate cylinders */ 5682bae9e67eSachartre vd->dk_geom.dkg_acyl = 2; 568378fcd0a1Sachartre vd->dk_geom.dkg_pcyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl; 568478fcd0a1Sachartre 568578fcd0a1Sachartre 568678fcd0a1Sachartre /* Initialize vtoc structure for single-slice device */ 568778fcd0a1Sachartre bzero(vd->vtoc.v_part, sizeof (vd->vtoc.v_part)); 568878fcd0a1Sachartre vd->vtoc.v_part[0].p_tag = V_UNASSIGNED; 568978fcd0a1Sachartre vd->vtoc.v_part[0].p_flag = 0; 5690bae9e67eSachartre /* 5691bae9e67eSachartre * Partition 0 starts on cylinder 1 and its size has to be 5692bae9e67eSachartre * a multiple of a number of cylinder. 5693bae9e67eSachartre */ 5694bae9e67eSachartre vd->vtoc.v_part[0].p_start = csize; /* start on cylinder 1 */ 5695bae9e67eSachartre vd->vtoc.v_part[0].p_size = (vd->vdisk_size / csize) * csize; 569678fcd0a1Sachartre 5697bae9e67eSachartre if (vd_slice_single_slice) { 5698bae9e67eSachartre vd->vtoc.v_nparts = 1; 5699bae9e67eSachartre bcopy(VD_ASCIILABEL, vd->vtoc.v_asciilabel, 5700bae9e67eSachartre MIN(sizeof (VD_ASCIILABEL), 5701bae9e67eSachartre sizeof (vd->vtoc.v_asciilabel))); 5702bae9e67eSachartre bcopy(VD_VOLUME_NAME, vd->vtoc.v_volume, 5703bae9e67eSachartre MIN(sizeof (VD_VOLUME_NAME), sizeof (vd->vtoc.v_volume))); 5704bae9e67eSachartre } else { 5705bae9e67eSachartre /* adjust the number of slices */ 5706bae9e67eSachartre vd->nslices = V_NUMPAR; 5707bae9e67eSachartre vd->vtoc.v_nparts = V_NUMPAR; 5708bae9e67eSachartre 5709bae9e67eSachartre /* define slice 2 representing the entire disk */ 5710bae9e67eSachartre vd->vtoc.v_part[VD_ENTIRE_DISK_SLICE].p_tag = V_BACKUP; 5711bae9e67eSachartre vd->vtoc.v_part[VD_ENTIRE_DISK_SLICE].p_flag = 0; 5712bae9e67eSachartre vd->vtoc.v_part[VD_ENTIRE_DISK_SLICE].p_start = 0; 5713bae9e67eSachartre vd->vtoc.v_part[VD_ENTIRE_DISK_SLICE].p_size = 
5714bae9e67eSachartre vd->dk_geom.dkg_ncyl * csize; 5715bae9e67eSachartre 5716*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd_get_readable_size(vd->vdisk_size * vd->vdisk_bsize, 5717bae9e67eSachartre &size, &unit); 5718bae9e67eSachartre 5719bae9e67eSachartre /* 5720bae9e67eSachartre * Set some attributes of the geometry to what format(1m) uses 5721bae9e67eSachartre * so that writing a default label using format(1m) does not 5722bae9e67eSachartre * produce any error. 5723bae9e67eSachartre */ 5724bae9e67eSachartre vd->dk_geom.dkg_bcyl = 0; 5725bae9e67eSachartre vd->dk_geom.dkg_intrlv = 1; 5726bae9e67eSachartre vd->dk_geom.dkg_write_reinstruct = 0; 5727bae9e67eSachartre vd->dk_geom.dkg_read_reinstruct = 0; 5728bae9e67eSachartre 5729bae9e67eSachartre /* 5730bae9e67eSachartre * We must have a correct label name otherwise format(1m) will 5731bae9e67eSachartre * not recognized the disk as labeled. 5732bae9e67eSachartre */ 5733bae9e67eSachartre (void) snprintf(vd->vtoc.v_asciilabel, LEN_DKL_ASCII, 5734bae9e67eSachartre "SUN-DiskSlice-%ld%cB cyl %d alt %d hd %d sec %d", 5735bae9e67eSachartre size, unit, 5736bae9e67eSachartre vd->dk_geom.dkg_ncyl, vd->dk_geom.dkg_acyl, 5737bae9e67eSachartre vd->dk_geom.dkg_nhead, vd->dk_geom.dkg_nsect); 5738bae9e67eSachartre bzero(vd->vtoc.v_volume, sizeof (vd->vtoc.v_volume)); 5739bae9e67eSachartre 5740bae9e67eSachartre /* create a fake label from the vtoc and geometry */ 5741342440ecSPrasad Singamsetty vd->flabel_limit = (uint_t)csize; 5742*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd->flabel_size = VD_LABEL_VTOC_SIZE(vd->vdisk_bsize); 5743bae9e67eSachartre vd->flabel = kmem_zalloc(vd->flabel_size, KM_SLEEP); 5744bae9e67eSachartre vd_vtocgeom_to_label(&vd->vtoc, &vd->dk_geom, 5745bae9e67eSachartre VD_LABEL_VTOC(vd)); 5746bae9e67eSachartre } 5747bae9e67eSachartre 5748bae9e67eSachartre /* adjust the vdisk_size, we emulate 3 cylinders */ 5749bae9e67eSachartre vd->vdisk_size += csize * 3; 5750edcc0754Sachartre 
575178fcd0a1Sachartre return (0); 575278fcd0a1Sachartre } 575378fcd0a1Sachartre 5754edcc0754Sachartre /* 5755edcc0754Sachartre * When a slice, volume or file is exported as a single-slice disk, we want 5756edcc0754Sachartre * the disk backend (i.e. the slice, volume or file) to be entirely mapped 5757edcc0754Sachartre * as a slice without the addition of any metadata. 5758edcc0754Sachartre * 5759edcc0754Sachartre * So when exporting the disk as an EFI disk, we fake a disk with the following 5760*65908c77Syu, larry liu - Sun Microsystems - Beijing China * layout: (assuming the block size is 512 bytes) 5761edcc0754Sachartre * 5762bae9e67eSachartre * flabel +--- flabel_limit 5763bae9e67eSachartre * <------> v 5764bae9e67eSachartre * 0 1 2 L 34 34+N P 5765bae9e67eSachartre * +-+-+--+-------+--------------------------+-------+ 5766bae9e67eSachartre * virtual disk: |X|T|EE|XXXXXXX| slice 0 |RRRRRRR| 5767bae9e67eSachartre * +-+-+--+-------+--------------------------+-------+ 5768edcc0754Sachartre * ^ ^ : : 5769edcc0754Sachartre * | | : : 5770edcc0754Sachartre * GPT-+ +-GPE : : 5771edcc0754Sachartre * +--------------------------+ 5772edcc0754Sachartre * disk backend: | slice/volume/file | 5773edcc0754Sachartre * +--------------------------+ 5774edcc0754Sachartre * 0 N 5775edcc0754Sachartre * 5776edcc0754Sachartre * N is the number of blocks in the slice/volume/file. 5777edcc0754Sachartre * 5778bae9e67eSachartre * We simulate a disk with N+M blocks, where M is the number of blocks 5779bae9e67eSachartre * simluated at the beginning and at the end of the disk (blocks 0-34 5780bae9e67eSachartre * and 34+N-P). 5781edcc0754Sachartre * 5782bae9e67eSachartre * The first 34 blocks (0 to 33) are emulated and can not be changed. Blocks 34 5783bae9e67eSachartre * to 34+N defines slice 0 and are mapped to the exported backend, and we 5784bae9e67eSachartre * emulate some blocks at the end of the disk (blocks 34+N to P) as a the EFI 5785bae9e67eSachartre * reserved partition. 
5786bae9e67eSachartre * 5787bae9e67eSachartre * - block 0 (X) is unused and return 0 5788edcc0754Sachartre * - block 1 (T) returns a fake EFI GPT (via DKIOCGETEFI) 5789bae9e67eSachartre * - blocks 2 to L-1 (E) defines a fake EFI GPE (via DKIOCGETEFI) 5790bae9e67eSachartre * - blocks L to 33 (X) are unused and return 0 5791bae9e67eSachartre * - blocks 34 to 34+N are mapped to the exported slice, volume or file 5792bae9e67eSachartre * - blocks 34+N+1 to P define a fake reserved partition and backup label, it 5793bae9e67eSachartre * returns 0 5794edcc0754Sachartre * 5795*65908c77Syu, larry liu - Sun Microsystems - Beijing China * Note: if the backend size is not a multiple of the vdisk block size then 5796*65908c77Syu, larry liu - Sun Microsystems - Beijing China * the very end of the backend will not map to any block of the virtual disk. 5797edcc0754Sachartre */ 579878fcd0a1Sachartre static int 57994bac2208Snarayan vd_setup_partition_efi(vd_t *vd) 58004bac2208Snarayan { 58014bac2208Snarayan efi_gpt_t *gpt; 58024bac2208Snarayan efi_gpe_t *gpe; 5803edcc0754Sachartre struct uuid uuid = EFI_USR; 5804bae9e67eSachartre struct uuid efi_reserved = EFI_RESERVED; 58054bac2208Snarayan uint32_t crc; 5806*65908c77Syu, larry liu - Sun Microsystems - Beijing China uint64_t s0_start, s0_end, first_u_lba; 5807*65908c77Syu, larry liu - Sun Microsystems - Beijing China size_t bsize; 58084bac2208Snarayan 5809*65908c77Syu, larry liu - Sun Microsystems - Beijing China ASSERT(vd->vdisk_bsize > 0); 5810*65908c77Syu, larry liu - Sun Microsystems - Beijing China 5811*65908c77Syu, larry liu - Sun Microsystems - Beijing China bsize = vd->vdisk_bsize; 5812*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* 5813*65908c77Syu, larry liu - Sun Microsystems - Beijing China * The minimum size for the label is 16K (EFI_MIN_ARRAY_SIZE) 5814*65908c77Syu, larry liu - Sun Microsystems - Beijing China * for GPEs plus one block for the GPT and one for PMBR. 
5815*65908c77Syu, larry liu - Sun Microsystems - Beijing China */ 5816*65908c77Syu, larry liu - Sun Microsystems - Beijing China first_u_lba = (EFI_MIN_ARRAY_SIZE / bsize) + 2; 5817*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd->flabel_limit = (uint_t)first_u_lba; 5818*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd->flabel_size = VD_LABEL_EFI_SIZE(bsize); 5819bae9e67eSachartre vd->flabel = kmem_zalloc(vd->flabel_size, KM_SLEEP); 5820*65908c77Syu, larry liu - Sun Microsystems - Beijing China gpt = VD_LABEL_EFI_GPT(vd, bsize); 5821*65908c77Syu, larry liu - Sun Microsystems - Beijing China gpe = VD_LABEL_EFI_GPE(vd, bsize); 5822edcc0754Sachartre 5823*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* 5824*65908c77Syu, larry liu - Sun Microsystems - Beijing China * Adjust the vdisk_size, we emulate the first few blocks 5825*65908c77Syu, larry liu - Sun Microsystems - Beijing China * for the disk label. 5826*65908c77Syu, larry liu - Sun Microsystems - Beijing China */ 5827*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd->vdisk_size += first_u_lba; 5828*65908c77Syu, larry liu - Sun Microsystems - Beijing China s0_start = first_u_lba; 5829bae9e67eSachartre s0_end = vd->vdisk_size - 1; 58304bac2208Snarayan 58314bac2208Snarayan gpt->efi_gpt_Signature = LE_64(EFI_SIGNATURE); 58324bac2208Snarayan gpt->efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); 58334bac2208Snarayan gpt->efi_gpt_HeaderSize = LE_32(sizeof (efi_gpt_t)); 5834*65908c77Syu, larry liu - Sun Microsystems - Beijing China gpt->efi_gpt_FirstUsableLBA = LE_64(first_u_lba); 5835edcc0754Sachartre gpt->efi_gpt_PartitionEntryLBA = LE_64(2ULL); 58364bac2208Snarayan gpt->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (efi_gpe_t)); 58374bac2208Snarayan 5838bae9e67eSachartre UUID_LE_CONVERT(gpe[0].efi_gpe_PartitionTypeGUID, uuid); 5839bae9e67eSachartre gpe[0].efi_gpe_StartingLBA = LE_64(s0_start); 5840bae9e67eSachartre gpe[0].efi_gpe_EndingLBA = LE_64(s0_end); 
58414bac2208Snarayan 5842bae9e67eSachartre if (vd_slice_single_slice) { 5843bae9e67eSachartre gpt->efi_gpt_NumberOfPartitionEntries = LE_32(1); 5844bae9e67eSachartre } else { 5845bae9e67eSachartre /* adjust the number of slices */ 5846bae9e67eSachartre gpt->efi_gpt_NumberOfPartitionEntries = LE_32(VD_MAXPART); 5847bae9e67eSachartre vd->nslices = V_NUMPAR; 5848bae9e67eSachartre 5849bae9e67eSachartre /* define a fake reserved partition */ 5850bae9e67eSachartre UUID_LE_CONVERT(gpe[VD_MAXPART - 1].efi_gpe_PartitionTypeGUID, 5851bae9e67eSachartre efi_reserved); 5852bae9e67eSachartre gpe[VD_MAXPART - 1].efi_gpe_StartingLBA = 5853bae9e67eSachartre LE_64(s0_end + 1); 5854bae9e67eSachartre gpe[VD_MAXPART - 1].efi_gpe_EndingLBA = 5855bae9e67eSachartre LE_64(s0_end + EFI_MIN_RESV_SIZE); 5856bae9e67eSachartre 5857bae9e67eSachartre /* adjust the vdisk_size to include the reserved slice */ 5858bae9e67eSachartre vd->vdisk_size += EFI_MIN_RESV_SIZE; 5859bae9e67eSachartre } 5860bae9e67eSachartre 5861bae9e67eSachartre gpt->efi_gpt_LastUsableLBA = LE_64(vd->vdisk_size - 1); 5862bae9e67eSachartre 5863bae9e67eSachartre /* adjust the vdisk size for the backup GPT and GPE */ 5864*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd->vdisk_size += (EFI_MIN_ARRAY_SIZE / bsize) + 1; 5865*65908c77Syu, larry liu - Sun Microsystems - Beijing China gpt->efi_gpt_AlternateLBA = LE_64(vd->vdisk_size - 1); 5866bae9e67eSachartre 5867bae9e67eSachartre CRC32(crc, gpe, sizeof (efi_gpe_t) * VD_MAXPART, -1U, crc32_table); 58684bac2208Snarayan gpt->efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); 58694bac2208Snarayan 58704bac2208Snarayan CRC32(crc, gpt, sizeof (efi_gpt_t), -1U, crc32_table); 58714bac2208Snarayan gpt->efi_gpt_HeaderCRC32 = LE_32(~crc); 58724bac2208Snarayan 58734bac2208Snarayan return (0); 58744bac2208Snarayan } 58754bac2208Snarayan 5876047ba61eSachartre /* 5877047ba61eSachartre * Setup for a virtual disk whose backend is a file (exported as a single slice 58781aff8f07SAlexandre 
Chartre * or as a full disk). In that case, the backend is accessed using the vnode 58791aff8f07SAlexandre Chartre * interface. 5880047ba61eSachartre */ 58814bac2208Snarayan static int 5882047ba61eSachartre vd_setup_backend_vnode(vd_t *vd) 58833c96341aSnarayan { 588478fcd0a1Sachartre int rval, status; 58853c96341aSnarayan dev_t dev; 58863c96341aSnarayan char *file_path = vd->device_path; 58873c96341aSnarayan ldi_handle_t lhandle; 58883c96341aSnarayan struct dk_cinfo dk_cinfo; 58891aff8f07SAlexandre Chartre 58901aff8f07SAlexandre Chartre ASSERT(!vd->volume); 58913c96341aSnarayan 5892047ba61eSachartre if ((status = vn_open(file_path, UIO_SYSSPACE, vd->open_flags | FOFFMAX, 58933c96341aSnarayan 0, &vd->file_vnode, 0, 0)) != 0) { 5894690555a1Sachartre PRN("vn_open(%s) = errno %d", file_path, status); 58953c96341aSnarayan return (status); 58963c96341aSnarayan } 58973c96341aSnarayan 5898690555a1Sachartre /* 5899690555a1Sachartre * We set vd->file now so that vds_destroy_vd will take care of 5900690555a1Sachartre * closing the file and releasing the vnode in case of an error. 5901690555a1Sachartre */ 5902690555a1Sachartre vd->file = B_TRUE; 5903690555a1Sachartre 59043c96341aSnarayan vd->max_xfer_sz = maxphys / DEV_BSIZE; /* default transfer size */ 59053c96341aSnarayan 5906047ba61eSachartre /* 59071aff8f07SAlexandre Chartre * Get max_xfer_sz from the device where the file is. 
5908047ba61eSachartre */ 59093c96341aSnarayan dev = vd->file_vnode->v_vfsp->vfs_dev; 5910f745d6a3Sachartre PR0("underlying device of %s = (%d, %d)\n", file_path, 5911f745d6a3Sachartre getmajor(dev), getminor(dev)); 59123c96341aSnarayan 5913047ba61eSachartre status = ldi_open_by_dev(&dev, OTYP_BLK, FREAD, kcred, &lhandle, 5914047ba61eSachartre vd->vds->ldi_ident); 5915047ba61eSachartre 5916047ba61eSachartre if (status != 0) { 5917f745d6a3Sachartre PR0("ldi_open() returned errno %d for underlying device", 5918f745d6a3Sachartre status); 59193c96341aSnarayan } else { 59203c96341aSnarayan if ((status = ldi_ioctl(lhandle, DKIOCINFO, 5921047ba61eSachartre (intptr_t)&dk_cinfo, (vd->open_flags | FKIOCTL), kcred, 59223c96341aSnarayan &rval)) != 0) { 5923f745d6a3Sachartre PR0("ldi_ioctl(DKIOCINFO) returned errno %d for " 5924f745d6a3Sachartre "underlying device", status); 59253c96341aSnarayan } else { 59263c96341aSnarayan /* 59273c96341aSnarayan * Store the device's max transfer size for 59283c96341aSnarayan * return to the client 59293c96341aSnarayan */ 59303c96341aSnarayan vd->max_xfer_sz = dk_cinfo.dki_maxtransfer; 59313c96341aSnarayan } 59323c96341aSnarayan 5933f745d6a3Sachartre PR0("close the underlying device"); 59343c96341aSnarayan (void) ldi_close(lhandle, FREAD, kcred); 59353c96341aSnarayan } 59363c96341aSnarayan 5937f745d6a3Sachartre PR0("using file %s on device (%d, %d), max_xfer = %u blks", 5938f745d6a3Sachartre file_path, getmajor(dev), getminor(dev), vd->max_xfer_sz); 59391aff8f07SAlexandre Chartre 59401aff8f07SAlexandre Chartre if (vd->vdisk_type == VD_DISK_TYPE_SLICE) 59411aff8f07SAlexandre Chartre status = vd_setup_slice_image(vd); 59421aff8f07SAlexandre Chartre else 59431aff8f07SAlexandre Chartre status = vd_setup_disk_image(vd); 59441aff8f07SAlexandre Chartre 59451aff8f07SAlexandre Chartre return (status); 5946f745d6a3Sachartre } 59473c96341aSnarayan 59481aff8f07SAlexandre Chartre static int 59491aff8f07SAlexandre Chartre vd_setup_slice_image(vd_t *vd) 
59501aff8f07SAlexandre Chartre { 59511aff8f07SAlexandre Chartre struct dk_label label; 59521aff8f07SAlexandre Chartre int status; 59531aff8f07SAlexandre Chartre 5954bae9e67eSachartre vd->vdisk_media = VD_MEDIA_FIXED; 5955bae9e67eSachartre vd->vdisk_label = (vd_slice_label == VD_DISK_LABEL_UNK)? 5956bae9e67eSachartre vd_file_slice_label : vd_slice_label; 59571aff8f07SAlexandre Chartre 5958bae9e67eSachartre if (vd->vdisk_label == VD_DISK_LABEL_EFI || 59591aff8f07SAlexandre Chartre vd->dskimg_size >= 2 * ONE_TERABYTE) { 5960edcc0754Sachartre status = vd_setup_partition_efi(vd); 5961bae9e67eSachartre } else { 5962bae9e67eSachartre /* 5963bae9e67eSachartre * We build a default label to get a geometry for 5964bae9e67eSachartre * the vdisk. Then the partition setup function will 5965bae9e67eSachartre * adjust the vtoc so that it defines a single-slice 5966bae9e67eSachartre * disk. 5967bae9e67eSachartre */ 5968*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd_build_default_label(vd->dskimg_size, vd->vdisk_bsize, 5969*65908c77Syu, larry liu - Sun Microsystems - Beijing China &label); 5970bae9e67eSachartre vd_label_to_vtocgeom(&label, &vd->vtoc, &vd->dk_geom); 5971bae9e67eSachartre status = vd_setup_partition_vtoc(vd); 5972bae9e67eSachartre } 59731aff8f07SAlexandre Chartre 5974bae9e67eSachartre return (status); 5975edcc0754Sachartre } 5976edcc0754Sachartre 59771aff8f07SAlexandre Chartre static int 59781aff8f07SAlexandre Chartre vd_setup_disk_image(vd_t *vd) 59791aff8f07SAlexandre Chartre { 59801aff8f07SAlexandre Chartre int status; 59811aff8f07SAlexandre Chartre char *backend_path = vd->device_path; 59821aff8f07SAlexandre Chartre 5983*65908c77Syu, larry liu - Sun Microsystems - Beijing China if ((status = vd_backend_check_size(vd)) != 0) { 5984*65908c77Syu, larry liu - Sun Microsystems - Beijing China PRN("Fail to check size of %s (errno %d)", 5985*65908c77Syu, larry liu - Sun Microsystems - Beijing China backend_path, status); 5986*65908c77Syu, larry liu - Sun 
Microsystems - Beijing China return (EIO); 5987*65908c77Syu, larry liu - Sun Microsystems - Beijing China } 5988*65908c77Syu, larry liu - Sun Microsystems - Beijing China 59891aff8f07SAlexandre Chartre /* size should be at least sizeof(dk_label) */ 59901aff8f07SAlexandre Chartre if (vd->dskimg_size < sizeof (struct dk_label)) { 59911aff8f07SAlexandre Chartre PRN("Size of file has to be at least %ld bytes", 59921aff8f07SAlexandre Chartre sizeof (struct dk_label)); 59931aff8f07SAlexandre Chartre return (EIO); 59941aff8f07SAlexandre Chartre } 59951aff8f07SAlexandre Chartre 5996edcc0754Sachartre /* 5997edcc0754Sachartre * Find and validate the geometry of a disk image. 5998edcc0754Sachartre */ 59991aff8f07SAlexandre Chartre status = vd_dskimg_validate_geometry(vd); 6000edcc0754Sachartre if (status != 0 && status != EINVAL && status != ENOTSUP) { 60011aff8f07SAlexandre Chartre PRN("Failed to read label from %s", backend_path); 6002edcc0754Sachartre return (EIO); 6003edcc0754Sachartre } 6004edcc0754Sachartre 60051aff8f07SAlexandre Chartre if (vd_dskimg_is_iso_image(vd)) { 6006edcc0754Sachartre /* 6007edcc0754Sachartre * Indicate whether to call this a CD or DVD from the size 6008edcc0754Sachartre * of the ISO image (images for both drive types are stored 6009edcc0754Sachartre * in the ISO-9600 format). 
CDs can store up to just under 1Gb 6010edcc0754Sachartre */ 6011*65908c77Syu, larry liu - Sun Microsystems - Beijing China if ((vd->vdisk_size * vd->vdisk_bsize) > ONE_GIGABYTE) 6012edcc0754Sachartre vd->vdisk_media = VD_MEDIA_DVD; 6013edcc0754Sachartre else 6014edcc0754Sachartre vd->vdisk_media = VD_MEDIA_CD; 6015edcc0754Sachartre } else { 6016edcc0754Sachartre vd->vdisk_media = VD_MEDIA_FIXED; 6017edcc0754Sachartre } 6018edcc0754Sachartre 6019edcc0754Sachartre /* Setup devid for the disk image */ 6020047ba61eSachartre 602178fcd0a1Sachartre if (vd->vdisk_label != VD_DISK_LABEL_UNK) { 602278fcd0a1Sachartre 60231aff8f07SAlexandre Chartre status = vd_dskimg_read_devid(vd, &vd->dskimg_devid); 602487a7269eSachartre 602587a7269eSachartre if (status == 0) { 602687a7269eSachartre /* a valid devid was found */ 602787a7269eSachartre return (0); 602887a7269eSachartre } 602987a7269eSachartre 603087a7269eSachartre if (status != EINVAL) { 603187a7269eSachartre /* 603278fcd0a1Sachartre * There was an error while trying to read the devid. 603378fcd0a1Sachartre * So this disk image may have a devid but we are 603478fcd0a1Sachartre * unable to read it. 603587a7269eSachartre */ 60361aff8f07SAlexandre Chartre PR0("can not read devid for %s", backend_path); 60371aff8f07SAlexandre Chartre vd->dskimg_devid = NULL; 603887a7269eSachartre return (0); 603987a7269eSachartre } 604078fcd0a1Sachartre } 604187a7269eSachartre 604287a7269eSachartre /* 604387a7269eSachartre * No valid device id was found so we create one. Note that a failure 604487a7269eSachartre * to create a device id is not fatal and does not prevent the disk 604587a7269eSachartre * image from being attached. 
604687a7269eSachartre */ 60471aff8f07SAlexandre Chartre PR1("creating devid for %s", backend_path); 604887a7269eSachartre 604987a7269eSachartre if (ddi_devid_init(vd->vds->dip, DEVID_FAB, NULL, 0, 60501aff8f07SAlexandre Chartre &vd->dskimg_devid) != DDI_SUCCESS) { 60511aff8f07SAlexandre Chartre PR0("fail to create devid for %s", backend_path); 60521aff8f07SAlexandre Chartre vd->dskimg_devid = NULL; 605387a7269eSachartre return (0); 605487a7269eSachartre } 605587a7269eSachartre 605678fcd0a1Sachartre /* 605778fcd0a1Sachartre * Write devid to the disk image. The devid is stored into the disk 605878fcd0a1Sachartre * image if we have a valid label; otherwise the devid will be stored 605978fcd0a1Sachartre * when the user writes a valid label. 606078fcd0a1Sachartre */ 606178fcd0a1Sachartre if (vd->vdisk_label != VD_DISK_LABEL_UNK) { 60621aff8f07SAlexandre Chartre if (vd_dskimg_write_devid(vd, vd->dskimg_devid) != 0) { 60631aff8f07SAlexandre Chartre PR0("fail to write devid for %s", backend_path); 60641aff8f07SAlexandre Chartre ddi_devid_free(vd->dskimg_devid); 60651aff8f07SAlexandre Chartre vd->dskimg_devid = NULL; 606687a7269eSachartre } 606778fcd0a1Sachartre } 606887a7269eSachartre 60693c96341aSnarayan return (0); 60703c96341aSnarayan } 60713c96341aSnarayan 607217cadca8Slm66018 607317cadca8Slm66018 /* 607417cadca8Slm66018 * Description: 607517cadca8Slm66018 * Open a device using its device path (supplied by ldm(1m)) 607617cadca8Slm66018 * 607717cadca8Slm66018 * Parameters: 607817cadca8Slm66018 * vd - pointer to structure containing the vDisk info 60798fce2fd6Sachartre * flags - open flags 608017cadca8Slm66018 * 608117cadca8Slm66018 * Return Value 608217cadca8Slm66018 * 0 - success 608317cadca8Slm66018 * != 0 - some other non-zero return value from ldi(9F) functions 608417cadca8Slm66018 */ 608517cadca8Slm66018 static int 60868fce2fd6Sachartre vd_open_using_ldi_by_name(vd_t *vd, int flags) 608717cadca8Slm66018 { 60888fce2fd6Sachartre int status; 608917cadca8Slm66018 char 
*device_path = vd->device_path; 609017cadca8Slm66018 60918fce2fd6Sachartre /* Attempt to open device */ 60928fce2fd6Sachartre status = ldi_open_by_name(device_path, flags, kcred, 609317cadca8Slm66018 &vd->ldi_handle[0], vd->vds->ldi_ident); 609417cadca8Slm66018 609517cadca8Slm66018 /* 609617cadca8Slm66018 * The open can fail for example if we are opening an empty slice. 609717cadca8Slm66018 * In case of a failure, we try the open again but this time with 609817cadca8Slm66018 * the FNDELAY flag. 609917cadca8Slm66018 */ 610017cadca8Slm66018 if (status != 0) 61018fce2fd6Sachartre status = ldi_open_by_name(device_path, flags | FNDELAY, 610217cadca8Slm66018 kcred, &vd->ldi_handle[0], vd->vds->ldi_ident); 610317cadca8Slm66018 610417cadca8Slm66018 if (status != 0) { 610517cadca8Slm66018 PR0("ldi_open_by_name(%s) = errno %d", device_path, status); 610617cadca8Slm66018 vd->ldi_handle[0] = NULL; 610717cadca8Slm66018 return (status); 610817cadca8Slm66018 } 610917cadca8Slm66018 611017cadca8Slm66018 return (0); 611117cadca8Slm66018 } 611217cadca8Slm66018 6113047ba61eSachartre /* 6114047ba61eSachartre * Setup for a virtual disk whose backend is a device (a physical disk, 61151aff8f07SAlexandre Chartre * slice or volume device) exported as a full disk or as a slice. In these 61161aff8f07SAlexandre Chartre * cases, the backend is accessed using the LDI interface.
6117047ba61eSachartre */ 61183c96341aSnarayan static int 6119047ba61eSachartre vd_setup_backend_ldi(vd_t *vd) 61201ae08745Sheppo { 6121e1ebb9ecSlm66018 int rval, status; 61221ae08745Sheppo struct dk_cinfo dk_cinfo; 61233c96341aSnarayan char *device_path = vd->device_path; 61241ae08745Sheppo 61258fce2fd6Sachartre /* device has been opened by vd_identify_dev() */ 61268fce2fd6Sachartre ASSERT(vd->ldi_handle[0] != NULL); 61278fce2fd6Sachartre ASSERT(vd->dev[0] != NULL); 61280a55fbb7Slm66018 61293c96341aSnarayan vd->file = B_FALSE; 61304bac2208Snarayan 613178fcd0a1Sachartre /* Verify backing device supports dk_cinfo */ 6132e1ebb9ecSlm66018 if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCINFO, 6133047ba61eSachartre (intptr_t)&dk_cinfo, (vd->open_flags | FKIOCTL), kcred, 6134e1ebb9ecSlm66018 &rval)) != 0) { 6135e1ebb9ecSlm66018 PRN("ldi_ioctl(DKIOCINFO) returned errno %d for %s", 6136e1ebb9ecSlm66018 status, device_path); 6137e1ebb9ecSlm66018 return (status); 6138e1ebb9ecSlm66018 } 6139e1ebb9ecSlm66018 if (dk_cinfo.dki_partition >= V_NUMPAR) { 6140e1ebb9ecSlm66018 PRN("slice %u >= maximum slice %u for %s", 6141e1ebb9ecSlm66018 dk_cinfo.dki_partition, V_NUMPAR, device_path); 6142e1ebb9ecSlm66018 return (EIO); 6143e1ebb9ecSlm66018 } 61444bac2208Snarayan 61458fce2fd6Sachartre /* 61468fce2fd6Sachartre * The device has been opened read-only by vd_identify_dev(), re-open 61478fce2fd6Sachartre * it read-write if the write flag is set and we don't have an optical 61488fce2fd6Sachartre * device such as a CD-ROM, which, for now, we do not permit writes to 61498fce2fd6Sachartre * and thus should not export write operations to the client. 61508fce2fd6Sachartre * 61518fce2fd6Sachartre * Future: if/when we implement support for guest domains writing to 61528fce2fd6Sachartre * optical devices we will need to do further checking of the media type 61538fce2fd6Sachartre * to distinguish between read-only and writable discs. 
61548fce2fd6Sachartre */ 61558fce2fd6Sachartre if (dk_cinfo.dki_ctype == DKC_CDROM) { 61568fce2fd6Sachartre 61578fce2fd6Sachartre vd->open_flags &= ~FWRITE; 61588fce2fd6Sachartre 61598fce2fd6Sachartre } else if (vd->open_flags & FWRITE) { 61608fce2fd6Sachartre 61618fce2fd6Sachartre (void) ldi_close(vd->ldi_handle[0], vd->open_flags & ~FWRITE, 61628fce2fd6Sachartre kcred); 61638fce2fd6Sachartre status = vd_open_using_ldi_by_name(vd, vd->open_flags); 61648fce2fd6Sachartre if (status != 0) { 61658fce2fd6Sachartre PR0("Failed to open (%s) = errno %d", 61668fce2fd6Sachartre device_path, status); 61678fce2fd6Sachartre return (status); 61688fce2fd6Sachartre } 61698fce2fd6Sachartre } 61708fce2fd6Sachartre 6171e1ebb9ecSlm66018 /* Store the device's max transfer size for return to the client */ 6172e1ebb9ecSlm66018 vd->max_xfer_sz = dk_cinfo.dki_maxtransfer; 6173e1ebb9ecSlm66018 6174047ba61eSachartre /* 617517cadca8Slm66018 * We need to work out if it's an ATAPI (IDE CD-ROM) or SCSI device so 617617cadca8Slm66018 * that we can use the correct CDB group when sending USCSI commands. 617717cadca8Slm66018 */ 617817cadca8Slm66018 vd->is_atapi_dev = vd_is_atapi_device(vd); 617917cadca8Slm66018 618017cadca8Slm66018 /* 6181047ba61eSachartre * Export a full disk. 6182047ba61eSachartre * 61831aff8f07SAlexandre Chartre * The exported device can be either a volume, a disk or a CD/DVD 61841aff8f07SAlexandre Chartre * device. We export a device as a full disk if we have an entire 61851aff8f07SAlexandre Chartre * disk slice (slice 2) and if this slice is exported as a full disk 61861aff8f07SAlexandre Chartre * and not as a single slice disk. A CD or DVD device is exported 61871aff8f07SAlexandre Chartre * as a full disk (even if it isn't s2). A volume is exported as a 61881aff8f07SAlexandre Chartre * full disk as long as the "slice" option is not specified. 
6189047ba61eSachartre */ 61901aff8f07SAlexandre Chartre if (vd->vdisk_type == VD_DISK_TYPE_DISK) { 61911aff8f07SAlexandre Chartre 61921aff8f07SAlexandre Chartre if (vd->volume) { 61931aff8f07SAlexandre Chartre /* setup disk image */ 61941aff8f07SAlexandre Chartre return (vd_setup_disk_image(vd)); 61951aff8f07SAlexandre Chartre } 61961aff8f07SAlexandre Chartre 61971aff8f07SAlexandre Chartre if (dk_cinfo.dki_partition == VD_ENTIRE_DISK_SLICE || 619817cadca8Slm66018 dk_cinfo.dki_ctype == DKC_CDROM) { 61998fce2fd6Sachartre ASSERT(!vd->volume); 62002f5224aeSachartre if (dk_cinfo.dki_ctype == DKC_SCSI_CCS) 62012f5224aeSachartre vd->scsi = B_TRUE; 6202047ba61eSachartre return (vd_setup_full_disk(vd)); 6203047ba61eSachartre } 62041aff8f07SAlexandre Chartre } 6205047ba61eSachartre 6206047ba61eSachartre /* 6207047ba61eSachartre * Export a single slice disk. 6208047ba61eSachartre * 62098fce2fd6Sachartre * The exported device can be either a volume device or a disk slice. If 6210047ba61eSachartre * it is a disk slice different from slice 2 then it is always exported 6211047ba61eSachartre * as a single slice disk even if the "slice" option is not specified. 62128fce2fd6Sachartre * If it is disk slice 2 or a volume device then it is exported as a 6213047ba61eSachartre * single slice disk only if the "slice" option is specified. 
6214047ba61eSachartre */ 6215047ba61eSachartre return (vd_setup_single_slice_disk(vd)); 6216047ba61eSachartre } 6217047ba61eSachartre 6218047ba61eSachartre static int 6219047ba61eSachartre vd_setup_single_slice_disk(vd_t *vd) 6220047ba61eSachartre { 6221edcc0754Sachartre int status, rval; 6222bae9e67eSachartre struct dk_label label; 6223047ba61eSachartre char *device_path = vd->device_path; 6224342440ecSPrasad Singamsetty struct vtoc vtoc; 6225047ba61eSachartre 622617cadca8Slm66018 vd->vdisk_media = VD_MEDIA_FIXED; 6227047ba61eSachartre 62288fce2fd6Sachartre if (vd->volume) { 6229047ba61eSachartre ASSERT(vd->vdisk_type == VD_DISK_TYPE_SLICE); 623078fcd0a1Sachartre } 62310a55fbb7Slm66018 6232047ba61eSachartre /* 6233047ba61eSachartre * We export the slice as a single slice disk even if the "slice" 6234047ba61eSachartre * option was not specified. 6235047ba61eSachartre */ 62361ae08745Sheppo vd->vdisk_type = VD_DISK_TYPE_SLICE; 62371ae08745Sheppo vd->nslices = 1; 62381ae08745Sheppo 6239*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* Get size of backing device */ 6240*65908c77Syu, larry liu - Sun Microsystems - Beijing China if ((status = vd_backend_check_size(vd)) != 0) { 6241*65908c77Syu, larry liu - Sun Microsystems - Beijing China PRN("Fail to check size of %s (errno %d)", device_path, status); 6242*65908c77Syu, larry liu - Sun Microsystems - Beijing China return (EIO); 6243*65908c77Syu, larry liu - Sun Microsystems - Beijing China } 6244*65908c77Syu, larry liu - Sun Microsystems - Beijing China 6245edcc0754Sachartre /* 6246edcc0754Sachartre * When exporting a slice or a device as a single slice disk, we don't 6247edcc0754Sachartre * care about any partitioning exposed by the backend. The goal is just 6248edcc0754Sachartre * to export the backend as a flat storage. We provide a fake partition 6249edcc0754Sachartre * table (either a VTOC or EFI), which presents only one slice, to 6250bae9e67eSachartre * accommodate tools expecting a disk label. 
The selection of the label 6251bae9e67eSachartre * type (VTOC or EFI) depends on the value of the vd_slice_label 6252bae9e67eSachartre * variable. 6253edcc0754Sachartre */ 6254bae9e67eSachartre if (vd_slice_label == VD_DISK_LABEL_EFI || 6255*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd->vdisk_size >= ONE_TERABYTE / vd->vdisk_bsize) { 6256bae9e67eSachartre vd->vdisk_label = VD_DISK_LABEL_EFI; 6257bae9e67eSachartre } else { 6258342440ecSPrasad Singamsetty status = ldi_ioctl(vd->ldi_handle[0], DKIOCGEXTVTOC, 6259bae9e67eSachartre (intptr_t)&vd->vtoc, (vd->open_flags | FKIOCTL), 6260bae9e67eSachartre kcred, &rval); 6261edcc0754Sachartre 6262342440ecSPrasad Singamsetty if (status == ENOTTY) { 6263342440ecSPrasad Singamsetty /* try with the non-extended vtoc ioctl */ 6264342440ecSPrasad Singamsetty status = ldi_ioctl(vd->ldi_handle[0], DKIOCGVTOC, 6265342440ecSPrasad Singamsetty (intptr_t)&vtoc, (vd->open_flags | FKIOCTL), 6266342440ecSPrasad Singamsetty kcred, &rval); 6267342440ecSPrasad Singamsetty vtoctoextvtoc(vtoc, vd->vtoc); 6268342440ecSPrasad Singamsetty } 6269342440ecSPrasad Singamsetty 6270edcc0754Sachartre if (status == 0) { 6271bae9e67eSachartre status = ldi_ioctl(vd->ldi_handle[0], DKIOCGGEOM, 6272bae9e67eSachartre (intptr_t)&vd->dk_geom, (vd->open_flags | FKIOCTL), 6273bae9e67eSachartre kcred, &rval); 6274bae9e67eSachartre 6275bae9e67eSachartre if (status != 0) { 6276bae9e67eSachartre PRN("ldi_ioctl(DKIOCGEOM) returned errno %d " 6277bae9e67eSachartre "for %s", status, device_path); 6278bae9e67eSachartre return (status); 6279bae9e67eSachartre } 6280edcc0754Sachartre vd->vdisk_label = VD_DISK_LABEL_VTOC; 6281bae9e67eSachartre 6282bae9e67eSachartre } else if (vd_slice_label == VD_DISK_LABEL_VTOC) { 6283bae9e67eSachartre 6284bae9e67eSachartre vd->vdisk_label = VD_DISK_LABEL_VTOC; 6285*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd_build_default_label(vd->vdisk_size * vd->vdisk_bsize, 6286*65908c77Syu, larry liu - Sun Microsystems 
- Beijing China vd->vdisk_bsize, &label); 6287bae9e67eSachartre vd_label_to_vtocgeom(&label, &vd->vtoc, &vd->dk_geom); 6288bae9e67eSachartre 6289bae9e67eSachartre } else { 6290bae9e67eSachartre vd->vdisk_label = VD_DISK_LABEL_EFI; 6291bae9e67eSachartre } 6292bae9e67eSachartre } 6293bae9e67eSachartre 6294bae9e67eSachartre if (vd->vdisk_label == VD_DISK_LABEL_VTOC) { 6295bae9e67eSachartre /* export with a fake VTOC label */ 629678fcd0a1Sachartre status = vd_setup_partition_vtoc(vd); 6297bae9e67eSachartre 6298edcc0754Sachartre } else { 6299edcc0754Sachartre /* export with a fake EFI label */ 6300edcc0754Sachartre status = vd_setup_partition_efi(vd); 630178fcd0a1Sachartre } 630278fcd0a1Sachartre 63034bac2208Snarayan return (status); 63044bac2208Snarayan } 63051ae08745Sheppo 6306*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* 6307*65908c77Syu, larry liu - Sun Microsystems - Beijing China * This function is invoked when setting up the vdisk backend and to process 6308*65908c77Syu, larry liu - Sun Microsystems - Beijing China * the VD_OP_GET_CAPACITY operation. It checks the backend size and sets the 6309*65908c77Syu, larry liu - Sun Microsystems - Beijing China * following attributes of the vd structure: 6310*65908c77Syu, larry liu - Sun Microsystems - Beijing China * 6311*65908c77Syu, larry liu - Sun Microsystems - Beijing China * - vdisk_bsize: block size for the virtual disk used by the VIO protocol. Its 6312*65908c77Syu, larry liu - Sun Microsystems - Beijing China * value is 512 bytes (DEV_BSIZE) when the backend is a file, a volume or a 6313*65908c77Syu, larry liu - Sun Microsystems - Beijing China * CD/DVD. When the backend is a disk or a disk slice then it has the value 6314*65908c77Syu, larry liu - Sun Microsystems - Beijing China * of the logical block size of that disk (as returned by the DKIOCGMEDIAINFO 6315*65908c77Syu, larry liu - Sun Microsystems - Beijing China * ioctl).
This block size is expected to be a power of 2 and a multiple of 6316*65908c77Syu, larry liu - Sun Microsystems - Beijing China * 512. 6317*65908c77Syu, larry liu - Sun Microsystems - Beijing China * 6318*65908c77Syu, larry liu - Sun Microsystems - Beijing China * - vdisk_size: size of the virtual disk expressed as a number of vdisk_bsize 6319*65908c77Syu, larry liu - Sun Microsystems - Beijing China * blocks. 6320*65908c77Syu, larry liu - Sun Microsystems - Beijing China * 6321*65908c77Syu, larry liu - Sun Microsystems - Beijing China * vdisk_size and vdisk_bsize are sent to the vdisk client during the connection 6322*65908c77Syu, larry liu - Sun Microsystems - Beijing China * handshake and in the result of a VD_OP_GET_CAPACITY operation. 6323*65908c77Syu, larry liu - Sun Microsystems - Beijing China * 6324*65908c77Syu, larry liu - Sun Microsystems - Beijing China * - backend_bsize: block size of the backend device. backend_bsize has the same 6325*65908c77Syu, larry liu - Sun Microsystems - Beijing China * value as vdisk_bsize except when the backend is a CD/DVD. In that case, 6326*65908c77Syu, larry liu - Sun Microsystems - Beijing China * vdisk_bsize is set to 512 (DEV_BSIZE) while backend_bsize is set to the 6327*65908c77Syu, larry liu - Sun Microsystems - Beijing China * effective logical block size of the CD/DVD (usually 2048). 6328*65908c77Syu, larry liu - Sun Microsystems - Beijing China * 6329*65908c77Syu, larry liu - Sun Microsystems - Beijing China * - dskimg_size: size of the backend when the backend is a disk image. This 6330*65908c77Syu, larry liu - Sun Microsystems - Beijing China * attribute is set only when the backend is a file or a volume, otherwise it 6331*65908c77Syu, larry liu - Sun Microsystems - Beijing China * is unused. 
6332*65908c77Syu, larry liu - Sun Microsystems - Beijing China * 6333*65908c77Syu, larry liu - Sun Microsystems - Beijing China * - vio_bshift: number of bits to shift to convert a VIO block number (which 6334*65908c77Syu, larry liu - Sun Microsystems - Beijing China * uses a block size of vdisk_bsize) to a buf(9s) block number (which uses a 6335*65908c77Syu, larry liu - Sun Microsystems - Beijing China * block size of 512 bytes) i.e. we have vdisk_bsize = 512 x 2 ^ vio_bshift 6336*65908c77Syu, larry liu - Sun Microsystems - Beijing China * 6337*65908c77Syu, larry liu - Sun Microsystems - Beijing China * - vdisk_media: media of the virtual disk. This function only sets this 6338*65908c77Syu, larry liu - Sun Microsystems - Beijing China * attribute for physical disk and CD/DVD. For other backend types, this 6339*65908c77Syu, larry liu - Sun Microsystems - Beijing China * attribute is set in the setup function of the backend. 6340*65908c77Syu, larry liu - Sun Microsystems - Beijing China */ 6341de3a5331SRamesh Chitrothu static int 6342de3a5331SRamesh Chitrothu vd_backend_check_size(vd_t *vd) 6343de3a5331SRamesh Chitrothu { 6344*65908c77Syu, larry liu - Sun Microsystems - Beijing China size_t backend_size, backend_bsize, vdisk_bsize; 6345*65908c77Syu, larry liu - Sun Microsystems - Beijing China size_t old_size, new_size; 6346de3a5331SRamesh Chitrothu struct dk_minfo minfo; 6347de3a5331SRamesh Chitrothu vattr_t vattr; 6348*65908c77Syu, larry liu - Sun Microsystems - Beijing China int rval, rv, media, nshift = 0; 6349*65908c77Syu, larry liu - Sun Microsystems - Beijing China uint32_t n; 6350de3a5331SRamesh Chitrothu 6351de3a5331SRamesh Chitrothu if (vd->file) { 6352de3a5331SRamesh Chitrothu 6353de3a5331SRamesh Chitrothu /* file (slice or full disk) */ 6354de3a5331SRamesh Chitrothu vattr.va_mask = AT_SIZE; 6355de3a5331SRamesh Chitrothu rv = VOP_GETATTR(vd->file_vnode, &vattr, 0, kcred, NULL); 6356de3a5331SRamesh Chitrothu if (rv != 0) { 6357de3a5331SRamesh Chitrothu
PR0("VOP_GETATTR(%s) = errno %d", vd->device_path, rv); 6358de3a5331SRamesh Chitrothu return (rv); 6359de3a5331SRamesh Chitrothu } 6360de3a5331SRamesh Chitrothu backend_size = vattr.va_size; 6361*65908c77Syu, larry liu - Sun Microsystems - Beijing China backend_bsize = DEV_BSIZE; 6362*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdisk_bsize = DEV_BSIZE; 6363de3a5331SRamesh Chitrothu 6364*65908c77Syu, larry liu - Sun Microsystems - Beijing China } else if (vd->volume) { 6365de3a5331SRamesh Chitrothu 6366*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* volume (slice or full disk) */ 6367de3a5331SRamesh Chitrothu rv = ldi_get_size(vd->ldi_handle[0], &backend_size); 6368de3a5331SRamesh Chitrothu if (rv != DDI_SUCCESS) { 6369de3a5331SRamesh Chitrothu PR0("ldi_get_size() failed for %s", vd->device_path); 6370de3a5331SRamesh Chitrothu return (EIO); 6371de3a5331SRamesh Chitrothu } 6372*65908c77Syu, larry liu - Sun Microsystems - Beijing China backend_bsize = DEV_BSIZE; 6373*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdisk_bsize = DEV_BSIZE; 6374de3a5331SRamesh Chitrothu 6375de3a5331SRamesh Chitrothu } else { 6376de3a5331SRamesh Chitrothu 6377*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* physical disk or slice */ 6378de3a5331SRamesh Chitrothu rv = ldi_ioctl(vd->ldi_handle[0], DKIOCGMEDIAINFO, 6379de3a5331SRamesh Chitrothu (intptr_t)&minfo, (vd->open_flags | FKIOCTL), 6380de3a5331SRamesh Chitrothu kcred, &rval); 6381de3a5331SRamesh Chitrothu if (rv != 0) { 6382de3a5331SRamesh Chitrothu PR0("DKIOCGMEDIAINFO failed for %s (err=%d)", 6383de3a5331SRamesh Chitrothu vd->device_path, rv); 6384de3a5331SRamesh Chitrothu return (rv); 6385de3a5331SRamesh Chitrothu } 6386*65908c77Syu, larry liu - Sun Microsystems - Beijing China 6387*65908c77Syu, larry liu - Sun Microsystems - Beijing China if (vd->vdisk_type == VD_DISK_TYPE_SLICE) { 6388*65908c77Syu, larry liu - Sun Microsystems - Beijing China rv = 
ldi_get_size(vd->ldi_handle[0], &backend_size); 6389*65908c77Syu, larry liu - Sun Microsystems - Beijing China if (rv != DDI_SUCCESS) { 6390*65908c77Syu, larry liu - Sun Microsystems - Beijing China PR0("ldi_get_size() failed for %s", 6391*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd->device_path); 6392*65908c77Syu, larry liu - Sun Microsystems - Beijing China return (EIO); 6393*65908c77Syu, larry liu - Sun Microsystems - Beijing China } 6394*65908c77Syu, larry liu - Sun Microsystems - Beijing China } else { 6395*65908c77Syu, larry liu - Sun Microsystems - Beijing China ASSERT(vd->vdisk_type == VD_DISK_TYPE_DISK); 6396de3a5331SRamesh Chitrothu backend_size = minfo.dki_capacity * minfo.dki_lbsize; 6397de3a5331SRamesh Chitrothu } 6398de3a5331SRamesh Chitrothu 6399*65908c77Syu, larry liu - Sun Microsystems - Beijing China backend_bsize = minfo.dki_lbsize; 6400*65908c77Syu, larry liu - Sun Microsystems - Beijing China media = DK_MEDIATYPE2VD_MEDIATYPE(minfo.dki_media_type); 6401*65908c77Syu, larry liu - Sun Microsystems - Beijing China 6402*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* 6403*65908c77Syu, larry liu - Sun Microsystems - Beijing China * If the device is a CD or a DVD then we force the vdisk block 6404*65908c77Syu, larry liu - Sun Microsystems - Beijing China * size to 512 bytes (DEV_BSIZE). In that case, vdisk_bsize can 6405*65908c77Syu, larry liu - Sun Microsystems - Beijing China * be different from backend_bsize.
6406*65908c77Syu, larry liu - Sun Microsystems - Beijing China */ 6407*65908c77Syu, larry liu - Sun Microsystems - Beijing China if (media == VD_MEDIA_CD || media == VD_MEDIA_DVD) 6408*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdisk_bsize = DEV_BSIZE; 6409*65908c77Syu, larry liu - Sun Microsystems - Beijing China else 6410*65908c77Syu, larry liu - Sun Microsystems - Beijing China vdisk_bsize = backend_bsize; 6411*65908c77Syu, larry liu - Sun Microsystems - Beijing China } 6412*65908c77Syu, larry liu - Sun Microsystems - Beijing China 6413*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* check vdisk block size */ 6414*65908c77Syu, larry liu - Sun Microsystems - Beijing China if (vdisk_bsize == 0 || vdisk_bsize % DEV_BSIZE != 0) 6415*65908c77Syu, larry liu - Sun Microsystems - Beijing China return (EINVAL); 6416*65908c77Syu, larry liu - Sun Microsystems - Beijing China 6417de3a5331SRamesh Chitrothu old_size = vd->vdisk_size; 6418*65908c77Syu, larry liu - Sun Microsystems - Beijing China new_size = backend_size / vdisk_bsize; 6419de3a5331SRamesh Chitrothu 6420de3a5331SRamesh Chitrothu /* check if size has changed */ 6421*65908c77Syu, larry liu - Sun Microsystems - Beijing China if (old_size != VD_SIZE_UNKNOWN && old_size == new_size && 6422*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd->vdisk_bsize == vdisk_bsize) 6423de3a5331SRamesh Chitrothu return (0); 6424de3a5331SRamesh Chitrothu 6425*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* cache info for blk conversion */ 6426*65908c77Syu, larry liu - Sun Microsystems - Beijing China for (n = vdisk_bsize / DEV_BSIZE; n > 1; n >>= 1) { 6427*65908c77Syu, larry liu - Sun Microsystems - Beijing China if ((n & 0x1) != 0) { 6428*65908c77Syu, larry liu - Sun Microsystems - Beijing China /* blk_size is not a power of 2 */ 6429*65908c77Syu, larry liu - Sun Microsystems - Beijing China return (EINVAL); 6430*65908c77Syu, larry liu - Sun Microsystems - Beijing China } 
6431*65908c77Syu, larry liu - Sun Microsystems - Beijing China nshift++; 6432*65908c77Syu, larry liu - Sun Microsystems - Beijing China } 6433*65908c77Syu, larry liu - Sun Microsystems - Beijing China 6434*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd->vio_bshift = nshift; 6435de3a5331SRamesh Chitrothu vd->vdisk_size = new_size; 6436*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd->vdisk_bsize = vdisk_bsize; 6437*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd->backend_bsize = backend_bsize; 6438de3a5331SRamesh Chitrothu 64391aff8f07SAlexandre Chartre if (vd->file || vd->volume) 64401aff8f07SAlexandre Chartre vd->dskimg_size = backend_size; 6441de3a5331SRamesh Chitrothu 6442de3a5331SRamesh Chitrothu /* 6443de3a5331SRamesh Chitrothu * If we are exporting a single-slice disk and the size of the backend 6444de3a5331SRamesh Chitrothu * has changed then we regenerate the partition setup so that the 6445de3a5331SRamesh Chitrothu * partitioning matches with the new disk backend size. 
6446de3a5331SRamesh Chitrothu */ 6447de3a5331SRamesh Chitrothu 6448de3a5331SRamesh Chitrothu if (vd->vdisk_type == VD_DISK_TYPE_SLICE) { 6449de3a5331SRamesh Chitrothu /* slice or file or device exported as a slice */ 6450de3a5331SRamesh Chitrothu if (vd->vdisk_label == VD_DISK_LABEL_VTOC) { 6451de3a5331SRamesh Chitrothu rv = vd_setup_partition_vtoc(vd); 6452de3a5331SRamesh Chitrothu if (rv != 0) { 6453de3a5331SRamesh Chitrothu PR0("vd_setup_partition_vtoc() failed for %s " 6454de3a5331SRamesh Chitrothu "(err = %d)", vd->device_path, rv); 6455de3a5331SRamesh Chitrothu return (rv); 6456de3a5331SRamesh Chitrothu } 6457de3a5331SRamesh Chitrothu } else { 6458de3a5331SRamesh Chitrothu rv = vd_setup_partition_efi(vd); 6459de3a5331SRamesh Chitrothu if (rv != 0) { 6460de3a5331SRamesh Chitrothu PR0("vd_setup_partition_efi() failed for %s " 6461de3a5331SRamesh Chitrothu "(err = %d)", vd->device_path, rv); 6462de3a5331SRamesh Chitrothu return (rv); 6463de3a5331SRamesh Chitrothu } 6464de3a5331SRamesh Chitrothu } 6465de3a5331SRamesh Chitrothu 64661aff8f07SAlexandre Chartre } else if (!vd->file && !vd->volume) { 64671aff8f07SAlexandre Chartre /* physical disk */ 6468de3a5331SRamesh Chitrothu ASSERT(vd->vdisk_type == VD_DISK_TYPE_DISK); 6469*65908c77Syu, larry liu - Sun Microsystems - Beijing China vd->vdisk_media = media; 6470de3a5331SRamesh Chitrothu } 6471de3a5331SRamesh Chitrothu 6472de3a5331SRamesh Chitrothu return (0); 6473de3a5331SRamesh Chitrothu } 6474de3a5331SRamesh Chitrothu 64758fce2fd6Sachartre /* 64768fce2fd6Sachartre * Description: 64778fce2fd6Sachartre * Open a device using its device path and identify if this is 64788fce2fd6Sachartre * a disk device or a volume device. 
64798fce2fd6Sachartre * 64808fce2fd6Sachartre * Parameters: 64818fce2fd6Sachartre * vd - pointer to structure containing the vDisk info 64828fce2fd6Sachartre * dtype - return the driver type of the device 64838fce2fd6Sachartre * 64848fce2fd6Sachartre * Return Value 64858fce2fd6Sachartre * 0 - success 64868fce2fd6Sachartre * != 0 - some other non-zero return value from ldi(9F) functions 64878fce2fd6Sachartre */ 64888fce2fd6Sachartre static int 64898fce2fd6Sachartre vd_identify_dev(vd_t *vd, int *dtype) 64908fce2fd6Sachartre { 64918fce2fd6Sachartre int status, i; 64928fce2fd6Sachartre char *device_path = vd->device_path; 64938fce2fd6Sachartre char *drv_name; 64948fce2fd6Sachartre int drv_type; 64958fce2fd6Sachartre vds_t *vds = vd->vds; 64968fce2fd6Sachartre 64978fce2fd6Sachartre status = vd_open_using_ldi_by_name(vd, vd->open_flags & ~FWRITE); 64988fce2fd6Sachartre if (status != 0) { 64998fce2fd6Sachartre PR0("Failed to open (%s) = errno %d", device_path, status); 65008fce2fd6Sachartre return (status); 65018fce2fd6Sachartre } 65028fce2fd6Sachartre 65038fce2fd6Sachartre /* Get device number of backing device */ 65048fce2fd6Sachartre if ((status = ldi_get_dev(vd->ldi_handle[0], &vd->dev[0])) != 0) { 65058fce2fd6Sachartre PRN("ldi_get_dev() returned errno %d for %s", 65068fce2fd6Sachartre status, device_path); 65078fce2fd6Sachartre return (status); 65088fce2fd6Sachartre } 65098fce2fd6Sachartre 65108fce2fd6Sachartre /* 65118fce2fd6Sachartre * We start by looking if the driver is in the list from vds.conf 65128fce2fd6Sachartre * so that we can override the built-in list using vds.conf. 
65138fce2fd6Sachartre */ 65148fce2fd6Sachartre drv_name = ddi_major_to_name(getmajor(vd->dev[0])); 65158fce2fd6Sachartre drv_type = VD_DRIVER_UNKNOWN; 65168fce2fd6Sachartre 65178fce2fd6Sachartre /* check vds.conf list */ 65188fce2fd6Sachartre for (i = 0; i < vds->num_drivers; i++) { 65198fce2fd6Sachartre if (vds->driver_types[i].type == VD_DRIVER_UNKNOWN) { 65208fce2fd6Sachartre /* ignore invalid entries */ 65218fce2fd6Sachartre continue; 65228fce2fd6Sachartre } 65238fce2fd6Sachartre if (strcmp(drv_name, vds->driver_types[i].name) == 0) { 65248fce2fd6Sachartre drv_type = vds->driver_types[i].type; 65258fce2fd6Sachartre goto done; 65268fce2fd6Sachartre } 65278fce2fd6Sachartre } 65288fce2fd6Sachartre 65298fce2fd6Sachartre /* check built-in list */ 65308fce2fd6Sachartre for (i = 0; i < VDS_NUM_DRIVERS; i++) { 65318fce2fd6Sachartre if (strcmp(drv_name, vds_driver_types[i].name) == 0) { 65328fce2fd6Sachartre drv_type = vds_driver_types[i].type; 65338fce2fd6Sachartre goto done; 65348fce2fd6Sachartre } 65358fce2fd6Sachartre } 65368fce2fd6Sachartre 65378fce2fd6Sachartre done: 65388fce2fd6Sachartre PR0("driver %s identified as %s", drv_name, 65398fce2fd6Sachartre (drv_type == VD_DRIVER_DISK)? "DISK" : 65408fce2fd6Sachartre (drv_type == VD_DRIVER_VOLUME)? 
"VOLUME" : "UNKNOWN"); 65418fce2fd6Sachartre 65421aff8f07SAlexandre Chartre if (strcmp(drv_name, "zfs") == 0) 65431aff8f07SAlexandre Chartre vd->zvol = B_TRUE; 65441aff8f07SAlexandre Chartre 65458fce2fd6Sachartre *dtype = drv_type; 65468fce2fd6Sachartre 65478fce2fd6Sachartre return (0); 65488fce2fd6Sachartre } 65498fce2fd6Sachartre 65501ae08745Sheppo static int 6551047ba61eSachartre vd_setup_vd(vd_t *vd) 6552047ba61eSachartre { 65538fce2fd6Sachartre int status, drv_type, pseudo; 6554047ba61eSachartre dev_info_t *dip; 6555047ba61eSachartre vnode_t *vnp; 6556047ba61eSachartre char *path = vd->device_path; 655783990c4aSAlexandre Chartre char tq_name[TASKQ_NAMELEN]; 6558047ba61eSachartre 6559047ba61eSachartre /* make sure the vdisk backend is valid */ 6560047ba61eSachartre if ((status = lookupname(path, UIO_SYSSPACE, 6561047ba61eSachartre FOLLOW, NULLVPP, &vnp)) != 0) { 6562047ba61eSachartre PR0("Cannot lookup %s errno %d", path, status); 6563047ba61eSachartre goto done; 6564047ba61eSachartre } 6565047ba61eSachartre 6566047ba61eSachartre switch (vnp->v_type) { 6567047ba61eSachartre case VREG: 6568047ba61eSachartre /* 6569047ba61eSachartre * Backend is a file so it is exported as a full disk or as a 6570047ba61eSachartre * single slice disk using the vnode interface. 6571047ba61eSachartre */ 6572047ba61eSachartre VN_RELE(vnp); 65738fce2fd6Sachartre vd->volume = B_FALSE; 6574047ba61eSachartre status = vd_setup_backend_vnode(vd); 6575047ba61eSachartre break; 6576047ba61eSachartre 6577047ba61eSachartre case VBLK: 6578047ba61eSachartre case VCHR: 6579047ba61eSachartre /* 65801aff8f07SAlexandre Chartre * Backend is a device. In that case, it is exported using the 65811aff8f07SAlexandre Chartre * LDI interface, and it is exported either as a single-slice 65821aff8f07SAlexandre Chartre * disk or as a full disk depending on the "slice" option and 65831aff8f07SAlexandre Chartre * on the type of device. 
6584047ba61eSachartre * 65851aff8f07SAlexandre Chartre * - A volume device is exported as a single-slice disk if the 65861aff8f07SAlexandre Chartre * "slice" option is specified, otherwise it is exported as a full 65871aff8f07SAlexandre Chartre * disk. 6588047ba61eSachartre * 6589047ba61eSachartre * - A disk slice (different from slice 2) is always exported 6590047ba61eSachartre * as a single slice disk using the LDI interface. 6591047ba61eSachartre * 6592047ba61eSachartre * - The slice 2 of a disk is exported as a single slice disk 6593047ba61eSachartre * if the "slice" option is specified, otherwise the entire 65941aff8f07SAlexandre Chartre * disk will be exported. 65951aff8f07SAlexandre Chartre * 65961aff8f07SAlexandre Chartre * - The slice of a CD or DVD is exported as a single slice disk 65971aff8f07SAlexandre Chartre * if the "slice" option is specified, otherwise the entire 65981aff8f07SAlexandre Chartre * disk will be exported. 6599047ba61eSachartre */ 6600047ba61eSachartre 6601047ba61eSachartre /* check if this is a pseudo device */ 6602047ba61eSachartre if ((dip = ddi_hold_devi_by_instance(getmajor(vnp->v_rdev), 6603047ba61eSachartre dev_to_instance(vnp->v_rdev), 0)) == NULL) { 6604047ba61eSachartre PRN("%s is no longer accessible", path); 6605047ba61eSachartre VN_RELE(vnp); 6606047ba61eSachartre status = EIO; 6607047ba61eSachartre break; 6608047ba61eSachartre } 66098fce2fd6Sachartre pseudo = is_pseudo_device(dip); 6610047ba61eSachartre ddi_release_devi(dip); 6611047ba61eSachartre VN_RELE(vnp); 6612047ba61eSachartre 6613d753835aSZach Kissel if ((status = vd_identify_dev(vd, &drv_type)) != 0) { 6614d753835aSZach Kissel if (status != ENODEV && status != ENXIO && 6615d753835aSZach Kissel status != ENOENT && status != EROFS) { 6616d753835aSZach Kissel PRN("%s identification failed with status %d", 6617d753835aSZach Kissel path, status); 66188fce2fd6Sachartre status = EIO; 6619d753835aSZach Kissel } 66208fce2fd6Sachartre break; 66218fce2fd6Sachartre }
66228fce2fd6Sachartre 66238fce2fd6Sachartre /* 66248fce2fd6Sachartre * If the driver hasn't been identified then we consider that 66258fce2fd6Sachartre * pseudo devices are volumes and other devices are disks. 66268fce2fd6Sachartre */ 66278fce2fd6Sachartre if (drv_type == VD_DRIVER_VOLUME || 66288fce2fd6Sachartre (drv_type == VD_DRIVER_UNKNOWN && pseudo)) { 66298fce2fd6Sachartre vd->volume = B_TRUE; 66302f5224aeSachartre } 66312f5224aeSachartre 6632047ba61eSachartre /* 66338fce2fd6Sachartre * If this is a volume device then its usage depends if the 6634047ba61eSachartre * "slice" option is set or not. If the "slice" option is set 66358fce2fd6Sachartre * then the volume device will be exported as a single slice, 6636047ba61eSachartre * otherwise it will be exported as a full disk. 66372f5224aeSachartre * 66382f5224aeSachartre * For backward compatibility, if vd_volume_force_slice is set 66398fce2fd6Sachartre * then we always export volume devices as slices. 6640047ba61eSachartre */ 66411aff8f07SAlexandre Chartre if (vd->volume && vd_volume_force_slice) { 66422f5224aeSachartre vd->vdisk_type = VD_DISK_TYPE_SLICE; 66432f5224aeSachartre vd->nslices = 1; 66442f5224aeSachartre } 66452f5224aeSachartre 6646047ba61eSachartre status = vd_setup_backend_ldi(vd); 6647047ba61eSachartre break; 6648047ba61eSachartre 6649047ba61eSachartre default: 6650047ba61eSachartre PRN("Unsupported vdisk backend %s", path); 6651047ba61eSachartre VN_RELE(vnp); 6652047ba61eSachartre status = EBADF; 6653047ba61eSachartre } 6654047ba61eSachartre 6655047ba61eSachartre done: 6656047ba61eSachartre if (status != 0) { 6657047ba61eSachartre /* 6658047ba61eSachartre * If the error is retryable print an error message only 6659047ba61eSachartre * during the first try. 
6660047ba61eSachartre */ 6661047ba61eSachartre if (status == ENXIO || status == ENODEV || 6662047ba61eSachartre status == ENOENT || status == EROFS) { 6663047ba61eSachartre if (!(vd->initialized & VD_SETUP_ERROR)) { 6664047ba61eSachartre PRN("%s is currently inaccessible (error %d)", 6665047ba61eSachartre path, status); 6666047ba61eSachartre } 6667047ba61eSachartre status = EAGAIN; 6668047ba61eSachartre } else { 6669047ba61eSachartre PRN("%s can not be exported as a virtual disk " 6670047ba61eSachartre "(error %d)", path, status); 6671047ba61eSachartre } 6672047ba61eSachartre vd->initialized |= VD_SETUP_ERROR; 6673047ba61eSachartre 6674047ba61eSachartre } else if (vd->initialized & VD_SETUP_ERROR) { 6675047ba61eSachartre /* print a message only if we previously had an error */ 6676047ba61eSachartre PRN("%s is now online", path); 6677047ba61eSachartre vd->initialized &= ~VD_SETUP_ERROR; 6678047ba61eSachartre } 6679047ba61eSachartre 668083990c4aSAlexandre Chartre /* 668183990c4aSAlexandre Chartre * For file or ZFS volume we also need an I/O queue. 668283990c4aSAlexandre Chartre * 668383990c4aSAlexandre Chartre * The I/O task queue is initialized here and not in vds_do_init_vd() 668483990c4aSAlexandre Chartre * (as the start and completion queues) because vd_setup_vd() will be 668583990c4aSAlexandre Chartre * call again if the backend is not available, and we need to know if 668683990c4aSAlexandre Chartre * the backend is a ZFS volume or a file. 
668783990c4aSAlexandre Chartre */ 668883990c4aSAlexandre Chartre if ((vd->file || vd->zvol) && vd->ioq == NULL) { 668983990c4aSAlexandre Chartre (void) snprintf(tq_name, sizeof (tq_name), "vd_ioq%lu", vd->id); 669083990c4aSAlexandre Chartre 669183990c4aSAlexandre Chartre if ((vd->ioq = ddi_taskq_create(vd->vds->dip, tq_name, 669283990c4aSAlexandre Chartre vd_ioq_nthreads, TASKQ_DEFAULTPRI, 0)) == NULL) { 669383990c4aSAlexandre Chartre PRN("Could not create io task queue"); 669483990c4aSAlexandre Chartre return (EIO); 669583990c4aSAlexandre Chartre } 669683990c4aSAlexandre Chartre } 669783990c4aSAlexandre Chartre 6698047ba61eSachartre return (status); 6699047ba61eSachartre } 6700047ba61eSachartre 6701047ba61eSachartre static int 6702047ba61eSachartre vds_do_init_vd(vds_t *vds, uint64_t id, char *device_path, uint64_t options, 6703047ba61eSachartre uint64_t ldc_id, vd_t **vdp) 67041ae08745Sheppo { 67051ae08745Sheppo char tq_name[TASKQ_NAMELEN]; 67060a55fbb7Slm66018 int status; 67071ae08745Sheppo ddi_iblock_cookie_t iblock = NULL; 67081ae08745Sheppo ldc_attr_t ldc_attr; 67091ae08745Sheppo vd_t *vd; 67101ae08745Sheppo 67111ae08745Sheppo 67121ae08745Sheppo ASSERT(vds != NULL); 6713e1ebb9ecSlm66018 ASSERT(device_path != NULL); 67141ae08745Sheppo ASSERT(vdp != NULL); 6715e1ebb9ecSlm66018 PR0("Adding vdisk for %s", device_path); 67161ae08745Sheppo 67171ae08745Sheppo if ((vd = kmem_zalloc(sizeof (*vd), KM_NOSLEEP)) == NULL) { 67181ae08745Sheppo PRN("No memory for virtual disk"); 67191ae08745Sheppo return (EAGAIN); 67201ae08745Sheppo } 67211ae08745Sheppo *vdp = vd; /* assign here so vds_destroy_vd() can cleanup later */ 672283990c4aSAlexandre Chartre vd->id = id; 67231ae08745Sheppo vd->vds = vds; 67243c96341aSnarayan (void) strncpy(vd->device_path, device_path, MAXPATHLEN); 67251ae08745Sheppo 6726047ba61eSachartre /* Setup open flags */ 6727047ba61eSachartre vd->open_flags = FREAD; 6728047ba61eSachartre 6729047ba61eSachartre if (!(options & VD_OPT_RDONLY)) 
6730047ba61eSachartre vd->open_flags |= FWRITE; 6731047ba61eSachartre 6732047ba61eSachartre if (options & VD_OPT_EXCLUSIVE) 6733047ba61eSachartre vd->open_flags |= FEXCL; 6734047ba61eSachartre 6735047ba61eSachartre /* Setup disk type */ 6736047ba61eSachartre if (options & VD_OPT_SLICE) { 6737047ba61eSachartre vd->vdisk_type = VD_DISK_TYPE_SLICE; 6738047ba61eSachartre vd->nslices = 1; 6739047ba61eSachartre } else { 6740047ba61eSachartre vd->vdisk_type = VD_DISK_TYPE_DISK; 6741047ba61eSachartre vd->nslices = V_NUMPAR; 6742047ba61eSachartre } 6743047ba61eSachartre 6744047ba61eSachartre /* default disk label */ 6745047ba61eSachartre vd->vdisk_label = VD_DISK_LABEL_UNK; 6746047ba61eSachartre 67470a55fbb7Slm66018 /* Open vdisk and initialize parameters */ 67483c96341aSnarayan if ((status = vd_setup_vd(vd)) == 0) { 67493c96341aSnarayan vd->initialized |= VD_DISK_READY; 67501ae08745Sheppo 67513c96341aSnarayan ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR); 67528fce2fd6Sachartre PR0("vdisk_type = %s, volume = %s, file = %s, nslices = %u", 67533c96341aSnarayan ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? "disk" : "slice"), 67548fce2fd6Sachartre (vd->volume ? "yes" : "no"), (vd->file ? 
"yes" : "no"), 67553c96341aSnarayan vd->nslices); 67563c96341aSnarayan } else { 67573c96341aSnarayan if (status != EAGAIN) 67583c96341aSnarayan return (status); 67593c96341aSnarayan } 67601ae08745Sheppo 67611ae08745Sheppo /* Initialize locking */ 67621ae08745Sheppo if (ddi_get_soft_iblock_cookie(vds->dip, DDI_SOFTINT_MED, 67631ae08745Sheppo &iblock) != DDI_SUCCESS) { 67641ae08745Sheppo PRN("Could not get iblock cookie."); 67651ae08745Sheppo return (EIO); 67661ae08745Sheppo } 67671ae08745Sheppo 67681ae08745Sheppo mutex_init(&vd->lock, NULL, MUTEX_DRIVER, iblock); 67691ae08745Sheppo vd->initialized |= VD_LOCKING; 67701ae08745Sheppo 67711ae08745Sheppo 6772d10e4ef2Snarayan /* Create start and completion task queues for the vdisk */ 6773d10e4ef2Snarayan (void) snprintf(tq_name, sizeof (tq_name), "vd_startq%lu", id); 67741ae08745Sheppo PR1("tq_name = %s", tq_name); 6775d10e4ef2Snarayan if ((vd->startq = ddi_taskq_create(vds->dip, tq_name, 1, 67761ae08745Sheppo TASKQ_DEFAULTPRI, 0)) == NULL) { 67771ae08745Sheppo PRN("Could not create task queue"); 67781ae08745Sheppo return (EIO); 67791ae08745Sheppo } 6780d10e4ef2Snarayan (void) snprintf(tq_name, sizeof (tq_name), "vd_completionq%lu", id); 6781d10e4ef2Snarayan PR1("tq_name = %s", tq_name); 6782d10e4ef2Snarayan if ((vd->completionq = ddi_taskq_create(vds->dip, tq_name, 1, 6783d10e4ef2Snarayan TASKQ_DEFAULTPRI, 0)) == NULL) { 6784d10e4ef2Snarayan PRN("Could not create task queue"); 6785d10e4ef2Snarayan return (EIO); 6786d10e4ef2Snarayan } 67875b98b509Sachartre 67885b98b509Sachartre /* Allocate the staging buffer */ 67895b98b509Sachartre vd->max_msglen = sizeof (vio_msg_t); /* baseline vio message size */ 67905b98b509Sachartre vd->vio_msgp = kmem_alloc(vd->max_msglen, KM_SLEEP); 67915b98b509Sachartre 6792d10e4ef2Snarayan vd->enabled = 1; /* before callback can dispatch to startq */ 67931ae08745Sheppo 67941ae08745Sheppo 67951ae08745Sheppo /* Bring up LDC */ 67961ae08745Sheppo ldc_attr.devclass = LDC_DEV_BLK_SVC; 
67971ae08745Sheppo ldc_attr.instance = ddi_get_instance(vds->dip); 67981ae08745Sheppo ldc_attr.mode = LDC_MODE_UNRELIABLE; 6799e1ebb9ecSlm66018 ldc_attr.mtu = VD_LDC_MTU; 68001ae08745Sheppo if ((status = ldc_init(ldc_id, &ldc_attr, &vd->ldc_handle)) != 0) { 680117cadca8Slm66018 PRN("Could not initialize LDC channel %lx, " 6802690555a1Sachartre "init failed with error %d", ldc_id, status); 68031ae08745Sheppo return (status); 68041ae08745Sheppo } 68051ae08745Sheppo vd->initialized |= VD_LDC; 68061ae08745Sheppo 68071ae08745Sheppo if ((status = ldc_reg_callback(vd->ldc_handle, vd_handle_ldc_events, 68081ae08745Sheppo (caddr_t)vd)) != 0) { 6809690555a1Sachartre PRN("Could not initialize LDC channel %lu," 6810690555a1Sachartre "reg_callback failed with error %d", ldc_id, status); 68111ae08745Sheppo return (status); 68121ae08745Sheppo } 68131ae08745Sheppo 68141ae08745Sheppo if ((status = ldc_open(vd->ldc_handle)) != 0) { 6815690555a1Sachartre PRN("Could not initialize LDC channel %lu," 6816690555a1Sachartre "open failed with error %d", ldc_id, status); 68171ae08745Sheppo return (status); 68181ae08745Sheppo } 68191ae08745Sheppo 68203af08d82Slm66018 if ((status = ldc_up(vd->ldc_handle)) != 0) { 682134683adeSsg70180 PR0("ldc_up() returned errno %d", status); 68223af08d82Slm66018 } 68233af08d82Slm66018 68244bac2208Snarayan /* Allocate the inband task memory handle */ 68254bac2208Snarayan status = ldc_mem_alloc_handle(vd->ldc_handle, &(vd->inband_task.mhdl)); 68264bac2208Snarayan if (status) { 6827690555a1Sachartre PRN("Could not initialize LDC channel %lu," 6828690555a1Sachartre "alloc_handle failed with error %d", ldc_id, status); 68294bac2208Snarayan return (ENXIO); 68304bac2208Snarayan } 68311ae08745Sheppo 68321ae08745Sheppo /* Add the successfully-initialized vdisk to the server's table */ 68331ae08745Sheppo if (mod_hash_insert(vds->vd_table, (mod_hash_key_t)id, vd) != 0) { 68341ae08745Sheppo PRN("Error adding vdisk ID %lu to table", id); 68351ae08745Sheppo return (EIO); 
68361ae08745Sheppo } 68371ae08745Sheppo 68383af08d82Slm66018 /* store initial state */ 68393af08d82Slm66018 vd->state = VD_STATE_INIT; 68403af08d82Slm66018 68411ae08745Sheppo return (0); 68421ae08745Sheppo } 68431ae08745Sheppo 68443af08d82Slm66018 static void 68453af08d82Slm66018 vd_free_dring_task(vd_t *vdp) 68463af08d82Slm66018 { 68473af08d82Slm66018 if (vdp->dring_task != NULL) { 68483af08d82Slm66018 ASSERT(vdp->dring_len != 0); 68493af08d82Slm66018 /* Free all dring_task memory handles */ 68503af08d82Slm66018 for (int i = 0; i < vdp->dring_len; i++) { 68513af08d82Slm66018 (void) ldc_mem_free_handle(vdp->dring_task[i].mhdl); 68525b7cb889Sha137994 kmem_free(vdp->dring_task[i].request, 68535b7cb889Sha137994 (vdp->descriptor_size - 68545b7cb889Sha137994 sizeof (vio_dring_entry_hdr_t))); 68555b7cb889Sha137994 vdp->dring_task[i].request = NULL; 68563af08d82Slm66018 kmem_free(vdp->dring_task[i].msg, vdp->max_msglen); 68573af08d82Slm66018 vdp->dring_task[i].msg = NULL; 68583af08d82Slm66018 } 68593af08d82Slm66018 kmem_free(vdp->dring_task, 68603af08d82Slm66018 (sizeof (*vdp->dring_task)) * vdp->dring_len); 68613af08d82Slm66018 vdp->dring_task = NULL; 68623af08d82Slm66018 } 686383990c4aSAlexandre Chartre 686483990c4aSAlexandre Chartre if (vdp->write_queue != NULL) { 686583990c4aSAlexandre Chartre kmem_free(vdp->write_queue, sizeof (buf_t *) * vdp->dring_len); 686683990c4aSAlexandre Chartre vdp->write_queue = NULL; 686783990c4aSAlexandre Chartre } 68683af08d82Slm66018 } 68693af08d82Slm66018 68701ae08745Sheppo /* 68711ae08745Sheppo * Destroy the state associated with a virtual disk 68721ae08745Sheppo */ 68731ae08745Sheppo static void 68741ae08745Sheppo vds_destroy_vd(void *arg) 68751ae08745Sheppo { 68761ae08745Sheppo vd_t *vd = (vd_t *)arg; 687734683adeSsg70180 int retry = 0, rv; 68781ae08745Sheppo 68791ae08745Sheppo if (vd == NULL) 68801ae08745Sheppo return; 68811ae08745Sheppo 6882d10e4ef2Snarayan PR0("Destroying vdisk state"); 6883d10e4ef2Snarayan 68841ae08745Sheppo /* 
Disable queuing requests for the vdisk */ 68851ae08745Sheppo if (vd->initialized & VD_LOCKING) { 68861ae08745Sheppo mutex_enter(&vd->lock); 68871ae08745Sheppo vd->enabled = 0; 68881ae08745Sheppo mutex_exit(&vd->lock); 68891ae08745Sheppo } 68901ae08745Sheppo 689183990c4aSAlexandre Chartre /* Drain and destroy start queue (*before* destroying ioq) */ 6892d10e4ef2Snarayan if (vd->startq != NULL) 6893d10e4ef2Snarayan ddi_taskq_destroy(vd->startq); /* waits for queued tasks */ 6894d10e4ef2Snarayan 689583990c4aSAlexandre Chartre /* Drain and destroy the I/O queue (*before* destroying completionq) */ 689683990c4aSAlexandre Chartre if (vd->ioq != NULL) 689783990c4aSAlexandre Chartre ddi_taskq_destroy(vd->ioq); 689883990c4aSAlexandre Chartre 6899d10e4ef2Snarayan /* Drain and destroy completion queue (*before* shutting down LDC) */ 6900d10e4ef2Snarayan if (vd->completionq != NULL) 6901d10e4ef2Snarayan ddi_taskq_destroy(vd->completionq); /* waits for tasks */ 6902d10e4ef2Snarayan 69033af08d82Slm66018 vd_free_dring_task(vd); 69043af08d82Slm66018 690534683adeSsg70180 /* Free the inband task memory handle */ 690634683adeSsg70180 (void) ldc_mem_free_handle(vd->inband_task.mhdl); 690734683adeSsg70180 690834683adeSsg70180 /* Shut down LDC */ 690934683adeSsg70180 if (vd->initialized & VD_LDC) { 691034683adeSsg70180 /* unmap the dring */ 691134683adeSsg70180 if (vd->initialized & VD_DRING) 691234683adeSsg70180 (void) ldc_mem_dring_unmap(vd->dring_handle); 691334683adeSsg70180 691434683adeSsg70180 /* close LDC channel - retry on EAGAIN */ 691534683adeSsg70180 while ((rv = ldc_close(vd->ldc_handle)) == EAGAIN) { 691634683adeSsg70180 if (++retry > vds_ldc_retries) { 691734683adeSsg70180 PR0("Timed out closing channel"); 691834683adeSsg70180 break; 691934683adeSsg70180 } 692034683adeSsg70180 drv_usecwait(vds_ldc_delay); 692134683adeSsg70180 } 692234683adeSsg70180 if (rv == 0) { 692334683adeSsg70180 (void) ldc_unreg_callback(vd->ldc_handle); 692434683adeSsg70180 (void) 
ldc_fini(vd->ldc_handle); 692534683adeSsg70180 } else { 692634683adeSsg70180 /* 692734683adeSsg70180 * Closing the LDC channel has failed. Ideally we should 692834683adeSsg70180 * fail here but there is no Zeus level infrastructure 692934683adeSsg70180 * to handle this. The MD has already been changed and 693034683adeSsg70180 * we have to do the close. So we try to do as much 693134683adeSsg70180 * clean up as we can. 693234683adeSsg70180 */ 693334683adeSsg70180 (void) ldc_set_cb_mode(vd->ldc_handle, LDC_CB_DISABLE); 693434683adeSsg70180 while (ldc_unreg_callback(vd->ldc_handle) == EAGAIN) 693534683adeSsg70180 drv_usecwait(vds_ldc_delay); 693634683adeSsg70180 } 693734683adeSsg70180 } 693834683adeSsg70180 69393af08d82Slm66018 /* Free the staging buffer for msgs */ 69403af08d82Slm66018 if (vd->vio_msgp != NULL) { 69413af08d82Slm66018 kmem_free(vd->vio_msgp, vd->max_msglen); 69423af08d82Slm66018 vd->vio_msgp = NULL; 69433af08d82Slm66018 } 69443af08d82Slm66018 69453af08d82Slm66018 /* Free the inband message buffer */ 69463af08d82Slm66018 if (vd->inband_task.msg != NULL) { 69473af08d82Slm66018 kmem_free(vd->inband_task.msg, vd->max_msglen); 69483af08d82Slm66018 vd->inband_task.msg = NULL; 6949d10e4ef2Snarayan } 6950da6c28aaSamw 69513c96341aSnarayan if (vd->file) { 6952690555a1Sachartre /* Close file */ 6953047ba61eSachartre (void) VOP_CLOSE(vd->file_vnode, vd->open_flags, 1, 6954da6c28aaSamw 0, kcred, NULL); 69553c96341aSnarayan VN_RELE(vd->file_vnode); 69563c96341aSnarayan } else { 69571ae08745Sheppo /* Close any open backing-device slices */ 6958bae9e67eSachartre for (uint_t slice = 0; slice < V_NUMPAR; slice++) { 69591ae08745Sheppo if (vd->ldi_handle[slice] != NULL) { 69601ae08745Sheppo PR0("Closing slice %u", slice); 69611ae08745Sheppo (void) ldi_close(vd->ldi_handle[slice], 6962047ba61eSachartre vd->open_flags, kcred); 69631ae08745Sheppo } 69641ae08745Sheppo } 69653c96341aSnarayan } 69661ae08745Sheppo 69671aff8f07SAlexandre Chartre /* Free disk image devid */ 
69681aff8f07SAlexandre Chartre if (vd->dskimg_devid != NULL) 69691aff8f07SAlexandre Chartre ddi_devid_free(vd->dskimg_devid); 69701aff8f07SAlexandre Chartre 6971bae9e67eSachartre /* Free any fake label */ 6972bae9e67eSachartre if (vd->flabel) { 6973bae9e67eSachartre kmem_free(vd->flabel, vd->flabel_size); 6974bae9e67eSachartre vd->flabel = NULL; 6975bae9e67eSachartre vd->flabel_size = 0; 6976bae9e67eSachartre } 6977bae9e67eSachartre 69781ae08745Sheppo /* Free lock */ 69791ae08745Sheppo if (vd->initialized & VD_LOCKING) 69801ae08745Sheppo mutex_destroy(&vd->lock); 69811ae08745Sheppo 69821ae08745Sheppo /* Finally, free the vdisk structure itself */ 69831ae08745Sheppo kmem_free(vd, sizeof (*vd)); 69841ae08745Sheppo } 69851ae08745Sheppo 69861ae08745Sheppo static int 6987047ba61eSachartre vds_init_vd(vds_t *vds, uint64_t id, char *device_path, uint64_t options, 6988047ba61eSachartre uint64_t ldc_id) 69891ae08745Sheppo { 69901ae08745Sheppo int status; 69911ae08745Sheppo vd_t *vd = NULL; 69921ae08745Sheppo 69931ae08745Sheppo 6994047ba61eSachartre if ((status = vds_do_init_vd(vds, id, device_path, options, 6995047ba61eSachartre ldc_id, &vd)) != 0) 69961ae08745Sheppo vds_destroy_vd(vd); 69971ae08745Sheppo 69981ae08745Sheppo return (status); 69991ae08745Sheppo } 70001ae08745Sheppo 70011ae08745Sheppo static int 70021ae08745Sheppo vds_do_get_ldc_id(md_t *md, mde_cookie_t vd_node, mde_cookie_t *channel, 70031ae08745Sheppo uint64_t *ldc_id) 70041ae08745Sheppo { 70051ae08745Sheppo int num_channels; 70061ae08745Sheppo 70071ae08745Sheppo 70081ae08745Sheppo /* Look for channel endpoint child(ren) of the vdisk MD node */ 70091ae08745Sheppo if ((num_channels = md_scan_dag(md, vd_node, 70101ae08745Sheppo md_find_name(md, VD_CHANNEL_ENDPOINT), 70111ae08745Sheppo md_find_name(md, "fwd"), channel)) <= 0) { 70121ae08745Sheppo PRN("No \"%s\" found for virtual disk", VD_CHANNEL_ENDPOINT); 70131ae08745Sheppo return (-1); 70141ae08745Sheppo } 70151ae08745Sheppo 70161ae08745Sheppo /* Get the 
"id" value for the first channel endpoint node */ 70171ae08745Sheppo if (md_get_prop_val(md, channel[0], VD_ID_PROP, ldc_id) != 0) { 70181ae08745Sheppo PRN("No \"%s\" property found for \"%s\" of vdisk", 70191ae08745Sheppo VD_ID_PROP, VD_CHANNEL_ENDPOINT); 70201ae08745Sheppo return (-1); 70211ae08745Sheppo } 70221ae08745Sheppo 70231ae08745Sheppo if (num_channels > 1) { 70241ae08745Sheppo PRN("Using ID of first of multiple channels for this vdisk"); 70251ae08745Sheppo } 70261ae08745Sheppo 70271ae08745Sheppo return (0); 70281ae08745Sheppo } 70291ae08745Sheppo 70301ae08745Sheppo static int 70311ae08745Sheppo vds_get_ldc_id(md_t *md, mde_cookie_t vd_node, uint64_t *ldc_id) 70321ae08745Sheppo { 70331ae08745Sheppo int num_nodes, status; 70341ae08745Sheppo size_t size; 70351ae08745Sheppo mde_cookie_t *channel; 70361ae08745Sheppo 70371ae08745Sheppo 70381ae08745Sheppo if ((num_nodes = md_node_count(md)) <= 0) { 70391ae08745Sheppo PRN("Invalid node count in Machine Description subtree"); 70401ae08745Sheppo return (-1); 70411ae08745Sheppo } 70421ae08745Sheppo size = num_nodes*(sizeof (*channel)); 70431ae08745Sheppo channel = kmem_zalloc(size, KM_SLEEP); 70441ae08745Sheppo status = vds_do_get_ldc_id(md, vd_node, channel, ldc_id); 70451ae08745Sheppo kmem_free(channel, size); 70461ae08745Sheppo 70471ae08745Sheppo return (status); 70481ae08745Sheppo } 70491ae08745Sheppo 7050047ba61eSachartre /* 7051047ba61eSachartre * Function: 7052047ba61eSachartre * vds_get_options 7053047ba61eSachartre * 7054047ba61eSachartre * Description: 7055047ba61eSachartre * Parse the options of a vds node. Options are defined as an array 7056047ba61eSachartre * of strings in the vds-block-device-opts property of the vds node 7057047ba61eSachartre * in the machine description. Options are returned as a bitmask. The 7058047ba61eSachartre * mapping between the bitmask options and the options strings from the 7059047ba61eSachartre * machine description is defined in the vd_bdev_options[] array. 
7060047ba61eSachartre * 7061047ba61eSachartre * The vds-block-device-opts property is optional. If a vds has no such 7062047ba61eSachartre * property then no option is defined. 7063047ba61eSachartre * 7064047ba61eSachartre * Parameters: 7065047ba61eSachartre * md - machine description. 7066047ba61eSachartre * vd_node - vds node in the machine description for which 7067047ba61eSachartre * options have to be parsed. 7068047ba61eSachartre * options - the returned options. 7069047ba61eSachartre * 7070047ba61eSachartre * Return Code: 7071047ba61eSachartre * none. 7072047ba61eSachartre */ 7073047ba61eSachartre static void 7074047ba61eSachartre vds_get_options(md_t *md, mde_cookie_t vd_node, uint64_t *options) 7075047ba61eSachartre { 7076047ba61eSachartre char *optstr, *opt; 7077047ba61eSachartre int len, n, i; 7078047ba61eSachartre 7079047ba61eSachartre *options = 0; 7080047ba61eSachartre 7081047ba61eSachartre if (md_get_prop_data(md, vd_node, VD_BLOCK_DEVICE_OPTS, 7082047ba61eSachartre (uint8_t **)&optstr, &len) != 0) { 7083047ba61eSachartre PR0("No options found"); 7084047ba61eSachartre return; 7085047ba61eSachartre } 7086047ba61eSachartre 7087047ba61eSachartre /* parse options */ 7088047ba61eSachartre opt = optstr; 7089047ba61eSachartre n = sizeof (vd_bdev_options) / sizeof (vd_option_t); 7090047ba61eSachartre 7091047ba61eSachartre while (opt < optstr + len) { 7092047ba61eSachartre for (i = 0; i < n; i++) { 7093047ba61eSachartre if (strncmp(vd_bdev_options[i].vdo_name, 7094047ba61eSachartre opt, VD_OPTION_NLEN) == 0) { 7095047ba61eSachartre *options |= vd_bdev_options[i].vdo_value; 7096047ba61eSachartre break; 7097047ba61eSachartre } 7098047ba61eSachartre } 7099047ba61eSachartre 7100047ba61eSachartre if (i < n) { 7101047ba61eSachartre PR0("option: %s", opt); 7102047ba61eSachartre } else { 7103047ba61eSachartre PRN("option %s is unknown or unsupported", opt); 7104047ba61eSachartre } 7105047ba61eSachartre 7106047ba61eSachartre opt += strlen(opt) + 1; 
7107047ba61eSachartre } 7108047ba61eSachartre } 7109047ba61eSachartre 71101ae08745Sheppo static void 71118fce2fd6Sachartre vds_driver_types_free(vds_t *vds) 71128fce2fd6Sachartre { 71138fce2fd6Sachartre if (vds->driver_types != NULL) { 71148fce2fd6Sachartre kmem_free(vds->driver_types, sizeof (vd_driver_type_t) * 71158fce2fd6Sachartre vds->num_drivers); 71168fce2fd6Sachartre vds->driver_types = NULL; 71178fce2fd6Sachartre vds->num_drivers = 0; 71188fce2fd6Sachartre } 71198fce2fd6Sachartre } 71208fce2fd6Sachartre 71218fce2fd6Sachartre /* 71228fce2fd6Sachartre * Update the driver type list with information from vds.conf. 71238fce2fd6Sachartre */ 71248fce2fd6Sachartre static void 71258fce2fd6Sachartre vds_driver_types_update(vds_t *vds) 71268fce2fd6Sachartre { 71278fce2fd6Sachartre char **list, *s; 71288fce2fd6Sachartre uint_t i, num, count = 0, len; 71298fce2fd6Sachartre 71308fce2fd6Sachartre if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, vds->dip, 71318fce2fd6Sachartre DDI_PROP_DONTPASS, "driver-type-list", &list, &num) != 71328fce2fd6Sachartre DDI_PROP_SUCCESS) 71338fce2fd6Sachartre return; 71348fce2fd6Sachartre 71358fce2fd6Sachartre /* 71368fce2fd6Sachartre * We create a driver_types list with as many as entries as there 71378fce2fd6Sachartre * is in the driver-type-list from vds.conf. However only valid 71388fce2fd6Sachartre * entries will be populated (i.e. entries from driver-type-list 71398fce2fd6Sachartre * with a valid syntax). Invalid entries will be left blank so 71408fce2fd6Sachartre * they will have no driver name and the driver type will be 71418fce2fd6Sachartre * VD_DRIVER_UNKNOWN (= 0). 
71428fce2fd6Sachartre */ 71438fce2fd6Sachartre vds->num_drivers = num; 71448fce2fd6Sachartre vds->driver_types = kmem_zalloc(sizeof (vd_driver_type_t) * num, 71458fce2fd6Sachartre KM_SLEEP); 71468fce2fd6Sachartre 71478fce2fd6Sachartre for (i = 0; i < num; i++) { 71488fce2fd6Sachartre 71498fce2fd6Sachartre s = strchr(list[i], ':'); 71508fce2fd6Sachartre 71518fce2fd6Sachartre if (s == NULL) { 71528fce2fd6Sachartre PRN("vds.conf: driver-type-list, entry %d (%s): " 71538fce2fd6Sachartre "a colon is expected in the entry", 71548fce2fd6Sachartre i, list[i]); 71558fce2fd6Sachartre continue; 71568fce2fd6Sachartre } 71578fce2fd6Sachartre 71588fce2fd6Sachartre len = (uintptr_t)s - (uintptr_t)list[i]; 71598fce2fd6Sachartre 71608fce2fd6Sachartre if (len == 0) { 71618fce2fd6Sachartre PRN("vds.conf: driver-type-list, entry %d (%s): " 71628fce2fd6Sachartre "the driver name is empty", 71638fce2fd6Sachartre i, list[i]); 71648fce2fd6Sachartre continue; 71658fce2fd6Sachartre } 71668fce2fd6Sachartre 71678fce2fd6Sachartre if (len >= VD_DRIVER_NAME_LEN) { 71688fce2fd6Sachartre PRN("vds.conf: driver-type-list, entry %d (%s): " 71698fce2fd6Sachartre "the driver name is too long", 71708fce2fd6Sachartre i, list[i]); 71718fce2fd6Sachartre continue; 71728fce2fd6Sachartre } 71738fce2fd6Sachartre 71748fce2fd6Sachartre if (strcmp(s + 1, "disk") == 0) { 71758fce2fd6Sachartre 71768fce2fd6Sachartre vds->driver_types[i].type = VD_DRIVER_DISK; 71778fce2fd6Sachartre 71788fce2fd6Sachartre } else if (strcmp(s + 1, "volume") == 0) { 71798fce2fd6Sachartre 71808fce2fd6Sachartre vds->driver_types[i].type = VD_DRIVER_VOLUME; 71818fce2fd6Sachartre 71828fce2fd6Sachartre } else { 71838fce2fd6Sachartre PRN("vds.conf: driver-type-list, entry %d (%s): " 71848fce2fd6Sachartre "the driver type is invalid", 71858fce2fd6Sachartre i, list[i]); 71868fce2fd6Sachartre continue; 71878fce2fd6Sachartre } 71888fce2fd6Sachartre 71898fce2fd6Sachartre (void) strncpy(vds->driver_types[i].name, list[i], len); 71908fce2fd6Sachartre 
71918fce2fd6Sachartre PR0("driver-type-list, entry %d (%s) added", 71928fce2fd6Sachartre i, list[i]); 71938fce2fd6Sachartre 71948fce2fd6Sachartre count++; 71958fce2fd6Sachartre } 71968fce2fd6Sachartre 71978fce2fd6Sachartre ddi_prop_free(list); 71988fce2fd6Sachartre 71998fce2fd6Sachartre if (count == 0) { 72008fce2fd6Sachartre /* nothing was added, clean up */ 72018fce2fd6Sachartre vds_driver_types_free(vds); 72028fce2fd6Sachartre } 72038fce2fd6Sachartre } 72048fce2fd6Sachartre 72058fce2fd6Sachartre static void 72061ae08745Sheppo vds_add_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node) 72071ae08745Sheppo { 7208e1ebb9ecSlm66018 char *device_path = NULL; 7209047ba61eSachartre uint64_t id = 0, ldc_id = 0, options = 0; 72101ae08745Sheppo 72111ae08745Sheppo if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) { 72121ae08745Sheppo PRN("Error getting vdisk \"%s\"", VD_ID_PROP); 72131ae08745Sheppo return; 72141ae08745Sheppo } 72151ae08745Sheppo PR0("Adding vdisk ID %lu", id); 72161ae08745Sheppo if (md_get_prop_str(md, vd_node, VD_BLOCK_DEVICE_PROP, 7217e1ebb9ecSlm66018 &device_path) != 0) { 72181ae08745Sheppo PRN("Error getting vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); 72191ae08745Sheppo return; 72201ae08745Sheppo } 72211ae08745Sheppo 7222047ba61eSachartre vds_get_options(md, vd_node, &options); 7223047ba61eSachartre 72241ae08745Sheppo if (vds_get_ldc_id(md, vd_node, &ldc_id) != 0) { 72251ae08745Sheppo PRN("Error getting LDC ID for vdisk %lu", id); 72261ae08745Sheppo return; 72271ae08745Sheppo } 72281ae08745Sheppo 7229047ba61eSachartre if (vds_init_vd(vds, id, device_path, options, ldc_id) != 0) { 72301ae08745Sheppo PRN("Failed to add vdisk ID %lu", id); 723117cadca8Slm66018 if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)id) != 0) 723217cadca8Slm66018 PRN("No vDisk entry found for vdisk ID %lu", id); 72331ae08745Sheppo return; 72341ae08745Sheppo } 72351ae08745Sheppo } 72361ae08745Sheppo 72371ae08745Sheppo static void 72381ae08745Sheppo vds_remove_vd(vds_t *vds, md_t *md, 
mde_cookie_t vd_node) 72391ae08745Sheppo { 72401ae08745Sheppo uint64_t id = 0; 72411ae08745Sheppo 72421ae08745Sheppo 72431ae08745Sheppo if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) { 72441ae08745Sheppo PRN("Unable to get \"%s\" property from vdisk's MD node", 72451ae08745Sheppo VD_ID_PROP); 72461ae08745Sheppo return; 72471ae08745Sheppo } 72481ae08745Sheppo PR0("Removing vdisk ID %lu", id); 72491ae08745Sheppo if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)id) != 0) 72501ae08745Sheppo PRN("No vdisk entry found for vdisk ID %lu", id); 72511ae08745Sheppo } 72521ae08745Sheppo 72531ae08745Sheppo static void 72541ae08745Sheppo vds_change_vd(vds_t *vds, md_t *prev_md, mde_cookie_t prev_vd_node, 72551ae08745Sheppo md_t *curr_md, mde_cookie_t curr_vd_node) 72561ae08745Sheppo { 72571ae08745Sheppo char *curr_dev, *prev_dev; 7258047ba61eSachartre uint64_t curr_id = 0, curr_ldc_id = 0, curr_options = 0; 7259047ba61eSachartre uint64_t prev_id = 0, prev_ldc_id = 0, prev_options = 0; 72601ae08745Sheppo size_t len; 72611ae08745Sheppo 72621ae08745Sheppo 72631ae08745Sheppo /* Validate that vdisk ID has not changed */ 72641ae08745Sheppo if (md_get_prop_val(prev_md, prev_vd_node, VD_ID_PROP, &prev_id) != 0) { 72651ae08745Sheppo PRN("Error getting previous vdisk \"%s\" property", 72661ae08745Sheppo VD_ID_PROP); 72671ae08745Sheppo return; 72681ae08745Sheppo } 72691ae08745Sheppo if (md_get_prop_val(curr_md, curr_vd_node, VD_ID_PROP, &curr_id) != 0) { 72701ae08745Sheppo PRN("Error getting current vdisk \"%s\" property", VD_ID_PROP); 72711ae08745Sheppo return; 72721ae08745Sheppo } 72731ae08745Sheppo if (curr_id != prev_id) { 72741ae08745Sheppo PRN("Not changing vdisk: ID changed from %lu to %lu", 72751ae08745Sheppo prev_id, curr_id); 72761ae08745Sheppo return; 72771ae08745Sheppo } 72781ae08745Sheppo 72791ae08745Sheppo /* Validate that LDC ID has not changed */ 72801ae08745Sheppo if (vds_get_ldc_id(prev_md, prev_vd_node, &prev_ldc_id) != 0) { 72811ae08745Sheppo PRN("Error 
getting LDC ID for vdisk %lu", prev_id); 72821ae08745Sheppo return; 72831ae08745Sheppo } 72841ae08745Sheppo 72851ae08745Sheppo if (vds_get_ldc_id(curr_md, curr_vd_node, &curr_ldc_id) != 0) { 72861ae08745Sheppo PRN("Error getting LDC ID for vdisk %lu", curr_id); 72871ae08745Sheppo return; 72881ae08745Sheppo } 72891ae08745Sheppo if (curr_ldc_id != prev_ldc_id) { 72900a55fbb7Slm66018 _NOTE(NOTREACHED); /* lint is confused */ 72911ae08745Sheppo PRN("Not changing vdisk: " 72921ae08745Sheppo "LDC ID changed from %lu to %lu", prev_ldc_id, curr_ldc_id); 72931ae08745Sheppo return; 72941ae08745Sheppo } 72951ae08745Sheppo 72961ae08745Sheppo /* Determine whether device path has changed */ 72971ae08745Sheppo if (md_get_prop_str(prev_md, prev_vd_node, VD_BLOCK_DEVICE_PROP, 72981ae08745Sheppo &prev_dev) != 0) { 72991ae08745Sheppo PRN("Error getting previous vdisk \"%s\"", 73001ae08745Sheppo VD_BLOCK_DEVICE_PROP); 73011ae08745Sheppo return; 73021ae08745Sheppo } 73031ae08745Sheppo if (md_get_prop_str(curr_md, curr_vd_node, VD_BLOCK_DEVICE_PROP, 73041ae08745Sheppo &curr_dev) != 0) { 73051ae08745Sheppo PRN("Error getting current vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); 73061ae08745Sheppo return; 73071ae08745Sheppo } 73081ae08745Sheppo if (((len = strlen(curr_dev)) == strlen(prev_dev)) && 73091ae08745Sheppo (strncmp(curr_dev, prev_dev, len) == 0)) 73101ae08745Sheppo return; /* no relevant (supported) change */ 73111ae08745Sheppo 7312047ba61eSachartre /* Validate that options have not changed */ 7313047ba61eSachartre vds_get_options(prev_md, prev_vd_node, &prev_options); 7314047ba61eSachartre vds_get_options(curr_md, curr_vd_node, &curr_options); 7315047ba61eSachartre if (prev_options != curr_options) { 7316047ba61eSachartre PRN("Not changing vdisk: options changed from %lx to %lx", 7317047ba61eSachartre prev_options, curr_options); 7318047ba61eSachartre return; 7319047ba61eSachartre } 7320047ba61eSachartre 73211ae08745Sheppo PR0("Changing vdisk ID %lu", prev_id); 73223af08d82Slm66018 
73231ae08745Sheppo /* Remove old state, which will close vdisk and reset */ 73241ae08745Sheppo if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)prev_id) != 0) 73251ae08745Sheppo PRN("No entry found for vdisk ID %lu", prev_id); 73263af08d82Slm66018 73271ae08745Sheppo /* Re-initialize vdisk with new state */ 7328047ba61eSachartre if (vds_init_vd(vds, curr_id, curr_dev, curr_options, 7329047ba61eSachartre curr_ldc_id) != 0) { 73301ae08745Sheppo PRN("Failed to change vdisk ID %lu", curr_id); 73311ae08745Sheppo return; 73321ae08745Sheppo } 73331ae08745Sheppo } 73341ae08745Sheppo 73351ae08745Sheppo static int 73361ae08745Sheppo vds_process_md(void *arg, mdeg_result_t *md) 73371ae08745Sheppo { 73381ae08745Sheppo int i; 73391ae08745Sheppo vds_t *vds = arg; 73401ae08745Sheppo 73411ae08745Sheppo 73421ae08745Sheppo if (md == NULL) 73431ae08745Sheppo return (MDEG_FAILURE); 73441ae08745Sheppo ASSERT(vds != NULL); 73451ae08745Sheppo 73461ae08745Sheppo for (i = 0; i < md->removed.nelem; i++) 73471ae08745Sheppo vds_remove_vd(vds, md->removed.mdp, md->removed.mdep[i]); 73481ae08745Sheppo for (i = 0; i < md->match_curr.nelem; i++) 73491ae08745Sheppo vds_change_vd(vds, md->match_prev.mdp, md->match_prev.mdep[i], 73501ae08745Sheppo md->match_curr.mdp, md->match_curr.mdep[i]); 73511ae08745Sheppo for (i = 0; i < md->added.nelem; i++) 73521ae08745Sheppo vds_add_vd(vds, md->added.mdp, md->added.mdep[i]); 73531ae08745Sheppo 73541ae08745Sheppo return (MDEG_SUCCESS); 73551ae08745Sheppo } 73561ae08745Sheppo 73573c96341aSnarayan 73581ae08745Sheppo static int 73591ae08745Sheppo vds_do_attach(dev_info_t *dip) 73601ae08745Sheppo { 7361445b4c2eSsb155480 int status, sz; 7362445b4c2eSsb155480 int cfg_handle; 73631ae08745Sheppo minor_t instance = ddi_get_instance(dip); 73641ae08745Sheppo vds_t *vds; 7365445b4c2eSsb155480 mdeg_prop_spec_t *pspecp; 7366445b4c2eSsb155480 mdeg_node_spec_t *ispecp; 73671ae08745Sheppo 73681ae08745Sheppo /* 73691ae08745Sheppo * The "cfg-handle" property of a vds node in 
an MD contains the MD's 73701ae08745Sheppo * notion of "instance", or unique identifier, for that node; OBP 73711ae08745Sheppo * stores the value of the "cfg-handle" MD property as the value of 73721ae08745Sheppo * the "reg" property on the node in the device tree it builds from 73731ae08745Sheppo * the MD and passes to Solaris. Thus, we look up the devinfo node's 73741ae08745Sheppo * "reg" property value to uniquely identify this device instance when 73751ae08745Sheppo * registering with the MD event-generation framework. If the "reg" 73761ae08745Sheppo * property cannot be found, the device tree state is presumably so 73771ae08745Sheppo * broken that there is no point in continuing. 73781ae08745Sheppo */ 7379445b4c2eSsb155480 if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 7380445b4c2eSsb155480 VD_REG_PROP)) { 7381445b4c2eSsb155480 PRN("vds \"%s\" property does not exist", VD_REG_PROP); 73821ae08745Sheppo return (DDI_FAILURE); 73831ae08745Sheppo } 73841ae08745Sheppo 73851ae08745Sheppo /* Get the MD instance for later MDEG registration */ 73861ae08745Sheppo cfg_handle = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 7387445b4c2eSsb155480 VD_REG_PROP, -1); 73881ae08745Sheppo 73891ae08745Sheppo if (ddi_soft_state_zalloc(vds_state, instance) != DDI_SUCCESS) { 73901ae08745Sheppo PRN("Could not allocate state for instance %u", instance); 73911ae08745Sheppo return (DDI_FAILURE); 73921ae08745Sheppo } 73931ae08745Sheppo 73941ae08745Sheppo if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) { 73951ae08745Sheppo PRN("Could not get state for instance %u", instance); 73961ae08745Sheppo ddi_soft_state_free(vds_state, instance); 73971ae08745Sheppo return (DDI_FAILURE); 73981ae08745Sheppo } 73991ae08745Sheppo 74001ae08745Sheppo vds->dip = dip; 74011ae08745Sheppo vds->vd_table = mod_hash_create_ptrhash("vds_vd_table", VDS_NCHAINS, 740287a7269eSachartre vds_destroy_vd, sizeof (void *)); 740387a7269eSachartre 74041ae08745Sheppo ASSERT(vds->vd_table != 
NULL); 74051ae08745Sheppo 74061ae08745Sheppo if ((status = ldi_ident_from_dip(dip, &vds->ldi_ident)) != 0) { 74071ae08745Sheppo PRN("ldi_ident_from_dip() returned errno %d", status); 74081ae08745Sheppo return (DDI_FAILURE); 74091ae08745Sheppo } 74101ae08745Sheppo vds->initialized |= VDS_LDI; 74111ae08745Sheppo 74121ae08745Sheppo /* Register for MD updates */ 7413445b4c2eSsb155480 sz = sizeof (vds_prop_template); 7414445b4c2eSsb155480 pspecp = kmem_alloc(sz, KM_SLEEP); 7415445b4c2eSsb155480 bcopy(vds_prop_template, pspecp, sz); 7416445b4c2eSsb155480 7417445b4c2eSsb155480 VDS_SET_MDEG_PROP_INST(pspecp, cfg_handle); 7418445b4c2eSsb155480 7419445b4c2eSsb155480 /* initialize the complete prop spec structure */ 7420445b4c2eSsb155480 ispecp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 7421445b4c2eSsb155480 ispecp->namep = "virtual-device"; 7422445b4c2eSsb155480 ispecp->specp = pspecp; 7423445b4c2eSsb155480 7424445b4c2eSsb155480 if (mdeg_register(ispecp, &vd_match, vds_process_md, vds, 74251ae08745Sheppo &vds->mdeg) != MDEG_SUCCESS) { 74261ae08745Sheppo PRN("Unable to register for MD updates"); 7427445b4c2eSsb155480 kmem_free(ispecp, sizeof (mdeg_node_spec_t)); 7428445b4c2eSsb155480 kmem_free(pspecp, sz); 74291ae08745Sheppo return (DDI_FAILURE); 74301ae08745Sheppo } 7431445b4c2eSsb155480 7432445b4c2eSsb155480 vds->ispecp = ispecp; 74331ae08745Sheppo vds->initialized |= VDS_MDEG; 74341ae08745Sheppo 74350a55fbb7Slm66018 /* Prevent auto-detaching so driver is available whenever MD changes */ 74360a55fbb7Slm66018 if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1) != 74370a55fbb7Slm66018 DDI_PROP_SUCCESS) { 74380a55fbb7Slm66018 PRN("failed to set \"%s\" property for instance %u", 74390a55fbb7Slm66018 DDI_NO_AUTODETACH, instance); 74400a55fbb7Slm66018 } 74410a55fbb7Slm66018 74428fce2fd6Sachartre /* read any user defined driver types from conf file and update list */ 74438fce2fd6Sachartre vds_driver_types_update(vds); 74448fce2fd6Sachartre 
74451ae08745Sheppo ddi_report_dev(dip); 74461ae08745Sheppo return (DDI_SUCCESS); 74471ae08745Sheppo } 74481ae08745Sheppo 74491ae08745Sheppo static int 74501ae08745Sheppo vds_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 74511ae08745Sheppo { 74521ae08745Sheppo int status; 74531ae08745Sheppo 74541ae08745Sheppo switch (cmd) { 74551ae08745Sheppo case DDI_ATTACH: 7456d10e4ef2Snarayan PR0("Attaching"); 74571ae08745Sheppo if ((status = vds_do_attach(dip)) != DDI_SUCCESS) 74581ae08745Sheppo (void) vds_detach(dip, DDI_DETACH); 74591ae08745Sheppo return (status); 74601ae08745Sheppo case DDI_RESUME: 7461d10e4ef2Snarayan PR0("No action required for DDI_RESUME"); 74621ae08745Sheppo return (DDI_SUCCESS); 74631ae08745Sheppo default: 74641ae08745Sheppo return (DDI_FAILURE); 74651ae08745Sheppo } 74661ae08745Sheppo } 74671ae08745Sheppo 74681ae08745Sheppo static struct dev_ops vds_ops = { 74691ae08745Sheppo DEVO_REV, /* devo_rev */ 74701ae08745Sheppo 0, /* devo_refcnt */ 74711ae08745Sheppo ddi_no_info, /* devo_getinfo */ 74721ae08745Sheppo nulldev, /* devo_identify */ 74731ae08745Sheppo nulldev, /* devo_probe */ 74741ae08745Sheppo vds_attach, /* devo_attach */ 74751ae08745Sheppo vds_detach, /* devo_detach */ 74761ae08745Sheppo nodev, /* devo_reset */ 74771ae08745Sheppo NULL, /* devo_cb_ops */ 74781ae08745Sheppo NULL, /* devo_bus_ops */ 747919397407SSherry Moore nulldev, /* devo_power */ 748019397407SSherry Moore ddi_quiesce_not_needed, /* devo_quiesce */ 74811ae08745Sheppo }; 74821ae08745Sheppo 74831ae08745Sheppo static struct modldrv modldrv = { 74841ae08745Sheppo &mod_driverops, 7485205eeb1aSlm66018 "virtual disk server", 74861ae08745Sheppo &vds_ops, 74871ae08745Sheppo }; 74881ae08745Sheppo 74891ae08745Sheppo static struct modlinkage modlinkage = { 74901ae08745Sheppo MODREV_1, 74911ae08745Sheppo &modldrv, 74921ae08745Sheppo NULL 74931ae08745Sheppo }; 74941ae08745Sheppo 74951ae08745Sheppo 74961ae08745Sheppo int 74971ae08745Sheppo _init(void) 74981ae08745Sheppo { 749917cadca8Slm66018 
int status; 7500d10e4ef2Snarayan 75011ae08745Sheppo if ((status = ddi_soft_state_init(&vds_state, sizeof (vds_t), 1)) != 0) 75021ae08745Sheppo return (status); 750317cadca8Slm66018 75041ae08745Sheppo if ((status = mod_install(&modlinkage)) != 0) { 75051ae08745Sheppo ddi_soft_state_fini(&vds_state); 75061ae08745Sheppo return (status); 75071ae08745Sheppo } 75081ae08745Sheppo 75091ae08745Sheppo return (0); 75101ae08745Sheppo } 75111ae08745Sheppo 75121ae08745Sheppo int 75131ae08745Sheppo _info(struct modinfo *modinfop) 75141ae08745Sheppo { 75151ae08745Sheppo return (mod_info(&modlinkage, modinfop)); 75161ae08745Sheppo } 75171ae08745Sheppo 75181ae08745Sheppo int 75191ae08745Sheppo _fini(void) 75201ae08745Sheppo { 75211ae08745Sheppo int status; 75221ae08745Sheppo 75231ae08745Sheppo if ((status = mod_remove(&modlinkage)) != 0) 75241ae08745Sheppo return (status); 75251ae08745Sheppo ddi_soft_state_fini(&vds_state); 75261ae08745Sheppo return (0); 75271ae08745Sheppo } 7528