11ae08745Sheppo /* 21ae08745Sheppo * CDDL HEADER START 31ae08745Sheppo * 41ae08745Sheppo * The contents of this file are subject to the terms of the 51ae08745Sheppo * Common Development and Distribution License (the "License"). 61ae08745Sheppo * You may not use this file except in compliance with the License. 71ae08745Sheppo * 81ae08745Sheppo * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 91ae08745Sheppo * or http://www.opensolaris.org/os/licensing. 101ae08745Sheppo * See the License for the specific language governing permissions 111ae08745Sheppo * and limitations under the License. 121ae08745Sheppo * 131ae08745Sheppo * When distributing Covered Code, include this CDDL HEADER in each 141ae08745Sheppo * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 151ae08745Sheppo * If applicable, add the following below this CDDL HEADER, with the 161ae08745Sheppo * fields enclosed by brackets "[]" replaced with your own identifying 171ae08745Sheppo * information: Portions Copyright [yyyy] [name of copyright owner] 181ae08745Sheppo * 191ae08745Sheppo * CDDL HEADER END 201ae08745Sheppo */ 211ae08745Sheppo 221ae08745Sheppo /* 23edcc0754Sachartre * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 241ae08745Sheppo * Use is subject to license terms. 
251ae08745Sheppo */ 261ae08745Sheppo 271ae08745Sheppo #pragma ident "%Z%%M% %I% %E% SMI" 281ae08745Sheppo 291ae08745Sheppo /* 301ae08745Sheppo * Virtual disk server 311ae08745Sheppo */ 321ae08745Sheppo 331ae08745Sheppo 341ae08745Sheppo #include <sys/types.h> 351ae08745Sheppo #include <sys/conf.h> 364bac2208Snarayan #include <sys/crc32.h> 371ae08745Sheppo #include <sys/ddi.h> 381ae08745Sheppo #include <sys/dkio.h> 391ae08745Sheppo #include <sys/file.h> 4017cadca8Slm66018 #include <sys/fs/hsfs_isospec.h> 411ae08745Sheppo #include <sys/mdeg.h> 422f5224aeSachartre #include <sys/mhd.h> 431ae08745Sheppo #include <sys/modhash.h> 441ae08745Sheppo #include <sys/note.h> 451ae08745Sheppo #include <sys/pathname.h> 46205eeb1aSlm66018 #include <sys/sdt.h> 471ae08745Sheppo #include <sys/sunddi.h> 481ae08745Sheppo #include <sys/sunldi.h> 491ae08745Sheppo #include <sys/sysmacros.h> 501ae08745Sheppo #include <sys/vio_common.h> 5117cadca8Slm66018 #include <sys/vio_util.h> 521ae08745Sheppo #include <sys/vdsk_mailbox.h> 531ae08745Sheppo #include <sys/vdsk_common.h> 541ae08745Sheppo #include <sys/vtoc.h> 553c96341aSnarayan #include <sys/vfs.h> 563c96341aSnarayan #include <sys/stat.h> 5787a7269eSachartre #include <sys/scsi/impl/uscsi.h> 58690555a1Sachartre #include <vm/seg_map.h> 591ae08745Sheppo 601ae08745Sheppo /* Virtual disk server initialization flags */ 61d10e4ef2Snarayan #define VDS_LDI 0x01 62d10e4ef2Snarayan #define VDS_MDEG 0x02 631ae08745Sheppo 641ae08745Sheppo /* Virtual disk server tunable parameters */ 653c96341aSnarayan #define VDS_RETRIES 5 663c96341aSnarayan #define VDS_LDC_DELAY 1000 /* 1 msecs */ 673c96341aSnarayan #define VDS_DEV_DELAY 10000000 /* 10 secs */ 681ae08745Sheppo #define VDS_NCHAINS 32 691ae08745Sheppo 701ae08745Sheppo /* Identification parameters for MD, synthetic dkio(7i) structures, etc. 
*/ 711ae08745Sheppo #define VDS_NAME "virtual-disk-server" 721ae08745Sheppo 731ae08745Sheppo #define VD_NAME "vd" 741ae08745Sheppo #define VD_VOLUME_NAME "vdisk" 751ae08745Sheppo #define VD_ASCIILABEL "Virtual Disk" 761ae08745Sheppo 771ae08745Sheppo #define VD_CHANNEL_ENDPOINT "channel-endpoint" 781ae08745Sheppo #define VD_ID_PROP "id" 791ae08745Sheppo #define VD_BLOCK_DEVICE_PROP "vds-block-device" 80047ba61eSachartre #define VD_BLOCK_DEVICE_OPTS "vds-block-device-opts" 81445b4c2eSsb155480 #define VD_REG_PROP "reg" 821ae08745Sheppo 831ae08745Sheppo /* Virtual disk initialization flags */ 843c96341aSnarayan #define VD_DISK_READY 0x01 853c96341aSnarayan #define VD_LOCKING 0x02 863c96341aSnarayan #define VD_LDC 0x04 873c96341aSnarayan #define VD_DRING 0x08 883c96341aSnarayan #define VD_SID 0x10 893c96341aSnarayan #define VD_SEQ_NUM 0x20 90047ba61eSachartre #define VD_SETUP_ERROR 0x40 911ae08745Sheppo 92eba0cb4eSachartre /* Flags for writing to a vdisk which is a file */ 93eba0cb4eSachartre #define VD_FILE_WRITE_FLAGS SM_ASYNC 94eba0cb4eSachartre 9587a7269eSachartre /* Number of backup labels */ 9687a7269eSachartre #define VD_FILE_NUM_BACKUP 5 9787a7269eSachartre 9887a7269eSachartre /* Timeout for SCSI I/O */ 9987a7269eSachartre #define VD_SCSI_RDWR_TIMEOUT 30 /* 30 secs */ 10087a7269eSachartre 101edcc0754Sachartre /* Maximum number of logical partitions */ 102edcc0754Sachartre #define VD_MAXPART (NDKMAP + 1) 103edcc0754Sachartre 1041ae08745Sheppo /* 1051ae08745Sheppo * By Solaris convention, slice/partition 2 represents the entire disk; 1061ae08745Sheppo * unfortunately, this convention does not appear to be codified. 
1071ae08745Sheppo */ 1081ae08745Sheppo #define VD_ENTIRE_DISK_SLICE 2 1091ae08745Sheppo 1101ae08745Sheppo /* Return a cpp token as a string */ 1111ae08745Sheppo #define STRINGIZE(token) #token 1121ae08745Sheppo 1131ae08745Sheppo /* 1141ae08745Sheppo * Print a message prefixed with the current function name to the message log 1151ae08745Sheppo * (and optionally to the console for verbose boots); these macros use cpp's 1161ae08745Sheppo * concatenation of string literals and C99 variable-length-argument-list 1171ae08745Sheppo * macros 1181ae08745Sheppo */ 1191ae08745Sheppo #define PRN(...) _PRN("?%s(): "__VA_ARGS__, "") 1201ae08745Sheppo #define _PRN(format, ...) \ 1211ae08745Sheppo cmn_err(CE_CONT, format"%s", __func__, __VA_ARGS__) 1221ae08745Sheppo 1231ae08745Sheppo /* Return a pointer to the "i"th vdisk dring element */ 1241ae08745Sheppo #define VD_DRING_ELEM(i) ((vd_dring_entry_t *)(void *) \ 1251ae08745Sheppo (vd->dring + (i)*vd->descriptor_size)) 1261ae08745Sheppo 1271ae08745Sheppo /* Return the virtual disk client's type as a string (for use in messages) */ 1281ae08745Sheppo #define VD_CLIENT(vd) \ 1291ae08745Sheppo (((vd)->xfer_mode == VIO_DESC_MODE) ? "in-band client" : \ 130*f0ca1d9aSsb155480 (((vd)->xfer_mode == VIO_DRING_MODE_V1_0) ? "dring client" : \ 1311ae08745Sheppo (((vd)->xfer_mode == 0) ? 
"null client" : \ 1321ae08745Sheppo "unsupported client"))) 1331ae08745Sheppo 134690555a1Sachartre /* Read disk label from a disk on file */ 135690555a1Sachartre #define VD_FILE_LABEL_READ(vd, labelp) \ 13687a7269eSachartre vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, (caddr_t)labelp, \ 137690555a1Sachartre 0, sizeof (struct dk_label)) 138690555a1Sachartre 139690555a1Sachartre /* Write disk label to a disk on file */ 140690555a1Sachartre #define VD_FILE_LABEL_WRITE(vd, labelp) \ 14187a7269eSachartre vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, (caddr_t)labelp, \ 142690555a1Sachartre 0, sizeof (struct dk_label)) 143690555a1Sachartre 1442f5224aeSachartre /* Message for disk access rights reset failure */ 1452f5224aeSachartre #define VD_RESET_ACCESS_FAILURE_MSG \ 1462f5224aeSachartre "Fail to reset disk access rights for disk %s" 1472f5224aeSachartre 148445b4c2eSsb155480 /* 149445b4c2eSsb155480 * Specification of an MD node passed to the MDEG to filter any 150445b4c2eSsb155480 * 'vport' nodes that do not belong to the specified node. This 151445b4c2eSsb155480 * template is copied for each vds instance and filled in with 152445b4c2eSsb155480 * the appropriate 'cfg-handle' value before being passed to the MDEG. 153445b4c2eSsb155480 */ 154445b4c2eSsb155480 static mdeg_prop_spec_t vds_prop_template[] = { 155445b4c2eSsb155480 { MDET_PROP_STR, "name", VDS_NAME }, 156445b4c2eSsb155480 { MDET_PROP_VAL, "cfg-handle", NULL }, 157445b4c2eSsb155480 { MDET_LIST_END, NULL, NULL } 158445b4c2eSsb155480 }; 159445b4c2eSsb155480 160445b4c2eSsb155480 #define VDS_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 161445b4c2eSsb155480 162445b4c2eSsb155480 /* 163445b4c2eSsb155480 * Matching criteria passed to the MDEG to register interest 164445b4c2eSsb155480 * in changes to 'virtual-device-port' nodes identified by their 165445b4c2eSsb155480 * 'id' property. 
166445b4c2eSsb155480 */ 167445b4c2eSsb155480 static md_prop_match_t vd_prop_match[] = { 168445b4c2eSsb155480 { MDET_PROP_VAL, VD_ID_PROP }, 169445b4c2eSsb155480 { MDET_LIST_END, NULL } 170445b4c2eSsb155480 }; 171445b4c2eSsb155480 172445b4c2eSsb155480 static mdeg_node_match_t vd_match = {"virtual-device-port", 173445b4c2eSsb155480 vd_prop_match}; 174445b4c2eSsb155480 175047ba61eSachartre /* 176047ba61eSachartre * Options for the VD_BLOCK_DEVICE_OPTS property. 177047ba61eSachartre */ 178047ba61eSachartre #define VD_OPT_RDONLY 0x1 /* read-only */ 179047ba61eSachartre #define VD_OPT_SLICE 0x2 /* single slice */ 180047ba61eSachartre #define VD_OPT_EXCLUSIVE 0x4 /* exclusive access */ 181047ba61eSachartre 182047ba61eSachartre #define VD_OPTION_NLEN 128 183047ba61eSachartre 184047ba61eSachartre typedef struct vd_option { 185047ba61eSachartre char vdo_name[VD_OPTION_NLEN]; 186047ba61eSachartre uint64_t vdo_value; 187047ba61eSachartre } vd_option_t; 188047ba61eSachartre 189047ba61eSachartre vd_option_t vd_bdev_options[] = { 190047ba61eSachartre { "ro", VD_OPT_RDONLY }, 191047ba61eSachartre { "slice", VD_OPT_SLICE }, 192047ba61eSachartre { "excl", VD_OPT_EXCLUSIVE } 193047ba61eSachartre }; 194047ba61eSachartre 1951ae08745Sheppo /* Debugging macros */ 1961ae08745Sheppo #ifdef DEBUG 1973af08d82Slm66018 1983af08d82Slm66018 static int vd_msglevel = 0; 1993af08d82Slm66018 2001ae08745Sheppo #define PR0 if (vd_msglevel > 0) PRN 2011ae08745Sheppo #define PR1 if (vd_msglevel > 1) PRN 2021ae08745Sheppo #define PR2 if (vd_msglevel > 2) PRN 2031ae08745Sheppo 2041ae08745Sheppo #define VD_DUMP_DRING_ELEM(elem) \ 2053c96341aSnarayan PR0("dst:%x op:%x st:%u nb:%lx addr:%lx ncook:%u\n", \ 2061ae08745Sheppo elem->hdr.dstate, \ 2071ae08745Sheppo elem->payload.operation, \ 2081ae08745Sheppo elem->payload.status, \ 2091ae08745Sheppo elem->payload.nbytes, \ 2101ae08745Sheppo elem->payload.addr, \ 2111ae08745Sheppo elem->payload.ncookies); 2121ae08745Sheppo 2133af08d82Slm66018 char * 
2143af08d82Slm66018 vd_decode_state(int state) 2153af08d82Slm66018 { 2163af08d82Slm66018 char *str; 2173af08d82Slm66018 2183af08d82Slm66018 #define CASE_STATE(_s) case _s: str = #_s; break; 2193af08d82Slm66018 2203af08d82Slm66018 switch (state) { 2213af08d82Slm66018 CASE_STATE(VD_STATE_INIT) 2223af08d82Slm66018 CASE_STATE(VD_STATE_VER) 2233af08d82Slm66018 CASE_STATE(VD_STATE_ATTR) 2243af08d82Slm66018 CASE_STATE(VD_STATE_DRING) 2253af08d82Slm66018 CASE_STATE(VD_STATE_RDX) 2263af08d82Slm66018 CASE_STATE(VD_STATE_DATA) 2273af08d82Slm66018 default: str = "unknown"; break; 2283af08d82Slm66018 } 2293af08d82Slm66018 2303af08d82Slm66018 #undef CASE_STATE 2313af08d82Slm66018 2323af08d82Slm66018 return (str); 2333af08d82Slm66018 } 2343af08d82Slm66018 2353af08d82Slm66018 void 2363af08d82Slm66018 vd_decode_tag(vio_msg_t *msg) 2373af08d82Slm66018 { 2383af08d82Slm66018 char *tstr, *sstr, *estr; 2393af08d82Slm66018 2403af08d82Slm66018 #define CASE_TYPE(_s) case _s: tstr = #_s; break; 2413af08d82Slm66018 2423af08d82Slm66018 switch (msg->tag.vio_msgtype) { 2433af08d82Slm66018 CASE_TYPE(VIO_TYPE_CTRL) 2443af08d82Slm66018 CASE_TYPE(VIO_TYPE_DATA) 2453af08d82Slm66018 CASE_TYPE(VIO_TYPE_ERR) 2463af08d82Slm66018 default: tstr = "unknown"; break; 2473af08d82Slm66018 } 2483af08d82Slm66018 2493af08d82Slm66018 #undef CASE_TYPE 2503af08d82Slm66018 2513af08d82Slm66018 #define CASE_SUBTYPE(_s) case _s: sstr = #_s; break; 2523af08d82Slm66018 2533af08d82Slm66018 switch (msg->tag.vio_subtype) { 2543af08d82Slm66018 CASE_SUBTYPE(VIO_SUBTYPE_INFO) 2553af08d82Slm66018 CASE_SUBTYPE(VIO_SUBTYPE_ACK) 2563af08d82Slm66018 CASE_SUBTYPE(VIO_SUBTYPE_NACK) 2573af08d82Slm66018 default: sstr = "unknown"; break; 2583af08d82Slm66018 } 2593af08d82Slm66018 2603af08d82Slm66018 #undef CASE_SUBTYPE 2613af08d82Slm66018 2623af08d82Slm66018 #define CASE_ENV(_s) case _s: estr = #_s; break; 2633af08d82Slm66018 2643af08d82Slm66018 switch (msg->tag.vio_subtype_env) { 2653af08d82Slm66018 CASE_ENV(VIO_VER_INFO) 
2663af08d82Slm66018 CASE_ENV(VIO_ATTR_INFO) 2673af08d82Slm66018 CASE_ENV(VIO_DRING_REG) 2683af08d82Slm66018 CASE_ENV(VIO_DRING_UNREG) 2693af08d82Slm66018 CASE_ENV(VIO_RDX) 2703af08d82Slm66018 CASE_ENV(VIO_PKT_DATA) 2713af08d82Slm66018 CASE_ENV(VIO_DESC_DATA) 2723af08d82Slm66018 CASE_ENV(VIO_DRING_DATA) 2733af08d82Slm66018 default: estr = "unknown"; break; 2743af08d82Slm66018 } 2753af08d82Slm66018 2763af08d82Slm66018 #undef CASE_ENV 2773af08d82Slm66018 2783af08d82Slm66018 PR1("(%x/%x/%x) message : (%s/%s/%s)", 2793af08d82Slm66018 msg->tag.vio_msgtype, msg->tag.vio_subtype, 2803af08d82Slm66018 msg->tag.vio_subtype_env, tstr, sstr, estr); 2813af08d82Slm66018 } 2823af08d82Slm66018 2831ae08745Sheppo #else /* !DEBUG */ 2843af08d82Slm66018 2851ae08745Sheppo #define PR0(...) 2861ae08745Sheppo #define PR1(...) 2871ae08745Sheppo #define PR2(...) 2881ae08745Sheppo 2891ae08745Sheppo #define VD_DUMP_DRING_ELEM(elem) 2901ae08745Sheppo 2913af08d82Slm66018 #define vd_decode_state(_s) (NULL) 2923af08d82Slm66018 #define vd_decode_tag(_s) (NULL) 2933af08d82Slm66018 2941ae08745Sheppo #endif /* DEBUG */ 2951ae08745Sheppo 2961ae08745Sheppo 297d10e4ef2Snarayan /* 298d10e4ef2Snarayan * Soft state structure for a vds instance 299d10e4ef2Snarayan */ 3001ae08745Sheppo typedef struct vds { 3011ae08745Sheppo uint_t initialized; /* driver inst initialization flags */ 3021ae08745Sheppo dev_info_t *dip; /* driver inst devinfo pointer */ 3031ae08745Sheppo ldi_ident_t ldi_ident; /* driver's identifier for LDI */ 3041ae08745Sheppo mod_hash_t *vd_table; /* table of virtual disks served */ 305445b4c2eSsb155480 mdeg_node_spec_t *ispecp; /* mdeg node specification */ 3061ae08745Sheppo mdeg_handle_t mdeg; /* handle for MDEG operations */ 3071ae08745Sheppo } vds_t; 3081ae08745Sheppo 309d10e4ef2Snarayan /* 310d10e4ef2Snarayan * Types of descriptor-processing tasks 311d10e4ef2Snarayan */ 312d10e4ef2Snarayan typedef enum vd_task_type { 313d10e4ef2Snarayan VD_NONFINAL_RANGE_TASK, /* task for intermediate 
descriptor in range */ 314d10e4ef2Snarayan VD_FINAL_RANGE_TASK, /* task for last in a range of descriptors */ 315d10e4ef2Snarayan } vd_task_type_t; 316d10e4ef2Snarayan 317d10e4ef2Snarayan /* 318d10e4ef2Snarayan * Structure describing the task for processing a descriptor 319d10e4ef2Snarayan */ 320d10e4ef2Snarayan typedef struct vd_task { 321d10e4ef2Snarayan struct vd *vd; /* vd instance task is for */ 322d10e4ef2Snarayan vd_task_type_t type; /* type of descriptor task */ 323d10e4ef2Snarayan int index; /* dring elem index for task */ 324d10e4ef2Snarayan vio_msg_t *msg; /* VIO message task is for */ 325d10e4ef2Snarayan size_t msglen; /* length of message content */ 326d10e4ef2Snarayan vd_dring_payload_t *request; /* request task will perform */ 327d10e4ef2Snarayan struct buf buf; /* buf(9s) for I/O request */ 3284bac2208Snarayan ldc_mem_handle_t mhdl; /* task memory handle */ 329205eeb1aSlm66018 int status; /* status of processing task */ 330205eeb1aSlm66018 int (*completef)(struct vd_task *task); /* completion func ptr */ 331d10e4ef2Snarayan } vd_task_t; 332d10e4ef2Snarayan 333d10e4ef2Snarayan /* 334d10e4ef2Snarayan * Soft state structure for a virtual disk instance 335d10e4ef2Snarayan */ 3361ae08745Sheppo typedef struct vd { 3371ae08745Sheppo uint_t initialized; /* vdisk initialization flags */ 33817cadca8Slm66018 uint64_t operations; /* bitmask of VD_OPs exported */ 33917cadca8Slm66018 vio_ver_t version; /* ver negotiated with client */ 3401ae08745Sheppo vds_t *vds; /* server for this vdisk */ 341d10e4ef2Snarayan ddi_taskq_t *startq; /* queue for I/O start tasks */ 342d10e4ef2Snarayan ddi_taskq_t *completionq; /* queue for completion tasks */ 3431ae08745Sheppo ldi_handle_t ldi_handle[V_NUMPAR]; /* LDI slice handles */ 3443c96341aSnarayan char device_path[MAXPATHLEN + 1]; /* vdisk device */ 3451ae08745Sheppo dev_t dev[V_NUMPAR]; /* dev numbers for slices */ 346047ba61eSachartre int open_flags; /* open flags */ 347e1ebb9ecSlm66018 uint_t nslices; /* number of slices 
*/ 3481ae08745Sheppo size_t vdisk_size; /* number of blocks in vdisk */ 34917cadca8Slm66018 size_t vdisk_block_size; /* size of each vdisk block */ 3501ae08745Sheppo vd_disk_type_t vdisk_type; /* slice or entire disk */ 3514bac2208Snarayan vd_disk_label_t vdisk_label; /* EFI or VTOC label */ 35217cadca8Slm66018 vd_media_t vdisk_media; /* media type of backing dev. */ 35317cadca8Slm66018 boolean_t is_atapi_dev; /* Is this an IDE CD-ROM dev? */ 354e1ebb9ecSlm66018 ushort_t max_xfer_sz; /* max xfer size in DEV_BSIZE */ 35517cadca8Slm66018 size_t block_size; /* blk size of actual device */ 3561ae08745Sheppo boolean_t pseudo; /* underlying pseudo dev */ 35717cadca8Slm66018 boolean_t file; /* is vDisk backed by a file? */ 3582f5224aeSachartre boolean_t scsi; /* is vDisk backed by scsi? */ 3593c96341aSnarayan vnode_t *file_vnode; /* file vnode */ 3603c96341aSnarayan size_t file_size; /* file size */ 36187a7269eSachartre ddi_devid_t file_devid; /* devid for disk image */ 362edcc0754Sachartre efi_gpt_t efi_gpt; /* EFI GPT for slice type */ 363edcc0754Sachartre efi_gpe_t efi_gpe; /* EFI GPE for slice type */ 364edcc0754Sachartre int efi_reserved; /* EFI reserved slice */ 3651ae08745Sheppo struct dk_geom dk_geom; /* synthetic for slice type */ 3661ae08745Sheppo struct vtoc vtoc; /* synthetic for slice type */ 367edcc0754Sachartre vd_slice_t slices[VD_MAXPART]; /* logical partitions */ 3682f5224aeSachartre boolean_t ownership; /* disk ownership status */ 3691ae08745Sheppo ldc_status_t ldc_state; /* LDC connection state */ 3701ae08745Sheppo ldc_handle_t ldc_handle; /* handle for LDC comm */ 3711ae08745Sheppo size_t max_msglen; /* largest LDC message len */ 3721ae08745Sheppo vd_state_t state; /* client handshake state */ 3731ae08745Sheppo uint8_t xfer_mode; /* transfer mode with client */ 3741ae08745Sheppo uint32_t sid; /* client's session ID */ 3751ae08745Sheppo uint64_t seq_num; /* message sequence number */ 3761ae08745Sheppo uint64_t dring_ident; /* identifier of dring */ 
3771ae08745Sheppo ldc_dring_handle_t dring_handle; /* handle for dring ops */ 3781ae08745Sheppo uint32_t descriptor_size; /* num bytes in desc */ 3791ae08745Sheppo uint32_t dring_len; /* number of dring elements */ 3801ae08745Sheppo caddr_t dring; /* address of dring */ 3813af08d82Slm66018 caddr_t vio_msgp; /* vio msg staging buffer */ 382d10e4ef2Snarayan vd_task_t inband_task; /* task for inband descriptor */ 383d10e4ef2Snarayan vd_task_t *dring_task; /* tasks dring elements */ 384d10e4ef2Snarayan 385d10e4ef2Snarayan kmutex_t lock; /* protects variables below */ 386d10e4ef2Snarayan boolean_t enabled; /* is vdisk enabled? */ 387d10e4ef2Snarayan boolean_t reset_state; /* reset connection state? */ 388d10e4ef2Snarayan boolean_t reset_ldc; /* reset LDC channel? */ 3891ae08745Sheppo } vd_t; 3901ae08745Sheppo 3911ae08745Sheppo typedef struct vds_operation { 3923af08d82Slm66018 char *namep; 3931ae08745Sheppo uint8_t operation; 394d10e4ef2Snarayan int (*start)(vd_task_t *task); 395205eeb1aSlm66018 int (*complete)(vd_task_t *task); 3961ae08745Sheppo } vds_operation_t; 3971ae08745Sheppo 3980a55fbb7Slm66018 typedef struct vd_ioctl { 3990a55fbb7Slm66018 uint8_t operation; /* vdisk operation */ 4000a55fbb7Slm66018 const char *operation_name; /* vdisk operation name */ 4010a55fbb7Slm66018 size_t nbytes; /* size of operation buffer */ 4020a55fbb7Slm66018 int cmd; /* corresponding ioctl cmd */ 4030a55fbb7Slm66018 const char *cmd_name; /* ioctl cmd name */ 4040a55fbb7Slm66018 void *arg; /* ioctl cmd argument */ 4050a55fbb7Slm66018 /* convert input vd_buf to output ioctl_arg */ 4062f5224aeSachartre int (*copyin)(void *vd_buf, size_t, void *ioctl_arg); 4070a55fbb7Slm66018 /* convert input ioctl_arg to output vd_buf */ 4080a55fbb7Slm66018 void (*copyout)(void *ioctl_arg, void *vd_buf); 409047ba61eSachartre /* write is true if the operation writes any data to the backend */ 410047ba61eSachartre boolean_t write; 4110a55fbb7Slm66018 } vd_ioctl_t; 4120a55fbb7Slm66018 4130a55fbb7Slm66018 
/* Define trivial copyin/copyout conversion function flag */ 4142f5224aeSachartre #define VD_IDENTITY_IN ((int (*)(void *, size_t, void *))-1) 4152f5224aeSachartre #define VD_IDENTITY_OUT ((void (*)(void *, void *))-1) 4161ae08745Sheppo 4171ae08745Sheppo 4183c96341aSnarayan static int vds_ldc_retries = VDS_RETRIES; 4193af08d82Slm66018 static int vds_ldc_delay = VDS_LDC_DELAY; 4203c96341aSnarayan static int vds_dev_retries = VDS_RETRIES; 4213c96341aSnarayan static int vds_dev_delay = VDS_DEV_DELAY; 4221ae08745Sheppo static void *vds_state; 4231ae08745Sheppo 424eba0cb4eSachartre static uint_t vd_file_write_flags = VD_FILE_WRITE_FLAGS; 425eba0cb4eSachartre 42687a7269eSachartre static short vd_scsi_rdwr_timeout = VD_SCSI_RDWR_TIMEOUT; 4272f5224aeSachartre static int vd_scsi_debug = USCSI_SILENT; 4282f5224aeSachartre 4292f5224aeSachartre /* 4302f5224aeSachartre * Tunable to define the behavior of the service domain if the vdisk server 4312f5224aeSachartre * fails to reset disk exclusive access when a LDC channel is reset. When a 4322f5224aeSachartre * LDC channel is reset the vdisk server will try to reset disk exclusive 4332f5224aeSachartre * access by releasing any SCSI-2 reservation or resetting the disk. If these 4342f5224aeSachartre * actions fail then the default behavior (vd_reset_access_failure = 0) is to 4352f5224aeSachartre * print a warning message. This default behavior can be changed by setting 4362f5224aeSachartre * the vd_reset_access_failure variable to A_REBOOT (= 0x1) and that will 4372f5224aeSachartre * cause the service domain to reboot, or A_DUMP (= 0x5) and that will cause 4382f5224aeSachartre * the service domain to panic. In both cases, the reset of the service domain 4392f5224aeSachartre * should trigger a reset SCSI buses and hopefully clear any SCSI-2 reservation. 
4402f5224aeSachartre */ 4412f5224aeSachartre static int vd_reset_access_failure = 0; 4422f5224aeSachartre 4432f5224aeSachartre /* 4442f5224aeSachartre * Tunable for backward compatibility. When this variable is set to B_TRUE, 4452f5224aeSachartre * all disk volumes (ZFS, SVM, VxvM volumes) will be exported as single 4462f5224aeSachartre * slice disks whether or not they have the "slice" option set. This is 4472f5224aeSachartre * to provide a simple backward compatibility mechanism when upgrading 4482f5224aeSachartre * the vds driver and using a domain configuration created before the 4492f5224aeSachartre * "slice" option was available. 4502f5224aeSachartre */ 4512f5224aeSachartre static boolean_t vd_volume_force_slice = B_FALSE; 45287a7269eSachartre 4530a55fbb7Slm66018 /* 4540a55fbb7Slm66018 * Supported protocol version pairs, from highest (newest) to lowest (oldest) 4550a55fbb7Slm66018 * 4560a55fbb7Slm66018 * Each supported major version should appear only once, paired with (and only 4570a55fbb7Slm66018 * with) its highest supported minor version number (as the protocol requires 4580a55fbb7Slm66018 * supporting all lower minor version numbers as well) 4590a55fbb7Slm66018 */ 46017cadca8Slm66018 static const vio_ver_t vds_version[] = {{1, 1}}; 4610a55fbb7Slm66018 static const size_t vds_num_versions = 4620a55fbb7Slm66018 sizeof (vds_version)/sizeof (vds_version[0]); 4630a55fbb7Slm66018 4643af08d82Slm66018 static void vd_free_dring_task(vd_t *vdp); 4653c96341aSnarayan static int vd_setup_vd(vd_t *vd); 466047ba61eSachartre static int vd_setup_single_slice_disk(vd_t *vd); 4672f5224aeSachartre static int vd_setup_mediainfo(vd_t *vd); 4683c96341aSnarayan static boolean_t vd_enabled(vd_t *vd); 46978fcd0a1Sachartre static ushort_t vd_lbl2cksum(struct dk_label *label); 47078fcd0a1Sachartre static int vd_file_validate_geometry(vd_t *vd); 47117cadca8Slm66018 static boolean_t vd_file_is_iso_image(vd_t *vd); 47217cadca8Slm66018 static void vd_set_exported_operations(vd_t *vd); 
4732f5224aeSachartre static void vd_reset_access(vd_t *vd); 474edcc0754Sachartre static int vd_backend_ioctl(vd_t *vd, int cmd, caddr_t arg); 475edcc0754Sachartre static int vds_efi_alloc_and_read(vd_t *, efi_gpt_t **, efi_gpe_t **); 476edcc0754Sachartre static void vds_efi_free(vd_t *, efi_gpt_t *, efi_gpe_t *); 477047ba61eSachartre 478690555a1Sachartre /* 479690555a1Sachartre * Function: 480690555a1Sachartre * vd_file_rw 481690555a1Sachartre * 482690555a1Sachartre * Description: 483690555a1Sachartre * Read or write to a disk on file. 484690555a1Sachartre * 485690555a1Sachartre * Parameters: 486690555a1Sachartre * vd - disk on which the operation is performed. 487690555a1Sachartre * slice - slice on which the operation is performed, 48887a7269eSachartre * VD_SLICE_NONE indicates that the operation 48987a7269eSachartre * is done using an absolute disk offset. 490690555a1Sachartre * operation - operation to execute: read (VD_OP_BREAD) or 491690555a1Sachartre * write (VD_OP_BWRITE). 492690555a1Sachartre * data - buffer where data are read to or written from. 493690555a1Sachartre * blk - starting block for the operation. 494690555a1Sachartre * len - number of bytes to read or write. 495690555a1Sachartre * 496690555a1Sachartre * Return Code: 497690555a1Sachartre * n >= 0 - success, n indicates the number of bytes read 498690555a1Sachartre * or written. 499690555a1Sachartre * -1 - error. 
500690555a1Sachartre */ 501690555a1Sachartre static ssize_t 502690555a1Sachartre vd_file_rw(vd_t *vd, int slice, int operation, caddr_t data, size_t blk, 503690555a1Sachartre size_t len) 504690555a1Sachartre { 505690555a1Sachartre caddr_t maddr; 506690555a1Sachartre size_t offset, maxlen, moffset, mlen, n; 507690555a1Sachartre uint_t smflags; 508690555a1Sachartre enum seg_rw srw; 509690555a1Sachartre 510690555a1Sachartre ASSERT(vd->file); 511690555a1Sachartre ASSERT(len > 0); 512690555a1Sachartre 513047ba61eSachartre /* 514047ba61eSachartre * If a file is exported as a slice then we don't care about the vtoc. 515047ba61eSachartre * In that case, the vtoc is a fake mainly to make newfs happy and we 516047ba61eSachartre * handle any I/O as a raw disk access so that we can have access to the 517047ba61eSachartre * entire backend. 518047ba61eSachartre */ 519047ba61eSachartre if (vd->vdisk_type == VD_DISK_TYPE_SLICE || slice == VD_SLICE_NONE) { 520690555a1Sachartre /* raw disk access */ 521690555a1Sachartre offset = blk * DEV_BSIZE; 522690555a1Sachartre } else { 523690555a1Sachartre ASSERT(slice >= 0 && slice < V_NUMPAR); 52478fcd0a1Sachartre 52517cadca8Slm66018 /* 52617cadca8Slm66018 * v1.0 vDisk clients depended on the server not verifying 52717cadca8Slm66018 * the label of a unformatted disk. This "feature" is 52817cadca8Slm66018 * maintained for backward compatibility but all versions 52917cadca8Slm66018 * from v1.1 onwards must do the right thing. 
53017cadca8Slm66018 */ 53178fcd0a1Sachartre if (vd->vdisk_label == VD_DISK_LABEL_UNK && 532edcc0754Sachartre vio_ver_is_supported(vd->version, 1, 1)) { 533edcc0754Sachartre (void) vd_file_validate_geometry(vd); 534edcc0754Sachartre if (vd->vdisk_label == VD_DISK_LABEL_UNK) { 535edcc0754Sachartre PR0("Unknown disk label, can't do I/O " 536edcc0754Sachartre "from slice %d", slice); 53778fcd0a1Sachartre return (-1); 53878fcd0a1Sachartre } 539edcc0754Sachartre } 54078fcd0a1Sachartre 541edcc0754Sachartre if (vd->vdisk_label == VD_DISK_LABEL_VTOC) { 542edcc0754Sachartre ASSERT(vd->vtoc.v_sectorsz == DEV_BSIZE); 543edcc0754Sachartre } else { 544edcc0754Sachartre ASSERT(vd->vdisk_label == VD_DISK_LABEL_EFI); 545edcc0754Sachartre ASSERT(vd->vdisk_block_size == DEV_BSIZE); 546edcc0754Sachartre } 547edcc0754Sachartre 548edcc0754Sachartre if (blk >= vd->slices[slice].nblocks) { 549690555a1Sachartre /* address past the end of the slice */ 550690555a1Sachartre PR0("req_addr (0x%lx) > psize (0x%lx)", 551edcc0754Sachartre blk, vd->slices[slice].nblocks); 552690555a1Sachartre return (0); 553690555a1Sachartre } 554690555a1Sachartre 555edcc0754Sachartre offset = (vd->slices[slice].start + blk) * DEV_BSIZE; 556690555a1Sachartre 557690555a1Sachartre /* 558690555a1Sachartre * If the requested size is greater than the size 559690555a1Sachartre * of the partition, truncate the read/write. 560690555a1Sachartre */ 561edcc0754Sachartre maxlen = (vd->slices[slice].nblocks - blk) * DEV_BSIZE; 562690555a1Sachartre 563690555a1Sachartre if (len > maxlen) { 564690555a1Sachartre PR0("I/O size truncated to %lu bytes from %lu bytes", 565690555a1Sachartre maxlen, len); 566690555a1Sachartre len = maxlen; 567690555a1Sachartre } 568690555a1Sachartre } 569690555a1Sachartre 570690555a1Sachartre /* 571690555a1Sachartre * We have to ensure that we are reading/writing into the mmap 572690555a1Sachartre * range. If we have a partial disk image (e.g. 
an image of 573690555a1Sachartre * s0 instead s2) the system can try to access slices that 574690555a1Sachartre * are not included into the disk image. 575690555a1Sachartre */ 576edcc0754Sachartre if ((offset + len) > vd->file_size) { 577edcc0754Sachartre PR0("offset + nbytes (0x%lx + 0x%lx) > " 578690555a1Sachartre "file_size (0x%lx)", offset, len, vd->file_size); 579690555a1Sachartre return (-1); 580690555a1Sachartre } 581690555a1Sachartre 582690555a1Sachartre srw = (operation == VD_OP_BREAD)? S_READ : S_WRITE; 583eba0cb4eSachartre smflags = (operation == VD_OP_BREAD)? 0 : 584eba0cb4eSachartre (SM_WRITE | vd_file_write_flags); 585690555a1Sachartre n = len; 586690555a1Sachartre 587690555a1Sachartre do { 588690555a1Sachartre /* 589690555a1Sachartre * segmap_getmapflt() returns a MAXBSIZE chunk which is 590690555a1Sachartre * MAXBSIZE aligned. 591690555a1Sachartre */ 592690555a1Sachartre moffset = offset & MAXBOFFSET; 593690555a1Sachartre mlen = MIN(MAXBSIZE - moffset, n); 594690555a1Sachartre maddr = segmap_getmapflt(segkmap, vd->file_vnode, offset, 595690555a1Sachartre mlen, 1, srw); 596690555a1Sachartre /* 597690555a1Sachartre * Fault in the pages so we can check for error and ensure 598690555a1Sachartre * that we can safely used the mapped address. 
599690555a1Sachartre */ 600690555a1Sachartre if (segmap_fault(kas.a_hat, segkmap, maddr, mlen, 601690555a1Sachartre F_SOFTLOCK, srw) != 0) { 602690555a1Sachartre (void) segmap_release(segkmap, maddr, 0); 603690555a1Sachartre return (-1); 604690555a1Sachartre } 605690555a1Sachartre 606690555a1Sachartre if (operation == VD_OP_BREAD) 607690555a1Sachartre bcopy(maddr + moffset, data, mlen); 608690555a1Sachartre else 609690555a1Sachartre bcopy(data, maddr + moffset, mlen); 610690555a1Sachartre 611690555a1Sachartre if (segmap_fault(kas.a_hat, segkmap, maddr, mlen, 612690555a1Sachartre F_SOFTUNLOCK, srw) != 0) { 613690555a1Sachartre (void) segmap_release(segkmap, maddr, 0); 614690555a1Sachartre return (-1); 615690555a1Sachartre } 616690555a1Sachartre if (segmap_release(segkmap, maddr, smflags) != 0) 617690555a1Sachartre return (-1); 618690555a1Sachartre n -= mlen; 619690555a1Sachartre offset += mlen; 620690555a1Sachartre data += mlen; 621690555a1Sachartre 622690555a1Sachartre } while (n > 0); 623690555a1Sachartre 624690555a1Sachartre return (len); 625690555a1Sachartre } 626690555a1Sachartre 62787a7269eSachartre /* 62887a7269eSachartre * Function: 62978fcd0a1Sachartre * vd_file_build_default_label 63078fcd0a1Sachartre * 63178fcd0a1Sachartre * Description: 63278fcd0a1Sachartre * Return a default label for the given disk. This is used when the disk 63378fcd0a1Sachartre * does not have a valid VTOC so that the user can get a valid default 63417cadca8Slm66018 * configuration. The default label has all slice sizes set to 0 (except 63578fcd0a1Sachartre * slice 2 which is the entire disk) to force the user to write a valid 63678fcd0a1Sachartre * label onto the disk image. 63778fcd0a1Sachartre * 63878fcd0a1Sachartre * Parameters: 63978fcd0a1Sachartre * vd - disk on which the operation is performed. 64078fcd0a1Sachartre * label - the returned default label. 64178fcd0a1Sachartre * 64278fcd0a1Sachartre * Return Code: 64378fcd0a1Sachartre * none. 
64478fcd0a1Sachartre */ 64578fcd0a1Sachartre static void 64678fcd0a1Sachartre vd_file_build_default_label(vd_t *vd, struct dk_label *label) 64778fcd0a1Sachartre { 64878fcd0a1Sachartre size_t size; 64978fcd0a1Sachartre char prefix; 65078fcd0a1Sachartre 65178fcd0a1Sachartre ASSERT(vd->file); 652edcc0754Sachartre ASSERT(vd->vdisk_type == VD_DISK_TYPE_DISK); 653edcc0754Sachartre 654edcc0754Sachartre bzero(label, sizeof (struct dk_label)); 65578fcd0a1Sachartre 65678fcd0a1Sachartre /* 65778fcd0a1Sachartre * We must have a resonable number of cylinders and sectors so 65878fcd0a1Sachartre * that newfs can run using default values. 65978fcd0a1Sachartre * 66078fcd0a1Sachartre * if (disk_size < 2MB) 66178fcd0a1Sachartre * phys_cylinders = disk_size / 100K 66278fcd0a1Sachartre * else 66378fcd0a1Sachartre * phys_cylinders = disk_size / 300K 66478fcd0a1Sachartre * 66578fcd0a1Sachartre * phys_cylinders = (phys_cylinders == 0) ? 1 : phys_cylinders 66678fcd0a1Sachartre * alt_cylinders = (phys_cylinders > 2) ? 2 : 0; 66778fcd0a1Sachartre * data_cylinders = phys_cylinders - alt_cylinders 66878fcd0a1Sachartre * 66978fcd0a1Sachartre * sectors = disk_size / (phys_cylinders * blk_size) 67078fcd0a1Sachartre * 67178fcd0a1Sachartre * The file size test is an attempt to not have too few cylinders 67278fcd0a1Sachartre * for a small file, or so many on a big file that you waste space 67378fcd0a1Sachartre * for backup superblocks or cylinder group structures. 
67478fcd0a1Sachartre */ 67578fcd0a1Sachartre if (vd->file_size < (2 * 1024 * 1024)) 67678fcd0a1Sachartre label->dkl_pcyl = vd->file_size / (100 * 1024); 67778fcd0a1Sachartre else 67878fcd0a1Sachartre label->dkl_pcyl = vd->file_size / (300 * 1024); 67978fcd0a1Sachartre 68078fcd0a1Sachartre if (label->dkl_pcyl == 0) 68178fcd0a1Sachartre label->dkl_pcyl = 1; 68278fcd0a1Sachartre 683047ba61eSachartre label->dkl_acyl = 0; 684047ba61eSachartre 68578fcd0a1Sachartre if (label->dkl_pcyl > 2) 68678fcd0a1Sachartre label->dkl_acyl = 2; 68778fcd0a1Sachartre 68878fcd0a1Sachartre label->dkl_nsect = vd->file_size / 68978fcd0a1Sachartre (DEV_BSIZE * label->dkl_pcyl); 69078fcd0a1Sachartre label->dkl_ncyl = label->dkl_pcyl - label->dkl_acyl; 69178fcd0a1Sachartre label->dkl_nhead = 1; 69278fcd0a1Sachartre label->dkl_write_reinstruct = 0; 69378fcd0a1Sachartre label->dkl_read_reinstruct = 0; 69478fcd0a1Sachartre label->dkl_rpm = 7200; 69578fcd0a1Sachartre label->dkl_apc = 0; 69678fcd0a1Sachartre label->dkl_intrlv = 0; 69778fcd0a1Sachartre 69878fcd0a1Sachartre PR0("requested disk size: %ld bytes\n", vd->file_size); 69978fcd0a1Sachartre PR0("setup: ncyl=%d nhead=%d nsec=%d\n", label->dkl_pcyl, 70078fcd0a1Sachartre label->dkl_nhead, label->dkl_nsect); 70178fcd0a1Sachartre PR0("provided disk size: %ld bytes\n", (uint64_t) 70278fcd0a1Sachartre (label->dkl_pcyl * label->dkl_nhead * 70378fcd0a1Sachartre label->dkl_nsect * DEV_BSIZE)); 70478fcd0a1Sachartre 70578fcd0a1Sachartre if (vd->file_size < (1ULL << 20)) { 70678fcd0a1Sachartre size = vd->file_size >> 10; 70778fcd0a1Sachartre prefix = 'K'; /* Kilobyte */ 70878fcd0a1Sachartre } else if (vd->file_size < (1ULL << 30)) { 70978fcd0a1Sachartre size = vd->file_size >> 20; 71078fcd0a1Sachartre prefix = 'M'; /* Megabyte */ 71178fcd0a1Sachartre } else if (vd->file_size < (1ULL << 40)) { 71278fcd0a1Sachartre size = vd->file_size >> 30; 71378fcd0a1Sachartre prefix = 'G'; /* Gigabyte */ 71478fcd0a1Sachartre } else { 71578fcd0a1Sachartre size = 
vd->file_size >> 40; 71678fcd0a1Sachartre prefix = 'T'; /* Terabyte */ 71778fcd0a1Sachartre } 71878fcd0a1Sachartre 71978fcd0a1Sachartre /* 72078fcd0a1Sachartre * We must have a correct label name otherwise format(1m) will 72178fcd0a1Sachartre * not recognized the disk as labeled. 72278fcd0a1Sachartre */ 72378fcd0a1Sachartre (void) snprintf(label->dkl_asciilabel, LEN_DKL_ASCII, 72478fcd0a1Sachartre "SUN-DiskImage-%ld%cB cyl %d alt %d hd %d sec %d", 72578fcd0a1Sachartre size, prefix, 72678fcd0a1Sachartre label->dkl_ncyl, label->dkl_acyl, label->dkl_nhead, 72778fcd0a1Sachartre label->dkl_nsect); 72878fcd0a1Sachartre 72978fcd0a1Sachartre /* default VTOC */ 73078fcd0a1Sachartre label->dkl_vtoc.v_version = V_VERSION; 731edcc0754Sachartre label->dkl_vtoc.v_nparts = V_NUMPAR; 73278fcd0a1Sachartre label->dkl_vtoc.v_sanity = VTOC_SANE; 733edcc0754Sachartre label->dkl_vtoc.v_part[VD_ENTIRE_DISK_SLICE].p_tag = V_BACKUP; 734edcc0754Sachartre label->dkl_map[VD_ENTIRE_DISK_SLICE].dkl_cylno = 0; 735edcc0754Sachartre label->dkl_map[VD_ENTIRE_DISK_SLICE].dkl_nblk = label->dkl_ncyl * 73678fcd0a1Sachartre label->dkl_nhead * label->dkl_nsect; 737edcc0754Sachartre label->dkl_magic = DKL_MAGIC; 73878fcd0a1Sachartre label->dkl_cksum = vd_lbl2cksum(label); 73978fcd0a1Sachartre } 74078fcd0a1Sachartre 74178fcd0a1Sachartre /* 74278fcd0a1Sachartre * Function: 74387a7269eSachartre * vd_file_set_vtoc 74487a7269eSachartre * 74587a7269eSachartre * Description: 74687a7269eSachartre * Set the vtoc of a disk image by writing the label and backup 74787a7269eSachartre * labels into the disk image backend. 74887a7269eSachartre * 74987a7269eSachartre * Parameters: 75087a7269eSachartre * vd - disk on which the operation is performed. 75187a7269eSachartre * label - the data to be written. 75287a7269eSachartre * 75387a7269eSachartre * Return Code: 75487a7269eSachartre * 0 - success. 75587a7269eSachartre * n > 0 - error, n indicates the errno code. 
75687a7269eSachartre */ 75787a7269eSachartre static int 75887a7269eSachartre vd_file_set_vtoc(vd_t *vd, struct dk_label *label) 75987a7269eSachartre { 76087a7269eSachartre int blk, sec, cyl, head, cnt; 76187a7269eSachartre 76287a7269eSachartre ASSERT(vd->file); 76387a7269eSachartre 76487a7269eSachartre if (VD_FILE_LABEL_WRITE(vd, label) < 0) { 76587a7269eSachartre PR0("fail to write disk label"); 76687a7269eSachartre return (EIO); 76787a7269eSachartre } 76887a7269eSachartre 76987a7269eSachartre /* 77087a7269eSachartre * Backup labels are on the last alternate cylinder's 77187a7269eSachartre * first five odd sectors. 77287a7269eSachartre */ 77387a7269eSachartre if (label->dkl_acyl == 0) { 77487a7269eSachartre PR0("no alternate cylinder, can not store backup labels"); 77587a7269eSachartre return (0); 77687a7269eSachartre } 77787a7269eSachartre 77887a7269eSachartre cyl = label->dkl_ncyl + label->dkl_acyl - 1; 77987a7269eSachartre head = label->dkl_nhead - 1; 78087a7269eSachartre 78187a7269eSachartre blk = (cyl * ((label->dkl_nhead * label->dkl_nsect) - label->dkl_apc)) + 78287a7269eSachartre (head * label->dkl_nsect); 78387a7269eSachartre 78487a7269eSachartre /* 78587a7269eSachartre * Write the backup labels. Make sure we don't try to write past 78687a7269eSachartre * the last cylinder. 
78787a7269eSachartre */ 78887a7269eSachartre sec = 1; 78987a7269eSachartre 79087a7269eSachartre for (cnt = 0; cnt < VD_FILE_NUM_BACKUP; cnt++) { 79187a7269eSachartre 79287a7269eSachartre if (sec >= label->dkl_nsect) { 79387a7269eSachartre PR0("not enough sector to store all backup labels"); 79487a7269eSachartre return (0); 79587a7269eSachartre } 79687a7269eSachartre 79787a7269eSachartre if (vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, (caddr_t)label, 79887a7269eSachartre blk + sec, sizeof (struct dk_label)) < 0) { 79987a7269eSachartre PR0("error writing backup label at block %d\n", 80087a7269eSachartre blk + sec); 80187a7269eSachartre return (EIO); 80287a7269eSachartre } 80387a7269eSachartre 80487a7269eSachartre PR1("wrote backup label at block %d\n", blk + sec); 80587a7269eSachartre 80687a7269eSachartre sec += 2; 80787a7269eSachartre } 80887a7269eSachartre 80987a7269eSachartre return (0); 81087a7269eSachartre } 81187a7269eSachartre 81287a7269eSachartre /* 81387a7269eSachartre * Function: 81487a7269eSachartre * vd_file_get_devid_block 81587a7269eSachartre * 81687a7269eSachartre * Description: 81787a7269eSachartre * Return the block number where the device id is stored. 81887a7269eSachartre * 81987a7269eSachartre * Parameters: 82087a7269eSachartre * vd - disk on which the operation is performed. 
82187a7269eSachartre * blkp - pointer to the block number 82287a7269eSachartre * 82387a7269eSachartre * Return Code: 82487a7269eSachartre * 0 - success 82587a7269eSachartre * ENOSPC - disk has no space to store a device id 82687a7269eSachartre */ 82787a7269eSachartre static int 82887a7269eSachartre vd_file_get_devid_block(vd_t *vd, size_t *blkp) 82987a7269eSachartre { 83087a7269eSachartre diskaddr_t spc, head, cyl; 83187a7269eSachartre 83287a7269eSachartre ASSERT(vd->file); 833edcc0754Sachartre 834edcc0754Sachartre if (vd->vdisk_label == VD_DISK_LABEL_UNK) { 835edcc0754Sachartre /* 836edcc0754Sachartre * If no label is defined we don't know where to find 837edcc0754Sachartre * a device id. 838edcc0754Sachartre */ 839edcc0754Sachartre return (ENOSPC); 840edcc0754Sachartre } 841edcc0754Sachartre 842edcc0754Sachartre if (vd->vdisk_label == VD_DISK_LABEL_EFI) { 843edcc0754Sachartre /* 844edcc0754Sachartre * For an EFI disk, the devid is at the beginning of 845edcc0754Sachartre * the reserved slice 846edcc0754Sachartre */ 847edcc0754Sachartre if (vd->efi_reserved == -1) { 848edcc0754Sachartre PR0("EFI disk has no reserved slice"); 849edcc0754Sachartre return (ENOSPC); 850edcc0754Sachartre } 851edcc0754Sachartre 852edcc0754Sachartre *blkp = vd->slices[vd->efi_reserved].start; 853edcc0754Sachartre return (0); 854edcc0754Sachartre } 855edcc0754Sachartre 85687a7269eSachartre ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC); 85787a7269eSachartre 85887a7269eSachartre /* this geometry doesn't allow us to have a devid */ 85987a7269eSachartre if (vd->dk_geom.dkg_acyl < 2) { 86087a7269eSachartre PR0("not enough alternate cylinder available for devid " 86187a7269eSachartre "(acyl=%u)", vd->dk_geom.dkg_acyl); 86287a7269eSachartre return (ENOSPC); 86387a7269eSachartre } 86487a7269eSachartre 86587a7269eSachartre /* the devid is in on the track next to the last cylinder */ 86687a7269eSachartre cyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl - 2; 86787a7269eSachartre spc = 
vd->dk_geom.dkg_nhead * vd->dk_geom.dkg_nsect; 86887a7269eSachartre head = vd->dk_geom.dkg_nhead - 1; 86987a7269eSachartre 87087a7269eSachartre *blkp = (cyl * (spc - vd->dk_geom.dkg_apc)) + 87187a7269eSachartre (head * vd->dk_geom.dkg_nsect) + 1; 87287a7269eSachartre 87387a7269eSachartre return (0); 87487a7269eSachartre } 87587a7269eSachartre 87687a7269eSachartre /* 87787a7269eSachartre * Return the checksum of a disk block containing an on-disk devid. 87887a7269eSachartre */ 87987a7269eSachartre static uint_t 88087a7269eSachartre vd_dkdevid2cksum(struct dk_devid *dkdevid) 88187a7269eSachartre { 88287a7269eSachartre uint_t chksum, *ip; 88387a7269eSachartre int i; 88487a7269eSachartre 88587a7269eSachartre chksum = 0; 88687a7269eSachartre ip = (uint_t *)dkdevid; 88787a7269eSachartre for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int)); i++) 88887a7269eSachartre chksum ^= ip[i]; 88987a7269eSachartre 89087a7269eSachartre return (chksum); 89187a7269eSachartre } 89287a7269eSachartre 89387a7269eSachartre /* 89487a7269eSachartre * Function: 89587a7269eSachartre * vd_file_read_devid 89687a7269eSachartre * 89787a7269eSachartre * Description: 89887a7269eSachartre * Read the device id stored on a disk image. 89987a7269eSachartre * 90087a7269eSachartre * Parameters: 90187a7269eSachartre * vd - disk on which the operation is performed. 90287a7269eSachartre * devid - the return address of the device ID. 
90387a7269eSachartre * 90487a7269eSachartre * Return Code: 90587a7269eSachartre * 0 - success 90687a7269eSachartre * EIO - I/O error while trying to access the disk image 90787a7269eSachartre * EINVAL - no valid device id was found 90887a7269eSachartre * ENOSPC - disk has no space to store a device id 90987a7269eSachartre */ 91087a7269eSachartre static int 91187a7269eSachartre vd_file_read_devid(vd_t *vd, ddi_devid_t *devid) 91287a7269eSachartre { 91387a7269eSachartre struct dk_devid *dkdevid; 91487a7269eSachartre size_t blk; 91587a7269eSachartre uint_t chksum; 91687a7269eSachartre int status, sz; 91787a7269eSachartre 91887a7269eSachartre if ((status = vd_file_get_devid_block(vd, &blk)) != 0) 91987a7269eSachartre return (status); 92087a7269eSachartre 92187a7269eSachartre dkdevid = kmem_zalloc(DEV_BSIZE, KM_SLEEP); 92287a7269eSachartre 92387a7269eSachartre /* get the devid */ 92487a7269eSachartre if ((vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, (caddr_t)dkdevid, blk, 92587a7269eSachartre DEV_BSIZE)) < 0) { 92687a7269eSachartre PR0("error reading devid block at %lu", blk); 92787a7269eSachartre status = EIO; 92887a7269eSachartre goto done; 92987a7269eSachartre } 93087a7269eSachartre 93187a7269eSachartre /* validate the revision */ 93287a7269eSachartre if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) || 93387a7269eSachartre (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) { 93487a7269eSachartre PR0("invalid devid found at block %lu (bad revision)", blk); 93587a7269eSachartre status = EINVAL; 93687a7269eSachartre goto done; 93787a7269eSachartre } 93887a7269eSachartre 93987a7269eSachartre /* compute checksum */ 94087a7269eSachartre chksum = vd_dkdevid2cksum(dkdevid); 94187a7269eSachartre 94287a7269eSachartre /* compare the checksums */ 94387a7269eSachartre if (DKD_GETCHKSUM(dkdevid) != chksum) { 94487a7269eSachartre PR0("invalid devid found at block %lu (bad checksum)", blk); 94587a7269eSachartre status = EINVAL; 94687a7269eSachartre goto done; 94787a7269eSachartre } 
94887a7269eSachartre 94987a7269eSachartre /* validate the device id */ 95087a7269eSachartre if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) { 95187a7269eSachartre PR0("invalid devid found at block %lu", blk); 95287a7269eSachartre status = EINVAL; 95387a7269eSachartre goto done; 95487a7269eSachartre } 95587a7269eSachartre 95687a7269eSachartre PR1("devid read at block %lu", blk); 95787a7269eSachartre 95887a7269eSachartre sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid); 95987a7269eSachartre *devid = kmem_alloc(sz, KM_SLEEP); 96087a7269eSachartre bcopy(&dkdevid->dkd_devid, *devid, sz); 96187a7269eSachartre 96287a7269eSachartre done: 96387a7269eSachartre kmem_free(dkdevid, DEV_BSIZE); 96487a7269eSachartre return (status); 96587a7269eSachartre 96687a7269eSachartre } 96787a7269eSachartre 96887a7269eSachartre /* 96987a7269eSachartre * Function: 97087a7269eSachartre * vd_file_write_devid 97187a7269eSachartre * 97287a7269eSachartre * Description: 97387a7269eSachartre * Write a device id into disk image. 97487a7269eSachartre * 97587a7269eSachartre * Parameters: 97687a7269eSachartre * vd - disk on which the operation is performed. 97787a7269eSachartre * devid - the device ID to store. 
97887a7269eSachartre * 97987a7269eSachartre * Return Code: 98087a7269eSachartre * 0 - success 98187a7269eSachartre * EIO - I/O error while trying to access the disk image 98287a7269eSachartre * ENOSPC - disk has no space to store a device id 98387a7269eSachartre */ 98487a7269eSachartre static int 98587a7269eSachartre vd_file_write_devid(vd_t *vd, ddi_devid_t devid) 98687a7269eSachartre { 98787a7269eSachartre struct dk_devid *dkdevid; 98887a7269eSachartre uint_t chksum; 98987a7269eSachartre size_t blk; 99087a7269eSachartre int status; 99187a7269eSachartre 992edcc0754Sachartre if (devid == NULL) { 993edcc0754Sachartre /* nothing to write */ 994edcc0754Sachartre return (0); 995edcc0754Sachartre } 996edcc0754Sachartre 99787a7269eSachartre if ((status = vd_file_get_devid_block(vd, &blk)) != 0) 99887a7269eSachartre return (status); 99987a7269eSachartre 100087a7269eSachartre dkdevid = kmem_zalloc(DEV_BSIZE, KM_SLEEP); 100187a7269eSachartre 100287a7269eSachartre /* set revision */ 100387a7269eSachartre dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB; 100487a7269eSachartre dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB; 100587a7269eSachartre 100687a7269eSachartre /* copy devid */ 100787a7269eSachartre bcopy(devid, &dkdevid->dkd_devid, ddi_devid_sizeof(devid)); 100887a7269eSachartre 100987a7269eSachartre /* compute checksum */ 101087a7269eSachartre chksum = vd_dkdevid2cksum(dkdevid); 101187a7269eSachartre 101287a7269eSachartre /* set checksum */ 101387a7269eSachartre DKD_FORMCHKSUM(chksum, dkdevid); 101487a7269eSachartre 101587a7269eSachartre /* store the devid */ 101687a7269eSachartre if ((status = vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, 101787a7269eSachartre (caddr_t)dkdevid, blk, DEV_BSIZE)) < 0) { 101887a7269eSachartre PR0("Error writing devid block at %lu", blk); 101987a7269eSachartre status = EIO; 102087a7269eSachartre } else { 102187a7269eSachartre PR1("devid written at block %lu", blk); 102287a7269eSachartre status = 0; 102387a7269eSachartre } 102487a7269eSachartre 
102587a7269eSachartre kmem_free(dkdevid, DEV_BSIZE); 102687a7269eSachartre return (status); 102787a7269eSachartre } 102887a7269eSachartre 102987a7269eSachartre /* 103087a7269eSachartre * Function: 103117cadca8Slm66018 * vd_do_scsi_rdwr 103287a7269eSachartre * 103387a7269eSachartre * Description: 103487a7269eSachartre * Read or write to a SCSI disk using an absolute disk offset. 103587a7269eSachartre * 103687a7269eSachartre * Parameters: 103787a7269eSachartre * vd - disk on which the operation is performed. 103887a7269eSachartre * operation - operation to execute: read (VD_OP_BREAD) or 103987a7269eSachartre * write (VD_OP_BWRITE). 104087a7269eSachartre * data - buffer where data are read to or written from. 104187a7269eSachartre * blk - starting block for the operation. 104287a7269eSachartre * len - number of bytes to read or write. 104387a7269eSachartre * 104487a7269eSachartre * Return Code: 104587a7269eSachartre * 0 - success 104687a7269eSachartre * n != 0 - error. 104787a7269eSachartre */ 104887a7269eSachartre static int 104917cadca8Slm66018 vd_do_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t blk, size_t len) 105087a7269eSachartre { 105187a7269eSachartre struct uscsi_cmd ucmd; 105287a7269eSachartre union scsi_cdb cdb; 105387a7269eSachartre int nsectors, nblk; 105487a7269eSachartre int max_sectors; 105587a7269eSachartre int status, rval; 105687a7269eSachartre 105787a7269eSachartre ASSERT(!vd->file); 105817cadca8Slm66018 ASSERT(vd->vdisk_block_size > 0); 105987a7269eSachartre 106087a7269eSachartre max_sectors = vd->max_xfer_sz; 106117cadca8Slm66018 nblk = (len / vd->vdisk_block_size); 106287a7269eSachartre 106317cadca8Slm66018 if (len % vd->vdisk_block_size != 0) 106487a7269eSachartre return (EINVAL); 106587a7269eSachartre 106687a7269eSachartre /* 106787a7269eSachartre * Build and execute the uscsi ioctl. 
We build a group0, group1 106887a7269eSachartre * or group4 command as necessary, since some targets 106987a7269eSachartre * do not support group1 commands. 107087a7269eSachartre */ 107187a7269eSachartre while (nblk) { 107287a7269eSachartre 107387a7269eSachartre bzero(&ucmd, sizeof (ucmd)); 107487a7269eSachartre bzero(&cdb, sizeof (cdb)); 107587a7269eSachartre 107687a7269eSachartre nsectors = (max_sectors < nblk) ? max_sectors : nblk; 107787a7269eSachartre 107817cadca8Slm66018 /* 107917cadca8Slm66018 * Some of the optical drives on sun4v machines are ATAPI 108017cadca8Slm66018 * devices which use Group 1 Read/Write commands so we need 108117cadca8Slm66018 * to explicitly check a flag which is set when a domain 108217cadca8Slm66018 * is bound. 108317cadca8Slm66018 */ 108417cadca8Slm66018 if (blk < (2 << 20) && nsectors <= 0xff && !vd->is_atapi_dev) { 108587a7269eSachartre FORMG0ADDR(&cdb, blk); 108687a7269eSachartre FORMG0COUNT(&cdb, nsectors); 108787a7269eSachartre ucmd.uscsi_cdblen = CDB_GROUP0; 108887a7269eSachartre } else if (blk > 0xffffffff) { 108987a7269eSachartre FORMG4LONGADDR(&cdb, blk); 109087a7269eSachartre FORMG4COUNT(&cdb, nsectors); 109187a7269eSachartre ucmd.uscsi_cdblen = CDB_GROUP4; 109287a7269eSachartre cdb.scc_cmd |= SCMD_GROUP4; 109387a7269eSachartre } else { 109487a7269eSachartre FORMG1ADDR(&cdb, blk); 109587a7269eSachartre FORMG1COUNT(&cdb, nsectors); 109687a7269eSachartre ucmd.uscsi_cdblen = CDB_GROUP1; 109787a7269eSachartre cdb.scc_cmd |= SCMD_GROUP1; 109887a7269eSachartre } 109987a7269eSachartre ucmd.uscsi_cdb = (caddr_t)&cdb; 110087a7269eSachartre ucmd.uscsi_bufaddr = data; 110117cadca8Slm66018 ucmd.uscsi_buflen = nsectors * vd->block_size; 110287a7269eSachartre ucmd.uscsi_timeout = vd_scsi_rdwr_timeout; 110387a7269eSachartre /* 110487a7269eSachartre * Set flags so that the command is isolated from normal 110587a7269eSachartre * commands and no error message is printed. 
110687a7269eSachartre */ 110787a7269eSachartre ucmd.uscsi_flags = USCSI_ISOLATE | USCSI_SILENT; 110887a7269eSachartre 110987a7269eSachartre if (operation == VD_OP_BREAD) { 111087a7269eSachartre cdb.scc_cmd |= SCMD_READ; 111187a7269eSachartre ucmd.uscsi_flags |= USCSI_READ; 111287a7269eSachartre } else { 111387a7269eSachartre cdb.scc_cmd |= SCMD_WRITE; 111487a7269eSachartre } 111587a7269eSachartre 111687a7269eSachartre status = ldi_ioctl(vd->ldi_handle[VD_ENTIRE_DISK_SLICE], 1117047ba61eSachartre USCSICMD, (intptr_t)&ucmd, (vd->open_flags | FKIOCTL), 111887a7269eSachartre kcred, &rval); 111987a7269eSachartre 112087a7269eSachartre if (status == 0) 112187a7269eSachartre status = ucmd.uscsi_status; 112287a7269eSachartre 112387a7269eSachartre if (status != 0) 112487a7269eSachartre break; 112587a7269eSachartre 112687a7269eSachartre /* 112787a7269eSachartre * Check if partial DMA breakup is required. If so, reduce 112887a7269eSachartre * the request size by half and retry the last request. 112987a7269eSachartre */ 113087a7269eSachartre if (ucmd.uscsi_resid == ucmd.uscsi_buflen) { 113187a7269eSachartre max_sectors >>= 1; 113287a7269eSachartre if (max_sectors <= 0) { 113387a7269eSachartre status = EIO; 113487a7269eSachartre break; 113587a7269eSachartre } 113687a7269eSachartre continue; 113787a7269eSachartre } 113887a7269eSachartre 113987a7269eSachartre if (ucmd.uscsi_resid != 0) { 114087a7269eSachartre status = EIO; 114187a7269eSachartre break; 114287a7269eSachartre } 114387a7269eSachartre 114487a7269eSachartre blk += nsectors; 114587a7269eSachartre nblk -= nsectors; 114617cadca8Slm66018 data += nsectors * vd->vdisk_block_size; /* SECSIZE */ 114787a7269eSachartre } 114887a7269eSachartre 114987a7269eSachartre return (status); 115087a7269eSachartre } 115187a7269eSachartre 1152205eeb1aSlm66018 /* 115317cadca8Slm66018 * Function: 115417cadca8Slm66018 * vd_scsi_rdwr 115517cadca8Slm66018 * 115617cadca8Slm66018 * Description: 115717cadca8Slm66018 * Wrapper function to read or 
write to a SCSI disk using an absolute 115817cadca8Slm66018 * disk offset. It checks the blocksize of the underlying device and, 115917cadca8Slm66018 * if necessary, adjusts the buffers accordingly before calling 116017cadca8Slm66018 * vd_do_scsi_rdwr() to do the actual read or write. 116117cadca8Slm66018 * 116217cadca8Slm66018 * Parameters: 116317cadca8Slm66018 * vd - disk on which the operation is performed. 116417cadca8Slm66018 * operation - operation to execute: read (VD_OP_BREAD) or 116517cadca8Slm66018 * write (VD_OP_BWRITE). 116617cadca8Slm66018 * data - buffer where data are read to or written from. 116717cadca8Slm66018 * blk - starting block for the operation. 116817cadca8Slm66018 * len - number of bytes to read or write. 116917cadca8Slm66018 * 117017cadca8Slm66018 * Return Code: 117117cadca8Slm66018 * 0 - success 117217cadca8Slm66018 * n != 0 - error. 117317cadca8Slm66018 */ 117417cadca8Slm66018 static int 117517cadca8Slm66018 vd_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t vblk, size_t vlen) 117617cadca8Slm66018 { 117717cadca8Slm66018 int rv; 117817cadca8Slm66018 117917cadca8Slm66018 size_t pblk; /* physical device block number of data on device */ 118017cadca8Slm66018 size_t delta; /* relative offset between pblk and vblk */ 118117cadca8Slm66018 size_t pnblk; /* number of physical blocks to be read from device */ 118217cadca8Slm66018 size_t plen; /* length of data to be read from physical device */ 118317cadca8Slm66018 char *buf; /* buffer area to fit physical device's block size */ 118417cadca8Slm66018 11852f5224aeSachartre if (vd->block_size == 0) { 11862f5224aeSachartre /* 11872f5224aeSachartre * The block size was not available during the attach, 11882f5224aeSachartre * try to update it now. 
11892f5224aeSachartre */ 11902f5224aeSachartre if (vd_setup_mediainfo(vd) != 0) 11912f5224aeSachartre return (EIO); 11922f5224aeSachartre } 11932f5224aeSachartre 119417cadca8Slm66018 /* 119517cadca8Slm66018 * If the vdisk block size and the block size of the underlying device 119617cadca8Slm66018 * match we can skip straight to vd_do_scsi_rdwr(), otherwise we need 119717cadca8Slm66018 * to create a buffer large enough to handle the device's block size 119817cadca8Slm66018 * and adjust the block to be read from and the amount of data to 119917cadca8Slm66018 * read to correspond with the device's block size. 120017cadca8Slm66018 */ 120117cadca8Slm66018 if (vd->vdisk_block_size == vd->block_size) 120217cadca8Slm66018 return (vd_do_scsi_rdwr(vd, operation, data, vblk, vlen)); 120317cadca8Slm66018 120417cadca8Slm66018 if (vd->vdisk_block_size > vd->block_size) 120517cadca8Slm66018 return (EINVAL); 120617cadca8Slm66018 120717cadca8Slm66018 /* 120817cadca8Slm66018 * Writing of physical block sizes larger than the virtual block size 120917cadca8Slm66018 * is not supported. This would be added if/when support for guests 121017cadca8Slm66018 * writing to DVDs is implemented. 121117cadca8Slm66018 */ 121217cadca8Slm66018 if (operation == VD_OP_BWRITE) 121317cadca8Slm66018 return (ENOTSUP); 121417cadca8Slm66018 121517cadca8Slm66018 /* BEGIN CSTYLED */ 121617cadca8Slm66018 /* 121717cadca8Slm66018 * Below is a diagram showing the relationship between the physical 121817cadca8Slm66018 * and virtual blocks. If the virtual blocks marked by 'X' below are 121917cadca8Slm66018 * requested, then the physical blocks denoted by 'Y' are read. 
122017cadca8Slm66018 * 122117cadca8Slm66018 * vblk 122217cadca8Slm66018 * | vlen 122317cadca8Slm66018 * |<--------------->| 122417cadca8Slm66018 * v v 122517cadca8Slm66018 * --+--+--+--+--+--+--+--+--+--+--+--+--+--+--+- virtual disk: 122617cadca8Slm66018 * | | | |XX|XX|XX|XX|XX|XX| | | | | | } block size is 122717cadca8Slm66018 * --+--+--+--+--+--+--+--+--+--+--+--+--+--+--+- vd->vdisk_block_size 122817cadca8Slm66018 * : : : : 122917cadca8Slm66018 * >:==:< delta : : 123017cadca8Slm66018 * : : : : 123117cadca8Slm66018 * --+-----+-----+-----+-----+-----+-----+-----+-- physical disk: 123217cadca8Slm66018 * | |YY:YY|YYYYY|YYYYY|YY:YY| | | } block size is 123317cadca8Slm66018 * --+-----+-----+-----+-----+-----+-----+-----+-- vd->block_size 123417cadca8Slm66018 * ^ ^ 123517cadca8Slm66018 * |<--------------------->| 123617cadca8Slm66018 * | plen 123717cadca8Slm66018 * pblk 123817cadca8Slm66018 */ 123917cadca8Slm66018 /* END CSTYLED */ 124017cadca8Slm66018 pblk = (vblk * vd->vdisk_block_size) / vd->block_size; 124117cadca8Slm66018 delta = (vblk * vd->vdisk_block_size) - (pblk * vd->block_size); 124217cadca8Slm66018 pnblk = ((delta + vlen - 1) / vd->block_size) + 1; 124317cadca8Slm66018 plen = pnblk * vd->block_size; 124417cadca8Slm66018 124517cadca8Slm66018 PR2("vblk %lx:pblk %lx: vlen %ld:plen %ld", vblk, pblk, vlen, plen); 124617cadca8Slm66018 124717cadca8Slm66018 buf = kmem_zalloc(sizeof (caddr_t) * plen, KM_SLEEP); 124817cadca8Slm66018 rv = vd_do_scsi_rdwr(vd, operation, (caddr_t)buf, pblk, plen); 124917cadca8Slm66018 bcopy(buf + delta, data, vlen); 125017cadca8Slm66018 125117cadca8Slm66018 kmem_free(buf, sizeof (caddr_t) * plen); 125217cadca8Slm66018 125317cadca8Slm66018 return (rv); 125417cadca8Slm66018 } 125517cadca8Slm66018 125617cadca8Slm66018 /* 1257205eeb1aSlm66018 * Return Values 1258205eeb1aSlm66018 * EINPROGRESS - operation was successfully started 1259205eeb1aSlm66018 * EIO - encountered LDC (aka. 
task error) 1260205eeb1aSlm66018 * 0 - operation completed successfully 1261205eeb1aSlm66018 * 1262205eeb1aSlm66018 * Side Effect 1263205eeb1aSlm66018 * sets request->status = <disk operation status> 1264205eeb1aSlm66018 */ 12651ae08745Sheppo static int 1266d10e4ef2Snarayan vd_start_bio(vd_task_t *task) 12671ae08745Sheppo { 12684bac2208Snarayan int rv, status = 0; 1269d10e4ef2Snarayan vd_t *vd = task->vd; 1270d10e4ef2Snarayan vd_dring_payload_t *request = task->request; 1271d10e4ef2Snarayan struct buf *buf = &task->buf; 12724bac2208Snarayan uint8_t mtype; 12733c96341aSnarayan int slice; 1274047ba61eSachartre char *bufaddr = 0; 1275047ba61eSachartre size_t buflen; 1276d10e4ef2Snarayan 1277d10e4ef2Snarayan ASSERT(vd != NULL); 1278d10e4ef2Snarayan ASSERT(request != NULL); 12793c96341aSnarayan 12803c96341aSnarayan slice = request->slice; 12813c96341aSnarayan 128287a7269eSachartre ASSERT(slice == VD_SLICE_NONE || slice < vd->nslices); 1283d10e4ef2Snarayan ASSERT((request->operation == VD_OP_BREAD) || 1284d10e4ef2Snarayan (request->operation == VD_OP_BWRITE)); 1285d10e4ef2Snarayan 1286205eeb1aSlm66018 if (request->nbytes == 0) { 1287205eeb1aSlm66018 /* no service for trivial requests */ 1288205eeb1aSlm66018 request->status = EINVAL; 1289205eeb1aSlm66018 return (0); 1290205eeb1aSlm66018 } 12911ae08745Sheppo 1292d10e4ef2Snarayan PR1("%s %lu bytes at block %lu", 1293d10e4ef2Snarayan (request->operation == VD_OP_BREAD) ? "Read" : "Write", 1294d10e4ef2Snarayan request->nbytes, request->addr); 12951ae08745Sheppo 1296047ba61eSachartre /* 1297047ba61eSachartre * We have to check the open flags because the functions processing 1298047ba61eSachartre * the read/write request will not do it. 
1299047ba61eSachartre */ 1300047ba61eSachartre if (request->operation == VD_OP_BWRITE && !(vd->open_flags & FWRITE)) { 1301047ba61eSachartre PR0("write fails because backend is opened read-only"); 1302047ba61eSachartre request->nbytes = 0; 1303047ba61eSachartre request->status = EROFS; 1304047ba61eSachartre return (0); 1305047ba61eSachartre } 1306d10e4ef2Snarayan 13074bac2208Snarayan mtype = (&vd->inband_task == task) ? LDC_SHADOW_MAP : LDC_DIRECT_MAP; 13084bac2208Snarayan 13094bac2208Snarayan /* Map memory exported by client */ 13104bac2208Snarayan status = ldc_mem_map(task->mhdl, request->cookie, request->ncookies, 13114bac2208Snarayan mtype, (request->operation == VD_OP_BREAD) ? LDC_MEM_W : LDC_MEM_R, 1312047ba61eSachartre &bufaddr, NULL); 13134bac2208Snarayan if (status != 0) { 13143af08d82Slm66018 PR0("ldc_mem_map() returned err %d ", status); 1315205eeb1aSlm66018 return (EIO); 1316d10e4ef2Snarayan } 1317d10e4ef2Snarayan 1318047ba61eSachartre buflen = request->nbytes; 1319047ba61eSachartre 1320047ba61eSachartre status = ldc_mem_acquire(task->mhdl, 0, buflen); 13214bac2208Snarayan if (status != 0) { 13224bac2208Snarayan (void) ldc_mem_unmap(task->mhdl); 13233af08d82Slm66018 PR0("ldc_mem_acquire() returned err %d ", status); 1324205eeb1aSlm66018 return (EIO); 13254bac2208Snarayan } 13264bac2208Snarayan 1327d10e4ef2Snarayan /* Start the block I/O */ 13283c96341aSnarayan if (vd->file) { 1329047ba61eSachartre rv = vd_file_rw(vd, slice, request->operation, bufaddr, 1330690555a1Sachartre request->addr, request->nbytes); 1331690555a1Sachartre if (rv < 0) { 13323c96341aSnarayan request->nbytes = 0; 1333205eeb1aSlm66018 request->status = EIO; 1334690555a1Sachartre } else { 1335690555a1Sachartre request->nbytes = rv; 1336205eeb1aSlm66018 request->status = 0; 13373c96341aSnarayan } 13383c96341aSnarayan } else { 133987a7269eSachartre if (slice == VD_SLICE_NONE) { 134087a7269eSachartre /* 134187a7269eSachartre * This is not a disk image so it is a real disk. 
We 134287a7269eSachartre * assume that the underlying device driver supports 134387a7269eSachartre * USCSICMD ioctls. This is the case of all SCSI devices 134487a7269eSachartre * (sd, ssd...). 134587a7269eSachartre * 134687a7269eSachartre * In the future if we have non-SCSI disks we would need 134787a7269eSachartre * to invoke the appropriate function to do I/O using an 134817cadca8Slm66018 * absolute disk offset (for example using DIOCTL_RWCMD 134987a7269eSachartre * for IDE disks). 135087a7269eSachartre */ 1351047ba61eSachartre rv = vd_scsi_rdwr(vd, request->operation, bufaddr, 1352047ba61eSachartre request->addr, request->nbytes); 135387a7269eSachartre if (rv != 0) { 135487a7269eSachartre request->nbytes = 0; 1355205eeb1aSlm66018 request->status = EIO; 135687a7269eSachartre } else { 1357205eeb1aSlm66018 request->status = 0; 135887a7269eSachartre } 135987a7269eSachartre } else { 1360047ba61eSachartre bioinit(buf); 1361047ba61eSachartre buf->b_flags = B_BUSY; 1362047ba61eSachartre buf->b_bcount = request->nbytes; 1363047ba61eSachartre buf->b_lblkno = request->addr; 1364047ba61eSachartre buf->b_edev = vd->dev[slice]; 1365047ba61eSachartre buf->b_un.b_addr = bufaddr; 1366047ba61eSachartre buf->b_flags |= (request->operation == VD_OP_BREAD)? 1367047ba61eSachartre B_READ : B_WRITE; 1368047ba61eSachartre 1369205eeb1aSlm66018 request->status = 1370205eeb1aSlm66018 ldi_strategy(vd->ldi_handle[slice], buf); 1371205eeb1aSlm66018 1372205eeb1aSlm66018 /* 1373205eeb1aSlm66018 * This is to indicate to the caller that the request 1374205eeb1aSlm66018 * needs to be finished by vd_complete_bio() by calling 1375205eeb1aSlm66018 * biowait() there and waiting for that to return before 1376205eeb1aSlm66018 * triggering the notification of the vDisk client. 
1377205eeb1aSlm66018 * 1378205eeb1aSlm66018 * This is necessary when writing to real disks as 1379205eeb1aSlm66018 * otherwise calls to ldi_strategy() would be serialized 1380205eeb1aSlm66018 * behind the calls to biowait() and performance would 1381205eeb1aSlm66018 * suffer. 1382205eeb1aSlm66018 */ 1383205eeb1aSlm66018 if (request->status == 0) 138487a7269eSachartre return (EINPROGRESS); 1385047ba61eSachartre 1386047ba61eSachartre biofini(buf); 138787a7269eSachartre } 13883c96341aSnarayan } 13893c96341aSnarayan 1390d10e4ef2Snarayan /* Clean up after error */ 1391047ba61eSachartre rv = ldc_mem_release(task->mhdl, 0, buflen); 13924bac2208Snarayan if (rv) { 13933af08d82Slm66018 PR0("ldc_mem_release() returned err %d ", rv); 1394205eeb1aSlm66018 status = EIO; 13954bac2208Snarayan } 13964bac2208Snarayan rv = ldc_mem_unmap(task->mhdl); 13974bac2208Snarayan if (rv) { 1398205eeb1aSlm66018 PR0("ldc_mem_unmap() returned err %d ", rv); 1399205eeb1aSlm66018 status = EIO; 14004bac2208Snarayan } 14014bac2208Snarayan 1402d10e4ef2Snarayan return (status); 1403d10e4ef2Snarayan } 1404d10e4ef2Snarayan 1405205eeb1aSlm66018 /* 1406205eeb1aSlm66018 * This function should only be called from vd_notify to ensure that requests 1407205eeb1aSlm66018 * are responded to in the order that they are received. 
1408205eeb1aSlm66018 */ 1409d10e4ef2Snarayan static int 1410d10e4ef2Snarayan send_msg(ldc_handle_t ldc_handle, void *msg, size_t msglen) 1411d10e4ef2Snarayan { 14123af08d82Slm66018 int status; 1413d10e4ef2Snarayan size_t nbytes; 1414d10e4ef2Snarayan 14153af08d82Slm66018 do { 1416d10e4ef2Snarayan nbytes = msglen; 1417d10e4ef2Snarayan status = ldc_write(ldc_handle, msg, &nbytes); 14183af08d82Slm66018 if (status != EWOULDBLOCK) 14193af08d82Slm66018 break; 14203af08d82Slm66018 drv_usecwait(vds_ldc_delay); 14213af08d82Slm66018 } while (status == EWOULDBLOCK); 1422d10e4ef2Snarayan 1423d10e4ef2Snarayan if (status != 0) { 14243af08d82Slm66018 if (status != ECONNRESET) 14253af08d82Slm66018 PR0("ldc_write() returned errno %d", status); 1426d10e4ef2Snarayan return (status); 1427d10e4ef2Snarayan } else if (nbytes != msglen) { 14283af08d82Slm66018 PR0("ldc_write() performed only partial write"); 1429d10e4ef2Snarayan return (EIO); 1430d10e4ef2Snarayan } 1431d10e4ef2Snarayan 1432d10e4ef2Snarayan PR1("SENT %lu bytes", msglen); 1433d10e4ef2Snarayan return (0); 1434d10e4ef2Snarayan } 1435d10e4ef2Snarayan 1436d10e4ef2Snarayan static void 1437d10e4ef2Snarayan vd_need_reset(vd_t *vd, boolean_t reset_ldc) 1438d10e4ef2Snarayan { 1439d10e4ef2Snarayan mutex_enter(&vd->lock); 1440d10e4ef2Snarayan vd->reset_state = B_TRUE; 1441d10e4ef2Snarayan vd->reset_ldc = reset_ldc; 1442d10e4ef2Snarayan mutex_exit(&vd->lock); 1443d10e4ef2Snarayan } 1444d10e4ef2Snarayan 1445d10e4ef2Snarayan /* 1446d10e4ef2Snarayan * Reset the state of the connection with a client, if needed; reset the LDC 1447d10e4ef2Snarayan * transport as well, if needed. This function should only be called from the 14483af08d82Slm66018 * "vd_recv_msg", as it waits for tasks - otherwise a deadlock can occur. 
1449d10e4ef2Snarayan */ 1450d10e4ef2Snarayan static void 1451d10e4ef2Snarayan vd_reset_if_needed(vd_t *vd) 1452d10e4ef2Snarayan { 1453d10e4ef2Snarayan int status = 0; 1454d10e4ef2Snarayan 1455d10e4ef2Snarayan mutex_enter(&vd->lock); 1456d10e4ef2Snarayan if (!vd->reset_state) { 1457d10e4ef2Snarayan ASSERT(!vd->reset_ldc); 1458d10e4ef2Snarayan mutex_exit(&vd->lock); 1459d10e4ef2Snarayan return; 1460d10e4ef2Snarayan } 1461d10e4ef2Snarayan mutex_exit(&vd->lock); 1462d10e4ef2Snarayan 1463d10e4ef2Snarayan PR0("Resetting connection state with %s", VD_CLIENT(vd)); 1464d10e4ef2Snarayan 1465d10e4ef2Snarayan /* 1466d10e4ef2Snarayan * Let any asynchronous I/O complete before possibly pulling the rug 1467d10e4ef2Snarayan * out from under it; defer checking vd->reset_ldc, as one of the 1468d10e4ef2Snarayan * asynchronous tasks might set it 1469d10e4ef2Snarayan */ 1470d10e4ef2Snarayan ddi_taskq_wait(vd->completionq); 1471d10e4ef2Snarayan 14723c96341aSnarayan if (vd->file) { 1473da6c28aaSamw status = VOP_FSYNC(vd->file_vnode, FSYNC, kcred, NULL); 14743c96341aSnarayan if (status) { 14753c96341aSnarayan PR0("VOP_FSYNC returned errno %d", status); 14763c96341aSnarayan } 14773c96341aSnarayan } 14783c96341aSnarayan 1479d10e4ef2Snarayan if ((vd->initialized & VD_DRING) && 1480d10e4ef2Snarayan ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0)) 14813af08d82Slm66018 PR0("ldc_mem_dring_unmap() returned errno %d", status); 1482d10e4ef2Snarayan 14833af08d82Slm66018 vd_free_dring_task(vd); 14843af08d82Slm66018 14853af08d82Slm66018 /* Free the staging buffer for msgs */ 14863af08d82Slm66018 if (vd->vio_msgp != NULL) { 14873af08d82Slm66018 kmem_free(vd->vio_msgp, vd->max_msglen); 14883af08d82Slm66018 vd->vio_msgp = NULL; 1489d10e4ef2Snarayan } 1490d10e4ef2Snarayan 14913af08d82Slm66018 /* Free the inband message buffer */ 14923af08d82Slm66018 if (vd->inband_task.msg != NULL) { 14933af08d82Slm66018 kmem_free(vd->inband_task.msg, vd->max_msglen); 14943af08d82Slm66018 vd->inband_task.msg = 
NULL; 14953af08d82Slm66018 } 1496d10e4ef2Snarayan 1497d10e4ef2Snarayan mutex_enter(&vd->lock); 14983af08d82Slm66018 14993af08d82Slm66018 if (vd->reset_ldc) 15003af08d82Slm66018 PR0("taking down LDC channel"); 1501e1ebb9ecSlm66018 if (vd->reset_ldc && ((status = ldc_down(vd->ldc_handle)) != 0)) 15023af08d82Slm66018 PR0("ldc_down() returned errno %d", status); 1503d10e4ef2Snarayan 15042f5224aeSachartre /* Reset exclusive access rights */ 15052f5224aeSachartre vd_reset_access(vd); 15062f5224aeSachartre 1507d10e4ef2Snarayan vd->initialized &= ~(VD_SID | VD_SEQ_NUM | VD_DRING); 1508d10e4ef2Snarayan vd->state = VD_STATE_INIT; 1509d10e4ef2Snarayan vd->max_msglen = sizeof (vio_msg_t); /* baseline vio message size */ 1510d10e4ef2Snarayan 15113af08d82Slm66018 /* Allocate the staging buffer */ 15123af08d82Slm66018 vd->vio_msgp = kmem_alloc(vd->max_msglen, KM_SLEEP); 15133af08d82Slm66018 15143af08d82Slm66018 PR0("calling ldc_up\n"); 15153af08d82Slm66018 (void) ldc_up(vd->ldc_handle); 15163af08d82Slm66018 1517d10e4ef2Snarayan vd->reset_state = B_FALSE; 1518d10e4ef2Snarayan vd->reset_ldc = B_FALSE; 15193af08d82Slm66018 1520d10e4ef2Snarayan mutex_exit(&vd->lock); 1521d10e4ef2Snarayan } 1522d10e4ef2Snarayan 15233af08d82Slm66018 static void vd_recv_msg(void *arg); 15243af08d82Slm66018 15253af08d82Slm66018 static void 15263af08d82Slm66018 vd_mark_in_reset(vd_t *vd) 15273af08d82Slm66018 { 15283af08d82Slm66018 int status; 15293af08d82Slm66018 15303af08d82Slm66018 PR0("vd_mark_in_reset: marking vd in reset\n"); 15313af08d82Slm66018 15323af08d82Slm66018 vd_need_reset(vd, B_FALSE); 15333af08d82Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, DDI_SLEEP); 15343af08d82Slm66018 if (status == DDI_FAILURE) { 15353af08d82Slm66018 PR0("cannot schedule task to recv msg\n"); 15363af08d82Slm66018 vd_need_reset(vd, B_TRUE); 15373af08d82Slm66018 return; 15383af08d82Slm66018 } 15393af08d82Slm66018 } 15403af08d82Slm66018 1541d10e4ef2Snarayan static int 15423c96341aSnarayan 
vd_mark_elem_done(vd_t *vd, int idx, int elem_status, int elem_nbytes) 1543d10e4ef2Snarayan { 1544d10e4ef2Snarayan boolean_t accepted; 1545d10e4ef2Snarayan int status; 1546d10e4ef2Snarayan vd_dring_entry_t *elem = VD_DRING_ELEM(idx); 1547d10e4ef2Snarayan 15483af08d82Slm66018 if (vd->reset_state) 15493af08d82Slm66018 return (0); 1550d10e4ef2Snarayan 1551d10e4ef2Snarayan /* Acquire the element */ 15523af08d82Slm66018 if (!vd->reset_state && 15533af08d82Slm66018 (status = ldc_mem_dring_acquire(vd->dring_handle, idx, idx)) != 0) { 15543af08d82Slm66018 if (status == ECONNRESET) { 15553af08d82Slm66018 vd_mark_in_reset(vd); 15563af08d82Slm66018 return (0); 15573af08d82Slm66018 } else { 15583af08d82Slm66018 PR0("ldc_mem_dring_acquire() returned errno %d", 15593af08d82Slm66018 status); 1560d10e4ef2Snarayan return (status); 1561d10e4ef2Snarayan } 15623af08d82Slm66018 } 1563d10e4ef2Snarayan 1564d10e4ef2Snarayan /* Set the element's status and mark it done */ 1565d10e4ef2Snarayan accepted = (elem->hdr.dstate == VIO_DESC_ACCEPTED); 1566d10e4ef2Snarayan if (accepted) { 15673c96341aSnarayan elem->payload.nbytes = elem_nbytes; 1568d10e4ef2Snarayan elem->payload.status = elem_status; 1569d10e4ef2Snarayan elem->hdr.dstate = VIO_DESC_DONE; 1570d10e4ef2Snarayan } else { 1571d10e4ef2Snarayan /* Perhaps client timed out waiting for I/O... 
*/ 15723af08d82Slm66018 PR0("element %u no longer \"accepted\"", idx); 1573d10e4ef2Snarayan VD_DUMP_DRING_ELEM(elem); 1574d10e4ef2Snarayan } 1575d10e4ef2Snarayan /* Release the element */ 15763af08d82Slm66018 if (!vd->reset_state && 15773af08d82Slm66018 (status = ldc_mem_dring_release(vd->dring_handle, idx, idx)) != 0) { 15783af08d82Slm66018 if (status == ECONNRESET) { 15793af08d82Slm66018 vd_mark_in_reset(vd); 15803af08d82Slm66018 return (0); 15813af08d82Slm66018 } else { 15823af08d82Slm66018 PR0("ldc_mem_dring_release() returned errno %d", 15833af08d82Slm66018 status); 1584d10e4ef2Snarayan return (status); 1585d10e4ef2Snarayan } 15863af08d82Slm66018 } 1587d10e4ef2Snarayan 1588d10e4ef2Snarayan return (accepted ? 0 : EINVAL); 1589d10e4ef2Snarayan } 1590d10e4ef2Snarayan 1591205eeb1aSlm66018 /* 1592205eeb1aSlm66018 * Return Values 1593205eeb1aSlm66018 * 0 - operation completed successfully 1594205eeb1aSlm66018 * EIO - encountered LDC / task error 1595205eeb1aSlm66018 * 1596205eeb1aSlm66018 * Side Effect 1597205eeb1aSlm66018 * sets request->status = <disk operation status> 1598205eeb1aSlm66018 */ 1599205eeb1aSlm66018 static int 1600205eeb1aSlm66018 vd_complete_bio(vd_task_t *task) 1601d10e4ef2Snarayan { 1602d10e4ef2Snarayan int status = 0; 1603205eeb1aSlm66018 int rv = 0; 1604d10e4ef2Snarayan vd_t *vd = task->vd; 1605d10e4ef2Snarayan vd_dring_payload_t *request = task->request; 1606d10e4ef2Snarayan struct buf *buf = &task->buf; 1607d10e4ef2Snarayan 1608d10e4ef2Snarayan 1609d10e4ef2Snarayan ASSERT(vd != NULL); 1610d10e4ef2Snarayan ASSERT(request != NULL); 1611d10e4ef2Snarayan ASSERT(task->msg != NULL); 1612d10e4ef2Snarayan ASSERT(task->msglen >= sizeof (*task->msg)); 16133c96341aSnarayan ASSERT(!vd->file); 1614205eeb1aSlm66018 ASSERT(request->slice != VD_SLICE_NONE); 1615d10e4ef2Snarayan 1616205eeb1aSlm66018 /* Wait for the I/O to complete [ call to ldi_strategy(9f) ] */ 1617d10e4ef2Snarayan request->status = biowait(buf); 1618d10e4ef2Snarayan 16193c96341aSnarayan /* 
return back the number of bytes read/written */ 16203c96341aSnarayan request->nbytes = buf->b_bcount - buf->b_resid; 16213c96341aSnarayan 16224bac2208Snarayan /* Release the buffer */ 16233af08d82Slm66018 if (!vd->reset_state) 16244bac2208Snarayan status = ldc_mem_release(task->mhdl, 0, buf->b_bcount); 16254bac2208Snarayan if (status) { 16263af08d82Slm66018 PR0("ldc_mem_release() returned errno %d copying to " 16273af08d82Slm66018 "client", status); 16283af08d82Slm66018 if (status == ECONNRESET) { 16293af08d82Slm66018 vd_mark_in_reset(vd); 16303af08d82Slm66018 } 1631205eeb1aSlm66018 rv = EIO; 16321ae08745Sheppo } 16331ae08745Sheppo 16343af08d82Slm66018 /* Unmap the memory, even if in reset */ 16354bac2208Snarayan status = ldc_mem_unmap(task->mhdl); 16364bac2208Snarayan if (status) { 16373af08d82Slm66018 PR0("ldc_mem_unmap() returned errno %d copying to client", 16384bac2208Snarayan status); 16393af08d82Slm66018 if (status == ECONNRESET) { 16403af08d82Slm66018 vd_mark_in_reset(vd); 16413af08d82Slm66018 } 1642205eeb1aSlm66018 rv = EIO; 16434bac2208Snarayan } 16444bac2208Snarayan 1645d10e4ef2Snarayan biofini(buf); 16461ae08745Sheppo 1647205eeb1aSlm66018 return (rv); 1648205eeb1aSlm66018 } 1649205eeb1aSlm66018 1650205eeb1aSlm66018 /* 1651205eeb1aSlm66018 * Description: 1652205eeb1aSlm66018 * This function is called by the two functions called by a taskq 1653205eeb1aSlm66018 * [ vd_complete_notify() and vd_serial_notify()) ] to send the 1654205eeb1aSlm66018 * message to the client. 
1655205eeb1aSlm66018 * 1656205eeb1aSlm66018 * Parameters: 1657205eeb1aSlm66018 * arg - opaque pointer to structure containing task to be completed 1658205eeb1aSlm66018 * 1659205eeb1aSlm66018 * Return Values 1660205eeb1aSlm66018 * None 1661205eeb1aSlm66018 */ 1662205eeb1aSlm66018 static void 1663205eeb1aSlm66018 vd_notify(vd_task_t *task) 1664205eeb1aSlm66018 { 1665205eeb1aSlm66018 int status; 1666205eeb1aSlm66018 1667205eeb1aSlm66018 ASSERT(task != NULL); 1668205eeb1aSlm66018 ASSERT(task->vd != NULL); 1669205eeb1aSlm66018 1670205eeb1aSlm66018 if (task->vd->reset_state) 1671205eeb1aSlm66018 return; 1672205eeb1aSlm66018 1673205eeb1aSlm66018 /* 1674205eeb1aSlm66018 * Send the "ack" or "nack" back to the client; if sending the message 1675205eeb1aSlm66018 * via LDC fails, arrange to reset both the connection state and LDC 1676205eeb1aSlm66018 * itself 1677205eeb1aSlm66018 */ 1678205eeb1aSlm66018 PR2("Sending %s", 1679205eeb1aSlm66018 (task->msg->tag.vio_subtype == VIO_SUBTYPE_ACK) ? "ACK" : "NACK"); 1680205eeb1aSlm66018 1681205eeb1aSlm66018 status = send_msg(task->vd->ldc_handle, task->msg, task->msglen); 1682205eeb1aSlm66018 switch (status) { 1683205eeb1aSlm66018 case 0: 1684205eeb1aSlm66018 break; 1685205eeb1aSlm66018 case ECONNRESET: 1686205eeb1aSlm66018 vd_mark_in_reset(task->vd); 1687205eeb1aSlm66018 break; 1688205eeb1aSlm66018 default: 1689205eeb1aSlm66018 PR0("initiating full reset"); 1690205eeb1aSlm66018 vd_need_reset(task->vd, B_TRUE); 1691205eeb1aSlm66018 break; 1692205eeb1aSlm66018 } 1693205eeb1aSlm66018 1694205eeb1aSlm66018 DTRACE_PROBE1(task__end, vd_task_t *, task); 1695205eeb1aSlm66018 } 1696205eeb1aSlm66018 1697205eeb1aSlm66018 /* 1698205eeb1aSlm66018 * Description: 1699205eeb1aSlm66018 * Mark the Dring entry as Done and (if necessary) send an ACK/NACK to 1700205eeb1aSlm66018 * the vDisk client 1701205eeb1aSlm66018 * 1702205eeb1aSlm66018 * Parameters: 1703205eeb1aSlm66018 * task - structure containing the request sent from client 1704205eeb1aSlm66018 * 
1705205eeb1aSlm66018 * Return Values 1706205eeb1aSlm66018 * None 1707205eeb1aSlm66018 */ 1708205eeb1aSlm66018 static void 1709205eeb1aSlm66018 vd_complete_notify(vd_task_t *task) 1710205eeb1aSlm66018 { 1711205eeb1aSlm66018 int status = 0; 1712205eeb1aSlm66018 vd_t *vd = task->vd; 1713205eeb1aSlm66018 vd_dring_payload_t *request = task->request; 1714205eeb1aSlm66018 1715d10e4ef2Snarayan /* Update the dring element for a dring client */ 1716*f0ca1d9aSsb155480 if (!vd->reset_state && (vd->xfer_mode == VIO_DRING_MODE_V1_0)) { 17173c96341aSnarayan status = vd_mark_elem_done(vd, task->index, 17183c96341aSnarayan request->status, request->nbytes); 17193af08d82Slm66018 if (status == ECONNRESET) 17203af08d82Slm66018 vd_mark_in_reset(vd); 17213af08d82Slm66018 } 17221ae08745Sheppo 1723d10e4ef2Snarayan /* 1724205eeb1aSlm66018 * If a transport error occurred while marking the element done or 1725205eeb1aSlm66018 * previously while executing the task, arrange to "nack" the message 1726205eeb1aSlm66018 * when the final task in the descriptor element range completes 1727d10e4ef2Snarayan */ 1728205eeb1aSlm66018 if ((status != 0) || (task->status != 0)) 1729d10e4ef2Snarayan task->msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 17301ae08745Sheppo 1731d10e4ef2Snarayan /* 1732d10e4ef2Snarayan * Only the final task for a range of elements will respond to and 1733d10e4ef2Snarayan * free the message 1734d10e4ef2Snarayan */ 17353af08d82Slm66018 if (task->type == VD_NONFINAL_RANGE_TASK) { 1736d10e4ef2Snarayan return; 17373af08d82Slm66018 } 17381ae08745Sheppo 1739205eeb1aSlm66018 vd_notify(task); 1740205eeb1aSlm66018 } 1741205eeb1aSlm66018 1742d10e4ef2Snarayan /* 1743205eeb1aSlm66018 * Description: 1744205eeb1aSlm66018 * This is the basic completion function called to handle inband data 1745205eeb1aSlm66018 * requests and handshake messages. All it needs to do is trigger a 1746205eeb1aSlm66018 * message to the client that the request is completed. 
1747205eeb1aSlm66018 * 1748205eeb1aSlm66018 * Parameters: 1749205eeb1aSlm66018 * arg - opaque pointer to structure containing task to be completed 1750205eeb1aSlm66018 * 1751205eeb1aSlm66018 * Return Values 1752205eeb1aSlm66018 * None 1753d10e4ef2Snarayan */ 1754205eeb1aSlm66018 static void 1755205eeb1aSlm66018 vd_serial_notify(void *arg) 1756205eeb1aSlm66018 { 1757205eeb1aSlm66018 vd_task_t *task = (vd_task_t *)arg; 1758205eeb1aSlm66018 1759205eeb1aSlm66018 ASSERT(task != NULL); 1760205eeb1aSlm66018 vd_notify(task); 17611ae08745Sheppo } 17621ae08745Sheppo 17632f5224aeSachartre /* ARGSUSED */ 17642f5224aeSachartre static int 17652f5224aeSachartre vd_geom2dk_geom(void *vd_buf, size_t vd_buf_len, void *ioctl_arg) 17660a55fbb7Slm66018 { 17670a55fbb7Slm66018 VD_GEOM2DK_GEOM((vd_geom_t *)vd_buf, (struct dk_geom *)ioctl_arg); 17682f5224aeSachartre return (0); 17690a55fbb7Slm66018 } 17700a55fbb7Slm66018 17712f5224aeSachartre /* ARGSUSED */ 17722f5224aeSachartre static int 17732f5224aeSachartre vd_vtoc2vtoc(void *vd_buf, size_t vd_buf_len, void *ioctl_arg) 17740a55fbb7Slm66018 { 17750a55fbb7Slm66018 VD_VTOC2VTOC((vd_vtoc_t *)vd_buf, (struct vtoc *)ioctl_arg); 17762f5224aeSachartre return (0); 17770a55fbb7Slm66018 } 17780a55fbb7Slm66018 17790a55fbb7Slm66018 static void 17800a55fbb7Slm66018 dk_geom2vd_geom(void *ioctl_arg, void *vd_buf) 17810a55fbb7Slm66018 { 17820a55fbb7Slm66018 DK_GEOM2VD_GEOM((struct dk_geom *)ioctl_arg, (vd_geom_t *)vd_buf); 17830a55fbb7Slm66018 } 17840a55fbb7Slm66018 17850a55fbb7Slm66018 static void 17860a55fbb7Slm66018 vtoc2vd_vtoc(void *ioctl_arg, void *vd_buf) 17870a55fbb7Slm66018 { 17880a55fbb7Slm66018 VTOC2VD_VTOC((struct vtoc *)ioctl_arg, (vd_vtoc_t *)vd_buf); 17890a55fbb7Slm66018 } 17900a55fbb7Slm66018 17912f5224aeSachartre static int 17922f5224aeSachartre vd_get_efi_in(void *vd_buf, size_t vd_buf_len, void *ioctl_arg) 17934bac2208Snarayan { 17944bac2208Snarayan vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; 17954bac2208Snarayan dk_efi_t *dk_efi = 
(dk_efi_t *)ioctl_arg; 17962f5224aeSachartre size_t data_len; 17972f5224aeSachartre 17982f5224aeSachartre data_len = vd_buf_len - (sizeof (vd_efi_t) - sizeof (uint64_t)); 17992f5224aeSachartre if (vd_efi->length > data_len) 18002f5224aeSachartre return (EINVAL); 18014bac2208Snarayan 18024bac2208Snarayan dk_efi->dki_lba = vd_efi->lba; 18034bac2208Snarayan dk_efi->dki_length = vd_efi->length; 18044bac2208Snarayan dk_efi->dki_data = kmem_zalloc(vd_efi->length, KM_SLEEP); 18052f5224aeSachartre return (0); 18064bac2208Snarayan } 18074bac2208Snarayan 18084bac2208Snarayan static void 18094bac2208Snarayan vd_get_efi_out(void *ioctl_arg, void *vd_buf) 18104bac2208Snarayan { 18114bac2208Snarayan int len; 18124bac2208Snarayan vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; 18134bac2208Snarayan dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg; 18144bac2208Snarayan 18154bac2208Snarayan len = vd_efi->length; 18164bac2208Snarayan DK_EFI2VD_EFI(dk_efi, vd_efi); 18174bac2208Snarayan kmem_free(dk_efi->dki_data, len); 18184bac2208Snarayan } 18194bac2208Snarayan 18202f5224aeSachartre static int 18212f5224aeSachartre vd_set_efi_in(void *vd_buf, size_t vd_buf_len, void *ioctl_arg) 18224bac2208Snarayan { 18234bac2208Snarayan vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; 18244bac2208Snarayan dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg; 18252f5224aeSachartre size_t data_len; 18262f5224aeSachartre 18272f5224aeSachartre data_len = vd_buf_len - (sizeof (vd_efi_t) - sizeof (uint64_t)); 18282f5224aeSachartre if (vd_efi->length > data_len) 18292f5224aeSachartre return (EINVAL); 18304bac2208Snarayan 18314bac2208Snarayan dk_efi->dki_data = kmem_alloc(vd_efi->length, KM_SLEEP); 18324bac2208Snarayan VD_EFI2DK_EFI(vd_efi, dk_efi); 18332f5224aeSachartre return (0); 18344bac2208Snarayan } 18354bac2208Snarayan 18364bac2208Snarayan static void 18374bac2208Snarayan vd_set_efi_out(void *ioctl_arg, void *vd_buf) 18384bac2208Snarayan { 18394bac2208Snarayan vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; 18404bac2208Snarayan dk_efi_t *dk_efi = 
(dk_efi_t *)ioctl_arg; 18414bac2208Snarayan 18424bac2208Snarayan kmem_free(dk_efi->dki_data, vd_efi->length); 18434bac2208Snarayan } 18444bac2208Snarayan 18452f5224aeSachartre static int 18462f5224aeSachartre vd_scsicmd_in(void *vd_buf, size_t vd_buf_len, void *ioctl_arg) 18472f5224aeSachartre { 18482f5224aeSachartre size_t vd_scsi_len; 18492f5224aeSachartre vd_scsi_t *vd_scsi = (vd_scsi_t *)vd_buf; 18502f5224aeSachartre struct uscsi_cmd *uscsi = (struct uscsi_cmd *)ioctl_arg; 18512f5224aeSachartre 18522f5224aeSachartre /* check buffer size */ 18532f5224aeSachartre vd_scsi_len = VD_SCSI_SIZE; 18542f5224aeSachartre vd_scsi_len += P2ROUNDUP(vd_scsi->cdb_len, sizeof (uint64_t)); 18552f5224aeSachartre vd_scsi_len += P2ROUNDUP(vd_scsi->sense_len, sizeof (uint64_t)); 18562f5224aeSachartre vd_scsi_len += P2ROUNDUP(vd_scsi->datain_len, sizeof (uint64_t)); 18572f5224aeSachartre vd_scsi_len += P2ROUNDUP(vd_scsi->dataout_len, sizeof (uint64_t)); 18582f5224aeSachartre 18592f5224aeSachartre ASSERT(vd_scsi_len % sizeof (uint64_t) == 0); 18602f5224aeSachartre 18612f5224aeSachartre if (vd_buf_len < vd_scsi_len) 18622f5224aeSachartre return (EINVAL); 18632f5224aeSachartre 18642f5224aeSachartre /* set flags */ 18652f5224aeSachartre uscsi->uscsi_flags = vd_scsi_debug; 18662f5224aeSachartre 18672f5224aeSachartre if (vd_scsi->options & VD_SCSI_OPT_NORETRY) { 18682f5224aeSachartre uscsi->uscsi_flags |= USCSI_ISOLATE; 18692f5224aeSachartre uscsi->uscsi_flags |= USCSI_DIAGNOSE; 18702f5224aeSachartre } 18712f5224aeSachartre 18722f5224aeSachartre /* task attribute */ 18732f5224aeSachartre switch (vd_scsi->task_attribute) { 18742f5224aeSachartre case VD_SCSI_TASK_ACA: 18752f5224aeSachartre uscsi->uscsi_flags |= USCSI_HEAD; 18762f5224aeSachartre break; 18772f5224aeSachartre case VD_SCSI_TASK_HQUEUE: 18782f5224aeSachartre uscsi->uscsi_flags |= USCSI_HTAG; 18792f5224aeSachartre break; 18802f5224aeSachartre case VD_SCSI_TASK_ORDERED: 18812f5224aeSachartre uscsi->uscsi_flags |= USCSI_OTAG; 
18822f5224aeSachartre break; 18832f5224aeSachartre default: 18842f5224aeSachartre uscsi->uscsi_flags |= USCSI_NOTAG; 18852f5224aeSachartre break; 18862f5224aeSachartre } 18872f5224aeSachartre 18882f5224aeSachartre /* timeout */ 18892f5224aeSachartre uscsi->uscsi_timeout = vd_scsi->timeout; 18902f5224aeSachartre 18912f5224aeSachartre /* cdb data */ 18922f5224aeSachartre uscsi->uscsi_cdb = (caddr_t)VD_SCSI_DATA_CDB(vd_scsi); 18932f5224aeSachartre uscsi->uscsi_cdblen = vd_scsi->cdb_len; 18942f5224aeSachartre 18952f5224aeSachartre /* sense buffer */ 18962f5224aeSachartre if (vd_scsi->sense_len != 0) { 18972f5224aeSachartre uscsi->uscsi_flags |= USCSI_RQENABLE; 18982f5224aeSachartre uscsi->uscsi_rqbuf = (caddr_t)VD_SCSI_DATA_SENSE(vd_scsi); 18992f5224aeSachartre uscsi->uscsi_rqlen = vd_scsi->sense_len; 19002f5224aeSachartre } 19012f5224aeSachartre 19022f5224aeSachartre if (vd_scsi->datain_len != 0 && vd_scsi->dataout_len != 0) { 19032f5224aeSachartre /* uscsi does not support read/write request */ 19042f5224aeSachartre return (EINVAL); 19052f5224aeSachartre } 19062f5224aeSachartre 19072f5224aeSachartre /* request data-in */ 19082f5224aeSachartre if (vd_scsi->datain_len != 0) { 19092f5224aeSachartre uscsi->uscsi_flags |= USCSI_READ; 19102f5224aeSachartre uscsi->uscsi_buflen = vd_scsi->datain_len; 19112f5224aeSachartre uscsi->uscsi_bufaddr = (char *)VD_SCSI_DATA_IN(vd_scsi); 19122f5224aeSachartre } 19132f5224aeSachartre 19142f5224aeSachartre /* request data-out */ 19152f5224aeSachartre if (vd_scsi->dataout_len != 0) { 19162f5224aeSachartre uscsi->uscsi_buflen = vd_scsi->dataout_len; 19172f5224aeSachartre uscsi->uscsi_bufaddr = (char *)VD_SCSI_DATA_OUT(vd_scsi); 19182f5224aeSachartre } 19192f5224aeSachartre 19202f5224aeSachartre return (0); 19212f5224aeSachartre } 19222f5224aeSachartre 19232f5224aeSachartre static void 19242f5224aeSachartre vd_scsicmd_out(void *ioctl_arg, void *vd_buf) 19252f5224aeSachartre { 19262f5224aeSachartre vd_scsi_t *vd_scsi = (vd_scsi_t *)vd_buf; 
19272f5224aeSachartre struct uscsi_cmd *uscsi = (struct uscsi_cmd *)ioctl_arg; 19282f5224aeSachartre 19292f5224aeSachartre /* output fields */ 19302f5224aeSachartre vd_scsi->cmd_status = uscsi->uscsi_status; 19312f5224aeSachartre 19322f5224aeSachartre /* sense data */ 19332f5224aeSachartre if ((uscsi->uscsi_flags & USCSI_RQENABLE) && 19342f5224aeSachartre (uscsi->uscsi_status == STATUS_CHECK || 19352f5224aeSachartre uscsi->uscsi_status == STATUS_TERMINATED)) { 19362f5224aeSachartre vd_scsi->sense_status = uscsi->uscsi_rqstatus; 19372f5224aeSachartre if (uscsi->uscsi_rqstatus == STATUS_GOOD) 19382f5224aeSachartre vd_scsi->sense_len -= uscsi->uscsi_resid; 19392f5224aeSachartre else 19402f5224aeSachartre vd_scsi->sense_len = 0; 19412f5224aeSachartre } else { 19422f5224aeSachartre vd_scsi->sense_len = 0; 19432f5224aeSachartre } 19442f5224aeSachartre 19452f5224aeSachartre if (uscsi->uscsi_status != STATUS_GOOD) { 19462f5224aeSachartre vd_scsi->dataout_len = 0; 19472f5224aeSachartre vd_scsi->datain_len = 0; 19482f5224aeSachartre return; 19492f5224aeSachartre } 19502f5224aeSachartre 19512f5224aeSachartre if (uscsi->uscsi_flags & USCSI_READ) { 19522f5224aeSachartre /* request data (read) */ 19532f5224aeSachartre vd_scsi->datain_len -= uscsi->uscsi_resid; 19542f5224aeSachartre vd_scsi->dataout_len = 0; 19552f5224aeSachartre } else { 19562f5224aeSachartre /* request data (write) */ 19572f5224aeSachartre vd_scsi->datain_len = 0; 19582f5224aeSachartre vd_scsi->dataout_len -= uscsi->uscsi_resid; 19592f5224aeSachartre } 19602f5224aeSachartre } 19612f5224aeSachartre 1962690555a1Sachartre static ushort_t 19633c96341aSnarayan vd_lbl2cksum(struct dk_label *label) 19643c96341aSnarayan { 19653c96341aSnarayan int count; 1966690555a1Sachartre ushort_t sum, *sp; 19673c96341aSnarayan 19683c96341aSnarayan count = (sizeof (struct dk_label)) / (sizeof (short)) - 1; 1969690555a1Sachartre sp = (ushort_t *)label; 19703c96341aSnarayan sum = 0; 19713c96341aSnarayan while (count--) { 
19723c96341aSnarayan sum ^= *sp++; 19733c96341aSnarayan } 19743c96341aSnarayan 19753c96341aSnarayan return (sum); 19763c96341aSnarayan } 19773c96341aSnarayan 197887a7269eSachartre /* 197987a7269eSachartre * Handle ioctls to a disk slice. 1980205eeb1aSlm66018 * 1981205eeb1aSlm66018 * Return Values 1982205eeb1aSlm66018 * 0 - Indicates that there are no errors in disk operations 1983205eeb1aSlm66018 * ENOTSUP - Unknown disk label type or unsupported DKIO ioctl 1984205eeb1aSlm66018 * EINVAL - Not enough room to copy the EFI label 1985205eeb1aSlm66018 * 198687a7269eSachartre */ 19871ae08745Sheppo static int 19880a55fbb7Slm66018 vd_do_slice_ioctl(vd_t *vd, int cmd, void *ioctl_arg) 19891ae08745Sheppo { 19904bac2208Snarayan dk_efi_t *dk_ioc; 1991edcc0754Sachartre int rval; 1992edcc0754Sachartre 1993edcc0754Sachartre ASSERT(vd->vdisk_type == VD_DISK_TYPE_SLICE); 1994edcc0754Sachartre 1995edcc0754Sachartre if (cmd == DKIOCFLUSHWRITECACHE) { 1996edcc0754Sachartre if (vd->file) { 1997edcc0754Sachartre return (VOP_FSYNC(vd->file_vnode, FSYNC, kcred, NULL)); 1998edcc0754Sachartre } else { 1999edcc0754Sachartre return (ldi_ioctl(vd->ldi_handle[0], cmd, 2000edcc0754Sachartre (intptr_t)ioctl_arg, vd->open_flags | FKIOCTL, 2001edcc0754Sachartre kcred, &rval)); 2002edcc0754Sachartre } 2003edcc0754Sachartre } 20044bac2208Snarayan 20054bac2208Snarayan switch (vd->vdisk_label) { 20064bac2208Snarayan 2007edcc0754Sachartre /* ioctls for a single slice disk with a VTOC label */ 20084bac2208Snarayan case VD_DISK_LABEL_VTOC: 20094bac2208Snarayan 20101ae08745Sheppo switch (cmd) { 20111ae08745Sheppo case DKIOCGGEOM: 20120a55fbb7Slm66018 ASSERT(ioctl_arg != NULL); 20130a55fbb7Slm66018 bcopy(&vd->dk_geom, ioctl_arg, sizeof (vd->dk_geom)); 20141ae08745Sheppo return (0); 20151ae08745Sheppo case DKIOCGVTOC: 20160a55fbb7Slm66018 ASSERT(ioctl_arg != NULL); 20170a55fbb7Slm66018 bcopy(&vd->vtoc, ioctl_arg, sizeof (vd->vtoc)); 20181ae08745Sheppo return (0); 201987a7269eSachartre default: 
20203c96341aSnarayan return (ENOTSUP); 202187a7269eSachartre } 202287a7269eSachartre 2023edcc0754Sachartre /* ioctls for a single slice disk with an EFI label */ 202487a7269eSachartre case VD_DISK_LABEL_EFI: 202587a7269eSachartre 202687a7269eSachartre switch (cmd) { 202787a7269eSachartre case DKIOCGETEFI: 20283c96341aSnarayan ASSERT(ioctl_arg != NULL); 202987a7269eSachartre dk_ioc = (dk_efi_t *)ioctl_arg; 2030edcc0754Sachartre 2031edcc0754Sachartre /* 2032edcc0754Sachartre * For a single slice disk with an EFI label, we define 2033edcc0754Sachartre * a fake EFI label with the GPT at LBA 1 and one GPE 2034edcc0754Sachartre * at LBA 2. So we return the GPT or the GPE depending 2035edcc0754Sachartre * on which LBA is requested. 2036edcc0754Sachartre */ 2037edcc0754Sachartre if (dk_ioc->dki_lba == 1) { 2038edcc0754Sachartre 2039edcc0754Sachartre /* return the EFI GPT */ 2040edcc0754Sachartre if (dk_ioc->dki_length < sizeof (efi_gpt_t)) 204187a7269eSachartre return (EINVAL); 2042edcc0754Sachartre 2043edcc0754Sachartre bcopy(&vd->efi_gpt, dk_ioc->dki_data, 2044edcc0754Sachartre sizeof (efi_gpt_t)); 2045edcc0754Sachartre 2046edcc0754Sachartre /* also return the GPE if possible */ 2047edcc0754Sachartre if (dk_ioc->dki_length >= sizeof (efi_gpt_t) + 2048edcc0754Sachartre sizeof (efi_gpe_t)) { 2049edcc0754Sachartre bcopy(&vd->efi_gpe, dk_ioc->dki_data + 2050edcc0754Sachartre 1, sizeof (efi_gpe_t)); 2051edcc0754Sachartre } 2052edcc0754Sachartre 2053edcc0754Sachartre } else if (dk_ioc->dki_lba == 2) { 2054edcc0754Sachartre 2055edcc0754Sachartre /* return the EFI GPE */ 2056edcc0754Sachartre if (dk_ioc->dki_length < sizeof (efi_gpe_t)) 2057edcc0754Sachartre return (EINVAL); 2058edcc0754Sachartre 2059edcc0754Sachartre bcopy(&vd->efi_gpe, dk_ioc->dki_data, 2060edcc0754Sachartre sizeof (efi_gpe_t)); 2061edcc0754Sachartre 2062edcc0754Sachartre } else { 2063edcc0754Sachartre return (EINVAL); 2064edcc0754Sachartre } 2065edcc0754Sachartre 206687a7269eSachartre return (0); 
206787a7269eSachartre default: 206887a7269eSachartre return (ENOTSUP); 206987a7269eSachartre } 207087a7269eSachartre 207187a7269eSachartre default: 2072205eeb1aSlm66018 /* Unknown disk label type */ 207387a7269eSachartre return (ENOTSUP); 207487a7269eSachartre } 207587a7269eSachartre } 207687a7269eSachartre 2077edcc0754Sachartre static int 2078edcc0754Sachartre vds_efi_alloc_and_read(vd_t *vd, efi_gpt_t **gpt, efi_gpe_t **gpe) 2079edcc0754Sachartre { 2080edcc0754Sachartre vd_efi_dev_t edev; 2081edcc0754Sachartre int status; 2082edcc0754Sachartre 2083edcc0754Sachartre VD_EFI_DEV_SET(edev, vd, (vd_efi_ioctl_func)vd_backend_ioctl); 2084edcc0754Sachartre 2085edcc0754Sachartre status = vd_efi_alloc_and_read(&edev, gpt, gpe); 2086edcc0754Sachartre 2087edcc0754Sachartre return (status); 2088edcc0754Sachartre } 2089edcc0754Sachartre 2090edcc0754Sachartre static void 2091edcc0754Sachartre vds_efi_free(vd_t *vd, efi_gpt_t *gpt, efi_gpe_t *gpe) 2092edcc0754Sachartre { 2093edcc0754Sachartre vd_efi_dev_t edev; 2094edcc0754Sachartre 2095edcc0754Sachartre VD_EFI_DEV_SET(edev, vd, (vd_efi_ioctl_func)vd_backend_ioctl); 2096edcc0754Sachartre 2097edcc0754Sachartre vd_efi_free(&edev, gpt, gpe); 2098edcc0754Sachartre } 2099edcc0754Sachartre 2100edcc0754Sachartre static int 2101edcc0754Sachartre vd_file_validate_efi(vd_t *vd) 2102edcc0754Sachartre { 2103edcc0754Sachartre efi_gpt_t *gpt; 2104edcc0754Sachartre efi_gpe_t *gpe; 2105edcc0754Sachartre int i, nparts, status; 2106edcc0754Sachartre struct uuid efi_reserved = EFI_RESERVED; 2107edcc0754Sachartre 2108edcc0754Sachartre if ((status = vds_efi_alloc_and_read(vd, &gpt, &gpe)) != 0) 2109edcc0754Sachartre return (status); 2110edcc0754Sachartre 2111edcc0754Sachartre bzero(&vd->vtoc, sizeof (struct vtoc)); 2112edcc0754Sachartre bzero(&vd->dk_geom, sizeof (struct dk_geom)); 2113edcc0754Sachartre bzero(vd->slices, sizeof (vd_slice_t) * VD_MAXPART); 2114edcc0754Sachartre 2115edcc0754Sachartre vd->efi_reserved = -1; 2116edcc0754Sachartre 
2117edcc0754Sachartre nparts = gpt->efi_gpt_NumberOfPartitionEntries; 2118edcc0754Sachartre 2119edcc0754Sachartre for (i = 0; i < nparts && i < VD_MAXPART; i++) { 2120edcc0754Sachartre 2121edcc0754Sachartre if (gpe[i].efi_gpe_StartingLBA == 0 || 2122edcc0754Sachartre gpe[i].efi_gpe_EndingLBA == 0) { 2123edcc0754Sachartre continue; 2124edcc0754Sachartre } 2125edcc0754Sachartre 2126edcc0754Sachartre vd->slices[i].start = gpe[i].efi_gpe_StartingLBA; 2127edcc0754Sachartre vd->slices[i].nblocks = gpe[i].efi_gpe_EndingLBA - 2128edcc0754Sachartre gpe[i].efi_gpe_StartingLBA + 1; 2129edcc0754Sachartre 2130edcc0754Sachartre if (bcmp(&gpe[i].efi_gpe_PartitionTypeGUID, &efi_reserved, 2131edcc0754Sachartre sizeof (struct uuid)) == 0) 2132edcc0754Sachartre vd->efi_reserved = i; 2133edcc0754Sachartre 2134edcc0754Sachartre } 2135edcc0754Sachartre 2136edcc0754Sachartre ASSERT(vd->vdisk_size != 0); 2137edcc0754Sachartre vd->slices[VD_EFI_WD_SLICE].start = 0; 2138edcc0754Sachartre vd->slices[VD_EFI_WD_SLICE].nblocks = vd->vdisk_size; 2139edcc0754Sachartre 2140edcc0754Sachartre vds_efi_free(vd, gpt, gpe); 2141edcc0754Sachartre 2142edcc0754Sachartre return (status); 2143edcc0754Sachartre } 2144edcc0754Sachartre 214587a7269eSachartre /* 214678fcd0a1Sachartre * Function: 214778fcd0a1Sachartre * vd_file_validate_geometry 2148205eeb1aSlm66018 * 214978fcd0a1Sachartre * Description: 215078fcd0a1Sachartre * Read the label and validate the geometry of a disk image. The driver 215178fcd0a1Sachartre * label, vtoc and geometry information are updated according to the 215278fcd0a1Sachartre * label read from the disk image. 215378fcd0a1Sachartre * 215478fcd0a1Sachartre * If no valid label is found, the label is set to unknown and the 215578fcd0a1Sachartre * function returns EINVAL, but a default vtoc and geometry are provided 2156edcc0754Sachartre * to the driver. If an EFI label is found, ENOTSUP is returned. 
215778fcd0a1Sachartre * 215878fcd0a1Sachartre * Parameters: 215978fcd0a1Sachartre * vd - disk on which the operation is performed. 216078fcd0a1Sachartre * 216178fcd0a1Sachartre * Return Code: 216278fcd0a1Sachartre * 0 - success. 216378fcd0a1Sachartre * EIO - error reading the label from the disk image. 216478fcd0a1Sachartre * EINVAL - unknown disk label. 2165edcc0754Sachartre * ENOTSUP - geometry not applicable (EFI label). 216687a7269eSachartre */ 216787a7269eSachartre static int 216878fcd0a1Sachartre vd_file_validate_geometry(vd_t *vd) 216987a7269eSachartre { 217087a7269eSachartre struct dk_label label; 217178fcd0a1Sachartre struct dk_geom *geom = &vd->dk_geom; 217278fcd0a1Sachartre struct vtoc *vtoc = &vd->vtoc; 217378fcd0a1Sachartre int i; 217478fcd0a1Sachartre int status = 0; 217587a7269eSachartre 217687a7269eSachartre ASSERT(vd->file); 2177edcc0754Sachartre ASSERT(vd->vdisk_type == VD_DISK_TYPE_DISK); 217887a7269eSachartre 217987a7269eSachartre if (VD_FILE_LABEL_READ(vd, &label) < 0) 218087a7269eSachartre return (EIO); 218187a7269eSachartre 218287a7269eSachartre if (label.dkl_magic != DKL_MAGIC || 218378fcd0a1Sachartre label.dkl_cksum != vd_lbl2cksum(&label) || 218478fcd0a1Sachartre label.dkl_vtoc.v_sanity != VTOC_SANE || 218578fcd0a1Sachartre label.dkl_vtoc.v_nparts != V_NUMPAR) { 2186edcc0754Sachartre 2187edcc0754Sachartre if (vd_file_validate_efi(vd) == 0) { 2188edcc0754Sachartre vd->vdisk_label = VD_DISK_LABEL_EFI; 2189edcc0754Sachartre return (ENOTSUP); 2190edcc0754Sachartre } 2191edcc0754Sachartre 219278fcd0a1Sachartre vd->vdisk_label = VD_DISK_LABEL_UNK; 219378fcd0a1Sachartre vd_file_build_default_label(vd, &label); 219478fcd0a1Sachartre status = EINVAL; 219578fcd0a1Sachartre } else { 219678fcd0a1Sachartre vd->vdisk_label = VD_DISK_LABEL_VTOC; 219778fcd0a1Sachartre } 219887a7269eSachartre 219978fcd0a1Sachartre /* Update the driver geometry */ 220087a7269eSachartre bzero(geom, sizeof (struct dk_geom)); 220178fcd0a1Sachartre 220287a7269eSachartre 
geom->dkg_ncyl = label.dkl_ncyl; 220387a7269eSachartre geom->dkg_acyl = label.dkl_acyl; 220487a7269eSachartre geom->dkg_nhead = label.dkl_nhead; 220587a7269eSachartre geom->dkg_nsect = label.dkl_nsect; 220687a7269eSachartre geom->dkg_intrlv = label.dkl_intrlv; 220787a7269eSachartre geom->dkg_apc = label.dkl_apc; 220887a7269eSachartre geom->dkg_rpm = label.dkl_rpm; 220987a7269eSachartre geom->dkg_pcyl = label.dkl_pcyl; 221087a7269eSachartre geom->dkg_write_reinstruct = label.dkl_write_reinstruct; 221187a7269eSachartre geom->dkg_read_reinstruct = label.dkl_read_reinstruct; 221287a7269eSachartre 221378fcd0a1Sachartre /* Update the driver vtoc */ 221487a7269eSachartre bzero(vtoc, sizeof (struct vtoc)); 221587a7269eSachartre 221687a7269eSachartre vtoc->v_sanity = label.dkl_vtoc.v_sanity; 221787a7269eSachartre vtoc->v_version = label.dkl_vtoc.v_version; 221887a7269eSachartre vtoc->v_sectorsz = DEV_BSIZE; 221987a7269eSachartre vtoc->v_nparts = label.dkl_vtoc.v_nparts; 222087a7269eSachartre 222187a7269eSachartre for (i = 0; i < vtoc->v_nparts; i++) { 222287a7269eSachartre vtoc->v_part[i].p_tag = 222387a7269eSachartre label.dkl_vtoc.v_part[i].p_tag; 222487a7269eSachartre vtoc->v_part[i].p_flag = 222587a7269eSachartre label.dkl_vtoc.v_part[i].p_flag; 222687a7269eSachartre vtoc->v_part[i].p_start = 222787a7269eSachartre label.dkl_map[i].dkl_cylno * 222887a7269eSachartre (label.dkl_nhead * label.dkl_nsect); 222987a7269eSachartre vtoc->v_part[i].p_size = label.dkl_map[i].dkl_nblk; 223087a7269eSachartre vtoc->timestamp[i] = 223187a7269eSachartre label.dkl_vtoc.v_timestamp[i]; 223287a7269eSachartre } 223387a7269eSachartre /* 223487a7269eSachartre * The bootinfo array can not be copied with bcopy() because 223587a7269eSachartre * elements are of type long in vtoc (so 64-bit) and of type 223687a7269eSachartre * int in dk_vtoc (so 32-bit). 
223787a7269eSachartre */ 223887a7269eSachartre vtoc->v_bootinfo[0] = label.dkl_vtoc.v_bootinfo[0]; 223987a7269eSachartre vtoc->v_bootinfo[1] = label.dkl_vtoc.v_bootinfo[1]; 224087a7269eSachartre vtoc->v_bootinfo[2] = label.dkl_vtoc.v_bootinfo[2]; 224187a7269eSachartre bcopy(label.dkl_asciilabel, vtoc->v_asciilabel, 224287a7269eSachartre LEN_DKL_ASCII); 224387a7269eSachartre bcopy(label.dkl_vtoc.v_volume, vtoc->v_volume, 224487a7269eSachartre LEN_DKL_VVOL); 224587a7269eSachartre 2246edcc0754Sachartre /* Update logical partitions */ 2247edcc0754Sachartre bzero(vd->slices, sizeof (vd_slice_t) * VD_MAXPART); 2248edcc0754Sachartre if (vd->vdisk_label != VD_DISK_LABEL_UNK) { 2249edcc0754Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 2250edcc0754Sachartre vd->slices[i].start = vtoc->v_part[i].p_start; 2251edcc0754Sachartre vd->slices[i].nblocks = vtoc->v_part[i].p_size; 2252edcc0754Sachartre } 2253edcc0754Sachartre } 2254edcc0754Sachartre 225578fcd0a1Sachartre return (status); 225678fcd0a1Sachartre } 225778fcd0a1Sachartre 225878fcd0a1Sachartre /* 225978fcd0a1Sachartre * Handle ioctls to a disk image (file-based). 
226078fcd0a1Sachartre * 226178fcd0a1Sachartre * Return Values 226278fcd0a1Sachartre * 0 - Indicates that there are no errors 226378fcd0a1Sachartre * != 0 - Disk operation returned an error 226478fcd0a1Sachartre */ 226578fcd0a1Sachartre static int 226678fcd0a1Sachartre vd_do_file_ioctl(vd_t *vd, int cmd, void *ioctl_arg) 226778fcd0a1Sachartre { 226878fcd0a1Sachartre struct dk_label label; 226978fcd0a1Sachartre struct dk_geom *geom; 227078fcd0a1Sachartre struct vtoc *vtoc; 2271edcc0754Sachartre dk_efi_t *efi; 227278fcd0a1Sachartre int i, rc; 227378fcd0a1Sachartre 227478fcd0a1Sachartre ASSERT(vd->file); 2275edcc0754Sachartre ASSERT(vd->vdisk_type == VD_DISK_TYPE_DISK); 227678fcd0a1Sachartre 227778fcd0a1Sachartre switch (cmd) { 227878fcd0a1Sachartre 227978fcd0a1Sachartre case DKIOCGGEOM: 228078fcd0a1Sachartre ASSERT(ioctl_arg != NULL); 228178fcd0a1Sachartre geom = (struct dk_geom *)ioctl_arg; 228278fcd0a1Sachartre 228378fcd0a1Sachartre rc = vd_file_validate_geometry(vd); 2284edcc0754Sachartre if (rc != 0 && rc != EINVAL) 228578fcd0a1Sachartre return (rc); 228678fcd0a1Sachartre bcopy(&vd->dk_geom, geom, sizeof (struct dk_geom)); 228778fcd0a1Sachartre return (0); 228878fcd0a1Sachartre 228978fcd0a1Sachartre case DKIOCGVTOC: 229078fcd0a1Sachartre ASSERT(ioctl_arg != NULL); 229178fcd0a1Sachartre vtoc = (struct vtoc *)ioctl_arg; 229278fcd0a1Sachartre 229378fcd0a1Sachartre rc = vd_file_validate_geometry(vd); 2294edcc0754Sachartre if (rc != 0 && rc != EINVAL) 229578fcd0a1Sachartre return (rc); 229678fcd0a1Sachartre bcopy(&vd->vtoc, vtoc, sizeof (struct vtoc)); 229787a7269eSachartre return (0); 229887a7269eSachartre 229987a7269eSachartre case DKIOCSGEOM: 230087a7269eSachartre ASSERT(ioctl_arg != NULL); 230187a7269eSachartre geom = (struct dk_geom *)ioctl_arg; 230287a7269eSachartre 230387a7269eSachartre if (geom->dkg_nhead == 0 || geom->dkg_nsect == 0) 230487a7269eSachartre return (EINVAL); 230587a7269eSachartre 230687a7269eSachartre /* 230787a7269eSachartre * The current device 
geometry is not updated, just the driver 230887a7269eSachartre * "notion" of it. The device geometry will be effectively 230987a7269eSachartre * updated when a label is written to the device during a next 231087a7269eSachartre * DKIOCSVTOC. 231187a7269eSachartre */ 231287a7269eSachartre bcopy(ioctl_arg, &vd->dk_geom, sizeof (vd->dk_geom)); 231387a7269eSachartre return (0); 231487a7269eSachartre 231587a7269eSachartre case DKIOCSVTOC: 231687a7269eSachartre ASSERT(ioctl_arg != NULL); 231787a7269eSachartre ASSERT(vd->dk_geom.dkg_nhead != 0 && 231887a7269eSachartre vd->dk_geom.dkg_nsect != 0); 2319690555a1Sachartre vtoc = (struct vtoc *)ioctl_arg; 2320690555a1Sachartre 2321690555a1Sachartre if (vtoc->v_sanity != VTOC_SANE || 2322690555a1Sachartre vtoc->v_sectorsz != DEV_BSIZE || 2323690555a1Sachartre vtoc->v_nparts != V_NUMPAR) 2324690555a1Sachartre return (EINVAL); 2325690555a1Sachartre 2326690555a1Sachartre bzero(&label, sizeof (label)); 2327690555a1Sachartre label.dkl_ncyl = vd->dk_geom.dkg_ncyl; 2328690555a1Sachartre label.dkl_acyl = vd->dk_geom.dkg_acyl; 2329690555a1Sachartre label.dkl_pcyl = vd->dk_geom.dkg_pcyl; 2330690555a1Sachartre label.dkl_nhead = vd->dk_geom.dkg_nhead; 2331690555a1Sachartre label.dkl_nsect = vd->dk_geom.dkg_nsect; 2332690555a1Sachartre label.dkl_intrlv = vd->dk_geom.dkg_intrlv; 2333690555a1Sachartre label.dkl_apc = vd->dk_geom.dkg_apc; 2334690555a1Sachartre label.dkl_rpm = vd->dk_geom.dkg_rpm; 233587a7269eSachartre label.dkl_write_reinstruct = vd->dk_geom.dkg_write_reinstruct; 233687a7269eSachartre label.dkl_read_reinstruct = vd->dk_geom.dkg_read_reinstruct; 2337690555a1Sachartre 233887a7269eSachartre label.dkl_vtoc.v_nparts = V_NUMPAR; 233987a7269eSachartre label.dkl_vtoc.v_sanity = VTOC_SANE; 2340690555a1Sachartre label.dkl_vtoc.v_version = vtoc->v_version; 234187a7269eSachartre for (i = 0; i < V_NUMPAR; i++) { 2342690555a1Sachartre label.dkl_vtoc.v_timestamp[i] = 2343690555a1Sachartre vtoc->timestamp[i]; 2344690555a1Sachartre 
label.dkl_vtoc.v_part[i].p_tag = 2345690555a1Sachartre vtoc->v_part[i].p_tag; 2346690555a1Sachartre label.dkl_vtoc.v_part[i].p_flag = 2347690555a1Sachartre vtoc->v_part[i].p_flag; 2348690555a1Sachartre label.dkl_map[i].dkl_cylno = 2349690555a1Sachartre vtoc->v_part[i].p_start / 2350690555a1Sachartre (label.dkl_nhead * label.dkl_nsect); 2351690555a1Sachartre label.dkl_map[i].dkl_nblk = 2352690555a1Sachartre vtoc->v_part[i].p_size; 23533c96341aSnarayan } 235487a7269eSachartre /* 235587a7269eSachartre * The bootinfo array can not be copied with bcopy() because 235687a7269eSachartre * elements are of type long in vtoc (so 64-bit) and of type 235787a7269eSachartre * int in dk_vtoc (so 32-bit). 235887a7269eSachartre */ 235987a7269eSachartre label.dkl_vtoc.v_bootinfo[0] = vtoc->v_bootinfo[0]; 236087a7269eSachartre label.dkl_vtoc.v_bootinfo[1] = vtoc->v_bootinfo[1]; 236187a7269eSachartre label.dkl_vtoc.v_bootinfo[2] = vtoc->v_bootinfo[2]; 2362690555a1Sachartre bcopy(vtoc->v_asciilabel, label.dkl_asciilabel, 2363690555a1Sachartre LEN_DKL_ASCII); 2364690555a1Sachartre bcopy(vtoc->v_volume, label.dkl_vtoc.v_volume, 2365690555a1Sachartre LEN_DKL_VVOL); 23663c96341aSnarayan 23673c96341aSnarayan /* re-compute checksum */ 2368690555a1Sachartre label.dkl_magic = DKL_MAGIC; 2369690555a1Sachartre label.dkl_cksum = vd_lbl2cksum(&label); 2370690555a1Sachartre 237187a7269eSachartre /* write label to the disk image */ 237287a7269eSachartre if ((rc = vd_file_set_vtoc(vd, &label)) != 0) 237387a7269eSachartre return (rc); 2374690555a1Sachartre 2375edcc0754Sachartre break; 2376edcc0754Sachartre 2377edcc0754Sachartre case DKIOCFLUSHWRITECACHE: 2378edcc0754Sachartre return (VOP_FSYNC(vd->file_vnode, FSYNC, kcred, NULL)); 2379edcc0754Sachartre 2380edcc0754Sachartre case DKIOCGETEFI: 2381edcc0754Sachartre ASSERT(ioctl_arg != NULL); 2382edcc0754Sachartre efi = (dk_efi_t *)ioctl_arg; 2383edcc0754Sachartre 2384edcc0754Sachartre if (vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, 2385edcc0754Sachartre 
(caddr_t)efi->dki_data, efi->dki_lba, efi->dki_length) < 0) 2386edcc0754Sachartre return (EIO); 2387edcc0754Sachartre 2388edcc0754Sachartre return (0); 2389edcc0754Sachartre 2390edcc0754Sachartre case DKIOCSETEFI: 2391edcc0754Sachartre ASSERT(ioctl_arg != NULL); 2392edcc0754Sachartre efi = (dk_efi_t *)ioctl_arg; 2393edcc0754Sachartre 2394edcc0754Sachartre if (vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, 2395edcc0754Sachartre (caddr_t)efi->dki_data, efi->dki_lba, efi->dki_length) < 0) 2396edcc0754Sachartre return (EIO); 2397edcc0754Sachartre 2398edcc0754Sachartre break; 2399edcc0754Sachartre 2400edcc0754Sachartre 2401edcc0754Sachartre default: 2402edcc0754Sachartre return (ENOTSUP); 2403edcc0754Sachartre } 2404edcc0754Sachartre 2405edcc0754Sachartre ASSERT(cmd == DKIOCSVTOC || cmd == DKIOCSETEFI); 2406edcc0754Sachartre 2407edcc0754Sachartre /* label has changed, revalidate the geometry */ 2408edcc0754Sachartre (void) vd_file_validate_geometry(vd); 24093c96341aSnarayan 241087a7269eSachartre /* 241187a7269eSachartre * The disk geometry may have changed, so we need to write 241287a7269eSachartre * the devid (if there is one) so that it is stored at the 241387a7269eSachartre * right location. 241487a7269eSachartre */ 2415edcc0754Sachartre if (vd_file_write_devid(vd, vd->file_devid) != 0) { 241687a7269eSachartre PR0("Fail to write devid"); 24171ae08745Sheppo } 24184bac2208Snarayan 24194bac2208Snarayan return (0); 24204bac2208Snarayan } 2421edcc0754Sachartre 2422edcc0754Sachartre static int 2423edcc0754Sachartre vd_backend_ioctl(vd_t *vd, int cmd, caddr_t arg) 2424edcc0754Sachartre { 2425edcc0754Sachartre int rval = 0, status; 2426edcc0754Sachartre 2427edcc0754Sachartre /* 2428edcc0754Sachartre * Call the appropriate function to execute the ioctl depending 2429edcc0754Sachartre * on the type of vdisk. 
2430edcc0754Sachartre */ 2431edcc0754Sachartre if (vd->vdisk_type == VD_DISK_TYPE_SLICE) { 2432edcc0754Sachartre 2433edcc0754Sachartre /* slice, file or volume exported as a single slice disk */ 2434edcc0754Sachartre status = vd_do_slice_ioctl(vd, cmd, arg); 2435edcc0754Sachartre 2436edcc0754Sachartre } else if (vd->file) { 2437edcc0754Sachartre 2438edcc0754Sachartre /* file or volume exported as a full disk */ 2439edcc0754Sachartre status = vd_do_file_ioctl(vd, cmd, arg); 2440edcc0754Sachartre 2441edcc0754Sachartre } else { 2442edcc0754Sachartre 2443edcc0754Sachartre /* disk device exported as a full disk */ 2444edcc0754Sachartre status = ldi_ioctl(vd->ldi_handle[0], cmd, (intptr_t)arg, 2445edcc0754Sachartre vd->open_flags | FKIOCTL, kcred, &rval); 2446edcc0754Sachartre } 2447edcc0754Sachartre 2448edcc0754Sachartre #ifdef DEBUG 2449edcc0754Sachartre if (rval != 0) { 2450edcc0754Sachartre PR0("ioctl %x set rval = %d, which is not being returned" 2451edcc0754Sachartre " to caller", cmd, rval); 2452edcc0754Sachartre } 2453edcc0754Sachartre #endif /* DEBUG */ 2454edcc0754Sachartre 2455edcc0754Sachartre return (status); 24561ae08745Sheppo } 24571ae08745Sheppo 2458205eeb1aSlm66018 /* 2459205eeb1aSlm66018 * Description: 2460205eeb1aSlm66018 * This is the function that processes the ioctl requests (farming it 2461205eeb1aSlm66018 * out to functions that handle slices, files or whole disks) 2462205eeb1aSlm66018 * 2463205eeb1aSlm66018 * Return Values 2464205eeb1aSlm66018 * 0 - ioctl operation completed successfully 2465205eeb1aSlm66018 * != 0 - The LDC error value encountered 2466205eeb1aSlm66018 * (propagated back up the call stack as a task error) 2467205eeb1aSlm66018 * 2468205eeb1aSlm66018 * Side Effect 2469205eeb1aSlm66018 * sets request->status to the return value of the ioctl function. 
2470205eeb1aSlm66018 */ 24711ae08745Sheppo static int 24720a55fbb7Slm66018 vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void* buf, vd_ioctl_t *ioctl) 24731ae08745Sheppo { 2474edcc0754Sachartre int status = 0; 24751ae08745Sheppo size_t nbytes = request->nbytes; /* modifiable copy */ 24761ae08745Sheppo 24771ae08745Sheppo 24781ae08745Sheppo ASSERT(request->slice < vd->nslices); 24791ae08745Sheppo PR0("Performing %s", ioctl->operation_name); 24801ae08745Sheppo 24810a55fbb7Slm66018 /* Get data from client and convert, if necessary */ 24820a55fbb7Slm66018 if (ioctl->copyin != NULL) { 24831ae08745Sheppo ASSERT(nbytes != 0 && buf != NULL); 24841ae08745Sheppo PR1("Getting \"arg\" data from client"); 24851ae08745Sheppo if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, 24861ae08745Sheppo request->cookie, request->ncookies, 24871ae08745Sheppo LDC_COPY_IN)) != 0) { 24883af08d82Slm66018 PR0("ldc_mem_copy() returned errno %d " 24891ae08745Sheppo "copying from client", status); 24901ae08745Sheppo return (status); 24911ae08745Sheppo } 24920a55fbb7Slm66018 24930a55fbb7Slm66018 /* Convert client's data, if necessary */ 24942f5224aeSachartre if (ioctl->copyin == VD_IDENTITY_IN) { 24952f5224aeSachartre /* use client buffer */ 24960a55fbb7Slm66018 ioctl->arg = buf; 24972f5224aeSachartre } else { 24982f5224aeSachartre /* convert client vdisk operation data to ioctl data */ 24992f5224aeSachartre status = (ioctl->copyin)(buf, nbytes, 25002f5224aeSachartre (void *)ioctl->arg); 25012f5224aeSachartre if (status != 0) { 25022f5224aeSachartre request->status = status; 25032f5224aeSachartre return (0); 25042f5224aeSachartre } 25052f5224aeSachartre } 25062f5224aeSachartre } 25072f5224aeSachartre 25082f5224aeSachartre if (ioctl->operation == VD_OP_SCSICMD) { 25092f5224aeSachartre struct uscsi_cmd *uscsi = (struct uscsi_cmd *)ioctl->arg; 25102f5224aeSachartre 25112f5224aeSachartre /* check write permission */ 25122f5224aeSachartre if (!(vd->open_flags & FWRITE) && 
25132f5224aeSachartre !(uscsi->uscsi_flags & USCSI_READ)) { 25142f5224aeSachartre PR0("uscsi fails because backend is opened read-only"); 25152f5224aeSachartre request->status = EROFS; 25162f5224aeSachartre return (0); 25172f5224aeSachartre } 25181ae08745Sheppo } 25191ae08745Sheppo 25201ae08745Sheppo /* 2521edcc0754Sachartre * Send the ioctl to the disk backend. 25221ae08745Sheppo */ 2523edcc0754Sachartre request->status = vd_backend_ioctl(vd, ioctl->cmd, ioctl->arg); 2524205eeb1aSlm66018 2525205eeb1aSlm66018 if (request->status != 0) { 2526205eeb1aSlm66018 PR0("ioctl(%s) = errno %d", ioctl->cmd_name, request->status); 25272f5224aeSachartre if (ioctl->operation == VD_OP_SCSICMD && 25282f5224aeSachartre ((struct uscsi_cmd *)ioctl->arg)->uscsi_status != 0) 25292f5224aeSachartre /* 25302f5224aeSachartre * USCSICMD has reported an error and the uscsi_status 25312f5224aeSachartre * field is not zero. This means that the SCSI command 25322f5224aeSachartre * has completed but it has an error. So we should 25332f5224aeSachartre * mark the VD operation has succesfully completed 25342f5224aeSachartre * and clients can check the SCSI status field for 25352f5224aeSachartre * SCSI errors. 
25362f5224aeSachartre */ 25372f5224aeSachartre request->status = 0; 25382f5224aeSachartre else 2539205eeb1aSlm66018 return (0); 2540205eeb1aSlm66018 } 25411ae08745Sheppo 25420a55fbb7Slm66018 /* Convert data and send to client, if necessary */ 25430a55fbb7Slm66018 if (ioctl->copyout != NULL) { 25441ae08745Sheppo ASSERT(nbytes != 0 && buf != NULL); 25451ae08745Sheppo PR1("Sending \"arg\" data to client"); 25460a55fbb7Slm66018 25470a55fbb7Slm66018 /* Convert ioctl data to vdisk operation data, if necessary */ 25482f5224aeSachartre if (ioctl->copyout != VD_IDENTITY_OUT) 25490a55fbb7Slm66018 (ioctl->copyout)((void *)ioctl->arg, buf); 25500a55fbb7Slm66018 25511ae08745Sheppo if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, 25521ae08745Sheppo request->cookie, request->ncookies, 25531ae08745Sheppo LDC_COPY_OUT)) != 0) { 25543af08d82Slm66018 PR0("ldc_mem_copy() returned errno %d " 25551ae08745Sheppo "copying to client", status); 25561ae08745Sheppo return (status); 25571ae08745Sheppo } 25581ae08745Sheppo } 25591ae08745Sheppo 25601ae08745Sheppo return (status); 25611ae08745Sheppo } 25621ae08745Sheppo 25631ae08745Sheppo #define RNDSIZE(expr) P2ROUNDUP(sizeof (expr), sizeof (uint64_t)) 2564205eeb1aSlm66018 2565205eeb1aSlm66018 /* 2566205eeb1aSlm66018 * Description: 2567205eeb1aSlm66018 * This generic function is called by the task queue to complete 2568205eeb1aSlm66018 * the processing of the tasks. The specific completion function 2569205eeb1aSlm66018 * is passed in as a field in the task pointer. 
2570205eeb1aSlm66018 * 2571205eeb1aSlm66018 * Parameters: 2572205eeb1aSlm66018 * arg - opaque pointer to structure containing task to be completed 2573205eeb1aSlm66018 * 2574205eeb1aSlm66018 * Return Values 2575205eeb1aSlm66018 * None 2576205eeb1aSlm66018 */ 2577205eeb1aSlm66018 static void 2578205eeb1aSlm66018 vd_complete(void *arg) 2579205eeb1aSlm66018 { 2580205eeb1aSlm66018 vd_task_t *task = (vd_task_t *)arg; 2581205eeb1aSlm66018 2582205eeb1aSlm66018 ASSERT(task != NULL); 2583205eeb1aSlm66018 ASSERT(task->status == EINPROGRESS); 2584205eeb1aSlm66018 ASSERT(task->completef != NULL); 2585205eeb1aSlm66018 2586205eeb1aSlm66018 task->status = task->completef(task); 2587205eeb1aSlm66018 if (task->status) 2588205eeb1aSlm66018 PR0("%s: Error %d completing task", __func__, task->status); 2589205eeb1aSlm66018 2590205eeb1aSlm66018 /* Now notify the vDisk client */ 2591205eeb1aSlm66018 vd_complete_notify(task); 2592205eeb1aSlm66018 } 2593205eeb1aSlm66018 25941ae08745Sheppo static int 2595d10e4ef2Snarayan vd_ioctl(vd_task_t *task) 25961ae08745Sheppo { 259787a7269eSachartre int i, status; 25981ae08745Sheppo void *buf = NULL; 25990a55fbb7Slm66018 struct dk_geom dk_geom = {0}; 26000a55fbb7Slm66018 struct vtoc vtoc = {0}; 26014bac2208Snarayan struct dk_efi dk_efi = {0}; 26022f5224aeSachartre struct uscsi_cmd uscsi = {0}; 2603d10e4ef2Snarayan vd_t *vd = task->vd; 2604d10e4ef2Snarayan vd_dring_payload_t *request = task->request; 26050a55fbb7Slm66018 vd_ioctl_t ioctl[] = { 26060a55fbb7Slm66018 /* Command (no-copy) operations */ 26070a55fbb7Slm66018 {VD_OP_FLUSH, STRINGIZE(VD_OP_FLUSH), 0, 26080a55fbb7Slm66018 DKIOCFLUSHWRITECACHE, STRINGIZE(DKIOCFLUSHWRITECACHE), 2609047ba61eSachartre NULL, NULL, NULL, B_TRUE}, 26100a55fbb7Slm66018 26110a55fbb7Slm66018 /* "Get" (copy-out) operations */ 26120a55fbb7Slm66018 {VD_OP_GET_WCE, STRINGIZE(VD_OP_GET_WCE), RNDSIZE(int), 26130a55fbb7Slm66018 DKIOCGETWCE, STRINGIZE(DKIOCGETWCE), 26142f5224aeSachartre NULL, VD_IDENTITY_IN, VD_IDENTITY_OUT, 
B_FALSE}, 26150a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, STRINGIZE(VD_OP_GET_DISKGEOM), 26160a55fbb7Slm66018 RNDSIZE(vd_geom_t), 26170a55fbb7Slm66018 DKIOCGGEOM, STRINGIZE(DKIOCGGEOM), 2618047ba61eSachartre &dk_geom, NULL, dk_geom2vd_geom, B_FALSE}, 26190a55fbb7Slm66018 {VD_OP_GET_VTOC, STRINGIZE(VD_OP_GET_VTOC), RNDSIZE(vd_vtoc_t), 26200a55fbb7Slm66018 DKIOCGVTOC, STRINGIZE(DKIOCGVTOC), 2621047ba61eSachartre &vtoc, NULL, vtoc2vd_vtoc, B_FALSE}, 26224bac2208Snarayan {VD_OP_GET_EFI, STRINGIZE(VD_OP_GET_EFI), RNDSIZE(vd_efi_t), 26234bac2208Snarayan DKIOCGETEFI, STRINGIZE(DKIOCGETEFI), 2624047ba61eSachartre &dk_efi, vd_get_efi_in, vd_get_efi_out, B_FALSE}, 26250a55fbb7Slm66018 26260a55fbb7Slm66018 /* "Set" (copy-in) operations */ 26270a55fbb7Slm66018 {VD_OP_SET_WCE, STRINGIZE(VD_OP_SET_WCE), RNDSIZE(int), 26280a55fbb7Slm66018 DKIOCSETWCE, STRINGIZE(DKIOCSETWCE), 26292f5224aeSachartre NULL, VD_IDENTITY_IN, VD_IDENTITY_OUT, B_TRUE}, 26300a55fbb7Slm66018 {VD_OP_SET_DISKGEOM, STRINGIZE(VD_OP_SET_DISKGEOM), 26310a55fbb7Slm66018 RNDSIZE(vd_geom_t), 26320a55fbb7Slm66018 DKIOCSGEOM, STRINGIZE(DKIOCSGEOM), 2633047ba61eSachartre &dk_geom, vd_geom2dk_geom, NULL, B_TRUE}, 26340a55fbb7Slm66018 {VD_OP_SET_VTOC, STRINGIZE(VD_OP_SET_VTOC), RNDSIZE(vd_vtoc_t), 26350a55fbb7Slm66018 DKIOCSVTOC, STRINGIZE(DKIOCSVTOC), 2636047ba61eSachartre &vtoc, vd_vtoc2vtoc, NULL, B_TRUE}, 26374bac2208Snarayan {VD_OP_SET_EFI, STRINGIZE(VD_OP_SET_EFI), RNDSIZE(vd_efi_t), 26384bac2208Snarayan DKIOCSETEFI, STRINGIZE(DKIOCSETEFI), 2639047ba61eSachartre &dk_efi, vd_set_efi_in, vd_set_efi_out, B_TRUE}, 26402f5224aeSachartre 26412f5224aeSachartre {VD_OP_SCSICMD, STRINGIZE(VD_OP_SCSICMD), RNDSIZE(vd_scsi_t), 26422f5224aeSachartre USCSICMD, STRINGIZE(USCSICMD), 26432f5224aeSachartre &uscsi, vd_scsicmd_in, vd_scsicmd_out, B_FALSE}, 26440a55fbb7Slm66018 }; 26451ae08745Sheppo size_t nioctls = (sizeof (ioctl))/(sizeof (ioctl[0])); 26461ae08745Sheppo 26471ae08745Sheppo 2648d10e4ef2Snarayan ASSERT(vd != NULL); 
2649d10e4ef2Snarayan ASSERT(request != NULL); 26501ae08745Sheppo ASSERT(request->slice < vd->nslices); 26511ae08745Sheppo 26521ae08745Sheppo /* 26531ae08745Sheppo * Determine ioctl corresponding to caller's "operation" and 26541ae08745Sheppo * validate caller's "nbytes" 26551ae08745Sheppo */ 26561ae08745Sheppo for (i = 0; i < nioctls; i++) { 26571ae08745Sheppo if (request->operation == ioctl[i].operation) { 26580a55fbb7Slm66018 /* LDC memory operations require 8-byte multiples */ 26590a55fbb7Slm66018 ASSERT(ioctl[i].nbytes % sizeof (uint64_t) == 0); 26600a55fbb7Slm66018 26614bac2208Snarayan if (request->operation == VD_OP_GET_EFI || 26622f5224aeSachartre request->operation == VD_OP_SET_EFI || 26632f5224aeSachartre request->operation == VD_OP_SCSICMD) { 26644bac2208Snarayan if (request->nbytes >= ioctl[i].nbytes) 26654bac2208Snarayan break; 26663af08d82Slm66018 PR0("%s: Expected at least nbytes = %lu, " 26674bac2208Snarayan "got %lu", ioctl[i].operation_name, 26684bac2208Snarayan ioctl[i].nbytes, request->nbytes); 26694bac2208Snarayan return (EINVAL); 26704bac2208Snarayan } 26714bac2208Snarayan 26720a55fbb7Slm66018 if (request->nbytes != ioctl[i].nbytes) { 26733af08d82Slm66018 PR0("%s: Expected nbytes = %lu, got %lu", 26740a55fbb7Slm66018 ioctl[i].operation_name, ioctl[i].nbytes, 26750a55fbb7Slm66018 request->nbytes); 26761ae08745Sheppo return (EINVAL); 26771ae08745Sheppo } 26781ae08745Sheppo 26791ae08745Sheppo break; 26801ae08745Sheppo } 26811ae08745Sheppo } 26821ae08745Sheppo ASSERT(i < nioctls); /* because "operation" already validated */ 26831ae08745Sheppo 2684047ba61eSachartre if (!(vd->open_flags & FWRITE) && ioctl[i].write) { 2685047ba61eSachartre PR0("%s fails because backend is opened read-only", 2686047ba61eSachartre ioctl[i].operation_name); 2687047ba61eSachartre request->status = EROFS; 2688047ba61eSachartre return (0); 2689047ba61eSachartre } 2690047ba61eSachartre 26911ae08745Sheppo if (request->nbytes) 26921ae08745Sheppo buf = 
kmem_zalloc(request->nbytes, KM_SLEEP); 26931ae08745Sheppo status = vd_do_ioctl(vd, request, buf, &ioctl[i]); 26941ae08745Sheppo if (request->nbytes) 26951ae08745Sheppo kmem_free(buf, request->nbytes); 269687a7269eSachartre 26971ae08745Sheppo return (status); 26981ae08745Sheppo } 26991ae08745Sheppo 27004bac2208Snarayan static int 27014bac2208Snarayan vd_get_devid(vd_task_t *task) 27024bac2208Snarayan { 27034bac2208Snarayan vd_t *vd = task->vd; 27044bac2208Snarayan vd_dring_payload_t *request = task->request; 27054bac2208Snarayan vd_devid_t *vd_devid; 27064bac2208Snarayan impl_devid_t *devid; 270787a7269eSachartre int status, bufid_len, devid_len, len, sz; 27083af08d82Slm66018 int bufbytes; 27094bac2208Snarayan 27103af08d82Slm66018 PR1("Get Device ID, nbytes=%ld", request->nbytes); 27114bac2208Snarayan 27123c96341aSnarayan if (vd->file) { 271387a7269eSachartre if (vd->file_devid == NULL) { 27143af08d82Slm66018 PR2("No Device ID"); 2715205eeb1aSlm66018 request->status = ENOENT; 2716205eeb1aSlm66018 return (0); 271787a7269eSachartre } else { 271887a7269eSachartre sz = ddi_devid_sizeof(vd->file_devid); 271987a7269eSachartre devid = kmem_alloc(sz, KM_SLEEP); 272087a7269eSachartre bcopy(vd->file_devid, devid, sz); 272187a7269eSachartre } 272287a7269eSachartre } else { 272387a7269eSachartre if (ddi_lyr_get_devid(vd->dev[request->slice], 272487a7269eSachartre (ddi_devid_t *)&devid) != DDI_SUCCESS) { 272587a7269eSachartre PR2("No Device ID"); 2726205eeb1aSlm66018 request->status = ENOENT; 2727205eeb1aSlm66018 return (0); 272887a7269eSachartre } 27294bac2208Snarayan } 27304bac2208Snarayan 27314bac2208Snarayan bufid_len = request->nbytes - sizeof (vd_devid_t) + 1; 27324bac2208Snarayan devid_len = DEVID_GETLEN(devid); 27334bac2208Snarayan 27343af08d82Slm66018 /* 27353af08d82Slm66018 * Save the buffer size here for use in deallocation. 27363af08d82Slm66018 * The actual number of bytes copied is returned in 27373af08d82Slm66018 * the 'nbytes' field of the request structure. 
27383af08d82Slm66018 */ 27393af08d82Slm66018 bufbytes = request->nbytes; 27403af08d82Slm66018 27413af08d82Slm66018 vd_devid = kmem_zalloc(bufbytes, KM_SLEEP); 27424bac2208Snarayan vd_devid->length = devid_len; 27434bac2208Snarayan vd_devid->type = DEVID_GETTYPE(devid); 27444bac2208Snarayan 27454bac2208Snarayan len = (devid_len > bufid_len)? bufid_len : devid_len; 27464bac2208Snarayan 27474bac2208Snarayan bcopy(devid->did_id, vd_devid->id, len); 27484bac2208Snarayan 274978fcd0a1Sachartre request->status = 0; 275078fcd0a1Sachartre 27514bac2208Snarayan /* LDC memory operations require 8-byte multiples */ 27524bac2208Snarayan ASSERT(request->nbytes % sizeof (uint64_t) == 0); 27534bac2208Snarayan 27544bac2208Snarayan if ((status = ldc_mem_copy(vd->ldc_handle, (caddr_t)vd_devid, 0, 27554bac2208Snarayan &request->nbytes, request->cookie, request->ncookies, 27564bac2208Snarayan LDC_COPY_OUT)) != 0) { 27573af08d82Slm66018 PR0("ldc_mem_copy() returned errno %d copying to client", 27584bac2208Snarayan status); 27594bac2208Snarayan } 27603af08d82Slm66018 PR1("post mem_copy: nbytes=%ld", request->nbytes); 27614bac2208Snarayan 27623af08d82Slm66018 kmem_free(vd_devid, bufbytes); 27634bac2208Snarayan ddi_devid_free((ddi_devid_t)devid); 27644bac2208Snarayan 27654bac2208Snarayan return (status); 27664bac2208Snarayan } 27674bac2208Snarayan 27682f5224aeSachartre static int 27692f5224aeSachartre vd_scsi_reset(vd_t *vd) 27702f5224aeSachartre { 27712f5224aeSachartre int rval, status; 27722f5224aeSachartre struct uscsi_cmd uscsi = { 0 }; 27732f5224aeSachartre 27742f5224aeSachartre uscsi.uscsi_flags = vd_scsi_debug | USCSI_RESET; 27752f5224aeSachartre uscsi.uscsi_timeout = vd_scsi_rdwr_timeout; 27762f5224aeSachartre 27772f5224aeSachartre status = ldi_ioctl(vd->ldi_handle[0], USCSICMD, (intptr_t)&uscsi, 27782f5224aeSachartre (vd->open_flags | FKIOCTL), kcred, &rval); 27792f5224aeSachartre 27802f5224aeSachartre return (status); 27812f5224aeSachartre } 27822f5224aeSachartre 
27832f5224aeSachartre static int 27842f5224aeSachartre vd_reset(vd_task_t *task) 27852f5224aeSachartre { 27862f5224aeSachartre vd_t *vd = task->vd; 27872f5224aeSachartre vd_dring_payload_t *request = task->request; 27882f5224aeSachartre 27892f5224aeSachartre ASSERT(request->operation == VD_OP_RESET); 27902f5224aeSachartre ASSERT(vd->scsi); 27912f5224aeSachartre 27922f5224aeSachartre PR0("Performing VD_OP_RESET"); 27932f5224aeSachartre 27942f5224aeSachartre if (request->nbytes != 0) { 27952f5224aeSachartre PR0("VD_OP_RESET: Expected nbytes = 0, got %lu", 27962f5224aeSachartre request->nbytes); 27972f5224aeSachartre return (EINVAL); 27982f5224aeSachartre } 27992f5224aeSachartre 28002f5224aeSachartre request->status = vd_scsi_reset(vd); 28012f5224aeSachartre 28022f5224aeSachartre return (0); 28032f5224aeSachartre } 28042f5224aeSachartre 28052f5224aeSachartre static int 28062f5224aeSachartre vd_get_capacity(vd_task_t *task) 28072f5224aeSachartre { 28082f5224aeSachartre int rv; 28092f5224aeSachartre size_t nbytes; 28102f5224aeSachartre vd_t *vd = task->vd; 28112f5224aeSachartre vd_dring_payload_t *request = task->request; 28122f5224aeSachartre vd_capacity_t vd_cap = { 0 }; 28132f5224aeSachartre 28142f5224aeSachartre ASSERT(request->operation == VD_OP_GET_CAPACITY); 28152f5224aeSachartre ASSERT(vd->scsi); 28162f5224aeSachartre 28172f5224aeSachartre PR0("Performing VD_OP_GET_CAPACITY"); 28182f5224aeSachartre 28192f5224aeSachartre nbytes = request->nbytes; 28202f5224aeSachartre 28212f5224aeSachartre if (nbytes != RNDSIZE(vd_capacity_t)) { 28222f5224aeSachartre PR0("VD_OP_GET_CAPACITY: Expected nbytes = %lu, got %lu", 28232f5224aeSachartre RNDSIZE(vd_capacity_t), nbytes); 28242f5224aeSachartre return (EINVAL); 28252f5224aeSachartre } 28262f5224aeSachartre 28272f5224aeSachartre if (vd->vdisk_size == VD_SIZE_UNKNOWN) { 28282f5224aeSachartre if (vd_setup_mediainfo(vd) != 0) 28292f5224aeSachartre ASSERT(vd->vdisk_size == VD_SIZE_UNKNOWN); 28302f5224aeSachartre } 
28312f5224aeSachartre 28322f5224aeSachartre ASSERT(vd->vdisk_size != 0); 28332f5224aeSachartre 28342f5224aeSachartre request->status = 0; 28352f5224aeSachartre 28362f5224aeSachartre vd_cap.vdisk_block_size = vd->vdisk_block_size; 28372f5224aeSachartre vd_cap.vdisk_size = vd->vdisk_size; 28382f5224aeSachartre 28392f5224aeSachartre if ((rv = ldc_mem_copy(vd->ldc_handle, (char *)&vd_cap, 0, &nbytes, 28402f5224aeSachartre request->cookie, request->ncookies, LDC_COPY_OUT)) != 0) { 28412f5224aeSachartre PR0("ldc_mem_copy() returned errno %d copying to client", rv); 28422f5224aeSachartre return (rv); 28432f5224aeSachartre } 28442f5224aeSachartre 28452f5224aeSachartre return (0); 28462f5224aeSachartre } 28472f5224aeSachartre 28482f5224aeSachartre static int 28492f5224aeSachartre vd_get_access(vd_task_t *task) 28502f5224aeSachartre { 28512f5224aeSachartre uint64_t access; 28522f5224aeSachartre int rv, rval = 0; 28532f5224aeSachartre size_t nbytes; 28542f5224aeSachartre vd_t *vd = task->vd; 28552f5224aeSachartre vd_dring_payload_t *request = task->request; 28562f5224aeSachartre 28572f5224aeSachartre ASSERT(request->operation == VD_OP_GET_ACCESS); 28582f5224aeSachartre ASSERT(vd->scsi); 28592f5224aeSachartre 28602f5224aeSachartre PR0("Performing VD_OP_GET_ACCESS"); 28612f5224aeSachartre 28622f5224aeSachartre nbytes = request->nbytes; 28632f5224aeSachartre 28642f5224aeSachartre if (nbytes != sizeof (uint64_t)) { 28652f5224aeSachartre PR0("VD_OP_GET_ACCESS: Expected nbytes = %lu, got %lu", 28662f5224aeSachartre sizeof (uint64_t), nbytes); 28672f5224aeSachartre return (EINVAL); 28682f5224aeSachartre } 28692f5224aeSachartre 28702f5224aeSachartre request->status = ldi_ioctl(vd->ldi_handle[request->slice], MHIOCSTATUS, 28712f5224aeSachartre NULL, (vd->open_flags | FKIOCTL), kcred, &rval); 28722f5224aeSachartre 28732f5224aeSachartre if (request->status != 0) 28742f5224aeSachartre return (0); 28752f5224aeSachartre 28762f5224aeSachartre access = (rval == 0)? 
VD_ACCESS_ALLOWED : VD_ACCESS_DENIED; 28772f5224aeSachartre 28782f5224aeSachartre if ((rv = ldc_mem_copy(vd->ldc_handle, (char *)&access, 0, &nbytes, 28792f5224aeSachartre request->cookie, request->ncookies, LDC_COPY_OUT)) != 0) { 28802f5224aeSachartre PR0("ldc_mem_copy() returned errno %d copying to client", rv); 28812f5224aeSachartre return (rv); 28822f5224aeSachartre } 28832f5224aeSachartre 28842f5224aeSachartre return (0); 28852f5224aeSachartre } 28862f5224aeSachartre 28872f5224aeSachartre static int 28882f5224aeSachartre vd_set_access(vd_task_t *task) 28892f5224aeSachartre { 28902f5224aeSachartre uint64_t flags; 28912f5224aeSachartre int rv, rval; 28922f5224aeSachartre size_t nbytes; 28932f5224aeSachartre vd_t *vd = task->vd; 28942f5224aeSachartre vd_dring_payload_t *request = task->request; 28952f5224aeSachartre 28962f5224aeSachartre ASSERT(request->operation == VD_OP_SET_ACCESS); 28972f5224aeSachartre ASSERT(vd->scsi); 28982f5224aeSachartre 28992f5224aeSachartre nbytes = request->nbytes; 29002f5224aeSachartre 29012f5224aeSachartre if (nbytes != sizeof (uint64_t)) { 29022f5224aeSachartre PR0("VD_OP_SET_ACCESS: Expected nbytes = %lu, got %lu", 29032f5224aeSachartre sizeof (uint64_t), nbytes); 29042f5224aeSachartre return (EINVAL); 29052f5224aeSachartre } 29062f5224aeSachartre 29072f5224aeSachartre if ((rv = ldc_mem_copy(vd->ldc_handle, (char *)&flags, 0, &nbytes, 29082f5224aeSachartre request->cookie, request->ncookies, LDC_COPY_IN)) != 0) { 29092f5224aeSachartre PR0("ldc_mem_copy() returned errno %d copying from client", rv); 29102f5224aeSachartre return (rv); 29112f5224aeSachartre } 29122f5224aeSachartre 29132f5224aeSachartre if (flags == VD_ACCESS_SET_CLEAR) { 29142f5224aeSachartre PR0("Performing VD_OP_SET_ACCESS (CLEAR)"); 29152f5224aeSachartre request->status = ldi_ioctl(vd->ldi_handle[request->slice], 29162f5224aeSachartre MHIOCRELEASE, NULL, (vd->open_flags | FKIOCTL), kcred, 29172f5224aeSachartre &rval); 29182f5224aeSachartre if (request->status == 0) 
29192f5224aeSachartre vd->ownership = B_FALSE; 29202f5224aeSachartre return (0); 29212f5224aeSachartre } 29222f5224aeSachartre 29232f5224aeSachartre /* 29242f5224aeSachartre * As per the VIO spec, the PREEMPT and PRESERVE flags are only valid 29252f5224aeSachartre * when the EXCLUSIVE flag is set. 29262f5224aeSachartre */ 29272f5224aeSachartre if (!(flags & VD_ACCESS_SET_EXCLUSIVE)) { 29282f5224aeSachartre PR0("Invalid VD_OP_SET_ACCESS flags: 0x%lx", flags); 29292f5224aeSachartre request->status = EINVAL; 29302f5224aeSachartre return (0); 29312f5224aeSachartre } 29322f5224aeSachartre 29332f5224aeSachartre switch (flags & (VD_ACCESS_SET_PREEMPT | VD_ACCESS_SET_PRESERVE)) { 29342f5224aeSachartre 29352f5224aeSachartre case VD_ACCESS_SET_PREEMPT | VD_ACCESS_SET_PRESERVE: 29362f5224aeSachartre /* 29372f5224aeSachartre * Flags EXCLUSIVE and PREEMPT and PRESERVE. We have to 29382f5224aeSachartre * acquire exclusive access rights, preserve them and we 29392f5224aeSachartre * can use preemption. So we can use the MHIOCTKNOWN ioctl. 29402f5224aeSachartre */ 29412f5224aeSachartre PR0("Performing VD_OP_SET_ACCESS (EXCLUSIVE|PREEMPT|PRESERVE)"); 29422f5224aeSachartre request->status = ldi_ioctl(vd->ldi_handle[request->slice], 29432f5224aeSachartre MHIOCTKOWN, NULL, (vd->open_flags | FKIOCTL), kcred, &rval); 29442f5224aeSachartre break; 29452f5224aeSachartre 29462f5224aeSachartre case VD_ACCESS_SET_PRESERVE: 29472f5224aeSachartre /* 29482f5224aeSachartre * Flags EXCLUSIVE and PRESERVE. We have to acquire exclusive 29492f5224aeSachartre * access rights and preserve them, but not preempt any other 29502f5224aeSachartre * host. So we need to use the MHIOCTKOWN ioctl to enable the 29512f5224aeSachartre * "preserve" feature but we can not called it directly 29522f5224aeSachartre * because it uses preemption. So before that, we use the 29532f5224aeSachartre * MHIOCQRESERVE ioctl to ensure we can get exclusive rights 29542f5224aeSachartre * without preempting anyone. 
29552f5224aeSachartre */ 29562f5224aeSachartre PR0("Performing VD_OP_SET_ACCESS (EXCLUSIVE|PRESERVE)"); 29572f5224aeSachartre request->status = ldi_ioctl(vd->ldi_handle[request->slice], 29582f5224aeSachartre MHIOCQRESERVE, NULL, (vd->open_flags | FKIOCTL), kcred, 29592f5224aeSachartre &rval); 29602f5224aeSachartre if (request->status != 0) 29612f5224aeSachartre break; 29622f5224aeSachartre request->status = ldi_ioctl(vd->ldi_handle[request->slice], 29632f5224aeSachartre MHIOCTKOWN, NULL, (vd->open_flags | FKIOCTL), kcred, &rval); 29642f5224aeSachartre break; 29652f5224aeSachartre 29662f5224aeSachartre case VD_ACCESS_SET_PREEMPT: 29672f5224aeSachartre /* 29682f5224aeSachartre * Flags EXCLUSIVE and PREEMPT. We have to acquire exclusive 29692f5224aeSachartre * access rights and we can use preemption. So we try to do 29702f5224aeSachartre * a SCSI reservation, if it fails we reset the disk to clear 29712f5224aeSachartre * any reservation and we try to reserve again. 29722f5224aeSachartre */ 29732f5224aeSachartre PR0("Performing VD_OP_SET_ACCESS (EXCLUSIVE|PREEMPT)"); 29742f5224aeSachartre request->status = ldi_ioctl(vd->ldi_handle[request->slice], 29752f5224aeSachartre MHIOCQRESERVE, NULL, (vd->open_flags | FKIOCTL), kcred, 29762f5224aeSachartre &rval); 29772f5224aeSachartre if (request->status == 0) 29782f5224aeSachartre break; 29792f5224aeSachartre 29802f5224aeSachartre /* reset the disk */ 29812f5224aeSachartre (void) vd_scsi_reset(vd); 29822f5224aeSachartre 29832f5224aeSachartre /* try again even if the reset has failed */ 29842f5224aeSachartre request->status = ldi_ioctl(vd->ldi_handle[request->slice], 29852f5224aeSachartre MHIOCQRESERVE, NULL, (vd->open_flags | FKIOCTL), kcred, 29862f5224aeSachartre &rval); 29872f5224aeSachartre break; 29882f5224aeSachartre 29892f5224aeSachartre case 0: 29902f5224aeSachartre /* Flag EXCLUSIVE only. 
Just issue a SCSI reservation */ 29912f5224aeSachartre PR0("Performing VD_OP_SET_ACCESS (EXCLUSIVE)"); 29922f5224aeSachartre request->status = ldi_ioctl(vd->ldi_handle[request->slice], 29932f5224aeSachartre MHIOCQRESERVE, NULL, (vd->open_flags | FKIOCTL), kcred, 29942f5224aeSachartre &rval); 29952f5224aeSachartre break; 29962f5224aeSachartre } 29972f5224aeSachartre 29982f5224aeSachartre if (request->status == 0) 29992f5224aeSachartre vd->ownership = B_TRUE; 30002f5224aeSachartre else 30012f5224aeSachartre PR0("VD_OP_SET_ACCESS: error %d", request->status); 30022f5224aeSachartre 30032f5224aeSachartre return (0); 30042f5224aeSachartre } 30052f5224aeSachartre 30062f5224aeSachartre static void 30072f5224aeSachartre vd_reset_access(vd_t *vd) 30082f5224aeSachartre { 30092f5224aeSachartre int status, rval; 30102f5224aeSachartre 30112f5224aeSachartre if (vd->file || !vd->ownership) 30122f5224aeSachartre return; 30132f5224aeSachartre 30142f5224aeSachartre PR0("Releasing disk ownership"); 30152f5224aeSachartre status = ldi_ioctl(vd->ldi_handle[0], MHIOCRELEASE, NULL, 30162f5224aeSachartre (vd->open_flags | FKIOCTL), kcred, &rval); 30172f5224aeSachartre 30182f5224aeSachartre /* 30192f5224aeSachartre * An EACCES failure means that there is a reservation conflict, 30202f5224aeSachartre * so we are not the owner of the disk anymore. 30212f5224aeSachartre */ 30222f5224aeSachartre if (status == 0 || status == EACCES) { 30232f5224aeSachartre vd->ownership = B_FALSE; 30242f5224aeSachartre return; 30252f5224aeSachartre } 30262f5224aeSachartre 30272f5224aeSachartre PR0("Fail to release ownership, error %d", status); 30282f5224aeSachartre 30292f5224aeSachartre /* 30302f5224aeSachartre * We have failed to release the ownership, try to reset the disk 30312f5224aeSachartre * to release reservations. 
30322f5224aeSachartre */ 30332f5224aeSachartre PR0("Resetting disk"); 30342f5224aeSachartre status = vd_scsi_reset(vd); 30352f5224aeSachartre 30362f5224aeSachartre if (status != 0) 30372f5224aeSachartre PR0("Fail to reset disk, error %d", status); 30382f5224aeSachartre 30392f5224aeSachartre /* whatever the result of the reset is, we try the release again */ 30402f5224aeSachartre status = ldi_ioctl(vd->ldi_handle[0], MHIOCRELEASE, NULL, 30412f5224aeSachartre (vd->open_flags | FKIOCTL), kcred, &rval); 30422f5224aeSachartre 30432f5224aeSachartre if (status == 0 || status == EACCES) { 30442f5224aeSachartre vd->ownership = B_FALSE; 30452f5224aeSachartre return; 30462f5224aeSachartre } 30472f5224aeSachartre 30482f5224aeSachartre PR0("Fail to release ownership, error %d", status); 30492f5224aeSachartre 30502f5224aeSachartre /* 30512f5224aeSachartre * At this point we have done our best to try to reset the 30522f5224aeSachartre * access rights to the disk and we don't know if we still 30532f5224aeSachartre * own a reservation and if any mechanism to preserve the 30542f5224aeSachartre * ownership is still in place. The ultimate solution would 30552f5224aeSachartre * be to reset the system but this is usually not what we 30562f5224aeSachartre * want to happen. 
30572f5224aeSachartre */ 30582f5224aeSachartre 30592f5224aeSachartre if (vd_reset_access_failure == A_REBOOT) { 30602f5224aeSachartre cmn_err(CE_WARN, VD_RESET_ACCESS_FAILURE_MSG 30612f5224aeSachartre ", rebooting the system", vd->device_path); 30622f5224aeSachartre (void) uadmin(A_SHUTDOWN, AD_BOOT, NULL); 30632f5224aeSachartre } else if (vd_reset_access_failure == A_DUMP) { 30642f5224aeSachartre panic(VD_RESET_ACCESS_FAILURE_MSG, vd->device_path); 30652f5224aeSachartre } 30662f5224aeSachartre 30672f5224aeSachartre cmn_err(CE_WARN, VD_RESET_ACCESS_FAILURE_MSG, vd->device_path); 30682f5224aeSachartre } 30692f5224aeSachartre 30701ae08745Sheppo /* 30711ae08745Sheppo * Define the supported operations once the functions for performing them have 30721ae08745Sheppo * been defined 30731ae08745Sheppo */ 30741ae08745Sheppo static const vds_operation_t vds_operation[] = { 30753af08d82Slm66018 #define X(_s) #_s, _s 30763af08d82Slm66018 {X(VD_OP_BREAD), vd_start_bio, vd_complete_bio}, 30773af08d82Slm66018 {X(VD_OP_BWRITE), vd_start_bio, vd_complete_bio}, 30783af08d82Slm66018 {X(VD_OP_FLUSH), vd_ioctl, NULL}, 30793af08d82Slm66018 {X(VD_OP_GET_WCE), vd_ioctl, NULL}, 30803af08d82Slm66018 {X(VD_OP_SET_WCE), vd_ioctl, NULL}, 30813af08d82Slm66018 {X(VD_OP_GET_VTOC), vd_ioctl, NULL}, 30823af08d82Slm66018 {X(VD_OP_SET_VTOC), vd_ioctl, NULL}, 30833af08d82Slm66018 {X(VD_OP_GET_DISKGEOM), vd_ioctl, NULL}, 30843af08d82Slm66018 {X(VD_OP_SET_DISKGEOM), vd_ioctl, NULL}, 30853af08d82Slm66018 {X(VD_OP_GET_EFI), vd_ioctl, NULL}, 30863af08d82Slm66018 {X(VD_OP_SET_EFI), vd_ioctl, NULL}, 30873af08d82Slm66018 {X(VD_OP_GET_DEVID), vd_get_devid, NULL}, 30882f5224aeSachartre {X(VD_OP_SCSICMD), vd_ioctl, NULL}, 30892f5224aeSachartre {X(VD_OP_RESET), vd_reset, NULL}, 30902f5224aeSachartre {X(VD_OP_GET_CAPACITY), vd_get_capacity, NULL}, 30912f5224aeSachartre {X(VD_OP_SET_ACCESS), vd_set_access, NULL}, 30922f5224aeSachartre {X(VD_OP_GET_ACCESS), vd_get_access, NULL}, 30933af08d82Slm66018 #undef X 
30941ae08745Sheppo }; 30951ae08745Sheppo 30961ae08745Sheppo static const size_t vds_noperations = 30971ae08745Sheppo (sizeof (vds_operation))/(sizeof (vds_operation[0])); 30981ae08745Sheppo 30991ae08745Sheppo /* 3100d10e4ef2Snarayan * Process a task specifying a client I/O request 3101205eeb1aSlm66018 * 3102205eeb1aSlm66018 * Parameters: 3103205eeb1aSlm66018 * task - structure containing the request sent from client 3104205eeb1aSlm66018 * 3105205eeb1aSlm66018 * Return Value 3106205eeb1aSlm66018 * 0 - success 3107205eeb1aSlm66018 * ENOTSUP - Unknown/Unsupported VD_OP_XXX operation 3108205eeb1aSlm66018 * EINVAL - Invalid disk slice 3109205eeb1aSlm66018 * != 0 - some other non-zero return value from start function 31101ae08745Sheppo */ 31111ae08745Sheppo static int 3112205eeb1aSlm66018 vd_do_process_task(vd_task_t *task) 31131ae08745Sheppo { 3114205eeb1aSlm66018 int i; 3115d10e4ef2Snarayan vd_t *vd = task->vd; 3116d10e4ef2Snarayan vd_dring_payload_t *request = task->request; 31171ae08745Sheppo 3118d10e4ef2Snarayan ASSERT(vd != NULL); 3119d10e4ef2Snarayan ASSERT(request != NULL); 31201ae08745Sheppo 3121d10e4ef2Snarayan /* Find the requested operation */ 3122205eeb1aSlm66018 for (i = 0; i < vds_noperations; i++) { 3123205eeb1aSlm66018 if (request->operation == vds_operation[i].operation) { 3124205eeb1aSlm66018 /* all operations should have a start func */ 3125205eeb1aSlm66018 ASSERT(vds_operation[i].start != NULL); 3126205eeb1aSlm66018 3127205eeb1aSlm66018 task->completef = vds_operation[i].complete; 3128d10e4ef2Snarayan break; 3129205eeb1aSlm66018 } 3130205eeb1aSlm66018 } 313117cadca8Slm66018 313217cadca8Slm66018 /* 313317cadca8Slm66018 * We need to check that the requested operation is permitted 313417cadca8Slm66018 * for the particular client that sent it or that the loop above 313517cadca8Slm66018 * did not complete without finding the operation type (indicating 313617cadca8Slm66018 * that the requested operation is unknown/unimplemented) 313717cadca8Slm66018 */ 
313817cadca8Slm66018 if ((VD_OP_SUPPORTED(vd->operations, request->operation) == B_FALSE) || 313917cadca8Slm66018 (i == vds_noperations)) { 31403af08d82Slm66018 PR0("Unsupported operation %u", request->operation); 314117cadca8Slm66018 request->status = ENOTSUP; 314217cadca8Slm66018 return (0); 31431ae08745Sheppo } 31441ae08745Sheppo 31457636cb21Slm66018 /* Range-check slice */ 314687a7269eSachartre if (request->slice >= vd->nslices && 314787a7269eSachartre (vd->vdisk_type != VD_DISK_TYPE_DISK || 314887a7269eSachartre request->slice != VD_SLICE_NONE)) { 31493af08d82Slm66018 PR0("Invalid \"slice\" %u (max %u) for virtual disk", 31507636cb21Slm66018 request->slice, (vd->nslices - 1)); 31517636cb21Slm66018 return (EINVAL); 31527636cb21Slm66018 } 31537636cb21Slm66018 3154205eeb1aSlm66018 /* 3155205eeb1aSlm66018 * Call the function pointer that starts the operation. 3156205eeb1aSlm66018 */ 3157205eeb1aSlm66018 return (vds_operation[i].start(task)); 31581ae08745Sheppo } 31591ae08745Sheppo 3160205eeb1aSlm66018 /* 3161205eeb1aSlm66018 * Description: 3162205eeb1aSlm66018 * This function is called by both the in-band and descriptor ring 3163205eeb1aSlm66018 * message processing functions paths to actually execute the task 3164205eeb1aSlm66018 * requested by the vDisk client. It in turn calls its worker 3165205eeb1aSlm66018 * function, vd_do_process_task(), to carry our the request. 3166205eeb1aSlm66018 * 3167205eeb1aSlm66018 * Any transport errors (e.g. LDC errors, vDisk protocol errors) are 3168205eeb1aSlm66018 * saved in the 'status' field of the task and are propagated back 3169205eeb1aSlm66018 * up the call stack to trigger a NACK 3170205eeb1aSlm66018 * 3171205eeb1aSlm66018 * Any request errors (e.g. ENOTTY from an ioctl) are saved in 3172205eeb1aSlm66018 * the 'status' field of the request and result in an ACK being sent 3173205eeb1aSlm66018 * by the completion handler. 
3174205eeb1aSlm66018 * 3175205eeb1aSlm66018 * Parameters: 3176205eeb1aSlm66018 * task - structure containing the request sent from client 3177205eeb1aSlm66018 * 3178205eeb1aSlm66018 * Return Value 3179205eeb1aSlm66018 * 0 - successful synchronous request. 3180205eeb1aSlm66018 * != 0 - transport error (e.g. LDC errors, vDisk protocol) 3181205eeb1aSlm66018 * EINPROGRESS - task will be finished in a completion handler 3182205eeb1aSlm66018 */ 3183205eeb1aSlm66018 static int 3184205eeb1aSlm66018 vd_process_task(vd_task_t *task) 3185205eeb1aSlm66018 { 3186205eeb1aSlm66018 vd_t *vd = task->vd; 3187205eeb1aSlm66018 int status; 31881ae08745Sheppo 3189205eeb1aSlm66018 DTRACE_PROBE1(task__start, vd_task_t *, task); 31903af08d82Slm66018 3191205eeb1aSlm66018 task->status = vd_do_process_task(task); 3192205eeb1aSlm66018 3193205eeb1aSlm66018 /* 3194205eeb1aSlm66018 * If the task processing function returned EINPROGRESS indicating 3195205eeb1aSlm66018 * that the task needs completing then schedule a taskq entry to 3196205eeb1aSlm66018 * finish it now. 3197205eeb1aSlm66018 * 3198205eeb1aSlm66018 * Otherwise the task processing function returned either zero 3199205eeb1aSlm66018 * indicating that the task was finished in the start function (and we 3200205eeb1aSlm66018 * don't need to wait in a completion function) or the start function 3201205eeb1aSlm66018 * returned an error - in both cases all that needs to happen is the 3202205eeb1aSlm66018 * notification to the vDisk client higher up the call stack. 3203205eeb1aSlm66018 * If the task was using a Descriptor Ring, we need to mark it as done 3204205eeb1aSlm66018 * at this stage. 
3205205eeb1aSlm66018 */ 3206205eeb1aSlm66018 if (task->status == EINPROGRESS) { 3207d10e4ef2Snarayan /* Queue a task to complete the operation */ 3208205eeb1aSlm66018 (void) ddi_taskq_dispatch(vd->completionq, vd_complete, 3209d10e4ef2Snarayan task, DDI_SLEEP); 3210d10e4ef2Snarayan 3211*f0ca1d9aSsb155480 } else if (!vd->reset_state && (vd->xfer_mode == VIO_DRING_MODE_V1_0)) { 3212205eeb1aSlm66018 /* Update the dring element if it's a dring client */ 3213205eeb1aSlm66018 status = vd_mark_elem_done(vd, task->index, 3214205eeb1aSlm66018 task->request->status, task->request->nbytes); 3215205eeb1aSlm66018 if (status == ECONNRESET) 3216205eeb1aSlm66018 vd_mark_in_reset(vd); 3217205eeb1aSlm66018 } 3218205eeb1aSlm66018 3219205eeb1aSlm66018 return (task->status); 32201ae08745Sheppo } 32211ae08745Sheppo 32221ae08745Sheppo /* 32230a55fbb7Slm66018 * Return true if the "type", "subtype", and "env" fields of the "tag" first 32240a55fbb7Slm66018 * argument match the corresponding remaining arguments; otherwise, return false 32251ae08745Sheppo */ 32260a55fbb7Slm66018 boolean_t 32271ae08745Sheppo vd_msgtype(vio_msg_tag_t *tag, int type, int subtype, int env) 32281ae08745Sheppo { 32291ae08745Sheppo return ((tag->vio_msgtype == type) && 32301ae08745Sheppo (tag->vio_subtype == subtype) && 32310a55fbb7Slm66018 (tag->vio_subtype_env == env)) ? B_TRUE : B_FALSE; 32321ae08745Sheppo } 32331ae08745Sheppo 32340a55fbb7Slm66018 /* 32350a55fbb7Slm66018 * Check whether the major/minor version specified in "ver_msg" is supported 32360a55fbb7Slm66018 * by this server. 
32370a55fbb7Slm66018 */ 32380a55fbb7Slm66018 static boolean_t 32390a55fbb7Slm66018 vds_supported_version(vio_ver_msg_t *ver_msg) 32400a55fbb7Slm66018 { 32410a55fbb7Slm66018 for (int i = 0; i < vds_num_versions; i++) { 32420a55fbb7Slm66018 ASSERT(vds_version[i].major > 0); 32430a55fbb7Slm66018 ASSERT((i == 0) || 32440a55fbb7Slm66018 (vds_version[i].major < vds_version[i-1].major)); 32450a55fbb7Slm66018 32460a55fbb7Slm66018 /* 32470a55fbb7Slm66018 * If the major versions match, adjust the minor version, if 32480a55fbb7Slm66018 * necessary, down to the highest value supported by this 32490a55fbb7Slm66018 * server and return true so this message will get "ack"ed; 32500a55fbb7Slm66018 * the client should also support all minor versions lower 32510a55fbb7Slm66018 * than the value it sent 32520a55fbb7Slm66018 */ 32530a55fbb7Slm66018 if (ver_msg->ver_major == vds_version[i].major) { 32540a55fbb7Slm66018 if (ver_msg->ver_minor > vds_version[i].minor) { 32550a55fbb7Slm66018 PR0("Adjusting minor version from %u to %u", 32560a55fbb7Slm66018 ver_msg->ver_minor, vds_version[i].minor); 32570a55fbb7Slm66018 ver_msg->ver_minor = vds_version[i].minor; 32580a55fbb7Slm66018 } 32590a55fbb7Slm66018 return (B_TRUE); 32600a55fbb7Slm66018 } 32610a55fbb7Slm66018 32620a55fbb7Slm66018 /* 32630a55fbb7Slm66018 * If the message contains a higher major version number, set 32640a55fbb7Slm66018 * the message's major/minor versions to the current values 32650a55fbb7Slm66018 * and return false, so this message will get "nack"ed with 32660a55fbb7Slm66018 * these values, and the client will potentially try again 32670a55fbb7Slm66018 * with the same or a lower version 32680a55fbb7Slm66018 */ 32690a55fbb7Slm66018 if (ver_msg->ver_major > vds_version[i].major) { 32700a55fbb7Slm66018 ver_msg->ver_major = vds_version[i].major; 32710a55fbb7Slm66018 ver_msg->ver_minor = vds_version[i].minor; 32720a55fbb7Slm66018 return (B_FALSE); 32730a55fbb7Slm66018 } 32740a55fbb7Slm66018 32750a55fbb7Slm66018 /* 
32760a55fbb7Slm66018 * Otherwise, the message's major version is less than the 32770a55fbb7Slm66018 * current major version, so continue the loop to the next 32780a55fbb7Slm66018 * (lower) supported version 32790a55fbb7Slm66018 */ 32800a55fbb7Slm66018 } 32810a55fbb7Slm66018 32820a55fbb7Slm66018 /* 32830a55fbb7Slm66018 * No common version was found; "ground" the version pair in the 32840a55fbb7Slm66018 * message to terminate negotiation 32850a55fbb7Slm66018 */ 32860a55fbb7Slm66018 ver_msg->ver_major = 0; 32870a55fbb7Slm66018 ver_msg->ver_minor = 0; 32880a55fbb7Slm66018 return (B_FALSE); 32890a55fbb7Slm66018 } 32900a55fbb7Slm66018 32910a55fbb7Slm66018 /* 32920a55fbb7Slm66018 * Process a version message from a client. vds expects to receive version 32930a55fbb7Slm66018 * messages from clients seeking service, but never issues version messages 32940a55fbb7Slm66018 * itself; therefore, vds can ACK or NACK client version messages, but does 32950a55fbb7Slm66018 * not expect to receive version-message ACKs or NACKs (and will treat such 32960a55fbb7Slm66018 * messages as invalid). 
32970a55fbb7Slm66018 */ 32981ae08745Sheppo static int 32990a55fbb7Slm66018 vd_process_ver_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 33001ae08745Sheppo { 33011ae08745Sheppo vio_ver_msg_t *ver_msg = (vio_ver_msg_t *)msg; 33021ae08745Sheppo 33031ae08745Sheppo 33041ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 33051ae08745Sheppo 33061ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 33071ae08745Sheppo VIO_VER_INFO)) { 33081ae08745Sheppo return (ENOMSG); /* not a version message */ 33091ae08745Sheppo } 33101ae08745Sheppo 33111ae08745Sheppo if (msglen != sizeof (*ver_msg)) { 33123af08d82Slm66018 PR0("Expected %lu-byte version message; " 33131ae08745Sheppo "received %lu bytes", sizeof (*ver_msg), msglen); 33141ae08745Sheppo return (EBADMSG); 33151ae08745Sheppo } 33161ae08745Sheppo 33171ae08745Sheppo if (ver_msg->dev_class != VDEV_DISK) { 33183af08d82Slm66018 PR0("Expected device class %u (disk); received %u", 33191ae08745Sheppo VDEV_DISK, ver_msg->dev_class); 33201ae08745Sheppo return (EBADMSG); 33211ae08745Sheppo } 33221ae08745Sheppo 33230a55fbb7Slm66018 /* 33240a55fbb7Slm66018 * We're talking to the expected kind of client; set our device class 33250a55fbb7Slm66018 * for "ack/nack" back to the client 33260a55fbb7Slm66018 */ 33271ae08745Sheppo ver_msg->dev_class = VDEV_DISK_SERVER; 33280a55fbb7Slm66018 33290a55fbb7Slm66018 /* 33300a55fbb7Slm66018 * Check whether the (valid) version message specifies a version 33310a55fbb7Slm66018 * supported by this server. 
If the version is not supported, return 33320a55fbb7Slm66018 * EBADMSG so the message will get "nack"ed; vds_supported_version() 33330a55fbb7Slm66018 * will have updated the message with a supported version for the 33340a55fbb7Slm66018 * client to consider 33350a55fbb7Slm66018 */ 33360a55fbb7Slm66018 if (!vds_supported_version(ver_msg)) 33370a55fbb7Slm66018 return (EBADMSG); 33380a55fbb7Slm66018 33390a55fbb7Slm66018 33400a55fbb7Slm66018 /* 33410a55fbb7Slm66018 * A version has been agreed upon; use the client's SID for 33420a55fbb7Slm66018 * communication on this channel now 33430a55fbb7Slm66018 */ 33440a55fbb7Slm66018 ASSERT(!(vd->initialized & VD_SID)); 33450a55fbb7Slm66018 vd->sid = ver_msg->tag.vio_sid; 33460a55fbb7Slm66018 vd->initialized |= VD_SID; 33470a55fbb7Slm66018 33480a55fbb7Slm66018 /* 334917cadca8Slm66018 * Store the negotiated major and minor version values in the "vd" data 335017cadca8Slm66018 * structure so that we can check if certain operations are supported 335117cadca8Slm66018 * by the client. 33520a55fbb7Slm66018 */ 335317cadca8Slm66018 vd->version.major = ver_msg->ver_major; 335417cadca8Slm66018 vd->version.minor = ver_msg->ver_minor; 33550a55fbb7Slm66018 33560a55fbb7Slm66018 PR0("Using major version %u, minor version %u", 33570a55fbb7Slm66018 ver_msg->ver_major, ver_msg->ver_minor); 33581ae08745Sheppo return (0); 33591ae08745Sheppo } 33601ae08745Sheppo 336117cadca8Slm66018 static void 336217cadca8Slm66018 vd_set_exported_operations(vd_t *vd) 336317cadca8Slm66018 { 336417cadca8Slm66018 vd->operations = 0; /* clear field */ 336517cadca8Slm66018 336617cadca8Slm66018 /* 336717cadca8Slm66018 * We need to check from the highest version supported to the 336817cadca8Slm66018 * lowest because versions with a higher minor number implicitly 336917cadca8Slm66018 * support versions with a lower minor number. 
337017cadca8Slm66018 */ 337117cadca8Slm66018 if (vio_ver_is_supported(vd->version, 1, 1)) { 337217cadca8Slm66018 ASSERT(vd->open_flags & FREAD); 337317cadca8Slm66018 vd->operations |= VD_OP_MASK_READ; 337417cadca8Slm66018 337517cadca8Slm66018 if (vd->open_flags & FWRITE) 337617cadca8Slm66018 vd->operations |= VD_OP_MASK_WRITE; 337717cadca8Slm66018 33782f5224aeSachartre if (vd->scsi) 33792f5224aeSachartre vd->operations |= VD_OP_MASK_SCSI; 33802f5224aeSachartre 338117cadca8Slm66018 if (vd->file && vd_file_is_iso_image(vd)) { 338217cadca8Slm66018 /* 338317cadca8Slm66018 * can't write to ISO images, make sure that write 338417cadca8Slm66018 * support is not set in case administrator did not 338517cadca8Slm66018 * use "options=ro" when doing an ldm add-vdsdev 338617cadca8Slm66018 */ 338717cadca8Slm66018 vd->operations &= ~VD_OP_MASK_WRITE; 338817cadca8Slm66018 } 338917cadca8Slm66018 } else if (vio_ver_is_supported(vd->version, 1, 0)) { 339017cadca8Slm66018 vd->operations = VD_OP_MASK_READ | VD_OP_MASK_WRITE; 339117cadca8Slm66018 } 339217cadca8Slm66018 339317cadca8Slm66018 /* we should have already agreed on a version */ 339417cadca8Slm66018 ASSERT(vd->operations != 0); 339517cadca8Slm66018 } 339617cadca8Slm66018 33971ae08745Sheppo static int 33981ae08745Sheppo vd_process_attr_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 33991ae08745Sheppo { 34001ae08745Sheppo vd_attr_msg_t *attr_msg = (vd_attr_msg_t *)msg; 34013c96341aSnarayan int status, retry = 0; 34021ae08745Sheppo 34031ae08745Sheppo 34041ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 34051ae08745Sheppo 34061ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 34071ae08745Sheppo VIO_ATTR_INFO)) { 3408d10e4ef2Snarayan PR0("Message is not an attribute message"); 3409d10e4ef2Snarayan return (ENOMSG); 34101ae08745Sheppo } 34111ae08745Sheppo 34121ae08745Sheppo if (msglen != sizeof (*attr_msg)) { 34133af08d82Slm66018 PR0("Expected %lu-byte attribute message; " 34141ae08745Sheppo "received %lu 
bytes", sizeof (*attr_msg), msglen); 34151ae08745Sheppo return (EBADMSG); 34161ae08745Sheppo } 34171ae08745Sheppo 34181ae08745Sheppo if (attr_msg->max_xfer_sz == 0) { 34193af08d82Slm66018 PR0("Received maximum transfer size of 0 from client"); 34201ae08745Sheppo return (EBADMSG); 34211ae08745Sheppo } 34221ae08745Sheppo 34231ae08745Sheppo if ((attr_msg->xfer_mode != VIO_DESC_MODE) && 3424*f0ca1d9aSsb155480 (attr_msg->xfer_mode != VIO_DRING_MODE_V1_0)) { 34253af08d82Slm66018 PR0("Client requested unsupported transfer mode"); 34261ae08745Sheppo return (EBADMSG); 34271ae08745Sheppo } 34281ae08745Sheppo 34293c96341aSnarayan /* 34303c96341aSnarayan * check if the underlying disk is ready, if not try accessing 34313c96341aSnarayan * the device again. Open the vdisk device and extract info 34323c96341aSnarayan * about it, as this is needed to respond to the attr info msg 34333c96341aSnarayan */ 34343c96341aSnarayan if ((vd->initialized & VD_DISK_READY) == 0) { 34353c96341aSnarayan PR0("Retry setting up disk (%s)", vd->device_path); 34363c96341aSnarayan do { 34373c96341aSnarayan status = vd_setup_vd(vd); 34383c96341aSnarayan if (status != EAGAIN || ++retry > vds_dev_retries) 34393c96341aSnarayan break; 34403c96341aSnarayan 34413c96341aSnarayan /* incremental delay */ 34423c96341aSnarayan delay(drv_usectohz(vds_dev_delay)); 34433c96341aSnarayan 34443c96341aSnarayan /* if vdisk is no longer enabled - return error */ 34453c96341aSnarayan if (!vd_enabled(vd)) 34463c96341aSnarayan return (ENXIO); 34473c96341aSnarayan 34483c96341aSnarayan } while (status == EAGAIN); 34493c96341aSnarayan 34503c96341aSnarayan if (status) 34513c96341aSnarayan return (ENXIO); 34523c96341aSnarayan 34533c96341aSnarayan vd->initialized |= VD_DISK_READY; 34543c96341aSnarayan ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR); 34553c96341aSnarayan PR0("vdisk_type = %s, pseudo = %s, file = %s, nslices = %u", 34563c96341aSnarayan ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? 
"disk" : "slice"), 34573c96341aSnarayan (vd->pseudo ? "yes" : "no"), 34583c96341aSnarayan (vd->file ? "yes" : "no"), 34593c96341aSnarayan vd->nslices); 34603c96341aSnarayan } 34613c96341aSnarayan 34621ae08745Sheppo /* Success: valid message and transfer mode */ 34631ae08745Sheppo vd->xfer_mode = attr_msg->xfer_mode; 34643af08d82Slm66018 34651ae08745Sheppo if (vd->xfer_mode == VIO_DESC_MODE) { 34663af08d82Slm66018 34671ae08745Sheppo /* 34681ae08745Sheppo * The vd_dring_inband_msg_t contains one cookie; need room 34691ae08745Sheppo * for up to n-1 more cookies, where "n" is the number of full 34701ae08745Sheppo * pages plus possibly one partial page required to cover 34711ae08745Sheppo * "max_xfer_sz". Add room for one more cookie if 34721ae08745Sheppo * "max_xfer_sz" isn't an integral multiple of the page size. 34731ae08745Sheppo * Must first get the maximum transfer size in bytes. 34741ae08745Sheppo */ 34751ae08745Sheppo size_t max_xfer_bytes = attr_msg->vdisk_block_size ? 34761ae08745Sheppo attr_msg->vdisk_block_size*attr_msg->max_xfer_sz : 34771ae08745Sheppo attr_msg->max_xfer_sz; 34781ae08745Sheppo size_t max_inband_msglen = 34791ae08745Sheppo sizeof (vd_dring_inband_msg_t) + 34801ae08745Sheppo ((max_xfer_bytes/PAGESIZE + 34811ae08745Sheppo ((max_xfer_bytes % PAGESIZE) ? 
1 : 0))* 34821ae08745Sheppo (sizeof (ldc_mem_cookie_t))); 34831ae08745Sheppo 34841ae08745Sheppo /* 34851ae08745Sheppo * Set the maximum expected message length to 34861ae08745Sheppo * accommodate in-band-descriptor messages with all 34871ae08745Sheppo * their cookies 34881ae08745Sheppo */ 34891ae08745Sheppo vd->max_msglen = MAX(vd->max_msglen, max_inband_msglen); 3490d10e4ef2Snarayan 3491d10e4ef2Snarayan /* 3492d10e4ef2Snarayan * Initialize the data structure for processing in-band I/O 3493d10e4ef2Snarayan * request descriptors 3494d10e4ef2Snarayan */ 3495d10e4ef2Snarayan vd->inband_task.vd = vd; 34963af08d82Slm66018 vd->inband_task.msg = kmem_alloc(vd->max_msglen, KM_SLEEP); 3497d10e4ef2Snarayan vd->inband_task.index = 0; 3498d10e4ef2Snarayan vd->inband_task.type = VD_FINAL_RANGE_TASK; /* range == 1 */ 34991ae08745Sheppo } 35001ae08745Sheppo 3501e1ebb9ecSlm66018 /* Return the device's block size and max transfer size to the client */ 35022f5224aeSachartre attr_msg->vdisk_block_size = vd->vdisk_block_size; 3503e1ebb9ecSlm66018 attr_msg->max_xfer_sz = vd->max_xfer_sz; 3504e1ebb9ecSlm66018 35051ae08745Sheppo attr_msg->vdisk_size = vd->vdisk_size; 35061ae08745Sheppo attr_msg->vdisk_type = vd->vdisk_type; 350717cadca8Slm66018 attr_msg->vdisk_media = vd->vdisk_media; 350817cadca8Slm66018 350917cadca8Slm66018 /* Discover and save the list of supported VD_OP_XXX operations */ 351017cadca8Slm66018 vd_set_exported_operations(vd); 351117cadca8Slm66018 attr_msg->operations = vd->operations; 351217cadca8Slm66018 35131ae08745Sheppo PR0("%s", VD_CLIENT(vd)); 35143af08d82Slm66018 35153af08d82Slm66018 ASSERT(vd->dring_task == NULL); 35163af08d82Slm66018 35171ae08745Sheppo return (0); 35181ae08745Sheppo } 35191ae08745Sheppo 35201ae08745Sheppo static int 35211ae08745Sheppo vd_process_dring_reg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 35221ae08745Sheppo { 35231ae08745Sheppo int status; 35241ae08745Sheppo size_t expected; 35251ae08745Sheppo ldc_mem_info_t dring_minfo; 
35261ae08745Sheppo vio_dring_reg_msg_t *reg_msg = (vio_dring_reg_msg_t *)msg; 35271ae08745Sheppo 35281ae08745Sheppo 35291ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 35301ae08745Sheppo 35311ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 35321ae08745Sheppo VIO_DRING_REG)) { 3533d10e4ef2Snarayan PR0("Message is not a register-dring message"); 3534d10e4ef2Snarayan return (ENOMSG); 35351ae08745Sheppo } 35361ae08745Sheppo 35371ae08745Sheppo if (msglen < sizeof (*reg_msg)) { 35383af08d82Slm66018 PR0("Expected at least %lu-byte register-dring message; " 35391ae08745Sheppo "received %lu bytes", sizeof (*reg_msg), msglen); 35401ae08745Sheppo return (EBADMSG); 35411ae08745Sheppo } 35421ae08745Sheppo 35431ae08745Sheppo expected = sizeof (*reg_msg) + 35441ae08745Sheppo (reg_msg->ncookies - 1)*(sizeof (reg_msg->cookie[0])); 35451ae08745Sheppo if (msglen != expected) { 35463af08d82Slm66018 PR0("Expected %lu-byte register-dring message; " 35471ae08745Sheppo "received %lu bytes", expected, msglen); 35481ae08745Sheppo return (EBADMSG); 35491ae08745Sheppo } 35501ae08745Sheppo 35511ae08745Sheppo if (vd->initialized & VD_DRING) { 35523af08d82Slm66018 PR0("A dring was previously registered; only support one"); 35531ae08745Sheppo return (EBADMSG); 35541ae08745Sheppo } 35551ae08745Sheppo 3556d10e4ef2Snarayan if (reg_msg->num_descriptors > INT32_MAX) { 35573af08d82Slm66018 PR0("reg_msg->num_descriptors = %u; must be <= %u (%s)", 3558d10e4ef2Snarayan reg_msg->ncookies, INT32_MAX, STRINGIZE(INT32_MAX)); 3559d10e4ef2Snarayan return (EBADMSG); 3560d10e4ef2Snarayan } 3561d10e4ef2Snarayan 35621ae08745Sheppo if (reg_msg->ncookies != 1) { 35631ae08745Sheppo /* 35641ae08745Sheppo * In addition to fixing the assertion in the success case 35651ae08745Sheppo * below, supporting drings which require more than one 35661ae08745Sheppo * "cookie" requires increasing the value of vd->max_msglen 35671ae08745Sheppo * somewhere in the code path prior to receiving the message 
35681ae08745Sheppo * which results in calling this function. Note that without 35691ae08745Sheppo * making this change, the larger message size required to 35701ae08745Sheppo * accommodate multiple cookies cannot be successfully 35711ae08745Sheppo * received, so this function will not even get called. 35721ae08745Sheppo * Gracefully accommodating more dring cookies might 35731ae08745Sheppo * reasonably demand exchanging an additional attribute or 35741ae08745Sheppo * making a minor protocol adjustment 35751ae08745Sheppo */ 35763af08d82Slm66018 PR0("reg_msg->ncookies = %u != 1", reg_msg->ncookies); 35771ae08745Sheppo return (EBADMSG); 35781ae08745Sheppo } 35791ae08745Sheppo 35801ae08745Sheppo status = ldc_mem_dring_map(vd->ldc_handle, reg_msg->cookie, 35811ae08745Sheppo reg_msg->ncookies, reg_msg->num_descriptors, 35824bac2208Snarayan reg_msg->descriptor_size, LDC_DIRECT_MAP, &vd->dring_handle); 35831ae08745Sheppo if (status != 0) { 35843af08d82Slm66018 PR0("ldc_mem_dring_map() returned errno %d", status); 35851ae08745Sheppo return (status); 35861ae08745Sheppo } 35871ae08745Sheppo 35881ae08745Sheppo /* 35891ae08745Sheppo * To remove the need for this assertion, must call 35901ae08745Sheppo * ldc_mem_dring_nextcookie() successfully ncookies-1 times after a 35911ae08745Sheppo * successful call to ldc_mem_dring_map() 35921ae08745Sheppo */ 35931ae08745Sheppo ASSERT(reg_msg->ncookies == 1); 35941ae08745Sheppo 35951ae08745Sheppo if ((status = 35961ae08745Sheppo ldc_mem_dring_info(vd->dring_handle, &dring_minfo)) != 0) { 35973af08d82Slm66018 PR0("ldc_mem_dring_info() returned errno %d", status); 35981ae08745Sheppo if ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0) 35993af08d82Slm66018 PR0("ldc_mem_dring_unmap() returned errno %d", status); 36001ae08745Sheppo return (status); 36011ae08745Sheppo } 36021ae08745Sheppo 36031ae08745Sheppo if (dring_minfo.vaddr == NULL) { 36043af08d82Slm66018 PR0("Descriptor ring virtual address is NULL"); 36050a55fbb7Slm66018 return 
(ENXIO); 36061ae08745Sheppo } 36071ae08745Sheppo 36081ae08745Sheppo 3609d10e4ef2Snarayan /* Initialize for valid message and mapped dring */ 36101ae08745Sheppo PR1("descriptor size = %u, dring length = %u", 36111ae08745Sheppo vd->descriptor_size, vd->dring_len); 36121ae08745Sheppo vd->initialized |= VD_DRING; 36131ae08745Sheppo vd->dring_ident = 1; /* "There Can Be Only One" */ 36141ae08745Sheppo vd->dring = dring_minfo.vaddr; 36151ae08745Sheppo vd->descriptor_size = reg_msg->descriptor_size; 36161ae08745Sheppo vd->dring_len = reg_msg->num_descriptors; 36171ae08745Sheppo reg_msg->dring_ident = vd->dring_ident; 3618d10e4ef2Snarayan 3619d10e4ef2Snarayan /* 3620d10e4ef2Snarayan * Allocate and initialize a "shadow" array of data structures for 3621d10e4ef2Snarayan * tasks to process I/O requests in dring elements 3622d10e4ef2Snarayan */ 3623d10e4ef2Snarayan vd->dring_task = 3624d10e4ef2Snarayan kmem_zalloc((sizeof (*vd->dring_task)) * vd->dring_len, KM_SLEEP); 3625d10e4ef2Snarayan for (int i = 0; i < vd->dring_len; i++) { 3626d10e4ef2Snarayan vd->dring_task[i].vd = vd; 3627d10e4ef2Snarayan vd->dring_task[i].index = i; 3628d10e4ef2Snarayan vd->dring_task[i].request = &VD_DRING_ELEM(i)->payload; 36294bac2208Snarayan 36304bac2208Snarayan status = ldc_mem_alloc_handle(vd->ldc_handle, 36314bac2208Snarayan &(vd->dring_task[i].mhdl)); 36324bac2208Snarayan if (status) { 36333af08d82Slm66018 PR0("ldc_mem_alloc_handle() returned err %d ", status); 36344bac2208Snarayan return (ENXIO); 36354bac2208Snarayan } 36363af08d82Slm66018 36373af08d82Slm66018 vd->dring_task[i].msg = kmem_alloc(vd->max_msglen, KM_SLEEP); 3638d10e4ef2Snarayan } 3639d10e4ef2Snarayan 36401ae08745Sheppo return (0); 36411ae08745Sheppo } 36421ae08745Sheppo 36431ae08745Sheppo static int 36441ae08745Sheppo vd_process_dring_unreg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 36451ae08745Sheppo { 36461ae08745Sheppo vio_dring_unreg_msg_t *unreg_msg = (vio_dring_unreg_msg_t *)msg; 36471ae08745Sheppo 36481ae08745Sheppo 
36491ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 36501ae08745Sheppo 36511ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 36521ae08745Sheppo VIO_DRING_UNREG)) { 3653d10e4ef2Snarayan PR0("Message is not an unregister-dring message"); 3654d10e4ef2Snarayan return (ENOMSG); 36551ae08745Sheppo } 36561ae08745Sheppo 36571ae08745Sheppo if (msglen != sizeof (*unreg_msg)) { 36583af08d82Slm66018 PR0("Expected %lu-byte unregister-dring message; " 36591ae08745Sheppo "received %lu bytes", sizeof (*unreg_msg), msglen); 36601ae08745Sheppo return (EBADMSG); 36611ae08745Sheppo } 36621ae08745Sheppo 36631ae08745Sheppo if (unreg_msg->dring_ident != vd->dring_ident) { 36643af08d82Slm66018 PR0("Expected dring ident %lu; received %lu", 36651ae08745Sheppo vd->dring_ident, unreg_msg->dring_ident); 36661ae08745Sheppo return (EBADMSG); 36671ae08745Sheppo } 36681ae08745Sheppo 36691ae08745Sheppo return (0); 36701ae08745Sheppo } 36711ae08745Sheppo 36721ae08745Sheppo static int 36731ae08745Sheppo process_rdx_msg(vio_msg_t *msg, size_t msglen) 36741ae08745Sheppo { 36751ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 36761ae08745Sheppo 3677d10e4ef2Snarayan if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX)) { 3678d10e4ef2Snarayan PR0("Message is not an RDX message"); 3679d10e4ef2Snarayan return (ENOMSG); 3680d10e4ef2Snarayan } 36811ae08745Sheppo 36821ae08745Sheppo if (msglen != sizeof (vio_rdx_msg_t)) { 36833af08d82Slm66018 PR0("Expected %lu-byte RDX message; received %lu bytes", 36841ae08745Sheppo sizeof (vio_rdx_msg_t), msglen); 36851ae08745Sheppo return (EBADMSG); 36861ae08745Sheppo } 36871ae08745Sheppo 3688d10e4ef2Snarayan PR0("Valid RDX message"); 36891ae08745Sheppo return (0); 36901ae08745Sheppo } 36911ae08745Sheppo 36921ae08745Sheppo static int 36931ae08745Sheppo vd_check_seq_num(vd_t *vd, uint64_t seq_num) 36941ae08745Sheppo { 36951ae08745Sheppo if ((vd->initialized & VD_SEQ_NUM) && (seq_num != vd->seq_num + 1)) { 36963af08d82Slm66018 
PR0("Received seq_num %lu; expected %lu", 36971ae08745Sheppo seq_num, (vd->seq_num + 1)); 36983af08d82Slm66018 PR0("initiating soft reset"); 3699d10e4ef2Snarayan vd_need_reset(vd, B_FALSE); 37001ae08745Sheppo return (1); 37011ae08745Sheppo } 37021ae08745Sheppo 37031ae08745Sheppo vd->seq_num = seq_num; 37041ae08745Sheppo vd->initialized |= VD_SEQ_NUM; /* superfluous after first time... */ 37051ae08745Sheppo return (0); 37061ae08745Sheppo } 37071ae08745Sheppo 37081ae08745Sheppo /* 37091ae08745Sheppo * Return the expected size of an inband-descriptor message with all the 37101ae08745Sheppo * cookies it claims to include 37111ae08745Sheppo */ 37121ae08745Sheppo static size_t 37131ae08745Sheppo expected_inband_size(vd_dring_inband_msg_t *msg) 37141ae08745Sheppo { 37151ae08745Sheppo return ((sizeof (*msg)) + 37161ae08745Sheppo (msg->payload.ncookies - 1)*(sizeof (msg->payload.cookie[0]))); 37171ae08745Sheppo } 37181ae08745Sheppo 37191ae08745Sheppo /* 37201ae08745Sheppo * Process an in-band descriptor message: used with clients like OBP, with 37211ae08745Sheppo * which vds exchanges descriptors within VIO message payloads, rather than 37221ae08745Sheppo * operating on them within a descriptor ring 37231ae08745Sheppo */ 37241ae08745Sheppo static int 37253af08d82Slm66018 vd_process_desc_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 37261ae08745Sheppo { 37271ae08745Sheppo size_t expected; 37281ae08745Sheppo vd_dring_inband_msg_t *desc_msg = (vd_dring_inband_msg_t *)msg; 37291ae08745Sheppo 37301ae08745Sheppo 37311ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 37321ae08745Sheppo 37331ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, 3734d10e4ef2Snarayan VIO_DESC_DATA)) { 3735d10e4ef2Snarayan PR1("Message is not an in-band-descriptor message"); 3736d10e4ef2Snarayan return (ENOMSG); 3737d10e4ef2Snarayan } 37381ae08745Sheppo 37391ae08745Sheppo if (msglen < sizeof (*desc_msg)) { 37403af08d82Slm66018 PR0("Expected at least %lu-byte descriptor message; " 
37411ae08745Sheppo "received %lu bytes", sizeof (*desc_msg), msglen); 37421ae08745Sheppo return (EBADMSG); 37431ae08745Sheppo } 37441ae08745Sheppo 37451ae08745Sheppo if (msglen != (expected = expected_inband_size(desc_msg))) { 37463af08d82Slm66018 PR0("Expected %lu-byte descriptor message; " 37471ae08745Sheppo "received %lu bytes", expected, msglen); 37481ae08745Sheppo return (EBADMSG); 37491ae08745Sheppo } 37501ae08745Sheppo 3751d10e4ef2Snarayan if (vd_check_seq_num(vd, desc_msg->hdr.seq_num) != 0) 37521ae08745Sheppo return (EBADMSG); 37531ae08745Sheppo 3754d10e4ef2Snarayan /* 3755d10e4ef2Snarayan * Valid message: Set up the in-band descriptor task and process the 3756d10e4ef2Snarayan * request. Arrange to acknowledge the client's message, unless an 3757d10e4ef2Snarayan * error processing the descriptor task results in setting 3758d10e4ef2Snarayan * VIO_SUBTYPE_NACK 3759d10e4ef2Snarayan */ 3760d10e4ef2Snarayan PR1("Valid in-band-descriptor message"); 3761d10e4ef2Snarayan msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 37623af08d82Slm66018 37633af08d82Slm66018 ASSERT(vd->inband_task.msg != NULL); 37643af08d82Slm66018 37653af08d82Slm66018 bcopy(msg, vd->inband_task.msg, msglen); 3766d10e4ef2Snarayan vd->inband_task.msglen = msglen; 37673af08d82Slm66018 37683af08d82Slm66018 /* 37693af08d82Slm66018 * The task request is now the payload of the message 37703af08d82Slm66018 * that was just copied into the body of the task. 
37713af08d82Slm66018 */ 37723af08d82Slm66018 desc_msg = (vd_dring_inband_msg_t *)vd->inband_task.msg; 3773d10e4ef2Snarayan vd->inband_task.request = &desc_msg->payload; 37743af08d82Slm66018 3775d10e4ef2Snarayan return (vd_process_task(&vd->inband_task)); 37761ae08745Sheppo } 37771ae08745Sheppo 37781ae08745Sheppo static int 3779d10e4ef2Snarayan vd_process_element(vd_t *vd, vd_task_type_t type, uint32_t idx, 37803af08d82Slm66018 vio_msg_t *msg, size_t msglen) 37811ae08745Sheppo { 37821ae08745Sheppo int status; 3783d10e4ef2Snarayan boolean_t ready; 3784d10e4ef2Snarayan vd_dring_entry_t *elem = VD_DRING_ELEM(idx); 37851ae08745Sheppo 37861ae08745Sheppo 3787d10e4ef2Snarayan /* Accept the updated dring element */ 3788d10e4ef2Snarayan if ((status = ldc_mem_dring_acquire(vd->dring_handle, idx, idx)) != 0) { 37893af08d82Slm66018 PR0("ldc_mem_dring_acquire() returned errno %d", status); 37901ae08745Sheppo return (status); 37911ae08745Sheppo } 3792d10e4ef2Snarayan ready = (elem->hdr.dstate == VIO_DESC_READY); 3793d10e4ef2Snarayan if (ready) { 3794d10e4ef2Snarayan elem->hdr.dstate = VIO_DESC_ACCEPTED; 3795d10e4ef2Snarayan } else { 37963af08d82Slm66018 PR0("descriptor %u not ready", idx); 3797d10e4ef2Snarayan VD_DUMP_DRING_ELEM(elem); 3798d10e4ef2Snarayan } 3799d10e4ef2Snarayan if ((status = ldc_mem_dring_release(vd->dring_handle, idx, idx)) != 0) { 38003af08d82Slm66018 PR0("ldc_mem_dring_release() returned errno %d", status); 38011ae08745Sheppo return (status); 38021ae08745Sheppo } 3803d10e4ef2Snarayan if (!ready) 3804d10e4ef2Snarayan return (EBUSY); 38051ae08745Sheppo 38061ae08745Sheppo 3807d10e4ef2Snarayan /* Initialize a task and process the accepted element */ 3808d10e4ef2Snarayan PR1("Processing dring element %u", idx); 3809d10e4ef2Snarayan vd->dring_task[idx].type = type; 38103af08d82Slm66018 38113af08d82Slm66018 /* duplicate msg buf for cookies etc. 
*/ 38123af08d82Slm66018 bcopy(msg, vd->dring_task[idx].msg, msglen); 38133af08d82Slm66018 3814d10e4ef2Snarayan vd->dring_task[idx].msglen = msglen; 3815205eeb1aSlm66018 return (vd_process_task(&vd->dring_task[idx])); 38161ae08745Sheppo } 38171ae08745Sheppo 38181ae08745Sheppo static int 3819d10e4ef2Snarayan vd_process_element_range(vd_t *vd, int start, int end, 38203af08d82Slm66018 vio_msg_t *msg, size_t msglen) 3821d10e4ef2Snarayan { 3822d10e4ef2Snarayan int i, n, nelem, status = 0; 3823d10e4ef2Snarayan boolean_t inprogress = B_FALSE; 3824d10e4ef2Snarayan vd_task_type_t type; 3825d10e4ef2Snarayan 3826d10e4ef2Snarayan 3827d10e4ef2Snarayan ASSERT(start >= 0); 3828d10e4ef2Snarayan ASSERT(end >= 0); 3829d10e4ef2Snarayan 3830d10e4ef2Snarayan /* 3831d10e4ef2Snarayan * Arrange to acknowledge the client's message, unless an error 3832d10e4ef2Snarayan * processing one of the dring elements results in setting 3833d10e4ef2Snarayan * VIO_SUBTYPE_NACK 3834d10e4ef2Snarayan */ 3835d10e4ef2Snarayan msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 3836d10e4ef2Snarayan 3837d10e4ef2Snarayan /* 3838d10e4ef2Snarayan * Process the dring elements in the range 3839d10e4ef2Snarayan */ 3840d10e4ef2Snarayan nelem = ((end < start) ? end + vd->dring_len : end) - start + 1; 3841d10e4ef2Snarayan for (i = start, n = nelem; n > 0; i = (i + 1) % vd->dring_len, n--) { 3842d10e4ef2Snarayan ((vio_dring_msg_t *)msg)->end_idx = i; 3843d10e4ef2Snarayan type = (n == 1) ? 
VD_FINAL_RANGE_TASK : VD_NONFINAL_RANGE_TASK; 38443af08d82Slm66018 status = vd_process_element(vd, type, i, msg, msglen); 3845d10e4ef2Snarayan if (status == EINPROGRESS) 3846d10e4ef2Snarayan inprogress = B_TRUE; 3847d10e4ef2Snarayan else if (status != 0) 3848d10e4ef2Snarayan break; 3849d10e4ef2Snarayan } 3850d10e4ef2Snarayan 3851d10e4ef2Snarayan /* 3852d10e4ef2Snarayan * If some, but not all, operations of a multi-element range are in 3853d10e4ef2Snarayan * progress, wait for other operations to complete before returning 3854d10e4ef2Snarayan * (which will result in "ack" or "nack" of the message). Note that 3855d10e4ef2Snarayan * all outstanding operations will need to complete, not just the ones 3856d10e4ef2Snarayan * corresponding to the current range of dring elements; howevever, as 3857d10e4ef2Snarayan * this situation is an error case, performance is less critical. 3858d10e4ef2Snarayan */ 3859d10e4ef2Snarayan if ((nelem > 1) && (status != EINPROGRESS) && inprogress) 3860d10e4ef2Snarayan ddi_taskq_wait(vd->completionq); 3861d10e4ef2Snarayan 3862d10e4ef2Snarayan return (status); 3863d10e4ef2Snarayan } 3864d10e4ef2Snarayan 3865d10e4ef2Snarayan static int 38663af08d82Slm66018 vd_process_dring_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 38671ae08745Sheppo { 38681ae08745Sheppo vio_dring_msg_t *dring_msg = (vio_dring_msg_t *)msg; 38691ae08745Sheppo 38701ae08745Sheppo 38711ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 38721ae08745Sheppo 38731ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, 38741ae08745Sheppo VIO_DRING_DATA)) { 3875d10e4ef2Snarayan PR1("Message is not a dring-data message"); 3876d10e4ef2Snarayan return (ENOMSG); 38771ae08745Sheppo } 38781ae08745Sheppo 38791ae08745Sheppo if (msglen != sizeof (*dring_msg)) { 38803af08d82Slm66018 PR0("Expected %lu-byte dring message; received %lu bytes", 38811ae08745Sheppo sizeof (*dring_msg), msglen); 38821ae08745Sheppo return (EBADMSG); 38831ae08745Sheppo } 38841ae08745Sheppo 
3885d10e4ef2Snarayan if (vd_check_seq_num(vd, dring_msg->seq_num) != 0) 38861ae08745Sheppo return (EBADMSG); 38871ae08745Sheppo 38881ae08745Sheppo if (dring_msg->dring_ident != vd->dring_ident) { 38893af08d82Slm66018 PR0("Expected dring ident %lu; received ident %lu", 38901ae08745Sheppo vd->dring_ident, dring_msg->dring_ident); 38911ae08745Sheppo return (EBADMSG); 38921ae08745Sheppo } 38931ae08745Sheppo 3894d10e4ef2Snarayan if (dring_msg->start_idx >= vd->dring_len) { 38953af08d82Slm66018 PR0("\"start_idx\" = %u; must be less than %u", 3896d10e4ef2Snarayan dring_msg->start_idx, vd->dring_len); 3897d10e4ef2Snarayan return (EBADMSG); 3898d10e4ef2Snarayan } 38991ae08745Sheppo 3900d10e4ef2Snarayan if ((dring_msg->end_idx < 0) || 3901d10e4ef2Snarayan (dring_msg->end_idx >= vd->dring_len)) { 39023af08d82Slm66018 PR0("\"end_idx\" = %u; must be >= 0 and less than %u", 3903d10e4ef2Snarayan dring_msg->end_idx, vd->dring_len); 3904d10e4ef2Snarayan return (EBADMSG); 3905d10e4ef2Snarayan } 3906d10e4ef2Snarayan 3907d10e4ef2Snarayan /* Valid message; process range of updated dring elements */ 3908d10e4ef2Snarayan PR1("Processing descriptor range, start = %u, end = %u", 3909d10e4ef2Snarayan dring_msg->start_idx, dring_msg->end_idx); 3910d10e4ef2Snarayan return (vd_process_element_range(vd, dring_msg->start_idx, 39113af08d82Slm66018 dring_msg->end_idx, msg, msglen)); 39121ae08745Sheppo } 39131ae08745Sheppo 39141ae08745Sheppo static int 39151ae08745Sheppo recv_msg(ldc_handle_t ldc_handle, void *msg, size_t *nbytes) 39161ae08745Sheppo { 39171ae08745Sheppo int retry, status; 39181ae08745Sheppo size_t size = *nbytes; 39191ae08745Sheppo 39201ae08745Sheppo 39211ae08745Sheppo for (retry = 0, status = ETIMEDOUT; 39221ae08745Sheppo retry < vds_ldc_retries && status == ETIMEDOUT; 39231ae08745Sheppo retry++) { 39241ae08745Sheppo PR1("ldc_read() attempt %d", (retry + 1)); 39251ae08745Sheppo *nbytes = size; 39261ae08745Sheppo status = ldc_read(ldc_handle, msg, nbytes); 39271ae08745Sheppo } 
39281ae08745Sheppo 39293af08d82Slm66018 if (status) { 39303af08d82Slm66018 PR0("ldc_read() returned errno %d", status); 39313af08d82Slm66018 if (status != ECONNRESET) 39323af08d82Slm66018 return (ENOMSG); 39331ae08745Sheppo return (status); 39341ae08745Sheppo } else if (*nbytes == 0) { 39351ae08745Sheppo PR1("ldc_read() returned 0 and no message read"); 39361ae08745Sheppo return (ENOMSG); 39371ae08745Sheppo } 39381ae08745Sheppo 39391ae08745Sheppo PR1("RCVD %lu-byte message", *nbytes); 39401ae08745Sheppo return (0); 39411ae08745Sheppo } 39421ae08745Sheppo 39431ae08745Sheppo static int 39443af08d82Slm66018 vd_do_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 39451ae08745Sheppo { 39461ae08745Sheppo int status; 39471ae08745Sheppo 39481ae08745Sheppo 39491ae08745Sheppo PR1("Processing (%x/%x/%x) message", msg->tag.vio_msgtype, 39501ae08745Sheppo msg->tag.vio_subtype, msg->tag.vio_subtype_env); 39513af08d82Slm66018 #ifdef DEBUG 39523af08d82Slm66018 vd_decode_tag(msg); 39533af08d82Slm66018 #endif 39541ae08745Sheppo 39551ae08745Sheppo /* 39561ae08745Sheppo * Validate session ID up front, since it applies to all messages 39571ae08745Sheppo * once set 39581ae08745Sheppo */ 39591ae08745Sheppo if ((msg->tag.vio_sid != vd->sid) && (vd->initialized & VD_SID)) { 39603af08d82Slm66018 PR0("Expected SID %u, received %u", vd->sid, 39611ae08745Sheppo msg->tag.vio_sid); 39621ae08745Sheppo return (EBADMSG); 39631ae08745Sheppo } 39641ae08745Sheppo 39653af08d82Slm66018 PR1("\tWhile in state %d (%s)", vd->state, vd_decode_state(vd->state)); 39661ae08745Sheppo 39671ae08745Sheppo /* 39681ae08745Sheppo * Process the received message based on connection state 39691ae08745Sheppo */ 39701ae08745Sheppo switch (vd->state) { 39711ae08745Sheppo case VD_STATE_INIT: /* expect version message */ 39720a55fbb7Slm66018 if ((status = vd_process_ver_msg(vd, msg, msglen)) != 0) 39731ae08745Sheppo return (status); 39741ae08745Sheppo 39751ae08745Sheppo /* Version negotiated, move to that state */ 
39761ae08745Sheppo vd->state = VD_STATE_VER; 39771ae08745Sheppo return (0); 39781ae08745Sheppo 39791ae08745Sheppo case VD_STATE_VER: /* expect attribute message */ 39801ae08745Sheppo if ((status = vd_process_attr_msg(vd, msg, msglen)) != 0) 39811ae08745Sheppo return (status); 39821ae08745Sheppo 39831ae08745Sheppo /* Attributes exchanged, move to that state */ 39841ae08745Sheppo vd->state = VD_STATE_ATTR; 39851ae08745Sheppo return (0); 39861ae08745Sheppo 39871ae08745Sheppo case VD_STATE_ATTR: 39881ae08745Sheppo switch (vd->xfer_mode) { 39891ae08745Sheppo case VIO_DESC_MODE: /* expect RDX message */ 39901ae08745Sheppo if ((status = process_rdx_msg(msg, msglen)) != 0) 39911ae08745Sheppo return (status); 39921ae08745Sheppo 39931ae08745Sheppo /* Ready to receive in-band descriptors */ 39941ae08745Sheppo vd->state = VD_STATE_DATA; 39951ae08745Sheppo return (0); 39961ae08745Sheppo 3997*f0ca1d9aSsb155480 case VIO_DRING_MODE_V1_0: /* expect register-dring message */ 39981ae08745Sheppo if ((status = 39991ae08745Sheppo vd_process_dring_reg_msg(vd, msg, msglen)) != 0) 40001ae08745Sheppo return (status); 40011ae08745Sheppo 40021ae08745Sheppo /* One dring negotiated, move to that state */ 40031ae08745Sheppo vd->state = VD_STATE_DRING; 40041ae08745Sheppo return (0); 40051ae08745Sheppo 40061ae08745Sheppo default: 40071ae08745Sheppo ASSERT("Unsupported transfer mode"); 40083af08d82Slm66018 PR0("Unsupported transfer mode"); 40091ae08745Sheppo return (ENOTSUP); 40101ae08745Sheppo } 40111ae08745Sheppo 40121ae08745Sheppo case VD_STATE_DRING: /* expect RDX, register-dring, or unreg-dring */ 40131ae08745Sheppo if ((status = process_rdx_msg(msg, msglen)) == 0) { 40141ae08745Sheppo /* Ready to receive data */ 40151ae08745Sheppo vd->state = VD_STATE_DATA; 40161ae08745Sheppo return (0); 40171ae08745Sheppo } else if (status != ENOMSG) { 40181ae08745Sheppo return (status); 40191ae08745Sheppo } 40201ae08745Sheppo 40211ae08745Sheppo 40221ae08745Sheppo /* 40231ae08745Sheppo * If another 
register-dring message is received, stay in 40241ae08745Sheppo * dring state in case the client sends RDX; although the 40251ae08745Sheppo * protocol allows multiple drings, this server does not 40261ae08745Sheppo * support using more than one 40271ae08745Sheppo */ 40281ae08745Sheppo if ((status = 40291ae08745Sheppo vd_process_dring_reg_msg(vd, msg, msglen)) != ENOMSG) 40301ae08745Sheppo return (status); 40311ae08745Sheppo 40321ae08745Sheppo /* 40331ae08745Sheppo * Acknowledge an unregister-dring message, but reset the 40341ae08745Sheppo * connection anyway: Although the protocol allows 40351ae08745Sheppo * unregistering drings, this server cannot serve a vdisk 40361ae08745Sheppo * without its only dring 40371ae08745Sheppo */ 40381ae08745Sheppo status = vd_process_dring_unreg_msg(vd, msg, msglen); 40391ae08745Sheppo return ((status == 0) ? ENOTSUP : status); 40401ae08745Sheppo 40411ae08745Sheppo case VD_STATE_DATA: 40421ae08745Sheppo switch (vd->xfer_mode) { 40431ae08745Sheppo case VIO_DESC_MODE: /* expect in-band-descriptor message */ 40443af08d82Slm66018 return (vd_process_desc_msg(vd, msg, msglen)); 40451ae08745Sheppo 4046*f0ca1d9aSsb155480 case VIO_DRING_MODE_V1_0: /* expect dring-data or unreg-dring */ 40471ae08745Sheppo /* 40481ae08745Sheppo * Typically expect dring-data messages, so handle 40491ae08745Sheppo * them first 40501ae08745Sheppo */ 40511ae08745Sheppo if ((status = vd_process_dring_msg(vd, msg, 40523af08d82Slm66018 msglen)) != ENOMSG) 40531ae08745Sheppo return (status); 40541ae08745Sheppo 40551ae08745Sheppo /* 40561ae08745Sheppo * Acknowledge an unregister-dring message, but reset 40571ae08745Sheppo * the connection anyway: Although the protocol 40581ae08745Sheppo * allows unregistering drings, this server cannot 40591ae08745Sheppo * serve a vdisk without its only dring 40601ae08745Sheppo */ 40611ae08745Sheppo status = vd_process_dring_unreg_msg(vd, msg, msglen); 40621ae08745Sheppo return ((status == 0) ? 
ENOTSUP : status); 40631ae08745Sheppo 40641ae08745Sheppo default: 40651ae08745Sheppo ASSERT("Unsupported transfer mode"); 40663af08d82Slm66018 PR0("Unsupported transfer mode"); 40671ae08745Sheppo return (ENOTSUP); 40681ae08745Sheppo } 40691ae08745Sheppo 40701ae08745Sheppo default: 40711ae08745Sheppo ASSERT("Invalid client connection state"); 40723af08d82Slm66018 PR0("Invalid client connection state"); 40731ae08745Sheppo return (ENOTSUP); 40741ae08745Sheppo } 40751ae08745Sheppo } 40761ae08745Sheppo 4077d10e4ef2Snarayan static int 40783af08d82Slm66018 vd_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 40791ae08745Sheppo { 40801ae08745Sheppo int status; 40811ae08745Sheppo boolean_t reset_ldc = B_FALSE; 4082205eeb1aSlm66018 vd_task_t task; 40831ae08745Sheppo 40841ae08745Sheppo /* 40851ae08745Sheppo * Check that the message is at least big enough for a "tag", so that 40861ae08745Sheppo * message processing can proceed based on tag-specified message type 40871ae08745Sheppo */ 40881ae08745Sheppo if (msglen < sizeof (vio_msg_tag_t)) { 40893af08d82Slm66018 PR0("Received short (%lu-byte) message", msglen); 40901ae08745Sheppo /* Can't "nack" short message, so drop the big hammer */ 40913af08d82Slm66018 PR0("initiating full reset"); 4092d10e4ef2Snarayan vd_need_reset(vd, B_TRUE); 4093d10e4ef2Snarayan return (EBADMSG); 40941ae08745Sheppo } 40951ae08745Sheppo 40961ae08745Sheppo /* 40971ae08745Sheppo * Process the message 40981ae08745Sheppo */ 40993af08d82Slm66018 switch (status = vd_do_process_msg(vd, msg, msglen)) { 41001ae08745Sheppo case 0: 41011ae08745Sheppo /* "ack" valid, successfully-processed messages */ 41021ae08745Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 41031ae08745Sheppo break; 41041ae08745Sheppo 4105d10e4ef2Snarayan case EINPROGRESS: 4106d10e4ef2Snarayan /* The completion handler will "ack" or "nack" the message */ 4107d10e4ef2Snarayan return (EINPROGRESS); 41081ae08745Sheppo case ENOMSG: 41093af08d82Slm66018 PR0("Received unexpected message"); 
41101ae08745Sheppo _NOTE(FALLTHROUGH); 41111ae08745Sheppo case EBADMSG: 41121ae08745Sheppo case ENOTSUP: 4113205eeb1aSlm66018 /* "transport" error will cause NACK of invalid messages */ 41141ae08745Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 41151ae08745Sheppo break; 41161ae08745Sheppo 41171ae08745Sheppo default: 4118205eeb1aSlm66018 /* "transport" error will cause NACK of invalid messages */ 41191ae08745Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 41201ae08745Sheppo /* An LDC error probably occurred, so try resetting it */ 41211ae08745Sheppo reset_ldc = B_TRUE; 41221ae08745Sheppo break; 41231ae08745Sheppo } 41241ae08745Sheppo 41253af08d82Slm66018 PR1("\tResulting in state %d (%s)", vd->state, 41263af08d82Slm66018 vd_decode_state(vd->state)); 41273af08d82Slm66018 4128205eeb1aSlm66018 /* populate the task so we can dispatch it on the taskq */ 4129205eeb1aSlm66018 task.vd = vd; 4130205eeb1aSlm66018 task.msg = msg; 4131205eeb1aSlm66018 task.msglen = msglen; 4132205eeb1aSlm66018 4133205eeb1aSlm66018 /* 4134205eeb1aSlm66018 * Queue a task to send the notification that the operation completed. 4135205eeb1aSlm66018 * We need to ensure that requests are responded to in the correct 4136205eeb1aSlm66018 * order and since the taskq is processed serially this ordering 4137205eeb1aSlm66018 * is maintained. 4138205eeb1aSlm66018 */ 4139205eeb1aSlm66018 (void) ddi_taskq_dispatch(vd->completionq, vd_serial_notify, 4140205eeb1aSlm66018 &task, DDI_SLEEP); 4141205eeb1aSlm66018 4142205eeb1aSlm66018 /* 4143205eeb1aSlm66018 * To ensure handshake negotiations do not happen out of order, such 4144205eeb1aSlm66018 * requests that come through this path should not be done in parallel 4145205eeb1aSlm66018 * so we need to wait here until the response is sent to the client. 
4146205eeb1aSlm66018 */ 4147205eeb1aSlm66018 ddi_taskq_wait(vd->completionq); 41481ae08745Sheppo 4149d10e4ef2Snarayan /* Arrange to reset the connection for nack'ed or failed messages */ 41503af08d82Slm66018 if ((status != 0) || reset_ldc) { 41513af08d82Slm66018 PR0("initiating %s reset", 41523af08d82Slm66018 (reset_ldc) ? "full" : "soft"); 4153d10e4ef2Snarayan vd_need_reset(vd, reset_ldc); 41543af08d82Slm66018 } 4155d10e4ef2Snarayan 4156d10e4ef2Snarayan return (status); 4157d10e4ef2Snarayan } 4158d10e4ef2Snarayan 4159d10e4ef2Snarayan static boolean_t 4160d10e4ef2Snarayan vd_enabled(vd_t *vd) 4161d10e4ef2Snarayan { 4162d10e4ef2Snarayan boolean_t enabled; 4163d10e4ef2Snarayan 4164d10e4ef2Snarayan mutex_enter(&vd->lock); 4165d10e4ef2Snarayan enabled = vd->enabled; 4166d10e4ef2Snarayan mutex_exit(&vd->lock); 4167d10e4ef2Snarayan return (enabled); 41681ae08745Sheppo } 41691ae08745Sheppo 41701ae08745Sheppo static void 41710a55fbb7Slm66018 vd_recv_msg(void *arg) 41721ae08745Sheppo { 41731ae08745Sheppo vd_t *vd = (vd_t *)arg; 41743af08d82Slm66018 int rv = 0, status = 0; 41751ae08745Sheppo 41761ae08745Sheppo ASSERT(vd != NULL); 41773af08d82Slm66018 4178d10e4ef2Snarayan PR2("New task to receive incoming message(s)"); 41793af08d82Slm66018 41803af08d82Slm66018 4181d10e4ef2Snarayan while (vd_enabled(vd) && status == 0) { 4182d10e4ef2Snarayan size_t msglen, msgsize; 41833af08d82Slm66018 ldc_status_t lstatus; 4184d10e4ef2Snarayan 41850a55fbb7Slm66018 /* 4186d10e4ef2Snarayan * Receive and process a message 41870a55fbb7Slm66018 */ 4188d10e4ef2Snarayan vd_reset_if_needed(vd); /* can change vd->max_msglen */ 41893af08d82Slm66018 41903af08d82Slm66018 /* 41913af08d82Slm66018 * check if channel is UP - else break out of loop 41923af08d82Slm66018 */ 41933af08d82Slm66018 status = ldc_status(vd->ldc_handle, &lstatus); 41943af08d82Slm66018 if (lstatus != LDC_UP) { 41953af08d82Slm66018 PR0("channel not up (status=%d), exiting recv loop\n", 41963af08d82Slm66018 lstatus); 41973af08d82Slm66018 
break; 41983af08d82Slm66018 } 41993af08d82Slm66018 42003af08d82Slm66018 ASSERT(vd->max_msglen != 0); 42013af08d82Slm66018 4202d10e4ef2Snarayan msgsize = vd->max_msglen; /* stable copy for alloc/free */ 42033af08d82Slm66018 msglen = msgsize; /* actual len after recv_msg() */ 42043af08d82Slm66018 42053af08d82Slm66018 status = recv_msg(vd->ldc_handle, vd->vio_msgp, &msglen); 42063af08d82Slm66018 switch (status) { 42073af08d82Slm66018 case 0: 42083af08d82Slm66018 rv = vd_process_msg(vd, (vio_msg_t *)vd->vio_msgp, 42093af08d82Slm66018 msglen); 42103af08d82Slm66018 /* check if max_msglen changed */ 42113af08d82Slm66018 if (msgsize != vd->max_msglen) { 42123af08d82Slm66018 PR0("max_msglen changed 0x%lx to 0x%lx bytes\n", 42133af08d82Slm66018 msgsize, vd->max_msglen); 42143af08d82Slm66018 kmem_free(vd->vio_msgp, msgsize); 42153af08d82Slm66018 vd->vio_msgp = 42163af08d82Slm66018 kmem_alloc(vd->max_msglen, KM_SLEEP); 42173af08d82Slm66018 } 42183af08d82Slm66018 if (rv == EINPROGRESS) 42193af08d82Slm66018 continue; 42203af08d82Slm66018 break; 42213af08d82Slm66018 42223af08d82Slm66018 case ENOMSG: 42233af08d82Slm66018 break; 42243af08d82Slm66018 42253af08d82Slm66018 case ECONNRESET: 42263af08d82Slm66018 PR0("initiating soft reset (ECONNRESET)\n"); 42273af08d82Slm66018 vd_need_reset(vd, B_FALSE); 42283af08d82Slm66018 status = 0; 42293af08d82Slm66018 break; 42303af08d82Slm66018 42313af08d82Slm66018 default: 4232d10e4ef2Snarayan /* Probably an LDC failure; arrange to reset it */ 42333af08d82Slm66018 PR0("initiating full reset (status=0x%x)", status); 4234d10e4ef2Snarayan vd_need_reset(vd, B_TRUE); 42353af08d82Slm66018 break; 42360a55fbb7Slm66018 } 42371ae08745Sheppo } 42383af08d82Slm66018 4239d10e4ef2Snarayan PR2("Task finished"); 42400a55fbb7Slm66018 } 42410a55fbb7Slm66018

/*
 * Description:
 *	LDC event callback for a virtual disk. Depending on the event bits
 *	set in "event", it requests a reset of the vdisk connection and/or
 *	queues vd_recv_msg() on the vdisk's "startq" task queue.
 *
 *	- LDC_EVT_DOWN:  request a reset with B_TRUE and queue vd_recv_msg().
 *	- LDC_EVT_RESET: if the vdisk is not in VD_STATE_INIT, request a
 *			 reset with B_FALSE and queue vd_recv_msg();
 *			 otherwise just bring the channel up with ldc_up().
 *			 No other event bit is examined after this one.
 *	- LDC_EVT_UP:	 request a reset with B_FALSE and queue vd_recv_msg().
 *	- LDC_EVT_READ:  queue vd_recv_msg() to receive the new data.
 *
 *	Whenever the task cannot be queued, a reset with B_TRUE is requested.
 *	Nothing is done if the vdisk is not enabled.
 *
 * Parameters:
 *	event		- bitmask of LDC_EVT_* events
 *	arg		- the vd_t of the virtual disk, passed as a caddr_t
 *
 * Return Code:
 *	LDC_SUCCESS	- always
 */
static uint_t
vd_handle_ldc_events(uint64_t event, caddr_t arg)
{
	vd_t	*vd = (vd_t *)(void *)arg;
	int	status;

	ASSERT(vd != NULL);

	if (!vd_enabled(vd))
		return (LDC_SUCCESS);

	if (event & LDC_EVT_DOWN) {
		PR0("LDC_EVT_DOWN: LDC channel went down");

		vd_need_reset(vd, B_TRUE);
		status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd,
		    DDI_SLEEP);
		if (status == DDI_FAILURE) {
			PR0("cannot schedule task to recv msg\n");
			vd_need_reset(vd, B_TRUE);
		}
	}

	if (event & LDC_EVT_RESET) {
		PR0("LDC_EVT_RESET: LDC channel was reset");

		if (vd->state != VD_STATE_INIT) {
			/*
			 * NOTE(review): the message below says "full reset"
			 * but the reset is requested with B_FALSE, which the
			 * other messages in this file call a "soft" reset;
			 * confirm which one is intended.
			 */
			PR0("scheduling full reset");
			vd_need_reset(vd, B_FALSE);
			status = ddi_taskq_dispatch(vd->startq, vd_recv_msg,
			    vd, DDI_SLEEP);
			if (status == DDI_FAILURE) {
				PR0("cannot schedule task to recv msg\n");
				vd_need_reset(vd, B_TRUE);
			}

		} else {
			PR0("channel already reset, ignoring...\n");
			PR0("doing ldc up...\n");
			(void) ldc_up(vd->ldc_handle);
		}

		return (LDC_SUCCESS);
	}

	if (event & LDC_EVT_UP) {
		PR0("EVT_UP: LDC is up\nResetting client connection state");
		PR0("initiating soft reset");
		vd_need_reset(vd, B_FALSE);
		status = ddi_taskq_dispatch(vd->startq, vd_recv_msg,
		    vd, DDI_SLEEP);
		if (status == DDI_FAILURE) {
			PR0("cannot schedule task to recv msg\n");
			vd_need_reset(vd, B_TRUE);
			return (LDC_SUCCESS);
		}
	}

	if (event & LDC_EVT_READ) {
		PR1("New data available");
		/*
		 * Queue a task to receive the new data. The function-scope
		 * "status" is reused here; a second declaration in this block
		 * would only shadow it.
		 */
		status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd,
		    DDI_SLEEP);

		if (status == DDI_FAILURE) {
			PR0("cannot schedule task to recv msg\n");
			vd_need_reset(vd, B_TRUE);
		}
	}

	return (LDC_SUCCESS);
}

/*
 * Description:
 *	mod_hash_walk() callback used to find out whether the vdisk table
 *	has at least one entry. It increments the counter pointed to by
 *	"arg" and stops the walk immediately, so after the walk the counter
 *	is non-zero if and only if the table is not empty.
 *
 * Parameters:
 *	key		- unused
 *	val		- unused
 *	arg		- pointer to a uint_t counter
 *
 * Return Code:
 *	MH_WALK_TERMINATE - always
 */
static uint_t
vds_check_for_vd(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	_NOTE(ARGUNUSED(key, val))
	(*((uint_t *)arg))++;
	return (MH_WALK_TERMINATE);
}


/*
 * Description:
 *	detach(9E) entry point. DDI_SUSPEND requires no action. DDI_DETACH
 *	is refused while the vdisk table is not empty; otherwise the MDEG
 *	registration, the LDI identifier, the vdisk table and the soft
 *	state of the instance are released.
 *
 * Parameters:
 *	dip		- device info of the instance
 *	cmd		- DDI_DETACH or DDI_SUSPEND
 *
 * Return Code:
 *	DDI_SUCCESS	- the instance was detached (or cmd was DDI_SUSPEND)
 *	DDI_FAILURE	- unknown cmd, no soft state for the instance, or
 *			  vdisks are still being served
 */
static int
vds_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	uint_t	vd_present = 0;
	minor_t	instance;
	vds_t	*vds;


	switch (cmd) {
	case DDI_DETACH:
		/* the real work happens below */
		break;
	case DDI_SUSPEND:
		PR0("No action required for DDI_SUSPEND");
		return (DDI_SUCCESS);
	default:
		PR0("Unrecognized \"cmd\"");
		return (DDI_FAILURE);
	}

	ASSERT(cmd == DDI_DETACH);
	instance = ddi_get_instance(dip);
	if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) {
		/*
		 * There is no soft state for this instance, so there is
		 * nothing to free; do not call ddi_soft_state_free() here.
		 */
		PR0("Could not get state for instance %u", instance);
		return (DDI_FAILURE);
	}

	/* Do not detach when serving any vdisks */
	mod_hash_walk(vds->vd_table, vds_check_for_vd, &vd_present);
	if (vd_present) {
		PR0("Not detaching because serving vdisks");
		return (DDI_FAILURE);
	}

	PR0("Detaching");
	if (vds->initialized & VDS_MDEG) {
		(void) mdeg_unregister(vds->mdeg);
		kmem_free(vds->ispecp->specp, sizeof (vds_prop_template));
		kmem_free(vds->ispecp, sizeof (mdeg_node_spec_t));
		vds->ispecp = NULL;
		vds->mdeg = NULL;
	}

	if (vds->initialized & VDS_LDI)
		(void) ldi_ident_release(vds->ldi_ident);
	mod_hash_destroy_hash(vds->vd_table);
	ddi_soft_state_free(vds_state, instance);
	return (DDI_SUCCESS);
}

/*
 * Description:
 *	Check whether a device is a pseudo device by walking up its
 *	ancestors (stopping before the root node) and looking for a node
 *	named DEVI_PSEUDO_NEXNAME.
 *
 * Parameters:
 *	dip		- device info of the device to check
 *
 * Return Code:
 *	B_TRUE		- an ancestor is named DEVI_PSEUDO_NEXNAME
 *	B_FALSE		- no such ancestor was found
 */
static boolean_t
is_pseudo_device(dev_info_t *dip)
{
	dev_info_t	*parent, *root = ddi_root_node();


	for (parent = ddi_get_parent(dip); (parent != NULL) && (parent != root);
	    parent = ddi_get_parent(parent)) {
		if (strcmp(ddi_get_name(parent), DEVI_PSEUDO_NEXNAME) == 0)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Description:
 *	This function checks to see if the file being used as a
 *	virtual disk is an ISO image. An ISO image is a special
 *	case which can be booted/installed from like a CD/DVD
 *
 * Parameters:
 *	vd		- disk on which the operation is performed.
 *
 * Return Code:
 *	B_TRUE		- The file is an ISO 9660 compliant image
 *	B_FALSE		- just a regular disk image file
 */
static boolean_t
vd_file_is_iso_image(vd_t *vd)
{
	char	iso_buf[ISO_SECTOR_SIZE];
	int	i, rv;
	uint_t	sec;

	ASSERT(vd->file);

	/*
	 * If we have already discovered and saved this info we can
	 * short-circuit the check and avoid reading the file.
	 */
	if (vd->vdisk_media == VD_MEDIA_DVD || vd->vdisk_media == VD_MEDIA_CD)
		return (B_TRUE);

	/*
	 * We wish to read the sector that should contain the 2nd ISO volume
	 * descriptor. The second field in this descriptor is called the
	 * Standard Identifier and is set to CD001 for a CD-ROM compliant
	 * to the ISO 9660 standard.
	 */
	sec = (ISO_VOLDESC_SEC * ISO_SECTOR_SIZE) / vd->vdisk_block_size;
	rv = vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, (caddr_t)iso_buf,
	    sec, ISO_SECTOR_SIZE);

	/* a negative value means the sector could not be read */
	if (rv < 0)
		return (B_FALSE);

	/* compare the Standard Identifier with the expected ISO string */
	for (i = 0; i < ISO_ID_STRLEN; i++) {
		if (ISO_STD_ID(iso_buf)[i] != ISO_ID_STRING[i])
			return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * Description:
 *	This function checks to see if the virtual device is an ATAPI
 *	device. ATAPI devices use Group 1 Read/Write commands, so
 *	any USCSI calls vds makes need to take this into account.
 *
 * Parameters:
 *	vd		- disk on which the operation is performed.
445017cadca8Slm66018 * 445117cadca8Slm66018 * Return Code: 445217cadca8Slm66018 * B_TRUE - The virtual disk is backed by an ATAPI device 445317cadca8Slm66018 * B_FALSE - not an ATAPI device (presumably SCSI) 445417cadca8Slm66018 */ 445517cadca8Slm66018 static boolean_t 445617cadca8Slm66018 vd_is_atapi_device(vd_t *vd) 445717cadca8Slm66018 { 445817cadca8Slm66018 boolean_t is_atapi = B_FALSE; 445917cadca8Slm66018 char *variantp; 446017cadca8Slm66018 int rv; 446117cadca8Slm66018 446217cadca8Slm66018 ASSERT(vd->ldi_handle[0] != NULL); 446317cadca8Slm66018 ASSERT(!vd->file); 446417cadca8Slm66018 446517cadca8Slm66018 rv = ldi_prop_lookup_string(vd->ldi_handle[0], 446617cadca8Slm66018 (LDI_DEV_T_ANY | DDI_PROP_DONTPASS), "variant", &variantp); 446717cadca8Slm66018 if (rv == DDI_PROP_SUCCESS) { 446817cadca8Slm66018 PR0("'variant' property exists for %s", vd->device_path); 446917cadca8Slm66018 if (strcmp(variantp, "atapi") == 0) 447017cadca8Slm66018 is_atapi = B_TRUE; 447117cadca8Slm66018 ddi_prop_free(variantp); 447217cadca8Slm66018 } 447317cadca8Slm66018 447417cadca8Slm66018 rv = ldi_prop_exists(vd->ldi_handle[0], LDI_DEV_T_ANY, "atapi"); 447517cadca8Slm66018 if (rv) { 447617cadca8Slm66018 PR0("'atapi' property exists for %s", vd->device_path); 447717cadca8Slm66018 is_atapi = B_TRUE; 447817cadca8Slm66018 } 447917cadca8Slm66018 448017cadca8Slm66018 return (is_atapi); 448117cadca8Slm66018 } 448217cadca8Slm66018 44831ae08745Sheppo static int 44842f5224aeSachartre vd_setup_mediainfo(vd_t *vd) 44850a55fbb7Slm66018 { 44862f5224aeSachartre int status, rval; 44874bac2208Snarayan struct dk_minfo dk_minfo; 44880a55fbb7Slm66018 44892f5224aeSachartre ASSERT(vd->ldi_handle[0] != NULL); 44902f5224aeSachartre ASSERT(vd->vdisk_block_size != 0); 44912f5224aeSachartre 44922f5224aeSachartre if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGMEDIAINFO, 44932f5224aeSachartre (intptr_t)&dk_minfo, (vd->open_flags | FKIOCTL), 44942f5224aeSachartre kcred, &rval)) != 0) 44952f5224aeSachartre return 
(status); 44962f5224aeSachartre 44972f5224aeSachartre ASSERT(dk_minfo.dki_lbsize % vd->vdisk_block_size == 0); 44982f5224aeSachartre 44992f5224aeSachartre vd->block_size = dk_minfo.dki_lbsize; 45002f5224aeSachartre vd->vdisk_size = (dk_minfo.dki_capacity * dk_minfo.dki_lbsize) / 45012f5224aeSachartre vd->vdisk_block_size; 45022f5224aeSachartre vd->vdisk_media = DK_MEDIATYPE2VD_MEDIATYPE(dk_minfo.dki_media_type); 45032f5224aeSachartre return (0); 45042f5224aeSachartre } 45052f5224aeSachartre 45062f5224aeSachartre static int 45072f5224aeSachartre vd_setup_full_disk(vd_t *vd) 45082f5224aeSachartre { 45092f5224aeSachartre int status; 45102f5224aeSachartre major_t major = getmajor(vd->dev[0]); 45112f5224aeSachartre minor_t minor = getminor(vd->dev[0]) - VD_ENTIRE_DISK_SLICE; 45122f5224aeSachartre 4513047ba61eSachartre ASSERT(vd->vdisk_type == VD_DISK_TYPE_DISK); 4514047ba61eSachartre 45152f5224aeSachartre vd->vdisk_block_size = DEV_BSIZE; 45162f5224aeSachartre 45174bac2208Snarayan /* 45184bac2208Snarayan * At this point, vdisk_size is set to the size of partition 2 but 45194bac2208Snarayan * this does not represent the size of the disk because partition 2 45204bac2208Snarayan * may not cover the entire disk and its size does not include reserved 45212f5224aeSachartre * blocks. So we call vd_get_mediainfo to udpate this information and 45222f5224aeSachartre * set the block size and the media type of the disk. 
45234bac2208Snarayan */ 45242f5224aeSachartre status = vd_setup_mediainfo(vd); 45252f5224aeSachartre 45262f5224aeSachartre if (status != 0) { 45272f5224aeSachartre if (!vd->scsi) { 45282f5224aeSachartre /* unexpected failure */ 4529690555a1Sachartre PRN("ldi_ioctl(DKIOCGMEDIAINFO) returned errno %d", 45304bac2208Snarayan status); 45310a55fbb7Slm66018 return (status); 45320a55fbb7Slm66018 } 45332f5224aeSachartre 45342f5224aeSachartre /* 45352f5224aeSachartre * The function can fail for SCSI disks which are present but 45362f5224aeSachartre * reserved by another system. In that case, we don't know the 45372f5224aeSachartre * size of the disk and the block size. 45382f5224aeSachartre */ 45392f5224aeSachartre vd->vdisk_size = VD_SIZE_UNKNOWN; 45402f5224aeSachartre vd->block_size = 0; 45412f5224aeSachartre vd->vdisk_media = VD_MEDIA_FIXED; 45422f5224aeSachartre } 45430a55fbb7Slm66018 45440a55fbb7Slm66018 /* Move dev number and LDI handle to entire-disk-slice array elements */ 45450a55fbb7Slm66018 vd->dev[VD_ENTIRE_DISK_SLICE] = vd->dev[0]; 45460a55fbb7Slm66018 vd->dev[0] = 0; 45470a55fbb7Slm66018 vd->ldi_handle[VD_ENTIRE_DISK_SLICE] = vd->ldi_handle[0]; 45480a55fbb7Slm66018 vd->ldi_handle[0] = NULL; 45490a55fbb7Slm66018 45500a55fbb7Slm66018 /* Initialize device numbers for remaining slices and open them */ 45510a55fbb7Slm66018 for (int slice = 0; slice < vd->nslices; slice++) { 45520a55fbb7Slm66018 /* 45530a55fbb7Slm66018 * Skip the entire-disk slice, as it's already open and its 45540a55fbb7Slm66018 * device known 45550a55fbb7Slm66018 */ 45560a55fbb7Slm66018 if (slice == VD_ENTIRE_DISK_SLICE) 45570a55fbb7Slm66018 continue; 45580a55fbb7Slm66018 ASSERT(vd->dev[slice] == 0); 45590a55fbb7Slm66018 ASSERT(vd->ldi_handle[slice] == NULL); 45600a55fbb7Slm66018 45610a55fbb7Slm66018 /* 45620a55fbb7Slm66018 * Construct the device number for the current slice 45630a55fbb7Slm66018 */ 45640a55fbb7Slm66018 vd->dev[slice] = makedevice(major, (minor + slice)); 45650a55fbb7Slm66018 
45660a55fbb7Slm66018 /* 456734683adeSsg70180 * Open all slices of the disk to serve them to the client. 456834683adeSsg70180 * Slices are opened exclusively to prevent other threads or 456934683adeSsg70180 * processes in the service domain from performing I/O to 457034683adeSsg70180 * slices being accessed by a client. Failure to open a slice 457134683adeSsg70180 * results in vds not serving this disk, as the client could 457234683adeSsg70180 * attempt (and should be able) to access any slice immediately. 457334683adeSsg70180 * Any slices successfully opened before a failure will get 457434683adeSsg70180 * closed by vds_destroy_vd() as a result of the error returned 457534683adeSsg70180 * by this function. 457634683adeSsg70180 * 457734683adeSsg70180 * We need to do the open with FNDELAY so that opening an empty 457834683adeSsg70180 * slice does not fail. 45790a55fbb7Slm66018 */ 45800a55fbb7Slm66018 PR0("Opening device major %u, minor %u = slice %u", 45810a55fbb7Slm66018 major, minor, slice); 4582047ba61eSachartre 4583047ba61eSachartre /* 4584047ba61eSachartre * Try to open the device. This can fail for example if we are 4585047ba61eSachartre * opening an empty slice. So in case of a failure, we try the 4586047ba61eSachartre * open again but this time with the FNDELAY flag. 
4587047ba61eSachartre */ 4588047ba61eSachartre status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK, 4589047ba61eSachartre vd->open_flags, kcred, &vd->ldi_handle[slice], 4590047ba61eSachartre vd->vds->ldi_ident); 4591047ba61eSachartre 4592047ba61eSachartre if (status != 0) { 4593047ba61eSachartre status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK, 4594047ba61eSachartre vd->open_flags | FNDELAY, kcred, 4595047ba61eSachartre &vd->ldi_handle[slice], vd->vds->ldi_ident); 4596047ba61eSachartre } 4597047ba61eSachartre 4598047ba61eSachartre if (status != 0) { 4599690555a1Sachartre PRN("ldi_open_by_dev() returned errno %d " 46000a55fbb7Slm66018 "for slice %u", status, slice); 46010a55fbb7Slm66018 /* vds_destroy_vd() will close any open slices */ 4602690555a1Sachartre vd->ldi_handle[slice] = NULL; 46030a55fbb7Slm66018 return (status); 46040a55fbb7Slm66018 } 46050a55fbb7Slm66018 } 46060a55fbb7Slm66018 46070a55fbb7Slm66018 return (0); 46080a55fbb7Slm66018 } 46090a55fbb7Slm66018 4610edcc0754Sachartre /* 4611edcc0754Sachartre * When a slice or a volume is exported as a single-slice disk, we want 4612edcc0754Sachartre * the disk backend (i.e. the slice or volume) to be entirely mapped as 4613edcc0754Sachartre * a slice without the addition of any metadata. 
4614edcc0754Sachartre * 4615edcc0754Sachartre * So when exporting the disk as a VTOC disk, we fake a disk with the following 4616edcc0754Sachartre * layout: 4617edcc0754Sachartre * 4618edcc0754Sachartre * 0 1 N+1 4619edcc0754Sachartre * +-+--------------------------+ 4620edcc0754Sachartre * virtual disk: |L| slice 0 | 4621edcc0754Sachartre * +-+--------------------------+ 4622edcc0754Sachartre * ^: : 4623edcc0754Sachartre * |: : 4624edcc0754Sachartre * VTOC LABEL--+: : 4625edcc0754Sachartre * +--------------------------+ 4626edcc0754Sachartre * disk backend: | slice/volume | 4627edcc0754Sachartre * +--------------------------+ 4628edcc0754Sachartre * 0 N 4629edcc0754Sachartre * 4630edcc0754Sachartre * N is the number of blocks in the slice/volume. 4631edcc0754Sachartre * 4632edcc0754Sachartre * We simulate a disk with N+1 blocks. The first block (block 0) is faked and 4633edcc0754Sachartre * can not be changed. The remaining blocks (1 to N+1) defines slice 0 and are 4634edcc0754Sachartre * mapped to the exported slice or volume: 4635edcc0754Sachartre * 4636edcc0754Sachartre * - block 0 (L) can return a fake VTOC label if raw read was implemented. 4637edcc0754Sachartre * - block 1 to N+1 is mapped to the exported slice or volume. 
4638edcc0754Sachartre * 4639edcc0754Sachartre */ 46400a55fbb7Slm66018 static int 464178fcd0a1Sachartre vd_setup_partition_vtoc(vd_t *vd) 464278fcd0a1Sachartre { 464378fcd0a1Sachartre int rval, status; 464478fcd0a1Sachartre char *device_path = vd->device_path; 464578fcd0a1Sachartre 464678fcd0a1Sachartre status = ldi_ioctl(vd->ldi_handle[0], DKIOCGGEOM, 4647047ba61eSachartre (intptr_t)&vd->dk_geom, (vd->open_flags | FKIOCTL), kcred, &rval); 464878fcd0a1Sachartre 464978fcd0a1Sachartre if (status != 0) { 465078fcd0a1Sachartre PRN("ldi_ioctl(DKIOCGEOM) returned errno %d for %s", 465178fcd0a1Sachartre status, device_path); 465278fcd0a1Sachartre return (status); 465378fcd0a1Sachartre } 465478fcd0a1Sachartre 465578fcd0a1Sachartre /* Initialize dk_geom structure for single-slice device */ 465678fcd0a1Sachartre if (vd->dk_geom.dkg_nsect == 0) { 465778fcd0a1Sachartre PRN("%s geometry claims 0 sectors per track", device_path); 465878fcd0a1Sachartre return (EIO); 465978fcd0a1Sachartre } 466078fcd0a1Sachartre if (vd->dk_geom.dkg_nhead == 0) { 466178fcd0a1Sachartre PRN("%s geometry claims 0 heads", device_path); 466278fcd0a1Sachartre return (EIO); 466378fcd0a1Sachartre } 4664edcc0754Sachartre vd->dk_geom.dkg_ncyl = (vd->vdisk_size + 1) / vd->dk_geom.dkg_nsect / 466578fcd0a1Sachartre vd->dk_geom.dkg_nhead; 466678fcd0a1Sachartre vd->dk_geom.dkg_acyl = 0; 466778fcd0a1Sachartre vd->dk_geom.dkg_pcyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl; 466878fcd0a1Sachartre 466978fcd0a1Sachartre 467078fcd0a1Sachartre /* Initialize vtoc structure for single-slice device */ 467178fcd0a1Sachartre bcopy(VD_VOLUME_NAME, vd->vtoc.v_volume, 467278fcd0a1Sachartre MIN(sizeof (VD_VOLUME_NAME), sizeof (vd->vtoc.v_volume))); 467378fcd0a1Sachartre bzero(vd->vtoc.v_part, sizeof (vd->vtoc.v_part)); 467478fcd0a1Sachartre vd->vtoc.v_nparts = 1; 467578fcd0a1Sachartre vd->vtoc.v_part[0].p_tag = V_UNASSIGNED; 467678fcd0a1Sachartre vd->vtoc.v_part[0].p_flag = 0; 4677edcc0754Sachartre vd->vtoc.v_part[0].p_start = 
1; 467878fcd0a1Sachartre vd->vtoc.v_part[0].p_size = vd->vdisk_size; 467978fcd0a1Sachartre bcopy(VD_ASCIILABEL, vd->vtoc.v_asciilabel, 468078fcd0a1Sachartre MIN(sizeof (VD_ASCIILABEL), sizeof (vd->vtoc.v_asciilabel))); 468178fcd0a1Sachartre 4682edcc0754Sachartre /* adjust the vdisk_size, we emulate the first block */ 4683edcc0754Sachartre vd->vdisk_size += 1; 4684edcc0754Sachartre 468578fcd0a1Sachartre return (0); 468678fcd0a1Sachartre } 468778fcd0a1Sachartre 4688edcc0754Sachartre /* 4689edcc0754Sachartre * When a slice, volume or file is exported as a single-slice disk, we want 4690edcc0754Sachartre * the disk backend (i.e. the slice, volume or file) to be entirely mapped 4691edcc0754Sachartre * as a slice without the addition of any metadata. 4692edcc0754Sachartre * 4693edcc0754Sachartre * So when exporting the disk as an EFI disk, we fake a disk with the following 4694edcc0754Sachartre * layout: 4695edcc0754Sachartre * 4696edcc0754Sachartre * 0 1 2 3 34 34+N 4697edcc0754Sachartre * +-+-+-+-------+--------------------------+ 4698edcc0754Sachartre * virtual disk: |X|T|E|XXXXXXX| slice 0 | 4699edcc0754Sachartre * +-+-+-+-------+--------------------------+ 4700edcc0754Sachartre * ^ ^ : : 4701edcc0754Sachartre * | | : : 4702edcc0754Sachartre * GPT-+ +-GPE : : 4703edcc0754Sachartre * +--------------------------+ 4704edcc0754Sachartre * disk backend: | slice/volume/file | 4705edcc0754Sachartre * +--------------------------+ 4706edcc0754Sachartre * 0 N 4707edcc0754Sachartre * 4708edcc0754Sachartre * N is the number of blocks in the slice/volume/file. 4709edcc0754Sachartre * 4710edcc0754Sachartre * We simulate a disk with 34+N blocks. The first 34 blocks (0 to 33) are 4711edcc0754Sachartre * emulated and can not be changed. The remaining blocks (34 to 34+N) defines 4712edcc0754Sachartre * slice 0 and are mapped to the exported slice, volume or file: 4713edcc0754Sachartre * 4714edcc0754Sachartre * - block 0 (X) is unused and can return 0 if raw read was implemented. 
4715edcc0754Sachartre * - block 1 (T) returns a fake EFI GPT (via DKIOCGETEFI) 4716edcc0754Sachartre * - block 2 (E) returns a fake EFI GPE (via DKIOCGETEFI) 4717edcc0754Sachartre * - block 3 to 33 (X) are unused and return 0 if raw read is implemented. 4718edcc0754Sachartre * - block 34 to 34+N is mapped to the exported slice, volume or file. 4719edcc0754Sachartre * 4720edcc0754Sachartre */ 472178fcd0a1Sachartre static int 47224bac2208Snarayan vd_setup_partition_efi(vd_t *vd) 47234bac2208Snarayan { 47244bac2208Snarayan efi_gpt_t *gpt; 47254bac2208Snarayan efi_gpe_t *gpe; 4726edcc0754Sachartre struct uuid uuid = EFI_USR; 47274bac2208Snarayan uint32_t crc; 47284bac2208Snarayan 4729edcc0754Sachartre gpt = &vd->efi_gpt; 4730edcc0754Sachartre gpe = &vd->efi_gpe; 47314bac2208Snarayan 4732edcc0754Sachartre bzero(gpt, sizeof (efi_gpt_t)); 4733edcc0754Sachartre bzero(gpe, sizeof (efi_gpe_t)); 4734edcc0754Sachartre 4735edcc0754Sachartre /* adjust the vdisk_size, we emulate the first 34 blocks */ 4736edcc0754Sachartre vd->vdisk_size += 34; 47374bac2208Snarayan 47384bac2208Snarayan gpt->efi_gpt_Signature = LE_64(EFI_SIGNATURE); 47394bac2208Snarayan gpt->efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); 47404bac2208Snarayan gpt->efi_gpt_HeaderSize = LE_32(sizeof (efi_gpt_t)); 4741edcc0754Sachartre gpt->efi_gpt_FirstUsableLBA = LE_64(34ULL); 47424bac2208Snarayan gpt->efi_gpt_LastUsableLBA = LE_64(vd->vdisk_size - 1); 47434bac2208Snarayan gpt->efi_gpt_NumberOfPartitionEntries = LE_32(1); 4744edcc0754Sachartre gpt->efi_gpt_PartitionEntryLBA = LE_64(2ULL); 47454bac2208Snarayan gpt->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (efi_gpe_t)); 47464bac2208Snarayan 47474bac2208Snarayan UUID_LE_CONVERT(gpe->efi_gpe_PartitionTypeGUID, uuid); 47484bac2208Snarayan gpe->efi_gpe_StartingLBA = gpt->efi_gpt_FirstUsableLBA; 47494bac2208Snarayan gpe->efi_gpe_EndingLBA = gpt->efi_gpt_LastUsableLBA; 47504bac2208Snarayan 47514bac2208Snarayan CRC32(crc, gpe, sizeof (efi_gpe_t), -1U, crc32_table); 
47524bac2208Snarayan gpt->efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); 47534bac2208Snarayan 47544bac2208Snarayan CRC32(crc, gpt, sizeof (efi_gpt_t), -1U, crc32_table); 47554bac2208Snarayan gpt->efi_gpt_HeaderCRC32 = LE_32(~crc); 47564bac2208Snarayan 47574bac2208Snarayan return (0); 47584bac2208Snarayan } 47594bac2208Snarayan 4760047ba61eSachartre /* 4761047ba61eSachartre * Setup for a virtual disk whose backend is a file (exported as a single slice 4762047ba61eSachartre * or as a full disk) or a pseudo device (for example a ZFS, SVM or VxVM volume) 4763047ba61eSachartre * exported as a full disk. In these cases, the backend is accessed using the 4764047ba61eSachartre * vnode interface. 4765047ba61eSachartre */ 47664bac2208Snarayan static int 4767047ba61eSachartre vd_setup_backend_vnode(vd_t *vd) 47683c96341aSnarayan { 476978fcd0a1Sachartre int rval, status; 47703c96341aSnarayan vattr_t vattr; 47713c96341aSnarayan dev_t dev; 47723c96341aSnarayan char *file_path = vd->device_path; 47733c96341aSnarayan char dev_path[MAXPATHLEN + 1]; 47743c96341aSnarayan ldi_handle_t lhandle; 47753c96341aSnarayan struct dk_cinfo dk_cinfo; 47763c96341aSnarayan 4777047ba61eSachartre if ((status = vn_open(file_path, UIO_SYSSPACE, vd->open_flags | FOFFMAX, 47783c96341aSnarayan 0, &vd->file_vnode, 0, 0)) != 0) { 4779690555a1Sachartre PRN("vn_open(%s) = errno %d", file_path, status); 47803c96341aSnarayan return (status); 47813c96341aSnarayan } 47823c96341aSnarayan 4783690555a1Sachartre /* 4784690555a1Sachartre * We set vd->file now so that vds_destroy_vd will take care of 4785690555a1Sachartre * closing the file and releasing the vnode in case of an error. 
4786690555a1Sachartre */ 4787690555a1Sachartre vd->file = B_TRUE; 4788690555a1Sachartre 47893c96341aSnarayan vattr.va_mask = AT_SIZE; 4790da6c28aaSamw if ((status = VOP_GETATTR(vd->file_vnode, &vattr, 0, kcred, NULL)) 4791da6c28aaSamw != 0) { 4792690555a1Sachartre PRN("VOP_GETATTR(%s) = errno %d", file_path, status); 47933c96341aSnarayan return (EIO); 47943c96341aSnarayan } 47953c96341aSnarayan 47963c96341aSnarayan vd->file_size = vattr.va_size; 47973c96341aSnarayan /* size should be at least sizeof(dk_label) */ 47983c96341aSnarayan if (vd->file_size < sizeof (struct dk_label)) { 47993c96341aSnarayan PRN("Size of file has to be at least %ld bytes", 48003c96341aSnarayan sizeof (struct dk_label)); 48013c96341aSnarayan return (EIO); 48023c96341aSnarayan } 48033c96341aSnarayan 4804690555a1Sachartre if (vd->file_vnode->v_flag & VNOMAP) { 4805690555a1Sachartre PRN("File %s cannot be mapped", file_path); 48063c96341aSnarayan return (EIO); 48073c96341aSnarayan } 48083c96341aSnarayan 48093c96341aSnarayan /* sector size = block size = DEV_BSIZE */ 481017cadca8Slm66018 vd->block_size = DEV_BSIZE; 481117cadca8Slm66018 vd->vdisk_block_size = DEV_BSIZE; 481287a7269eSachartre vd->vdisk_size = vd->file_size / DEV_BSIZE; 48133c96341aSnarayan vd->max_xfer_sz = maxphys / DEV_BSIZE; /* default transfer size */ 48143c96341aSnarayan 4815047ba61eSachartre /* 4816047ba61eSachartre * Get max_xfer_sz from the device where the file is or from the device 4817047ba61eSachartre * itself if we have a pseudo device. 
4818047ba61eSachartre */ 4819047ba61eSachartre dev_path[0] = '\0'; 4820047ba61eSachartre 4821047ba61eSachartre if (vd->pseudo) { 4822047ba61eSachartre status = ldi_open_by_name(file_path, FREAD, kcred, &lhandle, 4823047ba61eSachartre vd->vds->ldi_ident); 4824047ba61eSachartre } else { 48253c96341aSnarayan dev = vd->file_vnode->v_vfsp->vfs_dev; 48263c96341aSnarayan if (ddi_dev_pathname(dev, S_IFBLK, dev_path) == DDI_SUCCESS) { 48273c96341aSnarayan PR0("underlying device = %s\n", dev_path); 48283c96341aSnarayan } 48293c96341aSnarayan 4830047ba61eSachartre status = ldi_open_by_dev(&dev, OTYP_BLK, FREAD, kcred, &lhandle, 4831047ba61eSachartre vd->vds->ldi_ident); 4832047ba61eSachartre } 4833047ba61eSachartre 4834047ba61eSachartre if (status != 0) { 4835047ba61eSachartre PR0("ldi_open() returned errno %d for device %s", 4836047ba61eSachartre status, (dev_path[0] == '\0')? file_path : dev_path); 48373c96341aSnarayan } else { 48383c96341aSnarayan if ((status = ldi_ioctl(lhandle, DKIOCINFO, 4839047ba61eSachartre (intptr_t)&dk_cinfo, (vd->open_flags | FKIOCTL), kcred, 48403c96341aSnarayan &rval)) != 0) { 48413c96341aSnarayan PR0("ldi_ioctl(DKIOCINFO) returned errno %d for %s", 48423c96341aSnarayan status, dev_path); 48433c96341aSnarayan } else { 48443c96341aSnarayan /* 48453c96341aSnarayan * Store the device's max transfer size for 48463c96341aSnarayan * return to the client 48473c96341aSnarayan */ 48483c96341aSnarayan vd->max_xfer_sz = dk_cinfo.dki_maxtransfer; 48493c96341aSnarayan } 48503c96341aSnarayan 48513c96341aSnarayan PR0("close the device %s", dev_path); 48523c96341aSnarayan (void) ldi_close(lhandle, FREAD, kcred); 48533c96341aSnarayan } 48543c96341aSnarayan 4855205eeb1aSlm66018 PR0("using file %s, dev %s, max_xfer = %u blks", 48563c96341aSnarayan file_path, dev_path, vd->max_xfer_sz); 48573c96341aSnarayan 4858edcc0754Sachartre if (vd->vdisk_type == VD_DISK_TYPE_SLICE) { 4859edcc0754Sachartre ASSERT(!vd->pseudo); 4860edcc0754Sachartre vd->vdisk_label = 
VD_DISK_LABEL_EFI; 4861edcc0754Sachartre status = vd_setup_partition_efi(vd); 4862047ba61eSachartre return (0); 4863edcc0754Sachartre } 4864edcc0754Sachartre 4865edcc0754Sachartre /* 4866edcc0754Sachartre * Find and validate the geometry of a disk image. 4867edcc0754Sachartre */ 4868edcc0754Sachartre status = vd_file_validate_geometry(vd); 4869edcc0754Sachartre if (status != 0 && status != EINVAL && status != ENOTSUP) { 4870edcc0754Sachartre PRN("Failed to read label from %s", file_path); 4871edcc0754Sachartre return (EIO); 4872edcc0754Sachartre } 4873edcc0754Sachartre 4874edcc0754Sachartre if (vd_file_is_iso_image(vd)) { 4875edcc0754Sachartre /* 4876edcc0754Sachartre * Indicate whether to call this a CD or DVD from the size 4877edcc0754Sachartre * of the ISO image (images for both drive types are stored 4878edcc0754Sachartre * in the ISO-9600 format). CDs can store up to just under 1Gb 4879edcc0754Sachartre */ 4880edcc0754Sachartre if ((vd->vdisk_size * vd->vdisk_block_size) > 4881edcc0754Sachartre (1024 * 1024 * 1024)) 4882edcc0754Sachartre vd->vdisk_media = VD_MEDIA_DVD; 4883edcc0754Sachartre else 4884edcc0754Sachartre vd->vdisk_media = VD_MEDIA_CD; 4885edcc0754Sachartre } else { 4886edcc0754Sachartre vd->vdisk_media = VD_MEDIA_FIXED; 4887edcc0754Sachartre } 4888edcc0754Sachartre 4889edcc0754Sachartre /* Setup devid for the disk image */ 4890047ba61eSachartre 489178fcd0a1Sachartre if (vd->vdisk_label != VD_DISK_LABEL_UNK) { 489278fcd0a1Sachartre 489387a7269eSachartre status = vd_file_read_devid(vd, &vd->file_devid); 489487a7269eSachartre 489587a7269eSachartre if (status == 0) { 489687a7269eSachartre /* a valid devid was found */ 489787a7269eSachartre return (0); 489887a7269eSachartre } 489987a7269eSachartre 490087a7269eSachartre if (status != EINVAL) { 490187a7269eSachartre /* 490278fcd0a1Sachartre * There was an error while trying to read the devid. 490378fcd0a1Sachartre * So this disk image may have a devid but we are 490478fcd0a1Sachartre * unable to read it. 
490587a7269eSachartre */ 490687a7269eSachartre PR0("can not read devid for %s", file_path); 490787a7269eSachartre vd->file_devid = NULL; 490887a7269eSachartre return (0); 490987a7269eSachartre } 491078fcd0a1Sachartre } 491187a7269eSachartre 491287a7269eSachartre /* 491387a7269eSachartre * No valid device id was found so we create one. Note that a failure 491487a7269eSachartre * to create a device id is not fatal and does not prevent the disk 491587a7269eSachartre * image from being attached. 491687a7269eSachartre */ 491787a7269eSachartre PR1("creating devid for %s", file_path); 491887a7269eSachartre 491987a7269eSachartre if (ddi_devid_init(vd->vds->dip, DEVID_FAB, NULL, 0, 492087a7269eSachartre &vd->file_devid) != DDI_SUCCESS) { 492187a7269eSachartre PR0("fail to create devid for %s", file_path); 492287a7269eSachartre vd->file_devid = NULL; 492387a7269eSachartre return (0); 492487a7269eSachartre } 492587a7269eSachartre 492678fcd0a1Sachartre /* 492778fcd0a1Sachartre * Write devid to the disk image. The devid is stored into the disk 492878fcd0a1Sachartre * image if we have a valid label; otherwise the devid will be stored 492978fcd0a1Sachartre * when the user writes a valid label. 
493078fcd0a1Sachartre */ 493178fcd0a1Sachartre if (vd->vdisk_label != VD_DISK_LABEL_UNK) { 493287a7269eSachartre if (vd_file_write_devid(vd, vd->file_devid) != 0) { 493387a7269eSachartre PR0("fail to write devid for %s", file_path); 493487a7269eSachartre ddi_devid_free(vd->file_devid); 493587a7269eSachartre vd->file_devid = NULL; 493687a7269eSachartre } 493778fcd0a1Sachartre } 493887a7269eSachartre 49393c96341aSnarayan return (0); 49403c96341aSnarayan } 49413c96341aSnarayan 494217cadca8Slm66018 494317cadca8Slm66018 /* 494417cadca8Slm66018 * Description: 494517cadca8Slm66018 * Open a device using its device path (supplied by ldm(1m)) 494617cadca8Slm66018 * 494717cadca8Slm66018 * Parameters: 494817cadca8Slm66018 * vd - pointer to structure containing the vDisk info 494917cadca8Slm66018 * 495017cadca8Slm66018 * Return Value 495117cadca8Slm66018 * 0 - success 495217cadca8Slm66018 * EIO - Invalid number of partitions 495317cadca8Slm66018 * != 0 - some other non-zero return value from ldi(9F) functions 495417cadca8Slm66018 */ 495517cadca8Slm66018 static int 495617cadca8Slm66018 vd_open_using_ldi_by_name(vd_t *vd) 495717cadca8Slm66018 { 495817cadca8Slm66018 int rval, status, open_flags; 495917cadca8Slm66018 struct dk_cinfo dk_cinfo; 496017cadca8Slm66018 char *device_path = vd->device_path; 496117cadca8Slm66018 496217cadca8Slm66018 /* 496317cadca8Slm66018 * Try to open the device. If the flags indicate that the device should 496417cadca8Slm66018 * be opened write-enabled, we first we try to open it "read-only" 496517cadca8Slm66018 * to see if we have an optical device such as a CD-ROM which, for 496617cadca8Slm66018 * now, we do not permit writes to and thus should not export write 496717cadca8Slm66018 * operations to the client. 
496817cadca8Slm66018 * 496917cadca8Slm66018 * Future: if/when we implement support for guest domains writing to 497017cadca8Slm66018 * optical devices we will need to do further checking of the media type 497117cadca8Slm66018 * to distinguish between read-only and writable discs. 497217cadca8Slm66018 */ 497317cadca8Slm66018 if (vd->open_flags & FWRITE) { 497417cadca8Slm66018 open_flags = vd->open_flags & ~FWRITE; 497517cadca8Slm66018 status = ldi_open_by_name(device_path, open_flags, kcred, 497617cadca8Slm66018 &vd->ldi_handle[0], vd->vds->ldi_ident); 497717cadca8Slm66018 497817cadca8Slm66018 if (status == 0) { 497917cadca8Slm66018 /* Verify backing device supports dk_cinfo */ 498017cadca8Slm66018 status = ldi_ioctl(vd->ldi_handle[0], DKIOCINFO, 498117cadca8Slm66018 (intptr_t)&dk_cinfo, (open_flags | FKIOCTL), 498217cadca8Slm66018 kcred, &rval); 498317cadca8Slm66018 if (status != 0) { 498417cadca8Slm66018 PRN("ldi_ioctl(DKIOCINFO) returned errno %d for" 498517cadca8Slm66018 " %s opened as RO", status, device_path); 498617cadca8Slm66018 return (status); 498717cadca8Slm66018 } 498817cadca8Slm66018 498917cadca8Slm66018 if (dk_cinfo.dki_partition >= V_NUMPAR) { 499017cadca8Slm66018 PRN("slice %u >= maximum slice %u for %s", 499117cadca8Slm66018 dk_cinfo.dki_partition, V_NUMPAR, 499217cadca8Slm66018 device_path); 499317cadca8Slm66018 return (EIO); 499417cadca8Slm66018 } 499517cadca8Slm66018 499617cadca8Slm66018 /* 499717cadca8Slm66018 * If this is an optical device then we disable 499817cadca8Slm66018 * write access and return, otherwise we close 499917cadca8Slm66018 * the device and try again with writes enabled. 
500017cadca8Slm66018 */ 500117cadca8Slm66018 if (dk_cinfo.dki_ctype == DKC_CDROM) { 500217cadca8Slm66018 vd->open_flags = open_flags; 500317cadca8Slm66018 return (0); 500417cadca8Slm66018 } else { 500517cadca8Slm66018 (void) ldi_close(vd->ldi_handle[0], 500617cadca8Slm66018 open_flags, kcred); 500717cadca8Slm66018 } 500817cadca8Slm66018 } 500917cadca8Slm66018 } 501017cadca8Slm66018 501117cadca8Slm66018 /* Attempt to (re)open device */ 501217cadca8Slm66018 status = ldi_open_by_name(device_path, open_flags, kcred, 501317cadca8Slm66018 &vd->ldi_handle[0], vd->vds->ldi_ident); 501417cadca8Slm66018 501517cadca8Slm66018 /* 501617cadca8Slm66018 * The open can fail for example if we are opening an empty slice. 501717cadca8Slm66018 * In case of a failure, we try the open again but this time with 501817cadca8Slm66018 * the FNDELAY flag. 501917cadca8Slm66018 */ 502017cadca8Slm66018 if (status != 0) 502117cadca8Slm66018 status = ldi_open_by_name(device_path, vd->open_flags | FNDELAY, 502217cadca8Slm66018 kcred, &vd->ldi_handle[0], vd->vds->ldi_ident); 502317cadca8Slm66018 502417cadca8Slm66018 if (status != 0) { 502517cadca8Slm66018 PR0("ldi_open_by_name(%s) = errno %d", device_path, status); 502617cadca8Slm66018 vd->ldi_handle[0] = NULL; 502717cadca8Slm66018 return (status); 502817cadca8Slm66018 } 502917cadca8Slm66018 503017cadca8Slm66018 /* Verify backing device supports dk_cinfo */ 503117cadca8Slm66018 if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCINFO, 503217cadca8Slm66018 (intptr_t)&dk_cinfo, (vd->open_flags | FKIOCTL), kcred, 503317cadca8Slm66018 &rval)) != 0) { 503417cadca8Slm66018 PRN("ldi_ioctl(DKIOCINFO) returned errno %d for %s", 503517cadca8Slm66018 status, device_path); 503617cadca8Slm66018 return (status); 503717cadca8Slm66018 } 503817cadca8Slm66018 if (dk_cinfo.dki_partition >= V_NUMPAR) { 503917cadca8Slm66018 PRN("slice %u >= maximum slice %u for %s", 504017cadca8Slm66018 dk_cinfo.dki_partition, V_NUMPAR, device_path); 504117cadca8Slm66018 return (EIO); 
504217cadca8Slm66018 } 504317cadca8Slm66018 504417cadca8Slm66018 return (0); 504517cadca8Slm66018 } 504617cadca8Slm66018 504717cadca8Slm66018 5048047ba61eSachartre /* 5049047ba61eSachartre * Setup for a virtual disk which backend is a device (a physical disk, 5050047ba61eSachartre * slice or pseudo device) that is directly exported either as a full disk 5051047ba61eSachartre * for a physical disk or as a slice for a pseudo device or a disk slice. 5052047ba61eSachartre * In these cases, the backend is accessed using the LDI interface. 5053047ba61eSachartre */ 50543c96341aSnarayan static int 5055047ba61eSachartre vd_setup_backend_ldi(vd_t *vd) 50561ae08745Sheppo { 5057e1ebb9ecSlm66018 int rval, status; 50581ae08745Sheppo struct dk_cinfo dk_cinfo; 50593c96341aSnarayan char *device_path = vd->device_path; 50601ae08745Sheppo 506117cadca8Slm66018 status = vd_open_using_ldi_by_name(vd); 5062047ba61eSachartre if (status != 0) { 506317cadca8Slm66018 PR0("Failed to open (%s) = errno %d", device_path, status); 50640a55fbb7Slm66018 return (status); 50650a55fbb7Slm66018 } 50660a55fbb7Slm66018 50673c96341aSnarayan vd->file = B_FALSE; 50684bac2208Snarayan 5069047ba61eSachartre /* Get device number of backing device */ 50700a55fbb7Slm66018 if ((status = ldi_get_dev(vd->ldi_handle[0], &vd->dev[0])) != 0) { 50711ae08745Sheppo PRN("ldi_get_dev() returned errno %d for %s", 5072e1ebb9ecSlm66018 status, device_path); 50731ae08745Sheppo return (status); 50741ae08745Sheppo } 50751ae08745Sheppo 507678fcd0a1Sachartre /* Verify backing device supports dk_cinfo */ 5077e1ebb9ecSlm66018 if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCINFO, 5078047ba61eSachartre (intptr_t)&dk_cinfo, (vd->open_flags | FKIOCTL), kcred, 5079e1ebb9ecSlm66018 &rval)) != 0) { 5080e1ebb9ecSlm66018 PRN("ldi_ioctl(DKIOCINFO) returned errno %d for %s", 5081e1ebb9ecSlm66018 status, device_path); 5082e1ebb9ecSlm66018 return (status); 5083e1ebb9ecSlm66018 } 5084e1ebb9ecSlm66018 if (dk_cinfo.dki_partition >= V_NUMPAR) { 
5085e1ebb9ecSlm66018 PRN("slice %u >= maximum slice %u for %s", 5086e1ebb9ecSlm66018 dk_cinfo.dki_partition, V_NUMPAR, device_path); 5087e1ebb9ecSlm66018 return (EIO); 5088e1ebb9ecSlm66018 } 50894bac2208Snarayan 5090e1ebb9ecSlm66018 /* Store the device's max transfer size for return to the client */ 5091e1ebb9ecSlm66018 vd->max_xfer_sz = dk_cinfo.dki_maxtransfer; 5092e1ebb9ecSlm66018 5093047ba61eSachartre /* 509417cadca8Slm66018 * We need to work out if it's an ATAPI (IDE CD-ROM) or SCSI device so 509517cadca8Slm66018 * that we can use the correct CDB group when sending USCSI commands. 509617cadca8Slm66018 */ 509717cadca8Slm66018 vd->is_atapi_dev = vd_is_atapi_device(vd); 509817cadca8Slm66018 509917cadca8Slm66018 /* 5100047ba61eSachartre * Export a full disk. 5101047ba61eSachartre * 5102047ba61eSachartre * When we use the LDI interface, we export a device as a full disk 5103047ba61eSachartre * if we have an entire disk slice (slice 2) and if this slice is 5104047ba61eSachartre * exported as a full disk and not as a single slice disk. 510517cadca8Slm66018 * Similarly, we want to use LDI if we are accessing a CD or DVD 510617cadca8Slm66018 * device (even if it isn't s2) 5107047ba61eSachartre * 5108047ba61eSachartre * Note that pseudo devices are exported as full disks using the vnode 5109047ba61eSachartre * interface, not the LDI interface. 5110047ba61eSachartre */ 511117cadca8Slm66018 if ((dk_cinfo.dki_partition == VD_ENTIRE_DISK_SLICE && 511217cadca8Slm66018 vd->vdisk_type == VD_DISK_TYPE_DISK) || 511317cadca8Slm66018 dk_cinfo.dki_ctype == DKC_CDROM) { 5114047ba61eSachartre ASSERT(!vd->pseudo); 51152f5224aeSachartre if (dk_cinfo.dki_ctype == DKC_SCSI_CCS) 51162f5224aeSachartre vd->scsi = B_TRUE; 5117047ba61eSachartre return (vd_setup_full_disk(vd)); 5118047ba61eSachartre } 5119047ba61eSachartre 5120047ba61eSachartre /* 5121047ba61eSachartre * Export a single slice disk. 
5122047ba61eSachartre * 5123047ba61eSachartre * The exported device can be either a pseudo device or a disk slice. If 5124047ba61eSachartre * it is a disk slice different from slice 2 then it is always exported 5125047ba61eSachartre * as a single slice disk even if the "slice" option is not specified. 5126047ba61eSachartre * If it is disk slice 2 or a pseudo device then it is exported as a 5127047ba61eSachartre * single slice disk only if the "slice" option is specified. 5128047ba61eSachartre */ 5129047ba61eSachartre return (vd_setup_single_slice_disk(vd)); 5130047ba61eSachartre } 5131047ba61eSachartre 5132047ba61eSachartre static int 5133047ba61eSachartre vd_setup_single_slice_disk(vd_t *vd) 5134047ba61eSachartre { 5135edcc0754Sachartre int status, rval; 5136047ba61eSachartre char *device_path = vd->device_path; 5137047ba61eSachartre 5138047ba61eSachartre /* Get size of backing device */ 5139047ba61eSachartre if (ldi_get_size(vd->ldi_handle[0], &vd->vdisk_size) != DDI_SUCCESS) { 5140047ba61eSachartre PRN("ldi_get_size() failed for %s", device_path); 51411ae08745Sheppo return (EIO); 51421ae08745Sheppo } 5143047ba61eSachartre vd->vdisk_size = lbtodb(vd->vdisk_size); /* convert to blocks */ 514417cadca8Slm66018 vd->block_size = DEV_BSIZE; 514517cadca8Slm66018 vd->vdisk_block_size = DEV_BSIZE; 514617cadca8Slm66018 vd->vdisk_media = VD_MEDIA_FIXED; 5147047ba61eSachartre 51481ae08745Sheppo if (vd->pseudo) { 5149047ba61eSachartre ASSERT(vd->vdisk_type == VD_DISK_TYPE_SLICE); 515078fcd0a1Sachartre } 51510a55fbb7Slm66018 5152047ba61eSachartre /* 5153047ba61eSachartre * We export the slice as a single slice disk even if the "slice" 5154047ba61eSachartre * option was not specified. 
5155047ba61eSachartre */ 51561ae08745Sheppo vd->vdisk_type = VD_DISK_TYPE_SLICE; 51571ae08745Sheppo vd->nslices = 1; 51581ae08745Sheppo 5159edcc0754Sachartre /* 5160edcc0754Sachartre * When exporting a slice or a device as a single slice disk, we don't 5161edcc0754Sachartre * care about any partitioning exposed by the backend. The goal is just 5162edcc0754Sachartre * to export the backend as a flat storage. We provide a fake partition 5163edcc0754Sachartre * table (either a VTOC or EFI), which presents only one slice, to 5164edcc0754Sachartre * accommodate tools expecting a disk label. 5165edcc0754Sachartre * 5166edcc0754Sachartre * We check the label of the backend to export the device as a slice 5167edcc0754Sachartre * using the same type of label (VTOC or EFI). If there is no label 5168edcc0754Sachartre * then we create a fake EFI label. 5169edcc0754Sachartre * 5170edcc0754Sachartre * Note that the partition table we are creating could also be faked 5171edcc0754Sachartre * by the client based on the size of the backend device. 
5172edcc0754Sachartre */ 5173edcc0754Sachartre status = ldi_ioctl(vd->ldi_handle[0], DKIOCGVTOC, (intptr_t)&vd->vtoc, 5174edcc0754Sachartre (vd->open_flags | FKIOCTL), kcred, &rval); 5175edcc0754Sachartre 5176edcc0754Sachartre if (status == 0) { 5177edcc0754Sachartre /* export with a fake VTOC label */ 5178edcc0754Sachartre vd->vdisk_label = VD_DISK_LABEL_VTOC; 517978fcd0a1Sachartre status = vd_setup_partition_vtoc(vd); 5180edcc0754Sachartre } else { 5181edcc0754Sachartre /* export with a fake EFI label */ 5182edcc0754Sachartre vd->vdisk_label = VD_DISK_LABEL_EFI; 5183edcc0754Sachartre status = vd_setup_partition_efi(vd); 518478fcd0a1Sachartre } 518578fcd0a1Sachartre 51864bac2208Snarayan return (status); 51874bac2208Snarayan } 51881ae08745Sheppo 51891ae08745Sheppo static int 5190047ba61eSachartre vd_setup_vd(vd_t *vd) 5191047ba61eSachartre { 5192047ba61eSachartre int status; 5193047ba61eSachartre dev_info_t *dip; 5194047ba61eSachartre vnode_t *vnp; 5195047ba61eSachartre char *path = vd->device_path; 5196047ba61eSachartre 5197047ba61eSachartre /* make sure the vdisk backend is valid */ 5198047ba61eSachartre if ((status = lookupname(path, UIO_SYSSPACE, 5199047ba61eSachartre FOLLOW, NULLVPP, &vnp)) != 0) { 5200047ba61eSachartre PR0("Cannot lookup %s errno %d", path, status); 5201047ba61eSachartre goto done; 5202047ba61eSachartre } 5203047ba61eSachartre 5204047ba61eSachartre switch (vnp->v_type) { 5205047ba61eSachartre case VREG: 5206047ba61eSachartre /* 5207047ba61eSachartre * Backend is a file so it is exported as a full disk or as a 5208047ba61eSachartre * single slice disk using the vnode interface. 5209047ba61eSachartre */ 5210047ba61eSachartre VN_RELE(vnp); 5211047ba61eSachartre vd->pseudo = B_FALSE; 5212047ba61eSachartre status = vd_setup_backend_vnode(vd); 5213047ba61eSachartre break; 5214047ba61eSachartre 5215047ba61eSachartre case VBLK: 5216047ba61eSachartre case VCHR: 5217047ba61eSachartre /* 5218047ba61eSachartre * Backend is a device. 
The way it is exported depends on the 5219047ba61eSachartre * type of the device. 5220047ba61eSachartre * 5221047ba61eSachartre * - A pseudo device is exported as a full disk using the vnode 5222047ba61eSachartre * interface or as a single slice disk using the LDI 5223047ba61eSachartre * interface. 5224047ba61eSachartre * 5225047ba61eSachartre * - A disk (represented by the slice 2 of that disk) is 5226047ba61eSachartre * exported as a full disk using the LDI interface. 5227047ba61eSachartre * 5228047ba61eSachartre * - A disk slice (different from slice 2) is always exported 5229047ba61eSachartre * as a single slice disk using the LDI interface. 5230047ba61eSachartre * 5231047ba61eSachartre * - The slice 2 of a disk is exported as a single slice disk 5232047ba61eSachartre * if the "slice" option is specified, otherwise the entire 5233047ba61eSachartre * disk will be exported. In any case, the LDI interface is 5234047ba61eSachartre * used. 5235047ba61eSachartre */ 5236047ba61eSachartre 5237047ba61eSachartre /* check if this is a pseudo device */ 5238047ba61eSachartre if ((dip = ddi_hold_devi_by_instance(getmajor(vnp->v_rdev), 5239047ba61eSachartre dev_to_instance(vnp->v_rdev), 0)) == NULL) { 5240047ba61eSachartre PRN("%s is no longer accessible", path); 5241047ba61eSachartre VN_RELE(vnp); 5242047ba61eSachartre status = EIO; 5243047ba61eSachartre break; 5244047ba61eSachartre } 5245047ba61eSachartre vd->pseudo = is_pseudo_device(dip); 5246047ba61eSachartre ddi_release_devi(dip); 5247047ba61eSachartre VN_RELE(vnp); 5248047ba61eSachartre 52492f5224aeSachartre if (!vd->pseudo) { 52502f5224aeSachartre status = vd_setup_backend_ldi(vd); 52512f5224aeSachartre break; 52522f5224aeSachartre } 52532f5224aeSachartre 5254047ba61eSachartre /* 5255047ba61eSachartre * If this is a pseudo device then its usage depends if the 5256047ba61eSachartre * "slice" option is set or not. 
If the "slice" option is set 5257047ba61eSachartre * then the pseudo device will be exported as a single slice, 5258047ba61eSachartre * otherwise it will be exported as a full disk. 52592f5224aeSachartre * 52602f5224aeSachartre * For backward compatibility, if vd_volume_force_slice is set 52612f5224aeSachartre * then we always export pseudo devices as slices. 5262047ba61eSachartre */ 52632f5224aeSachartre if (vd_volume_force_slice) { 52642f5224aeSachartre vd->vdisk_type = VD_DISK_TYPE_SLICE; 52652f5224aeSachartre vd->nslices = 1; 52662f5224aeSachartre } 52672f5224aeSachartre 52682f5224aeSachartre if (vd->vdisk_type == VD_DISK_TYPE_DISK) 5269047ba61eSachartre status = vd_setup_backend_vnode(vd); 5270047ba61eSachartre else 5271047ba61eSachartre status = vd_setup_backend_ldi(vd); 5272047ba61eSachartre break; 5273047ba61eSachartre 5274047ba61eSachartre default: 5275047ba61eSachartre PRN("Unsupported vdisk backend %s", path); 5276047ba61eSachartre VN_RELE(vnp); 5277047ba61eSachartre status = EBADF; 5278047ba61eSachartre } 5279047ba61eSachartre 5280047ba61eSachartre done: 5281047ba61eSachartre if (status != 0) { 5282047ba61eSachartre /* 5283047ba61eSachartre * If the error is retryable print an error message only 5284047ba61eSachartre * during the first try. 
5285047ba61eSachartre */ 5286047ba61eSachartre if (status == ENXIO || status == ENODEV || 5287047ba61eSachartre status == ENOENT || status == EROFS) { 5288047ba61eSachartre if (!(vd->initialized & VD_SETUP_ERROR)) { 5289047ba61eSachartre PRN("%s is currently inaccessible (error %d)", 5290047ba61eSachartre path, status); 5291047ba61eSachartre } 5292047ba61eSachartre status = EAGAIN; 5293047ba61eSachartre } else { 5294047ba61eSachartre PRN("%s can not be exported as a virtual disk " 5295047ba61eSachartre "(error %d)", path, status); 5296047ba61eSachartre } 5297047ba61eSachartre vd->initialized |= VD_SETUP_ERROR; 5298047ba61eSachartre 5299047ba61eSachartre } else if (vd->initialized & VD_SETUP_ERROR) { 5300047ba61eSachartre /* print a message only if we previously had an error */ 5301047ba61eSachartre PRN("%s is now online", path); 5302047ba61eSachartre vd->initialized &= ~VD_SETUP_ERROR; 5303047ba61eSachartre } 5304047ba61eSachartre 5305047ba61eSachartre return (status); 5306047ba61eSachartre } 5307047ba61eSachartre 5308047ba61eSachartre static int 5309047ba61eSachartre vds_do_init_vd(vds_t *vds, uint64_t id, char *device_path, uint64_t options, 5310047ba61eSachartre uint64_t ldc_id, vd_t **vdp) 53111ae08745Sheppo { 53121ae08745Sheppo char tq_name[TASKQ_NAMELEN]; 53130a55fbb7Slm66018 int status; 53141ae08745Sheppo ddi_iblock_cookie_t iblock = NULL; 53151ae08745Sheppo ldc_attr_t ldc_attr; 53161ae08745Sheppo vd_t *vd; 53171ae08745Sheppo 53181ae08745Sheppo 53191ae08745Sheppo ASSERT(vds != NULL); 5320e1ebb9ecSlm66018 ASSERT(device_path != NULL); 53211ae08745Sheppo ASSERT(vdp != NULL); 5322e1ebb9ecSlm66018 PR0("Adding vdisk for %s", device_path); 53231ae08745Sheppo 53241ae08745Sheppo if ((vd = kmem_zalloc(sizeof (*vd), KM_NOSLEEP)) == NULL) { 53251ae08745Sheppo PRN("No memory for virtual disk"); 53261ae08745Sheppo return (EAGAIN); 53271ae08745Sheppo } 53281ae08745Sheppo *vdp = vd; /* assign here so vds_destroy_vd() can cleanup later */ 53291ae08745Sheppo vd->vds = vds; 
53303c96341aSnarayan (void) strncpy(vd->device_path, device_path, MAXPATHLEN); 53311ae08745Sheppo 5332047ba61eSachartre /* Setup open flags */ 5333047ba61eSachartre vd->open_flags = FREAD; 5334047ba61eSachartre 5335047ba61eSachartre if (!(options & VD_OPT_RDONLY)) 5336047ba61eSachartre vd->open_flags |= FWRITE; 5337047ba61eSachartre 5338047ba61eSachartre if (options & VD_OPT_EXCLUSIVE) 5339047ba61eSachartre vd->open_flags |= FEXCL; 5340047ba61eSachartre 5341047ba61eSachartre /* Setup disk type */ 5342047ba61eSachartre if (options & VD_OPT_SLICE) { 5343047ba61eSachartre vd->vdisk_type = VD_DISK_TYPE_SLICE; 5344047ba61eSachartre vd->nslices = 1; 5345047ba61eSachartre } else { 5346047ba61eSachartre vd->vdisk_type = VD_DISK_TYPE_DISK; 5347047ba61eSachartre vd->nslices = V_NUMPAR; 5348047ba61eSachartre } 5349047ba61eSachartre 5350047ba61eSachartre /* default disk label */ 5351047ba61eSachartre vd->vdisk_label = VD_DISK_LABEL_UNK; 5352047ba61eSachartre 53530a55fbb7Slm66018 /* Open vdisk and initialize parameters */ 53543c96341aSnarayan if ((status = vd_setup_vd(vd)) == 0) { 53553c96341aSnarayan vd->initialized |= VD_DISK_READY; 53561ae08745Sheppo 53573c96341aSnarayan ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR); 53583c96341aSnarayan PR0("vdisk_type = %s, pseudo = %s, file = %s, nslices = %u", 53593c96341aSnarayan ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? "disk" : "slice"), 53603c96341aSnarayan (vd->pseudo ? "yes" : "no"), (vd->file ? 
"yes" : "no"), 53613c96341aSnarayan vd->nslices); 53623c96341aSnarayan } else { 53633c96341aSnarayan if (status != EAGAIN) 53643c96341aSnarayan return (status); 53653c96341aSnarayan } 53661ae08745Sheppo 53671ae08745Sheppo /* Initialize locking */ 53681ae08745Sheppo if (ddi_get_soft_iblock_cookie(vds->dip, DDI_SOFTINT_MED, 53691ae08745Sheppo &iblock) != DDI_SUCCESS) { 53701ae08745Sheppo PRN("Could not get iblock cookie."); 53711ae08745Sheppo return (EIO); 53721ae08745Sheppo } 53731ae08745Sheppo 53741ae08745Sheppo mutex_init(&vd->lock, NULL, MUTEX_DRIVER, iblock); 53751ae08745Sheppo vd->initialized |= VD_LOCKING; 53761ae08745Sheppo 53771ae08745Sheppo 5378d10e4ef2Snarayan /* Create start and completion task queues for the vdisk */ 5379d10e4ef2Snarayan (void) snprintf(tq_name, sizeof (tq_name), "vd_startq%lu", id); 53801ae08745Sheppo PR1("tq_name = %s", tq_name); 5381d10e4ef2Snarayan if ((vd->startq = ddi_taskq_create(vds->dip, tq_name, 1, 53821ae08745Sheppo TASKQ_DEFAULTPRI, 0)) == NULL) { 53831ae08745Sheppo PRN("Could not create task queue"); 53841ae08745Sheppo return (EIO); 53851ae08745Sheppo } 5386d10e4ef2Snarayan (void) snprintf(tq_name, sizeof (tq_name), "vd_completionq%lu", id); 5387d10e4ef2Snarayan PR1("tq_name = %s", tq_name); 5388d10e4ef2Snarayan if ((vd->completionq = ddi_taskq_create(vds->dip, tq_name, 1, 5389d10e4ef2Snarayan TASKQ_DEFAULTPRI, 0)) == NULL) { 5390d10e4ef2Snarayan PRN("Could not create task queue"); 5391d10e4ef2Snarayan return (EIO); 5392d10e4ef2Snarayan } 5393d10e4ef2Snarayan vd->enabled = 1; /* before callback can dispatch to startq */ 53941ae08745Sheppo 53951ae08745Sheppo 53961ae08745Sheppo /* Bring up LDC */ 53971ae08745Sheppo ldc_attr.devclass = LDC_DEV_BLK_SVC; 53981ae08745Sheppo ldc_attr.instance = ddi_get_instance(vds->dip); 53991ae08745Sheppo ldc_attr.mode = LDC_MODE_UNRELIABLE; 5400e1ebb9ecSlm66018 ldc_attr.mtu = VD_LDC_MTU; 54011ae08745Sheppo if ((status = ldc_init(ldc_id, &ldc_attr, &vd->ldc_handle)) != 0) { 540217cadca8Slm66018 
PRN("Could not initialize LDC channel %lx, " 5403690555a1Sachartre "init failed with error %d", ldc_id, status); 54041ae08745Sheppo return (status); 54051ae08745Sheppo } 54061ae08745Sheppo vd->initialized |= VD_LDC; 54071ae08745Sheppo 54081ae08745Sheppo if ((status = ldc_reg_callback(vd->ldc_handle, vd_handle_ldc_events, 54091ae08745Sheppo (caddr_t)vd)) != 0) { 5410690555a1Sachartre PRN("Could not initialize LDC channel %lu," 5411690555a1Sachartre "reg_callback failed with error %d", ldc_id, status); 54121ae08745Sheppo return (status); 54131ae08745Sheppo } 54141ae08745Sheppo 54151ae08745Sheppo if ((status = ldc_open(vd->ldc_handle)) != 0) { 5416690555a1Sachartre PRN("Could not initialize LDC channel %lu," 5417690555a1Sachartre "open failed with error %d", ldc_id, status); 54181ae08745Sheppo return (status); 54191ae08745Sheppo } 54201ae08745Sheppo 54213af08d82Slm66018 if ((status = ldc_up(vd->ldc_handle)) != 0) { 542234683adeSsg70180 PR0("ldc_up() returned errno %d", status); 54233af08d82Slm66018 } 54243af08d82Slm66018 54254bac2208Snarayan /* Allocate the inband task memory handle */ 54264bac2208Snarayan status = ldc_mem_alloc_handle(vd->ldc_handle, &(vd->inband_task.mhdl)); 54274bac2208Snarayan if (status) { 5428690555a1Sachartre PRN("Could not initialize LDC channel %lu," 5429690555a1Sachartre "alloc_handle failed with error %d", ldc_id, status); 54304bac2208Snarayan return (ENXIO); 54314bac2208Snarayan } 54321ae08745Sheppo 54331ae08745Sheppo /* Add the successfully-initialized vdisk to the server's table */ 54341ae08745Sheppo if (mod_hash_insert(vds->vd_table, (mod_hash_key_t)id, vd) != 0) { 54351ae08745Sheppo PRN("Error adding vdisk ID %lu to table", id); 54361ae08745Sheppo return (EIO); 54371ae08745Sheppo } 54381ae08745Sheppo 54393af08d82Slm66018 /* Allocate the staging buffer */ 54403af08d82Slm66018 vd->max_msglen = sizeof (vio_msg_t); /* baseline vio message size */ 54413af08d82Slm66018 vd->vio_msgp = kmem_alloc(vd->max_msglen, KM_SLEEP); 54423af08d82Slm66018 
54433af08d82Slm66018 /* store initial state */ 54443af08d82Slm66018 vd->state = VD_STATE_INIT; 54453af08d82Slm66018 54461ae08745Sheppo return (0); 54471ae08745Sheppo } 54481ae08745Sheppo 54493af08d82Slm66018 static void 54503af08d82Slm66018 vd_free_dring_task(vd_t *vdp) 54513af08d82Slm66018 { 54523af08d82Slm66018 if (vdp->dring_task != NULL) { 54533af08d82Slm66018 ASSERT(vdp->dring_len != 0); 54543af08d82Slm66018 /* Free all dring_task memory handles */ 54553af08d82Slm66018 for (int i = 0; i < vdp->dring_len; i++) { 54563af08d82Slm66018 (void) ldc_mem_free_handle(vdp->dring_task[i].mhdl); 54573af08d82Slm66018 kmem_free(vdp->dring_task[i].msg, vdp->max_msglen); 54583af08d82Slm66018 vdp->dring_task[i].msg = NULL; 54593af08d82Slm66018 } 54603af08d82Slm66018 kmem_free(vdp->dring_task, 54613af08d82Slm66018 (sizeof (*vdp->dring_task)) * vdp->dring_len); 54623af08d82Slm66018 vdp->dring_task = NULL; 54633af08d82Slm66018 } 54643af08d82Slm66018 } 54653af08d82Slm66018 54661ae08745Sheppo /* 54671ae08745Sheppo * Destroy the state associated with a virtual disk 54681ae08745Sheppo */ 54691ae08745Sheppo static void 54701ae08745Sheppo vds_destroy_vd(void *arg) 54711ae08745Sheppo { 54721ae08745Sheppo vd_t *vd = (vd_t *)arg; 547334683adeSsg70180 int retry = 0, rv; 54741ae08745Sheppo 54751ae08745Sheppo if (vd == NULL) 54761ae08745Sheppo return; 54771ae08745Sheppo 5478d10e4ef2Snarayan PR0("Destroying vdisk state"); 5479d10e4ef2Snarayan 54801ae08745Sheppo /* Disable queuing requests for the vdisk */ 54811ae08745Sheppo if (vd->initialized & VD_LOCKING) { 54821ae08745Sheppo mutex_enter(&vd->lock); 54831ae08745Sheppo vd->enabled = 0; 54841ae08745Sheppo mutex_exit(&vd->lock); 54851ae08745Sheppo } 54861ae08745Sheppo 5487d10e4ef2Snarayan /* Drain and destroy start queue (*before* destroying completionq) */ 5488d10e4ef2Snarayan if (vd->startq != NULL) 5489d10e4ef2Snarayan ddi_taskq_destroy(vd->startq); /* waits for queued tasks */ 5490d10e4ef2Snarayan 5491d10e4ef2Snarayan /* Drain and destroy 
completion queue (*before* shutting down LDC) */ 5492d10e4ef2Snarayan if (vd->completionq != NULL) 5493d10e4ef2Snarayan ddi_taskq_destroy(vd->completionq); /* waits for tasks */ 5494d10e4ef2Snarayan 54953af08d82Slm66018 vd_free_dring_task(vd); 54963af08d82Slm66018 549734683adeSsg70180 /* Free the inband task memory handle */ 549834683adeSsg70180 (void) ldc_mem_free_handle(vd->inband_task.mhdl); 549934683adeSsg70180 550034683adeSsg70180 /* Shut down LDC */ 550134683adeSsg70180 if (vd->initialized & VD_LDC) { 550234683adeSsg70180 /* unmap the dring */ 550334683adeSsg70180 if (vd->initialized & VD_DRING) 550434683adeSsg70180 (void) ldc_mem_dring_unmap(vd->dring_handle); 550534683adeSsg70180 550634683adeSsg70180 /* close LDC channel - retry on EAGAIN */ 550734683adeSsg70180 while ((rv = ldc_close(vd->ldc_handle)) == EAGAIN) { 550834683adeSsg70180 if (++retry > vds_ldc_retries) { 550934683adeSsg70180 PR0("Timed out closing channel"); 551034683adeSsg70180 break; 551134683adeSsg70180 } 551234683adeSsg70180 drv_usecwait(vds_ldc_delay); 551334683adeSsg70180 } 551434683adeSsg70180 if (rv == 0) { 551534683adeSsg70180 (void) ldc_unreg_callback(vd->ldc_handle); 551634683adeSsg70180 (void) ldc_fini(vd->ldc_handle); 551734683adeSsg70180 } else { 551834683adeSsg70180 /* 551934683adeSsg70180 * Closing the LDC channel has failed. Ideally we should 552034683adeSsg70180 * fail here but there is no Zeus level infrastructure 552134683adeSsg70180 * to handle this. The MD has already been changed and 552234683adeSsg70180 * we have to do the close. So we try to do as much 552334683adeSsg70180 * clean up as we can. 
552434683adeSsg70180 */ 552534683adeSsg70180 (void) ldc_set_cb_mode(vd->ldc_handle, LDC_CB_DISABLE); 552634683adeSsg70180 while (ldc_unreg_callback(vd->ldc_handle) == EAGAIN) 552734683adeSsg70180 drv_usecwait(vds_ldc_delay); 552834683adeSsg70180 } 552934683adeSsg70180 } 553034683adeSsg70180 55313af08d82Slm66018 /* Free the staging buffer for msgs */ 55323af08d82Slm66018 if (vd->vio_msgp != NULL) { 55333af08d82Slm66018 kmem_free(vd->vio_msgp, vd->max_msglen); 55343af08d82Slm66018 vd->vio_msgp = NULL; 55353af08d82Slm66018 } 55363af08d82Slm66018 55373af08d82Slm66018 /* Free the inband message buffer */ 55383af08d82Slm66018 if (vd->inband_task.msg != NULL) { 55393af08d82Slm66018 kmem_free(vd->inband_task.msg, vd->max_msglen); 55403af08d82Slm66018 vd->inband_task.msg = NULL; 5541d10e4ef2Snarayan } 5542da6c28aaSamw 55433c96341aSnarayan if (vd->file) { 5544690555a1Sachartre /* Close file */ 5545047ba61eSachartre (void) VOP_CLOSE(vd->file_vnode, vd->open_flags, 1, 5546da6c28aaSamw 0, kcred, NULL); 55473c96341aSnarayan VN_RELE(vd->file_vnode); 554887a7269eSachartre if (vd->file_devid != NULL) 554987a7269eSachartre ddi_devid_free(vd->file_devid); 55503c96341aSnarayan } else { 55511ae08745Sheppo /* Close any open backing-device slices */ 55521ae08745Sheppo for (uint_t slice = 0; slice < vd->nslices; slice++) { 55531ae08745Sheppo if (vd->ldi_handle[slice] != NULL) { 55541ae08745Sheppo PR0("Closing slice %u", slice); 55551ae08745Sheppo (void) ldi_close(vd->ldi_handle[slice], 5556047ba61eSachartre vd->open_flags, kcred); 55571ae08745Sheppo } 55581ae08745Sheppo } 55593c96341aSnarayan } 55601ae08745Sheppo 55611ae08745Sheppo /* Free lock */ 55621ae08745Sheppo if (vd->initialized & VD_LOCKING) 55631ae08745Sheppo mutex_destroy(&vd->lock); 55641ae08745Sheppo 55651ae08745Sheppo /* Finally, free the vdisk structure itself */ 55661ae08745Sheppo kmem_free(vd, sizeof (*vd)); 55671ae08745Sheppo } 55681ae08745Sheppo 55691ae08745Sheppo static int 5570047ba61eSachartre vds_init_vd(vds_t *vds, 
uint64_t id, char *device_path, uint64_t options, 5571047ba61eSachartre uint64_t ldc_id) 55721ae08745Sheppo { 55731ae08745Sheppo int status; 55741ae08745Sheppo vd_t *vd = NULL; 55751ae08745Sheppo 55761ae08745Sheppo 5577047ba61eSachartre if ((status = vds_do_init_vd(vds, id, device_path, options, 5578047ba61eSachartre ldc_id, &vd)) != 0) 55791ae08745Sheppo vds_destroy_vd(vd); 55801ae08745Sheppo 55811ae08745Sheppo return (status); 55821ae08745Sheppo } 55831ae08745Sheppo 55841ae08745Sheppo static int 55851ae08745Sheppo vds_do_get_ldc_id(md_t *md, mde_cookie_t vd_node, mde_cookie_t *channel, 55861ae08745Sheppo uint64_t *ldc_id) 55871ae08745Sheppo { 55881ae08745Sheppo int num_channels; 55891ae08745Sheppo 55901ae08745Sheppo 55911ae08745Sheppo /* Look for channel endpoint child(ren) of the vdisk MD node */ 55921ae08745Sheppo if ((num_channels = md_scan_dag(md, vd_node, 55931ae08745Sheppo md_find_name(md, VD_CHANNEL_ENDPOINT), 55941ae08745Sheppo md_find_name(md, "fwd"), channel)) <= 0) { 55951ae08745Sheppo PRN("No \"%s\" found for virtual disk", VD_CHANNEL_ENDPOINT); 55961ae08745Sheppo return (-1); 55971ae08745Sheppo } 55981ae08745Sheppo 55991ae08745Sheppo /* Get the "id" value for the first channel endpoint node */ 56001ae08745Sheppo if (md_get_prop_val(md, channel[0], VD_ID_PROP, ldc_id) != 0) { 56011ae08745Sheppo PRN("No \"%s\" property found for \"%s\" of vdisk", 56021ae08745Sheppo VD_ID_PROP, VD_CHANNEL_ENDPOINT); 56031ae08745Sheppo return (-1); 56041ae08745Sheppo } 56051ae08745Sheppo 56061ae08745Sheppo if (num_channels > 1) { 56071ae08745Sheppo PRN("Using ID of first of multiple channels for this vdisk"); 56081ae08745Sheppo } 56091ae08745Sheppo 56101ae08745Sheppo return (0); 56111ae08745Sheppo } 56121ae08745Sheppo 56131ae08745Sheppo static int 56141ae08745Sheppo vds_get_ldc_id(md_t *md, mde_cookie_t vd_node, uint64_t *ldc_id) 56151ae08745Sheppo { 56161ae08745Sheppo int num_nodes, status; 56171ae08745Sheppo size_t size; 56181ae08745Sheppo mde_cookie_t *channel; 
56191ae08745Sheppo 56201ae08745Sheppo 56211ae08745Sheppo if ((num_nodes = md_node_count(md)) <= 0) { 56221ae08745Sheppo PRN("Invalid node count in Machine Description subtree"); 56231ae08745Sheppo return (-1); 56241ae08745Sheppo } 56251ae08745Sheppo size = num_nodes*(sizeof (*channel)); 56261ae08745Sheppo channel = kmem_zalloc(size, KM_SLEEP); 56271ae08745Sheppo status = vds_do_get_ldc_id(md, vd_node, channel, ldc_id); 56281ae08745Sheppo kmem_free(channel, size); 56291ae08745Sheppo 56301ae08745Sheppo return (status); 56311ae08745Sheppo } 56321ae08745Sheppo 5633047ba61eSachartre /* 5634047ba61eSachartre * Function: 5635047ba61eSachartre * vds_get_options 5636047ba61eSachartre * 5637047ba61eSachartre * Description: 5638047ba61eSachartre * Parse the options of a vds node. Options are defined as an array 5639047ba61eSachartre * of strings in the vds-block-device-opts property of the vds node 5640047ba61eSachartre * in the machine description. Options are returned as a bitmask. The 5641047ba61eSachartre * mapping between the bitmask options and the options strings from the 5642047ba61eSachartre * machine description is defined in the vd_bdev_options[] array. 5643047ba61eSachartre * 5644047ba61eSachartre * The vds-block-device-opts property is optional. If a vds has no such 5645047ba61eSachartre * property then no option is defined. 5646047ba61eSachartre * 5647047ba61eSachartre * Parameters: 5648047ba61eSachartre * md - machine description. 5649047ba61eSachartre * vd_node - vds node in the machine description for which 5650047ba61eSachartre * options have to be parsed. 5651047ba61eSachartre * options - the returned options. 5652047ba61eSachartre * 5653047ba61eSachartre * Return Code: 5654047ba61eSachartre * none. 
5655047ba61eSachartre */ 5656047ba61eSachartre static void 5657047ba61eSachartre vds_get_options(md_t *md, mde_cookie_t vd_node, uint64_t *options) 5658047ba61eSachartre { 5659047ba61eSachartre char *optstr, *opt; 5660047ba61eSachartre int len, n, i; 5661047ba61eSachartre 5662047ba61eSachartre *options = 0; 5663047ba61eSachartre 5664047ba61eSachartre if (md_get_prop_data(md, vd_node, VD_BLOCK_DEVICE_OPTS, 5665047ba61eSachartre (uint8_t **)&optstr, &len) != 0) { 5666047ba61eSachartre PR0("No options found"); 5667047ba61eSachartre return; 5668047ba61eSachartre } 5669047ba61eSachartre 5670047ba61eSachartre /* parse options */ 5671047ba61eSachartre opt = optstr; 5672047ba61eSachartre n = sizeof (vd_bdev_options) / sizeof (vd_option_t); 5673047ba61eSachartre 5674047ba61eSachartre while (opt < optstr + len) { 5675047ba61eSachartre for (i = 0; i < n; i++) { 5676047ba61eSachartre if (strncmp(vd_bdev_options[i].vdo_name, 5677047ba61eSachartre opt, VD_OPTION_NLEN) == 0) { 5678047ba61eSachartre *options |= vd_bdev_options[i].vdo_value; 5679047ba61eSachartre break; 5680047ba61eSachartre } 5681047ba61eSachartre } 5682047ba61eSachartre 5683047ba61eSachartre if (i < n) { 5684047ba61eSachartre PR0("option: %s", opt); 5685047ba61eSachartre } else { 5686047ba61eSachartre PRN("option %s is unknown or unsupported", opt); 5687047ba61eSachartre } 5688047ba61eSachartre 5689047ba61eSachartre opt += strlen(opt) + 1; 5690047ba61eSachartre } 5691047ba61eSachartre } 5692047ba61eSachartre 56931ae08745Sheppo static void 56941ae08745Sheppo vds_add_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node) 56951ae08745Sheppo { 5696e1ebb9ecSlm66018 char *device_path = NULL; 5697047ba61eSachartre uint64_t id = 0, ldc_id = 0, options = 0; 56981ae08745Sheppo 56991ae08745Sheppo if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) { 57001ae08745Sheppo PRN("Error getting vdisk \"%s\"", VD_ID_PROP); 57011ae08745Sheppo return; 57021ae08745Sheppo } 57031ae08745Sheppo PR0("Adding vdisk ID %lu", id); 
57041ae08745Sheppo if (md_get_prop_str(md, vd_node, VD_BLOCK_DEVICE_PROP, 5705e1ebb9ecSlm66018 &device_path) != 0) { 57061ae08745Sheppo PRN("Error getting vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); 57071ae08745Sheppo return; 57081ae08745Sheppo } 57091ae08745Sheppo 5710047ba61eSachartre vds_get_options(md, vd_node, &options); 5711047ba61eSachartre 57121ae08745Sheppo if (vds_get_ldc_id(md, vd_node, &ldc_id) != 0) { 57131ae08745Sheppo PRN("Error getting LDC ID for vdisk %lu", id); 57141ae08745Sheppo return; 57151ae08745Sheppo } 57161ae08745Sheppo 5717047ba61eSachartre if (vds_init_vd(vds, id, device_path, options, ldc_id) != 0) { 57181ae08745Sheppo PRN("Failed to add vdisk ID %lu", id); 571917cadca8Slm66018 if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)id) != 0) 572017cadca8Slm66018 PRN("No vDisk entry found for vdisk ID %lu", id); 57211ae08745Sheppo return; 57221ae08745Sheppo } 57231ae08745Sheppo } 57241ae08745Sheppo 57251ae08745Sheppo static void 57261ae08745Sheppo vds_remove_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node) 57271ae08745Sheppo { 57281ae08745Sheppo uint64_t id = 0; 57291ae08745Sheppo 57301ae08745Sheppo 57311ae08745Sheppo if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) { 57321ae08745Sheppo PRN("Unable to get \"%s\" property from vdisk's MD node", 57331ae08745Sheppo VD_ID_PROP); 57341ae08745Sheppo return; 57351ae08745Sheppo } 57361ae08745Sheppo PR0("Removing vdisk ID %lu", id); 57371ae08745Sheppo if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)id) != 0) 57381ae08745Sheppo PRN("No vdisk entry found for vdisk ID %lu", id); 57391ae08745Sheppo } 57401ae08745Sheppo 57411ae08745Sheppo static void 57421ae08745Sheppo vds_change_vd(vds_t *vds, md_t *prev_md, mde_cookie_t prev_vd_node, 57431ae08745Sheppo md_t *curr_md, mde_cookie_t curr_vd_node) 57441ae08745Sheppo { 57451ae08745Sheppo char *curr_dev, *prev_dev; 5746047ba61eSachartre uint64_t curr_id = 0, curr_ldc_id = 0, curr_options = 0; 5747047ba61eSachartre uint64_t prev_id = 0, prev_ldc_id = 0, 
prev_options = 0; 57481ae08745Sheppo size_t len; 57491ae08745Sheppo 57501ae08745Sheppo 57511ae08745Sheppo /* Validate that vdisk ID has not changed */ 57521ae08745Sheppo if (md_get_prop_val(prev_md, prev_vd_node, VD_ID_PROP, &prev_id) != 0) { 57531ae08745Sheppo PRN("Error getting previous vdisk \"%s\" property", 57541ae08745Sheppo VD_ID_PROP); 57551ae08745Sheppo return; 57561ae08745Sheppo } 57571ae08745Sheppo if (md_get_prop_val(curr_md, curr_vd_node, VD_ID_PROP, &curr_id) != 0) { 57581ae08745Sheppo PRN("Error getting current vdisk \"%s\" property", VD_ID_PROP); 57591ae08745Sheppo return; 57601ae08745Sheppo } 57611ae08745Sheppo if (curr_id != prev_id) { 57621ae08745Sheppo PRN("Not changing vdisk: ID changed from %lu to %lu", 57631ae08745Sheppo prev_id, curr_id); 57641ae08745Sheppo return; 57651ae08745Sheppo } 57661ae08745Sheppo 57671ae08745Sheppo /* Validate that LDC ID has not changed */ 57681ae08745Sheppo if (vds_get_ldc_id(prev_md, prev_vd_node, &prev_ldc_id) != 0) { 57691ae08745Sheppo PRN("Error getting LDC ID for vdisk %lu", prev_id); 57701ae08745Sheppo return; 57711ae08745Sheppo } 57721ae08745Sheppo 57731ae08745Sheppo if (vds_get_ldc_id(curr_md, curr_vd_node, &curr_ldc_id) != 0) { 57741ae08745Sheppo PRN("Error getting LDC ID for vdisk %lu", curr_id); 57751ae08745Sheppo return; 57761ae08745Sheppo } 57771ae08745Sheppo if (curr_ldc_id != prev_ldc_id) { 57780a55fbb7Slm66018 _NOTE(NOTREACHED); /* lint is confused */ 57791ae08745Sheppo PRN("Not changing vdisk: " 57801ae08745Sheppo "LDC ID changed from %lu to %lu", prev_ldc_id, curr_ldc_id); 57811ae08745Sheppo return; 57821ae08745Sheppo } 57831ae08745Sheppo 57841ae08745Sheppo /* Determine whether device path has changed */ 57851ae08745Sheppo if (md_get_prop_str(prev_md, prev_vd_node, VD_BLOCK_DEVICE_PROP, 57861ae08745Sheppo &prev_dev) != 0) { 57871ae08745Sheppo PRN("Error getting previous vdisk \"%s\"", 57881ae08745Sheppo VD_BLOCK_DEVICE_PROP); 57891ae08745Sheppo return; 57901ae08745Sheppo } 57911ae08745Sheppo if 
(md_get_prop_str(curr_md, curr_vd_node, VD_BLOCK_DEVICE_PROP, 57921ae08745Sheppo &curr_dev) != 0) { 57931ae08745Sheppo PRN("Error getting current vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); 57941ae08745Sheppo return; 57951ae08745Sheppo } 57961ae08745Sheppo if (((len = strlen(curr_dev)) == strlen(prev_dev)) && 57971ae08745Sheppo (strncmp(curr_dev, prev_dev, len) == 0)) 57981ae08745Sheppo return; /* no relevant (supported) change */ 57991ae08745Sheppo 5800047ba61eSachartre /* Validate that options have not changed */ 5801047ba61eSachartre vds_get_options(prev_md, prev_vd_node, &prev_options); 5802047ba61eSachartre vds_get_options(curr_md, curr_vd_node, &curr_options); 5803047ba61eSachartre if (prev_options != curr_options) { 5804047ba61eSachartre PRN("Not changing vdisk: options changed from %lx to %lx", 5805047ba61eSachartre prev_options, curr_options); 5806047ba61eSachartre return; 5807047ba61eSachartre } 5808047ba61eSachartre 58091ae08745Sheppo PR0("Changing vdisk ID %lu", prev_id); 58103af08d82Slm66018 58111ae08745Sheppo /* Remove old state, which will close vdisk and reset */ 58121ae08745Sheppo if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)prev_id) != 0) 58131ae08745Sheppo PRN("No entry found for vdisk ID %lu", prev_id); 58143af08d82Slm66018 58151ae08745Sheppo /* Re-initialize vdisk with new state */ 5816047ba61eSachartre if (vds_init_vd(vds, curr_id, curr_dev, curr_options, 5817047ba61eSachartre curr_ldc_id) != 0) { 58181ae08745Sheppo PRN("Failed to change vdisk ID %lu", curr_id); 58191ae08745Sheppo return; 58201ae08745Sheppo } 58211ae08745Sheppo } 58221ae08745Sheppo 58231ae08745Sheppo static int 58241ae08745Sheppo vds_process_md(void *arg, mdeg_result_t *md) 58251ae08745Sheppo { 58261ae08745Sheppo int i; 58271ae08745Sheppo vds_t *vds = arg; 58281ae08745Sheppo 58291ae08745Sheppo 58301ae08745Sheppo if (md == NULL) 58311ae08745Sheppo return (MDEG_FAILURE); 58321ae08745Sheppo ASSERT(vds != NULL); 58331ae08745Sheppo 58341ae08745Sheppo for (i = 0; i < 
md->removed.nelem; i++) 58351ae08745Sheppo vds_remove_vd(vds, md->removed.mdp, md->removed.mdep[i]); 58361ae08745Sheppo for (i = 0; i < md->match_curr.nelem; i++) 58371ae08745Sheppo vds_change_vd(vds, md->match_prev.mdp, md->match_prev.mdep[i], 58381ae08745Sheppo md->match_curr.mdp, md->match_curr.mdep[i]); 58391ae08745Sheppo for (i = 0; i < md->added.nelem; i++) 58401ae08745Sheppo vds_add_vd(vds, md->added.mdp, md->added.mdep[i]); 58411ae08745Sheppo 58421ae08745Sheppo return (MDEG_SUCCESS); 58431ae08745Sheppo } 58441ae08745Sheppo 58453c96341aSnarayan 58461ae08745Sheppo static int 58471ae08745Sheppo vds_do_attach(dev_info_t *dip) 58481ae08745Sheppo { 5849445b4c2eSsb155480 int status, sz; 5850445b4c2eSsb155480 int cfg_handle; 58511ae08745Sheppo minor_t instance = ddi_get_instance(dip); 58521ae08745Sheppo vds_t *vds; 5853445b4c2eSsb155480 mdeg_prop_spec_t *pspecp; 5854445b4c2eSsb155480 mdeg_node_spec_t *ispecp; 58551ae08745Sheppo 58561ae08745Sheppo /* 58571ae08745Sheppo * The "cfg-handle" property of a vds node in an MD contains the MD's 58581ae08745Sheppo * notion of "instance", or unique identifier, for that node; OBP 58591ae08745Sheppo * stores the value of the "cfg-handle" MD property as the value of 58601ae08745Sheppo * the "reg" property on the node in the device tree it builds from 58611ae08745Sheppo * the MD and passes to Solaris. Thus, we look up the devinfo node's 58621ae08745Sheppo * "reg" property value to uniquely identify this device instance when 58631ae08745Sheppo * registering with the MD event-generation framework. If the "reg" 58641ae08745Sheppo * property cannot be found, the device tree state is presumably so 58651ae08745Sheppo * broken that there is no point in continuing. 
58661ae08745Sheppo */ 5867445b4c2eSsb155480 if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 5868445b4c2eSsb155480 VD_REG_PROP)) { 5869445b4c2eSsb155480 PRN("vds \"%s\" property does not exist", VD_REG_PROP); 58701ae08745Sheppo return (DDI_FAILURE); 58711ae08745Sheppo } 58721ae08745Sheppo 58731ae08745Sheppo /* Get the MD instance for later MDEG registration */ 58741ae08745Sheppo cfg_handle = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 5875445b4c2eSsb155480 VD_REG_PROP, -1); 58761ae08745Sheppo 58771ae08745Sheppo if (ddi_soft_state_zalloc(vds_state, instance) != DDI_SUCCESS) { 58781ae08745Sheppo PRN("Could not allocate state for instance %u", instance); 58791ae08745Sheppo return (DDI_FAILURE); 58801ae08745Sheppo } 58811ae08745Sheppo 58821ae08745Sheppo if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) { 58831ae08745Sheppo PRN("Could not get state for instance %u", instance); 58841ae08745Sheppo ddi_soft_state_free(vds_state, instance); 58851ae08745Sheppo return (DDI_FAILURE); 58861ae08745Sheppo } 58871ae08745Sheppo 58881ae08745Sheppo vds->dip = dip; 58891ae08745Sheppo vds->vd_table = mod_hash_create_ptrhash("vds_vd_table", VDS_NCHAINS, 589087a7269eSachartre vds_destroy_vd, sizeof (void *)); 589187a7269eSachartre 58921ae08745Sheppo ASSERT(vds->vd_table != NULL); 58931ae08745Sheppo 58941ae08745Sheppo if ((status = ldi_ident_from_dip(dip, &vds->ldi_ident)) != 0) { 58951ae08745Sheppo PRN("ldi_ident_from_dip() returned errno %d", status); 58961ae08745Sheppo return (DDI_FAILURE); 58971ae08745Sheppo } 58981ae08745Sheppo vds->initialized |= VDS_LDI; 58991ae08745Sheppo 59001ae08745Sheppo /* Register for MD updates */ 5901445b4c2eSsb155480 sz = sizeof (vds_prop_template); 5902445b4c2eSsb155480 pspecp = kmem_alloc(sz, KM_SLEEP); 5903445b4c2eSsb155480 bcopy(vds_prop_template, pspecp, sz); 5904445b4c2eSsb155480 5905445b4c2eSsb155480 VDS_SET_MDEG_PROP_INST(pspecp, cfg_handle); 5906445b4c2eSsb155480 5907445b4c2eSsb155480 /* initialize the complete 
prop spec structure */ 5908445b4c2eSsb155480 ispecp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 5909445b4c2eSsb155480 ispecp->namep = "virtual-device"; 5910445b4c2eSsb155480 ispecp->specp = pspecp; 5911445b4c2eSsb155480 5912445b4c2eSsb155480 if (mdeg_register(ispecp, &vd_match, vds_process_md, vds, 59131ae08745Sheppo &vds->mdeg) != MDEG_SUCCESS) { 59141ae08745Sheppo PRN("Unable to register for MD updates"); 5915445b4c2eSsb155480 kmem_free(ispecp, sizeof (mdeg_node_spec_t)); 5916445b4c2eSsb155480 kmem_free(pspecp, sz); 59171ae08745Sheppo return (DDI_FAILURE); 59181ae08745Sheppo } 5919445b4c2eSsb155480 5920445b4c2eSsb155480 vds->ispecp = ispecp; 59211ae08745Sheppo vds->initialized |= VDS_MDEG; 59221ae08745Sheppo 59230a55fbb7Slm66018 /* Prevent auto-detaching so driver is available whenever MD changes */ 59240a55fbb7Slm66018 if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1) != 59250a55fbb7Slm66018 DDI_PROP_SUCCESS) { 59260a55fbb7Slm66018 PRN("failed to set \"%s\" property for instance %u", 59270a55fbb7Slm66018 DDI_NO_AUTODETACH, instance); 59280a55fbb7Slm66018 } 59290a55fbb7Slm66018 59301ae08745Sheppo ddi_report_dev(dip); 59311ae08745Sheppo return (DDI_SUCCESS); 59321ae08745Sheppo } 59331ae08745Sheppo 59341ae08745Sheppo static int 59351ae08745Sheppo vds_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 59361ae08745Sheppo { 59371ae08745Sheppo int status; 59381ae08745Sheppo 59391ae08745Sheppo switch (cmd) { 59401ae08745Sheppo case DDI_ATTACH: 5941d10e4ef2Snarayan PR0("Attaching"); 59421ae08745Sheppo if ((status = vds_do_attach(dip)) != DDI_SUCCESS) 59431ae08745Sheppo (void) vds_detach(dip, DDI_DETACH); 59441ae08745Sheppo return (status); 59451ae08745Sheppo case DDI_RESUME: 5946d10e4ef2Snarayan PR0("No action required for DDI_RESUME"); 59471ae08745Sheppo return (DDI_SUCCESS); 59481ae08745Sheppo default: 59491ae08745Sheppo return (DDI_FAILURE); 59501ae08745Sheppo } 59511ae08745Sheppo } 59521ae08745Sheppo 59531ae08745Sheppo static struct dev_ops 
vds_ops = { 59541ae08745Sheppo DEVO_REV, /* devo_rev */ 59551ae08745Sheppo 0, /* devo_refcnt */ 59561ae08745Sheppo ddi_no_info, /* devo_getinfo */ 59571ae08745Sheppo nulldev, /* devo_identify */ 59581ae08745Sheppo nulldev, /* devo_probe */ 59591ae08745Sheppo vds_attach, /* devo_attach */ 59601ae08745Sheppo vds_detach, /* devo_detach */ 59611ae08745Sheppo nodev, /* devo_reset */ 59621ae08745Sheppo NULL, /* devo_cb_ops */ 59631ae08745Sheppo NULL, /* devo_bus_ops */ 59641ae08745Sheppo nulldev /* devo_power */ 59651ae08745Sheppo }; 59661ae08745Sheppo 59671ae08745Sheppo static struct modldrv modldrv = { 59681ae08745Sheppo &mod_driverops, 5969205eeb1aSlm66018 "virtual disk server", 59701ae08745Sheppo &vds_ops, 59711ae08745Sheppo }; 59721ae08745Sheppo 59731ae08745Sheppo static struct modlinkage modlinkage = { 59741ae08745Sheppo MODREV_1, 59751ae08745Sheppo &modldrv, 59761ae08745Sheppo NULL 59771ae08745Sheppo }; 59781ae08745Sheppo 59791ae08745Sheppo 59801ae08745Sheppo int 59811ae08745Sheppo _init(void) 59821ae08745Sheppo { 598317cadca8Slm66018 int status; 5984d10e4ef2Snarayan 59851ae08745Sheppo if ((status = ddi_soft_state_init(&vds_state, sizeof (vds_t), 1)) != 0) 59861ae08745Sheppo return (status); 598717cadca8Slm66018 59881ae08745Sheppo if ((status = mod_install(&modlinkage)) != 0) { 59891ae08745Sheppo ddi_soft_state_fini(&vds_state); 59901ae08745Sheppo return (status); 59911ae08745Sheppo } 59921ae08745Sheppo 59931ae08745Sheppo return (0); 59941ae08745Sheppo } 59951ae08745Sheppo 59961ae08745Sheppo int 59971ae08745Sheppo _info(struct modinfo *modinfop) 59981ae08745Sheppo { 59991ae08745Sheppo return (mod_info(&modlinkage, modinfop)); 60001ae08745Sheppo } 60011ae08745Sheppo 60021ae08745Sheppo int 60031ae08745Sheppo _fini(void) 60041ae08745Sheppo { 60051ae08745Sheppo int status; 60061ae08745Sheppo 60071ae08745Sheppo if ((status = mod_remove(&modlinkage)) != 0) 60081ae08745Sheppo return (status); 60091ae08745Sheppo ddi_soft_state_fini(&vds_state); 60101ae08745Sheppo return 
(0); 60111ae08745Sheppo } 6012