/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

/*
 * Virtual disk server
 */


#include <sys/types.h>
#include <sys/conf.h>
#include <sys/crc32.h>
#include <sys/ddi.h>
#include <sys/dkio.h>
#include <sys/file.h>
#include <sys/mdeg.h>
#include <sys/modhash.h>
#include <sys/note.h>
#include <sys/pathname.h>
#include <sys/sunddi.h>
#include <sys/sunldi.h>
#include <sys/sysmacros.h>
#include <sys/vio_common.h>
#include <sys/vdsk_mailbox.h>
#include <sys/vdsk_common.h>
#include <sys/vtoc.h>
#include <sys/vfs.h>
#include <sys/stat.h>

/* Virtual disk server initialization flags (bits of vds_t.initialized) */
#define	VDS_LDI			0x01
#define	VDS_MDEG		0x02

/*
 * Virtual disk server tunable parameters
 *
 * VDS_LDC_DELAY is the default for vds_ldc_delay, which send_msg() hands to
 * drv_usecwait(), so it is expressed in microseconds.
 */
#define	VDS_RETRIES		5
#define	VDS_LDC_DELAY		1000 /* 1 msec (in usecs) */
#define	VDS_DEV_DELAY		10000000 /* 10 secs */
#define	VDS_NCHAINS		32

/* Identification parameters for MD, synthetic dkio(7i) structures, etc. */
#define	VDS_NAME		"virtual-disk-server"

#define	VD_NAME			"vd"
#define	VD_VOLUME_NAME		"vdisk"
#define	VD_ASCIILABEL		"Virtual Disk"

#define	VD_CHANNEL_ENDPOINT	"channel-endpoint"
#define	VD_ID_PROP		"id"
#define	VD_BLOCK_DEVICE_PROP	"vds-block-device"
#define	VD_REG_PROP		"reg"

/* Virtual disk initialization flags (bits of vd_t.initialized) */
#define	VD_DISK_READY		0x01
#define	VD_LOCKING		0x02
#define	VD_LDC			0x04
#define	VD_DRING		0x08
#define	VD_SID			0x10
#define	VD_SEQ_NUM		0x20

/* Flags for opening/closing backing devices via LDI */
#define	VD_OPEN_FLAGS	(FEXCL | FREAD | FWRITE)

/*
 * By Solaris convention, slice/partition 2 represents the entire disk;
 * unfortunately, this convention does not appear to be codified.
 */
#define	VD_ENTIRE_DISK_SLICE	2

/* Return a cpp token as a string */
#define	STRINGIZE(token)	#token

/*
 * Print a message prefixed with the current function name to the message log
 * (and optionally to the console for verbose boots); these macros use cpp's
 * concatenation of string literals and C99 variable-length-argument-list
 * macros
 *
 * PRN() prepends "?%s(): " to the caller's format and appends an empty-string
 * argument; _PRN() then appends "%s" to the format and inserts __func__ as
 * the first argument, so the trailing "" is consumed by the trailing "%s".
 * The first argument to PRN() must therefore be a string literal.
 */
#define	PRN(...)	_PRN("?%s(): "__VA_ARGS__, "")
#define	_PRN(format, ...)					\
	cmn_err(CE_CONT, format"%s", __func__, __VA_ARGS__)

/*
 * Return a pointer to the "i"th vdisk dring element
 *
 * Note: this macro takes no vdisk argument; it expands to references to a
 * variable named "vd", which must be in scope at the point of use.
 */
#define	VD_DRING_ELEM(i)	((vd_dring_entry_t *)(void *)	\
	    (vd->dring + (i)*vd->descriptor_size))

/* Return the virtual disk client's type as a string (for use in messages) */
#define	VD_CLIENT(vd)							\
	(((vd)->xfer_mode == VIO_DESC_MODE) ? "in-band client" :	\
	    (((vd)->xfer_mode == VIO_DRING_MODE) ? "dring client" :	\
		(((vd)->xfer_mode == 0) ? "null client" :		\
		    "unsupported client")))

/*
 * Specification of an MD node passed to the MDEG to filter any
 * 'vport' nodes that do not belong to the specified node. This
 * template is copied for each vds instance and filled in with
 * the appropriate 'cfg-handle' value before being passed to the MDEG.
 */
static mdeg_prop_spec_t vds_prop_template[] = {
	{ MDET_PROP_STR,	"name",		VDS_NAME },
	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
	{ MDET_LIST_END,	NULL,		NULL }
};

/*
 * Store "val" in the 'cfg-handle' entry (index 1) of a copy of
 * vds_prop_template.
 *
 * Note: the expansion already ends in a semicolon, so an invocation followed
 * by ';' yields an extra empty statement; take care when using this macro as
 * the unbraced body of an if/else.
 */
#define	VDS_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val);

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device-port' nodes identified by their
 * 'id' property.
 */
static md_prop_match_t	vd_prop_match[] = {
	{ MDET_PROP_VAL,	VD_ID_PROP },
	{ MDET_LIST_END,	NULL }
};

static mdeg_node_match_t vd_match = {"virtual-device-port",
				    vd_prop_match};

/* Debugging macros */
#ifdef DEBUG

/* Verbosity threshold: PRn() prints only when vd_msglevel > n */
static int	vd_msglevel = 0;

/*
 * Note: each of these expands to an unbraced "if (...) PRN", so using one as
 * the body of an if that has an else will capture that else.
 */
#define	PR0 if (vd_msglevel > 0)	PRN
#define	PR1 if (vd_msglevel > 1)	PRN
#define	PR2 if (vd_msglevel > 2)	PRN

/*
 * Log (at PR0 level) the header state and payload fields of the dring element
 * pointed to by "elem".  "elem" is not parenthesized in the expansion, so
 * pass a simple pointer expression; the expansion already ends in ';'.
 */
#define	VD_DUMP_DRING_ELEM(elem)					\
	PR0("dst:%x op:%x st:%u nb:%lx addr:%lx ncook:%u\n",		\
	    elem->hdr.dstate,						\
	    elem->payload.operation,					\
	    elem->payload.status,					\
	    elem->payload.nbytes,					\
	    elem->payload.addr,						\
	    elem->payload.ncookies);

/*
 * Return the symbolic name of handshake state "state" as a string literal
 * (the spelling of the matching VD_STATE_* constant), or "unknown" if the
 * value matches none of the states listed below.  The returned string must
 * not be modified or freed.
 */
char *
vd_decode_state(int state)
{
	char *str;

	/* Stringize the case label itself so name and value cannot drift */
#define	CASE_STATE(_s)	case _s: str = #_s; break;

	switch (state) {
	CASE_STATE(VD_STATE_INIT)
	CASE_STATE(VD_STATE_VER)
	CASE_STATE(VD_STATE_ATTR)
	CASE_STATE(VD_STATE_DRING)
	CASE_STATE(VD_STATE_RDX)
	CASE_STATE(VD_STATE_DATA)
	default: str = "unknown"; break;
	}

#undef CASE_STATE

	return (str);
}

/*
 * Log (at PR1 level) the numeric and symbolic values of the message type,
 * subtype, and subtype envelope found in the tag of "msg".  Values that match
 * none of the listed constants are reported as "unknown".
 */
void
vd_decode_tag(vio_msg_t *msg)
{
	char *tstr, *sstr, *estr;

#define	CASE_TYPE(_s)	case _s: tstr = #_s; break;

	switch (msg->tag.vio_msgtype) {
	CASE_TYPE(VIO_TYPE_CTRL)
	CASE_TYPE(VIO_TYPE_DATA)
	CASE_TYPE(VIO_TYPE_ERR)
	default: tstr = "unknown"; break;
	}

#undef CASE_TYPE

#define	CASE_SUBTYPE(_s) case _s: sstr = #_s; break;

	switch (msg->tag.vio_subtype) {
	CASE_SUBTYPE(VIO_SUBTYPE_INFO)
	CASE_SUBTYPE(VIO_SUBTYPE_ACK)
	CASE_SUBTYPE(VIO_SUBTYPE_NACK)
	default: sstr = "unknown"; break;
	}

#undef CASE_SUBTYPE

#define	CASE_ENV(_s)	case _s: estr = #_s; break;

	switch (msg->tag.vio_subtype_env) {
	CASE_ENV(VIO_VER_INFO)
	CASE_ENV(VIO_ATTR_INFO)
	CASE_ENV(VIO_DRING_REG)
	CASE_ENV(VIO_DRING_UNREG)
	CASE_ENV(VIO_RDX)
	CASE_ENV(VIO_PKT_DATA)
	CASE_ENV(VIO_DESC_DATA)
	CASE_ENV(VIO_DRING_DATA)
	default: estr = "unknown"; break;
	}

#undef CASE_ENV

	PR1("(%x/%x/%x) message : (%s/%s/%s)",
	    msg->tag.vio_msgtype, msg->tag.vio_subtype,
	    msg->tag.vio_subtype_env, tstr, sstr, estr);
}

#else	/* !DEBUG */

/* Non-DEBUG builds: the debug print macros expand to nothing */
#define	PR0(...)
#define	PR1(...)
#define	PR2(...)

#define	VD_DUMP_DRING_ELEM(elem)

/* Non-DEBUG builds: the decode helpers collapse to a NULL expression */
#define	vd_decode_state(_s)	(NULL)
#define	vd_decode_tag(_s)	(NULL)

#endif	/* DEBUG */


/*
 * Soft state structure for a vds instance
 */
typedef struct vds {
	uint_t		initialized;	/* driver inst initialization flags */
	dev_info_t	*dip;		/* driver inst devinfo pointer */
	ldi_ident_t	ldi_ident;	/* driver's identifier for LDI */
	mod_hash_t	*vd_table;	/* table of virtual disks served */
	mdeg_node_spec_t *ispecp;	/* mdeg node specification */
	mdeg_handle_t	mdeg;		/* handle for MDEG operations */
} vds_t;

/*
 * Types of descriptor-processing tasks
 */
typedef enum vd_task_type {
	VD_NONFINAL_RANGE_TASK,	/* task for intermediate descriptor in range */
	VD_FINAL_RANGE_TASK,	/* task for last in a range of descriptors */
} vd_task_type_t;

/*
 * Structure describing the task for processing a descriptor
 */
typedef struct vd_task {
	struct vd		*vd;		/* vd instance task is for */
	vd_task_type_t		type;		/* type of descriptor task */
	int			index;		/* dring elem index for task */
	vio_msg_t		*msg;		/* VIO message task is for */
	size_t			msglen;		/* length of message content */
	vd_dring_payload_t	*request;	/* request task will perform */
	struct buf		buf;		/* buf(9s) for I/O request */
	ldc_mem_handle_t	mhdl;		/* task memory handle */
} vd_task_t;

/*
 * Soft state structure for a virtual disk instance
 */
typedef struct vd {
	uint_t			initialized;	/* vdisk initialization flags */
	vds_t			*vds;		/* server for this vdisk */
	ddi_taskq_t		*startq;	/* queue for I/O start tasks */
	ddi_taskq_t		*completionq;	/* queue for completion tasks */
	ldi_handle_t		ldi_handle[V_NUMPAR];	/* LDI slice handles */
	char			device_path[MAXPATHLEN + 1]; /* vdisk device */
	dev_t			dev[V_NUMPAR];	/* dev numbers for slices */
	uint_t			nslices;	/* number of slices */
	size_t			vdisk_size;	/* number of blocks in vdisk */
	vd_disk_type_t		vdisk_type;	/* slice or entire disk */
	vd_disk_label_t		vdisk_label;	/* EFI or VTOC label */
	ushort_t		max_xfer_sz;	/* max xfer size in DEV_BSIZE */
	boolean_t		pseudo;		/* underlying pseudo dev */
	boolean_t		file;		/* underlying file */
	char			*file_maddr;	/* file mapping address */
	vnode_t			*file_vnode;	/* file vnode */
	size_t			file_size;	/* file size */
	struct dk_efi		dk_efi;		/* synthetic for slice type */
	struct dk_geom		dk_geom;	/* synthetic for slice type */
	struct vtoc		vtoc;		/* synthetic for slice type */
	ldc_status_t		ldc_state;	/* LDC connection state */
	ldc_handle_t		ldc_handle;	/* handle for LDC comm */
	size_t			max_msglen;	/* largest LDC message len */
	vd_state_t		state;		/* client handshake state */
	uint8_t			xfer_mode;	/* transfer mode with client */
	uint32_t		sid;		/* client's session ID */
	uint64_t		seq_num;	/* message sequence number */
	uint64_t		dring_ident;	/* identifier of dring */
	ldc_dring_handle_t	dring_handle;	/* handle for dring ops */
	uint32_t		descriptor_size;	/* num bytes in desc */
	uint32_t		dring_len;	/* number of dring elements */
	caddr_t			dring;		/* address of dring */
	caddr_t			vio_msgp;	/* vio msg staging buffer */
	vd_task_t		inband_task;	/* task for inband descriptor */
	vd_task_t		*dring_task;	/* tasks dring elements */

	kmutex_t		lock;		/* protects variables below */
	boolean_t		enabled;	/* is vdisk enabled? */
	boolean_t		reset_state;	/* reset connection state? */
	boolean_t		reset_ldc;	/* reset LDC channel? */
} vd_t;

/*
 * Entry describing how one vdisk operation is carried out: a "start" routine
 * taking the task and a "complete" routine taking an opaque argument.
 */
typedef struct vds_operation {
	char	*namep;		/* presumably the operation's name - confirm */
	uint8_t	operation;	/* operation code */
	int	(*start)(vd_task_t *task);	/* begin the operation */
	void	(*complete)(void *arg);		/* finish the operation */
} vds_operation_t;

/*
 * Description of a vdisk operation that is serviced through an ioctl,
 * including the routines that convert between the vdisk buffer format and the
 * ioctl argument format.
 */
typedef struct vd_ioctl {
	uint8_t		operation;		/* vdisk operation */
	const char	*operation_name;	/* vdisk operation name */
	size_t		nbytes;			/* size of operation buffer */
	int		cmd;			/* corresponding ioctl cmd */
	const char	*cmd_name;		/* ioctl cmd name */
	void		*arg;			/* ioctl cmd argument */
	/* convert input vd_buf to output ioctl_arg */
	void		(*copyin)(void *vd_buf, void *ioctl_arg);
	/* convert input ioctl_arg to output vd_buf */
	void		(*copyout)(void *ioctl_arg, void *vd_buf);
} vd_ioctl_t;

3460a55fbb7Slm66018 /* Define trivial copyin/copyout conversion function flag */ 3470a55fbb7Slm66018 #define VD_IDENTITY ((void (*)(void *, void *))-1) 3481ae08745Sheppo 3491ae08745Sheppo 350*3c96341aSnarayan static int vds_ldc_retries = VDS_RETRIES; 3513af08d82Slm66018 static int vds_ldc_delay = VDS_LDC_DELAY; 352*3c96341aSnarayan static int vds_dev_retries = VDS_RETRIES; 353*3c96341aSnarayan static int vds_dev_delay = VDS_DEV_DELAY; 3541ae08745Sheppo static void *vds_state; 3551ae08745Sheppo static uint64_t vds_operations; /* see vds_operation[] definition below */ 3561ae08745Sheppo 3571ae08745Sheppo static int vd_open_flags = VD_OPEN_FLAGS; 3581ae08745Sheppo 3590a55fbb7Slm66018 /* 3600a55fbb7Slm66018 * Supported protocol version pairs, from highest (newest) to lowest (oldest) 3610a55fbb7Slm66018 * 3620a55fbb7Slm66018 * Each supported major version should appear only once, paired with (and only 3630a55fbb7Slm66018 * with) its highest supported minor version number (as the protocol requires 3640a55fbb7Slm66018 * supporting all lower minor version numbers as well) 3650a55fbb7Slm66018 */ 3660a55fbb7Slm66018 static const vio_ver_t vds_version[] = {{1, 0}}; 3670a55fbb7Slm66018 static const size_t vds_num_versions = 3680a55fbb7Slm66018 sizeof (vds_version)/sizeof (vds_version[0]); 3690a55fbb7Slm66018 3703af08d82Slm66018 static void vd_free_dring_task(vd_t *vdp); 371*3c96341aSnarayan static int vd_setup_vd(vd_t *vd); 372*3c96341aSnarayan static boolean_t vd_enabled(vd_t *vd); 3731ae08745Sheppo 3741ae08745Sheppo static int 375d10e4ef2Snarayan vd_start_bio(vd_task_t *task) 3761ae08745Sheppo { 3774bac2208Snarayan int rv, status = 0; 378d10e4ef2Snarayan vd_t *vd = task->vd; 379d10e4ef2Snarayan vd_dring_payload_t *request = task->request; 380d10e4ef2Snarayan struct buf *buf = &task->buf; 3814bac2208Snarayan uint8_t mtype; 382*3c96341aSnarayan caddr_t addr; 383*3c96341aSnarayan size_t offset, maxlen; 384*3c96341aSnarayan int slice; 385d10e4ef2Snarayan 386d10e4ef2Snarayan 
ASSERT(vd != NULL); 387d10e4ef2Snarayan ASSERT(request != NULL); 388*3c96341aSnarayan 389*3c96341aSnarayan slice = request->slice; 390*3c96341aSnarayan 391*3c96341aSnarayan ASSERT(slice < vd->nslices); 392d10e4ef2Snarayan ASSERT((request->operation == VD_OP_BREAD) || 393d10e4ef2Snarayan (request->operation == VD_OP_BWRITE)); 394d10e4ef2Snarayan 3951ae08745Sheppo if (request->nbytes == 0) 3961ae08745Sheppo return (EINVAL); /* no service for trivial requests */ 3971ae08745Sheppo 398d10e4ef2Snarayan PR1("%s %lu bytes at block %lu", 399d10e4ef2Snarayan (request->operation == VD_OP_BREAD) ? "Read" : "Write", 400d10e4ef2Snarayan request->nbytes, request->addr); 4011ae08745Sheppo 402d10e4ef2Snarayan bioinit(buf); 403d10e4ef2Snarayan buf->b_flags = B_BUSY; 404d10e4ef2Snarayan buf->b_bcount = request->nbytes; 405d10e4ef2Snarayan buf->b_lblkno = request->addr; 406*3c96341aSnarayan buf->b_edev = vd->dev[slice]; 407d10e4ef2Snarayan 4084bac2208Snarayan mtype = (&vd->inband_task == task) ? LDC_SHADOW_MAP : LDC_DIRECT_MAP; 4094bac2208Snarayan 4104bac2208Snarayan /* Map memory exported by client */ 4114bac2208Snarayan status = ldc_mem_map(task->mhdl, request->cookie, request->ncookies, 4124bac2208Snarayan mtype, (request->operation == VD_OP_BREAD) ? LDC_MEM_W : LDC_MEM_R, 4134bac2208Snarayan &(buf->b_un.b_addr), NULL); 4144bac2208Snarayan if (status != 0) { 4153af08d82Slm66018 PR0("ldc_mem_map() returned err %d ", status); 4164bac2208Snarayan biofini(buf); 4174bac2208Snarayan return (status); 418d10e4ef2Snarayan } 419d10e4ef2Snarayan 4204bac2208Snarayan status = ldc_mem_acquire(task->mhdl, 0, buf->b_bcount); 4214bac2208Snarayan if (status != 0) { 4224bac2208Snarayan (void) ldc_mem_unmap(task->mhdl); 4233af08d82Slm66018 PR0("ldc_mem_acquire() returned err %d ", status); 4244bac2208Snarayan biofini(buf); 4254bac2208Snarayan return (status); 4264bac2208Snarayan } 4274bac2208Snarayan 4284bac2208Snarayan buf->b_flags |= (request->operation == VD_OP_BREAD) ? 
B_READ : B_WRITE; 4294bac2208Snarayan 430d10e4ef2Snarayan /* Start the block I/O */ 431*3c96341aSnarayan if (vd->file) { 432d10e4ef2Snarayan 433*3c96341aSnarayan if (request->addr >= vd->vtoc.v_part[slice].p_size) { 434*3c96341aSnarayan /* address past the end of the slice */ 435*3c96341aSnarayan PR0("req_addr (0x%lx) > psize (0x%lx)", 436*3c96341aSnarayan request->addr, vd->vtoc.v_part[slice].p_size); 437*3c96341aSnarayan request->nbytes = 0; 438*3c96341aSnarayan status = 0; 439*3c96341aSnarayan goto cleanup; 440*3c96341aSnarayan } 441*3c96341aSnarayan 442*3c96341aSnarayan offset = (vd->vtoc.v_part[slice].p_start + 443*3c96341aSnarayan request->addr) * DEV_BSIZE; 444*3c96341aSnarayan 445*3c96341aSnarayan /* 446*3c96341aSnarayan * If the requested size is greater than the size 447*3c96341aSnarayan * of the partition, truncate the read/write. 448*3c96341aSnarayan */ 449*3c96341aSnarayan maxlen = (vd->vtoc.v_part[slice].p_size - 450*3c96341aSnarayan request->addr) * DEV_BSIZE; 451*3c96341aSnarayan 452*3c96341aSnarayan if (request->nbytes > maxlen) { 453*3c96341aSnarayan PR0("I/O size truncated to %lu bytes from %lu bytes", 454*3c96341aSnarayan maxlen, request->nbytes); 455*3c96341aSnarayan request->nbytes = maxlen; 456*3c96341aSnarayan } 457*3c96341aSnarayan 458*3c96341aSnarayan /* 459*3c96341aSnarayan * We have to ensure that we are reading/writing into the mmap 460*3c96341aSnarayan * range. If we have a partial disk image (e.g. an image of 461*3c96341aSnarayan * s0 instead s2) the system can try to access slices that 462*3c96341aSnarayan * are not included into the disk image. 
463*3c96341aSnarayan */ 464*3c96341aSnarayan if ((offset + request->nbytes) >= vd->file_size) { 465*3c96341aSnarayan PR0("offset + nbytes (0x%lx + 0x%lx) >= " 466*3c96341aSnarayan "file_size (0x%lx)", offset, request->nbytes, 467*3c96341aSnarayan vd->file_size); 468*3c96341aSnarayan request->nbytes = 0; 469*3c96341aSnarayan status = EIO; 470*3c96341aSnarayan goto cleanup; 471*3c96341aSnarayan } 472*3c96341aSnarayan 473*3c96341aSnarayan addr = vd->file_maddr + offset; 474*3c96341aSnarayan 475*3c96341aSnarayan if (request->operation == VD_OP_BREAD) 476*3c96341aSnarayan bcopy(addr, buf->b_un.b_addr, request->nbytes); 477*3c96341aSnarayan else 478*3c96341aSnarayan bcopy(buf->b_un.b_addr, addr, request->nbytes); 479*3c96341aSnarayan 480*3c96341aSnarayan } else { 481*3c96341aSnarayan status = ldi_strategy(vd->ldi_handle[slice], buf); 482*3c96341aSnarayan if (status == 0) 483*3c96341aSnarayan return (EINPROGRESS); /* will complete on completionq */ 484*3c96341aSnarayan } 485*3c96341aSnarayan 486*3c96341aSnarayan cleanup: 487d10e4ef2Snarayan /* Clean up after error */ 4884bac2208Snarayan rv = ldc_mem_release(task->mhdl, 0, buf->b_bcount); 4894bac2208Snarayan if (rv) { 4903af08d82Slm66018 PR0("ldc_mem_release() returned err %d ", rv); 4914bac2208Snarayan } 4924bac2208Snarayan rv = ldc_mem_unmap(task->mhdl); 4934bac2208Snarayan if (rv) { 4943af08d82Slm66018 PR0("ldc_mem_unmap() returned err %d ", status); 4954bac2208Snarayan } 4964bac2208Snarayan 497d10e4ef2Snarayan biofini(buf); 498d10e4ef2Snarayan return (status); 499d10e4ef2Snarayan } 500d10e4ef2Snarayan 501d10e4ef2Snarayan static int 502d10e4ef2Snarayan send_msg(ldc_handle_t ldc_handle, void *msg, size_t msglen) 503d10e4ef2Snarayan { 5043af08d82Slm66018 int status; 505d10e4ef2Snarayan size_t nbytes; 506d10e4ef2Snarayan 5073af08d82Slm66018 do { 508d10e4ef2Snarayan nbytes = msglen; 509d10e4ef2Snarayan status = ldc_write(ldc_handle, msg, &nbytes); 5103af08d82Slm66018 if (status != EWOULDBLOCK) 5113af08d82Slm66018 break; 
5123af08d82Slm66018 drv_usecwait(vds_ldc_delay); 5133af08d82Slm66018 } while (status == EWOULDBLOCK); 514d10e4ef2Snarayan 515d10e4ef2Snarayan if (status != 0) { 5163af08d82Slm66018 if (status != ECONNRESET) 5173af08d82Slm66018 PR0("ldc_write() returned errno %d", status); 518d10e4ef2Snarayan return (status); 519d10e4ef2Snarayan } else if (nbytes != msglen) { 5203af08d82Slm66018 PR0("ldc_write() performed only partial write"); 521d10e4ef2Snarayan return (EIO); 522d10e4ef2Snarayan } 523d10e4ef2Snarayan 524d10e4ef2Snarayan PR1("SENT %lu bytes", msglen); 525d10e4ef2Snarayan return (0); 526d10e4ef2Snarayan } 527d10e4ef2Snarayan 528d10e4ef2Snarayan static void 529d10e4ef2Snarayan vd_need_reset(vd_t *vd, boolean_t reset_ldc) 530d10e4ef2Snarayan { 531d10e4ef2Snarayan mutex_enter(&vd->lock); 532d10e4ef2Snarayan vd->reset_state = B_TRUE; 533d10e4ef2Snarayan vd->reset_ldc = reset_ldc; 534d10e4ef2Snarayan mutex_exit(&vd->lock); 535d10e4ef2Snarayan } 536d10e4ef2Snarayan 537d10e4ef2Snarayan /* 538d10e4ef2Snarayan * Reset the state of the connection with a client, if needed; reset the LDC 539d10e4ef2Snarayan * transport as well, if needed. This function should only be called from the 5403af08d82Slm66018 * "vd_recv_msg", as it waits for tasks - otherwise a deadlock can occur. 
541d10e4ef2Snarayan */ 542d10e4ef2Snarayan static void 543d10e4ef2Snarayan vd_reset_if_needed(vd_t *vd) 544d10e4ef2Snarayan { 545d10e4ef2Snarayan int status = 0; 546d10e4ef2Snarayan 547d10e4ef2Snarayan mutex_enter(&vd->lock); 548d10e4ef2Snarayan if (!vd->reset_state) { 549d10e4ef2Snarayan ASSERT(!vd->reset_ldc); 550d10e4ef2Snarayan mutex_exit(&vd->lock); 551d10e4ef2Snarayan return; 552d10e4ef2Snarayan } 553d10e4ef2Snarayan mutex_exit(&vd->lock); 554d10e4ef2Snarayan 555d10e4ef2Snarayan PR0("Resetting connection state with %s", VD_CLIENT(vd)); 556d10e4ef2Snarayan 557d10e4ef2Snarayan /* 558d10e4ef2Snarayan * Let any asynchronous I/O complete before possibly pulling the rug 559d10e4ef2Snarayan * out from under it; defer checking vd->reset_ldc, as one of the 560d10e4ef2Snarayan * asynchronous tasks might set it 561d10e4ef2Snarayan */ 562d10e4ef2Snarayan ddi_taskq_wait(vd->completionq); 563d10e4ef2Snarayan 564*3c96341aSnarayan if (vd->file) { 565*3c96341aSnarayan status = VOP_FSYNC(vd->file_vnode, FSYNC, kcred); 566*3c96341aSnarayan if (status) { 567*3c96341aSnarayan PR0("VOP_FSYNC returned errno %d", status); 568*3c96341aSnarayan } 569*3c96341aSnarayan } 570*3c96341aSnarayan 571d10e4ef2Snarayan if ((vd->initialized & VD_DRING) && 572d10e4ef2Snarayan ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0)) 5733af08d82Slm66018 PR0("ldc_mem_dring_unmap() returned errno %d", status); 574d10e4ef2Snarayan 5753af08d82Slm66018 vd_free_dring_task(vd); 5763af08d82Slm66018 5773af08d82Slm66018 /* Free the staging buffer for msgs */ 5783af08d82Slm66018 if (vd->vio_msgp != NULL) { 5793af08d82Slm66018 kmem_free(vd->vio_msgp, vd->max_msglen); 5803af08d82Slm66018 vd->vio_msgp = NULL; 581d10e4ef2Snarayan } 582d10e4ef2Snarayan 5833af08d82Slm66018 /* Free the inband message buffer */ 5843af08d82Slm66018 if (vd->inband_task.msg != NULL) { 5853af08d82Slm66018 kmem_free(vd->inband_task.msg, vd->max_msglen); 5863af08d82Slm66018 vd->inband_task.msg = NULL; 5873af08d82Slm66018 } 
588d10e4ef2Snarayan 589d10e4ef2Snarayan mutex_enter(&vd->lock); 5903af08d82Slm66018 5913af08d82Slm66018 if (vd->reset_ldc) 5923af08d82Slm66018 PR0("taking down LDC channel"); 593e1ebb9ecSlm66018 if (vd->reset_ldc && ((status = ldc_down(vd->ldc_handle)) != 0)) 5943af08d82Slm66018 PR0("ldc_down() returned errno %d", status); 595d10e4ef2Snarayan 596d10e4ef2Snarayan vd->initialized &= ~(VD_SID | VD_SEQ_NUM | VD_DRING); 597d10e4ef2Snarayan vd->state = VD_STATE_INIT; 598d10e4ef2Snarayan vd->max_msglen = sizeof (vio_msg_t); /* baseline vio message size */ 599d10e4ef2Snarayan 6003af08d82Slm66018 /* Allocate the staging buffer */ 6013af08d82Slm66018 vd->vio_msgp = kmem_alloc(vd->max_msglen, KM_SLEEP); 6023af08d82Slm66018 6033af08d82Slm66018 PR0("calling ldc_up\n"); 6043af08d82Slm66018 (void) ldc_up(vd->ldc_handle); 6053af08d82Slm66018 606d10e4ef2Snarayan vd->reset_state = B_FALSE; 607d10e4ef2Snarayan vd->reset_ldc = B_FALSE; 6083af08d82Slm66018 609d10e4ef2Snarayan mutex_exit(&vd->lock); 610d10e4ef2Snarayan } 611d10e4ef2Snarayan 6123af08d82Slm66018 static void vd_recv_msg(void *arg); 6133af08d82Slm66018 6143af08d82Slm66018 static void 6153af08d82Slm66018 vd_mark_in_reset(vd_t *vd) 6163af08d82Slm66018 { 6173af08d82Slm66018 int status; 6183af08d82Slm66018 6193af08d82Slm66018 PR0("vd_mark_in_reset: marking vd in reset\n"); 6203af08d82Slm66018 6213af08d82Slm66018 vd_need_reset(vd, B_FALSE); 6223af08d82Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, DDI_SLEEP); 6233af08d82Slm66018 if (status == DDI_FAILURE) { 6243af08d82Slm66018 PR0("cannot schedule task to recv msg\n"); 6253af08d82Slm66018 vd_need_reset(vd, B_TRUE); 6263af08d82Slm66018 return; 6273af08d82Slm66018 } 6283af08d82Slm66018 } 6293af08d82Slm66018 630d10e4ef2Snarayan static int 631*3c96341aSnarayan vd_mark_elem_done(vd_t *vd, int idx, int elem_status, int elem_nbytes) 632d10e4ef2Snarayan { 633d10e4ef2Snarayan boolean_t accepted; 634d10e4ef2Snarayan int status; 635d10e4ef2Snarayan vd_dring_entry_t *elem 
= VD_DRING_ELEM(idx); 636d10e4ef2Snarayan 6373af08d82Slm66018 if (vd->reset_state) 6383af08d82Slm66018 return (0); 639d10e4ef2Snarayan 640d10e4ef2Snarayan /* Acquire the element */ 6413af08d82Slm66018 if (!vd->reset_state && 6423af08d82Slm66018 (status = ldc_mem_dring_acquire(vd->dring_handle, idx, idx)) != 0) { 6433af08d82Slm66018 if (status == ECONNRESET) { 6443af08d82Slm66018 vd_mark_in_reset(vd); 6453af08d82Slm66018 return (0); 6463af08d82Slm66018 } else { 6473af08d82Slm66018 PR0("ldc_mem_dring_acquire() returned errno %d", 6483af08d82Slm66018 status); 649d10e4ef2Snarayan return (status); 650d10e4ef2Snarayan } 6513af08d82Slm66018 } 652d10e4ef2Snarayan 653d10e4ef2Snarayan /* Set the element's status and mark it done */ 654d10e4ef2Snarayan accepted = (elem->hdr.dstate == VIO_DESC_ACCEPTED); 655d10e4ef2Snarayan if (accepted) { 656*3c96341aSnarayan elem->payload.nbytes = elem_nbytes; 657d10e4ef2Snarayan elem->payload.status = elem_status; 658d10e4ef2Snarayan elem->hdr.dstate = VIO_DESC_DONE; 659d10e4ef2Snarayan } else { 660d10e4ef2Snarayan /* Perhaps client timed out waiting for I/O... */ 6613af08d82Slm66018 PR0("element %u no longer \"accepted\"", idx); 662d10e4ef2Snarayan VD_DUMP_DRING_ELEM(elem); 663d10e4ef2Snarayan } 664d10e4ef2Snarayan /* Release the element */ 6653af08d82Slm66018 if (!vd->reset_state && 6663af08d82Slm66018 (status = ldc_mem_dring_release(vd->dring_handle, idx, idx)) != 0) { 6673af08d82Slm66018 if (status == ECONNRESET) { 6683af08d82Slm66018 vd_mark_in_reset(vd); 6693af08d82Slm66018 return (0); 6703af08d82Slm66018 } else { 6713af08d82Slm66018 PR0("ldc_mem_dring_release() returned errno %d", 6723af08d82Slm66018 status); 673d10e4ef2Snarayan return (status); 674d10e4ef2Snarayan } 6753af08d82Slm66018 } 676d10e4ef2Snarayan 677d10e4ef2Snarayan return (accepted ? 
0 : EINVAL); 678d10e4ef2Snarayan } 679d10e4ef2Snarayan 680d10e4ef2Snarayan static void 681d10e4ef2Snarayan vd_complete_bio(void *arg) 682d10e4ef2Snarayan { 683d10e4ef2Snarayan int status = 0; 684d10e4ef2Snarayan vd_task_t *task = (vd_task_t *)arg; 685d10e4ef2Snarayan vd_t *vd = task->vd; 686d10e4ef2Snarayan vd_dring_payload_t *request = task->request; 687d10e4ef2Snarayan struct buf *buf = &task->buf; 688d10e4ef2Snarayan 689d10e4ef2Snarayan 690d10e4ef2Snarayan ASSERT(vd != NULL); 691d10e4ef2Snarayan ASSERT(request != NULL); 692d10e4ef2Snarayan ASSERT(task->msg != NULL); 693d10e4ef2Snarayan ASSERT(task->msglen >= sizeof (*task->msg)); 694*3c96341aSnarayan ASSERT(!vd->file); 695d10e4ef2Snarayan 696d10e4ef2Snarayan /* Wait for the I/O to complete */ 697d10e4ef2Snarayan request->status = biowait(buf); 698d10e4ef2Snarayan 699*3c96341aSnarayan /* return back the number of bytes read/written */ 700*3c96341aSnarayan request->nbytes = buf->b_bcount - buf->b_resid; 701*3c96341aSnarayan 7024bac2208Snarayan /* Release the buffer */ 7033af08d82Slm66018 if (!vd->reset_state) 7044bac2208Snarayan status = ldc_mem_release(task->mhdl, 0, buf->b_bcount); 7054bac2208Snarayan if (status) { 7063af08d82Slm66018 PR0("ldc_mem_release() returned errno %d copying to " 7073af08d82Slm66018 "client", status); 7083af08d82Slm66018 if (status == ECONNRESET) { 7093af08d82Slm66018 vd_mark_in_reset(vd); 7103af08d82Slm66018 } 7111ae08745Sheppo } 7121ae08745Sheppo 7133af08d82Slm66018 /* Unmap the memory, even if in reset */ 7144bac2208Snarayan status = ldc_mem_unmap(task->mhdl); 7154bac2208Snarayan if (status) { 7163af08d82Slm66018 PR0("ldc_mem_unmap() returned errno %d copying to client", 7174bac2208Snarayan status); 7183af08d82Slm66018 if (status == ECONNRESET) { 7193af08d82Slm66018 vd_mark_in_reset(vd); 7203af08d82Slm66018 } 7214bac2208Snarayan } 7224bac2208Snarayan 723d10e4ef2Snarayan biofini(buf); 7241ae08745Sheppo 725d10e4ef2Snarayan /* Update the dring element for a dring client */ 
7263af08d82Slm66018 if (!vd->reset_state && (status == 0) && 7273af08d82Slm66018 (vd->xfer_mode == VIO_DRING_MODE)) { 728*3c96341aSnarayan status = vd_mark_elem_done(vd, task->index, 729*3c96341aSnarayan request->status, request->nbytes); 7303af08d82Slm66018 if (status == ECONNRESET) 7313af08d82Slm66018 vd_mark_in_reset(vd); 7323af08d82Slm66018 } 7331ae08745Sheppo 734d10e4ef2Snarayan /* 735d10e4ef2Snarayan * If a transport error occurred, arrange to "nack" the message when 736d10e4ef2Snarayan * the final task in the descriptor element range completes 737d10e4ef2Snarayan */ 738d10e4ef2Snarayan if (status != 0) 739d10e4ef2Snarayan task->msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 7401ae08745Sheppo 741d10e4ef2Snarayan /* 742d10e4ef2Snarayan * Only the final task for a range of elements will respond to and 743d10e4ef2Snarayan * free the message 744d10e4ef2Snarayan */ 7453af08d82Slm66018 if (task->type == VD_NONFINAL_RANGE_TASK) { 746d10e4ef2Snarayan return; 7473af08d82Slm66018 } 7481ae08745Sheppo 749d10e4ef2Snarayan /* 750d10e4ef2Snarayan * Send the "ack" or "nack" back to the client; if sending the message 751d10e4ef2Snarayan * via LDC fails, arrange to reset both the connection state and LDC 752d10e4ef2Snarayan * itself 753d10e4ef2Snarayan */ 754d10e4ef2Snarayan PR1("Sending %s", 755d10e4ef2Snarayan (task->msg->tag.vio_subtype == VIO_SUBTYPE_ACK) ? 
"ACK" : "NACK"); 7563af08d82Slm66018 if (!vd->reset_state) { 7573af08d82Slm66018 status = send_msg(vd->ldc_handle, task->msg, task->msglen); 7583af08d82Slm66018 switch (status) { 7593af08d82Slm66018 case 0: 7603af08d82Slm66018 break; 7613af08d82Slm66018 case ECONNRESET: 7623af08d82Slm66018 vd_mark_in_reset(vd); 7633af08d82Slm66018 break; 7643af08d82Slm66018 default: 7653af08d82Slm66018 PR0("initiating full reset"); 766d10e4ef2Snarayan vd_need_reset(vd, B_TRUE); 7673af08d82Slm66018 break; 7683af08d82Slm66018 } 7693af08d82Slm66018 } 7701ae08745Sheppo } 7711ae08745Sheppo 7720a55fbb7Slm66018 static void 7730a55fbb7Slm66018 vd_geom2dk_geom(void *vd_buf, void *ioctl_arg) 7740a55fbb7Slm66018 { 7750a55fbb7Slm66018 VD_GEOM2DK_GEOM((vd_geom_t *)vd_buf, (struct dk_geom *)ioctl_arg); 7760a55fbb7Slm66018 } 7770a55fbb7Slm66018 7780a55fbb7Slm66018 static void 7790a55fbb7Slm66018 vd_vtoc2vtoc(void *vd_buf, void *ioctl_arg) 7800a55fbb7Slm66018 { 7810a55fbb7Slm66018 VD_VTOC2VTOC((vd_vtoc_t *)vd_buf, (struct vtoc *)ioctl_arg); 7820a55fbb7Slm66018 } 7830a55fbb7Slm66018 7840a55fbb7Slm66018 static void 7850a55fbb7Slm66018 dk_geom2vd_geom(void *ioctl_arg, void *vd_buf) 7860a55fbb7Slm66018 { 7870a55fbb7Slm66018 DK_GEOM2VD_GEOM((struct dk_geom *)ioctl_arg, (vd_geom_t *)vd_buf); 7880a55fbb7Slm66018 } 7890a55fbb7Slm66018 7900a55fbb7Slm66018 static void 7910a55fbb7Slm66018 vtoc2vd_vtoc(void *ioctl_arg, void *vd_buf) 7920a55fbb7Slm66018 { 7930a55fbb7Slm66018 VTOC2VD_VTOC((struct vtoc *)ioctl_arg, (vd_vtoc_t *)vd_buf); 7940a55fbb7Slm66018 } 7950a55fbb7Slm66018 7964bac2208Snarayan static void 7974bac2208Snarayan vd_get_efi_in(void *vd_buf, void *ioctl_arg) 7984bac2208Snarayan { 7994bac2208Snarayan vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; 8004bac2208Snarayan dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg; 8014bac2208Snarayan 8024bac2208Snarayan dk_efi->dki_lba = vd_efi->lba; 8034bac2208Snarayan dk_efi->dki_length = vd_efi->length; 8044bac2208Snarayan dk_efi->dki_data = kmem_zalloc(vd_efi->length, 
KM_SLEEP); 8054bac2208Snarayan } 8064bac2208Snarayan 8074bac2208Snarayan static void 8084bac2208Snarayan vd_get_efi_out(void *ioctl_arg, void *vd_buf) 8094bac2208Snarayan { 8104bac2208Snarayan int len; 8114bac2208Snarayan vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; 8124bac2208Snarayan dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg; 8134bac2208Snarayan 8144bac2208Snarayan len = vd_efi->length; 8154bac2208Snarayan DK_EFI2VD_EFI(dk_efi, vd_efi); 8164bac2208Snarayan kmem_free(dk_efi->dki_data, len); 8174bac2208Snarayan } 8184bac2208Snarayan 8194bac2208Snarayan static void 8204bac2208Snarayan vd_set_efi_in(void *vd_buf, void *ioctl_arg) 8214bac2208Snarayan { 8224bac2208Snarayan vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; 8234bac2208Snarayan dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg; 8244bac2208Snarayan 8254bac2208Snarayan dk_efi->dki_data = kmem_alloc(vd_efi->length, KM_SLEEP); 8264bac2208Snarayan VD_EFI2DK_EFI(vd_efi, dk_efi); 8274bac2208Snarayan } 8284bac2208Snarayan 8294bac2208Snarayan static void 8304bac2208Snarayan vd_set_efi_out(void *ioctl_arg, void *vd_buf) 8314bac2208Snarayan { 8324bac2208Snarayan vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; 8334bac2208Snarayan dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg; 8344bac2208Snarayan 8354bac2208Snarayan kmem_free(dk_efi->dki_data, vd_efi->length); 8364bac2208Snarayan } 8374bac2208Snarayan 8384bac2208Snarayan static int 8394bac2208Snarayan vd_read_vtoc(ldi_handle_t handle, struct vtoc *vtoc, vd_disk_label_t *label) 8404bac2208Snarayan { 8414bac2208Snarayan int status, rval; 8424bac2208Snarayan struct dk_gpt *efi; 8434bac2208Snarayan size_t efi_len; 8444bac2208Snarayan 8454bac2208Snarayan *label = VD_DISK_LABEL_UNK; 8464bac2208Snarayan 8474bac2208Snarayan status = ldi_ioctl(handle, DKIOCGVTOC, (intptr_t)vtoc, 8484bac2208Snarayan (vd_open_flags | FKIOCTL), kcred, &rval); 8494bac2208Snarayan 8504bac2208Snarayan if (status == 0) { 8514bac2208Snarayan *label = VD_DISK_LABEL_VTOC; 8524bac2208Snarayan return (0); 8534bac2208Snarayan } else if (status != 
ENOTSUP) { 8543af08d82Slm66018 PR0("ldi_ioctl(DKIOCGVTOC) returned error %d", status); 8554bac2208Snarayan return (status); 8564bac2208Snarayan } 8574bac2208Snarayan 8584bac2208Snarayan status = vds_efi_alloc_and_read(handle, &efi, &efi_len); 8594bac2208Snarayan 8604bac2208Snarayan if (status) { 8613af08d82Slm66018 PR0("vds_efi_alloc_and_read returned error %d", status); 8624bac2208Snarayan return (status); 8634bac2208Snarayan } 8644bac2208Snarayan 8654bac2208Snarayan *label = VD_DISK_LABEL_EFI; 8664bac2208Snarayan vd_efi_to_vtoc(efi, vtoc); 8674bac2208Snarayan vd_efi_free(efi, efi_len); 8684bac2208Snarayan 8694bac2208Snarayan return (0); 8704bac2208Snarayan } 8714bac2208Snarayan 872*3c96341aSnarayan static short 873*3c96341aSnarayan vd_lbl2cksum(struct dk_label *label) 874*3c96341aSnarayan { 875*3c96341aSnarayan int count; 876*3c96341aSnarayan short sum, *sp; 877*3c96341aSnarayan 878*3c96341aSnarayan count = (sizeof (struct dk_label)) / (sizeof (short)) - 1; 879*3c96341aSnarayan sp = (short *)label; 880*3c96341aSnarayan sum = 0; 881*3c96341aSnarayan while (count--) { 882*3c96341aSnarayan sum ^= *sp++; 883*3c96341aSnarayan } 884*3c96341aSnarayan 885*3c96341aSnarayan return (sum); 886*3c96341aSnarayan } 887*3c96341aSnarayan 8881ae08745Sheppo static int 8890a55fbb7Slm66018 vd_do_slice_ioctl(vd_t *vd, int cmd, void *ioctl_arg) 8901ae08745Sheppo { 8914bac2208Snarayan dk_efi_t *dk_ioc; 892*3c96341aSnarayan struct dk_label *label; 893*3c96341aSnarayan int i; 8944bac2208Snarayan 8954bac2208Snarayan switch (vd->vdisk_label) { 8964bac2208Snarayan 8974bac2208Snarayan case VD_DISK_LABEL_VTOC: 8984bac2208Snarayan 8991ae08745Sheppo switch (cmd) { 9001ae08745Sheppo case DKIOCGGEOM: 9010a55fbb7Slm66018 ASSERT(ioctl_arg != NULL); 9020a55fbb7Slm66018 bcopy(&vd->dk_geom, ioctl_arg, sizeof (vd->dk_geom)); 9031ae08745Sheppo return (0); 9041ae08745Sheppo case DKIOCGVTOC: 9050a55fbb7Slm66018 ASSERT(ioctl_arg != NULL); 9060a55fbb7Slm66018 bcopy(&vd->vtoc, ioctl_arg, sizeof (vd->vtoc)); 
9071ae08745Sheppo return (0); 908*3c96341aSnarayan case DKIOCSVTOC: 909*3c96341aSnarayan if (!vd->file) 910*3c96341aSnarayan return (ENOTSUP); 911*3c96341aSnarayan ASSERT(ioctl_arg != NULL); 912*3c96341aSnarayan bcopy(ioctl_arg, &vd->vtoc, sizeof (vd->vtoc)); 913*3c96341aSnarayan /* write new VTOC to file */ 914*3c96341aSnarayan label = (struct dk_label *)vd->file_maddr; 915*3c96341aSnarayan label->dkl_vtoc.v_nparts = vd->vtoc.v_nparts; 916*3c96341aSnarayan label->dkl_vtoc.v_sanity = vd->vtoc.v_sanity; 917*3c96341aSnarayan label->dkl_vtoc.v_version = vd->vtoc.v_version; 918*3c96341aSnarayan bcopy(vd->vtoc.v_volume, label->dkl_vtoc.v_volume, 919*3c96341aSnarayan LEN_DKL_VVOL); 920*3c96341aSnarayan for (i = 0; i < vd->vtoc.v_nparts; i++) { 921*3c96341aSnarayan label->dkl_vtoc.v_timestamp[i] = 922*3c96341aSnarayan vd->vtoc.timestamp[i]; 923*3c96341aSnarayan label->dkl_vtoc.v_part[i].p_tag = 924*3c96341aSnarayan vd->vtoc.v_part[i].p_tag; 925*3c96341aSnarayan label->dkl_vtoc.v_part[i].p_flag = 926*3c96341aSnarayan vd->vtoc.v_part[i].p_flag; 927*3c96341aSnarayan label->dkl_map[i].dkl_cylno = 928*3c96341aSnarayan vd->vtoc.v_part[i].p_start / 929*3c96341aSnarayan (label->dkl_nhead * label->dkl_nsect); 930*3c96341aSnarayan label->dkl_map[i].dkl_nblk = 931*3c96341aSnarayan vd->vtoc.v_part[i].p_size; 932*3c96341aSnarayan } 933*3c96341aSnarayan 934*3c96341aSnarayan /* re-compute checksum */ 935*3c96341aSnarayan label->dkl_cksum = vd_lbl2cksum(label); 936*3c96341aSnarayan 937*3c96341aSnarayan return (0); 9381ae08745Sheppo default: 9391ae08745Sheppo return (ENOTSUP); 9401ae08745Sheppo } 9414bac2208Snarayan 9424bac2208Snarayan case VD_DISK_LABEL_EFI: 9434bac2208Snarayan 9444bac2208Snarayan switch (cmd) { 9454bac2208Snarayan case DKIOCGETEFI: 9464bac2208Snarayan ASSERT(ioctl_arg != NULL); 9474bac2208Snarayan dk_ioc = (dk_efi_t *)ioctl_arg; 9484bac2208Snarayan if (dk_ioc->dki_length < vd->dk_efi.dki_length) 9494bac2208Snarayan return (EINVAL); 9504bac2208Snarayan 
bcopy(vd->dk_efi.dki_data, dk_ioc->dki_data, 9514bac2208Snarayan vd->dk_efi.dki_length); 9524bac2208Snarayan return (0); 9534bac2208Snarayan default: 9544bac2208Snarayan return (ENOTSUP); 9554bac2208Snarayan } 9564bac2208Snarayan 9574bac2208Snarayan default: 9584bac2208Snarayan return (ENOTSUP); 9594bac2208Snarayan } 9601ae08745Sheppo } 9611ae08745Sheppo 9621ae08745Sheppo static int 9630a55fbb7Slm66018 vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void* buf, vd_ioctl_t *ioctl) 9641ae08745Sheppo { 9651ae08745Sheppo int rval = 0, status; 9661ae08745Sheppo size_t nbytes = request->nbytes; /* modifiable copy */ 9671ae08745Sheppo 9681ae08745Sheppo 9691ae08745Sheppo ASSERT(request->slice < vd->nslices); 9701ae08745Sheppo PR0("Performing %s", ioctl->operation_name); 9711ae08745Sheppo 9720a55fbb7Slm66018 /* Get data from client and convert, if necessary */ 9730a55fbb7Slm66018 if (ioctl->copyin != NULL) { 9741ae08745Sheppo ASSERT(nbytes != 0 && buf != NULL); 9751ae08745Sheppo PR1("Getting \"arg\" data from client"); 9761ae08745Sheppo if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, 9771ae08745Sheppo request->cookie, request->ncookies, 9781ae08745Sheppo LDC_COPY_IN)) != 0) { 9793af08d82Slm66018 PR0("ldc_mem_copy() returned errno %d " 9801ae08745Sheppo "copying from client", status); 9811ae08745Sheppo return (status); 9821ae08745Sheppo } 9830a55fbb7Slm66018 9840a55fbb7Slm66018 /* Convert client's data, if necessary */ 9850a55fbb7Slm66018 if (ioctl->copyin == VD_IDENTITY) /* use client buffer */ 9860a55fbb7Slm66018 ioctl->arg = buf; 9870a55fbb7Slm66018 else /* convert client vdisk operation data to ioctl data */ 9880a55fbb7Slm66018 (ioctl->copyin)(buf, (void *)ioctl->arg); 9891ae08745Sheppo } 9901ae08745Sheppo 9911ae08745Sheppo /* 9921ae08745Sheppo * Handle single-slice block devices internally; otherwise, have the 9931ae08745Sheppo * real driver perform the ioctl() 9941ae08745Sheppo */ 995*3c96341aSnarayan if (vd->file || (vd->vdisk_type == VD_DISK_TYPE_SLICE 
&& !vd->pseudo)) { 9960a55fbb7Slm66018 if ((status = vd_do_slice_ioctl(vd, ioctl->cmd, 9970a55fbb7Slm66018 (void *)ioctl->arg)) != 0) 9981ae08745Sheppo return (status); 9991ae08745Sheppo } else if ((status = ldi_ioctl(vd->ldi_handle[request->slice], 1000d10e4ef2Snarayan ioctl->cmd, (intptr_t)ioctl->arg, (vd_open_flags | FKIOCTL), 1001d10e4ef2Snarayan kcred, &rval)) != 0) { 10021ae08745Sheppo PR0("ldi_ioctl(%s) = errno %d", ioctl->cmd_name, status); 10031ae08745Sheppo return (status); 10041ae08745Sheppo } 10051ae08745Sheppo #ifdef DEBUG 10061ae08745Sheppo if (rval != 0) { 10073af08d82Slm66018 PR0("%s set rval = %d, which is not being returned to client", 10081ae08745Sheppo ioctl->cmd_name, rval); 10091ae08745Sheppo } 10101ae08745Sheppo #endif /* DEBUG */ 10111ae08745Sheppo 10120a55fbb7Slm66018 /* Convert data and send to client, if necessary */ 10130a55fbb7Slm66018 if (ioctl->copyout != NULL) { 10141ae08745Sheppo ASSERT(nbytes != 0 && buf != NULL); 10151ae08745Sheppo PR1("Sending \"arg\" data to client"); 10160a55fbb7Slm66018 10170a55fbb7Slm66018 /* Convert ioctl data to vdisk operation data, if necessary */ 10180a55fbb7Slm66018 if (ioctl->copyout != VD_IDENTITY) 10190a55fbb7Slm66018 (ioctl->copyout)((void *)ioctl->arg, buf); 10200a55fbb7Slm66018 10211ae08745Sheppo if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, 10221ae08745Sheppo request->cookie, request->ncookies, 10231ae08745Sheppo LDC_COPY_OUT)) != 0) { 10243af08d82Slm66018 PR0("ldc_mem_copy() returned errno %d " 10251ae08745Sheppo "copying to client", status); 10261ae08745Sheppo return (status); 10271ae08745Sheppo } 10281ae08745Sheppo } 10291ae08745Sheppo 10301ae08745Sheppo return (status); 10311ae08745Sheppo } 10321ae08745Sheppo 10331ae08745Sheppo #define RNDSIZE(expr) P2ROUNDUP(sizeof (expr), sizeof (uint64_t)) 10341ae08745Sheppo static int 1035d10e4ef2Snarayan vd_ioctl(vd_task_t *task) 10361ae08745Sheppo { 103734683adeSsg70180 int i, status, rc; 10381ae08745Sheppo void *buf = NULL; 
10390a55fbb7Slm66018 struct dk_geom dk_geom = {0}; 10400a55fbb7Slm66018 struct vtoc vtoc = {0}; 10414bac2208Snarayan struct dk_efi dk_efi = {0}; 1042d10e4ef2Snarayan vd_t *vd = task->vd; 1043d10e4ef2Snarayan vd_dring_payload_t *request = task->request; 10440a55fbb7Slm66018 vd_ioctl_t ioctl[] = { 10450a55fbb7Slm66018 /* Command (no-copy) operations */ 10460a55fbb7Slm66018 {VD_OP_FLUSH, STRINGIZE(VD_OP_FLUSH), 0, 10470a55fbb7Slm66018 DKIOCFLUSHWRITECACHE, STRINGIZE(DKIOCFLUSHWRITECACHE), 10480a55fbb7Slm66018 NULL, NULL, NULL}, 10490a55fbb7Slm66018 10500a55fbb7Slm66018 /* "Get" (copy-out) operations */ 10510a55fbb7Slm66018 {VD_OP_GET_WCE, STRINGIZE(VD_OP_GET_WCE), RNDSIZE(int), 10520a55fbb7Slm66018 DKIOCGETWCE, STRINGIZE(DKIOCGETWCE), 10534bac2208Snarayan NULL, VD_IDENTITY, VD_IDENTITY}, 10540a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, STRINGIZE(VD_OP_GET_DISKGEOM), 10550a55fbb7Slm66018 RNDSIZE(vd_geom_t), 10560a55fbb7Slm66018 DKIOCGGEOM, STRINGIZE(DKIOCGGEOM), 10570a55fbb7Slm66018 &dk_geom, NULL, dk_geom2vd_geom}, 10580a55fbb7Slm66018 {VD_OP_GET_VTOC, STRINGIZE(VD_OP_GET_VTOC), RNDSIZE(vd_vtoc_t), 10590a55fbb7Slm66018 DKIOCGVTOC, STRINGIZE(DKIOCGVTOC), 10600a55fbb7Slm66018 &vtoc, NULL, vtoc2vd_vtoc}, 10614bac2208Snarayan {VD_OP_GET_EFI, STRINGIZE(VD_OP_GET_EFI), RNDSIZE(vd_efi_t), 10624bac2208Snarayan DKIOCGETEFI, STRINGIZE(DKIOCGETEFI), 10634bac2208Snarayan &dk_efi, vd_get_efi_in, vd_get_efi_out}, 10640a55fbb7Slm66018 10650a55fbb7Slm66018 /* "Set" (copy-in) operations */ 10660a55fbb7Slm66018 {VD_OP_SET_WCE, STRINGIZE(VD_OP_SET_WCE), RNDSIZE(int), 10670a55fbb7Slm66018 DKIOCSETWCE, STRINGIZE(DKIOCSETWCE), 10684bac2208Snarayan NULL, VD_IDENTITY, VD_IDENTITY}, 10690a55fbb7Slm66018 {VD_OP_SET_DISKGEOM, STRINGIZE(VD_OP_SET_DISKGEOM), 10700a55fbb7Slm66018 RNDSIZE(vd_geom_t), 10710a55fbb7Slm66018 DKIOCSGEOM, STRINGIZE(DKIOCSGEOM), 10720a55fbb7Slm66018 &dk_geom, vd_geom2dk_geom, NULL}, 10730a55fbb7Slm66018 {VD_OP_SET_VTOC, STRINGIZE(VD_OP_SET_VTOC), RNDSIZE(vd_vtoc_t), 
10740a55fbb7Slm66018 DKIOCSVTOC, STRINGIZE(DKIOCSVTOC), 10750a55fbb7Slm66018 &vtoc, vd_vtoc2vtoc, NULL}, 10764bac2208Snarayan {VD_OP_SET_EFI, STRINGIZE(VD_OP_SET_EFI), RNDSIZE(vd_efi_t), 10774bac2208Snarayan DKIOCSETEFI, STRINGIZE(DKIOCSETEFI), 10784bac2208Snarayan &dk_efi, vd_set_efi_in, vd_set_efi_out}, 10790a55fbb7Slm66018 }; 10801ae08745Sheppo size_t nioctls = (sizeof (ioctl))/(sizeof (ioctl[0])); 10811ae08745Sheppo 10821ae08745Sheppo 1083d10e4ef2Snarayan ASSERT(vd != NULL); 1084d10e4ef2Snarayan ASSERT(request != NULL); 10851ae08745Sheppo ASSERT(request->slice < vd->nslices); 10861ae08745Sheppo 10871ae08745Sheppo /* 10881ae08745Sheppo * Determine ioctl corresponding to caller's "operation" and 10891ae08745Sheppo * validate caller's "nbytes" 10901ae08745Sheppo */ 10911ae08745Sheppo for (i = 0; i < nioctls; i++) { 10921ae08745Sheppo if (request->operation == ioctl[i].operation) { 10930a55fbb7Slm66018 /* LDC memory operations require 8-byte multiples */ 10940a55fbb7Slm66018 ASSERT(ioctl[i].nbytes % sizeof (uint64_t) == 0); 10950a55fbb7Slm66018 10964bac2208Snarayan if (request->operation == VD_OP_GET_EFI || 10974bac2208Snarayan request->operation == VD_OP_SET_EFI) { 10984bac2208Snarayan if (request->nbytes >= ioctl[i].nbytes) 10994bac2208Snarayan break; 11003af08d82Slm66018 PR0("%s: Expected at least nbytes = %lu, " 11014bac2208Snarayan "got %lu", ioctl[i].operation_name, 11024bac2208Snarayan ioctl[i].nbytes, request->nbytes); 11034bac2208Snarayan return (EINVAL); 11044bac2208Snarayan } 11054bac2208Snarayan 11060a55fbb7Slm66018 if (request->nbytes != ioctl[i].nbytes) { 11073af08d82Slm66018 PR0("%s: Expected nbytes = %lu, got %lu", 11080a55fbb7Slm66018 ioctl[i].operation_name, ioctl[i].nbytes, 11090a55fbb7Slm66018 request->nbytes); 11101ae08745Sheppo return (EINVAL); 11111ae08745Sheppo } 11121ae08745Sheppo 11131ae08745Sheppo break; 11141ae08745Sheppo } 11151ae08745Sheppo } 11161ae08745Sheppo ASSERT(i < nioctls); /* because "operation" already validated */ 
11171ae08745Sheppo 11181ae08745Sheppo if (request->nbytes) 11191ae08745Sheppo buf = kmem_zalloc(request->nbytes, KM_SLEEP); 11201ae08745Sheppo status = vd_do_ioctl(vd, request, buf, &ioctl[i]); 11211ae08745Sheppo if (request->nbytes) 11221ae08745Sheppo kmem_free(buf, request->nbytes); 1123*3c96341aSnarayan if (!vd->file && vd->vdisk_type == VD_DISK_TYPE_DISK && 11244bac2208Snarayan (request->operation == VD_OP_SET_VTOC || 112534683adeSsg70180 request->operation == VD_OP_SET_EFI)) { 112634683adeSsg70180 /* update disk information */ 112734683adeSsg70180 rc = vd_read_vtoc(vd->ldi_handle[0], &vd->vtoc, 112834683adeSsg70180 &vd->vdisk_label); 112934683adeSsg70180 if (rc != 0) 113034683adeSsg70180 PR0("vd_read_vtoc return error %d", rc); 113134683adeSsg70180 } 1132d10e4ef2Snarayan PR0("Returning %d", status); 11331ae08745Sheppo return (status); 11341ae08745Sheppo } 11351ae08745Sheppo 11364bac2208Snarayan static int 11374bac2208Snarayan vd_get_devid(vd_task_t *task) 11384bac2208Snarayan { 11394bac2208Snarayan vd_t *vd = task->vd; 11404bac2208Snarayan vd_dring_payload_t *request = task->request; 11414bac2208Snarayan vd_devid_t *vd_devid; 11424bac2208Snarayan impl_devid_t *devid; 11434bac2208Snarayan int status, bufid_len, devid_len, len; 11443af08d82Slm66018 int bufbytes; 11454bac2208Snarayan 11463af08d82Slm66018 PR1("Get Device ID, nbytes=%ld", request->nbytes); 11474bac2208Snarayan 1148*3c96341aSnarayan if (vd->file) { 1149*3c96341aSnarayan /* no devid for disk on file */ 1150*3c96341aSnarayan return (ENOENT); 1151*3c96341aSnarayan } 1152*3c96341aSnarayan 11534bac2208Snarayan if (ddi_lyr_get_devid(vd->dev[request->slice], 11544bac2208Snarayan (ddi_devid_t *)&devid) != DDI_SUCCESS) { 11554bac2208Snarayan /* the most common failure is that no devid is available */ 11563af08d82Slm66018 PR2("No Device ID"); 11574bac2208Snarayan return (ENOENT); 11584bac2208Snarayan } 11594bac2208Snarayan 11604bac2208Snarayan bufid_len = request->nbytes - sizeof (vd_devid_t) + 1; 
11614bac2208Snarayan devid_len = DEVID_GETLEN(devid); 11624bac2208Snarayan 11633af08d82Slm66018 /* 11643af08d82Slm66018 * Save the buffer size here for use in deallocation. 11653af08d82Slm66018 * The actual number of bytes copied is returned in 11663af08d82Slm66018 * the 'nbytes' field of the request structure. 11673af08d82Slm66018 */ 11683af08d82Slm66018 bufbytes = request->nbytes; 11693af08d82Slm66018 11703af08d82Slm66018 vd_devid = kmem_zalloc(bufbytes, KM_SLEEP); 11714bac2208Snarayan vd_devid->length = devid_len; 11724bac2208Snarayan vd_devid->type = DEVID_GETTYPE(devid); 11734bac2208Snarayan 11744bac2208Snarayan len = (devid_len > bufid_len)? bufid_len : devid_len; 11754bac2208Snarayan 11764bac2208Snarayan bcopy(devid->did_id, vd_devid->id, len); 11774bac2208Snarayan 11784bac2208Snarayan /* LDC memory operations require 8-byte multiples */ 11794bac2208Snarayan ASSERT(request->nbytes % sizeof (uint64_t) == 0); 11804bac2208Snarayan 11814bac2208Snarayan if ((status = ldc_mem_copy(vd->ldc_handle, (caddr_t)vd_devid, 0, 11824bac2208Snarayan &request->nbytes, request->cookie, request->ncookies, 11834bac2208Snarayan LDC_COPY_OUT)) != 0) { 11843af08d82Slm66018 PR0("ldc_mem_copy() returned errno %d copying to client", 11854bac2208Snarayan status); 11864bac2208Snarayan } 11873af08d82Slm66018 PR1("post mem_copy: nbytes=%ld", request->nbytes); 11884bac2208Snarayan 11893af08d82Slm66018 kmem_free(vd_devid, bufbytes); 11904bac2208Snarayan ddi_devid_free((ddi_devid_t)devid); 11914bac2208Snarayan 11924bac2208Snarayan return (status); 11934bac2208Snarayan } 11944bac2208Snarayan 11951ae08745Sheppo /* 11961ae08745Sheppo * Define the supported operations once the functions for performing them have 11971ae08745Sheppo * been defined 11981ae08745Sheppo */ 11991ae08745Sheppo static const vds_operation_t vds_operation[] = { 12003af08d82Slm66018 #define X(_s) #_s, _s 12013af08d82Slm66018 {X(VD_OP_BREAD), vd_start_bio, vd_complete_bio}, 12023af08d82Slm66018 {X(VD_OP_BWRITE), vd_start_bio, 
vd_complete_bio}, 12033af08d82Slm66018 {X(VD_OP_FLUSH), vd_ioctl, NULL}, 12043af08d82Slm66018 {X(VD_OP_GET_WCE), vd_ioctl, NULL}, 12053af08d82Slm66018 {X(VD_OP_SET_WCE), vd_ioctl, NULL}, 12063af08d82Slm66018 {X(VD_OP_GET_VTOC), vd_ioctl, NULL}, 12073af08d82Slm66018 {X(VD_OP_SET_VTOC), vd_ioctl, NULL}, 12083af08d82Slm66018 {X(VD_OP_GET_DISKGEOM), vd_ioctl, NULL}, 12093af08d82Slm66018 {X(VD_OP_SET_DISKGEOM), vd_ioctl, NULL}, 12103af08d82Slm66018 {X(VD_OP_GET_EFI), vd_ioctl, NULL}, 12113af08d82Slm66018 {X(VD_OP_SET_EFI), vd_ioctl, NULL}, 12123af08d82Slm66018 {X(VD_OP_GET_DEVID), vd_get_devid, NULL}, 12133af08d82Slm66018 #undef X 12141ae08745Sheppo }; 12151ae08745Sheppo 12161ae08745Sheppo static const size_t vds_noperations = 12171ae08745Sheppo (sizeof (vds_operation))/(sizeof (vds_operation[0])); 12181ae08745Sheppo 12191ae08745Sheppo /* 1220d10e4ef2Snarayan * Process a task specifying a client I/O request 12211ae08745Sheppo */ 12221ae08745Sheppo static int 1223d10e4ef2Snarayan vd_process_task(vd_task_t *task) 12241ae08745Sheppo { 1225d10e4ef2Snarayan int i, status; 1226d10e4ef2Snarayan vd_t *vd = task->vd; 1227d10e4ef2Snarayan vd_dring_payload_t *request = task->request; 12281ae08745Sheppo 12291ae08745Sheppo 1230d10e4ef2Snarayan ASSERT(vd != NULL); 1231d10e4ef2Snarayan ASSERT(request != NULL); 12321ae08745Sheppo 1233d10e4ef2Snarayan /* Find the requested operation */ 12341ae08745Sheppo for (i = 0; i < vds_noperations; i++) 12351ae08745Sheppo if (request->operation == vds_operation[i].operation) 1236d10e4ef2Snarayan break; 1237d10e4ef2Snarayan if (i == vds_noperations) { 12383af08d82Slm66018 PR0("Unsupported operation %u", request->operation); 12391ae08745Sheppo return (ENOTSUP); 12401ae08745Sheppo } 12411ae08745Sheppo 12427636cb21Slm66018 /* Handle client using absolute disk offsets */ 12437636cb21Slm66018 if ((vd->vdisk_type == VD_DISK_TYPE_DISK) && 12447636cb21Slm66018 (request->slice == UINT8_MAX)) 12457636cb21Slm66018 request->slice = VD_ENTIRE_DISK_SLICE; 
12467636cb21Slm66018 12477636cb21Slm66018 /* Range-check slice */ 12487636cb21Slm66018 if (request->slice >= vd->nslices) { 12493af08d82Slm66018 PR0("Invalid \"slice\" %u (max %u) for virtual disk", 12507636cb21Slm66018 request->slice, (vd->nslices - 1)); 12517636cb21Slm66018 return (EINVAL); 12527636cb21Slm66018 } 12537636cb21Slm66018 12543af08d82Slm66018 PR1("operation : %s", vds_operation[i].namep); 12553af08d82Slm66018 1256d10e4ef2Snarayan /* Start the operation */ 1257d10e4ef2Snarayan if ((status = vds_operation[i].start(task)) != EINPROGRESS) { 12583af08d82Slm66018 PR0("operation : %s returned status %d", 12593af08d82Slm66018 vds_operation[i].namep, status); 1260d10e4ef2Snarayan request->status = status; /* op succeeded or failed */ 1261d10e4ef2Snarayan return (0); /* but request completed */ 12621ae08745Sheppo } 12631ae08745Sheppo 1264d10e4ef2Snarayan ASSERT(vds_operation[i].complete != NULL); /* debug case */ 1265d10e4ef2Snarayan if (vds_operation[i].complete == NULL) { /* non-debug case */ 12663af08d82Slm66018 PR0("Unexpected return of EINPROGRESS " 1267d10e4ef2Snarayan "with no I/O completion handler"); 1268d10e4ef2Snarayan request->status = EIO; /* operation failed */ 1269d10e4ef2Snarayan return (0); /* but request completed */ 12701ae08745Sheppo } 12711ae08745Sheppo 12723af08d82Slm66018 PR1("operation : kick off taskq entry for %s", vds_operation[i].namep); 12733af08d82Slm66018 1274d10e4ef2Snarayan /* Queue a task to complete the operation */ 1275d10e4ef2Snarayan status = ddi_taskq_dispatch(vd->completionq, vds_operation[i].complete, 1276d10e4ef2Snarayan task, DDI_SLEEP); 1277d10e4ef2Snarayan /* ddi_taskq_dispatch(9f) guarantees success with DDI_SLEEP */ 1278d10e4ef2Snarayan ASSERT(status == DDI_SUCCESS); 1279d10e4ef2Snarayan 1280d10e4ef2Snarayan PR1("Operation in progress"); 1281d10e4ef2Snarayan return (EINPROGRESS); /* completion handler will finish request */ 12821ae08745Sheppo } 12831ae08745Sheppo 12841ae08745Sheppo /* 12850a55fbb7Slm66018 * Return 
true if the "type", "subtype", and "env" fields of the "tag" first 12860a55fbb7Slm66018 * argument match the corresponding remaining arguments; otherwise, return false 12871ae08745Sheppo */ 12880a55fbb7Slm66018 boolean_t 12891ae08745Sheppo vd_msgtype(vio_msg_tag_t *tag, int type, int subtype, int env) 12901ae08745Sheppo { 12911ae08745Sheppo return ((tag->vio_msgtype == type) && 12921ae08745Sheppo (tag->vio_subtype == subtype) && 12930a55fbb7Slm66018 (tag->vio_subtype_env == env)) ? B_TRUE : B_FALSE; 12941ae08745Sheppo } 12951ae08745Sheppo 12960a55fbb7Slm66018 /* 12970a55fbb7Slm66018 * Check whether the major/minor version specified in "ver_msg" is supported 12980a55fbb7Slm66018 * by this server. 12990a55fbb7Slm66018 */ 13000a55fbb7Slm66018 static boolean_t 13010a55fbb7Slm66018 vds_supported_version(vio_ver_msg_t *ver_msg) 13020a55fbb7Slm66018 { 13030a55fbb7Slm66018 for (int i = 0; i < vds_num_versions; i++) { 13040a55fbb7Slm66018 ASSERT(vds_version[i].major > 0); 13050a55fbb7Slm66018 ASSERT((i == 0) || 13060a55fbb7Slm66018 (vds_version[i].major < vds_version[i-1].major)); 13070a55fbb7Slm66018 13080a55fbb7Slm66018 /* 13090a55fbb7Slm66018 * If the major versions match, adjust the minor version, if 13100a55fbb7Slm66018 * necessary, down to the highest value supported by this 13110a55fbb7Slm66018 * server and return true so this message will get "ack"ed; 13120a55fbb7Slm66018 * the client should also support all minor versions lower 13130a55fbb7Slm66018 * than the value it sent 13140a55fbb7Slm66018 */ 13150a55fbb7Slm66018 if (ver_msg->ver_major == vds_version[i].major) { 13160a55fbb7Slm66018 if (ver_msg->ver_minor > vds_version[i].minor) { 13170a55fbb7Slm66018 PR0("Adjusting minor version from %u to %u", 13180a55fbb7Slm66018 ver_msg->ver_minor, vds_version[i].minor); 13190a55fbb7Slm66018 ver_msg->ver_minor = vds_version[i].minor; 13200a55fbb7Slm66018 } 13210a55fbb7Slm66018 return (B_TRUE); 13220a55fbb7Slm66018 } 13230a55fbb7Slm66018 13240a55fbb7Slm66018 /* 
13250a55fbb7Slm66018 * If the message contains a higher major version number, set 13260a55fbb7Slm66018 * the message's major/minor versions to the current values 13270a55fbb7Slm66018 * and return false, so this message will get "nack"ed with 13280a55fbb7Slm66018 * these values, and the client will potentially try again 13290a55fbb7Slm66018 * with the same or a lower version 13300a55fbb7Slm66018 */ 13310a55fbb7Slm66018 if (ver_msg->ver_major > vds_version[i].major) { 13320a55fbb7Slm66018 ver_msg->ver_major = vds_version[i].major; 13330a55fbb7Slm66018 ver_msg->ver_minor = vds_version[i].minor; 13340a55fbb7Slm66018 return (B_FALSE); 13350a55fbb7Slm66018 } 13360a55fbb7Slm66018 13370a55fbb7Slm66018 /* 13380a55fbb7Slm66018 * Otherwise, the message's major version is less than the 13390a55fbb7Slm66018 * current major version, so continue the loop to the next 13400a55fbb7Slm66018 * (lower) supported version 13410a55fbb7Slm66018 */ 13420a55fbb7Slm66018 } 13430a55fbb7Slm66018 13440a55fbb7Slm66018 /* 13450a55fbb7Slm66018 * No common version was found; "ground" the version pair in the 13460a55fbb7Slm66018 * message to terminate negotiation 13470a55fbb7Slm66018 */ 13480a55fbb7Slm66018 ver_msg->ver_major = 0; 13490a55fbb7Slm66018 ver_msg->ver_minor = 0; 13500a55fbb7Slm66018 return (B_FALSE); 13510a55fbb7Slm66018 } 13520a55fbb7Slm66018 13530a55fbb7Slm66018 /* 13540a55fbb7Slm66018 * Process a version message from a client. vds expects to receive version 13550a55fbb7Slm66018 * messages from clients seeking service, but never issues version messages 13560a55fbb7Slm66018 * itself; therefore, vds can ACK or NACK client version messages, but does 13570a55fbb7Slm66018 * not expect to receive version-message ACKs or NACKs (and will treat such 13580a55fbb7Slm66018 * messages as invalid). 
13590a55fbb7Slm66018 */ 13601ae08745Sheppo static int 13610a55fbb7Slm66018 vd_process_ver_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 13621ae08745Sheppo { 13631ae08745Sheppo vio_ver_msg_t *ver_msg = (vio_ver_msg_t *)msg; 13641ae08745Sheppo 13651ae08745Sheppo 13661ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 13671ae08745Sheppo 13681ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 13691ae08745Sheppo VIO_VER_INFO)) { 13701ae08745Sheppo return (ENOMSG); /* not a version message */ 13711ae08745Sheppo } 13721ae08745Sheppo 13731ae08745Sheppo if (msglen != sizeof (*ver_msg)) { 13743af08d82Slm66018 PR0("Expected %lu-byte version message; " 13751ae08745Sheppo "received %lu bytes", sizeof (*ver_msg), msglen); 13761ae08745Sheppo return (EBADMSG); 13771ae08745Sheppo } 13781ae08745Sheppo 13791ae08745Sheppo if (ver_msg->dev_class != VDEV_DISK) { 13803af08d82Slm66018 PR0("Expected device class %u (disk); received %u", 13811ae08745Sheppo VDEV_DISK, ver_msg->dev_class); 13821ae08745Sheppo return (EBADMSG); 13831ae08745Sheppo } 13841ae08745Sheppo 13850a55fbb7Slm66018 /* 13860a55fbb7Slm66018 * We're talking to the expected kind of client; set our device class 13870a55fbb7Slm66018 * for "ack/nack" back to the client 13880a55fbb7Slm66018 */ 13891ae08745Sheppo ver_msg->dev_class = VDEV_DISK_SERVER; 13900a55fbb7Slm66018 13910a55fbb7Slm66018 /* 13920a55fbb7Slm66018 * Check whether the (valid) version message specifies a version 13930a55fbb7Slm66018 * supported by this server. 
If the version is not supported, return 13940a55fbb7Slm66018 * EBADMSG so the message will get "nack"ed; vds_supported_version() 13950a55fbb7Slm66018 * will have updated the message with a supported version for the 13960a55fbb7Slm66018 * client to consider 13970a55fbb7Slm66018 */ 13980a55fbb7Slm66018 if (!vds_supported_version(ver_msg)) 13990a55fbb7Slm66018 return (EBADMSG); 14000a55fbb7Slm66018 14010a55fbb7Slm66018 14020a55fbb7Slm66018 /* 14030a55fbb7Slm66018 * A version has been agreed upon; use the client's SID for 14040a55fbb7Slm66018 * communication on this channel now 14050a55fbb7Slm66018 */ 14060a55fbb7Slm66018 ASSERT(!(vd->initialized & VD_SID)); 14070a55fbb7Slm66018 vd->sid = ver_msg->tag.vio_sid; 14080a55fbb7Slm66018 vd->initialized |= VD_SID; 14090a55fbb7Slm66018 14100a55fbb7Slm66018 /* 14110a55fbb7Slm66018 * When multiple versions are supported, this function should store 14120a55fbb7Slm66018 * the negotiated major and minor version values in the "vd" data 14130a55fbb7Slm66018 * structure to govern further communication; in particular, note that 14140a55fbb7Slm66018 * the client might have specified a lower minor version for the 14150a55fbb7Slm66018 * agreed major version than specifed in the vds_version[] array. The 14160a55fbb7Slm66018 * following assertions should help remind future maintainers to make 14170a55fbb7Slm66018 * the appropriate changes to support multiple versions. 
14180a55fbb7Slm66018 */ 14190a55fbb7Slm66018 ASSERT(vds_num_versions == 1); 14200a55fbb7Slm66018 ASSERT(ver_msg->ver_major == vds_version[0].major); 14210a55fbb7Slm66018 ASSERT(ver_msg->ver_minor == vds_version[0].minor); 14220a55fbb7Slm66018 14230a55fbb7Slm66018 PR0("Using major version %u, minor version %u", 14240a55fbb7Slm66018 ver_msg->ver_major, ver_msg->ver_minor); 14251ae08745Sheppo return (0); 14261ae08745Sheppo } 14271ae08745Sheppo 14281ae08745Sheppo static int 14291ae08745Sheppo vd_process_attr_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 14301ae08745Sheppo { 14311ae08745Sheppo vd_attr_msg_t *attr_msg = (vd_attr_msg_t *)msg; 1432*3c96341aSnarayan int status, retry = 0; 14331ae08745Sheppo 14341ae08745Sheppo 14351ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 14361ae08745Sheppo 14371ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 14381ae08745Sheppo VIO_ATTR_INFO)) { 1439d10e4ef2Snarayan PR0("Message is not an attribute message"); 1440d10e4ef2Snarayan return (ENOMSG); 14411ae08745Sheppo } 14421ae08745Sheppo 14431ae08745Sheppo if (msglen != sizeof (*attr_msg)) { 14443af08d82Slm66018 PR0("Expected %lu-byte attribute message; " 14451ae08745Sheppo "received %lu bytes", sizeof (*attr_msg), msglen); 14461ae08745Sheppo return (EBADMSG); 14471ae08745Sheppo } 14481ae08745Sheppo 14491ae08745Sheppo if (attr_msg->max_xfer_sz == 0) { 14503af08d82Slm66018 PR0("Received maximum transfer size of 0 from client"); 14511ae08745Sheppo return (EBADMSG); 14521ae08745Sheppo } 14531ae08745Sheppo 14541ae08745Sheppo if ((attr_msg->xfer_mode != VIO_DESC_MODE) && 14551ae08745Sheppo (attr_msg->xfer_mode != VIO_DRING_MODE)) { 14563af08d82Slm66018 PR0("Client requested unsupported transfer mode"); 14571ae08745Sheppo return (EBADMSG); 14581ae08745Sheppo } 14591ae08745Sheppo 1460*3c96341aSnarayan /* 1461*3c96341aSnarayan * check if the underlying disk is ready, if not try accessing 1462*3c96341aSnarayan * the device again. 
Open the vdisk device and extract info 1463*3c96341aSnarayan * about it, as this is needed to respond to the attr info msg 1464*3c96341aSnarayan */ 1465*3c96341aSnarayan if ((vd->initialized & VD_DISK_READY) == 0) { 1466*3c96341aSnarayan PR0("Retry setting up disk (%s)", vd->device_path); 1467*3c96341aSnarayan do { 1468*3c96341aSnarayan status = vd_setup_vd(vd); 1469*3c96341aSnarayan if (status != EAGAIN || ++retry > vds_dev_retries) 1470*3c96341aSnarayan break; 1471*3c96341aSnarayan 1472*3c96341aSnarayan /* incremental delay */ 1473*3c96341aSnarayan delay(drv_usectohz(vds_dev_delay)); 1474*3c96341aSnarayan 1475*3c96341aSnarayan /* if vdisk is no longer enabled - return error */ 1476*3c96341aSnarayan if (!vd_enabled(vd)) 1477*3c96341aSnarayan return (ENXIO); 1478*3c96341aSnarayan 1479*3c96341aSnarayan } while (status == EAGAIN); 1480*3c96341aSnarayan 1481*3c96341aSnarayan if (status) 1482*3c96341aSnarayan return (ENXIO); 1483*3c96341aSnarayan 1484*3c96341aSnarayan vd->initialized |= VD_DISK_READY; 1485*3c96341aSnarayan ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR); 1486*3c96341aSnarayan PR0("vdisk_type = %s, pseudo = %s, file = %s, nslices = %u", 1487*3c96341aSnarayan ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? "disk" : "slice"), 1488*3c96341aSnarayan (vd->pseudo ? "yes" : "no"), 1489*3c96341aSnarayan (vd->file ? "yes" : "no"), 1490*3c96341aSnarayan vd->nslices); 1491*3c96341aSnarayan } 1492*3c96341aSnarayan 14931ae08745Sheppo /* Success: valid message and transfer mode */ 14941ae08745Sheppo vd->xfer_mode = attr_msg->xfer_mode; 14953af08d82Slm66018 14961ae08745Sheppo if (vd->xfer_mode == VIO_DESC_MODE) { 14973af08d82Slm66018 14981ae08745Sheppo /* 14991ae08745Sheppo * The vd_dring_inband_msg_t contains one cookie; need room 15001ae08745Sheppo * for up to n-1 more cookies, where "n" is the number of full 15011ae08745Sheppo * pages plus possibly one partial page required to cover 15021ae08745Sheppo * "max_xfer_sz". 
Add room for one more cookie if 15031ae08745Sheppo * "max_xfer_sz" isn't an integral multiple of the page size. 15041ae08745Sheppo * Must first get the maximum transfer size in bytes. 15051ae08745Sheppo */ 15061ae08745Sheppo size_t max_xfer_bytes = attr_msg->vdisk_block_size ? 15071ae08745Sheppo attr_msg->vdisk_block_size*attr_msg->max_xfer_sz : 15081ae08745Sheppo attr_msg->max_xfer_sz; 15091ae08745Sheppo size_t max_inband_msglen = 15101ae08745Sheppo sizeof (vd_dring_inband_msg_t) + 15111ae08745Sheppo ((max_xfer_bytes/PAGESIZE + 15121ae08745Sheppo ((max_xfer_bytes % PAGESIZE) ? 1 : 0))* 15131ae08745Sheppo (sizeof (ldc_mem_cookie_t))); 15141ae08745Sheppo 15151ae08745Sheppo /* 15161ae08745Sheppo * Set the maximum expected message length to 15171ae08745Sheppo * accommodate in-band-descriptor messages with all 15181ae08745Sheppo * their cookies 15191ae08745Sheppo */ 15201ae08745Sheppo vd->max_msglen = MAX(vd->max_msglen, max_inband_msglen); 1521d10e4ef2Snarayan 1522d10e4ef2Snarayan /* 1523d10e4ef2Snarayan * Initialize the data structure for processing in-band I/O 1524d10e4ef2Snarayan * request descriptors 1525d10e4ef2Snarayan */ 1526d10e4ef2Snarayan vd->inband_task.vd = vd; 15273af08d82Slm66018 vd->inband_task.msg = kmem_alloc(vd->max_msglen, KM_SLEEP); 1528d10e4ef2Snarayan vd->inband_task.index = 0; 1529d10e4ef2Snarayan vd->inband_task.type = VD_FINAL_RANGE_TASK; /* range == 1 */ 15301ae08745Sheppo } 15311ae08745Sheppo 1532e1ebb9ecSlm66018 /* Return the device's block size and max transfer size to the client */ 1533e1ebb9ecSlm66018 attr_msg->vdisk_block_size = DEV_BSIZE; 1534e1ebb9ecSlm66018 attr_msg->max_xfer_sz = vd->max_xfer_sz; 1535e1ebb9ecSlm66018 15361ae08745Sheppo attr_msg->vdisk_size = vd->vdisk_size; 15371ae08745Sheppo attr_msg->vdisk_type = vd->vdisk_type; 15381ae08745Sheppo attr_msg->operations = vds_operations; 15391ae08745Sheppo PR0("%s", VD_CLIENT(vd)); 15403af08d82Slm66018 15413af08d82Slm66018 ASSERT(vd->dring_task == NULL); 15423af08d82Slm66018 
15431ae08745Sheppo return (0); 15441ae08745Sheppo } 15451ae08745Sheppo 15461ae08745Sheppo static int 15471ae08745Sheppo vd_process_dring_reg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 15481ae08745Sheppo { 15491ae08745Sheppo int status; 15501ae08745Sheppo size_t expected; 15511ae08745Sheppo ldc_mem_info_t dring_minfo; 15521ae08745Sheppo vio_dring_reg_msg_t *reg_msg = (vio_dring_reg_msg_t *)msg; 15531ae08745Sheppo 15541ae08745Sheppo 15551ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 15561ae08745Sheppo 15571ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 15581ae08745Sheppo VIO_DRING_REG)) { 1559d10e4ef2Snarayan PR0("Message is not a register-dring message"); 1560d10e4ef2Snarayan return (ENOMSG); 15611ae08745Sheppo } 15621ae08745Sheppo 15631ae08745Sheppo if (msglen < sizeof (*reg_msg)) { 15643af08d82Slm66018 PR0("Expected at least %lu-byte register-dring message; " 15651ae08745Sheppo "received %lu bytes", sizeof (*reg_msg), msglen); 15661ae08745Sheppo return (EBADMSG); 15671ae08745Sheppo } 15681ae08745Sheppo 15691ae08745Sheppo expected = sizeof (*reg_msg) + 15701ae08745Sheppo (reg_msg->ncookies - 1)*(sizeof (reg_msg->cookie[0])); 15711ae08745Sheppo if (msglen != expected) { 15723af08d82Slm66018 PR0("Expected %lu-byte register-dring message; " 15731ae08745Sheppo "received %lu bytes", expected, msglen); 15741ae08745Sheppo return (EBADMSG); 15751ae08745Sheppo } 15761ae08745Sheppo 15771ae08745Sheppo if (vd->initialized & VD_DRING) { 15783af08d82Slm66018 PR0("A dring was previously registered; only support one"); 15791ae08745Sheppo return (EBADMSG); 15801ae08745Sheppo } 15811ae08745Sheppo 1582d10e4ef2Snarayan if (reg_msg->num_descriptors > INT32_MAX) { 15833af08d82Slm66018 PR0("reg_msg->num_descriptors = %u; must be <= %u (%s)", 1584d10e4ef2Snarayan reg_msg->ncookies, INT32_MAX, STRINGIZE(INT32_MAX)); 1585d10e4ef2Snarayan return (EBADMSG); 1586d10e4ef2Snarayan } 1587d10e4ef2Snarayan 15881ae08745Sheppo if (reg_msg->ncookies != 1) { 
15891ae08745Sheppo /* 15901ae08745Sheppo * In addition to fixing the assertion in the success case 15911ae08745Sheppo * below, supporting drings which require more than one 15921ae08745Sheppo * "cookie" requires increasing the value of vd->max_msglen 15931ae08745Sheppo * somewhere in the code path prior to receiving the message 15941ae08745Sheppo * which results in calling this function. Note that without 15951ae08745Sheppo * making this change, the larger message size required to 15961ae08745Sheppo * accommodate multiple cookies cannot be successfully 15971ae08745Sheppo * received, so this function will not even get called. 15981ae08745Sheppo * Gracefully accommodating more dring cookies might 15991ae08745Sheppo * reasonably demand exchanging an additional attribute or 16001ae08745Sheppo * making a minor protocol adjustment 16011ae08745Sheppo */ 16023af08d82Slm66018 PR0("reg_msg->ncookies = %u != 1", reg_msg->ncookies); 16031ae08745Sheppo return (EBADMSG); 16041ae08745Sheppo } 16051ae08745Sheppo 16061ae08745Sheppo status = ldc_mem_dring_map(vd->ldc_handle, reg_msg->cookie, 16071ae08745Sheppo reg_msg->ncookies, reg_msg->num_descriptors, 16084bac2208Snarayan reg_msg->descriptor_size, LDC_DIRECT_MAP, &vd->dring_handle); 16091ae08745Sheppo if (status != 0) { 16103af08d82Slm66018 PR0("ldc_mem_dring_map() returned errno %d", status); 16111ae08745Sheppo return (status); 16121ae08745Sheppo } 16131ae08745Sheppo 16141ae08745Sheppo /* 16151ae08745Sheppo * To remove the need for this assertion, must call 16161ae08745Sheppo * ldc_mem_dring_nextcookie() successfully ncookies-1 times after a 16171ae08745Sheppo * successful call to ldc_mem_dring_map() 16181ae08745Sheppo */ 16191ae08745Sheppo ASSERT(reg_msg->ncookies == 1); 16201ae08745Sheppo 16211ae08745Sheppo if ((status = 16221ae08745Sheppo ldc_mem_dring_info(vd->dring_handle, &dring_minfo)) != 0) { 16233af08d82Slm66018 PR0("ldc_mem_dring_info() returned errno %d", status); 16241ae08745Sheppo if ((status = 
ldc_mem_dring_unmap(vd->dring_handle)) != 0) 16253af08d82Slm66018 PR0("ldc_mem_dring_unmap() returned errno %d", status); 16261ae08745Sheppo return (status); 16271ae08745Sheppo } 16281ae08745Sheppo 16291ae08745Sheppo if (dring_minfo.vaddr == NULL) { 16303af08d82Slm66018 PR0("Descriptor ring virtual address is NULL"); 16310a55fbb7Slm66018 return (ENXIO); 16321ae08745Sheppo } 16331ae08745Sheppo 16341ae08745Sheppo 1635d10e4ef2Snarayan /* Initialize for valid message and mapped dring */ 16361ae08745Sheppo PR1("descriptor size = %u, dring length = %u", 16371ae08745Sheppo vd->descriptor_size, vd->dring_len); 16381ae08745Sheppo vd->initialized |= VD_DRING; 16391ae08745Sheppo vd->dring_ident = 1; /* "There Can Be Only One" */ 16401ae08745Sheppo vd->dring = dring_minfo.vaddr; 16411ae08745Sheppo vd->descriptor_size = reg_msg->descriptor_size; 16421ae08745Sheppo vd->dring_len = reg_msg->num_descriptors; 16431ae08745Sheppo reg_msg->dring_ident = vd->dring_ident; 1644d10e4ef2Snarayan 1645d10e4ef2Snarayan /* 1646d10e4ef2Snarayan * Allocate and initialize a "shadow" array of data structures for 1647d10e4ef2Snarayan * tasks to process I/O requests in dring elements 1648d10e4ef2Snarayan */ 1649d10e4ef2Snarayan vd->dring_task = 1650d10e4ef2Snarayan kmem_zalloc((sizeof (*vd->dring_task)) * vd->dring_len, KM_SLEEP); 1651d10e4ef2Snarayan for (int i = 0; i < vd->dring_len; i++) { 1652d10e4ef2Snarayan vd->dring_task[i].vd = vd; 1653d10e4ef2Snarayan vd->dring_task[i].index = i; 1654d10e4ef2Snarayan vd->dring_task[i].request = &VD_DRING_ELEM(i)->payload; 16554bac2208Snarayan 16564bac2208Snarayan status = ldc_mem_alloc_handle(vd->ldc_handle, 16574bac2208Snarayan &(vd->dring_task[i].mhdl)); 16584bac2208Snarayan if (status) { 16593af08d82Slm66018 PR0("ldc_mem_alloc_handle() returned err %d ", status); 16604bac2208Snarayan return (ENXIO); 16614bac2208Snarayan } 16623af08d82Slm66018 16633af08d82Slm66018 vd->dring_task[i].msg = kmem_alloc(vd->max_msglen, KM_SLEEP); 1664d10e4ef2Snarayan } 
1665d10e4ef2Snarayan 16661ae08745Sheppo return (0); 16671ae08745Sheppo } 16681ae08745Sheppo 16691ae08745Sheppo static int 16701ae08745Sheppo vd_process_dring_unreg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 16711ae08745Sheppo { 16721ae08745Sheppo vio_dring_unreg_msg_t *unreg_msg = (vio_dring_unreg_msg_t *)msg; 16731ae08745Sheppo 16741ae08745Sheppo 16751ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 16761ae08745Sheppo 16771ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, 16781ae08745Sheppo VIO_DRING_UNREG)) { 1679d10e4ef2Snarayan PR0("Message is not an unregister-dring message"); 1680d10e4ef2Snarayan return (ENOMSG); 16811ae08745Sheppo } 16821ae08745Sheppo 16831ae08745Sheppo if (msglen != sizeof (*unreg_msg)) { 16843af08d82Slm66018 PR0("Expected %lu-byte unregister-dring message; " 16851ae08745Sheppo "received %lu bytes", sizeof (*unreg_msg), msglen); 16861ae08745Sheppo return (EBADMSG); 16871ae08745Sheppo } 16881ae08745Sheppo 16891ae08745Sheppo if (unreg_msg->dring_ident != vd->dring_ident) { 16903af08d82Slm66018 PR0("Expected dring ident %lu; received %lu", 16911ae08745Sheppo vd->dring_ident, unreg_msg->dring_ident); 16921ae08745Sheppo return (EBADMSG); 16931ae08745Sheppo } 16941ae08745Sheppo 16951ae08745Sheppo return (0); 16961ae08745Sheppo } 16971ae08745Sheppo 16981ae08745Sheppo static int 16991ae08745Sheppo process_rdx_msg(vio_msg_t *msg, size_t msglen) 17001ae08745Sheppo { 17011ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 17021ae08745Sheppo 1703d10e4ef2Snarayan if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX)) { 1704d10e4ef2Snarayan PR0("Message is not an RDX message"); 1705d10e4ef2Snarayan return (ENOMSG); 1706d10e4ef2Snarayan } 17071ae08745Sheppo 17081ae08745Sheppo if (msglen != sizeof (vio_rdx_msg_t)) { 17093af08d82Slm66018 PR0("Expected %lu-byte RDX message; received %lu bytes", 17101ae08745Sheppo sizeof (vio_rdx_msg_t), msglen); 17111ae08745Sheppo return (EBADMSG); 17121ae08745Sheppo } 
17131ae08745Sheppo 1714d10e4ef2Snarayan PR0("Valid RDX message"); 17151ae08745Sheppo return (0); 17161ae08745Sheppo } 17171ae08745Sheppo 17181ae08745Sheppo static int 17191ae08745Sheppo vd_check_seq_num(vd_t *vd, uint64_t seq_num) 17201ae08745Sheppo { 17211ae08745Sheppo if ((vd->initialized & VD_SEQ_NUM) && (seq_num != vd->seq_num + 1)) { 17223af08d82Slm66018 PR0("Received seq_num %lu; expected %lu", 17231ae08745Sheppo seq_num, (vd->seq_num + 1)); 17243af08d82Slm66018 PR0("initiating soft reset"); 1725d10e4ef2Snarayan vd_need_reset(vd, B_FALSE); 17261ae08745Sheppo return (1); 17271ae08745Sheppo } 17281ae08745Sheppo 17291ae08745Sheppo vd->seq_num = seq_num; 17301ae08745Sheppo vd->initialized |= VD_SEQ_NUM; /* superfluous after first time... */ 17311ae08745Sheppo return (0); 17321ae08745Sheppo } 17331ae08745Sheppo 17341ae08745Sheppo /* 17351ae08745Sheppo * Return the expected size of an inband-descriptor message with all the 17361ae08745Sheppo * cookies it claims to include 17371ae08745Sheppo */ 17381ae08745Sheppo static size_t 17391ae08745Sheppo expected_inband_size(vd_dring_inband_msg_t *msg) 17401ae08745Sheppo { 17411ae08745Sheppo return ((sizeof (*msg)) + 17421ae08745Sheppo (msg->payload.ncookies - 1)*(sizeof (msg->payload.cookie[0]))); 17431ae08745Sheppo } 17441ae08745Sheppo 17451ae08745Sheppo /* 17461ae08745Sheppo * Process an in-band descriptor message: used with clients like OBP, with 17471ae08745Sheppo * which vds exchanges descriptors within VIO message payloads, rather than 17481ae08745Sheppo * operating on them within a descriptor ring 17491ae08745Sheppo */ 17501ae08745Sheppo static int 17513af08d82Slm66018 vd_process_desc_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 17521ae08745Sheppo { 17531ae08745Sheppo size_t expected; 17541ae08745Sheppo vd_dring_inband_msg_t *desc_msg = (vd_dring_inband_msg_t *)msg; 17551ae08745Sheppo 17561ae08745Sheppo 17571ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 17581ae08745Sheppo 17591ae08745Sheppo if 
(!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, 1760d10e4ef2Snarayan VIO_DESC_DATA)) { 1761d10e4ef2Snarayan PR1("Message is not an in-band-descriptor message"); 1762d10e4ef2Snarayan return (ENOMSG); 1763d10e4ef2Snarayan } 17641ae08745Sheppo 17651ae08745Sheppo if (msglen < sizeof (*desc_msg)) { 17663af08d82Slm66018 PR0("Expected at least %lu-byte descriptor message; " 17671ae08745Sheppo "received %lu bytes", sizeof (*desc_msg), msglen); 17681ae08745Sheppo return (EBADMSG); 17691ae08745Sheppo } 17701ae08745Sheppo 17711ae08745Sheppo if (msglen != (expected = expected_inband_size(desc_msg))) { 17723af08d82Slm66018 PR0("Expected %lu-byte descriptor message; " 17731ae08745Sheppo "received %lu bytes", expected, msglen); 17741ae08745Sheppo return (EBADMSG); 17751ae08745Sheppo } 17761ae08745Sheppo 1777d10e4ef2Snarayan if (vd_check_seq_num(vd, desc_msg->hdr.seq_num) != 0) 17781ae08745Sheppo return (EBADMSG); 17791ae08745Sheppo 1780d10e4ef2Snarayan /* 1781d10e4ef2Snarayan * Valid message: Set up the in-band descriptor task and process the 1782d10e4ef2Snarayan * request. Arrange to acknowledge the client's message, unless an 1783d10e4ef2Snarayan * error processing the descriptor task results in setting 1784d10e4ef2Snarayan * VIO_SUBTYPE_NACK 1785d10e4ef2Snarayan */ 1786d10e4ef2Snarayan PR1("Valid in-band-descriptor message"); 1787d10e4ef2Snarayan msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 17883af08d82Slm66018 17893af08d82Slm66018 ASSERT(vd->inband_task.msg != NULL); 17903af08d82Slm66018 17913af08d82Slm66018 bcopy(msg, vd->inband_task.msg, msglen); 1792d10e4ef2Snarayan vd->inband_task.msglen = msglen; 17933af08d82Slm66018 17943af08d82Slm66018 /* 17953af08d82Slm66018 * The task request is now the payload of the message 17963af08d82Slm66018 * that was just copied into the body of the task. 
17973af08d82Slm66018 */ 17983af08d82Slm66018 desc_msg = (vd_dring_inband_msg_t *)vd->inband_task.msg; 1799d10e4ef2Snarayan vd->inband_task.request = &desc_msg->payload; 18003af08d82Slm66018 1801d10e4ef2Snarayan return (vd_process_task(&vd->inband_task)); 18021ae08745Sheppo } 18031ae08745Sheppo 18041ae08745Sheppo static int 1805d10e4ef2Snarayan vd_process_element(vd_t *vd, vd_task_type_t type, uint32_t idx, 18063af08d82Slm66018 vio_msg_t *msg, size_t msglen) 18071ae08745Sheppo { 18081ae08745Sheppo int status; 1809d10e4ef2Snarayan boolean_t ready; 1810d10e4ef2Snarayan vd_dring_entry_t *elem = VD_DRING_ELEM(idx); 18111ae08745Sheppo 18121ae08745Sheppo 1813d10e4ef2Snarayan /* Accept the updated dring element */ 1814d10e4ef2Snarayan if ((status = ldc_mem_dring_acquire(vd->dring_handle, idx, idx)) != 0) { 18153af08d82Slm66018 PR0("ldc_mem_dring_acquire() returned errno %d", status); 18161ae08745Sheppo return (status); 18171ae08745Sheppo } 1818d10e4ef2Snarayan ready = (elem->hdr.dstate == VIO_DESC_READY); 1819d10e4ef2Snarayan if (ready) { 1820d10e4ef2Snarayan elem->hdr.dstate = VIO_DESC_ACCEPTED; 1821d10e4ef2Snarayan } else { 18223af08d82Slm66018 PR0("descriptor %u not ready", idx); 1823d10e4ef2Snarayan VD_DUMP_DRING_ELEM(elem); 1824d10e4ef2Snarayan } 1825d10e4ef2Snarayan if ((status = ldc_mem_dring_release(vd->dring_handle, idx, idx)) != 0) { 18263af08d82Slm66018 PR0("ldc_mem_dring_release() returned errno %d", status); 18271ae08745Sheppo return (status); 18281ae08745Sheppo } 1829d10e4ef2Snarayan if (!ready) 1830d10e4ef2Snarayan return (EBUSY); 18311ae08745Sheppo 18321ae08745Sheppo 1833d10e4ef2Snarayan /* Initialize a task and process the accepted element */ 1834d10e4ef2Snarayan PR1("Processing dring element %u", idx); 1835d10e4ef2Snarayan vd->dring_task[idx].type = type; 18363af08d82Slm66018 18373af08d82Slm66018 /* duplicate msg buf for cookies etc. 
*/ 18383af08d82Slm66018 bcopy(msg, vd->dring_task[idx].msg, msglen); 18393af08d82Slm66018 1840d10e4ef2Snarayan vd->dring_task[idx].msglen = msglen; 1841d10e4ef2Snarayan if ((status = vd_process_task(&vd->dring_task[idx])) != EINPROGRESS) 1842*3c96341aSnarayan status = vd_mark_elem_done(vd, idx, 1843*3c96341aSnarayan vd->dring_task[idx].request->status, 1844*3c96341aSnarayan vd->dring_task[idx].request->nbytes); 18451ae08745Sheppo 18461ae08745Sheppo return (status); 18471ae08745Sheppo } 18481ae08745Sheppo 18491ae08745Sheppo static int 1850d10e4ef2Snarayan vd_process_element_range(vd_t *vd, int start, int end, 18513af08d82Slm66018 vio_msg_t *msg, size_t msglen) 1852d10e4ef2Snarayan { 1853d10e4ef2Snarayan int i, n, nelem, status = 0; 1854d10e4ef2Snarayan boolean_t inprogress = B_FALSE; 1855d10e4ef2Snarayan vd_task_type_t type; 1856d10e4ef2Snarayan 1857d10e4ef2Snarayan 1858d10e4ef2Snarayan ASSERT(start >= 0); 1859d10e4ef2Snarayan ASSERT(end >= 0); 1860d10e4ef2Snarayan 1861d10e4ef2Snarayan /* 1862d10e4ef2Snarayan * Arrange to acknowledge the client's message, unless an error 1863d10e4ef2Snarayan * processing one of the dring elements results in setting 1864d10e4ef2Snarayan * VIO_SUBTYPE_NACK 1865d10e4ef2Snarayan */ 1866d10e4ef2Snarayan msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 1867d10e4ef2Snarayan 1868d10e4ef2Snarayan /* 1869d10e4ef2Snarayan * Process the dring elements in the range 1870d10e4ef2Snarayan */ 1871d10e4ef2Snarayan nelem = ((end < start) ? end + vd->dring_len : end) - start + 1; 1872d10e4ef2Snarayan for (i = start, n = nelem; n > 0; i = (i + 1) % vd->dring_len, n--) { 1873d10e4ef2Snarayan ((vio_dring_msg_t *)msg)->end_idx = i; 1874d10e4ef2Snarayan type = (n == 1) ? 
VD_FINAL_RANGE_TASK : VD_NONFINAL_RANGE_TASK; 18753af08d82Slm66018 status = vd_process_element(vd, type, i, msg, msglen); 1876d10e4ef2Snarayan if (status == EINPROGRESS) 1877d10e4ef2Snarayan inprogress = B_TRUE; 1878d10e4ef2Snarayan else if (status != 0) 1879d10e4ef2Snarayan break; 1880d10e4ef2Snarayan } 1881d10e4ef2Snarayan 1882d10e4ef2Snarayan /* 1883d10e4ef2Snarayan * If some, but not all, operations of a multi-element range are in 1884d10e4ef2Snarayan * progress, wait for other operations to complete before returning 1885d10e4ef2Snarayan * (which will result in "ack" or "nack" of the message). Note that 1886d10e4ef2Snarayan * all outstanding operations will need to complete, not just the ones 1887d10e4ef2Snarayan * corresponding to the current range of dring elements; howevever, as 1888d10e4ef2Snarayan * this situation is an error case, performance is less critical. 1889d10e4ef2Snarayan */ 1890d10e4ef2Snarayan if ((nelem > 1) && (status != EINPROGRESS) && inprogress) 1891d10e4ef2Snarayan ddi_taskq_wait(vd->completionq); 1892d10e4ef2Snarayan 1893d10e4ef2Snarayan return (status); 1894d10e4ef2Snarayan } 1895d10e4ef2Snarayan 1896d10e4ef2Snarayan static int 18973af08d82Slm66018 vd_process_dring_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 18981ae08745Sheppo { 18991ae08745Sheppo vio_dring_msg_t *dring_msg = (vio_dring_msg_t *)msg; 19001ae08745Sheppo 19011ae08745Sheppo 19021ae08745Sheppo ASSERT(msglen >= sizeof (msg->tag)); 19031ae08745Sheppo 19041ae08745Sheppo if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, 19051ae08745Sheppo VIO_DRING_DATA)) { 1906d10e4ef2Snarayan PR1("Message is not a dring-data message"); 1907d10e4ef2Snarayan return (ENOMSG); 19081ae08745Sheppo } 19091ae08745Sheppo 19101ae08745Sheppo if (msglen != sizeof (*dring_msg)) { 19113af08d82Slm66018 PR0("Expected %lu-byte dring message; received %lu bytes", 19121ae08745Sheppo sizeof (*dring_msg), msglen); 19131ae08745Sheppo return (EBADMSG); 19141ae08745Sheppo } 19151ae08745Sheppo 
1916d10e4ef2Snarayan if (vd_check_seq_num(vd, dring_msg->seq_num) != 0) 19171ae08745Sheppo return (EBADMSG); 19181ae08745Sheppo 19191ae08745Sheppo if (dring_msg->dring_ident != vd->dring_ident) { 19203af08d82Slm66018 PR0("Expected dring ident %lu; received ident %lu", 19211ae08745Sheppo vd->dring_ident, dring_msg->dring_ident); 19221ae08745Sheppo return (EBADMSG); 19231ae08745Sheppo } 19241ae08745Sheppo 1925d10e4ef2Snarayan if (dring_msg->start_idx >= vd->dring_len) { 19263af08d82Slm66018 PR0("\"start_idx\" = %u; must be less than %u", 1927d10e4ef2Snarayan dring_msg->start_idx, vd->dring_len); 1928d10e4ef2Snarayan return (EBADMSG); 1929d10e4ef2Snarayan } 19301ae08745Sheppo 1931d10e4ef2Snarayan if ((dring_msg->end_idx < 0) || 1932d10e4ef2Snarayan (dring_msg->end_idx >= vd->dring_len)) { 19333af08d82Slm66018 PR0("\"end_idx\" = %u; must be >= 0 and less than %u", 1934d10e4ef2Snarayan dring_msg->end_idx, vd->dring_len); 1935d10e4ef2Snarayan return (EBADMSG); 1936d10e4ef2Snarayan } 1937d10e4ef2Snarayan 1938d10e4ef2Snarayan /* Valid message; process range of updated dring elements */ 1939d10e4ef2Snarayan PR1("Processing descriptor range, start = %u, end = %u", 1940d10e4ef2Snarayan dring_msg->start_idx, dring_msg->end_idx); 1941d10e4ef2Snarayan return (vd_process_element_range(vd, dring_msg->start_idx, 19423af08d82Slm66018 dring_msg->end_idx, msg, msglen)); 19431ae08745Sheppo } 19441ae08745Sheppo 19451ae08745Sheppo static int 19461ae08745Sheppo recv_msg(ldc_handle_t ldc_handle, void *msg, size_t *nbytes) 19471ae08745Sheppo { 19481ae08745Sheppo int retry, status; 19491ae08745Sheppo size_t size = *nbytes; 19501ae08745Sheppo 19511ae08745Sheppo 19521ae08745Sheppo for (retry = 0, status = ETIMEDOUT; 19531ae08745Sheppo retry < vds_ldc_retries && status == ETIMEDOUT; 19541ae08745Sheppo retry++) { 19551ae08745Sheppo PR1("ldc_read() attempt %d", (retry + 1)); 19561ae08745Sheppo *nbytes = size; 19571ae08745Sheppo status = ldc_read(ldc_handle, msg, nbytes); 19581ae08745Sheppo } 
19591ae08745Sheppo 19603af08d82Slm66018 if (status) { 19613af08d82Slm66018 PR0("ldc_read() returned errno %d", status); 19623af08d82Slm66018 if (status != ECONNRESET) 19633af08d82Slm66018 return (ENOMSG); 19641ae08745Sheppo return (status); 19651ae08745Sheppo } else if (*nbytes == 0) { 19661ae08745Sheppo PR1("ldc_read() returned 0 and no message read"); 19671ae08745Sheppo return (ENOMSG); 19681ae08745Sheppo } 19691ae08745Sheppo 19701ae08745Sheppo PR1("RCVD %lu-byte message", *nbytes); 19711ae08745Sheppo return (0); 19721ae08745Sheppo } 19731ae08745Sheppo 19741ae08745Sheppo static int 19753af08d82Slm66018 vd_do_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 19761ae08745Sheppo { 19771ae08745Sheppo int status; 19781ae08745Sheppo 19791ae08745Sheppo 19801ae08745Sheppo PR1("Processing (%x/%x/%x) message", msg->tag.vio_msgtype, 19811ae08745Sheppo msg->tag.vio_subtype, msg->tag.vio_subtype_env); 19823af08d82Slm66018 #ifdef DEBUG 19833af08d82Slm66018 vd_decode_tag(msg); 19843af08d82Slm66018 #endif 19851ae08745Sheppo 19861ae08745Sheppo /* 19871ae08745Sheppo * Validate session ID up front, since it applies to all messages 19881ae08745Sheppo * once set 19891ae08745Sheppo */ 19901ae08745Sheppo if ((msg->tag.vio_sid != vd->sid) && (vd->initialized & VD_SID)) { 19913af08d82Slm66018 PR0("Expected SID %u, received %u", vd->sid, 19921ae08745Sheppo msg->tag.vio_sid); 19931ae08745Sheppo return (EBADMSG); 19941ae08745Sheppo } 19951ae08745Sheppo 19963af08d82Slm66018 PR1("\tWhile in state %d (%s)", vd->state, vd_decode_state(vd->state)); 19971ae08745Sheppo 19981ae08745Sheppo /* 19991ae08745Sheppo * Process the received message based on connection state 20001ae08745Sheppo */ 20011ae08745Sheppo switch (vd->state) { 20021ae08745Sheppo case VD_STATE_INIT: /* expect version message */ 20030a55fbb7Slm66018 if ((status = vd_process_ver_msg(vd, msg, msglen)) != 0) 20041ae08745Sheppo return (status); 20051ae08745Sheppo 20061ae08745Sheppo /* Version negotiated, move to that state */ 
20071ae08745Sheppo vd->state = VD_STATE_VER; 20081ae08745Sheppo return (0); 20091ae08745Sheppo 20101ae08745Sheppo case VD_STATE_VER: /* expect attribute message */ 20111ae08745Sheppo if ((status = vd_process_attr_msg(vd, msg, msglen)) != 0) 20121ae08745Sheppo return (status); 20131ae08745Sheppo 20141ae08745Sheppo /* Attributes exchanged, move to that state */ 20151ae08745Sheppo vd->state = VD_STATE_ATTR; 20161ae08745Sheppo return (0); 20171ae08745Sheppo 20181ae08745Sheppo case VD_STATE_ATTR: 20191ae08745Sheppo switch (vd->xfer_mode) { 20201ae08745Sheppo case VIO_DESC_MODE: /* expect RDX message */ 20211ae08745Sheppo if ((status = process_rdx_msg(msg, msglen)) != 0) 20221ae08745Sheppo return (status); 20231ae08745Sheppo 20241ae08745Sheppo /* Ready to receive in-band descriptors */ 20251ae08745Sheppo vd->state = VD_STATE_DATA; 20261ae08745Sheppo return (0); 20271ae08745Sheppo 20281ae08745Sheppo case VIO_DRING_MODE: /* expect register-dring message */ 20291ae08745Sheppo if ((status = 20301ae08745Sheppo vd_process_dring_reg_msg(vd, msg, msglen)) != 0) 20311ae08745Sheppo return (status); 20321ae08745Sheppo 20331ae08745Sheppo /* One dring negotiated, move to that state */ 20341ae08745Sheppo vd->state = VD_STATE_DRING; 20351ae08745Sheppo return (0); 20361ae08745Sheppo 20371ae08745Sheppo default: 20381ae08745Sheppo ASSERT("Unsupported transfer mode"); 20393af08d82Slm66018 PR0("Unsupported transfer mode"); 20401ae08745Sheppo return (ENOTSUP); 20411ae08745Sheppo } 20421ae08745Sheppo 20431ae08745Sheppo case VD_STATE_DRING: /* expect RDX, register-dring, or unreg-dring */ 20441ae08745Sheppo if ((status = process_rdx_msg(msg, msglen)) == 0) { 20451ae08745Sheppo /* Ready to receive data */ 20461ae08745Sheppo vd->state = VD_STATE_DATA; 20471ae08745Sheppo return (0); 20481ae08745Sheppo } else if (status != ENOMSG) { 20491ae08745Sheppo return (status); 20501ae08745Sheppo } 20511ae08745Sheppo 20521ae08745Sheppo 20531ae08745Sheppo /* 20541ae08745Sheppo * If another register-dring 
message is received, stay in 20551ae08745Sheppo * dring state in case the client sends RDX; although the 20561ae08745Sheppo * protocol allows multiple drings, this server does not 20571ae08745Sheppo * support using more than one 20581ae08745Sheppo */ 20591ae08745Sheppo if ((status = 20601ae08745Sheppo vd_process_dring_reg_msg(vd, msg, msglen)) != ENOMSG) 20611ae08745Sheppo return (status); 20621ae08745Sheppo 20631ae08745Sheppo /* 20641ae08745Sheppo * Acknowledge an unregister-dring message, but reset the 20651ae08745Sheppo * connection anyway: Although the protocol allows 20661ae08745Sheppo * unregistering drings, this server cannot serve a vdisk 20671ae08745Sheppo * without its only dring 20681ae08745Sheppo */ 20691ae08745Sheppo status = vd_process_dring_unreg_msg(vd, msg, msglen); 20701ae08745Sheppo return ((status == 0) ? ENOTSUP : status); 20711ae08745Sheppo 20721ae08745Sheppo case VD_STATE_DATA: 20731ae08745Sheppo switch (vd->xfer_mode) { 20741ae08745Sheppo case VIO_DESC_MODE: /* expect in-band-descriptor message */ 20753af08d82Slm66018 return (vd_process_desc_msg(vd, msg, msglen)); 20761ae08745Sheppo 20771ae08745Sheppo case VIO_DRING_MODE: /* expect dring-data or unreg-dring */ 20781ae08745Sheppo /* 20791ae08745Sheppo * Typically expect dring-data messages, so handle 20801ae08745Sheppo * them first 20811ae08745Sheppo */ 20821ae08745Sheppo if ((status = vd_process_dring_msg(vd, msg, 20833af08d82Slm66018 msglen)) != ENOMSG) 20841ae08745Sheppo return (status); 20851ae08745Sheppo 20861ae08745Sheppo /* 20871ae08745Sheppo * Acknowledge an unregister-dring message, but reset 20881ae08745Sheppo * the connection anyway: Although the protocol 20891ae08745Sheppo * allows unregistering drings, this server cannot 20901ae08745Sheppo * serve a vdisk without its only dring 20911ae08745Sheppo */ 20921ae08745Sheppo status = vd_process_dring_unreg_msg(vd, msg, msglen); 20931ae08745Sheppo return ((status == 0) ? 
ENOTSUP : status); 20941ae08745Sheppo 20951ae08745Sheppo default: 20961ae08745Sheppo ASSERT("Unsupported transfer mode"); 20973af08d82Slm66018 PR0("Unsupported transfer mode"); 20981ae08745Sheppo return (ENOTSUP); 20991ae08745Sheppo } 21001ae08745Sheppo 21011ae08745Sheppo default: 21021ae08745Sheppo ASSERT("Invalid client connection state"); 21033af08d82Slm66018 PR0("Invalid client connection state"); 21041ae08745Sheppo return (ENOTSUP); 21051ae08745Sheppo } 21061ae08745Sheppo } 21071ae08745Sheppo 2108d10e4ef2Snarayan static int 21093af08d82Slm66018 vd_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) 21101ae08745Sheppo { 21111ae08745Sheppo int status; 21121ae08745Sheppo boolean_t reset_ldc = B_FALSE; 21131ae08745Sheppo 21141ae08745Sheppo 21151ae08745Sheppo /* 21161ae08745Sheppo * Check that the message is at least big enough for a "tag", so that 21171ae08745Sheppo * message processing can proceed based on tag-specified message type 21181ae08745Sheppo */ 21191ae08745Sheppo if (msglen < sizeof (vio_msg_tag_t)) { 21203af08d82Slm66018 PR0("Received short (%lu-byte) message", msglen); 21211ae08745Sheppo /* Can't "nack" short message, so drop the big hammer */ 21223af08d82Slm66018 PR0("initiating full reset"); 2123d10e4ef2Snarayan vd_need_reset(vd, B_TRUE); 2124d10e4ef2Snarayan return (EBADMSG); 21251ae08745Sheppo } 21261ae08745Sheppo 21271ae08745Sheppo /* 21281ae08745Sheppo * Process the message 21291ae08745Sheppo */ 21303af08d82Slm66018 switch (status = vd_do_process_msg(vd, msg, msglen)) { 21311ae08745Sheppo case 0: 21321ae08745Sheppo /* "ack" valid, successfully-processed messages */ 21331ae08745Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_ACK; 21341ae08745Sheppo break; 21351ae08745Sheppo 2136d10e4ef2Snarayan case EINPROGRESS: 2137d10e4ef2Snarayan /* The completion handler will "ack" or "nack" the message */ 2138d10e4ef2Snarayan return (EINPROGRESS); 21391ae08745Sheppo case ENOMSG: 21403af08d82Slm66018 PR0("Received unexpected message"); 21411ae08745Sheppo 
_NOTE(FALLTHROUGH); 21421ae08745Sheppo case EBADMSG: 21431ae08745Sheppo case ENOTSUP: 21441ae08745Sheppo /* "nack" invalid messages */ 21451ae08745Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 21461ae08745Sheppo break; 21471ae08745Sheppo 21481ae08745Sheppo default: 21491ae08745Sheppo /* "nack" failed messages */ 21501ae08745Sheppo msg->tag.vio_subtype = VIO_SUBTYPE_NACK; 21511ae08745Sheppo /* An LDC error probably occurred, so try resetting it */ 21521ae08745Sheppo reset_ldc = B_TRUE; 21531ae08745Sheppo break; 21541ae08745Sheppo } 21551ae08745Sheppo 21563af08d82Slm66018 PR1("\tResulting in state %d (%s)", vd->state, 21573af08d82Slm66018 vd_decode_state(vd->state)); 21583af08d82Slm66018 2159d10e4ef2Snarayan /* Send the "ack" or "nack" to the client */ 21601ae08745Sheppo PR1("Sending %s", 21611ae08745Sheppo (msg->tag.vio_subtype == VIO_SUBTYPE_ACK) ? "ACK" : "NACK"); 21621ae08745Sheppo if (send_msg(vd->ldc_handle, msg, msglen) != 0) 21631ae08745Sheppo reset_ldc = B_TRUE; 21641ae08745Sheppo 2165d10e4ef2Snarayan /* Arrange to reset the connection for nack'ed or failed messages */ 21663af08d82Slm66018 if ((status != 0) || reset_ldc) { 21673af08d82Slm66018 PR0("initiating %s reset", 21683af08d82Slm66018 (reset_ldc) ? 
"full" : "soft"); 2169d10e4ef2Snarayan vd_need_reset(vd, reset_ldc); 21703af08d82Slm66018 } 2171d10e4ef2Snarayan 2172d10e4ef2Snarayan return (status); 2173d10e4ef2Snarayan } 2174d10e4ef2Snarayan 2175d10e4ef2Snarayan static boolean_t 2176d10e4ef2Snarayan vd_enabled(vd_t *vd) 2177d10e4ef2Snarayan { 2178d10e4ef2Snarayan boolean_t enabled; 2179d10e4ef2Snarayan 2180d10e4ef2Snarayan 2181d10e4ef2Snarayan mutex_enter(&vd->lock); 2182d10e4ef2Snarayan enabled = vd->enabled; 2183d10e4ef2Snarayan mutex_exit(&vd->lock); 2184d10e4ef2Snarayan return (enabled); 21851ae08745Sheppo } 21861ae08745Sheppo 21871ae08745Sheppo static void 21880a55fbb7Slm66018 vd_recv_msg(void *arg) 21891ae08745Sheppo { 21901ae08745Sheppo vd_t *vd = (vd_t *)arg; 21913af08d82Slm66018 int rv = 0, status = 0; 21921ae08745Sheppo 21931ae08745Sheppo ASSERT(vd != NULL); 21943af08d82Slm66018 2195d10e4ef2Snarayan PR2("New task to receive incoming message(s)"); 21963af08d82Slm66018 21973af08d82Slm66018 2198d10e4ef2Snarayan while (vd_enabled(vd) && status == 0) { 2199d10e4ef2Snarayan size_t msglen, msgsize; 22003af08d82Slm66018 ldc_status_t lstatus; 2201d10e4ef2Snarayan 22020a55fbb7Slm66018 /* 2203d10e4ef2Snarayan * Receive and process a message 22040a55fbb7Slm66018 */ 2205d10e4ef2Snarayan vd_reset_if_needed(vd); /* can change vd->max_msglen */ 22063af08d82Slm66018 22073af08d82Slm66018 /* 22083af08d82Slm66018 * check if channel is UP - else break out of loop 22093af08d82Slm66018 */ 22103af08d82Slm66018 status = ldc_status(vd->ldc_handle, &lstatus); 22113af08d82Slm66018 if (lstatus != LDC_UP) { 22123af08d82Slm66018 PR0("channel not up (status=%d), exiting recv loop\n", 22133af08d82Slm66018 lstatus); 22143af08d82Slm66018 break; 22153af08d82Slm66018 } 22163af08d82Slm66018 22173af08d82Slm66018 ASSERT(vd->max_msglen != 0); 22183af08d82Slm66018 2219d10e4ef2Snarayan msgsize = vd->max_msglen; /* stable copy for alloc/free */ 22203af08d82Slm66018 msglen = msgsize; /* actual len after recv_msg() */ 22213af08d82Slm66018 
22223af08d82Slm66018 status = recv_msg(vd->ldc_handle, vd->vio_msgp, &msglen); 22233af08d82Slm66018 switch (status) { 22243af08d82Slm66018 case 0: 22253af08d82Slm66018 rv = vd_process_msg(vd, (vio_msg_t *)vd->vio_msgp, 22263af08d82Slm66018 msglen); 22273af08d82Slm66018 /* check if max_msglen changed */ 22283af08d82Slm66018 if (msgsize != vd->max_msglen) { 22293af08d82Slm66018 PR0("max_msglen changed 0x%lx to 0x%lx bytes\n", 22303af08d82Slm66018 msgsize, vd->max_msglen); 22313af08d82Slm66018 kmem_free(vd->vio_msgp, msgsize); 22323af08d82Slm66018 vd->vio_msgp = 22333af08d82Slm66018 kmem_alloc(vd->max_msglen, KM_SLEEP); 22343af08d82Slm66018 } 22353af08d82Slm66018 if (rv == EINPROGRESS) 22363af08d82Slm66018 continue; 22373af08d82Slm66018 break; 22383af08d82Slm66018 22393af08d82Slm66018 case ENOMSG: 22403af08d82Slm66018 break; 22413af08d82Slm66018 22423af08d82Slm66018 case ECONNRESET: 22433af08d82Slm66018 PR0("initiating soft reset (ECONNRESET)\n"); 22443af08d82Slm66018 vd_need_reset(vd, B_FALSE); 22453af08d82Slm66018 status = 0; 22463af08d82Slm66018 break; 22473af08d82Slm66018 22483af08d82Slm66018 default: 2249d10e4ef2Snarayan /* Probably an LDC failure; arrange to reset it */ 22503af08d82Slm66018 PR0("initiating full reset (status=0x%x)", status); 2251d10e4ef2Snarayan vd_need_reset(vd, B_TRUE); 22523af08d82Slm66018 break; 22530a55fbb7Slm66018 } 22541ae08745Sheppo } 22553af08d82Slm66018 2256d10e4ef2Snarayan PR2("Task finished"); 22570a55fbb7Slm66018 } 22580a55fbb7Slm66018 22590a55fbb7Slm66018 static uint_t 22601ae08745Sheppo vd_handle_ldc_events(uint64_t event, caddr_t arg) 22611ae08745Sheppo { 22621ae08745Sheppo vd_t *vd = (vd_t *)(void *)arg; 22633af08d82Slm66018 int status; 22641ae08745Sheppo 22651ae08745Sheppo ASSERT(vd != NULL); 2266d10e4ef2Snarayan 2267d10e4ef2Snarayan if (!vd_enabled(vd)) 2268d10e4ef2Snarayan return (LDC_SUCCESS); 2269d10e4ef2Snarayan 22703af08d82Slm66018 if (event & LDC_EVT_DOWN) { 227134683adeSsg70180 PR0("LDC_EVT_DOWN: LDC channel went 
down"); 22723af08d82Slm66018 22733af08d82Slm66018 vd_need_reset(vd, B_TRUE); 22743af08d82Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, 22753af08d82Slm66018 DDI_SLEEP); 22763af08d82Slm66018 if (status == DDI_FAILURE) { 22773af08d82Slm66018 PR0("cannot schedule task to recv msg\n"); 22783af08d82Slm66018 vd_need_reset(vd, B_TRUE); 22793af08d82Slm66018 } 22803af08d82Slm66018 } 22813af08d82Slm66018 2282d10e4ef2Snarayan if (event & LDC_EVT_RESET) { 22833af08d82Slm66018 PR0("LDC_EVT_RESET: LDC channel was reset"); 22843af08d82Slm66018 22853af08d82Slm66018 if (vd->state != VD_STATE_INIT) { 22863af08d82Slm66018 PR0("scheduling full reset"); 22873af08d82Slm66018 vd_need_reset(vd, B_FALSE); 22883af08d82Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, 22893af08d82Slm66018 vd, DDI_SLEEP); 22903af08d82Slm66018 if (status == DDI_FAILURE) { 22913af08d82Slm66018 PR0("cannot schedule task to recv msg\n"); 22923af08d82Slm66018 vd_need_reset(vd, B_TRUE); 22933af08d82Slm66018 } 22943af08d82Slm66018 22953af08d82Slm66018 } else { 22963af08d82Slm66018 PR0("channel already reset, ignoring...\n"); 22973af08d82Slm66018 PR0("doing ldc up...\n"); 22983af08d82Slm66018 (void) ldc_up(vd->ldc_handle); 22993af08d82Slm66018 } 23003af08d82Slm66018 2301d10e4ef2Snarayan return (LDC_SUCCESS); 2302d10e4ef2Snarayan } 2303d10e4ef2Snarayan 2304d10e4ef2Snarayan if (event & LDC_EVT_UP) { 23053af08d82Slm66018 PR0("EVT_UP: LDC is up\nResetting client connection state"); 23063af08d82Slm66018 PR0("initiating soft reset"); 2307d10e4ef2Snarayan vd_need_reset(vd, B_FALSE); 23083af08d82Slm66018 status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, 23093af08d82Slm66018 vd, DDI_SLEEP); 23103af08d82Slm66018 if (status == DDI_FAILURE) { 23113af08d82Slm66018 PR0("cannot schedule task to recv msg\n"); 23123af08d82Slm66018 vd_need_reset(vd, B_TRUE); 23133af08d82Slm66018 return (LDC_SUCCESS); 23143af08d82Slm66018 } 2315d10e4ef2Snarayan } 2316d10e4ef2Snarayan 2317d10e4ef2Snarayan if (event & 
LDC_EVT_READ) { 2318d10e4ef2Snarayan int status; 2319d10e4ef2Snarayan 2320d10e4ef2Snarayan PR1("New data available"); 2321d10e4ef2Snarayan /* Queue a task to receive the new data */ 2322d10e4ef2Snarayan status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, 2323d10e4ef2Snarayan DDI_SLEEP); 23243af08d82Slm66018 23253af08d82Slm66018 if (status == DDI_FAILURE) { 23263af08d82Slm66018 PR0("cannot schedule task to recv msg\n"); 23273af08d82Slm66018 vd_need_reset(vd, B_TRUE); 23283af08d82Slm66018 } 2329d10e4ef2Snarayan } 2330d10e4ef2Snarayan 2331d10e4ef2Snarayan return (LDC_SUCCESS); 23321ae08745Sheppo } 23331ae08745Sheppo 23341ae08745Sheppo static uint_t 23351ae08745Sheppo vds_check_for_vd(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 23361ae08745Sheppo { 23371ae08745Sheppo _NOTE(ARGUNUSED(key, val)) 23381ae08745Sheppo (*((uint_t *)arg))++; 23391ae08745Sheppo return (MH_WALK_TERMINATE); 23401ae08745Sheppo } 23411ae08745Sheppo 23421ae08745Sheppo 23431ae08745Sheppo static int 23441ae08745Sheppo vds_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 23451ae08745Sheppo { 23461ae08745Sheppo uint_t vd_present = 0; 23471ae08745Sheppo minor_t instance; 23481ae08745Sheppo vds_t *vds; 23491ae08745Sheppo 23501ae08745Sheppo 23511ae08745Sheppo switch (cmd) { 23521ae08745Sheppo case DDI_DETACH: 23531ae08745Sheppo /* the real work happens below */ 23541ae08745Sheppo break; 23551ae08745Sheppo case DDI_SUSPEND: 2356d10e4ef2Snarayan PR0("No action required for DDI_SUSPEND"); 23571ae08745Sheppo return (DDI_SUCCESS); 23581ae08745Sheppo default: 23593af08d82Slm66018 PR0("Unrecognized \"cmd\""); 23601ae08745Sheppo return (DDI_FAILURE); 23611ae08745Sheppo } 23621ae08745Sheppo 23631ae08745Sheppo ASSERT(cmd == DDI_DETACH); 23641ae08745Sheppo instance = ddi_get_instance(dip); 23651ae08745Sheppo if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) { 23663af08d82Slm66018 PR0("Could not get state for instance %u", instance); 23671ae08745Sheppo ddi_soft_state_free(vds_state, instance); 
23681ae08745Sheppo return (DDI_FAILURE); 23691ae08745Sheppo } 23701ae08745Sheppo 23711ae08745Sheppo /* Do no detach when serving any vdisks */ 23721ae08745Sheppo mod_hash_walk(vds->vd_table, vds_check_for_vd, &vd_present); 23731ae08745Sheppo if (vd_present) { 23741ae08745Sheppo PR0("Not detaching because serving vdisks"); 23751ae08745Sheppo return (DDI_FAILURE); 23761ae08745Sheppo } 23771ae08745Sheppo 23781ae08745Sheppo PR0("Detaching"); 2379445b4c2eSsb155480 if (vds->initialized & VDS_MDEG) { 23801ae08745Sheppo (void) mdeg_unregister(vds->mdeg); 2381445b4c2eSsb155480 kmem_free(vds->ispecp->specp, sizeof (vds_prop_template)); 2382445b4c2eSsb155480 kmem_free(vds->ispecp, sizeof (mdeg_node_spec_t)); 2383445b4c2eSsb155480 vds->ispecp = NULL; 2384445b4c2eSsb155480 vds->mdeg = NULL; 2385445b4c2eSsb155480 } 2386445b4c2eSsb155480 23871ae08745Sheppo if (vds->initialized & VDS_LDI) 23881ae08745Sheppo (void) ldi_ident_release(vds->ldi_ident); 23891ae08745Sheppo mod_hash_destroy_hash(vds->vd_table); 23901ae08745Sheppo ddi_soft_state_free(vds_state, instance); 23911ae08745Sheppo return (DDI_SUCCESS); 23921ae08745Sheppo } 23931ae08745Sheppo 23941ae08745Sheppo static boolean_t 23951ae08745Sheppo is_pseudo_device(dev_info_t *dip) 23961ae08745Sheppo { 23971ae08745Sheppo dev_info_t *parent, *root = ddi_root_node(); 23981ae08745Sheppo 23991ae08745Sheppo 24001ae08745Sheppo for (parent = ddi_get_parent(dip); (parent != NULL) && (parent != root); 24011ae08745Sheppo parent = ddi_get_parent(parent)) { 24021ae08745Sheppo if (strcmp(ddi_get_name(parent), DEVI_PSEUDO_NEXNAME) == 0) 24031ae08745Sheppo return (B_TRUE); 24041ae08745Sheppo } 24051ae08745Sheppo 24061ae08745Sheppo return (B_FALSE); 24071ae08745Sheppo } 24081ae08745Sheppo 24091ae08745Sheppo static int 24100a55fbb7Slm66018 vd_setup_full_disk(vd_t *vd) 24110a55fbb7Slm66018 { 24120a55fbb7Slm66018 int rval, status; 24130a55fbb7Slm66018 major_t major = getmajor(vd->dev[0]); 24140a55fbb7Slm66018 minor_t minor = getminor(vd->dev[0]) - 
VD_ENTIRE_DISK_SLICE; 24154bac2208Snarayan struct dk_minfo dk_minfo; 24160a55fbb7Slm66018 24174bac2208Snarayan /* 24184bac2208Snarayan * At this point, vdisk_size is set to the size of partition 2 but 24194bac2208Snarayan * this does not represent the size of the disk because partition 2 24204bac2208Snarayan * may not cover the entire disk and its size does not include reserved 24214bac2208Snarayan * blocks. So we update vdisk_size to be the size of the entire disk. 24224bac2208Snarayan */ 24234bac2208Snarayan if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGMEDIAINFO, 24244bac2208Snarayan (intptr_t)&dk_minfo, (vd_open_flags | FKIOCTL), 24254bac2208Snarayan kcred, &rval)) != 0) { 242634683adeSsg70180 PR0("ldi_ioctl(DKIOCGMEDIAINFO) returned errno %d", 24274bac2208Snarayan status); 24280a55fbb7Slm66018 return (status); 24290a55fbb7Slm66018 } 24304bac2208Snarayan vd->vdisk_size = dk_minfo.dki_capacity; 24310a55fbb7Slm66018 24320a55fbb7Slm66018 /* Set full-disk parameters */ 24330a55fbb7Slm66018 vd->vdisk_type = VD_DISK_TYPE_DISK; 24340a55fbb7Slm66018 vd->nslices = (sizeof (vd->dev))/(sizeof (vd->dev[0])); 24350a55fbb7Slm66018 24360a55fbb7Slm66018 /* Move dev number and LDI handle to entire-disk-slice array elements */ 24370a55fbb7Slm66018 vd->dev[VD_ENTIRE_DISK_SLICE] = vd->dev[0]; 24380a55fbb7Slm66018 vd->dev[0] = 0; 24390a55fbb7Slm66018 vd->ldi_handle[VD_ENTIRE_DISK_SLICE] = vd->ldi_handle[0]; 24400a55fbb7Slm66018 vd->ldi_handle[0] = NULL; 24410a55fbb7Slm66018 24420a55fbb7Slm66018 /* Initialize device numbers for remaining slices and open them */ 24430a55fbb7Slm66018 for (int slice = 0; slice < vd->nslices; slice++) { 24440a55fbb7Slm66018 /* 24450a55fbb7Slm66018 * Skip the entire-disk slice, as it's already open and its 24460a55fbb7Slm66018 * device known 24470a55fbb7Slm66018 */ 24480a55fbb7Slm66018 if (slice == VD_ENTIRE_DISK_SLICE) 24490a55fbb7Slm66018 continue; 24500a55fbb7Slm66018 ASSERT(vd->dev[slice] == 0); 24510a55fbb7Slm66018 ASSERT(vd->ldi_handle[slice] == 
NULL); 24520a55fbb7Slm66018 24530a55fbb7Slm66018 /* 24540a55fbb7Slm66018 * Construct the device number for the current slice 24550a55fbb7Slm66018 */ 24560a55fbb7Slm66018 vd->dev[slice] = makedevice(major, (minor + slice)); 24570a55fbb7Slm66018 24580a55fbb7Slm66018 /* 245934683adeSsg70180 * Open all slices of the disk to serve them to the client. 246034683adeSsg70180 * Slices are opened exclusively to prevent other threads or 246134683adeSsg70180 * processes in the service domain from performing I/O to 246234683adeSsg70180 * slices being accessed by a client. Failure to open a slice 246334683adeSsg70180 * results in vds not serving this disk, as the client could 246434683adeSsg70180 * attempt (and should be able) to access any slice immediately. 246534683adeSsg70180 * Any slices successfully opened before a failure will get 246634683adeSsg70180 * closed by vds_destroy_vd() as a result of the error returned 246734683adeSsg70180 * by this function. 246834683adeSsg70180 * 246934683adeSsg70180 * We need to do the open with FNDELAY so that opening an empty 247034683adeSsg70180 * slice does not fail. 
24710a55fbb7Slm66018 */ 24720a55fbb7Slm66018 PR0("Opening device major %u, minor %u = slice %u", 24730a55fbb7Slm66018 major, minor, slice); 24740a55fbb7Slm66018 if ((status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK, 247534683adeSsg70180 vd_open_flags | FNDELAY, kcred, &vd->ldi_handle[slice], 24760a55fbb7Slm66018 vd->vds->ldi_ident)) != 0) { 247734683adeSsg70180 PR0("ldi_open_by_dev() returned errno %d " 24780a55fbb7Slm66018 "for slice %u", status, slice); 24790a55fbb7Slm66018 /* vds_destroy_vd() will close any open slices */ 24800a55fbb7Slm66018 return (status); 24810a55fbb7Slm66018 } 24820a55fbb7Slm66018 } 24830a55fbb7Slm66018 24840a55fbb7Slm66018 return (0); 24850a55fbb7Slm66018 } 24860a55fbb7Slm66018 24870a55fbb7Slm66018 static int 24884bac2208Snarayan vd_setup_partition_efi(vd_t *vd) 24894bac2208Snarayan { 24904bac2208Snarayan efi_gpt_t *gpt; 24914bac2208Snarayan efi_gpe_t *gpe; 24924bac2208Snarayan struct uuid uuid = EFI_RESERVED; 24934bac2208Snarayan uint32_t crc; 24944bac2208Snarayan int length; 24954bac2208Snarayan 24964bac2208Snarayan length = sizeof (efi_gpt_t) + sizeof (efi_gpe_t); 24974bac2208Snarayan 24984bac2208Snarayan gpt = kmem_zalloc(length, KM_SLEEP); 24994bac2208Snarayan gpe = (efi_gpe_t *)(gpt + 1); 25004bac2208Snarayan 25014bac2208Snarayan gpt->efi_gpt_Signature = LE_64(EFI_SIGNATURE); 25024bac2208Snarayan gpt->efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); 25034bac2208Snarayan gpt->efi_gpt_HeaderSize = LE_32(sizeof (efi_gpt_t)); 25044bac2208Snarayan gpt->efi_gpt_FirstUsableLBA = LE_64(0ULL); 25054bac2208Snarayan gpt->efi_gpt_LastUsableLBA = LE_64(vd->vdisk_size - 1); 25064bac2208Snarayan gpt->efi_gpt_NumberOfPartitionEntries = LE_32(1); 25074bac2208Snarayan gpt->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (efi_gpe_t)); 25084bac2208Snarayan 25094bac2208Snarayan UUID_LE_CONVERT(gpe->efi_gpe_PartitionTypeGUID, uuid); 25104bac2208Snarayan gpe->efi_gpe_StartingLBA = gpt->efi_gpt_FirstUsableLBA; 25114bac2208Snarayan gpe->efi_gpe_EndingLBA = 
gpt->efi_gpt_LastUsableLBA; 25124bac2208Snarayan 25134bac2208Snarayan CRC32(crc, gpe, sizeof (efi_gpe_t), -1U, crc32_table); 25144bac2208Snarayan gpt->efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); 25154bac2208Snarayan 25164bac2208Snarayan CRC32(crc, gpt, sizeof (efi_gpt_t), -1U, crc32_table); 25174bac2208Snarayan gpt->efi_gpt_HeaderCRC32 = LE_32(~crc); 25184bac2208Snarayan 25194bac2208Snarayan vd->dk_efi.dki_lba = 0; 25204bac2208Snarayan vd->dk_efi.dki_length = length; 25214bac2208Snarayan vd->dk_efi.dki_data = gpt; 25224bac2208Snarayan 25234bac2208Snarayan return (0); 25244bac2208Snarayan } 25254bac2208Snarayan 25264bac2208Snarayan static int 2527*3c96341aSnarayan vd_setup_file(vd_t *vd) 2528*3c96341aSnarayan { 2529*3c96341aSnarayan int i, rval, status; 2530*3c96341aSnarayan short sum; 2531*3c96341aSnarayan vattr_t vattr; 2532*3c96341aSnarayan dev_t dev; 2533*3c96341aSnarayan char *file_path = vd->device_path; 2534*3c96341aSnarayan char dev_path[MAXPATHLEN + 1]; 2535*3c96341aSnarayan ldi_handle_t lhandle; 2536*3c96341aSnarayan struct dk_cinfo dk_cinfo; 2537*3c96341aSnarayan struct dk_label *label; 2538*3c96341aSnarayan 2539*3c96341aSnarayan /* make sure the file is valid */ 2540*3c96341aSnarayan if ((status = lookupname(file_path, UIO_SYSSPACE, FOLLOW, 2541*3c96341aSnarayan NULLVPP, &vd->file_vnode)) != 0) { 2542*3c96341aSnarayan PR0("Cannot lookup file(%s) errno %d", file_path, status); 2543*3c96341aSnarayan return (status); 2544*3c96341aSnarayan } 2545*3c96341aSnarayan 2546*3c96341aSnarayan if (vd->file_vnode->v_type != VREG) { 2547*3c96341aSnarayan PR0("Invalid file type (%s)\n", file_path); 2548*3c96341aSnarayan VN_RELE(vd->file_vnode); 2549*3c96341aSnarayan return (EBADF); 2550*3c96341aSnarayan } 2551*3c96341aSnarayan VN_RELE(vd->file_vnode); 2552*3c96341aSnarayan 2553*3c96341aSnarayan if ((status = vn_open(file_path, UIO_SYSSPACE, vd_open_flags | FOFFMAX, 2554*3c96341aSnarayan 0, &vd->file_vnode, 0, 0)) != 0) { 2555*3c96341aSnarayan PR0("vn_open(%s) = 
errno %d", file_path, status); 2556*3c96341aSnarayan return (status); 2557*3c96341aSnarayan } 2558*3c96341aSnarayan 2559*3c96341aSnarayan vattr.va_mask = AT_SIZE; 2560*3c96341aSnarayan if ((status = VOP_GETATTR(vd->file_vnode, &vattr, 0, kcred)) != 0) { 2561*3c96341aSnarayan PR0("VOP_GETATTR(%s) = errno %d", file_path, status); 2562*3c96341aSnarayan (void) VOP_CLOSE(vd->file_vnode, vd_open_flags, 1, 0, kcred); 2563*3c96341aSnarayan VN_RELE(vd->file_vnode); 2564*3c96341aSnarayan return (EIO); 2565*3c96341aSnarayan } 2566*3c96341aSnarayan 2567*3c96341aSnarayan vd->file_size = vattr.va_size; 2568*3c96341aSnarayan /* size should be at least sizeof(dk_label) */ 2569*3c96341aSnarayan if (vd->file_size < sizeof (struct dk_label)) { 2570*3c96341aSnarayan PRN("Size of file has to be at least %ld bytes", 2571*3c96341aSnarayan sizeof (struct dk_label)); 2572*3c96341aSnarayan (void) VOP_CLOSE(vd->file_vnode, vd_open_flags, 1, 0, kcred); 2573*3c96341aSnarayan VN_RELE(vd->file_vnode); 2574*3c96341aSnarayan return (EIO); 2575*3c96341aSnarayan } 2576*3c96341aSnarayan 2577*3c96341aSnarayan if ((status = VOP_MAP(vd->file_vnode, 0, &kas, &vd->file_maddr, 2578*3c96341aSnarayan vd->file_size, PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE, 2579*3c96341aSnarayan MAP_SHARED, kcred)) != 0) { 2580*3c96341aSnarayan PR0("VOP_MAP(%s) = errno %d", file_path, status); 2581*3c96341aSnarayan (void) VOP_CLOSE(vd->file_vnode, vd_open_flags, 1, 0, kcred); 2582*3c96341aSnarayan VN_RELE(vd->file_vnode); 2583*3c96341aSnarayan return (EIO); 2584*3c96341aSnarayan } 2585*3c96341aSnarayan 2586*3c96341aSnarayan label = (struct dk_label *)vd->file_maddr; 2587*3c96341aSnarayan 2588*3c96341aSnarayan /* label checksum */ 2589*3c96341aSnarayan sum = vd_lbl2cksum(label); 2590*3c96341aSnarayan 2591*3c96341aSnarayan if (label->dkl_magic != DKL_MAGIC || label->dkl_cksum != sum) { 2592*3c96341aSnarayan PR0("%s has an invalid disk label " 2593*3c96341aSnarayan "(magic=%x cksum=%x (expect %x))", 2594*3c96341aSnarayan 
file_path, label->dkl_magic, label->dkl_cksum, sum); 2595*3c96341aSnarayan 2596*3c96341aSnarayan /* default label */ 2597*3c96341aSnarayan bzero(label, sizeof (struct dk_label)); 2598*3c96341aSnarayan 2599*3c96341aSnarayan /* 2600*3c96341aSnarayan * We must have a resonable number of cylinders and sectors so 2601*3c96341aSnarayan * that newfs can run using default values. 2602*3c96341aSnarayan * 2603*3c96341aSnarayan * if (disk_size < 2MB) 2604*3c96341aSnarayan * phys_cylinders = disk_size / 100K 2605*3c96341aSnarayan * else 2606*3c96341aSnarayan * phys_cylinders = disk_size / 300K 2607*3c96341aSnarayan * 2608*3c96341aSnarayan * phys_cylinders = (phys_cylinders == 0) ? 1 : phys_cylinders 2609*3c96341aSnarayan * alt_cylinders = (phys_cylinders > 2) ? 2 : 0; 2610*3c96341aSnarayan * data_cylinders = phys_cylinders - alt_cylinders 2611*3c96341aSnarayan * 2612*3c96341aSnarayan * sectors = disk_size / (phys_cylinders * blk_size) 2613*3c96341aSnarayan */ 2614*3c96341aSnarayan if (vd->file_size < (2 * 1024 * 1024)) 2615*3c96341aSnarayan label->dkl_pcyl = vd->file_size / (100 * 1024); 2616*3c96341aSnarayan else 2617*3c96341aSnarayan label->dkl_pcyl = vd->file_size / (300 * 1024); 2618*3c96341aSnarayan 2619*3c96341aSnarayan if (label->dkl_pcyl == 0) 2620*3c96341aSnarayan label->dkl_pcyl = 1; 2621*3c96341aSnarayan 2622*3c96341aSnarayan if (label->dkl_pcyl > 2) 2623*3c96341aSnarayan label->dkl_acyl = 2; 2624*3c96341aSnarayan else 2625*3c96341aSnarayan label->dkl_acyl = 0; 2626*3c96341aSnarayan 2627*3c96341aSnarayan label->dkl_nsect = vd->file_size / 2628*3c96341aSnarayan (DEV_BSIZE * label->dkl_pcyl); 2629*3c96341aSnarayan label->dkl_ncyl = label->dkl_pcyl - label->dkl_acyl; 2630*3c96341aSnarayan label->dkl_nhead = 1; 2631*3c96341aSnarayan label->dkl_write_reinstruct = 0; 2632*3c96341aSnarayan label->dkl_read_reinstruct = 0; 2633*3c96341aSnarayan label->dkl_rpm = 7200; 2634*3c96341aSnarayan label->dkl_apc = 0; 2635*3c96341aSnarayan label->dkl_intrlv = 0; 2636*3c96341aSnarayan 
label->dkl_magic = DKL_MAGIC; 2637*3c96341aSnarayan 2638*3c96341aSnarayan PR0("requested disk size: %ld bytes\n", vd->file_size); 2639*3c96341aSnarayan PR0("setup: ncyl=%d nhead=%d nsec=%d\n", label->dkl_pcyl, 2640*3c96341aSnarayan label->dkl_nhead, label->dkl_nsect); 2641*3c96341aSnarayan PR0("provided disk size: %ld bytes\n", (uint64_t) 2642*3c96341aSnarayan (label->dkl_pcyl * 2643*3c96341aSnarayan label->dkl_nhead * label->dkl_nsect * DEV_BSIZE)); 2644*3c96341aSnarayan 2645*3c96341aSnarayan /* 2646*3c96341aSnarayan * We must have a correct label name otherwise format(1m) will 2647*3c96341aSnarayan * not recognized the disk as labeled. 2648*3c96341aSnarayan */ 2649*3c96341aSnarayan (void) snprintf(label->dkl_asciilabel, LEN_DKL_ASCII, 2650*3c96341aSnarayan "SUNVDSK cyl %d alt %d hd %d sec %d", 2651*3c96341aSnarayan label->dkl_ncyl, label->dkl_acyl, label->dkl_nhead, 2652*3c96341aSnarayan label->dkl_nsect); 2653*3c96341aSnarayan 2654*3c96341aSnarayan /* default VTOC */ 2655*3c96341aSnarayan label->dkl_vtoc.v_version = V_VERSION; 2656*3c96341aSnarayan label->dkl_vtoc.v_nparts = 8; 2657*3c96341aSnarayan label->dkl_vtoc.v_sanity = VTOC_SANE; 2658*3c96341aSnarayan label->dkl_vtoc.v_part[2].p_tag = V_BACKUP; 2659*3c96341aSnarayan label->dkl_map[2].dkl_cylno = 0; 2660*3c96341aSnarayan label->dkl_map[2].dkl_nblk = label->dkl_ncyl * 2661*3c96341aSnarayan label->dkl_nhead * label->dkl_nsect; 2662*3c96341aSnarayan label->dkl_map[0] = label->dkl_map[2]; 2663*3c96341aSnarayan label->dkl_map[0] = label->dkl_map[2]; 2664*3c96341aSnarayan label->dkl_cksum = vd_lbl2cksum(label); 2665*3c96341aSnarayan } 2666*3c96341aSnarayan 2667*3c96341aSnarayan vd->nslices = label->dkl_vtoc.v_nparts; 2668*3c96341aSnarayan 2669*3c96341aSnarayan /* sector size = block size = DEV_BSIZE */ 2670*3c96341aSnarayan vd->vdisk_size = (label->dkl_pcyl * 2671*3c96341aSnarayan label->dkl_nhead * label->dkl_nsect) / DEV_BSIZE; 2672*3c96341aSnarayan vd->vdisk_type = VD_DISK_TYPE_DISK; 2673*3c96341aSnarayan 
vd->vdisk_label = VD_DISK_LABEL_VTOC; 2674*3c96341aSnarayan vd->max_xfer_sz = maxphys / DEV_BSIZE; /* default transfer size */ 2675*3c96341aSnarayan 2676*3c96341aSnarayan /* Get max_xfer_sz from the device where the file is */ 2677*3c96341aSnarayan dev = vd->file_vnode->v_vfsp->vfs_dev; 2678*3c96341aSnarayan dev_path[0] = NULL; 2679*3c96341aSnarayan if (ddi_dev_pathname(dev, S_IFBLK, dev_path) == DDI_SUCCESS) { 2680*3c96341aSnarayan PR0("underlying device = %s\n", dev_path); 2681*3c96341aSnarayan } 2682*3c96341aSnarayan 2683*3c96341aSnarayan if ((status = ldi_open_by_dev(&dev, OTYP_BLK, FREAD, 2684*3c96341aSnarayan kcred, &lhandle, vd->vds->ldi_ident)) != 0) { 2685*3c96341aSnarayan PR0("ldi_open_by_dev() returned errno %d for device %s", 2686*3c96341aSnarayan status, dev_path); 2687*3c96341aSnarayan } else { 2688*3c96341aSnarayan if ((status = ldi_ioctl(lhandle, DKIOCINFO, 2689*3c96341aSnarayan (intptr_t)&dk_cinfo, (vd_open_flags | FKIOCTL), kcred, 2690*3c96341aSnarayan &rval)) != 0) { 2691*3c96341aSnarayan PR0("ldi_ioctl(DKIOCINFO) returned errno %d for %s", 2692*3c96341aSnarayan status, dev_path); 2693*3c96341aSnarayan } else { 2694*3c96341aSnarayan /* 2695*3c96341aSnarayan * Store the device's max transfer size for 2696*3c96341aSnarayan * return to the client 2697*3c96341aSnarayan */ 2698*3c96341aSnarayan vd->max_xfer_sz = dk_cinfo.dki_maxtransfer; 2699*3c96341aSnarayan } 2700*3c96341aSnarayan 2701*3c96341aSnarayan PR0("close the device %s", dev_path); 2702*3c96341aSnarayan (void) ldi_close(lhandle, FREAD, kcred); 2703*3c96341aSnarayan } 2704*3c96341aSnarayan 2705*3c96341aSnarayan PR0("using for file %s, dev %s, max_xfer = %u blks", 2706*3c96341aSnarayan file_path, dev_path, vd->max_xfer_sz); 2707*3c96341aSnarayan 2708*3c96341aSnarayan vd->pseudo = B_FALSE; 2709*3c96341aSnarayan vd->file = B_TRUE; 2710*3c96341aSnarayan 2711*3c96341aSnarayan vd->dk_geom.dkg_ncyl = label->dkl_ncyl; 2712*3c96341aSnarayan vd->dk_geom.dkg_acyl = label->dkl_acyl; 2713*3c96341aSnarayan 
vd->dk_geom.dkg_pcyl = label->dkl_pcyl; 2714*3c96341aSnarayan vd->dk_geom.dkg_nhead = label->dkl_nhead; 2715*3c96341aSnarayan vd->dk_geom.dkg_nsect = label->dkl_nsect; 2716*3c96341aSnarayan vd->dk_geom.dkg_intrlv = label->dkl_intrlv; 2717*3c96341aSnarayan vd->dk_geom.dkg_apc = label->dkl_apc; 2718*3c96341aSnarayan vd->dk_geom.dkg_rpm = label->dkl_rpm; 2719*3c96341aSnarayan vd->dk_geom.dkg_write_reinstruct = label->dkl_write_reinstruct; 2720*3c96341aSnarayan vd->dk_geom.dkg_read_reinstruct = label->dkl_read_reinstruct; 2721*3c96341aSnarayan 2722*3c96341aSnarayan vd->vtoc.v_sanity = label->dkl_vtoc.v_sanity; 2723*3c96341aSnarayan vd->vtoc.v_version = label->dkl_vtoc.v_version; 2724*3c96341aSnarayan vd->vtoc.v_sectorsz = DEV_BSIZE; 2725*3c96341aSnarayan vd->vtoc.v_nparts = label->dkl_vtoc.v_nparts; 2726*3c96341aSnarayan 2727*3c96341aSnarayan bcopy(label->dkl_vtoc.v_volume, vd->vtoc.v_volume, 2728*3c96341aSnarayan LEN_DKL_VVOL); 2729*3c96341aSnarayan bcopy(label->dkl_asciilabel, vd->vtoc.v_asciilabel, 2730*3c96341aSnarayan LEN_DKL_ASCII); 2731*3c96341aSnarayan 2732*3c96341aSnarayan for (i = 0; i < vd->nslices; i++) { 2733*3c96341aSnarayan vd->vtoc.timestamp[i] = label->dkl_vtoc.v_timestamp[i]; 2734*3c96341aSnarayan vd->vtoc.v_part[i].p_tag = label->dkl_vtoc.v_part[i].p_tag; 2735*3c96341aSnarayan vd->vtoc.v_part[i].p_flag = label->dkl_vtoc.v_part[i].p_flag; 2736*3c96341aSnarayan vd->vtoc.v_part[i].p_start = label->dkl_map[i].dkl_cylno * 2737*3c96341aSnarayan label->dkl_nhead * label->dkl_nsect; 2738*3c96341aSnarayan vd->vtoc.v_part[i].p_size = label->dkl_map[i].dkl_nblk; 2739*3c96341aSnarayan vd->ldi_handle[i] = NULL; 2740*3c96341aSnarayan vd->dev[i] = NULL; 2741*3c96341aSnarayan } 2742*3c96341aSnarayan 2743*3c96341aSnarayan return (0); 2744*3c96341aSnarayan } 2745*3c96341aSnarayan 2746*3c96341aSnarayan static int 2747*3c96341aSnarayan vd_setup_vd(vd_t *vd) 27481ae08745Sheppo { 2749e1ebb9ecSlm66018 int rval, status; 27501ae08745Sheppo dev_info_t *dip; 27511ae08745Sheppo 
struct dk_cinfo dk_cinfo; 2752*3c96341aSnarayan char *device_path = vd->device_path; 27531ae08745Sheppo 27544bac2208Snarayan /* 27554bac2208Snarayan * We need to open with FNDELAY so that opening an empty partition 27564bac2208Snarayan * does not fail. 27574bac2208Snarayan */ 27584bac2208Snarayan if ((status = ldi_open_by_name(device_path, vd_open_flags | FNDELAY, 27594bac2208Snarayan kcred, &vd->ldi_handle[0], vd->vds->ldi_ident)) != 0) { 2760*3c96341aSnarayan PR0("ldi_open_by_name(%s) = errno %d", device_path, status); 2761*3c96341aSnarayan 2762*3c96341aSnarayan /* this may not be a device try opening as a file */ 2763*3c96341aSnarayan if (status == ENXIO || status == ENODEV) 2764*3c96341aSnarayan status = vd_setup_file(vd); 2765*3c96341aSnarayan if (status) { 2766*3c96341aSnarayan PR0("Cannot use device/file (%s), errno=%d\n", 2767*3c96341aSnarayan device_path, status); 2768*3c96341aSnarayan if (status == ENXIO || status == ENODEV || 2769*3c96341aSnarayan status == ENOENT) { 2770*3c96341aSnarayan return (EAGAIN); 2771*3c96341aSnarayan } 2772*3c96341aSnarayan } 27730a55fbb7Slm66018 return (status); 27740a55fbb7Slm66018 } 27750a55fbb7Slm66018 27764bac2208Snarayan /* 27774bac2208Snarayan * nslices must be updated now so that vds_destroy_vd() will close 27784bac2208Snarayan * the slice we have just opened in case of an error. 
27794bac2208Snarayan */ 27804bac2208Snarayan vd->nslices = 1; 2781*3c96341aSnarayan vd->file = B_FALSE; 27824bac2208Snarayan 2783e1ebb9ecSlm66018 /* Get device number and size of backing device */ 27840a55fbb7Slm66018 if ((status = ldi_get_dev(vd->ldi_handle[0], &vd->dev[0])) != 0) { 27851ae08745Sheppo PRN("ldi_get_dev() returned errno %d for %s", 2786e1ebb9ecSlm66018 status, device_path); 27871ae08745Sheppo return (status); 27881ae08745Sheppo } 27890a55fbb7Slm66018 if (ldi_get_size(vd->ldi_handle[0], &vd->vdisk_size) != DDI_SUCCESS) { 2790e1ebb9ecSlm66018 PRN("ldi_get_size() failed for %s", device_path); 27911ae08745Sheppo return (EIO); 27921ae08745Sheppo } 2793e1ebb9ecSlm66018 vd->vdisk_size = lbtodb(vd->vdisk_size); /* convert to blocks */ 27941ae08745Sheppo 2795e1ebb9ecSlm66018 /* Verify backing device supports dk_cinfo, dk_geom, and vtoc */ 2796e1ebb9ecSlm66018 if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCINFO, 2797e1ebb9ecSlm66018 (intptr_t)&dk_cinfo, (vd_open_flags | FKIOCTL), kcred, 2798e1ebb9ecSlm66018 &rval)) != 0) { 2799e1ebb9ecSlm66018 PRN("ldi_ioctl(DKIOCINFO) returned errno %d for %s", 2800e1ebb9ecSlm66018 status, device_path); 2801e1ebb9ecSlm66018 return (status); 2802e1ebb9ecSlm66018 } 2803e1ebb9ecSlm66018 if (dk_cinfo.dki_partition >= V_NUMPAR) { 2804e1ebb9ecSlm66018 PRN("slice %u >= maximum slice %u for %s", 2805e1ebb9ecSlm66018 dk_cinfo.dki_partition, V_NUMPAR, device_path); 2806e1ebb9ecSlm66018 return (EIO); 2807e1ebb9ecSlm66018 } 28084bac2208Snarayan 28094bac2208Snarayan status = vd_read_vtoc(vd->ldi_handle[0], &vd->vtoc, &vd->vdisk_label); 28104bac2208Snarayan 28114bac2208Snarayan if (status != 0) { 28124bac2208Snarayan PRN("vd_read_vtoc returned errno %d for %s", 2813e1ebb9ecSlm66018 status, device_path); 2814e1ebb9ecSlm66018 return (status); 2815e1ebb9ecSlm66018 } 28164bac2208Snarayan 28174bac2208Snarayan if (vd->vdisk_label == VD_DISK_LABEL_VTOC && 28184bac2208Snarayan (status = ldi_ioctl(vd->ldi_handle[0], DKIOCGGEOM, 
28194bac2208Snarayan (intptr_t)&vd->dk_geom, (vd_open_flags | FKIOCTL), 28204bac2208Snarayan kcred, &rval)) != 0) { 28214bac2208Snarayan PRN("ldi_ioctl(DKIOCGEOM) returned errno %d for %s", 2822e1ebb9ecSlm66018 status, device_path); 2823e1ebb9ecSlm66018 return (status); 2824e1ebb9ecSlm66018 } 2825e1ebb9ecSlm66018 2826e1ebb9ecSlm66018 /* Store the device's max transfer size for return to the client */ 2827e1ebb9ecSlm66018 vd->max_xfer_sz = dk_cinfo.dki_maxtransfer; 2828e1ebb9ecSlm66018 2829e1ebb9ecSlm66018 /* Determine if backing device is a pseudo device */ 28301ae08745Sheppo if ((dip = ddi_hold_devi_by_instance(getmajor(vd->dev[0]), 28311ae08745Sheppo dev_to_instance(vd->dev[0]), 0)) == NULL) { 2832e1ebb9ecSlm66018 PRN("%s is no longer accessible", device_path); 28331ae08745Sheppo return (EIO); 28341ae08745Sheppo } 28351ae08745Sheppo vd->pseudo = is_pseudo_device(dip); 28361ae08745Sheppo ddi_release_devi(dip); 28371ae08745Sheppo if (vd->pseudo) { 28381ae08745Sheppo vd->vdisk_type = VD_DISK_TYPE_SLICE; 28391ae08745Sheppo vd->nslices = 1; 28401ae08745Sheppo return (0); /* ...and we're done */ 28411ae08745Sheppo } 28421ae08745Sheppo 28430a55fbb7Slm66018 /* If slice is entire-disk slice, initialize for full disk */ 28440a55fbb7Slm66018 if (dk_cinfo.dki_partition == VD_ENTIRE_DISK_SLICE) 28450a55fbb7Slm66018 return (vd_setup_full_disk(vd)); 28461ae08745Sheppo 28470a55fbb7Slm66018 2848e1ebb9ecSlm66018 /* Otherwise, we have a non-entire slice of a device */ 28491ae08745Sheppo vd->vdisk_type = VD_DISK_TYPE_SLICE; 28501ae08745Sheppo vd->nslices = 1; 28511ae08745Sheppo 28524bac2208Snarayan if (vd->vdisk_label == VD_DISK_LABEL_EFI) { 28534bac2208Snarayan status = vd_setup_partition_efi(vd); 28544bac2208Snarayan return (status); 28554bac2208Snarayan } 28561ae08745Sheppo 2857e1ebb9ecSlm66018 /* Initialize dk_geom structure for single-slice device */ 28581ae08745Sheppo if (vd->dk_geom.dkg_nsect == 0) { 28593af08d82Slm66018 PR0("%s geometry claims 0 sectors per track", 
device_path); 28601ae08745Sheppo return (EIO); 28611ae08745Sheppo } 28621ae08745Sheppo if (vd->dk_geom.dkg_nhead == 0) { 28633af08d82Slm66018 PR0("%s geometry claims 0 heads", device_path); 28641ae08745Sheppo return (EIO); 28651ae08745Sheppo } 28661ae08745Sheppo vd->dk_geom.dkg_ncyl = 2867e1ebb9ecSlm66018 vd->vdisk_size/vd->dk_geom.dkg_nsect/vd->dk_geom.dkg_nhead; 28681ae08745Sheppo vd->dk_geom.dkg_acyl = 0; 28691ae08745Sheppo vd->dk_geom.dkg_pcyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl; 28701ae08745Sheppo 28711ae08745Sheppo 2872e1ebb9ecSlm66018 /* Initialize vtoc structure for single-slice device */ 28731ae08745Sheppo bcopy(VD_VOLUME_NAME, vd->vtoc.v_volume, 28741ae08745Sheppo MIN(sizeof (VD_VOLUME_NAME), sizeof (vd->vtoc.v_volume))); 28751ae08745Sheppo bzero(vd->vtoc.v_part, sizeof (vd->vtoc.v_part)); 28761ae08745Sheppo vd->vtoc.v_nparts = 1; 28771ae08745Sheppo vd->vtoc.v_part[0].p_tag = V_UNASSIGNED; 28781ae08745Sheppo vd->vtoc.v_part[0].p_flag = 0; 28791ae08745Sheppo vd->vtoc.v_part[0].p_start = 0; 2880e1ebb9ecSlm66018 vd->vtoc.v_part[0].p_size = vd->vdisk_size; 28811ae08745Sheppo bcopy(VD_ASCIILABEL, vd->vtoc.v_asciilabel, 28821ae08745Sheppo MIN(sizeof (VD_ASCIILABEL), sizeof (vd->vtoc.v_asciilabel))); 28831ae08745Sheppo 28841ae08745Sheppo 28851ae08745Sheppo return (0); 28861ae08745Sheppo } 28871ae08745Sheppo 28881ae08745Sheppo static int 2889e1ebb9ecSlm66018 vds_do_init_vd(vds_t *vds, uint64_t id, char *device_path, uint64_t ldc_id, 28901ae08745Sheppo vd_t **vdp) 28911ae08745Sheppo { 28921ae08745Sheppo char tq_name[TASKQ_NAMELEN]; 28930a55fbb7Slm66018 int status; 28941ae08745Sheppo ddi_iblock_cookie_t iblock = NULL; 28951ae08745Sheppo ldc_attr_t ldc_attr; 28961ae08745Sheppo vd_t *vd; 28971ae08745Sheppo 28981ae08745Sheppo 28991ae08745Sheppo ASSERT(vds != NULL); 2900e1ebb9ecSlm66018 ASSERT(device_path != NULL); 29011ae08745Sheppo ASSERT(vdp != NULL); 2902e1ebb9ecSlm66018 PR0("Adding vdisk for %s", device_path); 29031ae08745Sheppo 29041ae08745Sheppo if 
((vd = kmem_zalloc(sizeof (*vd), KM_NOSLEEP)) == NULL) { 29051ae08745Sheppo PRN("No memory for virtual disk"); 29061ae08745Sheppo return (EAGAIN); 29071ae08745Sheppo } 29081ae08745Sheppo *vdp = vd; /* assign here so vds_destroy_vd() can cleanup later */ 29091ae08745Sheppo vd->vds = vds; 2910*3c96341aSnarayan (void) strncpy(vd->device_path, device_path, MAXPATHLEN); 29111ae08745Sheppo 29120a55fbb7Slm66018 /* Open vdisk and initialize parameters */ 2913*3c96341aSnarayan if ((status = vd_setup_vd(vd)) == 0) { 2914*3c96341aSnarayan vd->initialized |= VD_DISK_READY; 29151ae08745Sheppo 2916*3c96341aSnarayan ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR); 2917*3c96341aSnarayan PR0("vdisk_type = %s, pseudo = %s, file = %s, nslices = %u", 2918*3c96341aSnarayan ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? "disk" : "slice"), 2919*3c96341aSnarayan (vd->pseudo ? "yes" : "no"), (vd->file ? "yes" : "no"), 2920*3c96341aSnarayan vd->nslices); 2921*3c96341aSnarayan } else { 2922*3c96341aSnarayan if (status != EAGAIN) 2923*3c96341aSnarayan return (status); 2924*3c96341aSnarayan } 29251ae08745Sheppo 29261ae08745Sheppo /* Initialize locking */ 29271ae08745Sheppo if (ddi_get_soft_iblock_cookie(vds->dip, DDI_SOFTINT_MED, 29281ae08745Sheppo &iblock) != DDI_SUCCESS) { 29291ae08745Sheppo PRN("Could not get iblock cookie."); 29301ae08745Sheppo return (EIO); 29311ae08745Sheppo } 29321ae08745Sheppo 29331ae08745Sheppo mutex_init(&vd->lock, NULL, MUTEX_DRIVER, iblock); 29341ae08745Sheppo vd->initialized |= VD_LOCKING; 29351ae08745Sheppo 29361ae08745Sheppo 2937d10e4ef2Snarayan /* Create start and completion task queues for the vdisk */ 2938d10e4ef2Snarayan (void) snprintf(tq_name, sizeof (tq_name), "vd_startq%lu", id); 29391ae08745Sheppo PR1("tq_name = %s", tq_name); 2940d10e4ef2Snarayan if ((vd->startq = ddi_taskq_create(vds->dip, tq_name, 1, 29411ae08745Sheppo TASKQ_DEFAULTPRI, 0)) == NULL) { 29421ae08745Sheppo PRN("Could not create task queue"); 29431ae08745Sheppo return (EIO); 
29441ae08745Sheppo } 2945d10e4ef2Snarayan (void) snprintf(tq_name, sizeof (tq_name), "vd_completionq%lu", id); 2946d10e4ef2Snarayan PR1("tq_name = %s", tq_name); 2947d10e4ef2Snarayan if ((vd->completionq = ddi_taskq_create(vds->dip, tq_name, 1, 2948d10e4ef2Snarayan TASKQ_DEFAULTPRI, 0)) == NULL) { 2949d10e4ef2Snarayan PRN("Could not create task queue"); 2950d10e4ef2Snarayan return (EIO); 2951d10e4ef2Snarayan } 2952d10e4ef2Snarayan vd->enabled = 1; /* before callback can dispatch to startq */ 29531ae08745Sheppo 29541ae08745Sheppo 29551ae08745Sheppo /* Bring up LDC */ 29561ae08745Sheppo ldc_attr.devclass = LDC_DEV_BLK_SVC; 29571ae08745Sheppo ldc_attr.instance = ddi_get_instance(vds->dip); 29581ae08745Sheppo ldc_attr.mode = LDC_MODE_UNRELIABLE; 2959e1ebb9ecSlm66018 ldc_attr.mtu = VD_LDC_MTU; 29601ae08745Sheppo if ((status = ldc_init(ldc_id, &ldc_attr, &vd->ldc_handle)) != 0) { 29613af08d82Slm66018 PR0("ldc_init(%lu) = errno %d", ldc_id, status); 29621ae08745Sheppo return (status); 29631ae08745Sheppo } 29641ae08745Sheppo vd->initialized |= VD_LDC; 29651ae08745Sheppo 29661ae08745Sheppo if ((status = ldc_reg_callback(vd->ldc_handle, vd_handle_ldc_events, 29671ae08745Sheppo (caddr_t)vd)) != 0) { 29683af08d82Slm66018 PR0("ldc_reg_callback() returned errno %d", status); 29691ae08745Sheppo return (status); 29701ae08745Sheppo } 29711ae08745Sheppo 29721ae08745Sheppo if ((status = ldc_open(vd->ldc_handle)) != 0) { 29733af08d82Slm66018 PR0("ldc_open() returned errno %d", status); 29741ae08745Sheppo return (status); 29751ae08745Sheppo } 29761ae08745Sheppo 29773af08d82Slm66018 if ((status = ldc_up(vd->ldc_handle)) != 0) { 297834683adeSsg70180 PR0("ldc_up() returned errno %d", status); 29793af08d82Slm66018 } 29803af08d82Slm66018 29814bac2208Snarayan /* Allocate the inband task memory handle */ 29824bac2208Snarayan status = ldc_mem_alloc_handle(vd->ldc_handle, &(vd->inband_task.mhdl)); 29834bac2208Snarayan if (status) { 298434683adeSsg70180 PR0("ldc_mem_alloc_handle() returned err 
%d ", status); 29854bac2208Snarayan return (ENXIO); 29864bac2208Snarayan } 29871ae08745Sheppo 29881ae08745Sheppo /* Add the successfully-initialized vdisk to the server's table */ 29891ae08745Sheppo if (mod_hash_insert(vds->vd_table, (mod_hash_key_t)id, vd) != 0) { 29901ae08745Sheppo PRN("Error adding vdisk ID %lu to table", id); 29911ae08745Sheppo return (EIO); 29921ae08745Sheppo } 29931ae08745Sheppo 29943af08d82Slm66018 /* Allocate the staging buffer */ 29953af08d82Slm66018 vd->max_msglen = sizeof (vio_msg_t); /* baseline vio message size */ 29963af08d82Slm66018 vd->vio_msgp = kmem_alloc(vd->max_msglen, KM_SLEEP); 29973af08d82Slm66018 29983af08d82Slm66018 /* store initial state */ 29993af08d82Slm66018 vd->state = VD_STATE_INIT; 30003af08d82Slm66018 30011ae08745Sheppo return (0); 30021ae08745Sheppo } 30031ae08745Sheppo 30043af08d82Slm66018 static void 30053af08d82Slm66018 vd_free_dring_task(vd_t *vdp) 30063af08d82Slm66018 { 30073af08d82Slm66018 if (vdp->dring_task != NULL) { 30083af08d82Slm66018 ASSERT(vdp->dring_len != 0); 30093af08d82Slm66018 /* Free all dring_task memory handles */ 30103af08d82Slm66018 for (int i = 0; i < vdp->dring_len; i++) { 30113af08d82Slm66018 (void) ldc_mem_free_handle(vdp->dring_task[i].mhdl); 30123af08d82Slm66018 kmem_free(vdp->dring_task[i].msg, vdp->max_msglen); 30133af08d82Slm66018 vdp->dring_task[i].msg = NULL; 30143af08d82Slm66018 } 30153af08d82Slm66018 kmem_free(vdp->dring_task, 30163af08d82Slm66018 (sizeof (*vdp->dring_task)) * vdp->dring_len); 30173af08d82Slm66018 vdp->dring_task = NULL; 30183af08d82Slm66018 } 30193af08d82Slm66018 } 30203af08d82Slm66018 30211ae08745Sheppo /* 30221ae08745Sheppo * Destroy the state associated with a virtual disk 30231ae08745Sheppo */ 30241ae08745Sheppo static void 30251ae08745Sheppo vds_destroy_vd(void *arg) 30261ae08745Sheppo { 30271ae08745Sheppo vd_t *vd = (vd_t *)arg; 302834683adeSsg70180 int retry = 0, rv; 30291ae08745Sheppo 30301ae08745Sheppo if (vd == NULL) 30311ae08745Sheppo return; 
30321ae08745Sheppo 3033d10e4ef2Snarayan PR0("Destroying vdisk state"); 3034d10e4ef2Snarayan 30354bac2208Snarayan if (vd->dk_efi.dki_data != NULL) 30364bac2208Snarayan kmem_free(vd->dk_efi.dki_data, vd->dk_efi.dki_length); 30374bac2208Snarayan 30381ae08745Sheppo /* Disable queuing requests for the vdisk */ 30391ae08745Sheppo if (vd->initialized & VD_LOCKING) { 30401ae08745Sheppo mutex_enter(&vd->lock); 30411ae08745Sheppo vd->enabled = 0; 30421ae08745Sheppo mutex_exit(&vd->lock); 30431ae08745Sheppo } 30441ae08745Sheppo 3045d10e4ef2Snarayan /* Drain and destroy start queue (*before* destroying completionq) */ 3046d10e4ef2Snarayan if (vd->startq != NULL) 3047d10e4ef2Snarayan ddi_taskq_destroy(vd->startq); /* waits for queued tasks */ 3048d10e4ef2Snarayan 3049d10e4ef2Snarayan /* Drain and destroy completion queue (*before* shutting down LDC) */ 3050d10e4ef2Snarayan if (vd->completionq != NULL) 3051d10e4ef2Snarayan ddi_taskq_destroy(vd->completionq); /* waits for tasks */ 3052d10e4ef2Snarayan 30533af08d82Slm66018 vd_free_dring_task(vd); 30543af08d82Slm66018 305534683adeSsg70180 /* Free the inband task memory handle */ 305634683adeSsg70180 (void) ldc_mem_free_handle(vd->inband_task.mhdl); 305734683adeSsg70180 305834683adeSsg70180 /* Shut down LDC */ 305934683adeSsg70180 if (vd->initialized & VD_LDC) { 306034683adeSsg70180 /* unmap the dring */ 306134683adeSsg70180 if (vd->initialized & VD_DRING) 306234683adeSsg70180 (void) ldc_mem_dring_unmap(vd->dring_handle); 306334683adeSsg70180 306434683adeSsg70180 /* close LDC channel - retry on EAGAIN */ 306534683adeSsg70180 while ((rv = ldc_close(vd->ldc_handle)) == EAGAIN) { 306634683adeSsg70180 if (++retry > vds_ldc_retries) { 306734683adeSsg70180 PR0("Timed out closing channel"); 306834683adeSsg70180 break; 306934683adeSsg70180 } 307034683adeSsg70180 drv_usecwait(vds_ldc_delay); 307134683adeSsg70180 } 307234683adeSsg70180 if (rv == 0) { 307334683adeSsg70180 (void) ldc_unreg_callback(vd->ldc_handle); 307434683adeSsg70180 (void) 
ldc_fini(vd->ldc_handle); 307534683adeSsg70180 } else { 307634683adeSsg70180 /* 307734683adeSsg70180 * Closing the LDC channel has failed. Ideally we should 307834683adeSsg70180 * fail here but there is no Zeus level infrastructure 307934683adeSsg70180 * to handle this. The MD has already been changed and 308034683adeSsg70180 * we have to do the close. So we try to do as much 308134683adeSsg70180 * clean up as we can. 308234683adeSsg70180 */ 308334683adeSsg70180 (void) ldc_set_cb_mode(vd->ldc_handle, LDC_CB_DISABLE); 308434683adeSsg70180 while (ldc_unreg_callback(vd->ldc_handle) == EAGAIN) 308534683adeSsg70180 drv_usecwait(vds_ldc_delay); 308634683adeSsg70180 } 308734683adeSsg70180 } 308834683adeSsg70180 30893af08d82Slm66018 /* Free the staging buffer for msgs */ 30903af08d82Slm66018 if (vd->vio_msgp != NULL) { 30913af08d82Slm66018 kmem_free(vd->vio_msgp, vd->max_msglen); 30923af08d82Slm66018 vd->vio_msgp = NULL; 30933af08d82Slm66018 } 30943af08d82Slm66018 30953af08d82Slm66018 /* Free the inband message buffer */ 30963af08d82Slm66018 if (vd->inband_task.msg != NULL) { 30973af08d82Slm66018 kmem_free(vd->inband_task.msg, vd->max_msglen); 30983af08d82Slm66018 vd->inband_task.msg = NULL; 3099d10e4ef2Snarayan } 3100*3c96341aSnarayan if (vd->initialized & VD_DISK_READY) { 3101*3c96341aSnarayan if (vd->file) { 3102*3c96341aSnarayan /* Unmap and close file */ 3103*3c96341aSnarayan (void) as_unmap(&kas, vd->file_maddr, vd->file_size); 3104*3c96341aSnarayan (void) VOP_CLOSE(vd->file_vnode, vd_open_flags, 1, 3105*3c96341aSnarayan 0, kcred); 3106*3c96341aSnarayan VN_RELE(vd->file_vnode); 3107*3c96341aSnarayan } else { 31081ae08745Sheppo /* Close any open backing-device slices */ 31091ae08745Sheppo for (uint_t slice = 0; slice < vd->nslices; slice++) { 31101ae08745Sheppo if (vd->ldi_handle[slice] != NULL) { 31111ae08745Sheppo PR0("Closing slice %u", slice); 31121ae08745Sheppo (void) ldi_close(vd->ldi_handle[slice], 31134bac2208Snarayan vd_open_flags | FNDELAY, kcred); 
31141ae08745Sheppo } 31151ae08745Sheppo } 3116*3c96341aSnarayan } 3117*3c96341aSnarayan } 31181ae08745Sheppo 31191ae08745Sheppo /* Free lock */ 31201ae08745Sheppo if (vd->initialized & VD_LOCKING) 31211ae08745Sheppo mutex_destroy(&vd->lock); 31221ae08745Sheppo 31231ae08745Sheppo /* Finally, free the vdisk structure itself */ 31241ae08745Sheppo kmem_free(vd, sizeof (*vd)); 31251ae08745Sheppo } 31261ae08745Sheppo 31271ae08745Sheppo static int 3128e1ebb9ecSlm66018 vds_init_vd(vds_t *vds, uint64_t id, char *device_path, uint64_t ldc_id) 31291ae08745Sheppo { 31301ae08745Sheppo int status; 31311ae08745Sheppo vd_t *vd = NULL; 31321ae08745Sheppo 31331ae08745Sheppo 3134e1ebb9ecSlm66018 if ((status = vds_do_init_vd(vds, id, device_path, ldc_id, &vd)) != 0) 31351ae08745Sheppo vds_destroy_vd(vd); 31361ae08745Sheppo 31371ae08745Sheppo return (status); 31381ae08745Sheppo } 31391ae08745Sheppo 31401ae08745Sheppo static int 31411ae08745Sheppo vds_do_get_ldc_id(md_t *md, mde_cookie_t vd_node, mde_cookie_t *channel, 31421ae08745Sheppo uint64_t *ldc_id) 31431ae08745Sheppo { 31441ae08745Sheppo int num_channels; 31451ae08745Sheppo 31461ae08745Sheppo 31471ae08745Sheppo /* Look for channel endpoint child(ren) of the vdisk MD node */ 31481ae08745Sheppo if ((num_channels = md_scan_dag(md, vd_node, 31491ae08745Sheppo md_find_name(md, VD_CHANNEL_ENDPOINT), 31501ae08745Sheppo md_find_name(md, "fwd"), channel)) <= 0) { 31511ae08745Sheppo PRN("No \"%s\" found for virtual disk", VD_CHANNEL_ENDPOINT); 31521ae08745Sheppo return (-1); 31531ae08745Sheppo } 31541ae08745Sheppo 31551ae08745Sheppo /* Get the "id" value for the first channel endpoint node */ 31561ae08745Sheppo if (md_get_prop_val(md, channel[0], VD_ID_PROP, ldc_id) != 0) { 31571ae08745Sheppo PRN("No \"%s\" property found for \"%s\" of vdisk", 31581ae08745Sheppo VD_ID_PROP, VD_CHANNEL_ENDPOINT); 31591ae08745Sheppo return (-1); 31601ae08745Sheppo } 31611ae08745Sheppo 31621ae08745Sheppo if (num_channels > 1) { 31631ae08745Sheppo PRN("Using 
ID of first of multiple channels for this vdisk"); 31641ae08745Sheppo } 31651ae08745Sheppo 31661ae08745Sheppo return (0); 31671ae08745Sheppo } 31681ae08745Sheppo 31691ae08745Sheppo static int 31701ae08745Sheppo vds_get_ldc_id(md_t *md, mde_cookie_t vd_node, uint64_t *ldc_id) 31711ae08745Sheppo { 31721ae08745Sheppo int num_nodes, status; 31731ae08745Sheppo size_t size; 31741ae08745Sheppo mde_cookie_t *channel; 31751ae08745Sheppo 31761ae08745Sheppo 31771ae08745Sheppo if ((num_nodes = md_node_count(md)) <= 0) { 31781ae08745Sheppo PRN("Invalid node count in Machine Description subtree"); 31791ae08745Sheppo return (-1); 31801ae08745Sheppo } 31811ae08745Sheppo size = num_nodes*(sizeof (*channel)); 31821ae08745Sheppo channel = kmem_zalloc(size, KM_SLEEP); 31831ae08745Sheppo status = vds_do_get_ldc_id(md, vd_node, channel, ldc_id); 31841ae08745Sheppo kmem_free(channel, size); 31851ae08745Sheppo 31861ae08745Sheppo return (status); 31871ae08745Sheppo } 31881ae08745Sheppo 31891ae08745Sheppo static void 31901ae08745Sheppo vds_add_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node) 31911ae08745Sheppo { 3192e1ebb9ecSlm66018 char *device_path = NULL; 31931ae08745Sheppo uint64_t id = 0, ldc_id = 0; 31941ae08745Sheppo 31951ae08745Sheppo 31961ae08745Sheppo if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) { 31971ae08745Sheppo PRN("Error getting vdisk \"%s\"", VD_ID_PROP); 31981ae08745Sheppo return; 31991ae08745Sheppo } 32001ae08745Sheppo PR0("Adding vdisk ID %lu", id); 32011ae08745Sheppo if (md_get_prop_str(md, vd_node, VD_BLOCK_DEVICE_PROP, 3202e1ebb9ecSlm66018 &device_path) != 0) { 32031ae08745Sheppo PRN("Error getting vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); 32041ae08745Sheppo return; 32051ae08745Sheppo } 32061ae08745Sheppo 32071ae08745Sheppo if (vds_get_ldc_id(md, vd_node, &ldc_id) != 0) { 32081ae08745Sheppo PRN("Error getting LDC ID for vdisk %lu", id); 32091ae08745Sheppo return; 32101ae08745Sheppo } 32111ae08745Sheppo 3212e1ebb9ecSlm66018 if (vds_init_vd(vds, id, device_path, 
ldc_id) != 0) { 32131ae08745Sheppo PRN("Failed to add vdisk ID %lu", id); 32141ae08745Sheppo return; 32151ae08745Sheppo } 32161ae08745Sheppo } 32171ae08745Sheppo 32181ae08745Sheppo static void 32191ae08745Sheppo vds_remove_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node) 32201ae08745Sheppo { 32211ae08745Sheppo uint64_t id = 0; 32221ae08745Sheppo 32231ae08745Sheppo 32241ae08745Sheppo if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) { 32251ae08745Sheppo PRN("Unable to get \"%s\" property from vdisk's MD node", 32261ae08745Sheppo VD_ID_PROP); 32271ae08745Sheppo return; 32281ae08745Sheppo } 32291ae08745Sheppo PR0("Removing vdisk ID %lu", id); 32301ae08745Sheppo if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)id) != 0) 32311ae08745Sheppo PRN("No vdisk entry found for vdisk ID %lu", id); 32321ae08745Sheppo } 32331ae08745Sheppo 32341ae08745Sheppo static void 32351ae08745Sheppo vds_change_vd(vds_t *vds, md_t *prev_md, mde_cookie_t prev_vd_node, 32361ae08745Sheppo md_t *curr_md, mde_cookie_t curr_vd_node) 32371ae08745Sheppo { 32381ae08745Sheppo char *curr_dev, *prev_dev; 32391ae08745Sheppo uint64_t curr_id = 0, curr_ldc_id = 0; 32401ae08745Sheppo uint64_t prev_id = 0, prev_ldc_id = 0; 32411ae08745Sheppo size_t len; 32421ae08745Sheppo 32431ae08745Sheppo 32441ae08745Sheppo /* Validate that vdisk ID has not changed */ 32451ae08745Sheppo if (md_get_prop_val(prev_md, prev_vd_node, VD_ID_PROP, &prev_id) != 0) { 32461ae08745Sheppo PRN("Error getting previous vdisk \"%s\" property", 32471ae08745Sheppo VD_ID_PROP); 32481ae08745Sheppo return; 32491ae08745Sheppo } 32501ae08745Sheppo if (md_get_prop_val(curr_md, curr_vd_node, VD_ID_PROP, &curr_id) != 0) { 32511ae08745Sheppo PRN("Error getting current vdisk \"%s\" property", VD_ID_PROP); 32521ae08745Sheppo return; 32531ae08745Sheppo } 32541ae08745Sheppo if (curr_id != prev_id) { 32551ae08745Sheppo PRN("Not changing vdisk: ID changed from %lu to %lu", 32561ae08745Sheppo prev_id, curr_id); 32571ae08745Sheppo return; 
32581ae08745Sheppo } 32591ae08745Sheppo 32601ae08745Sheppo /* Validate that LDC ID has not changed */ 32611ae08745Sheppo if (vds_get_ldc_id(prev_md, prev_vd_node, &prev_ldc_id) != 0) { 32621ae08745Sheppo PRN("Error getting LDC ID for vdisk %lu", prev_id); 32631ae08745Sheppo return; 32641ae08745Sheppo } 32651ae08745Sheppo 32661ae08745Sheppo if (vds_get_ldc_id(curr_md, curr_vd_node, &curr_ldc_id) != 0) { 32671ae08745Sheppo PRN("Error getting LDC ID for vdisk %lu", curr_id); 32681ae08745Sheppo return; 32691ae08745Sheppo } 32701ae08745Sheppo if (curr_ldc_id != prev_ldc_id) { 32710a55fbb7Slm66018 _NOTE(NOTREACHED); /* lint is confused */ 32721ae08745Sheppo PRN("Not changing vdisk: " 32731ae08745Sheppo "LDC ID changed from %lu to %lu", prev_ldc_id, curr_ldc_id); 32741ae08745Sheppo return; 32751ae08745Sheppo } 32761ae08745Sheppo 32771ae08745Sheppo /* Determine whether device path has changed */ 32781ae08745Sheppo if (md_get_prop_str(prev_md, prev_vd_node, VD_BLOCK_DEVICE_PROP, 32791ae08745Sheppo &prev_dev) != 0) { 32801ae08745Sheppo PRN("Error getting previous vdisk \"%s\"", 32811ae08745Sheppo VD_BLOCK_DEVICE_PROP); 32821ae08745Sheppo return; 32831ae08745Sheppo } 32841ae08745Sheppo if (md_get_prop_str(curr_md, curr_vd_node, VD_BLOCK_DEVICE_PROP, 32851ae08745Sheppo &curr_dev) != 0) { 32861ae08745Sheppo PRN("Error getting current vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); 32871ae08745Sheppo return; 32881ae08745Sheppo } 32891ae08745Sheppo if (((len = strlen(curr_dev)) == strlen(prev_dev)) && 32901ae08745Sheppo (strncmp(curr_dev, prev_dev, len) == 0)) 32911ae08745Sheppo return; /* no relevant (supported) change */ 32921ae08745Sheppo 32931ae08745Sheppo PR0("Changing vdisk ID %lu", prev_id); 32943af08d82Slm66018 32951ae08745Sheppo /* Remove old state, which will close vdisk and reset */ 32961ae08745Sheppo if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)prev_id) != 0) 32971ae08745Sheppo PRN("No entry found for vdisk ID %lu", prev_id); 32983af08d82Slm66018 32991ae08745Sheppo /* 
Re-initialize vdisk with new state */ 33001ae08745Sheppo if (vds_init_vd(vds, curr_id, curr_dev, curr_ldc_id) != 0) { 33011ae08745Sheppo PRN("Failed to change vdisk ID %lu", curr_id); 33021ae08745Sheppo return; 33031ae08745Sheppo } 33041ae08745Sheppo } 33051ae08745Sheppo 33061ae08745Sheppo static int 33071ae08745Sheppo vds_process_md(void *arg, mdeg_result_t *md) 33081ae08745Sheppo { 33091ae08745Sheppo int i; 33101ae08745Sheppo vds_t *vds = arg; 33111ae08745Sheppo 33121ae08745Sheppo 33131ae08745Sheppo if (md == NULL) 33141ae08745Sheppo return (MDEG_FAILURE); 33151ae08745Sheppo ASSERT(vds != NULL); 33161ae08745Sheppo 33171ae08745Sheppo for (i = 0; i < md->removed.nelem; i++) 33181ae08745Sheppo vds_remove_vd(vds, md->removed.mdp, md->removed.mdep[i]); 33191ae08745Sheppo for (i = 0; i < md->match_curr.nelem; i++) 33201ae08745Sheppo vds_change_vd(vds, md->match_prev.mdp, md->match_prev.mdep[i], 33211ae08745Sheppo md->match_curr.mdp, md->match_curr.mdep[i]); 33221ae08745Sheppo for (i = 0; i < md->added.nelem; i++) 33231ae08745Sheppo vds_add_vd(vds, md->added.mdp, md->added.mdep[i]); 33241ae08745Sheppo 33251ae08745Sheppo return (MDEG_SUCCESS); 33261ae08745Sheppo } 33271ae08745Sheppo 3328*3c96341aSnarayan 33291ae08745Sheppo static int 33301ae08745Sheppo vds_do_attach(dev_info_t *dip) 33311ae08745Sheppo { 3332445b4c2eSsb155480 int status, sz; 3333445b4c2eSsb155480 int cfg_handle; 33341ae08745Sheppo minor_t instance = ddi_get_instance(dip); 33351ae08745Sheppo vds_t *vds; 3336445b4c2eSsb155480 mdeg_prop_spec_t *pspecp; 3337445b4c2eSsb155480 mdeg_node_spec_t *ispecp; 33381ae08745Sheppo 33391ae08745Sheppo /* 33401ae08745Sheppo * The "cfg-handle" property of a vds node in an MD contains the MD's 33411ae08745Sheppo * notion of "instance", or unique identifier, for that node; OBP 33421ae08745Sheppo * stores the value of the "cfg-handle" MD property as the value of 33431ae08745Sheppo * the "reg" property on the node in the device tree it builds from 33441ae08745Sheppo * the MD and 
passes to Solaris. Thus, we look up the devinfo node's 33451ae08745Sheppo * "reg" property value to uniquely identify this device instance when 33461ae08745Sheppo * registering with the MD event-generation framework. If the "reg" 33471ae08745Sheppo * property cannot be found, the device tree state is presumably so 33481ae08745Sheppo * broken that there is no point in continuing. 33491ae08745Sheppo */ 3350445b4c2eSsb155480 if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 3351445b4c2eSsb155480 VD_REG_PROP)) { 3352445b4c2eSsb155480 PRN("vds \"%s\" property does not exist", VD_REG_PROP); 33531ae08745Sheppo return (DDI_FAILURE); 33541ae08745Sheppo } 33551ae08745Sheppo 33561ae08745Sheppo /* Get the MD instance for later MDEG registration */ 33571ae08745Sheppo cfg_handle = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 3358445b4c2eSsb155480 VD_REG_PROP, -1); 33591ae08745Sheppo 33601ae08745Sheppo if (ddi_soft_state_zalloc(vds_state, instance) != DDI_SUCCESS) { 33611ae08745Sheppo PRN("Could not allocate state for instance %u", instance); 33621ae08745Sheppo return (DDI_FAILURE); 33631ae08745Sheppo } 33641ae08745Sheppo 33651ae08745Sheppo if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) { 33661ae08745Sheppo PRN("Could not get state for instance %u", instance); 33671ae08745Sheppo ddi_soft_state_free(vds_state, instance); 33681ae08745Sheppo return (DDI_FAILURE); 33691ae08745Sheppo } 33701ae08745Sheppo 3371*3c96341aSnarayan 33721ae08745Sheppo vds->dip = dip; 33731ae08745Sheppo vds->vd_table = mod_hash_create_ptrhash("vds_vd_table", VDS_NCHAINS, 33741ae08745Sheppo vds_destroy_vd, 33751ae08745Sheppo sizeof (void *)); 33761ae08745Sheppo ASSERT(vds->vd_table != NULL); 33771ae08745Sheppo 33781ae08745Sheppo if ((status = ldi_ident_from_dip(dip, &vds->ldi_ident)) != 0) { 33791ae08745Sheppo PRN("ldi_ident_from_dip() returned errno %d", status); 33801ae08745Sheppo return (DDI_FAILURE); 33811ae08745Sheppo } 33821ae08745Sheppo vds->initialized |= VDS_LDI; 
33831ae08745Sheppo 33841ae08745Sheppo /* Register for MD updates */ 3385445b4c2eSsb155480 sz = sizeof (vds_prop_template); 3386445b4c2eSsb155480 pspecp = kmem_alloc(sz, KM_SLEEP); 3387445b4c2eSsb155480 bcopy(vds_prop_template, pspecp, sz); 3388445b4c2eSsb155480 3389445b4c2eSsb155480 VDS_SET_MDEG_PROP_INST(pspecp, cfg_handle); 3390445b4c2eSsb155480 3391445b4c2eSsb155480 /* initialize the complete prop spec structure */ 3392445b4c2eSsb155480 ispecp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 3393445b4c2eSsb155480 ispecp->namep = "virtual-device"; 3394445b4c2eSsb155480 ispecp->specp = pspecp; 3395445b4c2eSsb155480 3396445b4c2eSsb155480 if (mdeg_register(ispecp, &vd_match, vds_process_md, vds, 33971ae08745Sheppo &vds->mdeg) != MDEG_SUCCESS) { 33981ae08745Sheppo PRN("Unable to register for MD updates"); 3399445b4c2eSsb155480 kmem_free(ispecp, sizeof (mdeg_node_spec_t)); 3400445b4c2eSsb155480 kmem_free(pspecp, sz); 34011ae08745Sheppo return (DDI_FAILURE); 34021ae08745Sheppo } 3403445b4c2eSsb155480 3404445b4c2eSsb155480 vds->ispecp = ispecp; 34051ae08745Sheppo vds->initialized |= VDS_MDEG; 34061ae08745Sheppo 34070a55fbb7Slm66018 /* Prevent auto-detaching so driver is available whenever MD changes */ 34080a55fbb7Slm66018 if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1) != 34090a55fbb7Slm66018 DDI_PROP_SUCCESS) { 34100a55fbb7Slm66018 PRN("failed to set \"%s\" property for instance %u", 34110a55fbb7Slm66018 DDI_NO_AUTODETACH, instance); 34120a55fbb7Slm66018 } 34130a55fbb7Slm66018 34141ae08745Sheppo ddi_report_dev(dip); 34151ae08745Sheppo return (DDI_SUCCESS); 34161ae08745Sheppo } 34171ae08745Sheppo 34181ae08745Sheppo static int 34191ae08745Sheppo vds_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 34201ae08745Sheppo { 34211ae08745Sheppo int status; 34221ae08745Sheppo 34231ae08745Sheppo switch (cmd) { 34241ae08745Sheppo case DDI_ATTACH: 3425d10e4ef2Snarayan PR0("Attaching"); 34261ae08745Sheppo if ((status = vds_do_attach(dip)) != DDI_SUCCESS) 
34271ae08745Sheppo (void) vds_detach(dip, DDI_DETACH); 34281ae08745Sheppo return (status); 34291ae08745Sheppo case DDI_RESUME: 3430d10e4ef2Snarayan PR0("No action required for DDI_RESUME"); 34311ae08745Sheppo return (DDI_SUCCESS); 34321ae08745Sheppo default: 34331ae08745Sheppo return (DDI_FAILURE); 34341ae08745Sheppo } 34351ae08745Sheppo } 34361ae08745Sheppo 34371ae08745Sheppo static struct dev_ops vds_ops = { 34381ae08745Sheppo DEVO_REV, /* devo_rev */ 34391ae08745Sheppo 0, /* devo_refcnt */ 34401ae08745Sheppo ddi_no_info, /* devo_getinfo */ 34411ae08745Sheppo nulldev, /* devo_identify */ 34421ae08745Sheppo nulldev, /* devo_probe */ 34431ae08745Sheppo vds_attach, /* devo_attach */ 34441ae08745Sheppo vds_detach, /* devo_detach */ 34451ae08745Sheppo nodev, /* devo_reset */ 34461ae08745Sheppo NULL, /* devo_cb_ops */ 34471ae08745Sheppo NULL, /* devo_bus_ops */ 34481ae08745Sheppo nulldev /* devo_power */ 34491ae08745Sheppo }; 34501ae08745Sheppo 34511ae08745Sheppo static struct modldrv modldrv = { 34521ae08745Sheppo &mod_driverops, 34531ae08745Sheppo "virtual disk server v%I%", 34541ae08745Sheppo &vds_ops, 34551ae08745Sheppo }; 34561ae08745Sheppo 34571ae08745Sheppo static struct modlinkage modlinkage = { 34581ae08745Sheppo MODREV_1, 34591ae08745Sheppo &modldrv, 34601ae08745Sheppo NULL 34611ae08745Sheppo }; 34621ae08745Sheppo 34631ae08745Sheppo 34641ae08745Sheppo int 34651ae08745Sheppo _init(void) 34661ae08745Sheppo { 34671ae08745Sheppo int i, status; 34681ae08745Sheppo 3469d10e4ef2Snarayan 34701ae08745Sheppo if ((status = ddi_soft_state_init(&vds_state, sizeof (vds_t), 1)) != 0) 34711ae08745Sheppo return (status); 34721ae08745Sheppo if ((status = mod_install(&modlinkage)) != 0) { 34731ae08745Sheppo ddi_soft_state_fini(&vds_state); 34741ae08745Sheppo return (status); 34751ae08745Sheppo } 34761ae08745Sheppo 34771ae08745Sheppo /* Fill in the bit-mask of server-supported operations */ 34781ae08745Sheppo for (i = 0; i < vds_noperations; i++) 34791ae08745Sheppo vds_operations 
|= 1 << (vds_operation[i].operation - 1); 34801ae08745Sheppo 34811ae08745Sheppo return (0); 34821ae08745Sheppo } 34831ae08745Sheppo 34841ae08745Sheppo int 34851ae08745Sheppo _info(struct modinfo *modinfop) 34861ae08745Sheppo { 34871ae08745Sheppo return (mod_info(&modlinkage, modinfop)); 34881ae08745Sheppo } 34891ae08745Sheppo 34901ae08745Sheppo int 34911ae08745Sheppo _fini(void) 34921ae08745Sheppo { 34931ae08745Sheppo int status; 34941ae08745Sheppo 3495d10e4ef2Snarayan 34961ae08745Sheppo if ((status = mod_remove(&modlinkage)) != 0) 34971ae08745Sheppo return (status); 34981ae08745Sheppo ddi_soft_state_fini(&vds_state); 34991ae08745Sheppo return (0); 35001ae08745Sheppo } 3501