/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * LDoms virtual disk client (vdc) device driver
 *
 * This driver runs on a guest logical domain and communicates with the virtual
 * disk server (vds) driver running on the service domain which is exporting
 * virtualized "disks" to the guest logical domain.
 *
 * The driver can be divided into four sections:
 *
 * 1) generic device driver housekeeping
 *	_init, _fini, attach, detach, ops structures, etc.
 *
 * 2) communication channel setup
 *	Setup the communications link over the LDC channel that vdc uses to
 *	talk to the vDisk server. Initialise the descriptor ring which
 *	allows the LDC clients to transfer data via memory mappings.
 *
 * 3) Support exported to upper layers (filesystems, etc)
 *	The upper layers call into vdc via strategy(9E) and DKIO(7I)
 *	ioctl calls. vdc will copy the data to be written to the descriptor
 *	ring or map the buffer to store the data read by the vDisk
 *	server into the descriptor ring. It then sends a message to the
 *	vDisk server requesting it to complete the operation.
 *
 * 4) Handling responses from vDisk server.
 *	The vDisk server will ACK some or all of the messages vdc sends to it
 *	(this is configured during the handshake). Upon receipt of an ACK
 *	vdc will check the descriptor ring and signal to the upper layer
 *	code waiting on the IO.
581ae08745Sheppo */ 591ae08745Sheppo 60e1ebb9ecSlm66018 #include <sys/atomic.h> 611ae08745Sheppo #include <sys/conf.h> 621ae08745Sheppo #include <sys/disp.h> 631ae08745Sheppo #include <sys/ddi.h> 641ae08745Sheppo #include <sys/dkio.h> 651ae08745Sheppo #include <sys/efi_partition.h> 661ae08745Sheppo #include <sys/fcntl.h> 671ae08745Sheppo #include <sys/file.h> 68366a92acSlm66018 #include <sys/kstat.h> 691ae08745Sheppo #include <sys/mach_descrip.h> 701ae08745Sheppo #include <sys/modctl.h> 711ae08745Sheppo #include <sys/mdeg.h> 721ae08745Sheppo #include <sys/note.h> 731ae08745Sheppo #include <sys/open.h> 74d10e4ef2Snarayan #include <sys/sdt.h> 751ae08745Sheppo #include <sys/stat.h> 761ae08745Sheppo #include <sys/sunddi.h> 771ae08745Sheppo #include <sys/types.h> 781ae08745Sheppo #include <sys/promif.h> 792f5224aeSachartre #include <sys/var.h> 801ae08745Sheppo #include <sys/vtoc.h> 811ae08745Sheppo #include <sys/archsystm.h> 821ae08745Sheppo #include <sys/sysmacros.h> 831ae08745Sheppo 841ae08745Sheppo #include <sys/cdio.h> 851ae08745Sheppo #include <sys/dktp/fdisk.h> 8687a7269eSachartre #include <sys/dktp/dadkio.h> 872f5224aeSachartre #include <sys/mhd.h> 881ae08745Sheppo #include <sys/scsi/generic/sense.h> 892f5224aeSachartre #include <sys/scsi/impl/uscsi.h> 902f5224aeSachartre #include <sys/scsi/impl/services.h> 912f5224aeSachartre #include <sys/scsi/targets/sddef.h> 921ae08745Sheppo 931ae08745Sheppo #include <sys/ldoms.h> 941ae08745Sheppo #include <sys/ldc.h> 951ae08745Sheppo #include <sys/vio_common.h> 961ae08745Sheppo #include <sys/vio_mailbox.h> 9717cadca8Slm66018 #include <sys/vio_util.h> 981ae08745Sheppo #include <sys/vdsk_common.h> 991ae08745Sheppo #include <sys/vdsk_mailbox.h> 1001ae08745Sheppo #include <sys/vdc.h> 1011ae08745Sheppo 1021ae08745Sheppo /* 1031ae08745Sheppo * function prototypes 1041ae08745Sheppo */ 1051ae08745Sheppo 1061ae08745Sheppo /* standard driver functions */ 1071ae08745Sheppo static int vdc_open(dev_t *dev, int flag, int otyp, cred_t 
*cred); 1081ae08745Sheppo static int vdc_close(dev_t dev, int flag, int otyp, cred_t *cred); 1091ae08745Sheppo static int vdc_strategy(struct buf *buf); 1101ae08745Sheppo static int vdc_print(dev_t dev, char *str); 1111ae08745Sheppo static int vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk); 1121ae08745Sheppo static int vdc_read(dev_t dev, struct uio *uio, cred_t *cred); 1131ae08745Sheppo static int vdc_write(dev_t dev, struct uio *uio, cred_t *cred); 1141ae08745Sheppo static int vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 1151ae08745Sheppo cred_t *credp, int *rvalp); 1161ae08745Sheppo static int vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred); 1171ae08745Sheppo static int vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred); 1181ae08745Sheppo 1191ae08745Sheppo static int vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, 1201ae08745Sheppo void *arg, void **resultp); 1211ae08745Sheppo static int vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 1221ae08745Sheppo static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 1231ae08745Sheppo 1241ae08745Sheppo /* setup */ 1250d0c8d4bSnarayan static void vdc_min(struct buf *bufp); 1260a55fbb7Slm66018 static int vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen); 127655fd6a9Sachartre static int vdc_do_ldc_init(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_node); 1281ae08745Sheppo static int vdc_start_ldc_connection(vdc_t *vdc); 1291ae08745Sheppo static int vdc_create_device_nodes(vdc_t *vdc); 1304bac2208Snarayan static int vdc_create_device_nodes_efi(vdc_t *vdc); 1314bac2208Snarayan static int vdc_create_device_nodes_vtoc(vdc_t *vdc); 1321ae08745Sheppo static int vdc_create_device_nodes_props(vdc_t *vdc); 133366a92acSlm66018 static void vdc_create_io_kstats(vdc_t *vdc); 134366a92acSlm66018 static void vdc_create_err_kstats(vdc_t *vdc); 135366a92acSlm66018 static void vdc_set_err_kstats(vdc_t *vdc); 136655fd6a9Sachartre static int vdc_get_md_node(dev_info_t *dip, md_t **mdpp, 137655fd6a9Sachartre 
mde_cookie_t *vd_nodep, mde_cookie_t *vd_portp); 138655fd6a9Sachartre static int vdc_get_ldc_id(md_t *, mde_cookie_t, uint64_t *); 1390a55fbb7Slm66018 static int vdc_do_ldc_up(vdc_t *vdc); 1401ae08745Sheppo static void vdc_terminate_ldc(vdc_t *vdc); 1411ae08745Sheppo static int vdc_init_descriptor_ring(vdc_t *vdc); 1421ae08745Sheppo static void vdc_destroy_descriptor_ring(vdc_t *vdc); 1434bac2208Snarayan static int vdc_setup_devid(vdc_t *vdc); 144edcc0754Sachartre static void vdc_store_label_efi(vdc_t *, efi_gpt_t *, efi_gpe_t *); 14578fcd0a1Sachartre static void vdc_store_label_vtoc(vdc_t *, struct dk_geom *, struct vtoc *); 14678fcd0a1Sachartre static void vdc_store_label_unk(vdc_t *vdc); 14778fcd0a1Sachartre static boolean_t vdc_is_opened(vdc_t *vdc); 1481ae08745Sheppo 1491ae08745Sheppo /* handshake with vds */ 1500a55fbb7Slm66018 static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver); 1513af08d82Slm66018 static int vdc_ver_negotiation(vdc_t *vdcp); 1521ae08745Sheppo static int vdc_init_attr_negotiation(vdc_t *vdc); 1533af08d82Slm66018 static int vdc_attr_negotiation(vdc_t *vdcp); 1541ae08745Sheppo static int vdc_init_dring_negotiate(vdc_t *vdc); 1553af08d82Slm66018 static int vdc_dring_negotiation(vdc_t *vdcp); 1563af08d82Slm66018 static int vdc_send_rdx(vdc_t *vdcp); 1573af08d82Slm66018 static int vdc_rdx_exchange(vdc_t *vdcp); 1580a55fbb7Slm66018 static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg); 1591ae08745Sheppo 1600a55fbb7Slm66018 /* processing incoming messages from vDisk server */ 1611ae08745Sheppo static void vdc_process_msg_thread(vdc_t *vdc); 1623af08d82Slm66018 static int vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp); 1633af08d82Slm66018 1640a55fbb7Slm66018 static uint_t vdc_handle_cb(uint64_t event, caddr_t arg); 1653af08d82Slm66018 static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t *msg); 1660a55fbb7Slm66018 static int vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg); 1670a55fbb7Slm66018 static int 
vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg); 1680a55fbb7Slm66018 static int vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg); 1693af08d82Slm66018 static int vdc_send_request(vdc_t *vdcp, int operation, 1703af08d82Slm66018 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 1713af08d82Slm66018 int cb_type, void *cb_arg, vio_desc_direction_t dir); 1723af08d82Slm66018 static int vdc_map_to_shared_dring(vdc_t *vdcp, int idx); 1733af08d82Slm66018 static int vdc_populate_descriptor(vdc_t *vdcp, int operation, 1743af08d82Slm66018 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 1753af08d82Slm66018 int cb_type, void *cb_arg, vio_desc_direction_t dir); 1762f5224aeSachartre static int vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, 1772f5224aeSachartre size_t nbytes, int slice, diskaddr_t offset, int cb_type, 1782f5224aeSachartre void *cb_arg, vio_desc_direction_t dir, boolean_t); 1793af08d82Slm66018 1803af08d82Slm66018 static int vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp); 1813af08d82Slm66018 static int vdc_drain_response(vdc_t *vdcp); 1821ae08745Sheppo static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx); 1833af08d82Slm66018 static int vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep); 184e1ebb9ecSlm66018 static int vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg); 1851ae08745Sheppo 1861ae08745Sheppo /* dkio */ 1872f5224aeSachartre static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, 1882f5224aeSachartre int *rvalp); 189edcc0754Sachartre static int vd_process_efi_ioctl(void *vdisk, int cmd, uintptr_t arg); 19078fcd0a1Sachartre static void vdc_create_fake_geometry(vdc_t *vdc); 19178fcd0a1Sachartre static int vdc_validate_geometry(vdc_t *vdc); 19278fcd0a1Sachartre static void vdc_validate(vdc_t *vdc); 19378fcd0a1Sachartre static void vdc_validate_task(void *arg); 194d10e4ef2Snarayan static int vdc_null_copy_func(vdc_t *vdc, void *from, void *to, 195d10e4ef2Snarayan int 
mode, int dir); 1964bac2208Snarayan static int vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 1974bac2208Snarayan int mode, int dir); 1984bac2208Snarayan static int vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 1994bac2208Snarayan int mode, int dir); 200d10e4ef2Snarayan static int vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, 201d10e4ef2Snarayan int mode, int dir); 202d10e4ef2Snarayan static int vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, 203d10e4ef2Snarayan int mode, int dir); 204d10e4ef2Snarayan static int vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, 205d10e4ef2Snarayan int mode, int dir); 206d10e4ef2Snarayan static int vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, 207d10e4ef2Snarayan int mode, int dir); 2084bac2208Snarayan static int vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, 2094bac2208Snarayan int mode, int dir); 2104bac2208Snarayan static int vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, 2114bac2208Snarayan int mode, int dir); 2121ae08745Sheppo 2132f5224aeSachartre static void vdc_ownership_update(vdc_t *vdc, int ownership_flags); 2142f5224aeSachartre static int vdc_access_set(vdc_t *vdc, uint64_t flags, int mode); 2152f5224aeSachartre static vdc_io_t *vdc_failfast_io_queue(vdc_t *vdc, struct buf *buf); 2162f5224aeSachartre static int vdc_failfast_check_resv(vdc_t *vdc); 2172f5224aeSachartre 2181ae08745Sheppo /* 2191ae08745Sheppo * Module variables 2201ae08745Sheppo */ 221e1ebb9ecSlm66018 222e1ebb9ecSlm66018 /* 223e1ebb9ecSlm66018 * Tunable variables to control how long vdc waits before timing out on 224e1ebb9ecSlm66018 * various operations 225e1ebb9ecSlm66018 */ 2263c96341aSnarayan static int vdc_hshake_retries = 3; 227e1ebb9ecSlm66018 228655fd6a9Sachartre static int vdc_timeout = 0; /* units: seconds */ 229655fd6a9Sachartre 2303af08d82Slm66018 static uint64_t vdc_hz_min_ldc_delay; 2313af08d82Slm66018 static uint64_t vdc_min_timeout_ldc = 1 * MILLISEC; 2323af08d82Slm66018 static 
uint64_t vdc_hz_max_ldc_delay; 2333af08d82Slm66018 static uint64_t vdc_max_timeout_ldc = 100 * MILLISEC; 2343af08d82Slm66018 2353af08d82Slm66018 static uint64_t vdc_ldc_read_init_delay = 1 * MILLISEC; 2363af08d82Slm66018 static uint64_t vdc_ldc_read_max_delay = 100 * MILLISEC; 237e1ebb9ecSlm66018 238e1ebb9ecSlm66018 /* values for dumping - need to run in a tighter loop */ 239e1ebb9ecSlm66018 static uint64_t vdc_usec_timeout_dump = 100 * MILLISEC; /* 0.1s units: ns */ 240e1ebb9ecSlm66018 static int vdc_dump_retries = 100; 241e1ebb9ecSlm66018 2422f5224aeSachartre static uint16_t vdc_scsi_timeout = 60; /* 60s units: seconds */ 2432f5224aeSachartre 2442f5224aeSachartre static uint64_t vdc_ownership_delay = 6 * MICROSEC; /* 6s units: usec */ 2452f5224aeSachartre 246e1ebb9ecSlm66018 /* Count of the number of vdc instances attached */ 247e1ebb9ecSlm66018 static volatile uint32_t vdc_instance_count = 0; 2481ae08745Sheppo 2492f5224aeSachartre /* Tunable to log all SCSI errors */ 2502f5224aeSachartre static boolean_t vdc_scsi_log_error = B_FALSE; 2512f5224aeSachartre 2521ae08745Sheppo /* Soft state pointer */ 2531ae08745Sheppo static void *vdc_state; 2541ae08745Sheppo 2553af08d82Slm66018 /* 2563af08d82Slm66018 * Controlling the verbosity of the error/debug messages 2573af08d82Slm66018 * 2583af08d82Slm66018 * vdc_msglevel - controls level of messages 2593af08d82Slm66018 * vdc_matchinst - 64-bit variable where each bit corresponds 2603af08d82Slm66018 * to the vdc instance the vdc_msglevel applies. 2613af08d82Slm66018 */ 2623af08d82Slm66018 int vdc_msglevel = 0x0; 2633af08d82Slm66018 uint64_t vdc_matchinst = 0ull; 2641ae08745Sheppo 2650a55fbb7Slm66018 /* 2660a55fbb7Slm66018 * Supported vDisk protocol version pairs. 2670a55fbb7Slm66018 * 2680a55fbb7Slm66018 * The first array entry is the latest and preferred version. 
2690a55fbb7Slm66018 */ 27017cadca8Slm66018 static const vio_ver_t vdc_version[] = {{1, 1}}; 2711ae08745Sheppo 2721ae08745Sheppo static struct cb_ops vdc_cb_ops = { 2731ae08745Sheppo vdc_open, /* cb_open */ 2741ae08745Sheppo vdc_close, /* cb_close */ 2751ae08745Sheppo vdc_strategy, /* cb_strategy */ 2761ae08745Sheppo vdc_print, /* cb_print */ 2771ae08745Sheppo vdc_dump, /* cb_dump */ 2781ae08745Sheppo vdc_read, /* cb_read */ 2791ae08745Sheppo vdc_write, /* cb_write */ 2801ae08745Sheppo vdc_ioctl, /* cb_ioctl */ 2811ae08745Sheppo nodev, /* cb_devmap */ 2821ae08745Sheppo nodev, /* cb_mmap */ 2831ae08745Sheppo nodev, /* cb_segmap */ 2841ae08745Sheppo nochpoll, /* cb_chpoll */ 2851ae08745Sheppo ddi_prop_op, /* cb_prop_op */ 2861ae08745Sheppo NULL, /* cb_str */ 2871ae08745Sheppo D_MP | D_64BIT, /* cb_flag */ 2881ae08745Sheppo CB_REV, /* cb_rev */ 2891ae08745Sheppo vdc_aread, /* cb_aread */ 2901ae08745Sheppo vdc_awrite /* cb_awrite */ 2911ae08745Sheppo }; 2921ae08745Sheppo 2931ae08745Sheppo static struct dev_ops vdc_ops = { 2941ae08745Sheppo DEVO_REV, /* devo_rev */ 2951ae08745Sheppo 0, /* devo_refcnt */ 2961ae08745Sheppo vdc_getinfo, /* devo_getinfo */ 2971ae08745Sheppo nulldev, /* devo_identify */ 2981ae08745Sheppo nulldev, /* devo_probe */ 2991ae08745Sheppo vdc_attach, /* devo_attach */ 3001ae08745Sheppo vdc_detach, /* devo_detach */ 3011ae08745Sheppo nodev, /* devo_reset */ 3021ae08745Sheppo &vdc_cb_ops, /* devo_cb_ops */ 3031ae08745Sheppo NULL, /* devo_bus_ops */ 3041ae08745Sheppo nulldev /* devo_power */ 3051ae08745Sheppo }; 3061ae08745Sheppo 3071ae08745Sheppo static struct modldrv modldrv = { 3081ae08745Sheppo &mod_driverops, 309205eeb1aSlm66018 "virtual disk client", 3101ae08745Sheppo &vdc_ops, 3111ae08745Sheppo }; 3121ae08745Sheppo 3131ae08745Sheppo static struct modlinkage modlinkage = { 3141ae08745Sheppo MODREV_1, 3151ae08745Sheppo &modldrv, 3161ae08745Sheppo NULL 3171ae08745Sheppo }; 3181ae08745Sheppo 3191ae08745Sheppo /* 
-------------------------------------------------------------------------- */ 3201ae08745Sheppo 3211ae08745Sheppo /* 3221ae08745Sheppo * Device Driver housekeeping and setup 3231ae08745Sheppo */ 3241ae08745Sheppo 3251ae08745Sheppo int 3261ae08745Sheppo _init(void) 3271ae08745Sheppo { 3281ae08745Sheppo int status; 3291ae08745Sheppo 3301ae08745Sheppo if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0) 3311ae08745Sheppo return (status); 3321ae08745Sheppo if ((status = mod_install(&modlinkage)) != 0) 3331ae08745Sheppo ddi_soft_state_fini(&vdc_state); 3341ae08745Sheppo return (status); 3351ae08745Sheppo } 3361ae08745Sheppo 3371ae08745Sheppo int 3381ae08745Sheppo _info(struct modinfo *modinfop) 3391ae08745Sheppo { 3401ae08745Sheppo return (mod_info(&modlinkage, modinfop)); 3411ae08745Sheppo } 3421ae08745Sheppo 3431ae08745Sheppo int 3441ae08745Sheppo _fini(void) 3451ae08745Sheppo { 3461ae08745Sheppo int status; 3471ae08745Sheppo 3481ae08745Sheppo if ((status = mod_remove(&modlinkage)) != 0) 3491ae08745Sheppo return (status); 3501ae08745Sheppo ddi_soft_state_fini(&vdc_state); 3511ae08745Sheppo return (0); 3521ae08745Sheppo } 3531ae08745Sheppo 3541ae08745Sheppo static int 3551ae08745Sheppo vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 3561ae08745Sheppo { 3571ae08745Sheppo _NOTE(ARGUNUSED(dip)) 3581ae08745Sheppo 3590d0c8d4bSnarayan int instance = VDCUNIT((dev_t)arg); 3601ae08745Sheppo vdc_t *vdc = NULL; 3611ae08745Sheppo 3621ae08745Sheppo switch (cmd) { 3631ae08745Sheppo case DDI_INFO_DEVT2DEVINFO: 3641ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 3651ae08745Sheppo *resultp = NULL; 3661ae08745Sheppo return (DDI_FAILURE); 3671ae08745Sheppo } 3681ae08745Sheppo *resultp = vdc->dip; 3691ae08745Sheppo return (DDI_SUCCESS); 3701ae08745Sheppo case DDI_INFO_DEVT2INSTANCE: 3711ae08745Sheppo *resultp = (void *)(uintptr_t)instance; 3721ae08745Sheppo return (DDI_SUCCESS); 3731ae08745Sheppo default: 
3741ae08745Sheppo *resultp = NULL; 3751ae08745Sheppo return (DDI_FAILURE); 3761ae08745Sheppo } 3771ae08745Sheppo } 3781ae08745Sheppo 3791ae08745Sheppo static int 3801ae08745Sheppo vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3811ae08745Sheppo { 3822f5224aeSachartre kt_did_t failfast_tid, ownership_tid; 3831ae08745Sheppo int instance; 3841ae08745Sheppo int rv; 3851ae08745Sheppo vdc_t *vdc = NULL; 3861ae08745Sheppo 3871ae08745Sheppo switch (cmd) { 3881ae08745Sheppo case DDI_DETACH: 3891ae08745Sheppo /* the real work happens below */ 3901ae08745Sheppo break; 3911ae08745Sheppo case DDI_SUSPEND: 3921ae08745Sheppo /* nothing to do for this non-device */ 3931ae08745Sheppo return (DDI_SUCCESS); 3941ae08745Sheppo default: 3951ae08745Sheppo return (DDI_FAILURE); 3961ae08745Sheppo } 3971ae08745Sheppo 3981ae08745Sheppo ASSERT(cmd == DDI_DETACH); 3991ae08745Sheppo instance = ddi_get_instance(dip); 4003af08d82Slm66018 DMSGX(1, "[%d] Entered\n", instance); 4011ae08745Sheppo 4021ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 403e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 4041ae08745Sheppo return (DDI_FAILURE); 4051ae08745Sheppo } 4061ae08745Sheppo 4072f5224aeSachartre /* 4082f5224aeSachartre * This function is called when vdc is detached or if it has failed to 4092f5224aeSachartre * attach. In that case, the attach may have fail before the vdisk type 4102f5224aeSachartre * has been set so we can't call vdc_is_opened(). However as the attach 4112f5224aeSachartre * has failed, we know that the vdisk is not opened and we can safely 4122f5224aeSachartre * detach. 
4132f5224aeSachartre */ 4142f5224aeSachartre if (vdc->vdisk_type != VD_DISK_TYPE_UNK && vdc_is_opened(vdc)) { 4153af08d82Slm66018 DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance); 4161ae08745Sheppo return (DDI_FAILURE); 4171ae08745Sheppo } 4181ae08745Sheppo 41978fcd0a1Sachartre if (vdc->dkio_flush_pending) { 42078fcd0a1Sachartre DMSG(vdc, 0, 42178fcd0a1Sachartre "[%d] Cannot detach: %d outstanding DKIO flushes\n", 42278fcd0a1Sachartre instance, vdc->dkio_flush_pending); 42378fcd0a1Sachartre return (DDI_FAILURE); 42478fcd0a1Sachartre } 42578fcd0a1Sachartre 42678fcd0a1Sachartre if (vdc->validate_pending) { 42778fcd0a1Sachartre DMSG(vdc, 0, 42878fcd0a1Sachartre "[%d] Cannot detach: %d outstanding validate request\n", 42978fcd0a1Sachartre instance, vdc->validate_pending); 43078fcd0a1Sachartre return (DDI_FAILURE); 43178fcd0a1Sachartre } 43278fcd0a1Sachartre 4333af08d82Slm66018 DMSG(vdc, 0, "[%d] proceeding...\n", instance); 4343af08d82Slm66018 4352f5224aeSachartre /* If we took ownership, release ownership */ 4362f5224aeSachartre mutex_enter(&vdc->ownership_lock); 4372f5224aeSachartre if (vdc->ownership & VDC_OWNERSHIP_GRANTED) { 4382f5224aeSachartre rv = vdc_access_set(vdc, VD_ACCESS_SET_CLEAR, FKIOCTL); 4392f5224aeSachartre if (rv == 0) { 4402f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 4412f5224aeSachartre } 4422f5224aeSachartre } 4432f5224aeSachartre mutex_exit(&vdc->ownership_lock); 4442f5224aeSachartre 4453af08d82Slm66018 /* mark instance as detaching */ 4463af08d82Slm66018 vdc->lifecycle = VDC_LC_DETACHING; 4471ae08745Sheppo 4481ae08745Sheppo /* 4491ae08745Sheppo * try and disable callbacks to prevent another handshake 4501ae08745Sheppo */ 4511ae08745Sheppo rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE); 4523af08d82Slm66018 DMSG(vdc, 0, "callback disabled (rv=%d)\n", rv); 4531ae08745Sheppo 4541ae08745Sheppo if (vdc->initialized & VDC_THREAD) { 4553af08d82Slm66018 mutex_enter(&vdc->read_lock); 4563af08d82Slm66018 if 
((vdc->read_state == VDC_READ_WAITING) || 4573af08d82Slm66018 (vdc->read_state == VDC_READ_RESET)) { 4583af08d82Slm66018 vdc->read_state = VDC_READ_RESET; 4593af08d82Slm66018 cv_signal(&vdc->read_cv); 4601ae08745Sheppo } 4613af08d82Slm66018 4623af08d82Slm66018 mutex_exit(&vdc->read_lock); 4633af08d82Slm66018 4643af08d82Slm66018 /* wake up any thread waiting for connection to come online */ 4653af08d82Slm66018 mutex_enter(&vdc->lock); 4663af08d82Slm66018 if (vdc->state == VDC_STATE_INIT_WAITING) { 4673af08d82Slm66018 DMSG(vdc, 0, 4683af08d82Slm66018 "[%d] write reset - move to resetting state...\n", 4693af08d82Slm66018 instance); 4703af08d82Slm66018 vdc->state = VDC_STATE_RESETTING; 4713af08d82Slm66018 cv_signal(&vdc->initwait_cv); 4723af08d82Slm66018 } 4733af08d82Slm66018 mutex_exit(&vdc->lock); 4743af08d82Slm66018 4753af08d82Slm66018 /* now wait until state transitions to VDC_STATE_DETACH */ 4763af08d82Slm66018 thread_join(vdc->msg_proc_thr->t_did); 4773af08d82Slm66018 ASSERT(vdc->state == VDC_STATE_DETACH); 4783af08d82Slm66018 DMSG(vdc, 0, "[%d] Reset thread exit and join ..\n", 4793af08d82Slm66018 vdc->instance); 4801ae08745Sheppo } 4811ae08745Sheppo 4821ae08745Sheppo mutex_enter(&vdc->lock); 4831ae08745Sheppo 4841ae08745Sheppo if (vdc->initialized & VDC_DRING) 4851ae08745Sheppo vdc_destroy_descriptor_ring(vdc); 4861ae08745Sheppo 4871ae08745Sheppo if (vdc->initialized & VDC_LDC) 4881ae08745Sheppo vdc_terminate_ldc(vdc); 4891ae08745Sheppo 4902f5224aeSachartre if (vdc->failfast_thread) { 4912f5224aeSachartre failfast_tid = vdc->failfast_thread->t_did; 4922f5224aeSachartre vdc->failfast_interval = 0; 4932f5224aeSachartre cv_signal(&vdc->failfast_cv); 4942f5224aeSachartre } else { 4952f5224aeSachartre failfast_tid = 0; 4962f5224aeSachartre } 4972f5224aeSachartre 4982f5224aeSachartre if (vdc->ownership & VDC_OWNERSHIP_WANTED) { 4992f5224aeSachartre ownership_tid = vdc->ownership_thread->t_did; 5002f5224aeSachartre vdc->ownership = VDC_OWNERSHIP_NONE; 
5012f5224aeSachartre cv_signal(&vdc->ownership_cv); 5022f5224aeSachartre } else { 5032f5224aeSachartre ownership_tid = 0; 5042f5224aeSachartre } 5052f5224aeSachartre 5061ae08745Sheppo mutex_exit(&vdc->lock); 5071ae08745Sheppo 5082f5224aeSachartre if (failfast_tid != 0) 5092f5224aeSachartre thread_join(failfast_tid); 5102f5224aeSachartre 5112f5224aeSachartre if (ownership_tid != 0) 5122f5224aeSachartre thread_join(ownership_tid); 5132f5224aeSachartre 5141ae08745Sheppo if (vdc->initialized & VDC_MINOR) { 5151ae08745Sheppo ddi_prop_remove_all(dip); 5161ae08745Sheppo ddi_remove_minor_node(dip, NULL); 5171ae08745Sheppo } 5181ae08745Sheppo 519366a92acSlm66018 if (vdc->io_stats) { 520366a92acSlm66018 kstat_delete(vdc->io_stats); 521366a92acSlm66018 vdc->io_stats = NULL; 522366a92acSlm66018 } 523366a92acSlm66018 524366a92acSlm66018 if (vdc->err_stats) { 525366a92acSlm66018 kstat_delete(vdc->err_stats); 526366a92acSlm66018 vdc->err_stats = NULL; 527366a92acSlm66018 } 528366a92acSlm66018 5291ae08745Sheppo if (vdc->initialized & VDC_LOCKS) { 5301ae08745Sheppo mutex_destroy(&vdc->lock); 5313af08d82Slm66018 mutex_destroy(&vdc->read_lock); 5322f5224aeSachartre mutex_destroy(&vdc->ownership_lock); 5333af08d82Slm66018 cv_destroy(&vdc->initwait_cv); 5343af08d82Slm66018 cv_destroy(&vdc->dring_free_cv); 5353af08d82Slm66018 cv_destroy(&vdc->membind_cv); 5363af08d82Slm66018 cv_destroy(&vdc->sync_pending_cv); 5373af08d82Slm66018 cv_destroy(&vdc->sync_blocked_cv); 5383af08d82Slm66018 cv_destroy(&vdc->read_cv); 5393af08d82Slm66018 cv_destroy(&vdc->running_cv); 5402f5224aeSachartre cv_destroy(&vdc->ownership_cv); 5412f5224aeSachartre cv_destroy(&vdc->failfast_cv); 5422f5224aeSachartre cv_destroy(&vdc->failfast_io_cv); 5431ae08745Sheppo } 5441ae08745Sheppo 5451ae08745Sheppo if (vdc->minfo) 5461ae08745Sheppo kmem_free(vdc->minfo, sizeof (struct dk_minfo)); 5471ae08745Sheppo 5481ae08745Sheppo if (vdc->cinfo) 5491ae08745Sheppo kmem_free(vdc->cinfo, sizeof (struct dk_cinfo)); 5501ae08745Sheppo 
5511ae08745Sheppo if (vdc->vtoc) 5521ae08745Sheppo kmem_free(vdc->vtoc, sizeof (struct vtoc)); 5531ae08745Sheppo 55478fcd0a1Sachartre if (vdc->geom) 55578fcd0a1Sachartre kmem_free(vdc->geom, sizeof (struct dk_geom)); 5560a55fbb7Slm66018 5574bac2208Snarayan if (vdc->devid) { 5584bac2208Snarayan ddi_devid_unregister(dip); 5594bac2208Snarayan ddi_devid_free(vdc->devid); 5604bac2208Snarayan } 5614bac2208Snarayan 5621ae08745Sheppo if (vdc->initialized & VDC_SOFT_STATE) 5631ae08745Sheppo ddi_soft_state_free(vdc_state, instance); 5641ae08745Sheppo 5653af08d82Slm66018 DMSG(vdc, 0, "[%d] End %p\n", instance, (void *)vdc); 5661ae08745Sheppo 5671ae08745Sheppo return (DDI_SUCCESS); 5681ae08745Sheppo } 5691ae08745Sheppo 5701ae08745Sheppo 5711ae08745Sheppo static int 5721ae08745Sheppo vdc_do_attach(dev_info_t *dip) 5731ae08745Sheppo { 5741ae08745Sheppo int instance; 5751ae08745Sheppo vdc_t *vdc = NULL; 5761ae08745Sheppo int status; 577655fd6a9Sachartre md_t *mdp; 578655fd6a9Sachartre mde_cookie_t vd_node, vd_port; 5791ae08745Sheppo 5801ae08745Sheppo ASSERT(dip != NULL); 5811ae08745Sheppo 5821ae08745Sheppo instance = ddi_get_instance(dip); 5831ae08745Sheppo if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) { 584e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure", 585e1ebb9ecSlm66018 instance); 5861ae08745Sheppo return (DDI_FAILURE); 5871ae08745Sheppo } 5881ae08745Sheppo 5891ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 590e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 5911ae08745Sheppo return (DDI_FAILURE); 5921ae08745Sheppo } 5931ae08745Sheppo 5941ae08745Sheppo /* 5951ae08745Sheppo * We assign the value to initialized in this case to zero out the 5961ae08745Sheppo * variable and then set bits in it to indicate what has been done 5971ae08745Sheppo */ 5981ae08745Sheppo vdc->initialized = VDC_SOFT_STATE; 5991ae08745Sheppo 6003af08d82Slm66018 vdc_hz_min_ldc_delay = 
drv_usectohz(vdc_min_timeout_ldc); 6013af08d82Slm66018 vdc_hz_max_ldc_delay = drv_usectohz(vdc_max_timeout_ldc); 6021ae08745Sheppo 6031ae08745Sheppo vdc->dip = dip; 6041ae08745Sheppo vdc->instance = instance; 6051ae08745Sheppo vdc->vdisk_type = VD_DISK_TYPE_UNK; 6064bac2208Snarayan vdc->vdisk_label = VD_DISK_LABEL_UNK; 6073af08d82Slm66018 vdc->state = VDC_STATE_INIT; 6083af08d82Slm66018 vdc->lifecycle = VDC_LC_ATTACHING; 6091ae08745Sheppo vdc->ldc_state = 0; 6101ae08745Sheppo vdc->session_id = 0; 6111ae08745Sheppo vdc->block_size = DEV_BSIZE; 6128e6a2a04Slm66018 vdc->max_xfer_sz = maxphys / DEV_BSIZE; 6131ae08745Sheppo 61417cadca8Slm66018 /* 61517cadca8Slm66018 * We assume, for now, that the vDisk server will export 'read' 61617cadca8Slm66018 * operations to us at a minimum (this is needed because of checks 61717cadca8Slm66018 * in vdc for supported operations early in the handshake process). 61817cadca8Slm66018 * The vDisk server will return ENOTSUP if this is not the case. 61917cadca8Slm66018 * The value will be overwritten during the attribute exchange with 62017cadca8Slm66018 * the bitmask of operations exported by server. 
62117cadca8Slm66018 */ 62217cadca8Slm66018 vdc->operations = VD_OP_MASK_READ; 62317cadca8Slm66018 6241ae08745Sheppo vdc->vtoc = NULL; 62578fcd0a1Sachartre vdc->geom = NULL; 6261ae08745Sheppo vdc->cinfo = NULL; 6271ae08745Sheppo vdc->minfo = NULL; 6281ae08745Sheppo 6291ae08745Sheppo mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL); 6303af08d82Slm66018 cv_init(&vdc->initwait_cv, NULL, CV_DRIVER, NULL); 6313af08d82Slm66018 cv_init(&vdc->dring_free_cv, NULL, CV_DRIVER, NULL); 6323af08d82Slm66018 cv_init(&vdc->membind_cv, NULL, CV_DRIVER, NULL); 6333af08d82Slm66018 cv_init(&vdc->running_cv, NULL, CV_DRIVER, NULL); 6343af08d82Slm66018 6353af08d82Slm66018 vdc->threads_pending = 0; 6363af08d82Slm66018 vdc->sync_op_pending = B_FALSE; 6373af08d82Slm66018 vdc->sync_op_blocked = B_FALSE; 6383af08d82Slm66018 cv_init(&vdc->sync_pending_cv, NULL, CV_DRIVER, NULL); 6393af08d82Slm66018 cv_init(&vdc->sync_blocked_cv, NULL, CV_DRIVER, NULL); 6403af08d82Slm66018 6412f5224aeSachartre mutex_init(&vdc->ownership_lock, NULL, MUTEX_DRIVER, NULL); 6422f5224aeSachartre cv_init(&vdc->ownership_cv, NULL, CV_DRIVER, NULL); 6432f5224aeSachartre cv_init(&vdc->failfast_cv, NULL, CV_DRIVER, NULL); 6442f5224aeSachartre cv_init(&vdc->failfast_io_cv, NULL, CV_DRIVER, NULL); 6452f5224aeSachartre 6463af08d82Slm66018 /* init blocking msg read functionality */ 6473af08d82Slm66018 mutex_init(&vdc->read_lock, NULL, MUTEX_DRIVER, NULL); 6483af08d82Slm66018 cv_init(&vdc->read_cv, NULL, CV_DRIVER, NULL); 6493af08d82Slm66018 vdc->read_state = VDC_READ_IDLE; 6503af08d82Slm66018 6511ae08745Sheppo vdc->initialized |= VDC_LOCKS; 6521ae08745Sheppo 653655fd6a9Sachartre /* get device and port MD node for this disk instance */ 654655fd6a9Sachartre if (vdc_get_md_node(dip, &mdp, &vd_node, &vd_port) != 0) { 655655fd6a9Sachartre cmn_err(CE_NOTE, "[%d] Could not get machine description node", 656655fd6a9Sachartre instance); 657655fd6a9Sachartre return (DDI_FAILURE); 658655fd6a9Sachartre } 659655fd6a9Sachartre 
660655fd6a9Sachartre /* set the connection timeout */ 661655fd6a9Sachartre if (vd_port == NULL || (md_get_prop_val(mdp, vd_port, 662655fd6a9Sachartre VDC_MD_TIMEOUT, &vdc->ctimeout) != 0)) { 663655fd6a9Sachartre vdc->ctimeout = 0; 664655fd6a9Sachartre } 665655fd6a9Sachartre 6663af08d82Slm66018 /* initialise LDC channel which will be used to communicate with vds */ 667655fd6a9Sachartre status = vdc_do_ldc_init(vdc, mdp, vd_node); 668655fd6a9Sachartre 669655fd6a9Sachartre (void) md_fini_handle(mdp); 670655fd6a9Sachartre 671655fd6a9Sachartre if (status != 0) { 6723af08d82Slm66018 cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance); 6733af08d82Slm66018 goto return_status; 6743af08d82Slm66018 } 6753af08d82Slm66018 6763af08d82Slm66018 /* initialize the thread responsible for managing state with server */ 6773af08d82Slm66018 vdc->msg_proc_thr = thread_create(NULL, 0, vdc_process_msg_thread, 6781ae08745Sheppo vdc, 0, &p0, TS_RUN, minclsyspri); 6793af08d82Slm66018 if (vdc->msg_proc_thr == NULL) { 6801ae08745Sheppo cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread", 6811ae08745Sheppo instance); 6821ae08745Sheppo return (DDI_FAILURE); 6831ae08745Sheppo } 6843af08d82Slm66018 6851ae08745Sheppo vdc->initialized |= VDC_THREAD; 6861ae08745Sheppo 687366a92acSlm66018 /* Create the kstats for saving the I/O statistics used by iostat(1M) */ 688366a92acSlm66018 vdc_create_io_kstats(vdc); 689366a92acSlm66018 vdc_create_err_kstats(vdc); 690366a92acSlm66018 691e1ebb9ecSlm66018 atomic_inc_32(&vdc_instance_count); 6921ae08745Sheppo 6930a55fbb7Slm66018 /* 69478fcd0a1Sachartre * Check the disk label. This will send requests and do the handshake. 69578fcd0a1Sachartre * We don't really care about the disk label now. What we really need is 69678fcd0a1Sachartre * the handshake do be done so that we know the type of the disk (slice 69778fcd0a1Sachartre * or full disk) and the appropriate device nodes can be created. 
6980a55fbb7Slm66018 */ 69978fcd0a1Sachartre vdc->vdisk_label = VD_DISK_LABEL_UNK; 70078fcd0a1Sachartre vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); 70178fcd0a1Sachartre vdc->geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP); 70217cadca8Slm66018 vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 70378fcd0a1Sachartre 70478fcd0a1Sachartre mutex_enter(&vdc->lock); 70578fcd0a1Sachartre (void) vdc_validate_geometry(vdc); 70678fcd0a1Sachartre mutex_exit(&vdc->lock); 7071ae08745Sheppo 7081ae08745Sheppo /* 7091ae08745Sheppo * Now that we have the device info we can create the 7101ae08745Sheppo * device nodes and properties 7111ae08745Sheppo */ 7121ae08745Sheppo status = vdc_create_device_nodes(vdc); 7131ae08745Sheppo if (status) { 7143af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to create device nodes", 7151ae08745Sheppo instance); 7163af08d82Slm66018 goto return_status; 7171ae08745Sheppo } 7181ae08745Sheppo status = vdc_create_device_nodes_props(vdc); 7191ae08745Sheppo if (status) { 7203af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to create device nodes" 7210a55fbb7Slm66018 " properties (%d)", instance, status); 7223af08d82Slm66018 goto return_status; 7231ae08745Sheppo } 7241ae08745Sheppo 7254bac2208Snarayan /* 7264bac2208Snarayan * Setup devid 7274bac2208Snarayan */ 7284bac2208Snarayan if (vdc_setup_devid(vdc)) { 7293af08d82Slm66018 DMSG(vdc, 0, "[%d] No device id available\n", instance); 7304bac2208Snarayan } 7314bac2208Snarayan 732366a92acSlm66018 /* 733366a92acSlm66018 * Fill in the fields of the error statistics kstat that were not 734366a92acSlm66018 * available when creating the kstat 735366a92acSlm66018 */ 736366a92acSlm66018 vdc_set_err_kstats(vdc); 737366a92acSlm66018 7381ae08745Sheppo ddi_report_dev(dip); 7393af08d82Slm66018 vdc->lifecycle = VDC_LC_ONLINE; 7403af08d82Slm66018 DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance); 7411ae08745Sheppo 7423af08d82Slm66018 return_status: 7433af08d82Slm66018 DMSG(vdc, 0, "[%d] Attach 
completed\n", instance); 7441ae08745Sheppo return (status); 7451ae08745Sheppo } 7461ae08745Sheppo 7471ae08745Sheppo static int 7481ae08745Sheppo vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 7491ae08745Sheppo { 7501ae08745Sheppo int status; 7511ae08745Sheppo 7521ae08745Sheppo switch (cmd) { 7531ae08745Sheppo case DDI_ATTACH: 7541ae08745Sheppo if ((status = vdc_do_attach(dip)) != 0) 7551ae08745Sheppo (void) vdc_detach(dip, DDI_DETACH); 7561ae08745Sheppo return (status); 7571ae08745Sheppo case DDI_RESUME: 7581ae08745Sheppo /* nothing to do for this non-device */ 7591ae08745Sheppo return (DDI_SUCCESS); 7601ae08745Sheppo default: 7611ae08745Sheppo return (DDI_FAILURE); 7621ae08745Sheppo } 7631ae08745Sheppo } 7641ae08745Sheppo 7651ae08745Sheppo static int 766655fd6a9Sachartre vdc_do_ldc_init(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_node) 7671ae08745Sheppo { 7681ae08745Sheppo int status = 0; 7691ae08745Sheppo ldc_status_t ldc_state; 7701ae08745Sheppo ldc_attr_t ldc_attr; 7711ae08745Sheppo uint64_t ldc_id = 0; 7721ae08745Sheppo 7731ae08745Sheppo ASSERT(vdc != NULL); 7741ae08745Sheppo 7751ae08745Sheppo vdc->initialized |= VDC_LDC; 7761ae08745Sheppo 777655fd6a9Sachartre if ((status = vdc_get_ldc_id(mdp, vd_node, &ldc_id)) != 0) { 7783af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to get LDC channel ID property", 779e1ebb9ecSlm66018 vdc->instance); 7801ae08745Sheppo return (EIO); 7811ae08745Sheppo } 782655fd6a9Sachartre 783655fd6a9Sachartre DMSGX(0, "[%d] LDC id is 0x%lx\n", vdc->instance, ldc_id); 784655fd6a9Sachartre 7851ae08745Sheppo vdc->ldc_id = ldc_id; 7861ae08745Sheppo 7871ae08745Sheppo ldc_attr.devclass = LDC_DEV_BLK; 7881ae08745Sheppo ldc_attr.instance = vdc->instance; 7891ae08745Sheppo ldc_attr.mode = LDC_MODE_UNRELIABLE; /* unreliable transport */ 790e1ebb9ecSlm66018 ldc_attr.mtu = VD_LDC_MTU; 7911ae08745Sheppo 7921ae08745Sheppo if ((vdc->initialized & VDC_LDC_INIT) == 0) { 7931ae08745Sheppo status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle); 7941ae08745Sheppo if 
(status != 0) { 7953af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d", 7961ae08745Sheppo vdc->instance, ldc_id, status); 7971ae08745Sheppo return (status); 7981ae08745Sheppo } 7991ae08745Sheppo vdc->initialized |= VDC_LDC_INIT; 8001ae08745Sheppo } 8011ae08745Sheppo status = ldc_status(vdc->ldc_handle, &ldc_state); 8021ae08745Sheppo if (status != 0) { 8033af08d82Slm66018 DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]", 804e1ebb9ecSlm66018 vdc->instance, status); 8051ae08745Sheppo return (status); 8061ae08745Sheppo } 8071ae08745Sheppo vdc->ldc_state = ldc_state; 8081ae08745Sheppo 8091ae08745Sheppo if ((vdc->initialized & VDC_LDC_CB) == 0) { 8101ae08745Sheppo status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb, 8111ae08745Sheppo (caddr_t)vdc); 8121ae08745Sheppo if (status != 0) { 8133af08d82Slm66018 DMSG(vdc, 0, "[%d] LDC callback reg. failed (%d)", 814e1ebb9ecSlm66018 vdc->instance, status); 8151ae08745Sheppo return (status); 8161ae08745Sheppo } 8171ae08745Sheppo vdc->initialized |= VDC_LDC_CB; 8181ae08745Sheppo } 8191ae08745Sheppo 8201ae08745Sheppo vdc->initialized |= VDC_LDC; 8211ae08745Sheppo 8221ae08745Sheppo /* 8231ae08745Sheppo * At this stage we have initialised LDC, we will now try and open 8241ae08745Sheppo * the connection. 
8251ae08745Sheppo */ 8261ae08745Sheppo if (vdc->ldc_state == LDC_INIT) { 8271ae08745Sheppo status = ldc_open(vdc->ldc_handle); 8281ae08745Sheppo if (status != 0) { 8293af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d", 8301ae08745Sheppo vdc->instance, vdc->ldc_id, status); 8311ae08745Sheppo return (status); 8321ae08745Sheppo } 8331ae08745Sheppo vdc->initialized |= VDC_LDC_OPEN; 8341ae08745Sheppo } 8351ae08745Sheppo 8361ae08745Sheppo return (status); 8371ae08745Sheppo } 8381ae08745Sheppo 8391ae08745Sheppo static int 8401ae08745Sheppo vdc_start_ldc_connection(vdc_t *vdc) 8411ae08745Sheppo { 8421ae08745Sheppo int status = 0; 8431ae08745Sheppo 8441ae08745Sheppo ASSERT(vdc != NULL); 8451ae08745Sheppo 8463af08d82Slm66018 ASSERT(MUTEX_HELD(&vdc->lock)); 8471ae08745Sheppo 8480a55fbb7Slm66018 status = vdc_do_ldc_up(vdc); 8491ae08745Sheppo 8503af08d82Slm66018 DMSG(vdc, 0, "[%d] Finished bringing up LDC\n", vdc->instance); 8511ae08745Sheppo 8523af08d82Slm66018 return (status); 8533af08d82Slm66018 } 8543af08d82Slm66018 8553af08d82Slm66018 static int 8563af08d82Slm66018 vdc_stop_ldc_connection(vdc_t *vdcp) 8573af08d82Slm66018 { 8583af08d82Slm66018 int status; 8593af08d82Slm66018 8603af08d82Slm66018 DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n", 8613af08d82Slm66018 vdcp->state); 8623af08d82Slm66018 8633af08d82Slm66018 status = ldc_down(vdcp->ldc_handle); 8643af08d82Slm66018 DMSG(vdcp, 0, "ldc_down() = %d\n", status); 8653af08d82Slm66018 8663af08d82Slm66018 vdcp->initialized &= ~VDC_HANDSHAKE; 8673af08d82Slm66018 DMSG(vdcp, 0, "initialized=%x\n", vdcp->initialized); 8681ae08745Sheppo 8691ae08745Sheppo return (status); 8701ae08745Sheppo } 8711ae08745Sheppo 872366a92acSlm66018 static void 873366a92acSlm66018 vdc_create_io_kstats(vdc_t *vdc) 874366a92acSlm66018 { 875366a92acSlm66018 if (vdc->io_stats != NULL) { 876366a92acSlm66018 DMSG(vdc, 0, "[%d] I/O kstat already exists\n", vdc->instance); 877366a92acSlm66018 return; 878366a92acSlm66018 } 
879366a92acSlm66018 880366a92acSlm66018 vdc->io_stats = kstat_create(VDC_DRIVER_NAME, vdc->instance, NULL, 881366a92acSlm66018 "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT); 882366a92acSlm66018 if (vdc->io_stats != NULL) { 883366a92acSlm66018 vdc->io_stats->ks_lock = &vdc->lock; 884366a92acSlm66018 kstat_install(vdc->io_stats); 885366a92acSlm66018 } else { 886366a92acSlm66018 cmn_err(CE_NOTE, "[%d] Failed to create kstat: I/O statistics" 887366a92acSlm66018 " will not be gathered", vdc->instance); 888366a92acSlm66018 } 889366a92acSlm66018 } 890366a92acSlm66018 891366a92acSlm66018 static void 892366a92acSlm66018 vdc_create_err_kstats(vdc_t *vdc) 893366a92acSlm66018 { 894366a92acSlm66018 vd_err_stats_t *stp; 895366a92acSlm66018 char kstatmodule_err[KSTAT_STRLEN]; 896366a92acSlm66018 char kstatname[KSTAT_STRLEN]; 897366a92acSlm66018 int ndata = (sizeof (vd_err_stats_t) / sizeof (kstat_named_t)); 898366a92acSlm66018 int instance = vdc->instance; 899366a92acSlm66018 900366a92acSlm66018 if (vdc->err_stats != NULL) { 901366a92acSlm66018 DMSG(vdc, 0, "[%d] ERR kstat already exists\n", vdc->instance); 902366a92acSlm66018 return; 903366a92acSlm66018 } 904366a92acSlm66018 905366a92acSlm66018 (void) snprintf(kstatmodule_err, sizeof (kstatmodule_err), 906366a92acSlm66018 "%serr", VDC_DRIVER_NAME); 907366a92acSlm66018 (void) snprintf(kstatname, sizeof (kstatname), 908366a92acSlm66018 "%s%d,err", VDC_DRIVER_NAME, instance); 909366a92acSlm66018 910366a92acSlm66018 vdc->err_stats = kstat_create(kstatmodule_err, instance, kstatname, 911366a92acSlm66018 "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT); 912366a92acSlm66018 913366a92acSlm66018 if (vdc->err_stats == NULL) { 914366a92acSlm66018 cmn_err(CE_NOTE, "[%d] Failed to create kstat: Error statistics" 915366a92acSlm66018 " will not be gathered", instance); 916366a92acSlm66018 return; 917366a92acSlm66018 } 918366a92acSlm66018 919366a92acSlm66018 stp = (vd_err_stats_t *)vdc->err_stats->ks_data; 920366a92acSlm66018 
kstat_named_init(&stp->vd_softerrs, "Soft Errors", 921366a92acSlm66018 KSTAT_DATA_UINT32); 922366a92acSlm66018 kstat_named_init(&stp->vd_transerrs, "Transport Errors", 923366a92acSlm66018 KSTAT_DATA_UINT32); 924366a92acSlm66018 kstat_named_init(&stp->vd_protoerrs, "Protocol Errors", 925366a92acSlm66018 KSTAT_DATA_UINT32); 926366a92acSlm66018 kstat_named_init(&stp->vd_vid, "Vendor", 927366a92acSlm66018 KSTAT_DATA_CHAR); 928366a92acSlm66018 kstat_named_init(&stp->vd_pid, "Product", 929366a92acSlm66018 KSTAT_DATA_CHAR); 930366a92acSlm66018 kstat_named_init(&stp->vd_capacity, "Size", 931366a92acSlm66018 KSTAT_DATA_ULONGLONG); 932366a92acSlm66018 933366a92acSlm66018 vdc->err_stats->ks_update = nulldev; 934366a92acSlm66018 935366a92acSlm66018 kstat_install(vdc->err_stats); 936366a92acSlm66018 } 937366a92acSlm66018 938366a92acSlm66018 static void 939366a92acSlm66018 vdc_set_err_kstats(vdc_t *vdc) 940366a92acSlm66018 { 941366a92acSlm66018 vd_err_stats_t *stp; 942366a92acSlm66018 943366a92acSlm66018 if (vdc->err_stats == NULL) 944366a92acSlm66018 return; 945366a92acSlm66018 946366a92acSlm66018 mutex_enter(&vdc->lock); 947366a92acSlm66018 948366a92acSlm66018 stp = (vd_err_stats_t *)vdc->err_stats->ks_data; 949366a92acSlm66018 ASSERT(stp != NULL); 950366a92acSlm66018 951366a92acSlm66018 stp->vd_capacity.value.ui64 = vdc->vdisk_size * vdc->block_size; 952366a92acSlm66018 (void) strcpy(stp->vd_vid.value.c, "SUN"); 953366a92acSlm66018 (void) strcpy(stp->vd_pid.value.c, "VDSK"); 954366a92acSlm66018 955366a92acSlm66018 mutex_exit(&vdc->lock); 956366a92acSlm66018 } 957366a92acSlm66018 9584bac2208Snarayan static int 9594bac2208Snarayan vdc_create_device_nodes_efi(vdc_t *vdc) 9604bac2208Snarayan { 9614bac2208Snarayan ddi_remove_minor_node(vdc->dip, "h"); 9624bac2208Snarayan ddi_remove_minor_node(vdc->dip, "h,raw"); 9634bac2208Snarayan 9644bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK, 9654bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 
9664bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 9674bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'", 9684bac2208Snarayan vdc->instance); 9694bac2208Snarayan return (EIO); 9704bac2208Snarayan } 9714bac2208Snarayan 9724bac2208Snarayan /* if any device node is created we set this flag */ 9734bac2208Snarayan vdc->initialized |= VDC_MINOR; 9744bac2208Snarayan 9754bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR, 9764bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 9774bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 9784bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd,raw'", 9794bac2208Snarayan vdc->instance); 9804bac2208Snarayan return (EIO); 9814bac2208Snarayan } 9824bac2208Snarayan 9834bac2208Snarayan return (0); 9844bac2208Snarayan } 9854bac2208Snarayan 9864bac2208Snarayan static int 9874bac2208Snarayan vdc_create_device_nodes_vtoc(vdc_t *vdc) 9884bac2208Snarayan { 9894bac2208Snarayan ddi_remove_minor_node(vdc->dip, "wd"); 9904bac2208Snarayan ddi_remove_minor_node(vdc->dip, "wd,raw"); 9914bac2208Snarayan 9924bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "h", S_IFBLK, 9934bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 9944bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 9954bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'", 9964bac2208Snarayan vdc->instance); 9974bac2208Snarayan return (EIO); 9984bac2208Snarayan } 9994bac2208Snarayan 10004bac2208Snarayan /* if any device node is created we set this flag */ 10014bac2208Snarayan vdc->initialized |= VDC_MINOR; 10024bac2208Snarayan 10034bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR, 10044bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 10054bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 10064bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h,raw'", 10074bac2208Snarayan vdc->instance); 10084bac2208Snarayan return (EIO); 10094bac2208Snarayan } 
10104bac2208Snarayan 10114bac2208Snarayan return (0); 10124bac2208Snarayan } 10131ae08745Sheppo 10141ae08745Sheppo /* 10151ae08745Sheppo * Function: 10161ae08745Sheppo * vdc_create_device_nodes 10171ae08745Sheppo * 10181ae08745Sheppo * Description: 10191ae08745Sheppo * This function creates the block and character device nodes under 10201ae08745Sheppo * /devices along with the node properties. It is called as part of 10211ae08745Sheppo * the attach(9E) of the instance during the handshake with vds after 10221ae08745Sheppo * vds has sent the attributes to vdc. 10231ae08745Sheppo * 10241ae08745Sheppo * If the device is of type VD_DISK_TYPE_SLICE then the minor node 10251ae08745Sheppo * of 2 is used in keeping with the Solaris convention that slice 2 10261ae08745Sheppo * refers to a whole disk. Slices start at 'a' 10271ae08745Sheppo * 10281ae08745Sheppo * Parameters: 10291ae08745Sheppo * vdc - soft state pointer 10301ae08745Sheppo * 10311ae08745Sheppo * Return Values 10321ae08745Sheppo * 0 - Success 10331ae08745Sheppo * EIO - Failed to create node 10341ae08745Sheppo * EINVAL - Unknown type of disk exported 10351ae08745Sheppo */ 10361ae08745Sheppo static int 10371ae08745Sheppo vdc_create_device_nodes(vdc_t *vdc) 10381ae08745Sheppo { 10394bac2208Snarayan char name[sizeof ("s,raw")]; 10401ae08745Sheppo dev_info_t *dip = NULL; 10414bac2208Snarayan int instance, status; 10421ae08745Sheppo int num_slices = 1; 10431ae08745Sheppo int i; 10441ae08745Sheppo 10451ae08745Sheppo ASSERT(vdc != NULL); 10461ae08745Sheppo 10471ae08745Sheppo instance = vdc->instance; 10481ae08745Sheppo dip = vdc->dip; 10491ae08745Sheppo 10501ae08745Sheppo switch (vdc->vdisk_type) { 10511ae08745Sheppo case VD_DISK_TYPE_DISK: 10521ae08745Sheppo num_slices = V_NUMPAR; 10531ae08745Sheppo break; 10541ae08745Sheppo case VD_DISK_TYPE_SLICE: 10551ae08745Sheppo num_slices = 1; 10561ae08745Sheppo break; 10571ae08745Sheppo case VD_DISK_TYPE_UNK: 10581ae08745Sheppo default: 10591ae08745Sheppo return (EINVAL); 
10601ae08745Sheppo } 10611ae08745Sheppo 10624bac2208Snarayan /* 10634bac2208Snarayan * Minor nodes are different for EFI disks: EFI disks do not have 10644bac2208Snarayan * a minor node 'g' for the minor number corresponding to slice 10654bac2208Snarayan * VD_EFI_WD_SLICE (slice 7) instead they have a minor node 'wd' 10664bac2208Snarayan * representing the whole disk. 10674bac2208Snarayan */ 10681ae08745Sheppo for (i = 0; i < num_slices; i++) { 10694bac2208Snarayan 10704bac2208Snarayan if (i == VD_EFI_WD_SLICE) { 10714bac2208Snarayan if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 10724bac2208Snarayan status = vdc_create_device_nodes_efi(vdc); 10734bac2208Snarayan else 10744bac2208Snarayan status = vdc_create_device_nodes_vtoc(vdc); 10754bac2208Snarayan if (status != 0) 10764bac2208Snarayan return (status); 10774bac2208Snarayan continue; 10784bac2208Snarayan } 10794bac2208Snarayan 10801ae08745Sheppo (void) snprintf(name, sizeof (name), "%c", 'a' + i); 10811ae08745Sheppo if (ddi_create_minor_node(dip, name, S_IFBLK, 10821ae08745Sheppo VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 1083e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'", 1084e1ebb9ecSlm66018 instance, name); 10851ae08745Sheppo return (EIO); 10861ae08745Sheppo } 10871ae08745Sheppo 10881ae08745Sheppo /* if any device node is created we set this flag */ 10891ae08745Sheppo vdc->initialized |= VDC_MINOR; 10901ae08745Sheppo 109187a7269eSachartre (void) snprintf(name, sizeof (name), "%c%s", 'a' + i, ",raw"); 109287a7269eSachartre 10931ae08745Sheppo if (ddi_create_minor_node(dip, name, S_IFCHR, 10941ae08745Sheppo VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 1095e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'", 1096e1ebb9ecSlm66018 instance, name); 10971ae08745Sheppo return (EIO); 10981ae08745Sheppo } 10991ae08745Sheppo } 11001ae08745Sheppo 11011ae08745Sheppo return (0); 11021ae08745Sheppo } 11031ae08745Sheppo 11041ae08745Sheppo /* 
11051ae08745Sheppo * Function: 11061ae08745Sheppo * vdc_create_device_nodes_props 11071ae08745Sheppo * 11081ae08745Sheppo * Description: 11091ae08745Sheppo * This function creates the block and character device nodes under 11101ae08745Sheppo * /devices along with the node properties. It is called as part of 11111ae08745Sheppo * the attach(9E) of the instance during the handshake with vds after 11121ae08745Sheppo * vds has sent the attributes to vdc. 11131ae08745Sheppo * 11141ae08745Sheppo * Parameters: 11151ae08745Sheppo * vdc - soft state pointer 11161ae08745Sheppo * 11171ae08745Sheppo * Return Values 11181ae08745Sheppo * 0 - Success 11191ae08745Sheppo * EIO - Failed to create device node property 11201ae08745Sheppo * EINVAL - Unknown type of disk exported 11211ae08745Sheppo */ 11221ae08745Sheppo static int 11231ae08745Sheppo vdc_create_device_nodes_props(vdc_t *vdc) 11241ae08745Sheppo { 11251ae08745Sheppo dev_info_t *dip = NULL; 11261ae08745Sheppo int instance; 11271ae08745Sheppo int num_slices = 1; 11281ae08745Sheppo int64_t size = 0; 11291ae08745Sheppo dev_t dev; 11301ae08745Sheppo int rv; 11311ae08745Sheppo int i; 11321ae08745Sheppo 11331ae08745Sheppo ASSERT(vdc != NULL); 11341ae08745Sheppo 11351ae08745Sheppo instance = vdc->instance; 11361ae08745Sheppo dip = vdc->dip; 11371ae08745Sheppo 11381ae08745Sheppo switch (vdc->vdisk_type) { 11391ae08745Sheppo case VD_DISK_TYPE_DISK: 11401ae08745Sheppo num_slices = V_NUMPAR; 11411ae08745Sheppo break; 11421ae08745Sheppo case VD_DISK_TYPE_SLICE: 11431ae08745Sheppo num_slices = 1; 11441ae08745Sheppo break; 11451ae08745Sheppo case VD_DISK_TYPE_UNK: 11461ae08745Sheppo default: 11471ae08745Sheppo return (EINVAL); 11481ae08745Sheppo } 11491ae08745Sheppo 115078fcd0a1Sachartre if (vdc->vdisk_label == VD_DISK_LABEL_UNK) { 115178fcd0a1Sachartre /* remove all properties */ 115278fcd0a1Sachartre for (i = 0; i < num_slices; i++) { 115378fcd0a1Sachartre dev = makedevice(ddi_driver_major(dip), 115478fcd0a1Sachartre 
VD_MAKE_DEV(instance, i)); 115578fcd0a1Sachartre (void) ddi_prop_remove(dev, dip, VDC_SIZE_PROP_NAME); 115678fcd0a1Sachartre (void) ddi_prop_remove(dev, dip, VDC_NBLOCKS_PROP_NAME); 115778fcd0a1Sachartre } 115878fcd0a1Sachartre return (0); 115978fcd0a1Sachartre } 116078fcd0a1Sachartre 11611ae08745Sheppo for (i = 0; i < num_slices; i++) { 11621ae08745Sheppo dev = makedevice(ddi_driver_major(dip), 11631ae08745Sheppo VD_MAKE_DEV(instance, i)); 11641ae08745Sheppo 1165edcc0754Sachartre size = vdc->slice[i].nblocks * vdc->block_size; 11663af08d82Slm66018 DMSG(vdc, 0, "[%d] sz %ld (%ld Mb) p_size %lx\n", 1167e1ebb9ecSlm66018 instance, size, size / (1024 * 1024), 1168edcc0754Sachartre vdc->slice[i].nblocks); 11691ae08745Sheppo 11701ae08745Sheppo rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size); 11711ae08745Sheppo if (rv != DDI_PROP_SUCCESS) { 1172e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop of [%ld]", 1173e1ebb9ecSlm66018 instance, VDC_SIZE_PROP_NAME, size); 11741ae08745Sheppo return (EIO); 11751ae08745Sheppo } 11761ae08745Sheppo 11771ae08745Sheppo rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME, 11781ae08745Sheppo lbtodb(size)); 11791ae08745Sheppo if (rv != DDI_PROP_SUCCESS) { 1180e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop [%llu]", 11811ae08745Sheppo instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size)); 11821ae08745Sheppo return (EIO); 11831ae08745Sheppo } 11841ae08745Sheppo } 11851ae08745Sheppo 11861ae08745Sheppo return (0); 11871ae08745Sheppo } 11881ae08745Sheppo 118978fcd0a1Sachartre /* 119078fcd0a1Sachartre * Function: 119178fcd0a1Sachartre * vdc_is_opened 119278fcd0a1Sachartre * 119378fcd0a1Sachartre * Description: 119478fcd0a1Sachartre * This function checks if any slice of a given virtual disk is 119578fcd0a1Sachartre * currently opened. 
119678fcd0a1Sachartre * 119778fcd0a1Sachartre * Parameters: 119878fcd0a1Sachartre * vdc - soft state pointer 119978fcd0a1Sachartre * 120078fcd0a1Sachartre * Return Values 120178fcd0a1Sachartre * B_TRUE - at least one slice is opened. 120278fcd0a1Sachartre * B_FALSE - no slice is opened. 120378fcd0a1Sachartre */ 120478fcd0a1Sachartre static boolean_t 120578fcd0a1Sachartre vdc_is_opened(vdc_t *vdc) 120678fcd0a1Sachartre { 120778fcd0a1Sachartre int i, nslices; 120878fcd0a1Sachartre 120978fcd0a1Sachartre switch (vdc->vdisk_type) { 121078fcd0a1Sachartre case VD_DISK_TYPE_DISK: 121178fcd0a1Sachartre nslices = V_NUMPAR; 121278fcd0a1Sachartre break; 121378fcd0a1Sachartre case VD_DISK_TYPE_SLICE: 121478fcd0a1Sachartre nslices = 1; 121578fcd0a1Sachartre break; 121678fcd0a1Sachartre case VD_DISK_TYPE_UNK: 121778fcd0a1Sachartre default: 121878fcd0a1Sachartre ASSERT(0); 121978fcd0a1Sachartre } 122078fcd0a1Sachartre 122178fcd0a1Sachartre /* check if there's any layered open */ 122278fcd0a1Sachartre for (i = 0; i < nslices; i++) { 122378fcd0a1Sachartre if (vdc->open_lyr[i] > 0) 122478fcd0a1Sachartre return (B_TRUE); 122578fcd0a1Sachartre } 122678fcd0a1Sachartre 122778fcd0a1Sachartre /* check if there is any other kind of open */ 122878fcd0a1Sachartre for (i = 0; i < OTYPCNT; i++) { 122978fcd0a1Sachartre if (vdc->open[i] != 0) 123078fcd0a1Sachartre return (B_TRUE); 123178fcd0a1Sachartre } 123278fcd0a1Sachartre 123378fcd0a1Sachartre return (B_FALSE); 123478fcd0a1Sachartre } 123578fcd0a1Sachartre 123678fcd0a1Sachartre static int 123778fcd0a1Sachartre vdc_mark_opened(vdc_t *vdc, int slice, int flag, int otyp) 123878fcd0a1Sachartre { 123978fcd0a1Sachartre uint8_t slicemask; 124078fcd0a1Sachartre int i; 124178fcd0a1Sachartre 124278fcd0a1Sachartre ASSERT(otyp < OTYPCNT); 124378fcd0a1Sachartre ASSERT(slice < V_NUMPAR); 124478fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 124578fcd0a1Sachartre 124678fcd0a1Sachartre slicemask = 1 << slice; 124778fcd0a1Sachartre 124878fcd0a1Sachartre /* 
check if slice is already exclusively opened */ 124978fcd0a1Sachartre if (vdc->open_excl & slicemask) 125078fcd0a1Sachartre return (EBUSY); 125178fcd0a1Sachartre 125278fcd0a1Sachartre /* if open exclusive, check if slice is already opened */ 125378fcd0a1Sachartre if (flag & FEXCL) { 125478fcd0a1Sachartre if (vdc->open_lyr[slice] > 0) 125578fcd0a1Sachartre return (EBUSY); 125678fcd0a1Sachartre for (i = 0; i < OTYPCNT; i++) { 125778fcd0a1Sachartre if (vdc->open[i] & slicemask) 125878fcd0a1Sachartre return (EBUSY); 125978fcd0a1Sachartre } 126078fcd0a1Sachartre vdc->open_excl |= slicemask; 126178fcd0a1Sachartre } 126278fcd0a1Sachartre 126378fcd0a1Sachartre /* mark slice as opened */ 126478fcd0a1Sachartre if (otyp == OTYP_LYR) { 126578fcd0a1Sachartre vdc->open_lyr[slice]++; 126678fcd0a1Sachartre } else { 126778fcd0a1Sachartre vdc->open[otyp] |= slicemask; 126878fcd0a1Sachartre } 126978fcd0a1Sachartre 127078fcd0a1Sachartre return (0); 127178fcd0a1Sachartre } 127278fcd0a1Sachartre 127378fcd0a1Sachartre static void 127478fcd0a1Sachartre vdc_mark_closed(vdc_t *vdc, int slice, int flag, int otyp) 127578fcd0a1Sachartre { 127678fcd0a1Sachartre uint8_t slicemask; 127778fcd0a1Sachartre 127878fcd0a1Sachartre ASSERT(otyp < OTYPCNT); 127978fcd0a1Sachartre ASSERT(slice < V_NUMPAR); 128078fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 128178fcd0a1Sachartre 128278fcd0a1Sachartre slicemask = 1 << slice; 128378fcd0a1Sachartre 128478fcd0a1Sachartre if (otyp == OTYP_LYR) { 128578fcd0a1Sachartre ASSERT(vdc->open_lyr[slice] > 0); 128678fcd0a1Sachartre vdc->open_lyr[slice]--; 128778fcd0a1Sachartre } else { 128878fcd0a1Sachartre vdc->open[otyp] &= ~slicemask; 128978fcd0a1Sachartre } 129078fcd0a1Sachartre 129178fcd0a1Sachartre if (flag & FEXCL) 129278fcd0a1Sachartre vdc->open_excl &= ~slicemask; 129378fcd0a1Sachartre } 129478fcd0a1Sachartre 12951ae08745Sheppo static int 12961ae08745Sheppo vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 12971ae08745Sheppo { 12981ae08745Sheppo 
_NOTE(ARGUNUSED(cred)) 12991ae08745Sheppo 1300*179e09c2Sachartre int instance, nodelay; 130178fcd0a1Sachartre int slice, status = 0; 13021ae08745Sheppo vdc_t *vdc; 13031ae08745Sheppo 13041ae08745Sheppo ASSERT(dev != NULL); 13050d0c8d4bSnarayan instance = VDCUNIT(*dev); 13061ae08745Sheppo 130778fcd0a1Sachartre if (otyp >= OTYPCNT) 13081ae08745Sheppo return (EINVAL); 13091ae08745Sheppo 13101ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1311e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 13121ae08745Sheppo return (ENXIO); 13131ae08745Sheppo } 13141ae08745Sheppo 13153af08d82Slm66018 DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n", 13163af08d82Slm66018 getminor(*dev), flag, otyp); 13171ae08745Sheppo 131878fcd0a1Sachartre slice = VDCPART(*dev); 131978fcd0a1Sachartre 1320*179e09c2Sachartre nodelay = flag & (FNDELAY | FNONBLOCK); 1321*179e09c2Sachartre 1322*179e09c2Sachartre if ((flag & FWRITE) && (!nodelay) && 1323*179e09c2Sachartre !(VD_OP_SUPPORTED(vdc->operations, VD_OP_BWRITE))) { 1324*179e09c2Sachartre return (EROFS); 1325*179e09c2Sachartre } 1326*179e09c2Sachartre 13271ae08745Sheppo mutex_enter(&vdc->lock); 132878fcd0a1Sachartre 132978fcd0a1Sachartre status = vdc_mark_opened(vdc, slice, flag, otyp); 133078fcd0a1Sachartre 133178fcd0a1Sachartre if (status != 0) { 133278fcd0a1Sachartre mutex_exit(&vdc->lock); 133378fcd0a1Sachartre return (status); 133478fcd0a1Sachartre } 133578fcd0a1Sachartre 1336*179e09c2Sachartre if (nodelay) { 133778fcd0a1Sachartre 133878fcd0a1Sachartre /* don't resubmit a validate request if there's already one */ 133978fcd0a1Sachartre if (vdc->validate_pending > 0) { 134078fcd0a1Sachartre mutex_exit(&vdc->lock); 134178fcd0a1Sachartre return (0); 134278fcd0a1Sachartre } 134378fcd0a1Sachartre 134478fcd0a1Sachartre /* call vdc_validate() asynchronously to avoid blocking */ 134578fcd0a1Sachartre if (taskq_dispatch(system_taskq, vdc_validate_task, 134678fcd0a1Sachartre (void *)vdc, 
TQ_NOSLEEP) == NULL) { 134778fcd0a1Sachartre vdc_mark_closed(vdc, slice, flag, otyp); 134878fcd0a1Sachartre mutex_exit(&vdc->lock); 134978fcd0a1Sachartre return (ENXIO); 135078fcd0a1Sachartre } 135178fcd0a1Sachartre 135278fcd0a1Sachartre vdc->validate_pending++; 135378fcd0a1Sachartre mutex_exit(&vdc->lock); 135478fcd0a1Sachartre return (0); 135578fcd0a1Sachartre } 135678fcd0a1Sachartre 13571ae08745Sheppo mutex_exit(&vdc->lock); 13581ae08745Sheppo 135978fcd0a1Sachartre vdc_validate(vdc); 136078fcd0a1Sachartre 136178fcd0a1Sachartre mutex_enter(&vdc->lock); 136278fcd0a1Sachartre 136378fcd0a1Sachartre if (vdc->vdisk_label == VD_DISK_LABEL_UNK || 1364edcc0754Sachartre vdc->slice[slice].nblocks == 0) { 136578fcd0a1Sachartre vdc_mark_closed(vdc, slice, flag, otyp); 136678fcd0a1Sachartre status = EIO; 136778fcd0a1Sachartre } 136878fcd0a1Sachartre 136978fcd0a1Sachartre mutex_exit(&vdc->lock); 137078fcd0a1Sachartre 137178fcd0a1Sachartre return (status); 13721ae08745Sheppo } 13731ae08745Sheppo 13741ae08745Sheppo static int 13751ae08745Sheppo vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 13761ae08745Sheppo { 13771ae08745Sheppo _NOTE(ARGUNUSED(cred)) 13781ae08745Sheppo 13791ae08745Sheppo int instance; 138078fcd0a1Sachartre int slice; 13812f5224aeSachartre int rv, rval; 13821ae08745Sheppo vdc_t *vdc; 13831ae08745Sheppo 13840d0c8d4bSnarayan instance = VDCUNIT(dev); 13851ae08745Sheppo 138678fcd0a1Sachartre if (otyp >= OTYPCNT) 13871ae08745Sheppo return (EINVAL); 13881ae08745Sheppo 13891ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1390e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 13911ae08745Sheppo return (ENXIO); 13921ae08745Sheppo } 13931ae08745Sheppo 13943af08d82Slm66018 DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp); 13951ae08745Sheppo 139678fcd0a1Sachartre slice = VDCPART(dev); 139778fcd0a1Sachartre 13988259acd8Szk194757 /* 13998259acd8Szk194757 * Attempt to flush the W$ on a close 
operation. If this is 14008259acd8Szk194757 * not a supported IOCTL command or the backing device is read-only 14018259acd8Szk194757 * do not fail the close operation. 14028259acd8Szk194757 */ 14032f5224aeSachartre rv = vd_process_ioctl(dev, DKIOCFLUSHWRITECACHE, NULL, FKIOCTL, &rval); 14048259acd8Szk194757 14058259acd8Szk194757 if (rv != 0 && rv != ENOTSUP && rv != ENOTTY && rv != EROFS) { 14068259acd8Szk194757 DMSG(vdc, 0, "[%d] flush failed with error %d on close\n", 14078259acd8Szk194757 instance, rv); 14088259acd8Szk194757 return (EIO); 14098259acd8Szk194757 } 14108259acd8Szk194757 14111ae08745Sheppo mutex_enter(&vdc->lock); 141278fcd0a1Sachartre vdc_mark_closed(vdc, slice, flag, otyp); 14131ae08745Sheppo mutex_exit(&vdc->lock); 14141ae08745Sheppo 14151ae08745Sheppo return (0); 14161ae08745Sheppo } 14171ae08745Sheppo 14181ae08745Sheppo static int 14191ae08745Sheppo vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 14201ae08745Sheppo { 14211ae08745Sheppo _NOTE(ARGUNUSED(credp)) 14221ae08745Sheppo 14232f5224aeSachartre return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode, rvalp)); 14241ae08745Sheppo } 14251ae08745Sheppo 14261ae08745Sheppo static int 14271ae08745Sheppo vdc_print(dev_t dev, char *str) 14281ae08745Sheppo { 14290d0c8d4bSnarayan cmn_err(CE_NOTE, "vdc%d: %s", VDCUNIT(dev), str); 14301ae08745Sheppo return (0); 14311ae08745Sheppo } 14321ae08745Sheppo 14331ae08745Sheppo static int 14341ae08745Sheppo vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 14351ae08745Sheppo { 1436d10e4ef2Snarayan int rv; 1437d10e4ef2Snarayan size_t nbytes = nblk * DEV_BSIZE; 14380d0c8d4bSnarayan int instance = VDCUNIT(dev); 1439d10e4ef2Snarayan vdc_t *vdc = NULL; 14401ae08745Sheppo 14411ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1442e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 14431ae08745Sheppo return (ENXIO); 14441ae08745Sheppo } 14451ae08745Sheppo 
14463af08d82Slm66018 DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n", 14473af08d82Slm66018 instance, nbytes, blkno, (void *)addr); 14483af08d82Slm66018 rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes, 14490d0c8d4bSnarayan VDCPART(dev), blkno, CB_STRATEGY, 0, VIO_write_dir); 14503af08d82Slm66018 if (rv) { 14513af08d82Slm66018 DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv); 14521ae08745Sheppo return (rv); 14531ae08745Sheppo } 14541ae08745Sheppo 14553af08d82Slm66018 if (ddi_in_panic()) 14563af08d82Slm66018 (void) vdc_drain_response(vdc); 14573af08d82Slm66018 14583af08d82Slm66018 DMSG(vdc, 0, "[%d] End\n", instance); 14593af08d82Slm66018 14603af08d82Slm66018 return (0); 14613af08d82Slm66018 } 14623af08d82Slm66018 14631ae08745Sheppo /* -------------------------------------------------------------------------- */ 14641ae08745Sheppo 14651ae08745Sheppo /* 14661ae08745Sheppo * Disk access routines 14671ae08745Sheppo * 14681ae08745Sheppo */ 14691ae08745Sheppo 14701ae08745Sheppo /* 14711ae08745Sheppo * vdc_strategy() 14721ae08745Sheppo * 14731ae08745Sheppo * Return Value: 14741ae08745Sheppo * 0: As per strategy(9E), the strategy() function must return 0 14751ae08745Sheppo * [ bioerror(9f) sets b_flags to the proper error code ] 14761ae08745Sheppo */ 14771ae08745Sheppo static int 14781ae08745Sheppo vdc_strategy(struct buf *buf) 14791ae08745Sheppo { 14801ae08745Sheppo int rv = -1; 14811ae08745Sheppo vdc_t *vdc = NULL; 14820d0c8d4bSnarayan int instance = VDCUNIT(buf->b_edev); 14831ae08745Sheppo int op = (buf->b_flags & B_READ) ? 
VD_OP_BREAD : VD_OP_BWRITE; 148487a7269eSachartre int slice; 14851ae08745Sheppo 14861ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1487e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 14881ae08745Sheppo bioerror(buf, ENXIO); 14891ae08745Sheppo biodone(buf); 14901ae08745Sheppo return (0); 14911ae08745Sheppo } 14921ae08745Sheppo 14933af08d82Slm66018 DMSG(vdc, 2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p\n", 14943af08d82Slm66018 instance, (buf->b_flags & B_READ) ? "Read" : "Write", 14953af08d82Slm66018 buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr); 1496d10e4ef2Snarayan 14971ae08745Sheppo bp_mapin(buf); 14981ae08745Sheppo 149987a7269eSachartre if ((long)buf->b_private == VD_SLICE_NONE) { 150087a7269eSachartre /* I/O using an absolute disk offset */ 150187a7269eSachartre slice = VD_SLICE_NONE; 150287a7269eSachartre } else { 150387a7269eSachartre slice = VDCPART(buf->b_edev); 150487a7269eSachartre } 150587a7269eSachartre 15063af08d82Slm66018 rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr, 150787a7269eSachartre buf->b_bcount, slice, buf->b_lblkno, 15083af08d82Slm66018 CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir : 15093af08d82Slm66018 VIO_write_dir); 15103af08d82Slm66018 1511d10e4ef2Snarayan /* 1512d10e4ef2Snarayan * If the request was successfully sent, the strategy call returns and 1513d10e4ef2Snarayan * the ACK handler calls the bioxxx functions when the vDisk server is 1514366a92acSlm66018 * done otherwise we handle the error here. 
1515d10e4ef2Snarayan */ 1516d10e4ef2Snarayan if (rv) { 15173af08d82Slm66018 DMSG(vdc, 0, "Failed to read/write (err=%d)\n", rv); 15181ae08745Sheppo bioerror(buf, rv); 15191ae08745Sheppo biodone(buf); 1520d10e4ef2Snarayan } 1521d10e4ef2Snarayan 15221ae08745Sheppo return (0); 15231ae08745Sheppo } 15241ae08745Sheppo 15250d0c8d4bSnarayan /* 15260d0c8d4bSnarayan * Function: 15270d0c8d4bSnarayan * vdc_min 15280d0c8d4bSnarayan * 15290d0c8d4bSnarayan * Description: 15300d0c8d4bSnarayan * Routine to limit the size of a data transfer. Used in 15310d0c8d4bSnarayan * conjunction with physio(9F). 15320d0c8d4bSnarayan * 15330d0c8d4bSnarayan * Arguments: 15340d0c8d4bSnarayan * bp - pointer to the indicated buf(9S) struct. 15350d0c8d4bSnarayan * 15360d0c8d4bSnarayan */ 15370d0c8d4bSnarayan static void 15380d0c8d4bSnarayan vdc_min(struct buf *bufp) 15390d0c8d4bSnarayan { 15400d0c8d4bSnarayan vdc_t *vdc = NULL; 15410d0c8d4bSnarayan int instance = VDCUNIT(bufp->b_edev); 15420d0c8d4bSnarayan 15430d0c8d4bSnarayan vdc = ddi_get_soft_state(vdc_state, instance); 15440d0c8d4bSnarayan VERIFY(vdc != NULL); 15450d0c8d4bSnarayan 15460d0c8d4bSnarayan if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->block_size)) { 15470d0c8d4bSnarayan bufp->b_bcount = vdc->max_xfer_sz * vdc->block_size; 15480d0c8d4bSnarayan } 15490d0c8d4bSnarayan } 15501ae08745Sheppo 15511ae08745Sheppo static int 15521ae08745Sheppo vdc_read(dev_t dev, struct uio *uio, cred_t *cred) 15531ae08745Sheppo { 15541ae08745Sheppo _NOTE(ARGUNUSED(cred)) 15551ae08745Sheppo 15560d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15570d0c8d4bSnarayan return (physio(vdc_strategy, NULL, dev, B_READ, vdc_min, uio)); 15581ae08745Sheppo } 15591ae08745Sheppo 15601ae08745Sheppo static int 15611ae08745Sheppo vdc_write(dev_t dev, struct uio *uio, cred_t *cred) 15621ae08745Sheppo { 15631ae08745Sheppo _NOTE(ARGUNUSED(cred)) 15641ae08745Sheppo 15650d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15660d0c8d4bSnarayan return 
(physio(vdc_strategy, NULL, dev, B_WRITE, vdc_min, uio)); 15671ae08745Sheppo } 15681ae08745Sheppo 15691ae08745Sheppo static int 15701ae08745Sheppo vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred) 15711ae08745Sheppo { 15721ae08745Sheppo _NOTE(ARGUNUSED(cred)) 15731ae08745Sheppo 15740d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15750d0c8d4bSnarayan return (aphysio(vdc_strategy, anocancel, dev, B_READ, vdc_min, aio)); 15761ae08745Sheppo } 15771ae08745Sheppo 15781ae08745Sheppo static int 15791ae08745Sheppo vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred) 15801ae08745Sheppo { 15811ae08745Sheppo _NOTE(ARGUNUSED(cred)) 15821ae08745Sheppo 15830d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15840d0c8d4bSnarayan return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, vdc_min, aio)); 15851ae08745Sheppo } 15861ae08745Sheppo 15871ae08745Sheppo 15881ae08745Sheppo /* -------------------------------------------------------------------------- */ 15891ae08745Sheppo 15901ae08745Sheppo /* 15911ae08745Sheppo * Handshake support 15921ae08745Sheppo */ 15931ae08745Sheppo 15941ae08745Sheppo 15950a55fbb7Slm66018 /* 15960a55fbb7Slm66018 * Function: 15970a55fbb7Slm66018 * vdc_init_ver_negotiation() 15980a55fbb7Slm66018 * 15990a55fbb7Slm66018 * Description: 16000a55fbb7Slm66018 * 16010a55fbb7Slm66018 * Arguments: 16020a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
16030a55fbb7Slm66018 * 16040a55fbb7Slm66018 * Return Code: 16050a55fbb7Slm66018 * 0 - Success 16060a55fbb7Slm66018 */ 16071ae08745Sheppo static int 16080a55fbb7Slm66018 vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver) 16091ae08745Sheppo { 16101ae08745Sheppo vio_ver_msg_t pkt; 16111ae08745Sheppo size_t msglen = sizeof (pkt); 16121ae08745Sheppo int status = -1; 16131ae08745Sheppo 16141ae08745Sheppo ASSERT(vdc != NULL); 16151ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 16161ae08745Sheppo 16173af08d82Slm66018 DMSG(vdc, 0, "[%d] Entered.\n", vdc->instance); 1618e1ebb9ecSlm66018 16191ae08745Sheppo /* 16201ae08745Sheppo * set the Session ID to a unique value 16211ae08745Sheppo * (the lower 32 bits of the clock tick) 16221ae08745Sheppo */ 16231ae08745Sheppo vdc->session_id = ((uint32_t)gettick() & 0xffffffff); 16243af08d82Slm66018 DMSG(vdc, 0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id); 16251ae08745Sheppo 16261ae08745Sheppo pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 16271ae08745Sheppo pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 16281ae08745Sheppo pkt.tag.vio_subtype_env = VIO_VER_INFO; 16291ae08745Sheppo pkt.tag.vio_sid = vdc->session_id; 16301ae08745Sheppo pkt.dev_class = VDEV_DISK; 16310a55fbb7Slm66018 pkt.ver_major = ver.major; 16320a55fbb7Slm66018 pkt.ver_minor = ver.minor; 16331ae08745Sheppo 16340a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 16353af08d82Slm66018 DMSG(vdc, 0, "[%d] Ver info sent (status = %d)\n", 16363af08d82Slm66018 vdc->instance, status); 16371ae08745Sheppo if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 16383af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: " 163987a7269eSachartre "id(%lx) rv(%d) size(%ld)", vdc->instance, vdc->ldc_handle, 16401ae08745Sheppo status, msglen); 16411ae08745Sheppo if (msglen != sizeof (vio_ver_msg_t)) 16421ae08745Sheppo status = ENOMSG; 16431ae08745Sheppo } 16441ae08745Sheppo 16451ae08745Sheppo return (status); 16461ae08745Sheppo } 16471ae08745Sheppo 
16480a55fbb7Slm66018 /* 16490a55fbb7Slm66018 * Function: 16503af08d82Slm66018 * vdc_ver_negotiation() 16513af08d82Slm66018 * 16523af08d82Slm66018 * Description: 16533af08d82Slm66018 * 16543af08d82Slm66018 * Arguments: 16553af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 16563af08d82Slm66018 * 16573af08d82Slm66018 * Return Code: 16583af08d82Slm66018 * 0 - Success 16593af08d82Slm66018 */ 16603af08d82Slm66018 static int 16613af08d82Slm66018 vdc_ver_negotiation(vdc_t *vdcp) 16623af08d82Slm66018 { 16633af08d82Slm66018 vio_msg_t vio_msg; 16643af08d82Slm66018 int status; 16653af08d82Slm66018 16663af08d82Slm66018 if (status = vdc_init_ver_negotiation(vdcp, vdc_version[0])) 16673af08d82Slm66018 return (status); 16683af08d82Slm66018 16693af08d82Slm66018 /* release lock and wait for response */ 16703af08d82Slm66018 mutex_exit(&vdcp->lock); 16713af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 16723af08d82Slm66018 mutex_enter(&vdcp->lock); 16733af08d82Slm66018 if (status) { 16743af08d82Slm66018 DMSG(vdcp, 0, 16753af08d82Slm66018 "[%d] Failed waiting for Ver negotiation response, rv(%d)", 16763af08d82Slm66018 vdcp->instance, status); 16773af08d82Slm66018 return (status); 16783af08d82Slm66018 } 16793af08d82Slm66018 16803af08d82Slm66018 /* check type and sub_type ... 
*/ 16813af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 16823af08d82Slm66018 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 16833af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n", 16843af08d82Slm66018 vdcp->instance); 16853af08d82Slm66018 return (EPROTO); 16863af08d82Slm66018 } 16873af08d82Slm66018 16883af08d82Slm66018 return (vdc_handle_ver_msg(vdcp, (vio_ver_msg_t *)&vio_msg)); 16893af08d82Slm66018 } 16903af08d82Slm66018 16913af08d82Slm66018 /* 16923af08d82Slm66018 * Function: 16930a55fbb7Slm66018 * vdc_init_attr_negotiation() 16940a55fbb7Slm66018 * 16950a55fbb7Slm66018 * Description: 16960a55fbb7Slm66018 * 16970a55fbb7Slm66018 * Arguments: 16980a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 16990a55fbb7Slm66018 * 17000a55fbb7Slm66018 * Return Code: 17010a55fbb7Slm66018 * 0 - Success 17020a55fbb7Slm66018 */ 17031ae08745Sheppo static int 17041ae08745Sheppo vdc_init_attr_negotiation(vdc_t *vdc) 17051ae08745Sheppo { 17061ae08745Sheppo vd_attr_msg_t pkt; 17071ae08745Sheppo size_t msglen = sizeof (pkt); 17081ae08745Sheppo int status; 17091ae08745Sheppo 17101ae08745Sheppo ASSERT(vdc != NULL); 17111ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 17121ae08745Sheppo 17133af08d82Slm66018 DMSG(vdc, 0, "[%d] entered\n", vdc->instance); 17141ae08745Sheppo 17151ae08745Sheppo /* fill in tag */ 17161ae08745Sheppo pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 17171ae08745Sheppo pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 17181ae08745Sheppo pkt.tag.vio_subtype_env = VIO_ATTR_INFO; 17191ae08745Sheppo pkt.tag.vio_sid = vdc->session_id; 17201ae08745Sheppo /* fill in payload */ 17211ae08745Sheppo pkt.max_xfer_sz = vdc->max_xfer_sz; 17221ae08745Sheppo pkt.vdisk_block_size = vdc->block_size; 1723f0ca1d9aSsb155480 pkt.xfer_mode = VIO_DRING_MODE_V1_0; 17241ae08745Sheppo pkt.operations = 0; /* server will set bits of valid operations */ 17251ae08745Sheppo pkt.vdisk_type = 0; /* server will set to valid device type */ 
172617cadca8Slm66018 pkt.vdisk_media = 0; /* server will set to valid media type */ 17271ae08745Sheppo pkt.vdisk_size = 0; /* server will set to valid size */ 17281ae08745Sheppo 17290a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 17303af08d82Slm66018 DMSG(vdc, 0, "Attr info sent (status = %d)\n", status); 17311ae08745Sheppo 17321ae08745Sheppo if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 17333af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: " 173487a7269eSachartre "id(%lx) rv(%d) size(%ld)", vdc->instance, vdc->ldc_handle, 17351ae08745Sheppo status, msglen); 17361ae08745Sheppo if (msglen != sizeof (vio_ver_msg_t)) 17371ae08745Sheppo status = ENOMSG; 17381ae08745Sheppo } 17391ae08745Sheppo 17401ae08745Sheppo return (status); 17411ae08745Sheppo } 17421ae08745Sheppo 17430a55fbb7Slm66018 /* 17440a55fbb7Slm66018 * Function: 17453af08d82Slm66018 * vdc_attr_negotiation() 17463af08d82Slm66018 * 17473af08d82Slm66018 * Description: 17483af08d82Slm66018 * 17493af08d82Slm66018 * Arguments: 17503af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 
17513af08d82Slm66018 * 17523af08d82Slm66018 * Return Code: 17533af08d82Slm66018 * 0 - Success 17543af08d82Slm66018 */ 17553af08d82Slm66018 static int 17563af08d82Slm66018 vdc_attr_negotiation(vdc_t *vdcp) 17573af08d82Slm66018 { 17583af08d82Slm66018 int status; 17593af08d82Slm66018 vio_msg_t vio_msg; 17603af08d82Slm66018 17613af08d82Slm66018 if (status = vdc_init_attr_negotiation(vdcp)) 17623af08d82Slm66018 return (status); 17633af08d82Slm66018 17643af08d82Slm66018 /* release lock and wait for response */ 17653af08d82Slm66018 mutex_exit(&vdcp->lock); 17663af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 17673af08d82Slm66018 mutex_enter(&vdcp->lock); 17683af08d82Slm66018 if (status) { 17693af08d82Slm66018 DMSG(vdcp, 0, 17703af08d82Slm66018 "[%d] Failed waiting for Attr negotiation response, rv(%d)", 17713af08d82Slm66018 vdcp->instance, status); 17723af08d82Slm66018 return (status); 17733af08d82Slm66018 } 17743af08d82Slm66018 17753af08d82Slm66018 /* check type and sub_type ... */ 17763af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 17773af08d82Slm66018 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 17783af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n", 17793af08d82Slm66018 vdcp->instance); 17803af08d82Slm66018 return (EPROTO); 17813af08d82Slm66018 } 17823af08d82Slm66018 17833af08d82Slm66018 return (vdc_handle_attr_msg(vdcp, (vd_attr_msg_t *)&vio_msg)); 17843af08d82Slm66018 } 17853af08d82Slm66018 17863af08d82Slm66018 17873af08d82Slm66018 /* 17883af08d82Slm66018 * Function: 17890a55fbb7Slm66018 * vdc_init_dring_negotiate() 17900a55fbb7Slm66018 * 17910a55fbb7Slm66018 * Description: 17920a55fbb7Slm66018 * 17930a55fbb7Slm66018 * Arguments: 17940a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
17950a55fbb7Slm66018 * 17960a55fbb7Slm66018 * Return Code: 17970a55fbb7Slm66018 * 0 - Success 17980a55fbb7Slm66018 */ 17991ae08745Sheppo static int 18001ae08745Sheppo vdc_init_dring_negotiate(vdc_t *vdc) 18011ae08745Sheppo { 18021ae08745Sheppo vio_dring_reg_msg_t pkt; 18031ae08745Sheppo size_t msglen = sizeof (pkt); 18041ae08745Sheppo int status = -1; 18053af08d82Slm66018 int retry; 18063af08d82Slm66018 int nretries = 10; 18071ae08745Sheppo 18081ae08745Sheppo ASSERT(vdc != NULL); 18091ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 18101ae08745Sheppo 18113af08d82Slm66018 for (retry = 0; retry < nretries; retry++) { 18121ae08745Sheppo status = vdc_init_descriptor_ring(vdc); 18133af08d82Slm66018 if (status != EAGAIN) 18143af08d82Slm66018 break; 18153af08d82Slm66018 drv_usecwait(vdc_min_timeout_ldc); 18163af08d82Slm66018 } 18173af08d82Slm66018 18181ae08745Sheppo if (status != 0) { 18193af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n", 18201ae08745Sheppo vdc->instance, status); 18211ae08745Sheppo return (status); 18221ae08745Sheppo } 18233af08d82Slm66018 18243af08d82Slm66018 DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n", 1825e1ebb9ecSlm66018 vdc->instance, status); 18261ae08745Sheppo 18271ae08745Sheppo /* fill in tag */ 18281ae08745Sheppo pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 18291ae08745Sheppo pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 18301ae08745Sheppo pkt.tag.vio_subtype_env = VIO_DRING_REG; 18311ae08745Sheppo pkt.tag.vio_sid = vdc->session_id; 18321ae08745Sheppo /* fill in payload */ 18331ae08745Sheppo pkt.dring_ident = 0; 1834e1ebb9ecSlm66018 pkt.num_descriptors = vdc->dring_len; 1835e1ebb9ecSlm66018 pkt.descriptor_size = vdc->dring_entry_size; 18361ae08745Sheppo pkt.options = (VIO_TX_DRING | VIO_RX_DRING); 18371ae08745Sheppo pkt.ncookies = vdc->dring_cookie_count; 18381ae08745Sheppo pkt.cookie[0] = vdc->dring_cookie[0]; /* for now just one cookie */ 18391ae08745Sheppo 18400a55fbb7Slm66018 status = vdc_send(vdc, 
(caddr_t)&pkt, &msglen); 18411ae08745Sheppo if (status != 0) { 18423af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)", 1843e1ebb9ecSlm66018 vdc->instance, status); 18441ae08745Sheppo } 18451ae08745Sheppo 18461ae08745Sheppo return (status); 18471ae08745Sheppo } 18481ae08745Sheppo 18491ae08745Sheppo 18503af08d82Slm66018 /* 18513af08d82Slm66018 * Function: 18523af08d82Slm66018 * vdc_dring_negotiation() 18533af08d82Slm66018 * 18543af08d82Slm66018 * Description: 18553af08d82Slm66018 * 18563af08d82Slm66018 * Arguments: 18573af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 18583af08d82Slm66018 * 18593af08d82Slm66018 * Return Code: 18603af08d82Slm66018 * 0 - Success 18613af08d82Slm66018 */ 18623af08d82Slm66018 static int 18633af08d82Slm66018 vdc_dring_negotiation(vdc_t *vdcp) 18643af08d82Slm66018 { 18653af08d82Slm66018 int status; 18663af08d82Slm66018 vio_msg_t vio_msg; 18673af08d82Slm66018 18683af08d82Slm66018 if (status = vdc_init_dring_negotiate(vdcp)) 18693af08d82Slm66018 return (status); 18703af08d82Slm66018 18713af08d82Slm66018 /* release lock and wait for response */ 18723af08d82Slm66018 mutex_exit(&vdcp->lock); 18733af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 18743af08d82Slm66018 mutex_enter(&vdcp->lock); 18753af08d82Slm66018 if (status) { 18763af08d82Slm66018 DMSG(vdcp, 0, 18773af08d82Slm66018 "[%d] Failed waiting for Dring negotiation response," 18783af08d82Slm66018 " rv(%d)", vdcp->instance, status); 18793af08d82Slm66018 return (status); 18803af08d82Slm66018 } 18813af08d82Slm66018 18823af08d82Slm66018 /* check type and sub_type ... 
*/ 18833af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 18843af08d82Slm66018 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 18853af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n", 18863af08d82Slm66018 vdcp->instance); 18873af08d82Slm66018 return (EPROTO); 18883af08d82Slm66018 } 18893af08d82Slm66018 18903af08d82Slm66018 return (vdc_handle_dring_reg_msg(vdcp, 18913af08d82Slm66018 (vio_dring_reg_msg_t *)&vio_msg)); 18923af08d82Slm66018 } 18933af08d82Slm66018 18943af08d82Slm66018 18953af08d82Slm66018 /* 18963af08d82Slm66018 * Function: 18973af08d82Slm66018 * vdc_send_rdx() 18983af08d82Slm66018 * 18993af08d82Slm66018 * Description: 19003af08d82Slm66018 * 19013af08d82Slm66018 * Arguments: 19023af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 19033af08d82Slm66018 * 19043af08d82Slm66018 * Return Code: 19053af08d82Slm66018 * 0 - Success 19063af08d82Slm66018 */ 19073af08d82Slm66018 static int 19083af08d82Slm66018 vdc_send_rdx(vdc_t *vdcp) 19093af08d82Slm66018 { 19103af08d82Slm66018 vio_msg_t msg; 19113af08d82Slm66018 size_t msglen = sizeof (vio_msg_t); 19123af08d82Slm66018 int status; 19133af08d82Slm66018 19143af08d82Slm66018 /* 19153af08d82Slm66018 * Send an RDX message to vds to indicate we are ready 19163af08d82Slm66018 * to send data 19173af08d82Slm66018 */ 19183af08d82Slm66018 msg.tag.vio_msgtype = VIO_TYPE_CTRL; 19193af08d82Slm66018 msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 19203af08d82Slm66018 msg.tag.vio_subtype_env = VIO_RDX; 19213af08d82Slm66018 msg.tag.vio_sid = vdcp->session_id; 19223af08d82Slm66018 status = vdc_send(vdcp, (caddr_t)&msg, &msglen); 19233af08d82Slm66018 if (status != 0) { 19243af08d82Slm66018 DMSG(vdcp, 0, "[%d] Failed to send RDX message (%d)", 19253af08d82Slm66018 vdcp->instance, status); 19263af08d82Slm66018 } 19273af08d82Slm66018 19283af08d82Slm66018 return (status); 19293af08d82Slm66018 } 19303af08d82Slm66018 19313af08d82Slm66018 /* 19323af08d82Slm66018 * Function: 
19333af08d82Slm66018 * vdc_handle_rdx() 19343af08d82Slm66018 * 19353af08d82Slm66018 * Description: 19363af08d82Slm66018 * 19373af08d82Slm66018 * Arguments: 19383af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 19393af08d82Slm66018 * msgp - received msg 19403af08d82Slm66018 * 19413af08d82Slm66018 * Return Code: 19423af08d82Slm66018 * 0 - Success 19433af08d82Slm66018 */ 19443af08d82Slm66018 static int 19453af08d82Slm66018 vdc_handle_rdx(vdc_t *vdcp, vio_rdx_msg_t *msgp) 19463af08d82Slm66018 { 19473af08d82Slm66018 _NOTE(ARGUNUSED(vdcp)) 19483af08d82Slm66018 _NOTE(ARGUNUSED(msgp)) 19493af08d82Slm66018 19503af08d82Slm66018 ASSERT(msgp->tag.vio_msgtype == VIO_TYPE_CTRL); 19513af08d82Slm66018 ASSERT(msgp->tag.vio_subtype == VIO_SUBTYPE_ACK); 19523af08d82Slm66018 ASSERT(msgp->tag.vio_subtype_env == VIO_RDX); 19533af08d82Slm66018 19543af08d82Slm66018 DMSG(vdcp, 1, "[%d] Got an RDX msg", vdcp->instance); 19553af08d82Slm66018 19563af08d82Slm66018 return (0); 19573af08d82Slm66018 } 19583af08d82Slm66018 19593af08d82Slm66018 /* 19603af08d82Slm66018 * Function: 19613af08d82Slm66018 * vdc_rdx_exchange() 19623af08d82Slm66018 * 19633af08d82Slm66018 * Description: 19643af08d82Slm66018 * 19653af08d82Slm66018 * Arguments: 19663af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 
19673af08d82Slm66018 * 19683af08d82Slm66018 * Return Code: 19693af08d82Slm66018 * 0 - Success 19703af08d82Slm66018 */ 19713af08d82Slm66018 static int 19723af08d82Slm66018 vdc_rdx_exchange(vdc_t *vdcp) 19733af08d82Slm66018 { 19743af08d82Slm66018 int status; 19753af08d82Slm66018 vio_msg_t vio_msg; 19763af08d82Slm66018 19773af08d82Slm66018 if (status = vdc_send_rdx(vdcp)) 19783af08d82Slm66018 return (status); 19793af08d82Slm66018 19803af08d82Slm66018 /* release lock and wait for response */ 19813af08d82Slm66018 mutex_exit(&vdcp->lock); 19823af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 19833af08d82Slm66018 mutex_enter(&vdcp->lock); 19843af08d82Slm66018 if (status) { 198587a7269eSachartre DMSG(vdcp, 0, "[%d] Failed waiting for RDX response, rv(%d)", 198687a7269eSachartre vdcp->instance, status); 19873af08d82Slm66018 return (status); 19883af08d82Slm66018 } 19893af08d82Slm66018 19903af08d82Slm66018 /* check type and sub_type ... */ 19913af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 19923af08d82Slm66018 vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) { 199387a7269eSachartre DMSG(vdcp, 0, "[%d] Invalid RDX response\n", vdcp->instance); 19943af08d82Slm66018 return (EPROTO); 19953af08d82Slm66018 } 19963af08d82Slm66018 19973af08d82Slm66018 return (vdc_handle_rdx(vdcp, (vio_rdx_msg_t *)&vio_msg)); 19983af08d82Slm66018 } 19993af08d82Slm66018 20003af08d82Slm66018 20011ae08745Sheppo /* -------------------------------------------------------------------------- */ 20021ae08745Sheppo 20031ae08745Sheppo /* 20041ae08745Sheppo * LDC helper routines 20051ae08745Sheppo */ 20061ae08745Sheppo 20073af08d82Slm66018 static int 20083af08d82Slm66018 vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp) 20093af08d82Slm66018 { 20103af08d82Slm66018 int status; 20113af08d82Slm66018 boolean_t q_has_pkts = B_FALSE; 201217cadca8Slm66018 uint64_t delay_time; 20133af08d82Slm66018 size_t len; 20143af08d82Slm66018 20153af08d82Slm66018 mutex_enter(&vdc->read_lock); 
20163af08d82Slm66018 20173af08d82Slm66018 if (vdc->read_state == VDC_READ_IDLE) 20183af08d82Slm66018 vdc->read_state = VDC_READ_WAITING; 20193af08d82Slm66018 20203af08d82Slm66018 while (vdc->read_state != VDC_READ_PENDING) { 20213af08d82Slm66018 20223af08d82Slm66018 /* detect if the connection has been reset */ 20233af08d82Slm66018 if (vdc->read_state == VDC_READ_RESET) { 20243af08d82Slm66018 status = ECONNRESET; 20253af08d82Slm66018 goto done; 20263af08d82Slm66018 } 20273af08d82Slm66018 20283af08d82Slm66018 cv_wait(&vdc->read_cv, &vdc->read_lock); 20293af08d82Slm66018 } 20303af08d82Slm66018 20313af08d82Slm66018 /* 20323af08d82Slm66018 * Until we get a blocking ldc read we have to retry 20333af08d82Slm66018 * until the entire LDC message has arrived before 20343af08d82Slm66018 * ldc_read() will succeed. Note we also bail out if 2035eff7243fSlm66018 * the channel is reset or goes away. 20363af08d82Slm66018 */ 20373af08d82Slm66018 delay_time = vdc_ldc_read_init_delay; 20383af08d82Slm66018 loop: 20393af08d82Slm66018 len = *nbytesp; 20403af08d82Slm66018 status = ldc_read(vdc->ldc_handle, (caddr_t)msgp, &len); 20413af08d82Slm66018 switch (status) { 20423af08d82Slm66018 case EAGAIN: 20433af08d82Slm66018 delay_time *= 2; 20443af08d82Slm66018 if (delay_time >= vdc_ldc_read_max_delay) 20453af08d82Slm66018 delay_time = vdc_ldc_read_max_delay; 20463af08d82Slm66018 delay(delay_time); 20473af08d82Slm66018 goto loop; 20483af08d82Slm66018 20493af08d82Slm66018 case 0: 20503af08d82Slm66018 if (len == 0) { 205117cadca8Slm66018 DMSG(vdc, 1, "[%d] ldc_read returned 0 bytes with " 20523af08d82Slm66018 "no error!\n", vdc->instance); 20533af08d82Slm66018 goto loop; 20543af08d82Slm66018 } 20553af08d82Slm66018 20563af08d82Slm66018 *nbytesp = len; 20573af08d82Slm66018 20583af08d82Slm66018 /* 20593af08d82Slm66018 * If there are pending messages, leave the 20603af08d82Slm66018 * read state as pending. Otherwise, set the state 20613af08d82Slm66018 * back to idle. 
20623af08d82Slm66018 */ 20633af08d82Slm66018 status = ldc_chkq(vdc->ldc_handle, &q_has_pkts); 20643af08d82Slm66018 if (status == 0 && !q_has_pkts) 20653af08d82Slm66018 vdc->read_state = VDC_READ_IDLE; 20663af08d82Slm66018 20673af08d82Slm66018 break; 20683af08d82Slm66018 default: 20693af08d82Slm66018 DMSG(vdc, 0, "ldc_read returned %d\n", status); 20703af08d82Slm66018 break; 20713af08d82Slm66018 } 20723af08d82Slm66018 20733af08d82Slm66018 done: 20743af08d82Slm66018 mutex_exit(&vdc->read_lock); 20753af08d82Slm66018 20763af08d82Slm66018 return (status); 20773af08d82Slm66018 } 20783af08d82Slm66018 20793af08d82Slm66018 20803af08d82Slm66018 20813af08d82Slm66018 #ifdef DEBUG 20823af08d82Slm66018 void 20833af08d82Slm66018 vdc_decode_tag(vdc_t *vdcp, vio_msg_t *msg) 20843af08d82Slm66018 { 20853af08d82Slm66018 char *ms, *ss, *ses; 20863af08d82Slm66018 switch (msg->tag.vio_msgtype) { 20873af08d82Slm66018 #define Q(_s) case _s : ms = #_s; break; 20883af08d82Slm66018 Q(VIO_TYPE_CTRL) 20893af08d82Slm66018 Q(VIO_TYPE_DATA) 20903af08d82Slm66018 Q(VIO_TYPE_ERR) 20913af08d82Slm66018 #undef Q 20923af08d82Slm66018 default: ms = "unknown"; break; 20933af08d82Slm66018 } 20943af08d82Slm66018 20953af08d82Slm66018 switch (msg->tag.vio_subtype) { 20963af08d82Slm66018 #define Q(_s) case _s : ss = #_s; break; 20973af08d82Slm66018 Q(VIO_SUBTYPE_INFO) 20983af08d82Slm66018 Q(VIO_SUBTYPE_ACK) 20993af08d82Slm66018 Q(VIO_SUBTYPE_NACK) 21003af08d82Slm66018 #undef Q 21013af08d82Slm66018 default: ss = "unknown"; break; 21023af08d82Slm66018 } 21033af08d82Slm66018 21043af08d82Slm66018 switch (msg->tag.vio_subtype_env) { 21053af08d82Slm66018 #define Q(_s) case _s : ses = #_s; break; 21063af08d82Slm66018 Q(VIO_VER_INFO) 21073af08d82Slm66018 Q(VIO_ATTR_INFO) 21083af08d82Slm66018 Q(VIO_DRING_REG) 21093af08d82Slm66018 Q(VIO_DRING_UNREG) 21103af08d82Slm66018 Q(VIO_RDX) 21113af08d82Slm66018 Q(VIO_PKT_DATA) 21123af08d82Slm66018 Q(VIO_DESC_DATA) 21133af08d82Slm66018 Q(VIO_DRING_DATA) 21143af08d82Slm66018 #undef 
Q 21153af08d82Slm66018 default: ses = "unknown"; break; 21163af08d82Slm66018 } 21173af08d82Slm66018 21183af08d82Slm66018 DMSG(vdcp, 3, "(%x/%x/%x) message : (%s/%s/%s)\n", 21193af08d82Slm66018 msg->tag.vio_msgtype, msg->tag.vio_subtype, 21203af08d82Slm66018 msg->tag.vio_subtype_env, ms, ss, ses); 21213af08d82Slm66018 } 21223af08d82Slm66018 #endif 21233af08d82Slm66018 21241ae08745Sheppo /* 21251ae08745Sheppo * Function: 21261ae08745Sheppo * vdc_send() 21271ae08745Sheppo * 21281ae08745Sheppo * Description: 21291ae08745Sheppo * The function encapsulates the call to write a message using LDC. 21301ae08745Sheppo * If LDC indicates that the call failed due to the queue being full, 213117cadca8Slm66018 * we retry the ldc_write(), otherwise we return the error returned by LDC. 21321ae08745Sheppo * 21331ae08745Sheppo * Arguments: 21341ae08745Sheppo * ldc_handle - LDC handle for the channel this instance of vdc uses 21351ae08745Sheppo * pkt - address of LDC message to be sent 21361ae08745Sheppo * msglen - the size of the message being sent. When the function 21371ae08745Sheppo * returns, this contains the number of bytes written. 21381ae08745Sheppo * 21391ae08745Sheppo * Return Code: 21401ae08745Sheppo * 0 - Success. 21411ae08745Sheppo * EINVAL - pkt or msglen were NULL 21421ae08745Sheppo * ECONNRESET - The connection was not up. 
21431ae08745Sheppo * EWOULDBLOCK - LDC queue is full 21441ae08745Sheppo * xxx - other error codes returned by ldc_write 21451ae08745Sheppo */ 21461ae08745Sheppo static int 21470a55fbb7Slm66018 vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen) 21481ae08745Sheppo { 21491ae08745Sheppo size_t size = 0; 21501ae08745Sheppo int status = 0; 21513af08d82Slm66018 clock_t delay_ticks; 21521ae08745Sheppo 21530a55fbb7Slm66018 ASSERT(vdc != NULL); 21540a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 21551ae08745Sheppo ASSERT(msglen != NULL); 21561ae08745Sheppo ASSERT(*msglen != 0); 21571ae08745Sheppo 21583af08d82Slm66018 #ifdef DEBUG 215917cadca8Slm66018 vdc_decode_tag(vdc, (vio_msg_t *)(uintptr_t)pkt); 21603af08d82Slm66018 #endif 21613af08d82Slm66018 /* 21623af08d82Slm66018 * Wait indefinitely to send if channel 21633af08d82Slm66018 * is busy, but bail out if we succeed or 21643af08d82Slm66018 * if the channel closes or is reset. 21653af08d82Slm66018 */ 21663af08d82Slm66018 delay_ticks = vdc_hz_min_ldc_delay; 21671ae08745Sheppo do { 21681ae08745Sheppo size = *msglen; 21690a55fbb7Slm66018 status = ldc_write(vdc->ldc_handle, pkt, &size); 21703af08d82Slm66018 if (status == EWOULDBLOCK) { 21713af08d82Slm66018 delay(delay_ticks); 21723af08d82Slm66018 /* geometric backoff */ 21733af08d82Slm66018 delay_ticks *= 2; 21743af08d82Slm66018 if (delay_ticks > vdc_hz_max_ldc_delay) 21753af08d82Slm66018 delay_ticks = vdc_hz_max_ldc_delay; 21763af08d82Slm66018 } 21773af08d82Slm66018 } while (status == EWOULDBLOCK); 21781ae08745Sheppo 21790a55fbb7Slm66018 /* if LDC had serious issues --- reset vdc state */ 21800a55fbb7Slm66018 if (status == EIO || status == ECONNRESET) { 21813af08d82Slm66018 /* LDC had serious issues --- reset vdc state */ 21823af08d82Slm66018 mutex_enter(&vdc->read_lock); 21833af08d82Slm66018 if ((vdc->read_state == VDC_READ_WAITING) || 21843af08d82Slm66018 (vdc->read_state == VDC_READ_RESET)) 21853af08d82Slm66018 cv_signal(&vdc->read_cv); 21863af08d82Slm66018 vdc->read_state 
= VDC_READ_RESET; 21873af08d82Slm66018 mutex_exit(&vdc->read_lock); 21883af08d82Slm66018 21893af08d82Slm66018 /* wake up any waiters in the reset thread */ 21903af08d82Slm66018 if (vdc->state == VDC_STATE_INIT_WAITING) { 21913af08d82Slm66018 DMSG(vdc, 0, "[%d] write reset - " 21923af08d82Slm66018 "vdc is resetting ..\n", vdc->instance); 21933af08d82Slm66018 vdc->state = VDC_STATE_RESETTING; 21943af08d82Slm66018 cv_signal(&vdc->initwait_cv); 21953af08d82Slm66018 } 21963af08d82Slm66018 21973af08d82Slm66018 return (ECONNRESET); 21980a55fbb7Slm66018 } 21990a55fbb7Slm66018 22001ae08745Sheppo /* return the last size written */ 22011ae08745Sheppo *msglen = size; 22021ae08745Sheppo 22031ae08745Sheppo return (status); 22041ae08745Sheppo } 22051ae08745Sheppo 22061ae08745Sheppo /* 22071ae08745Sheppo * Function: 2208655fd6a9Sachartre * vdc_get_md_node 22091ae08745Sheppo * 22101ae08745Sheppo * Description: 2211655fd6a9Sachartre * Get the MD, the device node and the port node for the given 2212655fd6a9Sachartre * disk instance. The caller is responsible for cleaning up the 2213655fd6a9Sachartre * reference to the returned MD (mdpp) by calling md_fini_handle(). 22141ae08745Sheppo * 22151ae08745Sheppo * Arguments: 22161ae08745Sheppo * dip - dev info pointer for this instance of the device driver. 2217655fd6a9Sachartre * mdpp - the returned MD. 2218655fd6a9Sachartre * vd_nodep - the returned device node. 2219655fd6a9Sachartre * vd_portp - the returned port node. The returned port node is NULL 2220655fd6a9Sachartre * if no port node is found. 22211ae08745Sheppo * 22221ae08745Sheppo * Return Code: 22231ae08745Sheppo * 0 - Success. 22241ae08745Sheppo * ENOENT - Expected node or property did not exist. 
22251ae08745Sheppo * ENXIO - Unexpected error communicating with MD framework 22261ae08745Sheppo */ 22271ae08745Sheppo static int 2228655fd6a9Sachartre vdc_get_md_node(dev_info_t *dip, md_t **mdpp, mde_cookie_t *vd_nodep, 2229655fd6a9Sachartre mde_cookie_t *vd_portp) 22301ae08745Sheppo { 22311ae08745Sheppo int status = ENOENT; 22321ae08745Sheppo char *node_name = NULL; 22331ae08745Sheppo md_t *mdp = NULL; 22341ae08745Sheppo int num_nodes; 22351ae08745Sheppo int num_vdevs; 2236655fd6a9Sachartre int num_vports; 22371ae08745Sheppo mde_cookie_t rootnode; 22381ae08745Sheppo mde_cookie_t *listp = NULL; 22391ae08745Sheppo boolean_t found_inst = B_FALSE; 22401ae08745Sheppo int listsz; 22411ae08745Sheppo int idx; 22421ae08745Sheppo uint64_t md_inst; 22431ae08745Sheppo int obp_inst; 22441ae08745Sheppo int instance = ddi_get_instance(dip); 22451ae08745Sheppo 22461ae08745Sheppo /* 22471ae08745Sheppo * Get the OBP instance number for comparison with the MD instance 22481ae08745Sheppo * 22491ae08745Sheppo * The "cfg-handle" property of a vdc node in an MD contains the MD's 22501ae08745Sheppo * notion of "instance", or unique identifier, for that node; OBP 22511ae08745Sheppo * stores the value of the "cfg-handle" MD property as the value of 22521ae08745Sheppo * the "reg" property on the node in the device tree it builds from 22531ae08745Sheppo * the MD and passes to Solaris. Thus, we look up the devinfo node's 22541ae08745Sheppo * "reg" property value to uniquely identify this device instance. 22551ae08745Sheppo * If the "reg" property cannot be found, the device tree state is 22561ae08745Sheppo * presumably so broken that there is no point in continuing. 
22571ae08745Sheppo */ 22581ae08745Sheppo if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) { 22591ae08745Sheppo cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG); 22601ae08745Sheppo return (ENOENT); 22611ae08745Sheppo } 22621ae08745Sheppo obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 22631ae08745Sheppo OBP_REG, -1); 22643af08d82Slm66018 DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst); 22651ae08745Sheppo 22661ae08745Sheppo /* 2267655fd6a9Sachartre * We now walk the MD nodes to find the node for this vdisk. 22681ae08745Sheppo */ 22691ae08745Sheppo if ((mdp = md_get_handle()) == NULL) { 22701ae08745Sheppo cmn_err(CE_WARN, "unable to init machine description"); 22711ae08745Sheppo return (ENXIO); 22721ae08745Sheppo } 22731ae08745Sheppo 22741ae08745Sheppo num_nodes = md_node_count(mdp); 22751ae08745Sheppo ASSERT(num_nodes > 0); 22761ae08745Sheppo 22771ae08745Sheppo listsz = num_nodes * sizeof (mde_cookie_t); 22781ae08745Sheppo 22791ae08745Sheppo /* allocate memory for nodes */ 22801ae08745Sheppo listp = kmem_zalloc(listsz, KM_SLEEP); 22811ae08745Sheppo 22821ae08745Sheppo rootnode = md_root_node(mdp); 22831ae08745Sheppo ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); 22841ae08745Sheppo 22851ae08745Sheppo /* 22861ae08745Sheppo * Search for all the virtual devices, we will then check to see which 22871ae08745Sheppo * ones are disk nodes. 
22881ae08745Sheppo */ 22891ae08745Sheppo num_vdevs = md_scan_dag(mdp, rootnode, 22901ae08745Sheppo md_find_name(mdp, VDC_MD_VDEV_NAME), 22911ae08745Sheppo md_find_name(mdp, "fwd"), listp); 22921ae08745Sheppo 22931ae08745Sheppo if (num_vdevs <= 0) { 22941ae08745Sheppo cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME); 22951ae08745Sheppo status = ENOENT; 22961ae08745Sheppo goto done; 22971ae08745Sheppo } 22981ae08745Sheppo 22993af08d82Slm66018 DMSGX(1, "[%d] num_vdevs=%d\n", instance, num_vdevs); 23001ae08745Sheppo for (idx = 0; idx < num_vdevs; idx++) { 23011ae08745Sheppo status = md_get_prop_str(mdp, listp[idx], "name", &node_name); 23021ae08745Sheppo if ((status != 0) || (node_name == NULL)) { 23031ae08745Sheppo cmn_err(CE_NOTE, "Unable to get name of node type '%s'" 23041ae08745Sheppo ": err %d", VDC_MD_VDEV_NAME, status); 23051ae08745Sheppo continue; 23061ae08745Sheppo } 23071ae08745Sheppo 23083af08d82Slm66018 DMSGX(1, "[%d] Found node '%s'\n", instance, node_name); 23091ae08745Sheppo if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) { 23101ae08745Sheppo status = md_get_prop_val(mdp, listp[idx], 23111ae08745Sheppo VDC_MD_CFG_HDL, &md_inst); 23123af08d82Slm66018 DMSGX(1, "[%d] vdc inst in MD=%lx\n", 23133af08d82Slm66018 instance, md_inst); 23141ae08745Sheppo if ((status == 0) && (md_inst == obp_inst)) { 23151ae08745Sheppo found_inst = B_TRUE; 23161ae08745Sheppo break; 23171ae08745Sheppo } 23181ae08745Sheppo } 23191ae08745Sheppo } 23201ae08745Sheppo 23210a55fbb7Slm66018 if (!found_inst) { 23223af08d82Slm66018 DMSGX(0, "Unable to find correct '%s' node", VDC_MD_DISK_NAME); 23231ae08745Sheppo status = ENOENT; 23241ae08745Sheppo goto done; 23251ae08745Sheppo } 23263af08d82Slm66018 DMSGX(0, "[%d] MD inst=%lx\n", instance, md_inst); 23271ae08745Sheppo 2328655fd6a9Sachartre *vd_nodep = listp[idx]; 2329655fd6a9Sachartre *mdpp = mdp; 2330655fd6a9Sachartre 2331655fd6a9Sachartre num_vports = md_scan_dag(mdp, *vd_nodep, 2332655fd6a9Sachartre md_find_name(mdp, 
VDC_MD_PORT_NAME), 2333655fd6a9Sachartre md_find_name(mdp, "fwd"), listp); 2334655fd6a9Sachartre 2335655fd6a9Sachartre if (num_vports != 1) { 2336655fd6a9Sachartre DMSGX(0, "Expected 1 '%s' node for '%s' port, found %d\n", 2337655fd6a9Sachartre VDC_MD_PORT_NAME, VDC_MD_VDEV_NAME, num_vports); 2338655fd6a9Sachartre } 2339655fd6a9Sachartre 2340655fd6a9Sachartre *vd_portp = (num_vports == 0)? NULL: listp[0]; 2341655fd6a9Sachartre 2342655fd6a9Sachartre done: 2343655fd6a9Sachartre kmem_free(listp, listsz); 2344655fd6a9Sachartre return (status); 2345655fd6a9Sachartre } 2346655fd6a9Sachartre 2347655fd6a9Sachartre /* 2348655fd6a9Sachartre * Function: 2349655fd6a9Sachartre * vdc_get_ldc_id() 2350655fd6a9Sachartre * 2351655fd6a9Sachartre * Description: 2352655fd6a9Sachartre * This function gets the 'ldc-id' for this particular instance of vdc. 2353655fd6a9Sachartre * The id returned is the guest domain channel endpoint LDC uses for 2354655fd6a9Sachartre * communication with vds. 2355655fd6a9Sachartre * 2356655fd6a9Sachartre * Arguments: 2357655fd6a9Sachartre * mdp - pointer to the machine description. 2358655fd6a9Sachartre * vd_node - the vdisk element from the MD. 2359655fd6a9Sachartre * ldc_id - pointer to variable used to return the 'ldc-id' found. 2360655fd6a9Sachartre * 2361655fd6a9Sachartre * Return Code: 2362655fd6a9Sachartre * 0 - Success. 2363655fd6a9Sachartre * ENOENT - Expected node or property did not exist. 
2364655fd6a9Sachartre */ 2365655fd6a9Sachartre static int 2366655fd6a9Sachartre vdc_get_ldc_id(md_t *mdp, mde_cookie_t vd_node, uint64_t *ldc_id) 2367655fd6a9Sachartre { 2368655fd6a9Sachartre mde_cookie_t *chanp = NULL; 2369655fd6a9Sachartre int listsz; 2370655fd6a9Sachartre int num_chans; 2371655fd6a9Sachartre int num_nodes; 2372655fd6a9Sachartre int status = 0; 2373655fd6a9Sachartre 2374655fd6a9Sachartre num_nodes = md_node_count(mdp); 2375655fd6a9Sachartre ASSERT(num_nodes > 0); 2376655fd6a9Sachartre 2377655fd6a9Sachartre listsz = num_nodes * sizeof (mde_cookie_t); 2378655fd6a9Sachartre 2379655fd6a9Sachartre /* allocate memory for nodes */ 2380655fd6a9Sachartre chanp = kmem_zalloc(listsz, KM_SLEEP); 2381655fd6a9Sachartre 23821ae08745Sheppo /* get the channels for this node */ 2383655fd6a9Sachartre num_chans = md_scan_dag(mdp, vd_node, 23841ae08745Sheppo md_find_name(mdp, VDC_MD_CHAN_NAME), 23851ae08745Sheppo md_find_name(mdp, "fwd"), chanp); 23861ae08745Sheppo 23871ae08745Sheppo /* expecting at least one channel */ 23881ae08745Sheppo if (num_chans <= 0) { 23891ae08745Sheppo cmn_err(CE_NOTE, "No '%s' node for '%s' port", 23901ae08745Sheppo VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); 23911ae08745Sheppo status = ENOENT; 23921ae08745Sheppo goto done; 23931ae08745Sheppo 23941ae08745Sheppo } else if (num_chans != 1) { 2395655fd6a9Sachartre DMSGX(0, "Expected 1 '%s' node for '%s' port, found %d\n", 2396655fd6a9Sachartre VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, num_chans); 23971ae08745Sheppo } 23981ae08745Sheppo 23991ae08745Sheppo /* 24001ae08745Sheppo * We use the first channel found (index 0), irrespective of how 24011ae08745Sheppo * many are there in total. 
24021ae08745Sheppo */ 2403655fd6a9Sachartre if (md_get_prop_val(mdp, chanp[0], VDC_MD_ID, ldc_id) != 0) { 2404655fd6a9Sachartre cmn_err(CE_NOTE, "Channel '%s' property not found", VDC_MD_ID); 24051ae08745Sheppo status = ENOENT; 24061ae08745Sheppo } 24071ae08745Sheppo 24081ae08745Sheppo done: 24091ae08745Sheppo kmem_free(chanp, listsz); 24101ae08745Sheppo return (status); 24111ae08745Sheppo } 24121ae08745Sheppo 24130a55fbb7Slm66018 static int 24140a55fbb7Slm66018 vdc_do_ldc_up(vdc_t *vdc) 24150a55fbb7Slm66018 { 24160a55fbb7Slm66018 int status; 24173af08d82Slm66018 ldc_status_t ldc_state; 24180a55fbb7Slm66018 24193af08d82Slm66018 DMSG(vdc, 0, "[%d] Bringing up channel %lx\n", 24203af08d82Slm66018 vdc->instance, vdc->ldc_id); 24213af08d82Slm66018 24223af08d82Slm66018 if (vdc->lifecycle == VDC_LC_DETACHING) 24233af08d82Slm66018 return (EINVAL); 24240a55fbb7Slm66018 24250a55fbb7Slm66018 if ((status = ldc_up(vdc->ldc_handle)) != 0) { 24260a55fbb7Slm66018 switch (status) { 24270a55fbb7Slm66018 case ECONNREFUSED: /* listener not ready at other end */ 24283af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_up(%lx,...) 
return %d\n", 2429e1ebb9ecSlm66018 vdc->instance, vdc->ldc_id, status); 24300a55fbb7Slm66018 status = 0; 24310a55fbb7Slm66018 break; 24320a55fbb7Slm66018 default: 24333af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to bring up LDC: " 24343af08d82Slm66018 "channel=%ld, err=%d", vdc->instance, vdc->ldc_id, 24353af08d82Slm66018 status); 24363af08d82Slm66018 break; 24373af08d82Slm66018 } 24383af08d82Slm66018 } 24393af08d82Slm66018 24403af08d82Slm66018 if (ldc_status(vdc->ldc_handle, &ldc_state) == 0) { 24413af08d82Slm66018 vdc->ldc_state = ldc_state; 24423af08d82Slm66018 if (ldc_state == LDC_UP) { 24433af08d82Slm66018 DMSG(vdc, 0, "[%d] LDC channel already up\n", 24443af08d82Slm66018 vdc->instance); 24453af08d82Slm66018 vdc->seq_num = 1; 24463af08d82Slm66018 vdc->seq_num_reply = 0; 24470a55fbb7Slm66018 } 24480a55fbb7Slm66018 } 24490a55fbb7Slm66018 24500a55fbb7Slm66018 return (status); 24510a55fbb7Slm66018 } 24520a55fbb7Slm66018 24530a55fbb7Slm66018 /* 24540a55fbb7Slm66018 * Function: 24550a55fbb7Slm66018 * vdc_terminate_ldc() 24560a55fbb7Slm66018 * 24570a55fbb7Slm66018 * Description: 24580a55fbb7Slm66018 * 24590a55fbb7Slm66018 * Arguments: 24600a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
24610a55fbb7Slm66018 * 24620a55fbb7Slm66018 * Return Code: 24630a55fbb7Slm66018 * None 24640a55fbb7Slm66018 */ 24651ae08745Sheppo static void 24661ae08745Sheppo vdc_terminate_ldc(vdc_t *vdc) 24671ae08745Sheppo { 24681ae08745Sheppo int instance = ddi_get_instance(vdc->dip); 24691ae08745Sheppo 24701ae08745Sheppo ASSERT(vdc != NULL); 24711ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 24721ae08745Sheppo 24733af08d82Slm66018 DMSG(vdc, 0, "[%d] initialized=%x\n", instance, vdc->initialized); 24741ae08745Sheppo 24751ae08745Sheppo if (vdc->initialized & VDC_LDC_OPEN) { 24763af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_close()\n", instance); 24771ae08745Sheppo (void) ldc_close(vdc->ldc_handle); 24781ae08745Sheppo } 24791ae08745Sheppo if (vdc->initialized & VDC_LDC_CB) { 24803af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_unreg_callback()\n", instance); 24811ae08745Sheppo (void) ldc_unreg_callback(vdc->ldc_handle); 24821ae08745Sheppo } 24831ae08745Sheppo if (vdc->initialized & VDC_LDC) { 24843af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_fini()\n", instance); 24851ae08745Sheppo (void) ldc_fini(vdc->ldc_handle); 24861ae08745Sheppo vdc->ldc_handle = NULL; 24871ae08745Sheppo } 24881ae08745Sheppo 24891ae08745Sheppo vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN); 24901ae08745Sheppo } 24911ae08745Sheppo 24921ae08745Sheppo /* -------------------------------------------------------------------------- */ 24931ae08745Sheppo 24941ae08745Sheppo /* 24951ae08745Sheppo * Descriptor Ring helper routines 24961ae08745Sheppo */ 24971ae08745Sheppo 24980a55fbb7Slm66018 /* 24990a55fbb7Slm66018 * Function: 25000a55fbb7Slm66018 * vdc_init_descriptor_ring() 25010a55fbb7Slm66018 * 25020a55fbb7Slm66018 * Description: 25030a55fbb7Slm66018 * 25040a55fbb7Slm66018 * Arguments: 25050a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
25060a55fbb7Slm66018 * 25070a55fbb7Slm66018 * Return Code: 25080a55fbb7Slm66018 * 0 - Success 25090a55fbb7Slm66018 */ 25101ae08745Sheppo static int 25111ae08745Sheppo vdc_init_descriptor_ring(vdc_t *vdc) 25121ae08745Sheppo { 25131ae08745Sheppo vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ 25140a55fbb7Slm66018 int status = 0; 25151ae08745Sheppo int i; 25161ae08745Sheppo 25173af08d82Slm66018 DMSG(vdc, 0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized); 25181ae08745Sheppo 25191ae08745Sheppo ASSERT(vdc != NULL); 25201ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 25211ae08745Sheppo ASSERT(vdc->ldc_handle != NULL); 25221ae08745Sheppo 2523e1ebb9ecSlm66018 /* ensure we have enough room to store max sized block */ 2524e1ebb9ecSlm66018 ASSERT(maxphys <= VD_MAX_BLOCK_SIZE); 2525e1ebb9ecSlm66018 25260a55fbb7Slm66018 if ((vdc->initialized & VDC_DRING_INIT) == 0) { 25273af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_mem_dring_create\n", vdc->instance); 2528e1ebb9ecSlm66018 /* 2529e1ebb9ecSlm66018 * Calculate the maximum block size we can transmit using one 2530e1ebb9ecSlm66018 * Descriptor Ring entry from the attributes returned by the 2531e1ebb9ecSlm66018 * vDisk server. This is subject to a minimum of 'maxphys' 2532e1ebb9ecSlm66018 * as we do not have the capability to split requests over 2533e1ebb9ecSlm66018 * multiple DRing entries. 
2534e1ebb9ecSlm66018 */ 2535e1ebb9ecSlm66018 if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) { 25363af08d82Slm66018 DMSG(vdc, 0, "[%d] using minimum DRing size\n", 2537e1ebb9ecSlm66018 vdc->instance); 2538e1ebb9ecSlm66018 vdc->dring_max_cookies = maxphys / PAGESIZE; 2539e1ebb9ecSlm66018 } else { 2540e1ebb9ecSlm66018 vdc->dring_max_cookies = 2541e1ebb9ecSlm66018 (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE; 2542e1ebb9ecSlm66018 } 2543e1ebb9ecSlm66018 vdc->dring_entry_size = (sizeof (vd_dring_entry_t) + 2544e1ebb9ecSlm66018 (sizeof (ldc_mem_cookie_t) * 2545e1ebb9ecSlm66018 (vdc->dring_max_cookies - 1))); 2546e1ebb9ecSlm66018 vdc->dring_len = VD_DRING_LEN; 2547e1ebb9ecSlm66018 2548e1ebb9ecSlm66018 status = ldc_mem_dring_create(vdc->dring_len, 2549e1ebb9ecSlm66018 vdc->dring_entry_size, &vdc->ldc_dring_hdl); 25501ae08745Sheppo if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) { 25513af08d82Slm66018 DMSG(vdc, 0, "[%d] Descriptor ring creation failed", 2552e1ebb9ecSlm66018 vdc->instance); 25531ae08745Sheppo return (status); 25541ae08745Sheppo } 25550a55fbb7Slm66018 vdc->initialized |= VDC_DRING_INIT; 25560a55fbb7Slm66018 } 25571ae08745Sheppo 25580a55fbb7Slm66018 if ((vdc->initialized & VDC_DRING_BOUND) == 0) { 25593af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance); 25600a55fbb7Slm66018 vdc->dring_cookie = 25610a55fbb7Slm66018 kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); 25621ae08745Sheppo 25631ae08745Sheppo status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl, 25644bac2208Snarayan LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW, 25650a55fbb7Slm66018 &vdc->dring_cookie[0], 25661ae08745Sheppo &vdc->dring_cookie_count); 25671ae08745Sheppo if (status != 0) { 25683af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to bind descriptor ring " 25693af08d82Slm66018 "(%lx) to channel (%lx) status=%d\n", 25703af08d82Slm66018 vdc->instance, vdc->ldc_dring_hdl, 25713af08d82Slm66018 vdc->ldc_handle, status); 25721ae08745Sheppo return (status); 
25731ae08745Sheppo } 25741ae08745Sheppo ASSERT(vdc->dring_cookie_count == 1); 25751ae08745Sheppo vdc->initialized |= VDC_DRING_BOUND; 25760a55fbb7Slm66018 } 25771ae08745Sheppo 25781ae08745Sheppo status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info); 25791ae08745Sheppo if (status != 0) { 25803af08d82Slm66018 DMSG(vdc, 0, 25813af08d82Slm66018 "[%d] Failed to get info for descriptor ring (%lx)\n", 2582e1ebb9ecSlm66018 vdc->instance, vdc->ldc_dring_hdl); 25831ae08745Sheppo return (status); 25841ae08745Sheppo } 25851ae08745Sheppo 25860a55fbb7Slm66018 if ((vdc->initialized & VDC_DRING_LOCAL) == 0) { 25873af08d82Slm66018 DMSG(vdc, 0, "[%d] local dring\n", vdc->instance); 25880a55fbb7Slm66018 25891ae08745Sheppo /* Allocate the local copy of this dring */ 25900a55fbb7Slm66018 vdc->local_dring = 2591e1ebb9ecSlm66018 kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t), 25921ae08745Sheppo KM_SLEEP); 25931ae08745Sheppo vdc->initialized |= VDC_DRING_LOCAL; 25940a55fbb7Slm66018 } 25951ae08745Sheppo 25961ae08745Sheppo /* 25970a55fbb7Slm66018 * Mark all DRing entries as free and initialize the private 25980a55fbb7Slm66018 * descriptor's memory handles. If any entry is initialized, 25990a55fbb7Slm66018 * we need to free it later so we set the bit in 'initialized' 26000a55fbb7Slm66018 * at the start. 
26011ae08745Sheppo */ 26021ae08745Sheppo vdc->initialized |= VDC_DRING_ENTRY; 2603e1ebb9ecSlm66018 for (i = 0; i < vdc->dring_len; i++) { 26041ae08745Sheppo dep = VDC_GET_DRING_ENTRY_PTR(vdc, i); 26051ae08745Sheppo dep->hdr.dstate = VIO_DESC_FREE; 26061ae08745Sheppo 26071ae08745Sheppo status = ldc_mem_alloc_handle(vdc->ldc_handle, 26081ae08745Sheppo &vdc->local_dring[i].desc_mhdl); 26091ae08745Sheppo if (status != 0) { 26103af08d82Slm66018 DMSG(vdc, 0, "![%d] Failed to alloc mem handle for" 26111ae08745Sheppo " descriptor %d", vdc->instance, i); 26121ae08745Sheppo return (status); 26131ae08745Sheppo } 26143af08d82Slm66018 vdc->local_dring[i].is_free = B_TRUE; 26151ae08745Sheppo vdc->local_dring[i].dep = dep; 26161ae08745Sheppo } 26171ae08745Sheppo 26183af08d82Slm66018 /* Initialize the starting index */ 26193af08d82Slm66018 vdc->dring_curr_idx = 0; 26201ae08745Sheppo 26211ae08745Sheppo return (status); 26221ae08745Sheppo } 26231ae08745Sheppo 26240a55fbb7Slm66018 /* 26250a55fbb7Slm66018 * Function: 26260a55fbb7Slm66018 * vdc_destroy_descriptor_ring() 26270a55fbb7Slm66018 * 26280a55fbb7Slm66018 * Description: 26290a55fbb7Slm66018 * 26300a55fbb7Slm66018 * Arguments: 26310a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
26320a55fbb7Slm66018 * 26330a55fbb7Slm66018 * Return Code: 26340a55fbb7Slm66018 * None 26350a55fbb7Slm66018 */ 26361ae08745Sheppo static void 26371ae08745Sheppo vdc_destroy_descriptor_ring(vdc_t *vdc) 26381ae08745Sheppo { 26390a55fbb7Slm66018 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 26401ae08745Sheppo ldc_mem_handle_t mhdl = NULL; 26413af08d82Slm66018 ldc_mem_info_t minfo; 26421ae08745Sheppo int status = -1; 26431ae08745Sheppo int i; /* loop */ 26441ae08745Sheppo 26451ae08745Sheppo ASSERT(vdc != NULL); 26461ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 26471ae08745Sheppo 26483af08d82Slm66018 DMSG(vdc, 0, "[%d] Entered\n", vdc->instance); 26491ae08745Sheppo 26501ae08745Sheppo if (vdc->initialized & VDC_DRING_ENTRY) { 26513af08d82Slm66018 DMSG(vdc, 0, 26523af08d82Slm66018 "[%d] Removing Local DRing entries\n", vdc->instance); 2653e1ebb9ecSlm66018 for (i = 0; i < vdc->dring_len; i++) { 26540a55fbb7Slm66018 ldep = &vdc->local_dring[i]; 26550a55fbb7Slm66018 mhdl = ldep->desc_mhdl; 26561ae08745Sheppo 26570a55fbb7Slm66018 if (mhdl == NULL) 26580a55fbb7Slm66018 continue; 26590a55fbb7Slm66018 26603af08d82Slm66018 if ((status = ldc_mem_info(mhdl, &minfo)) != 0) { 26613af08d82Slm66018 DMSG(vdc, 0, 26623af08d82Slm66018 "ldc_mem_info returned an error: %d\n", 26633af08d82Slm66018 status); 26643af08d82Slm66018 26653af08d82Slm66018 /* 26663af08d82Slm66018 * This must mean that the mem handle 26673af08d82Slm66018 * is not valid. Clear it out so that 26683af08d82Slm66018 * no one tries to use it. 
26693af08d82Slm66018 */ 26703af08d82Slm66018 ldep->desc_mhdl = NULL; 26713af08d82Slm66018 continue; 26723af08d82Slm66018 } 26733af08d82Slm66018 26743af08d82Slm66018 if (minfo.status == LDC_BOUND) { 26753af08d82Slm66018 (void) ldc_mem_unbind_handle(mhdl); 26763af08d82Slm66018 } 26773af08d82Slm66018 26781ae08745Sheppo (void) ldc_mem_free_handle(mhdl); 26793af08d82Slm66018 26803af08d82Slm66018 ldep->desc_mhdl = NULL; 26811ae08745Sheppo } 26821ae08745Sheppo vdc->initialized &= ~VDC_DRING_ENTRY; 26831ae08745Sheppo } 26841ae08745Sheppo 26851ae08745Sheppo if (vdc->initialized & VDC_DRING_LOCAL) { 26863af08d82Slm66018 DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance); 26871ae08745Sheppo kmem_free(vdc->local_dring, 2688e1ebb9ecSlm66018 vdc->dring_len * sizeof (vdc_local_desc_t)); 26891ae08745Sheppo vdc->initialized &= ~VDC_DRING_LOCAL; 26901ae08745Sheppo } 26911ae08745Sheppo 26921ae08745Sheppo if (vdc->initialized & VDC_DRING_BOUND) { 26933af08d82Slm66018 DMSG(vdc, 0, "[%d] Unbinding DRing\n", vdc->instance); 26941ae08745Sheppo status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl); 26951ae08745Sheppo if (status == 0) { 26961ae08745Sheppo vdc->initialized &= ~VDC_DRING_BOUND; 26971ae08745Sheppo } else { 26983af08d82Slm66018 DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx", 2699e1ebb9ecSlm66018 vdc->instance, status, vdc->ldc_dring_hdl); 27001ae08745Sheppo } 27013af08d82Slm66018 kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t)); 27021ae08745Sheppo } 27031ae08745Sheppo 27041ae08745Sheppo if (vdc->initialized & VDC_DRING_INIT) { 27053af08d82Slm66018 DMSG(vdc, 0, "[%d] Destroying DRing\n", vdc->instance); 27061ae08745Sheppo status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl); 27071ae08745Sheppo if (status == 0) { 27081ae08745Sheppo vdc->ldc_dring_hdl = NULL; 27091ae08745Sheppo bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t)); 27101ae08745Sheppo vdc->initialized &= ~VDC_DRING_INIT; 27111ae08745Sheppo } else { 27123af08d82Slm66018 DMSG(vdc, 0, "[%d] Error %d 
destroying DRing (%lx)", 2713e1ebb9ecSlm66018 vdc->instance, status, vdc->ldc_dring_hdl); 27141ae08745Sheppo } 27151ae08745Sheppo } 27161ae08745Sheppo } 27171ae08745Sheppo 27181ae08745Sheppo /* 27193af08d82Slm66018 * Function: 27203af08d82Slm66018 * vdc_map_to_shared_ring() 27211ae08745Sheppo * 27221ae08745Sheppo * Description: 27233af08d82Slm66018 * Copy contents of the local descriptor to the shared 27243af08d82Slm66018 * memory descriptor. 27251ae08745Sheppo * 27263af08d82Slm66018 * Arguments: 27273af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 27283af08d82Slm66018 * idx - descriptor ring index 27293af08d82Slm66018 * 27303af08d82Slm66018 * Return Code: 27313af08d82Slm66018 * None 27321ae08745Sheppo */ 27331ae08745Sheppo static int 27343af08d82Slm66018 vdc_map_to_shared_dring(vdc_t *vdcp, int idx) 27351ae08745Sheppo { 27363af08d82Slm66018 vdc_local_desc_t *ldep; 27373af08d82Slm66018 vd_dring_entry_t *dep; 27383af08d82Slm66018 int rv; 27391ae08745Sheppo 27403af08d82Slm66018 ldep = &(vdcp->local_dring[idx]); 27411ae08745Sheppo 27423af08d82Slm66018 /* for now leave in the old pop_mem_hdl stuff */ 27433af08d82Slm66018 if (ldep->nbytes > 0) { 27443af08d82Slm66018 rv = vdc_populate_mem_hdl(vdcp, ldep); 27453af08d82Slm66018 if (rv) { 27463af08d82Slm66018 DMSG(vdcp, 0, "[%d] Cannot populate mem handle\n", 27473af08d82Slm66018 vdcp->instance); 27483af08d82Slm66018 return (rv); 27493af08d82Slm66018 } 27503af08d82Slm66018 } 27511ae08745Sheppo 27523af08d82Slm66018 /* 27533af08d82Slm66018 * fill in the data details into the DRing 27543af08d82Slm66018 */ 2755d10e4ef2Snarayan dep = ldep->dep; 27561ae08745Sheppo ASSERT(dep != NULL); 27571ae08745Sheppo 27583af08d82Slm66018 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdcp); 27593af08d82Slm66018 dep->payload.operation = ldep->operation; 27603af08d82Slm66018 dep->payload.addr = ldep->offset; 27613af08d82Slm66018 dep->payload.nbytes = ldep->nbytes; 2762055d7c80Scarlsonj dep->payload.status = 
(uint32_t)-1; /* vds will set valid value */ 27633af08d82Slm66018 dep->payload.slice = ldep->slice; 27643af08d82Slm66018 dep->hdr.dstate = VIO_DESC_READY; 27653af08d82Slm66018 dep->hdr.ack = 1; /* request an ACK for every message */ 27661ae08745Sheppo 27673af08d82Slm66018 return (0); 27681ae08745Sheppo } 27691ae08745Sheppo 27701ae08745Sheppo /* 27711ae08745Sheppo * Function: 27723af08d82Slm66018 * vdc_send_request 27733af08d82Slm66018 * 27743af08d82Slm66018 * Description: 27753af08d82Slm66018 * This routine writes the data to be transmitted to vds into the 27763af08d82Slm66018 * descriptor, notifies vds that the ring has been updated and 27773af08d82Slm66018 * then waits for the request to be processed. 27783af08d82Slm66018 * 27793af08d82Slm66018 * Arguments: 27803af08d82Slm66018 * vdcp - the soft state pointer 27813af08d82Slm66018 * operation - operation we want vds to perform (VD_OP_XXX) 27823af08d82Slm66018 * addr - address of data buf to be read/written. 27833af08d82Slm66018 * nbytes - number of bytes to read/write 27843af08d82Slm66018 * slice - the disk slice this request is for 27853af08d82Slm66018 * offset - relative disk offset 27863af08d82Slm66018 * cb_type - type of call - STRATEGY or SYNC 27873af08d82Slm66018 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 27883af08d82Slm66018 * . mode for ioctl(9e) 27893af08d82Slm66018 * . 
LP64 diskaddr_t (block I/O) 27903af08d82Slm66018 * dir - direction of operation (READ/WRITE/BOTH) 27913af08d82Slm66018 * 27923af08d82Slm66018 * Return Codes: 27933af08d82Slm66018 * 0 27943af08d82Slm66018 * ENXIO 27953af08d82Slm66018 */ 27963af08d82Slm66018 static int 27973af08d82Slm66018 vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr, 27983af08d82Slm66018 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 27993af08d82Slm66018 void *cb_arg, vio_desc_direction_t dir) 28003af08d82Slm66018 { 2801366a92acSlm66018 int rv = 0; 2802366a92acSlm66018 28033af08d82Slm66018 ASSERT(vdcp != NULL); 280487a7269eSachartre ASSERT(slice == VD_SLICE_NONE || slice < V_NUMPAR); 28053af08d82Slm66018 28063af08d82Slm66018 mutex_enter(&vdcp->lock); 28073af08d82Slm66018 2808366a92acSlm66018 /* 2809366a92acSlm66018 * If this is a block read/write operation we update the I/O statistics 2810366a92acSlm66018 * to indicate that the request is being put on the waitq to be 2811366a92acSlm66018 * serviced. 2812366a92acSlm66018 * 2813366a92acSlm66018 * We do it here (a common routine for both synchronous and strategy 2814366a92acSlm66018 * calls) for performance reasons - we are already holding vdc->lock 2815366a92acSlm66018 * so there is no extra locking overhead. We would have to explicitly 2816366a92acSlm66018 * grab the 'lock' mutex to update the stats if we were to do this 2817366a92acSlm66018 * higher up the stack in vdc_strategy() et. al. 
2818366a92acSlm66018 */ 2819366a92acSlm66018 if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) { 2820366a92acSlm66018 DTRACE_IO1(start, buf_t *, cb_arg); 2821366a92acSlm66018 VD_KSTAT_WAITQ_ENTER(vdcp->io_stats); 2822366a92acSlm66018 } 2823366a92acSlm66018 28243af08d82Slm66018 do { 28253c96341aSnarayan while (vdcp->state != VDC_STATE_RUNNING) { 28263af08d82Slm66018 28273c96341aSnarayan /* return error if detaching */ 28283c96341aSnarayan if (vdcp->state == VDC_STATE_DETACH) { 2829366a92acSlm66018 rv = ENXIO; 2830366a92acSlm66018 goto done; 28313c96341aSnarayan } 2832655fd6a9Sachartre 2833655fd6a9Sachartre /* fail request if connection timeout is reached */ 2834655fd6a9Sachartre if (vdcp->ctimeout_reached) { 2835366a92acSlm66018 rv = EIO; 2836366a92acSlm66018 goto done; 2837655fd6a9Sachartre } 2838655fd6a9Sachartre 28392f5224aeSachartre /* 28402f5224aeSachartre * If we are panicking and the disk is not ready then 28412f5224aeSachartre * we can't send any request because we can't complete 28422f5224aeSachartre * the handshake now. 28432f5224aeSachartre */ 28442f5224aeSachartre if (ddi_in_panic()) { 2845366a92acSlm66018 rv = EIO; 2846366a92acSlm66018 goto done; 28472f5224aeSachartre } 28482f5224aeSachartre 2849655fd6a9Sachartre cv_wait(&vdcp->running_cv, &vdcp->lock); 28503c96341aSnarayan } 28513c96341aSnarayan 28523af08d82Slm66018 } while (vdc_populate_descriptor(vdcp, operation, addr, 28533af08d82Slm66018 nbytes, slice, offset, cb_type, cb_arg, dir)); 28543af08d82Slm66018 2855366a92acSlm66018 done: 2856366a92acSlm66018 /* 2857366a92acSlm66018 * If this is a block read/write we update the I/O statistics kstat 2858366a92acSlm66018 * to indicate that this request has been placed on the queue for 2859366a92acSlm66018 * processing (i.e sent to the vDisk server) - iostat(1M) will 2860366a92acSlm66018 * report the time waiting for the vDisk server under the %b column 2861366a92acSlm66018 * In the case of an error we simply take it off the wait queue. 
2862366a92acSlm66018 */ 2863366a92acSlm66018 if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) { 2864366a92acSlm66018 if (rv == 0) { 2865366a92acSlm66018 VD_KSTAT_WAITQ_TO_RUNQ(vdcp->io_stats); 2866366a92acSlm66018 DTRACE_PROBE1(send, buf_t *, cb_arg); 2867366a92acSlm66018 } else { 2868366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_transerrs); 2869366a92acSlm66018 VD_KSTAT_WAITQ_EXIT(vdcp->io_stats); 2870366a92acSlm66018 DTRACE_IO1(done, buf_t *, cb_arg); 2871366a92acSlm66018 } 2872366a92acSlm66018 } 2873366a92acSlm66018 28743af08d82Slm66018 mutex_exit(&vdcp->lock); 2875366a92acSlm66018 2876366a92acSlm66018 return (rv); 28773af08d82Slm66018 } 28783af08d82Slm66018 28793af08d82Slm66018 28803af08d82Slm66018 /* 28813af08d82Slm66018 * Function: 28821ae08745Sheppo * vdc_populate_descriptor 28831ae08745Sheppo * 28841ae08745Sheppo * Description: 28851ae08745Sheppo * This routine writes the data to be transmitted to vds into the 28861ae08745Sheppo * descriptor, notifies vds that the ring has been updated and 28871ae08745Sheppo * then waits for the request to be processed. 28881ae08745Sheppo * 28891ae08745Sheppo * Arguments: 28903af08d82Slm66018 * vdcp - the soft state pointer 28911ae08745Sheppo * operation - operation we want vds to perform (VD_OP_XXX) 28923af08d82Slm66018 * addr - address of data buf to be read/written. 28933af08d82Slm66018 * nbytes - number of bytes to read/write 28943af08d82Slm66018 * slice - the disk slice this request is for 28953af08d82Slm66018 * offset - relative disk offset 28963af08d82Slm66018 * cb_type - type of call - STRATEGY or SYNC 28973af08d82Slm66018 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 28981ae08745Sheppo * . mode for ioctl(9e) 28991ae08745Sheppo * . 
LP64 diskaddr_t (block I/O) 29003af08d82Slm66018 * dir - direction of operation (READ/WRITE/BOTH) 29011ae08745Sheppo * 29021ae08745Sheppo * Return Codes: 29031ae08745Sheppo * 0 29041ae08745Sheppo * EAGAIN 290517cadca8Slm66018 * ECONNRESET 29061ae08745Sheppo * ENXIO 29071ae08745Sheppo */ 29081ae08745Sheppo static int 29093af08d82Slm66018 vdc_populate_descriptor(vdc_t *vdcp, int operation, caddr_t addr, 29103af08d82Slm66018 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 29113af08d82Slm66018 void *cb_arg, vio_desc_direction_t dir) 29121ae08745Sheppo { 29133af08d82Slm66018 vdc_local_desc_t *local_dep = NULL; /* Local Dring Pointer */ 29143af08d82Slm66018 int idx; /* Index of DRing entry used */ 29153af08d82Slm66018 int next_idx; 29161ae08745Sheppo vio_dring_msg_t dmsg; 29173af08d82Slm66018 size_t msglen; 29188e6a2a04Slm66018 int rv; 29191ae08745Sheppo 29203af08d82Slm66018 ASSERT(MUTEX_HELD(&vdcp->lock)); 29213af08d82Slm66018 vdcp->threads_pending++; 29223af08d82Slm66018 loop: 29233af08d82Slm66018 DMSG(vdcp, 2, ": dring_curr_idx = %d\n", vdcp->dring_curr_idx); 29241ae08745Sheppo 29253af08d82Slm66018 /* Get next available D-Ring entry */ 29263af08d82Slm66018 idx = vdcp->dring_curr_idx; 29273af08d82Slm66018 local_dep = &(vdcp->local_dring[idx]); 29281ae08745Sheppo 29293af08d82Slm66018 if (!local_dep->is_free) { 29303af08d82Slm66018 DMSG(vdcp, 2, "[%d]: dring full - waiting for space\n", 29313af08d82Slm66018 vdcp->instance); 29323af08d82Slm66018 cv_wait(&vdcp->dring_free_cv, &vdcp->lock); 29333af08d82Slm66018 if (vdcp->state == VDC_STATE_RUNNING || 29343af08d82Slm66018 vdcp->state == VDC_STATE_HANDLE_PENDING) { 29353af08d82Slm66018 goto loop; 29363af08d82Slm66018 } 29373af08d82Slm66018 vdcp->threads_pending--; 29383af08d82Slm66018 return (ECONNRESET); 29391ae08745Sheppo } 29401ae08745Sheppo 29413af08d82Slm66018 next_idx = idx + 1; 29423af08d82Slm66018 if (next_idx >= vdcp->dring_len) 29433af08d82Slm66018 next_idx = 0; 29443af08d82Slm66018 vdcp->dring_curr_idx = 
next_idx; 29451ae08745Sheppo 29463af08d82Slm66018 ASSERT(local_dep->is_free); 29471ae08745Sheppo 29483af08d82Slm66018 local_dep->operation = operation; 2949d10e4ef2Snarayan local_dep->addr = addr; 29503af08d82Slm66018 local_dep->nbytes = nbytes; 29513af08d82Slm66018 local_dep->slice = slice; 29523af08d82Slm66018 local_dep->offset = offset; 29533af08d82Slm66018 local_dep->cb_type = cb_type; 29543af08d82Slm66018 local_dep->cb_arg = cb_arg; 29553af08d82Slm66018 local_dep->dir = dir; 29563af08d82Slm66018 29573af08d82Slm66018 local_dep->is_free = B_FALSE; 29583af08d82Slm66018 29593af08d82Slm66018 rv = vdc_map_to_shared_dring(vdcp, idx); 29603af08d82Slm66018 if (rv) { 29613af08d82Slm66018 DMSG(vdcp, 0, "[%d]: cannot bind memory - waiting ..\n", 29623af08d82Slm66018 vdcp->instance); 29633af08d82Slm66018 /* free the descriptor */ 29643af08d82Slm66018 local_dep->is_free = B_TRUE; 29653af08d82Slm66018 vdcp->dring_curr_idx = idx; 29663af08d82Slm66018 cv_wait(&vdcp->membind_cv, &vdcp->lock); 29673af08d82Slm66018 if (vdcp->state == VDC_STATE_RUNNING || 29683af08d82Slm66018 vdcp->state == VDC_STATE_HANDLE_PENDING) { 29693af08d82Slm66018 goto loop; 29701ae08745Sheppo } 29713af08d82Slm66018 vdcp->threads_pending--; 29723af08d82Slm66018 return (ECONNRESET); 29731ae08745Sheppo } 29741ae08745Sheppo 29751ae08745Sheppo /* 29761ae08745Sheppo * Send a msg with the DRing details to vds 29771ae08745Sheppo */ 29781ae08745Sheppo VIO_INIT_DRING_DATA_TAG(dmsg); 29793af08d82Slm66018 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdcp); 29803af08d82Slm66018 dmsg.dring_ident = vdcp->dring_ident; 29811ae08745Sheppo dmsg.start_idx = idx; 29821ae08745Sheppo dmsg.end_idx = idx; 29833af08d82Slm66018 vdcp->seq_num++; 29841ae08745Sheppo 2985366a92acSlm66018 DTRACE_PROBE2(populate, int, vdcp->instance, 2986366a92acSlm66018 vdc_local_desc_t *, local_dep); 29873af08d82Slm66018 DMSG(vdcp, 2, "ident=0x%lx, st=%u, end=%u, seq=%ld\n", 29883af08d82Slm66018 vdcp->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num); 
29891ae08745Sheppo 29903af08d82Slm66018 /* 29913af08d82Slm66018 * note we're still holding the lock here to 29923af08d82Slm66018 * make sure the message goes out in order !!!... 29933af08d82Slm66018 */ 29943af08d82Slm66018 msglen = sizeof (dmsg); 29953af08d82Slm66018 rv = vdc_send(vdcp, (caddr_t)&dmsg, &msglen); 29963af08d82Slm66018 switch (rv) { 29973af08d82Slm66018 case ECONNRESET: 29983af08d82Slm66018 /* 29993af08d82Slm66018 * vdc_send initiates the reset on failure. 30003af08d82Slm66018 * Since the transaction has already been put 30013af08d82Slm66018 * on the local dring, it will automatically get 30023af08d82Slm66018 * retried when the channel is reset. Given that, 30033af08d82Slm66018 * it is ok to just return success even though the 30043af08d82Slm66018 * send failed. 30053af08d82Slm66018 */ 30063af08d82Slm66018 rv = 0; 30073af08d82Slm66018 break; 3008d10e4ef2Snarayan 30093af08d82Slm66018 case 0: /* EOK */ 30103af08d82Slm66018 DMSG(vdcp, 1, "sent via LDC: rv=%d\n", rv); 30113af08d82Slm66018 break; 3012d10e4ef2Snarayan 30133af08d82Slm66018 default: 30143af08d82Slm66018 goto cleanup_and_exit; 30153af08d82Slm66018 } 3016e1ebb9ecSlm66018 30173af08d82Slm66018 vdcp->threads_pending--; 30183af08d82Slm66018 return (rv); 30193af08d82Slm66018 30203af08d82Slm66018 cleanup_and_exit: 30213af08d82Slm66018 DMSG(vdcp, 0, "unexpected error, rv=%d\n", rv); 30223af08d82Slm66018 return (ENXIO); 30231ae08745Sheppo } 30241ae08745Sheppo 30251ae08745Sheppo /* 30263af08d82Slm66018 * Function: 30273af08d82Slm66018 * vdc_do_sync_op 30283af08d82Slm66018 * 30293af08d82Slm66018 * Description: 30303af08d82Slm66018 * Wrapper around vdc_populate_descriptor that blocks until the 30313af08d82Slm66018 * response to the message is available. 
30323af08d82Slm66018 * 30333af08d82Slm66018 * Arguments: 30343af08d82Slm66018 * vdcp - the soft state pointer 30353af08d82Slm66018 * operation - operation we want vds to perform (VD_OP_XXX) 30363af08d82Slm66018 * addr - address of data buf to be read/written. 30373af08d82Slm66018 * nbytes - number of bytes to read/write 30383af08d82Slm66018 * slice - the disk slice this request is for 30393af08d82Slm66018 * offset - relative disk offset 30403af08d82Slm66018 * cb_type - type of call - STRATEGY or SYNC 30413af08d82Slm66018 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 30423af08d82Slm66018 * . mode for ioctl(9e) 30433af08d82Slm66018 * . LP64 diskaddr_t (block I/O) 30443af08d82Slm66018 * dir - direction of operation (READ/WRITE/BOTH) 30452f5224aeSachartre * rconflict - check for reservation conflict in case of failure 30462f5224aeSachartre * 30472f5224aeSachartre * rconflict should be set to B_TRUE by most callers. Callers invoking the 30482f5224aeSachartre * VD_OP_SCSICMD operation can set rconflict to B_FALSE if they check the 30492f5224aeSachartre * result of a successful operation with vd_scsi_status(). 
30503af08d82Slm66018 * 30513af08d82Slm66018 * Return Codes: 30523af08d82Slm66018 * 0 30533af08d82Slm66018 * EAGAIN 30543af08d82Slm66018 * EFAULT 30553af08d82Slm66018 * ENXIO 30563af08d82Slm66018 * EIO 30570a55fbb7Slm66018 */ 30583af08d82Slm66018 static int 30593af08d82Slm66018 vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, size_t nbytes, 30603af08d82Slm66018 int slice, diskaddr_t offset, int cb_type, void *cb_arg, 30612f5224aeSachartre vio_desc_direction_t dir, boolean_t rconflict) 30623af08d82Slm66018 { 30633af08d82Slm66018 int status; 30642f5224aeSachartre vdc_io_t *vio; 30652f5224aeSachartre boolean_t check_resv_conflict = B_FALSE; 30663af08d82Slm66018 30673af08d82Slm66018 ASSERT(cb_type == CB_SYNC); 30681ae08745Sheppo 30691ae08745Sheppo /* 30703af08d82Slm66018 * Grab the lock, if blocked wait until the server 30713af08d82Slm66018 * response causes us to wake up again. 30723af08d82Slm66018 */ 30733af08d82Slm66018 mutex_enter(&vdcp->lock); 30743af08d82Slm66018 vdcp->sync_op_cnt++; 30753af08d82Slm66018 while (vdcp->sync_op_blocked && vdcp->state != VDC_STATE_DETACH) 30763af08d82Slm66018 cv_wait(&vdcp->sync_blocked_cv, &vdcp->lock); 30773af08d82Slm66018 30783af08d82Slm66018 if (vdcp->state == VDC_STATE_DETACH) { 30793af08d82Slm66018 cv_broadcast(&vdcp->sync_blocked_cv); 30803af08d82Slm66018 vdcp->sync_op_cnt--; 30813af08d82Slm66018 mutex_exit(&vdcp->lock); 30823af08d82Slm66018 return (ENXIO); 30833af08d82Slm66018 } 30843af08d82Slm66018 30853af08d82Slm66018 /* now block anyone other thread entering after us */ 30863af08d82Slm66018 vdcp->sync_op_blocked = B_TRUE; 30873af08d82Slm66018 vdcp->sync_op_pending = B_TRUE; 30883af08d82Slm66018 mutex_exit(&vdcp->lock); 30893af08d82Slm66018 3090655fd6a9Sachartre status = vdc_send_request(vdcp, operation, addr, 30913af08d82Slm66018 nbytes, slice, offset, cb_type, cb_arg, dir); 30923af08d82Slm66018 3093655fd6a9Sachartre mutex_enter(&vdcp->lock); 3094655fd6a9Sachartre 3095655fd6a9Sachartre if (status != 0) { 
3096655fd6a9Sachartre vdcp->sync_op_pending = B_FALSE; 3097655fd6a9Sachartre } else { 30983af08d82Slm66018 /* 30993af08d82Slm66018 * block until our transaction completes. 31003af08d82Slm66018 * Also anyone else waiting also gets to go next. 31013af08d82Slm66018 */ 31023af08d82Slm66018 while (vdcp->sync_op_pending && vdcp->state != VDC_STATE_DETACH) 31033af08d82Slm66018 cv_wait(&vdcp->sync_pending_cv, &vdcp->lock); 31043af08d82Slm66018 3105655fd6a9Sachartre DMSG(vdcp, 2, ": operation returned %d\n", 3106655fd6a9Sachartre vdcp->sync_op_status); 31073c96341aSnarayan if (vdcp->state == VDC_STATE_DETACH) { 31083c96341aSnarayan vdcp->sync_op_pending = B_FALSE; 31093af08d82Slm66018 status = ENXIO; 31103c96341aSnarayan } else { 31113af08d82Slm66018 status = vdcp->sync_op_status; 31122f5224aeSachartre if (status != 0 && vdcp->failfast_interval != 0) { 31132f5224aeSachartre /* 31142f5224aeSachartre * Operation has failed and failfast is enabled. 31152f5224aeSachartre * We need to check if the failure is due to a 31162f5224aeSachartre * reservation conflict if this was requested. 31172f5224aeSachartre */ 31182f5224aeSachartre check_resv_conflict = rconflict; 31192f5224aeSachartre } 31202f5224aeSachartre 31213c96341aSnarayan } 3122655fd6a9Sachartre } 31233c96341aSnarayan 31243af08d82Slm66018 vdcp->sync_op_status = 0; 31253af08d82Slm66018 vdcp->sync_op_blocked = B_FALSE; 31263af08d82Slm66018 vdcp->sync_op_cnt--; 31273af08d82Slm66018 31283af08d82Slm66018 /* signal the next waiting thread */ 31293af08d82Slm66018 cv_signal(&vdcp->sync_blocked_cv); 31302f5224aeSachartre 31312f5224aeSachartre /* 31322f5224aeSachartre * We have to check for reservation conflict after unblocking sync 31332f5224aeSachartre * operations because some sync operations will be used to do this 31342f5224aeSachartre * check. 
31352f5224aeSachartre */ 31362f5224aeSachartre if (check_resv_conflict) { 31372f5224aeSachartre vio = vdc_failfast_io_queue(vdcp, NULL); 31382f5224aeSachartre while (vio->vio_qtime != 0) 31392f5224aeSachartre cv_wait(&vdcp->failfast_io_cv, &vdcp->lock); 31402f5224aeSachartre kmem_free(vio, sizeof (vdc_io_t)); 31412f5224aeSachartre } 31422f5224aeSachartre 31433af08d82Slm66018 mutex_exit(&vdcp->lock); 31443af08d82Slm66018 31453af08d82Slm66018 return (status); 31463af08d82Slm66018 } 31473af08d82Slm66018 31483af08d82Slm66018 31493af08d82Slm66018 /* 31503af08d82Slm66018 * Function: 31513af08d82Slm66018 * vdc_drain_response() 31523af08d82Slm66018 * 31533af08d82Slm66018 * Description: 31541ae08745Sheppo * When a guest is panicking, the completion of requests needs to be 31551ae08745Sheppo * handled differently because interrupts are disabled and vdc 31561ae08745Sheppo * will not get messages. We have to poll for the messages instead. 31573af08d82Slm66018 * 3158366a92acSlm66018 * Note: since we don't have a buf_t available we cannot implement 3159366a92acSlm66018 * the io:::done DTrace probe in this specific case. 3160366a92acSlm66018 * 31613af08d82Slm66018 * Arguments: 31623af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 
31633af08d82Slm66018 * 31643af08d82Slm66018 * Return Code: 31653af08d82Slm66018 * 0 - Success 31661ae08745Sheppo */ 31673af08d82Slm66018 static int 31683af08d82Slm66018 vdc_drain_response(vdc_t *vdc) 31693af08d82Slm66018 { 31703af08d82Slm66018 int rv, idx, retries; 31713af08d82Slm66018 size_t msglen; 31723af08d82Slm66018 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 31733af08d82Slm66018 vio_dring_msg_t dmsg; 31743af08d82Slm66018 31753af08d82Slm66018 mutex_enter(&vdc->lock); 31763af08d82Slm66018 31771ae08745Sheppo retries = 0; 31781ae08745Sheppo for (;;) { 31791ae08745Sheppo msglen = sizeof (dmsg); 31803af08d82Slm66018 rv = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg, &msglen); 31818e6a2a04Slm66018 if (rv) { 31828e6a2a04Slm66018 rv = EINVAL; 31831ae08745Sheppo break; 31841ae08745Sheppo } 31851ae08745Sheppo 31861ae08745Sheppo /* 31871ae08745Sheppo * if there are no packets wait and check again 31881ae08745Sheppo */ 31898e6a2a04Slm66018 if ((rv == 0) && (msglen == 0)) { 31901ae08745Sheppo if (retries++ > vdc_dump_retries) { 31918e6a2a04Slm66018 rv = EAGAIN; 31921ae08745Sheppo break; 31931ae08745Sheppo } 31941ae08745Sheppo 3195d10e4ef2Snarayan drv_usecwait(vdc_usec_timeout_dump); 31961ae08745Sheppo continue; 31971ae08745Sheppo } 31981ae08745Sheppo 31991ae08745Sheppo /* 32001ae08745Sheppo * Ignore all messages that are not ACKs/NACKs to 32011ae08745Sheppo * DRing requests. 32021ae08745Sheppo */ 32031ae08745Sheppo if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 32041ae08745Sheppo (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 32053af08d82Slm66018 DMSG(vdc, 0, "discard pkt: type=%d sub=%d env=%d\n", 32061ae08745Sheppo dmsg.tag.vio_msgtype, 32071ae08745Sheppo dmsg.tag.vio_subtype, 32081ae08745Sheppo dmsg.tag.vio_subtype_env); 32091ae08745Sheppo continue; 32101ae08745Sheppo } 32111ae08745Sheppo 32121ae08745Sheppo /* 32133af08d82Slm66018 * set the appropriate return value for the current request. 
32141ae08745Sheppo */ 32151ae08745Sheppo switch (dmsg.tag.vio_subtype) { 32161ae08745Sheppo case VIO_SUBTYPE_ACK: 32178e6a2a04Slm66018 rv = 0; 32181ae08745Sheppo break; 32191ae08745Sheppo case VIO_SUBTYPE_NACK: 32208e6a2a04Slm66018 rv = EAGAIN; 32211ae08745Sheppo break; 32221ae08745Sheppo default: 32231ae08745Sheppo continue; 32241ae08745Sheppo } 32251ae08745Sheppo 32263af08d82Slm66018 idx = dmsg.start_idx; 32273af08d82Slm66018 if (idx >= vdc->dring_len) { 32283af08d82Slm66018 DMSG(vdc, 0, "[%d] Bogus ack data : start %d\n", 3229e1ebb9ecSlm66018 vdc->instance, idx); 32303af08d82Slm66018 continue; 32311ae08745Sheppo } 32323af08d82Slm66018 ldep = &vdc->local_dring[idx]; 32333af08d82Slm66018 if (ldep->dep->hdr.dstate != VIO_DESC_DONE) { 32343af08d82Slm66018 DMSG(vdc, 0, "[%d] Entry @ %d - state !DONE %d\n", 32353af08d82Slm66018 vdc->instance, idx, ldep->dep->hdr.dstate); 32361ae08745Sheppo continue; 32371ae08745Sheppo } 32381ae08745Sheppo 32393af08d82Slm66018 DMSG(vdc, 1, "[%d] Depopulating idx=%d state=%d\n", 32403af08d82Slm66018 vdc->instance, idx, ldep->dep->hdr.dstate); 3241366a92acSlm66018 32423af08d82Slm66018 rv = vdc_depopulate_descriptor(vdc, idx); 32433af08d82Slm66018 if (rv) { 32443af08d82Slm66018 DMSG(vdc, 0, 32453af08d82Slm66018 "[%d] Entry @ %d - depopulate failed ..\n", 32463af08d82Slm66018 vdc->instance, idx); 32471ae08745Sheppo } 32481ae08745Sheppo 32493af08d82Slm66018 /* if this is the last descriptor - break out of loop */ 32503af08d82Slm66018 if ((idx + 1) % vdc->dring_len == vdc->dring_curr_idx) 32513af08d82Slm66018 break; 32523af08d82Slm66018 } 32533af08d82Slm66018 32543af08d82Slm66018 mutex_exit(&vdc->lock); 32553af08d82Slm66018 DMSG(vdc, 0, "End idx=%d\n", idx); 32563af08d82Slm66018 32573af08d82Slm66018 return (rv); 32581ae08745Sheppo } 32591ae08745Sheppo 32601ae08745Sheppo 32610a55fbb7Slm66018 /* 32620a55fbb7Slm66018 * Function: 32630a55fbb7Slm66018 * vdc_depopulate_descriptor() 32640a55fbb7Slm66018 * 32650a55fbb7Slm66018 * Description: 
32660a55fbb7Slm66018 * 32670a55fbb7Slm66018 * Arguments: 32680a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 32690a55fbb7Slm66018 * idx - Index of the Descriptor Ring entry being modified 32700a55fbb7Slm66018 * 32710a55fbb7Slm66018 * Return Code: 32720a55fbb7Slm66018 * 0 - Success 32730a55fbb7Slm66018 */ 32741ae08745Sheppo static int 32751ae08745Sheppo vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 32761ae08745Sheppo { 32771ae08745Sheppo vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 32781ae08745Sheppo vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 32791ae08745Sheppo int status = ENXIO; 32808e6a2a04Slm66018 int rv = 0; 32811ae08745Sheppo 32821ae08745Sheppo ASSERT(vdc != NULL); 3283e1ebb9ecSlm66018 ASSERT(idx < vdc->dring_len); 32841ae08745Sheppo ldep = &vdc->local_dring[idx]; 32851ae08745Sheppo ASSERT(ldep != NULL); 32863af08d82Slm66018 ASSERT(MUTEX_HELD(&vdc->lock)); 32873af08d82Slm66018 3288366a92acSlm66018 DTRACE_PROBE2(depopulate, int, vdc->instance, vdc_local_desc_t *, ldep); 32893af08d82Slm66018 DMSG(vdc, 2, ": idx = %d\n", idx); 3290366a92acSlm66018 32911ae08745Sheppo dep = ldep->dep; 32921ae08745Sheppo ASSERT(dep != NULL); 3293e1ebb9ecSlm66018 ASSERT((dep->hdr.dstate == VIO_DESC_DONE) || 3294e1ebb9ecSlm66018 (dep->payload.status == ECANCELED)); 32951ae08745Sheppo 3296e1ebb9ecSlm66018 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 32973af08d82Slm66018 32983af08d82Slm66018 ldep->is_free = B_TRUE; 32991ae08745Sheppo status = dep->payload.status; 3300205eeb1aSlm66018 DMSG(vdc, 2, ": is_free = %d : status = %d\n", ldep->is_free, status); 33011ae08745Sheppo 3302eff7243fSlm66018 /* 3303eff7243fSlm66018 * If no buffers were used to transfer information to the server when 3304eff7243fSlm66018 * populating the descriptor then no memory handles need to be unbound 3305eff7243fSlm66018 * and we can return now. 
3306eff7243fSlm66018 */ 3307eff7243fSlm66018 if (ldep->nbytes == 0) { 3308eff7243fSlm66018 cv_signal(&vdc->dring_free_cv); 33098e6a2a04Slm66018 return (status); 3310eff7243fSlm66018 } 33118e6a2a04Slm66018 33121ae08745Sheppo /* 33131ae08745Sheppo * If the upper layer passed in a misaligned address we copied the 33141ae08745Sheppo * data into an aligned buffer before sending it to LDC - we now 33151ae08745Sheppo * copy it back to the original buffer. 33161ae08745Sheppo */ 33171ae08745Sheppo if (ldep->align_addr) { 33181ae08745Sheppo ASSERT(ldep->addr != NULL); 33191ae08745Sheppo 33203c96341aSnarayan if (dep->payload.nbytes > 0) 33213c96341aSnarayan bcopy(ldep->align_addr, ldep->addr, 33223c96341aSnarayan dep->payload.nbytes); 33231ae08745Sheppo kmem_free(ldep->align_addr, 33243c96341aSnarayan sizeof (caddr_t) * P2ROUNDUP(ldep->nbytes, 8)); 33251ae08745Sheppo ldep->align_addr = NULL; 33261ae08745Sheppo } 33271ae08745Sheppo 33288e6a2a04Slm66018 rv = ldc_mem_unbind_handle(ldep->desc_mhdl); 33298e6a2a04Slm66018 if (rv != 0) { 33303af08d82Slm66018 DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)", 33318e6a2a04Slm66018 vdc->instance, ldep->desc_mhdl, idx, rv); 33328e6a2a04Slm66018 /* 33338e6a2a04Slm66018 * The error returned by the vDisk server is more informative 33348e6a2a04Slm66018 * and thus has a higher priority but if it isn't set we ensure 33358e6a2a04Slm66018 * that this function returns an error. 
33368e6a2a04Slm66018 */ 33378e6a2a04Slm66018 if (status == 0) 33388e6a2a04Slm66018 status = EINVAL; 33391ae08745Sheppo } 33401ae08745Sheppo 33413af08d82Slm66018 cv_signal(&vdc->membind_cv); 33423af08d82Slm66018 cv_signal(&vdc->dring_free_cv); 33433af08d82Slm66018 33441ae08745Sheppo return (status); 33451ae08745Sheppo } 33461ae08745Sheppo 33470a55fbb7Slm66018 /* 33480a55fbb7Slm66018 * Function: 33490a55fbb7Slm66018 * vdc_populate_mem_hdl() 33500a55fbb7Slm66018 * 33510a55fbb7Slm66018 * Description: 33520a55fbb7Slm66018 * 33530a55fbb7Slm66018 * Arguments: 33540a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 33550a55fbb7Slm66018 * idx - Index of the Descriptor Ring entry being modified 33560a55fbb7Slm66018 * addr - virtual address being mapped in 33570a55fbb7Slm66018 * nybtes - number of bytes in 'addr' 33580a55fbb7Slm66018 * operation - the vDisk operation being performed (VD_OP_xxx) 33590a55fbb7Slm66018 * 33600a55fbb7Slm66018 * Return Code: 33610a55fbb7Slm66018 * 0 - Success 33620a55fbb7Slm66018 */ 33631ae08745Sheppo static int 33643af08d82Slm66018 vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep) 33651ae08745Sheppo { 33661ae08745Sheppo vd_dring_entry_t *dep = NULL; 33671ae08745Sheppo ldc_mem_handle_t mhdl; 33681ae08745Sheppo caddr_t vaddr; 33693af08d82Slm66018 size_t nbytes; 33704bac2208Snarayan uint8_t perm = LDC_MEM_RW; 33714bac2208Snarayan uint8_t maptype; 33721ae08745Sheppo int rv = 0; 33731ae08745Sheppo int i; 33741ae08745Sheppo 33753af08d82Slm66018 ASSERT(vdcp != NULL); 33761ae08745Sheppo 33773af08d82Slm66018 dep = ldep->dep; 33781ae08745Sheppo mhdl = ldep->desc_mhdl; 33791ae08745Sheppo 33803af08d82Slm66018 switch (ldep->dir) { 33813af08d82Slm66018 case VIO_read_dir: 33821ae08745Sheppo perm = LDC_MEM_W; 33831ae08745Sheppo break; 33841ae08745Sheppo 33853af08d82Slm66018 case VIO_write_dir: 33861ae08745Sheppo perm = LDC_MEM_R; 33871ae08745Sheppo break; 33881ae08745Sheppo 33893af08d82Slm66018 case VIO_both_dir: 
33901ae08745Sheppo perm = LDC_MEM_RW; 33911ae08745Sheppo break; 33921ae08745Sheppo 33931ae08745Sheppo default: 33941ae08745Sheppo ASSERT(0); /* catch bad programming in vdc */ 33951ae08745Sheppo } 33961ae08745Sheppo 33971ae08745Sheppo /* 33981ae08745Sheppo * LDC expects any addresses passed in to be 8-byte aligned. We need 33991ae08745Sheppo * to copy the contents of any misaligned buffers to a newly allocated 34001ae08745Sheppo * buffer and bind it instead (and copy the the contents back to the 34011ae08745Sheppo * original buffer passed in when depopulating the descriptor) 34021ae08745Sheppo */ 34033af08d82Slm66018 vaddr = ldep->addr; 34043af08d82Slm66018 nbytes = ldep->nbytes; 34053af08d82Slm66018 if (((uint64_t)vaddr & 0x7) != 0) { 3406d10e4ef2Snarayan ASSERT(ldep->align_addr == NULL); 34071ae08745Sheppo ldep->align_addr = 34083af08d82Slm66018 kmem_alloc(sizeof (caddr_t) * 34093af08d82Slm66018 P2ROUNDUP(nbytes, 8), KM_SLEEP); 34103af08d82Slm66018 DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating " 34113af08d82Slm66018 "(buf=%p nb=%ld op=%d)\n", 34123af08d82Slm66018 vdcp->instance, (void *)vaddr, (void *)ldep->align_addr, 34133af08d82Slm66018 nbytes, ldep->operation); 34143af08d82Slm66018 if (perm != LDC_MEM_W) 34153af08d82Slm66018 bcopy(vaddr, ldep->align_addr, nbytes); 34161ae08745Sheppo vaddr = ldep->align_addr; 34171ae08745Sheppo } 34181ae08745Sheppo 34194bac2208Snarayan maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP; 34201ae08745Sheppo rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 342187a7269eSachartre maptype, perm, &dep->payload.cookie[0], &dep->payload.ncookies); 34223af08d82Slm66018 DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n", 34233af08d82Slm66018 vdcp->instance, dep->payload.ncookies); 34241ae08745Sheppo if (rv != 0) { 34253af08d82Slm66018 DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle " 34263af08d82Slm66018 "(mhdl=%p, buf=%p, err=%d)\n", 34273af08d82Slm66018 vdcp->instance, (void *)mhdl, (void *)vaddr, rv); 
34281ae08745Sheppo if (ldep->align_addr) { 34291ae08745Sheppo kmem_free(ldep->align_addr, 3430d10e4ef2Snarayan sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); 34311ae08745Sheppo ldep->align_addr = NULL; 34321ae08745Sheppo } 34331ae08745Sheppo return (EAGAIN); 34341ae08745Sheppo } 34351ae08745Sheppo 34361ae08745Sheppo /* 34371ae08745Sheppo * Get the other cookies (if any). 34381ae08745Sheppo */ 34391ae08745Sheppo for (i = 1; i < dep->payload.ncookies; i++) { 34401ae08745Sheppo rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 34411ae08745Sheppo if (rv != 0) { 34421ae08745Sheppo (void) ldc_mem_unbind_handle(mhdl); 34433af08d82Slm66018 DMSG(vdcp, 0, "?[%d] Failed to get next cookie " 3444e1ebb9ecSlm66018 "(mhdl=%lx cnum=%d), err=%d", 34453af08d82Slm66018 vdcp->instance, mhdl, i, rv); 34461ae08745Sheppo if (ldep->align_addr) { 34471ae08745Sheppo kmem_free(ldep->align_addr, 34483c96341aSnarayan sizeof (caddr_t) * ldep->nbytes); 34491ae08745Sheppo ldep->align_addr = NULL; 34501ae08745Sheppo } 34511ae08745Sheppo return (EAGAIN); 34521ae08745Sheppo } 34531ae08745Sheppo } 34541ae08745Sheppo 34551ae08745Sheppo return (rv); 34561ae08745Sheppo } 34571ae08745Sheppo 34581ae08745Sheppo /* 34591ae08745Sheppo * Interrupt handlers for messages from LDC 34601ae08745Sheppo */ 34611ae08745Sheppo 34620a55fbb7Slm66018 /* 34630a55fbb7Slm66018 * Function: 34640a55fbb7Slm66018 * vdc_handle_cb() 34650a55fbb7Slm66018 * 34660a55fbb7Slm66018 * Description: 34670a55fbb7Slm66018 * 34680a55fbb7Slm66018 * Arguments: 34690a55fbb7Slm66018 * event - Type of event (LDC_EVT_xxx) that triggered the callback 34700a55fbb7Slm66018 * arg - soft state pointer for this instance of the device driver. 
34710a55fbb7Slm66018 * 34720a55fbb7Slm66018 * Return Code: 34730a55fbb7Slm66018 * 0 - Success 34740a55fbb7Slm66018 */ 34751ae08745Sheppo static uint_t 34761ae08745Sheppo vdc_handle_cb(uint64_t event, caddr_t arg) 34771ae08745Sheppo { 34781ae08745Sheppo ldc_status_t ldc_state; 34791ae08745Sheppo int rv = 0; 34801ae08745Sheppo 34811ae08745Sheppo vdc_t *vdc = (vdc_t *)(void *)arg; 34821ae08745Sheppo 34831ae08745Sheppo ASSERT(vdc != NULL); 34841ae08745Sheppo 34853af08d82Slm66018 DMSG(vdc, 1, "evt=%lx seqID=%ld\n", event, vdc->seq_num); 34861ae08745Sheppo 34871ae08745Sheppo /* 34881ae08745Sheppo * Depending on the type of event that triggered this callback, 34893af08d82Slm66018 * we modify the handshake state or read the data. 34901ae08745Sheppo * 34911ae08745Sheppo * NOTE: not done as a switch() as event could be triggered by 34921ae08745Sheppo * a state change and a read request. Also the ordering of the 34931ae08745Sheppo * check for the event types is deliberate. 34941ae08745Sheppo */ 34951ae08745Sheppo if (event & LDC_EVT_UP) { 34963af08d82Slm66018 DMSG(vdc, 0, "[%d] Received LDC_EVT_UP\n", vdc->instance); 34973af08d82Slm66018 34983af08d82Slm66018 mutex_enter(&vdc->lock); 34991ae08745Sheppo 35001ae08745Sheppo /* get LDC state */ 35011ae08745Sheppo rv = ldc_status(vdc->ldc_handle, &ldc_state); 35021ae08745Sheppo if (rv != 0) { 35033af08d82Slm66018 DMSG(vdc, 0, "[%d] Couldn't get LDC status %d", 35041ae08745Sheppo vdc->instance, rv); 35051ae08745Sheppo return (LDC_SUCCESS); 35061ae08745Sheppo } 35073af08d82Slm66018 if (vdc->ldc_state != LDC_UP && ldc_state == LDC_UP) { 35081ae08745Sheppo /* 35093af08d82Slm66018 * Reset the transaction sequence numbers when 35103af08d82Slm66018 * LDC comes up. We then kick off the handshake 35113af08d82Slm66018 * negotiation with the vDisk server. 
35121ae08745Sheppo */ 35130a55fbb7Slm66018 vdc->seq_num = 1; 35141ae08745Sheppo vdc->seq_num_reply = 0; 35151ae08745Sheppo vdc->ldc_state = ldc_state; 35163af08d82Slm66018 cv_signal(&vdc->initwait_cv); 35173af08d82Slm66018 } 35183af08d82Slm66018 35191ae08745Sheppo mutex_exit(&vdc->lock); 35201ae08745Sheppo } 35211ae08745Sheppo 35221ae08745Sheppo if (event & LDC_EVT_READ) { 352317cadca8Slm66018 DMSG(vdc, 1, "[%d] Received LDC_EVT_READ\n", vdc->instance); 35243af08d82Slm66018 mutex_enter(&vdc->read_lock); 35253af08d82Slm66018 cv_signal(&vdc->read_cv); 35263af08d82Slm66018 vdc->read_state = VDC_READ_PENDING; 35273af08d82Slm66018 mutex_exit(&vdc->read_lock); 35281ae08745Sheppo 35291ae08745Sheppo /* that's all we have to do - no need to handle DOWN/RESET */ 35301ae08745Sheppo return (LDC_SUCCESS); 35311ae08745Sheppo } 35321ae08745Sheppo 35333af08d82Slm66018 if (event & (LDC_EVT_RESET|LDC_EVT_DOWN)) { 35340a55fbb7Slm66018 35353af08d82Slm66018 DMSG(vdc, 0, "[%d] Received LDC RESET event\n", vdc->instance); 35363af08d82Slm66018 35370a55fbb7Slm66018 mutex_enter(&vdc->lock); 35383af08d82Slm66018 /* 35393af08d82Slm66018 * Need to wake up any readers so they will 35403af08d82Slm66018 * detect that a reset has occurred. 
35413af08d82Slm66018 */ 35423af08d82Slm66018 mutex_enter(&vdc->read_lock); 35433af08d82Slm66018 if ((vdc->read_state == VDC_READ_WAITING) || 35443af08d82Slm66018 (vdc->read_state == VDC_READ_RESET)) 35453af08d82Slm66018 cv_signal(&vdc->read_cv); 35463af08d82Slm66018 vdc->read_state = VDC_READ_RESET; 35473af08d82Slm66018 mutex_exit(&vdc->read_lock); 35480a55fbb7Slm66018 35493af08d82Slm66018 /* wake up any threads waiting for connection to come up */ 35503af08d82Slm66018 if (vdc->state == VDC_STATE_INIT_WAITING) { 35513af08d82Slm66018 vdc->state = VDC_STATE_RESETTING; 35523af08d82Slm66018 cv_signal(&vdc->initwait_cv); 35531ae08745Sheppo } 35541ae08745Sheppo 35550a55fbb7Slm66018 mutex_exit(&vdc->lock); 35561ae08745Sheppo } 35571ae08745Sheppo 35581ae08745Sheppo if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 35593af08d82Slm66018 DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received", 35601ae08745Sheppo vdc->instance, event); 35611ae08745Sheppo 35621ae08745Sheppo return (LDC_SUCCESS); 35631ae08745Sheppo } 35641ae08745Sheppo 35653af08d82Slm66018 /* 35663af08d82Slm66018 * Function: 35673af08d82Slm66018 * vdc_wait_for_response() 35683af08d82Slm66018 * 35693af08d82Slm66018 * Description: 35703af08d82Slm66018 * Block waiting for a response from the server. If there is 35713af08d82Slm66018 * no data the thread block on the read_cv that is signalled 35723af08d82Slm66018 * by the callback when an EVT_READ occurs. 35733af08d82Slm66018 * 35743af08d82Slm66018 * Arguments: 35753af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 
35763af08d82Slm66018 * 35773af08d82Slm66018 * Return Code: 35783af08d82Slm66018 * 0 - Success 35793af08d82Slm66018 */ 35803af08d82Slm66018 static int 35813af08d82Slm66018 vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp) 35823af08d82Slm66018 { 35833af08d82Slm66018 size_t nbytes = sizeof (*msgp); 35843af08d82Slm66018 int status; 35853af08d82Slm66018 35863af08d82Slm66018 ASSERT(vdcp != NULL); 35873af08d82Slm66018 35883af08d82Slm66018 DMSG(vdcp, 1, "[%d] Entered\n", vdcp->instance); 35893af08d82Slm66018 35903af08d82Slm66018 status = vdc_recv(vdcp, msgp, &nbytes); 35913af08d82Slm66018 DMSG(vdcp, 3, "vdc_read() done.. status=0x%x size=0x%x\n", 35923af08d82Slm66018 status, (int)nbytes); 35933af08d82Slm66018 if (status) { 35943af08d82Slm66018 DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n", 35953af08d82Slm66018 vdcp->instance, status); 35963af08d82Slm66018 return (status); 35973af08d82Slm66018 } 35983af08d82Slm66018 35993af08d82Slm66018 if (nbytes < sizeof (vio_msg_tag_t)) { 36003af08d82Slm66018 DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n", 36013af08d82Slm66018 vdcp->instance, sizeof (vio_msg_tag_t), nbytes); 36023af08d82Slm66018 return (ENOMSG); 36033af08d82Slm66018 } 36043af08d82Slm66018 36053af08d82Slm66018 DMSG(vdcp, 2, "[%d] (%x/%x/%x)\n", vdcp->instance, 36063af08d82Slm66018 msgp->tag.vio_msgtype, 36073af08d82Slm66018 msgp->tag.vio_subtype, 36083af08d82Slm66018 msgp->tag.vio_subtype_env); 36093af08d82Slm66018 36103af08d82Slm66018 /* 36113af08d82Slm66018 * Verify the Session ID of the message 36123af08d82Slm66018 * 36133af08d82Slm66018 * Every message after the Version has been negotiated should 36143af08d82Slm66018 * have the correct session ID set. 
36153af08d82Slm66018 */ 36163af08d82Slm66018 if ((msgp->tag.vio_sid != vdcp->session_id) && 36173af08d82Slm66018 (msgp->tag.vio_subtype_env != VIO_VER_INFO)) { 36183af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, " 36193af08d82Slm66018 "expected 0x%lx [seq num %lx @ %d]", 36203af08d82Slm66018 vdcp->instance, msgp->tag.vio_sid, 36213af08d82Slm66018 vdcp->session_id, 36223af08d82Slm66018 ((vio_dring_msg_t *)msgp)->seq_num, 36233af08d82Slm66018 ((vio_dring_msg_t *)msgp)->start_idx); 36243af08d82Slm66018 return (ENOMSG); 36253af08d82Slm66018 } 36263af08d82Slm66018 return (0); 36273af08d82Slm66018 } 36283af08d82Slm66018 36293af08d82Slm66018 36303af08d82Slm66018 /* 36313af08d82Slm66018 * Function: 36323af08d82Slm66018 * vdc_resubmit_backup_dring() 36333af08d82Slm66018 * 36343af08d82Slm66018 * Description: 36353af08d82Slm66018 * Resubmit each descriptor in the backed up dring to 36363af08d82Slm66018 * vDisk server. The Dring was backed up during connection 36373af08d82Slm66018 * reset. 36383af08d82Slm66018 * 36393af08d82Slm66018 * Arguments: 36403af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 
36413af08d82Slm66018 * 36423af08d82Slm66018 * Return Code: 36433af08d82Slm66018 * 0 - Success 36443af08d82Slm66018 */ 36453af08d82Slm66018 static int 36463af08d82Slm66018 vdc_resubmit_backup_dring(vdc_t *vdcp) 36473af08d82Slm66018 { 36483af08d82Slm66018 int count; 36493af08d82Slm66018 int b_idx; 36503af08d82Slm66018 int rv; 36513af08d82Slm66018 int dring_size; 36523af08d82Slm66018 int status; 36533af08d82Slm66018 vio_msg_t vio_msg; 36543af08d82Slm66018 vdc_local_desc_t *curr_ldep; 36553af08d82Slm66018 36563af08d82Slm66018 ASSERT(MUTEX_NOT_HELD(&vdcp->lock)); 36573af08d82Slm66018 ASSERT(vdcp->state == VDC_STATE_HANDLE_PENDING); 36583af08d82Slm66018 3659655fd6a9Sachartre if (vdcp->local_dring_backup == NULL) { 3660655fd6a9Sachartre /* the pending requests have already been processed */ 3661655fd6a9Sachartre return (0); 3662655fd6a9Sachartre } 3663655fd6a9Sachartre 36643af08d82Slm66018 DMSG(vdcp, 1, "restoring pending dring entries (len=%d, tail=%d)\n", 36653af08d82Slm66018 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 36663af08d82Slm66018 36673af08d82Slm66018 /* 36683af08d82Slm66018 * Walk the backup copy of the local descriptor ring and 36693af08d82Slm66018 * resubmit all the outstanding transactions. 
36703af08d82Slm66018 */ 36713af08d82Slm66018 b_idx = vdcp->local_dring_backup_tail; 36723af08d82Slm66018 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 36733af08d82Slm66018 36743af08d82Slm66018 curr_ldep = &(vdcp->local_dring_backup[b_idx]); 36753af08d82Slm66018 3676eff7243fSlm66018 /* only resubmit outstanding transactions */ 36773af08d82Slm66018 if (!curr_ldep->is_free) { 36783af08d82Slm66018 36793af08d82Slm66018 DMSG(vdcp, 1, "resubmitting entry idx=%x\n", b_idx); 36803af08d82Slm66018 mutex_enter(&vdcp->lock); 36813af08d82Slm66018 rv = vdc_populate_descriptor(vdcp, curr_ldep->operation, 36823af08d82Slm66018 curr_ldep->addr, curr_ldep->nbytes, 36833af08d82Slm66018 curr_ldep->slice, curr_ldep->offset, 36843af08d82Slm66018 curr_ldep->cb_type, curr_ldep->cb_arg, 36853af08d82Slm66018 curr_ldep->dir); 36863af08d82Slm66018 mutex_exit(&vdcp->lock); 36873af08d82Slm66018 if (rv) { 36883af08d82Slm66018 DMSG(vdcp, 1, "[%d] cannot resubmit entry %d\n", 36893af08d82Slm66018 vdcp->instance, b_idx); 36903af08d82Slm66018 return (rv); 36913af08d82Slm66018 } 36923af08d82Slm66018 36933af08d82Slm66018 /* Wait for the response message. 
*/ 36943af08d82Slm66018 DMSG(vdcp, 1, "waiting for response to idx=%x\n", 36953af08d82Slm66018 b_idx); 36963af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 36973af08d82Slm66018 if (status) { 36983af08d82Slm66018 DMSG(vdcp, 1, "[%d] wait_for_response " 36993af08d82Slm66018 "returned err=%d\n", vdcp->instance, 37003af08d82Slm66018 status); 37013af08d82Slm66018 return (status); 37023af08d82Slm66018 } 37033af08d82Slm66018 37043af08d82Slm66018 DMSG(vdcp, 1, "processing msg for idx=%x\n", b_idx); 37053af08d82Slm66018 status = vdc_process_data_msg(vdcp, &vio_msg); 37063af08d82Slm66018 if (status) { 37073af08d82Slm66018 DMSG(vdcp, 1, "[%d] process_data_msg " 37083af08d82Slm66018 "returned err=%d\n", vdcp->instance, 37093af08d82Slm66018 status); 37103af08d82Slm66018 return (status); 37113af08d82Slm66018 } 37123af08d82Slm66018 } 37133af08d82Slm66018 37143af08d82Slm66018 /* get the next element to submit */ 37153af08d82Slm66018 if (++b_idx >= vdcp->local_dring_backup_len) 37163af08d82Slm66018 b_idx = 0; 37173af08d82Slm66018 } 37183af08d82Slm66018 37193af08d82Slm66018 /* all done - now clear up pending dring copy */ 37203af08d82Slm66018 dring_size = vdcp->local_dring_backup_len * 37213af08d82Slm66018 sizeof (vdcp->local_dring_backup[0]); 37223af08d82Slm66018 37233af08d82Slm66018 (void) kmem_free(vdcp->local_dring_backup, dring_size); 37243af08d82Slm66018 37253af08d82Slm66018 vdcp->local_dring_backup = NULL; 37263af08d82Slm66018 37273af08d82Slm66018 return (0); 37283af08d82Slm66018 } 37293af08d82Slm66018 37303af08d82Slm66018 /* 37313af08d82Slm66018 * Function: 3732655fd6a9Sachartre * vdc_cancel_backup_dring 3733655fd6a9Sachartre * 3734655fd6a9Sachartre * Description: 3735655fd6a9Sachartre * Cancel each descriptor in the backed up dring to vDisk server. 3736655fd6a9Sachartre * The Dring was backed up during connection reset. 
3737655fd6a9Sachartre * 3738655fd6a9Sachartre * Arguments: 3739655fd6a9Sachartre * vdcp - soft state pointer for this instance of the device driver. 3740655fd6a9Sachartre * 3741655fd6a9Sachartre * Return Code: 3742655fd6a9Sachartre * None 3743655fd6a9Sachartre */ 3744655fd6a9Sachartre void 3745655fd6a9Sachartre vdc_cancel_backup_ring(vdc_t *vdcp) 3746655fd6a9Sachartre { 3747655fd6a9Sachartre vdc_local_desc_t *ldep; 3748655fd6a9Sachartre struct buf *bufp; 3749655fd6a9Sachartre int count; 3750655fd6a9Sachartre int b_idx; 3751655fd6a9Sachartre int dring_size; 3752655fd6a9Sachartre 3753655fd6a9Sachartre ASSERT(MUTEX_HELD(&vdcp->lock)); 3754655fd6a9Sachartre ASSERT(vdcp->state == VDC_STATE_INIT || 3755655fd6a9Sachartre vdcp->state == VDC_STATE_INIT_WAITING || 3756655fd6a9Sachartre vdcp->state == VDC_STATE_NEGOTIATE || 3757655fd6a9Sachartre vdcp->state == VDC_STATE_RESETTING); 3758655fd6a9Sachartre 3759655fd6a9Sachartre if (vdcp->local_dring_backup == NULL) { 3760655fd6a9Sachartre /* the pending requests have already been processed */ 3761655fd6a9Sachartre return; 3762655fd6a9Sachartre } 3763655fd6a9Sachartre 3764655fd6a9Sachartre DMSG(vdcp, 1, "cancelling pending dring entries (len=%d, tail=%d)\n", 3765655fd6a9Sachartre vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3766655fd6a9Sachartre 3767655fd6a9Sachartre /* 3768655fd6a9Sachartre * Walk the backup copy of the local descriptor ring and 3769655fd6a9Sachartre * cancel all the outstanding transactions. 
3770655fd6a9Sachartre */ 3771655fd6a9Sachartre b_idx = vdcp->local_dring_backup_tail; 3772655fd6a9Sachartre for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3773655fd6a9Sachartre 3774655fd6a9Sachartre ldep = &(vdcp->local_dring_backup[b_idx]); 3775655fd6a9Sachartre 3776655fd6a9Sachartre /* only cancel outstanding transactions */ 3777655fd6a9Sachartre if (!ldep->is_free) { 3778655fd6a9Sachartre 3779655fd6a9Sachartre DMSG(vdcp, 1, "cancelling entry idx=%x\n", b_idx); 3780655fd6a9Sachartre 3781655fd6a9Sachartre /* 3782655fd6a9Sachartre * All requests have already been cleared from the 3783655fd6a9Sachartre * local descriptor ring and the LDC channel has been 3784655fd6a9Sachartre * reset so we will never get any reply for these 3785655fd6a9Sachartre * requests. Now we just have to notify threads waiting 3786655fd6a9Sachartre * for replies that the request has failed. 3787655fd6a9Sachartre */ 3788655fd6a9Sachartre switch (ldep->cb_type) { 3789655fd6a9Sachartre case CB_SYNC: 3790655fd6a9Sachartre ASSERT(vdcp->sync_op_pending); 3791655fd6a9Sachartre vdcp->sync_op_status = EIO; 3792655fd6a9Sachartre vdcp->sync_op_pending = B_FALSE; 3793655fd6a9Sachartre cv_signal(&vdcp->sync_pending_cv); 3794655fd6a9Sachartre break; 3795655fd6a9Sachartre 3796655fd6a9Sachartre case CB_STRATEGY: 3797655fd6a9Sachartre bufp = ldep->cb_arg; 3798655fd6a9Sachartre ASSERT(bufp != NULL); 3799655fd6a9Sachartre bufp->b_resid = bufp->b_bcount; 3800366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 3801366a92acSlm66018 VD_KSTAT_RUNQ_EXIT(vdcp->io_stats); 3802366a92acSlm66018 DTRACE_IO1(done, buf_t *, bufp); 3803655fd6a9Sachartre bioerror(bufp, EIO); 3804655fd6a9Sachartre biodone(bufp); 3805655fd6a9Sachartre break; 3806655fd6a9Sachartre 3807655fd6a9Sachartre default: 3808655fd6a9Sachartre ASSERT(0); 3809655fd6a9Sachartre } 3810655fd6a9Sachartre 3811655fd6a9Sachartre } 3812655fd6a9Sachartre 3813655fd6a9Sachartre /* get the next element to cancel */ 3814655fd6a9Sachartre if (++b_idx 
>= vdcp->local_dring_backup_len) 3815655fd6a9Sachartre b_idx = 0; 3816655fd6a9Sachartre } 3817655fd6a9Sachartre 3818655fd6a9Sachartre /* all done - now clear up pending dring copy */ 3819655fd6a9Sachartre dring_size = vdcp->local_dring_backup_len * 3820655fd6a9Sachartre sizeof (vdcp->local_dring_backup[0]); 3821655fd6a9Sachartre 3822655fd6a9Sachartre (void) kmem_free(vdcp->local_dring_backup, dring_size); 3823655fd6a9Sachartre 3824655fd6a9Sachartre vdcp->local_dring_backup = NULL; 3825655fd6a9Sachartre 3826366a92acSlm66018 DTRACE_PROBE2(processed, int, count, vdc_t *, vdcp); 3827655fd6a9Sachartre } 3828655fd6a9Sachartre 3829655fd6a9Sachartre /* 3830655fd6a9Sachartre * Function: 3831655fd6a9Sachartre * vdc_connection_timeout 3832655fd6a9Sachartre * 3833655fd6a9Sachartre * Description: 3834655fd6a9Sachartre * This function is invoked if the timeout set to establish the connection 3835655fd6a9Sachartre * with vds expires. This will happen if we spend too much time in the 3836655fd6a9Sachartre * VDC_STATE_INIT_WAITING or VDC_STATE_NEGOTIATE states. Then we will 3837655fd6a9Sachartre * cancel any pending request and mark them as failed. 3838655fd6a9Sachartre * 3839655fd6a9Sachartre * If the timeout does not expire, it will be cancelled when we reach the 3840655fd6a9Sachartre * VDC_STATE_HANDLE_PENDING or VDC_STATE_RESETTING state. This function can 3841655fd6a9Sachartre * be invoked while we are in the VDC_STATE_HANDLE_PENDING or 3842655fd6a9Sachartre * VDC_STATE_RESETTING state in which case we do nothing because the 3843655fd6a9Sachartre * timeout is being cancelled. 3844655fd6a9Sachartre * 3845655fd6a9Sachartre * Arguments: 3846655fd6a9Sachartre * arg - argument of the timeout function actually a soft state 3847655fd6a9Sachartre * pointer for the instance of the device driver. 
3848655fd6a9Sachartre * 3849655fd6a9Sachartre * Return Code: 3850655fd6a9Sachartre * None 3851655fd6a9Sachartre */ 3852655fd6a9Sachartre void 3853655fd6a9Sachartre vdc_connection_timeout(void *arg) 3854655fd6a9Sachartre { 3855655fd6a9Sachartre vdc_t *vdcp = (vdc_t *)arg; 3856655fd6a9Sachartre 3857655fd6a9Sachartre mutex_enter(&vdcp->lock); 3858655fd6a9Sachartre 3859655fd6a9Sachartre if (vdcp->state == VDC_STATE_HANDLE_PENDING || 3860655fd6a9Sachartre vdcp->state == VDC_STATE_DETACH) { 3861655fd6a9Sachartre /* 3862655fd6a9Sachartre * The connection has just been re-established or 3863655fd6a9Sachartre * we are detaching. 3864655fd6a9Sachartre */ 3865655fd6a9Sachartre vdcp->ctimeout_reached = B_FALSE; 3866655fd6a9Sachartre mutex_exit(&vdcp->lock); 3867655fd6a9Sachartre return; 3868655fd6a9Sachartre } 3869655fd6a9Sachartre 3870655fd6a9Sachartre vdcp->ctimeout_reached = B_TRUE; 3871655fd6a9Sachartre 3872655fd6a9Sachartre /* notify requests waiting for sending */ 3873655fd6a9Sachartre cv_broadcast(&vdcp->running_cv); 3874655fd6a9Sachartre 3875655fd6a9Sachartre /* cancel requests waiting for a result */ 3876655fd6a9Sachartre vdc_cancel_backup_ring(vdcp); 3877655fd6a9Sachartre 3878655fd6a9Sachartre mutex_exit(&vdcp->lock); 3879655fd6a9Sachartre 3880655fd6a9Sachartre cmn_err(CE_NOTE, "[%d] connection to service domain timeout", 3881655fd6a9Sachartre vdcp->instance); 3882655fd6a9Sachartre } 3883655fd6a9Sachartre 3884655fd6a9Sachartre /* 3885655fd6a9Sachartre * Function: 38863af08d82Slm66018 * vdc_backup_local_dring() 38873af08d82Slm66018 * 38883af08d82Slm66018 * Description: 38893af08d82Slm66018 * Backup the current dring in the event of a reset. The Dring 38903af08d82Slm66018 * transactions will be resubmitted to the server when the 38913af08d82Slm66018 * connection is restored. 38923af08d82Slm66018 * 38933af08d82Slm66018 * Arguments: 38943af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 
38953af08d82Slm66018 * 38963af08d82Slm66018 * Return Code: 38973af08d82Slm66018 * NONE 38983af08d82Slm66018 */ 38993af08d82Slm66018 static void 39003af08d82Slm66018 vdc_backup_local_dring(vdc_t *vdcp) 39013af08d82Slm66018 { 39023af08d82Slm66018 int dring_size; 39033af08d82Slm66018 3904655fd6a9Sachartre ASSERT(MUTEX_HELD(&vdcp->lock)); 39053af08d82Slm66018 ASSERT(vdcp->state == VDC_STATE_RESETTING); 39063af08d82Slm66018 39073af08d82Slm66018 /* 39083af08d82Slm66018 * If the backup dring is stil around, it means 39093af08d82Slm66018 * that the last restore did not complete. However, 39103af08d82Slm66018 * since we never got back into the running state, 39113af08d82Slm66018 * the backup copy we have is still valid. 39123af08d82Slm66018 */ 39133af08d82Slm66018 if (vdcp->local_dring_backup != NULL) { 39143af08d82Slm66018 DMSG(vdcp, 1, "reusing local descriptor ring backup " 39153af08d82Slm66018 "(len=%d, tail=%d)\n", vdcp->local_dring_backup_len, 39163af08d82Slm66018 vdcp->local_dring_backup_tail); 39173af08d82Slm66018 return; 39183af08d82Slm66018 } 39193af08d82Slm66018 3920655fd6a9Sachartre /* 3921655fd6a9Sachartre * The backup dring can be NULL and the local dring may not be 3922655fd6a9Sachartre * initialized. This can happen if we had a reset while establishing 3923655fd6a9Sachartre * a new connection but after the connection has timed out. In that 3924655fd6a9Sachartre * case the backup dring is NULL because the requests have been 3925655fd6a9Sachartre * cancelled and the request occured before the local dring is 3926655fd6a9Sachartre * initialized. 
3927655fd6a9Sachartre */ 3928655fd6a9Sachartre if (!(vdcp->initialized & VDC_DRING_LOCAL)) 3929655fd6a9Sachartre return; 3930655fd6a9Sachartre 39313af08d82Slm66018 DMSG(vdcp, 1, "backing up the local descriptor ring (len=%d, " 39323af08d82Slm66018 "tail=%d)\n", vdcp->dring_len, vdcp->dring_curr_idx); 39333af08d82Slm66018 39343af08d82Slm66018 dring_size = vdcp->dring_len * sizeof (vdcp->local_dring[0]); 39353af08d82Slm66018 39363af08d82Slm66018 vdcp->local_dring_backup = kmem_alloc(dring_size, KM_SLEEP); 39373af08d82Slm66018 bcopy(vdcp->local_dring, vdcp->local_dring_backup, dring_size); 39383af08d82Slm66018 39393af08d82Slm66018 vdcp->local_dring_backup_tail = vdcp->dring_curr_idx; 39403af08d82Slm66018 vdcp->local_dring_backup_len = vdcp->dring_len; 39413af08d82Slm66018 } 39423af08d82Slm66018 39431ae08745Sheppo /* -------------------------------------------------------------------------- */ 39441ae08745Sheppo 39451ae08745Sheppo /* 39461ae08745Sheppo * The following functions process the incoming messages from vds 39471ae08745Sheppo */ 39481ae08745Sheppo 39490a55fbb7Slm66018 /* 39500a55fbb7Slm66018 * Function: 39510a55fbb7Slm66018 * vdc_process_msg_thread() 39520a55fbb7Slm66018 * 39530a55fbb7Slm66018 * Description: 39540a55fbb7Slm66018 * 39553af08d82Slm66018 * Main VDC message processing thread. Each vDisk instance 39563af08d82Slm66018 * consists of a copy of this thread. This thread triggers 39573af08d82Slm66018 * all the handshakes and data exchange with the server. It 39583af08d82Slm66018 * also handles all channel resets 39593af08d82Slm66018 * 39600a55fbb7Slm66018 * Arguments: 39610a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
39620a55fbb7Slm66018 * 39630a55fbb7Slm66018 * Return Code: 39640a55fbb7Slm66018 * None 39650a55fbb7Slm66018 */ 39661ae08745Sheppo static void 39673af08d82Slm66018 vdc_process_msg_thread(vdc_t *vdcp) 39681ae08745Sheppo { 39691ae08745Sheppo int status; 3970655fd6a9Sachartre int ctimeout; 3971655fd6a9Sachartre timeout_id_t tmid = 0; 39721ae08745Sheppo 39733af08d82Slm66018 mutex_enter(&vdcp->lock); 39741ae08745Sheppo 39751ae08745Sheppo for (;;) { 39761ae08745Sheppo 39773af08d82Slm66018 #define Q(_s) (vdcp->state == _s) ? #_s : 39783af08d82Slm66018 DMSG(vdcp, 3, "state = %d (%s)\n", vdcp->state, 39793af08d82Slm66018 Q(VDC_STATE_INIT) 39803af08d82Slm66018 Q(VDC_STATE_INIT_WAITING) 39813af08d82Slm66018 Q(VDC_STATE_NEGOTIATE) 39823af08d82Slm66018 Q(VDC_STATE_HANDLE_PENDING) 39833af08d82Slm66018 Q(VDC_STATE_RUNNING) 39843af08d82Slm66018 Q(VDC_STATE_RESETTING) 39853af08d82Slm66018 Q(VDC_STATE_DETACH) 39863af08d82Slm66018 "UNKNOWN"); 39871ae08745Sheppo 39883af08d82Slm66018 switch (vdcp->state) { 39893af08d82Slm66018 case VDC_STATE_INIT: 39903af08d82Slm66018 3991655fd6a9Sachartre /* 3992655fd6a9Sachartre * If requested, start a timeout to check if the 3993655fd6a9Sachartre * connection with vds is established in the 3994655fd6a9Sachartre * specified delay. If the timeout expires, we 3995655fd6a9Sachartre * will cancel any pending request. 3996655fd6a9Sachartre * 3997655fd6a9Sachartre * If some reset have occurred while establishing 3998655fd6a9Sachartre * the connection, we already have a timeout armed 3999655fd6a9Sachartre * and in that case we don't need to arm a new one. 4000655fd6a9Sachartre */ 4001655fd6a9Sachartre ctimeout = (vdc_timeout != 0)? 
4002655fd6a9Sachartre vdc_timeout : vdcp->ctimeout; 4003655fd6a9Sachartre 4004655fd6a9Sachartre if (ctimeout != 0 && tmid == 0) { 4005655fd6a9Sachartre tmid = timeout(vdc_connection_timeout, vdcp, 4006655fd6a9Sachartre ctimeout * drv_usectohz(1000000)); 4007655fd6a9Sachartre } 4008655fd6a9Sachartre 40093af08d82Slm66018 /* Check if have re-initializing repeatedly */ 4010655fd6a9Sachartre if (vdcp->hshake_cnt++ > vdc_hshake_retries && 4011655fd6a9Sachartre vdcp->lifecycle != VDC_LC_ONLINE) { 40123c96341aSnarayan cmn_err(CE_NOTE, "[%d] disk access failed.\n", 40133c96341aSnarayan vdcp->instance); 40143af08d82Slm66018 vdcp->state = VDC_STATE_DETACH; 40153af08d82Slm66018 break; 40163af08d82Slm66018 } 40173af08d82Slm66018 40183af08d82Slm66018 /* Bring up connection with vds via LDC */ 40193af08d82Slm66018 status = vdc_start_ldc_connection(vdcp); 4020655fd6a9Sachartre if (status == EINVAL) { 40213af08d82Slm66018 DMSG(vdcp, 0, "[%d] Could not start LDC", 40223af08d82Slm66018 vdcp->instance); 40233af08d82Slm66018 vdcp->state = VDC_STATE_DETACH; 4024655fd6a9Sachartre } else { 40253af08d82Slm66018 vdcp->state = VDC_STATE_INIT_WAITING; 40263af08d82Slm66018 } 40273af08d82Slm66018 break; 40283af08d82Slm66018 40293af08d82Slm66018 case VDC_STATE_INIT_WAITING: 40303af08d82Slm66018 40313af08d82Slm66018 /* 40323af08d82Slm66018 * Let the callback event move us on 40333af08d82Slm66018 * when channel is open to server 40343af08d82Slm66018 */ 40353af08d82Slm66018 while (vdcp->ldc_state != LDC_UP) { 40363af08d82Slm66018 cv_wait(&vdcp->initwait_cv, &vdcp->lock); 40373af08d82Slm66018 if (vdcp->state != VDC_STATE_INIT_WAITING) { 40383af08d82Slm66018 DMSG(vdcp, 0, 40393af08d82Slm66018 "state moved to %d out from under us...\n", 40403af08d82Slm66018 vdcp->state); 40413af08d82Slm66018 40423af08d82Slm66018 break; 40433af08d82Slm66018 } 40443af08d82Slm66018 } 40453af08d82Slm66018 if (vdcp->state == VDC_STATE_INIT_WAITING && 40463af08d82Slm66018 vdcp->ldc_state == LDC_UP) { 40473af08d82Slm66018 
vdcp->state = VDC_STATE_NEGOTIATE; 40483af08d82Slm66018 } 40493af08d82Slm66018 break; 40503af08d82Slm66018 40513af08d82Slm66018 case VDC_STATE_NEGOTIATE: 40523af08d82Slm66018 switch (status = vdc_ver_negotiation(vdcp)) { 40533af08d82Slm66018 case 0: 40543af08d82Slm66018 break; 40553af08d82Slm66018 default: 40563af08d82Slm66018 DMSG(vdcp, 0, "ver negotiate failed (%d)..\n", 40573af08d82Slm66018 status); 40583af08d82Slm66018 goto reset; 40593af08d82Slm66018 } 40603af08d82Slm66018 40613af08d82Slm66018 switch (status = vdc_attr_negotiation(vdcp)) { 40623af08d82Slm66018 case 0: 40633af08d82Slm66018 break; 40643af08d82Slm66018 default: 40653af08d82Slm66018 DMSG(vdcp, 0, "attr negotiate failed (%d)..\n", 40663af08d82Slm66018 status); 40673af08d82Slm66018 goto reset; 40683af08d82Slm66018 } 40693af08d82Slm66018 40703af08d82Slm66018 switch (status = vdc_dring_negotiation(vdcp)) { 40713af08d82Slm66018 case 0: 40723af08d82Slm66018 break; 40733af08d82Slm66018 default: 40743af08d82Slm66018 DMSG(vdcp, 0, "dring negotiate failed (%d)..\n", 40753af08d82Slm66018 status); 40763af08d82Slm66018 goto reset; 40773af08d82Slm66018 } 40783af08d82Slm66018 40793af08d82Slm66018 switch (status = vdc_rdx_exchange(vdcp)) { 40803af08d82Slm66018 case 0: 40813af08d82Slm66018 vdcp->state = VDC_STATE_HANDLE_PENDING; 40823af08d82Slm66018 goto done; 40833af08d82Slm66018 default: 40843af08d82Slm66018 DMSG(vdcp, 0, "RDX xchg failed ..(%d)\n", 40853af08d82Slm66018 status); 40863af08d82Slm66018 goto reset; 40873af08d82Slm66018 } 40883af08d82Slm66018 reset: 40893af08d82Slm66018 DMSG(vdcp, 0, "negotiation failed: resetting (%d)\n", 40903af08d82Slm66018 status); 40913af08d82Slm66018 vdcp->state = VDC_STATE_RESETTING; 4092655fd6a9Sachartre vdcp->self_reset = B_TRUE; 40933af08d82Slm66018 done: 40943af08d82Slm66018 DMSG(vdcp, 0, "negotiation complete (state=0x%x)...\n", 40953af08d82Slm66018 vdcp->state); 40963af08d82Slm66018 break; 40973af08d82Slm66018 40983af08d82Slm66018 case VDC_STATE_HANDLE_PENDING: 
40993af08d82Slm66018 4100655fd6a9Sachartre if (vdcp->ctimeout_reached) { 4101655fd6a9Sachartre /* 4102655fd6a9Sachartre * The connection timeout had been reached so 4103655fd6a9Sachartre * pending requests have been cancelled. Now 4104655fd6a9Sachartre * that the connection is back we can reset 4105655fd6a9Sachartre * the timeout. 4106655fd6a9Sachartre */ 4107655fd6a9Sachartre ASSERT(vdcp->local_dring_backup == NULL); 4108655fd6a9Sachartre ASSERT(tmid != 0); 4109655fd6a9Sachartre tmid = 0; 4110655fd6a9Sachartre vdcp->ctimeout_reached = B_FALSE; 4111655fd6a9Sachartre vdcp->state = VDC_STATE_RUNNING; 4112655fd6a9Sachartre DMSG(vdcp, 0, "[%d] connection to service " 4113655fd6a9Sachartre "domain is up", vdcp->instance); 4114655fd6a9Sachartre break; 4115655fd6a9Sachartre } 4116655fd6a9Sachartre 41173af08d82Slm66018 mutex_exit(&vdcp->lock); 4118655fd6a9Sachartre if (tmid != 0) { 4119655fd6a9Sachartre (void) untimeout(tmid); 4120655fd6a9Sachartre tmid = 0; 4121655fd6a9Sachartre } 41223af08d82Slm66018 status = vdc_resubmit_backup_dring(vdcp); 41233af08d82Slm66018 mutex_enter(&vdcp->lock); 41243af08d82Slm66018 41253af08d82Slm66018 if (status) 41263af08d82Slm66018 vdcp->state = VDC_STATE_RESETTING; 41273af08d82Slm66018 else 41283af08d82Slm66018 vdcp->state = VDC_STATE_RUNNING; 41293af08d82Slm66018 41303af08d82Slm66018 break; 41313af08d82Slm66018 41323af08d82Slm66018 /* enter running state */ 41333af08d82Slm66018 case VDC_STATE_RUNNING: 41343af08d82Slm66018 /* 41353af08d82Slm66018 * Signal anyone waiting for the connection 41363af08d82Slm66018 * to come on line. 
41373af08d82Slm66018 */ 41383af08d82Slm66018 vdcp->hshake_cnt = 0; 41393af08d82Slm66018 cv_broadcast(&vdcp->running_cv); 41402f5224aeSachartre 41412f5224aeSachartre /* failfast has to been checked after reset */ 41422f5224aeSachartre cv_signal(&vdcp->failfast_cv); 41432f5224aeSachartre 41442f5224aeSachartre /* ownership is lost during reset */ 41452f5224aeSachartre if (vdcp->ownership & VDC_OWNERSHIP_WANTED) 41462f5224aeSachartre vdcp->ownership |= VDC_OWNERSHIP_RESET; 41472f5224aeSachartre cv_signal(&vdcp->ownership_cv); 41482f5224aeSachartre 41493af08d82Slm66018 mutex_exit(&vdcp->lock); 41503af08d82Slm66018 41513af08d82Slm66018 for (;;) { 41523af08d82Slm66018 vio_msg_t msg; 41533af08d82Slm66018 status = vdc_wait_for_response(vdcp, &msg); 41543af08d82Slm66018 if (status) break; 41553af08d82Slm66018 41563af08d82Slm66018 DMSG(vdcp, 1, "[%d] new pkt(s) available\n", 41573af08d82Slm66018 vdcp->instance); 41583af08d82Slm66018 status = vdc_process_data_msg(vdcp, &msg); 41591ae08745Sheppo if (status) { 41603af08d82Slm66018 DMSG(vdcp, 1, "[%d] process_data_msg " 41613af08d82Slm66018 "returned err=%d\n", vdcp->instance, 41623af08d82Slm66018 status); 41631ae08745Sheppo break; 41641ae08745Sheppo } 41651ae08745Sheppo 41663af08d82Slm66018 } 4167e1ebb9ecSlm66018 41683af08d82Slm66018 mutex_enter(&vdcp->lock); 41693af08d82Slm66018 41703af08d82Slm66018 vdcp->state = VDC_STATE_RESETTING; 4171690555a1Sachartre vdcp->self_reset = B_TRUE; 41723af08d82Slm66018 break; 41733af08d82Slm66018 41743af08d82Slm66018 case VDC_STATE_RESETTING: 4175655fd6a9Sachartre /* 4176655fd6a9Sachartre * When we reach this state, we either come from the 4177655fd6a9Sachartre * VDC_STATE_RUNNING state and we can have pending 4178655fd6a9Sachartre * request but no timeout is armed; or we come from 4179655fd6a9Sachartre * the VDC_STATE_INIT_WAITING, VDC_NEGOTIATE or 4180655fd6a9Sachartre * VDC_HANDLE_PENDING state and there is no pending 4181655fd6a9Sachartre * request or pending requests have already been 
copied 4182655fd6a9Sachartre * into the backup dring. So we can safely keep the 4183655fd6a9Sachartre * connection timeout armed while we are in this state. 4184655fd6a9Sachartre */ 4185655fd6a9Sachartre 41863af08d82Slm66018 DMSG(vdcp, 0, "Initiating channel reset " 41873af08d82Slm66018 "(pending = %d)\n", (int)vdcp->threads_pending); 41883af08d82Slm66018 41893af08d82Slm66018 if (vdcp->self_reset) { 41903af08d82Slm66018 DMSG(vdcp, 0, 41913af08d82Slm66018 "[%d] calling stop_ldc_connection.\n", 41923af08d82Slm66018 vdcp->instance); 41933af08d82Slm66018 status = vdc_stop_ldc_connection(vdcp); 41943af08d82Slm66018 vdcp->self_reset = B_FALSE; 41951ae08745Sheppo } 41961ae08745Sheppo 41971ae08745Sheppo /* 41983af08d82Slm66018 * Wait for all threads currently waiting 41993af08d82Slm66018 * for a free dring entry to use. 42001ae08745Sheppo */ 42013af08d82Slm66018 while (vdcp->threads_pending) { 42023af08d82Slm66018 cv_broadcast(&vdcp->membind_cv); 42033af08d82Slm66018 cv_broadcast(&vdcp->dring_free_cv); 42043af08d82Slm66018 mutex_exit(&vdcp->lock); 4205205eeb1aSlm66018 /* give the waiters enough time to wake up */ 4206205eeb1aSlm66018 delay(vdc_hz_min_ldc_delay); 42073af08d82Slm66018 mutex_enter(&vdcp->lock); 42081ae08745Sheppo } 42091ae08745Sheppo 42103af08d82Slm66018 ASSERT(vdcp->threads_pending == 0); 42111ae08745Sheppo 42123af08d82Slm66018 /* Sanity check that no thread is receiving */ 42133af08d82Slm66018 ASSERT(vdcp->read_state != VDC_READ_WAITING); 42140a55fbb7Slm66018 42153af08d82Slm66018 vdcp->read_state = VDC_READ_IDLE; 42163af08d82Slm66018 42173af08d82Slm66018 vdc_backup_local_dring(vdcp); 42183af08d82Slm66018 42193af08d82Slm66018 /* cleanup the old d-ring */ 42203af08d82Slm66018 vdc_destroy_descriptor_ring(vdcp); 42213af08d82Slm66018 42223af08d82Slm66018 /* go and start again */ 42233af08d82Slm66018 vdcp->state = VDC_STATE_INIT; 42243af08d82Slm66018 42250a55fbb7Slm66018 break; 42260a55fbb7Slm66018 42273af08d82Slm66018 case VDC_STATE_DETACH: 42283af08d82Slm66018 
DMSG(vdcp, 0, "[%d] Reset thread exit cleanup ..\n", 42293af08d82Slm66018 vdcp->instance); 42303af08d82Slm66018 4231655fd6a9Sachartre /* cancel any pending timeout */ 4232655fd6a9Sachartre mutex_exit(&vdcp->lock); 4233655fd6a9Sachartre if (tmid != 0) { 4234655fd6a9Sachartre (void) untimeout(tmid); 4235655fd6a9Sachartre tmid = 0; 4236655fd6a9Sachartre } 4237655fd6a9Sachartre mutex_enter(&vdcp->lock); 4238655fd6a9Sachartre 42393c96341aSnarayan /* 42403c96341aSnarayan * Signal anyone waiting for connection 42413c96341aSnarayan * to come online 42423c96341aSnarayan */ 42433c96341aSnarayan cv_broadcast(&vdcp->running_cv); 42443c96341aSnarayan 42453af08d82Slm66018 while (vdcp->sync_op_pending) { 42463af08d82Slm66018 cv_signal(&vdcp->sync_pending_cv); 42473af08d82Slm66018 cv_signal(&vdcp->sync_blocked_cv); 42483af08d82Slm66018 mutex_exit(&vdcp->lock); 4249205eeb1aSlm66018 /* give the waiters enough time to wake up */ 4250205eeb1aSlm66018 delay(vdc_hz_min_ldc_delay); 42513af08d82Slm66018 mutex_enter(&vdcp->lock); 42520a55fbb7Slm66018 } 42531ae08745Sheppo 42543af08d82Slm66018 mutex_exit(&vdcp->lock); 42553af08d82Slm66018 42563af08d82Slm66018 DMSG(vdcp, 0, "[%d] Msg processing thread exiting ..\n", 42573af08d82Slm66018 vdcp->instance); 42583af08d82Slm66018 thread_exit(); 42593af08d82Slm66018 break; 42603af08d82Slm66018 } 42613af08d82Slm66018 } 42620a55fbb7Slm66018 } 42630a55fbb7Slm66018 42640a55fbb7Slm66018 42650a55fbb7Slm66018 /* 42660a55fbb7Slm66018 * Function: 42670a55fbb7Slm66018 * vdc_process_data_msg() 42680a55fbb7Slm66018 * 42690a55fbb7Slm66018 * Description: 42700a55fbb7Slm66018 * This function is called by the message processing thread each time 42710a55fbb7Slm66018 * a message with a msgtype of VIO_TYPE_DATA is received. It will either 42720a55fbb7Slm66018 * be an ACK or NACK from vds[1] which vdc handles as follows. 
42730a55fbb7Slm66018 * ACK - wake up the waiting thread 42740a55fbb7Slm66018 * NACK - resend any messages necessary 42750a55fbb7Slm66018 * 42760a55fbb7Slm66018 * [1] Although the message format allows it, vds should not send a 42770a55fbb7Slm66018 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for 42780a55fbb7Slm66018 * some bizarre reason it does, vdc will reset the connection. 42790a55fbb7Slm66018 * 42800a55fbb7Slm66018 * Arguments: 42810a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 42820a55fbb7Slm66018 * msg - the LDC message sent by vds 42830a55fbb7Slm66018 * 42840a55fbb7Slm66018 * Return Code: 42850a55fbb7Slm66018 * 0 - Success. 42860a55fbb7Slm66018 * > 0 - error value returned by LDC 42870a55fbb7Slm66018 */ 42880a55fbb7Slm66018 static int 42893af08d82Slm66018 vdc_process_data_msg(vdc_t *vdcp, vio_msg_t *msg) 42900a55fbb7Slm66018 { 42910a55fbb7Slm66018 int status = 0; 42923af08d82Slm66018 vio_dring_msg_t *dring_msg; 4293d10e4ef2Snarayan vdc_local_desc_t *ldep = NULL; 42943af08d82Slm66018 int start, end; 42953af08d82Slm66018 int idx; 42960a55fbb7Slm66018 42973af08d82Slm66018 dring_msg = (vio_dring_msg_t *)msg; 42980a55fbb7Slm66018 42993af08d82Slm66018 ASSERT(msg->tag.vio_msgtype == VIO_TYPE_DATA); 43003af08d82Slm66018 ASSERT(vdcp != NULL); 43013af08d82Slm66018 43023af08d82Slm66018 mutex_enter(&vdcp->lock); 43030a55fbb7Slm66018 43040a55fbb7Slm66018 /* 43050a55fbb7Slm66018 * Check to see if the message has bogus data 43060a55fbb7Slm66018 */ 4307e1ebb9ecSlm66018 idx = start = dring_msg->start_idx; 43080a55fbb7Slm66018 end = dring_msg->end_idx; 43093af08d82Slm66018 if ((start >= vdcp->dring_len) || 43103af08d82Slm66018 (end >= vdcp->dring_len) || (end < -1)) { 43113af08d82Slm66018 DMSG(vdcp, 0, "[%d] Bogus ACK data : start %d, end %d\n", 43123af08d82Slm66018 vdcp->instance, start, end); 4313366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 43143af08d82Slm66018 mutex_exit(&vdcp->lock); 4315e1ebb9ecSlm66018 
return (EINVAL); 43160a55fbb7Slm66018 } 43170a55fbb7Slm66018 43180a55fbb7Slm66018 /* 43190a55fbb7Slm66018 * Verify that the sequence number is what vdc expects. 43200a55fbb7Slm66018 */ 43213af08d82Slm66018 switch (vdc_verify_seq_num(vdcp, dring_msg)) { 4322e1ebb9ecSlm66018 case VDC_SEQ_NUM_TODO: 4323e1ebb9ecSlm66018 break; /* keep processing this message */ 4324e1ebb9ecSlm66018 case VDC_SEQ_NUM_SKIP: 43253af08d82Slm66018 mutex_exit(&vdcp->lock); 4326e1ebb9ecSlm66018 return (0); 4327e1ebb9ecSlm66018 case VDC_SEQ_NUM_INVALID: 43283af08d82Slm66018 DMSG(vdcp, 0, "[%d] invalid seqno\n", vdcp->instance); 4329366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 4330366a92acSlm66018 mutex_exit(&vdcp->lock); 43310a55fbb7Slm66018 return (ENXIO); 43320a55fbb7Slm66018 } 43330a55fbb7Slm66018 43343af08d82Slm66018 if (msg->tag.vio_subtype == VIO_SUBTYPE_NACK) { 43353af08d82Slm66018 DMSG(vdcp, 0, "[%d] DATA NACK\n", vdcp->instance); 4336e1ebb9ecSlm66018 VDC_DUMP_DRING_MSG(dring_msg); 4337366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 43383af08d82Slm66018 mutex_exit(&vdcp->lock); 4339e1ebb9ecSlm66018 return (EIO); 43400a55fbb7Slm66018 43413af08d82Slm66018 } else if (msg->tag.vio_subtype == VIO_SUBTYPE_INFO) { 4342366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_protoerrs); 43433af08d82Slm66018 mutex_exit(&vdcp->lock); 4344e1ebb9ecSlm66018 return (EPROTO); 4345e1ebb9ecSlm66018 } 4346e1ebb9ecSlm66018 43473af08d82Slm66018 DMSG(vdcp, 1, ": start %d end %d\n", start, end); 43483af08d82Slm66018 ASSERT(start == end); 43493af08d82Slm66018 43503af08d82Slm66018 ldep = &vdcp->local_dring[idx]; 43513af08d82Slm66018 43523af08d82Slm66018 DMSG(vdcp, 1, ": state 0x%x - cb_type 0x%x\n", 43533af08d82Slm66018 ldep->dep->hdr.dstate, ldep->cb_type); 43543af08d82Slm66018 4355e1ebb9ecSlm66018 if (ldep->dep->hdr.dstate == VIO_DESC_DONE) { 43563af08d82Slm66018 struct buf *bufp; 4357e1ebb9ecSlm66018 43583af08d82Slm66018 switch (ldep->cb_type) { 43593af08d82Slm66018 case CB_SYNC: 
43603af08d82Slm66018 ASSERT(vdcp->sync_op_pending); 4361d10e4ef2Snarayan 43623af08d82Slm66018 status = vdc_depopulate_descriptor(vdcp, idx); 43633af08d82Slm66018 vdcp->sync_op_status = status; 43643af08d82Slm66018 vdcp->sync_op_pending = B_FALSE; 43653af08d82Slm66018 cv_signal(&vdcp->sync_pending_cv); 43663af08d82Slm66018 break; 43674bac2208Snarayan 43683af08d82Slm66018 case CB_STRATEGY: 43693af08d82Slm66018 bufp = ldep->cb_arg; 43703af08d82Slm66018 ASSERT(bufp != NULL); 43713c96341aSnarayan bufp->b_resid = 43723c96341aSnarayan bufp->b_bcount - ldep->dep->payload.nbytes; 43733af08d82Slm66018 status = ldep->dep->payload.status; /* Future:ntoh */ 43743af08d82Slm66018 if (status != 0) { 43753af08d82Slm66018 DMSG(vdcp, 1, "strategy status=%d\n", status); 4376366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 43773af08d82Slm66018 bioerror(bufp, status); 4378d10e4ef2Snarayan } 43792f5224aeSachartre 43802f5224aeSachartre (void) vdc_depopulate_descriptor(vdcp, idx); 43813c96341aSnarayan 43823c96341aSnarayan DMSG(vdcp, 1, 43833c96341aSnarayan "strategy complete req=%ld bytes resp=%ld bytes\n", 43843c96341aSnarayan bufp->b_bcount, ldep->dep->payload.nbytes); 43852f5224aeSachartre 43862f5224aeSachartre if (status != 0 && vdcp->failfast_interval != 0) { 43872f5224aeSachartre /* 43882f5224aeSachartre * The I/O has failed and failfast is enabled. 43892f5224aeSachartre * We need the failfast thread to check if the 43902f5224aeSachartre * failure is due to a reservation conflict. 43912f5224aeSachartre */ 43922f5224aeSachartre (void) vdc_failfast_io_queue(vdcp, bufp); 43932f5224aeSachartre } else { 4394366a92acSlm66018 if (status == 0) { 4395366a92acSlm66018 int op = (bufp->b_flags & B_READ) ? 
4396366a92acSlm66018 VD_OP_BREAD : VD_OP_BWRITE; 4397366a92acSlm66018 VD_UPDATE_IO_STATS(vdcp, op, 4398366a92acSlm66018 ldep->dep->payload.nbytes); 4399366a92acSlm66018 } 4400366a92acSlm66018 VD_KSTAT_RUNQ_EXIT(vdcp->io_stats); 4401366a92acSlm66018 DTRACE_IO1(done, buf_t *, bufp); 44022f5224aeSachartre biodone(bufp); 44032f5224aeSachartre } 44043af08d82Slm66018 break; 44053af08d82Slm66018 44063af08d82Slm66018 default: 44073af08d82Slm66018 ASSERT(0); 44080a55fbb7Slm66018 } 44093af08d82Slm66018 } 44103af08d82Slm66018 44113af08d82Slm66018 /* let the arrival signal propogate */ 44123af08d82Slm66018 mutex_exit(&vdcp->lock); 44130a55fbb7Slm66018 4414e1ebb9ecSlm66018 /* probe gives the count of how many entries were processed */ 4415366a92acSlm66018 DTRACE_PROBE2(processed, int, 1, vdc_t *, vdcp); 44160a55fbb7Slm66018 44173af08d82Slm66018 return (0); 44180a55fbb7Slm66018 } 44190a55fbb7Slm66018 44200a55fbb7Slm66018 44210a55fbb7Slm66018 /* 44220a55fbb7Slm66018 * Function: 44230a55fbb7Slm66018 * vdc_handle_ver_msg() 44240a55fbb7Slm66018 * 44250a55fbb7Slm66018 * Description: 44260a55fbb7Slm66018 * 44270a55fbb7Slm66018 * Arguments: 44280a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
44290a55fbb7Slm66018 * ver_msg - LDC message sent by vDisk server 44300a55fbb7Slm66018 * 44310a55fbb7Slm66018 * Return Code: 44320a55fbb7Slm66018 * 0 - Success 44330a55fbb7Slm66018 */ 44340a55fbb7Slm66018 static int 44350a55fbb7Slm66018 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg) 44360a55fbb7Slm66018 { 44370a55fbb7Slm66018 int status = 0; 44380a55fbb7Slm66018 44390a55fbb7Slm66018 ASSERT(vdc != NULL); 44400a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 44410a55fbb7Slm66018 44420a55fbb7Slm66018 if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) { 44430a55fbb7Slm66018 return (EPROTO); 44440a55fbb7Slm66018 } 44450a55fbb7Slm66018 44460a55fbb7Slm66018 if (ver_msg->dev_class != VDEV_DISK_SERVER) { 44470a55fbb7Slm66018 return (EINVAL); 44480a55fbb7Slm66018 } 44490a55fbb7Slm66018 44500a55fbb7Slm66018 switch (ver_msg->tag.vio_subtype) { 44510a55fbb7Slm66018 case VIO_SUBTYPE_ACK: 44520a55fbb7Slm66018 /* 44530a55fbb7Slm66018 * We check to see if the version returned is indeed supported 44540a55fbb7Slm66018 * (The server may have also adjusted the minor number downwards 44550a55fbb7Slm66018 * and if so 'ver_msg' will contain the actual version agreed) 44560a55fbb7Slm66018 */ 44570a55fbb7Slm66018 if (vdc_is_supported_version(ver_msg)) { 44580a55fbb7Slm66018 vdc->ver.major = ver_msg->ver_major; 44590a55fbb7Slm66018 vdc->ver.minor = ver_msg->ver_minor; 44600a55fbb7Slm66018 ASSERT(vdc->ver.major > 0); 44610a55fbb7Slm66018 } else { 44620a55fbb7Slm66018 status = EPROTO; 44630a55fbb7Slm66018 } 44640a55fbb7Slm66018 break; 44650a55fbb7Slm66018 44660a55fbb7Slm66018 case VIO_SUBTYPE_NACK: 44670a55fbb7Slm66018 /* 44680a55fbb7Slm66018 * call vdc_is_supported_version() which will return the next 44690a55fbb7Slm66018 * supported version (if any) in 'ver_msg' 44700a55fbb7Slm66018 */ 44710a55fbb7Slm66018 (void) vdc_is_supported_version(ver_msg); 44720a55fbb7Slm66018 if (ver_msg->ver_major > 0) { 44730a55fbb7Slm66018 size_t len = sizeof (*ver_msg); 44740a55fbb7Slm66018 
44750a55fbb7Slm66018 ASSERT(vdc->ver.major > 0); 44760a55fbb7Slm66018 44770a55fbb7Slm66018 /* reset the necessary fields and resend */ 44780a55fbb7Slm66018 ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO; 44790a55fbb7Slm66018 ver_msg->dev_class = VDEV_DISK; 44800a55fbb7Slm66018 44810a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)ver_msg, &len); 44823af08d82Slm66018 DMSG(vdc, 0, "[%d] Resend VER info (LDC status = %d)\n", 44830a55fbb7Slm66018 vdc->instance, status); 44840a55fbb7Slm66018 if (len != sizeof (*ver_msg)) 44850a55fbb7Slm66018 status = EBADMSG; 44860a55fbb7Slm66018 } else { 448787a7269eSachartre DMSG(vdc, 0, "[%d] No common version with vDisk server", 448887a7269eSachartre vdc->instance); 44890a55fbb7Slm66018 status = ENOTSUP; 44900a55fbb7Slm66018 } 44910a55fbb7Slm66018 44920a55fbb7Slm66018 break; 44931ae08745Sheppo case VIO_SUBTYPE_INFO: 44941ae08745Sheppo /* 44951ae08745Sheppo * Handle the case where vds starts handshake 4496eff7243fSlm66018 * (for now only vdc is the instigator) 44971ae08745Sheppo */ 44981ae08745Sheppo status = ENOTSUP; 44991ae08745Sheppo break; 45001ae08745Sheppo 45011ae08745Sheppo default: 45020a55fbb7Slm66018 status = EINVAL; 45031ae08745Sheppo break; 45041ae08745Sheppo } 45051ae08745Sheppo 45060a55fbb7Slm66018 return (status); 45070a55fbb7Slm66018 } 45080a55fbb7Slm66018 45090a55fbb7Slm66018 /* 45100a55fbb7Slm66018 * Function: 45110a55fbb7Slm66018 * vdc_handle_attr_msg() 45120a55fbb7Slm66018 * 45130a55fbb7Slm66018 * Description: 45140a55fbb7Slm66018 * 45150a55fbb7Slm66018 * Arguments: 45160a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
45170a55fbb7Slm66018 * attr_msg - LDC message sent by vDisk server 45180a55fbb7Slm66018 * 45190a55fbb7Slm66018 * Return Code: 45200a55fbb7Slm66018 * 0 - Success 45210a55fbb7Slm66018 */ 45220a55fbb7Slm66018 static int 45230a55fbb7Slm66018 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) 45240a55fbb7Slm66018 { 45250a55fbb7Slm66018 int status = 0; 45260a55fbb7Slm66018 45270a55fbb7Slm66018 ASSERT(vdc != NULL); 45280a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 45290a55fbb7Slm66018 45300a55fbb7Slm66018 if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) { 45310a55fbb7Slm66018 return (EPROTO); 45320a55fbb7Slm66018 } 45330a55fbb7Slm66018 45340a55fbb7Slm66018 switch (attr_msg->tag.vio_subtype) { 45351ae08745Sheppo case VIO_SUBTYPE_ACK: 45361ae08745Sheppo /* 45371ae08745Sheppo * We now verify the attributes sent by vds. 45381ae08745Sheppo */ 453978fcd0a1Sachartre if (attr_msg->vdisk_size == 0) { 454078fcd0a1Sachartre DMSG(vdc, 0, "[%d] Invalid disk size from vds", 454178fcd0a1Sachartre vdc->instance); 454278fcd0a1Sachartre status = EINVAL; 454378fcd0a1Sachartre break; 454478fcd0a1Sachartre } 454578fcd0a1Sachartre 454678fcd0a1Sachartre if (attr_msg->max_xfer_sz == 0) { 454778fcd0a1Sachartre DMSG(vdc, 0, "[%d] Invalid transfer size from vds", 454878fcd0a1Sachartre vdc->instance); 454978fcd0a1Sachartre status = EINVAL; 455078fcd0a1Sachartre break; 455178fcd0a1Sachartre } 455278fcd0a1Sachartre 45532f5224aeSachartre if (attr_msg->vdisk_size == VD_SIZE_UNKNOWN) { 45542f5224aeSachartre DMSG(vdc, 0, "[%d] Unknown disk size from vds", 45552f5224aeSachartre vdc->instance); 45562f5224aeSachartre attr_msg->vdisk_size = 0; 45572f5224aeSachartre } 45582f5224aeSachartre 455978fcd0a1Sachartre /* 456078fcd0a1Sachartre * If the disk size is already set check that it hasn't changed. 
456178fcd0a1Sachartre */ 45622f5224aeSachartre if ((vdc->vdisk_size != 0) && (attr_msg->vdisk_size != 0) && 456378fcd0a1Sachartre (vdc->vdisk_size != attr_msg->vdisk_size)) { 456478fcd0a1Sachartre DMSG(vdc, 0, "[%d] Different disk size from vds " 456578fcd0a1Sachartre "(old=0x%lx - new=0x%lx", vdc->instance, 456678fcd0a1Sachartre vdc->vdisk_size, attr_msg->vdisk_size) 456778fcd0a1Sachartre status = EINVAL; 456878fcd0a1Sachartre break; 456978fcd0a1Sachartre } 457078fcd0a1Sachartre 45711ae08745Sheppo vdc->vdisk_size = attr_msg->vdisk_size; 45721ae08745Sheppo vdc->vdisk_type = attr_msg->vdisk_type; 457317cadca8Slm66018 vdc->operations = attr_msg->operations; 457417cadca8Slm66018 if (vio_ver_is_supported(vdc->ver, 1, 1)) 457517cadca8Slm66018 vdc->vdisk_media = attr_msg->vdisk_media; 457617cadca8Slm66018 else 457717cadca8Slm66018 vdc->vdisk_media = 0; 45781ae08745Sheppo 45793af08d82Slm66018 DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n", 4580e1ebb9ecSlm66018 vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz); 45813af08d82Slm66018 DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n", 4582e1ebb9ecSlm66018 vdc->instance, vdc->block_size, 4583e1ebb9ecSlm66018 attr_msg->vdisk_block_size); 4584e1ebb9ecSlm66018 45851ae08745Sheppo /* 4586e1ebb9ecSlm66018 * We don't know at compile time what the vDisk server will 458717cadca8Slm66018 * think are good values but we apply a large (arbitrary) 4588e1ebb9ecSlm66018 * upper bound to prevent memory exhaustion in vdc if it was 4589e1ebb9ecSlm66018 * allocating a DRing based of huge values sent by the server. 4590e1ebb9ecSlm66018 * We probably will never exceed this except if the message 4591e1ebb9ecSlm66018 * was garbage. 
45921ae08745Sheppo */ 4593e1ebb9ecSlm66018 if ((attr_msg->max_xfer_sz * attr_msg->vdisk_block_size) <= 4594e1ebb9ecSlm66018 (PAGESIZE * DEV_BSIZE)) { 4595e1ebb9ecSlm66018 vdc->max_xfer_sz = attr_msg->max_xfer_sz; 4596e1ebb9ecSlm66018 vdc->block_size = attr_msg->vdisk_block_size; 4597e1ebb9ecSlm66018 } else { 45983af08d82Slm66018 DMSG(vdc, 0, "[%d] vds block transfer size too big;" 4599e1ebb9ecSlm66018 " using max supported by vdc", vdc->instance); 46001ae08745Sheppo } 46011ae08745Sheppo 4602f0ca1d9aSsb155480 if ((attr_msg->xfer_mode != VIO_DRING_MODE_V1_0) || 46031ae08745Sheppo (attr_msg->vdisk_size > INT64_MAX) || 460417cadca8Slm66018 (attr_msg->operations == 0) || 46051ae08745Sheppo (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { 46063af08d82Slm66018 DMSG(vdc, 0, "[%d] Invalid attributes from vds", 4607e1ebb9ecSlm66018 vdc->instance); 46081ae08745Sheppo status = EINVAL; 46091ae08745Sheppo break; 46101ae08745Sheppo } 46111ae08745Sheppo 461278fcd0a1Sachartre /* 461378fcd0a1Sachartre * Now that we have received all attributes we can create a 461478fcd0a1Sachartre * fake geometry for the disk. 461578fcd0a1Sachartre */ 461678fcd0a1Sachartre vdc_create_fake_geometry(vdc); 46171ae08745Sheppo break; 46181ae08745Sheppo 46191ae08745Sheppo case VIO_SUBTYPE_NACK: 46201ae08745Sheppo /* 46211ae08745Sheppo * vds could not handle the attributes we sent so we 46221ae08745Sheppo * stop negotiating. 
46231ae08745Sheppo */ 46241ae08745Sheppo status = EPROTO; 46251ae08745Sheppo break; 46261ae08745Sheppo 46271ae08745Sheppo case VIO_SUBTYPE_INFO: 46281ae08745Sheppo /* 46291ae08745Sheppo * Handle the case where vds starts the handshake 46301ae08745Sheppo * (for now; vdc is the only supported instigatior) 46311ae08745Sheppo */ 46321ae08745Sheppo status = ENOTSUP; 46331ae08745Sheppo break; 46341ae08745Sheppo 46351ae08745Sheppo default: 46361ae08745Sheppo status = ENOTSUP; 46371ae08745Sheppo break; 46381ae08745Sheppo } 46391ae08745Sheppo 46400a55fbb7Slm66018 return (status); 46411ae08745Sheppo } 46421ae08745Sheppo 46430a55fbb7Slm66018 /* 46440a55fbb7Slm66018 * Function: 46450a55fbb7Slm66018 * vdc_handle_dring_reg_msg() 46460a55fbb7Slm66018 * 46470a55fbb7Slm66018 * Description: 46480a55fbb7Slm66018 * 46490a55fbb7Slm66018 * Arguments: 46500a55fbb7Slm66018 * vdc - soft state pointer for this instance of the driver. 46510a55fbb7Slm66018 * dring_msg - LDC message sent by vDisk server 46520a55fbb7Slm66018 * 46530a55fbb7Slm66018 * Return Code: 46540a55fbb7Slm66018 * 0 - Success 46550a55fbb7Slm66018 */ 46560a55fbb7Slm66018 static int 46570a55fbb7Slm66018 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg) 46580a55fbb7Slm66018 { 46590a55fbb7Slm66018 int status = 0; 46601ae08745Sheppo 46610a55fbb7Slm66018 ASSERT(vdc != NULL); 46620a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 46630a55fbb7Slm66018 46640a55fbb7Slm66018 if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) { 46650a55fbb7Slm66018 return (EPROTO); 46660a55fbb7Slm66018 } 46670a55fbb7Slm66018 46680a55fbb7Slm66018 switch (dring_msg->tag.vio_subtype) { 46690a55fbb7Slm66018 case VIO_SUBTYPE_ACK: 46701ae08745Sheppo /* save the received dring_ident */ 46711ae08745Sheppo vdc->dring_ident = dring_msg->dring_ident; 46723af08d82Slm66018 DMSG(vdc, 0, "[%d] Received dring ident=0x%lx\n", 4673e1ebb9ecSlm66018 vdc->instance, vdc->dring_ident); 46741ae08745Sheppo break; 46751ae08745Sheppo 46761ae08745Sheppo case 
VIO_SUBTYPE_NACK: 46771ae08745Sheppo /* 46781ae08745Sheppo * vds could not handle the DRing info we sent so we 46791ae08745Sheppo * stop negotiating. 46801ae08745Sheppo */ 46813af08d82Slm66018 DMSG(vdc, 0, "[%d] server could not register DRing\n", 46823af08d82Slm66018 vdc->instance); 46831ae08745Sheppo status = EPROTO; 46841ae08745Sheppo break; 46851ae08745Sheppo 46861ae08745Sheppo case VIO_SUBTYPE_INFO: 46871ae08745Sheppo /* 46881ae08745Sheppo * Handle the case where vds starts handshake 46891ae08745Sheppo * (for now only vdc is the instigatior) 46901ae08745Sheppo */ 46911ae08745Sheppo status = ENOTSUP; 46921ae08745Sheppo break; 46931ae08745Sheppo default: 46941ae08745Sheppo status = ENOTSUP; 46951ae08745Sheppo } 46961ae08745Sheppo 46971ae08745Sheppo return (status); 46981ae08745Sheppo } 46991ae08745Sheppo 47001ae08745Sheppo /* 47011ae08745Sheppo * Function: 47021ae08745Sheppo * vdc_verify_seq_num() 47031ae08745Sheppo * 47041ae08745Sheppo * Description: 4705e1ebb9ecSlm66018 * This functions verifies that the sequence number sent back by the vDisk 4706e1ebb9ecSlm66018 * server with the latest message is what is expected (i.e. it is greater 4707e1ebb9ecSlm66018 * than the last seq num sent by the vDisk server and less than or equal 4708e1ebb9ecSlm66018 * to the last seq num generated by vdc). 4709e1ebb9ecSlm66018 * 4710e1ebb9ecSlm66018 * It then checks the request ID to see if any requests need processing 4711e1ebb9ecSlm66018 * in the DRing. 47121ae08745Sheppo * 47131ae08745Sheppo * Arguments: 47141ae08745Sheppo * vdc - soft state pointer for this instance of the driver. 
47151ae08745Sheppo * dring_msg - pointer to the LDC message sent by vds 47161ae08745Sheppo * 47171ae08745Sheppo * Return Code: 4718e1ebb9ecSlm66018 * VDC_SEQ_NUM_TODO - Message needs to be processed 4719e1ebb9ecSlm66018 * VDC_SEQ_NUM_SKIP - Message has already been processed 4720e1ebb9ecSlm66018 * VDC_SEQ_NUM_INVALID - The seq numbers are so out of sync, 4721e1ebb9ecSlm66018 * vdc cannot deal with them 47221ae08745Sheppo */ 4723e1ebb9ecSlm66018 static int 4724e1ebb9ecSlm66018 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg) 47251ae08745Sheppo { 47261ae08745Sheppo ASSERT(vdc != NULL); 47271ae08745Sheppo ASSERT(dring_msg != NULL); 4728d10e4ef2Snarayan ASSERT(mutex_owned(&vdc->lock)); 47291ae08745Sheppo 47301ae08745Sheppo /* 47311ae08745Sheppo * Check to see if the messages were responded to in the correct 4732e1ebb9ecSlm66018 * order by vds. 47331ae08745Sheppo */ 4734e1ebb9ecSlm66018 if ((dring_msg->seq_num <= vdc->seq_num_reply) || 4735e1ebb9ecSlm66018 (dring_msg->seq_num > vdc->seq_num)) { 47363af08d82Slm66018 DMSG(vdc, 0, "?[%d] Bogus sequence_number %lu: " 4737e1ebb9ecSlm66018 "%lu > expected <= %lu (last proc req %lu sent %lu)\n", 4738e1ebb9ecSlm66018 vdc->instance, dring_msg->seq_num, 4739e1ebb9ecSlm66018 vdc->seq_num_reply, vdc->seq_num, 4740e1ebb9ecSlm66018 vdc->req_id_proc, vdc->req_id); 4741e1ebb9ecSlm66018 return (VDC_SEQ_NUM_INVALID); 47421ae08745Sheppo } 4743e1ebb9ecSlm66018 vdc->seq_num_reply = dring_msg->seq_num; 47441ae08745Sheppo 4745e1ebb9ecSlm66018 if (vdc->req_id_proc < vdc->req_id) 4746e1ebb9ecSlm66018 return (VDC_SEQ_NUM_TODO); 4747e1ebb9ecSlm66018 else 4748e1ebb9ecSlm66018 return (VDC_SEQ_NUM_SKIP); 47491ae08745Sheppo } 47501ae08745Sheppo 47510a55fbb7Slm66018 47520a55fbb7Slm66018 /* 47530a55fbb7Slm66018 * Function: 47540a55fbb7Slm66018 * vdc_is_supported_version() 47550a55fbb7Slm66018 * 47560a55fbb7Slm66018 * Description: 47570a55fbb7Slm66018 * This routine checks if the major/minor version numbers specified in 47580a55fbb7Slm66018 * 
'ver_msg' are supported. If not it finds the next version that is 47590a55fbb7Slm66018 * in the supported version list 'vdc_version[]' and sets the fields in 47600a55fbb7Slm66018 * 'ver_msg' to those values 47610a55fbb7Slm66018 * 47620a55fbb7Slm66018 * Arguments: 47630a55fbb7Slm66018 * ver_msg - LDC message sent by vDisk server 47640a55fbb7Slm66018 * 47650a55fbb7Slm66018 * Return Code: 47660a55fbb7Slm66018 * B_TRUE - Success 47670a55fbb7Slm66018 * B_FALSE - Version not supported 47680a55fbb7Slm66018 */ 47690a55fbb7Slm66018 static boolean_t 47700a55fbb7Slm66018 vdc_is_supported_version(vio_ver_msg_t *ver_msg) 47710a55fbb7Slm66018 { 47720a55fbb7Slm66018 int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]); 47730a55fbb7Slm66018 47740a55fbb7Slm66018 for (int i = 0; i < vdc_num_versions; i++) { 47750a55fbb7Slm66018 ASSERT(vdc_version[i].major > 0); 47760a55fbb7Slm66018 ASSERT((i == 0) || 47770a55fbb7Slm66018 (vdc_version[i].major < vdc_version[i-1].major)); 47780a55fbb7Slm66018 47790a55fbb7Slm66018 /* 47800a55fbb7Slm66018 * If the major versions match, adjust the minor version, if 47810a55fbb7Slm66018 * necessary, down to the highest value supported by this 47820a55fbb7Slm66018 * client. 
The server should support all minor versions lower 47830a55fbb7Slm66018 * than the value it sent 47840a55fbb7Slm66018 */ 47850a55fbb7Slm66018 if (ver_msg->ver_major == vdc_version[i].major) { 47860a55fbb7Slm66018 if (ver_msg->ver_minor > vdc_version[i].minor) { 47873af08d82Slm66018 DMSGX(0, 47883af08d82Slm66018 "Adjusting minor version from %u to %u", 47890a55fbb7Slm66018 ver_msg->ver_minor, vdc_version[i].minor); 47900a55fbb7Slm66018 ver_msg->ver_minor = vdc_version[i].minor; 47910a55fbb7Slm66018 } 47920a55fbb7Slm66018 return (B_TRUE); 47930a55fbb7Slm66018 } 47940a55fbb7Slm66018 47950a55fbb7Slm66018 /* 47960a55fbb7Slm66018 * If the message contains a higher major version number, set 47970a55fbb7Slm66018 * the message's major/minor versions to the current values 47980a55fbb7Slm66018 * and return false, so this message will get resent with 47990a55fbb7Slm66018 * these values, and the server will potentially try again 48000a55fbb7Slm66018 * with the same or a lower version 48010a55fbb7Slm66018 */ 48020a55fbb7Slm66018 if (ver_msg->ver_major > vdc_version[i].major) { 48030a55fbb7Slm66018 ver_msg->ver_major = vdc_version[i].major; 48040a55fbb7Slm66018 ver_msg->ver_minor = vdc_version[i].minor; 48053af08d82Slm66018 DMSGX(0, "Suggesting major/minor (0x%x/0x%x)\n", 48060a55fbb7Slm66018 ver_msg->ver_major, ver_msg->ver_minor); 48070a55fbb7Slm66018 48080a55fbb7Slm66018 return (B_FALSE); 48090a55fbb7Slm66018 } 48100a55fbb7Slm66018 48110a55fbb7Slm66018 /* 48120a55fbb7Slm66018 * Otherwise, the message's major version is less than the 48130a55fbb7Slm66018 * current major version, so continue the loop to the next 48140a55fbb7Slm66018 * (lower) supported version 48150a55fbb7Slm66018 */ 48160a55fbb7Slm66018 } 48170a55fbb7Slm66018 48180a55fbb7Slm66018 /* 48190a55fbb7Slm66018 * No common version was found; "ground" the version pair in the 48200a55fbb7Slm66018 * message to terminate negotiation 48210a55fbb7Slm66018 */ 48220a55fbb7Slm66018 ver_msg->ver_major = 0; 48230a55fbb7Slm66018 
ver_msg->ver_minor = 0; 48240a55fbb7Slm66018 48250a55fbb7Slm66018 return (B_FALSE); 48260a55fbb7Slm66018 } 48271ae08745Sheppo /* -------------------------------------------------------------------------- */ 48281ae08745Sheppo 48291ae08745Sheppo /* 48301ae08745Sheppo * DKIO(7) support 48311ae08745Sheppo */ 48321ae08745Sheppo 48331ae08745Sheppo typedef struct vdc_dk_arg { 48341ae08745Sheppo struct dk_callback dkc; 48351ae08745Sheppo int mode; 48361ae08745Sheppo dev_t dev; 48371ae08745Sheppo vdc_t *vdc; 48381ae08745Sheppo } vdc_dk_arg_t; 48391ae08745Sheppo 48401ae08745Sheppo /* 48411ae08745Sheppo * Function: 48421ae08745Sheppo * vdc_dkio_flush_cb() 48431ae08745Sheppo * 48441ae08745Sheppo * Description: 48451ae08745Sheppo * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called 48461ae08745Sheppo * by kernel code. 48471ae08745Sheppo * 48481ae08745Sheppo * Arguments: 48491ae08745Sheppo * arg - a pointer to a vdc_dk_arg_t structure. 48501ae08745Sheppo */ 48511ae08745Sheppo void 48521ae08745Sheppo vdc_dkio_flush_cb(void *arg) 48531ae08745Sheppo { 48541ae08745Sheppo struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg; 48551ae08745Sheppo struct dk_callback *dkc = NULL; 48561ae08745Sheppo vdc_t *vdc = NULL; 48571ae08745Sheppo int rv; 48581ae08745Sheppo 48591ae08745Sheppo if (dk_arg == NULL) { 48603af08d82Slm66018 cmn_err(CE_NOTE, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n"); 48611ae08745Sheppo return; 48621ae08745Sheppo } 48631ae08745Sheppo dkc = &dk_arg->dkc; 48641ae08745Sheppo vdc = dk_arg->vdc; 48651ae08745Sheppo ASSERT(vdc != NULL); 48661ae08745Sheppo 48673af08d82Slm66018 rv = vdc_do_sync_op(vdc, VD_OP_FLUSH, NULL, 0, 48682f5224aeSachartre VDCPART(dk_arg->dev), 0, CB_SYNC, 0, VIO_both_dir, B_TRUE); 48691ae08745Sheppo if (rv != 0) { 48703af08d82Slm66018 DMSG(vdc, 0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n", 4871e1ebb9ecSlm66018 vdc->instance, rv, 48721ae08745Sheppo ddi_model_convert_from(dk_arg->mode & FMODELS)); 48731ae08745Sheppo } 
48741ae08745Sheppo 48751ae08745Sheppo /* 48761ae08745Sheppo * Trigger the call back to notify the caller the the ioctl call has 48771ae08745Sheppo * been completed. 48781ae08745Sheppo */ 48791ae08745Sheppo if ((dk_arg->mode & FKIOCTL) && 48801ae08745Sheppo (dkc != NULL) && 48811ae08745Sheppo (dkc->dkc_callback != NULL)) { 48821ae08745Sheppo ASSERT(dkc->dkc_cookie != NULL); 48838e6a2a04Slm66018 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 48841ae08745Sheppo } 48851ae08745Sheppo 48861ae08745Sheppo /* Indicate that one less DKIO write flush is outstanding */ 48871ae08745Sheppo mutex_enter(&vdc->lock); 48881ae08745Sheppo vdc->dkio_flush_pending--; 48891ae08745Sheppo ASSERT(vdc->dkio_flush_pending >= 0); 48901ae08745Sheppo mutex_exit(&vdc->lock); 48918e6a2a04Slm66018 48928e6a2a04Slm66018 /* free the mem that was allocated when the callback was dispatched */ 48938e6a2a04Slm66018 kmem_free(arg, sizeof (vdc_dk_arg_t)); 48941ae08745Sheppo } 48951ae08745Sheppo 48961ae08745Sheppo /* 489787a7269eSachartre * Function: 48989642afceSachartre * vdc_dkio_gapart() 489987a7269eSachartre * 490087a7269eSachartre * Description: 490187a7269eSachartre * This function implements the DKIOCGAPART ioctl. 
490287a7269eSachartre * 490387a7269eSachartre * Arguments: 490478fcd0a1Sachartre * vdc - soft state pointer 490587a7269eSachartre * arg - a pointer to a dk_map[NDKMAP] or dk_map32[NDKMAP] structure 490687a7269eSachartre * flag - ioctl flags 490787a7269eSachartre */ 490887a7269eSachartre static int 49099642afceSachartre vdc_dkio_gapart(vdc_t *vdc, caddr_t arg, int flag) 491087a7269eSachartre { 491178fcd0a1Sachartre struct dk_geom *geom; 491278fcd0a1Sachartre struct vtoc *vtoc; 491387a7269eSachartre union { 491487a7269eSachartre struct dk_map map[NDKMAP]; 491587a7269eSachartre struct dk_map32 map32[NDKMAP]; 491687a7269eSachartre } data; 491787a7269eSachartre int i, rv, size; 491887a7269eSachartre 491978fcd0a1Sachartre mutex_enter(&vdc->lock); 492087a7269eSachartre 492178fcd0a1Sachartre if ((rv = vdc_validate_geometry(vdc)) != 0) { 492278fcd0a1Sachartre mutex_exit(&vdc->lock); 492387a7269eSachartre return (rv); 492478fcd0a1Sachartre } 492587a7269eSachartre 492678fcd0a1Sachartre vtoc = vdc->vtoc; 492778fcd0a1Sachartre geom = vdc->geom; 492887a7269eSachartre 492987a7269eSachartre if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 493087a7269eSachartre 493178fcd0a1Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 493278fcd0a1Sachartre data.map32[i].dkl_cylno = vtoc->v_part[i].p_start / 493378fcd0a1Sachartre (geom->dkg_nhead * geom->dkg_nsect); 493478fcd0a1Sachartre data.map32[i].dkl_nblk = vtoc->v_part[i].p_size; 493587a7269eSachartre } 493687a7269eSachartre size = NDKMAP * sizeof (struct dk_map32); 493787a7269eSachartre 493887a7269eSachartre } else { 493987a7269eSachartre 494078fcd0a1Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 494178fcd0a1Sachartre data.map[i].dkl_cylno = vtoc->v_part[i].p_start / 494278fcd0a1Sachartre (geom->dkg_nhead * geom->dkg_nsect); 494378fcd0a1Sachartre data.map[i].dkl_nblk = vtoc->v_part[i].p_size; 494487a7269eSachartre } 494587a7269eSachartre size = NDKMAP * sizeof (struct dk_map); 494687a7269eSachartre 494787a7269eSachartre } 
494887a7269eSachartre 494978fcd0a1Sachartre mutex_exit(&vdc->lock); 495078fcd0a1Sachartre 495187a7269eSachartre if (ddi_copyout(&data, arg, size, flag) != 0) 495287a7269eSachartre return (EFAULT); 495387a7269eSachartre 495487a7269eSachartre return (0); 495587a7269eSachartre } 495687a7269eSachartre 495787a7269eSachartre /* 495887a7269eSachartre * Function: 49599642afceSachartre * vdc_dkio_partition() 49609642afceSachartre * 49619642afceSachartre * Description: 49629642afceSachartre * This function implements the DKIOCPARTITION ioctl. 49639642afceSachartre * 49649642afceSachartre * Arguments: 49659642afceSachartre * vdc - soft state pointer 49669642afceSachartre * arg - a pointer to a struct partition64 structure 49679642afceSachartre * flag - ioctl flags 49689642afceSachartre */ 49699642afceSachartre static int 49709642afceSachartre vdc_dkio_partition(vdc_t *vdc, caddr_t arg, int flag) 49719642afceSachartre { 49729642afceSachartre struct partition64 p64; 49739642afceSachartre efi_gpt_t *gpt; 49749642afceSachartre efi_gpe_t *gpe; 49759642afceSachartre vd_efi_dev_t edev; 49769642afceSachartre uint_t partno; 49779642afceSachartre int rv; 49789642afceSachartre 49799642afceSachartre if (ddi_copyin(arg, &p64, sizeof (struct partition64), flag)) { 49809642afceSachartre return (EFAULT); 49819642afceSachartre } 49829642afceSachartre 49839642afceSachartre VD_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl); 49849642afceSachartre 49859642afceSachartre if ((rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe)) != 0) { 49869642afceSachartre return (rv); 49879642afceSachartre } 49889642afceSachartre 49899642afceSachartre partno = p64.p_partno; 49909642afceSachartre 49919642afceSachartre if (partno >= gpt->efi_gpt_NumberOfPartitionEntries) { 49929642afceSachartre vd_efi_free(&edev, gpt, gpe); 49939642afceSachartre return (ESRCH); 49949642afceSachartre } 49959642afceSachartre 49969642afceSachartre bcopy(&gpe[partno].efi_gpe_PartitionTypeGUID, &p64.p_type, 49979642afceSachartre sizeof (struct 
uuid)); 49989642afceSachartre p64.p_start = gpe[partno].efi_gpe_StartingLBA; 49999642afceSachartre p64.p_size = gpe[partno].efi_gpe_EndingLBA - p64.p_start + 1; 50009642afceSachartre 50019642afceSachartre if (ddi_copyout(&p64, arg, sizeof (struct partition64), flag)) { 50029642afceSachartre vd_efi_free(&edev, gpt, gpe); 50039642afceSachartre return (EFAULT); 50049642afceSachartre } 50059642afceSachartre 50069642afceSachartre vd_efi_free(&edev, gpt, gpe); 50079642afceSachartre return (0); 50089642afceSachartre } 50099642afceSachartre 50109642afceSachartre /* 50119642afceSachartre * Function: 501287a7269eSachartre * vdc_dioctl_rwcmd() 501387a7269eSachartre * 501487a7269eSachartre * Description: 501587a7269eSachartre * This function implements the DIOCTL_RWCMD ioctl. This ioctl is used 501687a7269eSachartre * for DKC_DIRECT disks to read or write at an absolute disk offset. 501787a7269eSachartre * 501887a7269eSachartre * Arguments: 501987a7269eSachartre * dev - device 502087a7269eSachartre * arg - a pointer to a dadkio_rwcmd or dadkio_rwcmd32 structure 502187a7269eSachartre * flag - ioctl flags 502287a7269eSachartre */ 502387a7269eSachartre static int 502487a7269eSachartre vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, int flag) 502587a7269eSachartre { 502687a7269eSachartre struct dadkio_rwcmd32 rwcmd32; 502787a7269eSachartre struct dadkio_rwcmd rwcmd; 502887a7269eSachartre struct iovec aiov; 502987a7269eSachartre struct uio auio; 503087a7269eSachartre int rw, status; 503187a7269eSachartre struct buf *buf; 503287a7269eSachartre 503387a7269eSachartre if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 503487a7269eSachartre if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd32, 503587a7269eSachartre sizeof (struct dadkio_rwcmd32), flag)) { 503687a7269eSachartre return (EFAULT); 503787a7269eSachartre } 503887a7269eSachartre rwcmd.cmd = rwcmd32.cmd; 503987a7269eSachartre rwcmd.flags = rwcmd32.flags; 504087a7269eSachartre rwcmd.blkaddr = (daddr_t)rwcmd32.blkaddr; 
504187a7269eSachartre rwcmd.buflen = rwcmd32.buflen; 504287a7269eSachartre rwcmd.bufaddr = (caddr_t)(uintptr_t)rwcmd32.bufaddr; 504387a7269eSachartre } else { 504487a7269eSachartre if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd, 504587a7269eSachartre sizeof (struct dadkio_rwcmd), flag)) { 504687a7269eSachartre return (EFAULT); 504787a7269eSachartre } 504887a7269eSachartre } 504987a7269eSachartre 505087a7269eSachartre switch (rwcmd.cmd) { 505187a7269eSachartre case DADKIO_RWCMD_READ: 505287a7269eSachartre rw = B_READ; 505387a7269eSachartre break; 505487a7269eSachartre case DADKIO_RWCMD_WRITE: 505587a7269eSachartre rw = B_WRITE; 505687a7269eSachartre break; 505787a7269eSachartre default: 505887a7269eSachartre return (EINVAL); 505987a7269eSachartre } 506087a7269eSachartre 506187a7269eSachartre bzero((caddr_t)&aiov, sizeof (struct iovec)); 506287a7269eSachartre aiov.iov_base = rwcmd.bufaddr; 506387a7269eSachartre aiov.iov_len = rwcmd.buflen; 506487a7269eSachartre 506587a7269eSachartre bzero((caddr_t)&auio, sizeof (struct uio)); 506687a7269eSachartre auio.uio_iov = &aiov; 506787a7269eSachartre auio.uio_iovcnt = 1; 506887a7269eSachartre auio.uio_loffset = rwcmd.blkaddr * DEV_BSIZE; 506987a7269eSachartre auio.uio_resid = rwcmd.buflen; 507087a7269eSachartre auio.uio_segflg = flag & FKIOCTL ? UIO_SYSSPACE : UIO_USERSPACE; 507187a7269eSachartre 507287a7269eSachartre buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 507387a7269eSachartre bioinit(buf); 507487a7269eSachartre /* 507587a7269eSachartre * We use the private field of buf to specify that this is an 507687a7269eSachartre * I/O using an absolute offset. 
507787a7269eSachartre */ 507887a7269eSachartre buf->b_private = (void *)VD_SLICE_NONE; 507987a7269eSachartre 508087a7269eSachartre status = physio(vdc_strategy, buf, dev, rw, vdc_min, &auio); 508187a7269eSachartre 508287a7269eSachartre biofini(buf); 508387a7269eSachartre kmem_free(buf, sizeof (buf_t)); 508487a7269eSachartre 508587a7269eSachartre return (status); 508687a7269eSachartre } 508787a7269eSachartre 508887a7269eSachartre /* 50892f5224aeSachartre * Allocate a buffer for a VD_OP_SCSICMD operation. The size of the allocated 50902f5224aeSachartre * buffer is returned in alloc_len. 50912f5224aeSachartre */ 50922f5224aeSachartre static vd_scsi_t * 50932f5224aeSachartre vdc_scsi_alloc(int cdb_len, int sense_len, int datain_len, int dataout_len, 50942f5224aeSachartre int *alloc_len) 50952f5224aeSachartre { 50962f5224aeSachartre vd_scsi_t *vd_scsi; 50972f5224aeSachartre int vd_scsi_len = VD_SCSI_SIZE; 50982f5224aeSachartre 50992f5224aeSachartre vd_scsi_len += P2ROUNDUP(cdb_len, sizeof (uint64_t)); 51002f5224aeSachartre vd_scsi_len += P2ROUNDUP(sense_len, sizeof (uint64_t)); 51012f5224aeSachartre vd_scsi_len += P2ROUNDUP(datain_len, sizeof (uint64_t)); 51022f5224aeSachartre vd_scsi_len += P2ROUNDUP(dataout_len, sizeof (uint64_t)); 51032f5224aeSachartre 51042f5224aeSachartre ASSERT(vd_scsi_len % sizeof (uint64_t) == 0); 51052f5224aeSachartre 51062f5224aeSachartre vd_scsi = kmem_zalloc(vd_scsi_len, KM_SLEEP); 51072f5224aeSachartre 51082f5224aeSachartre vd_scsi->cdb_len = cdb_len; 51092f5224aeSachartre vd_scsi->sense_len = sense_len; 51102f5224aeSachartre vd_scsi->datain_len = datain_len; 51112f5224aeSachartre vd_scsi->dataout_len = dataout_len; 51122f5224aeSachartre 51132f5224aeSachartre *alloc_len = vd_scsi_len; 51142f5224aeSachartre 51152f5224aeSachartre return (vd_scsi); 51162f5224aeSachartre } 51172f5224aeSachartre 51182f5224aeSachartre /* 51192f5224aeSachartre * Convert the status of a SCSI command to a Solaris return code. 
51202f5224aeSachartre * 51212f5224aeSachartre * Arguments: 51222f5224aeSachartre * vd_scsi - The SCSI operation buffer. 51232f5224aeSachartre * log_error - indicate if an error message should be logged. 51242f5224aeSachartre * 51252f5224aeSachartre * Note that our SCSI error messages are rather primitive for the moment 51262f5224aeSachartre * and could be improved by decoding some data like the SCSI command and 51272f5224aeSachartre * the sense key. 51282f5224aeSachartre * 51292f5224aeSachartre * Return value: 51302f5224aeSachartre * 0 - Status is good. 51312f5224aeSachartre * EACCES - Status reports a reservation conflict. 51322f5224aeSachartre * ENOTSUP - Status reports a check condition and sense key 51332f5224aeSachartre * reports an illegal request. 51342f5224aeSachartre * EIO - Any other status. 51352f5224aeSachartre */ 51362f5224aeSachartre static int 51372f5224aeSachartre vdc_scsi_status(vdc_t *vdc, vd_scsi_t *vd_scsi, boolean_t log_error) 51382f5224aeSachartre { 51392f5224aeSachartre int rv; 51402f5224aeSachartre char path_str[MAXPATHLEN]; 51412f5224aeSachartre char panic_str[VDC_RESV_CONFLICT_FMT_LEN + MAXPATHLEN]; 51422f5224aeSachartre union scsi_cdb *cdb; 51432f5224aeSachartre struct scsi_extended_sense *sense; 51442f5224aeSachartre 51452f5224aeSachartre if (vd_scsi->cmd_status == STATUS_GOOD) 51462f5224aeSachartre /* no error */ 51472f5224aeSachartre return (0); 51482f5224aeSachartre 51492f5224aeSachartre /* when the tunable vdc_scsi_log_error is true we log all errors */ 51502f5224aeSachartre if (vdc_scsi_log_error) 51512f5224aeSachartre log_error = B_TRUE; 51522f5224aeSachartre 51532f5224aeSachartre if (log_error) { 51542f5224aeSachartre cmn_err(CE_WARN, "%s (vdc%d):\tError for Command: 0x%x)\n", 51552f5224aeSachartre ddi_pathname(vdc->dip, path_str), vdc->instance, 51562f5224aeSachartre GETCMD(VD_SCSI_DATA_CDB(vd_scsi))); 51572f5224aeSachartre } 51582f5224aeSachartre 51592f5224aeSachartre /* default returned value */ 51602f5224aeSachartre rv = EIO; 
51612f5224aeSachartre 51622f5224aeSachartre switch (vd_scsi->cmd_status) { 51632f5224aeSachartre 51642f5224aeSachartre case STATUS_CHECK: 51652f5224aeSachartre case STATUS_TERMINATED: 51662f5224aeSachartre if (log_error) 51672f5224aeSachartre cmn_err(CE_CONT, "\tCheck Condition Error\n"); 51682f5224aeSachartre 51692f5224aeSachartre /* check sense buffer */ 51702f5224aeSachartre if (vd_scsi->sense_len == 0 || 51712f5224aeSachartre vd_scsi->sense_status != STATUS_GOOD) { 51722f5224aeSachartre if (log_error) 51732f5224aeSachartre cmn_err(CE_CONT, "\tNo Sense Data Available\n"); 51742f5224aeSachartre break; 51752f5224aeSachartre } 51762f5224aeSachartre 51772f5224aeSachartre sense = VD_SCSI_DATA_SENSE(vd_scsi); 51782f5224aeSachartre 51792f5224aeSachartre if (log_error) { 51802f5224aeSachartre cmn_err(CE_CONT, "\tSense Key: 0x%x\n" 51812f5224aeSachartre "\tASC: 0x%x, ASCQ: 0x%x\n", 51822f5224aeSachartre scsi_sense_key((uint8_t *)sense), 51832f5224aeSachartre scsi_sense_asc((uint8_t *)sense), 51842f5224aeSachartre scsi_sense_ascq((uint8_t *)sense)); 51852f5224aeSachartre } 51862f5224aeSachartre 51872f5224aeSachartre if (scsi_sense_key((uint8_t *)sense) == KEY_ILLEGAL_REQUEST) 51882f5224aeSachartre rv = ENOTSUP; 51892f5224aeSachartre break; 51902f5224aeSachartre 51912f5224aeSachartre case STATUS_BUSY: 51922f5224aeSachartre if (log_error) 51932f5224aeSachartre cmn_err(CE_NOTE, "\tDevice Busy\n"); 51942f5224aeSachartre break; 51952f5224aeSachartre 51962f5224aeSachartre case STATUS_RESERVATION_CONFLICT: 51972f5224aeSachartre /* 51982f5224aeSachartre * If the command was PERSISTENT_RESERVATION_[IN|OUT] then 51992f5224aeSachartre * reservation conflict could be due to various reasons like 52002f5224aeSachartre * incorrect keys, not registered or not reserved etc. So, 52012f5224aeSachartre * we should not panic in that case. 
52022f5224aeSachartre */ 52032f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 52042f5224aeSachartre if (vdc->failfast_interval != 0 && 52052f5224aeSachartre cdb->scc_cmd != SCMD_PERSISTENT_RESERVE_IN && 52062f5224aeSachartre cdb->scc_cmd != SCMD_PERSISTENT_RESERVE_OUT) { 52072f5224aeSachartre /* failfast is enabled so we have to panic */ 52082f5224aeSachartre (void) snprintf(panic_str, sizeof (panic_str), 52092f5224aeSachartre VDC_RESV_CONFLICT_FMT_STR "%s", 52102f5224aeSachartre ddi_pathname(vdc->dip, path_str)); 52112f5224aeSachartre panic(panic_str); 52122f5224aeSachartre } 52132f5224aeSachartre if (log_error) 52142f5224aeSachartre cmn_err(CE_NOTE, "\tReservation Conflict\n"); 52152f5224aeSachartre rv = EACCES; 52162f5224aeSachartre break; 52172f5224aeSachartre 52182f5224aeSachartre case STATUS_QFULL: 52192f5224aeSachartre if (log_error) 52202f5224aeSachartre cmn_err(CE_NOTE, "\tQueue Full\n"); 52212f5224aeSachartre break; 52222f5224aeSachartre 52232f5224aeSachartre case STATUS_MET: 52242f5224aeSachartre case STATUS_INTERMEDIATE: 52252f5224aeSachartre case STATUS_SCSI2: 52262f5224aeSachartre case STATUS_INTERMEDIATE_MET: 52272f5224aeSachartre case STATUS_ACA_ACTIVE: 52282f5224aeSachartre if (log_error) 52292f5224aeSachartre cmn_err(CE_CONT, 52302f5224aeSachartre "\tUnexpected SCSI status received: 0x%x\n", 52312f5224aeSachartre vd_scsi->cmd_status); 52322f5224aeSachartre break; 52332f5224aeSachartre 52342f5224aeSachartre default: 52352f5224aeSachartre if (log_error) 52362f5224aeSachartre cmn_err(CE_CONT, 52372f5224aeSachartre "\tInvalid SCSI status received: 0x%x\n", 52382f5224aeSachartre vd_scsi->cmd_status); 52392f5224aeSachartre break; 52402f5224aeSachartre } 52412f5224aeSachartre 52422f5224aeSachartre return (rv); 52432f5224aeSachartre } 52442f5224aeSachartre 52452f5224aeSachartre /* 52462f5224aeSachartre * Implemented the USCSICMD uscsi(7I) ioctl. 
This ioctl is converted to 52472f5224aeSachartre * a VD_OP_SCSICMD operation which is sent to the vdisk server. If a SCSI 52482f5224aeSachartre * reset is requested (i.e. a flag USCSI_RESET* is set) then the ioctl is 52492f5224aeSachartre * converted to a VD_OP_RESET operation. 52502f5224aeSachartre */ 52512f5224aeSachartre static int 52522f5224aeSachartre vdc_uscsi_cmd(vdc_t *vdc, caddr_t arg, int mode) 52532f5224aeSachartre { 52542f5224aeSachartre struct uscsi_cmd uscsi; 52552f5224aeSachartre struct uscsi_cmd32 uscsi32; 52562f5224aeSachartre vd_scsi_t *vd_scsi; 52572f5224aeSachartre int vd_scsi_len; 52582f5224aeSachartre union scsi_cdb *cdb; 52592f5224aeSachartre struct scsi_extended_sense *sense; 52602f5224aeSachartre char *datain, *dataout; 52612f5224aeSachartre size_t cdb_len, datain_len, dataout_len, sense_len; 52622f5224aeSachartre int rv; 52632f5224aeSachartre 52642f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 52652f5224aeSachartre if (ddi_copyin(arg, &uscsi32, sizeof (struct uscsi_cmd32), 52662f5224aeSachartre mode) != 0) 52672f5224aeSachartre return (EFAULT); 52682f5224aeSachartre uscsi_cmd32touscsi_cmd((&uscsi32), (&uscsi)); 52692f5224aeSachartre } else { 52702f5224aeSachartre if (ddi_copyin(arg, &uscsi, sizeof (struct uscsi_cmd), 52712f5224aeSachartre mode) != 0) 52722f5224aeSachartre return (EFAULT); 52732f5224aeSachartre } 52742f5224aeSachartre 52752f5224aeSachartre /* a uscsi reset is converted to a VD_OP_RESET operation */ 52762f5224aeSachartre if (uscsi.uscsi_flags & (USCSI_RESET | USCSI_RESET_LUN | 52772f5224aeSachartre USCSI_RESET_ALL)) { 52782f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_RESET, NULL, 0, 0, 0, CB_SYNC, 52792f5224aeSachartre (void *)(uint64_t)mode, VIO_both_dir, B_TRUE); 52802f5224aeSachartre return (rv); 52812f5224aeSachartre } 52822f5224aeSachartre 52832f5224aeSachartre /* cdb buffer length */ 52842f5224aeSachartre cdb_len = uscsi.uscsi_cdblen; 52852f5224aeSachartre 52862f5224aeSachartre /* 
data in and out buffers length */ 52872f5224aeSachartre if (uscsi.uscsi_flags & USCSI_READ) { 52882f5224aeSachartre datain_len = uscsi.uscsi_buflen; 52892f5224aeSachartre dataout_len = 0; 52902f5224aeSachartre } else { 52912f5224aeSachartre datain_len = 0; 52922f5224aeSachartre dataout_len = uscsi.uscsi_buflen; 52932f5224aeSachartre } 52942f5224aeSachartre 52952f5224aeSachartre /* sense buffer length */ 52962f5224aeSachartre if (uscsi.uscsi_flags & USCSI_RQENABLE) 52972f5224aeSachartre sense_len = uscsi.uscsi_rqlen; 52982f5224aeSachartre else 52992f5224aeSachartre sense_len = 0; 53002f5224aeSachartre 53012f5224aeSachartre /* allocate buffer for the VD_SCSICMD_OP operation */ 53022f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 53032f5224aeSachartre &vd_scsi_len); 53042f5224aeSachartre 53052f5224aeSachartre /* 53062f5224aeSachartre * The documentation of USCSI_ISOLATE and USCSI_DIAGNOSE is very vague, 53072f5224aeSachartre * but basically they prevent a SCSI command from being retried in case 53082f5224aeSachartre * of an error. 
53092f5224aeSachartre */ 53102f5224aeSachartre if ((uscsi.uscsi_flags & USCSI_ISOLATE) || 53112f5224aeSachartre (uscsi.uscsi_flags & USCSI_DIAGNOSE)) 53122f5224aeSachartre vd_scsi->options |= VD_SCSI_OPT_NORETRY; 53132f5224aeSachartre 53142f5224aeSachartre /* set task attribute */ 53152f5224aeSachartre if (uscsi.uscsi_flags & USCSI_NOTAG) { 53162f5224aeSachartre vd_scsi->task_attribute = 0; 53172f5224aeSachartre } else { 53182f5224aeSachartre if (uscsi.uscsi_flags & USCSI_HEAD) 53192f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_ACA; 53202f5224aeSachartre else if (uscsi.uscsi_flags & USCSI_HTAG) 53212f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_HQUEUE; 53222f5224aeSachartre else if (uscsi.uscsi_flags & USCSI_OTAG) 53232f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_ORDERED; 53242f5224aeSachartre else 53252f5224aeSachartre vd_scsi->task_attribute = 0; 53262f5224aeSachartre } 53272f5224aeSachartre 53282f5224aeSachartre /* set timeout */ 53292f5224aeSachartre vd_scsi->timeout = uscsi.uscsi_timeout; 53302f5224aeSachartre 53312f5224aeSachartre /* copy-in cdb data */ 53322f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 53332f5224aeSachartre if (ddi_copyin(uscsi.uscsi_cdb, cdb, cdb_len, mode) != 0) { 53342f5224aeSachartre rv = EFAULT; 53352f5224aeSachartre goto done; 53362f5224aeSachartre } 53372f5224aeSachartre 53382f5224aeSachartre /* keep a pointer to the sense buffer */ 53392f5224aeSachartre sense = VD_SCSI_DATA_SENSE(vd_scsi); 53402f5224aeSachartre 53412f5224aeSachartre /* keep a pointer to the data-in buffer */ 53422f5224aeSachartre datain = (char *)VD_SCSI_DATA_IN(vd_scsi); 53432f5224aeSachartre 53442f5224aeSachartre /* copy-in request data to the data-out buffer */ 53452f5224aeSachartre dataout = (char *)VD_SCSI_DATA_OUT(vd_scsi); 53462f5224aeSachartre if (!(uscsi.uscsi_flags & USCSI_READ)) { 53472f5224aeSachartre if (ddi_copyin(uscsi.uscsi_bufaddr, dataout, dataout_len, 53482f5224aeSachartre mode)) { 53492f5224aeSachartre rv = 
EFAULT; 53502f5224aeSachartre goto done; 53512f5224aeSachartre } 53522f5224aeSachartre } 53532f5224aeSachartre 53542f5224aeSachartre /* submit the request */ 53552f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 53562f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 53572f5224aeSachartre 53582f5224aeSachartre if (rv != 0) 53592f5224aeSachartre goto done; 53602f5224aeSachartre 53612f5224aeSachartre /* update scsi status */ 53622f5224aeSachartre uscsi.uscsi_status = vd_scsi->cmd_status; 53632f5224aeSachartre 53642f5224aeSachartre /* update sense data */ 53652f5224aeSachartre if ((uscsi.uscsi_flags & USCSI_RQENABLE) && 53662f5224aeSachartre (uscsi.uscsi_status == STATUS_CHECK || 53672f5224aeSachartre uscsi.uscsi_status == STATUS_TERMINATED)) { 53682f5224aeSachartre 53692f5224aeSachartre uscsi.uscsi_rqstatus = vd_scsi->sense_status; 53702f5224aeSachartre 53712f5224aeSachartre if (uscsi.uscsi_rqstatus == STATUS_GOOD) { 53722f5224aeSachartre uscsi.uscsi_rqresid = uscsi.uscsi_rqlen - 53732f5224aeSachartre vd_scsi->sense_len; 53742f5224aeSachartre if (ddi_copyout(sense, uscsi.uscsi_rqbuf, 53752f5224aeSachartre vd_scsi->sense_len, mode) != 0) { 53762f5224aeSachartre rv = EFAULT; 53772f5224aeSachartre goto done; 53782f5224aeSachartre } 53792f5224aeSachartre } 53802f5224aeSachartre } 53812f5224aeSachartre 53822f5224aeSachartre /* update request data */ 53832f5224aeSachartre if (uscsi.uscsi_status == STATUS_GOOD) { 53842f5224aeSachartre if (uscsi.uscsi_flags & USCSI_READ) { 53852f5224aeSachartre uscsi.uscsi_resid = uscsi.uscsi_buflen - 53862f5224aeSachartre vd_scsi->datain_len; 53872f5224aeSachartre if (ddi_copyout(datain, uscsi.uscsi_bufaddr, 53882f5224aeSachartre vd_scsi->datain_len, mode) != 0) { 53892f5224aeSachartre rv = EFAULT; 53902f5224aeSachartre goto done; 53912f5224aeSachartre } 53922f5224aeSachartre } else { 53932f5224aeSachartre uscsi.uscsi_resid = uscsi.uscsi_buflen - 53942f5224aeSachartre 
vd_scsi->dataout_len; 53952f5224aeSachartre } 53962f5224aeSachartre } 53972f5224aeSachartre 53982f5224aeSachartre /* copy-out result */ 53992f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 54002f5224aeSachartre uscsi_cmdtouscsi_cmd32((&uscsi), (&uscsi32)); 54012f5224aeSachartre if (ddi_copyout(&uscsi32, arg, sizeof (struct uscsi_cmd32), 54022f5224aeSachartre mode) != 0) { 54032f5224aeSachartre rv = EFAULT; 54042f5224aeSachartre goto done; 54052f5224aeSachartre } 54062f5224aeSachartre } else { 54072f5224aeSachartre if (ddi_copyout(&uscsi, arg, sizeof (struct uscsi_cmd), 54082f5224aeSachartre mode) != 0) { 54092f5224aeSachartre rv = EFAULT; 54102f5224aeSachartre goto done; 54112f5224aeSachartre } 54122f5224aeSachartre } 54132f5224aeSachartre 54142f5224aeSachartre /* get the return code from the SCSI command status */ 54152f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, 54162f5224aeSachartre !(uscsi.uscsi_flags & USCSI_SILENT)); 54172f5224aeSachartre 54182f5224aeSachartre done: 54192f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 54202f5224aeSachartre return (rv); 54212f5224aeSachartre } 54222f5224aeSachartre 54232f5224aeSachartre /* 54242f5224aeSachartre * Create a VD_OP_SCSICMD buffer for a SCSI PERSISTENT IN command. 54252f5224aeSachartre * 54262f5224aeSachartre * Arguments: 54272f5224aeSachartre * cmd - SCSI PERSISTENT IN command 54282f5224aeSachartre * len - length of the SCSI input buffer 54292f5224aeSachartre * vd_scsi_len - return the length of the allocated buffer 54302f5224aeSachartre * 54312f5224aeSachartre * Returned Value: 54322f5224aeSachartre * a pointer to the allocated VD_OP_SCSICMD buffer. 
54332f5224aeSachartre */ 54342f5224aeSachartre static vd_scsi_t * 54352f5224aeSachartre vdc_scsi_alloc_persistent_in(uchar_t cmd, int len, int *vd_scsi_len) 54362f5224aeSachartre { 54372f5224aeSachartre int cdb_len, sense_len, datain_len, dataout_len; 54382f5224aeSachartre vd_scsi_t *vd_scsi; 54392f5224aeSachartre union scsi_cdb *cdb; 54402f5224aeSachartre 54412f5224aeSachartre cdb_len = CDB_GROUP1; 54422f5224aeSachartre sense_len = sizeof (struct scsi_extended_sense); 54432f5224aeSachartre datain_len = len; 54442f5224aeSachartre dataout_len = 0; 54452f5224aeSachartre 54462f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 54472f5224aeSachartre vd_scsi_len); 54482f5224aeSachartre 54492f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 54502f5224aeSachartre 54512f5224aeSachartre /* set cdb */ 54522f5224aeSachartre cdb->scc_cmd = SCMD_PERSISTENT_RESERVE_IN; 54532f5224aeSachartre cdb->cdb_opaque[1] = cmd; 54542f5224aeSachartre FORMG1COUNT(cdb, datain_len); 54552f5224aeSachartre 54562f5224aeSachartre vd_scsi->timeout = vdc_scsi_timeout; 54572f5224aeSachartre 54582f5224aeSachartre return (vd_scsi); 54592f5224aeSachartre } 54602f5224aeSachartre 54612f5224aeSachartre /* 54622f5224aeSachartre * Create a VD_OP_SCSICMD buffer for a SCSI PERSISTENT OUT command. 54632f5224aeSachartre * 54642f5224aeSachartre * Arguments: 54652f5224aeSachartre * cmd - SCSI PERSISTENT OUT command 54662f5224aeSachartre * len - length of the SCSI output buffer 54672f5224aeSachartre * vd_scsi_len - return the length of the allocated buffer 54682f5224aeSachartre * 54692f5224aeSachartre * Returned Code: 54702f5224aeSachartre * a pointer to the allocated VD_OP_SCSICMD buffer. 
54712f5224aeSachartre */ 54722f5224aeSachartre static vd_scsi_t * 54732f5224aeSachartre vdc_scsi_alloc_persistent_out(uchar_t cmd, int len, int *vd_scsi_len) 54742f5224aeSachartre { 54752f5224aeSachartre int cdb_len, sense_len, datain_len, dataout_len; 54762f5224aeSachartre vd_scsi_t *vd_scsi; 54772f5224aeSachartre union scsi_cdb *cdb; 54782f5224aeSachartre 54792f5224aeSachartre cdb_len = CDB_GROUP1; 54802f5224aeSachartre sense_len = sizeof (struct scsi_extended_sense); 54812f5224aeSachartre datain_len = 0; 54822f5224aeSachartre dataout_len = len; 54832f5224aeSachartre 54842f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 54852f5224aeSachartre vd_scsi_len); 54862f5224aeSachartre 54872f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 54882f5224aeSachartre 54892f5224aeSachartre /* set cdb */ 54902f5224aeSachartre cdb->scc_cmd = SCMD_PERSISTENT_RESERVE_OUT; 54912f5224aeSachartre cdb->cdb_opaque[1] = cmd; 54922f5224aeSachartre FORMG1COUNT(cdb, dataout_len); 54932f5224aeSachartre 54942f5224aeSachartre vd_scsi->timeout = vdc_scsi_timeout; 54952f5224aeSachartre 54962f5224aeSachartre return (vd_scsi); 54972f5224aeSachartre } 54982f5224aeSachartre 54992f5224aeSachartre /* 55002f5224aeSachartre * Implement the MHIOCGRP_INKEYS mhd(7i) ioctl. The ioctl is converted 55012f5224aeSachartre * to a SCSI PERSISTENT IN READ KEYS command which is sent to the vdisk 55022f5224aeSachartre * server with a VD_OP_SCSICMD operation. 
55032f5224aeSachartre */ 55042f5224aeSachartre static int 55052f5224aeSachartre vdc_mhd_inkeys(vdc_t *vdc, caddr_t arg, int mode) 55062f5224aeSachartre { 55072f5224aeSachartre vd_scsi_t *vd_scsi; 55082f5224aeSachartre mhioc_inkeys_t inkeys; 55092f5224aeSachartre mhioc_key_list_t klist; 55102f5224aeSachartre struct mhioc_inkeys32 inkeys32; 55112f5224aeSachartre struct mhioc_key_list32 klist32; 55122f5224aeSachartre sd_prin_readkeys_t *scsi_keys; 55132f5224aeSachartre void *user_keys; 55142f5224aeSachartre int vd_scsi_len; 55152f5224aeSachartre int listsize, listlen, rv; 55162f5224aeSachartre 55172f5224aeSachartre /* copyin arguments */ 55182f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 55192f5224aeSachartre rv = ddi_copyin(arg, &inkeys32, sizeof (inkeys32), mode); 55202f5224aeSachartre if (rv != 0) 55212f5224aeSachartre return (EFAULT); 55222f5224aeSachartre 55232f5224aeSachartre rv = ddi_copyin((caddr_t)(uintptr_t)inkeys32.li, &klist32, 55242f5224aeSachartre sizeof (klist32), mode); 55252f5224aeSachartre if (rv != 0) 55262f5224aeSachartre return (EFAULT); 55272f5224aeSachartre 55282f5224aeSachartre listsize = klist32.listsize; 55292f5224aeSachartre } else { 55302f5224aeSachartre rv = ddi_copyin(arg, &inkeys, sizeof (inkeys), mode); 55312f5224aeSachartre if (rv != 0) 55322f5224aeSachartre return (EFAULT); 55332f5224aeSachartre 55342f5224aeSachartre rv = ddi_copyin(inkeys.li, &klist, sizeof (klist), mode); 55352f5224aeSachartre if (rv != 0) 55362f5224aeSachartre return (EFAULT); 55372f5224aeSachartre 55382f5224aeSachartre listsize = klist.listsize; 55392f5224aeSachartre } 55402f5224aeSachartre 55412f5224aeSachartre /* build SCSI VD_OP request */ 55422f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_in(SD_READ_KEYS, 55432f5224aeSachartre sizeof (sd_prin_readkeys_t) - sizeof (caddr_t) + 55442f5224aeSachartre (sizeof (mhioc_resv_key_t) * listsize), &vd_scsi_len); 55452f5224aeSachartre 55462f5224aeSachartre scsi_keys = 
(sd_prin_readkeys_t *)VD_SCSI_DATA_IN(vd_scsi); 55472f5224aeSachartre 55482f5224aeSachartre /* submit the request */ 55492f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 55502f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 55512f5224aeSachartre 55522f5224aeSachartre if (rv != 0) 55532f5224aeSachartre goto done; 55542f5224aeSachartre 55552f5224aeSachartre listlen = scsi_keys->len / MHIOC_RESV_KEY_SIZE; 55562f5224aeSachartre 55572f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 55582f5224aeSachartre inkeys32.generation = scsi_keys->generation; 55592f5224aeSachartre rv = ddi_copyout(&inkeys32, arg, sizeof (inkeys32), mode); 55602f5224aeSachartre if (rv != 0) { 55612f5224aeSachartre rv = EFAULT; 55622f5224aeSachartre goto done; 55632f5224aeSachartre } 55642f5224aeSachartre 55652f5224aeSachartre klist32.listlen = listlen; 55662f5224aeSachartre rv = ddi_copyout(&klist32, (caddr_t)(uintptr_t)inkeys32.li, 55672f5224aeSachartre sizeof (klist32), mode); 55682f5224aeSachartre if (rv != 0) { 55692f5224aeSachartre rv = EFAULT; 55702f5224aeSachartre goto done; 55712f5224aeSachartre } 55722f5224aeSachartre 55732f5224aeSachartre user_keys = (caddr_t)(uintptr_t)klist32.list; 55742f5224aeSachartre } else { 55752f5224aeSachartre inkeys.generation = scsi_keys->generation; 55762f5224aeSachartre rv = ddi_copyout(&inkeys, arg, sizeof (inkeys), mode); 55772f5224aeSachartre if (rv != 0) { 55782f5224aeSachartre rv = EFAULT; 55792f5224aeSachartre goto done; 55802f5224aeSachartre } 55812f5224aeSachartre 55822f5224aeSachartre klist.listlen = listlen; 55832f5224aeSachartre rv = ddi_copyout(&klist, inkeys.li, sizeof (klist), mode); 55842f5224aeSachartre if (rv != 0) { 55852f5224aeSachartre rv = EFAULT; 55862f5224aeSachartre goto done; 55872f5224aeSachartre } 55882f5224aeSachartre 55892f5224aeSachartre user_keys = klist.list; 55902f5224aeSachartre } 55912f5224aeSachartre 55922f5224aeSachartre 
/* copy out keys */ 55932f5224aeSachartre if (listlen > 0 && listsize > 0) { 55942f5224aeSachartre if (listsize < listlen) 55952f5224aeSachartre listlen = listsize; 55962f5224aeSachartre rv = ddi_copyout(&scsi_keys->keylist, user_keys, 55972f5224aeSachartre listlen * MHIOC_RESV_KEY_SIZE, mode); 55982f5224aeSachartre if (rv != 0) 55992f5224aeSachartre rv = EFAULT; 56002f5224aeSachartre } 56012f5224aeSachartre 56022f5224aeSachartre if (rv == 0) 56032f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 56042f5224aeSachartre 56052f5224aeSachartre done: 56062f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 56072f5224aeSachartre 56082f5224aeSachartre return (rv); 56092f5224aeSachartre } 56102f5224aeSachartre 56112f5224aeSachartre /* 56122f5224aeSachartre * Implement the MHIOCGRP_INRESV mhd(7i) ioctl. The ioctl is converted 56132f5224aeSachartre * to a SCSI PERSISTENT IN READ RESERVATION command which is sent to 56142f5224aeSachartre * the vdisk server with a VD_OP_SCSICMD operation. 
56152f5224aeSachartre */ 56162f5224aeSachartre static int 56172f5224aeSachartre vdc_mhd_inresv(vdc_t *vdc, caddr_t arg, int mode) 56182f5224aeSachartre { 56192f5224aeSachartre vd_scsi_t *vd_scsi; 56202f5224aeSachartre mhioc_inresvs_t inresv; 56212f5224aeSachartre mhioc_resv_desc_list_t rlist; 56222f5224aeSachartre struct mhioc_inresvs32 inresv32; 56232f5224aeSachartre struct mhioc_resv_desc_list32 rlist32; 56242f5224aeSachartre mhioc_resv_desc_t mhd_resv; 56252f5224aeSachartre sd_prin_readresv_t *scsi_resv; 56262f5224aeSachartre sd_readresv_desc_t *resv; 56272f5224aeSachartre mhioc_resv_desc_t *user_resv; 56282f5224aeSachartre int vd_scsi_len; 56292f5224aeSachartre int listsize, listlen, i, rv; 56302f5224aeSachartre 56312f5224aeSachartre /* copyin arguments */ 56322f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 56332f5224aeSachartre rv = ddi_copyin(arg, &inresv32, sizeof (inresv32), mode); 56342f5224aeSachartre if (rv != 0) 56352f5224aeSachartre return (EFAULT); 56362f5224aeSachartre 56372f5224aeSachartre rv = ddi_copyin((caddr_t)(uintptr_t)inresv32.li, &rlist32, 56382f5224aeSachartre sizeof (rlist32), mode); 56392f5224aeSachartre if (rv != 0) 56402f5224aeSachartre return (EFAULT); 56412f5224aeSachartre 56422f5224aeSachartre listsize = rlist32.listsize; 56432f5224aeSachartre } else { 56442f5224aeSachartre rv = ddi_copyin(arg, &inresv, sizeof (inresv), mode); 56452f5224aeSachartre if (rv != 0) 56462f5224aeSachartre return (EFAULT); 56472f5224aeSachartre 56482f5224aeSachartre rv = ddi_copyin(inresv.li, &rlist, sizeof (rlist), mode); 56492f5224aeSachartre if (rv != 0) 56502f5224aeSachartre return (EFAULT); 56512f5224aeSachartre 56522f5224aeSachartre listsize = rlist.listsize; 56532f5224aeSachartre } 56542f5224aeSachartre 56552f5224aeSachartre /* build SCSI VD_OP request */ 56562f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_in(SD_READ_RESV, 56572f5224aeSachartre sizeof (sd_prin_readresv_t) - sizeof (caddr_t) + 
56582f5224aeSachartre (SCSI3_RESV_DESC_LEN * listsize), &vd_scsi_len); 56592f5224aeSachartre 56602f5224aeSachartre scsi_resv = (sd_prin_readresv_t *)VD_SCSI_DATA_IN(vd_scsi); 56612f5224aeSachartre 56622f5224aeSachartre /* submit the request */ 56632f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 56642f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 56652f5224aeSachartre 56662f5224aeSachartre if (rv != 0) 56672f5224aeSachartre goto done; 56682f5224aeSachartre 56692f5224aeSachartre listlen = scsi_resv->len / SCSI3_RESV_DESC_LEN; 56702f5224aeSachartre 56712f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 56722f5224aeSachartre inresv32.generation = scsi_resv->generation; 56732f5224aeSachartre rv = ddi_copyout(&inresv32, arg, sizeof (inresv32), mode); 56742f5224aeSachartre if (rv != 0) { 56752f5224aeSachartre rv = EFAULT; 56762f5224aeSachartre goto done; 56772f5224aeSachartre } 56782f5224aeSachartre 56792f5224aeSachartre rlist32.listlen = listlen; 56802f5224aeSachartre rv = ddi_copyout(&rlist32, (caddr_t)(uintptr_t)inresv32.li, 56812f5224aeSachartre sizeof (rlist32), mode); 56822f5224aeSachartre if (rv != 0) { 56832f5224aeSachartre rv = EFAULT; 56842f5224aeSachartre goto done; 56852f5224aeSachartre } 56862f5224aeSachartre 56872f5224aeSachartre user_resv = (mhioc_resv_desc_t *)(uintptr_t)rlist32.list; 56882f5224aeSachartre } else { 56892f5224aeSachartre inresv.generation = scsi_resv->generation; 56902f5224aeSachartre rv = ddi_copyout(&inresv, arg, sizeof (inresv), mode); 56912f5224aeSachartre if (rv != 0) { 56922f5224aeSachartre rv = EFAULT; 56932f5224aeSachartre goto done; 56942f5224aeSachartre } 56952f5224aeSachartre 56962f5224aeSachartre rlist.listlen = listlen; 56972f5224aeSachartre rv = ddi_copyout(&rlist, inresv.li, sizeof (rlist), mode); 56982f5224aeSachartre if (rv != 0) { 56992f5224aeSachartre rv = EFAULT; 57002f5224aeSachartre goto done; 57012f5224aeSachartre 
} 57022f5224aeSachartre 57032f5224aeSachartre user_resv = rlist.list; 57042f5224aeSachartre } 57052f5224aeSachartre 57062f5224aeSachartre /* copy out reservations */ 57072f5224aeSachartre if (listsize > 0 && listlen > 0) { 57082f5224aeSachartre if (listsize < listlen) 57092f5224aeSachartre listlen = listsize; 57102f5224aeSachartre resv = (sd_readresv_desc_t *)&scsi_resv->readresv_desc; 57112f5224aeSachartre 57122f5224aeSachartre for (i = 0; i < listlen; i++) { 57132f5224aeSachartre mhd_resv.type = resv->type; 57142f5224aeSachartre mhd_resv.scope = resv->scope; 57152f5224aeSachartre mhd_resv.scope_specific_addr = 57162f5224aeSachartre BE_32(resv->scope_specific_addr); 57172f5224aeSachartre bcopy(&resv->resvkey, &mhd_resv.key, 57182f5224aeSachartre MHIOC_RESV_KEY_SIZE); 57192f5224aeSachartre 57202f5224aeSachartre rv = ddi_copyout(&mhd_resv, user_resv, 57212f5224aeSachartre sizeof (mhd_resv), mode); 57222f5224aeSachartre if (rv != 0) { 57232f5224aeSachartre rv = EFAULT; 57242f5224aeSachartre goto done; 57252f5224aeSachartre } 57262f5224aeSachartre resv++; 57272f5224aeSachartre user_resv++; 57282f5224aeSachartre } 57292f5224aeSachartre } 57302f5224aeSachartre 57312f5224aeSachartre if (rv == 0) 57322f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 57332f5224aeSachartre 57342f5224aeSachartre done: 57352f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 57362f5224aeSachartre return (rv); 57372f5224aeSachartre } 57382f5224aeSachartre 57392f5224aeSachartre /* 57402f5224aeSachartre * Implement the MHIOCGRP_REGISTER mhd(7i) ioctl. The ioctl is converted 57412f5224aeSachartre * to a SCSI PERSISTENT OUT REGISTER command which is sent to the vdisk 57422f5224aeSachartre * server with a VD_OP_SCSICMD operation. 
57432f5224aeSachartre */ 57442f5224aeSachartre static int 57452f5224aeSachartre vdc_mhd_register(vdc_t *vdc, caddr_t arg, int mode) 57462f5224aeSachartre { 57472f5224aeSachartre vd_scsi_t *vd_scsi; 57482f5224aeSachartre sd_prout_t *scsi_prout; 57492f5224aeSachartre mhioc_register_t mhd_reg; 57502f5224aeSachartre int vd_scsi_len, rv; 57512f5224aeSachartre 57522f5224aeSachartre /* copyin arguments */ 57532f5224aeSachartre rv = ddi_copyin(arg, &mhd_reg, sizeof (mhd_reg), mode); 57542f5224aeSachartre if (rv != 0) 57552f5224aeSachartre return (EFAULT); 57562f5224aeSachartre 57572f5224aeSachartre /* build SCSI VD_OP request */ 57582f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_REGISTER, 57592f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 57602f5224aeSachartre 57612f5224aeSachartre /* set parameters */ 57622f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 57632f5224aeSachartre bcopy(mhd_reg.oldkey.key, scsi_prout->res_key, MHIOC_RESV_KEY_SIZE); 57642f5224aeSachartre bcopy(mhd_reg.newkey.key, scsi_prout->service_key, MHIOC_RESV_KEY_SIZE); 57652f5224aeSachartre scsi_prout->aptpl = (uchar_t)mhd_reg.aptpl; 57662f5224aeSachartre 57672f5224aeSachartre /* submit the request */ 57682f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 57692f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 57702f5224aeSachartre 57712f5224aeSachartre if (rv == 0) 57722f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 57732f5224aeSachartre 57742f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 57752f5224aeSachartre return (rv); 57762f5224aeSachartre } 57772f5224aeSachartre 57782f5224aeSachartre /* 57792f5224aeSachartre * Implement the MHIOCGRP_RESERVE mhd(7i) ioctl. The ioctl is converted 57802f5224aeSachartre * to a SCSI PERSISTENT OUT RESERVE command which is sent to the vdisk 57812f5224aeSachartre * server with a VD_OP_SCSICMD operation. 
57822f5224aeSachartre */ 57832f5224aeSachartre static int 57842f5224aeSachartre vdc_mhd_reserve(vdc_t *vdc, caddr_t arg, int mode) 57852f5224aeSachartre { 57862f5224aeSachartre union scsi_cdb *cdb; 57872f5224aeSachartre vd_scsi_t *vd_scsi; 57882f5224aeSachartre sd_prout_t *scsi_prout; 57892f5224aeSachartre mhioc_resv_desc_t mhd_resv; 57902f5224aeSachartre int vd_scsi_len, rv; 57912f5224aeSachartre 57922f5224aeSachartre /* copyin arguments */ 57932f5224aeSachartre rv = ddi_copyin(arg, &mhd_resv, sizeof (mhd_resv), mode); 57942f5224aeSachartre if (rv != 0) 57952f5224aeSachartre return (EFAULT); 57962f5224aeSachartre 57972f5224aeSachartre /* build SCSI VD_OP request */ 57982f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_RESERVE, 57992f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 58002f5224aeSachartre 58012f5224aeSachartre /* set parameters */ 58022f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 58032f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 58042f5224aeSachartre bcopy(mhd_resv.key.key, scsi_prout->res_key, MHIOC_RESV_KEY_SIZE); 58052f5224aeSachartre scsi_prout->scope_address = mhd_resv.scope_specific_addr; 58062f5224aeSachartre cdb->cdb_opaque[2] = mhd_resv.type; 58072f5224aeSachartre 58082f5224aeSachartre /* submit the request */ 58092f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 58102f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 58112f5224aeSachartre 58122f5224aeSachartre if (rv == 0) 58132f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 58142f5224aeSachartre 58152f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 58162f5224aeSachartre return (rv); 58172f5224aeSachartre } 58182f5224aeSachartre 58192f5224aeSachartre /* 58202f5224aeSachartre * Implement the MHIOCGRP_PREEMPTANDABORT mhd(7i) ioctl. 
The ioctl is 58212f5224aeSachartre * converted to a SCSI PERSISTENT OUT PREEMPT AND ABORT command which 58222f5224aeSachartre * is sent to the vdisk server with a VD_OP_SCSICMD operation. 58232f5224aeSachartre */ 58242f5224aeSachartre static int 58252f5224aeSachartre vdc_mhd_preemptabort(vdc_t *vdc, caddr_t arg, int mode) 58262f5224aeSachartre { 58272f5224aeSachartre union scsi_cdb *cdb; 58282f5224aeSachartre vd_scsi_t *vd_scsi; 58292f5224aeSachartre sd_prout_t *scsi_prout; 58302f5224aeSachartre mhioc_preemptandabort_t mhd_preempt; 58312f5224aeSachartre int vd_scsi_len, rv; 58322f5224aeSachartre 58332f5224aeSachartre /* copyin arguments */ 58342f5224aeSachartre rv = ddi_copyin(arg, &mhd_preempt, sizeof (mhd_preempt), mode); 58352f5224aeSachartre if (rv != 0) 58362f5224aeSachartre return (EFAULT); 58372f5224aeSachartre 58382f5224aeSachartre /* build SCSI VD_OP request */ 58392f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_PREEMPTANDABORT, 58402f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 58412f5224aeSachartre 58422f5224aeSachartre /* set parameters */ 58432f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_ACA; 58442f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 58452f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 58462f5224aeSachartre bcopy(mhd_preempt.resvdesc.key.key, scsi_prout->res_key, 58472f5224aeSachartre MHIOC_RESV_KEY_SIZE); 58482f5224aeSachartre bcopy(mhd_preempt.victim_key.key, scsi_prout->service_key, 58492f5224aeSachartre MHIOC_RESV_KEY_SIZE); 58502f5224aeSachartre scsi_prout->scope_address = mhd_preempt.resvdesc.scope_specific_addr; 58512f5224aeSachartre cdb->cdb_opaque[2] = mhd_preempt.resvdesc.type; 58522f5224aeSachartre 58532f5224aeSachartre /* submit the request */ 58542f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 58552f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 58562f5224aeSachartre 58572f5224aeSachartre if (rv 
== 0) 58582f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 58592f5224aeSachartre 58602f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 58612f5224aeSachartre return (rv); 58622f5224aeSachartre } 58632f5224aeSachartre 58642f5224aeSachartre /* 58652f5224aeSachartre * Implement the MHIOCGRP_REGISTERANDIGNOREKEY mhd(7i) ioctl. The ioctl 58662f5224aeSachartre * is converted to a SCSI PERSISTENT OUT REGISTER AND IGNORE EXISTING KEY 58672f5224aeSachartre * command which is sent to the vdisk server with a VD_OP_SCSICMD operation. 58682f5224aeSachartre */ 58692f5224aeSachartre static int 58702f5224aeSachartre vdc_mhd_registerignore(vdc_t *vdc, caddr_t arg, int mode) 58712f5224aeSachartre { 58722f5224aeSachartre vd_scsi_t *vd_scsi; 58732f5224aeSachartre sd_prout_t *scsi_prout; 58742f5224aeSachartre mhioc_registerandignorekey_t mhd_regi; 58752f5224aeSachartre int vd_scsi_len, rv; 58762f5224aeSachartre 58772f5224aeSachartre /* copyin arguments */ 58782f5224aeSachartre rv = ddi_copyin(arg, &mhd_regi, sizeof (mhd_regi), mode); 58792f5224aeSachartre if (rv != 0) 58802f5224aeSachartre return (EFAULT); 58812f5224aeSachartre 58822f5224aeSachartre /* build SCSI VD_OP request */ 58832f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_REGISTERANDIGNOREKEY, 58842f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 58852f5224aeSachartre 58862f5224aeSachartre /* set parameters */ 58872f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 58882f5224aeSachartre bcopy(mhd_regi.newkey.key, scsi_prout->service_key, 58892f5224aeSachartre MHIOC_RESV_KEY_SIZE); 58902f5224aeSachartre scsi_prout->aptpl = (uchar_t)mhd_regi.aptpl; 58912f5224aeSachartre 58922f5224aeSachartre /* submit the request */ 58932f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 58942f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 58952f5224aeSachartre 58962f5224aeSachartre if (rv == 0) 58972f5224aeSachartre rv = 
vdc_scsi_status(vdc, vd_scsi, B_FALSE); 58982f5224aeSachartre 58992f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 59002f5224aeSachartre return (rv); 59012f5224aeSachartre } 59022f5224aeSachartre 59032f5224aeSachartre /* 59042f5224aeSachartre * This function is used by the failfast mechanism to send a SCSI command 59052f5224aeSachartre * to check for reservation conflict. 59062f5224aeSachartre */ 59072f5224aeSachartre static int 59082f5224aeSachartre vdc_failfast_scsi_cmd(vdc_t *vdc, uchar_t scmd) 59092f5224aeSachartre { 59102f5224aeSachartre int cdb_len, sense_len, vd_scsi_len; 59112f5224aeSachartre vd_scsi_t *vd_scsi; 59122f5224aeSachartre union scsi_cdb *cdb; 59132f5224aeSachartre int rv; 59142f5224aeSachartre 59152f5224aeSachartre ASSERT(scmd == SCMD_TEST_UNIT_READY || scmd == SCMD_WRITE_G1); 59162f5224aeSachartre 59172f5224aeSachartre if (scmd == SCMD_WRITE_G1) 59182f5224aeSachartre cdb_len = CDB_GROUP1; 59192f5224aeSachartre else 59202f5224aeSachartre cdb_len = CDB_GROUP0; 59212f5224aeSachartre 59222f5224aeSachartre sense_len = sizeof (struct scsi_extended_sense); 59232f5224aeSachartre 59242f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, 0, 0, &vd_scsi_len); 59252f5224aeSachartre 59262f5224aeSachartre /* set cdb */ 59272f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 59282f5224aeSachartre cdb->scc_cmd = scmd; 59292f5224aeSachartre 59302f5224aeSachartre vd_scsi->timeout = vdc_scsi_timeout; 59312f5224aeSachartre 59322f5224aeSachartre /* 59332f5224aeSachartre * Submit the request. The last argument has to be B_FALSE so that 59342f5224aeSachartre * vdc_do_sync_op does not loop checking for reservation conflict if 59352f5224aeSachartre * the operation returns an error. 
59362f5224aeSachartre */ 59372f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 59382f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)FKIOCTL, VIO_both_dir, B_FALSE); 59392f5224aeSachartre 59402f5224aeSachartre if (rv == 0) 59412f5224aeSachartre (void) vdc_scsi_status(vdc, vd_scsi, B_FALSE); 59422f5224aeSachartre 59432f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 59442f5224aeSachartre return (rv); 59452f5224aeSachartre } 59462f5224aeSachartre 59472f5224aeSachartre /* 59482f5224aeSachartre * This function is used by the failfast mechanism to check for reservation 59492f5224aeSachartre * conflict. It sends some SCSI commands which will fail with a reservation 59502f5224aeSachartre * conflict error if the system does not have access to the disk and this 59512f5224aeSachartre * will panic the system. 59522f5224aeSachartre * 59532f5224aeSachartre * Returned Code: 59542f5224aeSachartre * 0 - disk is accessible without reservation conflict error 59552f5224aeSachartre * != 0 - unable to check if disk is accessible 59562f5224aeSachartre */ 59572f5224aeSachartre int 59582f5224aeSachartre vdc_failfast_check_resv(vdc_t *vdc) 59592f5224aeSachartre { 59602f5224aeSachartre int failure = 0; 59612f5224aeSachartre 59622f5224aeSachartre /* 59632f5224aeSachartre * Send a TEST UNIT READY command. The command will panic 59642f5224aeSachartre * the system if it fails with a reservation conflict. 59652f5224aeSachartre */ 59662f5224aeSachartre if (vdc_failfast_scsi_cmd(vdc, SCMD_TEST_UNIT_READY) != 0) 59672f5224aeSachartre failure++; 59682f5224aeSachartre 59692f5224aeSachartre /* 59702f5224aeSachartre * With SPC-3 compliant devices TEST UNIT READY will succeed on 59712f5224aeSachartre * a reserved device, so we also do a WRITE(10) of zero byte in 59722f5224aeSachartre * order to provoke a Reservation Conflict status on those newer 59732f5224aeSachartre * devices. 
59742f5224aeSachartre */ 59752f5224aeSachartre if (vdc_failfast_scsi_cmd(vdc, SCMD_WRITE_G1) != 0) 59762f5224aeSachartre failure++; 59772f5224aeSachartre 59782f5224aeSachartre return (failure); 59792f5224aeSachartre } 59802f5224aeSachartre 59812f5224aeSachartre /* 59822f5224aeSachartre * Add a pending I/O to the failfast I/O queue. An I/O is added to this 59832f5224aeSachartre * queue when it has failed and failfast is enabled. Then we have to check 59842f5224aeSachartre * if it has failed because of a reservation conflict in which case we have 59852f5224aeSachartre * to panic the system. 59862f5224aeSachartre * 59872f5224aeSachartre * Async I/O should be queued with their block I/O data transfer structure 59882f5224aeSachartre * (buf). Sync I/O should be queued with buf = NULL. 59892f5224aeSachartre */ 59902f5224aeSachartre static vdc_io_t * 59912f5224aeSachartre vdc_failfast_io_queue(vdc_t *vdc, struct buf *buf) 59922f5224aeSachartre { 59932f5224aeSachartre vdc_io_t *vio; 59942f5224aeSachartre 59952f5224aeSachartre ASSERT(MUTEX_HELD(&vdc->lock)); 59962f5224aeSachartre 59972f5224aeSachartre vio = kmem_alloc(sizeof (vdc_io_t), KM_SLEEP); 59982f5224aeSachartre vio->vio_next = vdc->failfast_io_queue; 59992f5224aeSachartre vio->vio_buf = buf; 60002f5224aeSachartre vio->vio_qtime = ddi_get_lbolt(); 60012f5224aeSachartre 60022f5224aeSachartre vdc->failfast_io_queue = vio; 60032f5224aeSachartre 60042f5224aeSachartre /* notify the failfast thread that a new I/O is queued */ 60052f5224aeSachartre cv_signal(&vdc->failfast_cv); 60062f5224aeSachartre 60072f5224aeSachartre return (vio); 60082f5224aeSachartre } 60092f5224aeSachartre 60102f5224aeSachartre /* 60112f5224aeSachartre * Remove and complete I/O in the failfast I/O queue which have been 60122f5224aeSachartre * added after the indicated deadline. A deadline of 0 means that all 60132f5224aeSachartre * I/O have to be unqueued and marked as completed. 
60142f5224aeSachartre */ 60152f5224aeSachartre static void 60162f5224aeSachartre vdc_failfast_io_unqueue(vdc_t *vdc, clock_t deadline) 60172f5224aeSachartre { 60182f5224aeSachartre vdc_io_t *vio, *vio_tmp; 60192f5224aeSachartre 60202f5224aeSachartre ASSERT(MUTEX_HELD(&vdc->lock)); 60212f5224aeSachartre 60222f5224aeSachartre vio_tmp = NULL; 60232f5224aeSachartre vio = vdc->failfast_io_queue; 60242f5224aeSachartre 60252f5224aeSachartre if (deadline != 0) { 60262f5224aeSachartre /* 60272f5224aeSachartre * Skip any io queued after the deadline. The failfast 60282f5224aeSachartre * I/O queue is ordered starting with the last I/O added 60292f5224aeSachartre * to the queue. 60302f5224aeSachartre */ 60312f5224aeSachartre while (vio != NULL && vio->vio_qtime > deadline) { 60322f5224aeSachartre vio_tmp = vio; 60332f5224aeSachartre vio = vio->vio_next; 60342f5224aeSachartre } 60352f5224aeSachartre } 60362f5224aeSachartre 60372f5224aeSachartre if (vio == NULL) 60382f5224aeSachartre /* nothing to unqueue */ 60392f5224aeSachartre return; 60402f5224aeSachartre 60412f5224aeSachartre /* update the queue */ 60422f5224aeSachartre if (vio_tmp == NULL) 60432f5224aeSachartre vdc->failfast_io_queue = NULL; 60442f5224aeSachartre else 60452f5224aeSachartre vio_tmp->vio_next = NULL; 60462f5224aeSachartre 60472f5224aeSachartre /* 60482f5224aeSachartre * Complete unqueued I/O. Async I/O have a block I/O data transfer 60492f5224aeSachartre * structure (buf) and they are completed by calling biodone(). Sync 60502f5224aeSachartre * I/O do not have a buf and they are completed by setting the 60512f5224aeSachartre * vio_qtime to zero and signaling failfast_io_cv. In that case, the 60522f5224aeSachartre * thread waiting for the I/O to complete is responsible for freeing 60532f5224aeSachartre * the vio structure. 
60542f5224aeSachartre */ 60552f5224aeSachartre while (vio != NULL) { 60562f5224aeSachartre vio_tmp = vio->vio_next; 60572f5224aeSachartre if (vio->vio_buf != NULL) { 6058366a92acSlm66018 VD_KSTAT_RUNQ_EXIT(vdc->io_stats); 6059366a92acSlm66018 DTRACE_IO1(done, buf_t *, vio->vio_buf); 60602f5224aeSachartre biodone(vio->vio_buf); 60612f5224aeSachartre kmem_free(vio, sizeof (vdc_io_t)); 60622f5224aeSachartre } else { 60632f5224aeSachartre vio->vio_qtime = 0; 60642f5224aeSachartre } 60652f5224aeSachartre vio = vio_tmp; 60662f5224aeSachartre } 60672f5224aeSachartre 60682f5224aeSachartre cv_broadcast(&vdc->failfast_io_cv); 60692f5224aeSachartre } 60702f5224aeSachartre 60712f5224aeSachartre /* 60722f5224aeSachartre * Failfast Thread. 60732f5224aeSachartre * 60742f5224aeSachartre * While failfast is enabled, the failfast thread sends a TEST UNIT READY 60752f5224aeSachartre * and a zero size WRITE(10) SCSI commands on a regular basis to check that 60762f5224aeSachartre * we still have access to the disk. If a command fails with a RESERVATION 60772f5224aeSachartre * CONFLICT error then the system will immediatly panic. 60782f5224aeSachartre * 60792f5224aeSachartre * The failfast thread is also woken up when an I/O has failed. It then check 60802f5224aeSachartre * the access to the disk to ensure that the I/O failure was not due to a 60812f5224aeSachartre * reservation conflict. 60822f5224aeSachartre * 60832f5224aeSachartre * There is one failfast thread for each virtual disk for which failfast is 60842f5224aeSachartre * enabled. We could have only one thread sending requests for all disks but 60852f5224aeSachartre * this would need vdc to send asynchronous requests and to have callbacks to 60862f5224aeSachartre * process replies. 
60872f5224aeSachartre */ 60882f5224aeSachartre static void 60892f5224aeSachartre vdc_failfast_thread(void *arg) 60902f5224aeSachartre { 60912f5224aeSachartre int status; 60922f5224aeSachartre vdc_t *vdc = (vdc_t *)arg; 60932f5224aeSachartre clock_t timeout, starttime; 60942f5224aeSachartre 60952f5224aeSachartre mutex_enter(&vdc->lock); 60962f5224aeSachartre 60972f5224aeSachartre while (vdc->failfast_interval != 0) { 60982f5224aeSachartre 60992f5224aeSachartre starttime = ddi_get_lbolt(); 61002f5224aeSachartre 61012f5224aeSachartre mutex_exit(&vdc->lock); 61022f5224aeSachartre 61032f5224aeSachartre /* check for reservation conflict */ 61042f5224aeSachartre status = vdc_failfast_check_resv(vdc); 61052f5224aeSachartre 61062f5224aeSachartre mutex_enter(&vdc->lock); 61072f5224aeSachartre /* 61082f5224aeSachartre * We have dropped the lock to send the SCSI command so we have 61092f5224aeSachartre * to check that failfast is still enabled. 61102f5224aeSachartre */ 61112f5224aeSachartre if (vdc->failfast_interval == 0) 61122f5224aeSachartre break; 61132f5224aeSachartre 61142f5224aeSachartre /* 61152f5224aeSachartre * If we have successfully check the disk access and there was 61162f5224aeSachartre * no reservation conflict then we can complete any I/O queued 61172f5224aeSachartre * before the last check. 
61182f5224aeSachartre */ 61192f5224aeSachartre if (status == 0) 61202f5224aeSachartre vdc_failfast_io_unqueue(vdc, starttime); 61212f5224aeSachartre 61222f5224aeSachartre /* proceed again if some I/O are still in the queue */ 61232f5224aeSachartre if (vdc->failfast_io_queue != NULL) 61242f5224aeSachartre continue; 61252f5224aeSachartre 61262f5224aeSachartre timeout = ddi_get_lbolt() + 61272f5224aeSachartre drv_usectohz(vdc->failfast_interval); 61282f5224aeSachartre (void) cv_timedwait(&vdc->failfast_cv, &vdc->lock, timeout); 61292f5224aeSachartre } 61302f5224aeSachartre 61312f5224aeSachartre /* 61322f5224aeSachartre * Failfast is being stop so we can complete any queued I/O. 61332f5224aeSachartre */ 61342f5224aeSachartre vdc_failfast_io_unqueue(vdc, 0); 61352f5224aeSachartre vdc->failfast_thread = NULL; 61362f5224aeSachartre mutex_exit(&vdc->lock); 61372f5224aeSachartre thread_exit(); 61382f5224aeSachartre } 61392f5224aeSachartre 61402f5224aeSachartre /* 61412f5224aeSachartre * Implement the MHIOCENFAILFAST mhd(7i) ioctl. 
61422f5224aeSachartre */ 61432f5224aeSachartre static int 61442f5224aeSachartre vdc_failfast(vdc_t *vdc, caddr_t arg, int mode) 61452f5224aeSachartre { 61462f5224aeSachartre unsigned int mh_time; 61472f5224aeSachartre 61482f5224aeSachartre if (ddi_copyin((void *)arg, &mh_time, sizeof (int), mode)) 61492f5224aeSachartre return (EFAULT); 61502f5224aeSachartre 61512f5224aeSachartre mutex_enter(&vdc->lock); 61522f5224aeSachartre if (mh_time != 0 && vdc->failfast_thread == NULL) { 61532f5224aeSachartre vdc->failfast_thread = thread_create(NULL, 0, 61542f5224aeSachartre vdc_failfast_thread, vdc, 0, &p0, TS_RUN, 61552f5224aeSachartre v.v_maxsyspri - 2); 61562f5224aeSachartre } 61572f5224aeSachartre 61582f5224aeSachartre vdc->failfast_interval = mh_time * 1000; 61592f5224aeSachartre cv_signal(&vdc->failfast_cv); 61602f5224aeSachartre mutex_exit(&vdc->lock); 61612f5224aeSachartre 61622f5224aeSachartre return (0); 61632f5224aeSachartre } 61642f5224aeSachartre 61652f5224aeSachartre /* 61662f5224aeSachartre * Implement the MHIOCTKOWN and MHIOCRELEASE mhd(7i) ioctls. These ioctls are 61672f5224aeSachartre * converted to VD_OP_SET_ACCESS operations. 61682f5224aeSachartre */ 61692f5224aeSachartre static int 61702f5224aeSachartre vdc_access_set(vdc_t *vdc, uint64_t flags, int mode) 61712f5224aeSachartre { 61722f5224aeSachartre int rv; 61732f5224aeSachartre 61742f5224aeSachartre /* submit owership command request */ 61752f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SET_ACCESS, (caddr_t)&flags, 61762f5224aeSachartre sizeof (uint64_t), 0, 0, CB_SYNC, (void *)(uint64_t)mode, 61772f5224aeSachartre VIO_both_dir, B_TRUE); 61782f5224aeSachartre 61792f5224aeSachartre return (rv); 61802f5224aeSachartre } 61812f5224aeSachartre 61822f5224aeSachartre /* 61832f5224aeSachartre * Implement the MHIOCSTATUS mhd(7i) ioctl. This ioctl is converted to a 61842f5224aeSachartre * VD_OP_GET_ACCESS operation. 
61852f5224aeSachartre */ 61862f5224aeSachartre static int 61872f5224aeSachartre vdc_access_get(vdc_t *vdc, uint64_t *status, int mode) 61882f5224aeSachartre { 61892f5224aeSachartre int rv; 61902f5224aeSachartre 61912f5224aeSachartre /* submit owership command request */ 61922f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_GET_ACCESS, (caddr_t)status, 61932f5224aeSachartre sizeof (uint64_t), 0, 0, CB_SYNC, (void *)(uint64_t)mode, 61942f5224aeSachartre VIO_both_dir, B_TRUE); 61952f5224aeSachartre 61962f5224aeSachartre return (rv); 61972f5224aeSachartre } 61982f5224aeSachartre 61992f5224aeSachartre /* 62002f5224aeSachartre * Disk Ownership Thread. 62012f5224aeSachartre * 62022f5224aeSachartre * When we have taken the ownership of a disk, this thread waits to be 62032f5224aeSachartre * notified when the LDC channel is reset so that it can recover the 62042f5224aeSachartre * ownership. 62052f5224aeSachartre * 62062f5224aeSachartre * Note that the thread handling the LDC reset (vdc_process_msg_thread()) 62072f5224aeSachartre * can not be used to do the ownership recovery because it has to be 62082f5224aeSachartre * running to handle the reply message to the ownership operation. 62092f5224aeSachartre */ 62102f5224aeSachartre static void 62112f5224aeSachartre vdc_ownership_thread(void *arg) 62122f5224aeSachartre { 62132f5224aeSachartre vdc_t *vdc = (vdc_t *)arg; 62142f5224aeSachartre clock_t timeout; 62152f5224aeSachartre uint64_t status; 62162f5224aeSachartre 62172f5224aeSachartre mutex_enter(&vdc->ownership_lock); 62182f5224aeSachartre mutex_enter(&vdc->lock); 62192f5224aeSachartre 62202f5224aeSachartre while (vdc->ownership & VDC_OWNERSHIP_WANTED) { 62212f5224aeSachartre 62222f5224aeSachartre if ((vdc->ownership & VDC_OWNERSHIP_RESET) || 62232f5224aeSachartre !(vdc->ownership & VDC_OWNERSHIP_GRANTED)) { 62242f5224aeSachartre /* 62252f5224aeSachartre * There was a reset so the ownership has been lost, 62262f5224aeSachartre * try to recover. 
We do this without using the preempt 62272f5224aeSachartre * option so that we don't steal the ownership from 62282f5224aeSachartre * someone who has preempted us. 62292f5224aeSachartre */ 62302f5224aeSachartre DMSG(vdc, 0, "[%d] Ownership lost, recovering", 62312f5224aeSachartre vdc->instance); 62322f5224aeSachartre 62332f5224aeSachartre vdc->ownership &= ~(VDC_OWNERSHIP_RESET | 62342f5224aeSachartre VDC_OWNERSHIP_GRANTED); 62352f5224aeSachartre 62362f5224aeSachartre mutex_exit(&vdc->lock); 62372f5224aeSachartre 62382f5224aeSachartre status = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE | 62392f5224aeSachartre VD_ACCESS_SET_PRESERVE, FKIOCTL); 62402f5224aeSachartre 62412f5224aeSachartre mutex_enter(&vdc->lock); 62422f5224aeSachartre 62432f5224aeSachartre if (status == 0) { 62442f5224aeSachartre DMSG(vdc, 0, "[%d] Ownership recovered", 62452f5224aeSachartre vdc->instance); 62462f5224aeSachartre vdc->ownership |= VDC_OWNERSHIP_GRANTED; 62472f5224aeSachartre } else { 62482f5224aeSachartre DMSG(vdc, 0, "[%d] Fail to recover ownership", 62492f5224aeSachartre vdc->instance); 62502f5224aeSachartre } 62512f5224aeSachartre 62522f5224aeSachartre } 62532f5224aeSachartre 62542f5224aeSachartre /* 62552f5224aeSachartre * If we have the ownership then we just wait for an event 62562f5224aeSachartre * to happen (LDC reset), otherwise we will retry to recover 62572f5224aeSachartre * after a delay. 
62582f5224aeSachartre */ 62592f5224aeSachartre if (vdc->ownership & VDC_OWNERSHIP_GRANTED) 62602f5224aeSachartre timeout = 0; 62612f5224aeSachartre else 62622f5224aeSachartre timeout = ddi_get_lbolt() + 62632f5224aeSachartre drv_usectohz(vdc_ownership_delay); 62642f5224aeSachartre 62652f5224aeSachartre /* Release the ownership_lock and wait on the vdc lock */ 62662f5224aeSachartre mutex_exit(&vdc->ownership_lock); 62672f5224aeSachartre 62682f5224aeSachartre if (timeout == 0) 62692f5224aeSachartre (void) cv_wait(&vdc->ownership_cv, &vdc->lock); 62702f5224aeSachartre else 62712f5224aeSachartre (void) cv_timedwait(&vdc->ownership_cv, 62722f5224aeSachartre &vdc->lock, timeout); 62732f5224aeSachartre 62742f5224aeSachartre mutex_exit(&vdc->lock); 62752f5224aeSachartre 62762f5224aeSachartre mutex_enter(&vdc->ownership_lock); 62772f5224aeSachartre mutex_enter(&vdc->lock); 62782f5224aeSachartre } 62792f5224aeSachartre 62802f5224aeSachartre vdc->ownership_thread = NULL; 62812f5224aeSachartre mutex_exit(&vdc->lock); 62822f5224aeSachartre mutex_exit(&vdc->ownership_lock); 62832f5224aeSachartre 62842f5224aeSachartre thread_exit(); 62852f5224aeSachartre } 62862f5224aeSachartre 62872f5224aeSachartre static void 62882f5224aeSachartre vdc_ownership_update(vdc_t *vdc, int ownership_flags) 62892f5224aeSachartre { 62902f5224aeSachartre ASSERT(MUTEX_HELD(&vdc->ownership_lock)); 62912f5224aeSachartre 62922f5224aeSachartre mutex_enter(&vdc->lock); 62932f5224aeSachartre vdc->ownership = ownership_flags; 62942f5224aeSachartre if ((vdc->ownership & VDC_OWNERSHIP_WANTED) && 62952f5224aeSachartre vdc->ownership_thread == NULL) { 62962f5224aeSachartre /* start ownership thread */ 62972f5224aeSachartre vdc->ownership_thread = thread_create(NULL, 0, 62982f5224aeSachartre vdc_ownership_thread, vdc, 0, &p0, TS_RUN, 62992f5224aeSachartre v.v_maxsyspri - 2); 63002f5224aeSachartre } else { 63012f5224aeSachartre /* notify the ownership thread */ 63022f5224aeSachartre cv_signal(&vdc->ownership_cv); 
63032f5224aeSachartre } 63042f5224aeSachartre mutex_exit(&vdc->lock); 63052f5224aeSachartre } 63062f5224aeSachartre 63072f5224aeSachartre /* 63082f5224aeSachartre * Get the size and the block size of a virtual disk from the vdisk server. 63092f5224aeSachartre * We need to use this operation when the vdisk_size attribute was not 63102f5224aeSachartre * available during the handshake with the vdisk server. 63112f5224aeSachartre */ 63122f5224aeSachartre static int 63132f5224aeSachartre vdc_check_capacity(vdc_t *vdc) 63142f5224aeSachartre { 63152f5224aeSachartre int rv = 0; 63162f5224aeSachartre size_t alloc_len; 63172f5224aeSachartre vd_capacity_t *vd_cap; 63182f5224aeSachartre 63192f5224aeSachartre if (vdc->vdisk_size != 0) 63202f5224aeSachartre return (0); 63212f5224aeSachartre 63222f5224aeSachartre alloc_len = P2ROUNDUP(sizeof (vd_capacity_t), sizeof (uint64_t)); 63232f5224aeSachartre 63242f5224aeSachartre vd_cap = kmem_zalloc(alloc_len, KM_SLEEP); 63252f5224aeSachartre 63262f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_GET_CAPACITY, (caddr_t)vd_cap, alloc_len, 63272f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)FKIOCTL, VIO_both_dir, B_TRUE); 63282f5224aeSachartre 63292f5224aeSachartre if (rv == 0) { 63302f5224aeSachartre if (vd_cap->vdisk_block_size != vdc->block_size || 63312f5224aeSachartre vd_cap->vdisk_size == VD_SIZE_UNKNOWN || 63322f5224aeSachartre vd_cap->vdisk_size == 0) 63332f5224aeSachartre rv = EINVAL; 63342f5224aeSachartre else 63352f5224aeSachartre vdc->vdisk_size = vd_cap->vdisk_size; 63362f5224aeSachartre } 63372f5224aeSachartre 63382f5224aeSachartre kmem_free(vd_cap, alloc_len); 63392f5224aeSachartre return (rv); 63402f5224aeSachartre } 63412f5224aeSachartre 63422f5224aeSachartre /* 63431ae08745Sheppo * This structure is used in the DKIO(7I) array below. 
63441ae08745Sheppo */ 63451ae08745Sheppo typedef struct vdc_dk_ioctl { 63461ae08745Sheppo uint8_t op; /* VD_OP_XXX value */ 63471ae08745Sheppo int cmd; /* Solaris ioctl operation number */ 63481ae08745Sheppo size_t nbytes; /* size of structure to be copied */ 63490a55fbb7Slm66018 63500a55fbb7Slm66018 /* function to convert between vDisk and Solaris structure formats */ 6351d10e4ef2Snarayan int (*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg, 6352d10e4ef2Snarayan int mode, int dir); 63531ae08745Sheppo } vdc_dk_ioctl_t; 63541ae08745Sheppo 63551ae08745Sheppo /* 63561ae08745Sheppo * Subset of DKIO(7I) operations currently supported 63571ae08745Sheppo */ 63581ae08745Sheppo static vdc_dk_ioctl_t dk_ioctl[] = { 6359eff7243fSlm66018 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, 0, 63600a55fbb7Slm66018 vdc_null_copy_func}, 63610a55fbb7Slm66018 {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), 63624bac2208Snarayan vdc_get_wce_convert}, 63630a55fbb7Slm66018 {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), 63644bac2208Snarayan vdc_set_wce_convert}, 63650a55fbb7Slm66018 {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), 63660a55fbb7Slm66018 vdc_get_vtoc_convert}, 63670a55fbb7Slm66018 {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), 63680a55fbb7Slm66018 vdc_set_vtoc_convert}, 63690a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), 63700a55fbb7Slm66018 vdc_get_geom_convert}, 63710a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), 63720a55fbb7Slm66018 vdc_get_geom_convert}, 63730a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), 63740a55fbb7Slm66018 vdc_get_geom_convert}, 63750a55fbb7Slm66018 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 63760a55fbb7Slm66018 vdc_set_geom_convert}, 63774bac2208Snarayan {VD_OP_GET_EFI, DKIOCGETEFI, 0, 63784bac2208Snarayan vdc_get_efi_convert}, 63794bac2208Snarayan {VD_OP_SET_EFI, DKIOCSETEFI, 0, 63804bac2208Snarayan vdc_set_efi_convert}, 63810a55fbb7Slm66018 638287a7269eSachartre /* DIOCTL_RWCMD is 
converted to a read or a write */ 638387a7269eSachartre {0, DIOCTL_RWCMD, sizeof (struct dadkio_rwcmd), NULL}, 638487a7269eSachartre 63852f5224aeSachartre /* mhd(7I) non-shared multihost disks ioctls */ 63862f5224aeSachartre {0, MHIOCTKOWN, 0, vdc_null_copy_func}, 63872f5224aeSachartre {0, MHIOCRELEASE, 0, vdc_null_copy_func}, 63882f5224aeSachartre {0, MHIOCSTATUS, 0, vdc_null_copy_func}, 63892f5224aeSachartre {0, MHIOCQRESERVE, 0, vdc_null_copy_func}, 63902f5224aeSachartre 63912f5224aeSachartre /* mhd(7I) shared multihost disks ioctls */ 63922f5224aeSachartre {0, MHIOCGRP_INKEYS, 0, vdc_null_copy_func}, 63932f5224aeSachartre {0, MHIOCGRP_INRESV, 0, vdc_null_copy_func}, 63942f5224aeSachartre {0, MHIOCGRP_REGISTER, 0, vdc_null_copy_func}, 63952f5224aeSachartre {0, MHIOCGRP_RESERVE, 0, vdc_null_copy_func}, 63962f5224aeSachartre {0, MHIOCGRP_PREEMPTANDABORT, 0, vdc_null_copy_func}, 63972f5224aeSachartre {0, MHIOCGRP_REGISTERANDIGNOREKEY, 0, vdc_null_copy_func}, 63982f5224aeSachartre 63992f5224aeSachartre /* mhd(7I) failfast ioctl */ 64002f5224aeSachartre {0, MHIOCENFAILFAST, 0, vdc_null_copy_func}, 64012f5224aeSachartre 64020a55fbb7Slm66018 /* 64030a55fbb7Slm66018 * These particular ioctls are not sent to the server - vdc fakes up 64040a55fbb7Slm66018 * the necessary info. 
64050a55fbb7Slm66018 */ 64060a55fbb7Slm66018 {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, 64070a55fbb7Slm66018 {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, 64080a55fbb7Slm66018 {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, 64099642afceSachartre {0, DKIOCPARTITION, 0, vdc_null_copy_func }, 641087a7269eSachartre {0, DKIOCGAPART, 0, vdc_null_copy_func }, 64110a55fbb7Slm66018 {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, 64120a55fbb7Slm66018 {0, CDROMREADOFFSET, 0, vdc_null_copy_func} 64131ae08745Sheppo }; 64141ae08745Sheppo 64151ae08745Sheppo /* 6416edcc0754Sachartre * This function handles ioctl requests from the vd_efi_alloc_and_read() 6417edcc0754Sachartre * function and forward them to the vdisk. 64182f5224aeSachartre */ 64192f5224aeSachartre static int 6420edcc0754Sachartre vd_process_efi_ioctl(void *vdisk, int cmd, uintptr_t arg) 64212f5224aeSachartre { 6422edcc0754Sachartre vdc_t *vdc = (vdc_t *)vdisk; 6423edcc0754Sachartre dev_t dev; 64242f5224aeSachartre int rval; 6425edcc0754Sachartre 6426edcc0754Sachartre dev = makedevice(ddi_driver_major(vdc->dip), 6427edcc0754Sachartre VD_MAKE_DEV(vdc->instance, 0)); 6428edcc0754Sachartre 6429edcc0754Sachartre return (vd_process_ioctl(dev, cmd, (caddr_t)arg, FKIOCTL, &rval)); 64302f5224aeSachartre } 64312f5224aeSachartre 64322f5224aeSachartre /* 64331ae08745Sheppo * Function: 64341ae08745Sheppo * vd_process_ioctl() 64351ae08745Sheppo * 64361ae08745Sheppo * Description: 64370a55fbb7Slm66018 * This routine processes disk specific ioctl calls 64381ae08745Sheppo * 64391ae08745Sheppo * Arguments: 64401ae08745Sheppo * dev - the device number 64411ae08745Sheppo * cmd - the operation [dkio(7I)] to be processed 64421ae08745Sheppo * arg - pointer to user provided structure 64431ae08745Sheppo * (contains data to be set or reference parameter for get) 64441ae08745Sheppo * mode - bit flag, indicating open settings, 32/64 bit type, etc 64452f5224aeSachartre * rvalp - pointer to 
return value for calling process. 64461ae08745Sheppo * 64471ae08745Sheppo * Return Code: 64481ae08745Sheppo * 0 64491ae08745Sheppo * EFAULT 64501ae08745Sheppo * ENXIO 64511ae08745Sheppo * EIO 64521ae08745Sheppo * ENOTSUP 64531ae08745Sheppo */ 64541ae08745Sheppo static int 64552f5224aeSachartre vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, int *rvalp) 64561ae08745Sheppo { 64570d0c8d4bSnarayan int instance = VDCUNIT(dev); 64581ae08745Sheppo vdc_t *vdc = NULL; 64591ae08745Sheppo int rv = -1; 64601ae08745Sheppo int idx = 0; /* index into dk_ioctl[] */ 64611ae08745Sheppo size_t len = 0; /* #bytes to send to vds */ 64621ae08745Sheppo size_t alloc_len = 0; /* #bytes to allocate mem for */ 64631ae08745Sheppo caddr_t mem_p = NULL; 64641ae08745Sheppo size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 64653af08d82Slm66018 vdc_dk_ioctl_t *iop; 64661ae08745Sheppo 64671ae08745Sheppo vdc = ddi_get_soft_state(vdc_state, instance); 64681ae08745Sheppo if (vdc == NULL) { 64691ae08745Sheppo cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 64701ae08745Sheppo instance); 64711ae08745Sheppo return (ENXIO); 64721ae08745Sheppo } 64731ae08745Sheppo 64743af08d82Slm66018 DMSG(vdc, 0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n", 64753af08d82Slm66018 instance, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 64761ae08745Sheppo 64772f5224aeSachartre if (rvalp != NULL) { 64782f5224aeSachartre /* the return value of the ioctl is 0 by default */ 64792f5224aeSachartre *rvalp = 0; 64802f5224aeSachartre } 64812f5224aeSachartre 64821ae08745Sheppo /* 64831ae08745Sheppo * Validate the ioctl operation to be performed. 64841ae08745Sheppo * 64851ae08745Sheppo * If we have looped through the array without finding a match then we 64861ae08745Sheppo * don't support this ioctl. 
64871ae08745Sheppo */ 64881ae08745Sheppo for (idx = 0; idx < nioctls; idx++) { 64891ae08745Sheppo if (cmd == dk_ioctl[idx].cmd) 64901ae08745Sheppo break; 64911ae08745Sheppo } 64921ae08745Sheppo 64931ae08745Sheppo if (idx >= nioctls) { 64943af08d82Slm66018 DMSG(vdc, 0, "[%d] Unsupported ioctl (0x%x)\n", 6495e1ebb9ecSlm66018 vdc->instance, cmd); 64961ae08745Sheppo return (ENOTSUP); 64971ae08745Sheppo } 64981ae08745Sheppo 64993af08d82Slm66018 iop = &(dk_ioctl[idx]); 65003af08d82Slm66018 65014bac2208Snarayan if (cmd == DKIOCGETEFI || cmd == DKIOCSETEFI) { 65024bac2208Snarayan /* size is not fixed for EFI ioctls, it depends on ioctl arg */ 65034bac2208Snarayan dk_efi_t dk_efi; 65044bac2208Snarayan 65054bac2208Snarayan rv = ddi_copyin(arg, &dk_efi, sizeof (dk_efi_t), mode); 65064bac2208Snarayan if (rv != 0) 65074bac2208Snarayan return (EFAULT); 65084bac2208Snarayan 65094bac2208Snarayan len = sizeof (vd_efi_t) - 1 + dk_efi.dki_length; 65104bac2208Snarayan } else { 65113af08d82Slm66018 len = iop->nbytes; 65124bac2208Snarayan } 65131ae08745Sheppo 65142f5224aeSachartre /* check if the ioctl is applicable */ 65151ae08745Sheppo switch (cmd) { 65161ae08745Sheppo case CDROMREADOFFSET: 65171ae08745Sheppo case DKIOCREMOVABLE: 65181ae08745Sheppo return (ENOTTY); 65191ae08745Sheppo 65202f5224aeSachartre case USCSICMD: 65212f5224aeSachartre case MHIOCTKOWN: 65222f5224aeSachartre case MHIOCSTATUS: 65232f5224aeSachartre case MHIOCQRESERVE: 65242f5224aeSachartre case MHIOCRELEASE: 65252f5224aeSachartre case MHIOCGRP_INKEYS: 65262f5224aeSachartre case MHIOCGRP_INRESV: 65272f5224aeSachartre case MHIOCGRP_REGISTER: 65282f5224aeSachartre case MHIOCGRP_RESERVE: 65292f5224aeSachartre case MHIOCGRP_PREEMPTANDABORT: 65302f5224aeSachartre case MHIOCGRP_REGISTERANDIGNOREKEY: 65312f5224aeSachartre case MHIOCENFAILFAST: 65322f5224aeSachartre if (vdc->cinfo == NULL) 65332f5224aeSachartre return (ENXIO); 65342f5224aeSachartre if (vdc->cinfo->dki_ctype != DKC_SCSI_CCS) 65352f5224aeSachartre return 
(ENOTTY); 65362f5224aeSachartre break; 65372f5224aeSachartre 65382f5224aeSachartre case DIOCTL_RWCMD: 65392f5224aeSachartre if (vdc->cinfo == NULL) 65402f5224aeSachartre return (ENXIO); 65412f5224aeSachartre if (vdc->cinfo->dki_ctype != DKC_DIRECT) 65422f5224aeSachartre return (ENOTTY); 65432f5224aeSachartre break; 65442f5224aeSachartre 65452f5224aeSachartre case DKIOCINFO: 65462f5224aeSachartre if (vdc->cinfo == NULL) 65472f5224aeSachartre return (ENXIO); 65482f5224aeSachartre break; 65492f5224aeSachartre 65502f5224aeSachartre case DKIOCGMEDIAINFO: 65512f5224aeSachartre if (vdc->minfo == NULL) 65522f5224aeSachartre return (ENXIO); 65532f5224aeSachartre if (vdc_check_capacity(vdc) != 0) 65542f5224aeSachartre /* disk capacity is not available */ 65552f5224aeSachartre return (EIO); 65562f5224aeSachartre break; 65572f5224aeSachartre } 65582f5224aeSachartre 65592f5224aeSachartre /* 65602f5224aeSachartre * Deal with ioctls which require a processing different than 65612f5224aeSachartre * converting ioctl arguments and sending a corresponding 65622f5224aeSachartre * VD operation. 65632f5224aeSachartre */ 65642f5224aeSachartre switch (cmd) { 65652f5224aeSachartre 65662f5224aeSachartre case USCSICMD: 65672f5224aeSachartre { 65682f5224aeSachartre return (vdc_uscsi_cmd(vdc, arg, mode)); 65692f5224aeSachartre } 65702f5224aeSachartre 65712f5224aeSachartre case MHIOCTKOWN: 65722f5224aeSachartre { 65732f5224aeSachartre mutex_enter(&vdc->ownership_lock); 65742f5224aeSachartre /* 65752f5224aeSachartre * We have to set VDC_OWNERSHIP_WANTED now so that the ownership 65762f5224aeSachartre * can be flagged with VDC_OWNERSHIP_RESET if the LDC is reset 65772f5224aeSachartre * while we are processing the ioctl. 
65782f5224aeSachartre */ 65792f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_WANTED); 65802f5224aeSachartre 65812f5224aeSachartre rv = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE | 65822f5224aeSachartre VD_ACCESS_SET_PREEMPT | VD_ACCESS_SET_PRESERVE, mode); 65832f5224aeSachartre if (rv == 0) { 65842f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_WANTED | 65852f5224aeSachartre VDC_OWNERSHIP_GRANTED); 65862f5224aeSachartre } else { 65872f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 65882f5224aeSachartre } 65892f5224aeSachartre mutex_exit(&vdc->ownership_lock); 65902f5224aeSachartre return (rv); 65912f5224aeSachartre } 65922f5224aeSachartre 65932f5224aeSachartre case MHIOCRELEASE: 65942f5224aeSachartre { 65952f5224aeSachartre mutex_enter(&vdc->ownership_lock); 65962f5224aeSachartre rv = vdc_access_set(vdc, VD_ACCESS_SET_CLEAR, mode); 65972f5224aeSachartre if (rv == 0) { 65982f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 65992f5224aeSachartre } 66002f5224aeSachartre mutex_exit(&vdc->ownership_lock); 66012f5224aeSachartre return (rv); 66022f5224aeSachartre } 66032f5224aeSachartre 66042f5224aeSachartre case MHIOCSTATUS: 66052f5224aeSachartre { 66062f5224aeSachartre uint64_t status; 66072f5224aeSachartre 66082f5224aeSachartre rv = vdc_access_get(vdc, &status, mode); 66092f5224aeSachartre if (rv == 0 && rvalp != NULL) 66102f5224aeSachartre *rvalp = (status & VD_ACCESS_ALLOWED)? 
0 : 1; 66112f5224aeSachartre return (rv); 66122f5224aeSachartre } 66132f5224aeSachartre 66142f5224aeSachartre case MHIOCQRESERVE: 66152f5224aeSachartre { 66162f5224aeSachartre rv = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE, mode); 66172f5224aeSachartre return (rv); 66182f5224aeSachartre } 66192f5224aeSachartre 66202f5224aeSachartre case MHIOCGRP_INKEYS: 66212f5224aeSachartre { 66222f5224aeSachartre return (vdc_mhd_inkeys(vdc, arg, mode)); 66232f5224aeSachartre } 66242f5224aeSachartre 66252f5224aeSachartre case MHIOCGRP_INRESV: 66262f5224aeSachartre { 66272f5224aeSachartre return (vdc_mhd_inresv(vdc, arg, mode)); 66282f5224aeSachartre } 66292f5224aeSachartre 66302f5224aeSachartre case MHIOCGRP_REGISTER: 66312f5224aeSachartre { 66322f5224aeSachartre return (vdc_mhd_register(vdc, arg, mode)); 66332f5224aeSachartre } 66342f5224aeSachartre 66352f5224aeSachartre case MHIOCGRP_RESERVE: 66362f5224aeSachartre { 66372f5224aeSachartre return (vdc_mhd_reserve(vdc, arg, mode)); 66382f5224aeSachartre } 66392f5224aeSachartre 66402f5224aeSachartre case MHIOCGRP_PREEMPTANDABORT: 66412f5224aeSachartre { 66422f5224aeSachartre return (vdc_mhd_preemptabort(vdc, arg, mode)); 66432f5224aeSachartre } 66442f5224aeSachartre 66452f5224aeSachartre case MHIOCGRP_REGISTERANDIGNOREKEY: 66462f5224aeSachartre { 66472f5224aeSachartre return (vdc_mhd_registerignore(vdc, arg, mode)); 66482f5224aeSachartre } 66492f5224aeSachartre 66502f5224aeSachartre case MHIOCENFAILFAST: 66512f5224aeSachartre { 66522f5224aeSachartre rv = vdc_failfast(vdc, arg, mode); 66532f5224aeSachartre return (rv); 66542f5224aeSachartre } 66552f5224aeSachartre 665687a7269eSachartre case DIOCTL_RWCMD: 665787a7269eSachartre { 665887a7269eSachartre return (vdc_dioctl_rwcmd(dev, arg, mode)); 665987a7269eSachartre } 666087a7269eSachartre 666187a7269eSachartre case DKIOCGAPART: 666287a7269eSachartre { 66639642afceSachartre return (vdc_dkio_gapart(vdc, arg, mode)); 66649642afceSachartre } 66659642afceSachartre 66669642afceSachartre 
case DKIOCPARTITION: 66679642afceSachartre { 66689642afceSachartre return (vdc_dkio_partition(vdc, arg, mode)); 666987a7269eSachartre } 667087a7269eSachartre 66711ae08745Sheppo case DKIOCINFO: 66721ae08745Sheppo { 66731ae08745Sheppo struct dk_cinfo cinfo; 66741ae08745Sheppo 66751ae08745Sheppo bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 66760d0c8d4bSnarayan cinfo.dki_partition = VDCPART(dev); 66771ae08745Sheppo 66781ae08745Sheppo rv = ddi_copyout(&cinfo, (void *)arg, 66791ae08745Sheppo sizeof (struct dk_cinfo), mode); 66801ae08745Sheppo if (rv != 0) 66811ae08745Sheppo return (EFAULT); 66821ae08745Sheppo 66831ae08745Sheppo return (0); 66841ae08745Sheppo } 66851ae08745Sheppo 66861ae08745Sheppo case DKIOCGMEDIAINFO: 66878e6a2a04Slm66018 { 66882f5224aeSachartre ASSERT(vdc->vdisk_size != 0); 66892f5224aeSachartre if (vdc->minfo->dki_capacity == 0) 66902f5224aeSachartre vdc->minfo->dki_capacity = vdc->vdisk_size; 66911ae08745Sheppo rv = ddi_copyout(vdc->minfo, (void *)arg, 66921ae08745Sheppo sizeof (struct dk_minfo), mode); 66931ae08745Sheppo if (rv != 0) 66941ae08745Sheppo return (EFAULT); 66951ae08745Sheppo 66961ae08745Sheppo return (0); 66971ae08745Sheppo } 66981ae08745Sheppo 66998e6a2a04Slm66018 case DKIOCFLUSHWRITECACHE: 67008e6a2a04Slm66018 { 670117cadca8Slm66018 struct dk_callback *dkc = 670217cadca8Slm66018 (struct dk_callback *)(uintptr_t)arg; 67038e6a2a04Slm66018 vdc_dk_arg_t *dkarg = NULL; 67048e6a2a04Slm66018 67053af08d82Slm66018 DMSG(vdc, 1, "[%d] Flush W$: mode %x\n", 67063af08d82Slm66018 instance, mode); 67078e6a2a04Slm66018 67088e6a2a04Slm66018 /* 67098e6a2a04Slm66018 * If arg is NULL, then there is no callback function 67108e6a2a04Slm66018 * registered and the call operates synchronously; we 67118e6a2a04Slm66018 * break and continue with the rest of the function and 67128e6a2a04Slm66018 * wait for vds to return (i.e. 
after the request to 67138e6a2a04Slm66018 * vds returns successfully, all writes completed prior 67148e6a2a04Slm66018 * to the ioctl will have been flushed from the disk 67158e6a2a04Slm66018 * write cache to persistent media. 67168e6a2a04Slm66018 * 67178e6a2a04Slm66018 * If a callback function is registered, we dispatch 67188e6a2a04Slm66018 * the request on a task queue and return immediately. 67198e6a2a04Slm66018 * The callback will deal with informing the calling 67208e6a2a04Slm66018 * thread that the flush request is completed. 67218e6a2a04Slm66018 */ 67228e6a2a04Slm66018 if (dkc == NULL) 67238e6a2a04Slm66018 break; 67248e6a2a04Slm66018 6725eff7243fSlm66018 /* 6726eff7243fSlm66018 * the asynchronous callback is only supported if 6727eff7243fSlm66018 * invoked from within the kernel 6728eff7243fSlm66018 */ 6729eff7243fSlm66018 if ((mode & FKIOCTL) == 0) 6730eff7243fSlm66018 return (ENOTSUP); 6731eff7243fSlm66018 67328e6a2a04Slm66018 dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP); 67338e6a2a04Slm66018 67348e6a2a04Slm66018 dkarg->mode = mode; 67358e6a2a04Slm66018 dkarg->dev = dev; 67368e6a2a04Slm66018 bcopy(dkc, &dkarg->dkc, sizeof (*dkc)); 67378e6a2a04Slm66018 67388e6a2a04Slm66018 mutex_enter(&vdc->lock); 67398e6a2a04Slm66018 vdc->dkio_flush_pending++; 67408e6a2a04Slm66018 dkarg->vdc = vdc; 67418e6a2a04Slm66018 mutex_exit(&vdc->lock); 67428e6a2a04Slm66018 67438e6a2a04Slm66018 /* put the request on a task queue */ 67448e6a2a04Slm66018 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 67458e6a2a04Slm66018 (void *)dkarg, DDI_SLEEP); 67463af08d82Slm66018 if (rv == NULL) { 67473af08d82Slm66018 /* clean up if dispatch fails */ 67483af08d82Slm66018 mutex_enter(&vdc->lock); 67493af08d82Slm66018 vdc->dkio_flush_pending--; 675078fcd0a1Sachartre mutex_exit(&vdc->lock); 67513af08d82Slm66018 kmem_free(dkarg, sizeof (vdc_dk_arg_t)); 67523af08d82Slm66018 } 67538e6a2a04Slm66018 67548e6a2a04Slm66018 return (rv == NULL ? 
ENOMEM : 0); 67558e6a2a04Slm66018 } 67568e6a2a04Slm66018 } 67578e6a2a04Slm66018 67581ae08745Sheppo /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 67593af08d82Slm66018 ASSERT(iop->op != 0); 67601ae08745Sheppo 676117cadca8Slm66018 /* check if the vDisk server handles the operation for this vDisk */ 676217cadca8Slm66018 if (VD_OP_SUPPORTED(vdc->operations, iop->op) == B_FALSE) { 676317cadca8Slm66018 DMSG(vdc, 0, "[%d] Unsupported VD_OP operation (0x%x)\n", 676417cadca8Slm66018 vdc->instance, iop->op); 676517cadca8Slm66018 return (ENOTSUP); 676617cadca8Slm66018 } 676717cadca8Slm66018 67681ae08745Sheppo /* LDC requires that the memory being mapped is 8-byte aligned */ 67691ae08745Sheppo alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 67703af08d82Slm66018 DMSG(vdc, 1, "[%d] struct size %ld alloc %ld\n", 67713af08d82Slm66018 instance, len, alloc_len); 67721ae08745Sheppo 6773eff7243fSlm66018 if (alloc_len > 0) 67741ae08745Sheppo mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 67751ae08745Sheppo 67760a55fbb7Slm66018 /* 6777eff7243fSlm66018 * Call the conversion function for this ioctl which, if necessary, 67780a55fbb7Slm66018 * converts from the Solaris format to the format ARC'ed 67790a55fbb7Slm66018 * as part of the vDisk protocol (FWARC 2006/195) 67800a55fbb7Slm66018 */ 67813af08d82Slm66018 ASSERT(iop->convert != NULL); 67823af08d82Slm66018 rv = (iop->convert)(vdc, arg, mem_p, mode, VD_COPYIN); 67831ae08745Sheppo if (rv != 0) { 67843af08d82Slm66018 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 6785e1ebb9ecSlm66018 instance, rv, cmd); 67861ae08745Sheppo if (mem_p != NULL) 67871ae08745Sheppo kmem_free(mem_p, alloc_len); 67880a55fbb7Slm66018 return (rv); 67891ae08745Sheppo } 67901ae08745Sheppo 67911ae08745Sheppo /* 67921ae08745Sheppo * send request to vds to service the ioctl. 
67931ae08745Sheppo */ 67943af08d82Slm66018 rv = vdc_do_sync_op(vdc, iop->op, mem_p, alloc_len, 67950d0c8d4bSnarayan VDCPART(dev), 0, CB_SYNC, (void *)(uint64_t)mode, 67962f5224aeSachartre VIO_both_dir, B_TRUE); 679778fcd0a1Sachartre 67981ae08745Sheppo if (rv != 0) { 67991ae08745Sheppo /* 68001ae08745Sheppo * This is not necessarily an error. The ioctl could 68011ae08745Sheppo * be returning a value such as ENOTTY to indicate 68021ae08745Sheppo * that the ioctl is not applicable. 68031ae08745Sheppo */ 68043af08d82Slm66018 DMSG(vdc, 0, "[%d] vds returned %d for ioctl 0x%x\n", 6805e1ebb9ecSlm66018 instance, rv, cmd); 68061ae08745Sheppo if (mem_p != NULL) 68071ae08745Sheppo kmem_free(mem_p, alloc_len); 6808d10e4ef2Snarayan 68091ae08745Sheppo return (rv); 68101ae08745Sheppo } 68111ae08745Sheppo 68121ae08745Sheppo /* 68130a55fbb7Slm66018 * Call the conversion function (if it exists) for this ioctl 68140a55fbb7Slm66018 * which converts from the format ARC'ed as part of the vDisk 68150a55fbb7Slm66018 * protocol (FWARC 2006/195) back to a format understood by 68160a55fbb7Slm66018 * the rest of Solaris. 
68171ae08745Sheppo */ 68183af08d82Slm66018 rv = (iop->convert)(vdc, mem_p, arg, mode, VD_COPYOUT); 68190a55fbb7Slm66018 if (rv != 0) { 68203af08d82Slm66018 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 6821e1ebb9ecSlm66018 instance, rv, cmd); 68221ae08745Sheppo if (mem_p != NULL) 68231ae08745Sheppo kmem_free(mem_p, alloc_len); 68240a55fbb7Slm66018 return (rv); 68251ae08745Sheppo } 68261ae08745Sheppo 68271ae08745Sheppo if (mem_p != NULL) 68281ae08745Sheppo kmem_free(mem_p, alloc_len); 68291ae08745Sheppo 68301ae08745Sheppo return (rv); 68311ae08745Sheppo } 68321ae08745Sheppo 68331ae08745Sheppo /* 68341ae08745Sheppo * Function: 68350a55fbb7Slm66018 * 68360a55fbb7Slm66018 * Description: 68370a55fbb7Slm66018 * This is an empty conversion function used by ioctl calls which 68380a55fbb7Slm66018 * do not need to convert the data being passed in/out to userland 68390a55fbb7Slm66018 */ 68400a55fbb7Slm66018 static int 6841d10e4ef2Snarayan vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir) 68420a55fbb7Slm66018 { 6843d10e4ef2Snarayan _NOTE(ARGUNUSED(vdc)) 68440a55fbb7Slm66018 _NOTE(ARGUNUSED(from)) 68450a55fbb7Slm66018 _NOTE(ARGUNUSED(to)) 68460a55fbb7Slm66018 _NOTE(ARGUNUSED(mode)) 68470a55fbb7Slm66018 _NOTE(ARGUNUSED(dir)) 68480a55fbb7Slm66018 68490a55fbb7Slm66018 return (0); 68500a55fbb7Slm66018 } 68510a55fbb7Slm66018 68524bac2208Snarayan static int 68534bac2208Snarayan vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 68544bac2208Snarayan int mode, int dir) 68554bac2208Snarayan { 68564bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 68574bac2208Snarayan 68584bac2208Snarayan if (dir == VD_COPYIN) 68594bac2208Snarayan return (0); /* nothing to do */ 68604bac2208Snarayan 68614bac2208Snarayan if (ddi_copyout(from, to, sizeof (int), mode) != 0) 68624bac2208Snarayan return (EFAULT); 68634bac2208Snarayan 68644bac2208Snarayan return (0); 68654bac2208Snarayan } 68664bac2208Snarayan 68674bac2208Snarayan static int 68684bac2208Snarayan 
vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 68694bac2208Snarayan int mode, int dir) 68704bac2208Snarayan { 68714bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 68724bac2208Snarayan 68734bac2208Snarayan if (dir == VD_COPYOUT) 68744bac2208Snarayan return (0); /* nothing to do */ 68754bac2208Snarayan 68764bac2208Snarayan if (ddi_copyin(from, to, sizeof (int), mode) != 0) 68774bac2208Snarayan return (EFAULT); 68784bac2208Snarayan 68794bac2208Snarayan return (0); 68804bac2208Snarayan } 68814bac2208Snarayan 68820a55fbb7Slm66018 /* 68830a55fbb7Slm66018 * Function: 68840a55fbb7Slm66018 * vdc_get_vtoc_convert() 68850a55fbb7Slm66018 * 68860a55fbb7Slm66018 * Description: 6887d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCGVTOC 6888d10e4ef2Snarayan * Solaris structure to the format defined in FWARC 2006/195. 6889d10e4ef2Snarayan * 6890d10e4ef2Snarayan * In the struct vtoc definition, the timestamp field is marked as not 6891d10e4ef2Snarayan * supported so it is not part of vDisk protocol (FWARC 2006/195). 6892d10e4ef2Snarayan * However SVM uses that field to check it can write into the VTOC, 6893d10e4ef2Snarayan * so we fake up the info of that field. 68940a55fbb7Slm66018 * 68950a55fbb7Slm66018 * Arguments: 6896d10e4ef2Snarayan * vdc - the vDisk client 68970a55fbb7Slm66018 * from - the buffer containing the data to be copied from 68980a55fbb7Slm66018 * to - the buffer to be copied to 68990a55fbb7Slm66018 * mode - flags passed to ioctl() call 69000a55fbb7Slm66018 * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT 69010a55fbb7Slm66018 * 69020a55fbb7Slm66018 * Return Code: 69030a55fbb7Slm66018 * 0 - Success 69040a55fbb7Slm66018 * ENXIO - incorrect buffer passed in. 6905d10e4ef2Snarayan * EFAULT - ddi_copyout routine encountered an error. 
69060a55fbb7Slm66018 */ 69070a55fbb7Slm66018 static int 6908d10e4ef2Snarayan vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 69090a55fbb7Slm66018 { 6910d10e4ef2Snarayan int i; 69110a55fbb7Slm66018 void *tmp_mem = NULL; 69120a55fbb7Slm66018 void *tmp_memp; 69130a55fbb7Slm66018 struct vtoc vt; 69140a55fbb7Slm66018 struct vtoc32 vt32; 69150a55fbb7Slm66018 int copy_len = 0; 69160a55fbb7Slm66018 int rv = 0; 69170a55fbb7Slm66018 69180a55fbb7Slm66018 if (dir != VD_COPYOUT) 69190a55fbb7Slm66018 return (0); /* nothing to do */ 69200a55fbb7Slm66018 69210a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 69220a55fbb7Slm66018 return (ENXIO); 69230a55fbb7Slm66018 69240a55fbb7Slm66018 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 69250a55fbb7Slm66018 copy_len = sizeof (struct vtoc32); 69260a55fbb7Slm66018 else 69270a55fbb7Slm66018 copy_len = sizeof (struct vtoc); 69280a55fbb7Slm66018 69290a55fbb7Slm66018 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 69300a55fbb7Slm66018 69310a55fbb7Slm66018 VD_VTOC2VTOC((vd_vtoc_t *)from, &vt); 6932d10e4ef2Snarayan 6933d10e4ef2Snarayan /* fake the VTOC timestamp field */ 6934d10e4ef2Snarayan for (i = 0; i < V_NUMPAR; i++) { 6935d10e4ef2Snarayan vt.timestamp[i] = vdc->vtoc->timestamp[i]; 6936d10e4ef2Snarayan } 6937d10e4ef2Snarayan 69380a55fbb7Slm66018 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 693917cadca8Slm66018 /* LINTED E_ASSIGN_NARROW_CONV */ 69400a55fbb7Slm66018 vtoctovtoc32(vt, vt32); 69410a55fbb7Slm66018 tmp_memp = &vt32; 69420a55fbb7Slm66018 } else { 69430a55fbb7Slm66018 tmp_memp = &vt; 69440a55fbb7Slm66018 } 69450a55fbb7Slm66018 rv = ddi_copyout(tmp_memp, to, copy_len, mode); 69460a55fbb7Slm66018 if (rv != 0) 69470a55fbb7Slm66018 rv = EFAULT; 69480a55fbb7Slm66018 69490a55fbb7Slm66018 kmem_free(tmp_mem, copy_len); 69500a55fbb7Slm66018 return (rv); 69510a55fbb7Slm66018 } 69520a55fbb7Slm66018 69530a55fbb7Slm66018 /* 69540a55fbb7Slm66018 * Function: 69550a55fbb7Slm66018 * 
vdc_set_vtoc_convert() 69560a55fbb7Slm66018 * 69570a55fbb7Slm66018 * Description: 6958d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCSVTOC 6959d10e4ef2Snarayan * Solaris structure to the format defined in FWARC 2006/195. 69600a55fbb7Slm66018 * 69610a55fbb7Slm66018 * Arguments: 6962d10e4ef2Snarayan * vdc - the vDisk client 69630a55fbb7Slm66018 * from - Buffer with data 69640a55fbb7Slm66018 * to - Buffer where data is to be copied to 69650a55fbb7Slm66018 * mode - flags passed to ioctl 69660a55fbb7Slm66018 * dir - direction of copy (in or out) 69670a55fbb7Slm66018 * 69680a55fbb7Slm66018 * Return Code: 69690a55fbb7Slm66018 * 0 - Success 69700a55fbb7Slm66018 * ENXIO - Invalid buffer passed in 69710a55fbb7Slm66018 * EFAULT - ddi_copyin of data failed 69720a55fbb7Slm66018 */ 69730a55fbb7Slm66018 static int 6974d10e4ef2Snarayan vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 69750a55fbb7Slm66018 { 697678fcd0a1Sachartre _NOTE(ARGUNUSED(vdc)) 697778fcd0a1Sachartre 69782f5224aeSachartre void *tmp_mem = NULL, *uvtoc; 69790a55fbb7Slm66018 struct vtoc vt; 69800a55fbb7Slm66018 struct vtoc *vtp = &vt; 69810a55fbb7Slm66018 vd_vtoc_t vtvd; 69820a55fbb7Slm66018 int copy_len = 0; 69832f5224aeSachartre int i, rv = 0; 69840a55fbb7Slm66018 69850a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 69860a55fbb7Slm66018 return (ENXIO); 69870a55fbb7Slm66018 69882f5224aeSachartre if (dir == VD_COPYIN) 69892f5224aeSachartre uvtoc = from; 69902f5224aeSachartre else 69912f5224aeSachartre uvtoc = to; 69922f5224aeSachartre 69930a55fbb7Slm66018 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 69940a55fbb7Slm66018 copy_len = sizeof (struct vtoc32); 69950a55fbb7Slm66018 else 69960a55fbb7Slm66018 copy_len = sizeof (struct vtoc); 69970a55fbb7Slm66018 69980a55fbb7Slm66018 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 69990a55fbb7Slm66018 70002f5224aeSachartre rv = ddi_copyin(uvtoc, tmp_mem, copy_len, mode); 70010a55fbb7Slm66018 if (rv 
!= 0) { 70020a55fbb7Slm66018 kmem_free(tmp_mem, copy_len); 70030a55fbb7Slm66018 return (EFAULT); 70040a55fbb7Slm66018 } 70050a55fbb7Slm66018 70060a55fbb7Slm66018 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 70070a55fbb7Slm66018 vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt); 70080a55fbb7Slm66018 } else { 70090a55fbb7Slm66018 vtp = tmp_mem; 70100a55fbb7Slm66018 } 70110a55fbb7Slm66018 70122f5224aeSachartre if (dir == VD_COPYOUT) { 70132f5224aeSachartre /* 70142f5224aeSachartre * The disk label may have changed. Revalidate the disk 70152f5224aeSachartre * geometry. This will also update the device nodes and 70162f5224aeSachartre * properties. 70172f5224aeSachartre */ 70182f5224aeSachartre vdc_validate(vdc); 70192f5224aeSachartre 70202f5224aeSachartre /* 70212f5224aeSachartre * We also need to keep track of the timestamp fields. 70222f5224aeSachartre */ 70232f5224aeSachartre for (i = 0; i < V_NUMPAR; i++) { 70242f5224aeSachartre vdc->vtoc->timestamp[i] = vtp->timestamp[i]; 70252f5224aeSachartre } 70262f5224aeSachartre 70272f5224aeSachartre return (0); 70282f5224aeSachartre } 70292f5224aeSachartre 70300a55fbb7Slm66018 VTOC2VD_VTOC(vtp, &vtvd); 70310a55fbb7Slm66018 bcopy(&vtvd, to, sizeof (vd_vtoc_t)); 70320a55fbb7Slm66018 kmem_free(tmp_mem, copy_len); 70330a55fbb7Slm66018 70340a55fbb7Slm66018 return (0); 70350a55fbb7Slm66018 } 70360a55fbb7Slm66018 70370a55fbb7Slm66018 /* 70380a55fbb7Slm66018 * Function: 70390a55fbb7Slm66018 * vdc_get_geom_convert() 70400a55fbb7Slm66018 * 70410a55fbb7Slm66018 * Description: 7042d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCGGEOM, 7043d10e4ef2Snarayan * DKIOCG_PHYSGEOM and DKIOG_VIRTGEOM Solaris structures to the format 7044d10e4ef2Snarayan * defined in FWARC 2006/195 70450a55fbb7Slm66018 * 70460a55fbb7Slm66018 * Arguments: 7047d10e4ef2Snarayan * vdc - the vDisk client 70480a55fbb7Slm66018 * from - Buffer with data 70490a55fbb7Slm66018 * to - Buffer where data is to be copied to 
70500a55fbb7Slm66018 * mode - flags passed to ioctl 70510a55fbb7Slm66018 * dir - direction of copy (in or out) 70520a55fbb7Slm66018 * 70530a55fbb7Slm66018 * Return Code: 70540a55fbb7Slm66018 * 0 - Success 70550a55fbb7Slm66018 * ENXIO - Invalid buffer passed in 7056d10e4ef2Snarayan * EFAULT - ddi_copyout of data failed 70570a55fbb7Slm66018 */ 70580a55fbb7Slm66018 static int 7059d10e4ef2Snarayan vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 70600a55fbb7Slm66018 { 7061d10e4ef2Snarayan _NOTE(ARGUNUSED(vdc)) 7062d10e4ef2Snarayan 70630a55fbb7Slm66018 struct dk_geom geom; 70640a55fbb7Slm66018 int copy_len = sizeof (struct dk_geom); 70650a55fbb7Slm66018 int rv = 0; 70660a55fbb7Slm66018 70670a55fbb7Slm66018 if (dir != VD_COPYOUT) 70680a55fbb7Slm66018 return (0); /* nothing to do */ 70690a55fbb7Slm66018 70700a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 70710a55fbb7Slm66018 return (ENXIO); 70720a55fbb7Slm66018 70730a55fbb7Slm66018 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 70740a55fbb7Slm66018 rv = ddi_copyout(&geom, to, copy_len, mode); 70750a55fbb7Slm66018 if (rv != 0) 70760a55fbb7Slm66018 rv = EFAULT; 70770a55fbb7Slm66018 70780a55fbb7Slm66018 return (rv); 70790a55fbb7Slm66018 } 70800a55fbb7Slm66018 70810a55fbb7Slm66018 /* 70820a55fbb7Slm66018 * Function: 70830a55fbb7Slm66018 * vdc_set_geom_convert() 70840a55fbb7Slm66018 * 70850a55fbb7Slm66018 * Description: 7086d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCSGEOM 7087d10e4ef2Snarayan * Solaris structure to the format defined in FWARC 2006/195. 
70880a55fbb7Slm66018 * 70890a55fbb7Slm66018 * Arguments: 7090d10e4ef2Snarayan * vdc - the vDisk client 70910a55fbb7Slm66018 * from - Buffer with data 70920a55fbb7Slm66018 * to - Buffer where data is to be copied to 70930a55fbb7Slm66018 * mode - flags passed to ioctl 70940a55fbb7Slm66018 * dir - direction of copy (in or out) 70950a55fbb7Slm66018 * 70960a55fbb7Slm66018 * Return Code: 70970a55fbb7Slm66018 * 0 - Success 70980a55fbb7Slm66018 * ENXIO - Invalid buffer passed in 70990a55fbb7Slm66018 * EFAULT - ddi_copyin of data failed 71000a55fbb7Slm66018 */ 71010a55fbb7Slm66018 static int 7102d10e4ef2Snarayan vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 71030a55fbb7Slm66018 { 7104d10e4ef2Snarayan _NOTE(ARGUNUSED(vdc)) 7105d10e4ef2Snarayan 71060a55fbb7Slm66018 vd_geom_t vdgeom; 71070a55fbb7Slm66018 void *tmp_mem = NULL; 71080a55fbb7Slm66018 int copy_len = sizeof (struct dk_geom); 71090a55fbb7Slm66018 int rv = 0; 71100a55fbb7Slm66018 71110a55fbb7Slm66018 if (dir != VD_COPYIN) 71120a55fbb7Slm66018 return (0); /* nothing to do */ 71130a55fbb7Slm66018 71140a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 71150a55fbb7Slm66018 return (ENXIO); 71160a55fbb7Slm66018 71170a55fbb7Slm66018 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 71180a55fbb7Slm66018 71190a55fbb7Slm66018 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 71200a55fbb7Slm66018 if (rv != 0) { 71210a55fbb7Slm66018 kmem_free(tmp_mem, copy_len); 71220a55fbb7Slm66018 return (EFAULT); 71230a55fbb7Slm66018 } 71240a55fbb7Slm66018 DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); 71250a55fbb7Slm66018 bcopy(&vdgeom, to, sizeof (vdgeom)); 71260a55fbb7Slm66018 kmem_free(tmp_mem, copy_len); 71270a55fbb7Slm66018 71280a55fbb7Slm66018 return (0); 71290a55fbb7Slm66018 } 71300a55fbb7Slm66018 71314bac2208Snarayan static int 71324bac2208Snarayan vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 71334bac2208Snarayan { 71344bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 71354bac2208Snarayan 
71364bac2208Snarayan vd_efi_t *vd_efi; 71374bac2208Snarayan dk_efi_t dk_efi; 71384bac2208Snarayan int rv = 0; 71394bac2208Snarayan void *uaddr; 71404bac2208Snarayan 71414bac2208Snarayan if ((from == NULL) || (to == NULL)) 71424bac2208Snarayan return (ENXIO); 71434bac2208Snarayan 71444bac2208Snarayan if (dir == VD_COPYIN) { 71454bac2208Snarayan 71464bac2208Snarayan vd_efi = (vd_efi_t *)to; 71474bac2208Snarayan 71484bac2208Snarayan rv = ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode); 71494bac2208Snarayan if (rv != 0) 71504bac2208Snarayan return (EFAULT); 71514bac2208Snarayan 71524bac2208Snarayan vd_efi->lba = dk_efi.dki_lba; 71534bac2208Snarayan vd_efi->length = dk_efi.dki_length; 71544bac2208Snarayan bzero(vd_efi->data, vd_efi->length); 71554bac2208Snarayan 71564bac2208Snarayan } else { 71574bac2208Snarayan 71584bac2208Snarayan rv = ddi_copyin(to, &dk_efi, sizeof (dk_efi_t), mode); 71594bac2208Snarayan if (rv != 0) 71604bac2208Snarayan return (EFAULT); 71614bac2208Snarayan 71624bac2208Snarayan uaddr = dk_efi.dki_data; 71634bac2208Snarayan 71644bac2208Snarayan dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 71654bac2208Snarayan 71664bac2208Snarayan VD_EFI2DK_EFI((vd_efi_t *)from, &dk_efi); 71674bac2208Snarayan 71684bac2208Snarayan rv = ddi_copyout(dk_efi.dki_data, uaddr, dk_efi.dki_length, 71694bac2208Snarayan mode); 71704bac2208Snarayan if (rv != 0) 71714bac2208Snarayan return (EFAULT); 71724bac2208Snarayan 71734bac2208Snarayan kmem_free(dk_efi.dki_data, dk_efi.dki_length); 71744bac2208Snarayan } 71754bac2208Snarayan 71764bac2208Snarayan return (0); 71774bac2208Snarayan } 71784bac2208Snarayan 71794bac2208Snarayan static int 71804bac2208Snarayan vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 71814bac2208Snarayan { 71824bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 71834bac2208Snarayan 71844bac2208Snarayan dk_efi_t dk_efi; 71854bac2208Snarayan void *uaddr; 71864bac2208Snarayan 71872f5224aeSachartre if (dir == VD_COPYOUT) { 
71882f5224aeSachartre /* 71892f5224aeSachartre * The disk label may have changed. Revalidate the disk 71902f5224aeSachartre * geometry. This will also update the device nodes and 71912f5224aeSachartre * properties. 71922f5224aeSachartre */ 71932f5224aeSachartre vdc_validate(vdc); 71942f5224aeSachartre return (0); 71952f5224aeSachartre } 71964bac2208Snarayan 71974bac2208Snarayan if ((from == NULL) || (to == NULL)) 71984bac2208Snarayan return (ENXIO); 71994bac2208Snarayan 72004bac2208Snarayan if (ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode) != 0) 72014bac2208Snarayan return (EFAULT); 72024bac2208Snarayan 72034bac2208Snarayan uaddr = dk_efi.dki_data; 72044bac2208Snarayan 72054bac2208Snarayan dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 72064bac2208Snarayan 72074bac2208Snarayan if (ddi_copyin(uaddr, dk_efi.dki_data, dk_efi.dki_length, mode) != 0) 72084bac2208Snarayan return (EFAULT); 72094bac2208Snarayan 72104bac2208Snarayan DK_EFI2VD_EFI(&dk_efi, (vd_efi_t *)to); 72114bac2208Snarayan 72124bac2208Snarayan kmem_free(dk_efi.dki_data, dk_efi.dki_length); 72134bac2208Snarayan 72144bac2208Snarayan return (0); 72154bac2208Snarayan } 72164bac2208Snarayan 721717cadca8Slm66018 721817cadca8Slm66018 /* -------------------------------------------------------------------------- */ 721917cadca8Slm66018 72200a55fbb7Slm66018 /* 72210a55fbb7Slm66018 * Function: 72221ae08745Sheppo * vdc_create_fake_geometry() 72231ae08745Sheppo * 72241ae08745Sheppo * Description: 722517cadca8Slm66018 * This routine fakes up the disk info needed for some DKIO ioctls such 722617cadca8Slm66018 * as DKIOCINFO and DKIOCGMEDIAINFO [just like lofi(7D) and ramdisk(7D) do] 72271ae08745Sheppo * 722817cadca8Slm66018 * Note: This function must not be called until the vDisk attributes have 722917cadca8Slm66018 * been exchanged as part of the handshake with the vDisk server. 
72301ae08745Sheppo * 72311ae08745Sheppo * Arguments: 72321ae08745Sheppo * vdc - soft state pointer for this instance of the device driver. 72331ae08745Sheppo * 72341ae08745Sheppo * Return Code: 723578fcd0a1Sachartre * none. 72361ae08745Sheppo */ 723778fcd0a1Sachartre static void 72381ae08745Sheppo vdc_create_fake_geometry(vdc_t *vdc) 72391ae08745Sheppo { 72401ae08745Sheppo ASSERT(vdc != NULL); 724178fcd0a1Sachartre ASSERT(vdc->max_xfer_sz != 0); 72420d0c8d4bSnarayan 72430d0c8d4bSnarayan /* 72441ae08745Sheppo * DKIOCINFO support 72451ae08745Sheppo */ 724678fcd0a1Sachartre if (vdc->cinfo == NULL) 72471ae08745Sheppo vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 72481ae08745Sheppo 72491ae08745Sheppo (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 72501ae08745Sheppo (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 72518e6a2a04Slm66018 /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */ 72528e6a2a04Slm66018 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; 72532f5224aeSachartre 725487a7269eSachartre /* 72552f5224aeSachartre * We set the controller type to DKC_SCSI_CCS only if the VD_OP_SCSICMD 72562f5224aeSachartre * operation is supported, otherwise the controller type is DKC_DIRECT. 72572f5224aeSachartre * Version 1.0 does not support the VD_OP_SCSICMD operation, so the 72582f5224aeSachartre * controller type is always DKC_DIRECT in that case. 
72592f5224aeSachartre * 726017cadca8Slm66018 * If the virtual disk is backed by a physical CD/DVD device or 726117cadca8Slm66018 * an ISO image, modify the controller type to indicate this 726287a7269eSachartre */ 726317cadca8Slm66018 switch (vdc->vdisk_media) { 726417cadca8Slm66018 case VD_MEDIA_CD: 726517cadca8Slm66018 case VD_MEDIA_DVD: 726617cadca8Slm66018 vdc->cinfo->dki_ctype = DKC_CDROM; 726717cadca8Slm66018 break; 726817cadca8Slm66018 case VD_MEDIA_FIXED: 72692f5224aeSachartre if (VD_OP_SUPPORTED(vdc->operations, VD_OP_SCSICMD)) 72702f5224aeSachartre vdc->cinfo->dki_ctype = DKC_SCSI_CCS; 72712f5224aeSachartre else 727287a7269eSachartre vdc->cinfo->dki_ctype = DKC_DIRECT; 727317cadca8Slm66018 break; 727417cadca8Slm66018 default: 727517cadca8Slm66018 /* in the case of v1.0 we default to a fixed disk */ 727617cadca8Slm66018 vdc->cinfo->dki_ctype = DKC_DIRECT; 727717cadca8Slm66018 break; 727817cadca8Slm66018 } 72791ae08745Sheppo vdc->cinfo->dki_flags = DKI_FMTVOL; 72801ae08745Sheppo vdc->cinfo->dki_cnum = 0; 72811ae08745Sheppo vdc->cinfo->dki_addr = 0; 72821ae08745Sheppo vdc->cinfo->dki_space = 0; 72831ae08745Sheppo vdc->cinfo->dki_prio = 0; 72841ae08745Sheppo vdc->cinfo->dki_vec = 0; 72851ae08745Sheppo vdc->cinfo->dki_unit = vdc->instance; 72861ae08745Sheppo vdc->cinfo->dki_slave = 0; 72871ae08745Sheppo /* 72881ae08745Sheppo * The partition number will be created on the fly depending on the 72891ae08745Sheppo * actual slice (i.e. minor node) that is used to request the data. 
72901ae08745Sheppo */ 72911ae08745Sheppo vdc->cinfo->dki_partition = 0; 72921ae08745Sheppo 72931ae08745Sheppo /* 72941ae08745Sheppo * DKIOCGMEDIAINFO support 72951ae08745Sheppo */ 72960a55fbb7Slm66018 if (vdc->minfo == NULL) 72971ae08745Sheppo vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 729817cadca8Slm66018 729917cadca8Slm66018 if (vio_ver_is_supported(vdc->ver, 1, 1)) { 730017cadca8Slm66018 vdc->minfo->dki_media_type = 730117cadca8Slm66018 VD_MEDIATYPE2DK_MEDIATYPE(vdc->vdisk_media); 730217cadca8Slm66018 } else { 73031ae08745Sheppo vdc->minfo->dki_media_type = DK_FIXED_DISK; 730417cadca8Slm66018 } 730517cadca8Slm66018 73064bac2208Snarayan vdc->minfo->dki_capacity = vdc->vdisk_size; 730717cadca8Slm66018 vdc->minfo->dki_lbsize = vdc->block_size; 730878fcd0a1Sachartre } 73091ae08745Sheppo 731078fcd0a1Sachartre static ushort_t 731178fcd0a1Sachartre vdc_lbl2cksum(struct dk_label *label) 731278fcd0a1Sachartre { 731378fcd0a1Sachartre int count; 731478fcd0a1Sachartre ushort_t sum, *sp; 731578fcd0a1Sachartre 731678fcd0a1Sachartre count = (sizeof (struct dk_label)) / (sizeof (short)) - 1; 731778fcd0a1Sachartre sp = (ushort_t *)label; 731878fcd0a1Sachartre sum = 0; 731978fcd0a1Sachartre while (count--) { 732078fcd0a1Sachartre sum ^= *sp++; 732178fcd0a1Sachartre } 732278fcd0a1Sachartre 732378fcd0a1Sachartre return (sum); 73240a55fbb7Slm66018 } 73250a55fbb7Slm66018 73260a55fbb7Slm66018 /* 73270a55fbb7Slm66018 * Function: 732878fcd0a1Sachartre * vdc_validate_geometry 73290a55fbb7Slm66018 * 73300a55fbb7Slm66018 * Description: 733178fcd0a1Sachartre * This routine discovers the label and geometry of the disk. It stores 733278fcd0a1Sachartre * the disk label and related information in the vdc structure. If it 733378fcd0a1Sachartre * fails to validate the geometry or to discover the disk label then 733478fcd0a1Sachartre * the label is marked as unknown (VD_DISK_LABEL_UNK). 
73350a55fbb7Slm66018 * 73360a55fbb7Slm66018 * Arguments: 73370a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 73380a55fbb7Slm66018 * 73390a55fbb7Slm66018 * Return Code: 734078fcd0a1Sachartre * 0 - success. 734178fcd0a1Sachartre * EINVAL - unknown disk label. 734278fcd0a1Sachartre * ENOTSUP - geometry not applicable (EFI label). 734378fcd0a1Sachartre * EIO - error accessing the disk. 73440a55fbb7Slm66018 */ 73450a55fbb7Slm66018 static int 734678fcd0a1Sachartre vdc_validate_geometry(vdc_t *vdc) 73470a55fbb7Slm66018 { 7348d10e4ef2Snarayan buf_t *buf; /* BREAD requests need to be in a buf_t structure */ 73490a55fbb7Slm66018 dev_t dev; 73502f5224aeSachartre int rv, rval; 735178fcd0a1Sachartre struct dk_label label; 735278fcd0a1Sachartre struct dk_geom geom; 735378fcd0a1Sachartre struct vtoc vtoc; 7354edcc0754Sachartre efi_gpt_t *gpt; 7355edcc0754Sachartre efi_gpe_t *gpe; 7356edcc0754Sachartre vd_efi_dev_t edev; 73570a55fbb7Slm66018 73580a55fbb7Slm66018 ASSERT(vdc != NULL); 735978fcd0a1Sachartre ASSERT(vdc->vtoc != NULL && vdc->geom != NULL); 736078fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 73610a55fbb7Slm66018 736278fcd0a1Sachartre mutex_exit(&vdc->lock); 73630a55fbb7Slm66018 73640a55fbb7Slm66018 dev = makedevice(ddi_driver_major(vdc->dip), 73650a55fbb7Slm66018 VD_MAKE_DEV(vdc->instance, 0)); 73664bac2208Snarayan 73672f5224aeSachartre rv = vd_process_ioctl(dev, DKIOCGGEOM, (caddr_t)&geom, FKIOCTL, &rval); 736878fcd0a1Sachartre if (rv == 0) 73692f5224aeSachartre rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)&vtoc, 73702f5224aeSachartre FKIOCTL, &rval); 73710d0c8d4bSnarayan 73724bac2208Snarayan if (rv == ENOTSUP) { 73734bac2208Snarayan /* 73744bac2208Snarayan * If the device does not support VTOC then we try 73754bac2208Snarayan * to read an EFI label. 7376edcc0754Sachartre * 7377edcc0754Sachartre * We need to know the block size and the disk size to 7378edcc0754Sachartre * be able to read an EFI label. 
73794bac2208Snarayan */ 7380edcc0754Sachartre if (vdc->vdisk_size == 0) { 7381edcc0754Sachartre if ((rv = vdc_check_capacity(vdc)) != 0) { 7382edcc0754Sachartre mutex_enter(&vdc->lock); 7383edcc0754Sachartre vdc_store_label_unk(vdc); 7384edcc0754Sachartre return (rv); 7385edcc0754Sachartre } 7386edcc0754Sachartre } 73874bac2208Snarayan 7388edcc0754Sachartre VD_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl); 7389edcc0754Sachartre 7390edcc0754Sachartre rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe); 73914bac2208Snarayan 73924bac2208Snarayan if (rv) { 73933af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to get EFI (err=%d)", 73944bac2208Snarayan vdc->instance, rv); 739578fcd0a1Sachartre mutex_enter(&vdc->lock); 739678fcd0a1Sachartre vdc_store_label_unk(vdc); 739778fcd0a1Sachartre return (EIO); 739878fcd0a1Sachartre } 739978fcd0a1Sachartre 740078fcd0a1Sachartre mutex_enter(&vdc->lock); 7401edcc0754Sachartre vdc_store_label_efi(vdc, gpt, gpe); 7402edcc0754Sachartre vd_efi_free(&edev, gpt, gpe); 740378fcd0a1Sachartre return (ENOTSUP); 740478fcd0a1Sachartre } 740578fcd0a1Sachartre 740678fcd0a1Sachartre if (rv != 0) { 740778fcd0a1Sachartre DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", 740878fcd0a1Sachartre vdc->instance, rv); 740978fcd0a1Sachartre mutex_enter(&vdc->lock); 741078fcd0a1Sachartre vdc_store_label_unk(vdc); 741178fcd0a1Sachartre if (rv != EINVAL) 741278fcd0a1Sachartre rv = EIO; 74134bac2208Snarayan return (rv); 74144bac2208Snarayan } 74154bac2208Snarayan 741678fcd0a1Sachartre /* check that geometry and vtoc are valid */ 741778fcd0a1Sachartre if (geom.dkg_nhead == 0 || geom.dkg_nsect == 0 || 741878fcd0a1Sachartre vtoc.v_sanity != VTOC_SANE) { 741978fcd0a1Sachartre mutex_enter(&vdc->lock); 742078fcd0a1Sachartre vdc_store_label_unk(vdc); 742178fcd0a1Sachartre return (EINVAL); 742278fcd0a1Sachartre } 74234bac2208Snarayan 742478fcd0a1Sachartre /* 742578fcd0a1Sachartre * We have a disk and a valid VTOC. 
However this does not mean 742678fcd0a1Sachartre * that the disk currently have a VTOC label. The returned VTOC may 742778fcd0a1Sachartre * be a default VTOC to be used for configuring the disk (this is 742878fcd0a1Sachartre * what is done for disk image). So we read the label from the 742978fcd0a1Sachartre * beginning of the disk to ensure we really have a VTOC label. 743078fcd0a1Sachartre * 743178fcd0a1Sachartre * FUTURE: This could be the default way for reading the VTOC 743278fcd0a1Sachartre * from the disk as opposed to sending the VD_OP_GET_VTOC 743378fcd0a1Sachartre * to the server. This will be the default if vdc is implemented 743478fcd0a1Sachartre * ontop of cmlb. 743578fcd0a1Sachartre */ 743678fcd0a1Sachartre 743778fcd0a1Sachartre /* 743878fcd0a1Sachartre * Single slice disk does not support read using an absolute disk 743978fcd0a1Sachartre * offset so we just rely on the DKIOCGVTOC ioctl in that case. 744078fcd0a1Sachartre */ 744178fcd0a1Sachartre if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 744278fcd0a1Sachartre mutex_enter(&vdc->lock); 744378fcd0a1Sachartre if (vtoc.v_nparts != 1) { 744478fcd0a1Sachartre vdc_store_label_unk(vdc); 744578fcd0a1Sachartre return (EINVAL); 744678fcd0a1Sachartre } 744778fcd0a1Sachartre vdc_store_label_vtoc(vdc, &geom, &vtoc); 74484bac2208Snarayan return (0); 74494bac2208Snarayan } 74504bac2208Snarayan 745178fcd0a1Sachartre if (vtoc.v_nparts != V_NUMPAR) { 745278fcd0a1Sachartre mutex_enter(&vdc->lock); 745378fcd0a1Sachartre vdc_store_label_unk(vdc); 745478fcd0a1Sachartre return (EINVAL); 74550a55fbb7Slm66018 } 7456d10e4ef2Snarayan 7457d10e4ef2Snarayan /* 7458d10e4ef2Snarayan * Read disk label from start of disk 7459d10e4ef2Snarayan */ 7460d10e4ef2Snarayan buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 7461d10e4ef2Snarayan bioinit(buf); 746278fcd0a1Sachartre buf->b_un.b_addr = (caddr_t)&label; 7463d10e4ef2Snarayan buf->b_bcount = DK_LABEL_SIZE; 7464d10e4ef2Snarayan buf->b_flags = B_BUSY | B_READ; 746517cadca8Slm66018 buf->b_dev 
= cmpdev(dev); 746678fcd0a1Sachartre rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)&label, 746778fcd0a1Sachartre DK_LABEL_SIZE, VD_SLICE_NONE, 0, CB_STRATEGY, buf, VIO_read_dir); 74683af08d82Slm66018 if (rv) { 74693af08d82Slm66018 DMSG(vdc, 1, "[%d] Failed to read disk block 0\n", 74703af08d82Slm66018 vdc->instance); 747178fcd0a1Sachartre } else { 7472d10e4ef2Snarayan rv = biowait(buf); 7473d10e4ef2Snarayan biofini(buf); 747478fcd0a1Sachartre } 7475d10e4ef2Snarayan kmem_free(buf, sizeof (buf_t)); 74760a55fbb7Slm66018 747778fcd0a1Sachartre if (rv != 0 || label.dkl_magic != DKL_MAGIC || 747878fcd0a1Sachartre label.dkl_cksum != vdc_lbl2cksum(&label)) { 747978fcd0a1Sachartre DMSG(vdc, 1, "[%d] Got VTOC with invalid label\n", 748078fcd0a1Sachartre vdc->instance); 748178fcd0a1Sachartre mutex_enter(&vdc->lock); 748278fcd0a1Sachartre vdc_store_label_unk(vdc); 748378fcd0a1Sachartre return (EINVAL); 748478fcd0a1Sachartre } 748578fcd0a1Sachartre 748678fcd0a1Sachartre mutex_enter(&vdc->lock); 748778fcd0a1Sachartre vdc_store_label_vtoc(vdc, &geom, &vtoc); 748878fcd0a1Sachartre return (0); 748978fcd0a1Sachartre } 749078fcd0a1Sachartre 749178fcd0a1Sachartre /* 749278fcd0a1Sachartre * Function: 749378fcd0a1Sachartre * vdc_validate 749478fcd0a1Sachartre * 749578fcd0a1Sachartre * Description: 749678fcd0a1Sachartre * This routine discovers the label of the disk and create the 749778fcd0a1Sachartre * appropriate device nodes if the label has changed. 749878fcd0a1Sachartre * 749978fcd0a1Sachartre * Arguments: 750078fcd0a1Sachartre * vdc - soft state pointer for this instance of the device driver. 750178fcd0a1Sachartre * 750278fcd0a1Sachartre * Return Code: 750378fcd0a1Sachartre * none. 
750478fcd0a1Sachartre */ 750578fcd0a1Sachartre static void 750678fcd0a1Sachartre vdc_validate(vdc_t *vdc) 750778fcd0a1Sachartre { 750878fcd0a1Sachartre vd_disk_label_t old_label; 7509edcc0754Sachartre vd_slice_t old_slice[V_NUMPAR]; 751078fcd0a1Sachartre int rv; 751178fcd0a1Sachartre 751278fcd0a1Sachartre ASSERT(!MUTEX_HELD(&vdc->lock)); 751378fcd0a1Sachartre 751478fcd0a1Sachartre mutex_enter(&vdc->lock); 751578fcd0a1Sachartre 751678fcd0a1Sachartre /* save the current label and vtoc */ 751778fcd0a1Sachartre old_label = vdc->vdisk_label; 7518edcc0754Sachartre bcopy(vdc->slice, &old_slice, sizeof (vd_slice_t) * V_NUMPAR); 751978fcd0a1Sachartre 752078fcd0a1Sachartre /* check the geometry */ 752178fcd0a1Sachartre (void) vdc_validate_geometry(vdc); 752278fcd0a1Sachartre 752378fcd0a1Sachartre /* if the disk label has changed, update device nodes */ 752478fcd0a1Sachartre if (vdc->vdisk_label != old_label) { 752578fcd0a1Sachartre 752678fcd0a1Sachartre if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 752778fcd0a1Sachartre rv = vdc_create_device_nodes_efi(vdc); 752878fcd0a1Sachartre else 752978fcd0a1Sachartre rv = vdc_create_device_nodes_vtoc(vdc); 753078fcd0a1Sachartre 753178fcd0a1Sachartre if (rv != 0) { 753278fcd0a1Sachartre DMSG(vdc, 0, "![%d] Failed to update device nodes", 753378fcd0a1Sachartre vdc->instance); 753478fcd0a1Sachartre } 753578fcd0a1Sachartre } 753678fcd0a1Sachartre 753778fcd0a1Sachartre /* if the vtoc has changed, update device nodes properties */ 7538edcc0754Sachartre if (bcmp(vdc->slice, &old_slice, sizeof (vd_slice_t) * V_NUMPAR) != 0) { 753978fcd0a1Sachartre 754078fcd0a1Sachartre if (vdc_create_device_nodes_props(vdc) != 0) { 754178fcd0a1Sachartre DMSG(vdc, 0, "![%d] Failed to update device nodes" 754278fcd0a1Sachartre " properties", vdc->instance); 754378fcd0a1Sachartre } 754478fcd0a1Sachartre } 754578fcd0a1Sachartre 754678fcd0a1Sachartre mutex_exit(&vdc->lock); 754778fcd0a1Sachartre } 754878fcd0a1Sachartre 754978fcd0a1Sachartre static void 
755078fcd0a1Sachartre vdc_validate_task(void *arg) 755178fcd0a1Sachartre { 755278fcd0a1Sachartre vdc_t *vdc = (vdc_t *)arg; 755378fcd0a1Sachartre 755478fcd0a1Sachartre vdc_validate(vdc); 755578fcd0a1Sachartre 755678fcd0a1Sachartre mutex_enter(&vdc->lock); 755778fcd0a1Sachartre ASSERT(vdc->validate_pending > 0); 755878fcd0a1Sachartre vdc->validate_pending--; 755978fcd0a1Sachartre mutex_exit(&vdc->lock); 75601ae08745Sheppo } 75614bac2208Snarayan 75624bac2208Snarayan /* 75634bac2208Snarayan * Function: 75644bac2208Snarayan * vdc_setup_devid() 75654bac2208Snarayan * 75664bac2208Snarayan * Description: 75674bac2208Snarayan * This routine discovers the devid of a vDisk. It requests the devid of 75684bac2208Snarayan * the underlying device from the vDisk server, builds an encapsulated 75694bac2208Snarayan * devid based on the retrieved devid and registers that new devid to 75704bac2208Snarayan * the vDisk. 75714bac2208Snarayan * 75724bac2208Snarayan * Arguments: 75734bac2208Snarayan * vdc - soft state pointer for this instance of the device driver. 75744bac2208Snarayan * 75754bac2208Snarayan * Return Code: 75764bac2208Snarayan * 0 - A devid was succesfully registered for the vDisk 75774bac2208Snarayan */ 75784bac2208Snarayan static int 75794bac2208Snarayan vdc_setup_devid(vdc_t *vdc) 75804bac2208Snarayan { 75814bac2208Snarayan int rv; 75824bac2208Snarayan vd_devid_t *vd_devid; 75834bac2208Snarayan size_t bufsize, bufid_len; 75844bac2208Snarayan 75854bac2208Snarayan /* 75864bac2208Snarayan * At first sight, we don't know the size of the devid that the 75874bac2208Snarayan * server will return but this size will be encoded into the 75884bac2208Snarayan * reply. So we do a first request using a default size then we 75894bac2208Snarayan * check if this size was large enough. If not then we do a second 75904bac2208Snarayan * request with the correct size returned by the server. Note that 75914bac2208Snarayan * ldc requires size to be 8-byte aligned. 
75924bac2208Snarayan */ 75934bac2208Snarayan bufsize = P2ROUNDUP(VD_DEVID_SIZE(VD_DEVID_DEFAULT_LEN), 75944bac2208Snarayan sizeof (uint64_t)); 75954bac2208Snarayan vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 75964bac2208Snarayan bufid_len = bufsize - sizeof (vd_efi_t) - 1; 75974bac2208Snarayan 75983af08d82Slm66018 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid, 75992f5224aeSachartre bufsize, 0, 0, CB_SYNC, 0, VIO_both_dir, B_TRUE); 76003af08d82Slm66018 76013af08d82Slm66018 DMSG(vdc, 2, "sync_op returned %d\n", rv); 76023af08d82Slm66018 76034bac2208Snarayan if (rv) { 76044bac2208Snarayan kmem_free(vd_devid, bufsize); 76054bac2208Snarayan return (rv); 76064bac2208Snarayan } 76074bac2208Snarayan 76084bac2208Snarayan if (vd_devid->length > bufid_len) { 76094bac2208Snarayan /* 76104bac2208Snarayan * The returned devid is larger than the buffer used. Try again 76114bac2208Snarayan * with a buffer with the right size. 76124bac2208Snarayan */ 76134bac2208Snarayan kmem_free(vd_devid, bufsize); 76144bac2208Snarayan bufsize = P2ROUNDUP(VD_DEVID_SIZE(vd_devid->length), 76154bac2208Snarayan sizeof (uint64_t)); 76164bac2208Snarayan vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 76174bac2208Snarayan bufid_len = bufsize - sizeof (vd_efi_t) - 1; 76184bac2208Snarayan 76193af08d82Slm66018 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, 76203af08d82Slm66018 (caddr_t)vd_devid, bufsize, 0, 0, CB_SYNC, 0, 76212f5224aeSachartre VIO_both_dir, B_TRUE); 76223af08d82Slm66018 76234bac2208Snarayan if (rv) { 76244bac2208Snarayan kmem_free(vd_devid, bufsize); 76254bac2208Snarayan return (rv); 76264bac2208Snarayan } 76274bac2208Snarayan } 76284bac2208Snarayan 76294bac2208Snarayan /* 76304bac2208Snarayan * The virtual disk should have the same device id as the one associated 76314bac2208Snarayan * with the physical disk it is mapped on, otherwise sharing a disk 76324bac2208Snarayan * between a LDom and a non-LDom may not work (for example for a shared 76334bac2208Snarayan * SVM disk set). 
76344bac2208Snarayan * 76354bac2208Snarayan * The DDI framework does not allow creating a device id with any 76364bac2208Snarayan * type so we first create a device id of type DEVID_ENCAP and then 76374bac2208Snarayan * we restore the orignal type of the physical device. 76384bac2208Snarayan */ 76394bac2208Snarayan 76403af08d82Slm66018 DMSG(vdc, 2, ": devid length = %d\n", vd_devid->length); 76413af08d82Slm66018 76424bac2208Snarayan /* build an encapsulated devid based on the returned devid */ 76434bac2208Snarayan if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length, 76444bac2208Snarayan vd_devid->id, &vdc->devid) != DDI_SUCCESS) { 76453af08d82Slm66018 DMSG(vdc, 1, "[%d] Fail to created devid\n", vdc->instance); 76464bac2208Snarayan kmem_free(vd_devid, bufsize); 76474bac2208Snarayan return (1); 76484bac2208Snarayan } 76494bac2208Snarayan 76504bac2208Snarayan DEVID_FORMTYPE((impl_devid_t *)vdc->devid, vd_devid->type); 76514bac2208Snarayan 76524bac2208Snarayan ASSERT(ddi_devid_valid(vdc->devid) == DDI_SUCCESS); 76534bac2208Snarayan 76544bac2208Snarayan kmem_free(vd_devid, bufsize); 76554bac2208Snarayan 76564bac2208Snarayan if (ddi_devid_register(vdc->dip, vdc->devid) != DDI_SUCCESS) { 76573af08d82Slm66018 DMSG(vdc, 1, "[%d] Fail to register devid\n", vdc->instance); 76584bac2208Snarayan return (1); 76594bac2208Snarayan } 76604bac2208Snarayan 76614bac2208Snarayan return (0); 76624bac2208Snarayan } 76634bac2208Snarayan 76644bac2208Snarayan static void 7665edcc0754Sachartre vdc_store_label_efi(vdc_t *vdc, efi_gpt_t *gpt, efi_gpe_t *gpe) 76664bac2208Snarayan { 7667edcc0754Sachartre int i, nparts; 76684bac2208Snarayan 766978fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 767078fcd0a1Sachartre 767178fcd0a1Sachartre vdc->vdisk_label = VD_DISK_LABEL_EFI; 7672edcc0754Sachartre bzero(vdc->vtoc, sizeof (struct vtoc)); 767378fcd0a1Sachartre bzero(vdc->geom, sizeof (struct dk_geom)); 7674edcc0754Sachartre bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 
7675edcc0754Sachartre 7676edcc0754Sachartre nparts = gpt->efi_gpt_NumberOfPartitionEntries; 7677edcc0754Sachartre 7678edcc0754Sachartre for (i = 0; i < nparts && i < VD_EFI_WD_SLICE; i++) { 7679edcc0754Sachartre 7680edcc0754Sachartre if (gpe[i].efi_gpe_StartingLBA == 0 || 7681edcc0754Sachartre gpe[i].efi_gpe_EndingLBA == 0) { 7682edcc0754Sachartre continue; 76834bac2208Snarayan } 7684edcc0754Sachartre 7685edcc0754Sachartre vdc->slice[i].start = gpe[i].efi_gpe_StartingLBA; 7686edcc0754Sachartre vdc->slice[i].nblocks = gpe[i].efi_gpe_EndingLBA - 7687edcc0754Sachartre gpe[i].efi_gpe_StartingLBA + 1; 7688edcc0754Sachartre } 7689edcc0754Sachartre 7690edcc0754Sachartre ASSERT(vdc->vdisk_size != 0); 7691edcc0754Sachartre vdc->slice[VD_EFI_WD_SLICE].start = 0; 7692edcc0754Sachartre vdc->slice[VD_EFI_WD_SLICE].nblocks = vdc->vdisk_size; 7693edcc0754Sachartre 76944bac2208Snarayan } 769578fcd0a1Sachartre 769678fcd0a1Sachartre static void 769778fcd0a1Sachartre vdc_store_label_vtoc(vdc_t *vdc, struct dk_geom *geom, struct vtoc *vtoc) 769878fcd0a1Sachartre { 7699edcc0754Sachartre int i; 7700edcc0754Sachartre 770178fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 7702edcc0754Sachartre ASSERT(vdc->block_size == vtoc->v_sectorsz); 770378fcd0a1Sachartre 770478fcd0a1Sachartre vdc->vdisk_label = VD_DISK_LABEL_VTOC; 770578fcd0a1Sachartre bcopy(vtoc, vdc->vtoc, sizeof (struct vtoc)); 770678fcd0a1Sachartre bcopy(geom, vdc->geom, sizeof (struct dk_geom)); 7707edcc0754Sachartre bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 7708edcc0754Sachartre 7709edcc0754Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 7710edcc0754Sachartre vdc->slice[i].start = vtoc->v_part[i].p_start; 7711edcc0754Sachartre vdc->slice[i].nblocks = vtoc->v_part[i].p_size; 7712edcc0754Sachartre } 771378fcd0a1Sachartre } 771478fcd0a1Sachartre 771578fcd0a1Sachartre static void 771678fcd0a1Sachartre vdc_store_label_unk(vdc_t *vdc) 771778fcd0a1Sachartre { 771878fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 
771978fcd0a1Sachartre 772078fcd0a1Sachartre vdc->vdisk_label = VD_DISK_LABEL_UNK; 772178fcd0a1Sachartre bzero(vdc->vtoc, sizeof (struct vtoc)); 772278fcd0a1Sachartre bzero(vdc->geom, sizeof (struct dk_geom)); 7723edcc0754Sachartre bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 772478fcd0a1Sachartre } 7725