11ae08745Sheppo /* 21ae08745Sheppo * CDDL HEADER START 31ae08745Sheppo * 41ae08745Sheppo * The contents of this file are subject to the terms of the 51ae08745Sheppo * Common Development and Distribution License (the "License"). 61ae08745Sheppo * You may not use this file except in compliance with the License. 71ae08745Sheppo * 81ae08745Sheppo * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 91ae08745Sheppo * or http://www.opensolaris.org/os/licensing. 101ae08745Sheppo * See the License for the specific language governing permissions 111ae08745Sheppo * and limitations under the License. 121ae08745Sheppo * 131ae08745Sheppo * When distributing Covered Code, include this CDDL HEADER in each 141ae08745Sheppo * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 151ae08745Sheppo * If applicable, add the following below this CDDL HEADER, with the 161ae08745Sheppo * fields enclosed by brackets "[]" replaced with your own identifying 171ae08745Sheppo * information: Portions Copyright [yyyy] [name of copyright owner] 181ae08745Sheppo * 191ae08745Sheppo * CDDL HEADER END 201ae08745Sheppo */ 211ae08745Sheppo 221ae08745Sheppo /* 23edcc0754Sachartre * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 241ae08745Sheppo * Use is subject to license terms. 251ae08745Sheppo */ 261ae08745Sheppo 271ae08745Sheppo 281ae08745Sheppo /* 291ae08745Sheppo * LDoms virtual disk client (vdc) device driver 301ae08745Sheppo * 311ae08745Sheppo * This driver runs on a guest logical domain and communicates with the virtual 321ae08745Sheppo * disk server (vds) driver running on the service domain which is exporting 331ae08745Sheppo * virtualized "disks" to the guest logical domain. 341ae08745Sheppo * 351ae08745Sheppo * The driver can be divided into four sections: 361ae08745Sheppo * 371ae08745Sheppo * 1) generic device driver housekeeping 381ae08745Sheppo * _init, _fini, attach, detach, ops structures, etc. 391ae08745Sheppo * 401ae08745Sheppo * 2) communication channel setup 411ae08745Sheppo * Setup the communications link over the LDC channel that vdc uses to 421ae08745Sheppo * talk to the vDisk server. Initialise the descriptor ring which 431ae08745Sheppo * allows the LDC clients to transfer data via memory mappings. 441ae08745Sheppo * 451ae08745Sheppo * 3) Support exported to upper layers (filesystems, etc) 461ae08745Sheppo * The upper layers call into vdc via strategy(9E) and DKIO(7I) 471ae08745Sheppo * ioctl calls. vdc will copy the data to be written to the descriptor 481ae08745Sheppo * ring or maps the buffer to store the data read by the vDisk 491ae08745Sheppo * server into the descriptor ring. It then sends a message to the 501ae08745Sheppo * vDisk server requesting it to complete the operation. 511ae08745Sheppo * 521ae08745Sheppo * 4) Handling responses from vDisk server. 531ae08745Sheppo * The vDisk server will ACK some or all of the messages vdc sends to it 541ae08745Sheppo * (this is configured during the handshake). Upon receipt of an ACK 551ae08745Sheppo * vdc will check the descriptor ring and signal to the upper layer 561ae08745Sheppo * code waiting on the IO. 571ae08745Sheppo */ 581ae08745Sheppo 59e1ebb9ecSlm66018 #include <sys/atomic.h> 601ae08745Sheppo #include <sys/conf.h> 611ae08745Sheppo #include <sys/disp.h> 621ae08745Sheppo #include <sys/ddi.h> 631ae08745Sheppo #include <sys/dkio.h> 641ae08745Sheppo #include <sys/efi_partition.h> 651ae08745Sheppo #include <sys/fcntl.h> 661ae08745Sheppo #include <sys/file.h> 67366a92acSlm66018 #include <sys/kstat.h> 681ae08745Sheppo #include <sys/mach_descrip.h> 691ae08745Sheppo #include <sys/modctl.h> 701ae08745Sheppo #include <sys/mdeg.h> 711ae08745Sheppo #include <sys/note.h> 721ae08745Sheppo #include <sys/open.h> 73d10e4ef2Snarayan #include <sys/sdt.h> 741ae08745Sheppo #include <sys/stat.h> 751ae08745Sheppo #include <sys/sunddi.h> 761ae08745Sheppo #include <sys/types.h> 771ae08745Sheppo #include <sys/promif.h> 782f5224aeSachartre #include <sys/var.h> 791ae08745Sheppo #include <sys/vtoc.h> 801ae08745Sheppo #include <sys/archsystm.h> 811ae08745Sheppo #include <sys/sysmacros.h> 821ae08745Sheppo 831ae08745Sheppo #include <sys/cdio.h> 841ae08745Sheppo #include <sys/dktp/fdisk.h> 8587a7269eSachartre #include <sys/dktp/dadkio.h> 862f5224aeSachartre #include <sys/mhd.h> 871ae08745Sheppo #include <sys/scsi/generic/sense.h> 882f5224aeSachartre #include <sys/scsi/impl/uscsi.h> 892f5224aeSachartre #include <sys/scsi/impl/services.h> 902f5224aeSachartre #include <sys/scsi/targets/sddef.h> 911ae08745Sheppo 921ae08745Sheppo #include <sys/ldoms.h> 931ae08745Sheppo #include <sys/ldc.h> 941ae08745Sheppo #include <sys/vio_common.h> 951ae08745Sheppo #include <sys/vio_mailbox.h> 9617cadca8Slm66018 #include <sys/vio_util.h> 971ae08745Sheppo #include <sys/vdsk_common.h> 981ae08745Sheppo #include <sys/vdsk_mailbox.h> 991ae08745Sheppo #include <sys/vdc.h> 1001ae08745Sheppo 1011ae08745Sheppo /* 1021ae08745Sheppo * function prototypes 1031ae08745Sheppo */ 1041ae08745Sheppo 1051ae08745Sheppo /* standard driver functions */ 1061ae08745Sheppo static int vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred); 1071ae08745Sheppo static int vdc_close(dev_t dev, int flag, int otyp, cred_t *cred); 1081ae08745Sheppo static int vdc_strategy(struct buf *buf); 1091ae08745Sheppo static int vdc_print(dev_t dev, char *str); 1101ae08745Sheppo static int vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk); 1111ae08745Sheppo static int vdc_read(dev_t dev, struct uio *uio, cred_t *cred); 1121ae08745Sheppo static int vdc_write(dev_t dev, struct uio *uio, cred_t *cred); 1131ae08745Sheppo static int vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 1141ae08745Sheppo cred_t *credp, int *rvalp); 1151ae08745Sheppo static int vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred); 1161ae08745Sheppo static int vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred); 1171ae08745Sheppo 1181ae08745Sheppo static int vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, 1191ae08745Sheppo void *arg, void **resultp); 1201ae08745Sheppo static int vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 1211ae08745Sheppo static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 1225b98b509Sachartre static int vdc_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, 1235b98b509Sachartre int mod_flags, char *name, caddr_t valuep, int *lengthp); 1241ae08745Sheppo 1251ae08745Sheppo /* setup */ 1260d0c8d4bSnarayan static void vdc_min(struct buf *bufp); 1270a55fbb7Slm66018 static int vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen); 1288cd10891Snarayan static int vdc_do_ldc_init(vdc_t *vdc, vdc_server_t *srvr); 1291ae08745Sheppo static int vdc_start_ldc_connection(vdc_t *vdc); 1301ae08745Sheppo static int vdc_create_device_nodes(vdc_t *vdc); 1314bac2208Snarayan static int vdc_create_device_nodes_efi(vdc_t *vdc); 1324bac2208Snarayan static int vdc_create_device_nodes_vtoc(vdc_t *vdc); 133366a92acSlm66018 static void vdc_create_io_kstats(vdc_t *vdc); 134366a92acSlm66018 static void vdc_create_err_kstats(vdc_t *vdc); 135366a92acSlm66018 static void vdc_set_err_kstats(vdc_t *vdc); 136655fd6a9Sachartre static int vdc_get_md_node(dev_info_t *dip, md_t **mdpp, 1378cd10891Snarayan mde_cookie_t *vd_nodep); 1388cd10891Snarayan static int vdc_init_ports(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_nodep); 1398cd10891Snarayan static void vdc_fini_ports(vdc_t *vdc); 1408cd10891Snarayan static void vdc_switch_server(vdc_t *vdcp); 1410a55fbb7Slm66018 static int vdc_do_ldc_up(vdc_t *vdc); 1428cd10891Snarayan static void vdc_terminate_ldc(vdc_t *vdc, vdc_server_t *srvr); 1431ae08745Sheppo static int vdc_init_descriptor_ring(vdc_t *vdc); 1441ae08745Sheppo static void vdc_destroy_descriptor_ring(vdc_t *vdc); 1454bac2208Snarayan static int vdc_setup_devid(vdc_t *vdc); 146edcc0754Sachartre static void vdc_store_label_efi(vdc_t *, efi_gpt_t *, efi_gpe_t *); 14778fcd0a1Sachartre static void vdc_store_label_vtoc(vdc_t *, struct dk_geom *, struct vtoc *); 14878fcd0a1Sachartre static void vdc_store_label_unk(vdc_t *vdc); 14978fcd0a1Sachartre static boolean_t vdc_is_opened(vdc_t *vdc); 150*de3a5331SRamesh Chitrothu static void vdc_update_size(vdc_t *vdc, size_t, size_t, size_t); 1511ae08745Sheppo 1521ae08745Sheppo /* handshake with vds */ 1530a55fbb7Slm66018 static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver); 1543af08d82Slm66018 static int vdc_ver_negotiation(vdc_t *vdcp); 1551ae08745Sheppo static int vdc_init_attr_negotiation(vdc_t *vdc); 1563af08d82Slm66018 static int vdc_attr_negotiation(vdc_t *vdcp); 1571ae08745Sheppo static int vdc_init_dring_negotiate(vdc_t *vdc); 1583af08d82Slm66018 static int vdc_dring_negotiation(vdc_t *vdcp); 1593af08d82Slm66018 static int vdc_send_rdx(vdc_t *vdcp); 1603af08d82Slm66018 static int vdc_rdx_exchange(vdc_t *vdcp); 1610a55fbb7Slm66018 static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg); 1621ae08745Sheppo 1630a55fbb7Slm66018 /* processing incoming messages from vDisk server */ 1641ae08745Sheppo static void vdc_process_msg_thread(vdc_t *vdc); 1653af08d82Slm66018 static int vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp); 1663af08d82Slm66018 1670a55fbb7Slm66018 static uint_t vdc_handle_cb(uint64_t event, caddr_t arg); 1683af08d82Slm66018 static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t *msg); 1690a55fbb7Slm66018 static int vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg); 1700a55fbb7Slm66018 static int vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg); 1710a55fbb7Slm66018 static int vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg); 1723af08d82Slm66018 static int vdc_send_request(vdc_t *vdcp, int operation, 1733af08d82Slm66018 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 1743af08d82Slm66018 int cb_type, void *cb_arg, vio_desc_direction_t dir); 1753af08d82Slm66018 static int vdc_map_to_shared_dring(vdc_t *vdcp, int idx); 1763af08d82Slm66018 static int vdc_populate_descriptor(vdc_t *vdcp, int operation, 1773af08d82Slm66018 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 1783af08d82Slm66018 int cb_type, void *cb_arg, vio_desc_direction_t dir); 1792f5224aeSachartre static int vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, 1802f5224aeSachartre size_t nbytes, int slice, diskaddr_t offset, int cb_type, 1812f5224aeSachartre void *cb_arg, vio_desc_direction_t dir, boolean_t); 1823af08d82Slm66018 1833af08d82Slm66018 static int vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp); 1843c2ebf09Sachartre static int vdc_drain_response(vdc_t *vdcp, struct buf *buf); 1851ae08745Sheppo static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx); 1863af08d82Slm66018 static int vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep); 187e1ebb9ecSlm66018 static int vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg); 1881ae08745Sheppo 1891ae08745Sheppo /* dkio */ 1902f5224aeSachartre static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, 1912f5224aeSachartre int *rvalp); 192edcc0754Sachartre static int vd_process_efi_ioctl(void *vdisk, int cmd, uintptr_t arg); 19378fcd0a1Sachartre static void vdc_create_fake_geometry(vdc_t *vdc); 19478fcd0a1Sachartre static int vdc_validate_geometry(vdc_t *vdc); 19578fcd0a1Sachartre static void vdc_validate(vdc_t *vdc); 19678fcd0a1Sachartre static void vdc_validate_task(void *arg); 197d10e4ef2Snarayan static int vdc_null_copy_func(vdc_t *vdc, void *from, void *to, 198d10e4ef2Snarayan int mode, int dir); 1994bac2208Snarayan static int vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 2004bac2208Snarayan int mode, int dir); 2014bac2208Snarayan static int vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 2024bac2208Snarayan int mode, int dir); 203d10e4ef2Snarayan static int vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, 204d10e4ef2Snarayan int mode, int dir); 205d10e4ef2Snarayan static int vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, 206d10e4ef2Snarayan int mode, int dir); 207d10e4ef2Snarayan static int vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, 208d10e4ef2Snarayan int mode, int dir); 209d10e4ef2Snarayan static int vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, 210d10e4ef2Snarayan int mode, int dir); 2114bac2208Snarayan static int vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, 2124bac2208Snarayan int mode, int dir); 2134bac2208Snarayan static int vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, 2144bac2208Snarayan int mode, int dir); 2151ae08745Sheppo 2162f5224aeSachartre static void vdc_ownership_update(vdc_t *vdc, int ownership_flags); 2172f5224aeSachartre static int vdc_access_set(vdc_t *vdc, uint64_t flags, int mode); 2182f5224aeSachartre static vdc_io_t *vdc_failfast_io_queue(vdc_t *vdc, struct buf *buf); 2192f5224aeSachartre static int vdc_failfast_check_resv(vdc_t *vdc); 2202f5224aeSachartre 2211ae08745Sheppo /* 2221ae08745Sheppo * Module variables 2231ae08745Sheppo */ 224e1ebb9ecSlm66018 225e1ebb9ecSlm66018 /* 226e1ebb9ecSlm66018 * Tunable variables to control how long vdc waits before timing out on 227e1ebb9ecSlm66018 * various operations 228e1ebb9ecSlm66018 */ 2293c96341aSnarayan static int vdc_hshake_retries = 3; 230e1ebb9ecSlm66018 231655fd6a9Sachartre static int vdc_timeout = 0; /* units: seconds */ 2328cd10891Snarayan static int vdc_ldcup_timeout = 1; /* units: seconds */ 233655fd6a9Sachartre 2343af08d82Slm66018 static uint64_t vdc_hz_min_ldc_delay; 2353af08d82Slm66018 static uint64_t vdc_min_timeout_ldc = 1 * MILLISEC; 2363af08d82Slm66018 static uint64_t vdc_hz_max_ldc_delay; 2373af08d82Slm66018 static uint64_t vdc_max_timeout_ldc = 100 * MILLISEC; 2383af08d82Slm66018 2393af08d82Slm66018 static uint64_t vdc_ldc_read_init_delay = 1 * MILLISEC; 2403af08d82Slm66018 static uint64_t vdc_ldc_read_max_delay = 100 * MILLISEC; 241e1ebb9ecSlm66018 242e1ebb9ecSlm66018 /* values for dumping - need to run in a tighter loop */ 243e1ebb9ecSlm66018 static uint64_t vdc_usec_timeout_dump = 100 * MILLISEC; /* 0.1s units: ns */ 244e1ebb9ecSlm66018 static int vdc_dump_retries = 100; 245e1ebb9ecSlm66018 2462f5224aeSachartre static uint16_t vdc_scsi_timeout = 60; /* 60s units: seconds */ 2472f5224aeSachartre 2482f5224aeSachartre static uint64_t vdc_ownership_delay = 6 * MICROSEC; /* 6s units: usec */ 2492f5224aeSachartre 250e1ebb9ecSlm66018 /* Count of the number of vdc instances attached */ 251e1ebb9ecSlm66018 static volatile uint32_t vdc_instance_count = 0; 2521ae08745Sheppo 2532f5224aeSachartre /* Tunable to log all SCSI errors */ 2542f5224aeSachartre static boolean_t vdc_scsi_log_error = B_FALSE; 2552f5224aeSachartre 2561ae08745Sheppo /* Soft state pointer */ 2571ae08745Sheppo static void *vdc_state; 2581ae08745Sheppo 2593af08d82Slm66018 /* 2603af08d82Slm66018 * Controlling the verbosity of the error/debug messages 2613af08d82Slm66018 * 2623af08d82Slm66018 * vdc_msglevel - controls level of messages 2633af08d82Slm66018 * vdc_matchinst - 64-bit variable where each bit corresponds 2643af08d82Slm66018 * to the vdc instance the vdc_msglevel applies. 2653af08d82Slm66018 */ 2663af08d82Slm66018 int vdc_msglevel = 0x0; 2673af08d82Slm66018 uint64_t vdc_matchinst = 0ull; 2681ae08745Sheppo 2690a55fbb7Slm66018 /* 2700a55fbb7Slm66018 * Supported vDisk protocol version pairs. 2710a55fbb7Slm66018 * 2720a55fbb7Slm66018 * The first array entry is the latest and preferred version. 2730a55fbb7Slm66018 */ 27417cadca8Slm66018 static const vio_ver_t vdc_version[] = {{1, 1}}; 2751ae08745Sheppo 2761ae08745Sheppo static struct cb_ops vdc_cb_ops = { 2771ae08745Sheppo vdc_open, /* cb_open */ 2781ae08745Sheppo vdc_close, /* cb_close */ 2791ae08745Sheppo vdc_strategy, /* cb_strategy */ 2801ae08745Sheppo vdc_print, /* cb_print */ 2811ae08745Sheppo vdc_dump, /* cb_dump */ 2821ae08745Sheppo vdc_read, /* cb_read */ 2831ae08745Sheppo vdc_write, /* cb_write */ 2841ae08745Sheppo vdc_ioctl, /* cb_ioctl */ 2851ae08745Sheppo nodev, /* cb_devmap */ 2861ae08745Sheppo nodev, /* cb_mmap */ 2871ae08745Sheppo nodev, /* cb_segmap */ 2881ae08745Sheppo nochpoll, /* cb_chpoll */ 2895b98b509Sachartre vdc_prop_op, /* cb_prop_op */ 2901ae08745Sheppo NULL, /* cb_str */ 2911ae08745Sheppo D_MP | D_64BIT, /* cb_flag */ 2921ae08745Sheppo CB_REV, /* cb_rev */ 2931ae08745Sheppo vdc_aread, /* cb_aread */ 2941ae08745Sheppo vdc_awrite /* cb_awrite */ 2951ae08745Sheppo }; 2961ae08745Sheppo 2971ae08745Sheppo static struct dev_ops vdc_ops = { 2981ae08745Sheppo DEVO_REV, /* devo_rev */ 2991ae08745Sheppo 0, /* devo_refcnt */ 3001ae08745Sheppo vdc_getinfo, /* devo_getinfo */ 3011ae08745Sheppo nulldev, /* devo_identify */ 3021ae08745Sheppo nulldev, /* devo_probe */ 3031ae08745Sheppo vdc_attach, /* devo_attach */ 3041ae08745Sheppo vdc_detach, /* devo_detach */ 3051ae08745Sheppo nodev, /* devo_reset */ 3061ae08745Sheppo &vdc_cb_ops, /* devo_cb_ops */ 3071ae08745Sheppo NULL, /* devo_bus_ops */ 3081ae08745Sheppo nulldev /* devo_power */ 3091ae08745Sheppo }; 3101ae08745Sheppo 3111ae08745Sheppo static struct modldrv modldrv = { 3121ae08745Sheppo &mod_driverops, 313205eeb1aSlm66018 "virtual disk client", 3141ae08745Sheppo &vdc_ops, 3151ae08745Sheppo }; 3161ae08745Sheppo 3171ae08745Sheppo static struct modlinkage modlinkage = { 3181ae08745Sheppo MODREV_1, 3191ae08745Sheppo &modldrv, 3201ae08745Sheppo NULL 3211ae08745Sheppo }; 3221ae08745Sheppo 3231ae08745Sheppo /* -------------------------------------------------------------------------- */ 3241ae08745Sheppo 3251ae08745Sheppo /* 3261ae08745Sheppo * Device Driver housekeeping and setup 3271ae08745Sheppo */ 3281ae08745Sheppo 3291ae08745Sheppo int 3301ae08745Sheppo _init(void) 3311ae08745Sheppo { 3321ae08745Sheppo int status; 3331ae08745Sheppo 3341ae08745Sheppo if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0) 3351ae08745Sheppo return (status); 3361ae08745Sheppo if ((status = mod_install(&modlinkage)) != 0) 3371ae08745Sheppo ddi_soft_state_fini(&vdc_state); 3381ae08745Sheppo return (status); 3391ae08745Sheppo } 3401ae08745Sheppo 3411ae08745Sheppo int 3421ae08745Sheppo _info(struct modinfo *modinfop) 3431ae08745Sheppo { 3441ae08745Sheppo return (mod_info(&modlinkage, modinfop)); 3451ae08745Sheppo } 3461ae08745Sheppo 3471ae08745Sheppo int 3481ae08745Sheppo _fini(void) 3491ae08745Sheppo { 3501ae08745Sheppo int status; 3511ae08745Sheppo 3521ae08745Sheppo if ((status = mod_remove(&modlinkage)) != 0) 3531ae08745Sheppo return (status); 3541ae08745Sheppo ddi_soft_state_fini(&vdc_state); 3551ae08745Sheppo return (0); 3561ae08745Sheppo } 3571ae08745Sheppo 3581ae08745Sheppo static int 3591ae08745Sheppo vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 3601ae08745Sheppo { 3611ae08745Sheppo _NOTE(ARGUNUSED(dip)) 3621ae08745Sheppo 3630d0c8d4bSnarayan int instance = VDCUNIT((dev_t)arg); 3641ae08745Sheppo vdc_t *vdc = NULL; 3651ae08745Sheppo 3661ae08745Sheppo switch (cmd) { 3671ae08745Sheppo case DDI_INFO_DEVT2DEVINFO: 3681ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 3691ae08745Sheppo *resultp = NULL; 3701ae08745Sheppo return (DDI_FAILURE); 3711ae08745Sheppo } 3721ae08745Sheppo *resultp = vdc->dip; 3731ae08745Sheppo return (DDI_SUCCESS); 3741ae08745Sheppo case DDI_INFO_DEVT2INSTANCE: 3751ae08745Sheppo *resultp = (void *)(uintptr_t)instance; 3761ae08745Sheppo return (DDI_SUCCESS); 3771ae08745Sheppo default: 3781ae08745Sheppo *resultp = NULL; 3791ae08745Sheppo return (DDI_FAILURE); 3801ae08745Sheppo } 3811ae08745Sheppo } 3821ae08745Sheppo 3831ae08745Sheppo static int 3841ae08745Sheppo vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3851ae08745Sheppo { 3862f5224aeSachartre kt_did_t failfast_tid, ownership_tid; 3871ae08745Sheppo int instance; 3881ae08745Sheppo int rv; 389d7400d00Sachartre vdc_server_t *srvr; 3901ae08745Sheppo vdc_t *vdc = NULL; 3911ae08745Sheppo 3921ae08745Sheppo switch (cmd) { 3931ae08745Sheppo case DDI_DETACH: 3941ae08745Sheppo /* the real work happens below */ 3951ae08745Sheppo break; 3961ae08745Sheppo case DDI_SUSPEND: 3971ae08745Sheppo /* nothing to do for this non-device */ 3981ae08745Sheppo return (DDI_SUCCESS); 3991ae08745Sheppo default: 4001ae08745Sheppo return (DDI_FAILURE); 4011ae08745Sheppo } 4021ae08745Sheppo 4031ae08745Sheppo ASSERT(cmd == DDI_DETACH); 4041ae08745Sheppo instance = ddi_get_instance(dip); 4053af08d82Slm66018 DMSGX(1, "[%d] Entered\n", instance); 4061ae08745Sheppo 4071ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 408e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 4091ae08745Sheppo return (DDI_FAILURE); 4101ae08745Sheppo } 4111ae08745Sheppo 4122f5224aeSachartre /* 4132f5224aeSachartre * This function is called when vdc is detached or if it has failed to 4142f5224aeSachartre * attach. In that case, the attach may have fail before the vdisk type 4152f5224aeSachartre * has been set so we can't call vdc_is_opened(). However as the attach 4162f5224aeSachartre * has failed, we know that the vdisk is not opened and we can safely 4172f5224aeSachartre * detach. 4182f5224aeSachartre */ 4192f5224aeSachartre if (vdc->vdisk_type != VD_DISK_TYPE_UNK && vdc_is_opened(vdc)) { 4203af08d82Slm66018 DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance); 4211ae08745Sheppo return (DDI_FAILURE); 4221ae08745Sheppo } 4231ae08745Sheppo 42478fcd0a1Sachartre if (vdc->dkio_flush_pending) { 42578fcd0a1Sachartre DMSG(vdc, 0, 42678fcd0a1Sachartre "[%d] Cannot detach: %d outstanding DKIO flushes\n", 42778fcd0a1Sachartre instance, vdc->dkio_flush_pending); 42878fcd0a1Sachartre return (DDI_FAILURE); 42978fcd0a1Sachartre } 43078fcd0a1Sachartre 43178fcd0a1Sachartre if (vdc->validate_pending) { 43278fcd0a1Sachartre DMSG(vdc, 0, 43378fcd0a1Sachartre "[%d] Cannot detach: %d outstanding validate request\n", 43478fcd0a1Sachartre instance, vdc->validate_pending); 43578fcd0a1Sachartre return (DDI_FAILURE); 43678fcd0a1Sachartre } 43778fcd0a1Sachartre 4383af08d82Slm66018 DMSG(vdc, 0, "[%d] proceeding...\n", instance); 4393af08d82Slm66018 4402f5224aeSachartre /* If we took ownership, release ownership */ 4412f5224aeSachartre mutex_enter(&vdc->ownership_lock); 4422f5224aeSachartre if (vdc->ownership & VDC_OWNERSHIP_GRANTED) { 4432f5224aeSachartre rv = vdc_access_set(vdc, VD_ACCESS_SET_CLEAR, FKIOCTL); 4442f5224aeSachartre if (rv == 0) { 4452f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 4462f5224aeSachartre } 4472f5224aeSachartre } 4482f5224aeSachartre mutex_exit(&vdc->ownership_lock); 4492f5224aeSachartre 4503af08d82Slm66018 /* mark instance as detaching */ 4513af08d82Slm66018 vdc->lifecycle = VDC_LC_DETACHING; 4521ae08745Sheppo 4531ae08745Sheppo /* 454d7400d00Sachartre * Try and disable callbacks to prevent another handshake. We have to 455d7400d00Sachartre * disable callbacks for all servers. 4561ae08745Sheppo */ 457d7400d00Sachartre for (srvr = vdc->server_list; srvr != NULL; srvr = srvr->next) { 458d7400d00Sachartre rv = ldc_set_cb_mode(srvr->ldc_handle, LDC_CB_DISABLE); 459d7400d00Sachartre DMSG(vdc, 0, "callback disabled (ldc=%lu, rv=%d)\n", 460d7400d00Sachartre srvr->ldc_id, rv); 4618cd10891Snarayan } 4621ae08745Sheppo 4631ae08745Sheppo if (vdc->initialized & VDC_THREAD) { 4643af08d82Slm66018 mutex_enter(&vdc->read_lock); 4653af08d82Slm66018 if ((vdc->read_state == VDC_READ_WAITING) || 4663af08d82Slm66018 (vdc->read_state == VDC_READ_RESET)) { 4673af08d82Slm66018 vdc->read_state = VDC_READ_RESET; 4683af08d82Slm66018 cv_signal(&vdc->read_cv); 4691ae08745Sheppo } 4703af08d82Slm66018 4713af08d82Slm66018 mutex_exit(&vdc->read_lock); 4723af08d82Slm66018 4733af08d82Slm66018 /* wake up any thread waiting for connection to come online */ 4743af08d82Slm66018 mutex_enter(&vdc->lock); 4753af08d82Slm66018 if (vdc->state == VDC_STATE_INIT_WAITING) { 4763af08d82Slm66018 DMSG(vdc, 0, 4773af08d82Slm66018 "[%d] write reset - move to resetting state...\n", 4783af08d82Slm66018 instance); 4793af08d82Slm66018 vdc->state = VDC_STATE_RESETTING; 4803af08d82Slm66018 cv_signal(&vdc->initwait_cv); 4813af08d82Slm66018 } 4823af08d82Slm66018 mutex_exit(&vdc->lock); 4833af08d82Slm66018 4843af08d82Slm66018 /* now wait until state transitions to VDC_STATE_DETACH */ 4853af08d82Slm66018 thread_join(vdc->msg_proc_thr->t_did); 4863af08d82Slm66018 ASSERT(vdc->state == VDC_STATE_DETACH); 4873af08d82Slm66018 DMSG(vdc, 0, "[%d] Reset thread exit and join ..\n", 4883af08d82Slm66018 vdc->instance); 4891ae08745Sheppo } 4901ae08745Sheppo 4911ae08745Sheppo mutex_enter(&vdc->lock); 4921ae08745Sheppo 4931ae08745Sheppo if (vdc->initialized & VDC_DRING) 4941ae08745Sheppo vdc_destroy_descriptor_ring(vdc); 4951ae08745Sheppo 4968cd10891Snarayan vdc_fini_ports(vdc); 4971ae08745Sheppo 4982f5224aeSachartre if (vdc->failfast_thread) { 4992f5224aeSachartre failfast_tid = vdc->failfast_thread->t_did; 5002f5224aeSachartre vdc->failfast_interval = 0; 5012f5224aeSachartre cv_signal(&vdc->failfast_cv); 5022f5224aeSachartre } else { 5032f5224aeSachartre failfast_tid = 0; 5042f5224aeSachartre } 5052f5224aeSachartre 5062f5224aeSachartre if (vdc->ownership & VDC_OWNERSHIP_WANTED) { 5072f5224aeSachartre ownership_tid = vdc->ownership_thread->t_did; 5082f5224aeSachartre vdc->ownership = VDC_OWNERSHIP_NONE; 5092f5224aeSachartre cv_signal(&vdc->ownership_cv); 5102f5224aeSachartre } else { 5112f5224aeSachartre ownership_tid = 0; 5122f5224aeSachartre } 5132f5224aeSachartre 5141ae08745Sheppo mutex_exit(&vdc->lock); 5151ae08745Sheppo 5162f5224aeSachartre if (failfast_tid != 0) 5172f5224aeSachartre thread_join(failfast_tid); 5182f5224aeSachartre 5192f5224aeSachartre if (ownership_tid != 0) 5202f5224aeSachartre thread_join(ownership_tid); 5212f5224aeSachartre 5225b98b509Sachartre if (vdc->initialized & VDC_MINOR) 5231ae08745Sheppo ddi_remove_minor_node(dip, NULL); 5241ae08745Sheppo 525366a92acSlm66018 if (vdc->io_stats) { 526366a92acSlm66018 kstat_delete(vdc->io_stats); 527366a92acSlm66018 vdc->io_stats = NULL; 528366a92acSlm66018 } 529366a92acSlm66018 530366a92acSlm66018 if (vdc->err_stats) { 531366a92acSlm66018 kstat_delete(vdc->err_stats); 532366a92acSlm66018 vdc->err_stats = NULL; 533366a92acSlm66018 } 534366a92acSlm66018 5351ae08745Sheppo if (vdc->initialized & VDC_LOCKS) { 5361ae08745Sheppo mutex_destroy(&vdc->lock); 5373af08d82Slm66018 mutex_destroy(&vdc->read_lock); 5382f5224aeSachartre mutex_destroy(&vdc->ownership_lock); 5393af08d82Slm66018 cv_destroy(&vdc->initwait_cv); 5403af08d82Slm66018 cv_destroy(&vdc->dring_free_cv); 5413af08d82Slm66018 cv_destroy(&vdc->membind_cv); 5423af08d82Slm66018 cv_destroy(&vdc->sync_pending_cv); 5433af08d82Slm66018 cv_destroy(&vdc->sync_blocked_cv); 5443af08d82Slm66018 cv_destroy(&vdc->read_cv); 5453af08d82Slm66018 cv_destroy(&vdc->running_cv); 5462f5224aeSachartre cv_destroy(&vdc->ownership_cv); 5472f5224aeSachartre cv_destroy(&vdc->failfast_cv); 5482f5224aeSachartre cv_destroy(&vdc->failfast_io_cv); 5491ae08745Sheppo } 5501ae08745Sheppo 5511ae08745Sheppo if (vdc->minfo) 5521ae08745Sheppo kmem_free(vdc->minfo, sizeof (struct dk_minfo)); 5531ae08745Sheppo 5541ae08745Sheppo if (vdc->cinfo) 5551ae08745Sheppo kmem_free(vdc->cinfo, sizeof (struct dk_cinfo)); 5561ae08745Sheppo 5571ae08745Sheppo if (vdc->vtoc) 5581ae08745Sheppo kmem_free(vdc->vtoc, sizeof (struct vtoc)); 5591ae08745Sheppo 56078fcd0a1Sachartre if (vdc->geom) 56178fcd0a1Sachartre kmem_free(vdc->geom, sizeof (struct dk_geom)); 5620a55fbb7Slm66018 5634bac2208Snarayan if (vdc->devid) { 5644bac2208Snarayan ddi_devid_unregister(dip); 5654bac2208Snarayan ddi_devid_free(vdc->devid); 5664bac2208Snarayan } 5674bac2208Snarayan 5681ae08745Sheppo if (vdc->initialized & VDC_SOFT_STATE) 5691ae08745Sheppo ddi_soft_state_free(vdc_state, instance); 5701ae08745Sheppo 5713af08d82Slm66018 DMSG(vdc, 0, "[%d] End %p\n", instance, (void *)vdc); 5721ae08745Sheppo 5731ae08745Sheppo return (DDI_SUCCESS); 5741ae08745Sheppo } 5751ae08745Sheppo 5761ae08745Sheppo 5771ae08745Sheppo static int 5781ae08745Sheppo vdc_do_attach(dev_info_t *dip) 5791ae08745Sheppo { 5801ae08745Sheppo int instance; 5811ae08745Sheppo vdc_t *vdc = NULL; 5821ae08745Sheppo int status; 583655fd6a9Sachartre md_t *mdp; 5848cd10891Snarayan mde_cookie_t vd_node; 5851ae08745Sheppo 5861ae08745Sheppo ASSERT(dip != NULL); 5871ae08745Sheppo 5881ae08745Sheppo instance = ddi_get_instance(dip); 5891ae08745Sheppo if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) { 590e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure", 591e1ebb9ecSlm66018 instance); 5921ae08745Sheppo return (DDI_FAILURE); 5931ae08745Sheppo } 5941ae08745Sheppo 5951ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 596e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 5971ae08745Sheppo return (DDI_FAILURE); 5981ae08745Sheppo } 5991ae08745Sheppo 6001ae08745Sheppo /* 6011ae08745Sheppo * We assign the value to initialized in this case to zero out the 6021ae08745Sheppo * variable and then set bits in it to indicate what has been done 6031ae08745Sheppo */ 6041ae08745Sheppo vdc->initialized = VDC_SOFT_STATE; 6051ae08745Sheppo 6063af08d82Slm66018 vdc_hz_min_ldc_delay = drv_usectohz(vdc_min_timeout_ldc); 6073af08d82Slm66018 vdc_hz_max_ldc_delay = drv_usectohz(vdc_max_timeout_ldc); 6081ae08745Sheppo 6091ae08745Sheppo vdc->dip = dip; 6101ae08745Sheppo vdc->instance = instance; 6111ae08745Sheppo vdc->vdisk_type = VD_DISK_TYPE_UNK; 6124bac2208Snarayan vdc->vdisk_label = VD_DISK_LABEL_UNK; 6133af08d82Slm66018 vdc->state = VDC_STATE_INIT; 6143af08d82Slm66018 vdc->lifecycle = VDC_LC_ATTACHING; 6151ae08745Sheppo vdc->session_id = 0; 6161ae08745Sheppo vdc->block_size = DEV_BSIZE; 6178e6a2a04Slm66018 vdc->max_xfer_sz = maxphys / DEV_BSIZE; 6181ae08745Sheppo 61917cadca8Slm66018 /* 62017cadca8Slm66018 * We assume, for now, that the vDisk server will export 'read' 62117cadca8Slm66018 * operations to us at a minimum (this is needed because of checks 62217cadca8Slm66018 * in vdc for supported operations early in the handshake process). 62317cadca8Slm66018 * The vDisk server will return ENOTSUP if this is not the case. 62417cadca8Slm66018 * The value will be overwritten during the attribute exchange with 62517cadca8Slm66018 * the bitmask of operations exported by server. 62617cadca8Slm66018 */ 62717cadca8Slm66018 vdc->operations = VD_OP_MASK_READ; 62817cadca8Slm66018 6291ae08745Sheppo vdc->vtoc = NULL; 63078fcd0a1Sachartre vdc->geom = NULL; 6311ae08745Sheppo vdc->cinfo = NULL; 6321ae08745Sheppo vdc->minfo = NULL; 6331ae08745Sheppo 6341ae08745Sheppo mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL); 6353af08d82Slm66018 cv_init(&vdc->initwait_cv, NULL, CV_DRIVER, NULL); 6363af08d82Slm66018 cv_init(&vdc->dring_free_cv, NULL, CV_DRIVER, NULL); 6373af08d82Slm66018 cv_init(&vdc->membind_cv, NULL, CV_DRIVER, NULL); 6383af08d82Slm66018 cv_init(&vdc->running_cv, NULL, CV_DRIVER, NULL); 6393af08d82Slm66018 6403af08d82Slm66018 vdc->threads_pending = 0; 6413af08d82Slm66018 vdc->sync_op_pending = B_FALSE; 6423af08d82Slm66018 vdc->sync_op_blocked = B_FALSE; 6433af08d82Slm66018 cv_init(&vdc->sync_pending_cv, NULL, CV_DRIVER, NULL); 6443af08d82Slm66018 cv_init(&vdc->sync_blocked_cv, NULL, CV_DRIVER, NULL); 6453af08d82Slm66018 6462f5224aeSachartre mutex_init(&vdc->ownership_lock, NULL, MUTEX_DRIVER, NULL); 6472f5224aeSachartre cv_init(&vdc->ownership_cv, NULL, CV_DRIVER, NULL); 6482f5224aeSachartre cv_init(&vdc->failfast_cv, NULL, CV_DRIVER, NULL); 6492f5224aeSachartre cv_init(&vdc->failfast_io_cv, NULL, CV_DRIVER, NULL); 6502f5224aeSachartre 6513af08d82Slm66018 /* init blocking msg read functionality */ 6523af08d82Slm66018 mutex_init(&vdc->read_lock, NULL, MUTEX_DRIVER, NULL); 6533af08d82Slm66018 cv_init(&vdc->read_cv, NULL, CV_DRIVER, NULL); 6543af08d82Slm66018 vdc->read_state = VDC_READ_IDLE; 6553af08d82Slm66018 6561ae08745Sheppo vdc->initialized |= VDC_LOCKS; 6571ae08745Sheppo 658655fd6a9Sachartre /* get device and port MD node for this disk instance */ 6598cd10891Snarayan if (vdc_get_md_node(dip, &mdp, &vd_node) != 0) { 660655fd6a9Sachartre cmn_err(CE_NOTE, "[%d] Could not get machine description node", 661655fd6a9Sachartre instance); 662655fd6a9Sachartre return (DDI_FAILURE); 663655fd6a9Sachartre } 664655fd6a9Sachartre 6658cd10891Snarayan if (vdc_init_ports(vdc, mdp, vd_node) != 0) { 6668cd10891Snarayan cmn_err(CE_NOTE, "[%d] Error initialising ports", instance); 6678cd10891Snarayan return (DDI_FAILURE); 668655fd6a9Sachartre } 669655fd6a9Sachartre 670655fd6a9Sachartre (void) md_fini_handle(mdp); 671655fd6a9Sachartre 672*de3a5331SRamesh Chitrothu /* Create the kstats for saving the I/O statistics used by iostat(1M) */ 673*de3a5331SRamesh Chitrothu vdc_create_io_kstats(vdc); 674*de3a5331SRamesh Chitrothu vdc_create_err_kstats(vdc); 675*de3a5331SRamesh Chitrothu 676*de3a5331SRamesh Chitrothu /* Initialize remaining structures before starting the msg thread */ 677*de3a5331SRamesh Chitrothu vdc->vdisk_label = VD_DISK_LABEL_UNK; 678*de3a5331SRamesh Chitrothu vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); 679*de3a5331SRamesh Chitrothu vdc->geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP); 680*de3a5331SRamesh Chitrothu vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 681*de3a5331SRamesh Chitrothu 6823af08d82Slm66018 /* initialize the thread responsible for managing state with server */ 6833af08d82Slm66018 vdc->msg_proc_thr = thread_create(NULL, 0, vdc_process_msg_thread, 6841ae08745Sheppo vdc, 0, &p0, TS_RUN, minclsyspri); 6853af08d82Slm66018 if (vdc->msg_proc_thr == NULL) { 6861ae08745Sheppo cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread", 6871ae08745Sheppo instance); 6881ae08745Sheppo return (DDI_FAILURE); 6891ae08745Sheppo } 6903af08d82Slm66018 6911ae08745Sheppo vdc->initialized |= VDC_THREAD; 6921ae08745Sheppo 693e1ebb9ecSlm66018 atomic_inc_32(&vdc_instance_count); 6941ae08745Sheppo 6950a55fbb7Slm66018 /* 69678fcd0a1Sachartre * Check the disk label. This will send requests and do the handshake. 69778fcd0a1Sachartre * We don't really care about the disk label now. What we really need is 69878fcd0a1Sachartre * the handshake do be done so that we know the type of the disk (slice 69978fcd0a1Sachartre * or full disk) and the appropriate device nodes can be created. 7000a55fbb7Slm66018 */ 70178fcd0a1Sachartre 70278fcd0a1Sachartre mutex_enter(&vdc->lock); 70378fcd0a1Sachartre (void) vdc_validate_geometry(vdc); 70478fcd0a1Sachartre mutex_exit(&vdc->lock); 7051ae08745Sheppo 7061ae08745Sheppo /* 7075b98b509Sachartre * Now that we have the device info we can create the device nodes 7081ae08745Sheppo */ 7091ae08745Sheppo status = vdc_create_device_nodes(vdc); 7101ae08745Sheppo if (status) { 7113af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to create device nodes", 7121ae08745Sheppo instance); 7133af08d82Slm66018 goto return_status; 7141ae08745Sheppo } 7151ae08745Sheppo 7164bac2208Snarayan /* 7174bac2208Snarayan * Setup devid 7184bac2208Snarayan */ 7194bac2208Snarayan if (vdc_setup_devid(vdc)) { 7203af08d82Slm66018 DMSG(vdc, 0, "[%d] No device id available\n", instance); 7214bac2208Snarayan } 7224bac2208Snarayan 723366a92acSlm66018 /* 724366a92acSlm66018 * Fill in the fields of the error statistics kstat that were not 725366a92acSlm66018 * available when creating the kstat 726366a92acSlm66018 */ 727366a92acSlm66018 vdc_set_err_kstats(vdc); 728366a92acSlm66018 7291ae08745Sheppo ddi_report_dev(dip); 7303af08d82Slm66018 vdc->lifecycle = VDC_LC_ONLINE; 7313af08d82Slm66018 DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance); 7321ae08745Sheppo 7333af08d82Slm66018 return_status: 7343af08d82Slm66018 DMSG(vdc, 0, "[%d] Attach completed\n", instance); 7351ae08745Sheppo return (status); 7361ae08745Sheppo } 7371ae08745Sheppo 7381ae08745Sheppo static int 7391ae08745Sheppo vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 7401ae08745Sheppo { 7411ae08745Sheppo int status; 7421ae08745Sheppo 7431ae08745Sheppo switch (cmd) { 7441ae08745Sheppo case DDI_ATTACH: 7451ae08745Sheppo if ((status = vdc_do_attach(dip)) != 0) 7461ae08745Sheppo (void) vdc_detach(dip, DDI_DETACH); 7471ae08745Sheppo return (status); 7481ae08745Sheppo case DDI_RESUME: 7491ae08745Sheppo /* nothing to do for this non-device */ 7501ae08745Sheppo return (DDI_SUCCESS); 7511ae08745Sheppo default: 7521ae08745Sheppo return (DDI_FAILURE); 7531ae08745Sheppo } 7541ae08745Sheppo } 7551ae08745Sheppo 7561ae08745Sheppo static int 7578cd10891Snarayan vdc_do_ldc_init(vdc_t *vdc, vdc_server_t *srvr) 7581ae08745Sheppo { 7591ae08745Sheppo int status = 0; 7601ae08745Sheppo ldc_status_t ldc_state; 7611ae08745Sheppo ldc_attr_t ldc_attr; 7621ae08745Sheppo 7631ae08745Sheppo ASSERT(vdc != NULL); 7648cd10891Snarayan ASSERT(srvr != NULL); 7651ae08745Sheppo 7661ae08745Sheppo ldc_attr.devclass = LDC_DEV_BLK; 7671ae08745Sheppo ldc_attr.instance = vdc->instance; 7681ae08745Sheppo ldc_attr.mode = LDC_MODE_UNRELIABLE; /* unreliable transport */ 769e1ebb9ecSlm66018 ldc_attr.mtu = VD_LDC_MTU; 7701ae08745Sheppo 7718cd10891Snarayan if ((srvr->state & VDC_LDC_INIT) == 0) { 7728cd10891Snarayan status = ldc_init(srvr->ldc_id, &ldc_attr, 7738cd10891Snarayan &srvr->ldc_handle); 7741ae08745Sheppo if (status != 0) { 7753af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d", 7768cd10891Snarayan vdc->instance, srvr->ldc_id, status); 7771ae08745Sheppo return (status); 7781ae08745Sheppo } 7798cd10891Snarayan srvr->state |= VDC_LDC_INIT; 7801ae08745Sheppo } 7818cd10891Snarayan status = ldc_status(srvr->ldc_handle, &ldc_state); 7821ae08745Sheppo if (status != 0) { 7833af08d82Slm66018 DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]", 784e1ebb9ecSlm66018 vdc->instance, status); 7858cd10891Snarayan goto init_exit; 7861ae08745Sheppo } 7878cd10891Snarayan srvr->ldc_state = ldc_state; 7881ae08745Sheppo 7898cd10891Snarayan if ((srvr->state & VDC_LDC_CB) == 0) { 7908cd10891Snarayan status = ldc_reg_callback(srvr->ldc_handle, vdc_handle_cb, 7918cd10891Snarayan (caddr_t)srvr); 7921ae08745Sheppo if (status != 0) { 7933af08d82Slm66018 DMSG(vdc, 0, "[%d] LDC callback reg. failed (%d)", 794e1ebb9ecSlm66018 vdc->instance, status); 7958cd10891Snarayan goto init_exit; 7961ae08745Sheppo } 7978cd10891Snarayan srvr->state |= VDC_LDC_CB; 7981ae08745Sheppo } 7991ae08745Sheppo 8001ae08745Sheppo /* 8011ae08745Sheppo * At this stage we have initialised LDC, we will now try and open 8021ae08745Sheppo * the connection. 8031ae08745Sheppo */ 8048cd10891Snarayan if (srvr->ldc_state == LDC_INIT) { 8058cd10891Snarayan status = ldc_open(srvr->ldc_handle); 8061ae08745Sheppo if (status != 0) { 8073af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d", 8088cd10891Snarayan vdc->instance, srvr->ldc_id, status); 8098cd10891Snarayan goto init_exit; 8101ae08745Sheppo } 8118cd10891Snarayan srvr->state |= VDC_LDC_OPEN; 8128cd10891Snarayan } 8138cd10891Snarayan 8148cd10891Snarayan init_exit: 8158cd10891Snarayan if (status) { 8168cd10891Snarayan vdc_terminate_ldc(vdc, srvr); 8171ae08745Sheppo } 8181ae08745Sheppo 8191ae08745Sheppo return (status); 8201ae08745Sheppo } 8211ae08745Sheppo 8221ae08745Sheppo static int 8231ae08745Sheppo vdc_start_ldc_connection(vdc_t *vdc) 8241ae08745Sheppo { 8251ae08745Sheppo int status = 0; 8261ae08745Sheppo 8271ae08745Sheppo ASSERT(vdc != NULL); 8281ae08745Sheppo 8293af08d82Slm66018 ASSERT(MUTEX_HELD(&vdc->lock)); 8301ae08745Sheppo 8310a55fbb7Slm66018 status = vdc_do_ldc_up(vdc); 8321ae08745Sheppo 8333af08d82Slm66018 DMSG(vdc, 0, "[%d] Finished bringing up LDC\n", vdc->instance); 8341ae08745Sheppo 8353af08d82Slm66018 return (status); 8363af08d82Slm66018 } 8373af08d82Slm66018 8383af08d82Slm66018 static int 8393af08d82Slm66018 vdc_stop_ldc_connection(vdc_t *vdcp) 8403af08d82Slm66018 { 8413af08d82Slm66018 int status; 8423af08d82Slm66018 8438cd10891Snarayan ASSERT(vdcp != NULL); 8448cd10891Snarayan 8458cd10891Snarayan ASSERT(MUTEX_HELD(&vdcp->lock)); 8468cd10891Snarayan 8473af08d82Slm66018 DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n", 8483af08d82Slm66018 vdcp->state); 8493af08d82Slm66018 8508cd10891Snarayan status = ldc_down(vdcp->curr_server->ldc_handle); 8513af08d82Slm66018 DMSG(vdcp, 0, "ldc_down() = %d\n", status); 8523af08d82Slm66018 8533af08d82Slm66018 vdcp->initialized &= ~VDC_HANDSHAKE; 8543af08d82Slm66018 DMSG(vdcp, 0, "initialized=%x\n", vdcp->initialized); 8551ae08745Sheppo 8561ae08745Sheppo return (status); 8571ae08745Sheppo } 8581ae08745Sheppo 859366a92acSlm66018 static void 860366a92acSlm66018 vdc_create_io_kstats(vdc_t *vdc) 861366a92acSlm66018 { 862366a92acSlm66018 if (vdc->io_stats != NULL) { 863366a92acSlm66018 DMSG(vdc, 0, "[%d] I/O kstat already exists\n", vdc->instance); 864366a92acSlm66018 return; 865366a92acSlm66018 } 866366a92acSlm66018 867366a92acSlm66018 vdc->io_stats = kstat_create(VDC_DRIVER_NAME, vdc->instance, NULL, 868366a92acSlm66018 "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT); 869366a92acSlm66018 if (vdc->io_stats != NULL) { 870366a92acSlm66018 vdc->io_stats->ks_lock = &vdc->lock; 871366a92acSlm66018 kstat_install(vdc->io_stats); 872366a92acSlm66018 } else { 873366a92acSlm66018 cmn_err(CE_NOTE, "[%d] Failed to create kstat: I/O statistics" 874366a92acSlm66018 " will not be gathered", vdc->instance); 875366a92acSlm66018 } 876366a92acSlm66018 } 877366a92acSlm66018 878366a92acSlm66018 static void 879366a92acSlm66018 vdc_create_err_kstats(vdc_t *vdc) 880366a92acSlm66018 { 881366a92acSlm66018 vd_err_stats_t *stp; 882366a92acSlm66018 char kstatmodule_err[KSTAT_STRLEN]; 883366a92acSlm66018 char kstatname[KSTAT_STRLEN]; 884366a92acSlm66018 int ndata = (sizeof (vd_err_stats_t) / sizeof (kstat_named_t)); 885366a92acSlm66018 int instance = vdc->instance; 886366a92acSlm66018 887366a92acSlm66018 if (vdc->err_stats != NULL) { 888366a92acSlm66018 DMSG(vdc, 0, "[%d] ERR kstat already exists\n", vdc->instance); 889366a92acSlm66018 return; 890366a92acSlm66018 } 891366a92acSlm66018 892366a92acSlm66018 (void) snprintf(kstatmodule_err, sizeof (kstatmodule_err), 893366a92acSlm66018 "%serr", VDC_DRIVER_NAME); 894366a92acSlm66018 (void) snprintf(kstatname, sizeof (kstatname), 895366a92acSlm66018 "%s%d,err", VDC_DRIVER_NAME, instance); 896366a92acSlm66018 897366a92acSlm66018 vdc->err_stats = kstat_create(kstatmodule_err, instance, kstatname, 898366a92acSlm66018 "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT); 899366a92acSlm66018 900366a92acSlm66018 if (vdc->err_stats == NULL) { 901366a92acSlm66018 cmn_err(CE_NOTE, "[%d] Failed to create kstat: Error statistics" 902366a92acSlm66018 " will not be gathered", instance); 903366a92acSlm66018 return; 904366a92acSlm66018 } 905366a92acSlm66018 906366a92acSlm66018 stp = (vd_err_stats_t *)vdc->err_stats->ks_data; 907366a92acSlm66018 kstat_named_init(&stp->vd_softerrs, "Soft Errors", 908366a92acSlm66018 KSTAT_DATA_UINT32); 909366a92acSlm66018 kstat_named_init(&stp->vd_transerrs, "Transport Errors", 910366a92acSlm66018 KSTAT_DATA_UINT32); 911366a92acSlm66018 kstat_named_init(&stp->vd_protoerrs, "Protocol Errors", 912366a92acSlm66018 KSTAT_DATA_UINT32); 913366a92acSlm66018 kstat_named_init(&stp->vd_vid, "Vendor", 914366a92acSlm66018 KSTAT_DATA_CHAR); 915366a92acSlm66018 kstat_named_init(&stp->vd_pid, "Product", 916366a92acSlm66018 KSTAT_DATA_CHAR); 917366a92acSlm66018 kstat_named_init(&stp->vd_capacity, "Size", 918366a92acSlm66018 KSTAT_DATA_ULONGLONG); 919366a92acSlm66018 920366a92acSlm66018 vdc->err_stats->ks_update = nulldev; 921366a92acSlm66018 922366a92acSlm66018 kstat_install(vdc->err_stats); 923366a92acSlm66018 } 924366a92acSlm66018 925366a92acSlm66018 static void 926366a92acSlm66018 vdc_set_err_kstats(vdc_t *vdc) 927366a92acSlm66018 { 928366a92acSlm66018 vd_err_stats_t *stp; 929366a92acSlm66018 930366a92acSlm66018 if (vdc->err_stats == NULL) 931366a92acSlm66018 return; 932366a92acSlm66018 933366a92acSlm66018 mutex_enter(&vdc->lock); 934366a92acSlm66018 935366a92acSlm66018 stp = (vd_err_stats_t *)vdc->err_stats->ks_data; 936366a92acSlm66018 ASSERT(stp != NULL); 937366a92acSlm66018 938366a92acSlm66018 stp->vd_capacity.value.ui64 = vdc->vdisk_size * vdc->block_size; 939366a92acSlm66018 (void) strcpy(stp->vd_vid.value.c, "SUN"); 940366a92acSlm66018 (void) strcpy(stp->vd_pid.value.c, "VDSK"); 941366a92acSlm66018 942366a92acSlm66018 mutex_exit(&vdc->lock); 943366a92acSlm66018 } 944366a92acSlm66018 9454bac2208Snarayan static int 9464bac2208Snarayan vdc_create_device_nodes_efi(vdc_t *vdc) 9474bac2208Snarayan { 9484bac2208Snarayan ddi_remove_minor_node(vdc->dip, "h"); 9494bac2208Snarayan ddi_remove_minor_node(vdc->dip, "h,raw"); 9504bac2208Snarayan 9514bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK, 9524bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 9534bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 9544bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'", 9554bac2208Snarayan vdc->instance); 9564bac2208Snarayan return (EIO); 9574bac2208Snarayan } 9584bac2208Snarayan 9594bac2208Snarayan /* if any device node is created we set this flag */ 9604bac2208Snarayan vdc->initialized |= VDC_MINOR; 9614bac2208Snarayan 9624bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR, 9634bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 9644bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 9654bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd,raw'", 9664bac2208Snarayan vdc->instance); 9674bac2208Snarayan return (EIO); 9684bac2208Snarayan } 9694bac2208Snarayan 9704bac2208Snarayan return (0); 9714bac2208Snarayan } 9724bac2208Snarayan 9734bac2208Snarayan static int 9744bac2208Snarayan vdc_create_device_nodes_vtoc(vdc_t *vdc) 9754bac2208Snarayan { 9764bac2208Snarayan ddi_remove_minor_node(vdc->dip, "wd"); 9774bac2208Snarayan ddi_remove_minor_node(vdc->dip, "wd,raw"); 9784bac2208Snarayan 9794bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "h", S_IFBLK, 9804bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 9814bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 9824bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'", 9834bac2208Snarayan vdc->instance); 9844bac2208Snarayan return (EIO); 9854bac2208Snarayan } 9864bac2208Snarayan 9874bac2208Snarayan /* if any device node is created we set this flag */ 9884bac2208Snarayan vdc->initialized |= VDC_MINOR; 9894bac2208Snarayan 9904bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR, 9914bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 9924bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 9934bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h,raw'", 9944bac2208Snarayan vdc->instance); 9954bac2208Snarayan return (EIO); 9964bac2208Snarayan } 9974bac2208Snarayan 9984bac2208Snarayan return (0); 9994bac2208Snarayan } 10001ae08745Sheppo 10011ae08745Sheppo /* 10021ae08745Sheppo * Function: 10031ae08745Sheppo * vdc_create_device_nodes 10041ae08745Sheppo * 10051ae08745Sheppo * Description: 10061ae08745Sheppo * This function creates the block and character device nodes under 10075b98b509Sachartre * /devices. It is called as part of the attach(9E) of the instance 10085b98b509Sachartre * during the handshake with vds after vds has sent the attributes 10095b98b509Sachartre * to vdc. 10101ae08745Sheppo * 10111ae08745Sheppo * If the device is of type VD_DISK_TYPE_SLICE then the minor node 10121ae08745Sheppo * of 2 is used in keeping with the Solaris convention that slice 2 10131ae08745Sheppo * refers to a whole disk. Slices start at 'a' 10141ae08745Sheppo * 10151ae08745Sheppo * Parameters: 10161ae08745Sheppo * vdc - soft state pointer 10171ae08745Sheppo * 10181ae08745Sheppo * Return Values 10191ae08745Sheppo * 0 - Success 10201ae08745Sheppo * EIO - Failed to create node 10211ae08745Sheppo * EINVAL - Unknown type of disk exported 10221ae08745Sheppo */ 10231ae08745Sheppo static int 10241ae08745Sheppo vdc_create_device_nodes(vdc_t *vdc) 10251ae08745Sheppo { 10264bac2208Snarayan char name[sizeof ("s,raw")]; 10271ae08745Sheppo dev_info_t *dip = NULL; 10284bac2208Snarayan int instance, status; 10291ae08745Sheppo int num_slices = 1; 10301ae08745Sheppo int i; 10311ae08745Sheppo 10321ae08745Sheppo ASSERT(vdc != NULL); 10331ae08745Sheppo 10341ae08745Sheppo instance = vdc->instance; 10351ae08745Sheppo dip = vdc->dip; 10361ae08745Sheppo 10371ae08745Sheppo switch (vdc->vdisk_type) { 10381ae08745Sheppo case VD_DISK_TYPE_DISK: 10391ae08745Sheppo num_slices = V_NUMPAR; 10401ae08745Sheppo break; 10411ae08745Sheppo case VD_DISK_TYPE_SLICE: 10421ae08745Sheppo num_slices = 1; 10431ae08745Sheppo break; 10441ae08745Sheppo case VD_DISK_TYPE_UNK: 10451ae08745Sheppo default: 10461ae08745Sheppo return (EINVAL); 10471ae08745Sheppo } 10481ae08745Sheppo 10494bac2208Snarayan /* 10504bac2208Snarayan * Minor nodes are different for EFI disks: EFI disks do not have 10514bac2208Snarayan * a minor node 'g' for the minor number corresponding to slice 10524bac2208Snarayan * VD_EFI_WD_SLICE (slice 7) instead they have a minor node 'wd' 10534bac2208Snarayan * representing the whole disk. 10544bac2208Snarayan */ 10551ae08745Sheppo for (i = 0; i < num_slices; i++) { 10564bac2208Snarayan 10574bac2208Snarayan if (i == VD_EFI_WD_SLICE) { 10584bac2208Snarayan if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 10594bac2208Snarayan status = vdc_create_device_nodes_efi(vdc); 10604bac2208Snarayan else 10614bac2208Snarayan status = vdc_create_device_nodes_vtoc(vdc); 10624bac2208Snarayan if (status != 0) 10634bac2208Snarayan return (status); 10644bac2208Snarayan continue; 10654bac2208Snarayan } 10664bac2208Snarayan 10671ae08745Sheppo (void) snprintf(name, sizeof (name), "%c", 'a' + i); 10681ae08745Sheppo if (ddi_create_minor_node(dip, name, S_IFBLK, 10691ae08745Sheppo VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 1070e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'", 1071e1ebb9ecSlm66018 instance, name); 10721ae08745Sheppo return (EIO); 10731ae08745Sheppo } 10741ae08745Sheppo 10751ae08745Sheppo /* if any device node is created we set this flag */ 10761ae08745Sheppo vdc->initialized |= VDC_MINOR; 10771ae08745Sheppo 107887a7269eSachartre (void) snprintf(name, sizeof (name), "%c%s", 'a' + i, ",raw"); 107987a7269eSachartre 10801ae08745Sheppo if (ddi_create_minor_node(dip, name, S_IFCHR, 10811ae08745Sheppo VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 1082e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'", 1083e1ebb9ecSlm66018 instance, name); 10841ae08745Sheppo return (EIO); 10851ae08745Sheppo } 10861ae08745Sheppo } 10871ae08745Sheppo 10881ae08745Sheppo return (0); 10891ae08745Sheppo } 10901ae08745Sheppo 10911ae08745Sheppo /* 10925b98b509Sachartre * Driver prop_op(9e) entry point function. Return the number of blocks for 10935b98b509Sachartre * the partition in question or forward the request to the property facilities. 10941ae08745Sheppo */ 10951ae08745Sheppo static int 10965b98b509Sachartre vdc_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, 10975b98b509Sachartre char *name, caddr_t valuep, int *lengthp) 10981ae08745Sheppo { 10995b98b509Sachartre int instance = ddi_get_instance(dip); 11005b98b509Sachartre vdc_t *vdc; 11015b98b509Sachartre uint64_t nblocks; 11025b98b509Sachartre uint_t blksize; 11031ae08745Sheppo 11045b98b509Sachartre vdc = ddi_get_soft_state(vdc_state, instance); 11051ae08745Sheppo 11065b98b509Sachartre if (dev == DDI_DEV_T_ANY || vdc == NULL) { 11075b98b509Sachartre return (ddi_prop_op(dev, dip, prop_op, mod_flags, 11085b98b509Sachartre name, valuep, lengthp)); 11091ae08745Sheppo } 11101ae08745Sheppo 11115b98b509Sachartre mutex_enter(&vdc->lock); 11125b98b509Sachartre (void) vdc_validate_geometry(vdc); 111378fcd0a1Sachartre if (vdc->vdisk_label == VD_DISK_LABEL_UNK) { 11145b98b509Sachartre mutex_exit(&vdc->lock); 11155b98b509Sachartre return (ddi_prop_op(dev, dip, prop_op, mod_flags, 11165b98b509Sachartre name, valuep, lengthp)); 111778fcd0a1Sachartre } 11185b98b509Sachartre nblocks = vdc->slice[VDCPART(dev)].nblocks; 11195b98b509Sachartre blksize = vdc->block_size; 11205b98b509Sachartre mutex_exit(&vdc->lock); 112178fcd0a1Sachartre 11225b98b509Sachartre return (ddi_prop_op_nblocks_blksize(dev, dip, prop_op, mod_flags, 11235b98b509Sachartre name, valuep, lengthp, nblocks, blksize)); 11241ae08745Sheppo } 11251ae08745Sheppo 112678fcd0a1Sachartre /* 112778fcd0a1Sachartre * Function: 112878fcd0a1Sachartre * vdc_is_opened 112978fcd0a1Sachartre * 113078fcd0a1Sachartre * Description: 113178fcd0a1Sachartre * This function checks if any slice of a given virtual disk is 113278fcd0a1Sachartre * currently opened. 113378fcd0a1Sachartre * 113478fcd0a1Sachartre * Parameters: 113578fcd0a1Sachartre * vdc - soft state pointer 113678fcd0a1Sachartre * 113778fcd0a1Sachartre * Return Values 113878fcd0a1Sachartre * B_TRUE - at least one slice is opened. 113978fcd0a1Sachartre * B_FALSE - no slice is opened. 114078fcd0a1Sachartre */ 114178fcd0a1Sachartre static boolean_t 114278fcd0a1Sachartre vdc_is_opened(vdc_t *vdc) 114378fcd0a1Sachartre { 114478fcd0a1Sachartre int i, nslices; 114578fcd0a1Sachartre 114678fcd0a1Sachartre switch (vdc->vdisk_type) { 114778fcd0a1Sachartre case VD_DISK_TYPE_DISK: 114878fcd0a1Sachartre nslices = V_NUMPAR; 114978fcd0a1Sachartre break; 115078fcd0a1Sachartre case VD_DISK_TYPE_SLICE: 115178fcd0a1Sachartre nslices = 1; 115278fcd0a1Sachartre break; 115378fcd0a1Sachartre case VD_DISK_TYPE_UNK: 115478fcd0a1Sachartre default: 115578fcd0a1Sachartre ASSERT(0); 115678fcd0a1Sachartre } 115778fcd0a1Sachartre 115878fcd0a1Sachartre /* check if there's any layered open */ 115978fcd0a1Sachartre for (i = 0; i < nslices; i++) { 116078fcd0a1Sachartre if (vdc->open_lyr[i] > 0) 116178fcd0a1Sachartre return (B_TRUE); 116278fcd0a1Sachartre } 116378fcd0a1Sachartre 116478fcd0a1Sachartre /* check if there is any other kind of open */ 116578fcd0a1Sachartre for (i = 0; i < OTYPCNT; i++) { 116678fcd0a1Sachartre if (vdc->open[i] != 0) 116778fcd0a1Sachartre return (B_TRUE); 116878fcd0a1Sachartre } 116978fcd0a1Sachartre 117078fcd0a1Sachartre return (B_FALSE); 117178fcd0a1Sachartre } 117278fcd0a1Sachartre 117378fcd0a1Sachartre static int 117478fcd0a1Sachartre vdc_mark_opened(vdc_t *vdc, int slice, int flag, int otyp) 117578fcd0a1Sachartre { 117678fcd0a1Sachartre uint8_t slicemask; 117778fcd0a1Sachartre int i; 117878fcd0a1Sachartre 117978fcd0a1Sachartre ASSERT(otyp < OTYPCNT); 118078fcd0a1Sachartre ASSERT(slice < V_NUMPAR); 118178fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 118278fcd0a1Sachartre 118378fcd0a1Sachartre slicemask = 1 << slice; 118478fcd0a1Sachartre 118578fcd0a1Sachartre /* check if slice is already exclusively opened */ 118678fcd0a1Sachartre if (vdc->open_excl & slicemask) 118778fcd0a1Sachartre return (EBUSY); 118878fcd0a1Sachartre 118978fcd0a1Sachartre /* if open exclusive, check if slice is already opened */ 119078fcd0a1Sachartre if (flag & FEXCL) { 119178fcd0a1Sachartre if (vdc->open_lyr[slice] > 0) 119278fcd0a1Sachartre return (EBUSY); 119378fcd0a1Sachartre for (i = 0; i < OTYPCNT; i++) { 119478fcd0a1Sachartre if (vdc->open[i] & slicemask) 119578fcd0a1Sachartre return (EBUSY); 119678fcd0a1Sachartre } 119778fcd0a1Sachartre vdc->open_excl |= slicemask; 119878fcd0a1Sachartre } 119978fcd0a1Sachartre 120078fcd0a1Sachartre /* mark slice as opened */ 120178fcd0a1Sachartre if (otyp == OTYP_LYR) { 120278fcd0a1Sachartre vdc->open_lyr[slice]++; 120378fcd0a1Sachartre } else { 120478fcd0a1Sachartre vdc->open[otyp] |= slicemask; 120578fcd0a1Sachartre } 120678fcd0a1Sachartre 120778fcd0a1Sachartre return (0); 120878fcd0a1Sachartre } 120978fcd0a1Sachartre 121078fcd0a1Sachartre static void 121178fcd0a1Sachartre vdc_mark_closed(vdc_t *vdc, int slice, int flag, int otyp) 121278fcd0a1Sachartre { 121378fcd0a1Sachartre uint8_t slicemask; 121478fcd0a1Sachartre 121578fcd0a1Sachartre ASSERT(otyp < OTYPCNT); 121678fcd0a1Sachartre ASSERT(slice < V_NUMPAR); 121778fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 121878fcd0a1Sachartre 121978fcd0a1Sachartre slicemask = 1 << slice; 122078fcd0a1Sachartre 122178fcd0a1Sachartre if (otyp == OTYP_LYR) { 122278fcd0a1Sachartre ASSERT(vdc->open_lyr[slice] > 0); 122378fcd0a1Sachartre vdc->open_lyr[slice]--; 122478fcd0a1Sachartre } else { 122578fcd0a1Sachartre vdc->open[otyp] &= ~slicemask; 122678fcd0a1Sachartre } 122778fcd0a1Sachartre 122878fcd0a1Sachartre if (flag & FEXCL) 122978fcd0a1Sachartre vdc->open_excl &= ~slicemask; 123078fcd0a1Sachartre } 123178fcd0a1Sachartre 12321ae08745Sheppo static int 12331ae08745Sheppo vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 12341ae08745Sheppo { 12351ae08745Sheppo _NOTE(ARGUNUSED(cred)) 12361ae08745Sheppo 1237179e09c2Sachartre int instance, nodelay; 123878fcd0a1Sachartre int slice, status = 0; 12391ae08745Sheppo vdc_t *vdc; 12401ae08745Sheppo 12411ae08745Sheppo ASSERT(dev != NULL); 12420d0c8d4bSnarayan instance = VDCUNIT(*dev); 12431ae08745Sheppo 124478fcd0a1Sachartre if (otyp >= OTYPCNT) 12451ae08745Sheppo return (EINVAL); 12461ae08745Sheppo 12471ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1248e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 12491ae08745Sheppo return (ENXIO); 12501ae08745Sheppo } 12511ae08745Sheppo 12523af08d82Slm66018 DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n", 12533af08d82Slm66018 getminor(*dev), flag, otyp); 12541ae08745Sheppo 125578fcd0a1Sachartre slice = VDCPART(*dev); 125678fcd0a1Sachartre 1257179e09c2Sachartre nodelay = flag & (FNDELAY | FNONBLOCK); 1258179e09c2Sachartre 1259179e09c2Sachartre if ((flag & FWRITE) && (!nodelay) && 1260179e09c2Sachartre !(VD_OP_SUPPORTED(vdc->operations, VD_OP_BWRITE))) { 1261179e09c2Sachartre return (EROFS); 1262179e09c2Sachartre } 1263179e09c2Sachartre 12641ae08745Sheppo mutex_enter(&vdc->lock); 126578fcd0a1Sachartre 126678fcd0a1Sachartre status = vdc_mark_opened(vdc, slice, flag, otyp); 126778fcd0a1Sachartre 126878fcd0a1Sachartre if (status != 0) { 126978fcd0a1Sachartre mutex_exit(&vdc->lock); 127078fcd0a1Sachartre return (status); 127178fcd0a1Sachartre } 127278fcd0a1Sachartre 1273179e09c2Sachartre if (nodelay) { 127478fcd0a1Sachartre 127578fcd0a1Sachartre /* don't resubmit a validate request if there's already one */ 127678fcd0a1Sachartre if (vdc->validate_pending > 0) { 127778fcd0a1Sachartre mutex_exit(&vdc->lock); 127878fcd0a1Sachartre return (0); 127978fcd0a1Sachartre } 128078fcd0a1Sachartre 128178fcd0a1Sachartre /* call vdc_validate() asynchronously to avoid blocking */ 128278fcd0a1Sachartre if (taskq_dispatch(system_taskq, vdc_validate_task, 128378fcd0a1Sachartre (void *)vdc, TQ_NOSLEEP) == NULL) { 128478fcd0a1Sachartre vdc_mark_closed(vdc, slice, flag, otyp); 128578fcd0a1Sachartre mutex_exit(&vdc->lock); 128678fcd0a1Sachartre return (ENXIO); 128778fcd0a1Sachartre } 128878fcd0a1Sachartre 128978fcd0a1Sachartre vdc->validate_pending++; 129078fcd0a1Sachartre mutex_exit(&vdc->lock); 129178fcd0a1Sachartre return (0); 129278fcd0a1Sachartre } 129378fcd0a1Sachartre 12941ae08745Sheppo mutex_exit(&vdc->lock); 12951ae08745Sheppo 129678fcd0a1Sachartre vdc_validate(vdc); 129778fcd0a1Sachartre 129878fcd0a1Sachartre mutex_enter(&vdc->lock); 129978fcd0a1Sachartre 130078fcd0a1Sachartre if (vdc->vdisk_label == VD_DISK_LABEL_UNK || 1301edcc0754Sachartre vdc->slice[slice].nblocks == 0) { 130278fcd0a1Sachartre vdc_mark_closed(vdc, slice, flag, otyp); 130378fcd0a1Sachartre status = EIO; 130478fcd0a1Sachartre } 130578fcd0a1Sachartre 130678fcd0a1Sachartre mutex_exit(&vdc->lock); 130778fcd0a1Sachartre 130878fcd0a1Sachartre return (status); 13091ae08745Sheppo } 13101ae08745Sheppo 13111ae08745Sheppo static int 13121ae08745Sheppo vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 13131ae08745Sheppo { 13141ae08745Sheppo _NOTE(ARGUNUSED(cred)) 13151ae08745Sheppo 13161ae08745Sheppo int instance; 131778fcd0a1Sachartre int slice; 13182f5224aeSachartre int rv, rval; 13191ae08745Sheppo vdc_t *vdc; 13201ae08745Sheppo 13210d0c8d4bSnarayan instance = VDCUNIT(dev); 13221ae08745Sheppo 132378fcd0a1Sachartre if (otyp >= OTYPCNT) 13241ae08745Sheppo return (EINVAL); 13251ae08745Sheppo 13261ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1327e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 13281ae08745Sheppo return (ENXIO); 13291ae08745Sheppo } 13301ae08745Sheppo 13313af08d82Slm66018 DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp); 13321ae08745Sheppo 133378fcd0a1Sachartre slice = VDCPART(dev); 133478fcd0a1Sachartre 13358259acd8Szk194757 /* 13368259acd8Szk194757 * Attempt to flush the W$ on a close operation. If this is 13378259acd8Szk194757 * not a supported IOCTL command or the backing device is read-only 13388259acd8Szk194757 * do not fail the close operation. 13398259acd8Szk194757 */ 13402f5224aeSachartre rv = vd_process_ioctl(dev, DKIOCFLUSHWRITECACHE, NULL, FKIOCTL, &rval); 13418259acd8Szk194757 13428259acd8Szk194757 if (rv != 0 && rv != ENOTSUP && rv != ENOTTY && rv != EROFS) { 13438259acd8Szk194757 DMSG(vdc, 0, "[%d] flush failed with error %d on close\n", 13448259acd8Szk194757 instance, rv); 13458259acd8Szk194757 return (EIO); 13468259acd8Szk194757 } 13478259acd8Szk194757 13481ae08745Sheppo mutex_enter(&vdc->lock); 134978fcd0a1Sachartre vdc_mark_closed(vdc, slice, flag, otyp); 13501ae08745Sheppo mutex_exit(&vdc->lock); 13511ae08745Sheppo 13521ae08745Sheppo return (0); 13531ae08745Sheppo } 13541ae08745Sheppo 13551ae08745Sheppo static int 13561ae08745Sheppo vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 13571ae08745Sheppo { 13581ae08745Sheppo _NOTE(ARGUNUSED(credp)) 13591ae08745Sheppo 13602f5224aeSachartre return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode, rvalp)); 13611ae08745Sheppo } 13621ae08745Sheppo 13631ae08745Sheppo static int 13641ae08745Sheppo vdc_print(dev_t dev, char *str) 13651ae08745Sheppo { 13660d0c8d4bSnarayan cmn_err(CE_NOTE, "vdc%d: %s", VDCUNIT(dev), str); 13671ae08745Sheppo return (0); 13681ae08745Sheppo } 13691ae08745Sheppo 13701ae08745Sheppo static int 13711ae08745Sheppo vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 13721ae08745Sheppo { 1373d10e4ef2Snarayan int rv; 1374d10e4ef2Snarayan size_t nbytes = nblk * DEV_BSIZE; 13750d0c8d4bSnarayan int instance = VDCUNIT(dev); 1376d10e4ef2Snarayan vdc_t *vdc = NULL; 13771ae08745Sheppo 13781ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1379e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 13801ae08745Sheppo return (ENXIO); 13811ae08745Sheppo } 13821ae08745Sheppo 13833af08d82Slm66018 DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n", 13843af08d82Slm66018 instance, nbytes, blkno, (void *)addr); 13853af08d82Slm66018 rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes, 13860d0c8d4bSnarayan VDCPART(dev), blkno, CB_STRATEGY, 0, VIO_write_dir); 13873af08d82Slm66018 if (rv) { 13883af08d82Slm66018 DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv); 13891ae08745Sheppo return (rv); 13901ae08745Sheppo } 13911ae08745Sheppo 13923af08d82Slm66018 if (ddi_in_panic()) 13933c2ebf09Sachartre (void) vdc_drain_response(vdc, NULL); 13943af08d82Slm66018 13953af08d82Slm66018 DMSG(vdc, 0, "[%d] End\n", instance); 13963af08d82Slm66018 13973af08d82Slm66018 return (0); 13983af08d82Slm66018 } 13993af08d82Slm66018 14001ae08745Sheppo /* -------------------------------------------------------------------------- */ 14011ae08745Sheppo 14021ae08745Sheppo /* 14031ae08745Sheppo * Disk access routines 14041ae08745Sheppo * 14051ae08745Sheppo */ 14061ae08745Sheppo 14071ae08745Sheppo /* 14081ae08745Sheppo * vdc_strategy() 14091ae08745Sheppo * 14101ae08745Sheppo * Return Value: 14111ae08745Sheppo * 0: As per strategy(9E), the strategy() function must return 0 14121ae08745Sheppo * [ bioerror(9f) sets b_flags to the proper error code ] 14131ae08745Sheppo */ 14141ae08745Sheppo static int 14151ae08745Sheppo vdc_strategy(struct buf *buf) 14161ae08745Sheppo { 14171ae08745Sheppo int rv = -1; 14181ae08745Sheppo vdc_t *vdc = NULL; 14190d0c8d4bSnarayan int instance = VDCUNIT(buf->b_edev); 14201ae08745Sheppo int op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE; 142187a7269eSachartre int slice; 14221ae08745Sheppo 14231ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1424e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 14251ae08745Sheppo bioerror(buf, ENXIO); 14261ae08745Sheppo biodone(buf); 14271ae08745Sheppo return (0); 14281ae08745Sheppo } 14291ae08745Sheppo 14303af08d82Slm66018 DMSG(vdc, 2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p\n", 14313af08d82Slm66018 instance, (buf->b_flags & B_READ) ? "Read" : "Write", 14323af08d82Slm66018 buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr); 1433d10e4ef2Snarayan 14341ae08745Sheppo bp_mapin(buf); 14351ae08745Sheppo 143687a7269eSachartre if ((long)buf->b_private == VD_SLICE_NONE) { 143787a7269eSachartre /* I/O using an absolute disk offset */ 143887a7269eSachartre slice = VD_SLICE_NONE; 143987a7269eSachartre } else { 144087a7269eSachartre slice = VDCPART(buf->b_edev); 144187a7269eSachartre } 144287a7269eSachartre 14433af08d82Slm66018 rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr, 144487a7269eSachartre buf->b_bcount, slice, buf->b_lblkno, 14453af08d82Slm66018 CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir : 14463af08d82Slm66018 VIO_write_dir); 14473af08d82Slm66018 1448d10e4ef2Snarayan /* 1449d10e4ef2Snarayan * If the request was successfully sent, the strategy call returns and 1450d10e4ef2Snarayan * the ACK handler calls the bioxxx functions when the vDisk server is 1451366a92acSlm66018 * done otherwise we handle the error here. 1452d10e4ef2Snarayan */ 1453d10e4ef2Snarayan if (rv) { 14543af08d82Slm66018 DMSG(vdc, 0, "Failed to read/write (err=%d)\n", rv); 14551ae08745Sheppo bioerror(buf, rv); 14561ae08745Sheppo biodone(buf); 14573c2ebf09Sachartre } else if (ddi_in_panic()) { 14583c2ebf09Sachartre (void) vdc_drain_response(vdc, buf); 1459d10e4ef2Snarayan } 1460d10e4ef2Snarayan 14611ae08745Sheppo return (0); 14621ae08745Sheppo } 14631ae08745Sheppo 14640d0c8d4bSnarayan /* 14650d0c8d4bSnarayan * Function: 14660d0c8d4bSnarayan * vdc_min 14670d0c8d4bSnarayan * 14680d0c8d4bSnarayan * Description: 14690d0c8d4bSnarayan * Routine to limit the size of a data transfer. Used in 14700d0c8d4bSnarayan * conjunction with physio(9F). 14710d0c8d4bSnarayan * 14720d0c8d4bSnarayan * Arguments: 14730d0c8d4bSnarayan * bp - pointer to the indicated buf(9S) struct. 14740d0c8d4bSnarayan * 14750d0c8d4bSnarayan */ 14760d0c8d4bSnarayan static void 14770d0c8d4bSnarayan vdc_min(struct buf *bufp) 14780d0c8d4bSnarayan { 14790d0c8d4bSnarayan vdc_t *vdc = NULL; 14800d0c8d4bSnarayan int instance = VDCUNIT(bufp->b_edev); 14810d0c8d4bSnarayan 14820d0c8d4bSnarayan vdc = ddi_get_soft_state(vdc_state, instance); 14830d0c8d4bSnarayan VERIFY(vdc != NULL); 14840d0c8d4bSnarayan 14850d0c8d4bSnarayan if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->block_size)) { 14860d0c8d4bSnarayan bufp->b_bcount = vdc->max_xfer_sz * vdc->block_size; 14870d0c8d4bSnarayan } 14880d0c8d4bSnarayan } 14891ae08745Sheppo 14901ae08745Sheppo static int 14911ae08745Sheppo vdc_read(dev_t dev, struct uio *uio, cred_t *cred) 14921ae08745Sheppo { 14931ae08745Sheppo _NOTE(ARGUNUSED(cred)) 14941ae08745Sheppo 14950d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 14960d0c8d4bSnarayan return (physio(vdc_strategy, NULL, dev, B_READ, vdc_min, uio)); 14971ae08745Sheppo } 14981ae08745Sheppo 14991ae08745Sheppo static int 15001ae08745Sheppo vdc_write(dev_t dev, struct uio *uio, cred_t *cred) 15011ae08745Sheppo { 15021ae08745Sheppo _NOTE(ARGUNUSED(cred)) 15031ae08745Sheppo 15040d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15050d0c8d4bSnarayan return (physio(vdc_strategy, NULL, dev, B_WRITE, vdc_min, uio)); 15061ae08745Sheppo } 15071ae08745Sheppo 15081ae08745Sheppo static int 15091ae08745Sheppo vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred) 15101ae08745Sheppo { 15111ae08745Sheppo _NOTE(ARGUNUSED(cred)) 15121ae08745Sheppo 15130d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15140d0c8d4bSnarayan return (aphysio(vdc_strategy, anocancel, dev, B_READ, vdc_min, aio)); 15151ae08745Sheppo } 15161ae08745Sheppo 15171ae08745Sheppo static int 15181ae08745Sheppo vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred) 15191ae08745Sheppo { 15201ae08745Sheppo _NOTE(ARGUNUSED(cred)) 15211ae08745Sheppo 15220d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15230d0c8d4bSnarayan return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, vdc_min, aio)); 15241ae08745Sheppo } 15251ae08745Sheppo 15261ae08745Sheppo 15271ae08745Sheppo /* -------------------------------------------------------------------------- */ 15281ae08745Sheppo 15291ae08745Sheppo /* 15301ae08745Sheppo * Handshake support 15311ae08745Sheppo */ 15321ae08745Sheppo 15331ae08745Sheppo 15340a55fbb7Slm66018 /* 15350a55fbb7Slm66018 * Function: 15360a55fbb7Slm66018 * vdc_init_ver_negotiation() 15370a55fbb7Slm66018 * 15380a55fbb7Slm66018 * Description: 15390a55fbb7Slm66018 * 15400a55fbb7Slm66018 * Arguments: 15410a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 15420a55fbb7Slm66018 * 15430a55fbb7Slm66018 * Return Code: 15440a55fbb7Slm66018 * 0 - Success 15450a55fbb7Slm66018 */ 15461ae08745Sheppo static int 15470a55fbb7Slm66018 vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver) 15481ae08745Sheppo { 15491ae08745Sheppo vio_ver_msg_t pkt; 15501ae08745Sheppo size_t msglen = sizeof (pkt); 15511ae08745Sheppo int status = -1; 15521ae08745Sheppo 15531ae08745Sheppo ASSERT(vdc != NULL); 15541ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 15551ae08745Sheppo 15563af08d82Slm66018 DMSG(vdc, 0, "[%d] Entered.\n", vdc->instance); 1557e1ebb9ecSlm66018 15581ae08745Sheppo /* 15591ae08745Sheppo * set the Session ID to a unique value 15601ae08745Sheppo * (the lower 32 bits of the clock tick) 15611ae08745Sheppo */ 15621ae08745Sheppo vdc->session_id = ((uint32_t)gettick() & 0xffffffff); 15633af08d82Slm66018 DMSG(vdc, 0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id); 15641ae08745Sheppo 15651ae08745Sheppo pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 15661ae08745Sheppo pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 15671ae08745Sheppo pkt.tag.vio_subtype_env = VIO_VER_INFO; 15681ae08745Sheppo pkt.tag.vio_sid = vdc->session_id; 15691ae08745Sheppo pkt.dev_class = VDEV_DISK; 15700a55fbb7Slm66018 pkt.ver_major = ver.major; 15710a55fbb7Slm66018 pkt.ver_minor = ver.minor; 15721ae08745Sheppo 15730a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 15743af08d82Slm66018 DMSG(vdc, 0, "[%d] Ver info sent (status = %d)\n", 15753af08d82Slm66018 vdc->instance, status); 15761ae08745Sheppo if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 15773af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: " 15788cd10891Snarayan "id(%lx) rv(%d) size(%ld)", vdc->instance, 15798cd10891Snarayan vdc->curr_server->ldc_handle, status, msglen); 15801ae08745Sheppo if (msglen != sizeof (vio_ver_msg_t)) 15811ae08745Sheppo status = ENOMSG; 15821ae08745Sheppo } 15831ae08745Sheppo 15841ae08745Sheppo return (status); 15851ae08745Sheppo } 15861ae08745Sheppo 15870a55fbb7Slm66018 /* 15880a55fbb7Slm66018 * Function: 15893af08d82Slm66018 * vdc_ver_negotiation() 15903af08d82Slm66018 * 15913af08d82Slm66018 * Description: 15923af08d82Slm66018 * 15933af08d82Slm66018 * Arguments: 15943af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 15953af08d82Slm66018 * 15963af08d82Slm66018 * Return Code: 15973af08d82Slm66018 * 0 - Success 15983af08d82Slm66018 */ 15993af08d82Slm66018 static int 16003af08d82Slm66018 vdc_ver_negotiation(vdc_t *vdcp) 16013af08d82Slm66018 { 16023af08d82Slm66018 vio_msg_t vio_msg; 16033af08d82Slm66018 int status; 16043af08d82Slm66018 16053af08d82Slm66018 if (status = vdc_init_ver_negotiation(vdcp, vdc_version[0])) 16063af08d82Slm66018 return (status); 16073af08d82Slm66018 16083af08d82Slm66018 /* release lock and wait for response */ 16093af08d82Slm66018 mutex_exit(&vdcp->lock); 16103af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 16113af08d82Slm66018 mutex_enter(&vdcp->lock); 16123af08d82Slm66018 if (status) { 16133af08d82Slm66018 DMSG(vdcp, 0, 16143af08d82Slm66018 "[%d] Failed waiting for Ver negotiation response, rv(%d)", 16153af08d82Slm66018 vdcp->instance, status); 16163af08d82Slm66018 return (status); 16173af08d82Slm66018 } 16183af08d82Slm66018 16193af08d82Slm66018 /* check type and sub_type ... */ 16203af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 16213af08d82Slm66018 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 16223af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n", 16233af08d82Slm66018 vdcp->instance); 16243af08d82Slm66018 return (EPROTO); 16253af08d82Slm66018 } 16263af08d82Slm66018 16273af08d82Slm66018 return (vdc_handle_ver_msg(vdcp, (vio_ver_msg_t *)&vio_msg)); 16283af08d82Slm66018 } 16293af08d82Slm66018 16303af08d82Slm66018 /* 16313af08d82Slm66018 * Function: 16320a55fbb7Slm66018 * vdc_init_attr_negotiation() 16330a55fbb7Slm66018 * 16340a55fbb7Slm66018 * Description: 16350a55fbb7Slm66018 * 16360a55fbb7Slm66018 * Arguments: 16370a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 16380a55fbb7Slm66018 * 16390a55fbb7Slm66018 * Return Code: 16400a55fbb7Slm66018 * 0 - Success 16410a55fbb7Slm66018 */ 16421ae08745Sheppo static int 16431ae08745Sheppo vdc_init_attr_negotiation(vdc_t *vdc) 16441ae08745Sheppo { 16451ae08745Sheppo vd_attr_msg_t pkt; 16461ae08745Sheppo size_t msglen = sizeof (pkt); 16471ae08745Sheppo int status; 16481ae08745Sheppo 16491ae08745Sheppo ASSERT(vdc != NULL); 16501ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 16511ae08745Sheppo 16523af08d82Slm66018 DMSG(vdc, 0, "[%d] entered\n", vdc->instance); 16531ae08745Sheppo 16541ae08745Sheppo /* fill in tag */ 16551ae08745Sheppo pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 16561ae08745Sheppo pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 16571ae08745Sheppo pkt.tag.vio_subtype_env = VIO_ATTR_INFO; 16581ae08745Sheppo pkt.tag.vio_sid = vdc->session_id; 16591ae08745Sheppo /* fill in payload */ 16601ae08745Sheppo pkt.max_xfer_sz = vdc->max_xfer_sz; 16611ae08745Sheppo pkt.vdisk_block_size = vdc->block_size; 1662f0ca1d9aSsb155480 pkt.xfer_mode = VIO_DRING_MODE_V1_0; 16631ae08745Sheppo pkt.operations = 0; /* server will set bits of valid operations */ 16641ae08745Sheppo pkt.vdisk_type = 0; /* server will set to valid device type */ 166517cadca8Slm66018 pkt.vdisk_media = 0; /* server will set to valid media type */ 16661ae08745Sheppo pkt.vdisk_size = 0; /* server will set to valid size */ 16671ae08745Sheppo 16680a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 16693af08d82Slm66018 DMSG(vdc, 0, "Attr info sent (status = %d)\n", status); 16701ae08745Sheppo 1671f3241e46Sanbui if ((status != 0) || (msglen != sizeof (vd_attr_msg_t))) { 16723af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: " 16738cd10891Snarayan "id(%lx) rv(%d) size(%ld)", vdc->instance, 16748cd10891Snarayan vdc->curr_server->ldc_handle, status, msglen); 1675f3241e46Sanbui if (msglen != sizeof (vd_attr_msg_t)) 16761ae08745Sheppo status = ENOMSG; 16771ae08745Sheppo } 16781ae08745Sheppo 16791ae08745Sheppo return (status); 16801ae08745Sheppo } 16811ae08745Sheppo 16820a55fbb7Slm66018 /* 16830a55fbb7Slm66018 * Function: 16843af08d82Slm66018 * vdc_attr_negotiation() 16853af08d82Slm66018 * 16863af08d82Slm66018 * Description: 16873af08d82Slm66018 * 16883af08d82Slm66018 * Arguments: 16893af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 16903af08d82Slm66018 * 16913af08d82Slm66018 * Return Code: 16923af08d82Slm66018 * 0 - Success 16933af08d82Slm66018 */ 16943af08d82Slm66018 static int 16953af08d82Slm66018 vdc_attr_negotiation(vdc_t *vdcp) 16963af08d82Slm66018 { 16973af08d82Slm66018 int status; 16983af08d82Slm66018 vio_msg_t vio_msg; 16993af08d82Slm66018 17003af08d82Slm66018 if (status = vdc_init_attr_negotiation(vdcp)) 17013af08d82Slm66018 return (status); 17023af08d82Slm66018 17033af08d82Slm66018 /* release lock and wait for response */ 17043af08d82Slm66018 mutex_exit(&vdcp->lock); 17053af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 17063af08d82Slm66018 mutex_enter(&vdcp->lock); 17073af08d82Slm66018 if (status) { 17083af08d82Slm66018 DMSG(vdcp, 0, 17093af08d82Slm66018 "[%d] Failed waiting for Attr negotiation response, rv(%d)", 17103af08d82Slm66018 vdcp->instance, status); 17113af08d82Slm66018 return (status); 17123af08d82Slm66018 } 17133af08d82Slm66018 17143af08d82Slm66018 /* check type and sub_type ... */ 17153af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 17163af08d82Slm66018 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 17173af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n", 17183af08d82Slm66018 vdcp->instance); 17193af08d82Slm66018 return (EPROTO); 17203af08d82Slm66018 } 17213af08d82Slm66018 17223af08d82Slm66018 return (vdc_handle_attr_msg(vdcp, (vd_attr_msg_t *)&vio_msg)); 17233af08d82Slm66018 } 17243af08d82Slm66018 17253af08d82Slm66018 17263af08d82Slm66018 /* 17273af08d82Slm66018 * Function: 17280a55fbb7Slm66018 * vdc_init_dring_negotiate() 17290a55fbb7Slm66018 * 17300a55fbb7Slm66018 * Description: 17310a55fbb7Slm66018 * 17320a55fbb7Slm66018 * Arguments: 17330a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 17340a55fbb7Slm66018 * 17350a55fbb7Slm66018 * Return Code: 17360a55fbb7Slm66018 * 0 - Success 17370a55fbb7Slm66018 */ 17381ae08745Sheppo static int 17391ae08745Sheppo vdc_init_dring_negotiate(vdc_t *vdc) 17401ae08745Sheppo { 17411ae08745Sheppo vio_dring_reg_msg_t pkt; 17421ae08745Sheppo size_t msglen = sizeof (pkt); 17431ae08745Sheppo int status = -1; 17443af08d82Slm66018 int retry; 17453af08d82Slm66018 int nretries = 10; 17461ae08745Sheppo 17471ae08745Sheppo ASSERT(vdc != NULL); 17481ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 17491ae08745Sheppo 17503af08d82Slm66018 for (retry = 0; retry < nretries; retry++) { 17511ae08745Sheppo status = vdc_init_descriptor_ring(vdc); 17523af08d82Slm66018 if (status != EAGAIN) 17533af08d82Slm66018 break; 17543af08d82Slm66018 drv_usecwait(vdc_min_timeout_ldc); 17553af08d82Slm66018 } 17563af08d82Slm66018 17571ae08745Sheppo if (status != 0) { 17583af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n", 17591ae08745Sheppo vdc->instance, status); 17601ae08745Sheppo return (status); 17611ae08745Sheppo } 17623af08d82Slm66018 17633af08d82Slm66018 DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n", 1764e1ebb9ecSlm66018 vdc->instance, status); 17651ae08745Sheppo 17661ae08745Sheppo /* fill in tag */ 17671ae08745Sheppo pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 17681ae08745Sheppo pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 17691ae08745Sheppo pkt.tag.vio_subtype_env = VIO_DRING_REG; 17701ae08745Sheppo pkt.tag.vio_sid = vdc->session_id; 17711ae08745Sheppo /* fill in payload */ 17721ae08745Sheppo pkt.dring_ident = 0; 1773e1ebb9ecSlm66018 pkt.num_descriptors = vdc->dring_len; 1774e1ebb9ecSlm66018 pkt.descriptor_size = vdc->dring_entry_size; 17751ae08745Sheppo pkt.options = (VIO_TX_DRING | VIO_RX_DRING); 17761ae08745Sheppo pkt.ncookies = vdc->dring_cookie_count; 17771ae08745Sheppo pkt.cookie[0] = vdc->dring_cookie[0]; /* for now just one cookie */ 17781ae08745Sheppo 17790a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 17801ae08745Sheppo if (status != 0) { 17813af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)", 1782e1ebb9ecSlm66018 vdc->instance, status); 17831ae08745Sheppo } 17841ae08745Sheppo 17851ae08745Sheppo return (status); 17861ae08745Sheppo } 17871ae08745Sheppo 17881ae08745Sheppo 17893af08d82Slm66018 /* 17903af08d82Slm66018 * Function: 17913af08d82Slm66018 * vdc_dring_negotiation() 17923af08d82Slm66018 * 17933af08d82Slm66018 * Description: 17943af08d82Slm66018 * 17953af08d82Slm66018 * Arguments: 17963af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 17973af08d82Slm66018 * 17983af08d82Slm66018 * Return Code: 17993af08d82Slm66018 * 0 - Success 18003af08d82Slm66018 */ 18013af08d82Slm66018 static int 18023af08d82Slm66018 vdc_dring_negotiation(vdc_t *vdcp) 18033af08d82Slm66018 { 18043af08d82Slm66018 int status; 18053af08d82Slm66018 vio_msg_t vio_msg; 18063af08d82Slm66018 18073af08d82Slm66018 if (status = vdc_init_dring_negotiate(vdcp)) 18083af08d82Slm66018 return (status); 18093af08d82Slm66018 18103af08d82Slm66018 /* release lock and wait for response */ 18113af08d82Slm66018 mutex_exit(&vdcp->lock); 18123af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 18133af08d82Slm66018 mutex_enter(&vdcp->lock); 18143af08d82Slm66018 if (status) { 18153af08d82Slm66018 DMSG(vdcp, 0, 18163af08d82Slm66018 "[%d] Failed waiting for Dring negotiation response," 18173af08d82Slm66018 " rv(%d)", vdcp->instance, status); 18183af08d82Slm66018 return (status); 18193af08d82Slm66018 } 18203af08d82Slm66018 18213af08d82Slm66018 /* check type and sub_type ... */ 18223af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 18233af08d82Slm66018 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 18243af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n", 18253af08d82Slm66018 vdcp->instance); 18263af08d82Slm66018 return (EPROTO); 18273af08d82Slm66018 } 18283af08d82Slm66018 18293af08d82Slm66018 return (vdc_handle_dring_reg_msg(vdcp, 18303af08d82Slm66018 (vio_dring_reg_msg_t *)&vio_msg)); 18313af08d82Slm66018 } 18323af08d82Slm66018 18333af08d82Slm66018 18343af08d82Slm66018 /* 18353af08d82Slm66018 * Function: 18363af08d82Slm66018 * vdc_send_rdx() 18373af08d82Slm66018 * 18383af08d82Slm66018 * Description: 18393af08d82Slm66018 * 18403af08d82Slm66018 * Arguments: 18413af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 18423af08d82Slm66018 * 18433af08d82Slm66018 * Return Code: 18443af08d82Slm66018 * 0 - Success 18453af08d82Slm66018 */ 18463af08d82Slm66018 static int 18473af08d82Slm66018 vdc_send_rdx(vdc_t *vdcp) 18483af08d82Slm66018 { 18493af08d82Slm66018 vio_msg_t msg; 18503af08d82Slm66018 size_t msglen = sizeof (vio_msg_t); 18513af08d82Slm66018 int status; 18523af08d82Slm66018 18533af08d82Slm66018 /* 18543af08d82Slm66018 * Send an RDX message to vds to indicate we are ready 18553af08d82Slm66018 * to send data 18563af08d82Slm66018 */ 18573af08d82Slm66018 msg.tag.vio_msgtype = VIO_TYPE_CTRL; 18583af08d82Slm66018 msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 18593af08d82Slm66018 msg.tag.vio_subtype_env = VIO_RDX; 18603af08d82Slm66018 msg.tag.vio_sid = vdcp->session_id; 18613af08d82Slm66018 status = vdc_send(vdcp, (caddr_t)&msg, &msglen); 18623af08d82Slm66018 if (status != 0) { 18633af08d82Slm66018 DMSG(vdcp, 0, "[%d] Failed to send RDX message (%d)", 18643af08d82Slm66018 vdcp->instance, status); 18653af08d82Slm66018 } 18663af08d82Slm66018 18673af08d82Slm66018 return (status); 18683af08d82Slm66018 } 18693af08d82Slm66018 18703af08d82Slm66018 /* 18713af08d82Slm66018 * Function: 18723af08d82Slm66018 * vdc_handle_rdx() 18733af08d82Slm66018 * 18743af08d82Slm66018 * Description: 18753af08d82Slm66018 * 18763af08d82Slm66018 * Arguments: 18773af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 18783af08d82Slm66018 * msgp - received msg 18793af08d82Slm66018 * 18803af08d82Slm66018 * Return Code: 18813af08d82Slm66018 * 0 - Success 18823af08d82Slm66018 */ 18833af08d82Slm66018 static int 18843af08d82Slm66018 vdc_handle_rdx(vdc_t *vdcp, vio_rdx_msg_t *msgp) 18853af08d82Slm66018 { 18863af08d82Slm66018 _NOTE(ARGUNUSED(vdcp)) 18873af08d82Slm66018 _NOTE(ARGUNUSED(msgp)) 18883af08d82Slm66018 18893af08d82Slm66018 ASSERT(msgp->tag.vio_msgtype == VIO_TYPE_CTRL); 18903af08d82Slm66018 ASSERT(msgp->tag.vio_subtype == VIO_SUBTYPE_ACK); 18913af08d82Slm66018 ASSERT(msgp->tag.vio_subtype_env == VIO_RDX); 18923af08d82Slm66018 18933af08d82Slm66018 DMSG(vdcp, 1, "[%d] Got an RDX msg", vdcp->instance); 18943af08d82Slm66018 18953af08d82Slm66018 return (0); 18963af08d82Slm66018 } 18973af08d82Slm66018 18983af08d82Slm66018 /* 18993af08d82Slm66018 * Function: 19003af08d82Slm66018 * vdc_rdx_exchange() 19013af08d82Slm66018 * 19023af08d82Slm66018 * Description: 19033af08d82Slm66018 * 19043af08d82Slm66018 * Arguments: 19053af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 19063af08d82Slm66018 * 19073af08d82Slm66018 * Return Code: 19083af08d82Slm66018 * 0 - Success 19093af08d82Slm66018 */ 19103af08d82Slm66018 static int 19113af08d82Slm66018 vdc_rdx_exchange(vdc_t *vdcp) 19123af08d82Slm66018 { 19133af08d82Slm66018 int status; 19143af08d82Slm66018 vio_msg_t vio_msg; 19153af08d82Slm66018 19163af08d82Slm66018 if (status = vdc_send_rdx(vdcp)) 19173af08d82Slm66018 return (status); 19183af08d82Slm66018 19193af08d82Slm66018 /* release lock and wait for response */ 19203af08d82Slm66018 mutex_exit(&vdcp->lock); 19213af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 19223af08d82Slm66018 mutex_enter(&vdcp->lock); 19233af08d82Slm66018 if (status) { 192487a7269eSachartre DMSG(vdcp, 0, "[%d] Failed waiting for RDX response, rv(%d)", 192587a7269eSachartre vdcp->instance, status); 19263af08d82Slm66018 return (status); 19273af08d82Slm66018 } 19283af08d82Slm66018 19293af08d82Slm66018 /* check type and sub_type ... */ 19303af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 19313af08d82Slm66018 vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) { 193287a7269eSachartre DMSG(vdcp, 0, "[%d] Invalid RDX response\n", vdcp->instance); 19333af08d82Slm66018 return (EPROTO); 19343af08d82Slm66018 } 19353af08d82Slm66018 19363af08d82Slm66018 return (vdc_handle_rdx(vdcp, (vio_rdx_msg_t *)&vio_msg)); 19373af08d82Slm66018 } 19383af08d82Slm66018 19393af08d82Slm66018 19401ae08745Sheppo /* -------------------------------------------------------------------------- */ 19411ae08745Sheppo 19421ae08745Sheppo /* 19431ae08745Sheppo * LDC helper routines 19441ae08745Sheppo */ 19451ae08745Sheppo 19463af08d82Slm66018 static int 19473af08d82Slm66018 vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp) 19483af08d82Slm66018 { 19493af08d82Slm66018 int status; 19503af08d82Slm66018 boolean_t q_has_pkts = B_FALSE; 195117cadca8Slm66018 uint64_t delay_time; 19523af08d82Slm66018 size_t len; 19533af08d82Slm66018 19543af08d82Slm66018 mutex_enter(&vdc->read_lock); 19553af08d82Slm66018 19563af08d82Slm66018 if (vdc->read_state == VDC_READ_IDLE) 19573af08d82Slm66018 vdc->read_state = VDC_READ_WAITING; 19583af08d82Slm66018 19593af08d82Slm66018 while (vdc->read_state != VDC_READ_PENDING) { 19603af08d82Slm66018 19613af08d82Slm66018 /* detect if the connection has been reset */ 19623af08d82Slm66018 if (vdc->read_state == VDC_READ_RESET) { 19633af08d82Slm66018 status = ECONNRESET; 19643af08d82Slm66018 goto done; 19653af08d82Slm66018 } 19663af08d82Slm66018 19673af08d82Slm66018 cv_wait(&vdc->read_cv, &vdc->read_lock); 19683af08d82Slm66018 } 19693af08d82Slm66018 19703af08d82Slm66018 /* 19713af08d82Slm66018 * Until we get a blocking ldc read we have to retry 19723af08d82Slm66018 * until the entire LDC message has arrived before 19733af08d82Slm66018 * ldc_read() will succeed. Note we also bail out if 1974eff7243fSlm66018 * the channel is reset or goes away. 19753af08d82Slm66018 */ 19763af08d82Slm66018 delay_time = vdc_ldc_read_init_delay; 19773af08d82Slm66018 loop: 19783af08d82Slm66018 len = *nbytesp; 19798cd10891Snarayan status = ldc_read(vdc->curr_server->ldc_handle, (caddr_t)msgp, &len); 19803af08d82Slm66018 switch (status) { 19813af08d82Slm66018 case EAGAIN: 19823af08d82Slm66018 delay_time *= 2; 19833af08d82Slm66018 if (delay_time >= vdc_ldc_read_max_delay) 19843af08d82Slm66018 delay_time = vdc_ldc_read_max_delay; 19853af08d82Slm66018 delay(delay_time); 19863af08d82Slm66018 goto loop; 19873af08d82Slm66018 19883af08d82Slm66018 case 0: 19893af08d82Slm66018 if (len == 0) { 199017cadca8Slm66018 DMSG(vdc, 1, "[%d] ldc_read returned 0 bytes with " 19913af08d82Slm66018 "no error!\n", vdc->instance); 19923af08d82Slm66018 goto loop; 19933af08d82Slm66018 } 19943af08d82Slm66018 19953af08d82Slm66018 *nbytesp = len; 19963af08d82Slm66018 19973af08d82Slm66018 /* 19983af08d82Slm66018 * If there are pending messages, leave the 19993af08d82Slm66018 * read state as pending. Otherwise, set the state 20003af08d82Slm66018 * back to idle. 20013af08d82Slm66018 */ 20028cd10891Snarayan status = ldc_chkq(vdc->curr_server->ldc_handle, &q_has_pkts); 20033af08d82Slm66018 if (status == 0 && !q_has_pkts) 20043af08d82Slm66018 vdc->read_state = VDC_READ_IDLE; 20053af08d82Slm66018 20063af08d82Slm66018 break; 20073af08d82Slm66018 default: 20083af08d82Slm66018 DMSG(vdc, 0, "ldc_read returned %d\n", status); 20093af08d82Slm66018 break; 20103af08d82Slm66018 } 20113af08d82Slm66018 20123af08d82Slm66018 done: 20133af08d82Slm66018 mutex_exit(&vdc->read_lock); 20143af08d82Slm66018 20153af08d82Slm66018 return (status); 20163af08d82Slm66018 } 20173af08d82Slm66018 20183af08d82Slm66018 20193af08d82Slm66018 20203af08d82Slm66018 #ifdef DEBUG 20213af08d82Slm66018 void 20223af08d82Slm66018 vdc_decode_tag(vdc_t *vdcp, vio_msg_t *msg) 20233af08d82Slm66018 { 20243af08d82Slm66018 char *ms, *ss, *ses; 20253af08d82Slm66018 switch (msg->tag.vio_msgtype) { 20263af08d82Slm66018 #define Q(_s) case _s : ms = #_s; break; 20273af08d82Slm66018 Q(VIO_TYPE_CTRL) 20283af08d82Slm66018 Q(VIO_TYPE_DATA) 20293af08d82Slm66018 Q(VIO_TYPE_ERR) 20303af08d82Slm66018 #undef Q 20313af08d82Slm66018 default: ms = "unknown"; break; 20323af08d82Slm66018 } 20333af08d82Slm66018 20343af08d82Slm66018 switch (msg->tag.vio_subtype) { 20353af08d82Slm66018 #define Q(_s) case _s : ss = #_s; break; 20363af08d82Slm66018 Q(VIO_SUBTYPE_INFO) 20373af08d82Slm66018 Q(VIO_SUBTYPE_ACK) 20383af08d82Slm66018 Q(VIO_SUBTYPE_NACK) 20393af08d82Slm66018 #undef Q 20403af08d82Slm66018 default: ss = "unknown"; break; 20413af08d82Slm66018 } 20423af08d82Slm66018 20433af08d82Slm66018 switch (msg->tag.vio_subtype_env) { 20443af08d82Slm66018 #define Q(_s) case _s : ses = #_s; break; 20453af08d82Slm66018 Q(VIO_VER_INFO) 20463af08d82Slm66018 Q(VIO_ATTR_INFO) 20473af08d82Slm66018 Q(VIO_DRING_REG) 20483af08d82Slm66018 Q(VIO_DRING_UNREG) 20493af08d82Slm66018 Q(VIO_RDX) 20503af08d82Slm66018 Q(VIO_PKT_DATA) 20513af08d82Slm66018 Q(VIO_DESC_DATA) 20523af08d82Slm66018 Q(VIO_DRING_DATA) 20533af08d82Slm66018 #undef Q 20543af08d82Slm66018 default: ses = "unknown"; break; 20553af08d82Slm66018 } 20563af08d82Slm66018 20573af08d82Slm66018 DMSG(vdcp, 3, "(%x/%x/%x) message : (%s/%s/%s)\n", 20583af08d82Slm66018 msg->tag.vio_msgtype, msg->tag.vio_subtype, 20593af08d82Slm66018 msg->tag.vio_subtype_env, ms, ss, ses); 20603af08d82Slm66018 } 20613af08d82Slm66018 #endif 20623af08d82Slm66018 20631ae08745Sheppo /* 20641ae08745Sheppo * Function: 20651ae08745Sheppo * vdc_send() 20661ae08745Sheppo * 20671ae08745Sheppo * Description: 20681ae08745Sheppo * The function encapsulates the call to write a message using LDC. 20691ae08745Sheppo * If LDC indicates that the call failed due to the queue being full, 207017cadca8Slm66018 * we retry the ldc_write(), otherwise we return the error returned by LDC. 20711ae08745Sheppo * 20721ae08745Sheppo * Arguments: 20731ae08745Sheppo * ldc_handle - LDC handle for the channel this instance of vdc uses 20741ae08745Sheppo * pkt - address of LDC message to be sent 20751ae08745Sheppo * msglen - the size of the message being sent. When the function 20761ae08745Sheppo * returns, this contains the number of bytes written. 20771ae08745Sheppo * 20781ae08745Sheppo * Return Code: 20791ae08745Sheppo * 0 - Success. 20801ae08745Sheppo * EINVAL - pkt or msglen were NULL 20811ae08745Sheppo * ECONNRESET - The connection was not up. 20821ae08745Sheppo * EWOULDBLOCK - LDC queue is full 20831ae08745Sheppo * xxx - other error codes returned by ldc_write 20841ae08745Sheppo */ 20851ae08745Sheppo static int 20860a55fbb7Slm66018 vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen) 20871ae08745Sheppo { 20881ae08745Sheppo size_t size = 0; 20891ae08745Sheppo int status = 0; 20903af08d82Slm66018 clock_t delay_ticks; 20911ae08745Sheppo 20920a55fbb7Slm66018 ASSERT(vdc != NULL); 20930a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 20941ae08745Sheppo ASSERT(msglen != NULL); 20951ae08745Sheppo ASSERT(*msglen != 0); 20961ae08745Sheppo 20973af08d82Slm66018 #ifdef DEBUG 209817cadca8Slm66018 vdc_decode_tag(vdc, (vio_msg_t *)(uintptr_t)pkt); 20993af08d82Slm66018 #endif 21003af08d82Slm66018 /* 21013af08d82Slm66018 * Wait indefinitely to send if channel 21023af08d82Slm66018 * is busy, but bail out if we succeed or 21033af08d82Slm66018 * if the channel closes or is reset. 21043af08d82Slm66018 */ 21053af08d82Slm66018 delay_ticks = vdc_hz_min_ldc_delay; 21061ae08745Sheppo do { 21071ae08745Sheppo size = *msglen; 21088cd10891Snarayan status = ldc_write(vdc->curr_server->ldc_handle, pkt, &size); 21093af08d82Slm66018 if (status == EWOULDBLOCK) { 21103af08d82Slm66018 delay(delay_ticks); 21113af08d82Slm66018 /* geometric backoff */ 21123af08d82Slm66018 delay_ticks *= 2; 21133af08d82Slm66018 if (delay_ticks > vdc_hz_max_ldc_delay) 21143af08d82Slm66018 delay_ticks = vdc_hz_max_ldc_delay; 21153af08d82Slm66018 } 21163af08d82Slm66018 } while (status == EWOULDBLOCK); 21171ae08745Sheppo 21180a55fbb7Slm66018 /* if LDC had serious issues --- reset vdc state */ 21190a55fbb7Slm66018 if (status == EIO || status == ECONNRESET) { 21203af08d82Slm66018 /* LDC had serious issues --- reset vdc state */ 21213af08d82Slm66018 mutex_enter(&vdc->read_lock); 21223af08d82Slm66018 if ((vdc->read_state == VDC_READ_WAITING) || 21233af08d82Slm66018 (vdc->read_state == VDC_READ_RESET)) 21243af08d82Slm66018 cv_signal(&vdc->read_cv); 21253af08d82Slm66018 vdc->read_state = VDC_READ_RESET; 21263af08d82Slm66018 mutex_exit(&vdc->read_lock); 21273af08d82Slm66018 21283af08d82Slm66018 /* wake up any waiters in the reset thread */ 21293af08d82Slm66018 if (vdc->state == VDC_STATE_INIT_WAITING) { 21303af08d82Slm66018 DMSG(vdc, 0, "[%d] write reset - " 21313af08d82Slm66018 "vdc is resetting ..\n", vdc->instance); 21323af08d82Slm66018 vdc->state = VDC_STATE_RESETTING; 21333af08d82Slm66018 cv_signal(&vdc->initwait_cv); 21343af08d82Slm66018 } 21353af08d82Slm66018 21363af08d82Slm66018 return (ECONNRESET); 21370a55fbb7Slm66018 } 21380a55fbb7Slm66018 21391ae08745Sheppo /* return the last size written */ 21401ae08745Sheppo *msglen = size; 21411ae08745Sheppo 21421ae08745Sheppo return (status); 21431ae08745Sheppo } 21441ae08745Sheppo 21451ae08745Sheppo /* 21461ae08745Sheppo * Function: 2147655fd6a9Sachartre * vdc_get_md_node 21481ae08745Sheppo * 21491ae08745Sheppo * Description: 21508cd10891Snarayan * Get the MD, the device node for the given disk instance. The 21518cd10891Snarayan * caller is responsible for cleaning up the reference to the 21528cd10891Snarayan * returned MD (mdpp) by calling md_fini_handle(). 21531ae08745Sheppo * 21541ae08745Sheppo * Arguments: 21551ae08745Sheppo * dip - dev info pointer for this instance of the device driver. 2156655fd6a9Sachartre * mdpp - the returned MD. 2157655fd6a9Sachartre * vd_nodep - the returned device node. 21581ae08745Sheppo * 21591ae08745Sheppo * Return Code: 21601ae08745Sheppo * 0 - Success. 21611ae08745Sheppo * ENOENT - Expected node or property did not exist. 21621ae08745Sheppo * ENXIO - Unexpected error communicating with MD framework 21631ae08745Sheppo */ 21641ae08745Sheppo static int 21658cd10891Snarayan vdc_get_md_node(dev_info_t *dip, md_t **mdpp, mde_cookie_t *vd_nodep) 21661ae08745Sheppo { 21671ae08745Sheppo int status = ENOENT; 21681ae08745Sheppo char *node_name = NULL; 21691ae08745Sheppo md_t *mdp = NULL; 21701ae08745Sheppo int num_nodes; 21711ae08745Sheppo int num_vdevs; 21721ae08745Sheppo mde_cookie_t rootnode; 21731ae08745Sheppo mde_cookie_t *listp = NULL; 21741ae08745Sheppo boolean_t found_inst = B_FALSE; 21751ae08745Sheppo int listsz; 21761ae08745Sheppo int idx; 21771ae08745Sheppo uint64_t md_inst; 21781ae08745Sheppo int obp_inst; 21791ae08745Sheppo int instance = ddi_get_instance(dip); 21801ae08745Sheppo 21811ae08745Sheppo /* 21821ae08745Sheppo * Get the OBP instance number for comparison with the MD instance 21831ae08745Sheppo * 21841ae08745Sheppo * The "cfg-handle" property of a vdc node in an MD contains the MD's 21851ae08745Sheppo * notion of "instance", or unique identifier, for that node; OBP 21861ae08745Sheppo * stores the value of the "cfg-handle" MD property as the value of 21871ae08745Sheppo * the "reg" property on the node in the device tree it builds from 21881ae08745Sheppo * the MD and passes to Solaris. Thus, we look up the devinfo node's 21891ae08745Sheppo * "reg" property value to uniquely identify this device instance. 21901ae08745Sheppo * If the "reg" property cannot be found, the device tree state is 21911ae08745Sheppo * presumably so broken that there is no point in continuing. 21921ae08745Sheppo */ 21931ae08745Sheppo if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) { 21941ae08745Sheppo cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG); 21951ae08745Sheppo return (ENOENT); 21961ae08745Sheppo } 21971ae08745Sheppo obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 21981ae08745Sheppo OBP_REG, -1); 21993af08d82Slm66018 DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst); 22001ae08745Sheppo 22011ae08745Sheppo /* 2202655fd6a9Sachartre * We now walk the MD nodes to find the node for this vdisk. 22031ae08745Sheppo */ 22041ae08745Sheppo if ((mdp = md_get_handle()) == NULL) { 22051ae08745Sheppo cmn_err(CE_WARN, "unable to init machine description"); 22061ae08745Sheppo return (ENXIO); 22071ae08745Sheppo } 22081ae08745Sheppo 22091ae08745Sheppo num_nodes = md_node_count(mdp); 22101ae08745Sheppo ASSERT(num_nodes > 0); 22111ae08745Sheppo 22121ae08745Sheppo listsz = num_nodes * sizeof (mde_cookie_t); 22131ae08745Sheppo 22141ae08745Sheppo /* allocate memory for nodes */ 22151ae08745Sheppo listp = kmem_zalloc(listsz, KM_SLEEP); 22161ae08745Sheppo 22171ae08745Sheppo rootnode = md_root_node(mdp); 22181ae08745Sheppo ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); 22191ae08745Sheppo 22201ae08745Sheppo /* 22211ae08745Sheppo * Search for all the virtual devices, we will then check to see which 22221ae08745Sheppo * ones are disk nodes. 22231ae08745Sheppo */ 22241ae08745Sheppo num_vdevs = md_scan_dag(mdp, rootnode, 22251ae08745Sheppo md_find_name(mdp, VDC_MD_VDEV_NAME), 22261ae08745Sheppo md_find_name(mdp, "fwd"), listp); 22271ae08745Sheppo 22281ae08745Sheppo if (num_vdevs <= 0) { 22291ae08745Sheppo cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME); 22301ae08745Sheppo status = ENOENT; 22311ae08745Sheppo goto done; 22321ae08745Sheppo } 22331ae08745Sheppo 22343af08d82Slm66018 DMSGX(1, "[%d] num_vdevs=%d\n", instance, num_vdevs); 22351ae08745Sheppo for (idx = 0; idx < num_vdevs; idx++) { 22361ae08745Sheppo status = md_get_prop_str(mdp, listp[idx], "name", &node_name); 22371ae08745Sheppo if ((status != 0) || (node_name == NULL)) { 22381ae08745Sheppo cmn_err(CE_NOTE, "Unable to get name of node type '%s'" 22391ae08745Sheppo ": err %d", VDC_MD_VDEV_NAME, status); 22401ae08745Sheppo continue; 22411ae08745Sheppo } 22421ae08745Sheppo 22433af08d82Slm66018 DMSGX(1, "[%d] Found node '%s'\n", instance, node_name); 22441ae08745Sheppo if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) { 22451ae08745Sheppo status = md_get_prop_val(mdp, listp[idx], 22461ae08745Sheppo VDC_MD_CFG_HDL, &md_inst); 22473af08d82Slm66018 DMSGX(1, "[%d] vdc inst in MD=%lx\n", 22483af08d82Slm66018 instance, md_inst); 22491ae08745Sheppo if ((status == 0) && (md_inst == obp_inst)) { 22501ae08745Sheppo found_inst = B_TRUE; 22511ae08745Sheppo break; 22521ae08745Sheppo } 22531ae08745Sheppo } 22541ae08745Sheppo } 22551ae08745Sheppo 22560a55fbb7Slm66018 if (!found_inst) { 22573af08d82Slm66018 DMSGX(0, "Unable to find correct '%s' node", VDC_MD_DISK_NAME); 22581ae08745Sheppo status = ENOENT; 22591ae08745Sheppo goto done; 22601ae08745Sheppo } 22613af08d82Slm66018 DMSGX(0, "[%d] MD inst=%lx\n", instance, md_inst); 22621ae08745Sheppo 2263655fd6a9Sachartre *vd_nodep = listp[idx]; 2264655fd6a9Sachartre *mdpp = mdp; 2265655fd6a9Sachartre done: 2266655fd6a9Sachartre kmem_free(listp, listsz); 2267655fd6a9Sachartre return (status); 2268655fd6a9Sachartre } 2269655fd6a9Sachartre 2270655fd6a9Sachartre /* 2271655fd6a9Sachartre * Function: 22728cd10891Snarayan * vdc_init_ports 2273655fd6a9Sachartre * 2274655fd6a9Sachartre * Description: 22758cd10891Snarayan * Initialize all the ports for this vdisk instance. 2276655fd6a9Sachartre * 2277655fd6a9Sachartre * Arguments: 22788cd10891Snarayan * vdc - soft state pointer for this instance of the device driver. 22798cd10891Snarayan * mdp - md pointer 22808cd10891Snarayan * vd_nodep - device md node. 2281655fd6a9Sachartre * 2282655fd6a9Sachartre * Return Code: 2283655fd6a9Sachartre * 0 - Success. 2284655fd6a9Sachartre * ENOENT - Expected node or property did not exist. 2285655fd6a9Sachartre */ 2286655fd6a9Sachartre static int 22878cd10891Snarayan vdc_init_ports(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_nodep) 2288655fd6a9Sachartre { 2289655fd6a9Sachartre int status = 0; 22908cd10891Snarayan int idx; 22918cd10891Snarayan int num_nodes; 22928cd10891Snarayan int num_vports; 22938cd10891Snarayan int num_chans; 22948cd10891Snarayan int listsz; 22958cd10891Snarayan mde_cookie_t vd_port; 22968cd10891Snarayan mde_cookie_t *chanp = NULL; 22978cd10891Snarayan mde_cookie_t *portp = NULL; 22988cd10891Snarayan vdc_server_t *srvr; 22998cd10891Snarayan vdc_server_t *prev_srvr = NULL; 2300655fd6a9Sachartre 23018cd10891Snarayan /* 23028cd10891Snarayan * We now walk the MD nodes to find the port nodes for this vdisk. 23038cd10891Snarayan */ 2304655fd6a9Sachartre num_nodes = md_node_count(mdp); 2305655fd6a9Sachartre ASSERT(num_nodes > 0); 2306655fd6a9Sachartre 2307655fd6a9Sachartre listsz = num_nodes * sizeof (mde_cookie_t); 2308655fd6a9Sachartre 2309655fd6a9Sachartre /* allocate memory for nodes */ 23108cd10891Snarayan portp = kmem_zalloc(listsz, KM_SLEEP); 2311655fd6a9Sachartre chanp = kmem_zalloc(listsz, KM_SLEEP); 2312655fd6a9Sachartre 23138cd10891Snarayan num_vports = md_scan_dag(mdp, vd_nodep, 23148cd10891Snarayan md_find_name(mdp, VDC_MD_PORT_NAME), 23158cd10891Snarayan md_find_name(mdp, "fwd"), portp); 23168cd10891Snarayan if (num_vports == 0) { 23178cd10891Snarayan DMSGX(0, "Found no '%s' node for '%s' port\n", 23188cd10891Snarayan VDC_MD_PORT_NAME, VDC_MD_VDEV_NAME); 23198cd10891Snarayan status = ENOENT; 23208cd10891Snarayan goto done; 23218cd10891Snarayan } 23228cd10891Snarayan 23238cd10891Snarayan DMSGX(1, "Found %d '%s' node(s) for '%s' port\n", 23248cd10891Snarayan num_vports, VDC_MD_PORT_NAME, VDC_MD_VDEV_NAME); 23258cd10891Snarayan 23268cd10891Snarayan vdc->num_servers = 0; 23278cd10891Snarayan for (idx = 0; idx < num_vports; idx++) { 23288cd10891Snarayan 23298cd10891Snarayan /* initialize this port */ 23308cd10891Snarayan vd_port = portp[idx]; 23318cd10891Snarayan srvr = kmem_zalloc(sizeof (vdc_server_t), KM_SLEEP); 23328cd10891Snarayan srvr->vdcp = vdc; 23338cd10891Snarayan 23348cd10891Snarayan /* get port id */ 23358cd10891Snarayan if (md_get_prop_val(mdp, vd_port, VDC_MD_ID, &srvr->id) != 0) { 23368cd10891Snarayan cmn_err(CE_NOTE, "vDisk port '%s' property not found", 23378cd10891Snarayan VDC_MD_ID); 23388cd10891Snarayan kmem_free(srvr, sizeof (vdc_server_t)); 23398cd10891Snarayan continue; 23408cd10891Snarayan } 23418cd10891Snarayan 23428cd10891Snarayan /* set the connection timeout */ 23438cd10891Snarayan if (md_get_prop_val(mdp, vd_port, VDC_MD_TIMEOUT, 23448cd10891Snarayan &srvr->ctimeout) != 0) { 23458cd10891Snarayan srvr->ctimeout = 0; 23468cd10891Snarayan } 23478cd10891Snarayan 23488cd10891Snarayan /* get the ldc id */ 23498cd10891Snarayan num_chans = md_scan_dag(mdp, vd_port, 23501ae08745Sheppo md_find_name(mdp, VDC_MD_CHAN_NAME), 23511ae08745Sheppo md_find_name(mdp, "fwd"), chanp); 23521ae08745Sheppo 23531ae08745Sheppo /* expecting at least one channel */ 23541ae08745Sheppo if (num_chans <= 0) { 23551ae08745Sheppo cmn_err(CE_NOTE, "No '%s' node for '%s' port", 23561ae08745Sheppo VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); 23578cd10891Snarayan kmem_free(srvr, sizeof (vdc_server_t)); 23588cd10891Snarayan continue; 23591ae08745Sheppo } else if (num_chans != 1) { 23608cd10891Snarayan DMSGX(0, "Expected 1 '%s' node for '%s' port, " 23618cd10891Snarayan "found %d\n", VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, 23628cd10891Snarayan num_chans); 23631ae08745Sheppo } 23641ae08745Sheppo 23651ae08745Sheppo /* 23661ae08745Sheppo * We use the first channel found (index 0), irrespective of how 23671ae08745Sheppo * many are there in total. 23681ae08745Sheppo */ 23698cd10891Snarayan if (md_get_prop_val(mdp, chanp[0], VDC_MD_ID, 23708cd10891Snarayan &srvr->ldc_id) != 0) { 23718cd10891Snarayan cmn_err(CE_NOTE, "Channel '%s' property not found", 23728cd10891Snarayan VDC_MD_ID); 23738cd10891Snarayan kmem_free(srvr, sizeof (vdc_server_t)); 23748cd10891Snarayan continue; 23758cd10891Snarayan } 23768cd10891Snarayan 23778cd10891Snarayan /* 23788cd10891Snarayan * now initialise LDC channel which will be used to 23798cd10891Snarayan * communicate with this server 23808cd10891Snarayan */ 23818cd10891Snarayan if (vdc_do_ldc_init(vdc, srvr) != 0) { 23828cd10891Snarayan kmem_free(srvr, sizeof (vdc_server_t)); 23838cd10891Snarayan continue; 23848cd10891Snarayan } 23858cd10891Snarayan 23868cd10891Snarayan /* add server to list */ 2387d7400d00Sachartre if (prev_srvr) 23888cd10891Snarayan prev_srvr->next = srvr; 2389d7400d00Sachartre else 23908cd10891Snarayan vdc->server_list = srvr; 2391d7400d00Sachartre 23928cd10891Snarayan prev_srvr = srvr; 23938cd10891Snarayan 23948cd10891Snarayan /* inc numbers of servers */ 23958cd10891Snarayan vdc->num_servers++; 23968cd10891Snarayan } 23978cd10891Snarayan 23988cd10891Snarayan /* 23998cd10891Snarayan * Adjust the max number of handshake retries to match 24008cd10891Snarayan * the number of vdisk servers. 24018cd10891Snarayan */ 24028cd10891Snarayan if (vdc_hshake_retries < vdc->num_servers) 24038cd10891Snarayan vdc_hshake_retries = vdc->num_servers; 24048cd10891Snarayan 24058cd10891Snarayan /* pick first server as current server */ 24068cd10891Snarayan if (vdc->server_list != NULL) { 24078cd10891Snarayan vdc->curr_server = vdc->server_list; 24088cd10891Snarayan status = 0; 24098cd10891Snarayan } else { 24101ae08745Sheppo status = ENOENT; 24111ae08745Sheppo } 24121ae08745Sheppo 24131ae08745Sheppo done: 24141ae08745Sheppo kmem_free(chanp, listsz); 24158cd10891Snarayan kmem_free(portp, listsz); 24161ae08745Sheppo return (status); 24171ae08745Sheppo } 24181ae08745Sheppo 24198cd10891Snarayan 24208cd10891Snarayan /* 24218cd10891Snarayan * Function: 24228cd10891Snarayan * vdc_do_ldc_up 24238cd10891Snarayan * 24248cd10891Snarayan * Description: 24258cd10891Snarayan * Bring the channel for the current server up. 24268cd10891Snarayan * 24278cd10891Snarayan * Arguments: 24288cd10891Snarayan * vdc - soft state pointer for this instance of the device driver. 24298cd10891Snarayan * 24308cd10891Snarayan * Return Code: 24318cd10891Snarayan * 0 - Success. 24328cd10891Snarayan * EINVAL - Driver is detaching / LDC error 24338cd10891Snarayan * ECONNREFUSED - Other end is not listening 24348cd10891Snarayan */ 24350a55fbb7Slm66018 static int 24360a55fbb7Slm66018 vdc_do_ldc_up(vdc_t *vdc) 24370a55fbb7Slm66018 { 24380a55fbb7Slm66018 int status; 24393af08d82Slm66018 ldc_status_t ldc_state; 24400a55fbb7Slm66018 24418cd10891Snarayan ASSERT(MUTEX_HELD(&vdc->lock)); 24428cd10891Snarayan 24433af08d82Slm66018 DMSG(vdc, 0, "[%d] Bringing up channel %lx\n", 24448cd10891Snarayan vdc->instance, vdc->curr_server->ldc_id); 24453af08d82Slm66018 24463af08d82Slm66018 if (vdc->lifecycle == VDC_LC_DETACHING) 24473af08d82Slm66018 return (EINVAL); 24480a55fbb7Slm66018 24498cd10891Snarayan if ((status = ldc_up(vdc->curr_server->ldc_handle)) != 0) { 24500a55fbb7Slm66018 switch (status) { 24510a55fbb7Slm66018 case ECONNREFUSED: /* listener not ready at other end */ 24523af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_up(%lx,...) return %d\n", 24538cd10891Snarayan vdc->instance, vdc->curr_server->ldc_id, status); 24540a55fbb7Slm66018 status = 0; 24550a55fbb7Slm66018 break; 24560a55fbb7Slm66018 default: 24573af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to bring up LDC: " 24588cd10891Snarayan "channel=%ld, err=%d", vdc->instance, 24598cd10891Snarayan vdc->curr_server->ldc_id, status); 24603af08d82Slm66018 break; 24613af08d82Slm66018 } 24623af08d82Slm66018 } 24633af08d82Slm66018 24648cd10891Snarayan if (ldc_status(vdc->curr_server->ldc_handle, &ldc_state) == 0) { 24658cd10891Snarayan vdc->curr_server->ldc_state = ldc_state; 24663af08d82Slm66018 if (ldc_state == LDC_UP) { 24673af08d82Slm66018 DMSG(vdc, 0, "[%d] LDC channel already up\n", 24683af08d82Slm66018 vdc->instance); 24693af08d82Slm66018 vdc->seq_num = 1; 24703af08d82Slm66018 vdc->seq_num_reply = 0; 24710a55fbb7Slm66018 } 24720a55fbb7Slm66018 } 24730a55fbb7Slm66018 24740a55fbb7Slm66018 return (status); 24750a55fbb7Slm66018 } 24760a55fbb7Slm66018 24770a55fbb7Slm66018 /* 24780a55fbb7Slm66018 * Function: 24790a55fbb7Slm66018 * vdc_terminate_ldc() 24800a55fbb7Slm66018 * 24810a55fbb7Slm66018 * Description: 24820a55fbb7Slm66018 * 24830a55fbb7Slm66018 * Arguments: 24840a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 24858cd10891Snarayan * srvr - vdc per-server info structure 24860a55fbb7Slm66018 * 24870a55fbb7Slm66018 * Return Code: 24880a55fbb7Slm66018 * None 24890a55fbb7Slm66018 */ 24901ae08745Sheppo static void 24918cd10891Snarayan vdc_terminate_ldc(vdc_t *vdc, vdc_server_t *srvr) 24921ae08745Sheppo { 24931ae08745Sheppo int instance = ddi_get_instance(vdc->dip); 24941ae08745Sheppo 24958cd10891Snarayan if (srvr->state & VDC_LDC_OPEN) { 24968cd10891Snarayan DMSG(vdc, 0, "[%d] ldc_close()\n", instance); 24978cd10891Snarayan (void) ldc_close(srvr->ldc_handle); 24988cd10891Snarayan } 24998cd10891Snarayan if (srvr->state & VDC_LDC_CB) { 25008cd10891Snarayan DMSG(vdc, 0, "[%d] ldc_unreg_callback()\n", instance); 25018cd10891Snarayan (void) ldc_unreg_callback(srvr->ldc_handle); 25028cd10891Snarayan } 25038cd10891Snarayan if (srvr->state & VDC_LDC_INIT) { 25048cd10891Snarayan DMSG(vdc, 0, "[%d] ldc_fini()\n", instance); 25058cd10891Snarayan (void) ldc_fini(srvr->ldc_handle); 25068cd10891Snarayan srvr->ldc_handle = NULL; 25078cd10891Snarayan } 25088cd10891Snarayan 25098cd10891Snarayan srvr->state &= ~(VDC_LDC_INIT | VDC_LDC_CB | VDC_LDC_OPEN); 25108cd10891Snarayan } 25118cd10891Snarayan 25128cd10891Snarayan /* 25138cd10891Snarayan * Function: 25148cd10891Snarayan * vdc_fini_ports() 25158cd10891Snarayan * 25168cd10891Snarayan * Description: 25178cd10891Snarayan * Finalize all ports by closing the channel associated with each 25188cd10891Snarayan * port and also freeing the server structure. 25198cd10891Snarayan * 25208cd10891Snarayan * Arguments: 25218cd10891Snarayan * vdc - soft state pointer for this instance of the device driver. 25228cd10891Snarayan * 25238cd10891Snarayan * Return Code: 25248cd10891Snarayan * None 25258cd10891Snarayan */ 25268cd10891Snarayan static void 25278cd10891Snarayan vdc_fini_ports(vdc_t *vdc) 25288cd10891Snarayan { 25298cd10891Snarayan int instance = ddi_get_instance(vdc->dip); 25308cd10891Snarayan vdc_server_t *srvr, *prev_srvr; 25318cd10891Snarayan 25321ae08745Sheppo ASSERT(vdc != NULL); 25331ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 25341ae08745Sheppo 25353af08d82Slm66018 DMSG(vdc, 0, "[%d] initialized=%x\n", instance, vdc->initialized); 25361ae08745Sheppo 25378cd10891Snarayan srvr = vdc->server_list; 25388cd10891Snarayan 25398cd10891Snarayan while (srvr) { 25408cd10891Snarayan 25418cd10891Snarayan vdc_terminate_ldc(vdc, srvr); 25428cd10891Snarayan 25438cd10891Snarayan /* next server */ 25448cd10891Snarayan prev_srvr = srvr; 25458cd10891Snarayan srvr = srvr->next; 25468cd10891Snarayan 25478cd10891Snarayan /* free server */ 25488cd10891Snarayan kmem_free(prev_srvr, sizeof (vdc_server_t)); 25491ae08745Sheppo } 25501ae08745Sheppo 25518cd10891Snarayan vdc->server_list = NULL; 25521ae08745Sheppo } 25531ae08745Sheppo 25541ae08745Sheppo /* -------------------------------------------------------------------------- */ 25551ae08745Sheppo 25561ae08745Sheppo /* 25571ae08745Sheppo * Descriptor Ring helper routines 25581ae08745Sheppo */ 25591ae08745Sheppo 25600a55fbb7Slm66018 /* 25610a55fbb7Slm66018 * Function: 25620a55fbb7Slm66018 * vdc_init_descriptor_ring() 25630a55fbb7Slm66018 * 25640a55fbb7Slm66018 * Description: 25650a55fbb7Slm66018 * 25660a55fbb7Slm66018 * Arguments: 25670a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 25680a55fbb7Slm66018 * 25690a55fbb7Slm66018 * Return Code: 25700a55fbb7Slm66018 * 0 - Success 25710a55fbb7Slm66018 */ 25721ae08745Sheppo static int 25731ae08745Sheppo vdc_init_descriptor_ring(vdc_t *vdc) 25741ae08745Sheppo { 25751ae08745Sheppo vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ 25760a55fbb7Slm66018 int status = 0; 25771ae08745Sheppo int i; 25781ae08745Sheppo 25793af08d82Slm66018 DMSG(vdc, 0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized); 25801ae08745Sheppo 25811ae08745Sheppo ASSERT(vdc != NULL); 25821ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 25831ae08745Sheppo 2584e1ebb9ecSlm66018 /* ensure we have enough room to store max sized block */ 2585e1ebb9ecSlm66018 ASSERT(maxphys <= VD_MAX_BLOCK_SIZE); 2586e1ebb9ecSlm66018 25870a55fbb7Slm66018 if ((vdc->initialized & VDC_DRING_INIT) == 0) { 25883af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_mem_dring_create\n", vdc->instance); 2589e1ebb9ecSlm66018 /* 2590e1ebb9ecSlm66018 * Calculate the maximum block size we can transmit using one 2591e1ebb9ecSlm66018 * Descriptor Ring entry from the attributes returned by the 2592e1ebb9ecSlm66018 * vDisk server. This is subject to a minimum of 'maxphys' 2593e1ebb9ecSlm66018 * as we do not have the capability to split requests over 2594e1ebb9ecSlm66018 * multiple DRing entries. 2595e1ebb9ecSlm66018 */ 2596e1ebb9ecSlm66018 if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) { 25973af08d82Slm66018 DMSG(vdc, 0, "[%d] using minimum DRing size\n", 2598e1ebb9ecSlm66018 vdc->instance); 2599e1ebb9ecSlm66018 vdc->dring_max_cookies = maxphys / PAGESIZE; 2600e1ebb9ecSlm66018 } else { 2601e1ebb9ecSlm66018 vdc->dring_max_cookies = 2602e1ebb9ecSlm66018 (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE; 2603e1ebb9ecSlm66018 } 2604e1ebb9ecSlm66018 vdc->dring_entry_size = (sizeof (vd_dring_entry_t) + 2605e1ebb9ecSlm66018 (sizeof (ldc_mem_cookie_t) * 2606e1ebb9ecSlm66018 (vdc->dring_max_cookies - 1))); 2607e1ebb9ecSlm66018 vdc->dring_len = VD_DRING_LEN; 2608e1ebb9ecSlm66018 2609e1ebb9ecSlm66018 status = ldc_mem_dring_create(vdc->dring_len, 26108cd10891Snarayan vdc->dring_entry_size, &vdc->dring_hdl); 26118cd10891Snarayan if ((vdc->dring_hdl == NULL) || (status != 0)) { 26123af08d82Slm66018 DMSG(vdc, 0, "[%d] Descriptor ring creation failed", 2613e1ebb9ecSlm66018 vdc->instance); 26141ae08745Sheppo return (status); 26151ae08745Sheppo } 26160a55fbb7Slm66018 vdc->initialized |= VDC_DRING_INIT; 26170a55fbb7Slm66018 } 26181ae08745Sheppo 26190a55fbb7Slm66018 if ((vdc->initialized & VDC_DRING_BOUND) == 0) { 26203af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance); 26210a55fbb7Slm66018 vdc->dring_cookie = 26220a55fbb7Slm66018 kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); 26231ae08745Sheppo 26248cd10891Snarayan status = ldc_mem_dring_bind(vdc->curr_server->ldc_handle, 26258cd10891Snarayan vdc->dring_hdl, 26264bac2208Snarayan LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW, 26270a55fbb7Slm66018 &vdc->dring_cookie[0], 26281ae08745Sheppo &vdc->dring_cookie_count); 26291ae08745Sheppo if (status != 0) { 26303af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to bind descriptor ring " 26313af08d82Slm66018 "(%lx) to channel (%lx) status=%d\n", 26328cd10891Snarayan vdc->instance, vdc->dring_hdl, 26338cd10891Snarayan vdc->curr_server->ldc_handle, status); 26341ae08745Sheppo return (status); 26351ae08745Sheppo } 26361ae08745Sheppo ASSERT(vdc->dring_cookie_count == 1); 26371ae08745Sheppo vdc->initialized |= VDC_DRING_BOUND; 26380a55fbb7Slm66018 } 26391ae08745Sheppo 26408cd10891Snarayan status = ldc_mem_dring_info(vdc->dring_hdl, &vdc->dring_mem_info); 26411ae08745Sheppo if (status != 0) { 26423af08d82Slm66018 DMSG(vdc, 0, 26433af08d82Slm66018 "[%d] Failed to get info for descriptor ring (%lx)\n", 26448cd10891Snarayan vdc->instance, vdc->dring_hdl); 26451ae08745Sheppo return (status); 26461ae08745Sheppo } 26471ae08745Sheppo 26480a55fbb7Slm66018 if ((vdc->initialized & VDC_DRING_LOCAL) == 0) { 26493af08d82Slm66018 DMSG(vdc, 0, "[%d] local dring\n", vdc->instance); 26500a55fbb7Slm66018 26511ae08745Sheppo /* Allocate the local copy of this dring */ 26520a55fbb7Slm66018 vdc->local_dring = 2653e1ebb9ecSlm66018 kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t), 26541ae08745Sheppo KM_SLEEP); 26551ae08745Sheppo vdc->initialized |= VDC_DRING_LOCAL; 26560a55fbb7Slm66018 } 26571ae08745Sheppo 26581ae08745Sheppo /* 26590a55fbb7Slm66018 * Mark all DRing entries as free and initialize the private 26600a55fbb7Slm66018 * descriptor's memory handles. If any entry is initialized, 26610a55fbb7Slm66018 * we need to free it later so we set the bit in 'initialized' 26620a55fbb7Slm66018 * at the start. 26631ae08745Sheppo */ 26641ae08745Sheppo vdc->initialized |= VDC_DRING_ENTRY; 2665e1ebb9ecSlm66018 for (i = 0; i < vdc->dring_len; i++) { 26661ae08745Sheppo dep = VDC_GET_DRING_ENTRY_PTR(vdc, i); 26671ae08745Sheppo dep->hdr.dstate = VIO_DESC_FREE; 26681ae08745Sheppo 26698cd10891Snarayan status = ldc_mem_alloc_handle(vdc->curr_server->ldc_handle, 26701ae08745Sheppo &vdc->local_dring[i].desc_mhdl); 26711ae08745Sheppo if (status != 0) { 26723af08d82Slm66018 DMSG(vdc, 0, "![%d] Failed to alloc mem handle for" 26731ae08745Sheppo " descriptor %d", vdc->instance, i); 26741ae08745Sheppo return (status); 26751ae08745Sheppo } 26763af08d82Slm66018 vdc->local_dring[i].is_free = B_TRUE; 26771ae08745Sheppo vdc->local_dring[i].dep = dep; 26781ae08745Sheppo } 26791ae08745Sheppo 26803af08d82Slm66018 /* Initialize the starting index */ 26813af08d82Slm66018 vdc->dring_curr_idx = 0; 26821ae08745Sheppo 26831ae08745Sheppo return (status); 26841ae08745Sheppo } 26851ae08745Sheppo 26860a55fbb7Slm66018 /* 26870a55fbb7Slm66018 * Function: 26880a55fbb7Slm66018 * vdc_destroy_descriptor_ring() 26890a55fbb7Slm66018 * 26900a55fbb7Slm66018 * Description: 26910a55fbb7Slm66018 * 26920a55fbb7Slm66018 * Arguments: 26930a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 26940a55fbb7Slm66018 * 26950a55fbb7Slm66018 * Return Code: 26960a55fbb7Slm66018 * None 26970a55fbb7Slm66018 */ 26981ae08745Sheppo static void 26991ae08745Sheppo vdc_destroy_descriptor_ring(vdc_t *vdc) 27001ae08745Sheppo { 27010a55fbb7Slm66018 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 27021ae08745Sheppo ldc_mem_handle_t mhdl = NULL; 27033af08d82Slm66018 ldc_mem_info_t minfo; 27041ae08745Sheppo int status = -1; 27051ae08745Sheppo int i; /* loop */ 27061ae08745Sheppo 27071ae08745Sheppo ASSERT(vdc != NULL); 27081ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 27091ae08745Sheppo 27103af08d82Slm66018 DMSG(vdc, 0, "[%d] Entered\n", vdc->instance); 27111ae08745Sheppo 27121ae08745Sheppo if (vdc->initialized & VDC_DRING_ENTRY) { 27133af08d82Slm66018 DMSG(vdc, 0, 27143af08d82Slm66018 "[%d] Removing Local DRing entries\n", vdc->instance); 2715e1ebb9ecSlm66018 for (i = 0; i < vdc->dring_len; i++) { 27160a55fbb7Slm66018 ldep = &vdc->local_dring[i]; 27170a55fbb7Slm66018 mhdl = ldep->desc_mhdl; 27181ae08745Sheppo 27190a55fbb7Slm66018 if (mhdl == NULL) 27200a55fbb7Slm66018 continue; 27210a55fbb7Slm66018 27223af08d82Slm66018 if ((status = ldc_mem_info(mhdl, &minfo)) != 0) { 27233af08d82Slm66018 DMSG(vdc, 0, 27243af08d82Slm66018 "ldc_mem_info returned an error: %d\n", 27253af08d82Slm66018 status); 27263af08d82Slm66018 27273af08d82Slm66018 /* 27283af08d82Slm66018 * This must mean that the mem handle 27293af08d82Slm66018 * is not valid. Clear it out so that 27303af08d82Slm66018 * no one tries to use it. 27313af08d82Slm66018 */ 27323af08d82Slm66018 ldep->desc_mhdl = NULL; 27333af08d82Slm66018 continue; 27343af08d82Slm66018 } 27353af08d82Slm66018 27363af08d82Slm66018 if (minfo.status == LDC_BOUND) { 27373af08d82Slm66018 (void) ldc_mem_unbind_handle(mhdl); 27383af08d82Slm66018 } 27393af08d82Slm66018 27401ae08745Sheppo (void) ldc_mem_free_handle(mhdl); 27413af08d82Slm66018 27423af08d82Slm66018 ldep->desc_mhdl = NULL; 27431ae08745Sheppo } 27441ae08745Sheppo vdc->initialized &= ~VDC_DRING_ENTRY; 27451ae08745Sheppo } 27461ae08745Sheppo 27471ae08745Sheppo if (vdc->initialized & VDC_DRING_LOCAL) { 27483af08d82Slm66018 DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance); 27491ae08745Sheppo kmem_free(vdc->local_dring, 2750e1ebb9ecSlm66018 vdc->dring_len * sizeof (vdc_local_desc_t)); 27511ae08745Sheppo vdc->initialized &= ~VDC_DRING_LOCAL; 27521ae08745Sheppo } 27531ae08745Sheppo 27541ae08745Sheppo if (vdc->initialized & VDC_DRING_BOUND) { 27553af08d82Slm66018 DMSG(vdc, 0, "[%d] Unbinding DRing\n", vdc->instance); 27568cd10891Snarayan status = ldc_mem_dring_unbind(vdc->dring_hdl); 27571ae08745Sheppo if (status == 0) { 27581ae08745Sheppo vdc->initialized &= ~VDC_DRING_BOUND; 27591ae08745Sheppo } else { 27603af08d82Slm66018 DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx", 27618cd10891Snarayan vdc->instance, status, vdc->dring_hdl); 27621ae08745Sheppo } 27633af08d82Slm66018 kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t)); 27641ae08745Sheppo } 27651ae08745Sheppo 27661ae08745Sheppo if (vdc->initialized & VDC_DRING_INIT) { 27673af08d82Slm66018 DMSG(vdc, 0, "[%d] Destroying DRing\n", vdc->instance); 27688cd10891Snarayan status = ldc_mem_dring_destroy(vdc->dring_hdl); 27691ae08745Sheppo if (status == 0) { 27708cd10891Snarayan vdc->dring_hdl = NULL; 27711ae08745Sheppo bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t)); 27721ae08745Sheppo vdc->initialized &= ~VDC_DRING_INIT; 27731ae08745Sheppo } else { 27743af08d82Slm66018 DMSG(vdc, 0, "[%d] Error %d destroying DRing (%lx)", 27758cd10891Snarayan vdc->instance, status, vdc->dring_hdl); 27761ae08745Sheppo } 27771ae08745Sheppo } 27781ae08745Sheppo } 27791ae08745Sheppo 27801ae08745Sheppo /* 27813af08d82Slm66018 * Function: 278290e2f9dcSlm66018 * vdc_map_to_shared_dring() 27831ae08745Sheppo * 27841ae08745Sheppo * Description: 27853af08d82Slm66018 * Copy contents of the local descriptor to the shared 27863af08d82Slm66018 * memory descriptor. 27871ae08745Sheppo * 27883af08d82Slm66018 * Arguments: 27893af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 27903af08d82Slm66018 * idx - descriptor ring index 27913af08d82Slm66018 * 27923af08d82Slm66018 * Return Code: 27933af08d82Slm66018 * None 27941ae08745Sheppo */ 27951ae08745Sheppo static int 27963af08d82Slm66018 vdc_map_to_shared_dring(vdc_t *vdcp, int idx) 27971ae08745Sheppo { 27983af08d82Slm66018 vdc_local_desc_t *ldep; 27993af08d82Slm66018 vd_dring_entry_t *dep; 28003af08d82Slm66018 int rv; 28011ae08745Sheppo 28023af08d82Slm66018 ldep = &(vdcp->local_dring[idx]); 28031ae08745Sheppo 28043af08d82Slm66018 /* for now leave in the old pop_mem_hdl stuff */ 28053af08d82Slm66018 if (ldep->nbytes > 0) { 28063af08d82Slm66018 rv = vdc_populate_mem_hdl(vdcp, ldep); 28073af08d82Slm66018 if (rv) { 28083af08d82Slm66018 DMSG(vdcp, 0, "[%d] Cannot populate mem handle\n", 28093af08d82Slm66018 vdcp->instance); 28103af08d82Slm66018 return (rv); 28113af08d82Slm66018 } 28123af08d82Slm66018 } 28131ae08745Sheppo 28143af08d82Slm66018 /* 28153af08d82Slm66018 * fill in the data details into the DRing 28163af08d82Slm66018 */ 2817d10e4ef2Snarayan dep = ldep->dep; 28181ae08745Sheppo ASSERT(dep != NULL); 28191ae08745Sheppo 28203af08d82Slm66018 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdcp); 28213af08d82Slm66018 dep->payload.operation = ldep->operation; 28223af08d82Slm66018 dep->payload.addr = ldep->offset; 28233af08d82Slm66018 dep->payload.nbytes = ldep->nbytes; 2824055d7c80Scarlsonj dep->payload.status = (uint32_t)-1; /* vds will set valid value */ 28253af08d82Slm66018 dep->payload.slice = ldep->slice; 28263af08d82Slm66018 dep->hdr.dstate = VIO_DESC_READY; 28273af08d82Slm66018 dep->hdr.ack = 1; /* request an ACK for every message */ 28281ae08745Sheppo 28293af08d82Slm66018 return (0); 28301ae08745Sheppo } 28311ae08745Sheppo 28321ae08745Sheppo /* 28331ae08745Sheppo * Function: 28343af08d82Slm66018 * vdc_send_request 28353af08d82Slm66018 * 28363af08d82Slm66018 * Description: 28373af08d82Slm66018 * This routine writes the data to be transmitted to vds into the 28383af08d82Slm66018 * descriptor, notifies vds that the ring has been updated and 28393af08d82Slm66018 * then waits for the request to be processed. 28403af08d82Slm66018 * 28413af08d82Slm66018 * Arguments: 28423af08d82Slm66018 * vdcp - the soft state pointer 28433af08d82Slm66018 * operation - operation we want vds to perform (VD_OP_XXX) 28443af08d82Slm66018 * addr - address of data buf to be read/written. 28453af08d82Slm66018 * nbytes - number of bytes to read/write 28463af08d82Slm66018 * slice - the disk slice this request is for 28473af08d82Slm66018 * offset - relative disk offset 28483af08d82Slm66018 * cb_type - type of call - STRATEGY or SYNC 28493af08d82Slm66018 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 28503af08d82Slm66018 * . mode for ioctl(9e) 28513af08d82Slm66018 * . LP64 diskaddr_t (block I/O) 28523af08d82Slm66018 * dir - direction of operation (READ/WRITE/BOTH) 28533af08d82Slm66018 * 28543af08d82Slm66018 * Return Codes: 28553af08d82Slm66018 * 0 28563af08d82Slm66018 * ENXIO 28573af08d82Slm66018 */ 28583af08d82Slm66018 static int 28593af08d82Slm66018 vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr, 28603af08d82Slm66018 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 28613af08d82Slm66018 void *cb_arg, vio_desc_direction_t dir) 28623af08d82Slm66018 { 2863366a92acSlm66018 int rv = 0; 2864366a92acSlm66018 28653af08d82Slm66018 ASSERT(vdcp != NULL); 286687a7269eSachartre ASSERT(slice == VD_SLICE_NONE || slice < V_NUMPAR); 28673af08d82Slm66018 28683af08d82Slm66018 mutex_enter(&vdcp->lock); 28693af08d82Slm66018 2870366a92acSlm66018 /* 2871366a92acSlm66018 * If this is a block read/write operation we update the I/O statistics 2872366a92acSlm66018 * to indicate that the request is being put on the waitq to be 2873366a92acSlm66018 * serviced. 2874366a92acSlm66018 * 2875366a92acSlm66018 * We do it here (a common routine for both synchronous and strategy 2876366a92acSlm66018 * calls) for performance reasons - we are already holding vdc->lock 2877366a92acSlm66018 * so there is no extra locking overhead. We would have to explicitly 2878366a92acSlm66018 * grab the 'lock' mutex to update the stats if we were to do this 2879366a92acSlm66018 * higher up the stack in vdc_strategy() et. al. 2880366a92acSlm66018 */ 2881366a92acSlm66018 if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) { 2882366a92acSlm66018 DTRACE_IO1(start, buf_t *, cb_arg); 288390e2f9dcSlm66018 VD_KSTAT_WAITQ_ENTER(vdcp); 2884366a92acSlm66018 } 2885366a92acSlm66018 28863af08d82Slm66018 do { 28873c96341aSnarayan while (vdcp->state != VDC_STATE_RUNNING) { 28883af08d82Slm66018 28893c96341aSnarayan /* return error if detaching */ 28903c96341aSnarayan if (vdcp->state == VDC_STATE_DETACH) { 2891366a92acSlm66018 rv = ENXIO; 2892366a92acSlm66018 goto done; 28933c96341aSnarayan } 2894655fd6a9Sachartre 2895655fd6a9Sachartre /* fail request if connection timeout is reached */ 2896655fd6a9Sachartre if (vdcp->ctimeout_reached) { 2897366a92acSlm66018 rv = EIO; 2898366a92acSlm66018 goto done; 2899655fd6a9Sachartre } 2900655fd6a9Sachartre 29012f5224aeSachartre /* 29022f5224aeSachartre * If we are panicking and the disk is not ready then 29032f5224aeSachartre * we can't send any request because we can't complete 29042f5224aeSachartre * the handshake now. 29052f5224aeSachartre */ 29062f5224aeSachartre if (ddi_in_panic()) { 2907366a92acSlm66018 rv = EIO; 2908366a92acSlm66018 goto done; 29092f5224aeSachartre } 29102f5224aeSachartre 2911655fd6a9Sachartre cv_wait(&vdcp->running_cv, &vdcp->lock); 29123c96341aSnarayan } 29133c96341aSnarayan 29143af08d82Slm66018 } while (vdc_populate_descriptor(vdcp, operation, addr, 29153af08d82Slm66018 nbytes, slice, offset, cb_type, cb_arg, dir)); 29163af08d82Slm66018 2917366a92acSlm66018 done: 2918366a92acSlm66018 /* 2919366a92acSlm66018 * If this is a block read/write we update the I/O statistics kstat 2920366a92acSlm66018 * to indicate that this request has been placed on the queue for 2921366a92acSlm66018 * processing (i.e sent to the vDisk server) - iostat(1M) will 2922366a92acSlm66018 * report the time waiting for the vDisk server under the %b column 2923366a92acSlm66018 * In the case of an error we simply take it off the wait queue. 2924366a92acSlm66018 */ 2925366a92acSlm66018 if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) { 2926366a92acSlm66018 if (rv == 0) { 292790e2f9dcSlm66018 VD_KSTAT_WAITQ_TO_RUNQ(vdcp); 2928366a92acSlm66018 DTRACE_PROBE1(send, buf_t *, cb_arg); 2929366a92acSlm66018 } else { 2930366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_transerrs); 293190e2f9dcSlm66018 VD_KSTAT_WAITQ_EXIT(vdcp); 2932366a92acSlm66018 DTRACE_IO1(done, buf_t *, cb_arg); 2933366a92acSlm66018 } 2934366a92acSlm66018 } 2935366a92acSlm66018 29363af08d82Slm66018 mutex_exit(&vdcp->lock); 2937366a92acSlm66018 2938366a92acSlm66018 return (rv); 29393af08d82Slm66018 } 29403af08d82Slm66018 29413af08d82Slm66018 29423af08d82Slm66018 /* 29433af08d82Slm66018 * Function: 29441ae08745Sheppo * vdc_populate_descriptor 29451ae08745Sheppo * 29461ae08745Sheppo * Description: 29471ae08745Sheppo * This routine writes the data to be transmitted to vds into the 29481ae08745Sheppo * descriptor, notifies vds that the ring has been updated and 29491ae08745Sheppo * then waits for the request to be processed. 29501ae08745Sheppo * 29511ae08745Sheppo * Arguments: 29523af08d82Slm66018 * vdcp - the soft state pointer 29531ae08745Sheppo * operation - operation we want vds to perform (VD_OP_XXX) 29543af08d82Slm66018 * addr - address of data buf to be read/written. 29553af08d82Slm66018 * nbytes - number of bytes to read/write 29563af08d82Slm66018 * slice - the disk slice this request is for 29573af08d82Slm66018 * offset - relative disk offset 29583af08d82Slm66018 * cb_type - type of call - STRATEGY or SYNC 29593af08d82Slm66018 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 29601ae08745Sheppo * . mode for ioctl(9e) 29611ae08745Sheppo * . LP64 diskaddr_t (block I/O) 29623af08d82Slm66018 * dir - direction of operation (READ/WRITE/BOTH) 29631ae08745Sheppo * 29641ae08745Sheppo * Return Codes: 29651ae08745Sheppo * 0 29661ae08745Sheppo * EAGAIN 296717cadca8Slm66018 * ECONNRESET 29681ae08745Sheppo * ENXIO 29691ae08745Sheppo */ 29701ae08745Sheppo static int 29713af08d82Slm66018 vdc_populate_descriptor(vdc_t *vdcp, int operation, caddr_t addr, 29723af08d82Slm66018 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 29733af08d82Slm66018 void *cb_arg, vio_desc_direction_t dir) 29741ae08745Sheppo { 29753af08d82Slm66018 vdc_local_desc_t *local_dep = NULL; /* Local Dring Pointer */ 29763af08d82Slm66018 int idx; /* Index of DRing entry used */ 29773af08d82Slm66018 int next_idx; 29781ae08745Sheppo vio_dring_msg_t dmsg; 29793af08d82Slm66018 size_t msglen; 29808e6a2a04Slm66018 int rv; 29811ae08745Sheppo 29823af08d82Slm66018 ASSERT(MUTEX_HELD(&vdcp->lock)); 29833af08d82Slm66018 vdcp->threads_pending++; 29843af08d82Slm66018 loop: 29853af08d82Slm66018 DMSG(vdcp, 2, ": dring_curr_idx = %d\n", vdcp->dring_curr_idx); 29861ae08745Sheppo 29873af08d82Slm66018 /* Get next available D-Ring entry */ 29883af08d82Slm66018 idx = vdcp->dring_curr_idx; 29893af08d82Slm66018 local_dep = &(vdcp->local_dring[idx]); 29901ae08745Sheppo 29913af08d82Slm66018 if (!local_dep->is_free) { 29923af08d82Slm66018 DMSG(vdcp, 2, "[%d]: dring full - waiting for space\n", 29933af08d82Slm66018 vdcp->instance); 29943af08d82Slm66018 cv_wait(&vdcp->dring_free_cv, &vdcp->lock); 29953af08d82Slm66018 if (vdcp->state == VDC_STATE_RUNNING || 29963af08d82Slm66018 vdcp->state == VDC_STATE_HANDLE_PENDING) { 29973af08d82Slm66018 goto loop; 29983af08d82Slm66018 } 29993af08d82Slm66018 vdcp->threads_pending--; 30003af08d82Slm66018 return (ECONNRESET); 30011ae08745Sheppo } 30021ae08745Sheppo 30033af08d82Slm66018 next_idx = idx + 1; 30043af08d82Slm66018 if (next_idx >= vdcp->dring_len) 30053af08d82Slm66018 next_idx = 0; 30063af08d82Slm66018 vdcp->dring_curr_idx = next_idx; 30071ae08745Sheppo 30083af08d82Slm66018 ASSERT(local_dep->is_free); 30091ae08745Sheppo 30103af08d82Slm66018 local_dep->operation = operation; 3011d10e4ef2Snarayan local_dep->addr = addr; 30123af08d82Slm66018 local_dep->nbytes = nbytes; 30133af08d82Slm66018 local_dep->slice = slice; 30143af08d82Slm66018 local_dep->offset = offset; 30153af08d82Slm66018 local_dep->cb_type = cb_type; 30163af08d82Slm66018 local_dep->cb_arg = cb_arg; 30173af08d82Slm66018 local_dep->dir = dir; 30183af08d82Slm66018 30193af08d82Slm66018 local_dep->is_free = B_FALSE; 30203af08d82Slm66018 30213af08d82Slm66018 rv = vdc_map_to_shared_dring(vdcp, idx); 30223af08d82Slm66018 if (rv) { 30233af08d82Slm66018 DMSG(vdcp, 0, "[%d]: cannot bind memory - waiting ..\n", 30243af08d82Slm66018 vdcp->instance); 30253af08d82Slm66018 /* free the descriptor */ 30263af08d82Slm66018 local_dep->is_free = B_TRUE; 30273af08d82Slm66018 vdcp->dring_curr_idx = idx; 30283af08d82Slm66018 cv_wait(&vdcp->membind_cv, &vdcp->lock); 30293af08d82Slm66018 if (vdcp->state == VDC_STATE_RUNNING || 30303af08d82Slm66018 vdcp->state == VDC_STATE_HANDLE_PENDING) { 30313af08d82Slm66018 goto loop; 30321ae08745Sheppo } 30333af08d82Slm66018 vdcp->threads_pending--; 30343af08d82Slm66018 return (ECONNRESET); 30351ae08745Sheppo } 30361ae08745Sheppo 30371ae08745Sheppo /* 30381ae08745Sheppo * Send a msg with the DRing details to vds 30391ae08745Sheppo */ 30401ae08745Sheppo VIO_INIT_DRING_DATA_TAG(dmsg); 30413af08d82Slm66018 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdcp); 30423af08d82Slm66018 dmsg.dring_ident = vdcp->dring_ident; 30431ae08745Sheppo dmsg.start_idx = idx; 30441ae08745Sheppo dmsg.end_idx = idx; 30453af08d82Slm66018 vdcp->seq_num++; 30461ae08745Sheppo 3047366a92acSlm66018 DTRACE_PROBE2(populate, int, vdcp->instance, 3048366a92acSlm66018 vdc_local_desc_t *, local_dep); 30493af08d82Slm66018 DMSG(vdcp, 2, "ident=0x%lx, st=%u, end=%u, seq=%ld\n", 30503af08d82Slm66018 vdcp->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num); 30511ae08745Sheppo 30523af08d82Slm66018 /* 30533af08d82Slm66018 * note we're still holding the lock here to 30543af08d82Slm66018 * make sure the message goes out in order !!!... 30553af08d82Slm66018 */ 30563af08d82Slm66018 msglen = sizeof (dmsg); 30573af08d82Slm66018 rv = vdc_send(vdcp, (caddr_t)&dmsg, &msglen); 30583af08d82Slm66018 switch (rv) { 30593af08d82Slm66018 case ECONNRESET: 30603af08d82Slm66018 /* 30613af08d82Slm66018 * vdc_send initiates the reset on failure. 30623af08d82Slm66018 * Since the transaction has already been put 30633af08d82Slm66018 * on the local dring, it will automatically get 30643af08d82Slm66018 * retried when the channel is reset. Given that, 30653af08d82Slm66018 * it is ok to just return success even though the 30663af08d82Slm66018 * send failed. 30673af08d82Slm66018 */ 30683af08d82Slm66018 rv = 0; 30693af08d82Slm66018 break; 3070d10e4ef2Snarayan 30713af08d82Slm66018 case 0: /* EOK */ 30723af08d82Slm66018 DMSG(vdcp, 1, "sent via LDC: rv=%d\n", rv); 30733af08d82Slm66018 break; 3074d10e4ef2Snarayan 30753af08d82Slm66018 default: 30763af08d82Slm66018 goto cleanup_and_exit; 30773af08d82Slm66018 } 3078e1ebb9ecSlm66018 30793af08d82Slm66018 vdcp->threads_pending--; 30803af08d82Slm66018 return (rv); 30813af08d82Slm66018 30823af08d82Slm66018 cleanup_and_exit: 30833af08d82Slm66018 DMSG(vdcp, 0, "unexpected error, rv=%d\n", rv); 30843af08d82Slm66018 return (ENXIO); 30851ae08745Sheppo } 30861ae08745Sheppo 30871ae08745Sheppo /* 30883af08d82Slm66018 * Function: 30893af08d82Slm66018 * vdc_do_sync_op 30903af08d82Slm66018 * 30913af08d82Slm66018 * Description: 30923af08d82Slm66018 * Wrapper around vdc_populate_descriptor that blocks until the 30933af08d82Slm66018 * response to the message is available. 30943af08d82Slm66018 * 30953af08d82Slm66018 * Arguments: 30963af08d82Slm66018 * vdcp - the soft state pointer 30973af08d82Slm66018 * operation - operation we want vds to perform (VD_OP_XXX) 30983af08d82Slm66018 * addr - address of data buf to be read/written. 30993af08d82Slm66018 * nbytes - number of bytes to read/write 31003af08d82Slm66018 * slice - the disk slice this request is for 31013af08d82Slm66018 * offset - relative disk offset 31023af08d82Slm66018 * cb_type - type of call - STRATEGY or SYNC 31033af08d82Slm66018 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 31043af08d82Slm66018 * . mode for ioctl(9e) 31053af08d82Slm66018 * . LP64 diskaddr_t (block I/O) 31063af08d82Slm66018 * dir - direction of operation (READ/WRITE/BOTH) 31072f5224aeSachartre * rconflict - check for reservation conflict in case of failure 31082f5224aeSachartre * 31092f5224aeSachartre * rconflict should be set to B_TRUE by most callers. Callers invoking the 31102f5224aeSachartre * VD_OP_SCSICMD operation can set rconflict to B_FALSE if they check the 31112f5224aeSachartre * result of a successful operation with vd_scsi_status(). 31123af08d82Slm66018 * 31133af08d82Slm66018 * Return Codes: 31143af08d82Slm66018 * 0 31153af08d82Slm66018 * EAGAIN 31163af08d82Slm66018 * EFAULT 31173af08d82Slm66018 * ENXIO 31183af08d82Slm66018 * EIO 31190a55fbb7Slm66018 */ 31203af08d82Slm66018 static int 31213af08d82Slm66018 vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, size_t nbytes, 31223af08d82Slm66018 int slice, diskaddr_t offset, int cb_type, void *cb_arg, 31232f5224aeSachartre vio_desc_direction_t dir, boolean_t rconflict) 31243af08d82Slm66018 { 31253af08d82Slm66018 int status; 31262f5224aeSachartre vdc_io_t *vio; 31272f5224aeSachartre boolean_t check_resv_conflict = B_FALSE; 31283af08d82Slm66018 31293af08d82Slm66018 ASSERT(cb_type == CB_SYNC); 31301ae08745Sheppo 31311ae08745Sheppo /* 31323af08d82Slm66018 * Grab the lock, if blocked wait until the server 31333af08d82Slm66018 * response causes us to wake up again. 31343af08d82Slm66018 */ 31353af08d82Slm66018 mutex_enter(&vdcp->lock); 31363af08d82Slm66018 vdcp->sync_op_cnt++; 31373af08d82Slm66018 while (vdcp->sync_op_blocked && vdcp->state != VDC_STATE_DETACH) 31383af08d82Slm66018 cv_wait(&vdcp->sync_blocked_cv, &vdcp->lock); 31393af08d82Slm66018 31403af08d82Slm66018 if (vdcp->state == VDC_STATE_DETACH) { 31413af08d82Slm66018 cv_broadcast(&vdcp->sync_blocked_cv); 31423af08d82Slm66018 vdcp->sync_op_cnt--; 31433af08d82Slm66018 mutex_exit(&vdcp->lock); 31443af08d82Slm66018 return (ENXIO); 31453af08d82Slm66018 } 31463af08d82Slm66018 31473af08d82Slm66018 /* now block anyone other thread entering after us */ 31483af08d82Slm66018 vdcp->sync_op_blocked = B_TRUE; 31493af08d82Slm66018 vdcp->sync_op_pending = B_TRUE; 31503af08d82Slm66018 mutex_exit(&vdcp->lock); 31513af08d82Slm66018 3152655fd6a9Sachartre status = vdc_send_request(vdcp, operation, addr, 31533af08d82Slm66018 nbytes, slice, offset, cb_type, cb_arg, dir); 31543af08d82Slm66018 3155655fd6a9Sachartre mutex_enter(&vdcp->lock); 3156655fd6a9Sachartre 3157655fd6a9Sachartre if (status != 0) { 3158655fd6a9Sachartre vdcp->sync_op_pending = B_FALSE; 3159655fd6a9Sachartre } else { 31603af08d82Slm66018 /* 31613af08d82Slm66018 * block until our transaction completes. 31623af08d82Slm66018 * Also anyone else waiting also gets to go next. 31633af08d82Slm66018 */ 31643af08d82Slm66018 while (vdcp->sync_op_pending && vdcp->state != VDC_STATE_DETACH) 31653af08d82Slm66018 cv_wait(&vdcp->sync_pending_cv, &vdcp->lock); 31663af08d82Slm66018 3167655fd6a9Sachartre DMSG(vdcp, 2, ": operation returned %d\n", 3168655fd6a9Sachartre vdcp->sync_op_status); 31693c96341aSnarayan if (vdcp->state == VDC_STATE_DETACH) { 31703c96341aSnarayan vdcp->sync_op_pending = B_FALSE; 31713af08d82Slm66018 status = ENXIO; 31723c96341aSnarayan } else { 31733af08d82Slm66018 status = vdcp->sync_op_status; 31742f5224aeSachartre if (status != 0 && vdcp->failfast_interval != 0) { 31752f5224aeSachartre /* 31762f5224aeSachartre * Operation has failed and failfast is enabled. 31772f5224aeSachartre * We need to check if the failure is due to a 31782f5224aeSachartre * reservation conflict if this was requested. 31792f5224aeSachartre */ 31802f5224aeSachartre check_resv_conflict = rconflict; 31812f5224aeSachartre } 31822f5224aeSachartre 31833c96341aSnarayan } 3184655fd6a9Sachartre } 31853c96341aSnarayan 31863af08d82Slm66018 vdcp->sync_op_status = 0; 31873af08d82Slm66018 vdcp->sync_op_blocked = B_FALSE; 31883af08d82Slm66018 vdcp->sync_op_cnt--; 31893af08d82Slm66018 31903af08d82Slm66018 /* signal the next waiting thread */ 31913af08d82Slm66018 cv_signal(&vdcp->sync_blocked_cv); 31922f5224aeSachartre 31932f5224aeSachartre /* 31942f5224aeSachartre * We have to check for reservation conflict after unblocking sync 31952f5224aeSachartre * operations because some sync operations will be used to do this 31962f5224aeSachartre * check. 31972f5224aeSachartre */ 31982f5224aeSachartre if (check_resv_conflict) { 31992f5224aeSachartre vio = vdc_failfast_io_queue(vdcp, NULL); 32002f5224aeSachartre while (vio->vio_qtime != 0) 32012f5224aeSachartre cv_wait(&vdcp->failfast_io_cv, &vdcp->lock); 32022f5224aeSachartre kmem_free(vio, sizeof (vdc_io_t)); 32032f5224aeSachartre } 32042f5224aeSachartre 32053af08d82Slm66018 mutex_exit(&vdcp->lock); 32063af08d82Slm66018 32073af08d82Slm66018 return (status); 32083af08d82Slm66018 } 32093af08d82Slm66018 32103af08d82Slm66018 32113af08d82Slm66018 /* 32123af08d82Slm66018 * Function: 32133af08d82Slm66018 * vdc_drain_response() 32143af08d82Slm66018 * 32153af08d82Slm66018 * Description: 32161ae08745Sheppo * When a guest is panicking, the completion of requests needs to be 32171ae08745Sheppo * handled differently because interrupts are disabled and vdc 32181ae08745Sheppo * will not get messages. We have to poll for the messages instead. 32193af08d82Slm66018 * 32203c2ebf09Sachartre * Note: since we are panicking we don't implement the io:::done 32213c2ebf09Sachartre * DTrace probe or update the I/O statistics kstats. 3222366a92acSlm66018 * 32233af08d82Slm66018 * Arguments: 32243af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 32253c2ebf09Sachartre * buf - if buf is NULL then we drain all responses, otherwise we 32263c2ebf09Sachartre * poll until we receive a ACK/NACK for the specific I/O 32273c2ebf09Sachartre * described by buf. 32283af08d82Slm66018 * 32293af08d82Slm66018 * Return Code: 32303af08d82Slm66018 * 0 - Success 32311ae08745Sheppo */ 32323af08d82Slm66018 static int 32333c2ebf09Sachartre vdc_drain_response(vdc_t *vdc, struct buf *buf) 32343af08d82Slm66018 { 32353af08d82Slm66018 int rv, idx, retries; 32363af08d82Slm66018 size_t msglen; 32373af08d82Slm66018 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 32383af08d82Slm66018 vio_dring_msg_t dmsg; 32393c2ebf09Sachartre struct buf *mbuf; 32403af08d82Slm66018 32413af08d82Slm66018 mutex_enter(&vdc->lock); 32423af08d82Slm66018 32431ae08745Sheppo retries = 0; 32441ae08745Sheppo for (;;) { 32451ae08745Sheppo msglen = sizeof (dmsg); 32468cd10891Snarayan rv = ldc_read(vdc->curr_server->ldc_handle, (caddr_t)&dmsg, 32478cd10891Snarayan &msglen); 32488e6a2a04Slm66018 if (rv) { 32498e6a2a04Slm66018 rv = EINVAL; 32501ae08745Sheppo break; 32511ae08745Sheppo } 32521ae08745Sheppo 32531ae08745Sheppo /* 32541ae08745Sheppo * if there are no packets wait and check again 32551ae08745Sheppo */ 32568e6a2a04Slm66018 if ((rv == 0) && (msglen == 0)) { 32571ae08745Sheppo if (retries++ > vdc_dump_retries) { 32588e6a2a04Slm66018 rv = EAGAIN; 32591ae08745Sheppo break; 32601ae08745Sheppo } 32611ae08745Sheppo 3262d10e4ef2Snarayan drv_usecwait(vdc_usec_timeout_dump); 32631ae08745Sheppo continue; 32641ae08745Sheppo } 32651ae08745Sheppo 32661ae08745Sheppo /* 32671ae08745Sheppo * Ignore all messages that are not ACKs/NACKs to 32681ae08745Sheppo * DRing requests. 32691ae08745Sheppo */ 32701ae08745Sheppo if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 32711ae08745Sheppo (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 32723af08d82Slm66018 DMSG(vdc, 0, "discard pkt: type=%d sub=%d env=%d\n", 32731ae08745Sheppo dmsg.tag.vio_msgtype, 32741ae08745Sheppo dmsg.tag.vio_subtype, 32751ae08745Sheppo dmsg.tag.vio_subtype_env); 32761ae08745Sheppo continue; 32771ae08745Sheppo } 32781ae08745Sheppo 32791ae08745Sheppo /* 32803af08d82Slm66018 * set the appropriate return value for the current request. 32811ae08745Sheppo */ 32821ae08745Sheppo switch (dmsg.tag.vio_subtype) { 32831ae08745Sheppo case VIO_SUBTYPE_ACK: 32848e6a2a04Slm66018 rv = 0; 32851ae08745Sheppo break; 32861ae08745Sheppo case VIO_SUBTYPE_NACK: 32878e6a2a04Slm66018 rv = EAGAIN; 32881ae08745Sheppo break; 32891ae08745Sheppo default: 32901ae08745Sheppo continue; 32911ae08745Sheppo } 32921ae08745Sheppo 32933af08d82Slm66018 idx = dmsg.start_idx; 32943af08d82Slm66018 if (idx >= vdc->dring_len) { 32953af08d82Slm66018 DMSG(vdc, 0, "[%d] Bogus ack data : start %d\n", 3296e1ebb9ecSlm66018 vdc->instance, idx); 32973af08d82Slm66018 continue; 32981ae08745Sheppo } 32993af08d82Slm66018 ldep = &vdc->local_dring[idx]; 33003af08d82Slm66018 if (ldep->dep->hdr.dstate != VIO_DESC_DONE) { 33013af08d82Slm66018 DMSG(vdc, 0, "[%d] Entry @ %d - state !DONE %d\n", 33023af08d82Slm66018 vdc->instance, idx, ldep->dep->hdr.dstate); 33031ae08745Sheppo continue; 33041ae08745Sheppo } 33051ae08745Sheppo 33063c2ebf09Sachartre if (buf != NULL && ldep->cb_type == CB_STRATEGY) { 33073c2ebf09Sachartre mbuf = ldep->cb_arg; 33083c2ebf09Sachartre mbuf->b_resid = mbuf->b_bcount - 33093c2ebf09Sachartre ldep->dep->payload.nbytes; 33103c2ebf09Sachartre bioerror(mbuf, (rv == EAGAIN)? EIO: 33113c2ebf09Sachartre ldep->dep->payload.status); 33123c2ebf09Sachartre biodone(mbuf); 33133c2ebf09Sachartre } else { 33143c2ebf09Sachartre mbuf = NULL; 33153c2ebf09Sachartre } 33163c2ebf09Sachartre 33173af08d82Slm66018 DMSG(vdc, 1, "[%d] Depopulating idx=%d state=%d\n", 33183af08d82Slm66018 vdc->instance, idx, ldep->dep->hdr.dstate); 3319366a92acSlm66018 33203af08d82Slm66018 rv = vdc_depopulate_descriptor(vdc, idx); 33213af08d82Slm66018 if (rv) { 33223af08d82Slm66018 DMSG(vdc, 0, 33233af08d82Slm66018 "[%d] Entry @ %d - depopulate failed ..\n", 33243af08d82Slm66018 vdc->instance, idx); 33251ae08745Sheppo } 33261ae08745Sheppo 33273c2ebf09Sachartre /* we have received an ACK/NACK for the specified buffer */ 33283c2ebf09Sachartre if (buf != NULL && buf == mbuf) { 33293c2ebf09Sachartre rv = 0; 33303af08d82Slm66018 break; 33313af08d82Slm66018 } 33323af08d82Slm66018 33333c2ebf09Sachartre /* if this is the last descriptor - break out of loop */ 33343c2ebf09Sachartre if ((idx + 1) % vdc->dring_len == vdc->dring_curr_idx) { 33353c2ebf09Sachartre if (buf != NULL) { 33363c2ebf09Sachartre /* 33373c2ebf09Sachartre * We never got a response for the specified 33383c2ebf09Sachartre * buffer so we fail the I/O. 33393c2ebf09Sachartre */ 33403c2ebf09Sachartre bioerror(buf, EIO); 33413c2ebf09Sachartre biodone(buf); 33423c2ebf09Sachartre } 33433c2ebf09Sachartre break; 33443c2ebf09Sachartre } 33453c2ebf09Sachartre } 33463c2ebf09Sachartre 33473af08d82Slm66018 mutex_exit(&vdc->lock); 33483af08d82Slm66018 DMSG(vdc, 0, "End idx=%d\n", idx); 33493af08d82Slm66018 33503af08d82Slm66018 return (rv); 33511ae08745Sheppo } 33521ae08745Sheppo 33531ae08745Sheppo 33540a55fbb7Slm66018 /* 33550a55fbb7Slm66018 * Function: 33560a55fbb7Slm66018 * vdc_depopulate_descriptor() 33570a55fbb7Slm66018 * 33580a55fbb7Slm66018 * Description: 33590a55fbb7Slm66018 * 33600a55fbb7Slm66018 * Arguments: 33610a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 33620a55fbb7Slm66018 * idx - Index of the Descriptor Ring entry being modified 33630a55fbb7Slm66018 * 33640a55fbb7Slm66018 * Return Code: 33650a55fbb7Slm66018 * 0 - Success 33660a55fbb7Slm66018 */ 33671ae08745Sheppo static int 33681ae08745Sheppo vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 33691ae08745Sheppo { 33701ae08745Sheppo vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 33711ae08745Sheppo vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 33721ae08745Sheppo int status = ENXIO; 33738e6a2a04Slm66018 int rv = 0; 33741ae08745Sheppo 33751ae08745Sheppo ASSERT(vdc != NULL); 3376e1ebb9ecSlm66018 ASSERT(idx < vdc->dring_len); 33771ae08745Sheppo ldep = &vdc->local_dring[idx]; 33781ae08745Sheppo ASSERT(ldep != NULL); 33793af08d82Slm66018 ASSERT(MUTEX_HELD(&vdc->lock)); 33803af08d82Slm66018 3381366a92acSlm66018 DTRACE_PROBE2(depopulate, int, vdc->instance, vdc_local_desc_t *, ldep); 33823af08d82Slm66018 DMSG(vdc, 2, ": idx = %d\n", idx); 3383366a92acSlm66018 33841ae08745Sheppo dep = ldep->dep; 33851ae08745Sheppo ASSERT(dep != NULL); 3386e1ebb9ecSlm66018 ASSERT((dep->hdr.dstate == VIO_DESC_DONE) || 3387e1ebb9ecSlm66018 (dep->payload.status == ECANCELED)); 33881ae08745Sheppo 3389e1ebb9ecSlm66018 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 33903af08d82Slm66018 33913af08d82Slm66018 ldep->is_free = B_TRUE; 33921ae08745Sheppo status = dep->payload.status; 3393205eeb1aSlm66018 DMSG(vdc, 2, ": is_free = %d : status = %d\n", ldep->is_free, status); 33941ae08745Sheppo 3395eff7243fSlm66018 /* 3396eff7243fSlm66018 * If no buffers were used to transfer information to the server when 3397eff7243fSlm66018 * populating the descriptor then no memory handles need to be unbound 3398eff7243fSlm66018 * and we can return now. 3399eff7243fSlm66018 */ 3400eff7243fSlm66018 if (ldep->nbytes == 0) { 3401eff7243fSlm66018 cv_signal(&vdc->dring_free_cv); 34028e6a2a04Slm66018 return (status); 3403eff7243fSlm66018 } 34048e6a2a04Slm66018 34051ae08745Sheppo /* 34061ae08745Sheppo * If the upper layer passed in a misaligned address we copied the 34071ae08745Sheppo * data into an aligned buffer before sending it to LDC - we now 34081ae08745Sheppo * copy it back to the original buffer. 34091ae08745Sheppo */ 34101ae08745Sheppo if (ldep->align_addr) { 34111ae08745Sheppo ASSERT(ldep->addr != NULL); 34121ae08745Sheppo 34133c96341aSnarayan if (dep->payload.nbytes > 0) 34143c96341aSnarayan bcopy(ldep->align_addr, ldep->addr, 34153c96341aSnarayan dep->payload.nbytes); 34161ae08745Sheppo kmem_free(ldep->align_addr, 34173c96341aSnarayan sizeof (caddr_t) * P2ROUNDUP(ldep->nbytes, 8)); 34181ae08745Sheppo ldep->align_addr = NULL; 34191ae08745Sheppo } 34201ae08745Sheppo 34218e6a2a04Slm66018 rv = ldc_mem_unbind_handle(ldep->desc_mhdl); 34228e6a2a04Slm66018 if (rv != 0) { 34233af08d82Slm66018 DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)", 34248e6a2a04Slm66018 vdc->instance, ldep->desc_mhdl, idx, rv); 34258e6a2a04Slm66018 /* 34268e6a2a04Slm66018 * The error returned by the vDisk server is more informative 34278e6a2a04Slm66018 * and thus has a higher priority but if it isn't set we ensure 34288e6a2a04Slm66018 * that this function returns an error. 34298e6a2a04Slm66018 */ 34308e6a2a04Slm66018 if (status == 0) 34318e6a2a04Slm66018 status = EINVAL; 34321ae08745Sheppo } 34331ae08745Sheppo 34343af08d82Slm66018 cv_signal(&vdc->membind_cv); 34353af08d82Slm66018 cv_signal(&vdc->dring_free_cv); 34363af08d82Slm66018 34371ae08745Sheppo return (status); 34381ae08745Sheppo } 34391ae08745Sheppo 34400a55fbb7Slm66018 /* 34410a55fbb7Slm66018 * Function: 34420a55fbb7Slm66018 * vdc_populate_mem_hdl() 34430a55fbb7Slm66018 * 34440a55fbb7Slm66018 * Description: 34450a55fbb7Slm66018 * 34460a55fbb7Slm66018 * Arguments: 34470a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 34480a55fbb7Slm66018 * idx - Index of the Descriptor Ring entry being modified 34490a55fbb7Slm66018 * addr - virtual address being mapped in 34500a55fbb7Slm66018 * nybtes - number of bytes in 'addr' 34510a55fbb7Slm66018 * operation - the vDisk operation being performed (VD_OP_xxx) 34520a55fbb7Slm66018 * 34530a55fbb7Slm66018 * Return Code: 34540a55fbb7Slm66018 * 0 - Success 34550a55fbb7Slm66018 */ 34561ae08745Sheppo static int 34573af08d82Slm66018 vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep) 34581ae08745Sheppo { 34591ae08745Sheppo vd_dring_entry_t *dep = NULL; 34601ae08745Sheppo ldc_mem_handle_t mhdl; 34611ae08745Sheppo caddr_t vaddr; 34623af08d82Slm66018 size_t nbytes; 34634bac2208Snarayan uint8_t perm = LDC_MEM_RW; 34644bac2208Snarayan uint8_t maptype; 34651ae08745Sheppo int rv = 0; 34661ae08745Sheppo int i; 34671ae08745Sheppo 34683af08d82Slm66018 ASSERT(vdcp != NULL); 34691ae08745Sheppo 34703af08d82Slm66018 dep = ldep->dep; 34711ae08745Sheppo mhdl = ldep->desc_mhdl; 34721ae08745Sheppo 34733af08d82Slm66018 switch (ldep->dir) { 34743af08d82Slm66018 case VIO_read_dir: 34751ae08745Sheppo perm = LDC_MEM_W; 34761ae08745Sheppo break; 34771ae08745Sheppo 34783af08d82Slm66018 case VIO_write_dir: 34791ae08745Sheppo perm = LDC_MEM_R; 34801ae08745Sheppo break; 34811ae08745Sheppo 34823af08d82Slm66018 case VIO_both_dir: 34831ae08745Sheppo perm = LDC_MEM_RW; 34841ae08745Sheppo break; 34851ae08745Sheppo 34861ae08745Sheppo default: 34871ae08745Sheppo ASSERT(0); /* catch bad programming in vdc */ 34881ae08745Sheppo } 34891ae08745Sheppo 34901ae08745Sheppo /* 34911ae08745Sheppo * LDC expects any addresses passed in to be 8-byte aligned. We need 34921ae08745Sheppo * to copy the contents of any misaligned buffers to a newly allocated 34931ae08745Sheppo * buffer and bind it instead (and copy the the contents back to the 34941ae08745Sheppo * original buffer passed in when depopulating the descriptor) 34951ae08745Sheppo */ 34963af08d82Slm66018 vaddr = ldep->addr; 34973af08d82Slm66018 nbytes = ldep->nbytes; 34983af08d82Slm66018 if (((uint64_t)vaddr & 0x7) != 0) { 3499d10e4ef2Snarayan ASSERT(ldep->align_addr == NULL); 35001ae08745Sheppo ldep->align_addr = 35013af08d82Slm66018 kmem_alloc(sizeof (caddr_t) * 35023af08d82Slm66018 P2ROUNDUP(nbytes, 8), KM_SLEEP); 35033af08d82Slm66018 DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating " 35043af08d82Slm66018 "(buf=%p nb=%ld op=%d)\n", 35053af08d82Slm66018 vdcp->instance, (void *)vaddr, (void *)ldep->align_addr, 35063af08d82Slm66018 nbytes, ldep->operation); 35073af08d82Slm66018 if (perm != LDC_MEM_W) 35083af08d82Slm66018 bcopy(vaddr, ldep->align_addr, nbytes); 35091ae08745Sheppo vaddr = ldep->align_addr; 35101ae08745Sheppo } 35111ae08745Sheppo 35124bac2208Snarayan maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP; 35131ae08745Sheppo rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 351487a7269eSachartre maptype, perm, &dep->payload.cookie[0], &dep->payload.ncookies); 35153af08d82Slm66018 DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n", 35163af08d82Slm66018 vdcp->instance, dep->payload.ncookies); 35171ae08745Sheppo if (rv != 0) { 35183af08d82Slm66018 DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle " 35193af08d82Slm66018 "(mhdl=%p, buf=%p, err=%d)\n", 35203af08d82Slm66018 vdcp->instance, (void *)mhdl, (void *)vaddr, rv); 35211ae08745Sheppo if (ldep->align_addr) { 35221ae08745Sheppo kmem_free(ldep->align_addr, 3523d10e4ef2Snarayan sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); 35241ae08745Sheppo ldep->align_addr = NULL; 35251ae08745Sheppo } 35261ae08745Sheppo return (EAGAIN); 35271ae08745Sheppo } 35281ae08745Sheppo 35291ae08745Sheppo /* 35301ae08745Sheppo * Get the other cookies (if any). 35311ae08745Sheppo */ 35321ae08745Sheppo for (i = 1; i < dep->payload.ncookies; i++) { 35331ae08745Sheppo rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 35341ae08745Sheppo if (rv != 0) { 35351ae08745Sheppo (void) ldc_mem_unbind_handle(mhdl); 35363af08d82Slm66018 DMSG(vdcp, 0, "?[%d] Failed to get next cookie " 3537e1ebb9ecSlm66018 "(mhdl=%lx cnum=%d), err=%d", 35383af08d82Slm66018 vdcp->instance, mhdl, i, rv); 35391ae08745Sheppo if (ldep->align_addr) { 35401ae08745Sheppo kmem_free(ldep->align_addr, 35413c96341aSnarayan sizeof (caddr_t) * ldep->nbytes); 35421ae08745Sheppo ldep->align_addr = NULL; 35431ae08745Sheppo } 35441ae08745Sheppo return (EAGAIN); 35451ae08745Sheppo } 35461ae08745Sheppo } 35471ae08745Sheppo 35481ae08745Sheppo return (rv); 35491ae08745Sheppo } 35501ae08745Sheppo 35511ae08745Sheppo /* 35521ae08745Sheppo * Interrupt handlers for messages from LDC 35531ae08745Sheppo */ 35541ae08745Sheppo 35550a55fbb7Slm66018 /* 35560a55fbb7Slm66018 * Function: 35570a55fbb7Slm66018 * vdc_handle_cb() 35580a55fbb7Slm66018 * 35590a55fbb7Slm66018 * Description: 35600a55fbb7Slm66018 * 35610a55fbb7Slm66018 * Arguments: 35620a55fbb7Slm66018 * event - Type of event (LDC_EVT_xxx) that triggered the callback 35630a55fbb7Slm66018 * arg - soft state pointer for this instance of the device driver. 35640a55fbb7Slm66018 * 35650a55fbb7Slm66018 * Return Code: 35660a55fbb7Slm66018 * 0 - Success 35670a55fbb7Slm66018 */ 35681ae08745Sheppo static uint_t 35691ae08745Sheppo vdc_handle_cb(uint64_t event, caddr_t arg) 35701ae08745Sheppo { 35711ae08745Sheppo ldc_status_t ldc_state; 35721ae08745Sheppo int rv = 0; 35738cd10891Snarayan vdc_server_t *srvr = (vdc_server_t *)(void *)arg; 35748cd10891Snarayan vdc_t *vdc = srvr->vdcp; 35751ae08745Sheppo 35761ae08745Sheppo ASSERT(vdc != NULL); 35771ae08745Sheppo 35783af08d82Slm66018 DMSG(vdc, 1, "evt=%lx seqID=%ld\n", event, vdc->seq_num); 35791ae08745Sheppo 35808cd10891Snarayan /* If callback is not for the current server, ignore it */ 35818cd10891Snarayan mutex_enter(&vdc->lock); 35828cd10891Snarayan 35838cd10891Snarayan if (vdc->curr_server != srvr) { 35848cd10891Snarayan DMSG(vdc, 0, "[%d] Ignoring event 0x%lx for port@%ld\n", 35858cd10891Snarayan vdc->instance, event, srvr->id); 35868cd10891Snarayan mutex_exit(&vdc->lock); 35878cd10891Snarayan return (LDC_SUCCESS); 35888cd10891Snarayan } 35898cd10891Snarayan 35901ae08745Sheppo /* 35911ae08745Sheppo * Depending on the type of event that triggered this callback, 35923af08d82Slm66018 * we modify the handshake state or read the data. 35931ae08745Sheppo * 35941ae08745Sheppo * NOTE: not done as a switch() as event could be triggered by 35951ae08745Sheppo * a state change and a read request. Also the ordering of the 35961ae08745Sheppo * check for the event types is deliberate. 35971ae08745Sheppo */ 35981ae08745Sheppo if (event & LDC_EVT_UP) { 35993af08d82Slm66018 DMSG(vdc, 0, "[%d] Received LDC_EVT_UP\n", vdc->instance); 36003af08d82Slm66018 36011ae08745Sheppo /* get LDC state */ 36028cd10891Snarayan rv = ldc_status(srvr->ldc_handle, &ldc_state); 36031ae08745Sheppo if (rv != 0) { 36043af08d82Slm66018 DMSG(vdc, 0, "[%d] Couldn't get LDC status %d", 36051ae08745Sheppo vdc->instance, rv); 36068cd10891Snarayan mutex_exit(&vdc->lock); 36071ae08745Sheppo return (LDC_SUCCESS); 36081ae08745Sheppo } 36098cd10891Snarayan if (srvr->ldc_state != LDC_UP && 36108cd10891Snarayan ldc_state == LDC_UP) { 36111ae08745Sheppo /* 36123af08d82Slm66018 * Reset the transaction sequence numbers when 36133af08d82Slm66018 * LDC comes up. We then kick off the handshake 36143af08d82Slm66018 * negotiation with the vDisk server. 36151ae08745Sheppo */ 36160a55fbb7Slm66018 vdc->seq_num = 1; 36171ae08745Sheppo vdc->seq_num_reply = 0; 36188cd10891Snarayan srvr->ldc_state = ldc_state; 36193af08d82Slm66018 cv_signal(&vdc->initwait_cv); 36203af08d82Slm66018 } 36211ae08745Sheppo } 36221ae08745Sheppo 36231ae08745Sheppo if (event & LDC_EVT_READ) { 362417cadca8Slm66018 DMSG(vdc, 1, "[%d] Received LDC_EVT_READ\n", vdc->instance); 36253af08d82Slm66018 mutex_enter(&vdc->read_lock); 36263af08d82Slm66018 cv_signal(&vdc->read_cv); 36273af08d82Slm66018 vdc->read_state = VDC_READ_PENDING; 36283af08d82Slm66018 mutex_exit(&vdc->read_lock); 36298cd10891Snarayan mutex_exit(&vdc->lock); 36301ae08745Sheppo 36311ae08745Sheppo /* that's all we have to do - no need to handle DOWN/RESET */ 36321ae08745Sheppo return (LDC_SUCCESS); 36331ae08745Sheppo } 36341ae08745Sheppo 36353af08d82Slm66018 if (event & (LDC_EVT_RESET|LDC_EVT_DOWN)) { 36360a55fbb7Slm66018 36373af08d82Slm66018 DMSG(vdc, 0, "[%d] Received LDC RESET event\n", vdc->instance); 36383af08d82Slm66018 36393af08d82Slm66018 /* 36403af08d82Slm66018 * Need to wake up any readers so they will 36413af08d82Slm66018 * detect that a reset has occurred. 36423af08d82Slm66018 */ 36433af08d82Slm66018 mutex_enter(&vdc->read_lock); 36443af08d82Slm66018 if ((vdc->read_state == VDC_READ_WAITING) || 36453af08d82Slm66018 (vdc->read_state == VDC_READ_RESET)) 36463af08d82Slm66018 cv_signal(&vdc->read_cv); 36473af08d82Slm66018 vdc->read_state = VDC_READ_RESET; 36483af08d82Slm66018 mutex_exit(&vdc->read_lock); 36490a55fbb7Slm66018 36503af08d82Slm66018 /* wake up any threads waiting for connection to come up */ 36513af08d82Slm66018 if (vdc->state == VDC_STATE_INIT_WAITING) { 36523af08d82Slm66018 vdc->state = VDC_STATE_RESETTING; 36533af08d82Slm66018 cv_signal(&vdc->initwait_cv); 36541ae08745Sheppo } 36551ae08745Sheppo 36561ae08745Sheppo } 36571ae08745Sheppo 36588cd10891Snarayan mutex_exit(&vdc->lock); 36598cd10891Snarayan 36601ae08745Sheppo if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 36613af08d82Slm66018 DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received", 36621ae08745Sheppo vdc->instance, event); 36631ae08745Sheppo 36641ae08745Sheppo return (LDC_SUCCESS); 36651ae08745Sheppo } 36661ae08745Sheppo 36673af08d82Slm66018 /* 36683af08d82Slm66018 * Function: 36693af08d82Slm66018 * vdc_wait_for_response() 36703af08d82Slm66018 * 36713af08d82Slm66018 * Description: 36723af08d82Slm66018 * Block waiting for a response from the server. If there is 36733af08d82Slm66018 * no data the thread block on the read_cv that is signalled 36743af08d82Slm66018 * by the callback when an EVT_READ occurs. 36753af08d82Slm66018 * 36763af08d82Slm66018 * Arguments: 36773af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 36783af08d82Slm66018 * 36793af08d82Slm66018 * Return Code: 36803af08d82Slm66018 * 0 - Success 36813af08d82Slm66018 */ 36823af08d82Slm66018 static int 36833af08d82Slm66018 vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp) 36843af08d82Slm66018 { 36853af08d82Slm66018 size_t nbytes = sizeof (*msgp); 36863af08d82Slm66018 int status; 36873af08d82Slm66018 36883af08d82Slm66018 ASSERT(vdcp != NULL); 36893af08d82Slm66018 36903af08d82Slm66018 DMSG(vdcp, 1, "[%d] Entered\n", vdcp->instance); 36913af08d82Slm66018 36923af08d82Slm66018 status = vdc_recv(vdcp, msgp, &nbytes); 36933af08d82Slm66018 DMSG(vdcp, 3, "vdc_read() done.. status=0x%x size=0x%x\n", 36943af08d82Slm66018 status, (int)nbytes); 36953af08d82Slm66018 if (status) { 36963af08d82Slm66018 DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n", 36973af08d82Slm66018 vdcp->instance, status); 36983af08d82Slm66018 return (status); 36993af08d82Slm66018 } 37003af08d82Slm66018 37013af08d82Slm66018 if (nbytes < sizeof (vio_msg_tag_t)) { 37023af08d82Slm66018 DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n", 37033af08d82Slm66018 vdcp->instance, sizeof (vio_msg_tag_t), nbytes); 37043af08d82Slm66018 return (ENOMSG); 37053af08d82Slm66018 } 37063af08d82Slm66018 37073af08d82Slm66018 DMSG(vdcp, 2, "[%d] (%x/%x/%x)\n", vdcp->instance, 37083af08d82Slm66018 msgp->tag.vio_msgtype, 37093af08d82Slm66018 msgp->tag.vio_subtype, 37103af08d82Slm66018 msgp->tag.vio_subtype_env); 37113af08d82Slm66018 37123af08d82Slm66018 /* 37133af08d82Slm66018 * Verify the Session ID of the message 37143af08d82Slm66018 * 37153af08d82Slm66018 * Every message after the Version has been negotiated should 37163af08d82Slm66018 * have the correct session ID set. 37173af08d82Slm66018 */ 37183af08d82Slm66018 if ((msgp->tag.vio_sid != vdcp->session_id) && 37193af08d82Slm66018 (msgp->tag.vio_subtype_env != VIO_VER_INFO)) { 37203af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, " 37213af08d82Slm66018 "expected 0x%lx [seq num %lx @ %d]", 37223af08d82Slm66018 vdcp->instance, msgp->tag.vio_sid, 37233af08d82Slm66018 vdcp->session_id, 37243af08d82Slm66018 ((vio_dring_msg_t *)msgp)->seq_num, 37253af08d82Slm66018 ((vio_dring_msg_t *)msgp)->start_idx); 37263af08d82Slm66018 return (ENOMSG); 37273af08d82Slm66018 } 37283af08d82Slm66018 return (0); 37293af08d82Slm66018 } 37303af08d82Slm66018 37313af08d82Slm66018 37323af08d82Slm66018 /* 37333af08d82Slm66018 * Function: 37343af08d82Slm66018 * vdc_resubmit_backup_dring() 37353af08d82Slm66018 * 37363af08d82Slm66018 * Description: 37373af08d82Slm66018 * Resubmit each descriptor in the backed up dring to 37383af08d82Slm66018 * vDisk server. The Dring was backed up during connection 37393af08d82Slm66018 * reset. 37403af08d82Slm66018 * 37413af08d82Slm66018 * Arguments: 37423af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 37433af08d82Slm66018 * 37443af08d82Slm66018 * Return Code: 37453af08d82Slm66018 * 0 - Success 37463af08d82Slm66018 */ 37473af08d82Slm66018 static int 37483af08d82Slm66018 vdc_resubmit_backup_dring(vdc_t *vdcp) 37493af08d82Slm66018 { 375090e2f9dcSlm66018 int processed = 0; 37513af08d82Slm66018 int count; 37523af08d82Slm66018 int b_idx; 375390e2f9dcSlm66018 int rv = 0; 37543af08d82Slm66018 int dring_size; 375590e2f9dcSlm66018 int op; 37563af08d82Slm66018 vio_msg_t vio_msg; 37573af08d82Slm66018 vdc_local_desc_t *curr_ldep; 37583af08d82Slm66018 37593af08d82Slm66018 ASSERT(MUTEX_NOT_HELD(&vdcp->lock)); 37603af08d82Slm66018 ASSERT(vdcp->state == VDC_STATE_HANDLE_PENDING); 37613af08d82Slm66018 3762655fd6a9Sachartre if (vdcp->local_dring_backup == NULL) { 3763655fd6a9Sachartre /* the pending requests have already been processed */ 3764655fd6a9Sachartre return (0); 3765655fd6a9Sachartre } 3766655fd6a9Sachartre 37673af08d82Slm66018 DMSG(vdcp, 1, "restoring pending dring entries (len=%d, tail=%d)\n", 37683af08d82Slm66018 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 37693af08d82Slm66018 37703af08d82Slm66018 /* 37713af08d82Slm66018 * Walk the backup copy of the local descriptor ring and 37723af08d82Slm66018 * resubmit all the outstanding transactions. 37733af08d82Slm66018 */ 37743af08d82Slm66018 b_idx = vdcp->local_dring_backup_tail; 37753af08d82Slm66018 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 37763af08d82Slm66018 37773af08d82Slm66018 curr_ldep = &(vdcp->local_dring_backup[b_idx]); 37783af08d82Slm66018 3779eff7243fSlm66018 /* only resubmit outstanding transactions */ 37803af08d82Slm66018 if (!curr_ldep->is_free) { 378190e2f9dcSlm66018 /* 378290e2f9dcSlm66018 * If we are retrying a block read/write operation we 378390e2f9dcSlm66018 * need to update the I/O statistics to indicate that 378490e2f9dcSlm66018 * the request is being put back on the waitq to be 378590e2f9dcSlm66018 * serviced (it will have been taken off after the 378690e2f9dcSlm66018 * error was reported). 378790e2f9dcSlm66018 */ 378890e2f9dcSlm66018 mutex_enter(&vdcp->lock); 378990e2f9dcSlm66018 op = curr_ldep->operation; 379090e2f9dcSlm66018 if ((op == VD_OP_BREAD) || (op == VD_OP_BWRITE)) { 379190e2f9dcSlm66018 DTRACE_IO1(start, buf_t *, curr_ldep->cb_arg); 379290e2f9dcSlm66018 VD_KSTAT_WAITQ_ENTER(vdcp); 379390e2f9dcSlm66018 } 37943af08d82Slm66018 37953af08d82Slm66018 DMSG(vdcp, 1, "resubmitting entry idx=%x\n", b_idx); 379690e2f9dcSlm66018 rv = vdc_populate_descriptor(vdcp, op, 37973af08d82Slm66018 curr_ldep->addr, curr_ldep->nbytes, 37983af08d82Slm66018 curr_ldep->slice, curr_ldep->offset, 37993af08d82Slm66018 curr_ldep->cb_type, curr_ldep->cb_arg, 38003af08d82Slm66018 curr_ldep->dir); 380190e2f9dcSlm66018 38023af08d82Slm66018 if (rv) { 380390e2f9dcSlm66018 if (op == VD_OP_BREAD || op == VD_OP_BWRITE) { 380490e2f9dcSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_transerrs); 380590e2f9dcSlm66018 VD_KSTAT_WAITQ_EXIT(vdcp); 380690e2f9dcSlm66018 DTRACE_IO1(done, buf_t *, 380790e2f9dcSlm66018 curr_ldep->cb_arg); 380890e2f9dcSlm66018 } 38093af08d82Slm66018 DMSG(vdcp, 1, "[%d] cannot resubmit entry %d\n", 38103af08d82Slm66018 vdcp->instance, b_idx); 381190e2f9dcSlm66018 mutex_exit(&vdcp->lock); 381290e2f9dcSlm66018 goto done; 38133af08d82Slm66018 } 38143af08d82Slm66018 381590e2f9dcSlm66018 /* 381690e2f9dcSlm66018 * If this is a block read/write we update the I/O 381790e2f9dcSlm66018 * statistics kstat to indicate that the request 381890e2f9dcSlm66018 * has been sent back to the vDisk server and should 381990e2f9dcSlm66018 * now be put on the run queue. 382090e2f9dcSlm66018 */ 382190e2f9dcSlm66018 if ((op == VD_OP_BREAD) || (op == VD_OP_BWRITE)) { 382290e2f9dcSlm66018 DTRACE_PROBE1(send, buf_t *, curr_ldep->cb_arg); 382390e2f9dcSlm66018 VD_KSTAT_WAITQ_TO_RUNQ(vdcp); 382490e2f9dcSlm66018 } 382590e2f9dcSlm66018 mutex_exit(&vdcp->lock); 382690e2f9dcSlm66018 38273af08d82Slm66018 /* Wait for the response message. */ 38283af08d82Slm66018 DMSG(vdcp, 1, "waiting for response to idx=%x\n", 38293af08d82Slm66018 b_idx); 383090e2f9dcSlm66018 rv = vdc_wait_for_response(vdcp, &vio_msg); 383190e2f9dcSlm66018 if (rv) { 383290e2f9dcSlm66018 /* 383390e2f9dcSlm66018 * If this is a block read/write we update 383490e2f9dcSlm66018 * the I/O statistics kstat to take it 383590e2f9dcSlm66018 * off the run queue. 383690e2f9dcSlm66018 */ 383790e2f9dcSlm66018 mutex_enter(&vdcp->lock); 383890e2f9dcSlm66018 if (op == VD_OP_BREAD || op == VD_OP_BWRITE) { 383990e2f9dcSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_transerrs); 384090e2f9dcSlm66018 VD_KSTAT_RUNQ_EXIT(vdcp); 384190e2f9dcSlm66018 DTRACE_IO1(done, buf_t *, 384290e2f9dcSlm66018 curr_ldep->cb_arg); 384390e2f9dcSlm66018 } 38443af08d82Slm66018 DMSG(vdcp, 1, "[%d] wait_for_response " 38453af08d82Slm66018 "returned err=%d\n", vdcp->instance, 384690e2f9dcSlm66018 rv); 384790e2f9dcSlm66018 mutex_exit(&vdcp->lock); 384890e2f9dcSlm66018 goto done; 38493af08d82Slm66018 } 38503af08d82Slm66018 38513af08d82Slm66018 DMSG(vdcp, 1, "processing msg for idx=%x\n", b_idx); 385290e2f9dcSlm66018 rv = vdc_process_data_msg(vdcp, &vio_msg); 385390e2f9dcSlm66018 if (rv) { 38543af08d82Slm66018 DMSG(vdcp, 1, "[%d] process_data_msg " 38553af08d82Slm66018 "returned err=%d\n", vdcp->instance, 385690e2f9dcSlm66018 rv); 385790e2f9dcSlm66018 goto done; 38583af08d82Slm66018 } 3859630f014dSrameshc /* 3860630f014dSrameshc * Mark this entry as free so that we will not resubmit 3861630f014dSrameshc * this "done" request again, if we were to use the same 3862630f014dSrameshc * backup_dring again in future. This could happen when 3863630f014dSrameshc * a reset happens while processing the backup_dring. 3864630f014dSrameshc */ 3865630f014dSrameshc curr_ldep->is_free = B_TRUE; 386690e2f9dcSlm66018 processed++; 38673af08d82Slm66018 } 38683af08d82Slm66018 38693af08d82Slm66018 /* get the next element to submit */ 38703af08d82Slm66018 if (++b_idx >= vdcp->local_dring_backup_len) 38713af08d82Slm66018 b_idx = 0; 38723af08d82Slm66018 } 38733af08d82Slm66018 38743af08d82Slm66018 /* all done - now clear up pending dring copy */ 38753af08d82Slm66018 dring_size = vdcp->local_dring_backup_len * 38763af08d82Slm66018 sizeof (vdcp->local_dring_backup[0]); 38773af08d82Slm66018 38783af08d82Slm66018 (void) kmem_free(vdcp->local_dring_backup, dring_size); 38793af08d82Slm66018 38803af08d82Slm66018 vdcp->local_dring_backup = NULL; 38813af08d82Slm66018 388290e2f9dcSlm66018 done: 388390e2f9dcSlm66018 DTRACE_PROBE2(processed, int, processed, vdc_t *, vdcp); 388490e2f9dcSlm66018 388590e2f9dcSlm66018 return (rv); 38863af08d82Slm66018 } 38873af08d82Slm66018 38883af08d82Slm66018 /* 38893af08d82Slm66018 * Function: 3890655fd6a9Sachartre * vdc_cancel_backup_dring 3891655fd6a9Sachartre * 3892655fd6a9Sachartre * Description: 3893655fd6a9Sachartre * Cancel each descriptor in the backed up dring to vDisk server. 3894655fd6a9Sachartre * The Dring was backed up during connection reset. 3895655fd6a9Sachartre * 3896655fd6a9Sachartre * Arguments: 3897655fd6a9Sachartre * vdcp - soft state pointer for this instance of the device driver. 3898655fd6a9Sachartre * 3899655fd6a9Sachartre * Return Code: 3900655fd6a9Sachartre * None 3901655fd6a9Sachartre */ 3902655fd6a9Sachartre void 390390e2f9dcSlm66018 vdc_cancel_backup_dring(vdc_t *vdcp) 3904655fd6a9Sachartre { 3905655fd6a9Sachartre vdc_local_desc_t *ldep; 3906655fd6a9Sachartre struct buf *bufp; 3907655fd6a9Sachartre int count; 3908655fd6a9Sachartre int b_idx; 3909655fd6a9Sachartre int dring_size; 391090e2f9dcSlm66018 int cancelled = 0; 3911655fd6a9Sachartre 3912655fd6a9Sachartre ASSERT(MUTEX_HELD(&vdcp->lock)); 3913655fd6a9Sachartre ASSERT(vdcp->state == VDC_STATE_INIT || 3914655fd6a9Sachartre vdcp->state == VDC_STATE_INIT_WAITING || 3915655fd6a9Sachartre vdcp->state == VDC_STATE_NEGOTIATE || 3916655fd6a9Sachartre vdcp->state == VDC_STATE_RESETTING); 3917655fd6a9Sachartre 3918655fd6a9Sachartre if (vdcp->local_dring_backup == NULL) { 3919655fd6a9Sachartre /* the pending requests have already been processed */ 3920655fd6a9Sachartre return; 3921655fd6a9Sachartre } 3922655fd6a9Sachartre 3923655fd6a9Sachartre DMSG(vdcp, 1, "cancelling pending dring entries (len=%d, tail=%d)\n", 3924655fd6a9Sachartre vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3925655fd6a9Sachartre 3926655fd6a9Sachartre /* 3927655fd6a9Sachartre * Walk the backup copy of the local descriptor ring and 3928655fd6a9Sachartre * cancel all the outstanding transactions. 3929655fd6a9Sachartre */ 3930655fd6a9Sachartre b_idx = vdcp->local_dring_backup_tail; 3931655fd6a9Sachartre for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3932655fd6a9Sachartre 3933655fd6a9Sachartre ldep = &(vdcp->local_dring_backup[b_idx]); 3934655fd6a9Sachartre 3935655fd6a9Sachartre /* only cancel outstanding transactions */ 3936655fd6a9Sachartre if (!ldep->is_free) { 3937655fd6a9Sachartre 3938655fd6a9Sachartre DMSG(vdcp, 1, "cancelling entry idx=%x\n", b_idx); 393990e2f9dcSlm66018 cancelled++; 3940655fd6a9Sachartre 3941655fd6a9Sachartre /* 3942655fd6a9Sachartre * All requests have already been cleared from the 3943655fd6a9Sachartre * local descriptor ring and the LDC channel has been 3944655fd6a9Sachartre * reset so we will never get any reply for these 3945655fd6a9Sachartre * requests. Now we just have to notify threads waiting 3946655fd6a9Sachartre * for replies that the request has failed. 3947655fd6a9Sachartre */ 3948655fd6a9Sachartre switch (ldep->cb_type) { 3949655fd6a9Sachartre case CB_SYNC: 3950655fd6a9Sachartre ASSERT(vdcp->sync_op_pending); 3951655fd6a9Sachartre vdcp->sync_op_status = EIO; 3952655fd6a9Sachartre vdcp->sync_op_pending = B_FALSE; 3953655fd6a9Sachartre cv_signal(&vdcp->sync_pending_cv); 3954655fd6a9Sachartre break; 3955655fd6a9Sachartre 3956655fd6a9Sachartre case CB_STRATEGY: 3957655fd6a9Sachartre bufp = ldep->cb_arg; 3958655fd6a9Sachartre ASSERT(bufp != NULL); 3959655fd6a9Sachartre bufp->b_resid = bufp->b_bcount; 3960366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 396190e2f9dcSlm66018 VD_KSTAT_RUNQ_EXIT(vdcp); 3962366a92acSlm66018 DTRACE_IO1(done, buf_t *, bufp); 3963655fd6a9Sachartre bioerror(bufp, EIO); 3964655fd6a9Sachartre biodone(bufp); 3965655fd6a9Sachartre break; 3966655fd6a9Sachartre 3967655fd6a9Sachartre default: 3968655fd6a9Sachartre ASSERT(0); 3969655fd6a9Sachartre } 3970655fd6a9Sachartre 3971655fd6a9Sachartre } 3972655fd6a9Sachartre 3973655fd6a9Sachartre /* get the next element to cancel */ 3974655fd6a9Sachartre if (++b_idx >= vdcp->local_dring_backup_len) 3975655fd6a9Sachartre b_idx = 0; 3976655fd6a9Sachartre } 3977655fd6a9Sachartre 3978655fd6a9Sachartre /* all done - now clear up pending dring copy */ 3979655fd6a9Sachartre dring_size = vdcp->local_dring_backup_len * 3980655fd6a9Sachartre sizeof (vdcp->local_dring_backup[0]); 3981655fd6a9Sachartre 3982655fd6a9Sachartre (void) kmem_free(vdcp->local_dring_backup, dring_size); 3983655fd6a9Sachartre 3984655fd6a9Sachartre vdcp->local_dring_backup = NULL; 3985655fd6a9Sachartre 398690e2f9dcSlm66018 DTRACE_PROBE2(cancelled, int, cancelled, vdc_t *, vdcp); 3987655fd6a9Sachartre } 3988655fd6a9Sachartre 3989655fd6a9Sachartre /* 3990655fd6a9Sachartre * Function: 3991655fd6a9Sachartre * vdc_connection_timeout 3992655fd6a9Sachartre * 3993655fd6a9Sachartre * Description: 3994655fd6a9Sachartre * This function is invoked if the timeout set to establish the connection 3995655fd6a9Sachartre * with vds expires. This will happen if we spend too much time in the 3996655fd6a9Sachartre * VDC_STATE_INIT_WAITING or VDC_STATE_NEGOTIATE states. Then we will 3997655fd6a9Sachartre * cancel any pending request and mark them as failed. 3998655fd6a9Sachartre * 3999655fd6a9Sachartre * If the timeout does not expire, it will be cancelled when we reach the 4000655fd6a9Sachartre * VDC_STATE_HANDLE_PENDING or VDC_STATE_RESETTING state. This function can 4001655fd6a9Sachartre * be invoked while we are in the VDC_STATE_HANDLE_PENDING or 4002655fd6a9Sachartre * VDC_STATE_RESETTING state in which case we do nothing because the 4003655fd6a9Sachartre * timeout is being cancelled. 4004655fd6a9Sachartre * 4005655fd6a9Sachartre * Arguments: 4006655fd6a9Sachartre * arg - argument of the timeout function actually a soft state 4007655fd6a9Sachartre * pointer for the instance of the device driver. 4008655fd6a9Sachartre * 4009655fd6a9Sachartre * Return Code: 4010655fd6a9Sachartre * None 4011655fd6a9Sachartre */ 4012655fd6a9Sachartre void 4013655fd6a9Sachartre vdc_connection_timeout(void *arg) 4014655fd6a9Sachartre { 4015655fd6a9Sachartre vdc_t *vdcp = (vdc_t *)arg; 4016655fd6a9Sachartre 4017655fd6a9Sachartre mutex_enter(&vdcp->lock); 4018655fd6a9Sachartre 4019655fd6a9Sachartre if (vdcp->state == VDC_STATE_HANDLE_PENDING || 4020655fd6a9Sachartre vdcp->state == VDC_STATE_DETACH) { 4021655fd6a9Sachartre /* 4022655fd6a9Sachartre * The connection has just been re-established or 4023655fd6a9Sachartre * we are detaching. 4024655fd6a9Sachartre */ 4025655fd6a9Sachartre vdcp->ctimeout_reached = B_FALSE; 4026655fd6a9Sachartre mutex_exit(&vdcp->lock); 4027655fd6a9Sachartre return; 4028655fd6a9Sachartre } 4029655fd6a9Sachartre 4030655fd6a9Sachartre vdcp->ctimeout_reached = B_TRUE; 4031655fd6a9Sachartre 4032655fd6a9Sachartre /* notify requests waiting for sending */ 4033655fd6a9Sachartre cv_broadcast(&vdcp->running_cv); 4034655fd6a9Sachartre 4035655fd6a9Sachartre /* cancel requests waiting for a result */ 403690e2f9dcSlm66018 vdc_cancel_backup_dring(vdcp); 4037655fd6a9Sachartre 4038655fd6a9Sachartre mutex_exit(&vdcp->lock); 4039655fd6a9Sachartre 4040655fd6a9Sachartre cmn_err(CE_NOTE, "[%d] connection to service domain timeout", 4041655fd6a9Sachartre vdcp->instance); 4042655fd6a9Sachartre } 4043655fd6a9Sachartre 4044655fd6a9Sachartre /* 4045655fd6a9Sachartre * Function: 40463af08d82Slm66018 * vdc_backup_local_dring() 40473af08d82Slm66018 * 40483af08d82Slm66018 * Description: 40493af08d82Slm66018 * Backup the current dring in the event of a reset. The Dring 40503af08d82Slm66018 * transactions will be resubmitted to the server when the 40513af08d82Slm66018 * connection is restored. 40523af08d82Slm66018 * 40533af08d82Slm66018 * Arguments: 40543af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 40553af08d82Slm66018 * 40563af08d82Slm66018 * Return Code: 40573af08d82Slm66018 * NONE 40583af08d82Slm66018 */ 40593af08d82Slm66018 static void 40603af08d82Slm66018 vdc_backup_local_dring(vdc_t *vdcp) 40613af08d82Slm66018 { 40623af08d82Slm66018 int dring_size; 40633af08d82Slm66018 4064655fd6a9Sachartre ASSERT(MUTEX_HELD(&vdcp->lock)); 40653af08d82Slm66018 ASSERT(vdcp->state == VDC_STATE_RESETTING); 40663af08d82Slm66018 40673af08d82Slm66018 /* 40683af08d82Slm66018 * If the backup dring is stil around, it means 40693af08d82Slm66018 * that the last restore did not complete. However, 40703af08d82Slm66018 * since we never got back into the running state, 40713af08d82Slm66018 * the backup copy we have is still valid. 40723af08d82Slm66018 */ 40733af08d82Slm66018 if (vdcp->local_dring_backup != NULL) { 40743af08d82Slm66018 DMSG(vdcp, 1, "reusing local descriptor ring backup " 40753af08d82Slm66018 "(len=%d, tail=%d)\n", vdcp->local_dring_backup_len, 40763af08d82Slm66018 vdcp->local_dring_backup_tail); 40773af08d82Slm66018 return; 40783af08d82Slm66018 } 40793af08d82Slm66018 4080655fd6a9Sachartre /* 4081655fd6a9Sachartre * The backup dring can be NULL and the local dring may not be 4082655fd6a9Sachartre * initialized. This can happen if we had a reset while establishing 4083655fd6a9Sachartre * a new connection but after the connection has timed out. In that 4084655fd6a9Sachartre * case the backup dring is NULL because the requests have been 4085655fd6a9Sachartre * cancelled and the request occured before the local dring is 4086655fd6a9Sachartre * initialized. 4087655fd6a9Sachartre */ 4088655fd6a9Sachartre if (!(vdcp->initialized & VDC_DRING_LOCAL)) 4089655fd6a9Sachartre return; 4090655fd6a9Sachartre 40913af08d82Slm66018 DMSG(vdcp, 1, "backing up the local descriptor ring (len=%d, " 40923af08d82Slm66018 "tail=%d)\n", vdcp->dring_len, vdcp->dring_curr_idx); 40933af08d82Slm66018 40943af08d82Slm66018 dring_size = vdcp->dring_len * sizeof (vdcp->local_dring[0]); 40953af08d82Slm66018 40963af08d82Slm66018 vdcp->local_dring_backup = kmem_alloc(dring_size, KM_SLEEP); 40973af08d82Slm66018 bcopy(vdcp->local_dring, vdcp->local_dring_backup, dring_size); 40983af08d82Slm66018 40993af08d82Slm66018 vdcp->local_dring_backup_tail = vdcp->dring_curr_idx; 41003af08d82Slm66018 vdcp->local_dring_backup_len = vdcp->dring_len; 41013af08d82Slm66018 } 41023af08d82Slm66018 41038cd10891Snarayan static void 41048cd10891Snarayan vdc_switch_server(vdc_t *vdcp) 41058cd10891Snarayan { 41068cd10891Snarayan int rv; 41078cd10891Snarayan vdc_server_t *curr_server, *new_server; 41088cd10891Snarayan 41098cd10891Snarayan ASSERT(MUTEX_HELD(&vdcp->lock)); 41108cd10891Snarayan 41118cd10891Snarayan /* if there is only one server return back */ 41128cd10891Snarayan if (vdcp->num_servers == 1) { 41138cd10891Snarayan return; 41148cd10891Snarayan } 41158cd10891Snarayan 41168cd10891Snarayan /* Get current and next server */ 41178cd10891Snarayan curr_server = vdcp->curr_server; 41188cd10891Snarayan new_server = 41198cd10891Snarayan (curr_server->next) ? curr_server->next : vdcp->server_list; 41208cd10891Snarayan ASSERT(curr_server != new_server); 41218cd10891Snarayan 41228cd10891Snarayan /* bring current server's channel down */ 41238cd10891Snarayan rv = ldc_down(curr_server->ldc_handle); 41248cd10891Snarayan if (rv) { 41258cd10891Snarayan DMSG(vdcp, 0, "[%d] Cannot bring channel down, port %ld\n", 41268cd10891Snarayan vdcp->instance, curr_server->id); 41278cd10891Snarayan return; 41288cd10891Snarayan } 41298cd10891Snarayan 41308cd10891Snarayan /* switch the server */ 41318cd10891Snarayan vdcp->curr_server = new_server; 41328cd10891Snarayan 41338cd10891Snarayan DMSG(vdcp, 0, "[%d] Switched to next vdisk server, port@%ld, ldc@%ld\n", 41348cd10891Snarayan vdcp->instance, vdcp->curr_server->id, vdcp->curr_server->ldc_id); 41358cd10891Snarayan } 41368cd10891Snarayan 41371ae08745Sheppo /* -------------------------------------------------------------------------- */ 41381ae08745Sheppo 41391ae08745Sheppo /* 41401ae08745Sheppo * The following functions process the incoming messages from vds 41411ae08745Sheppo */ 41421ae08745Sheppo 41430a55fbb7Slm66018 /* 41440a55fbb7Slm66018 * Function: 41450a55fbb7Slm66018 * vdc_process_msg_thread() 41460a55fbb7Slm66018 * 41470a55fbb7Slm66018 * Description: 41480a55fbb7Slm66018 * 41493af08d82Slm66018 * Main VDC message processing thread. Each vDisk instance 41503af08d82Slm66018 * consists of a copy of this thread. This thread triggers 41513af08d82Slm66018 * all the handshakes and data exchange with the server. It 41523af08d82Slm66018 * also handles all channel resets 41533af08d82Slm66018 * 41540a55fbb7Slm66018 * Arguments: 41550a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 41560a55fbb7Slm66018 * 41570a55fbb7Slm66018 * Return Code: 41580a55fbb7Slm66018 * None 41590a55fbb7Slm66018 */ 41601ae08745Sheppo static void 41613af08d82Slm66018 vdc_process_msg_thread(vdc_t *vdcp) 41621ae08745Sheppo { 41631ae08745Sheppo int status; 4164655fd6a9Sachartre int ctimeout; 4165655fd6a9Sachartre timeout_id_t tmid = 0; 41668cd10891Snarayan clock_t ldcup_timeout = 0; 41671ae08745Sheppo 41683af08d82Slm66018 mutex_enter(&vdcp->lock); 41691ae08745Sheppo 41701ae08745Sheppo for (;;) { 41711ae08745Sheppo 41723af08d82Slm66018 #define Q(_s) (vdcp->state == _s) ? #_s : 41733af08d82Slm66018 DMSG(vdcp, 3, "state = %d (%s)\n", vdcp->state, 41743af08d82Slm66018 Q(VDC_STATE_INIT) 41753af08d82Slm66018 Q(VDC_STATE_INIT_WAITING) 41763af08d82Slm66018 Q(VDC_STATE_NEGOTIATE) 41773af08d82Slm66018 Q(VDC_STATE_HANDLE_PENDING) 41783af08d82Slm66018 Q(VDC_STATE_RUNNING) 41793af08d82Slm66018 Q(VDC_STATE_RESETTING) 41803af08d82Slm66018 Q(VDC_STATE_DETACH) 41813af08d82Slm66018 "UNKNOWN"); 41821ae08745Sheppo 41833af08d82Slm66018 switch (vdcp->state) { 41843af08d82Slm66018 case VDC_STATE_INIT: 41853af08d82Slm66018 4186655fd6a9Sachartre /* 4187655fd6a9Sachartre * If requested, start a timeout to check if the 4188655fd6a9Sachartre * connection with vds is established in the 4189655fd6a9Sachartre * specified delay. If the timeout expires, we 4190655fd6a9Sachartre * will cancel any pending request. 4191655fd6a9Sachartre * 4192655fd6a9Sachartre * If some reset have occurred while establishing 4193655fd6a9Sachartre * the connection, we already have a timeout armed 4194655fd6a9Sachartre * and in that case we don't need to arm a new one. 41958cd10891Snarayan * 41968cd10891Snarayan * The same rule applies when there are multiple vds'. 41978cd10891Snarayan * If either a connection cannot be established or 41988cd10891Snarayan * the handshake times out, the connection thread will 41998cd10891Snarayan * try another server. The 'ctimeout' will report 42008cd10891Snarayan * back an error after it expires irrespective of 42018cd10891Snarayan * whether the vdisk is trying to connect to just 42028cd10891Snarayan * one or multiple servers. 4203655fd6a9Sachartre */ 4204655fd6a9Sachartre ctimeout = (vdc_timeout != 0)? 42058cd10891Snarayan vdc_timeout : vdcp->curr_server->ctimeout; 4206655fd6a9Sachartre 4207655fd6a9Sachartre if (ctimeout != 0 && tmid == 0) { 4208655fd6a9Sachartre tmid = timeout(vdc_connection_timeout, vdcp, 42098cd10891Snarayan ctimeout * drv_usectohz(MICROSEC)); 4210655fd6a9Sachartre } 4211655fd6a9Sachartre 42128cd10891Snarayan /* Check if we are re-initializing repeatedly */ 42138cd10891Snarayan if (vdcp->hshake_cnt > vdc_hshake_retries && 4214655fd6a9Sachartre vdcp->lifecycle != VDC_LC_ONLINE) { 42158cd10891Snarayan 42168cd10891Snarayan DMSG(vdcp, 0, "[%d] too many handshakes,cnt=%d", 42178cd10891Snarayan vdcp->instance, vdcp->hshake_cnt); 42183c96341aSnarayan cmn_err(CE_NOTE, "[%d] disk access failed.\n", 42193c96341aSnarayan vdcp->instance); 42203af08d82Slm66018 vdcp->state = VDC_STATE_DETACH; 42213af08d82Slm66018 break; 42223af08d82Slm66018 } 42233af08d82Slm66018 42248cd10891Snarayan /* Switch to STATE_DETACH if drv is detaching */ 42258cd10891Snarayan if (vdcp->lifecycle == VDC_LC_DETACHING) { 42268cd10891Snarayan vdcp->state = VDC_STATE_DETACH; 42278cd10891Snarayan break; 42288cd10891Snarayan } 42298cd10891Snarayan 42308cd10891Snarayan /* Switch server */ 42318cd10891Snarayan if (vdcp->hshake_cnt > 0) 42328cd10891Snarayan vdc_switch_server(vdcp); 42338cd10891Snarayan vdcp->hshake_cnt++; 42348cd10891Snarayan 42353af08d82Slm66018 /* Bring up connection with vds via LDC */ 42363af08d82Slm66018 status = vdc_start_ldc_connection(vdcp); 42378cd10891Snarayan if (status != EINVAL) { 42383af08d82Slm66018 vdcp->state = VDC_STATE_INIT_WAITING; 42393af08d82Slm66018 } 42403af08d82Slm66018 break; 42413af08d82Slm66018 42423af08d82Slm66018 case VDC_STATE_INIT_WAITING: 42433af08d82Slm66018 42448cd10891Snarayan /* if channel is UP, start negotiation */ 42458cd10891Snarayan if (vdcp->curr_server->ldc_state == LDC_UP) { 42468cd10891Snarayan vdcp->state = VDC_STATE_NEGOTIATE; 42478cd10891Snarayan break; 42488cd10891Snarayan } 42498cd10891Snarayan 42508cd10891Snarayan /* check if only one server exists */ 42518cd10891Snarayan if (vdcp->num_servers == 1) { 42523af08d82Slm66018 cv_wait(&vdcp->initwait_cv, &vdcp->lock); 42538cd10891Snarayan } else { 42548cd10891Snarayan /* 42558cd10891Snarayan * wait for LDC_UP, if it times out, switch 42568cd10891Snarayan * to another server. 42578cd10891Snarayan */ 42588cd10891Snarayan ldcup_timeout = ddi_get_lbolt() + 42598cd10891Snarayan (vdc_ldcup_timeout * 42608cd10891Snarayan drv_usectohz(MICROSEC)); 42618cd10891Snarayan status = cv_timedwait(&vdcp->initwait_cv, 42628cd10891Snarayan &vdcp->lock, ldcup_timeout); 42638cd10891Snarayan if (status == -1 && 42648cd10891Snarayan vdcp->state == VDC_STATE_INIT_WAITING && 42658cd10891Snarayan vdcp->curr_server->ldc_state != LDC_UP) { 42668cd10891Snarayan /* timed out & still waiting */ 42678cd10891Snarayan vdcp->state = VDC_STATE_INIT; 42688cd10891Snarayan break; 42698cd10891Snarayan } 42708cd10891Snarayan } 42718cd10891Snarayan 42723af08d82Slm66018 if (vdcp->state != VDC_STATE_INIT_WAITING) { 42733af08d82Slm66018 DMSG(vdcp, 0, 42743af08d82Slm66018 "state moved to %d out from under us...\n", 42753af08d82Slm66018 vdcp->state); 42763af08d82Slm66018 } 42773af08d82Slm66018 break; 42783af08d82Slm66018 42793af08d82Slm66018 case VDC_STATE_NEGOTIATE: 42803af08d82Slm66018 switch (status = vdc_ver_negotiation(vdcp)) { 42813af08d82Slm66018 case 0: 42823af08d82Slm66018 break; 42833af08d82Slm66018 default: 42843af08d82Slm66018 DMSG(vdcp, 0, "ver negotiate failed (%d)..\n", 42853af08d82Slm66018 status); 42863af08d82Slm66018 goto reset; 42873af08d82Slm66018 } 42883af08d82Slm66018 42893af08d82Slm66018 switch (status = vdc_attr_negotiation(vdcp)) { 42903af08d82Slm66018 case 0: 42913af08d82Slm66018 break; 42923af08d82Slm66018 default: 42933af08d82Slm66018 DMSG(vdcp, 0, "attr negotiate failed (%d)..\n", 42943af08d82Slm66018 status); 42953af08d82Slm66018 goto reset; 42963af08d82Slm66018 } 42973af08d82Slm66018 42983af08d82Slm66018 switch (status = vdc_dring_negotiation(vdcp)) { 42993af08d82Slm66018 case 0: 43003af08d82Slm66018 break; 43013af08d82Slm66018 default: 43023af08d82Slm66018 DMSG(vdcp, 0, "dring negotiate failed (%d)..\n", 43033af08d82Slm66018 status); 43043af08d82Slm66018 goto reset; 43053af08d82Slm66018 } 43063af08d82Slm66018 43073af08d82Slm66018 switch (status = vdc_rdx_exchange(vdcp)) { 43083af08d82Slm66018 case 0: 43093af08d82Slm66018 vdcp->state = VDC_STATE_HANDLE_PENDING; 43103af08d82Slm66018 goto done; 43113af08d82Slm66018 default: 43123af08d82Slm66018 DMSG(vdcp, 0, "RDX xchg failed ..(%d)\n", 43133af08d82Slm66018 status); 43143af08d82Slm66018 goto reset; 43153af08d82Slm66018 } 43163af08d82Slm66018 reset: 43173af08d82Slm66018 DMSG(vdcp, 0, "negotiation failed: resetting (%d)\n", 43183af08d82Slm66018 status); 43193af08d82Slm66018 vdcp->state = VDC_STATE_RESETTING; 4320655fd6a9Sachartre vdcp->self_reset = B_TRUE; 43213af08d82Slm66018 done: 43223af08d82Slm66018 DMSG(vdcp, 0, "negotiation complete (state=0x%x)...\n", 43233af08d82Slm66018 vdcp->state); 43243af08d82Slm66018 break; 43253af08d82Slm66018 43263af08d82Slm66018 case VDC_STATE_HANDLE_PENDING: 43273af08d82Slm66018 4328655fd6a9Sachartre if (vdcp->ctimeout_reached) { 4329655fd6a9Sachartre /* 4330655fd6a9Sachartre * The connection timeout had been reached so 4331655fd6a9Sachartre * pending requests have been cancelled. Now 4332655fd6a9Sachartre * that the connection is back we can reset 4333655fd6a9Sachartre * the timeout. 4334655fd6a9Sachartre */ 4335655fd6a9Sachartre ASSERT(vdcp->local_dring_backup == NULL); 4336655fd6a9Sachartre ASSERT(tmid != 0); 4337655fd6a9Sachartre tmid = 0; 4338655fd6a9Sachartre vdcp->ctimeout_reached = B_FALSE; 4339655fd6a9Sachartre vdcp->state = VDC_STATE_RUNNING; 4340655fd6a9Sachartre DMSG(vdcp, 0, "[%d] connection to service " 4341655fd6a9Sachartre "domain is up", vdcp->instance); 4342655fd6a9Sachartre break; 4343655fd6a9Sachartre } 4344655fd6a9Sachartre 43453af08d82Slm66018 mutex_exit(&vdcp->lock); 4346655fd6a9Sachartre if (tmid != 0) { 4347655fd6a9Sachartre (void) untimeout(tmid); 4348655fd6a9Sachartre tmid = 0; 4349655fd6a9Sachartre } 43503af08d82Slm66018 status = vdc_resubmit_backup_dring(vdcp); 43513af08d82Slm66018 mutex_enter(&vdcp->lock); 43523af08d82Slm66018 43533af08d82Slm66018 if (status) 43543af08d82Slm66018 vdcp->state = VDC_STATE_RESETTING; 43553af08d82Slm66018 else 43563af08d82Slm66018 vdcp->state = VDC_STATE_RUNNING; 43573af08d82Slm66018 43583af08d82Slm66018 break; 43593af08d82Slm66018 43603af08d82Slm66018 /* enter running state */ 43613af08d82Slm66018 case VDC_STATE_RUNNING: 43623af08d82Slm66018 /* 43633af08d82Slm66018 * Signal anyone waiting for the connection 43643af08d82Slm66018 * to come on line. 43653af08d82Slm66018 */ 43663af08d82Slm66018 vdcp->hshake_cnt = 0; 43673af08d82Slm66018 cv_broadcast(&vdcp->running_cv); 43682f5224aeSachartre 43692f5224aeSachartre /* failfast has to been checked after reset */ 43702f5224aeSachartre cv_signal(&vdcp->failfast_cv); 43712f5224aeSachartre 43722f5224aeSachartre /* ownership is lost during reset */ 43732f5224aeSachartre if (vdcp->ownership & VDC_OWNERSHIP_WANTED) 43742f5224aeSachartre vdcp->ownership |= VDC_OWNERSHIP_RESET; 43752f5224aeSachartre cv_signal(&vdcp->ownership_cv); 43762f5224aeSachartre 4377d7400d00Sachartre cmn_err(CE_CONT, "?vdisk@%d is online using " 4378d7400d00Sachartre "ldc@%ld,%ld\n", vdcp->instance, 4379d7400d00Sachartre vdcp->curr_server->ldc_id, vdcp->curr_server->id); 4380d7400d00Sachartre 43813af08d82Slm66018 mutex_exit(&vdcp->lock); 43823af08d82Slm66018 43833af08d82Slm66018 for (;;) { 43843af08d82Slm66018 vio_msg_t msg; 43853af08d82Slm66018 status = vdc_wait_for_response(vdcp, &msg); 43863af08d82Slm66018 if (status) break; 43873af08d82Slm66018 43883af08d82Slm66018 DMSG(vdcp, 1, "[%d] new pkt(s) available\n", 43893af08d82Slm66018 vdcp->instance); 43903af08d82Slm66018 status = vdc_process_data_msg(vdcp, &msg); 43911ae08745Sheppo if (status) { 43923af08d82Slm66018 DMSG(vdcp, 1, "[%d] process_data_msg " 43933af08d82Slm66018 "returned err=%d\n", vdcp->instance, 43943af08d82Slm66018 status); 43951ae08745Sheppo break; 43961ae08745Sheppo } 43971ae08745Sheppo 43983af08d82Slm66018 } 4399e1ebb9ecSlm66018 44003af08d82Slm66018 mutex_enter(&vdcp->lock); 44013af08d82Slm66018 4402d7400d00Sachartre cmn_err(CE_CONT, "?vdisk@%d is offline\n", 4403d7400d00Sachartre vdcp->instance); 4404d7400d00Sachartre 44053af08d82Slm66018 vdcp->state = VDC_STATE_RESETTING; 4406690555a1Sachartre vdcp->self_reset = B_TRUE; 44073af08d82Slm66018 break; 44083af08d82Slm66018 44093af08d82Slm66018 case VDC_STATE_RESETTING: 4410655fd6a9Sachartre /* 4411655fd6a9Sachartre * When we reach this state, we either come from the 4412655fd6a9Sachartre * VDC_STATE_RUNNING state and we can have pending 4413655fd6a9Sachartre * request but no timeout is armed; or we come from 4414655fd6a9Sachartre * the VDC_STATE_INIT_WAITING, VDC_NEGOTIATE or 4415655fd6a9Sachartre * VDC_HANDLE_PENDING state and there is no pending 4416655fd6a9Sachartre * request or pending requests have already been copied 4417655fd6a9Sachartre * into the backup dring. So we can safely keep the 4418655fd6a9Sachartre * connection timeout armed while we are in this state. 4419655fd6a9Sachartre */ 4420655fd6a9Sachartre 44213af08d82Slm66018 DMSG(vdcp, 0, "Initiating channel reset " 44223af08d82Slm66018 "(pending = %d)\n", (int)vdcp->threads_pending); 44233af08d82Slm66018 44243af08d82Slm66018 if (vdcp->self_reset) { 44253af08d82Slm66018 DMSG(vdcp, 0, 44263af08d82Slm66018 "[%d] calling stop_ldc_connection.\n", 44273af08d82Slm66018 vdcp->instance); 44283af08d82Slm66018 status = vdc_stop_ldc_connection(vdcp); 44293af08d82Slm66018 vdcp->self_reset = B_FALSE; 44301ae08745Sheppo } 44311ae08745Sheppo 44321ae08745Sheppo /* 44333af08d82Slm66018 * Wait for all threads currently waiting 44343af08d82Slm66018 * for a free dring entry to use. 44351ae08745Sheppo */ 44363af08d82Slm66018 while (vdcp->threads_pending) { 44373af08d82Slm66018 cv_broadcast(&vdcp->membind_cv); 44383af08d82Slm66018 cv_broadcast(&vdcp->dring_free_cv); 44393af08d82Slm66018 mutex_exit(&vdcp->lock); 4440205eeb1aSlm66018 /* give the waiters enough time to wake up */ 4441205eeb1aSlm66018 delay(vdc_hz_min_ldc_delay); 44423af08d82Slm66018 mutex_enter(&vdcp->lock); 44431ae08745Sheppo } 44441ae08745Sheppo 44453af08d82Slm66018 ASSERT(vdcp->threads_pending == 0); 44461ae08745Sheppo 44473af08d82Slm66018 /* Sanity check that no thread is receiving */ 44483af08d82Slm66018 ASSERT(vdcp->read_state != VDC_READ_WAITING); 44490a55fbb7Slm66018 44503af08d82Slm66018 vdcp->read_state = VDC_READ_IDLE; 44513af08d82Slm66018 44523af08d82Slm66018 vdc_backup_local_dring(vdcp); 44533af08d82Slm66018 44543af08d82Slm66018 /* cleanup the old d-ring */ 44553af08d82Slm66018 vdc_destroy_descriptor_ring(vdcp); 44563af08d82Slm66018 44573af08d82Slm66018 /* go and start again */ 44583af08d82Slm66018 vdcp->state = VDC_STATE_INIT; 44593af08d82Slm66018 44600a55fbb7Slm66018 break; 44610a55fbb7Slm66018 44623af08d82Slm66018 case VDC_STATE_DETACH: 44633af08d82Slm66018 DMSG(vdcp, 0, "[%d] Reset thread exit cleanup ..\n", 44643af08d82Slm66018 vdcp->instance); 44653af08d82Slm66018 4466655fd6a9Sachartre /* cancel any pending timeout */ 4467655fd6a9Sachartre mutex_exit(&vdcp->lock); 4468655fd6a9Sachartre if (tmid != 0) { 4469655fd6a9Sachartre (void) untimeout(tmid); 4470655fd6a9Sachartre tmid = 0; 4471655fd6a9Sachartre } 4472655fd6a9Sachartre mutex_enter(&vdcp->lock); 4473655fd6a9Sachartre 44743c96341aSnarayan /* 44753c96341aSnarayan * Signal anyone waiting for connection 44763c96341aSnarayan * to come online 44773c96341aSnarayan */ 44783c96341aSnarayan cv_broadcast(&vdcp->running_cv); 44793c96341aSnarayan 44803af08d82Slm66018 while (vdcp->sync_op_pending) { 44813af08d82Slm66018 cv_signal(&vdcp->sync_pending_cv); 44823af08d82Slm66018 cv_signal(&vdcp->sync_blocked_cv); 44833af08d82Slm66018 mutex_exit(&vdcp->lock); 4484205eeb1aSlm66018 /* give the waiters enough time to wake up */ 4485205eeb1aSlm66018 delay(vdc_hz_min_ldc_delay); 44863af08d82Slm66018 mutex_enter(&vdcp->lock); 44870a55fbb7Slm66018 } 44881ae08745Sheppo 44893af08d82Slm66018 mutex_exit(&vdcp->lock); 44903af08d82Slm66018 44913af08d82Slm66018 DMSG(vdcp, 0, "[%d] Msg processing thread exiting ..\n", 44923af08d82Slm66018 vdcp->instance); 44933af08d82Slm66018 thread_exit(); 44943af08d82Slm66018 break; 44953af08d82Slm66018 } 44963af08d82Slm66018 } 44970a55fbb7Slm66018 } 44980a55fbb7Slm66018 44990a55fbb7Slm66018 45000a55fbb7Slm66018 /* 45010a55fbb7Slm66018 * Function: 45020a55fbb7Slm66018 * vdc_process_data_msg() 45030a55fbb7Slm66018 * 45040a55fbb7Slm66018 * Description: 45050a55fbb7Slm66018 * This function is called by the message processing thread each time 45060a55fbb7Slm66018 * a message with a msgtype of VIO_TYPE_DATA is received. It will either 45070a55fbb7Slm66018 * be an ACK or NACK from vds[1] which vdc handles as follows. 45080a55fbb7Slm66018 * ACK - wake up the waiting thread 45090a55fbb7Slm66018 * NACK - resend any messages necessary 45100a55fbb7Slm66018 * 45110a55fbb7Slm66018 * [1] Although the message format allows it, vds should not send a 45120a55fbb7Slm66018 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for 45130a55fbb7Slm66018 * some bizarre reason it does, vdc will reset the connection. 45140a55fbb7Slm66018 * 45150a55fbb7Slm66018 * Arguments: 45160a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 45170a55fbb7Slm66018 * msg - the LDC message sent by vds 45180a55fbb7Slm66018 * 45190a55fbb7Slm66018 * Return Code: 45200a55fbb7Slm66018 * 0 - Success. 45210a55fbb7Slm66018 * > 0 - error value returned by LDC 45220a55fbb7Slm66018 */ 45230a55fbb7Slm66018 static int 45243af08d82Slm66018 vdc_process_data_msg(vdc_t *vdcp, vio_msg_t *msg) 45250a55fbb7Slm66018 { 45260a55fbb7Slm66018 int status = 0; 45273af08d82Slm66018 vio_dring_msg_t *dring_msg; 4528d10e4ef2Snarayan vdc_local_desc_t *ldep = NULL; 45293af08d82Slm66018 int start, end; 45303af08d82Slm66018 int idx; 453190e2f9dcSlm66018 int op; 45320a55fbb7Slm66018 45333af08d82Slm66018 dring_msg = (vio_dring_msg_t *)msg; 45340a55fbb7Slm66018 45353af08d82Slm66018 ASSERT(msg->tag.vio_msgtype == VIO_TYPE_DATA); 45363af08d82Slm66018 ASSERT(vdcp != NULL); 45373af08d82Slm66018 45383af08d82Slm66018 mutex_enter(&vdcp->lock); 45390a55fbb7Slm66018 45400a55fbb7Slm66018 /* 45410a55fbb7Slm66018 * Check to see if the message has bogus data 45420a55fbb7Slm66018 */ 4543e1ebb9ecSlm66018 idx = start = dring_msg->start_idx; 45440a55fbb7Slm66018 end = dring_msg->end_idx; 45453af08d82Slm66018 if ((start >= vdcp->dring_len) || 45463af08d82Slm66018 (end >= vdcp->dring_len) || (end < -1)) { 454790e2f9dcSlm66018 /* 454890e2f9dcSlm66018 * Update the I/O statistics to indicate that an error ocurred. 454990e2f9dcSlm66018 * No need to update the wait/run queues as no specific read or 455090e2f9dcSlm66018 * write request is being completed in response to this 'msg'. 455190e2f9dcSlm66018 */ 455290e2f9dcSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 45533af08d82Slm66018 DMSG(vdcp, 0, "[%d] Bogus ACK data : start %d, end %d\n", 45543af08d82Slm66018 vdcp->instance, start, end); 45553af08d82Slm66018 mutex_exit(&vdcp->lock); 4556e1ebb9ecSlm66018 return (EINVAL); 45570a55fbb7Slm66018 } 45580a55fbb7Slm66018 45590a55fbb7Slm66018 /* 45600a55fbb7Slm66018 * Verify that the sequence number is what vdc expects. 45610a55fbb7Slm66018 */ 45623af08d82Slm66018 switch (vdc_verify_seq_num(vdcp, dring_msg)) { 4563e1ebb9ecSlm66018 case VDC_SEQ_NUM_TODO: 4564e1ebb9ecSlm66018 break; /* keep processing this message */ 4565e1ebb9ecSlm66018 case VDC_SEQ_NUM_SKIP: 45663af08d82Slm66018 mutex_exit(&vdcp->lock); 4567e1ebb9ecSlm66018 return (0); 4568e1ebb9ecSlm66018 case VDC_SEQ_NUM_INVALID: 456990e2f9dcSlm66018 /* 457090e2f9dcSlm66018 * Update the I/O statistics to indicate that an error ocurred. 457190e2f9dcSlm66018 * No need to update the wait/run queues as no specific read or 457290e2f9dcSlm66018 * write request is being completed in response to this 'msg'. 457390e2f9dcSlm66018 */ 4574366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 457590e2f9dcSlm66018 DMSG(vdcp, 0, "[%d] invalid seqno\n", vdcp->instance); 4576366a92acSlm66018 mutex_exit(&vdcp->lock); 45770a55fbb7Slm66018 return (ENXIO); 45780a55fbb7Slm66018 } 45790a55fbb7Slm66018 45803af08d82Slm66018 if (msg->tag.vio_subtype == VIO_SUBTYPE_NACK) { 458190e2f9dcSlm66018 /* 458290e2f9dcSlm66018 * Update the I/O statistics to indicate that an error ocurred. 458390e2f9dcSlm66018 * 458490e2f9dcSlm66018 * We need to update the run queue if a read or write request 458590e2f9dcSlm66018 * is being NACKed - otherwise there will appear to be an 458690e2f9dcSlm66018 * indefinite outstanding request and statistics reported by 458790e2f9dcSlm66018 * iostat(1M) will be incorrect. The transaction will be 458890e2f9dcSlm66018 * resubmitted from the backup DRing following the reset 458990e2f9dcSlm66018 * and the wait/run queues will be entered again. 459090e2f9dcSlm66018 */ 459190e2f9dcSlm66018 ldep = &vdcp->local_dring[idx]; 459290e2f9dcSlm66018 op = ldep->operation; 459390e2f9dcSlm66018 if ((op == VD_OP_BREAD) || (op == VD_OP_BWRITE)) { 459490e2f9dcSlm66018 DTRACE_IO1(done, buf_t *, ldep->cb_arg); 459590e2f9dcSlm66018 VD_KSTAT_RUNQ_EXIT(vdcp); 459690e2f9dcSlm66018 } 4597366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 459890e2f9dcSlm66018 VDC_DUMP_DRING_MSG(dring_msg); 459990e2f9dcSlm66018 DMSG(vdcp, 0, "[%d] DATA NACK\n", vdcp->instance); 46003af08d82Slm66018 mutex_exit(&vdcp->lock); 4601e1ebb9ecSlm66018 return (EIO); 46020a55fbb7Slm66018 46033af08d82Slm66018 } else if (msg->tag.vio_subtype == VIO_SUBTYPE_INFO) { 460490e2f9dcSlm66018 /* 460590e2f9dcSlm66018 * Update the I/O statistics to indicate that an error occurred. 460690e2f9dcSlm66018 * No need to update the wait/run queues as no specific read or 460790e2f9dcSlm66018 * write request is being completed in response to this 'msg'. 460890e2f9dcSlm66018 */ 4609366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_protoerrs); 46103af08d82Slm66018 mutex_exit(&vdcp->lock); 4611e1ebb9ecSlm66018 return (EPROTO); 4612e1ebb9ecSlm66018 } 4613e1ebb9ecSlm66018 46143af08d82Slm66018 DMSG(vdcp, 1, ": start %d end %d\n", start, end); 46153af08d82Slm66018 ASSERT(start == end); 46163af08d82Slm66018 46173af08d82Slm66018 ldep = &vdcp->local_dring[idx]; 46183af08d82Slm66018 46193af08d82Slm66018 DMSG(vdcp, 1, ": state 0x%x - cb_type 0x%x\n", 46203af08d82Slm66018 ldep->dep->hdr.dstate, ldep->cb_type); 46213af08d82Slm66018 4622e1ebb9ecSlm66018 if (ldep->dep->hdr.dstate == VIO_DESC_DONE) { 46233af08d82Slm66018 struct buf *bufp; 4624e1ebb9ecSlm66018 46253af08d82Slm66018 switch (ldep->cb_type) { 46263af08d82Slm66018 case CB_SYNC: 46273af08d82Slm66018 ASSERT(vdcp->sync_op_pending); 4628d10e4ef2Snarayan 46293af08d82Slm66018 status = vdc_depopulate_descriptor(vdcp, idx); 46303af08d82Slm66018 vdcp->sync_op_status = status; 46313af08d82Slm66018 vdcp->sync_op_pending = B_FALSE; 46323af08d82Slm66018 cv_signal(&vdcp->sync_pending_cv); 46333af08d82Slm66018 break; 46344bac2208Snarayan 46353af08d82Slm66018 case CB_STRATEGY: 46363af08d82Slm66018 bufp = ldep->cb_arg; 46373af08d82Slm66018 ASSERT(bufp != NULL); 46383c96341aSnarayan bufp->b_resid = 46393c96341aSnarayan bufp->b_bcount - ldep->dep->payload.nbytes; 46403af08d82Slm66018 status = ldep->dep->payload.status; /* Future:ntoh */ 46413af08d82Slm66018 if (status != 0) { 46423af08d82Slm66018 DMSG(vdcp, 1, "strategy status=%d\n", status); 4643366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 46443af08d82Slm66018 bioerror(bufp, status); 4645d10e4ef2Snarayan } 46462f5224aeSachartre 46472f5224aeSachartre (void) vdc_depopulate_descriptor(vdcp, idx); 46483c96341aSnarayan 46493c96341aSnarayan DMSG(vdcp, 1, 46503c96341aSnarayan "strategy complete req=%ld bytes resp=%ld bytes\n", 46513c96341aSnarayan bufp->b_bcount, ldep->dep->payload.nbytes); 46522f5224aeSachartre 46532f5224aeSachartre if (status != 0 && vdcp->failfast_interval != 0) { 46542f5224aeSachartre /* 46552f5224aeSachartre * The I/O has failed and failfast is enabled. 46562f5224aeSachartre * We need the failfast thread to check if the 46572f5224aeSachartre * failure is due to a reservation conflict. 46582f5224aeSachartre */ 46592f5224aeSachartre (void) vdc_failfast_io_queue(vdcp, bufp); 46602f5224aeSachartre } else { 4661366a92acSlm66018 if (status == 0) { 466290e2f9dcSlm66018 op = (bufp->b_flags & B_READ) ? 4663366a92acSlm66018 VD_OP_BREAD : VD_OP_BWRITE; 4664366a92acSlm66018 VD_UPDATE_IO_STATS(vdcp, op, 4665366a92acSlm66018 ldep->dep->payload.nbytes); 4666366a92acSlm66018 } 466790e2f9dcSlm66018 VD_KSTAT_RUNQ_EXIT(vdcp); 4668366a92acSlm66018 DTRACE_IO1(done, buf_t *, bufp); 46692f5224aeSachartre biodone(bufp); 46702f5224aeSachartre } 46713af08d82Slm66018 break; 46723af08d82Slm66018 46733af08d82Slm66018 default: 46743af08d82Slm66018 ASSERT(0); 46750a55fbb7Slm66018 } 46763af08d82Slm66018 } 46773af08d82Slm66018 46783af08d82Slm66018 /* let the arrival signal propogate */ 46793af08d82Slm66018 mutex_exit(&vdcp->lock); 46800a55fbb7Slm66018 4681e1ebb9ecSlm66018 /* probe gives the count of how many entries were processed */ 4682366a92acSlm66018 DTRACE_PROBE2(processed, int, 1, vdc_t *, vdcp); 46830a55fbb7Slm66018 46843af08d82Slm66018 return (0); 46850a55fbb7Slm66018 } 46860a55fbb7Slm66018 46870a55fbb7Slm66018 46880a55fbb7Slm66018 /* 46890a55fbb7Slm66018 * Function: 46900a55fbb7Slm66018 * vdc_handle_ver_msg() 46910a55fbb7Slm66018 * 46920a55fbb7Slm66018 * Description: 46930a55fbb7Slm66018 * 46940a55fbb7Slm66018 * Arguments: 46950a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 46960a55fbb7Slm66018 * ver_msg - LDC message sent by vDisk server 46970a55fbb7Slm66018 * 46980a55fbb7Slm66018 * Return Code: 46990a55fbb7Slm66018 * 0 - Success 47000a55fbb7Slm66018 */ 47010a55fbb7Slm66018 static int 47020a55fbb7Slm66018 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg) 47030a55fbb7Slm66018 { 47040a55fbb7Slm66018 int status = 0; 47050a55fbb7Slm66018 47060a55fbb7Slm66018 ASSERT(vdc != NULL); 47070a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 47080a55fbb7Slm66018 47090a55fbb7Slm66018 if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) { 47100a55fbb7Slm66018 return (EPROTO); 47110a55fbb7Slm66018 } 47120a55fbb7Slm66018 47130a55fbb7Slm66018 if (ver_msg->dev_class != VDEV_DISK_SERVER) { 47140a55fbb7Slm66018 return (EINVAL); 47150a55fbb7Slm66018 } 47160a55fbb7Slm66018 47170a55fbb7Slm66018 switch (ver_msg->tag.vio_subtype) { 47180a55fbb7Slm66018 case VIO_SUBTYPE_ACK: 47190a55fbb7Slm66018 /* 47200a55fbb7Slm66018 * We check to see if the version returned is indeed supported 47210a55fbb7Slm66018 * (The server may have also adjusted the minor number downwards 47220a55fbb7Slm66018 * and if so 'ver_msg' will contain the actual version agreed) 47230a55fbb7Slm66018 */ 47240a55fbb7Slm66018 if (vdc_is_supported_version(ver_msg)) { 47250a55fbb7Slm66018 vdc->ver.major = ver_msg->ver_major; 47260a55fbb7Slm66018 vdc->ver.minor = ver_msg->ver_minor; 47270a55fbb7Slm66018 ASSERT(vdc->ver.major > 0); 47280a55fbb7Slm66018 } else { 47290a55fbb7Slm66018 status = EPROTO; 47300a55fbb7Slm66018 } 47310a55fbb7Slm66018 break; 47320a55fbb7Slm66018 47330a55fbb7Slm66018 case VIO_SUBTYPE_NACK: 47340a55fbb7Slm66018 /* 47350a55fbb7Slm66018 * call vdc_is_supported_version() which will return the next 47360a55fbb7Slm66018 * supported version (if any) in 'ver_msg' 47370a55fbb7Slm66018 */ 47380a55fbb7Slm66018 (void) vdc_is_supported_version(ver_msg); 47390a55fbb7Slm66018 if (ver_msg->ver_major > 0) { 47400a55fbb7Slm66018 size_t len = sizeof (*ver_msg); 47410a55fbb7Slm66018 47420a55fbb7Slm66018 ASSERT(vdc->ver.major > 0); 47430a55fbb7Slm66018 47440a55fbb7Slm66018 /* reset the necessary fields and resend */ 47450a55fbb7Slm66018 ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO; 47460a55fbb7Slm66018 ver_msg->dev_class = VDEV_DISK; 47470a55fbb7Slm66018 47480a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)ver_msg, &len); 47493af08d82Slm66018 DMSG(vdc, 0, "[%d] Resend VER info (LDC status = %d)\n", 47500a55fbb7Slm66018 vdc->instance, status); 47510a55fbb7Slm66018 if (len != sizeof (*ver_msg)) 47520a55fbb7Slm66018 status = EBADMSG; 47530a55fbb7Slm66018 } else { 475487a7269eSachartre DMSG(vdc, 0, "[%d] No common version with vDisk server", 475587a7269eSachartre vdc->instance); 47560a55fbb7Slm66018 status = ENOTSUP; 47570a55fbb7Slm66018 } 47580a55fbb7Slm66018 47590a55fbb7Slm66018 break; 47601ae08745Sheppo case VIO_SUBTYPE_INFO: 47611ae08745Sheppo /* 47621ae08745Sheppo * Handle the case where vds starts handshake 4763eff7243fSlm66018 * (for now only vdc is the instigator) 47641ae08745Sheppo */ 47651ae08745Sheppo status = ENOTSUP; 47661ae08745Sheppo break; 47671ae08745Sheppo 47681ae08745Sheppo default: 47690a55fbb7Slm66018 status = EINVAL; 47701ae08745Sheppo break; 47711ae08745Sheppo } 47721ae08745Sheppo 47730a55fbb7Slm66018 return (status); 47740a55fbb7Slm66018 } 47750a55fbb7Slm66018 47760a55fbb7Slm66018 /* 47770a55fbb7Slm66018 * Function: 47780a55fbb7Slm66018 * vdc_handle_attr_msg() 47790a55fbb7Slm66018 * 47800a55fbb7Slm66018 * Description: 47810a55fbb7Slm66018 * 47820a55fbb7Slm66018 * Arguments: 47830a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 47840a55fbb7Slm66018 * attr_msg - LDC message sent by vDisk server 47850a55fbb7Slm66018 * 47860a55fbb7Slm66018 * Return Code: 47870a55fbb7Slm66018 * 0 - Success 47880a55fbb7Slm66018 */ 47890a55fbb7Slm66018 static int 47900a55fbb7Slm66018 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) 47910a55fbb7Slm66018 { 47920a55fbb7Slm66018 int status = 0; 47930a55fbb7Slm66018 47940a55fbb7Slm66018 ASSERT(vdc != NULL); 47950a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 47960a55fbb7Slm66018 47970a55fbb7Slm66018 if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) { 47980a55fbb7Slm66018 return (EPROTO); 47990a55fbb7Slm66018 } 48000a55fbb7Slm66018 48010a55fbb7Slm66018 switch (attr_msg->tag.vio_subtype) { 48021ae08745Sheppo case VIO_SUBTYPE_ACK: 48031ae08745Sheppo /* 48041ae08745Sheppo * We now verify the attributes sent by vds. 48051ae08745Sheppo */ 480678fcd0a1Sachartre if (attr_msg->vdisk_size == 0) { 480778fcd0a1Sachartre DMSG(vdc, 0, "[%d] Invalid disk size from vds", 480878fcd0a1Sachartre vdc->instance); 480978fcd0a1Sachartre status = EINVAL; 481078fcd0a1Sachartre break; 481178fcd0a1Sachartre } 481278fcd0a1Sachartre 481378fcd0a1Sachartre if (attr_msg->max_xfer_sz == 0) { 481478fcd0a1Sachartre DMSG(vdc, 0, "[%d] Invalid transfer size from vds", 481578fcd0a1Sachartre vdc->instance); 481678fcd0a1Sachartre status = EINVAL; 481778fcd0a1Sachartre break; 481878fcd0a1Sachartre } 481978fcd0a1Sachartre 48202f5224aeSachartre if (attr_msg->vdisk_size == VD_SIZE_UNKNOWN) { 48212f5224aeSachartre DMSG(vdc, 0, "[%d] Unknown disk size from vds", 48222f5224aeSachartre vdc->instance); 48232f5224aeSachartre attr_msg->vdisk_size = 0; 48242f5224aeSachartre } 4825*de3a5331SRamesh Chitrothu /* update disk, block and transfer sizes */ 4826*de3a5331SRamesh Chitrothu vdc_update_size(vdc, attr_msg->vdisk_size, 4827*de3a5331SRamesh Chitrothu attr_msg->vdisk_block_size, attr_msg->max_xfer_sz); 48281ae08745Sheppo vdc->vdisk_type = attr_msg->vdisk_type; 482917cadca8Slm66018 vdc->operations = attr_msg->operations; 483017cadca8Slm66018 if (vio_ver_is_supported(vdc->ver, 1, 1)) 483117cadca8Slm66018 vdc->vdisk_media = attr_msg->vdisk_media; 483217cadca8Slm66018 else 483317cadca8Slm66018 vdc->vdisk_media = 0; 48341ae08745Sheppo 48353af08d82Slm66018 DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n", 4836e1ebb9ecSlm66018 vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz); 48373af08d82Slm66018 DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n", 4838e1ebb9ecSlm66018 vdc->instance, vdc->block_size, 4839e1ebb9ecSlm66018 attr_msg->vdisk_block_size); 4840e1ebb9ecSlm66018 4841f0ca1d9aSsb155480 if ((attr_msg->xfer_mode != VIO_DRING_MODE_V1_0) || 48421ae08745Sheppo (attr_msg->vdisk_size > INT64_MAX) || 484317cadca8Slm66018 (attr_msg->operations == 0) || 48441ae08745Sheppo (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { 48453af08d82Slm66018 DMSG(vdc, 0, "[%d] Invalid attributes from vds", 4846e1ebb9ecSlm66018 vdc->instance); 48471ae08745Sheppo status = EINVAL; 48481ae08745Sheppo break; 48491ae08745Sheppo } 48501ae08745Sheppo 485178fcd0a1Sachartre /* 485278fcd0a1Sachartre * Now that we have received all attributes we can create a 485378fcd0a1Sachartre * fake geometry for the disk. 485478fcd0a1Sachartre */ 485578fcd0a1Sachartre vdc_create_fake_geometry(vdc); 48561ae08745Sheppo break; 48571ae08745Sheppo 48581ae08745Sheppo case VIO_SUBTYPE_NACK: 48591ae08745Sheppo /* 48601ae08745Sheppo * vds could not handle the attributes we sent so we 48611ae08745Sheppo * stop negotiating. 48621ae08745Sheppo */ 48631ae08745Sheppo status = EPROTO; 48641ae08745Sheppo break; 48651ae08745Sheppo 48661ae08745Sheppo case VIO_SUBTYPE_INFO: 48671ae08745Sheppo /* 48681ae08745Sheppo * Handle the case where vds starts the handshake 48691ae08745Sheppo * (for now; vdc is the only supported instigatior) 48701ae08745Sheppo */ 48711ae08745Sheppo status = ENOTSUP; 48721ae08745Sheppo break; 48731ae08745Sheppo 48741ae08745Sheppo default: 48751ae08745Sheppo status = ENOTSUP; 48761ae08745Sheppo break; 48771ae08745Sheppo } 48781ae08745Sheppo 48790a55fbb7Slm66018 return (status); 48801ae08745Sheppo } 48811ae08745Sheppo 48820a55fbb7Slm66018 /* 48830a55fbb7Slm66018 * Function: 48840a55fbb7Slm66018 * vdc_handle_dring_reg_msg() 48850a55fbb7Slm66018 * 48860a55fbb7Slm66018 * Description: 48870a55fbb7Slm66018 * 48880a55fbb7Slm66018 * Arguments: 48890a55fbb7Slm66018 * vdc - soft state pointer for this instance of the driver. 48900a55fbb7Slm66018 * dring_msg - LDC message sent by vDisk server 48910a55fbb7Slm66018 * 48920a55fbb7Slm66018 * Return Code: 48930a55fbb7Slm66018 * 0 - Success 48940a55fbb7Slm66018 */ 48950a55fbb7Slm66018 static int 48960a55fbb7Slm66018 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg) 48970a55fbb7Slm66018 { 48980a55fbb7Slm66018 int status = 0; 48991ae08745Sheppo 49000a55fbb7Slm66018 ASSERT(vdc != NULL); 49010a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 49020a55fbb7Slm66018 49030a55fbb7Slm66018 if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) { 49040a55fbb7Slm66018 return (EPROTO); 49050a55fbb7Slm66018 } 49060a55fbb7Slm66018 49070a55fbb7Slm66018 switch (dring_msg->tag.vio_subtype) { 49080a55fbb7Slm66018 case VIO_SUBTYPE_ACK: 49091ae08745Sheppo /* save the received dring_ident */ 49101ae08745Sheppo vdc->dring_ident = dring_msg->dring_ident; 49113af08d82Slm66018 DMSG(vdc, 0, "[%d] Received dring ident=0x%lx\n", 4912e1ebb9ecSlm66018 vdc->instance, vdc->dring_ident); 49131ae08745Sheppo break; 49141ae08745Sheppo 49151ae08745Sheppo case VIO_SUBTYPE_NACK: 49161ae08745Sheppo /* 49171ae08745Sheppo * vds could not handle the DRing info we sent so we 49181ae08745Sheppo * stop negotiating. 49191ae08745Sheppo */ 49203af08d82Slm66018 DMSG(vdc, 0, "[%d] server could not register DRing\n", 49213af08d82Slm66018 vdc->instance); 49221ae08745Sheppo status = EPROTO; 49231ae08745Sheppo break; 49241ae08745Sheppo 49251ae08745Sheppo case VIO_SUBTYPE_INFO: 49261ae08745Sheppo /* 49271ae08745Sheppo * Handle the case where vds starts handshake 49281ae08745Sheppo * (for now only vdc is the instigatior) 49291ae08745Sheppo */ 49301ae08745Sheppo status = ENOTSUP; 49311ae08745Sheppo break; 49321ae08745Sheppo default: 49331ae08745Sheppo status = ENOTSUP; 49341ae08745Sheppo } 49351ae08745Sheppo 49361ae08745Sheppo return (status); 49371ae08745Sheppo } 49381ae08745Sheppo 49391ae08745Sheppo /* 49401ae08745Sheppo * Function: 49411ae08745Sheppo * vdc_verify_seq_num() 49421ae08745Sheppo * 49431ae08745Sheppo * Description: 4944e1ebb9ecSlm66018 * This functions verifies that the sequence number sent back by the vDisk 4945e1ebb9ecSlm66018 * server with the latest message is what is expected (i.e. it is greater 4946e1ebb9ecSlm66018 * than the last seq num sent by the vDisk server and less than or equal 4947e1ebb9ecSlm66018 * to the last seq num generated by vdc). 4948e1ebb9ecSlm66018 * 4949e1ebb9ecSlm66018 * It then checks the request ID to see if any requests need processing 4950e1ebb9ecSlm66018 * in the DRing. 49511ae08745Sheppo * 49521ae08745Sheppo * Arguments: 49531ae08745Sheppo * vdc - soft state pointer for this instance of the driver. 49541ae08745Sheppo * dring_msg - pointer to the LDC message sent by vds 49551ae08745Sheppo * 49561ae08745Sheppo * Return Code: 4957e1ebb9ecSlm66018 * VDC_SEQ_NUM_TODO - Message needs to be processed 4958e1ebb9ecSlm66018 * VDC_SEQ_NUM_SKIP - Message has already been processed 4959e1ebb9ecSlm66018 * VDC_SEQ_NUM_INVALID - The seq numbers are so out of sync, 4960e1ebb9ecSlm66018 * vdc cannot deal with them 49611ae08745Sheppo */ 4962e1ebb9ecSlm66018 static int 4963e1ebb9ecSlm66018 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg) 49641ae08745Sheppo { 49651ae08745Sheppo ASSERT(vdc != NULL); 49661ae08745Sheppo ASSERT(dring_msg != NULL); 4967d10e4ef2Snarayan ASSERT(mutex_owned(&vdc->lock)); 49681ae08745Sheppo 49691ae08745Sheppo /* 49701ae08745Sheppo * Check to see if the messages were responded to in the correct 4971e1ebb9ecSlm66018 * order by vds. 49721ae08745Sheppo */ 4973e1ebb9ecSlm66018 if ((dring_msg->seq_num <= vdc->seq_num_reply) || 4974e1ebb9ecSlm66018 (dring_msg->seq_num > vdc->seq_num)) { 49753af08d82Slm66018 DMSG(vdc, 0, "?[%d] Bogus sequence_number %lu: " 4976e1ebb9ecSlm66018 "%lu > expected <= %lu (last proc req %lu sent %lu)\n", 4977e1ebb9ecSlm66018 vdc->instance, dring_msg->seq_num, 4978e1ebb9ecSlm66018 vdc->seq_num_reply, vdc->seq_num, 4979e1ebb9ecSlm66018 vdc->req_id_proc, vdc->req_id); 4980e1ebb9ecSlm66018 return (VDC_SEQ_NUM_INVALID); 49811ae08745Sheppo } 4982e1ebb9ecSlm66018 vdc->seq_num_reply = dring_msg->seq_num; 49831ae08745Sheppo 4984e1ebb9ecSlm66018 if (vdc->req_id_proc < vdc->req_id) 4985e1ebb9ecSlm66018 return (VDC_SEQ_NUM_TODO); 4986e1ebb9ecSlm66018 else 4987e1ebb9ecSlm66018 return (VDC_SEQ_NUM_SKIP); 49881ae08745Sheppo } 49891ae08745Sheppo 49900a55fbb7Slm66018 49910a55fbb7Slm66018 /* 49920a55fbb7Slm66018 * Function: 49930a55fbb7Slm66018 * vdc_is_supported_version() 49940a55fbb7Slm66018 * 49950a55fbb7Slm66018 * Description: 49960a55fbb7Slm66018 * This routine checks if the major/minor version numbers specified in 49970a55fbb7Slm66018 * 'ver_msg' are supported. If not it finds the next version that is 49980a55fbb7Slm66018 * in the supported version list 'vdc_version[]' and sets the fields in 49990a55fbb7Slm66018 * 'ver_msg' to those values 50000a55fbb7Slm66018 * 50010a55fbb7Slm66018 * Arguments: 50020a55fbb7Slm66018 * ver_msg - LDC message sent by vDisk server 50030a55fbb7Slm66018 * 50040a55fbb7Slm66018 * Return Code: 50050a55fbb7Slm66018 * B_TRUE - Success 50060a55fbb7Slm66018 * B_FALSE - Version not supported 50070a55fbb7Slm66018 */ 50080a55fbb7Slm66018 static boolean_t 50090a55fbb7Slm66018 vdc_is_supported_version(vio_ver_msg_t *ver_msg) 50100a55fbb7Slm66018 { 50110a55fbb7Slm66018 int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]); 50120a55fbb7Slm66018 50130a55fbb7Slm66018 for (int i = 0; i < vdc_num_versions; i++) { 50140a55fbb7Slm66018 ASSERT(vdc_version[i].major > 0); 50150a55fbb7Slm66018 ASSERT((i == 0) || 50160a55fbb7Slm66018 (vdc_version[i].major < vdc_version[i-1].major)); 50170a55fbb7Slm66018 50180a55fbb7Slm66018 /* 50190a55fbb7Slm66018 * If the major versions match, adjust the minor version, if 50200a55fbb7Slm66018 * necessary, down to the highest value supported by this 50210a55fbb7Slm66018 * client. The server should support all minor versions lower 50220a55fbb7Slm66018 * than the value it sent 50230a55fbb7Slm66018 */ 50240a55fbb7Slm66018 if (ver_msg->ver_major == vdc_version[i].major) { 50250a55fbb7Slm66018 if (ver_msg->ver_minor > vdc_version[i].minor) { 50263af08d82Slm66018 DMSGX(0, 50273af08d82Slm66018 "Adjusting minor version from %u to %u", 50280a55fbb7Slm66018 ver_msg->ver_minor, vdc_version[i].minor); 50290a55fbb7Slm66018 ver_msg->ver_minor = vdc_version[i].minor; 50300a55fbb7Slm66018 } 50310a55fbb7Slm66018 return (B_TRUE); 50320a55fbb7Slm66018 } 50330a55fbb7Slm66018 50340a55fbb7Slm66018 /* 50350a55fbb7Slm66018 * If the message contains a higher major version number, set 50360a55fbb7Slm66018 * the message's major/minor versions to the current values 50370a55fbb7Slm66018 * and return false, so this message will get resent with 50380a55fbb7Slm66018 * these values, and the server will potentially try again 50390a55fbb7Slm66018 * with the same or a lower version 50400a55fbb7Slm66018 */ 50410a55fbb7Slm66018 if (ver_msg->ver_major > vdc_version[i].major) { 50420a55fbb7Slm66018 ver_msg->ver_major = vdc_version[i].major; 50430a55fbb7Slm66018 ver_msg->ver_minor = vdc_version[i].minor; 50443af08d82Slm66018 DMSGX(0, "Suggesting major/minor (0x%x/0x%x)\n", 50450a55fbb7Slm66018 ver_msg->ver_major, ver_msg->ver_minor); 50460a55fbb7Slm66018 50470a55fbb7Slm66018 return (B_FALSE); 50480a55fbb7Slm66018 } 50490a55fbb7Slm66018 50500a55fbb7Slm66018 /* 50510a55fbb7Slm66018 * Otherwise, the message's major version is less than the 50520a55fbb7Slm66018 * current major version, so continue the loop to the next 50530a55fbb7Slm66018 * (lower) supported version 50540a55fbb7Slm66018 */ 50550a55fbb7Slm66018 } 50560a55fbb7Slm66018 50570a55fbb7Slm66018 /* 50580a55fbb7Slm66018 * No common version was found; "ground" the version pair in the 50590a55fbb7Slm66018 * message to terminate negotiation 50600a55fbb7Slm66018 */ 50610a55fbb7Slm66018 ver_msg->ver_major = 0; 50620a55fbb7Slm66018 ver_msg->ver_minor = 0; 50630a55fbb7Slm66018 50640a55fbb7Slm66018 return (B_FALSE); 50650a55fbb7Slm66018 } 50661ae08745Sheppo /* -------------------------------------------------------------------------- */ 50671ae08745Sheppo 50681ae08745Sheppo /* 50691ae08745Sheppo * DKIO(7) support 50701ae08745Sheppo */ 50711ae08745Sheppo 50721ae08745Sheppo typedef struct vdc_dk_arg { 50731ae08745Sheppo struct dk_callback dkc; 50741ae08745Sheppo int mode; 50751ae08745Sheppo dev_t dev; 50761ae08745Sheppo vdc_t *vdc; 50771ae08745Sheppo } vdc_dk_arg_t; 50781ae08745Sheppo 50791ae08745Sheppo /* 50801ae08745Sheppo * Function: 50811ae08745Sheppo * vdc_dkio_flush_cb() 50821ae08745Sheppo * 50831ae08745Sheppo * Description: 50841ae08745Sheppo * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called 50851ae08745Sheppo * by kernel code. 50861ae08745Sheppo * 50871ae08745Sheppo * Arguments: 50881ae08745Sheppo * arg - a pointer to a vdc_dk_arg_t structure. 50891ae08745Sheppo */ 50901ae08745Sheppo void 50911ae08745Sheppo vdc_dkio_flush_cb(void *arg) 50921ae08745Sheppo { 50931ae08745Sheppo struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg; 50941ae08745Sheppo struct dk_callback *dkc = NULL; 50951ae08745Sheppo vdc_t *vdc = NULL; 50961ae08745Sheppo int rv; 50971ae08745Sheppo 50981ae08745Sheppo if (dk_arg == NULL) { 50993af08d82Slm66018 cmn_err(CE_NOTE, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n"); 51001ae08745Sheppo return; 51011ae08745Sheppo } 51021ae08745Sheppo dkc = &dk_arg->dkc; 51031ae08745Sheppo vdc = dk_arg->vdc; 51041ae08745Sheppo ASSERT(vdc != NULL); 51051ae08745Sheppo 51063af08d82Slm66018 rv = vdc_do_sync_op(vdc, VD_OP_FLUSH, NULL, 0, 51072f5224aeSachartre VDCPART(dk_arg->dev), 0, CB_SYNC, 0, VIO_both_dir, B_TRUE); 51081ae08745Sheppo if (rv != 0) { 51093af08d82Slm66018 DMSG(vdc, 0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n", 5110e1ebb9ecSlm66018 vdc->instance, rv, 51111ae08745Sheppo ddi_model_convert_from(dk_arg->mode & FMODELS)); 51121ae08745Sheppo } 51131ae08745Sheppo 51141ae08745Sheppo /* 51151ae08745Sheppo * Trigger the call back to notify the caller the the ioctl call has 51161ae08745Sheppo * been completed. 51171ae08745Sheppo */ 51181ae08745Sheppo if ((dk_arg->mode & FKIOCTL) && 51191ae08745Sheppo (dkc != NULL) && 51201ae08745Sheppo (dkc->dkc_callback != NULL)) { 51211ae08745Sheppo ASSERT(dkc->dkc_cookie != NULL); 51228e6a2a04Slm66018 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 51231ae08745Sheppo } 51241ae08745Sheppo 51251ae08745Sheppo /* Indicate that one less DKIO write flush is outstanding */ 51261ae08745Sheppo mutex_enter(&vdc->lock); 51271ae08745Sheppo vdc->dkio_flush_pending--; 51281ae08745Sheppo ASSERT(vdc->dkio_flush_pending >= 0); 51291ae08745Sheppo mutex_exit(&vdc->lock); 51308e6a2a04Slm66018 51318e6a2a04Slm66018 /* free the mem that was allocated when the callback was dispatched */ 51328e6a2a04Slm66018 kmem_free(arg, sizeof (vdc_dk_arg_t)); 51331ae08745Sheppo } 51341ae08745Sheppo 51351ae08745Sheppo /* 513687a7269eSachartre * Function: 51379642afceSachartre * vdc_dkio_gapart() 513887a7269eSachartre * 513987a7269eSachartre * Description: 514087a7269eSachartre * This function implements the DKIOCGAPART ioctl. 514187a7269eSachartre * 514287a7269eSachartre * Arguments: 514378fcd0a1Sachartre * vdc - soft state pointer 514487a7269eSachartre * arg - a pointer to a dk_map[NDKMAP] or dk_map32[NDKMAP] structure 514587a7269eSachartre * flag - ioctl flags 514687a7269eSachartre */ 514787a7269eSachartre static int 51489642afceSachartre vdc_dkio_gapart(vdc_t *vdc, caddr_t arg, int flag) 514987a7269eSachartre { 515078fcd0a1Sachartre struct dk_geom *geom; 515178fcd0a1Sachartre struct vtoc *vtoc; 515287a7269eSachartre union { 515387a7269eSachartre struct dk_map map[NDKMAP]; 515487a7269eSachartre struct dk_map32 map32[NDKMAP]; 515587a7269eSachartre } data; 515687a7269eSachartre int i, rv, size; 515787a7269eSachartre 515878fcd0a1Sachartre mutex_enter(&vdc->lock); 515987a7269eSachartre 516078fcd0a1Sachartre if ((rv = vdc_validate_geometry(vdc)) != 0) { 516178fcd0a1Sachartre mutex_exit(&vdc->lock); 516287a7269eSachartre return (rv); 516378fcd0a1Sachartre } 516487a7269eSachartre 516578fcd0a1Sachartre vtoc = vdc->vtoc; 516678fcd0a1Sachartre geom = vdc->geom; 516787a7269eSachartre 516887a7269eSachartre if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 516987a7269eSachartre 517078fcd0a1Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 517178fcd0a1Sachartre data.map32[i].dkl_cylno = vtoc->v_part[i].p_start / 517278fcd0a1Sachartre (geom->dkg_nhead * geom->dkg_nsect); 517378fcd0a1Sachartre data.map32[i].dkl_nblk = vtoc->v_part[i].p_size; 517487a7269eSachartre } 517587a7269eSachartre size = NDKMAP * sizeof (struct dk_map32); 517687a7269eSachartre 517787a7269eSachartre } else { 517887a7269eSachartre 517978fcd0a1Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 518078fcd0a1Sachartre data.map[i].dkl_cylno = vtoc->v_part[i].p_start / 518178fcd0a1Sachartre (geom->dkg_nhead * geom->dkg_nsect); 518278fcd0a1Sachartre data.map[i].dkl_nblk = vtoc->v_part[i].p_size; 518387a7269eSachartre } 518487a7269eSachartre size = NDKMAP * sizeof (struct dk_map); 518587a7269eSachartre 518687a7269eSachartre } 518787a7269eSachartre 518878fcd0a1Sachartre mutex_exit(&vdc->lock); 518978fcd0a1Sachartre 519087a7269eSachartre if (ddi_copyout(&data, arg, size, flag) != 0) 519187a7269eSachartre return (EFAULT); 519287a7269eSachartre 519387a7269eSachartre return (0); 519487a7269eSachartre } 519587a7269eSachartre 519687a7269eSachartre /* 519787a7269eSachartre * Function: 51989642afceSachartre * vdc_dkio_partition() 51999642afceSachartre * 52009642afceSachartre * Description: 52019642afceSachartre * This function implements the DKIOCPARTITION ioctl. 52029642afceSachartre * 52039642afceSachartre * Arguments: 52049642afceSachartre * vdc - soft state pointer 52059642afceSachartre * arg - a pointer to a struct partition64 structure 52069642afceSachartre * flag - ioctl flags 52079642afceSachartre */ 52089642afceSachartre static int 52099642afceSachartre vdc_dkio_partition(vdc_t *vdc, caddr_t arg, int flag) 52109642afceSachartre { 52119642afceSachartre struct partition64 p64; 52129642afceSachartre efi_gpt_t *gpt; 52139642afceSachartre efi_gpe_t *gpe; 52149642afceSachartre vd_efi_dev_t edev; 52159642afceSachartre uint_t partno; 52169642afceSachartre int rv; 52179642afceSachartre 52189642afceSachartre if (ddi_copyin(arg, &p64, sizeof (struct partition64), flag)) { 52199642afceSachartre return (EFAULT); 52209642afceSachartre } 52219642afceSachartre 52229642afceSachartre VD_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl); 52239642afceSachartre 52249642afceSachartre if ((rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe)) != 0) { 52259642afceSachartre return (rv); 52269642afceSachartre } 52279642afceSachartre 52289642afceSachartre partno = p64.p_partno; 52299642afceSachartre 52309642afceSachartre if (partno >= gpt->efi_gpt_NumberOfPartitionEntries) { 52319642afceSachartre vd_efi_free(&edev, gpt, gpe); 52329642afceSachartre return (ESRCH); 52339642afceSachartre } 52349642afceSachartre 52359642afceSachartre bcopy(&gpe[partno].efi_gpe_PartitionTypeGUID, &p64.p_type, 52369642afceSachartre sizeof (struct uuid)); 52379642afceSachartre p64.p_start = gpe[partno].efi_gpe_StartingLBA; 52389642afceSachartre p64.p_size = gpe[partno].efi_gpe_EndingLBA - p64.p_start + 1; 52399642afceSachartre 52409642afceSachartre if (ddi_copyout(&p64, arg, sizeof (struct partition64), flag)) { 52419642afceSachartre vd_efi_free(&edev, gpt, gpe); 52429642afceSachartre return (EFAULT); 52439642afceSachartre } 52449642afceSachartre 52459642afceSachartre vd_efi_free(&edev, gpt, gpe); 52469642afceSachartre return (0); 52479642afceSachartre } 52489642afceSachartre 52499642afceSachartre /* 52509642afceSachartre * Function: 525187a7269eSachartre * vdc_dioctl_rwcmd() 525287a7269eSachartre * 525387a7269eSachartre * Description: 525487a7269eSachartre * This function implements the DIOCTL_RWCMD ioctl. This ioctl is used 525587a7269eSachartre * for DKC_DIRECT disks to read or write at an absolute disk offset. 525687a7269eSachartre * 525787a7269eSachartre * Arguments: 525887a7269eSachartre * dev - device 525987a7269eSachartre * arg - a pointer to a dadkio_rwcmd or dadkio_rwcmd32 structure 526087a7269eSachartre * flag - ioctl flags 526187a7269eSachartre */ 526287a7269eSachartre static int 526387a7269eSachartre vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, int flag) 526487a7269eSachartre { 526587a7269eSachartre struct dadkio_rwcmd32 rwcmd32; 526687a7269eSachartre struct dadkio_rwcmd rwcmd; 526787a7269eSachartre struct iovec aiov; 526887a7269eSachartre struct uio auio; 526987a7269eSachartre int rw, status; 527087a7269eSachartre struct buf *buf; 527187a7269eSachartre 527287a7269eSachartre if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 527387a7269eSachartre if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd32, 527487a7269eSachartre sizeof (struct dadkio_rwcmd32), flag)) { 527587a7269eSachartre return (EFAULT); 527687a7269eSachartre } 527787a7269eSachartre rwcmd.cmd = rwcmd32.cmd; 527887a7269eSachartre rwcmd.flags = rwcmd32.flags; 527987a7269eSachartre rwcmd.blkaddr = (daddr_t)rwcmd32.blkaddr; 528087a7269eSachartre rwcmd.buflen = rwcmd32.buflen; 528187a7269eSachartre rwcmd.bufaddr = (caddr_t)(uintptr_t)rwcmd32.bufaddr; 528287a7269eSachartre } else { 528387a7269eSachartre if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd, 528487a7269eSachartre sizeof (struct dadkio_rwcmd), flag)) { 528587a7269eSachartre return (EFAULT); 528687a7269eSachartre } 528787a7269eSachartre } 528887a7269eSachartre 528987a7269eSachartre switch (rwcmd.cmd) { 529087a7269eSachartre case DADKIO_RWCMD_READ: 529187a7269eSachartre rw = B_READ; 529287a7269eSachartre break; 529387a7269eSachartre case DADKIO_RWCMD_WRITE: 529487a7269eSachartre rw = B_WRITE; 529587a7269eSachartre break; 529687a7269eSachartre default: 529787a7269eSachartre return (EINVAL); 529887a7269eSachartre } 529987a7269eSachartre 530087a7269eSachartre bzero((caddr_t)&aiov, sizeof (struct iovec)); 530187a7269eSachartre aiov.iov_base = rwcmd.bufaddr; 530287a7269eSachartre aiov.iov_len = rwcmd.buflen; 530387a7269eSachartre 530487a7269eSachartre bzero((caddr_t)&auio, sizeof (struct uio)); 530587a7269eSachartre auio.uio_iov = &aiov; 530687a7269eSachartre auio.uio_iovcnt = 1; 530787a7269eSachartre auio.uio_loffset = rwcmd.blkaddr * DEV_BSIZE; 530887a7269eSachartre auio.uio_resid = rwcmd.buflen; 530987a7269eSachartre auio.uio_segflg = flag & FKIOCTL ? UIO_SYSSPACE : UIO_USERSPACE; 531087a7269eSachartre 531187a7269eSachartre buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 531287a7269eSachartre bioinit(buf); 531387a7269eSachartre /* 531487a7269eSachartre * We use the private field of buf to specify that this is an 531587a7269eSachartre * I/O using an absolute offset. 531687a7269eSachartre */ 531787a7269eSachartre buf->b_private = (void *)VD_SLICE_NONE; 531887a7269eSachartre 531987a7269eSachartre status = physio(vdc_strategy, buf, dev, rw, vdc_min, &auio); 532087a7269eSachartre 532187a7269eSachartre biofini(buf); 532287a7269eSachartre kmem_free(buf, sizeof (buf_t)); 532387a7269eSachartre 532487a7269eSachartre return (status); 532587a7269eSachartre } 532687a7269eSachartre 532787a7269eSachartre /* 53282f5224aeSachartre * Allocate a buffer for a VD_OP_SCSICMD operation. The size of the allocated 53292f5224aeSachartre * buffer is returned in alloc_len. 53302f5224aeSachartre */ 53312f5224aeSachartre static vd_scsi_t * 53322f5224aeSachartre vdc_scsi_alloc(int cdb_len, int sense_len, int datain_len, int dataout_len, 53332f5224aeSachartre int *alloc_len) 53342f5224aeSachartre { 53352f5224aeSachartre vd_scsi_t *vd_scsi; 53362f5224aeSachartre int vd_scsi_len = VD_SCSI_SIZE; 53372f5224aeSachartre 53382f5224aeSachartre vd_scsi_len += P2ROUNDUP(cdb_len, sizeof (uint64_t)); 53392f5224aeSachartre vd_scsi_len += P2ROUNDUP(sense_len, sizeof (uint64_t)); 53402f5224aeSachartre vd_scsi_len += P2ROUNDUP(datain_len, sizeof (uint64_t)); 53412f5224aeSachartre vd_scsi_len += P2ROUNDUP(dataout_len, sizeof (uint64_t)); 53422f5224aeSachartre 53432f5224aeSachartre ASSERT(vd_scsi_len % sizeof (uint64_t) == 0); 53442f5224aeSachartre 53452f5224aeSachartre vd_scsi = kmem_zalloc(vd_scsi_len, KM_SLEEP); 53462f5224aeSachartre 53472f5224aeSachartre vd_scsi->cdb_len = cdb_len; 53482f5224aeSachartre vd_scsi->sense_len = sense_len; 53492f5224aeSachartre vd_scsi->datain_len = datain_len; 53502f5224aeSachartre vd_scsi->dataout_len = dataout_len; 53512f5224aeSachartre 53522f5224aeSachartre *alloc_len = vd_scsi_len; 53532f5224aeSachartre 53542f5224aeSachartre return (vd_scsi); 53552f5224aeSachartre } 53562f5224aeSachartre 53572f5224aeSachartre /* 53582f5224aeSachartre * Convert the status of a SCSI command to a Solaris return code. 53592f5224aeSachartre * 53602f5224aeSachartre * Arguments: 53612f5224aeSachartre * vd_scsi - The SCSI operation buffer. 53622f5224aeSachartre * log_error - indicate if an error message should be logged. 53632f5224aeSachartre * 53642f5224aeSachartre * Note that our SCSI error messages are rather primitive for the moment 53652f5224aeSachartre * and could be improved by decoding some data like the SCSI command and 53662f5224aeSachartre * the sense key. 53672f5224aeSachartre * 53682f5224aeSachartre * Return value: 53692f5224aeSachartre * 0 - Status is good. 53702f5224aeSachartre * EACCES - Status reports a reservation conflict. 53712f5224aeSachartre * ENOTSUP - Status reports a check condition and sense key 53722f5224aeSachartre * reports an illegal request. 53732f5224aeSachartre * EIO - Any other status. 53742f5224aeSachartre */ 53752f5224aeSachartre static int 53762f5224aeSachartre vdc_scsi_status(vdc_t *vdc, vd_scsi_t *vd_scsi, boolean_t log_error) 53772f5224aeSachartre { 53782f5224aeSachartre int rv; 53792f5224aeSachartre char path_str[MAXPATHLEN]; 53802f5224aeSachartre char panic_str[VDC_RESV_CONFLICT_FMT_LEN + MAXPATHLEN]; 53812f5224aeSachartre union scsi_cdb *cdb; 53822f5224aeSachartre struct scsi_extended_sense *sense; 53832f5224aeSachartre 53842f5224aeSachartre if (vd_scsi->cmd_status == STATUS_GOOD) 53852f5224aeSachartre /* no error */ 53862f5224aeSachartre return (0); 53872f5224aeSachartre 53882f5224aeSachartre /* when the tunable vdc_scsi_log_error is true we log all errors */ 53892f5224aeSachartre if (vdc_scsi_log_error) 53902f5224aeSachartre log_error = B_TRUE; 53912f5224aeSachartre 53922f5224aeSachartre if (log_error) { 53932f5224aeSachartre cmn_err(CE_WARN, "%s (vdc%d):\tError for Command: 0x%x)\n", 53942f5224aeSachartre ddi_pathname(vdc->dip, path_str), vdc->instance, 53952f5224aeSachartre GETCMD(VD_SCSI_DATA_CDB(vd_scsi))); 53962f5224aeSachartre } 53972f5224aeSachartre 53982f5224aeSachartre /* default returned value */ 53992f5224aeSachartre rv = EIO; 54002f5224aeSachartre 54012f5224aeSachartre switch (vd_scsi->cmd_status) { 54022f5224aeSachartre 54032f5224aeSachartre case STATUS_CHECK: 54042f5224aeSachartre case STATUS_TERMINATED: 54052f5224aeSachartre if (log_error) 54062f5224aeSachartre cmn_err(CE_CONT, "\tCheck Condition Error\n"); 54072f5224aeSachartre 54082f5224aeSachartre /* check sense buffer */ 54092f5224aeSachartre if (vd_scsi->sense_len == 0 || 54102f5224aeSachartre vd_scsi->sense_status != STATUS_GOOD) { 54112f5224aeSachartre if (log_error) 54122f5224aeSachartre cmn_err(CE_CONT, "\tNo Sense Data Available\n"); 54132f5224aeSachartre break; 54142f5224aeSachartre } 54152f5224aeSachartre 54162f5224aeSachartre sense = VD_SCSI_DATA_SENSE(vd_scsi); 54172f5224aeSachartre 54182f5224aeSachartre if (log_error) { 54192f5224aeSachartre cmn_err(CE_CONT, "\tSense Key: 0x%x\n" 54202f5224aeSachartre "\tASC: 0x%x, ASCQ: 0x%x\n", 54212f5224aeSachartre scsi_sense_key((uint8_t *)sense), 54222f5224aeSachartre scsi_sense_asc((uint8_t *)sense), 54232f5224aeSachartre scsi_sense_ascq((uint8_t *)sense)); 54242f5224aeSachartre } 54252f5224aeSachartre 54262f5224aeSachartre if (scsi_sense_key((uint8_t *)sense) == KEY_ILLEGAL_REQUEST) 54272f5224aeSachartre rv = ENOTSUP; 54282f5224aeSachartre break; 54292f5224aeSachartre 54302f5224aeSachartre case STATUS_BUSY: 54312f5224aeSachartre if (log_error) 54322f5224aeSachartre cmn_err(CE_NOTE, "\tDevice Busy\n"); 54332f5224aeSachartre break; 54342f5224aeSachartre 54352f5224aeSachartre case STATUS_RESERVATION_CONFLICT: 54362f5224aeSachartre /* 54372f5224aeSachartre * If the command was PERSISTENT_RESERVATION_[IN|OUT] then 54382f5224aeSachartre * reservation conflict could be due to various reasons like 54392f5224aeSachartre * incorrect keys, not registered or not reserved etc. So, 54402f5224aeSachartre * we should not panic in that case. 54412f5224aeSachartre */ 54422f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 54432f5224aeSachartre if (vdc->failfast_interval != 0 && 54442f5224aeSachartre cdb->scc_cmd != SCMD_PERSISTENT_RESERVE_IN && 54452f5224aeSachartre cdb->scc_cmd != SCMD_PERSISTENT_RESERVE_OUT) { 54462f5224aeSachartre /* failfast is enabled so we have to panic */ 54472f5224aeSachartre (void) snprintf(panic_str, sizeof (panic_str), 54482f5224aeSachartre VDC_RESV_CONFLICT_FMT_STR "%s", 54492f5224aeSachartre ddi_pathname(vdc->dip, path_str)); 54502f5224aeSachartre panic(panic_str); 54512f5224aeSachartre } 54522f5224aeSachartre if (log_error) 54532f5224aeSachartre cmn_err(CE_NOTE, "\tReservation Conflict\n"); 54542f5224aeSachartre rv = EACCES; 54552f5224aeSachartre break; 54562f5224aeSachartre 54572f5224aeSachartre case STATUS_QFULL: 54582f5224aeSachartre if (log_error) 54592f5224aeSachartre cmn_err(CE_NOTE, "\tQueue Full\n"); 54602f5224aeSachartre break; 54612f5224aeSachartre 54622f5224aeSachartre case STATUS_MET: 54632f5224aeSachartre case STATUS_INTERMEDIATE: 54642f5224aeSachartre case STATUS_SCSI2: 54652f5224aeSachartre case STATUS_INTERMEDIATE_MET: 54662f5224aeSachartre case STATUS_ACA_ACTIVE: 54672f5224aeSachartre if (log_error) 54682f5224aeSachartre cmn_err(CE_CONT, 54692f5224aeSachartre "\tUnexpected SCSI status received: 0x%x\n", 54702f5224aeSachartre vd_scsi->cmd_status); 54712f5224aeSachartre break; 54722f5224aeSachartre 54732f5224aeSachartre default: 54742f5224aeSachartre if (log_error) 54752f5224aeSachartre cmn_err(CE_CONT, 54762f5224aeSachartre "\tInvalid SCSI status received: 0x%x\n", 54772f5224aeSachartre vd_scsi->cmd_status); 54782f5224aeSachartre break; 54792f5224aeSachartre } 54802f5224aeSachartre 54812f5224aeSachartre return (rv); 54822f5224aeSachartre } 54832f5224aeSachartre 54842f5224aeSachartre /* 54852f5224aeSachartre * Implemented the USCSICMD uscsi(7I) ioctl. This ioctl is converted to 54862f5224aeSachartre * a VD_OP_SCSICMD operation which is sent to the vdisk server. If a SCSI 54872f5224aeSachartre * reset is requested (i.e. a flag USCSI_RESET* is set) then the ioctl is 54882f5224aeSachartre * converted to a VD_OP_RESET operation. 54892f5224aeSachartre */ 54902f5224aeSachartre static int 54912f5224aeSachartre vdc_uscsi_cmd(vdc_t *vdc, caddr_t arg, int mode) 54922f5224aeSachartre { 54932f5224aeSachartre struct uscsi_cmd uscsi; 54942f5224aeSachartre struct uscsi_cmd32 uscsi32; 54952f5224aeSachartre vd_scsi_t *vd_scsi; 54962f5224aeSachartre int vd_scsi_len; 54972f5224aeSachartre union scsi_cdb *cdb; 54982f5224aeSachartre struct scsi_extended_sense *sense; 54992f5224aeSachartre char *datain, *dataout; 55002f5224aeSachartre size_t cdb_len, datain_len, dataout_len, sense_len; 55012f5224aeSachartre int rv; 55022f5224aeSachartre 55032f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 55042f5224aeSachartre if (ddi_copyin(arg, &uscsi32, sizeof (struct uscsi_cmd32), 55052f5224aeSachartre mode) != 0) 55062f5224aeSachartre return (EFAULT); 55072f5224aeSachartre uscsi_cmd32touscsi_cmd((&uscsi32), (&uscsi)); 55082f5224aeSachartre } else { 55092f5224aeSachartre if (ddi_copyin(arg, &uscsi, sizeof (struct uscsi_cmd), 55102f5224aeSachartre mode) != 0) 55112f5224aeSachartre return (EFAULT); 55122f5224aeSachartre } 55132f5224aeSachartre 55142f5224aeSachartre /* a uscsi reset is converted to a VD_OP_RESET operation */ 55152f5224aeSachartre if (uscsi.uscsi_flags & (USCSI_RESET | USCSI_RESET_LUN | 55162f5224aeSachartre USCSI_RESET_ALL)) { 55172f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_RESET, NULL, 0, 0, 0, CB_SYNC, 55182f5224aeSachartre (void *)(uint64_t)mode, VIO_both_dir, B_TRUE); 55192f5224aeSachartre return (rv); 55202f5224aeSachartre } 55212f5224aeSachartre 55222f5224aeSachartre /* cdb buffer length */ 55232f5224aeSachartre cdb_len = uscsi.uscsi_cdblen; 55242f5224aeSachartre 55252f5224aeSachartre /* data in and out buffers length */ 55262f5224aeSachartre if (uscsi.uscsi_flags & USCSI_READ) { 55272f5224aeSachartre datain_len = uscsi.uscsi_buflen; 55282f5224aeSachartre dataout_len = 0; 55292f5224aeSachartre } else { 55302f5224aeSachartre datain_len = 0; 55312f5224aeSachartre dataout_len = uscsi.uscsi_buflen; 55322f5224aeSachartre } 55332f5224aeSachartre 55342f5224aeSachartre /* sense buffer length */ 55352f5224aeSachartre if (uscsi.uscsi_flags & USCSI_RQENABLE) 55362f5224aeSachartre sense_len = uscsi.uscsi_rqlen; 55372f5224aeSachartre else 55382f5224aeSachartre sense_len = 0; 55392f5224aeSachartre 55402f5224aeSachartre /* allocate buffer for the VD_SCSICMD_OP operation */ 55412f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 55422f5224aeSachartre &vd_scsi_len); 55432f5224aeSachartre 55442f5224aeSachartre /* 55452f5224aeSachartre * The documentation of USCSI_ISOLATE and USCSI_DIAGNOSE is very vague, 55462f5224aeSachartre * but basically they prevent a SCSI command from being retried in case 55472f5224aeSachartre * of an error. 55482f5224aeSachartre */ 55492f5224aeSachartre if ((uscsi.uscsi_flags & USCSI_ISOLATE) || 55502f5224aeSachartre (uscsi.uscsi_flags & USCSI_DIAGNOSE)) 55512f5224aeSachartre vd_scsi->options |= VD_SCSI_OPT_NORETRY; 55522f5224aeSachartre 55532f5224aeSachartre /* set task attribute */ 55542f5224aeSachartre if (uscsi.uscsi_flags & USCSI_NOTAG) { 55552f5224aeSachartre vd_scsi->task_attribute = 0; 55562f5224aeSachartre } else { 55572f5224aeSachartre if (uscsi.uscsi_flags & USCSI_HEAD) 55582f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_ACA; 55592f5224aeSachartre else if (uscsi.uscsi_flags & USCSI_HTAG) 55602f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_HQUEUE; 55612f5224aeSachartre else if (uscsi.uscsi_flags & USCSI_OTAG) 55622f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_ORDERED; 55632f5224aeSachartre else 55642f5224aeSachartre vd_scsi->task_attribute = 0; 55652f5224aeSachartre } 55662f5224aeSachartre 55672f5224aeSachartre /* set timeout */ 55682f5224aeSachartre vd_scsi->timeout = uscsi.uscsi_timeout; 55692f5224aeSachartre 55702f5224aeSachartre /* copy-in cdb data */ 55712f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 55722f5224aeSachartre if (ddi_copyin(uscsi.uscsi_cdb, cdb, cdb_len, mode) != 0) { 55732f5224aeSachartre rv = EFAULT; 55742f5224aeSachartre goto done; 55752f5224aeSachartre } 55762f5224aeSachartre 55772f5224aeSachartre /* keep a pointer to the sense buffer */ 55782f5224aeSachartre sense = VD_SCSI_DATA_SENSE(vd_scsi); 55792f5224aeSachartre 55802f5224aeSachartre /* keep a pointer to the data-in buffer */ 55812f5224aeSachartre datain = (char *)VD_SCSI_DATA_IN(vd_scsi); 55822f5224aeSachartre 55832f5224aeSachartre /* copy-in request data to the data-out buffer */ 55842f5224aeSachartre dataout = (char *)VD_SCSI_DATA_OUT(vd_scsi); 55852f5224aeSachartre if (!(uscsi.uscsi_flags & USCSI_READ)) { 55862f5224aeSachartre if (ddi_copyin(uscsi.uscsi_bufaddr, dataout, dataout_len, 55872f5224aeSachartre mode)) { 55882f5224aeSachartre rv = EFAULT; 55892f5224aeSachartre goto done; 55902f5224aeSachartre } 55912f5224aeSachartre } 55922f5224aeSachartre 55932f5224aeSachartre /* submit the request */ 55942f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 55952f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 55962f5224aeSachartre 55972f5224aeSachartre if (rv != 0) 55982f5224aeSachartre goto done; 55992f5224aeSachartre 56002f5224aeSachartre /* update scsi status */ 56012f5224aeSachartre uscsi.uscsi_status = vd_scsi->cmd_status; 56022f5224aeSachartre 56032f5224aeSachartre /* update sense data */ 56042f5224aeSachartre if ((uscsi.uscsi_flags & USCSI_RQENABLE) && 56052f5224aeSachartre (uscsi.uscsi_status == STATUS_CHECK || 56062f5224aeSachartre uscsi.uscsi_status == STATUS_TERMINATED)) { 56072f5224aeSachartre 56082f5224aeSachartre uscsi.uscsi_rqstatus = vd_scsi->sense_status; 56092f5224aeSachartre 56102f5224aeSachartre if (uscsi.uscsi_rqstatus == STATUS_GOOD) { 56112f5224aeSachartre uscsi.uscsi_rqresid = uscsi.uscsi_rqlen - 56122f5224aeSachartre vd_scsi->sense_len; 56132f5224aeSachartre if (ddi_copyout(sense, uscsi.uscsi_rqbuf, 56142f5224aeSachartre vd_scsi->sense_len, mode) != 0) { 56152f5224aeSachartre rv = EFAULT; 56162f5224aeSachartre goto done; 56172f5224aeSachartre } 56182f5224aeSachartre } 56192f5224aeSachartre } 56202f5224aeSachartre 56212f5224aeSachartre /* update request data */ 56222f5224aeSachartre if (uscsi.uscsi_status == STATUS_GOOD) { 56232f5224aeSachartre if (uscsi.uscsi_flags & USCSI_READ) { 56242f5224aeSachartre uscsi.uscsi_resid = uscsi.uscsi_buflen - 56252f5224aeSachartre vd_scsi->datain_len; 56262f5224aeSachartre if (ddi_copyout(datain, uscsi.uscsi_bufaddr, 56272f5224aeSachartre vd_scsi->datain_len, mode) != 0) { 56282f5224aeSachartre rv = EFAULT; 56292f5224aeSachartre goto done; 56302f5224aeSachartre } 56312f5224aeSachartre } else { 56322f5224aeSachartre uscsi.uscsi_resid = uscsi.uscsi_buflen - 56332f5224aeSachartre vd_scsi->dataout_len; 56342f5224aeSachartre } 56352f5224aeSachartre } 56362f5224aeSachartre 56372f5224aeSachartre /* copy-out result */ 56382f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 56392f5224aeSachartre uscsi_cmdtouscsi_cmd32((&uscsi), (&uscsi32)); 56402f5224aeSachartre if (ddi_copyout(&uscsi32, arg, sizeof (struct uscsi_cmd32), 56412f5224aeSachartre mode) != 0) { 56422f5224aeSachartre rv = EFAULT; 56432f5224aeSachartre goto done; 56442f5224aeSachartre } 56452f5224aeSachartre } else { 56462f5224aeSachartre if (ddi_copyout(&uscsi, arg, sizeof (struct uscsi_cmd), 56472f5224aeSachartre mode) != 0) { 56482f5224aeSachartre rv = EFAULT; 56492f5224aeSachartre goto done; 56502f5224aeSachartre } 56512f5224aeSachartre } 56522f5224aeSachartre 56532f5224aeSachartre /* get the return code from the SCSI command status */ 56542f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, 56552f5224aeSachartre !(uscsi.uscsi_flags & USCSI_SILENT)); 56562f5224aeSachartre 56572f5224aeSachartre done: 56582f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 56592f5224aeSachartre return (rv); 56602f5224aeSachartre } 56612f5224aeSachartre 56622f5224aeSachartre /* 56632f5224aeSachartre * Create a VD_OP_SCSICMD buffer for a SCSI PERSISTENT IN command. 56642f5224aeSachartre * 56652f5224aeSachartre * Arguments: 56662f5224aeSachartre * cmd - SCSI PERSISTENT IN command 56672f5224aeSachartre * len - length of the SCSI input buffer 56682f5224aeSachartre * vd_scsi_len - return the length of the allocated buffer 56692f5224aeSachartre * 56702f5224aeSachartre * Returned Value: 56712f5224aeSachartre * a pointer to the allocated VD_OP_SCSICMD buffer. 56722f5224aeSachartre */ 56732f5224aeSachartre static vd_scsi_t * 56742f5224aeSachartre vdc_scsi_alloc_persistent_in(uchar_t cmd, int len, int *vd_scsi_len) 56752f5224aeSachartre { 56762f5224aeSachartre int cdb_len, sense_len, datain_len, dataout_len; 56772f5224aeSachartre vd_scsi_t *vd_scsi; 56782f5224aeSachartre union scsi_cdb *cdb; 56792f5224aeSachartre 56802f5224aeSachartre cdb_len = CDB_GROUP1; 56812f5224aeSachartre sense_len = sizeof (struct scsi_extended_sense); 56822f5224aeSachartre datain_len = len; 56832f5224aeSachartre dataout_len = 0; 56842f5224aeSachartre 56852f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 56862f5224aeSachartre vd_scsi_len); 56872f5224aeSachartre 56882f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 56892f5224aeSachartre 56902f5224aeSachartre /* set cdb */ 56912f5224aeSachartre cdb->scc_cmd = SCMD_PERSISTENT_RESERVE_IN; 56922f5224aeSachartre cdb->cdb_opaque[1] = cmd; 56932f5224aeSachartre FORMG1COUNT(cdb, datain_len); 56942f5224aeSachartre 56952f5224aeSachartre vd_scsi->timeout = vdc_scsi_timeout; 56962f5224aeSachartre 56972f5224aeSachartre return (vd_scsi); 56982f5224aeSachartre } 56992f5224aeSachartre 57002f5224aeSachartre /* 57012f5224aeSachartre * Create a VD_OP_SCSICMD buffer for a SCSI PERSISTENT OUT command. 57022f5224aeSachartre * 57032f5224aeSachartre * Arguments: 57042f5224aeSachartre * cmd - SCSI PERSISTENT OUT command 57052f5224aeSachartre * len - length of the SCSI output buffer 57062f5224aeSachartre * vd_scsi_len - return the length of the allocated buffer 57072f5224aeSachartre * 57082f5224aeSachartre * Returned Code: 57092f5224aeSachartre * a pointer to the allocated VD_OP_SCSICMD buffer. 57102f5224aeSachartre */ 57112f5224aeSachartre static vd_scsi_t * 57122f5224aeSachartre vdc_scsi_alloc_persistent_out(uchar_t cmd, int len, int *vd_scsi_len) 57132f5224aeSachartre { 57142f5224aeSachartre int cdb_len, sense_len, datain_len, dataout_len; 57152f5224aeSachartre vd_scsi_t *vd_scsi; 57162f5224aeSachartre union scsi_cdb *cdb; 57172f5224aeSachartre 57182f5224aeSachartre cdb_len = CDB_GROUP1; 57192f5224aeSachartre sense_len = sizeof (struct scsi_extended_sense); 57202f5224aeSachartre datain_len = 0; 57212f5224aeSachartre dataout_len = len; 57222f5224aeSachartre 57232f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 57242f5224aeSachartre vd_scsi_len); 57252f5224aeSachartre 57262f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 57272f5224aeSachartre 57282f5224aeSachartre /* set cdb */ 57292f5224aeSachartre cdb->scc_cmd = SCMD_PERSISTENT_RESERVE_OUT; 57302f5224aeSachartre cdb->cdb_opaque[1] = cmd; 57312f5224aeSachartre FORMG1COUNT(cdb, dataout_len); 57322f5224aeSachartre 57332f5224aeSachartre vd_scsi->timeout = vdc_scsi_timeout; 57342f5224aeSachartre 57352f5224aeSachartre return (vd_scsi); 57362f5224aeSachartre } 57372f5224aeSachartre 57382f5224aeSachartre /* 57392f5224aeSachartre * Implement the MHIOCGRP_INKEYS mhd(7i) ioctl. The ioctl is converted 57402f5224aeSachartre * to a SCSI PERSISTENT IN READ KEYS command which is sent to the vdisk 57412f5224aeSachartre * server with a VD_OP_SCSICMD operation. 57422f5224aeSachartre */ 57432f5224aeSachartre static int 57442f5224aeSachartre vdc_mhd_inkeys(vdc_t *vdc, caddr_t arg, int mode) 57452f5224aeSachartre { 57462f5224aeSachartre vd_scsi_t *vd_scsi; 57472f5224aeSachartre mhioc_inkeys_t inkeys; 57482f5224aeSachartre mhioc_key_list_t klist; 57492f5224aeSachartre struct mhioc_inkeys32 inkeys32; 57502f5224aeSachartre struct mhioc_key_list32 klist32; 57512f5224aeSachartre sd_prin_readkeys_t *scsi_keys; 57522f5224aeSachartre void *user_keys; 57532f5224aeSachartre int vd_scsi_len; 57542f5224aeSachartre int listsize, listlen, rv; 57552f5224aeSachartre 57562f5224aeSachartre /* copyin arguments */ 57572f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 57582f5224aeSachartre rv = ddi_copyin(arg, &inkeys32, sizeof (inkeys32), mode); 57592f5224aeSachartre if (rv != 0) 57602f5224aeSachartre return (EFAULT); 57612f5224aeSachartre 57622f5224aeSachartre rv = ddi_copyin((caddr_t)(uintptr_t)inkeys32.li, &klist32, 57632f5224aeSachartre sizeof (klist32), mode); 57642f5224aeSachartre if (rv != 0) 57652f5224aeSachartre return (EFAULT); 57662f5224aeSachartre 57672f5224aeSachartre listsize = klist32.listsize; 57682f5224aeSachartre } else { 57692f5224aeSachartre rv = ddi_copyin(arg, &inkeys, sizeof (inkeys), mode); 57702f5224aeSachartre if (rv != 0) 57712f5224aeSachartre return (EFAULT); 57722f5224aeSachartre 57732f5224aeSachartre rv = ddi_copyin(inkeys.li, &klist, sizeof (klist), mode); 57742f5224aeSachartre if (rv != 0) 57752f5224aeSachartre return (EFAULT); 57762f5224aeSachartre 57772f5224aeSachartre listsize = klist.listsize; 57782f5224aeSachartre } 57792f5224aeSachartre 57802f5224aeSachartre /* build SCSI VD_OP request */ 57812f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_in(SD_READ_KEYS, 57822f5224aeSachartre sizeof (sd_prin_readkeys_t) - sizeof (caddr_t) + 57832f5224aeSachartre (sizeof (mhioc_resv_key_t) * listsize), &vd_scsi_len); 57842f5224aeSachartre 57852f5224aeSachartre scsi_keys = (sd_prin_readkeys_t *)VD_SCSI_DATA_IN(vd_scsi); 57862f5224aeSachartre 57872f5224aeSachartre /* submit the request */ 57882f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 57892f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 57902f5224aeSachartre 57912f5224aeSachartre if (rv != 0) 57922f5224aeSachartre goto done; 57932f5224aeSachartre 57942f5224aeSachartre listlen = scsi_keys->len / MHIOC_RESV_KEY_SIZE; 57952f5224aeSachartre 57962f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 57972f5224aeSachartre inkeys32.generation = scsi_keys->generation; 57982f5224aeSachartre rv = ddi_copyout(&inkeys32, arg, sizeof (inkeys32), mode); 57992f5224aeSachartre if (rv != 0) { 58002f5224aeSachartre rv = EFAULT; 58012f5224aeSachartre goto done; 58022f5224aeSachartre } 58032f5224aeSachartre 58042f5224aeSachartre klist32.listlen = listlen; 58052f5224aeSachartre rv = ddi_copyout(&klist32, (caddr_t)(uintptr_t)inkeys32.li, 58062f5224aeSachartre sizeof (klist32), mode); 58072f5224aeSachartre if (rv != 0) { 58082f5224aeSachartre rv = EFAULT; 58092f5224aeSachartre goto done; 58102f5224aeSachartre } 58112f5224aeSachartre 58122f5224aeSachartre user_keys = (caddr_t)(uintptr_t)klist32.list; 58132f5224aeSachartre } else { 58142f5224aeSachartre inkeys.generation = scsi_keys->generation; 58152f5224aeSachartre rv = ddi_copyout(&inkeys, arg, sizeof (inkeys), mode); 58162f5224aeSachartre if (rv != 0) { 58172f5224aeSachartre rv = EFAULT; 58182f5224aeSachartre goto done; 58192f5224aeSachartre } 58202f5224aeSachartre 58212f5224aeSachartre klist.listlen = listlen; 58222f5224aeSachartre rv = ddi_copyout(&klist, inkeys.li, sizeof (klist), mode); 58232f5224aeSachartre if (rv != 0) { 58242f5224aeSachartre rv = EFAULT; 58252f5224aeSachartre goto done; 58262f5224aeSachartre } 58272f5224aeSachartre 58282f5224aeSachartre user_keys = klist.list; 58292f5224aeSachartre } 58302f5224aeSachartre 58312f5224aeSachartre /* copy out keys */ 58322f5224aeSachartre if (listlen > 0 && listsize > 0) { 58332f5224aeSachartre if (listsize < listlen) 58342f5224aeSachartre listlen = listsize; 58352f5224aeSachartre rv = ddi_copyout(&scsi_keys->keylist, user_keys, 58362f5224aeSachartre listlen * MHIOC_RESV_KEY_SIZE, mode); 58372f5224aeSachartre if (rv != 0) 58382f5224aeSachartre rv = EFAULT; 58392f5224aeSachartre } 58402f5224aeSachartre 58412f5224aeSachartre if (rv == 0) 58422f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 58432f5224aeSachartre 58442f5224aeSachartre done: 58452f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 58462f5224aeSachartre 58472f5224aeSachartre return (rv); 58482f5224aeSachartre } 58492f5224aeSachartre 58502f5224aeSachartre /* 58512f5224aeSachartre * Implement the MHIOCGRP_INRESV mhd(7i) ioctl. The ioctl is converted 58522f5224aeSachartre * to a SCSI PERSISTENT IN READ RESERVATION command which is sent to 58532f5224aeSachartre * the vdisk server with a VD_OP_SCSICMD operation. 58542f5224aeSachartre */ 58552f5224aeSachartre static int 58562f5224aeSachartre vdc_mhd_inresv(vdc_t *vdc, caddr_t arg, int mode) 58572f5224aeSachartre { 58582f5224aeSachartre vd_scsi_t *vd_scsi; 58592f5224aeSachartre mhioc_inresvs_t inresv; 58602f5224aeSachartre mhioc_resv_desc_list_t rlist; 58612f5224aeSachartre struct mhioc_inresvs32 inresv32; 58622f5224aeSachartre struct mhioc_resv_desc_list32 rlist32; 58632f5224aeSachartre mhioc_resv_desc_t mhd_resv; 58642f5224aeSachartre sd_prin_readresv_t *scsi_resv; 58652f5224aeSachartre sd_readresv_desc_t *resv; 58662f5224aeSachartre mhioc_resv_desc_t *user_resv; 58672f5224aeSachartre int vd_scsi_len; 58682f5224aeSachartre int listsize, listlen, i, rv; 58692f5224aeSachartre 58702f5224aeSachartre /* copyin arguments */ 58712f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 58722f5224aeSachartre rv = ddi_copyin(arg, &inresv32, sizeof (inresv32), mode); 58732f5224aeSachartre if (rv != 0) 58742f5224aeSachartre return (EFAULT); 58752f5224aeSachartre 58762f5224aeSachartre rv = ddi_copyin((caddr_t)(uintptr_t)inresv32.li, &rlist32, 58772f5224aeSachartre sizeof (rlist32), mode); 58782f5224aeSachartre if (rv != 0) 58792f5224aeSachartre return (EFAULT); 58802f5224aeSachartre 58812f5224aeSachartre listsize = rlist32.listsize; 58822f5224aeSachartre } else { 58832f5224aeSachartre rv = ddi_copyin(arg, &inresv, sizeof (inresv), mode); 58842f5224aeSachartre if (rv != 0) 58852f5224aeSachartre return (EFAULT); 58862f5224aeSachartre 58872f5224aeSachartre rv = ddi_copyin(inresv.li, &rlist, sizeof (rlist), mode); 58882f5224aeSachartre if (rv != 0) 58892f5224aeSachartre return (EFAULT); 58902f5224aeSachartre 58912f5224aeSachartre listsize = rlist.listsize; 58922f5224aeSachartre } 58932f5224aeSachartre 58942f5224aeSachartre /* build SCSI VD_OP request */ 58952f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_in(SD_READ_RESV, 58962f5224aeSachartre sizeof (sd_prin_readresv_t) - sizeof (caddr_t) + 58972f5224aeSachartre (SCSI3_RESV_DESC_LEN * listsize), &vd_scsi_len); 58982f5224aeSachartre 58992f5224aeSachartre scsi_resv = (sd_prin_readresv_t *)VD_SCSI_DATA_IN(vd_scsi); 59002f5224aeSachartre 59012f5224aeSachartre /* submit the request */ 59022f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 59032f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 59042f5224aeSachartre 59052f5224aeSachartre if (rv != 0) 59062f5224aeSachartre goto done; 59072f5224aeSachartre 59082f5224aeSachartre listlen = scsi_resv->len / SCSI3_RESV_DESC_LEN; 59092f5224aeSachartre 59102f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 59112f5224aeSachartre inresv32.generation = scsi_resv->generation; 59122f5224aeSachartre rv = ddi_copyout(&inresv32, arg, sizeof (inresv32), mode); 59132f5224aeSachartre if (rv != 0) { 59142f5224aeSachartre rv = EFAULT; 59152f5224aeSachartre goto done; 59162f5224aeSachartre } 59172f5224aeSachartre 59182f5224aeSachartre rlist32.listlen = listlen; 59192f5224aeSachartre rv = ddi_copyout(&rlist32, (caddr_t)(uintptr_t)inresv32.li, 59202f5224aeSachartre sizeof (rlist32), mode); 59212f5224aeSachartre if (rv != 0) { 59222f5224aeSachartre rv = EFAULT; 59232f5224aeSachartre goto done; 59242f5224aeSachartre } 59252f5224aeSachartre 59262f5224aeSachartre user_resv = (mhioc_resv_desc_t *)(uintptr_t)rlist32.list; 59272f5224aeSachartre } else { 59282f5224aeSachartre inresv.generation = scsi_resv->generation; 59292f5224aeSachartre rv = ddi_copyout(&inresv, arg, sizeof (inresv), mode); 59302f5224aeSachartre if (rv != 0) { 59312f5224aeSachartre rv = EFAULT; 59322f5224aeSachartre goto done; 59332f5224aeSachartre } 59342f5224aeSachartre 59352f5224aeSachartre rlist.listlen = listlen; 59362f5224aeSachartre rv = ddi_copyout(&rlist, inresv.li, sizeof (rlist), mode); 59372f5224aeSachartre if (rv != 0) { 59382f5224aeSachartre rv = EFAULT; 59392f5224aeSachartre goto done; 59402f5224aeSachartre } 59412f5224aeSachartre 59422f5224aeSachartre user_resv = rlist.list; 59432f5224aeSachartre } 59442f5224aeSachartre 59452f5224aeSachartre /* copy out reservations */ 59462f5224aeSachartre if (listsize > 0 && listlen > 0) { 59472f5224aeSachartre if (listsize < listlen) 59482f5224aeSachartre listlen = listsize; 59492f5224aeSachartre resv = (sd_readresv_desc_t *)&scsi_resv->readresv_desc; 59502f5224aeSachartre 59512f5224aeSachartre for (i = 0; i < listlen; i++) { 59522f5224aeSachartre mhd_resv.type = resv->type; 59532f5224aeSachartre mhd_resv.scope = resv->scope; 59542f5224aeSachartre mhd_resv.scope_specific_addr = 59552f5224aeSachartre BE_32(resv->scope_specific_addr); 59562f5224aeSachartre bcopy(&resv->resvkey, &mhd_resv.key, 59572f5224aeSachartre MHIOC_RESV_KEY_SIZE); 59582f5224aeSachartre 59592f5224aeSachartre rv = ddi_copyout(&mhd_resv, user_resv, 59602f5224aeSachartre sizeof (mhd_resv), mode); 59612f5224aeSachartre if (rv != 0) { 59622f5224aeSachartre rv = EFAULT; 59632f5224aeSachartre goto done; 59642f5224aeSachartre } 59652f5224aeSachartre resv++; 59662f5224aeSachartre user_resv++; 59672f5224aeSachartre } 59682f5224aeSachartre } 59692f5224aeSachartre 59702f5224aeSachartre if (rv == 0) 59712f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 59722f5224aeSachartre 59732f5224aeSachartre done: 59742f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 59752f5224aeSachartre return (rv); 59762f5224aeSachartre } 59772f5224aeSachartre 59782f5224aeSachartre /* 59792f5224aeSachartre * Implement the MHIOCGRP_REGISTER mhd(7i) ioctl. The ioctl is converted 59802f5224aeSachartre * to a SCSI PERSISTENT OUT REGISTER command which is sent to the vdisk 59812f5224aeSachartre * server with a VD_OP_SCSICMD operation. 59822f5224aeSachartre */ 59832f5224aeSachartre static int 59842f5224aeSachartre vdc_mhd_register(vdc_t *vdc, caddr_t arg, int mode) 59852f5224aeSachartre { 59862f5224aeSachartre vd_scsi_t *vd_scsi; 59872f5224aeSachartre sd_prout_t *scsi_prout; 59882f5224aeSachartre mhioc_register_t mhd_reg; 59892f5224aeSachartre int vd_scsi_len, rv; 59902f5224aeSachartre 59912f5224aeSachartre /* copyin arguments */ 59922f5224aeSachartre rv = ddi_copyin(arg, &mhd_reg, sizeof (mhd_reg), mode); 59932f5224aeSachartre if (rv != 0) 59942f5224aeSachartre return (EFAULT); 59952f5224aeSachartre 59962f5224aeSachartre /* build SCSI VD_OP request */ 59972f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_REGISTER, 59982f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 59992f5224aeSachartre 60002f5224aeSachartre /* set parameters */ 60012f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 60022f5224aeSachartre bcopy(mhd_reg.oldkey.key, scsi_prout->res_key, MHIOC_RESV_KEY_SIZE); 60032f5224aeSachartre bcopy(mhd_reg.newkey.key, scsi_prout->service_key, MHIOC_RESV_KEY_SIZE); 60042f5224aeSachartre scsi_prout->aptpl = (uchar_t)mhd_reg.aptpl; 60052f5224aeSachartre 60062f5224aeSachartre /* submit the request */ 60072f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 60082f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 60092f5224aeSachartre 60102f5224aeSachartre if (rv == 0) 60112f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 60122f5224aeSachartre 60132f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 60142f5224aeSachartre return (rv); 60152f5224aeSachartre } 60162f5224aeSachartre 60172f5224aeSachartre /* 60182f5224aeSachartre * Implement the MHIOCGRP_RESERVE mhd(7i) ioctl. The ioctl is converted 60192f5224aeSachartre * to a SCSI PERSISTENT OUT RESERVE command which is sent to the vdisk 60202f5224aeSachartre * server with a VD_OP_SCSICMD operation. 60212f5224aeSachartre */ 60222f5224aeSachartre static int 60232f5224aeSachartre vdc_mhd_reserve(vdc_t *vdc, caddr_t arg, int mode) 60242f5224aeSachartre { 60252f5224aeSachartre union scsi_cdb *cdb; 60262f5224aeSachartre vd_scsi_t *vd_scsi; 60272f5224aeSachartre sd_prout_t *scsi_prout; 60282f5224aeSachartre mhioc_resv_desc_t mhd_resv; 60292f5224aeSachartre int vd_scsi_len, rv; 60302f5224aeSachartre 60312f5224aeSachartre /* copyin arguments */ 60322f5224aeSachartre rv = ddi_copyin(arg, &mhd_resv, sizeof (mhd_resv), mode); 60332f5224aeSachartre if (rv != 0) 60342f5224aeSachartre return (EFAULT); 60352f5224aeSachartre 60362f5224aeSachartre /* build SCSI VD_OP request */ 60372f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_RESERVE, 60382f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 60392f5224aeSachartre 60402f5224aeSachartre /* set parameters */ 60412f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 60422f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 60432f5224aeSachartre bcopy(mhd_resv.key.key, scsi_prout->res_key, MHIOC_RESV_KEY_SIZE); 60442f5224aeSachartre scsi_prout->scope_address = mhd_resv.scope_specific_addr; 60452f5224aeSachartre cdb->cdb_opaque[2] = mhd_resv.type; 60462f5224aeSachartre 60472f5224aeSachartre /* submit the request */ 60482f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 60492f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 60502f5224aeSachartre 60512f5224aeSachartre if (rv == 0) 60522f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 60532f5224aeSachartre 60542f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 60552f5224aeSachartre return (rv); 60562f5224aeSachartre } 60572f5224aeSachartre 60582f5224aeSachartre /* 60592f5224aeSachartre * Implement the MHIOCGRP_PREEMPTANDABORT mhd(7i) ioctl. The ioctl is 60602f5224aeSachartre * converted to a SCSI PERSISTENT OUT PREEMPT AND ABORT command which 60612f5224aeSachartre * is sent to the vdisk server with a VD_OP_SCSICMD operation. 60622f5224aeSachartre */ 60632f5224aeSachartre static int 60642f5224aeSachartre vdc_mhd_preemptabort(vdc_t *vdc, caddr_t arg, int mode) 60652f5224aeSachartre { 60662f5224aeSachartre union scsi_cdb *cdb; 60672f5224aeSachartre vd_scsi_t *vd_scsi; 60682f5224aeSachartre sd_prout_t *scsi_prout; 60692f5224aeSachartre mhioc_preemptandabort_t mhd_preempt; 60702f5224aeSachartre int vd_scsi_len, rv; 60712f5224aeSachartre 60722f5224aeSachartre /* copyin arguments */ 60732f5224aeSachartre rv = ddi_copyin(arg, &mhd_preempt, sizeof (mhd_preempt), mode); 60742f5224aeSachartre if (rv != 0) 60752f5224aeSachartre return (EFAULT); 60762f5224aeSachartre 60772f5224aeSachartre /* build SCSI VD_OP request */ 60782f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_PREEMPTANDABORT, 60792f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 60802f5224aeSachartre 60812f5224aeSachartre /* set parameters */ 60822f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_ACA; 60832f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 60842f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 60852f5224aeSachartre bcopy(mhd_preempt.resvdesc.key.key, scsi_prout->res_key, 60862f5224aeSachartre MHIOC_RESV_KEY_SIZE); 60872f5224aeSachartre bcopy(mhd_preempt.victim_key.key, scsi_prout->service_key, 60882f5224aeSachartre MHIOC_RESV_KEY_SIZE); 60892f5224aeSachartre scsi_prout->scope_address = mhd_preempt.resvdesc.scope_specific_addr; 60902f5224aeSachartre cdb->cdb_opaque[2] = mhd_preempt.resvdesc.type; 60912f5224aeSachartre 60922f5224aeSachartre /* submit the request */ 60932f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 60942f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 60952f5224aeSachartre 60962f5224aeSachartre if (rv == 0) 60972f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 60982f5224aeSachartre 60992f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 61002f5224aeSachartre return (rv); 61012f5224aeSachartre } 61022f5224aeSachartre 61032f5224aeSachartre /* 61042f5224aeSachartre * Implement the MHIOCGRP_REGISTERANDIGNOREKEY mhd(7i) ioctl. The ioctl 61052f5224aeSachartre * is converted to a SCSI PERSISTENT OUT REGISTER AND IGNORE EXISTING KEY 61062f5224aeSachartre * command which is sent to the vdisk server with a VD_OP_SCSICMD operation. 61072f5224aeSachartre */ 61082f5224aeSachartre static int 61092f5224aeSachartre vdc_mhd_registerignore(vdc_t *vdc, caddr_t arg, int mode) 61102f5224aeSachartre { 61112f5224aeSachartre vd_scsi_t *vd_scsi; 61122f5224aeSachartre sd_prout_t *scsi_prout; 61132f5224aeSachartre mhioc_registerandignorekey_t mhd_regi; 61142f5224aeSachartre int vd_scsi_len, rv; 61152f5224aeSachartre 61162f5224aeSachartre /* copyin arguments */ 61172f5224aeSachartre rv = ddi_copyin(arg, &mhd_regi, sizeof (mhd_regi), mode); 61182f5224aeSachartre if (rv != 0) 61192f5224aeSachartre return (EFAULT); 61202f5224aeSachartre 61212f5224aeSachartre /* build SCSI VD_OP request */ 61222f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_REGISTERANDIGNOREKEY, 61232f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 61242f5224aeSachartre 61252f5224aeSachartre /* set parameters */ 61262f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 61272f5224aeSachartre bcopy(mhd_regi.newkey.key, scsi_prout->service_key, 61282f5224aeSachartre MHIOC_RESV_KEY_SIZE); 61292f5224aeSachartre scsi_prout->aptpl = (uchar_t)mhd_regi.aptpl; 61302f5224aeSachartre 61312f5224aeSachartre /* submit the request */ 61322f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 61332f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 61342f5224aeSachartre 61352f5224aeSachartre if (rv == 0) 61362f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 61372f5224aeSachartre 61382f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 61392f5224aeSachartre return (rv); 61402f5224aeSachartre } 61412f5224aeSachartre 61422f5224aeSachartre /* 61432f5224aeSachartre * This function is used by the failfast mechanism to send a SCSI command 61442f5224aeSachartre * to check for reservation conflict. 61452f5224aeSachartre */ 61462f5224aeSachartre static int 61472f5224aeSachartre vdc_failfast_scsi_cmd(vdc_t *vdc, uchar_t scmd) 61482f5224aeSachartre { 61492f5224aeSachartre int cdb_len, sense_len, vd_scsi_len; 61502f5224aeSachartre vd_scsi_t *vd_scsi; 61512f5224aeSachartre union scsi_cdb *cdb; 61522f5224aeSachartre int rv; 61532f5224aeSachartre 61542f5224aeSachartre ASSERT(scmd == SCMD_TEST_UNIT_READY || scmd == SCMD_WRITE_G1); 61552f5224aeSachartre 61562f5224aeSachartre if (scmd == SCMD_WRITE_G1) 61572f5224aeSachartre cdb_len = CDB_GROUP1; 61582f5224aeSachartre else 61592f5224aeSachartre cdb_len = CDB_GROUP0; 61602f5224aeSachartre 61612f5224aeSachartre sense_len = sizeof (struct scsi_extended_sense); 61622f5224aeSachartre 61632f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, 0, 0, &vd_scsi_len); 61642f5224aeSachartre 61652f5224aeSachartre /* set cdb */ 61662f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 61672f5224aeSachartre cdb->scc_cmd = scmd; 61682f5224aeSachartre 61692f5224aeSachartre vd_scsi->timeout = vdc_scsi_timeout; 61702f5224aeSachartre 61712f5224aeSachartre /* 61722f5224aeSachartre * Submit the request. The last argument has to be B_FALSE so that 61732f5224aeSachartre * vdc_do_sync_op does not loop checking for reservation conflict if 61742f5224aeSachartre * the operation returns an error. 61752f5224aeSachartre */ 61762f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 61772f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)FKIOCTL, VIO_both_dir, B_FALSE); 61782f5224aeSachartre 61792f5224aeSachartre if (rv == 0) 61802f5224aeSachartre (void) vdc_scsi_status(vdc, vd_scsi, B_FALSE); 61812f5224aeSachartre 61822f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 61832f5224aeSachartre return (rv); 61842f5224aeSachartre } 61852f5224aeSachartre 61862f5224aeSachartre /* 61872f5224aeSachartre * This function is used by the failfast mechanism to check for reservation 61882f5224aeSachartre * conflict. It sends some SCSI commands which will fail with a reservation 61892f5224aeSachartre * conflict error if the system does not have access to the disk and this 61902f5224aeSachartre * will panic the system. 61912f5224aeSachartre * 61922f5224aeSachartre * Returned Code: 61932f5224aeSachartre * 0 - disk is accessible without reservation conflict error 61942f5224aeSachartre * != 0 - unable to check if disk is accessible 61952f5224aeSachartre */ 61962f5224aeSachartre int 61972f5224aeSachartre vdc_failfast_check_resv(vdc_t *vdc) 61982f5224aeSachartre { 61992f5224aeSachartre int failure = 0; 62002f5224aeSachartre 62012f5224aeSachartre /* 62022f5224aeSachartre * Send a TEST UNIT READY command. The command will panic 62032f5224aeSachartre * the system if it fails with a reservation conflict. 62042f5224aeSachartre */ 62052f5224aeSachartre if (vdc_failfast_scsi_cmd(vdc, SCMD_TEST_UNIT_READY) != 0) 62062f5224aeSachartre failure++; 62072f5224aeSachartre 62082f5224aeSachartre /* 62092f5224aeSachartre * With SPC-3 compliant devices TEST UNIT READY will succeed on 62102f5224aeSachartre * a reserved device, so we also do a WRITE(10) of zero byte in 62112f5224aeSachartre * order to provoke a Reservation Conflict status on those newer 62122f5224aeSachartre * devices. 62132f5224aeSachartre */ 62142f5224aeSachartre if (vdc_failfast_scsi_cmd(vdc, SCMD_WRITE_G1) != 0) 62152f5224aeSachartre failure++; 62162f5224aeSachartre 62172f5224aeSachartre return (failure); 62182f5224aeSachartre } 62192f5224aeSachartre 62202f5224aeSachartre /* 62212f5224aeSachartre * Add a pending I/O to the failfast I/O queue. An I/O is added to this 62222f5224aeSachartre * queue when it has failed and failfast is enabled. Then we have to check 62232f5224aeSachartre * if it has failed because of a reservation conflict in which case we have 62242f5224aeSachartre * to panic the system. 62252f5224aeSachartre * 62262f5224aeSachartre * Async I/O should be queued with their block I/O data transfer structure 62272f5224aeSachartre * (buf). Sync I/O should be queued with buf = NULL. 62282f5224aeSachartre */ 62292f5224aeSachartre static vdc_io_t * 62302f5224aeSachartre vdc_failfast_io_queue(vdc_t *vdc, struct buf *buf) 62312f5224aeSachartre { 62322f5224aeSachartre vdc_io_t *vio; 62332f5224aeSachartre 62342f5224aeSachartre ASSERT(MUTEX_HELD(&vdc->lock)); 62352f5224aeSachartre 62362f5224aeSachartre vio = kmem_alloc(sizeof (vdc_io_t), KM_SLEEP); 62372f5224aeSachartre vio->vio_next = vdc->failfast_io_queue; 62382f5224aeSachartre vio->vio_buf = buf; 62392f5224aeSachartre vio->vio_qtime = ddi_get_lbolt(); 62402f5224aeSachartre 62412f5224aeSachartre vdc->failfast_io_queue = vio; 62422f5224aeSachartre 62432f5224aeSachartre /* notify the failfast thread that a new I/O is queued */ 62442f5224aeSachartre cv_signal(&vdc->failfast_cv); 62452f5224aeSachartre 62462f5224aeSachartre return (vio); 62472f5224aeSachartre } 62482f5224aeSachartre 62492f5224aeSachartre /* 62502f5224aeSachartre * Remove and complete I/O in the failfast I/O queue which have been 62512f5224aeSachartre * added after the indicated deadline. A deadline of 0 means that all 62522f5224aeSachartre * I/O have to be unqueued and marked as completed. 62532f5224aeSachartre */ 62542f5224aeSachartre static void 62552f5224aeSachartre vdc_failfast_io_unqueue(vdc_t *vdc, clock_t deadline) 62562f5224aeSachartre { 62572f5224aeSachartre vdc_io_t *vio, *vio_tmp; 62582f5224aeSachartre 62592f5224aeSachartre ASSERT(MUTEX_HELD(&vdc->lock)); 62602f5224aeSachartre 62612f5224aeSachartre vio_tmp = NULL; 62622f5224aeSachartre vio = vdc->failfast_io_queue; 62632f5224aeSachartre 62642f5224aeSachartre if (deadline != 0) { 62652f5224aeSachartre /* 62662f5224aeSachartre * Skip any io queued after the deadline. The failfast 62672f5224aeSachartre * I/O queue is ordered starting with the last I/O added 62682f5224aeSachartre * to the queue. 62692f5224aeSachartre */ 62702f5224aeSachartre while (vio != NULL && vio->vio_qtime > deadline) { 62712f5224aeSachartre vio_tmp = vio; 62722f5224aeSachartre vio = vio->vio_next; 62732f5224aeSachartre } 62742f5224aeSachartre } 62752f5224aeSachartre 62762f5224aeSachartre if (vio == NULL) 62772f5224aeSachartre /* nothing to unqueue */ 62782f5224aeSachartre return; 62792f5224aeSachartre 62802f5224aeSachartre /* update the queue */ 62812f5224aeSachartre if (vio_tmp == NULL) 62822f5224aeSachartre vdc->failfast_io_queue = NULL; 62832f5224aeSachartre else 62842f5224aeSachartre vio_tmp->vio_next = NULL; 62852f5224aeSachartre 62862f5224aeSachartre /* 62872f5224aeSachartre * Complete unqueued I/O. Async I/O have a block I/O data transfer 62882f5224aeSachartre * structure (buf) and they are completed by calling biodone(). Sync 62892f5224aeSachartre * I/O do not have a buf and they are completed by setting the 62902f5224aeSachartre * vio_qtime to zero and signaling failfast_io_cv. In that case, the 62912f5224aeSachartre * thread waiting for the I/O to complete is responsible for freeing 62922f5224aeSachartre * the vio structure. 62932f5224aeSachartre */ 62942f5224aeSachartre while (vio != NULL) { 62952f5224aeSachartre vio_tmp = vio->vio_next; 62962f5224aeSachartre if (vio->vio_buf != NULL) { 629790e2f9dcSlm66018 VD_KSTAT_RUNQ_EXIT(vdc); 6298366a92acSlm66018 DTRACE_IO1(done, buf_t *, vio->vio_buf); 62992f5224aeSachartre biodone(vio->vio_buf); 63002f5224aeSachartre kmem_free(vio, sizeof (vdc_io_t)); 63012f5224aeSachartre } else { 63022f5224aeSachartre vio->vio_qtime = 0; 63032f5224aeSachartre } 63042f5224aeSachartre vio = vio_tmp; 63052f5224aeSachartre } 63062f5224aeSachartre 63072f5224aeSachartre cv_broadcast(&vdc->failfast_io_cv); 63082f5224aeSachartre } 63092f5224aeSachartre 63102f5224aeSachartre /* 63112f5224aeSachartre * Failfast Thread. 63122f5224aeSachartre * 63132f5224aeSachartre * While failfast is enabled, the failfast thread sends a TEST UNIT READY 63142f5224aeSachartre * and a zero size WRITE(10) SCSI commands on a regular basis to check that 63152f5224aeSachartre * we still have access to the disk. If a command fails with a RESERVATION 63162f5224aeSachartre * CONFLICT error then the system will immediatly panic. 63172f5224aeSachartre * 63182f5224aeSachartre * The failfast thread is also woken up when an I/O has failed. It then check 63192f5224aeSachartre * the access to the disk to ensure that the I/O failure was not due to a 63202f5224aeSachartre * reservation conflict. 63212f5224aeSachartre * 63222f5224aeSachartre * There is one failfast thread for each virtual disk for which failfast is 63232f5224aeSachartre * enabled. We could have only one thread sending requests for all disks but 63242f5224aeSachartre * this would need vdc to send asynchronous requests and to have callbacks to 63252f5224aeSachartre * process replies. 63262f5224aeSachartre */ 63272f5224aeSachartre static void 63282f5224aeSachartre vdc_failfast_thread(void *arg) 63292f5224aeSachartre { 63302f5224aeSachartre int status; 63312f5224aeSachartre vdc_t *vdc = (vdc_t *)arg; 63322f5224aeSachartre clock_t timeout, starttime; 63332f5224aeSachartre 63342f5224aeSachartre mutex_enter(&vdc->lock); 63352f5224aeSachartre 63362f5224aeSachartre while (vdc->failfast_interval != 0) { 63372f5224aeSachartre 63382f5224aeSachartre starttime = ddi_get_lbolt(); 63392f5224aeSachartre 63402f5224aeSachartre mutex_exit(&vdc->lock); 63412f5224aeSachartre 63422f5224aeSachartre /* check for reservation conflict */ 63432f5224aeSachartre status = vdc_failfast_check_resv(vdc); 63442f5224aeSachartre 63452f5224aeSachartre mutex_enter(&vdc->lock); 63462f5224aeSachartre /* 63472f5224aeSachartre * We have dropped the lock to send the SCSI command so we have 63482f5224aeSachartre * to check that failfast is still enabled. 63492f5224aeSachartre */ 63502f5224aeSachartre if (vdc->failfast_interval == 0) 63512f5224aeSachartre break; 63522f5224aeSachartre 63532f5224aeSachartre /* 63542f5224aeSachartre * If we have successfully check the disk access and there was 63552f5224aeSachartre * no reservation conflict then we can complete any I/O queued 63562f5224aeSachartre * before the last check. 63572f5224aeSachartre */ 63582f5224aeSachartre if (status == 0) 63592f5224aeSachartre vdc_failfast_io_unqueue(vdc, starttime); 63602f5224aeSachartre 63612f5224aeSachartre /* proceed again if some I/O are still in the queue */ 63622f5224aeSachartre if (vdc->failfast_io_queue != NULL) 63632f5224aeSachartre continue; 63642f5224aeSachartre 63652f5224aeSachartre timeout = ddi_get_lbolt() + 63662f5224aeSachartre drv_usectohz(vdc->failfast_interval); 63672f5224aeSachartre (void) cv_timedwait(&vdc->failfast_cv, &vdc->lock, timeout); 63682f5224aeSachartre } 63692f5224aeSachartre 63702f5224aeSachartre /* 63712f5224aeSachartre * Failfast is being stop so we can complete any queued I/O. 63722f5224aeSachartre */ 63732f5224aeSachartre vdc_failfast_io_unqueue(vdc, 0); 63742f5224aeSachartre vdc->failfast_thread = NULL; 63752f5224aeSachartre mutex_exit(&vdc->lock); 63762f5224aeSachartre thread_exit(); 63772f5224aeSachartre } 63782f5224aeSachartre 63792f5224aeSachartre /* 63802f5224aeSachartre * Implement the MHIOCENFAILFAST mhd(7i) ioctl. 63812f5224aeSachartre */ 63822f5224aeSachartre static int 63832f5224aeSachartre vdc_failfast(vdc_t *vdc, caddr_t arg, int mode) 63842f5224aeSachartre { 63852f5224aeSachartre unsigned int mh_time; 63862f5224aeSachartre 63872f5224aeSachartre if (ddi_copyin((void *)arg, &mh_time, sizeof (int), mode)) 63882f5224aeSachartre return (EFAULT); 63892f5224aeSachartre 63902f5224aeSachartre mutex_enter(&vdc->lock); 63912f5224aeSachartre if (mh_time != 0 && vdc->failfast_thread == NULL) { 63922f5224aeSachartre vdc->failfast_thread = thread_create(NULL, 0, 63932f5224aeSachartre vdc_failfast_thread, vdc, 0, &p0, TS_RUN, 63942f5224aeSachartre v.v_maxsyspri - 2); 63952f5224aeSachartre } 63962f5224aeSachartre 63972f5224aeSachartre vdc->failfast_interval = mh_time * 1000; 63982f5224aeSachartre cv_signal(&vdc->failfast_cv); 63992f5224aeSachartre mutex_exit(&vdc->lock); 64002f5224aeSachartre 64012f5224aeSachartre return (0); 64022f5224aeSachartre } 64032f5224aeSachartre 64042f5224aeSachartre /* 64052f5224aeSachartre * Implement the MHIOCTKOWN and MHIOCRELEASE mhd(7i) ioctls. These ioctls are 64062f5224aeSachartre * converted to VD_OP_SET_ACCESS operations. 64072f5224aeSachartre */ 64082f5224aeSachartre static int 64092f5224aeSachartre vdc_access_set(vdc_t *vdc, uint64_t flags, int mode) 64102f5224aeSachartre { 64112f5224aeSachartre int rv; 64122f5224aeSachartre 64132f5224aeSachartre /* submit owership command request */ 64142f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SET_ACCESS, (caddr_t)&flags, 64152f5224aeSachartre sizeof (uint64_t), 0, 0, CB_SYNC, (void *)(uint64_t)mode, 64162f5224aeSachartre VIO_both_dir, B_TRUE); 64172f5224aeSachartre 64182f5224aeSachartre return (rv); 64192f5224aeSachartre } 64202f5224aeSachartre 64212f5224aeSachartre /* 64222f5224aeSachartre * Implement the MHIOCSTATUS mhd(7i) ioctl. This ioctl is converted to a 64232f5224aeSachartre * VD_OP_GET_ACCESS operation. 64242f5224aeSachartre */ 64252f5224aeSachartre static int 64262f5224aeSachartre vdc_access_get(vdc_t *vdc, uint64_t *status, int mode) 64272f5224aeSachartre { 64282f5224aeSachartre int rv; 64292f5224aeSachartre 64302f5224aeSachartre /* submit owership command request */ 64312f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_GET_ACCESS, (caddr_t)status, 64322f5224aeSachartre sizeof (uint64_t), 0, 0, CB_SYNC, (void *)(uint64_t)mode, 64332f5224aeSachartre VIO_both_dir, B_TRUE); 64342f5224aeSachartre 64352f5224aeSachartre return (rv); 64362f5224aeSachartre } 64372f5224aeSachartre 64382f5224aeSachartre /* 64392f5224aeSachartre * Disk Ownership Thread. 64402f5224aeSachartre * 64412f5224aeSachartre * When we have taken the ownership of a disk, this thread waits to be 64422f5224aeSachartre * notified when the LDC channel is reset so that it can recover the 64432f5224aeSachartre * ownership. 64442f5224aeSachartre * 64452f5224aeSachartre * Note that the thread handling the LDC reset (vdc_process_msg_thread()) 64462f5224aeSachartre * can not be used to do the ownership recovery because it has to be 64472f5224aeSachartre * running to handle the reply message to the ownership operation. 64482f5224aeSachartre */ 64492f5224aeSachartre static void 64502f5224aeSachartre vdc_ownership_thread(void *arg) 64512f5224aeSachartre { 64522f5224aeSachartre vdc_t *vdc = (vdc_t *)arg; 64532f5224aeSachartre clock_t timeout; 64542f5224aeSachartre uint64_t status; 64552f5224aeSachartre 64562f5224aeSachartre mutex_enter(&vdc->ownership_lock); 64572f5224aeSachartre mutex_enter(&vdc->lock); 64582f5224aeSachartre 64592f5224aeSachartre while (vdc->ownership & VDC_OWNERSHIP_WANTED) { 64602f5224aeSachartre 64612f5224aeSachartre if ((vdc->ownership & VDC_OWNERSHIP_RESET) || 64622f5224aeSachartre !(vdc->ownership & VDC_OWNERSHIP_GRANTED)) { 64632f5224aeSachartre /* 64642f5224aeSachartre * There was a reset so the ownership has been lost, 64652f5224aeSachartre * try to recover. We do this without using the preempt 64662f5224aeSachartre * option so that we don't steal the ownership from 64672f5224aeSachartre * someone who has preempted us. 64682f5224aeSachartre */ 64692f5224aeSachartre DMSG(vdc, 0, "[%d] Ownership lost, recovering", 64702f5224aeSachartre vdc->instance); 64712f5224aeSachartre 64722f5224aeSachartre vdc->ownership &= ~(VDC_OWNERSHIP_RESET | 64732f5224aeSachartre VDC_OWNERSHIP_GRANTED); 64742f5224aeSachartre 64752f5224aeSachartre mutex_exit(&vdc->lock); 64762f5224aeSachartre 64772f5224aeSachartre status = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE | 64782f5224aeSachartre VD_ACCESS_SET_PRESERVE, FKIOCTL); 64792f5224aeSachartre 64802f5224aeSachartre mutex_enter(&vdc->lock); 64812f5224aeSachartre 64822f5224aeSachartre if (status == 0) { 64832f5224aeSachartre DMSG(vdc, 0, "[%d] Ownership recovered", 64842f5224aeSachartre vdc->instance); 64852f5224aeSachartre vdc->ownership |= VDC_OWNERSHIP_GRANTED; 64862f5224aeSachartre } else { 64872f5224aeSachartre DMSG(vdc, 0, "[%d] Fail to recover ownership", 64882f5224aeSachartre vdc->instance); 64892f5224aeSachartre } 64902f5224aeSachartre 64912f5224aeSachartre } 64922f5224aeSachartre 64932f5224aeSachartre /* 64942f5224aeSachartre * If we have the ownership then we just wait for an event 64952f5224aeSachartre * to happen (LDC reset), otherwise we will retry to recover 64962f5224aeSachartre * after a delay. 64972f5224aeSachartre */ 64982f5224aeSachartre if (vdc->ownership & VDC_OWNERSHIP_GRANTED) 64992f5224aeSachartre timeout = 0; 65002f5224aeSachartre else 65012f5224aeSachartre timeout = ddi_get_lbolt() + 65022f5224aeSachartre drv_usectohz(vdc_ownership_delay); 65032f5224aeSachartre 65042f5224aeSachartre /* Release the ownership_lock and wait on the vdc lock */ 65052f5224aeSachartre mutex_exit(&vdc->ownership_lock); 65062f5224aeSachartre 65072f5224aeSachartre if (timeout == 0) 65082f5224aeSachartre (void) cv_wait(&vdc->ownership_cv, &vdc->lock); 65092f5224aeSachartre else 65102f5224aeSachartre (void) cv_timedwait(&vdc->ownership_cv, 65112f5224aeSachartre &vdc->lock, timeout); 65122f5224aeSachartre 65132f5224aeSachartre mutex_exit(&vdc->lock); 65142f5224aeSachartre 65152f5224aeSachartre mutex_enter(&vdc->ownership_lock); 65162f5224aeSachartre mutex_enter(&vdc->lock); 65172f5224aeSachartre } 65182f5224aeSachartre 65192f5224aeSachartre vdc->ownership_thread = NULL; 65202f5224aeSachartre mutex_exit(&vdc->lock); 65212f5224aeSachartre mutex_exit(&vdc->ownership_lock); 65222f5224aeSachartre 65232f5224aeSachartre thread_exit(); 65242f5224aeSachartre } 65252f5224aeSachartre 65262f5224aeSachartre static void 65272f5224aeSachartre vdc_ownership_update(vdc_t *vdc, int ownership_flags) 65282f5224aeSachartre { 65292f5224aeSachartre ASSERT(MUTEX_HELD(&vdc->ownership_lock)); 65302f5224aeSachartre 65312f5224aeSachartre mutex_enter(&vdc->lock); 65322f5224aeSachartre vdc->ownership = ownership_flags; 65332f5224aeSachartre if ((vdc->ownership & VDC_OWNERSHIP_WANTED) && 65342f5224aeSachartre vdc->ownership_thread == NULL) { 65352f5224aeSachartre /* start ownership thread */ 65362f5224aeSachartre vdc->ownership_thread = thread_create(NULL, 0, 65372f5224aeSachartre vdc_ownership_thread, vdc, 0, &p0, TS_RUN, 65382f5224aeSachartre v.v_maxsyspri - 2); 65392f5224aeSachartre } else { 65402f5224aeSachartre /* notify the ownership thread */ 65412f5224aeSachartre cv_signal(&vdc->ownership_cv); 65422f5224aeSachartre } 65432f5224aeSachartre mutex_exit(&vdc->lock); 65442f5224aeSachartre } 65452f5224aeSachartre 65462f5224aeSachartre /* 65472f5224aeSachartre * Get the size and the block size of a virtual disk from the vdisk server. 65482f5224aeSachartre */ 65492f5224aeSachartre static int 6550*de3a5331SRamesh Chitrothu vdc_get_capacity(vdc_t *vdc, size_t *dsk_size, size_t *blk_size) 65512f5224aeSachartre { 65522f5224aeSachartre int rv = 0; 65532f5224aeSachartre size_t alloc_len; 65542f5224aeSachartre vd_capacity_t *vd_cap; 65552f5224aeSachartre 6556*de3a5331SRamesh Chitrothu ASSERT(MUTEX_NOT_HELD(&vdc->lock)); 65572f5224aeSachartre 65582f5224aeSachartre alloc_len = P2ROUNDUP(sizeof (vd_capacity_t), sizeof (uint64_t)); 65592f5224aeSachartre 65602f5224aeSachartre vd_cap = kmem_zalloc(alloc_len, KM_SLEEP); 65612f5224aeSachartre 65622f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_GET_CAPACITY, (caddr_t)vd_cap, alloc_len, 65632f5224aeSachartre 0, 0, CB_SYNC, (void *)(uint64_t)FKIOCTL, VIO_both_dir, B_TRUE); 65642f5224aeSachartre 6565*de3a5331SRamesh Chitrothu *dsk_size = vd_cap->vdisk_size; 6566*de3a5331SRamesh Chitrothu *blk_size = vd_cap->vdisk_block_size; 65672f5224aeSachartre 65682f5224aeSachartre kmem_free(vd_cap, alloc_len); 65692f5224aeSachartre return (rv); 65702f5224aeSachartre } 65712f5224aeSachartre 65722f5224aeSachartre /* 6573*de3a5331SRamesh Chitrothu * Check the disk capacity. Disk size information is updated if size has 6574*de3a5331SRamesh Chitrothu * changed. 6575*de3a5331SRamesh Chitrothu * 6576*de3a5331SRamesh Chitrothu * Return 0 if the disk capacity is available, or non-zero if it is not. 6577*de3a5331SRamesh Chitrothu */ 6578*de3a5331SRamesh Chitrothu static int 6579*de3a5331SRamesh Chitrothu vdc_check_capacity(vdc_t *vdc) 6580*de3a5331SRamesh Chitrothu { 6581*de3a5331SRamesh Chitrothu size_t dsk_size, blk_size; 6582*de3a5331SRamesh Chitrothu int rv; 6583*de3a5331SRamesh Chitrothu 6584*de3a5331SRamesh Chitrothu if ((rv = vdc_get_capacity(vdc, &dsk_size, &blk_size)) != 0) 6585*de3a5331SRamesh Chitrothu return (rv); 6586*de3a5331SRamesh Chitrothu 6587*de3a5331SRamesh Chitrothu if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0) 6588*de3a5331SRamesh Chitrothu return (EINVAL); 6589*de3a5331SRamesh Chitrothu 6590*de3a5331SRamesh Chitrothu mutex_enter(&vdc->lock); 6591*de3a5331SRamesh Chitrothu vdc_update_size(vdc, dsk_size, blk_size, vdc->max_xfer_sz); 6592*de3a5331SRamesh Chitrothu mutex_exit(&vdc->lock); 6593*de3a5331SRamesh Chitrothu 6594*de3a5331SRamesh Chitrothu return (0); 6595*de3a5331SRamesh Chitrothu } 6596*de3a5331SRamesh Chitrothu 6597*de3a5331SRamesh Chitrothu /* 65981ae08745Sheppo * This structure is used in the DKIO(7I) array below. 65991ae08745Sheppo */ 66001ae08745Sheppo typedef struct vdc_dk_ioctl { 66011ae08745Sheppo uint8_t op; /* VD_OP_XXX value */ 66021ae08745Sheppo int cmd; /* Solaris ioctl operation number */ 66031ae08745Sheppo size_t nbytes; /* size of structure to be copied */ 66040a55fbb7Slm66018 66050a55fbb7Slm66018 /* function to convert between vDisk and Solaris structure formats */ 6606d10e4ef2Snarayan int (*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg, 6607d10e4ef2Snarayan int mode, int dir); 66081ae08745Sheppo } vdc_dk_ioctl_t; 66091ae08745Sheppo 66101ae08745Sheppo /* 66111ae08745Sheppo * Subset of DKIO(7I) operations currently supported 66121ae08745Sheppo */ 66131ae08745Sheppo static vdc_dk_ioctl_t dk_ioctl[] = { 6614eff7243fSlm66018 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, 0, 66150a55fbb7Slm66018 vdc_null_copy_func}, 66160a55fbb7Slm66018 {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), 66174bac2208Snarayan vdc_get_wce_convert}, 66180a55fbb7Slm66018 {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), 66194bac2208Snarayan vdc_set_wce_convert}, 66200a55fbb7Slm66018 {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), 66210a55fbb7Slm66018 vdc_get_vtoc_convert}, 66220a55fbb7Slm66018 {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), 66230a55fbb7Slm66018 vdc_set_vtoc_convert}, 66240a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), 66250a55fbb7Slm66018 vdc_get_geom_convert}, 66260a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), 66270a55fbb7Slm66018 vdc_get_geom_convert}, 66280a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), 66290a55fbb7Slm66018 vdc_get_geom_convert}, 66300a55fbb7Slm66018 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 66310a55fbb7Slm66018 vdc_set_geom_convert}, 66324bac2208Snarayan {VD_OP_GET_EFI, DKIOCGETEFI, 0, 66334bac2208Snarayan vdc_get_efi_convert}, 66344bac2208Snarayan {VD_OP_SET_EFI, DKIOCSETEFI, 0, 66354bac2208Snarayan vdc_set_efi_convert}, 66360a55fbb7Slm66018 663787a7269eSachartre /* DIOCTL_RWCMD is converted to a read or a write */ 663887a7269eSachartre {0, DIOCTL_RWCMD, sizeof (struct dadkio_rwcmd), NULL}, 663987a7269eSachartre 66402f5224aeSachartre /* mhd(7I) non-shared multihost disks ioctls */ 66412f5224aeSachartre {0, MHIOCTKOWN, 0, vdc_null_copy_func}, 66422f5224aeSachartre {0, MHIOCRELEASE, 0, vdc_null_copy_func}, 66432f5224aeSachartre {0, MHIOCSTATUS, 0, vdc_null_copy_func}, 66442f5224aeSachartre {0, MHIOCQRESERVE, 0, vdc_null_copy_func}, 66452f5224aeSachartre 66462f5224aeSachartre /* mhd(7I) shared multihost disks ioctls */ 66472f5224aeSachartre {0, MHIOCGRP_INKEYS, 0, vdc_null_copy_func}, 66482f5224aeSachartre {0, MHIOCGRP_INRESV, 0, vdc_null_copy_func}, 66492f5224aeSachartre {0, MHIOCGRP_REGISTER, 0, vdc_null_copy_func}, 66502f5224aeSachartre {0, MHIOCGRP_RESERVE, 0, vdc_null_copy_func}, 66512f5224aeSachartre {0, MHIOCGRP_PREEMPTANDABORT, 0, vdc_null_copy_func}, 66522f5224aeSachartre {0, MHIOCGRP_REGISTERANDIGNOREKEY, 0, vdc_null_copy_func}, 66532f5224aeSachartre 66542f5224aeSachartre /* mhd(7I) failfast ioctl */ 66552f5224aeSachartre {0, MHIOCENFAILFAST, 0, vdc_null_copy_func}, 66562f5224aeSachartre 66570a55fbb7Slm66018 /* 66580a55fbb7Slm66018 * These particular ioctls are not sent to the server - vdc fakes up 66590a55fbb7Slm66018 * the necessary info. 66600a55fbb7Slm66018 */ 66610a55fbb7Slm66018 {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, 66620a55fbb7Slm66018 {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, 66630a55fbb7Slm66018 {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, 66649642afceSachartre {0, DKIOCPARTITION, 0, vdc_null_copy_func }, 666587a7269eSachartre {0, DKIOCGAPART, 0, vdc_null_copy_func }, 66660a55fbb7Slm66018 {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, 66670a55fbb7Slm66018 {0, CDROMREADOFFSET, 0, vdc_null_copy_func} 66681ae08745Sheppo }; 66691ae08745Sheppo 66701ae08745Sheppo /* 6671edcc0754Sachartre * This function handles ioctl requests from the vd_efi_alloc_and_read() 6672edcc0754Sachartre * function and forward them to the vdisk. 66732f5224aeSachartre */ 66742f5224aeSachartre static int 6675edcc0754Sachartre vd_process_efi_ioctl(void *vdisk, int cmd, uintptr_t arg) 66762f5224aeSachartre { 6677edcc0754Sachartre vdc_t *vdc = (vdc_t *)vdisk; 6678edcc0754Sachartre dev_t dev; 66792f5224aeSachartre int rval; 6680edcc0754Sachartre 6681edcc0754Sachartre dev = makedevice(ddi_driver_major(vdc->dip), 6682edcc0754Sachartre VD_MAKE_DEV(vdc->instance, 0)); 6683edcc0754Sachartre 6684edcc0754Sachartre return (vd_process_ioctl(dev, cmd, (caddr_t)arg, FKIOCTL, &rval)); 66852f5224aeSachartre } 66862f5224aeSachartre 66872f5224aeSachartre /* 66881ae08745Sheppo * Function: 66891ae08745Sheppo * vd_process_ioctl() 66901ae08745Sheppo * 66911ae08745Sheppo * Description: 66920a55fbb7Slm66018 * This routine processes disk specific ioctl calls 66931ae08745Sheppo * 66941ae08745Sheppo * Arguments: 66951ae08745Sheppo * dev - the device number 66961ae08745Sheppo * cmd - the operation [dkio(7I)] to be processed 66971ae08745Sheppo * arg - pointer to user provided structure 66981ae08745Sheppo * (contains data to be set or reference parameter for get) 66991ae08745Sheppo * mode - bit flag, indicating open settings, 32/64 bit type, etc 67002f5224aeSachartre * rvalp - pointer to return value for calling process. 67011ae08745Sheppo * 67021ae08745Sheppo * Return Code: 67031ae08745Sheppo * 0 67041ae08745Sheppo * EFAULT 67051ae08745Sheppo * ENXIO 67061ae08745Sheppo * EIO 67071ae08745Sheppo * ENOTSUP 67081ae08745Sheppo */ 67091ae08745Sheppo static int 67102f5224aeSachartre vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, int *rvalp) 67111ae08745Sheppo { 67120d0c8d4bSnarayan int instance = VDCUNIT(dev); 67131ae08745Sheppo vdc_t *vdc = NULL; 67141ae08745Sheppo int rv = -1; 67151ae08745Sheppo int idx = 0; /* index into dk_ioctl[] */ 67161ae08745Sheppo size_t len = 0; /* #bytes to send to vds */ 67171ae08745Sheppo size_t alloc_len = 0; /* #bytes to allocate mem for */ 67181ae08745Sheppo caddr_t mem_p = NULL; 67191ae08745Sheppo size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 67203af08d82Slm66018 vdc_dk_ioctl_t *iop; 67211ae08745Sheppo 67221ae08745Sheppo vdc = ddi_get_soft_state(vdc_state, instance); 67231ae08745Sheppo if (vdc == NULL) { 67241ae08745Sheppo cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 67251ae08745Sheppo instance); 67261ae08745Sheppo return (ENXIO); 67271ae08745Sheppo } 67281ae08745Sheppo 67293af08d82Slm66018 DMSG(vdc, 0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n", 67303af08d82Slm66018 instance, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 67311ae08745Sheppo 67322f5224aeSachartre if (rvalp != NULL) { 67332f5224aeSachartre /* the return value of the ioctl is 0 by default */ 67342f5224aeSachartre *rvalp = 0; 67352f5224aeSachartre } 67362f5224aeSachartre 67371ae08745Sheppo /* 67381ae08745Sheppo * Validate the ioctl operation to be performed. 67391ae08745Sheppo * 67401ae08745Sheppo * If we have looped through the array without finding a match then we 67411ae08745Sheppo * don't support this ioctl. 67421ae08745Sheppo */ 67431ae08745Sheppo for (idx = 0; idx < nioctls; idx++) { 67441ae08745Sheppo if (cmd == dk_ioctl[idx].cmd) 67451ae08745Sheppo break; 67461ae08745Sheppo } 67471ae08745Sheppo 67481ae08745Sheppo if (idx >= nioctls) { 67493af08d82Slm66018 DMSG(vdc, 0, "[%d] Unsupported ioctl (0x%x)\n", 6750e1ebb9ecSlm66018 vdc->instance, cmd); 67511ae08745Sheppo return (ENOTSUP); 67521ae08745Sheppo } 67531ae08745Sheppo 67543af08d82Slm66018 iop = &(dk_ioctl[idx]); 67553af08d82Slm66018 67564bac2208Snarayan if (cmd == DKIOCGETEFI || cmd == DKIOCSETEFI) { 67574bac2208Snarayan /* size is not fixed for EFI ioctls, it depends on ioctl arg */ 67584bac2208Snarayan dk_efi_t dk_efi; 67594bac2208Snarayan 67604bac2208Snarayan rv = ddi_copyin(arg, &dk_efi, sizeof (dk_efi_t), mode); 67614bac2208Snarayan if (rv != 0) 67624bac2208Snarayan return (EFAULT); 67634bac2208Snarayan 67644bac2208Snarayan len = sizeof (vd_efi_t) - 1 + dk_efi.dki_length; 67654bac2208Snarayan } else { 67663af08d82Slm66018 len = iop->nbytes; 67674bac2208Snarayan } 67681ae08745Sheppo 67692f5224aeSachartre /* check if the ioctl is applicable */ 67701ae08745Sheppo switch (cmd) { 67711ae08745Sheppo case CDROMREADOFFSET: 67721ae08745Sheppo case DKIOCREMOVABLE: 67731ae08745Sheppo return (ENOTTY); 67741ae08745Sheppo 67752f5224aeSachartre case USCSICMD: 67762f5224aeSachartre case MHIOCTKOWN: 67772f5224aeSachartre case MHIOCSTATUS: 67782f5224aeSachartre case MHIOCQRESERVE: 67792f5224aeSachartre case MHIOCRELEASE: 67802f5224aeSachartre case MHIOCGRP_INKEYS: 67812f5224aeSachartre case MHIOCGRP_INRESV: 67822f5224aeSachartre case MHIOCGRP_REGISTER: 67832f5224aeSachartre case MHIOCGRP_RESERVE: 67842f5224aeSachartre case MHIOCGRP_PREEMPTANDABORT: 67852f5224aeSachartre case MHIOCGRP_REGISTERANDIGNOREKEY: 67862f5224aeSachartre case MHIOCENFAILFAST: 67872f5224aeSachartre if (vdc->cinfo == NULL) 67882f5224aeSachartre return (ENXIO); 67892f5224aeSachartre if (vdc->cinfo->dki_ctype != DKC_SCSI_CCS) 67902f5224aeSachartre return (ENOTTY); 67912f5224aeSachartre break; 67922f5224aeSachartre 67932f5224aeSachartre case DIOCTL_RWCMD: 67942f5224aeSachartre if (vdc->cinfo == NULL) 67952f5224aeSachartre return (ENXIO); 67962f5224aeSachartre if (vdc->cinfo->dki_ctype != DKC_DIRECT) 67972f5224aeSachartre return (ENOTTY); 67982f5224aeSachartre break; 67992f5224aeSachartre 68002f5224aeSachartre case DKIOCINFO: 68012f5224aeSachartre if (vdc->cinfo == NULL) 68022f5224aeSachartre return (ENXIO); 68032f5224aeSachartre break; 68042f5224aeSachartre 68052f5224aeSachartre case DKIOCGMEDIAINFO: 68062f5224aeSachartre if (vdc->minfo == NULL) 68072f5224aeSachartre return (ENXIO); 68082f5224aeSachartre if (vdc_check_capacity(vdc) != 0) 68092f5224aeSachartre /* disk capacity is not available */ 68102f5224aeSachartre return (EIO); 68112f5224aeSachartre break; 68122f5224aeSachartre } 68132f5224aeSachartre 68142f5224aeSachartre /* 68152f5224aeSachartre * Deal with ioctls which require a processing different than 68162f5224aeSachartre * converting ioctl arguments and sending a corresponding 68172f5224aeSachartre * VD operation. 68182f5224aeSachartre */ 68192f5224aeSachartre switch (cmd) { 68202f5224aeSachartre 68212f5224aeSachartre case USCSICMD: 68222f5224aeSachartre { 68232f5224aeSachartre return (vdc_uscsi_cmd(vdc, arg, mode)); 68242f5224aeSachartre } 68252f5224aeSachartre 68262f5224aeSachartre case MHIOCTKOWN: 68272f5224aeSachartre { 68282f5224aeSachartre mutex_enter(&vdc->ownership_lock); 68292f5224aeSachartre /* 68302f5224aeSachartre * We have to set VDC_OWNERSHIP_WANTED now so that the ownership 68312f5224aeSachartre * can be flagged with VDC_OWNERSHIP_RESET if the LDC is reset 68322f5224aeSachartre * while we are processing the ioctl. 68332f5224aeSachartre */ 68342f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_WANTED); 68352f5224aeSachartre 68362f5224aeSachartre rv = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE | 68372f5224aeSachartre VD_ACCESS_SET_PREEMPT | VD_ACCESS_SET_PRESERVE, mode); 68382f5224aeSachartre if (rv == 0) { 68392f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_WANTED | 68402f5224aeSachartre VDC_OWNERSHIP_GRANTED); 68412f5224aeSachartre } else { 68422f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 68432f5224aeSachartre } 68442f5224aeSachartre mutex_exit(&vdc->ownership_lock); 68452f5224aeSachartre return (rv); 68462f5224aeSachartre } 68472f5224aeSachartre 68482f5224aeSachartre case MHIOCRELEASE: 68492f5224aeSachartre { 68502f5224aeSachartre mutex_enter(&vdc->ownership_lock); 68512f5224aeSachartre rv = vdc_access_set(vdc, VD_ACCESS_SET_CLEAR, mode); 68522f5224aeSachartre if (rv == 0) { 68532f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 68542f5224aeSachartre } 68552f5224aeSachartre mutex_exit(&vdc->ownership_lock); 68562f5224aeSachartre return (rv); 68572f5224aeSachartre } 68582f5224aeSachartre 68592f5224aeSachartre case MHIOCSTATUS: 68602f5224aeSachartre { 68612f5224aeSachartre uint64_t status; 68622f5224aeSachartre 68632f5224aeSachartre rv = vdc_access_get(vdc, &status, mode); 68642f5224aeSachartre if (rv == 0 && rvalp != NULL) 68652f5224aeSachartre *rvalp = (status & VD_ACCESS_ALLOWED)? 0 : 1; 68662f5224aeSachartre return (rv); 68672f5224aeSachartre } 68682f5224aeSachartre 68692f5224aeSachartre case MHIOCQRESERVE: 68702f5224aeSachartre { 68712f5224aeSachartre rv = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE, mode); 68722f5224aeSachartre return (rv); 68732f5224aeSachartre } 68742f5224aeSachartre 68752f5224aeSachartre case MHIOCGRP_INKEYS: 68762f5224aeSachartre { 68772f5224aeSachartre return (vdc_mhd_inkeys(vdc, arg, mode)); 68782f5224aeSachartre } 68792f5224aeSachartre 68802f5224aeSachartre case MHIOCGRP_INRESV: 68812f5224aeSachartre { 68822f5224aeSachartre return (vdc_mhd_inresv(vdc, arg, mode)); 68832f5224aeSachartre } 68842f5224aeSachartre 68852f5224aeSachartre case MHIOCGRP_REGISTER: 68862f5224aeSachartre { 68872f5224aeSachartre return (vdc_mhd_register(vdc, arg, mode)); 68882f5224aeSachartre } 68892f5224aeSachartre 68902f5224aeSachartre case MHIOCGRP_RESERVE: 68912f5224aeSachartre { 68922f5224aeSachartre return (vdc_mhd_reserve(vdc, arg, mode)); 68932f5224aeSachartre } 68942f5224aeSachartre 68952f5224aeSachartre case MHIOCGRP_PREEMPTANDABORT: 68962f5224aeSachartre { 68972f5224aeSachartre return (vdc_mhd_preemptabort(vdc, arg, mode)); 68982f5224aeSachartre } 68992f5224aeSachartre 69002f5224aeSachartre case MHIOCGRP_REGISTERANDIGNOREKEY: 69012f5224aeSachartre { 69022f5224aeSachartre return (vdc_mhd_registerignore(vdc, arg, mode)); 69032f5224aeSachartre } 69042f5224aeSachartre 69052f5224aeSachartre case MHIOCENFAILFAST: 69062f5224aeSachartre { 69072f5224aeSachartre rv = vdc_failfast(vdc, arg, mode); 69082f5224aeSachartre return (rv); 69092f5224aeSachartre } 69102f5224aeSachartre 691187a7269eSachartre case DIOCTL_RWCMD: 691287a7269eSachartre { 691387a7269eSachartre return (vdc_dioctl_rwcmd(dev, arg, mode)); 691487a7269eSachartre } 691587a7269eSachartre 691687a7269eSachartre case DKIOCGAPART: 691787a7269eSachartre { 69189642afceSachartre return (vdc_dkio_gapart(vdc, arg, mode)); 69199642afceSachartre } 69209642afceSachartre 69219642afceSachartre case DKIOCPARTITION: 69229642afceSachartre { 69239642afceSachartre return (vdc_dkio_partition(vdc, arg, mode)); 692487a7269eSachartre } 692587a7269eSachartre 69261ae08745Sheppo case DKIOCINFO: 69271ae08745Sheppo { 69281ae08745Sheppo struct dk_cinfo cinfo; 69291ae08745Sheppo 69301ae08745Sheppo bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 69310d0c8d4bSnarayan cinfo.dki_partition = VDCPART(dev); 69321ae08745Sheppo 69331ae08745Sheppo rv = ddi_copyout(&cinfo, (void *)arg, 69341ae08745Sheppo sizeof (struct dk_cinfo), mode); 69351ae08745Sheppo if (rv != 0) 69361ae08745Sheppo return (EFAULT); 69371ae08745Sheppo 69381ae08745Sheppo return (0); 69391ae08745Sheppo } 69401ae08745Sheppo 69411ae08745Sheppo case DKIOCGMEDIAINFO: 69428e6a2a04Slm66018 { 69432f5224aeSachartre ASSERT(vdc->vdisk_size != 0); 6944*de3a5331SRamesh Chitrothu ASSERT(vdc->minfo->dki_capacity != 0); 69451ae08745Sheppo rv = ddi_copyout(vdc->minfo, (void *)arg, 69461ae08745Sheppo sizeof (struct dk_minfo), mode); 69471ae08745Sheppo if (rv != 0) 69481ae08745Sheppo return (EFAULT); 69491ae08745Sheppo 69501ae08745Sheppo return (0); 69511ae08745Sheppo } 69521ae08745Sheppo 69538e6a2a04Slm66018 case DKIOCFLUSHWRITECACHE: 69548e6a2a04Slm66018 { 695517cadca8Slm66018 struct dk_callback *dkc = 695617cadca8Slm66018 (struct dk_callback *)(uintptr_t)arg; 69578e6a2a04Slm66018 vdc_dk_arg_t *dkarg = NULL; 69588e6a2a04Slm66018 69593af08d82Slm66018 DMSG(vdc, 1, "[%d] Flush W$: mode %x\n", 69603af08d82Slm66018 instance, mode); 69618e6a2a04Slm66018 69628e6a2a04Slm66018 /* 69638e6a2a04Slm66018 * If arg is NULL, then there is no callback function 69648e6a2a04Slm66018 * registered and the call operates synchronously; we 69658e6a2a04Slm66018 * break and continue with the rest of the function and 69668e6a2a04Slm66018 * wait for vds to return (i.e. after the request to 69678e6a2a04Slm66018 * vds returns successfully, all writes completed prior 69688e6a2a04Slm66018 * to the ioctl will have been flushed from the disk 69698e6a2a04Slm66018 * write cache to persistent media. 69708e6a2a04Slm66018 * 69718e6a2a04Slm66018 * If a callback function is registered, we dispatch 69728e6a2a04Slm66018 * the request on a task queue and return immediately. 69738e6a2a04Slm66018 * The callback will deal with informing the calling 69748e6a2a04Slm66018 * thread that the flush request is completed. 69758e6a2a04Slm66018 */ 69768e6a2a04Slm66018 if (dkc == NULL) 69778e6a2a04Slm66018 break; 69788e6a2a04Slm66018 6979eff7243fSlm66018 /* 6980eff7243fSlm66018 * the asynchronous callback is only supported if 6981eff7243fSlm66018 * invoked from within the kernel 6982eff7243fSlm66018 */ 6983eff7243fSlm66018 if ((mode & FKIOCTL) == 0) 6984eff7243fSlm66018 return (ENOTSUP); 6985eff7243fSlm66018 69868e6a2a04Slm66018 dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP); 69878e6a2a04Slm66018 69888e6a2a04Slm66018 dkarg->mode = mode; 69898e6a2a04Slm66018 dkarg->dev = dev; 69908e6a2a04Slm66018 bcopy(dkc, &dkarg->dkc, sizeof (*dkc)); 69918e6a2a04Slm66018 69928e6a2a04Slm66018 mutex_enter(&vdc->lock); 69938e6a2a04Slm66018 vdc->dkio_flush_pending++; 69948e6a2a04Slm66018 dkarg->vdc = vdc; 69958e6a2a04Slm66018 mutex_exit(&vdc->lock); 69968e6a2a04Slm66018 69978e6a2a04Slm66018 /* put the request on a task queue */ 69988e6a2a04Slm66018 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 69998e6a2a04Slm66018 (void *)dkarg, DDI_SLEEP); 70003af08d82Slm66018 if (rv == NULL) { 70013af08d82Slm66018 /* clean up if dispatch fails */ 70023af08d82Slm66018 mutex_enter(&vdc->lock); 70033af08d82Slm66018 vdc->dkio_flush_pending--; 700478fcd0a1Sachartre mutex_exit(&vdc->lock); 70053af08d82Slm66018 kmem_free(dkarg, sizeof (vdc_dk_arg_t)); 70063af08d82Slm66018 } 70078e6a2a04Slm66018 70088e6a2a04Slm66018 return (rv == NULL ? ENOMEM : 0); 70098e6a2a04Slm66018 } 70108e6a2a04Slm66018 } 70118e6a2a04Slm66018 70121ae08745Sheppo /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 70133af08d82Slm66018 ASSERT(iop->op != 0); 70141ae08745Sheppo 701517cadca8Slm66018 /* check if the vDisk server handles the operation for this vDisk */ 701617cadca8Slm66018 if (VD_OP_SUPPORTED(vdc->operations, iop->op) == B_FALSE) { 701717cadca8Slm66018 DMSG(vdc, 0, "[%d] Unsupported VD_OP operation (0x%x)\n", 701817cadca8Slm66018 vdc->instance, iop->op); 701917cadca8Slm66018 return (ENOTSUP); 702017cadca8Slm66018 } 702117cadca8Slm66018 70221ae08745Sheppo /* LDC requires that the memory being mapped is 8-byte aligned */ 70231ae08745Sheppo alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 70243af08d82Slm66018 DMSG(vdc, 1, "[%d] struct size %ld alloc %ld\n", 70253af08d82Slm66018 instance, len, alloc_len); 70261ae08745Sheppo 7027eff7243fSlm66018 if (alloc_len > 0) 70281ae08745Sheppo mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 70291ae08745Sheppo 70300a55fbb7Slm66018 /* 7031eff7243fSlm66018 * Call the conversion function for this ioctl which, if necessary, 70320a55fbb7Slm66018 * converts from the Solaris format to the format ARC'ed 70330a55fbb7Slm66018 * as part of the vDisk protocol (FWARC 2006/195) 70340a55fbb7Slm66018 */ 70353af08d82Slm66018 ASSERT(iop->convert != NULL); 70363af08d82Slm66018 rv = (iop->convert)(vdc, arg, mem_p, mode, VD_COPYIN); 70371ae08745Sheppo if (rv != 0) { 70383af08d82Slm66018 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 7039e1ebb9ecSlm66018 instance, rv, cmd); 70401ae08745Sheppo if (mem_p != NULL) 70411ae08745Sheppo kmem_free(mem_p, alloc_len); 70420a55fbb7Slm66018 return (rv); 70431ae08745Sheppo } 70441ae08745Sheppo 70451ae08745Sheppo /* 70461ae08745Sheppo * send request to vds to service the ioctl. 70471ae08745Sheppo */ 70483af08d82Slm66018 rv = vdc_do_sync_op(vdc, iop->op, mem_p, alloc_len, 70490d0c8d4bSnarayan VDCPART(dev), 0, CB_SYNC, (void *)(uint64_t)mode, 70502f5224aeSachartre VIO_both_dir, B_TRUE); 705178fcd0a1Sachartre 70521ae08745Sheppo if (rv != 0) { 70531ae08745Sheppo /* 70541ae08745Sheppo * This is not necessarily an error. The ioctl could 70551ae08745Sheppo * be returning a value such as ENOTTY to indicate 70561ae08745Sheppo * that the ioctl is not applicable. 70571ae08745Sheppo */ 70583af08d82Slm66018 DMSG(vdc, 0, "[%d] vds returned %d for ioctl 0x%x\n", 7059e1ebb9ecSlm66018 instance, rv, cmd); 70601ae08745Sheppo if (mem_p != NULL) 70611ae08745Sheppo kmem_free(mem_p, alloc_len); 7062d10e4ef2Snarayan 70631ae08745Sheppo return (rv); 70641ae08745Sheppo } 70651ae08745Sheppo 70661ae08745Sheppo /* 70670a55fbb7Slm66018 * Call the conversion function (if it exists) for this ioctl 70680a55fbb7Slm66018 * which converts from the format ARC'ed as part of the vDisk 70690a55fbb7Slm66018 * protocol (FWARC 2006/195) back to a format understood by 70700a55fbb7Slm66018 * the rest of Solaris. 70711ae08745Sheppo */ 70723af08d82Slm66018 rv = (iop->convert)(vdc, mem_p, arg, mode, VD_COPYOUT); 70730a55fbb7Slm66018 if (rv != 0) { 70743af08d82Slm66018 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 7075e1ebb9ecSlm66018 instance, rv, cmd); 70761ae08745Sheppo if (mem_p != NULL) 70771ae08745Sheppo kmem_free(mem_p, alloc_len); 70780a55fbb7Slm66018 return (rv); 70791ae08745Sheppo } 70801ae08745Sheppo 70811ae08745Sheppo if (mem_p != NULL) 70821ae08745Sheppo kmem_free(mem_p, alloc_len); 70831ae08745Sheppo 70841ae08745Sheppo return (rv); 70851ae08745Sheppo } 70861ae08745Sheppo 70871ae08745Sheppo /* 70881ae08745Sheppo * Function: 70890a55fbb7Slm66018 * 70900a55fbb7Slm66018 * Description: 70910a55fbb7Slm66018 * This is an empty conversion function used by ioctl calls which 70920a55fbb7Slm66018 * do not need to convert the data being passed in/out to userland 70930a55fbb7Slm66018 */ 70940a55fbb7Slm66018 static int 7095d10e4ef2Snarayan vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir) 70960a55fbb7Slm66018 { 7097d10e4ef2Snarayan _NOTE(ARGUNUSED(vdc)) 70980a55fbb7Slm66018 _NOTE(ARGUNUSED(from)) 70990a55fbb7Slm66018 _NOTE(ARGUNUSED(to)) 71000a55fbb7Slm66018 _NOTE(ARGUNUSED(mode)) 71010a55fbb7Slm66018 _NOTE(ARGUNUSED(dir)) 71020a55fbb7Slm66018 71030a55fbb7Slm66018 return (0); 71040a55fbb7Slm66018 } 71050a55fbb7Slm66018 71064bac2208Snarayan static int 71074bac2208Snarayan vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 71084bac2208Snarayan int mode, int dir) 71094bac2208Snarayan { 71104bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 71114bac2208Snarayan 71124bac2208Snarayan if (dir == VD_COPYIN) 71134bac2208Snarayan return (0); /* nothing to do */ 71144bac2208Snarayan 71154bac2208Snarayan if (ddi_copyout(from, to, sizeof (int), mode) != 0) 71164bac2208Snarayan return (EFAULT); 71174bac2208Snarayan 71184bac2208Snarayan return (0); 71194bac2208Snarayan } 71204bac2208Snarayan 71214bac2208Snarayan static int 71224bac2208Snarayan vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 71234bac2208Snarayan int mode, int dir) 71244bac2208Snarayan { 71254bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 71264bac2208Snarayan 71274bac2208Snarayan if (dir == VD_COPYOUT) 71284bac2208Snarayan return (0); /* nothing to do */ 71294bac2208Snarayan 71304bac2208Snarayan if (ddi_copyin(from, to, sizeof (int), mode) != 0) 71314bac2208Snarayan return (EFAULT); 71324bac2208Snarayan 71334bac2208Snarayan return (0); 71344bac2208Snarayan } 71354bac2208Snarayan 71360a55fbb7Slm66018 /* 71370a55fbb7Slm66018 * Function: 71380a55fbb7Slm66018 * vdc_get_vtoc_convert() 71390a55fbb7Slm66018 * 71400a55fbb7Slm66018 * Description: 7141d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCGVTOC 7142d10e4ef2Snarayan * Solaris structure to the format defined in FWARC 2006/195. 7143d10e4ef2Snarayan * 7144d10e4ef2Snarayan * In the struct vtoc definition, the timestamp field is marked as not 7145d10e4ef2Snarayan * supported so it is not part of vDisk protocol (FWARC 2006/195). 7146d10e4ef2Snarayan * However SVM uses that field to check it can write into the VTOC, 7147d10e4ef2Snarayan * so we fake up the info of that field. 71480a55fbb7Slm66018 * 71490a55fbb7Slm66018 * Arguments: 7150d10e4ef2Snarayan * vdc - the vDisk client 71510a55fbb7Slm66018 * from - the buffer containing the data to be copied from 71520a55fbb7Slm66018 * to - the buffer to be copied to 71530a55fbb7Slm66018 * mode - flags passed to ioctl() call 71540a55fbb7Slm66018 * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT 71550a55fbb7Slm66018 * 71560a55fbb7Slm66018 * Return Code: 71570a55fbb7Slm66018 * 0 - Success 71580a55fbb7Slm66018 * ENXIO - incorrect buffer passed in. 7159d10e4ef2Snarayan * EFAULT - ddi_copyout routine encountered an error. 71600a55fbb7Slm66018 */ 71610a55fbb7Slm66018 static int 7162d10e4ef2Snarayan vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 71630a55fbb7Slm66018 { 7164d10e4ef2Snarayan int i; 71650a55fbb7Slm66018 void *tmp_mem = NULL; 71660a55fbb7Slm66018 void *tmp_memp; 71670a55fbb7Slm66018 struct vtoc vt; 71680a55fbb7Slm66018 struct vtoc32 vt32; 71690a55fbb7Slm66018 int copy_len = 0; 71700a55fbb7Slm66018 int rv = 0; 71710a55fbb7Slm66018 71720a55fbb7Slm66018 if (dir != VD_COPYOUT) 71730a55fbb7Slm66018 return (0); /* nothing to do */ 71740a55fbb7Slm66018 71750a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 71760a55fbb7Slm66018 return (ENXIO); 71770a55fbb7Slm66018 71780a55fbb7Slm66018 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 71790a55fbb7Slm66018 copy_len = sizeof (struct vtoc32); 71800a55fbb7Slm66018 else 71810a55fbb7Slm66018 copy_len = sizeof (struct vtoc); 71820a55fbb7Slm66018 71830a55fbb7Slm66018 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 71840a55fbb7Slm66018 71850a55fbb7Slm66018 VD_VTOC2VTOC((vd_vtoc_t *)from, &vt); 7186d10e4ef2Snarayan 7187d10e4ef2Snarayan /* fake the VTOC timestamp field */ 7188d10e4ef2Snarayan for (i = 0; i < V_NUMPAR; i++) { 7189d10e4ef2Snarayan vt.timestamp[i] = vdc->vtoc->timestamp[i]; 7190d10e4ef2Snarayan } 7191d10e4ef2Snarayan 71920a55fbb7Slm66018 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 719317cadca8Slm66018 /* LINTED E_ASSIGN_NARROW_CONV */ 71940a55fbb7Slm66018 vtoctovtoc32(vt, vt32); 71950a55fbb7Slm66018 tmp_memp = &vt32; 71960a55fbb7Slm66018 } else { 71970a55fbb7Slm66018 tmp_memp = &vt; 71980a55fbb7Slm66018 } 71990a55fbb7Slm66018 rv = ddi_copyout(tmp_memp, to, copy_len, mode); 72000a55fbb7Slm66018 if (rv != 0) 72010a55fbb7Slm66018 rv = EFAULT; 72020a55fbb7Slm66018 72030a55fbb7Slm66018 kmem_free(tmp_mem, copy_len); 72040a55fbb7Slm66018 return (rv); 72050a55fbb7Slm66018 } 72060a55fbb7Slm66018 72070a55fbb7Slm66018 /* 72080a55fbb7Slm66018 * Function: 72090a55fbb7Slm66018 * vdc_set_vtoc_convert() 72100a55fbb7Slm66018 * 72110a55fbb7Slm66018 * Description: 7212d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCSVTOC 7213d10e4ef2Snarayan * Solaris structure to the format defined in FWARC 2006/195. 72140a55fbb7Slm66018 * 72150a55fbb7Slm66018 * Arguments: 7216d10e4ef2Snarayan * vdc - the vDisk client 72170a55fbb7Slm66018 * from - Buffer with data 72180a55fbb7Slm66018 * to - Buffer where data is to be copied to 72190a55fbb7Slm66018 * mode - flags passed to ioctl 72200a55fbb7Slm66018 * dir - direction of copy (in or out) 72210a55fbb7Slm66018 * 72220a55fbb7Slm66018 * Return Code: 72230a55fbb7Slm66018 * 0 - Success 72240a55fbb7Slm66018 * ENXIO - Invalid buffer passed in 72250a55fbb7Slm66018 * EFAULT - ddi_copyin of data failed 72260a55fbb7Slm66018 */ 72270a55fbb7Slm66018 static int 7228d10e4ef2Snarayan vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 72290a55fbb7Slm66018 { 723078fcd0a1Sachartre _NOTE(ARGUNUSED(vdc)) 723178fcd0a1Sachartre 72322f5224aeSachartre void *tmp_mem = NULL, *uvtoc; 72330a55fbb7Slm66018 struct vtoc vt; 72340a55fbb7Slm66018 struct vtoc *vtp = &vt; 72350a55fbb7Slm66018 vd_vtoc_t vtvd; 72360a55fbb7Slm66018 int copy_len = 0; 72372f5224aeSachartre int i, rv = 0; 72380a55fbb7Slm66018 72390a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 72400a55fbb7Slm66018 return (ENXIO); 72410a55fbb7Slm66018 72422f5224aeSachartre if (dir == VD_COPYIN) 72432f5224aeSachartre uvtoc = from; 72442f5224aeSachartre else 72452f5224aeSachartre uvtoc = to; 72462f5224aeSachartre 72470a55fbb7Slm66018 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 72480a55fbb7Slm66018 copy_len = sizeof (struct vtoc32); 72490a55fbb7Slm66018 else 72500a55fbb7Slm66018 copy_len = sizeof (struct vtoc); 72510a55fbb7Slm66018 72520a55fbb7Slm66018 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 72530a55fbb7Slm66018 72542f5224aeSachartre rv = ddi_copyin(uvtoc, tmp_mem, copy_len, mode); 72550a55fbb7Slm66018 if (rv != 0) { 72560a55fbb7Slm66018 kmem_free(tmp_mem, copy_len); 72570a55fbb7Slm66018 return (EFAULT); 72580a55fbb7Slm66018 } 72590a55fbb7Slm66018 72600a55fbb7Slm66018 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 72610a55fbb7Slm66018 vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt); 72620a55fbb7Slm66018 } else { 72630a55fbb7Slm66018 vtp = tmp_mem; 72640a55fbb7Slm66018 } 72650a55fbb7Slm66018 72662f5224aeSachartre if (dir == VD_COPYOUT) { 72672f5224aeSachartre /* 72682f5224aeSachartre * The disk label may have changed. Revalidate the disk 72695b98b509Sachartre * geometry. This will also update the device nodes. 72702f5224aeSachartre */ 72712f5224aeSachartre vdc_validate(vdc); 72722f5224aeSachartre 72732f5224aeSachartre /* 72742f5224aeSachartre * We also need to keep track of the timestamp fields. 72752f5224aeSachartre */ 72762f5224aeSachartre for (i = 0; i < V_NUMPAR; i++) { 72772f5224aeSachartre vdc->vtoc->timestamp[i] = vtp->timestamp[i]; 72782f5224aeSachartre } 72792f5224aeSachartre 72802f5224aeSachartre return (0); 72812f5224aeSachartre } 72822f5224aeSachartre 72830a55fbb7Slm66018 VTOC2VD_VTOC(vtp, &vtvd); 72840a55fbb7Slm66018 bcopy(&vtvd, to, sizeof (vd_vtoc_t)); 72850a55fbb7Slm66018 kmem_free(tmp_mem, copy_len); 72860a55fbb7Slm66018 72870a55fbb7Slm66018 return (0); 72880a55fbb7Slm66018 } 72890a55fbb7Slm66018 72900a55fbb7Slm66018 /* 72910a55fbb7Slm66018 * Function: 72920a55fbb7Slm66018 * vdc_get_geom_convert() 72930a55fbb7Slm66018 * 72940a55fbb7Slm66018 * Description: 7295d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCGGEOM, 7296d10e4ef2Snarayan * DKIOCG_PHYSGEOM and DKIOG_VIRTGEOM Solaris structures to the format 7297d10e4ef2Snarayan * defined in FWARC 2006/195 72980a55fbb7Slm66018 * 72990a55fbb7Slm66018 * Arguments: 7300d10e4ef2Snarayan * vdc - the vDisk client 73010a55fbb7Slm66018 * from - Buffer with data 73020a55fbb7Slm66018 * to - Buffer where data is to be copied to 73030a55fbb7Slm66018 * mode - flags passed to ioctl 73040a55fbb7Slm66018 * dir - direction of copy (in or out) 73050a55fbb7Slm66018 * 73060a55fbb7Slm66018 * Return Code: 73070a55fbb7Slm66018 * 0 - Success 73080a55fbb7Slm66018 * ENXIO - Invalid buffer passed in 7309d10e4ef2Snarayan * EFAULT - ddi_copyout of data failed 73100a55fbb7Slm66018 */ 73110a55fbb7Slm66018 static int 7312d10e4ef2Snarayan vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 73130a55fbb7Slm66018 { 7314d10e4ef2Snarayan _NOTE(ARGUNUSED(vdc)) 7315d10e4ef2Snarayan 73160a55fbb7Slm66018 struct dk_geom geom; 73170a55fbb7Slm66018 int copy_len = sizeof (struct dk_geom); 73180a55fbb7Slm66018 int rv = 0; 73190a55fbb7Slm66018 73200a55fbb7Slm66018 if (dir != VD_COPYOUT) 73210a55fbb7Slm66018 return (0); /* nothing to do */ 73220a55fbb7Slm66018 73230a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 73240a55fbb7Slm66018 return (ENXIO); 73250a55fbb7Slm66018 73260a55fbb7Slm66018 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 73270a55fbb7Slm66018 rv = ddi_copyout(&geom, to, copy_len, mode); 73280a55fbb7Slm66018 if (rv != 0) 73290a55fbb7Slm66018 rv = EFAULT; 73300a55fbb7Slm66018 73310a55fbb7Slm66018 return (rv); 73320a55fbb7Slm66018 } 73330a55fbb7Slm66018 73340a55fbb7Slm66018 /* 73350a55fbb7Slm66018 * Function: 73360a55fbb7Slm66018 * vdc_set_geom_convert() 73370a55fbb7Slm66018 * 73380a55fbb7Slm66018 * Description: 7339d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCSGEOM 7340d10e4ef2Snarayan * Solaris structure to the format defined in FWARC 2006/195. 73410a55fbb7Slm66018 * 73420a55fbb7Slm66018 * Arguments: 7343d10e4ef2Snarayan * vdc - the vDisk client 73440a55fbb7Slm66018 * from - Buffer with data 73450a55fbb7Slm66018 * to - Buffer where data is to be copied to 73460a55fbb7Slm66018 * mode - flags passed to ioctl 73470a55fbb7Slm66018 * dir - direction of copy (in or out) 73480a55fbb7Slm66018 * 73490a55fbb7Slm66018 * Return Code: 73500a55fbb7Slm66018 * 0 - Success 73510a55fbb7Slm66018 * ENXIO - Invalid buffer passed in 73520a55fbb7Slm66018 * EFAULT - ddi_copyin of data failed 73530a55fbb7Slm66018 */ 73540a55fbb7Slm66018 static int 7355d10e4ef2Snarayan vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 73560a55fbb7Slm66018 { 7357d10e4ef2Snarayan _NOTE(ARGUNUSED(vdc)) 7358d10e4ef2Snarayan 73590a55fbb7Slm66018 vd_geom_t vdgeom; 73600a55fbb7Slm66018 void *tmp_mem = NULL; 73610a55fbb7Slm66018 int copy_len = sizeof (struct dk_geom); 73620a55fbb7Slm66018 int rv = 0; 73630a55fbb7Slm66018 73640a55fbb7Slm66018 if (dir != VD_COPYIN) 73650a55fbb7Slm66018 return (0); /* nothing to do */ 73660a55fbb7Slm66018 73670a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 73680a55fbb7Slm66018 return (ENXIO); 73690a55fbb7Slm66018 73700a55fbb7Slm66018 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 73710a55fbb7Slm66018 73720a55fbb7Slm66018 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 73730a55fbb7Slm66018 if (rv != 0) { 73740a55fbb7Slm66018 kmem_free(tmp_mem, copy_len); 73750a55fbb7Slm66018 return (EFAULT); 73760a55fbb7Slm66018 } 73770a55fbb7Slm66018 DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); 73780a55fbb7Slm66018 bcopy(&vdgeom, to, sizeof (vdgeom)); 73790a55fbb7Slm66018 kmem_free(tmp_mem, copy_len); 73800a55fbb7Slm66018 73810a55fbb7Slm66018 return (0); 73820a55fbb7Slm66018 } 73830a55fbb7Slm66018 73844bac2208Snarayan static int 73854bac2208Snarayan vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 73864bac2208Snarayan { 73874bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 73884bac2208Snarayan 73894bac2208Snarayan vd_efi_t *vd_efi; 73904bac2208Snarayan dk_efi_t dk_efi; 73914bac2208Snarayan int rv = 0; 73924bac2208Snarayan void *uaddr; 73934bac2208Snarayan 73944bac2208Snarayan if ((from == NULL) || (to == NULL)) 73954bac2208Snarayan return (ENXIO); 73964bac2208Snarayan 73974bac2208Snarayan if (dir == VD_COPYIN) { 73984bac2208Snarayan 73994bac2208Snarayan vd_efi = (vd_efi_t *)to; 74004bac2208Snarayan 74014bac2208Snarayan rv = ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode); 74024bac2208Snarayan if (rv != 0) 74034bac2208Snarayan return (EFAULT); 74044bac2208Snarayan 74054bac2208Snarayan vd_efi->lba = dk_efi.dki_lba; 74064bac2208Snarayan vd_efi->length = dk_efi.dki_length; 74074bac2208Snarayan bzero(vd_efi->data, vd_efi->length); 74084bac2208Snarayan 74094bac2208Snarayan } else { 74104bac2208Snarayan 74114bac2208Snarayan rv = ddi_copyin(to, &dk_efi, sizeof (dk_efi_t), mode); 74124bac2208Snarayan if (rv != 0) 74134bac2208Snarayan return (EFAULT); 74144bac2208Snarayan 74154bac2208Snarayan uaddr = dk_efi.dki_data; 74164bac2208Snarayan 74174bac2208Snarayan dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 74184bac2208Snarayan 74194bac2208Snarayan VD_EFI2DK_EFI((vd_efi_t *)from, &dk_efi); 74204bac2208Snarayan 74214bac2208Snarayan rv = ddi_copyout(dk_efi.dki_data, uaddr, dk_efi.dki_length, 74224bac2208Snarayan mode); 74234bac2208Snarayan if (rv != 0) 74244bac2208Snarayan return (EFAULT); 74254bac2208Snarayan 74264bac2208Snarayan kmem_free(dk_efi.dki_data, dk_efi.dki_length); 74274bac2208Snarayan } 74284bac2208Snarayan 74294bac2208Snarayan return (0); 74304bac2208Snarayan } 74314bac2208Snarayan 74324bac2208Snarayan static int 74334bac2208Snarayan vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 74344bac2208Snarayan { 74354bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 74364bac2208Snarayan 74374bac2208Snarayan dk_efi_t dk_efi; 74384bac2208Snarayan void *uaddr; 74394bac2208Snarayan 74402f5224aeSachartre if (dir == VD_COPYOUT) { 74412f5224aeSachartre /* 74422f5224aeSachartre * The disk label may have changed. Revalidate the disk 74435b98b509Sachartre * geometry. This will also update the device nodes. 74442f5224aeSachartre */ 74452f5224aeSachartre vdc_validate(vdc); 74462f5224aeSachartre return (0); 74472f5224aeSachartre } 74484bac2208Snarayan 74494bac2208Snarayan if ((from == NULL) || (to == NULL)) 74504bac2208Snarayan return (ENXIO); 74514bac2208Snarayan 74524bac2208Snarayan if (ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode) != 0) 74534bac2208Snarayan return (EFAULT); 74544bac2208Snarayan 74554bac2208Snarayan uaddr = dk_efi.dki_data; 74564bac2208Snarayan 74574bac2208Snarayan dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 74584bac2208Snarayan 74594bac2208Snarayan if (ddi_copyin(uaddr, dk_efi.dki_data, dk_efi.dki_length, mode) != 0) 74604bac2208Snarayan return (EFAULT); 74614bac2208Snarayan 74624bac2208Snarayan DK_EFI2VD_EFI(&dk_efi, (vd_efi_t *)to); 74634bac2208Snarayan 74644bac2208Snarayan kmem_free(dk_efi.dki_data, dk_efi.dki_length); 74654bac2208Snarayan 74664bac2208Snarayan return (0); 74674bac2208Snarayan } 74684bac2208Snarayan 746917cadca8Slm66018 747017cadca8Slm66018 /* -------------------------------------------------------------------------- */ 747117cadca8Slm66018 74720a55fbb7Slm66018 /* 74730a55fbb7Slm66018 * Function: 74741ae08745Sheppo * vdc_create_fake_geometry() 74751ae08745Sheppo * 74761ae08745Sheppo * Description: 747717cadca8Slm66018 * This routine fakes up the disk info needed for some DKIO ioctls such 747817cadca8Slm66018 * as DKIOCINFO and DKIOCGMEDIAINFO [just like lofi(7D) and ramdisk(7D) do] 74791ae08745Sheppo * 748017cadca8Slm66018 * Note: This function must not be called until the vDisk attributes have 748117cadca8Slm66018 * been exchanged as part of the handshake with the vDisk server. 74821ae08745Sheppo * 74831ae08745Sheppo * Arguments: 74841ae08745Sheppo * vdc - soft state pointer for this instance of the device driver. 74851ae08745Sheppo * 74861ae08745Sheppo * Return Code: 748778fcd0a1Sachartre * none. 74881ae08745Sheppo */ 748978fcd0a1Sachartre static void 74901ae08745Sheppo vdc_create_fake_geometry(vdc_t *vdc) 74911ae08745Sheppo { 74921ae08745Sheppo ASSERT(vdc != NULL); 749378fcd0a1Sachartre ASSERT(vdc->max_xfer_sz != 0); 74940d0c8d4bSnarayan 74950d0c8d4bSnarayan /* 74961ae08745Sheppo * DKIOCINFO support 74971ae08745Sheppo */ 749878fcd0a1Sachartre if (vdc->cinfo == NULL) 74991ae08745Sheppo vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 75001ae08745Sheppo 75011ae08745Sheppo (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 75021ae08745Sheppo (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 75038e6a2a04Slm66018 /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */ 75048e6a2a04Slm66018 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; 75052f5224aeSachartre 750687a7269eSachartre /* 75072f5224aeSachartre * We set the controller type to DKC_SCSI_CCS only if the VD_OP_SCSICMD 75082f5224aeSachartre * operation is supported, otherwise the controller type is DKC_DIRECT. 75092f5224aeSachartre * Version 1.0 does not support the VD_OP_SCSICMD operation, so the 75102f5224aeSachartre * controller type is always DKC_DIRECT in that case. 75112f5224aeSachartre * 751217cadca8Slm66018 * If the virtual disk is backed by a physical CD/DVD device or 751317cadca8Slm66018 * an ISO image, modify the controller type to indicate this 751487a7269eSachartre */ 751517cadca8Slm66018 switch (vdc->vdisk_media) { 751617cadca8Slm66018 case VD_MEDIA_CD: 751717cadca8Slm66018 case VD_MEDIA_DVD: 751817cadca8Slm66018 vdc->cinfo->dki_ctype = DKC_CDROM; 751917cadca8Slm66018 break; 752017cadca8Slm66018 case VD_MEDIA_FIXED: 75212f5224aeSachartre if (VD_OP_SUPPORTED(vdc->operations, VD_OP_SCSICMD)) 75222f5224aeSachartre vdc->cinfo->dki_ctype = DKC_SCSI_CCS; 75232f5224aeSachartre else 752487a7269eSachartre vdc->cinfo->dki_ctype = DKC_DIRECT; 752517cadca8Slm66018 break; 752617cadca8Slm66018 default: 752717cadca8Slm66018 /* in the case of v1.0 we default to a fixed disk */ 752817cadca8Slm66018 vdc->cinfo->dki_ctype = DKC_DIRECT; 752917cadca8Slm66018 break; 753017cadca8Slm66018 } 75311ae08745Sheppo vdc->cinfo->dki_flags = DKI_FMTVOL; 75321ae08745Sheppo vdc->cinfo->dki_cnum = 0; 75331ae08745Sheppo vdc->cinfo->dki_addr = 0; 75341ae08745Sheppo vdc->cinfo->dki_space = 0; 75351ae08745Sheppo vdc->cinfo->dki_prio = 0; 75361ae08745Sheppo vdc->cinfo->dki_vec = 0; 75371ae08745Sheppo vdc->cinfo->dki_unit = vdc->instance; 75381ae08745Sheppo vdc->cinfo->dki_slave = 0; 75391ae08745Sheppo /* 75401ae08745Sheppo * The partition number will be created on the fly depending on the 75411ae08745Sheppo * actual slice (i.e. minor node) that is used to request the data. 75421ae08745Sheppo */ 75431ae08745Sheppo vdc->cinfo->dki_partition = 0; 75441ae08745Sheppo 75451ae08745Sheppo /* 75461ae08745Sheppo * DKIOCGMEDIAINFO support 75471ae08745Sheppo */ 75480a55fbb7Slm66018 if (vdc->minfo == NULL) 75491ae08745Sheppo vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 755017cadca8Slm66018 755117cadca8Slm66018 if (vio_ver_is_supported(vdc->ver, 1, 1)) { 755217cadca8Slm66018 vdc->minfo->dki_media_type = 755317cadca8Slm66018 VD_MEDIATYPE2DK_MEDIATYPE(vdc->vdisk_media); 755417cadca8Slm66018 } else { 75551ae08745Sheppo vdc->minfo->dki_media_type = DK_FIXED_DISK; 755617cadca8Slm66018 } 755717cadca8Slm66018 75584bac2208Snarayan vdc->minfo->dki_capacity = vdc->vdisk_size; 755917cadca8Slm66018 vdc->minfo->dki_lbsize = vdc->block_size; 756078fcd0a1Sachartre } 75611ae08745Sheppo 756278fcd0a1Sachartre static ushort_t 756378fcd0a1Sachartre vdc_lbl2cksum(struct dk_label *label) 756478fcd0a1Sachartre { 756578fcd0a1Sachartre int count; 756678fcd0a1Sachartre ushort_t sum, *sp; 756778fcd0a1Sachartre 756878fcd0a1Sachartre count = (sizeof (struct dk_label)) / (sizeof (short)) - 1; 756978fcd0a1Sachartre sp = (ushort_t *)label; 757078fcd0a1Sachartre sum = 0; 757178fcd0a1Sachartre while (count--) { 757278fcd0a1Sachartre sum ^= *sp++; 757378fcd0a1Sachartre } 757478fcd0a1Sachartre 757578fcd0a1Sachartre return (sum); 75760a55fbb7Slm66018 } 75770a55fbb7Slm66018 7578*de3a5331SRamesh Chitrothu static void 7579*de3a5331SRamesh Chitrothu vdc_update_size(vdc_t *vdc, size_t dsk_size, size_t blk_size, size_t xfr_size) 7580*de3a5331SRamesh Chitrothu { 7581*de3a5331SRamesh Chitrothu vd_err_stats_t *stp; 7582*de3a5331SRamesh Chitrothu 7583*de3a5331SRamesh Chitrothu ASSERT(MUTEX_HELD(&vdc->lock)); 7584*de3a5331SRamesh Chitrothu ASSERT(xfr_size != 0); 7585*de3a5331SRamesh Chitrothu 7586*de3a5331SRamesh Chitrothu /* 7587*de3a5331SRamesh Chitrothu * If the disk size is unknown or sizes are unchanged then don't 7588*de3a5331SRamesh Chitrothu * update anything. 7589*de3a5331SRamesh Chitrothu */ 7590*de3a5331SRamesh Chitrothu if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0 || 7591*de3a5331SRamesh Chitrothu (blk_size == vdc->block_size && dsk_size == vdc->vdisk_size && 7592*de3a5331SRamesh Chitrothu xfr_size == vdc->max_xfer_sz)) 7593*de3a5331SRamesh Chitrothu return; 7594*de3a5331SRamesh Chitrothu 7595*de3a5331SRamesh Chitrothu /* 7596*de3a5331SRamesh Chitrothu * We don't know at compile time what the vDisk server will think 7597*de3a5331SRamesh Chitrothu * are good values but we apply a large (arbitrary) upper bound to 7598*de3a5331SRamesh Chitrothu * prevent memory exhaustion in vdc if it was allocating a DRing 7599*de3a5331SRamesh Chitrothu * based of huge values sent by the server. We probably will never 7600*de3a5331SRamesh Chitrothu * exceed this except if the message was garbage. 7601*de3a5331SRamesh Chitrothu */ 7602*de3a5331SRamesh Chitrothu if ((xfr_size * blk_size) > (PAGESIZE * DEV_BSIZE)) { 7603*de3a5331SRamesh Chitrothu DMSG(vdc, 0, "[%d] vds block transfer size too big;" 7604*de3a5331SRamesh Chitrothu " using max supported by vdc", vdc->instance); 7605*de3a5331SRamesh Chitrothu xfr_size = maxphys / DEV_BSIZE; 7606*de3a5331SRamesh Chitrothu dsk_size = (dsk_size * blk_size) / DEV_BSIZE; 7607*de3a5331SRamesh Chitrothu blk_size = DEV_BSIZE; 7608*de3a5331SRamesh Chitrothu } 7609*de3a5331SRamesh Chitrothu 7610*de3a5331SRamesh Chitrothu vdc->max_xfer_sz = xfr_size; 7611*de3a5331SRamesh Chitrothu vdc->block_size = blk_size; 7612*de3a5331SRamesh Chitrothu vdc->vdisk_size = dsk_size; 7613*de3a5331SRamesh Chitrothu 7614*de3a5331SRamesh Chitrothu stp = (vd_err_stats_t *)vdc->err_stats->ks_data; 7615*de3a5331SRamesh Chitrothu stp->vd_capacity.value.ui64 = dsk_size * blk_size; 7616*de3a5331SRamesh Chitrothu 7617*de3a5331SRamesh Chitrothu vdc->minfo->dki_capacity = dsk_size; 7618*de3a5331SRamesh Chitrothu vdc->minfo->dki_lbsize = (uint_t)blk_size; 7619*de3a5331SRamesh Chitrothu } 7620*de3a5331SRamesh Chitrothu 76210a55fbb7Slm66018 /* 76220a55fbb7Slm66018 * Function: 762378fcd0a1Sachartre * vdc_validate_geometry 76240a55fbb7Slm66018 * 76250a55fbb7Slm66018 * Description: 762678fcd0a1Sachartre * This routine discovers the label and geometry of the disk. It stores 762778fcd0a1Sachartre * the disk label and related information in the vdc structure. If it 762878fcd0a1Sachartre * fails to validate the geometry or to discover the disk label then 762978fcd0a1Sachartre * the label is marked as unknown (VD_DISK_LABEL_UNK). 76300a55fbb7Slm66018 * 76310a55fbb7Slm66018 * Arguments: 76320a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 76330a55fbb7Slm66018 * 76340a55fbb7Slm66018 * Return Code: 763578fcd0a1Sachartre * 0 - success. 763678fcd0a1Sachartre * EINVAL - unknown disk label. 763778fcd0a1Sachartre * ENOTSUP - geometry not applicable (EFI label). 763878fcd0a1Sachartre * EIO - error accessing the disk. 76390a55fbb7Slm66018 */ 76400a55fbb7Slm66018 static int 764178fcd0a1Sachartre vdc_validate_geometry(vdc_t *vdc) 76420a55fbb7Slm66018 { 7643d10e4ef2Snarayan buf_t *buf; /* BREAD requests need to be in a buf_t structure */ 76440a55fbb7Slm66018 dev_t dev; 76452f5224aeSachartre int rv, rval; 764678fcd0a1Sachartre struct dk_label label; 764778fcd0a1Sachartre struct dk_geom geom; 764878fcd0a1Sachartre struct vtoc vtoc; 7649edcc0754Sachartre efi_gpt_t *gpt; 7650edcc0754Sachartre efi_gpe_t *gpe; 7651edcc0754Sachartre vd_efi_dev_t edev; 76520a55fbb7Slm66018 76530a55fbb7Slm66018 ASSERT(vdc != NULL); 765478fcd0a1Sachartre ASSERT(vdc->vtoc != NULL && vdc->geom != NULL); 765578fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 76560a55fbb7Slm66018 765778fcd0a1Sachartre mutex_exit(&vdc->lock); 7658*de3a5331SRamesh Chitrothu /* 7659*de3a5331SRamesh Chitrothu * Check the disk capacity in case it has changed. If that fails then 7660*de3a5331SRamesh Chitrothu * we proceed and we will be using the disk size we currently have. 7661*de3a5331SRamesh Chitrothu */ 7662*de3a5331SRamesh Chitrothu (void) vdc_check_capacity(vdc); 76630a55fbb7Slm66018 dev = makedevice(ddi_driver_major(vdc->dip), 76640a55fbb7Slm66018 VD_MAKE_DEV(vdc->instance, 0)); 76654bac2208Snarayan 76662f5224aeSachartre rv = vd_process_ioctl(dev, DKIOCGGEOM, (caddr_t)&geom, FKIOCTL, &rval); 766778fcd0a1Sachartre if (rv == 0) 76682f5224aeSachartre rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)&vtoc, 76692f5224aeSachartre FKIOCTL, &rval); 76700d0c8d4bSnarayan 76714bac2208Snarayan if (rv == ENOTSUP) { 76724bac2208Snarayan /* 76734bac2208Snarayan * If the device does not support VTOC then we try 76744bac2208Snarayan * to read an EFI label. 7675edcc0754Sachartre * 7676edcc0754Sachartre * We need to know the block size and the disk size to 7677edcc0754Sachartre * be able to read an EFI label. 76784bac2208Snarayan */ 7679edcc0754Sachartre if (vdc->vdisk_size == 0) { 7680edcc0754Sachartre mutex_enter(&vdc->lock); 7681edcc0754Sachartre vdc_store_label_unk(vdc); 7682*de3a5331SRamesh Chitrothu return (EIO); 7683edcc0754Sachartre } 76844bac2208Snarayan 7685edcc0754Sachartre VD_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl); 7686edcc0754Sachartre 7687edcc0754Sachartre rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe); 76884bac2208Snarayan 76894bac2208Snarayan if (rv) { 76903af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to get EFI (err=%d)", 76914bac2208Snarayan vdc->instance, rv); 769278fcd0a1Sachartre mutex_enter(&vdc->lock); 769378fcd0a1Sachartre vdc_store_label_unk(vdc); 769478fcd0a1Sachartre return (EIO); 769578fcd0a1Sachartre } 769678fcd0a1Sachartre 769778fcd0a1Sachartre mutex_enter(&vdc->lock); 7698edcc0754Sachartre vdc_store_label_efi(vdc, gpt, gpe); 7699edcc0754Sachartre vd_efi_free(&edev, gpt, gpe); 770078fcd0a1Sachartre return (ENOTSUP); 770178fcd0a1Sachartre } 770278fcd0a1Sachartre 770378fcd0a1Sachartre if (rv != 0) { 770478fcd0a1Sachartre DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", 770578fcd0a1Sachartre vdc->instance, rv); 770678fcd0a1Sachartre mutex_enter(&vdc->lock); 770778fcd0a1Sachartre vdc_store_label_unk(vdc); 770878fcd0a1Sachartre if (rv != EINVAL) 770978fcd0a1Sachartre rv = EIO; 77104bac2208Snarayan return (rv); 77114bac2208Snarayan } 77124bac2208Snarayan 771378fcd0a1Sachartre /* check that geometry and vtoc are valid */ 771478fcd0a1Sachartre if (geom.dkg_nhead == 0 || geom.dkg_nsect == 0 || 771578fcd0a1Sachartre vtoc.v_sanity != VTOC_SANE) { 771678fcd0a1Sachartre mutex_enter(&vdc->lock); 771778fcd0a1Sachartre vdc_store_label_unk(vdc); 771878fcd0a1Sachartre return (EINVAL); 771978fcd0a1Sachartre } 77204bac2208Snarayan 772178fcd0a1Sachartre /* 772278fcd0a1Sachartre * We have a disk and a valid VTOC. However this does not mean 772378fcd0a1Sachartre * that the disk currently have a VTOC label. The returned VTOC may 772478fcd0a1Sachartre * be a default VTOC to be used for configuring the disk (this is 772578fcd0a1Sachartre * what is done for disk image). So we read the label from the 772678fcd0a1Sachartre * beginning of the disk to ensure we really have a VTOC label. 772778fcd0a1Sachartre * 772878fcd0a1Sachartre * FUTURE: This could be the default way for reading the VTOC 772978fcd0a1Sachartre * from the disk as opposed to sending the VD_OP_GET_VTOC 773078fcd0a1Sachartre * to the server. This will be the default if vdc is implemented 773178fcd0a1Sachartre * ontop of cmlb. 773278fcd0a1Sachartre */ 773378fcd0a1Sachartre 773478fcd0a1Sachartre /* 773578fcd0a1Sachartre * Single slice disk does not support read using an absolute disk 773678fcd0a1Sachartre * offset so we just rely on the DKIOCGVTOC ioctl in that case. 773778fcd0a1Sachartre */ 773878fcd0a1Sachartre if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 773978fcd0a1Sachartre mutex_enter(&vdc->lock); 774078fcd0a1Sachartre if (vtoc.v_nparts != 1) { 774178fcd0a1Sachartre vdc_store_label_unk(vdc); 774278fcd0a1Sachartre return (EINVAL); 774378fcd0a1Sachartre } 774478fcd0a1Sachartre vdc_store_label_vtoc(vdc, &geom, &vtoc); 77454bac2208Snarayan return (0); 77464bac2208Snarayan } 77474bac2208Snarayan 774878fcd0a1Sachartre if (vtoc.v_nparts != V_NUMPAR) { 774978fcd0a1Sachartre mutex_enter(&vdc->lock); 775078fcd0a1Sachartre vdc_store_label_unk(vdc); 775178fcd0a1Sachartre return (EINVAL); 77520a55fbb7Slm66018 } 7753d10e4ef2Snarayan 7754d10e4ef2Snarayan /* 7755d10e4ef2Snarayan * Read disk label from start of disk 7756d10e4ef2Snarayan */ 7757d10e4ef2Snarayan buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 7758d10e4ef2Snarayan bioinit(buf); 775978fcd0a1Sachartre buf->b_un.b_addr = (caddr_t)&label; 7760d10e4ef2Snarayan buf->b_bcount = DK_LABEL_SIZE; 7761d10e4ef2Snarayan buf->b_flags = B_BUSY | B_READ; 776217cadca8Slm66018 buf->b_dev = cmpdev(dev); 776378fcd0a1Sachartre rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)&label, 776478fcd0a1Sachartre DK_LABEL_SIZE, VD_SLICE_NONE, 0, CB_STRATEGY, buf, VIO_read_dir); 77653af08d82Slm66018 if (rv) { 77663af08d82Slm66018 DMSG(vdc, 1, "[%d] Failed to read disk block 0\n", 77673af08d82Slm66018 vdc->instance); 776878fcd0a1Sachartre } else { 7769d10e4ef2Snarayan rv = biowait(buf); 7770d10e4ef2Snarayan biofini(buf); 777178fcd0a1Sachartre } 7772d10e4ef2Snarayan kmem_free(buf, sizeof (buf_t)); 77730a55fbb7Slm66018 777478fcd0a1Sachartre if (rv != 0 || label.dkl_magic != DKL_MAGIC || 777578fcd0a1Sachartre label.dkl_cksum != vdc_lbl2cksum(&label)) { 777678fcd0a1Sachartre DMSG(vdc, 1, "[%d] Got VTOC with invalid label\n", 777778fcd0a1Sachartre vdc->instance); 777878fcd0a1Sachartre mutex_enter(&vdc->lock); 777978fcd0a1Sachartre vdc_store_label_unk(vdc); 778078fcd0a1Sachartre return (EINVAL); 778178fcd0a1Sachartre } 778278fcd0a1Sachartre 778378fcd0a1Sachartre mutex_enter(&vdc->lock); 778478fcd0a1Sachartre vdc_store_label_vtoc(vdc, &geom, &vtoc); 778578fcd0a1Sachartre return (0); 778678fcd0a1Sachartre } 778778fcd0a1Sachartre 778878fcd0a1Sachartre /* 778978fcd0a1Sachartre * Function: 779078fcd0a1Sachartre * vdc_validate 779178fcd0a1Sachartre * 779278fcd0a1Sachartre * Description: 779378fcd0a1Sachartre * This routine discovers the label of the disk and create the 779478fcd0a1Sachartre * appropriate device nodes if the label has changed. 779578fcd0a1Sachartre * 779678fcd0a1Sachartre * Arguments: 779778fcd0a1Sachartre * vdc - soft state pointer for this instance of the device driver. 779878fcd0a1Sachartre * 779978fcd0a1Sachartre * Return Code: 780078fcd0a1Sachartre * none. 780178fcd0a1Sachartre */ 780278fcd0a1Sachartre static void 780378fcd0a1Sachartre vdc_validate(vdc_t *vdc) 780478fcd0a1Sachartre { 780578fcd0a1Sachartre vd_disk_label_t old_label; 7806edcc0754Sachartre vd_slice_t old_slice[V_NUMPAR]; 780778fcd0a1Sachartre int rv; 780878fcd0a1Sachartre 780978fcd0a1Sachartre ASSERT(!MUTEX_HELD(&vdc->lock)); 781078fcd0a1Sachartre 781178fcd0a1Sachartre mutex_enter(&vdc->lock); 781278fcd0a1Sachartre 781378fcd0a1Sachartre /* save the current label and vtoc */ 781478fcd0a1Sachartre old_label = vdc->vdisk_label; 7815edcc0754Sachartre bcopy(vdc->slice, &old_slice, sizeof (vd_slice_t) * V_NUMPAR); 781678fcd0a1Sachartre 781778fcd0a1Sachartre /* check the geometry */ 781878fcd0a1Sachartre (void) vdc_validate_geometry(vdc); 781978fcd0a1Sachartre 782078fcd0a1Sachartre /* if the disk label has changed, update device nodes */ 782178fcd0a1Sachartre if (vdc->vdisk_label != old_label) { 782278fcd0a1Sachartre 782378fcd0a1Sachartre if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 782478fcd0a1Sachartre rv = vdc_create_device_nodes_efi(vdc); 782578fcd0a1Sachartre else 782678fcd0a1Sachartre rv = vdc_create_device_nodes_vtoc(vdc); 782778fcd0a1Sachartre 782878fcd0a1Sachartre if (rv != 0) { 782978fcd0a1Sachartre DMSG(vdc, 0, "![%d] Failed to update device nodes", 783078fcd0a1Sachartre vdc->instance); 783178fcd0a1Sachartre } 783278fcd0a1Sachartre } 783378fcd0a1Sachartre 783478fcd0a1Sachartre mutex_exit(&vdc->lock); 783578fcd0a1Sachartre } 783678fcd0a1Sachartre 783778fcd0a1Sachartre static void 783878fcd0a1Sachartre vdc_validate_task(void *arg) 783978fcd0a1Sachartre { 784078fcd0a1Sachartre vdc_t *vdc = (vdc_t *)arg; 784178fcd0a1Sachartre 784278fcd0a1Sachartre vdc_validate(vdc); 784378fcd0a1Sachartre 784478fcd0a1Sachartre mutex_enter(&vdc->lock); 784578fcd0a1Sachartre ASSERT(vdc->validate_pending > 0); 784678fcd0a1Sachartre vdc->validate_pending--; 784778fcd0a1Sachartre mutex_exit(&vdc->lock); 78481ae08745Sheppo } 78494bac2208Snarayan 78504bac2208Snarayan /* 78514bac2208Snarayan * Function: 78524bac2208Snarayan * vdc_setup_devid() 78534bac2208Snarayan * 78544bac2208Snarayan * Description: 78554bac2208Snarayan * This routine discovers the devid of a vDisk. It requests the devid of 78564bac2208Snarayan * the underlying device from the vDisk server, builds an encapsulated 78574bac2208Snarayan * devid based on the retrieved devid and registers that new devid to 78584bac2208Snarayan * the vDisk. 78594bac2208Snarayan * 78604bac2208Snarayan * Arguments: 78614bac2208Snarayan * vdc - soft state pointer for this instance of the device driver. 78624bac2208Snarayan * 78634bac2208Snarayan * Return Code: 78644bac2208Snarayan * 0 - A devid was succesfully registered for the vDisk 78654bac2208Snarayan */ 78664bac2208Snarayan static int 78674bac2208Snarayan vdc_setup_devid(vdc_t *vdc) 78684bac2208Snarayan { 78694bac2208Snarayan int rv; 78704bac2208Snarayan vd_devid_t *vd_devid; 78714bac2208Snarayan size_t bufsize, bufid_len; 78724bac2208Snarayan 78734bac2208Snarayan /* 78744bac2208Snarayan * At first sight, we don't know the size of the devid that the 78754bac2208Snarayan * server will return but this size will be encoded into the 78764bac2208Snarayan * reply. So we do a first request using a default size then we 78774bac2208Snarayan * check if this size was large enough. If not then we do a second 78784bac2208Snarayan * request with the correct size returned by the server. Note that 78794bac2208Snarayan * ldc requires size to be 8-byte aligned. 78804bac2208Snarayan */ 78814bac2208Snarayan bufsize = P2ROUNDUP(VD_DEVID_SIZE(VD_DEVID_DEFAULT_LEN), 78824bac2208Snarayan sizeof (uint64_t)); 78834bac2208Snarayan vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 78844bac2208Snarayan bufid_len = bufsize - sizeof (vd_efi_t) - 1; 78854bac2208Snarayan 78863af08d82Slm66018 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid, 78872f5224aeSachartre bufsize, 0, 0, CB_SYNC, 0, VIO_both_dir, B_TRUE); 78883af08d82Slm66018 78893af08d82Slm66018 DMSG(vdc, 2, "sync_op returned %d\n", rv); 78903af08d82Slm66018 78914bac2208Snarayan if (rv) { 78924bac2208Snarayan kmem_free(vd_devid, bufsize); 78934bac2208Snarayan return (rv); 78944bac2208Snarayan } 78954bac2208Snarayan 78964bac2208Snarayan if (vd_devid->length > bufid_len) { 78974bac2208Snarayan /* 78984bac2208Snarayan * The returned devid is larger than the buffer used. Try again 78994bac2208Snarayan * with a buffer with the right size. 79004bac2208Snarayan */ 79014bac2208Snarayan kmem_free(vd_devid, bufsize); 79024bac2208Snarayan bufsize = P2ROUNDUP(VD_DEVID_SIZE(vd_devid->length), 79034bac2208Snarayan sizeof (uint64_t)); 79044bac2208Snarayan vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 79054bac2208Snarayan bufid_len = bufsize - sizeof (vd_efi_t) - 1; 79064bac2208Snarayan 79073af08d82Slm66018 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, 79083af08d82Slm66018 (caddr_t)vd_devid, bufsize, 0, 0, CB_SYNC, 0, 79092f5224aeSachartre VIO_both_dir, B_TRUE); 79103af08d82Slm66018 79114bac2208Snarayan if (rv) { 79124bac2208Snarayan kmem_free(vd_devid, bufsize); 79134bac2208Snarayan return (rv); 79144bac2208Snarayan } 79154bac2208Snarayan } 79164bac2208Snarayan 79174bac2208Snarayan /* 79184bac2208Snarayan * The virtual disk should have the same device id as the one associated 79194bac2208Snarayan * with the physical disk it is mapped on, otherwise sharing a disk 79204bac2208Snarayan * between a LDom and a non-LDom may not work (for example for a shared 79214bac2208Snarayan * SVM disk set). 79224bac2208Snarayan * 79234bac2208Snarayan * The DDI framework does not allow creating a device id with any 79244bac2208Snarayan * type so we first create a device id of type DEVID_ENCAP and then 79254bac2208Snarayan * we restore the orignal type of the physical device. 79264bac2208Snarayan */ 79274bac2208Snarayan 79283af08d82Slm66018 DMSG(vdc, 2, ": devid length = %d\n", vd_devid->length); 79293af08d82Slm66018 79304bac2208Snarayan /* build an encapsulated devid based on the returned devid */ 79314bac2208Snarayan if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length, 79324bac2208Snarayan vd_devid->id, &vdc->devid) != DDI_SUCCESS) { 79333af08d82Slm66018 DMSG(vdc, 1, "[%d] Fail to created devid\n", vdc->instance); 79344bac2208Snarayan kmem_free(vd_devid, bufsize); 79354bac2208Snarayan return (1); 79364bac2208Snarayan } 79374bac2208Snarayan 79384bac2208Snarayan DEVID_FORMTYPE((impl_devid_t *)vdc->devid, vd_devid->type); 79394bac2208Snarayan 79404bac2208Snarayan ASSERT(ddi_devid_valid(vdc->devid) == DDI_SUCCESS); 79414bac2208Snarayan 79424bac2208Snarayan kmem_free(vd_devid, bufsize); 79434bac2208Snarayan 79444bac2208Snarayan if (ddi_devid_register(vdc->dip, vdc->devid) != DDI_SUCCESS) { 79453af08d82Slm66018 DMSG(vdc, 1, "[%d] Fail to register devid\n", vdc->instance); 79464bac2208Snarayan return (1); 79474bac2208Snarayan } 79484bac2208Snarayan 79494bac2208Snarayan return (0); 79504bac2208Snarayan } 79514bac2208Snarayan 79524bac2208Snarayan static void 7953edcc0754Sachartre vdc_store_label_efi(vdc_t *vdc, efi_gpt_t *gpt, efi_gpe_t *gpe) 79544bac2208Snarayan { 7955edcc0754Sachartre int i, nparts; 79564bac2208Snarayan 795778fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 795878fcd0a1Sachartre 795978fcd0a1Sachartre vdc->vdisk_label = VD_DISK_LABEL_EFI; 7960edcc0754Sachartre bzero(vdc->vtoc, sizeof (struct vtoc)); 796178fcd0a1Sachartre bzero(vdc->geom, sizeof (struct dk_geom)); 7962edcc0754Sachartre bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 7963edcc0754Sachartre 7964edcc0754Sachartre nparts = gpt->efi_gpt_NumberOfPartitionEntries; 7965edcc0754Sachartre 7966edcc0754Sachartre for (i = 0; i < nparts && i < VD_EFI_WD_SLICE; i++) { 7967edcc0754Sachartre 7968edcc0754Sachartre if (gpe[i].efi_gpe_StartingLBA == 0 || 7969edcc0754Sachartre gpe[i].efi_gpe_EndingLBA == 0) { 7970edcc0754Sachartre continue; 79714bac2208Snarayan } 7972edcc0754Sachartre 7973edcc0754Sachartre vdc->slice[i].start = gpe[i].efi_gpe_StartingLBA; 7974edcc0754Sachartre vdc->slice[i].nblocks = gpe[i].efi_gpe_EndingLBA - 7975edcc0754Sachartre gpe[i].efi_gpe_StartingLBA + 1; 7976edcc0754Sachartre } 7977edcc0754Sachartre 7978edcc0754Sachartre ASSERT(vdc->vdisk_size != 0); 7979edcc0754Sachartre vdc->slice[VD_EFI_WD_SLICE].start = 0; 7980edcc0754Sachartre vdc->slice[VD_EFI_WD_SLICE].nblocks = vdc->vdisk_size; 7981edcc0754Sachartre 79824bac2208Snarayan } 798378fcd0a1Sachartre 798478fcd0a1Sachartre static void 798578fcd0a1Sachartre vdc_store_label_vtoc(vdc_t *vdc, struct dk_geom *geom, struct vtoc *vtoc) 798678fcd0a1Sachartre { 7987edcc0754Sachartre int i; 7988edcc0754Sachartre 798978fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 7990edcc0754Sachartre ASSERT(vdc->block_size == vtoc->v_sectorsz); 799178fcd0a1Sachartre 799278fcd0a1Sachartre vdc->vdisk_label = VD_DISK_LABEL_VTOC; 799378fcd0a1Sachartre bcopy(vtoc, vdc->vtoc, sizeof (struct vtoc)); 799478fcd0a1Sachartre bcopy(geom, vdc->geom, sizeof (struct dk_geom)); 7995edcc0754Sachartre bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 7996edcc0754Sachartre 7997edcc0754Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 7998edcc0754Sachartre vdc->slice[i].start = vtoc->v_part[i].p_start; 7999edcc0754Sachartre vdc->slice[i].nblocks = vtoc->v_part[i].p_size; 8000edcc0754Sachartre } 800178fcd0a1Sachartre } 800278fcd0a1Sachartre 800378fcd0a1Sachartre static void 800478fcd0a1Sachartre vdc_store_label_unk(vdc_t *vdc) 800578fcd0a1Sachartre { 800678fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 800778fcd0a1Sachartre 800878fcd0a1Sachartre vdc->vdisk_label = VD_DISK_LABEL_UNK; 800978fcd0a1Sachartre bzero(vdc->vtoc, sizeof (struct vtoc)); 801078fcd0a1Sachartre bzero(vdc->geom, sizeof (struct dk_geom)); 8011edcc0754Sachartre bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 801278fcd0a1Sachartre } 8013