11ae08745Sheppo /* 21ae08745Sheppo * CDDL HEADER START 31ae08745Sheppo * 41ae08745Sheppo * The contents of this file are subject to the terms of the 51ae08745Sheppo * Common Development and Distribution License (the "License"). 61ae08745Sheppo * You may not use this file except in compliance with the License. 71ae08745Sheppo * 81ae08745Sheppo * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 91ae08745Sheppo * or http://www.opensolaris.org/os/licensing. 101ae08745Sheppo * See the License for the specific language governing permissions 111ae08745Sheppo * and limitations under the License. 121ae08745Sheppo * 131ae08745Sheppo * When distributing Covered Code, include this CDDL HEADER in each 141ae08745Sheppo * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 151ae08745Sheppo * If applicable, add the following below this CDDL HEADER, with the 161ae08745Sheppo * fields enclosed by brackets "[]" replaced with your own identifying 171ae08745Sheppo * information: Portions Copyright [yyyy] [name of copyright owner] 181ae08745Sheppo * 191ae08745Sheppo * CDDL HEADER END 201ae08745Sheppo */ 211ae08745Sheppo 221ae08745Sheppo /* 23d84f0041SAlexandre Chartre * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 241ae08745Sheppo * Use is subject to license terms. 251ae08745Sheppo */ 261ae08745Sheppo 271ae08745Sheppo /* 281ae08745Sheppo * LDoms virtual disk client (vdc) device driver 291ae08745Sheppo * 301ae08745Sheppo * This driver runs on a guest logical domain and communicates with the virtual 311ae08745Sheppo * disk server (vds) driver running on the service domain which is exporting 321ae08745Sheppo * virtualized "disks" to the guest logical domain. 331ae08745Sheppo * 341ae08745Sheppo * The driver can be divided into four sections: 351ae08745Sheppo * 361ae08745Sheppo * 1) generic device driver housekeeping 371ae08745Sheppo * _init, _fini, attach, detach, ops structures, etc. 
381ae08745Sheppo * 391ae08745Sheppo * 2) communication channel setup 401ae08745Sheppo * Setup the communications link over the LDC channel that vdc uses to 411ae08745Sheppo * talk to the vDisk server. Initialise the descriptor ring which 421ae08745Sheppo * allows the LDC clients to transfer data via memory mappings. 431ae08745Sheppo * 441ae08745Sheppo * 3) Support exported to upper layers (filesystems, etc) 451ae08745Sheppo * The upper layers call into vdc via strategy(9E) and DKIO(7I) 461ae08745Sheppo * ioctl calls. vdc will copy the data to be written to the descriptor 471ae08745Sheppo * ring or maps the buffer to store the data read by the vDisk 481ae08745Sheppo * server into the descriptor ring. It then sends a message to the 491ae08745Sheppo * vDisk server requesting it to complete the operation. 501ae08745Sheppo * 511ae08745Sheppo * 4) Handling responses from vDisk server. 521ae08745Sheppo * The vDisk server will ACK some or all of the messages vdc sends to it 531ae08745Sheppo * (this is configured during the handshake). Upon receipt of an ACK 541ae08745Sheppo * vdc will check the descriptor ring and signal to the upper layer 551ae08745Sheppo * code waiting on the IO. 
561ae08745Sheppo */ 571ae08745Sheppo 58e1ebb9ecSlm66018 #include <sys/atomic.h> 591ae08745Sheppo #include <sys/conf.h> 601ae08745Sheppo #include <sys/disp.h> 611ae08745Sheppo #include <sys/ddi.h> 621ae08745Sheppo #include <sys/dkio.h> 631ae08745Sheppo #include <sys/efi_partition.h> 641ae08745Sheppo #include <sys/fcntl.h> 651ae08745Sheppo #include <sys/file.h> 66366a92acSlm66018 #include <sys/kstat.h> 671ae08745Sheppo #include <sys/mach_descrip.h> 681ae08745Sheppo #include <sys/modctl.h> 691ae08745Sheppo #include <sys/mdeg.h> 701ae08745Sheppo #include <sys/note.h> 711ae08745Sheppo #include <sys/open.h> 72*00e3a3e9SAlexandre Chartre #include <sys/random.h> 73d10e4ef2Snarayan #include <sys/sdt.h> 741ae08745Sheppo #include <sys/stat.h> 751ae08745Sheppo #include <sys/sunddi.h> 761ae08745Sheppo #include <sys/types.h> 771ae08745Sheppo #include <sys/promif.h> 782f5224aeSachartre #include <sys/var.h> 791ae08745Sheppo #include <sys/vtoc.h> 801ae08745Sheppo #include <sys/archsystm.h> 811ae08745Sheppo #include <sys/sysmacros.h> 821ae08745Sheppo 831ae08745Sheppo #include <sys/cdio.h> 841ae08745Sheppo #include <sys/dktp/fdisk.h> 8587a7269eSachartre #include <sys/dktp/dadkio.h> 86*00e3a3e9SAlexandre Chartre #include <sys/fs/dv_node.h> 872f5224aeSachartre #include <sys/mhd.h> 881ae08745Sheppo #include <sys/scsi/generic/sense.h> 892f5224aeSachartre #include <sys/scsi/impl/uscsi.h> 902f5224aeSachartre #include <sys/scsi/impl/services.h> 912f5224aeSachartre #include <sys/scsi/targets/sddef.h> 921ae08745Sheppo 931ae08745Sheppo #include <sys/ldoms.h> 941ae08745Sheppo #include <sys/ldc.h> 951ae08745Sheppo #include <sys/vio_common.h> 961ae08745Sheppo #include <sys/vio_mailbox.h> 9717cadca8Slm66018 #include <sys/vio_util.h> 981ae08745Sheppo #include <sys/vdsk_common.h> 991ae08745Sheppo #include <sys/vdsk_mailbox.h> 1001ae08745Sheppo #include <sys/vdc.h> 1011ae08745Sheppo 102342440ecSPrasad Singamsetty #define VD_OLDVTOC_LIMIT 0x7fffffff 103342440ecSPrasad Singamsetty 1041ae08745Sheppo /* 
1051ae08745Sheppo * function prototypes 1061ae08745Sheppo */ 1071ae08745Sheppo 1081ae08745Sheppo /* standard driver functions */ 1091ae08745Sheppo static int vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred); 1101ae08745Sheppo static int vdc_close(dev_t dev, int flag, int otyp, cred_t *cred); 1111ae08745Sheppo static int vdc_strategy(struct buf *buf); 1121ae08745Sheppo static int vdc_print(dev_t dev, char *str); 1131ae08745Sheppo static int vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk); 1141ae08745Sheppo static int vdc_read(dev_t dev, struct uio *uio, cred_t *cred); 1151ae08745Sheppo static int vdc_write(dev_t dev, struct uio *uio, cred_t *cred); 1161ae08745Sheppo static int vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 1171ae08745Sheppo cred_t *credp, int *rvalp); 1181ae08745Sheppo static int vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred); 1191ae08745Sheppo static int vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred); 1201ae08745Sheppo 1211ae08745Sheppo static int vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, 1221ae08745Sheppo void *arg, void **resultp); 1231ae08745Sheppo static int vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 1241ae08745Sheppo static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 1255b98b509Sachartre static int vdc_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, 1265b98b509Sachartre int mod_flags, char *name, caddr_t valuep, int *lengthp); 1271ae08745Sheppo 1281ae08745Sheppo /* setup */ 1290d0c8d4bSnarayan static void vdc_min(struct buf *bufp); 1300a55fbb7Slm66018 static int vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen); 1318cd10891Snarayan static int vdc_do_ldc_init(vdc_t *vdc, vdc_server_t *srvr); 1321ae08745Sheppo static int vdc_start_ldc_connection(vdc_t *vdc); 1331ae08745Sheppo static int vdc_create_device_nodes(vdc_t *vdc); 1344bac2208Snarayan static int vdc_create_device_nodes_efi(vdc_t *vdc); 1354bac2208Snarayan static int vdc_create_device_nodes_vtoc(vdc_t *vdc); 
136366a92acSlm66018 static void vdc_create_io_kstats(vdc_t *vdc); 137366a92acSlm66018 static void vdc_create_err_kstats(vdc_t *vdc); 138366a92acSlm66018 static void vdc_set_err_kstats(vdc_t *vdc); 139655fd6a9Sachartre static int vdc_get_md_node(dev_info_t *dip, md_t **mdpp, 1408cd10891Snarayan mde_cookie_t *vd_nodep); 1418cd10891Snarayan static int vdc_init_ports(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_nodep); 1428cd10891Snarayan static void vdc_fini_ports(vdc_t *vdc); 1438cd10891Snarayan static void vdc_switch_server(vdc_t *vdcp); 1440a55fbb7Slm66018 static int vdc_do_ldc_up(vdc_t *vdc); 1458cd10891Snarayan static void vdc_terminate_ldc(vdc_t *vdc, vdc_server_t *srvr); 1461ae08745Sheppo static int vdc_init_descriptor_ring(vdc_t *vdc); 1471ae08745Sheppo static void vdc_destroy_descriptor_ring(vdc_t *vdc); 1484bac2208Snarayan static int vdc_setup_devid(vdc_t *vdc); 149edcc0754Sachartre static void vdc_store_label_efi(vdc_t *, efi_gpt_t *, efi_gpe_t *); 150342440ecSPrasad Singamsetty static void vdc_store_label_vtoc(vdc_t *, struct dk_geom *, 151342440ecSPrasad Singamsetty struct extvtoc *); 15278fcd0a1Sachartre static void vdc_store_label_unk(vdc_t *vdc); 15378fcd0a1Sachartre static boolean_t vdc_is_opened(vdc_t *vdc); 154de3a5331SRamesh Chitrothu static void vdc_update_size(vdc_t *vdc, size_t, size_t, size_t); 15565908c77Syu, larry liu - Sun Microsystems - Beijing China static int vdc_update_vio_bsize(vdc_t *vdc, uint32_t); 1561ae08745Sheppo 1571ae08745Sheppo /* handshake with vds */ 1580a55fbb7Slm66018 static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver); 1593af08d82Slm66018 static int vdc_ver_negotiation(vdc_t *vdcp); 1601ae08745Sheppo static int vdc_init_attr_negotiation(vdc_t *vdc); 1613af08d82Slm66018 static int vdc_attr_negotiation(vdc_t *vdcp); 1621ae08745Sheppo static int vdc_init_dring_negotiate(vdc_t *vdc); 1633af08d82Slm66018 static int vdc_dring_negotiation(vdc_t *vdcp); 1643af08d82Slm66018 static int vdc_send_rdx(vdc_t *vdcp); 
1653af08d82Slm66018 static int vdc_rdx_exchange(vdc_t *vdcp); 1660a55fbb7Slm66018 static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg); 1671ae08745Sheppo 1680a55fbb7Slm66018 /* processing incoming messages from vDisk server */ 1691ae08745Sheppo static void vdc_process_msg_thread(vdc_t *vdc); 1703af08d82Slm66018 static int vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp); 1713af08d82Slm66018 1720a55fbb7Slm66018 static uint_t vdc_handle_cb(uint64_t event, caddr_t arg); 1733af08d82Slm66018 static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t *msg); 1740a55fbb7Slm66018 static int vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg); 1750a55fbb7Slm66018 static int vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg); 1760a55fbb7Slm66018 static int vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg); 1773af08d82Slm66018 static int vdc_send_request(vdc_t *vdcp, int operation, 1783af08d82Slm66018 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 179*00e3a3e9SAlexandre Chartre buf_t *bufp, vio_desc_direction_t dir, int flags); 1803af08d82Slm66018 static int vdc_map_to_shared_dring(vdc_t *vdcp, int idx); 1813af08d82Slm66018 static int vdc_populate_descriptor(vdc_t *vdcp, int operation, 1823af08d82Slm66018 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 183*00e3a3e9SAlexandre Chartre buf_t *bufp, vio_desc_direction_t dir, int flags); 1842f5224aeSachartre static int vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, 185*00e3a3e9SAlexandre Chartre size_t nbytes, int slice, diskaddr_t offset, 186*00e3a3e9SAlexandre Chartre vio_desc_direction_t dir, boolean_t); 187*00e3a3e9SAlexandre Chartre static int vdc_do_op(vdc_t *vdc, int op, caddr_t addr, size_t nbytes, 188*00e3a3e9SAlexandre Chartre int slice, diskaddr_t offset, struct buf *bufp, 189*00e3a3e9SAlexandre Chartre vio_desc_direction_t dir, int flags); 1903af08d82Slm66018 1913af08d82Slm66018 static int vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp); 
192*00e3a3e9SAlexandre Chartre static int vdc_drain_response(vdc_t *vdcp, struct buf *buf); 1931ae08745Sheppo static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx); 1943af08d82Slm66018 static int vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep); 195e1ebb9ecSlm66018 static int vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg); 1961ae08745Sheppo 1971ae08745Sheppo /* dkio */ 1982f5224aeSachartre static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, 1992f5224aeSachartre int *rvalp); 200edcc0754Sachartre static int vd_process_efi_ioctl(void *vdisk, int cmd, uintptr_t arg); 20178fcd0a1Sachartre static void vdc_create_fake_geometry(vdc_t *vdc); 20278fcd0a1Sachartre static int vdc_validate_geometry(vdc_t *vdc); 20378fcd0a1Sachartre static void vdc_validate(vdc_t *vdc); 20478fcd0a1Sachartre static void vdc_validate_task(void *arg); 205d10e4ef2Snarayan static int vdc_null_copy_func(vdc_t *vdc, void *from, void *to, 206d10e4ef2Snarayan int mode, int dir); 2074bac2208Snarayan static int vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 2084bac2208Snarayan int mode, int dir); 2094bac2208Snarayan static int vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 2104bac2208Snarayan int mode, int dir); 211d10e4ef2Snarayan static int vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, 212d10e4ef2Snarayan int mode, int dir); 213d10e4ef2Snarayan static int vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, 214d10e4ef2Snarayan int mode, int dir); 215342440ecSPrasad Singamsetty static int vdc_get_extvtoc_convert(vdc_t *vdc, void *from, void *to, 216342440ecSPrasad Singamsetty int mode, int dir); 217342440ecSPrasad Singamsetty static int vdc_set_extvtoc_convert(vdc_t *vdc, void *from, void *to, 218342440ecSPrasad Singamsetty int mode, int dir); 219d10e4ef2Snarayan static int vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, 220d10e4ef2Snarayan int mode, int dir); 221d10e4ef2Snarayan static int vdc_set_geom_convert(vdc_t *vdc, void 
*from, void *to, 222d10e4ef2Snarayan int mode, int dir); 2234bac2208Snarayan static int vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, 2244bac2208Snarayan int mode, int dir); 2254bac2208Snarayan static int vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, 2264bac2208Snarayan int mode, int dir); 2271ae08745Sheppo 2282f5224aeSachartre static void vdc_ownership_update(vdc_t *vdc, int ownership_flags); 229*00e3a3e9SAlexandre Chartre static int vdc_access_set(vdc_t *vdc, uint64_t flags); 230*00e3a3e9SAlexandre Chartre static vdc_io_t *vdc_eio_queue(vdc_t *vdc, int index); 231*00e3a3e9SAlexandre Chartre static void vdc_eio_unqueue(vdc_t *vdc, clock_t deadline, 232*00e3a3e9SAlexandre Chartre boolean_t complete_io); 233*00e3a3e9SAlexandre Chartre static int vdc_eio_check(vdc_t *vdc, int flags); 234*00e3a3e9SAlexandre Chartre static void vdc_eio_thread(void *arg); 2352f5224aeSachartre 2361ae08745Sheppo /* 2371ae08745Sheppo * Module variables 2381ae08745Sheppo */ 239e1ebb9ecSlm66018 240e1ebb9ecSlm66018 /* 241e1ebb9ecSlm66018 * Tunable variables to control how long vdc waits before timing out on 242e1ebb9ecSlm66018 * various operations 243e1ebb9ecSlm66018 */ 2443c96341aSnarayan static int vdc_hshake_retries = 3; 245e1ebb9ecSlm66018 246655fd6a9Sachartre static int vdc_timeout = 0; /* units: seconds */ 2478cd10891Snarayan static int vdc_ldcup_timeout = 1; /* units: seconds */ 248655fd6a9Sachartre 2493af08d82Slm66018 static uint64_t vdc_hz_min_ldc_delay; 2503af08d82Slm66018 static uint64_t vdc_min_timeout_ldc = 1 * MILLISEC; 2513af08d82Slm66018 static uint64_t vdc_hz_max_ldc_delay; 2523af08d82Slm66018 static uint64_t vdc_max_timeout_ldc = 100 * MILLISEC; 2533af08d82Slm66018 2543af08d82Slm66018 static uint64_t vdc_ldc_read_init_delay = 1 * MILLISEC; 2553af08d82Slm66018 static uint64_t vdc_ldc_read_max_delay = 100 * MILLISEC; 256e1ebb9ecSlm66018 257e1ebb9ecSlm66018 /* values for dumping - need to run in a tighter loop */ 258e1ebb9ecSlm66018 static uint64_t 
vdc_usec_timeout_dump = 100 * MILLISEC; /* 0.1s units: usec */ 259e1ebb9ecSlm66018 static int vdc_dump_retries = 100; 260e1ebb9ecSlm66018 2612f5224aeSachartre static uint16_t vdc_scsi_timeout = 60; /* 60s units: seconds */ 2622f5224aeSachartre 2632f5224aeSachartre static uint64_t vdc_ownership_delay = 6 * MICROSEC; /* 6s units: usec */ 2642f5224aeSachartre 265e1ebb9ecSlm66018 /* Count of the number of vdc instances attached */ 266e1ebb9ecSlm66018 static volatile uint32_t vdc_instance_count = 0; 2671ae08745Sheppo 2682f5224aeSachartre /* Tunable to log all SCSI errors */ 2692f5224aeSachartre static boolean_t vdc_scsi_log_error = B_FALSE; 2702f5224aeSachartre 2711ae08745Sheppo /* Soft state pointer */ 2721ae08745Sheppo static void *vdc_state; 2731ae08745Sheppo 2743af08d82Slm66018 /* 2753af08d82Slm66018 * Controlling the verbosity of the error/debug messages 2763af08d82Slm66018 * 2773af08d82Slm66018 * vdc_msglevel - controls level of messages 2783af08d82Slm66018 * vdc_matchinst - 64-bit variable where each bit corresponds 2793af08d82Slm66018 * to the vdc instance the vdc_msglevel applies. 2803af08d82Slm66018 */ 2813af08d82Slm66018 int vdc_msglevel = 0x0; 2823af08d82Slm66018 uint64_t vdc_matchinst = 0ull; 2831ae08745Sheppo 2840a55fbb7Slm66018 /* 2850a55fbb7Slm66018 * Supported vDisk protocol version pairs. 2860a55fbb7Slm66018 * 2870a55fbb7Slm66018 * The first array entry is the latest and preferred version. 
2880a55fbb7Slm66018 */ 28917cadca8Slm66018 static const vio_ver_t vdc_version[] = {{1, 1}}; 2901ae08745Sheppo 2911ae08745Sheppo static struct cb_ops vdc_cb_ops = { 2921ae08745Sheppo vdc_open, /* cb_open */ 2931ae08745Sheppo vdc_close, /* cb_close */ 2941ae08745Sheppo vdc_strategy, /* cb_strategy */ 2951ae08745Sheppo vdc_print, /* cb_print */ 2961ae08745Sheppo vdc_dump, /* cb_dump */ 2971ae08745Sheppo vdc_read, /* cb_read */ 2981ae08745Sheppo vdc_write, /* cb_write */ 2991ae08745Sheppo vdc_ioctl, /* cb_ioctl */ 3001ae08745Sheppo nodev, /* cb_devmap */ 3011ae08745Sheppo nodev, /* cb_mmap */ 3021ae08745Sheppo nodev, /* cb_segmap */ 3031ae08745Sheppo nochpoll, /* cb_chpoll */ 3045b98b509Sachartre vdc_prop_op, /* cb_prop_op */ 3051ae08745Sheppo NULL, /* cb_str */ 3061ae08745Sheppo D_MP | D_64BIT, /* cb_flag */ 3071ae08745Sheppo CB_REV, /* cb_rev */ 3081ae08745Sheppo vdc_aread, /* cb_aread */ 3091ae08745Sheppo vdc_awrite /* cb_awrite */ 3101ae08745Sheppo }; 3111ae08745Sheppo 3121ae08745Sheppo static struct dev_ops vdc_ops = { 3131ae08745Sheppo DEVO_REV, /* devo_rev */ 3141ae08745Sheppo 0, /* devo_refcnt */ 3151ae08745Sheppo vdc_getinfo, /* devo_getinfo */ 3161ae08745Sheppo nulldev, /* devo_identify */ 3171ae08745Sheppo nulldev, /* devo_probe */ 3181ae08745Sheppo vdc_attach, /* devo_attach */ 3191ae08745Sheppo vdc_detach, /* devo_detach */ 3201ae08745Sheppo nodev, /* devo_reset */ 3211ae08745Sheppo &vdc_cb_ops, /* devo_cb_ops */ 3221ae08745Sheppo NULL, /* devo_bus_ops */ 32319397407SSherry Moore nulldev, /* devo_power */ 32419397407SSherry Moore ddi_quiesce_not_needed, /* devo_quiesce */ 3251ae08745Sheppo }; 3261ae08745Sheppo 3271ae08745Sheppo static struct modldrv modldrv = { 3281ae08745Sheppo &mod_driverops, 329205eeb1aSlm66018 "virtual disk client", 3301ae08745Sheppo &vdc_ops, 3311ae08745Sheppo }; 3321ae08745Sheppo 3331ae08745Sheppo static struct modlinkage modlinkage = { 3341ae08745Sheppo MODREV_1, 3351ae08745Sheppo &modldrv, 3361ae08745Sheppo NULL 3371ae08745Sheppo 
}; 3381ae08745Sheppo 3391ae08745Sheppo /* -------------------------------------------------------------------------- */ 3401ae08745Sheppo 3411ae08745Sheppo /* 3421ae08745Sheppo * Device Driver housekeeping and setup 3431ae08745Sheppo */ 3441ae08745Sheppo 3451ae08745Sheppo int 3461ae08745Sheppo _init(void) 3471ae08745Sheppo { 3481ae08745Sheppo int status; 3491ae08745Sheppo 3501ae08745Sheppo if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0) 3511ae08745Sheppo return (status); 3521ae08745Sheppo if ((status = mod_install(&modlinkage)) != 0) 3531ae08745Sheppo ddi_soft_state_fini(&vdc_state); 3541ae08745Sheppo return (status); 3551ae08745Sheppo } 3561ae08745Sheppo 3571ae08745Sheppo int 3581ae08745Sheppo _info(struct modinfo *modinfop) 3591ae08745Sheppo { 3601ae08745Sheppo return (mod_info(&modlinkage, modinfop)); 3611ae08745Sheppo } 3621ae08745Sheppo 3631ae08745Sheppo int 3641ae08745Sheppo _fini(void) 3651ae08745Sheppo { 3661ae08745Sheppo int status; 3671ae08745Sheppo 3681ae08745Sheppo if ((status = mod_remove(&modlinkage)) != 0) 3691ae08745Sheppo return (status); 3701ae08745Sheppo ddi_soft_state_fini(&vdc_state); 3711ae08745Sheppo return (0); 3721ae08745Sheppo } 3731ae08745Sheppo 3741ae08745Sheppo static int 3751ae08745Sheppo vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 3761ae08745Sheppo { 3771ae08745Sheppo _NOTE(ARGUNUSED(dip)) 3781ae08745Sheppo 3790d0c8d4bSnarayan int instance = VDCUNIT((dev_t)arg); 3801ae08745Sheppo vdc_t *vdc = NULL; 3811ae08745Sheppo 3821ae08745Sheppo switch (cmd) { 3831ae08745Sheppo case DDI_INFO_DEVT2DEVINFO: 3841ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 3851ae08745Sheppo *resultp = NULL; 3861ae08745Sheppo return (DDI_FAILURE); 3871ae08745Sheppo } 3881ae08745Sheppo *resultp = vdc->dip; 3891ae08745Sheppo return (DDI_SUCCESS); 3901ae08745Sheppo case DDI_INFO_DEVT2INSTANCE: 3911ae08745Sheppo *resultp = (void *)(uintptr_t)instance; 3921ae08745Sheppo return 
(DDI_SUCCESS); 3931ae08745Sheppo default: 3941ae08745Sheppo *resultp = NULL; 3951ae08745Sheppo return (DDI_FAILURE); 3961ae08745Sheppo } 3971ae08745Sheppo } 3981ae08745Sheppo 3991ae08745Sheppo static int 4001ae08745Sheppo vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4011ae08745Sheppo { 402*00e3a3e9SAlexandre Chartre kt_did_t eio_tid, ownership_tid; 4031ae08745Sheppo int instance; 4041ae08745Sheppo int rv; 405d7400d00Sachartre vdc_server_t *srvr; 4061ae08745Sheppo vdc_t *vdc = NULL; 4071ae08745Sheppo 4081ae08745Sheppo switch (cmd) { 4091ae08745Sheppo case DDI_DETACH: 4101ae08745Sheppo /* the real work happens below */ 4111ae08745Sheppo break; 4121ae08745Sheppo case DDI_SUSPEND: 4131ae08745Sheppo /* nothing to do for this non-device */ 4141ae08745Sheppo return (DDI_SUCCESS); 4151ae08745Sheppo default: 4161ae08745Sheppo return (DDI_FAILURE); 4171ae08745Sheppo } 4181ae08745Sheppo 4191ae08745Sheppo ASSERT(cmd == DDI_DETACH); 4201ae08745Sheppo instance = ddi_get_instance(dip); 4213af08d82Slm66018 DMSGX(1, "[%d] Entered\n", instance); 4221ae08745Sheppo 4231ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 424e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 4251ae08745Sheppo return (DDI_FAILURE); 4261ae08745Sheppo } 4271ae08745Sheppo 428*00e3a3e9SAlexandre Chartre if (vdc_is_opened(vdc)) { 4293af08d82Slm66018 DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance); 4301ae08745Sheppo return (DDI_FAILURE); 4311ae08745Sheppo } 4321ae08745Sheppo 43378fcd0a1Sachartre if (vdc->dkio_flush_pending) { 43478fcd0a1Sachartre DMSG(vdc, 0, 43578fcd0a1Sachartre "[%d] Cannot detach: %d outstanding DKIO flushes\n", 43678fcd0a1Sachartre instance, vdc->dkio_flush_pending); 43778fcd0a1Sachartre return (DDI_FAILURE); 43878fcd0a1Sachartre } 43978fcd0a1Sachartre 44078fcd0a1Sachartre if (vdc->validate_pending) { 44178fcd0a1Sachartre DMSG(vdc, 0, 44278fcd0a1Sachartre "[%d] Cannot detach: %d outstanding validate request\n", 
44378fcd0a1Sachartre instance, vdc->validate_pending); 44478fcd0a1Sachartre return (DDI_FAILURE); 44578fcd0a1Sachartre } 44678fcd0a1Sachartre 4473af08d82Slm66018 DMSG(vdc, 0, "[%d] proceeding...\n", instance); 4483af08d82Slm66018 4492f5224aeSachartre /* If we took ownership, release ownership */ 4502f5224aeSachartre mutex_enter(&vdc->ownership_lock); 4512f5224aeSachartre if (vdc->ownership & VDC_OWNERSHIP_GRANTED) { 452*00e3a3e9SAlexandre Chartre rv = vdc_access_set(vdc, VD_ACCESS_SET_CLEAR); 4532f5224aeSachartre if (rv == 0) { 4542f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 4552f5224aeSachartre } 4562f5224aeSachartre } 4572f5224aeSachartre mutex_exit(&vdc->ownership_lock); 4582f5224aeSachartre 4593af08d82Slm66018 /* mark instance as detaching */ 4603af08d82Slm66018 vdc->lifecycle = VDC_LC_DETACHING; 4611ae08745Sheppo 4621ae08745Sheppo /* 463d7400d00Sachartre * Try and disable callbacks to prevent another handshake. We have to 464d7400d00Sachartre * disable callbacks for all servers. 
4651ae08745Sheppo */ 466d7400d00Sachartre for (srvr = vdc->server_list; srvr != NULL; srvr = srvr->next) { 467d7400d00Sachartre rv = ldc_set_cb_mode(srvr->ldc_handle, LDC_CB_DISABLE); 468d7400d00Sachartre DMSG(vdc, 0, "callback disabled (ldc=%lu, rv=%d)\n", 469d7400d00Sachartre srvr->ldc_id, rv); 4708cd10891Snarayan } 4711ae08745Sheppo 4721ae08745Sheppo if (vdc->initialized & VDC_THREAD) { 4733af08d82Slm66018 mutex_enter(&vdc->read_lock); 4743af08d82Slm66018 if ((vdc->read_state == VDC_READ_WAITING) || 4753af08d82Slm66018 (vdc->read_state == VDC_READ_RESET)) { 4763af08d82Slm66018 vdc->read_state = VDC_READ_RESET; 4773af08d82Slm66018 cv_signal(&vdc->read_cv); 4781ae08745Sheppo } 4793af08d82Slm66018 4803af08d82Slm66018 mutex_exit(&vdc->read_lock); 4813af08d82Slm66018 4823af08d82Slm66018 /* wake up any thread waiting for connection to come online */ 4833af08d82Slm66018 mutex_enter(&vdc->lock); 4843af08d82Slm66018 if (vdc->state == VDC_STATE_INIT_WAITING) { 4853af08d82Slm66018 DMSG(vdc, 0, 4863af08d82Slm66018 "[%d] write reset - move to resetting state...\n", 4873af08d82Slm66018 instance); 4883af08d82Slm66018 vdc->state = VDC_STATE_RESETTING; 4893af08d82Slm66018 cv_signal(&vdc->initwait_cv); 490*00e3a3e9SAlexandre Chartre } else if (vdc->state == VDC_STATE_FAILED) { 491*00e3a3e9SAlexandre Chartre vdc->io_pending = B_TRUE; 492*00e3a3e9SAlexandre Chartre cv_signal(&vdc->io_pending_cv); 4933af08d82Slm66018 } 4943af08d82Slm66018 mutex_exit(&vdc->lock); 4953af08d82Slm66018 4963af08d82Slm66018 /* now wait until state transitions to VDC_STATE_DETACH */ 4973af08d82Slm66018 thread_join(vdc->msg_proc_thr->t_did); 4983af08d82Slm66018 ASSERT(vdc->state == VDC_STATE_DETACH); 4993af08d82Slm66018 DMSG(vdc, 0, "[%d] Reset thread exit and join ..\n", 5003af08d82Slm66018 vdc->instance); 5011ae08745Sheppo } 5021ae08745Sheppo 5031ae08745Sheppo mutex_enter(&vdc->lock); 5041ae08745Sheppo 5051ae08745Sheppo if (vdc->initialized & VDC_DRING) 5061ae08745Sheppo vdc_destroy_descriptor_ring(vdc); 
5071ae08745Sheppo 5088cd10891Snarayan vdc_fini_ports(vdc); 5091ae08745Sheppo 510*00e3a3e9SAlexandre Chartre if (vdc->eio_thread) { 511*00e3a3e9SAlexandre Chartre eio_tid = vdc->eio_thread->t_did; 5122f5224aeSachartre vdc->failfast_interval = 0; 513*00e3a3e9SAlexandre Chartre ASSERT(vdc->num_servers == 0); 514*00e3a3e9SAlexandre Chartre cv_signal(&vdc->eio_cv); 5152f5224aeSachartre } else { 516*00e3a3e9SAlexandre Chartre eio_tid = 0; 5172f5224aeSachartre } 5182f5224aeSachartre 5192f5224aeSachartre if (vdc->ownership & VDC_OWNERSHIP_WANTED) { 5202f5224aeSachartre ownership_tid = vdc->ownership_thread->t_did; 5212f5224aeSachartre vdc->ownership = VDC_OWNERSHIP_NONE; 5222f5224aeSachartre cv_signal(&vdc->ownership_cv); 5232f5224aeSachartre } else { 5242f5224aeSachartre ownership_tid = 0; 5252f5224aeSachartre } 5262f5224aeSachartre 5271ae08745Sheppo mutex_exit(&vdc->lock); 5281ae08745Sheppo 529*00e3a3e9SAlexandre Chartre if (eio_tid != 0) 530*00e3a3e9SAlexandre Chartre thread_join(eio_tid); 5312f5224aeSachartre 5322f5224aeSachartre if (ownership_tid != 0) 5332f5224aeSachartre thread_join(ownership_tid); 5342f5224aeSachartre 5355b98b509Sachartre if (vdc->initialized & VDC_MINOR) 5361ae08745Sheppo ddi_remove_minor_node(dip, NULL); 5371ae08745Sheppo 538366a92acSlm66018 if (vdc->io_stats) { 539366a92acSlm66018 kstat_delete(vdc->io_stats); 540366a92acSlm66018 vdc->io_stats = NULL; 541366a92acSlm66018 } 542366a92acSlm66018 543366a92acSlm66018 if (vdc->err_stats) { 544366a92acSlm66018 kstat_delete(vdc->err_stats); 545366a92acSlm66018 vdc->err_stats = NULL; 546366a92acSlm66018 } 547366a92acSlm66018 5481ae08745Sheppo if (vdc->initialized & VDC_LOCKS) { 5491ae08745Sheppo mutex_destroy(&vdc->lock); 5503af08d82Slm66018 mutex_destroy(&vdc->read_lock); 5512f5224aeSachartre mutex_destroy(&vdc->ownership_lock); 5523af08d82Slm66018 cv_destroy(&vdc->initwait_cv); 5533af08d82Slm66018 cv_destroy(&vdc->dring_free_cv); 5543af08d82Slm66018 cv_destroy(&vdc->membind_cv); 5553af08d82Slm66018 
cv_destroy(&vdc->sync_blocked_cv); 5563af08d82Slm66018 cv_destroy(&vdc->read_cv); 5573af08d82Slm66018 cv_destroy(&vdc->running_cv); 558*00e3a3e9SAlexandre Chartre cv_destroy(&vdc->io_pending_cv); 5592f5224aeSachartre cv_destroy(&vdc->ownership_cv); 560*00e3a3e9SAlexandre Chartre cv_destroy(&vdc->eio_cv); 5611ae08745Sheppo } 5621ae08745Sheppo 5631ae08745Sheppo if (vdc->minfo) 5641ae08745Sheppo kmem_free(vdc->minfo, sizeof (struct dk_minfo)); 5651ae08745Sheppo 5661ae08745Sheppo if (vdc->cinfo) 5671ae08745Sheppo kmem_free(vdc->cinfo, sizeof (struct dk_cinfo)); 5681ae08745Sheppo 5691ae08745Sheppo if (vdc->vtoc) 570342440ecSPrasad Singamsetty kmem_free(vdc->vtoc, sizeof (struct extvtoc)); 5711ae08745Sheppo 57278fcd0a1Sachartre if (vdc->geom) 57378fcd0a1Sachartre kmem_free(vdc->geom, sizeof (struct dk_geom)); 5740a55fbb7Slm66018 5754bac2208Snarayan if (vdc->devid) { 5764bac2208Snarayan ddi_devid_unregister(dip); 5774bac2208Snarayan ddi_devid_free(vdc->devid); 5784bac2208Snarayan } 5794bac2208Snarayan 5801ae08745Sheppo if (vdc->initialized & VDC_SOFT_STATE) 5811ae08745Sheppo ddi_soft_state_free(vdc_state, instance); 5821ae08745Sheppo 5833af08d82Slm66018 DMSG(vdc, 0, "[%d] End %p\n", instance, (void *)vdc); 5841ae08745Sheppo 5851ae08745Sheppo return (DDI_SUCCESS); 5861ae08745Sheppo } 5871ae08745Sheppo 5881ae08745Sheppo 5891ae08745Sheppo static int 5901ae08745Sheppo vdc_do_attach(dev_info_t *dip) 5911ae08745Sheppo { 5921ae08745Sheppo int instance; 5931ae08745Sheppo vdc_t *vdc = NULL; 5941ae08745Sheppo int status; 595655fd6a9Sachartre md_t *mdp; 5968cd10891Snarayan mde_cookie_t vd_node; 5971ae08745Sheppo 5981ae08745Sheppo ASSERT(dip != NULL); 5991ae08745Sheppo 6001ae08745Sheppo instance = ddi_get_instance(dip); 6011ae08745Sheppo if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) { 602e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure", 603e1ebb9ecSlm66018 instance); 6041ae08745Sheppo return (DDI_FAILURE); 6051ae08745Sheppo } 6061ae08745Sheppo 
6071ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 608e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 6091ae08745Sheppo return (DDI_FAILURE); 6101ae08745Sheppo } 6111ae08745Sheppo 6121ae08745Sheppo /* 6131ae08745Sheppo * We assign the value to initialized in this case to zero out the 6141ae08745Sheppo * variable and then set bits in it to indicate what has been done 6151ae08745Sheppo */ 6161ae08745Sheppo vdc->initialized = VDC_SOFT_STATE; 6171ae08745Sheppo 6183af08d82Slm66018 vdc_hz_min_ldc_delay = drv_usectohz(vdc_min_timeout_ldc); 6193af08d82Slm66018 vdc_hz_max_ldc_delay = drv_usectohz(vdc_max_timeout_ldc); 6201ae08745Sheppo 6211ae08745Sheppo vdc->dip = dip; 6221ae08745Sheppo vdc->instance = instance; 6231ae08745Sheppo vdc->vdisk_type = VD_DISK_TYPE_UNK; 6244bac2208Snarayan vdc->vdisk_label = VD_DISK_LABEL_UNK; 6253af08d82Slm66018 vdc->state = VDC_STATE_INIT; 6263af08d82Slm66018 vdc->lifecycle = VDC_LC_ATTACHING; 6271ae08745Sheppo vdc->session_id = 0; 62865908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vdisk_bsize = DEV_BSIZE; 62965908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bmask = 0; 63065908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bshift = 0; 63165908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->max_xfer_sz = maxphys / vdc->vdisk_bsize; 6321ae08745Sheppo 63317cadca8Slm66018 /* 63417cadca8Slm66018 * We assume, for now, that the vDisk server will export 'read' 63517cadca8Slm66018 * operations to us at a minimum (this is needed because of checks 63617cadca8Slm66018 * in vdc for supported operations early in the handshake process). 63717cadca8Slm66018 * The vDisk server will return ENOTSUP if this is not the case. 63817cadca8Slm66018 * The value will be overwritten during the attribute exchange with 63917cadca8Slm66018 * the bitmask of operations exported by server. 
64017cadca8Slm66018 */ 64117cadca8Slm66018 vdc->operations = VD_OP_MASK_READ; 64217cadca8Slm66018 6431ae08745Sheppo vdc->vtoc = NULL; 64478fcd0a1Sachartre vdc->geom = NULL; 6451ae08745Sheppo vdc->cinfo = NULL; 6461ae08745Sheppo vdc->minfo = NULL; 6471ae08745Sheppo 6481ae08745Sheppo mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL); 6493af08d82Slm66018 cv_init(&vdc->initwait_cv, NULL, CV_DRIVER, NULL); 6503af08d82Slm66018 cv_init(&vdc->dring_free_cv, NULL, CV_DRIVER, NULL); 6513af08d82Slm66018 cv_init(&vdc->membind_cv, NULL, CV_DRIVER, NULL); 6523af08d82Slm66018 cv_init(&vdc->running_cv, NULL, CV_DRIVER, NULL); 653*00e3a3e9SAlexandre Chartre cv_init(&vdc->io_pending_cv, NULL, CV_DRIVER, NULL); 6543af08d82Slm66018 655*00e3a3e9SAlexandre Chartre vdc->io_pending = B_FALSE; 6563af08d82Slm66018 vdc->threads_pending = 0; 6573af08d82Slm66018 vdc->sync_op_blocked = B_FALSE; 6583af08d82Slm66018 cv_init(&vdc->sync_blocked_cv, NULL, CV_DRIVER, NULL); 6593af08d82Slm66018 6602f5224aeSachartre mutex_init(&vdc->ownership_lock, NULL, MUTEX_DRIVER, NULL); 6612f5224aeSachartre cv_init(&vdc->ownership_cv, NULL, CV_DRIVER, NULL); 662*00e3a3e9SAlexandre Chartre cv_init(&vdc->eio_cv, NULL, CV_DRIVER, NULL); 6632f5224aeSachartre 6643af08d82Slm66018 /* init blocking msg read functionality */ 6653af08d82Slm66018 mutex_init(&vdc->read_lock, NULL, MUTEX_DRIVER, NULL); 6663af08d82Slm66018 cv_init(&vdc->read_cv, NULL, CV_DRIVER, NULL); 6673af08d82Slm66018 vdc->read_state = VDC_READ_IDLE; 6683af08d82Slm66018 6691ae08745Sheppo vdc->initialized |= VDC_LOCKS; 6701ae08745Sheppo 671655fd6a9Sachartre /* get device and port MD node for this disk instance */ 6728cd10891Snarayan if (vdc_get_md_node(dip, &mdp, &vd_node) != 0) { 673655fd6a9Sachartre cmn_err(CE_NOTE, "[%d] Could not get machine description node", 674655fd6a9Sachartre instance); 675655fd6a9Sachartre return (DDI_FAILURE); 676655fd6a9Sachartre } 677655fd6a9Sachartre 6788cd10891Snarayan if (vdc_init_ports(vdc, mdp, vd_node) != 0) { 
6798cd10891Snarayan cmn_err(CE_NOTE, "[%d] Error initialising ports", instance); 6808cd10891Snarayan return (DDI_FAILURE); 681655fd6a9Sachartre } 682655fd6a9Sachartre 683655fd6a9Sachartre (void) md_fini_handle(mdp); 684655fd6a9Sachartre 685de3a5331SRamesh Chitrothu /* Create the kstats for saving the I/O statistics used by iostat(1M) */ 686de3a5331SRamesh Chitrothu vdc_create_io_kstats(vdc); 687de3a5331SRamesh Chitrothu vdc_create_err_kstats(vdc); 688de3a5331SRamesh Chitrothu 689de3a5331SRamesh Chitrothu /* Initialize remaining structures before starting the msg thread */ 690de3a5331SRamesh Chitrothu vdc->vdisk_label = VD_DISK_LABEL_UNK; 691342440ecSPrasad Singamsetty vdc->vtoc = kmem_zalloc(sizeof (struct extvtoc), KM_SLEEP); 692de3a5331SRamesh Chitrothu vdc->geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP); 693de3a5331SRamesh Chitrothu vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 694de3a5331SRamesh Chitrothu 6953af08d82Slm66018 /* initialize the thread responsible for managing state with server */ 6963af08d82Slm66018 vdc->msg_proc_thr = thread_create(NULL, 0, vdc_process_msg_thread, 6971ae08745Sheppo vdc, 0, &p0, TS_RUN, minclsyspri); 6983af08d82Slm66018 if (vdc->msg_proc_thr == NULL) { 6991ae08745Sheppo cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread", 7001ae08745Sheppo instance); 7011ae08745Sheppo return (DDI_FAILURE); 7021ae08745Sheppo } 7033af08d82Slm66018 704*00e3a3e9SAlexandre Chartre /* 705*00e3a3e9SAlexandre Chartre * If there are multiple servers then start the eio thread. 
706*00e3a3e9SAlexandre Chartre */ 707*00e3a3e9SAlexandre Chartre if (vdc->num_servers > 1) { 708*00e3a3e9SAlexandre Chartre vdc->eio_thread = thread_create(NULL, 0, vdc_eio_thread, vdc, 0, 709*00e3a3e9SAlexandre Chartre &p0, TS_RUN, v.v_maxsyspri - 2); 710*00e3a3e9SAlexandre Chartre if (vdc->eio_thread == NULL) { 711*00e3a3e9SAlexandre Chartre cmn_err(CE_NOTE, "[%d] Failed to create error " 712*00e3a3e9SAlexandre Chartre "I/O thread", instance); 713*00e3a3e9SAlexandre Chartre return (DDI_FAILURE); 714*00e3a3e9SAlexandre Chartre } 715*00e3a3e9SAlexandre Chartre } 716*00e3a3e9SAlexandre Chartre 7171ae08745Sheppo vdc->initialized |= VDC_THREAD; 7181ae08745Sheppo 719e1ebb9ecSlm66018 atomic_inc_32(&vdc_instance_count); 7201ae08745Sheppo 7210a55fbb7Slm66018 /* 72278fcd0a1Sachartre * Check the disk label. This will send requests and do the handshake. 72378fcd0a1Sachartre * We don't really care about the disk label now. What we really need is 72478fcd0a1Sachartre * the handshake do be done so that we know the type of the disk (slice 72578fcd0a1Sachartre * or full disk) and the appropriate device nodes can be created. 
7260a55fbb7Slm66018 */ 72778fcd0a1Sachartre 72878fcd0a1Sachartre mutex_enter(&vdc->lock); 72978fcd0a1Sachartre (void) vdc_validate_geometry(vdc); 73078fcd0a1Sachartre mutex_exit(&vdc->lock); 7311ae08745Sheppo 7321ae08745Sheppo /* 7335b98b509Sachartre * Now that we have the device info we can create the device nodes 7341ae08745Sheppo */ 7351ae08745Sheppo status = vdc_create_device_nodes(vdc); 7361ae08745Sheppo if (status) { 7373af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to create device nodes", 7381ae08745Sheppo instance); 7393af08d82Slm66018 goto return_status; 7401ae08745Sheppo } 7411ae08745Sheppo 7424bac2208Snarayan /* 743366a92acSlm66018 * Fill in the fields of the error statistics kstat that were not 744366a92acSlm66018 * available when creating the kstat 745366a92acSlm66018 */ 746366a92acSlm66018 vdc_set_err_kstats(vdc); 747366a92acSlm66018 7481ae08745Sheppo ddi_report_dev(dip); 7493af08d82Slm66018 vdc->lifecycle = VDC_LC_ONLINE; 7503af08d82Slm66018 DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance); 7511ae08745Sheppo 7523af08d82Slm66018 return_status: 7533af08d82Slm66018 DMSG(vdc, 0, "[%d] Attach completed\n", instance); 7541ae08745Sheppo return (status); 7551ae08745Sheppo } 7561ae08745Sheppo 7571ae08745Sheppo static int 7581ae08745Sheppo vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 7591ae08745Sheppo { 7601ae08745Sheppo int status; 7611ae08745Sheppo 7621ae08745Sheppo switch (cmd) { 7631ae08745Sheppo case DDI_ATTACH: 7641ae08745Sheppo if ((status = vdc_do_attach(dip)) != 0) 7651ae08745Sheppo (void) vdc_detach(dip, DDI_DETACH); 7661ae08745Sheppo return (status); 7671ae08745Sheppo case DDI_RESUME: 7681ae08745Sheppo /* nothing to do for this non-device */ 7691ae08745Sheppo return (DDI_SUCCESS); 7701ae08745Sheppo default: 7711ae08745Sheppo return (DDI_FAILURE); 7721ae08745Sheppo } 7731ae08745Sheppo } 7741ae08745Sheppo 7751ae08745Sheppo static int 7768cd10891Snarayan vdc_do_ldc_init(vdc_t *vdc, vdc_server_t *srvr) 7771ae08745Sheppo { 7781ae08745Sheppo int 
status = 0; 7791ae08745Sheppo ldc_status_t ldc_state; 7801ae08745Sheppo ldc_attr_t ldc_attr; 7811ae08745Sheppo 7821ae08745Sheppo ASSERT(vdc != NULL); 7838cd10891Snarayan ASSERT(srvr != NULL); 7841ae08745Sheppo 7851ae08745Sheppo ldc_attr.devclass = LDC_DEV_BLK; 7861ae08745Sheppo ldc_attr.instance = vdc->instance; 7871ae08745Sheppo ldc_attr.mode = LDC_MODE_UNRELIABLE; /* unreliable transport */ 788e1ebb9ecSlm66018 ldc_attr.mtu = VD_LDC_MTU; 7891ae08745Sheppo 7908cd10891Snarayan if ((srvr->state & VDC_LDC_INIT) == 0) { 7918cd10891Snarayan status = ldc_init(srvr->ldc_id, &ldc_attr, 7928cd10891Snarayan &srvr->ldc_handle); 7931ae08745Sheppo if (status != 0) { 7943af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d", 7958cd10891Snarayan vdc->instance, srvr->ldc_id, status); 7961ae08745Sheppo return (status); 7971ae08745Sheppo } 7988cd10891Snarayan srvr->state |= VDC_LDC_INIT; 7991ae08745Sheppo } 8008cd10891Snarayan status = ldc_status(srvr->ldc_handle, &ldc_state); 8011ae08745Sheppo if (status != 0) { 8023af08d82Slm66018 DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]", 803e1ebb9ecSlm66018 vdc->instance, status); 8048cd10891Snarayan goto init_exit; 8051ae08745Sheppo } 8068cd10891Snarayan srvr->ldc_state = ldc_state; 8071ae08745Sheppo 8088cd10891Snarayan if ((srvr->state & VDC_LDC_CB) == 0) { 8098cd10891Snarayan status = ldc_reg_callback(srvr->ldc_handle, vdc_handle_cb, 8108cd10891Snarayan (caddr_t)srvr); 8111ae08745Sheppo if (status != 0) { 8123af08d82Slm66018 DMSG(vdc, 0, "[%d] LDC callback reg. failed (%d)", 813e1ebb9ecSlm66018 vdc->instance, status); 8148cd10891Snarayan goto init_exit; 8151ae08745Sheppo } 8168cd10891Snarayan srvr->state |= VDC_LDC_CB; 8171ae08745Sheppo } 8181ae08745Sheppo 8191ae08745Sheppo /* 8201ae08745Sheppo * At this stage we have initialised LDC, we will now try and open 8211ae08745Sheppo * the connection. 
8221ae08745Sheppo */ 8238cd10891Snarayan if (srvr->ldc_state == LDC_INIT) { 8248cd10891Snarayan status = ldc_open(srvr->ldc_handle); 8251ae08745Sheppo if (status != 0) { 8263af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d", 8278cd10891Snarayan vdc->instance, srvr->ldc_id, status); 8288cd10891Snarayan goto init_exit; 8291ae08745Sheppo } 8308cd10891Snarayan srvr->state |= VDC_LDC_OPEN; 8318cd10891Snarayan } 8328cd10891Snarayan 8338cd10891Snarayan init_exit: 8348cd10891Snarayan if (status) { 8358cd10891Snarayan vdc_terminate_ldc(vdc, srvr); 8361ae08745Sheppo } 8371ae08745Sheppo 8381ae08745Sheppo return (status); 8391ae08745Sheppo } 8401ae08745Sheppo 8411ae08745Sheppo static int 8421ae08745Sheppo vdc_start_ldc_connection(vdc_t *vdc) 8431ae08745Sheppo { 8441ae08745Sheppo int status = 0; 8451ae08745Sheppo 8461ae08745Sheppo ASSERT(vdc != NULL); 8471ae08745Sheppo 8483af08d82Slm66018 ASSERT(MUTEX_HELD(&vdc->lock)); 8491ae08745Sheppo 8500a55fbb7Slm66018 status = vdc_do_ldc_up(vdc); 8511ae08745Sheppo 8523af08d82Slm66018 DMSG(vdc, 0, "[%d] Finished bringing up LDC\n", vdc->instance); 8531ae08745Sheppo 8543af08d82Slm66018 return (status); 8553af08d82Slm66018 } 8563af08d82Slm66018 8573af08d82Slm66018 static int 8583af08d82Slm66018 vdc_stop_ldc_connection(vdc_t *vdcp) 8593af08d82Slm66018 { 8603af08d82Slm66018 int status; 8613af08d82Slm66018 8628cd10891Snarayan ASSERT(vdcp != NULL); 8638cd10891Snarayan 8648cd10891Snarayan ASSERT(MUTEX_HELD(&vdcp->lock)); 8658cd10891Snarayan 8663af08d82Slm66018 DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n", 8673af08d82Slm66018 vdcp->state); 8683af08d82Slm66018 8698cd10891Snarayan status = ldc_down(vdcp->curr_server->ldc_handle); 8703af08d82Slm66018 DMSG(vdcp, 0, "ldc_down() = %d\n", status); 8713af08d82Slm66018 8723af08d82Slm66018 vdcp->initialized &= ~VDC_HANDSHAKE; 8733af08d82Slm66018 DMSG(vdcp, 0, "initialized=%x\n", vdcp->initialized); 8741ae08745Sheppo 8751ae08745Sheppo return (status); 8761ae08745Sheppo 
} 8771ae08745Sheppo 878366a92acSlm66018 static void 879366a92acSlm66018 vdc_create_io_kstats(vdc_t *vdc) 880366a92acSlm66018 { 881366a92acSlm66018 if (vdc->io_stats != NULL) { 882366a92acSlm66018 DMSG(vdc, 0, "[%d] I/O kstat already exists\n", vdc->instance); 883366a92acSlm66018 return; 884366a92acSlm66018 } 885366a92acSlm66018 886366a92acSlm66018 vdc->io_stats = kstat_create(VDC_DRIVER_NAME, vdc->instance, NULL, 887366a92acSlm66018 "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT); 888366a92acSlm66018 if (vdc->io_stats != NULL) { 889366a92acSlm66018 vdc->io_stats->ks_lock = &vdc->lock; 890366a92acSlm66018 kstat_install(vdc->io_stats); 891366a92acSlm66018 } else { 892366a92acSlm66018 cmn_err(CE_NOTE, "[%d] Failed to create kstat: I/O statistics" 893366a92acSlm66018 " will not be gathered", vdc->instance); 894366a92acSlm66018 } 895366a92acSlm66018 } 896366a92acSlm66018 897366a92acSlm66018 static void 898366a92acSlm66018 vdc_create_err_kstats(vdc_t *vdc) 899366a92acSlm66018 { 900366a92acSlm66018 vd_err_stats_t *stp; 901366a92acSlm66018 char kstatmodule_err[KSTAT_STRLEN]; 902366a92acSlm66018 char kstatname[KSTAT_STRLEN]; 903366a92acSlm66018 int ndata = (sizeof (vd_err_stats_t) / sizeof (kstat_named_t)); 904366a92acSlm66018 int instance = vdc->instance; 905366a92acSlm66018 906366a92acSlm66018 if (vdc->err_stats != NULL) { 907366a92acSlm66018 DMSG(vdc, 0, "[%d] ERR kstat already exists\n", vdc->instance); 908366a92acSlm66018 return; 909366a92acSlm66018 } 910366a92acSlm66018 911366a92acSlm66018 (void) snprintf(kstatmodule_err, sizeof (kstatmodule_err), 912366a92acSlm66018 "%serr", VDC_DRIVER_NAME); 913366a92acSlm66018 (void) snprintf(kstatname, sizeof (kstatname), 914366a92acSlm66018 "%s%d,err", VDC_DRIVER_NAME, instance); 915366a92acSlm66018 916366a92acSlm66018 vdc->err_stats = kstat_create(kstatmodule_err, instance, kstatname, 917366a92acSlm66018 "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT); 918366a92acSlm66018 919366a92acSlm66018 if (vdc->err_stats 
== NULL) { 920366a92acSlm66018 cmn_err(CE_NOTE, "[%d] Failed to create kstat: Error statistics" 921366a92acSlm66018 " will not be gathered", instance); 922366a92acSlm66018 return; 923366a92acSlm66018 } 924366a92acSlm66018 925366a92acSlm66018 stp = (vd_err_stats_t *)vdc->err_stats->ks_data; 926366a92acSlm66018 kstat_named_init(&stp->vd_softerrs, "Soft Errors", 927366a92acSlm66018 KSTAT_DATA_UINT32); 928366a92acSlm66018 kstat_named_init(&stp->vd_transerrs, "Transport Errors", 929366a92acSlm66018 KSTAT_DATA_UINT32); 930366a92acSlm66018 kstat_named_init(&stp->vd_protoerrs, "Protocol Errors", 931366a92acSlm66018 KSTAT_DATA_UINT32); 932366a92acSlm66018 kstat_named_init(&stp->vd_vid, "Vendor", 933366a92acSlm66018 KSTAT_DATA_CHAR); 934366a92acSlm66018 kstat_named_init(&stp->vd_pid, "Product", 935366a92acSlm66018 KSTAT_DATA_CHAR); 936366a92acSlm66018 kstat_named_init(&stp->vd_capacity, "Size", 937366a92acSlm66018 KSTAT_DATA_ULONGLONG); 938366a92acSlm66018 939366a92acSlm66018 vdc->err_stats->ks_update = nulldev; 940366a92acSlm66018 941366a92acSlm66018 kstat_install(vdc->err_stats); 942366a92acSlm66018 } 943366a92acSlm66018 944366a92acSlm66018 static void 945366a92acSlm66018 vdc_set_err_kstats(vdc_t *vdc) 946366a92acSlm66018 { 947366a92acSlm66018 vd_err_stats_t *stp; 948366a92acSlm66018 949366a92acSlm66018 if (vdc->err_stats == NULL) 950366a92acSlm66018 return; 951366a92acSlm66018 952366a92acSlm66018 mutex_enter(&vdc->lock); 953366a92acSlm66018 954366a92acSlm66018 stp = (vd_err_stats_t *)vdc->err_stats->ks_data; 955366a92acSlm66018 ASSERT(stp != NULL); 956366a92acSlm66018 95765908c77Syu, larry liu - Sun Microsystems - Beijing China stp->vd_capacity.value.ui64 = vdc->vdisk_size * vdc->vdisk_bsize; 958366a92acSlm66018 (void) strcpy(stp->vd_vid.value.c, "SUN"); 959366a92acSlm66018 (void) strcpy(stp->vd_pid.value.c, "VDSK"); 960366a92acSlm66018 961366a92acSlm66018 mutex_exit(&vdc->lock); 962366a92acSlm66018 } 963366a92acSlm66018 9644bac2208Snarayan static int 9654bac2208Snarayan 
vdc_create_device_nodes_efi(vdc_t *vdc) 9664bac2208Snarayan { 9674bac2208Snarayan ddi_remove_minor_node(vdc->dip, "h"); 9684bac2208Snarayan ddi_remove_minor_node(vdc->dip, "h,raw"); 9694bac2208Snarayan 9704bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK, 9714bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 9724bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 9734bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'", 9744bac2208Snarayan vdc->instance); 9754bac2208Snarayan return (EIO); 9764bac2208Snarayan } 9774bac2208Snarayan 9784bac2208Snarayan /* if any device node is created we set this flag */ 9794bac2208Snarayan vdc->initialized |= VDC_MINOR; 9804bac2208Snarayan 9814bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR, 9824bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 9834bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 9844bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd,raw'", 9854bac2208Snarayan vdc->instance); 9864bac2208Snarayan return (EIO); 9874bac2208Snarayan } 9884bac2208Snarayan 9894bac2208Snarayan return (0); 9904bac2208Snarayan } 9914bac2208Snarayan 9924bac2208Snarayan static int 9934bac2208Snarayan vdc_create_device_nodes_vtoc(vdc_t *vdc) 9944bac2208Snarayan { 9954bac2208Snarayan ddi_remove_minor_node(vdc->dip, "wd"); 9964bac2208Snarayan ddi_remove_minor_node(vdc->dip, "wd,raw"); 9974bac2208Snarayan 9984bac2208Snarayan if (ddi_create_minor_node(vdc->dip, "h", S_IFBLK, 9994bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 10004bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 10014bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'", 10024bac2208Snarayan vdc->instance); 10034bac2208Snarayan return (EIO); 10044bac2208Snarayan } 10054bac2208Snarayan 10064bac2208Snarayan /* if any device node is created we set this flag */ 10074bac2208Snarayan vdc->initialized |= VDC_MINOR; 10084bac2208Snarayan 10094bac2208Snarayan if 
(ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR, 10104bac2208Snarayan VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 10114bac2208Snarayan DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 10124bac2208Snarayan cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h,raw'", 10134bac2208Snarayan vdc->instance); 10144bac2208Snarayan return (EIO); 10154bac2208Snarayan } 10164bac2208Snarayan 10174bac2208Snarayan return (0); 10184bac2208Snarayan } 10191ae08745Sheppo 10201ae08745Sheppo /* 10211ae08745Sheppo * Function: 10221ae08745Sheppo * vdc_create_device_nodes 10231ae08745Sheppo * 10241ae08745Sheppo * Description: 10251ae08745Sheppo * This function creates the block and character device nodes under 10265b98b509Sachartre * /devices. It is called as part of the attach(9E) of the instance 10275b98b509Sachartre * during the handshake with vds after vds has sent the attributes 10285b98b509Sachartre * to vdc. 10291ae08745Sheppo * 10301ae08745Sheppo * If the device is of type VD_DISK_TYPE_SLICE then the minor node 10311ae08745Sheppo * of 2 is used in keeping with the Solaris convention that slice 2 10321ae08745Sheppo * refers to a whole disk. 
Slices start at 'a' 10331ae08745Sheppo * 10341ae08745Sheppo * Parameters: 10351ae08745Sheppo * vdc - soft state pointer 10361ae08745Sheppo * 10371ae08745Sheppo * Return Values 10381ae08745Sheppo * 0 - Success 10391ae08745Sheppo * EIO - Failed to create node 10401ae08745Sheppo */ 10411ae08745Sheppo static int 10421ae08745Sheppo vdc_create_device_nodes(vdc_t *vdc) 10431ae08745Sheppo { 10444bac2208Snarayan char name[sizeof ("s,raw")]; 10451ae08745Sheppo dev_info_t *dip = NULL; 10464bac2208Snarayan int instance, status; 10471ae08745Sheppo int num_slices = 1; 10481ae08745Sheppo int i; 10491ae08745Sheppo 10501ae08745Sheppo ASSERT(vdc != NULL); 10511ae08745Sheppo 10521ae08745Sheppo instance = vdc->instance; 10531ae08745Sheppo dip = vdc->dip; 10541ae08745Sheppo 10551ae08745Sheppo switch (vdc->vdisk_type) { 10561ae08745Sheppo case VD_DISK_TYPE_DISK: 1057*00e3a3e9SAlexandre Chartre case VD_DISK_TYPE_UNK: 10581ae08745Sheppo num_slices = V_NUMPAR; 10591ae08745Sheppo break; 10601ae08745Sheppo case VD_DISK_TYPE_SLICE: 10611ae08745Sheppo num_slices = 1; 10621ae08745Sheppo break; 10631ae08745Sheppo default: 1064*00e3a3e9SAlexandre Chartre ASSERT(0); 10651ae08745Sheppo } 10661ae08745Sheppo 10674bac2208Snarayan /* 10684bac2208Snarayan * Minor nodes are different for EFI disks: EFI disks do not have 10694bac2208Snarayan * a minor node 'g' for the minor number corresponding to slice 10704bac2208Snarayan * VD_EFI_WD_SLICE (slice 7) instead they have a minor node 'wd' 10714bac2208Snarayan * representing the whole disk. 
10724bac2208Snarayan */ 10731ae08745Sheppo for (i = 0; i < num_slices; i++) { 10744bac2208Snarayan 10754bac2208Snarayan if (i == VD_EFI_WD_SLICE) { 10764bac2208Snarayan if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 10774bac2208Snarayan status = vdc_create_device_nodes_efi(vdc); 10784bac2208Snarayan else 10794bac2208Snarayan status = vdc_create_device_nodes_vtoc(vdc); 10804bac2208Snarayan if (status != 0) 10814bac2208Snarayan return (status); 10824bac2208Snarayan continue; 10834bac2208Snarayan } 10844bac2208Snarayan 10851ae08745Sheppo (void) snprintf(name, sizeof (name), "%c", 'a' + i); 10861ae08745Sheppo if (ddi_create_minor_node(dip, name, S_IFBLK, 10871ae08745Sheppo VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 1088e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'", 1089e1ebb9ecSlm66018 instance, name); 10901ae08745Sheppo return (EIO); 10911ae08745Sheppo } 10921ae08745Sheppo 10931ae08745Sheppo /* if any device node is created we set this flag */ 10941ae08745Sheppo vdc->initialized |= VDC_MINOR; 10951ae08745Sheppo 109687a7269eSachartre (void) snprintf(name, sizeof (name), "%c%s", 'a' + i, ",raw"); 109787a7269eSachartre 10981ae08745Sheppo if (ddi_create_minor_node(dip, name, S_IFCHR, 10991ae08745Sheppo VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 1100e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'", 1101e1ebb9ecSlm66018 instance, name); 11021ae08745Sheppo return (EIO); 11031ae08745Sheppo } 11041ae08745Sheppo } 11051ae08745Sheppo 11061ae08745Sheppo return (0); 11071ae08745Sheppo } 11081ae08745Sheppo 11091ae08745Sheppo /* 11105b98b509Sachartre * Driver prop_op(9e) entry point function. Return the number of blocks for 11115b98b509Sachartre * the partition in question or forward the request to the property facilities. 
11121ae08745Sheppo */ 11131ae08745Sheppo static int 11145b98b509Sachartre vdc_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, 11155b98b509Sachartre char *name, caddr_t valuep, int *lengthp) 11161ae08745Sheppo { 11175b98b509Sachartre int instance = ddi_get_instance(dip); 11185b98b509Sachartre vdc_t *vdc; 11195b98b509Sachartre uint64_t nblocks; 11205b98b509Sachartre uint_t blksize; 11211ae08745Sheppo 11225b98b509Sachartre vdc = ddi_get_soft_state(vdc_state, instance); 11231ae08745Sheppo 11245b98b509Sachartre if (dev == DDI_DEV_T_ANY || vdc == NULL) { 11255b98b509Sachartre return (ddi_prop_op(dev, dip, prop_op, mod_flags, 11265b98b509Sachartre name, valuep, lengthp)); 11271ae08745Sheppo } 11281ae08745Sheppo 11295b98b509Sachartre mutex_enter(&vdc->lock); 11305b98b509Sachartre (void) vdc_validate_geometry(vdc); 113178fcd0a1Sachartre if (vdc->vdisk_label == VD_DISK_LABEL_UNK) { 11325b98b509Sachartre mutex_exit(&vdc->lock); 11335b98b509Sachartre return (ddi_prop_op(dev, dip, prop_op, mod_flags, 11345b98b509Sachartre name, valuep, lengthp)); 113578fcd0a1Sachartre } 11365b98b509Sachartre nblocks = vdc->slice[VDCPART(dev)].nblocks; 113765908c77Syu, larry liu - Sun Microsystems - Beijing China blksize = vdc->vdisk_bsize; 11385b98b509Sachartre mutex_exit(&vdc->lock); 113978fcd0a1Sachartre 11405b98b509Sachartre return (ddi_prop_op_nblocks_blksize(dev, dip, prop_op, mod_flags, 11415b98b509Sachartre name, valuep, lengthp, nblocks, blksize)); 11421ae08745Sheppo } 11431ae08745Sheppo 114478fcd0a1Sachartre /* 114578fcd0a1Sachartre * Function: 114678fcd0a1Sachartre * vdc_is_opened 114778fcd0a1Sachartre * 114878fcd0a1Sachartre * Description: 114978fcd0a1Sachartre * This function checks if any slice of a given virtual disk is 115078fcd0a1Sachartre * currently opened. 
115178fcd0a1Sachartre * 115278fcd0a1Sachartre * Parameters: 115378fcd0a1Sachartre * vdc - soft state pointer 115478fcd0a1Sachartre * 115578fcd0a1Sachartre * Return Values 115678fcd0a1Sachartre * B_TRUE - at least one slice is opened. 115778fcd0a1Sachartre * B_FALSE - no slice is opened. 115878fcd0a1Sachartre */ 115978fcd0a1Sachartre static boolean_t 116078fcd0a1Sachartre vdc_is_opened(vdc_t *vdc) 116178fcd0a1Sachartre { 1162*00e3a3e9SAlexandre Chartre int i; 116378fcd0a1Sachartre 116478fcd0a1Sachartre /* check if there's any layered open */ 1165*00e3a3e9SAlexandre Chartre for (i = 0; i < V_NUMPAR; i++) { 116678fcd0a1Sachartre if (vdc->open_lyr[i] > 0) 116778fcd0a1Sachartre return (B_TRUE); 116878fcd0a1Sachartre } 116978fcd0a1Sachartre 117078fcd0a1Sachartre /* check if there is any other kind of open */ 117178fcd0a1Sachartre for (i = 0; i < OTYPCNT; i++) { 117278fcd0a1Sachartre if (vdc->open[i] != 0) 117378fcd0a1Sachartre return (B_TRUE); 117478fcd0a1Sachartre } 117578fcd0a1Sachartre 117678fcd0a1Sachartre return (B_FALSE); 117778fcd0a1Sachartre } 117878fcd0a1Sachartre 117978fcd0a1Sachartre static int 118078fcd0a1Sachartre vdc_mark_opened(vdc_t *vdc, int slice, int flag, int otyp) 118178fcd0a1Sachartre { 118278fcd0a1Sachartre uint8_t slicemask; 118378fcd0a1Sachartre int i; 118478fcd0a1Sachartre 118578fcd0a1Sachartre ASSERT(otyp < OTYPCNT); 118678fcd0a1Sachartre ASSERT(slice < V_NUMPAR); 118778fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 118878fcd0a1Sachartre 118978fcd0a1Sachartre slicemask = 1 << slice; 119078fcd0a1Sachartre 1191*00e3a3e9SAlexandre Chartre /* 1192*00e3a3e9SAlexandre Chartre * If we have a single-slice disk which was unavailable during the 1193*00e3a3e9SAlexandre Chartre * attach then a device was created for each 8 slices. Now that 1194*00e3a3e9SAlexandre Chartre * the type is known, we prevent opening any slice other than 0 1195*00e3a3e9SAlexandre Chartre * even if a device still exists. 
1196*00e3a3e9SAlexandre Chartre */ 1197*00e3a3e9SAlexandre Chartre if (vdc->vdisk_type == VD_DISK_TYPE_SLICE && slice != 0) 1198*00e3a3e9SAlexandre Chartre return (EIO); 1199*00e3a3e9SAlexandre Chartre 120078fcd0a1Sachartre /* check if slice is already exclusively opened */ 120178fcd0a1Sachartre if (vdc->open_excl & slicemask) 120278fcd0a1Sachartre return (EBUSY); 120378fcd0a1Sachartre 120478fcd0a1Sachartre /* if open exclusive, check if slice is already opened */ 120578fcd0a1Sachartre if (flag & FEXCL) { 120678fcd0a1Sachartre if (vdc->open_lyr[slice] > 0) 120778fcd0a1Sachartre return (EBUSY); 120878fcd0a1Sachartre for (i = 0; i < OTYPCNT; i++) { 120978fcd0a1Sachartre if (vdc->open[i] & slicemask) 121078fcd0a1Sachartre return (EBUSY); 121178fcd0a1Sachartre } 121278fcd0a1Sachartre vdc->open_excl |= slicemask; 121378fcd0a1Sachartre } 121478fcd0a1Sachartre 121578fcd0a1Sachartre /* mark slice as opened */ 121678fcd0a1Sachartre if (otyp == OTYP_LYR) { 121778fcd0a1Sachartre vdc->open_lyr[slice]++; 121878fcd0a1Sachartre } else { 121978fcd0a1Sachartre vdc->open[otyp] |= slicemask; 122078fcd0a1Sachartre } 122178fcd0a1Sachartre 122278fcd0a1Sachartre return (0); 122378fcd0a1Sachartre } 122478fcd0a1Sachartre 122578fcd0a1Sachartre static void 122678fcd0a1Sachartre vdc_mark_closed(vdc_t *vdc, int slice, int flag, int otyp) 122778fcd0a1Sachartre { 122878fcd0a1Sachartre uint8_t slicemask; 122978fcd0a1Sachartre 123078fcd0a1Sachartre ASSERT(otyp < OTYPCNT); 123178fcd0a1Sachartre ASSERT(slice < V_NUMPAR); 123278fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 123378fcd0a1Sachartre 123478fcd0a1Sachartre slicemask = 1 << slice; 123578fcd0a1Sachartre 123678fcd0a1Sachartre if (otyp == OTYP_LYR) { 123778fcd0a1Sachartre ASSERT(vdc->open_lyr[slice] > 0); 123878fcd0a1Sachartre vdc->open_lyr[slice]--; 123978fcd0a1Sachartre } else { 124078fcd0a1Sachartre vdc->open[otyp] &= ~slicemask; 124178fcd0a1Sachartre } 124278fcd0a1Sachartre 124378fcd0a1Sachartre if (flag & FEXCL) 124478fcd0a1Sachartre 
vdc->open_excl &= ~slicemask; 124578fcd0a1Sachartre } 124678fcd0a1Sachartre 12471ae08745Sheppo static int 12481ae08745Sheppo vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 12491ae08745Sheppo { 12501ae08745Sheppo _NOTE(ARGUNUSED(cred)) 12511ae08745Sheppo 1252179e09c2Sachartre int instance, nodelay; 125378fcd0a1Sachartre int slice, status = 0; 12541ae08745Sheppo vdc_t *vdc; 12551ae08745Sheppo 12561ae08745Sheppo ASSERT(dev != NULL); 12570d0c8d4bSnarayan instance = VDCUNIT(*dev); 12581ae08745Sheppo 125978fcd0a1Sachartre if (otyp >= OTYPCNT) 12601ae08745Sheppo return (EINVAL); 12611ae08745Sheppo 12621ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1263e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 12641ae08745Sheppo return (ENXIO); 12651ae08745Sheppo } 12661ae08745Sheppo 12673af08d82Slm66018 DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n", 12683af08d82Slm66018 getminor(*dev), flag, otyp); 12691ae08745Sheppo 127078fcd0a1Sachartre slice = VDCPART(*dev); 127178fcd0a1Sachartre 1272179e09c2Sachartre nodelay = flag & (FNDELAY | FNONBLOCK); 1273179e09c2Sachartre 1274179e09c2Sachartre if ((flag & FWRITE) && (!nodelay) && 1275179e09c2Sachartre !(VD_OP_SUPPORTED(vdc->operations, VD_OP_BWRITE))) { 1276179e09c2Sachartre return (EROFS); 1277179e09c2Sachartre } 1278179e09c2Sachartre 12791ae08745Sheppo mutex_enter(&vdc->lock); 128078fcd0a1Sachartre 128178fcd0a1Sachartre status = vdc_mark_opened(vdc, slice, flag, otyp); 128278fcd0a1Sachartre 128378fcd0a1Sachartre if (status != 0) { 128478fcd0a1Sachartre mutex_exit(&vdc->lock); 128578fcd0a1Sachartre return (status); 128678fcd0a1Sachartre } 128778fcd0a1Sachartre 1288*00e3a3e9SAlexandre Chartre /* 1289*00e3a3e9SAlexandre Chartre * If the disk type is unknown then we have to wait for the 1290*00e3a3e9SAlexandre Chartre * handshake to complete because we don't know if the slice 1291*00e3a3e9SAlexandre Chartre * device we are opening effectively exists. 
1292*00e3a3e9SAlexandre Chartre */ 1293*00e3a3e9SAlexandre Chartre if (vdc->vdisk_type != VD_DISK_TYPE_UNK && nodelay) { 129478fcd0a1Sachartre 129578fcd0a1Sachartre /* don't resubmit a validate request if there's already one */ 129678fcd0a1Sachartre if (vdc->validate_pending > 0) { 129778fcd0a1Sachartre mutex_exit(&vdc->lock); 129878fcd0a1Sachartre return (0); 129978fcd0a1Sachartre } 130078fcd0a1Sachartre 130178fcd0a1Sachartre /* call vdc_validate() asynchronously to avoid blocking */ 130278fcd0a1Sachartre if (taskq_dispatch(system_taskq, vdc_validate_task, 130378fcd0a1Sachartre (void *)vdc, TQ_NOSLEEP) == NULL) { 130478fcd0a1Sachartre vdc_mark_closed(vdc, slice, flag, otyp); 130578fcd0a1Sachartre mutex_exit(&vdc->lock); 130678fcd0a1Sachartre return (ENXIO); 130778fcd0a1Sachartre } 130878fcd0a1Sachartre 130978fcd0a1Sachartre vdc->validate_pending++; 131078fcd0a1Sachartre mutex_exit(&vdc->lock); 131178fcd0a1Sachartre return (0); 131278fcd0a1Sachartre } 131378fcd0a1Sachartre 13141ae08745Sheppo mutex_exit(&vdc->lock); 13151ae08745Sheppo 131678fcd0a1Sachartre vdc_validate(vdc); 131778fcd0a1Sachartre 131878fcd0a1Sachartre mutex_enter(&vdc->lock); 131978fcd0a1Sachartre 1320*00e3a3e9SAlexandre Chartre if (vdc->vdisk_type == VD_DISK_TYPE_UNK || 1321*00e3a3e9SAlexandre Chartre (vdc->vdisk_type == VD_DISK_TYPE_SLICE && slice != 0) || 1322*00e3a3e9SAlexandre Chartre (!nodelay && (vdc->vdisk_label == VD_DISK_LABEL_UNK || 1323*00e3a3e9SAlexandre Chartre vdc->slice[slice].nblocks == 0))) { 132478fcd0a1Sachartre vdc_mark_closed(vdc, slice, flag, otyp); 132578fcd0a1Sachartre status = EIO; 132678fcd0a1Sachartre } 132778fcd0a1Sachartre 132878fcd0a1Sachartre mutex_exit(&vdc->lock); 132978fcd0a1Sachartre 133078fcd0a1Sachartre return (status); 13311ae08745Sheppo } 13321ae08745Sheppo 13331ae08745Sheppo static int 13341ae08745Sheppo vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 13351ae08745Sheppo { 13361ae08745Sheppo _NOTE(ARGUNUSED(cred)) 13371ae08745Sheppo 13381ae08745Sheppo 
int instance; 133978fcd0a1Sachartre int slice; 13402f5224aeSachartre int rv, rval; 13411ae08745Sheppo vdc_t *vdc; 13421ae08745Sheppo 13430d0c8d4bSnarayan instance = VDCUNIT(dev); 13441ae08745Sheppo 134578fcd0a1Sachartre if (otyp >= OTYPCNT) 13461ae08745Sheppo return (EINVAL); 13471ae08745Sheppo 13481ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1349e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 13501ae08745Sheppo return (ENXIO); 13511ae08745Sheppo } 13521ae08745Sheppo 13533af08d82Slm66018 DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp); 13541ae08745Sheppo 135578fcd0a1Sachartre slice = VDCPART(dev); 135678fcd0a1Sachartre 13578259acd8Szk194757 /* 13588259acd8Szk194757 * Attempt to flush the W$ on a close operation. If this is 13598259acd8Szk194757 * not a supported IOCTL command or the backing device is read-only 13608259acd8Szk194757 * do not fail the close operation. 13618259acd8Szk194757 */ 13622f5224aeSachartre rv = vd_process_ioctl(dev, DKIOCFLUSHWRITECACHE, NULL, FKIOCTL, &rval); 13638259acd8Szk194757 13648259acd8Szk194757 if (rv != 0 && rv != ENOTSUP && rv != ENOTTY && rv != EROFS) { 13658259acd8Szk194757 DMSG(vdc, 0, "[%d] flush failed with error %d on close\n", 13668259acd8Szk194757 instance, rv); 13678259acd8Szk194757 return (EIO); 13688259acd8Szk194757 } 13698259acd8Szk194757 13701ae08745Sheppo mutex_enter(&vdc->lock); 137178fcd0a1Sachartre vdc_mark_closed(vdc, slice, flag, otyp); 13721ae08745Sheppo mutex_exit(&vdc->lock); 13731ae08745Sheppo 13741ae08745Sheppo return (0); 13751ae08745Sheppo } 13761ae08745Sheppo 13771ae08745Sheppo static int 13781ae08745Sheppo vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 13791ae08745Sheppo { 13801ae08745Sheppo _NOTE(ARGUNUSED(credp)) 13811ae08745Sheppo 13822f5224aeSachartre return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode, rvalp)); 13831ae08745Sheppo } 13841ae08745Sheppo 13851ae08745Sheppo static int 
13861ae08745Sheppo vdc_print(dev_t dev, char *str) 13871ae08745Sheppo { 13880d0c8d4bSnarayan cmn_err(CE_NOTE, "vdc%d: %s", VDCUNIT(dev), str); 13891ae08745Sheppo return (0); 13901ae08745Sheppo } 13911ae08745Sheppo 13921ae08745Sheppo static int 13931ae08745Sheppo vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 13941ae08745Sheppo { 1395*00e3a3e9SAlexandre Chartre int rv, flags; 1396d10e4ef2Snarayan size_t nbytes = nblk * DEV_BSIZE; 13970d0c8d4bSnarayan int instance = VDCUNIT(dev); 1398d10e4ef2Snarayan vdc_t *vdc = NULL; 139965908c77Syu, larry liu - Sun Microsystems - Beijing China diskaddr_t vio_blkno; 14001ae08745Sheppo 14011ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1402e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 14031ae08745Sheppo return (ENXIO); 14041ae08745Sheppo } 14051ae08745Sheppo 14063af08d82Slm66018 DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n", 14073af08d82Slm66018 instance, nbytes, blkno, (void *)addr); 140865908c77Syu, larry liu - Sun Microsystems - Beijing China 140965908c77Syu, larry liu - Sun Microsystems - Beijing China /* convert logical block to vio block */ 141065908c77Syu, larry liu - Sun Microsystems - Beijing China if ((blkno & vdc->vio_bmask) != 0) { 141165908c77Syu, larry liu - Sun Microsystems - Beijing China DMSG(vdc, 0, "Misaligned block number (%lu)\n", blkno); 141265908c77Syu, larry liu - Sun Microsystems - Beijing China return (EINVAL); 141365908c77Syu, larry liu - Sun Microsystems - Beijing China } 141465908c77Syu, larry liu - Sun Microsystems - Beijing China vio_blkno = blkno >> vdc->vio_bshift; 141565908c77Syu, larry liu - Sun Microsystems - Beijing China 1416*00e3a3e9SAlexandre Chartre /* 1417*00e3a3e9SAlexandre Chartre * If we are panicking, we need the state to be "running" so that we 1418*00e3a3e9SAlexandre Chartre * can submit I/Os, but we don't want to check for any backend error. 
1419*00e3a3e9SAlexandre Chartre */ 1420*00e3a3e9SAlexandre Chartre flags = (ddi_in_panic())? VDC_OP_STATE_RUNNING : VDC_OP_NORMAL; 1421*00e3a3e9SAlexandre Chartre 1422*00e3a3e9SAlexandre Chartre rv = vdc_do_op(vdc, VD_OP_BWRITE, addr, nbytes, VDCPART(dev), 1423*00e3a3e9SAlexandre Chartre vio_blkno, NULL, VIO_write_dir, flags); 1424*00e3a3e9SAlexandre Chartre 14253af08d82Slm66018 if (rv) { 14263af08d82Slm66018 DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv); 14271ae08745Sheppo return (rv); 14281ae08745Sheppo } 14291ae08745Sheppo 14303af08d82Slm66018 DMSG(vdc, 0, "[%d] End\n", instance); 14313af08d82Slm66018 14323af08d82Slm66018 return (0); 14333af08d82Slm66018 } 14343af08d82Slm66018 14351ae08745Sheppo /* -------------------------------------------------------------------------- */ 14361ae08745Sheppo 14371ae08745Sheppo /* 14381ae08745Sheppo * Disk access routines 14391ae08745Sheppo * 14401ae08745Sheppo */ 14411ae08745Sheppo 14421ae08745Sheppo /* 14431ae08745Sheppo * vdc_strategy() 14441ae08745Sheppo * 14451ae08745Sheppo * Return Value: 14461ae08745Sheppo * 0: As per strategy(9E), the strategy() function must return 0 14471ae08745Sheppo * [ bioerror(9f) sets b_flags to the proper error code ] 14481ae08745Sheppo */ 14491ae08745Sheppo static int 14501ae08745Sheppo vdc_strategy(struct buf *buf) 14511ae08745Sheppo { 145265908c77Syu, larry liu - Sun Microsystems - Beijing China diskaddr_t vio_blkno; 14531ae08745Sheppo vdc_t *vdc = NULL; 14540d0c8d4bSnarayan int instance = VDCUNIT(buf->b_edev); 14551ae08745Sheppo int op = (buf->b_flags & B_READ) ? 
VD_OP_BREAD : VD_OP_BWRITE; 145687a7269eSachartre int slice; 14571ae08745Sheppo 14581ae08745Sheppo if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1459e1ebb9ecSlm66018 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 14601ae08745Sheppo bioerror(buf, ENXIO); 14611ae08745Sheppo biodone(buf); 14621ae08745Sheppo return (0); 14631ae08745Sheppo } 14641ae08745Sheppo 14653af08d82Slm66018 DMSG(vdc, 2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p\n", 14663af08d82Slm66018 instance, (buf->b_flags & B_READ) ? "Read" : "Write", 14673af08d82Slm66018 buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr); 1468d10e4ef2Snarayan 14691ae08745Sheppo bp_mapin(buf); 14701ae08745Sheppo 147187a7269eSachartre if ((long)buf->b_private == VD_SLICE_NONE) { 147287a7269eSachartre /* I/O using an absolute disk offset */ 147387a7269eSachartre slice = VD_SLICE_NONE; 147487a7269eSachartre } else { 147587a7269eSachartre slice = VDCPART(buf->b_edev); 147687a7269eSachartre } 147787a7269eSachartre 147865908c77Syu, larry liu - Sun Microsystems - Beijing China /* 147965908c77Syu, larry liu - Sun Microsystems - Beijing China * In the buf structure, b_lblkno represents a logical block number 148065908c77Syu, larry liu - Sun Microsystems - Beijing China * using a block size of 512 bytes. For the VIO request, this block 148165908c77Syu, larry liu - Sun Microsystems - Beijing China * number has to be converted to be represented with the block size 148265908c77Syu, larry liu - Sun Microsystems - Beijing China * used by the VIO protocol. 
148365908c77Syu, larry liu - Sun Microsystems - Beijing China */ 148465908c77Syu, larry liu - Sun Microsystems - Beijing China if ((buf->b_lblkno & vdc->vio_bmask) != 0) { 148565908c77Syu, larry liu - Sun Microsystems - Beijing China bioerror(buf, EINVAL); 148665908c77Syu, larry liu - Sun Microsystems - Beijing China biodone(buf); 148765908c77Syu, larry liu - Sun Microsystems - Beijing China return (0); 148865908c77Syu, larry liu - Sun Microsystems - Beijing China } 148965908c77Syu, larry liu - Sun Microsystems - Beijing China vio_blkno = buf->b_lblkno >> vdc->vio_bshift; 149065908c77Syu, larry liu - Sun Microsystems - Beijing China 1491*00e3a3e9SAlexandre Chartre /* submit the I/O, any error will be reported in the buf structure */ 1492*00e3a3e9SAlexandre Chartre (void) vdc_do_op(vdc, op, (caddr_t)buf->b_un.b_addr, 149365908c77Syu, larry liu - Sun Microsystems - Beijing China buf->b_bcount, slice, vio_blkno, 1494*00e3a3e9SAlexandre Chartre buf, (op == VD_OP_BREAD) ? VIO_read_dir : VIO_write_dir, 1495*00e3a3e9SAlexandre Chartre VDC_OP_NORMAL); 1496d10e4ef2Snarayan 14971ae08745Sheppo return (0); 14981ae08745Sheppo } 14991ae08745Sheppo 15000d0c8d4bSnarayan /* 15010d0c8d4bSnarayan * Function: 15020d0c8d4bSnarayan * vdc_min 15030d0c8d4bSnarayan * 15040d0c8d4bSnarayan * Description: 15050d0c8d4bSnarayan * Routine to limit the size of a data transfer. Used in 15060d0c8d4bSnarayan * conjunction with physio(9F). 15070d0c8d4bSnarayan * 15080d0c8d4bSnarayan * Arguments: 15090d0c8d4bSnarayan * bp - pointer to the indicated buf(9S) struct. 
15100d0c8d4bSnarayan * 15110d0c8d4bSnarayan */ 15120d0c8d4bSnarayan static void 15130d0c8d4bSnarayan vdc_min(struct buf *bufp) 15140d0c8d4bSnarayan { 15150d0c8d4bSnarayan vdc_t *vdc = NULL; 15160d0c8d4bSnarayan int instance = VDCUNIT(bufp->b_edev); 15170d0c8d4bSnarayan 15180d0c8d4bSnarayan vdc = ddi_get_soft_state(vdc_state, instance); 15190d0c8d4bSnarayan VERIFY(vdc != NULL); 15200d0c8d4bSnarayan 152165908c77Syu, larry liu - Sun Microsystems - Beijing China if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->vdisk_bsize)) { 152265908c77Syu, larry liu - Sun Microsystems - Beijing China bufp->b_bcount = vdc->max_xfer_sz * vdc->vdisk_bsize; 15230d0c8d4bSnarayan } 15240d0c8d4bSnarayan } 15251ae08745Sheppo 15261ae08745Sheppo static int 15271ae08745Sheppo vdc_read(dev_t dev, struct uio *uio, cred_t *cred) 15281ae08745Sheppo { 15291ae08745Sheppo _NOTE(ARGUNUSED(cred)) 15301ae08745Sheppo 15310d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15320d0c8d4bSnarayan return (physio(vdc_strategy, NULL, dev, B_READ, vdc_min, uio)); 15331ae08745Sheppo } 15341ae08745Sheppo 15351ae08745Sheppo static int 15361ae08745Sheppo vdc_write(dev_t dev, struct uio *uio, cred_t *cred) 15371ae08745Sheppo { 15381ae08745Sheppo _NOTE(ARGUNUSED(cred)) 15391ae08745Sheppo 15400d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15410d0c8d4bSnarayan return (physio(vdc_strategy, NULL, dev, B_WRITE, vdc_min, uio)); 15421ae08745Sheppo } 15431ae08745Sheppo 15441ae08745Sheppo static int 15451ae08745Sheppo vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred) 15461ae08745Sheppo { 15471ae08745Sheppo _NOTE(ARGUNUSED(cred)) 15481ae08745Sheppo 15490d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15500d0c8d4bSnarayan return (aphysio(vdc_strategy, anocancel, dev, B_READ, vdc_min, aio)); 15511ae08745Sheppo } 15521ae08745Sheppo 15531ae08745Sheppo static int 15541ae08745Sheppo vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred) 15551ae08745Sheppo { 15561ae08745Sheppo _NOTE(ARGUNUSED(cred)) 
15571ae08745Sheppo 15580d0c8d4bSnarayan DMSGX(1, "[%d] Entered", VDCUNIT(dev)); 15590d0c8d4bSnarayan return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, vdc_min, aio)); 15601ae08745Sheppo } 15611ae08745Sheppo 15621ae08745Sheppo 15631ae08745Sheppo /* -------------------------------------------------------------------------- */ 15641ae08745Sheppo 15651ae08745Sheppo /* 15661ae08745Sheppo * Handshake support 15671ae08745Sheppo */ 15681ae08745Sheppo 15691ae08745Sheppo 15700a55fbb7Slm66018 /* 15710a55fbb7Slm66018 * Function: 15720a55fbb7Slm66018 * vdc_init_ver_negotiation() 15730a55fbb7Slm66018 * 15740a55fbb7Slm66018 * Description: 15750a55fbb7Slm66018 * 15760a55fbb7Slm66018 * Arguments: 15770a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 15780a55fbb7Slm66018 * 15790a55fbb7Slm66018 * Return Code: 15800a55fbb7Slm66018 * 0 - Success 15810a55fbb7Slm66018 */ 15821ae08745Sheppo static int 15830a55fbb7Slm66018 vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver) 15841ae08745Sheppo { 15851ae08745Sheppo vio_ver_msg_t pkt; 15861ae08745Sheppo size_t msglen = sizeof (pkt); 15871ae08745Sheppo int status = -1; 15881ae08745Sheppo 15891ae08745Sheppo ASSERT(vdc != NULL); 15901ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 15911ae08745Sheppo 15923af08d82Slm66018 DMSG(vdc, 0, "[%d] Entered.\n", vdc->instance); 1593e1ebb9ecSlm66018 15941ae08745Sheppo /* 15951ae08745Sheppo * set the Session ID to a unique value 15961ae08745Sheppo * (the lower 32 bits of the clock tick) 15971ae08745Sheppo */ 15981ae08745Sheppo vdc->session_id = ((uint32_t)gettick() & 0xffffffff); 15993af08d82Slm66018 DMSG(vdc, 0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id); 16001ae08745Sheppo 16011ae08745Sheppo pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 16021ae08745Sheppo pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 16031ae08745Sheppo pkt.tag.vio_subtype_env = VIO_VER_INFO; 16041ae08745Sheppo pkt.tag.vio_sid = vdc->session_id; 16051ae08745Sheppo pkt.dev_class = VDEV_DISK; 
16060a55fbb7Slm66018 pkt.ver_major = ver.major; 16070a55fbb7Slm66018 pkt.ver_minor = ver.minor; 16081ae08745Sheppo 16090a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 16103af08d82Slm66018 DMSG(vdc, 0, "[%d] Ver info sent (status = %d)\n", 16113af08d82Slm66018 vdc->instance, status); 16121ae08745Sheppo if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 16133af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: " 16148cd10891Snarayan "id(%lx) rv(%d) size(%ld)", vdc->instance, 16158cd10891Snarayan vdc->curr_server->ldc_handle, status, msglen); 16161ae08745Sheppo if (msglen != sizeof (vio_ver_msg_t)) 16171ae08745Sheppo status = ENOMSG; 16181ae08745Sheppo } 16191ae08745Sheppo 16201ae08745Sheppo return (status); 16211ae08745Sheppo } 16221ae08745Sheppo 16230a55fbb7Slm66018 /* 16240a55fbb7Slm66018 * Function: 16253af08d82Slm66018 * vdc_ver_negotiation() 16263af08d82Slm66018 * 16273af08d82Slm66018 * Description: 16283af08d82Slm66018 * 16293af08d82Slm66018 * Arguments: 16303af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 
16313af08d82Slm66018 * 16323af08d82Slm66018 * Return Code: 16333af08d82Slm66018 * 0 - Success 16343af08d82Slm66018 */ 16353af08d82Slm66018 static int 16363af08d82Slm66018 vdc_ver_negotiation(vdc_t *vdcp) 16373af08d82Slm66018 { 16383af08d82Slm66018 vio_msg_t vio_msg; 16393af08d82Slm66018 int status; 16403af08d82Slm66018 16413af08d82Slm66018 if (status = vdc_init_ver_negotiation(vdcp, vdc_version[0])) 16423af08d82Slm66018 return (status); 16433af08d82Slm66018 16443af08d82Slm66018 /* release lock and wait for response */ 16453af08d82Slm66018 mutex_exit(&vdcp->lock); 16463af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 16473af08d82Slm66018 mutex_enter(&vdcp->lock); 16483af08d82Slm66018 if (status) { 16493af08d82Slm66018 DMSG(vdcp, 0, 16503af08d82Slm66018 "[%d] Failed waiting for Ver negotiation response, rv(%d)", 16513af08d82Slm66018 vdcp->instance, status); 16523af08d82Slm66018 return (status); 16533af08d82Slm66018 } 16543af08d82Slm66018 16553af08d82Slm66018 /* check type and sub_type ... */ 16563af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 16573af08d82Slm66018 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 16583af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n", 16593af08d82Slm66018 vdcp->instance); 16603af08d82Slm66018 return (EPROTO); 16613af08d82Slm66018 } 16623af08d82Slm66018 16633af08d82Slm66018 return (vdc_handle_ver_msg(vdcp, (vio_ver_msg_t *)&vio_msg)); 16643af08d82Slm66018 } 16653af08d82Slm66018 16663af08d82Slm66018 /* 16673af08d82Slm66018 * Function: 16680a55fbb7Slm66018 * vdc_init_attr_negotiation() 16690a55fbb7Slm66018 * 16700a55fbb7Slm66018 * Description: 16710a55fbb7Slm66018 * 16720a55fbb7Slm66018 * Arguments: 16730a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
16740a55fbb7Slm66018 * 16750a55fbb7Slm66018 * Return Code: 16760a55fbb7Slm66018 * 0 - Success 16770a55fbb7Slm66018 */ 16781ae08745Sheppo static int 16791ae08745Sheppo vdc_init_attr_negotiation(vdc_t *vdc) 16801ae08745Sheppo { 16811ae08745Sheppo vd_attr_msg_t pkt; 16821ae08745Sheppo size_t msglen = sizeof (pkt); 16831ae08745Sheppo int status; 16841ae08745Sheppo 16851ae08745Sheppo ASSERT(vdc != NULL); 16861ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 16871ae08745Sheppo 16883af08d82Slm66018 DMSG(vdc, 0, "[%d] entered\n", vdc->instance); 16891ae08745Sheppo 16901ae08745Sheppo /* fill in tag */ 16911ae08745Sheppo pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 16921ae08745Sheppo pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 16931ae08745Sheppo pkt.tag.vio_subtype_env = VIO_ATTR_INFO; 16941ae08745Sheppo pkt.tag.vio_sid = vdc->session_id; 16951ae08745Sheppo /* fill in payload */ 16961ae08745Sheppo pkt.max_xfer_sz = vdc->max_xfer_sz; 169765908c77Syu, larry liu - Sun Microsystems - Beijing China pkt.vdisk_block_size = vdc->vdisk_bsize; 1698f0ca1d9aSsb155480 pkt.xfer_mode = VIO_DRING_MODE_V1_0; 16991ae08745Sheppo pkt.operations = 0; /* server will set bits of valid operations */ 17001ae08745Sheppo pkt.vdisk_type = 0; /* server will set to valid device type */ 170117cadca8Slm66018 pkt.vdisk_media = 0; /* server will set to valid media type */ 17021ae08745Sheppo pkt.vdisk_size = 0; /* server will set to valid size */ 17031ae08745Sheppo 17040a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 17053af08d82Slm66018 DMSG(vdc, 0, "Attr info sent (status = %d)\n", status); 17061ae08745Sheppo 1707f3241e46Sanbui if ((status != 0) || (msglen != sizeof (vd_attr_msg_t))) { 17083af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: " 17098cd10891Snarayan "id(%lx) rv(%d) size(%ld)", vdc->instance, 17108cd10891Snarayan vdc->curr_server->ldc_handle, status, msglen); 1711f3241e46Sanbui if (msglen != sizeof (vd_attr_msg_t)) 17121ae08745Sheppo status = ENOMSG; 17131ae08745Sheppo } 
17141ae08745Sheppo 17151ae08745Sheppo return (status); 17161ae08745Sheppo } 17171ae08745Sheppo 17180a55fbb7Slm66018 /* 17190a55fbb7Slm66018 * Function: 17203af08d82Slm66018 * vdc_attr_negotiation() 17213af08d82Slm66018 * 17223af08d82Slm66018 * Description: 17233af08d82Slm66018 * 17243af08d82Slm66018 * Arguments: 17253af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 17263af08d82Slm66018 * 17273af08d82Slm66018 * Return Code: 17283af08d82Slm66018 * 0 - Success 17293af08d82Slm66018 */ 17303af08d82Slm66018 static int 17313af08d82Slm66018 vdc_attr_negotiation(vdc_t *vdcp) 17323af08d82Slm66018 { 17333af08d82Slm66018 int status; 17343af08d82Slm66018 vio_msg_t vio_msg; 17353af08d82Slm66018 17363af08d82Slm66018 if (status = vdc_init_attr_negotiation(vdcp)) 17373af08d82Slm66018 return (status); 17383af08d82Slm66018 17393af08d82Slm66018 /* release lock and wait for response */ 17403af08d82Slm66018 mutex_exit(&vdcp->lock); 17413af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 17423af08d82Slm66018 mutex_enter(&vdcp->lock); 17433af08d82Slm66018 if (status) { 17443af08d82Slm66018 DMSG(vdcp, 0, 17453af08d82Slm66018 "[%d] Failed waiting for Attr negotiation response, rv(%d)", 17463af08d82Slm66018 vdcp->instance, status); 17473af08d82Slm66018 return (status); 17483af08d82Slm66018 } 17493af08d82Slm66018 17503af08d82Slm66018 /* check type and sub_type ... 
*/ 17513af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 17523af08d82Slm66018 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 17533af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n", 17543af08d82Slm66018 vdcp->instance); 17553af08d82Slm66018 return (EPROTO); 17563af08d82Slm66018 } 17573af08d82Slm66018 17583af08d82Slm66018 return (vdc_handle_attr_msg(vdcp, (vd_attr_msg_t *)&vio_msg)); 17593af08d82Slm66018 } 17603af08d82Slm66018 17613af08d82Slm66018 17623af08d82Slm66018 /* 17633af08d82Slm66018 * Function: 17640a55fbb7Slm66018 * vdc_init_dring_negotiate() 17650a55fbb7Slm66018 * 17660a55fbb7Slm66018 * Description: 17670a55fbb7Slm66018 * 17680a55fbb7Slm66018 * Arguments: 17690a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 17700a55fbb7Slm66018 * 17710a55fbb7Slm66018 * Return Code: 17720a55fbb7Slm66018 * 0 - Success 17730a55fbb7Slm66018 */ 17741ae08745Sheppo static int 17751ae08745Sheppo vdc_init_dring_negotiate(vdc_t *vdc) 17761ae08745Sheppo { 17771ae08745Sheppo vio_dring_reg_msg_t pkt; 17781ae08745Sheppo size_t msglen = sizeof (pkt); 17791ae08745Sheppo int status = -1; 17803af08d82Slm66018 int retry; 17813af08d82Slm66018 int nretries = 10; 17821ae08745Sheppo 17831ae08745Sheppo ASSERT(vdc != NULL); 17841ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 17851ae08745Sheppo 17863af08d82Slm66018 for (retry = 0; retry < nretries; retry++) { 17871ae08745Sheppo status = vdc_init_descriptor_ring(vdc); 17883af08d82Slm66018 if (status != EAGAIN) 17893af08d82Slm66018 break; 17903af08d82Slm66018 drv_usecwait(vdc_min_timeout_ldc); 17913af08d82Slm66018 } 17923af08d82Slm66018 17931ae08745Sheppo if (status != 0) { 17943af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n", 17951ae08745Sheppo vdc->instance, status); 17961ae08745Sheppo return (status); 17971ae08745Sheppo } 17983af08d82Slm66018 17993af08d82Slm66018 DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n", 
1800e1ebb9ecSlm66018 vdc->instance, status); 18011ae08745Sheppo 18021ae08745Sheppo /* fill in tag */ 18031ae08745Sheppo pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 18041ae08745Sheppo pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 18051ae08745Sheppo pkt.tag.vio_subtype_env = VIO_DRING_REG; 18061ae08745Sheppo pkt.tag.vio_sid = vdc->session_id; 18071ae08745Sheppo /* fill in payload */ 18081ae08745Sheppo pkt.dring_ident = 0; 1809e1ebb9ecSlm66018 pkt.num_descriptors = vdc->dring_len; 1810e1ebb9ecSlm66018 pkt.descriptor_size = vdc->dring_entry_size; 18111ae08745Sheppo pkt.options = (VIO_TX_DRING | VIO_RX_DRING); 18121ae08745Sheppo pkt.ncookies = vdc->dring_cookie_count; 18131ae08745Sheppo pkt.cookie[0] = vdc->dring_cookie[0]; /* for now just one cookie */ 18141ae08745Sheppo 18150a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 18161ae08745Sheppo if (status != 0) { 18173af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)", 1818e1ebb9ecSlm66018 vdc->instance, status); 18191ae08745Sheppo } 18201ae08745Sheppo 18211ae08745Sheppo return (status); 18221ae08745Sheppo } 18231ae08745Sheppo 18241ae08745Sheppo 18253af08d82Slm66018 /* 18263af08d82Slm66018 * Function: 18273af08d82Slm66018 * vdc_dring_negotiation() 18283af08d82Slm66018 * 18293af08d82Slm66018 * Description: 18303af08d82Slm66018 * 18313af08d82Slm66018 * Arguments: 18323af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 
18333af08d82Slm66018 * 18343af08d82Slm66018 * Return Code: 18353af08d82Slm66018 * 0 - Success 18363af08d82Slm66018 */ 18373af08d82Slm66018 static int 18383af08d82Slm66018 vdc_dring_negotiation(vdc_t *vdcp) 18393af08d82Slm66018 { 18403af08d82Slm66018 int status; 18413af08d82Slm66018 vio_msg_t vio_msg; 18423af08d82Slm66018 18433af08d82Slm66018 if (status = vdc_init_dring_negotiate(vdcp)) 18443af08d82Slm66018 return (status); 18453af08d82Slm66018 18463af08d82Slm66018 /* release lock and wait for response */ 18473af08d82Slm66018 mutex_exit(&vdcp->lock); 18483af08d82Slm66018 status = vdc_wait_for_response(vdcp, &vio_msg); 18493af08d82Slm66018 mutex_enter(&vdcp->lock); 18503af08d82Slm66018 if (status) { 18513af08d82Slm66018 DMSG(vdcp, 0, 18523af08d82Slm66018 "[%d] Failed waiting for Dring negotiation response," 18533af08d82Slm66018 " rv(%d)", vdcp->instance, status); 18543af08d82Slm66018 return (status); 18553af08d82Slm66018 } 18563af08d82Slm66018 18573af08d82Slm66018 /* check type and sub_type ... */ 18583af08d82Slm66018 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 18593af08d82Slm66018 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 18603af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n", 18613af08d82Slm66018 vdcp->instance); 18623af08d82Slm66018 return (EPROTO); 18633af08d82Slm66018 } 18643af08d82Slm66018 18653af08d82Slm66018 return (vdc_handle_dring_reg_msg(vdcp, 18663af08d82Slm66018 (vio_dring_reg_msg_t *)&vio_msg)); 18673af08d82Slm66018 } 18683af08d82Slm66018 18693af08d82Slm66018 18703af08d82Slm66018 /* 18713af08d82Slm66018 * Function: 18723af08d82Slm66018 * vdc_send_rdx() 18733af08d82Slm66018 * 18743af08d82Slm66018 * Description: 18753af08d82Slm66018 * 18763af08d82Slm66018 * Arguments: 18773af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 
18783af08d82Slm66018 * 18793af08d82Slm66018 * Return Code: 18803af08d82Slm66018 * 0 - Success 18813af08d82Slm66018 */ 18823af08d82Slm66018 static int 18833af08d82Slm66018 vdc_send_rdx(vdc_t *vdcp) 18843af08d82Slm66018 { 18853af08d82Slm66018 vio_msg_t msg; 18863af08d82Slm66018 size_t msglen = sizeof (vio_msg_t); 18873af08d82Slm66018 int status; 18883af08d82Slm66018 18893af08d82Slm66018 /* 18903af08d82Slm66018 * Send an RDX message to vds to indicate we are ready 18913af08d82Slm66018 * to send data 18923af08d82Slm66018 */ 18933af08d82Slm66018 msg.tag.vio_msgtype = VIO_TYPE_CTRL; 18943af08d82Slm66018 msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 18953af08d82Slm66018 msg.tag.vio_subtype_env = VIO_RDX; 18963af08d82Slm66018 msg.tag.vio_sid = vdcp->session_id; 18973af08d82Slm66018 status = vdc_send(vdcp, (caddr_t)&msg, &msglen); 18983af08d82Slm66018 if (status != 0) { 18993af08d82Slm66018 DMSG(vdcp, 0, "[%d] Failed to send RDX message (%d)", 19003af08d82Slm66018 vdcp->instance, status); 19013af08d82Slm66018 } 19023af08d82Slm66018 19033af08d82Slm66018 return (status); 19043af08d82Slm66018 } 19053af08d82Slm66018 19063af08d82Slm66018 /* 19073af08d82Slm66018 * Function: 19083af08d82Slm66018 * vdc_handle_rdx() 19093af08d82Slm66018 * 19103af08d82Slm66018 * Description: 19113af08d82Slm66018 * 19123af08d82Slm66018 * Arguments: 19133af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 
 *	msgp - received msg
 *
 * Return Code:
 *	0 - Success (the only value returned; the tag is only checked
 *	    by ASSERTs)
 */
static int
vdc_handle_rdx(vdc_t *vdcp, vio_rdx_msg_t *msgp)
{
	/* both arguments are referenced only by ASSERT()/DMSG() below */
	_NOTE(ARGUNUSED(vdcp))
	_NOTE(ARGUNUSED(msgp))

	/* the message is expected to be a CTRL/ACK/RDX message */
	ASSERT(msgp->tag.vio_msgtype == VIO_TYPE_CTRL);
	ASSERT(msgp->tag.vio_subtype == VIO_SUBTYPE_ACK);
	ASSERT(msgp->tag.vio_subtype_env == VIO_RDX);

	DMSG(vdcp, 1, "[%d] Got an RDX msg", vdcp->instance);

	return (0);
}

/*
 * Function:
 *	vdc_rdx_exchange()
 *
 * Description:
 *	Sends an RDX message and waits for the server to acknowledge it.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 *	EPROTO	- the reply is not a CTRL/ACK message
 *	other	- error returned by vdc_send_rdx(), vdc_wait_for_response()
 *		  or vdc_handle_rdx()
 */
static int
vdc_rdx_exchange(vdc_t *vdcp)
{
	int		status;
	vio_msg_t	vio_msg;

	if (status = vdc_send_rdx(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0, "[%d] Failed waiting for RDX response, rv(%d)",
		    vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type: unlike the other exchanges, only an ACK */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) {
		DMSG(vdcp, 0, "[%d] Invalid RDX response\n", vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_rdx(vdcp, (vio_rdx_msg_t *)&vio_msg));
}


/* -------------------------------------------------------------------------- */

/*
 * LDC helper routines
 */

/*
 * Function:
 *	vdc_recv()
 *
 * Description:
 *	Reads one message from the LDC channel of the current server,
 *	waiting on vdc->read_cv while the LDC queue is empty.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	msgp	- buffer receiving the message
 *	nbytesp	- on entry, the maximum number of bytes to read; on a
 *		  successful read, the number of bytes received.
 *
 * Return Code:
 *	0		- Success
 *	ECONNRESET	- read_state was VDC_READ_RESET while waiting
 *	xxx		- other error codes returned by ldc_read
 */
static int
vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp)
{
	int		status;
	uint64_t	delay_time;
	size_t		len;

	/*
	 * Until we get a blocking ldc read we have to retry until the entire
	 * LDC message has arrived before ldc_read() will return that message.
	 * If ldc_read() succeeds but returns a zero length message then that
	 * means that the LDC queue is empty and we have to wait for a
	 * notification from the LDC callback which will set the read_state to
	 * VDC_READ_PENDING. Note we also bail out if the channel is reset or
	 * goes away.
	 */
	delay_time = vdc_ldc_read_init_delay;

	for (;;) {

		/* ldc_read() overwrites len, so reload it on every pass */
		len = *nbytesp;
		/*
		 * vdc->curr_server is protected by vdc->lock but to avoid
		 * contentions we don't take the lock here. We can do this
		 * safely because vdc_recv() is only called from thread
		 * process_msg_thread() which is also the only thread that
		 * can change vdc->curr_server.
		 */
		status = ldc_read(vdc->curr_server->ldc_handle,
		    (caddr_t)msgp, &len);

		if (status == EAGAIN) {
			/* back off, doubling the delay up to the maximum */
			delay_time *= 2;
			if (delay_time >= vdc_ldc_read_max_delay)
				delay_time = vdc_ldc_read_max_delay;
			delay(delay_time);
			continue;
		}

		if (status != 0) {
			DMSG(vdc, 0, "ldc_read returned %d\n", status);
			break;
		}

		/* a non-empty message has been read: return it */
		if (len != 0) {
			*nbytesp = len;
			break;
		}

		/* the queue is empty: wait until a read becomes pending */
		mutex_enter(&vdc->read_lock);

		while (vdc->read_state != VDC_READ_PENDING) {

			/* detect if the connection has been reset */
			if (vdc->read_state == VDC_READ_RESET) {
				mutex_exit(&vdc->read_lock);
				return (ECONNRESET);
			}

			vdc->read_state = VDC_READ_WAITING;
			cv_wait(&vdc->read_cv, &vdc->read_lock);
		}

		vdc->read_state = VDC_READ_IDLE;
		mutex_exit(&vdc->read_lock);

		/* start again from the initial delay for the next read */
		delay_time = vdc_ldc_read_init_delay;
	}

	return (status);
}



#ifdef DEBUG
/*
 * Debug helper: logs the numeric and symbolic values of the message type,
 * subtype and subtype envelope found in the tag of msg.
 */
void
vdc_decode_tag(vdc_t *vdcp, vio_msg_t *msg)
{
	char *ms, *ss, *ses;
	/* Q() maps a constant to a string holding its own name */
	switch (msg->tag.vio_msgtype) {
#define	Q(_s)	case _s : ms = #_s; break;
	Q(VIO_TYPE_CTRL)
	Q(VIO_TYPE_DATA)
	Q(VIO_TYPE_ERR)
#undef Q
	default: ms = "unknown"; break;
	}

	switch (msg->tag.vio_subtype) {
#define	Q(_s)	case _s : ss = #_s; break;
	Q(VIO_SUBTYPE_INFO)
	Q(VIO_SUBTYPE_ACK)
	Q(VIO_SUBTYPE_NACK)
#undef Q
	default: ss = "unknown"; break;
	}

	switch (msg->tag.vio_subtype_env) {
#define	Q(_s)	case _s : ses = #_s; break;
	Q(VIO_VER_INFO)
	Q(VIO_ATTR_INFO)
	Q(VIO_DRING_REG)
	Q(VIO_DRING_UNREG)
	Q(VIO_RDX)
	Q(VIO_PKT_DATA)
	Q(VIO_DESC_DATA)
	Q(VIO_DRING_DATA)
#undef Q
	default: ses = "unknown"; break;
	}

	DMSG(vdcp, 3, "(%x/%x/%x) message : (%s/%s/%s)\n",
	    msg->tag.vio_msgtype, msg->tag.vio_subtype,
	    msg->tag.vio_subtype_env, ms, ss, ses);
}
#endif

/*
 * Function:
 *	vdc_send()
 *
 * Description:
 *	The function encapsulates the call to write a message using LDC.
 *	If LDC indicates that the call failed due to the queue being full,
 *	we retry the ldc_write() (with an increasing delay between
 *	attempts), otherwise we return the error returned by LDC.
 *	Must be called with vdc->lock held.
 *
 * Arguments:
 *	vdc		- soft state pointer for this instance of the device
 *			  driver; the message is written to the LDC channel
 *			  of vdc->curr_server
 *	pkt		- address of LDC message to be sent
 *	msglen		- the size of the message being sent. When the function
 *			  returns, this contains the number of bytes written
 *			  (it is left untouched when ECONNRESET is returned).
 *
 * Return Code:
 *	0		- Success.
 *	ECONNRESET	- ldc_write() failed with EIO or ECONNRESET; the
 *			  reader is notified of the reset.
 *	xxx		- other error codes returned by ldc_write
 *			  (EWOULDBLOCK is never returned, it is retried)
 */
static int
vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen)
{
	size_t	size = 0;
	int	status = 0;
	clock_t delay_ticks;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(msglen != NULL);
	ASSERT(*msglen != 0);

#ifdef DEBUG
	vdc_decode_tag(vdc, (vio_msg_t *)(uintptr_t)pkt);
#endif
	/*
	 * Wait indefinitely to send if channel
	 * is busy, but bail out if we succeed or
	 * if the channel closes or is reset.
	 */
	delay_ticks = vdc_hz_min_ldc_delay;
	do {
		/* ldc_write() overwrites size, so reload it on every pass */
		size = *msglen;
		status = ldc_write(vdc->curr_server->ldc_handle, pkt, &size);
		if (status == EWOULDBLOCK) {
			delay(delay_ticks);
			/* geometric backoff */
			delay_ticks *= 2;
			if (delay_ticks > vdc_hz_max_ldc_delay)
				delay_ticks = vdc_hz_max_ldc_delay;
		}
	} while (status == EWOULDBLOCK);

	/* if LDC had serious issues --- reset vdc state */
	if (status == EIO || status == ECONNRESET) {
		/* flag the reset in read_state, signalling read_cv if needed */
		mutex_enter(&vdc->read_lock);
		if ((vdc->read_state == VDC_READ_WAITING) ||
		    (vdc->read_state == VDC_READ_RESET))
			cv_signal(&vdc->read_cv);
		vdc->read_state = VDC_READ_RESET;
		mutex_exit(&vdc->read_lock);

		/* wake up any waiters in the reset thread */
		if (vdc->state == VDC_STATE_INIT_WAITING) {
			DMSG(vdc, 0, "[%d] write reset - "
			    "vdc is resetting ..\n", vdc->instance);
			vdc->state = VDC_STATE_RESETTING;
			cv_signal(&vdc->initwait_cv);
		}

		return (ECONNRESET);
	}

	/* return the last size written */
	*msglen = size;

	return (status);
}

/*
 * Function:
 *	vdc_get_md_node
 *
 * Description:
21868cd10891Snarayan * Get the MD, the device node for the given disk instance. The 21878cd10891Snarayan * caller is responsible for cleaning up the reference to the 21888cd10891Snarayan * returned MD (mdpp) by calling md_fini_handle(). 21891ae08745Sheppo * 21901ae08745Sheppo * Arguments: 21911ae08745Sheppo * dip - dev info pointer for this instance of the device driver. 2192655fd6a9Sachartre * mdpp - the returned MD. 2193655fd6a9Sachartre * vd_nodep - the returned device node. 21941ae08745Sheppo * 21951ae08745Sheppo * Return Code: 21961ae08745Sheppo * 0 - Success. 21971ae08745Sheppo * ENOENT - Expected node or property did not exist. 21981ae08745Sheppo * ENXIO - Unexpected error communicating with MD framework 21991ae08745Sheppo */ 22001ae08745Sheppo static int 22018cd10891Snarayan vdc_get_md_node(dev_info_t *dip, md_t **mdpp, mde_cookie_t *vd_nodep) 22021ae08745Sheppo { 22031ae08745Sheppo int status = ENOENT; 22041ae08745Sheppo char *node_name = NULL; 22051ae08745Sheppo md_t *mdp = NULL; 22061ae08745Sheppo int num_nodes; 22071ae08745Sheppo int num_vdevs; 22081ae08745Sheppo mde_cookie_t rootnode; 22091ae08745Sheppo mde_cookie_t *listp = NULL; 22101ae08745Sheppo boolean_t found_inst = B_FALSE; 22111ae08745Sheppo int listsz; 22121ae08745Sheppo int idx; 22131ae08745Sheppo uint64_t md_inst; 22141ae08745Sheppo int obp_inst; 22151ae08745Sheppo int instance = ddi_get_instance(dip); 22161ae08745Sheppo 22171ae08745Sheppo /* 22181ae08745Sheppo * Get the OBP instance number for comparison with the MD instance 22191ae08745Sheppo * 22201ae08745Sheppo * The "cfg-handle" property of a vdc node in an MD contains the MD's 22211ae08745Sheppo * notion of "instance", or unique identifier, for that node; OBP 22221ae08745Sheppo * stores the value of the "cfg-handle" MD property as the value of 22231ae08745Sheppo * the "reg" property on the node in the device tree it builds from 22241ae08745Sheppo * the MD and passes to Solaris. 
Thus, we look up the devinfo node's 22251ae08745Sheppo * "reg" property value to uniquely identify this device instance. 22261ae08745Sheppo * If the "reg" property cannot be found, the device tree state is 22271ae08745Sheppo * presumably so broken that there is no point in continuing. 22281ae08745Sheppo */ 22291ae08745Sheppo if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) { 22301ae08745Sheppo cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG); 22311ae08745Sheppo return (ENOENT); 22321ae08745Sheppo } 22331ae08745Sheppo obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 22341ae08745Sheppo OBP_REG, -1); 22353af08d82Slm66018 DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst); 22361ae08745Sheppo 22371ae08745Sheppo /* 2238655fd6a9Sachartre * We now walk the MD nodes to find the node for this vdisk. 22391ae08745Sheppo */ 22401ae08745Sheppo if ((mdp = md_get_handle()) == NULL) { 22411ae08745Sheppo cmn_err(CE_WARN, "unable to init machine description"); 22421ae08745Sheppo return (ENXIO); 22431ae08745Sheppo } 22441ae08745Sheppo 22451ae08745Sheppo num_nodes = md_node_count(mdp); 22461ae08745Sheppo ASSERT(num_nodes > 0); 22471ae08745Sheppo 22481ae08745Sheppo listsz = num_nodes * sizeof (mde_cookie_t); 22491ae08745Sheppo 22501ae08745Sheppo /* allocate memory for nodes */ 22511ae08745Sheppo listp = kmem_zalloc(listsz, KM_SLEEP); 22521ae08745Sheppo 22531ae08745Sheppo rootnode = md_root_node(mdp); 22541ae08745Sheppo ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); 22551ae08745Sheppo 22561ae08745Sheppo /* 22571ae08745Sheppo * Search for all the virtual devices, we will then check to see which 22581ae08745Sheppo * ones are disk nodes. 
22591ae08745Sheppo */ 22601ae08745Sheppo num_vdevs = md_scan_dag(mdp, rootnode, 22611ae08745Sheppo md_find_name(mdp, VDC_MD_VDEV_NAME), 22621ae08745Sheppo md_find_name(mdp, "fwd"), listp); 22631ae08745Sheppo 22641ae08745Sheppo if (num_vdevs <= 0) { 22651ae08745Sheppo cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME); 22661ae08745Sheppo status = ENOENT; 22671ae08745Sheppo goto done; 22681ae08745Sheppo } 22691ae08745Sheppo 22703af08d82Slm66018 DMSGX(1, "[%d] num_vdevs=%d\n", instance, num_vdevs); 22711ae08745Sheppo for (idx = 0; idx < num_vdevs; idx++) { 22721ae08745Sheppo status = md_get_prop_str(mdp, listp[idx], "name", &node_name); 22731ae08745Sheppo if ((status != 0) || (node_name == NULL)) { 22741ae08745Sheppo cmn_err(CE_NOTE, "Unable to get name of node type '%s'" 22751ae08745Sheppo ": err %d", VDC_MD_VDEV_NAME, status); 22761ae08745Sheppo continue; 22771ae08745Sheppo } 22781ae08745Sheppo 22793af08d82Slm66018 DMSGX(1, "[%d] Found node '%s'\n", instance, node_name); 22801ae08745Sheppo if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) { 22811ae08745Sheppo status = md_get_prop_val(mdp, listp[idx], 22821ae08745Sheppo VDC_MD_CFG_HDL, &md_inst); 22833af08d82Slm66018 DMSGX(1, "[%d] vdc inst in MD=%lx\n", 22843af08d82Slm66018 instance, md_inst); 22851ae08745Sheppo if ((status == 0) && (md_inst == obp_inst)) { 22861ae08745Sheppo found_inst = B_TRUE; 22871ae08745Sheppo break; 22881ae08745Sheppo } 22891ae08745Sheppo } 22901ae08745Sheppo } 22911ae08745Sheppo 22920a55fbb7Slm66018 if (!found_inst) { 22933af08d82Slm66018 DMSGX(0, "Unable to find correct '%s' node", VDC_MD_DISK_NAME); 22941ae08745Sheppo status = ENOENT; 22951ae08745Sheppo goto done; 22961ae08745Sheppo } 22973af08d82Slm66018 DMSGX(0, "[%d] MD inst=%lx\n", instance, md_inst); 22981ae08745Sheppo 2299655fd6a9Sachartre *vd_nodep = listp[idx]; 2300655fd6a9Sachartre *mdpp = mdp; 2301655fd6a9Sachartre done: 2302655fd6a9Sachartre kmem_free(listp, listsz); 2303655fd6a9Sachartre return (status); 2304655fd6a9Sachartre 
} 2305655fd6a9Sachartre 2306655fd6a9Sachartre /* 2307655fd6a9Sachartre * Function: 23088cd10891Snarayan * vdc_init_ports 2309655fd6a9Sachartre * 2310655fd6a9Sachartre * Description: 23118cd10891Snarayan * Initialize all the ports for this vdisk instance. 2312655fd6a9Sachartre * 2313655fd6a9Sachartre * Arguments: 23148cd10891Snarayan * vdc - soft state pointer for this instance of the device driver. 23158cd10891Snarayan * mdp - md pointer 23168cd10891Snarayan * vd_nodep - device md node. 2317655fd6a9Sachartre * 2318655fd6a9Sachartre * Return Code: 2319655fd6a9Sachartre * 0 - Success. 2320655fd6a9Sachartre * ENOENT - Expected node or property did not exist. 2321655fd6a9Sachartre */ 2322655fd6a9Sachartre static int 23238cd10891Snarayan vdc_init_ports(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_nodep) 2324655fd6a9Sachartre { 2325655fd6a9Sachartre int status = 0; 23268cd10891Snarayan int idx; 23278cd10891Snarayan int num_nodes; 23288cd10891Snarayan int num_vports; 23298cd10891Snarayan int num_chans; 23308cd10891Snarayan int listsz; 23318cd10891Snarayan mde_cookie_t vd_port; 23328cd10891Snarayan mde_cookie_t *chanp = NULL; 23338cd10891Snarayan mde_cookie_t *portp = NULL; 23348cd10891Snarayan vdc_server_t *srvr; 23358cd10891Snarayan vdc_server_t *prev_srvr = NULL; 2336655fd6a9Sachartre 23378cd10891Snarayan /* 23388cd10891Snarayan * We now walk the MD nodes to find the port nodes for this vdisk. 
23398cd10891Snarayan */ 2340655fd6a9Sachartre num_nodes = md_node_count(mdp); 2341655fd6a9Sachartre ASSERT(num_nodes > 0); 2342655fd6a9Sachartre 2343655fd6a9Sachartre listsz = num_nodes * sizeof (mde_cookie_t); 2344655fd6a9Sachartre 2345655fd6a9Sachartre /* allocate memory for nodes */ 23468cd10891Snarayan portp = kmem_zalloc(listsz, KM_SLEEP); 2347655fd6a9Sachartre chanp = kmem_zalloc(listsz, KM_SLEEP); 2348655fd6a9Sachartre 23498cd10891Snarayan num_vports = md_scan_dag(mdp, vd_nodep, 23508cd10891Snarayan md_find_name(mdp, VDC_MD_PORT_NAME), 23518cd10891Snarayan md_find_name(mdp, "fwd"), portp); 23528cd10891Snarayan if (num_vports == 0) { 23538cd10891Snarayan DMSGX(0, "Found no '%s' node for '%s' port\n", 23548cd10891Snarayan VDC_MD_PORT_NAME, VDC_MD_VDEV_NAME); 23558cd10891Snarayan status = ENOENT; 23568cd10891Snarayan goto done; 23578cd10891Snarayan } 23588cd10891Snarayan 23598cd10891Snarayan DMSGX(1, "Found %d '%s' node(s) for '%s' port\n", 23608cd10891Snarayan num_vports, VDC_MD_PORT_NAME, VDC_MD_VDEV_NAME); 23618cd10891Snarayan 23628cd10891Snarayan vdc->num_servers = 0; 23638cd10891Snarayan for (idx = 0; idx < num_vports; idx++) { 23648cd10891Snarayan 23658cd10891Snarayan /* initialize this port */ 23668cd10891Snarayan vd_port = portp[idx]; 23678cd10891Snarayan srvr = kmem_zalloc(sizeof (vdc_server_t), KM_SLEEP); 23688cd10891Snarayan srvr->vdcp = vdc; 2369*00e3a3e9SAlexandre Chartre srvr->svc_state = VDC_SERVICE_OFFLINE; 2370*00e3a3e9SAlexandre Chartre srvr->log_state = VDC_SERVICE_NONE; 23718cd10891Snarayan 23728cd10891Snarayan /* get port id */ 23738cd10891Snarayan if (md_get_prop_val(mdp, vd_port, VDC_MD_ID, &srvr->id) != 0) { 23748cd10891Snarayan cmn_err(CE_NOTE, "vDisk port '%s' property not found", 23758cd10891Snarayan VDC_MD_ID); 23768cd10891Snarayan kmem_free(srvr, sizeof (vdc_server_t)); 23778cd10891Snarayan continue; 23788cd10891Snarayan } 23798cd10891Snarayan 23808cd10891Snarayan /* set the connection timeout */ 23818cd10891Snarayan if 
(md_get_prop_val(mdp, vd_port, VDC_MD_TIMEOUT, 23828cd10891Snarayan &srvr->ctimeout) != 0) { 23838cd10891Snarayan srvr->ctimeout = 0; 23848cd10891Snarayan } 23858cd10891Snarayan 23868cd10891Snarayan /* get the ldc id */ 23878cd10891Snarayan num_chans = md_scan_dag(mdp, vd_port, 23881ae08745Sheppo md_find_name(mdp, VDC_MD_CHAN_NAME), 23891ae08745Sheppo md_find_name(mdp, "fwd"), chanp); 23901ae08745Sheppo 23911ae08745Sheppo /* expecting at least one channel */ 23921ae08745Sheppo if (num_chans <= 0) { 23931ae08745Sheppo cmn_err(CE_NOTE, "No '%s' node for '%s' port", 23941ae08745Sheppo VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); 23958cd10891Snarayan kmem_free(srvr, sizeof (vdc_server_t)); 23968cd10891Snarayan continue; 23971ae08745Sheppo } else if (num_chans != 1) { 23988cd10891Snarayan DMSGX(0, "Expected 1 '%s' node for '%s' port, " 23998cd10891Snarayan "found %d\n", VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, 24008cd10891Snarayan num_chans); 24011ae08745Sheppo } 24021ae08745Sheppo 24031ae08745Sheppo /* 24041ae08745Sheppo * We use the first channel found (index 0), irrespective of how 24051ae08745Sheppo * many are there in total. 
24061ae08745Sheppo */ 24078cd10891Snarayan if (md_get_prop_val(mdp, chanp[0], VDC_MD_ID, 24088cd10891Snarayan &srvr->ldc_id) != 0) { 24098cd10891Snarayan cmn_err(CE_NOTE, "Channel '%s' property not found", 24108cd10891Snarayan VDC_MD_ID); 24118cd10891Snarayan kmem_free(srvr, sizeof (vdc_server_t)); 24128cd10891Snarayan continue; 24138cd10891Snarayan } 24148cd10891Snarayan 24158cd10891Snarayan /* 24168cd10891Snarayan * now initialise LDC channel which will be used to 24178cd10891Snarayan * communicate with this server 24188cd10891Snarayan */ 24198cd10891Snarayan if (vdc_do_ldc_init(vdc, srvr) != 0) { 24208cd10891Snarayan kmem_free(srvr, sizeof (vdc_server_t)); 24218cd10891Snarayan continue; 24228cd10891Snarayan } 24238cd10891Snarayan 24248cd10891Snarayan /* add server to list */ 2425d7400d00Sachartre if (prev_srvr) 24268cd10891Snarayan prev_srvr->next = srvr; 2427d7400d00Sachartre else 24288cd10891Snarayan vdc->server_list = srvr; 2429d7400d00Sachartre 24308cd10891Snarayan prev_srvr = srvr; 24318cd10891Snarayan 24328cd10891Snarayan /* inc numbers of servers */ 24338cd10891Snarayan vdc->num_servers++; 24348cd10891Snarayan } 24358cd10891Snarayan 24368cd10891Snarayan /* 24378cd10891Snarayan * Adjust the max number of handshake retries to match 24388cd10891Snarayan * the number of vdisk servers. 
24398cd10891Snarayan */ 24408cd10891Snarayan if (vdc_hshake_retries < vdc->num_servers) 24418cd10891Snarayan vdc_hshake_retries = vdc->num_servers; 24428cd10891Snarayan 24438cd10891Snarayan /* pick first server as current server */ 24448cd10891Snarayan if (vdc->server_list != NULL) { 24458cd10891Snarayan vdc->curr_server = vdc->server_list; 24468cd10891Snarayan status = 0; 24478cd10891Snarayan } else { 24481ae08745Sheppo status = ENOENT; 24491ae08745Sheppo } 24501ae08745Sheppo 24511ae08745Sheppo done: 24521ae08745Sheppo kmem_free(chanp, listsz); 24538cd10891Snarayan kmem_free(portp, listsz); 24541ae08745Sheppo return (status); 24551ae08745Sheppo } 24561ae08745Sheppo 24578cd10891Snarayan 24588cd10891Snarayan /* 24598cd10891Snarayan * Function: 24608cd10891Snarayan * vdc_do_ldc_up 24618cd10891Snarayan * 24628cd10891Snarayan * Description: 24638cd10891Snarayan * Bring the channel for the current server up. 24648cd10891Snarayan * 24658cd10891Snarayan * Arguments: 24668cd10891Snarayan * vdc - soft state pointer for this instance of the device driver. 24678cd10891Snarayan * 24688cd10891Snarayan * Return Code: 24698cd10891Snarayan * 0 - Success. 
24708cd10891Snarayan * EINVAL - Driver is detaching / LDC error 24718cd10891Snarayan * ECONNREFUSED - Other end is not listening 24728cd10891Snarayan */ 24730a55fbb7Slm66018 static int 24740a55fbb7Slm66018 vdc_do_ldc_up(vdc_t *vdc) 24750a55fbb7Slm66018 { 24760a55fbb7Slm66018 int status; 24773af08d82Slm66018 ldc_status_t ldc_state; 24780a55fbb7Slm66018 24798cd10891Snarayan ASSERT(MUTEX_HELD(&vdc->lock)); 24808cd10891Snarayan 24813af08d82Slm66018 DMSG(vdc, 0, "[%d] Bringing up channel %lx\n", 24828cd10891Snarayan vdc->instance, vdc->curr_server->ldc_id); 24833af08d82Slm66018 24843af08d82Slm66018 if (vdc->lifecycle == VDC_LC_DETACHING) 24853af08d82Slm66018 return (EINVAL); 24860a55fbb7Slm66018 24878cd10891Snarayan if ((status = ldc_up(vdc->curr_server->ldc_handle)) != 0) { 24880a55fbb7Slm66018 switch (status) { 24890a55fbb7Slm66018 case ECONNREFUSED: /* listener not ready at other end */ 24903af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_up(%lx,...) return %d\n", 24918cd10891Snarayan vdc->instance, vdc->curr_server->ldc_id, status); 24920a55fbb7Slm66018 status = 0; 24930a55fbb7Slm66018 break; 24940a55fbb7Slm66018 default: 24953af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to bring up LDC: " 24968cd10891Snarayan "channel=%ld, err=%d", vdc->instance, 24978cd10891Snarayan vdc->curr_server->ldc_id, status); 24983af08d82Slm66018 break; 24993af08d82Slm66018 } 25003af08d82Slm66018 } 25013af08d82Slm66018 25028cd10891Snarayan if (ldc_status(vdc->curr_server->ldc_handle, &ldc_state) == 0) { 25038cd10891Snarayan vdc->curr_server->ldc_state = ldc_state; 25043af08d82Slm66018 if (ldc_state == LDC_UP) { 25053af08d82Slm66018 DMSG(vdc, 0, "[%d] LDC channel already up\n", 25063af08d82Slm66018 vdc->instance); 25073af08d82Slm66018 vdc->seq_num = 1; 25083af08d82Slm66018 vdc->seq_num_reply = 0; 25090a55fbb7Slm66018 } 25100a55fbb7Slm66018 } 25110a55fbb7Slm66018 25120a55fbb7Slm66018 return (status); 25130a55fbb7Slm66018 } 25140a55fbb7Slm66018 25150a55fbb7Slm66018 /* 25160a55fbb7Slm66018 * Function: 
25170a55fbb7Slm66018 * vdc_terminate_ldc() 25180a55fbb7Slm66018 * 25190a55fbb7Slm66018 * Description: 25200a55fbb7Slm66018 * 25210a55fbb7Slm66018 * Arguments: 25220a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 25238cd10891Snarayan * srvr - vdc per-server info structure 25240a55fbb7Slm66018 * 25250a55fbb7Slm66018 * Return Code: 25260a55fbb7Slm66018 * None 25270a55fbb7Slm66018 */ 25281ae08745Sheppo static void 25298cd10891Snarayan vdc_terminate_ldc(vdc_t *vdc, vdc_server_t *srvr) 25301ae08745Sheppo { 25311ae08745Sheppo int instance = ddi_get_instance(vdc->dip); 25321ae08745Sheppo 25338cd10891Snarayan if (srvr->state & VDC_LDC_OPEN) { 25348cd10891Snarayan DMSG(vdc, 0, "[%d] ldc_close()\n", instance); 25358cd10891Snarayan (void) ldc_close(srvr->ldc_handle); 25368cd10891Snarayan } 25378cd10891Snarayan if (srvr->state & VDC_LDC_CB) { 25388cd10891Snarayan DMSG(vdc, 0, "[%d] ldc_unreg_callback()\n", instance); 25398cd10891Snarayan (void) ldc_unreg_callback(srvr->ldc_handle); 25408cd10891Snarayan } 25418cd10891Snarayan if (srvr->state & VDC_LDC_INIT) { 25428cd10891Snarayan DMSG(vdc, 0, "[%d] ldc_fini()\n", instance); 25438cd10891Snarayan (void) ldc_fini(srvr->ldc_handle); 25448cd10891Snarayan srvr->ldc_handle = NULL; 25458cd10891Snarayan } 25468cd10891Snarayan 25478cd10891Snarayan srvr->state &= ~(VDC_LDC_INIT | VDC_LDC_CB | VDC_LDC_OPEN); 25488cd10891Snarayan } 25498cd10891Snarayan 25508cd10891Snarayan /* 25518cd10891Snarayan * Function: 25528cd10891Snarayan * vdc_fini_ports() 25538cd10891Snarayan * 25548cd10891Snarayan * Description: 25558cd10891Snarayan * Finalize all ports by closing the channel associated with each 25568cd10891Snarayan * port and also freeing the server structure. 25578cd10891Snarayan * 25588cd10891Snarayan * Arguments: 25598cd10891Snarayan * vdc - soft state pointer for this instance of the device driver. 
25608cd10891Snarayan * 25618cd10891Snarayan * Return Code: 25628cd10891Snarayan * None 25638cd10891Snarayan */ 25648cd10891Snarayan static void 25658cd10891Snarayan vdc_fini_ports(vdc_t *vdc) 25668cd10891Snarayan { 25678cd10891Snarayan int instance = ddi_get_instance(vdc->dip); 25688cd10891Snarayan vdc_server_t *srvr, *prev_srvr; 25698cd10891Snarayan 25701ae08745Sheppo ASSERT(vdc != NULL); 25711ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 25721ae08745Sheppo 25733af08d82Slm66018 DMSG(vdc, 0, "[%d] initialized=%x\n", instance, vdc->initialized); 25741ae08745Sheppo 25758cd10891Snarayan srvr = vdc->server_list; 25768cd10891Snarayan 25778cd10891Snarayan while (srvr) { 25788cd10891Snarayan 25798cd10891Snarayan vdc_terminate_ldc(vdc, srvr); 25808cd10891Snarayan 25818cd10891Snarayan /* next server */ 25828cd10891Snarayan prev_srvr = srvr; 25838cd10891Snarayan srvr = srvr->next; 25848cd10891Snarayan 25858cd10891Snarayan /* free server */ 25868cd10891Snarayan kmem_free(prev_srvr, sizeof (vdc_server_t)); 25871ae08745Sheppo } 25881ae08745Sheppo 25898cd10891Snarayan vdc->server_list = NULL; 2590*00e3a3e9SAlexandre Chartre vdc->num_servers = 0; 25911ae08745Sheppo } 25921ae08745Sheppo 25931ae08745Sheppo /* -------------------------------------------------------------------------- */ 25941ae08745Sheppo 25951ae08745Sheppo /* 25961ae08745Sheppo * Descriptor Ring helper routines 25971ae08745Sheppo */ 25981ae08745Sheppo 25990a55fbb7Slm66018 /* 26000a55fbb7Slm66018 * Function: 26010a55fbb7Slm66018 * vdc_init_descriptor_ring() 26020a55fbb7Slm66018 * 26030a55fbb7Slm66018 * Description: 26040a55fbb7Slm66018 * 26050a55fbb7Slm66018 * Arguments: 26060a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
26070a55fbb7Slm66018 * 26080a55fbb7Slm66018 * Return Code: 26090a55fbb7Slm66018 * 0 - Success 26100a55fbb7Slm66018 */ 26111ae08745Sheppo static int 26121ae08745Sheppo vdc_init_descriptor_ring(vdc_t *vdc) 26131ae08745Sheppo { 26141ae08745Sheppo vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ 26150a55fbb7Slm66018 int status = 0; 26161ae08745Sheppo int i; 26171ae08745Sheppo 26183af08d82Slm66018 DMSG(vdc, 0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized); 26191ae08745Sheppo 26201ae08745Sheppo ASSERT(vdc != NULL); 26211ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 26221ae08745Sheppo 2623e1ebb9ecSlm66018 /* ensure we have enough room to store max sized block */ 2624e1ebb9ecSlm66018 ASSERT(maxphys <= VD_MAX_BLOCK_SIZE); 2625e1ebb9ecSlm66018 26260a55fbb7Slm66018 if ((vdc->initialized & VDC_DRING_INIT) == 0) { 26273af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_mem_dring_create\n", vdc->instance); 2628e1ebb9ecSlm66018 /* 2629e1ebb9ecSlm66018 * Calculate the maximum block size we can transmit using one 2630e1ebb9ecSlm66018 * Descriptor Ring entry from the attributes returned by the 2631e1ebb9ecSlm66018 * vDisk server. This is subject to a minimum of 'maxphys' 2632e1ebb9ecSlm66018 * as we do not have the capability to split requests over 2633e1ebb9ecSlm66018 * multiple DRing entries. 
2634e1ebb9ecSlm66018 */ 263565908c77Syu, larry liu - Sun Microsystems - Beijing China if ((vdc->max_xfer_sz * vdc->vdisk_bsize) < maxphys) { 26363af08d82Slm66018 DMSG(vdc, 0, "[%d] using minimum DRing size\n", 2637e1ebb9ecSlm66018 vdc->instance); 2638e1ebb9ecSlm66018 vdc->dring_max_cookies = maxphys / PAGESIZE; 2639e1ebb9ecSlm66018 } else { 2640e1ebb9ecSlm66018 vdc->dring_max_cookies = 264165908c77Syu, larry liu - Sun Microsystems - Beijing China (vdc->max_xfer_sz * vdc->vdisk_bsize) / PAGESIZE; 2642e1ebb9ecSlm66018 } 2643e1ebb9ecSlm66018 vdc->dring_entry_size = (sizeof (vd_dring_entry_t) + 2644e1ebb9ecSlm66018 (sizeof (ldc_mem_cookie_t) * 2645e1ebb9ecSlm66018 (vdc->dring_max_cookies - 1))); 2646e1ebb9ecSlm66018 vdc->dring_len = VD_DRING_LEN; 2647e1ebb9ecSlm66018 2648e1ebb9ecSlm66018 status = ldc_mem_dring_create(vdc->dring_len, 26498cd10891Snarayan vdc->dring_entry_size, &vdc->dring_hdl); 26508cd10891Snarayan if ((vdc->dring_hdl == NULL) || (status != 0)) { 26513af08d82Slm66018 DMSG(vdc, 0, "[%d] Descriptor ring creation failed", 2652e1ebb9ecSlm66018 vdc->instance); 26531ae08745Sheppo return (status); 26541ae08745Sheppo } 26550a55fbb7Slm66018 vdc->initialized |= VDC_DRING_INIT; 26560a55fbb7Slm66018 } 26571ae08745Sheppo 26580a55fbb7Slm66018 if ((vdc->initialized & VDC_DRING_BOUND) == 0) { 26593af08d82Slm66018 DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance); 26600a55fbb7Slm66018 vdc->dring_cookie = 26610a55fbb7Slm66018 kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); 26621ae08745Sheppo 26638cd10891Snarayan status = ldc_mem_dring_bind(vdc->curr_server->ldc_handle, 26648cd10891Snarayan vdc->dring_hdl, 26654bac2208Snarayan LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW, 26660a55fbb7Slm66018 &vdc->dring_cookie[0], 26671ae08745Sheppo &vdc->dring_cookie_count); 26681ae08745Sheppo if (status != 0) { 26693af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to bind descriptor ring " 26703af08d82Slm66018 "(%lx) to channel (%lx) status=%d\n", 26718cd10891Snarayan vdc->instance, 
vdc->dring_hdl, 26728cd10891Snarayan vdc->curr_server->ldc_handle, status); 26731ae08745Sheppo return (status); 26741ae08745Sheppo } 26751ae08745Sheppo ASSERT(vdc->dring_cookie_count == 1); 26761ae08745Sheppo vdc->initialized |= VDC_DRING_BOUND; 26770a55fbb7Slm66018 } 26781ae08745Sheppo 26798cd10891Snarayan status = ldc_mem_dring_info(vdc->dring_hdl, &vdc->dring_mem_info); 26801ae08745Sheppo if (status != 0) { 26813af08d82Slm66018 DMSG(vdc, 0, 26823af08d82Slm66018 "[%d] Failed to get info for descriptor ring (%lx)\n", 26838cd10891Snarayan vdc->instance, vdc->dring_hdl); 26841ae08745Sheppo return (status); 26851ae08745Sheppo } 26861ae08745Sheppo 26870a55fbb7Slm66018 if ((vdc->initialized & VDC_DRING_LOCAL) == 0) { 26883af08d82Slm66018 DMSG(vdc, 0, "[%d] local dring\n", vdc->instance); 26890a55fbb7Slm66018 26901ae08745Sheppo /* Allocate the local copy of this dring */ 26910a55fbb7Slm66018 vdc->local_dring = 2692e1ebb9ecSlm66018 kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t), 26931ae08745Sheppo KM_SLEEP); 26941ae08745Sheppo vdc->initialized |= VDC_DRING_LOCAL; 26950a55fbb7Slm66018 } 26961ae08745Sheppo 26971ae08745Sheppo /* 26980a55fbb7Slm66018 * Mark all DRing entries as free and initialize the private 26990a55fbb7Slm66018 * descriptor's memory handles. If any entry is initialized, 27000a55fbb7Slm66018 * we need to free it later so we set the bit in 'initialized' 27010a55fbb7Slm66018 * at the start. 
27021ae08745Sheppo */ 27031ae08745Sheppo vdc->initialized |= VDC_DRING_ENTRY; 2704e1ebb9ecSlm66018 for (i = 0; i < vdc->dring_len; i++) { 27051ae08745Sheppo dep = VDC_GET_DRING_ENTRY_PTR(vdc, i); 27061ae08745Sheppo dep->hdr.dstate = VIO_DESC_FREE; 27071ae08745Sheppo 27088cd10891Snarayan status = ldc_mem_alloc_handle(vdc->curr_server->ldc_handle, 27091ae08745Sheppo &vdc->local_dring[i].desc_mhdl); 27101ae08745Sheppo if (status != 0) { 27113af08d82Slm66018 DMSG(vdc, 0, "![%d] Failed to alloc mem handle for" 27121ae08745Sheppo " descriptor %d", vdc->instance, i); 27131ae08745Sheppo return (status); 27141ae08745Sheppo } 27153af08d82Slm66018 vdc->local_dring[i].is_free = B_TRUE; 27161ae08745Sheppo vdc->local_dring[i].dep = dep; 27171ae08745Sheppo } 27181ae08745Sheppo 27193af08d82Slm66018 /* Initialize the starting index */ 27203af08d82Slm66018 vdc->dring_curr_idx = 0; 27211ae08745Sheppo 27221ae08745Sheppo return (status); 27231ae08745Sheppo } 27241ae08745Sheppo 27250a55fbb7Slm66018 /* 27260a55fbb7Slm66018 * Function: 27270a55fbb7Slm66018 * vdc_destroy_descriptor_ring() 27280a55fbb7Slm66018 * 27290a55fbb7Slm66018 * Description: 27300a55fbb7Slm66018 * 27310a55fbb7Slm66018 * Arguments: 27320a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
27330a55fbb7Slm66018 * 27340a55fbb7Slm66018 * Return Code: 27350a55fbb7Slm66018 * None 27360a55fbb7Slm66018 */ 27371ae08745Sheppo static void 27381ae08745Sheppo vdc_destroy_descriptor_ring(vdc_t *vdc) 27391ae08745Sheppo { 27400a55fbb7Slm66018 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 27411ae08745Sheppo ldc_mem_handle_t mhdl = NULL; 27423af08d82Slm66018 ldc_mem_info_t minfo; 27431ae08745Sheppo int status = -1; 27441ae08745Sheppo int i; /* loop */ 27451ae08745Sheppo 27461ae08745Sheppo ASSERT(vdc != NULL); 27471ae08745Sheppo ASSERT(mutex_owned(&vdc->lock)); 27481ae08745Sheppo 27493af08d82Slm66018 DMSG(vdc, 0, "[%d] Entered\n", vdc->instance); 27501ae08745Sheppo 27511ae08745Sheppo if (vdc->initialized & VDC_DRING_ENTRY) { 27523af08d82Slm66018 DMSG(vdc, 0, 27533af08d82Slm66018 "[%d] Removing Local DRing entries\n", vdc->instance); 2754e1ebb9ecSlm66018 for (i = 0; i < vdc->dring_len; i++) { 27550a55fbb7Slm66018 ldep = &vdc->local_dring[i]; 27560a55fbb7Slm66018 mhdl = ldep->desc_mhdl; 27571ae08745Sheppo 27580a55fbb7Slm66018 if (mhdl == NULL) 27590a55fbb7Slm66018 continue; 27600a55fbb7Slm66018 27613af08d82Slm66018 if ((status = ldc_mem_info(mhdl, &minfo)) != 0) { 27623af08d82Slm66018 DMSG(vdc, 0, 27633af08d82Slm66018 "ldc_mem_info returned an error: %d\n", 27643af08d82Slm66018 status); 27653af08d82Slm66018 27663af08d82Slm66018 /* 27673af08d82Slm66018 * This must mean that the mem handle 27683af08d82Slm66018 * is not valid. Clear it out so that 27693af08d82Slm66018 * no one tries to use it. 
27703af08d82Slm66018 */ 27713af08d82Slm66018 ldep->desc_mhdl = NULL; 27723af08d82Slm66018 continue; 27733af08d82Slm66018 } 27743af08d82Slm66018 27753af08d82Slm66018 if (minfo.status == LDC_BOUND) { 27763af08d82Slm66018 (void) ldc_mem_unbind_handle(mhdl); 27773af08d82Slm66018 } 27783af08d82Slm66018 27791ae08745Sheppo (void) ldc_mem_free_handle(mhdl); 27803af08d82Slm66018 27813af08d82Slm66018 ldep->desc_mhdl = NULL; 27821ae08745Sheppo } 27831ae08745Sheppo vdc->initialized &= ~VDC_DRING_ENTRY; 27841ae08745Sheppo } 27851ae08745Sheppo 27861ae08745Sheppo if (vdc->initialized & VDC_DRING_LOCAL) { 27873af08d82Slm66018 DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance); 27881ae08745Sheppo kmem_free(vdc->local_dring, 2789e1ebb9ecSlm66018 vdc->dring_len * sizeof (vdc_local_desc_t)); 27901ae08745Sheppo vdc->initialized &= ~VDC_DRING_LOCAL; 27911ae08745Sheppo } 27921ae08745Sheppo 27931ae08745Sheppo if (vdc->initialized & VDC_DRING_BOUND) { 27943af08d82Slm66018 DMSG(vdc, 0, "[%d] Unbinding DRing\n", vdc->instance); 27958cd10891Snarayan status = ldc_mem_dring_unbind(vdc->dring_hdl); 27961ae08745Sheppo if (status == 0) { 27971ae08745Sheppo vdc->initialized &= ~VDC_DRING_BOUND; 27981ae08745Sheppo } else { 27993af08d82Slm66018 DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx", 28008cd10891Snarayan vdc->instance, status, vdc->dring_hdl); 28011ae08745Sheppo } 28023af08d82Slm66018 kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t)); 28031ae08745Sheppo } 28041ae08745Sheppo 28051ae08745Sheppo if (vdc->initialized & VDC_DRING_INIT) { 28063af08d82Slm66018 DMSG(vdc, 0, "[%d] Destroying DRing\n", vdc->instance); 28078cd10891Snarayan status = ldc_mem_dring_destroy(vdc->dring_hdl); 28081ae08745Sheppo if (status == 0) { 28098cd10891Snarayan vdc->dring_hdl = NULL; 28101ae08745Sheppo bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t)); 28111ae08745Sheppo vdc->initialized &= ~VDC_DRING_INIT; 28121ae08745Sheppo } else { 28133af08d82Slm66018 DMSG(vdc, 0, "[%d] Error %d destroying DRing 
(%lx)", 28148cd10891Snarayan vdc->instance, status, vdc->dring_hdl); 28151ae08745Sheppo } 28161ae08745Sheppo } 28171ae08745Sheppo } 28181ae08745Sheppo 28191ae08745Sheppo /* 28203af08d82Slm66018 * Function: 282190e2f9dcSlm66018 * vdc_map_to_shared_dring() 28221ae08745Sheppo * 28231ae08745Sheppo * Description: 28243af08d82Slm66018 * Copy contents of the local descriptor to the shared 28253af08d82Slm66018 * memory descriptor. 28261ae08745Sheppo * 28273af08d82Slm66018 * Arguments: 28283af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 28293af08d82Slm66018 * idx - descriptor ring index 28303af08d82Slm66018 * 28313af08d82Slm66018 * Return Code: 28323af08d82Slm66018 * None 28331ae08745Sheppo */ 28341ae08745Sheppo static int 28353af08d82Slm66018 vdc_map_to_shared_dring(vdc_t *vdcp, int idx) 28361ae08745Sheppo { 28373af08d82Slm66018 vdc_local_desc_t *ldep; 28383af08d82Slm66018 vd_dring_entry_t *dep; 28393af08d82Slm66018 int rv; 28401ae08745Sheppo 28413af08d82Slm66018 ldep = &(vdcp->local_dring[idx]); 28421ae08745Sheppo 28433af08d82Slm66018 /* for now leave in the old pop_mem_hdl stuff */ 28443af08d82Slm66018 if (ldep->nbytes > 0) { 28453af08d82Slm66018 rv = vdc_populate_mem_hdl(vdcp, ldep); 28463af08d82Slm66018 if (rv) { 28473af08d82Slm66018 DMSG(vdcp, 0, "[%d] Cannot populate mem handle\n", 28483af08d82Slm66018 vdcp->instance); 28493af08d82Slm66018 return (rv); 28503af08d82Slm66018 } 28513af08d82Slm66018 } 28521ae08745Sheppo 28533af08d82Slm66018 /* 28543af08d82Slm66018 * fill in the data details into the DRing 28553af08d82Slm66018 */ 2856d10e4ef2Snarayan dep = ldep->dep; 28571ae08745Sheppo ASSERT(dep != NULL); 28581ae08745Sheppo 28593af08d82Slm66018 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdcp); 28603af08d82Slm66018 dep->payload.operation = ldep->operation; 28613af08d82Slm66018 dep->payload.addr = ldep->offset; 28623af08d82Slm66018 dep->payload.nbytes = ldep->nbytes; 2863055d7c80Scarlsonj dep->payload.status = (uint32_t)-1; /* vds will 
set valid value */ 28643af08d82Slm66018 dep->payload.slice = ldep->slice; 28653af08d82Slm66018 dep->hdr.dstate = VIO_DESC_READY; 28663af08d82Slm66018 dep->hdr.ack = 1; /* request an ACK for every message */ 28671ae08745Sheppo 28683af08d82Slm66018 return (0); 28691ae08745Sheppo } 28701ae08745Sheppo 28711ae08745Sheppo /* 28721ae08745Sheppo * Function: 28733af08d82Slm66018 * vdc_send_request 28743af08d82Slm66018 * 28753af08d82Slm66018 * Description: 28763af08d82Slm66018 * This routine writes the data to be transmitted to vds into the 28773af08d82Slm66018 * descriptor, notifies vds that the ring has been updated and 28783af08d82Slm66018 * then waits for the request to be processed. 28793af08d82Slm66018 * 28803af08d82Slm66018 * Arguments: 28813af08d82Slm66018 * vdcp - the soft state pointer 28823af08d82Slm66018 * operation - operation we want vds to perform (VD_OP_XXX) 28833af08d82Slm66018 * addr - address of data buf to be read/written. 28843af08d82Slm66018 * nbytes - number of bytes to read/write 28853af08d82Slm66018 * slice - the disk slice this request is for 28863af08d82Slm66018 * offset - relative disk offset 2887*00e3a3e9SAlexandre Chartre * bufp - buf of operation 28883af08d82Slm66018 * dir - direction of operation (READ/WRITE/BOTH) 28893af08d82Slm66018 * 28903af08d82Slm66018 * Return Codes: 28913af08d82Slm66018 * 0 28923af08d82Slm66018 * ENXIO 28933af08d82Slm66018 */ 28943af08d82Slm66018 static int 28953af08d82Slm66018 vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr, 2896*00e3a3e9SAlexandre Chartre size_t nbytes, int slice, diskaddr_t offset, buf_t *bufp, 2897*00e3a3e9SAlexandre Chartre vio_desc_direction_t dir, int flags) 28983af08d82Slm66018 { 2899366a92acSlm66018 int rv = 0; 2900366a92acSlm66018 29013af08d82Slm66018 ASSERT(vdcp != NULL); 290287a7269eSachartre ASSERT(slice == VD_SLICE_NONE || slice < V_NUMPAR); 29033af08d82Slm66018 29043af08d82Slm66018 mutex_enter(&vdcp->lock); 29053af08d82Slm66018 2906366a92acSlm66018 /* 2907366a92acSlm66018 * If 
this is a block read/write operation we update the I/O statistics 2908366a92acSlm66018 * to indicate that the request is being put on the waitq to be 2909366a92acSlm66018 * serviced. 2910366a92acSlm66018 * 2911366a92acSlm66018 * We do it here (a common routine for both synchronous and strategy 2912366a92acSlm66018 * calls) for performance reasons - we are already holding vdc->lock 2913366a92acSlm66018 * so there is no extra locking overhead. We would have to explicitly 2914366a92acSlm66018 * grab the 'lock' mutex to update the stats if we were to do this 2915366a92acSlm66018 * higher up the stack in vdc_strategy() et. al. 2916366a92acSlm66018 */ 2917366a92acSlm66018 if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) { 2918*00e3a3e9SAlexandre Chartre DTRACE_IO1(start, buf_t *, bufp); 291990e2f9dcSlm66018 VD_KSTAT_WAITQ_ENTER(vdcp); 2920366a92acSlm66018 } 2921366a92acSlm66018 2922*00e3a3e9SAlexandre Chartre /* 2923*00e3a3e9SAlexandre Chartre * If the request does not expect the state to be VDC_STATE_RUNNING 2924*00e3a3e9SAlexandre Chartre * then we just try to populate the descriptor ring once. 
2925*00e3a3e9SAlexandre Chartre */ 2926*00e3a3e9SAlexandre Chartre if (!(flags & VDC_OP_STATE_RUNNING)) { 2927*00e3a3e9SAlexandre Chartre rv = vdc_populate_descriptor(vdcp, operation, addr, 2928*00e3a3e9SAlexandre Chartre nbytes, slice, offset, bufp, dir, flags); 2929*00e3a3e9SAlexandre Chartre goto done; 2930*00e3a3e9SAlexandre Chartre } 2931*00e3a3e9SAlexandre Chartre 29323af08d82Slm66018 do { 29333c96341aSnarayan while (vdcp->state != VDC_STATE_RUNNING) { 29343af08d82Slm66018 29353c96341aSnarayan /* return error if detaching */ 29363c96341aSnarayan if (vdcp->state == VDC_STATE_DETACH) { 2937366a92acSlm66018 rv = ENXIO; 2938366a92acSlm66018 goto done; 29393c96341aSnarayan } 2940655fd6a9Sachartre 29412f5224aeSachartre /* 29422f5224aeSachartre * If we are panicking and the disk is not ready then 29432f5224aeSachartre * we can't send any request because we can't complete 29442f5224aeSachartre * the handshake now. 29452f5224aeSachartre */ 29462f5224aeSachartre if (ddi_in_panic()) { 2947366a92acSlm66018 rv = EIO; 2948366a92acSlm66018 goto done; 29492f5224aeSachartre } 29502f5224aeSachartre 2951*00e3a3e9SAlexandre Chartre /* 2952*00e3a3e9SAlexandre Chartre * If the state is faulted, notify that a new I/O is 2953*00e3a3e9SAlexandre Chartre * being submitted to force the system to check if any 2954*00e3a3e9SAlexandre Chartre * server has recovered. 
2955*00e3a3e9SAlexandre Chartre */ 2956*00e3a3e9SAlexandre Chartre if (vdcp->state == VDC_STATE_FAILED) { 2957*00e3a3e9SAlexandre Chartre vdcp->io_pending = B_TRUE; 2958*00e3a3e9SAlexandre Chartre cv_signal(&vdcp->io_pending_cv); 2959*00e3a3e9SAlexandre Chartre } 2960*00e3a3e9SAlexandre Chartre 2961655fd6a9Sachartre cv_wait(&vdcp->running_cv, &vdcp->lock); 2962*00e3a3e9SAlexandre Chartre 2963*00e3a3e9SAlexandre Chartre /* if service is still faulted then fail the request */ 2964*00e3a3e9SAlexandre Chartre if (vdcp->state == VDC_STATE_FAILED) { 2965*00e3a3e9SAlexandre Chartre rv = EIO; 2966*00e3a3e9SAlexandre Chartre goto done; 2967*00e3a3e9SAlexandre Chartre } 29683c96341aSnarayan } 29693c96341aSnarayan 29703af08d82Slm66018 } while (vdc_populate_descriptor(vdcp, operation, addr, 2971*00e3a3e9SAlexandre Chartre nbytes, slice, offset, bufp, dir, flags)); 29723af08d82Slm66018 2973366a92acSlm66018 done: 2974366a92acSlm66018 /* 2975366a92acSlm66018 * If this is a block read/write we update the I/O statistics kstat 2976366a92acSlm66018 * to indicate that this request has been placed on the queue for 2977366a92acSlm66018 * processing (i.e sent to the vDisk server) - iostat(1M) will 2978366a92acSlm66018 * report the time waiting for the vDisk server under the %b column 2979366a92acSlm66018 * In the case of an error we simply take it off the wait queue. 
2980366a92acSlm66018 */ 2981366a92acSlm66018 if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) { 2982366a92acSlm66018 if (rv == 0) { 298390e2f9dcSlm66018 VD_KSTAT_WAITQ_TO_RUNQ(vdcp); 2984*00e3a3e9SAlexandre Chartre DTRACE_PROBE1(send, buf_t *, bufp); 2985366a92acSlm66018 } else { 2986366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_transerrs); 298790e2f9dcSlm66018 VD_KSTAT_WAITQ_EXIT(vdcp); 2988*00e3a3e9SAlexandre Chartre DTRACE_IO1(done, buf_t *, bufp); 2989366a92acSlm66018 } 2990366a92acSlm66018 } 2991366a92acSlm66018 29923af08d82Slm66018 mutex_exit(&vdcp->lock); 2993366a92acSlm66018 2994366a92acSlm66018 return (rv); 29953af08d82Slm66018 } 29963af08d82Slm66018 29973af08d82Slm66018 29983af08d82Slm66018 /* 29993af08d82Slm66018 * Function: 30001ae08745Sheppo * vdc_populate_descriptor 30011ae08745Sheppo * 30021ae08745Sheppo * Description: 30031ae08745Sheppo * This routine writes the data to be transmitted to vds into the 30041ae08745Sheppo * descriptor, notifies vds that the ring has been updated and 30051ae08745Sheppo * then waits for the request to be processed. 30061ae08745Sheppo * 30071ae08745Sheppo * Arguments: 30083af08d82Slm66018 * vdcp - the soft state pointer 30091ae08745Sheppo * operation - operation we want vds to perform (VD_OP_XXX) 30103af08d82Slm66018 * addr - address of data buf to be read/written. 
30113af08d82Slm66018 * nbytes - number of bytes to read/write 30123af08d82Slm66018 * slice - the disk slice this request is for 30133af08d82Slm66018 * offset - relative disk offset 3014*00e3a3e9SAlexandre Chartre * bufp - buf of operation 30153af08d82Slm66018 * dir - direction of operation (READ/WRITE/BOTH) 30161ae08745Sheppo * 30171ae08745Sheppo * Return Codes: 30181ae08745Sheppo * 0 30191ae08745Sheppo * EAGAIN 302017cadca8Slm66018 * ECONNRESET 30211ae08745Sheppo * ENXIO 30221ae08745Sheppo */ 30231ae08745Sheppo static int 30243af08d82Slm66018 vdc_populate_descriptor(vdc_t *vdcp, int operation, caddr_t addr, 3025*00e3a3e9SAlexandre Chartre size_t nbytes, int slice, diskaddr_t offset, 3026*00e3a3e9SAlexandre Chartre buf_t *bufp, vio_desc_direction_t dir, int flags) 30271ae08745Sheppo { 30283af08d82Slm66018 vdc_local_desc_t *local_dep = NULL; /* Local Dring Pointer */ 30293af08d82Slm66018 int idx; /* Index of DRing entry used */ 30303af08d82Slm66018 int next_idx; 30311ae08745Sheppo vio_dring_msg_t dmsg; 30323af08d82Slm66018 size_t msglen; 30338e6a2a04Slm66018 int rv; 30341ae08745Sheppo 30353af08d82Slm66018 ASSERT(MUTEX_HELD(&vdcp->lock)); 30363af08d82Slm66018 vdcp->threads_pending++; 30373af08d82Slm66018 loop: 30383af08d82Slm66018 DMSG(vdcp, 2, ": dring_curr_idx = %d\n", vdcp->dring_curr_idx); 30391ae08745Sheppo 30403af08d82Slm66018 /* Get next available D-Ring entry */ 30413af08d82Slm66018 idx = vdcp->dring_curr_idx; 30423af08d82Slm66018 local_dep = &(vdcp->local_dring[idx]); 30431ae08745Sheppo 30443af08d82Slm66018 if (!local_dep->is_free) { 30453af08d82Slm66018 DMSG(vdcp, 2, "[%d]: dring full - waiting for space\n", 30463af08d82Slm66018 vdcp->instance); 30473af08d82Slm66018 cv_wait(&vdcp->dring_free_cv, &vdcp->lock); 30483af08d82Slm66018 if (vdcp->state == VDC_STATE_RUNNING || 30493af08d82Slm66018 vdcp->state == VDC_STATE_HANDLE_PENDING) { 30503af08d82Slm66018 goto loop; 30513af08d82Slm66018 } 30523af08d82Slm66018 vdcp->threads_pending--; 30533af08d82Slm66018 
return (ECONNRESET); 30541ae08745Sheppo } 30551ae08745Sheppo 30563af08d82Slm66018 next_idx = idx + 1; 30573af08d82Slm66018 if (next_idx >= vdcp->dring_len) 30583af08d82Slm66018 next_idx = 0; 30593af08d82Slm66018 vdcp->dring_curr_idx = next_idx; 30601ae08745Sheppo 30613af08d82Slm66018 ASSERT(local_dep->is_free); 30621ae08745Sheppo 30633af08d82Slm66018 local_dep->operation = operation; 3064d10e4ef2Snarayan local_dep->addr = addr; 30653af08d82Slm66018 local_dep->nbytes = nbytes; 30663af08d82Slm66018 local_dep->slice = slice; 30673af08d82Slm66018 local_dep->offset = offset; 3068*00e3a3e9SAlexandre Chartre local_dep->buf = bufp; 30693af08d82Slm66018 local_dep->dir = dir; 3070*00e3a3e9SAlexandre Chartre local_dep->flags = flags; 30713af08d82Slm66018 30723af08d82Slm66018 local_dep->is_free = B_FALSE; 30733af08d82Slm66018 30743af08d82Slm66018 rv = vdc_map_to_shared_dring(vdcp, idx); 30753af08d82Slm66018 if (rv) { 30763af08d82Slm66018 DMSG(vdcp, 0, "[%d]: cannot bind memory - waiting ..\n", 30773af08d82Slm66018 vdcp->instance); 30783af08d82Slm66018 /* free the descriptor */ 30793af08d82Slm66018 local_dep->is_free = B_TRUE; 30803af08d82Slm66018 vdcp->dring_curr_idx = idx; 30813af08d82Slm66018 cv_wait(&vdcp->membind_cv, &vdcp->lock); 30823af08d82Slm66018 if (vdcp->state == VDC_STATE_RUNNING || 30833af08d82Slm66018 vdcp->state == VDC_STATE_HANDLE_PENDING) { 30843af08d82Slm66018 goto loop; 30851ae08745Sheppo } 30863af08d82Slm66018 vdcp->threads_pending--; 30873af08d82Slm66018 return (ECONNRESET); 30881ae08745Sheppo } 30891ae08745Sheppo 30901ae08745Sheppo /* 30911ae08745Sheppo * Send a msg with the DRing details to vds 30921ae08745Sheppo */ 30931ae08745Sheppo VIO_INIT_DRING_DATA_TAG(dmsg); 30943af08d82Slm66018 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdcp); 30953af08d82Slm66018 dmsg.dring_ident = vdcp->dring_ident; 30961ae08745Sheppo dmsg.start_idx = idx; 30971ae08745Sheppo dmsg.end_idx = idx; 30983af08d82Slm66018 vdcp->seq_num++; 30991ae08745Sheppo 3100366a92acSlm66018 
DTRACE_PROBE2(populate, int, vdcp->instance, 3101366a92acSlm66018 vdc_local_desc_t *, local_dep); 31023af08d82Slm66018 DMSG(vdcp, 2, "ident=0x%lx, st=%u, end=%u, seq=%ld\n", 31033af08d82Slm66018 vdcp->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num); 31041ae08745Sheppo 31053af08d82Slm66018 /* 31063af08d82Slm66018 * note we're still holding the lock here to 31073af08d82Slm66018 * make sure the message goes out in order !!!... 31083af08d82Slm66018 */ 31093af08d82Slm66018 msglen = sizeof (dmsg); 31103af08d82Slm66018 rv = vdc_send(vdcp, (caddr_t)&dmsg, &msglen); 31113af08d82Slm66018 switch (rv) { 31123af08d82Slm66018 case ECONNRESET: 31133af08d82Slm66018 /* 31143af08d82Slm66018 * vdc_send initiates the reset on failure. 31153af08d82Slm66018 * Since the transaction has already been put 31163af08d82Slm66018 * on the local dring, it will automatically get 31173af08d82Slm66018 * retried when the channel is reset. Given that, 31183af08d82Slm66018 * it is ok to just return success even though the 31193af08d82Slm66018 * send failed. 31203af08d82Slm66018 */ 31213af08d82Slm66018 rv = 0; 31223af08d82Slm66018 break; 3123d10e4ef2Snarayan 31243af08d82Slm66018 case 0: /* EOK */ 31253af08d82Slm66018 DMSG(vdcp, 1, "sent via LDC: rv=%d\n", rv); 31263af08d82Slm66018 break; 3127d10e4ef2Snarayan 31283af08d82Slm66018 default: 31293af08d82Slm66018 goto cleanup_and_exit; 31303af08d82Slm66018 } 3131e1ebb9ecSlm66018 31323af08d82Slm66018 vdcp->threads_pending--; 31333af08d82Slm66018 return (rv); 31343af08d82Slm66018 31353af08d82Slm66018 cleanup_and_exit: 31363af08d82Slm66018 DMSG(vdcp, 0, "unexpected error, rv=%d\n", rv); 31373af08d82Slm66018 return (ENXIO); 31381ae08745Sheppo } 31391ae08745Sheppo 31401ae08745Sheppo /* 31413af08d82Slm66018 * Function: 3142*00e3a3e9SAlexandre Chartre * vdc_do_op 3143*00e3a3e9SAlexandre Chartre * 3144*00e3a3e9SAlexandre Chartre * Description: 3145*00e3a3e9SAlexandre Chartre * Wrapper around vdc_submit_request(). 
Each request is associated with a 3146*00e3a3e9SAlexandre Chartre * buf structure. If a buf structure is provided (bufp != NULL) then the 3147*00e3a3e9SAlexandre Chartre * request will be submitted with that buf, and the caller can wait for 3148*00e3a3e9SAlexandre Chartre * completion of the request with biowait(). If a buf structure is not 3149*00e3a3e9SAlexandre Chartre * provided (bufp == NULL) then a buf structure is created and the function 3150*00e3a3e9SAlexandre Chartre * waits for the completion of the request. 3151*00e3a3e9SAlexandre Chartre * 3152*00e3a3e9SAlexandre Chartre * If the flag VD_OP_STATE_RUNNING is set then vdc_submit_request() will 3153*00e3a3e9SAlexandre Chartre * submit the request only when the vdisk is in state VD_STATE_RUNNING. 3154*00e3a3e9SAlexandre Chartre * If the vdisk is not in that state then the vdc_submit_request() will 3155*00e3a3e9SAlexandre Chartre * wait for that state to be reached. After the request is submitted, the 3156*00e3a3e9SAlexandre Chartre * reply will be processed asynchronously by the vdc_process_msg_thread() 3157*00e3a3e9SAlexandre Chartre * thread. 3158*00e3a3e9SAlexandre Chartre * 3159*00e3a3e9SAlexandre Chartre * If the flag VD_OP_STATE_RUNNING is not set then vdc_submit_request() 3160*00e3a3e9SAlexandre Chartre * submit the request whatever the state of the vdisk is. Then vdc_do_op() 3161*00e3a3e9SAlexandre Chartre * will wait for a reply message, process the reply and complete the 3162*00e3a3e9SAlexandre Chartre * request. 3163*00e3a3e9SAlexandre Chartre * 3164*00e3a3e9SAlexandre Chartre * Arguments: 3165*00e3a3e9SAlexandre Chartre * vdc - the soft state pointer 3166*00e3a3e9SAlexandre Chartre * op - operation we want vds to perform (VD_OP_XXX) 3167*00e3a3e9SAlexandre Chartre * addr - address of data buf to be read/written. 
3168*00e3a3e9SAlexandre Chartre * nbytes - number of bytes to read/write 3169*00e3a3e9SAlexandre Chartre * slice - the disk slice this request is for 3170*00e3a3e9SAlexandre Chartre * offset - relative disk offset 3171*00e3a3e9SAlexandre Chartre * bufp - buf structure associated with the request (can be NULL). 3172*00e3a3e9SAlexandre Chartre * dir - direction of operation (READ/WRITE/BOTH) 3173*00e3a3e9SAlexandre Chartre * flags - flags for the request. 3174*00e3a3e9SAlexandre Chartre * 3175*00e3a3e9SAlexandre Chartre * Return Codes: 3176*00e3a3e9SAlexandre Chartre * 0 - the request has been succesfully submitted and completed. 3177*00e3a3e9SAlexandre Chartre * != 0 - the request has failed. In that case, if a buf structure 3178*00e3a3e9SAlexandre Chartre * was provided (bufp != NULL) then the B_ERROR flag is set 3179*00e3a3e9SAlexandre Chartre * and the b_error field of the buf structure is set to EIO. 3180*00e3a3e9SAlexandre Chartre */ 3181*00e3a3e9SAlexandre Chartre static int 3182*00e3a3e9SAlexandre Chartre vdc_do_op(vdc_t *vdc, int op, caddr_t addr, size_t nbytes, int slice, 3183*00e3a3e9SAlexandre Chartre diskaddr_t offset, struct buf *bufp, vio_desc_direction_t dir, int flags) 3184*00e3a3e9SAlexandre Chartre { 3185*00e3a3e9SAlexandre Chartre vio_msg_t vio_msg; 3186*00e3a3e9SAlexandre Chartre struct buf buf; 3187*00e3a3e9SAlexandre Chartre int rv; 3188*00e3a3e9SAlexandre Chartre 3189*00e3a3e9SAlexandre Chartre if (bufp == NULL) { 3190*00e3a3e9SAlexandre Chartre /* 3191*00e3a3e9SAlexandre Chartre * We use buf just as a convenient way to get a notification 3192*00e3a3e9SAlexandre Chartre * that the request is completed, so we initialize buf to the 3193*00e3a3e9SAlexandre Chartre * minimum we need. 
3194*00e3a3e9SAlexandre Chartre */ 3195*00e3a3e9SAlexandre Chartre bioinit(&buf); 3196*00e3a3e9SAlexandre Chartre buf.b_bcount = nbytes; 3197*00e3a3e9SAlexandre Chartre buf.b_flags = B_BUSY; 3198*00e3a3e9SAlexandre Chartre bufp = &buf; 3199*00e3a3e9SAlexandre Chartre } 3200*00e3a3e9SAlexandre Chartre 3201*00e3a3e9SAlexandre Chartre rv = vdc_send_request(vdc, op, addr, nbytes, slice, offset, bufp, 3202*00e3a3e9SAlexandre Chartre dir, flags); 3203*00e3a3e9SAlexandre Chartre 3204*00e3a3e9SAlexandre Chartre if (rv != 0) 3205*00e3a3e9SAlexandre Chartre goto done; 3206*00e3a3e9SAlexandre Chartre 3207*00e3a3e9SAlexandre Chartre /* 3208*00e3a3e9SAlexandre Chartre * If the request should be done in VDC_STATE_RUNNING state then the 3209*00e3a3e9SAlexandre Chartre * reply will be received and processed by vdc_process_msg_thread() 3210*00e3a3e9SAlexandre Chartre * and we just have to handle the panic case. Otherwise we have to 3211*00e3a3e9SAlexandre Chartre * wait for the reply message and process it. 3212*00e3a3e9SAlexandre Chartre */ 3213*00e3a3e9SAlexandre Chartre if (flags & VDC_OP_STATE_RUNNING) { 3214*00e3a3e9SAlexandre Chartre 3215*00e3a3e9SAlexandre Chartre if (ddi_in_panic()) { 3216*00e3a3e9SAlexandre Chartre rv = vdc_drain_response(vdc, bufp); 3217*00e3a3e9SAlexandre Chartre goto done; 3218*00e3a3e9SAlexandre Chartre } 3219*00e3a3e9SAlexandre Chartre 3220*00e3a3e9SAlexandre Chartre } else { 3221*00e3a3e9SAlexandre Chartre /* wait for the response message */ 3222*00e3a3e9SAlexandre Chartre rv = vdc_wait_for_response(vdc, &vio_msg); 3223*00e3a3e9SAlexandre Chartre if (rv) { 3224*00e3a3e9SAlexandre Chartre /* 3225*00e3a3e9SAlexandre Chartre * If this is a block read/write we update the I/O 3226*00e3a3e9SAlexandre Chartre * statistics kstat to take it off the run queue. 
3227*00e3a3e9SAlexandre Chartre */ 3228*00e3a3e9SAlexandre Chartre mutex_enter(&vdc->lock); 3229*00e3a3e9SAlexandre Chartre if (op == VD_OP_BREAD || op == VD_OP_BWRITE) { 3230*00e3a3e9SAlexandre Chartre VD_UPDATE_ERR_STATS(vdc, vd_transerrs); 3231*00e3a3e9SAlexandre Chartre VD_KSTAT_RUNQ_EXIT(vdc); 3232*00e3a3e9SAlexandre Chartre DTRACE_IO1(done, buf_t *, bufp); 3233*00e3a3e9SAlexandre Chartre } 3234*00e3a3e9SAlexandre Chartre mutex_exit(&vdc->lock); 3235*00e3a3e9SAlexandre Chartre goto done; 3236*00e3a3e9SAlexandre Chartre } 3237*00e3a3e9SAlexandre Chartre 3238*00e3a3e9SAlexandre Chartre rv = vdc_process_data_msg(vdc, &vio_msg); 3239*00e3a3e9SAlexandre Chartre if (rv) 3240*00e3a3e9SAlexandre Chartre goto done; 3241*00e3a3e9SAlexandre Chartre } 3242*00e3a3e9SAlexandre Chartre 3243*00e3a3e9SAlexandre Chartre if (bufp == &buf) 3244*00e3a3e9SAlexandre Chartre rv = biowait(bufp); 3245*00e3a3e9SAlexandre Chartre 3246*00e3a3e9SAlexandre Chartre done: 3247*00e3a3e9SAlexandre Chartre if (bufp == &buf) { 3248*00e3a3e9SAlexandre Chartre biofini(bufp); 3249*00e3a3e9SAlexandre Chartre } else if (rv != 0) { 3250*00e3a3e9SAlexandre Chartre bioerror(bufp, EIO); 3251*00e3a3e9SAlexandre Chartre biodone(bufp); 3252*00e3a3e9SAlexandre Chartre } 3253*00e3a3e9SAlexandre Chartre 3254*00e3a3e9SAlexandre Chartre return (rv); 3255*00e3a3e9SAlexandre Chartre } 3256*00e3a3e9SAlexandre Chartre 3257*00e3a3e9SAlexandre Chartre /* 3258*00e3a3e9SAlexandre Chartre * Function: 32593af08d82Slm66018 * vdc_do_sync_op 32603af08d82Slm66018 * 32613af08d82Slm66018 * Description: 3262*00e3a3e9SAlexandre Chartre * Wrapper around vdc_do_op that serializes requests. 32633af08d82Slm66018 * 32643af08d82Slm66018 * Arguments: 32653af08d82Slm66018 * vdcp - the soft state pointer 32663af08d82Slm66018 * operation - operation we want vds to perform (VD_OP_XXX) 32673af08d82Slm66018 * addr - address of data buf to be read/written. 
32683af08d82Slm66018 * nbytes - number of bytes to read/write 32693af08d82Slm66018 * slice - the disk slice this request is for 32703af08d82Slm66018 * offset - relative disk offset 32713af08d82Slm66018 * dir - direction of operation (READ/WRITE/BOTH) 32722f5224aeSachartre * rconflict - check for reservation conflict in case of failure 32732f5224aeSachartre * 32742f5224aeSachartre * rconflict should be set to B_TRUE by most callers. Callers invoking the 32752f5224aeSachartre * VD_OP_SCSICMD operation can set rconflict to B_FALSE if they check the 3276*00e3a3e9SAlexandre Chartre * result of a successful operation with vdc_scsi_status(). 32773af08d82Slm66018 * 32783af08d82Slm66018 * Return Codes: 32793af08d82Slm66018 * 0 32803af08d82Slm66018 * EAGAIN 32813af08d82Slm66018 * EFAULT 32823af08d82Slm66018 * ENXIO 32833af08d82Slm66018 * EIO 32840a55fbb7Slm66018 */ 32853af08d82Slm66018 static int 32863af08d82Slm66018 vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, size_t nbytes, 3287*00e3a3e9SAlexandre Chartre int slice, diskaddr_t offset, vio_desc_direction_t dir, boolean_t rconflict) 32883af08d82Slm66018 { 32893af08d82Slm66018 int status; 3290*00e3a3e9SAlexandre Chartre int flags = VDC_OP_NORMAL; 32911ae08745Sheppo 32921ae08745Sheppo /* 32933af08d82Slm66018 * Grab the lock, if blocked wait until the server 32943af08d82Slm66018 * response causes us to wake up again. 
32953af08d82Slm66018 */ 32963af08d82Slm66018 mutex_enter(&vdcp->lock); 32973af08d82Slm66018 vdcp->sync_op_cnt++; 329811f54b6eSAlexandre Chartre while (vdcp->sync_op_blocked && vdcp->state != VDC_STATE_DETACH) { 329911f54b6eSAlexandre Chartre if (ddi_in_panic()) { 330011f54b6eSAlexandre Chartre /* don't block if we are panicking */ 330111f54b6eSAlexandre Chartre vdcp->sync_op_cnt--; 330211f54b6eSAlexandre Chartre mutex_exit(&vdcp->lock); 330311f54b6eSAlexandre Chartre return (EIO); 330411f54b6eSAlexandre Chartre } else { 33053af08d82Slm66018 cv_wait(&vdcp->sync_blocked_cv, &vdcp->lock); 330611f54b6eSAlexandre Chartre } 330711f54b6eSAlexandre Chartre } 33083af08d82Slm66018 33093af08d82Slm66018 if (vdcp->state == VDC_STATE_DETACH) { 33103af08d82Slm66018 cv_broadcast(&vdcp->sync_blocked_cv); 33113af08d82Slm66018 vdcp->sync_op_cnt--; 33123af08d82Slm66018 mutex_exit(&vdcp->lock); 33133af08d82Slm66018 return (ENXIO); 33143af08d82Slm66018 } 33153af08d82Slm66018 33163af08d82Slm66018 /* now block anyone other thread entering after us */ 33173af08d82Slm66018 vdcp->sync_op_blocked = B_TRUE; 3318*00e3a3e9SAlexandre Chartre 33193af08d82Slm66018 mutex_exit(&vdcp->lock); 33203af08d82Slm66018 3321*00e3a3e9SAlexandre Chartre if (!rconflict) 3322*00e3a3e9SAlexandre Chartre flags &= ~VDC_OP_ERRCHK_CONFLICT; 3323*00e3a3e9SAlexandre Chartre 3324*00e3a3e9SAlexandre Chartre status = vdc_do_op(vdcp, operation, addr, nbytes, slice, offset, 3325*00e3a3e9SAlexandre Chartre NULL, dir, flags); 33263af08d82Slm66018 3327655fd6a9Sachartre mutex_enter(&vdcp->lock); 3328655fd6a9Sachartre 3329*00e3a3e9SAlexandre Chartre DMSG(vdcp, 2, ": operation returned %d\n", status); 33303af08d82Slm66018 33313c96341aSnarayan if (vdcp->state == VDC_STATE_DETACH) { 33323af08d82Slm66018 status = ENXIO; 33332f5224aeSachartre } 33342f5224aeSachartre 33353af08d82Slm66018 vdcp->sync_op_blocked = B_FALSE; 33363af08d82Slm66018 vdcp->sync_op_cnt--; 33373af08d82Slm66018 33383af08d82Slm66018 /* signal the next waiting thread 
*/ 33393af08d82Slm66018 cv_signal(&vdcp->sync_blocked_cv); 33402f5224aeSachartre 33413af08d82Slm66018 mutex_exit(&vdcp->lock); 33423af08d82Slm66018 33433af08d82Slm66018 return (status); 33443af08d82Slm66018 } 33453af08d82Slm66018 33463af08d82Slm66018 33473af08d82Slm66018 /* 33483af08d82Slm66018 * Function: 33493af08d82Slm66018 * vdc_drain_response() 33503af08d82Slm66018 * 33513af08d82Slm66018 * Description: 33521ae08745Sheppo * When a guest is panicking, the completion of requests needs to be 33531ae08745Sheppo * handled differently because interrupts are disabled and vdc 33541ae08745Sheppo * will not get messages. We have to poll for the messages instead. 33553af08d82Slm66018 * 33563c2ebf09Sachartre * Note: since we are panicking we don't implement the io:::done 33573c2ebf09Sachartre * DTrace probe or update the I/O statistics kstats. 3358366a92acSlm66018 * 33593af08d82Slm66018 * Arguments: 33603af08d82Slm66018 * vdc - soft state pointer for this instance of the device driver. 3361*00e3a3e9SAlexandre Chartre * buf - if buf is NULL then we drain all responses, otherwise we 33623c2ebf09Sachartre * poll until we receive a ACK/NACK for the specific I/O 33633c2ebf09Sachartre * described by buf. 33643af08d82Slm66018 * 33653af08d82Slm66018 * Return Code: 336611f54b6eSAlexandre Chartre * 0 - Success. If we were expecting a response to a particular 3367*00e3a3e9SAlexandre Chartre * request then this means that a response has been received. 
33681ae08745Sheppo */ 33693af08d82Slm66018 static int 3370*00e3a3e9SAlexandre Chartre vdc_drain_response(vdc_t *vdc, struct buf *buf) 33713af08d82Slm66018 { 33723af08d82Slm66018 int rv, idx, retries; 33733af08d82Slm66018 size_t msglen; 33743af08d82Slm66018 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 33753af08d82Slm66018 vio_dring_msg_t dmsg; 33763c2ebf09Sachartre struct buf *mbuf; 337711f54b6eSAlexandre Chartre boolean_t ack; 337811f54b6eSAlexandre Chartre 33793af08d82Slm66018 mutex_enter(&vdc->lock); 33803af08d82Slm66018 33811ae08745Sheppo retries = 0; 33821ae08745Sheppo for (;;) { 33831ae08745Sheppo msglen = sizeof (dmsg); 33848cd10891Snarayan rv = ldc_read(vdc->curr_server->ldc_handle, (caddr_t)&dmsg, 33858cd10891Snarayan &msglen); 33868e6a2a04Slm66018 if (rv) { 33878e6a2a04Slm66018 rv = EINVAL; 33881ae08745Sheppo break; 33891ae08745Sheppo } 33901ae08745Sheppo 33911ae08745Sheppo /* 33921ae08745Sheppo * if there are no packets wait and check again 33931ae08745Sheppo */ 33948e6a2a04Slm66018 if ((rv == 0) && (msglen == 0)) { 33951ae08745Sheppo if (retries++ > vdc_dump_retries) { 33968e6a2a04Slm66018 rv = EAGAIN; 33971ae08745Sheppo break; 33981ae08745Sheppo } 33991ae08745Sheppo 3400d10e4ef2Snarayan drv_usecwait(vdc_usec_timeout_dump); 34011ae08745Sheppo continue; 34021ae08745Sheppo } 34031ae08745Sheppo 34041ae08745Sheppo /* 34051ae08745Sheppo * Ignore all messages that are not ACKs/NACKs to 34061ae08745Sheppo * DRing requests. 34071ae08745Sheppo */ 34081ae08745Sheppo if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 34091ae08745Sheppo (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 34103af08d82Slm66018 DMSG(vdc, 0, "discard pkt: type=%d sub=%d env=%d\n", 34111ae08745Sheppo dmsg.tag.vio_msgtype, 34121ae08745Sheppo dmsg.tag.vio_subtype, 34131ae08745Sheppo dmsg.tag.vio_subtype_env); 34141ae08745Sheppo continue; 34151ae08745Sheppo } 34161ae08745Sheppo 34171ae08745Sheppo /* 341811f54b6eSAlexandre Chartre * Record if the packet was ACK'ed or not. 
If the packet was not 341911f54b6eSAlexandre Chartre * ACK'ed then we will just mark the request as failed; we don't 342011f54b6eSAlexandre Chartre * want to reset the connection at this point. 34211ae08745Sheppo */ 34221ae08745Sheppo switch (dmsg.tag.vio_subtype) { 34231ae08745Sheppo case VIO_SUBTYPE_ACK: 342411f54b6eSAlexandre Chartre ack = B_TRUE; 34251ae08745Sheppo break; 34261ae08745Sheppo case VIO_SUBTYPE_NACK: 342711f54b6eSAlexandre Chartre ack = B_FALSE; 34281ae08745Sheppo break; 34291ae08745Sheppo default: 34301ae08745Sheppo continue; 34311ae08745Sheppo } 34321ae08745Sheppo 34333af08d82Slm66018 idx = dmsg.start_idx; 34343af08d82Slm66018 if (idx >= vdc->dring_len) { 34353af08d82Slm66018 DMSG(vdc, 0, "[%d] Bogus ack data : start %d\n", 3436e1ebb9ecSlm66018 vdc->instance, idx); 34373af08d82Slm66018 continue; 34381ae08745Sheppo } 34393af08d82Slm66018 ldep = &vdc->local_dring[idx]; 34403af08d82Slm66018 if (ldep->dep->hdr.dstate != VIO_DESC_DONE) { 34413af08d82Slm66018 DMSG(vdc, 0, "[%d] Entry @ %d - state !DONE %d\n", 34423af08d82Slm66018 vdc->instance, idx, ldep->dep->hdr.dstate); 34431ae08745Sheppo continue; 34441ae08745Sheppo } 34451ae08745Sheppo 3446*00e3a3e9SAlexandre Chartre mbuf = ldep->buf; 3447*00e3a3e9SAlexandre Chartre ASSERT(mbuf != NULL); 3448*00e3a3e9SAlexandre Chartre mbuf->b_resid = mbuf->b_bcount - ldep->dep->payload.nbytes; 3449*00e3a3e9SAlexandre Chartre bioerror(mbuf, ack ? 
ldep->dep->payload.status : EIO); 34503c2ebf09Sachartre biodone(mbuf); 3451*00e3a3e9SAlexandre Chartre 34523af08d82Slm66018 rv = vdc_depopulate_descriptor(vdc, idx); 34533c2ebf09Sachartre if (buf != NULL && buf == mbuf) { 34543c2ebf09Sachartre rv = 0; 345511f54b6eSAlexandre Chartre goto done; 345611f54b6eSAlexandre Chartre } 34573af08d82Slm66018 34583c2ebf09Sachartre /* if this is the last descriptor - break out of loop */ 34593c2ebf09Sachartre if ((idx + 1) % vdc->dring_len == vdc->dring_curr_idx) { 34603c2ebf09Sachartre /* 346111f54b6eSAlexandre Chartre * If we were expecting a response for a particular 346211f54b6eSAlexandre Chartre * request then we return with an error otherwise we 346311f54b6eSAlexandre Chartre * have successfully completed the drain. 34643c2ebf09Sachartre */ 3465*00e3a3e9SAlexandre Chartre rv = (buf != NULL)? ESRCH: 0; 34663c2ebf09Sachartre break; 34673c2ebf09Sachartre } 34683c2ebf09Sachartre } 34693c2ebf09Sachartre 347011f54b6eSAlexandre Chartre done: 34713af08d82Slm66018 mutex_exit(&vdc->lock); 34723af08d82Slm66018 DMSG(vdc, 0, "End idx=%d\n", idx); 34733af08d82Slm66018 34743af08d82Slm66018 return (rv); 34751ae08745Sheppo } 34761ae08745Sheppo 34771ae08745Sheppo 34780a55fbb7Slm66018 /* 34790a55fbb7Slm66018 * Function: 34800a55fbb7Slm66018 * vdc_depopulate_descriptor() 34810a55fbb7Slm66018 * 34820a55fbb7Slm66018 * Description: 34830a55fbb7Slm66018 * 34840a55fbb7Slm66018 * Arguments: 34850a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
34860a55fbb7Slm66018 * idx - Index of the Descriptor Ring entry being modified 34870a55fbb7Slm66018 * 34880a55fbb7Slm66018 * Return Code: 34890a55fbb7Slm66018 * 0 - Success 34900a55fbb7Slm66018 */ 34911ae08745Sheppo static int 34921ae08745Sheppo vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 34931ae08745Sheppo { 34941ae08745Sheppo vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 34951ae08745Sheppo vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 34961ae08745Sheppo int status = ENXIO; 34978e6a2a04Slm66018 int rv = 0; 34981ae08745Sheppo 34991ae08745Sheppo ASSERT(vdc != NULL); 3500e1ebb9ecSlm66018 ASSERT(idx < vdc->dring_len); 35011ae08745Sheppo ldep = &vdc->local_dring[idx]; 35021ae08745Sheppo ASSERT(ldep != NULL); 35033af08d82Slm66018 ASSERT(MUTEX_HELD(&vdc->lock)); 35043af08d82Slm66018 3505366a92acSlm66018 DTRACE_PROBE2(depopulate, int, vdc->instance, vdc_local_desc_t *, ldep); 35063af08d82Slm66018 DMSG(vdc, 2, ": idx = %d\n", idx); 3507366a92acSlm66018 35081ae08745Sheppo dep = ldep->dep; 35091ae08745Sheppo ASSERT(dep != NULL); 3510e1ebb9ecSlm66018 ASSERT((dep->hdr.dstate == VIO_DESC_DONE) || 3511e1ebb9ecSlm66018 (dep->payload.status == ECANCELED)); 35121ae08745Sheppo 3513e1ebb9ecSlm66018 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 35143af08d82Slm66018 35153af08d82Slm66018 ldep->is_free = B_TRUE; 35161ae08745Sheppo status = dep->payload.status; 3517205eeb1aSlm66018 DMSG(vdc, 2, ": is_free = %d : status = %d\n", ldep->is_free, status); 35181ae08745Sheppo 3519eff7243fSlm66018 /* 3520eff7243fSlm66018 * If no buffers were used to transfer information to the server when 3521eff7243fSlm66018 * populating the descriptor then no memory handles need to be unbound 3522eff7243fSlm66018 * and we can return now. 
3523eff7243fSlm66018 */ 3524eff7243fSlm66018 if (ldep->nbytes == 0) { 3525eff7243fSlm66018 cv_signal(&vdc->dring_free_cv); 35268e6a2a04Slm66018 return (status); 3527eff7243fSlm66018 } 35288e6a2a04Slm66018 35291ae08745Sheppo /* 35301ae08745Sheppo * If the upper layer passed in a misaligned address we copied the 35311ae08745Sheppo * data into an aligned buffer before sending it to LDC - we now 35321ae08745Sheppo * copy it back to the original buffer. 35331ae08745Sheppo */ 35341ae08745Sheppo if (ldep->align_addr) { 35351ae08745Sheppo ASSERT(ldep->addr != NULL); 35361ae08745Sheppo 35373c96341aSnarayan if (dep->payload.nbytes > 0) 35383c96341aSnarayan bcopy(ldep->align_addr, ldep->addr, 35393c96341aSnarayan dep->payload.nbytes); 35401ae08745Sheppo kmem_free(ldep->align_addr, 35413c96341aSnarayan sizeof (caddr_t) * P2ROUNDUP(ldep->nbytes, 8)); 35421ae08745Sheppo ldep->align_addr = NULL; 35431ae08745Sheppo } 35441ae08745Sheppo 35458e6a2a04Slm66018 rv = ldc_mem_unbind_handle(ldep->desc_mhdl); 35468e6a2a04Slm66018 if (rv != 0) { 35473af08d82Slm66018 DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)", 35488e6a2a04Slm66018 vdc->instance, ldep->desc_mhdl, idx, rv); 35498e6a2a04Slm66018 /* 35508e6a2a04Slm66018 * The error returned by the vDisk server is more informative 35518e6a2a04Slm66018 * and thus has a higher priority but if it isn't set we ensure 35528e6a2a04Slm66018 * that this function returns an error. 
35538e6a2a04Slm66018 */ 35548e6a2a04Slm66018 if (status == 0) 35558e6a2a04Slm66018 status = EINVAL; 35561ae08745Sheppo } 35571ae08745Sheppo 35583af08d82Slm66018 cv_signal(&vdc->membind_cv); 35593af08d82Slm66018 cv_signal(&vdc->dring_free_cv); 35603af08d82Slm66018 35611ae08745Sheppo return (status); 35621ae08745Sheppo } 35631ae08745Sheppo 35640a55fbb7Slm66018 /* 35650a55fbb7Slm66018 * Function: 35660a55fbb7Slm66018 * vdc_populate_mem_hdl() 35670a55fbb7Slm66018 * 35680a55fbb7Slm66018 * Description: 35690a55fbb7Slm66018 * 35700a55fbb7Slm66018 * Arguments: 35710a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 35720a55fbb7Slm66018 * idx - Index of the Descriptor Ring entry being modified 35730a55fbb7Slm66018 * addr - virtual address being mapped in 35740a55fbb7Slm66018 * nybtes - number of bytes in 'addr' 35750a55fbb7Slm66018 * operation - the vDisk operation being performed (VD_OP_xxx) 35760a55fbb7Slm66018 * 35770a55fbb7Slm66018 * Return Code: 35780a55fbb7Slm66018 * 0 - Success 35790a55fbb7Slm66018 */ 35801ae08745Sheppo static int 35813af08d82Slm66018 vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep) 35821ae08745Sheppo { 35831ae08745Sheppo vd_dring_entry_t *dep = NULL; 35841ae08745Sheppo ldc_mem_handle_t mhdl; 35851ae08745Sheppo caddr_t vaddr; 35863af08d82Slm66018 size_t nbytes; 35874bac2208Snarayan uint8_t perm = LDC_MEM_RW; 35884bac2208Snarayan uint8_t maptype; 35891ae08745Sheppo int rv = 0; 35901ae08745Sheppo int i; 35911ae08745Sheppo 35923af08d82Slm66018 ASSERT(vdcp != NULL); 35931ae08745Sheppo 35943af08d82Slm66018 dep = ldep->dep; 35951ae08745Sheppo mhdl = ldep->desc_mhdl; 35961ae08745Sheppo 35973af08d82Slm66018 switch (ldep->dir) { 35983af08d82Slm66018 case VIO_read_dir: 35991ae08745Sheppo perm = LDC_MEM_W; 36001ae08745Sheppo break; 36011ae08745Sheppo 36023af08d82Slm66018 case VIO_write_dir: 36031ae08745Sheppo perm = LDC_MEM_R; 36041ae08745Sheppo break; 36051ae08745Sheppo 36063af08d82Slm66018 case VIO_both_dir: 
36071ae08745Sheppo perm = LDC_MEM_RW; 36081ae08745Sheppo break; 36091ae08745Sheppo 36101ae08745Sheppo default: 36111ae08745Sheppo ASSERT(0); /* catch bad programming in vdc */ 36121ae08745Sheppo } 36131ae08745Sheppo 36141ae08745Sheppo /* 36151ae08745Sheppo * LDC expects any addresses passed in to be 8-byte aligned. We need 36161ae08745Sheppo * to copy the contents of any misaligned buffers to a newly allocated 36171ae08745Sheppo * buffer and bind it instead (and copy the the contents back to the 36181ae08745Sheppo * original buffer passed in when depopulating the descriptor) 36191ae08745Sheppo */ 36203af08d82Slm66018 vaddr = ldep->addr; 36213af08d82Slm66018 nbytes = ldep->nbytes; 36223af08d82Slm66018 if (((uint64_t)vaddr & 0x7) != 0) { 3623d10e4ef2Snarayan ASSERT(ldep->align_addr == NULL); 36241ae08745Sheppo ldep->align_addr = 36253af08d82Slm66018 kmem_alloc(sizeof (caddr_t) * 36263af08d82Slm66018 P2ROUNDUP(nbytes, 8), KM_SLEEP); 36273af08d82Slm66018 DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating " 36283af08d82Slm66018 "(buf=%p nb=%ld op=%d)\n", 36293af08d82Slm66018 vdcp->instance, (void *)vaddr, (void *)ldep->align_addr, 36303af08d82Slm66018 nbytes, ldep->operation); 36313af08d82Slm66018 if (perm != LDC_MEM_W) 36323af08d82Slm66018 bcopy(vaddr, ldep->align_addr, nbytes); 36331ae08745Sheppo vaddr = ldep->align_addr; 36341ae08745Sheppo } 36351ae08745Sheppo 36364bac2208Snarayan maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP; 36371ae08745Sheppo rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 363887a7269eSachartre maptype, perm, &dep->payload.cookie[0], &dep->payload.ncookies); 36393af08d82Slm66018 DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n", 36403af08d82Slm66018 vdcp->instance, dep->payload.ncookies); 36411ae08745Sheppo if (rv != 0) { 36423af08d82Slm66018 DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle " 36433af08d82Slm66018 "(mhdl=%p, buf=%p, err=%d)\n", 36443af08d82Slm66018 vdcp->instance, (void *)mhdl, (void *)vaddr, rv); 
36451ae08745Sheppo if (ldep->align_addr) { 36461ae08745Sheppo kmem_free(ldep->align_addr, 3647d10e4ef2Snarayan sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); 36481ae08745Sheppo ldep->align_addr = NULL; 36491ae08745Sheppo } 36501ae08745Sheppo return (EAGAIN); 36511ae08745Sheppo } 36521ae08745Sheppo 36531ae08745Sheppo /* 36541ae08745Sheppo * Get the other cookies (if any). 36551ae08745Sheppo */ 36561ae08745Sheppo for (i = 1; i < dep->payload.ncookies; i++) { 36571ae08745Sheppo rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 36581ae08745Sheppo if (rv != 0) { 36591ae08745Sheppo (void) ldc_mem_unbind_handle(mhdl); 36603af08d82Slm66018 DMSG(vdcp, 0, "?[%d] Failed to get next cookie " 3661e1ebb9ecSlm66018 "(mhdl=%lx cnum=%d), err=%d", 36623af08d82Slm66018 vdcp->instance, mhdl, i, rv); 36631ae08745Sheppo if (ldep->align_addr) { 36641ae08745Sheppo kmem_free(ldep->align_addr, 36653c96341aSnarayan sizeof (caddr_t) * ldep->nbytes); 36661ae08745Sheppo ldep->align_addr = NULL; 36671ae08745Sheppo } 36681ae08745Sheppo return (EAGAIN); 36691ae08745Sheppo } 36701ae08745Sheppo } 36711ae08745Sheppo 36721ae08745Sheppo return (rv); 36731ae08745Sheppo } 36741ae08745Sheppo 36751ae08745Sheppo /* 36761ae08745Sheppo * Interrupt handlers for messages from LDC 36771ae08745Sheppo */ 36781ae08745Sheppo 36790a55fbb7Slm66018 /* 36800a55fbb7Slm66018 * Function: 36810a55fbb7Slm66018 * vdc_handle_cb() 36820a55fbb7Slm66018 * 36830a55fbb7Slm66018 * Description: 36840a55fbb7Slm66018 * 36850a55fbb7Slm66018 * Arguments: 36860a55fbb7Slm66018 * event - Type of event (LDC_EVT_xxx) that triggered the callback 36870a55fbb7Slm66018 * arg - soft state pointer for this instance of the device driver. 
36880a55fbb7Slm66018 * 36890a55fbb7Slm66018 * Return Code: 36900a55fbb7Slm66018 * 0 - Success 36910a55fbb7Slm66018 */ 36921ae08745Sheppo static uint_t 36931ae08745Sheppo vdc_handle_cb(uint64_t event, caddr_t arg) 36941ae08745Sheppo { 36951ae08745Sheppo ldc_status_t ldc_state; 36961ae08745Sheppo int rv = 0; 36978cd10891Snarayan vdc_server_t *srvr = (vdc_server_t *)(void *)arg; 36988cd10891Snarayan vdc_t *vdc = srvr->vdcp; 36991ae08745Sheppo 37001ae08745Sheppo ASSERT(vdc != NULL); 37011ae08745Sheppo 37023af08d82Slm66018 DMSG(vdc, 1, "evt=%lx seqID=%ld\n", event, vdc->seq_num); 37031ae08745Sheppo 37048cd10891Snarayan /* If callback is not for the current server, ignore it */ 37058cd10891Snarayan mutex_enter(&vdc->lock); 37068cd10891Snarayan 37078cd10891Snarayan if (vdc->curr_server != srvr) { 37088cd10891Snarayan DMSG(vdc, 0, "[%d] Ignoring event 0x%lx for port@%ld\n", 37098cd10891Snarayan vdc->instance, event, srvr->id); 37108cd10891Snarayan mutex_exit(&vdc->lock); 37118cd10891Snarayan return (LDC_SUCCESS); 37128cd10891Snarayan } 37138cd10891Snarayan 37141ae08745Sheppo /* 37151ae08745Sheppo * Depending on the type of event that triggered this callback, 37163af08d82Slm66018 * we modify the handshake state or read the data. 37171ae08745Sheppo * 37181ae08745Sheppo * NOTE: not done as a switch() as event could be triggered by 37191ae08745Sheppo * a state change and a read request. Also the ordering of the 37201ae08745Sheppo * check for the event types is deliberate. 
37211ae08745Sheppo */ 37221ae08745Sheppo if (event & LDC_EVT_UP) { 37233af08d82Slm66018 DMSG(vdc, 0, "[%d] Received LDC_EVT_UP\n", vdc->instance); 37243af08d82Slm66018 37251ae08745Sheppo /* get LDC state */ 37268cd10891Snarayan rv = ldc_status(srvr->ldc_handle, &ldc_state); 37271ae08745Sheppo if (rv != 0) { 37283af08d82Slm66018 DMSG(vdc, 0, "[%d] Couldn't get LDC status %d", 37291ae08745Sheppo vdc->instance, rv); 37308cd10891Snarayan mutex_exit(&vdc->lock); 37311ae08745Sheppo return (LDC_SUCCESS); 37321ae08745Sheppo } 37338cd10891Snarayan if (srvr->ldc_state != LDC_UP && 37348cd10891Snarayan ldc_state == LDC_UP) { 37351ae08745Sheppo /* 37363af08d82Slm66018 * Reset the transaction sequence numbers when 37373af08d82Slm66018 * LDC comes up. We then kick off the handshake 37383af08d82Slm66018 * negotiation with the vDisk server. 37391ae08745Sheppo */ 37400a55fbb7Slm66018 vdc->seq_num = 1; 37411ae08745Sheppo vdc->seq_num_reply = 0; 3742*00e3a3e9SAlexandre Chartre vdc->io_pending = B_TRUE; 37438cd10891Snarayan srvr->ldc_state = ldc_state; 37443af08d82Slm66018 cv_signal(&vdc->initwait_cv); 3745*00e3a3e9SAlexandre Chartre cv_signal(&vdc->io_pending_cv); 37463af08d82Slm66018 } 37471ae08745Sheppo } 37481ae08745Sheppo 37491ae08745Sheppo if (event & LDC_EVT_READ) { 375017cadca8Slm66018 DMSG(vdc, 1, "[%d] Received LDC_EVT_READ\n", vdc->instance); 37513af08d82Slm66018 mutex_enter(&vdc->read_lock); 37523af08d82Slm66018 cv_signal(&vdc->read_cv); 37533af08d82Slm66018 vdc->read_state = VDC_READ_PENDING; 37543af08d82Slm66018 mutex_exit(&vdc->read_lock); 37558cd10891Snarayan mutex_exit(&vdc->lock); 37561ae08745Sheppo 37571ae08745Sheppo /* that's all we have to do - no need to handle DOWN/RESET */ 37581ae08745Sheppo return (LDC_SUCCESS); 37591ae08745Sheppo } 37601ae08745Sheppo 37613af08d82Slm66018 if (event & (LDC_EVT_RESET|LDC_EVT_DOWN)) { 37620a55fbb7Slm66018 37633af08d82Slm66018 DMSG(vdc, 0, "[%d] Received LDC RESET event\n", vdc->instance); 37643af08d82Slm66018 37653af08d82Slm66018 
/* 37663af08d82Slm66018 * Need to wake up any readers so they will 37673af08d82Slm66018 * detect that a reset has occurred. 37683af08d82Slm66018 */ 37693af08d82Slm66018 mutex_enter(&vdc->read_lock); 37703af08d82Slm66018 if ((vdc->read_state == VDC_READ_WAITING) || 37713af08d82Slm66018 (vdc->read_state == VDC_READ_RESET)) 37723af08d82Slm66018 cv_signal(&vdc->read_cv); 37733af08d82Slm66018 vdc->read_state = VDC_READ_RESET; 37743af08d82Slm66018 mutex_exit(&vdc->read_lock); 37750a55fbb7Slm66018 37763af08d82Slm66018 /* wake up any threads waiting for connection to come up */ 37773af08d82Slm66018 if (vdc->state == VDC_STATE_INIT_WAITING) { 37783af08d82Slm66018 vdc->state = VDC_STATE_RESETTING; 37793af08d82Slm66018 cv_signal(&vdc->initwait_cv); 3780*00e3a3e9SAlexandre Chartre } else if (vdc->state == VDC_STATE_FAILED) { 3781*00e3a3e9SAlexandre Chartre vdc->io_pending = B_TRUE; 3782*00e3a3e9SAlexandre Chartre cv_signal(&vdc->io_pending_cv); 37831ae08745Sheppo } 37841ae08745Sheppo 37851ae08745Sheppo } 37861ae08745Sheppo 37878cd10891Snarayan mutex_exit(&vdc->lock); 37888cd10891Snarayan 37891ae08745Sheppo if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 37903af08d82Slm66018 DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received", 37911ae08745Sheppo vdc->instance, event); 37921ae08745Sheppo 37931ae08745Sheppo return (LDC_SUCCESS); 37941ae08745Sheppo } 37951ae08745Sheppo 37963af08d82Slm66018 /* 37973af08d82Slm66018 * Function: 37983af08d82Slm66018 * vdc_wait_for_response() 37993af08d82Slm66018 * 38003af08d82Slm66018 * Description: 38013af08d82Slm66018 * Block waiting for a response from the server. If there is 38023af08d82Slm66018 * no data the thread block on the read_cv that is signalled 38033af08d82Slm66018 * by the callback when an EVT_READ occurs. 38043af08d82Slm66018 * 38053af08d82Slm66018 * Arguments: 38063af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 
38073af08d82Slm66018 * 38083af08d82Slm66018 * Return Code: 38093af08d82Slm66018 * 0 - Success 38103af08d82Slm66018 */ 38113af08d82Slm66018 static int 38123af08d82Slm66018 vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp) 38133af08d82Slm66018 { 38143af08d82Slm66018 size_t nbytes = sizeof (*msgp); 38153af08d82Slm66018 int status; 38163af08d82Slm66018 38173af08d82Slm66018 ASSERT(vdcp != NULL); 38183af08d82Slm66018 38193af08d82Slm66018 DMSG(vdcp, 1, "[%d] Entered\n", vdcp->instance); 38203af08d82Slm66018 38213af08d82Slm66018 status = vdc_recv(vdcp, msgp, &nbytes); 38223af08d82Slm66018 DMSG(vdcp, 3, "vdc_read() done.. status=0x%x size=0x%x\n", 38233af08d82Slm66018 status, (int)nbytes); 38243af08d82Slm66018 if (status) { 38253af08d82Slm66018 DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n", 38263af08d82Slm66018 vdcp->instance, status); 38273af08d82Slm66018 return (status); 38283af08d82Slm66018 } 38293af08d82Slm66018 38303af08d82Slm66018 if (nbytes < sizeof (vio_msg_tag_t)) { 38313af08d82Slm66018 DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n", 38323af08d82Slm66018 vdcp->instance, sizeof (vio_msg_tag_t), nbytes); 38333af08d82Slm66018 return (ENOMSG); 38343af08d82Slm66018 } 38353af08d82Slm66018 38363af08d82Slm66018 DMSG(vdcp, 2, "[%d] (%x/%x/%x)\n", vdcp->instance, 38373af08d82Slm66018 msgp->tag.vio_msgtype, 38383af08d82Slm66018 msgp->tag.vio_subtype, 38393af08d82Slm66018 msgp->tag.vio_subtype_env); 38403af08d82Slm66018 38413af08d82Slm66018 /* 38423af08d82Slm66018 * Verify the Session ID of the message 38433af08d82Slm66018 * 38443af08d82Slm66018 * Every message after the Version has been negotiated should 38453af08d82Slm66018 * have the correct session ID set. 
38463af08d82Slm66018 */ 38473af08d82Slm66018 if ((msgp->tag.vio_sid != vdcp->session_id) && 38483af08d82Slm66018 (msgp->tag.vio_subtype_env != VIO_VER_INFO)) { 38493af08d82Slm66018 DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, " 38503af08d82Slm66018 "expected 0x%lx [seq num %lx @ %d]", 38513af08d82Slm66018 vdcp->instance, msgp->tag.vio_sid, 38523af08d82Slm66018 vdcp->session_id, 38533af08d82Slm66018 ((vio_dring_msg_t *)msgp)->seq_num, 38543af08d82Slm66018 ((vio_dring_msg_t *)msgp)->start_idx); 38553af08d82Slm66018 return (ENOMSG); 38563af08d82Slm66018 } 38573af08d82Slm66018 return (0); 38583af08d82Slm66018 } 38593af08d82Slm66018 38603af08d82Slm66018 38613af08d82Slm66018 /* 38623af08d82Slm66018 * Function: 38633af08d82Slm66018 * vdc_resubmit_backup_dring() 38643af08d82Slm66018 * 38653af08d82Slm66018 * Description: 38663af08d82Slm66018 * Resubmit each descriptor in the backed up dring to 38673af08d82Slm66018 * vDisk server. The Dring was backed up during connection 38683af08d82Slm66018 * reset. 38693af08d82Slm66018 * 38703af08d82Slm66018 * Arguments: 38713af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 
38723af08d82Slm66018 * 38733af08d82Slm66018 * Return Code: 38743af08d82Slm66018 * 0 - Success 38753af08d82Slm66018 */ 38763af08d82Slm66018 static int 38773af08d82Slm66018 vdc_resubmit_backup_dring(vdc_t *vdcp) 38783af08d82Slm66018 { 387990e2f9dcSlm66018 int processed = 0; 38803af08d82Slm66018 int count; 38813af08d82Slm66018 int b_idx; 388290e2f9dcSlm66018 int rv = 0; 38833af08d82Slm66018 int dring_size; 38843af08d82Slm66018 vdc_local_desc_t *curr_ldep; 38853af08d82Slm66018 38863af08d82Slm66018 ASSERT(MUTEX_NOT_HELD(&vdcp->lock)); 38873af08d82Slm66018 ASSERT(vdcp->state == VDC_STATE_HANDLE_PENDING); 38883af08d82Slm66018 3889655fd6a9Sachartre if (vdcp->local_dring_backup == NULL) { 3890655fd6a9Sachartre /* the pending requests have already been processed */ 3891655fd6a9Sachartre return (0); 3892655fd6a9Sachartre } 3893655fd6a9Sachartre 38943af08d82Slm66018 DMSG(vdcp, 1, "restoring pending dring entries (len=%d, tail=%d)\n", 38953af08d82Slm66018 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 38963af08d82Slm66018 38973af08d82Slm66018 /* 38983af08d82Slm66018 * Walk the backup copy of the local descriptor ring and 38993af08d82Slm66018 * resubmit all the outstanding transactions. 
39003af08d82Slm66018 */ 39013af08d82Slm66018 b_idx = vdcp->local_dring_backup_tail; 39023af08d82Slm66018 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 39033af08d82Slm66018 39043af08d82Slm66018 curr_ldep = &(vdcp->local_dring_backup[b_idx]); 39053af08d82Slm66018 3906eff7243fSlm66018 /* only resubmit outstanding transactions */ 39073af08d82Slm66018 if (!curr_ldep->is_free) { 39083af08d82Slm66018 39093af08d82Slm66018 DMSG(vdcp, 1, "resubmitting entry idx=%x\n", b_idx); 3910*00e3a3e9SAlexandre Chartre 3911*00e3a3e9SAlexandre Chartre rv = vdc_do_op(vdcp, curr_ldep->operation, 39123af08d82Slm66018 curr_ldep->addr, curr_ldep->nbytes, 39133af08d82Slm66018 curr_ldep->slice, curr_ldep->offset, 3914*00e3a3e9SAlexandre Chartre curr_ldep->buf, curr_ldep->dir, 3915*00e3a3e9SAlexandre Chartre curr_ldep->flags & ~VDC_OP_STATE_RUNNING); 391690e2f9dcSlm66018 39173af08d82Slm66018 if (rv) { 3918*00e3a3e9SAlexandre Chartre DMSG(vdcp, 1, "[%d] resubmit entry %d failed\n", 39193af08d82Slm66018 vdcp->instance, b_idx); 392090e2f9dcSlm66018 goto done; 39213af08d82Slm66018 } 39223af08d82Slm66018 392390e2f9dcSlm66018 /* 3924630f014dSrameshc * Mark this entry as free so that we will not resubmit 3925630f014dSrameshc * this "done" request again, if we were to use the same 3926630f014dSrameshc * backup_dring again in future. This could happen when 3927630f014dSrameshc * a reset happens while processing the backup_dring. 
3928630f014dSrameshc */ 3929630f014dSrameshc curr_ldep->is_free = B_TRUE; 393090e2f9dcSlm66018 processed++; 39313af08d82Slm66018 } 39323af08d82Slm66018 39333af08d82Slm66018 /* get the next element to submit */ 39343af08d82Slm66018 if (++b_idx >= vdcp->local_dring_backup_len) 39353af08d82Slm66018 b_idx = 0; 39363af08d82Slm66018 } 39373af08d82Slm66018 39383af08d82Slm66018 /* all done - now clear up pending dring copy */ 39393af08d82Slm66018 dring_size = vdcp->local_dring_backup_len * 39403af08d82Slm66018 sizeof (vdcp->local_dring_backup[0]); 39413af08d82Slm66018 39423af08d82Slm66018 (void) kmem_free(vdcp->local_dring_backup, dring_size); 39433af08d82Slm66018 39443af08d82Slm66018 vdcp->local_dring_backup = NULL; 39453af08d82Slm66018 394690e2f9dcSlm66018 done: 394790e2f9dcSlm66018 DTRACE_PROBE2(processed, int, processed, vdc_t *, vdcp); 394890e2f9dcSlm66018 394990e2f9dcSlm66018 return (rv); 39503af08d82Slm66018 } 39513af08d82Slm66018 39523af08d82Slm66018 /* 39533af08d82Slm66018 * Function: 3954655fd6a9Sachartre * vdc_cancel_backup_dring 3955655fd6a9Sachartre * 3956655fd6a9Sachartre * Description: 3957655fd6a9Sachartre * Cancel each descriptor in the backed up dring to vDisk server. 3958655fd6a9Sachartre * The Dring was backed up during connection reset. 3959655fd6a9Sachartre * 3960655fd6a9Sachartre * Arguments: 3961655fd6a9Sachartre * vdcp - soft state pointer for this instance of the device driver. 
3962655fd6a9Sachartre * 3963655fd6a9Sachartre * Return Code: 3964655fd6a9Sachartre * None 3965655fd6a9Sachartre */ 3966655fd6a9Sachartre void 396790e2f9dcSlm66018 vdc_cancel_backup_dring(vdc_t *vdcp) 3968655fd6a9Sachartre { 3969655fd6a9Sachartre vdc_local_desc_t *ldep; 3970655fd6a9Sachartre struct buf *bufp; 3971655fd6a9Sachartre int count; 3972655fd6a9Sachartre int b_idx; 3973655fd6a9Sachartre int dring_size; 397490e2f9dcSlm66018 int cancelled = 0; 3975655fd6a9Sachartre 3976655fd6a9Sachartre ASSERT(MUTEX_HELD(&vdcp->lock)); 3977*00e3a3e9SAlexandre Chartre ASSERT(vdcp->state == VDC_STATE_FAILED); 3978655fd6a9Sachartre 3979655fd6a9Sachartre if (vdcp->local_dring_backup == NULL) { 3980655fd6a9Sachartre /* the pending requests have already been processed */ 3981655fd6a9Sachartre return; 3982655fd6a9Sachartre } 3983655fd6a9Sachartre 3984655fd6a9Sachartre DMSG(vdcp, 1, "cancelling pending dring entries (len=%d, tail=%d)\n", 3985655fd6a9Sachartre vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3986655fd6a9Sachartre 3987655fd6a9Sachartre /* 3988655fd6a9Sachartre * Walk the backup copy of the local descriptor ring and 3989655fd6a9Sachartre * cancel all the outstanding transactions. 
3990655fd6a9Sachartre */ 3991655fd6a9Sachartre b_idx = vdcp->local_dring_backup_tail; 3992655fd6a9Sachartre for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3993655fd6a9Sachartre 3994655fd6a9Sachartre ldep = &(vdcp->local_dring_backup[b_idx]); 3995655fd6a9Sachartre 3996655fd6a9Sachartre /* only cancel outstanding transactions */ 3997655fd6a9Sachartre if (!ldep->is_free) { 3998655fd6a9Sachartre 3999655fd6a9Sachartre DMSG(vdcp, 1, "cancelling entry idx=%x\n", b_idx); 400090e2f9dcSlm66018 cancelled++; 4001655fd6a9Sachartre 4002655fd6a9Sachartre /* 4003655fd6a9Sachartre * All requests have already been cleared from the 4004655fd6a9Sachartre * local descriptor ring and the LDC channel has been 4005655fd6a9Sachartre * reset so we will never get any reply for these 4006655fd6a9Sachartre * requests. Now we just have to notify threads waiting 4007655fd6a9Sachartre * for replies that the request has failed. 4008655fd6a9Sachartre */ 4009*00e3a3e9SAlexandre Chartre bufp = ldep->buf; 4010655fd6a9Sachartre ASSERT(bufp != NULL); 4011655fd6a9Sachartre bufp->b_resid = bufp->b_bcount; 4012*00e3a3e9SAlexandre Chartre if (ldep->operation == VD_OP_BREAD || 4013*00e3a3e9SAlexandre Chartre ldep->operation == VD_OP_BWRITE) { 4014366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 401590e2f9dcSlm66018 VD_KSTAT_RUNQ_EXIT(vdcp); 4016366a92acSlm66018 DTRACE_IO1(done, buf_t *, bufp); 4017*00e3a3e9SAlexandre Chartre } 4018655fd6a9Sachartre bioerror(bufp, EIO); 4019655fd6a9Sachartre biodone(bufp); 4020655fd6a9Sachartre } 4021655fd6a9Sachartre 4022655fd6a9Sachartre /* get the next element to cancel */ 4023655fd6a9Sachartre if (++b_idx >= vdcp->local_dring_backup_len) 4024655fd6a9Sachartre b_idx = 0; 4025655fd6a9Sachartre } 4026655fd6a9Sachartre 4027655fd6a9Sachartre /* all done - now clear up pending dring copy */ 4028655fd6a9Sachartre dring_size = vdcp->local_dring_backup_len * 4029655fd6a9Sachartre sizeof (vdcp->local_dring_backup[0]); 4030655fd6a9Sachartre 
4031655fd6a9Sachartre (void) kmem_free(vdcp->local_dring_backup, dring_size); 4032655fd6a9Sachartre 4033655fd6a9Sachartre vdcp->local_dring_backup = NULL; 4034655fd6a9Sachartre 403590e2f9dcSlm66018 DTRACE_PROBE2(cancelled, int, cancelled, vdc_t *, vdcp); 4036655fd6a9Sachartre } 4037655fd6a9Sachartre 4038655fd6a9Sachartre /* 4039655fd6a9Sachartre * Function: 4040655fd6a9Sachartre * vdc_connection_timeout 4041655fd6a9Sachartre * 4042655fd6a9Sachartre * Description: 4043655fd6a9Sachartre * This function is invoked if the timeout set to establish the connection 4044655fd6a9Sachartre * with vds expires. This will happen if we spend too much time in the 4045*00e3a3e9SAlexandre Chartre * VDC_STATE_INIT_WAITING or VDC_STATE_NEGOTIATE states. 4046655fd6a9Sachartre * 4047655fd6a9Sachartre * If the timeout does not expire, it will be cancelled when we reach the 4048*00e3a3e9SAlexandre Chartre * VDC_STATE_HANDLE_PENDING, VDC_STATE_FAILED or VDC_STATE_DETACH state. 4049*00e3a3e9SAlexandre Chartre * This function can also be invoked while we are in those states, in 4050*00e3a3e9SAlexandre Chartre * which case we do nothing because the timeout is being cancelled. 4051655fd6a9Sachartre * 4052655fd6a9Sachartre * Arguments: 4053655fd6a9Sachartre * arg - argument of the timeout function actually a soft state 4054655fd6a9Sachartre * pointer for the instance of the device driver. 
4055655fd6a9Sachartre * 4056655fd6a9Sachartre * Return Code: 4057655fd6a9Sachartre * None 4058655fd6a9Sachartre */ 4059655fd6a9Sachartre void 4060655fd6a9Sachartre vdc_connection_timeout(void *arg) 4061655fd6a9Sachartre { 4062655fd6a9Sachartre vdc_t *vdcp = (vdc_t *)arg; 4063655fd6a9Sachartre 4064655fd6a9Sachartre mutex_enter(&vdcp->lock); 4065655fd6a9Sachartre 4066655fd6a9Sachartre if (vdcp->state == VDC_STATE_HANDLE_PENDING || 4067*00e3a3e9SAlexandre Chartre vdcp->state == VDC_STATE_DETACH || 4068*00e3a3e9SAlexandre Chartre vdcp->state == VDC_STATE_FAILED) { 4069655fd6a9Sachartre /* 4070*00e3a3e9SAlexandre Chartre * The connection has just been re-established, has failed or 4071655fd6a9Sachartre * we are detaching. 4072655fd6a9Sachartre */ 4073655fd6a9Sachartre vdcp->ctimeout_reached = B_FALSE; 4074*00e3a3e9SAlexandre Chartre } else { 4075*00e3a3e9SAlexandre Chartre vdcp->ctimeout_reached = B_TRUE; 4076655fd6a9Sachartre } 4077655fd6a9Sachartre 4078655fd6a9Sachartre mutex_exit(&vdcp->lock); 4079655fd6a9Sachartre } 4080655fd6a9Sachartre 4081655fd6a9Sachartre /* 4082655fd6a9Sachartre * Function: 40833af08d82Slm66018 * vdc_backup_local_dring() 40843af08d82Slm66018 * 40853af08d82Slm66018 * Description: 40863af08d82Slm66018 * Backup the current dring in the event of a reset. The Dring 40873af08d82Slm66018 * transactions will be resubmitted to the server when the 40883af08d82Slm66018 * connection is restored. 40893af08d82Slm66018 * 40903af08d82Slm66018 * Arguments: 40913af08d82Slm66018 * vdcp - soft state pointer for this instance of the device driver. 
40923af08d82Slm66018 * 40933af08d82Slm66018 * Return Code: 40943af08d82Slm66018 * NONE 40953af08d82Slm66018 */ 40963af08d82Slm66018 static void 40973af08d82Slm66018 vdc_backup_local_dring(vdc_t *vdcp) 40983af08d82Slm66018 { 40993af08d82Slm66018 int dring_size; 41003af08d82Slm66018 4101655fd6a9Sachartre ASSERT(MUTEX_HELD(&vdcp->lock)); 41023af08d82Slm66018 ASSERT(vdcp->state == VDC_STATE_RESETTING); 41033af08d82Slm66018 41043af08d82Slm66018 /* 41053af08d82Slm66018 * If the backup dring is stil around, it means 41063af08d82Slm66018 * that the last restore did not complete. However, 41073af08d82Slm66018 * since we never got back into the running state, 41083af08d82Slm66018 * the backup copy we have is still valid. 41093af08d82Slm66018 */ 41103af08d82Slm66018 if (vdcp->local_dring_backup != NULL) { 41113af08d82Slm66018 DMSG(vdcp, 1, "reusing local descriptor ring backup " 41123af08d82Slm66018 "(len=%d, tail=%d)\n", vdcp->local_dring_backup_len, 41133af08d82Slm66018 vdcp->local_dring_backup_tail); 41143af08d82Slm66018 return; 41153af08d82Slm66018 } 41163af08d82Slm66018 4117655fd6a9Sachartre /* 4118655fd6a9Sachartre * The backup dring can be NULL and the local dring may not be 4119655fd6a9Sachartre * initialized. This can happen if we had a reset while establishing 4120655fd6a9Sachartre * a new connection but after the connection has timed out. In that 4121655fd6a9Sachartre * case the backup dring is NULL because the requests have been 4122655fd6a9Sachartre * cancelled and the request occured before the local dring is 4123655fd6a9Sachartre * initialized. 
4124655fd6a9Sachartre */ 4125655fd6a9Sachartre if (!(vdcp->initialized & VDC_DRING_LOCAL)) 4126655fd6a9Sachartre return; 4127655fd6a9Sachartre 41283af08d82Slm66018 DMSG(vdcp, 1, "backing up the local descriptor ring (len=%d, " 41293af08d82Slm66018 "tail=%d)\n", vdcp->dring_len, vdcp->dring_curr_idx); 41303af08d82Slm66018 41313af08d82Slm66018 dring_size = vdcp->dring_len * sizeof (vdcp->local_dring[0]); 41323af08d82Slm66018 41333af08d82Slm66018 vdcp->local_dring_backup = kmem_alloc(dring_size, KM_SLEEP); 41343af08d82Slm66018 bcopy(vdcp->local_dring, vdcp->local_dring_backup, dring_size); 41353af08d82Slm66018 41363af08d82Slm66018 vdcp->local_dring_backup_tail = vdcp->dring_curr_idx; 41373af08d82Slm66018 vdcp->local_dring_backup_len = vdcp->dring_len; 41383af08d82Slm66018 } 41393af08d82Slm66018 41408cd10891Snarayan static void 41418cd10891Snarayan vdc_switch_server(vdc_t *vdcp) 41428cd10891Snarayan { 41438cd10891Snarayan int rv; 41448cd10891Snarayan vdc_server_t *curr_server, *new_server; 41458cd10891Snarayan 41468cd10891Snarayan ASSERT(MUTEX_HELD(&vdcp->lock)); 41478cd10891Snarayan 41488cd10891Snarayan /* if there is only one server return back */ 41498cd10891Snarayan if (vdcp->num_servers == 1) { 41508cd10891Snarayan return; 41518cd10891Snarayan } 41528cd10891Snarayan 41538cd10891Snarayan /* Get current and next server */ 41548cd10891Snarayan curr_server = vdcp->curr_server; 41558cd10891Snarayan new_server = 41568cd10891Snarayan (curr_server->next) ? 
curr_server->next : vdcp->server_list; 41578cd10891Snarayan ASSERT(curr_server != new_server); 41588cd10891Snarayan 41598cd10891Snarayan /* bring current server's channel down */ 41608cd10891Snarayan rv = ldc_down(curr_server->ldc_handle); 41618cd10891Snarayan if (rv) { 41628cd10891Snarayan DMSG(vdcp, 0, "[%d] Cannot bring channel down, port %ld\n", 41638cd10891Snarayan vdcp->instance, curr_server->id); 41648cd10891Snarayan return; 41658cd10891Snarayan } 41668cd10891Snarayan 41678cd10891Snarayan /* switch the server */ 41688cd10891Snarayan vdcp->curr_server = new_server; 41698cd10891Snarayan 41708cd10891Snarayan DMSG(vdcp, 0, "[%d] Switched to next vdisk server, port@%ld, ldc@%ld\n", 41718cd10891Snarayan vdcp->instance, vdcp->curr_server->id, vdcp->curr_server->ldc_id); 41728cd10891Snarayan } 41738cd10891Snarayan 4174*00e3a3e9SAlexandre Chartre static void 4175*00e3a3e9SAlexandre Chartre vdc_print_svc_status(vdc_t *vdcp) 4176*00e3a3e9SAlexandre Chartre { 4177*00e3a3e9SAlexandre Chartre int instance; 4178*00e3a3e9SAlexandre Chartre uint64_t ldc_id, port_id; 4179*00e3a3e9SAlexandre Chartre vdc_service_state_t svc_state; 4180*00e3a3e9SAlexandre Chartre 4181*00e3a3e9SAlexandre Chartre ASSERT(mutex_owned(&vdcp->lock)); 4182*00e3a3e9SAlexandre Chartre 4183*00e3a3e9SAlexandre Chartre svc_state = vdcp->curr_server->svc_state; 4184*00e3a3e9SAlexandre Chartre 4185*00e3a3e9SAlexandre Chartre if (vdcp->curr_server->log_state == svc_state) 4186*00e3a3e9SAlexandre Chartre return; 4187*00e3a3e9SAlexandre Chartre 4188*00e3a3e9SAlexandre Chartre instance = vdcp->instance; 4189*00e3a3e9SAlexandre Chartre ldc_id = vdcp->curr_server->ldc_id; 4190*00e3a3e9SAlexandre Chartre port_id = vdcp->curr_server->id; 4191*00e3a3e9SAlexandre Chartre 4192*00e3a3e9SAlexandre Chartre switch (svc_state) { 4193*00e3a3e9SAlexandre Chartre 4194*00e3a3e9SAlexandre Chartre case VDC_SERVICE_OFFLINE: 4195*00e3a3e9SAlexandre Chartre cmn_err(CE_CONT, "?vdisk@%d is offline\n", instance); 4196*00e3a3e9SAlexandre 
Chartre break; 4197*00e3a3e9SAlexandre Chartre 4198*00e3a3e9SAlexandre Chartre case VDC_SERVICE_CONNECTED: 4199*00e3a3e9SAlexandre Chartre cmn_err(CE_CONT, "?vdisk@%d is connected using ldc@%ld,%ld\n", 4200*00e3a3e9SAlexandre Chartre instance, ldc_id, port_id); 4201*00e3a3e9SAlexandre Chartre break; 4202*00e3a3e9SAlexandre Chartre 4203*00e3a3e9SAlexandre Chartre case VDC_SERVICE_ONLINE: 4204*00e3a3e9SAlexandre Chartre cmn_err(CE_CONT, "?vdisk@%d is online using ldc@%ld,%ld\n", 4205*00e3a3e9SAlexandre Chartre instance, ldc_id, port_id); 4206*00e3a3e9SAlexandre Chartre break; 4207*00e3a3e9SAlexandre Chartre 4208*00e3a3e9SAlexandre Chartre case VDC_SERVICE_FAILED: 4209*00e3a3e9SAlexandre Chartre cmn_err(CE_CONT, "?vdisk@%d access to service failed " 4210*00e3a3e9SAlexandre Chartre "using ldc@%ld,%ld\n", instance, ldc_id, port_id); 4211*00e3a3e9SAlexandre Chartre break; 4212*00e3a3e9SAlexandre Chartre 4213*00e3a3e9SAlexandre Chartre case VDC_SERVICE_FAULTED: 4214*00e3a3e9SAlexandre Chartre cmn_err(CE_CONT, "?vdisk@%d access to backend failed " 4215*00e3a3e9SAlexandre Chartre "using ldc@%ld,%ld\n", instance, ldc_id, port_id); 4216*00e3a3e9SAlexandre Chartre break; 4217*00e3a3e9SAlexandre Chartre 4218*00e3a3e9SAlexandre Chartre default: 4219*00e3a3e9SAlexandre Chartre ASSERT(0); 4220*00e3a3e9SAlexandre Chartre break; 4221*00e3a3e9SAlexandre Chartre } 4222*00e3a3e9SAlexandre Chartre 4223*00e3a3e9SAlexandre Chartre vdcp->curr_server->log_state = svc_state; 4224*00e3a3e9SAlexandre Chartre } 4225*00e3a3e9SAlexandre Chartre 42261ae08745Sheppo /* -------------------------------------------------------------------------- */ 42271ae08745Sheppo 42281ae08745Sheppo /* 42291ae08745Sheppo * The following functions process the incoming messages from vds 42301ae08745Sheppo */ 42311ae08745Sheppo 42320a55fbb7Slm66018 /* 42330a55fbb7Slm66018 * Function: 42340a55fbb7Slm66018 * vdc_process_msg_thread() 42350a55fbb7Slm66018 * 42360a55fbb7Slm66018 * Description: 42370a55fbb7Slm66018 * 
42383af08d82Slm66018 * Main VDC message processing thread. Each vDisk instance 42393af08d82Slm66018 * consists of a copy of this thread. This thread triggers 42403af08d82Slm66018 * all the handshakes and data exchange with the server. It 42413af08d82Slm66018 * also handles all channel resets 42423af08d82Slm66018 * 42430a55fbb7Slm66018 * Arguments: 42440a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 42450a55fbb7Slm66018 * 42460a55fbb7Slm66018 * Return Code: 42470a55fbb7Slm66018 * None 42480a55fbb7Slm66018 */ 42491ae08745Sheppo static void 42503af08d82Slm66018 vdc_process_msg_thread(vdc_t *vdcp) 42511ae08745Sheppo { 42521ae08745Sheppo int status; 4253655fd6a9Sachartre int ctimeout; 4254655fd6a9Sachartre timeout_id_t tmid = 0; 42558cd10891Snarayan clock_t ldcup_timeout = 0; 4256*00e3a3e9SAlexandre Chartre vdc_server_t *srvr; 4257*00e3a3e9SAlexandre Chartre vdc_service_state_t svc_state; 42581ae08745Sheppo 42593af08d82Slm66018 mutex_enter(&vdcp->lock); 42601ae08745Sheppo 42611ae08745Sheppo for (;;) { 42621ae08745Sheppo 42633af08d82Slm66018 #define Q(_s) (vdcp->state == _s) ? #_s : 42643af08d82Slm66018 DMSG(vdcp, 3, "state = %d (%s)\n", vdcp->state, 42653af08d82Slm66018 Q(VDC_STATE_INIT) 42663af08d82Slm66018 Q(VDC_STATE_INIT_WAITING) 42673af08d82Slm66018 Q(VDC_STATE_NEGOTIATE) 42683af08d82Slm66018 Q(VDC_STATE_HANDLE_PENDING) 4269*00e3a3e9SAlexandre Chartre Q(VDC_STATE_FAULTED) 4270*00e3a3e9SAlexandre Chartre Q(VDC_STATE_FAILED) 42713af08d82Slm66018 Q(VDC_STATE_RUNNING) 42723af08d82Slm66018 Q(VDC_STATE_RESETTING) 42733af08d82Slm66018 Q(VDC_STATE_DETACH) 42743af08d82Slm66018 "UNKNOWN"); 42751ae08745Sheppo 42763af08d82Slm66018 switch (vdcp->state) { 42773af08d82Slm66018 case VDC_STATE_INIT: 42783af08d82Slm66018 4279655fd6a9Sachartre /* 4280655fd6a9Sachartre * If requested, start a timeout to check if the 4281655fd6a9Sachartre * connection with vds is established in the 4282655fd6a9Sachartre * specified delay. 
If the timeout expires, we 4283655fd6a9Sachartre * will cancel any pending request. 4284655fd6a9Sachartre * 4285655fd6a9Sachartre * If some reset have occurred while establishing 4286655fd6a9Sachartre * the connection, we already have a timeout armed 4287655fd6a9Sachartre * and in that case we don't need to arm a new one. 42888cd10891Snarayan * 42898cd10891Snarayan * The same rule applies when there are multiple vds'. 42908cd10891Snarayan * If either a connection cannot be established or 42918cd10891Snarayan * the handshake times out, the connection thread will 42928cd10891Snarayan * try another server. The 'ctimeout' will report 42938cd10891Snarayan * back an error after it expires irrespective of 42948cd10891Snarayan * whether the vdisk is trying to connect to just 42958cd10891Snarayan * one or multiple servers. 4296655fd6a9Sachartre */ 4297655fd6a9Sachartre ctimeout = (vdc_timeout != 0)? 42988cd10891Snarayan vdc_timeout : vdcp->curr_server->ctimeout; 4299655fd6a9Sachartre 4300655fd6a9Sachartre if (ctimeout != 0 && tmid == 0) { 4301655fd6a9Sachartre tmid = timeout(vdc_connection_timeout, vdcp, 43028cd10891Snarayan ctimeout * drv_usectohz(MICROSEC)); 4303655fd6a9Sachartre } 4304655fd6a9Sachartre 4305*00e3a3e9SAlexandre Chartre /* Switch to STATE_DETACH if drv is detaching */ 4306*00e3a3e9SAlexandre Chartre if (vdcp->lifecycle == VDC_LC_DETACHING) { 4307*00e3a3e9SAlexandre Chartre vdcp->state = VDC_STATE_DETACH; 4308*00e3a3e9SAlexandre Chartre break; 4309*00e3a3e9SAlexandre Chartre } 4310*00e3a3e9SAlexandre Chartre 4311*00e3a3e9SAlexandre Chartre /* Check if the timeout has been reached */ 4312*00e3a3e9SAlexandre Chartre if (vdcp->ctimeout_reached) { 4313*00e3a3e9SAlexandre Chartre ASSERT(tmid != 0); 4314*00e3a3e9SAlexandre Chartre tmid = 0; 4315*00e3a3e9SAlexandre Chartre vdcp->state = VDC_STATE_FAILED; 4316*00e3a3e9SAlexandre Chartre break; 4317*00e3a3e9SAlexandre Chartre } 4318*00e3a3e9SAlexandre Chartre 43198cd10891Snarayan /* Check if we are re-initializing 
repeatedly */ 43208cd10891Snarayan if (vdcp->hshake_cnt > vdc_hshake_retries && 4321655fd6a9Sachartre vdcp->lifecycle != VDC_LC_ONLINE) { 43228cd10891Snarayan 43238cd10891Snarayan DMSG(vdcp, 0, "[%d] too many handshakes,cnt=%d", 43248cd10891Snarayan vdcp->instance, vdcp->hshake_cnt); 4325*00e3a3e9SAlexandre Chartre vdcp->state = VDC_STATE_FAILED; 43268cd10891Snarayan break; 43278cd10891Snarayan } 43288cd10891Snarayan 43298cd10891Snarayan /* Switch server */ 43308cd10891Snarayan if (vdcp->hshake_cnt > 0) 43318cd10891Snarayan vdc_switch_server(vdcp); 43328cd10891Snarayan vdcp->hshake_cnt++; 43338cd10891Snarayan 43343af08d82Slm66018 /* Bring up connection with vds via LDC */ 43353af08d82Slm66018 status = vdc_start_ldc_connection(vdcp); 43368cd10891Snarayan if (status != EINVAL) { 43373af08d82Slm66018 vdcp->state = VDC_STATE_INIT_WAITING; 4338*00e3a3e9SAlexandre Chartre } else { 4339*00e3a3e9SAlexandre Chartre vdcp->curr_server->svc_state = 4340*00e3a3e9SAlexandre Chartre VDC_SERVICE_FAILED; 4341*00e3a3e9SAlexandre Chartre vdc_print_svc_status(vdcp); 43423af08d82Slm66018 } 43433af08d82Slm66018 break; 43443af08d82Slm66018 43453af08d82Slm66018 case VDC_STATE_INIT_WAITING: 43463af08d82Slm66018 43478cd10891Snarayan /* if channel is UP, start negotiation */ 43488cd10891Snarayan if (vdcp->curr_server->ldc_state == LDC_UP) { 43498cd10891Snarayan vdcp->state = VDC_STATE_NEGOTIATE; 43508cd10891Snarayan break; 43518cd10891Snarayan } 43528cd10891Snarayan 43538cd10891Snarayan /* 4354*00e3a3e9SAlexandre Chartre * Wait for LDC_UP. If it times out and we have multiple 4355*00e3a3e9SAlexandre Chartre * servers then we will retry using a different server. 
43568cd10891Snarayan */ 4357*00e3a3e9SAlexandre Chartre ldcup_timeout = ddi_get_lbolt() + (vdc_ldcup_timeout * 43588cd10891Snarayan drv_usectohz(MICROSEC)); 4359*00e3a3e9SAlexandre Chartre status = cv_timedwait(&vdcp->initwait_cv, &vdcp->lock, 4360*00e3a3e9SAlexandre Chartre ldcup_timeout); 43618cd10891Snarayan if (status == -1 && 43628cd10891Snarayan vdcp->state == VDC_STATE_INIT_WAITING && 43638cd10891Snarayan vdcp->curr_server->ldc_state != LDC_UP) { 43648cd10891Snarayan /* timed out & still waiting */ 4365*00e3a3e9SAlexandre Chartre vdcp->curr_server->svc_state = 4366*00e3a3e9SAlexandre Chartre VDC_SERVICE_FAILED; 4367*00e3a3e9SAlexandre Chartre vdc_print_svc_status(vdcp); 43688cd10891Snarayan vdcp->state = VDC_STATE_INIT; 43698cd10891Snarayan break; 43708cd10891Snarayan } 43718cd10891Snarayan 43723af08d82Slm66018 if (vdcp->state != VDC_STATE_INIT_WAITING) { 43733af08d82Slm66018 DMSG(vdcp, 0, 43743af08d82Slm66018 "state moved to %d out from under us...\n", 43753af08d82Slm66018 vdcp->state); 43763af08d82Slm66018 } 43773af08d82Slm66018 break; 43783af08d82Slm66018 43793af08d82Slm66018 case VDC_STATE_NEGOTIATE: 43803af08d82Slm66018 switch (status = vdc_ver_negotiation(vdcp)) { 43813af08d82Slm66018 case 0: 43823af08d82Slm66018 break; 43833af08d82Slm66018 default: 43843af08d82Slm66018 DMSG(vdcp, 0, "ver negotiate failed (%d)..\n", 43853af08d82Slm66018 status); 43863af08d82Slm66018 goto reset; 43873af08d82Slm66018 } 43883af08d82Slm66018 43893af08d82Slm66018 switch (status = vdc_attr_negotiation(vdcp)) { 43903af08d82Slm66018 case 0: 43913af08d82Slm66018 break; 43923af08d82Slm66018 default: 43933af08d82Slm66018 DMSG(vdcp, 0, "attr negotiate failed (%d)..\n", 43943af08d82Slm66018 status); 43953af08d82Slm66018 goto reset; 43963af08d82Slm66018 } 43973af08d82Slm66018 43983af08d82Slm66018 switch (status = vdc_dring_negotiation(vdcp)) { 43993af08d82Slm66018 case 0: 44003af08d82Slm66018 break; 44013af08d82Slm66018 default: 44023af08d82Slm66018 DMSG(vdcp, 0, "dring negotiate 
failed (%d)..\n", 44033af08d82Slm66018 status); 44043af08d82Slm66018 goto reset; 44053af08d82Slm66018 } 44063af08d82Slm66018 44073af08d82Slm66018 switch (status = vdc_rdx_exchange(vdcp)) { 44083af08d82Slm66018 case 0: 44093af08d82Slm66018 vdcp->state = VDC_STATE_HANDLE_PENDING; 44103af08d82Slm66018 goto done; 44113af08d82Slm66018 default: 44123af08d82Slm66018 DMSG(vdcp, 0, "RDX xchg failed ..(%d)\n", 44133af08d82Slm66018 status); 44143af08d82Slm66018 goto reset; 44153af08d82Slm66018 } 44163af08d82Slm66018 reset: 44173af08d82Slm66018 DMSG(vdcp, 0, "negotiation failed: resetting (%d)\n", 44183af08d82Slm66018 status); 44193af08d82Slm66018 vdcp->state = VDC_STATE_RESETTING; 4420655fd6a9Sachartre vdcp->self_reset = B_TRUE; 4421*00e3a3e9SAlexandre Chartre vdcp->curr_server->svc_state = VDC_SERVICE_FAILED; 4422*00e3a3e9SAlexandre Chartre vdc_print_svc_status(vdcp); 44233af08d82Slm66018 done: 44243af08d82Slm66018 DMSG(vdcp, 0, "negotiation complete (state=0x%x)...\n", 44253af08d82Slm66018 vdcp->state); 44263af08d82Slm66018 break; 44273af08d82Slm66018 44283af08d82Slm66018 case VDC_STATE_HANDLE_PENDING: 44293af08d82Slm66018 4430*00e3a3e9SAlexandre Chartre DMSG(vdcp, 0, "[%d] connection to service domain is up", 4431*00e3a3e9SAlexandre Chartre vdcp->instance); 4432*00e3a3e9SAlexandre Chartre vdcp->curr_server->svc_state = VDC_SERVICE_CONNECTED; 4433*00e3a3e9SAlexandre Chartre 4434*00e3a3e9SAlexandre Chartre mutex_exit(&vdcp->lock); 4435*00e3a3e9SAlexandre Chartre 4436655fd6a9Sachartre /* 4437*00e3a3e9SAlexandre Chartre * If we have multiple servers, check that the backend 4438*00e3a3e9SAlexandre Chartre * is effectively available before resubmitting any IO. 
4439655fd6a9Sachartre */ 4440*00e3a3e9SAlexandre Chartre if (vdcp->num_servers > 1 && 4441*00e3a3e9SAlexandre Chartre vdc_eio_check(vdcp, 0) != 0) { 4442*00e3a3e9SAlexandre Chartre mutex_enter(&vdcp->lock); 4443*00e3a3e9SAlexandre Chartre vdcp->curr_server->svc_state = 4444*00e3a3e9SAlexandre Chartre VDC_SERVICE_FAULTED; 4445*00e3a3e9SAlexandre Chartre vdcp->state = VDC_STATE_FAULTED; 4446655fd6a9Sachartre break; 4447655fd6a9Sachartre } 4448655fd6a9Sachartre 4449*00e3a3e9SAlexandre Chartre if (tmid != 0) { 4450*00e3a3e9SAlexandre Chartre (void) untimeout(tmid); 4451*00e3a3e9SAlexandre Chartre tmid = 0; 4452*00e3a3e9SAlexandre Chartre vdcp->ctimeout_reached = B_FALSE; 4453*00e3a3e9SAlexandre Chartre } 4454*00e3a3e9SAlexandre Chartre 4455*00e3a3e9SAlexandre Chartre /* 4456*00e3a3e9SAlexandre Chartre * Setup devid 4457*00e3a3e9SAlexandre Chartre */ 4458*00e3a3e9SAlexandre Chartre (void) vdc_setup_devid(vdcp); 4459*00e3a3e9SAlexandre Chartre 4460*00e3a3e9SAlexandre Chartre status = vdc_resubmit_backup_dring(vdcp); 4461*00e3a3e9SAlexandre Chartre 4462*00e3a3e9SAlexandre Chartre mutex_enter(&vdcp->lock); 4463*00e3a3e9SAlexandre Chartre 4464*00e3a3e9SAlexandre Chartre if (status) { 4465*00e3a3e9SAlexandre Chartre vdcp->state = VDC_STATE_RESETTING; 4466*00e3a3e9SAlexandre Chartre vdcp->self_reset = B_TRUE; 4467*00e3a3e9SAlexandre Chartre vdcp->curr_server->svc_state = 4468*00e3a3e9SAlexandre Chartre VDC_SERVICE_FAILED; 4469*00e3a3e9SAlexandre Chartre vdc_print_svc_status(vdcp); 4470*00e3a3e9SAlexandre Chartre } else { 4471*00e3a3e9SAlexandre Chartre vdcp->state = VDC_STATE_RUNNING; 4472*00e3a3e9SAlexandre Chartre } 4473*00e3a3e9SAlexandre Chartre break; 4474*00e3a3e9SAlexandre Chartre 4475*00e3a3e9SAlexandre Chartre case VDC_STATE_FAULTED: 4476*00e3a3e9SAlexandre Chartre /* 4477*00e3a3e9SAlexandre Chartre * Server is faulted because the backend is unavailable. 
4478*00e3a3e9SAlexandre Chartre * If all servers are faulted then we mark the service 4479*00e3a3e9SAlexandre Chartre * as failed, otherwise we reset to switch to another 4480*00e3a3e9SAlexandre Chartre * server. 4481*00e3a3e9SAlexandre Chartre */ 4482*00e3a3e9SAlexandre Chartre vdc_print_svc_status(vdcp); 4483*00e3a3e9SAlexandre Chartre 4484*00e3a3e9SAlexandre Chartre /* check if all servers are faulted */ 4485*00e3a3e9SAlexandre Chartre for (srvr = vdcp->server_list; srvr != NULL; 4486*00e3a3e9SAlexandre Chartre srvr = srvr->next) { 4487*00e3a3e9SAlexandre Chartre svc_state = srvr->svc_state; 4488*00e3a3e9SAlexandre Chartre if (svc_state != VDC_SERVICE_FAULTED) 4489*00e3a3e9SAlexandre Chartre break; 4490*00e3a3e9SAlexandre Chartre } 4491*00e3a3e9SAlexandre Chartre 4492*00e3a3e9SAlexandre Chartre if (srvr != NULL) { 4493*00e3a3e9SAlexandre Chartre vdcp->state = VDC_STATE_RESETTING; 4494*00e3a3e9SAlexandre Chartre vdcp->self_reset = B_TRUE; 4495*00e3a3e9SAlexandre Chartre } else { 4496*00e3a3e9SAlexandre Chartre vdcp->state = VDC_STATE_FAILED; 4497*00e3a3e9SAlexandre Chartre } 4498*00e3a3e9SAlexandre Chartre break; 4499*00e3a3e9SAlexandre Chartre 4500*00e3a3e9SAlexandre Chartre case VDC_STATE_FAILED: 4501*00e3a3e9SAlexandre Chartre /* 4502*00e3a3e9SAlexandre Chartre * We reach this state when we are unable to access the 4503*00e3a3e9SAlexandre Chartre * backend from any server, either because of a maximum 4504*00e3a3e9SAlexandre Chartre * connection retries or timeout, or because the backend 4505*00e3a3e9SAlexandre Chartre * is unavailable. 4506*00e3a3e9SAlexandre Chartre * 4507*00e3a3e9SAlexandre Chartre * Then we cancel the backup DRing so that errors get 4508*00e3a3e9SAlexandre Chartre * reported and we wait for a new I/O before attempting 4509*00e3a3e9SAlexandre Chartre * another connection. 
4510*00e3a3e9SAlexandre Chartre */ 4511*00e3a3e9SAlexandre Chartre cmn_err(CE_NOTE, "vdisk@%d disk access failed", 4512*00e3a3e9SAlexandre Chartre vdcp->instance); 4513*00e3a3e9SAlexandre Chartre 4514*00e3a3e9SAlexandre Chartre /* cancel any timeout */ 4515655fd6a9Sachartre if (tmid != 0) { 4516655fd6a9Sachartre (void) untimeout(tmid); 4517655fd6a9Sachartre tmid = 0; 4518655fd6a9Sachartre } 45193af08d82Slm66018 4520*00e3a3e9SAlexandre Chartre /* cancel pending I/Os */ 4521*00e3a3e9SAlexandre Chartre cv_broadcast(&vdcp->running_cv); 4522*00e3a3e9SAlexandre Chartre vdc_cancel_backup_dring(vdcp); 4523*00e3a3e9SAlexandre Chartre 4524*00e3a3e9SAlexandre Chartre /* wait for new I/O */ 4525*00e3a3e9SAlexandre Chartre while (!vdcp->io_pending) 4526*00e3a3e9SAlexandre Chartre cv_wait(&vdcp->io_pending_cv, &vdcp->lock); 4527*00e3a3e9SAlexandre Chartre 4528*00e3a3e9SAlexandre Chartre /* 4529*00e3a3e9SAlexandre Chartre * There's a new IO pending. Try to re-establish a 4530*00e3a3e9SAlexandre Chartre * connection. Mark all services as offline, so that 4531*00e3a3e9SAlexandre Chartre * we don't stop again before having retried all 4532*00e3a3e9SAlexandre Chartre * servers. 
4533*00e3a3e9SAlexandre Chartre */ 4534*00e3a3e9SAlexandre Chartre for (srvr = vdcp->server_list; srvr != NULL; 4535*00e3a3e9SAlexandre Chartre srvr = srvr->next) { 4536*00e3a3e9SAlexandre Chartre srvr->svc_state = VDC_SERVICE_OFFLINE; 4537*00e3a3e9SAlexandre Chartre } 4538*00e3a3e9SAlexandre Chartre 4539*00e3a3e9SAlexandre Chartre /* reset variables */ 4540*00e3a3e9SAlexandre Chartre vdcp->hshake_cnt = 0; 4541*00e3a3e9SAlexandre Chartre vdcp->ctimeout_reached = B_FALSE; 4542*00e3a3e9SAlexandre Chartre 45433af08d82Slm66018 vdcp->state = VDC_STATE_RESETTING; 4544*00e3a3e9SAlexandre Chartre vdcp->self_reset = B_TRUE; 45453af08d82Slm66018 break; 45463af08d82Slm66018 45473af08d82Slm66018 /* enter running state */ 45483af08d82Slm66018 case VDC_STATE_RUNNING: 45493af08d82Slm66018 /* 45503af08d82Slm66018 * Signal anyone waiting for the connection 45513af08d82Slm66018 * to come on line. 45523af08d82Slm66018 */ 45533af08d82Slm66018 vdcp->hshake_cnt = 0; 45543af08d82Slm66018 cv_broadcast(&vdcp->running_cv); 45552f5224aeSachartre 4556*00e3a3e9SAlexandre Chartre /* backend has to be checked after reset */ 4557*00e3a3e9SAlexandre Chartre if (vdcp->failfast_interval != 0 || 4558*00e3a3e9SAlexandre Chartre vdcp->num_servers > 1) 4559*00e3a3e9SAlexandre Chartre cv_signal(&vdcp->eio_cv); 45602f5224aeSachartre 45612f5224aeSachartre /* ownership is lost during reset */ 45622f5224aeSachartre if (vdcp->ownership & VDC_OWNERSHIP_WANTED) 45632f5224aeSachartre vdcp->ownership |= VDC_OWNERSHIP_RESET; 45642f5224aeSachartre cv_signal(&vdcp->ownership_cv); 45652f5224aeSachartre 4566*00e3a3e9SAlexandre Chartre vdcp->curr_server->svc_state = VDC_SERVICE_ONLINE; 4567*00e3a3e9SAlexandre Chartre vdc_print_svc_status(vdcp); 4568d7400d00Sachartre 45693af08d82Slm66018 mutex_exit(&vdcp->lock); 45703af08d82Slm66018 45713af08d82Slm66018 for (;;) { 45723af08d82Slm66018 vio_msg_t msg; 45733af08d82Slm66018 status = vdc_wait_for_response(vdcp, &msg); 45743af08d82Slm66018 if (status) break; 
45753af08d82Slm66018 45763af08d82Slm66018 DMSG(vdcp, 1, "[%d] new pkt(s) available\n", 45773af08d82Slm66018 vdcp->instance); 45783af08d82Slm66018 status = vdc_process_data_msg(vdcp, &msg); 45791ae08745Sheppo if (status) { 45803af08d82Slm66018 DMSG(vdcp, 1, "[%d] process_data_msg " 45813af08d82Slm66018 "returned err=%d\n", vdcp->instance, 45823af08d82Slm66018 status); 45831ae08745Sheppo break; 45841ae08745Sheppo } 45851ae08745Sheppo 45863af08d82Slm66018 } 4587e1ebb9ecSlm66018 45883af08d82Slm66018 mutex_enter(&vdcp->lock); 45893af08d82Slm66018 4590*00e3a3e9SAlexandre Chartre /* all servers are now offline */ 4591*00e3a3e9SAlexandre Chartre for (srvr = vdcp->server_list; srvr != NULL; 4592*00e3a3e9SAlexandre Chartre srvr = srvr->next) { 4593*00e3a3e9SAlexandre Chartre srvr->svc_state = VDC_SERVICE_OFFLINE; 4594*00e3a3e9SAlexandre Chartre srvr->log_state = VDC_SERVICE_NONE; 4595*00e3a3e9SAlexandre Chartre } 4596*00e3a3e9SAlexandre Chartre 4597*00e3a3e9SAlexandre Chartre vdc_print_svc_status(vdcp); 4598d7400d00Sachartre 45993af08d82Slm66018 vdcp->state = VDC_STATE_RESETTING; 4600690555a1Sachartre vdcp->self_reset = B_TRUE; 46013af08d82Slm66018 break; 46023af08d82Slm66018 46033af08d82Slm66018 case VDC_STATE_RESETTING: 4604655fd6a9Sachartre /* 4605655fd6a9Sachartre * When we reach this state, we either come from the 4606655fd6a9Sachartre * VDC_STATE_RUNNING state and we can have pending 4607655fd6a9Sachartre * request but no timeout is armed; or we come from 4608655fd6a9Sachartre * the VDC_STATE_INIT_WAITING, VDC_NEGOTIATE or 4609655fd6a9Sachartre * VDC_HANDLE_PENDING state and there is no pending 4610655fd6a9Sachartre * request or pending requests have already been copied 4611655fd6a9Sachartre * into the backup dring. So we can safely keep the 4612655fd6a9Sachartre * connection timeout armed while we are in this state. 
4613655fd6a9Sachartre */ 4614655fd6a9Sachartre 46153af08d82Slm66018 DMSG(vdcp, 0, "Initiating channel reset " 46163af08d82Slm66018 "(pending = %d)\n", (int)vdcp->threads_pending); 46173af08d82Slm66018 46183af08d82Slm66018 if (vdcp->self_reset) { 46193af08d82Slm66018 DMSG(vdcp, 0, 46203af08d82Slm66018 "[%d] calling stop_ldc_connection.\n", 46213af08d82Slm66018 vdcp->instance); 46223af08d82Slm66018 status = vdc_stop_ldc_connection(vdcp); 46233af08d82Slm66018 vdcp->self_reset = B_FALSE; 46241ae08745Sheppo } 46251ae08745Sheppo 46261ae08745Sheppo /* 46273af08d82Slm66018 * Wait for all threads currently waiting 46283af08d82Slm66018 * for a free dring entry to use. 46291ae08745Sheppo */ 46303af08d82Slm66018 while (vdcp->threads_pending) { 46313af08d82Slm66018 cv_broadcast(&vdcp->membind_cv); 46323af08d82Slm66018 cv_broadcast(&vdcp->dring_free_cv); 46333af08d82Slm66018 mutex_exit(&vdcp->lock); 4634205eeb1aSlm66018 /* give the waiters enough time to wake up */ 4635205eeb1aSlm66018 delay(vdc_hz_min_ldc_delay); 46363af08d82Slm66018 mutex_enter(&vdcp->lock); 46371ae08745Sheppo } 46381ae08745Sheppo 46393af08d82Slm66018 ASSERT(vdcp->threads_pending == 0); 46401ae08745Sheppo 46413af08d82Slm66018 /* Sanity check that no thread is receiving */ 46423af08d82Slm66018 ASSERT(vdcp->read_state != VDC_READ_WAITING); 46430a55fbb7Slm66018 46443af08d82Slm66018 vdcp->read_state = VDC_READ_IDLE; 4645*00e3a3e9SAlexandre Chartre vdcp->io_pending = B_FALSE; 4646*00e3a3e9SAlexandre Chartre 4647*00e3a3e9SAlexandre Chartre /* 4648*00e3a3e9SAlexandre Chartre * Cleanup any pending eio. These I/Os are going to 4649*00e3a3e9SAlexandre Chartre * be resubmitted. 
4650*00e3a3e9SAlexandre Chartre */ 4651*00e3a3e9SAlexandre Chartre vdc_eio_unqueue(vdcp, 0, B_FALSE); 46523af08d82Slm66018 46533af08d82Slm66018 vdc_backup_local_dring(vdcp); 46543af08d82Slm66018 46553af08d82Slm66018 /* cleanup the old d-ring */ 46563af08d82Slm66018 vdc_destroy_descriptor_ring(vdcp); 46573af08d82Slm66018 46583af08d82Slm66018 /* go and start again */ 46593af08d82Slm66018 vdcp->state = VDC_STATE_INIT; 46603af08d82Slm66018 46610a55fbb7Slm66018 break; 46620a55fbb7Slm66018 46633af08d82Slm66018 case VDC_STATE_DETACH: 46643af08d82Slm66018 DMSG(vdcp, 0, "[%d] Reset thread exit cleanup ..\n", 46653af08d82Slm66018 vdcp->instance); 46663af08d82Slm66018 4667655fd6a9Sachartre /* cancel any pending timeout */ 4668655fd6a9Sachartre mutex_exit(&vdcp->lock); 4669655fd6a9Sachartre if (tmid != 0) { 4670655fd6a9Sachartre (void) untimeout(tmid); 4671655fd6a9Sachartre tmid = 0; 4672655fd6a9Sachartre } 4673655fd6a9Sachartre mutex_enter(&vdcp->lock); 4674655fd6a9Sachartre 46753c96341aSnarayan /* 46763c96341aSnarayan * Signal anyone waiting for connection 46773c96341aSnarayan * to come online 46783c96341aSnarayan */ 46793c96341aSnarayan cv_broadcast(&vdcp->running_cv); 46803c96341aSnarayan 4681*00e3a3e9SAlexandre Chartre while (vdcp->sync_op_cnt > 0) { 4682*00e3a3e9SAlexandre Chartre cv_broadcast(&vdcp->sync_blocked_cv); 46833af08d82Slm66018 mutex_exit(&vdcp->lock); 4684205eeb1aSlm66018 /* give the waiters enough time to wake up */ 4685205eeb1aSlm66018 delay(vdc_hz_min_ldc_delay); 46863af08d82Slm66018 mutex_enter(&vdcp->lock); 46870a55fbb7Slm66018 } 46881ae08745Sheppo 46893af08d82Slm66018 mutex_exit(&vdcp->lock); 46903af08d82Slm66018 46913af08d82Slm66018 DMSG(vdcp, 0, "[%d] Msg processing thread exiting ..\n", 46923af08d82Slm66018 vdcp->instance); 46933af08d82Slm66018 thread_exit(); 46943af08d82Slm66018 break; 46953af08d82Slm66018 } 46963af08d82Slm66018 } 46970a55fbb7Slm66018 } 46980a55fbb7Slm66018 46990a55fbb7Slm66018 47000a55fbb7Slm66018 /* 47010a55fbb7Slm66018 * 
Function: 47020a55fbb7Slm66018 * vdc_process_data_msg() 47030a55fbb7Slm66018 * 47040a55fbb7Slm66018 * Description: 47050a55fbb7Slm66018 * This function is called by the message processing thread each time 47060a55fbb7Slm66018 * a message with a msgtype of VIO_TYPE_DATA is received. It will either 47070a55fbb7Slm66018 * be an ACK or NACK from vds[1] which vdc handles as follows. 47080a55fbb7Slm66018 * ACK - wake up the waiting thread 47090a55fbb7Slm66018 * NACK - resend any messages necessary 47100a55fbb7Slm66018 * 47110a55fbb7Slm66018 * [1] Although the message format allows it, vds should not send a 47120a55fbb7Slm66018 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for 47130a55fbb7Slm66018 * some bizarre reason it does, vdc will reset the connection. 47140a55fbb7Slm66018 * 47150a55fbb7Slm66018 * Arguments: 47160a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 47170a55fbb7Slm66018 * msg - the LDC message sent by vds 47180a55fbb7Slm66018 * 47190a55fbb7Slm66018 * Return Code: 47200a55fbb7Slm66018 * 0 - Success. 
47210a55fbb7Slm66018 * > 0 - error value returned by LDC 47220a55fbb7Slm66018 */ 47230a55fbb7Slm66018 static int 47243af08d82Slm66018 vdc_process_data_msg(vdc_t *vdcp, vio_msg_t *msg) 47250a55fbb7Slm66018 { 47260a55fbb7Slm66018 int status = 0; 47273af08d82Slm66018 vio_dring_msg_t *dring_msg; 4728d10e4ef2Snarayan vdc_local_desc_t *ldep = NULL; 47293af08d82Slm66018 int start, end; 47303af08d82Slm66018 int idx; 473190e2f9dcSlm66018 int op; 47320a55fbb7Slm66018 47333af08d82Slm66018 dring_msg = (vio_dring_msg_t *)msg; 47340a55fbb7Slm66018 47353af08d82Slm66018 ASSERT(msg->tag.vio_msgtype == VIO_TYPE_DATA); 47363af08d82Slm66018 ASSERT(vdcp != NULL); 47373af08d82Slm66018 47383af08d82Slm66018 mutex_enter(&vdcp->lock); 47390a55fbb7Slm66018 47400a55fbb7Slm66018 /* 47410a55fbb7Slm66018 * Check to see if the message has bogus data 47420a55fbb7Slm66018 */ 4743e1ebb9ecSlm66018 idx = start = dring_msg->start_idx; 47440a55fbb7Slm66018 end = dring_msg->end_idx; 47453af08d82Slm66018 if ((start >= vdcp->dring_len) || 47463af08d82Slm66018 (end >= vdcp->dring_len) || (end < -1)) { 474790e2f9dcSlm66018 /* 474890e2f9dcSlm66018 * Update the I/O statistics to indicate that an error ocurred. 474990e2f9dcSlm66018 * No need to update the wait/run queues as no specific read or 475090e2f9dcSlm66018 * write request is being completed in response to this 'msg'. 475190e2f9dcSlm66018 */ 475290e2f9dcSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 47533af08d82Slm66018 DMSG(vdcp, 0, "[%d] Bogus ACK data : start %d, end %d\n", 47543af08d82Slm66018 vdcp->instance, start, end); 47553af08d82Slm66018 mutex_exit(&vdcp->lock); 4756e1ebb9ecSlm66018 return (EINVAL); 47570a55fbb7Slm66018 } 47580a55fbb7Slm66018 47590a55fbb7Slm66018 /* 47600a55fbb7Slm66018 * Verify that the sequence number is what vdc expects. 
47610a55fbb7Slm66018 */ 47623af08d82Slm66018 switch (vdc_verify_seq_num(vdcp, dring_msg)) { 4763e1ebb9ecSlm66018 case VDC_SEQ_NUM_TODO: 4764e1ebb9ecSlm66018 break; /* keep processing this message */ 4765e1ebb9ecSlm66018 case VDC_SEQ_NUM_SKIP: 47663af08d82Slm66018 mutex_exit(&vdcp->lock); 4767e1ebb9ecSlm66018 return (0); 4768e1ebb9ecSlm66018 case VDC_SEQ_NUM_INVALID: 476990e2f9dcSlm66018 /* 477090e2f9dcSlm66018 * Update the I/O statistics to indicate that an error ocurred. 477190e2f9dcSlm66018 * No need to update the wait/run queues as no specific read or 477290e2f9dcSlm66018 * write request is being completed in response to this 'msg'. 477390e2f9dcSlm66018 */ 4774366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 477590e2f9dcSlm66018 DMSG(vdcp, 0, "[%d] invalid seqno\n", vdcp->instance); 4776366a92acSlm66018 mutex_exit(&vdcp->lock); 47770a55fbb7Slm66018 return (ENXIO); 47780a55fbb7Slm66018 } 47790a55fbb7Slm66018 47803af08d82Slm66018 if (msg->tag.vio_subtype == VIO_SUBTYPE_NACK) { 478190e2f9dcSlm66018 /* 478290e2f9dcSlm66018 * Update the I/O statistics to indicate that an error ocurred. 478390e2f9dcSlm66018 * 478490e2f9dcSlm66018 * We need to update the run queue if a read or write request 478590e2f9dcSlm66018 * is being NACKed - otherwise there will appear to be an 478690e2f9dcSlm66018 * indefinite outstanding request and statistics reported by 478790e2f9dcSlm66018 * iostat(1M) will be incorrect. The transaction will be 478890e2f9dcSlm66018 * resubmitted from the backup DRing following the reset 478990e2f9dcSlm66018 * and the wait/run queues will be entered again. 
479090e2f9dcSlm66018 */ 479190e2f9dcSlm66018 ldep = &vdcp->local_dring[idx]; 479290e2f9dcSlm66018 op = ldep->operation; 479390e2f9dcSlm66018 if ((op == VD_OP_BREAD) || (op == VD_OP_BWRITE)) { 4794*00e3a3e9SAlexandre Chartre DTRACE_IO1(done, buf_t *, ldep->buf); 479590e2f9dcSlm66018 VD_KSTAT_RUNQ_EXIT(vdcp); 479690e2f9dcSlm66018 } 4797366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 479890e2f9dcSlm66018 VDC_DUMP_DRING_MSG(dring_msg); 479990e2f9dcSlm66018 DMSG(vdcp, 0, "[%d] DATA NACK\n", vdcp->instance); 48003af08d82Slm66018 mutex_exit(&vdcp->lock); 4801e1ebb9ecSlm66018 return (EIO); 48020a55fbb7Slm66018 48033af08d82Slm66018 } else if (msg->tag.vio_subtype == VIO_SUBTYPE_INFO) { 480490e2f9dcSlm66018 /* 480590e2f9dcSlm66018 * Update the I/O statistics to indicate that an error occurred. 480690e2f9dcSlm66018 * No need to update the wait/run queues as no specific read or 480790e2f9dcSlm66018 * write request is being completed in response to this 'msg'. 480890e2f9dcSlm66018 */ 4809366a92acSlm66018 VD_UPDATE_ERR_STATS(vdcp, vd_protoerrs); 48103af08d82Slm66018 mutex_exit(&vdcp->lock); 4811e1ebb9ecSlm66018 return (EPROTO); 4812e1ebb9ecSlm66018 } 4813e1ebb9ecSlm66018 48143af08d82Slm66018 DMSG(vdcp, 1, ": start %d end %d\n", start, end); 48153af08d82Slm66018 ASSERT(start == end); 48163af08d82Slm66018 48173af08d82Slm66018 ldep = &vdcp->local_dring[idx]; 48183af08d82Slm66018 4819*00e3a3e9SAlexandre Chartre DMSG(vdcp, 1, ": state 0x%x\n", ldep->dep->hdr.dstate); 48203af08d82Slm66018 4821e1ebb9ecSlm66018 if (ldep->dep->hdr.dstate == VIO_DESC_DONE) { 48223af08d82Slm66018 struct buf *bufp; 4823e1ebb9ecSlm66018 4824*00e3a3e9SAlexandre Chartre status = ldep->dep->payload.status; 4825d10e4ef2Snarayan 4826*00e3a3e9SAlexandre Chartre bufp = ldep->buf; 48273af08d82Slm66018 ASSERT(bufp != NULL); 4828*00e3a3e9SAlexandre Chartre 4829*00e3a3e9SAlexandre Chartre bufp->b_resid = bufp->b_bcount - ldep->dep->payload.nbytes; 48303af08d82Slm66018 bioerror(bufp, status); 
4831*00e3a3e9SAlexandre Chartre 4832*00e3a3e9SAlexandre Chartre if (status != 0) { 4833*00e3a3e9SAlexandre Chartre DMSG(vdcp, 1, "I/O status=%d\n", status); 4834d10e4ef2Snarayan } 48352f5224aeSachartre 48363c96341aSnarayan DMSG(vdcp, 1, 4837*00e3a3e9SAlexandre Chartre "I/O complete req=%ld bytes resp=%ld bytes\n", 48383c96341aSnarayan bufp->b_bcount, ldep->dep->payload.nbytes); 48392f5224aeSachartre 48402f5224aeSachartre /* 4841*00e3a3e9SAlexandre Chartre * If the request has failed and we have multiple servers or 4842*00e3a3e9SAlexandre Chartre * failfast is enabled then we will have to defer the completion 4843*00e3a3e9SAlexandre Chartre * of the request until we have checked that the vdisk backend 4844*00e3a3e9SAlexandre Chartre * is effectively available (if multiple server) or that there 4845*00e3a3e9SAlexandre Chartre * is no reservation conflict (if failfast). 48462f5224aeSachartre */ 4847*00e3a3e9SAlexandre Chartre if ((status != 0 && 4848*00e3a3e9SAlexandre Chartre (vdcp->num_servers > 1 && 4849*00e3a3e9SAlexandre Chartre (ldep->flags & VDC_OP_ERRCHK_BACKEND)) || 4850*00e3a3e9SAlexandre Chartre (vdcp->failfast_interval != 0 && 4851*00e3a3e9SAlexandre Chartre (ldep->flags & VDC_OP_ERRCHK_CONFLICT)))) { 4852*00e3a3e9SAlexandre Chartre /* 4853*00e3a3e9SAlexandre Chartre * The I/O has failed and we need to check the error. 
4854*00e3a3e9SAlexandre Chartre */ 4855*00e3a3e9SAlexandre Chartre (void) vdc_eio_queue(vdcp, idx); 48562f5224aeSachartre } else { 4857*00e3a3e9SAlexandre Chartre op = ldep->operation; 4858*00e3a3e9SAlexandre Chartre if (op == VD_OP_BREAD || op == VD_OP_BWRITE) { 4859366a92acSlm66018 if (status == 0) { 4860366a92acSlm66018 VD_UPDATE_IO_STATS(vdcp, op, 4861366a92acSlm66018 ldep->dep->payload.nbytes); 4862*00e3a3e9SAlexandre Chartre } else { 4863*00e3a3e9SAlexandre Chartre VD_UPDATE_ERR_STATS(vdcp, vd_softerrs); 4864366a92acSlm66018 } 486590e2f9dcSlm66018 VD_KSTAT_RUNQ_EXIT(vdcp); 4866366a92acSlm66018 DTRACE_IO1(done, buf_t *, bufp); 48672f5224aeSachartre } 4868*00e3a3e9SAlexandre Chartre (void) vdc_depopulate_descriptor(vdcp, idx); 4869*00e3a3e9SAlexandre Chartre biodone(bufp); 48700a55fbb7Slm66018 } 48713af08d82Slm66018 } 48723af08d82Slm66018 48733af08d82Slm66018 /* let the arrival signal propogate */ 48743af08d82Slm66018 mutex_exit(&vdcp->lock); 48750a55fbb7Slm66018 4876e1ebb9ecSlm66018 /* probe gives the count of how many entries were processed */ 4877366a92acSlm66018 DTRACE_PROBE2(processed, int, 1, vdc_t *, vdcp); 48780a55fbb7Slm66018 48793af08d82Slm66018 return (0); 48800a55fbb7Slm66018 } 48810a55fbb7Slm66018 48820a55fbb7Slm66018 48830a55fbb7Slm66018 /* 48840a55fbb7Slm66018 * Function: 48850a55fbb7Slm66018 * vdc_handle_ver_msg() 48860a55fbb7Slm66018 * 48870a55fbb7Slm66018 * Description: 48880a55fbb7Slm66018 * 48890a55fbb7Slm66018 * Arguments: 48900a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
48910a55fbb7Slm66018 * ver_msg - LDC message sent by vDisk server 48920a55fbb7Slm66018 * 48930a55fbb7Slm66018 * Return Code: 48940a55fbb7Slm66018 * 0 - Success 48950a55fbb7Slm66018 */ 48960a55fbb7Slm66018 static int 48970a55fbb7Slm66018 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg) 48980a55fbb7Slm66018 { 48990a55fbb7Slm66018 int status = 0; 49000a55fbb7Slm66018 49010a55fbb7Slm66018 ASSERT(vdc != NULL); 49020a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 49030a55fbb7Slm66018 49040a55fbb7Slm66018 if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) { 49050a55fbb7Slm66018 return (EPROTO); 49060a55fbb7Slm66018 } 49070a55fbb7Slm66018 49080a55fbb7Slm66018 if (ver_msg->dev_class != VDEV_DISK_SERVER) { 49090a55fbb7Slm66018 return (EINVAL); 49100a55fbb7Slm66018 } 49110a55fbb7Slm66018 49120a55fbb7Slm66018 switch (ver_msg->tag.vio_subtype) { 49130a55fbb7Slm66018 case VIO_SUBTYPE_ACK: 49140a55fbb7Slm66018 /* 49150a55fbb7Slm66018 * We check to see if the version returned is indeed supported 49160a55fbb7Slm66018 * (The server may have also adjusted the minor number downwards 49170a55fbb7Slm66018 * and if so 'ver_msg' will contain the actual version agreed) 49180a55fbb7Slm66018 */ 49190a55fbb7Slm66018 if (vdc_is_supported_version(ver_msg)) { 49200a55fbb7Slm66018 vdc->ver.major = ver_msg->ver_major; 49210a55fbb7Slm66018 vdc->ver.minor = ver_msg->ver_minor; 49220a55fbb7Slm66018 ASSERT(vdc->ver.major > 0); 49230a55fbb7Slm66018 } else { 49240a55fbb7Slm66018 status = EPROTO; 49250a55fbb7Slm66018 } 49260a55fbb7Slm66018 break; 49270a55fbb7Slm66018 49280a55fbb7Slm66018 case VIO_SUBTYPE_NACK: 49290a55fbb7Slm66018 /* 49300a55fbb7Slm66018 * call vdc_is_supported_version() which will return the next 49310a55fbb7Slm66018 * supported version (if any) in 'ver_msg' 49320a55fbb7Slm66018 */ 49330a55fbb7Slm66018 (void) vdc_is_supported_version(ver_msg); 49340a55fbb7Slm66018 if (ver_msg->ver_major > 0) { 49350a55fbb7Slm66018 size_t len = sizeof (*ver_msg); 49360a55fbb7Slm66018 
49370a55fbb7Slm66018 ASSERT(vdc->ver.major > 0); 49380a55fbb7Slm66018 49390a55fbb7Slm66018 /* reset the necessary fields and resend */ 49400a55fbb7Slm66018 ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO; 49410a55fbb7Slm66018 ver_msg->dev_class = VDEV_DISK; 49420a55fbb7Slm66018 49430a55fbb7Slm66018 status = vdc_send(vdc, (caddr_t)ver_msg, &len); 49443af08d82Slm66018 DMSG(vdc, 0, "[%d] Resend VER info (LDC status = %d)\n", 49450a55fbb7Slm66018 vdc->instance, status); 49460a55fbb7Slm66018 if (len != sizeof (*ver_msg)) 49470a55fbb7Slm66018 status = EBADMSG; 49480a55fbb7Slm66018 } else { 494987a7269eSachartre DMSG(vdc, 0, "[%d] No common version with vDisk server", 495087a7269eSachartre vdc->instance); 49510a55fbb7Slm66018 status = ENOTSUP; 49520a55fbb7Slm66018 } 49530a55fbb7Slm66018 49540a55fbb7Slm66018 break; 49551ae08745Sheppo case VIO_SUBTYPE_INFO: 49561ae08745Sheppo /* 49571ae08745Sheppo * Handle the case where vds starts handshake 4958eff7243fSlm66018 * (for now only vdc is the instigator) 49591ae08745Sheppo */ 49601ae08745Sheppo status = ENOTSUP; 49611ae08745Sheppo break; 49621ae08745Sheppo 49631ae08745Sheppo default: 49640a55fbb7Slm66018 status = EINVAL; 49651ae08745Sheppo break; 49661ae08745Sheppo } 49671ae08745Sheppo 49680a55fbb7Slm66018 return (status); 49690a55fbb7Slm66018 } 49700a55fbb7Slm66018 49710a55fbb7Slm66018 /* 49720a55fbb7Slm66018 * Function: 49730a55fbb7Slm66018 * vdc_handle_attr_msg() 49740a55fbb7Slm66018 * 49750a55fbb7Slm66018 * Description: 49760a55fbb7Slm66018 * 49770a55fbb7Slm66018 * Arguments: 49780a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 
49790a55fbb7Slm66018 * attr_msg - LDC message sent by vDisk server 49800a55fbb7Slm66018 * 49810a55fbb7Slm66018 * Return Code: 49820a55fbb7Slm66018 * 0 - Success 49830a55fbb7Slm66018 */ 49840a55fbb7Slm66018 static int 49850a55fbb7Slm66018 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) 49860a55fbb7Slm66018 { 49870a55fbb7Slm66018 int status = 0; 4988*00e3a3e9SAlexandre Chartre vd_disk_type_t old_type; 49890a55fbb7Slm66018 49900a55fbb7Slm66018 ASSERT(vdc != NULL); 49910a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 49920a55fbb7Slm66018 49930a55fbb7Slm66018 if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) { 49940a55fbb7Slm66018 return (EPROTO); 49950a55fbb7Slm66018 } 49960a55fbb7Slm66018 49970a55fbb7Slm66018 switch (attr_msg->tag.vio_subtype) { 49981ae08745Sheppo case VIO_SUBTYPE_ACK: 49991ae08745Sheppo /* 50001ae08745Sheppo * We now verify the attributes sent by vds. 50011ae08745Sheppo */ 500278fcd0a1Sachartre if (attr_msg->vdisk_size == 0) { 500378fcd0a1Sachartre DMSG(vdc, 0, "[%d] Invalid disk size from vds", 500478fcd0a1Sachartre vdc->instance); 500578fcd0a1Sachartre status = EINVAL; 500678fcd0a1Sachartre break; 500778fcd0a1Sachartre } 500878fcd0a1Sachartre 500978fcd0a1Sachartre if (attr_msg->max_xfer_sz == 0) { 501078fcd0a1Sachartre DMSG(vdc, 0, "[%d] Invalid transfer size from vds", 501178fcd0a1Sachartre vdc->instance); 501278fcd0a1Sachartre status = EINVAL; 501378fcd0a1Sachartre break; 501478fcd0a1Sachartre } 501578fcd0a1Sachartre 50162f5224aeSachartre if (attr_msg->vdisk_size == VD_SIZE_UNKNOWN) { 50172f5224aeSachartre DMSG(vdc, 0, "[%d] Unknown disk size from vds", 50182f5224aeSachartre vdc->instance); 50192f5224aeSachartre attr_msg->vdisk_size = 0; 50202f5224aeSachartre } 502165908c77Syu, larry liu - Sun Microsystems - Beijing China 502265908c77Syu, larry liu - Sun Microsystems - Beijing China /* update the VIO block size */ 502365908c77Syu, larry liu - Sun Microsystems - Beijing China if (attr_msg->vdisk_block_size > 0 && 502465908c77Syu, larry 
liu - Sun Microsystems - Beijing China vdc_update_vio_bsize(vdc, 502565908c77Syu, larry liu - Sun Microsystems - Beijing China attr_msg->vdisk_block_size) != 0) { 502665908c77Syu, larry liu - Sun Microsystems - Beijing China DMSG(vdc, 0, "[%d] Invalid block size (%u) from vds", 502765908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->instance, attr_msg->vdisk_block_size); 502865908c77Syu, larry liu - Sun Microsystems - Beijing China status = EINVAL; 502965908c77Syu, larry liu - Sun Microsystems - Beijing China break; 503065908c77Syu, larry liu - Sun Microsystems - Beijing China } 503165908c77Syu, larry liu - Sun Microsystems - Beijing China 5032de3a5331SRamesh Chitrothu /* update disk, block and transfer sizes */ 5033*00e3a3e9SAlexandre Chartre old_type = vdc->vdisk_type; 5034de3a5331SRamesh Chitrothu vdc_update_size(vdc, attr_msg->vdisk_size, 5035de3a5331SRamesh Chitrothu attr_msg->vdisk_block_size, attr_msg->max_xfer_sz); 50361ae08745Sheppo vdc->vdisk_type = attr_msg->vdisk_type; 503717cadca8Slm66018 vdc->operations = attr_msg->operations; 503817cadca8Slm66018 if (vio_ver_is_supported(vdc->ver, 1, 1)) 503917cadca8Slm66018 vdc->vdisk_media = attr_msg->vdisk_media; 504017cadca8Slm66018 else 504117cadca8Slm66018 vdc->vdisk_media = 0; 50421ae08745Sheppo 50433af08d82Slm66018 DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n", 5044e1ebb9ecSlm66018 vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz); 50453af08d82Slm66018 DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n", 504665908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->instance, vdc->vdisk_bsize, 5047e1ebb9ecSlm66018 attr_msg->vdisk_block_size); 5048e1ebb9ecSlm66018 5049f0ca1d9aSsb155480 if ((attr_msg->xfer_mode != VIO_DRING_MODE_V1_0) || 50501ae08745Sheppo (attr_msg->vdisk_size > INT64_MAX) || 505117cadca8Slm66018 (attr_msg->operations == 0) || 50521ae08745Sheppo (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { 50533af08d82Slm66018 DMSG(vdc, 0, "[%d] Invalid attributes from vds", 
5054e1ebb9ecSlm66018 vdc->instance); 50551ae08745Sheppo status = EINVAL; 50561ae08745Sheppo break; 50571ae08745Sheppo } 50581ae08745Sheppo 505978fcd0a1Sachartre /* 506078fcd0a1Sachartre * Now that we have received all attributes we can create a 506178fcd0a1Sachartre * fake geometry for the disk. 506278fcd0a1Sachartre */ 506378fcd0a1Sachartre vdc_create_fake_geometry(vdc); 5064*00e3a3e9SAlexandre Chartre 5065*00e3a3e9SAlexandre Chartre /* 5066*00e3a3e9SAlexandre Chartre * If the disk type was previously unknown and device nodes 5067*00e3a3e9SAlexandre Chartre * were created then the driver would have created 8 device 5068*00e3a3e9SAlexandre Chartre * nodes. If we now find out that this is a single-slice disk 5069*00e3a3e9SAlexandre Chartre * then we need to re-create the appropriate device nodes. 5070*00e3a3e9SAlexandre Chartre */ 5071*00e3a3e9SAlexandre Chartre if (old_type == VD_DISK_TYPE_UNK && 5072*00e3a3e9SAlexandre Chartre (vdc->initialized & VDC_MINOR) && 5073*00e3a3e9SAlexandre Chartre vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 5074*00e3a3e9SAlexandre Chartre ddi_remove_minor_node(vdc->dip, NULL); 5075*00e3a3e9SAlexandre Chartre (void) devfs_clean(ddi_get_parent(vdc->dip), 5076*00e3a3e9SAlexandre Chartre NULL, DV_CLEAN_FORCE); 5077*00e3a3e9SAlexandre Chartre if (vdc_create_device_nodes(vdc) != 0) { 5078*00e3a3e9SAlexandre Chartre DMSG(vdc, 0, "![%d] Failed to update " 5079*00e3a3e9SAlexandre Chartre "device nodes", vdc->instance); 5080*00e3a3e9SAlexandre Chartre } 5081*00e3a3e9SAlexandre Chartre } 5082*00e3a3e9SAlexandre Chartre 50831ae08745Sheppo break; 50841ae08745Sheppo 50851ae08745Sheppo case VIO_SUBTYPE_NACK: 50861ae08745Sheppo /* 50871ae08745Sheppo * vds could not handle the attributes we sent so we 50881ae08745Sheppo * stop negotiating. 
50891ae08745Sheppo */ 50901ae08745Sheppo status = EPROTO; 50911ae08745Sheppo break; 50921ae08745Sheppo 50931ae08745Sheppo case VIO_SUBTYPE_INFO: 50941ae08745Sheppo /* 50951ae08745Sheppo * Handle the case where vds starts the handshake 50961ae08745Sheppo * (for now; vdc is the only supported instigatior) 50971ae08745Sheppo */ 50981ae08745Sheppo status = ENOTSUP; 50991ae08745Sheppo break; 51001ae08745Sheppo 51011ae08745Sheppo default: 51021ae08745Sheppo status = ENOTSUP; 51031ae08745Sheppo break; 51041ae08745Sheppo } 51051ae08745Sheppo 51060a55fbb7Slm66018 return (status); 51071ae08745Sheppo } 51081ae08745Sheppo 51090a55fbb7Slm66018 /* 51100a55fbb7Slm66018 * Function: 51110a55fbb7Slm66018 * vdc_handle_dring_reg_msg() 51120a55fbb7Slm66018 * 51130a55fbb7Slm66018 * Description: 51140a55fbb7Slm66018 * 51150a55fbb7Slm66018 * Arguments: 51160a55fbb7Slm66018 * vdc - soft state pointer for this instance of the driver. 51170a55fbb7Slm66018 * dring_msg - LDC message sent by vDisk server 51180a55fbb7Slm66018 * 51190a55fbb7Slm66018 * Return Code: 51200a55fbb7Slm66018 * 0 - Success 51210a55fbb7Slm66018 */ 51220a55fbb7Slm66018 static int 51230a55fbb7Slm66018 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg) 51240a55fbb7Slm66018 { 51250a55fbb7Slm66018 int status = 0; 51261ae08745Sheppo 51270a55fbb7Slm66018 ASSERT(vdc != NULL); 51280a55fbb7Slm66018 ASSERT(mutex_owned(&vdc->lock)); 51290a55fbb7Slm66018 51300a55fbb7Slm66018 if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) { 51310a55fbb7Slm66018 return (EPROTO); 51320a55fbb7Slm66018 } 51330a55fbb7Slm66018 51340a55fbb7Slm66018 switch (dring_msg->tag.vio_subtype) { 51350a55fbb7Slm66018 case VIO_SUBTYPE_ACK: 51361ae08745Sheppo /* save the received dring_ident */ 51371ae08745Sheppo vdc->dring_ident = dring_msg->dring_ident; 51383af08d82Slm66018 DMSG(vdc, 0, "[%d] Received dring ident=0x%lx\n", 5139e1ebb9ecSlm66018 vdc->instance, vdc->dring_ident); 51401ae08745Sheppo break; 51411ae08745Sheppo 51421ae08745Sheppo case 
VIO_SUBTYPE_NACK: 51431ae08745Sheppo /* 51441ae08745Sheppo * vds could not handle the DRing info we sent so we 51451ae08745Sheppo * stop negotiating. 51461ae08745Sheppo */ 51473af08d82Slm66018 DMSG(vdc, 0, "[%d] server could not register DRing\n", 51483af08d82Slm66018 vdc->instance); 51491ae08745Sheppo status = EPROTO; 51501ae08745Sheppo break; 51511ae08745Sheppo 51521ae08745Sheppo case VIO_SUBTYPE_INFO: 51531ae08745Sheppo /* 51541ae08745Sheppo * Handle the case where vds starts handshake 51551ae08745Sheppo * (for now only vdc is the instigatior) 51561ae08745Sheppo */ 51571ae08745Sheppo status = ENOTSUP; 51581ae08745Sheppo break; 51591ae08745Sheppo default: 51601ae08745Sheppo status = ENOTSUP; 51611ae08745Sheppo } 51621ae08745Sheppo 51631ae08745Sheppo return (status); 51641ae08745Sheppo } 51651ae08745Sheppo 51661ae08745Sheppo /* 51671ae08745Sheppo * Function: 51681ae08745Sheppo * vdc_verify_seq_num() 51691ae08745Sheppo * 51701ae08745Sheppo * Description: 5171e1ebb9ecSlm66018 * This functions verifies that the sequence number sent back by the vDisk 5172e1ebb9ecSlm66018 * server with the latest message is what is expected (i.e. it is greater 5173e1ebb9ecSlm66018 * than the last seq num sent by the vDisk server and less than or equal 5174e1ebb9ecSlm66018 * to the last seq num generated by vdc). 5175e1ebb9ecSlm66018 * 5176e1ebb9ecSlm66018 * It then checks the request ID to see if any requests need processing 5177e1ebb9ecSlm66018 * in the DRing. 51781ae08745Sheppo * 51791ae08745Sheppo * Arguments: 51801ae08745Sheppo * vdc - soft state pointer for this instance of the driver. 
51811ae08745Sheppo * dring_msg - pointer to the LDC message sent by vds 51821ae08745Sheppo * 51831ae08745Sheppo * Return Code: 5184e1ebb9ecSlm66018 * VDC_SEQ_NUM_TODO - Message needs to be processed 5185e1ebb9ecSlm66018 * VDC_SEQ_NUM_SKIP - Message has already been processed 5186e1ebb9ecSlm66018 * VDC_SEQ_NUM_INVALID - The seq numbers are so out of sync, 5187e1ebb9ecSlm66018 * vdc cannot deal with them 51881ae08745Sheppo */ 5189e1ebb9ecSlm66018 static int 5190e1ebb9ecSlm66018 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg) 51911ae08745Sheppo { 51921ae08745Sheppo ASSERT(vdc != NULL); 51931ae08745Sheppo ASSERT(dring_msg != NULL); 5194d10e4ef2Snarayan ASSERT(mutex_owned(&vdc->lock)); 51951ae08745Sheppo 51961ae08745Sheppo /* 51971ae08745Sheppo * Check to see if the messages were responded to in the correct 5198e1ebb9ecSlm66018 * order by vds. 51991ae08745Sheppo */ 5200e1ebb9ecSlm66018 if ((dring_msg->seq_num <= vdc->seq_num_reply) || 5201e1ebb9ecSlm66018 (dring_msg->seq_num > vdc->seq_num)) { 52023af08d82Slm66018 DMSG(vdc, 0, "?[%d] Bogus sequence_number %lu: " 5203e1ebb9ecSlm66018 "%lu > expected <= %lu (last proc req %lu sent %lu)\n", 5204e1ebb9ecSlm66018 vdc->instance, dring_msg->seq_num, 5205e1ebb9ecSlm66018 vdc->seq_num_reply, vdc->seq_num, 5206e1ebb9ecSlm66018 vdc->req_id_proc, vdc->req_id); 5207e1ebb9ecSlm66018 return (VDC_SEQ_NUM_INVALID); 52081ae08745Sheppo } 5209e1ebb9ecSlm66018 vdc->seq_num_reply = dring_msg->seq_num; 52101ae08745Sheppo 5211e1ebb9ecSlm66018 if (vdc->req_id_proc < vdc->req_id) 5212e1ebb9ecSlm66018 return (VDC_SEQ_NUM_TODO); 5213e1ebb9ecSlm66018 else 5214e1ebb9ecSlm66018 return (VDC_SEQ_NUM_SKIP); 52151ae08745Sheppo } 52161ae08745Sheppo 52170a55fbb7Slm66018 52180a55fbb7Slm66018 /* 52190a55fbb7Slm66018 * Function: 52200a55fbb7Slm66018 * vdc_is_supported_version() 52210a55fbb7Slm66018 * 52220a55fbb7Slm66018 * Description: 52230a55fbb7Slm66018 * This routine checks if the major/minor version numbers specified in 52240a55fbb7Slm66018 * 
'ver_msg' are supported. If not it finds the next version that is 52250a55fbb7Slm66018 * in the supported version list 'vdc_version[]' and sets the fields in 52260a55fbb7Slm66018 * 'ver_msg' to those values 52270a55fbb7Slm66018 * 52280a55fbb7Slm66018 * Arguments: 52290a55fbb7Slm66018 * ver_msg - LDC message sent by vDisk server 52300a55fbb7Slm66018 * 52310a55fbb7Slm66018 * Return Code: 52320a55fbb7Slm66018 * B_TRUE - Success 52330a55fbb7Slm66018 * B_FALSE - Version not supported 52340a55fbb7Slm66018 */ 52350a55fbb7Slm66018 static boolean_t 52360a55fbb7Slm66018 vdc_is_supported_version(vio_ver_msg_t *ver_msg) 52370a55fbb7Slm66018 { 52380a55fbb7Slm66018 int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]); 52390a55fbb7Slm66018 52400a55fbb7Slm66018 for (int i = 0; i < vdc_num_versions; i++) { 52410a55fbb7Slm66018 ASSERT(vdc_version[i].major > 0); 52420a55fbb7Slm66018 ASSERT((i == 0) || 52430a55fbb7Slm66018 (vdc_version[i].major < vdc_version[i-1].major)); 52440a55fbb7Slm66018 52450a55fbb7Slm66018 /* 52460a55fbb7Slm66018 * If the major versions match, adjust the minor version, if 52470a55fbb7Slm66018 * necessary, down to the highest value supported by this 52480a55fbb7Slm66018 * client. 
The server should support all minor versions lower 52490a55fbb7Slm66018 * than the value it sent 52500a55fbb7Slm66018 */ 52510a55fbb7Slm66018 if (ver_msg->ver_major == vdc_version[i].major) { 52520a55fbb7Slm66018 if (ver_msg->ver_minor > vdc_version[i].minor) { 52533af08d82Slm66018 DMSGX(0, 52543af08d82Slm66018 "Adjusting minor version from %u to %u", 52550a55fbb7Slm66018 ver_msg->ver_minor, vdc_version[i].minor); 52560a55fbb7Slm66018 ver_msg->ver_minor = vdc_version[i].minor; 52570a55fbb7Slm66018 } 52580a55fbb7Slm66018 return (B_TRUE); 52590a55fbb7Slm66018 } 52600a55fbb7Slm66018 52610a55fbb7Slm66018 /* 52620a55fbb7Slm66018 * If the message contains a higher major version number, set 52630a55fbb7Slm66018 * the message's major/minor versions to the current values 52640a55fbb7Slm66018 * and return false, so this message will get resent with 52650a55fbb7Slm66018 * these values, and the server will potentially try again 52660a55fbb7Slm66018 * with the same or a lower version 52670a55fbb7Slm66018 */ 52680a55fbb7Slm66018 if (ver_msg->ver_major > vdc_version[i].major) { 52690a55fbb7Slm66018 ver_msg->ver_major = vdc_version[i].major; 52700a55fbb7Slm66018 ver_msg->ver_minor = vdc_version[i].minor; 52713af08d82Slm66018 DMSGX(0, "Suggesting major/minor (0x%x/0x%x)\n", 52720a55fbb7Slm66018 ver_msg->ver_major, ver_msg->ver_minor); 52730a55fbb7Slm66018 52740a55fbb7Slm66018 return (B_FALSE); 52750a55fbb7Slm66018 } 52760a55fbb7Slm66018 52770a55fbb7Slm66018 /* 52780a55fbb7Slm66018 * Otherwise, the message's major version is less than the 52790a55fbb7Slm66018 * current major version, so continue the loop to the next 52800a55fbb7Slm66018 * (lower) supported version 52810a55fbb7Slm66018 */ 52820a55fbb7Slm66018 } 52830a55fbb7Slm66018 52840a55fbb7Slm66018 /* 52850a55fbb7Slm66018 * No common version was found; "ground" the version pair in the 52860a55fbb7Slm66018 * message to terminate negotiation 52870a55fbb7Slm66018 */ 52880a55fbb7Slm66018 ver_msg->ver_major = 0; 52890a55fbb7Slm66018 
ver_msg->ver_minor = 0; 52900a55fbb7Slm66018 52910a55fbb7Slm66018 return (B_FALSE); 52920a55fbb7Slm66018 } 52931ae08745Sheppo /* -------------------------------------------------------------------------- */ 52941ae08745Sheppo 52951ae08745Sheppo /* 52961ae08745Sheppo * DKIO(7) support 52971ae08745Sheppo */ 52981ae08745Sheppo 52991ae08745Sheppo typedef struct vdc_dk_arg { 53001ae08745Sheppo struct dk_callback dkc; 53011ae08745Sheppo int mode; 53021ae08745Sheppo dev_t dev; 53031ae08745Sheppo vdc_t *vdc; 53041ae08745Sheppo } vdc_dk_arg_t; 53051ae08745Sheppo 53061ae08745Sheppo /* 53071ae08745Sheppo * Function: 53081ae08745Sheppo * vdc_dkio_flush_cb() 53091ae08745Sheppo * 53101ae08745Sheppo * Description: 53111ae08745Sheppo * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called 53121ae08745Sheppo * by kernel code. 53131ae08745Sheppo * 53141ae08745Sheppo * Arguments: 53151ae08745Sheppo * arg - a pointer to a vdc_dk_arg_t structure. 53161ae08745Sheppo */ 53171ae08745Sheppo void 53181ae08745Sheppo vdc_dkio_flush_cb(void *arg) 53191ae08745Sheppo { 53201ae08745Sheppo struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg; 53211ae08745Sheppo struct dk_callback *dkc = NULL; 53221ae08745Sheppo vdc_t *vdc = NULL; 53231ae08745Sheppo int rv; 53241ae08745Sheppo 53251ae08745Sheppo if (dk_arg == NULL) { 53263af08d82Slm66018 cmn_err(CE_NOTE, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n"); 53271ae08745Sheppo return; 53281ae08745Sheppo } 53291ae08745Sheppo dkc = &dk_arg->dkc; 53301ae08745Sheppo vdc = dk_arg->vdc; 53311ae08745Sheppo ASSERT(vdc != NULL); 53321ae08745Sheppo 53333af08d82Slm66018 rv = vdc_do_sync_op(vdc, VD_OP_FLUSH, NULL, 0, 5334*00e3a3e9SAlexandre Chartre VDCPART(dk_arg->dev), 0, VIO_both_dir, B_TRUE); 53351ae08745Sheppo if (rv != 0) { 53363af08d82Slm66018 DMSG(vdc, 0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n", 5337e1ebb9ecSlm66018 vdc->instance, rv, 53381ae08745Sheppo ddi_model_convert_from(dk_arg->mode & FMODELS)); 53391ae08745Sheppo } 
53401ae08745Sheppo 53411ae08745Sheppo /* 53421ae08745Sheppo * Trigger the call back to notify the caller the the ioctl call has 53431ae08745Sheppo * been completed. 53441ae08745Sheppo */ 53451ae08745Sheppo if ((dk_arg->mode & FKIOCTL) && 53461ae08745Sheppo (dkc != NULL) && 53471ae08745Sheppo (dkc->dkc_callback != NULL)) { 53481ae08745Sheppo ASSERT(dkc->dkc_cookie != NULL); 53498e6a2a04Slm66018 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 53501ae08745Sheppo } 53511ae08745Sheppo 53521ae08745Sheppo /* Indicate that one less DKIO write flush is outstanding */ 53531ae08745Sheppo mutex_enter(&vdc->lock); 53541ae08745Sheppo vdc->dkio_flush_pending--; 53551ae08745Sheppo ASSERT(vdc->dkio_flush_pending >= 0); 53561ae08745Sheppo mutex_exit(&vdc->lock); 53578e6a2a04Slm66018 53588e6a2a04Slm66018 /* free the mem that was allocated when the callback was dispatched */ 53598e6a2a04Slm66018 kmem_free(arg, sizeof (vdc_dk_arg_t)); 53601ae08745Sheppo } 53611ae08745Sheppo 53621ae08745Sheppo /* 536387a7269eSachartre * Function: 53649642afceSachartre * vdc_dkio_gapart() 536587a7269eSachartre * 536687a7269eSachartre * Description: 536787a7269eSachartre * This function implements the DKIOCGAPART ioctl. 
536887a7269eSachartre * 536987a7269eSachartre * Arguments: 537078fcd0a1Sachartre * vdc - soft state pointer 537187a7269eSachartre * arg - a pointer to a dk_map[NDKMAP] or dk_map32[NDKMAP] structure 537287a7269eSachartre * flag - ioctl flags 537387a7269eSachartre */ 537487a7269eSachartre static int 53759642afceSachartre vdc_dkio_gapart(vdc_t *vdc, caddr_t arg, int flag) 537687a7269eSachartre { 537778fcd0a1Sachartre struct dk_geom *geom; 5378342440ecSPrasad Singamsetty struct extvtoc *vtoc; 537987a7269eSachartre union { 538087a7269eSachartre struct dk_map map[NDKMAP]; 538187a7269eSachartre struct dk_map32 map32[NDKMAP]; 538287a7269eSachartre } data; 538387a7269eSachartre int i, rv, size; 538487a7269eSachartre 538578fcd0a1Sachartre mutex_enter(&vdc->lock); 538687a7269eSachartre 538778fcd0a1Sachartre if ((rv = vdc_validate_geometry(vdc)) != 0) { 538878fcd0a1Sachartre mutex_exit(&vdc->lock); 538987a7269eSachartre return (rv); 539078fcd0a1Sachartre } 539187a7269eSachartre 5392342440ecSPrasad Singamsetty if (vdc->vdisk_size > VD_OLDVTOC_LIMIT) { 5393342440ecSPrasad Singamsetty mutex_exit(&vdc->lock); 5394342440ecSPrasad Singamsetty return (EOVERFLOW); 5395342440ecSPrasad Singamsetty } 5396342440ecSPrasad Singamsetty 539778fcd0a1Sachartre vtoc = vdc->vtoc; 539878fcd0a1Sachartre geom = vdc->geom; 539987a7269eSachartre 540087a7269eSachartre if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 540187a7269eSachartre 540278fcd0a1Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 540378fcd0a1Sachartre data.map32[i].dkl_cylno = vtoc->v_part[i].p_start / 540478fcd0a1Sachartre (geom->dkg_nhead * geom->dkg_nsect); 540578fcd0a1Sachartre data.map32[i].dkl_nblk = vtoc->v_part[i].p_size; 540687a7269eSachartre } 540787a7269eSachartre size = NDKMAP * sizeof (struct dk_map32); 540887a7269eSachartre 540987a7269eSachartre } else { 541087a7269eSachartre 541178fcd0a1Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 541278fcd0a1Sachartre data.map[i].dkl_cylno = vtoc->v_part[i].p_start 
/ 541378fcd0a1Sachartre (geom->dkg_nhead * geom->dkg_nsect); 541478fcd0a1Sachartre data.map[i].dkl_nblk = vtoc->v_part[i].p_size; 541587a7269eSachartre } 541687a7269eSachartre size = NDKMAP * sizeof (struct dk_map); 541787a7269eSachartre 541887a7269eSachartre } 541987a7269eSachartre 542078fcd0a1Sachartre mutex_exit(&vdc->lock); 542178fcd0a1Sachartre 542287a7269eSachartre if (ddi_copyout(&data, arg, size, flag) != 0) 542387a7269eSachartre return (EFAULT); 542487a7269eSachartre 542587a7269eSachartre return (0); 542687a7269eSachartre } 542787a7269eSachartre 542887a7269eSachartre /* 542987a7269eSachartre * Function: 54309642afceSachartre * vdc_dkio_partition() 54319642afceSachartre * 54329642afceSachartre * Description: 54339642afceSachartre * This function implements the DKIOCPARTITION ioctl. 54349642afceSachartre * 54359642afceSachartre * Arguments: 54369642afceSachartre * vdc - soft state pointer 54379642afceSachartre * arg - a pointer to a struct partition64 structure 54389642afceSachartre * flag - ioctl flags 54399642afceSachartre */ 54409642afceSachartre static int 54419642afceSachartre vdc_dkio_partition(vdc_t *vdc, caddr_t arg, int flag) 54429642afceSachartre { 54439642afceSachartre struct partition64 p64; 54449642afceSachartre efi_gpt_t *gpt; 54459642afceSachartre efi_gpe_t *gpe; 54469642afceSachartre vd_efi_dev_t edev; 54479642afceSachartre uint_t partno; 54489642afceSachartre int rv; 54499642afceSachartre 54509642afceSachartre if (ddi_copyin(arg, &p64, sizeof (struct partition64), flag)) { 54519642afceSachartre return (EFAULT); 54529642afceSachartre } 54539642afceSachartre 545465908c77Syu, larry liu - Sun Microsystems - Beijing China VDC_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl); 54559642afceSachartre 54569642afceSachartre if ((rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe)) != 0) { 54579642afceSachartre return (rv); 54589642afceSachartre } 54599642afceSachartre 54609642afceSachartre partno = p64.p_partno; 54619642afceSachartre 54629642afceSachartre if 
(partno >= gpt->efi_gpt_NumberOfPartitionEntries) { 54639642afceSachartre vd_efi_free(&edev, gpt, gpe); 54649642afceSachartre return (ESRCH); 54659642afceSachartre } 54669642afceSachartre 54679642afceSachartre bcopy(&gpe[partno].efi_gpe_PartitionTypeGUID, &p64.p_type, 54689642afceSachartre sizeof (struct uuid)); 54699642afceSachartre p64.p_start = gpe[partno].efi_gpe_StartingLBA; 54709642afceSachartre p64.p_size = gpe[partno].efi_gpe_EndingLBA - p64.p_start + 1; 54719642afceSachartre 54729642afceSachartre if (ddi_copyout(&p64, arg, sizeof (struct partition64), flag)) { 54739642afceSachartre vd_efi_free(&edev, gpt, gpe); 54749642afceSachartre return (EFAULT); 54759642afceSachartre } 54769642afceSachartre 54779642afceSachartre vd_efi_free(&edev, gpt, gpe); 54789642afceSachartre return (0); 54799642afceSachartre } 54809642afceSachartre 54819642afceSachartre /* 54829642afceSachartre * Function: 548387a7269eSachartre * vdc_dioctl_rwcmd() 548487a7269eSachartre * 548587a7269eSachartre * Description: 548687a7269eSachartre * This function implements the DIOCTL_RWCMD ioctl. This ioctl is used 548787a7269eSachartre * for DKC_DIRECT disks to read or write at an absolute disk offset. 
548887a7269eSachartre * 548987a7269eSachartre * Arguments: 549087a7269eSachartre * dev - device 549187a7269eSachartre * arg - a pointer to a dadkio_rwcmd or dadkio_rwcmd32 structure 549287a7269eSachartre * flag - ioctl flags 549387a7269eSachartre */ 549487a7269eSachartre static int 549565908c77Syu, larry liu - Sun Microsystems - Beijing China vdc_dioctl_rwcmd(vdc_t *vdc, caddr_t arg, int flag) 549687a7269eSachartre { 549787a7269eSachartre struct dadkio_rwcmd32 rwcmd32; 549887a7269eSachartre struct dadkio_rwcmd rwcmd; 549987a7269eSachartre struct iovec aiov; 550087a7269eSachartre struct uio auio; 550187a7269eSachartre int rw, status; 550287a7269eSachartre struct buf *buf; 550387a7269eSachartre 550487a7269eSachartre if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 550587a7269eSachartre if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd32, 550687a7269eSachartre sizeof (struct dadkio_rwcmd32), flag)) { 550787a7269eSachartre return (EFAULT); 550887a7269eSachartre } 550987a7269eSachartre rwcmd.cmd = rwcmd32.cmd; 551087a7269eSachartre rwcmd.flags = rwcmd32.flags; 551187a7269eSachartre rwcmd.blkaddr = (daddr_t)rwcmd32.blkaddr; 551287a7269eSachartre rwcmd.buflen = rwcmd32.buflen; 551387a7269eSachartre rwcmd.bufaddr = (caddr_t)(uintptr_t)rwcmd32.bufaddr; 551487a7269eSachartre } else { 551587a7269eSachartre if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd, 551687a7269eSachartre sizeof (struct dadkio_rwcmd), flag)) { 551787a7269eSachartre return (EFAULT); 551887a7269eSachartre } 551987a7269eSachartre } 552087a7269eSachartre 552187a7269eSachartre switch (rwcmd.cmd) { 552287a7269eSachartre case DADKIO_RWCMD_READ: 552387a7269eSachartre rw = B_READ; 552487a7269eSachartre break; 552587a7269eSachartre case DADKIO_RWCMD_WRITE: 552687a7269eSachartre rw = B_WRITE; 552787a7269eSachartre break; 552887a7269eSachartre default: 552987a7269eSachartre return (EINVAL); 553087a7269eSachartre } 553187a7269eSachartre 553287a7269eSachartre bzero((caddr_t)&aiov, sizeof (struct iovec)); 
553387a7269eSachartre aiov.iov_base = rwcmd.bufaddr; 553487a7269eSachartre aiov.iov_len = rwcmd.buflen; 553587a7269eSachartre 553687a7269eSachartre bzero((caddr_t)&auio, sizeof (struct uio)); 553787a7269eSachartre auio.uio_iov = &aiov; 553887a7269eSachartre auio.uio_iovcnt = 1; 553965908c77Syu, larry liu - Sun Microsystems - Beijing China auio.uio_loffset = rwcmd.blkaddr * vdc->vdisk_bsize; 554087a7269eSachartre auio.uio_resid = rwcmd.buflen; 554187a7269eSachartre auio.uio_segflg = flag & FKIOCTL ? UIO_SYSSPACE : UIO_USERSPACE; 554287a7269eSachartre 554387a7269eSachartre buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 554487a7269eSachartre bioinit(buf); 554587a7269eSachartre /* 554687a7269eSachartre * We use the private field of buf to specify that this is an 554787a7269eSachartre * I/O using an absolute offset. 554887a7269eSachartre */ 554987a7269eSachartre buf->b_private = (void *)VD_SLICE_NONE; 555087a7269eSachartre 555165908c77Syu, larry liu - Sun Microsystems - Beijing China status = physio(vdc_strategy, buf, VD_MAKE_DEV(vdc->instance, 0), 555265908c77Syu, larry liu - Sun Microsystems - Beijing China rw, vdc_min, &auio); 555387a7269eSachartre 555487a7269eSachartre biofini(buf); 555587a7269eSachartre kmem_free(buf, sizeof (buf_t)); 555687a7269eSachartre 555787a7269eSachartre return (status); 555887a7269eSachartre } 555987a7269eSachartre 556087a7269eSachartre /* 55612f5224aeSachartre * Allocate a buffer for a VD_OP_SCSICMD operation. The size of the allocated 55622f5224aeSachartre * buffer is returned in alloc_len. 
55632f5224aeSachartre */ 55642f5224aeSachartre static vd_scsi_t * 55652f5224aeSachartre vdc_scsi_alloc(int cdb_len, int sense_len, int datain_len, int dataout_len, 55662f5224aeSachartre int *alloc_len) 55672f5224aeSachartre { 55682f5224aeSachartre vd_scsi_t *vd_scsi; 55692f5224aeSachartre int vd_scsi_len = VD_SCSI_SIZE; 55702f5224aeSachartre 55712f5224aeSachartre vd_scsi_len += P2ROUNDUP(cdb_len, sizeof (uint64_t)); 55722f5224aeSachartre vd_scsi_len += P2ROUNDUP(sense_len, sizeof (uint64_t)); 55732f5224aeSachartre vd_scsi_len += P2ROUNDUP(datain_len, sizeof (uint64_t)); 55742f5224aeSachartre vd_scsi_len += P2ROUNDUP(dataout_len, sizeof (uint64_t)); 55752f5224aeSachartre 55762f5224aeSachartre ASSERT(vd_scsi_len % sizeof (uint64_t) == 0); 55772f5224aeSachartre 55782f5224aeSachartre vd_scsi = kmem_zalloc(vd_scsi_len, KM_SLEEP); 55792f5224aeSachartre 55802f5224aeSachartre vd_scsi->cdb_len = cdb_len; 55812f5224aeSachartre vd_scsi->sense_len = sense_len; 55822f5224aeSachartre vd_scsi->datain_len = datain_len; 55832f5224aeSachartre vd_scsi->dataout_len = dataout_len; 55842f5224aeSachartre 55852f5224aeSachartre *alloc_len = vd_scsi_len; 55862f5224aeSachartre 55872f5224aeSachartre return (vd_scsi); 55882f5224aeSachartre } 55892f5224aeSachartre 55902f5224aeSachartre /* 55912f5224aeSachartre * Convert the status of a SCSI command to a Solaris return code. 55922f5224aeSachartre * 55932f5224aeSachartre * Arguments: 55942f5224aeSachartre * vd_scsi - The SCSI operation buffer. 55952f5224aeSachartre * log_error - indicate if an error message should be logged. 55962f5224aeSachartre * 55972f5224aeSachartre * Note that our SCSI error messages are rather primitive for the moment 55982f5224aeSachartre * and could be improved by decoding some data like the SCSI command and 55992f5224aeSachartre * the sense key. 56002f5224aeSachartre * 56012f5224aeSachartre * Return value: 56022f5224aeSachartre * 0 - Status is good. 56032f5224aeSachartre * EACCES - Status reports a reservation conflict. 
56042f5224aeSachartre * ENOTSUP - Status reports a check condition and sense key 56052f5224aeSachartre * reports an illegal request. 56062f5224aeSachartre * EIO - Any other status. 56072f5224aeSachartre */ 56082f5224aeSachartre static int 56092f5224aeSachartre vdc_scsi_status(vdc_t *vdc, vd_scsi_t *vd_scsi, boolean_t log_error) 56102f5224aeSachartre { 56112f5224aeSachartre int rv; 56122f5224aeSachartre char path_str[MAXPATHLEN]; 56132f5224aeSachartre char panic_str[VDC_RESV_CONFLICT_FMT_LEN + MAXPATHLEN]; 56142f5224aeSachartre union scsi_cdb *cdb; 56152f5224aeSachartre struct scsi_extended_sense *sense; 56162f5224aeSachartre 56172f5224aeSachartre if (vd_scsi->cmd_status == STATUS_GOOD) 56182f5224aeSachartre /* no error */ 56192f5224aeSachartre return (0); 56202f5224aeSachartre 56212f5224aeSachartre /* when the tunable vdc_scsi_log_error is true we log all errors */ 56222f5224aeSachartre if (vdc_scsi_log_error) 56232f5224aeSachartre log_error = B_TRUE; 56242f5224aeSachartre 56252f5224aeSachartre if (log_error) { 56262f5224aeSachartre cmn_err(CE_WARN, "%s (vdc%d):\tError for Command: 0x%x)\n", 56272f5224aeSachartre ddi_pathname(vdc->dip, path_str), vdc->instance, 56282f5224aeSachartre GETCMD(VD_SCSI_DATA_CDB(vd_scsi))); 56292f5224aeSachartre } 56302f5224aeSachartre 56312f5224aeSachartre /* default returned value */ 56322f5224aeSachartre rv = EIO; 56332f5224aeSachartre 56342f5224aeSachartre switch (vd_scsi->cmd_status) { 56352f5224aeSachartre 56362f5224aeSachartre case STATUS_CHECK: 56372f5224aeSachartre case STATUS_TERMINATED: 56382f5224aeSachartre if (log_error) 56392f5224aeSachartre cmn_err(CE_CONT, "\tCheck Condition Error\n"); 56402f5224aeSachartre 56412f5224aeSachartre /* check sense buffer */ 56422f5224aeSachartre if (vd_scsi->sense_len == 0 || 56432f5224aeSachartre vd_scsi->sense_status != STATUS_GOOD) { 56442f5224aeSachartre if (log_error) 56452f5224aeSachartre cmn_err(CE_CONT, "\tNo Sense Data Available\n"); 56462f5224aeSachartre break; 56472f5224aeSachartre 
} 56482f5224aeSachartre 56492f5224aeSachartre sense = VD_SCSI_DATA_SENSE(vd_scsi); 56502f5224aeSachartre 56512f5224aeSachartre if (log_error) { 56522f5224aeSachartre cmn_err(CE_CONT, "\tSense Key: 0x%x\n" 56532f5224aeSachartre "\tASC: 0x%x, ASCQ: 0x%x\n", 56542f5224aeSachartre scsi_sense_key((uint8_t *)sense), 56552f5224aeSachartre scsi_sense_asc((uint8_t *)sense), 56562f5224aeSachartre scsi_sense_ascq((uint8_t *)sense)); 56572f5224aeSachartre } 56582f5224aeSachartre 56592f5224aeSachartre if (scsi_sense_key((uint8_t *)sense) == KEY_ILLEGAL_REQUEST) 56602f5224aeSachartre rv = ENOTSUP; 56612f5224aeSachartre break; 56622f5224aeSachartre 56632f5224aeSachartre case STATUS_BUSY: 56642f5224aeSachartre if (log_error) 56652f5224aeSachartre cmn_err(CE_NOTE, "\tDevice Busy\n"); 56662f5224aeSachartre break; 56672f5224aeSachartre 56682f5224aeSachartre case STATUS_RESERVATION_CONFLICT: 56692f5224aeSachartre /* 56702f5224aeSachartre * If the command was PERSISTENT_RESERVATION_[IN|OUT] then 56712f5224aeSachartre * reservation conflict could be due to various reasons like 56722f5224aeSachartre * incorrect keys, not registered or not reserved etc. So, 56732f5224aeSachartre * we should not panic in that case. 
56742f5224aeSachartre */ 56752f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 56762f5224aeSachartre if (vdc->failfast_interval != 0 && 56772f5224aeSachartre cdb->scc_cmd != SCMD_PERSISTENT_RESERVE_IN && 56782f5224aeSachartre cdb->scc_cmd != SCMD_PERSISTENT_RESERVE_OUT) { 56792f5224aeSachartre /* failfast is enabled so we have to panic */ 56802f5224aeSachartre (void) snprintf(panic_str, sizeof (panic_str), 56812f5224aeSachartre VDC_RESV_CONFLICT_FMT_STR "%s", 56822f5224aeSachartre ddi_pathname(vdc->dip, path_str)); 56832f5224aeSachartre panic(panic_str); 56842f5224aeSachartre } 56852f5224aeSachartre if (log_error) 56862f5224aeSachartre cmn_err(CE_NOTE, "\tReservation Conflict\n"); 56872f5224aeSachartre rv = EACCES; 56882f5224aeSachartre break; 56892f5224aeSachartre 56902f5224aeSachartre case STATUS_QFULL: 56912f5224aeSachartre if (log_error) 56922f5224aeSachartre cmn_err(CE_NOTE, "\tQueue Full\n"); 56932f5224aeSachartre break; 56942f5224aeSachartre 56952f5224aeSachartre case STATUS_MET: 56962f5224aeSachartre case STATUS_INTERMEDIATE: 56972f5224aeSachartre case STATUS_SCSI2: 56982f5224aeSachartre case STATUS_INTERMEDIATE_MET: 56992f5224aeSachartre case STATUS_ACA_ACTIVE: 57002f5224aeSachartre if (log_error) 57012f5224aeSachartre cmn_err(CE_CONT, 57022f5224aeSachartre "\tUnexpected SCSI status received: 0x%x\n", 57032f5224aeSachartre vd_scsi->cmd_status); 57042f5224aeSachartre break; 57052f5224aeSachartre 57062f5224aeSachartre default: 57072f5224aeSachartre if (log_error) 57082f5224aeSachartre cmn_err(CE_CONT, 57092f5224aeSachartre "\tInvalid SCSI status received: 0x%x\n", 57102f5224aeSachartre vd_scsi->cmd_status); 57112f5224aeSachartre break; 57122f5224aeSachartre } 57132f5224aeSachartre 57142f5224aeSachartre return (rv); 57152f5224aeSachartre } 57162f5224aeSachartre 57172f5224aeSachartre /* 57182f5224aeSachartre * Implemented the USCSICMD uscsi(7I) ioctl. 
This ioctl is converted to 57192f5224aeSachartre * a VD_OP_SCSICMD operation which is sent to the vdisk server. If a SCSI 57202f5224aeSachartre * reset is requested (i.e. a flag USCSI_RESET* is set) then the ioctl is 57212f5224aeSachartre * converted to a VD_OP_RESET operation. 57222f5224aeSachartre */ 57232f5224aeSachartre static int 57242f5224aeSachartre vdc_uscsi_cmd(vdc_t *vdc, caddr_t arg, int mode) 57252f5224aeSachartre { 57262f5224aeSachartre struct uscsi_cmd uscsi; 57272f5224aeSachartre struct uscsi_cmd32 uscsi32; 57282f5224aeSachartre vd_scsi_t *vd_scsi; 57292f5224aeSachartre int vd_scsi_len; 57302f5224aeSachartre union scsi_cdb *cdb; 57312f5224aeSachartre struct scsi_extended_sense *sense; 57322f5224aeSachartre char *datain, *dataout; 57332f5224aeSachartre size_t cdb_len, datain_len, dataout_len, sense_len; 57342f5224aeSachartre int rv; 57352f5224aeSachartre 57362f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 57372f5224aeSachartre if (ddi_copyin(arg, &uscsi32, sizeof (struct uscsi_cmd32), 57382f5224aeSachartre mode) != 0) 57392f5224aeSachartre return (EFAULT); 57402f5224aeSachartre uscsi_cmd32touscsi_cmd((&uscsi32), (&uscsi)); 57412f5224aeSachartre } else { 57422f5224aeSachartre if (ddi_copyin(arg, &uscsi, sizeof (struct uscsi_cmd), 57432f5224aeSachartre mode) != 0) 57442f5224aeSachartre return (EFAULT); 57452f5224aeSachartre } 57462f5224aeSachartre 57472f5224aeSachartre /* a uscsi reset is converted to a VD_OP_RESET operation */ 57482f5224aeSachartre if (uscsi.uscsi_flags & (USCSI_RESET | USCSI_RESET_LUN | 57492f5224aeSachartre USCSI_RESET_ALL)) { 5750*00e3a3e9SAlexandre Chartre rv = vdc_do_sync_op(vdc, VD_OP_RESET, NULL, 0, 0, 0, 5751*00e3a3e9SAlexandre Chartre VIO_both_dir, B_TRUE); 57522f5224aeSachartre return (rv); 57532f5224aeSachartre } 57542f5224aeSachartre 57552f5224aeSachartre /* cdb buffer length */ 57562f5224aeSachartre cdb_len = uscsi.uscsi_cdblen; 57572f5224aeSachartre 57582f5224aeSachartre /* data in and 
out buffers length */ 57592f5224aeSachartre if (uscsi.uscsi_flags & USCSI_READ) { 57602f5224aeSachartre datain_len = uscsi.uscsi_buflen; 57612f5224aeSachartre dataout_len = 0; 57622f5224aeSachartre } else { 57632f5224aeSachartre datain_len = 0; 57642f5224aeSachartre dataout_len = uscsi.uscsi_buflen; 57652f5224aeSachartre } 57662f5224aeSachartre 57672f5224aeSachartre /* sense buffer length */ 57682f5224aeSachartre if (uscsi.uscsi_flags & USCSI_RQENABLE) 57692f5224aeSachartre sense_len = uscsi.uscsi_rqlen; 57702f5224aeSachartre else 57712f5224aeSachartre sense_len = 0; 57722f5224aeSachartre 57732f5224aeSachartre /* allocate buffer for the VD_SCSICMD_OP operation */ 57742f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 57752f5224aeSachartre &vd_scsi_len); 57762f5224aeSachartre 57772f5224aeSachartre /* 57782f5224aeSachartre * The documentation of USCSI_ISOLATE and USCSI_DIAGNOSE is very vague, 57792f5224aeSachartre * but basically they prevent a SCSI command from being retried in case 57802f5224aeSachartre * of an error. 
57812f5224aeSachartre */ 57822f5224aeSachartre if ((uscsi.uscsi_flags & USCSI_ISOLATE) || 57832f5224aeSachartre (uscsi.uscsi_flags & USCSI_DIAGNOSE)) 57842f5224aeSachartre vd_scsi->options |= VD_SCSI_OPT_NORETRY; 57852f5224aeSachartre 57862f5224aeSachartre /* set task attribute */ 57872f5224aeSachartre if (uscsi.uscsi_flags & USCSI_NOTAG) { 57882f5224aeSachartre vd_scsi->task_attribute = 0; 57892f5224aeSachartre } else { 57902f5224aeSachartre if (uscsi.uscsi_flags & USCSI_HEAD) 57912f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_ACA; 57922f5224aeSachartre else if (uscsi.uscsi_flags & USCSI_HTAG) 57932f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_HQUEUE; 57942f5224aeSachartre else if (uscsi.uscsi_flags & USCSI_OTAG) 57952f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_ORDERED; 57962f5224aeSachartre else 57972f5224aeSachartre vd_scsi->task_attribute = 0; 57982f5224aeSachartre } 57992f5224aeSachartre 58002f5224aeSachartre /* set timeout */ 58012f5224aeSachartre vd_scsi->timeout = uscsi.uscsi_timeout; 58022f5224aeSachartre 58032f5224aeSachartre /* copy-in cdb data */ 58042f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 58052f5224aeSachartre if (ddi_copyin(uscsi.uscsi_cdb, cdb, cdb_len, mode) != 0) { 58062f5224aeSachartre rv = EFAULT; 58072f5224aeSachartre goto done; 58082f5224aeSachartre } 58092f5224aeSachartre 58102f5224aeSachartre /* keep a pointer to the sense buffer */ 58112f5224aeSachartre sense = VD_SCSI_DATA_SENSE(vd_scsi); 58122f5224aeSachartre 58132f5224aeSachartre /* keep a pointer to the data-in buffer */ 58142f5224aeSachartre datain = (char *)VD_SCSI_DATA_IN(vd_scsi); 58152f5224aeSachartre 58162f5224aeSachartre /* copy-in request data to the data-out buffer */ 58172f5224aeSachartre dataout = (char *)VD_SCSI_DATA_OUT(vd_scsi); 58182f5224aeSachartre if (!(uscsi.uscsi_flags & USCSI_READ)) { 58192f5224aeSachartre if (ddi_copyin(uscsi.uscsi_bufaddr, dataout, dataout_len, 58202f5224aeSachartre mode)) { 58212f5224aeSachartre rv = 
EFAULT; 58222f5224aeSachartre goto done; 58232f5224aeSachartre } 58242f5224aeSachartre } 58252f5224aeSachartre 58262f5224aeSachartre /* submit the request */ 58272f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 5828*00e3a3e9SAlexandre Chartre 0, 0, VIO_both_dir, B_FALSE); 58292f5224aeSachartre 58302f5224aeSachartre if (rv != 0) 58312f5224aeSachartre goto done; 58322f5224aeSachartre 58332f5224aeSachartre /* update scsi status */ 58342f5224aeSachartre uscsi.uscsi_status = vd_scsi->cmd_status; 58352f5224aeSachartre 58362f5224aeSachartre /* update sense data */ 58372f5224aeSachartre if ((uscsi.uscsi_flags & USCSI_RQENABLE) && 58382f5224aeSachartre (uscsi.uscsi_status == STATUS_CHECK || 58392f5224aeSachartre uscsi.uscsi_status == STATUS_TERMINATED)) { 58402f5224aeSachartre 58412f5224aeSachartre uscsi.uscsi_rqstatus = vd_scsi->sense_status; 58422f5224aeSachartre 58432f5224aeSachartre if (uscsi.uscsi_rqstatus == STATUS_GOOD) { 58442f5224aeSachartre uscsi.uscsi_rqresid = uscsi.uscsi_rqlen - 58452f5224aeSachartre vd_scsi->sense_len; 58462f5224aeSachartre if (ddi_copyout(sense, uscsi.uscsi_rqbuf, 58472f5224aeSachartre vd_scsi->sense_len, mode) != 0) { 58482f5224aeSachartre rv = EFAULT; 58492f5224aeSachartre goto done; 58502f5224aeSachartre } 58512f5224aeSachartre } 58522f5224aeSachartre } 58532f5224aeSachartre 58542f5224aeSachartre /* update request data */ 58552f5224aeSachartre if (uscsi.uscsi_status == STATUS_GOOD) { 58562f5224aeSachartre if (uscsi.uscsi_flags & USCSI_READ) { 58572f5224aeSachartre uscsi.uscsi_resid = uscsi.uscsi_buflen - 58582f5224aeSachartre vd_scsi->datain_len; 58592f5224aeSachartre if (ddi_copyout(datain, uscsi.uscsi_bufaddr, 58602f5224aeSachartre vd_scsi->datain_len, mode) != 0) { 58612f5224aeSachartre rv = EFAULT; 58622f5224aeSachartre goto done; 58632f5224aeSachartre } 58642f5224aeSachartre } else { 58652f5224aeSachartre uscsi.uscsi_resid = uscsi.uscsi_buflen - 58662f5224aeSachartre vd_scsi->dataout_len; 
58672f5224aeSachartre } 58682f5224aeSachartre } 58692f5224aeSachartre 58702f5224aeSachartre /* copy-out result */ 58712f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 58722f5224aeSachartre uscsi_cmdtouscsi_cmd32((&uscsi), (&uscsi32)); 58732f5224aeSachartre if (ddi_copyout(&uscsi32, arg, sizeof (struct uscsi_cmd32), 58742f5224aeSachartre mode) != 0) { 58752f5224aeSachartre rv = EFAULT; 58762f5224aeSachartre goto done; 58772f5224aeSachartre } 58782f5224aeSachartre } else { 58792f5224aeSachartre if (ddi_copyout(&uscsi, arg, sizeof (struct uscsi_cmd), 58802f5224aeSachartre mode) != 0) { 58812f5224aeSachartre rv = EFAULT; 58822f5224aeSachartre goto done; 58832f5224aeSachartre } 58842f5224aeSachartre } 58852f5224aeSachartre 58862f5224aeSachartre /* get the return code from the SCSI command status */ 58872f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, 58882f5224aeSachartre !(uscsi.uscsi_flags & USCSI_SILENT)); 58892f5224aeSachartre 58902f5224aeSachartre done: 58912f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 58922f5224aeSachartre return (rv); 58932f5224aeSachartre } 58942f5224aeSachartre 58952f5224aeSachartre /* 58962f5224aeSachartre * Create a VD_OP_SCSICMD buffer for a SCSI PERSISTENT IN command. 58972f5224aeSachartre * 58982f5224aeSachartre * Arguments: 58992f5224aeSachartre * cmd - SCSI PERSISTENT IN command 59002f5224aeSachartre * len - length of the SCSI input buffer 59012f5224aeSachartre * vd_scsi_len - return the length of the allocated buffer 59022f5224aeSachartre * 59032f5224aeSachartre * Returned Value: 59042f5224aeSachartre * a pointer to the allocated VD_OP_SCSICMD buffer. 
59052f5224aeSachartre */ 59062f5224aeSachartre static vd_scsi_t * 59072f5224aeSachartre vdc_scsi_alloc_persistent_in(uchar_t cmd, int len, int *vd_scsi_len) 59082f5224aeSachartre { 59092f5224aeSachartre int cdb_len, sense_len, datain_len, dataout_len; 59102f5224aeSachartre vd_scsi_t *vd_scsi; 59112f5224aeSachartre union scsi_cdb *cdb; 59122f5224aeSachartre 59132f5224aeSachartre cdb_len = CDB_GROUP1; 59142f5224aeSachartre sense_len = sizeof (struct scsi_extended_sense); 59152f5224aeSachartre datain_len = len; 59162f5224aeSachartre dataout_len = 0; 59172f5224aeSachartre 59182f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 59192f5224aeSachartre vd_scsi_len); 59202f5224aeSachartre 59212f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 59222f5224aeSachartre 59232f5224aeSachartre /* set cdb */ 59242f5224aeSachartre cdb->scc_cmd = SCMD_PERSISTENT_RESERVE_IN; 59252f5224aeSachartre cdb->cdb_opaque[1] = cmd; 59262f5224aeSachartre FORMG1COUNT(cdb, datain_len); 59272f5224aeSachartre 59282f5224aeSachartre vd_scsi->timeout = vdc_scsi_timeout; 59292f5224aeSachartre 59302f5224aeSachartre return (vd_scsi); 59312f5224aeSachartre } 59322f5224aeSachartre 59332f5224aeSachartre /* 59342f5224aeSachartre * Create a VD_OP_SCSICMD buffer for a SCSI PERSISTENT OUT command. 59352f5224aeSachartre * 59362f5224aeSachartre * Arguments: 59372f5224aeSachartre * cmd - SCSI PERSISTENT OUT command 59382f5224aeSachartre * len - length of the SCSI output buffer 59392f5224aeSachartre * vd_scsi_len - return the length of the allocated buffer 59402f5224aeSachartre * 59412f5224aeSachartre * Returned Code: 59422f5224aeSachartre * a pointer to the allocated VD_OP_SCSICMD buffer. 
59432f5224aeSachartre */ 59442f5224aeSachartre static vd_scsi_t * 59452f5224aeSachartre vdc_scsi_alloc_persistent_out(uchar_t cmd, int len, int *vd_scsi_len) 59462f5224aeSachartre { 59472f5224aeSachartre int cdb_len, sense_len, datain_len, dataout_len; 59482f5224aeSachartre vd_scsi_t *vd_scsi; 59492f5224aeSachartre union scsi_cdb *cdb; 59502f5224aeSachartre 59512f5224aeSachartre cdb_len = CDB_GROUP1; 59522f5224aeSachartre sense_len = sizeof (struct scsi_extended_sense); 59532f5224aeSachartre datain_len = 0; 59542f5224aeSachartre dataout_len = len; 59552f5224aeSachartre 59562f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 59572f5224aeSachartre vd_scsi_len); 59582f5224aeSachartre 59592f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 59602f5224aeSachartre 59612f5224aeSachartre /* set cdb */ 59622f5224aeSachartre cdb->scc_cmd = SCMD_PERSISTENT_RESERVE_OUT; 59632f5224aeSachartre cdb->cdb_opaque[1] = cmd; 59642f5224aeSachartre FORMG1COUNT(cdb, dataout_len); 59652f5224aeSachartre 59662f5224aeSachartre vd_scsi->timeout = vdc_scsi_timeout; 59672f5224aeSachartre 59682f5224aeSachartre return (vd_scsi); 59692f5224aeSachartre } 59702f5224aeSachartre 59712f5224aeSachartre /* 59722f5224aeSachartre * Implement the MHIOCGRP_INKEYS mhd(7i) ioctl. The ioctl is converted 59732f5224aeSachartre * to a SCSI PERSISTENT IN READ KEYS command which is sent to the vdisk 59742f5224aeSachartre * server with a VD_OP_SCSICMD operation. 
59752f5224aeSachartre */ 59762f5224aeSachartre static int 59772f5224aeSachartre vdc_mhd_inkeys(vdc_t *vdc, caddr_t arg, int mode) 59782f5224aeSachartre { 59792f5224aeSachartre vd_scsi_t *vd_scsi; 59802f5224aeSachartre mhioc_inkeys_t inkeys; 59812f5224aeSachartre mhioc_key_list_t klist; 59822f5224aeSachartre struct mhioc_inkeys32 inkeys32; 59832f5224aeSachartre struct mhioc_key_list32 klist32; 59842f5224aeSachartre sd_prin_readkeys_t *scsi_keys; 59852f5224aeSachartre void *user_keys; 59862f5224aeSachartre int vd_scsi_len; 59872f5224aeSachartre int listsize, listlen, rv; 59882f5224aeSachartre 59892f5224aeSachartre /* copyin arguments */ 59902f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 59912f5224aeSachartre rv = ddi_copyin(arg, &inkeys32, sizeof (inkeys32), mode); 59922f5224aeSachartre if (rv != 0) 59932f5224aeSachartre return (EFAULT); 59942f5224aeSachartre 59952f5224aeSachartre rv = ddi_copyin((caddr_t)(uintptr_t)inkeys32.li, &klist32, 59962f5224aeSachartre sizeof (klist32), mode); 59972f5224aeSachartre if (rv != 0) 59982f5224aeSachartre return (EFAULT); 59992f5224aeSachartre 60002f5224aeSachartre listsize = klist32.listsize; 60012f5224aeSachartre } else { 60022f5224aeSachartre rv = ddi_copyin(arg, &inkeys, sizeof (inkeys), mode); 60032f5224aeSachartre if (rv != 0) 60042f5224aeSachartre return (EFAULT); 60052f5224aeSachartre 60062f5224aeSachartre rv = ddi_copyin(inkeys.li, &klist, sizeof (klist), mode); 60072f5224aeSachartre if (rv != 0) 60082f5224aeSachartre return (EFAULT); 60092f5224aeSachartre 60102f5224aeSachartre listsize = klist.listsize; 60112f5224aeSachartre } 60122f5224aeSachartre 60132f5224aeSachartre /* build SCSI VD_OP request */ 60142f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_in(SD_READ_KEYS, 60152f5224aeSachartre sizeof (sd_prin_readkeys_t) - sizeof (caddr_t) + 60162f5224aeSachartre (sizeof (mhioc_resv_key_t) * listsize), &vd_scsi_len); 60172f5224aeSachartre 60182f5224aeSachartre scsi_keys = 
(sd_prin_readkeys_t *)VD_SCSI_DATA_IN(vd_scsi); 60192f5224aeSachartre 60202f5224aeSachartre /* submit the request */ 60212f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 6022*00e3a3e9SAlexandre Chartre 0, 0, VIO_both_dir, B_FALSE); 60232f5224aeSachartre 60242f5224aeSachartre if (rv != 0) 60252f5224aeSachartre goto done; 60262f5224aeSachartre 60272f5224aeSachartre listlen = scsi_keys->len / MHIOC_RESV_KEY_SIZE; 60282f5224aeSachartre 60292f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 60302f5224aeSachartre inkeys32.generation = scsi_keys->generation; 60312f5224aeSachartre rv = ddi_copyout(&inkeys32, arg, sizeof (inkeys32), mode); 60322f5224aeSachartre if (rv != 0) { 60332f5224aeSachartre rv = EFAULT; 60342f5224aeSachartre goto done; 60352f5224aeSachartre } 60362f5224aeSachartre 60372f5224aeSachartre klist32.listlen = listlen; 60382f5224aeSachartre rv = ddi_copyout(&klist32, (caddr_t)(uintptr_t)inkeys32.li, 60392f5224aeSachartre sizeof (klist32), mode); 60402f5224aeSachartre if (rv != 0) { 60412f5224aeSachartre rv = EFAULT; 60422f5224aeSachartre goto done; 60432f5224aeSachartre } 60442f5224aeSachartre 60452f5224aeSachartre user_keys = (caddr_t)(uintptr_t)klist32.list; 60462f5224aeSachartre } else { 60472f5224aeSachartre inkeys.generation = scsi_keys->generation; 60482f5224aeSachartre rv = ddi_copyout(&inkeys, arg, sizeof (inkeys), mode); 60492f5224aeSachartre if (rv != 0) { 60502f5224aeSachartre rv = EFAULT; 60512f5224aeSachartre goto done; 60522f5224aeSachartre } 60532f5224aeSachartre 60542f5224aeSachartre klist.listlen = listlen; 60552f5224aeSachartre rv = ddi_copyout(&klist, inkeys.li, sizeof (klist), mode); 60562f5224aeSachartre if (rv != 0) { 60572f5224aeSachartre rv = EFAULT; 60582f5224aeSachartre goto done; 60592f5224aeSachartre } 60602f5224aeSachartre 60612f5224aeSachartre user_keys = klist.list; 60622f5224aeSachartre } 60632f5224aeSachartre 60642f5224aeSachartre /* copy out keys */ 
60652f5224aeSachartre if (listlen > 0 && listsize > 0) { 60662f5224aeSachartre if (listsize < listlen) 60672f5224aeSachartre listlen = listsize; 60682f5224aeSachartre rv = ddi_copyout(&scsi_keys->keylist, user_keys, 60692f5224aeSachartre listlen * MHIOC_RESV_KEY_SIZE, mode); 60702f5224aeSachartre if (rv != 0) 60712f5224aeSachartre rv = EFAULT; 60722f5224aeSachartre } 60732f5224aeSachartre 60742f5224aeSachartre if (rv == 0) 60752f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 60762f5224aeSachartre 60772f5224aeSachartre done: 60782f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 60792f5224aeSachartre 60802f5224aeSachartre return (rv); 60812f5224aeSachartre } 60822f5224aeSachartre 60832f5224aeSachartre /* 60842f5224aeSachartre * Implement the MHIOCGRP_INRESV mhd(7i) ioctl. The ioctl is converted 60852f5224aeSachartre * to a SCSI PERSISTENT IN READ RESERVATION command which is sent to 60862f5224aeSachartre * the vdisk server with a VD_OP_SCSICMD operation. 60872f5224aeSachartre */ 60882f5224aeSachartre static int 60892f5224aeSachartre vdc_mhd_inresv(vdc_t *vdc, caddr_t arg, int mode) 60902f5224aeSachartre { 60912f5224aeSachartre vd_scsi_t *vd_scsi; 60922f5224aeSachartre mhioc_inresvs_t inresv; 60932f5224aeSachartre mhioc_resv_desc_list_t rlist; 60942f5224aeSachartre struct mhioc_inresvs32 inresv32; 60952f5224aeSachartre struct mhioc_resv_desc_list32 rlist32; 60962f5224aeSachartre mhioc_resv_desc_t mhd_resv; 60972f5224aeSachartre sd_prin_readresv_t *scsi_resv; 60982f5224aeSachartre sd_readresv_desc_t *resv; 60992f5224aeSachartre mhioc_resv_desc_t *user_resv; 61002f5224aeSachartre int vd_scsi_len; 61012f5224aeSachartre int listsize, listlen, i, rv; 61022f5224aeSachartre 61032f5224aeSachartre /* copyin arguments */ 61042f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 61052f5224aeSachartre rv = ddi_copyin(arg, &inresv32, sizeof (inresv32), mode); 61062f5224aeSachartre if (rv != 0) 61072f5224aeSachartre return (EFAULT); 
61082f5224aeSachartre 61092f5224aeSachartre rv = ddi_copyin((caddr_t)(uintptr_t)inresv32.li, &rlist32, 61102f5224aeSachartre sizeof (rlist32), mode); 61112f5224aeSachartre if (rv != 0) 61122f5224aeSachartre return (EFAULT); 61132f5224aeSachartre 61142f5224aeSachartre listsize = rlist32.listsize; 61152f5224aeSachartre } else { 61162f5224aeSachartre rv = ddi_copyin(arg, &inresv, sizeof (inresv), mode); 61172f5224aeSachartre if (rv != 0) 61182f5224aeSachartre return (EFAULT); 61192f5224aeSachartre 61202f5224aeSachartre rv = ddi_copyin(inresv.li, &rlist, sizeof (rlist), mode); 61212f5224aeSachartre if (rv != 0) 61222f5224aeSachartre return (EFAULT); 61232f5224aeSachartre 61242f5224aeSachartre listsize = rlist.listsize; 61252f5224aeSachartre } 61262f5224aeSachartre 61272f5224aeSachartre /* build SCSI VD_OP request */ 61282f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_in(SD_READ_RESV, 61292f5224aeSachartre sizeof (sd_prin_readresv_t) - sizeof (caddr_t) + 61302f5224aeSachartre (SCSI3_RESV_DESC_LEN * listsize), &vd_scsi_len); 61312f5224aeSachartre 61322f5224aeSachartre scsi_resv = (sd_prin_readresv_t *)VD_SCSI_DATA_IN(vd_scsi); 61332f5224aeSachartre 61342f5224aeSachartre /* submit the request */ 61352f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 6136*00e3a3e9SAlexandre Chartre 0, 0, VIO_both_dir, B_FALSE); 61372f5224aeSachartre 61382f5224aeSachartre if (rv != 0) 61392f5224aeSachartre goto done; 61402f5224aeSachartre 61412f5224aeSachartre listlen = scsi_resv->len / SCSI3_RESV_DESC_LEN; 61422f5224aeSachartre 61432f5224aeSachartre if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 61442f5224aeSachartre inresv32.generation = scsi_resv->generation; 61452f5224aeSachartre rv = ddi_copyout(&inresv32, arg, sizeof (inresv32), mode); 61462f5224aeSachartre if (rv != 0) { 61472f5224aeSachartre rv = EFAULT; 61482f5224aeSachartre goto done; 61492f5224aeSachartre } 61502f5224aeSachartre 61512f5224aeSachartre rlist32.listlen = 
listlen; 61522f5224aeSachartre rv = ddi_copyout(&rlist32, (caddr_t)(uintptr_t)inresv32.li, 61532f5224aeSachartre sizeof (rlist32), mode); 61542f5224aeSachartre if (rv != 0) { 61552f5224aeSachartre rv = EFAULT; 61562f5224aeSachartre goto done; 61572f5224aeSachartre } 61582f5224aeSachartre 61592f5224aeSachartre user_resv = (mhioc_resv_desc_t *)(uintptr_t)rlist32.list; 61602f5224aeSachartre } else { 61612f5224aeSachartre inresv.generation = scsi_resv->generation; 61622f5224aeSachartre rv = ddi_copyout(&inresv, arg, sizeof (inresv), mode); 61632f5224aeSachartre if (rv != 0) { 61642f5224aeSachartre rv = EFAULT; 61652f5224aeSachartre goto done; 61662f5224aeSachartre } 61672f5224aeSachartre 61682f5224aeSachartre rlist.listlen = listlen; 61692f5224aeSachartre rv = ddi_copyout(&rlist, inresv.li, sizeof (rlist), mode); 61702f5224aeSachartre if (rv != 0) { 61712f5224aeSachartre rv = EFAULT; 61722f5224aeSachartre goto done; 61732f5224aeSachartre } 61742f5224aeSachartre 61752f5224aeSachartre user_resv = rlist.list; 61762f5224aeSachartre } 61772f5224aeSachartre 61782f5224aeSachartre /* copy out reservations */ 61792f5224aeSachartre if (listsize > 0 && listlen > 0) { 61802f5224aeSachartre if (listsize < listlen) 61812f5224aeSachartre listlen = listsize; 61822f5224aeSachartre resv = (sd_readresv_desc_t *)&scsi_resv->readresv_desc; 61832f5224aeSachartre 61842f5224aeSachartre for (i = 0; i < listlen; i++) { 61852f5224aeSachartre mhd_resv.type = resv->type; 61862f5224aeSachartre mhd_resv.scope = resv->scope; 61872f5224aeSachartre mhd_resv.scope_specific_addr = 61882f5224aeSachartre BE_32(resv->scope_specific_addr); 61892f5224aeSachartre bcopy(&resv->resvkey, &mhd_resv.key, 61902f5224aeSachartre MHIOC_RESV_KEY_SIZE); 61912f5224aeSachartre 61922f5224aeSachartre rv = ddi_copyout(&mhd_resv, user_resv, 61932f5224aeSachartre sizeof (mhd_resv), mode); 61942f5224aeSachartre if (rv != 0) { 61952f5224aeSachartre rv = EFAULT; 61962f5224aeSachartre goto done; 61972f5224aeSachartre } 
61982f5224aeSachartre resv++; 61992f5224aeSachartre user_resv++; 62002f5224aeSachartre } 62012f5224aeSachartre } 62022f5224aeSachartre 62032f5224aeSachartre if (rv == 0) 62042f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 62052f5224aeSachartre 62062f5224aeSachartre done: 62072f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 62082f5224aeSachartre return (rv); 62092f5224aeSachartre } 62102f5224aeSachartre 62112f5224aeSachartre /* 62122f5224aeSachartre * Implement the MHIOCGRP_REGISTER mhd(7i) ioctl. The ioctl is converted 62132f5224aeSachartre * to a SCSI PERSISTENT OUT REGISTER command which is sent to the vdisk 62142f5224aeSachartre * server with a VD_OP_SCSICMD operation. 62152f5224aeSachartre */ 62162f5224aeSachartre static int 62172f5224aeSachartre vdc_mhd_register(vdc_t *vdc, caddr_t arg, int mode) 62182f5224aeSachartre { 62192f5224aeSachartre vd_scsi_t *vd_scsi; 62202f5224aeSachartre sd_prout_t *scsi_prout; 62212f5224aeSachartre mhioc_register_t mhd_reg; 62222f5224aeSachartre int vd_scsi_len, rv; 62232f5224aeSachartre 62242f5224aeSachartre /* copyin arguments */ 62252f5224aeSachartre rv = ddi_copyin(arg, &mhd_reg, sizeof (mhd_reg), mode); 62262f5224aeSachartre if (rv != 0) 62272f5224aeSachartre return (EFAULT); 62282f5224aeSachartre 62292f5224aeSachartre /* build SCSI VD_OP request */ 62302f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_REGISTER, 62312f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 62322f5224aeSachartre 62332f5224aeSachartre /* set parameters */ 62342f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 62352f5224aeSachartre bcopy(mhd_reg.oldkey.key, scsi_prout->res_key, MHIOC_RESV_KEY_SIZE); 62362f5224aeSachartre bcopy(mhd_reg.newkey.key, scsi_prout->service_key, MHIOC_RESV_KEY_SIZE); 62372f5224aeSachartre scsi_prout->aptpl = (uchar_t)mhd_reg.aptpl; 62382f5224aeSachartre 62392f5224aeSachartre /* submit the request */ 62402f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, 
(caddr_t)vd_scsi, vd_scsi_len, 6241*00e3a3e9SAlexandre Chartre 0, 0, VIO_both_dir, B_FALSE); 62422f5224aeSachartre 62432f5224aeSachartre if (rv == 0) 62442f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 62452f5224aeSachartre 62462f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 62472f5224aeSachartre return (rv); 62482f5224aeSachartre } 62492f5224aeSachartre 62502f5224aeSachartre /* 62512f5224aeSachartre * Implement the MHIOCGRP_RESERVE mhd(7i) ioctl. The ioctl is converted 62522f5224aeSachartre * to a SCSI PERSISTENT OUT RESERVE command which is sent to the vdisk 62532f5224aeSachartre * server with a VD_OP_SCSICMD operation. 62542f5224aeSachartre */ 62552f5224aeSachartre static int 62562f5224aeSachartre vdc_mhd_reserve(vdc_t *vdc, caddr_t arg, int mode) 62572f5224aeSachartre { 62582f5224aeSachartre union scsi_cdb *cdb; 62592f5224aeSachartre vd_scsi_t *vd_scsi; 62602f5224aeSachartre sd_prout_t *scsi_prout; 62612f5224aeSachartre mhioc_resv_desc_t mhd_resv; 62622f5224aeSachartre int vd_scsi_len, rv; 62632f5224aeSachartre 62642f5224aeSachartre /* copyin arguments */ 62652f5224aeSachartre rv = ddi_copyin(arg, &mhd_resv, sizeof (mhd_resv), mode); 62662f5224aeSachartre if (rv != 0) 62672f5224aeSachartre return (EFAULT); 62682f5224aeSachartre 62692f5224aeSachartre /* build SCSI VD_OP request */ 62702f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_RESERVE, 62712f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 62722f5224aeSachartre 62732f5224aeSachartre /* set parameters */ 62742f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 62752f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 62762f5224aeSachartre bcopy(mhd_resv.key.key, scsi_prout->res_key, MHIOC_RESV_KEY_SIZE); 62772f5224aeSachartre scsi_prout->scope_address = mhd_resv.scope_specific_addr; 62782f5224aeSachartre cdb->cdb_opaque[2] = mhd_resv.type; 62792f5224aeSachartre 62802f5224aeSachartre /* submit the request */ 62812f5224aeSachartre rv = vdc_do_sync_op(vdc, 
VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 6282*00e3a3e9SAlexandre Chartre 0, 0, VIO_both_dir, B_FALSE); 62832f5224aeSachartre 62842f5224aeSachartre if (rv == 0) 62852f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 62862f5224aeSachartre 62872f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 62882f5224aeSachartre return (rv); 62892f5224aeSachartre } 62902f5224aeSachartre 62912f5224aeSachartre /* 62922f5224aeSachartre * Implement the MHIOCGRP_PREEMPTANDABORT mhd(7i) ioctl. The ioctl is 62932f5224aeSachartre * converted to a SCSI PERSISTENT OUT PREEMPT AND ABORT command which 62942f5224aeSachartre * is sent to the vdisk server with a VD_OP_SCSICMD operation. 62952f5224aeSachartre */ 62962f5224aeSachartre static int 62972f5224aeSachartre vdc_mhd_preemptabort(vdc_t *vdc, caddr_t arg, int mode) 62982f5224aeSachartre { 62992f5224aeSachartre union scsi_cdb *cdb; 63002f5224aeSachartre vd_scsi_t *vd_scsi; 63012f5224aeSachartre sd_prout_t *scsi_prout; 63022f5224aeSachartre mhioc_preemptandabort_t mhd_preempt; 63032f5224aeSachartre int vd_scsi_len, rv; 63042f5224aeSachartre 63052f5224aeSachartre /* copyin arguments */ 63062f5224aeSachartre rv = ddi_copyin(arg, &mhd_preempt, sizeof (mhd_preempt), mode); 63072f5224aeSachartre if (rv != 0) 63082f5224aeSachartre return (EFAULT); 63092f5224aeSachartre 63102f5224aeSachartre /* build SCSI VD_OP request */ 63112f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_PREEMPTANDABORT, 63122f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 63132f5224aeSachartre 63142f5224aeSachartre /* set parameters */ 63152f5224aeSachartre vd_scsi->task_attribute = VD_SCSI_TASK_ACA; 63162f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 63172f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 63182f5224aeSachartre bcopy(mhd_preempt.resvdesc.key.key, scsi_prout->res_key, 63192f5224aeSachartre MHIOC_RESV_KEY_SIZE); 63202f5224aeSachartre bcopy(mhd_preempt.victim_key.key, scsi_prout->service_key, 
63212f5224aeSachartre MHIOC_RESV_KEY_SIZE); 63222f5224aeSachartre scsi_prout->scope_address = mhd_preempt.resvdesc.scope_specific_addr; 63232f5224aeSachartre cdb->cdb_opaque[2] = mhd_preempt.resvdesc.type; 63242f5224aeSachartre 63252f5224aeSachartre /* submit the request */ 63262f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 6327*00e3a3e9SAlexandre Chartre 0, 0, VIO_both_dir, B_FALSE); 63282f5224aeSachartre 63292f5224aeSachartre if (rv == 0) 63302f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 63312f5224aeSachartre 63322f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 63332f5224aeSachartre return (rv); 63342f5224aeSachartre } 63352f5224aeSachartre 63362f5224aeSachartre /* 63372f5224aeSachartre * Implement the MHIOCGRP_REGISTERANDIGNOREKEY mhd(7i) ioctl. The ioctl 63382f5224aeSachartre * is converted to a SCSI PERSISTENT OUT REGISTER AND IGNORE EXISTING KEY 63392f5224aeSachartre * command which is sent to the vdisk server with a VD_OP_SCSICMD operation. 
63402f5224aeSachartre */ 63412f5224aeSachartre static int 63422f5224aeSachartre vdc_mhd_registerignore(vdc_t *vdc, caddr_t arg, int mode) 63432f5224aeSachartre { 63442f5224aeSachartre vd_scsi_t *vd_scsi; 63452f5224aeSachartre sd_prout_t *scsi_prout; 63462f5224aeSachartre mhioc_registerandignorekey_t mhd_regi; 63472f5224aeSachartre int vd_scsi_len, rv; 63482f5224aeSachartre 63492f5224aeSachartre /* copyin arguments */ 63502f5224aeSachartre rv = ddi_copyin(arg, &mhd_regi, sizeof (mhd_regi), mode); 63512f5224aeSachartre if (rv != 0) 63522f5224aeSachartre return (EFAULT); 63532f5224aeSachartre 63542f5224aeSachartre /* build SCSI VD_OP request */ 63552f5224aeSachartre vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_REGISTERANDIGNOREKEY, 63562f5224aeSachartre sizeof (sd_prout_t), &vd_scsi_len); 63572f5224aeSachartre 63582f5224aeSachartre /* set parameters */ 63592f5224aeSachartre scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 63602f5224aeSachartre bcopy(mhd_regi.newkey.key, scsi_prout->service_key, 63612f5224aeSachartre MHIOC_RESV_KEY_SIZE); 63622f5224aeSachartre scsi_prout->aptpl = (uchar_t)mhd_regi.aptpl; 63632f5224aeSachartre 63642f5224aeSachartre /* submit the request */ 63652f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 6366*00e3a3e9SAlexandre Chartre 0, 0, VIO_both_dir, B_FALSE); 63672f5224aeSachartre 63682f5224aeSachartre if (rv == 0) 63692f5224aeSachartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 63702f5224aeSachartre 63712f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 63722f5224aeSachartre return (rv); 63732f5224aeSachartre } 63742f5224aeSachartre 63752f5224aeSachartre /* 6376*00e3a3e9SAlexandre Chartre * This function is used to send a (simple) SCSI command and check errors. 
63772f5224aeSachartre */ 63782f5224aeSachartre static int 6379*00e3a3e9SAlexandre Chartre vdc_eio_scsi_cmd(vdc_t *vdc, uchar_t scmd, int flags) 63802f5224aeSachartre { 63812f5224aeSachartre int cdb_len, sense_len, vd_scsi_len; 63822f5224aeSachartre vd_scsi_t *vd_scsi; 63832f5224aeSachartre union scsi_cdb *cdb; 63842f5224aeSachartre int rv; 63852f5224aeSachartre 63862f5224aeSachartre ASSERT(scmd == SCMD_TEST_UNIT_READY || scmd == SCMD_WRITE_G1); 63872f5224aeSachartre 63882f5224aeSachartre if (scmd == SCMD_WRITE_G1) 63892f5224aeSachartre cdb_len = CDB_GROUP1; 63902f5224aeSachartre else 63912f5224aeSachartre cdb_len = CDB_GROUP0; 63922f5224aeSachartre 63932f5224aeSachartre sense_len = sizeof (struct scsi_extended_sense); 63942f5224aeSachartre 63952f5224aeSachartre vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, 0, 0, &vd_scsi_len); 63962f5224aeSachartre 63972f5224aeSachartre /* set cdb */ 63982f5224aeSachartre cdb = VD_SCSI_DATA_CDB(vd_scsi); 63992f5224aeSachartre cdb->scc_cmd = scmd; 64002f5224aeSachartre 64012f5224aeSachartre vd_scsi->timeout = vdc_scsi_timeout; 64022f5224aeSachartre 64032f5224aeSachartre /* 6404*00e3a3e9SAlexandre Chartre * Submit the request. Note the operation should not request that any 6405*00e3a3e9SAlexandre Chartre * error is checked because this function is precisely called when 6406*00e3a3e9SAlexandre Chartre * checking errors. 
64072f5224aeSachartre */ 6408*00e3a3e9SAlexandre Chartre ASSERT((flags & VDC_OP_ERRCHK) == 0); 6409*00e3a3e9SAlexandre Chartre 6410*00e3a3e9SAlexandre Chartre rv = vdc_do_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 6411*00e3a3e9SAlexandre Chartre 0, 0, NULL, VIO_both_dir, flags); 64122f5224aeSachartre 64132f5224aeSachartre if (rv == 0) 6414*00e3a3e9SAlexandre Chartre rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 64152f5224aeSachartre 64162f5224aeSachartre kmem_free(vd_scsi, vd_scsi_len); 64172f5224aeSachartre return (rv); 64182f5224aeSachartre } 64192f5224aeSachartre 64202f5224aeSachartre /* 6421*00e3a3e9SAlexandre Chartre * This function is used to check if a SCSI backend is accessible. It will 6422*00e3a3e9SAlexandre Chartre * also detect reservation conflict if failfast is enabled, and panic the 6423*00e3a3e9SAlexandre Chartre * system in that case. 64242f5224aeSachartre * 64252f5224aeSachartre * Returned Code: 6426*00e3a3e9SAlexandre Chartre * 0 - disk is accessible 6427*00e3a3e9SAlexandre Chartre * != 0 - disk is inaccessible or unable to check if disk is accessible 64282f5224aeSachartre */ 6429*00e3a3e9SAlexandre Chartre static int 6430*00e3a3e9SAlexandre Chartre vdc_eio_scsi_check(vdc_t *vdc, int flags) 64312f5224aeSachartre { 64322f5224aeSachartre int failure = 0; 6433*00e3a3e9SAlexandre Chartre int rv; 64342f5224aeSachartre 64352f5224aeSachartre /* 64362f5224aeSachartre * Send a TEST UNIT READY command. The command will panic 6437*00e3a3e9SAlexandre Chartre * the system if it fails with a reservation conflict and 6438*00e3a3e9SAlexandre Chartre * failfast is enabled. If there is a reservation conflict 6439*00e3a3e9SAlexandre Chartre * and failfast is not enabled then the function will return 6440*00e3a3e9SAlexandre Chartre * EACCES. In that case, there's no problem with accessing 6441*00e3a3e9SAlexandre Chartre * the backend, it is just reserved. 
64422f5224aeSachartre */ 6443*00e3a3e9SAlexandre Chartre rv = vdc_eio_scsi_cmd(vdc, SCMD_TEST_UNIT_READY, flags); 6444*00e3a3e9SAlexandre Chartre if (rv != 0 && rv != EACCES) 64452f5224aeSachartre failure++; 64462f5224aeSachartre 6447*00e3a3e9SAlexandre Chartre /* we don't need to do more checking if failfast is not enabled */ 6448*00e3a3e9SAlexandre Chartre if (vdc->failfast_interval == 0) 6449*00e3a3e9SAlexandre Chartre return (failure); 6450*00e3a3e9SAlexandre Chartre 64512f5224aeSachartre /* 64522f5224aeSachartre * With SPC-3 compliant devices TEST UNIT READY will succeed on 64532f5224aeSachartre * a reserved device, so we also do a WRITE(10) of zero byte in 64542f5224aeSachartre * order to provoke a Reservation Conflict status on those newer 64552f5224aeSachartre * devices. 64562f5224aeSachartre */ 6457*00e3a3e9SAlexandre Chartre if (vdc_eio_scsi_cmd(vdc, SCMD_WRITE_G1, flags) != 0) 64582f5224aeSachartre failure++; 64592f5224aeSachartre 64602f5224aeSachartre return (failure); 64612f5224aeSachartre } 64622f5224aeSachartre 64632f5224aeSachartre /* 6464*00e3a3e9SAlexandre Chartre * This function is used to check if a backend is effectively accessible. 
64652f5224aeSachartre * 6466*00e3a3e9SAlexandre Chartre * Returned Code: 6467*00e3a3e9SAlexandre Chartre * 0 - disk is accessible 6468*00e3a3e9SAlexandre Chartre * != 0 - disk is inaccessible or unable to check if disk is accessible 6469*00e3a3e9SAlexandre Chartre */ 6470*00e3a3e9SAlexandre Chartre static int 6471*00e3a3e9SAlexandre Chartre vdc_eio_check(vdc_t *vdc, int flags) 6472*00e3a3e9SAlexandre Chartre { 6473*00e3a3e9SAlexandre Chartre char *buffer; 6474*00e3a3e9SAlexandre Chartre diskaddr_t blkno; 6475*00e3a3e9SAlexandre Chartre int rv; 6476*00e3a3e9SAlexandre Chartre 6477*00e3a3e9SAlexandre Chartre ASSERT((flags & VDC_OP_ERRCHK) == 0); 6478*00e3a3e9SAlexandre Chartre 6479*00e3a3e9SAlexandre Chartre if (VD_OP_SUPPORTED(vdc->operations, VD_OP_SCSICMD)) 6480*00e3a3e9SAlexandre Chartre return (vdc_eio_scsi_check(vdc, flags)); 6481*00e3a3e9SAlexandre Chartre 6482*00e3a3e9SAlexandre Chartre ASSERT(vdc->failfast_interval == 0); 6483*00e3a3e9SAlexandre Chartre 6484*00e3a3e9SAlexandre Chartre /* 6485*00e3a3e9SAlexandre Chartre * If the backend does not support SCSI operations then we simply 6486*00e3a3e9SAlexandre Chartre * check if the backend is accessible by reading some data blocks. 6487*00e3a3e9SAlexandre Chartre * We first try to read a random block, to try to avoid getting 6488*00e3a3e9SAlexandre Chartre * a block that might have been cached on the service domain. Then 6489*00e3a3e9SAlexandre Chartre * we try the last block, and finally the first block. 6490*00e3a3e9SAlexandre Chartre * 6491*00e3a3e9SAlexandre Chartre * We return success as soon as we are able to read any block. 
6492*00e3a3e9SAlexandre Chartre */ 6493*00e3a3e9SAlexandre Chartre buffer = kmem_alloc(vdc->vdisk_bsize, KM_SLEEP); 6494*00e3a3e9SAlexandre Chartre 6495*00e3a3e9SAlexandre Chartre if (vdc->vdisk_size > 0) { 6496*00e3a3e9SAlexandre Chartre 6497*00e3a3e9SAlexandre Chartre /* try a random block */ 6498*00e3a3e9SAlexandre Chartre (void) random_get_pseudo_bytes((uint8_t *)&blkno, 6499*00e3a3e9SAlexandre Chartre sizeof (diskaddr_t)); 6500*00e3a3e9SAlexandre Chartre blkno = blkno % vdc->vdisk_size; 6501*00e3a3e9SAlexandre Chartre rv = vdc_do_op(vdc, VD_OP_BREAD, (caddr_t)buffer, 6502*00e3a3e9SAlexandre Chartre vdc->vdisk_bsize, VD_SLICE_NONE, blkno, NULL, 6503*00e3a3e9SAlexandre Chartre VIO_read_dir, flags); 6504*00e3a3e9SAlexandre Chartre 6505*00e3a3e9SAlexandre Chartre if (rv == 0) 6506*00e3a3e9SAlexandre Chartre goto done; 6507*00e3a3e9SAlexandre Chartre 6508*00e3a3e9SAlexandre Chartre /* try the last block */ 6509*00e3a3e9SAlexandre Chartre blkno = vdc->vdisk_size - 1; 6510*00e3a3e9SAlexandre Chartre rv = vdc_do_op(vdc, VD_OP_BREAD, (caddr_t)buffer, 6511*00e3a3e9SAlexandre Chartre vdc->vdisk_bsize, VD_SLICE_NONE, blkno, NULL, 6512*00e3a3e9SAlexandre Chartre VIO_read_dir, flags); 6513*00e3a3e9SAlexandre Chartre 6514*00e3a3e9SAlexandre Chartre if (rv == 0) 6515*00e3a3e9SAlexandre Chartre goto done; 6516*00e3a3e9SAlexandre Chartre } 6517*00e3a3e9SAlexandre Chartre 6518*00e3a3e9SAlexandre Chartre /* try block 0 */ 6519*00e3a3e9SAlexandre Chartre blkno = 0; 6520*00e3a3e9SAlexandre Chartre rv = vdc_do_op(vdc, VD_OP_BREAD, (caddr_t)buffer, vdc->vdisk_bsize, 6521*00e3a3e9SAlexandre Chartre VD_SLICE_NONE, blkno, NULL, VIO_read_dir, flags); 6522*00e3a3e9SAlexandre Chartre 6523*00e3a3e9SAlexandre Chartre done: 6524*00e3a3e9SAlexandre Chartre kmem_free(buffer, vdc->vdisk_bsize); 6525*00e3a3e9SAlexandre Chartre return (rv); 6526*00e3a3e9SAlexandre Chartre } 6527*00e3a3e9SAlexandre Chartre 6528*00e3a3e9SAlexandre Chartre /* 6529*00e3a3e9SAlexandre Chartre * Add a pending I/O to the 
eio queue. An I/O is added to this queue 6530*00e3a3e9SAlexandre Chartre * when it has failed and failfast is enabled or the vdisk has multiple 6531*00e3a3e9SAlexandre Chartre * servers. It will then be handled by the eio thread (vdc_eio_thread). 6532*00e3a3e9SAlexandre Chartre * The eio queue is ordered starting with the most recent I/O added. 65332f5224aeSachartre */ 65342f5224aeSachartre static vdc_io_t * 6535*00e3a3e9SAlexandre Chartre vdc_eio_queue(vdc_t *vdc, int index) 65362f5224aeSachartre { 65372f5224aeSachartre vdc_io_t *vio; 65382f5224aeSachartre 65392f5224aeSachartre ASSERT(MUTEX_HELD(&vdc->lock)); 65402f5224aeSachartre 65412f5224aeSachartre vio = kmem_alloc(sizeof (vdc_io_t), KM_SLEEP); 6542*00e3a3e9SAlexandre Chartre vio->vio_next = vdc->eio_queue; 6543*00e3a3e9SAlexandre Chartre vio->vio_index = index; 65442f5224aeSachartre vio->vio_qtime = ddi_get_lbolt(); 65452f5224aeSachartre 6546*00e3a3e9SAlexandre Chartre vdc->eio_queue = vio; 65472f5224aeSachartre 6548*00e3a3e9SAlexandre Chartre /* notify the eio thread that a new I/O is queued */ 6549*00e3a3e9SAlexandre Chartre cv_signal(&vdc->eio_cv); 65502f5224aeSachartre 65512f5224aeSachartre return (vio); 65522f5224aeSachartre } 65532f5224aeSachartre 65542f5224aeSachartre /* 6555*00e3a3e9SAlexandre Chartre * Remove I/Os added before the indicated deadline from the eio queue. A 6556*00e3a3e9SAlexandre Chartre * deadline of 0 means that all I/Os have to be unqueued. The complete_io 6557*00e3a3e9SAlexandre Chartre * boolean specifies if unqueued I/Os should be marked as completed or not. 
65582f5224aeSachartre */ 65592f5224aeSachartre static void 6560*00e3a3e9SAlexandre Chartre vdc_eio_unqueue(vdc_t *vdc, clock_t deadline, boolean_t complete_io) 65612f5224aeSachartre { 6562*00e3a3e9SAlexandre Chartre struct buf *buf; 65632f5224aeSachartre vdc_io_t *vio, *vio_tmp; 6564*00e3a3e9SAlexandre Chartre int index, op; 65652f5224aeSachartre 65662f5224aeSachartre ASSERT(MUTEX_HELD(&vdc->lock)); 65672f5224aeSachartre 65682f5224aeSachartre vio_tmp = NULL; 6569*00e3a3e9SAlexandre Chartre vio = vdc->eio_queue; 65702f5224aeSachartre 65712f5224aeSachartre if (deadline != 0) { 65722f5224aeSachartre /* 6573*00e3a3e9SAlexandre Chartre * Skip any io queued after the deadline. The eio queue is 6574*00e3a3e9SAlexandre Chartre * ordered starting with the last I/O added to the queue. 65752f5224aeSachartre */ 65762f5224aeSachartre while (vio != NULL && vio->vio_qtime > deadline) { 65772f5224aeSachartre vio_tmp = vio; 65782f5224aeSachartre vio = vio->vio_next; 65792f5224aeSachartre } 65802f5224aeSachartre } 65812f5224aeSachartre 65822f5224aeSachartre if (vio == NULL) 65832f5224aeSachartre /* nothing to unqueue */ 65842f5224aeSachartre return; 65852f5224aeSachartre 65862f5224aeSachartre /* update the queue */ 65872f5224aeSachartre if (vio_tmp == NULL) 6588*00e3a3e9SAlexandre Chartre vdc->eio_queue = NULL; 65892f5224aeSachartre else 65902f5224aeSachartre vio_tmp->vio_next = NULL; 65912f5224aeSachartre 65922f5224aeSachartre /* 6593*00e3a3e9SAlexandre Chartre * Free and complete unqueued I/Os if this was requested. All I/Os 6594*00e3a3e9SAlexandre Chartre * have a block I/O data transfer structure (buf) and they are 6595*00e3a3e9SAlexandre Chartre * completed by calling biodone(). 
65962f5224aeSachartre */ 65972f5224aeSachartre while (vio != NULL) { 65982f5224aeSachartre vio_tmp = vio->vio_next; 6599*00e3a3e9SAlexandre Chartre 6600*00e3a3e9SAlexandre Chartre if (complete_io) { 6601*00e3a3e9SAlexandre Chartre index = vio->vio_index; 6602*00e3a3e9SAlexandre Chartre op = vdc->local_dring[index].operation; 6603*00e3a3e9SAlexandre Chartre buf = vdc->local_dring[index].buf; 6604*00e3a3e9SAlexandre Chartre (void) vdc_depopulate_descriptor(vdc, index); 6605*00e3a3e9SAlexandre Chartre ASSERT(buf->b_flags & B_ERROR); 6606*00e3a3e9SAlexandre Chartre if (op == VD_OP_BREAD || op == VD_OP_BWRITE) { 6607*00e3a3e9SAlexandre Chartre VD_UPDATE_ERR_STATS(vdc, vd_softerrs); 660890e2f9dcSlm66018 VD_KSTAT_RUNQ_EXIT(vdc); 6609*00e3a3e9SAlexandre Chartre DTRACE_IO1(done, buf_t *, buf); 66102f5224aeSachartre } 6611*00e3a3e9SAlexandre Chartre biodone(buf); 66122f5224aeSachartre } 66132f5224aeSachartre 6614*00e3a3e9SAlexandre Chartre kmem_free(vio, sizeof (vdc_io_t)); 6615*00e3a3e9SAlexandre Chartre vio = vio_tmp; 6616*00e3a3e9SAlexandre Chartre } 66172f5224aeSachartre } 66182f5224aeSachartre 66192f5224aeSachartre /* 6620*00e3a3e9SAlexandre Chartre * Error I/O Thread. There is one eio thread for each virtual disk that 6621*00e3a3e9SAlexandre Chartre * has multiple servers or for which failfast is enabled. Failfast can only 6622*00e3a3e9SAlexandre Chartre * be enabled for vdisk supporting SCSI commands. 66232f5224aeSachartre * 6624*00e3a3e9SAlexandre Chartre * While failfast is enabled, the eio thread sends a TEST UNIT READY 66252f5224aeSachartre * and a zero size WRITE(10) SCSI commands on a regular basis to check that 66262f5224aeSachartre * we still have access to the disk. If a command fails with a RESERVATION 66272f5224aeSachartre * CONFLICT error then the system will immediatly panic. 66282f5224aeSachartre * 6629*00e3a3e9SAlexandre Chartre * The eio thread is also woken up when an I/O has failed. 
It then checks 66302f5224aeSachartre * the access to the disk to ensure that the I/O failure was not due to a 6631*00e3a3e9SAlexandre Chartre * reservation conflict or to the backend been inaccessible. 66322f5224aeSachartre * 66332f5224aeSachartre */ 66342f5224aeSachartre static void 6635*00e3a3e9SAlexandre Chartre vdc_eio_thread(void *arg) 66362f5224aeSachartre { 66372f5224aeSachartre int status; 66382f5224aeSachartre vdc_t *vdc = (vdc_t *)arg; 66392f5224aeSachartre clock_t timeout, starttime; 66402f5224aeSachartre 66412f5224aeSachartre mutex_enter(&vdc->lock); 66422f5224aeSachartre 6643*00e3a3e9SAlexandre Chartre while (vdc->failfast_interval != 0 || vdc->num_servers > 1) { 6644*00e3a3e9SAlexandre Chartre /* 6645*00e3a3e9SAlexandre Chartre * Wait if there is nothing in the eio queue or if the state 6646*00e3a3e9SAlexandre Chartre * is not VDC_STATE_RUNNING. 6647*00e3a3e9SAlexandre Chartre */ 6648*00e3a3e9SAlexandre Chartre if (vdc->eio_queue == NULL || vdc->state != VDC_STATE_RUNNING) { 6649*00e3a3e9SAlexandre Chartre if (vdc->failfast_interval != 0) { 6650*00e3a3e9SAlexandre Chartre timeout = ddi_get_lbolt() + 6651*00e3a3e9SAlexandre Chartre drv_usectohz(vdc->failfast_interval); 6652*00e3a3e9SAlexandre Chartre (void) cv_timedwait(&vdc->eio_cv, &vdc->lock, 6653*00e3a3e9SAlexandre Chartre timeout); 6654*00e3a3e9SAlexandre Chartre } else { 6655*00e3a3e9SAlexandre Chartre ASSERT(vdc->num_servers > 1); 6656*00e3a3e9SAlexandre Chartre (void) cv_wait(&vdc->eio_cv, &vdc->lock); 6657*00e3a3e9SAlexandre Chartre } 66582f5224aeSachartre 6659*00e3a3e9SAlexandre Chartre if (vdc->state != VDC_STATE_RUNNING) 6660*00e3a3e9SAlexandre Chartre continue; 6661*00e3a3e9SAlexandre Chartre } 66622f5224aeSachartre 66632f5224aeSachartre mutex_exit(&vdc->lock); 66642f5224aeSachartre 6665*00e3a3e9SAlexandre Chartre starttime = ddi_get_lbolt(); 6666*00e3a3e9SAlexandre Chartre 6667*00e3a3e9SAlexandre Chartre /* check error */ 6668*00e3a3e9SAlexandre Chartre status = vdc_eio_check(vdc, 
VDC_OP_STATE_RUNNING); 66692f5224aeSachartre 66702f5224aeSachartre mutex_enter(&vdc->lock); 66712f5224aeSachartre /* 6672*00e3a3e9SAlexandre Chartre * We have dropped the lock to check the backend so we have 6673*00e3a3e9SAlexandre Chartre * to check that the eio thread is still enabled. 66742f5224aeSachartre */ 6675*00e3a3e9SAlexandre Chartre if (vdc->failfast_interval == 0 && vdc->num_servers <= 1) 66762f5224aeSachartre break; 66772f5224aeSachartre 66782f5224aeSachartre /* 6679*00e3a3e9SAlexandre Chartre * If the eio queue is empty or we are not in running state 6680*00e3a3e9SAlexandre Chartre * anymore then there is nothing to do. 66812f5224aeSachartre */ 6682*00e3a3e9SAlexandre Chartre if (vdc->state != VDC_STATE_RUNNING || vdc->eio_queue == NULL) 66832f5224aeSachartre continue; 66842f5224aeSachartre 6685*00e3a3e9SAlexandre Chartre if (status == 0) { 6686*00e3a3e9SAlexandre Chartre /* 6687*00e3a3e9SAlexandre Chartre * The backend access has been successfully checked, 6688*00e3a3e9SAlexandre Chartre * we can complete any I/O queued before the last check. 6689*00e3a3e9SAlexandre Chartre */ 6690*00e3a3e9SAlexandre Chartre vdc_eio_unqueue(vdc, starttime, B_TRUE); 6691*00e3a3e9SAlexandre Chartre 6692*00e3a3e9SAlexandre Chartre } else if (vdc->num_servers > 1) { 6693*00e3a3e9SAlexandre Chartre /* 6694*00e3a3e9SAlexandre Chartre * The backend is inaccessible for a disk with multiple 6695*00e3a3e9SAlexandre Chartre * servers. So we force a reset to switch to another 6696*00e3a3e9SAlexandre Chartre * server. The reset will also clear the eio queue and 6697*00e3a3e9SAlexandre Chartre * resubmit all pending I/Os. 
6698*00e3a3e9SAlexandre Chartre */ 6699*00e3a3e9SAlexandre Chartre mutex_enter(&vdc->read_lock); 6700*00e3a3e9SAlexandre Chartre vdc->read_state = VDC_READ_RESET; 6701*00e3a3e9SAlexandre Chartre cv_signal(&vdc->read_cv); 6702*00e3a3e9SAlexandre Chartre mutex_exit(&vdc->read_lock); 6703*00e3a3e9SAlexandre Chartre } 67042f5224aeSachartre } 67052f5224aeSachartre 67062f5224aeSachartre /* 6707*00e3a3e9SAlexandre Chartre * The thread is being stopped so we can complete any queued I/O. 67082f5224aeSachartre */ 6709*00e3a3e9SAlexandre Chartre vdc_eio_unqueue(vdc, 0, B_TRUE); 6710*00e3a3e9SAlexandre Chartre vdc->eio_thread = NULL; 67112f5224aeSachartre mutex_exit(&vdc->lock); 67122f5224aeSachartre thread_exit(); 67132f5224aeSachartre } 67142f5224aeSachartre 67152f5224aeSachartre /* 67162f5224aeSachartre * Implement the MHIOCENFAILFAST mhd(7i) ioctl. 67172f5224aeSachartre */ 67182f5224aeSachartre static int 67192f5224aeSachartre vdc_failfast(vdc_t *vdc, caddr_t arg, int mode) 67202f5224aeSachartre { 67212f5224aeSachartre unsigned int mh_time; 67222f5224aeSachartre 67232f5224aeSachartre if (ddi_copyin((void *)arg, &mh_time, sizeof (int), mode)) 67242f5224aeSachartre return (EFAULT); 67252f5224aeSachartre 67262f5224aeSachartre mutex_enter(&vdc->lock); 6727*00e3a3e9SAlexandre Chartre if (mh_time != 0 && vdc->eio_thread == NULL) { 6728*00e3a3e9SAlexandre Chartre vdc->eio_thread = thread_create(NULL, 0, 6729*00e3a3e9SAlexandre Chartre vdc_eio_thread, vdc, 0, &p0, TS_RUN, 67302f5224aeSachartre v.v_maxsyspri - 2); 67312f5224aeSachartre } 67322f5224aeSachartre 6733*00e3a3e9SAlexandre Chartre vdc->failfast_interval = ((long)mh_time) * MILLISEC; 6734*00e3a3e9SAlexandre Chartre cv_signal(&vdc->eio_cv); 67352f5224aeSachartre mutex_exit(&vdc->lock); 67362f5224aeSachartre 67372f5224aeSachartre return (0); 67382f5224aeSachartre } 67392f5224aeSachartre 67402f5224aeSachartre /* 67412f5224aeSachartre * Implement the MHIOCTKOWN and MHIOCRELEASE mhd(7i) ioctls. 
These ioctls are 67422f5224aeSachartre * converted to VD_OP_SET_ACCESS operations. 67432f5224aeSachartre */ 67442f5224aeSachartre static int 6745*00e3a3e9SAlexandre Chartre vdc_access_set(vdc_t *vdc, uint64_t flags) 67462f5224aeSachartre { 67472f5224aeSachartre int rv; 67482f5224aeSachartre 67492f5224aeSachartre /* submit owership command request */ 67502f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_SET_ACCESS, (caddr_t)&flags, 6751*00e3a3e9SAlexandre Chartre sizeof (uint64_t), 0, 0, VIO_both_dir, B_TRUE); 67522f5224aeSachartre 67532f5224aeSachartre return (rv); 67542f5224aeSachartre } 67552f5224aeSachartre 67562f5224aeSachartre /* 67572f5224aeSachartre * Implement the MHIOCSTATUS mhd(7i) ioctl. This ioctl is converted to a 67582f5224aeSachartre * VD_OP_GET_ACCESS operation. 67592f5224aeSachartre */ 67602f5224aeSachartre static int 6761*00e3a3e9SAlexandre Chartre vdc_access_get(vdc_t *vdc, uint64_t *status) 67622f5224aeSachartre { 67632f5224aeSachartre int rv; 67642f5224aeSachartre 67652f5224aeSachartre /* submit owership command request */ 67662f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_GET_ACCESS, (caddr_t)status, 6767*00e3a3e9SAlexandre Chartre sizeof (uint64_t), 0, 0, VIO_both_dir, B_TRUE); 67682f5224aeSachartre 67692f5224aeSachartre return (rv); 67702f5224aeSachartre } 67712f5224aeSachartre 67722f5224aeSachartre /* 67732f5224aeSachartre * Disk Ownership Thread. 67742f5224aeSachartre * 67752f5224aeSachartre * When we have taken the ownership of a disk, this thread waits to be 67762f5224aeSachartre * notified when the LDC channel is reset so that it can recover the 67772f5224aeSachartre * ownership. 67782f5224aeSachartre * 67792f5224aeSachartre * Note that the thread handling the LDC reset (vdc_process_msg_thread()) 67802f5224aeSachartre * can not be used to do the ownership recovery because it has to be 67812f5224aeSachartre * running to handle the reply message to the ownership operation. 
67822f5224aeSachartre */ 67832f5224aeSachartre static void 67842f5224aeSachartre vdc_ownership_thread(void *arg) 67852f5224aeSachartre { 67862f5224aeSachartre vdc_t *vdc = (vdc_t *)arg; 67872f5224aeSachartre clock_t timeout; 67882f5224aeSachartre uint64_t status; 67892f5224aeSachartre 67902f5224aeSachartre mutex_enter(&vdc->ownership_lock); 67912f5224aeSachartre mutex_enter(&vdc->lock); 67922f5224aeSachartre 67932f5224aeSachartre while (vdc->ownership & VDC_OWNERSHIP_WANTED) { 67942f5224aeSachartre 67952f5224aeSachartre if ((vdc->ownership & VDC_OWNERSHIP_RESET) || 67962f5224aeSachartre !(vdc->ownership & VDC_OWNERSHIP_GRANTED)) { 67972f5224aeSachartre /* 67982f5224aeSachartre * There was a reset so the ownership has been lost, 67992f5224aeSachartre * try to recover. We do this without using the preempt 68002f5224aeSachartre * option so that we don't steal the ownership from 68012f5224aeSachartre * someone who has preempted us. 68022f5224aeSachartre */ 68032f5224aeSachartre DMSG(vdc, 0, "[%d] Ownership lost, recovering", 68042f5224aeSachartre vdc->instance); 68052f5224aeSachartre 68062f5224aeSachartre vdc->ownership &= ~(VDC_OWNERSHIP_RESET | 68072f5224aeSachartre VDC_OWNERSHIP_GRANTED); 68082f5224aeSachartre 68092f5224aeSachartre mutex_exit(&vdc->lock); 68102f5224aeSachartre 68112f5224aeSachartre status = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE | 6812*00e3a3e9SAlexandre Chartre VD_ACCESS_SET_PRESERVE); 68132f5224aeSachartre 68142f5224aeSachartre mutex_enter(&vdc->lock); 68152f5224aeSachartre 68162f5224aeSachartre if (status == 0) { 68172f5224aeSachartre DMSG(vdc, 0, "[%d] Ownership recovered", 68182f5224aeSachartre vdc->instance); 68192f5224aeSachartre vdc->ownership |= VDC_OWNERSHIP_GRANTED; 68202f5224aeSachartre } else { 68212f5224aeSachartre DMSG(vdc, 0, "[%d] Fail to recover ownership", 68222f5224aeSachartre vdc->instance); 68232f5224aeSachartre } 68242f5224aeSachartre 68252f5224aeSachartre } 68262f5224aeSachartre 68272f5224aeSachartre /* 
68282f5224aeSachartre * If we have the ownership then we just wait for an event 68292f5224aeSachartre * to happen (LDC reset), otherwise we will retry to recover 68302f5224aeSachartre * after a delay. 68312f5224aeSachartre */ 68322f5224aeSachartre if (vdc->ownership & VDC_OWNERSHIP_GRANTED) 68332f5224aeSachartre timeout = 0; 68342f5224aeSachartre else 68352f5224aeSachartre timeout = ddi_get_lbolt() + 68362f5224aeSachartre drv_usectohz(vdc_ownership_delay); 68372f5224aeSachartre 68382f5224aeSachartre /* Release the ownership_lock and wait on the vdc lock */ 68392f5224aeSachartre mutex_exit(&vdc->ownership_lock); 68402f5224aeSachartre 68412f5224aeSachartre if (timeout == 0) 68422f5224aeSachartre (void) cv_wait(&vdc->ownership_cv, &vdc->lock); 68432f5224aeSachartre else 68442f5224aeSachartre (void) cv_timedwait(&vdc->ownership_cv, 68452f5224aeSachartre &vdc->lock, timeout); 68462f5224aeSachartre 68472f5224aeSachartre mutex_exit(&vdc->lock); 68482f5224aeSachartre 68492f5224aeSachartre mutex_enter(&vdc->ownership_lock); 68502f5224aeSachartre mutex_enter(&vdc->lock); 68512f5224aeSachartre } 68522f5224aeSachartre 68532f5224aeSachartre vdc->ownership_thread = NULL; 68542f5224aeSachartre mutex_exit(&vdc->lock); 68552f5224aeSachartre mutex_exit(&vdc->ownership_lock); 68562f5224aeSachartre 68572f5224aeSachartre thread_exit(); 68582f5224aeSachartre } 68592f5224aeSachartre 68602f5224aeSachartre static void 68612f5224aeSachartre vdc_ownership_update(vdc_t *vdc, int ownership_flags) 68622f5224aeSachartre { 68632f5224aeSachartre ASSERT(MUTEX_HELD(&vdc->ownership_lock)); 68642f5224aeSachartre 68652f5224aeSachartre mutex_enter(&vdc->lock); 68662f5224aeSachartre vdc->ownership = ownership_flags; 68672f5224aeSachartre if ((vdc->ownership & VDC_OWNERSHIP_WANTED) && 68682f5224aeSachartre vdc->ownership_thread == NULL) { 68692f5224aeSachartre /* start ownership thread */ 68702f5224aeSachartre vdc->ownership_thread = thread_create(NULL, 0, 68712f5224aeSachartre vdc_ownership_thread, vdc, 
0, &p0, TS_RUN, 68722f5224aeSachartre v.v_maxsyspri - 2); 68732f5224aeSachartre } else { 68742f5224aeSachartre /* notify the ownership thread */ 68752f5224aeSachartre cv_signal(&vdc->ownership_cv); 68762f5224aeSachartre } 68772f5224aeSachartre mutex_exit(&vdc->lock); 68782f5224aeSachartre } 68792f5224aeSachartre 68802f5224aeSachartre /* 68812f5224aeSachartre * Get the size and the block size of a virtual disk from the vdisk server. 68822f5224aeSachartre */ 68832f5224aeSachartre static int 6884de3a5331SRamesh Chitrothu vdc_get_capacity(vdc_t *vdc, size_t *dsk_size, size_t *blk_size) 68852f5224aeSachartre { 68862f5224aeSachartre int rv = 0; 68872f5224aeSachartre size_t alloc_len; 68882f5224aeSachartre vd_capacity_t *vd_cap; 68892f5224aeSachartre 6890de3a5331SRamesh Chitrothu ASSERT(MUTEX_NOT_HELD(&vdc->lock)); 68912f5224aeSachartre 68922f5224aeSachartre alloc_len = P2ROUNDUP(sizeof (vd_capacity_t), sizeof (uint64_t)); 68932f5224aeSachartre 68942f5224aeSachartre vd_cap = kmem_zalloc(alloc_len, KM_SLEEP); 68952f5224aeSachartre 68962f5224aeSachartre rv = vdc_do_sync_op(vdc, VD_OP_GET_CAPACITY, (caddr_t)vd_cap, alloc_len, 6897*00e3a3e9SAlexandre Chartre 0, 0, VIO_both_dir, B_TRUE); 68982f5224aeSachartre 6899de3a5331SRamesh Chitrothu *dsk_size = vd_cap->vdisk_size; 6900de3a5331SRamesh Chitrothu *blk_size = vd_cap->vdisk_block_size; 69012f5224aeSachartre 69022f5224aeSachartre kmem_free(vd_cap, alloc_len); 69032f5224aeSachartre return (rv); 69042f5224aeSachartre } 69052f5224aeSachartre 69062f5224aeSachartre /* 6907de3a5331SRamesh Chitrothu * Check the disk capacity. Disk size information is updated if size has 6908de3a5331SRamesh Chitrothu * changed. 6909de3a5331SRamesh Chitrothu * 6910de3a5331SRamesh Chitrothu * Return 0 if the disk capacity is available, or non-zero if it is not. 
6911de3a5331SRamesh Chitrothu */ 6912de3a5331SRamesh Chitrothu static int 6913de3a5331SRamesh Chitrothu vdc_check_capacity(vdc_t *vdc) 6914de3a5331SRamesh Chitrothu { 6915de3a5331SRamesh Chitrothu size_t dsk_size, blk_size; 6916de3a5331SRamesh Chitrothu int rv; 6917de3a5331SRamesh Chitrothu 69183f4df6d3SAlexandre Chartre /* 69193f4df6d3SAlexandre Chartre * If the vdisk does not support the VD_OP_GET_CAPACITY operation 69203f4df6d3SAlexandre Chartre * then the disk capacity has been retrieved during the handshake 69213f4df6d3SAlexandre Chartre * and there's nothing more to do here. 69223f4df6d3SAlexandre Chartre */ 69233f4df6d3SAlexandre Chartre if (!VD_OP_SUPPORTED(vdc->operations, VD_OP_GET_CAPACITY)) 69243f4df6d3SAlexandre Chartre return (0); 69253f4df6d3SAlexandre Chartre 6926de3a5331SRamesh Chitrothu if ((rv = vdc_get_capacity(vdc, &dsk_size, &blk_size)) != 0) 6927de3a5331SRamesh Chitrothu return (rv); 6928de3a5331SRamesh Chitrothu 692965908c77Syu, larry liu - Sun Microsystems - Beijing China if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0 || blk_size == 0) 6930de3a5331SRamesh Chitrothu return (EINVAL); 6931de3a5331SRamesh Chitrothu 6932de3a5331SRamesh Chitrothu mutex_enter(&vdc->lock); 693365908c77Syu, larry liu - Sun Microsystems - Beijing China /* 693465908c77Syu, larry liu - Sun Microsystems - Beijing China * First try to update the VIO block size (which is the same as the 693565908c77Syu, larry liu - Sun Microsystems - Beijing China * vdisk block size). If this returns an error then that means that 693665908c77Syu, larry liu - Sun Microsystems - Beijing China * we can not use that block size so basically the vdisk is unusable 693765908c77Syu, larry liu - Sun Microsystems - Beijing China * and we return an error. 
693865908c77Syu, larry liu - Sun Microsystems - Beijing China */ 693965908c77Syu, larry liu - Sun Microsystems - Beijing China rv = vdc_update_vio_bsize(vdc, blk_size); 694065908c77Syu, larry liu - Sun Microsystems - Beijing China if (rv == 0) 6941de3a5331SRamesh Chitrothu vdc_update_size(vdc, dsk_size, blk_size, vdc->max_xfer_sz); 694265908c77Syu, larry liu - Sun Microsystems - Beijing China 6943de3a5331SRamesh Chitrothu mutex_exit(&vdc->lock); 6944de3a5331SRamesh Chitrothu 694565908c77Syu, larry liu - Sun Microsystems - Beijing China return (rv); 6946de3a5331SRamesh Chitrothu } 6947de3a5331SRamesh Chitrothu 6948de3a5331SRamesh Chitrothu /* 69491ae08745Sheppo * This structure is used in the DKIO(7I) array below. 69501ae08745Sheppo */ 69511ae08745Sheppo typedef struct vdc_dk_ioctl { 69521ae08745Sheppo uint8_t op; /* VD_OP_XXX value */ 69531ae08745Sheppo int cmd; /* Solaris ioctl operation number */ 69541ae08745Sheppo size_t nbytes; /* size of structure to be copied */ 69550a55fbb7Slm66018 69560a55fbb7Slm66018 /* function to convert between vDisk and Solaris structure formats */ 6957d10e4ef2Snarayan int (*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg, 6958d10e4ef2Snarayan int mode, int dir); 69591ae08745Sheppo } vdc_dk_ioctl_t; 69601ae08745Sheppo 69611ae08745Sheppo /* 69621ae08745Sheppo * Subset of DKIO(7I) operations currently supported 69631ae08745Sheppo */ 69641ae08745Sheppo static vdc_dk_ioctl_t dk_ioctl[] = { 6965eff7243fSlm66018 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, 0, 69660a55fbb7Slm66018 vdc_null_copy_func}, 69670a55fbb7Slm66018 {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), 69684bac2208Snarayan vdc_get_wce_convert}, 69690a55fbb7Slm66018 {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), 69704bac2208Snarayan vdc_set_wce_convert}, 69710a55fbb7Slm66018 {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), 69720a55fbb7Slm66018 vdc_get_vtoc_convert}, 69730a55fbb7Slm66018 {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), 69740a55fbb7Slm66018 vdc_set_vtoc_convert}, 
6975342440ecSPrasad Singamsetty {VD_OP_GET_VTOC, DKIOCGEXTVTOC, sizeof (vd_vtoc_t), 6976342440ecSPrasad Singamsetty vdc_get_extvtoc_convert}, 6977342440ecSPrasad Singamsetty {VD_OP_SET_VTOC, DKIOCSEXTVTOC, sizeof (vd_vtoc_t), 6978342440ecSPrasad Singamsetty vdc_set_extvtoc_convert}, 69790a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), 69800a55fbb7Slm66018 vdc_get_geom_convert}, 69810a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), 69820a55fbb7Slm66018 vdc_get_geom_convert}, 69830a55fbb7Slm66018 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), 69840a55fbb7Slm66018 vdc_get_geom_convert}, 69850a55fbb7Slm66018 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 69860a55fbb7Slm66018 vdc_set_geom_convert}, 69874bac2208Snarayan {VD_OP_GET_EFI, DKIOCGETEFI, 0, 69884bac2208Snarayan vdc_get_efi_convert}, 69894bac2208Snarayan {VD_OP_SET_EFI, DKIOCSETEFI, 0, 69904bac2208Snarayan vdc_set_efi_convert}, 69910a55fbb7Slm66018 699287a7269eSachartre /* DIOCTL_RWCMD is converted to a read or a write */ 699387a7269eSachartre {0, DIOCTL_RWCMD, sizeof (struct dadkio_rwcmd), NULL}, 699487a7269eSachartre 69952f5224aeSachartre /* mhd(7I) non-shared multihost disks ioctls */ 69962f5224aeSachartre {0, MHIOCTKOWN, 0, vdc_null_copy_func}, 69972f5224aeSachartre {0, MHIOCRELEASE, 0, vdc_null_copy_func}, 69982f5224aeSachartre {0, MHIOCSTATUS, 0, vdc_null_copy_func}, 69992f5224aeSachartre {0, MHIOCQRESERVE, 0, vdc_null_copy_func}, 70002f5224aeSachartre 70012f5224aeSachartre /* mhd(7I) shared multihost disks ioctls */ 70022f5224aeSachartre {0, MHIOCGRP_INKEYS, 0, vdc_null_copy_func}, 70032f5224aeSachartre {0, MHIOCGRP_INRESV, 0, vdc_null_copy_func}, 70042f5224aeSachartre {0, MHIOCGRP_REGISTER, 0, vdc_null_copy_func}, 70052f5224aeSachartre {0, MHIOCGRP_RESERVE, 0, vdc_null_copy_func}, 70062f5224aeSachartre {0, MHIOCGRP_PREEMPTANDABORT, 0, vdc_null_copy_func}, 70072f5224aeSachartre {0, MHIOCGRP_REGISTERANDIGNOREKEY, 0, vdc_null_copy_func}, 
70082f5224aeSachartre 70092f5224aeSachartre /* mhd(7I) failfast ioctl */ 70102f5224aeSachartre {0, MHIOCENFAILFAST, 0, vdc_null_copy_func}, 70112f5224aeSachartre 70120a55fbb7Slm66018 /* 70130a55fbb7Slm66018 * These particular ioctls are not sent to the server - vdc fakes up 70140a55fbb7Slm66018 * the necessary info. 70150a55fbb7Slm66018 */ 70160a55fbb7Slm66018 {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, 70170a55fbb7Slm66018 {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, 70180a55fbb7Slm66018 {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, 70199642afceSachartre {0, DKIOCPARTITION, 0, vdc_null_copy_func }, 702087a7269eSachartre {0, DKIOCGAPART, 0, vdc_null_copy_func }, 70210a55fbb7Slm66018 {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, 70220a55fbb7Slm66018 {0, CDROMREADOFFSET, 0, vdc_null_copy_func} 70231ae08745Sheppo }; 70241ae08745Sheppo 70251ae08745Sheppo /* 7026edcc0754Sachartre * This function handles ioctl requests from the vd_efi_alloc_and_read() 7027edcc0754Sachartre * function and forward them to the vdisk. 
70282f5224aeSachartre */ 70292f5224aeSachartre static int 7030edcc0754Sachartre vd_process_efi_ioctl(void *vdisk, int cmd, uintptr_t arg) 70312f5224aeSachartre { 7032edcc0754Sachartre vdc_t *vdc = (vdc_t *)vdisk; 7033edcc0754Sachartre dev_t dev; 70342f5224aeSachartre int rval; 7035edcc0754Sachartre 7036edcc0754Sachartre dev = makedevice(ddi_driver_major(vdc->dip), 7037edcc0754Sachartre VD_MAKE_DEV(vdc->instance, 0)); 7038edcc0754Sachartre 7039edcc0754Sachartre return (vd_process_ioctl(dev, cmd, (caddr_t)arg, FKIOCTL, &rval)); 70402f5224aeSachartre } 70412f5224aeSachartre 70422f5224aeSachartre /* 70431ae08745Sheppo * Function: 70441ae08745Sheppo * vd_process_ioctl() 70451ae08745Sheppo * 70461ae08745Sheppo * Description: 70470a55fbb7Slm66018 * This routine processes disk specific ioctl calls 70481ae08745Sheppo * 70491ae08745Sheppo * Arguments: 70501ae08745Sheppo * dev - the device number 70511ae08745Sheppo * cmd - the operation [dkio(7I)] to be processed 70521ae08745Sheppo * arg - pointer to user provided structure 70531ae08745Sheppo * (contains data to be set or reference parameter for get) 70541ae08745Sheppo * mode - bit flag, indicating open settings, 32/64 bit type, etc 70552f5224aeSachartre * rvalp - pointer to return value for calling process. 
70561ae08745Sheppo * 70571ae08745Sheppo * Return Code: 70581ae08745Sheppo * 0 70591ae08745Sheppo * EFAULT 70601ae08745Sheppo * ENXIO 70611ae08745Sheppo * EIO 70621ae08745Sheppo * ENOTSUP 70631ae08745Sheppo */ 70641ae08745Sheppo static int 70652f5224aeSachartre vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, int *rvalp) 70661ae08745Sheppo { 70670d0c8d4bSnarayan int instance = VDCUNIT(dev); 70681ae08745Sheppo vdc_t *vdc = NULL; 70691ae08745Sheppo int rv = -1; 70701ae08745Sheppo int idx = 0; /* index into dk_ioctl[] */ 70711ae08745Sheppo size_t len = 0; /* #bytes to send to vds */ 70721ae08745Sheppo size_t alloc_len = 0; /* #bytes to allocate mem for */ 70731ae08745Sheppo caddr_t mem_p = NULL; 70741ae08745Sheppo size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 70753af08d82Slm66018 vdc_dk_ioctl_t *iop; 70761ae08745Sheppo 70771ae08745Sheppo vdc = ddi_get_soft_state(vdc_state, instance); 70781ae08745Sheppo if (vdc == NULL) { 70791ae08745Sheppo cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 70801ae08745Sheppo instance); 70811ae08745Sheppo return (ENXIO); 70821ae08745Sheppo } 70831ae08745Sheppo 70843af08d82Slm66018 DMSG(vdc, 0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n", 70853af08d82Slm66018 instance, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 70861ae08745Sheppo 70872f5224aeSachartre if (rvalp != NULL) { 70882f5224aeSachartre /* the return value of the ioctl is 0 by default */ 70892f5224aeSachartre *rvalp = 0; 70902f5224aeSachartre } 70912f5224aeSachartre 70921ae08745Sheppo /* 70931ae08745Sheppo * Validate the ioctl operation to be performed. 70941ae08745Sheppo * 70951ae08745Sheppo * If we have looped through the array without finding a match then we 70961ae08745Sheppo * don't support this ioctl. 
70971ae08745Sheppo */ 70981ae08745Sheppo for (idx = 0; idx < nioctls; idx++) { 70991ae08745Sheppo if (cmd == dk_ioctl[idx].cmd) 71001ae08745Sheppo break; 71011ae08745Sheppo } 71021ae08745Sheppo 71031ae08745Sheppo if (idx >= nioctls) { 71043af08d82Slm66018 DMSG(vdc, 0, "[%d] Unsupported ioctl (0x%x)\n", 7105e1ebb9ecSlm66018 vdc->instance, cmd); 71061ae08745Sheppo return (ENOTSUP); 71071ae08745Sheppo } 71081ae08745Sheppo 71093af08d82Slm66018 iop = &(dk_ioctl[idx]); 71103af08d82Slm66018 71114bac2208Snarayan if (cmd == DKIOCGETEFI || cmd == DKIOCSETEFI) { 71124bac2208Snarayan /* size is not fixed for EFI ioctls, it depends on ioctl arg */ 71134bac2208Snarayan dk_efi_t dk_efi; 71144bac2208Snarayan 71154bac2208Snarayan rv = ddi_copyin(arg, &dk_efi, sizeof (dk_efi_t), mode); 71164bac2208Snarayan if (rv != 0) 71174bac2208Snarayan return (EFAULT); 71184bac2208Snarayan 71194bac2208Snarayan len = sizeof (vd_efi_t) - 1 + dk_efi.dki_length; 71204bac2208Snarayan } else { 71213af08d82Slm66018 len = iop->nbytes; 71224bac2208Snarayan } 71231ae08745Sheppo 71242f5224aeSachartre /* check if the ioctl is applicable */ 71251ae08745Sheppo switch (cmd) { 71261ae08745Sheppo case CDROMREADOFFSET: 71271ae08745Sheppo case DKIOCREMOVABLE: 71281ae08745Sheppo return (ENOTTY); 71291ae08745Sheppo 71302f5224aeSachartre case USCSICMD: 71312f5224aeSachartre case MHIOCTKOWN: 71322f5224aeSachartre case MHIOCSTATUS: 71332f5224aeSachartre case MHIOCQRESERVE: 71342f5224aeSachartre case MHIOCRELEASE: 71352f5224aeSachartre case MHIOCGRP_INKEYS: 71362f5224aeSachartre case MHIOCGRP_INRESV: 71372f5224aeSachartre case MHIOCGRP_REGISTER: 71382f5224aeSachartre case MHIOCGRP_RESERVE: 71392f5224aeSachartre case MHIOCGRP_PREEMPTANDABORT: 71402f5224aeSachartre case MHIOCGRP_REGISTERANDIGNOREKEY: 71412f5224aeSachartre case MHIOCENFAILFAST: 71422f5224aeSachartre if (vdc->cinfo == NULL) 71432f5224aeSachartre return (ENXIO); 71442f5224aeSachartre if (vdc->cinfo->dki_ctype != DKC_SCSI_CCS) 71452f5224aeSachartre return 
(ENOTTY); 71462f5224aeSachartre break; 71472f5224aeSachartre 71482f5224aeSachartre case DIOCTL_RWCMD: 71492f5224aeSachartre if (vdc->cinfo == NULL) 71502f5224aeSachartre return (ENXIO); 71512f5224aeSachartre if (vdc->cinfo->dki_ctype != DKC_DIRECT) 71522f5224aeSachartre return (ENOTTY); 71532f5224aeSachartre break; 71542f5224aeSachartre 71552f5224aeSachartre case DKIOCINFO: 71562f5224aeSachartre if (vdc->cinfo == NULL) 71572f5224aeSachartre return (ENXIO); 71582f5224aeSachartre break; 71592f5224aeSachartre 71602f5224aeSachartre case DKIOCGMEDIAINFO: 71612f5224aeSachartre if (vdc->minfo == NULL) 71622f5224aeSachartre return (ENXIO); 71632f5224aeSachartre if (vdc_check_capacity(vdc) != 0) 71642f5224aeSachartre /* disk capacity is not available */ 71652f5224aeSachartre return (EIO); 71662f5224aeSachartre break; 71672f5224aeSachartre } 71682f5224aeSachartre 71692f5224aeSachartre /* 71702f5224aeSachartre * Deal with ioctls which require a processing different than 71712f5224aeSachartre * converting ioctl arguments and sending a corresponding 71722f5224aeSachartre * VD operation. 71732f5224aeSachartre */ 71742f5224aeSachartre switch (cmd) { 71752f5224aeSachartre 71762f5224aeSachartre case USCSICMD: 71772f5224aeSachartre { 71782f5224aeSachartre return (vdc_uscsi_cmd(vdc, arg, mode)); 71792f5224aeSachartre } 71802f5224aeSachartre 71812f5224aeSachartre case MHIOCTKOWN: 71822f5224aeSachartre { 71832f5224aeSachartre mutex_enter(&vdc->ownership_lock); 71842f5224aeSachartre /* 71852f5224aeSachartre * We have to set VDC_OWNERSHIP_WANTED now so that the ownership 71862f5224aeSachartre * can be flagged with VDC_OWNERSHIP_RESET if the LDC is reset 71872f5224aeSachartre * while we are processing the ioctl. 
71882f5224aeSachartre */ 71892f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_WANTED); 71902f5224aeSachartre 71912f5224aeSachartre rv = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE | 7192*00e3a3e9SAlexandre Chartre VD_ACCESS_SET_PREEMPT | VD_ACCESS_SET_PRESERVE); 71932f5224aeSachartre if (rv == 0) { 71942f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_WANTED | 71952f5224aeSachartre VDC_OWNERSHIP_GRANTED); 71962f5224aeSachartre } else { 71972f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 71982f5224aeSachartre } 71992f5224aeSachartre mutex_exit(&vdc->ownership_lock); 72002f5224aeSachartre return (rv); 72012f5224aeSachartre } 72022f5224aeSachartre 72032f5224aeSachartre case MHIOCRELEASE: 72042f5224aeSachartre { 72052f5224aeSachartre mutex_enter(&vdc->ownership_lock); 7206*00e3a3e9SAlexandre Chartre rv = vdc_access_set(vdc, VD_ACCESS_SET_CLEAR); 72072f5224aeSachartre if (rv == 0) { 72082f5224aeSachartre vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 72092f5224aeSachartre } 72102f5224aeSachartre mutex_exit(&vdc->ownership_lock); 72112f5224aeSachartre return (rv); 72122f5224aeSachartre } 72132f5224aeSachartre 72142f5224aeSachartre case MHIOCSTATUS: 72152f5224aeSachartre { 72162f5224aeSachartre uint64_t status; 72172f5224aeSachartre 7218*00e3a3e9SAlexandre Chartre rv = vdc_access_get(vdc, &status); 72192f5224aeSachartre if (rv == 0 && rvalp != NULL) 72202f5224aeSachartre *rvalp = (status & VD_ACCESS_ALLOWED)? 
0 : 1; 72212f5224aeSachartre return (rv); 72222f5224aeSachartre } 72232f5224aeSachartre 72242f5224aeSachartre case MHIOCQRESERVE: 72252f5224aeSachartre { 7226*00e3a3e9SAlexandre Chartre rv = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE); 72272f5224aeSachartre return (rv); 72282f5224aeSachartre } 72292f5224aeSachartre 72302f5224aeSachartre case MHIOCGRP_INKEYS: 72312f5224aeSachartre { 72322f5224aeSachartre return (vdc_mhd_inkeys(vdc, arg, mode)); 72332f5224aeSachartre } 72342f5224aeSachartre 72352f5224aeSachartre case MHIOCGRP_INRESV: 72362f5224aeSachartre { 72372f5224aeSachartre return (vdc_mhd_inresv(vdc, arg, mode)); 72382f5224aeSachartre } 72392f5224aeSachartre 72402f5224aeSachartre case MHIOCGRP_REGISTER: 72412f5224aeSachartre { 72422f5224aeSachartre return (vdc_mhd_register(vdc, arg, mode)); 72432f5224aeSachartre } 72442f5224aeSachartre 72452f5224aeSachartre case MHIOCGRP_RESERVE: 72462f5224aeSachartre { 72472f5224aeSachartre return (vdc_mhd_reserve(vdc, arg, mode)); 72482f5224aeSachartre } 72492f5224aeSachartre 72502f5224aeSachartre case MHIOCGRP_PREEMPTANDABORT: 72512f5224aeSachartre { 72522f5224aeSachartre return (vdc_mhd_preemptabort(vdc, arg, mode)); 72532f5224aeSachartre } 72542f5224aeSachartre 72552f5224aeSachartre case MHIOCGRP_REGISTERANDIGNOREKEY: 72562f5224aeSachartre { 72572f5224aeSachartre return (vdc_mhd_registerignore(vdc, arg, mode)); 72582f5224aeSachartre } 72592f5224aeSachartre 72602f5224aeSachartre case MHIOCENFAILFAST: 72612f5224aeSachartre { 72622f5224aeSachartre rv = vdc_failfast(vdc, arg, mode); 72632f5224aeSachartre return (rv); 72642f5224aeSachartre } 72652f5224aeSachartre 726687a7269eSachartre case DIOCTL_RWCMD: 726787a7269eSachartre { 726865908c77Syu, larry liu - Sun Microsystems - Beijing China return (vdc_dioctl_rwcmd(vdc, arg, mode)); 726987a7269eSachartre } 727087a7269eSachartre 727187a7269eSachartre case DKIOCGAPART: 727287a7269eSachartre { 72739642afceSachartre return (vdc_dkio_gapart(vdc, arg, mode)); 72749642afceSachartre } 
72759642afceSachartre 72769642afceSachartre case DKIOCPARTITION: 72779642afceSachartre { 72789642afceSachartre return (vdc_dkio_partition(vdc, arg, mode)); 727987a7269eSachartre } 728087a7269eSachartre 72811ae08745Sheppo case DKIOCINFO: 72821ae08745Sheppo { 72831ae08745Sheppo struct dk_cinfo cinfo; 72841ae08745Sheppo 72851ae08745Sheppo bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 72860d0c8d4bSnarayan cinfo.dki_partition = VDCPART(dev); 72871ae08745Sheppo 72881ae08745Sheppo rv = ddi_copyout(&cinfo, (void *)arg, 72891ae08745Sheppo sizeof (struct dk_cinfo), mode); 72901ae08745Sheppo if (rv != 0) 72911ae08745Sheppo return (EFAULT); 72921ae08745Sheppo 72931ae08745Sheppo return (0); 72941ae08745Sheppo } 72951ae08745Sheppo 72961ae08745Sheppo case DKIOCGMEDIAINFO: 72978e6a2a04Slm66018 { 72982f5224aeSachartre ASSERT(vdc->vdisk_size != 0); 7299de3a5331SRamesh Chitrothu ASSERT(vdc->minfo->dki_capacity != 0); 73001ae08745Sheppo rv = ddi_copyout(vdc->minfo, (void *)arg, 73011ae08745Sheppo sizeof (struct dk_minfo), mode); 73021ae08745Sheppo if (rv != 0) 73031ae08745Sheppo return (EFAULT); 73041ae08745Sheppo 73051ae08745Sheppo return (0); 73061ae08745Sheppo } 73071ae08745Sheppo 73088e6a2a04Slm66018 case DKIOCFLUSHWRITECACHE: 73098e6a2a04Slm66018 { 731017cadca8Slm66018 struct dk_callback *dkc = 731117cadca8Slm66018 (struct dk_callback *)(uintptr_t)arg; 73128e6a2a04Slm66018 vdc_dk_arg_t *dkarg = NULL; 73138e6a2a04Slm66018 73143af08d82Slm66018 DMSG(vdc, 1, "[%d] Flush W$: mode %x\n", 73153af08d82Slm66018 instance, mode); 73168e6a2a04Slm66018 73178e6a2a04Slm66018 /* 73188e6a2a04Slm66018 * If arg is NULL, then there is no callback function 73198e6a2a04Slm66018 * registered and the call operates synchronously; we 73208e6a2a04Slm66018 * break and continue with the rest of the function and 73218e6a2a04Slm66018 * wait for vds to return (i.e. 
after the request to 73228e6a2a04Slm66018 * vds returns successfully, all writes completed prior 73238e6a2a04Slm66018 * to the ioctl will have been flushed from the disk 73248e6a2a04Slm66018 * write cache to persistent media. 73258e6a2a04Slm66018 * 73268e6a2a04Slm66018 * If a callback function is registered, we dispatch 73278e6a2a04Slm66018 * the request on a task queue and return immediately. 73288e6a2a04Slm66018 * The callback will deal with informing the calling 73298e6a2a04Slm66018 * thread that the flush request is completed. 73308e6a2a04Slm66018 */ 73318e6a2a04Slm66018 if (dkc == NULL) 73328e6a2a04Slm66018 break; 73338e6a2a04Slm66018 7334eff7243fSlm66018 /* 7335eff7243fSlm66018 * the asynchronous callback is only supported if 7336eff7243fSlm66018 * invoked from within the kernel 7337eff7243fSlm66018 */ 7338eff7243fSlm66018 if ((mode & FKIOCTL) == 0) 7339eff7243fSlm66018 return (ENOTSUP); 7340eff7243fSlm66018 73418e6a2a04Slm66018 dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP); 73428e6a2a04Slm66018 73438e6a2a04Slm66018 dkarg->mode = mode; 73448e6a2a04Slm66018 dkarg->dev = dev; 73458e6a2a04Slm66018 bcopy(dkc, &dkarg->dkc, sizeof (*dkc)); 73468e6a2a04Slm66018 73478e6a2a04Slm66018 mutex_enter(&vdc->lock); 73488e6a2a04Slm66018 vdc->dkio_flush_pending++; 73498e6a2a04Slm66018 dkarg->vdc = vdc; 73508e6a2a04Slm66018 mutex_exit(&vdc->lock); 73518e6a2a04Slm66018 73528e6a2a04Slm66018 /* put the request on a task queue */ 73538e6a2a04Slm66018 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 73548e6a2a04Slm66018 (void *)dkarg, DDI_SLEEP); 73553af08d82Slm66018 if (rv == NULL) { 73563af08d82Slm66018 /* clean up if dispatch fails */ 73573af08d82Slm66018 mutex_enter(&vdc->lock); 73583af08d82Slm66018 vdc->dkio_flush_pending--; 735978fcd0a1Sachartre mutex_exit(&vdc->lock); 73603af08d82Slm66018 kmem_free(dkarg, sizeof (vdc_dk_arg_t)); 73613af08d82Slm66018 } 73628e6a2a04Slm66018 73638e6a2a04Slm66018 return (rv == NULL ? 
ENOMEM : 0); 73648e6a2a04Slm66018 } 73658e6a2a04Slm66018 } 73668e6a2a04Slm66018 73671ae08745Sheppo /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 73683af08d82Slm66018 ASSERT(iop->op != 0); 73691ae08745Sheppo 737017cadca8Slm66018 /* check if the vDisk server handles the operation for this vDisk */ 737117cadca8Slm66018 if (VD_OP_SUPPORTED(vdc->operations, iop->op) == B_FALSE) { 737217cadca8Slm66018 DMSG(vdc, 0, "[%d] Unsupported VD_OP operation (0x%x)\n", 737317cadca8Slm66018 vdc->instance, iop->op); 737417cadca8Slm66018 return (ENOTSUP); 737517cadca8Slm66018 } 737617cadca8Slm66018 73771ae08745Sheppo /* LDC requires that the memory being mapped is 8-byte aligned */ 73781ae08745Sheppo alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 73793af08d82Slm66018 DMSG(vdc, 1, "[%d] struct size %ld alloc %ld\n", 73803af08d82Slm66018 instance, len, alloc_len); 73811ae08745Sheppo 7382eff7243fSlm66018 if (alloc_len > 0) 73831ae08745Sheppo mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 73841ae08745Sheppo 73850a55fbb7Slm66018 /* 7386eff7243fSlm66018 * Call the conversion function for this ioctl which, if necessary, 73870a55fbb7Slm66018 * converts from the Solaris format to the format ARC'ed 73880a55fbb7Slm66018 * as part of the vDisk protocol (FWARC 2006/195) 73890a55fbb7Slm66018 */ 73903af08d82Slm66018 ASSERT(iop->convert != NULL); 73913af08d82Slm66018 rv = (iop->convert)(vdc, arg, mem_p, mode, VD_COPYIN); 73921ae08745Sheppo if (rv != 0) { 73933af08d82Slm66018 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 7394e1ebb9ecSlm66018 instance, rv, cmd); 73951ae08745Sheppo if (mem_p != NULL) 73961ae08745Sheppo kmem_free(mem_p, alloc_len); 73970a55fbb7Slm66018 return (rv); 73981ae08745Sheppo } 73991ae08745Sheppo 74001ae08745Sheppo /* 74011ae08745Sheppo * send request to vds to service the ioctl. 
74021ae08745Sheppo */ 74033af08d82Slm66018 rv = vdc_do_sync_op(vdc, iop->op, mem_p, alloc_len, 7404*00e3a3e9SAlexandre Chartre VDCPART(dev), 0, VIO_both_dir, B_TRUE); 740578fcd0a1Sachartre 74061ae08745Sheppo if (rv != 0) { 74071ae08745Sheppo /* 74081ae08745Sheppo * This is not necessarily an error. The ioctl could 74091ae08745Sheppo * be returning a value such as ENOTTY to indicate 74101ae08745Sheppo * that the ioctl is not applicable. 74111ae08745Sheppo */ 74123af08d82Slm66018 DMSG(vdc, 0, "[%d] vds returned %d for ioctl 0x%x\n", 7413e1ebb9ecSlm66018 instance, rv, cmd); 74141ae08745Sheppo if (mem_p != NULL) 74151ae08745Sheppo kmem_free(mem_p, alloc_len); 7416d10e4ef2Snarayan 74171ae08745Sheppo return (rv); 74181ae08745Sheppo } 74191ae08745Sheppo 74201ae08745Sheppo /* 74210a55fbb7Slm66018 * Call the conversion function (if it exists) for this ioctl 74220a55fbb7Slm66018 * which converts from the format ARC'ed as part of the vDisk 74230a55fbb7Slm66018 * protocol (FWARC 2006/195) back to a format understood by 74240a55fbb7Slm66018 * the rest of Solaris. 
74251ae08745Sheppo */ 74263af08d82Slm66018 rv = (iop->convert)(vdc, mem_p, arg, mode, VD_COPYOUT); 74270a55fbb7Slm66018 if (rv != 0) { 74283af08d82Slm66018 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 7429e1ebb9ecSlm66018 instance, rv, cmd); 74301ae08745Sheppo if (mem_p != NULL) 74311ae08745Sheppo kmem_free(mem_p, alloc_len); 74320a55fbb7Slm66018 return (rv); 74331ae08745Sheppo } 74341ae08745Sheppo 74351ae08745Sheppo if (mem_p != NULL) 74361ae08745Sheppo kmem_free(mem_p, alloc_len); 74371ae08745Sheppo 74381ae08745Sheppo return (rv); 74391ae08745Sheppo } 74401ae08745Sheppo 74411ae08745Sheppo /* 74421ae08745Sheppo * Function: 74430a55fbb7Slm66018 * 74440a55fbb7Slm66018 * Description: 74450a55fbb7Slm66018 * This is an empty conversion function used by ioctl calls which 74460a55fbb7Slm66018 * do not need to convert the data being passed in/out to userland 74470a55fbb7Slm66018 */ 74480a55fbb7Slm66018 static int 7449d10e4ef2Snarayan vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir) 74500a55fbb7Slm66018 { 7451d10e4ef2Snarayan _NOTE(ARGUNUSED(vdc)) 74520a55fbb7Slm66018 _NOTE(ARGUNUSED(from)) 74530a55fbb7Slm66018 _NOTE(ARGUNUSED(to)) 74540a55fbb7Slm66018 _NOTE(ARGUNUSED(mode)) 74550a55fbb7Slm66018 _NOTE(ARGUNUSED(dir)) 74560a55fbb7Slm66018 74570a55fbb7Slm66018 return (0); 74580a55fbb7Slm66018 } 74590a55fbb7Slm66018 74604bac2208Snarayan static int 74614bac2208Snarayan vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 74624bac2208Snarayan int mode, int dir) 74634bac2208Snarayan { 74644bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 74654bac2208Snarayan 74664bac2208Snarayan if (dir == VD_COPYIN) 74674bac2208Snarayan return (0); /* nothing to do */ 74684bac2208Snarayan 74694bac2208Snarayan if (ddi_copyout(from, to, sizeof (int), mode) != 0) 74704bac2208Snarayan return (EFAULT); 74714bac2208Snarayan 74724bac2208Snarayan return (0); 74734bac2208Snarayan } 74744bac2208Snarayan 74754bac2208Snarayan static int 74764bac2208Snarayan 
vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 74774bac2208Snarayan int mode, int dir) 74784bac2208Snarayan { 74794bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 74804bac2208Snarayan 74814bac2208Snarayan if (dir == VD_COPYOUT) 74824bac2208Snarayan return (0); /* nothing to do */ 74834bac2208Snarayan 74844bac2208Snarayan if (ddi_copyin(from, to, sizeof (int), mode) != 0) 74854bac2208Snarayan return (EFAULT); 74864bac2208Snarayan 74874bac2208Snarayan return (0); 74884bac2208Snarayan } 74894bac2208Snarayan 74900a55fbb7Slm66018 /* 74910a55fbb7Slm66018 * Function: 74920a55fbb7Slm66018 * vdc_get_vtoc_convert() 74930a55fbb7Slm66018 * 74940a55fbb7Slm66018 * Description: 7495d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCGVTOC 7496d10e4ef2Snarayan * Solaris structure to the format defined in FWARC 2006/195. 7497d10e4ef2Snarayan * 7498d10e4ef2Snarayan * In the struct vtoc definition, the timestamp field is marked as not 7499d10e4ef2Snarayan * supported so it is not part of vDisk protocol (FWARC 2006/195). 7500d10e4ef2Snarayan * However SVM uses that field to check it can write into the VTOC, 7501d10e4ef2Snarayan * so we fake up the info of that field. 75020a55fbb7Slm66018 * 75030a55fbb7Slm66018 * Arguments: 7504d10e4ef2Snarayan * vdc - the vDisk client 75050a55fbb7Slm66018 * from - the buffer containing the data to be copied from 75060a55fbb7Slm66018 * to - the buffer to be copied to 75070a55fbb7Slm66018 * mode - flags passed to ioctl() call 75080a55fbb7Slm66018 * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT 75090a55fbb7Slm66018 * 75100a55fbb7Slm66018 * Return Code: 75110a55fbb7Slm66018 * 0 - Success 75120a55fbb7Slm66018 * ENXIO - incorrect buffer passed in. 7513d10e4ef2Snarayan * EFAULT - ddi_copyout routine encountered an error. 
75140a55fbb7Slm66018 */ 75150a55fbb7Slm66018 static int 7516d10e4ef2Snarayan vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 75170a55fbb7Slm66018 { 7518d10e4ef2Snarayan int i; 7519342440ecSPrasad Singamsetty struct vtoc vtoc; 7520342440ecSPrasad Singamsetty struct vtoc32 vtoc32; 7521342440ecSPrasad Singamsetty struct extvtoc evtoc; 7522342440ecSPrasad Singamsetty int rv; 75230a55fbb7Slm66018 75240a55fbb7Slm66018 if (dir != VD_COPYOUT) 75250a55fbb7Slm66018 return (0); /* nothing to do */ 75260a55fbb7Slm66018 75270a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 75280a55fbb7Slm66018 return (ENXIO); 75290a55fbb7Slm66018 7530342440ecSPrasad Singamsetty if (vdc->vdisk_size > VD_OLDVTOC_LIMIT) 7531342440ecSPrasad Singamsetty return (EOVERFLOW); 75320a55fbb7Slm66018 7533342440ecSPrasad Singamsetty VD_VTOC2VTOC((vd_vtoc_t *)from, &evtoc); 7534d10e4ef2Snarayan 7535d10e4ef2Snarayan /* fake the VTOC timestamp field */ 7536d10e4ef2Snarayan for (i = 0; i < V_NUMPAR; i++) { 7537342440ecSPrasad Singamsetty evtoc.timestamp[i] = vdc->vtoc->timestamp[i]; 7538d10e4ef2Snarayan } 7539d10e4ef2Snarayan 75400a55fbb7Slm66018 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 754117cadca8Slm66018 /* LINTED E_ASSIGN_NARROW_CONV */ 7542342440ecSPrasad Singamsetty extvtoctovtoc32(evtoc, vtoc32); 7543342440ecSPrasad Singamsetty rv = ddi_copyout(&vtoc32, to, sizeof (vtoc32), mode); 75440a55fbb7Slm66018 if (rv != 0) 75450a55fbb7Slm66018 rv = EFAULT; 7546342440ecSPrasad Singamsetty } else { 7547342440ecSPrasad Singamsetty extvtoctovtoc(evtoc, vtoc); 7548342440ecSPrasad Singamsetty rv = ddi_copyout(&vtoc, to, sizeof (vtoc), mode); 7549342440ecSPrasad Singamsetty if (rv != 0) 7550342440ecSPrasad Singamsetty rv = EFAULT; 7551342440ecSPrasad Singamsetty } 75520a55fbb7Slm66018 75530a55fbb7Slm66018 return (rv); 75540a55fbb7Slm66018 } 75550a55fbb7Slm66018 75560a55fbb7Slm66018 /* 75570a55fbb7Slm66018 * Function: 75580a55fbb7Slm66018 * vdc_set_vtoc_convert() 
75590a55fbb7Slm66018 * 75600a55fbb7Slm66018 * Description: 7561d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCSVTOC 7562d10e4ef2Snarayan * Solaris structure to the format defined in FWARC 2006/195. 75630a55fbb7Slm66018 * 75640a55fbb7Slm66018 * Arguments: 7565d10e4ef2Snarayan * vdc - the vDisk client 75660a55fbb7Slm66018 * from - Buffer with data 75670a55fbb7Slm66018 * to - Buffer where data is to be copied to 75680a55fbb7Slm66018 * mode - flags passed to ioctl 75690a55fbb7Slm66018 * dir - direction of copy (in or out) 75700a55fbb7Slm66018 * 75710a55fbb7Slm66018 * Return Code: 75720a55fbb7Slm66018 * 0 - Success 75730a55fbb7Slm66018 * ENXIO - Invalid buffer passed in 75740a55fbb7Slm66018 * EFAULT - ddi_copyin of data failed 75750a55fbb7Slm66018 */ 75760a55fbb7Slm66018 static int 7577d10e4ef2Snarayan vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 75780a55fbb7Slm66018 { 7579342440ecSPrasad Singamsetty void *uvtoc; 7580342440ecSPrasad Singamsetty struct vtoc vtoc; 7581342440ecSPrasad Singamsetty struct vtoc32 vtoc32; 7582342440ecSPrasad Singamsetty struct extvtoc evtoc; 7583342440ecSPrasad Singamsetty int i, rv; 75840a55fbb7Slm66018 75850a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 75860a55fbb7Slm66018 return (ENXIO); 75870a55fbb7Slm66018 7588342440ecSPrasad Singamsetty if (vdc->vdisk_size > VD_OLDVTOC_LIMIT) 7589342440ecSPrasad Singamsetty return (EOVERFLOW); 75902f5224aeSachartre 7591342440ecSPrasad Singamsetty uvtoc = (dir == VD_COPYIN)? 
from : to; 75920a55fbb7Slm66018 75930a55fbb7Slm66018 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 7594342440ecSPrasad Singamsetty rv = ddi_copyin(uvtoc, &vtoc32, sizeof (vtoc32), mode); 7595342440ecSPrasad Singamsetty if (rv != 0) 7596342440ecSPrasad Singamsetty return (EFAULT); 7597342440ecSPrasad Singamsetty vtoc32toextvtoc(vtoc32, evtoc); 75980a55fbb7Slm66018 } else { 7599342440ecSPrasad Singamsetty rv = ddi_copyin(uvtoc, &vtoc, sizeof (vtoc), mode); 7600342440ecSPrasad Singamsetty if (rv != 0) 7601342440ecSPrasad Singamsetty return (EFAULT); 7602342440ecSPrasad Singamsetty vtoctoextvtoc(vtoc, evtoc); 76030a55fbb7Slm66018 } 76040a55fbb7Slm66018 76052f5224aeSachartre if (dir == VD_COPYOUT) { 76062f5224aeSachartre /* 76072f5224aeSachartre * The disk label may have changed. Revalidate the disk 76085b98b509Sachartre * geometry. This will also update the device nodes. 76092f5224aeSachartre */ 76102f5224aeSachartre vdc_validate(vdc); 76112f5224aeSachartre 76122f5224aeSachartre /* 76132f5224aeSachartre * We also need to keep track of the timestamp fields. 
76142f5224aeSachartre */ 76152f5224aeSachartre for (i = 0; i < V_NUMPAR; i++) { 7616342440ecSPrasad Singamsetty vdc->vtoc->timestamp[i] = evtoc.timestamp[i]; 7617342440ecSPrasad Singamsetty } 7618342440ecSPrasad Singamsetty 7619342440ecSPrasad Singamsetty } else { 7620342440ecSPrasad Singamsetty VTOC2VD_VTOC(&evtoc, (vd_vtoc_t *)to); 76212f5224aeSachartre } 76222f5224aeSachartre 76232f5224aeSachartre return (0); 76242f5224aeSachartre } 76252f5224aeSachartre 7626342440ecSPrasad Singamsetty static int 7627342440ecSPrasad Singamsetty vdc_get_extvtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 7628342440ecSPrasad Singamsetty { 7629342440ecSPrasad Singamsetty int i, rv; 7630342440ecSPrasad Singamsetty struct extvtoc evtoc; 7631342440ecSPrasad Singamsetty 7632342440ecSPrasad Singamsetty if (dir != VD_COPYOUT) 7633342440ecSPrasad Singamsetty return (0); /* nothing to do */ 7634342440ecSPrasad Singamsetty 7635342440ecSPrasad Singamsetty if ((from == NULL) || (to == NULL)) 7636342440ecSPrasad Singamsetty return (ENXIO); 7637342440ecSPrasad Singamsetty 7638342440ecSPrasad Singamsetty VD_VTOC2VTOC((vd_vtoc_t *)from, &evtoc); 7639342440ecSPrasad Singamsetty 7640342440ecSPrasad Singamsetty /* fake the VTOC timestamp field */ 7641342440ecSPrasad Singamsetty for (i = 0; i < V_NUMPAR; i++) { 7642342440ecSPrasad Singamsetty evtoc.timestamp[i] = vdc->vtoc->timestamp[i]; 7643342440ecSPrasad Singamsetty } 7644342440ecSPrasad Singamsetty 7645342440ecSPrasad Singamsetty rv = ddi_copyout(&evtoc, to, sizeof (struct extvtoc), mode); 7646342440ecSPrasad Singamsetty if (rv != 0) 7647342440ecSPrasad Singamsetty rv = EFAULT; 7648342440ecSPrasad Singamsetty 7649342440ecSPrasad Singamsetty return (rv); 7650342440ecSPrasad Singamsetty } 7651342440ecSPrasad Singamsetty 7652342440ecSPrasad Singamsetty static int 7653342440ecSPrasad Singamsetty vdc_set_extvtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 7654342440ecSPrasad Singamsetty { 7655342440ecSPrasad 
Singamsetty void *uvtoc; 7656342440ecSPrasad Singamsetty struct extvtoc evtoc; 7657342440ecSPrasad Singamsetty int i, rv; 7658342440ecSPrasad Singamsetty 7659342440ecSPrasad Singamsetty if ((from == NULL) || (to == NULL)) 7660342440ecSPrasad Singamsetty return (ENXIO); 7661342440ecSPrasad Singamsetty 7662342440ecSPrasad Singamsetty uvtoc = (dir == VD_COPYIN)? from : to; 7663342440ecSPrasad Singamsetty 7664342440ecSPrasad Singamsetty rv = ddi_copyin(uvtoc, &evtoc, sizeof (struct extvtoc), mode); 7665342440ecSPrasad Singamsetty if (rv != 0) 7666342440ecSPrasad Singamsetty return (EFAULT); 7667342440ecSPrasad Singamsetty 7668342440ecSPrasad Singamsetty if (dir == VD_COPYOUT) { 7669342440ecSPrasad Singamsetty /* 7670342440ecSPrasad Singamsetty * The disk label may have changed. Revalidate the disk 7671342440ecSPrasad Singamsetty * geometry. This will also update the device nodes. 7672342440ecSPrasad Singamsetty */ 7673342440ecSPrasad Singamsetty vdc_validate(vdc); 7674342440ecSPrasad Singamsetty 7675342440ecSPrasad Singamsetty /* 7676342440ecSPrasad Singamsetty * We also need to keep track of the timestamp fields. 
7677342440ecSPrasad Singamsetty */ 7678342440ecSPrasad Singamsetty for (i = 0; i < V_NUMPAR; i++) { 7679342440ecSPrasad Singamsetty vdc->vtoc->timestamp[i] = evtoc.timestamp[i]; 7680342440ecSPrasad Singamsetty } 7681342440ecSPrasad Singamsetty 7682342440ecSPrasad Singamsetty } else { 7683342440ecSPrasad Singamsetty VTOC2VD_VTOC(&evtoc, (vd_vtoc_t *)to); 7684342440ecSPrasad Singamsetty } 76850a55fbb7Slm66018 76860a55fbb7Slm66018 return (0); 76870a55fbb7Slm66018 } 76880a55fbb7Slm66018 76890a55fbb7Slm66018 /* 76900a55fbb7Slm66018 * Function: 76910a55fbb7Slm66018 * vdc_get_geom_convert() 76920a55fbb7Slm66018 * 76930a55fbb7Slm66018 * Description: 7694d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCGGEOM, 7695d10e4ef2Snarayan * DKIOCG_PHYSGEOM and DKIOG_VIRTGEOM Solaris structures to the format 7696d10e4ef2Snarayan * defined in FWARC 2006/195 76970a55fbb7Slm66018 * 76980a55fbb7Slm66018 * Arguments: 7699d10e4ef2Snarayan * vdc - the vDisk client 77000a55fbb7Slm66018 * from - Buffer with data 77010a55fbb7Slm66018 * to - Buffer where data is to be copied to 77020a55fbb7Slm66018 * mode - flags passed to ioctl 77030a55fbb7Slm66018 * dir - direction of copy (in or out) 77040a55fbb7Slm66018 * 77050a55fbb7Slm66018 * Return Code: 77060a55fbb7Slm66018 * 0 - Success 77070a55fbb7Slm66018 * ENXIO - Invalid buffer passed in 7708d10e4ef2Snarayan * EFAULT - ddi_copyout of data failed 77090a55fbb7Slm66018 */ 77100a55fbb7Slm66018 static int 7711d10e4ef2Snarayan vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 77120a55fbb7Slm66018 { 7713d10e4ef2Snarayan _NOTE(ARGUNUSED(vdc)) 7714d10e4ef2Snarayan 77150a55fbb7Slm66018 struct dk_geom geom; 77160a55fbb7Slm66018 int copy_len = sizeof (struct dk_geom); 77170a55fbb7Slm66018 int rv = 0; 77180a55fbb7Slm66018 77190a55fbb7Slm66018 if (dir != VD_COPYOUT) 77200a55fbb7Slm66018 return (0); /* nothing to do */ 77210a55fbb7Slm66018 77220a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 
77230a55fbb7Slm66018 return (ENXIO); 77240a55fbb7Slm66018 77250a55fbb7Slm66018 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 77260a55fbb7Slm66018 rv = ddi_copyout(&geom, to, copy_len, mode); 77270a55fbb7Slm66018 if (rv != 0) 77280a55fbb7Slm66018 rv = EFAULT; 77290a55fbb7Slm66018 77300a55fbb7Slm66018 return (rv); 77310a55fbb7Slm66018 } 77320a55fbb7Slm66018 77330a55fbb7Slm66018 /* 77340a55fbb7Slm66018 * Function: 77350a55fbb7Slm66018 * vdc_set_geom_convert() 77360a55fbb7Slm66018 * 77370a55fbb7Slm66018 * Description: 7738d10e4ef2Snarayan * This routine performs the necessary convertions from the DKIOCSGEOM 7739d10e4ef2Snarayan * Solaris structure to the format defined in FWARC 2006/195. 77400a55fbb7Slm66018 * 77410a55fbb7Slm66018 * Arguments: 7742d10e4ef2Snarayan * vdc - the vDisk client 77430a55fbb7Slm66018 * from - Buffer with data 77440a55fbb7Slm66018 * to - Buffer where data is to be copied to 77450a55fbb7Slm66018 * mode - flags passed to ioctl 77460a55fbb7Slm66018 * dir - direction of copy (in or out) 77470a55fbb7Slm66018 * 77480a55fbb7Slm66018 * Return Code: 77490a55fbb7Slm66018 * 0 - Success 77500a55fbb7Slm66018 * ENXIO - Invalid buffer passed in 77510a55fbb7Slm66018 * EFAULT - ddi_copyin of data failed 77520a55fbb7Slm66018 */ 77530a55fbb7Slm66018 static int 7754d10e4ef2Snarayan vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 77550a55fbb7Slm66018 { 7756d10e4ef2Snarayan _NOTE(ARGUNUSED(vdc)) 7757d10e4ef2Snarayan 77580a55fbb7Slm66018 vd_geom_t vdgeom; 77590a55fbb7Slm66018 void *tmp_mem = NULL; 77600a55fbb7Slm66018 int copy_len = sizeof (struct dk_geom); 77610a55fbb7Slm66018 int rv = 0; 77620a55fbb7Slm66018 77630a55fbb7Slm66018 if (dir != VD_COPYIN) 77640a55fbb7Slm66018 return (0); /* nothing to do */ 77650a55fbb7Slm66018 77660a55fbb7Slm66018 if ((from == NULL) || (to == NULL)) 77670a55fbb7Slm66018 return (ENXIO); 77680a55fbb7Slm66018 77690a55fbb7Slm66018 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 77700a55fbb7Slm66018 77710a55fbb7Slm66018 rv 
= ddi_copyin(from, tmp_mem, copy_len, mode); 77720a55fbb7Slm66018 if (rv != 0) { 77730a55fbb7Slm66018 kmem_free(tmp_mem, copy_len); 77740a55fbb7Slm66018 return (EFAULT); 77750a55fbb7Slm66018 } 77760a55fbb7Slm66018 DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); 77770a55fbb7Slm66018 bcopy(&vdgeom, to, sizeof (vdgeom)); 77780a55fbb7Slm66018 kmem_free(tmp_mem, copy_len); 77790a55fbb7Slm66018 77800a55fbb7Slm66018 return (0); 77810a55fbb7Slm66018 } 77820a55fbb7Slm66018 77834bac2208Snarayan static int 77844bac2208Snarayan vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 77854bac2208Snarayan { 77864bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 77874bac2208Snarayan 77884bac2208Snarayan vd_efi_t *vd_efi; 77894bac2208Snarayan dk_efi_t dk_efi; 77904bac2208Snarayan int rv = 0; 77914bac2208Snarayan void *uaddr; 77924bac2208Snarayan 77934bac2208Snarayan if ((from == NULL) || (to == NULL)) 77944bac2208Snarayan return (ENXIO); 77954bac2208Snarayan 77964bac2208Snarayan if (dir == VD_COPYIN) { 77974bac2208Snarayan 77984bac2208Snarayan vd_efi = (vd_efi_t *)to; 77994bac2208Snarayan 78004bac2208Snarayan rv = ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode); 78014bac2208Snarayan if (rv != 0) 78024bac2208Snarayan return (EFAULT); 78034bac2208Snarayan 78044bac2208Snarayan vd_efi->lba = dk_efi.dki_lba; 78054bac2208Snarayan vd_efi->length = dk_efi.dki_length; 78064bac2208Snarayan bzero(vd_efi->data, vd_efi->length); 78074bac2208Snarayan 78084bac2208Snarayan } else { 78094bac2208Snarayan 78104bac2208Snarayan rv = ddi_copyin(to, &dk_efi, sizeof (dk_efi_t), mode); 78114bac2208Snarayan if (rv != 0) 78124bac2208Snarayan return (EFAULT); 78134bac2208Snarayan 78144bac2208Snarayan uaddr = dk_efi.dki_data; 78154bac2208Snarayan 78164bac2208Snarayan dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 78174bac2208Snarayan 78184bac2208Snarayan VD_EFI2DK_EFI((vd_efi_t *)from, &dk_efi); 78194bac2208Snarayan 78204bac2208Snarayan rv = ddi_copyout(dk_efi.dki_data, uaddr, 
dk_efi.dki_length, 78214bac2208Snarayan mode); 78224bac2208Snarayan if (rv != 0) 78234bac2208Snarayan return (EFAULT); 78244bac2208Snarayan 78254bac2208Snarayan kmem_free(dk_efi.dki_data, dk_efi.dki_length); 78264bac2208Snarayan } 78274bac2208Snarayan 78284bac2208Snarayan return (0); 78294bac2208Snarayan } 78304bac2208Snarayan 78314bac2208Snarayan static int 78324bac2208Snarayan vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 78334bac2208Snarayan { 78344bac2208Snarayan _NOTE(ARGUNUSED(vdc)) 78354bac2208Snarayan 78364bac2208Snarayan dk_efi_t dk_efi; 78374bac2208Snarayan void *uaddr; 78384bac2208Snarayan 78392f5224aeSachartre if (dir == VD_COPYOUT) { 78402f5224aeSachartre /* 78412f5224aeSachartre * The disk label may have changed. Revalidate the disk 78425b98b509Sachartre * geometry. This will also update the device nodes. 78432f5224aeSachartre */ 78442f5224aeSachartre vdc_validate(vdc); 78452f5224aeSachartre return (0); 78462f5224aeSachartre } 78474bac2208Snarayan 78484bac2208Snarayan if ((from == NULL) || (to == NULL)) 78494bac2208Snarayan return (ENXIO); 78504bac2208Snarayan 78514bac2208Snarayan if (ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode) != 0) 78524bac2208Snarayan return (EFAULT); 78534bac2208Snarayan 78544bac2208Snarayan uaddr = dk_efi.dki_data; 78554bac2208Snarayan 78564bac2208Snarayan dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 78574bac2208Snarayan 78584bac2208Snarayan if (ddi_copyin(uaddr, dk_efi.dki_data, dk_efi.dki_length, mode) != 0) 78594bac2208Snarayan return (EFAULT); 78604bac2208Snarayan 78614bac2208Snarayan DK_EFI2VD_EFI(&dk_efi, (vd_efi_t *)to); 78624bac2208Snarayan 78634bac2208Snarayan kmem_free(dk_efi.dki_data, dk_efi.dki_length); 78644bac2208Snarayan 78654bac2208Snarayan return (0); 78664bac2208Snarayan } 78674bac2208Snarayan 786817cadca8Slm66018 786917cadca8Slm66018 /* -------------------------------------------------------------------------- */ 787017cadca8Slm66018 78710a55fbb7Slm66018 /* 
78720a55fbb7Slm66018 * Function: 78731ae08745Sheppo * vdc_create_fake_geometry() 78741ae08745Sheppo * 78751ae08745Sheppo * Description: 787617cadca8Slm66018 * This routine fakes up the disk info needed for some DKIO ioctls such 787717cadca8Slm66018 * as DKIOCINFO and DKIOCGMEDIAINFO [just like lofi(7D) and ramdisk(7D) do] 78781ae08745Sheppo * 787917cadca8Slm66018 * Note: This function must not be called until the vDisk attributes have 788017cadca8Slm66018 * been exchanged as part of the handshake with the vDisk server. 78811ae08745Sheppo * 78821ae08745Sheppo * Arguments: 78831ae08745Sheppo * vdc - soft state pointer for this instance of the device driver. 78841ae08745Sheppo * 78851ae08745Sheppo * Return Code: 788678fcd0a1Sachartre * none. 78871ae08745Sheppo */ 788878fcd0a1Sachartre static void 78891ae08745Sheppo vdc_create_fake_geometry(vdc_t *vdc) 78901ae08745Sheppo { 78911ae08745Sheppo ASSERT(vdc != NULL); 789278fcd0a1Sachartre ASSERT(vdc->max_xfer_sz != 0); 78930d0c8d4bSnarayan 78940d0c8d4bSnarayan /* 78951ae08745Sheppo * DKIOCINFO support 78961ae08745Sheppo */ 789778fcd0a1Sachartre if (vdc->cinfo == NULL) 78981ae08745Sheppo vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 78991ae08745Sheppo 79001ae08745Sheppo (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 79011ae08745Sheppo (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 790265908c77Syu, larry liu - Sun Microsystems - Beijing China /* max_xfer_sz is #blocks so we don't need to divide by vdisk_bsize */ 79038e6a2a04Slm66018 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; 79042f5224aeSachartre 790587a7269eSachartre /* 79062f5224aeSachartre * We set the controller type to DKC_SCSI_CCS only if the VD_OP_SCSICMD 79072f5224aeSachartre * operation is supported, otherwise the controller type is DKC_DIRECT. 79082f5224aeSachartre * Version 1.0 does not support the VD_OP_SCSICMD operation, so the 79092f5224aeSachartre * controller type is always DKC_DIRECT in that case. 
79102f5224aeSachartre * 791117cadca8Slm66018 * If the virtual disk is backed by a physical CD/DVD device or 791217cadca8Slm66018 * an ISO image, modify the controller type to indicate this 791387a7269eSachartre */ 791417cadca8Slm66018 switch (vdc->vdisk_media) { 791517cadca8Slm66018 case VD_MEDIA_CD: 791617cadca8Slm66018 case VD_MEDIA_DVD: 791717cadca8Slm66018 vdc->cinfo->dki_ctype = DKC_CDROM; 791817cadca8Slm66018 break; 791917cadca8Slm66018 case VD_MEDIA_FIXED: 79202f5224aeSachartre if (VD_OP_SUPPORTED(vdc->operations, VD_OP_SCSICMD)) 79212f5224aeSachartre vdc->cinfo->dki_ctype = DKC_SCSI_CCS; 79222f5224aeSachartre else 792387a7269eSachartre vdc->cinfo->dki_ctype = DKC_DIRECT; 792417cadca8Slm66018 break; 792517cadca8Slm66018 default: 792617cadca8Slm66018 /* in the case of v1.0 we default to a fixed disk */ 792717cadca8Slm66018 vdc->cinfo->dki_ctype = DKC_DIRECT; 792817cadca8Slm66018 break; 792917cadca8Slm66018 } 79301ae08745Sheppo vdc->cinfo->dki_flags = DKI_FMTVOL; 79311ae08745Sheppo vdc->cinfo->dki_cnum = 0; 79321ae08745Sheppo vdc->cinfo->dki_addr = 0; 79331ae08745Sheppo vdc->cinfo->dki_space = 0; 79341ae08745Sheppo vdc->cinfo->dki_prio = 0; 79351ae08745Sheppo vdc->cinfo->dki_vec = 0; 79361ae08745Sheppo vdc->cinfo->dki_unit = vdc->instance; 79371ae08745Sheppo vdc->cinfo->dki_slave = 0; 79381ae08745Sheppo /* 79391ae08745Sheppo * The partition number will be created on the fly depending on the 79401ae08745Sheppo * actual slice (i.e. minor node) that is used to request the data. 
79411ae08745Sheppo */ 79421ae08745Sheppo vdc->cinfo->dki_partition = 0; 79431ae08745Sheppo 79441ae08745Sheppo /* 79451ae08745Sheppo * DKIOCGMEDIAINFO support 79461ae08745Sheppo */ 79470a55fbb7Slm66018 if (vdc->minfo == NULL) 79481ae08745Sheppo vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 794917cadca8Slm66018 795017cadca8Slm66018 if (vio_ver_is_supported(vdc->ver, 1, 1)) { 795117cadca8Slm66018 vdc->minfo->dki_media_type = 795217cadca8Slm66018 VD_MEDIATYPE2DK_MEDIATYPE(vdc->vdisk_media); 795317cadca8Slm66018 } else { 79541ae08745Sheppo vdc->minfo->dki_media_type = DK_FIXED_DISK; 795517cadca8Slm66018 } 795617cadca8Slm66018 79574bac2208Snarayan vdc->minfo->dki_capacity = vdc->vdisk_size; 795865908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->minfo->dki_lbsize = vdc->vdisk_bsize; 795978fcd0a1Sachartre } 79601ae08745Sheppo 796178fcd0a1Sachartre static ushort_t 796278fcd0a1Sachartre vdc_lbl2cksum(struct dk_label *label) 796378fcd0a1Sachartre { 796478fcd0a1Sachartre int count; 796578fcd0a1Sachartre ushort_t sum, *sp; 796678fcd0a1Sachartre 796778fcd0a1Sachartre count = (sizeof (struct dk_label)) / (sizeof (short)) - 1; 796878fcd0a1Sachartre sp = (ushort_t *)label; 796978fcd0a1Sachartre sum = 0; 797078fcd0a1Sachartre while (count--) { 797178fcd0a1Sachartre sum ^= *sp++; 797278fcd0a1Sachartre } 797378fcd0a1Sachartre 797478fcd0a1Sachartre return (sum); 79750a55fbb7Slm66018 } 79760a55fbb7Slm66018 7977de3a5331SRamesh Chitrothu static void 7978de3a5331SRamesh Chitrothu vdc_update_size(vdc_t *vdc, size_t dsk_size, size_t blk_size, size_t xfr_size) 7979de3a5331SRamesh Chitrothu { 7980de3a5331SRamesh Chitrothu vd_err_stats_t *stp; 7981de3a5331SRamesh Chitrothu 7982de3a5331SRamesh Chitrothu ASSERT(MUTEX_HELD(&vdc->lock)); 7983de3a5331SRamesh Chitrothu ASSERT(xfr_size != 0); 7984de3a5331SRamesh Chitrothu 7985de3a5331SRamesh Chitrothu /* 7986de3a5331SRamesh Chitrothu * If the disk size is unknown or sizes are unchanged then don't 7987de3a5331SRamesh 
Chitrothu * update anything. 7988de3a5331SRamesh Chitrothu */ 7989de3a5331SRamesh Chitrothu if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0 || 799065908c77Syu, larry liu - Sun Microsystems - Beijing China (blk_size == vdc->vdisk_bsize && dsk_size == vdc->vdisk_size && 7991de3a5331SRamesh Chitrothu xfr_size == vdc->max_xfer_sz)) 7992de3a5331SRamesh Chitrothu return; 7993de3a5331SRamesh Chitrothu 7994de3a5331SRamesh Chitrothu /* 7995de3a5331SRamesh Chitrothu * We don't know at compile time what the vDisk server will think 7996de3a5331SRamesh Chitrothu * are good values but we apply a large (arbitrary) upper bound to 7997de3a5331SRamesh Chitrothu * prevent memory exhaustion in vdc if it was allocating a DRing 7998de3a5331SRamesh Chitrothu * based of huge values sent by the server. We probably will never 7999de3a5331SRamesh Chitrothu * exceed this except if the message was garbage. 8000de3a5331SRamesh Chitrothu */ 8001de3a5331SRamesh Chitrothu if ((xfr_size * blk_size) > (PAGESIZE * DEV_BSIZE)) { 8002de3a5331SRamesh Chitrothu DMSG(vdc, 0, "[%d] vds block transfer size too big;" 8003de3a5331SRamesh Chitrothu " using max supported by vdc", vdc->instance); 800465908c77Syu, larry liu - Sun Microsystems - Beijing China xfr_size = maxphys / blk_size; 8005de3a5331SRamesh Chitrothu } 8006de3a5331SRamesh Chitrothu 8007de3a5331SRamesh Chitrothu vdc->max_xfer_sz = xfr_size; 800865908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vdisk_bsize = blk_size; 8009de3a5331SRamesh Chitrothu vdc->vdisk_size = dsk_size; 8010de3a5331SRamesh Chitrothu 8011de3a5331SRamesh Chitrothu stp = (vd_err_stats_t *)vdc->err_stats->ks_data; 8012de3a5331SRamesh Chitrothu stp->vd_capacity.value.ui64 = dsk_size * blk_size; 8013de3a5331SRamesh Chitrothu 8014de3a5331SRamesh Chitrothu vdc->minfo->dki_capacity = dsk_size; 8015de3a5331SRamesh Chitrothu vdc->minfo->dki_lbsize = (uint_t)blk_size; 8016de3a5331SRamesh Chitrothu } 8017de3a5331SRamesh Chitrothu 80180a55fbb7Slm66018 /* 801965908c77Syu, 
larry liu - Sun Microsystems - Beijing China * Update information about the VIO block size. The VIO block size is the 802065908c77Syu, larry liu - Sun Microsystems - Beijing China * same as the vdisk block size which is stored in vdc->vdisk_bsize so we 802165908c77Syu, larry liu - Sun Microsystems - Beijing China * do not store that information again. 802265908c77Syu, larry liu - Sun Microsystems - Beijing China * 802365908c77Syu, larry liu - Sun Microsystems - Beijing China * However, buf structures will always use a logical block size of 512 bytes 802465908c77Syu, larry liu - Sun Microsystems - Beijing China * (DEV_BSIZE) and we will need to convert logical block numbers to VIO block 802565908c77Syu, larry liu - Sun Microsystems - Beijing China * numbers for each read or write operation using vdc_strategy(). To speed up 802665908c77Syu, larry liu - Sun Microsystems - Beijing China * this conversion, we expect the VIO block size to be a power of 2 and a 802765908c77Syu, larry liu - Sun Microsystems - Beijing China * multiple 512 bytes (DEV_BSIZE), and we cache some useful information. 802865908c77Syu, larry liu - Sun Microsystems - Beijing China * 802965908c77Syu, larry liu - Sun Microsystems - Beijing China * The function return EINVAL if the new VIO block size (blk_size) is not a 803065908c77Syu, larry liu - Sun Microsystems - Beijing China * power of 2 or not a multiple of 512 bytes, otherwise it returns 0. 
803165908c77Syu, larry liu - Sun Microsystems - Beijing China */ 803265908c77Syu, larry liu - Sun Microsystems - Beijing China static int 803365908c77Syu, larry liu - Sun Microsystems - Beijing China vdc_update_vio_bsize(vdc_t *vdc, uint32_t blk_size) 803465908c77Syu, larry liu - Sun Microsystems - Beijing China { 803565908c77Syu, larry liu - Sun Microsystems - Beijing China uint32_t ratio, n; 803665908c77Syu, larry liu - Sun Microsystems - Beijing China int nshift = 0; 803765908c77Syu, larry liu - Sun Microsystems - Beijing China 803865908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bmask = 0; 803965908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bshift = 0; 804065908c77Syu, larry liu - Sun Microsystems - Beijing China 804165908c77Syu, larry liu - Sun Microsystems - Beijing China ASSERT(blk_size > 0); 804265908c77Syu, larry liu - Sun Microsystems - Beijing China 804365908c77Syu, larry liu - Sun Microsystems - Beijing China if ((blk_size % DEV_BSIZE) != 0) 804465908c77Syu, larry liu - Sun Microsystems - Beijing China return (EINVAL); 804565908c77Syu, larry liu - Sun Microsystems - Beijing China 804665908c77Syu, larry liu - Sun Microsystems - Beijing China ratio = blk_size / DEV_BSIZE; 804765908c77Syu, larry liu - Sun Microsystems - Beijing China 804865908c77Syu, larry liu - Sun Microsystems - Beijing China for (n = ratio; n > 1; n >>= 1) { 804965908c77Syu, larry liu - Sun Microsystems - Beijing China if ((n & 0x1) != 0) { 805065908c77Syu, larry liu - Sun Microsystems - Beijing China /* blk_size is not a power of 2 */ 805165908c77Syu, larry liu - Sun Microsystems - Beijing China return (EINVAL); 805265908c77Syu, larry liu - Sun Microsystems - Beijing China } 805365908c77Syu, larry liu - Sun Microsystems - Beijing China nshift++; 805465908c77Syu, larry liu - Sun Microsystems - Beijing China } 805565908c77Syu, larry liu - Sun Microsystems - Beijing China 805665908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bshift = 
nshift; 805765908c77Syu, larry liu - Sun Microsystems - Beijing China vdc->vio_bmask = ratio - 1; 805865908c77Syu, larry liu - Sun Microsystems - Beijing China 805965908c77Syu, larry liu - Sun Microsystems - Beijing China return (0); 806065908c77Syu, larry liu - Sun Microsystems - Beijing China } 806165908c77Syu, larry liu - Sun Microsystems - Beijing China 806265908c77Syu, larry liu - Sun Microsystems - Beijing China /* 80630a55fbb7Slm66018 * Function: 806478fcd0a1Sachartre * vdc_validate_geometry 80650a55fbb7Slm66018 * 80660a55fbb7Slm66018 * Description: 806778fcd0a1Sachartre * This routine discovers the label and geometry of the disk. It stores 806878fcd0a1Sachartre * the disk label and related information in the vdc structure. If it 806978fcd0a1Sachartre * fails to validate the geometry or to discover the disk label then 807078fcd0a1Sachartre * the label is marked as unknown (VD_DISK_LABEL_UNK). 80710a55fbb7Slm66018 * 80720a55fbb7Slm66018 * Arguments: 80730a55fbb7Slm66018 * vdc - soft state pointer for this instance of the device driver. 80740a55fbb7Slm66018 * 80750a55fbb7Slm66018 * Return Code: 807678fcd0a1Sachartre * 0 - success. 807778fcd0a1Sachartre * EINVAL - unknown disk label. 807878fcd0a1Sachartre * ENOTSUP - geometry not applicable (EFI label). 807978fcd0a1Sachartre * EIO - error accessing the disk. 
80800a55fbb7Slm66018 */ 80810a55fbb7Slm66018 static int 808278fcd0a1Sachartre vdc_validate_geometry(vdc_t *vdc) 80830a55fbb7Slm66018 { 80840a55fbb7Slm66018 dev_t dev; 80852f5224aeSachartre int rv, rval; 808665908c77Syu, larry liu - Sun Microsystems - Beijing China struct dk_label *label; 808778fcd0a1Sachartre struct dk_geom geom; 8088342440ecSPrasad Singamsetty struct extvtoc vtoc; 8089edcc0754Sachartre efi_gpt_t *gpt; 8090edcc0754Sachartre efi_gpe_t *gpe; 8091edcc0754Sachartre vd_efi_dev_t edev; 80920a55fbb7Slm66018 80930a55fbb7Slm66018 ASSERT(vdc != NULL); 809478fcd0a1Sachartre ASSERT(vdc->vtoc != NULL && vdc->geom != NULL); 809578fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 80960a55fbb7Slm66018 809778fcd0a1Sachartre mutex_exit(&vdc->lock); 8098de3a5331SRamesh Chitrothu /* 8099de3a5331SRamesh Chitrothu * Check the disk capacity in case it has changed. If that fails then 8100de3a5331SRamesh Chitrothu * we proceed and we will be using the disk size we currently have. 8101de3a5331SRamesh Chitrothu */ 8102de3a5331SRamesh Chitrothu (void) vdc_check_capacity(vdc); 81030a55fbb7Slm66018 dev = makedevice(ddi_driver_major(vdc->dip), 81040a55fbb7Slm66018 VD_MAKE_DEV(vdc->instance, 0)); 81054bac2208Snarayan 81062f5224aeSachartre rv = vd_process_ioctl(dev, DKIOCGGEOM, (caddr_t)&geom, FKIOCTL, &rval); 810778fcd0a1Sachartre if (rv == 0) 8108342440ecSPrasad Singamsetty rv = vd_process_ioctl(dev, DKIOCGEXTVTOC, (caddr_t)&vtoc, 81092f5224aeSachartre FKIOCTL, &rval); 81100d0c8d4bSnarayan 81114bac2208Snarayan if (rv == ENOTSUP) { 81124bac2208Snarayan /* 81134bac2208Snarayan * If the device does not support VTOC then we try 81144bac2208Snarayan * to read an EFI label. 8115edcc0754Sachartre * 8116edcc0754Sachartre * We need to know the block size and the disk size to 8117edcc0754Sachartre * be able to read an EFI label. 
81184bac2208Snarayan */ 8119edcc0754Sachartre if (vdc->vdisk_size == 0) { 8120edcc0754Sachartre mutex_enter(&vdc->lock); 8121edcc0754Sachartre vdc_store_label_unk(vdc); 8122de3a5331SRamesh Chitrothu return (EIO); 8123edcc0754Sachartre } 81244bac2208Snarayan 812565908c77Syu, larry liu - Sun Microsystems - Beijing China VDC_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl); 8126edcc0754Sachartre 8127edcc0754Sachartre rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe); 81284bac2208Snarayan 81294bac2208Snarayan if (rv) { 81303af08d82Slm66018 DMSG(vdc, 0, "[%d] Failed to get EFI (err=%d)", 81314bac2208Snarayan vdc->instance, rv); 813278fcd0a1Sachartre mutex_enter(&vdc->lock); 813378fcd0a1Sachartre vdc_store_label_unk(vdc); 813478fcd0a1Sachartre return (EIO); 813578fcd0a1Sachartre } 813678fcd0a1Sachartre 813778fcd0a1Sachartre mutex_enter(&vdc->lock); 8138edcc0754Sachartre vdc_store_label_efi(vdc, gpt, gpe); 8139edcc0754Sachartre vd_efi_free(&edev, gpt, gpe); 814078fcd0a1Sachartre return (ENOTSUP); 814178fcd0a1Sachartre } 814278fcd0a1Sachartre 814378fcd0a1Sachartre if (rv != 0) { 814478fcd0a1Sachartre DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", 814578fcd0a1Sachartre vdc->instance, rv); 814678fcd0a1Sachartre mutex_enter(&vdc->lock); 814778fcd0a1Sachartre vdc_store_label_unk(vdc); 814878fcd0a1Sachartre if (rv != EINVAL) 814978fcd0a1Sachartre rv = EIO; 81504bac2208Snarayan return (rv); 81514bac2208Snarayan } 81524bac2208Snarayan 815378fcd0a1Sachartre /* check that geometry and vtoc are valid */ 815478fcd0a1Sachartre if (geom.dkg_nhead == 0 || geom.dkg_nsect == 0 || 815578fcd0a1Sachartre vtoc.v_sanity != VTOC_SANE) { 815678fcd0a1Sachartre mutex_enter(&vdc->lock); 815778fcd0a1Sachartre vdc_store_label_unk(vdc); 815878fcd0a1Sachartre return (EINVAL); 815978fcd0a1Sachartre } 81604bac2208Snarayan 816178fcd0a1Sachartre /* 816278fcd0a1Sachartre * We have a disk and a valid VTOC. However this does not mean 816378fcd0a1Sachartre * that the disk currently have a VTOC label. 
The returned VTOC may 816478fcd0a1Sachartre * be a default VTOC to be used for configuring the disk (this is 816578fcd0a1Sachartre * what is done for disk image). So we read the label from the 816678fcd0a1Sachartre * beginning of the disk to ensure we really have a VTOC label. 816778fcd0a1Sachartre * 816878fcd0a1Sachartre * FUTURE: This could be the default way for reading the VTOC 816978fcd0a1Sachartre * from the disk as opposed to sending the VD_OP_GET_VTOC 817078fcd0a1Sachartre * to the server. This will be the default if vdc is implemented 817178fcd0a1Sachartre * ontop of cmlb. 817278fcd0a1Sachartre */ 817378fcd0a1Sachartre 817478fcd0a1Sachartre /* 817578fcd0a1Sachartre * Single slice disk does not support read using an absolute disk 817678fcd0a1Sachartre * offset so we just rely on the DKIOCGVTOC ioctl in that case. 817778fcd0a1Sachartre */ 817878fcd0a1Sachartre if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 817978fcd0a1Sachartre mutex_enter(&vdc->lock); 818078fcd0a1Sachartre if (vtoc.v_nparts != 1) { 818178fcd0a1Sachartre vdc_store_label_unk(vdc); 818278fcd0a1Sachartre return (EINVAL); 818378fcd0a1Sachartre } 818478fcd0a1Sachartre vdc_store_label_vtoc(vdc, &geom, &vtoc); 81854bac2208Snarayan return (0); 81864bac2208Snarayan } 81874bac2208Snarayan 818878fcd0a1Sachartre if (vtoc.v_nparts != V_NUMPAR) { 818978fcd0a1Sachartre mutex_enter(&vdc->lock); 819078fcd0a1Sachartre vdc_store_label_unk(vdc); 819178fcd0a1Sachartre return (EINVAL); 81920a55fbb7Slm66018 } 8193d10e4ef2Snarayan 8194d10e4ef2Snarayan /* 8195c813bb04SGabriel Carrillo * Most CD/DVDs do not have a disk label and the label is 8196c813bb04SGabriel Carrillo * generated by the disk driver. So the on-disk label check 8197c813bb04SGabriel Carrillo * below may fail and we return now to avoid this problem. 
8198c813bb04SGabriel Carrillo */ 8199c813bb04SGabriel Carrillo if (vdc->vdisk_media == VD_MEDIA_CD || 8200c813bb04SGabriel Carrillo vdc->vdisk_media == VD_MEDIA_DVD) { 8201c813bb04SGabriel Carrillo mutex_enter(&vdc->lock); 8202c813bb04SGabriel Carrillo vdc_store_label_vtoc(vdc, &geom, &vtoc); 8203c813bb04SGabriel Carrillo return (0); 8204c813bb04SGabriel Carrillo } 8205c813bb04SGabriel Carrillo 8206c813bb04SGabriel Carrillo /* 8207d10e4ef2Snarayan * Read disk label from start of disk 8208d10e4ef2Snarayan */ 820965908c77Syu, larry liu - Sun Microsystems - Beijing China label = kmem_alloc(vdc->vdisk_bsize, KM_SLEEP); 8210*00e3a3e9SAlexandre Chartre 8211*00e3a3e9SAlexandre Chartre rv = vdc_do_op(vdc, VD_OP_BREAD, (caddr_t)label, vdc->vdisk_bsize, 8212*00e3a3e9SAlexandre Chartre VD_SLICE_NONE, 0, NULL, VIO_read_dir, VDC_OP_NORMAL); 82130a55fbb7Slm66018 821465908c77Syu, larry liu - Sun Microsystems - Beijing China if (rv != 0 || label->dkl_magic != DKL_MAGIC || 821565908c77Syu, larry liu - Sun Microsystems - Beijing China label->dkl_cksum != vdc_lbl2cksum(label)) { 821678fcd0a1Sachartre DMSG(vdc, 1, "[%d] Got VTOC with invalid label\n", 821778fcd0a1Sachartre vdc->instance); 821865908c77Syu, larry liu - Sun Microsystems - Beijing China kmem_free(label, vdc->vdisk_bsize); 821978fcd0a1Sachartre mutex_enter(&vdc->lock); 822078fcd0a1Sachartre vdc_store_label_unk(vdc); 822178fcd0a1Sachartre return (EINVAL); 822278fcd0a1Sachartre } 822378fcd0a1Sachartre 822465908c77Syu, larry liu - Sun Microsystems - Beijing China kmem_free(label, vdc->vdisk_bsize); 822578fcd0a1Sachartre mutex_enter(&vdc->lock); 822678fcd0a1Sachartre vdc_store_label_vtoc(vdc, &geom, &vtoc); 822778fcd0a1Sachartre return (0); 822878fcd0a1Sachartre } 822978fcd0a1Sachartre 823078fcd0a1Sachartre /* 823178fcd0a1Sachartre * Function: 823278fcd0a1Sachartre * vdc_validate 823378fcd0a1Sachartre * 823478fcd0a1Sachartre * Description: 823578fcd0a1Sachartre * This routine discovers the label of the disk and create the 
823678fcd0a1Sachartre * appropriate device nodes if the label has changed. 823778fcd0a1Sachartre * 823878fcd0a1Sachartre * Arguments: 823978fcd0a1Sachartre * vdc - soft state pointer for this instance of the device driver. 824078fcd0a1Sachartre * 824178fcd0a1Sachartre * Return Code: 824278fcd0a1Sachartre * none. 824378fcd0a1Sachartre */ 824478fcd0a1Sachartre static void 824578fcd0a1Sachartre vdc_validate(vdc_t *vdc) 824678fcd0a1Sachartre { 824778fcd0a1Sachartre vd_disk_label_t old_label; 8248edcc0754Sachartre vd_slice_t old_slice[V_NUMPAR]; 824978fcd0a1Sachartre int rv; 825078fcd0a1Sachartre 825178fcd0a1Sachartre ASSERT(!MUTEX_HELD(&vdc->lock)); 825278fcd0a1Sachartre 825378fcd0a1Sachartre mutex_enter(&vdc->lock); 825478fcd0a1Sachartre 825578fcd0a1Sachartre /* save the current label and vtoc */ 825678fcd0a1Sachartre old_label = vdc->vdisk_label; 8257edcc0754Sachartre bcopy(vdc->slice, &old_slice, sizeof (vd_slice_t) * V_NUMPAR); 825878fcd0a1Sachartre 825978fcd0a1Sachartre /* check the geometry */ 826078fcd0a1Sachartre (void) vdc_validate_geometry(vdc); 826178fcd0a1Sachartre 826278fcd0a1Sachartre /* if the disk label has changed, update device nodes */ 8263*00e3a3e9SAlexandre Chartre if (vdc->vdisk_type == VD_DISK_TYPE_DISK && 8264*00e3a3e9SAlexandre Chartre vdc->vdisk_label != old_label) { 826578fcd0a1Sachartre 826678fcd0a1Sachartre if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 826778fcd0a1Sachartre rv = vdc_create_device_nodes_efi(vdc); 826878fcd0a1Sachartre else 826978fcd0a1Sachartre rv = vdc_create_device_nodes_vtoc(vdc); 827078fcd0a1Sachartre 827178fcd0a1Sachartre if (rv != 0) { 827278fcd0a1Sachartre DMSG(vdc, 0, "![%d] Failed to update device nodes", 827378fcd0a1Sachartre vdc->instance); 827478fcd0a1Sachartre } 827578fcd0a1Sachartre } 827678fcd0a1Sachartre 827778fcd0a1Sachartre mutex_exit(&vdc->lock); 827878fcd0a1Sachartre } 827978fcd0a1Sachartre 828078fcd0a1Sachartre static void 828178fcd0a1Sachartre vdc_validate_task(void *arg) 828278fcd0a1Sachartre { 
828378fcd0a1Sachartre vdc_t *vdc = (vdc_t *)arg; 828478fcd0a1Sachartre 828578fcd0a1Sachartre vdc_validate(vdc); 828678fcd0a1Sachartre 828778fcd0a1Sachartre mutex_enter(&vdc->lock); 828878fcd0a1Sachartre ASSERT(vdc->validate_pending > 0); 828978fcd0a1Sachartre vdc->validate_pending--; 829078fcd0a1Sachartre mutex_exit(&vdc->lock); 82911ae08745Sheppo } 82924bac2208Snarayan 82934bac2208Snarayan /* 82944bac2208Snarayan * Function: 82954bac2208Snarayan * vdc_setup_devid() 82964bac2208Snarayan * 82974bac2208Snarayan * Description: 82984bac2208Snarayan * This routine discovers the devid of a vDisk. It requests the devid of 82994bac2208Snarayan * the underlying device from the vDisk server, builds an encapsulated 83004bac2208Snarayan * devid based on the retrieved devid and registers that new devid to 83014bac2208Snarayan * the vDisk. 83024bac2208Snarayan * 83034bac2208Snarayan * Arguments: 83044bac2208Snarayan * vdc - soft state pointer for this instance of the device driver. 83054bac2208Snarayan * 83064bac2208Snarayan * Return Code: 83074bac2208Snarayan * 0 - A devid was succesfully registered for the vDisk 83084bac2208Snarayan */ 83094bac2208Snarayan static int 83104bac2208Snarayan vdc_setup_devid(vdc_t *vdc) 83114bac2208Snarayan { 83124bac2208Snarayan int rv; 83134bac2208Snarayan vd_devid_t *vd_devid; 83144bac2208Snarayan size_t bufsize, bufid_len; 8315*00e3a3e9SAlexandre Chartre ddi_devid_t vdisk_devid; 8316*00e3a3e9SAlexandre Chartre char *devid_str; 83174bac2208Snarayan 83184bac2208Snarayan /* 83194bac2208Snarayan * At first sight, we don't know the size of the devid that the 83204bac2208Snarayan * server will return but this size will be encoded into the 83214bac2208Snarayan * reply. So we do a first request using a default size then we 83224bac2208Snarayan * check if this size was large enough. If not then we do a second 83234bac2208Snarayan * request with the correct size returned by the server. 
Note that 83244bac2208Snarayan * ldc requires size to be 8-byte aligned. 83254bac2208Snarayan */ 83264bac2208Snarayan bufsize = P2ROUNDUP(VD_DEVID_SIZE(VD_DEVID_DEFAULT_LEN), 83274bac2208Snarayan sizeof (uint64_t)); 83284bac2208Snarayan vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 83294bac2208Snarayan bufid_len = bufsize - sizeof (vd_efi_t) - 1; 83304bac2208Snarayan 8331*00e3a3e9SAlexandre Chartre rv = vdc_do_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid, 8332*00e3a3e9SAlexandre Chartre bufsize, 0, 0, NULL, VIO_both_dir, 0); 83333af08d82Slm66018 8334*00e3a3e9SAlexandre Chartre DMSG(vdc, 2, "do_op returned %d\n", rv); 83353af08d82Slm66018 83364bac2208Snarayan if (rv) { 83374bac2208Snarayan kmem_free(vd_devid, bufsize); 83384bac2208Snarayan return (rv); 83394bac2208Snarayan } 83404bac2208Snarayan 83414bac2208Snarayan if (vd_devid->length > bufid_len) { 83424bac2208Snarayan /* 83434bac2208Snarayan * The returned devid is larger than the buffer used. Try again 83444bac2208Snarayan * with a buffer with the right size. 
83454bac2208Snarayan */ 83464bac2208Snarayan kmem_free(vd_devid, bufsize); 83474bac2208Snarayan bufsize = P2ROUNDUP(VD_DEVID_SIZE(vd_devid->length), 83484bac2208Snarayan sizeof (uint64_t)); 83494bac2208Snarayan vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 83504bac2208Snarayan bufid_len = bufsize - sizeof (vd_efi_t) - 1; 83514bac2208Snarayan 8352*00e3a3e9SAlexandre Chartre rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid, 8353*00e3a3e9SAlexandre Chartre bufsize, 0, 0, VIO_both_dir, B_TRUE); 83543af08d82Slm66018 83554bac2208Snarayan if (rv) { 83564bac2208Snarayan kmem_free(vd_devid, bufsize); 83574bac2208Snarayan return (rv); 83584bac2208Snarayan } 83594bac2208Snarayan } 83604bac2208Snarayan 83614bac2208Snarayan /* 83624bac2208Snarayan * The virtual disk should have the same device id as the one associated 83634bac2208Snarayan * with the physical disk it is mapped on, otherwise sharing a disk 83644bac2208Snarayan * between a LDom and a non-LDom may not work (for example for a shared 83654bac2208Snarayan * SVM disk set). 83664bac2208Snarayan * 83674bac2208Snarayan * The DDI framework does not allow creating a device id with any 83684bac2208Snarayan * type so we first create a device id of type DEVID_ENCAP and then 83694bac2208Snarayan * we restore the orignal type of the physical device. 
83704bac2208Snarayan */ 83714bac2208Snarayan 83723af08d82Slm66018 DMSG(vdc, 2, ": devid length = %d\n", vd_devid->length); 83733af08d82Slm66018 83744bac2208Snarayan /* build an encapsulated devid based on the returned devid */ 83754bac2208Snarayan if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length, 8376*00e3a3e9SAlexandre Chartre vd_devid->id, &vdisk_devid) != DDI_SUCCESS) { 83773af08d82Slm66018 DMSG(vdc, 1, "[%d] Fail to created devid\n", vdc->instance); 83784bac2208Snarayan kmem_free(vd_devid, bufsize); 83794bac2208Snarayan return (1); 83804bac2208Snarayan } 83814bac2208Snarayan 8382*00e3a3e9SAlexandre Chartre DEVID_FORMTYPE((impl_devid_t *)vdisk_devid, vd_devid->type); 83834bac2208Snarayan 8384*00e3a3e9SAlexandre Chartre ASSERT(ddi_devid_valid(vdisk_devid) == DDI_SUCCESS); 83854bac2208Snarayan 83864bac2208Snarayan kmem_free(vd_devid, bufsize); 83874bac2208Snarayan 8388*00e3a3e9SAlexandre Chartre if (vdc->devid != NULL) { 8389*00e3a3e9SAlexandre Chartre /* check that the devid hasn't changed */ 8390*00e3a3e9SAlexandre Chartre if (ddi_devid_compare(vdisk_devid, vdc->devid) == 0) { 8391*00e3a3e9SAlexandre Chartre ddi_devid_free(vdisk_devid); 8392*00e3a3e9SAlexandre Chartre return (0); 8393*00e3a3e9SAlexandre Chartre } 8394*00e3a3e9SAlexandre Chartre 8395*00e3a3e9SAlexandre Chartre cmn_err(CE_WARN, "vdisk@%d backend devid has changed", 8396*00e3a3e9SAlexandre Chartre vdc->instance); 8397*00e3a3e9SAlexandre Chartre 8398*00e3a3e9SAlexandre Chartre devid_str = ddi_devid_str_encode(vdc->devid, NULL); 8399*00e3a3e9SAlexandre Chartre 8400*00e3a3e9SAlexandre Chartre cmn_err(CE_CONT, "vdisk@%d backend initial devid: %s", 8401*00e3a3e9SAlexandre Chartre vdc->instance, 8402*00e3a3e9SAlexandre Chartre (devid_str)? 
devid_str : "<encoding error>"); 8403*00e3a3e9SAlexandre Chartre 8404*00e3a3e9SAlexandre Chartre if (devid_str) 8405*00e3a3e9SAlexandre Chartre ddi_devid_str_free(devid_str); 8406*00e3a3e9SAlexandre Chartre 8407*00e3a3e9SAlexandre Chartre devid_str = ddi_devid_str_encode(vdisk_devid, NULL); 8408*00e3a3e9SAlexandre Chartre 8409*00e3a3e9SAlexandre Chartre cmn_err(CE_CONT, "vdisk@%d backend current devid: %s", 8410*00e3a3e9SAlexandre Chartre vdc->instance, 8411*00e3a3e9SAlexandre Chartre (devid_str)? devid_str : "<encoding error>"); 8412*00e3a3e9SAlexandre Chartre 8413*00e3a3e9SAlexandre Chartre if (devid_str) 8414*00e3a3e9SAlexandre Chartre ddi_devid_str_free(devid_str); 8415*00e3a3e9SAlexandre Chartre 8416*00e3a3e9SAlexandre Chartre ddi_devid_free(vdisk_devid); 84174bac2208Snarayan return (1); 84184bac2208Snarayan } 84194bac2208Snarayan 8420*00e3a3e9SAlexandre Chartre if (ddi_devid_register(vdc->dip, vdisk_devid) != DDI_SUCCESS) { 8421*00e3a3e9SAlexandre Chartre DMSG(vdc, 1, "[%d] Fail to register devid\n", vdc->instance); 8422*00e3a3e9SAlexandre Chartre ddi_devid_free(vdisk_devid); 8423*00e3a3e9SAlexandre Chartre return (1); 8424*00e3a3e9SAlexandre Chartre } 8425*00e3a3e9SAlexandre Chartre 8426*00e3a3e9SAlexandre Chartre vdc->devid = vdisk_devid; 8427*00e3a3e9SAlexandre Chartre 84284bac2208Snarayan return (0); 84294bac2208Snarayan } 84304bac2208Snarayan 84314bac2208Snarayan static void 8432edcc0754Sachartre vdc_store_label_efi(vdc_t *vdc, efi_gpt_t *gpt, efi_gpe_t *gpe) 84334bac2208Snarayan { 8434edcc0754Sachartre int i, nparts; 84354bac2208Snarayan 843678fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 843778fcd0a1Sachartre 843878fcd0a1Sachartre vdc->vdisk_label = VD_DISK_LABEL_EFI; 8439342440ecSPrasad Singamsetty bzero(vdc->vtoc, sizeof (struct extvtoc)); 844078fcd0a1Sachartre bzero(vdc->geom, sizeof (struct dk_geom)); 8441edcc0754Sachartre bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 8442edcc0754Sachartre 8443edcc0754Sachartre nparts = 
gpt->efi_gpt_NumberOfPartitionEntries; 8444edcc0754Sachartre 8445edcc0754Sachartre for (i = 0; i < nparts && i < VD_EFI_WD_SLICE; i++) { 8446edcc0754Sachartre 8447d84f0041SAlexandre Chartre if (gpe[i].efi_gpe_StartingLBA == 0 && 8448edcc0754Sachartre gpe[i].efi_gpe_EndingLBA == 0) { 8449edcc0754Sachartre continue; 84504bac2208Snarayan } 8451edcc0754Sachartre 8452edcc0754Sachartre vdc->slice[i].start = gpe[i].efi_gpe_StartingLBA; 8453edcc0754Sachartre vdc->slice[i].nblocks = gpe[i].efi_gpe_EndingLBA - 8454edcc0754Sachartre gpe[i].efi_gpe_StartingLBA + 1; 8455edcc0754Sachartre } 8456edcc0754Sachartre 8457edcc0754Sachartre ASSERT(vdc->vdisk_size != 0); 8458edcc0754Sachartre vdc->slice[VD_EFI_WD_SLICE].start = 0; 8459edcc0754Sachartre vdc->slice[VD_EFI_WD_SLICE].nblocks = vdc->vdisk_size; 8460edcc0754Sachartre 84614bac2208Snarayan } 846278fcd0a1Sachartre 846378fcd0a1Sachartre static void 8464342440ecSPrasad Singamsetty vdc_store_label_vtoc(vdc_t *vdc, struct dk_geom *geom, struct extvtoc *vtoc) 846578fcd0a1Sachartre { 8466edcc0754Sachartre int i; 8467edcc0754Sachartre 846878fcd0a1Sachartre ASSERT(MUTEX_HELD(&vdc->lock)); 846965908c77Syu, larry liu - Sun Microsystems - Beijing China ASSERT(vdc->vdisk_bsize == vtoc->v_sectorsz); 847078fcd0a1Sachartre 847178fcd0a1Sachartre vdc->vdisk_label = VD_DISK_LABEL_VTOC; 8472342440ecSPrasad Singamsetty bcopy(vtoc, vdc->vtoc, sizeof (struct extvtoc)); 847378fcd0a1Sachartre bcopy(geom, vdc->geom, sizeof (struct dk_geom)); 8474edcc0754Sachartre bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 8475edcc0754Sachartre 8476edcc0754Sachartre for (i = 0; i < vtoc->v_nparts; i++) { 8477edcc0754Sachartre vdc->slice[i].start = vtoc->v_part[i].p_start; 8478edcc0754Sachartre vdc->slice[i].nblocks = vtoc->v_part[i].p_size; 8479edcc0754Sachartre } 848078fcd0a1Sachartre } 848178fcd0a1Sachartre 848278fcd0a1Sachartre static void 848378fcd0a1Sachartre vdc_store_label_unk(vdc_t *vdc) 848478fcd0a1Sachartre { 848578fcd0a1Sachartre 
ASSERT(MUTEX_HELD(&vdc->lock)); 848678fcd0a1Sachartre 848778fcd0a1Sachartre vdc->vdisk_label = VD_DISK_LABEL_UNK; 8488342440ecSPrasad Singamsetty bzero(vdc->vtoc, sizeof (struct extvtoc)); 848978fcd0a1Sachartre bzero(vdc->geom, sizeof (struct dk_geom)); 8490edcc0754Sachartre bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 849178fcd0a1Sachartre } 8492